aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/9p/Kconfig13
-rw-r--r--fs/9p/Makefile1
-rw-r--r--fs/9p/acl.c392
-rw-r--r--fs/9p/acl.h49
-rw-r--r--fs/9p/fid.c1
-rw-r--r--fs/9p/v9fs.c22
-rw-r--r--fs/9p/v9fs.h10
-rw-r--r--fs/9p/v9fs_vfs.h4
-rw-r--r--fs/9p/vfs_addr.c30
-rw-r--r--fs/9p/vfs_dir.c4
-rw-r--r--fs/9p/vfs_file.c265
-rw-r--r--fs/9p/vfs_inode.c258
-rw-r--r--fs/9p/vfs_super.c36
-rw-r--r--fs/9p/xattr.c52
-rw-r--r--fs/9p/xattr.h6
-rw-r--r--fs/Kconfig9
-rw-r--r--fs/Kconfig.binfmt4
-rw-r--r--fs/Makefile7
-rw-r--r--fs/adfs/super.c9
-rw-r--r--fs/affs/file.c4
-rw-r--r--fs/affs/inode.c2
-rw-r--r--fs/affs/super.c9
-rw-r--r--fs/afs/dir.c2
-rw-r--r--fs/afs/super.c19
-rw-r--r--fs/afs/write.c19
-rw-r--r--fs/aio.c14
-rw-r--r--fs/anon_inodes.c16
-rw-r--r--fs/autofs/Kconfig22
-rw-r--r--fs/autofs/Makefile7
-rw-r--r--fs/autofs/autofs_i.h165
-rw-r--r--fs/autofs/dirhash.c250
-rw-r--r--fs/autofs/init.c52
-rw-r--r--fs/autofs/inode.c288
-rw-r--r--fs/autofs/root.c645
-rw-r--r--fs/autofs/symlink.c26
-rw-r--r--fs/autofs/waitq.c205
-rw-r--r--fs/autofs4/init.c8
-rw-r--r--fs/autofs4/inode.c1
-rw-r--r--fs/befs/linuxvfs.c11
-rw-r--r--fs/bfs/dir.c2
-rw-r--r--fs/bfs/inode.c8
-rw-r--r--fs/binfmt_misc.c9
-rw-r--r--fs/block_dev.c42
-rw-r--r--fs/btrfs/inode.c4
-rw-r--r--fs/btrfs/super.c15
-rw-r--r--fs/buffer.c29
-rw-r--r--fs/ceph/addr.c9
-rw-r--r--fs/ceph/super.c50
-rw-r--r--fs/cifs/Kconfig3
-rw-r--r--fs/cifs/cifsencrypt.c427
-rw-r--r--fs/cifs/cifsfs.c16
-rw-r--r--fs/cifs/cifsfs.h2
-rw-r--r--fs/cifs/cifsglob.h55
-rw-r--r--fs/cifs/cifspdu.h13
-rw-r--r--fs/cifs/cifsproto.h14
-rw-r--r--fs/cifs/cifssmb.c4
-rw-r--r--fs/cifs/connect.c51
-rw-r--r--fs/cifs/file.c67
-rw-r--r--fs/cifs/inode.c15
-rw-r--r--fs/cifs/misc.c2
-rw-r--r--fs/cifs/sess.c166
-rw-r--r--fs/cifs/transport.c6
-rw-r--r--fs/coda/cache.c17
-rw-r--r--fs/coda/cnode.c19
-rw-r--r--fs/coda/dir.c157
-rw-r--r--fs/coda/file.c31
-rw-r--r--fs/coda/inode.c69
-rw-r--r--fs/coda/pioctl.c22
-rw-r--r--fs/coda/psdev.c41
-rw-r--r--fs/coda/symlink.c3
-rw-r--r--fs/coda/upcall.c89
-rw-r--r--fs/compat.c45
-rw-r--r--fs/compat_ioctl.c29
-rw-r--r--fs/configfs/inode.c1
-rw-r--r--fs/configfs/mount.c8
-rw-r--r--fs/cramfs/inode.c9
-rw-r--r--fs/dcache.c277
-rw-r--r--fs/debugfs/inode.c9
-rw-r--r--fs/devpts/inode.c32
-rw-r--r--fs/direct-io.c2
-rw-r--r--fs/ecryptfs/ecryptfs_kernel.h1
-rw-r--r--fs/ecryptfs/inode.c11
-rw-r--r--fs/ecryptfs/keystore.c45
-rw-r--r--fs/ecryptfs/main.c20
-rw-r--r--fs/ecryptfs/super.c2
-rw-r--r--fs/efs/super.c8
-rw-r--r--fs/eventpoll.c35
-rw-r--r--fs/exec.c173
-rw-r--r--fs/exofs/dir.c4
-rw-r--r--fs/exofs/file.c6
-rw-r--r--fs/exofs/inode.c64
-rw-r--r--fs/exofs/namei.c2
-rw-r--r--fs/exofs/super.c10
-rw-r--r--fs/exportfs/expfs.c17
-rw-r--r--fs/ext2/balloc.c3
-rw-r--r--fs/ext2/dir.c2
-rw-r--r--fs/ext2/ext2.h1
-rw-r--r--fs/ext2/inode.c11
-rw-r--r--fs/ext2/namei.c2
-rw-r--r--fs/ext2/super.c12
-rw-r--r--fs/ext2/xattr.c2
-rw-r--r--fs/ext3/balloc.c17
-rw-r--r--fs/ext3/ialloc.c11
-rw-r--r--fs/ext3/inode.c24
-rw-r--r--fs/ext3/namei.c2
-rw-r--r--fs/ext3/resize.c13
-rw-r--r--fs/ext3/super.c49
-rw-r--r--fs/ext4/Makefile2
-rw-r--r--fs/ext4/balloc.c5
-rw-r--r--fs/ext4/block_validity.c7
-rw-r--r--fs/ext4/dir.c2
-rw-r--r--fs/ext4/ext4.h110
-rw-r--r--fs/ext4/ext4_extents.h65
-rw-r--r--fs/ext4/extents.c368
-rw-r--r--fs/ext4/file.c44
-rw-r--r--fs/ext4/fsync.c83
-rw-r--r--fs/ext4/ialloc.c135
-rw-r--r--fs/ext4/inode.c594
-rw-r--r--fs/ext4/mballoc.c555
-rw-r--r--fs/ext4/migrate.c2
-rw-r--r--fs/ext4/move_extent.c22
-rw-r--r--fs/ext4/namei.c65
-rw-r--r--fs/ext4/page-io.c430
-rw-r--r--fs/ext4/resize.c52
-rw-r--r--fs/ext4/super.c547
-rw-r--r--fs/ext4/xattr.c4
-rw-r--r--fs/ext4/xattr.h10
-rw-r--r--fs/fat/namei_msdos.c9
-rw-r--r--fs/fat/namei_vfat.c9
-rw-r--r--fs/fcntl.c62
-rw-r--r--fs/file_table.c17
-rw-r--r--fs/freevxfs/vxfs_inode.c1
-rw-r--r--fs/freevxfs/vxfs_super.c9
-rw-r--r--fs/fs-writeback.c88
-rw-r--r--fs/fuse/control.c11
-rw-r--r--fs/fuse/dev.c19
-rw-r--r--fs/fuse/inode.c17
-rw-r--r--fs/gfs2/aops.c3
-rw-r--r--fs/gfs2/meta_io.c2
-rw-r--r--fs/gfs2/ops_fstype.c52
-rw-r--r--fs/gfs2/ops_inode.c8
-rw-r--r--fs/gfs2/super.c1
-rw-r--r--fs/hfs/hfs_fs.h13
-rw-r--r--fs/hfs/inode.c2
-rw-r--r--fs/hfs/mdb.c4
-rw-r--r--fs/hfs/super.c10
-rw-r--r--fs/hfsplus/dir.c6
-rw-r--r--fs/hfsplus/inode.c2
-rw-r--r--fs/hfsplus/ioctl.c2
-rw-r--r--fs/hfsplus/super.c10
-rw-r--r--fs/hostfs/hostfs.h3
-rw-r--r--fs/hostfs/hostfs_kern.c10
-rw-r--r--fs/hostfs/hostfs_user.c14
-rw-r--r--fs/hpfs/super.c9
-rw-r--r--fs/hppfs/hppfs.c8
-rw-r--r--fs/hugetlbfs/inode.c24
-rw-r--r--fs/inode.c527
-rw-r--r--fs/internal.h7
-rw-r--r--fs/ioctl.c39
-rw-r--r--fs/isofs/inode.c66
-rw-r--r--fs/jbd/checkpoint.c4
-rw-r--r--fs/jbd/commit.c8
-rw-r--r--fs/jbd/journal.c44
-rw-r--r--fs/jbd/recovery.c2
-rw-r--r--fs/jbd/transaction.c6
-rw-r--r--fs/jbd2/checkpoint.c10
-rw-r--r--fs/jbd2/commit.c12
-rw-r--r--fs/jbd2/journal.c6
-rw-r--r--fs/jbd2/transaction.c1
-rw-r--r--fs/jffs2/dir.c4
-rw-r--r--fs/jffs2/super.c9
-rw-r--r--fs/jfs/jfs_imap.c2
-rw-r--r--fs/jfs/jfs_txnmgr.c2
-rw-r--r--fs/jfs/namei.c2
-rw-r--r--fs/jfs/super.c9
-rw-r--r--fs/libfs.c22
-rw-r--r--fs/lockd/clntlock.c15
-rw-r--r--fs/lockd/clntproc.c13
-rw-r--r--fs/lockd/host.c1
-rw-r--r--fs/lockd/mon.c1
-rw-r--r--fs/lockd/svc.c13
-rw-r--r--fs/lockd/svc4proc.c2
-rw-r--r--fs/lockd/svclock.c37
-rw-r--r--fs/lockd/svcproc.c2
-rw-r--r--fs/lockd/svcsubs.c9
-rw-r--r--fs/locks.c76
-rw-r--r--fs/logfs/dev_bdev.c15
-rw-r--r--fs/logfs/dev_mtd.c18
-rw-r--r--fs/logfs/dir.c2
-rw-r--r--fs/logfs/logfs.h22
-rw-r--r--fs/logfs/super.c77
-rw-r--r--fs/minix/inode.c9
-rw-r--r--fs/minix/namei.c2
-rw-r--r--fs/namei.c18
-rw-r--r--fs/namespace.c2
-rw-r--r--fs/ncpfs/inode.c8
-rw-r--r--fs/nfs/Kconfig20
-rw-r--r--fs/nfs/Makefile4
-rw-r--r--fs/nfs/callback.c4
-rw-r--r--fs/nfs/callback_proc.c8
-rw-r--r--fs/nfs/client.c28
-rw-r--r--fs/nfs/dir.c1015
-rw-r--r--fs/nfs/direct.c2
-rw-r--r--fs/nfs/dns_resolve.c6
-rw-r--r--fs/nfs/file.c86
-rw-r--r--fs/nfs/getroot.c3
-rw-r--r--fs/nfs/idmap.c211
-rw-r--r--fs/nfs/inode.c39
-rw-r--r--fs/nfs/internal.h12
-rw-r--r--fs/nfs/mount_clnt.c4
-rw-r--r--fs/nfs/nfs2xdr.c107
-rw-r--r--fs/nfs/nfs3proc.c62
-rw-r--r--fs/nfs/nfs3xdr.c196
-rw-r--r--fs/nfs/nfs4_fs.h4
-rw-r--r--fs/nfs/nfs4filelayout.c280
-rw-r--r--fs/nfs/nfs4filelayout.h94
-rw-r--r--fs/nfs/nfs4filelayoutdev.c448
-rw-r--r--fs/nfs/nfs4proc.c497
-rw-r--r--fs/nfs/nfs4state.c42
-rw-r--r--fs/nfs/nfs4xdr.c700
-rw-r--r--fs/nfs/nfsroot.c568
-rw-r--r--fs/nfs/pagelist.c8
-rw-r--r--fs/nfs/pnfs.c783
-rw-r--r--fs/nfs/pnfs.h189
-rw-r--r--fs/nfs/proc.c35
-rw-r--r--fs/nfs/read.c4
-rw-r--r--fs/nfs/super.c168
-rw-r--r--fs/nfs/sysctl.c2
-rw-r--r--fs/nfs/unlink.c259
-rw-r--r--fs/nfs/write.c22
-rw-r--r--fs/nfsd/Kconfig13
-rw-r--r--fs/nfsd/export.c73
-rw-r--r--fs/nfsd/nfs4callback.c245
-rw-r--r--fs/nfsd/nfs4idmap.c105
-rw-r--r--fs/nfsd/nfs4proc.c7
-rw-r--r--fs/nfsd/nfs4state.c519
-rw-r--r--fs/nfsd/nfs4xdr.c18
-rw-r--r--fs/nfsd/nfsctl.c34
-rw-r--r--fs/nfsd/nfsd.h2
-rw-r--r--fs/nfsd/nfssvc.c5
-rw-r--r--fs/nfsd/state.h52
-rw-r--r--fs/nfsd/vfs.c16
-rw-r--r--fs/nilfs2/namei.c2
-rw-r--r--fs/nilfs2/segment.c2
-rw-r--r--fs/nilfs2/super.c16
-rw-r--r--fs/notify/fsnotify.c33
-rw-r--r--fs/notify/inode_mark.c2
-rw-r--r--fs/ntfs/super.c28
-rw-r--r--fs/ocfs2/aops.c19
-rw-r--r--fs/ocfs2/aops.h3
-rw-r--r--fs/ocfs2/dlmfs/dlmfs.c10
-rw-r--r--fs/ocfs2/file.c9
-rw-r--r--fs/ocfs2/namei.c2
-rw-r--r--fs/ocfs2/super.c11
-rw-r--r--fs/omfs/inode.c9
-rw-r--r--fs/open.c6
-rw-r--r--fs/openpromfs/inode.c8
-rw-r--r--fs/partitions/check.c12
-rw-r--r--fs/pipe.c11
-rw-r--r--fs/proc/Kconfig4
-rw-r--r--fs/proc/base.c111
-rw-r--r--fs/proc/proc_sysctl.c2
-rw-r--r--fs/proc/root.c16
-rw-r--r--fs/proc/softirqs.c4
-rw-r--r--fs/proc/stat.c14
-rw-r--r--fs/proc/task_mmu.c6
-rw-r--r--fs/qnx4/inode.c9
-rw-r--r--fs/quota/Kconfig4
-rw-r--r--fs/quota/dquot.c30
-rw-r--r--fs/ramfs/inode.c18
-rw-r--r--fs/read_write.c90
-rw-r--r--fs/reiserfs/inode.c26
-rw-r--r--fs/reiserfs/ioctl.c6
-rw-r--r--fs/reiserfs/namei.c2
-rw-r--r--fs/reiserfs/super.c9
-rw-r--r--fs/reiserfs/xattr.c7
-rw-r--r--fs/romfs/super.c17
-rw-r--r--fs/select.c6
-rw-r--r--fs/seq_file.c2
-rw-r--r--fs/signalfd.c10
-rw-r--r--fs/smbfs/Kconfig56
-rw-r--r--fs/smbfs/Makefile18
-rw-r--r--fs/smbfs/cache.c208
-rw-r--r--fs/smbfs/dir.c702
-rw-r--r--fs/smbfs/file.c454
-rw-r--r--fs/smbfs/getopt.c64
-rw-r--r--fs/smbfs/getopt.h14
-rw-r--r--fs/smbfs/inode.c844
-rw-r--r--fs/smbfs/ioctl.c69
-rw-r--r--fs/smbfs/proc.c3507
-rw-r--r--fs/smbfs/proto.h87
-rw-r--r--fs/smbfs/request.c818
-rw-r--r--fs/smbfs/request.h70
-rw-r--r--fs/smbfs/smb_debug.h34
-rw-r--r--fs/smbfs/smbiod.c344
-rw-r--r--fs/smbfs/sock.c386
-rw-r--r--fs/smbfs/symlink.c68
-rw-r--r--fs/squashfs/super.c10
-rw-r--r--fs/squashfs/xattr.c9
-rw-r--r--fs/squashfs/xattr.h4
-rw-r--r--fs/squashfs/xattr_id.c1
-rw-r--r--fs/super.c119
-rw-r--r--fs/sysfs/mount.c32
-rw-r--r--fs/sysv/namei.c2
-rw-r--r--fs/sysv/super.c17
-rw-r--r--fs/ubifs/dir.c2
-rw-r--r--fs/ubifs/super.c13
-rw-r--r--fs/udf/namei.c2
-rw-r--r--fs/udf/super.c9
-rw-r--r--fs/ufs/namei.c2
-rw-r--r--fs/ufs/super.c8
-rw-r--r--fs/xfs/Kconfig1
-rw-r--r--fs/xfs/linux-2.6/xfs_aops.c3
-rw-r--r--fs/xfs/linux-2.6/xfs_buf.c1
-rw-r--r--fs/xfs/linux-2.6/xfs_iops.c6
-rw-r--r--fs/xfs/linux-2.6/xfs_super.c14
-rw-r--r--fs/xfs/xfs_inode.h2
317 files changed, 10956 insertions, 14623 deletions
diff --git a/fs/9p/Kconfig b/fs/9p/Kconfig
index 795233702a4e..7e0511476797 100644
--- a/fs/9p/Kconfig
+++ b/fs/9p/Kconfig
@@ -17,3 +17,16 @@ config 9P_FSCACHE
17 Choose Y here to enable persistent, read-only local 17 Choose Y here to enable persistent, read-only local
18 caching support for 9p clients using FS-Cache 18 caching support for 9p clients using FS-Cache
19 19
20
21config 9P_FS_POSIX_ACL
22 bool "9P POSIX Access Control Lists"
23 depends on 9P_FS
24 select FS_POSIX_ACL
25 help
26 POSIX Access Control Lists (ACLs) support permissions for users and
27 groups beyond the owner/group/world scheme.
28
29 To learn more about Access Control Lists, visit the POSIX ACLs for
30 Linux website <http://acl.bestbits.at/>.
31
32 If you don't know what Access Control Lists are, say N
diff --git a/fs/9p/Makefile b/fs/9p/Makefile
index 91fba025fcbe..f8ba37effd1b 100644
--- a/fs/9p/Makefile
+++ b/fs/9p/Makefile
@@ -13,3 +13,4 @@ obj-$(CONFIG_9P_FS) := 9p.o
13 xattr_user.o 13 xattr_user.o
14 14
159p-$(CONFIG_9P_FSCACHE) += cache.o 159p-$(CONFIG_9P_FSCACHE) += cache.o
169p-$(CONFIG_9P_FS_POSIX_ACL) += acl.o
diff --git a/fs/9p/acl.c b/fs/9p/acl.c
new file mode 100644
index 000000000000..12d602351dbe
--- /dev/null
+++ b/fs/9p/acl.c
@@ -0,0 +1,392 @@
1/*
2 * Copyright IBM Corporation, 2010
3 * Author Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
4 *
5 * This program is free software; you can redistribute it and/or modify it
6 * under the terms of version 2.1 of the GNU Lesser General Public License
7 * as published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it would be useful, but
10 * WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
12 *
13 */
14
15#include <linux/module.h>
16#include <linux/fs.h>
17#include <net/9p/9p.h>
18#include <net/9p/client.h>
19#include <linux/slab.h>
20#include <linux/sched.h>
21#include <linux/posix_acl_xattr.h>
22#include "xattr.h"
23#include "acl.h"
24#include "v9fs_vfs.h"
25#include "v9fs.h"
26
27static struct posix_acl *__v9fs_get_acl(struct p9_fid *fid, char *name)
28{
29 ssize_t size;
30 void *value = NULL;
31 struct posix_acl *acl = NULL;;
32
33 size = v9fs_fid_xattr_get(fid, name, NULL, 0);
34 if (size > 0) {
35 value = kzalloc(size, GFP_NOFS);
36 if (!value)
37 return ERR_PTR(-ENOMEM);
38 size = v9fs_fid_xattr_get(fid, name, value, size);
39 if (size > 0) {
40 acl = posix_acl_from_xattr(value, size);
41 if (IS_ERR(acl))
42 goto err_out;
43 }
44 } else if (size == -ENODATA || size == 0 ||
45 size == -ENOSYS || size == -EOPNOTSUPP) {
46 acl = NULL;
47 } else
48 acl = ERR_PTR(-EIO);
49
50err_out:
51 kfree(value);
52 return acl;
53}
54
55int v9fs_get_acl(struct inode *inode, struct p9_fid *fid)
56{
57 int retval = 0;
58 struct posix_acl *pacl, *dacl;
59 struct v9fs_session_info *v9ses;
60
61 v9ses = v9fs_inode2v9ses(inode);
62 if ((v9ses->flags & V9FS_ACCESS_MASK) != V9FS_ACCESS_CLIENT) {
63 set_cached_acl(inode, ACL_TYPE_DEFAULT, NULL);
64 set_cached_acl(inode, ACL_TYPE_ACCESS, NULL);
65 return 0;
66 }
67 /* get the default/access acl values and cache them */
68 dacl = __v9fs_get_acl(fid, POSIX_ACL_XATTR_DEFAULT);
69 pacl = __v9fs_get_acl(fid, POSIX_ACL_XATTR_ACCESS);
70
71 if (!IS_ERR(dacl) && !IS_ERR(pacl)) {
72 set_cached_acl(inode, ACL_TYPE_DEFAULT, dacl);
73 set_cached_acl(inode, ACL_TYPE_ACCESS, pacl);
74 posix_acl_release(dacl);
75 posix_acl_release(pacl);
76 } else
77 retval = -EIO;
78
79 return retval;
80}
81
82static struct posix_acl *v9fs_get_cached_acl(struct inode *inode, int type)
83{
84 struct posix_acl *acl;
85 /*
86 * 9p Always cache the acl value when
87 * instantiating the inode (v9fs_inode_from_fid)
88 */
89 acl = get_cached_acl(inode, type);
90 BUG_ON(acl == ACL_NOT_CACHED);
91 return acl;
92}
93
94int v9fs_check_acl(struct inode *inode, int mask)
95{
96 struct posix_acl *acl;
97 struct v9fs_session_info *v9ses;
98
99 v9ses = v9fs_inode2v9ses(inode);
100 if ((v9ses->flags & V9FS_ACCESS_MASK) != V9FS_ACCESS_CLIENT) {
101 /*
102 * On access = client mode get the acl
103 * values from the server
104 */
105 return 0;
106 }
107 acl = v9fs_get_cached_acl(inode, ACL_TYPE_ACCESS);
108
109 if (IS_ERR(acl))
110 return PTR_ERR(acl);
111 if (acl) {
112 int error = posix_acl_permission(inode, acl, mask);
113 posix_acl_release(acl);
114 return error;
115 }
116 return -EAGAIN;
117}
118
119static int v9fs_set_acl(struct dentry *dentry, int type, struct posix_acl *acl)
120{
121 int retval;
122 char *name;
123 size_t size;
124 void *buffer;
125 struct inode *inode = dentry->d_inode;
126
127 set_cached_acl(inode, type, acl);
128 /* Set a setxattr request to server */
129 size = posix_acl_xattr_size(acl->a_count);
130 buffer = kmalloc(size, GFP_KERNEL);
131 if (!buffer)
132 return -ENOMEM;
133 retval = posix_acl_to_xattr(acl, buffer, size);
134 if (retval < 0)
135 goto err_free_out;
136 switch (type) {
137 case ACL_TYPE_ACCESS:
138 name = POSIX_ACL_XATTR_ACCESS;
139 break;
140 case ACL_TYPE_DEFAULT:
141 name = POSIX_ACL_XATTR_DEFAULT;
142 break;
143 default:
144 BUG();
145 }
146 retval = v9fs_xattr_set(dentry, name, buffer, size, 0);
147err_free_out:
148 kfree(buffer);
149 return retval;
150}
151
152int v9fs_acl_chmod(struct dentry *dentry)
153{
154 int retval = 0;
155 struct posix_acl *acl, *clone;
156 struct inode *inode = dentry->d_inode;
157
158 if (S_ISLNK(inode->i_mode))
159 return -EOPNOTSUPP;
160 acl = v9fs_get_cached_acl(inode, ACL_TYPE_ACCESS);
161 if (acl) {
162 clone = posix_acl_clone(acl, GFP_KERNEL);
163 posix_acl_release(acl);
164 if (!clone)
165 return -ENOMEM;
166 retval = posix_acl_chmod_masq(clone, inode->i_mode);
167 if (!retval)
168 retval = v9fs_set_acl(dentry, ACL_TYPE_ACCESS, clone);
169 posix_acl_release(clone);
170 }
171 return retval;
172}
173
174int v9fs_set_create_acl(struct dentry *dentry,
175 struct posix_acl *dpacl, struct posix_acl *pacl)
176{
177 if (dpacl)
178 v9fs_set_acl(dentry, ACL_TYPE_DEFAULT, dpacl);
179 if (pacl)
180 v9fs_set_acl(dentry, ACL_TYPE_ACCESS, pacl);
181 posix_acl_release(dpacl);
182 posix_acl_release(pacl);
183 return 0;
184}
185
186int v9fs_acl_mode(struct inode *dir, mode_t *modep,
187 struct posix_acl **dpacl, struct posix_acl **pacl)
188{
189 int retval = 0;
190 mode_t mode = *modep;
191 struct posix_acl *acl = NULL;
192
193 if (!S_ISLNK(mode)) {
194 acl = v9fs_get_cached_acl(dir, ACL_TYPE_DEFAULT);
195 if (IS_ERR(acl))
196 return PTR_ERR(acl);
197 if (!acl)
198 mode &= ~current_umask();
199 }
200 if (acl) {
201 struct posix_acl *clone;
202
203 if (S_ISDIR(mode))
204 *dpacl = acl;
205 clone = posix_acl_clone(acl, GFP_NOFS);
206 retval = -ENOMEM;
207 if (!clone)
208 goto cleanup;
209
210 retval = posix_acl_create_masq(clone, &mode);
211 if (retval < 0) {
212 posix_acl_release(clone);
213 goto cleanup;
214 }
215 if (retval > 0)
216 *pacl = clone;
217 }
218 *modep = mode;
219 return 0;
220cleanup:
221 posix_acl_release(acl);
222 return retval;
223
224}
225
226static int v9fs_remote_get_acl(struct dentry *dentry, const char *name,
227 void *buffer, size_t size, int type)
228{
229 char *full_name;
230
231 switch (type) {
232 case ACL_TYPE_ACCESS:
233 full_name = POSIX_ACL_XATTR_ACCESS;
234 break;
235 case ACL_TYPE_DEFAULT:
236 full_name = POSIX_ACL_XATTR_DEFAULT;
237 break;
238 default:
239 BUG();
240 }
241 return v9fs_xattr_get(dentry, full_name, buffer, size);
242}
243
244static int v9fs_xattr_get_acl(struct dentry *dentry, const char *name,
245 void *buffer, size_t size, int type)
246{
247 struct v9fs_session_info *v9ses;
248 struct posix_acl *acl;
249 int error;
250
251 if (strcmp(name, "") != 0)
252 return -EINVAL;
253
254 v9ses = v9fs_inode2v9ses(dentry->d_inode);
255 /*
256 * We allow set/get/list of acl when access=client is not specified
257 */
258 if ((v9ses->flags & V9FS_ACCESS_MASK) != V9FS_ACCESS_CLIENT)
259 return v9fs_remote_get_acl(dentry, name, buffer, size, type);
260
261 acl = v9fs_get_cached_acl(dentry->d_inode, type);
262 if (IS_ERR(acl))
263 return PTR_ERR(acl);
264 if (acl == NULL)
265 return -ENODATA;
266 error = posix_acl_to_xattr(acl, buffer, size);
267 posix_acl_release(acl);
268
269 return error;
270}
271
272static int v9fs_remote_set_acl(struct dentry *dentry, const char *name,
273 const void *value, size_t size,
274 int flags, int type)
275{
276 char *full_name;
277
278 switch (type) {
279 case ACL_TYPE_ACCESS:
280 full_name = POSIX_ACL_XATTR_ACCESS;
281 break;
282 case ACL_TYPE_DEFAULT:
283 full_name = POSIX_ACL_XATTR_DEFAULT;
284 break;
285 default:
286 BUG();
287 }
288 return v9fs_xattr_set(dentry, full_name, value, size, flags);
289}
290
291
292static int v9fs_xattr_set_acl(struct dentry *dentry, const char *name,
293 const void *value, size_t size,
294 int flags, int type)
295{
296 int retval;
297 struct posix_acl *acl;
298 struct v9fs_session_info *v9ses;
299 struct inode *inode = dentry->d_inode;
300
301 if (strcmp(name, "") != 0)
302 return -EINVAL;
303
304 v9ses = v9fs_inode2v9ses(dentry->d_inode);
305 /*
306 * set the attribute on the remote. Without even looking at the
307 * xattr value. We leave it to the server to validate
308 */
309 if ((v9ses->flags & V9FS_ACCESS_MASK) != V9FS_ACCESS_CLIENT)
310 return v9fs_remote_set_acl(dentry, name,
311 value, size, flags, type);
312
313 if (S_ISLNK(inode->i_mode))
314 return -EOPNOTSUPP;
315 if (!is_owner_or_cap(inode))
316 return -EPERM;
317 if (value) {
318 /* update the cached acl value */
319 acl = posix_acl_from_xattr(value, size);
320 if (IS_ERR(acl))
321 return PTR_ERR(acl);
322 else if (acl) {
323 retval = posix_acl_valid(acl);
324 if (retval)
325 goto err_out;
326 }
327 } else
328 acl = NULL;
329
330 switch (type) {
331 case ACL_TYPE_ACCESS:
332 name = POSIX_ACL_XATTR_ACCESS;
333 if (acl) {
334 mode_t mode = inode->i_mode;
335 retval = posix_acl_equiv_mode(acl, &mode);
336 if (retval < 0)
337 goto err_out;
338 else {
339 struct iattr iattr;
340 if (retval == 0) {
341 /*
342 * ACL can be represented
343 * by the mode bits. So don't
344 * update ACL.
345 */
346 acl = NULL;
347 value = NULL;
348 size = 0;
349 }
350 /* Updte the mode bits */
351 iattr.ia_mode = ((mode & S_IALLUGO) |
352 (inode->i_mode & ~S_IALLUGO));
353 iattr.ia_valid = ATTR_MODE;
354 /* FIXME should we update ctime ?
355 * What is the following setxattr update the
356 * mode ?
357 */
358 v9fs_vfs_setattr_dotl(dentry, &iattr);
359 }
360 }
361 break;
362 case ACL_TYPE_DEFAULT:
363 name = POSIX_ACL_XATTR_DEFAULT;
364 if (!S_ISDIR(inode->i_mode)) {
365 retval = -EINVAL;
366 goto err_out;
367 }
368 break;
369 default:
370 BUG();
371 }
372 retval = v9fs_xattr_set(dentry, name, value, size, flags);
373 if (!retval)
374 set_cached_acl(inode, type, acl);
375err_out:
376 posix_acl_release(acl);
377 return retval;
378}
379
380const struct xattr_handler v9fs_xattr_acl_access_handler = {
381 .prefix = POSIX_ACL_XATTR_ACCESS,
382 .flags = ACL_TYPE_ACCESS,
383 .get = v9fs_xattr_get_acl,
384 .set = v9fs_xattr_set_acl,
385};
386
387const struct xattr_handler v9fs_xattr_acl_default_handler = {
388 .prefix = POSIX_ACL_XATTR_DEFAULT,
389 .flags = ACL_TYPE_DEFAULT,
390 .get = v9fs_xattr_get_acl,
391 .set = v9fs_xattr_set_acl,
392};
diff --git a/fs/9p/acl.h b/fs/9p/acl.h
new file mode 100644
index 000000000000..59e18c2e8c7e
--- /dev/null
+++ b/fs/9p/acl.h
@@ -0,0 +1,49 @@
1/*
2 * Copyright IBM Corporation, 2010
3 * Author Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
4 *
5 * This program is free software; you can redistribute it and/or modify it
6 * under the terms of version 2.1 of the GNU Lesser General Public License
7 * as published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it would be useful, but
10 * WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
12 *
13 */
14#ifndef FS_9P_ACL_H
15#define FS_9P_ACL_H
16
17#ifdef CONFIG_9P_FS_POSIX_ACL
18extern int v9fs_get_acl(struct inode *, struct p9_fid *);
19extern int v9fs_check_acl(struct inode *inode, int mask);
20extern int v9fs_acl_chmod(struct dentry *);
21extern int v9fs_set_create_acl(struct dentry *,
22 struct posix_acl *, struct posix_acl *);
23extern int v9fs_acl_mode(struct inode *dir, mode_t *modep,
24 struct posix_acl **dpacl, struct posix_acl **pacl);
25#else
26#define v9fs_check_acl NULL
27static inline int v9fs_get_acl(struct inode *inode, struct p9_fid *fid)
28{
29 return 0;
30}
31static inline int v9fs_acl_chmod(struct dentry *dentry)
32{
33 return 0;
34}
35static inline int v9fs_set_create_acl(struct dentry *dentry,
36 struct posix_acl *dpacl,
37 struct posix_acl *pacl)
38{
39 return 0;
40}
41static inline int v9fs_acl_mode(struct inode *dir, mode_t *modep,
42 struct posix_acl **dpacl,
43 struct posix_acl **pacl)
44{
45 return 0;
46}
47
48#endif
49#endif /* FS_9P_XATTR_H */
diff --git a/fs/9p/fid.c b/fs/9p/fid.c
index 6406f896bf95..b00223c99d70 100644
--- a/fs/9p/fid.c
+++ b/fs/9p/fid.c
@@ -149,6 +149,7 @@ struct p9_fid *v9fs_fid_lookup(struct dentry *dentry)
149 switch (access) { 149 switch (access) {
150 case V9FS_ACCESS_SINGLE: 150 case V9FS_ACCESS_SINGLE:
151 case V9FS_ACCESS_USER: 151 case V9FS_ACCESS_USER:
152 case V9FS_ACCESS_CLIENT:
152 uid = current_fsuid(); 153 uid = current_fsuid();
153 any = 0; 154 any = 0;
154 break; 155 break;
diff --git a/fs/9p/v9fs.c b/fs/9p/v9fs.c
index 38dc0e067599..2f77cd33ba83 100644
--- a/fs/9p/v9fs.c
+++ b/fs/9p/v9fs.c
@@ -193,7 +193,17 @@ static int v9fs_parse_options(struct v9fs_session_info *v9ses, char *opts)
193 v9ses->flags |= V9FS_ACCESS_USER; 193 v9ses->flags |= V9FS_ACCESS_USER;
194 else if (strcmp(s, "any") == 0) 194 else if (strcmp(s, "any") == 0)
195 v9ses->flags |= V9FS_ACCESS_ANY; 195 v9ses->flags |= V9FS_ACCESS_ANY;
196 else { 196 else if (strcmp(s, "client") == 0) {
197#ifdef CONFIG_9P_FS_POSIX_ACL
198 v9ses->flags |= V9FS_ACCESS_CLIENT;
199#else
200 P9_DPRINTK(P9_DEBUG_ERROR,
201 "access=client option not supported\n");
202 kfree(s);
203 ret = -EINVAL;
204 goto free_and_return;
205#endif
206 } else {
197 v9ses->flags |= V9FS_ACCESS_SINGLE; 207 v9ses->flags |= V9FS_ACCESS_SINGLE;
198 v9ses->uid = simple_strtoul(s, &e, 10); 208 v9ses->uid = simple_strtoul(s, &e, 10);
199 if (*e != '\0') 209 if (*e != '\0')
@@ -278,6 +288,16 @@ struct p9_fid *v9fs_session_init(struct v9fs_session_info *v9ses,
278 288
279 v9ses->maxdata = v9ses->clnt->msize - P9_IOHDRSZ; 289 v9ses->maxdata = v9ses->clnt->msize - P9_IOHDRSZ;
280 290
291 if (!v9fs_proto_dotl(v9ses) &&
292 ((v9ses->flags & V9FS_ACCESS_MASK) == V9FS_ACCESS_CLIENT)) {
293 /*
294 * We support ACCESS_CLIENT only for dotl.
295 * Fall back to ACCESS_USER
296 */
297 v9ses->flags &= ~V9FS_ACCESS_MASK;
298 v9ses->flags |= V9FS_ACCESS_USER;
299 }
300 /*FIXME !! */
281 /* for legacy mode, fall back to V9FS_ACCESS_ANY */ 301 /* for legacy mode, fall back to V9FS_ACCESS_ANY */
282 if (!(v9fs_proto_dotu(v9ses) || v9fs_proto_dotl(v9ses)) && 302 if (!(v9fs_proto_dotu(v9ses) || v9fs_proto_dotl(v9ses)) &&
283 ((v9ses->flags&V9FS_ACCESS_MASK) == V9FS_ACCESS_USER)) { 303 ((v9ses->flags&V9FS_ACCESS_MASK) == V9FS_ACCESS_USER)) {
diff --git a/fs/9p/v9fs.h b/fs/9p/v9fs.h
index 4c963c9fc41f..cb6396855e2d 100644
--- a/fs/9p/v9fs.h
+++ b/fs/9p/v9fs.h
@@ -33,13 +33,17 @@
33 * 33 *
34 * Session flags reflect options selected by users at mount time 34 * Session flags reflect options selected by users at mount time
35 */ 35 */
36#define V9FS_ACCESS_ANY (V9FS_ACCESS_SINGLE | \
37 V9FS_ACCESS_USER | \
38 V9FS_ACCESS_CLIENT)
39#define V9FS_ACCESS_MASK V9FS_ACCESS_ANY
40
36enum p9_session_flags { 41enum p9_session_flags {
37 V9FS_PROTO_2000U = 0x01, 42 V9FS_PROTO_2000U = 0x01,
38 V9FS_PROTO_2000L = 0x02, 43 V9FS_PROTO_2000L = 0x02,
39 V9FS_ACCESS_SINGLE = 0x04, 44 V9FS_ACCESS_SINGLE = 0x04,
40 V9FS_ACCESS_USER = 0x08, 45 V9FS_ACCESS_USER = 0x08,
41 V9FS_ACCESS_ANY = 0x0C, 46 V9FS_ACCESS_CLIENT = 0x10
42 V9FS_ACCESS_MASK = 0x0C,
43}; 47};
44 48
45/* possible values of ->cache */ 49/* possible values of ->cache */
@@ -113,8 +117,6 @@ void v9fs_session_close(struct v9fs_session_info *v9ses);
113void v9fs_session_cancel(struct v9fs_session_info *v9ses); 117void v9fs_session_cancel(struct v9fs_session_info *v9ses);
114void v9fs_session_begin_cancel(struct v9fs_session_info *v9ses); 118void v9fs_session_begin_cancel(struct v9fs_session_info *v9ses);
115 119
116#define V9FS_MAGIC 0x01021997
117
118/* other default globals */ 120/* other default globals */
119#define V9FS_PORT 564 121#define V9FS_PORT 564
120#define V9FS_DEFUSER "nobody" 122#define V9FS_DEFUSER "nobody"
diff --git a/fs/9p/v9fs_vfs.h b/fs/9p/v9fs_vfs.h
index 88418c419ea7..bab0eac873f4 100644
--- a/fs/9p/v9fs_vfs.h
+++ b/fs/9p/v9fs_vfs.h
@@ -64,3 +64,7 @@ int v9fs_uflags2omode(int uflags, int extended);
64 64
65ssize_t v9fs_file_readn(struct file *, char *, char __user *, u32, u64); 65ssize_t v9fs_file_readn(struct file *, char *, char __user *, u32, u64);
66void v9fs_blank_wstat(struct p9_wstat *wstat); 66void v9fs_blank_wstat(struct p9_wstat *wstat);
67int v9fs_vfs_setattr_dotl(struct dentry *, struct iattr *);
68int v9fs_file_fsync_dotl(struct file *filp, int datasync);
69
70#define P9_LOCK_TIMEOUT (30*HZ)
diff --git a/fs/9p/vfs_addr.c b/fs/9p/vfs_addr.c
index 90e38449f4b3..b7f2a8e3863e 100644
--- a/fs/9p/vfs_addr.c
+++ b/fs/9p/vfs_addr.c
@@ -154,10 +154,40 @@ static int v9fs_launder_page(struct page *page)
154 return 0; 154 return 0;
155} 155}
156 156
157/**
158 * v9fs_direct_IO - 9P address space operation for direct I/O
159 * @rw: direction (read or write)
160 * @iocb: target I/O control block
161 * @iov: array of vectors that define I/O buffer
162 * @pos: offset in file to begin the operation
163 * @nr_segs: size of iovec array
164 *
165 * The presence of v9fs_direct_IO() in the address space ops vector
166 * allowes open() O_DIRECT flags which would have failed otherwise.
167 *
168 * In the non-cached mode, we shunt off direct read and write requests before
169 * the VFS gets them, so this method should never be called.
170 *
171 * Direct IO is not 'yet' supported in the cached mode. Hence when
172 * this routine is called through generic_file_aio_read(), the read/write fails
173 * with an error.
174 *
175 */
176ssize_t v9fs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
177 loff_t pos, unsigned long nr_segs)
178{
179 P9_DPRINTK(P9_DEBUG_VFS, "v9fs_direct_IO: v9fs_direct_IO (%s) "
180 "off/no(%lld/%lu) EINVAL\n",
181 iocb->ki_filp->f_path.dentry->d_name.name,
182 (long long) pos, nr_segs);
183
184 return -EINVAL;
185}
157const struct address_space_operations v9fs_addr_operations = { 186const struct address_space_operations v9fs_addr_operations = {
158 .readpage = v9fs_vfs_readpage, 187 .readpage = v9fs_vfs_readpage,
159 .readpages = v9fs_vfs_readpages, 188 .readpages = v9fs_vfs_readpages,
160 .releasepage = v9fs_release_page, 189 .releasepage = v9fs_release_page,
161 .invalidatepage = v9fs_invalidate_page, 190 .invalidatepage = v9fs_invalidate_page,
162 .launder_page = v9fs_launder_page, 191 .launder_page = v9fs_launder_page,
192 .direct_IO = v9fs_direct_IO,
163}; 193};
diff --git a/fs/9p/vfs_dir.c b/fs/9p/vfs_dir.c
index 899f168fd19c..b84ebe8cefed 100644
--- a/fs/9p/vfs_dir.c
+++ b/fs/9p/vfs_dir.c
@@ -242,7 +242,8 @@ static int v9fs_dir_readdir_dotl(struct file *filp, void *dirent,
242 while (rdir->head < rdir->tail) { 242 while (rdir->head < rdir->tail) {
243 243
244 err = p9dirent_read(rdir->buf + rdir->head, 244 err = p9dirent_read(rdir->buf + rdir->head,
245 buflen - rdir->head, &curdirent, 245 rdir->tail - rdir->head,
246 &curdirent,
246 fid->clnt->proto_version); 247 fid->clnt->proto_version);
247 if (err < 0) { 248 if (err < 0) {
248 P9_DPRINTK(P9_DEBUG_VFS, "returned %d\n", err); 249 P9_DPRINTK(P9_DEBUG_VFS, "returned %d\n", err);
@@ -314,4 +315,5 @@ const struct file_operations v9fs_dir_operations_dotl = {
314 .readdir = v9fs_dir_readdir_dotl, 315 .readdir = v9fs_dir_readdir_dotl,
315 .open = v9fs_file_open, 316 .open = v9fs_file_open,
316 .release = v9fs_dir_release, 317 .release = v9fs_dir_release,
318 .fsync = v9fs_file_fsync_dotl,
317}; 319};
diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c
index e97c92bd6f16..240c30674396 100644
--- a/fs/9p/vfs_file.c
+++ b/fs/9p/vfs_file.c
@@ -33,6 +33,7 @@
33#include <linux/inet.h> 33#include <linux/inet.h>
34#include <linux/list.h> 34#include <linux/list.h>
35#include <linux/pagemap.h> 35#include <linux/pagemap.h>
36#include <linux/utsname.h>
36#include <asm/uaccess.h> 37#include <asm/uaccess.h>
37#include <linux/idr.h> 38#include <linux/idr.h>
38#include <net/9p/9p.h> 39#include <net/9p/9p.h>
@@ -44,6 +45,7 @@
44#include "cache.h" 45#include "cache.h"
45 46
46static const struct file_operations v9fs_cached_file_operations; 47static const struct file_operations v9fs_cached_file_operations;
48static const struct file_operations v9fs_cached_file_operations_dotl;
47 49
48/** 50/**
49 * v9fs_file_open - open a file (or directory) 51 * v9fs_file_open - open a file (or directory)
@@ -92,6 +94,8 @@ int v9fs_file_open(struct inode *inode, struct file *file)
92 /* enable cached file options */ 94 /* enable cached file options */
93 if(file->f_op == &v9fs_file_operations) 95 if(file->f_op == &v9fs_file_operations)
94 file->f_op = &v9fs_cached_file_operations; 96 file->f_op = &v9fs_cached_file_operations;
97 else if (file->f_op == &v9fs_file_operations_dotl)
98 file->f_op = &v9fs_cached_file_operations_dotl;
95 99
96#ifdef CONFIG_9P_FSCACHE 100#ifdef CONFIG_9P_FSCACHE
97 v9fs_cache_inode_set_cookie(inode, file); 101 v9fs_cache_inode_set_cookie(inode, file);
@@ -130,6 +134,206 @@ static int v9fs_file_lock(struct file *filp, int cmd, struct file_lock *fl)
130 return res; 134 return res;
131} 135}
132 136
137static int v9fs_file_do_lock(struct file *filp, int cmd, struct file_lock *fl)
138{
139 struct p9_flock flock;
140 struct p9_fid *fid;
141 uint8_t status;
142 int res = 0;
143 unsigned char fl_type;
144
145 fid = filp->private_data;
146 BUG_ON(fid == NULL);
147
148 if ((fl->fl_flags & FL_POSIX) != FL_POSIX)
149 BUG();
150
151 res = posix_lock_file_wait(filp, fl);
152 if (res < 0)
153 goto out;
154
155 /* convert posix lock to p9 tlock args */
156 memset(&flock, 0, sizeof(flock));
157 flock.type = fl->fl_type;
158 flock.start = fl->fl_start;
159 if (fl->fl_end == OFFSET_MAX)
160 flock.length = 0;
161 else
162 flock.length = fl->fl_end - fl->fl_start + 1;
163 flock.proc_id = fl->fl_pid;
164 flock.client_id = utsname()->nodename;
165 if (IS_SETLKW(cmd))
166 flock.flags = P9_LOCK_FLAGS_BLOCK;
167
168 /*
169 * if its a blocked request and we get P9_LOCK_BLOCKED as the status
170 * for lock request, keep on trying
171 */
172 for (;;) {
173 res = p9_client_lock_dotl(fid, &flock, &status);
174 if (res < 0)
175 break;
176
177 if (status != P9_LOCK_BLOCKED)
178 break;
179 if (status == P9_LOCK_BLOCKED && !IS_SETLKW(cmd))
180 break;
181 schedule_timeout_interruptible(P9_LOCK_TIMEOUT);
182 }
183
184 /* map 9p status to VFS status */
185 switch (status) {
186 case P9_LOCK_SUCCESS:
187 res = 0;
188 break;
189 case P9_LOCK_BLOCKED:
190 res = -EAGAIN;
191 break;
192 case P9_LOCK_ERROR:
193 case P9_LOCK_GRACE:
194 res = -ENOLCK;
195 break;
196 default:
197 BUG();
198 }
199
200 /*
201 * incase server returned error for lock request, revert
202 * it locally
203 */
204 if (res < 0 && fl->fl_type != F_UNLCK) {
205 fl_type = fl->fl_type;
206 fl->fl_type = F_UNLCK;
207 res = posix_lock_file_wait(filp, fl);
208 fl->fl_type = fl_type;
209 }
210out:
211 return res;
212}
213
214static int v9fs_file_getlock(struct file *filp, struct file_lock *fl)
215{
216 struct p9_getlock glock;
217 struct p9_fid *fid;
218 int res = 0;
219
220 fid = filp->private_data;
221 BUG_ON(fid == NULL);
222
223 posix_test_lock(filp, fl);
224 /*
225 * if we have a conflicting lock locally, no need to validate
226 * with server
227 */
228 if (fl->fl_type != F_UNLCK)
229 return res;
230
231 /* convert posix lock to p9 tgetlock args */
232 memset(&glock, 0, sizeof(glock));
233 glock.type = fl->fl_type;
234 glock.start = fl->fl_start;
235 if (fl->fl_end == OFFSET_MAX)
236 glock.length = 0;
237 else
238 glock.length = fl->fl_end - fl->fl_start + 1;
239 glock.proc_id = fl->fl_pid;
240 glock.client_id = utsname()->nodename;
241
242 res = p9_client_getlock_dotl(fid, &glock);
243 if (res < 0)
244 return res;
245 if (glock.type != F_UNLCK) {
246 fl->fl_type = glock.type;
247 fl->fl_start = glock.start;
248 if (glock.length == 0)
249 fl->fl_end = OFFSET_MAX;
250 else
251 fl->fl_end = glock.start + glock.length - 1;
252 fl->fl_pid = glock.proc_id;
253 } else
254 fl->fl_type = F_UNLCK;
255
256 return res;
257}
258
259/**
260 * v9fs_file_lock_dotl - lock a file (or directory)
261 * @filp: file to be locked
262 * @cmd: lock command
263 * @fl: file lock structure
264 *
265 */
266
267static int v9fs_file_lock_dotl(struct file *filp, int cmd, struct file_lock *fl)
268{
269 struct inode *inode = filp->f_path.dentry->d_inode;
270 int ret = -ENOLCK;
271
272 P9_DPRINTK(P9_DEBUG_VFS, "filp: %p cmd:%d lock: %p name: %s\n", filp,
273 cmd, fl, filp->f_path.dentry->d_name.name);
274
275 /* No mandatory locks */
276 if (__mandatory_lock(inode) && fl->fl_type != F_UNLCK)
277 goto out_err;
278
279 if ((IS_SETLK(cmd) || IS_SETLKW(cmd)) && fl->fl_type != F_UNLCK) {
280 filemap_write_and_wait(inode->i_mapping);
281 invalidate_mapping_pages(&inode->i_data, 0, -1);
282 }
283
284 if (IS_SETLK(cmd) || IS_SETLKW(cmd))
285 ret = v9fs_file_do_lock(filp, cmd, fl);
286 else if (IS_GETLK(cmd))
287 ret = v9fs_file_getlock(filp, fl);
288 else
289 ret = -EINVAL;
290out_err:
291 return ret;
292}
293
294/**
295 * v9fs_file_flock_dotl - lock a file
296 * @filp: file to be locked
297 * @cmd: lock command
298 * @fl: file lock structure
299 *
300 */
301
302static int v9fs_file_flock_dotl(struct file *filp, int cmd,
303 struct file_lock *fl)
304{
305 struct inode *inode = filp->f_path.dentry->d_inode;
306 int ret = -ENOLCK;
307
308 P9_DPRINTK(P9_DEBUG_VFS, "filp: %p cmd:%d lock: %p name: %s\n", filp,
309 cmd, fl, filp->f_path.dentry->d_name.name);
310
311 /* No mandatory locks */
312 if (__mandatory_lock(inode) && fl->fl_type != F_UNLCK)
313 goto out_err;
314
315 if (!(fl->fl_flags & FL_FLOCK))
316 goto out_err;
317
318 if ((IS_SETLK(cmd) || IS_SETLKW(cmd)) && fl->fl_type != F_UNLCK) {
319 filemap_write_and_wait(inode->i_mapping);
320 invalidate_mapping_pages(&inode->i_data, 0, -1);
321 }
322 /* Convert flock to posix lock */
323 fl->fl_owner = (fl_owner_t)filp;
324 fl->fl_start = 0;
325 fl->fl_end = OFFSET_MAX;
326 fl->fl_flags |= FL_POSIX;
327 fl->fl_flags ^= FL_FLOCK;
328
329 if (IS_SETLK(cmd) | IS_SETLKW(cmd))
330 ret = v9fs_file_do_lock(filp, cmd, fl);
331 else
332 ret = -EINVAL;
333out_err:
334 return ret;
335}
336
133/** 337/**
134 * v9fs_file_readn - read from a file 338 * v9fs_file_readn - read from a file
135 * @filp: file pointer to read 339 * @filp: file pointer to read
@@ -219,7 +423,9 @@ static ssize_t
219v9fs_file_write(struct file *filp, const char __user * data, 423v9fs_file_write(struct file *filp, const char __user * data,
220 size_t count, loff_t * offset) 424 size_t count, loff_t * offset)
221{ 425{
222 int n, rsize, total = 0; 426 ssize_t retval;
427 size_t total = 0;
428 int n;
223 struct p9_fid *fid; 429 struct p9_fid *fid;
224 struct p9_client *clnt; 430 struct p9_client *clnt;
225 struct inode *inode = filp->f_path.dentry->d_inode; 431 struct inode *inode = filp->f_path.dentry->d_inode;
@@ -232,14 +438,19 @@ v9fs_file_write(struct file *filp, const char __user * data,
232 fid = filp->private_data; 438 fid = filp->private_data;
233 clnt = fid->clnt; 439 clnt = fid->clnt;
234 440
235 rsize = fid->iounit ? fid->iounit : clnt->msize - P9_IOHDRSZ; 441 retval = generic_write_checks(filp, &origin, &count, 0);
442 if (retval)
443 goto out;
236 444
237 do { 445 retval = -EINVAL;
238 if (count < rsize) 446 if ((ssize_t) count < 0)
239 rsize = count; 447 goto out;
448 retval = 0;
449 if (!count)
450 goto out;
240 451
241 n = p9_client_write(fid, NULL, data+total, origin+total, 452 do {
242 rsize); 453 n = p9_client_write(fid, NULL, data+total, origin+total, count);
243 if (n <= 0) 454 if (n <= 0)
244 break; 455 break;
245 count -= n; 456 count -= n;
@@ -258,9 +469,11 @@ v9fs_file_write(struct file *filp, const char __user * data,
258 } 469 }
259 470
260 if (n < 0) 471 if (n < 0)
261 return n; 472 retval = n;
262 473 else
263 return total; 474 retval = total;
475out:
476 return retval;
264} 477}
265 478
266static int v9fs_file_fsync(struct file *filp, int datasync) 479static int v9fs_file_fsync(struct file *filp, int datasync)
@@ -278,6 +491,20 @@ static int v9fs_file_fsync(struct file *filp, int datasync)
278 return retval; 491 return retval;
279} 492}
280 493
494int v9fs_file_fsync_dotl(struct file *filp, int datasync)
495{
496 struct p9_fid *fid;
497 int retval;
498
499 P9_DPRINTK(P9_DEBUG_VFS, "v9fs_file_fsync_dotl: filp %p datasync %x\n",
500 filp, datasync);
501
502 fid = filp->private_data;
503
504 retval = p9_client_fsync(fid, datasync);
505 return retval;
506}
507
281static const struct file_operations v9fs_cached_file_operations = { 508static const struct file_operations v9fs_cached_file_operations = {
282 .llseek = generic_file_llseek, 509 .llseek = generic_file_llseek,
283 .read = do_sync_read, 510 .read = do_sync_read,
@@ -290,6 +517,19 @@ static const struct file_operations v9fs_cached_file_operations = {
290 .fsync = v9fs_file_fsync, 517 .fsync = v9fs_file_fsync,
291}; 518};
292 519
520static const struct file_operations v9fs_cached_file_operations_dotl = {
521 .llseek = generic_file_llseek,
522 .read = do_sync_read,
523 .aio_read = generic_file_aio_read,
524 .write = v9fs_file_write,
525 .open = v9fs_file_open,
526 .release = v9fs_dir_release,
527 .lock = v9fs_file_lock_dotl,
528 .flock = v9fs_file_flock_dotl,
529 .mmap = generic_file_readonly_mmap,
530 .fsync = v9fs_file_fsync_dotl,
531};
532
293const struct file_operations v9fs_file_operations = { 533const struct file_operations v9fs_file_operations = {
294 .llseek = generic_file_llseek, 534 .llseek = generic_file_llseek,
295 .read = v9fs_file_read, 535 .read = v9fs_file_read,
@@ -307,7 +547,8 @@ const struct file_operations v9fs_file_operations_dotl = {
307 .write = v9fs_file_write, 547 .write = v9fs_file_write,
308 .open = v9fs_file_open, 548 .open = v9fs_file_open,
309 .release = v9fs_dir_release, 549 .release = v9fs_dir_release,
310 .lock = v9fs_file_lock, 550 .lock = v9fs_file_lock_dotl,
551 .flock = v9fs_file_flock_dotl,
311 .mmap = generic_file_readonly_mmap, 552 .mmap = generic_file_readonly_mmap,
312 .fsync = v9fs_file_fsync, 553 .fsync = v9fs_file_fsync_dotl,
313}; 554};
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index 9e670d527646..34bf71b56542 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -36,6 +36,7 @@
36#include <linux/sched.h> 36#include <linux/sched.h>
37#include <linux/slab.h> 37#include <linux/slab.h>
38#include <linux/xattr.h> 38#include <linux/xattr.h>
39#include <linux/posix_acl.h>
39#include <net/9p/9p.h> 40#include <net/9p/9p.h>
40#include <net/9p/client.h> 41#include <net/9p/client.h>
41 42
@@ -44,6 +45,7 @@
44#include "fid.h" 45#include "fid.h"
45#include "cache.h" 46#include "cache.h"
46#include "xattr.h" 47#include "xattr.h"
48#include "acl.h"
47 49
48static const struct inode_operations v9fs_dir_inode_operations; 50static const struct inode_operations v9fs_dir_inode_operations;
49static const struct inode_operations v9fs_dir_inode_operations_dotu; 51static const struct inode_operations v9fs_dir_inode_operations_dotu;
@@ -53,6 +55,10 @@ static const struct inode_operations v9fs_file_inode_operations_dotl;
53static const struct inode_operations v9fs_symlink_inode_operations; 55static const struct inode_operations v9fs_symlink_inode_operations;
54static const struct inode_operations v9fs_symlink_inode_operations_dotl; 56static const struct inode_operations v9fs_symlink_inode_operations_dotl;
55 57
58static int
59v9fs_vfs_mknod_dotl(struct inode *dir, struct dentry *dentry, int omode,
60 dev_t rdev);
61
56/** 62/**
57 * unixmode2p9mode - convert unix mode bits to plan 9 63 * unixmode2p9mode - convert unix mode bits to plan 9
58 * @v9ses: v9fs session information 64 * @v9ses: v9fs session information
@@ -500,6 +506,11 @@ v9fs_inode_dotl(struct v9fs_session_info *v9ses, struct p9_fid *fid,
500 v9fs_vcookie_set_qid(ret, &st->qid); 506 v9fs_vcookie_set_qid(ret, &st->qid);
501 v9fs_cache_inode_get_cookie(ret); 507 v9fs_cache_inode_get_cookie(ret);
502#endif 508#endif
509 err = v9fs_get_acl(ret, fid);
510 if (err) {
511 iput(ret);
512 goto error;
513 }
503 kfree(st); 514 kfree(st);
504 return ret; 515 return ret;
505error: 516error:
@@ -553,13 +564,6 @@ static int v9fs_remove(struct inode *dir, struct dentry *file, int rmdir)
553 return retval; 564 return retval;
554} 565}
555 566
556static int
557v9fs_open_created(struct inode *inode, struct file *file)
558{
559 return 0;
560}
561
562
563/** 567/**
564 * v9fs_create - Create a file 568 * v9fs_create - Create a file
565 * @v9ses: session information 569 * @v9ses: session information
@@ -655,29 +659,37 @@ error:
655 */ 659 */
656 660
657static int 661static int
658v9fs_vfs_create_dotl(struct inode *dir, struct dentry *dentry, int mode, 662v9fs_vfs_create_dotl(struct inode *dir, struct dentry *dentry, int omode,
659 struct nameidata *nd) 663 struct nameidata *nd)
660{ 664{
661 int err = 0; 665 int err = 0;
662 char *name = NULL; 666 char *name = NULL;
663 gid_t gid; 667 gid_t gid;
664 int flags; 668 int flags;
669 mode_t mode;
665 struct v9fs_session_info *v9ses; 670 struct v9fs_session_info *v9ses;
666 struct p9_fid *fid = NULL; 671 struct p9_fid *fid = NULL;
667 struct p9_fid *dfid, *ofid; 672 struct p9_fid *dfid, *ofid;
668 struct file *filp; 673 struct file *filp;
669 struct p9_qid qid; 674 struct p9_qid qid;
670 struct inode *inode; 675 struct inode *inode;
676 struct posix_acl *pacl = NULL, *dacl = NULL;
671 677
672 v9ses = v9fs_inode2v9ses(dir); 678 v9ses = v9fs_inode2v9ses(dir);
673 if (nd && nd->flags & LOOKUP_OPEN) 679 if (nd && nd->flags & LOOKUP_OPEN)
674 flags = nd->intent.open.flags - 1; 680 flags = nd->intent.open.flags - 1;
675 else 681 else {
676 flags = O_RDWR; 682 /*
683 * create call without LOOKUP_OPEN is due
684 * to mknod of regular files. So use mknod
685 * operation.
686 */
687 return v9fs_vfs_mknod_dotl(dir, dentry, omode, 0);
688 }
677 689
678 name = (char *) dentry->d_name.name; 690 name = (char *) dentry->d_name.name;
679 P9_DPRINTK(P9_DEBUG_VFS, "v9fs_vfs_create_dotl: name:%s flags:0x%x " 691 P9_DPRINTK(P9_DEBUG_VFS, "v9fs_vfs_create_dotl: name:%s flags:0x%x "
680 "mode:0x%x\n", name, flags, mode); 692 "mode:0x%x\n", name, flags, omode);
681 693
682 dfid = v9fs_fid_lookup(dentry->d_parent); 694 dfid = v9fs_fid_lookup(dentry->d_parent);
683 if (IS_ERR(dfid)) { 695 if (IS_ERR(dfid)) {
@@ -695,6 +707,15 @@ v9fs_vfs_create_dotl(struct inode *dir, struct dentry *dentry, int mode,
695 } 707 }
696 708
697 gid = v9fs_get_fsgid_for_create(dir); 709 gid = v9fs_get_fsgid_for_create(dir);
710
711 mode = omode;
712 /* Update mode based on ACL value */
713 err = v9fs_acl_mode(dir, &mode, &dacl, &pacl);
714 if (err) {
715 P9_DPRINTK(P9_DEBUG_VFS,
716 "Failed to get acl values in creat %d\n", err);
717 goto error;
718 }
698 err = p9_client_create_dotl(ofid, name, flags, mode, gid, &qid); 719 err = p9_client_create_dotl(ofid, name, flags, mode, gid, &qid);
699 if (err < 0) { 720 if (err < 0) {
700 P9_DPRINTK(P9_DEBUG_VFS, 721 P9_DPRINTK(P9_DEBUG_VFS,
@@ -702,46 +723,52 @@ v9fs_vfs_create_dotl(struct inode *dir, struct dentry *dentry, int mode,
702 err); 723 err);
703 goto error; 724 goto error;
704 } 725 }
726 /* instantiate inode and assign the unopened fid to the dentry */
727 if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE ||
728 (nd && nd->flags & LOOKUP_OPEN)) {
729 fid = p9_client_walk(dfid, 1, &name, 1);
730 if (IS_ERR(fid)) {
731 err = PTR_ERR(fid);
732 P9_DPRINTK(P9_DEBUG_VFS, "p9_client_walk failed %d\n",
733 err);
734 fid = NULL;
735 goto error;
736 }
705 737
706 /* No need to populate the inode if we are not opening the file AND 738 inode = v9fs_inode_from_fid(v9ses, fid, dir->i_sb);
707 * not in cached mode. 739 if (IS_ERR(inode)) {
708 */ 740 err = PTR_ERR(inode);
709 if (!v9ses->cache && !(nd && nd->flags & LOOKUP_OPEN)) { 741 P9_DPRINTK(P9_DEBUG_VFS, "inode creation failed %d\n",
710 /* Not in cached mode. No need to populate inode with stat */ 742 err);
711 dentry->d_op = &v9fs_dentry_operations; 743 goto error;
712 p9_client_clunk(ofid); 744 }
713 d_instantiate(dentry, NULL);
714 return 0;
715 }
716
717 /* Now walk from the parent so we can get an unopened fid. */
718 fid = p9_client_walk(dfid, 1, &name, 1);
719 if (IS_ERR(fid)) {
720 err = PTR_ERR(fid);
721 P9_DPRINTK(P9_DEBUG_VFS, "p9_client_walk failed %d\n", err);
722 fid = NULL;
723 goto error;
724 }
725
726 /* instantiate inode and assign the unopened fid to dentry */
727 inode = v9fs_inode_from_fid(v9ses, fid, dir->i_sb);
728 if (IS_ERR(inode)) {
729 err = PTR_ERR(inode);
730 P9_DPRINTK(P9_DEBUG_VFS, "inode creation failed %d\n", err);
731 goto error;
732 }
733 if (v9ses->cache)
734 dentry->d_op = &v9fs_cached_dentry_operations; 745 dentry->d_op = &v9fs_cached_dentry_operations;
735 else 746 d_instantiate(dentry, inode);
747 err = v9fs_fid_add(dentry, fid);
748 if (err < 0)
749 goto error;
750 /* The fid would get clunked via a dput */
751 fid = NULL;
752 } else {
753 /*
754 * Not in cached mode. No need to populate
755 * inode with stat. We need to get an inode
756 * so that we can set the acl with dentry
757 */
758 inode = v9fs_get_inode(dir->i_sb, mode);
759 if (IS_ERR(inode)) {
760 err = PTR_ERR(inode);
761 goto error;
762 }
736 dentry->d_op = &v9fs_dentry_operations; 763 dentry->d_op = &v9fs_dentry_operations;
737 d_instantiate(dentry, inode); 764 d_instantiate(dentry, inode);
738 err = v9fs_fid_add(dentry, fid); 765 }
739 if (err < 0) 766 /* Now set the ACL based on the default value */
740 goto error; 767 v9fs_set_create_acl(dentry, dacl, pacl);
741 768
742 /* if we are opening a file, assign the open fid to the file */ 769 /* if we are opening a file, assign the open fid to the file */
743 if (nd && nd->flags & LOOKUP_OPEN) { 770 if (nd && nd->flags & LOOKUP_OPEN) {
744 filp = lookup_instantiate_filp(nd, dentry, v9fs_open_created); 771 filp = lookup_instantiate_filp(nd, dentry, generic_file_open);
745 if (IS_ERR(filp)) { 772 if (IS_ERR(filp)) {
746 p9_client_clunk(ofid); 773 p9_client_clunk(ofid);
747 return PTR_ERR(filp); 774 return PTR_ERR(filp);
@@ -800,7 +827,7 @@ v9fs_vfs_create(struct inode *dir, struct dentry *dentry, int mode,
800 827
801 /* if we are opening a file, assign the open fid to the file */ 828 /* if we are opening a file, assign the open fid to the file */
802 if (nd && nd->flags & LOOKUP_OPEN) { 829 if (nd && nd->flags & LOOKUP_OPEN) {
803 filp = lookup_instantiate_filp(nd, dentry, v9fs_open_created); 830 filp = lookup_instantiate_filp(nd, dentry, generic_file_open);
804 if (IS_ERR(filp)) { 831 if (IS_ERR(filp)) {
805 err = PTR_ERR(filp); 832 err = PTR_ERR(filp);
806 goto error; 833 goto error;
@@ -859,23 +886,28 @@ static int v9fs_vfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
859 * 886 *
860 */ 887 */
861 888
862static int v9fs_vfs_mkdir_dotl(struct inode *dir, struct dentry *dentry, 889static int v9fs_vfs_mkdir_dotl(struct inode *dir,
863 int mode) 890 struct dentry *dentry, int omode)
864{ 891{
865 int err; 892 int err;
866 struct v9fs_session_info *v9ses; 893 struct v9fs_session_info *v9ses;
867 struct p9_fid *fid = NULL, *dfid = NULL; 894 struct p9_fid *fid = NULL, *dfid = NULL;
868 gid_t gid; 895 gid_t gid;
869 char *name; 896 char *name;
897 mode_t mode;
870 struct inode *inode; 898 struct inode *inode;
871 struct p9_qid qid; 899 struct p9_qid qid;
872 struct dentry *dir_dentry; 900 struct dentry *dir_dentry;
901 struct posix_acl *dacl = NULL, *pacl = NULL;
873 902
874 P9_DPRINTK(P9_DEBUG_VFS, "name %s\n", dentry->d_name.name); 903 P9_DPRINTK(P9_DEBUG_VFS, "name %s\n", dentry->d_name.name);
875 err = 0; 904 err = 0;
876 v9ses = v9fs_inode2v9ses(dir); 905 v9ses = v9fs_inode2v9ses(dir);
877 906
878 mode |= S_IFDIR; 907 omode |= S_IFDIR;
908 if (dir->i_mode & S_ISGID)
909 omode |= S_ISGID;
910
879 dir_dentry = v9fs_dentry_from_dir_inode(dir); 911 dir_dentry = v9fs_dentry_from_dir_inode(dir);
880 dfid = v9fs_fid_lookup(dir_dentry); 912 dfid = v9fs_fid_lookup(dir_dentry);
881 if (IS_ERR(dfid)) { 913 if (IS_ERR(dfid)) {
@@ -886,11 +918,14 @@ static int v9fs_vfs_mkdir_dotl(struct inode *dir, struct dentry *dentry,
886 } 918 }
887 919
888 gid = v9fs_get_fsgid_for_create(dir); 920 gid = v9fs_get_fsgid_for_create(dir);
889 if (gid < 0) { 921 mode = omode;
890 P9_DPRINTK(P9_DEBUG_VFS, "v9fs_get_fsgid_for_create failed\n"); 922 /* Update mode based on ACL value */
923 err = v9fs_acl_mode(dir, &mode, &dacl, &pacl);
924 if (err) {
925 P9_DPRINTK(P9_DEBUG_VFS,
926 "Failed to get acl values in mkdir %d\n", err);
891 goto error; 927 goto error;
892 } 928 }
893
894 name = (char *) dentry->d_name.name; 929 name = (char *) dentry->d_name.name;
895 err = p9_client_mkdir_dotl(dfid, name, mode, gid, &qid); 930 err = p9_client_mkdir_dotl(dfid, name, mode, gid, &qid);
896 if (err < 0) 931 if (err < 0)
@@ -920,7 +955,23 @@ static int v9fs_vfs_mkdir_dotl(struct inode *dir, struct dentry *dentry,
920 if (err < 0) 955 if (err < 0)
921 goto error; 956 goto error;
922 fid = NULL; 957 fid = NULL;
958 } else {
959 /*
960 * Not in cached mode. No need to populate
961 * inode with stat. We need to get an inode
962 * so that we can set the acl with dentry
963 */
964 inode = v9fs_get_inode(dir->i_sb, mode);
965 if (IS_ERR(inode)) {
966 err = PTR_ERR(inode);
967 goto error;
968 }
969 dentry->d_op = &v9fs_dentry_operations;
970 d_instantiate(dentry, inode);
923 } 971 }
972 /* Now set the ACL based on the default value */
973 v9fs_set_create_acl(dentry, dacl, pacl);
974
924error: 975error:
925 if (fid) 976 if (fid)
926 p9_client_clunk(fid); 977 p9_client_clunk(fid);
@@ -979,7 +1030,7 @@ static struct dentry *v9fs_vfs_lookup(struct inode *dir, struct dentry *dentry,
979 1030
980 result = v9fs_fid_add(dentry, fid); 1031 result = v9fs_fid_add(dentry, fid);
981 if (result < 0) 1032 if (result < 0)
982 goto error; 1033 goto error_iput;
983 1034
984inst_out: 1035inst_out:
985 if (v9ses->cache) 1036 if (v9ses->cache)
@@ -990,6 +1041,8 @@ inst_out:
990 d_add(dentry, inode); 1041 d_add(dentry, inode);
991 return NULL; 1042 return NULL;
992 1043
1044error_iput:
1045 iput(inode);
993error: 1046error:
994 p9_client_clunk(fid); 1047 p9_client_clunk(fid);
995 1048
@@ -1237,7 +1290,7 @@ static int v9fs_vfs_setattr(struct dentry *dentry, struct iattr *iattr)
1237 * 1290 *
1238 */ 1291 */
1239 1292
1240static int v9fs_vfs_setattr_dotl(struct dentry *dentry, struct iattr *iattr) 1293int v9fs_vfs_setattr_dotl(struct dentry *dentry, struct iattr *iattr)
1241{ 1294{
1242 int retval; 1295 int retval;
1243 struct v9fs_session_info *v9ses; 1296 struct v9fs_session_info *v9ses;
@@ -1279,6 +1332,12 @@ static int v9fs_vfs_setattr_dotl(struct dentry *dentry, struct iattr *iattr)
1279 1332
1280 setattr_copy(dentry->d_inode, iattr); 1333 setattr_copy(dentry->d_inode, iattr);
1281 mark_inode_dirty(dentry->d_inode); 1334 mark_inode_dirty(dentry->d_inode);
1335 if (iattr->ia_valid & ATTR_MODE) {
1336 /* We also want to update ACL when we update mode bits */
1337 retval = v9fs_acl_chmod(dentry);
1338 if (retval < 0)
1339 return retval;
1340 }
1282 return 0; 1341 return 0;
1283} 1342}
1284 1343
@@ -1473,7 +1532,7 @@ static int v9fs_readlink(struct dentry *dentry, char *buffer, int buflen)
1473 if (IS_ERR(fid)) 1532 if (IS_ERR(fid))
1474 return PTR_ERR(fid); 1533 return PTR_ERR(fid);
1475 1534
1476 if (!v9fs_proto_dotu(v9ses) && !v9fs_proto_dotl(v9ses)) 1535 if (!v9fs_proto_dotu(v9ses))
1477 return -EBADF; 1536 return -EBADF;
1478 1537
1479 st = p9_client_stat(fid); 1538 st = p9_client_stat(fid);
@@ -1616,11 +1675,6 @@ v9fs_vfs_symlink_dotl(struct inode *dir, struct dentry *dentry,
1616 1675
1617 gid = v9fs_get_fsgid_for_create(dir); 1676 gid = v9fs_get_fsgid_for_create(dir);
1618 1677
1619 if (gid < 0) {
1620 P9_DPRINTK(P9_DEBUG_VFS, "v9fs_get_egid failed %d\n", gid);
1621 goto error;
1622 }
1623
1624 /* Server doesn't alter fid on TSYMLINK. Hence no need to clone it. */ 1678 /* Server doesn't alter fid on TSYMLINK. Hence no need to clone it. */
1625 err = p9_client_symlink(dfid, name, (char *)symname, gid, &qid); 1679 err = p9_client_symlink(dfid, name, (char *)symname, gid, &qid);
1626 1680
@@ -1789,9 +1843,10 @@ v9fs_vfs_link_dotl(struct dentry *old_dentry, struct inode *dir,
1789 kfree(st); 1843 kfree(st);
1790 } else { 1844 } else {
1791 /* Caching disabled. No need to get upto date stat info. 1845 /* Caching disabled. No need to get upto date stat info.
1792 * This dentry will be released immediately. So, just i_count++ 1846 * This dentry will be released immediately. So, just hold the
1847 * inode
1793 */ 1848 */
1794 atomic_inc(&old_dentry->d_inode->i_count); 1849 ihold(old_dentry->d_inode);
1795 } 1850 }
1796 1851
1797 dentry->d_op = old_dentry->d_op; 1852 dentry->d_op = old_dentry->d_op;
@@ -1854,21 +1909,23 @@ v9fs_vfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t rdev)
1854 * 1909 *
1855 */ 1910 */
1856static int 1911static int
1857v9fs_vfs_mknod_dotl(struct inode *dir, struct dentry *dentry, int mode, 1912v9fs_vfs_mknod_dotl(struct inode *dir, struct dentry *dentry, int omode,
1858 dev_t rdev) 1913 dev_t rdev)
1859{ 1914{
1860 int err; 1915 int err;
1861 char *name; 1916 char *name;
1917 mode_t mode;
1862 struct v9fs_session_info *v9ses; 1918 struct v9fs_session_info *v9ses;
1863 struct p9_fid *fid = NULL, *dfid = NULL; 1919 struct p9_fid *fid = NULL, *dfid = NULL;
1864 struct inode *inode; 1920 struct inode *inode;
1865 gid_t gid; 1921 gid_t gid;
1866 struct p9_qid qid; 1922 struct p9_qid qid;
1867 struct dentry *dir_dentry; 1923 struct dentry *dir_dentry;
1924 struct posix_acl *dacl = NULL, *pacl = NULL;
1868 1925
1869 P9_DPRINTK(P9_DEBUG_VFS, 1926 P9_DPRINTK(P9_DEBUG_VFS,
1870 " %lu,%s mode: %x MAJOR: %u MINOR: %u\n", dir->i_ino, 1927 " %lu,%s mode: %x MAJOR: %u MINOR: %u\n", dir->i_ino,
1871 dentry->d_name.name, mode, MAJOR(rdev), MINOR(rdev)); 1928 dentry->d_name.name, omode, MAJOR(rdev), MINOR(rdev));
1872 1929
1873 if (!new_valid_dev(rdev)) 1930 if (!new_valid_dev(rdev))
1874 return -EINVAL; 1931 return -EINVAL;
@@ -1884,11 +1941,14 @@ v9fs_vfs_mknod_dotl(struct inode *dir, struct dentry *dentry, int mode,
1884 } 1941 }
1885 1942
1886 gid = v9fs_get_fsgid_for_create(dir); 1943 gid = v9fs_get_fsgid_for_create(dir);
1887 if (gid < 0) { 1944 mode = omode;
1888 P9_DPRINTK(P9_DEBUG_VFS, "v9fs_get_fsgid_for_create failed\n"); 1945 /* Update mode based on ACL value */
1946 err = v9fs_acl_mode(dir, &mode, &dacl, &pacl);
1947 if (err) {
1948 P9_DPRINTK(P9_DEBUG_VFS,
1949 "Failed to get acl values in mknod %d\n", err);
1889 goto error; 1950 goto error;
1890 } 1951 }
1891
1892 name = (char *) dentry->d_name.name; 1952 name = (char *) dentry->d_name.name;
1893 1953
1894 err = p9_client_mknod_dotl(dfid, name, mode, rdev, gid, &qid); 1954 err = p9_client_mknod_dotl(dfid, name, mode, rdev, gid, &qid);
@@ -1932,13 +1992,68 @@ v9fs_vfs_mknod_dotl(struct inode *dir, struct dentry *dentry, int mode,
1932 dentry->d_op = &v9fs_dentry_operations; 1992 dentry->d_op = &v9fs_dentry_operations;
1933 d_instantiate(dentry, inode); 1993 d_instantiate(dentry, inode);
1934 } 1994 }
1935 1995 /* Now set the ACL based on the default value */
1996 v9fs_set_create_acl(dentry, dacl, pacl);
1936error: 1997error:
1937 if (fid) 1998 if (fid)
1938 p9_client_clunk(fid); 1999 p9_client_clunk(fid);
1939 return err; 2000 return err;
1940} 2001}
1941 2002
2003static int
2004v9fs_vfs_readlink_dotl(struct dentry *dentry, char *buffer, int buflen)
2005{
2006 int retval;
2007 struct p9_fid *fid;
2008 char *target = NULL;
2009
2010 P9_DPRINTK(P9_DEBUG_VFS, " %s\n", dentry->d_name.name);
2011 retval = -EPERM;
2012 fid = v9fs_fid_lookup(dentry);
2013 if (IS_ERR(fid))
2014 return PTR_ERR(fid);
2015
2016 retval = p9_client_readlink(fid, &target);
2017 if (retval < 0)
2018 return retval;
2019
2020 strncpy(buffer, target, buflen);
2021 P9_DPRINTK(P9_DEBUG_VFS, "%s -> %s\n", dentry->d_name.name, buffer);
2022
2023 retval = strnlen(buffer, buflen);
2024 return retval;
2025}
2026
2027/**
2028 * v9fs_vfs_follow_link_dotl - follow a symlink path
2029 * @dentry: dentry for symlink
2030 * @nd: nameidata
2031 *
2032 */
2033
2034static void *
2035v9fs_vfs_follow_link_dotl(struct dentry *dentry, struct nameidata *nd)
2036{
2037 int len = 0;
2038 char *link = __getname();
2039
2040 P9_DPRINTK(P9_DEBUG_VFS, "%s n", dentry->d_name.name);
2041
2042 if (!link)
2043 link = ERR_PTR(-ENOMEM);
2044 else {
2045 len = v9fs_vfs_readlink_dotl(dentry, link, PATH_MAX);
2046 if (len < 0) {
2047 __putname(link);
2048 link = ERR_PTR(len);
2049 } else
2050 link[min(len, PATH_MAX-1)] = 0;
2051 }
2052 nd_set_link(nd, link);
2053
2054 return NULL;
2055}
2056
1942static const struct inode_operations v9fs_dir_inode_operations_dotu = { 2057static const struct inode_operations v9fs_dir_inode_operations_dotu = {
1943 .create = v9fs_vfs_create, 2058 .create = v9fs_vfs_create,
1944 .lookup = v9fs_vfs_lookup, 2059 .lookup = v9fs_vfs_lookup,
@@ -1969,7 +2084,7 @@ static const struct inode_operations v9fs_dir_inode_operations_dotl = {
1969 .getxattr = generic_getxattr, 2084 .getxattr = generic_getxattr,
1970 .removexattr = generic_removexattr, 2085 .removexattr = generic_removexattr,
1971 .listxattr = v9fs_listxattr, 2086 .listxattr = v9fs_listxattr,
1972 2087 .check_acl = v9fs_check_acl,
1973}; 2088};
1974 2089
1975static const struct inode_operations v9fs_dir_inode_operations = { 2090static const struct inode_operations v9fs_dir_inode_operations = {
@@ -1996,6 +2111,7 @@ static const struct inode_operations v9fs_file_inode_operations_dotl = {
1996 .getxattr = generic_getxattr, 2111 .getxattr = generic_getxattr,
1997 .removexattr = generic_removexattr, 2112 .removexattr = generic_removexattr,
1998 .listxattr = v9fs_listxattr, 2113 .listxattr = v9fs_listxattr,
2114 .check_acl = v9fs_check_acl,
1999}; 2115};
2000 2116
2001static const struct inode_operations v9fs_symlink_inode_operations = { 2117static const struct inode_operations v9fs_symlink_inode_operations = {
@@ -2007,8 +2123,8 @@ static const struct inode_operations v9fs_symlink_inode_operations = {
2007}; 2123};
2008 2124
2009static const struct inode_operations v9fs_symlink_inode_operations_dotl = { 2125static const struct inode_operations v9fs_symlink_inode_operations_dotl = {
2010 .readlink = generic_readlink, 2126 .readlink = v9fs_vfs_readlink_dotl,
2011 .follow_link = v9fs_vfs_follow_link, 2127 .follow_link = v9fs_vfs_follow_link_dotl,
2012 .put_link = v9fs_vfs_put_link, 2128 .put_link = v9fs_vfs_put_link,
2013 .getattr = v9fs_vfs_getattr_dotl, 2129 .getattr = v9fs_vfs_getattr_dotl,
2014 .setattr = v9fs_vfs_setattr_dotl, 2130 .setattr = v9fs_vfs_setattr_dotl,
diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c
index 1d12ba0ed3db..c55c614500ad 100644
--- a/fs/9p/vfs_super.c
+++ b/fs/9p/vfs_super.c
@@ -39,6 +39,7 @@
39#include <linux/sched.h> 39#include <linux/sched.h>
40#include <linux/slab.h> 40#include <linux/slab.h>
41#include <linux/statfs.h> 41#include <linux/statfs.h>
42#include <linux/magic.h>
42#include <net/9p/9p.h> 43#include <net/9p/9p.h>
43#include <net/9p/client.h> 44#include <net/9p/client.h>
44 45
@@ -46,6 +47,7 @@
46#include "v9fs_vfs.h" 47#include "v9fs_vfs.h"
47#include "fid.h" 48#include "fid.h"
48#include "xattr.h" 49#include "xattr.h"
50#include "acl.h"
49 51
50static const struct super_operations v9fs_super_ops, v9fs_super_ops_dotl; 52static const struct super_operations v9fs_super_ops, v9fs_super_ops_dotl;
51 53
@@ -66,7 +68,7 @@ static int v9fs_set_super(struct super_block *s, void *data)
66 * v9fs_fill_super - populate superblock with info 68 * v9fs_fill_super - populate superblock with info
67 * @sb: superblock 69 * @sb: superblock
68 * @v9ses: session information 70 * @v9ses: session information
69 * @flags: flags propagated from v9fs_get_sb() 71 * @flags: flags propagated from v9fs_mount()
70 * 72 *
71 */ 73 */
72 74
@@ -88,22 +90,25 @@ v9fs_fill_super(struct super_block *sb, struct v9fs_session_info *v9ses,
88 sb->s_flags = flags | MS_ACTIVE | MS_SYNCHRONOUS | MS_DIRSYNC | 90 sb->s_flags = flags | MS_ACTIVE | MS_SYNCHRONOUS | MS_DIRSYNC |
89 MS_NOATIME; 91 MS_NOATIME;
90 92
93#ifdef CONFIG_9P_FS_POSIX_ACL
94 if ((v9ses->flags & V9FS_ACCESS_MASK) == V9FS_ACCESS_CLIENT)
95 sb->s_flags |= MS_POSIXACL;
96#endif
97
91 save_mount_options(sb, data); 98 save_mount_options(sb, data);
92} 99}
93 100
94/** 101/**
95 * v9fs_get_sb - mount a superblock 102 * v9fs_mount - mount a superblock
96 * @fs_type: file system type 103 * @fs_type: file system type
97 * @flags: mount flags 104 * @flags: mount flags
98 * @dev_name: device name that was mounted 105 * @dev_name: device name that was mounted
99 * @data: mount options 106 * @data: mount options
100 * @mnt: mountpoint record to be instantiated
101 * 107 *
102 */ 108 */
103 109
104static int v9fs_get_sb(struct file_system_type *fs_type, int flags, 110static struct dentry *v9fs_mount(struct file_system_type *fs_type, int flags,
105 const char *dev_name, void *data, 111 const char *dev_name, void *data)
106 struct vfsmount *mnt)
107{ 112{
108 struct super_block *sb = NULL; 113 struct super_block *sb = NULL;
109 struct inode *inode = NULL; 114 struct inode *inode = NULL;
@@ -117,7 +122,7 @@ static int v9fs_get_sb(struct file_system_type *fs_type, int flags,
117 122
118 v9ses = kzalloc(sizeof(struct v9fs_session_info), GFP_KERNEL); 123 v9ses = kzalloc(sizeof(struct v9fs_session_info), GFP_KERNEL);
119 if (!v9ses) 124 if (!v9ses)
120 return -ENOMEM; 125 return ERR_PTR(-ENOMEM);
121 126
122 fid = v9fs_session_init(v9ses, dev_name, data); 127 fid = v9fs_session_init(v9ses, dev_name, data);
123 if (IS_ERR(fid)) { 128 if (IS_ERR(fid)) {
@@ -149,7 +154,6 @@ static int v9fs_get_sb(struct file_system_type *fs_type, int flags,
149 goto release_sb; 154 goto release_sb;
150 } 155 }
151 sb->s_root = root; 156 sb->s_root = root;
152
153 if (v9fs_proto_dotl(v9ses)) { 157 if (v9fs_proto_dotl(v9ses)) {
154 struct p9_stat_dotl *st = NULL; 158 struct p9_stat_dotl *st = NULL;
155 st = p9_client_getattr_dotl(fid, P9_STATS_BASIC); 159 st = p9_client_getattr_dotl(fid, P9_STATS_BASIC);
@@ -174,19 +178,21 @@ static int v9fs_get_sb(struct file_system_type *fs_type, int flags,
174 p9stat_free(st); 178 p9stat_free(st);
175 kfree(st); 179 kfree(st);
176 } 180 }
177 181 retval = v9fs_get_acl(inode, fid);
182 if (retval)
183 goto release_sb;
178 v9fs_fid_add(root, fid); 184 v9fs_fid_add(root, fid);
179 185
180 P9_DPRINTK(P9_DEBUG_VFS, " simple set mount, return 0\n"); 186 P9_DPRINTK(P9_DEBUG_VFS, " simple set mount, return 0\n");
181 simple_set_mnt(mnt, sb); 187 return dget(sb->s_root);
182 return 0;
183 188
184clunk_fid: 189clunk_fid:
185 p9_client_clunk(fid); 190 p9_client_clunk(fid);
186close_session: 191close_session:
187 v9fs_session_close(v9ses); 192 v9fs_session_close(v9ses);
188 kfree(v9ses); 193 kfree(v9ses);
189 return retval; 194 return ERR_PTR(retval);
195
190release_sb: 196release_sb:
191 /* 197 /*
192 * we will do the session_close and root dentry release 198 * we will do the session_close and root dentry release
@@ -196,7 +202,7 @@ release_sb:
196 */ 202 */
197 p9_client_clunk(fid); 203 p9_client_clunk(fid);
198 deactivate_locked_super(sb); 204 deactivate_locked_super(sb);
199 return retval; 205 return ERR_PTR(retval);
200} 206}
201 207
202/** 208/**
@@ -249,7 +255,7 @@ static int v9fs_statfs(struct dentry *dentry, struct kstatfs *buf)
249 if (v9fs_proto_dotl(v9ses)) { 255 if (v9fs_proto_dotl(v9ses)) {
250 res = p9_client_statfs(fid, &rs); 256 res = p9_client_statfs(fid, &rs);
251 if (res == 0) { 257 if (res == 0) {
252 buf->f_type = rs.type; 258 buf->f_type = V9FS_MAGIC;
253 buf->f_bsize = rs.bsize; 259 buf->f_bsize = rs.bsize;
254 buf->f_blocks = rs.blocks; 260 buf->f_blocks = rs.blocks;
255 buf->f_bfree = rs.bfree; 261 buf->f_bfree = rs.bfree;
@@ -292,7 +298,7 @@ static const struct super_operations v9fs_super_ops_dotl = {
292 298
293struct file_system_type v9fs_fs_type = { 299struct file_system_type v9fs_fs_type = {
294 .name = "9p", 300 .name = "9p",
295 .get_sb = v9fs_get_sb, 301 .mount = v9fs_mount,
296 .kill_sb = v9fs_kill_super, 302 .kill_sb = v9fs_kill_super,
297 .owner = THIS_MODULE, 303 .owner = THIS_MODULE,
298 .fs_flags = FS_RENAME_DOES_D_MOVE, 304 .fs_flags = FS_RENAME_DOES_D_MOVE,
diff --git a/fs/9p/xattr.c b/fs/9p/xattr.c
index f88e5c2dc873..43ec7df84336 100644
--- a/fs/9p/xattr.c
+++ b/fs/9p/xattr.c
@@ -21,30 +21,13 @@
21#include "fid.h" 21#include "fid.h"
22#include "xattr.h" 22#include "xattr.h"
23 23
24/* 24ssize_t v9fs_fid_xattr_get(struct p9_fid *fid, const char *name,
25 * v9fs_xattr_get() 25 void *buffer, size_t buffer_size)
26 *
27 * Copy an extended attribute into the buffer
28 * provided, or compute the buffer size required.
29 * Buffer is NULL to compute the size of the buffer required.
30 *
31 * Returns a negative error number on failure, or the number of bytes
32 * used / required on success.
33 */
34ssize_t v9fs_xattr_get(struct dentry *dentry, const char *name,
35 void *buffer, size_t buffer_size)
36{ 26{
37 ssize_t retval; 27 ssize_t retval;
38 int msize, read_count; 28 int msize, read_count;
39 u64 offset = 0, attr_size; 29 u64 offset = 0, attr_size;
40 struct p9_fid *fid, *attr_fid; 30 struct p9_fid *attr_fid;
41
42 P9_DPRINTK(P9_DEBUG_VFS, "%s: name = %s value_len = %zu\n",
43 __func__, name, buffer_size);
44
45 fid = v9fs_fid_lookup(dentry);
46 if (IS_ERR(fid))
47 return PTR_ERR(fid);
48 31
49 attr_fid = p9_client_xattrwalk(fid, name, &attr_size); 32 attr_fid = p9_client_xattrwalk(fid, name, &attr_size);
50 if (IS_ERR(attr_fid)) { 33 if (IS_ERR(attr_fid)) {
@@ -88,6 +71,31 @@ error:
88 71
89} 72}
90 73
74
75/*
76 * v9fs_xattr_get()
77 *
78 * Copy an extended attribute into the buffer
79 * provided, or compute the buffer size required.
80 * Buffer is NULL to compute the size of the buffer required.
81 *
82 * Returns a negative error number on failure, or the number of bytes
83 * used / required on success.
84 */
85ssize_t v9fs_xattr_get(struct dentry *dentry, const char *name,
86 void *buffer, size_t buffer_size)
87{
88 struct p9_fid *fid;
89
90 P9_DPRINTK(P9_DEBUG_VFS, "%s: name = %s value_len = %zu\n",
91 __func__, name, buffer_size);
92 fid = v9fs_fid_lookup(dentry);
93 if (IS_ERR(fid))
94 return PTR_ERR(fid);
95
96 return v9fs_fid_xattr_get(fid, name, buffer, buffer_size);
97}
98
91/* 99/*
92 * v9fs_xattr_set() 100 * v9fs_xattr_set()
93 * 101 *
@@ -156,5 +164,9 @@ ssize_t v9fs_listxattr(struct dentry *dentry, char *buffer, size_t buffer_size)
156 164
157const struct xattr_handler *v9fs_xattr_handlers[] = { 165const struct xattr_handler *v9fs_xattr_handlers[] = {
158 &v9fs_xattr_user_handler, 166 &v9fs_xattr_user_handler,
167#ifdef CONFIG_9P_FS_POSIX_ACL
168 &v9fs_xattr_acl_access_handler,
169 &v9fs_xattr_acl_default_handler,
170#endif
159 NULL 171 NULL
160}; 172};
diff --git a/fs/9p/xattr.h b/fs/9p/xattr.h
index 9ddf672ae5c4..eaa837c53bd5 100644
--- a/fs/9p/xattr.h
+++ b/fs/9p/xattr.h
@@ -15,10 +15,16 @@
15#define FS_9P_XATTR_H 15#define FS_9P_XATTR_H
16 16
17#include <linux/xattr.h> 17#include <linux/xattr.h>
18#include <net/9p/9p.h>
19#include <net/9p/client.h>
18 20
19extern const struct xattr_handler *v9fs_xattr_handlers[]; 21extern const struct xattr_handler *v9fs_xattr_handlers[];
20extern struct xattr_handler v9fs_xattr_user_handler; 22extern struct xattr_handler v9fs_xattr_user_handler;
23extern const struct xattr_handler v9fs_xattr_acl_access_handler;
24extern const struct xattr_handler v9fs_xattr_acl_default_handler;
21 25
26extern ssize_t v9fs_fid_xattr_get(struct p9_fid *, const char *,
27 void *, size_t);
22extern ssize_t v9fs_xattr_get(struct dentry *, const char *, 28extern ssize_t v9fs_xattr_get(struct dentry *, const char *,
23 void *, size_t); 29 void *, size_t);
24extern int v9fs_xattr_set(struct dentry *, const char *, 30extern int v9fs_xattr_set(struct dentry *, const char *,
diff --git a/fs/Kconfig b/fs/Kconfig
index 65781de44fc0..771f457402d4 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -47,10 +47,12 @@ source "fs/nilfs2/Kconfig"
47 47
48endif # BLOCK 48endif # BLOCK
49 49
50config EXPORTFS
51 tristate
52
50config FILE_LOCKING 53config FILE_LOCKING
51 bool "Enable POSIX file locking API" if EMBEDDED 54 bool "Enable POSIX file locking API" if EMBEDDED
52 default y 55 default y
53 select BKL # while lockd still uses it.
54 help 56 help
55 This option enables standard file locking support, required 57 This option enables standard file locking support, required
56 for filesystems like NFS and for the flock() system 58 for filesystems like NFS and for the flock() system
@@ -60,7 +62,6 @@ source "fs/notify/Kconfig"
60 62
61source "fs/quota/Kconfig" 63source "fs/quota/Kconfig"
62 64
63source "fs/autofs/Kconfig"
64source "fs/autofs4/Kconfig" 65source "fs/autofs4/Kconfig"
65source "fs/fuse/Kconfig" 66source "fs/fuse/Kconfig"
66 67
@@ -222,9 +223,6 @@ config LOCKD_V4
222 depends on FILE_LOCKING 223 depends on FILE_LOCKING
223 default y 224 default y
224 225
225config EXPORTFS
226 tristate
227
228config NFS_ACL_SUPPORT 226config NFS_ACL_SUPPORT
229 tristate 227 tristate
230 select FS_POSIX_ACL 228 select FS_POSIX_ACL
@@ -235,7 +233,6 @@ config NFS_COMMON
235 default y 233 default y
236 234
237source "net/sunrpc/Kconfig" 235source "net/sunrpc/Kconfig"
238source "fs/smbfs/Kconfig"
239source "fs/ceph/Kconfig" 236source "fs/ceph/Kconfig"
240source "fs/cifs/Kconfig" 237source "fs/cifs/Kconfig"
241source "fs/ncpfs/Kconfig" 238source "fs/ncpfs/Kconfig"
diff --git a/fs/Kconfig.binfmt b/fs/Kconfig.binfmt
index bb4cc5b8abc8..79e2ca7973b7 100644
--- a/fs/Kconfig.binfmt
+++ b/fs/Kconfig.binfmt
@@ -42,7 +42,7 @@ config BINFMT_ELF_FDPIC
42 42
43config CORE_DUMP_DEFAULT_ELF_HEADERS 43config CORE_DUMP_DEFAULT_ELF_HEADERS
44 bool "Write ELF core dumps with partial segments" 44 bool "Write ELF core dumps with partial segments"
45 default n 45 default y
46 depends on BINFMT_ELF && ELF_CORE 46 depends on BINFMT_ELF && ELF_CORE
47 help 47 help
48 ELF core dump files describe each memory mapping of the crashed 48 ELF core dump files describe each memory mapping of the crashed
@@ -60,7 +60,7 @@ config CORE_DUMP_DEFAULT_ELF_HEADERS
60 inherited. See Documentation/filesystems/proc.txt for details. 60 inherited. See Documentation/filesystems/proc.txt for details.
61 61
62 This config option changes the default setting of coredump_filter 62 This config option changes the default setting of coredump_filter
63 seen at boot time. If unsure, say N. 63 seen at boot time. If unsure, say Y.
64 64
65config BINFMT_FLAT 65config BINFMT_FLAT
66 bool "Kernel support for flat binaries" 66 bool "Kernel support for flat binaries"
diff --git a/fs/Makefile b/fs/Makefile
index e6ec1d309b1d..a7f7cef0c0c8 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -29,10 +29,7 @@ obj-$(CONFIG_EVENTFD) += eventfd.o
29obj-$(CONFIG_AIO) += aio.o 29obj-$(CONFIG_AIO) += aio.o
30obj-$(CONFIG_FILE_LOCKING) += locks.o 30obj-$(CONFIG_FILE_LOCKING) += locks.o
31obj-$(CONFIG_COMPAT) += compat.o compat_ioctl.o 31obj-$(CONFIG_COMPAT) += compat.o compat_ioctl.o
32 32obj-$(CONFIG_NFSD_DEPRECATED) += nfsctl.o
33nfsd-$(CONFIG_NFSD) := nfsctl.o
34obj-y += $(nfsd-y) $(nfsd-m)
35
36obj-$(CONFIG_BINFMT_AOUT) += binfmt_aout.o 33obj-$(CONFIG_BINFMT_AOUT) += binfmt_aout.o
37obj-$(CONFIG_BINFMT_EM86) += binfmt_em86.o 34obj-$(CONFIG_BINFMT_EM86) += binfmt_em86.o
38obj-$(CONFIG_BINFMT_MISC) += binfmt_misc.o 35obj-$(CONFIG_BINFMT_MISC) += binfmt_misc.o
@@ -91,7 +88,6 @@ obj-$(CONFIG_NFSD) += nfsd/
91obj-$(CONFIG_LOCKD) += lockd/ 88obj-$(CONFIG_LOCKD) += lockd/
92obj-$(CONFIG_NLS) += nls/ 89obj-$(CONFIG_NLS) += nls/
93obj-$(CONFIG_SYSV_FS) += sysv/ 90obj-$(CONFIG_SYSV_FS) += sysv/
94obj-$(CONFIG_SMB_FS) += smbfs/
95obj-$(CONFIG_CIFS) += cifs/ 91obj-$(CONFIG_CIFS) += cifs/
96obj-$(CONFIG_NCP_FS) += ncpfs/ 92obj-$(CONFIG_NCP_FS) += ncpfs/
97obj-$(CONFIG_HPFS_FS) += hpfs/ 93obj-$(CONFIG_HPFS_FS) += hpfs/
@@ -104,7 +100,6 @@ obj-$(CONFIG_UBIFS_FS) += ubifs/
104obj-$(CONFIG_AFFS_FS) += affs/ 100obj-$(CONFIG_AFFS_FS) += affs/
105obj-$(CONFIG_ROMFS_FS) += romfs/ 101obj-$(CONFIG_ROMFS_FS) += romfs/
106obj-$(CONFIG_QNX4FS_FS) += qnx4/ 102obj-$(CONFIG_QNX4FS_FS) += qnx4/
107obj-$(CONFIG_AUTOFS_FS) += autofs/
108obj-$(CONFIG_AUTOFS4_FS) += autofs4/ 103obj-$(CONFIG_AUTOFS4_FS) += autofs4/
109obj-$(CONFIG_ADFS_FS) += adfs/ 104obj-$(CONFIG_ADFS_FS) += adfs/
110obj-$(CONFIG_FUSE_FS) += fuse/ 105obj-$(CONFIG_FUSE_FS) += fuse/
diff --git a/fs/adfs/super.c b/fs/adfs/super.c
index d9803f73236f..959dbff2d42d 100644
--- a/fs/adfs/super.c
+++ b/fs/adfs/super.c
@@ -490,17 +490,16 @@ error:
490 return -EINVAL; 490 return -EINVAL;
491} 491}
492 492
493static int adfs_get_sb(struct file_system_type *fs_type, 493static struct dentry *adfs_mount(struct file_system_type *fs_type,
494 int flags, const char *dev_name, void *data, struct vfsmount *mnt) 494 int flags, const char *dev_name, void *data)
495{ 495{
496 return get_sb_bdev(fs_type, flags, dev_name, data, adfs_fill_super, 496 return mount_bdev(fs_type, flags, dev_name, data, adfs_fill_super);
497 mnt);
498} 497}
499 498
500static struct file_system_type adfs_fs_type = { 499static struct file_system_type adfs_fs_type = {
501 .owner = THIS_MODULE, 500 .owner = THIS_MODULE,
502 .name = "adfs", 501 .name = "adfs",
503 .get_sb = adfs_get_sb, 502 .mount = adfs_mount,
504 .kill_sb = kill_block_super, 503 .kill_sb = kill_block_super,
505 .fs_flags = FS_REQUIRES_DEV, 504 .fs_flags = FS_REQUIRES_DEV,
506}; 505};
diff --git a/fs/affs/file.c b/fs/affs/file.c
index c4a9875bd1a6..0a90dcd46de2 100644
--- a/fs/affs/file.c
+++ b/fs/affs/file.c
@@ -894,9 +894,9 @@ affs_truncate(struct inode *inode)
894 if (AFFS_SB(sb)->s_flags & SF_OFS) { 894 if (AFFS_SB(sb)->s_flags & SF_OFS) {
895 struct buffer_head *bh = affs_bread_ino(inode, last_blk, 0); 895 struct buffer_head *bh = affs_bread_ino(inode, last_blk, 0);
896 u32 tmp; 896 u32 tmp;
897 if (IS_ERR(ext_bh)) { 897 if (IS_ERR(bh)) {
898 affs_warning(sb, "truncate", "unexpected read error for last block %u (%d)", 898 affs_warning(sb, "truncate", "unexpected read error for last block %u (%d)",
899 ext, PTR_ERR(ext_bh)); 899 ext, PTR_ERR(bh));
900 return; 900 return;
901 } 901 }
902 tmp = be32_to_cpu(AFFS_DATA_HEAD(bh)->next); 902 tmp = be32_to_cpu(AFFS_DATA_HEAD(bh)->next);
diff --git a/fs/affs/inode.c b/fs/affs/inode.c
index 3a0fdec175ba..5d828903ac69 100644
--- a/fs/affs/inode.c
+++ b/fs/affs/inode.c
@@ -388,7 +388,7 @@ affs_add_entry(struct inode *dir, struct inode *inode, struct dentry *dentry, s3
388 affs_adjust_checksum(inode_bh, block - be32_to_cpu(chain)); 388 affs_adjust_checksum(inode_bh, block - be32_to_cpu(chain));
389 mark_buffer_dirty_inode(inode_bh, inode); 389 mark_buffer_dirty_inode(inode_bh, inode);
390 inode->i_nlink = 2; 390 inode->i_nlink = 2;
391 atomic_inc(&inode->i_count); 391 ihold(inode);
392 } 392 }
393 affs_fix_checksum(sb, bh); 393 affs_fix_checksum(sb, bh);
394 mark_buffer_dirty_inode(bh, inode); 394 mark_buffer_dirty_inode(bh, inode);
diff --git a/fs/affs/super.c b/fs/affs/super.c
index fa4fbe1e238a..0cf7f4384cbd 100644
--- a/fs/affs/super.c
+++ b/fs/affs/super.c
@@ -573,17 +573,16 @@ affs_statfs(struct dentry *dentry, struct kstatfs *buf)
573 return 0; 573 return 0;
574} 574}
575 575
576static int affs_get_sb(struct file_system_type *fs_type, 576static struct dentry *affs_mount(struct file_system_type *fs_type,
577 int flags, const char *dev_name, void *data, struct vfsmount *mnt) 577 int flags, const char *dev_name, void *data)
578{ 578{
579 return get_sb_bdev(fs_type, flags, dev_name, data, affs_fill_super, 579 return mount_bdev(fs_type, flags, dev_name, data, affs_fill_super);
580 mnt);
581} 580}
582 581
583static struct file_system_type affs_fs_type = { 582static struct file_system_type affs_fs_type = {
584 .owner = THIS_MODULE, 583 .owner = THIS_MODULE,
585 .name = "affs", 584 .name = "affs",
586 .get_sb = affs_get_sb, 585 .mount = affs_mount,
587 .kill_sb = kill_block_super, 586 .kill_sb = kill_block_super,
588 .fs_flags = FS_REQUIRES_DEV, 587 .fs_flags = FS_REQUIRES_DEV,
589}; 588};
diff --git a/fs/afs/dir.c b/fs/afs/dir.c
index 0d38c09bd55e..5439e1bc9a86 100644
--- a/fs/afs/dir.c
+++ b/fs/afs/dir.c
@@ -1045,7 +1045,7 @@ static int afs_link(struct dentry *from, struct inode *dir,
1045 if (ret < 0) 1045 if (ret < 0)
1046 goto link_error; 1046 goto link_error;
1047 1047
1048 atomic_inc(&vnode->vfs_inode.i_count); 1048 ihold(&vnode->vfs_inode);
1049 d_instantiate(dentry, &vnode->vfs_inode); 1049 d_instantiate(dentry, &vnode->vfs_inode);
1050 key_put(key); 1050 key_put(key);
1051 _leave(" = 0"); 1051 _leave(" = 0");
diff --git a/fs/afs/super.c b/fs/afs/super.c
index eacf76d98ae0..27201cffece4 100644
--- a/fs/afs/super.c
+++ b/fs/afs/super.c
@@ -29,9 +29,8 @@
29#define AFS_FS_MAGIC 0x6B414653 /* 'kAFS' */ 29#define AFS_FS_MAGIC 0x6B414653 /* 'kAFS' */
30 30
31static void afs_i_init_once(void *foo); 31static void afs_i_init_once(void *foo);
32static int afs_get_sb(struct file_system_type *fs_type, 32static struct dentry *afs_mount(struct file_system_type *fs_type,
33 int flags, const char *dev_name, 33 int flags, const char *dev_name, void *data);
34 void *data, struct vfsmount *mnt);
35static struct inode *afs_alloc_inode(struct super_block *sb); 34static struct inode *afs_alloc_inode(struct super_block *sb);
36static void afs_put_super(struct super_block *sb); 35static void afs_put_super(struct super_block *sb);
37static void afs_destroy_inode(struct inode *inode); 36static void afs_destroy_inode(struct inode *inode);
@@ -40,7 +39,7 @@ static int afs_statfs(struct dentry *dentry, struct kstatfs *buf);
40struct file_system_type afs_fs_type = { 39struct file_system_type afs_fs_type = {
41 .owner = THIS_MODULE, 40 .owner = THIS_MODULE,
42 .name = "afs", 41 .name = "afs",
43 .get_sb = afs_get_sb, 42 .mount = afs_mount,
44 .kill_sb = kill_anon_super, 43 .kill_sb = kill_anon_super,
45 .fs_flags = 0, 44 .fs_flags = 0,
46}; 45};
@@ -359,11 +358,8 @@ error:
359/* 358/*
360 * get an AFS superblock 359 * get an AFS superblock
361 */ 360 */
362static int afs_get_sb(struct file_system_type *fs_type, 361static struct dentry *afs_mount(struct file_system_type *fs_type,
363 int flags, 362 int flags, const char *dev_name, void *options)
364 const char *dev_name,
365 void *options,
366 struct vfsmount *mnt)
367{ 363{
368 struct afs_mount_params params; 364 struct afs_mount_params params;
369 struct super_block *sb; 365 struct super_block *sb;
@@ -427,12 +423,11 @@ static int afs_get_sb(struct file_system_type *fs_type,
427 ASSERTCMP(sb->s_flags, &, MS_ACTIVE); 423 ASSERTCMP(sb->s_flags, &, MS_ACTIVE);
428 } 424 }
429 425
430 simple_set_mnt(mnt, sb);
431 afs_put_volume(params.volume); 426 afs_put_volume(params.volume);
432 afs_put_cell(params.cell); 427 afs_put_cell(params.cell);
433 kfree(new_opts); 428 kfree(new_opts);
434 _leave(" = 0 [%p]", sb); 429 _leave(" = 0 [%p]", sb);
435 return 0; 430 return dget(sb->s_root);
436 431
437error: 432error:
438 afs_put_volume(params.volume); 433 afs_put_volume(params.volume);
@@ -440,7 +435,7 @@ error:
440 key_put(params.key); 435 key_put(params.key);
441 kfree(new_opts); 436 kfree(new_opts);
442 _leave(" = %d", ret); 437 _leave(" = %d", ret);
443 return ret; 438 return ERR_PTR(ret);
444} 439}
445 440
446/* 441/*
diff --git a/fs/afs/write.c b/fs/afs/write.c
index 722743b152d8..15690bb1d3b5 100644
--- a/fs/afs/write.c
+++ b/fs/afs/write.c
@@ -438,7 +438,6 @@ no_more:
438 */ 438 */
439int afs_writepage(struct page *page, struct writeback_control *wbc) 439int afs_writepage(struct page *page, struct writeback_control *wbc)
440{ 440{
441 struct backing_dev_info *bdi = page->mapping->backing_dev_info;
442 struct afs_writeback *wb; 441 struct afs_writeback *wb;
443 int ret; 442 int ret;
444 443
@@ -455,8 +454,6 @@ int afs_writepage(struct page *page, struct writeback_control *wbc)
455 } 454 }
456 455
457 wbc->nr_to_write -= ret; 456 wbc->nr_to_write -= ret;
458 if (wbc->nonblocking && bdi_write_congested(bdi))
459 wbc->encountered_congestion = 1;
460 457
461 _leave(" = 0"); 458 _leave(" = 0");
462 return 0; 459 return 0;
@@ -469,7 +466,6 @@ static int afs_writepages_region(struct address_space *mapping,
469 struct writeback_control *wbc, 466 struct writeback_control *wbc,
470 pgoff_t index, pgoff_t end, pgoff_t *_next) 467 pgoff_t index, pgoff_t end, pgoff_t *_next)
471{ 468{
472 struct backing_dev_info *bdi = mapping->backing_dev_info;
473 struct afs_writeback *wb; 469 struct afs_writeback *wb;
474 struct page *page; 470 struct page *page;
475 int ret, n; 471 int ret, n;
@@ -529,11 +525,6 @@ static int afs_writepages_region(struct address_space *mapping,
529 525
530 wbc->nr_to_write -= ret; 526 wbc->nr_to_write -= ret;
531 527
532 if (wbc->nonblocking && bdi_write_congested(bdi)) {
533 wbc->encountered_congestion = 1;
534 break;
535 }
536
537 cond_resched(); 528 cond_resched();
538 } while (index < end && wbc->nr_to_write > 0); 529 } while (index < end && wbc->nr_to_write > 0);
539 530
@@ -548,24 +539,16 @@ static int afs_writepages_region(struct address_space *mapping,
548int afs_writepages(struct address_space *mapping, 539int afs_writepages(struct address_space *mapping,
549 struct writeback_control *wbc) 540 struct writeback_control *wbc)
550{ 541{
551 struct backing_dev_info *bdi = mapping->backing_dev_info;
552 pgoff_t start, end, next; 542 pgoff_t start, end, next;
553 int ret; 543 int ret;
554 544
555 _enter(""); 545 _enter("");
556 546
557 if (wbc->nonblocking && bdi_write_congested(bdi)) {
558 wbc->encountered_congestion = 1;
559 _leave(" = 0 [congest]");
560 return 0;
561 }
562
563 if (wbc->range_cyclic) { 547 if (wbc->range_cyclic) {
564 start = mapping->writeback_index; 548 start = mapping->writeback_index;
565 end = -1; 549 end = -1;
566 ret = afs_writepages_region(mapping, wbc, start, end, &next); 550 ret = afs_writepages_region(mapping, wbc, start, end, &next);
567 if (start > 0 && wbc->nr_to_write > 0 && ret == 0 && 551 if (start > 0 && wbc->nr_to_write > 0 && ret == 0)
568 !(wbc->nonblocking && wbc->encountered_congestion))
569 ret = afs_writepages_region(mapping, wbc, 0, start, 552 ret = afs_writepages_region(mapping, wbc, 0, start,
570 &next); 553 &next);
571 mapping->writeback_index = next; 554 mapping->writeback_index = next;
diff --git a/fs/aio.c b/fs/aio.c
index 250b0a73c8a8..8c8f6c5b6d79 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -1543,7 +1543,19 @@ static void aio_batch_add(struct address_space *mapping,
1543 } 1543 }
1544 1544
1545 abe = mempool_alloc(abe_pool, GFP_KERNEL); 1545 abe = mempool_alloc(abe_pool, GFP_KERNEL);
1546 BUG_ON(!igrab(mapping->host)); 1546
1547 /*
1548 * we should be using igrab here, but
1549 * we don't want to hammer on the global
1550 * inode spinlock just to take an extra
1551 * reference on a file that we must already
1552 * have a reference to.
1553 *
1554 * When we're called, we always have a reference
1555 * on the file, so we must always have a reference
1556 * on the inode, so ihold() is safe here.
1557 */
1558 ihold(mapping->host);
1547 abe->mapping = mapping; 1559 abe->mapping = mapping;
1548 hlist_add_head(&abe->list, &batch_hash[bucket]); 1560 hlist_add_head(&abe->list, &batch_hash[bucket]);
1549 return; 1561 return;
diff --git a/fs/anon_inodes.c b/fs/anon_inodes.c
index e4b75d6eda83..57ce55b2564c 100644
--- a/fs/anon_inodes.c
+++ b/fs/anon_inodes.c
@@ -26,12 +26,10 @@ static struct vfsmount *anon_inode_mnt __read_mostly;
26static struct inode *anon_inode_inode; 26static struct inode *anon_inode_inode;
27static const struct file_operations anon_inode_fops; 27static const struct file_operations anon_inode_fops;
28 28
29static int anon_inodefs_get_sb(struct file_system_type *fs_type, int flags, 29static struct dentry *anon_inodefs_mount(struct file_system_type *fs_type,
30 const char *dev_name, void *data, 30 int flags, const char *dev_name, void *data)
31 struct vfsmount *mnt)
32{ 31{
33 return get_sb_pseudo(fs_type, "anon_inode:", NULL, ANON_INODE_FS_MAGIC, 32 return mount_pseudo(fs_type, "anon_inode:", NULL, ANON_INODE_FS_MAGIC);
34 mnt);
35} 33}
36 34
37/* 35/*
@@ -45,7 +43,7 @@ static char *anon_inodefs_dname(struct dentry *dentry, char *buffer, int buflen)
45 43
46static struct file_system_type anon_inode_fs_type = { 44static struct file_system_type anon_inode_fs_type = {
47 .name = "anon_inodefs", 45 .name = "anon_inodefs",
48 .get_sb = anon_inodefs_get_sb, 46 .mount = anon_inodefs_mount,
49 .kill_sb = kill_anon_super, 47 .kill_sb = kill_anon_super,
50}; 48};
51static const struct dentry_operations anon_inodefs_dentry_operations = { 49static const struct dentry_operations anon_inodefs_dentry_operations = {
@@ -111,10 +109,9 @@ struct file *anon_inode_getfile(const char *name,
111 path.mnt = mntget(anon_inode_mnt); 109 path.mnt = mntget(anon_inode_mnt);
112 /* 110 /*
113 * We know the anon_inode inode count is always greater than zero, 111 * We know the anon_inode inode count is always greater than zero,
114 * so we can avoid doing an igrab() and we can use an open-coded 112 * so ihold() is safe.
115 * atomic_inc().
116 */ 113 */
117 atomic_inc(&anon_inode_inode->i_count); 114 ihold(anon_inode_inode);
118 115
119 path.dentry->d_op = &anon_inodefs_dentry_operations; 116 path.dentry->d_op = &anon_inodefs_dentry_operations;
120 d_instantiate(path.dentry, anon_inode_inode); 117 d_instantiate(path.dentry, anon_inode_inode);
@@ -194,6 +191,7 @@ static struct inode *anon_inode_mkinode(void)
194 if (!inode) 191 if (!inode)
195 return ERR_PTR(-ENOMEM); 192 return ERR_PTR(-ENOMEM);
196 193
194 inode->i_ino = get_next_ino();
197 inode->i_fop = &anon_inode_fops; 195 inode->i_fop = &anon_inode_fops;
198 196
199 inode->i_mapping->a_ops = &anon_aops; 197 inode->i_mapping->a_ops = &anon_aops;
diff --git a/fs/autofs/Kconfig b/fs/autofs/Kconfig
deleted file mode 100644
index 480e210c83ab..000000000000
--- a/fs/autofs/Kconfig
+++ /dev/null
@@ -1,22 +0,0 @@
1config AUTOFS_FS
2 tristate "Kernel automounter support"
3 depends on BKL # unfixable, just use autofs4
4 help
5 The automounter is a tool to automatically mount remote file systems
6 on demand. This implementation is partially kernel-based to reduce
7 overhead in the already-mounted case; this is unlike the BSD
8 automounter (amd), which is a pure user space daemon.
9
10 To use the automounter you need the user-space tools from the autofs
11 package; you can find the location in <file:Documentation/Changes>.
12 You also want to answer Y to "NFS file system support", below.
13
14 If you want to use the newer version of the automounter with more
15 features, say N here and say Y to "Kernel automounter v4 support",
16 below.
17
18 To compile this support as a module, choose M here: the module will be
19 called autofs.
20
21 If you are not a part of a fairly large, distributed network, you
22 probably do not need an automounter, and can say N here.
diff --git a/fs/autofs/Makefile b/fs/autofs/Makefile
deleted file mode 100644
index 453a60f46d05..000000000000
--- a/fs/autofs/Makefile
+++ /dev/null
@@ -1,7 +0,0 @@
1#
2# Makefile for the linux autofs-filesystem routines.
3#
4
5obj-$(CONFIG_AUTOFS_FS) += autofs.o
6
7autofs-objs := dirhash.o init.o inode.o root.o symlink.o waitq.o
diff --git a/fs/autofs/autofs_i.h b/fs/autofs/autofs_i.h
deleted file mode 100644
index 901a3e67ec45..000000000000
--- a/fs/autofs/autofs_i.h
+++ /dev/null
@@ -1,165 +0,0 @@
1/* -*- linux-c -*- ------------------------------------------------------- *
2 *
3 * linux/fs/autofs/autofs_i.h
4 *
5 * Copyright 1997-1998 Transmeta Corporation - All Rights Reserved
6 *
7 * This file is part of the Linux kernel and is made available under
8 * the terms of the GNU General Public License, version 2, or at your
9 * option, any later version, incorporated herein by reference.
10 *
11 * ----------------------------------------------------------------------- */
12
13/* Internal header file for autofs */
14
15#include <linux/auto_fs.h>
16
17/* This is the range of ioctl() numbers we claim as ours */
18#define AUTOFS_IOC_FIRST AUTOFS_IOC_READY
19#define AUTOFS_IOC_COUNT 32
20
21#include <linux/kernel.h>
22#include <linux/slab.h>
23#include <linux/time.h>
24#include <linux/string.h>
25#include <linux/wait.h>
26#include <linux/dcache.h>
27#include <linux/namei.h>
28#include <linux/mount.h>
29#include <linux/sched.h>
30
31#include <asm/current.h>
32#include <asm/uaccess.h>
33
34#ifdef DEBUG
35#define DPRINTK(D) (printk D)
36#else
37#define DPRINTK(D) ((void)0)
38#endif
39
40/*
41 * If the daemon returns a negative response (AUTOFS_IOC_FAIL) then the
42 * kernel will keep the negative response cached for up to the time given
43 * here, although the time can be shorter if the kernel throws the dcache
44 * entry away. This probably should be settable from user space.
45 */
46#define AUTOFS_NEGATIVE_TIMEOUT (60*HZ) /* 1 minute */
47
48/* Structures associated with the root directory hash table */
49
50#define AUTOFS_HASH_SIZE 67
51
52struct autofs_dir_ent {
53 int hash;
54 char *name;
55 int len;
56 ino_t ino;
57 struct dentry *dentry;
58 /* Linked list of entries */
59 struct autofs_dir_ent *next;
60 struct autofs_dir_ent **back;
61 /* The following entries are for the expiry system */
62 unsigned long last_usage;
63 struct list_head exp;
64};
65
66struct autofs_dirhash {
67 struct autofs_dir_ent *h[AUTOFS_HASH_SIZE];
68 struct list_head expiry_head;
69};
70
71struct autofs_wait_queue {
72 wait_queue_head_t queue;
73 struct autofs_wait_queue *next;
74 autofs_wqt_t wait_queue_token;
75 /* We use the following to see what we are waiting for */
76 int hash;
77 int len;
78 char *name;
79 /* This is for status reporting upon return */
80 int status;
81 int wait_ctr;
82};
83
84struct autofs_symlink {
85 char *data;
86 int len;
87 time_t mtime;
88};
89
90#define AUTOFS_MAX_SYMLINKS 256
91
92#define AUTOFS_ROOT_INO 1
93#define AUTOFS_FIRST_SYMLINK 2
94#define AUTOFS_FIRST_DIR_INO (AUTOFS_FIRST_SYMLINK+AUTOFS_MAX_SYMLINKS)
95
96#define AUTOFS_SYMLINK_BITMAP_LEN \
97 ((AUTOFS_MAX_SYMLINKS+((sizeof(long)*1)-1))/(sizeof(long)*8))
98
99#define AUTOFS_SBI_MAGIC 0x6d4a556d
100
101struct autofs_sb_info {
102 u32 magic;
103 struct file *pipe;
104 struct pid *oz_pgrp;
105 int catatonic;
106 struct super_block *sb;
107 unsigned long exp_timeout;
108 ino_t next_dir_ino;
109 struct autofs_wait_queue *queues; /* Wait queue pointer */
110 struct autofs_dirhash dirhash; /* Root directory hash */
111 struct autofs_symlink symlink[AUTOFS_MAX_SYMLINKS];
112 unsigned long symlink_bitmap[AUTOFS_SYMLINK_BITMAP_LEN];
113};
114
115static inline struct autofs_sb_info *autofs_sbi(struct super_block *sb)
116{
117 return (struct autofs_sb_info *)(sb->s_fs_info);
118}
119
120/* autofs_oz_mode(): do we see the man behind the curtain? (The
121 processes which do manipulations for us in user space sees the raw
122 filesystem without "magic".) */
123
124static inline int autofs_oz_mode(struct autofs_sb_info *sbi) {
125 return sbi->catatonic || task_pgrp(current) == sbi->oz_pgrp;
126}
127
128/* Hash operations */
129
130void autofs_initialize_hash(struct autofs_dirhash *);
131struct autofs_dir_ent *autofs_hash_lookup(const struct autofs_dirhash *,struct qstr *);
132void autofs_hash_insert(struct autofs_dirhash *,struct autofs_dir_ent *);
133void autofs_hash_delete(struct autofs_dir_ent *);
134struct autofs_dir_ent *autofs_hash_enum(const struct autofs_dirhash *,off_t *,struct autofs_dir_ent *);
135void autofs_hash_dputall(struct autofs_dirhash *);
136void autofs_hash_nuke(struct autofs_sb_info *);
137
138/* Expiration-handling functions */
139
140void autofs_update_usage(struct autofs_dirhash *,struct autofs_dir_ent *);
141struct autofs_dir_ent *autofs_expire(struct super_block *,struct autofs_sb_info *, struct vfsmount *mnt);
142
143/* Operations structures */
144
145extern const struct inode_operations autofs_root_inode_operations;
146extern const struct inode_operations autofs_symlink_inode_operations;
147extern const struct file_operations autofs_root_operations;
148
149/* Initializing function */
150
151int autofs_fill_super(struct super_block *, void *, int);
152void autofs_kill_sb(struct super_block *sb);
153struct inode *autofs_iget(struct super_block *, unsigned long);
154
155/* Queue management functions */
156
157int autofs_wait(struct autofs_sb_info *,struct qstr *);
158int autofs_wait_release(struct autofs_sb_info *,autofs_wqt_t,int);
159void autofs_catatonic_mode(struct autofs_sb_info *);
160
161#ifdef DEBUG
162void autofs_say(const char *name, int len);
163#else
164#define autofs_say(n,l) ((void)0)
165#endif
diff --git a/fs/autofs/dirhash.c b/fs/autofs/dirhash.c
deleted file mode 100644
index e947915109e5..000000000000
--- a/fs/autofs/dirhash.c
+++ /dev/null
@@ -1,250 +0,0 @@
1/* -*- linux-c -*- --------------------------------------------------------- *
2 *
3 * linux/fs/autofs/dirhash.c
4 *
5 * Copyright 1997-1998 Transmeta Corporation -- All Rights Reserved
6 *
7 * This file is part of the Linux kernel and is made available under
8 * the terms of the GNU General Public License, version 2, or at your
9 * option, any later version, incorporated herein by reference.
10 *
11 * ------------------------------------------------------------------------- */
12
13#include "autofs_i.h"
14
15/* Functions for maintenance of expiry queue */
16
17static void autofs_init_usage(struct autofs_dirhash *dh,
18 struct autofs_dir_ent *ent)
19{
20 list_add_tail(&ent->exp, &dh->expiry_head);
21 ent->last_usage = jiffies;
22}
23
24static void autofs_delete_usage(struct autofs_dir_ent *ent)
25{
26 list_del(&ent->exp);
27}
28
29void autofs_update_usage(struct autofs_dirhash *dh,
30 struct autofs_dir_ent *ent)
31{
32 autofs_delete_usage(ent); /* Unlink from current position */
33 autofs_init_usage(dh,ent); /* Relink at queue tail */
34}
35
36struct autofs_dir_ent *autofs_expire(struct super_block *sb,
37 struct autofs_sb_info *sbi,
38 struct vfsmount *mnt)
39{
40 struct autofs_dirhash *dh = &sbi->dirhash;
41 struct autofs_dir_ent *ent;
42 unsigned long timeout = sbi->exp_timeout;
43
44 while (1) {
45 struct path path;
46 int umount_ok;
47
48 if ( list_empty(&dh->expiry_head) || sbi->catatonic )
49 return NULL; /* No entries */
50 /* We keep the list sorted by last_usage and want old stuff */
51 ent = list_entry(dh->expiry_head.next, struct autofs_dir_ent, exp);
52 if (jiffies - ent->last_usage < timeout)
53 break;
54 /* Move to end of list in case expiry isn't desirable */
55 autofs_update_usage(dh, ent);
56
57 /* Check to see that entry is expirable */
58 if ( ent->ino < AUTOFS_FIRST_DIR_INO )
59 return ent; /* Symlinks are always expirable */
60
61 /* Get the dentry for the autofs subdirectory */
62 path.dentry = ent->dentry;
63
64 if (!path.dentry) {
65 /* Should only happen in catatonic mode */
66 printk("autofs: dentry == NULL but inode range is directory, entry %s\n", ent->name);
67 autofs_delete_usage(ent);
68 continue;
69 }
70
71 if (!path.dentry->d_inode) {
72 dput(path.dentry);
73 printk("autofs: negative dentry on expiry queue: %s\n",
74 ent->name);
75 autofs_delete_usage(ent);
76 continue;
77 }
78
79 /* Make sure entry is mounted and unused; note that dentry will
80 point to the mounted-on-top root. */
81 if (!S_ISDIR(path.dentry->d_inode->i_mode) ||
82 !d_mountpoint(path.dentry)) {
83 DPRINTK(("autofs: not expirable (not a mounted directory): %s\n", ent->name));
84 continue;
85 }
86 path.mnt = mnt;
87 path_get(&path);
88 if (!follow_down(&path)) {
89 path_put(&path);
90 DPRINTK(("autofs: not expirable (not a mounted directory): %s\n", ent->name));
91 continue;
92 }
93 while (d_mountpoint(path.dentry) && follow_down(&path))
94 ;
95 umount_ok = may_umount(path.mnt);
96 path_put(&path);
97
98 if (umount_ok) {
99 DPRINTK(("autofs: signaling expire on %s\n", ent->name));
100 return ent; /* Expirable! */
101 }
102 DPRINTK(("autofs: didn't expire due to may_umount: %s\n", ent->name));
103 }
104 return NULL; /* No expirable entries */
105}
106
107void autofs_initialize_hash(struct autofs_dirhash *dh) {
108 memset(&dh->h, 0, AUTOFS_HASH_SIZE*sizeof(struct autofs_dir_ent *));
109 INIT_LIST_HEAD(&dh->expiry_head);
110}
111
112struct autofs_dir_ent *autofs_hash_lookup(const struct autofs_dirhash *dh, struct qstr *name)
113{
114 struct autofs_dir_ent *dhn;
115
116 DPRINTK(("autofs_hash_lookup: hash = 0x%08x, name = ", name->hash));
117 autofs_say(name->name,name->len);
118
119 for ( dhn = dh->h[(unsigned) name->hash % AUTOFS_HASH_SIZE] ; dhn ; dhn = dhn->next ) {
120 if ( name->hash == dhn->hash &&
121 name->len == dhn->len &&
122 !memcmp(name->name, dhn->name, name->len) )
123 break;
124 }
125
126 return dhn;
127}
128
129void autofs_hash_insert(struct autofs_dirhash *dh, struct autofs_dir_ent *ent)
130{
131 struct autofs_dir_ent **dhnp;
132
133 DPRINTK(("autofs_hash_insert: hash = 0x%08x, name = ", ent->hash));
134 autofs_say(ent->name,ent->len);
135
136 autofs_init_usage(dh,ent);
137 if (ent->dentry)
138 dget(ent->dentry);
139
140 dhnp = &dh->h[(unsigned) ent->hash % AUTOFS_HASH_SIZE];
141 ent->next = *dhnp;
142 ent->back = dhnp;
143 *dhnp = ent;
144 if ( ent->next )
145 ent->next->back = &(ent->next);
146}
147
148void autofs_hash_delete(struct autofs_dir_ent *ent)
149{
150 *(ent->back) = ent->next;
151 if ( ent->next )
152 ent->next->back = ent->back;
153
154 autofs_delete_usage(ent);
155
156 if ( ent->dentry )
157 dput(ent->dentry);
158 kfree(ent->name);
159 kfree(ent);
160}
161
162/*
163 * Used by readdir(). We must validate "ptr", so we can't simply make it
164 * a pointer. Values below 0xffff are reserved; calling with any value
165 * <= 0x10000 will return the first entry found.
166 *
167 * "last" can be NULL or the value returned by the last search *if* we
168 * want the next sequential entry.
169 */
170struct autofs_dir_ent *autofs_hash_enum(const struct autofs_dirhash *dh,
171 off_t *ptr, struct autofs_dir_ent *last)
172{
173 int bucket, ecount, i;
174 struct autofs_dir_ent *ent;
175
176 bucket = (*ptr >> 16) - 1;
177 ecount = *ptr & 0xffff;
178
179 if ( bucket < 0 ) {
180 bucket = ecount = 0;
181 }
182
183 DPRINTK(("autofs_hash_enum: bucket %d, entry %d\n", bucket, ecount));
184
185 ent = last ? last->next : NULL;
186
187 if ( ent ) {
188 ecount++;
189 } else {
190 while ( bucket < AUTOFS_HASH_SIZE ) {
191 ent = dh->h[bucket];
192 for ( i = ecount ; ent && i ; i-- )
193 ent = ent->next;
194
195 if (ent) {
196 ecount++; /* Point to *next* entry */
197 break;
198 }
199
200 bucket++; ecount = 0;
201 }
202 }
203
204#ifdef DEBUG
205 if ( !ent )
206 printk("autofs_hash_enum: nothing found\n");
207 else {
208 printk("autofs_hash_enum: found hash %08x, name", ent->hash);
209 autofs_say(ent->name,ent->len);
210 }
211#endif
212
213 *ptr = ((bucket+1) << 16) + ecount;
214 return ent;
215}
216
217/* Iterate over all the ents, and remove all dentry pointers. Used on
218 entering catatonic mode, in order to make the filesystem unmountable. */
219void autofs_hash_dputall(struct autofs_dirhash *dh)
220{
221 int i;
222 struct autofs_dir_ent *ent;
223
224 for ( i = 0 ; i < AUTOFS_HASH_SIZE ; i++ ) {
225 for ( ent = dh->h[i] ; ent ; ent = ent->next ) {
226 if ( ent->dentry ) {
227 dput(ent->dentry);
228 ent->dentry = NULL;
229 }
230 }
231 }
232}
233
234/* Delete everything. This is used on filesystem destruction, so we
235 make no attempt to keep the pointers valid */
236void autofs_hash_nuke(struct autofs_sb_info *sbi)
237{
238 int i;
239 struct autofs_dir_ent *ent, *nent;
240
241 for ( i = 0 ; i < AUTOFS_HASH_SIZE ; i++ ) {
242 for ( ent = sbi->dirhash.h[i] ; ent ; ent = nent ) {
243 nent = ent->next;
244 if ( ent->dentry )
245 dput(ent->dentry);
246 kfree(ent->name);
247 kfree(ent);
248 }
249 }
250}
diff --git a/fs/autofs/init.c b/fs/autofs/init.c
deleted file mode 100644
index cea5219b4f37..000000000000
--- a/fs/autofs/init.c
+++ /dev/null
@@ -1,52 +0,0 @@
1/* -*- linux-c -*- --------------------------------------------------------- *
2 *
3 * linux/fs/autofs/init.c
4 *
5 * Copyright 1997-1998 Transmeta Corporation -- All Rights Reserved
6 *
7 * This file is part of the Linux kernel and is made available under
8 * the terms of the GNU General Public License, version 2, or at your
9 * option, any later version, incorporated herein by reference.
10 *
11 * ------------------------------------------------------------------------- */
12
13#include <linux/module.h>
14#include <linux/init.h>
15#include "autofs_i.h"
16
17static int autofs_get_sb(struct file_system_type *fs_type,
18 int flags, const char *dev_name, void *data, struct vfsmount *mnt)
19{
20 return get_sb_nodev(fs_type, flags, data, autofs_fill_super, mnt);
21}
22
23static struct file_system_type autofs_fs_type = {
24 .owner = THIS_MODULE,
25 .name = "autofs",
26 .get_sb = autofs_get_sb,
27 .kill_sb = autofs_kill_sb,
28};
29
30static int __init init_autofs_fs(void)
31{
32 return register_filesystem(&autofs_fs_type);
33}
34
35static void __exit exit_autofs_fs(void)
36{
37 unregister_filesystem(&autofs_fs_type);
38}
39
40module_init(init_autofs_fs);
41module_exit(exit_autofs_fs);
42
43#ifdef DEBUG
/* Debug helper: print "(<len>: <name>)" for a name that need not be NUL-terminated */
void autofs_say(const char *name, int len)
{
	printk("(%d: ", len);
	for (; len; len--, name++)
		printk("%c", *name);
	printk(")\n");
}
51#endif
52MODULE_LICENSE("GPL");
diff --git a/fs/autofs/inode.c b/fs/autofs/inode.c
deleted file mode 100644
index e1734f2d6e26..000000000000
--- a/fs/autofs/inode.c
+++ /dev/null
@@ -1,288 +0,0 @@
1/* -*- linux-c -*- --------------------------------------------------------- *
2 *
3 * linux/fs/autofs/inode.c
4 *
5 * Copyright 1997-1998 Transmeta Corporation -- All Rights Reserved
6 *
7 * This file is part of the Linux kernel and is made available under
8 * the terms of the GNU General Public License, version 2, or at your
9 * option, any later version, incorporated herein by reference.
10 *
11 * ------------------------------------------------------------------------- */
12
13#include <linux/kernel.h>
14#include <linux/mm.h>
15#include <linux/slab.h>
16#include <linux/file.h>
17#include <linux/parser.h>
18#include <linux/bitops.h>
19#include <linux/magic.h>
20#include "autofs_i.h"
21#include <linux/module.h>
22
23void autofs_kill_sb(struct super_block *sb)
24{
25 struct autofs_sb_info *sbi = autofs_sbi(sb);
26 unsigned int n;
27
28 /*
29 * In the event of a failure in get_sb_nodev the superblock
30 * info is not present so nothing else has been setup, so
31 * just call kill_anon_super when we are called from
32 * deactivate_super.
33 */
34 if (!sbi)
35 goto out_kill_sb;
36
37 if (!sbi->catatonic)
38 autofs_catatonic_mode(sbi); /* Free wait queues, close pipe */
39
40 put_pid(sbi->oz_pgrp);
41
42 autofs_hash_nuke(sbi);
43 for (n = 0; n < AUTOFS_MAX_SYMLINKS; n++) {
44 if (test_bit(n, sbi->symlink_bitmap))
45 kfree(sbi->symlink[n].data);
46 }
47
48 kfree(sb->s_fs_info);
49
50out_kill_sb:
51 DPRINTK(("autofs: shutting down\n"));
52 kill_anon_super(sb);
53}
54
55static const struct super_operations autofs_sops = {
56 .statfs = simple_statfs,
57 .show_options = generic_show_options,
58};
59
60enum {Opt_err, Opt_fd, Opt_uid, Opt_gid, Opt_pgrp, Opt_minproto, Opt_maxproto};
61
62static const match_table_t autofs_tokens = {
63 {Opt_fd, "fd=%u"},
64 {Opt_uid, "uid=%u"},
65 {Opt_gid, "gid=%u"},
66 {Opt_pgrp, "pgrp=%u"},
67 {Opt_minproto, "minproto=%u"},
68 {Opt_maxproto, "maxproto=%u"},
69 {Opt_err, NULL}
70};
71
72static int parse_options(char *options, int *pipefd, uid_t *uid, gid_t *gid,
73 pid_t *pgrp, int *minproto, int *maxproto)
74{
75 char *p;
76 substring_t args[MAX_OPT_ARGS];
77 int option;
78
79 *uid = current_uid();
80 *gid = current_gid();
81 *pgrp = task_pgrp_nr(current);
82
83 *minproto = *maxproto = AUTOFS_PROTO_VERSION;
84
85 *pipefd = -1;
86
87 if (!options)
88 return 1;
89
90 while ((p = strsep(&options, ",")) != NULL) {
91 int token;
92 if (!*p)
93 continue;
94
95 token = match_token(p, autofs_tokens, args);
96 switch (token) {
97 case Opt_fd:
98 if (match_int(&args[0], &option))
99 return 1;
100 *pipefd = option;
101 break;
102 case Opt_uid:
103 if (match_int(&args[0], &option))
104 return 1;
105 *uid = option;
106 break;
107 case Opt_gid:
108 if (match_int(&args[0], &option))
109 return 1;
110 *gid = option;
111 break;
112 case Opt_pgrp:
113 if (match_int(&args[0], &option))
114 return 1;
115 *pgrp = option;
116 break;
117 case Opt_minproto:
118 if (match_int(&args[0], &option))
119 return 1;
120 *minproto = option;
121 break;
122 case Opt_maxproto:
123 if (match_int(&args[0], &option))
124 return 1;
125 *maxproto = option;
126 break;
127 default:
128 return 1;
129 }
130 }
131 return (*pipefd < 0);
132}
133
134int autofs_fill_super(struct super_block *s, void *data, int silent)
135{
136 struct inode * root_inode;
137 struct dentry * root;
138 struct file * pipe;
139 int pipefd;
140 struct autofs_sb_info *sbi;
141 int minproto, maxproto;
142 pid_t pgid;
143
144 save_mount_options(s, data);
145
146 sbi = kzalloc(sizeof(*sbi), GFP_KERNEL);
147 if (!sbi)
148 goto fail_unlock;
149 DPRINTK(("autofs: starting up, sbi = %p\n",sbi));
150
151 s->s_fs_info = sbi;
152 sbi->magic = AUTOFS_SBI_MAGIC;
153 sbi->pipe = NULL;
154 sbi->catatonic = 1;
155 sbi->exp_timeout = 0;
156 autofs_initialize_hash(&sbi->dirhash);
157 sbi->queues = NULL;
158 memset(sbi->symlink_bitmap, 0, sizeof(long)*AUTOFS_SYMLINK_BITMAP_LEN);
159 sbi->next_dir_ino = AUTOFS_FIRST_DIR_INO;
160 s->s_blocksize = 1024;
161 s->s_blocksize_bits = 10;
162 s->s_magic = AUTOFS_SUPER_MAGIC;
163 s->s_op = &autofs_sops;
164 s->s_time_gran = 1;
165 sbi->sb = s;
166
167 root_inode = autofs_iget(s, AUTOFS_ROOT_INO);
168 if (IS_ERR(root_inode))
169 goto fail_free;
170 root = d_alloc_root(root_inode);
171 pipe = NULL;
172
173 if (!root)
174 goto fail_iput;
175
176 /* Can this call block? - WTF cares? s is locked. */
177 if (parse_options(data, &pipefd, &root_inode->i_uid,
178 &root_inode->i_gid, &pgid, &minproto,
179 &maxproto)) {
180 printk("autofs: called with bogus options\n");
181 goto fail_dput;
182 }
183
184 /* Couldn't this be tested earlier? */
185 if (minproto > AUTOFS_PROTO_VERSION ||
186 maxproto < AUTOFS_PROTO_VERSION) {
187 printk("autofs: kernel does not match daemon version\n");
188 goto fail_dput;
189 }
190
191 DPRINTK(("autofs: pipe fd = %d, pgrp = %u\n", pipefd, pgid));
192 sbi->oz_pgrp = find_get_pid(pgid);
193
194 if (!sbi->oz_pgrp) {
195 printk("autofs: could not find process group %d\n", pgid);
196 goto fail_dput;
197 }
198
199 pipe = fget(pipefd);
200
201 if (!pipe) {
202 printk("autofs: could not open pipe file descriptor\n");
203 goto fail_put_pid;
204 }
205
206 if (!pipe->f_op || !pipe->f_op->write)
207 goto fail_fput;
208 sbi->pipe = pipe;
209 sbi->catatonic = 0;
210
211 /*
212 * Success! Install the root dentry now to indicate completion.
213 */
214 s->s_root = root;
215 return 0;
216
217fail_fput:
218 printk("autofs: pipe file descriptor does not contain proper ops\n");
219 fput(pipe);
220fail_put_pid:
221 put_pid(sbi->oz_pgrp);
222fail_dput:
223 dput(root);
224 goto fail_free;
225fail_iput:
226 printk("autofs: get root dentry failed\n");
227 iput(root_inode);
228fail_free:
229 kfree(sbi);
230 s->s_fs_info = NULL;
231fail_unlock:
232 return -EINVAL;
233}
234
235struct inode *autofs_iget(struct super_block *sb, unsigned long ino)
236{
237 unsigned int n;
238 struct autofs_sb_info *sbi = autofs_sbi(sb);
239 struct inode *inode;
240
241 inode = iget_locked(sb, ino);
242 if (!inode)
243 return ERR_PTR(-ENOMEM);
244 if (!(inode->i_state & I_NEW))
245 return inode;
246
247 /* Initialize to the default case (stub directory) */
248
249 inode->i_op = &simple_dir_inode_operations;
250 inode->i_fop = &simple_dir_operations;
251 inode->i_mode = S_IFDIR | S_IRUGO | S_IXUGO;
252 inode->i_nlink = 2;
253 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
254
255 if (ino == AUTOFS_ROOT_INO) {
256 inode->i_mode = S_IFDIR | S_IRUGO | S_IXUGO | S_IWUSR;
257 inode->i_op = &autofs_root_inode_operations;
258 inode->i_fop = &autofs_root_operations;
259 goto done;
260 }
261
262 inode->i_uid = inode->i_sb->s_root->d_inode->i_uid;
263 inode->i_gid = inode->i_sb->s_root->d_inode->i_gid;
264
265 if (ino >= AUTOFS_FIRST_SYMLINK && ino < AUTOFS_FIRST_DIR_INO) {
266 /* Symlink inode - should be in symlink list */
267 struct autofs_symlink *sl;
268
269 n = ino - AUTOFS_FIRST_SYMLINK;
270 if (n >= AUTOFS_MAX_SYMLINKS || !test_bit(n,sbi->symlink_bitmap)) {
271 printk("autofs: Looking for bad symlink inode %u\n", (unsigned int) ino);
272 goto done;
273 }
274
275 inode->i_op = &autofs_symlink_inode_operations;
276 sl = &sbi->symlink[n];
277 inode->i_private = sl;
278 inode->i_mode = S_IFLNK | S_IRWXUGO;
279 inode->i_mtime.tv_sec = inode->i_ctime.tv_sec = sl->mtime;
280 inode->i_mtime.tv_nsec = inode->i_ctime.tv_nsec = 0;
281 inode->i_size = sl->len;
282 inode->i_nlink = 1;
283 }
284
285done:
286 unlock_new_inode(inode);
287 return inode;
288}
diff --git a/fs/autofs/root.c b/fs/autofs/root.c
deleted file mode 100644
index 0c4ca81aeaeb..000000000000
--- a/fs/autofs/root.c
+++ /dev/null
@@ -1,645 +0,0 @@
1/* -*- linux-c -*- --------------------------------------------------------- *
2 *
3 * linux/fs/autofs/root.c
4 *
5 * Copyright 1997-1998 Transmeta Corporation -- All Rights Reserved
6 *
7 * This file is part of the Linux kernel and is made available under
8 * the terms of the GNU General Public License, version 2, or at your
9 * option, any later version, incorporated herein by reference.
10 *
11 * ------------------------------------------------------------------------- */
12
13#include <linux/capability.h>
14#include <linux/errno.h>
15#include <linux/stat.h>
16#include <linux/slab.h>
17#include <linux/param.h>
18#include <linux/time.h>
19#include <linux/compat.h>
20#include <linux/smp_lock.h>
21#include "autofs_i.h"
22
23static int autofs_root_readdir(struct file *,void *,filldir_t);
24static struct dentry *autofs_root_lookup(struct inode *,struct dentry *, struct nameidata *);
25static int autofs_root_symlink(struct inode *,struct dentry *,const char *);
26static int autofs_root_unlink(struct inode *,struct dentry *);
27static int autofs_root_rmdir(struct inode *,struct dentry *);
28static int autofs_root_mkdir(struct inode *,struct dentry *,int);
29static long autofs_root_ioctl(struct file *,unsigned int,unsigned long);
30#ifdef CONFIG_COMPAT
31static long autofs_root_compat_ioctl(struct file *,unsigned int,unsigned long);
32#endif
33
34const struct file_operations autofs_root_operations = {
35 .llseek = generic_file_llseek,
36 .read = generic_read_dir,
37 .readdir = autofs_root_readdir,
38 .unlocked_ioctl = autofs_root_ioctl,
39#ifdef CONFIG_COMPAT
40 .compat_ioctl = autofs_root_compat_ioctl,
41#endif
42};
43
44const struct inode_operations autofs_root_inode_operations = {
45 .lookup = autofs_root_lookup,
46 .unlink = autofs_root_unlink,
47 .symlink = autofs_root_symlink,
48 .mkdir = autofs_root_mkdir,
49 .rmdir = autofs_root_rmdir,
50};
51
52static int autofs_root_readdir(struct file *filp, void *dirent, filldir_t filldir)
53{
54 struct autofs_dir_ent *ent = NULL;
55 struct autofs_dirhash *dirhash;
56 struct autofs_sb_info *sbi;
57 struct inode * inode = filp->f_path.dentry->d_inode;
58 off_t onr, nr;
59
60 lock_kernel();
61
62 sbi = autofs_sbi(inode->i_sb);
63 dirhash = &sbi->dirhash;
64 nr = filp->f_pos;
65
66 switch(nr)
67 {
68 case 0:
69 if (filldir(dirent, ".", 1, nr, inode->i_ino, DT_DIR) < 0)
70 goto out;
71 filp->f_pos = ++nr;
72 /* fall through */
73 case 1:
74 if (filldir(dirent, "..", 2, nr, inode->i_ino, DT_DIR) < 0)
75 goto out;
76 filp->f_pos = ++nr;
77 /* fall through */
78 default:
79 while (onr = nr, ent = autofs_hash_enum(dirhash,&nr,ent)) {
80 if (!ent->dentry || d_mountpoint(ent->dentry)) {
81 if (filldir(dirent,ent->name,ent->len,onr,ent->ino,DT_UNKNOWN) < 0)
82 goto out;
83 filp->f_pos = nr;
84 }
85 }
86 break;
87 }
88
89out:
90 unlock_kernel();
91 return 0;
92}
93
94static int try_to_fill_dentry(struct dentry *dentry, struct super_block *sb, struct autofs_sb_info *sbi)
95{
96 struct inode * inode;
97 struct autofs_dir_ent *ent;
98 int status = 0;
99
100 if (!(ent = autofs_hash_lookup(&sbi->dirhash, &dentry->d_name))) {
101 do {
102 if (status && dentry->d_inode) {
103 if (status != -ENOENT)
104 printk("autofs warning: lookup failure on positive dentry, status = %d, name = %s\n", status, dentry->d_name.name);
105 return 0; /* Try to get the kernel to invalidate this dentry */
106 }
107
108 /* Turn this into a real negative dentry? */
109 if (status == -ENOENT) {
110 dentry->d_time = jiffies + AUTOFS_NEGATIVE_TIMEOUT;
111 dentry->d_flags &= ~DCACHE_AUTOFS_PENDING;
112 return 1;
113 } else if (status) {
114 /* Return a negative dentry, but leave it "pending" */
115 return 1;
116 }
117 status = autofs_wait(sbi, &dentry->d_name);
118 } while (!(ent = autofs_hash_lookup(&sbi->dirhash, &dentry->d_name)));
119 }
120
121 /* Abuse this field as a pointer to the directory entry, used to
122 find the expire list pointers */
123 dentry->d_time = (unsigned long) ent;
124
125 if (!dentry->d_inode) {
126 inode = autofs_iget(sb, ent->ino);
127 if (IS_ERR(inode)) {
128 /* Failed, but leave pending for next time */
129 return 1;
130 }
131 dentry->d_inode = inode;
132 }
133
134 /* If this is a directory that isn't a mount point, bitch at the
135 daemon and fix it in user space */
136 if (S_ISDIR(dentry->d_inode->i_mode) && !d_mountpoint(dentry)) {
137 return !autofs_wait(sbi, &dentry->d_name);
138 }
139
140 /* We don't update the usages for the autofs daemon itself, this
141 is necessary for recursive autofs mounts */
142 if (!autofs_oz_mode(sbi)) {
143 autofs_update_usage(&sbi->dirhash,ent);
144 }
145
146 dentry->d_flags &= ~DCACHE_AUTOFS_PENDING;
147 return 1;
148}
149
150
151/*
152 * Revalidate is called on every cache lookup. Some of those
153 * cache lookups may actually happen while the dentry is not
154 * yet completely filled in, and revalidate has to delay such
155 * lookups..
156 */
157static int autofs_revalidate(struct dentry * dentry, struct nameidata *nd)
158{
159 struct inode * dir;
160 struct autofs_sb_info *sbi;
161 struct autofs_dir_ent *ent;
162 int res;
163
164 lock_kernel();
165 dir = dentry->d_parent->d_inode;
166 sbi = autofs_sbi(dir->i_sb);
167
168 /* Pending dentry */
169 if (dentry->d_flags & DCACHE_AUTOFS_PENDING) {
170 if (autofs_oz_mode(sbi))
171 res = 1;
172 else
173 res = try_to_fill_dentry(dentry, dir->i_sb, sbi);
174 unlock_kernel();
175 return res;
176 }
177
178 /* Negative dentry.. invalidate if "old" */
179 if (!dentry->d_inode) {
180 unlock_kernel();
181 return (dentry->d_time - jiffies <= AUTOFS_NEGATIVE_TIMEOUT);
182 }
183
184 /* Check for a non-mountpoint directory */
185 if (S_ISDIR(dentry->d_inode->i_mode) && !d_mountpoint(dentry)) {
186 if (autofs_oz_mode(sbi))
187 res = 1;
188 else
189 res = try_to_fill_dentry(dentry, dir->i_sb, sbi);
190 unlock_kernel();
191 return res;
192 }
193
194 /* Update the usage list */
195 if (!autofs_oz_mode(sbi)) {
196 ent = (struct autofs_dir_ent *) dentry->d_time;
197 if (ent)
198 autofs_update_usage(&sbi->dirhash,ent);
199 }
200 unlock_kernel();
201 return 1;
202}
203
204static const struct dentry_operations autofs_dentry_operations = {
205 .d_revalidate = autofs_revalidate,
206};
207
208static struct dentry *autofs_root_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
209{
210 struct autofs_sb_info *sbi;
211 int oz_mode;
212
213 DPRINTK(("autofs_root_lookup: name = "));
214 lock_kernel();
215 autofs_say(dentry->d_name.name,dentry->d_name.len);
216
217 if (dentry->d_name.len > NAME_MAX) {
218 unlock_kernel();
219 return ERR_PTR(-ENAMETOOLONG);/* File name too long to exist */
220 }
221
222 sbi = autofs_sbi(dir->i_sb);
223
224 oz_mode = autofs_oz_mode(sbi);
225 DPRINTK(("autofs_lookup: pid = %u, pgrp = %u, catatonic = %d, "
226 "oz_mode = %d\n", task_pid_nr(current),
227 task_pgrp_nr(current), sbi->catatonic,
228 oz_mode));
229
230 /*
231 * Mark the dentry incomplete, but add it. This is needed so
232 * that the VFS layer knows about the dentry, and we can count
233 * on catching any lookups through the revalidate.
234 *
235 * Let all the hard work be done by the revalidate function that
236 * needs to be able to do this anyway..
237 *
238 * We need to do this before we release the directory semaphore.
239 */
240 dentry->d_op = &autofs_dentry_operations;
241 dentry->d_flags |= DCACHE_AUTOFS_PENDING;
242 d_add(dentry, NULL);
243
244 mutex_unlock(&dir->i_mutex);
245 autofs_revalidate(dentry, nd);
246 mutex_lock(&dir->i_mutex);
247
248 /*
249 * If we are still pending, check if we had to handle
250 * a signal. If so we can force a restart..
251 */
252 if (dentry->d_flags & DCACHE_AUTOFS_PENDING) {
253 /* See if we were interrupted */
254 if (signal_pending(current)) {
255 sigset_t *sigset = &current->pending.signal;
256 if (sigismember (sigset, SIGKILL) ||
257 sigismember (sigset, SIGQUIT) ||
258 sigismember (sigset, SIGINT)) {
259 unlock_kernel();
260 return ERR_PTR(-ERESTARTNOINTR);
261 }
262 }
263 }
264 unlock_kernel();
265
266 /*
267 * If this dentry is unhashed, then we shouldn't honour this
268 * lookup even if the dentry is positive. Returning ENOENT here
269 * doesn't do the right thing for all system calls, but it should
270 * be OK for the operations we permit from an autofs.
271 */
272 if (dentry->d_inode && d_unhashed(dentry))
273 return ERR_PTR(-ENOENT);
274
275 return NULL;
276}
277
278static int autofs_root_symlink(struct inode *dir, struct dentry *dentry, const char *symname)
279{
280 struct autofs_sb_info *sbi = autofs_sbi(dir->i_sb);
281 struct autofs_dirhash *dh = &sbi->dirhash;
282 struct autofs_dir_ent *ent;
283 unsigned int n;
284 int slsize;
285 struct autofs_symlink *sl;
286 struct inode *inode;
287
288 DPRINTK(("autofs_root_symlink: %s <- ", symname));
289 autofs_say(dentry->d_name.name,dentry->d_name.len);
290
291 lock_kernel();
292 if (!autofs_oz_mode(sbi)) {
293 unlock_kernel();
294 return -EACCES;
295 }
296
297 if (autofs_hash_lookup(dh, &dentry->d_name)) {
298 unlock_kernel();
299 return -EEXIST;
300 }
301
302 n = find_first_zero_bit(sbi->symlink_bitmap,AUTOFS_MAX_SYMLINKS);
303 if (n >= AUTOFS_MAX_SYMLINKS) {
304 unlock_kernel();
305 return -ENOSPC;
306 }
307
308 set_bit(n,sbi->symlink_bitmap);
309 sl = &sbi->symlink[n];
310 sl->len = strlen(symname);
311 sl->data = kmalloc(slsize = sl->len+1, GFP_KERNEL);
312 if (!sl->data) {
313 clear_bit(n,sbi->symlink_bitmap);
314 unlock_kernel();
315 return -ENOSPC;
316 }
317
318 ent = kmalloc(sizeof(struct autofs_dir_ent), GFP_KERNEL);
319 if (!ent) {
320 kfree(sl->data);
321 clear_bit(n,sbi->symlink_bitmap);
322 unlock_kernel();
323 return -ENOSPC;
324 }
325
326 ent->name = kmalloc(dentry->d_name.len+1, GFP_KERNEL);
327 if (!ent->name) {
328 kfree(sl->data);
329 kfree(ent);
330 clear_bit(n,sbi->symlink_bitmap);
331 unlock_kernel();
332 return -ENOSPC;
333 }
334
335 memcpy(sl->data,symname,slsize);
336 sl->mtime = get_seconds();
337
338 ent->ino = AUTOFS_FIRST_SYMLINK + n;
339 ent->hash = dentry->d_name.hash;
340 memcpy(ent->name, dentry->d_name.name, 1+(ent->len = dentry->d_name.len));
341 ent->dentry = NULL; /* We don't keep the dentry for symlinks */
342
343 autofs_hash_insert(dh,ent);
344
345 inode = autofs_iget(dir->i_sb, ent->ino);
346 if (IS_ERR(inode))
347 return PTR_ERR(inode);
348
349 d_instantiate(dentry, inode);
350 unlock_kernel();
351 return 0;
352}
353
354/*
355 * NOTE!
356 *
357 * Normal filesystems would do a "d_delete()" to tell the VFS dcache
358 * that the file no longer exists. However, doing that means that the
359 * VFS layer can turn the dentry into a negative dentry, which we
360 * obviously do not want (we're dropping the entry not because it
361 * doesn't exist, but because it has timed out).
362 *
363 * Also see autofs_root_rmdir()..
364 */
365static int autofs_root_unlink(struct inode *dir, struct dentry *dentry)
366{
367 struct autofs_sb_info *sbi = autofs_sbi(dir->i_sb);
368 struct autofs_dirhash *dh = &sbi->dirhash;
369 struct autofs_dir_ent *ent;
370 unsigned int n;
371
372 /* This allows root to remove symlinks */
373 lock_kernel();
374 if (!autofs_oz_mode(sbi) && !capable(CAP_SYS_ADMIN)) {
375 unlock_kernel();
376 return -EACCES;
377 }
378
379 ent = autofs_hash_lookup(dh, &dentry->d_name);
380 if (!ent) {
381 unlock_kernel();
382 return -ENOENT;
383 }
384
385 n = ent->ino - AUTOFS_FIRST_SYMLINK;
386 if (n >= AUTOFS_MAX_SYMLINKS) {
387 unlock_kernel();
388 return -EISDIR; /* It's a directory, dummy */
389 }
390 if (!test_bit(n,sbi->symlink_bitmap)) {
391 unlock_kernel();
392 return -EINVAL; /* Nonexistent symlink? Shouldn't happen */
393 }
394
395 dentry->d_time = (unsigned long)(struct autofs_dirhash *)NULL;
396 autofs_hash_delete(ent);
397 clear_bit(n,sbi->symlink_bitmap);
398 kfree(sbi->symlink[n].data);
399 d_drop(dentry);
400
401 unlock_kernel();
402 return 0;
403}
404
405static int autofs_root_rmdir(struct inode *dir, struct dentry *dentry)
406{
407 struct autofs_sb_info *sbi = autofs_sbi(dir->i_sb);
408 struct autofs_dirhash *dh = &sbi->dirhash;
409 struct autofs_dir_ent *ent;
410
411 lock_kernel();
412 if (!autofs_oz_mode(sbi)) {
413 unlock_kernel();
414 return -EACCES;
415 }
416
417 ent = autofs_hash_lookup(dh, &dentry->d_name);
418 if (!ent) {
419 unlock_kernel();
420 return -ENOENT;
421 }
422
423 if ((unsigned int)ent->ino < AUTOFS_FIRST_DIR_INO) {
424 unlock_kernel();
425 return -ENOTDIR; /* Not a directory */
426 }
427
428 if (ent->dentry != dentry) {
429 printk("autofs_rmdir: odentry != dentry for entry %s\n", dentry->d_name.name);
430 }
431
432 dentry->d_time = (unsigned long)(struct autofs_dir_ent *)NULL;
433 autofs_hash_delete(ent);
434 drop_nlink(dir);
435 d_drop(dentry);
436 unlock_kernel();
437
438 return 0;
439}
440
441static int autofs_root_mkdir(struct inode *dir, struct dentry *dentry, int mode)
442{
443 struct autofs_sb_info *sbi = autofs_sbi(dir->i_sb);
444 struct autofs_dirhash *dh = &sbi->dirhash;
445 struct autofs_dir_ent *ent;
446 struct inode *inode;
447 ino_t ino;
448
449 lock_kernel();
450 if (!autofs_oz_mode(sbi)) {
451 unlock_kernel();
452 return -EACCES;
453 }
454
455 ent = autofs_hash_lookup(dh, &dentry->d_name);
456 if (ent) {
457 unlock_kernel();
458 return -EEXIST;
459 }
460
461 if (sbi->next_dir_ino < AUTOFS_FIRST_DIR_INO) {
462 printk("autofs: Out of inode numbers -- what the heck did you do??\n");
463 unlock_kernel();
464 return -ENOSPC;
465 }
466 ino = sbi->next_dir_ino++;
467
468 ent = kmalloc(sizeof(struct autofs_dir_ent), GFP_KERNEL);
469 if (!ent) {
470 unlock_kernel();
471 return -ENOSPC;
472 }
473
474 ent->name = kmalloc(dentry->d_name.len+1, GFP_KERNEL);
475 if (!ent->name) {
476 kfree(ent);
477 unlock_kernel();
478 return -ENOSPC;
479 }
480
481 ent->hash = dentry->d_name.hash;
482 memcpy(ent->name, dentry->d_name.name, 1+(ent->len = dentry->d_name.len));
483 ent->ino = ino;
484 ent->dentry = dentry;
485 autofs_hash_insert(dh,ent);
486
487 inc_nlink(dir);
488
489 inode = autofs_iget(dir->i_sb, ino);
490 if (IS_ERR(inode)) {
491 drop_nlink(dir);
492 return PTR_ERR(inode);
493 }
494
495 d_instantiate(dentry, inode);
496 unlock_kernel();
497
498 return 0;
499}
500
501/* Get/set timeout ioctl() operation */
502#ifdef CONFIG_COMPAT
503static inline int autofs_compat_get_set_timeout(struct autofs_sb_info *sbi,
504 unsigned int __user *p)
505{
506 unsigned long ntimeout;
507
508 if (get_user(ntimeout, p) ||
509 put_user(sbi->exp_timeout / HZ, p))
510 return -EFAULT;
511
512 if (ntimeout > UINT_MAX/HZ)
513 sbi->exp_timeout = 0;
514 else
515 sbi->exp_timeout = ntimeout * HZ;
516
517 return 0;
518}
519#endif
520
521static inline int autofs_get_set_timeout(struct autofs_sb_info *sbi,
522 unsigned long __user *p)
523{
524 unsigned long ntimeout;
525
526 if (get_user(ntimeout, p) ||
527 put_user(sbi->exp_timeout / HZ, p))
528 return -EFAULT;
529
530 if (ntimeout > ULONG_MAX/HZ)
531 sbi->exp_timeout = 0;
532 else
533 sbi->exp_timeout = ntimeout * HZ;
534
535 return 0;
536}
537
538/* Return protocol version */
539static inline int autofs_get_protover(int __user *p)
540{
541 return put_user(AUTOFS_PROTO_VERSION, p);
542}
543
544/* Perform an expiry operation */
545static inline int autofs_expire_run(struct super_block *sb,
546 struct autofs_sb_info *sbi,
547 struct vfsmount *mnt,
548 struct autofs_packet_expire __user *pkt_p)
549{
550 struct autofs_dir_ent *ent;
551 struct autofs_packet_expire pkt;
552
553 memset(&pkt,0,sizeof pkt);
554
555 pkt.hdr.proto_version = AUTOFS_PROTO_VERSION;
556 pkt.hdr.type = autofs_ptype_expire;
557
558 if (!sbi->exp_timeout || !(ent = autofs_expire(sb,sbi,mnt)))
559 return -EAGAIN;
560
561 pkt.len = ent->len;
562 memcpy(pkt.name, ent->name, pkt.len);
563 pkt.name[pkt.len] = '\0';
564
565 if (copy_to_user(pkt_p, &pkt, sizeof(struct autofs_packet_expire)))
566 return -EFAULT;
567
568 return 0;
569}
570
571/*
572 * ioctl()'s on the root directory is the chief method for the daemon to
573 * generate kernel reactions
574 */
575static int autofs_do_root_ioctl(struct inode *inode, struct file *filp,
576 unsigned int cmd, unsigned long arg)
577{
578 struct autofs_sb_info *sbi = autofs_sbi(inode->i_sb);
579 void __user *argp = (void __user *)arg;
580
581 DPRINTK(("autofs_ioctl: cmd = 0x%08x, arg = 0x%08lx, sbi = %p, pgrp = %u\n",cmd,arg,sbi,task_pgrp_nr(current)));
582
583 if (_IOC_TYPE(cmd) != _IOC_TYPE(AUTOFS_IOC_FIRST) ||
584 _IOC_NR(cmd) - _IOC_NR(AUTOFS_IOC_FIRST) >= AUTOFS_IOC_COUNT)
585 return -ENOTTY;
586
587 if (!autofs_oz_mode(sbi) && !capable(CAP_SYS_ADMIN))
588 return -EPERM;
589
590 switch(cmd) {
591 case AUTOFS_IOC_READY: /* Wait queue: go ahead and retry */
592 return autofs_wait_release(sbi,(autofs_wqt_t)arg,0);
593 case AUTOFS_IOC_FAIL: /* Wait queue: fail with ENOENT */
594 return autofs_wait_release(sbi,(autofs_wqt_t)arg,-ENOENT);
595 case AUTOFS_IOC_CATATONIC: /* Enter catatonic mode (daemon shutdown) */
596 autofs_catatonic_mode(sbi);
597 return 0;
598 case AUTOFS_IOC_PROTOVER: /* Get protocol version */
599 return autofs_get_protover(argp);
600#ifdef CONFIG_COMPAT
601 case AUTOFS_IOC_SETTIMEOUT32:
602 return autofs_compat_get_set_timeout(sbi, argp);
603#endif
604 case AUTOFS_IOC_SETTIMEOUT:
605 return autofs_get_set_timeout(sbi, argp);
606 case AUTOFS_IOC_EXPIRE:
607 return autofs_expire_run(inode->i_sb, sbi, filp->f_path.mnt,
608 argp);
609 default:
610 return -ENOSYS;
611 }
612
613}
614
615static long autofs_root_ioctl(struct file *filp,
616 unsigned int cmd, unsigned long arg)
617{
618 int ret;
619
620 lock_kernel();
621 ret = autofs_do_root_ioctl(filp->f_path.dentry->d_inode,
622 filp, cmd, arg);
623 unlock_kernel();
624
625 return ret;
626}
627
628#ifdef CONFIG_COMPAT
629static long autofs_root_compat_ioctl(struct file *filp,
630 unsigned int cmd, unsigned long arg)
631{
632 struct inode *inode = filp->f_path.dentry->d_inode;
633 int ret;
634
635 lock_kernel();
636 if (cmd == AUTOFS_IOC_READY || cmd == AUTOFS_IOC_FAIL)
637 ret = autofs_do_root_ioctl(inode, filp, cmd, arg);
638 else
639 ret = autofs_do_root_ioctl(inode, filp, cmd,
640 (unsigned long)compat_ptr(arg));
641 unlock_kernel();
642
643 return ret;
644}
645#endif
diff --git a/fs/autofs/symlink.c b/fs/autofs/symlink.c
deleted file mode 100644
index 7ce9cb2c9ce2..000000000000
--- a/fs/autofs/symlink.c
+++ /dev/null
@@ -1,26 +0,0 @@
1/* -*- linux-c -*- --------------------------------------------------------- *
2 *
3 * linux/fs/autofs/symlink.c
4 *
5 * Copyright 1997-1998 Transmeta Corporation -- All Rights Reserved
6 *
7 * This file is part of the Linux kernel and is made available under
8 * the terms of the GNU General Public License, version 2, or at your
9 * option, any later version, incorporated herein by reference.
10 *
11 * ------------------------------------------------------------------------- */
12
13#include "autofs_i.h"
14
15/* Nothing to release.. */
/*
 * follow_link handler for autofs symlinks.
 *
 * Reads the ->data member of the struct autofs_symlink stored in the
 * inode's i_private and hands it to nd_set_link(). Always returns NULL.
 */
16static void *autofs_follow_link(struct dentry *dentry, struct nameidata *nd)
17{
18 char *s=((struct autofs_symlink *)dentry->d_inode->i_private)->data;
19 nd_set_link(nd, s);
20 return NULL;
21}
22
/*
 * Inode operations for autofs symlinks: readlink uses generic_readlink,
 * follow_link uses autofs_follow_link().
 */
23const struct inode_operations autofs_symlink_inode_operations = {
24 .readlink = generic_readlink,
25 .follow_link = autofs_follow_link
26};
diff --git a/fs/autofs/waitq.c b/fs/autofs/waitq.c
deleted file mode 100644
index be46805972f0..000000000000
--- a/fs/autofs/waitq.c
+++ /dev/null
@@ -1,205 +0,0 @@
1/* -*- linux-c -*- --------------------------------------------------------- *
2 *
3 * linux/fs/autofs/waitq.c
4 *
5 * Copyright 1997-1998 Transmeta Corporation -- All Rights Reserved
6 *
7 * This file is part of the Linux kernel and is made available under
8 * the terms of the GNU General Public License, version 2, or at your
9 * option, any later version, incorporated herein by reference.
10 *
11 * ------------------------------------------------------------------------- */
12
13#include <linux/slab.h>
14#include <linux/time.h>
15#include <linux/signal.h>
16#include <linux/file.h>
17#include "autofs_i.h"
18
19/* We make this a static variable rather than a part of the superblock; it
20 is better if we don't reassign numbers easily even across filesystems */
/* Next token to hand out; starts at 1 and is post-incremented by
   autofs_wait() each time it creates a new wait queue entry. */
21static autofs_wqt_t autofs_next_wait_queue = 1;
22
23/* These are the signals we allow interrupting a pending mount */
/* autofs_wait() passes this to siginitsetinv(), so the set it blocks
   while sleeping is every signal except these three. */
24#define SHUTDOWN_SIGS (sigmask(SIGKILL) | sigmask(SIGINT) | sigmask(SIGQUIT))
25
/*
 * Put the filesystem instance @sbi into catatonic mode.
 *
 * Sets sbi->catatonic, detaches the whole wait queue list from
 * sbi->queues, and for every entry on it sets the status to -ENOENT,
 * frees and clears the name, and wakes the sleepers. Then drops the pipe
 * reference with fput(), clears sbi->pipe, and calls
 * autofs_hash_dputall() on the directory hash.
 *
 * The wait queue entries themselves are not freed here.
 * NOTE(review): unlike autofs_wait_release(), wait_ctr is not decremented
 * in this function - confirm the entries are eventually freed by the
 * waiters in autofs_wait().
 */
26void autofs_catatonic_mode(struct autofs_sb_info *sbi)
27{
28 struct autofs_wait_queue *wq, *nwq;
29
30 DPRINTK(("autofs: entering catatonic mode\n"));
31
32 sbi->catatonic = 1;
33 wq = sbi->queues;
34 sbi->queues = NULL; /* Erase all wait queues */
35 while ( wq ) {
 /* Fetch the successor before waking anyone on this entry */
36 nwq = wq->next;
37 wq->status = -ENOENT; /* Magic is gone - report failure */
38 kfree(wq->name);
 /* A NULL name makes autofs_wait() skip sleeping on this entry */
39 wq->name = NULL;
40 wake_up(&wq->queue);
41 wq = nwq;
42 }
 /* NOTE(review): sbi->pipe is handed to fput() without a NULL check and
    is set to NULL right after; confirm callers never reach this function
    a second time for the same sbi. */
43 fput(sbi->pipe); /* Close the pipe */
44 sbi->pipe = NULL;
45 autofs_hash_dputall(&sbi->dirhash); /* Remove all dentry pointers */
46}
47
/*
 * Write @bytes bytes starting at kernel address @addr to @file through
 * file->f_op->write().
 *
 * The write is repeated until everything has been written or a call
 * returns a value <= 0. The address limit is switched to KERNEL_DS around
 * the loop and restored afterwards.
 *
 * Returns 0 if all bytes were written and 1 if some bytes remain
 * unwritten.
 */
48static int autofs_write(struct file *file, const void *addr, int bytes)
49{
50 unsigned long sigpipe, flags;
51 mm_segment_t fs;
52 const char *data = (const char *)addr;
53 ssize_t wr = 0;
54
55 /** WARNING: this is not safe for writing more than PIPE_BUF bytes! **/
56
 /* Remember whether SIGPIPE was already pending before we write */
57 sigpipe = sigismember(&current->pending.signal, SIGPIPE);
58
59 /* Save pointer to user space and point back to kernel space */
60 fs = get_fs();
61 set_fs(KERNEL_DS);
62
 /* Advance through the buffer by however much each write accepted */
63 while (bytes &&
64 (wr = file->f_op->write(file,data,bytes,&file->f_pos)) > 0) {
65 data += wr;
66 bytes -= wr;
67 }
68
69 set_fs(fs);
70
71 /* Keep the currently executing process from receiving a
72 SIGPIPE unless it was already supposed to get one */
73 if (wr == -EPIPE && !sigpipe) {
74 spin_lock_irqsave(&current->sighand->siglock, flags);
75 sigdelset(&current->pending.signal, SIGPIPE);
76 recalc_sigpending();
77 spin_unlock_irqrestore(&current->sighand->siglock, flags);
78 }
79
 /* Non-zero means the write was cut short */
80 return (bytes > 0);
81}
82
/*
 * Send an autofs_ptype_missing packet for wait queue entry @wq over
 * sbi->pipe.
 *
 * The packet is zeroed first, then filled with the protocol version, the
 * packet type, the entry's wait queue token, and the name length plus a
 * NUL-terminated copy of the name. If autofs_write() reports an incomplete
 * write, the filesystem is put into catatonic mode.
 */
83static void autofs_notify_daemon(struct autofs_sb_info *sbi, struct autofs_wait_queue *wq)
84{
85 struct autofs_packet_missing pkt;
86
87 DPRINTK(("autofs_wait: wait id = 0x%08lx, name = ", wq->wait_queue_token));
88 autofs_say(wq->name,wq->len);
89
90 memset(&pkt,0,sizeof pkt); /* For security reasons */
91
92 pkt.hdr.proto_version = AUTOFS_PROTO_VERSION;
93 pkt.hdr.type = autofs_ptype_missing;
94 pkt.wait_queue_token = wq->wait_queue_token;
95 pkt.len = wq->len;
 /* wq->name is not NUL-terminated (autofs_wait() allocates exactly
    name->len bytes), so the terminator is added explicitly.
    NOTE(review): this needs pkt.name to hold wq->len + 1 bytes;
    autofs_wait() rejects names longer than NAME_MAX - confirm the
    array in struct autofs_packet_missing is at least NAME_MAX + 1. */
96 memcpy(pkt.name, wq->name, pkt.len);
97 pkt.name[pkt.len] = '\0';
98
 /* autofs_write() returns non-zero when the packet was not fully written */
99 if ( autofs_write(sbi->pipe,&pkt,sizeof(struct autofs_packet_missing)) )
100 autofs_catatonic_mode(sbi);
101}
102
/*
 * Wait for the entry @name to be resolved on filesystem instance @sbi.
 *
 * If a pending wait queue entry with the same hash, length and name
 * already exists on sbi->queues, the caller joins it; otherwise a new
 * entry is allocated, linked at the head of sbi->queues, and announced
 * through autofs_notify_daemon(). The caller then sleeps on the entry
 * unless its name has already been cleared.
 *
 * Returns:
 *   -ENOENT  if @sbi is catatonic on entry or name->len > NAME_MAX
 *   -ENOMEM  if allocating the entry or its name copy fails
 *   otherwise the entry's status field, which starts as -EINTR and is
 *   overwritten by autofs_wait_release() or set to -ENOENT in catatonic
 *   mode.
 */
103int autofs_wait(struct autofs_sb_info *sbi, struct qstr *name)
104{
105 struct autofs_wait_queue *wq;
106 int status;
107
108 /* In catatonic mode, we don't wait for nobody */
109 if ( sbi->catatonic )
110 return -ENOENT;
111
112 /* We shouldn't be able to get here, but just in case */
113 if ( name->len > NAME_MAX )
114 return -ENOENT;
115
 /* Look for a pending entry for the same name; entries whose name has
    already been cleared never match */
116 for ( wq = sbi->queues ; wq ; wq = wq->next ) {
117 if ( wq->hash == name->hash &&
118 wq->len == name->len &&
119 wq->name && !memcmp(wq->name,name->name,name->len) )
120 break;
121 }
122
123 if ( !wq ) {
124 /* Create a new wait queue */
125 wq = kmalloc(sizeof(struct autofs_wait_queue),GFP_KERNEL);
126 if ( !wq )
127 return -ENOMEM;
128
 /* Exactly name->len bytes: the stored copy has no NUL terminator */
129 wq->name = kmalloc(name->len,GFP_KERNEL);
130 if ( !wq->name ) {
131 kfree(wq);
132 return -ENOMEM;
133 }
134 wq->wait_queue_token = autofs_next_wait_queue++;
135 init_waitqueue_head(&wq->queue);
136 wq->hash = name->hash;
137 wq->len = name->len;
138 wq->status = -EINTR; /* Status return if interrupted */
139 memcpy(wq->name, name->name, name->len);
140 wq->next = sbi->queues;
141 sbi->queues = wq;
142
143 /* autofs_notify_daemon() may block */
 /* Presumably one count for this waiter and one that
    autofs_wait_release() drops - TODO confirm */
144 wq->wait_ctr = 2;
145 autofs_notify_daemon(sbi,wq);
146 } else
147 wq->wait_ctr++;
148
149 /* wq->name is NULL if and only if the lock is already released */
150
151 if ( sbi->catatonic ) {
152 /* We might have slept, so check again for catatonic mode */
153 wq->status = -ENOENT;
154 kfree(wq->name);
155 wq->name = NULL;
156 }
157
158 if ( wq->name ) {
159 /* Block all but "shutdown" signals while waiting */
160 sigset_t sigmask;
161
162 siginitsetinv(&sigmask, SHUTDOWN_SIGS);
 /* The third argument receives the previous mask in the same variable */
163 sigprocmask(SIG_BLOCK, &sigmask, &sigmask);
164
165 interruptible_sleep_on(&wq->queue);
166
 /* Restore the mask saved above */
167 sigprocmask(SIG_SETMASK, &sigmask, NULL);
168 } else {
169 DPRINTK(("autofs_wait: skipped sleeping\n"));
170 }
171
 /* Read the result before the entry may be freed below */
172 status = wq->status;
173
174 if ( ! --wq->wait_ctr ) /* Are we the last process to need status? */
175 kfree(wq);
176
177 return status;
178}
179
180
/*
 * Complete the wait queue entry identified by @wait_queue_token with
 * result @status.
 *
 * The entry is unlinked from sbi->queues, its name is freed and cleared,
 * and its status is set to @status. The reference count is then dropped:
 * if it reaches zero the entry is freed, otherwise the sleepers on the
 * entry are woken.
 *
 * Returns 0 on success, or -EINVAL if no entry on sbi->queues carries
 * @wait_queue_token.
 */
181int autofs_wait_release(struct autofs_sb_info *sbi, autofs_wqt_t wait_queue_token, int status)
182{
183 struct autofs_wait_queue *wq, **wql;
184
 /* wql tracks the link that points at wq, so the entry can be unlinked
    below without a separate "previous" pointer */
185 for (wql = &sbi->queues; (wq = *wql) != NULL; wql = &wq->next) {
186 if ( wq->wait_queue_token == wait_queue_token )
187 break;
188 }
189 if ( !wq )
190 return -EINVAL;
191
192 *wql = wq->next; /* Unlink from chain */
193 kfree(wq->name);
194 wq->name = NULL; /* Do not wait on this queue */
195
196 wq->status = status;
197
198 if ( ! --wq->wait_ctr ) /* Is anyone still waiting for this guy? */
199 kfree(wq);
200 else
201 wake_up(&wq->queue);
202
203 return 0;
204}
205
diff --git a/fs/autofs4/init.c b/fs/autofs4/init.c
index 9722e4bd8957..c038727b4050 100644
--- a/fs/autofs4/init.c
+++ b/fs/autofs4/init.c
@@ -14,16 +14,16 @@
14#include <linux/init.h> 14#include <linux/init.h>
15#include "autofs_i.h" 15#include "autofs_i.h"
16 16
17static int autofs_get_sb(struct file_system_type *fs_type, 17static struct dentry *autofs_mount(struct file_system_type *fs_type,
18 int flags, const char *dev_name, void *data, struct vfsmount *mnt) 18 int flags, const char *dev_name, void *data)
19{ 19{
20 return get_sb_nodev(fs_type, flags, data, autofs4_fill_super, mnt); 20 return mount_nodev(fs_type, flags, data, autofs4_fill_super);
21} 21}
22 22
23static struct file_system_type autofs_fs_type = { 23static struct file_system_type autofs_fs_type = {
24 .owner = THIS_MODULE, 24 .owner = THIS_MODULE,
25 .name = "autofs", 25 .name = "autofs",
26 .get_sb = autofs_get_sb, 26 .mount = autofs_mount,
27 .kill_sb = autofs4_kill_sb, 27 .kill_sb = autofs4_kill_sb,
28}; 28};
29 29
diff --git a/fs/autofs4/inode.c b/fs/autofs4/inode.c
index 821b2b955dac..ac87e49fa706 100644
--- a/fs/autofs4/inode.c
+++ b/fs/autofs4/inode.c
@@ -398,6 +398,7 @@ struct inode *autofs4_get_inode(struct super_block *sb,
398 inode->i_gid = sb->s_root->d_inode->i_gid; 398 inode->i_gid = sb->s_root->d_inode->i_gid;
399 } 399 }
400 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; 400 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
401 inode->i_ino = get_next_ino();
401 402
402 if (S_ISDIR(inf->mode)) { 403 if (S_ISDIR(inf->mode)) {
403 inode->i_nlink = 2; 404 inode->i_nlink = 2;
diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c
index dc39d2824885..aa4e7c7ae3c6 100644
--- a/fs/befs/linuxvfs.c
+++ b/fs/befs/linuxvfs.c
@@ -913,18 +913,17 @@ befs_statfs(struct dentry *dentry, struct kstatfs *buf)
913 return 0; 913 return 0;
914} 914}
915 915
916static int 916static struct dentry *
917befs_get_sb(struct file_system_type *fs_type, int flags, const char *dev_name, 917befs_mount(struct file_system_type *fs_type, int flags, const char *dev_name,
918 void *data, struct vfsmount *mnt) 918 void *data)
919{ 919{
920 return get_sb_bdev(fs_type, flags, dev_name, data, befs_fill_super, 920 return mount_bdev(fs_type, flags, dev_name, data, befs_fill_super);
921 mnt);
922} 921}
923 922
924static struct file_system_type befs_fs_type = { 923static struct file_system_type befs_fs_type = {
925 .owner = THIS_MODULE, 924 .owner = THIS_MODULE,
926 .name = "befs", 925 .name = "befs",
927 .get_sb = befs_get_sb, 926 .mount = befs_mount,
928 .kill_sb = kill_block_super, 927 .kill_sb = kill_block_super,
929 .fs_flags = FS_REQUIRES_DEV, 928 .fs_flags = FS_REQUIRES_DEV,
930}; 929};
diff --git a/fs/bfs/dir.c b/fs/bfs/dir.c
index d967e052b779..685ecff3ab31 100644
--- a/fs/bfs/dir.c
+++ b/fs/bfs/dir.c
@@ -176,7 +176,7 @@ static int bfs_link(struct dentry *old, struct inode *dir,
176 inc_nlink(inode); 176 inc_nlink(inode);
177 inode->i_ctime = CURRENT_TIME_SEC; 177 inode->i_ctime = CURRENT_TIME_SEC;
178 mark_inode_dirty(inode); 178 mark_inode_dirty(inode);
179 atomic_inc(&inode->i_count); 179 ihold(inode);
180 d_instantiate(new, inode); 180 d_instantiate(new, inode);
181 mutex_unlock(&info->bfs_lock); 181 mutex_unlock(&info->bfs_lock);
182 return 0; 182 return 0;
diff --git a/fs/bfs/inode.c b/fs/bfs/inode.c
index 883e77acd5a8..76db6d7d49bb 100644
--- a/fs/bfs/inode.c
+++ b/fs/bfs/inode.c
@@ -450,16 +450,16 @@ out:
450 return ret; 450 return ret;
451} 451}
452 452
453static int bfs_get_sb(struct file_system_type *fs_type, 453static struct dentry *bfs_mount(struct file_system_type *fs_type,
454 int flags, const char *dev_name, void *data, struct vfsmount *mnt) 454 int flags, const char *dev_name, void *data)
455{ 455{
456 return get_sb_bdev(fs_type, flags, dev_name, data, bfs_fill_super, mnt); 456 return mount_bdev(fs_type, flags, dev_name, data, bfs_fill_super);
457} 457}
458 458
459static struct file_system_type bfs_fs_type = { 459static struct file_system_type bfs_fs_type = {
460 .owner = THIS_MODULE, 460 .owner = THIS_MODULE,
461 .name = "bfs", 461 .name = "bfs",
462 .get_sb = bfs_get_sb, 462 .mount = bfs_mount,
463 .kill_sb = kill_block_super, 463 .kill_sb = kill_block_super,
464 .fs_flags = FS_REQUIRES_DEV, 464 .fs_flags = FS_REQUIRES_DEV,
465}; 465};
diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c
index 139fc8083f53..1befe2ec8186 100644
--- a/fs/binfmt_misc.c
+++ b/fs/binfmt_misc.c
@@ -495,6 +495,7 @@ static struct inode *bm_get_inode(struct super_block *sb, int mode)
495 struct inode * inode = new_inode(sb); 495 struct inode * inode = new_inode(sb);
496 496
497 if (inode) { 497 if (inode) {
498 inode->i_ino = get_next_ino();
498 inode->i_mode = mode; 499 inode->i_mode = mode;
499 inode->i_atime = inode->i_mtime = inode->i_ctime = 500 inode->i_atime = inode->i_mtime = inode->i_ctime =
500 current_fs_time(inode->i_sb); 501 current_fs_time(inode->i_sb);
@@ -705,10 +706,10 @@ static int bm_fill_super(struct super_block * sb, void * data, int silent)
705 return err; 706 return err;
706} 707}
707 708
708static int bm_get_sb(struct file_system_type *fs_type, 709static struct dentry *bm_mount(struct file_system_type *fs_type,
709 int flags, const char *dev_name, void *data, struct vfsmount *mnt) 710 int flags, const char *dev_name, void *data)
710{ 711{
711 return get_sb_single(fs_type, flags, data, bm_fill_super, mnt); 712 return mount_single(fs_type, flags, data, bm_fill_super);
712} 713}
713 714
714static struct linux_binfmt misc_format = { 715static struct linux_binfmt misc_format = {
@@ -719,7 +720,7 @@ static struct linux_binfmt misc_format = {
719static struct file_system_type bm_fs_type = { 720static struct file_system_type bm_fs_type = {
720 .owner = THIS_MODULE, 721 .owner = THIS_MODULE,
721 .name = "binfmt_misc", 722 .name = "binfmt_misc",
722 .get_sb = bm_get_sb, 723 .mount = bm_mount,
723 .kill_sb = kill_litter_super, 724 .kill_sb = kill_litter_super,
724}; 725};
725 726
diff --git a/fs/block_dev.c b/fs/block_dev.c
index b737451e2e9d..06e8ff12b97c 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -48,6 +48,21 @@ inline struct block_device *I_BDEV(struct inode *inode)
48 48
49EXPORT_SYMBOL(I_BDEV); 49EXPORT_SYMBOL(I_BDEV);
50 50
51/*
52 * Move the inode from its current bdi to a new bdi. If the inode is dirty
53 * we need to move it onto the dirty list of @dst so that the inode is always
54 * on the right list.
55 */
56static void bdev_inode_switch_bdi(struct inode *inode,
57 struct backing_dev_info *dst)
58{
 /* inode_lock is held across both the bdi pointer update and the list move */
59 spin_lock(&inode_lock);
60 inode->i_data.backing_dev_info = dst;
61 if (inode->i_state & I_DIRTY)
62 list_move(&inode->i_wb_list, &dst->wb.b_dirty);
63 spin_unlock(&inode_lock);
64}
65
51static sector_t max_block(struct block_device *bdev) 66static sector_t max_block(struct block_device *bdev)
52{ 67{
53 sector_t retval = ~((sector_t)0); 68 sector_t retval = ~((sector_t)0);
@@ -449,15 +464,15 @@ static const struct super_operations bdev_sops = {
449 .evict_inode = bdev_evict_inode, 464 .evict_inode = bdev_evict_inode,
450}; 465};
451 466
452static int bd_get_sb(struct file_system_type *fs_type, 467static struct dentry *bd_mount(struct file_system_type *fs_type,
453 int flags, const char *dev_name, void *data, struct vfsmount *mnt) 468 int flags, const char *dev_name, void *data)
454{ 469{
455 return get_sb_pseudo(fs_type, "bdev:", &bdev_sops, 0x62646576, mnt); 470 return mount_pseudo(fs_type, "bdev:", &bdev_sops, 0x62646576);
456} 471}
457 472
458static struct file_system_type bd_type = { 473static struct file_system_type bd_type = {
459 .name = "bdev", 474 .name = "bdev",
460 .get_sb = bd_get_sb, 475 .mount = bd_mount,
461 .kill_sb = kill_anon_super, 476 .kill_sb = kill_anon_super,
462}; 477};
463 478
@@ -550,7 +565,7 @@ EXPORT_SYMBOL(bdget);
550 */ 565 */
551struct block_device *bdgrab(struct block_device *bdev) 566struct block_device *bdgrab(struct block_device *bdev)
552{ 567{
553 atomic_inc(&bdev->bd_inode->i_count); 568 ihold(bdev->bd_inode);
554 return bdev; 569 return bdev;
555} 570}
556 571
@@ -580,7 +595,7 @@ static struct block_device *bd_acquire(struct inode *inode)
580 spin_lock(&bdev_lock); 595 spin_lock(&bdev_lock);
581 bdev = inode->i_bdev; 596 bdev = inode->i_bdev;
582 if (bdev) { 597 if (bdev) {
583 atomic_inc(&bdev->bd_inode->i_count); 598 ihold(bdev->bd_inode);
584 spin_unlock(&bdev_lock); 599 spin_unlock(&bdev_lock);
585 return bdev; 600 return bdev;
586 } 601 }
@@ -591,12 +606,12 @@ static struct block_device *bd_acquire(struct inode *inode)
591 spin_lock(&bdev_lock); 606 spin_lock(&bdev_lock);
592 if (!inode->i_bdev) { 607 if (!inode->i_bdev) {
593 /* 608 /*
594 * We take an additional bd_inode->i_count for inode, 609 * We take an additional reference to bd_inode,
595 * and it's released in clear_inode() of inode. 610 * and it's released in clear_inode() of inode.
596 * So, we can access it via ->i_mapping always 611 * So, we can access it via ->i_mapping always
597 * without igrab(). 612 * without igrab().
598 */ 613 */
599 atomic_inc(&bdev->bd_inode->i_count); 614 ihold(bdev->bd_inode);
600 inode->i_bdev = bdev; 615 inode->i_bdev = bdev;
601 inode->i_mapping = bdev->bd_inode->i_mapping; 616 inode->i_mapping = bdev->bd_inode->i_mapping;
602 list_add(&inode->i_devices, &bdev->bd_inodes); 617 list_add(&inode->i_devices, &bdev->bd_inodes);
@@ -1390,7 +1405,7 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
1390 bdi = blk_get_backing_dev_info(bdev); 1405 bdi = blk_get_backing_dev_info(bdev);
1391 if (bdi == NULL) 1406 if (bdi == NULL)
1392 bdi = &default_backing_dev_info; 1407 bdi = &default_backing_dev_info;
1393 bdev->bd_inode->i_data.backing_dev_info = bdi; 1408 bdev_inode_switch_bdi(bdev->bd_inode, bdi);
1394 } 1409 }
1395 if (bdev->bd_invalidated) 1410 if (bdev->bd_invalidated)
1396 rescan_partitions(disk, bdev); 1411 rescan_partitions(disk, bdev);
@@ -1405,8 +1420,8 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
1405 if (ret) 1420 if (ret)
1406 goto out_clear; 1421 goto out_clear;
1407 bdev->bd_contains = whole; 1422 bdev->bd_contains = whole;
1408 bdev->bd_inode->i_data.backing_dev_info = 1423 bdev_inode_switch_bdi(bdev->bd_inode,
1409 whole->bd_inode->i_data.backing_dev_info; 1424 whole->bd_inode->i_data.backing_dev_info);
1410 bdev->bd_part = disk_get_part(disk, partno); 1425 bdev->bd_part = disk_get_part(disk, partno);
1411 if (!(disk->flags & GENHD_FL_UP) || 1426 if (!(disk->flags & GENHD_FL_UP) ||
1412 !bdev->bd_part || !bdev->bd_part->nr_sects) { 1427 !bdev->bd_part || !bdev->bd_part->nr_sects) {
@@ -1439,7 +1454,7 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
1439 disk_put_part(bdev->bd_part); 1454 disk_put_part(bdev->bd_part);
1440 bdev->bd_disk = NULL; 1455 bdev->bd_disk = NULL;
1441 bdev->bd_part = NULL; 1456 bdev->bd_part = NULL;
1442 bdev->bd_inode->i_data.backing_dev_info = &default_backing_dev_info; 1457 bdev_inode_switch_bdi(bdev->bd_inode, &default_backing_dev_info);
1443 if (bdev != bdev->bd_contains) 1458 if (bdev != bdev->bd_contains)
1444 __blkdev_put(bdev->bd_contains, mode, 1); 1459 __blkdev_put(bdev->bd_contains, mode, 1);
1445 bdev->bd_contains = NULL; 1460 bdev->bd_contains = NULL;
@@ -1533,7 +1548,8 @@ static int __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part)
1533 disk_put_part(bdev->bd_part); 1548 disk_put_part(bdev->bd_part);
1534 bdev->bd_part = NULL; 1549 bdev->bd_part = NULL;
1535 bdev->bd_disk = NULL; 1550 bdev->bd_disk = NULL;
1536 bdev->bd_inode->i_data.backing_dev_info = &default_backing_dev_info; 1551 bdev_inode_switch_bdi(bdev->bd_inode,
1552 &default_backing_dev_info);
1537 if (bdev != bdev->bd_contains) 1553 if (bdev != bdev->bd_contains)
1538 victim = bdev->bd_contains; 1554 victim = bdev->bd_contains;
1539 bdev->bd_contains = NULL; 1555 bdev->bd_contains = NULL;
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index c03864406af3..64f99cf69ce0 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -3849,7 +3849,7 @@ again:
3849 p = &root->inode_tree.rb_node; 3849 p = &root->inode_tree.rb_node;
3850 parent = NULL; 3850 parent = NULL;
3851 3851
3852 if (hlist_unhashed(&inode->i_hash)) 3852 if (inode_unhashed(inode))
3853 return; 3853 return;
3854 3854
3855 spin_lock(&root->inode_lock); 3855 spin_lock(&root->inode_lock);
@@ -4758,7 +4758,7 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
4758 } 4758 }
4759 4759
4760 btrfs_set_trans_block_group(trans, dir); 4760 btrfs_set_trans_block_group(trans, dir);
4761 atomic_inc(&inode->i_count); 4761 ihold(inode);
4762 4762
4763 err = btrfs_add_nondir(trans, dentry, inode, 1, index); 4763 err = btrfs_add_nondir(trans, dentry, inode, 1, index);
4764 4764
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 144f8a5730f5..ebe46c628748 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -560,8 +560,8 @@ static int btrfs_test_super(struct super_block *s, void *data)
560 * Note: This is based on get_sb_bdev from fs/super.c with a few additions 560 * Note: This is based on get_sb_bdev from fs/super.c with a few additions
561 * for multiple device setup. Make sure to keep it in sync. 561 * for multiple device setup. Make sure to keep it in sync.
562 */ 562 */
563static int btrfs_get_sb(struct file_system_type *fs_type, int flags, 563static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags,
564 const char *dev_name, void *data, struct vfsmount *mnt) 564 const char *dev_name, void *data)
565{ 565{
566 struct block_device *bdev = NULL; 566 struct block_device *bdev = NULL;
567 struct super_block *s; 567 struct super_block *s;
@@ -580,7 +580,7 @@ static int btrfs_get_sb(struct file_system_type *fs_type, int flags,
580 &subvol_name, &subvol_objectid, 580 &subvol_name, &subvol_objectid,
581 &fs_devices); 581 &fs_devices);
582 if (error) 582 if (error)
583 return error; 583 return ERR_PTR(error);
584 584
585 error = btrfs_scan_one_device(dev_name, mode, fs_type, &fs_devices); 585 error = btrfs_scan_one_device(dev_name, mode, fs_type, &fs_devices);
586 if (error) 586 if (error)
@@ -656,11 +656,8 @@ static int btrfs_get_sb(struct file_system_type *fs_type, int flags,
656 root = new_root; 656 root = new_root;
657 } 657 }
658 658
659 mnt->mnt_sb = s;
660 mnt->mnt_root = root;
661
662 kfree(subvol_name); 659 kfree(subvol_name);
663 return 0; 660 return root;
664 661
665error_s: 662error_s:
666 error = PTR_ERR(s); 663 error = PTR_ERR(s);
@@ -669,7 +666,7 @@ error_close_devices:
669error_free_subvol_name: 666error_free_subvol_name:
670 kfree(subvol_name); 667 kfree(subvol_name);
671error: 668error:
672 return error; 669 return ERR_PTR(error);
673} 670}
674 671
675static int btrfs_remount(struct super_block *sb, int *flags, char *data) 672static int btrfs_remount(struct super_block *sb, int *flags, char *data)
@@ -746,7 +743,7 @@ static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf)
746static struct file_system_type btrfs_fs_type = { 743static struct file_system_type btrfs_fs_type = {
747 .owner = THIS_MODULE, 744 .owner = THIS_MODULE,
748 .name = "btrfs", 745 .name = "btrfs",
749 .get_sb = btrfs_get_sb, 746 .mount = btrfs_mount,
750 .kill_sb = kill_anon_super, 747 .kill_sb = kill_anon_super,
751 .fs_flags = FS_REQUIRES_DEV, 748 .fs_flags = FS_REQUIRES_DEV,
752}; 749};
diff --git a/fs/buffer.c b/fs/buffer.c
index 7f0b9b083f77..5930e382959b 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -905,7 +905,6 @@ try_again:
905 905
906 bh->b_state = 0; 906 bh->b_state = 0;
907 atomic_set(&bh->b_count, 0); 907 atomic_set(&bh->b_count, 0);
908 bh->b_private = NULL;
909 bh->b_size = size; 908 bh->b_size = size;
910 909
911 /* Link the buffer to its page */ 910 /* Link the buffer to its page */
@@ -1706,7 +1705,7 @@ static int __block_write_full_page(struct inode *inode, struct page *page,
1706 * and kswapd activity, but those code paths have their own 1705 * and kswapd activity, but those code paths have their own
1707 * higher-level throttling. 1706 * higher-level throttling.
1708 */ 1707 */
1709 if (wbc->sync_mode != WB_SYNC_NONE || !wbc->nonblocking) { 1708 if (wbc->sync_mode != WB_SYNC_NONE) {
1710 lock_buffer(bh); 1709 lock_buffer(bh);
1711 } else if (!trylock_buffer(bh)) { 1710 } else if (!trylock_buffer(bh)) {
1712 redirty_page_for_writepage(wbc, page); 1711 redirty_page_for_writepage(wbc, page);
@@ -1834,9 +1833,11 @@ void page_zero_new_buffers(struct page *page, unsigned from, unsigned to)
1834} 1833}
1835EXPORT_SYMBOL(page_zero_new_buffers); 1834EXPORT_SYMBOL(page_zero_new_buffers);
1836 1835
1837int block_prepare_write(struct page *page, unsigned from, unsigned to, 1836int __block_write_begin(struct page *page, loff_t pos, unsigned len,
1838 get_block_t *get_block) 1837 get_block_t *get_block)
1839{ 1838{
1839 unsigned from = pos & (PAGE_CACHE_SIZE - 1);
1840 unsigned to = from + len;
1840 struct inode *inode = page->mapping->host; 1841 struct inode *inode = page->mapping->host;
1841 unsigned block_start, block_end; 1842 unsigned block_start, block_end;
1842 sector_t block; 1843 sector_t block;
@@ -1916,7 +1917,7 @@ int block_prepare_write(struct page *page, unsigned from, unsigned to,
1916 } 1917 }
1917 return err; 1918 return err;
1918} 1919}
1919EXPORT_SYMBOL(block_prepare_write); 1920EXPORT_SYMBOL(__block_write_begin);
1920 1921
1921static int __block_commit_write(struct inode *inode, struct page *page, 1922static int __block_commit_write(struct inode *inode, struct page *page,
1922 unsigned from, unsigned to) 1923 unsigned from, unsigned to)
@@ -1953,15 +1954,6 @@ static int __block_commit_write(struct inode *inode, struct page *page,
1953 return 0; 1954 return 0;
1954} 1955}
1955 1956
1956int __block_write_begin(struct page *page, loff_t pos, unsigned len,
1957 get_block_t *get_block)
1958{
1959 unsigned start = pos & (PAGE_CACHE_SIZE - 1);
1960
1961 return block_prepare_write(page, start, start + len, get_block);
1962}
1963EXPORT_SYMBOL(__block_write_begin);
1964
1965/* 1957/*
1966 * block_write_begin takes care of the basic task of block allocation and 1958 * block_write_begin takes care of the basic task of block allocation and
1967 * bringing partial write blocks uptodate first. 1959 * bringing partial write blocks uptodate first.
@@ -2379,7 +2371,7 @@ block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf,
2379 else 2371 else
2380 end = PAGE_CACHE_SIZE; 2372 end = PAGE_CACHE_SIZE;
2381 2373
2382 ret = block_prepare_write(page, 0, end, get_block); 2374 ret = __block_write_begin(page, 0, end, get_block);
2383 if (!ret) 2375 if (!ret)
2384 ret = block_commit_write(page, 0, end); 2376 ret = block_commit_write(page, 0, end);
2385 2377
@@ -2466,11 +2458,10 @@ int nobh_write_begin(struct address_space *mapping,
2466 *fsdata = NULL; 2458 *fsdata = NULL;
2467 2459
2468 if (page_has_buffers(page)) { 2460 if (page_has_buffers(page)) {
2469 unlock_page(page); 2461 ret = __block_write_begin(page, pos, len, get_block);
2470 page_cache_release(page); 2462 if (unlikely(ret))
2471 *pagep = NULL; 2463 goto out_release;
2472 return block_write_begin(mapping, pos, len, flags, pagep, 2464 return ret;
2473 get_block);
2474 } 2465 }
2475 2466
2476 if (PageMappedToDisk(page)) 2467 if (PageMappedToDisk(page))
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index 51bcc5ce3230..e9c874abc9e1 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -591,7 +591,6 @@ static int ceph_writepages_start(struct address_space *mapping,
591 struct writeback_control *wbc) 591 struct writeback_control *wbc)
592{ 592{
593 struct inode *inode = mapping->host; 593 struct inode *inode = mapping->host;
594 struct backing_dev_info *bdi = mapping->backing_dev_info;
595 struct ceph_inode_info *ci = ceph_inode(inode); 594 struct ceph_inode_info *ci = ceph_inode(inode);
596 struct ceph_fs_client *fsc; 595 struct ceph_fs_client *fsc;
597 pgoff_t index, start, end; 596 pgoff_t index, start, end;
@@ -633,13 +632,6 @@ static int ceph_writepages_start(struct address_space *mapping,
633 632
634 pagevec_init(&pvec, 0); 633 pagevec_init(&pvec, 0);
635 634
636 /* ?? */
637 if (wbc->nonblocking && bdi_write_congested(bdi)) {
638 dout(" writepages congested\n");
639 wbc->encountered_congestion = 1;
640 goto out_final;
641 }
642
643 /* where to start/end? */ 635 /* where to start/end? */
644 if (wbc->range_cyclic) { 636 if (wbc->range_cyclic) {
645 start = mapping->writeback_index; /* Start from prev offset */ 637 start = mapping->writeback_index; /* Start from prev offset */
@@ -885,7 +877,6 @@ out:
885 rc = 0; /* vfs expects us to return 0 */ 877 rc = 0; /* vfs expects us to return 0 */
886 ceph_put_snap_context(snapc); 878 ceph_put_snap_context(snapc);
887 dout("writepages done, rc = %d\n", rc); 879 dout("writepages done, rc = %d\n", rc);
888out_final:
889 return rc; 880 return rc;
890} 881}
891 882
diff --git a/fs/ceph/super.c b/fs/ceph/super.c
index d6e0e0421891..08b460ae0539 100644
--- a/fs/ceph/super.c
+++ b/fs/ceph/super.c
@@ -635,7 +635,7 @@ static struct dentry *open_root_dentry(struct ceph_fs_client *fsc,
635/* 635/*
636 * mount: join the ceph cluster, and open root directory. 636 * mount: join the ceph cluster, and open root directory.
637 */ 637 */
638static int ceph_mount(struct ceph_fs_client *fsc, struct vfsmount *mnt, 638static struct dentry *ceph_real_mount(struct ceph_fs_client *fsc,
639 const char *path) 639 const char *path)
640{ 640{
641 int err; 641 int err;
@@ -678,16 +678,14 @@ static int ceph_mount(struct ceph_fs_client *fsc, struct vfsmount *mnt,
678 } 678 }
679 } 679 }
680 680
681 mnt->mnt_root = root;
682 mnt->mnt_sb = fsc->sb;
683
684 fsc->mount_state = CEPH_MOUNT_MOUNTED; 681 fsc->mount_state = CEPH_MOUNT_MOUNTED;
685 dout("mount success\n"); 682 dout("mount success\n");
686 err = 0; 683 mutex_unlock(&fsc->client->mount_mutex);
684 return root;
687 685
688out: 686out:
689 mutex_unlock(&fsc->client->mount_mutex); 687 mutex_unlock(&fsc->client->mount_mutex);
690 return err; 688 return ERR_PTR(err);
691 689
692fail: 690fail:
693 if (first) { 691 if (first) {
@@ -777,41 +775,45 @@ static int ceph_register_bdi(struct super_block *sb,
777 return err; 775 return err;
778} 776}
779 777
780static int ceph_get_sb(struct file_system_type *fs_type, 778static struct dentry *ceph_mount(struct file_system_type *fs_type,
781 int flags, const char *dev_name, void *data, 779 int flags, const char *dev_name, void *data)
782 struct vfsmount *mnt)
783{ 780{
784 struct super_block *sb; 781 struct super_block *sb;
785 struct ceph_fs_client *fsc; 782 struct ceph_fs_client *fsc;
783 struct dentry *res;
786 int err; 784 int err;
787 int (*compare_super)(struct super_block *, void *) = ceph_compare_super; 785 int (*compare_super)(struct super_block *, void *) = ceph_compare_super;
788 const char *path = NULL; 786 const char *path = NULL;
789 struct ceph_mount_options *fsopt = NULL; 787 struct ceph_mount_options *fsopt = NULL;
790 struct ceph_options *opt = NULL; 788 struct ceph_options *opt = NULL;
791 789
792 dout("ceph_get_sb\n"); 790 dout("ceph_mount\n");
793 err = parse_mount_options(&fsopt, &opt, flags, data, dev_name, &path); 791 err = parse_mount_options(&fsopt, &opt, flags, data, dev_name, &path);
794 if (err < 0) 792 if (err < 0) {
793 res = ERR_PTR(err);
795 goto out_final; 794 goto out_final;
795 }
796 796
797 /* create client (which we may/may not use) */ 797 /* create client (which we may/may not use) */
798 fsc = create_fs_client(fsopt, opt); 798 fsc = create_fs_client(fsopt, opt);
799 if (IS_ERR(fsc)) { 799 if (IS_ERR(fsc)) {
800 err = PTR_ERR(fsc); 800 res = ERR_CAST(fsc);
801 kfree(fsopt); 801 kfree(fsopt);
802 kfree(opt); 802 kfree(opt);
803 goto out_final; 803 goto out_final;
804 } 804 }
805 805
806 err = ceph_mdsc_init(fsc); 806 err = ceph_mdsc_init(fsc);
807 if (err < 0) 807 if (err < 0) {
808 res = ERR_PTR(err);
808 goto out; 809 goto out;
810 }
809 811
810 if (ceph_test_opt(fsc->client, NOSHARE)) 812 if (ceph_test_opt(fsc->client, NOSHARE))
811 compare_super = NULL; 813 compare_super = NULL;
812 sb = sget(fs_type, compare_super, ceph_set_super, fsc); 814 sb = sget(fs_type, compare_super, ceph_set_super, fsc);
813 if (IS_ERR(sb)) { 815 if (IS_ERR(sb)) {
814 err = PTR_ERR(sb); 816 res = ERR_CAST(sb);
815 goto out; 817 goto out;
816 } 818 }
817 819
@@ -823,16 +825,18 @@ static int ceph_get_sb(struct file_system_type *fs_type,
823 } else { 825 } else {
824 dout("get_sb using new client %p\n", fsc); 826 dout("get_sb using new client %p\n", fsc);
825 err = ceph_register_bdi(sb, fsc); 827 err = ceph_register_bdi(sb, fsc);
826 if (err < 0) 828 if (err < 0) {
829 res = ERR_PTR(err);
827 goto out_splat; 830 goto out_splat;
831 }
828 } 832 }
829 833
830 err = ceph_mount(fsc, mnt, path); 834 res = ceph_real_mount(fsc, path);
831 if (err < 0) 835 if (IS_ERR(res))
832 goto out_splat; 836 goto out_splat;
833 dout("root %p inode %p ino %llx.%llx\n", mnt->mnt_root, 837 dout("root %p inode %p ino %llx.%llx\n", res,
834 mnt->mnt_root->d_inode, ceph_vinop(mnt->mnt_root->d_inode)); 838 res->d_inode, ceph_vinop(res->d_inode));
835 return 0; 839 return res;
836 840
837out_splat: 841out_splat:
838 ceph_mdsc_close_sessions(fsc->mdsc); 842 ceph_mdsc_close_sessions(fsc->mdsc);
@@ -843,8 +847,8 @@ out:
843 ceph_mdsc_destroy(fsc); 847 ceph_mdsc_destroy(fsc);
844 destroy_fs_client(fsc); 848 destroy_fs_client(fsc);
845out_final: 849out_final:
846 dout("ceph_get_sb fail %d\n", err); 850 dout("ceph_mount fail %ld\n", PTR_ERR(res));
847 return err; 851 return res;
848} 852}
849 853
850static void ceph_kill_sb(struct super_block *s) 854static void ceph_kill_sb(struct super_block *s)
@@ -860,7 +864,7 @@ static void ceph_kill_sb(struct super_block *s)
860static struct file_system_type ceph_fs_type = { 864static struct file_system_type ceph_fs_type = {
861 .owner = THIS_MODULE, 865 .owner = THIS_MODULE,
862 .name = "ceph", 866 .name = "ceph",
863 .get_sb = ceph_get_sb, 867 .mount = ceph_mount,
864 .kill_sb = ceph_kill_sb, 868 .kill_sb = ceph_kill_sb,
865 .fs_flags = FS_RENAME_DOES_D_MOVE, 869 .fs_flags = FS_RENAME_DOES_D_MOVE,
866}; 870};
diff --git a/fs/cifs/Kconfig b/fs/cifs/Kconfig
index 917b7d449bb2..0ed213970ced 100644
--- a/fs/cifs/Kconfig
+++ b/fs/cifs/Kconfig
@@ -2,6 +2,9 @@ config CIFS
2 tristate "CIFS support (advanced network filesystem, SMBFS successor)" 2 tristate "CIFS support (advanced network filesystem, SMBFS successor)"
3 depends on INET 3 depends on INET
4 select NLS 4 select NLS
5 select CRYPTO
6 select CRYPTO_MD5
7 select CRYPTO_ARC4
5 help 8 help
6 This is the client VFS module for the Common Internet File System 9 This is the client VFS module for the Common Internet File System
7 (CIFS) protocol which is the successor to the Server Message Block 10 (CIFS) protocol which is the successor to the Server Message Block
diff --git a/fs/cifs/cifsencrypt.c b/fs/cifs/cifsencrypt.c
index 7ac0056294cf..f856732161ab 100644
--- a/fs/cifs/cifsencrypt.c
+++ b/fs/cifs/cifsencrypt.c
@@ -43,18 +43,32 @@ extern void SMBencrypt(unsigned char *passwd, const unsigned char *c8,
43 unsigned char *p24); 43 unsigned char *p24);
44 44
45static int cifs_calculate_signature(const struct smb_hdr *cifs_pdu, 45static int cifs_calculate_signature(const struct smb_hdr *cifs_pdu,
46 const struct session_key *key, char *signature) 46 struct TCP_Server_Info *server, char *signature)
47{ 47{
48 struct MD5Context context; 48 int rc;
49 49
50 if ((cifs_pdu == NULL) || (signature == NULL) || (key == NULL)) 50 if (cifs_pdu == NULL || signature == NULL || server == NULL)
51 return -EINVAL; 51 return -EINVAL;
52 52
53 cifs_MD5_init(&context); 53 if (!server->secmech.sdescmd5) {
54 cifs_MD5_update(&context, (char *)&key->data, key->len); 54 cERROR(1, "%s: Can't generate signature\n", __func__);
55 cifs_MD5_update(&context, cifs_pdu->Protocol, cifs_pdu->smb_buf_length); 55 return -1;
56 }
57
58 rc = crypto_shash_init(&server->secmech.sdescmd5->shash);
59 if (rc) {
60 cERROR(1, "%s: Oould not init md5\n", __func__);
61 return rc;
62 }
63
64 crypto_shash_update(&server->secmech.sdescmd5->shash,
65 server->session_key.response, server->session_key.len);
66
67 crypto_shash_update(&server->secmech.sdescmd5->shash,
68 cifs_pdu->Protocol, cifs_pdu->smb_buf_length);
69
70 rc = crypto_shash_final(&server->secmech.sdescmd5->shash, signature);
56 71
57 cifs_MD5_final(signature, &context);
58 return 0; 72 return 0;
59} 73}
60 74
@@ -79,8 +93,7 @@ int cifs_sign_smb(struct smb_hdr *cifs_pdu, struct TCP_Server_Info *server,
79 server->sequence_number++; 93 server->sequence_number++;
80 spin_unlock(&GlobalMid_Lock); 94 spin_unlock(&GlobalMid_Lock);
81 95
82 rc = cifs_calculate_signature(cifs_pdu, &server->session_key, 96 rc = cifs_calculate_signature(cifs_pdu, server, smb_signature);
83 smb_signature);
84 if (rc) 97 if (rc)
85 memset(cifs_pdu->Signature.SecuritySignature, 0, 8); 98 memset(cifs_pdu->Signature.SecuritySignature, 0, 8);
86 else 99 else
@@ -90,16 +103,28 @@ int cifs_sign_smb(struct smb_hdr *cifs_pdu, struct TCP_Server_Info *server,
90} 103}
91 104
92static int cifs_calc_signature2(const struct kvec *iov, int n_vec, 105static int cifs_calc_signature2(const struct kvec *iov, int n_vec,
93 const struct session_key *key, char *signature) 106 struct TCP_Server_Info *server, char *signature)
94{ 107{
95 struct MD5Context context;
96 int i; 108 int i;
109 int rc;
97 110
98 if ((iov == NULL) || (signature == NULL) || (key == NULL)) 111 if (iov == NULL || signature == NULL || server == NULL)
99 return -EINVAL; 112 return -EINVAL;
100 113
101 cifs_MD5_init(&context); 114 if (!server->secmech.sdescmd5) {
102 cifs_MD5_update(&context, (char *)&key->data, key->len); 115 cERROR(1, "%s: Can't generate signature\n", __func__);
116 return -1;
117 }
118
119 rc = crypto_shash_init(&server->secmech.sdescmd5->shash);
120 if (rc) {
121 cERROR(1, "%s: Oould not init md5\n", __func__);
122 return rc;
123 }
124
125 crypto_shash_update(&server->secmech.sdescmd5->shash,
126 server->session_key.response, server->session_key.len);
127
103 for (i = 0; i < n_vec; i++) { 128 for (i = 0; i < n_vec; i++) {
104 if (iov[i].iov_len == 0) 129 if (iov[i].iov_len == 0)
105 continue; 130 continue;
@@ -112,18 +137,18 @@ static int cifs_calc_signature2(const struct kvec *iov, int n_vec,
112 if (i == 0) { 137 if (i == 0) {
113 if (iov[0].iov_len <= 8) /* cmd field at offset 9 */ 138 if (iov[0].iov_len <= 8) /* cmd field at offset 9 */
114 break; /* nothing to sign or corrupt header */ 139 break; /* nothing to sign or corrupt header */
115 cifs_MD5_update(&context, iov[0].iov_base+4, 140 crypto_shash_update(&server->secmech.sdescmd5->shash,
116 iov[0].iov_len-4); 141 iov[i].iov_base + 4, iov[i].iov_len - 4);
117 } else 142 } else
118 cifs_MD5_update(&context, iov[i].iov_base, iov[i].iov_len); 143 crypto_shash_update(&server->secmech.sdescmd5->shash,
144 iov[i].iov_base, iov[i].iov_len);
119 } 145 }
120 146
121 cifs_MD5_final(signature, &context); 147 rc = crypto_shash_final(&server->secmech.sdescmd5->shash, signature);
122 148
123 return 0; 149 return rc;
124} 150}
125 151
126
127int cifs_sign_smb2(struct kvec *iov, int n_vec, struct TCP_Server_Info *server, 152int cifs_sign_smb2(struct kvec *iov, int n_vec, struct TCP_Server_Info *server,
128 __u32 *pexpected_response_sequence_number) 153 __u32 *pexpected_response_sequence_number)
129{ 154{
@@ -146,8 +171,7 @@ int cifs_sign_smb2(struct kvec *iov, int n_vec, struct TCP_Server_Info *server,
146 server->sequence_number++; 171 server->sequence_number++;
147 spin_unlock(&GlobalMid_Lock); 172 spin_unlock(&GlobalMid_Lock);
148 173
149 rc = cifs_calc_signature2(iov, n_vec, &server->session_key, 174 rc = cifs_calc_signature2(iov, n_vec, server, smb_signature);
150 smb_signature);
151 if (rc) 175 if (rc)
152 memset(cifs_pdu->Signature.SecuritySignature, 0, 8); 176 memset(cifs_pdu->Signature.SecuritySignature, 0, 8);
153 else 177 else
@@ -157,14 +181,14 @@ int cifs_sign_smb2(struct kvec *iov, int n_vec, struct TCP_Server_Info *server,
157} 181}
158 182
159int cifs_verify_signature(struct smb_hdr *cifs_pdu, 183int cifs_verify_signature(struct smb_hdr *cifs_pdu,
160 const struct session_key *session_key, 184 struct TCP_Server_Info *server,
161 __u32 expected_sequence_number) 185 __u32 expected_sequence_number)
162{ 186{
163 unsigned int rc; 187 unsigned int rc;
164 char server_response_sig[8]; 188 char server_response_sig[8];
165 char what_we_think_sig_should_be[20]; 189 char what_we_think_sig_should_be[20];
166 190
167 if (cifs_pdu == NULL || session_key == NULL) 191 if (cifs_pdu == NULL || server == NULL)
168 return -EINVAL; 192 return -EINVAL;
169 193
170 if (cifs_pdu->Command == SMB_COM_NEGOTIATE) 194 if (cifs_pdu->Command == SMB_COM_NEGOTIATE)
@@ -193,7 +217,7 @@ int cifs_verify_signature(struct smb_hdr *cifs_pdu,
193 cpu_to_le32(expected_sequence_number); 217 cpu_to_le32(expected_sequence_number);
194 cifs_pdu->Signature.Sequence.Reserved = 0; 218 cifs_pdu->Signature.Sequence.Reserved = 0;
195 219
196 rc = cifs_calculate_signature(cifs_pdu, session_key, 220 rc = cifs_calculate_signature(cifs_pdu, server,
197 what_we_think_sig_should_be); 221 what_we_think_sig_should_be);
198 222
199 if (rc) 223 if (rc)
@@ -209,18 +233,28 @@ int cifs_verify_signature(struct smb_hdr *cifs_pdu,
209 233
210} 234}
211 235
212/* We fill in key by putting in 40 byte array which was allocated by caller */ 236/* first calculate 24 bytes ntlm response and then 16 byte session key */
213int cifs_calculate_session_key(struct session_key *key, const char *rn, 237int setup_ntlm_response(struct cifsSesInfo *ses)
214 const char *password)
215{ 238{
216 char temp_key[16]; 239 unsigned int temp_len = CIFS_SESS_KEY_SIZE + CIFS_AUTH_RESP_SIZE;
217 if ((key == NULL) || (rn == NULL)) 240 char temp_key[CIFS_SESS_KEY_SIZE];
241
242 if (!ses)
218 return -EINVAL; 243 return -EINVAL;
219 244
220 E_md4hash(password, temp_key); 245 ses->auth_key.response = kmalloc(temp_len, GFP_KERNEL);
221 mdfour(key->data.ntlm, temp_key, 16); 246 if (!ses->auth_key.response) {
222 memcpy(key->data.ntlm+16, rn, CIFS_SESS_KEY_SIZE); 247 cERROR(1, "NTLM can't allocate (%u bytes) memory", temp_len);
223 key->len = 40; 248 return -ENOMEM;
249 }
250 ses->auth_key.len = temp_len;
251
252 SMBNTencrypt(ses->password, ses->server->cryptkey,
253 ses->auth_key.response + CIFS_SESS_KEY_SIZE);
254
255 E_md4hash(ses->password, temp_key);
256 mdfour(ses->auth_key.response, temp_key, CIFS_SESS_KEY_SIZE);
257
224 return 0; 258 return 0;
225} 259}
226 260
@@ -294,15 +328,15 @@ build_avpair_blob(struct cifsSesInfo *ses, const struct nls_table *nls_cp)
294 * two times the unicode length of a server name + 328 * two times the unicode length of a server name +
295 * size of a timestamp (which is 8 bytes). 329 * size of a timestamp (which is 8 bytes).
296 */ 330 */
297 ses->tilen = size + 2 * (2 * dlen) + 2 * (2 * wlen) + 8; 331 ses->auth_key.len = size + 2 * (2 * dlen) + 2 * (2 * wlen) + 8;
298 ses->tiblob = kzalloc(ses->tilen, GFP_KERNEL); 332 ses->auth_key.response = kzalloc(ses->auth_key.len, GFP_KERNEL);
299 if (!ses->tiblob) { 333 if (!ses->auth_key.response) {
300 ses->tilen = 0; 334 ses->auth_key.len = 0;
301 cERROR(1, "Challenge target info allocation failure"); 335 cERROR(1, "Challenge target info allocation failure");
302 return -ENOMEM; 336 return -ENOMEM;
303 } 337 }
304 338
305 blobptr = ses->tiblob; 339 blobptr = ses->auth_key.response;
306 attrptr = (struct ntlmssp2_name *) blobptr; 340 attrptr = (struct ntlmssp2_name *) blobptr;
307 341
308 attrptr->type = cpu_to_le16(NTLMSSP_AV_NB_DOMAIN_NAME); 342 attrptr->type = cpu_to_le16(NTLMSSP_AV_NB_DOMAIN_NAME);
@@ -357,7 +391,7 @@ build_avpair_blob(struct cifsSesInfo *ses, const struct nls_table *nls_cp)
357 * about target string i.e. for some, just user name might suffice. 391 * about target string i.e. for some, just user name might suffice.
358 */ 392 */
359static int 393static int
360find_domain_name(struct cifsSesInfo *ses) 394find_domain_name(struct cifsSesInfo *ses, const struct nls_table *nls_cp)
361{ 395{
362 unsigned int attrsize; 396 unsigned int attrsize;
363 unsigned int type; 397 unsigned int type;
@@ -366,11 +400,11 @@ find_domain_name(struct cifsSesInfo *ses)
366 unsigned char *blobend; 400 unsigned char *blobend;
367 struct ntlmssp2_name *attrptr; 401 struct ntlmssp2_name *attrptr;
368 402
369 if (!ses->tilen || !ses->tiblob) 403 if (!ses->auth_key.len || !ses->auth_key.response)
370 return 0; 404 return 0;
371 405
372 blobptr = ses->tiblob; 406 blobptr = ses->auth_key.response;
373 blobend = ses->tiblob + ses->tilen; 407 blobend = blobptr + ses->auth_key.len;
374 408
375 while (blobptr + onesize < blobend) { 409 while (blobptr + onesize < blobend) {
376 attrptr = (struct ntlmssp2_name *) blobptr; 410 attrptr = (struct ntlmssp2_name *) blobptr;
@@ -386,16 +420,13 @@ find_domain_name(struct cifsSesInfo *ses)
386 if (!attrsize) 420 if (!attrsize)
387 break; 421 break;
388 if (!ses->domainName) { 422 if (!ses->domainName) {
389 struct nls_table *default_nls;
390 ses->domainName = 423 ses->domainName =
391 kmalloc(attrsize + 1, GFP_KERNEL); 424 kmalloc(attrsize + 1, GFP_KERNEL);
392 if (!ses->domainName) 425 if (!ses->domainName)
393 return -ENOMEM; 426 return -ENOMEM;
394 default_nls = load_nls_default();
395 cifs_from_ucs2(ses->domainName, 427 cifs_from_ucs2(ses->domainName,
396 (__le16 *)blobptr, attrsize, attrsize, 428 (__le16 *)blobptr, attrsize, attrsize,
397 default_nls, false); 429 nls_cp, false);
398 unload_nls(default_nls);
399 break; 430 break;
400 } 431 }
401 } 432 }
@@ -405,82 +436,136 @@ find_domain_name(struct cifsSesInfo *ses)
405 return 0; 436 return 0;
406} 437}
407 438
408static int calc_ntlmv2_hash(struct cifsSesInfo *ses, 439static int calc_ntlmv2_hash(struct cifsSesInfo *ses, char *ntlmv2_hash,
409 const struct nls_table *nls_cp) 440 const struct nls_table *nls_cp)
410{ 441{
411 int rc = 0; 442 int rc = 0;
412 int len; 443 int len;
413 char nt_hash[16]; 444 char nt_hash[CIFS_NTHASH_SIZE];
414 struct HMACMD5Context *pctxt;
415 wchar_t *user; 445 wchar_t *user;
416 wchar_t *domain; 446 wchar_t *domain;
447 wchar_t *server;
417 448
418 pctxt = kmalloc(sizeof(struct HMACMD5Context), GFP_KERNEL); 449 if (!ses->server->secmech.sdeschmacmd5) {
419 450 cERROR(1, "calc_ntlmv2_hash: can't generate ntlmv2 hash\n");
420 if (pctxt == NULL) 451 return -1;
421 return -ENOMEM; 452 }
422 453
423 /* calculate md4 hash of password */ 454 /* calculate md4 hash of password */
424 E_md4hash(ses->password, nt_hash); 455 E_md4hash(ses->password, nt_hash);
425 456
426 /* convert Domainname to unicode and uppercase */ 457 crypto_shash_setkey(ses->server->secmech.hmacmd5, nt_hash,
427 hmac_md5_init_limK_to_64(nt_hash, 16, pctxt); 458 CIFS_NTHASH_SIZE);
459
460 rc = crypto_shash_init(&ses->server->secmech.sdeschmacmd5->shash);
461 if (rc) {
462 cERROR(1, "calc_ntlmv2_hash: could not init hmacmd5\n");
463 return rc;
464 }
428 465
429 /* convert ses->userName to unicode and uppercase */ 466 /* convert ses->userName to unicode and uppercase */
430 len = strlen(ses->userName); 467 len = strlen(ses->userName);
431 user = kmalloc(2 + (len * 2), GFP_KERNEL); 468 user = kmalloc(2 + (len * 2), GFP_KERNEL);
432 if (user == NULL) 469 if (user == NULL) {
470 cERROR(1, "calc_ntlmv2_hash: user mem alloc failure\n");
471 rc = -ENOMEM;
433 goto calc_exit_2; 472 goto calc_exit_2;
473 }
434 len = cifs_strtoUCS((__le16 *)user, ses->userName, len, nls_cp); 474 len = cifs_strtoUCS((__le16 *)user, ses->userName, len, nls_cp);
435 UniStrupr(user); 475 UniStrupr(user);
436 hmac_md5_update((char *)user, 2*len, pctxt); 476
477 crypto_shash_update(&ses->server->secmech.sdeschmacmd5->shash,
478 (char *)user, 2 * len);
437 479
438 /* convert ses->domainName to unicode and uppercase */ 480 /* convert ses->domainName to unicode and uppercase */
439 if (ses->domainName) { 481 if (ses->domainName) {
440 len = strlen(ses->domainName); 482 len = strlen(ses->domainName);
441 483
442 domain = kmalloc(2 + (len * 2), GFP_KERNEL); 484 domain = kmalloc(2 + (len * 2), GFP_KERNEL);
443 if (domain == NULL) 485 if (domain == NULL) {
486 cERROR(1, "calc_ntlmv2_hash: domain mem alloc failure");
487 rc = -ENOMEM;
444 goto calc_exit_1; 488 goto calc_exit_1;
489 }
445 len = cifs_strtoUCS((__le16 *)domain, ses->domainName, len, 490 len = cifs_strtoUCS((__le16 *)domain, ses->domainName, len,
446 nls_cp); 491 nls_cp);
447 /* the following line was removed since it didn't work well 492 crypto_shash_update(&ses->server->secmech.sdeschmacmd5->shash,
448 with lower cased domain name that passed as an option. 493 (char *)domain, 2 * len);
449 Maybe converting the domain name earlier makes sense */
450 /* UniStrupr(domain); */
451
452 hmac_md5_update((char *)domain, 2*len, pctxt);
453
454 kfree(domain); 494 kfree(domain);
495 } else if (ses->serverName) {
496 len = strlen(ses->serverName);
497
498 server = kmalloc(2 + (len * 2), GFP_KERNEL);
499 if (server == NULL) {
500 cERROR(1, "calc_ntlmv2_hash: server mem alloc failure");
501 rc = -ENOMEM;
502 goto calc_exit_1;
503 }
504 len = cifs_strtoUCS((__le16 *)server, ses->serverName, len,
505 nls_cp);
506 crypto_shash_update(&ses->server->secmech.sdeschmacmd5->shash,
507 (char *)server, 2 * len);
508 kfree(server);
455 } 509 }
510
511 rc = crypto_shash_final(&ses->server->secmech.sdeschmacmd5->shash,
512 ntlmv2_hash);
513
456calc_exit_1: 514calc_exit_1:
457 kfree(user); 515 kfree(user);
458calc_exit_2: 516calc_exit_2:
459 /* BB FIXME what about bytes 24 through 40 of the signing key? 517 return rc;
460 compare with the NTLM example */ 518}
461 hmac_md5_final(ses->ntlmv2_hash, pctxt); 519
520static int
521CalcNTLMv2_response(const struct cifsSesInfo *ses, char *ntlmv2_hash)
522{
523 int rc;
524 unsigned int offset = CIFS_SESS_KEY_SIZE + 8;
525
526 if (!ses->server->secmech.sdeschmacmd5) {
527 cERROR(1, "calc_ntlmv2_hash: can't generate ntlmv2 hash\n");
528 return -1;
529 }
530
531 crypto_shash_setkey(ses->server->secmech.hmacmd5,
532 ntlmv2_hash, CIFS_HMAC_MD5_HASH_SIZE);
533
534 rc = crypto_shash_init(&ses->server->secmech.sdeschmacmd5->shash);
535 if (rc) {
536 cERROR(1, "CalcNTLMv2_response: could not init hmacmd5");
537 return rc;
538 }
539
540 if (ses->server->secType == RawNTLMSSP)
541 memcpy(ses->auth_key.response + offset,
542 ses->ntlmssp->cryptkey, CIFS_SERVER_CHALLENGE_SIZE);
543 else
544 memcpy(ses->auth_key.response + offset,
545 ses->server->cryptkey, CIFS_SERVER_CHALLENGE_SIZE);
546 crypto_shash_update(&ses->server->secmech.sdeschmacmd5->shash,
547 ses->auth_key.response + offset, ses->auth_key.len - offset);
548
549 rc = crypto_shash_final(&ses->server->secmech.sdeschmacmd5->shash,
550 ses->auth_key.response + CIFS_SESS_KEY_SIZE);
462 551
463 kfree(pctxt);
464 return rc; 552 return rc;
465} 553}
466 554
555
467int 556int
468setup_ntlmv2_rsp(struct cifsSesInfo *ses, char *resp_buf, 557setup_ntlmv2_rsp(struct cifsSesInfo *ses, const struct nls_table *nls_cp)
469 const struct nls_table *nls_cp)
470{ 558{
471 int rc; 559 int rc;
472 struct ntlmv2_resp *buf = (struct ntlmv2_resp *)resp_buf; 560 int baselen;
473 struct HMACMD5Context context; 561 unsigned int tilen;
474 562 struct ntlmv2_resp *buf;
475 buf->blob_signature = cpu_to_le32(0x00000101); 563 char ntlmv2_hash[16];
476 buf->reserved = 0; 564 unsigned char *tiblob = NULL; /* target info blob */
477 buf->time = cpu_to_le64(cifs_UnixTimeToNT(CURRENT_TIME));
478 get_random_bytes(&buf->client_chal, sizeof(buf->client_chal));
479 buf->reserved2 = 0;
480 565
481 if (ses->server->secType == RawNTLMSSP) { 566 if (ses->server->secType == RawNTLMSSP) {
482 if (!ses->domainName) { 567 if (!ses->domainName) {
483 rc = find_domain_name(ses); 568 rc = find_domain_name(ses, nls_cp);
484 if (rc) { 569 if (rc) {
485 cERROR(1, "error %d finding domain name", rc); 570 cERROR(1, "error %d finding domain name", rc);
486 goto setup_ntlmv2_rsp_ret; 571 goto setup_ntlmv2_rsp_ret;
@@ -490,51 +575,179 @@ setup_ntlmv2_rsp(struct cifsSesInfo *ses, char *resp_buf,
490 rc = build_avpair_blob(ses, nls_cp); 575 rc = build_avpair_blob(ses, nls_cp);
491 if (rc) { 576 if (rc) {
492 cERROR(1, "error %d building av pair blob", rc); 577 cERROR(1, "error %d building av pair blob", rc);
493 return rc; 578 goto setup_ntlmv2_rsp_ret;
494 } 579 }
495 } 580 }
496 581
497 /* calculate buf->ntlmv2_hash */ 582 baselen = CIFS_SESS_KEY_SIZE + sizeof(struct ntlmv2_resp);
498 rc = calc_ntlmv2_hash(ses, nls_cp); 583 tilen = ses->auth_key.len;
584 tiblob = ses->auth_key.response;
585
586 ses->auth_key.response = kmalloc(baselen + tilen, GFP_KERNEL);
587 if (!ses->auth_key.response) {
588 rc = ENOMEM;
589 ses->auth_key.len = 0;
590 cERROR(1, "%s: Can't allocate auth blob", __func__);
591 goto setup_ntlmv2_rsp_ret;
592 }
593 ses->auth_key.len += baselen;
594
595 buf = (struct ntlmv2_resp *)
596 (ses->auth_key.response + CIFS_SESS_KEY_SIZE);
597 buf->blob_signature = cpu_to_le32(0x00000101);
598 buf->reserved = 0;
599 buf->time = cpu_to_le64(cifs_UnixTimeToNT(CURRENT_TIME));
600 get_random_bytes(&buf->client_chal, sizeof(buf->client_chal));
601 buf->reserved2 = 0;
602
603 memcpy(ses->auth_key.response + baselen, tiblob, tilen);
604
605 /* calculate ntlmv2_hash */
606 rc = calc_ntlmv2_hash(ses, ntlmv2_hash, nls_cp);
499 if (rc) { 607 if (rc) {
500 cERROR(1, "could not get v2 hash rc %d", rc); 608 cERROR(1, "could not get v2 hash rc %d", rc);
501 goto setup_ntlmv2_rsp_ret; 609 goto setup_ntlmv2_rsp_ret;
502 } 610 }
503 CalcNTLMv2_response(ses, resp_buf); 611
612 /* calculate first part of the client response (CR1) */
613 rc = CalcNTLMv2_response(ses, ntlmv2_hash);
614 if (rc) {
615 cERROR(1, "Could not calculate CR1 rc: %d", rc);
616 goto setup_ntlmv2_rsp_ret;
617 }
504 618
505 /* now calculate the session key for NTLMv2 */ 619 /* now calculate the session key for NTLMv2 */
506 hmac_md5_init_limK_to_64(ses->ntlmv2_hash, 16, &context); 620 crypto_shash_setkey(ses->server->secmech.hmacmd5,
507 hmac_md5_update(resp_buf, 16, &context); 621 ntlmv2_hash, CIFS_HMAC_MD5_HASH_SIZE);
508 hmac_md5_final(ses->auth_key.data.ntlmv2.key, &context); 622
623 rc = crypto_shash_init(&ses->server->secmech.sdeschmacmd5->shash);
624 if (rc) {
625 cERROR(1, "%s: Could not init hmacmd5\n", __func__);
626 goto setup_ntlmv2_rsp_ret;
627 }
509 628
510 memcpy(&ses->auth_key.data.ntlmv2.resp, resp_buf, 629 crypto_shash_update(&ses->server->secmech.sdeschmacmd5->shash,
511 sizeof(struct ntlmv2_resp)); 630 ses->auth_key.response + CIFS_SESS_KEY_SIZE,
512 ses->auth_key.len = 16 + sizeof(struct ntlmv2_resp); 631 CIFS_HMAC_MD5_HASH_SIZE);
513 632
514 return 0; 633 rc = crypto_shash_final(&ses->server->secmech.sdeschmacmd5->shash,
634 ses->auth_key.response);
515 635
516setup_ntlmv2_rsp_ret: 636setup_ntlmv2_rsp_ret:
517 kfree(ses->tiblob); 637 kfree(tiblob);
518 ses->tiblob = NULL;
519 ses->tilen = 0;
520 638
521 return rc; 639 return rc;
522} 640}
523 641
524void CalcNTLMv2_response(const struct cifsSesInfo *ses, 642int
525 char *v2_session_response) 643calc_seckey(struct cifsSesInfo *ses)
526{ 644{
527 struct HMACMD5Context context; 645 int rc;
528 /* rest of v2 struct already generated */ 646 struct crypto_blkcipher *tfm_arc4;
529 memcpy(v2_session_response + 8, ses->cryptKey, 8); 647 struct scatterlist sgin, sgout;
530 hmac_md5_init_limK_to_64(ses->ntlmv2_hash, 16, &context); 648 struct blkcipher_desc desc;
649 unsigned char sec_key[CIFS_SESS_KEY_SIZE]; /* a nonce */
650
651 get_random_bytes(sec_key, CIFS_SESS_KEY_SIZE);
652
653 tfm_arc4 = crypto_alloc_blkcipher("ecb(arc4)", 0, CRYPTO_ALG_ASYNC);
654 if (!tfm_arc4 || IS_ERR(tfm_arc4)) {
655 cERROR(1, "could not allocate crypto API arc4\n");
656 return PTR_ERR(tfm_arc4);
657 }
531 658
532 hmac_md5_update(v2_session_response+8, 659 desc.tfm = tfm_arc4;
533 sizeof(struct ntlmv2_resp) - 8, &context);
534 660
535 if (ses->tilen) 661 crypto_blkcipher_setkey(tfm_arc4, ses->auth_key.response,
536 hmac_md5_update(ses->tiblob, ses->tilen, &context); 662 CIFS_SESS_KEY_SIZE);
537 663
538 hmac_md5_final(v2_session_response, &context); 664 sg_init_one(&sgin, sec_key, CIFS_SESS_KEY_SIZE);
539/* cifs_dump_mem("v2_sess_rsp: ", v2_session_response, 32); */ 665 sg_init_one(&sgout, ses->ntlmssp->ciphertext, CIFS_CPHTXT_SIZE);
666
667 rc = crypto_blkcipher_encrypt(&desc, &sgout, &sgin, CIFS_CPHTXT_SIZE);
668 if (rc) {
669 cERROR(1, "could not encrypt session key rc: %d\n", rc);
670 crypto_free_blkcipher(tfm_arc4);
671 return rc;
672 }
673
674 /* make secondary_key/nonce as session key */
675 memcpy(ses->auth_key.response, sec_key, CIFS_SESS_KEY_SIZE);
676 /* and make len as that of session key only */
677 ses->auth_key.len = CIFS_SESS_KEY_SIZE;
678
679 crypto_free_blkcipher(tfm_arc4);
680
681 return 0;
682}
683
684void
685cifs_crypto_shash_release(struct TCP_Server_Info *server)
686{
687 if (server->secmech.md5)
688 crypto_free_shash(server->secmech.md5);
689
690 if (server->secmech.hmacmd5)
691 crypto_free_shash(server->secmech.hmacmd5);
692
693 kfree(server->secmech.sdeschmacmd5);
694
695 kfree(server->secmech.sdescmd5);
696}
697
698int
699cifs_crypto_shash_allocate(struct TCP_Server_Info *server)
700{
701 int rc;
702 unsigned int size;
703
704 server->secmech.hmacmd5 = crypto_alloc_shash("hmac(md5)", 0, 0);
705 if (!server->secmech.hmacmd5 ||
706 IS_ERR(server->secmech.hmacmd5)) {
707 cERROR(1, "could not allocate crypto hmacmd5\n");
708 return PTR_ERR(server->secmech.hmacmd5);
709 }
710
711 server->secmech.md5 = crypto_alloc_shash("md5", 0, 0);
712 if (!server->secmech.md5 || IS_ERR(server->secmech.md5)) {
713 cERROR(1, "could not allocate crypto md5\n");
714 rc = PTR_ERR(server->secmech.md5);
715 goto crypto_allocate_md5_fail;
716 }
717
718 size = sizeof(struct shash_desc) +
719 crypto_shash_descsize(server->secmech.hmacmd5);
720 server->secmech.sdeschmacmd5 = kmalloc(size, GFP_KERNEL);
721 if (!server->secmech.sdeschmacmd5) {
722 cERROR(1, "cifs_crypto_shash_allocate: can't alloc hmacmd5\n");
723 rc = -ENOMEM;
724 goto crypto_allocate_hmacmd5_sdesc_fail;
725 }
726 server->secmech.sdeschmacmd5->shash.tfm = server->secmech.hmacmd5;
727 server->secmech.sdeschmacmd5->shash.flags = 0x0;
728
729
730 size = sizeof(struct shash_desc) +
731 crypto_shash_descsize(server->secmech.md5);
732 server->secmech.sdescmd5 = kmalloc(size, GFP_KERNEL);
733 if (!server->secmech.sdescmd5) {
734 cERROR(1, "cifs_crypto_shash_allocate: can't alloc md5\n");
735 rc = -ENOMEM;
736 goto crypto_allocate_md5_sdesc_fail;
737 }
738 server->secmech.sdescmd5->shash.tfm = server->secmech.md5;
739 server->secmech.sdescmd5->shash.flags = 0x0;
740
741 return 0;
742
743crypto_allocate_md5_sdesc_fail:
744 kfree(server->secmech.sdeschmacmd5);
745
746crypto_allocate_hmacmd5_sdesc_fail:
747 crypto_free_shash(server->secmech.md5);
748
749crypto_allocate_md5_fail:
750 crypto_free_shash(server->secmech.hmacmd5);
751
752 return rc;
540} 753}
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 34371637f210..75c4eaa79588 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -318,7 +318,6 @@ cifs_alloc_inode(struct super_block *sb)
318 return NULL; 318 return NULL;
319 cifs_inode->cifsAttrs = 0x20; /* default */ 319 cifs_inode->cifsAttrs = 0x20; /* default */
320 cifs_inode->time = 0; 320 cifs_inode->time = 0;
321 cifs_inode->write_behind_rc = 0;
322 /* Until the file is open and we have gotten oplock 321 /* Until the file is open and we have gotten oplock
323 info back from the server, can not assume caching of 322 info back from the server, can not assume caching of
324 file data or metadata */ 323 file data or metadata */
@@ -545,9 +544,9 @@ static const struct super_operations cifs_super_ops = {
545#endif 544#endif
546}; 545};
547 546
548static int 547static struct dentry *
549cifs_get_sb(struct file_system_type *fs_type, 548cifs_do_mount(struct file_system_type *fs_type,
550 int flags, const char *dev_name, void *data, struct vfsmount *mnt) 549 int flags, const char *dev_name, void *data)
551{ 550{
552 int rc; 551 int rc;
553 struct super_block *sb; 552 struct super_block *sb;
@@ -557,18 +556,17 @@ cifs_get_sb(struct file_system_type *fs_type,
557 cFYI(1, "Devname: %s flags: %d ", dev_name, flags); 556 cFYI(1, "Devname: %s flags: %d ", dev_name, flags);
558 557
559 if (IS_ERR(sb)) 558 if (IS_ERR(sb))
560 return PTR_ERR(sb); 559 return ERR_CAST(sb);
561 560
562 sb->s_flags = flags; 561 sb->s_flags = flags;
563 562
564 rc = cifs_read_super(sb, data, dev_name, flags & MS_SILENT ? 1 : 0); 563 rc = cifs_read_super(sb, data, dev_name, flags & MS_SILENT ? 1 : 0);
565 if (rc) { 564 if (rc) {
566 deactivate_locked_super(sb); 565 deactivate_locked_super(sb);
567 return rc; 566 return ERR_PTR(rc);
568 } 567 }
569 sb->s_flags |= MS_ACTIVE; 568 sb->s_flags |= MS_ACTIVE;
570 simple_set_mnt(mnt, sb); 569 return dget(sb->s_root);
571 return 0;
572} 570}
573 571
574static ssize_t cifs_file_aio_write(struct kiocb *iocb, const struct iovec *iov, 572static ssize_t cifs_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
@@ -634,7 +632,7 @@ static int cifs_setlease(struct file *file, long arg, struct file_lock **lease)
634struct file_system_type cifs_fs_type = { 632struct file_system_type cifs_fs_type = {
635 .owner = THIS_MODULE, 633 .owner = THIS_MODULE,
636 .name = "cifs", 634 .name = "cifs",
637 .get_sb = cifs_get_sb, 635 .mount = cifs_do_mount,
638 .kill_sb = kill_anon_super, 636 .kill_sb = kill_anon_super,
639 /* .fs_flags */ 637 /* .fs_flags */
640}; 638};
diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h
index f35795a16b42..897b2b2b28b5 100644
--- a/fs/cifs/cifsfs.h
+++ b/fs/cifs/cifsfs.h
@@ -112,5 +112,5 @@ extern long cifs_ioctl(struct file *filep, unsigned int cmd, unsigned long arg);
112extern const struct export_operations cifs_export_ops; 112extern const struct export_operations cifs_export_ops;
113#endif /* EXPERIMENTAL */ 113#endif /* EXPERIMENTAL */
114 114
115#define CIFS_VERSION "1.67" 115#define CIFS_VERSION "1.68"
116#endif /* _CIFSFS_H */ 116#endif /* _CIFSFS_H */
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index 3365e77f6f24..f259e4d7612d 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -25,6 +25,9 @@
25#include <linux/workqueue.h> 25#include <linux/workqueue.h>
26#include "cifs_fs_sb.h" 26#include "cifs_fs_sb.h"
27#include "cifsacl.h" 27#include "cifsacl.h"
28#include <crypto/internal/hash.h>
29#include <linux/scatterlist.h>
30
28/* 31/*
29 * The sizes of various internal tables and strings 32 * The sizes of various internal tables and strings
30 */ 33 */
@@ -74,7 +77,7 @@
74 * CIFS vfs client Status information (based on what we know.) 77 * CIFS vfs client Status information (based on what we know.)
75 */ 78 */
76 79
77 /* associated with each tcp and smb session */ 80/* associated with each tcp and smb session */
78enum statusEnum { 81enum statusEnum {
79 CifsNew = 0, 82 CifsNew = 0,
80 CifsGood, 83 CifsGood,
@@ -99,14 +102,29 @@ enum protocolEnum {
99 102
100struct session_key { 103struct session_key {
101 unsigned int len; 104 unsigned int len;
102 union { 105 char *response;
103 char ntlm[CIFS_SESS_KEY_SIZE + 16]; 106};
104 char krb5[CIFS_SESS_KEY_SIZE + 16]; /* BB: length correct? */ 107
105 struct { 108/* crypto security descriptor definition */
106 char key[16]; 109struct sdesc {
107 struct ntlmv2_resp resp; 110 struct shash_desc shash;
108 } ntlmv2; 111 char ctx[];
109 } data; 112};
113
114/* crypto hashing related structure/fields, not specific to a sec mech */
115struct cifs_secmech {
116 struct crypto_shash *hmacmd5; /* hmac-md5 hash function */
117 struct crypto_shash *md5; /* md5 hash function */
118 struct sdesc *sdeschmacmd5; /* ctxt to generate ntlmv2 hash, CR1 */
119 struct sdesc *sdescmd5; /* ctxt to generate cifs/smb signature */
120};
121
122/* per smb session structure/fields */
123struct ntlmssp_auth {
124 __u32 client_flags; /* sent by client in type 1 ntlmsssp exchange */
125 __u32 server_flags; /* sent by server in type 2 ntlmssp exchange */
126 unsigned char ciphertext[CIFS_CPHTXT_SIZE]; /* sent to server */
127 char cryptkey[CIFS_CRYPTO_KEY_SIZE]; /* used by ntlmssp */
110}; 128};
111 129
112struct cifs_cred { 130struct cifs_cred {
@@ -179,12 +197,14 @@ struct TCP_Server_Info {
179 int capabilities; /* allow selective disabling of caps by smb sess */ 197 int capabilities; /* allow selective disabling of caps by smb sess */
180 int timeAdj; /* Adjust for difference in server time zone in sec */ 198 int timeAdj; /* Adjust for difference in server time zone in sec */
181 __u16 CurrentMid; /* multiplex id - rotating counter */ 199 __u16 CurrentMid; /* multiplex id - rotating counter */
200 char cryptkey[CIFS_CRYPTO_KEY_SIZE]; /* used by ntlm, ntlmv2 etc */
182 /* 16th byte of RFC1001 workstation name is always null */ 201 /* 16th byte of RFC1001 workstation name is always null */
183 char workstation_RFC1001_name[RFC1001_NAME_LEN_WITH_NULL]; 202 char workstation_RFC1001_name[RFC1001_NAME_LEN_WITH_NULL];
184 __u32 sequence_number; /* needed for CIFS PDU signature */ 203 __u32 sequence_number; /* needed for CIFS PDU signature */
185 struct session_key session_key; 204 struct session_key session_key;
186 unsigned long lstrp; /* when we got last response from this server */ 205 unsigned long lstrp; /* when we got last response from this server */
187 u16 dialect; /* dialect index that server chose */ 206 u16 dialect; /* dialect index that server chose */
207 struct cifs_secmech secmech; /* crypto sec mech functs, descriptors */
188 /* extended security flavors that server supports */ 208 /* extended security flavors that server supports */
189 bool sec_kerberos; /* supports plain Kerberos */ 209 bool sec_kerberos; /* supports plain Kerberos */
190 bool sec_mskerberos; /* supports legacy MS Kerberos */ 210 bool sec_mskerberos; /* supports legacy MS Kerberos */
@@ -222,11 +242,8 @@ struct cifsSesInfo {
222 char userName[MAX_USERNAME_SIZE + 1]; 242 char userName[MAX_USERNAME_SIZE + 1];
223 char *domainName; 243 char *domainName;
224 char *password; 244 char *password;
225 char cryptKey[CIFS_CRYPTO_KEY_SIZE];
226 struct session_key auth_key; 245 struct session_key auth_key;
227 char ntlmv2_hash[16]; 246 struct ntlmssp_auth *ntlmssp; /* ciphertext, flags, server challenge */
228 unsigned int tilen; /* length of the target info blob */
229 unsigned char *tiblob; /* target info blob in challenge response */
230 bool need_reconnect:1; /* connection reset, uid now invalid */ 247 bool need_reconnect:1; /* connection reset, uid now invalid */
231}; 248};
232/* no more than one of the following three session flags may be set */ 249/* no more than one of the following three session flags may be set */
@@ -395,16 +412,19 @@ struct cifsFileInfo {
395 struct list_head llist; /* list of byte range locks we have. */ 412 struct list_head llist; /* list of byte range locks we have. */
396 bool invalidHandle:1; /* file closed via session abend */ 413 bool invalidHandle:1; /* file closed via session abend */
397 bool oplock_break_cancelled:1; 414 bool oplock_break_cancelled:1;
398 atomic_t count; /* reference count */ 415 int count; /* refcount protected by cifs_file_list_lock */
399 struct mutex fh_mutex; /* prevents reopen race after dead ses*/ 416 struct mutex fh_mutex; /* prevents reopen race after dead ses*/
400 struct cifs_search_info srch_inf; 417 struct cifs_search_info srch_inf;
401 struct work_struct oplock_break; /* work for oplock breaks */ 418 struct work_struct oplock_break; /* work for oplock breaks */
402}; 419};
403 420
404/* Take a reference on the file private data */ 421/*
422 * Take a reference on the file private data. Must be called with
423 * cifs_file_list_lock held.
424 */
405static inline void cifsFileInfo_get(struct cifsFileInfo *cifs_file) 425static inline void cifsFileInfo_get(struct cifsFileInfo *cifs_file)
406{ 426{
407 atomic_inc(&cifs_file->count); 427 ++cifs_file->count;
408} 428}
409 429
410void cifsFileInfo_put(struct cifsFileInfo *cifs_file); 430void cifsFileInfo_put(struct cifsFileInfo *cifs_file);
@@ -417,7 +437,6 @@ struct cifsInodeInfo {
417 struct list_head lockList; 437 struct list_head lockList;
418 /* BB add in lists for dirty pages i.e. write caching info for oplock */ 438 /* BB add in lists for dirty pages i.e. write caching info for oplock */
419 struct list_head openFileList; 439 struct list_head openFileList;
420 int write_behind_rc;
421 __u32 cifsAttrs; /* e.g. DOS archive bit, sparse, compressed, system */ 440 __u32 cifsAttrs; /* e.g. DOS archive bit, sparse, compressed, system */
422 unsigned long time; /* jiffies of last update/check of inode */ 441 unsigned long time; /* jiffies of last update/check of inode */
423 bool clientCanCacheRead:1; /* read oplock */ 442 bool clientCanCacheRead:1; /* read oplock */
@@ -668,7 +687,7 @@ require use of the stronger protocol */
668 * GlobalMid_Lock protects: 687 * GlobalMid_Lock protects:
669 * list operations on pending_mid_q and oplockQ 688 * list operations on pending_mid_q and oplockQ
670 * updates to XID counters, multiplex id and SMB sequence numbers 689 * updates to XID counters, multiplex id and SMB sequence numbers
671 * GlobalSMBSesLock protects: 690 * cifs_file_list_lock protects:
672 * list operations on tcp and SMB session lists and tCon lists 691 * list operations on tcp and SMB session lists and tCon lists
673 * f_owner.lock protects certain per file struct operations 692 * f_owner.lock protects certain per file struct operations
674 * mapping->page_lock protects certain per page operations 693 * mapping->page_lock protects certain per page operations
diff --git a/fs/cifs/cifspdu.h b/fs/cifs/cifspdu.h
index b0f4b5656d4c..de36b09763a8 100644
--- a/fs/cifs/cifspdu.h
+++ b/fs/cifs/cifspdu.h
@@ -131,9 +131,20 @@
131#define CIFS_CRYPTO_KEY_SIZE (8) 131#define CIFS_CRYPTO_KEY_SIZE (8)
132 132
133/* 133/*
134 * Size of the ntlm client response
135 */
136#define CIFS_AUTH_RESP_SIZE (24)
137
138/*
134 * Size of the session key (crypto key encrypted with the password 139 * Size of the session key (crypto key encrypted with the password
135 */ 140 */
136#define CIFS_SESS_KEY_SIZE (24) 141#define CIFS_SESS_KEY_SIZE (16)
142
143#define CIFS_CLIENT_CHALLENGE_SIZE (8)
144#define CIFS_SERVER_CHALLENGE_SIZE (8)
145#define CIFS_HMAC_MD5_HASH_SIZE (16)
146#define CIFS_CPHTXT_SIZE (16)
147#define CIFS_NTHASH_SIZE (16)
137 148
138/* 149/*
139 * Maximum user name length 150 * Maximum user name length
diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h
index e593c40ba7ba..edb6d90efdf2 100644
--- a/fs/cifs/cifsproto.h
+++ b/fs/cifs/cifsproto.h
@@ -362,13 +362,15 @@ extern int cifs_sign_smb(struct smb_hdr *, struct TCP_Server_Info *, __u32 *);
362extern int cifs_sign_smb2(struct kvec *iov, int n_vec, struct TCP_Server_Info *, 362extern int cifs_sign_smb2(struct kvec *iov, int n_vec, struct TCP_Server_Info *,
363 __u32 *); 363 __u32 *);
364extern int cifs_verify_signature(struct smb_hdr *, 364extern int cifs_verify_signature(struct smb_hdr *,
365 const struct session_key *session_key, 365 struct TCP_Server_Info *server,
366 __u32 expected_sequence_number); 366 __u32 expected_sequence_number);
367extern int cifs_calculate_session_key(struct session_key *key, const char *rn, 367extern void SMBNTencrypt(unsigned char *, unsigned char *, unsigned char *);
368 const char *pass); 368extern int setup_ntlm_response(struct cifsSesInfo *);
369extern void CalcNTLMv2_response(const struct cifsSesInfo *, char *); 369extern int setup_ntlmv2_rsp(struct cifsSesInfo *, const struct nls_table *);
370extern int setup_ntlmv2_rsp(struct cifsSesInfo *, char *, 370extern int cifs_crypto_shash_allocate(struct TCP_Server_Info *);
371 const struct nls_table *); 371extern void cifs_crypto_shash_release(struct TCP_Server_Info *);
372extern int calc_seckey(struct cifsSesInfo *);
373
372#ifdef CONFIG_CIFS_WEAK_PW_HASH 374#ifdef CONFIG_CIFS_WEAK_PW_HASH
373extern void calc_lanman_hash(const char *password, const char *cryptkey, 375extern void calc_lanman_hash(const char *password, const char *cryptkey,
374 bool encrypt, char *lnm_session_key); 376 bool encrypt, char *lnm_session_key);
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index e98f1f317b15..2f2632b6df5a 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -503,7 +503,7 @@ CIFSSMBNegotiate(unsigned int xid, struct cifsSesInfo *ses)
503 503
504 if (rsp->EncryptionKeyLength == 504 if (rsp->EncryptionKeyLength ==
505 cpu_to_le16(CIFS_CRYPTO_KEY_SIZE)) { 505 cpu_to_le16(CIFS_CRYPTO_KEY_SIZE)) {
506 memcpy(ses->cryptKey, rsp->EncryptionKey, 506 memcpy(ses->server->cryptkey, rsp->EncryptionKey,
507 CIFS_CRYPTO_KEY_SIZE); 507 CIFS_CRYPTO_KEY_SIZE);
508 } else if (server->secMode & SECMODE_PW_ENCRYPT) { 508 } else if (server->secMode & SECMODE_PW_ENCRYPT) {
509 rc = -EIO; /* need cryptkey unless plain text */ 509 rc = -EIO; /* need cryptkey unless plain text */
@@ -574,7 +574,7 @@ CIFSSMBNegotiate(unsigned int xid, struct cifsSesInfo *ses)
574 server->timeAdj = (int)(__s16)le16_to_cpu(pSMBr->ServerTimeZone); 574 server->timeAdj = (int)(__s16)le16_to_cpu(pSMBr->ServerTimeZone);
575 server->timeAdj *= 60; 575 server->timeAdj *= 60;
576 if (pSMBr->EncryptionKeyLength == CIFS_CRYPTO_KEY_SIZE) { 576 if (pSMBr->EncryptionKeyLength == CIFS_CRYPTO_KEY_SIZE) {
577 memcpy(ses->cryptKey, pSMBr->u.EncryptionKey, 577 memcpy(ses->server->cryptkey, pSMBr->u.EncryptionKey,
578 CIFS_CRYPTO_KEY_SIZE); 578 CIFS_CRYPTO_KEY_SIZE);
579 } else if ((pSMBr->hdr.Flags2 & SMBFLG2_EXT_SEC) 579 } else if ((pSMBr->hdr.Flags2 & SMBFLG2_EXT_SEC)
580 && (pSMBr->EncryptionKeyLength == 0)) { 580 && (pSMBr->EncryptionKeyLength == 0)) {
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 7e73176acb58..9eb327defa1d 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -175,6 +175,9 @@ cifs_reconnect(struct TCP_Server_Info *server)
175 } 175 }
176 server->sequence_number = 0; 176 server->sequence_number = 0;
177 server->session_estab = false; 177 server->session_estab = false;
178 kfree(server->session_key.response);
179 server->session_key.response = NULL;
180 server->session_key.len = 0;
178 181
179 spin_lock(&GlobalMid_Lock); 182 spin_lock(&GlobalMid_Lock);
180 list_for_each(tmp, &server->pending_mid_q) { 183 list_for_each(tmp, &server->pending_mid_q) {
@@ -1064,7 +1067,7 @@ cifs_parse_mount_options(char *options, const char *devname,
1064 } 1067 }
1065 i = cifs_convert_address((struct sockaddr *)&vol->srcaddr, 1068 i = cifs_convert_address((struct sockaddr *)&vol->srcaddr,
1066 value, strlen(value)); 1069 value, strlen(value));
1067 if (i < 0) { 1070 if (i == 0) {
1068 printk(KERN_WARNING "CIFS: Could not parse" 1071 printk(KERN_WARNING "CIFS: Could not parse"
1069 " srcaddr: %s\n", 1072 " srcaddr: %s\n",
1070 value); 1073 value);
@@ -1560,8 +1563,13 @@ cifs_put_tcp_session(struct TCP_Server_Info *server)
1560 server->tcpStatus = CifsExiting; 1563 server->tcpStatus = CifsExiting;
1561 spin_unlock(&GlobalMid_Lock); 1564 spin_unlock(&GlobalMid_Lock);
1562 1565
1566 cifs_crypto_shash_release(server);
1563 cifs_fscache_release_client_cookie(server); 1567 cifs_fscache_release_client_cookie(server);
1564 1568
1569 kfree(server->session_key.response);
1570 server->session_key.response = NULL;
1571 server->session_key.len = 0;
1572
1565 task = xchg(&server->tsk, NULL); 1573 task = xchg(&server->tsk, NULL);
1566 if (task) 1574 if (task)
1567 force_sig(SIGKILL, task); 1575 force_sig(SIGKILL, task);
@@ -1614,10 +1622,16 @@ cifs_get_tcp_session(struct smb_vol *volume_info)
1614 goto out_err; 1622 goto out_err;
1615 } 1623 }
1616 1624
1625 rc = cifs_crypto_shash_allocate(tcp_ses);
1626 if (rc) {
1627 cERROR(1, "could not setup hash structures rc %d", rc);
1628 goto out_err;
1629 }
1630
1617 tcp_ses->hostname = extract_hostname(volume_info->UNC); 1631 tcp_ses->hostname = extract_hostname(volume_info->UNC);
1618 if (IS_ERR(tcp_ses->hostname)) { 1632 if (IS_ERR(tcp_ses->hostname)) {
1619 rc = PTR_ERR(tcp_ses->hostname); 1633 rc = PTR_ERR(tcp_ses->hostname);
1620 goto out_err; 1634 goto out_err_crypto_release;
1621 } 1635 }
1622 1636
1623 tcp_ses->noblocksnd = volume_info->noblocksnd; 1637 tcp_ses->noblocksnd = volume_info->noblocksnd;
@@ -1661,7 +1675,7 @@ cifs_get_tcp_session(struct smb_vol *volume_info)
1661 } 1675 }
1662 if (rc < 0) { 1676 if (rc < 0) {
1663 cERROR(1, "Error connecting to socket. Aborting operation"); 1677 cERROR(1, "Error connecting to socket. Aborting operation");
1664 goto out_err; 1678 goto out_err_crypto_release;
1665 } 1679 }
1666 1680
1667 /* 1681 /*
@@ -1675,7 +1689,7 @@ cifs_get_tcp_session(struct smb_vol *volume_info)
1675 rc = PTR_ERR(tcp_ses->tsk); 1689 rc = PTR_ERR(tcp_ses->tsk);
1676 cERROR(1, "error %d create cifsd thread", rc); 1690 cERROR(1, "error %d create cifsd thread", rc);
1677 module_put(THIS_MODULE); 1691 module_put(THIS_MODULE);
1678 goto out_err; 1692 goto out_err_crypto_release;
1679 } 1693 }
1680 1694
1681 /* thread spawned, put it on the list */ 1695 /* thread spawned, put it on the list */
@@ -1687,6 +1701,9 @@ cifs_get_tcp_session(struct smb_vol *volume_info)
1687 1701
1688 return tcp_ses; 1702 return tcp_ses;
1689 1703
1704out_err_crypto_release:
1705 cifs_crypto_shash_release(tcp_ses);
1706
1690out_err: 1707out_err:
1691 if (tcp_ses) { 1708 if (tcp_ses) {
1692 if (!IS_ERR(tcp_ses->hostname)) 1709 if (!IS_ERR(tcp_ses->hostname))
@@ -1801,8 +1818,6 @@ cifs_get_smb_ses(struct TCP_Server_Info *server, struct smb_vol *volume_info)
1801 if (ses == NULL) 1818 if (ses == NULL)
1802 goto get_ses_fail; 1819 goto get_ses_fail;
1803 1820
1804 ses->tilen = 0;
1805 ses->tiblob = NULL;
1806 /* new SMB session uses our server ref */ 1821 /* new SMB session uses our server ref */
1807 ses->server = server; 1822 ses->server = server;
1808 if (server->addr.sockAddr6.sin6_family == AF_INET6) 1823 if (server->addr.sockAddr6.sin6_family == AF_INET6)
@@ -1823,10 +1838,9 @@ cifs_get_smb_ses(struct TCP_Server_Info *server, struct smb_vol *volume_info)
1823 goto get_ses_fail; 1838 goto get_ses_fail;
1824 } 1839 }
1825 if (volume_info->domainname) { 1840 if (volume_info->domainname) {
1826 int len = strlen(volume_info->domainname); 1841 ses->domainName = kstrdup(volume_info->domainname, GFP_KERNEL);
1827 ses->domainName = kmalloc(len + 1, GFP_KERNEL); 1842 if (!ses->domainName)
1828 if (ses->domainName) 1843 goto get_ses_fail;
1829 strcpy(ses->domainName, volume_info->domainname);
1830 } 1844 }
1831 ses->cred_uid = volume_info->cred_uid; 1845 ses->cred_uid = volume_info->cred_uid;
1832 ses->linux_uid = volume_info->linux_uid; 1846 ses->linux_uid = volume_info->linux_uid;
@@ -2985,13 +2999,13 @@ CIFSTCon(unsigned int xid, struct cifsSesInfo *ses,
2985#ifdef CONFIG_CIFS_WEAK_PW_HASH 2999#ifdef CONFIG_CIFS_WEAK_PW_HASH
2986 if ((global_secflags & CIFSSEC_MAY_LANMAN) && 3000 if ((global_secflags & CIFSSEC_MAY_LANMAN) &&
2987 (ses->server->secType == LANMAN)) 3001 (ses->server->secType == LANMAN))
2988 calc_lanman_hash(tcon->password, ses->cryptKey, 3002 calc_lanman_hash(tcon->password, ses->server->cryptkey,
2989 ses->server->secMode & 3003 ses->server->secMode &
2990 SECMODE_PW_ENCRYPT ? true : false, 3004 SECMODE_PW_ENCRYPT ? true : false,
2991 bcc_ptr); 3005 bcc_ptr);
2992 else 3006 else
2993#endif /* CIFS_WEAK_PW_HASH */ 3007#endif /* CIFS_WEAK_PW_HASH */
2994 SMBNTencrypt(tcon->password, ses->cryptKey, bcc_ptr); 3008 SMBNTencrypt(tcon->password, ses->server->cryptkey, bcc_ptr);
2995 3009
2996 bcc_ptr += CIFS_SESS_KEY_SIZE; 3010 bcc_ptr += CIFS_SESS_KEY_SIZE;
2997 if (ses->capabilities & CAP_UNICODE) { 3011 if (ses->capabilities & CAP_UNICODE) {
@@ -3178,10 +3192,11 @@ int cifs_setup_session(unsigned int xid, struct cifsSesInfo *ses,
3178 } else { 3192 } else {
3179 mutex_lock(&ses->server->srv_mutex); 3193 mutex_lock(&ses->server->srv_mutex);
3180 if (!server->session_estab) { 3194 if (!server->session_estab) {
3181 memcpy(&server->session_key.data, 3195 server->session_key.response = ses->auth_key.response;
3182 &ses->auth_key.data, ses->auth_key.len);
3183 server->session_key.len = ses->auth_key.len; 3196 server->session_key.len = ses->auth_key.len;
3184 ses->server->session_estab = true; 3197 server->sequence_number = 0x2;
3198 server->session_estab = true;
3199 ses->auth_key.response = NULL;
3185 } 3200 }
3186 mutex_unlock(&server->srv_mutex); 3201 mutex_unlock(&server->srv_mutex);
3187 3202
@@ -3192,6 +3207,12 @@ int cifs_setup_session(unsigned int xid, struct cifsSesInfo *ses,
3192 spin_unlock(&GlobalMid_Lock); 3207 spin_unlock(&GlobalMid_Lock);
3193 } 3208 }
3194 3209
3210 kfree(ses->auth_key.response);
3211 ses->auth_key.response = NULL;
3212 ses->auth_key.len = 0;
3213 kfree(ses->ntlmssp);
3214 ses->ntlmssp = NULL;
3215
3195 return rc; 3216 return rc;
3196} 3217}
3197 3218
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 8c81e7b14d53..ae82159cf7fa 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -131,8 +131,7 @@ static inline int cifs_open_inode_helper(struct inode *inode,
131 /* BB no need to lock inode until after invalidate 131 /* BB no need to lock inode until after invalidate
132 since namei code should already have it locked? */ 132 since namei code should already have it locked? */
133 rc = filemap_write_and_wait(inode->i_mapping); 133 rc = filemap_write_and_wait(inode->i_mapping);
134 if (rc != 0) 134 mapping_set_error(inode->i_mapping, rc);
135 pCifsInode->write_behind_rc = rc;
136 } 135 }
137 cFYI(1, "invalidating remote inode since open detected it " 136 cFYI(1, "invalidating remote inode since open detected it "
138 "changed"); 137 "changed");
@@ -232,6 +231,7 @@ cifs_new_fileinfo(__u16 fileHandle, struct file *file,
232 if (pCifsFile == NULL) 231 if (pCifsFile == NULL)
233 return pCifsFile; 232 return pCifsFile;
234 233
234 pCifsFile->count = 1;
235 pCifsFile->netfid = fileHandle; 235 pCifsFile->netfid = fileHandle;
236 pCifsFile->pid = current->tgid; 236 pCifsFile->pid = current->tgid;
237 pCifsFile->uid = current_fsuid(); 237 pCifsFile->uid = current_fsuid();
@@ -242,7 +242,6 @@ cifs_new_fileinfo(__u16 fileHandle, struct file *file,
242 mutex_init(&pCifsFile->fh_mutex); 242 mutex_init(&pCifsFile->fh_mutex);
243 mutex_init(&pCifsFile->lock_mutex); 243 mutex_init(&pCifsFile->lock_mutex);
244 INIT_LIST_HEAD(&pCifsFile->llist); 244 INIT_LIST_HEAD(&pCifsFile->llist);
245 atomic_set(&pCifsFile->count, 1);
246 INIT_WORK(&pCifsFile->oplock_break, cifs_oplock_break); 245 INIT_WORK(&pCifsFile->oplock_break, cifs_oplock_break);
247 246
248 spin_lock(&cifs_file_list_lock); 247 spin_lock(&cifs_file_list_lock);
@@ -267,7 +266,8 @@ cifs_new_fileinfo(__u16 fileHandle, struct file *file,
267 266
268/* 267/*
269 * Release a reference on the file private data. This may involve closing 268 * Release a reference on the file private data. This may involve closing
270 * the filehandle out on the server. 269 * the filehandle out on the server. Must be called without holding
270 * cifs_file_list_lock.
271 */ 271 */
272void cifsFileInfo_put(struct cifsFileInfo *cifs_file) 272void cifsFileInfo_put(struct cifsFileInfo *cifs_file)
273{ 273{
@@ -276,7 +276,7 @@ void cifsFileInfo_put(struct cifsFileInfo *cifs_file)
276 struct cifsLockInfo *li, *tmp; 276 struct cifsLockInfo *li, *tmp;
277 277
278 spin_lock(&cifs_file_list_lock); 278 spin_lock(&cifs_file_list_lock);
279 if (!atomic_dec_and_test(&cifs_file->count)) { 279 if (--cifs_file->count > 0) {
280 spin_unlock(&cifs_file_list_lock); 280 spin_unlock(&cifs_file_list_lock);
281 return; 281 return;
282 } 282 }
@@ -605,8 +605,7 @@ reopen_success:
605 605
606 if (can_flush) { 606 if (can_flush) {
607 rc = filemap_write_and_wait(inode->i_mapping); 607 rc = filemap_write_and_wait(inode->i_mapping);
608 if (rc != 0) 608 mapping_set_error(inode->i_mapping, rc);
609 CIFS_I(inode)->write_behind_rc = rc;
610 609
611 pCifsInode->clientCanCacheAll = false; 610 pCifsInode->clientCanCacheAll = false;
612 pCifsInode->clientCanCacheRead = false; 611 pCifsInode->clientCanCacheRead = false;
@@ -1303,7 +1302,6 @@ static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to)
1303static int cifs_writepages(struct address_space *mapping, 1302static int cifs_writepages(struct address_space *mapping,
1304 struct writeback_control *wbc) 1303 struct writeback_control *wbc)
1305{ 1304{
1306 struct backing_dev_info *bdi = mapping->backing_dev_info;
1307 unsigned int bytes_to_write; 1305 unsigned int bytes_to_write;
1308 unsigned int bytes_written; 1306 unsigned int bytes_written;
1309 struct cifs_sb_info *cifs_sb; 1307 struct cifs_sb_info *cifs_sb;
@@ -1326,15 +1324,6 @@ static int cifs_writepages(struct address_space *mapping,
1326 int scanned = 0; 1324 int scanned = 0;
1327 int xid, long_op; 1325 int xid, long_op;
1328 1326
1329 /*
1330 * BB: Is this meaningful for a non-block-device file system?
1331 * If it is, we should test it again after we do I/O
1332 */
1333 if (wbc->nonblocking && bdi_write_congested(bdi)) {
1334 wbc->encountered_congestion = 1;
1335 return 0;
1336 }
1337
1338 cifs_sb = CIFS_SB(mapping->host->i_sb); 1327 cifs_sb = CIFS_SB(mapping->host->i_sb);
1339 1328
1340 /* 1329 /*
@@ -1363,6 +1352,7 @@ static int cifs_writepages(struct address_space *mapping,
1363 if (!experimEnabled && tcon->ses->server->secMode & 1352 if (!experimEnabled && tcon->ses->server->secMode &
1364 (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED)) { 1353 (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED)) {
1365 cifsFileInfo_put(open_file); 1354 cifsFileInfo_put(open_file);
1355 kfree(iov);
1366 return generic_writepages(mapping, wbc); 1356 return generic_writepages(mapping, wbc);
1367 } 1357 }
1368 cifsFileInfo_put(open_file); 1358 cifsFileInfo_put(open_file);
@@ -1488,12 +1478,7 @@ retry:
1488 if (rc || bytes_written < bytes_to_write) { 1478 if (rc || bytes_written < bytes_to_write) {
1489 cERROR(1, "Write2 ret %d, wrote %d", 1479 cERROR(1, "Write2 ret %d, wrote %d",
1490 rc, bytes_written); 1480 rc, bytes_written);
1491 /* BB what if continued retry is 1481 mapping_set_error(mapping, rc);
1492 requested via mount flags? */
1493 if (rc == -ENOSPC)
1494 set_bit(AS_ENOSPC, &mapping->flags);
1495 else
1496 set_bit(AS_EIO, &mapping->flags);
1497 } else { 1482 } else {
1498 cifs_stats_bytes_written(tcon, bytes_written); 1483 cifs_stats_bytes_written(tcon, bytes_written);
1499 } 1484 }
@@ -1638,11 +1623,10 @@ int cifs_fsync(struct file *file, int datasync)
1638 1623
1639 rc = filemap_write_and_wait(inode->i_mapping); 1624 rc = filemap_write_and_wait(inode->i_mapping);
1640 if (rc == 0) { 1625 if (rc == 0) {
1641 rc = CIFS_I(inode)->write_behind_rc; 1626 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
1642 CIFS_I(inode)->write_behind_rc = 0; 1627
1643 tcon = tlink_tcon(smbfile->tlink); 1628 tcon = tlink_tcon(smbfile->tlink);
1644 if (!rc && tcon && smbfile && 1629 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC))
1645 !(CIFS_SB(inode->i_sb)->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC))
1646 rc = CIFSSMBFlush(xid, tcon, smbfile->netfid); 1630 rc = CIFSSMBFlush(xid, tcon, smbfile->netfid);
1647 } 1631 }
1648 1632
@@ -1687,21 +1671,8 @@ int cifs_flush(struct file *file, fl_owner_t id)
1687 struct inode *inode = file->f_path.dentry->d_inode; 1671 struct inode *inode = file->f_path.dentry->d_inode;
1688 int rc = 0; 1672 int rc = 0;
1689 1673
1690 /* Rather than do the steps manually: 1674 if (file->f_mode & FMODE_WRITE)
1691 lock the inode for writing 1675 rc = filemap_write_and_wait(inode->i_mapping);
1692 loop through pages looking for write behind data (dirty pages)
1693 coalesce into contiguous 16K (or smaller) chunks to write to server
1694 send to server (prefer in parallel)
1695 deal with writebehind errors
1696 unlock inode for writing
1697 filemapfdatawrite appears easier for the time being */
1698
1699 rc = filemap_fdatawrite(inode->i_mapping);
1700 /* reset wb rc if we were able to write out dirty pages */
1701 if (!rc) {
1702 rc = CIFS_I(inode)->write_behind_rc;
1703 CIFS_I(inode)->write_behind_rc = 0;
1704 }
1705 1676
1706 cFYI(1, "Flush inode %p file %p rc %d", inode, file, rc); 1677 cFYI(1, "Flush inode %p file %p rc %d", inode, file, rc);
1707 1678
@@ -2280,7 +2251,7 @@ void cifs_oplock_break(struct work_struct *work)
2280 oplock_break); 2251 oplock_break);
2281 struct inode *inode = cfile->dentry->d_inode; 2252 struct inode *inode = cfile->dentry->d_inode;
2282 struct cifsInodeInfo *cinode = CIFS_I(inode); 2253 struct cifsInodeInfo *cinode = CIFS_I(inode);
2283 int rc, waitrc = 0; 2254 int rc = 0;
2284 2255
2285 if (inode && S_ISREG(inode->i_mode)) { 2256 if (inode && S_ISREG(inode->i_mode)) {
2286 if (cinode->clientCanCacheRead) 2257 if (cinode->clientCanCacheRead)
@@ -2289,13 +2260,10 @@ void cifs_oplock_break(struct work_struct *work)
2289 break_lease(inode, O_WRONLY); 2260 break_lease(inode, O_WRONLY);
2290 rc = filemap_fdatawrite(inode->i_mapping); 2261 rc = filemap_fdatawrite(inode->i_mapping);
2291 if (cinode->clientCanCacheRead == 0) { 2262 if (cinode->clientCanCacheRead == 0) {
2292 waitrc = filemap_fdatawait(inode->i_mapping); 2263 rc = filemap_fdatawait(inode->i_mapping);
2264 mapping_set_error(inode->i_mapping, rc);
2293 invalidate_remote_inode(inode); 2265 invalidate_remote_inode(inode);
2294 } 2266 }
2295 if (!rc)
2296 rc = waitrc;
2297 if (rc)
2298 cinode->write_behind_rc = rc;
2299 cFYI(1, "Oplock flush inode %p rc %d", inode, rc); 2267 cFYI(1, "Oplock flush inode %p rc %d", inode, rc);
2300 } 2268 }
2301 2269
@@ -2314,7 +2282,7 @@ void cifs_oplock_break(struct work_struct *work)
2314 /* 2282 /*
2315 * We might have kicked in before is_valid_oplock_break() 2283 * We might have kicked in before is_valid_oplock_break()
2316 * finished grabbing reference for us. Make sure it's done by 2284 * finished grabbing reference for us. Make sure it's done by
2317 * waiting for GlobalSMSSeslock. 2285 * waiting for cifs_file_list_lock.
2318 */ 2286 */
2319 spin_lock(&cifs_file_list_lock); 2287 spin_lock(&cifs_file_list_lock);
2320 spin_unlock(&cifs_file_list_lock); 2288 spin_unlock(&cifs_file_list_lock);
@@ -2322,6 +2290,7 @@ void cifs_oplock_break(struct work_struct *work)
2322 cifs_oplock_break_put(cfile); 2290 cifs_oplock_break_put(cfile);
2323} 2291}
2324 2292
2293/* must be called while holding cifs_file_list_lock */
2325void cifs_oplock_break_get(struct cifsFileInfo *cfile) 2294void cifs_oplock_break_get(struct cifsFileInfo *cfile)
2326{ 2295{
2327 cifs_sb_active(cfile->dentry->d_sb); 2296 cifs_sb_active(cfile->dentry->d_sb);
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index 94979309698a..39869c3c3efb 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -1682,8 +1682,7 @@ cifs_invalidate_mapping(struct inode *inode)
1682 /* write back any cached data */ 1682 /* write back any cached data */
1683 if (inode->i_mapping && inode->i_mapping->nrpages != 0) { 1683 if (inode->i_mapping && inode->i_mapping->nrpages != 0) {
1684 rc = filemap_write_and_wait(inode->i_mapping); 1684 rc = filemap_write_and_wait(inode->i_mapping);
1685 if (rc) 1685 mapping_set_error(inode->i_mapping, rc);
1686 cifs_i->write_behind_rc = rc;
1687 } 1686 }
1688 invalidate_remote_inode(inode); 1687 invalidate_remote_inode(inode);
1689 cifs_fscache_reset_inode_cookie(inode); 1688 cifs_fscache_reset_inode_cookie(inode);
@@ -1943,10 +1942,8 @@ cifs_setattr_unix(struct dentry *direntry, struct iattr *attrs)
1943 * the flush returns error? 1942 * the flush returns error?
1944 */ 1943 */
1945 rc = filemap_write_and_wait(inode->i_mapping); 1944 rc = filemap_write_and_wait(inode->i_mapping);
1946 if (rc != 0) { 1945 mapping_set_error(inode->i_mapping, rc);
1947 cifsInode->write_behind_rc = rc; 1946 rc = 0;
1948 rc = 0;
1949 }
1950 1947
1951 if (attrs->ia_valid & ATTR_SIZE) { 1948 if (attrs->ia_valid & ATTR_SIZE) {
1952 rc = cifs_set_file_size(inode, attrs, xid, full_path); 1949 rc = cifs_set_file_size(inode, attrs, xid, full_path);
@@ -2087,10 +2084,8 @@ cifs_setattr_nounix(struct dentry *direntry, struct iattr *attrs)
2087 * the flush returns error? 2084 * the flush returns error?
2088 */ 2085 */
2089 rc = filemap_write_and_wait(inode->i_mapping); 2086 rc = filemap_write_and_wait(inode->i_mapping);
2090 if (rc != 0) { 2087 mapping_set_error(inode->i_mapping, rc);
2091 cifsInode->write_behind_rc = rc; 2088 rc = 0;
2092 rc = 0;
2093 }
2094 2089
2095 if (attrs->ia_valid & ATTR_SIZE) { 2090 if (attrs->ia_valid & ATTR_SIZE) {
2096 rc = cifs_set_file_size(inode, attrs, xid, full_path); 2091 rc = cifs_set_file_size(inode, attrs, xid, full_path);
diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c
index 1c681f6a6803..c4e296fe3518 100644
--- a/fs/cifs/misc.c
+++ b/fs/cifs/misc.c
@@ -577,7 +577,7 @@ is_valid_oplock_break(struct smb_hdr *buf, struct TCP_Server_Info *srv)
577 * cifs_oplock_break_put() can't be called 577 * cifs_oplock_break_put() can't be called
578 * from here. Get reference after queueing 578 * from here. Get reference after queueing
579 * succeeded. cifs_oplock_break() will 579 * succeeded. cifs_oplock_break() will
580 * synchronize using GlobalSMSSeslock. 580 * synchronize using cifs_file_list_lock.
581 */ 581 */
582 if (queue_work(system_nrt_wq, 582 if (queue_work(system_nrt_wq,
583 &netfile->oplock_break)) 583 &netfile->oplock_break))
diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c
index 2a11efd96592..7b01d3f6eed6 100644
--- a/fs/cifs/sess.c
+++ b/fs/cifs/sess.c
@@ -32,9 +32,6 @@
32#include <linux/slab.h> 32#include <linux/slab.h>
33#include "cifs_spnego.h" 33#include "cifs_spnego.h"
34 34
35extern void SMBNTencrypt(unsigned char *passwd, unsigned char *c8,
36 unsigned char *p24);
37
38/* 35/*
39 * Checks if this is the first smb session to be reconnected after 36 * Checks if this is the first smb session to be reconnected after
40 * the socket has been reestablished (so we know whether to use vc 0). 37 * the socket has been reestablished (so we know whether to use vc 0).
@@ -402,23 +399,22 @@ static int decode_ntlmssp_challenge(char *bcc_ptr, int blob_len,
402 return -EINVAL; 399 return -EINVAL;
403 } 400 }
404 401
405 memcpy(ses->cryptKey, pblob->Challenge, CIFS_CRYPTO_KEY_SIZE); 402 memcpy(ses->ntlmssp->cryptkey, pblob->Challenge, CIFS_CRYPTO_KEY_SIZE);
406 /* BB we could decode pblob->NegotiateFlags; some may be useful */ 403 /* BB we could decode pblob->NegotiateFlags; some may be useful */
407 /* In particular we can examine sign flags */ 404 /* In particular we can examine sign flags */
408 /* BB spec says that if AvId field of MsvAvTimestamp is populated then 405 /* BB spec says that if AvId field of MsvAvTimestamp is populated then
409 we must set the MIC field of the AUTHENTICATE_MESSAGE */ 406 we must set the MIC field of the AUTHENTICATE_MESSAGE */
410 407 ses->ntlmssp->server_flags = le32_to_cpu(pblob->NegotiateFlags);
411 tioffset = cpu_to_le16(pblob->TargetInfoArray.BufferOffset); 408 tioffset = cpu_to_le16(pblob->TargetInfoArray.BufferOffset);
412 tilen = cpu_to_le16(pblob->TargetInfoArray.Length); 409 tilen = cpu_to_le16(pblob->TargetInfoArray.Length);
413 ses->tilen = tilen; 410 if (tilen) {
414 if (ses->tilen) { 411 ses->auth_key.response = kmalloc(tilen, GFP_KERNEL);
415 ses->tiblob = kmalloc(tilen, GFP_KERNEL); 412 if (!ses->auth_key.response) {
416 if (!ses->tiblob) {
417 cERROR(1, "Challenge target info allocation failure"); 413 cERROR(1, "Challenge target info allocation failure");
418 ses->tilen = 0;
419 return -ENOMEM; 414 return -ENOMEM;
420 } 415 }
421 memcpy(ses->tiblob, bcc_ptr + tioffset, ses->tilen); 416 memcpy(ses->auth_key.response, bcc_ptr + tioffset, tilen);
417 ses->auth_key.len = tilen;
422 } 418 }
423 419
424 return 0; 420 return 0;
@@ -443,10 +439,12 @@ static void build_ntlmssp_negotiate_blob(unsigned char *pbuffer,
443 NTLMSSP_NEGOTIATE_128 | NTLMSSP_NEGOTIATE_UNICODE | 439 NTLMSSP_NEGOTIATE_128 | NTLMSSP_NEGOTIATE_UNICODE |
444 NTLMSSP_NEGOTIATE_NTLM; 440 NTLMSSP_NEGOTIATE_NTLM;
445 if (ses->server->secMode & 441 if (ses->server->secMode &
446 (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED)) 442 (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED)) {
447 flags |= NTLMSSP_NEGOTIATE_SIGN; 443 flags |= NTLMSSP_NEGOTIATE_SIGN;
448 if (ses->server->secMode & SECMODE_SIGN_REQUIRED) 444 if (!ses->server->session_estab)
449 flags |= NTLMSSP_NEGOTIATE_ALWAYS_SIGN; 445 flags |= NTLMSSP_NEGOTIATE_KEY_XCH |
446 NTLMSSP_NEGOTIATE_EXTENDED_SEC;
447 }
450 448
451 sec_blob->NegotiateFlags |= cpu_to_le32(flags); 449 sec_blob->NegotiateFlags |= cpu_to_le32(flags);
452 450
@@ -469,11 +467,9 @@ static int build_ntlmssp_auth_blob(unsigned char *pbuffer,
469 const struct nls_table *nls_cp) 467 const struct nls_table *nls_cp)
470{ 468{
471 int rc; 469 int rc;
472 unsigned int size;
473 AUTHENTICATE_MESSAGE *sec_blob = (AUTHENTICATE_MESSAGE *)pbuffer; 470 AUTHENTICATE_MESSAGE *sec_blob = (AUTHENTICATE_MESSAGE *)pbuffer;
474 __u32 flags; 471 __u32 flags;
475 unsigned char *tmp; 472 unsigned char *tmp;
476 struct ntlmv2_resp ntlmv2_response = {};
477 473
478 memcpy(sec_blob->Signature, NTLMSSP_SIGNATURE, 8); 474 memcpy(sec_blob->Signature, NTLMSSP_SIGNATURE, 8);
479 sec_blob->MessageType = NtLmAuthenticate; 475 sec_blob->MessageType = NtLmAuthenticate;
@@ -497,25 +493,19 @@ static int build_ntlmssp_auth_blob(unsigned char *pbuffer,
497 sec_blob->LmChallengeResponse.MaximumLength = 0; 493 sec_blob->LmChallengeResponse.MaximumLength = 0;
498 494
499 sec_blob->NtChallengeResponse.BufferOffset = cpu_to_le32(tmp - pbuffer); 495 sec_blob->NtChallengeResponse.BufferOffset = cpu_to_le32(tmp - pbuffer);
500 rc = setup_ntlmv2_rsp(ses, (char *)&ntlmv2_response, nls_cp); 496 rc = setup_ntlmv2_rsp(ses, nls_cp);
501 if (rc) { 497 if (rc) {
502 cERROR(1, "Error %d during NTLMSSP authentication", rc); 498 cERROR(1, "Error %d during NTLMSSP authentication", rc);
503 goto setup_ntlmv2_ret; 499 goto setup_ntlmv2_ret;
504 } 500 }
505 size = sizeof(struct ntlmv2_resp); 501 memcpy(tmp, ses->auth_key.response + CIFS_SESS_KEY_SIZE,
506 memcpy(tmp, (char *)&ntlmv2_response, size); 502 ses->auth_key.len - CIFS_SESS_KEY_SIZE);
507 tmp += size; 503 tmp += ses->auth_key.len - CIFS_SESS_KEY_SIZE;
508 if (ses->tilen > 0) {
509 memcpy(tmp, ses->tiblob, ses->tilen);
510 tmp += ses->tilen;
511 }
512 504
513 sec_blob->NtChallengeResponse.Length = cpu_to_le16(size + ses->tilen); 505 sec_blob->NtChallengeResponse.Length =
506 cpu_to_le16(ses->auth_key.len - CIFS_SESS_KEY_SIZE);
514 sec_blob->NtChallengeResponse.MaximumLength = 507 sec_blob->NtChallengeResponse.MaximumLength =
515 cpu_to_le16(size + ses->tilen); 508 cpu_to_le16(ses->auth_key.len - CIFS_SESS_KEY_SIZE);
516 kfree(ses->tiblob);
517 ses->tiblob = NULL;
518 ses->tilen = 0;
519 509
520 if (ses->domainName == NULL) { 510 if (ses->domainName == NULL) {
521 sec_blob->DomainName.BufferOffset = cpu_to_le32(tmp - pbuffer); 511 sec_blob->DomainName.BufferOffset = cpu_to_le32(tmp - pbuffer);
@@ -554,9 +544,19 @@ static int build_ntlmssp_auth_blob(unsigned char *pbuffer,
554 sec_blob->WorkstationName.MaximumLength = 0; 544 sec_blob->WorkstationName.MaximumLength = 0;
555 tmp += 2; 545 tmp += 2;
556 546
557 sec_blob->SessionKey.BufferOffset = cpu_to_le32(tmp - pbuffer); 547 if ((ses->ntlmssp->server_flags & NTLMSSP_NEGOTIATE_KEY_XCH) &&
558 sec_blob->SessionKey.Length = 0; 548 !calc_seckey(ses)) {
559 sec_blob->SessionKey.MaximumLength = 0; 549 memcpy(tmp, ses->ntlmssp->ciphertext, CIFS_CPHTXT_SIZE);
550 sec_blob->SessionKey.BufferOffset = cpu_to_le32(tmp - pbuffer);
551 sec_blob->SessionKey.Length = cpu_to_le16(CIFS_CPHTXT_SIZE);
552 sec_blob->SessionKey.MaximumLength =
553 cpu_to_le16(CIFS_CPHTXT_SIZE);
554 tmp += CIFS_CPHTXT_SIZE;
555 } else {
556 sec_blob->SessionKey.BufferOffset = cpu_to_le32(tmp - pbuffer);
557 sec_blob->SessionKey.Length = 0;
558 sec_blob->SessionKey.MaximumLength = 0;
559 }
560 560
561setup_ntlmv2_ret: 561setup_ntlmv2_ret:
562 *buflen = tmp - pbuffer; 562 *buflen = tmp - pbuffer;
@@ -600,8 +600,16 @@ CIFS_SessSetup(unsigned int xid, struct cifsSesInfo *ses,
600 return -EINVAL; 600 return -EINVAL;
601 601
602 type = ses->server->secType; 602 type = ses->server->secType;
603
604 cFYI(1, "sess setup type %d", type); 603 cFYI(1, "sess setup type %d", type);
604 if (type == RawNTLMSSP) {
605 /* if memory allocation is successful, caller of this function
606 * frees it.
607 */
608 ses->ntlmssp = kmalloc(sizeof(struct ntlmssp_auth), GFP_KERNEL);
609 if (!ses->ntlmssp)
610 return -ENOMEM;
611 }
612
605ssetup_ntlmssp_authenticate: 613ssetup_ntlmssp_authenticate:
606 if (phase == NtLmChallenge) 614 if (phase == NtLmChallenge)
607 phase = NtLmAuthenticate; /* if ntlmssp, now final phase */ 615 phase = NtLmAuthenticate; /* if ntlmssp, now final phase */
@@ -666,10 +674,14 @@ ssetup_ntlmssp_authenticate:
666 /* no capabilities flags in old lanman negotiation */ 674 /* no capabilities flags in old lanman negotiation */
667 675
668 pSMB->old_req.PasswordLength = cpu_to_le16(CIFS_SESS_KEY_SIZE); 676 pSMB->old_req.PasswordLength = cpu_to_le16(CIFS_SESS_KEY_SIZE);
669 /* BB calculate hash with password */
670 /* and copy into bcc */
671 677
672 calc_lanman_hash(ses->password, ses->cryptKey, 678 /* Calculate hash with password and copy into bcc_ptr.
679 * Encryption Key (stored as in cryptkey) gets used if the
680 * security mode bit in Negotiate Protocol response states
681 * to use challenge/response method (i.e. Password bit is 1).
682 */
683
684 calc_lanman_hash(ses->password, ses->server->cryptkey,
673 ses->server->secMode & SECMODE_PW_ENCRYPT ? 685 ses->server->secMode & SECMODE_PW_ENCRYPT ?
674 true : false, lnm_session_key); 686 true : false, lnm_session_key);
675 687
@@ -687,24 +699,27 @@ ssetup_ntlmssp_authenticate:
687 ascii_ssetup_strings(&bcc_ptr, ses, nls_cp); 699 ascii_ssetup_strings(&bcc_ptr, ses, nls_cp);
688#endif 700#endif
689 } else if (type == NTLM) { 701 } else if (type == NTLM) {
690 char ntlm_session_key[CIFS_SESS_KEY_SIZE];
691
692 pSMB->req_no_secext.Capabilities = cpu_to_le32(capabilities); 702 pSMB->req_no_secext.Capabilities = cpu_to_le32(capabilities);
693 pSMB->req_no_secext.CaseInsensitivePasswordLength = 703 pSMB->req_no_secext.CaseInsensitivePasswordLength =
694 cpu_to_le16(CIFS_SESS_KEY_SIZE); 704 cpu_to_le16(CIFS_AUTH_RESP_SIZE);
695 pSMB->req_no_secext.CaseSensitivePasswordLength = 705 pSMB->req_no_secext.CaseSensitivePasswordLength =
696 cpu_to_le16(CIFS_SESS_KEY_SIZE); 706 cpu_to_le16(CIFS_AUTH_RESP_SIZE);
707
708 /* calculate ntlm response and session key */
709 rc = setup_ntlm_response(ses);
710 if (rc) {
711 cERROR(1, "Error %d during NTLM authentication", rc);
712 goto ssetup_exit;
713 }
697 714
698 /* calculate session key */ 715 /* copy ntlm response */
699 SMBNTencrypt(ses->password, ses->cryptKey, ntlm_session_key); 716 memcpy(bcc_ptr, ses->auth_key.response + CIFS_SESS_KEY_SIZE,
717 CIFS_AUTH_RESP_SIZE);
718 bcc_ptr += CIFS_AUTH_RESP_SIZE;
719 memcpy(bcc_ptr, ses->auth_key.response + CIFS_SESS_KEY_SIZE,
720 CIFS_AUTH_RESP_SIZE);
721 bcc_ptr += CIFS_AUTH_RESP_SIZE;
700 722
701 cifs_calculate_session_key(&ses->auth_key,
702 ntlm_session_key, ses->password);
703 /* copy session key */
704 memcpy(bcc_ptr, (char *)ntlm_session_key, CIFS_SESS_KEY_SIZE);
705 bcc_ptr += CIFS_SESS_KEY_SIZE;
706 memcpy(bcc_ptr, (char *)ntlm_session_key, CIFS_SESS_KEY_SIZE);
707 bcc_ptr += CIFS_SESS_KEY_SIZE;
708 if (ses->capabilities & CAP_UNICODE) { 723 if (ses->capabilities & CAP_UNICODE) {
709 /* unicode strings must be word aligned */ 724 /* unicode strings must be word aligned */
710 if (iov[0].iov_len % 2) { 725 if (iov[0].iov_len % 2) {
@@ -715,47 +730,26 @@ ssetup_ntlmssp_authenticate:
715 } else 730 } else
716 ascii_ssetup_strings(&bcc_ptr, ses, nls_cp); 731 ascii_ssetup_strings(&bcc_ptr, ses, nls_cp);
717 } else if (type == NTLMv2) { 732 } else if (type == NTLMv2) {
718 char *v2_sess_key =
719 kmalloc(sizeof(struct ntlmv2_resp), GFP_KERNEL);
720
721 /* BB FIXME change all users of v2_sess_key to
722 struct ntlmv2_resp */
723
724 if (v2_sess_key == NULL) {
725 rc = -ENOMEM;
726 goto ssetup_exit;
727 }
728
729 pSMB->req_no_secext.Capabilities = cpu_to_le32(capabilities); 733 pSMB->req_no_secext.Capabilities = cpu_to_le32(capabilities);
730 734
731 /* LM2 password would be here if we supported it */ 735 /* LM2 password would be here if we supported it */
732 pSMB->req_no_secext.CaseInsensitivePasswordLength = 0; 736 pSMB->req_no_secext.CaseInsensitivePasswordLength = 0;
733 /* cpu_to_le16(LM2_SESS_KEY_SIZE); */
734 737
735 /* calculate session key */ 738 /* calculate ntlmv2 response and session key */
736 rc = setup_ntlmv2_rsp(ses, v2_sess_key, nls_cp); 739 rc = setup_ntlmv2_rsp(ses, nls_cp);
737 if (rc) { 740 if (rc) {
738 cERROR(1, "Error %d during NTLMv2 authentication", rc); 741 cERROR(1, "Error %d during NTLMv2 authentication", rc);
739 kfree(v2_sess_key);
740 goto ssetup_exit; 742 goto ssetup_exit;
741 } 743 }
742 memcpy(bcc_ptr, (char *)v2_sess_key, 744 memcpy(bcc_ptr, ses->auth_key.response + CIFS_SESS_KEY_SIZE,
743 sizeof(struct ntlmv2_resp)); 745 ses->auth_key.len - CIFS_SESS_KEY_SIZE);
744 bcc_ptr += sizeof(struct ntlmv2_resp); 746 bcc_ptr += ses->auth_key.len - CIFS_SESS_KEY_SIZE;
745 kfree(v2_sess_key); 747
746 /* set case sensitive password length after tilen may get 748 /* set case sensitive password length after tilen may get
747 * assigned, tilen is 0 otherwise. 749 * assigned, tilen is 0 otherwise.
748 */ 750 */
749 pSMB->req_no_secext.CaseSensitivePasswordLength = 751 pSMB->req_no_secext.CaseSensitivePasswordLength =
750 cpu_to_le16(sizeof(struct ntlmv2_resp) + ses->tilen); 752 cpu_to_le16(ses->auth_key.len - CIFS_SESS_KEY_SIZE);
751 if (ses->tilen > 0) {
752 memcpy(bcc_ptr, ses->tiblob, ses->tilen);
753 bcc_ptr += ses->tilen;
754 /* we never did allocate ses->domainName to free */
755 kfree(ses->tiblob);
756 ses->tiblob = NULL;
757 ses->tilen = 0;
758 }
759 753
760 if (ses->capabilities & CAP_UNICODE) { 754 if (ses->capabilities & CAP_UNICODE) {
761 if (iov[0].iov_len % 2) { 755 if (iov[0].iov_len % 2) {
@@ -768,6 +762,7 @@ ssetup_ntlmssp_authenticate:
768 } else if (type == Kerberos) { 762 } else if (type == Kerberos) {
769#ifdef CONFIG_CIFS_UPCALL 763#ifdef CONFIG_CIFS_UPCALL
770 struct cifs_spnego_msg *msg; 764 struct cifs_spnego_msg *msg;
765
771 spnego_key = cifs_get_spnego_key(ses); 766 spnego_key = cifs_get_spnego_key(ses);
772 if (IS_ERR(spnego_key)) { 767 if (IS_ERR(spnego_key)) {
773 rc = PTR_ERR(spnego_key); 768 rc = PTR_ERR(spnego_key);
@@ -785,16 +780,17 @@ ssetup_ntlmssp_authenticate:
785 rc = -EKEYREJECTED; 780 rc = -EKEYREJECTED;
786 goto ssetup_exit; 781 goto ssetup_exit;
787 } 782 }
788 /* bail out if key is too long */ 783
789 if (msg->sesskey_len > 784 ses->auth_key.response = kmalloc(msg->sesskey_len, GFP_KERNEL);
790 sizeof(ses->auth_key.data.krb5)) { 785 if (!ses->auth_key.response) {
791 cERROR(1, "Kerberos signing key too long (%u bytes)", 786 cERROR(1, "Kerberos can't allocate (%u bytes) memory",
792 msg->sesskey_len); 787 msg->sesskey_len);
793 rc = -EOVERFLOW; 788 rc = -ENOMEM;
794 goto ssetup_exit; 789 goto ssetup_exit;
795 } 790 }
791 memcpy(ses->auth_key.response, msg->data, msg->sesskey_len);
796 ses->auth_key.len = msg->sesskey_len; 792 ses->auth_key.len = msg->sesskey_len;
797 memcpy(ses->auth_key.data.krb5, msg->data, msg->sesskey_len); 793
798 pSMB->req.hdr.Flags2 |= SMBFLG2_EXT_SEC; 794 pSMB->req.hdr.Flags2 |= SMBFLG2_EXT_SEC;
799 capabilities |= CAP_EXTENDED_SECURITY; 795 capabilities |= CAP_EXTENDED_SECURITY;
800 pSMB->req.Capabilities = cpu_to_le32(capabilities); 796 pSMB->req.Capabilities = cpu_to_le32(capabilities);
@@ -897,8 +893,6 @@ ssetup_ntlmssp_authenticate:
897 CIFS_STD_OP /* not long */ | CIFS_LOG_ERROR); 893 CIFS_STD_OP /* not long */ | CIFS_LOG_ERROR);
898 /* SMB request buf freed in SendReceive2 */ 894 /* SMB request buf freed in SendReceive2 */
899 895
900 cFYI(1, "ssetup rc from sendrecv2 is %d", rc);
901
902 pSMB = (SESSION_SETUP_ANDX *)iov[0].iov_base; 896 pSMB = (SESSION_SETUP_ANDX *)iov[0].iov_base;
903 smb_buf = (struct smb_hdr *)iov[0].iov_base; 897 smb_buf = (struct smb_hdr *)iov[0].iov_base;
904 898
diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c
index a66c91eb6eb4..e0588cdf4cc5 100644
--- a/fs/cifs/transport.c
+++ b/fs/cifs/transport.c
@@ -543,7 +543,7 @@ SendReceive2(const unsigned int xid, struct cifsSesInfo *ses,
543 (ses->server->secMode & (SECMODE_SIGN_REQUIRED | 543 (ses->server->secMode & (SECMODE_SIGN_REQUIRED |
544 SECMODE_SIGN_ENABLED))) { 544 SECMODE_SIGN_ENABLED))) {
545 rc = cifs_verify_signature(midQ->resp_buf, 545 rc = cifs_verify_signature(midQ->resp_buf,
546 &ses->server->session_key, 546 ses->server,
547 midQ->sequence_number+1); 547 midQ->sequence_number+1);
548 if (rc) { 548 if (rc) {
549 cERROR(1, "Unexpected SMB signature"); 549 cERROR(1, "Unexpected SMB signature");
@@ -731,7 +731,7 @@ SendReceive(const unsigned int xid, struct cifsSesInfo *ses,
731 (ses->server->secMode & (SECMODE_SIGN_REQUIRED | 731 (ses->server->secMode & (SECMODE_SIGN_REQUIRED |
732 SECMODE_SIGN_ENABLED))) { 732 SECMODE_SIGN_ENABLED))) {
733 rc = cifs_verify_signature(out_buf, 733 rc = cifs_verify_signature(out_buf,
734 &ses->server->session_key, 734 ses->server,
735 midQ->sequence_number+1); 735 midQ->sequence_number+1);
736 if (rc) { 736 if (rc) {
737 cERROR(1, "Unexpected SMB signature"); 737 cERROR(1, "Unexpected SMB signature");
@@ -981,7 +981,7 @@ SendReceiveBlockingLock(const unsigned int xid, struct cifsTconInfo *tcon,
981 (ses->server->secMode & (SECMODE_SIGN_REQUIRED | 981 (ses->server->secMode & (SECMODE_SIGN_REQUIRED |
982 SECMODE_SIGN_ENABLED))) { 982 SECMODE_SIGN_ENABLED))) {
983 rc = cifs_verify_signature(out_buf, 983 rc = cifs_verify_signature(out_buf,
984 &ses->server->session_key, 984 ses->server,
985 midQ->sequence_number+1); 985 midQ->sequence_number+1);
986 if (rc) { 986 if (rc) {
987 cERROR(1, "Unexpected SMB signature"); 987 cERROR(1, "Unexpected SMB signature");
diff --git a/fs/coda/cache.c b/fs/coda/cache.c
index a5bf5771a22a..9060f08e70cf 100644
--- a/fs/coda/cache.c
+++ b/fs/coda/cache.c
@@ -17,6 +17,7 @@
17#include <linux/string.h> 17#include <linux/string.h>
18#include <linux/list.h> 18#include <linux/list.h>
19#include <linux/sched.h> 19#include <linux/sched.h>
20#include <linux/spinlock.h>
20 21
21#include <linux/coda.h> 22#include <linux/coda.h>
22#include <linux/coda_linux.h> 23#include <linux/coda_linux.h>
@@ -31,19 +32,23 @@ void coda_cache_enter(struct inode *inode, int mask)
31{ 32{
32 struct coda_inode_info *cii = ITOC(inode); 33 struct coda_inode_info *cii = ITOC(inode);
33 34
35 spin_lock(&cii->c_lock);
34 cii->c_cached_epoch = atomic_read(&permission_epoch); 36 cii->c_cached_epoch = atomic_read(&permission_epoch);
35 if (cii->c_uid != current_fsuid()) { 37 if (cii->c_uid != current_fsuid()) {
36 cii->c_uid = current_fsuid(); 38 cii->c_uid = current_fsuid();
37 cii->c_cached_perm = mask; 39 cii->c_cached_perm = mask;
38 } else 40 } else
39 cii->c_cached_perm |= mask; 41 cii->c_cached_perm |= mask;
42 spin_unlock(&cii->c_lock);
40} 43}
41 44
42/* remove cached acl from an inode */ 45/* remove cached acl from an inode */
43void coda_cache_clear_inode(struct inode *inode) 46void coda_cache_clear_inode(struct inode *inode)
44{ 47{
45 struct coda_inode_info *cii = ITOC(inode); 48 struct coda_inode_info *cii = ITOC(inode);
49 spin_lock(&cii->c_lock);
46 cii->c_cached_epoch = atomic_read(&permission_epoch) - 1; 50 cii->c_cached_epoch = atomic_read(&permission_epoch) - 1;
51 spin_unlock(&cii->c_lock);
47} 52}
48 53
49/* remove all acl caches */ 54/* remove all acl caches */
@@ -57,13 +62,15 @@ void coda_cache_clear_all(struct super_block *sb)
57int coda_cache_check(struct inode *inode, int mask) 62int coda_cache_check(struct inode *inode, int mask)
58{ 63{
59 struct coda_inode_info *cii = ITOC(inode); 64 struct coda_inode_info *cii = ITOC(inode);
60 int hit; 65 int hit;
61 66
62 hit = (mask & cii->c_cached_perm) == mask && 67 spin_lock(&cii->c_lock);
63 cii->c_uid == current_fsuid() && 68 hit = (mask & cii->c_cached_perm) == mask &&
64 cii->c_cached_epoch == atomic_read(&permission_epoch); 69 cii->c_uid == current_fsuid() &&
70 cii->c_cached_epoch == atomic_read(&permission_epoch);
71 spin_unlock(&cii->c_lock);
65 72
66 return hit; 73 return hit;
67} 74}
68 75
69 76
diff --git a/fs/coda/cnode.c b/fs/coda/cnode.c
index a7a780929eec..602240569c89 100644
--- a/fs/coda/cnode.c
+++ b/fs/coda/cnode.c
@@ -45,13 +45,15 @@ static void coda_fill_inode(struct inode *inode, struct coda_vattr *attr)
45static int coda_test_inode(struct inode *inode, void *data) 45static int coda_test_inode(struct inode *inode, void *data)
46{ 46{
47 struct CodaFid *fid = (struct CodaFid *)data; 47 struct CodaFid *fid = (struct CodaFid *)data;
48 return coda_fideq(&(ITOC(inode)->c_fid), fid); 48 struct coda_inode_info *cii = ITOC(inode);
49 return coda_fideq(&cii->c_fid, fid);
49} 50}
50 51
51static int coda_set_inode(struct inode *inode, void *data) 52static int coda_set_inode(struct inode *inode, void *data)
52{ 53{
53 struct CodaFid *fid = (struct CodaFid *)data; 54 struct CodaFid *fid = (struct CodaFid *)data;
54 ITOC(inode)->c_fid = *fid; 55 struct coda_inode_info *cii = ITOC(inode);
56 cii->c_fid = *fid;
55 return 0; 57 return 0;
56} 58}
57 59
@@ -71,6 +73,7 @@ struct inode * coda_iget(struct super_block * sb, struct CodaFid * fid,
71 cii = ITOC(inode); 73 cii = ITOC(inode);
72 /* we still need to set i_ino for things like stat(2) */ 74 /* we still need to set i_ino for things like stat(2) */
73 inode->i_ino = hash; 75 inode->i_ino = hash;
76 /* inode is locked and unique, no need to grab cii->c_lock */
74 cii->c_mapcount = 0; 77 cii->c_mapcount = 0;
75 unlock_new_inode(inode); 78 unlock_new_inode(inode);
76 } 79 }
@@ -107,14 +110,20 @@ int coda_cnode_make(struct inode **inode, struct CodaFid *fid, struct super_bloc
107} 110}
108 111
109 112
113/* Although we treat Coda file identifiers as immutable, there is one
114 * special case for files created during a disconnection where they may
115 * not be globally unique. When an identifier collision is detected we
116 * first try to flush the cached inode from the kernel and finally
117 * resort to renaming/rehashing in-place. Userspace remembers both old
118 * and new values of the identifier to handle any in-flight upcalls.
119 * The real solution is to use globally unique UUIDs as identifiers, but
120 * retrofitting the existing userspace code for this is non-trivial. */
110void coda_replace_fid(struct inode *inode, struct CodaFid *oldfid, 121void coda_replace_fid(struct inode *inode, struct CodaFid *oldfid,
111 struct CodaFid *newfid) 122 struct CodaFid *newfid)
112{ 123{
113 struct coda_inode_info *cii; 124 struct coda_inode_info *cii = ITOC(inode);
114 unsigned long hash = coda_f2i(newfid); 125 unsigned long hash = coda_f2i(newfid);
115 126
116 cii = ITOC(inode);
117
118 BUG_ON(!coda_fideq(&cii->c_fid, oldfid)); 127 BUG_ON(!coda_fideq(&cii->c_fid, oldfid));
119 128
120 /* replace fid and rehash inode */ 129 /* replace fid and rehash inode */
diff --git a/fs/coda/dir.c b/fs/coda/dir.c
index ccd98b0f2b0b..5d8b35539601 100644
--- a/fs/coda/dir.c
+++ b/fs/coda/dir.c
@@ -17,7 +17,7 @@
17#include <linux/stat.h> 17#include <linux/stat.h>
18#include <linux/errno.h> 18#include <linux/errno.h>
19#include <linux/string.h> 19#include <linux/string.h>
20#include <linux/smp_lock.h> 20#include <linux/spinlock.h>
21 21
22#include <asm/uaccess.h> 22#include <asm/uaccess.h>
23 23
@@ -116,15 +116,11 @@ static struct dentry *coda_lookup(struct inode *dir, struct dentry *entry, struc
116 goto exit; 116 goto exit;
117 } 117 }
118 118
119 lock_kernel();
120
121 error = venus_lookup(dir->i_sb, coda_i2f(dir), name, length, 119 error = venus_lookup(dir->i_sb, coda_i2f(dir), name, length,
122 &type, &resfid); 120 &type, &resfid);
123 if (!error) 121 if (!error)
124 error = coda_cnode_make(&inode, &resfid, dir->i_sb); 122 error = coda_cnode_make(&inode, &resfid, dir->i_sb);
125 123
126 unlock_kernel();
127
128 if (error && error != -ENOENT) 124 if (error && error != -ENOENT)
129 return ERR_PTR(error); 125 return ERR_PTR(error);
130 126
@@ -140,28 +136,24 @@ exit:
140 136
141int coda_permission(struct inode *inode, int mask) 137int coda_permission(struct inode *inode, int mask)
142{ 138{
143 int error = 0; 139 int error;
144 140
145 mask &= MAY_READ | MAY_WRITE | MAY_EXEC; 141 mask &= MAY_READ | MAY_WRITE | MAY_EXEC;
146 142
147 if (!mask) 143 if (!mask)
148 return 0; 144 return 0;
149 145
150 if ((mask & MAY_EXEC) && !execute_ok(inode)) 146 if ((mask & MAY_EXEC) && !execute_ok(inode))
151 return -EACCES; 147 return -EACCES;
152 148
153 lock_kernel();
154
155 if (coda_cache_check(inode, mask)) 149 if (coda_cache_check(inode, mask))
156 goto out; 150 return 0;
157 151
158 error = venus_access(inode->i_sb, coda_i2f(inode), mask); 152 error = venus_access(inode->i_sb, coda_i2f(inode), mask);
159 153
160 if (!error) 154 if (!error)
161 coda_cache_enter(inode, mask); 155 coda_cache_enter(inode, mask);
162 156
163 out:
164 unlock_kernel();
165 return error; 157 return error;
166} 158}
167 159
@@ -200,41 +192,34 @@ static inline void coda_dir_drop_nlink(struct inode *dir)
200/* creation routines: create, mknod, mkdir, link, symlink */ 192/* creation routines: create, mknod, mkdir, link, symlink */
201static int coda_create(struct inode *dir, struct dentry *de, int mode, struct nameidata *nd) 193static int coda_create(struct inode *dir, struct dentry *de, int mode, struct nameidata *nd)
202{ 194{
203 int error=0; 195 int error;
204 const char *name=de->d_name.name; 196 const char *name=de->d_name.name;
205 int length=de->d_name.len; 197 int length=de->d_name.len;
206 struct inode *inode; 198 struct inode *inode;
207 struct CodaFid newfid; 199 struct CodaFid newfid;
208 struct coda_vattr attrs; 200 struct coda_vattr attrs;
209 201
210 lock_kernel(); 202 if (coda_isroot(dir) && coda_iscontrol(name, length))
211
212 if (coda_isroot(dir) && coda_iscontrol(name, length)) {
213 unlock_kernel();
214 return -EPERM; 203 return -EPERM;
215 }
216 204
217 error = venus_create(dir->i_sb, coda_i2f(dir), name, length, 205 error = venus_create(dir->i_sb, coda_i2f(dir), name, length,
218 0, mode, &newfid, &attrs); 206 0, mode, &newfid, &attrs);
219 207 if (error)
220 if ( error ) { 208 goto err_out;
221 unlock_kernel();
222 d_drop(de);
223 return error;
224 }
225 209
226 inode = coda_iget(dir->i_sb, &newfid, &attrs); 210 inode = coda_iget(dir->i_sb, &newfid, &attrs);
227 if ( IS_ERR(inode) ) { 211 if (IS_ERR(inode)) {
228 unlock_kernel(); 212 error = PTR_ERR(inode);
229 d_drop(de); 213 goto err_out;
230 return PTR_ERR(inode);
231 } 214 }
232 215
233 /* invalidate the directory cnode's attributes */ 216 /* invalidate the directory cnode's attributes */
234 coda_dir_update_mtime(dir); 217 coda_dir_update_mtime(dir);
235 unlock_kernel();
236 d_instantiate(de, inode); 218 d_instantiate(de, inode);
237 return 0; 219 return 0;
220err_out:
221 d_drop(de);
222 return error;
238} 223}
239 224
240static int coda_mkdir(struct inode *dir, struct dentry *de, int mode) 225static int coda_mkdir(struct inode *dir, struct dentry *de, int mode)
@@ -246,36 +231,29 @@ static int coda_mkdir(struct inode *dir, struct dentry *de, int mode)
246 int error; 231 int error;
247 struct CodaFid newfid; 232 struct CodaFid newfid;
248 233
249 lock_kernel(); 234 if (coda_isroot(dir) && coda_iscontrol(name, len))
250
251 if (coda_isroot(dir) && coda_iscontrol(name, len)) {
252 unlock_kernel();
253 return -EPERM; 235 return -EPERM;
254 }
255 236
256 attrs.va_mode = mode; 237 attrs.va_mode = mode;
257 error = venus_mkdir(dir->i_sb, coda_i2f(dir), 238 error = venus_mkdir(dir->i_sb, coda_i2f(dir),
258 name, len, &newfid, &attrs); 239 name, len, &newfid, &attrs);
259 240 if (error)
260 if ( error ) { 241 goto err_out;
261 unlock_kernel();
262 d_drop(de);
263 return error;
264 }
265 242
266 inode = coda_iget(dir->i_sb, &newfid, &attrs); 243 inode = coda_iget(dir->i_sb, &newfid, &attrs);
267 if ( IS_ERR(inode) ) { 244 if (IS_ERR(inode)) {
268 unlock_kernel(); 245 error = PTR_ERR(inode);
269 d_drop(de); 246 goto err_out;
270 return PTR_ERR(inode);
271 } 247 }
272 248
273 /* invalidate the directory cnode's attributes */ 249 /* invalidate the directory cnode's attributes */
274 coda_dir_inc_nlink(dir); 250 coda_dir_inc_nlink(dir);
275 coda_dir_update_mtime(dir); 251 coda_dir_update_mtime(dir);
276 unlock_kernel();
277 d_instantiate(de, inode); 252 d_instantiate(de, inode);
278 return 0; 253 return 0;
254err_out:
255 d_drop(de);
256 return error;
279} 257}
280 258
281/* try to make de an entry in dir_inode linked to source_de */ 259/* try to make de an entry in dir_inode linked to source_de */
@@ -287,52 +265,38 @@ static int coda_link(struct dentry *source_de, struct inode *dir_inode,
287 int len = de->d_name.len; 265 int len = de->d_name.len;
288 int error; 266 int error;
289 267
290 lock_kernel(); 268 if (coda_isroot(dir_inode) && coda_iscontrol(name, len))
291
292 if (coda_isroot(dir_inode) && coda_iscontrol(name, len)) {
293 unlock_kernel();
294 return -EPERM; 269 return -EPERM;
295 }
296 270
297 error = venus_link(dir_inode->i_sb, coda_i2f(inode), 271 error = venus_link(dir_inode->i_sb, coda_i2f(inode),
298 coda_i2f(dir_inode), (const char *)name, len); 272 coda_i2f(dir_inode), (const char *)name, len);
299
300 if (error) { 273 if (error) {
301 d_drop(de); 274 d_drop(de);
302 goto out; 275 return error;
303 } 276 }
304 277
305 coda_dir_update_mtime(dir_inode); 278 coda_dir_update_mtime(dir_inode);
306 atomic_inc(&inode->i_count); 279 ihold(inode);
307 d_instantiate(de, inode); 280 d_instantiate(de, inode);
308 inc_nlink(inode); 281 inc_nlink(inode);
309 282 return 0;
310out:
311 unlock_kernel();
312 return(error);
313} 283}
314 284
315 285
316static int coda_symlink(struct inode *dir_inode, struct dentry *de, 286static int coda_symlink(struct inode *dir_inode, struct dentry *de,
317 const char *symname) 287 const char *symname)
318{ 288{
319 const char *name = de->d_name.name; 289 const char *name = de->d_name.name;
320 int len = de->d_name.len; 290 int len = de->d_name.len;
321 int symlen; 291 int symlen;
322 int error = 0; 292 int error;
323
324 lock_kernel();
325 293
326 if (coda_isroot(dir_inode) && coda_iscontrol(name, len)) { 294 if (coda_isroot(dir_inode) && coda_iscontrol(name, len))
327 unlock_kernel();
328 return -EPERM; 295 return -EPERM;
329 }
330 296
331 symlen = strlen(symname); 297 symlen = strlen(symname);
332 if ( symlen > CODA_MAXPATHLEN ) { 298 if (symlen > CODA_MAXPATHLEN)
333 unlock_kernel();
334 return -ENAMETOOLONG; 299 return -ENAMETOOLONG;
335 }
336 300
337 /* 301 /*
338 * This entry is now negative. Since we do not create 302 * This entry is now negative. Since we do not create
@@ -343,10 +307,9 @@ static int coda_symlink(struct inode *dir_inode, struct dentry *de,
343 symname, symlen); 307 symname, symlen);
344 308
345 /* mtime is no good anymore */ 309 /* mtime is no good anymore */
346 if ( !error ) 310 if (!error)
347 coda_dir_update_mtime(dir_inode); 311 coda_dir_update_mtime(dir_inode);
348 312
349 unlock_kernel();
350 return error; 313 return error;
351} 314}
352 315
@@ -357,17 +320,12 @@ static int coda_unlink(struct inode *dir, struct dentry *de)
357 const char *name = de->d_name.name; 320 const char *name = de->d_name.name;
358 int len = de->d_name.len; 321 int len = de->d_name.len;
359 322
360 lock_kernel();
361
362 error = venus_remove(dir->i_sb, coda_i2f(dir), name, len); 323 error = venus_remove(dir->i_sb, coda_i2f(dir), name, len);
363 if ( error ) { 324 if (error)
364 unlock_kernel();
365 return error; 325 return error;
366 }
367 326
368 coda_dir_update_mtime(dir); 327 coda_dir_update_mtime(dir);
369 drop_nlink(de->d_inode); 328 drop_nlink(de->d_inode);
370 unlock_kernel();
371 return 0; 329 return 0;
372} 330}
373 331
@@ -377,8 +335,6 @@ static int coda_rmdir(struct inode *dir, struct dentry *de)
377 int len = de->d_name.len; 335 int len = de->d_name.len;
378 int error; 336 int error;
379 337
380 lock_kernel();
381
382 error = venus_rmdir(dir->i_sb, coda_i2f(dir), name, len); 338 error = venus_rmdir(dir->i_sb, coda_i2f(dir), name, len);
383 if (!error) { 339 if (!error) {
384 /* VFS may delete the child */ 340 /* VFS may delete the child */
@@ -389,7 +345,6 @@ static int coda_rmdir(struct inode *dir, struct dentry *de)
389 coda_dir_drop_nlink(dir); 345 coda_dir_drop_nlink(dir);
390 coda_dir_update_mtime(dir); 346 coda_dir_update_mtime(dir);
391 } 347 }
392 unlock_kernel();
393 return error; 348 return error;
394} 349}
395 350
@@ -403,15 +358,12 @@ static int coda_rename(struct inode *old_dir, struct dentry *old_dentry,
403 int new_length = new_dentry->d_name.len; 358 int new_length = new_dentry->d_name.len;
404 int error; 359 int error;
405 360
406 lock_kernel();
407
408 error = venus_rename(old_dir->i_sb, coda_i2f(old_dir), 361 error = venus_rename(old_dir->i_sb, coda_i2f(old_dir),
409 coda_i2f(new_dir), old_length, new_length, 362 coda_i2f(new_dir), old_length, new_length,
410 (const char *) old_name, (const char *)new_name); 363 (const char *) old_name, (const char *)new_name);
411 364 if (!error) {
412 if ( !error ) { 365 if (new_dentry->d_inode) {
413 if ( new_dentry->d_inode ) { 366 if (S_ISDIR(new_dentry->d_inode->i_mode)) {
414 if ( S_ISDIR(new_dentry->d_inode->i_mode) ) {
415 coda_dir_drop_nlink(old_dir); 367 coda_dir_drop_nlink(old_dir);
416 coda_dir_inc_nlink(new_dir); 368 coda_dir_inc_nlink(new_dir);
417 } 369 }
@@ -423,8 +375,6 @@ static int coda_rename(struct inode *old_dir, struct dentry *old_dentry,
423 coda_flag_inode(new_dir, C_VATTR); 375 coda_flag_inode(new_dir, C_VATTR);
424 } 376 }
425 } 377 }
426 unlock_kernel();
427
428 return error; 378 return error;
429} 379}
430 380
@@ -594,10 +544,7 @@ static int coda_dentry_revalidate(struct dentry *de, struct nameidata *nd)
594 struct inode *inode = de->d_inode; 544 struct inode *inode = de->d_inode;
595 struct coda_inode_info *cii; 545 struct coda_inode_info *cii;
596 546
597 if (!inode) 547 if (!inode || coda_isroot(inode))
598 return 1;
599 lock_kernel();
600 if (coda_isroot(inode))
601 goto out; 548 goto out;
602 if (is_bad_inode(inode)) 549 if (is_bad_inode(inode))
603 goto bad; 550 goto bad;
@@ -617,13 +564,12 @@ static int coda_dentry_revalidate(struct dentry *de, struct nameidata *nd)
617 goto out; 564 goto out;
618 565
619 /* clear the flags. */ 566 /* clear the flags. */
567 spin_lock(&cii->c_lock);
620 cii->c_flags &= ~(C_VATTR | C_PURGE | C_FLUSH); 568 cii->c_flags &= ~(C_VATTR | C_PURGE | C_FLUSH);
621 569 spin_unlock(&cii->c_lock);
622bad: 570bad:
623 unlock_kernel();
624 return 0; 571 return 0;
625out: 572out:
626 unlock_kernel();
627 return 1; 573 return 1;
628} 574}
629 575
@@ -656,20 +602,19 @@ static int coda_dentry_delete(struct dentry * dentry)
656int coda_revalidate_inode(struct dentry *dentry) 602int coda_revalidate_inode(struct dentry *dentry)
657{ 603{
658 struct coda_vattr attr; 604 struct coda_vattr attr;
659 int error = 0; 605 int error;
660 int old_mode; 606 int old_mode;
661 ino_t old_ino; 607 ino_t old_ino;
662 struct inode *inode = dentry->d_inode; 608 struct inode *inode = dentry->d_inode;
663 struct coda_inode_info *cii = ITOC(inode); 609 struct coda_inode_info *cii = ITOC(inode);
664 610
665 lock_kernel(); 611 if (!cii->c_flags)
666 if ( !cii->c_flags ) 612 return 0;
667 goto ok;
668 613
669 if (cii->c_flags & (C_VATTR | C_PURGE | C_FLUSH)) { 614 if (cii->c_flags & (C_VATTR | C_PURGE | C_FLUSH)) {
670 error = venus_getattr(inode->i_sb, &(cii->c_fid), &attr); 615 error = venus_getattr(inode->i_sb, &(cii->c_fid), &attr);
671 if ( error ) 616 if (error)
672 goto return_bad; 617 return -EIO;
673 618
674 /* this inode may be lost if: 619 /* this inode may be lost if:
675 - its ino changed 620 - its ino changed
@@ -688,17 +633,13 @@ int coda_revalidate_inode(struct dentry *dentry)
688 /* the following can happen when a local fid is replaced 633 /* the following can happen when a local fid is replaced
689 with a global one, here we lose and declare the inode bad */ 634 with a global one, here we lose and declare the inode bad */
690 if (inode->i_ino != old_ino) 635 if (inode->i_ino != old_ino)
691 goto return_bad; 636 return -EIO;
692 637
693 coda_flag_inode_children(inode, C_FLUSH); 638 coda_flag_inode_children(inode, C_FLUSH);
639
640 spin_lock(&cii->c_lock);
694 cii->c_flags &= ~(C_VATTR | C_PURGE | C_FLUSH); 641 cii->c_flags &= ~(C_VATTR | C_PURGE | C_FLUSH);
642 spin_unlock(&cii->c_lock);
695 } 643 }
696
697ok:
698 unlock_kernel();
699 return 0; 644 return 0;
700
701return_bad:
702 unlock_kernel();
703 return -EIO;
704} 645}
diff --git a/fs/coda/file.c b/fs/coda/file.c
index ad3cd2abeeb4..c8b50ba4366a 100644
--- a/fs/coda/file.c
+++ b/fs/coda/file.c
@@ -15,7 +15,7 @@
15#include <linux/stat.h> 15#include <linux/stat.h>
16#include <linux/cred.h> 16#include <linux/cred.h>
17#include <linux/errno.h> 17#include <linux/errno.h>
18#include <linux/smp_lock.h> 18#include <linux/spinlock.h>
19#include <linux/string.h> 19#include <linux/string.h>
20#include <linux/slab.h> 20#include <linux/slab.h>
21#include <asm/uaccess.h> 21#include <asm/uaccess.h>
@@ -109,19 +109,24 @@ coda_file_mmap(struct file *coda_file, struct vm_area_struct *vma)
109 109
110 coda_inode = coda_file->f_path.dentry->d_inode; 110 coda_inode = coda_file->f_path.dentry->d_inode;
111 host_inode = host_file->f_path.dentry->d_inode; 111 host_inode = host_file->f_path.dentry->d_inode;
112
113 cii = ITOC(coda_inode);
114 spin_lock(&cii->c_lock);
112 coda_file->f_mapping = host_file->f_mapping; 115 coda_file->f_mapping = host_file->f_mapping;
113 if (coda_inode->i_mapping == &coda_inode->i_data) 116 if (coda_inode->i_mapping == &coda_inode->i_data)
114 coda_inode->i_mapping = host_inode->i_mapping; 117 coda_inode->i_mapping = host_inode->i_mapping;
115 118
116 /* only allow additional mmaps as long as userspace isn't changing 119 /* only allow additional mmaps as long as userspace isn't changing
117 * the container file on us! */ 120 * the container file on us! */
118 else if (coda_inode->i_mapping != host_inode->i_mapping) 121 else if (coda_inode->i_mapping != host_inode->i_mapping) {
122 spin_unlock(&cii->c_lock);
119 return -EBUSY; 123 return -EBUSY;
124 }
120 125
121 /* keep track of how often the coda_inode/host_file has been mmapped */ 126 /* keep track of how often the coda_inode/host_file has been mmapped */
122 cii = ITOC(coda_inode);
123 cii->c_mapcount++; 127 cii->c_mapcount++;
124 cfi->cfi_mapcount++; 128 cfi->cfi_mapcount++;
129 spin_unlock(&cii->c_lock);
125 130
126 return host_file->f_op->mmap(host_file, vma); 131 return host_file->f_op->mmap(host_file, vma);
127} 132}
@@ -138,8 +143,6 @@ int coda_open(struct inode *coda_inode, struct file *coda_file)
138 if (!cfi) 143 if (!cfi)
139 return -ENOMEM; 144 return -ENOMEM;
140 145
141 lock_kernel();
142
143 error = venus_open(coda_inode->i_sb, coda_i2f(coda_inode), coda_flags, 146 error = venus_open(coda_inode->i_sb, coda_i2f(coda_inode), coda_flags,
144 &host_file); 147 &host_file);
145 if (!host_file) 148 if (!host_file)
@@ -147,7 +150,6 @@ int coda_open(struct inode *coda_inode, struct file *coda_file)
147 150
148 if (error) { 151 if (error) {
149 kfree(cfi); 152 kfree(cfi);
150 unlock_kernel();
151 return error; 153 return error;
152 } 154 }
153 155
@@ -159,8 +161,6 @@ int coda_open(struct inode *coda_inode, struct file *coda_file)
159 161
160 BUG_ON(coda_file->private_data != NULL); 162 BUG_ON(coda_file->private_data != NULL);
161 coda_file->private_data = cfi; 163 coda_file->private_data = cfi;
162
163 unlock_kernel();
164 return 0; 164 return 0;
165} 165}
166 166
@@ -171,9 +171,7 @@ int coda_release(struct inode *coda_inode, struct file *coda_file)
171 struct coda_file_info *cfi; 171 struct coda_file_info *cfi;
172 struct coda_inode_info *cii; 172 struct coda_inode_info *cii;
173 struct inode *host_inode; 173 struct inode *host_inode;
174 int err = 0; 174 int err;
175
176 lock_kernel();
177 175
178 cfi = CODA_FTOC(coda_file); 176 cfi = CODA_FTOC(coda_file);
179 BUG_ON(!cfi || cfi->cfi_magic != CODA_MAGIC); 177 BUG_ON(!cfi || cfi->cfi_magic != CODA_MAGIC);
@@ -185,18 +183,18 @@ int coda_release(struct inode *coda_inode, struct file *coda_file)
185 cii = ITOC(coda_inode); 183 cii = ITOC(coda_inode);
186 184
187 /* did we mmap this file? */ 185 /* did we mmap this file? */
186 spin_lock(&cii->c_lock);
188 if (coda_inode->i_mapping == &host_inode->i_data) { 187 if (coda_inode->i_mapping == &host_inode->i_data) {
189 cii->c_mapcount -= cfi->cfi_mapcount; 188 cii->c_mapcount -= cfi->cfi_mapcount;
190 if (!cii->c_mapcount) 189 if (!cii->c_mapcount)
191 coda_inode->i_mapping = &coda_inode->i_data; 190 coda_inode->i_mapping = &coda_inode->i_data;
192 } 191 }
192 spin_unlock(&cii->c_lock);
193 193
194 fput(cfi->cfi_container); 194 fput(cfi->cfi_container);
195 kfree(coda_file->private_data); 195 kfree(coda_file->private_data);
196 coda_file->private_data = NULL; 196 coda_file->private_data = NULL;
197 197
198 unlock_kernel();
199
200 /* VFS fput ignores the return value from file_operations->release, so 198 /* VFS fput ignores the return value from file_operations->release, so
201 * there is no use returning an error here */ 199 * there is no use returning an error here */
202 return 0; 200 return 0;
@@ -207,7 +205,7 @@ int coda_fsync(struct file *coda_file, int datasync)
207 struct file *host_file; 205 struct file *host_file;
208 struct inode *coda_inode = coda_file->f_path.dentry->d_inode; 206 struct inode *coda_inode = coda_file->f_path.dentry->d_inode;
209 struct coda_file_info *cfi; 207 struct coda_file_info *cfi;
210 int err = 0; 208 int err;
211 209
212 if (!(S_ISREG(coda_inode->i_mode) || S_ISDIR(coda_inode->i_mode) || 210 if (!(S_ISREG(coda_inode->i_mode) || S_ISDIR(coda_inode->i_mode) ||
213 S_ISLNK(coda_inode->i_mode))) 211 S_ISLNK(coda_inode->i_mode)))
@@ -218,11 +216,8 @@ int coda_fsync(struct file *coda_file, int datasync)
218 host_file = cfi->cfi_container; 216 host_file = cfi->cfi_container;
219 217
220 err = vfs_fsync(host_file, datasync); 218 err = vfs_fsync(host_file, datasync);
221 if ( !err && !datasync ) { 219 if (!err && !datasync)
222 lock_kernel();
223 err = venus_fsync(coda_inode->i_sb, coda_i2f(coda_inode)); 220 err = venus_fsync(coda_inode->i_sb, coda_i2f(coda_inode));
224 unlock_kernel();
225 }
226 221
227 return err; 222 return err;
228} 223}
diff --git a/fs/coda/inode.c b/fs/coda/inode.c
index bfe8179b1295..5ea57c8c7f97 100644
--- a/fs/coda/inode.c
+++ b/fs/coda/inode.c
@@ -15,7 +15,8 @@
15#include <linux/stat.h> 15#include <linux/stat.h>
16#include <linux/errno.h> 16#include <linux/errno.h>
17#include <linux/unistd.h> 17#include <linux/unistd.h>
18#include <linux/smp_lock.h> 18#include <linux/mutex.h>
19#include <linux/spinlock.h>
19#include <linux/file.h> 20#include <linux/file.h>
20#include <linux/vfs.h> 21#include <linux/vfs.h>
21#include <linux/slab.h> 22#include <linux/slab.h>
@@ -51,6 +52,7 @@ static struct inode *coda_alloc_inode(struct super_block *sb)
51 ei->c_flags = 0; 52 ei->c_flags = 0;
52 ei->c_uid = 0; 53 ei->c_uid = 0;
53 ei->c_cached_perm = 0; 54 ei->c_cached_perm = 0;
55 spin_lock_init(&ei->c_lock);
54 return &ei->vfs_inode; 56 return &ei->vfs_inode;
55} 57}
56 58
@@ -143,13 +145,11 @@ static int get_device_index(struct coda_mount_data *data)
143static int coda_fill_super(struct super_block *sb, void *data, int silent) 145static int coda_fill_super(struct super_block *sb, void *data, int silent)
144{ 146{
145 struct inode *root = NULL; 147 struct inode *root = NULL;
146 struct venus_comm *vc = NULL; 148 struct venus_comm *vc;
147 struct CodaFid fid; 149 struct CodaFid fid;
148 int error; 150 int error;
149 int idx; 151 int idx;
150 152
151 lock_kernel();
152
153 idx = get_device_index((struct coda_mount_data *) data); 153 idx = get_device_index((struct coda_mount_data *) data);
154 154
155 /* Ignore errors in data, for backward compatibility */ 155 /* Ignore errors in data, for backward compatibility */
@@ -159,23 +159,26 @@ static int coda_fill_super(struct super_block *sb, void *data, int silent)
159 printk(KERN_INFO "coda_read_super: device index: %i\n", idx); 159 printk(KERN_INFO "coda_read_super: device index: %i\n", idx);
160 160
161 vc = &coda_comms[idx]; 161 vc = &coda_comms[idx];
162 mutex_lock(&vc->vc_mutex);
163
162 if (!vc->vc_inuse) { 164 if (!vc->vc_inuse) {
163 printk("coda_read_super: No pseudo device\n"); 165 printk("coda_read_super: No pseudo device\n");
164 unlock_kernel(); 166 error = -EINVAL;
165 return -EINVAL; 167 goto unlock_out;
166 } 168 }
167 169
168 if ( vc->vc_sb ) { 170 if (vc->vc_sb) {
169 printk("coda_read_super: Device already mounted\n"); 171 printk("coda_read_super: Device already mounted\n");
170 unlock_kernel(); 172 error = -EBUSY;
171 return -EBUSY; 173 goto unlock_out;
172 } 174 }
173 175
174 error = bdi_setup_and_register(&vc->bdi, "coda", BDI_CAP_MAP_COPY); 176 error = bdi_setup_and_register(&vc->bdi, "coda", BDI_CAP_MAP_COPY);
175 if (error) 177 if (error)
176 goto bdi_err; 178 goto unlock_out;
177 179
178 vc->vc_sb = sb; 180 vc->vc_sb = sb;
181 mutex_unlock(&vc->vc_mutex);
179 182
180 sb->s_fs_info = vc; 183 sb->s_fs_info = vc;
181 sb->s_flags |= MS_NOATIME; 184 sb->s_flags |= MS_NOATIME;
@@ -204,28 +207,33 @@ static int coda_fill_super(struct super_block *sb, void *data, int silent)
204 printk("coda_read_super: rootinode is %ld dev %s\n", 207 printk("coda_read_super: rootinode is %ld dev %s\n",
205 root->i_ino, root->i_sb->s_id); 208 root->i_ino, root->i_sb->s_id);
206 sb->s_root = d_alloc_root(root); 209 sb->s_root = d_alloc_root(root);
207 if (!sb->s_root) 210 if (!sb->s_root) {
211 error = -EINVAL;
208 goto error; 212 goto error;
209 unlock_kernel(); 213 }
210 return 0; 214 return 0;
211 215
212 error: 216error:
213 bdi_destroy(&vc->bdi);
214 bdi_err:
215 if (root) 217 if (root)
216 iput(root); 218 iput(root);
217 if (vc)
218 vc->vc_sb = NULL;
219 219
220 unlock_kernel(); 220 mutex_lock(&vc->vc_mutex);
221 return -EINVAL; 221 bdi_destroy(&vc->bdi);
222 vc->vc_sb = NULL;
223 sb->s_fs_info = NULL;
224unlock_out:
225 mutex_unlock(&vc->vc_mutex);
226 return error;
222} 227}
223 228
224static void coda_put_super(struct super_block *sb) 229static void coda_put_super(struct super_block *sb)
225{ 230{
226 bdi_destroy(&coda_vcp(sb)->bdi); 231 struct venus_comm *vcp = coda_vcp(sb);
227 coda_vcp(sb)->vc_sb = NULL; 232 mutex_lock(&vcp->vc_mutex);
233 bdi_destroy(&vcp->bdi);
234 vcp->vc_sb = NULL;
228 sb->s_fs_info = NULL; 235 sb->s_fs_info = NULL;
236 mutex_unlock(&vcp->vc_mutex);
229 237
230 printk("Coda: Bye bye.\n"); 238 printk("Coda: Bye bye.\n");
231} 239}
@@ -251,8 +259,6 @@ int coda_setattr(struct dentry *de, struct iattr *iattr)
251 struct coda_vattr vattr; 259 struct coda_vattr vattr;
252 int error; 260 int error;
253 261
254 lock_kernel();
255
256 memset(&vattr, 0, sizeof(vattr)); 262 memset(&vattr, 0, sizeof(vattr));
257 263
258 inode->i_ctime = CURRENT_TIME_SEC; 264 inode->i_ctime = CURRENT_TIME_SEC;
@@ -262,13 +268,10 @@ int coda_setattr(struct dentry *de, struct iattr *iattr)
262 /* Venus is responsible for truncating the container-file!!! */ 268 /* Venus is responsible for truncating the container-file!!! */
263 error = venus_setattr(inode->i_sb, coda_i2f(inode), &vattr); 269 error = venus_setattr(inode->i_sb, coda_i2f(inode), &vattr);
264 270
265 if ( !error ) { 271 if (!error) {
266 coda_vattr_to_iattr(inode, &vattr); 272 coda_vattr_to_iattr(inode, &vattr);
267 coda_cache_clear_inode(inode); 273 coda_cache_clear_inode(inode);
268 } 274 }
269
270 unlock_kernel();
271
272 return error; 275 return error;
273} 276}
274 277
@@ -282,12 +285,8 @@ static int coda_statfs(struct dentry *dentry, struct kstatfs *buf)
282{ 285{
283 int error; 286 int error;
284 287
285 lock_kernel();
286
287 error = venus_statfs(dentry, buf); 288 error = venus_statfs(dentry, buf);
288 289
289 unlock_kernel();
290
291 if (error) { 290 if (error) {
292 /* fake something like AFS does */ 291 /* fake something like AFS does */
293 buf->f_blocks = 9000000; 292 buf->f_blocks = 9000000;
@@ -307,16 +306,16 @@ static int coda_statfs(struct dentry *dentry, struct kstatfs *buf)
307 306
308/* init_coda: used by filesystems.c to register coda */ 307/* init_coda: used by filesystems.c to register coda */
309 308
310static int coda_get_sb(struct file_system_type *fs_type, 309static struct dentry *coda_mount(struct file_system_type *fs_type,
311 int flags, const char *dev_name, void *data, struct vfsmount *mnt) 310 int flags, const char *dev_name, void *data)
312{ 311{
313 return get_sb_nodev(fs_type, flags, data, coda_fill_super, mnt); 312 return mount_nodev(fs_type, flags, data, coda_fill_super);
314} 313}
315 314
316struct file_system_type coda_fs_type = { 315struct file_system_type coda_fs_type = {
317 .owner = THIS_MODULE, 316 .owner = THIS_MODULE,
318 .name = "coda", 317 .name = "coda",
319 .get_sb = coda_get_sb, 318 .mount = coda_mount,
320 .kill_sb = kill_anon_super, 319 .kill_sb = kill_anon_super,
321 .fs_flags = FS_BINARY_MOUNTDATA, 320 .fs_flags = FS_BINARY_MOUNTDATA,
322}; 321};
diff --git a/fs/coda/pioctl.c b/fs/coda/pioctl.c
index 028a9a0f588b..2fd89b5c5c7b 100644
--- a/fs/coda/pioctl.c
+++ b/fs/coda/pioctl.c
@@ -23,8 +23,6 @@
23#include <linux/coda_fs_i.h> 23#include <linux/coda_fs_i.h>
24#include <linux/coda_psdev.h> 24#include <linux/coda_psdev.h>
25 25
26#include <linux/smp_lock.h>
27
28/* pioctl ops */ 26/* pioctl ops */
29static int coda_ioctl_permission(struct inode *inode, int mask); 27static int coda_ioctl_permission(struct inode *inode, int mask);
30static long coda_pioctl(struct file *filp, unsigned int cmd, 28static long coda_pioctl(struct file *filp, unsigned int cmd,
@@ -58,13 +56,9 @@ static long coda_pioctl(struct file *filp, unsigned int cmd,
58 struct inode *target_inode = NULL; 56 struct inode *target_inode = NULL;
59 struct coda_inode_info *cnp; 57 struct coda_inode_info *cnp;
60 58
61 lock_kernel();
62
63 /* get the Pioctl data arguments from user space */ 59 /* get the Pioctl data arguments from user space */
64 if (copy_from_user(&data, (void __user *)user_data, sizeof(data))) { 60 if (copy_from_user(&data, (void __user *)user_data, sizeof(data)))
65 error = -EINVAL; 61 return -EINVAL;
66 goto out;
67 }
68 62
69 /* 63 /*
70 * Look up the pathname. Note that the pathname is in 64 * Look up the pathname. Note that the pathname is in
@@ -76,13 +70,12 @@ static long coda_pioctl(struct file *filp, unsigned int cmd,
76 error = user_lpath(data.path, &path); 70 error = user_lpath(data.path, &path);
77 71
78 if (error) 72 if (error)
79 goto out; 73 return error;
80 else 74
81 target_inode = path.dentry->d_inode; 75 target_inode = path.dentry->d_inode;
82 76
83 /* return if it is not a Coda inode */ 77 /* return if it is not a Coda inode */
84 if (target_inode->i_sb != inode->i_sb) { 78 if (target_inode->i_sb != inode->i_sb) {
85 path_put(&path);
86 error = -EINVAL; 79 error = -EINVAL;
87 goto out; 80 goto out;
88 } 81 }
@@ -91,10 +84,7 @@ static long coda_pioctl(struct file *filp, unsigned int cmd,
91 cnp = ITOC(target_inode); 84 cnp = ITOC(target_inode);
92 85
93 error = venus_pioctl(inode->i_sb, &(cnp->c_fid), cmd, &data); 86 error = venus_pioctl(inode->i_sb, &(cnp->c_fid), cmd, &data);
94
95 path_put(&path);
96
97out: 87out:
98 unlock_kernel(); 88 path_put(&path);
99 return error; 89 return error;
100} 90}
diff --git a/fs/coda/psdev.c b/fs/coda/psdev.c
index fdc2f3ef7ecd..62647a8595e4 100644
--- a/fs/coda/psdev.c
+++ b/fs/coda/psdev.c
@@ -35,7 +35,7 @@
35#include <linux/poll.h> 35#include <linux/poll.h>
36#include <linux/init.h> 36#include <linux/init.h>
37#include <linux/list.h> 37#include <linux/list.h>
38#include <linux/smp_lock.h> 38#include <linux/mutex.h>
39#include <linux/device.h> 39#include <linux/device.h>
40#include <asm/io.h> 40#include <asm/io.h>
41#include <asm/system.h> 41#include <asm/system.h>
@@ -67,8 +67,10 @@ static unsigned int coda_psdev_poll(struct file *file, poll_table * wait)
67 unsigned int mask = POLLOUT | POLLWRNORM; 67 unsigned int mask = POLLOUT | POLLWRNORM;
68 68
69 poll_wait(file, &vcp->vc_waitq, wait); 69 poll_wait(file, &vcp->vc_waitq, wait);
70 mutex_lock(&vcp->vc_mutex);
70 if (!list_empty(&vcp->vc_pending)) 71 if (!list_empty(&vcp->vc_pending))
71 mask |= POLLIN | POLLRDNORM; 72 mask |= POLLIN | POLLRDNORM;
73 mutex_unlock(&vcp->vc_mutex);
72 74
73 return mask; 75 return mask;
74} 76}
@@ -108,16 +110,9 @@ static ssize_t coda_psdev_write(struct file *file, const char __user *buf,
108 return -EFAULT; 110 return -EFAULT;
109 111
110 if (DOWNCALL(hdr.opcode)) { 112 if (DOWNCALL(hdr.opcode)) {
111 struct super_block *sb = NULL; 113 union outputArgs *dcbuf;
112 union outputArgs *dcbuf;
113 int size = sizeof(*dcbuf); 114 int size = sizeof(*dcbuf);
114 115
115 sb = vcp->vc_sb;
116 if ( !sb ) {
117 count = nbytes;
118 goto out;
119 }
120
121 if ( nbytes < sizeof(struct coda_out_hdr) ) { 116 if ( nbytes < sizeof(struct coda_out_hdr) ) {
122 printk("coda_downcall opc %d uniq %d, not enough!\n", 117 printk("coda_downcall opc %d uniq %d, not enough!\n",
123 hdr.opcode, hdr.unique); 118 hdr.opcode, hdr.unique);
@@ -137,9 +132,7 @@ static ssize_t coda_psdev_write(struct file *file, const char __user *buf,
137 } 132 }
138 133
139 /* what downcall errors does Venus handle ? */ 134 /* what downcall errors does Venus handle ? */
140 lock_kernel(); 135 error = coda_downcall(vcp, hdr.opcode, dcbuf);
141 error = coda_downcall(hdr.opcode, dcbuf, sb);
142 unlock_kernel();
143 136
144 CODA_FREE(dcbuf, nbytes); 137 CODA_FREE(dcbuf, nbytes);
145 if (error) { 138 if (error) {
@@ -152,7 +145,7 @@ static ssize_t coda_psdev_write(struct file *file, const char __user *buf,
152 } 145 }
153 146
154 /* Look for the message on the processing queue. */ 147 /* Look for the message on the processing queue. */
155 lock_kernel(); 148 mutex_lock(&vcp->vc_mutex);
156 list_for_each(lh, &vcp->vc_processing) { 149 list_for_each(lh, &vcp->vc_processing) {
157 tmp = list_entry(lh, struct upc_req , uc_chain); 150 tmp = list_entry(lh, struct upc_req , uc_chain);
158 if (tmp->uc_unique == hdr.unique) { 151 if (tmp->uc_unique == hdr.unique) {
@@ -161,7 +154,7 @@ static ssize_t coda_psdev_write(struct file *file, const char __user *buf,
161 break; 154 break;
162 } 155 }
163 } 156 }
164 unlock_kernel(); 157 mutex_unlock(&vcp->vc_mutex);
165 158
166 if (!req) { 159 if (!req) {
167 printk("psdev_write: msg (%d, %d) not found\n", 160 printk("psdev_write: msg (%d, %d) not found\n",
@@ -216,7 +209,7 @@ static ssize_t coda_psdev_read(struct file * file, char __user * buf,
216 if (nbytes == 0) 209 if (nbytes == 0)
217 return 0; 210 return 0;
218 211
219 lock_kernel(); 212 mutex_lock(&vcp->vc_mutex);
220 213
221 add_wait_queue(&vcp->vc_waitq, &wait); 214 add_wait_queue(&vcp->vc_waitq, &wait);
222 set_current_state(TASK_INTERRUPTIBLE); 215 set_current_state(TASK_INTERRUPTIBLE);
@@ -230,7 +223,9 @@ static ssize_t coda_psdev_read(struct file * file, char __user * buf,
230 retval = -ERESTARTSYS; 223 retval = -ERESTARTSYS;
231 break; 224 break;
232 } 225 }
226 mutex_unlock(&vcp->vc_mutex);
233 schedule(); 227 schedule();
228 mutex_lock(&vcp->vc_mutex);
234 } 229 }
235 230
236 set_current_state(TASK_RUNNING); 231 set_current_state(TASK_RUNNING);
@@ -263,7 +258,7 @@ static ssize_t coda_psdev_read(struct file * file, char __user * buf,
263 CODA_FREE(req->uc_data, sizeof(struct coda_in_hdr)); 258 CODA_FREE(req->uc_data, sizeof(struct coda_in_hdr));
264 kfree(req); 259 kfree(req);
265out: 260out:
266 unlock_kernel(); 261 mutex_unlock(&vcp->vc_mutex);
267 return (count ? count : retval); 262 return (count ? count : retval);
268} 263}
269 264
@@ -276,10 +271,10 @@ static int coda_psdev_open(struct inode * inode, struct file * file)
276 if (idx < 0 || idx >= MAX_CODADEVS) 271 if (idx < 0 || idx >= MAX_CODADEVS)
277 return -ENODEV; 272 return -ENODEV;
278 273
279 lock_kernel();
280
281 err = -EBUSY; 274 err = -EBUSY;
282 vcp = &coda_comms[idx]; 275 vcp = &coda_comms[idx];
276 mutex_lock(&vcp->vc_mutex);
277
283 if (!vcp->vc_inuse) { 278 if (!vcp->vc_inuse) {
284 vcp->vc_inuse++; 279 vcp->vc_inuse++;
285 280
@@ -293,7 +288,7 @@ static int coda_psdev_open(struct inode * inode, struct file * file)
293 err = 0; 288 err = 0;
294 } 289 }
295 290
296 unlock_kernel(); 291 mutex_unlock(&vcp->vc_mutex);
297 return err; 292 return err;
298} 293}
299 294
@@ -308,7 +303,7 @@ static int coda_psdev_release(struct inode * inode, struct file * file)
308 return -1; 303 return -1;
309 } 304 }
310 305
311 lock_kernel(); 306 mutex_lock(&vcp->vc_mutex);
312 307
313 /* Wakeup clients so they can return. */ 308 /* Wakeup clients so they can return. */
314 list_for_each_entry_safe(req, tmp, &vcp->vc_pending, uc_chain) { 309 list_for_each_entry_safe(req, tmp, &vcp->vc_pending, uc_chain) {
@@ -333,7 +328,7 @@ static int coda_psdev_release(struct inode * inode, struct file * file)
333 328
334 file->private_data = NULL; 329 file->private_data = NULL;
335 vcp->vc_inuse--; 330 vcp->vc_inuse--;
336 unlock_kernel(); 331 mutex_unlock(&vcp->vc_mutex);
337 return 0; 332 return 0;
338} 333}
339 334
@@ -362,9 +357,11 @@ static int init_coda_psdev(void)
362 err = PTR_ERR(coda_psdev_class); 357 err = PTR_ERR(coda_psdev_class);
363 goto out_chrdev; 358 goto out_chrdev;
364 } 359 }
365 for (i = 0; i < MAX_CODADEVS; i++) 360 for (i = 0; i < MAX_CODADEVS; i++) {
361 mutex_init(&(&coda_comms[i])->vc_mutex);
366 device_create(coda_psdev_class, NULL, 362 device_create(coda_psdev_class, NULL,
367 MKDEV(CODA_PSDEV_MAJOR, i), NULL, "cfs%d", i); 363 MKDEV(CODA_PSDEV_MAJOR, i), NULL, "cfs%d", i);
364 }
368 coda_sysctl_init(); 365 coda_sysctl_init();
369 goto out; 366 goto out;
370 367
diff --git a/fs/coda/symlink.c b/fs/coda/symlink.c
index 4513b7258458..af78f007a2b0 100644
--- a/fs/coda/symlink.c
+++ b/fs/coda/symlink.c
@@ -14,7 +14,6 @@
14#include <linux/stat.h> 14#include <linux/stat.h>
15#include <linux/errno.h> 15#include <linux/errno.h>
16#include <linux/pagemap.h> 16#include <linux/pagemap.h>
17#include <linux/smp_lock.h>
18 17
19#include <linux/coda.h> 18#include <linux/coda.h>
20#include <linux/coda_linux.h> 19#include <linux/coda_linux.h>
@@ -29,11 +28,9 @@ static int coda_symlink_filler(struct file *file, struct page *page)
29 unsigned int len = PAGE_SIZE; 28 unsigned int len = PAGE_SIZE;
30 char *p = kmap(page); 29 char *p = kmap(page);
31 30
32 lock_kernel();
33 cii = ITOC(inode); 31 cii = ITOC(inode);
34 32
35 error = venus_readlink(inode->i_sb, &cii->c_fid, p, &len); 33 error = venus_readlink(inode->i_sb, &cii->c_fid, p, &len);
36 unlock_kernel();
37 if (error) 34 if (error)
38 goto fail; 35 goto fail;
39 SetPageUptodate(page); 36 SetPageUptodate(page);
diff --git a/fs/coda/upcall.c b/fs/coda/upcall.c
index b8893ab6f9e6..c3563cab9758 100644
--- a/fs/coda/upcall.c
+++ b/fs/coda/upcall.c
@@ -27,6 +27,7 @@
27#include <linux/errno.h> 27#include <linux/errno.h>
28#include <linux/string.h> 28#include <linux/string.h>
29#include <linux/slab.h> 29#include <linux/slab.h>
30#include <linux/mutex.h>
30#include <asm/uaccess.h> 31#include <asm/uaccess.h>
31#include <linux/vmalloc.h> 32#include <linux/vmalloc.h>
32#include <linux/vfs.h> 33#include <linux/vfs.h>
@@ -606,7 +607,8 @@ static void coda_unblock_signals(sigset_t *old)
606 (r)->uc_opcode != CODA_RELEASE) || \ 607 (r)->uc_opcode != CODA_RELEASE) || \
607 (r)->uc_flags & CODA_REQ_READ)) 608 (r)->uc_flags & CODA_REQ_READ))
608 609
609static inline void coda_waitfor_upcall(struct upc_req *req) 610static inline void coda_waitfor_upcall(struct venus_comm *vcp,
611 struct upc_req *req)
610{ 612{
611 DECLARE_WAITQUEUE(wait, current); 613 DECLARE_WAITQUEUE(wait, current);
612 unsigned long timeout = jiffies + coda_timeout * HZ; 614 unsigned long timeout = jiffies + coda_timeout * HZ;
@@ -639,10 +641,12 @@ static inline void coda_waitfor_upcall(struct upc_req *req)
639 break; 641 break;
640 } 642 }
641 643
644 mutex_unlock(&vcp->vc_mutex);
642 if (blocked) 645 if (blocked)
643 schedule_timeout(HZ); 646 schedule_timeout(HZ);
644 else 647 else
645 schedule(); 648 schedule();
649 mutex_lock(&vcp->vc_mutex);
646 } 650 }
647 if (blocked) 651 if (blocked)
648 coda_unblock_signals(&old); 652 coda_unblock_signals(&old);
@@ -667,18 +671,23 @@ static int coda_upcall(struct venus_comm *vcp,
667{ 671{
668 union outputArgs *out; 672 union outputArgs *out;
669 union inputArgs *sig_inputArgs; 673 union inputArgs *sig_inputArgs;
670 struct upc_req *req, *sig_req; 674 struct upc_req *req = NULL, *sig_req;
671 int error = 0; 675 int error;
676
677 mutex_lock(&vcp->vc_mutex);
672 678
673 if (!vcp->vc_inuse) { 679 if (!vcp->vc_inuse) {
674 printk(KERN_NOTICE "coda: Venus dead, not sending upcall\n"); 680 printk(KERN_NOTICE "coda: Venus dead, not sending upcall\n");
675 return -ENXIO; 681 error = -ENXIO;
682 goto exit;
676 } 683 }
677 684
678 /* Format the request message. */ 685 /* Format the request message. */
679 req = kmalloc(sizeof(struct upc_req), GFP_KERNEL); 686 req = kmalloc(sizeof(struct upc_req), GFP_KERNEL);
680 if (!req) 687 if (!req) {
681 return -ENOMEM; 688 error = -ENOMEM;
689 goto exit;
690 }
682 691
683 req->uc_data = (void *)buffer; 692 req->uc_data = (void *)buffer;
684 req->uc_flags = 0; 693 req->uc_flags = 0;
@@ -705,7 +714,7 @@ static int coda_upcall(struct venus_comm *vcp,
705 * ENODEV. */ 714 * ENODEV. */
706 715
707 /* Go to sleep. Wake up on signals only after the timeout. */ 716 /* Go to sleep. Wake up on signals only after the timeout. */
708 coda_waitfor_upcall(req); 717 coda_waitfor_upcall(vcp, req);
709 718
710 /* Op went through, interrupt or not... */ 719 /* Op went through, interrupt or not... */
711 if (req->uc_flags & CODA_REQ_WRITE) { 720 if (req->uc_flags & CODA_REQ_WRITE) {
@@ -759,6 +768,7 @@ static int coda_upcall(struct venus_comm *vcp,
759 768
760exit: 769exit:
761 kfree(req); 770 kfree(req);
771 mutex_unlock(&vcp->vc_mutex);
762 return error; 772 return error;
763} 773}
764 774
@@ -796,21 +806,24 @@ exit:
796 * 806 *
797 * CODA_REPLACE -- replace one CodaFid with another throughout the name cache */ 807 * CODA_REPLACE -- replace one CodaFid with another throughout the name cache */
798 808
799int coda_downcall(int opcode, union outputArgs * out, struct super_block *sb) 809int coda_downcall(struct venus_comm *vcp, int opcode, union outputArgs *out)
800{ 810{
801 struct inode *inode = NULL; 811 struct inode *inode = NULL;
802 struct CodaFid *fid, *newfid; 812 struct CodaFid *fid = NULL, *newfid;
813 struct super_block *sb;
803 814
804 /* Handle invalidation requests. */ 815 /* Handle invalidation requests. */
805 if ( !sb || !sb->s_root) 816 mutex_lock(&vcp->vc_mutex);
806 return 0; 817 sb = vcp->vc_sb;
818 if (!sb || !sb->s_root)
819 goto unlock_out;
807 820
808 switch (opcode) { 821 switch (opcode) {
809 case CODA_FLUSH: 822 case CODA_FLUSH:
810 coda_cache_clear_all(sb); 823 coda_cache_clear_all(sb);
811 shrink_dcache_sb(sb); 824 shrink_dcache_sb(sb);
812 if (sb->s_root->d_inode) 825 if (sb->s_root->d_inode)
813 coda_flag_inode(sb->s_root->d_inode, C_FLUSH); 826 coda_flag_inode(sb->s_root->d_inode, C_FLUSH);
814 break; 827 break;
815 828
816 case CODA_PURGEUSER: 829 case CODA_PURGEUSER:
@@ -819,45 +832,53 @@ int coda_downcall(int opcode, union outputArgs * out, struct super_block *sb)
819 832
820 case CODA_ZAPDIR: 833 case CODA_ZAPDIR:
821 fid = &out->coda_zapdir.CodaFid; 834 fid = &out->coda_zapdir.CodaFid;
822 inode = coda_fid_to_inode(fid, sb);
823 if (inode) {
824 coda_flag_inode_children(inode, C_PURGE);
825 coda_flag_inode(inode, C_VATTR);
826 }
827 break; 835 break;
828 836
829 case CODA_ZAPFILE: 837 case CODA_ZAPFILE:
830 fid = &out->coda_zapfile.CodaFid; 838 fid = &out->coda_zapfile.CodaFid;
831 inode = coda_fid_to_inode(fid, sb);
832 if (inode)
833 coda_flag_inode(inode, C_VATTR);
834 break; 839 break;
835 840
836 case CODA_PURGEFID: 841 case CODA_PURGEFID:
837 fid = &out->coda_purgefid.CodaFid; 842 fid = &out->coda_purgefid.CodaFid;
843 break;
844
845 case CODA_REPLACE:
846 fid = &out->coda_replace.OldFid;
847 break;
848 }
849 if (fid)
838 inode = coda_fid_to_inode(fid, sb); 850 inode = coda_fid_to_inode(fid, sb);
839 if (inode) {
840 coda_flag_inode_children(inode, C_PURGE);
841 851
842 /* catch the dentries later if some are still busy */ 852unlock_out:
843 coda_flag_inode(inode, C_PURGE); 853 mutex_unlock(&vcp->vc_mutex);
844 d_prune_aliases(inode);
845 854
846 } 855 if (!inode)
856 return 0;
857
858 switch (opcode) {
859 case CODA_ZAPDIR:
860 coda_flag_inode_children(inode, C_PURGE);
861 coda_flag_inode(inode, C_VATTR);
862 break;
863
864 case CODA_ZAPFILE:
865 coda_flag_inode(inode, C_VATTR);
866 break;
867
868 case CODA_PURGEFID:
869 coda_flag_inode_children(inode, C_PURGE);
870
871 /* catch the dentries later if some are still busy */
872 coda_flag_inode(inode, C_PURGE);
873 d_prune_aliases(inode);
847 break; 874 break;
848 875
849 case CODA_REPLACE: 876 case CODA_REPLACE:
850 fid = &out->coda_replace.OldFid;
851 newfid = &out->coda_replace.NewFid; 877 newfid = &out->coda_replace.NewFid;
852 inode = coda_fid_to_inode(fid, sb); 878 coda_replace_fid(inode, fid, newfid);
853 if (inode)
854 coda_replace_fid(inode, fid, newfid);
855 break; 879 break;
856 } 880 }
857 881 iput(inode);
858 if (inode)
859 iput(inode);
860
861 return 0; 882 return 0;
862} 883}
863 884
diff --git a/fs/compat.c b/fs/compat.c
index 0644a154672b..ff66c0d7583d 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -29,8 +29,6 @@
29#include <linux/vfs.h> 29#include <linux/vfs.h>
30#include <linux/ioctl.h> 30#include <linux/ioctl.h>
31#include <linux/init.h> 31#include <linux/init.h>
32#include <linux/smb.h>
33#include <linux/smb_mount.h>
34#include <linux/ncp_mount.h> 32#include <linux/ncp_mount.h>
35#include <linux/nfs4_mount.h> 33#include <linux/nfs4_mount.h>
36#include <linux/syscalls.h> 34#include <linux/syscalls.h>
@@ -608,14 +606,14 @@ ssize_t compat_rw_copy_check_uvector(int type,
608 /* 606 /*
609 * Single unix specification: 607 * Single unix specification:
610 * We should -EINVAL if an element length is not >= 0 and fitting an 608 * We should -EINVAL if an element length is not >= 0 and fitting an
611 * ssize_t. The total length is fitting an ssize_t 609 * ssize_t.
612 * 610 *
613 * Be careful here because iov_len is a size_t not an ssize_t 611 * In Linux, the total length is limited to MAX_RW_COUNT, there is
612 * no overflow possibility.
614 */ 613 */
615 tot_len = 0; 614 tot_len = 0;
616 ret = -EINVAL; 615 ret = -EINVAL;
617 for (seg = 0; seg < nr_segs; seg++) { 616 for (seg = 0; seg < nr_segs; seg++) {
618 compat_ssize_t tmp = tot_len;
619 compat_uptr_t buf; 617 compat_uptr_t buf;
620 compat_ssize_t len; 618 compat_ssize_t len;
621 619
@@ -626,13 +624,13 @@ ssize_t compat_rw_copy_check_uvector(int type,
626 } 624 }
627 if (len < 0) /* size_t not fitting in compat_ssize_t .. */ 625 if (len < 0) /* size_t not fitting in compat_ssize_t .. */
628 goto out; 626 goto out;
629 tot_len += len;
630 if (tot_len < tmp) /* maths overflow on the compat_ssize_t */
631 goto out;
632 if (!access_ok(vrfy_dir(type), compat_ptr(buf), len)) { 627 if (!access_ok(vrfy_dir(type), compat_ptr(buf), len)) {
633 ret = -EFAULT; 628 ret = -EFAULT;
634 goto out; 629 goto out;
635 } 630 }
631 if (len > MAX_RW_COUNT - tot_len)
632 len = MAX_RW_COUNT - tot_len;
633 tot_len += len;
636 iov->iov_base = compat_ptr(buf); 634 iov->iov_base = compat_ptr(buf);
637 iov->iov_len = (compat_size_t) len; 635 iov->iov_len = (compat_size_t) len;
638 uvector++; 636 uvector++;
@@ -745,30 +743,6 @@ static void *do_ncp_super_data_conv(void *raw_data)
745 return raw_data; 743 return raw_data;
746} 744}
747 745
748struct compat_smb_mount_data {
749 compat_int_t version;
750 __compat_uid_t mounted_uid;
751 __compat_uid_t uid;
752 __compat_gid_t gid;
753 compat_mode_t file_mode;
754 compat_mode_t dir_mode;
755};
756
757static void *do_smb_super_data_conv(void *raw_data)
758{
759 struct smb_mount_data *s = raw_data;
760 struct compat_smb_mount_data *c_s = raw_data;
761
762 if (c_s->version != SMB_MOUNT_OLDVERSION)
763 goto out;
764 s->dir_mode = c_s->dir_mode;
765 s->file_mode = c_s->file_mode;
766 s->gid = c_s->gid;
767 s->uid = c_s->uid;
768 s->mounted_uid = c_s->mounted_uid;
769 out:
770 return raw_data;
771}
772 746
773struct compat_nfs_string { 747struct compat_nfs_string {
774 compat_uint_t len; 748 compat_uint_t len;
@@ -835,7 +809,6 @@ static int do_nfs4_super_data_conv(void *raw_data)
835 return 0; 809 return 0;
836} 810}
837 811
838#define SMBFS_NAME "smbfs"
839#define NCPFS_NAME "ncpfs" 812#define NCPFS_NAME "ncpfs"
840#define NFS4_NAME "nfs4" 813#define NFS4_NAME "nfs4"
841 814
@@ -870,9 +843,7 @@ asmlinkage long compat_sys_mount(const char __user * dev_name,
870 retval = -EINVAL; 843 retval = -EINVAL;
871 844
872 if (kernel_type && data_page) { 845 if (kernel_type && data_page) {
873 if (!strcmp(kernel_type, SMBFS_NAME)) { 846 if (!strcmp(kernel_type, NCPFS_NAME)) {
874 do_smb_super_data_conv((void *)data_page);
875 } else if (!strcmp(kernel_type, NCPFS_NAME)) {
876 do_ncp_super_data_conv((void *)data_page); 847 do_ncp_super_data_conv((void *)data_page);
877 } else if (!strcmp(kernel_type, NFS4_NAME)) { 848 } else if (!strcmp(kernel_type, NFS4_NAME)) {
878 if (do_nfs4_super_data_conv((void *) data_page)) 849 if (do_nfs4_super_data_conv((void *) data_page))
@@ -1963,7 +1934,7 @@ asmlinkage long compat_sys_ppoll(struct pollfd __user *ufds,
1963} 1934}
1964#endif /* HAVE_SET_RESTORE_SIGMASK */ 1935#endif /* HAVE_SET_RESTORE_SIGMASK */
1965 1936
1966#if defined(CONFIG_NFSD) || defined(CONFIG_NFSD_MODULE) 1937#if (defined(CONFIG_NFSD) || defined(CONFIG_NFSD_MODULE)) && !defined(CONFIG_NFSD_DEPRECATED)
1967/* Stuff for NFS server syscalls... */ 1938/* Stuff for NFS server syscalls... */
1968struct compat_nfsctl_svc { 1939struct compat_nfsctl_svc {
1969 u16 svc32_port; 1940 u16 svc32_port;
diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c
index d0ad09d57789..410ed188faa1 100644
--- a/fs/compat_ioctl.c
+++ b/fs/compat_ioctl.c
@@ -46,7 +46,6 @@
46#include <linux/videodev.h> 46#include <linux/videodev.h>
47#include <linux/netdevice.h> 47#include <linux/netdevice.h>
48#include <linux/raw.h> 48#include <linux/raw.h>
49#include <linux/smb_fs.h>
50#include <linux/blkdev.h> 49#include <linux/blkdev.h>
51#include <linux/elevator.h> 50#include <linux/elevator.h>
52#include <linux/rtc.h> 51#include <linux/rtc.h>
@@ -558,25 +557,6 @@ static int mt_ioctl_trans(unsigned int fd, unsigned int cmd, void __user *argp)
558 557
559#endif /* CONFIG_BLOCK */ 558#endif /* CONFIG_BLOCK */
560 559
561static int do_smb_getmountuid(unsigned int fd, unsigned int cmd,
562 compat_uid_t __user *argp)
563{
564 mm_segment_t old_fs = get_fs();
565 __kernel_uid_t kuid;
566 int err;
567
568 cmd = SMB_IOC_GETMOUNTUID;
569
570 set_fs(KERNEL_DS);
571 err = sys_ioctl(fd, cmd, (unsigned long)&kuid);
572 set_fs(old_fs);
573
574 if (err >= 0)
575 err = put_user(kuid, argp);
576
577 return err;
578}
579
580/* Bluetooth ioctls */ 560/* Bluetooth ioctls */
581#define HCIUARTSETPROTO _IOW('U', 200, int) 561#define HCIUARTSETPROTO _IOW('U', 200, int)
582#define HCIUARTGETPROTO _IOR('U', 201, int) 562#define HCIUARTGETPROTO _IOR('U', 201, int)
@@ -1199,8 +1179,9 @@ COMPATIBLE_IOCTL(SOUND_MIXER_PRIVATE5)
1199COMPATIBLE_IOCTL(SOUND_MIXER_GETLEVELS) 1179COMPATIBLE_IOCTL(SOUND_MIXER_GETLEVELS)
1200COMPATIBLE_IOCTL(SOUND_MIXER_SETLEVELS) 1180COMPATIBLE_IOCTL(SOUND_MIXER_SETLEVELS)
1201COMPATIBLE_IOCTL(OSS_GETVERSION) 1181COMPATIBLE_IOCTL(OSS_GETVERSION)
1202/* SMB ioctls which do not need any translations */ 1182/* Raw devices */
1203COMPATIBLE_IOCTL(SMB_IOC_NEWCONN) 1183COMPATIBLE_IOCTL(RAW_SETBIND)
1184COMPATIBLE_IOCTL(RAW_GETBIND)
1204/* Watchdog */ 1185/* Watchdog */
1205COMPATIBLE_IOCTL(WDIOC_GETSUPPORT) 1186COMPATIBLE_IOCTL(WDIOC_GETSUPPORT)
1206COMPATIBLE_IOCTL(WDIOC_GETSTATUS) 1187COMPATIBLE_IOCTL(WDIOC_GETSTATUS)
@@ -1458,10 +1439,6 @@ static long do_ioctl_trans(int fd, unsigned int cmd,
1458 case MTIOCPOS32: 1439 case MTIOCPOS32:
1459 return mt_ioctl_trans(fd, cmd, argp); 1440 return mt_ioctl_trans(fd, cmd, argp);
1460#endif 1441#endif
1461 /* One SMB ioctl needs translations. */
1462#define SMB_IOC_GETMOUNTUID_32 _IOR('u', 1, compat_uid_t)
1463 case SMB_IOC_GETMOUNTUID_32:
1464 return do_smb_getmountuid(fd, cmd, argp);
1465 /* Serial */ 1442 /* Serial */
1466 case TIOCGSERIAL: 1443 case TIOCGSERIAL:
1467 case TIOCSSERIAL: 1444 case TIOCSSERIAL:
diff --git a/fs/configfs/inode.c b/fs/configfs/inode.c
index cf78d44a8d6a..253476d78ed8 100644
--- a/fs/configfs/inode.c
+++ b/fs/configfs/inode.c
@@ -135,6 +135,7 @@ struct inode * configfs_new_inode(mode_t mode, struct configfs_dirent * sd)
135{ 135{
136 struct inode * inode = new_inode(configfs_sb); 136 struct inode * inode = new_inode(configfs_sb);
137 if (inode) { 137 if (inode) {
138 inode->i_ino = get_next_ino();
138 inode->i_mapping->a_ops = &configfs_aops; 139 inode->i_mapping->a_ops = &configfs_aops;
139 inode->i_mapping->backing_dev_info = &configfs_backing_dev_info; 140 inode->i_mapping->backing_dev_info = &configfs_backing_dev_info;
140 inode->i_op = &configfs_inode_operations; 141 inode->i_op = &configfs_inode_operations;
diff --git a/fs/configfs/mount.c b/fs/configfs/mount.c
index 8c8d64230c2d..7d3607febe1c 100644
--- a/fs/configfs/mount.c
+++ b/fs/configfs/mount.c
@@ -104,16 +104,16 @@ static int configfs_fill_super(struct super_block *sb, void *data, int silent)
104 return 0; 104 return 0;
105} 105}
106 106
107static int configfs_get_sb(struct file_system_type *fs_type, 107static struct dentry *configfs_do_mount(struct file_system_type *fs_type,
108 int flags, const char *dev_name, void *data, struct vfsmount *mnt) 108 int flags, const char *dev_name, void *data)
109{ 109{
110 return get_sb_single(fs_type, flags, data, configfs_fill_super, mnt); 110 return mount_single(fs_type, flags, data, configfs_fill_super);
111} 111}
112 112
113static struct file_system_type configfs_fs_type = { 113static struct file_system_type configfs_fs_type = {
114 .owner = THIS_MODULE, 114 .owner = THIS_MODULE,
115 .name = "configfs", 115 .name = "configfs",
116 .get_sb = configfs_get_sb, 116 .mount = configfs_do_mount,
117 .kill_sb = kill_litter_super, 117 .kill_sb = kill_litter_super,
118}; 118};
119 119
diff --git a/fs/cramfs/inode.c b/fs/cramfs/inode.c
index 1e7a33028d33..32fd5fe9ca0e 100644
--- a/fs/cramfs/inode.c
+++ b/fs/cramfs/inode.c
@@ -533,17 +533,16 @@ static const struct super_operations cramfs_ops = {
533 .statfs = cramfs_statfs, 533 .statfs = cramfs_statfs,
534}; 534};
535 535
536static int cramfs_get_sb(struct file_system_type *fs_type, 536static struct dentry *cramfs_mount(struct file_system_type *fs_type,
537 int flags, const char *dev_name, void *data, struct vfsmount *mnt) 537 int flags, const char *dev_name, void *data)
538{ 538{
539 return get_sb_bdev(fs_type, flags, dev_name, data, cramfs_fill_super, 539 return mount_bdev(fs_type, flags, dev_name, data, cramfs_fill_super);
540 mnt);
541} 540}
542 541
543static struct file_system_type cramfs_fs_type = { 542static struct file_system_type cramfs_fs_type = {
544 .owner = THIS_MODULE, 543 .owner = THIS_MODULE,
545 .name = "cramfs", 544 .name = "cramfs",
546 .get_sb = cramfs_get_sb, 545 .mount = cramfs_mount,
547 .kill_sb = kill_block_super, 546 .kill_sb = kill_block_super,
548 .fs_flags = FS_REQUIRES_DEV, 547 .fs_flags = FS_REQUIRES_DEV,
549}; 548};
diff --git a/fs/dcache.c b/fs/dcache.c
index 83293be48149..23702a9d4e6d 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -67,33 +67,43 @@ struct dentry_stat_t dentry_stat = {
67 .age_limit = 45, 67 .age_limit = 45,
68}; 68};
69 69
70static void __d_free(struct dentry *dentry) 70static struct percpu_counter nr_dentry __cacheline_aligned_in_smp;
71static struct percpu_counter nr_dentry_unused __cacheline_aligned_in_smp;
72
73#if defined(CONFIG_SYSCTL) && defined(CONFIG_PROC_FS)
74int proc_nr_dentry(ctl_table *table, int write, void __user *buffer,
75 size_t *lenp, loff_t *ppos)
76{
77 dentry_stat.nr_dentry = percpu_counter_sum_positive(&nr_dentry);
78 dentry_stat.nr_unused = percpu_counter_sum_positive(&nr_dentry_unused);
79 return proc_dointvec(table, write, buffer, lenp, ppos);
80}
81#endif
82
83static void __d_free(struct rcu_head *head)
71{ 84{
85 struct dentry *dentry = container_of(head, struct dentry, d_u.d_rcu);
86
72 WARN_ON(!list_empty(&dentry->d_alias)); 87 WARN_ON(!list_empty(&dentry->d_alias));
73 if (dname_external(dentry)) 88 if (dname_external(dentry))
74 kfree(dentry->d_name.name); 89 kfree(dentry->d_name.name);
75 kmem_cache_free(dentry_cache, dentry); 90 kmem_cache_free(dentry_cache, dentry);
76} 91}
77 92
78static void d_callback(struct rcu_head *head)
79{
80 struct dentry * dentry = container_of(head, struct dentry, d_u.d_rcu);
81 __d_free(dentry);
82}
83
84/* 93/*
85 * no dcache_lock, please. The caller must decrement dentry_stat.nr_dentry 94 * no dcache_lock, please.
86 * inside dcache_lock.
87 */ 95 */
88static void d_free(struct dentry *dentry) 96static void d_free(struct dentry *dentry)
89{ 97{
98 percpu_counter_dec(&nr_dentry);
90 if (dentry->d_op && dentry->d_op->d_release) 99 if (dentry->d_op && dentry->d_op->d_release)
91 dentry->d_op->d_release(dentry); 100 dentry->d_op->d_release(dentry);
101
92 /* if dentry was never inserted into hash, immediate free is OK */ 102 /* if dentry was never inserted into hash, immediate free is OK */
93 if (hlist_unhashed(&dentry->d_hash)) 103 if (hlist_unhashed(&dentry->d_hash))
94 __d_free(dentry); 104 __d_free(&dentry->d_u.d_rcu);
95 else 105 else
96 call_rcu(&dentry->d_u.d_rcu, d_callback); 106 call_rcu(&dentry->d_u.d_rcu, __d_free);
97} 107}
98 108
99/* 109/*
@@ -123,37 +133,34 @@ static void dentry_iput(struct dentry * dentry)
123} 133}
124 134
125/* 135/*
126 * dentry_lru_(add|add_tail|del|del_init) must be called with dcache_lock held. 136 * dentry_lru_(add|del|move_tail) must be called with dcache_lock held.
127 */ 137 */
128static void dentry_lru_add(struct dentry *dentry) 138static void dentry_lru_add(struct dentry *dentry)
129{ 139{
130 list_add(&dentry->d_lru, &dentry->d_sb->s_dentry_lru); 140 if (list_empty(&dentry->d_lru)) {
131 dentry->d_sb->s_nr_dentry_unused++; 141 list_add(&dentry->d_lru, &dentry->d_sb->s_dentry_lru);
132 dentry_stat.nr_unused++; 142 dentry->d_sb->s_nr_dentry_unused++;
133} 143 percpu_counter_inc(&nr_dentry_unused);
134 144 }
135static void dentry_lru_add_tail(struct dentry *dentry)
136{
137 list_add_tail(&dentry->d_lru, &dentry->d_sb->s_dentry_lru);
138 dentry->d_sb->s_nr_dentry_unused++;
139 dentry_stat.nr_unused++;
140} 145}
141 146
142static void dentry_lru_del(struct dentry *dentry) 147static void dentry_lru_del(struct dentry *dentry)
143{ 148{
144 if (!list_empty(&dentry->d_lru)) { 149 if (!list_empty(&dentry->d_lru)) {
145 list_del(&dentry->d_lru); 150 list_del_init(&dentry->d_lru);
146 dentry->d_sb->s_nr_dentry_unused--; 151 dentry->d_sb->s_nr_dentry_unused--;
147 dentry_stat.nr_unused--; 152 percpu_counter_dec(&nr_dentry_unused);
148 } 153 }
149} 154}
150 155
151static void dentry_lru_del_init(struct dentry *dentry) 156static void dentry_lru_move_tail(struct dentry *dentry)
152{ 157{
153 if (likely(!list_empty(&dentry->d_lru))) { 158 if (list_empty(&dentry->d_lru)) {
154 list_del_init(&dentry->d_lru); 159 list_add_tail(&dentry->d_lru, &dentry->d_sb->s_dentry_lru);
155 dentry->d_sb->s_nr_dentry_unused--; 160 dentry->d_sb->s_nr_dentry_unused++;
156 dentry_stat.nr_unused--; 161 percpu_counter_inc(&nr_dentry_unused);
162 } else {
163 list_move_tail(&dentry->d_lru, &dentry->d_sb->s_dentry_lru);
157 } 164 }
158} 165}
159 166
@@ -172,7 +179,6 @@ static struct dentry *d_kill(struct dentry *dentry)
172 struct dentry *parent; 179 struct dentry *parent;
173 180
174 list_del(&dentry->d_u.d_child); 181 list_del(&dentry->d_u.d_child);
175 dentry_stat.nr_dentry--; /* For d_free, below */
176 /*drops the locks, at that point nobody can reach this dentry */ 182 /*drops the locks, at that point nobody can reach this dentry */
177 dentry_iput(dentry); 183 dentry_iput(dentry);
178 if (IS_ROOT(dentry)) 184 if (IS_ROOT(dentry))
@@ -237,13 +243,15 @@ repeat:
237 if (dentry->d_op->d_delete(dentry)) 243 if (dentry->d_op->d_delete(dentry))
238 goto unhash_it; 244 goto unhash_it;
239 } 245 }
246
240 /* Unreachable? Get rid of it */ 247 /* Unreachable? Get rid of it */
241 if (d_unhashed(dentry)) 248 if (d_unhashed(dentry))
242 goto kill_it; 249 goto kill_it;
243 if (list_empty(&dentry->d_lru)) { 250
244 dentry->d_flags |= DCACHE_REFERENCED; 251 /* Otherwise leave it cached and ensure it's on the LRU */
245 dentry_lru_add(dentry); 252 dentry->d_flags |= DCACHE_REFERENCED;
246 } 253 dentry_lru_add(dentry);
254
247 spin_unlock(&dentry->d_lock); 255 spin_unlock(&dentry->d_lock);
248 spin_unlock(&dcache_lock); 256 spin_unlock(&dcache_lock);
249 return; 257 return;
@@ -318,11 +326,10 @@ int d_invalidate(struct dentry * dentry)
318EXPORT_SYMBOL(d_invalidate); 326EXPORT_SYMBOL(d_invalidate);
319 327
320/* This should be called _only_ with dcache_lock held */ 328/* This should be called _only_ with dcache_lock held */
321
322static inline struct dentry * __dget_locked(struct dentry *dentry) 329static inline struct dentry * __dget_locked(struct dentry *dentry)
323{ 330{
324 atomic_inc(&dentry->d_count); 331 atomic_inc(&dentry->d_count);
325 dentry_lru_del_init(dentry); 332 dentry_lru_del(dentry);
326 return dentry; 333 return dentry;
327} 334}
328 335
@@ -441,73 +448,27 @@ static void prune_one_dentry(struct dentry * dentry)
441 448
442 if (dentry->d_op && dentry->d_op->d_delete) 449 if (dentry->d_op && dentry->d_op->d_delete)
443 dentry->d_op->d_delete(dentry); 450 dentry->d_op->d_delete(dentry);
444 dentry_lru_del_init(dentry); 451 dentry_lru_del(dentry);
445 __d_drop(dentry); 452 __d_drop(dentry);
446 dentry = d_kill(dentry); 453 dentry = d_kill(dentry);
447 spin_lock(&dcache_lock); 454 spin_lock(&dcache_lock);
448 } 455 }
449} 456}
450 457
451/* 458static void shrink_dentry_list(struct list_head *list)
452 * Shrink the dentry LRU on a given superblock.
453 * @sb : superblock to shrink dentry LRU.
454 * @count: If count is NULL, we prune all dentries on superblock.
455 * @flags: If flags is non-zero, we need to do special processing based on
456 * which flags are set. This means we don't need to maintain multiple
457 * similar copies of this loop.
458 */
459static void __shrink_dcache_sb(struct super_block *sb, int *count, int flags)
460{ 459{
461 LIST_HEAD(referenced);
462 LIST_HEAD(tmp);
463 struct dentry *dentry; 460 struct dentry *dentry;
464 int cnt = 0;
465 461
466 BUG_ON(!sb); 462 while (!list_empty(list)) {
467 BUG_ON((flags & DCACHE_REFERENCED) && count == NULL); 463 dentry = list_entry(list->prev, struct dentry, d_lru);
468 spin_lock(&dcache_lock); 464 dentry_lru_del(dentry);
469 if (count != NULL)
470 /* called from prune_dcache() and shrink_dcache_parent() */
471 cnt = *count;
472restart:
473 if (count == NULL)
474 list_splice_init(&sb->s_dentry_lru, &tmp);
475 else {
476 while (!list_empty(&sb->s_dentry_lru)) {
477 dentry = list_entry(sb->s_dentry_lru.prev,
478 struct dentry, d_lru);
479 BUG_ON(dentry->d_sb != sb);
480 465
481 spin_lock(&dentry->d_lock);
482 /*
483 * If we are honouring the DCACHE_REFERENCED flag and
484 * the dentry has this flag set, don't free it. Clear
485 * the flag and put it back on the LRU.
486 */
487 if ((flags & DCACHE_REFERENCED)
488 && (dentry->d_flags & DCACHE_REFERENCED)) {
489 dentry->d_flags &= ~DCACHE_REFERENCED;
490 list_move(&dentry->d_lru, &referenced);
491 spin_unlock(&dentry->d_lock);
492 } else {
493 list_move_tail(&dentry->d_lru, &tmp);
494 spin_unlock(&dentry->d_lock);
495 cnt--;
496 if (!cnt)
497 break;
498 }
499 cond_resched_lock(&dcache_lock);
500 }
501 }
502 while (!list_empty(&tmp)) {
503 dentry = list_entry(tmp.prev, struct dentry, d_lru);
504 dentry_lru_del_init(dentry);
505 spin_lock(&dentry->d_lock);
506 /* 466 /*
507 * We found an inuse dentry which was not removed from 467 * We found an inuse dentry which was not removed from
508 * the LRU because of laziness during lookup. Do not free 468 * the LRU because of laziness during lookup. Do not free
509 * it - just keep it off the LRU list. 469 * it - just keep it off the LRU list.
510 */ 470 */
471 spin_lock(&dentry->d_lock);
511 if (atomic_read(&dentry->d_count)) { 472 if (atomic_read(&dentry->d_count)) {
512 spin_unlock(&dentry->d_lock); 473 spin_unlock(&dentry->d_lock);
513 continue; 474 continue;
@@ -516,13 +477,60 @@ restart:
516 /* dentry->d_lock was dropped in prune_one_dentry() */ 477 /* dentry->d_lock was dropped in prune_one_dentry() */
517 cond_resched_lock(&dcache_lock); 478 cond_resched_lock(&dcache_lock);
518 } 479 }
519 if (count == NULL && !list_empty(&sb->s_dentry_lru)) 480}
520 goto restart; 481
521 if (count != NULL) 482/**
522 *count = cnt; 483 * __shrink_dcache_sb - shrink the dentry LRU on a given superblock
484 * @sb: superblock to shrink dentry LRU.
485 * @count: number of entries to prune
486 * @flags: flags to control the dentry processing
487 *
488 * If flags contains DCACHE_REFERENCED reference dentries will not be pruned.
489 */
490static void __shrink_dcache_sb(struct super_block *sb, int *count, int flags)
491{
492 /* called from prune_dcache() and shrink_dcache_parent() */
493 struct dentry *dentry;
494 LIST_HEAD(referenced);
495 LIST_HEAD(tmp);
496 int cnt = *count;
497
498 spin_lock(&dcache_lock);
499 while (!list_empty(&sb->s_dentry_lru)) {
500 dentry = list_entry(sb->s_dentry_lru.prev,
501 struct dentry, d_lru);
502 BUG_ON(dentry->d_sb != sb);
503
504 /*
505 * If we are honouring the DCACHE_REFERENCED flag and the
506 * dentry has this flag set, don't free it. Clear the flag
507 * and put it back on the LRU.
508 */
509 if (flags & DCACHE_REFERENCED) {
510 spin_lock(&dentry->d_lock);
511 if (dentry->d_flags & DCACHE_REFERENCED) {
512 dentry->d_flags &= ~DCACHE_REFERENCED;
513 list_move(&dentry->d_lru, &referenced);
514 spin_unlock(&dentry->d_lock);
515 cond_resched_lock(&dcache_lock);
516 continue;
517 }
518 spin_unlock(&dentry->d_lock);
519 }
520
521 list_move_tail(&dentry->d_lru, &tmp);
522 if (!--cnt)
523 break;
524 cond_resched_lock(&dcache_lock);
525 }
526
527 *count = cnt;
528 shrink_dentry_list(&tmp);
529
523 if (!list_empty(&referenced)) 530 if (!list_empty(&referenced))
524 list_splice(&referenced, &sb->s_dentry_lru); 531 list_splice(&referenced, &sb->s_dentry_lru);
525 spin_unlock(&dcache_lock); 532 spin_unlock(&dcache_lock);
533
526} 534}
527 535
528/** 536/**
@@ -538,7 +546,7 @@ static void prune_dcache(int count)
538{ 546{
539 struct super_block *sb, *p = NULL; 547 struct super_block *sb, *p = NULL;
540 int w_count; 548 int w_count;
541 int unused = dentry_stat.nr_unused; 549 int unused = percpu_counter_sum_positive(&nr_dentry_unused);
542 int prune_ratio; 550 int prune_ratio;
543 int pruned; 551 int pruned;
544 552
@@ -608,13 +616,19 @@ static void prune_dcache(int count)
608 * shrink_dcache_sb - shrink dcache for a superblock 616 * shrink_dcache_sb - shrink dcache for a superblock
609 * @sb: superblock 617 * @sb: superblock
610 * 618 *
611 * Shrink the dcache for the specified super block. This 619 * Shrink the dcache for the specified super block. This is used to free
612 * is used to free the dcache before unmounting a file 620 * the dcache before unmounting a file system.
613 * system
614 */ 621 */
615void shrink_dcache_sb(struct super_block * sb) 622void shrink_dcache_sb(struct super_block *sb)
616{ 623{
617 __shrink_dcache_sb(sb, NULL, 0); 624 LIST_HEAD(tmp);
625
626 spin_lock(&dcache_lock);
627 while (!list_empty(&sb->s_dentry_lru)) {
628 list_splice_init(&sb->s_dentry_lru, &tmp);
629 shrink_dentry_list(&tmp);
630 }
631 spin_unlock(&dcache_lock);
618} 632}
619EXPORT_SYMBOL(shrink_dcache_sb); 633EXPORT_SYMBOL(shrink_dcache_sb);
620 634
@@ -632,7 +646,7 @@ static void shrink_dcache_for_umount_subtree(struct dentry *dentry)
632 646
633 /* detach this root from the system */ 647 /* detach this root from the system */
634 spin_lock(&dcache_lock); 648 spin_lock(&dcache_lock);
635 dentry_lru_del_init(dentry); 649 dentry_lru_del(dentry);
636 __d_drop(dentry); 650 __d_drop(dentry);
637 spin_unlock(&dcache_lock); 651 spin_unlock(&dcache_lock);
638 652
@@ -646,7 +660,7 @@ static void shrink_dcache_for_umount_subtree(struct dentry *dentry)
646 spin_lock(&dcache_lock); 660 spin_lock(&dcache_lock);
647 list_for_each_entry(loop, &dentry->d_subdirs, 661 list_for_each_entry(loop, &dentry->d_subdirs,
648 d_u.d_child) { 662 d_u.d_child) {
649 dentry_lru_del_init(loop); 663 dentry_lru_del(loop);
650 __d_drop(loop); 664 __d_drop(loop);
651 cond_resched_lock(&dcache_lock); 665 cond_resched_lock(&dcache_lock);
652 } 666 }
@@ -703,20 +717,13 @@ static void shrink_dcache_for_umount_subtree(struct dentry *dentry)
703 * otherwise we ascend to the parent and move to the 717 * otherwise we ascend to the parent and move to the
704 * next sibling if there is one */ 718 * next sibling if there is one */
705 if (!parent) 719 if (!parent)
706 goto out; 720 return;
707
708 dentry = parent; 721 dentry = parent;
709
710 } while (list_empty(&dentry->d_subdirs)); 722 } while (list_empty(&dentry->d_subdirs));
711 723
712 dentry = list_entry(dentry->d_subdirs.next, 724 dentry = list_entry(dentry->d_subdirs.next,
713 struct dentry, d_u.d_child); 725 struct dentry, d_u.d_child);
714 } 726 }
715out:
716 /* several dentries were freed, need to correct nr_dentry */
717 spin_lock(&dcache_lock);
718 dentry_stat.nr_dentry -= detached;
719 spin_unlock(&dcache_lock);
720} 727}
721 728
722/* 729/*
@@ -830,14 +837,15 @@ resume:
830 struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child); 837 struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child);
831 next = tmp->next; 838 next = tmp->next;
832 839
833 dentry_lru_del_init(dentry);
834 /* 840 /*
835 * move only zero ref count dentries to the end 841 * move only zero ref count dentries to the end
836 * of the unused list for prune_dcache 842 * of the unused list for prune_dcache
837 */ 843 */
838 if (!atomic_read(&dentry->d_count)) { 844 if (!atomic_read(&dentry->d_count)) {
839 dentry_lru_add_tail(dentry); 845 dentry_lru_move_tail(dentry);
840 found++; 846 found++;
847 } else {
848 dentry_lru_del(dentry);
841 } 849 }
842 850
843 /* 851 /*
@@ -900,12 +908,16 @@ EXPORT_SYMBOL(shrink_dcache_parent);
900 */ 908 */
901static int shrink_dcache_memory(struct shrinker *shrink, int nr, gfp_t gfp_mask) 909static int shrink_dcache_memory(struct shrinker *shrink, int nr, gfp_t gfp_mask)
902{ 910{
911 int nr_unused;
912
903 if (nr) { 913 if (nr) {
904 if (!(gfp_mask & __GFP_FS)) 914 if (!(gfp_mask & __GFP_FS))
905 return -1; 915 return -1;
906 prune_dcache(nr); 916 prune_dcache(nr);
907 } 917 }
908 return (dentry_stat.nr_unused / 100) * sysctl_vfs_cache_pressure; 918
919 nr_unused = percpu_counter_sum_positive(&nr_dentry_unused);
920 return (nr_unused / 100) * sysctl_vfs_cache_pressure;
909} 921}
910 922
911static struct shrinker dcache_shrinker = { 923static struct shrinker dcache_shrinker = {
@@ -972,9 +984,10 @@ struct dentry *d_alloc(struct dentry * parent, const struct qstr *name)
972 spin_lock(&dcache_lock); 984 spin_lock(&dcache_lock);
973 if (parent) 985 if (parent)
974 list_add(&dentry->d_u.d_child, &parent->d_subdirs); 986 list_add(&dentry->d_u.d_child, &parent->d_subdirs);
975 dentry_stat.nr_dentry++;
976 spin_unlock(&dcache_lock); 987 spin_unlock(&dcache_lock);
977 988
989 percpu_counter_inc(&nr_dentry);
990
978 return dentry; 991 return dentry;
979} 992}
980EXPORT_SYMBOL(d_alloc); 993EXPORT_SYMBOL(d_alloc);
@@ -1478,33 +1491,26 @@ out:
1478 * This is used by ncpfs in its readdir implementation. 1491 * This is used by ncpfs in its readdir implementation.
1479 * Zero is returned in the dentry is invalid. 1492 * Zero is returned in the dentry is invalid.
1480 */ 1493 */
1481 1494int d_validate(struct dentry *dentry, struct dentry *parent)
1482int d_validate(struct dentry *dentry, struct dentry *dparent)
1483{ 1495{
1484 struct hlist_head *base; 1496 struct hlist_head *head = d_hash(parent, dentry->d_name.hash);
1485 struct hlist_node *lhp; 1497 struct hlist_node *node;
1498 struct dentry *d;
1486 1499
1487 /* Check whether the ptr might be valid at all.. */ 1500 /* Check whether the ptr might be valid at all.. */
1488 if (!kmem_ptr_validate(dentry_cache, dentry)) 1501 if (!kmem_ptr_validate(dentry_cache, dentry))
1489 goto out; 1502 return 0;
1490 1503 if (dentry->d_parent != parent)
1491 if (dentry->d_parent != dparent) 1504 return 0;
1492 goto out;
1493 1505
1494 spin_lock(&dcache_lock); 1506 rcu_read_lock();
1495 base = d_hash(dparent, dentry->d_name.hash); 1507 hlist_for_each_entry_rcu(d, node, head, d_hash) {
1496 hlist_for_each(lhp,base) { 1508 if (d == dentry) {
1497 /* hlist_for_each_entry_rcu() not required for d_hash list 1509 dget(dentry);
1498 * as it is parsed under dcache_lock
1499 */
1500 if (dentry == hlist_entry(lhp, struct dentry, d_hash)) {
1501 __dget_locked(dentry);
1502 spin_unlock(&dcache_lock);
1503 return 1; 1510 return 1;
1504 } 1511 }
1505 } 1512 }
1506 spin_unlock(&dcache_lock); 1513 rcu_read_unlock();
1507out:
1508 return 0; 1514 return 0;
1509} 1515}
1510EXPORT_SYMBOL(d_validate); 1516EXPORT_SYMBOL(d_validate);
@@ -1994,7 +2000,7 @@ global_root:
1994 * Returns a pointer into the buffer or an error code if the 2000 * Returns a pointer into the buffer or an error code if the
1995 * path was too long. 2001 * path was too long.
1996 * 2002 *
1997 * "buflen" should be positive. Caller holds the dcache_lock. 2003 * "buflen" should be positive.
1998 * 2004 *
1999 * If path is not reachable from the supplied root, then the value of 2005 * If path is not reachable from the supplied root, then the value of
2000 * root is changed (without modifying refcounts). 2006 * root is changed (without modifying refcounts).
@@ -2006,10 +2012,12 @@ char *__d_path(const struct path *path, struct path *root,
2006 int error; 2012 int error;
2007 2013
2008 prepend(&res, &buflen, "\0", 1); 2014 prepend(&res, &buflen, "\0", 1);
2015 spin_lock(&dcache_lock);
2009 error = prepend_path(path, root, &res, &buflen); 2016 error = prepend_path(path, root, &res, &buflen);
2017 spin_unlock(&dcache_lock);
2018
2010 if (error) 2019 if (error)
2011 return ERR_PTR(error); 2020 return ERR_PTR(error);
2012
2013 return res; 2021 return res;
2014} 2022}
2015 2023
@@ -2419,6 +2427,9 @@ static void __init dcache_init(void)
2419{ 2427{
2420 int loop; 2428 int loop;
2421 2429
2430 percpu_counter_init(&nr_dentry, 0);
2431 percpu_counter_init(&nr_dentry_unused, 0);
2432
2422 /* 2433 /*
2423 * A constructor could be added for stable state like the lists, 2434 * A constructor could be added for stable state like the lists,
2424 * but it is probably not worth it because of the cache nature 2435 * but it is probably not worth it because of the cache nature
diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c
index 30a87b3dbcac..37a8ca7c1222 100644
--- a/fs/debugfs/inode.c
+++ b/fs/debugfs/inode.c
@@ -40,6 +40,7 @@ static struct inode *debugfs_get_inode(struct super_block *sb, int mode, dev_t d
40 struct inode *inode = new_inode(sb); 40 struct inode *inode = new_inode(sb);
41 41
42 if (inode) { 42 if (inode) {
43 inode->i_ino = get_next_ino();
43 inode->i_mode = mode; 44 inode->i_mode = mode;
44 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; 45 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
45 switch (mode & S_IFMT) { 46 switch (mode & S_IFMT) {
@@ -134,17 +135,17 @@ static int debug_fill_super(struct super_block *sb, void *data, int silent)
134 return simple_fill_super(sb, DEBUGFS_MAGIC, debug_files); 135 return simple_fill_super(sb, DEBUGFS_MAGIC, debug_files);
135} 136}
136 137
137static int debug_get_sb(struct file_system_type *fs_type, 138static struct dentry *debug_mount(struct file_system_type *fs_type,
138 int flags, const char *dev_name, 139 int flags, const char *dev_name,
139 void *data, struct vfsmount *mnt) 140 void *data)
140{ 141{
141 return get_sb_single(fs_type, flags, data, debug_fill_super, mnt); 142 return mount_single(fs_type, flags, data, debug_fill_super);
142} 143}
143 144
144static struct file_system_type debug_fs_type = { 145static struct file_system_type debug_fs_type = {
145 .owner = THIS_MODULE, 146 .owner = THIS_MODULE,
146 .name = "debugfs", 147 .name = "debugfs",
147 .get_sb = debug_get_sb, 148 .mount = debug_mount,
148 .kill_sb = kill_litter_super, 149 .kill_sb = kill_litter_super,
149}; 150};
150 151
diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c
index 8b3ffd5b5235..1bb547c9cad6 100644
--- a/fs/devpts/inode.c
+++ b/fs/devpts/inode.c
@@ -331,7 +331,7 @@ static int compare_init_pts_sb(struct super_block *s, void *p)
331} 331}
332 332
333/* 333/*
334 * devpts_get_sb() 334 * devpts_mount()
335 * 335 *
336 * If the '-o newinstance' mount option was specified, mount a new 336 * If the '-o newinstance' mount option was specified, mount a new
337 * (private) instance of devpts. PTYs created in this instance are 337 * (private) instance of devpts. PTYs created in this instance are
@@ -345,20 +345,20 @@ static int compare_init_pts_sb(struct super_block *s, void *p)
345 * semantics in devpts while preserving backward compatibility of the 345 * semantics in devpts while preserving backward compatibility of the
346 * current 'single-namespace' semantics. i.e all mounts of devpts 346 * current 'single-namespace' semantics. i.e all mounts of devpts
347 * without the 'newinstance' mount option should bind to the initial 347 * without the 'newinstance' mount option should bind to the initial
348 * kernel mount, like get_sb_single(). 348 * kernel mount, like mount_single().
349 * 349 *
350 * Mounts with 'newinstance' option create a new, private namespace. 350 * Mounts with 'newinstance' option create a new, private namespace.
351 * 351 *
352 * NOTE: 352 * NOTE:
353 * 353 *
354 * For single-mount semantics, devpts cannot use get_sb_single(), 354 * For single-mount semantics, devpts cannot use mount_single(),
355 * because get_sb_single()/sget() find and use the super-block from 355 * because mount_single()/sget() find and use the super-block from
356 * the most recent mount of devpts. But that recent mount may be a 356 * the most recent mount of devpts. But that recent mount may be a
357 * 'newinstance' mount and get_sb_single() would pick the newinstance 357 * 'newinstance' mount and mount_single() would pick the newinstance
358 * super-block instead of the initial super-block. 358 * super-block instead of the initial super-block.
359 */ 359 */
360static int devpts_get_sb(struct file_system_type *fs_type, 360static struct dentry *devpts_mount(struct file_system_type *fs_type,
361 int flags, const char *dev_name, void *data, struct vfsmount *mnt) 361 int flags, const char *dev_name, void *data)
362{ 362{
363 int error; 363 int error;
364 struct pts_mount_opts opts; 364 struct pts_mount_opts opts;
@@ -366,7 +366,7 @@ static int devpts_get_sb(struct file_system_type *fs_type,
366 366
367 error = parse_mount_options(data, PARSE_MOUNT, &opts); 367 error = parse_mount_options(data, PARSE_MOUNT, &opts);
368 if (error) 368 if (error)
369 return error; 369 return ERR_PTR(error);
370 370
371 if (opts.newinstance) 371 if (opts.newinstance)
372 s = sget(fs_type, NULL, set_anon_super, NULL); 372 s = sget(fs_type, NULL, set_anon_super, NULL);
@@ -374,7 +374,7 @@ static int devpts_get_sb(struct file_system_type *fs_type,
374 s = sget(fs_type, compare_init_pts_sb, set_anon_super, NULL); 374 s = sget(fs_type, compare_init_pts_sb, set_anon_super, NULL);
375 375
376 if (IS_ERR(s)) 376 if (IS_ERR(s))
377 return PTR_ERR(s); 377 return ERR_CAST(s);
378 378
379 if (!s->s_root) { 379 if (!s->s_root) {
380 s->s_flags = flags; 380 s->s_flags = flags;
@@ -390,13 +390,11 @@ static int devpts_get_sb(struct file_system_type *fs_type,
390 if (error) 390 if (error)
391 goto out_undo_sget; 391 goto out_undo_sget;
392 392
393 simple_set_mnt(mnt, s); 393 return dget(s->s_root);
394
395 return 0;
396 394
397out_undo_sget: 395out_undo_sget:
398 deactivate_locked_super(s); 396 deactivate_locked_super(s);
399 return error; 397 return ERR_PTR(error);
400} 398}
401 399
402#else 400#else
@@ -404,10 +402,10 @@ out_undo_sget:
404 * This supports only the legacy single-instance semantics (no 402 * This supports only the legacy single-instance semantics (no
405 * multiple-instance semantics) 403 * multiple-instance semantics)
406 */ 404 */
407static int devpts_get_sb(struct file_system_type *fs_type, int flags, 405static struct dentry *devpts_mount(struct file_system_type *fs_type, int flags,
408 const char *dev_name, void *data, struct vfsmount *mnt) 406 const char *dev_name, void *data)
409{ 407{
410 return get_sb_single(fs_type, flags, data, devpts_fill_super, mnt); 408 return mount_single(fs_type, flags, data, devpts_fill_super);
411} 409}
412#endif 410#endif
413 411
@@ -421,7 +419,7 @@ static void devpts_kill_sb(struct super_block *sb)
421 419
422static struct file_system_type devpts_fs_type = { 420static struct file_system_type devpts_fs_type = {
423 .name = "devpts", 421 .name = "devpts",
424 .get_sb = devpts_get_sb, 422 .mount = devpts_mount,
425 .kill_sb = devpts_kill_sb, 423 .kill_sb = devpts_kill_sb,
426}; 424};
427 425
diff --git a/fs/direct-io.c b/fs/direct-io.c
index 48d74c7391d1..85882f6ba5f7 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -218,7 +218,7 @@ static struct page *dio_get_page(struct dio *dio)
218 * filesystems can use it to hold additional state between get_block calls and 218 * filesystems can use it to hold additional state between get_block calls and
219 * dio_complete. 219 * dio_complete.
220 */ 220 */
221static int dio_complete(struct dio *dio, loff_t offset, int ret, bool is_async) 221static ssize_t dio_complete(struct dio *dio, loff_t offset, ssize_t ret, bool is_async)
222{ 222{
223 ssize_t transferred = 0; 223 ssize_t transferred = 0;
224 224
diff --git a/fs/ecryptfs/ecryptfs_kernel.h b/fs/ecryptfs/ecryptfs_kernel.h
index 40186b959429..413a3c48f0bb 100644
--- a/fs/ecryptfs/ecryptfs_kernel.h
+++ b/fs/ecryptfs/ecryptfs_kernel.h
@@ -377,6 +377,7 @@ struct ecryptfs_mount_crypt_stat {
377#define ECRYPTFS_GLOBAL_ENCRYPT_FILENAMES 0x00000010 377#define ECRYPTFS_GLOBAL_ENCRYPT_FILENAMES 0x00000010
378#define ECRYPTFS_GLOBAL_ENCFN_USE_MOUNT_FNEK 0x00000020 378#define ECRYPTFS_GLOBAL_ENCFN_USE_MOUNT_FNEK 0x00000020
379#define ECRYPTFS_GLOBAL_ENCFN_USE_FEK 0x00000040 379#define ECRYPTFS_GLOBAL_ENCFN_USE_FEK 0x00000040
380#define ECRYPTFS_GLOBAL_MOUNT_AUTH_TOK_ONLY 0x00000080
380 u32 flags; 381 u32 flags;
381 struct list_head global_auth_tok_list; 382 struct list_head global_auth_tok_list;
382 struct mutex global_auth_tok_list_mutex; 383 struct mutex global_auth_tok_list_mutex;
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c
index 3fbc94203380..9d1a22d62765 100644
--- a/fs/ecryptfs/inode.c
+++ b/fs/ecryptfs/inode.c
@@ -32,6 +32,7 @@
32#include <linux/crypto.h> 32#include <linux/crypto.h>
33#include <linux/fs_stack.h> 33#include <linux/fs_stack.h>
34#include <linux/slab.h> 34#include <linux/slab.h>
35#include <linux/xattr.h>
35#include <asm/unaligned.h> 36#include <asm/unaligned.h>
36#include "ecryptfs_kernel.h" 37#include "ecryptfs_kernel.h"
37 38
@@ -70,15 +71,19 @@ ecryptfs_create_underlying_file(struct inode *lower_dir_inode,
70 struct vfsmount *lower_mnt = ecryptfs_dentry_to_lower_mnt(dentry); 71 struct vfsmount *lower_mnt = ecryptfs_dentry_to_lower_mnt(dentry);
71 struct dentry *dentry_save; 72 struct dentry *dentry_save;
72 struct vfsmount *vfsmount_save; 73 struct vfsmount *vfsmount_save;
74 unsigned int flags_save;
73 int rc; 75 int rc;
74 76
75 dentry_save = nd->path.dentry; 77 dentry_save = nd->path.dentry;
76 vfsmount_save = nd->path.mnt; 78 vfsmount_save = nd->path.mnt;
79 flags_save = nd->flags;
77 nd->path.dentry = lower_dentry; 80 nd->path.dentry = lower_dentry;
78 nd->path.mnt = lower_mnt; 81 nd->path.mnt = lower_mnt;
82 nd->flags &= ~LOOKUP_OPEN;
79 rc = vfs_create(lower_dir_inode, lower_dentry, mode, nd); 83 rc = vfs_create(lower_dir_inode, lower_dentry, mode, nd);
80 nd->path.dentry = dentry_save; 84 nd->path.dentry = dentry_save;
81 nd->path.mnt = vfsmount_save; 85 nd->path.mnt = vfsmount_save;
86 nd->flags = flags_save;
82 return rc; 87 return rc;
83} 88}
84 89
@@ -1108,10 +1113,8 @@ ecryptfs_setxattr(struct dentry *dentry, const char *name, const void *value,
1108 rc = -EOPNOTSUPP; 1113 rc = -EOPNOTSUPP;
1109 goto out; 1114 goto out;
1110 } 1115 }
1111 mutex_lock(&lower_dentry->d_inode->i_mutex); 1116
1112 rc = lower_dentry->d_inode->i_op->setxattr(lower_dentry, name, value, 1117 rc = vfs_setxattr(lower_dentry, name, value, size, flags);
1113 size, flags);
1114 mutex_unlock(&lower_dentry->d_inode->i_mutex);
1115out: 1118out:
1116 return rc; 1119 return rc;
1117} 1120}
diff --git a/fs/ecryptfs/keystore.c b/fs/ecryptfs/keystore.c
index 73811cfa2ea4..b1f6858a5223 100644
--- a/fs/ecryptfs/keystore.c
+++ b/fs/ecryptfs/keystore.c
@@ -446,6 +446,7 @@ out:
446 */ 446 */
447static int 447static int
448ecryptfs_find_auth_tok_for_sig( 448ecryptfs_find_auth_tok_for_sig(
449 struct key **auth_tok_key,
449 struct ecryptfs_auth_tok **auth_tok, 450 struct ecryptfs_auth_tok **auth_tok,
450 struct ecryptfs_mount_crypt_stat *mount_crypt_stat, 451 struct ecryptfs_mount_crypt_stat *mount_crypt_stat,
451 char *sig) 452 char *sig)
@@ -453,12 +454,21 @@ ecryptfs_find_auth_tok_for_sig(
453 struct ecryptfs_global_auth_tok *global_auth_tok; 454 struct ecryptfs_global_auth_tok *global_auth_tok;
454 int rc = 0; 455 int rc = 0;
455 456
457 (*auth_tok_key) = NULL;
456 (*auth_tok) = NULL; 458 (*auth_tok) = NULL;
457 if (ecryptfs_find_global_auth_tok_for_sig(&global_auth_tok, 459 if (ecryptfs_find_global_auth_tok_for_sig(&global_auth_tok,
458 mount_crypt_stat, sig)) { 460 mount_crypt_stat, sig)) {
459 struct key *auth_tok_key;
460 461
461 rc = ecryptfs_keyring_auth_tok_for_sig(&auth_tok_key, auth_tok, 462 /* if the flag ECRYPTFS_GLOBAL_MOUNT_AUTH_TOK_ONLY is set in the
463 * mount_crypt_stat structure, we prevent to use auth toks that
464 * are not inserted through the ecryptfs_add_global_auth_tok
465 * function.
466 */
467 if (mount_crypt_stat->flags
468 & ECRYPTFS_GLOBAL_MOUNT_AUTH_TOK_ONLY)
469 return -EINVAL;
470
471 rc = ecryptfs_keyring_auth_tok_for_sig(auth_tok_key, auth_tok,
462 sig); 472 sig);
463 } else 473 } else
464 (*auth_tok) = global_auth_tok->global_auth_tok; 474 (*auth_tok) = global_auth_tok->global_auth_tok;
@@ -509,6 +519,7 @@ ecryptfs_write_tag_70_packet(char *dest, size_t *remaining_bytes,
509 char *filename, size_t filename_size) 519 char *filename, size_t filename_size)
510{ 520{
511 struct ecryptfs_write_tag_70_packet_silly_stack *s; 521 struct ecryptfs_write_tag_70_packet_silly_stack *s;
522 struct key *auth_tok_key = NULL;
512 int rc = 0; 523 int rc = 0;
513 524
514 s = kmalloc(sizeof(*s), GFP_KERNEL); 525 s = kmalloc(sizeof(*s), GFP_KERNEL);
@@ -606,6 +617,7 @@ ecryptfs_write_tag_70_packet(char *dest, size_t *remaining_bytes,
606 } 617 }
607 dest[s->i++] = s->cipher_code; 618 dest[s->i++] = s->cipher_code;
608 rc = ecryptfs_find_auth_tok_for_sig( 619 rc = ecryptfs_find_auth_tok_for_sig(
620 &auth_tok_key,
609 &s->auth_tok, mount_crypt_stat, 621 &s->auth_tok, mount_crypt_stat,
610 mount_crypt_stat->global_default_fnek_sig); 622 mount_crypt_stat->global_default_fnek_sig);
611 if (rc) { 623 if (rc) {
@@ -753,6 +765,8 @@ out_free_unlock:
753out_unlock: 765out_unlock:
754 mutex_unlock(s->tfm_mutex); 766 mutex_unlock(s->tfm_mutex);
755out: 767out:
768 if (auth_tok_key)
769 key_put(auth_tok_key);
756 kfree(s); 770 kfree(s);
757 return rc; 771 return rc;
758} 772}
@@ -798,6 +812,7 @@ ecryptfs_parse_tag_70_packet(char **filename, size_t *filename_size,
798 char *data, size_t max_packet_size) 812 char *data, size_t max_packet_size)
799{ 813{
800 struct ecryptfs_parse_tag_70_packet_silly_stack *s; 814 struct ecryptfs_parse_tag_70_packet_silly_stack *s;
815 struct key *auth_tok_key = NULL;
801 int rc = 0; 816 int rc = 0;
802 817
803 (*packet_size) = 0; 818 (*packet_size) = 0;
@@ -910,7 +925,8 @@ ecryptfs_parse_tag_70_packet(char **filename, size_t *filename_size,
910 * >= ECRYPTFS_MAX_IV_BYTES. */ 925 * >= ECRYPTFS_MAX_IV_BYTES. */
911 memset(s->iv, 0, ECRYPTFS_MAX_IV_BYTES); 926 memset(s->iv, 0, ECRYPTFS_MAX_IV_BYTES);
912 s->desc.info = s->iv; 927 s->desc.info = s->iv;
913 rc = ecryptfs_find_auth_tok_for_sig(&s->auth_tok, mount_crypt_stat, 928 rc = ecryptfs_find_auth_tok_for_sig(&auth_tok_key,
929 &s->auth_tok, mount_crypt_stat,
914 s->fnek_sig_hex); 930 s->fnek_sig_hex);
915 if (rc) { 931 if (rc) {
916 printk(KERN_ERR "%s: Error attempting to find auth tok for " 932 printk(KERN_ERR "%s: Error attempting to find auth tok for "
@@ -986,6 +1002,8 @@ out:
986 (*filename_size) = 0; 1002 (*filename_size) = 0;
987 (*filename) = NULL; 1003 (*filename) = NULL;
988 } 1004 }
1005 if (auth_tok_key)
1006 key_put(auth_tok_key);
989 kfree(s); 1007 kfree(s);
990 return rc; 1008 return rc;
991} 1009}
@@ -1557,14 +1575,19 @@ int ecryptfs_keyring_auth_tok_for_sig(struct key **auth_tok_key,
1557 ECRYPTFS_VERSION_MAJOR, 1575 ECRYPTFS_VERSION_MAJOR,
1558 ECRYPTFS_VERSION_MINOR); 1576 ECRYPTFS_VERSION_MINOR);
1559 rc = -EINVAL; 1577 rc = -EINVAL;
1560 goto out; 1578 goto out_release_key;
1561 } 1579 }
1562 if ((*auth_tok)->token_type != ECRYPTFS_PASSWORD 1580 if ((*auth_tok)->token_type != ECRYPTFS_PASSWORD
1563 && (*auth_tok)->token_type != ECRYPTFS_PRIVATE_KEY) { 1581 && (*auth_tok)->token_type != ECRYPTFS_PRIVATE_KEY) {
1564 printk(KERN_ERR "Invalid auth_tok structure " 1582 printk(KERN_ERR "Invalid auth_tok structure "
1565 "returned from key query\n"); 1583 "returned from key query\n");
1566 rc = -EINVAL; 1584 rc = -EINVAL;
1567 goto out; 1585 goto out_release_key;
1586 }
1587out_release_key:
1588 if (rc) {
1589 key_put(*auth_tok_key);
1590 (*auth_tok_key) = NULL;
1568 } 1591 }
1569out: 1592out:
1570 return rc; 1593 return rc;
@@ -1688,6 +1711,7 @@ int ecryptfs_parse_packet_set(struct ecryptfs_crypt_stat *crypt_stat,
1688 struct ecryptfs_auth_tok_list_item *auth_tok_list_item; 1711 struct ecryptfs_auth_tok_list_item *auth_tok_list_item;
1689 size_t tag_11_contents_size; 1712 size_t tag_11_contents_size;
1690 size_t tag_11_packet_size; 1713 size_t tag_11_packet_size;
1714 struct key *auth_tok_key = NULL;
1691 int rc = 0; 1715 int rc = 0;
1692 1716
1693 INIT_LIST_HEAD(&auth_tok_list); 1717 INIT_LIST_HEAD(&auth_tok_list);
@@ -1784,6 +1808,10 @@ int ecryptfs_parse_packet_set(struct ecryptfs_crypt_stat *crypt_stat,
1784 * just one will be sufficient to decrypt to get the FEK. */ 1808 * just one will be sufficient to decrypt to get the FEK. */
1785find_next_matching_auth_tok: 1809find_next_matching_auth_tok:
1786 found_auth_tok = 0; 1810 found_auth_tok = 0;
1811 if (auth_tok_key) {
1812 key_put(auth_tok_key);
1813 auth_tok_key = NULL;
1814 }
1787 list_for_each_entry(auth_tok_list_item, &auth_tok_list, list) { 1815 list_for_each_entry(auth_tok_list_item, &auth_tok_list, list) {
1788 candidate_auth_tok = &auth_tok_list_item->auth_tok; 1816 candidate_auth_tok = &auth_tok_list_item->auth_tok;
1789 if (unlikely(ecryptfs_verbosity > 0)) { 1817 if (unlikely(ecryptfs_verbosity > 0)) {
@@ -1800,10 +1828,11 @@ find_next_matching_auth_tok:
1800 rc = -EINVAL; 1828 rc = -EINVAL;
1801 goto out_wipe_list; 1829 goto out_wipe_list;
1802 } 1830 }
1803 ecryptfs_find_auth_tok_for_sig(&matching_auth_tok, 1831 rc = ecryptfs_find_auth_tok_for_sig(&auth_tok_key,
1832 &matching_auth_tok,
1804 crypt_stat->mount_crypt_stat, 1833 crypt_stat->mount_crypt_stat,
1805 candidate_auth_tok_sig); 1834 candidate_auth_tok_sig);
1806 if (matching_auth_tok) { 1835 if (!rc) {
1807 found_auth_tok = 1; 1836 found_auth_tok = 1;
1808 goto found_matching_auth_tok; 1837 goto found_matching_auth_tok;
1809 } 1838 }
@@ -1866,6 +1895,8 @@ found_matching_auth_tok:
1866out_wipe_list: 1895out_wipe_list:
1867 wipe_auth_tok_list(&auth_tok_list); 1896 wipe_auth_tok_list(&auth_tok_list);
1868out: 1897out:
1898 if (auth_tok_key)
1899 key_put(auth_tok_key);
1869 return rc; 1900 return rc;
1870} 1901}
1871 1902
diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c
index cbd4e18adb20..a9dbd62518e6 100644
--- a/fs/ecryptfs/main.c
+++ b/fs/ecryptfs/main.c
@@ -208,7 +208,8 @@ enum { ecryptfs_opt_sig, ecryptfs_opt_ecryptfs_sig,
208 ecryptfs_opt_passthrough, ecryptfs_opt_xattr_metadata, 208 ecryptfs_opt_passthrough, ecryptfs_opt_xattr_metadata,
209 ecryptfs_opt_encrypted_view, ecryptfs_opt_fnek_sig, 209 ecryptfs_opt_encrypted_view, ecryptfs_opt_fnek_sig,
210 ecryptfs_opt_fn_cipher, ecryptfs_opt_fn_cipher_key_bytes, 210 ecryptfs_opt_fn_cipher, ecryptfs_opt_fn_cipher_key_bytes,
211 ecryptfs_opt_unlink_sigs, ecryptfs_opt_err }; 211 ecryptfs_opt_unlink_sigs, ecryptfs_opt_mount_auth_tok_only,
212 ecryptfs_opt_err };
212 213
213static const match_table_t tokens = { 214static const match_table_t tokens = {
214 {ecryptfs_opt_sig, "sig=%s"}, 215 {ecryptfs_opt_sig, "sig=%s"},
@@ -223,6 +224,7 @@ static const match_table_t tokens = {
223 {ecryptfs_opt_fn_cipher, "ecryptfs_fn_cipher=%s"}, 224 {ecryptfs_opt_fn_cipher, "ecryptfs_fn_cipher=%s"},
224 {ecryptfs_opt_fn_cipher_key_bytes, "ecryptfs_fn_key_bytes=%u"}, 225 {ecryptfs_opt_fn_cipher_key_bytes, "ecryptfs_fn_key_bytes=%u"},
225 {ecryptfs_opt_unlink_sigs, "ecryptfs_unlink_sigs"}, 226 {ecryptfs_opt_unlink_sigs, "ecryptfs_unlink_sigs"},
227 {ecryptfs_opt_mount_auth_tok_only, "ecryptfs_mount_auth_tok_only"},
226 {ecryptfs_opt_err, NULL} 228 {ecryptfs_opt_err, NULL}
227}; 229};
228 230
@@ -406,6 +408,10 @@ static int ecryptfs_parse_options(struct ecryptfs_sb_info *sbi, char *options)
406 case ecryptfs_opt_unlink_sigs: 408 case ecryptfs_opt_unlink_sigs:
407 mount_crypt_stat->flags |= ECRYPTFS_UNLINK_SIGS; 409 mount_crypt_stat->flags |= ECRYPTFS_UNLINK_SIGS;
408 break; 410 break;
411 case ecryptfs_opt_mount_auth_tok_only:
412 mount_crypt_stat->flags |=
413 ECRYPTFS_GLOBAL_MOUNT_AUTH_TOK_ONLY;
414 break;
409 case ecryptfs_opt_err: 415 case ecryptfs_opt_err:
410 default: 416 default:
411 printk(KERN_WARNING 417 printk(KERN_WARNING
@@ -540,9 +546,8 @@ out:
540 * ecryptfs_interpose to perform most of the linking 546 * ecryptfs_interpose to perform most of the linking
541 * ecryptfs_interpose(): links the lower filesystem into ecryptfs (inode.c) 547 * ecryptfs_interpose(): links the lower filesystem into ecryptfs (inode.c)
542 */ 548 */
543static int ecryptfs_get_sb(struct file_system_type *fs_type, int flags, 549static struct dentry *ecryptfs_mount(struct file_system_type *fs_type, int flags,
544 const char *dev_name, void *raw_data, 550 const char *dev_name, void *raw_data)
545 struct vfsmount *mnt)
546{ 551{
547 struct super_block *s; 552 struct super_block *s;
548 struct ecryptfs_sb_info *sbi; 553 struct ecryptfs_sb_info *sbi;
@@ -607,8 +612,7 @@ static int ecryptfs_get_sb(struct file_system_type *fs_type, int flags,
607 err = "Reading sb failed"; 612 err = "Reading sb failed";
608 goto out; 613 goto out;
609 } 614 }
610 simple_set_mnt(mnt, s); 615 return dget(s->s_root);
611 return 0;
612 616
613out: 617out:
614 if (sbi) { 618 if (sbi) {
@@ -616,7 +620,7 @@ out:
616 kmem_cache_free(ecryptfs_sb_info_cache, sbi); 620 kmem_cache_free(ecryptfs_sb_info_cache, sbi);
617 } 621 }
618 printk(KERN_ERR "%s; rc = [%d]\n", err, rc); 622 printk(KERN_ERR "%s; rc = [%d]\n", err, rc);
619 return rc; 623 return ERR_PTR(rc);
620} 624}
621 625
622/** 626/**
@@ -639,7 +643,7 @@ static void ecryptfs_kill_block_super(struct super_block *sb)
639static struct file_system_type ecryptfs_fs_type = { 643static struct file_system_type ecryptfs_fs_type = {
640 .owner = THIS_MODULE, 644 .owner = THIS_MODULE,
641 .name = "ecryptfs", 645 .name = "ecryptfs",
642 .get_sb = ecryptfs_get_sb, 646 .mount = ecryptfs_mount,
643 .kill_sb = ecryptfs_kill_block_super, 647 .kill_sb = ecryptfs_kill_block_super,
644 .fs_flags = 0 648 .fs_flags = 0
645}; 649};
diff --git a/fs/ecryptfs/super.c b/fs/ecryptfs/super.c
index f7fc286a3aa9..253732382d37 100644
--- a/fs/ecryptfs/super.c
+++ b/fs/ecryptfs/super.c
@@ -180,6 +180,8 @@ static int ecryptfs_show_options(struct seq_file *m, struct vfsmount *mnt)
180 seq_printf(m, ",ecryptfs_encrypted_view"); 180 seq_printf(m, ",ecryptfs_encrypted_view");
181 if (mount_crypt_stat->flags & ECRYPTFS_UNLINK_SIGS) 181 if (mount_crypt_stat->flags & ECRYPTFS_UNLINK_SIGS)
182 seq_printf(m, ",ecryptfs_unlink_sigs"); 182 seq_printf(m, ",ecryptfs_unlink_sigs");
183 if (mount_crypt_stat->flags & ECRYPTFS_GLOBAL_MOUNT_AUTH_TOK_ONLY)
184 seq_printf(m, ",ecryptfs_mount_auth_tok_only");
183 185
184 return 0; 186 return 0;
185} 187}
diff --git a/fs/efs/super.c b/fs/efs/super.c
index f04942810818..5073a07652cc 100644
--- a/fs/efs/super.c
+++ b/fs/efs/super.c
@@ -20,16 +20,16 @@
20static int efs_statfs(struct dentry *dentry, struct kstatfs *buf); 20static int efs_statfs(struct dentry *dentry, struct kstatfs *buf);
21static int efs_fill_super(struct super_block *s, void *d, int silent); 21static int efs_fill_super(struct super_block *s, void *d, int silent);
22 22
23static int efs_get_sb(struct file_system_type *fs_type, 23static struct dentry *efs_mount(struct file_system_type *fs_type,
24 int flags, const char *dev_name, void *data, struct vfsmount *mnt) 24 int flags, const char *dev_name, void *data)
25{ 25{
26 return get_sb_bdev(fs_type, flags, dev_name, data, efs_fill_super, mnt); 26 return mount_bdev(fs_type, flags, dev_name, data, efs_fill_super);
27} 27}
28 28
29static struct file_system_type efs_fs_type = { 29static struct file_system_type efs_fs_type = {
30 .owner = THIS_MODULE, 30 .owner = THIS_MODULE,
31 .name = "efs", 31 .name = "efs",
32 .get_sb = efs_get_sb, 32 .mount = efs_mount,
33 .kill_sb = kill_block_super, 33 .kill_sb = kill_block_super,
34 .fs_flags = FS_REQUIRES_DEV, 34 .fs_flags = FS_REQUIRES_DEV,
35}; 35};
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 256bb7bb102a..8cf07242067d 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -77,9 +77,6 @@
77/* Maximum number of nesting allowed inside epoll sets */ 77/* Maximum number of nesting allowed inside epoll sets */
78#define EP_MAX_NESTS 4 78#define EP_MAX_NESTS 4
79 79
80/* Maximum msec timeout value storeable in a long int */
81#define EP_MAX_MSTIMEO min(1000ULL * MAX_SCHEDULE_TIMEOUT / HZ, (LONG_MAX - 999ULL) / HZ)
82
83#define EP_MAX_EVENTS (INT_MAX / sizeof(struct epoll_event)) 80#define EP_MAX_EVENTS (INT_MAX / sizeof(struct epoll_event))
84 81
85#define EP_UNACTIVE_PTR ((void *) -1L) 82#define EP_UNACTIVE_PTR ((void *) -1L)
@@ -1117,18 +1114,22 @@ static int ep_send_events(struct eventpoll *ep,
1117static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events, 1114static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events,
1118 int maxevents, long timeout) 1115 int maxevents, long timeout)
1119{ 1116{
1120 int res, eavail; 1117 int res, eavail, timed_out = 0;
1121 unsigned long flags; 1118 unsigned long flags;
1122 long jtimeout; 1119 long slack;
1123 wait_queue_t wait; 1120 wait_queue_t wait;
1124 1121 struct timespec end_time;
1125 /* 1122 ktime_t expires, *to = NULL;
1126 * Calculate the timeout by checking for the "infinite" value (-1) 1123
1127 * and the overflow condition. The passed timeout is in milliseconds, 1124 if (timeout > 0) {
1128 * that why (t * HZ) / 1000. 1125 ktime_get_ts(&end_time);
1129 */ 1126 timespec_add_ns(&end_time, (u64)timeout * NSEC_PER_MSEC);
1130 jtimeout = (timeout < 0 || timeout >= EP_MAX_MSTIMEO) ? 1127 slack = select_estimate_accuracy(&end_time);
1131 MAX_SCHEDULE_TIMEOUT : (timeout * HZ + 999) / 1000; 1128 to = &expires;
1129 *to = timespec_to_ktime(end_time);
1130 } else if (timeout == 0) {
1131 timed_out = 1;
1132 }
1132 1133
1133retry: 1134retry:
1134 spin_lock_irqsave(&ep->lock, flags); 1135 spin_lock_irqsave(&ep->lock, flags);
@@ -1150,7 +1151,7 @@ retry:
1150 * to TASK_INTERRUPTIBLE before doing the checks. 1151 * to TASK_INTERRUPTIBLE before doing the checks.
1151 */ 1152 */
1152 set_current_state(TASK_INTERRUPTIBLE); 1153 set_current_state(TASK_INTERRUPTIBLE);
1153 if (!list_empty(&ep->rdllist) || !jtimeout) 1154 if (!list_empty(&ep->rdllist) || timed_out)
1154 break; 1155 break;
1155 if (signal_pending(current)) { 1156 if (signal_pending(current)) {
1156 res = -EINTR; 1157 res = -EINTR;
@@ -1158,7 +1159,9 @@ retry:
1158 } 1159 }
1159 1160
1160 spin_unlock_irqrestore(&ep->lock, flags); 1161 spin_unlock_irqrestore(&ep->lock, flags);
1161 jtimeout = schedule_timeout(jtimeout); 1162 if (!schedule_hrtimeout_range(to, slack, HRTIMER_MODE_ABS))
1163 timed_out = 1;
1164
1162 spin_lock_irqsave(&ep->lock, flags); 1165 spin_lock_irqsave(&ep->lock, flags);
1163 } 1166 }
1164 __remove_wait_queue(&ep->wq, &wait); 1167 __remove_wait_queue(&ep->wq, &wait);
@@ -1176,7 +1179,7 @@ retry:
1176 * more luck. 1179 * more luck.
1177 */ 1180 */
1178 if (!res && eavail && 1181 if (!res && eavail &&
1179 !(res = ep_send_events(ep, events, maxevents)) && jtimeout) 1182 !(res = ep_send_events(ep, events, maxevents)) && !timed_out)
1180 goto retry; 1183 goto retry;
1181 1184
1182 return res; 1185 return res;
diff --git a/fs/exec.c b/fs/exec.c
index 6d2b6f936858..99d33a1371e9 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -54,6 +54,7 @@
54#include <linux/fsnotify.h> 54#include <linux/fsnotify.h>
55#include <linux/fs_struct.h> 55#include <linux/fs_struct.h>
56#include <linux/pipe_fs_i.h> 56#include <linux/pipe_fs_i.h>
57#include <linux/oom.h>
57 58
58#include <asm/uaccess.h> 59#include <asm/uaccess.h>
59#include <asm/mmu_context.h> 60#include <asm/mmu_context.h>
@@ -65,6 +66,12 @@ char core_pattern[CORENAME_MAX_SIZE] = "core";
65unsigned int core_pipe_limit; 66unsigned int core_pipe_limit;
66int suid_dumpable = 0; 67int suid_dumpable = 0;
67 68
69struct core_name {
70 char *corename;
71 int used, size;
72};
73static atomic_t call_count = ATOMIC_INIT(1);
74
68/* The maximal length of core_pattern is also specified in sysctl.c */ 75/* The maximal length of core_pattern is also specified in sysctl.c */
69 76
70static LIST_HEAD(formats); 77static LIST_HEAD(formats);
@@ -759,6 +766,10 @@ static int exec_mmap(struct mm_struct *mm)
759 tsk->mm = mm; 766 tsk->mm = mm;
760 tsk->active_mm = mm; 767 tsk->active_mm = mm;
761 activate_mm(active_mm, mm); 768 activate_mm(active_mm, mm);
769 if (old_mm && tsk->signal->oom_score_adj == OOM_SCORE_ADJ_MIN) {
770 atomic_dec(&old_mm->oom_disable_count);
771 atomic_inc(&tsk->mm->oom_disable_count);
772 }
762 task_unlock(tsk); 773 task_unlock(tsk);
763 arch_pick_mmap_layout(mm); 774 arch_pick_mmap_layout(mm);
764 if (old_mm) { 775 if (old_mm) {
@@ -998,7 +1009,7 @@ int flush_old_exec(struct linux_binprm * bprm)
998 1009
999 bprm->mm = NULL; /* We're using it now */ 1010 bprm->mm = NULL; /* We're using it now */
1000 1011
1001 current->flags &= ~PF_RANDOMIZE; 1012 current->flags &= ~(PF_RANDOMIZE | PF_KTHREAD);
1002 flush_thread(); 1013 flush_thread();
1003 current->personality &= ~bprm->per_clear; 1014 current->personality &= ~bprm->per_clear;
1004 1015
@@ -1078,14 +1089,14 @@ EXPORT_SYMBOL(setup_new_exec);
1078 */ 1089 */
1079int prepare_bprm_creds(struct linux_binprm *bprm) 1090int prepare_bprm_creds(struct linux_binprm *bprm)
1080{ 1091{
1081 if (mutex_lock_interruptible(&current->cred_guard_mutex)) 1092 if (mutex_lock_interruptible(&current->signal->cred_guard_mutex))
1082 return -ERESTARTNOINTR; 1093 return -ERESTARTNOINTR;
1083 1094
1084 bprm->cred = prepare_exec_creds(); 1095 bprm->cred = prepare_exec_creds();
1085 if (likely(bprm->cred)) 1096 if (likely(bprm->cred))
1086 return 0; 1097 return 0;
1087 1098
1088 mutex_unlock(&current->cred_guard_mutex); 1099 mutex_unlock(&current->signal->cred_guard_mutex);
1089 return -ENOMEM; 1100 return -ENOMEM;
1090} 1101}
1091 1102
@@ -1093,7 +1104,7 @@ void free_bprm(struct linux_binprm *bprm)
1093{ 1104{
1094 free_arg_pages(bprm); 1105 free_arg_pages(bprm);
1095 if (bprm->cred) { 1106 if (bprm->cred) {
1096 mutex_unlock(&current->cred_guard_mutex); 1107 mutex_unlock(&current->signal->cred_guard_mutex);
1097 abort_creds(bprm->cred); 1108 abort_creds(bprm->cred);
1098 } 1109 }
1099 kfree(bprm); 1110 kfree(bprm);
@@ -1114,13 +1125,13 @@ void install_exec_creds(struct linux_binprm *bprm)
1114 * credentials; any time after this it may be unlocked. 1125 * credentials; any time after this it may be unlocked.
1115 */ 1126 */
1116 security_bprm_committed_creds(bprm); 1127 security_bprm_committed_creds(bprm);
1117 mutex_unlock(&current->cred_guard_mutex); 1128 mutex_unlock(&current->signal->cred_guard_mutex);
1118} 1129}
1119EXPORT_SYMBOL(install_exec_creds); 1130EXPORT_SYMBOL(install_exec_creds);
1120 1131
1121/* 1132/*
1122 * determine how safe it is to execute the proposed program 1133 * determine how safe it is to execute the proposed program
1123 * - the caller must hold current->cred_guard_mutex to protect against 1134 * - the caller must hold ->cred_guard_mutex to protect against
1124 * PTRACE_ATTACH 1135 * PTRACE_ATTACH
1125 */ 1136 */
1126int check_unsafe_exec(struct linux_binprm *bprm) 1137int check_unsafe_exec(struct linux_binprm *bprm)
@@ -1401,7 +1412,6 @@ int do_execve(const char * filename,
1401 if (retval < 0) 1412 if (retval < 0)
1402 goto out; 1413 goto out;
1403 1414
1404 current->flags &= ~PF_KTHREAD;
1405 retval = search_binary_handler(bprm,regs); 1415 retval = search_binary_handler(bprm,regs);
1406 if (retval < 0) 1416 if (retval < 0)
1407 goto out; 1417 goto out;
@@ -1454,127 +1464,148 @@ void set_binfmt(struct linux_binfmt *new)
1454 1464
1455EXPORT_SYMBOL(set_binfmt); 1465EXPORT_SYMBOL(set_binfmt);
1456 1466
1467static int expand_corename(struct core_name *cn)
1468{
1469 char *old_corename = cn->corename;
1470
1471 cn->size = CORENAME_MAX_SIZE * atomic_inc_return(&call_count);
1472 cn->corename = krealloc(old_corename, cn->size, GFP_KERNEL);
1473
1474 if (!cn->corename) {
1475 kfree(old_corename);
1476 return -ENOMEM;
1477 }
1478
1479 return 0;
1480}
1481
1482static int cn_printf(struct core_name *cn, const char *fmt, ...)
1483{
1484 char *cur;
1485 int need;
1486 int ret;
1487 va_list arg;
1488
1489 va_start(arg, fmt);
1490 need = vsnprintf(NULL, 0, fmt, arg);
1491 va_end(arg);
1492
1493 if (likely(need < cn->size - cn->used - 1))
1494 goto out_printf;
1495
1496 ret = expand_corename(cn);
1497 if (ret)
1498 goto expand_fail;
1499
1500out_printf:
1501 cur = cn->corename + cn->used;
1502 va_start(arg, fmt);
1503 vsnprintf(cur, need + 1, fmt, arg);
1504 va_end(arg);
1505 cn->used += need;
1506 return 0;
1507
1508expand_fail:
1509 return ret;
1510}
1511
1457/* format_corename will inspect the pattern parameter, and output a 1512/* format_corename will inspect the pattern parameter, and output a
1458 * name into corename, which must have space for at least 1513 * name into corename, which must have space for at least
1459 * CORENAME_MAX_SIZE bytes plus one byte for the zero terminator. 1514 * CORENAME_MAX_SIZE bytes plus one byte for the zero terminator.
1460 */ 1515 */
1461static int format_corename(char *corename, long signr) 1516static int format_corename(struct core_name *cn, long signr)
1462{ 1517{
1463 const struct cred *cred = current_cred(); 1518 const struct cred *cred = current_cred();
1464 const char *pat_ptr = core_pattern; 1519 const char *pat_ptr = core_pattern;
1465 int ispipe = (*pat_ptr == '|'); 1520 int ispipe = (*pat_ptr == '|');
1466 char *out_ptr = corename;
1467 char *const out_end = corename + CORENAME_MAX_SIZE;
1468 int rc;
1469 int pid_in_pattern = 0; 1521 int pid_in_pattern = 0;
1522 int err = 0;
1523
1524 cn->size = CORENAME_MAX_SIZE * atomic_read(&call_count);
1525 cn->corename = kmalloc(cn->size, GFP_KERNEL);
1526 cn->used = 0;
1527
1528 if (!cn->corename)
1529 return -ENOMEM;
1470 1530
1471 /* Repeat as long as we have more pattern to process and more output 1531 /* Repeat as long as we have more pattern to process and more output
1472 space */ 1532 space */
1473 while (*pat_ptr) { 1533 while (*pat_ptr) {
1474 if (*pat_ptr != '%') { 1534 if (*pat_ptr != '%') {
1475 if (out_ptr == out_end) 1535 if (*pat_ptr == 0)
1476 goto out; 1536 goto out;
1477 *out_ptr++ = *pat_ptr++; 1537 err = cn_printf(cn, "%c", *pat_ptr++);
1478 } else { 1538 } else {
1479 switch (*++pat_ptr) { 1539 switch (*++pat_ptr) {
1540 /* single % at the end, drop that */
1480 case 0: 1541 case 0:
1481 goto out; 1542 goto out;
1482 /* Double percent, output one percent */ 1543 /* Double percent, output one percent */
1483 case '%': 1544 case '%':
1484 if (out_ptr == out_end) 1545 err = cn_printf(cn, "%c", '%');
1485 goto out;
1486 *out_ptr++ = '%';
1487 break; 1546 break;
1488 /* pid */ 1547 /* pid */
1489 case 'p': 1548 case 'p':
1490 pid_in_pattern = 1; 1549 pid_in_pattern = 1;
1491 rc = snprintf(out_ptr, out_end - out_ptr, 1550 err = cn_printf(cn, "%d",
1492 "%d", task_tgid_vnr(current)); 1551 task_tgid_vnr(current));
1493 if (rc > out_end - out_ptr)
1494 goto out;
1495 out_ptr += rc;
1496 break; 1552 break;
1497 /* uid */ 1553 /* uid */
1498 case 'u': 1554 case 'u':
1499 rc = snprintf(out_ptr, out_end - out_ptr, 1555 err = cn_printf(cn, "%d", cred->uid);
1500 "%d", cred->uid);
1501 if (rc > out_end - out_ptr)
1502 goto out;
1503 out_ptr += rc;
1504 break; 1556 break;
1505 /* gid */ 1557 /* gid */
1506 case 'g': 1558 case 'g':
1507 rc = snprintf(out_ptr, out_end - out_ptr, 1559 err = cn_printf(cn, "%d", cred->gid);
1508 "%d", cred->gid);
1509 if (rc > out_end - out_ptr)
1510 goto out;
1511 out_ptr += rc;
1512 break; 1560 break;
1513 /* signal that caused the coredump */ 1561 /* signal that caused the coredump */
1514 case 's': 1562 case 's':
1515 rc = snprintf(out_ptr, out_end - out_ptr, 1563 err = cn_printf(cn, "%ld", signr);
1516 "%ld", signr);
1517 if (rc > out_end - out_ptr)
1518 goto out;
1519 out_ptr += rc;
1520 break; 1564 break;
1521 /* UNIX time of coredump */ 1565 /* UNIX time of coredump */
1522 case 't': { 1566 case 't': {
1523 struct timeval tv; 1567 struct timeval tv;
1524 do_gettimeofday(&tv); 1568 do_gettimeofday(&tv);
1525 rc = snprintf(out_ptr, out_end - out_ptr, 1569 err = cn_printf(cn, "%lu", tv.tv_sec);
1526 "%lu", tv.tv_sec);
1527 if (rc > out_end - out_ptr)
1528 goto out;
1529 out_ptr += rc;
1530 break; 1570 break;
1531 } 1571 }
1532 /* hostname */ 1572 /* hostname */
1533 case 'h': 1573 case 'h':
1534 down_read(&uts_sem); 1574 down_read(&uts_sem);
1535 rc = snprintf(out_ptr, out_end - out_ptr, 1575 err = cn_printf(cn, "%s",
1536 "%s", utsname()->nodename); 1576 utsname()->nodename);
1537 up_read(&uts_sem); 1577 up_read(&uts_sem);
1538 if (rc > out_end - out_ptr)
1539 goto out;
1540 out_ptr += rc;
1541 break; 1578 break;
1542 /* executable */ 1579 /* executable */
1543 case 'e': 1580 case 'e':
1544 rc = snprintf(out_ptr, out_end - out_ptr, 1581 err = cn_printf(cn, "%s", current->comm);
1545 "%s", current->comm);
1546 if (rc > out_end - out_ptr)
1547 goto out;
1548 out_ptr += rc;
1549 break; 1582 break;
1550 /* core limit size */ 1583 /* core limit size */
1551 case 'c': 1584 case 'c':
1552 rc = snprintf(out_ptr, out_end - out_ptr, 1585 err = cn_printf(cn, "%lu",
1553 "%lu", rlimit(RLIMIT_CORE)); 1586 rlimit(RLIMIT_CORE));
1554 if (rc > out_end - out_ptr)
1555 goto out;
1556 out_ptr += rc;
1557 break; 1587 break;
1558 default: 1588 default:
1559 break; 1589 break;
1560 } 1590 }
1561 ++pat_ptr; 1591 ++pat_ptr;
1562 } 1592 }
1593
1594 if (err)
1595 return err;
1563 } 1596 }
1597
1564 /* Backward compatibility with core_uses_pid: 1598 /* Backward compatibility with core_uses_pid:
1565 * 1599 *
1566 * If core_pattern does not include a %p (as is the default) 1600 * If core_pattern does not include a %p (as is the default)
1567 * and core_uses_pid is set, then .%pid will be appended to 1601 * and core_uses_pid is set, then .%pid will be appended to
1568 * the filename. Do not do this for piped commands. */ 1602 * the filename. Do not do this for piped commands. */
1569 if (!ispipe && !pid_in_pattern && core_uses_pid) { 1603 if (!ispipe && !pid_in_pattern && core_uses_pid) {
1570 rc = snprintf(out_ptr, out_end - out_ptr, 1604 err = cn_printf(cn, ".%d", task_tgid_vnr(current));
1571 ".%d", task_tgid_vnr(current)); 1605 if (err)
1572 if (rc > out_end - out_ptr) 1606 return err;
1573 goto out;
1574 out_ptr += rc;
1575 } 1607 }
1576out: 1608out:
1577 *out_ptr = 0;
1578 return ispipe; 1609 return ispipe;
1579} 1610}
1580 1611
@@ -1851,7 +1882,7 @@ static int umh_pipe_setup(struct subprocess_info *info)
1851void do_coredump(long signr, int exit_code, struct pt_regs *regs) 1882void do_coredump(long signr, int exit_code, struct pt_regs *regs)
1852{ 1883{
1853 struct core_state core_state; 1884 struct core_state core_state;
1854 char corename[CORENAME_MAX_SIZE + 1]; 1885 struct core_name cn;
1855 struct mm_struct *mm = current->mm; 1886 struct mm_struct *mm = current->mm;
1856 struct linux_binfmt * binfmt; 1887 struct linux_binfmt * binfmt;
1857 const struct cred *old_cred; 1888 const struct cred *old_cred;
@@ -1906,7 +1937,13 @@ void do_coredump(long signr, int exit_code, struct pt_regs *regs)
1906 */ 1937 */
1907 clear_thread_flag(TIF_SIGPENDING); 1938 clear_thread_flag(TIF_SIGPENDING);
1908 1939
1909 ispipe = format_corename(corename, signr); 1940 ispipe = format_corename(&cn, signr);
1941
1942 if (ispipe == -ENOMEM) {
1943 printk(KERN_WARNING "format_corename failed\n");
1944 printk(KERN_WARNING "Aborting core\n");
1945 goto fail_corename;
1946 }
1910 1947
1911 if (ispipe) { 1948 if (ispipe) {
1912 int dump_count; 1949 int dump_count;
@@ -1943,7 +1980,7 @@ void do_coredump(long signr, int exit_code, struct pt_regs *regs)
1943 goto fail_dropcount; 1980 goto fail_dropcount;
1944 } 1981 }
1945 1982
1946 helper_argv = argv_split(GFP_KERNEL, corename+1, NULL); 1983 helper_argv = argv_split(GFP_KERNEL, cn.corename+1, NULL);
1947 if (!helper_argv) { 1984 if (!helper_argv) {
1948 printk(KERN_WARNING "%s failed to allocate memory\n", 1985 printk(KERN_WARNING "%s failed to allocate memory\n",
1949 __func__); 1986 __func__);
@@ -1956,7 +1993,7 @@ void do_coredump(long signr, int exit_code, struct pt_regs *regs)
1956 argv_free(helper_argv); 1993 argv_free(helper_argv);
1957 if (retval) { 1994 if (retval) {
1958 printk(KERN_INFO "Core dump to %s pipe failed\n", 1995 printk(KERN_INFO "Core dump to %s pipe failed\n",
1959 corename); 1996 cn.corename);
1960 goto close_fail; 1997 goto close_fail;
1961 } 1998 }
1962 } else { 1999 } else {
@@ -1965,7 +2002,7 @@ void do_coredump(long signr, int exit_code, struct pt_regs *regs)
1965 if (cprm.limit < binfmt->min_coredump) 2002 if (cprm.limit < binfmt->min_coredump)
1966 goto fail_unlock; 2003 goto fail_unlock;
1967 2004
1968 cprm.file = filp_open(corename, 2005 cprm.file = filp_open(cn.corename,
1969 O_CREAT | 2 | O_NOFOLLOW | O_LARGEFILE | flag, 2006 O_CREAT | 2 | O_NOFOLLOW | O_LARGEFILE | flag,
1970 0600); 2007 0600);
1971 if (IS_ERR(cprm.file)) 2008 if (IS_ERR(cprm.file))
@@ -2007,6 +2044,8 @@ fail_dropcount:
2007 if (ispipe) 2044 if (ispipe)
2008 atomic_dec(&core_dump_count); 2045 atomic_dec(&core_dump_count);
2009fail_unlock: 2046fail_unlock:
2047 kfree(cn.corename);
2048fail_corename:
2010 coredump_finish(mm); 2049 coredump_finish(mm);
2011 revert_creds(old_cred); 2050 revert_creds(old_cred);
2012fail_creds: 2051fail_creds:
diff --git a/fs/exofs/dir.c b/fs/exofs/dir.c
index d91e9d829bc1..dcc941d82d67 100644
--- a/fs/exofs/dir.c
+++ b/fs/exofs/dir.c
@@ -420,7 +420,7 @@ int exofs_set_link(struct inode *dir, struct exofs_dir_entry *de,
420 err = exofs_write_begin(NULL, page->mapping, pos, len, 420 err = exofs_write_begin(NULL, page->mapping, pos, len,
421 AOP_FLAG_UNINTERRUPTIBLE, &page, NULL); 421 AOP_FLAG_UNINTERRUPTIBLE, &page, NULL);
422 if (err) 422 if (err)
423 EXOFS_ERR("exofs_set_link: exofs_write_begin FAILD => %d\n", 423 EXOFS_ERR("exofs_set_link: exofs_write_begin FAILED => %d\n",
424 err); 424 err);
425 425
426 de->inode_no = cpu_to_le64(inode->i_ino); 426 de->inode_no = cpu_to_le64(inode->i_ino);
@@ -556,7 +556,7 @@ int exofs_delete_entry(struct exofs_dir_entry *dir, struct page *page)
556 err = exofs_write_begin(NULL, page->mapping, pos, to - from, 0, 556 err = exofs_write_begin(NULL, page->mapping, pos, to - from, 0,
557 &page, NULL); 557 &page, NULL);
558 if (err) 558 if (err)
559 EXOFS_ERR("exofs_delete_entry: exofs_write_begin FAILD => %d\n", 559 EXOFS_ERR("exofs_delete_entry: exofs_write_begin FAILED => %d\n",
560 err); 560 err);
561 if (pde) 561 if (pde)
562 pde->rec_len = cpu_to_le16(to - from); 562 pde->rec_len = cpu_to_le16(to - from);
diff --git a/fs/exofs/file.c b/fs/exofs/file.c
index 68cb23e3bb98..b905c79b4f0a 100644
--- a/fs/exofs/file.c
+++ b/fs/exofs/file.c
@@ -46,10 +46,6 @@ static int exofs_file_fsync(struct file *filp, int datasync)
46{ 46{
47 int ret; 47 int ret;
48 struct inode *inode = filp->f_mapping->host; 48 struct inode *inode = filp->f_mapping->host;
49 struct writeback_control wbc = {
50 .sync_mode = WB_SYNC_ALL,
51 .nr_to_write = 0, /* metadata-only; caller takes care of data */
52 };
53 struct super_block *sb; 49 struct super_block *sb;
54 50
55 if (!(inode->i_state & I_DIRTY)) 51 if (!(inode->i_state & I_DIRTY))
@@ -57,7 +53,7 @@ static int exofs_file_fsync(struct file *filp, int datasync)
57 if (datasync && !(inode->i_state & I_DIRTY_DATASYNC)) 53 if (datasync && !(inode->i_state & I_DIRTY_DATASYNC))
58 return 0; 54 return 0;
59 55
60 ret = sync_inode(inode, &wbc); 56 ret = sync_inode_metadata(inode, 1);
61 57
62 /* This is a good place to write the sb */ 58 /* This is a good place to write the sb */
63 /* TODO: Sechedule an sb-sync on create */ 59 /* TODO: Sechedule an sb-sync on create */
diff --git a/fs/exofs/inode.c b/fs/exofs/inode.c
index 44602754f758..42685424817b 100644
--- a/fs/exofs/inode.c
+++ b/fs/exofs/inode.c
@@ -185,7 +185,7 @@ static void update_write_page(struct page *page, int ret)
185/* Called at the end of reads, to optionally unlock pages and update their 185/* Called at the end of reads, to optionally unlock pages and update their
186 * status. 186 * status.
187 */ 187 */
188static int __readpages_done(struct page_collect *pcol, bool do_unlock) 188static int __readpages_done(struct page_collect *pcol)
189{ 189{
190 int i; 190 int i;
191 u64 resid; 191 u64 resid;
@@ -221,7 +221,7 @@ static int __readpages_done(struct page_collect *pcol, bool do_unlock)
221 page_stat ? "bad_bytes" : "good_bytes"); 221 page_stat ? "bad_bytes" : "good_bytes");
222 222
223 ret = update_read_page(page, page_stat); 223 ret = update_read_page(page, page_stat);
224 if (do_unlock) 224 if (!pcol->read_4_write)
225 unlock_page(page); 225 unlock_page(page);
226 length += PAGE_SIZE; 226 length += PAGE_SIZE;
227 } 227 }
@@ -236,7 +236,7 @@ static void readpages_done(struct exofs_io_state *ios, void *p)
236{ 236{
237 struct page_collect *pcol = p; 237 struct page_collect *pcol = p;
238 238
239 __readpages_done(pcol, true); 239 __readpages_done(pcol);
240 atomic_dec(&pcol->sbi->s_curr_pending); 240 atomic_dec(&pcol->sbi->s_curr_pending);
241 kfree(pcol); 241 kfree(pcol);
242} 242}
@@ -257,7 +257,7 @@ static void _unlock_pcol_pages(struct page_collect *pcol, int ret, int rw)
257 } 257 }
258} 258}
259 259
260static int read_exec(struct page_collect *pcol, bool is_sync) 260static int read_exec(struct page_collect *pcol)
261{ 261{
262 struct exofs_i_info *oi = exofs_i(pcol->inode); 262 struct exofs_i_info *oi = exofs_i(pcol->inode);
263 struct exofs_io_state *ios = pcol->ios; 263 struct exofs_io_state *ios = pcol->ios;
@@ -267,17 +267,14 @@ static int read_exec(struct page_collect *pcol, bool is_sync)
267 if (!pcol->pages) 267 if (!pcol->pages)
268 return 0; 268 return 0;
269 269
270 /* see comment in _readpage() about sync reads */
271 WARN_ON(is_sync && (pcol->nr_pages != 1));
272
273 ios->pages = pcol->pages; 270 ios->pages = pcol->pages;
274 ios->nr_pages = pcol->nr_pages; 271 ios->nr_pages = pcol->nr_pages;
275 ios->length = pcol->length; 272 ios->length = pcol->length;
276 ios->offset = pcol->pg_first << PAGE_CACHE_SHIFT; 273 ios->offset = pcol->pg_first << PAGE_CACHE_SHIFT;
277 274
278 if (is_sync) { 275 if (pcol->read_4_write) {
279 exofs_oi_read(oi, pcol->ios); 276 exofs_oi_read(oi, pcol->ios);
280 return __readpages_done(pcol, false); 277 return __readpages_done(pcol);
281 } 278 }
282 279
283 pcol_copy = kmalloc(sizeof(*pcol_copy), GFP_KERNEL); 280 pcol_copy = kmalloc(sizeof(*pcol_copy), GFP_KERNEL);
@@ -303,7 +300,7 @@ static int read_exec(struct page_collect *pcol, bool is_sync)
303 return 0; 300 return 0;
304 301
305err: 302err:
306 if (!is_sync) 303 if (!pcol->read_4_write)
307 _unlock_pcol_pages(pcol, ret, READ); 304 _unlock_pcol_pages(pcol, ret, READ);
308 305
309 pcol_free(pcol); 306 pcol_free(pcol);
@@ -356,7 +353,7 @@ static int readpage_strip(void *data, struct page *page)
356 EXOFS_DBGMSG("readpage_strip(0x%lx, 0x%lx) empty page," 353 EXOFS_DBGMSG("readpage_strip(0x%lx, 0x%lx) empty page,"
357 " splitting\n", inode->i_ino, page->index); 354 " splitting\n", inode->i_ino, page->index);
358 355
359 return read_exec(pcol, false); 356 return read_exec(pcol);
360 } 357 }
361 358
362try_again: 359try_again:
@@ -366,7 +363,7 @@ try_again:
366 } else if (unlikely((pcol->pg_first + pcol->nr_pages) != 363 } else if (unlikely((pcol->pg_first + pcol->nr_pages) !=
367 page->index)) { 364 page->index)) {
368 /* Discontinuity detected, split the request */ 365 /* Discontinuity detected, split the request */
369 ret = read_exec(pcol, false); 366 ret = read_exec(pcol);
370 if (unlikely(ret)) 367 if (unlikely(ret))
371 goto fail; 368 goto fail;
372 goto try_again; 369 goto try_again;
@@ -391,7 +388,7 @@ try_again:
391 page, len, pcol->nr_pages, pcol->length); 388 page, len, pcol->nr_pages, pcol->length);
392 389
393 /* split the request, and start again with current page */ 390 /* split the request, and start again with current page */
394 ret = read_exec(pcol, false); 391 ret = read_exec(pcol);
395 if (unlikely(ret)) 392 if (unlikely(ret))
396 goto fail; 393 goto fail;
397 394
@@ -420,27 +417,24 @@ static int exofs_readpages(struct file *file, struct address_space *mapping,
420 return ret; 417 return ret;
421 } 418 }
422 419
423 return read_exec(&pcol, false); 420 return read_exec(&pcol);
424} 421}
425 422
426static int _readpage(struct page *page, bool is_sync) 423static int _readpage(struct page *page, bool read_4_write)
427{ 424{
428 struct page_collect pcol; 425 struct page_collect pcol;
429 int ret; 426 int ret;
430 427
431 _pcol_init(&pcol, 1, page->mapping->host); 428 _pcol_init(&pcol, 1, page->mapping->host);
432 429
433 /* readpage_strip might call read_exec(,is_sync==false) at several 430 pcol.read_4_write = read_4_write;
434 * places but not if we have a single page.
435 */
436 pcol.read_4_write = is_sync;
437 ret = readpage_strip(&pcol, page); 431 ret = readpage_strip(&pcol, page);
438 if (ret) { 432 if (ret) {
439 EXOFS_ERR("_readpage => %d\n", ret); 433 EXOFS_ERR("_readpage => %d\n", ret);
440 return ret; 434 return ret;
441 } 435 }
442 436
443 return read_exec(&pcol, is_sync); 437 return read_exec(&pcol);
444} 438}
445 439
446/* 440/*
@@ -1036,6 +1030,7 @@ struct inode *exofs_iget(struct super_block *sb, unsigned long ino)
1036 memcpy(oi->i_data, fcb.i_data, sizeof(fcb.i_data)); 1030 memcpy(oi->i_data, fcb.i_data, sizeof(fcb.i_data));
1037 } 1031 }
1038 1032
1033 inode->i_mapping->backing_dev_info = sb->s_bdi;
1039 if (S_ISREG(inode->i_mode)) { 1034 if (S_ISREG(inode->i_mode)) {
1040 inode->i_op = &exofs_file_inode_operations; 1035 inode->i_op = &exofs_file_inode_operations;
1041 inode->i_fop = &exofs_file_operations; 1036 inode->i_fop = &exofs_file_operations;
@@ -1072,8 +1067,10 @@ bad_inode:
1072int __exofs_wait_obj_created(struct exofs_i_info *oi) 1067int __exofs_wait_obj_created(struct exofs_i_info *oi)
1073{ 1068{
1074 if (!obj_created(oi)) { 1069 if (!obj_created(oi)) {
1070 EXOFS_DBGMSG("!obj_created\n");
1075 BUG_ON(!obj_2bcreated(oi)); 1071 BUG_ON(!obj_2bcreated(oi));
1076 wait_event(oi->i_wq, obj_created(oi)); 1072 wait_event(oi->i_wq, obj_created(oi));
1073 EXOFS_DBGMSG("wait_event done\n");
1077 } 1074 }
1078 return unlikely(is_bad_inode(&oi->vfs_inode)) ? -EIO : 0; 1075 return unlikely(is_bad_inode(&oi->vfs_inode)) ? -EIO : 0;
1079} 1076}
@@ -1107,7 +1104,6 @@ static void create_done(struct exofs_io_state *ios, void *p)
1107 1104
1108 set_obj_created(oi); 1105 set_obj_created(oi);
1109 1106
1110 atomic_dec(&inode->i_count);
1111 wake_up(&oi->i_wq); 1107 wake_up(&oi->i_wq);
1112} 1108}
1113 1109
@@ -1135,6 +1131,7 @@ struct inode *exofs_new_inode(struct inode *dir, int mode)
1135 1131
1136 sbi = sb->s_fs_info; 1132 sbi = sb->s_fs_info;
1137 1133
1134 inode->i_mapping->backing_dev_info = sb->s_bdi;
1138 sb->s_dirt = 1; 1135 sb->s_dirt = 1;
1139 inode_init_owner(inode, dir, mode); 1136 inode_init_owner(inode, dir, mode);
1140 inode->i_ino = sbi->s_nextid++; 1137 inode->i_ino = sbi->s_nextid++;
@@ -1157,17 +1154,11 @@ struct inode *exofs_new_inode(struct inode *dir, int mode)
1157 ios->obj.id = exofs_oi_objno(oi); 1154 ios->obj.id = exofs_oi_objno(oi);
1158 exofs_make_credential(oi->i_cred, &ios->obj); 1155 exofs_make_credential(oi->i_cred, &ios->obj);
1159 1156
1160 /* increment the refcount so that the inode will still be around when we
1161 * reach the callback
1162 */
1163 atomic_inc(&inode->i_count);
1164
1165 ios->done = create_done; 1157 ios->done = create_done;
1166 ios->private = inode; 1158 ios->private = inode;
1167 ios->cred = oi->i_cred; 1159 ios->cred = oi->i_cred;
1168 ret = exofs_sbi_create(ios); 1160 ret = exofs_sbi_create(ios);
1169 if (ret) { 1161 if (ret) {
1170 atomic_dec(&inode->i_count);
1171 exofs_put_io_state(ios); 1162 exofs_put_io_state(ios);
1172 return ERR_PTR(ret); 1163 return ERR_PTR(ret);
1173 } 1164 }
@@ -1257,12 +1248,7 @@ static int exofs_update_inode(struct inode *inode, int do_sync)
1257 ios->out_attr_len = 1; 1248 ios->out_attr_len = 1;
1258 ios->out_attr = &attr; 1249 ios->out_attr = &attr;
1259 1250
1260 if (!obj_created(oi)) { 1251 wait_obj_created(oi);
1261 EXOFS_DBGMSG("!obj_created\n");
1262 BUG_ON(!obj_2bcreated(oi));
1263 wait_event(oi->i_wq, obj_created(oi));
1264 EXOFS_DBGMSG("wait_event done\n");
1265 }
1266 1252
1267 if (!do_sync) { 1253 if (!do_sync) {
1268 args->sbi = sbi; 1254 args->sbi = sbi;
@@ -1325,12 +1311,12 @@ void exofs_evict_inode(struct inode *inode)
1325 inode->i_size = 0; 1311 inode->i_size = 0;
1326 end_writeback(inode); 1312 end_writeback(inode);
1327 1313
1328 /* if we are deleting an obj that hasn't been created yet, wait */ 1314 /* if we are deleting an obj that hasn't been created yet, wait.
1329 if (!obj_created(oi)) { 1315 * This also makes sure that create_done cannot be called with an
1330 BUG_ON(!obj_2bcreated(oi)); 1316 * already evicted inode.
1331 wait_event(oi->i_wq, obj_created(oi)); 1317 */
1332 /* ignore the error attempt a remove anyway */ 1318 wait_obj_created(oi);
1333 } 1319 /* ignore the error, attempt a remove anyway */
1334 1320
1335 /* Now Remove the OSD objects */ 1321 /* Now Remove the OSD objects */
1336 ret = exofs_get_io_state(&sbi->layout, &ios); 1322 ret = exofs_get_io_state(&sbi->layout, &ios);
diff --git a/fs/exofs/namei.c b/fs/exofs/namei.c
index b7dd0c236863..264e95d02830 100644
--- a/fs/exofs/namei.c
+++ b/fs/exofs/namei.c
@@ -153,7 +153,7 @@ static int exofs_link(struct dentry *old_dentry, struct inode *dir,
153 153
154 inode->i_ctime = CURRENT_TIME; 154 inode->i_ctime = CURRENT_TIME;
155 inode_inc_link_count(inode); 155 inode_inc_link_count(inode);
156 atomic_inc(&inode->i_count); 156 ihold(inode);
157 157
158 return exofs_add_nondir(dentry, inode); 158 return exofs_add_nondir(dentry, inode);
159} 159}
diff --git a/fs/exofs/super.c b/fs/exofs/super.c
index 047e92fa3af8..79c3ae6e0456 100644
--- a/fs/exofs/super.c
+++ b/fs/exofs/super.c
@@ -659,19 +659,19 @@ free_bdi:
659/* 659/*
660 * Set up the superblock (calls exofs_fill_super eventually) 660 * Set up the superblock (calls exofs_fill_super eventually)
661 */ 661 */
662static int exofs_get_sb(struct file_system_type *type, 662static struct dentry *exofs_mount(struct file_system_type *type,
663 int flags, const char *dev_name, 663 int flags, const char *dev_name,
664 void *data, struct vfsmount *mnt) 664 void *data)
665{ 665{
666 struct exofs_mountopt opts; 666 struct exofs_mountopt opts;
667 int ret; 667 int ret;
668 668
669 ret = parse_options(data, &opts); 669 ret = parse_options(data, &opts);
670 if (ret) 670 if (ret)
671 return ret; 671 return ERR_PTR(ret);
672 672
673 opts.dev_name = dev_name; 673 opts.dev_name = dev_name;
674 return get_sb_nodev(type, flags, &opts, exofs_fill_super, mnt); 674 return mount_nodev(type, flags, &opts, exofs_fill_super);
675} 675}
676 676
677/* 677/*
@@ -809,7 +809,7 @@ static const struct export_operations exofs_export_ops = {
809static struct file_system_type exofs_type = { 809static struct file_system_type exofs_type = {
810 .owner = THIS_MODULE, 810 .owner = THIS_MODULE,
811 .name = "exofs", 811 .name = "exofs",
812 .get_sb = exofs_get_sb, 812 .mount = exofs_mount,
813 .kill_sb = generic_shutdown_super, 813 .kill_sb = generic_shutdown_super,
814}; 814};
815 815
diff --git a/fs/exportfs/expfs.c b/fs/exportfs/expfs.c
index e9e175949a63..51b304056f10 100644
--- a/fs/exportfs/expfs.c
+++ b/fs/exportfs/expfs.c
@@ -74,21 +74,20 @@ static struct dentry *
74find_disconnected_root(struct dentry *dentry) 74find_disconnected_root(struct dentry *dentry)
75{ 75{
76 dget(dentry); 76 dget(dentry);
77 spin_lock(&dentry->d_lock); 77 while (!IS_ROOT(dentry)) {
78 while (!IS_ROOT(dentry) && 78 struct dentry *parent = dget_parent(dentry);
79 (dentry->d_parent->d_flags & DCACHE_DISCONNECTED)) { 79
80 struct dentry *parent = dentry->d_parent; 80 if (!(parent->d_flags & DCACHE_DISCONNECTED)) {
81 dget(parent); 81 dput(parent);
82 spin_unlock(&dentry->d_lock); 82 break;
83 }
84
83 dput(dentry); 85 dput(dentry);
84 dentry = parent; 86 dentry = parent;
85 spin_lock(&dentry->d_lock);
86 } 87 }
87 spin_unlock(&dentry->d_lock);
88 return dentry; 88 return dentry;
89} 89}
90 90
91
92/* 91/*
93 * Make sure target_dir is fully connected to the dentry tree. 92 * Make sure target_dir is fully connected to the dentry tree.
94 * 93 *
diff --git a/fs/ext2/balloc.c b/fs/ext2/balloc.c
index c6c684b44ea1..0d06f4e75699 100644
--- a/fs/ext2/balloc.c
+++ b/fs/ext2/balloc.c
@@ -646,10 +646,9 @@ find_next_usable_block(int start, struct buffer_head *bh, int maxblocks)
646 return here; 646 return here;
647} 647}
648 648
649/* 649/**
650 * ext2_try_to_allocate() 650 * ext2_try_to_allocate()
651 * @sb: superblock 651 * @sb: superblock
652 * @handle: handle to this transaction
653 * @group: given allocation block group 652 * @group: given allocation block group
654 * @bitmap_bh: bufferhead holds the block bitmap 653 * @bitmap_bh: bufferhead holds the block bitmap
655 * @grp_goal: given target block within the group 654 * @grp_goal: given target block within the group
diff --git a/fs/ext2/dir.c b/fs/ext2/dir.c
index 764109886ec0..2709b34206ab 100644
--- a/fs/ext2/dir.c
+++ b/fs/ext2/dir.c
@@ -98,7 +98,7 @@ static int ext2_commit_chunk(struct page *page, loff_t pos, unsigned len)
98 if (IS_DIRSYNC(dir)) { 98 if (IS_DIRSYNC(dir)) {
99 err = write_one_page(page, 1); 99 err = write_one_page(page, 1);
100 if (!err) 100 if (!err)
101 err = ext2_sync_inode(dir); 101 err = sync_inode_metadata(dir, 1);
102 } else { 102 } else {
103 unlock_page(page); 103 unlock_page(page);
104 } 104 }
diff --git a/fs/ext2/ext2.h b/fs/ext2/ext2.h
index 416daa62242c..6346a2acf326 100644
--- a/fs/ext2/ext2.h
+++ b/fs/ext2/ext2.h
@@ -120,7 +120,6 @@ extern unsigned long ext2_count_free (struct buffer_head *, unsigned);
120extern struct inode *ext2_iget (struct super_block *, unsigned long); 120extern struct inode *ext2_iget (struct super_block *, unsigned long);
121extern int ext2_write_inode (struct inode *, struct writeback_control *); 121extern int ext2_write_inode (struct inode *, struct writeback_control *);
122extern void ext2_evict_inode(struct inode *); 122extern void ext2_evict_inode(struct inode *);
123extern int ext2_sync_inode (struct inode *);
124extern int ext2_get_block(struct inode *, sector_t, struct buffer_head *, int); 123extern int ext2_get_block(struct inode *, sector_t, struct buffer_head *, int);
125extern int ext2_setattr (struct dentry *, struct iattr *); 124extern int ext2_setattr (struct dentry *, struct iattr *);
126extern void ext2_set_inode_flags(struct inode *inode); 125extern void ext2_set_inode_flags(struct inode *inode);
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
index 533699c16040..40ad210a5049 100644
--- a/fs/ext2/inode.c
+++ b/fs/ext2/inode.c
@@ -1203,7 +1203,7 @@ static int ext2_setsize(struct inode *inode, loff_t newsize)
1203 inode->i_mtime = inode->i_ctime = CURRENT_TIME_SEC; 1203 inode->i_mtime = inode->i_ctime = CURRENT_TIME_SEC;
1204 if (inode_needs_sync(inode)) { 1204 if (inode_needs_sync(inode)) {
1205 sync_mapping_buffers(inode->i_mapping); 1205 sync_mapping_buffers(inode->i_mapping);
1206 ext2_sync_inode (inode); 1206 sync_inode_metadata(inode, 1);
1207 } else { 1207 } else {
1208 mark_inode_dirty(inode); 1208 mark_inode_dirty(inode);
1209 } 1209 }
@@ -1523,15 +1523,6 @@ int ext2_write_inode(struct inode *inode, struct writeback_control *wbc)
1523 return __ext2_write_inode(inode, wbc->sync_mode == WB_SYNC_ALL); 1523 return __ext2_write_inode(inode, wbc->sync_mode == WB_SYNC_ALL);
1524} 1524}
1525 1525
1526int ext2_sync_inode(struct inode *inode)
1527{
1528 struct writeback_control wbc = {
1529 .sync_mode = WB_SYNC_ALL,
1530 .nr_to_write = 0, /* sys_fsync did this */
1531 };
1532 return sync_inode(inode, &wbc);
1533}
1534
1535int ext2_setattr(struct dentry *dentry, struct iattr *iattr) 1526int ext2_setattr(struct dentry *dentry, struct iattr *iattr)
1536{ 1527{
1537 struct inode *inode = dentry->d_inode; 1528 struct inode *inode = dentry->d_inode;
diff --git a/fs/ext2/namei.c b/fs/ext2/namei.c
index 71efb0e9a3f2..f8aecd2e3297 100644
--- a/fs/ext2/namei.c
+++ b/fs/ext2/namei.c
@@ -206,7 +206,7 @@ static int ext2_link (struct dentry * old_dentry, struct inode * dir,
206 206
207 inode->i_ctime = CURRENT_TIME_SEC; 207 inode->i_ctime = CURRENT_TIME_SEC;
208 inode_inc_link_count(inode); 208 inode_inc_link_count(inode);
209 atomic_inc(&inode->i_count); 209 ihold(inode);
210 210
211 err = ext2_add_link(dentry, inode); 211 err = ext2_add_link(dentry, inode);
212 if (!err) { 212 if (!err) {
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index 85df87d0f7b7..d89e0b6a2d78 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -1221,9 +1221,7 @@ static int ext2_remount (struct super_block * sb, int * flags, char * data)
1221 } 1221 }
1222 1222
1223 es = sbi->s_es; 1223 es = sbi->s_es;
1224 if (((sbi->s_mount_opt & EXT2_MOUNT_XIP) != 1224 if ((sbi->s_mount_opt ^ old_mount_opt) & EXT2_MOUNT_XIP) {
1225 (old_mount_opt & EXT2_MOUNT_XIP)) &&
1226 invalidate_inodes(sb)) {
1227 ext2_msg(sb, KERN_WARNING, "warning: refusing change of " 1225 ext2_msg(sb, KERN_WARNING, "warning: refusing change of "
1228 "xip flag with busy inodes while remounting"); 1226 "xip flag with busy inodes while remounting");
1229 sbi->s_mount_opt &= ~EXT2_MOUNT_XIP; 1227 sbi->s_mount_opt &= ~EXT2_MOUNT_XIP;
@@ -1358,10 +1356,10 @@ static int ext2_statfs (struct dentry * dentry, struct kstatfs * buf)
1358 return 0; 1356 return 0;
1359} 1357}
1360 1358
1361static int ext2_get_sb(struct file_system_type *fs_type, 1359static struct dentry *ext2_mount(struct file_system_type *fs_type,
1362 int flags, const char *dev_name, void *data, struct vfsmount *mnt) 1360 int flags, const char *dev_name, void *data)
1363{ 1361{
1364 return get_sb_bdev(fs_type, flags, dev_name, data, ext2_fill_super, mnt); 1362 return mount_bdev(fs_type, flags, dev_name, data, ext2_fill_super);
1365} 1363}
1366 1364
1367#ifdef CONFIG_QUOTA 1365#ifdef CONFIG_QUOTA
@@ -1475,7 +1473,7 @@ out:
1475static struct file_system_type ext2_fs_type = { 1473static struct file_system_type ext2_fs_type = {
1476 .owner = THIS_MODULE, 1474 .owner = THIS_MODULE,
1477 .name = "ext2", 1475 .name = "ext2",
1478 .get_sb = ext2_get_sb, 1476 .mount = ext2_mount,
1479 .kill_sb = kill_block_super, 1477 .kill_sb = kill_block_super,
1480 .fs_flags = FS_REQUIRES_DEV, 1478 .fs_flags = FS_REQUIRES_DEV,
1481}; 1479};
diff --git a/fs/ext2/xattr.c b/fs/ext2/xattr.c
index 8c29ae15129e..f84700be3274 100644
--- a/fs/ext2/xattr.c
+++ b/fs/ext2/xattr.c
@@ -699,7 +699,7 @@ ext2_xattr_set2(struct inode *inode, struct buffer_head *old_bh,
699 EXT2_I(inode)->i_file_acl = new_bh ? new_bh->b_blocknr : 0; 699 EXT2_I(inode)->i_file_acl = new_bh ? new_bh->b_blocknr : 0;
700 inode->i_ctime = CURRENT_TIME_SEC; 700 inode->i_ctime = CURRENT_TIME_SEC;
701 if (IS_SYNC(inode)) { 701 if (IS_SYNC(inode)) {
702 error = ext2_sync_inode (inode); 702 error = sync_inode_metadata(inode, 1);
703 /* In case sync failed due to ENOSPC the inode was actually 703 /* In case sync failed due to ENOSPC the inode was actually
704 * written (only some dirty data were not) so we just proceed 704 * written (only some dirty data were not) so we just proceed
705 * as if nothing happened and cleanup the unused block */ 705 * as if nothing happened and cleanup the unused block */
diff --git a/fs/ext3/balloc.c b/fs/ext3/balloc.c
index 4a32511f4ded..b3db22649426 100644
--- a/fs/ext3/balloc.c
+++ b/fs/ext3/balloc.c
@@ -792,9 +792,9 @@ find_next_usable_block(ext3_grpblk_t start, struct buffer_head *bh,
792 if (here < 0) 792 if (here < 0)
793 here = 0; 793 here = 0;
794 794
795 p = ((char *)bh->b_data) + (here >> 3); 795 p = bh->b_data + (here >> 3);
796 r = memscan(p, 0, ((maxblocks + 7) >> 3) - (here >> 3)); 796 r = memscan(p, 0, ((maxblocks + 7) >> 3) - (here >> 3));
797 next = (r - ((char *)bh->b_data)) << 3; 797 next = (r - bh->b_data) << 3;
798 798
799 if (next < maxblocks && next >= start && ext3_test_allocatable(next, bh)) 799 if (next < maxblocks && next >= start && ext3_test_allocatable(next, bh))
800 return next; 800 return next;
@@ -810,8 +810,9 @@ find_next_usable_block(ext3_grpblk_t start, struct buffer_head *bh,
810 810
811/** 811/**
812 * claim_block() 812 * claim_block()
813 * @lock: the spin lock for this block group
813 * @block: the free block (group relative) to allocate 814 * @block: the free block (group relative) to allocate
814 * @bh: the bufferhead containts the block group bitmap 815 * @bh: the buffer_head contains the block group bitmap
815 * 816 *
816 * We think we can allocate this block in this bitmap. Try to set the bit. 817 * We think we can allocate this block in this bitmap. Try to set the bit.
817 * If that succeeds then check that nobody has allocated and then freed the 818 * If that succeeds then check that nobody has allocated and then freed the
@@ -956,9 +957,11 @@ fail_access:
956 * but we will shift to the place where start_block is, 957 * but we will shift to the place where start_block is,
957 * then start from there, when looking for a reservable space. 958 * then start from there, when looking for a reservable space.
958 * 959 *
959 * @size: the target new reservation window size 960 * @my_rsv: the reservation window
960 * 961 *
961 * @group_first_block: the first block we consider to start 962 * @sb: the super block
963 *
964 * @start_block: the first block we consider to start
962 * the real search from 965 * the real search from
963 * 966 *
964 * @last_block: 967 * @last_block:
@@ -1084,7 +1087,7 @@ static int find_next_reservable_window(
1084 * 1087 *
1085 * failed: we failed to find a reservation window in this group 1088 * failed: we failed to find a reservation window in this group
1086 * 1089 *
1087 * @rsv: the reservation 1090 * @my_rsv: the reservation window
1088 * 1091 *
1089 * @grp_goal: The goal (group-relative). It is where the search for a 1092 * @grp_goal: The goal (group-relative). It is where the search for a
1090 * free reservable space should start from. 1093 * free reservable space should start from.
@@ -1273,8 +1276,8 @@ static void try_to_extend_reservation(struct ext3_reserve_window_node *my_rsv,
1273 * @group: given allocation block group 1276 * @group: given allocation block group
1274 * @bitmap_bh: bufferhead holds the block bitmap 1277 * @bitmap_bh: bufferhead holds the block bitmap
1275 * @grp_goal: given target block within the group 1278 * @grp_goal: given target block within the group
1276 * @count: target number of blocks to allocate
1277 * @my_rsv: reservation window 1279 * @my_rsv: reservation window
1280 * @count: target number of blocks to allocate
1278 * @errp: pointer to store the error code 1281 * @errp: pointer to store the error code
1279 * 1282 *
1280 * This is the main function used to allocate a new block and its reservation 1283 * This is the main function used to allocate a new block and its reservation
diff --git a/fs/ext3/ialloc.c b/fs/ext3/ialloc.c
index 4ab72db3559e..9724aef22460 100644
--- a/fs/ext3/ialloc.c
+++ b/fs/ext3/ialloc.c
@@ -570,9 +570,14 @@ got:
570 ei->i_state_flags = 0; 570 ei->i_state_flags = 0;
571 ext3_set_inode_state(inode, EXT3_STATE_NEW); 571 ext3_set_inode_state(inode, EXT3_STATE_NEW);
572 572
573 ei->i_extra_isize = 573 /* See comment in ext3_iget for explanation */
574 (EXT3_INODE_SIZE(inode->i_sb) > EXT3_GOOD_OLD_INODE_SIZE) ? 574 if (ino >= EXT3_FIRST_INO(sb) + 1 &&
575 sizeof(struct ext3_inode) - EXT3_GOOD_OLD_INODE_SIZE : 0; 575 EXT3_INODE_SIZE(sb) > EXT3_GOOD_OLD_INODE_SIZE) {
576 ei->i_extra_isize =
577 sizeof(struct ext3_inode) - EXT3_GOOD_OLD_INODE_SIZE;
578 } else {
579 ei->i_extra_isize = 0;
580 }
576 581
577 ret = inode; 582 ret = inode;
578 dquot_initialize(inode); 583 dquot_initialize(inode);
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index 5e0faf4cda79..a9580617edd2 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -498,7 +498,7 @@ static ext3_fsblk_t ext3_find_goal(struct inode *inode, long block,
498} 498}
499 499
500/** 500/**
501 * ext3_blks_to_allocate: Look up the block map and count the number 501 * ext3_blks_to_allocate - Look up the block map and count the number
502 * of direct blocks need to be allocated for the given branch. 502 * of direct blocks need to be allocated for the given branch.
503 * 503 *
504 * @branch: chain of indirect blocks 504 * @branch: chain of indirect blocks
@@ -536,14 +536,18 @@ static int ext3_blks_to_allocate(Indirect *branch, int k, unsigned long blks,
536} 536}
537 537
538/** 538/**
539 * ext3_alloc_blocks: multiple allocate blocks needed for a branch 539 * ext3_alloc_blocks - multiple allocate blocks needed for a branch
540 * @handle: handle for this transaction
541 * @inode: owner
542 * @goal: preferred place for allocation
540 * @indirect_blks: the number of blocks need to allocate for indirect 543 * @indirect_blks: the number of blocks need to allocate for indirect
541 * blocks 544 * blocks
542 * 545 * @blks: number of blocks need to allocated for direct blocks
543 * @new_blocks: on return it will store the new block numbers for 546 * @new_blocks: on return it will store the new block numbers for
544 * the indirect blocks(if needed) and the first direct block, 547 * the indirect blocks(if needed) and the first direct block,
545 * @blks: on return it will store the total number of allocated 548 * @err: here we store the error value
546 * direct blocks 549 *
550 * return the number of direct blocks allocated
547 */ 551 */
548static int ext3_alloc_blocks(handle_t *handle, struct inode *inode, 552static int ext3_alloc_blocks(handle_t *handle, struct inode *inode,
549 ext3_fsblk_t goal, int indirect_blks, int blks, 553 ext3_fsblk_t goal, int indirect_blks, int blks,
@@ -598,9 +602,11 @@ failed_out:
598 602
599/** 603/**
600 * ext3_alloc_branch - allocate and set up a chain of blocks. 604 * ext3_alloc_branch - allocate and set up a chain of blocks.
605 * @handle: handle for this transaction
601 * @inode: owner 606 * @inode: owner
602 * @indirect_blks: number of allocated indirect blocks 607 * @indirect_blks: number of allocated indirect blocks
603 * @blks: number of allocated direct blocks 608 * @blks: number of allocated direct blocks
609 * @goal: preferred place for allocation
604 * @offsets: offsets (in the blocks) to store the pointers to next. 610 * @offsets: offsets (in the blocks) to store the pointers to next.
605 * @branch: place to store the chain in. 611 * @branch: place to store the chain in.
606 * 612 *
@@ -700,10 +706,9 @@ failed:
700 706
701/** 707/**
702 * ext3_splice_branch - splice the allocated branch onto inode. 708 * ext3_splice_branch - splice the allocated branch onto inode.
709 * @handle: handle for this transaction
703 * @inode: owner 710 * @inode: owner
704 * @block: (logical) number of block we are adding 711 * @block: (logical) number of block we are adding
705 * @chain: chain of indirect blocks (with a missing link - see
706 * ext3_alloc_branch)
707 * @where: location of missing link 712 * @where: location of missing link
708 * @num: number of indirect blocks we are adding 713 * @num: number of indirect blocks we are adding
709 * @blks: number of direct blocks we are adding 714 * @blks: number of direct blocks we are adding
@@ -1696,8 +1701,8 @@ static int ext3_journalled_writepage(struct page *page,
1696 * doesn't seem much point in redirtying the page here. 1701 * doesn't seem much point in redirtying the page here.
1697 */ 1702 */
1698 ClearPageChecked(page); 1703 ClearPageChecked(page);
1699 ret = block_prepare_write(page, 0, PAGE_CACHE_SIZE, 1704 ret = __block_write_begin(page, 0, PAGE_CACHE_SIZE,
1700 ext3_get_block); 1705 ext3_get_block);
1701 if (ret != 0) { 1706 if (ret != 0) {
1702 ext3_journal_stop(handle); 1707 ext3_journal_stop(handle);
1703 goto out_unlock; 1708 goto out_unlock;
@@ -2530,7 +2535,6 @@ void ext3_truncate(struct inode *inode)
2530 */ 2535 */
2531 } else { 2536 } else {
2532 /* Shared branch grows from an indirect block */ 2537 /* Shared branch grows from an indirect block */
2533 BUFFER_TRACE(partial->bh, "get_write_access");
2534 ext3_free_branches(handle, inode, partial->bh, 2538 ext3_free_branches(handle, inode, partial->bh,
2535 partial->p, 2539 partial->p,
2536 partial->p+1, (chain+n-1) - partial); 2540 partial->p+1, (chain+n-1) - partial);
diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c
index 2b35ddb70d65..bce9dce639b8 100644
--- a/fs/ext3/namei.c
+++ b/fs/ext3/namei.c
@@ -2260,7 +2260,7 @@ retry:
2260 2260
2261 inode->i_ctime = CURRENT_TIME_SEC; 2261 inode->i_ctime = CURRENT_TIME_SEC;
2262 inc_nlink(inode); 2262 inc_nlink(inode);
2263 atomic_inc(&inode->i_count); 2263 ihold(inode);
2264 2264
2265 err = ext3_add_entry(handle, dentry, inode); 2265 err = ext3_add_entry(handle, dentry, inode);
2266 if (!err) { 2266 if (!err) {
diff --git a/fs/ext3/resize.c b/fs/ext3/resize.c
index 0ccd7b12b73c..e746d30b1232 100644
--- a/fs/ext3/resize.c
+++ b/fs/ext3/resize.c
@@ -977,7 +977,8 @@ int ext3_group_extend(struct super_block *sb, struct ext3_super_block *es,
977 o_blocks_count = le32_to_cpu(es->s_blocks_count); 977 o_blocks_count = le32_to_cpu(es->s_blocks_count);
978 978
979 if (test_opt(sb, DEBUG)) 979 if (test_opt(sb, DEBUG))
980 printk(KERN_DEBUG "EXT3-fs: extending last group from "E3FSBLK" uto "E3FSBLK" blocks\n", 980 printk(KERN_DEBUG "EXT3-fs: extending last group from "E3FSBLK
981 " upto "E3FSBLK" blocks\n",
981 o_blocks_count, n_blocks_count); 982 o_blocks_count, n_blocks_count);
982 983
983 if (n_blocks_count == 0 || n_blocks_count == o_blocks_count) 984 if (n_blocks_count == 0 || n_blocks_count == o_blocks_count)
@@ -985,7 +986,7 @@ int ext3_group_extend(struct super_block *sb, struct ext3_super_block *es,
985 986
986 if (n_blocks_count > (sector_t)(~0ULL) >> (sb->s_blocksize_bits - 9)) { 987 if (n_blocks_count > (sector_t)(~0ULL) >> (sb->s_blocksize_bits - 9)) {
987 printk(KERN_ERR "EXT3-fs: filesystem on %s:" 988 printk(KERN_ERR "EXT3-fs: filesystem on %s:"
988 " too large to resize to %lu blocks safely\n", 989 " too large to resize to "E3FSBLK" blocks safely\n",
989 sb->s_id, n_blocks_count); 990 sb->s_id, n_blocks_count);
990 if (sizeof(sector_t) < 8) 991 if (sizeof(sector_t) < 8)
991 ext3_warning(sb, __func__, 992 ext3_warning(sb, __func__,
@@ -1065,11 +1066,11 @@ int ext3_group_extend(struct super_block *sb, struct ext3_super_block *es,
1065 es->s_blocks_count = cpu_to_le32(o_blocks_count + add); 1066 es->s_blocks_count = cpu_to_le32(o_blocks_count + add);
1066 ext3_journal_dirty_metadata(handle, EXT3_SB(sb)->s_sbh); 1067 ext3_journal_dirty_metadata(handle, EXT3_SB(sb)->s_sbh);
1067 mutex_unlock(&EXT3_SB(sb)->s_resize_lock); 1068 mutex_unlock(&EXT3_SB(sb)->s_resize_lock);
1068 ext3_debug("freeing blocks %lu through "E3FSBLK"\n", o_blocks_count, 1069 ext3_debug("freeing blocks "E3FSBLK" through "E3FSBLK"\n",
1069 o_blocks_count + add); 1070 o_blocks_count, o_blocks_count + add);
1070 ext3_free_blocks_sb(handle, sb, o_blocks_count, add, &freed_blocks); 1071 ext3_free_blocks_sb(handle, sb, o_blocks_count, add, &freed_blocks);
1071 ext3_debug("freed blocks "E3FSBLK" through "E3FSBLK"\n", o_blocks_count, 1072 ext3_debug("freed blocks "E3FSBLK" through "E3FSBLK"\n",
1072 o_blocks_count + add); 1073 o_blocks_count, o_blocks_count + add);
1073 if ((err = ext3_journal_stop(handle))) 1074 if ((err = ext3_journal_stop(handle)))
1074 goto exit_put; 1075 goto exit_put;
1075 if (test_opt(sb, DEBUG)) 1076 if (test_opt(sb, DEBUG))
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index 377768009106..2fedaf8b5012 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -1301,9 +1301,9 @@ static int ext3_setup_super(struct super_block *sb, struct ext3_super_block *es,
1301 ext3_msg(sb, KERN_WARNING, 1301 ext3_msg(sb, KERN_WARNING,
1302 "warning: mounting fs with errors, " 1302 "warning: mounting fs with errors, "
1303 "running e2fsck is recommended"); 1303 "running e2fsck is recommended");
1304 else if ((__s16) le16_to_cpu(es->s_max_mnt_count) >= 0 && 1304 else if ((__s16) le16_to_cpu(es->s_max_mnt_count) > 0 &&
1305 le16_to_cpu(es->s_mnt_count) >= 1305 le16_to_cpu(es->s_mnt_count) >=
1306 (unsigned short) (__s16) le16_to_cpu(es->s_max_mnt_count)) 1306 le16_to_cpu(es->s_max_mnt_count))
1307 ext3_msg(sb, KERN_WARNING, 1307 ext3_msg(sb, KERN_WARNING,
1308 "warning: maximal mount count reached, " 1308 "warning: maximal mount count reached, "
1309 "running e2fsck is recommended"); 1309 "running e2fsck is recommended");
@@ -1320,7 +1320,7 @@ static int ext3_setup_super(struct super_block *sb, struct ext3_super_block *es,
1320 valid forever! :) */ 1320 valid forever! :) */
1321 es->s_state &= cpu_to_le16(~EXT3_VALID_FS); 1321 es->s_state &= cpu_to_le16(~EXT3_VALID_FS);
1322#endif 1322#endif
1323 if (!(__s16) le16_to_cpu(es->s_max_mnt_count)) 1323 if (!le16_to_cpu(es->s_max_mnt_count))
1324 es->s_max_mnt_count = cpu_to_le16(EXT3_DFL_MAX_MNT_COUNT); 1324 es->s_max_mnt_count = cpu_to_le16(EXT3_DFL_MAX_MNT_COUNT);
1325 le16_add_cpu(&es->s_mnt_count, 1); 1325 le16_add_cpu(&es->s_mnt_count, 1);
1326 es->s_mtime = cpu_to_le32(get_seconds()); 1326 es->s_mtime = cpu_to_le32(get_seconds());
@@ -1647,7 +1647,7 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
1647 * Note: s_es must be initialized as soon as possible because 1647 * Note: s_es must be initialized as soon as possible because
1648 * some ext3 macro-instructions depend on its value 1648 * some ext3 macro-instructions depend on its value
1649 */ 1649 */
1650 es = (struct ext3_super_block *) (((char *)bh->b_data) + offset); 1650 es = (struct ext3_super_block *) (bh->b_data + offset);
1651 sbi->s_es = es; 1651 sbi->s_es = es;
1652 sb->s_magic = le16_to_cpu(es->s_magic); 1652 sb->s_magic = le16_to_cpu(es->s_magic);
1653 if (sb->s_magic != EXT3_SUPER_MAGIC) 1653 if (sb->s_magic != EXT3_SUPER_MAGIC)
@@ -1758,7 +1758,7 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
1758 "error: can't read superblock on 2nd try"); 1758 "error: can't read superblock on 2nd try");
1759 goto failed_mount; 1759 goto failed_mount;
1760 } 1760 }
1761 es = (struct ext3_super_block *)(((char *)bh->b_data) + offset); 1761 es = (struct ext3_super_block *)(bh->b_data + offset);
1762 sbi->s_es = es; 1762 sbi->s_es = es;
1763 if (es->s_magic != cpu_to_le16(EXT3_SUPER_MAGIC)) { 1763 if (es->s_magic != cpu_to_le16(EXT3_SUPER_MAGIC)) {
1764 ext3_msg(sb, KERN_ERR, 1764 ext3_msg(sb, KERN_ERR,
@@ -1857,13 +1857,13 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
1857 sbi->s_groups_count = ((le32_to_cpu(es->s_blocks_count) - 1857 sbi->s_groups_count = ((le32_to_cpu(es->s_blocks_count) -
1858 le32_to_cpu(es->s_first_data_block) - 1) 1858 le32_to_cpu(es->s_first_data_block) - 1)
1859 / EXT3_BLOCKS_PER_GROUP(sb)) + 1; 1859 / EXT3_BLOCKS_PER_GROUP(sb)) + 1;
1860 db_count = (sbi->s_groups_count + EXT3_DESC_PER_BLOCK(sb) - 1) / 1860 db_count = DIV_ROUND_UP(sbi->s_groups_count, EXT3_DESC_PER_BLOCK(sb));
1861 EXT3_DESC_PER_BLOCK(sb);
1862 sbi->s_group_desc = kmalloc(db_count * sizeof (struct buffer_head *), 1861 sbi->s_group_desc = kmalloc(db_count * sizeof (struct buffer_head *),
1863 GFP_KERNEL); 1862 GFP_KERNEL);
1864 if (sbi->s_group_desc == NULL) { 1863 if (sbi->s_group_desc == NULL) {
1865 ext3_msg(sb, KERN_ERR, 1864 ext3_msg(sb, KERN_ERR,
1866 "error: not enough memory"); 1865 "error: not enough memory");
1866 ret = -ENOMEM;
1867 goto failed_mount; 1867 goto failed_mount;
1868 } 1868 }
1869 1869
@@ -1951,6 +1951,7 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
1951 } 1951 }
1952 if (err) { 1952 if (err) {
1953 ext3_msg(sb, KERN_ERR, "error: insufficient memory"); 1953 ext3_msg(sb, KERN_ERR, "error: insufficient memory");
1954 ret = err;
1954 goto failed_mount3; 1955 goto failed_mount3;
1955 } 1956 }
1956 1957
@@ -2159,7 +2160,7 @@ static journal_t *ext3_get_dev_journal(struct super_block *sb,
2159 goto out_bdev; 2160 goto out_bdev;
2160 } 2161 }
2161 2162
2162 es = (struct ext3_super_block *) (((char *)bh->b_data) + offset); 2163 es = (struct ext3_super_block *) (bh->b_data + offset);
2163 if ((le16_to_cpu(es->s_magic) != EXT3_SUPER_MAGIC) || 2164 if ((le16_to_cpu(es->s_magic) != EXT3_SUPER_MAGIC) ||
2164 !(le32_to_cpu(es->s_feature_incompat) & 2165 !(le32_to_cpu(es->s_feature_incompat) &
2165 EXT3_FEATURE_INCOMPAT_JOURNAL_DEV)) { 2166 EXT3_FEATURE_INCOMPAT_JOURNAL_DEV)) {
@@ -2352,6 +2353,21 @@ static int ext3_commit_super(struct super_block *sb,
2352 2353
2353 if (!sbh) 2354 if (!sbh)
2354 return error; 2355 return error;
2356
2357 if (buffer_write_io_error(sbh)) {
2358 /*
2359 * Oh, dear. A previous attempt to write the
2360 * superblock failed. This could happen because the
2361 * USB device was yanked out. Or it could happen to
2362 * be a transient write error and maybe the block will
2363 * be remapped. Nothing we can do but to retry the
2364 * write and hope for the best.
2365 */
2366 ext3_msg(sb, KERN_ERR, "previous I/O error to "
2367 "superblock detected");
2368 clear_buffer_write_io_error(sbh);
2369 set_buffer_uptodate(sbh);
2370 }
2355 /* 2371 /*
2356 * If the file system is mounted read-only, don't update the 2372 * If the file system is mounted read-only, don't update the
2357 * superblock write time. This avoids updating the superblock 2373 * superblock write time. This avoids updating the superblock
@@ -2368,8 +2384,15 @@ static int ext3_commit_super(struct super_block *sb,
2368 es->s_free_inodes_count = cpu_to_le32(ext3_count_free_inodes(sb)); 2384 es->s_free_inodes_count = cpu_to_le32(ext3_count_free_inodes(sb));
2369 BUFFER_TRACE(sbh, "marking dirty"); 2385 BUFFER_TRACE(sbh, "marking dirty");
2370 mark_buffer_dirty(sbh); 2386 mark_buffer_dirty(sbh);
2371 if (sync) 2387 if (sync) {
2372 error = sync_dirty_buffer(sbh); 2388 error = sync_dirty_buffer(sbh);
2389 if (buffer_write_io_error(sbh)) {
2390 ext3_msg(sb, KERN_ERR, "I/O error while writing "
2391 "superblock");
2392 clear_buffer_write_io_error(sbh);
2393 set_buffer_uptodate(sbh);
2394 }
2395 }
2373 return error; 2396 return error;
2374} 2397}
2375 2398
@@ -2997,16 +3020,16 @@ out:
2997 3020
2998#endif 3021#endif
2999 3022
3000static int ext3_get_sb(struct file_system_type *fs_type, 3023static struct dentry *ext3_mount(struct file_system_type *fs_type,
3001 int flags, const char *dev_name, void *data, struct vfsmount *mnt) 3024 int flags, const char *dev_name, void *data)
3002{ 3025{
3003 return get_sb_bdev(fs_type, flags, dev_name, data, ext3_fill_super, mnt); 3026 return mount_bdev(fs_type, flags, dev_name, data, ext3_fill_super);
3004} 3027}
3005 3028
3006static struct file_system_type ext3_fs_type = { 3029static struct file_system_type ext3_fs_type = {
3007 .owner = THIS_MODULE, 3030 .owner = THIS_MODULE,
3008 .name = "ext3", 3031 .name = "ext3",
3009 .get_sb = ext3_get_sb, 3032 .mount = ext3_mount,
3010 .kill_sb = kill_block_super, 3033 .kill_sb = kill_block_super,
3011 .fs_flags = FS_REQUIRES_DEV, 3034 .fs_flags = FS_REQUIRES_DEV,
3012}; 3035};
diff --git a/fs/ext4/Makefile b/fs/ext4/Makefile
index 8867b2a1e5fe..c947e36eda6c 100644
--- a/fs/ext4/Makefile
+++ b/fs/ext4/Makefile
@@ -4,7 +4,7 @@
4 4
5obj-$(CONFIG_EXT4_FS) += ext4.o 5obj-$(CONFIG_EXT4_FS) += ext4.o
6 6
7ext4-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \ 7ext4-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o page-io.o \
8 ioctl.o namei.o super.o symlink.o hash.o resize.o extents.o \ 8 ioctl.o namei.o super.o symlink.o hash.o resize.o extents.o \
9 ext4_jbd2.o migrate.o mballoc.o block_validity.o move_extent.o 9 ext4_jbd2.o migrate.o mballoc.o block_validity.o move_extent.o
10 10
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index bd30799a43ed..14c3af26c671 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -171,7 +171,8 @@ unsigned ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh,
171 * less than the blocksize * 8 ( which is the size 171 * less than the blocksize * 8 ( which is the size
172 * of bitmap ), set rest of the block bitmap to 1 172 * of bitmap ), set rest of the block bitmap to 1
173 */ 173 */
174 mark_bitmap_end(group_blocks, sb->s_blocksize * 8, bh->b_data); 174 ext4_mark_bitmap_end(group_blocks, sb->s_blocksize * 8,
175 bh->b_data);
175 } 176 }
176 return free_blocks - ext4_group_used_meta_blocks(sb, block_group, gdp); 177 return free_blocks - ext4_group_used_meta_blocks(sb, block_group, gdp);
177} 178}
@@ -489,7 +490,7 @@ error_return:
489 * Check if filesystem has nblocks free & available for allocation. 490 * Check if filesystem has nblocks free & available for allocation.
490 * On success return 1, return 0 on failure. 491 * On success return 1, return 0 on failure.
491 */ 492 */
492int ext4_has_free_blocks(struct ext4_sb_info *sbi, s64 nblocks) 493static int ext4_has_free_blocks(struct ext4_sb_info *sbi, s64 nblocks)
493{ 494{
494 s64 free_blocks, dirty_blocks, root_blocks; 495 s64 free_blocks, dirty_blocks, root_blocks;
495 struct percpu_counter *fbc = &sbi->s_freeblocks_counter; 496 struct percpu_counter *fbc = &sbi->s_freeblocks_counter;
diff --git a/fs/ext4/block_validity.c b/fs/ext4/block_validity.c
index 3db5084db9bd..fac90f3fba80 100644
--- a/fs/ext4/block_validity.c
+++ b/fs/ext4/block_validity.c
@@ -29,16 +29,15 @@ struct ext4_system_zone {
29 29
30static struct kmem_cache *ext4_system_zone_cachep; 30static struct kmem_cache *ext4_system_zone_cachep;
31 31
32int __init init_ext4_system_zone(void) 32int __init ext4_init_system_zone(void)
33{ 33{
34 ext4_system_zone_cachep = KMEM_CACHE(ext4_system_zone, 34 ext4_system_zone_cachep = KMEM_CACHE(ext4_system_zone, 0);
35 SLAB_RECLAIM_ACCOUNT);
36 if (ext4_system_zone_cachep == NULL) 35 if (ext4_system_zone_cachep == NULL)
37 return -ENOMEM; 36 return -ENOMEM;
38 return 0; 37 return 0;
39} 38}
40 39
41void exit_ext4_system_zone(void) 40void ext4_exit_system_zone(void)
42{ 41{
43 kmem_cache_destroy(ext4_system_zone_cachep); 42 kmem_cache_destroy(ext4_system_zone_cachep);
44} 43}
diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c
index 374510f72baa..ece76fb6a40c 100644
--- a/fs/ext4/dir.c
+++ b/fs/ext4/dir.c
@@ -39,7 +39,7 @@ static int ext4_release_dir(struct inode *inode,
39 struct file *filp); 39 struct file *filp);
40 40
41const struct file_operations ext4_dir_operations = { 41const struct file_operations ext4_dir_operations = {
42 .llseek = generic_file_llseek, 42 .llseek = ext4_llseek,
43 .read = generic_read_dir, 43 .read = generic_read_dir,
44 .readdir = ext4_readdir, /* we take BKL. needed?*/ 44 .readdir = ext4_readdir, /* we take BKL. needed?*/
45 .unlocked_ioctl = ext4_ioctl, 45 .unlocked_ioctl = ext4_ioctl,
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 889ec9d5e6ad..8b5dd6369f82 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -168,7 +168,20 @@ struct mpage_da_data {
168 int pages_written; 168 int pages_written;
169 int retval; 169 int retval;
170}; 170};
171#define EXT4_IO_UNWRITTEN 0x1 171
172/*
173 * Flags for ext4_io_end->flags
174 */
175#define EXT4_IO_END_UNWRITTEN 0x0001
176#define EXT4_IO_END_ERROR 0x0002
177
178struct ext4_io_page {
179 struct page *p_page;
180 int p_count;
181};
182
183#define MAX_IO_PAGES 128
184
172typedef struct ext4_io_end { 185typedef struct ext4_io_end {
173 struct list_head list; /* per-file finished IO list */ 186 struct list_head list; /* per-file finished IO list */
174 struct inode *inode; /* file being written to */ 187 struct inode *inode; /* file being written to */
@@ -179,8 +192,18 @@ typedef struct ext4_io_end {
179 struct work_struct work; /* data work queue */ 192 struct work_struct work; /* data work queue */
180 struct kiocb *iocb; /* iocb struct for AIO */ 193 struct kiocb *iocb; /* iocb struct for AIO */
181 int result; /* error value for AIO */ 194 int result; /* error value for AIO */
195 int num_io_pages;
196 struct ext4_io_page *pages[MAX_IO_PAGES];
182} ext4_io_end_t; 197} ext4_io_end_t;
183 198
199struct ext4_io_submit {
200 int io_op;
201 struct bio *io_bio;
202 ext4_io_end_t *io_end;
203 struct ext4_io_page *io_page;
204 sector_t io_next_block;
205};
206
184/* 207/*
185 * Special inodes numbers 208 * Special inodes numbers
186 */ 209 */
@@ -205,6 +228,7 @@ typedef struct ext4_io_end {
205#define EXT4_MIN_BLOCK_SIZE 1024 228#define EXT4_MIN_BLOCK_SIZE 1024
206#define EXT4_MAX_BLOCK_SIZE 65536 229#define EXT4_MAX_BLOCK_SIZE 65536
207#define EXT4_MIN_BLOCK_LOG_SIZE 10 230#define EXT4_MIN_BLOCK_LOG_SIZE 10
231#define EXT4_MAX_BLOCK_LOG_SIZE 16
208#ifdef __KERNEL__ 232#ifdef __KERNEL__
209# define EXT4_BLOCK_SIZE(s) ((s)->s_blocksize) 233# define EXT4_BLOCK_SIZE(s) ((s)->s_blocksize)
210#else 234#else
@@ -889,6 +913,7 @@ struct ext4_inode_info {
889#define EXT4_MOUNT_DATA_ERR_ABORT 0x10000000 /* Abort on file data write */ 913#define EXT4_MOUNT_DATA_ERR_ABORT 0x10000000 /* Abort on file data write */
890#define EXT4_MOUNT_BLOCK_VALIDITY 0x20000000 /* Block validity checking */ 914#define EXT4_MOUNT_BLOCK_VALIDITY 0x20000000 /* Block validity checking */
891#define EXT4_MOUNT_DISCARD 0x40000000 /* Issue DISCARD requests */ 915#define EXT4_MOUNT_DISCARD 0x40000000 /* Issue DISCARD requests */
916#define EXT4_MOUNT_INIT_INODE_TABLE 0x80000000 /* Initialize uninitialized itables */
892 917
893#define clear_opt(o, opt) o &= ~EXT4_MOUNT_##opt 918#define clear_opt(o, opt) o &= ~EXT4_MOUNT_##opt
894#define set_opt(o, opt) o |= EXT4_MOUNT_##opt 919#define set_opt(o, opt) o |= EXT4_MOUNT_##opt
@@ -1087,7 +1112,6 @@ struct ext4_sb_info {
1087 struct completion s_kobj_unregister; 1112 struct completion s_kobj_unregister;
1088 1113
1089 /* Journaling */ 1114 /* Journaling */
1090 struct inode *s_journal_inode;
1091 struct journal_s *s_journal; 1115 struct journal_s *s_journal;
1092 struct list_head s_orphan; 1116 struct list_head s_orphan;
1093 struct mutex s_orphan_lock; 1117 struct mutex s_orphan_lock;
@@ -1120,10 +1144,7 @@ struct ext4_sb_info {
1120 /* for buddy allocator */ 1144 /* for buddy allocator */
1121 struct ext4_group_info ***s_group_info; 1145 struct ext4_group_info ***s_group_info;
1122 struct inode *s_buddy_cache; 1146 struct inode *s_buddy_cache;
1123 long s_blocks_reserved;
1124 spinlock_t s_reserve_lock;
1125 spinlock_t s_md_lock; 1147 spinlock_t s_md_lock;
1126 tid_t s_last_transaction;
1127 unsigned short *s_mb_offsets; 1148 unsigned short *s_mb_offsets;
1128 unsigned int *s_mb_maxs; 1149 unsigned int *s_mb_maxs;
1129 1150
@@ -1141,7 +1162,6 @@ struct ext4_sb_info {
1141 unsigned long s_mb_last_start; 1162 unsigned long s_mb_last_start;
1142 1163
1143 /* stats for buddy allocator */ 1164 /* stats for buddy allocator */
1144 spinlock_t s_mb_pa_lock;
1145 atomic_t s_bal_reqs; /* number of reqs with len > 1 */ 1165 atomic_t s_bal_reqs; /* number of reqs with len > 1 */
1146 atomic_t s_bal_success; /* we found long enough chunks */ 1166 atomic_t s_bal_success; /* we found long enough chunks */
1147 atomic_t s_bal_allocated; /* in blocks */ 1167 atomic_t s_bal_allocated; /* in blocks */
@@ -1172,6 +1192,11 @@ struct ext4_sb_info {
1172 1192
1173 /* timer for periodic error stats printing */ 1193 /* timer for periodic error stats printing */
1174 struct timer_list s_err_report; 1194 struct timer_list s_err_report;
1195
1196 /* Lazy inode table initialization info */
1197 struct ext4_li_request *s_li_request;
1198 /* Wait multiplier for lazy initialization thread */
1199 unsigned int s_li_wait_mult;
1175}; 1200};
1176 1201
1177static inline struct ext4_sb_info *EXT4_SB(struct super_block *sb) 1202static inline struct ext4_sb_info *EXT4_SB(struct super_block *sb)
@@ -1533,7 +1558,42 @@ ext4_group_first_block_no(struct super_block *sb, ext4_group_t group_no)
1533void ext4_get_group_no_and_offset(struct super_block *sb, ext4_fsblk_t blocknr, 1558void ext4_get_group_no_and_offset(struct super_block *sb, ext4_fsblk_t blocknr,
1534 ext4_group_t *blockgrpp, ext4_grpblk_t *offsetp); 1559 ext4_group_t *blockgrpp, ext4_grpblk_t *offsetp);
1535 1560
1536extern struct proc_dir_entry *ext4_proc_root; 1561/*
1562 * Timeout and state flag for lazy initialization inode thread.
1563 */
1564#define EXT4_DEF_LI_WAIT_MULT 10
1565#define EXT4_DEF_LI_MAX_START_DELAY 5
1566#define EXT4_LAZYINIT_QUIT 0x0001
1567#define EXT4_LAZYINIT_RUNNING 0x0002
1568
1569/*
1570 * Lazy inode table initialization info
1571 */
1572struct ext4_lazy_init {
1573 unsigned long li_state;
1574
1575 wait_queue_head_t li_wait_daemon;
1576 wait_queue_head_t li_wait_task;
1577 struct timer_list li_timer;
1578 struct task_struct *li_task;
1579
1580 struct list_head li_request_list;
1581 struct mutex li_list_mtx;
1582};
1583
1584struct ext4_li_request {
1585 struct super_block *lr_super;
1586 struct ext4_sb_info *lr_sbi;
1587 ext4_group_t lr_next_group;
1588 struct list_head lr_request;
1589 unsigned long lr_next_sched;
1590 unsigned long lr_timeout;
1591};
1592
1593struct ext4_features {
1594 struct kobject f_kobj;
1595 struct completion f_kobj_unregister;
1596};
1537 1597
1538/* 1598/*
1539 * Function prototypes 1599 * Function prototypes
@@ -1561,7 +1621,6 @@ extern unsigned long ext4_bg_num_gdb(struct super_block *sb,
1561extern ext4_fsblk_t ext4_new_meta_blocks(handle_t *handle, struct inode *inode, 1621extern ext4_fsblk_t ext4_new_meta_blocks(handle_t *handle, struct inode *inode,
1562 ext4_fsblk_t goal, unsigned long *count, int *errp); 1622 ext4_fsblk_t goal, unsigned long *count, int *errp);
1563extern int ext4_claim_free_blocks(struct ext4_sb_info *sbi, s64 nblocks); 1623extern int ext4_claim_free_blocks(struct ext4_sb_info *sbi, s64 nblocks);
1564extern int ext4_has_free_blocks(struct ext4_sb_info *sbi, s64 nblocks);
1565extern void ext4_add_groupblocks(handle_t *handle, struct super_block *sb, 1624extern void ext4_add_groupblocks(handle_t *handle, struct super_block *sb,
1566 ext4_fsblk_t block, unsigned long count); 1625 ext4_fsblk_t block, unsigned long count);
1567extern ext4_fsblk_t ext4_count_free_blocks(struct super_block *); 1626extern ext4_fsblk_t ext4_count_free_blocks(struct super_block *);
@@ -1605,11 +1664,9 @@ extern struct inode * ext4_orphan_get(struct super_block *, unsigned long);
1605extern unsigned long ext4_count_free_inodes(struct super_block *); 1664extern unsigned long ext4_count_free_inodes(struct super_block *);
1606extern unsigned long ext4_count_dirs(struct super_block *); 1665extern unsigned long ext4_count_dirs(struct super_block *);
1607extern void ext4_check_inodes_bitmap(struct super_block *); 1666extern void ext4_check_inodes_bitmap(struct super_block *);
1608extern unsigned ext4_init_inode_bitmap(struct super_block *sb, 1667extern void ext4_mark_bitmap_end(int start_bit, int end_bit, char *bitmap);
1609 struct buffer_head *bh, 1668extern int ext4_init_inode_table(struct super_block *sb,
1610 ext4_group_t group, 1669 ext4_group_t group, int barrier);
1611 struct ext4_group_desc *desc);
1612extern void mark_bitmap_end(int start_bit, int end_bit, char *bitmap);
1613 1670
1614/* mballoc.c */ 1671/* mballoc.c */
1615extern long ext4_mb_stats; 1672extern long ext4_mb_stats;
@@ -1620,16 +1677,15 @@ extern ext4_fsblk_t ext4_mb_new_blocks(handle_t *,
1620 struct ext4_allocation_request *, int *); 1677 struct ext4_allocation_request *, int *);
1621extern int ext4_mb_reserve_blocks(struct super_block *, int); 1678extern int ext4_mb_reserve_blocks(struct super_block *, int);
1622extern void ext4_discard_preallocations(struct inode *); 1679extern void ext4_discard_preallocations(struct inode *);
1623extern int __init init_ext4_mballoc(void); 1680extern int __init ext4_init_mballoc(void);
1624extern void exit_ext4_mballoc(void); 1681extern void ext4_exit_mballoc(void);
1625extern void ext4_free_blocks(handle_t *handle, struct inode *inode, 1682extern void ext4_free_blocks(handle_t *handle, struct inode *inode,
1626 struct buffer_head *bh, ext4_fsblk_t block, 1683 struct buffer_head *bh, ext4_fsblk_t block,
1627 unsigned long count, int flags); 1684 unsigned long count, int flags);
1628extern int ext4_mb_add_groupinfo(struct super_block *sb, 1685extern int ext4_mb_add_groupinfo(struct super_block *sb,
1629 ext4_group_t i, struct ext4_group_desc *desc); 1686 ext4_group_t i, struct ext4_group_desc *desc);
1630extern int ext4_mb_get_buddy_cache_lock(struct super_block *, ext4_group_t); 1687extern int ext4_trim_fs(struct super_block *, struct fstrim_range *);
1631extern void ext4_mb_put_buddy_cache_lock(struct super_block *, 1688
1632 ext4_group_t, int);
1633/* inode.c */ 1689/* inode.c */
1634struct buffer_head *ext4_getblk(handle_t *, struct inode *, 1690struct buffer_head *ext4_getblk(handle_t *, struct inode *,
1635 ext4_lblk_t, int, int *); 1691 ext4_lblk_t, int, int *);
@@ -1657,13 +1713,11 @@ extern void ext4_get_inode_flags(struct ext4_inode_info *);
1657extern int ext4_alloc_da_blocks(struct inode *inode); 1713extern int ext4_alloc_da_blocks(struct inode *inode);
1658extern void ext4_set_aops(struct inode *inode); 1714extern void ext4_set_aops(struct inode *inode);
1659extern int ext4_writepage_trans_blocks(struct inode *); 1715extern int ext4_writepage_trans_blocks(struct inode *);
1660extern int ext4_meta_trans_blocks(struct inode *, int nrblocks, int idxblocks);
1661extern int ext4_chunk_trans_blocks(struct inode *, int nrblocks); 1716extern int ext4_chunk_trans_blocks(struct inode *, int nrblocks);
1662extern int ext4_block_truncate_page(handle_t *handle, 1717extern int ext4_block_truncate_page(handle_t *handle,
1663 struct address_space *mapping, loff_t from); 1718 struct address_space *mapping, loff_t from);
1664extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf); 1719extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf);
1665extern qsize_t *ext4_get_reserved_space(struct inode *inode); 1720extern qsize_t *ext4_get_reserved_space(struct inode *inode);
1666extern int flush_completed_IO(struct inode *inode);
1667extern void ext4_da_update_reserve_space(struct inode *inode, 1721extern void ext4_da_update_reserve_space(struct inode *inode,
1668 int used, int quota_claim); 1722 int used, int quota_claim);
1669/* ioctl.c */ 1723/* ioctl.c */
@@ -1960,6 +2014,7 @@ extern const struct file_operations ext4_dir_operations;
1960/* file.c */ 2014/* file.c */
1961extern const struct inode_operations ext4_file_inode_operations; 2015extern const struct inode_operations ext4_file_inode_operations;
1962extern const struct file_operations ext4_file_operations; 2016extern const struct file_operations ext4_file_operations;
2017extern loff_t ext4_llseek(struct file *file, loff_t offset, int origin);
1963 2018
1964/* namei.c */ 2019/* namei.c */
1965extern const struct inode_operations ext4_dir_inode_operations; 2020extern const struct inode_operations ext4_dir_inode_operations;
@@ -1973,8 +2028,8 @@ extern const struct inode_operations ext4_fast_symlink_inode_operations;
1973/* block_validity */ 2028/* block_validity */
1974extern void ext4_release_system_zone(struct super_block *sb); 2029extern void ext4_release_system_zone(struct super_block *sb);
1975extern int ext4_setup_system_zone(struct super_block *sb); 2030extern int ext4_setup_system_zone(struct super_block *sb);
1976extern int __init init_ext4_system_zone(void); 2031extern int __init ext4_init_system_zone(void);
1977extern void exit_ext4_system_zone(void); 2032extern void ext4_exit_system_zone(void);
1978extern int ext4_data_block_valid(struct ext4_sb_info *sbi, 2033extern int ext4_data_block_valid(struct ext4_sb_info *sbi,
1979 ext4_fsblk_t start_blk, 2034 ext4_fsblk_t start_blk,
1980 unsigned int count); 2035 unsigned int count);
@@ -2002,6 +2057,17 @@ extern int ext4_move_extents(struct file *o_filp, struct file *d_filp,
2002 __u64 start_orig, __u64 start_donor, 2057 __u64 start_orig, __u64 start_donor,
2003 __u64 len, __u64 *moved_len); 2058 __u64 len, __u64 *moved_len);
2004 2059
2060/* page-io.c */
2061extern int __init ext4_init_pageio(void);
2062extern void ext4_exit_pageio(void);
2063extern void ext4_free_io_end(ext4_io_end_t *io);
2064extern ext4_io_end_t *ext4_init_io_end(struct inode *inode, gfp_t flags);
2065extern int ext4_end_io_nolock(ext4_io_end_t *io);
2066extern void ext4_io_submit(struct ext4_io_submit *io);
2067extern int ext4_bio_write_page(struct ext4_io_submit *io,
2068 struct page *page,
2069 int len,
2070 struct writeback_control *wbc);
2005 2071
2006/* BH_Uninit flag: blocks are allocated but uninitialized on disk */ 2072/* BH_Uninit flag: blocks are allocated but uninitialized on disk */
2007enum ext4_state_bits { 2073enum ext4_state_bits {
diff --git a/fs/ext4/ext4_extents.h b/fs/ext4/ext4_extents.h
index bdb6ce7e2eb4..28ce70fd9cd0 100644
--- a/fs/ext4/ext4_extents.h
+++ b/fs/ext4/ext4_extents.h
@@ -225,11 +225,60 @@ static inline void ext4_ext_mark_initialized(struct ext4_extent *ext)
225 ext->ee_len = cpu_to_le16(ext4_ext_get_actual_len(ext)); 225 ext->ee_len = cpu_to_le16(ext4_ext_get_actual_len(ext));
226} 226}
227 227
228/*
229 * ext4_ext_pblock:
230 * combine low and high parts of physical block number into ext4_fsblk_t
231 */
232static inline ext4_fsblk_t ext4_ext_pblock(struct ext4_extent *ex)
233{
234 ext4_fsblk_t block;
235
236 block = le32_to_cpu(ex->ee_start_lo);
237 block |= ((ext4_fsblk_t) le16_to_cpu(ex->ee_start_hi) << 31) << 1;
238 return block;
239}
240
241/*
242 * ext4_idx_pblock:
243 * combine low and high parts of a leaf physical block number into ext4_fsblk_t
244 */
245static inline ext4_fsblk_t ext4_idx_pblock(struct ext4_extent_idx *ix)
246{
247 ext4_fsblk_t block;
248
249 block = le32_to_cpu(ix->ei_leaf_lo);
250 block |= ((ext4_fsblk_t) le16_to_cpu(ix->ei_leaf_hi) << 31) << 1;
251 return block;
252}
253
254/*
255 * ext4_ext_store_pblock:
256 * stores a large physical block number into an extent struct,
257 * breaking it into parts
258 */
259static inline void ext4_ext_store_pblock(struct ext4_extent *ex,
260 ext4_fsblk_t pb)
261{
262 ex->ee_start_lo = cpu_to_le32((unsigned long) (pb & 0xffffffff));
263 ex->ee_start_hi = cpu_to_le16((unsigned long) ((pb >> 31) >> 1) &
264 0xffff);
265}
266
267/*
268 * ext4_idx_store_pblock:
269 * stores a large physical block number into an index struct,
270 * breaking it into parts
271 */
272static inline void ext4_idx_store_pblock(struct ext4_extent_idx *ix,
273 ext4_fsblk_t pb)
274{
275 ix->ei_leaf_lo = cpu_to_le32((unsigned long) (pb & 0xffffffff));
276 ix->ei_leaf_hi = cpu_to_le16((unsigned long) ((pb >> 31) >> 1) &
277 0xffff);
278}
279
228extern int ext4_ext_calc_metadata_amount(struct inode *inode, 280extern int ext4_ext_calc_metadata_amount(struct inode *inode,
229 sector_t lblocks); 281 sector_t lblocks);
230extern ext4_fsblk_t ext_pblock(struct ext4_extent *ex);
231extern ext4_fsblk_t idx_pblock(struct ext4_extent_idx *);
232extern void ext4_ext_store_pblock(struct ext4_extent *, ext4_fsblk_t);
233extern int ext4_extent_tree_init(handle_t *, struct inode *); 282extern int ext4_extent_tree_init(handle_t *, struct inode *);
234extern int ext4_ext_calc_credits_for_single_extent(struct inode *inode, 283extern int ext4_ext_calc_credits_for_single_extent(struct inode *inode,
235 int num, 284 int num,
@@ -237,19 +286,9 @@ extern int ext4_ext_calc_credits_for_single_extent(struct inode *inode,
237extern int ext4_can_extents_be_merged(struct inode *inode, 286extern int ext4_can_extents_be_merged(struct inode *inode,
238 struct ext4_extent *ex1, 287 struct ext4_extent *ex1,
239 struct ext4_extent *ex2); 288 struct ext4_extent *ex2);
240extern int ext4_ext_try_to_merge(struct inode *inode,
241 struct ext4_ext_path *path,
242 struct ext4_extent *);
243extern unsigned int ext4_ext_check_overlap(struct inode *, struct ext4_extent *, struct ext4_ext_path *);
244extern int ext4_ext_insert_extent(handle_t *, struct inode *, struct ext4_ext_path *, struct ext4_extent *, int); 289extern int ext4_ext_insert_extent(handle_t *, struct inode *, struct ext4_ext_path *, struct ext4_extent *, int);
245extern int ext4_ext_walk_space(struct inode *, ext4_lblk_t, ext4_lblk_t,
246 ext_prepare_callback, void *);
247extern struct ext4_ext_path *ext4_ext_find_extent(struct inode *, ext4_lblk_t, 290extern struct ext4_ext_path *ext4_ext_find_extent(struct inode *, ext4_lblk_t,
248 struct ext4_ext_path *); 291 struct ext4_ext_path *);
249extern int ext4_ext_search_left(struct inode *, struct ext4_ext_path *,
250 ext4_lblk_t *, ext4_fsblk_t *);
251extern int ext4_ext_search_right(struct inode *, struct ext4_ext_path *,
252 ext4_lblk_t *, ext4_fsblk_t *);
253extern void ext4_ext_drop_refs(struct ext4_ext_path *); 292extern void ext4_ext_drop_refs(struct ext4_ext_path *);
254extern int ext4_ext_check_inode(struct inode *inode); 293extern int ext4_ext_check_inode(struct inode *inode);
255#endif /* _EXT4_EXTENTS */ 294#endif /* _EXT4_EXTENTS */
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 06328d3e5717..0554c48cb1fd 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -44,55 +44,6 @@
44#include "ext4_jbd2.h" 44#include "ext4_jbd2.h"
45#include "ext4_extents.h" 45#include "ext4_extents.h"
46 46
47
48/*
49 * ext_pblock:
50 * combine low and high parts of physical block number into ext4_fsblk_t
51 */
52ext4_fsblk_t ext_pblock(struct ext4_extent *ex)
53{
54 ext4_fsblk_t block;
55
56 block = le32_to_cpu(ex->ee_start_lo);
57 block |= ((ext4_fsblk_t) le16_to_cpu(ex->ee_start_hi) << 31) << 1;
58 return block;
59}
60
61/*
62 * idx_pblock:
63 * combine low and high parts of a leaf physical block number into ext4_fsblk_t
64 */
65ext4_fsblk_t idx_pblock(struct ext4_extent_idx *ix)
66{
67 ext4_fsblk_t block;
68
69 block = le32_to_cpu(ix->ei_leaf_lo);
70 block |= ((ext4_fsblk_t) le16_to_cpu(ix->ei_leaf_hi) << 31) << 1;
71 return block;
72}
73
74/*
75 * ext4_ext_store_pblock:
76 * stores a large physical block number into an extent struct,
77 * breaking it into parts
78 */
79void ext4_ext_store_pblock(struct ext4_extent *ex, ext4_fsblk_t pb)
80{
81 ex->ee_start_lo = cpu_to_le32((unsigned long) (pb & 0xffffffff));
82 ex->ee_start_hi = cpu_to_le16((unsigned long) ((pb >> 31) >> 1) & 0xffff);
83}
84
85/*
86 * ext4_idx_store_pblock:
87 * stores a large physical block number into an index struct,
88 * breaking it into parts
89 */
90static void ext4_idx_store_pblock(struct ext4_extent_idx *ix, ext4_fsblk_t pb)
91{
92 ix->ei_leaf_lo = cpu_to_le32((unsigned long) (pb & 0xffffffff));
93 ix->ei_leaf_hi = cpu_to_le16((unsigned long) ((pb >> 31) >> 1) & 0xffff);
94}
95
96static int ext4_ext_truncate_extend_restart(handle_t *handle, 47static int ext4_ext_truncate_extend_restart(handle_t *handle,
97 struct inode *inode, 48 struct inode *inode,
98 int needed) 49 int needed)
@@ -169,7 +120,8 @@ static ext4_fsblk_t ext4_ext_find_goal(struct inode *inode,
169 /* try to predict block placement */ 120 /* try to predict block placement */
170 ex = path[depth].p_ext; 121 ex = path[depth].p_ext;
171 if (ex) 122 if (ex)
172 return ext_pblock(ex)+(block-le32_to_cpu(ex->ee_block)); 123 return (ext4_ext_pblock(ex) +
124 (block - le32_to_cpu(ex->ee_block)));
173 125
174 /* it looks like index is empty; 126 /* it looks like index is empty;
175 * try to find starting block from index itself */ 127 * try to find starting block from index itself */
@@ -354,7 +306,7 @@ ext4_ext_max_entries(struct inode *inode, int depth)
354 306
355static int ext4_valid_extent(struct inode *inode, struct ext4_extent *ext) 307static int ext4_valid_extent(struct inode *inode, struct ext4_extent *ext)
356{ 308{
357 ext4_fsblk_t block = ext_pblock(ext); 309 ext4_fsblk_t block = ext4_ext_pblock(ext);
358 int len = ext4_ext_get_actual_len(ext); 310 int len = ext4_ext_get_actual_len(ext);
359 311
360 return ext4_data_block_valid(EXT4_SB(inode->i_sb), block, len); 312 return ext4_data_block_valid(EXT4_SB(inode->i_sb), block, len);
@@ -363,7 +315,7 @@ static int ext4_valid_extent(struct inode *inode, struct ext4_extent *ext)
363static int ext4_valid_extent_idx(struct inode *inode, 315static int ext4_valid_extent_idx(struct inode *inode,
364 struct ext4_extent_idx *ext_idx) 316 struct ext4_extent_idx *ext_idx)
365{ 317{
366 ext4_fsblk_t block = idx_pblock(ext_idx); 318 ext4_fsblk_t block = ext4_idx_pblock(ext_idx);
367 319
368 return ext4_data_block_valid(EXT4_SB(inode->i_sb), block, 1); 320 return ext4_data_block_valid(EXT4_SB(inode->i_sb), block, 1);
369} 321}
@@ -463,13 +415,13 @@ static void ext4_ext_show_path(struct inode *inode, struct ext4_ext_path *path)
463 for (k = 0; k <= l; k++, path++) { 415 for (k = 0; k <= l; k++, path++) {
464 if (path->p_idx) { 416 if (path->p_idx) {
465 ext_debug(" %d->%llu", le32_to_cpu(path->p_idx->ei_block), 417 ext_debug(" %d->%llu", le32_to_cpu(path->p_idx->ei_block),
466 idx_pblock(path->p_idx)); 418 ext4_idx_pblock(path->p_idx));
467 } else if (path->p_ext) { 419 } else if (path->p_ext) {
468 ext_debug(" %d:[%d]%d:%llu ", 420 ext_debug(" %d:[%d]%d:%llu ",
469 le32_to_cpu(path->p_ext->ee_block), 421 le32_to_cpu(path->p_ext->ee_block),
470 ext4_ext_is_uninitialized(path->p_ext), 422 ext4_ext_is_uninitialized(path->p_ext),
471 ext4_ext_get_actual_len(path->p_ext), 423 ext4_ext_get_actual_len(path->p_ext),
472 ext_pblock(path->p_ext)); 424 ext4_ext_pblock(path->p_ext));
473 } else 425 } else
474 ext_debug(" []"); 426 ext_debug(" []");
475 } 427 }
@@ -494,7 +446,7 @@ static void ext4_ext_show_leaf(struct inode *inode, struct ext4_ext_path *path)
494 for (i = 0; i < le16_to_cpu(eh->eh_entries); i++, ex++) { 446 for (i = 0; i < le16_to_cpu(eh->eh_entries); i++, ex++) {
495 ext_debug("%d:[%d]%d:%llu ", le32_to_cpu(ex->ee_block), 447 ext_debug("%d:[%d]%d:%llu ", le32_to_cpu(ex->ee_block),
496 ext4_ext_is_uninitialized(ex), 448 ext4_ext_is_uninitialized(ex),
497 ext4_ext_get_actual_len(ex), ext_pblock(ex)); 449 ext4_ext_get_actual_len(ex), ext4_ext_pblock(ex));
498 } 450 }
499 ext_debug("\n"); 451 ext_debug("\n");
500} 452}
@@ -545,7 +497,7 @@ ext4_ext_binsearch_idx(struct inode *inode,
545 497
546 path->p_idx = l - 1; 498 path->p_idx = l - 1;
547 ext_debug(" -> %d->%lld ", le32_to_cpu(path->p_idx->ei_block), 499 ext_debug(" -> %d->%lld ", le32_to_cpu(path->p_idx->ei_block),
548 idx_pblock(path->p_idx)); 500 ext4_idx_pblock(path->p_idx));
549 501
550#ifdef CHECK_BINSEARCH 502#ifdef CHECK_BINSEARCH
551 { 503 {
@@ -614,7 +566,7 @@ ext4_ext_binsearch(struct inode *inode,
614 path->p_ext = l - 1; 566 path->p_ext = l - 1;
615 ext_debug(" -> %d:%llu:[%d]%d ", 567 ext_debug(" -> %d:%llu:[%d]%d ",
616 le32_to_cpu(path->p_ext->ee_block), 568 le32_to_cpu(path->p_ext->ee_block),
617 ext_pblock(path->p_ext), 569 ext4_ext_pblock(path->p_ext),
618 ext4_ext_is_uninitialized(path->p_ext), 570 ext4_ext_is_uninitialized(path->p_ext),
619 ext4_ext_get_actual_len(path->p_ext)); 571 ext4_ext_get_actual_len(path->p_ext));
620 572
@@ -682,7 +634,7 @@ ext4_ext_find_extent(struct inode *inode, ext4_lblk_t block,
682 ppos, le16_to_cpu(eh->eh_entries), le16_to_cpu(eh->eh_max)); 634 ppos, le16_to_cpu(eh->eh_entries), le16_to_cpu(eh->eh_max));
683 635
684 ext4_ext_binsearch_idx(inode, path + ppos, block); 636 ext4_ext_binsearch_idx(inode, path + ppos, block);
685 path[ppos].p_block = idx_pblock(path[ppos].p_idx); 637 path[ppos].p_block = ext4_idx_pblock(path[ppos].p_idx);
686 path[ppos].p_depth = i; 638 path[ppos].p_depth = i;
687 path[ppos].p_ext = NULL; 639 path[ppos].p_ext = NULL;
688 640
@@ -721,7 +673,7 @@ ext4_ext_find_extent(struct inode *inode, ext4_lblk_t block,
721 ext4_ext_binsearch(inode, path + ppos, block); 673 ext4_ext_binsearch(inode, path + ppos, block);
722 /* if not an empty leaf */ 674 /* if not an empty leaf */
723 if (path[ppos].p_ext) 675 if (path[ppos].p_ext)
724 path[ppos].p_block = ext_pblock(path[ppos].p_ext); 676 path[ppos].p_block = ext4_ext_pblock(path[ppos].p_ext);
725 677
726 ext4_ext_show_path(inode, path); 678 ext4_ext_show_path(inode, path);
727 679
@@ -739,9 +691,9 @@ err:
739 * insert new index [@logical;@ptr] into the block at @curp; 691 * insert new index [@logical;@ptr] into the block at @curp;
740 * check where to insert: before @curp or after @curp 692 * check where to insert: before @curp or after @curp
741 */ 693 */
742int ext4_ext_insert_index(handle_t *handle, struct inode *inode, 694static int ext4_ext_insert_index(handle_t *handle, struct inode *inode,
743 struct ext4_ext_path *curp, 695 struct ext4_ext_path *curp,
744 int logical, ext4_fsblk_t ptr) 696 int logical, ext4_fsblk_t ptr)
745{ 697{
746 struct ext4_extent_idx *ix; 698 struct ext4_extent_idx *ix;
747 int len, err; 699 int len, err;
@@ -917,7 +869,7 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode,
917 EXT_MAX_EXTENT(path[depth].p_hdr)) { 869 EXT_MAX_EXTENT(path[depth].p_hdr)) {
918 ext_debug("move %d:%llu:[%d]%d in new leaf %llu\n", 870 ext_debug("move %d:%llu:[%d]%d in new leaf %llu\n",
919 le32_to_cpu(path[depth].p_ext->ee_block), 871 le32_to_cpu(path[depth].p_ext->ee_block),
920 ext_pblock(path[depth].p_ext), 872 ext4_ext_pblock(path[depth].p_ext),
921 ext4_ext_is_uninitialized(path[depth].p_ext), 873 ext4_ext_is_uninitialized(path[depth].p_ext),
922 ext4_ext_get_actual_len(path[depth].p_ext), 874 ext4_ext_get_actual_len(path[depth].p_ext),
923 newblock); 875 newblock);
@@ -1007,7 +959,7 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode,
1007 while (path[i].p_idx <= EXT_MAX_INDEX(path[i].p_hdr)) { 959 while (path[i].p_idx <= EXT_MAX_INDEX(path[i].p_hdr)) {
1008 ext_debug("%d: move %d:%llu in new index %llu\n", i, 960 ext_debug("%d: move %d:%llu in new index %llu\n", i,
1009 le32_to_cpu(path[i].p_idx->ei_block), 961 le32_to_cpu(path[i].p_idx->ei_block),
1010 idx_pblock(path[i].p_idx), 962 ext4_idx_pblock(path[i].p_idx),
1011 newblock); 963 newblock);
1012 /*memmove(++fidx, path[i].p_idx++, 964 /*memmove(++fidx, path[i].p_idx++,
1013 sizeof(struct ext4_extent_idx)); 965 sizeof(struct ext4_extent_idx));
@@ -1146,7 +1098,7 @@ static int ext4_ext_grow_indepth(handle_t *handle, struct inode *inode,
1146 ext_debug("new root: num %d(%d), lblock %d, ptr %llu\n", 1098 ext_debug("new root: num %d(%d), lblock %d, ptr %llu\n",
1147 le16_to_cpu(neh->eh_entries), le16_to_cpu(neh->eh_max), 1099 le16_to_cpu(neh->eh_entries), le16_to_cpu(neh->eh_max),
1148 le32_to_cpu(EXT_FIRST_INDEX(neh)->ei_block), 1100 le32_to_cpu(EXT_FIRST_INDEX(neh)->ei_block),
1149 idx_pblock(EXT_FIRST_INDEX(neh))); 1101 ext4_idx_pblock(EXT_FIRST_INDEX(neh)));
1150 1102
1151 neh->eh_depth = cpu_to_le16(path->p_depth + 1); 1103 neh->eh_depth = cpu_to_le16(path->p_depth + 1);
1152 err = ext4_ext_dirty(handle, inode, curp); 1104 err = ext4_ext_dirty(handle, inode, curp);
@@ -1232,9 +1184,9 @@ out:
1232 * returns 0 at @phys 1184 * returns 0 at @phys
1233 * return value contains 0 (success) or error code 1185 * return value contains 0 (success) or error code
1234 */ 1186 */
1235int 1187static int ext4_ext_search_left(struct inode *inode,
1236ext4_ext_search_left(struct inode *inode, struct ext4_ext_path *path, 1188 struct ext4_ext_path *path,
1237 ext4_lblk_t *logical, ext4_fsblk_t *phys) 1189 ext4_lblk_t *logical, ext4_fsblk_t *phys)
1238{ 1190{
1239 struct ext4_extent_idx *ix; 1191 struct ext4_extent_idx *ix;
1240 struct ext4_extent *ex; 1192 struct ext4_extent *ex;
@@ -1286,7 +1238,7 @@ ext4_ext_search_left(struct inode *inode, struct ext4_ext_path *path,
1286 } 1238 }
1287 1239
1288 *logical = le32_to_cpu(ex->ee_block) + ee_len - 1; 1240 *logical = le32_to_cpu(ex->ee_block) + ee_len - 1;
1289 *phys = ext_pblock(ex) + ee_len - 1; 1241 *phys = ext4_ext_pblock(ex) + ee_len - 1;
1290 return 0; 1242 return 0;
1291} 1243}
1292 1244
@@ -1297,9 +1249,9 @@ ext4_ext_search_left(struct inode *inode, struct ext4_ext_path *path,
1297 * returns 0 at @phys 1249 * returns 0 at @phys
1298 * return value contains 0 (success) or error code 1250 * return value contains 0 (success) or error code
1299 */ 1251 */
1300int 1252static int ext4_ext_search_right(struct inode *inode,
1301ext4_ext_search_right(struct inode *inode, struct ext4_ext_path *path, 1253 struct ext4_ext_path *path,
1302 ext4_lblk_t *logical, ext4_fsblk_t *phys) 1254 ext4_lblk_t *logical, ext4_fsblk_t *phys)
1303{ 1255{
1304 struct buffer_head *bh = NULL; 1256 struct buffer_head *bh = NULL;
1305 struct ext4_extent_header *eh; 1257 struct ext4_extent_header *eh;
@@ -1342,7 +1294,7 @@ ext4_ext_search_right(struct inode *inode, struct ext4_ext_path *path,
1342 } 1294 }
1343 } 1295 }
1344 *logical = le32_to_cpu(ex->ee_block); 1296 *logical = le32_to_cpu(ex->ee_block);
1345 *phys = ext_pblock(ex); 1297 *phys = ext4_ext_pblock(ex);
1346 return 0; 1298 return 0;
1347 } 1299 }
1348 1300
@@ -1357,7 +1309,7 @@ ext4_ext_search_right(struct inode *inode, struct ext4_ext_path *path,
1357 /* next allocated block in this leaf */ 1309 /* next allocated block in this leaf */
1358 ex++; 1310 ex++;
1359 *logical = le32_to_cpu(ex->ee_block); 1311 *logical = le32_to_cpu(ex->ee_block);
1360 *phys = ext_pblock(ex); 1312 *phys = ext4_ext_pblock(ex);
1361 return 0; 1313 return 0;
1362 } 1314 }
1363 1315
@@ -1376,7 +1328,7 @@ got_index:
1376 * follow it and find the closest allocated 1328 * follow it and find the closest allocated
1377 * block to the right */ 1329 * block to the right */
1378 ix++; 1330 ix++;
1379 block = idx_pblock(ix); 1331 block = ext4_idx_pblock(ix);
1380 while (++depth < path->p_depth) { 1332 while (++depth < path->p_depth) {
1381 bh = sb_bread(inode->i_sb, block); 1333 bh = sb_bread(inode->i_sb, block);
1382 if (bh == NULL) 1334 if (bh == NULL)
@@ -1388,7 +1340,7 @@ got_index:
1388 return -EIO; 1340 return -EIO;
1389 } 1341 }
1390 ix = EXT_FIRST_INDEX(eh); 1342 ix = EXT_FIRST_INDEX(eh);
1391 block = idx_pblock(ix); 1343 block = ext4_idx_pblock(ix);
1392 put_bh(bh); 1344 put_bh(bh);
1393 } 1345 }
1394 1346
@@ -1402,7 +1354,7 @@ got_index:
1402 } 1354 }
1403 ex = EXT_FIRST_EXTENT(eh); 1355 ex = EXT_FIRST_EXTENT(eh);
1404 *logical = le32_to_cpu(ex->ee_block); 1356 *logical = le32_to_cpu(ex->ee_block);
1405 *phys = ext_pblock(ex); 1357 *phys = ext4_ext_pblock(ex);
1406 put_bh(bh); 1358 put_bh(bh);
1407 return 0; 1359 return 0;
1408} 1360}
@@ -1573,7 +1525,7 @@ ext4_can_extents_be_merged(struct inode *inode, struct ext4_extent *ex1,
1573 return 0; 1525 return 0;
1574#endif 1526#endif
1575 1527
1576 if (ext_pblock(ex1) + ext1_ee_len == ext_pblock(ex2)) 1528 if (ext4_ext_pblock(ex1) + ext1_ee_len == ext4_ext_pblock(ex2))
1577 return 1; 1529 return 1;
1578 return 0; 1530 return 0;
1579} 1531}
@@ -1585,9 +1537,9 @@ ext4_can_extents_be_merged(struct inode *inode, struct ext4_extent *ex1,
1585 * Returns 0 if the extents (ex and ex+1) were _not_ merged and returns 1537 * Returns 0 if the extents (ex and ex+1) were _not_ merged and returns
1586 * 1 if they got merged. 1538 * 1 if they got merged.
1587 */ 1539 */
1588int ext4_ext_try_to_merge(struct inode *inode, 1540static int ext4_ext_try_to_merge(struct inode *inode,
1589 struct ext4_ext_path *path, 1541 struct ext4_ext_path *path,
1590 struct ext4_extent *ex) 1542 struct ext4_extent *ex)
1591{ 1543{
1592 struct ext4_extent_header *eh; 1544 struct ext4_extent_header *eh;
1593 unsigned int depth, len; 1545 unsigned int depth, len;
@@ -1632,9 +1584,9 @@ int ext4_ext_try_to_merge(struct inode *inode,
1632 * such that there will be no overlap, and then returns 1. 1584 * such that there will be no overlap, and then returns 1.
1633 * If there is no overlap found, it returns 0. 1585 * If there is no overlap found, it returns 0.
1634 */ 1586 */
1635unsigned int ext4_ext_check_overlap(struct inode *inode, 1587static unsigned int ext4_ext_check_overlap(struct inode *inode,
1636 struct ext4_extent *newext, 1588 struct ext4_extent *newext,
1637 struct ext4_ext_path *path) 1589 struct ext4_ext_path *path)
1638{ 1590{
1639 ext4_lblk_t b1, b2; 1591 ext4_lblk_t b1, b2;
1640 unsigned int depth, len1; 1592 unsigned int depth, len1;
@@ -1706,11 +1658,12 @@ int ext4_ext_insert_extent(handle_t *handle, struct inode *inode,
1706 if (ex && !(flag & EXT4_GET_BLOCKS_PRE_IO) 1658 if (ex && !(flag & EXT4_GET_BLOCKS_PRE_IO)
1707 && ext4_can_extents_be_merged(inode, ex, newext)) { 1659 && ext4_can_extents_be_merged(inode, ex, newext)) {
1708 ext_debug("append [%d]%d block to %d:[%d]%d (from %llu)\n", 1660 ext_debug("append [%d]%d block to %d:[%d]%d (from %llu)\n",
1709 ext4_ext_is_uninitialized(newext), 1661 ext4_ext_is_uninitialized(newext),
1710 ext4_ext_get_actual_len(newext), 1662 ext4_ext_get_actual_len(newext),
1711 le32_to_cpu(ex->ee_block), 1663 le32_to_cpu(ex->ee_block),
1712 ext4_ext_is_uninitialized(ex), 1664 ext4_ext_is_uninitialized(ex),
1713 ext4_ext_get_actual_len(ex), ext_pblock(ex)); 1665 ext4_ext_get_actual_len(ex),
1666 ext4_ext_pblock(ex));
1714 err = ext4_ext_get_access(handle, inode, path + depth); 1667 err = ext4_ext_get_access(handle, inode, path + depth);
1715 if (err) 1668 if (err)
1716 return err; 1669 return err;
@@ -1780,7 +1733,7 @@ has_space:
1780 /* there is no extent in this leaf, create first one */ 1733 /* there is no extent in this leaf, create first one */
1781 ext_debug("first extent in the leaf: %d:%llu:[%d]%d\n", 1734 ext_debug("first extent in the leaf: %d:%llu:[%d]%d\n",
1782 le32_to_cpu(newext->ee_block), 1735 le32_to_cpu(newext->ee_block),
1783 ext_pblock(newext), 1736 ext4_ext_pblock(newext),
1784 ext4_ext_is_uninitialized(newext), 1737 ext4_ext_is_uninitialized(newext),
1785 ext4_ext_get_actual_len(newext)); 1738 ext4_ext_get_actual_len(newext));
1786 path[depth].p_ext = EXT_FIRST_EXTENT(eh); 1739 path[depth].p_ext = EXT_FIRST_EXTENT(eh);
@@ -1794,7 +1747,7 @@ has_space:
1794 ext_debug("insert %d:%llu:[%d]%d after: nearest 0x%p, " 1747 ext_debug("insert %d:%llu:[%d]%d after: nearest 0x%p, "
1795 "move %d from 0x%p to 0x%p\n", 1748 "move %d from 0x%p to 0x%p\n",
1796 le32_to_cpu(newext->ee_block), 1749 le32_to_cpu(newext->ee_block),
1797 ext_pblock(newext), 1750 ext4_ext_pblock(newext),
1798 ext4_ext_is_uninitialized(newext), 1751 ext4_ext_is_uninitialized(newext),
1799 ext4_ext_get_actual_len(newext), 1752 ext4_ext_get_actual_len(newext),
1800 nearex, len, nearex + 1, nearex + 2); 1753 nearex, len, nearex + 1, nearex + 2);
@@ -1808,7 +1761,7 @@ has_space:
1808 ext_debug("insert %d:%llu:[%d]%d before: nearest 0x%p, " 1761 ext_debug("insert %d:%llu:[%d]%d before: nearest 0x%p, "
1809 "move %d from 0x%p to 0x%p\n", 1762 "move %d from 0x%p to 0x%p\n",
1810 le32_to_cpu(newext->ee_block), 1763 le32_to_cpu(newext->ee_block),
1811 ext_pblock(newext), 1764 ext4_ext_pblock(newext),
1812 ext4_ext_is_uninitialized(newext), 1765 ext4_ext_is_uninitialized(newext),
1813 ext4_ext_get_actual_len(newext), 1766 ext4_ext_get_actual_len(newext),
1814 nearex, len, nearex + 1, nearex + 2); 1767 nearex, len, nearex + 1, nearex + 2);
@@ -1819,7 +1772,7 @@ has_space:
1819 le16_add_cpu(&eh->eh_entries, 1); 1772 le16_add_cpu(&eh->eh_entries, 1);
1820 nearex = path[depth].p_ext; 1773 nearex = path[depth].p_ext;
1821 nearex->ee_block = newext->ee_block; 1774 nearex->ee_block = newext->ee_block;
1822 ext4_ext_store_pblock(nearex, ext_pblock(newext)); 1775 ext4_ext_store_pblock(nearex, ext4_ext_pblock(newext));
1823 nearex->ee_len = newext->ee_len; 1776 nearex->ee_len = newext->ee_len;
1824 1777
1825merge: 1778merge:
@@ -1845,9 +1798,9 @@ cleanup:
1845 return err; 1798 return err;
1846} 1799}
1847 1800
1848int ext4_ext_walk_space(struct inode *inode, ext4_lblk_t block, 1801static int ext4_ext_walk_space(struct inode *inode, ext4_lblk_t block,
1849 ext4_lblk_t num, ext_prepare_callback func, 1802 ext4_lblk_t num, ext_prepare_callback func,
1850 void *cbdata) 1803 void *cbdata)
1851{ 1804{
1852 struct ext4_ext_path *path = NULL; 1805 struct ext4_ext_path *path = NULL;
1853 struct ext4_ext_cache cbex; 1806 struct ext4_ext_cache cbex;
@@ -1923,7 +1876,7 @@ int ext4_ext_walk_space(struct inode *inode, ext4_lblk_t block,
1923 } else { 1876 } else {
1924 cbex.ec_block = le32_to_cpu(ex->ee_block); 1877 cbex.ec_block = le32_to_cpu(ex->ee_block);
1925 cbex.ec_len = ext4_ext_get_actual_len(ex); 1878 cbex.ec_len = ext4_ext_get_actual_len(ex);
1926 cbex.ec_start = ext_pblock(ex); 1879 cbex.ec_start = ext4_ext_pblock(ex);
1927 cbex.ec_type = EXT4_EXT_CACHE_EXTENT; 1880 cbex.ec_type = EXT4_EXT_CACHE_EXTENT;
1928 } 1881 }
1929 1882
@@ -2073,7 +2026,7 @@ static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode,
2073 2026
2074 /* free index block */ 2027 /* free index block */
2075 path--; 2028 path--;
2076 leaf = idx_pblock(path->p_idx); 2029 leaf = ext4_idx_pblock(path->p_idx);
2077 if (unlikely(path->p_hdr->eh_entries == 0)) { 2030 if (unlikely(path->p_hdr->eh_entries == 0)) {
2078 EXT4_ERROR_INODE(inode, "path->p_hdr->eh_entries == 0"); 2031 EXT4_ERROR_INODE(inode, "path->p_hdr->eh_entries == 0");
2079 return -EIO; 2032 return -EIO;
@@ -2181,7 +2134,7 @@ static int ext4_remove_blocks(handle_t *handle, struct inode *inode,
2181 ext4_fsblk_t start; 2134 ext4_fsblk_t start;
2182 2135
2183 num = le32_to_cpu(ex->ee_block) + ee_len - from; 2136 num = le32_to_cpu(ex->ee_block) + ee_len - from;
2184 start = ext_pblock(ex) + ee_len - num; 2137 start = ext4_ext_pblock(ex) + ee_len - num;
2185 ext_debug("free last %u blocks starting %llu\n", num, start); 2138 ext_debug("free last %u blocks starting %llu\n", num, start);
2186 ext4_free_blocks(handle, inode, 0, start, num, flags); 2139 ext4_free_blocks(handle, inode, 0, start, num, flags);
2187 } else if (from == le32_to_cpu(ex->ee_block) 2140 } else if (from == le32_to_cpu(ex->ee_block)
@@ -2310,7 +2263,7 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
2310 goto out; 2263 goto out;
2311 2264
2312 ext_debug("new extent: %u:%u:%llu\n", block, num, 2265 ext_debug("new extent: %u:%u:%llu\n", block, num,
2313 ext_pblock(ex)); 2266 ext4_ext_pblock(ex));
2314 ex--; 2267 ex--;
2315 ex_ee_block = le32_to_cpu(ex->ee_block); 2268 ex_ee_block = le32_to_cpu(ex->ee_block);
2316 ex_ee_len = ext4_ext_get_actual_len(ex); 2269 ex_ee_len = ext4_ext_get_actual_len(ex);
@@ -2421,9 +2374,9 @@ again:
2421 struct buffer_head *bh; 2374 struct buffer_head *bh;
2422 /* go to the next level */ 2375 /* go to the next level */
2423 ext_debug("move to level %d (block %llu)\n", 2376 ext_debug("move to level %d (block %llu)\n",
2424 i + 1, idx_pblock(path[i].p_idx)); 2377 i + 1, ext4_idx_pblock(path[i].p_idx));
2425 memset(path + i + 1, 0, sizeof(*path)); 2378 memset(path + i + 1, 0, sizeof(*path));
2426 bh = sb_bread(sb, idx_pblock(path[i].p_idx)); 2379 bh = sb_bread(sb, ext4_idx_pblock(path[i].p_idx));
2427 if (!bh) { 2380 if (!bh) {
2428 /* should we reset i_size? */ 2381 /* should we reset i_size? */
2429 err = -EIO; 2382 err = -EIO;
@@ -2535,77 +2488,21 @@ void ext4_ext_release(struct super_block *sb)
2535#endif 2488#endif
2536} 2489}
2537 2490
2538static void bi_complete(struct bio *bio, int error)
2539{
2540 complete((struct completion *)bio->bi_private);
2541}
2542
2543/* FIXME!! we need to try to merge to left or right after zero-out */ 2491/* FIXME!! we need to try to merge to left or right after zero-out */
2544static int ext4_ext_zeroout(struct inode *inode, struct ext4_extent *ex) 2492static int ext4_ext_zeroout(struct inode *inode, struct ext4_extent *ex)
2545{ 2493{
2494 ext4_fsblk_t ee_pblock;
2495 unsigned int ee_len;
2546 int ret; 2496 int ret;
2547 struct bio *bio;
2548 int blkbits, blocksize;
2549 sector_t ee_pblock;
2550 struct completion event;
2551 unsigned int ee_len, len, done, offset;
2552 2497
2553
2554 blkbits = inode->i_blkbits;
2555 blocksize = inode->i_sb->s_blocksize;
2556 ee_len = ext4_ext_get_actual_len(ex); 2498 ee_len = ext4_ext_get_actual_len(ex);
2557 ee_pblock = ext_pblock(ex); 2499 ee_pblock = ext4_ext_pblock(ex);
2558
2559 /* convert ee_pblock to 512 byte sectors */
2560 ee_pblock = ee_pblock << (blkbits - 9);
2561
2562 while (ee_len > 0) {
2563
2564 if (ee_len > BIO_MAX_PAGES)
2565 len = BIO_MAX_PAGES;
2566 else
2567 len = ee_len;
2568
2569 bio = bio_alloc(GFP_NOIO, len);
2570 if (!bio)
2571 return -ENOMEM;
2572
2573 bio->bi_sector = ee_pblock;
2574 bio->bi_bdev = inode->i_sb->s_bdev;
2575
2576 done = 0;
2577 offset = 0;
2578 while (done < len) {
2579 ret = bio_add_page(bio, ZERO_PAGE(0),
2580 blocksize, offset);
2581 if (ret != blocksize) {
2582 /*
2583 * We can't add any more pages because of
2584 * hardware limitations. Start a new bio.
2585 */
2586 break;
2587 }
2588 done++;
2589 offset += blocksize;
2590 if (offset >= PAGE_CACHE_SIZE)
2591 offset = 0;
2592 }
2593 2500
2594 init_completion(&event); 2501 ret = sb_issue_zeroout(inode->i_sb, ee_pblock, ee_len, GFP_NOFS);
2595 bio->bi_private = &event; 2502 if (ret > 0)
2596 bio->bi_end_io = bi_complete; 2503 ret = 0;
2597 submit_bio(WRITE, bio);
2598 wait_for_completion(&event);
2599 2504
2600 if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) { 2505 return ret;
2601 bio_put(bio);
2602 return -EIO;
2603 }
2604 bio_put(bio);
2605 ee_len -= done;
2606 ee_pblock += done << (blkbits - 9);
2607 }
2608 return 0;
2609} 2506}
2610 2507
2611#define EXT4_EXT_ZERO_LEN 7 2508#define EXT4_EXT_ZERO_LEN 7
@@ -2651,12 +2548,12 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
2651 ee_block = le32_to_cpu(ex->ee_block); 2548 ee_block = le32_to_cpu(ex->ee_block);
2652 ee_len = ext4_ext_get_actual_len(ex); 2549 ee_len = ext4_ext_get_actual_len(ex);
2653 allocated = ee_len - (map->m_lblk - ee_block); 2550 allocated = ee_len - (map->m_lblk - ee_block);
2654 newblock = map->m_lblk - ee_block + ext_pblock(ex); 2551 newblock = map->m_lblk - ee_block + ext4_ext_pblock(ex);
2655 2552
2656 ex2 = ex; 2553 ex2 = ex;
2657 orig_ex.ee_block = ex->ee_block; 2554 orig_ex.ee_block = ex->ee_block;
2658 orig_ex.ee_len = cpu_to_le16(ee_len); 2555 orig_ex.ee_len = cpu_to_le16(ee_len);
2659 ext4_ext_store_pblock(&orig_ex, ext_pblock(ex)); 2556 ext4_ext_store_pblock(&orig_ex, ext4_ext_pblock(ex));
2660 2557
2661 /* 2558 /*
2662 * It is safe to convert extent to initialized via explicit 2559 * It is safe to convert extent to initialized via explicit
@@ -2675,7 +2572,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
2675 /* update the extent length and mark as initialized */ 2572 /* update the extent length and mark as initialized */
2676 ex->ee_block = orig_ex.ee_block; 2573 ex->ee_block = orig_ex.ee_block;
2677 ex->ee_len = orig_ex.ee_len; 2574 ex->ee_len = orig_ex.ee_len;
2678 ext4_ext_store_pblock(ex, ext_pblock(&orig_ex)); 2575 ext4_ext_store_pblock(ex, ext4_ext_pblock(&orig_ex));
2679 ext4_ext_dirty(handle, inode, path + depth); 2576 ext4_ext_dirty(handle, inode, path + depth);
2680 /* zeroed the full extent */ 2577 /* zeroed the full extent */
2681 return allocated; 2578 return allocated;
@@ -2710,7 +2607,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
2710 ex->ee_block = orig_ex.ee_block; 2607 ex->ee_block = orig_ex.ee_block;
2711 ex->ee_len = cpu_to_le16(ee_len - allocated); 2608 ex->ee_len = cpu_to_le16(ee_len - allocated);
2712 ext4_ext_mark_uninitialized(ex); 2609 ext4_ext_mark_uninitialized(ex);
2713 ext4_ext_store_pblock(ex, ext_pblock(&orig_ex)); 2610 ext4_ext_store_pblock(ex, ext4_ext_pblock(&orig_ex));
2714 ext4_ext_dirty(handle, inode, path + depth); 2611 ext4_ext_dirty(handle, inode, path + depth);
2715 2612
2716 ex3 = &newex; 2613 ex3 = &newex;
@@ -2725,7 +2622,8 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
2725 goto fix_extent_len; 2622 goto fix_extent_len;
2726 ex->ee_block = orig_ex.ee_block; 2623 ex->ee_block = orig_ex.ee_block;
2727 ex->ee_len = orig_ex.ee_len; 2624 ex->ee_len = orig_ex.ee_len;
2728 ext4_ext_store_pblock(ex, ext_pblock(&orig_ex)); 2625 ext4_ext_store_pblock(ex,
2626 ext4_ext_pblock(&orig_ex));
2729 ext4_ext_dirty(handle, inode, path + depth); 2627 ext4_ext_dirty(handle, inode, path + depth);
2730 /* blocks available from map->m_lblk */ 2628 /* blocks available from map->m_lblk */
2731 return allocated; 2629 return allocated;
@@ -2782,7 +2680,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
2782 /* update the extent length and mark as initialized */ 2680 /* update the extent length and mark as initialized */
2783 ex->ee_block = orig_ex.ee_block; 2681 ex->ee_block = orig_ex.ee_block;
2784 ex->ee_len = orig_ex.ee_len; 2682 ex->ee_len = orig_ex.ee_len;
2785 ext4_ext_store_pblock(ex, ext_pblock(&orig_ex)); 2683 ext4_ext_store_pblock(ex, ext4_ext_pblock(&orig_ex));
2786 ext4_ext_dirty(handle, inode, path + depth); 2684 ext4_ext_dirty(handle, inode, path + depth);
2787 /* zeroed the full extent */ 2685 /* zeroed the full extent */
2788 /* blocks available from map->m_lblk */ 2686 /* blocks available from map->m_lblk */
@@ -2833,7 +2731,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
2833 /* update the extent length and mark as initialized */ 2731 /* update the extent length and mark as initialized */
2834 ex->ee_block = orig_ex.ee_block; 2732 ex->ee_block = orig_ex.ee_block;
2835 ex->ee_len = orig_ex.ee_len; 2733 ex->ee_len = orig_ex.ee_len;
2836 ext4_ext_store_pblock(ex, ext_pblock(&orig_ex)); 2734 ext4_ext_store_pblock(ex, ext4_ext_pblock(&orig_ex));
2837 ext4_ext_dirty(handle, inode, path + depth); 2735 ext4_ext_dirty(handle, inode, path + depth);
2838 /* zero out the first half */ 2736 /* zero out the first half */
2839 /* blocks available from map->m_lblk */ 2737 /* blocks available from map->m_lblk */
@@ -2902,7 +2800,7 @@ insert:
2902 /* update the extent length and mark as initialized */ 2800 /* update the extent length and mark as initialized */
2903 ex->ee_block = orig_ex.ee_block; 2801 ex->ee_block = orig_ex.ee_block;
2904 ex->ee_len = orig_ex.ee_len; 2802 ex->ee_len = orig_ex.ee_len;
2905 ext4_ext_store_pblock(ex, ext_pblock(&orig_ex)); 2803 ext4_ext_store_pblock(ex, ext4_ext_pblock(&orig_ex));
2906 ext4_ext_dirty(handle, inode, path + depth); 2804 ext4_ext_dirty(handle, inode, path + depth);
2907 /* zero out the first half */ 2805 /* zero out the first half */
2908 return allocated; 2806 return allocated;
@@ -2915,7 +2813,7 @@ out:
2915fix_extent_len: 2813fix_extent_len:
2916 ex->ee_block = orig_ex.ee_block; 2814 ex->ee_block = orig_ex.ee_block;
2917 ex->ee_len = orig_ex.ee_len; 2815 ex->ee_len = orig_ex.ee_len;
2918 ext4_ext_store_pblock(ex, ext_pblock(&orig_ex)); 2816 ext4_ext_store_pblock(ex, ext4_ext_pblock(&orig_ex));
2919 ext4_ext_mark_uninitialized(ex); 2817 ext4_ext_mark_uninitialized(ex);
2920 ext4_ext_dirty(handle, inode, path + depth); 2818 ext4_ext_dirty(handle, inode, path + depth);
2921 return err; 2819 return err;
@@ -2973,12 +2871,12 @@ static int ext4_split_unwritten_extents(handle_t *handle,
2973 ee_block = le32_to_cpu(ex->ee_block); 2871 ee_block = le32_to_cpu(ex->ee_block);
2974 ee_len = ext4_ext_get_actual_len(ex); 2872 ee_len = ext4_ext_get_actual_len(ex);
2975 allocated = ee_len - (map->m_lblk - ee_block); 2873 allocated = ee_len - (map->m_lblk - ee_block);
2976 newblock = map->m_lblk - ee_block + ext_pblock(ex); 2874 newblock = map->m_lblk - ee_block + ext4_ext_pblock(ex);
2977 2875
2978 ex2 = ex; 2876 ex2 = ex;
2979 orig_ex.ee_block = ex->ee_block; 2877 orig_ex.ee_block = ex->ee_block;
2980 orig_ex.ee_len = cpu_to_le16(ee_len); 2878 orig_ex.ee_len = cpu_to_le16(ee_len);
2981 ext4_ext_store_pblock(&orig_ex, ext_pblock(ex)); 2879 ext4_ext_store_pblock(&orig_ex, ext4_ext_pblock(ex));
2982 2880
2983 /* 2881 /*
2984 * It is safe to convert extent to initialized via explicit 2882 * It is safe to convert extent to initialized via explicit
@@ -3027,7 +2925,7 @@ static int ext4_split_unwritten_extents(handle_t *handle,
3027 /* update the extent length and mark as initialized */ 2925 /* update the extent length and mark as initialized */
3028 ex->ee_block = orig_ex.ee_block; 2926 ex->ee_block = orig_ex.ee_block;
3029 ex->ee_len = orig_ex.ee_len; 2927 ex->ee_len = orig_ex.ee_len;
3030 ext4_ext_store_pblock(ex, ext_pblock(&orig_ex)); 2928 ext4_ext_store_pblock(ex, ext4_ext_pblock(&orig_ex));
3031 ext4_ext_dirty(handle, inode, path + depth); 2929 ext4_ext_dirty(handle, inode, path + depth);
3032 /* zeroed the full extent */ 2930 /* zeroed the full extent */
3033 /* blocks available from map->m_lblk */ 2931 /* blocks available from map->m_lblk */
@@ -3099,7 +2997,7 @@ insert:
3099 /* update the extent length and mark as initialized */ 2997 /* update the extent length and mark as initialized */
3100 ex->ee_block = orig_ex.ee_block; 2998 ex->ee_block = orig_ex.ee_block;
3101 ex->ee_len = orig_ex.ee_len; 2999 ex->ee_len = orig_ex.ee_len;
3102 ext4_ext_store_pblock(ex, ext_pblock(&orig_ex)); 3000 ext4_ext_store_pblock(ex, ext4_ext_pblock(&orig_ex));
3103 ext4_ext_dirty(handle, inode, path + depth); 3001 ext4_ext_dirty(handle, inode, path + depth);
3104 /* zero out the first half */ 3002 /* zero out the first half */
3105 return allocated; 3003 return allocated;
@@ -3112,7 +3010,7 @@ out:
3112fix_extent_len: 3010fix_extent_len:
3113 ex->ee_block = orig_ex.ee_block; 3011 ex->ee_block = orig_ex.ee_block;
3114 ex->ee_len = orig_ex.ee_len; 3012 ex->ee_len = orig_ex.ee_len;
3115 ext4_ext_store_pblock(ex, ext_pblock(&orig_ex)); 3013 ext4_ext_store_pblock(ex, ext4_ext_pblock(&orig_ex));
3116 ext4_ext_mark_uninitialized(ex); 3014 ext4_ext_mark_uninitialized(ex);
3117 ext4_ext_dirty(handle, inode, path + depth); 3015 ext4_ext_dirty(handle, inode, path + depth);
3118 return err; 3016 return err;
@@ -3180,6 +3078,57 @@ static void unmap_underlying_metadata_blocks(struct block_device *bdev,
3180 unmap_underlying_metadata(bdev, block + i); 3078 unmap_underlying_metadata(bdev, block + i);
3181} 3079}
3182 3080
3081/*
3082 * Handle EOFBLOCKS_FL flag, clearing it if necessary
3083 */
3084static int check_eofblocks_fl(handle_t *handle, struct inode *inode,
3085 struct ext4_map_blocks *map,
3086 struct ext4_ext_path *path,
3087 unsigned int len)
3088{
3089 int i, depth;
3090 struct ext4_extent_header *eh;
3091 struct ext4_extent *ex, *last_ex;
3092
3093 if (!ext4_test_inode_flag(inode, EXT4_INODE_EOFBLOCKS))
3094 return 0;
3095
3096 depth = ext_depth(inode);
3097 eh = path[depth].p_hdr;
3098 ex = path[depth].p_ext;
3099
3100 if (unlikely(!eh->eh_entries)) {
3101 EXT4_ERROR_INODE(inode, "eh->eh_entries == 0 and "
3102 "EOFBLOCKS_FL set");
3103 return -EIO;
3104 }
3105 last_ex = EXT_LAST_EXTENT(eh);
3106 /*
3107 * We should clear the EOFBLOCKS_FL flag if we are writing the
3108 * last block in the last extent in the file. We test this by
3109 * first checking to see if the caller to
3110 * ext4_ext_get_blocks() was interested in the last block (or
3111 * a block beyond the last block) in the current extent. If
3112 * this turns out to be false, we can bail out from this
3113 * function immediately.
3114 */
3115 if (map->m_lblk + len < le32_to_cpu(last_ex->ee_block) +
3116 ext4_ext_get_actual_len(last_ex))
3117 return 0;
3118 /*
3119 * If the caller does appear to be planning to write at or
3120 * beyond the end of the current extent, we then test to see
3121 * if the current extent is the last extent in the file, by
3122 * checking to make sure it was reached via the rightmost node
3123 * at each level of the tree.
3124 */
3125 for (i = depth-1; i >= 0; i--)
3126 if (path[i].p_idx != EXT_LAST_INDEX(path[i].p_hdr))
3127 return 0;
3128 ext4_clear_inode_flag(inode, EXT4_INODE_EOFBLOCKS);
3129 return ext4_mark_inode_dirty(handle, inode);
3130}
3131
3183static int 3132static int
3184ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode, 3133ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode,
3185 struct ext4_map_blocks *map, 3134 struct ext4_map_blocks *map,
@@ -3206,7 +3155,7 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode,
3206 * completed 3155 * completed
3207 */ 3156 */
3208 if (io) 3157 if (io)
3209 io->flag = EXT4_IO_UNWRITTEN; 3158 io->flag = EXT4_IO_END_UNWRITTEN;
3210 else 3159 else
3211 ext4_set_inode_state(inode, EXT4_STATE_DIO_UNWRITTEN); 3160 ext4_set_inode_state(inode, EXT4_STATE_DIO_UNWRITTEN);
3212 if (ext4_should_dioread_nolock(inode)) 3161 if (ext4_should_dioread_nolock(inode))
@@ -3217,8 +3166,12 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode,
3217 if ((flags & EXT4_GET_BLOCKS_CONVERT)) { 3166 if ((flags & EXT4_GET_BLOCKS_CONVERT)) {
3218 ret = ext4_convert_unwritten_extents_endio(handle, inode, 3167 ret = ext4_convert_unwritten_extents_endio(handle, inode,
3219 path); 3168 path);
3220 if (ret >= 0) 3169 if (ret >= 0) {
3221 ext4_update_inode_fsync_trans(handle, inode, 1); 3170 ext4_update_inode_fsync_trans(handle, inode, 1);
3171 err = check_eofblocks_fl(handle, inode, map, path,
3172 map->m_len);
3173 } else
3174 err = ret;
3222 goto out2; 3175 goto out2;
3223 } 3176 }
3224 /* buffered IO case */ 3177 /* buffered IO case */
@@ -3244,8 +3197,13 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode,
3244 3197
3245 /* buffered write, writepage time, convert*/ 3198 /* buffered write, writepage time, convert*/
3246 ret = ext4_ext_convert_to_initialized(handle, inode, map, path); 3199 ret = ext4_ext_convert_to_initialized(handle, inode, map, path);
3247 if (ret >= 0) 3200 if (ret >= 0) {
3248 ext4_update_inode_fsync_trans(handle, inode, 1); 3201 ext4_update_inode_fsync_trans(handle, inode, 1);
3202 err = check_eofblocks_fl(handle, inode, map, path, map->m_len);
3203 if (err < 0)
3204 goto out2;
3205 }
3206
3249out: 3207out:
3250 if (ret <= 0) { 3208 if (ret <= 0) {
3251 err = ret; 3209 err = ret;
@@ -3292,6 +3250,7 @@ out2:
3292 } 3250 }
3293 return err ? err : allocated; 3251 return err ? err : allocated;
3294} 3252}
3253
3295/* 3254/*
3296 * Block allocation/map/preallocation routine for extents based files 3255 * Block allocation/map/preallocation routine for extents based files
3297 * 3256 *
@@ -3315,9 +3274,9 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
3315{ 3274{
3316 struct ext4_ext_path *path = NULL; 3275 struct ext4_ext_path *path = NULL;
3317 struct ext4_extent_header *eh; 3276 struct ext4_extent_header *eh;
3318 struct ext4_extent newex, *ex, *last_ex; 3277 struct ext4_extent newex, *ex;
3319 ext4_fsblk_t newblock; 3278 ext4_fsblk_t newblock;
3320 int i, err = 0, depth, ret, cache_type; 3279 int err = 0, depth, ret, cache_type;
3321 unsigned int allocated = 0; 3280 unsigned int allocated = 0;
3322 struct ext4_allocation_request ar; 3281 struct ext4_allocation_request ar;
3323 ext4_io_end_t *io = EXT4_I(inode)->cur_aio_dio; 3282 ext4_io_end_t *io = EXT4_I(inode)->cur_aio_dio;
@@ -3341,7 +3300,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
3341 /* block is already allocated */ 3300 /* block is already allocated */
3342 newblock = map->m_lblk 3301 newblock = map->m_lblk
3343 - le32_to_cpu(newex.ee_block) 3302 - le32_to_cpu(newex.ee_block)
3344 + ext_pblock(&newex); 3303 + ext4_ext_pblock(&newex);
3345 /* number of remaining blocks in the extent */ 3304 /* number of remaining blocks in the extent */
3346 allocated = ext4_ext_get_actual_len(&newex) - 3305 allocated = ext4_ext_get_actual_len(&newex) -
3347 (map->m_lblk - le32_to_cpu(newex.ee_block)); 3306 (map->m_lblk - le32_to_cpu(newex.ee_block));
@@ -3379,7 +3338,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
3379 ex = path[depth].p_ext; 3338 ex = path[depth].p_ext;
3380 if (ex) { 3339 if (ex) {
3381 ext4_lblk_t ee_block = le32_to_cpu(ex->ee_block); 3340 ext4_lblk_t ee_block = le32_to_cpu(ex->ee_block);
3382 ext4_fsblk_t ee_start = ext_pblock(ex); 3341 ext4_fsblk_t ee_start = ext4_ext_pblock(ex);
3383 unsigned short ee_len; 3342 unsigned short ee_len;
3384 3343
3385 /* 3344 /*
@@ -3488,7 +3447,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
3488 */ 3447 */
3489 if ((flags & EXT4_GET_BLOCKS_PRE_IO)) { 3448 if ((flags & EXT4_GET_BLOCKS_PRE_IO)) {
3490 if (io) 3449 if (io)
3491 io->flag = EXT4_IO_UNWRITTEN; 3450 io->flag = EXT4_IO_END_UNWRITTEN;
3492 else 3451 else
3493 ext4_set_inode_state(inode, 3452 ext4_set_inode_state(inode,
3494 EXT4_STATE_DIO_UNWRITTEN); 3453 EXT4_STATE_DIO_UNWRITTEN);
@@ -3497,44 +3456,23 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
3497 map->m_flags |= EXT4_MAP_UNINIT; 3456 map->m_flags |= EXT4_MAP_UNINIT;
3498 } 3457 }
3499 3458
3500 if (unlikely(ext4_test_inode_flag(inode, EXT4_INODE_EOFBLOCKS))) { 3459 err = check_eofblocks_fl(handle, inode, map, path, ar.len);
3501 if (unlikely(!eh->eh_entries)) { 3460 if (err)
3502 EXT4_ERROR_INODE(inode, 3461 goto out2;
3503 "eh->eh_entries == 0 and " 3462
3504 "EOFBLOCKS_FL set");
3505 err = -EIO;
3506 goto out2;
3507 }
3508 last_ex = EXT_LAST_EXTENT(eh);
3509 /*
3510 * If the current leaf block was reached by looking at
3511 * the last index block all the way down the tree, and
3512 * we are extending the inode beyond the last extent
3513 * in the current leaf block, then clear the
3514 * EOFBLOCKS_FL flag.
3515 */
3516 for (i = depth-1; i >= 0; i--) {
3517 if (path[i].p_idx != EXT_LAST_INDEX(path[i].p_hdr))
3518 break;
3519 }
3520 if ((i < 0) &&
3521 (map->m_lblk + ar.len > le32_to_cpu(last_ex->ee_block) +
3522 ext4_ext_get_actual_len(last_ex)))
3523 ext4_clear_inode_flag(inode, EXT4_INODE_EOFBLOCKS);
3524 }
3525 err = ext4_ext_insert_extent(handle, inode, path, &newex, flags); 3463 err = ext4_ext_insert_extent(handle, inode, path, &newex, flags);
3526 if (err) { 3464 if (err) {
3527 /* free data blocks we just allocated */ 3465 /* free data blocks we just allocated */
3528 /* not a good idea to call discard here directly, 3466 /* not a good idea to call discard here directly,
3529 * but otherwise we'd need to call it every free() */ 3467 * but otherwise we'd need to call it every free() */
3530 ext4_discard_preallocations(inode); 3468 ext4_discard_preallocations(inode);
3531 ext4_free_blocks(handle, inode, 0, ext_pblock(&newex), 3469 ext4_free_blocks(handle, inode, 0, ext4_ext_pblock(&newex),
3532 ext4_ext_get_actual_len(&newex), 0); 3470 ext4_ext_get_actual_len(&newex), 0);
3533 goto out2; 3471 goto out2;
3534 } 3472 }
3535 3473
3536 /* previous routine could use block we allocated */ 3474 /* previous routine could use block we allocated */
3537 newblock = ext_pblock(&newex); 3475 newblock = ext4_ext_pblock(&newex);
3538 allocated = ext4_ext_get_actual_len(&newex); 3476 allocated = ext4_ext_get_actual_len(&newex);
3539 if (allocated > map->m_len) 3477 if (allocated > map->m_len)
3540 allocated = map->m_len; 3478 allocated = map->m_len;
@@ -3729,7 +3667,7 @@ retry:
3729 printk(KERN_ERR "%s: ext4_ext_map_blocks " 3667 printk(KERN_ERR "%s: ext4_ext_map_blocks "
3730 "returned error inode#%lu, block=%u, " 3668 "returned error inode#%lu, block=%u, "
3731 "max_blocks=%u", __func__, 3669 "max_blocks=%u", __func__,
3732 inode->i_ino, block, max_blocks); 3670 inode->i_ino, map.m_lblk, max_blocks);
3733#endif 3671#endif
3734 ext4_mark_inode_dirty(handle, inode); 3672 ext4_mark_inode_dirty(handle, inode);
3735 ret2 = ext4_journal_stop(handle); 3673 ret2 = ext4_journal_stop(handle);
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index ee92b66d4558..5a5c55ddceef 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -130,8 +130,50 @@ static int ext4_file_open(struct inode * inode, struct file * filp)
130 return dquot_file_open(inode, filp); 130 return dquot_file_open(inode, filp);
131} 131}
132 132
133/*
134 * ext4_llseek() copied from generic_file_llseek() to handle both
135 * block-mapped and extent-mapped maxbytes values. This should
136 * otherwise be identical with generic_file_llseek().
137 */
138loff_t ext4_llseek(struct file *file, loff_t offset, int origin)
139{
140 struct inode *inode = file->f_mapping->host;
141 loff_t maxbytes;
142
143 if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)))
144 maxbytes = EXT4_SB(inode->i_sb)->s_bitmap_maxbytes;
145 else
146 maxbytes = inode->i_sb->s_maxbytes;
147 mutex_lock(&inode->i_mutex);
148 switch (origin) {
149 case SEEK_END:
150 offset += inode->i_size;
151 break;
152 case SEEK_CUR:
153 if (offset == 0) {
154 mutex_unlock(&inode->i_mutex);
155 return file->f_pos;
156 }
157 offset += file->f_pos;
158 break;
159 }
160
161 if (offset < 0 || offset > maxbytes) {
162 mutex_unlock(&inode->i_mutex);
163 return -EINVAL;
164 }
165
166 if (offset != file->f_pos) {
167 file->f_pos = offset;
168 file->f_version = 0;
169 }
170 mutex_unlock(&inode->i_mutex);
171
172 return offset;
173}
174
133const struct file_operations ext4_file_operations = { 175const struct file_operations ext4_file_operations = {
134 .llseek = generic_file_llseek, 176 .llseek = ext4_llseek,
135 .read = do_sync_read, 177 .read = do_sync_read,
136 .write = do_sync_write, 178 .write = do_sync_write,
137 .aio_read = generic_file_aio_read, 179 .aio_read = generic_file_aio_read,
diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c
index 3f3ff5ee8f9d..c1a7bc923cf6 100644
--- a/fs/ext4/fsync.c
+++ b/fs/ext4/fsync.c
@@ -34,6 +34,89 @@
34 34
35#include <trace/events/ext4.h> 35#include <trace/events/ext4.h>
36 36
/*
 * Debugging aid: print every entry on the inode's i_completed_io_list
 * together with the entries computed from its prev/next links.  The body
 * is compiled only when EXT4_DEBUG is defined; otherwise this is a no-op.
 *
 * NOTE(review): for the first and last entry the prev/next link presumably
 * points at the list head rather than at a real ext4_io_end_t, so the
 * corresponding printed pointer is only useful for eyeballing the links.
 */
static void dump_completed_IO(struct inode *inode)
{
#ifdef EXT4_DEBUG
	struct ext4_inode_info *ei = EXT4_I(inode);
	ext4_io_end_t *entry, *prev_io, *next_io;
	unsigned long irq_flags;

	if (list_empty(&ei->i_completed_io_list)) {
		ext4_debug("inode %lu completed_io list is empty\n", inode->i_ino);
		return;
	}

	ext4_debug("Dump inode %lu completed_io list \n", inode->i_ino);
	spin_lock_irqsave(&ei->i_completed_io_lock, irq_flags);
	list_for_each_entry(entry, &ei->i_completed_io_list, list) {
		prev_io = container_of(entry->list.prev, ext4_io_end_t, list);
		next_io = container_of(entry->list.next, ext4_io_end_t, list);

		ext4_debug("io 0x%p from inode %lu,prev 0x%p,next 0x%p\n",
			   entry, inode->i_ino, prev_io, next_io);
	}
	spin_unlock_irqrestore(&ei->i_completed_io_lock, irq_flags);
#endif
}
64
65/*
66 * This function is called from ext4_sync_file().
67 *
68 * When IO is completed, the work to convert unwritten extents to
69 * written is queued on workqueue but may not get immediately
70 * scheduled. When fsync is called, we need to ensure the
71 * conversion is complete before fsync returns.
72 * The inode keeps track of a list of pending/completed IO that
73 * might needs to do the conversion. This function walks through
74 * the list and convert the related unwritten extents for completed IO
75 * to written.
76 * The function return the number of pending IOs on success.
77 */
78static int flush_completed_IO(struct inode *inode)
79{
80 ext4_io_end_t *io;
81 struct ext4_inode_info *ei = EXT4_I(inode);
82 unsigned long flags;
83 int ret = 0;
84 int ret2 = 0;
85
86 if (list_empty(&ei->i_completed_io_list))
87 return ret;
88
89 dump_completed_IO(inode);
90 spin_lock_irqsave(&ei->i_completed_io_lock, flags);
91 while (!list_empty(&ei->i_completed_io_list)){
92 io = list_entry(ei->i_completed_io_list.next,
93 ext4_io_end_t, list);
94 /*
95 * Calling ext4_end_io_nolock() to convert completed
96 * IO to written.
97 *
98 * When ext4_sync_file() is called, run_queue() may already
99 * about to flush the work corresponding to this io structure.
100 * It will be upset if it founds the io structure related
101 * to the work-to-be schedule is freed.
102 *
103 * Thus we need to keep the io structure still valid here after
104 * convertion finished. The io structure has a flag to
105 * avoid double converting from both fsync and background work
106 * queue work.
107 */
108 spin_unlock_irqrestore(&ei->i_completed_io_lock, flags);
109 ret = ext4_end_io_nolock(io);
110 spin_lock_irqsave(&ei->i_completed_io_lock, flags);
111 if (ret < 0)
112 ret2 = ret;
113 else
114 list_del_init(&io->list);
115 }
116 spin_unlock_irqrestore(&ei->i_completed_io_lock, flags);
117 return (ret2 < 0) ? ret2 : 0;
118}
119
37/* 120/*
38 * If we're not journaling and this is a just-created file, we have to 121 * If we're not journaling and this is a just-created file, we have to
39 * sync our parent directory (if it was freshly created) since 122 * sync our parent directory (if it was freshly created) since
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index 45853e0d1f21..1ce240a23ebb 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -50,7 +50,7 @@
50 * need to use it within a single byte (to ensure we get endianness right). 50 * need to use it within a single byte (to ensure we get endianness right).
51 * We can use memset for the rest of the bitmap as there are no other users. 51 * We can use memset for the rest of the bitmap as there are no other users.
52 */ 52 */
53void mark_bitmap_end(int start_bit, int end_bit, char *bitmap) 53void ext4_mark_bitmap_end(int start_bit, int end_bit, char *bitmap)
54{ 54{
55 int i; 55 int i;
56 56
@@ -65,9 +65,10 @@ void mark_bitmap_end(int start_bit, int end_bit, char *bitmap)
65} 65}
66 66
67/* Initializes an uninitialized inode bitmap */ 67/* Initializes an uninitialized inode bitmap */
68unsigned ext4_init_inode_bitmap(struct super_block *sb, struct buffer_head *bh, 68static unsigned ext4_init_inode_bitmap(struct super_block *sb,
69 ext4_group_t block_group, 69 struct buffer_head *bh,
70 struct ext4_group_desc *gdp) 70 ext4_group_t block_group,
71 struct ext4_group_desc *gdp)
71{ 72{
72 struct ext4_sb_info *sbi = EXT4_SB(sb); 73 struct ext4_sb_info *sbi = EXT4_SB(sb);
73 74
@@ -85,7 +86,7 @@ unsigned ext4_init_inode_bitmap(struct super_block *sb, struct buffer_head *bh,
85 } 86 }
86 87
87 memset(bh->b_data, 0, (EXT4_INODES_PER_GROUP(sb) + 7) / 8); 88 memset(bh->b_data, 0, (EXT4_INODES_PER_GROUP(sb) + 7) / 8);
88 mark_bitmap_end(EXT4_INODES_PER_GROUP(sb), sb->s_blocksize * 8, 89 ext4_mark_bitmap_end(EXT4_INODES_PER_GROUP(sb), sb->s_blocksize * 8,
89 bh->b_data); 90 bh->b_data);
90 91
91 return EXT4_INODES_PER_GROUP(sb); 92 return EXT4_INODES_PER_GROUP(sb);
@@ -107,6 +108,7 @@ ext4_read_inode_bitmap(struct super_block *sb, ext4_group_t block_group)
107 desc = ext4_get_group_desc(sb, block_group, NULL); 108 desc = ext4_get_group_desc(sb, block_group, NULL);
108 if (!desc) 109 if (!desc)
109 return NULL; 110 return NULL;
111
110 bitmap_blk = ext4_inode_bitmap(sb, desc); 112 bitmap_blk = ext4_inode_bitmap(sb, desc);
111 bh = sb_getblk(sb, bitmap_blk); 113 bh = sb_getblk(sb, bitmap_blk);
112 if (unlikely(!bh)) { 114 if (unlikely(!bh)) {
@@ -123,6 +125,7 @@ ext4_read_inode_bitmap(struct super_block *sb, ext4_group_t block_group)
123 unlock_buffer(bh); 125 unlock_buffer(bh);
124 return bh; 126 return bh;
125 } 127 }
128
126 ext4_lock_group(sb, block_group); 129 ext4_lock_group(sb, block_group);
127 if (desc->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT)) { 130 if (desc->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT)) {
128 ext4_init_inode_bitmap(sb, bh, block_group, desc); 131 ext4_init_inode_bitmap(sb, bh, block_group, desc);
@@ -133,6 +136,7 @@ ext4_read_inode_bitmap(struct super_block *sb, ext4_group_t block_group)
133 return bh; 136 return bh;
134 } 137 }
135 ext4_unlock_group(sb, block_group); 138 ext4_unlock_group(sb, block_group);
139
136 if (buffer_uptodate(bh)) { 140 if (buffer_uptodate(bh)) {
137 /* 141 /*
138 * if not uninit if bh is uptodate, 142 * if not uninit if bh is uptodate,
@@ -411,8 +415,8 @@ struct orlov_stats {
411 * for a particular block group or flex_bg. If flex_size is 1, then g 415 * for a particular block group or flex_bg. If flex_size is 1, then g
412 * is a block group number; otherwise it is flex_bg number. 416 * is a block group number; otherwise it is flex_bg number.
413 */ 417 */
414void get_orlov_stats(struct super_block *sb, ext4_group_t g, 418static void get_orlov_stats(struct super_block *sb, ext4_group_t g,
415 int flex_size, struct orlov_stats *stats) 419 int flex_size, struct orlov_stats *stats)
416{ 420{
417 struct ext4_group_desc *desc; 421 struct ext4_group_desc *desc;
418 struct flex_groups *flex_group = EXT4_SB(sb)->s_flex_groups; 422 struct flex_groups *flex_group = EXT4_SB(sb)->s_flex_groups;
@@ -712,8 +716,17 @@ static int ext4_claim_inode(struct super_block *sb,
712{ 716{
713 int free = 0, retval = 0, count; 717 int free = 0, retval = 0, count;
714 struct ext4_sb_info *sbi = EXT4_SB(sb); 718 struct ext4_sb_info *sbi = EXT4_SB(sb);
719 struct ext4_group_info *grp = ext4_get_group_info(sb, group);
715 struct ext4_group_desc *gdp = ext4_get_group_desc(sb, group, NULL); 720 struct ext4_group_desc *gdp = ext4_get_group_desc(sb, group, NULL);
716 721
722 /*
723 * We have to be sure that new inode allocation does not race with
724 * inode table initialization, because otherwise we may end up
725 * allocating and writing new inode right before sb_issue_zeroout
726 * takes place and overwriting our new inode with zeroes. So we
727 * take alloc_sem to prevent it.
728 */
729 down_read(&grp->alloc_sem);
717 ext4_lock_group(sb, group); 730 ext4_lock_group(sb, group);
718 if (ext4_set_bit(ino, inode_bitmap_bh->b_data)) { 731 if (ext4_set_bit(ino, inode_bitmap_bh->b_data)) {
719 /* not a free inode */ 732 /* not a free inode */
@@ -724,6 +737,7 @@ static int ext4_claim_inode(struct super_block *sb,
724 if ((group == 0 && ino < EXT4_FIRST_INO(sb)) || 737 if ((group == 0 && ino < EXT4_FIRST_INO(sb)) ||
725 ino > EXT4_INODES_PER_GROUP(sb)) { 738 ino > EXT4_INODES_PER_GROUP(sb)) {
726 ext4_unlock_group(sb, group); 739 ext4_unlock_group(sb, group);
740 up_read(&grp->alloc_sem);
727 ext4_error(sb, "reserved inode or inode > inodes count - " 741 ext4_error(sb, "reserved inode or inode > inodes count - "
728 "block_group = %u, inode=%lu", group, 742 "block_group = %u, inode=%lu", group,
729 ino + group * EXT4_INODES_PER_GROUP(sb)); 743 ino + group * EXT4_INODES_PER_GROUP(sb));
@@ -772,6 +786,7 @@ static int ext4_claim_inode(struct super_block *sb,
772 gdp->bg_checksum = ext4_group_desc_csum(sbi, group, gdp); 786 gdp->bg_checksum = ext4_group_desc_csum(sbi, group, gdp);
773err_ret: 787err_ret:
774 ext4_unlock_group(sb, group); 788 ext4_unlock_group(sb, group);
789 up_read(&grp->alloc_sem);
775 return retval; 790 return retval;
776} 791}
777 792
@@ -1205,3 +1220,109 @@ unsigned long ext4_count_dirs(struct super_block * sb)
1205 } 1220 }
1206 return count; 1221 return count;
1207} 1222}
1223
1224/*
1225 * Zeroes not yet zeroed inode table - just write zeroes through the whole
1226 * inode table. Must be called without any spinlock held. The only place
1227 * where it is called from on active part of filesystem is ext4lazyinit
1228 * thread, so we do not need any special locks, however we have to prevent
1229 * inode allocation from the current group, so we take alloc_sem lock, to
1230 * block ext4_claim_inode until we are finished.
1231 */
1232extern int ext4_init_inode_table(struct super_block *sb, ext4_group_t group,
1233 int barrier)
1234{
1235 struct ext4_group_info *grp = ext4_get_group_info(sb, group);
1236 struct ext4_sb_info *sbi = EXT4_SB(sb);
1237 struct ext4_group_desc *gdp = NULL;
1238 struct buffer_head *group_desc_bh;
1239 handle_t *handle;
1240 ext4_fsblk_t blk;
1241 int num, ret = 0, used_blks = 0;
1242
1243 /* This should not happen, but just to be sure check this */
1244 if (sb->s_flags & MS_RDONLY) {
1245 ret = 1;
1246 goto out;
1247 }
1248
1249 gdp = ext4_get_group_desc(sb, group, &group_desc_bh);
1250 if (!gdp)
1251 goto out;
1252
1253 /*
1254 * We do not need to lock this, because we are the only one
1255 * handling this flag.
1256 */
1257 if (gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_ZEROED))
1258 goto out;
1259
1260 handle = ext4_journal_start_sb(sb, 1);
1261 if (IS_ERR(handle)) {
1262 ret = PTR_ERR(handle);
1263 goto out;
1264 }
1265
1266 down_write(&grp->alloc_sem);
1267 /*
1268 * If inode bitmap was already initialized there may be some
1269 * used inodes so we need to skip blocks with used inodes in
1270 * inode table.
1271 */
1272 if (!(gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT)))
1273 used_blks = DIV_ROUND_UP((EXT4_INODES_PER_GROUP(sb) -
1274 ext4_itable_unused_count(sb, gdp)),
1275 sbi->s_inodes_per_block);
1276
1277 if ((used_blks < 0) || (used_blks > sbi->s_itb_per_group)) {
1278 ext4_error(sb, "Something is wrong with group %u\n"
1279 "Used itable blocks: %d"
1280 "itable unused count: %u\n",
1281 group, used_blks,
1282 ext4_itable_unused_count(sb, gdp));
1283 ret = 1;
1284 goto out;
1285 }
1286
1287 blk = ext4_inode_table(sb, gdp) + used_blks;
1288 num = sbi->s_itb_per_group - used_blks;
1289
1290 BUFFER_TRACE(group_desc_bh, "get_write_access");
1291 ret = ext4_journal_get_write_access(handle,
1292 group_desc_bh);
1293 if (ret)
1294 goto err_out;
1295
1296 /*
1297 * Skip zeroout if the inode table is full. But we set the ZEROED
1298 * flag anyway, because obviously, when it is full it does not need
1299 * further zeroing.
1300 */
1301 if (unlikely(num == 0))
1302 goto skip_zeroout;
1303
1304 ext4_debug("going to zero out inode table in group %d\n",
1305 group);
1306 ret = sb_issue_zeroout(sb, blk, num, GFP_NOFS);
1307 if (ret < 0)
1308 goto err_out;
1309 if (barrier)
1310 blkdev_issue_flush(sb->s_bdev, GFP_NOFS, NULL);
1311
1312skip_zeroout:
1313 ext4_lock_group(sb, group);
1314 gdp->bg_flags |= cpu_to_le16(EXT4_BG_INODE_ZEROED);
1315 gdp->bg_checksum = ext4_group_desc_csum(sbi, group, gdp);
1316 ext4_unlock_group(sb, group);
1317
1318 BUFFER_TRACE(group_desc_bh,
1319 "call ext4_handle_dirty_metadata");
1320 ret = ext4_handle_dirty_metadata(handle, NULL,
1321 group_desc_bh);
1322
1323err_out:
1324 up_write(&grp->alloc_sem);
1325 ext4_journal_stop(handle);
1326out:
1327 return ret;
1328}
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 4b8debeb3965..191616470466 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -60,6 +60,12 @@ static inline int ext4_begin_ordered_truncate(struct inode *inode,
60} 60}
61 61
62static void ext4_invalidatepage(struct page *page, unsigned long offset); 62static void ext4_invalidatepage(struct page *page, unsigned long offset);
63static int noalloc_get_block_write(struct inode *inode, sector_t iblock,
64 struct buffer_head *bh_result, int create);
65static int ext4_set_bh_endio(struct buffer_head *bh, struct inode *inode);
66static void ext4_end_io_buffer_write(struct buffer_head *bh, int uptodate);
67static int __ext4_journalled_writepage(struct page *page, unsigned int len);
68static int ext4_bh_delay_or_unwritten(handle_t *handle, struct buffer_head *bh);
63 69
64/* 70/*
65 * Test whether an inode is a fast symlink. 71 * Test whether an inode is a fast symlink.
@@ -755,6 +761,11 @@ static int ext4_alloc_branch(handle_t *handle, struct inode *inode,
755 * parent to disk. 761 * parent to disk.
756 */ 762 */
757 bh = sb_getblk(inode->i_sb, new_blocks[n-1]); 763 bh = sb_getblk(inode->i_sb, new_blocks[n-1]);
764 if (unlikely(!bh)) {
765 err = -EIO;
766 goto failed;
767 }
768
758 branch[n].bh = bh; 769 branch[n].bh = bh;
759 lock_buffer(bh); 770 lock_buffer(bh);
760 BUFFER_TRACE(bh, "call get_create_access"); 771 BUFFER_TRACE(bh, "call get_create_access");
@@ -1207,8 +1218,10 @@ static pgoff_t ext4_num_dirty_pages(struct inode *inode, pgoff_t idx,
1207 break; 1218 break;
1208 idx++; 1219 idx++;
1209 num++; 1220 num++;
1210 if (num >= max_pages) 1221 if (num >= max_pages) {
1222 done = 1;
1211 break; 1223 break;
1224 }
1212 } 1225 }
1213 pagevec_release(&pvec); 1226 pagevec_release(&pvec);
1214 } 1227 }
@@ -1538,10 +1551,10 @@ static int do_journal_get_write_access(handle_t *handle,
1538 if (!buffer_mapped(bh) || buffer_freed(bh)) 1551 if (!buffer_mapped(bh) || buffer_freed(bh))
1539 return 0; 1552 return 0;
1540 /* 1553 /*
1541 * __block_prepare_write() could have dirtied some buffers. Clean 1554 * __block_write_begin() could have dirtied some buffers. Clean
1542 * the dirty bit as jbd2_journal_get_write_access() could complain 1555 * the dirty bit as jbd2_journal_get_write_access() could complain
1543 * otherwise about fs integrity issues. Setting of the dirty bit 1556 * otherwise about fs integrity issues. Setting of the dirty bit
1544 * by __block_prepare_write() isn't a real problem here as we clear 1557 * by __block_write_begin() isn't a real problem here as we clear
1545 * the bit before releasing a page lock and thus writeback cannot 1558 * the bit before releasing a page lock and thus writeback cannot
1546 * ever write the buffer. 1559 * ever write the buffer.
1547 */ 1560 */
@@ -1995,16 +2008,23 @@ static void ext4_da_page_release_reservation(struct page *page,
1995 * 2008 *
1996 * As pages are already locked by write_cache_pages(), we can't use it 2009 * As pages are already locked by write_cache_pages(), we can't use it
1997 */ 2010 */
1998static int mpage_da_submit_io(struct mpage_da_data *mpd) 2011static int mpage_da_submit_io(struct mpage_da_data *mpd,
2012 struct ext4_map_blocks *map)
1999{ 2013{
2000 long pages_skipped;
2001 struct pagevec pvec; 2014 struct pagevec pvec;
2002 unsigned long index, end; 2015 unsigned long index, end;
2003 int ret = 0, err, nr_pages, i; 2016 int ret = 0, err, nr_pages, i;
2004 struct inode *inode = mpd->inode; 2017 struct inode *inode = mpd->inode;
2005 struct address_space *mapping = inode->i_mapping; 2018 struct address_space *mapping = inode->i_mapping;
2019 loff_t size = i_size_read(inode);
2020 unsigned int len, block_start;
2021 struct buffer_head *bh, *page_bufs = NULL;
2022 int journal_data = ext4_should_journal_data(inode);
2023 sector_t pblock = 0, cur_logical = 0;
2024 struct ext4_io_submit io_submit;
2006 2025
2007 BUG_ON(mpd->next_page <= mpd->first_page); 2026 BUG_ON(mpd->next_page <= mpd->first_page);
2027 memset(&io_submit, 0, sizeof(io_submit));
2008 /* 2028 /*
2009 * We need to start from the first_page to the next_page - 1 2029 * We need to start from the first_page to the next_page - 1
2010 * to make sure we also write the mapped dirty buffer_heads. 2030 * to make sure we also write the mapped dirty buffer_heads.
@@ -2020,122 +2040,108 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd)
2020 if (nr_pages == 0) 2040 if (nr_pages == 0)
2021 break; 2041 break;
2022 for (i = 0; i < nr_pages; i++) { 2042 for (i = 0; i < nr_pages; i++) {
2043 int commit_write = 0, redirty_page = 0;
2023 struct page *page = pvec.pages[i]; 2044 struct page *page = pvec.pages[i];
2024 2045
2025 index = page->index; 2046 index = page->index;
2026 if (index > end) 2047 if (index > end)
2027 break; 2048 break;
2049
2050 if (index == size >> PAGE_CACHE_SHIFT)
2051 len = size & ~PAGE_CACHE_MASK;
2052 else
2053 len = PAGE_CACHE_SIZE;
2054 if (map) {
2055 cur_logical = index << (PAGE_CACHE_SHIFT -
2056 inode->i_blkbits);
2057 pblock = map->m_pblk + (cur_logical -
2058 map->m_lblk);
2059 }
2028 index++; 2060 index++;
2029 2061
2030 BUG_ON(!PageLocked(page)); 2062 BUG_ON(!PageLocked(page));
2031 BUG_ON(PageWriteback(page)); 2063 BUG_ON(PageWriteback(page));
2032 2064
2033 pages_skipped = mpd->wbc->pages_skipped;
2034 err = mapping->a_ops->writepage(page, mpd->wbc);
2035 if (!err && (pages_skipped == mpd->wbc->pages_skipped))
2036 /*
2037 * have successfully written the page
2038 * without skipping the same
2039 */
2040 mpd->pages_written++;
2041 /* 2065 /*
2042 * In error case, we have to continue because 2066 * If the page does not have buffers (for
2043 * remaining pages are still locked 2067 * whatever reason), try to create them using
2044 * XXX: unlock and re-dirty them? 2068 * __block_write_begin. If this fails,
2069 * redirty the page and move on.
2045 */ 2070 */
2046 if (ret == 0) 2071 if (!page_has_buffers(page)) {
2047 ret = err; 2072 if (__block_write_begin(page, 0, len,
2048 } 2073 noalloc_get_block_write)) {
2049 pagevec_release(&pvec); 2074 redirty_page:
2050 } 2075 redirty_page_for_writepage(mpd->wbc,
2051 return ret; 2076 page);
2052} 2077 unlock_page(page);
2053 2078 continue;
2054/* 2079 }
2055 * mpage_put_bnr_to_bhs - walk blocks and assign them actual numbers 2080 commit_write = 1;
2056 * 2081 }
2057 * the function goes through all passed space and put actual disk
2058 * block numbers into buffer heads, dropping BH_Delay and BH_Unwritten
2059 */
2060static void mpage_put_bnr_to_bhs(struct mpage_da_data *mpd,
2061 struct ext4_map_blocks *map)
2062{
2063 struct inode *inode = mpd->inode;
2064 struct address_space *mapping = inode->i_mapping;
2065 int blocks = map->m_len;
2066 sector_t pblock = map->m_pblk, cur_logical;
2067 struct buffer_head *head, *bh;
2068 pgoff_t index, end;
2069 struct pagevec pvec;
2070 int nr_pages, i;
2071
2072 index = map->m_lblk >> (PAGE_CACHE_SHIFT - inode->i_blkbits);
2073 end = (map->m_lblk + blocks - 1) >> (PAGE_CACHE_SHIFT - inode->i_blkbits);
2074 cur_logical = index << (PAGE_CACHE_SHIFT - inode->i_blkbits);
2075
2076 pagevec_init(&pvec, 0);
2077
2078 while (index <= end) {
2079 /* XXX: optimize tail */
2080 nr_pages = pagevec_lookup(&pvec, mapping, index, PAGEVEC_SIZE);
2081 if (nr_pages == 0)
2082 break;
2083 for (i = 0; i < nr_pages; i++) {
2084 struct page *page = pvec.pages[i];
2085
2086 index = page->index;
2087 if (index > end)
2088 break;
2089 index++;
2090
2091 BUG_ON(!PageLocked(page));
2092 BUG_ON(PageWriteback(page));
2093 BUG_ON(!page_has_buffers(page));
2094
2095 bh = page_buffers(page);
2096 head = bh;
2097
2098 /* skip blocks out of the range */
2099 do {
2100 if (cur_logical >= map->m_lblk)
2101 break;
2102 cur_logical++;
2103 } while ((bh = bh->b_this_page) != head);
2104 2082
2083 bh = page_bufs = page_buffers(page);
2084 block_start = 0;
2105 do { 2085 do {
2106 if (cur_logical >= map->m_lblk + blocks) 2086 if (!bh)
2107 break; 2087 goto redirty_page;
2108 2088 if (map && (cur_logical >= map->m_lblk) &&
2109 if (buffer_delay(bh) || buffer_unwritten(bh)) { 2089 (cur_logical <= (map->m_lblk +
2110 2090 (map->m_len - 1)))) {
2111 BUG_ON(bh->b_bdev != inode->i_sb->s_bdev);
2112
2113 if (buffer_delay(bh)) { 2091 if (buffer_delay(bh)) {
2114 clear_buffer_delay(bh); 2092 clear_buffer_delay(bh);
2115 bh->b_blocknr = pblock; 2093 bh->b_blocknr = pblock;
2116 } else {
2117 /*
2118 * unwritten already should have
2119 * blocknr assigned. Verify that
2120 */
2121 clear_buffer_unwritten(bh);
2122 BUG_ON(bh->b_blocknr != pblock);
2123 } 2094 }
2095 if (buffer_unwritten(bh) ||
2096 buffer_mapped(bh))
2097 BUG_ON(bh->b_blocknr != pblock);
2098 if (map->m_flags & EXT4_MAP_UNINIT)
2099 set_buffer_uninit(bh);
2100 clear_buffer_unwritten(bh);
2101 }
2124 2102
2125 } else if (buffer_mapped(bh)) 2103 /* redirty page if block allocation undone */
2126 BUG_ON(bh->b_blocknr != pblock); 2104 if (buffer_delay(bh) || buffer_unwritten(bh))
2127 2105 redirty_page = 1;
2128 if (map->m_flags & EXT4_MAP_UNINIT) 2106 bh = bh->b_this_page;
2129 set_buffer_uninit(bh); 2107 block_start += bh->b_size;
2130 cur_logical++; 2108 cur_logical++;
2131 pblock++; 2109 pblock++;
2132 } while ((bh = bh->b_this_page) != head); 2110 } while (bh != page_bufs);
2111
2112 if (redirty_page)
2113 goto redirty_page;
2114
2115 if (commit_write)
2116 /* mark the buffer_heads as dirty & uptodate */
2117 block_commit_write(page, 0, len);
2118
2119 /*
2120 * Delalloc doesn't support data journalling,
2121 * but eventually maybe we'll lift this
2122 * restriction.
2123 */
2124 if (unlikely(journal_data && PageChecked(page)))
2125 err = __ext4_journalled_writepage(page, len);
2126 else
2127 err = ext4_bio_write_page(&io_submit, page,
2128 len, mpd->wbc);
2129
2130 if (!err)
2131 mpd->pages_written++;
2132 /*
2133 * In error case, we have to continue because
2134 * remaining pages are still locked
2135 */
2136 if (ret == 0)
2137 ret = err;
2133 } 2138 }
2134 pagevec_release(&pvec); 2139 pagevec_release(&pvec);
2135 } 2140 }
2141 ext4_io_submit(&io_submit);
2142 return ret;
2136} 2143}
2137 2144
2138
2139static void ext4_da_block_invalidatepages(struct mpage_da_data *mpd, 2145static void ext4_da_block_invalidatepages(struct mpage_da_data *mpd,
2140 sector_t logical, long blk_cnt) 2146 sector_t logical, long blk_cnt)
2141{ 2147{
@@ -2187,35 +2193,32 @@ static void ext4_print_free_blocks(struct inode *inode)
2187} 2193}
2188 2194
2189/* 2195/*
2190 * mpage_da_map_blocks - go through given space 2196 * mpage_da_map_and_submit - go through given space, map them
2197 * if necessary, and then submit them for I/O
2191 * 2198 *
2192 * @mpd - bh describing space 2199 * @mpd - bh describing space
2193 * 2200 *
2194 * The function skips space we know is already mapped to disk blocks. 2201 * The function skips space we know is already mapped to disk blocks.
2195 * 2202 *
2196 */ 2203 */
2197static int mpage_da_map_blocks(struct mpage_da_data *mpd) 2204static void mpage_da_map_and_submit(struct mpage_da_data *mpd)
2198{ 2205{
2199 int err, blks, get_blocks_flags; 2206 int err, blks, get_blocks_flags;
2200 struct ext4_map_blocks map; 2207 struct ext4_map_blocks map, *mapp = NULL;
2201 sector_t next = mpd->b_blocknr; 2208 sector_t next = mpd->b_blocknr;
2202 unsigned max_blocks = mpd->b_size >> mpd->inode->i_blkbits; 2209 unsigned max_blocks = mpd->b_size >> mpd->inode->i_blkbits;
2203 loff_t disksize = EXT4_I(mpd->inode)->i_disksize; 2210 loff_t disksize = EXT4_I(mpd->inode)->i_disksize;
2204 handle_t *handle = NULL; 2211 handle_t *handle = NULL;
2205 2212
2206 /* 2213 /*
2207 * We consider only non-mapped and non-allocated blocks 2214 * If the blocks are mapped already, or we couldn't accumulate
2208 */ 2215 * any blocks, then proceed immediately to the submission stage.
2209 if ((mpd->b_state & (1 << BH_Mapped)) &&
2210 !(mpd->b_state & (1 << BH_Delay)) &&
2211 !(mpd->b_state & (1 << BH_Unwritten)))
2212 return 0;
2213
2214 /*
2215 * If we didn't accumulate anything to write simply return
2216 */ 2216 */
2217 if (!mpd->b_size) 2217 if ((mpd->b_size == 0) ||
2218 return 0; 2218 ((mpd->b_state & (1 << BH_Mapped)) &&
2219 !(mpd->b_state & (1 << BH_Delay)) &&
2220 !(mpd->b_state & (1 << BH_Unwritten))))
2221 goto submit_io;
2219 2222
2220 handle = ext4_journal_current_handle(); 2223 handle = ext4_journal_current_handle();
2221 BUG_ON(!handle); 2224 BUG_ON(!handle);
@@ -2252,17 +2255,18 @@ static int mpage_da_map_blocks(struct mpage_da_data *mpd)
2252 2255
2253 err = blks; 2256 err = blks;
2254 /* 2257 /*
2255 * If get block returns with error we simply 2258 * If get block returns EAGAIN or ENOSPC and there
2256 * return. Later writepage will redirty the page and 2259 * appears to be free blocks we will call
2257 * writepages will find the dirty page again 2260 * ext4_writepage() for all of the pages which will
2261 * just redirty the pages.
2258 */ 2262 */
2259 if (err == -EAGAIN) 2263 if (err == -EAGAIN)
2260 return 0; 2264 goto submit_io;
2261 2265
2262 if (err == -ENOSPC && 2266 if (err == -ENOSPC &&
2263 ext4_count_free_blocks(sb)) { 2267 ext4_count_free_blocks(sb)) {
2264 mpd->retval = err; 2268 mpd->retval = err;
2265 return 0; 2269 goto submit_io;
2266 } 2270 }
2267 2271
2268 /* 2272 /*
@@ -2287,10 +2291,11 @@ static int mpage_da_map_blocks(struct mpage_da_data *mpd)
2287 /* invalidate all the pages */ 2291 /* invalidate all the pages */
2288 ext4_da_block_invalidatepages(mpd, next, 2292 ext4_da_block_invalidatepages(mpd, next,
2289 mpd->b_size >> mpd->inode->i_blkbits); 2293 mpd->b_size >> mpd->inode->i_blkbits);
2290 return err; 2294 return;
2291 } 2295 }
2292 BUG_ON(blks == 0); 2296 BUG_ON(blks == 0);
2293 2297
2298 mapp = &map;
2294 if (map.m_flags & EXT4_MAP_NEW) { 2299 if (map.m_flags & EXT4_MAP_NEW) {
2295 struct block_device *bdev = mpd->inode->i_sb->s_bdev; 2300 struct block_device *bdev = mpd->inode->i_sb->s_bdev;
2296 int i; 2301 int i;
@@ -2299,18 +2304,11 @@ static int mpage_da_map_blocks(struct mpage_da_data *mpd)
2299 unmap_underlying_metadata(bdev, map.m_pblk + i); 2304 unmap_underlying_metadata(bdev, map.m_pblk + i);
2300 } 2305 }
2301 2306
2302 /*
2303 * If blocks are delayed marked, we need to
2304 * put actual blocknr and drop delayed bit
2305 */
2306 if ((mpd->b_state & (1 << BH_Delay)) ||
2307 (mpd->b_state & (1 << BH_Unwritten)))
2308 mpage_put_bnr_to_bhs(mpd, &map);
2309
2310 if (ext4_should_order_data(mpd->inode)) { 2307 if (ext4_should_order_data(mpd->inode)) {
2311 err = ext4_jbd2_file_inode(handle, mpd->inode); 2308 err = ext4_jbd2_file_inode(handle, mpd->inode);
2312 if (err) 2309 if (err)
2313 return err; 2310 /* This only happens if the journal is aborted */
2311 return;
2314 } 2312 }
2315 2313
2316 /* 2314 /*
@@ -2321,10 +2319,16 @@ static int mpage_da_map_blocks(struct mpage_da_data *mpd)
2321 disksize = i_size_read(mpd->inode); 2319 disksize = i_size_read(mpd->inode);
2322 if (disksize > EXT4_I(mpd->inode)->i_disksize) { 2320 if (disksize > EXT4_I(mpd->inode)->i_disksize) {
2323 ext4_update_i_disksize(mpd->inode, disksize); 2321 ext4_update_i_disksize(mpd->inode, disksize);
2324 return ext4_mark_inode_dirty(handle, mpd->inode); 2322 err = ext4_mark_inode_dirty(handle, mpd->inode);
2323 if (err)
2324 ext4_error(mpd->inode->i_sb,
2325 "Failed to mark inode %lu dirty",
2326 mpd->inode->i_ino);
2325 } 2327 }
2326 2328
2327 return 0; 2329submit_io:
2330 mpage_da_submit_io(mpd, mapp);
2331 mpd->io_done = 1;
2328} 2332}
2329 2333
2330#define BH_FLAGS ((1 << BH_Uptodate) | (1 << BH_Mapped) | \ 2334#define BH_FLAGS ((1 << BH_Uptodate) | (1 << BH_Mapped) | \
@@ -2401,9 +2405,7 @@ flush_it:
2401 * We couldn't merge the block to our extent, so we 2405 * We couldn't merge the block to our extent, so we
2402 * need to flush current extent and start new one 2406 * need to flush current extent and start new one
2403 */ 2407 */
2404 if (mpage_da_map_blocks(mpd) == 0) 2408 mpage_da_map_and_submit(mpd);
2405 mpage_da_submit_io(mpd);
2406 mpd->io_done = 1;
2407 return; 2409 return;
2408} 2410}
2409 2411
@@ -2422,9 +2424,9 @@ static int ext4_bh_delay_or_unwritten(handle_t *handle, struct buffer_head *bh)
2422 * The function finds extents of pages and scan them for all blocks. 2424 * The function finds extents of pages and scan them for all blocks.
2423 */ 2425 */
2424static int __mpage_da_writepage(struct page *page, 2426static int __mpage_da_writepage(struct page *page,
2425 struct writeback_control *wbc, void *data) 2427 struct writeback_control *wbc,
2428 struct mpage_da_data *mpd)
2426{ 2429{
2427 struct mpage_da_data *mpd = data;
2428 struct inode *inode = mpd->inode; 2430 struct inode *inode = mpd->inode;
2429 struct buffer_head *bh, *head; 2431 struct buffer_head *bh, *head;
2430 sector_t logical; 2432 sector_t logical;
@@ -2435,15 +2437,13 @@ static int __mpage_da_writepage(struct page *page,
2435 if (mpd->next_page != page->index) { 2437 if (mpd->next_page != page->index) {
2436 /* 2438 /*
2437 * Nope, we can't. So, we map non-allocated blocks 2439 * Nope, we can't. So, we map non-allocated blocks
2438 * and start IO on them using writepage() 2440 * and start IO on them
2439 */ 2441 */
2440 if (mpd->next_page != mpd->first_page) { 2442 if (mpd->next_page != mpd->first_page) {
2441 if (mpage_da_map_blocks(mpd) == 0) 2443 mpage_da_map_and_submit(mpd);
2442 mpage_da_submit_io(mpd);
2443 /* 2444 /*
2444 * skip rest of the page in the page_vec 2445 * skip rest of the page in the page_vec
2445 */ 2446 */
2446 mpd->io_done = 1;
2447 redirty_page_for_writepage(wbc, page); 2447 redirty_page_for_writepage(wbc, page);
2448 unlock_page(page); 2448 unlock_page(page);
2449 return MPAGE_DA_EXTENT_TAIL; 2449 return MPAGE_DA_EXTENT_TAIL;
@@ -2550,8 +2550,7 @@ static int ext4_da_get_block_prep(struct inode *inode, sector_t iblock,
2550 if (buffer_delay(bh)) 2550 if (buffer_delay(bh))
2551 return 0; /* Not sure this could or should happen */ 2551 return 0; /* Not sure this could or should happen */
2552 /* 2552 /*
2553 * XXX: __block_prepare_write() unmaps passed block, 2553 * XXX: __block_write_begin() unmaps passed block, is it OK?
2554 * is it OK?
2555 */ 2554 */
2556 ret = ext4_da_reserve_space(inode, iblock); 2555 ret = ext4_da_reserve_space(inode, iblock);
2557 if (ret) 2556 if (ret)
@@ -2583,7 +2582,7 @@ static int ext4_da_get_block_prep(struct inode *inode, sector_t iblock,
2583/* 2582/*
2584 * This function is used as a standard get_block_t calback function 2583 * This function is used as a standard get_block_t calback function
2585 * when there is no desire to allocate any blocks. It is used as a 2584 * when there is no desire to allocate any blocks. It is used as a
2586 * callback function for block_prepare_write() and block_write_full_page(). 2585 * callback function for block_write_begin() and block_write_full_page().
2587 * These functions should only try to map a single block at a time. 2586 * These functions should only try to map a single block at a time.
2588 * 2587 *
2589 * Since this function doesn't do block allocations even if the caller 2588 * Since this function doesn't do block allocations even if the caller
@@ -2623,6 +2622,7 @@ static int __ext4_journalled_writepage(struct page *page,
2623 int ret = 0; 2622 int ret = 0;
2624 int err; 2623 int err;
2625 2624
2625 ClearPageChecked(page);
2626 page_bufs = page_buffers(page); 2626 page_bufs = page_buffers(page);
2627 BUG_ON(!page_bufs); 2627 BUG_ON(!page_bufs);
2628 walk_page_buffers(handle, page_bufs, 0, len, NULL, bget_one); 2628 walk_page_buffers(handle, page_bufs, 0, len, NULL, bget_one);
@@ -2700,7 +2700,7 @@ static void ext4_end_io_buffer_write(struct buffer_head *bh, int uptodate);
2700static int ext4_writepage(struct page *page, 2700static int ext4_writepage(struct page *page,
2701 struct writeback_control *wbc) 2701 struct writeback_control *wbc)
2702{ 2702{
2703 int ret = 0; 2703 int ret = 0, commit_write = 0;
2704 loff_t size; 2704 loff_t size;
2705 unsigned int len; 2705 unsigned int len;
2706 struct buffer_head *page_bufs = NULL; 2706 struct buffer_head *page_bufs = NULL;
@@ -2713,71 +2713,44 @@ static int ext4_writepage(struct page *page,
2713 else 2713 else
2714 len = PAGE_CACHE_SIZE; 2714 len = PAGE_CACHE_SIZE;
2715 2715
2716 if (page_has_buffers(page)) { 2716 /*
2717 page_bufs = page_buffers(page); 2717 * If the page does not have buffers (for whatever reason),
2718 if (walk_page_buffers(NULL, page_bufs, 0, len, NULL, 2718 * try to create them using __block_write_begin. If this
2719 ext4_bh_delay_or_unwritten)) { 2719 * fails, redirty the page and move on.
2720 /* 2720 */
2721 * We don't want to do block allocation 2721 if (!page_has_buffers(page)) {
2722 * So redirty the page and return 2722 if (__block_write_begin(page, 0, len,
2723 * We may reach here when we do a journal commit 2723 noalloc_get_block_write)) {
2724 * via journal_submit_inode_data_buffers. 2724 redirty_page:
2725 * If we don't have mapping block we just ignore
2726 * them. We can also reach here via shrink_page_list
2727 */
2728 redirty_page_for_writepage(wbc, page); 2725 redirty_page_for_writepage(wbc, page);
2729 unlock_page(page); 2726 unlock_page(page);
2730 return 0; 2727 return 0;
2731 } 2728 }
2732 } else { 2729 commit_write = 1;
2730 }
2731 page_bufs = page_buffers(page);
2732 if (walk_page_buffers(NULL, page_bufs, 0, len, NULL,
2733 ext4_bh_delay_or_unwritten)) {
2733 /* 2734 /*
2734 * The test for page_has_buffers() is subtle: 2735 * We don't want to do block allocation, so redirty
2735 * We know the page is dirty but it lost buffers. That means 2736 * the page and return. We may reach here when we do
2736 * that at some moment in time after write_begin()/write_end() 2737 * a journal commit via journal_submit_inode_data_buffers.
2737 * has been called all buffers have been clean and thus they 2738 * We can also reach here via shrink_page_list
2738 * must have been written at least once. So they are all
2739 * mapped and we can happily proceed with mapping them
2740 * and writing the page.
2741 *
2742 * Try to initialize the buffer_heads and check whether
2743 * all are mapped and non delay. We don't want to
2744 * do block allocation here.
2745 */ 2739 */
2746 ret = block_prepare_write(page, 0, len, 2740 goto redirty_page;
2747 noalloc_get_block_write); 2741 }
2748 if (!ret) { 2742 if (commit_write)
2749 page_bufs = page_buffers(page);
2750 /* check whether all are mapped and non delay */
2751 if (walk_page_buffers(NULL, page_bufs, 0, len, NULL,
2752 ext4_bh_delay_or_unwritten)) {
2753 redirty_page_for_writepage(wbc, page);
2754 unlock_page(page);
2755 return 0;
2756 }
2757 } else {
2758 /*
2759 * We can't do block allocation here
2760 * so just redity the page and unlock
2761 * and return
2762 */
2763 redirty_page_for_writepage(wbc, page);
2764 unlock_page(page);
2765 return 0;
2766 }
2767 /* now mark the buffer_heads as dirty and uptodate */ 2743 /* now mark the buffer_heads as dirty and uptodate */
2768 block_commit_write(page, 0, len); 2744 block_commit_write(page, 0, len);
2769 }
2770 2745
2771 if (PageChecked(page) && ext4_should_journal_data(inode)) { 2746 if (PageChecked(page) && ext4_should_journal_data(inode))
2772 /* 2747 /*
2773 * It's mmapped pagecache. Add buffers and journal it. There 2748 * It's mmapped pagecache. Add buffers and journal it. There
2774 * doesn't seem much point in redirtying the page here. 2749 * doesn't seem much point in redirtying the page here.
2775 */ 2750 */
2776 ClearPageChecked(page);
2777 return __ext4_journalled_writepage(page, len); 2751 return __ext4_journalled_writepage(page, len);
2778 }
2779 2752
2780 if (page_bufs && buffer_uninit(page_bufs)) { 2753 if (buffer_uninit(page_bufs)) {
2781 ext4_set_bh_endio(page_bufs, inode); 2754 ext4_set_bh_endio(page_bufs, inode);
2782 ret = block_write_full_page_endio(page, noalloc_get_block_write, 2755 ret = block_write_full_page_endio(page, noalloc_get_block_write,
2783 wbc, ext4_end_io_buffer_write); 2756 wbc, ext4_end_io_buffer_write);
@@ -2824,25 +2797,32 @@ static int ext4_da_writepages_trans_blocks(struct inode *inode)
2824 */ 2797 */
2825static int write_cache_pages_da(struct address_space *mapping, 2798static int write_cache_pages_da(struct address_space *mapping,
2826 struct writeback_control *wbc, 2799 struct writeback_control *wbc,
2827 struct mpage_da_data *mpd) 2800 struct mpage_da_data *mpd,
2801 pgoff_t *done_index)
2828{ 2802{
2829 int ret = 0; 2803 int ret = 0;
2830 int done = 0; 2804 int done = 0;
2831 struct pagevec pvec; 2805 struct pagevec pvec;
2832 int nr_pages; 2806 unsigned nr_pages;
2833 pgoff_t index; 2807 pgoff_t index;
2834 pgoff_t end; /* Inclusive */ 2808 pgoff_t end; /* Inclusive */
2835 long nr_to_write = wbc->nr_to_write; 2809 long nr_to_write = wbc->nr_to_write;
2810 int tag;
2836 2811
2837 pagevec_init(&pvec, 0); 2812 pagevec_init(&pvec, 0);
2838 index = wbc->range_start >> PAGE_CACHE_SHIFT; 2813 index = wbc->range_start >> PAGE_CACHE_SHIFT;
2839 end = wbc->range_end >> PAGE_CACHE_SHIFT; 2814 end = wbc->range_end >> PAGE_CACHE_SHIFT;
2840 2815
2816 if (wbc->sync_mode == WB_SYNC_ALL)
2817 tag = PAGECACHE_TAG_TOWRITE;
2818 else
2819 tag = PAGECACHE_TAG_DIRTY;
2820
2821 *done_index = index;
2841 while (!done && (index <= end)) { 2822 while (!done && (index <= end)) {
2842 int i; 2823 int i;
2843 2824
2844 nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, 2825 nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, tag,
2845 PAGECACHE_TAG_DIRTY,
2846 min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1); 2826 min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1);
2847 if (nr_pages == 0) 2827 if (nr_pages == 0)
2848 break; 2828 break;
@@ -2862,6 +2842,8 @@ static int write_cache_pages_da(struct address_space *mapping,
2862 break; 2842 break;
2863 } 2843 }
2864 2844
2845 *done_index = page->index + 1;
2846
2865 lock_page(page); 2847 lock_page(page);
2866 2848
2867 /* 2849 /*
@@ -2947,6 +2929,8 @@ static int ext4_da_writepages(struct address_space *mapping,
2947 long desired_nr_to_write, nr_to_writebump = 0; 2929 long desired_nr_to_write, nr_to_writebump = 0;
2948 loff_t range_start = wbc->range_start; 2930 loff_t range_start = wbc->range_start;
2949 struct ext4_sb_info *sbi = EXT4_SB(mapping->host->i_sb); 2931 struct ext4_sb_info *sbi = EXT4_SB(mapping->host->i_sb);
2932 pgoff_t done_index = 0;
2933 pgoff_t end;
2950 2934
2951 trace_ext4_da_writepages(inode, wbc); 2935 trace_ext4_da_writepages(inode, wbc);
2952 2936
@@ -2982,8 +2966,11 @@ static int ext4_da_writepages(struct address_space *mapping,
2982 wbc->range_start = index << PAGE_CACHE_SHIFT; 2966 wbc->range_start = index << PAGE_CACHE_SHIFT;
2983 wbc->range_end = LLONG_MAX; 2967 wbc->range_end = LLONG_MAX;
2984 wbc->range_cyclic = 0; 2968 wbc->range_cyclic = 0;
2985 } else 2969 end = -1;
2970 } else {
2986 index = wbc->range_start >> PAGE_CACHE_SHIFT; 2971 index = wbc->range_start >> PAGE_CACHE_SHIFT;
2972 end = wbc->range_end >> PAGE_CACHE_SHIFT;
2973 }
2987 2974
2988 /* 2975 /*
2989 * This works around two forms of stupidity. The first is in 2976 * This works around two forms of stupidity. The first is in
@@ -3002,9 +2989,12 @@ static int ext4_da_writepages(struct address_space *mapping,
3002 * sbi->max_writeback_mb_bump whichever is smaller. 2989 * sbi->max_writeback_mb_bump whichever is smaller.
3003 */ 2990 */
3004 max_pages = sbi->s_max_writeback_mb_bump << (20 - PAGE_CACHE_SHIFT); 2991 max_pages = sbi->s_max_writeback_mb_bump << (20 - PAGE_CACHE_SHIFT);
3005 if (!range_cyclic && range_whole) 2992 if (!range_cyclic && range_whole) {
3006 desired_nr_to_write = wbc->nr_to_write * 8; 2993 if (wbc->nr_to_write == LONG_MAX)
3007 else 2994 desired_nr_to_write = wbc->nr_to_write;
2995 else
2996 desired_nr_to_write = wbc->nr_to_write * 8;
2997 } else
3008 desired_nr_to_write = ext4_num_dirty_pages(inode, index, 2998 desired_nr_to_write = ext4_num_dirty_pages(inode, index,
3009 max_pages); 2999 max_pages);
3010 if (desired_nr_to_write > max_pages) 3000 if (desired_nr_to_write > max_pages)
@@ -3021,6 +3011,9 @@ static int ext4_da_writepages(struct address_space *mapping,
3021 pages_skipped = wbc->pages_skipped; 3011 pages_skipped = wbc->pages_skipped;
3022 3012
3023retry: 3013retry:
3014 if (wbc->sync_mode == WB_SYNC_ALL)
3015 tag_pages_for_writeback(mapping, index, end);
3016
3024 while (!ret && wbc->nr_to_write > 0) { 3017 while (!ret && wbc->nr_to_write > 0) {
3025 3018
3026 /* 3019 /*
@@ -3059,16 +3052,14 @@ retry:
3059 mpd.io_done = 0; 3052 mpd.io_done = 0;
3060 mpd.pages_written = 0; 3053 mpd.pages_written = 0;
3061 mpd.retval = 0; 3054 mpd.retval = 0;
3062 ret = write_cache_pages_da(mapping, wbc, &mpd); 3055 ret = write_cache_pages_da(mapping, wbc, &mpd, &done_index);
3063 /* 3056 /*
3064 * If we have a contiguous extent of pages and we 3057 * If we have a contiguous extent of pages and we
3065 * haven't done the I/O yet, map the blocks and submit 3058 * haven't done the I/O yet, map the blocks and submit
3066 * them for I/O. 3059 * them for I/O.
3067 */ 3060 */
3068 if (!mpd.io_done && mpd.next_page != mpd.first_page) { 3061 if (!mpd.io_done && mpd.next_page != mpd.first_page) {
3069 if (mpage_da_map_blocks(&mpd) == 0) 3062 mpage_da_map_and_submit(&mpd);
3070 mpage_da_submit_io(&mpd);
3071 mpd.io_done = 1;
3072 ret = MPAGE_DA_EXTENT_TAIL; 3063 ret = MPAGE_DA_EXTENT_TAIL;
3073 } 3064 }
3074 trace_ext4_da_write_pages(inode, &mpd); 3065 trace_ext4_da_write_pages(inode, &mpd);
@@ -3115,14 +3106,13 @@ retry:
3115 __func__, wbc->nr_to_write, ret); 3106 __func__, wbc->nr_to_write, ret);
3116 3107
3117 /* Update index */ 3108 /* Update index */
3118 index += pages_written;
3119 wbc->range_cyclic = range_cyclic; 3109 wbc->range_cyclic = range_cyclic;
3120 if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0)) 3110 if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
3121 /* 3111 /*
3122 * set the writeback_index so that range_cyclic 3112 * set the writeback_index so that range_cyclic
3123 * mode will write it back later 3113 * mode will write it back later
3124 */ 3114 */
3125 mapping->writeback_index = index; 3115 mapping->writeback_index = done_index;
3126 3116
3127out_writepages: 3117out_writepages:
3128 wbc->nr_to_write -= nr_to_writebump; 3118 wbc->nr_to_write -= nr_to_writebump;
@@ -3457,15 +3447,6 @@ ext4_readpages(struct file *file, struct address_space *mapping,
3457 return mpage_readpages(mapping, pages, nr_pages, ext4_get_block); 3447 return mpage_readpages(mapping, pages, nr_pages, ext4_get_block);
3458} 3448}
3459 3449
3460static void ext4_free_io_end(ext4_io_end_t *io)
3461{
3462 BUG_ON(!io);
3463 if (io->page)
3464 put_page(io->page);
3465 iput(io->inode);
3466 kfree(io);
3467}
3468
3469static void ext4_invalidatepage_free_endio(struct page *page, unsigned long offset) 3450static void ext4_invalidatepage_free_endio(struct page *page, unsigned long offset)
3470{ 3451{
3471 struct buffer_head *head, *bh; 3452 struct buffer_head *head, *bh;
@@ -3642,173 +3623,6 @@ static int ext4_get_block_write(struct inode *inode, sector_t iblock,
3642 EXT4_GET_BLOCKS_IO_CREATE_EXT); 3623 EXT4_GET_BLOCKS_IO_CREATE_EXT);
3643} 3624}
3644 3625
3645static void dump_completed_IO(struct inode * inode)
3646{
3647#ifdef EXT4_DEBUG
3648 struct list_head *cur, *before, *after;
3649 ext4_io_end_t *io, *io0, *io1;
3650 unsigned long flags;
3651
3652 if (list_empty(&EXT4_I(inode)->i_completed_io_list)){
3653 ext4_debug("inode %lu completed_io list is empty\n", inode->i_ino);
3654 return;
3655 }
3656
3657 ext4_debug("Dump inode %lu completed_io list \n", inode->i_ino);
3658 spin_lock_irqsave(&EXT4_I(inode)->i_completed_io_lock, flags);
3659 list_for_each_entry(io, &EXT4_I(inode)->i_completed_io_list, list){
3660 cur = &io->list;
3661 before = cur->prev;
3662 io0 = container_of(before, ext4_io_end_t, list);
3663 after = cur->next;
3664 io1 = container_of(after, ext4_io_end_t, list);
3665
3666 ext4_debug("io 0x%p from inode %lu,prev 0x%p,next 0x%p\n",
3667 io, inode->i_ino, io0, io1);
3668 }
3669 spin_unlock_irqrestore(&EXT4_I(inode)->i_completed_io_lock, flags);
3670#endif
3671}
3672
3673/*
3674 * check a range of space and convert unwritten extents to written.
3675 */
3676static int ext4_end_io_nolock(ext4_io_end_t *io)
3677{
3678 struct inode *inode = io->inode;
3679 loff_t offset = io->offset;
3680 ssize_t size = io->size;
3681 int ret = 0;
3682
3683 ext4_debug("ext4_end_io_nolock: io 0x%p from inode %lu,list->next 0x%p,"
3684 "list->prev 0x%p\n",
3685 io, inode->i_ino, io->list.next, io->list.prev);
3686
3687 if (list_empty(&io->list))
3688 return ret;
3689
3690 if (io->flag != EXT4_IO_UNWRITTEN)
3691 return ret;
3692
3693 ret = ext4_convert_unwritten_extents(inode, offset, size);
3694 if (ret < 0) {
3695 printk(KERN_EMERG "%s: failed to convert unwritten"
3696 "extents to written extents, error is %d"
3697 " io is still on inode %lu aio dio list\n",
3698 __func__, ret, inode->i_ino);
3699 return ret;
3700 }
3701
3702 if (io->iocb)
3703 aio_complete(io->iocb, io->result, 0);
3704 /* clear the DIO AIO unwritten flag */
3705 io->flag = 0;
3706 return ret;
3707}
3708
3709/*
3710 * work on completed aio dio IO, to convert unwritten extents to extents
3711 */
3712static void ext4_end_io_work(struct work_struct *work)
3713{
3714 ext4_io_end_t *io = container_of(work, ext4_io_end_t, work);
3715 struct inode *inode = io->inode;
3716 struct ext4_inode_info *ei = EXT4_I(inode);
3717 unsigned long flags;
3718 int ret;
3719
3720 mutex_lock(&inode->i_mutex);
3721 ret = ext4_end_io_nolock(io);
3722 if (ret < 0) {
3723 mutex_unlock(&inode->i_mutex);
3724 return;
3725 }
3726
3727 spin_lock_irqsave(&ei->i_completed_io_lock, flags);
3728 if (!list_empty(&io->list))
3729 list_del_init(&io->list);
3730 spin_unlock_irqrestore(&ei->i_completed_io_lock, flags);
3731 mutex_unlock(&inode->i_mutex);
3732 ext4_free_io_end(io);
3733}
3734
3735/*
3736 * This function is called from ext4_sync_file().
3737 *
3738 * When IO is completed, the work to convert unwritten extents to
3739 * written is queued on workqueue but may not get immediately
3740 * scheduled. When fsync is called, we need to ensure the
3741 * conversion is complete before fsync returns.
3742 * The inode keeps track of a list of pending/completed IO that
3743 * might needs to do the conversion. This function walks through
3744 * the list and convert the related unwritten extents for completed IO
3745 * to written.
3746 * The function return the number of pending IOs on success.
3747 */
3748int flush_completed_IO(struct inode *inode)
3749{
3750 ext4_io_end_t *io;
3751 struct ext4_inode_info *ei = EXT4_I(inode);
3752 unsigned long flags;
3753 int ret = 0;
3754 int ret2 = 0;
3755
3756 if (list_empty(&ei->i_completed_io_list))
3757 return ret;
3758
3759 dump_completed_IO(inode);
3760 spin_lock_irqsave(&ei->i_completed_io_lock, flags);
3761 while (!list_empty(&ei->i_completed_io_list)){
3762 io = list_entry(ei->i_completed_io_list.next,
3763 ext4_io_end_t, list);
3764 /*
3765 * Calling ext4_end_io_nolock() to convert completed
3766 * IO to written.
3767 *
3768 * When ext4_sync_file() is called, run_queue() may already
3769 * about to flush the work corresponding to this io structure.
3770 * It will be upset if it founds the io structure related
3771 * to the work-to-be schedule is freed.
3772 *
3773 * Thus we need to keep the io structure still valid here after
3774 * convertion finished. The io structure has a flag to
3775 * avoid double converting from both fsync and background work
3776 * queue work.
3777 */
3778 spin_unlock_irqrestore(&ei->i_completed_io_lock, flags);
3779 ret = ext4_end_io_nolock(io);
3780 spin_lock_irqsave(&ei->i_completed_io_lock, flags);
3781 if (ret < 0)
3782 ret2 = ret;
3783 else
3784 list_del_init(&io->list);
3785 }
3786 spin_unlock_irqrestore(&ei->i_completed_io_lock, flags);
3787 return (ret2 < 0) ? ret2 : 0;
3788}
3789
3790static ext4_io_end_t *ext4_init_io_end (struct inode *inode, gfp_t flags)
3791{
3792 ext4_io_end_t *io = NULL;
3793
3794 io = kmalloc(sizeof(*io), flags);
3795
3796 if (io) {
3797 igrab(inode);
3798 io->inode = inode;
3799 io->flag = 0;
3800 io->offset = 0;
3801 io->size = 0;
3802 io->page = NULL;
3803 io->iocb = NULL;
3804 io->result = 0;
3805 INIT_WORK(&io->work, ext4_end_io_work);
3806 INIT_LIST_HEAD(&io->list);
3807 }
3808
3809 return io;
3810}
3811
3812static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset, 3626static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset,
3813 ssize_t size, void *private, int ret, 3627 ssize_t size, void *private, int ret,
3814 bool is_async) 3628 bool is_async)
@@ -3828,7 +3642,7 @@ static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset,
3828 size); 3642 size);
3829 3643
3830 /* if not aio dio with unwritten extents, just free io and return */ 3644 /* if not aio dio with unwritten extents, just free io and return */
3831 if (io_end->flag != EXT4_IO_UNWRITTEN){ 3645 if (!(io_end->flag & EXT4_IO_END_UNWRITTEN)) {
3832 ext4_free_io_end(io_end); 3646 ext4_free_io_end(io_end);
3833 iocb->private = NULL; 3647 iocb->private = NULL;
3834out: 3648out:
@@ -3845,14 +3659,14 @@ out:
3845 } 3659 }
3846 wq = EXT4_SB(io_end->inode->i_sb)->dio_unwritten_wq; 3660 wq = EXT4_SB(io_end->inode->i_sb)->dio_unwritten_wq;
3847 3661
3848 /* queue the work to convert unwritten extents to written */
3849 queue_work(wq, &io_end->work);
3850
3851 /* Add the io_end to per-inode completed aio dio list*/ 3662 /* Add the io_end to per-inode completed aio dio list*/
3852 ei = EXT4_I(io_end->inode); 3663 ei = EXT4_I(io_end->inode);
3853 spin_lock_irqsave(&ei->i_completed_io_lock, flags); 3664 spin_lock_irqsave(&ei->i_completed_io_lock, flags);
3854 list_add_tail(&io_end->list, &ei->i_completed_io_list); 3665 list_add_tail(&io_end->list, &ei->i_completed_io_list);
3855 spin_unlock_irqrestore(&ei->i_completed_io_lock, flags); 3666 spin_unlock_irqrestore(&ei->i_completed_io_lock, flags);
3667
3668 /* queue the work to convert unwritten extents to written */
3669 queue_work(wq, &io_end->work);
3856 iocb->private = NULL; 3670 iocb->private = NULL;
3857} 3671}
3858 3672
@@ -3873,7 +3687,7 @@ static void ext4_end_io_buffer_write(struct buffer_head *bh, int uptodate)
3873 goto out; 3687 goto out;
3874 } 3688 }
3875 3689
3876 io_end->flag = EXT4_IO_UNWRITTEN; 3690 io_end->flag = EXT4_IO_END_UNWRITTEN;
3877 inode = io_end->inode; 3691 inode = io_end->inode;
3878 3692
3879 /* Add the io_end to per-inode completed io list*/ 3693 /* Add the io_end to per-inode completed io list*/
@@ -5464,6 +5278,7 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
5464{ 5278{
5465 struct inode *inode = dentry->d_inode; 5279 struct inode *inode = dentry->d_inode;
5466 int error, rc = 0; 5280 int error, rc = 0;
5281 int orphan = 0;
5467 const unsigned int ia_valid = attr->ia_valid; 5282 const unsigned int ia_valid = attr->ia_valid;
5468 5283
5469 error = inode_change_ok(inode, attr); 5284 error = inode_change_ok(inode, attr);
@@ -5519,8 +5334,10 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
5519 error = PTR_ERR(handle); 5334 error = PTR_ERR(handle);
5520 goto err_out; 5335 goto err_out;
5521 } 5336 }
5522 5337 if (ext4_handle_valid(handle)) {
5523 error = ext4_orphan_add(handle, inode); 5338 error = ext4_orphan_add(handle, inode);
5339 orphan = 1;
5340 }
5524 EXT4_I(inode)->i_disksize = attr->ia_size; 5341 EXT4_I(inode)->i_disksize = attr->ia_size;
5525 rc = ext4_mark_inode_dirty(handle, inode); 5342 rc = ext4_mark_inode_dirty(handle, inode);
5526 if (!error) 5343 if (!error)
@@ -5538,6 +5355,7 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
5538 goto err_out; 5355 goto err_out;
5539 } 5356 }
5540 ext4_orphan_del(handle, inode); 5357 ext4_orphan_del(handle, inode);
5358 orphan = 0;
5541 ext4_journal_stop(handle); 5359 ext4_journal_stop(handle);
5542 goto err_out; 5360 goto err_out;
5543 } 5361 }
@@ -5560,7 +5378,7 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
5560 * If the call to ext4_truncate failed to get a transaction handle at 5378 * If the call to ext4_truncate failed to get a transaction handle at
5561 * all, we need to clean up the in-core orphan list manually. 5379 * all, we need to clean up the in-core orphan list manually.
5562 */ 5380 */
5563 if (inode->i_nlink) 5381 if (orphan && inode->i_nlink)
5564 ext4_orphan_del(NULL, inode); 5382 ext4_orphan_del(NULL, inode);
5565 5383
5566 if (!rc && (ia_valid & ATTR_MODE)) 5384 if (!rc && (ia_valid & ATTR_MODE))
@@ -5643,7 +5461,7 @@ static int ext4_index_trans_blocks(struct inode *inode, int nrblocks, int chunk)
5643 * 5461 *
5644 * Also account for superblock, inode, quota and xattr blocks 5462 * Also account for superblock, inode, quota and xattr blocks
5645 */ 5463 */
5646int ext4_meta_trans_blocks(struct inode *inode, int nrblocks, int chunk) 5464static int ext4_meta_trans_blocks(struct inode *inode, int nrblocks, int chunk)
5647{ 5465{
5648 ext4_group_t groups, ngroups = ext4_get_groups_count(inode->i_sb); 5466 ext4_group_t groups, ngroups = ext4_get_groups_count(inode->i_sb);
5649 int gdpblocks; 5467 int gdpblocks;
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 19aa0d44d822..c58eba34724a 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -338,6 +338,14 @@
338static struct kmem_cache *ext4_pspace_cachep; 338static struct kmem_cache *ext4_pspace_cachep;
339static struct kmem_cache *ext4_ac_cachep; 339static struct kmem_cache *ext4_ac_cachep;
340static struct kmem_cache *ext4_free_ext_cachep; 340static struct kmem_cache *ext4_free_ext_cachep;
341
342/* We create slab caches for groupinfo data structures based on the
343 * superblock block size. There will be one per mounted filesystem for
344 * each unique s_blocksize_bits */
345#define NR_GRPINFO_CACHES \
346 (EXT4_MAX_BLOCK_LOG_SIZE - EXT4_MIN_BLOCK_LOG_SIZE + 1)
347static struct kmem_cache *ext4_groupinfo_caches[NR_GRPINFO_CACHES];
348
341static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap, 349static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
342 ext4_group_t group); 350 ext4_group_t group);
343static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap, 351static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap,
@@ -939,6 +947,85 @@ out:
939} 947}
940 948
941/* 949/*
950 * lock the group_info alloc_sem of all the groups
951 * belonging to the same buddy cache page. This
952 * make sure other parallel operation on the buddy
953 * cache doesn't happen whild holding the buddy cache
954 * lock
955 */
956static int ext4_mb_get_buddy_cache_lock(struct super_block *sb,
957 ext4_group_t group)
958{
959 int i;
960 int block, pnum;
961 int blocks_per_page;
962 int groups_per_page;
963 ext4_group_t ngroups = ext4_get_groups_count(sb);
964 ext4_group_t first_group;
965 struct ext4_group_info *grp;
966
967 blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize;
968 /*
969 * the buddy cache inode stores the block bitmap
970 * and buddy information in consecutive blocks.
971 * So for each group we need two blocks.
972 */
973 block = group * 2;
974 pnum = block / blocks_per_page;
975 first_group = pnum * blocks_per_page / 2;
976
977 groups_per_page = blocks_per_page >> 1;
978 if (groups_per_page == 0)
979 groups_per_page = 1;
980 /* read all groups the page covers into the cache */
981 for (i = 0; i < groups_per_page; i++) {
982
983 if ((first_group + i) >= ngroups)
984 break;
985 grp = ext4_get_group_info(sb, first_group + i);
986 /* take all groups write allocation
987 * semaphore. This make sure there is
988 * no block allocation going on in any
989 * of that groups
990 */
991 down_write_nested(&grp->alloc_sem, i);
992 }
993 return i;
994}
995
996static void ext4_mb_put_buddy_cache_lock(struct super_block *sb,
997 ext4_group_t group, int locked_group)
998{
999 int i;
1000 int block, pnum;
1001 int blocks_per_page;
1002 ext4_group_t first_group;
1003 struct ext4_group_info *grp;
1004
1005 blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize;
1006 /*
1007 * the buddy cache inode stores the block bitmap
1008 * and buddy information in consecutive blocks.
1009 * So for each group we need two blocks.
1010 */
1011 block = group * 2;
1012 pnum = block / blocks_per_page;
1013 first_group = pnum * blocks_per_page / 2;
1014 /* release locks on all the groups */
1015 for (i = 0; i < locked_group; i++) {
1016
1017 grp = ext4_get_group_info(sb, first_group + i);
1018 /* take all groups write allocation
1019 * semaphore. This make sure there is
1020 * no block allocation going on in any
1021 * of that groups
1022 */
1023 up_write(&grp->alloc_sem);
1024 }
1025
1026}
1027
1028/*
942 * Locking note: This routine calls ext4_mb_init_cache(), which takes the 1029 * Locking note: This routine calls ext4_mb_init_cache(), which takes the
943 * block group lock of all groups for this page; do not hold the BG lock when 1030 * block group lock of all groups for this page; do not hold the BG lock when
944 * calling this routine! 1031 * calling this routine!
@@ -1915,84 +2002,6 @@ static int ext4_mb_good_group(struct ext4_allocation_context *ac,
1915 return 0; 2002 return 0;
1916} 2003}
1917 2004
1918/*
1919 * lock the group_info alloc_sem of all the groups
1920 * belonging to the same buddy cache page. This
1921 * make sure other parallel operation on the buddy
1922 * cache doesn't happen whild holding the buddy cache
1923 * lock
1924 */
1925int ext4_mb_get_buddy_cache_lock(struct super_block *sb, ext4_group_t group)
1926{
1927 int i;
1928 int block, pnum;
1929 int blocks_per_page;
1930 int groups_per_page;
1931 ext4_group_t ngroups = ext4_get_groups_count(sb);
1932 ext4_group_t first_group;
1933 struct ext4_group_info *grp;
1934
1935 blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize;
1936 /*
1937 * the buddy cache inode stores the block bitmap
1938 * and buddy information in consecutive blocks.
1939 * So for each group we need two blocks.
1940 */
1941 block = group * 2;
1942 pnum = block / blocks_per_page;
1943 first_group = pnum * blocks_per_page / 2;
1944
1945 groups_per_page = blocks_per_page >> 1;
1946 if (groups_per_page == 0)
1947 groups_per_page = 1;
1948 /* read all groups the page covers into the cache */
1949 for (i = 0; i < groups_per_page; i++) {
1950
1951 if ((first_group + i) >= ngroups)
1952 break;
1953 grp = ext4_get_group_info(sb, first_group + i);
1954 /* take all groups write allocation
1955 * semaphore. This make sure there is
1956 * no block allocation going on in any
1957 * of that groups
1958 */
1959 down_write_nested(&grp->alloc_sem, i);
1960 }
1961 return i;
1962}
1963
1964void ext4_mb_put_buddy_cache_lock(struct super_block *sb,
1965 ext4_group_t group, int locked_group)
1966{
1967 int i;
1968 int block, pnum;
1969 int blocks_per_page;
1970 ext4_group_t first_group;
1971 struct ext4_group_info *grp;
1972
1973 blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize;
1974 /*
1975 * the buddy cache inode stores the block bitmap
1976 * and buddy information in consecutive blocks.
1977 * So for each group we need two blocks.
1978 */
1979 block = group * 2;
1980 pnum = block / blocks_per_page;
1981 first_group = pnum * blocks_per_page / 2;
1982 /* release locks on all the groups */
1983 for (i = 0; i < locked_group; i++) {
1984
1985 grp = ext4_get_group_info(sb, first_group + i);
1986 /* take all groups write allocation
1987 * semaphore. This make sure there is
1988 * no block allocation going on in any
1989 * of that groups
1990 */
1991 up_write(&grp->alloc_sem);
1992 }
1993
1994}
1995
1996static noinline_for_stack int 2005static noinline_for_stack int
1997ext4_mb_regular_allocator(struct ext4_allocation_context *ac) 2006ext4_mb_regular_allocator(struct ext4_allocation_context *ac)
1998{ 2007{
@@ -2233,15 +2242,24 @@ static const struct file_operations ext4_mb_seq_groups_fops = {
2233 .release = seq_release, 2242 .release = seq_release,
2234}; 2243};
2235 2244
2245static struct kmem_cache *get_groupinfo_cache(int blocksize_bits)
2246{
2247 int cache_index = blocksize_bits - EXT4_MIN_BLOCK_LOG_SIZE;
2248 struct kmem_cache *cachep = ext4_groupinfo_caches[cache_index];
2249
2250 BUG_ON(!cachep);
2251 return cachep;
2252}
2236 2253
2237/* Create and initialize ext4_group_info data for the given group. */ 2254/* Create and initialize ext4_group_info data for the given group. */
2238int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group, 2255int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group,
2239 struct ext4_group_desc *desc) 2256 struct ext4_group_desc *desc)
2240{ 2257{
2241 int i, len; 2258 int i;
2242 int metalen = 0; 2259 int metalen = 0;
2243 struct ext4_sb_info *sbi = EXT4_SB(sb); 2260 struct ext4_sb_info *sbi = EXT4_SB(sb);
2244 struct ext4_group_info **meta_group_info; 2261 struct ext4_group_info **meta_group_info;
2262 struct kmem_cache *cachep = get_groupinfo_cache(sb->s_blocksize_bits);
2245 2263
2246 /* 2264 /*
2247 * First check if this group is the first of a reserved block. 2265 * First check if this group is the first of a reserved block.
@@ -2261,22 +2279,16 @@ int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group,
2261 meta_group_info; 2279 meta_group_info;
2262 } 2280 }
2263 2281
2264 /*
2265 * calculate needed size. if change bb_counters size,
2266 * don't forget about ext4_mb_generate_buddy()
2267 */
2268 len = offsetof(typeof(**meta_group_info),
2269 bb_counters[sb->s_blocksize_bits + 2]);
2270
2271 meta_group_info = 2282 meta_group_info =
2272 sbi->s_group_info[group >> EXT4_DESC_PER_BLOCK_BITS(sb)]; 2283 sbi->s_group_info[group >> EXT4_DESC_PER_BLOCK_BITS(sb)];
2273 i = group & (EXT4_DESC_PER_BLOCK(sb) - 1); 2284 i = group & (EXT4_DESC_PER_BLOCK(sb) - 1);
2274 2285
2275 meta_group_info[i] = kzalloc(len, GFP_KERNEL); 2286 meta_group_info[i] = kmem_cache_alloc(cachep, GFP_KERNEL);
2276 if (meta_group_info[i] == NULL) { 2287 if (meta_group_info[i] == NULL) {
2277 printk(KERN_ERR "EXT4-fs: can't allocate buddy mem\n"); 2288 printk(KERN_ERR "EXT4-fs: can't allocate buddy mem\n");
2278 goto exit_group_info; 2289 goto exit_group_info;
2279 } 2290 }
2291 memset(meta_group_info[i], 0, kmem_cache_size(cachep));
2280 set_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, 2292 set_bit(EXT4_GROUP_INFO_NEED_INIT_BIT,
2281 &(meta_group_info[i]->bb_state)); 2293 &(meta_group_info[i]->bb_state));
2282 2294
@@ -2331,6 +2343,7 @@ static int ext4_mb_init_backend(struct super_block *sb)
2331 int num_meta_group_infos_max; 2343 int num_meta_group_infos_max;
2332 int array_size; 2344 int array_size;
2333 struct ext4_group_desc *desc; 2345 struct ext4_group_desc *desc;
2346 struct kmem_cache *cachep;
2334 2347
2335 /* This is the number of blocks used by GDT */ 2348 /* This is the number of blocks used by GDT */
2336 num_meta_group_infos = (ngroups + EXT4_DESC_PER_BLOCK(sb) - 2349 num_meta_group_infos = (ngroups + EXT4_DESC_PER_BLOCK(sb) -
@@ -2373,6 +2386,7 @@ static int ext4_mb_init_backend(struct super_block *sb)
2373 printk(KERN_ERR "EXT4-fs: can't get new inode\n"); 2386 printk(KERN_ERR "EXT4-fs: can't get new inode\n");
2374 goto err_freesgi; 2387 goto err_freesgi;
2375 } 2388 }
2389 sbi->s_buddy_cache->i_ino = get_next_ino();
2376 EXT4_I(sbi->s_buddy_cache)->i_disksize = 0; 2390 EXT4_I(sbi->s_buddy_cache)->i_disksize = 0;
2377 for (i = 0; i < ngroups; i++) { 2391 for (i = 0; i < ngroups; i++) {
2378 desc = ext4_get_group_desc(sb, i, NULL); 2392 desc = ext4_get_group_desc(sb, i, NULL);
@@ -2388,8 +2402,9 @@ static int ext4_mb_init_backend(struct super_block *sb)
2388 return 0; 2402 return 0;
2389 2403
2390err_freebuddy: 2404err_freebuddy:
2405 cachep = get_groupinfo_cache(sb->s_blocksize_bits);
2391 while (i-- > 0) 2406 while (i-- > 0)
2392 kfree(ext4_get_group_info(sb, i)); 2407 kmem_cache_free(cachep, ext4_get_group_info(sb, i));
2393 i = num_meta_group_infos; 2408 i = num_meta_group_infos;
2394 while (i-- > 0) 2409 while (i-- > 0)
2395 kfree(sbi->s_group_info[i]); 2410 kfree(sbi->s_group_info[i]);
@@ -2406,19 +2421,48 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery)
2406 unsigned offset; 2421 unsigned offset;
2407 unsigned max; 2422 unsigned max;
2408 int ret; 2423 int ret;
2424 int cache_index;
2425 struct kmem_cache *cachep;
2426 char *namep = NULL;
2409 2427
2410 i = (sb->s_blocksize_bits + 2) * sizeof(*sbi->s_mb_offsets); 2428 i = (sb->s_blocksize_bits + 2) * sizeof(*sbi->s_mb_offsets);
2411 2429
2412 sbi->s_mb_offsets = kmalloc(i, GFP_KERNEL); 2430 sbi->s_mb_offsets = kmalloc(i, GFP_KERNEL);
2413 if (sbi->s_mb_offsets == NULL) { 2431 if (sbi->s_mb_offsets == NULL) {
2414 return -ENOMEM; 2432 ret = -ENOMEM;
2433 goto out;
2415 } 2434 }
2416 2435
2417 i = (sb->s_blocksize_bits + 2) * sizeof(*sbi->s_mb_maxs); 2436 i = (sb->s_blocksize_bits + 2) * sizeof(*sbi->s_mb_maxs);
2418 sbi->s_mb_maxs = kmalloc(i, GFP_KERNEL); 2437 sbi->s_mb_maxs = kmalloc(i, GFP_KERNEL);
2419 if (sbi->s_mb_maxs == NULL) { 2438 if (sbi->s_mb_maxs == NULL) {
2420 kfree(sbi->s_mb_offsets); 2439 ret = -ENOMEM;
2421 return -ENOMEM; 2440 goto out;
2441 }
2442
2443 cache_index = sb->s_blocksize_bits - EXT4_MIN_BLOCK_LOG_SIZE;
2444 cachep = ext4_groupinfo_caches[cache_index];
2445 if (!cachep) {
2446 char name[32];
2447 int len = offsetof(struct ext4_group_info,
2448 bb_counters[sb->s_blocksize_bits + 2]);
2449
2450 sprintf(name, "ext4_groupinfo_%d", sb->s_blocksize_bits);
2451 namep = kstrdup(name, GFP_KERNEL);
2452 if (!namep) {
2453 ret = -ENOMEM;
2454 goto out;
2455 }
2456
2457 /* Need to free the kmem_cache_name() when we
2458 * destroy the slab */
2459 cachep = kmem_cache_create(namep, len, 0,
2460 SLAB_RECLAIM_ACCOUNT, NULL);
2461 if (!cachep) {
2462 ret = -ENOMEM;
2463 goto out;
2464 }
2465 ext4_groupinfo_caches[cache_index] = cachep;
2422 } 2466 }
2423 2467
2424 /* order 0 is regular bitmap */ 2468 /* order 0 is regular bitmap */
@@ -2439,9 +2483,7 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery)
2439 /* init file for buddy data */ 2483 /* init file for buddy data */
2440 ret = ext4_mb_init_backend(sb); 2484 ret = ext4_mb_init_backend(sb);
2441 if (ret != 0) { 2485 if (ret != 0) {
2442 kfree(sbi->s_mb_offsets); 2486 goto out;
2443 kfree(sbi->s_mb_maxs);
2444 return ret;
2445 } 2487 }
2446 2488
2447 spin_lock_init(&sbi->s_md_lock); 2489 spin_lock_init(&sbi->s_md_lock);
@@ -2456,9 +2498,8 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery)
2456 2498
2457 sbi->s_locality_groups = alloc_percpu(struct ext4_locality_group); 2499 sbi->s_locality_groups = alloc_percpu(struct ext4_locality_group);
2458 if (sbi->s_locality_groups == NULL) { 2500 if (sbi->s_locality_groups == NULL) {
2459 kfree(sbi->s_mb_offsets); 2501 ret = -ENOMEM;
2460 kfree(sbi->s_mb_maxs); 2502 goto out;
2461 return -ENOMEM;
2462 } 2503 }
2463 for_each_possible_cpu(i) { 2504 for_each_possible_cpu(i) {
2464 struct ext4_locality_group *lg; 2505 struct ext4_locality_group *lg;
@@ -2475,7 +2516,13 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery)
2475 2516
2476 if (sbi->s_journal) 2517 if (sbi->s_journal)
2477 sbi->s_journal->j_commit_callback = release_blocks_on_commit; 2518 sbi->s_journal->j_commit_callback = release_blocks_on_commit;
2478 return 0; 2519out:
2520 if (ret) {
2521 kfree(sbi->s_mb_offsets);
2522 kfree(sbi->s_mb_maxs);
2523 kfree(namep);
2524 }
2525 return ret;
2479} 2526}
2480 2527
2481/* need to called with the ext4 group lock held */ 2528/* need to called with the ext4 group lock held */
@@ -2503,6 +2550,7 @@ int ext4_mb_release(struct super_block *sb)
2503 int num_meta_group_infos; 2550 int num_meta_group_infos;
2504 struct ext4_group_info *grinfo; 2551 struct ext4_group_info *grinfo;
2505 struct ext4_sb_info *sbi = EXT4_SB(sb); 2552 struct ext4_sb_info *sbi = EXT4_SB(sb);
2553 struct kmem_cache *cachep = get_groupinfo_cache(sb->s_blocksize_bits);
2506 2554
2507 if (sbi->s_group_info) { 2555 if (sbi->s_group_info) {
2508 for (i = 0; i < ngroups; i++) { 2556 for (i = 0; i < ngroups; i++) {
@@ -2513,7 +2561,7 @@ int ext4_mb_release(struct super_block *sb)
2513 ext4_lock_group(sb, i); 2561 ext4_lock_group(sb, i);
2514 ext4_mb_cleanup_pa(grinfo); 2562 ext4_mb_cleanup_pa(grinfo);
2515 ext4_unlock_group(sb, i); 2563 ext4_unlock_group(sb, i);
2516 kfree(grinfo); 2564 kmem_cache_free(cachep, grinfo);
2517 } 2565 }
2518 num_meta_group_infos = (ngroups + 2566 num_meta_group_infos = (ngroups +
2519 EXT4_DESC_PER_BLOCK(sb) - 1) >> 2567 EXT4_DESC_PER_BLOCK(sb) - 1) >>
@@ -2557,7 +2605,7 @@ int ext4_mb_release(struct super_block *sb)
2557 return 0; 2605 return 0;
2558} 2606}
2559 2607
2560static inline void ext4_issue_discard(struct super_block *sb, 2608static inline int ext4_issue_discard(struct super_block *sb,
2561 ext4_group_t block_group, ext4_grpblk_t block, int count) 2609 ext4_group_t block_group, ext4_grpblk_t block, int count)
2562{ 2610{
2563 int ret; 2611 int ret;
@@ -2567,10 +2615,11 @@ static inline void ext4_issue_discard(struct super_block *sb,
2567 trace_ext4_discard_blocks(sb, 2615 trace_ext4_discard_blocks(sb,
2568 (unsigned long long) discard_block, count); 2616 (unsigned long long) discard_block, count);
2569 ret = sb_issue_discard(sb, discard_block, count, GFP_NOFS, 0); 2617 ret = sb_issue_discard(sb, discard_block, count, GFP_NOFS, 0);
2570 if (ret == EOPNOTSUPP) { 2618 if (ret == -EOPNOTSUPP) {
2571 ext4_warning(sb, "discard not supported, disabling"); 2619 ext4_warning(sb, "discard not supported, disabling");
2572 clear_opt(EXT4_SB(sb)->s_mount_opt, DISCARD); 2620 clear_opt(EXT4_SB(sb)->s_mount_opt, DISCARD);
2573 } 2621 }
2622 return ret;
2574} 2623}
2575 2624
2576/* 2625/*
@@ -2658,28 +2707,22 @@ static void ext4_remove_debugfs_entry(void)
2658 2707
2659#endif 2708#endif
2660 2709
2661int __init init_ext4_mballoc(void) 2710int __init ext4_init_mballoc(void)
2662{ 2711{
2663 ext4_pspace_cachep = 2712 ext4_pspace_cachep = KMEM_CACHE(ext4_prealloc_space,
2664 kmem_cache_create("ext4_prealloc_space", 2713 SLAB_RECLAIM_ACCOUNT);
2665 sizeof(struct ext4_prealloc_space),
2666 0, SLAB_RECLAIM_ACCOUNT, NULL);
2667 if (ext4_pspace_cachep == NULL) 2714 if (ext4_pspace_cachep == NULL)
2668 return -ENOMEM; 2715 return -ENOMEM;
2669 2716
2670 ext4_ac_cachep = 2717 ext4_ac_cachep = KMEM_CACHE(ext4_allocation_context,
2671 kmem_cache_create("ext4_alloc_context", 2718 SLAB_RECLAIM_ACCOUNT);
2672 sizeof(struct ext4_allocation_context),
2673 0, SLAB_RECLAIM_ACCOUNT, NULL);
2674 if (ext4_ac_cachep == NULL) { 2719 if (ext4_ac_cachep == NULL) {
2675 kmem_cache_destroy(ext4_pspace_cachep); 2720 kmem_cache_destroy(ext4_pspace_cachep);
2676 return -ENOMEM; 2721 return -ENOMEM;
2677 } 2722 }
2678 2723
2679 ext4_free_ext_cachep = 2724 ext4_free_ext_cachep = KMEM_CACHE(ext4_free_data,
2680 kmem_cache_create("ext4_free_block_extents", 2725 SLAB_RECLAIM_ACCOUNT);
2681 sizeof(struct ext4_free_data),
2682 0, SLAB_RECLAIM_ACCOUNT, NULL);
2683 if (ext4_free_ext_cachep == NULL) { 2726 if (ext4_free_ext_cachep == NULL) {
2684 kmem_cache_destroy(ext4_pspace_cachep); 2727 kmem_cache_destroy(ext4_pspace_cachep);
2685 kmem_cache_destroy(ext4_ac_cachep); 2728 kmem_cache_destroy(ext4_ac_cachep);
@@ -2689,8 +2732,9 @@ int __init init_ext4_mballoc(void)
2689 return 0; 2732 return 0;
2690} 2733}
2691 2734
2692void exit_ext4_mballoc(void) 2735void ext4_exit_mballoc(void)
2693{ 2736{
2737 int i;
2694 /* 2738 /*
2695 * Wait for completion of call_rcu()'s on ext4_pspace_cachep 2739 * Wait for completion of call_rcu()'s on ext4_pspace_cachep
2696 * before destroying the slab cache. 2740 * before destroying the slab cache.
@@ -2699,6 +2743,15 @@ void exit_ext4_mballoc(void)
2699 kmem_cache_destroy(ext4_pspace_cachep); 2743 kmem_cache_destroy(ext4_pspace_cachep);
2700 kmem_cache_destroy(ext4_ac_cachep); 2744 kmem_cache_destroy(ext4_ac_cachep);
2701 kmem_cache_destroy(ext4_free_ext_cachep); 2745 kmem_cache_destroy(ext4_free_ext_cachep);
2746
2747 for (i = 0; i < NR_GRPINFO_CACHES; i++) {
2748 struct kmem_cache *cachep = ext4_groupinfo_caches[i];
2749 if (cachep) {
2750 char *name = (char *)kmem_cache_name(cachep);
2751 kmem_cache_destroy(cachep);
2752 kfree(name);
2753 }
2754 }
2702 ext4_remove_debugfs_entry(); 2755 ext4_remove_debugfs_entry();
2703} 2756}
2704 2757
@@ -3535,8 +3588,7 @@ static int ext4_mb_new_preallocation(struct ext4_allocation_context *ac)
3535 */ 3588 */
3536static noinline_for_stack int 3589static noinline_for_stack int
3537ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh, 3590ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh,
3538 struct ext4_prealloc_space *pa, 3591 struct ext4_prealloc_space *pa)
3539 struct ext4_allocation_context *ac)
3540{ 3592{
3541 struct super_block *sb = e4b->bd_sb; 3593 struct super_block *sb = e4b->bd_sb;
3542 struct ext4_sb_info *sbi = EXT4_SB(sb); 3594 struct ext4_sb_info *sbi = EXT4_SB(sb);
@@ -3554,11 +3606,6 @@ ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh,
3554 BUG_ON(group != e4b->bd_group && pa->pa_len != 0); 3606 BUG_ON(group != e4b->bd_group && pa->pa_len != 0);
3555 end = bit + pa->pa_len; 3607 end = bit + pa->pa_len;
3556 3608
3557 if (ac) {
3558 ac->ac_sb = sb;
3559 ac->ac_inode = pa->pa_inode;
3560 }
3561
3562 while (bit < end) { 3609 while (bit < end) {
3563 bit = mb_find_next_zero_bit(bitmap_bh->b_data, end, bit); 3610 bit = mb_find_next_zero_bit(bitmap_bh->b_data, end, bit);
3564 if (bit >= end) 3611 if (bit >= end)
@@ -3569,16 +3616,9 @@ ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh,
3569 (unsigned) next - bit, (unsigned) group); 3616 (unsigned) next - bit, (unsigned) group);
3570 free += next - bit; 3617 free += next - bit;
3571 3618
3572 if (ac) { 3619 trace_ext4_mballoc_discard(sb, NULL, group, bit, next - bit);
3573 ac->ac_b_ex.fe_group = group; 3620 trace_ext4_mb_release_inode_pa(sb, pa->pa_inode, pa,
3574 ac->ac_b_ex.fe_start = bit; 3621 grp_blk_start + bit, next - bit);
3575 ac->ac_b_ex.fe_len = next - bit;
3576 ac->ac_b_ex.fe_logical = 0;
3577 trace_ext4_mballoc_discard(ac);
3578 }
3579
3580 trace_ext4_mb_release_inode_pa(sb, ac, pa, grp_blk_start + bit,
3581 next - bit);
3582 mb_free_blocks(pa->pa_inode, e4b, bit, next - bit); 3622 mb_free_blocks(pa->pa_inode, e4b, bit, next - bit);
3583 bit = next + 1; 3623 bit = next + 1;
3584 } 3624 }
@@ -3601,29 +3641,19 @@ ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh,
3601 3641
3602static noinline_for_stack int 3642static noinline_for_stack int
3603ext4_mb_release_group_pa(struct ext4_buddy *e4b, 3643ext4_mb_release_group_pa(struct ext4_buddy *e4b,
3604 struct ext4_prealloc_space *pa, 3644 struct ext4_prealloc_space *pa)
3605 struct ext4_allocation_context *ac)
3606{ 3645{
3607 struct super_block *sb = e4b->bd_sb; 3646 struct super_block *sb = e4b->bd_sb;
3608 ext4_group_t group; 3647 ext4_group_t group;
3609 ext4_grpblk_t bit; 3648 ext4_grpblk_t bit;
3610 3649
3611 trace_ext4_mb_release_group_pa(sb, ac, pa); 3650 trace_ext4_mb_release_group_pa(sb, pa);
3612 BUG_ON(pa->pa_deleted == 0); 3651 BUG_ON(pa->pa_deleted == 0);
3613 ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, &bit); 3652 ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, &bit);
3614 BUG_ON(group != e4b->bd_group && pa->pa_len != 0); 3653 BUG_ON(group != e4b->bd_group && pa->pa_len != 0);
3615 mb_free_blocks(pa->pa_inode, e4b, bit, pa->pa_len); 3654 mb_free_blocks(pa->pa_inode, e4b, bit, pa->pa_len);
3616 atomic_add(pa->pa_len, &EXT4_SB(sb)->s_mb_discarded); 3655 atomic_add(pa->pa_len, &EXT4_SB(sb)->s_mb_discarded);
3617 3656 trace_ext4_mballoc_discard(sb, NULL, group, bit, pa->pa_len);
3618 if (ac) {
3619 ac->ac_sb = sb;
3620 ac->ac_inode = NULL;
3621 ac->ac_b_ex.fe_group = group;
3622 ac->ac_b_ex.fe_start = bit;
3623 ac->ac_b_ex.fe_len = pa->pa_len;
3624 ac->ac_b_ex.fe_logical = 0;
3625 trace_ext4_mballoc_discard(ac);
3626 }
3627 3657
3628 return 0; 3658 return 0;
3629} 3659}
@@ -3644,7 +3674,6 @@ ext4_mb_discard_group_preallocations(struct super_block *sb,
3644 struct ext4_group_info *grp = ext4_get_group_info(sb, group); 3674 struct ext4_group_info *grp = ext4_get_group_info(sb, group);
3645 struct buffer_head *bitmap_bh = NULL; 3675 struct buffer_head *bitmap_bh = NULL;
3646 struct ext4_prealloc_space *pa, *tmp; 3676 struct ext4_prealloc_space *pa, *tmp;
3647 struct ext4_allocation_context *ac;
3648 struct list_head list; 3677 struct list_head list;
3649 struct ext4_buddy e4b; 3678 struct ext4_buddy e4b;
3650 int err; 3679 int err;
@@ -3673,9 +3702,6 @@ ext4_mb_discard_group_preallocations(struct super_block *sb,
3673 needed = EXT4_BLOCKS_PER_GROUP(sb) + 1; 3702 needed = EXT4_BLOCKS_PER_GROUP(sb) + 1;
3674 3703
3675 INIT_LIST_HEAD(&list); 3704 INIT_LIST_HEAD(&list);
3676 ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS);
3677 if (ac)
3678 ac->ac_sb = sb;
3679repeat: 3705repeat:
3680 ext4_lock_group(sb, group); 3706 ext4_lock_group(sb, group);
3681 list_for_each_entry_safe(pa, tmp, 3707 list_for_each_entry_safe(pa, tmp,
@@ -3730,9 +3756,9 @@ repeat:
3730 spin_unlock(pa->pa_obj_lock); 3756 spin_unlock(pa->pa_obj_lock);
3731 3757
3732 if (pa->pa_type == MB_GROUP_PA) 3758 if (pa->pa_type == MB_GROUP_PA)
3733 ext4_mb_release_group_pa(&e4b, pa, ac); 3759 ext4_mb_release_group_pa(&e4b, pa);
3734 else 3760 else
3735 ext4_mb_release_inode_pa(&e4b, bitmap_bh, pa, ac); 3761 ext4_mb_release_inode_pa(&e4b, bitmap_bh, pa);
3736 3762
3737 list_del(&pa->u.pa_tmp_list); 3763 list_del(&pa->u.pa_tmp_list);
3738 call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback); 3764 call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback);
@@ -3740,8 +3766,6 @@ repeat:
3740 3766
3741out: 3767out:
3742 ext4_unlock_group(sb, group); 3768 ext4_unlock_group(sb, group);
3743 if (ac)
3744 kmem_cache_free(ext4_ac_cachep, ac);
3745 ext4_mb_unload_buddy(&e4b); 3769 ext4_mb_unload_buddy(&e4b);
3746 put_bh(bitmap_bh); 3770 put_bh(bitmap_bh);
3747 return free; 3771 return free;
@@ -3762,7 +3786,6 @@ void ext4_discard_preallocations(struct inode *inode)
3762 struct super_block *sb = inode->i_sb; 3786 struct super_block *sb = inode->i_sb;
3763 struct buffer_head *bitmap_bh = NULL; 3787 struct buffer_head *bitmap_bh = NULL;
3764 struct ext4_prealloc_space *pa, *tmp; 3788 struct ext4_prealloc_space *pa, *tmp;
3765 struct ext4_allocation_context *ac;
3766 ext4_group_t group = 0; 3789 ext4_group_t group = 0;
3767 struct list_head list; 3790 struct list_head list;
3768 struct ext4_buddy e4b; 3791 struct ext4_buddy e4b;
@@ -3778,11 +3801,6 @@ void ext4_discard_preallocations(struct inode *inode)
3778 3801
3779 INIT_LIST_HEAD(&list); 3802 INIT_LIST_HEAD(&list);
3780 3803
3781 ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS);
3782 if (ac) {
3783 ac->ac_sb = sb;
3784 ac->ac_inode = inode;
3785 }
3786repeat: 3804repeat:
3787 /* first, collect all pa's in the inode */ 3805 /* first, collect all pa's in the inode */
3788 spin_lock(&ei->i_prealloc_lock); 3806 spin_lock(&ei->i_prealloc_lock);
@@ -3852,7 +3870,7 @@ repeat:
3852 3870
3853 ext4_lock_group(sb, group); 3871 ext4_lock_group(sb, group);
3854 list_del(&pa->pa_group_list); 3872 list_del(&pa->pa_group_list);
3855 ext4_mb_release_inode_pa(&e4b, bitmap_bh, pa, ac); 3873 ext4_mb_release_inode_pa(&e4b, bitmap_bh, pa);
3856 ext4_unlock_group(sb, group); 3874 ext4_unlock_group(sb, group);
3857 3875
3858 ext4_mb_unload_buddy(&e4b); 3876 ext4_mb_unload_buddy(&e4b);
@@ -3861,8 +3879,6 @@ repeat:
3861 list_del(&pa->u.pa_tmp_list); 3879 list_del(&pa->u.pa_tmp_list);
3862 call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback); 3880 call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback);
3863 } 3881 }
3864 if (ac)
3865 kmem_cache_free(ext4_ac_cachep, ac);
3866} 3882}
3867 3883
3868/* 3884/*
@@ -4060,14 +4076,10 @@ ext4_mb_discard_lg_preallocations(struct super_block *sb,
4060 struct ext4_buddy e4b; 4076 struct ext4_buddy e4b;
4061 struct list_head discard_list; 4077 struct list_head discard_list;
4062 struct ext4_prealloc_space *pa, *tmp; 4078 struct ext4_prealloc_space *pa, *tmp;
4063 struct ext4_allocation_context *ac;
4064 4079
4065 mb_debug(1, "discard locality group preallocation\n"); 4080 mb_debug(1, "discard locality group preallocation\n");
4066 4081
4067 INIT_LIST_HEAD(&discard_list); 4082 INIT_LIST_HEAD(&discard_list);
4068 ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS);
4069 if (ac)
4070 ac->ac_sb = sb;
4071 4083
4072 spin_lock(&lg->lg_prealloc_lock); 4084 spin_lock(&lg->lg_prealloc_lock);
4073 list_for_each_entry_rcu(pa, &lg->lg_prealloc_list[order], 4085 list_for_each_entry_rcu(pa, &lg->lg_prealloc_list[order],
@@ -4119,15 +4131,13 @@ ext4_mb_discard_lg_preallocations(struct super_block *sb,
4119 } 4131 }
4120 ext4_lock_group(sb, group); 4132 ext4_lock_group(sb, group);
4121 list_del(&pa->pa_group_list); 4133 list_del(&pa->pa_group_list);
4122 ext4_mb_release_group_pa(&e4b, pa, ac); 4134 ext4_mb_release_group_pa(&e4b, pa);
4123 ext4_unlock_group(sb, group); 4135 ext4_unlock_group(sb, group);
4124 4136
4125 ext4_mb_unload_buddy(&e4b); 4137 ext4_mb_unload_buddy(&e4b);
4126 list_del(&pa->u.pa_tmp_list); 4138 list_del(&pa->u.pa_tmp_list);
4127 call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback); 4139 call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback);
4128 } 4140 }
4129 if (ac)
4130 kmem_cache_free(ext4_ac_cachep, ac);
4131} 4141}
4132 4142
4133/* 4143/*
@@ -4491,7 +4501,6 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode,
4491{ 4501{
4492 struct buffer_head *bitmap_bh = NULL; 4502 struct buffer_head *bitmap_bh = NULL;
4493 struct super_block *sb = inode->i_sb; 4503 struct super_block *sb = inode->i_sb;
4494 struct ext4_allocation_context *ac = NULL;
4495 struct ext4_group_desc *gdp; 4504 struct ext4_group_desc *gdp;
4496 unsigned long freed = 0; 4505 unsigned long freed = 0;
4497 unsigned int overflow; 4506 unsigned int overflow;
@@ -4531,6 +4540,8 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode,
4531 if (!bh) 4540 if (!bh)
4532 tbh = sb_find_get_block(inode->i_sb, 4541 tbh = sb_find_get_block(inode->i_sb,
4533 block + i); 4542 block + i);
4543 if (unlikely(!tbh))
4544 continue;
4534 ext4_forget(handle, flags & EXT4_FREE_BLOCKS_METADATA, 4545 ext4_forget(handle, flags & EXT4_FREE_BLOCKS_METADATA,
4535 inode, tbh, block + i); 4546 inode, tbh, block + i);
4536 } 4547 }
@@ -4546,12 +4557,6 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode,
4546 if (!ext4_should_writeback_data(inode)) 4557 if (!ext4_should_writeback_data(inode))
4547 flags |= EXT4_FREE_BLOCKS_METADATA; 4558 flags |= EXT4_FREE_BLOCKS_METADATA;
4548 4559
4549 ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS);
4550 if (ac) {
4551 ac->ac_inode = inode;
4552 ac->ac_sb = sb;
4553 }
4554
4555do_more: 4560do_more:
4556 overflow = 0; 4561 overflow = 0;
4557 ext4_get_group_no_and_offset(sb, block, &block_group, &bit); 4562 ext4_get_group_no_and_offset(sb, block, &block_group, &bit);
@@ -4609,12 +4614,7 @@ do_more:
4609 BUG_ON(!mb_test_bit(bit + i, bitmap_bh->b_data)); 4614 BUG_ON(!mb_test_bit(bit + i, bitmap_bh->b_data));
4610 } 4615 }
4611#endif 4616#endif
4612 if (ac) { 4617 trace_ext4_mballoc_free(sb, inode, block_group, bit, count);
4613 ac->ac_b_ex.fe_group = block_group;
4614 ac->ac_b_ex.fe_start = bit;
4615 ac->ac_b_ex.fe_len = count;
4616 trace_ext4_mballoc_free(ac);
4617 }
4618 4618
4619 err = ext4_mb_load_buddy(sb, block_group, &e4b); 4619 err = ext4_mb_load_buddy(sb, block_group, &e4b);
4620 if (err) 4620 if (err)
@@ -4640,12 +4640,12 @@ do_more:
4640 * with group lock held. generate_buddy look at 4640 * with group lock held. generate_buddy look at
4641 * them with group lock_held 4641 * them with group lock_held
4642 */ 4642 */
4643 if (test_opt(sb, DISCARD))
4644 ext4_issue_discard(sb, block_group, bit, count);
4643 ext4_lock_group(sb, block_group); 4645 ext4_lock_group(sb, block_group);
4644 mb_clear_bits(bitmap_bh->b_data, bit, count); 4646 mb_clear_bits(bitmap_bh->b_data, bit, count);
4645 mb_free_blocks(inode, &e4b, bit, count); 4647 mb_free_blocks(inode, &e4b, bit, count);
4646 ext4_mb_return_to_preallocation(inode, &e4b, block, count); 4648 ext4_mb_return_to_preallocation(inode, &e4b, block, count);
4647 if (test_opt(sb, DISCARD))
4648 ext4_issue_discard(sb, block_group, bit, count);
4649 } 4649 }
4650 4650
4651 ret = ext4_free_blks_count(sb, gdp) + count; 4651 ret = ext4_free_blks_count(sb, gdp) + count;
@@ -4685,7 +4685,190 @@ error_return:
4685 dquot_free_block(inode, freed); 4685 dquot_free_block(inode, freed);
4686 brelse(bitmap_bh); 4686 brelse(bitmap_bh);
4687 ext4_std_error(sb, err); 4687 ext4_std_error(sb, err);
4688 if (ac)
4689 kmem_cache_free(ext4_ac_cachep, ac);
4690 return; 4688 return;
4691} 4689}
4690
4691/**
4692 * ext4_trim_extent -- function to TRIM one single free extent in the group
4693 * @sb: super block for the file system
4694 * @start: starting block of the free extent in the alloc. group
4695 * @count: number of blocks to TRIM
4696 * @group: alloc. group we are working with
4697 * @e4b: ext4 buddy for the group
4698 *
4699 * Trim "count" blocks starting at "start" in the "group". To assure that no
4700 * one will allocate those blocks, mark it as used in buddy bitmap. This must
4701 * be called with under the group lock.
4702 */
4703static int ext4_trim_extent(struct super_block *sb, int start, int count,
4704 ext4_group_t group, struct ext4_buddy *e4b)
4705{
4706 struct ext4_free_extent ex;
4707 int ret = 0;
4708
4709 assert_spin_locked(ext4_group_lock_ptr(sb, group));
4710
4711 ex.fe_start = start;
4712 ex.fe_group = group;
4713 ex.fe_len = count;
4714
4715 /*
4716 * Mark blocks used, so no one can reuse them while
4717 * being trimmed.
4718 */
4719 mb_mark_used(e4b, &ex);
4720 ext4_unlock_group(sb, group);
4721
4722 ret = ext4_issue_discard(sb, group, start, count);
4723 if (ret)
4724 ext4_std_error(sb, ret);
4725
4726 ext4_lock_group(sb, group);
4727 mb_free_blocks(NULL, e4b, start, ex.fe_len);
4728 return ret;
4729}
4730
4731/**
4732 * ext4_trim_all_free -- function to trim all free space in alloc. group
4733 * @sb: super block for file system
4734 * @e4b: ext4 buddy
4735 * @start: first group block to examine
4736 * @max: last group block to examine
4737 * @minblocks: minimum extent block count
4738 *
4739 * ext4_trim_all_free walks through group's buddy bitmap searching for free
4740 * extents. When the free block is found, ext4_trim_extent is called to TRIM
4741 * the extent.
4742 *
4743 *
4744 * ext4_trim_all_free walks through group's block bitmap searching for free
4745 * extents. When the free extent is found, mark it as used in group buddy
4746 * bitmap. Then issue a TRIM command on this extent and free the extent in
4747 * the group buddy bitmap. This is done until whole group is scanned.
4748 */
4749ext4_grpblk_t ext4_trim_all_free(struct super_block *sb, struct ext4_buddy *e4b,
4750 ext4_grpblk_t start, ext4_grpblk_t max, ext4_grpblk_t minblocks)
4751{
4752 void *bitmap;
4753 ext4_grpblk_t next, count = 0;
4754 ext4_group_t group;
4755 int ret = 0;
4756
4757 BUG_ON(e4b == NULL);
4758
4759 bitmap = e4b->bd_bitmap;
4760 group = e4b->bd_group;
4761 start = (e4b->bd_info->bb_first_free > start) ?
4762 e4b->bd_info->bb_first_free : start;
4763 ext4_lock_group(sb, group);
4764
4765 while (start < max) {
4766 start = mb_find_next_zero_bit(bitmap, max, start);
4767 if (start >= max)
4768 break;
4769 next = mb_find_next_bit(bitmap, max, start);
4770
4771 if ((next - start) >= minblocks) {
4772 ret = ext4_trim_extent(sb, start,
4773 next - start, group, e4b);
4774 if (ret < 0)
4775 break;
4776 count += next - start;
4777 }
4778 start = next + 1;
4779
4780 if (fatal_signal_pending(current)) {
4781 count = -ERESTARTSYS;
4782 break;
4783 }
4784
4785 if (need_resched()) {
4786 ext4_unlock_group(sb, group);
4787 cond_resched();
4788 ext4_lock_group(sb, group);
4789 }
4790
4791 if ((e4b->bd_info->bb_free - count) < minblocks)
4792 break;
4793 }
4794 ext4_unlock_group(sb, group);
4795
4796 ext4_debug("trimmed %d blocks in the group %d\n",
4797 count, group);
4798
4799 if (ret < 0)
4800 count = ret;
4801
4802 return count;
4803}
4804
4805/**
4806 * ext4_trim_fs() -- trim ioctl handle function
4807 * @sb: superblock for filesystem
4808 * @range: fstrim_range structure
4809 *
4810 * start: First Byte to trim
4811 * len: number of Bytes to trim from start
4812 * minlen: minimum extent length in Bytes
4813 * ext4_trim_fs goes through all allocation groups containing Bytes from
4814 * start to start+len. For each such a group ext4_trim_all_free function
4815 * is invoked to trim all free space.
4816 */
4817int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range)
4818{
4819 struct ext4_buddy e4b;
4820 ext4_group_t first_group, last_group;
4821 ext4_group_t group, ngroups = ext4_get_groups_count(sb);
4822 ext4_grpblk_t cnt = 0, first_block, last_block;
4823 uint64_t start, len, minlen, trimmed;
4824 int ret = 0;
4825
4826 start = range->start >> sb->s_blocksize_bits;
4827 len = range->len >> sb->s_blocksize_bits;
4828 minlen = range->minlen >> sb->s_blocksize_bits;
4829 trimmed = 0;
4830
4831 if (unlikely(minlen > EXT4_BLOCKS_PER_GROUP(sb)))
4832 return -EINVAL;
4833
4834 /* Determine first and last group to examine based on start and len */
4835 ext4_get_group_no_and_offset(sb, (ext4_fsblk_t) start,
4836 &first_group, &first_block);
4837 ext4_get_group_no_and_offset(sb, (ext4_fsblk_t) (start + len),
4838 &last_group, &last_block);
4839 last_group = (last_group > ngroups - 1) ? ngroups - 1 : last_group;
4840 last_block = EXT4_BLOCKS_PER_GROUP(sb);
4841
4842 if (first_group > last_group)
4843 return -EINVAL;
4844
4845 for (group = first_group; group <= last_group; group++) {
4846 ret = ext4_mb_load_buddy(sb, group, &e4b);
4847 if (ret) {
4848 ext4_error(sb, "Error in loading buddy "
4849 "information for %u", group);
4850 break;
4851 }
4852
4853 if (len >= EXT4_BLOCKS_PER_GROUP(sb))
4854 len -= (EXT4_BLOCKS_PER_GROUP(sb) - first_block);
4855 else
4856 last_block = len;
4857
4858 if (e4b.bd_info->bb_free >= minlen) {
4859 cnt = ext4_trim_all_free(sb, &e4b, first_block,
4860 last_block, minlen);
4861 if (cnt < 0) {
4862 ret = cnt;
4863 ext4_mb_unload_buddy(&e4b);
4864 break;
4865 }
4866 }
4867 ext4_mb_unload_buddy(&e4b);
4868 trimmed += cnt;
4869 first_block = 0;
4870 }
4871 range->len = trimmed * sb->s_blocksize;
4872
4873 return ret;
4874}
diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c
index 1765c2c50a9b..25f3a974b725 100644
--- a/fs/ext4/migrate.c
+++ b/fs/ext4/migrate.c
@@ -412,7 +412,7 @@ static int free_ext_idx(handle_t *handle, struct inode *inode,
412 struct buffer_head *bh; 412 struct buffer_head *bh;
413 struct ext4_extent_header *eh; 413 struct ext4_extent_header *eh;
414 414
415 block = idx_pblock(ix); 415 block = ext4_idx_pblock(ix);
416 bh = sb_bread(inode->i_sb, block); 416 bh = sb_bread(inode->i_sb, block);
417 if (!bh) 417 if (!bh)
418 return -EIO; 418 return -EIO;
diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c
index 5f1ed9fc913c..b9f3e7862f13 100644
--- a/fs/ext4/move_extent.c
+++ b/fs/ext4/move_extent.c
@@ -85,7 +85,7 @@ mext_next_extent(struct inode *inode, struct ext4_ext_path *path,
85 if (EXT_LAST_EXTENT(path[ppos].p_hdr) > path[ppos].p_ext) { 85 if (EXT_LAST_EXTENT(path[ppos].p_hdr) > path[ppos].p_ext) {
86 /* leaf block */ 86 /* leaf block */
87 *extent = ++path[ppos].p_ext; 87 *extent = ++path[ppos].p_ext;
88 path[ppos].p_block = ext_pblock(path[ppos].p_ext); 88 path[ppos].p_block = ext4_ext_pblock(path[ppos].p_ext);
89 return 0; 89 return 0;
90 } 90 }
91 91
@@ -96,7 +96,7 @@ mext_next_extent(struct inode *inode, struct ext4_ext_path *path,
96 96
97 /* index block */ 97 /* index block */
98 path[ppos].p_idx++; 98 path[ppos].p_idx++;
99 path[ppos].p_block = idx_pblock(path[ppos].p_idx); 99 path[ppos].p_block = ext4_idx_pblock(path[ppos].p_idx);
100 if (path[ppos+1].p_bh) 100 if (path[ppos+1].p_bh)
101 brelse(path[ppos+1].p_bh); 101 brelse(path[ppos+1].p_bh);
102 path[ppos+1].p_bh = 102 path[ppos+1].p_bh =
@@ -111,7 +111,7 @@ mext_next_extent(struct inode *inode, struct ext4_ext_path *path,
111 path[cur_ppos].p_idx = 111 path[cur_ppos].p_idx =
112 EXT_FIRST_INDEX(path[cur_ppos].p_hdr); 112 EXT_FIRST_INDEX(path[cur_ppos].p_hdr);
113 path[cur_ppos].p_block = 113 path[cur_ppos].p_block =
114 idx_pblock(path[cur_ppos].p_idx); 114 ext4_idx_pblock(path[cur_ppos].p_idx);
115 if (path[cur_ppos+1].p_bh) 115 if (path[cur_ppos+1].p_bh)
116 brelse(path[cur_ppos+1].p_bh); 116 brelse(path[cur_ppos+1].p_bh);
117 path[cur_ppos+1].p_bh = sb_bread(inode->i_sb, 117 path[cur_ppos+1].p_bh = sb_bread(inode->i_sb,
@@ -133,7 +133,7 @@ mext_next_extent(struct inode *inode, struct ext4_ext_path *path,
133 path[leaf_ppos].p_ext = *extent = 133 path[leaf_ppos].p_ext = *extent =
134 EXT_FIRST_EXTENT(path[leaf_ppos].p_hdr); 134 EXT_FIRST_EXTENT(path[leaf_ppos].p_hdr);
135 path[leaf_ppos].p_block = 135 path[leaf_ppos].p_block =
136 ext_pblock(path[leaf_ppos].p_ext); 136 ext4_ext_pblock(path[leaf_ppos].p_ext);
137 return 0; 137 return 0;
138 } 138 }
139 } 139 }
@@ -249,7 +249,7 @@ mext_insert_across_blocks(handle_t *handle, struct inode *orig_inode,
249 */ 249 */
250 o_end->ee_block = end_ext->ee_block; 250 o_end->ee_block = end_ext->ee_block;
251 o_end->ee_len = end_ext->ee_len; 251 o_end->ee_len = end_ext->ee_len;
252 ext4_ext_store_pblock(o_end, ext_pblock(end_ext)); 252 ext4_ext_store_pblock(o_end, ext4_ext_pblock(end_ext));
253 } 253 }
254 254
255 o_start->ee_len = start_ext->ee_len; 255 o_start->ee_len = start_ext->ee_len;
@@ -276,7 +276,7 @@ mext_insert_across_blocks(handle_t *handle, struct inode *orig_inode,
276 */ 276 */
277 o_end->ee_block = end_ext->ee_block; 277 o_end->ee_block = end_ext->ee_block;
278 o_end->ee_len = end_ext->ee_len; 278 o_end->ee_len = end_ext->ee_len;
279 ext4_ext_store_pblock(o_end, ext_pblock(end_ext)); 279 ext4_ext_store_pblock(o_end, ext4_ext_pblock(end_ext));
280 280
281 /* 281 /*
282 * Set 0 to the extent block if new_ext was 282 * Set 0 to the extent block if new_ext was
@@ -361,7 +361,7 @@ mext_insert_inside_block(struct ext4_extent *o_start,
361 /* Insert new entry */ 361 /* Insert new entry */
362 if (new_ext->ee_len) { 362 if (new_ext->ee_len) {
363 o_start[i] = *new_ext; 363 o_start[i] = *new_ext;
364 ext4_ext_store_pblock(&o_start[i++], ext_pblock(new_ext)); 364 ext4_ext_store_pblock(&o_start[i++], ext4_ext_pblock(new_ext));
365 } 365 }
366 366
367 /* Insert end entry */ 367 /* Insert end entry */
@@ -488,7 +488,7 @@ mext_leaf_block(handle_t *handle, struct inode *orig_inode,
488 start_ext.ee_len = end_ext.ee_len = 0; 488 start_ext.ee_len = end_ext.ee_len = 0;
489 489
490 new_ext.ee_block = cpu_to_le32(*from); 490 new_ext.ee_block = cpu_to_le32(*from);
491 ext4_ext_store_pblock(&new_ext, ext_pblock(dext)); 491 ext4_ext_store_pblock(&new_ext, ext4_ext_pblock(dext));
492 new_ext.ee_len = dext->ee_len; 492 new_ext.ee_len = dext->ee_len;
493 new_ext_alen = ext4_ext_get_actual_len(&new_ext); 493 new_ext_alen = ext4_ext_get_actual_len(&new_ext);
494 new_ext_end = le32_to_cpu(new_ext.ee_block) + new_ext_alen - 1; 494 new_ext_end = le32_to_cpu(new_ext.ee_block) + new_ext_alen - 1;
@@ -553,7 +553,7 @@ mext_leaf_block(handle_t *handle, struct inode *orig_inode,
553 copy_extent_status(oext, &end_ext); 553 copy_extent_status(oext, &end_ext);
554 end_ext_alen = ext4_ext_get_actual_len(&end_ext); 554 end_ext_alen = ext4_ext_get_actual_len(&end_ext);
555 ext4_ext_store_pblock(&end_ext, 555 ext4_ext_store_pblock(&end_ext,
556 (ext_pblock(o_end) + oext_alen - end_ext_alen)); 556 (ext4_ext_pblock(o_end) + oext_alen - end_ext_alen));
557 end_ext.ee_block = 557 end_ext.ee_block =
558 cpu_to_le32(le32_to_cpu(o_end->ee_block) + 558 cpu_to_le32(le32_to_cpu(o_end->ee_block) +
559 oext_alen - end_ext_alen); 559 oext_alen - end_ext_alen);
@@ -604,7 +604,7 @@ mext_calc_swap_extents(struct ext4_extent *tmp_dext,
604 /* When tmp_dext is too large, pick up the target range. */ 604 /* When tmp_dext is too large, pick up the target range. */
605 diff = donor_off - le32_to_cpu(tmp_dext->ee_block); 605 diff = donor_off - le32_to_cpu(tmp_dext->ee_block);
606 606
607 ext4_ext_store_pblock(tmp_dext, ext_pblock(tmp_dext) + diff); 607 ext4_ext_store_pblock(tmp_dext, ext4_ext_pblock(tmp_dext) + diff);
608 tmp_dext->ee_block = 608 tmp_dext->ee_block =
609 cpu_to_le32(le32_to_cpu(tmp_dext->ee_block) + diff); 609 cpu_to_le32(le32_to_cpu(tmp_dext->ee_block) + diff);
610 tmp_dext->ee_len = cpu_to_le16(le16_to_cpu(tmp_dext->ee_len) - diff); 610 tmp_dext->ee_len = cpu_to_le16(le16_to_cpu(tmp_dext->ee_len) - diff);
@@ -613,7 +613,7 @@ mext_calc_swap_extents(struct ext4_extent *tmp_dext,
613 tmp_dext->ee_len = cpu_to_le16(max_count); 613 tmp_dext->ee_len = cpu_to_le16(max_count);
614 614
615 orig_diff = orig_off - le32_to_cpu(tmp_oext->ee_block); 615 orig_diff = orig_off - le32_to_cpu(tmp_oext->ee_block);
616 ext4_ext_store_pblock(tmp_oext, ext_pblock(tmp_oext) + orig_diff); 616 ext4_ext_store_pblock(tmp_oext, ext4_ext_pblock(tmp_oext) + orig_diff);
617 617
618 /* Adjust extent length if donor extent is larger than orig */ 618 /* Adjust extent length if donor extent is larger than orig */
619 if (ext4_ext_get_actual_len(tmp_dext) > 619 if (ext4_ext_get_actual_len(tmp_dext) >
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 314c0d3b3fa9..92203b8a099f 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -856,6 +856,7 @@ static struct buffer_head * ext4_find_entry (struct inode *dir,
856 struct buffer_head *bh_use[NAMEI_RA_SIZE]; 856 struct buffer_head *bh_use[NAMEI_RA_SIZE];
857 struct buffer_head *bh, *ret = NULL; 857 struct buffer_head *bh, *ret = NULL;
858 ext4_lblk_t start, block, b; 858 ext4_lblk_t start, block, b;
859 const u8 *name = d_name->name;
859 int ra_max = 0; /* Number of bh's in the readahead 860 int ra_max = 0; /* Number of bh's in the readahead
860 buffer, bh_use[] */ 861 buffer, bh_use[] */
861 int ra_ptr = 0; /* Current index into readahead 862 int ra_ptr = 0; /* Current index into readahead
@@ -870,6 +871,16 @@ static struct buffer_head * ext4_find_entry (struct inode *dir,
870 namelen = d_name->len; 871 namelen = d_name->len;
871 if (namelen > EXT4_NAME_LEN) 872 if (namelen > EXT4_NAME_LEN)
872 return NULL; 873 return NULL;
874 if ((namelen <= 2) && (name[0] == '.') &&
875 (name[1] == '.' || name[1] == '0')) {
876 /*
877 * "." or ".." will only be in the first block
878 * NFS may look up ".."; "." should be handled by the VFS
879 */
880 block = start = 0;
881 nblocks = 1;
882 goto restart;
883 }
873 if (is_dx(dir)) { 884 if (is_dx(dir)) {
874 bh = ext4_dx_find_entry(dir, d_name, res_dir, &err); 885 bh = ext4_dx_find_entry(dir, d_name, res_dir, &err);
875 /* 886 /*
@@ -960,55 +971,35 @@ cleanup_and_exit:
960static struct buffer_head * ext4_dx_find_entry(struct inode *dir, const struct qstr *d_name, 971static struct buffer_head * ext4_dx_find_entry(struct inode *dir, const struct qstr *d_name,
961 struct ext4_dir_entry_2 **res_dir, int *err) 972 struct ext4_dir_entry_2 **res_dir, int *err)
962{ 973{
963 struct super_block * sb; 974 struct super_block * sb = dir->i_sb;
964 struct dx_hash_info hinfo; 975 struct dx_hash_info hinfo;
965 u32 hash;
966 struct dx_frame frames[2], *frame; 976 struct dx_frame frames[2], *frame;
967 struct ext4_dir_entry_2 *de, *top;
968 struct buffer_head *bh; 977 struct buffer_head *bh;
969 ext4_lblk_t block; 978 ext4_lblk_t block;
970 int retval; 979 int retval;
971 int namelen = d_name->len;
972 const u8 *name = d_name->name;
973 980
974 sb = dir->i_sb; 981 if (!(frame = dx_probe(d_name, dir, &hinfo, frames, err)))
975 /* NFS may look up ".." - look at dx_root directory block */ 982 return NULL;
976 if (namelen > 2 || name[0] != '.'||(name[1] != '.' && name[1] != '\0')){
977 if (!(frame = dx_probe(d_name, dir, &hinfo, frames, err)))
978 return NULL;
979 } else {
980 frame = frames;
981 frame->bh = NULL; /* for dx_release() */
982 frame->at = (struct dx_entry *)frames; /* hack for zero entry*/
983 dx_set_block(frame->at, 0); /* dx_root block is 0 */
984 }
985 hash = hinfo.hash;
986 do { 983 do {
987 block = dx_get_block(frame->at); 984 block = dx_get_block(frame->at);
988 if (!(bh = ext4_bread (NULL,dir, block, 0, err))) 985 if (!(bh = ext4_bread(NULL, dir, block, 0, err)))
989 goto errout; 986 goto errout;
990 de = (struct ext4_dir_entry_2 *) bh->b_data;
991 top = (struct ext4_dir_entry_2 *) ((char *) de + sb->s_blocksize -
992 EXT4_DIR_REC_LEN(0));
993 for (; de < top; de = ext4_next_entry(de, sb->s_blocksize)) {
994 int off = (block << EXT4_BLOCK_SIZE_BITS(sb))
995 + ((char *) de - bh->b_data);
996
997 if (!ext4_check_dir_entry(dir, de, bh, off)) {
998 brelse(bh);
999 *err = ERR_BAD_DX_DIR;
1000 goto errout;
1001 }
1002 987
1003 if (ext4_match(namelen, name, de)) { 988 retval = search_dirblock(bh, dir, d_name,
1004 *res_dir = de; 989 block << EXT4_BLOCK_SIZE_BITS(sb),
1005 dx_release(frames); 990 res_dir);
1006 return bh; 991 if (retval == 1) { /* Success! */
1007 } 992 dx_release(frames);
993 return bh;
1008 } 994 }
1009 brelse(bh); 995 brelse(bh);
996 if (retval == -1) {
997 *err = ERR_BAD_DX_DIR;
998 goto errout;
999 }
1000
1010 /* Check to see if we should continue to search */ 1001 /* Check to see if we should continue to search */
1011 retval = ext4_htree_next_block(dir, hash, frame, 1002 retval = ext4_htree_next_block(dir, hinfo.hash, frame,
1012 frames, NULL); 1003 frames, NULL);
1013 if (retval < 0) { 1004 if (retval < 0) {
1014 ext4_warning(sb, 1005 ext4_warning(sb,
@@ -2312,7 +2303,7 @@ retry:
2312 2303
2313 inode->i_ctime = ext4_current_time(inode); 2304 inode->i_ctime = ext4_current_time(inode);
2314 ext4_inc_count(handle, inode); 2305 ext4_inc_count(handle, inode);
2315 atomic_inc(&inode->i_count); 2306 ihold(inode);
2316 2307
2317 err = ext4_add_entry(handle, dentry, inode); 2308 err = ext4_add_entry(handle, dentry, inode);
2318 if (!err) { 2309 if (!err) {
diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c
new file mode 100644
index 000000000000..46a7d6a9d976
--- /dev/null
+++ b/fs/ext4/page-io.c
@@ -0,0 +1,430 @@
1/*
2 * linux/fs/ext4/page-io.c
3 *
4 * This contains the new page_io functions for ext4
5 *
6 * Written by Theodore Ts'o, 2010.
7 */
8
9#include <linux/module.h>
10#include <linux/fs.h>
11#include <linux/time.h>
12#include <linux/jbd2.h>
13#include <linux/highuid.h>
14#include <linux/pagemap.h>
15#include <linux/quotaops.h>
16#include <linux/string.h>
17#include <linux/buffer_head.h>
18#include <linux/writeback.h>
19#include <linux/pagevec.h>
20#include <linux/mpage.h>
21#include <linux/namei.h>
22#include <linux/uio.h>
23#include <linux/bio.h>
24#include <linux/workqueue.h>
25#include <linux/kernel.h>
26#include <linux/slab.h>
27
28#include "ext4_jbd2.h"
29#include "xattr.h"
30#include "acl.h"
31#include "ext4_extents.h"
32
33static struct kmem_cache *io_page_cachep, *io_end_cachep;
34
35int __init ext4_init_pageio(void)
36{
37 io_page_cachep = KMEM_CACHE(ext4_io_page, SLAB_RECLAIM_ACCOUNT);
38 if (io_page_cachep == NULL)
39 return -ENOMEM;
40 io_end_cachep = KMEM_CACHE(ext4_io_end, SLAB_RECLAIM_ACCOUNT);
41 if (io_page_cachep == NULL) {
42 kmem_cache_destroy(io_page_cachep);
43 return -ENOMEM;
44 }
45
46 return 0;
47}
48
49void ext4_exit_pageio(void)
50{
51 kmem_cache_destroy(io_end_cachep);
52 kmem_cache_destroy(io_page_cachep);
53}
54
55void ext4_free_io_end(ext4_io_end_t *io)
56{
57 int i;
58
59 BUG_ON(!io);
60 if (io->page)
61 put_page(io->page);
62 for (i = 0; i < io->num_io_pages; i++) {
63 if (--io->pages[i]->p_count == 0) {
64 struct page *page = io->pages[i]->p_page;
65
66 end_page_writeback(page);
67 put_page(page);
68 kmem_cache_free(io_page_cachep, io->pages[i]);
69 }
70 }
71 io->num_io_pages = 0;
72 iput(io->inode);
73 kmem_cache_free(io_end_cachep, io);
74}
75
76/*
77 * check a range of space and convert unwritten extents to written.
78 */
79int ext4_end_io_nolock(ext4_io_end_t *io)
80{
81 struct inode *inode = io->inode;
82 loff_t offset = io->offset;
83 ssize_t size = io->size;
84 int ret = 0;
85
86 ext4_debug("ext4_end_io_nolock: io 0x%p from inode %lu,list->next 0x%p,"
87 "list->prev 0x%p\n",
88 io, inode->i_ino, io->list.next, io->list.prev);
89
90 if (list_empty(&io->list))
91 return ret;
92
93 if (!(io->flag & EXT4_IO_END_UNWRITTEN))
94 return ret;
95
96 ret = ext4_convert_unwritten_extents(inode, offset, size);
97 if (ret < 0) {
98 printk(KERN_EMERG "%s: failed to convert unwritten "
99 "extents to written extents, error is %d "
100 "io is still on inode %lu aio dio list\n",
101 __func__, ret, inode->i_ino);
102 return ret;
103 }
104
105 if (io->iocb)
106 aio_complete(io->iocb, io->result, 0);
107 /* clear the DIO AIO unwritten flag */
108 io->flag &= ~EXT4_IO_END_UNWRITTEN;
109 return ret;
110}
111
112/*
113 * work on completed aio dio IO, to convert unwritten extents to extents
114 */
115static void ext4_end_io_work(struct work_struct *work)
116{
117 ext4_io_end_t *io = container_of(work, ext4_io_end_t, work);
118 struct inode *inode = io->inode;
119 struct ext4_inode_info *ei = EXT4_I(inode);
120 unsigned long flags;
121 int ret;
122
123 mutex_lock(&inode->i_mutex);
124 ret = ext4_end_io_nolock(io);
125 if (ret < 0) {
126 mutex_unlock(&inode->i_mutex);
127 return;
128 }
129
130 spin_lock_irqsave(&ei->i_completed_io_lock, flags);
131 if (!list_empty(&io->list))
132 list_del_init(&io->list);
133 spin_unlock_irqrestore(&ei->i_completed_io_lock, flags);
134 mutex_unlock(&inode->i_mutex);
135 ext4_free_io_end(io);
136}
137
138ext4_io_end_t *ext4_init_io_end(struct inode *inode, gfp_t flags)
139{
140 ext4_io_end_t *io = NULL;
141
142 io = kmem_cache_alloc(io_end_cachep, flags);
143 if (io) {
144 memset(io, 0, sizeof(*io));
145 io->inode = igrab(inode);
146 BUG_ON(!io->inode);
147 INIT_WORK(&io->work, ext4_end_io_work);
148 INIT_LIST_HEAD(&io->list);
149 }
150 return io;
151}
152
153/*
154 * Print an buffer I/O error compatible with the fs/buffer.c. This
155 * provides compatibility with dmesg scrapers that look for a specific
156 * buffer I/O error message. We really need a unified error reporting
157 * structure to userspace ala Digital Unix's uerf system, but it's
158 * probably not going to happen in my lifetime, due to LKML politics...
159 */
160static void buffer_io_error(struct buffer_head *bh)
161{
162 char b[BDEVNAME_SIZE];
163 printk(KERN_ERR "Buffer I/O error on device %s, logical block %llu\n",
164 bdevname(bh->b_bdev, b),
165 (unsigned long long)bh->b_blocknr);
166}
167
168static void ext4_end_bio(struct bio *bio, int error)
169{
170 ext4_io_end_t *io_end = bio->bi_private;
171 struct workqueue_struct *wq;
172 struct inode *inode;
173 unsigned long flags;
174 ext4_fsblk_t err_block;
175 int i;
176
177 BUG_ON(!io_end);
178 inode = io_end->inode;
179 bio->bi_private = NULL;
180 bio->bi_end_io = NULL;
181 if (test_bit(BIO_UPTODATE, &bio->bi_flags))
182 error = 0;
183 err_block = bio->bi_sector >> (inode->i_blkbits - 9);
184 bio_put(bio);
185
186 if (!(inode->i_sb->s_flags & MS_ACTIVE)) {
187 pr_err("sb umounted, discard end_io request for inode %lu\n",
188 io_end->inode->i_ino);
189 ext4_free_io_end(io_end);
190 return;
191 }
192
193 if (error) {
194 io_end->flag |= EXT4_IO_END_ERROR;
195 ext4_warning(inode->i_sb, "I/O error writing to inode %lu "
196 "(offset %llu size %ld starting block %llu)",
197 inode->i_ino,
198 (unsigned long long) io_end->offset,
199 (long) io_end->size,
200 (unsigned long long) err_block);
201 }
202
203 for (i = 0; i < io_end->num_io_pages; i++) {
204 struct page *page = io_end->pages[i]->p_page;
205 struct buffer_head *bh, *head;
206 int partial_write = 0;
207
208 head = page_buffers(page);
209 if (error)
210 SetPageError(page);
211 BUG_ON(!head);
212 if (head->b_size == PAGE_CACHE_SIZE)
213 clear_buffer_dirty(head);
214 else {
215 loff_t offset;
216 loff_t io_end_offset = io_end->offset + io_end->size;
217
218 offset = (sector_t) page->index << PAGE_CACHE_SHIFT;
219 bh = head;
220 do {
221 if ((offset >= io_end->offset) &&
222 (offset+bh->b_size <= io_end_offset)) {
223 if (error)
224 buffer_io_error(bh);
225
226 clear_buffer_dirty(bh);
227 }
228 if (buffer_delay(bh))
229 partial_write = 1;
230 else if (!buffer_mapped(bh))
231 clear_buffer_dirty(bh);
232 else if (buffer_dirty(bh))
233 partial_write = 1;
234 offset += bh->b_size;
235 bh = bh->b_this_page;
236 } while (bh != head);
237 }
238
239 if (--io_end->pages[i]->p_count == 0) {
240 struct page *page = io_end->pages[i]->p_page;
241
242 end_page_writeback(page);
243 put_page(page);
244 kmem_cache_free(io_page_cachep, io_end->pages[i]);
245 }
246
247 /*
248 * If this is a partial write which happened to make
249 * all buffers uptodate then we can optimize away a
250 * bogus readpage() for the next read(). Here we
251 * 'discover' whether the page went uptodate as a
252 * result of this (potentially partial) write.
253 */
254 if (!partial_write)
255 SetPageUptodate(page);
256 }
257
258 io_end->num_io_pages = 0;
259
260 /* Add the io_end to per-inode completed io list*/
261 spin_lock_irqsave(&EXT4_I(inode)->i_completed_io_lock, flags);
262 list_add_tail(&io_end->list, &EXT4_I(inode)->i_completed_io_list);
263 spin_unlock_irqrestore(&EXT4_I(inode)->i_completed_io_lock, flags);
264
265 wq = EXT4_SB(inode->i_sb)->dio_unwritten_wq;
266 /* queue the work to convert unwritten extents to written */
267 queue_work(wq, &io_end->work);
268}
269
270void ext4_io_submit(struct ext4_io_submit *io)
271{
272 struct bio *bio = io->io_bio;
273
274 if (bio) {
275 bio_get(io->io_bio);
276 submit_bio(io->io_op, io->io_bio);
277 BUG_ON(bio_flagged(io->io_bio, BIO_EOPNOTSUPP));
278 bio_put(io->io_bio);
279 }
280 io->io_bio = 0;
281 io->io_op = 0;
282 io->io_end = 0;
283}
284
285static int io_submit_init(struct ext4_io_submit *io,
286 struct inode *inode,
287 struct writeback_control *wbc,
288 struct buffer_head *bh)
289{
290 ext4_io_end_t *io_end;
291 struct page *page = bh->b_page;
292 int nvecs = bio_get_nr_vecs(bh->b_bdev);
293 struct bio *bio;
294
295 io_end = ext4_init_io_end(inode, GFP_NOFS);
296 if (!io_end)
297 return -ENOMEM;
298 do {
299 bio = bio_alloc(GFP_NOIO, nvecs);
300 nvecs >>= 1;
301 } while (bio == NULL);
302
303 bio->bi_sector = bh->b_blocknr * (bh->b_size >> 9);
304 bio->bi_bdev = bh->b_bdev;
305 bio->bi_private = io->io_end = io_end;
306 bio->bi_end_io = ext4_end_bio;
307
308 io_end->inode = inode;
309 io_end->offset = (page->index << PAGE_CACHE_SHIFT) + bh_offset(bh);
310
311 io->io_bio = bio;
312 io->io_op = (wbc->sync_mode == WB_SYNC_ALL ?
313 WRITE_SYNC_PLUG : WRITE);
314 io->io_next_block = bh->b_blocknr;
315 return 0;
316}
317
318static int io_submit_add_bh(struct ext4_io_submit *io,
319 struct ext4_io_page *io_page,
320 struct inode *inode,
321 struct writeback_control *wbc,
322 struct buffer_head *bh)
323{
324 ext4_io_end_t *io_end;
325 int ret;
326
327 if (buffer_new(bh)) {
328 clear_buffer_new(bh);
329 unmap_underlying_metadata(bh->b_bdev, bh->b_blocknr);
330 }
331
332 if (!buffer_mapped(bh) || buffer_delay(bh)) {
333 if (!buffer_mapped(bh))
334 clear_buffer_dirty(bh);
335 if (io->io_bio)
336 ext4_io_submit(io);
337 return 0;
338 }
339
340 if (io->io_bio && bh->b_blocknr != io->io_next_block) {
341submit_and_retry:
342 ext4_io_submit(io);
343 }
344 if (io->io_bio == NULL) {
345 ret = io_submit_init(io, inode, wbc, bh);
346 if (ret)
347 return ret;
348 }
349 io_end = io->io_end;
350 if ((io_end->num_io_pages >= MAX_IO_PAGES) &&
351 (io_end->pages[io_end->num_io_pages-1] != io_page))
352 goto submit_and_retry;
353 if (buffer_uninit(bh))
354 io->io_end->flag |= EXT4_IO_END_UNWRITTEN;
355 io->io_end->size += bh->b_size;
356 io->io_next_block++;
357 ret = bio_add_page(io->io_bio, bh->b_page, bh->b_size, bh_offset(bh));
358 if (ret != bh->b_size)
359 goto submit_and_retry;
360 if ((io_end->num_io_pages == 0) ||
361 (io_end->pages[io_end->num_io_pages-1] != io_page)) {
362 io_end->pages[io_end->num_io_pages++] = io_page;
363 io_page->p_count++;
364 }
365 return 0;
366}
367
368int ext4_bio_write_page(struct ext4_io_submit *io,
369 struct page *page,
370 int len,
371 struct writeback_control *wbc)
372{
373 struct inode *inode = page->mapping->host;
374 unsigned block_start, block_end, blocksize;
375 struct ext4_io_page *io_page;
376 struct buffer_head *bh, *head;
377 int ret = 0;
378
379 blocksize = 1 << inode->i_blkbits;
380
381 BUG_ON(PageWriteback(page));
382 set_page_writeback(page);
383 ClearPageError(page);
384
385 io_page = kmem_cache_alloc(io_page_cachep, GFP_NOFS);
386 if (!io_page) {
387 set_page_dirty(page);
388 unlock_page(page);
389 return -ENOMEM;
390 }
391 io_page->p_page = page;
392 io_page->p_count = 0;
393 get_page(page);
394
395 for (bh = head = page_buffers(page), block_start = 0;
396 bh != head || !block_start;
397 block_start = block_end, bh = bh->b_this_page) {
398 block_end = block_start + blocksize;
399 if (block_start >= len) {
400 clear_buffer_dirty(bh);
401 set_buffer_uptodate(bh);
402 continue;
403 }
404 ret = io_submit_add_bh(io, io_page, inode, wbc, bh);
405 if (ret) {
406 /*
407 * We only get here on ENOMEM. Not much else
408 * we can do but mark the page as dirty, and
409 * better luck next time.
410 */
411 set_page_dirty(page);
412 break;
413 }
414 }
415 unlock_page(page);
416 /*
417 * If the page was truncated before we could do the writeback,
418 * or we had a memory allocation error while trying to write
419 * the first buffer head, we won't have submitted any pages for
420 * I/O. In that case we need to make sure we've cleared the
421 * PageWriteback bit from the page to prevent the system from
422 * wedging later on.
423 */
424 if (io_page->p_count == 0) {
425 put_page(page);
426 end_page_writeback(page);
427 kmem_cache_free(io_page_cachep, io_page);
428 }
429 return ret;
430}
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c
index ca5c8aa00a2f..dc963929de65 100644
--- a/fs/ext4/resize.c
+++ b/fs/ext4/resize.c
@@ -226,23 +226,13 @@ static int setup_new_group_blocks(struct super_block *sb,
226 } 226 }
227 227
228 /* Zero out all of the reserved backup group descriptor table blocks */ 228 /* Zero out all of the reserved backup group descriptor table blocks */
229 for (i = 0, bit = gdblocks + 1, block = start + bit; 229 ext4_debug("clear inode table blocks %#04llx -> %#04llx\n",
230 i < reserved_gdb; i++, block++, bit++) { 230 block, sbi->s_itb_per_group);
231 struct buffer_head *gdb; 231 err = sb_issue_zeroout(sb, gdblocks + start + 1, reserved_gdb,
232 232 GFP_NOFS);
233 ext4_debug("clear reserved block %#04llx (+%d)\n", block, bit); 233 if (err)
234 234 goto exit_bh;
235 if ((err = extend_or_restart_transaction(handle, 1, bh)))
236 goto exit_bh;
237 235
238 if (IS_ERR(gdb = bclean(handle, sb, block))) {
239 err = PTR_ERR(gdb);
240 goto exit_bh;
241 }
242 ext4_handle_dirty_metadata(handle, NULL, gdb);
243 ext4_set_bit(bit, bh->b_data);
244 brelse(gdb);
245 }
246 ext4_debug("mark block bitmap %#04llx (+%llu)\n", input->block_bitmap, 236 ext4_debug("mark block bitmap %#04llx (+%llu)\n", input->block_bitmap,
247 input->block_bitmap - start); 237 input->block_bitmap - start);
248 ext4_set_bit(input->block_bitmap - start, bh->b_data); 238 ext4_set_bit(input->block_bitmap - start, bh->b_data);
@@ -251,28 +241,18 @@ static int setup_new_group_blocks(struct super_block *sb,
251 ext4_set_bit(input->inode_bitmap - start, bh->b_data); 241 ext4_set_bit(input->inode_bitmap - start, bh->b_data);
252 242
253 /* Zero out all of the inode table blocks */ 243 /* Zero out all of the inode table blocks */
254 for (i = 0, block = input->inode_table, bit = block - start; 244 block = input->inode_table;
255 i < sbi->s_itb_per_group; i++, bit++, block++) { 245 ext4_debug("clear inode table blocks %#04llx -> %#04llx\n",
256 struct buffer_head *it; 246 block, sbi->s_itb_per_group);
257 247 err = sb_issue_zeroout(sb, block, sbi->s_itb_per_group, GFP_NOFS);
258 ext4_debug("clear inode block %#04llx (+%d)\n", block, bit); 248 if (err)
259 249 goto exit_bh;
260 if ((err = extend_or_restart_transaction(handle, 1, bh)))
261 goto exit_bh;
262
263 if (IS_ERR(it = bclean(handle, sb, block))) {
264 err = PTR_ERR(it);
265 goto exit_bh;
266 }
267 ext4_handle_dirty_metadata(handle, NULL, it);
268 brelse(it);
269 ext4_set_bit(bit, bh->b_data);
270 }
271 250
272 if ((err = extend_or_restart_transaction(handle, 2, bh))) 251 if ((err = extend_or_restart_transaction(handle, 2, bh)))
273 goto exit_bh; 252 goto exit_bh;
274 253
275 mark_bitmap_end(input->blocks_count, sb->s_blocksize * 8, bh->b_data); 254 ext4_mark_bitmap_end(input->blocks_count, sb->s_blocksize * 8,
255 bh->b_data);
276 ext4_handle_dirty_metadata(handle, NULL, bh); 256 ext4_handle_dirty_metadata(handle, NULL, bh);
277 brelse(bh); 257 brelse(bh);
278 /* Mark unused entries in inode bitmap used */ 258 /* Mark unused entries in inode bitmap used */
@@ -283,8 +263,8 @@ static int setup_new_group_blocks(struct super_block *sb,
283 goto exit_journal; 263 goto exit_journal;
284 } 264 }
285 265
286 mark_bitmap_end(EXT4_INODES_PER_GROUP(sb), sb->s_blocksize * 8, 266 ext4_mark_bitmap_end(EXT4_INODES_PER_GROUP(sb), sb->s_blocksize * 8,
287 bh->b_data); 267 bh->b_data);
288 ext4_handle_dirty_metadata(handle, NULL, bh); 268 ext4_handle_dirty_metadata(handle, NULL, bh);
289exit_bh: 269exit_bh:
290 brelse(bh); 270 brelse(bh);
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 8ecc1e590303..40131b777af6 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -40,6 +40,9 @@
40#include <linux/crc16.h> 40#include <linux/crc16.h>
41#include <asm/uaccess.h> 41#include <asm/uaccess.h>
42 42
43#include <linux/kthread.h>
44#include <linux/freezer.h>
45
43#include "ext4.h" 46#include "ext4.h"
44#include "ext4_jbd2.h" 47#include "ext4_jbd2.h"
45#include "xattr.h" 48#include "xattr.h"
@@ -49,8 +52,11 @@
49#define CREATE_TRACE_POINTS 52#define CREATE_TRACE_POINTS
50#include <trace/events/ext4.h> 53#include <trace/events/ext4.h>
51 54
52struct proc_dir_entry *ext4_proc_root; 55static struct proc_dir_entry *ext4_proc_root;
53static struct kset *ext4_kset; 56static struct kset *ext4_kset;
57struct ext4_lazy_init *ext4_li_info;
58struct mutex ext4_li_mtx;
59struct ext4_features *ext4_feat;
54 60
55static int ext4_load_journal(struct super_block *, struct ext4_super_block *, 61static int ext4_load_journal(struct super_block *, struct ext4_super_block *,
56 unsigned long journal_devnum); 62 unsigned long journal_devnum);
@@ -67,14 +73,16 @@ static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf);
67static int ext4_unfreeze(struct super_block *sb); 73static int ext4_unfreeze(struct super_block *sb);
68static void ext4_write_super(struct super_block *sb); 74static void ext4_write_super(struct super_block *sb);
69static int ext4_freeze(struct super_block *sb); 75static int ext4_freeze(struct super_block *sb);
70static int ext4_get_sb(struct file_system_type *fs_type, int flags, 76static struct dentry *ext4_mount(struct file_system_type *fs_type, int flags,
71 const char *dev_name, void *data, struct vfsmount *mnt); 77 const char *dev_name, void *data);
78static void ext4_destroy_lazyinit_thread(void);
79static void ext4_unregister_li_request(struct super_block *sb);
72 80
73#if !defined(CONFIG_EXT3_FS) && !defined(CONFIG_EXT3_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT23) 81#if !defined(CONFIG_EXT3_FS) && !defined(CONFIG_EXT3_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT23)
74static struct file_system_type ext3_fs_type = { 82static struct file_system_type ext3_fs_type = {
75 .owner = THIS_MODULE, 83 .owner = THIS_MODULE,
76 .name = "ext3", 84 .name = "ext3",
77 .get_sb = ext4_get_sb, 85 .mount = ext4_mount,
78 .kill_sb = kill_block_super, 86 .kill_sb = kill_block_super,
79 .fs_flags = FS_REQUIRES_DEV, 87 .fs_flags = FS_REQUIRES_DEV,
80}; 88};
@@ -701,6 +709,7 @@ static void ext4_put_super(struct super_block *sb)
701 struct ext4_super_block *es = sbi->s_es; 709 struct ext4_super_block *es = sbi->s_es;
702 int i, err; 710 int i, err;
703 711
712 ext4_unregister_li_request(sb);
704 dquot_disable(sb, -1, DQUOT_USAGE_ENABLED | DQUOT_LIMITS_ENABLED); 713 dquot_disable(sb, -1, DQUOT_USAGE_ENABLED | DQUOT_LIMITS_ENABLED);
705 714
706 flush_workqueue(sbi->dio_unwritten_wq); 715 flush_workqueue(sbi->dio_unwritten_wq);
@@ -717,6 +726,7 @@ static void ext4_put_super(struct super_block *sb)
717 ext4_abort(sb, "Couldn't clean up the journal"); 726 ext4_abort(sb, "Couldn't clean up the journal");
718 } 727 }
719 728
729 del_timer(&sbi->s_err_report);
720 ext4_release_system_zone(sb); 730 ext4_release_system_zone(sb);
721 ext4_mb_release(sb); 731 ext4_mb_release(sb);
722 ext4_ext_release(sb); 732 ext4_ext_release(sb);
@@ -1042,6 +1052,12 @@ static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs)
1042 !(def_mount_opts & EXT4_DEFM_BLOCK_VALIDITY)) 1052 !(def_mount_opts & EXT4_DEFM_BLOCK_VALIDITY))
1043 seq_puts(seq, ",block_validity"); 1053 seq_puts(seq, ",block_validity");
1044 1054
1055 if (!test_opt(sb, INIT_INODE_TABLE))
1056 seq_puts(seq, ",noinit_inode_table");
1057 else if (sbi->s_li_wait_mult)
1058 seq_printf(seq, ",init_inode_table=%u",
1059 (unsigned) sbi->s_li_wait_mult);
1060
1045 ext4_show_quota_options(seq, sb); 1061 ext4_show_quota_options(seq, sb);
1046 1062
1047 return 0; 1063 return 0;
@@ -1170,6 +1186,7 @@ static const struct super_operations ext4_sops = {
1170 .quota_write = ext4_quota_write, 1186 .quota_write = ext4_quota_write,
1171#endif 1187#endif
1172 .bdev_try_to_free_page = bdev_try_to_free_page, 1188 .bdev_try_to_free_page = bdev_try_to_free_page,
1189 .trim_fs = ext4_trim_fs
1173}; 1190};
1174 1191
1175static const struct super_operations ext4_nojournal_sops = { 1192static const struct super_operations ext4_nojournal_sops = {
@@ -1216,6 +1233,7 @@ enum {
1216 Opt_inode_readahead_blks, Opt_journal_ioprio, 1233 Opt_inode_readahead_blks, Opt_journal_ioprio,
1217 Opt_dioread_nolock, Opt_dioread_lock, 1234 Opt_dioread_nolock, Opt_dioread_lock,
1218 Opt_discard, Opt_nodiscard, 1235 Opt_discard, Opt_nodiscard,
1236 Opt_init_inode_table, Opt_noinit_inode_table,
1219}; 1237};
1220 1238
1221static const match_table_t tokens = { 1239static const match_table_t tokens = {
@@ -1286,6 +1304,9 @@ static const match_table_t tokens = {
1286 {Opt_dioread_lock, "dioread_lock"}, 1304 {Opt_dioread_lock, "dioread_lock"},
1287 {Opt_discard, "discard"}, 1305 {Opt_discard, "discard"},
1288 {Opt_nodiscard, "nodiscard"}, 1306 {Opt_nodiscard, "nodiscard"},
1307 {Opt_init_inode_table, "init_itable=%u"},
1308 {Opt_init_inode_table, "init_itable"},
1309 {Opt_noinit_inode_table, "noinit_itable"},
1289 {Opt_err, NULL}, 1310 {Opt_err, NULL},
1290}; 1311};
1291 1312
@@ -1756,6 +1777,20 @@ set_qf_format:
1756 case Opt_dioread_lock: 1777 case Opt_dioread_lock:
1757 clear_opt(sbi->s_mount_opt, DIOREAD_NOLOCK); 1778 clear_opt(sbi->s_mount_opt, DIOREAD_NOLOCK);
1758 break; 1779 break;
1780 case Opt_init_inode_table:
1781 set_opt(sbi->s_mount_opt, INIT_INODE_TABLE);
1782 if (args[0].from) {
1783 if (match_int(&args[0], &option))
1784 return 0;
1785 } else
1786 option = EXT4_DEF_LI_WAIT_MULT;
1787 if (option < 0)
1788 return 0;
1789 sbi->s_li_wait_mult = option;
1790 break;
1791 case Opt_noinit_inode_table:
1792 clear_opt(sbi->s_mount_opt, INIT_INODE_TABLE);
1793 break;
1759 default: 1794 default:
1760 ext4_msg(sb, KERN_ERR, 1795 ext4_msg(sb, KERN_ERR,
1761 "Unrecognized mount option \"%s\" " 1796 "Unrecognized mount option \"%s\" "
@@ -1939,7 +1974,8 @@ int ext4_group_desc_csum_verify(struct ext4_sb_info *sbi, __u32 block_group,
1939} 1974}
1940 1975
1941/* Called at mount-time, super-block is locked */ 1976/* Called at mount-time, super-block is locked */
1942static int ext4_check_descriptors(struct super_block *sb) 1977static int ext4_check_descriptors(struct super_block *sb,
1978 ext4_group_t *first_not_zeroed)
1943{ 1979{
1944 struct ext4_sb_info *sbi = EXT4_SB(sb); 1980 struct ext4_sb_info *sbi = EXT4_SB(sb);
1945 ext4_fsblk_t first_block = le32_to_cpu(sbi->s_es->s_first_data_block); 1981 ext4_fsblk_t first_block = le32_to_cpu(sbi->s_es->s_first_data_block);
@@ -1948,7 +1984,7 @@ static int ext4_check_descriptors(struct super_block *sb)
1948 ext4_fsblk_t inode_bitmap; 1984 ext4_fsblk_t inode_bitmap;
1949 ext4_fsblk_t inode_table; 1985 ext4_fsblk_t inode_table;
1950 int flexbg_flag = 0; 1986 int flexbg_flag = 0;
1951 ext4_group_t i; 1987 ext4_group_t i, grp = sbi->s_groups_count;
1952 1988
1953 if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG)) 1989 if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG))
1954 flexbg_flag = 1; 1990 flexbg_flag = 1;
@@ -1964,6 +2000,10 @@ static int ext4_check_descriptors(struct super_block *sb)
1964 last_block = first_block + 2000 last_block = first_block +
1965 (EXT4_BLOCKS_PER_GROUP(sb) - 1); 2001 (EXT4_BLOCKS_PER_GROUP(sb) - 1);
1966 2002
2003 if ((grp == sbi->s_groups_count) &&
2004 !(gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_ZEROED)))
2005 grp = i;
2006
1967 block_bitmap = ext4_block_bitmap(sb, gdp); 2007 block_bitmap = ext4_block_bitmap(sb, gdp);
1968 if (block_bitmap < first_block || block_bitmap > last_block) { 2008 if (block_bitmap < first_block || block_bitmap > last_block) {
1969 ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: " 2009 ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
@@ -2001,6 +2041,8 @@ static int ext4_check_descriptors(struct super_block *sb)
2001 if (!flexbg_flag) 2041 if (!flexbg_flag)
2002 first_block += EXT4_BLOCKS_PER_GROUP(sb); 2042 first_block += EXT4_BLOCKS_PER_GROUP(sb);
2003 } 2043 }
2044 if (NULL != first_not_zeroed)
2045 *first_not_zeroed = grp;
2004 2046
2005 ext4_free_blocks_count_set(sbi->s_es, ext4_count_free_blocks(sb)); 2047 ext4_free_blocks_count_set(sbi->s_es, ext4_count_free_blocks(sb));
2006 sbi->s_es->s_free_inodes_count =cpu_to_le32(ext4_count_free_inodes(sb)); 2048 sbi->s_es->s_free_inodes_count =cpu_to_le32(ext4_count_free_inodes(sb));
@@ -2373,6 +2415,7 @@ static struct ext4_attr ext4_attr_##_name = { \
2373#define EXT4_ATTR(name, mode, show, store) \ 2415#define EXT4_ATTR(name, mode, show, store) \
2374static struct ext4_attr ext4_attr_##name = __ATTR(name, mode, show, store) 2416static struct ext4_attr ext4_attr_##name = __ATTR(name, mode, show, store)
2375 2417
2418#define EXT4_INFO_ATTR(name) EXT4_ATTR(name, 0444, NULL, NULL)
2376#define EXT4_RO_ATTR(name) EXT4_ATTR(name, 0444, name##_show, NULL) 2419#define EXT4_RO_ATTR(name) EXT4_ATTR(name, 0444, name##_show, NULL)
2377#define EXT4_RW_ATTR(name) EXT4_ATTR(name, 0644, name##_show, name##_store) 2420#define EXT4_RW_ATTR(name) EXT4_ATTR(name, 0644, name##_show, name##_store)
2378#define EXT4_RW_ATTR_SBI_UI(name, elname) \ 2421#define EXT4_RW_ATTR_SBI_UI(name, elname) \
@@ -2409,6 +2452,16 @@ static struct attribute *ext4_attrs[] = {
2409 NULL, 2452 NULL,
2410}; 2453};
2411 2454
2455/* Features this copy of ext4 supports */
2456EXT4_INFO_ATTR(lazy_itable_init);
2457EXT4_INFO_ATTR(batched_discard);
2458
2459static struct attribute *ext4_feat_attrs[] = {
2460 ATTR_LIST(lazy_itable_init),
2461 ATTR_LIST(batched_discard),
2462 NULL,
2463};
2464
2412static ssize_t ext4_attr_show(struct kobject *kobj, 2465static ssize_t ext4_attr_show(struct kobject *kobj,
2413 struct attribute *attr, char *buf) 2466 struct attribute *attr, char *buf)
2414{ 2467{
@@ -2437,7 +2490,6 @@ static void ext4_sb_release(struct kobject *kobj)
2437 complete(&sbi->s_kobj_unregister); 2490 complete(&sbi->s_kobj_unregister);
2438} 2491}
2439 2492
2440
2441static const struct sysfs_ops ext4_attr_ops = { 2493static const struct sysfs_ops ext4_attr_ops = {
2442 .show = ext4_attr_show, 2494 .show = ext4_attr_show,
2443 .store = ext4_attr_store, 2495 .store = ext4_attr_store,
@@ -2449,6 +2501,17 @@ static struct kobj_type ext4_ktype = {
2449 .release = ext4_sb_release, 2501 .release = ext4_sb_release,
2450}; 2502};
2451 2503
2504static void ext4_feat_release(struct kobject *kobj)
2505{
2506 complete(&ext4_feat->f_kobj_unregister);
2507}
2508
2509static struct kobj_type ext4_feat_ktype = {
2510 .default_attrs = ext4_feat_attrs,
2511 .sysfs_ops = &ext4_attr_ops,
2512 .release = ext4_feat_release,
2513};
2514
2452/* 2515/*
2453 * Check whether this filesystem can be mounted based on 2516 * Check whether this filesystem can be mounted based on
2454 * the features present and the RDONLY/RDWR mount requested. 2517 * the features present and the RDONLY/RDWR mount requested.
@@ -2539,6 +2602,372 @@ static void print_daily_error_info(unsigned long arg)
2539 mod_timer(&sbi->s_err_report, jiffies + 24*60*60*HZ); /* Once a day */ 2602 mod_timer(&sbi->s_err_report, jiffies + 24*60*60*HZ); /* Once a day */
2540} 2603}
2541 2604
/*
 * Timer callback for the lazy-init timer: @data carries the task_struct
 * pointer of the task to wake up.
 */
static void ext4_lazyinode_timeout(unsigned long data)
{
	wake_up_process((struct task_struct *)data);
}
2610
2611/* Find next suitable group and run ext4_init_inode_table */
2612static int ext4_run_li_request(struct ext4_li_request *elr)
2613{
2614 struct ext4_group_desc *gdp = NULL;
2615 ext4_group_t group, ngroups;
2616 struct super_block *sb;
2617 unsigned long timeout = 0;
2618 int ret = 0;
2619
2620 sb = elr->lr_super;
2621 ngroups = EXT4_SB(sb)->s_groups_count;
2622
2623 for (group = elr->lr_next_group; group < ngroups; group++) {
2624 gdp = ext4_get_group_desc(sb, group, NULL);
2625 if (!gdp) {
2626 ret = 1;
2627 break;
2628 }
2629
2630 if (!(gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_ZEROED)))
2631 break;
2632 }
2633
2634 if (group == ngroups)
2635 ret = 1;
2636
2637 if (!ret) {
2638 timeout = jiffies;
2639 ret = ext4_init_inode_table(sb, group,
2640 elr->lr_timeout ? 0 : 1);
2641 if (elr->lr_timeout == 0) {
2642 timeout = jiffies - timeout;
2643 if (elr->lr_sbi->s_li_wait_mult)
2644 timeout *= elr->lr_sbi->s_li_wait_mult;
2645 else
2646 timeout *= 20;
2647 elr->lr_timeout = timeout;
2648 }
2649 elr->lr_next_sched = jiffies + elr->lr_timeout;
2650 elr->lr_next_group = group + 1;
2651 }
2652
2653 return ret;
2654}
2655
2656/*
2657 * Remove lr_request from the list_request and free the
2658 * request tructure. Should be called with li_list_mtx held
2659 */
2660static void ext4_remove_li_request(struct ext4_li_request *elr)
2661{
2662 struct ext4_sb_info *sbi;
2663
2664 if (!elr)
2665 return;
2666
2667 sbi = elr->lr_sbi;
2668
2669 list_del(&elr->lr_request);
2670 sbi->s_li_request = NULL;
2671 kfree(elr);
2672}
2673
2674static void ext4_unregister_li_request(struct super_block *sb)
2675{
2676 struct ext4_li_request *elr = EXT4_SB(sb)->s_li_request;
2677
2678 if (!ext4_li_info)
2679 return;
2680
2681 mutex_lock(&ext4_li_info->li_list_mtx);
2682 ext4_remove_li_request(elr);
2683 mutex_unlock(&ext4_li_info->li_list_mtx);
2684}
2685
2686/*
2687 * This is the function where ext4lazyinit thread lives. It walks
2688 * through the request list searching for next scheduled filesystem.
2689 * When such a fs is found, run the lazy initialization request
2690 * (ext4_rn_li_request) and keep track of the time spend in this
2691 * function. Based on that time we compute next schedule time of
2692 * the request. When walking through the list is complete, compute
2693 * next waking time and put itself into sleep.
2694 */
2695static int ext4_lazyinit_thread(void *arg)
2696{
2697 struct ext4_lazy_init *eli = (struct ext4_lazy_init *)arg;
2698 struct list_head *pos, *n;
2699 struct ext4_li_request *elr;
2700 unsigned long next_wakeup;
2701 DEFINE_WAIT(wait);
2702 int ret;
2703
2704 BUG_ON(NULL == eli);
2705
2706 eli->li_timer.data = (unsigned long)current;
2707 eli->li_timer.function = ext4_lazyinode_timeout;
2708
2709 eli->li_task = current;
2710 wake_up(&eli->li_wait_task);
2711
2712cont_thread:
2713 while (true) {
2714 next_wakeup = MAX_JIFFY_OFFSET;
2715
2716 mutex_lock(&eli->li_list_mtx);
2717 if (list_empty(&eli->li_request_list)) {
2718 mutex_unlock(&eli->li_list_mtx);
2719 goto exit_thread;
2720 }
2721
2722 list_for_each_safe(pos, n, &eli->li_request_list) {
2723 elr = list_entry(pos, struct ext4_li_request,
2724 lr_request);
2725
2726 if (time_after_eq(jiffies, elr->lr_next_sched))
2727 ret = ext4_run_li_request(elr);
2728
2729 if (ret) {
2730 ret = 0;
2731 ext4_remove_li_request(elr);
2732 continue;
2733 }
2734
2735 if (time_before(elr->lr_next_sched, next_wakeup))
2736 next_wakeup = elr->lr_next_sched;
2737 }
2738 mutex_unlock(&eli->li_list_mtx);
2739
2740 if (freezing(current))
2741 refrigerator();
2742
2743 if (time_after_eq(jiffies, next_wakeup)) {
2744 cond_resched();
2745 continue;
2746 }
2747
2748 eli->li_timer.expires = next_wakeup;
2749 add_timer(&eli->li_timer);
2750 prepare_to_wait(&eli->li_wait_daemon, &wait,
2751 TASK_INTERRUPTIBLE);
2752 if (time_before(jiffies, next_wakeup))
2753 schedule();
2754 finish_wait(&eli->li_wait_daemon, &wait);
2755 }
2756
2757exit_thread:
2758 /*
2759 * It looks like the request list is empty, but we need
2760 * to check it under the li_list_mtx lock, to prevent any
2761 * additions into it, and of course we should lock ext4_li_mtx
2762 * to atomically free the list and ext4_li_info, because at
2763 * this point another ext4 filesystem could be registering
2764 * new one.
2765 */
2766 mutex_lock(&ext4_li_mtx);
2767 mutex_lock(&eli->li_list_mtx);
2768 if (!list_empty(&eli->li_request_list)) {
2769 mutex_unlock(&eli->li_list_mtx);
2770 mutex_unlock(&ext4_li_mtx);
2771 goto cont_thread;
2772 }
2773 mutex_unlock(&eli->li_list_mtx);
2774 del_timer_sync(&ext4_li_info->li_timer);
2775 eli->li_task = NULL;
2776 wake_up(&eli->li_wait_task);
2777
2778 kfree(ext4_li_info);
2779 ext4_li_info = NULL;
2780 mutex_unlock(&ext4_li_mtx);
2781
2782 return 0;
2783}
2784
2785static void ext4_clear_request_list(void)
2786{
2787 struct list_head *pos, *n;
2788 struct ext4_li_request *elr;
2789
2790 mutex_lock(&ext4_li_info->li_list_mtx);
2791 if (list_empty(&ext4_li_info->li_request_list))
2792 return;
2793
2794 list_for_each_safe(pos, n, &ext4_li_info->li_request_list) {
2795 elr = list_entry(pos, struct ext4_li_request,
2796 lr_request);
2797 ext4_remove_li_request(elr);
2798 }
2799 mutex_unlock(&ext4_li_info->li_list_mtx);
2800}
2801
2802static int ext4_run_lazyinit_thread(void)
2803{
2804 struct task_struct *t;
2805
2806 t = kthread_run(ext4_lazyinit_thread, ext4_li_info, "ext4lazyinit");
2807 if (IS_ERR(t)) {
2808 int err = PTR_ERR(t);
2809 ext4_clear_request_list();
2810 del_timer_sync(&ext4_li_info->li_timer);
2811 kfree(ext4_li_info);
2812 ext4_li_info = NULL;
2813 printk(KERN_CRIT "EXT4: error %d creating inode table "
2814 "initialization thread\n",
2815 err);
2816 return err;
2817 }
2818 ext4_li_info->li_state |= EXT4_LAZYINIT_RUNNING;
2819
2820 wait_event(ext4_li_info->li_wait_task, ext4_li_info->li_task != NULL);
2821 return 0;
2822}
2823
2824/*
2825 * Check whether it make sense to run itable init. thread or not.
2826 * If there is at least one uninitialized inode table, return
2827 * corresponding group number, else the loop goes through all
2828 * groups and return total number of groups.
2829 */
2830static ext4_group_t ext4_has_uninit_itable(struct super_block *sb)
2831{
2832 ext4_group_t group, ngroups = EXT4_SB(sb)->s_groups_count;
2833 struct ext4_group_desc *gdp = NULL;
2834
2835 for (group = 0; group < ngroups; group++) {
2836 gdp = ext4_get_group_desc(sb, group, NULL);
2837 if (!gdp)
2838 continue;
2839
2840 if (!(gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_ZEROED)))
2841 break;
2842 }
2843
2844 return group;
2845}
2846
2847static int ext4_li_info_new(void)
2848{
2849 struct ext4_lazy_init *eli = NULL;
2850
2851 eli = kzalloc(sizeof(*eli), GFP_KERNEL);
2852 if (!eli)
2853 return -ENOMEM;
2854
2855 eli->li_task = NULL;
2856 INIT_LIST_HEAD(&eli->li_request_list);
2857 mutex_init(&eli->li_list_mtx);
2858
2859 init_waitqueue_head(&eli->li_wait_daemon);
2860 init_waitqueue_head(&eli->li_wait_task);
2861 init_timer(&eli->li_timer);
2862 eli->li_state |= EXT4_LAZYINIT_QUIT;
2863
2864 ext4_li_info = eli;
2865
2866 return 0;
2867}
2868
2869static struct ext4_li_request *ext4_li_request_new(struct super_block *sb,
2870 ext4_group_t start)
2871{
2872 struct ext4_sb_info *sbi = EXT4_SB(sb);
2873 struct ext4_li_request *elr;
2874 unsigned long rnd;
2875
2876 elr = kzalloc(sizeof(*elr), GFP_KERNEL);
2877 if (!elr)
2878 return NULL;
2879
2880 elr->lr_super = sb;
2881 elr->lr_sbi = sbi;
2882 elr->lr_next_group = start;
2883
2884 /*
2885 * Randomize first schedule time of the request to
2886 * spread the inode table initialization requests
2887 * better.
2888 */
2889 get_random_bytes(&rnd, sizeof(rnd));
2890 elr->lr_next_sched = jiffies + (unsigned long)rnd %
2891 (EXT4_DEF_LI_MAX_START_DELAY * HZ);
2892
2893 return elr;
2894}
2895
2896static int ext4_register_li_request(struct super_block *sb,
2897 ext4_group_t first_not_zeroed)
2898{
2899 struct ext4_sb_info *sbi = EXT4_SB(sb);
2900 struct ext4_li_request *elr;
2901 ext4_group_t ngroups = EXT4_SB(sb)->s_groups_count;
2902 int ret;
2903
2904 if (sbi->s_li_request != NULL)
2905 return 0;
2906
2907 if (first_not_zeroed == ngroups ||
2908 (sb->s_flags & MS_RDONLY) ||
2909 !test_opt(sb, INIT_INODE_TABLE)) {
2910 sbi->s_li_request = NULL;
2911 return 0;
2912 }
2913
2914 if (first_not_zeroed == ngroups) {
2915 sbi->s_li_request = NULL;
2916 return 0;
2917 }
2918
2919 elr = ext4_li_request_new(sb, first_not_zeroed);
2920 if (!elr)
2921 return -ENOMEM;
2922
2923 mutex_lock(&ext4_li_mtx);
2924
2925 if (NULL == ext4_li_info) {
2926 ret = ext4_li_info_new();
2927 if (ret)
2928 goto out;
2929 }
2930
2931 mutex_lock(&ext4_li_info->li_list_mtx);
2932 list_add(&elr->lr_request, &ext4_li_info->li_request_list);
2933 mutex_unlock(&ext4_li_info->li_list_mtx);
2934
2935 sbi->s_li_request = elr;
2936
2937 if (!(ext4_li_info->li_state & EXT4_LAZYINIT_RUNNING)) {
2938 ret = ext4_run_lazyinit_thread();
2939 if (ret)
2940 goto out;
2941 }
2942out:
2943 mutex_unlock(&ext4_li_mtx);
2944 if (ret)
2945 kfree(elr);
2946 return ret;
2947}
2948
2949/*
2950 * We do not need to lock anything since this is called on
2951 * module unload.
2952 */
2953static void ext4_destroy_lazyinit_thread(void)
2954{
2955 /*
2956 * If thread exited earlier
2957 * there's nothing to be done.
2958 */
2959 if (!ext4_li_info)
2960 return;
2961
2962 ext4_clear_request_list();
2963
2964 while (ext4_li_info->li_task) {
2965 wake_up(&ext4_li_info->li_wait_daemon);
2966 wait_event(ext4_li_info->li_wait_task,
2967 ext4_li_info->li_task == NULL);
2968 }
2969}
2970
2542static int ext4_fill_super(struct super_block *sb, void *data, int silent) 2971static int ext4_fill_super(struct super_block *sb, void *data, int silent)
2543 __releases(kernel_lock) 2972 __releases(kernel_lock)
2544 __acquires(kernel_lock) 2973 __acquires(kernel_lock)
@@ -2564,6 +2993,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
2564 __u64 blocks_count; 2993 __u64 blocks_count;
2565 int err; 2994 int err;
2566 unsigned int journal_ioprio = DEFAULT_JOURNAL_IOPRIO; 2995 unsigned int journal_ioprio = DEFAULT_JOURNAL_IOPRIO;
2996 ext4_group_t first_not_zeroed;
2567 2997
2568 sbi = kzalloc(sizeof(*sbi), GFP_KERNEL); 2998 sbi = kzalloc(sizeof(*sbi), GFP_KERNEL);
2569 if (!sbi) 2999 if (!sbi)
@@ -2624,6 +3054,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
2624 3054
2625 /* Set defaults before we parse the mount options */ 3055 /* Set defaults before we parse the mount options */
2626 def_mount_opts = le32_to_cpu(es->s_default_mount_opts); 3056 def_mount_opts = le32_to_cpu(es->s_default_mount_opts);
3057 set_opt(sbi->s_mount_opt, INIT_INODE_TABLE);
2627 if (def_mount_opts & EXT4_DEFM_DEBUG) 3058 if (def_mount_opts & EXT4_DEFM_DEBUG)
2628 set_opt(sbi->s_mount_opt, DEBUG); 3059 set_opt(sbi->s_mount_opt, DEBUG);
2629 if (def_mount_opts & EXT4_DEFM_BSDGROUPS) { 3060 if (def_mount_opts & EXT4_DEFM_BSDGROUPS) {
@@ -2901,7 +3332,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
2901 goto failed_mount2; 3332 goto failed_mount2;
2902 } 3333 }
2903 } 3334 }
2904 if (!ext4_check_descriptors(sb)) { 3335 if (!ext4_check_descriptors(sb, &first_not_zeroed)) {
2905 ext4_msg(sb, KERN_ERR, "group descriptors corrupted!"); 3336 ext4_msg(sb, KERN_ERR, "group descriptors corrupted!");
2906 goto failed_mount2; 3337 goto failed_mount2;
2907 } 3338 }
@@ -3122,6 +3553,10 @@ no_journal:
3122 goto failed_mount4; 3553 goto failed_mount4;
3123 } 3554 }
3124 3555
3556 err = ext4_register_li_request(sb, first_not_zeroed);
3557 if (err)
3558 goto failed_mount4;
3559
3125 sbi->s_kobj.kset = ext4_kset; 3560 sbi->s_kobj.kset = ext4_kset;
3126 init_completion(&sbi->s_kobj_unregister); 3561 init_completion(&sbi->s_kobj_unregister);
3127 err = kobject_init_and_add(&sbi->s_kobj, &ext4_ktype, NULL, 3562 err = kobject_init_and_add(&sbi->s_kobj, &ext4_ktype, NULL,
@@ -3461,7 +3896,7 @@ static int ext4_load_journal(struct super_block *sb,
3461 EXT4_SB(sb)->s_journal = journal; 3896 EXT4_SB(sb)->s_journal = journal;
3462 ext4_clear_journal_err(sb, es); 3897 ext4_clear_journal_err(sb, es);
3463 3898
3464 if (journal_devnum && 3899 if (!really_read_only && journal_devnum &&
3465 journal_devnum != le32_to_cpu(es->s_journal_dev)) { 3900 journal_devnum != le32_to_cpu(es->s_journal_dev)) {
3466 es->s_journal_dev = cpu_to_le32(journal_devnum); 3901 es->s_journal_dev = cpu_to_le32(journal_devnum);
3467 3902
@@ -3514,9 +3949,12 @@ static int ext4_commit_super(struct super_block *sb, int sync)
3514 else 3949 else
3515 es->s_kbytes_written = 3950 es->s_kbytes_written =
3516 cpu_to_le64(EXT4_SB(sb)->s_kbytes_written); 3951 cpu_to_le64(EXT4_SB(sb)->s_kbytes_written);
3517 ext4_free_blocks_count_set(es, percpu_counter_sum_positive( 3952 if (percpu_counter_initialized(&EXT4_SB(sb)->s_freeblocks_counter))
3953 ext4_free_blocks_count_set(es, percpu_counter_sum_positive(
3518 &EXT4_SB(sb)->s_freeblocks_counter)); 3954 &EXT4_SB(sb)->s_freeblocks_counter));
3519 es->s_free_inodes_count = cpu_to_le32(percpu_counter_sum_positive( 3955 if (percpu_counter_initialized(&EXT4_SB(sb)->s_freeinodes_counter))
3956 es->s_free_inodes_count =
3957 cpu_to_le32(percpu_counter_sum_positive(
3520 &EXT4_SB(sb)->s_freeinodes_counter)); 3958 &EXT4_SB(sb)->s_freeinodes_counter));
3521 sb->s_dirt = 0; 3959 sb->s_dirt = 0;
3522 BUFFER_TRACE(sbh, "marking dirty"); 3960 BUFFER_TRACE(sbh, "marking dirty");
@@ -3835,6 +4273,19 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
3835 enable_quota = 1; 4273 enable_quota = 1;
3836 } 4274 }
3837 } 4275 }
4276
4277 /*
4278 * Reinitialize lazy itable initialization thread based on
4279 * current settings
4280 */
4281 if ((sb->s_flags & MS_RDONLY) || !test_opt(sb, INIT_INODE_TABLE))
4282 ext4_unregister_li_request(sb);
4283 else {
4284 ext4_group_t first_not_zeroed;
4285 first_not_zeroed = ext4_has_uninit_itable(sb);
4286 ext4_register_li_request(sb, first_not_zeroed);
4287 }
4288
3838 ext4_setup_system_zone(sb); 4289 ext4_setup_system_zone(sb);
3839 if (sbi->s_journal == NULL) 4290 if (sbi->s_journal == NULL)
3840 ext4_commit_super(sb, 1); 4291 ext4_commit_super(sb, 1);
@@ -4216,17 +4667,17 @@ out:
4216 4667
4217#endif 4668#endif
4218 4669
4219static int ext4_get_sb(struct file_system_type *fs_type, int flags, 4670static struct dentry *ext4_mount(struct file_system_type *fs_type, int flags,
4220 const char *dev_name, void *data, struct vfsmount *mnt) 4671 const char *dev_name, void *data)
4221{ 4672{
4222 return get_sb_bdev(fs_type, flags, dev_name, data, ext4_fill_super,mnt); 4673 return mount_bdev(fs_type, flags, dev_name, data, ext4_fill_super);
4223} 4674}
4224 4675
4225#if !defined(CONFIG_EXT2_FS) && !defined(CONFIG_EXT2_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT23) 4676#if !defined(CONFIG_EXT2_FS) && !defined(CONFIG_EXT2_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT23)
4226static struct file_system_type ext2_fs_type = { 4677static struct file_system_type ext2_fs_type = {
4227 .owner = THIS_MODULE, 4678 .owner = THIS_MODULE,
4228 .name = "ext2", 4679 .name = "ext2",
4229 .get_sb = ext4_get_sb, 4680 .mount = ext4_mount,
4230 .kill_sb = kill_block_super, 4681 .kill_sb = kill_block_super,
4231 .fs_flags = FS_REQUIRES_DEV, 4682 .fs_flags = FS_REQUIRES_DEV,
4232}; 4683};
@@ -4271,28 +4722,58 @@ static inline void unregister_as_ext3(void) { }
4271static struct file_system_type ext4_fs_type = { 4722static struct file_system_type ext4_fs_type = {
4272 .owner = THIS_MODULE, 4723 .owner = THIS_MODULE,
4273 .name = "ext4", 4724 .name = "ext4",
4274 .get_sb = ext4_get_sb, 4725 .mount = ext4_mount,
4275 .kill_sb = kill_block_super, 4726 .kill_sb = kill_block_super,
4276 .fs_flags = FS_REQUIRES_DEV, 4727 .fs_flags = FS_REQUIRES_DEV,
4277}; 4728};
4278 4729
4279static int __init init_ext4_fs(void) 4730int __init ext4_init_feat_adverts(void)
4731{
4732 struct ext4_features *ef;
4733 int ret = -ENOMEM;
4734
4735 ef = kzalloc(sizeof(struct ext4_features), GFP_KERNEL);
4736 if (!ef)
4737 goto out;
4738
4739 ef->f_kobj.kset = ext4_kset;
4740 init_completion(&ef->f_kobj_unregister);
4741 ret = kobject_init_and_add(&ef->f_kobj, &ext4_feat_ktype, NULL,
4742 "features");
4743 if (ret) {
4744 kfree(ef);
4745 goto out;
4746 }
4747
4748 ext4_feat = ef;
4749 ret = 0;
4750out:
4751 return ret;
4752}
4753
4754static int __init ext4_init_fs(void)
4280{ 4755{
4281 int err; 4756 int err;
4282 4757
4283 ext4_check_flag_values(); 4758 ext4_check_flag_values();
4284 err = init_ext4_system_zone(); 4759 err = ext4_init_pageio();
4285 if (err) 4760 if (err)
4286 return err; 4761 return err;
4762 err = ext4_init_system_zone();
4763 if (err)
4764 goto out5;
4287 ext4_kset = kset_create_and_add("ext4", NULL, fs_kobj); 4765 ext4_kset = kset_create_and_add("ext4", NULL, fs_kobj);
4288 if (!ext4_kset) 4766 if (!ext4_kset)
4289 goto out4; 4767 goto out4;
4290 ext4_proc_root = proc_mkdir("fs/ext4", NULL); 4768 ext4_proc_root = proc_mkdir("fs/ext4", NULL);
4291 err = init_ext4_mballoc(); 4769
4770 err = ext4_init_feat_adverts();
4771
4772 err = ext4_init_mballoc();
4292 if (err) 4773 if (err)
4293 goto out3; 4774 goto out3;
4294 4775
4295 err = init_ext4_xattr(); 4776 err = ext4_init_xattr();
4296 if (err) 4777 if (err)
4297 goto out2; 4778 goto out2;
4298 err = init_inodecache(); 4779 err = init_inodecache();
@@ -4303,38 +4784,46 @@ static int __init init_ext4_fs(void)
4303 err = register_filesystem(&ext4_fs_type); 4784 err = register_filesystem(&ext4_fs_type);
4304 if (err) 4785 if (err)
4305 goto out; 4786 goto out;
4787
4788 ext4_li_info = NULL;
4789 mutex_init(&ext4_li_mtx);
4306 return 0; 4790 return 0;
4307out: 4791out:
4308 unregister_as_ext2(); 4792 unregister_as_ext2();
4309 unregister_as_ext3(); 4793 unregister_as_ext3();
4310 destroy_inodecache(); 4794 destroy_inodecache();
4311out1: 4795out1:
4312 exit_ext4_xattr(); 4796 ext4_exit_xattr();
4313out2: 4797out2:
4314 exit_ext4_mballoc(); 4798 ext4_exit_mballoc();
4315out3: 4799out3:
4800 kfree(ext4_feat);
4316 remove_proc_entry("fs/ext4", NULL); 4801 remove_proc_entry("fs/ext4", NULL);
4317 kset_unregister(ext4_kset); 4802 kset_unregister(ext4_kset);
4318out4: 4803out4:
4319 exit_ext4_system_zone(); 4804 ext4_exit_system_zone();
4805out5:
4806 ext4_exit_pageio();
4320 return err; 4807 return err;
4321} 4808}
4322 4809
4323static void __exit exit_ext4_fs(void) 4810static void __exit ext4_exit_fs(void)
4324{ 4811{
4812 ext4_destroy_lazyinit_thread();
4325 unregister_as_ext2(); 4813 unregister_as_ext2();
4326 unregister_as_ext3(); 4814 unregister_as_ext3();
4327 unregister_filesystem(&ext4_fs_type); 4815 unregister_filesystem(&ext4_fs_type);
4328 destroy_inodecache(); 4816 destroy_inodecache();
4329 exit_ext4_xattr(); 4817 ext4_exit_xattr();
4330 exit_ext4_mballoc(); 4818 ext4_exit_mballoc();
4331 remove_proc_entry("fs/ext4", NULL); 4819 remove_proc_entry("fs/ext4", NULL);
4332 kset_unregister(ext4_kset); 4820 kset_unregister(ext4_kset);
4333 exit_ext4_system_zone(); 4821 ext4_exit_system_zone();
4822 ext4_exit_pageio();
4334} 4823}
4335 4824
4336MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, Theodore Ts'o and others"); 4825MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, Theodore Ts'o and others");
4337MODULE_DESCRIPTION("Fourth Extended Filesystem"); 4826MODULE_DESCRIPTION("Fourth Extended Filesystem");
4338MODULE_LICENSE("GPL"); 4827MODULE_LICENSE("GPL");
4339module_init(init_ext4_fs) 4828module_init(ext4_init_fs)
4340module_exit(exit_ext4_fs) 4829module_exit(ext4_exit_fs)
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index 3a8cd8dff1ad..fa4b899da4b3 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -1588,7 +1588,7 @@ static void ext4_xattr_rehash(struct ext4_xattr_header *header,
1588#undef BLOCK_HASH_SHIFT 1588#undef BLOCK_HASH_SHIFT
1589 1589
1590int __init 1590int __init
1591init_ext4_xattr(void) 1591ext4_init_xattr(void)
1592{ 1592{
1593 ext4_xattr_cache = mb_cache_create("ext4_xattr", 6); 1593 ext4_xattr_cache = mb_cache_create("ext4_xattr", 6);
1594 if (!ext4_xattr_cache) 1594 if (!ext4_xattr_cache)
@@ -1597,7 +1597,7 @@ init_ext4_xattr(void)
1597} 1597}
1598 1598
1599void 1599void
1600exit_ext4_xattr(void) 1600ext4_exit_xattr(void)
1601{ 1601{
1602 if (ext4_xattr_cache) 1602 if (ext4_xattr_cache)
1603 mb_cache_destroy(ext4_xattr_cache); 1603 mb_cache_destroy(ext4_xattr_cache);
diff --git a/fs/ext4/xattr.h b/fs/ext4/xattr.h
index 518e96e43905..1ef16520b950 100644
--- a/fs/ext4/xattr.h
+++ b/fs/ext4/xattr.h
@@ -83,8 +83,8 @@ extern void ext4_xattr_put_super(struct super_block *);
83extern int ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize, 83extern int ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize,
84 struct ext4_inode *raw_inode, handle_t *handle); 84 struct ext4_inode *raw_inode, handle_t *handle);
85 85
86extern int init_ext4_xattr(void); 86extern int __init ext4_init_xattr(void);
87extern void exit_ext4_xattr(void); 87extern void ext4_exit_xattr(void);
88 88
89extern const struct xattr_handler *ext4_xattr_handlers[]; 89extern const struct xattr_handler *ext4_xattr_handlers[];
90 90
@@ -121,14 +121,14 @@ ext4_xattr_put_super(struct super_block *sb)
121{ 121{
122} 122}
123 123
124static inline int 124static __init inline int
125init_ext4_xattr(void) 125ext4_init_xattr(void)
126{ 126{
127 return 0; 127 return 0;
128} 128}
129 129
130static inline void 130static inline void
131exit_ext4_xattr(void) 131ext4_exit_xattr(void)
132{ 132{
133} 133}
134 134
diff --git a/fs/fat/namei_msdos.c b/fs/fat/namei_msdos.c
index bbca5c186ae7..3345aabd1dd7 100644
--- a/fs/fat/namei_msdos.c
+++ b/fs/fat/namei_msdos.c
@@ -675,18 +675,17 @@ static int msdos_fill_super(struct super_block *sb, void *data, int silent)
675 return 0; 675 return 0;
676} 676}
677 677
678static int msdos_get_sb(struct file_system_type *fs_type, 678static struct dentry *msdos_mount(struct file_system_type *fs_type,
679 int flags, const char *dev_name, 679 int flags, const char *dev_name,
680 void *data, struct vfsmount *mnt) 680 void *data)
681{ 681{
682 return get_sb_bdev(fs_type, flags, dev_name, data, msdos_fill_super, 682 return mount_bdev(fs_type, flags, dev_name, data, msdos_fill_super);
683 mnt);
684} 683}
685 684
686static struct file_system_type msdos_fs_type = { 685static struct file_system_type msdos_fs_type = {
687 .owner = THIS_MODULE, 686 .owner = THIS_MODULE,
688 .name = "msdos", 687 .name = "msdos",
689 .get_sb = msdos_get_sb, 688 .mount = msdos_mount,
690 .kill_sb = kill_block_super, 689 .kill_sb = kill_block_super,
691 .fs_flags = FS_REQUIRES_DEV, 690 .fs_flags = FS_REQUIRES_DEV,
692}; 691};
diff --git a/fs/fat/namei_vfat.c b/fs/fat/namei_vfat.c
index 6f0f6c9a0152..b936703b8924 100644
--- a/fs/fat/namei_vfat.c
+++ b/fs/fat/namei_vfat.c
@@ -1071,18 +1071,17 @@ static int vfat_fill_super(struct super_block *sb, void *data, int silent)
1071 return 0; 1071 return 0;
1072} 1072}
1073 1073
1074static int vfat_get_sb(struct file_system_type *fs_type, 1074static struct dentry *vfat_mount(struct file_system_type *fs_type,
1075 int flags, const char *dev_name, 1075 int flags, const char *dev_name,
1076 void *data, struct vfsmount *mnt) 1076 void *data)
1077{ 1077{
1078 return get_sb_bdev(fs_type, flags, dev_name, data, vfat_fill_super, 1078 return mount_bdev(fs_type, flags, dev_name, data, vfat_fill_super);
1079 mnt);
1080} 1079}
1081 1080
1082static struct file_system_type vfat_fs_type = { 1081static struct file_system_type vfat_fs_type = {
1083 .owner = THIS_MODULE, 1082 .owner = THIS_MODULE,
1084 .name = "vfat", 1083 .name = "vfat",
1085 .get_sb = vfat_get_sb, 1084 .mount = vfat_mount,
1086 .kill_sb = kill_block_super, 1085 .kill_sb = kill_block_super,
1087 .fs_flags = FS_REQUIRES_DEV, 1086 .fs_flags = FS_REQUIRES_DEV,
1088}; 1087};
diff --git a/fs/fcntl.c b/fs/fcntl.c
index f8cc34f542c3..ecc8b3954ed6 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -640,7 +640,7 @@ static void fasync_free_rcu(struct rcu_head *head)
640 * match the state "is the filp on a fasync list". 640 * match the state "is the filp on a fasync list".
641 * 641 *
642 */ 642 */
643static int fasync_remove_entry(struct file *filp, struct fasync_struct **fapp) 643int fasync_remove_entry(struct file *filp, struct fasync_struct **fapp)
644{ 644{
645 struct fasync_struct *fa, **fp; 645 struct fasync_struct *fa, **fp;
646 int result = 0; 646 int result = 0;
@@ -666,21 +666,31 @@ static int fasync_remove_entry(struct file *filp, struct fasync_struct **fapp)
666 return result; 666 return result;
667} 667}
668 668
669struct fasync_struct *fasync_alloc(void)
670{
671 return kmem_cache_alloc(fasync_cache, GFP_KERNEL);
672}
673
669/* 674/*
670 * Add a fasync entry. Return negative on error, positive if 675 * NOTE! This can be used only for unused fasync entries:
671 * added, and zero if did nothing but change an existing one. 676 * entries that actually got inserted on the fasync list
677 * need to be released by rcu - see fasync_remove_entry.
678 */
679void fasync_free(struct fasync_struct *new)
680{
681 kmem_cache_free(fasync_cache, new);
682}
683
684/*
685 * Insert a new entry into the fasync list. Return the pointer to the
686 * old one if we didn't use the new one.
672 * 687 *
673 * NOTE! It is very important that the FASYNC flag always 688 * NOTE! It is very important that the FASYNC flag always
674 * match the state "is the filp on a fasync list". 689 * match the state "is the filp on a fasync list".
675 */ 690 */
676static int fasync_add_entry(int fd, struct file *filp, struct fasync_struct **fapp) 691struct fasync_struct *fasync_insert_entry(int fd, struct file *filp, struct fasync_struct **fapp, struct fasync_struct *new)
677{ 692{
678 struct fasync_struct *new, *fa, **fp; 693 struct fasync_struct *fa, **fp;
679 int result = 0;
680
681 new = kmem_cache_alloc(fasync_cache, GFP_KERNEL);
682 if (!new)
683 return -ENOMEM;
684 694
685 spin_lock(&filp->f_lock); 695 spin_lock(&filp->f_lock);
686 spin_lock(&fasync_lock); 696 spin_lock(&fasync_lock);
@@ -691,8 +701,6 @@ static int fasync_add_entry(int fd, struct file *filp, struct fasync_struct **fa
691 spin_lock_irq(&fa->fa_lock); 701 spin_lock_irq(&fa->fa_lock);
692 fa->fa_fd = fd; 702 fa->fa_fd = fd;
693 spin_unlock_irq(&fa->fa_lock); 703 spin_unlock_irq(&fa->fa_lock);
694
695 kmem_cache_free(fasync_cache, new);
696 goto out; 704 goto out;
697 } 705 }
698 706
@@ -702,13 +710,39 @@ static int fasync_add_entry(int fd, struct file *filp, struct fasync_struct **fa
702 new->fa_fd = fd; 710 new->fa_fd = fd;
703 new->fa_next = *fapp; 711 new->fa_next = *fapp;
704 rcu_assign_pointer(*fapp, new); 712 rcu_assign_pointer(*fapp, new);
705 result = 1;
706 filp->f_flags |= FASYNC; 713 filp->f_flags |= FASYNC;
707 714
708out: 715out:
709 spin_unlock(&fasync_lock); 716 spin_unlock(&fasync_lock);
710 spin_unlock(&filp->f_lock); 717 spin_unlock(&filp->f_lock);
711 return result; 718 return fa;
719}
720
721/*
722 * Add a fasync entry. Return negative on error, positive if
723 * added, and zero if did nothing but change an existing one.
724 */
725static int fasync_add_entry(int fd, struct file *filp, struct fasync_struct **fapp)
726{
727 struct fasync_struct *new;
728
729 new = fasync_alloc();
730 if (!new)
731 return -ENOMEM;
732
733 /*
734 * fasync_insert_entry() returns the old (update) entry if
735 * it existed.
736 *
737 * So free the (unused) new entry and return 0 to let the
738 * caller know that we didn't add any new fasync entries.
739 */
740 if (fasync_insert_entry(fd, filp, fapp, new)) {
741 fasync_free(new);
742 return 0;
743 }
744
745 return 1;
712} 746}
713 747
714/* 748/*
diff --git a/fs/file_table.c b/fs/file_table.c
index a04bdd81c11c..c3dee381f1b4 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -60,7 +60,7 @@ static inline void file_free(struct file *f)
60/* 60/*
61 * Return the total number of open files in the system 61 * Return the total number of open files in the system
62 */ 62 */
63static int get_nr_files(void) 63static long get_nr_files(void)
64{ 64{
65 return percpu_counter_read_positive(&nr_files); 65 return percpu_counter_read_positive(&nr_files);
66} 66}
@@ -68,7 +68,7 @@ static int get_nr_files(void)
68/* 68/*
69 * Return the maximum number of open files in the system 69 * Return the maximum number of open files in the system
70 */ 70 */
71int get_max_files(void) 71unsigned long get_max_files(void)
72{ 72{
73 return files_stat.max_files; 73 return files_stat.max_files;
74} 74}
@@ -82,7 +82,7 @@ int proc_nr_files(ctl_table *table, int write,
82 void __user *buffer, size_t *lenp, loff_t *ppos) 82 void __user *buffer, size_t *lenp, loff_t *ppos)
83{ 83{
84 files_stat.nr_files = get_nr_files(); 84 files_stat.nr_files = get_nr_files();
85 return proc_dointvec(table, write, buffer, lenp, ppos); 85 return proc_doulongvec_minmax(table, write, buffer, lenp, ppos);
86} 86}
87#else 87#else
88int proc_nr_files(ctl_table *table, int write, 88int proc_nr_files(ctl_table *table, int write,
@@ -105,7 +105,7 @@ int proc_nr_files(ctl_table *table, int write,
105struct file *get_empty_filp(void) 105struct file *get_empty_filp(void)
106{ 106{
107 const struct cred *cred = current_cred(); 107 const struct cred *cred = current_cred();
108 static int old_max; 108 static long old_max;
109 struct file * f; 109 struct file * f;
110 110
111 /* 111 /*
@@ -140,8 +140,7 @@ struct file *get_empty_filp(void)
140over: 140over:
141 /* Ran out of filps - report that */ 141 /* Ran out of filps - report that */
142 if (get_nr_files() > old_max) { 142 if (get_nr_files() > old_max) {
143 printk(KERN_INFO "VFS: file-max limit %d reached\n", 143 pr_info("VFS: file-max limit %lu reached\n", get_max_files());
144 get_max_files());
145 old_max = get_nr_files(); 144 old_max = get_nr_files();
146 } 145 }
147 goto fail; 146 goto fail;
@@ -487,7 +486,7 @@ retry:
487 486
488void __init files_init(unsigned long mempages) 487void __init files_init(unsigned long mempages)
489{ 488{
490 int n; 489 unsigned long n;
491 490
492 filp_cachep = kmem_cache_create("filp", sizeof(struct file), 0, 491 filp_cachep = kmem_cache_create("filp", sizeof(struct file), 0,
493 SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL); 492 SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL);
@@ -498,9 +497,7 @@ void __init files_init(unsigned long mempages)
498 */ 497 */
499 498
500 n = (mempages * (PAGE_SIZE / 1024)) / 10; 499 n = (mempages * (PAGE_SIZE / 1024)) / 10;
501 files_stat.max_files = n; 500 files_stat.max_files = max_t(unsigned long, n, NR_FILE);
502 if (files_stat.max_files < NR_FILE)
503 files_stat.max_files = NR_FILE;
504 files_defer_init(); 501 files_defer_init();
505 lg_lock_init(files_lglock); 502 lg_lock_init(files_lglock);
506 percpu_counter_init(&nr_files, 0); 503 percpu_counter_init(&nr_files, 0);
diff --git a/fs/freevxfs/vxfs_inode.c b/fs/freevxfs/vxfs_inode.c
index 79d1b4ea13e7..8c04eac5079d 100644
--- a/fs/freevxfs/vxfs_inode.c
+++ b/fs/freevxfs/vxfs_inode.c
@@ -260,6 +260,7 @@ vxfs_get_fake_inode(struct super_block *sbp, struct vxfs_inode_info *vip)
260 struct inode *ip = NULL; 260 struct inode *ip = NULL;
261 261
262 if ((ip = new_inode(sbp))) { 262 if ((ip = new_inode(sbp))) {
263 ip->i_ino = get_next_ino();
263 vxfs_iinit(ip, vip); 264 vxfs_iinit(ip, vip);
264 ip->i_mapping->a_ops = &vxfs_aops; 265 ip->i_mapping->a_ops = &vxfs_aops;
265 } 266 }
diff --git a/fs/freevxfs/vxfs_super.c b/fs/freevxfs/vxfs_super.c
index 71b0148b8784..9d1c99558389 100644
--- a/fs/freevxfs/vxfs_super.c
+++ b/fs/freevxfs/vxfs_super.c
@@ -246,17 +246,16 @@ out:
246/* 246/*
247 * The usual module blurb. 247 * The usual module blurb.
248 */ 248 */
249static int vxfs_get_sb(struct file_system_type *fs_type, 249static struct dentry *vxfs_mount(struct file_system_type *fs_type,
250 int flags, const char *dev_name, void *data, struct vfsmount *mnt) 250 int flags, const char *dev_name, void *data)
251{ 251{
252 return get_sb_bdev(fs_type, flags, dev_name, data, vxfs_fill_super, 252 return mount_bdev(fs_type, flags, dev_name, data, vxfs_fill_super);
253 mnt);
254} 253}
255 254
256static struct file_system_type vxfs_fs_type = { 255static struct file_system_type vxfs_fs_type = {
257 .owner = THIS_MODULE, 256 .owner = THIS_MODULE,
258 .name = "vxfs", 257 .name = "vxfs",
259 .get_sb = vxfs_get_sb, 258 .mount = vxfs_mount,
260 .kill_sb = kill_block_super, 259 .kill_sb = kill_block_super,
261 .fs_flags = FS_REQUIRES_DEV, 260 .fs_flags = FS_REQUIRES_DEV,
262}; 261};
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index ab38fef1c9a1..aed881a76b22 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -79,6 +79,11 @@ static inline struct backing_dev_info *inode_to_bdi(struct inode *inode)
79 return sb->s_bdi; 79 return sb->s_bdi;
80} 80}
81 81
82static inline struct inode *wb_inode(struct list_head *head)
83{
84 return list_entry(head, struct inode, i_wb_list);
85}
86
82static void bdi_queue_work(struct backing_dev_info *bdi, 87static void bdi_queue_work(struct backing_dev_info *bdi,
83 struct wb_writeback_work *work) 88 struct wb_writeback_work *work)
84{ 89{
@@ -172,11 +177,11 @@ static void redirty_tail(struct inode *inode)
172 if (!list_empty(&wb->b_dirty)) { 177 if (!list_empty(&wb->b_dirty)) {
173 struct inode *tail; 178 struct inode *tail;
174 179
175 tail = list_entry(wb->b_dirty.next, struct inode, i_list); 180 tail = wb_inode(wb->b_dirty.next);
176 if (time_before(inode->dirtied_when, tail->dirtied_when)) 181 if (time_before(inode->dirtied_when, tail->dirtied_when))
177 inode->dirtied_when = jiffies; 182 inode->dirtied_when = jiffies;
178 } 183 }
179 list_move(&inode->i_list, &wb->b_dirty); 184 list_move(&inode->i_wb_list, &wb->b_dirty);
180} 185}
181 186
182/* 187/*
@@ -186,7 +191,7 @@ static void requeue_io(struct inode *inode)
186{ 191{
187 struct bdi_writeback *wb = &inode_to_bdi(inode)->wb; 192 struct bdi_writeback *wb = &inode_to_bdi(inode)->wb;
188 193
189 list_move(&inode->i_list, &wb->b_more_io); 194 list_move(&inode->i_wb_list, &wb->b_more_io);
190} 195}
191 196
192static void inode_sync_complete(struct inode *inode) 197static void inode_sync_complete(struct inode *inode)
@@ -227,14 +232,14 @@ static void move_expired_inodes(struct list_head *delaying_queue,
227 int do_sb_sort = 0; 232 int do_sb_sort = 0;
228 233
229 while (!list_empty(delaying_queue)) { 234 while (!list_empty(delaying_queue)) {
230 inode = list_entry(delaying_queue->prev, struct inode, i_list); 235 inode = wb_inode(delaying_queue->prev);
231 if (older_than_this && 236 if (older_than_this &&
232 inode_dirtied_after(inode, *older_than_this)) 237 inode_dirtied_after(inode, *older_than_this))
233 break; 238 break;
234 if (sb && sb != inode->i_sb) 239 if (sb && sb != inode->i_sb)
235 do_sb_sort = 1; 240 do_sb_sort = 1;
236 sb = inode->i_sb; 241 sb = inode->i_sb;
237 list_move(&inode->i_list, &tmp); 242 list_move(&inode->i_wb_list, &tmp);
238 } 243 }
239 244
240 /* just one sb in list, splice to dispatch_queue and we're done */ 245 /* just one sb in list, splice to dispatch_queue and we're done */
@@ -245,12 +250,11 @@ static void move_expired_inodes(struct list_head *delaying_queue,
245 250
246 /* Move inodes from one superblock together */ 251 /* Move inodes from one superblock together */
247 while (!list_empty(&tmp)) { 252 while (!list_empty(&tmp)) {
248 inode = list_entry(tmp.prev, struct inode, i_list); 253 sb = wb_inode(tmp.prev)->i_sb;
249 sb = inode->i_sb;
250 list_for_each_prev_safe(pos, node, &tmp) { 254 list_for_each_prev_safe(pos, node, &tmp) {
251 inode = list_entry(pos, struct inode, i_list); 255 inode = wb_inode(pos);
252 if (inode->i_sb == sb) 256 if (inode->i_sb == sb)
253 list_move(&inode->i_list, dispatch_queue); 257 list_move(&inode->i_wb_list, dispatch_queue);
254 } 258 }
255 } 259 }
256} 260}
@@ -408,16 +412,13 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
408 * completion. 412 * completion.
409 */ 413 */
410 redirty_tail(inode); 414 redirty_tail(inode);
411 } else if (atomic_read(&inode->i_count)) {
412 /*
413 * The inode is clean, inuse
414 */
415 list_move(&inode->i_list, &inode_in_use);
416 } else { 415 } else {
417 /* 416 /*
418 * The inode is clean, unused 417 * The inode is clean. At this point we either have
418 * a reference to the inode or it's on its way out.
419 * No need to add it back to the LRU.
419 */ 420 */
420 list_move(&inode->i_list, &inode_unused); 421 list_del_init(&inode->i_wb_list);
421 } 422 }
422 } 423 }
423 inode_sync_complete(inode); 424 inode_sync_complete(inode);
@@ -465,8 +466,7 @@ static int writeback_sb_inodes(struct super_block *sb, struct bdi_writeback *wb,
465{ 466{
466 while (!list_empty(&wb->b_io)) { 467 while (!list_empty(&wb->b_io)) {
467 long pages_skipped; 468 long pages_skipped;
468 struct inode *inode = list_entry(wb->b_io.prev, 469 struct inode *inode = wb_inode(wb->b_io.prev);
469 struct inode, i_list);
470 470
471 if (inode->i_sb != sb) { 471 if (inode->i_sb != sb) {
472 if (only_this_sb) { 472 if (only_this_sb) {
@@ -487,10 +487,16 @@ static int writeback_sb_inodes(struct super_block *sb, struct bdi_writeback *wb,
487 return 0; 487 return 0;
488 } 488 }
489 489
490 if (inode->i_state & (I_NEW | I_WILL_FREE)) { 490 /*
491 * Don't bother with new inodes or inodes being freed, first
492 * kind does not need periodic writeout yet, and for the latter
493 * kind writeout is handled by the freer.
494 */
495 if (inode->i_state & (I_NEW | I_FREEING | I_WILL_FREE)) {
491 requeue_io(inode); 496 requeue_io(inode);
492 continue; 497 continue;
493 } 498 }
499
494 /* 500 /*
495 * Was this inode dirtied after sync_sb_inodes was called? 501 * Was this inode dirtied after sync_sb_inodes was called?
496 * This keeps sync from extra jobs and livelock. 502 * This keeps sync from extra jobs and livelock.
@@ -498,7 +504,6 @@ static int writeback_sb_inodes(struct super_block *sb, struct bdi_writeback *wb,
498 if (inode_dirtied_after(inode, wbc->wb_start)) 504 if (inode_dirtied_after(inode, wbc->wb_start))
499 return 1; 505 return 1;
500 506
501 BUG_ON(inode->i_state & I_FREEING);
502 __iget(inode); 507 __iget(inode);
503 pages_skipped = wbc->pages_skipped; 508 pages_skipped = wbc->pages_skipped;
504 writeback_single_inode(inode, wbc); 509 writeback_single_inode(inode, wbc);
@@ -536,8 +541,7 @@ void writeback_inodes_wb(struct bdi_writeback *wb,
536 queue_io(wb, wbc->older_than_this); 541 queue_io(wb, wbc->older_than_this);
537 542
538 while (!list_empty(&wb->b_io)) { 543 while (!list_empty(&wb->b_io)) {
539 struct inode *inode = list_entry(wb->b_io.prev, 544 struct inode *inode = wb_inode(wb->b_io.prev);
540 struct inode, i_list);
541 struct super_block *sb = inode->i_sb; 545 struct super_block *sb = inode->i_sb;
542 546
543 if (!pin_sb_for_writeback(sb)) { 547 if (!pin_sb_for_writeback(sb)) {
@@ -582,7 +586,7 @@ static inline bool over_bground_thresh(void)
582 global_dirty_limits(&background_thresh, &dirty_thresh); 586 global_dirty_limits(&background_thresh, &dirty_thresh);
583 587
584 return (global_page_state(NR_FILE_DIRTY) + 588 return (global_page_state(NR_FILE_DIRTY) +
585 global_page_state(NR_UNSTABLE_NFS) >= background_thresh); 589 global_page_state(NR_UNSTABLE_NFS) > background_thresh);
586} 590}
587 591
588/* 592/*
@@ -675,8 +679,7 @@ static long wb_writeback(struct bdi_writeback *wb,
675 */ 679 */
676 spin_lock(&inode_lock); 680 spin_lock(&inode_lock);
677 if (!list_empty(&wb->b_more_io)) { 681 if (!list_empty(&wb->b_more_io)) {
678 inode = list_entry(wb->b_more_io.prev, 682 inode = wb_inode(wb->b_more_io.prev);
679 struct inode, i_list);
680 trace_wbc_writeback_wait(&wbc, wb->bdi); 683 trace_wbc_writeback_wait(&wbc, wb->bdi);
681 inode_wait_for_writeback(inode); 684 inode_wait_for_writeback(inode);
682 } 685 }
@@ -721,9 +724,13 @@ static long wb_check_old_data_flush(struct bdi_writeback *wb)
721 return 0; 724 return 0;
722 725
723 wb->last_old_flush = jiffies; 726 wb->last_old_flush = jiffies;
727 /*
728 * Add in the number of potentially dirty inodes, because each inode
729 * write can dirty pagecache in the underlying blockdev.
730 */
724 nr_pages = global_page_state(NR_FILE_DIRTY) + 731 nr_pages = global_page_state(NR_FILE_DIRTY) +
725 global_page_state(NR_UNSTABLE_NFS) + 732 global_page_state(NR_UNSTABLE_NFS) +
726 (inodes_stat.nr_inodes - inodes_stat.nr_unused); 733 get_nr_dirty_inodes();
727 734
728 if (nr_pages) { 735 if (nr_pages) {
729 struct wb_writeback_work work = { 736 struct wb_writeback_work work = {
@@ -790,7 +797,7 @@ int bdi_writeback_thread(void *data)
790 struct backing_dev_info *bdi = wb->bdi; 797 struct backing_dev_info *bdi = wb->bdi;
791 long pages_written; 798 long pages_written;
792 799
793 current->flags |= PF_FLUSHER | PF_SWAPWRITE; 800 current->flags |= PF_SWAPWRITE;
794 set_freezable(); 801 set_freezable();
795 wb->last_active = jiffies; 802 wb->last_active = jiffies;
796 803
@@ -962,7 +969,7 @@ void __mark_inode_dirty(struct inode *inode, int flags)
962 * dirty list. Add blockdev inodes as well. 969 * dirty list. Add blockdev inodes as well.
963 */ 970 */
964 if (!S_ISBLK(inode->i_mode)) { 971 if (!S_ISBLK(inode->i_mode)) {
965 if (hlist_unhashed(&inode->i_hash)) 972 if (inode_unhashed(inode))
966 goto out; 973 goto out;
967 } 974 }
968 if (inode->i_state & I_FREEING) 975 if (inode->i_state & I_FREEING)
@@ -990,7 +997,7 @@ void __mark_inode_dirty(struct inode *inode, int flags)
990 } 997 }
991 998
992 inode->dirtied_when = jiffies; 999 inode->dirtied_when = jiffies;
993 list_move(&inode->i_list, &bdi->wb.b_dirty); 1000 list_move(&inode->i_wb_list, &bdi->wb.b_dirty);
994 } 1001 }
995 } 1002 }
996out: 1003out:
@@ -1090,8 +1097,7 @@ void writeback_inodes_sb(struct super_block *sb)
1090 1097
1091 WARN_ON(!rwsem_is_locked(&sb->s_umount)); 1098 WARN_ON(!rwsem_is_locked(&sb->s_umount));
1092 1099
1093 work.nr_pages = nr_dirty + nr_unstable + 1100 work.nr_pages = nr_dirty + nr_unstable + get_nr_dirty_inodes();
1094 (inodes_stat.nr_inodes - inodes_stat.nr_unused);
1095 1101
1096 bdi_queue_work(sb->s_bdi, &work); 1102 bdi_queue_work(sb->s_bdi, &work);
1097 wait_for_completion(&done); 1103 wait_for_completion(&done);
@@ -1198,3 +1204,23 @@ int sync_inode(struct inode *inode, struct writeback_control *wbc)
1198 return ret; 1204 return ret;
1199} 1205}
1200EXPORT_SYMBOL(sync_inode); 1206EXPORT_SYMBOL(sync_inode);
1207
1208/**
1209 * sync_inode_metadata - write an inode to disk
1210 * @inode: the inode to sync
1211 * @wait: wait for I/O to complete.
1212 *
1213 * Write an inode to disk and adjust its dirty state after completion.
1214 *
1215 * Note: only writes the actual inode, no associated data or other metadata.
1216 */
1217int sync_inode_metadata(struct inode *inode, int wait)
1218{
1219 struct writeback_control wbc = {
1220 .sync_mode = wait ? WB_SYNC_ALL : WB_SYNC_NONE,
1221 .nr_to_write = 0, /* metadata-only */
1222 };
1223
1224 return sync_inode(inode, &wbc);
1225}
1226EXPORT_SYMBOL(sync_inode_metadata);
diff --git a/fs/fuse/control.c b/fs/fuse/control.c
index 7367e177186f..85542a7daf40 100644
--- a/fs/fuse/control.c
+++ b/fs/fuse/control.c
@@ -222,6 +222,7 @@ static struct dentry *fuse_ctl_add_dentry(struct dentry *parent,
222 if (!inode) 222 if (!inode)
223 return NULL; 223 return NULL;
224 224
225 inode->i_ino = get_next_ino();
225 inode->i_mode = mode; 226 inode->i_mode = mode;
226 inode->i_uid = fc->user_id; 227 inode->i_uid = fc->user_id;
227 inode->i_gid = fc->group_id; 228 inode->i_gid = fc->group_id;
@@ -321,12 +322,10 @@ static int fuse_ctl_fill_super(struct super_block *sb, void *data, int silent)
321 return 0; 322 return 0;
322} 323}
323 324
324static int fuse_ctl_get_sb(struct file_system_type *fs_type, int flags, 325static struct dentry *fuse_ctl_mount(struct file_system_type *fs_type,
325 const char *dev_name, void *raw_data, 326 int flags, const char *dev_name, void *raw_data)
326 struct vfsmount *mnt)
327{ 327{
328 return get_sb_single(fs_type, flags, raw_data, 328 return mount_single(fs_type, flags, raw_data, fuse_ctl_fill_super);
329 fuse_ctl_fill_super, mnt);
330} 329}
331 330
332static void fuse_ctl_kill_sb(struct super_block *sb) 331static void fuse_ctl_kill_sb(struct super_block *sb)
@@ -345,7 +344,7 @@ static void fuse_ctl_kill_sb(struct super_block *sb)
345static struct file_system_type fuse_ctl_fs_type = { 344static struct file_system_type fuse_ctl_fs_type = {
346 .owner = THIS_MODULE, 345 .owner = THIS_MODULE,
347 .name = "fusectl", 346 .name = "fusectl",
348 .get_sb = fuse_ctl_get_sb, 347 .mount = fuse_ctl_mount,
349 .kill_sb = fuse_ctl_kill_sb, 348 .kill_sb = fuse_ctl_kill_sb,
350}; 349};
351 350
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index cde755cca564..6e07696308dc 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -809,11 +809,9 @@ static int fuse_copy_page(struct fuse_copy_state *cs, struct page **pagep,
809 int err; 809 int err;
810 struct page *page = *pagep; 810 struct page *page = *pagep;
811 811
812 if (page && zeroing && count < PAGE_SIZE) { 812 if (page && zeroing && count < PAGE_SIZE)
813 void *mapaddr = kmap_atomic(page, KM_USER1); 813 clear_highpage(page);
814 memset(mapaddr, 0, PAGE_SIZE); 814
815 kunmap_atomic(mapaddr, KM_USER1);
816 }
817 while (count) { 815 while (count) {
818 if (cs->write && cs->pipebufs && page) { 816 if (cs->write && cs->pipebufs && page) {
819 return fuse_ref_page(cs, page, offset, count); 817 return fuse_ref_page(cs, page, offset, count);
@@ -830,10 +828,10 @@ static int fuse_copy_page(struct fuse_copy_state *cs, struct page **pagep,
830 } 828 }
831 } 829 }
832 if (page) { 830 if (page) {
833 void *mapaddr = kmap_atomic(page, KM_USER1); 831 void *mapaddr = kmap_atomic(page, KM_USER0);
834 void *buf = mapaddr + offset; 832 void *buf = mapaddr + offset;
835 offset += fuse_copy_do(cs, &buf, &count); 833 offset += fuse_copy_do(cs, &buf, &count);
836 kunmap_atomic(mapaddr, KM_USER1); 834 kunmap_atomic(mapaddr, KM_USER0);
837 } else 835 } else
838 offset += fuse_copy_do(cs, NULL, &count); 836 offset += fuse_copy_do(cs, NULL, &count);
839 } 837 }
@@ -1336,12 +1334,7 @@ out_finish:
1336 1334
1337static void fuse_retrieve_end(struct fuse_conn *fc, struct fuse_req *req) 1335static void fuse_retrieve_end(struct fuse_conn *fc, struct fuse_req *req)
1338{ 1336{
1339 int i; 1337 release_pages(req->pages, req->num_pages, 0);
1340
1341 for (i = 0; i < req->num_pages; i++) {
1342 struct page *page = req->pages[i];
1343 page_cache_release(page);
1344 }
1345} 1338}
1346 1339
1347static int fuse_retrieve(struct fuse_conn *fc, struct inode *inode, 1340static int fuse_retrieve(struct fuse_conn *fc, struct inode *inode,
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index da9e6e11374c..cfce3ad86a92 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -1041,11 +1041,11 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent)
1041 return err; 1041 return err;
1042} 1042}
1043 1043
1044static int fuse_get_sb(struct file_system_type *fs_type, 1044static struct dentry *fuse_mount(struct file_system_type *fs_type,
1045 int flags, const char *dev_name, 1045 int flags, const char *dev_name,
1046 void *raw_data, struct vfsmount *mnt) 1046 void *raw_data)
1047{ 1047{
1048 return get_sb_nodev(fs_type, flags, raw_data, fuse_fill_super, mnt); 1048 return mount_nodev(fs_type, flags, raw_data, fuse_fill_super);
1049} 1049}
1050 1050
1051static void fuse_kill_sb_anon(struct super_block *sb) 1051static void fuse_kill_sb_anon(struct super_block *sb)
@@ -1065,17 +1065,16 @@ static struct file_system_type fuse_fs_type = {
1065 .owner = THIS_MODULE, 1065 .owner = THIS_MODULE,
1066 .name = "fuse", 1066 .name = "fuse",
1067 .fs_flags = FS_HAS_SUBTYPE, 1067 .fs_flags = FS_HAS_SUBTYPE,
1068 .get_sb = fuse_get_sb, 1068 .mount = fuse_mount,
1069 .kill_sb = fuse_kill_sb_anon, 1069 .kill_sb = fuse_kill_sb_anon,
1070}; 1070};
1071 1071
1072#ifdef CONFIG_BLOCK 1072#ifdef CONFIG_BLOCK
1073static int fuse_get_sb_blk(struct file_system_type *fs_type, 1073static struct dentry *fuse_mount_blk(struct file_system_type *fs_type,
1074 int flags, const char *dev_name, 1074 int flags, const char *dev_name,
1075 void *raw_data, struct vfsmount *mnt) 1075 void *raw_data)
1076{ 1076{
1077 return get_sb_bdev(fs_type, flags, dev_name, raw_data, fuse_fill_super, 1077 return mount_bdev(fs_type, flags, dev_name, raw_data, fuse_fill_super);
1078 mnt);
1079} 1078}
1080 1079
1081static void fuse_kill_sb_blk(struct super_block *sb) 1080static void fuse_kill_sb_blk(struct super_block *sb)
@@ -1094,7 +1093,7 @@ static void fuse_kill_sb_blk(struct super_block *sb)
1094static struct file_system_type fuseblk_fs_type = { 1093static struct file_system_type fuseblk_fs_type = {
1095 .owner = THIS_MODULE, 1094 .owner = THIS_MODULE,
1096 .name = "fuseblk", 1095 .name = "fuseblk",
1097 .get_sb = fuse_get_sb_blk, 1096 .mount = fuse_mount_blk,
1098 .kill_sb = fuse_kill_sb_blk, 1097 .kill_sb = fuse_kill_sb_blk,
1099 .fs_flags = FS_REQUIRES_DEV | FS_HAS_SUBTYPE, 1098 .fs_flags = FS_REQUIRES_DEV | FS_HAS_SUBTYPE,
1100}; 1099};
diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c
index 6b24afb96aae..4f36f8832b9b 100644
--- a/fs/gfs2/aops.c
+++ b/fs/gfs2/aops.c
@@ -618,7 +618,6 @@ static int gfs2_write_begin(struct file *file, struct address_space *mapping,
618 struct gfs2_alloc *al = NULL; 618 struct gfs2_alloc *al = NULL;
619 pgoff_t index = pos >> PAGE_CACHE_SHIFT; 619 pgoff_t index = pos >> PAGE_CACHE_SHIFT;
620 unsigned from = pos & (PAGE_CACHE_SIZE - 1); 620 unsigned from = pos & (PAGE_CACHE_SIZE - 1);
621 unsigned to = from + len;
622 struct page *page; 621 struct page *page;
623 622
624 gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &ip->i_gh); 623 gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &ip->i_gh);
@@ -691,7 +690,7 @@ static int gfs2_write_begin(struct file *file, struct address_space *mapping,
691 } 690 }
692 691
693prepare_write: 692prepare_write:
694 error = block_prepare_write(page, from, to, gfs2_block_map); 693 error = __block_write_begin(page, from, len, gfs2_block_map);
695out: 694out:
696 if (error == 0) 695 if (error == 0)
697 return 0; 696 return 0;
diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c
index f3b071f921aa..939739c7b3f9 100644
--- a/fs/gfs2/meta_io.c
+++ b/fs/gfs2/meta_io.c
@@ -55,7 +55,7 @@ static int gfs2_aspace_writepage(struct page *page, struct writeback_control *wb
55 * activity, but those code paths have their own higher-level 55 * activity, but those code paths have their own higher-level
56 * throttling. 56 * throttling.
57 */ 57 */
58 if (wbc->sync_mode != WB_SYNC_NONE || !wbc->nonblocking) { 58 if (wbc->sync_mode != WB_SYNC_NONE) {
59 lock_buffer(bh); 59 lock_buffer(bh);
60 } else if (!trylock_buffer(bh)) { 60 } else if (!trylock_buffer(bh)) {
61 redirty_page_for_writepage(wbc, page); 61 redirty_page_for_writepage(wbc, page);
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index aeafc233dc89..3eb1393f7b81 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -1219,7 +1219,6 @@ fail_sb:
1219fail_locking: 1219fail_locking:
1220 init_locking(sdp, &mount_gh, UNDO); 1220 init_locking(sdp, &mount_gh, UNDO);
1221fail_lm: 1221fail_lm:
1222 invalidate_inodes(sb);
1223 gfs2_gl_hash_clear(sdp); 1222 gfs2_gl_hash_clear(sdp);
1224 gfs2_lm_unmount(sdp); 1223 gfs2_lm_unmount(sdp);
1225fail_sys: 1224fail_sys:
@@ -1251,12 +1250,11 @@ static int test_gfs2_super(struct super_block *s, void *ptr)
1251} 1250}
1252 1251
1253/** 1252/**
1254 * gfs2_get_sb - Get the GFS2 superblock 1253 * gfs2_mount - Get the GFS2 superblock
1255 * @fs_type: The GFS2 filesystem type 1254 * @fs_type: The GFS2 filesystem type
1256 * @flags: Mount flags 1255 * @flags: Mount flags
1257 * @dev_name: The name of the device 1256 * @dev_name: The name of the device
1258 * @data: The mount arguments 1257 * @data: The mount arguments
1259 * @mnt: The vfsmnt for this mount
1260 * 1258 *
1261 * Q. Why not use get_sb_bdev() ? 1259 * Q. Why not use get_sb_bdev() ?
1262 * A. We need to select one of two root directories to mount, independent 1260 * A. We need to select one of two root directories to mount, independent
@@ -1265,8 +1263,8 @@ static int test_gfs2_super(struct super_block *s, void *ptr)
1265 * Returns: 0 or -ve on error 1263 * Returns: 0 or -ve on error
1266 */ 1264 */
1267 1265
1268static int gfs2_get_sb(struct file_system_type *fs_type, int flags, 1266static struct dentry *gfs2_mount(struct file_system_type *fs_type, int flags,
1269 const char *dev_name, void *data, struct vfsmount *mnt) 1267 const char *dev_name, void *data)
1270{ 1268{
1271 struct block_device *bdev; 1269 struct block_device *bdev;
1272 struct super_block *s; 1270 struct super_block *s;
@@ -1280,7 +1278,7 @@ static int gfs2_get_sb(struct file_system_type *fs_type, int flags,
1280 1278
1281 bdev = open_bdev_exclusive(dev_name, mode, fs_type); 1279 bdev = open_bdev_exclusive(dev_name, mode, fs_type);
1282 if (IS_ERR(bdev)) 1280 if (IS_ERR(bdev))
1283 return PTR_ERR(bdev); 1281 return ERR_CAST(bdev);
1284 1282
1285 /* 1283 /*
1286 * once the super is inserted into the list by sget, s_umount 1284 * once the super is inserted into the list by sget, s_umount
@@ -1299,6 +1297,9 @@ static int gfs2_get_sb(struct file_system_type *fs_type, int flags,
1299 if (IS_ERR(s)) 1297 if (IS_ERR(s))
1300 goto error_bdev; 1298 goto error_bdev;
1301 1299
1300 if (s->s_root)
1301 close_bdev_exclusive(bdev, mode);
1302
1302 memset(&args, 0, sizeof(args)); 1303 memset(&args, 0, sizeof(args));
1303 args.ar_quota = GFS2_QUOTA_DEFAULT; 1304 args.ar_quota = GFS2_QUOTA_DEFAULT;
1304 args.ar_data = GFS2_DATA_DEFAULT; 1305 args.ar_data = GFS2_DATA_DEFAULT;
@@ -1310,17 +1311,13 @@ static int gfs2_get_sb(struct file_system_type *fs_type, int flags,
1310 error = gfs2_mount_args(&args, data); 1311 error = gfs2_mount_args(&args, data);
1311 if (error) { 1312 if (error) {
1312 printk(KERN_WARNING "GFS2: can't parse mount arguments\n"); 1313 printk(KERN_WARNING "GFS2: can't parse mount arguments\n");
1313 if (s->s_root) 1314 goto error_super;
1314 goto error_super;
1315 deactivate_locked_super(s);
1316 return error;
1317 } 1315 }
1318 1316
1319 if (s->s_root) { 1317 if (s->s_root) {
1320 error = -EBUSY; 1318 error = -EBUSY;
1321 if ((flags ^ s->s_flags) & MS_RDONLY) 1319 if ((flags ^ s->s_flags) & MS_RDONLY)
1322 goto error_super; 1320 goto error_super;
1323 close_bdev_exclusive(bdev, mode);
1324 } else { 1321 } else {
1325 char b[BDEVNAME_SIZE]; 1322 char b[BDEVNAME_SIZE];
1326 1323
@@ -1329,27 +1326,24 @@ static int gfs2_get_sb(struct file_system_type *fs_type, int flags,
1329 strlcpy(s->s_id, bdevname(bdev, b), sizeof(s->s_id)); 1326 strlcpy(s->s_id, bdevname(bdev, b), sizeof(s->s_id));
1330 sb_set_blocksize(s, block_size(bdev)); 1327 sb_set_blocksize(s, block_size(bdev));
1331 error = fill_super(s, &args, flags & MS_SILENT ? 1 : 0); 1328 error = fill_super(s, &args, flags & MS_SILENT ? 1 : 0);
1332 if (error) { 1329 if (error)
1333 deactivate_locked_super(s); 1330 goto error_super;
1334 return error;
1335 }
1336 s->s_flags |= MS_ACTIVE; 1331 s->s_flags |= MS_ACTIVE;
1337 bdev->bd_super = s; 1332 bdev->bd_super = s;
1338 } 1333 }
1339 1334
1340 sdp = s->s_fs_info; 1335 sdp = s->s_fs_info;
1341 mnt->mnt_sb = s;
1342 if (args.ar_meta) 1336 if (args.ar_meta)
1343 mnt->mnt_root = dget(sdp->sd_master_dir); 1337 return dget(sdp->sd_master_dir);
1344 else 1338 else
1345 mnt->mnt_root = dget(sdp->sd_root_dir); 1339 return dget(sdp->sd_root_dir);
1346 return 0;
1347 1340
1348error_super: 1341error_super:
1349 deactivate_locked_super(s); 1342 deactivate_locked_super(s);
1343 return ERR_PTR(error);
1350error_bdev: 1344error_bdev:
1351 close_bdev_exclusive(bdev, mode); 1345 close_bdev_exclusive(bdev, mode);
1352 return error; 1346 return ERR_PTR(error);
1353} 1347}
1354 1348
1355static int set_meta_super(struct super_block *s, void *ptr) 1349static int set_meta_super(struct super_block *s, void *ptr)
@@ -1357,8 +1351,8 @@ static int set_meta_super(struct super_block *s, void *ptr)
1357 return -EINVAL; 1351 return -EINVAL;
1358} 1352}
1359 1353
1360static int gfs2_get_sb_meta(struct file_system_type *fs_type, int flags, 1354static struct dentry *gfs2_mount_meta(struct file_system_type *fs_type,
1361 const char *dev_name, void *data, struct vfsmount *mnt) 1355 int flags, const char *dev_name, void *data)
1362{ 1356{
1363 struct super_block *s; 1357 struct super_block *s;
1364 struct gfs2_sbd *sdp; 1358 struct gfs2_sbd *sdp;
@@ -1369,23 +1363,21 @@ static int gfs2_get_sb_meta(struct file_system_type *fs_type, int flags,
1369 if (error) { 1363 if (error) {
1370 printk(KERN_WARNING "GFS2: path_lookup on %s returned error %d\n", 1364 printk(KERN_WARNING "GFS2: path_lookup on %s returned error %d\n",
1371 dev_name, error); 1365 dev_name, error);
1372 return error; 1366 return ERR_PTR(error);
1373 } 1367 }
1374 s = sget(&gfs2_fs_type, test_gfs2_super, set_meta_super, 1368 s = sget(&gfs2_fs_type, test_gfs2_super, set_meta_super,
1375 path.dentry->d_inode->i_sb->s_bdev); 1369 path.dentry->d_inode->i_sb->s_bdev);
1376 path_put(&path); 1370 path_put(&path);
1377 if (IS_ERR(s)) { 1371 if (IS_ERR(s)) {
1378 printk(KERN_WARNING "GFS2: gfs2 mount does not exist\n"); 1372 printk(KERN_WARNING "GFS2: gfs2 mount does not exist\n");
1379 return PTR_ERR(s); 1373 return ERR_CAST(s);
1380 } 1374 }
1381 if ((flags ^ s->s_flags) & MS_RDONLY) { 1375 if ((flags ^ s->s_flags) & MS_RDONLY) {
1382 deactivate_locked_super(s); 1376 deactivate_locked_super(s);
1383 return -EBUSY; 1377 return ERR_PTR(-EBUSY);
1384 } 1378 }
1385 sdp = s->s_fs_info; 1379 sdp = s->s_fs_info;
1386 mnt->mnt_sb = s; 1380 return dget(sdp->sd_master_dir);
1387 mnt->mnt_root = dget(sdp->sd_master_dir);
1388 return 0;
1389} 1381}
1390 1382
1391static void gfs2_kill_sb(struct super_block *sb) 1383static void gfs2_kill_sb(struct super_block *sb)
@@ -1411,7 +1403,7 @@ static void gfs2_kill_sb(struct super_block *sb)
1411struct file_system_type gfs2_fs_type = { 1403struct file_system_type gfs2_fs_type = {
1412 .name = "gfs2", 1404 .name = "gfs2",
1413 .fs_flags = FS_REQUIRES_DEV, 1405 .fs_flags = FS_REQUIRES_DEV,
1414 .get_sb = gfs2_get_sb, 1406 .mount = gfs2_mount,
1415 .kill_sb = gfs2_kill_sb, 1407 .kill_sb = gfs2_kill_sb,
1416 .owner = THIS_MODULE, 1408 .owner = THIS_MODULE,
1417}; 1409};
@@ -1419,7 +1411,7 @@ struct file_system_type gfs2_fs_type = {
1419struct file_system_type gfs2meta_fs_type = { 1411struct file_system_type gfs2meta_fs_type = {
1420 .name = "gfs2meta", 1412 .name = "gfs2meta",
1421 .fs_flags = FS_REQUIRES_DEV, 1413 .fs_flags = FS_REQUIRES_DEV,
1422 .get_sb = gfs2_get_sb_meta, 1414 .mount = gfs2_mount_meta,
1423 .owner = THIS_MODULE, 1415 .owner = THIS_MODULE,
1424}; 1416};
1425 1417
diff --git a/fs/gfs2/ops_inode.c b/fs/gfs2/ops_inode.c
index 0534510200d5..12cbea7502c2 100644
--- a/fs/gfs2/ops_inode.c
+++ b/fs/gfs2/ops_inode.c
@@ -255,7 +255,7 @@ out_parent:
255 gfs2_holder_uninit(ghs); 255 gfs2_holder_uninit(ghs);
256 gfs2_holder_uninit(ghs + 1); 256 gfs2_holder_uninit(ghs + 1);
257 if (!error) { 257 if (!error) {
258 atomic_inc(&inode->i_count); 258 ihold(inode);
259 d_instantiate(dentry, inode); 259 d_instantiate(dentry, inode);
260 mark_inode_dirty(inode); 260 mark_inode_dirty(inode);
261 } 261 }
@@ -1294,7 +1294,7 @@ static int write_empty_blocks(struct page *page, unsigned from, unsigned to)
1294 int error; 1294 int error;
1295 1295
1296 if (!page_has_buffers(page)) { 1296 if (!page_has_buffers(page)) {
1297 error = block_prepare_write(page, from, to, gfs2_block_map); 1297 error = __block_write_begin(page, from, to - from, gfs2_block_map);
1298 if (unlikely(error)) 1298 if (unlikely(error))
1299 return error; 1299 return error;
1300 1300
@@ -1313,7 +1313,7 @@ static int write_empty_blocks(struct page *page, unsigned from, unsigned to)
1313 next += bh->b_size; 1313 next += bh->b_size;
1314 if (buffer_mapped(bh)) { 1314 if (buffer_mapped(bh)) {
1315 if (end) { 1315 if (end) {
1316 error = block_prepare_write(page, start, end, 1316 error = __block_write_begin(page, start, end - start,
1317 gfs2_block_map); 1317 gfs2_block_map);
1318 if (unlikely(error)) 1318 if (unlikely(error))
1319 return error; 1319 return error;
@@ -1328,7 +1328,7 @@ static int write_empty_blocks(struct page *page, unsigned from, unsigned to)
1328 } while (next < to); 1328 } while (next < to);
1329 1329
1330 if (end) { 1330 if (end) {
1331 error = block_prepare_write(page, start, end, gfs2_block_map); 1331 error = __block_write_begin(page, start, end - start, gfs2_block_map);
1332 if (unlikely(error)) 1332 if (unlikely(error))
1333 return error; 1333 return error;
1334 empty_write_end(page, start, end); 1334 empty_write_end(page, start, end);
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index 047d1176096c..2b2c4997430b 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -857,7 +857,6 @@ restart:
857 gfs2_clear_rgrpd(sdp); 857 gfs2_clear_rgrpd(sdp);
858 gfs2_jindex_free(sdp); 858 gfs2_jindex_free(sdp);
859 /* Take apart glock structures and buffer lists */ 859 /* Take apart glock structures and buffer lists */
860 invalidate_inodes(sdp->sd_vfs);
861 gfs2_gl_hash_clear(sdp); 860 gfs2_gl_hash_clear(sdp);
862 /* Unmount the locking protocol */ 861 /* Unmount the locking protocol */
863 gfs2_lm_unmount(sdp); 862 gfs2_lm_unmount(sdp);
diff --git a/fs/hfs/hfs_fs.h b/fs/hfs/hfs_fs.h
index 4f55651aaa51..c8cffb81e849 100644
--- a/fs/hfs/hfs_fs.h
+++ b/fs/hfs/hfs_fs.h
@@ -147,8 +147,6 @@ struct hfs_sb_info {
147 u16 blockoffset; 147 u16 blockoffset;
148 148
149 int fs_div; 149 int fs_div;
150
151 struct hlist_head rsrc_inodes;
152}; 150};
153 151
154#define HFS_FLG_BITMAP_DIRTY 0 152#define HFS_FLG_BITMAP_DIRTY 0
@@ -254,17 +252,6 @@ static inline void hfs_bitmap_dirty(struct super_block *sb)
254 sb->s_dirt = 1; 252 sb->s_dirt = 1;
255} 253}
256 254
257static inline void hfs_buffer_sync(struct buffer_head *bh)
258{
259 while (buffer_locked(bh)) {
260 wait_on_buffer(bh);
261 }
262 if (buffer_dirty(bh)) {
263 ll_rw_block(WRITE, 1, &bh);
264 wait_on_buffer(bh);
265 }
266}
267
268#define sb_bread512(sb, sec, data) ({ \ 255#define sb_bread512(sb, sec, data) ({ \
269 struct buffer_head *__bh; \ 256 struct buffer_head *__bh; \
270 sector_t __block; \ 257 sector_t __block; \
diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c
index 397b7adc7ce6..dffb4e996643 100644
--- a/fs/hfs/inode.c
+++ b/fs/hfs/inode.c
@@ -524,7 +524,7 @@ static struct dentry *hfs_file_lookup(struct inode *dir, struct dentry *dentry,
524 HFS_I(inode)->rsrc_inode = dir; 524 HFS_I(inode)->rsrc_inode = dir;
525 HFS_I(dir)->rsrc_inode = inode; 525 HFS_I(dir)->rsrc_inode = inode;
526 igrab(dir); 526 igrab(dir);
527 hlist_add_head(&inode->i_hash, &HFS_SB(dir->i_sb)->rsrc_inodes); 527 hlist_add_fake(&inode->i_hash);
528 mark_inode_dirty(inode); 528 mark_inode_dirty(inode);
529out: 529out:
530 d_add(dentry, inode); 530 d_add(dentry, inode);
diff --git a/fs/hfs/mdb.c b/fs/hfs/mdb.c
index 86428f5ac991..1563d5ce5764 100644
--- a/fs/hfs/mdb.c
+++ b/fs/hfs/mdb.c
@@ -220,7 +220,7 @@ int hfs_mdb_get(struct super_block *sb)
220 mdb->drLsMod = hfs_mtime(); 220 mdb->drLsMod = hfs_mtime();
221 221
222 mark_buffer_dirty(HFS_SB(sb)->mdb_bh); 222 mark_buffer_dirty(HFS_SB(sb)->mdb_bh);
223 hfs_buffer_sync(HFS_SB(sb)->mdb_bh); 223 sync_dirty_buffer(HFS_SB(sb)->mdb_bh);
224 } 224 }
225 225
226 return 0; 226 return 0;
@@ -287,7 +287,7 @@ void hfs_mdb_commit(struct super_block *sb)
287 HFS_SB(sb)->alt_mdb->drAtrb |= cpu_to_be16(HFS_SB_ATTRIB_UNMNT); 287 HFS_SB(sb)->alt_mdb->drAtrb |= cpu_to_be16(HFS_SB_ATTRIB_UNMNT);
288 HFS_SB(sb)->alt_mdb->drAtrb &= cpu_to_be16(~HFS_SB_ATTRIB_INCNSTNT); 288 HFS_SB(sb)->alt_mdb->drAtrb &= cpu_to_be16(~HFS_SB_ATTRIB_INCNSTNT);
289 mark_buffer_dirty(HFS_SB(sb)->alt_mdb_bh); 289 mark_buffer_dirty(HFS_SB(sb)->alt_mdb_bh);
290 hfs_buffer_sync(HFS_SB(sb)->alt_mdb_bh); 290 sync_dirty_buffer(HFS_SB(sb)->alt_mdb_bh);
291 } 291 }
292 292
293 if (test_and_clear_bit(HFS_FLG_BITMAP_DIRTY, &HFS_SB(sb)->flags)) { 293 if (test_and_clear_bit(HFS_FLG_BITMAP_DIRTY, &HFS_SB(sb)->flags)) {
diff --git a/fs/hfs/super.c b/fs/hfs/super.c
index 33254160f650..4824c27cebb8 100644
--- a/fs/hfs/super.c
+++ b/fs/hfs/super.c
@@ -382,7 +382,6 @@ static int hfs_fill_super(struct super_block *sb, void *data, int silent)
382 return -ENOMEM; 382 return -ENOMEM;
383 383
384 sb->s_fs_info = sbi; 384 sb->s_fs_info = sbi;
385 INIT_HLIST_HEAD(&sbi->rsrc_inodes);
386 385
387 res = -EINVAL; 386 res = -EINVAL;
388 if (!parse_options((char *)data, sbi)) { 387 if (!parse_options((char *)data, sbi)) {
@@ -442,17 +441,16 @@ bail:
442 return res; 441 return res;
443} 442}
444 443
445static int hfs_get_sb(struct file_system_type *fs_type, 444static struct dentry *hfs_mount(struct file_system_type *fs_type,
446 int flags, const char *dev_name, void *data, 445 int flags, const char *dev_name, void *data)
447 struct vfsmount *mnt)
448{ 446{
449 return get_sb_bdev(fs_type, flags, dev_name, data, hfs_fill_super, mnt); 447 return mount_bdev(fs_type, flags, dev_name, data, hfs_fill_super);
450} 448}
451 449
452static struct file_system_type hfs_fs_type = { 450static struct file_system_type hfs_fs_type = {
453 .owner = THIS_MODULE, 451 .owner = THIS_MODULE,
454 .name = "hfs", 452 .name = "hfs",
455 .get_sb = hfs_get_sb, 453 .mount = hfs_mount,
456 .kill_sb = kill_block_super, 454 .kill_sb = kill_block_super,
457 .fs_flags = FS_REQUIRES_DEV, 455 .fs_flags = FS_REQUIRES_DEV,
458}; 456};
diff --git a/fs/hfsplus/dir.c b/fs/hfsplus/dir.c
index d236d85ec9d7..9d59c0571f59 100644
--- a/fs/hfsplus/dir.c
+++ b/fs/hfsplus/dir.c
@@ -286,7 +286,7 @@ static int hfsplus_link(struct dentry *src_dentry, struct inode *dst_dir,
286 286
287 inc_nlink(inode); 287 inc_nlink(inode);
288 hfsplus_instantiate(dst_dentry, inode, cnid); 288 hfsplus_instantiate(dst_dentry, inode, cnid);
289 atomic_inc(&inode->i_count); 289 ihold(inode);
290 inode->i_ctime = CURRENT_TIME_SEC; 290 inode->i_ctime = CURRENT_TIME_SEC;
291 mark_inode_dirty(inode); 291 mark_inode_dirty(inode);
292 sbi->file_count++; 292 sbi->file_count++;
@@ -317,8 +317,10 @@ static int hfsplus_unlink(struct inode *dir, struct dentry *dentry)
317 res = hfsplus_rename_cat(inode->i_ino, 317 res = hfsplus_rename_cat(inode->i_ino,
318 dir, &dentry->d_name, 318 dir, &dentry->d_name,
319 sbi->hidden_dir, &str); 319 sbi->hidden_dir, &str);
320 if (!res) 320 if (!res) {
321 inode->i_flags |= S_DEAD; 321 inode->i_flags |= S_DEAD;
322 drop_nlink(inode);
323 }
322 goto out; 324 goto out;
323 } 325 }
324 res = hfsplus_delete_cat(cnid, dir, &dentry->d_name); 326 res = hfsplus_delete_cat(cnid, dir, &dentry->d_name);
diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c
index 78449280dae0..8afd7e84f98d 100644
--- a/fs/hfsplus/inode.c
+++ b/fs/hfsplus/inode.c
@@ -211,7 +211,7 @@ static struct dentry *hfsplus_file_lookup(struct inode *dir, struct dentry *dent
211 * appear hashed, but do not put on any lists. hlist_del() 211 * appear hashed, but do not put on any lists. hlist_del()
212 * will work fine and require no locking. 212 * will work fine and require no locking.
213 */ 213 */
214 inode->i_hash.pprev = &inode->i_hash.next; 214 hlist_add_fake(&inode->i_hash);
215 215
216 mark_inode_dirty(inode); 216 mark_inode_dirty(inode);
217out: 217out:
diff --git a/fs/hfsplus/ioctl.c b/fs/hfsplus/ioctl.c
index 5b4667e08ef7..40a85a3ded6e 100644
--- a/fs/hfsplus/ioctl.c
+++ b/fs/hfsplus/ioctl.c
@@ -92,7 +92,7 @@ static int hfsplus_ioctl_setflags(struct file *file, int __user *user_flags)
92 mark_inode_dirty(inode); 92 mark_inode_dirty(inode);
93 93
94out_unlock_inode: 94out_unlock_inode:
95 mutex_lock(&inode->i_mutex); 95 mutex_unlock(&inode->i_mutex);
96out_drop_write: 96out_drop_write:
97 mnt_drop_write(file->f_path.mnt); 97 mnt_drop_write(file->f_path.mnt);
98out: 98out:
diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c
index 9a88d7536103..52cc746d3ba3 100644
--- a/fs/hfsplus/super.c
+++ b/fs/hfsplus/super.c
@@ -495,18 +495,16 @@ static void hfsplus_destroy_inode(struct inode *inode)
495 495
496#define HFSPLUS_INODE_SIZE sizeof(struct hfsplus_inode_info) 496#define HFSPLUS_INODE_SIZE sizeof(struct hfsplus_inode_info)
497 497
498static int hfsplus_get_sb(struct file_system_type *fs_type, 498static struct dentry *hfsplus_mount(struct file_system_type *fs_type,
499 int flags, const char *dev_name, void *data, 499 int flags, const char *dev_name, void *data)
500 struct vfsmount *mnt)
501{ 500{
502 return get_sb_bdev(fs_type, flags, dev_name, data, hfsplus_fill_super, 501 return mount_bdev(fs_type, flags, dev_name, data, hfsplus_fill_super);
503 mnt);
504} 502}
505 503
506static struct file_system_type hfsplus_fs_type = { 504static struct file_system_type hfsplus_fs_type = {
507 .owner = THIS_MODULE, 505 .owner = THIS_MODULE,
508 .name = "hfsplus", 506 .name = "hfsplus",
509 .get_sb = hfsplus_get_sb, 507 .mount = hfsplus_mount,
510 .kill_sb = kill_block_super, 508 .kill_sb = kill_block_super,
511 .fs_flags = FS_REQUIRES_DEV, 509 .fs_flags = FS_REQUIRES_DEV,
512}; 510};
diff --git a/fs/hostfs/hostfs.h b/fs/hostfs/hostfs.h
index 7c232c1487ee..bf15a43016b9 100644
--- a/fs/hostfs/hostfs.h
+++ b/fs/hostfs/hostfs.h
@@ -91,7 +91,6 @@ extern int rename_file(char *from, char *to);
91extern int do_statfs(char *root, long *bsize_out, long long *blocks_out, 91extern int do_statfs(char *root, long *bsize_out, long long *blocks_out,
92 long long *bfree_out, long long *bavail_out, 92 long long *bfree_out, long long *bavail_out,
93 long long *files_out, long long *ffree_out, 93 long long *files_out, long long *ffree_out,
94 void *fsid_out, int fsid_size, long *namelen_out, 94 void *fsid_out, int fsid_size, long *namelen_out);
95 long *spare_out);
96 95
97#endif 96#endif
diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c
index f7dc9b5f9ef8..2c0f148a49e6 100644
--- a/fs/hostfs/hostfs_kern.c
+++ b/fs/hostfs/hostfs_kern.c
@@ -217,7 +217,7 @@ int hostfs_statfs(struct dentry *dentry, struct kstatfs *sf)
217 err = do_statfs(dentry->d_sb->s_fs_info, 217 err = do_statfs(dentry->d_sb->s_fs_info,
218 &sf->f_bsize, &f_blocks, &f_bfree, &f_bavail, &f_files, 218 &sf->f_bsize, &f_blocks, &f_bfree, &f_bavail, &f_files,
219 &f_ffree, &sf->f_fsid, sizeof(sf->f_fsid), 219 &f_ffree, &sf->f_fsid, sizeof(sf->f_fsid),
220 &sf->f_namelen, sf->f_spare); 220 &sf->f_namelen);
221 if (err) 221 if (err)
222 return err; 222 return err;
223 sf->f_blocks = f_blocks; 223 sf->f_blocks = f_blocks;
@@ -962,11 +962,11 @@ out:
962 return err; 962 return err;
963} 963}
964 964
965static int hostfs_read_sb(struct file_system_type *type, 965static struct dentry *hostfs_read_sb(struct file_system_type *type,
966 int flags, const char *dev_name, 966 int flags, const char *dev_name,
967 void *data, struct vfsmount *mnt) 967 void *data)
968{ 968{
969 return get_sb_nodev(type, flags, data, hostfs_fill_sb_common, mnt); 969 return mount_nodev(type, flags, data, hostfs_fill_sb_common);
970} 970}
971 971
972static void hostfs_kill_sb(struct super_block *s) 972static void hostfs_kill_sb(struct super_block *s)
@@ -978,7 +978,7 @@ static void hostfs_kill_sb(struct super_block *s)
978static struct file_system_type hostfs_type = { 978static struct file_system_type hostfs_type = {
979 .owner = THIS_MODULE, 979 .owner = THIS_MODULE,
980 .name = "hostfs", 980 .name = "hostfs",
981 .get_sb = hostfs_read_sb, 981 .mount = hostfs_read_sb,
982 .kill_sb = hostfs_kill_sb, 982 .kill_sb = hostfs_kill_sb,
983 .fs_flags = 0, 983 .fs_flags = 0,
984}; 984};
diff --git a/fs/hostfs/hostfs_user.c b/fs/hostfs/hostfs_user.c
index 6777aa06ce2c..d51a98384bc0 100644
--- a/fs/hostfs/hostfs_user.c
+++ b/fs/hostfs/hostfs_user.c
@@ -94,8 +94,7 @@ void *open_dir(char *path, int *err_out)
94 94
95 dir = opendir(path); 95 dir = opendir(path);
96 *err_out = errno; 96 *err_out = errno;
97 if (dir == NULL) 97
98 return NULL;
99 return dir; 98 return dir;
100} 99}
101 100
@@ -205,7 +204,7 @@ int set_attr(const char *file, struct hostfs_iattr *attrs, int fd)
205 if (attrs->ia_valid & HOSTFS_ATTR_MODE) { 204 if (attrs->ia_valid & HOSTFS_ATTR_MODE) {
206 if (fd >= 0) { 205 if (fd >= 0) {
207 if (fchmod(fd, attrs->ia_mode) != 0) 206 if (fchmod(fd, attrs->ia_mode) != 0)
208 return (-errno); 207 return -errno;
209 } else if (chmod(file, attrs->ia_mode) != 0) { 208 } else if (chmod(file, attrs->ia_mode) != 0) {
210 return -errno; 209 return -errno;
211 } 210 }
@@ -364,8 +363,7 @@ int rename_file(char *from, char *to)
364int do_statfs(char *root, long *bsize_out, long long *blocks_out, 363int do_statfs(char *root, long *bsize_out, long long *blocks_out,
365 long long *bfree_out, long long *bavail_out, 364 long long *bfree_out, long long *bavail_out,
366 long long *files_out, long long *ffree_out, 365 long long *files_out, long long *ffree_out,
367 void *fsid_out, int fsid_size, long *namelen_out, 366 void *fsid_out, int fsid_size, long *namelen_out)
368 long *spare_out)
369{ 367{
370 struct statfs64 buf; 368 struct statfs64 buf;
371 int err; 369 int err;
@@ -384,10 +382,6 @@ int do_statfs(char *root, long *bsize_out, long long *blocks_out,
384 sizeof(buf.f_fsid) > fsid_size ? fsid_size : 382 sizeof(buf.f_fsid) > fsid_size ? fsid_size :
385 sizeof(buf.f_fsid)); 383 sizeof(buf.f_fsid));
386 *namelen_out = buf.f_namelen; 384 *namelen_out = buf.f_namelen;
387 spare_out[0] = buf.f_spare[0]; 385
388 spare_out[1] = buf.f_spare[1];
389 spare_out[2] = buf.f_spare[2];
390 spare_out[3] = buf.f_spare[3];
391 spare_out[4] = buf.f_spare[4];
392 return 0; 386 return 0;
393} 387}
diff --git a/fs/hpfs/super.c b/fs/hpfs/super.c
index c969a1aa163a..bb69389972eb 100644
--- a/fs/hpfs/super.c
+++ b/fs/hpfs/super.c
@@ -686,17 +686,16 @@ bail0:
686 return -EINVAL; 686 return -EINVAL;
687} 687}
688 688
689static int hpfs_get_sb(struct file_system_type *fs_type, 689static struct dentry *hpfs_mount(struct file_system_type *fs_type,
690 int flags, const char *dev_name, void *data, struct vfsmount *mnt) 690 int flags, const char *dev_name, void *data)
691{ 691{
692 return get_sb_bdev(fs_type, flags, dev_name, data, hpfs_fill_super, 692 return mount_bdev(fs_type, flags, dev_name, data, hpfs_fill_super);
693 mnt);
694} 693}
695 694
696static struct file_system_type hpfs_fs_type = { 695static struct file_system_type hpfs_fs_type = {
697 .owner = THIS_MODULE, 696 .owner = THIS_MODULE,
698 .name = "hpfs", 697 .name = "hpfs",
699 .get_sb = hpfs_get_sb, 698 .mount = hpfs_mount,
700 .kill_sb = kill_block_super, 699 .kill_sb = kill_block_super,
701 .fs_flags = FS_REQUIRES_DEV, 700 .fs_flags = FS_REQUIRES_DEV,
702}; 701};
diff --git a/fs/hppfs/hppfs.c b/fs/hppfs/hppfs.c
index 4e2a45ea6140..f702b5f713fc 100644
--- a/fs/hppfs/hppfs.c
+++ b/fs/hppfs/hppfs.c
@@ -748,17 +748,17 @@ static int hppfs_fill_super(struct super_block *sb, void *d, int silent)
748 return(err); 748 return(err);
749} 749}
750 750
751static int hppfs_read_super(struct file_system_type *type, 751static struct dentry *hppfs_read_super(struct file_system_type *type,
752 int flags, const char *dev_name, 752 int flags, const char *dev_name,
753 void *data, struct vfsmount *mnt) 753 void *data)
754{ 754{
755 return get_sb_nodev(type, flags, data, hppfs_fill_super, mnt); 755 return mount_nodev(type, flags, data, hppfs_fill_super);
756} 756}
757 757
758static struct file_system_type hppfs_type = { 758static struct file_system_type hppfs_type = {
759 .owner = THIS_MODULE, 759 .owner = THIS_MODULE,
760 .name = "hppfs", 760 .name = "hppfs",
761 .get_sb = hppfs_read_super, 761 .mount = hppfs_read_super,
762 .kill_sb = kill_anon_super, 762 .kill_sb = kill_anon_super,
763 .fs_flags = 0, 763 .fs_flags = 0,
764}; 764};
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index 113eba3d3c38..d6cfac1f0a40 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -31,6 +31,7 @@
31#include <linux/statfs.h> 31#include <linux/statfs.h>
32#include <linux/security.h> 32#include <linux/security.h>
33#include <linux/magic.h> 33#include <linux/magic.h>
34#include <linux/migrate.h>
34 35
35#include <asm/uaccess.h> 36#include <asm/uaccess.h>
36 37
@@ -455,6 +456,7 @@ static struct inode *hugetlbfs_get_inode(struct super_block *sb, uid_t uid,
455 inode = new_inode(sb); 456 inode = new_inode(sb);
456 if (inode) { 457 if (inode) {
457 struct hugetlbfs_inode_info *info; 458 struct hugetlbfs_inode_info *info;
459 inode->i_ino = get_next_ino();
458 inode->i_mode = mode; 460 inode->i_mode = mode;
459 inode->i_uid = uid; 461 inode->i_uid = uid;
460 inode->i_gid = gid; 462 inode->i_gid = gid;
@@ -573,6 +575,19 @@ static int hugetlbfs_set_page_dirty(struct page *page)
573 return 0; 575 return 0;
574} 576}
575 577
578static int hugetlbfs_migrate_page(struct address_space *mapping,
579 struct page *newpage, struct page *page)
580{
581 int rc;
582
583 rc = migrate_huge_page_move_mapping(mapping, newpage, page);
584 if (rc)
585 return rc;
586 migrate_page_copy(newpage, page);
587
588 return 0;
589}
590
576static int hugetlbfs_statfs(struct dentry *dentry, struct kstatfs *buf) 591static int hugetlbfs_statfs(struct dentry *dentry, struct kstatfs *buf)
577{ 592{
578 struct hugetlbfs_sb_info *sbinfo = HUGETLBFS_SB(dentry->d_sb); 593 struct hugetlbfs_sb_info *sbinfo = HUGETLBFS_SB(dentry->d_sb);
@@ -659,6 +674,7 @@ static const struct address_space_operations hugetlbfs_aops = {
659 .write_begin = hugetlbfs_write_begin, 674 .write_begin = hugetlbfs_write_begin,
660 .write_end = hugetlbfs_write_end, 675 .write_end = hugetlbfs_write_end,
661 .set_page_dirty = hugetlbfs_set_page_dirty, 676 .set_page_dirty = hugetlbfs_set_page_dirty,
677 .migratepage = hugetlbfs_migrate_page,
662}; 678};
663 679
664 680
@@ -880,15 +896,15 @@ void hugetlb_put_quota(struct address_space *mapping, long delta)
880 } 896 }
881} 897}
882 898
883static int hugetlbfs_get_sb(struct file_system_type *fs_type, 899static struct dentry *hugetlbfs_mount(struct file_system_type *fs_type,
884 int flags, const char *dev_name, void *data, struct vfsmount *mnt) 900 int flags, const char *dev_name, void *data)
885{ 901{
886 return get_sb_nodev(fs_type, flags, data, hugetlbfs_fill_super, mnt); 902 return mount_nodev(fs_type, flags, data, hugetlbfs_fill_super);
887} 903}
888 904
889static struct file_system_type hugetlbfs_fs_type = { 905static struct file_system_type hugetlbfs_fs_type = {
890 .name = "hugetlbfs", 906 .name = "hugetlbfs",
891 .get_sb = hugetlbfs_get_sb, 907 .mount = hugetlbfs_mount,
892 .kill_sb = kill_litter_super, 908 .kill_sb = kill_litter_super,
893}; 909};
894 910
diff --git a/fs/inode.c b/fs/inode.c
index 86464332e590..ae2727ab0c3a 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -24,11 +24,11 @@
24#include <linux/mount.h> 24#include <linux/mount.h>
25#include <linux/async.h> 25#include <linux/async.h>
26#include <linux/posix_acl.h> 26#include <linux/posix_acl.h>
27#include <linux/ima.h>
27 28
28/* 29/*
29 * This is needed for the following functions: 30 * This is needed for the following functions:
30 * - inode_has_buffers 31 * - inode_has_buffers
31 * - invalidate_inode_buffers
32 * - invalidate_bdev 32 * - invalidate_bdev
33 * 33 *
34 * FIXME: remove all knowledge of the buffer layer from this file 34 * FIXME: remove all knowledge of the buffer layer from this file
@@ -72,8 +72,7 @@ static unsigned int i_hash_shift __read_mostly;
72 * allowing for low-overhead inode sync() operations. 72 * allowing for low-overhead inode sync() operations.
73 */ 73 */
74 74
75LIST_HEAD(inode_in_use); 75static LIST_HEAD(inode_lru);
76LIST_HEAD(inode_unused);
77static struct hlist_head *inode_hashtable __read_mostly; 76static struct hlist_head *inode_hashtable __read_mostly;
78 77
79/* 78/*
@@ -103,8 +102,41 @@ static DECLARE_RWSEM(iprune_sem);
103 */ 102 */
104struct inodes_stat_t inodes_stat; 103struct inodes_stat_t inodes_stat;
105 104
105static struct percpu_counter nr_inodes __cacheline_aligned_in_smp;
106static struct percpu_counter nr_inodes_unused __cacheline_aligned_in_smp;
107
106static struct kmem_cache *inode_cachep __read_mostly; 108static struct kmem_cache *inode_cachep __read_mostly;
107 109
110static inline int get_nr_inodes(void)
111{
112 return percpu_counter_sum_positive(&nr_inodes);
113}
114
115static inline int get_nr_inodes_unused(void)
116{
117 return percpu_counter_sum_positive(&nr_inodes_unused);
118}
119
120int get_nr_dirty_inodes(void)
121{
122 int nr_dirty = get_nr_inodes() - get_nr_inodes_unused();
123 return nr_dirty > 0 ? nr_dirty : 0;
124
125}
126
127/*
128 * Handle nr_inode sysctl
129 */
130#ifdef CONFIG_SYSCTL
131int proc_nr_inodes(ctl_table *table, int write,
132 void __user *buffer, size_t *lenp, loff_t *ppos)
133{
134 inodes_stat.nr_inodes = get_nr_inodes();
135 inodes_stat.nr_unused = get_nr_inodes_unused();
136 return proc_dointvec(table, write, buffer, lenp, ppos);
137}
138#endif
139
108static void wake_up_inode(struct inode *inode) 140static void wake_up_inode(struct inode *inode)
109{ 141{
110 /* 142 /*
@@ -192,6 +224,8 @@ int inode_init_always(struct super_block *sb, struct inode *inode)
192 inode->i_fsnotify_mask = 0; 224 inode->i_fsnotify_mask = 0;
193#endif 225#endif
194 226
227 percpu_counter_inc(&nr_inodes);
228
195 return 0; 229 return 0;
196out: 230out:
197 return -ENOMEM; 231 return -ENOMEM;
@@ -232,11 +266,13 @@ void __destroy_inode(struct inode *inode)
232 if (inode->i_default_acl && inode->i_default_acl != ACL_NOT_CACHED) 266 if (inode->i_default_acl && inode->i_default_acl != ACL_NOT_CACHED)
233 posix_acl_release(inode->i_default_acl); 267 posix_acl_release(inode->i_default_acl);
234#endif 268#endif
269 percpu_counter_dec(&nr_inodes);
235} 270}
236EXPORT_SYMBOL(__destroy_inode); 271EXPORT_SYMBOL(__destroy_inode);
237 272
238void destroy_inode(struct inode *inode) 273static void destroy_inode(struct inode *inode)
239{ 274{
275 BUG_ON(!list_empty(&inode->i_lru));
240 __destroy_inode(inode); 276 __destroy_inode(inode);
241 if (inode->i_sb->s_op->destroy_inode) 277 if (inode->i_sb->s_op->destroy_inode)
242 inode->i_sb->s_op->destroy_inode(inode); 278 inode->i_sb->s_op->destroy_inode(inode);
@@ -255,6 +291,8 @@ void inode_init_once(struct inode *inode)
255 INIT_HLIST_NODE(&inode->i_hash); 291 INIT_HLIST_NODE(&inode->i_hash);
256 INIT_LIST_HEAD(&inode->i_dentry); 292 INIT_LIST_HEAD(&inode->i_dentry);
257 INIT_LIST_HEAD(&inode->i_devices); 293 INIT_LIST_HEAD(&inode->i_devices);
294 INIT_LIST_HEAD(&inode->i_wb_list);
295 INIT_LIST_HEAD(&inode->i_lru);
258 INIT_RADIX_TREE(&inode->i_data.page_tree, GFP_ATOMIC); 296 INIT_RADIX_TREE(&inode->i_data.page_tree, GFP_ATOMIC);
259 spin_lock_init(&inode->i_data.tree_lock); 297 spin_lock_init(&inode->i_data.tree_lock);
260 spin_lock_init(&inode->i_data.i_mmap_lock); 298 spin_lock_init(&inode->i_data.i_mmap_lock);
@@ -281,14 +319,109 @@ static void init_once(void *foo)
281 */ 319 */
282void __iget(struct inode *inode) 320void __iget(struct inode *inode)
283{ 321{
284 if (atomic_inc_return(&inode->i_count) != 1) 322 atomic_inc(&inode->i_count);
285 return; 323}
324
325/*
326 * get additional reference to inode; caller must already hold one.
327 */
328void ihold(struct inode *inode)
329{
330 WARN_ON(atomic_inc_return(&inode->i_count) < 2);
331}
332EXPORT_SYMBOL(ihold);
333
334static void inode_lru_list_add(struct inode *inode)
335{
336 if (list_empty(&inode->i_lru)) {
337 list_add(&inode->i_lru, &inode_lru);
338 percpu_counter_inc(&nr_inodes_unused);
339 }
340}
286 341
287 if (!(inode->i_state & (I_DIRTY|I_SYNC))) 342static void inode_lru_list_del(struct inode *inode)
288 list_move(&inode->i_list, &inode_in_use); 343{
289 inodes_stat.nr_unused--; 344 if (!list_empty(&inode->i_lru)) {
345 list_del_init(&inode->i_lru);
346 percpu_counter_dec(&nr_inodes_unused);
347 }
348}
349
350static inline void __inode_sb_list_add(struct inode *inode)
351{
352 list_add(&inode->i_sb_list, &inode->i_sb->s_inodes);
290} 353}
291 354
355/**
356 * inode_sb_list_add - add inode to the superblock list of inodes
357 * @inode: inode to add
358 */
359void inode_sb_list_add(struct inode *inode)
360{
361 spin_lock(&inode_lock);
362 __inode_sb_list_add(inode);
363 spin_unlock(&inode_lock);
364}
365EXPORT_SYMBOL_GPL(inode_sb_list_add);
366
367static inline void __inode_sb_list_del(struct inode *inode)
368{
369 list_del_init(&inode->i_sb_list);
370}
371
372static unsigned long hash(struct super_block *sb, unsigned long hashval)
373{
374 unsigned long tmp;
375
376 tmp = (hashval * (unsigned long)sb) ^ (GOLDEN_RATIO_PRIME + hashval) /
377 L1_CACHE_BYTES;
378 tmp = tmp ^ ((tmp ^ GOLDEN_RATIO_PRIME) >> I_HASHBITS);
379 return tmp & I_HASHMASK;
380}
381
382/**
383 * __insert_inode_hash - hash an inode
384 * @inode: unhashed inode
385 * @hashval: unsigned long value used to locate this object in the
386 * inode_hashtable.
387 *
388 * Add an inode to the inode hash for this superblock.
389 */
390void __insert_inode_hash(struct inode *inode, unsigned long hashval)
391{
392 struct hlist_head *b = inode_hashtable + hash(inode->i_sb, hashval);
393
394 spin_lock(&inode_lock);
395 hlist_add_head(&inode->i_hash, b);
396 spin_unlock(&inode_lock);
397}
398EXPORT_SYMBOL(__insert_inode_hash);
399
400/**
401 * __remove_inode_hash - remove an inode from the hash
402 * @inode: inode to unhash
403 *
404 * Remove an inode from the inode hash table.
405 */
406static void __remove_inode_hash(struct inode *inode)
407{
408 hlist_del_init(&inode->i_hash);
409}
410
411/**
412 * remove_inode_hash - remove an inode from the hash
413 * @inode: inode to unhash
414 *
415 * Remove an inode from the inode hash table.
416 */
417void remove_inode_hash(struct inode *inode)
418{
419 spin_lock(&inode_lock);
420 hlist_del_init(&inode->i_hash);
421 spin_unlock(&inode_lock);
422}
423EXPORT_SYMBOL(remove_inode_hash);
424
292void end_writeback(struct inode *inode) 425void end_writeback(struct inode *inode)
293{ 426{
294 might_sleep(); 427 might_sleep();
@@ -327,101 +460,113 @@ static void evict(struct inode *inode)
327 */ 460 */
328static void dispose_list(struct list_head *head) 461static void dispose_list(struct list_head *head)
329{ 462{
330 int nr_disposed = 0;
331
332 while (!list_empty(head)) { 463 while (!list_empty(head)) {
333 struct inode *inode; 464 struct inode *inode;
334 465
335 inode = list_first_entry(head, struct inode, i_list); 466 inode = list_first_entry(head, struct inode, i_lru);
336 list_del(&inode->i_list); 467 list_del_init(&inode->i_lru);
337 468
338 evict(inode); 469 evict(inode);
339 470
340 spin_lock(&inode_lock); 471 spin_lock(&inode_lock);
341 hlist_del_init(&inode->i_hash); 472 __remove_inode_hash(inode);
342 list_del_init(&inode->i_sb_list); 473 __inode_sb_list_del(inode);
343 spin_unlock(&inode_lock); 474 spin_unlock(&inode_lock);
344 475
345 wake_up_inode(inode); 476 wake_up_inode(inode);
346 destroy_inode(inode); 477 destroy_inode(inode);
347 nr_disposed++;
348 } 478 }
349 spin_lock(&inode_lock);
350 inodes_stat.nr_inodes -= nr_disposed;
351 spin_unlock(&inode_lock);
352} 479}
353 480
354/* 481/**
355 * Invalidate all inodes for a device. 482 * evict_inodes - evict all evictable inodes for a superblock
483 * @sb: superblock to operate on
484 *
485 * Make sure that no inodes with zero refcount are retained. This is
486 * called by superblock shutdown after having MS_ACTIVE flag removed,
487 * so any inode reaching zero refcount during or after that call will
488 * be immediately evicted.
356 */ 489 */
357static int invalidate_list(struct list_head *head, struct list_head *dispose) 490void evict_inodes(struct super_block *sb)
358{ 491{
359 struct list_head *next; 492 struct inode *inode, *next;
360 int busy = 0, count = 0; 493 LIST_HEAD(dispose);
361
362 next = head->next;
363 for (;;) {
364 struct list_head *tmp = next;
365 struct inode *inode;
366 494
367 /* 495 down_write(&iprune_sem);
368 * We can reschedule here without worrying about the list's
369 * consistency because the per-sb list of inodes must not
370 * change during umount anymore, and because iprune_sem keeps
371 * shrink_icache_memory() away.
372 */
373 cond_resched_lock(&inode_lock);
374 496
375 next = next->next; 497 spin_lock(&inode_lock);
376 if (tmp == head) 498 list_for_each_entry_safe(inode, next, &sb->s_inodes, i_sb_list) {
377 break; 499 if (atomic_read(&inode->i_count))
378 inode = list_entry(tmp, struct inode, i_sb_list);
379 if (inode->i_state & I_NEW)
380 continue; 500 continue;
381 invalidate_inode_buffers(inode); 501
382 if (!atomic_read(&inode->i_count)) { 502 if (inode->i_state & (I_NEW | I_FREEING | I_WILL_FREE)) {
383 list_move(&inode->i_list, dispose); 503 WARN_ON(1);
384 WARN_ON(inode->i_state & I_NEW);
385 inode->i_state |= I_FREEING;
386 count++;
387 continue; 504 continue;
388 } 505 }
389 busy = 1; 506
507 inode->i_state |= I_FREEING;
508
509 /*
510 * Move the inode off the IO lists and LRU once I_FREEING is
511 * set so that it won't get moved back on there if it is dirty.
512 */
513 list_move(&inode->i_lru, &dispose);
514 list_del_init(&inode->i_wb_list);
515 if (!(inode->i_state & (I_DIRTY | I_SYNC)))
516 percpu_counter_dec(&nr_inodes_unused);
390 } 517 }
391 /* only unused inodes may be cached with i_count zero */ 518 spin_unlock(&inode_lock);
392 inodes_stat.nr_unused -= count; 519
393 return busy; 520 dispose_list(&dispose);
521 up_write(&iprune_sem);
394} 522}
395 523
396/** 524/**
397 * invalidate_inodes - discard the inodes on a device 525 * invalidate_inodes - attempt to free all inodes on a superblock
398 * @sb: superblock 526 * @sb: superblock to operate on
399 * 527 *
400 * Discard all of the inodes for a given superblock. If the discard 528 * Attempts to free all inodes for a given superblock. If there were any
401 * fails because there are busy inodes then a non zero value is returned. 529 * busy inodes return a non-zero value, else zero.
402 * If the discard is successful all the inodes have been discarded.
403 */ 530 */
404int invalidate_inodes(struct super_block *sb) 531int invalidate_inodes(struct super_block *sb)
405{ 532{
406 int busy; 533 int busy = 0;
407 LIST_HEAD(throw_away); 534 struct inode *inode, *next;
535 LIST_HEAD(dispose);
408 536
409 down_write(&iprune_sem); 537 down_write(&iprune_sem);
538
410 spin_lock(&inode_lock); 539 spin_lock(&inode_lock);
411 fsnotify_unmount_inodes(&sb->s_inodes); 540 list_for_each_entry_safe(inode, next, &sb->s_inodes, i_sb_list) {
412 busy = invalidate_list(&sb->s_inodes, &throw_away); 541 if (inode->i_state & (I_NEW | I_FREEING | I_WILL_FREE))
542 continue;
543 if (atomic_read(&inode->i_count)) {
544 busy = 1;
545 continue;
546 }
547
548 inode->i_state |= I_FREEING;
549
550 /*
551 * Move the inode off the IO lists and LRU once I_FREEING is
552 * set so that it won't get moved back on there if it is dirty.
553 */
554 list_move(&inode->i_lru, &dispose);
555 list_del_init(&inode->i_wb_list);
556 if (!(inode->i_state & (I_DIRTY | I_SYNC)))
557 percpu_counter_dec(&nr_inodes_unused);
558 }
413 spin_unlock(&inode_lock); 559 spin_unlock(&inode_lock);
414 560
415 dispose_list(&throw_away); 561 dispose_list(&dispose);
416 up_write(&iprune_sem); 562 up_write(&iprune_sem);
417 563
418 return busy; 564 return busy;
419} 565}
420EXPORT_SYMBOL(invalidate_inodes);
421 566
422static int can_unuse(struct inode *inode) 567static int can_unuse(struct inode *inode)
423{ 568{
424 if (inode->i_state) 569 if (inode->i_state & ~I_REFERENCED)
425 return 0; 570 return 0;
426 if (inode_has_buffers(inode)) 571 if (inode_has_buffers(inode))
427 return 0; 572 return 0;
@@ -433,22 +578,24 @@ static int can_unuse(struct inode *inode)
433} 578}
434 579
435/* 580/*
436 * Scan `goal' inodes on the unused list for freeable ones. They are moved to 581 * Scan `goal' inodes on the unused list for freeable ones. They are moved to a
437 * a temporary list and then are freed outside inode_lock by dispose_list(). 582 * temporary list and then are freed outside inode_lock by dispose_list().
438 * 583 *
439 * Any inodes which are pinned purely because of attached pagecache have their 584 * Any inodes which are pinned purely because of attached pagecache have their
440 * pagecache removed. We expect the final iput() on that inode to add it to 585 * pagecache removed. If the inode has metadata buffers attached to
441 * the front of the inode_unused list. So look for it there and if the 586 * mapping->private_list then try to remove them.
442 * inode is still freeable, proceed. The right inode is found 99.9% of the
443 * time in testing on a 4-way.
444 * 587 *
445 * If the inode has metadata buffers attached to mapping->private_list then 588 * If the inode has the I_REFERENCED flag set, then it means that it has been
446 * try to remove them. 589 * used recently - the flag is set in iput_final(). When we encounter such an
590 * inode, clear the flag and move it to the back of the LRU so it gets another
591 * pass through the LRU before it gets reclaimed. This is necessary because of
592 * the fact we are doing lazy LRU updates to minimise lock contention so the
593 * LRU does not have strict ordering. Hence we don't want to reclaim inodes
594 * with this flag set because they are the inodes that are out of order.
447 */ 595 */
448static void prune_icache(int nr_to_scan) 596static void prune_icache(int nr_to_scan)
449{ 597{
450 LIST_HEAD(freeable); 598 LIST_HEAD(freeable);
451 int nr_pruned = 0;
452 int nr_scanned; 599 int nr_scanned;
453 unsigned long reap = 0; 600 unsigned long reap = 0;
454 601
@@ -457,13 +604,26 @@ static void prune_icache(int nr_to_scan)
457 for (nr_scanned = 0; nr_scanned < nr_to_scan; nr_scanned++) { 604 for (nr_scanned = 0; nr_scanned < nr_to_scan; nr_scanned++) {
458 struct inode *inode; 605 struct inode *inode;
459 606
460 if (list_empty(&inode_unused)) 607 if (list_empty(&inode_lru))
461 break; 608 break;
462 609
463 inode = list_entry(inode_unused.prev, struct inode, i_list); 610 inode = list_entry(inode_lru.prev, struct inode, i_lru);
464 611
465 if (inode->i_state || atomic_read(&inode->i_count)) { 612 /*
466 list_move(&inode->i_list, &inode_unused); 613 * Referenced or dirty inodes are still in use. Give them
614 * another pass through the LRU as we cannot reclaim them now.
615 */
616 if (atomic_read(&inode->i_count) ||
617 (inode->i_state & ~I_REFERENCED)) {
618 list_del_init(&inode->i_lru);
619 percpu_counter_dec(&nr_inodes_unused);
620 continue;
621 }
622
623 /* recently referenced inodes get one more pass */
624 if (inode->i_state & I_REFERENCED) {
625 list_move(&inode->i_lru, &inode_lru);
626 inode->i_state &= ~I_REFERENCED;
467 continue; 627 continue;
468 } 628 }
469 if (inode_has_buffers(inode) || inode->i_data.nrpages) { 629 if (inode_has_buffers(inode) || inode->i_data.nrpages) {
@@ -475,18 +635,23 @@ static void prune_icache(int nr_to_scan)
475 iput(inode); 635 iput(inode);
476 spin_lock(&inode_lock); 636 spin_lock(&inode_lock);
477 637
478 if (inode != list_entry(inode_unused.next, 638 if (inode != list_entry(inode_lru.next,
479 struct inode, i_list)) 639 struct inode, i_lru))
480 continue; /* wrong inode or list_empty */ 640 continue; /* wrong inode or list_empty */
481 if (!can_unuse(inode)) 641 if (!can_unuse(inode))
482 continue; 642 continue;
483 } 643 }
484 list_move(&inode->i_list, &freeable);
485 WARN_ON(inode->i_state & I_NEW); 644 WARN_ON(inode->i_state & I_NEW);
486 inode->i_state |= I_FREEING; 645 inode->i_state |= I_FREEING;
487 nr_pruned++; 646
647 /*
648 * Move the inode off the IO lists and LRU once I_FREEING is
649 * set so that it won't get moved back on there if it is dirty.
650 */
651 list_move(&inode->i_lru, &freeable);
652 list_del_init(&inode->i_wb_list);
653 percpu_counter_dec(&nr_inodes_unused);
488 } 654 }
489 inodes_stat.nr_unused -= nr_pruned;
490 if (current_is_kswapd()) 655 if (current_is_kswapd())
491 __count_vm_events(KSWAPD_INODESTEAL, reap); 656 __count_vm_events(KSWAPD_INODESTEAL, reap);
492 else 657 else
@@ -518,7 +683,7 @@ static int shrink_icache_memory(struct shrinker *shrink, int nr, gfp_t gfp_mask)
518 return -1; 683 return -1;
519 prune_icache(nr); 684 prune_icache(nr);
520 } 685 }
521 return (inodes_stat.nr_unused / 100) * sysctl_vfs_cache_pressure; 686 return (get_nr_inodes_unused() / 100) * sysctl_vfs_cache_pressure;
522} 687}
523 688
524static struct shrinker icache_shrinker = { 689static struct shrinker icache_shrinker = {
@@ -529,9 +694,6 @@ static struct shrinker icache_shrinker = {
529static void __wait_on_freeing_inode(struct inode *inode); 694static void __wait_on_freeing_inode(struct inode *inode);
530/* 695/*
531 * Called with the inode lock held. 696 * Called with the inode lock held.
532 * NOTE: we are not increasing the inode-refcount, you must call __iget()
533 * by hand after calling find_inode now! This simplifies iunique and won't
534 * add any additional branch in the common code.
535 */ 697 */
536static struct inode *find_inode(struct super_block *sb, 698static struct inode *find_inode(struct super_block *sb,
537 struct hlist_head *head, 699 struct hlist_head *head,
@@ -551,9 +713,10 @@ repeat:
551 __wait_on_freeing_inode(inode); 713 __wait_on_freeing_inode(inode);
552 goto repeat; 714 goto repeat;
553 } 715 }
554 break; 716 __iget(inode);
717 return inode;
555 } 718 }
556 return node ? inode : NULL; 719 return NULL;
557} 720}
558 721
559/* 722/*
@@ -576,53 +739,49 @@ repeat:
576 __wait_on_freeing_inode(inode); 739 __wait_on_freeing_inode(inode);
577 goto repeat; 740 goto repeat;
578 } 741 }
579 break; 742 __iget(inode);
743 return inode;
580 } 744 }
581 return node ? inode : NULL; 745 return NULL;
582}
583
584static unsigned long hash(struct super_block *sb, unsigned long hashval)
585{
586 unsigned long tmp;
587
588 tmp = (hashval * (unsigned long)sb) ^ (GOLDEN_RATIO_PRIME + hashval) /
589 L1_CACHE_BYTES;
590 tmp = tmp ^ ((tmp ^ GOLDEN_RATIO_PRIME) >> I_HASHBITS);
591 return tmp & I_HASHMASK;
592}
593
594static inline void
595__inode_add_to_lists(struct super_block *sb, struct hlist_head *head,
596 struct inode *inode)
597{
598 inodes_stat.nr_inodes++;
599 list_add(&inode->i_list, &inode_in_use);
600 list_add(&inode->i_sb_list, &sb->s_inodes);
601 if (head)
602 hlist_add_head(&inode->i_hash, head);
603} 746}
604 747
605/** 748/*
606 * inode_add_to_lists - add a new inode to relevant lists 749 * Each cpu owns a range of LAST_INO_BATCH numbers.
607 * @sb: superblock inode belongs to 750 * 'shared_last_ino' is dirtied only once out of LAST_INO_BATCH allocations,
608 * @inode: inode to mark in use 751 * to renew the exhausted range.
609 * 752 *
610 * When an inode is allocated it needs to be accounted for, added to the in use 753 * This does not significantly increase overflow rate because every CPU can
611 * list, the owning superblock and the inode hash. This needs to be done under 754 * consume at most LAST_INO_BATCH-1 unused inode numbers. So there is
612 * the inode_lock, so export a function to do this rather than the inode lock 755 * NR_CPUS*(LAST_INO_BATCH-1) wastage. At 4096 and 1024, this is ~0.1% of the
613 * itself. We calculate the hash list to add to here so it is all internal 756 * 2^32 range, and is a worst-case. Even a 50% wastage would only increase
614 * which requires the caller to have already set up the inode number in the 757 * overflow rate by 2x, which does not seem too significant.
615 * inode to add. 758 *
759 * On a 32bit, non LFS stat() call, glibc will generate an EOVERFLOW
760 * error if st_ino won't fit in target struct field. Use 32bit counter
761 * here to attempt to avoid that.
616 */ 762 */
617void inode_add_to_lists(struct super_block *sb, struct inode *inode) 763#define LAST_INO_BATCH 1024
764static DEFINE_PER_CPU(unsigned int, last_ino);
765
766unsigned int get_next_ino(void)
618{ 767{
619 struct hlist_head *head = inode_hashtable + hash(sb, inode->i_ino); 768 unsigned int *p = &get_cpu_var(last_ino);
769 unsigned int res = *p;
620 770
621 spin_lock(&inode_lock); 771#ifdef CONFIG_SMP
622 __inode_add_to_lists(sb, head, inode); 772 if (unlikely((res & (LAST_INO_BATCH-1)) == 0)) {
623 spin_unlock(&inode_lock); 773 static atomic_t shared_last_ino;
774 int next = atomic_add_return(LAST_INO_BATCH, &shared_last_ino);
775
776 res = next - LAST_INO_BATCH;
777 }
778#endif
779
780 *p = ++res;
781 put_cpu_var(last_ino);
782 return res;
624} 783}
625EXPORT_SYMBOL_GPL(inode_add_to_lists); 784EXPORT_SYMBOL(get_next_ino);
626 785
627/** 786/**
628 * new_inode - obtain an inode 787 * new_inode - obtain an inode
@@ -638,12 +797,6 @@ EXPORT_SYMBOL_GPL(inode_add_to_lists);
638 */ 797 */
639struct inode *new_inode(struct super_block *sb) 798struct inode *new_inode(struct super_block *sb)
640{ 799{
641 /*
642 * On a 32bit, non LFS stat() call, glibc will generate an EOVERFLOW
643 * error if st_ino won't fit in target struct field. Use 32bit counter
644 * here to attempt to avoid that.
645 */
646 static unsigned int last_ino;
647 struct inode *inode; 800 struct inode *inode;
648 801
649 spin_lock_prefetch(&inode_lock); 802 spin_lock_prefetch(&inode_lock);
@@ -651,8 +804,7 @@ struct inode *new_inode(struct super_block *sb)
651 inode = alloc_inode(sb); 804 inode = alloc_inode(sb);
652 if (inode) { 805 if (inode) {
653 spin_lock(&inode_lock); 806 spin_lock(&inode_lock);
654 __inode_add_to_lists(sb, NULL, inode); 807 __inode_sb_list_add(inode);
655 inode->i_ino = ++last_ino;
656 inode->i_state = 0; 808 inode->i_state = 0;
657 spin_unlock(&inode_lock); 809 spin_unlock(&inode_lock);
658 } 810 }
@@ -663,7 +815,7 @@ EXPORT_SYMBOL(new_inode);
663void unlock_new_inode(struct inode *inode) 815void unlock_new_inode(struct inode *inode)
664{ 816{
665#ifdef CONFIG_DEBUG_LOCK_ALLOC 817#ifdef CONFIG_DEBUG_LOCK_ALLOC
666 if (inode->i_mode & S_IFDIR) { 818 if (S_ISDIR(inode->i_mode)) {
667 struct file_system_type *type = inode->i_sb->s_type; 819 struct file_system_type *type = inode->i_sb->s_type;
668 820
669 /* Set new key only if filesystem hasn't already changed it */ 821 /* Set new key only if filesystem hasn't already changed it */
@@ -720,7 +872,8 @@ static struct inode *get_new_inode(struct super_block *sb,
720 if (set(inode, data)) 872 if (set(inode, data))
721 goto set_failed; 873 goto set_failed;
722 874
723 __inode_add_to_lists(sb, head, inode); 875 hlist_add_head(&inode->i_hash, head);
876 __inode_sb_list_add(inode);
724 inode->i_state = I_NEW; 877 inode->i_state = I_NEW;
725 spin_unlock(&inode_lock); 878 spin_unlock(&inode_lock);
726 879
@@ -735,7 +888,6 @@ static struct inode *get_new_inode(struct super_block *sb,
735 * us. Use the old inode instead of the one we just 888 * us. Use the old inode instead of the one we just
736 * allocated. 889 * allocated.
737 */ 890 */
738 __iget(old);
739 spin_unlock(&inode_lock); 891 spin_unlock(&inode_lock);
740 destroy_inode(inode); 892 destroy_inode(inode);
741 inode = old; 893 inode = old;
@@ -767,7 +919,8 @@ static struct inode *get_new_inode_fast(struct super_block *sb,
767 old = find_inode_fast(sb, head, ino); 919 old = find_inode_fast(sb, head, ino);
768 if (!old) { 920 if (!old) {
769 inode->i_ino = ino; 921 inode->i_ino = ino;
770 __inode_add_to_lists(sb, head, inode); 922 hlist_add_head(&inode->i_hash, head);
923 __inode_sb_list_add(inode);
771 inode->i_state = I_NEW; 924 inode->i_state = I_NEW;
772 spin_unlock(&inode_lock); 925 spin_unlock(&inode_lock);
773 926
@@ -782,7 +935,6 @@ static struct inode *get_new_inode_fast(struct super_block *sb,
782 * us. Use the old inode instead of the one we just 935 * us. Use the old inode instead of the one we just
783 * allocated. 936 * allocated.
784 */ 937 */
785 __iget(old);
786 spin_unlock(&inode_lock); 938 spin_unlock(&inode_lock);
787 destroy_inode(inode); 939 destroy_inode(inode);
788 inode = old; 940 inode = old;
@@ -791,6 +943,27 @@ static struct inode *get_new_inode_fast(struct super_block *sb,
791 return inode; 943 return inode;
792} 944}
793 945
946/*
947 * search the inode cache for a matching inode number.
948 * If we find one, then the inode number we are trying to
949 * allocate is not unique and so we should not use it.
950 *
951 * Returns 1 if the inode number is unique, 0 if it is not.
952 */
953static int test_inode_iunique(struct super_block *sb, unsigned long ino)
954{
955 struct hlist_head *b = inode_hashtable + hash(sb, ino);
956 struct hlist_node *node;
957 struct inode *inode;
958
959 hlist_for_each_entry(inode, node, b, i_hash) {
960 if (inode->i_ino == ino && inode->i_sb == sb)
961 return 0;
962 }
963
964 return 1;
965}
966
794/** 967/**
795 * iunique - get a unique inode number 968 * iunique - get a unique inode number
796 * @sb: superblock 969 * @sb: superblock
@@ -812,19 +985,18 @@ ino_t iunique(struct super_block *sb, ino_t max_reserved)
812 * error if st_ino won't fit in target struct field. Use 32bit counter 985 * error if st_ino won't fit in target struct field. Use 32bit counter
813 * here to attempt to avoid that. 986 * here to attempt to avoid that.
814 */ 987 */
988 static DEFINE_SPINLOCK(iunique_lock);
815 static unsigned int counter; 989 static unsigned int counter;
816 struct inode *inode;
817 struct hlist_head *head;
818 ino_t res; 990 ino_t res;
819 991
820 spin_lock(&inode_lock); 992 spin_lock(&inode_lock);
993 spin_lock(&iunique_lock);
821 do { 994 do {
822 if (counter <= max_reserved) 995 if (counter <= max_reserved)
823 counter = max_reserved + 1; 996 counter = max_reserved + 1;
824 res = counter++; 997 res = counter++;
825 head = inode_hashtable + hash(sb, res); 998 } while (!test_inode_iunique(sb, res));
826 inode = find_inode_fast(sb, head, res); 999 spin_unlock(&iunique_lock);
827 } while (inode != NULL);
828 spin_unlock(&inode_lock); 1000 spin_unlock(&inode_lock);
829 1001
830 return res; 1002 return res;
@@ -876,7 +1048,6 @@ static struct inode *ifind(struct super_block *sb,
876 spin_lock(&inode_lock); 1048 spin_lock(&inode_lock);
877 inode = find_inode(sb, head, test, data); 1049 inode = find_inode(sb, head, test, data);
878 if (inode) { 1050 if (inode) {
879 __iget(inode);
880 spin_unlock(&inode_lock); 1051 spin_unlock(&inode_lock);
881 if (likely(wait)) 1052 if (likely(wait))
882 wait_on_inode(inode); 1053 wait_on_inode(inode);
@@ -909,7 +1080,6 @@ static struct inode *ifind_fast(struct super_block *sb,
909 spin_lock(&inode_lock); 1080 spin_lock(&inode_lock);
910 inode = find_inode_fast(sb, head, ino); 1081 inode = find_inode_fast(sb, head, ino);
911 if (inode) { 1082 if (inode) {
912 __iget(inode);
913 spin_unlock(&inode_lock); 1083 spin_unlock(&inode_lock);
914 wait_on_inode(inode); 1084 wait_on_inode(inode);
915 return inode; 1085 return inode;
@@ -1095,7 +1265,7 @@ int insert_inode_locked(struct inode *inode)
1095 __iget(old); 1265 __iget(old);
1096 spin_unlock(&inode_lock); 1266 spin_unlock(&inode_lock);
1097 wait_on_inode(old); 1267 wait_on_inode(old);
1098 if (unlikely(!hlist_unhashed(&old->i_hash))) { 1268 if (unlikely(!inode_unhashed(old))) {
1099 iput(old); 1269 iput(old);
1100 return -EBUSY; 1270 return -EBUSY;
1101 } 1271 }
@@ -1134,7 +1304,7 @@ int insert_inode_locked4(struct inode *inode, unsigned long hashval,
1134 __iget(old); 1304 __iget(old);
1135 spin_unlock(&inode_lock); 1305 spin_unlock(&inode_lock);
1136 wait_on_inode(old); 1306 wait_on_inode(old);
1137 if (unlikely(!hlist_unhashed(&old->i_hash))) { 1307 if (unlikely(!inode_unhashed(old))) {
1138 iput(old); 1308 iput(old);
1139 return -EBUSY; 1309 return -EBUSY;
1140 } 1310 }
@@ -1143,36 +1313,6 @@ int insert_inode_locked4(struct inode *inode, unsigned long hashval,
1143} 1313}
1144EXPORT_SYMBOL(insert_inode_locked4); 1314EXPORT_SYMBOL(insert_inode_locked4);
1145 1315
1146/**
1147 * __insert_inode_hash - hash an inode
1148 * @inode: unhashed inode
1149 * @hashval: unsigned long value used to locate this object in the
1150 * inode_hashtable.
1151 *
1152 * Add an inode to the inode hash for this superblock.
1153 */
1154void __insert_inode_hash(struct inode *inode, unsigned long hashval)
1155{
1156 struct hlist_head *head = inode_hashtable + hash(inode->i_sb, hashval);
1157 spin_lock(&inode_lock);
1158 hlist_add_head(&inode->i_hash, head);
1159 spin_unlock(&inode_lock);
1160}
1161EXPORT_SYMBOL(__insert_inode_hash);
1162
1163/**
1164 * remove_inode_hash - remove an inode from the hash
1165 * @inode: inode to unhash
1166 *
1167 * Remove an inode from the superblock.
1168 */
1169void remove_inode_hash(struct inode *inode)
1170{
1171 spin_lock(&inode_lock);
1172 hlist_del_init(&inode->i_hash);
1173 spin_unlock(&inode_lock);
1174}
1175EXPORT_SYMBOL(remove_inode_hash);
1176 1316
1177int generic_delete_inode(struct inode *inode) 1317int generic_delete_inode(struct inode *inode)
1178{ 1318{
@@ -1187,7 +1327,7 @@ EXPORT_SYMBOL(generic_delete_inode);
1187 */ 1327 */
1188int generic_drop_inode(struct inode *inode) 1328int generic_drop_inode(struct inode *inode)
1189{ 1329{
1190 return !inode->i_nlink || hlist_unhashed(&inode->i_hash); 1330 return !inode->i_nlink || inode_unhashed(inode);
1191} 1331}
1192EXPORT_SYMBOL_GPL(generic_drop_inode); 1332EXPORT_SYMBOL_GPL(generic_drop_inode);
1193 1333
@@ -1213,10 +1353,11 @@ static void iput_final(struct inode *inode)
1213 drop = generic_drop_inode(inode); 1353 drop = generic_drop_inode(inode);
1214 1354
1215 if (!drop) { 1355 if (!drop) {
1216 if (!(inode->i_state & (I_DIRTY|I_SYNC)))
1217 list_move(&inode->i_list, &inode_unused);
1218 inodes_stat.nr_unused++;
1219 if (sb->s_flags & MS_ACTIVE) { 1356 if (sb->s_flags & MS_ACTIVE) {
1357 inode->i_state |= I_REFERENCED;
1358 if (!(inode->i_state & (I_DIRTY|I_SYNC))) {
1359 inode_lru_list_add(inode);
1360 }
1220 spin_unlock(&inode_lock); 1361 spin_unlock(&inode_lock);
1221 return; 1362 return;
1222 } 1363 }
@@ -1227,19 +1368,23 @@ static void iput_final(struct inode *inode)
1227 spin_lock(&inode_lock); 1368 spin_lock(&inode_lock);
1228 WARN_ON(inode->i_state & I_NEW); 1369 WARN_ON(inode->i_state & I_NEW);
1229 inode->i_state &= ~I_WILL_FREE; 1370 inode->i_state &= ~I_WILL_FREE;
1230 inodes_stat.nr_unused--; 1371 __remove_inode_hash(inode);
1231 hlist_del_init(&inode->i_hash);
1232 } 1372 }
1233 list_del_init(&inode->i_list); 1373
1234 list_del_init(&inode->i_sb_list);
1235 WARN_ON(inode->i_state & I_NEW); 1374 WARN_ON(inode->i_state & I_NEW);
1236 inode->i_state |= I_FREEING; 1375 inode->i_state |= I_FREEING;
1237 inodes_stat.nr_inodes--; 1376
1377 /*
1378 * Move the inode off the IO lists and LRU once I_FREEING is
1379 * set so that it won't get moved back on there if it is dirty.
1380 */
1381 inode_lru_list_del(inode);
1382 list_del_init(&inode->i_wb_list);
1383
1384 __inode_sb_list_del(inode);
1238 spin_unlock(&inode_lock); 1385 spin_unlock(&inode_lock);
1239 evict(inode); 1386 evict(inode);
1240 spin_lock(&inode_lock); 1387 remove_inode_hash(inode);
1241 hlist_del_init(&inode->i_hash);
1242 spin_unlock(&inode_lock);
1243 wake_up_inode(inode); 1388 wake_up_inode(inode);
1244 BUG_ON(inode->i_state != (I_FREEING | I_CLEAR)); 1389 BUG_ON(inode->i_state != (I_FREEING | I_CLEAR));
1245 destroy_inode(inode); 1390 destroy_inode(inode);
@@ -1503,6 +1648,8 @@ void __init inode_init(void)
1503 SLAB_MEM_SPREAD), 1648 SLAB_MEM_SPREAD),
1504 init_once); 1649 init_once);
1505 register_shrinker(&icache_shrinker); 1650 register_shrinker(&icache_shrinker);
1651 percpu_counter_init(&nr_inodes, 0);
1652 percpu_counter_init(&nr_inodes_unused, 0);
1506 1653
1507 /* Hash may have been set up in inode_init_early */ 1654 /* Hash may have been set up in inode_init_early */
1508 if (!hashdist) 1655 if (!hashdist)
diff --git a/fs/internal.h b/fs/internal.h
index a6910e91cee8..e43b9a4dbf4e 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -101,3 +101,10 @@ extern void put_super(struct super_block *sb);
101struct nameidata; 101struct nameidata;
102extern struct file *nameidata_to_filp(struct nameidata *); 102extern struct file *nameidata_to_filp(struct nameidata *);
103extern void release_open_intent(struct nameidata *); 103extern void release_open_intent(struct nameidata *);
104
105/*
106 * inode.c
107 */
108extern int get_nr_dirty_inodes(void);
109extern void evict_inodes(struct super_block *);
110extern int invalidate_inodes(struct super_block *);
diff --git a/fs/ioctl.c b/fs/ioctl.c
index f855ea4fc888..e92fdbb3bc3a 100644
--- a/fs/ioctl.c
+++ b/fs/ioctl.c
@@ -530,6 +530,41 @@ static int ioctl_fsthaw(struct file *filp)
530 return thaw_super(sb); 530 return thaw_super(sb);
531} 531}
532 532
533static int ioctl_fstrim(struct file *filp, void __user *argp)
534{
535 struct super_block *sb = filp->f_path.dentry->d_inode->i_sb;
536 struct fstrim_range range;
537 int ret = 0;
538
539 if (!capable(CAP_SYS_ADMIN))
540 return -EPERM;
541
542 /* If filesystem doesn't support trim feature, return. */
543 if (sb->s_op->trim_fs == NULL)
544 return -EOPNOTSUPP;
545
546 /* If a blockdevice-backed filesystem isn't specified, return EINVAL. */
547 if (sb->s_bdev == NULL)
548 return -EINVAL;
549
550 if (argp == NULL) {
551 range.start = 0;
552 range.len = ULLONG_MAX;
553 range.minlen = 0;
554 } else if (copy_from_user(&range, argp, sizeof(range)))
555 return -EFAULT;
556
557 ret = sb->s_op->trim_fs(sb, &range);
558 if (ret < 0)
559 return ret;
560
561 if ((argp != NULL) &&
562 (copy_to_user(argp, &range, sizeof(range))))
563 return -EFAULT;
564
565 return 0;
566}
567
533/* 568/*
534 * When you add any new common ioctls to the switches above and below 569 * When you add any new common ioctls to the switches above and below
535 * please update compat_sys_ioctl() too. 570 * please update compat_sys_ioctl() too.
@@ -580,6 +615,10 @@ int do_vfs_ioctl(struct file *filp, unsigned int fd, unsigned int cmd,
580 error = ioctl_fsthaw(filp); 615 error = ioctl_fsthaw(filp);
581 break; 616 break;
582 617
618 case FITRIM:
619 error = ioctl_fstrim(filp, argp);
620 break;
621
583 case FS_IOC_FIEMAP: 622 case FS_IOC_FIEMAP:
584 return ioctl_fiemap(filp, arg); 623 return ioctl_fiemap(filp, arg);
585 624
diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c
index 09ff41a752a0..bfdeb82a53be 100644
--- a/fs/isofs/inode.c
+++ b/fs/isofs/inode.c
@@ -544,6 +544,34 @@ static unsigned int isofs_get_last_session(struct super_block *sb, s32 session)
544} 544}
545 545
546/* 546/*
547 * Check if root directory is empty (has less than 3 files).
548 *
549 * Used to detect broken CDs where ISO root directory is empty but Joliet root
550 * directory is OK. If such CD has Rock Ridge extensions, they will be disabled
551 * (and Joliet used instead) or else no files would be visible.
552 */
553static bool rootdir_empty(struct super_block *sb, unsigned long block)
554{
555 int offset = 0, files = 0, de_len;
556 struct iso_directory_record *de;
557 struct buffer_head *bh;
558
559 bh = sb_bread(sb, block);
560 if (!bh)
561 return true;
562 while (files < 3) {
563 de = (struct iso_directory_record *) (bh->b_data + offset);
564 de_len = *(unsigned char *) de;
565 if (de_len == 0)
566 break;
567 files++;
568 offset += de_len;
569 }
570 brelse(bh);
571 return files < 3;
572}
573
574/*
547 * Initialize the superblock and read the root inode. 575 * Initialize the superblock and read the root inode.
548 * 576 *
549 * Note: a check_disk_change() has been done immediately prior 577 * Note: a check_disk_change() has been done immediately prior
@@ -843,6 +871,18 @@ root_found:
843 goto out_no_root; 871 goto out_no_root;
844 872
845 /* 873 /*
874 * Fix for broken CDs with Rock Ridge and empty ISO root directory but
875 * correct Joliet root directory.
876 */
877 if (sbi->s_rock == 1 && joliet_level &&
878 rootdir_empty(s, sbi->s_firstdatazone)) {
879 printk(KERN_NOTICE
880 "ISOFS: primary root directory is empty. "
881 "Disabling Rock Ridge and switching to Joliet.");
882 sbi->s_rock = 0;
883 }
884
885 /*
846 * If this disk has both Rock Ridge and Joliet on it, then we 886 * If this disk has both Rock Ridge and Joliet on it, then we
847 * want to use Rock Ridge by default. This can be overridden 887 * want to use Rock Ridge by default. This can be overridden
848 * by using the norock mount option. There is still one other 888 * by using the norock mount option. There is still one other
@@ -962,25 +1002,23 @@ static int isofs_statfs (struct dentry *dentry, struct kstatfs *buf)
962 * or getblk() if they are not. Returns the number of blocks inserted 1002 * or getblk() if they are not. Returns the number of blocks inserted
963 * (-ve == error.) 1003 * (-ve == error.)
964 */ 1004 */
965int isofs_get_blocks(struct inode *inode, sector_t iblock_s, 1005int isofs_get_blocks(struct inode *inode, sector_t iblock,
966 struct buffer_head **bh, unsigned long nblocks) 1006 struct buffer_head **bh, unsigned long nblocks)
967{ 1007{
968 unsigned long b_off; 1008 unsigned long b_off = iblock;
969 unsigned offset, sect_size; 1009 unsigned offset, sect_size;
970 unsigned int firstext; 1010 unsigned int firstext;
971 unsigned long nextblk, nextoff; 1011 unsigned long nextblk, nextoff;
972 long iblock = (long)iblock_s;
973 int section, rv, error; 1012 int section, rv, error;
974 struct iso_inode_info *ei = ISOFS_I(inode); 1013 struct iso_inode_info *ei = ISOFS_I(inode);
975 1014
976 error = -EIO; 1015 error = -EIO;
977 rv = 0; 1016 rv = 0;
978 if (iblock < 0 || iblock != iblock_s) { 1017 if (iblock != b_off) {
979 printk(KERN_DEBUG "%s: block number too large\n", __func__); 1018 printk(KERN_DEBUG "%s: block number too large\n", __func__);
980 goto abort; 1019 goto abort;
981 } 1020 }
982 1021
983 b_off = iblock;
984 1022
985 offset = 0; 1023 offset = 0;
986 firstext = ei->i_first_extent; 1024 firstext = ei->i_first_extent;
@@ -998,8 +1036,9 @@ int isofs_get_blocks(struct inode *inode, sector_t iblock_s,
998 * I/O errors. 1036 * I/O errors.
999 */ 1037 */
1000 if (b_off > ((inode->i_size + PAGE_CACHE_SIZE - 1) >> ISOFS_BUFFER_BITS(inode))) { 1038 if (b_off > ((inode->i_size + PAGE_CACHE_SIZE - 1) >> ISOFS_BUFFER_BITS(inode))) {
1001 printk(KERN_DEBUG "%s: block >= EOF (%ld, %ld)\n", 1039 printk(KERN_DEBUG "%s: block >= EOF (%lu, %llu)\n",
1002 __func__, iblock, (unsigned long) inode->i_size); 1040 __func__, b_off,
1041 (unsigned long long)inode->i_size);
1003 goto abort; 1042 goto abort;
1004 } 1043 }
1005 1044
@@ -1025,9 +1064,9 @@ int isofs_get_blocks(struct inode *inode, sector_t iblock_s,
1025 if (++section > 100) { 1064 if (++section > 100) {
1026 printk(KERN_DEBUG "%s: More than 100 file sections ?!?" 1065 printk(KERN_DEBUG "%s: More than 100 file sections ?!?"
1027 " aborting...\n", __func__); 1066 " aborting...\n", __func__);
1028 printk(KERN_DEBUG "%s: block=%ld firstext=%u sect_size=%u " 1067 printk(KERN_DEBUG "%s: block=%lu firstext=%u sect_size=%u "
1029 "nextblk=%lu nextoff=%lu\n", __func__, 1068 "nextblk=%lu nextoff=%lu\n", __func__,
1030 iblock, firstext, (unsigned) sect_size, 1069 b_off, firstext, (unsigned) sect_size,
1031 nextblk, nextoff); 1070 nextblk, nextoff);
1032 goto abort; 1071 goto abort;
1033 } 1072 }
@@ -1468,17 +1507,16 @@ struct inode *isofs_iget(struct super_block *sb,
1468 return inode; 1507 return inode;
1469} 1508}
1470 1509
1471static int isofs_get_sb(struct file_system_type *fs_type, 1510static struct dentry *isofs_mount(struct file_system_type *fs_type,
1472 int flags, const char *dev_name, void *data, struct vfsmount *mnt) 1511 int flags, const char *dev_name, void *data)
1473{ 1512{
1474 return get_sb_bdev(fs_type, flags, dev_name, data, isofs_fill_super, 1513 return mount_bdev(fs_type, flags, dev_name, data, isofs_fill_super);
1475 mnt);
1476} 1514}
1477 1515
1478static struct file_system_type iso9660_fs_type = { 1516static struct file_system_type iso9660_fs_type = {
1479 .owner = THIS_MODULE, 1517 .owner = THIS_MODULE,
1480 .name = "iso9660", 1518 .name = "iso9660",
1481 .get_sb = isofs_get_sb, 1519 .mount = isofs_mount,
1482 .kill_sb = kill_block_super, 1520 .kill_sb = kill_block_super,
1483 .fs_flags = FS_REQUIRES_DEV, 1521 .fs_flags = FS_REQUIRES_DEV,
1484}; 1522};
diff --git a/fs/jbd/checkpoint.c b/fs/jbd/checkpoint.c
index 05a38b9c4c0e..e4b87bc1fa56 100644
--- a/fs/jbd/checkpoint.c
+++ b/fs/jbd/checkpoint.c
@@ -221,7 +221,7 @@ restart:
221 goto restart; 221 goto restart;
222 } 222 }
223 if (buffer_locked(bh)) { 223 if (buffer_locked(bh)) {
224 atomic_inc(&bh->b_count); 224 get_bh(bh);
225 spin_unlock(&journal->j_list_lock); 225 spin_unlock(&journal->j_list_lock);
226 jbd_unlock_bh_state(bh); 226 jbd_unlock_bh_state(bh);
227 wait_on_buffer(bh); 227 wait_on_buffer(bh);
@@ -283,7 +283,7 @@ static int __process_buffer(journal_t *journal, struct journal_head *jh,
283 int ret = 0; 283 int ret = 0;
284 284
285 if (buffer_locked(bh)) { 285 if (buffer_locked(bh)) {
286 atomic_inc(&bh->b_count); 286 get_bh(bh);
287 spin_unlock(&journal->j_list_lock); 287 spin_unlock(&journal->j_list_lock);
288 jbd_unlock_bh_state(bh); 288 jbd_unlock_bh_state(bh);
289 wait_on_buffer(bh); 289 wait_on_buffer(bh);
diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c
index 85a6883c0aca..34a4861c14b8 100644
--- a/fs/jbd/commit.c
+++ b/fs/jbd/commit.c
@@ -587,13 +587,13 @@ void journal_commit_transaction(journal_t *journal)
587 /* Bump b_count to prevent truncate from stumbling over 587 /* Bump b_count to prevent truncate from stumbling over
588 the shadowed buffer! @@@ This can go if we ever get 588 the shadowed buffer! @@@ This can go if we ever get
589 rid of the BJ_IO/BJ_Shadow pairing of buffers. */ 589 rid of the BJ_IO/BJ_Shadow pairing of buffers. */
590 atomic_inc(&jh2bh(jh)->b_count); 590 get_bh(jh2bh(jh));
591 591
592 /* Make a temporary IO buffer with which to write it out 592 /* Make a temporary IO buffer with which to write it out
593 (this will requeue both the metadata buffer and the 593 (this will requeue both the metadata buffer and the
594 temporary IO buffer). new_bh goes on BJ_IO*/ 594 temporary IO buffer). new_bh goes on BJ_IO*/
595 595
596 set_bit(BH_JWrite, &jh2bh(jh)->b_state); 596 set_buffer_jwrite(jh2bh(jh));
597 /* 597 /*
598 * akpm: journal_write_metadata_buffer() sets 598 * akpm: journal_write_metadata_buffer() sets
599 * new_bh->b_transaction to commit_transaction. 599 * new_bh->b_transaction to commit_transaction.
@@ -603,7 +603,7 @@ void journal_commit_transaction(journal_t *journal)
603 JBUFFER_TRACE(jh, "ph3: write metadata"); 603 JBUFFER_TRACE(jh, "ph3: write metadata");
604 flags = journal_write_metadata_buffer(commit_transaction, 604 flags = journal_write_metadata_buffer(commit_transaction,
605 jh, &new_jh, blocknr); 605 jh, &new_jh, blocknr);
606 set_bit(BH_JWrite, &jh2bh(new_jh)->b_state); 606 set_buffer_jwrite(jh2bh(new_jh));
607 wbuf[bufs++] = jh2bh(new_jh); 607 wbuf[bufs++] = jh2bh(new_jh);
608 608
609 /* Record the new block's tag in the current descriptor 609 /* Record the new block's tag in the current descriptor
@@ -713,7 +713,7 @@ wait_for_iobuf:
713 shadowed buffer */ 713 shadowed buffer */
714 jh = commit_transaction->t_shadow_list->b_tprev; 714 jh = commit_transaction->t_shadow_list->b_tprev;
715 bh = jh2bh(jh); 715 bh = jh2bh(jh);
716 clear_bit(BH_JWrite, &bh->b_state); 716 clear_buffer_jwrite(bh);
717 J_ASSERT_BH(bh, buffer_jbddirty(bh)); 717 J_ASSERT_BH(bh, buffer_jbddirty(bh));
718 718
719 /* The metadata is now released for reuse, but we need 719 /* The metadata is now released for reuse, but we need
diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c
index 2c4b1f109da9..da1b5e4ffce1 100644
--- a/fs/jbd/journal.c
+++ b/fs/jbd/journal.c
@@ -36,6 +36,7 @@
36#include <linux/poison.h> 36#include <linux/poison.h>
37#include <linux/proc_fs.h> 37#include <linux/proc_fs.h>
38#include <linux/debugfs.h> 38#include <linux/debugfs.h>
39#include <linux/ratelimit.h>
39 40
40#include <asm/uaccess.h> 41#include <asm/uaccess.h>
41#include <asm/page.h> 42#include <asm/page.h>
@@ -84,6 +85,7 @@ EXPORT_SYMBOL(journal_force_commit);
84 85
85static int journal_convert_superblock_v1(journal_t *, journal_superblock_t *); 86static int journal_convert_superblock_v1(journal_t *, journal_superblock_t *);
86static void __journal_abort_soft (journal_t *journal, int errno); 87static void __journal_abort_soft (journal_t *journal, int errno);
88static const char *journal_dev_name(journal_t *journal, char *buffer);
87 89
88/* 90/*
89 * Helper function used to manage commit timeouts 91 * Helper function used to manage commit timeouts
@@ -439,7 +441,7 @@ int __log_start_commit(journal_t *journal, tid_t target)
439 */ 441 */
440 if (!tid_geq(journal->j_commit_request, target)) { 442 if (!tid_geq(journal->j_commit_request, target)) {
441 /* 443 /*
442 * We want a new commit: OK, mark the request and wakup the 444 * We want a new commit: OK, mark the request and wakeup the
443 * commit thread. We do _not_ do the commit ourselves. 445 * commit thread. We do _not_ do the commit ourselves.
444 */ 446 */
445 447
@@ -950,6 +952,8 @@ int journal_create(journal_t *journal)
950 if (err) 952 if (err)
951 return err; 953 return err;
952 bh = __getblk(journal->j_dev, blocknr, journal->j_blocksize); 954 bh = __getblk(journal->j_dev, blocknr, journal->j_blocksize);
955 if (unlikely(!bh))
956 return -ENOMEM;
953 lock_buffer(bh); 957 lock_buffer(bh);
954 memset (bh->b_data, 0, journal->j_blocksize); 958 memset (bh->b_data, 0, journal->j_blocksize);
955 BUFFER_TRACE(bh, "marking dirty"); 959 BUFFER_TRACE(bh, "marking dirty");
@@ -1010,6 +1014,23 @@ void journal_update_superblock(journal_t *journal, int wait)
1010 goto out; 1014 goto out;
1011 } 1015 }
1012 1016
1017 if (buffer_write_io_error(bh)) {
1018 char b[BDEVNAME_SIZE];
1019 /*
1020 * Oh, dear. A previous attempt to write the journal
1021 * superblock failed. This could happen because the
1022 * USB device was yanked out. Or it could happen to
1023 * be a transient write error and maybe the block will
1024 * be remapped. Nothing we can do but to retry the
1025 * write and hope for the best.
1026 */
1027 printk(KERN_ERR "JBD: previous I/O error detected "
1028 "for journal superblock update for %s.\n",
1029 journal_dev_name(journal, b));
1030 clear_buffer_write_io_error(bh);
1031 set_buffer_uptodate(bh);
1032 }
1033
1013 spin_lock(&journal->j_state_lock); 1034 spin_lock(&journal->j_state_lock);
1014 jbd_debug(1,"JBD: updating superblock (start %u, seq %d, errno %d)\n", 1035 jbd_debug(1,"JBD: updating superblock (start %u, seq %d, errno %d)\n",
1015 journal->j_tail, journal->j_tail_sequence, journal->j_errno); 1036 journal->j_tail, journal->j_tail_sequence, journal->j_errno);
@@ -1021,9 +1042,17 @@ void journal_update_superblock(journal_t *journal, int wait)
1021 1042
1022 BUFFER_TRACE(bh, "marking dirty"); 1043 BUFFER_TRACE(bh, "marking dirty");
1023 mark_buffer_dirty(bh); 1044 mark_buffer_dirty(bh);
1024 if (wait) 1045 if (wait) {
1025 sync_dirty_buffer(bh); 1046 sync_dirty_buffer(bh);
1026 else 1047 if (buffer_write_io_error(bh)) {
1048 char b[BDEVNAME_SIZE];
1049 printk(KERN_ERR "JBD: I/O error detected "
1050 "when updating journal superblock for %s.\n",
1051 journal_dev_name(journal, b));
1052 clear_buffer_write_io_error(bh);
1053 set_buffer_uptodate(bh);
1054 }
1055 } else
1027 write_dirty_buffer(bh, WRITE); 1056 write_dirty_buffer(bh, WRITE);
1028 1057
1029out: 1058out:
@@ -1719,7 +1748,6 @@ static void journal_destroy_journal_head_cache(void)
1719static struct journal_head *journal_alloc_journal_head(void) 1748static struct journal_head *journal_alloc_journal_head(void)
1720{ 1749{
1721 struct journal_head *ret; 1750 struct journal_head *ret;
1722 static unsigned long last_warning;
1723 1751
1724#ifdef CONFIG_JBD_DEBUG 1752#ifdef CONFIG_JBD_DEBUG
1725 atomic_inc(&nr_journal_heads); 1753 atomic_inc(&nr_journal_heads);
@@ -1727,11 +1755,9 @@ static struct journal_head *journal_alloc_journal_head(void)
1727 ret = kmem_cache_alloc(journal_head_cache, GFP_NOFS); 1755 ret = kmem_cache_alloc(journal_head_cache, GFP_NOFS);
1728 if (ret == NULL) { 1756 if (ret == NULL) {
1729 jbd_debug(1, "out of memory for journal_head\n"); 1757 jbd_debug(1, "out of memory for journal_head\n");
1730 if (time_after(jiffies, last_warning + 5*HZ)) { 1758 printk_ratelimited(KERN_NOTICE "ENOMEM in %s, retrying.\n",
1731 printk(KERN_NOTICE "ENOMEM in %s, retrying.\n", 1759 __func__);
1732 __func__); 1760
1733 last_warning = jiffies;
1734 }
1735 while (ret == NULL) { 1761 while (ret == NULL) {
1736 yield(); 1762 yield();
1737 ret = kmem_cache_alloc(journal_head_cache, GFP_NOFS); 1763 ret = kmem_cache_alloc(journal_head_cache, GFP_NOFS);
diff --git a/fs/jbd/recovery.c b/fs/jbd/recovery.c
index 81051dafebf5..5b43e96788e6 100644
--- a/fs/jbd/recovery.c
+++ b/fs/jbd/recovery.c
@@ -296,10 +296,10 @@ int journal_skip_recovery(journal_t *journal)
296#ifdef CONFIG_JBD_DEBUG 296#ifdef CONFIG_JBD_DEBUG
297 int dropped = info.end_transaction - 297 int dropped = info.end_transaction -
298 be32_to_cpu(journal->j_superblock->s_sequence); 298 be32_to_cpu(journal->j_superblock->s_sequence);
299#endif
300 jbd_debug(1, 299 jbd_debug(1,
301 "JBD: ignoring %d transaction%s from the journal.\n", 300 "JBD: ignoring %d transaction%s from the journal.\n",
302 dropped, (dropped == 1) ? "" : "s"); 301 dropped, (dropped == 1) ? "" : "s");
302#endif
303 journal->j_transaction_sequence = ++info.end_transaction; 303 journal->j_transaction_sequence = ++info.end_transaction;
304 } 304 }
305 305
diff --git a/fs/jbd/transaction.c b/fs/jbd/transaction.c
index 5ae71e75a491..846a3f314111 100644
--- a/fs/jbd/transaction.c
+++ b/fs/jbd/transaction.c
@@ -293,9 +293,7 @@ handle_t *journal_start(journal_t *journal, int nblocks)
293 jbd_free_handle(handle); 293 jbd_free_handle(handle);
294 current->journal_info = NULL; 294 current->journal_info = NULL;
295 handle = ERR_PTR(err); 295 handle = ERR_PTR(err);
296 goto out;
297 } 296 }
298out:
299 return handle; 297 return handle;
300} 298}
301 299
@@ -528,7 +526,7 @@ do_get_write_access(handle_t *handle, struct journal_head *jh,
528 transaction = handle->h_transaction; 526 transaction = handle->h_transaction;
529 journal = transaction->t_journal; 527 journal = transaction->t_journal;
530 528
531 jbd_debug(5, "buffer_head %p, force_copy %d\n", jh, force_copy); 529 jbd_debug(5, "journal_head %p, force_copy %d\n", jh, force_copy);
532 530
533 JBUFFER_TRACE(jh, "entry"); 531 JBUFFER_TRACE(jh, "entry");
534repeat: 532repeat:
@@ -713,7 +711,7 @@ done:
713 J_EXPECT_JH(jh, buffer_uptodate(jh2bh(jh)), 711 J_EXPECT_JH(jh, buffer_uptodate(jh2bh(jh)),
714 "Possible IO failure.\n"); 712 "Possible IO failure.\n");
715 page = jh2bh(jh)->b_page; 713 page = jh2bh(jh)->b_page;
716 offset = ((unsigned long) jh2bh(jh)->b_data) & ~PAGE_MASK; 714 offset = offset_in_page(jh2bh(jh)->b_data);
717 source = kmap_atomic(page, KM_USER0); 715 source = kmap_atomic(page, KM_USER0);
718 memcpy(jh->b_frozen_data, source+offset, jh2bh(jh)->b_size); 716 memcpy(jh->b_frozen_data, source+offset, jh2bh(jh)->b_size);
719 kunmap_atomic(source, KM_USER0); 717 kunmap_atomic(source, KM_USER0);
diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c
index 6571a056e55d..6a79fd0a1a32 100644
--- a/fs/jbd2/checkpoint.c
+++ b/fs/jbd2/checkpoint.c
@@ -299,6 +299,16 @@ static int __process_buffer(journal_t *journal, struct journal_head *jh,
299 transaction->t_chp_stats.cs_forced_to_close++; 299 transaction->t_chp_stats.cs_forced_to_close++;
300 spin_unlock(&journal->j_list_lock); 300 spin_unlock(&journal->j_list_lock);
301 jbd_unlock_bh_state(bh); 301 jbd_unlock_bh_state(bh);
302 if (unlikely(journal->j_flags & JBD2_UNMOUNT))
303 /*
304 * The journal thread is dead; so starting and
305 * waiting for a commit to finish will cause
306 * us to wait for a _very_ long time.
307 */
308 printk(KERN_ERR "JBD2: %s: "
309 "Waiting for Godot: block %llu\n",
310 journal->j_devname,
311 (unsigned long long) bh->b_blocknr);
302 jbd2_log_start_commit(journal, tid); 312 jbd2_log_start_commit(journal, tid);
303 jbd2_log_wait_commit(journal, tid); 313 jbd2_log_wait_commit(journal, tid);
304 ret = 1; 314 ret = 1;
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
index bc6be8bda1cc..f3ad1598b201 100644
--- a/fs/jbd2/commit.c
+++ b/fs/jbd2/commit.c
@@ -26,7 +26,9 @@
26#include <linux/backing-dev.h> 26#include <linux/backing-dev.h>
27#include <linux/bio.h> 27#include <linux/bio.h>
28#include <linux/blkdev.h> 28#include <linux/blkdev.h>
29#include <linux/bitops.h>
29#include <trace/events/jbd2.h> 30#include <trace/events/jbd2.h>
31#include <asm/system.h>
30 32
31/* 33/*
32 * Default IO end handler for temporary BJ_IO buffer_heads. 34 * Default IO end handler for temporary BJ_IO buffer_heads.
@@ -201,7 +203,7 @@ static int journal_submit_data_buffers(journal_t *journal,
201 spin_lock(&journal->j_list_lock); 203 spin_lock(&journal->j_list_lock);
202 list_for_each_entry(jinode, &commit_transaction->t_inode_list, i_list) { 204 list_for_each_entry(jinode, &commit_transaction->t_inode_list, i_list) {
203 mapping = jinode->i_vfs_inode->i_mapping; 205 mapping = jinode->i_vfs_inode->i_mapping;
204 jinode->i_flags |= JI_COMMIT_RUNNING; 206 set_bit(__JI_COMMIT_RUNNING, &jinode->i_flags);
205 spin_unlock(&journal->j_list_lock); 207 spin_unlock(&journal->j_list_lock);
206 /* 208 /*
207 * submit the inode data buffers. We use writepage 209 * submit the inode data buffers. We use writepage
@@ -216,7 +218,8 @@ static int journal_submit_data_buffers(journal_t *journal,
216 spin_lock(&journal->j_list_lock); 218 spin_lock(&journal->j_list_lock);
217 J_ASSERT(jinode->i_transaction == commit_transaction); 219 J_ASSERT(jinode->i_transaction == commit_transaction);
218 commit_transaction->t_flushed_data_blocks = 1; 220 commit_transaction->t_flushed_data_blocks = 1;
219 jinode->i_flags &= ~JI_COMMIT_RUNNING; 221 clear_bit(__JI_COMMIT_RUNNING, &jinode->i_flags);
222 smp_mb__after_clear_bit();
220 wake_up_bit(&jinode->i_flags, __JI_COMMIT_RUNNING); 223 wake_up_bit(&jinode->i_flags, __JI_COMMIT_RUNNING);
221 } 224 }
222 spin_unlock(&journal->j_list_lock); 225 spin_unlock(&journal->j_list_lock);
@@ -237,7 +240,7 @@ static int journal_finish_inode_data_buffers(journal_t *journal,
237 /* For locking, see the comment in journal_submit_data_buffers() */ 240 /* For locking, see the comment in journal_submit_data_buffers() */
238 spin_lock(&journal->j_list_lock); 241 spin_lock(&journal->j_list_lock);
239 list_for_each_entry(jinode, &commit_transaction->t_inode_list, i_list) { 242 list_for_each_entry(jinode, &commit_transaction->t_inode_list, i_list) {
240 jinode->i_flags |= JI_COMMIT_RUNNING; 243 set_bit(__JI_COMMIT_RUNNING, &jinode->i_flags);
241 spin_unlock(&journal->j_list_lock); 244 spin_unlock(&journal->j_list_lock);
242 err = filemap_fdatawait(jinode->i_vfs_inode->i_mapping); 245 err = filemap_fdatawait(jinode->i_vfs_inode->i_mapping);
243 if (err) { 246 if (err) {
@@ -253,7 +256,8 @@ static int journal_finish_inode_data_buffers(journal_t *journal,
253 ret = err; 256 ret = err;
254 } 257 }
255 spin_lock(&journal->j_list_lock); 258 spin_lock(&journal->j_list_lock);
256 jinode->i_flags &= ~JI_COMMIT_RUNNING; 259 clear_bit(__JI_COMMIT_RUNNING, &jinode->i_flags);
260 smp_mb__after_clear_bit();
257 wake_up_bit(&jinode->i_flags, __JI_COMMIT_RUNNING); 261 wake_up_bit(&jinode->i_flags, __JI_COMMIT_RUNNING);
258 } 262 }
259 263
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index 262419f83d80..538417c1fdbb 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -42,12 +42,14 @@
42#include <linux/log2.h> 42#include <linux/log2.h>
43#include <linux/vmalloc.h> 43#include <linux/vmalloc.h>
44#include <linux/backing-dev.h> 44#include <linux/backing-dev.h>
45#include <linux/bitops.h>
45 46
46#define CREATE_TRACE_POINTS 47#define CREATE_TRACE_POINTS
47#include <trace/events/jbd2.h> 48#include <trace/events/jbd2.h>
48 49
49#include <asm/uaccess.h> 50#include <asm/uaccess.h>
50#include <asm/page.h> 51#include <asm/page.h>
52#include <asm/system.h>
51 53
52EXPORT_SYMBOL(jbd2_journal_extend); 54EXPORT_SYMBOL(jbd2_journal_extend);
53EXPORT_SYMBOL(jbd2_journal_stop); 55EXPORT_SYMBOL(jbd2_journal_stop);
@@ -478,7 +480,7 @@ int __jbd2_log_start_commit(journal_t *journal, tid_t target)
478 */ 480 */
479 if (!tid_geq(journal->j_commit_request, target)) { 481 if (!tid_geq(journal->j_commit_request, target)) {
480 /* 482 /*
481 * We want a new commit: OK, mark the request and wakup the 483 * We want a new commit: OK, mark the request and wakeup the
482 * commit thread. We do _not_ do the commit ourselves. 484 * commit thread. We do _not_ do the commit ourselves.
483 */ 485 */
484 486
@@ -2210,7 +2212,7 @@ void jbd2_journal_release_jbd_inode(journal_t *journal,
2210restart: 2212restart:
2211 spin_lock(&journal->j_list_lock); 2213 spin_lock(&journal->j_list_lock);
2212 /* Is commit writing out inode - we have to wait */ 2214 /* Is commit writing out inode - we have to wait */
2213 if (jinode->i_flags & JI_COMMIT_RUNNING) { 2215 if (test_bit(__JI_COMMIT_RUNNING, &jinode->i_flags)) {
2214 wait_queue_head_t *wq; 2216 wait_queue_head_t *wq;
2215 DEFINE_WAIT_BIT(wait, &jinode->i_flags, __JI_COMMIT_RUNNING); 2217 DEFINE_WAIT_BIT(wait, &jinode->i_flags, __JI_COMMIT_RUNNING);
2216 wq = bit_waitqueue(&jinode->i_flags, __JI_COMMIT_RUNNING); 2218 wq = bit_waitqueue(&jinode->i_flags, __JI_COMMIT_RUNNING);
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index f3479d6e0a83..6bf0a242613e 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -156,6 +156,7 @@ alloc_transaction:
156 */ 156 */
157repeat: 157repeat:
158 read_lock(&journal->j_state_lock); 158 read_lock(&journal->j_state_lock);
159 BUG_ON(journal->j_flags & JBD2_UNMOUNT);
159 if (is_journal_aborted(journal) || 160 if (is_journal_aborted(journal) ||
160 (journal->j_errno != 0 && !(journal->j_flags & JBD2_ACK_ERR))) { 161 (journal->j_errno != 0 && !(journal->j_flags & JBD2_ACK_ERR))) {
161 read_unlock(&journal->j_state_lock); 162 read_unlock(&journal->j_state_lock);
diff --git a/fs/jffs2/dir.c b/fs/jffs2/dir.c
index ed78a3cf3cb0..79121aa5858b 100644
--- a/fs/jffs2/dir.c
+++ b/fs/jffs2/dir.c
@@ -289,7 +289,7 @@ static int jffs2_link (struct dentry *old_dentry, struct inode *dir_i, struct de
289 mutex_unlock(&f->sem); 289 mutex_unlock(&f->sem);
290 d_instantiate(dentry, old_dentry->d_inode); 290 d_instantiate(dentry, old_dentry->d_inode);
291 dir_i->i_mtime = dir_i->i_ctime = ITIME(now); 291 dir_i->i_mtime = dir_i->i_ctime = ITIME(now);
292 atomic_inc(&old_dentry->d_inode->i_count); 292 ihold(old_dentry->d_inode);
293 } 293 }
294 return ret; 294 return ret;
295} 295}
@@ -864,7 +864,7 @@ static int jffs2_rename (struct inode *old_dir_i, struct dentry *old_dentry,
864 printk(KERN_NOTICE "jffs2_rename(): Link succeeded, unlink failed (err %d). You now have a hard link\n", ret); 864 printk(KERN_NOTICE "jffs2_rename(): Link succeeded, unlink failed (err %d). You now have a hard link\n", ret);
865 /* Might as well let the VFS know */ 865 /* Might as well let the VFS know */
866 d_instantiate(new_dentry, old_dentry->d_inode); 866 d_instantiate(new_dentry, old_dentry->d_inode);
867 atomic_inc(&old_dentry->d_inode->i_count); 867 ihold(old_dentry->d_inode);
868 new_dir_i->i_mtime = new_dir_i->i_ctime = ITIME(now); 868 new_dir_i->i_mtime = new_dir_i->i_ctime = ITIME(now);
869 return ret; 869 return ret;
870 } 870 }
diff --git a/fs/jffs2/super.c b/fs/jffs2/super.c
index d1ae5dfc22b9..c86041b866a4 100644
--- a/fs/jffs2/super.c
+++ b/fs/jffs2/super.c
@@ -179,12 +179,11 @@ static int jffs2_fill_super(struct super_block *sb, void *data, int silent)
179 return ret; 179 return ret;
180} 180}
181 181
182static int jffs2_get_sb(struct file_system_type *fs_type, 182static struct dentry *jffs2_mount(struct file_system_type *fs_type,
183 int flags, const char *dev_name, 183 int flags, const char *dev_name,
184 void *data, struct vfsmount *mnt) 184 void *data)
185{ 185{
186 return get_sb_mtd(fs_type, flags, dev_name, data, jffs2_fill_super, 186 return mount_mtd(fs_type, flags, dev_name, data, jffs2_fill_super);
187 mnt);
188} 187}
189 188
190static void jffs2_put_super (struct super_block *sb) 189static void jffs2_put_super (struct super_block *sb)
@@ -229,7 +228,7 @@ static void jffs2_kill_sb(struct super_block *sb)
229static struct file_system_type jffs2_fs_type = { 228static struct file_system_type jffs2_fs_type = {
230 .owner = THIS_MODULE, 229 .owner = THIS_MODULE,
231 .name = "jffs2", 230 .name = "jffs2",
232 .get_sb = jffs2_get_sb, 231 .mount = jffs2_mount,
233 .kill_sb = jffs2_kill_sb, 232 .kill_sb = jffs2_kill_sb,
234}; 233};
235 234
diff --git a/fs/jfs/jfs_imap.c b/fs/jfs/jfs_imap.c
index f8332dc8eeb2..3a09423b6c22 100644
--- a/fs/jfs/jfs_imap.c
+++ b/fs/jfs/jfs_imap.c
@@ -497,7 +497,7 @@ struct inode *diReadSpecial(struct super_block *sb, ino_t inum, int secondary)
497 * appear hashed, but do not put on any lists. hlist_del() 497 * appear hashed, but do not put on any lists. hlist_del()
498 * will work fine and require no locking. 498 * will work fine and require no locking.
499 */ 499 */
500 ip->i_hash.pprev = &ip->i_hash.next; 500 hlist_add_fake(&ip->i_hash);
501 501
502 return (ip); 502 return (ip);
503} 503}
diff --git a/fs/jfs/jfs_txnmgr.c b/fs/jfs/jfs_txnmgr.c
index d945ea76b445..9466957ec841 100644
--- a/fs/jfs/jfs_txnmgr.c
+++ b/fs/jfs/jfs_txnmgr.c
@@ -1279,7 +1279,7 @@ int txCommit(tid_t tid, /* transaction identifier */
1279 * lazy commit thread finishes processing 1279 * lazy commit thread finishes processing
1280 */ 1280 */
1281 if (tblk->xflag & COMMIT_DELETE) { 1281 if (tblk->xflag & COMMIT_DELETE) {
1282 atomic_inc(&tblk->u.ip->i_count); 1282 ihold(tblk->u.ip);
1283 /* 1283 /*
1284 * Avoid a rare deadlock 1284 * Avoid a rare deadlock
1285 * 1285 *
diff --git a/fs/jfs/namei.c b/fs/jfs/namei.c
index a9cf8e8675be..231ca4af9bce 100644
--- a/fs/jfs/namei.c
+++ b/fs/jfs/namei.c
@@ -839,7 +839,7 @@ static int jfs_link(struct dentry *old_dentry,
839 ip->i_ctime = CURRENT_TIME; 839 ip->i_ctime = CURRENT_TIME;
840 dir->i_ctime = dir->i_mtime = CURRENT_TIME; 840 dir->i_ctime = dir->i_mtime = CURRENT_TIME;
841 mark_inode_dirty(dir); 841 mark_inode_dirty(dir);
842 atomic_inc(&ip->i_count); 842 ihold(ip);
843 843
844 iplist[0] = ip; 844 iplist[0] = ip;
845 iplist[1] = dir; 845 iplist[1] = dir;
diff --git a/fs/jfs/super.c b/fs/jfs/super.c
index 68eee2bf629e..0669fc1cc3bf 100644
--- a/fs/jfs/super.c
+++ b/fs/jfs/super.c
@@ -583,11 +583,10 @@ static int jfs_unfreeze(struct super_block *sb)
583 return 0; 583 return 0;
584} 584}
585 585
586static int jfs_get_sb(struct file_system_type *fs_type, 586static struct dentry *jfs_do_mount(struct file_system_type *fs_type,
587 int flags, const char *dev_name, void *data, struct vfsmount *mnt) 587 int flags, const char *dev_name, void *data)
588{ 588{
589 return get_sb_bdev(fs_type, flags, dev_name, data, jfs_fill_super, 589 return mount_bdev(fs_type, flags, dev_name, data, jfs_fill_super);
590 mnt);
591} 590}
592 591
593static int jfs_sync_fs(struct super_block *sb, int wait) 592static int jfs_sync_fs(struct super_block *sb, int wait)
@@ -770,7 +769,7 @@ static const struct export_operations jfs_export_operations = {
770static struct file_system_type jfs_fs_type = { 769static struct file_system_type jfs_fs_type = {
771 .owner = THIS_MODULE, 770 .owner = THIS_MODULE,
772 .name = "jfs", 771 .name = "jfs",
773 .get_sb = jfs_get_sb, 772 .mount = jfs_do_mount,
774 .kill_sb = kill_block_super, 773 .kill_sb = kill_block_super,
775 .fs_flags = FS_REQUIRES_DEV, 774 .fs_flags = FS_REQUIRES_DEV,
776}; 775};
diff --git a/fs/libfs.c b/fs/libfs.c
index 62baa0387d6e..a3accdf528ad 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -201,9 +201,8 @@ static const struct super_operations simple_super_operations = {
201 * Common helper for pseudo-filesystems (sockfs, pipefs, bdev - stuff that 201 * Common helper for pseudo-filesystems (sockfs, pipefs, bdev - stuff that
202 * will never be mountable) 202 * will never be mountable)
203 */ 203 */
204int get_sb_pseudo(struct file_system_type *fs_type, char *name, 204struct dentry *mount_pseudo(struct file_system_type *fs_type, char *name,
205 const struct super_operations *ops, unsigned long magic, 205 const struct super_operations *ops, unsigned long magic)
206 struct vfsmount *mnt)
207{ 206{
208 struct super_block *s = sget(fs_type, NULL, set_anon_super, NULL); 207 struct super_block *s = sget(fs_type, NULL, set_anon_super, NULL);
209 struct dentry *dentry; 208 struct dentry *dentry;
@@ -211,7 +210,7 @@ int get_sb_pseudo(struct file_system_type *fs_type, char *name,
211 struct qstr d_name = {.name = name, .len = strlen(name)}; 210 struct qstr d_name = {.name = name, .len = strlen(name)};
212 211
213 if (IS_ERR(s)) 212 if (IS_ERR(s))
214 return PTR_ERR(s); 213 return ERR_CAST(s);
215 214
216 s->s_flags = MS_NOUSER; 215 s->s_flags = MS_NOUSER;
217 s->s_maxbytes = MAX_LFS_FILESIZE; 216 s->s_maxbytes = MAX_LFS_FILESIZE;
@@ -241,12 +240,11 @@ int get_sb_pseudo(struct file_system_type *fs_type, char *name,
241 d_instantiate(dentry, root); 240 d_instantiate(dentry, root);
242 s->s_root = dentry; 241 s->s_root = dentry;
243 s->s_flags |= MS_ACTIVE; 242 s->s_flags |= MS_ACTIVE;
244 simple_set_mnt(mnt, s); 243 return dget(s->s_root);
245 return 0;
246 244
247Enomem: 245Enomem:
248 deactivate_locked_super(s); 246 deactivate_locked_super(s);
249 return -ENOMEM; 247 return ERR_PTR(-ENOMEM);
250} 248}
251 249
252int simple_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry) 250int simple_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry)
@@ -255,7 +253,7 @@ int simple_link(struct dentry *old_dentry, struct inode *dir, struct dentry *den
255 253
256 inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME; 254 inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME;
257 inc_nlink(inode); 255 inc_nlink(inode);
258 atomic_inc(&inode->i_count); 256 ihold(inode);
259 dget(dentry); 257 dget(dentry);
260 d_instantiate(dentry, inode); 258 d_instantiate(dentry, inode);
261 return 0; 259 return 0;
@@ -892,10 +890,6 @@ EXPORT_SYMBOL_GPL(generic_fh_to_parent);
892 */ 890 */
893int generic_file_fsync(struct file *file, int datasync) 891int generic_file_fsync(struct file *file, int datasync)
894{ 892{
895 struct writeback_control wbc = {
896 .sync_mode = WB_SYNC_ALL,
897 .nr_to_write = 0, /* metadata-only; caller takes care of data */
898 };
899 struct inode *inode = file->f_mapping->host; 893 struct inode *inode = file->f_mapping->host;
900 int err; 894 int err;
901 int ret; 895 int ret;
@@ -906,7 +900,7 @@ int generic_file_fsync(struct file *file, int datasync)
906 if (datasync && !(inode->i_state & I_DIRTY_DATASYNC)) 900 if (datasync && !(inode->i_state & I_DIRTY_DATASYNC))
907 return ret; 901 return ret;
908 902
909 err = sync_inode(inode, &wbc); 903 err = sync_inode_metadata(inode, 1);
910 if (ret == 0) 904 if (ret == 0)
911 ret = err; 905 ret = err;
912 return ret; 906 return ret;
@@ -955,7 +949,7 @@ EXPORT_SYMBOL(dcache_dir_lseek);
955EXPORT_SYMBOL(dcache_dir_open); 949EXPORT_SYMBOL(dcache_dir_open);
956EXPORT_SYMBOL(dcache_readdir); 950EXPORT_SYMBOL(dcache_readdir);
957EXPORT_SYMBOL(generic_read_dir); 951EXPORT_SYMBOL(generic_read_dir);
958EXPORT_SYMBOL(get_sb_pseudo); 952EXPORT_SYMBOL(mount_pseudo);
959EXPORT_SYMBOL(simple_write_begin); 953EXPORT_SYMBOL(simple_write_begin);
960EXPORT_SYMBOL(simple_write_end); 954EXPORT_SYMBOL(simple_write_end);
961EXPORT_SYMBOL(simple_dir_inode_operations); 955EXPORT_SYMBOL(simple_dir_inode_operations);
diff --git a/fs/lockd/clntlock.c b/fs/lockd/clntlock.c
index 64fd427c993c..d5bb86866e6c 100644
--- a/fs/lockd/clntlock.c
+++ b/fs/lockd/clntlock.c
@@ -42,6 +42,7 @@ struct nlm_wait {
42}; 42};
43 43
44static LIST_HEAD(nlm_blocked); 44static LIST_HEAD(nlm_blocked);
45static DEFINE_SPINLOCK(nlm_blocked_lock);
45 46
46/** 47/**
47 * nlmclnt_init - Set up per-NFS mount point lockd data structures 48 * nlmclnt_init - Set up per-NFS mount point lockd data structures
@@ -97,7 +98,10 @@ struct nlm_wait *nlmclnt_prepare_block(struct nlm_host *host, struct file_lock *
97 block->b_lock = fl; 98 block->b_lock = fl;
98 init_waitqueue_head(&block->b_wait); 99 init_waitqueue_head(&block->b_wait);
99 block->b_status = nlm_lck_blocked; 100 block->b_status = nlm_lck_blocked;
101
102 spin_lock(&nlm_blocked_lock);
100 list_add(&block->b_list, &nlm_blocked); 103 list_add(&block->b_list, &nlm_blocked);
104 spin_unlock(&nlm_blocked_lock);
101 } 105 }
102 return block; 106 return block;
103} 107}
@@ -106,7 +110,9 @@ void nlmclnt_finish_block(struct nlm_wait *block)
106{ 110{
107 if (block == NULL) 111 if (block == NULL)
108 return; 112 return;
113 spin_lock(&nlm_blocked_lock);
109 list_del(&block->b_list); 114 list_del(&block->b_list);
115 spin_unlock(&nlm_blocked_lock);
110 kfree(block); 116 kfree(block);
111} 117}
112 118
@@ -154,6 +160,7 @@ __be32 nlmclnt_grant(const struct sockaddr *addr, const struct nlm_lock *lock)
154 * Look up blocked request based on arguments. 160 * Look up blocked request based on arguments.
155 * Warning: must not use cookie to match it! 161 * Warning: must not use cookie to match it!
156 */ 162 */
163 spin_lock(&nlm_blocked_lock);
157 list_for_each_entry(block, &nlm_blocked, b_list) { 164 list_for_each_entry(block, &nlm_blocked, b_list) {
158 struct file_lock *fl_blocked = block->b_lock; 165 struct file_lock *fl_blocked = block->b_lock;
159 166
@@ -178,6 +185,7 @@ __be32 nlmclnt_grant(const struct sockaddr *addr, const struct nlm_lock *lock)
178 wake_up(&block->b_wait); 185 wake_up(&block->b_wait);
179 res = nlm_granted; 186 res = nlm_granted;
180 } 187 }
188 spin_unlock(&nlm_blocked_lock);
181 return res; 189 return res;
182} 190}
183 191
@@ -216,10 +224,6 @@ reclaimer(void *ptr)
216 allow_signal(SIGKILL); 224 allow_signal(SIGKILL);
217 225
218 down_write(&host->h_rwsem); 226 down_write(&host->h_rwsem);
219
220 /* This one ensures that our parent doesn't terminate while the
221 * reclaim is in progress */
222 lock_kernel();
223 lockd_up(); /* note: this cannot fail as lockd is already running */ 227 lockd_up(); /* note: this cannot fail as lockd is already running */
224 228
225 dprintk("lockd: reclaiming locks for host %s\n", host->h_name); 229 dprintk("lockd: reclaiming locks for host %s\n", host->h_name);
@@ -260,16 +264,17 @@ restart:
260 dprintk("NLM: done reclaiming locks for host %s\n", host->h_name); 264 dprintk("NLM: done reclaiming locks for host %s\n", host->h_name);
261 265
262 /* Now, wake up all processes that sleep on a blocked lock */ 266 /* Now, wake up all processes that sleep on a blocked lock */
267 spin_lock(&nlm_blocked_lock);
263 list_for_each_entry(block, &nlm_blocked, b_list) { 268 list_for_each_entry(block, &nlm_blocked, b_list) {
264 if (block->b_host == host) { 269 if (block->b_host == host) {
265 block->b_status = nlm_lck_denied_grace_period; 270 block->b_status = nlm_lck_denied_grace_period;
266 wake_up(&block->b_wait); 271 wake_up(&block->b_wait);
267 } 272 }
268 } 273 }
274 spin_unlock(&nlm_blocked_lock);
269 275
270 /* Release host handle after use */ 276 /* Release host handle after use */
271 nlm_release_host(host); 277 nlm_release_host(host);
272 lockd_down(); 278 lockd_down();
273 unlock_kernel();
274 return 0; 279 return 0;
275} 280}
diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c
index 7932c399fab4..47ea1e1925b8 100644
--- a/fs/lockd/clntproc.c
+++ b/fs/lockd/clntproc.c
@@ -166,7 +166,6 @@ int nlmclnt_proc(struct nlm_host *host, int cmd, struct file_lock *fl)
166 /* Set up the argument struct */ 166 /* Set up the argument struct */
167 nlmclnt_setlockargs(call, fl); 167 nlmclnt_setlockargs(call, fl);
168 168
169 lock_kernel();
170 if (IS_SETLK(cmd) || IS_SETLKW(cmd)) { 169 if (IS_SETLK(cmd) || IS_SETLKW(cmd)) {
171 if (fl->fl_type != F_UNLCK) { 170 if (fl->fl_type != F_UNLCK) {
172 call->a_args.block = IS_SETLKW(cmd) ? 1 : 0; 171 call->a_args.block = IS_SETLKW(cmd) ? 1 : 0;
@@ -177,10 +176,8 @@ int nlmclnt_proc(struct nlm_host *host, int cmd, struct file_lock *fl)
177 status = nlmclnt_test(call, fl); 176 status = nlmclnt_test(call, fl);
178 else 177 else
179 status = -EINVAL; 178 status = -EINVAL;
180
181 fl->fl_ops->fl_release_private(fl); 179 fl->fl_ops->fl_release_private(fl);
182 fl->fl_ops = NULL; 180 fl->fl_ops = NULL;
183 unlock_kernel();
184 181
185 dprintk("lockd: clnt proc returns %d\n", status); 182 dprintk("lockd: clnt proc returns %d\n", status);
186 return status; 183 return status;
@@ -226,9 +223,7 @@ void nlm_release_call(struct nlm_rqst *call)
226 223
227static void nlmclnt_rpc_release(void *data) 224static void nlmclnt_rpc_release(void *data)
228{ 225{
229 lock_kernel();
230 nlm_release_call(data); 226 nlm_release_call(data);
231 unlock_kernel();
232} 227}
233 228
234static int nlm_wait_on_grace(wait_queue_head_t *queue) 229static int nlm_wait_on_grace(wait_queue_head_t *queue)
@@ -448,14 +443,18 @@ out:
448 443
449static void nlmclnt_locks_copy_lock(struct file_lock *new, struct file_lock *fl) 444static void nlmclnt_locks_copy_lock(struct file_lock *new, struct file_lock *fl)
450{ 445{
446 spin_lock(&fl->fl_u.nfs_fl.owner->host->h_lock);
451 new->fl_u.nfs_fl.state = fl->fl_u.nfs_fl.state; 447 new->fl_u.nfs_fl.state = fl->fl_u.nfs_fl.state;
452 new->fl_u.nfs_fl.owner = nlm_get_lockowner(fl->fl_u.nfs_fl.owner); 448 new->fl_u.nfs_fl.owner = nlm_get_lockowner(fl->fl_u.nfs_fl.owner);
453 list_add_tail(&new->fl_u.nfs_fl.list, &fl->fl_u.nfs_fl.owner->host->h_granted); 449 list_add_tail(&new->fl_u.nfs_fl.list, &fl->fl_u.nfs_fl.owner->host->h_granted);
450 spin_unlock(&fl->fl_u.nfs_fl.owner->host->h_lock);
454} 451}
455 452
456static void nlmclnt_locks_release_private(struct file_lock *fl) 453static void nlmclnt_locks_release_private(struct file_lock *fl)
457{ 454{
455 spin_lock(&fl->fl_u.nfs_fl.owner->host->h_lock);
458 list_del(&fl->fl_u.nfs_fl.list); 456 list_del(&fl->fl_u.nfs_fl.list);
457 spin_unlock(&fl->fl_u.nfs_fl.owner->host->h_lock);
459 nlm_put_lockowner(fl->fl_u.nfs_fl.owner); 458 nlm_put_lockowner(fl->fl_u.nfs_fl.owner);
460} 459}
461 460
@@ -721,9 +720,7 @@ static void nlmclnt_unlock_callback(struct rpc_task *task, void *data)
721die: 720die:
722 return; 721 return;
723 retry_rebind: 722 retry_rebind:
724 lock_kernel();
725 nlm_rebind_host(req->a_host); 723 nlm_rebind_host(req->a_host);
726 unlock_kernel();
727 retry_unlock: 724 retry_unlock:
728 rpc_restart_call(task); 725 rpc_restart_call(task);
729} 726}
@@ -801,9 +798,7 @@ retry_cancel:
801 /* Don't ever retry more than 3 times */ 798 /* Don't ever retry more than 3 times */
802 if (req->a_retries++ >= NLMCLNT_MAX_RETRIES) 799 if (req->a_retries++ >= NLMCLNT_MAX_RETRIES)
803 goto die; 800 goto die;
804 lock_kernel();
805 nlm_rebind_host(req->a_host); 801 nlm_rebind_host(req->a_host);
806 unlock_kernel();
807 rpc_restart_call(task); 802 rpc_restart_call(task);
808 rpc_delay(task, 30 * HZ); 803 rpc_delay(task, 30 * HZ);
809} 804}
diff --git a/fs/lockd/host.c b/fs/lockd/host.c
index bb464d12104c..25e21e4023b2 100644
--- a/fs/lockd/host.c
+++ b/fs/lockd/host.c
@@ -353,6 +353,7 @@ nlm_bind_host(struct nlm_host *host)
353 .to_retries = 5U, 353 .to_retries = 5U,
354 }; 354 };
355 struct rpc_create_args args = { 355 struct rpc_create_args args = {
356 .net = &init_net,
356 .protocol = host->h_proto, 357 .protocol = host->h_proto,
357 .address = nlm_addr(host), 358 .address = nlm_addr(host),
358 .addrsize = host->h_addrlen, 359 .addrsize = host->h_addrlen,
diff --git a/fs/lockd/mon.c b/fs/lockd/mon.c
index e3015464fbab..e0c918949644 100644
--- a/fs/lockd/mon.c
+++ b/fs/lockd/mon.c
@@ -69,6 +69,7 @@ static struct rpc_clnt *nsm_create(void)
69 .sin_addr.s_addr = htonl(INADDR_LOOPBACK), 69 .sin_addr.s_addr = htonl(INADDR_LOOPBACK),
70 }; 70 };
71 struct rpc_create_args args = { 71 struct rpc_create_args args = {
72 .net = &init_net,
72 .protocol = XPRT_TRANSPORT_UDP, 73 .protocol = XPRT_TRANSPORT_UDP,
73 .address = (struct sockaddr *)&sin, 74 .address = (struct sockaddr *)&sin,
74 .addrsize = sizeof(sin), 75 .addrsize = sizeof(sin),
diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c
index f1bacf1a0391..abfff9d7979d 100644
--- a/fs/lockd/svc.c
+++ b/fs/lockd/svc.c
@@ -22,7 +22,6 @@
22#include <linux/in.h> 22#include <linux/in.h>
23#include <linux/uio.h> 23#include <linux/uio.h>
24#include <linux/smp.h> 24#include <linux/smp.h>
25#include <linux/smp_lock.h>
26#include <linux/mutex.h> 25#include <linux/mutex.h>
27#include <linux/kthread.h> 26#include <linux/kthread.h>
28#include <linux/freezer.h> 27#include <linux/freezer.h>
@@ -130,15 +129,6 @@ lockd(void *vrqstp)
130 129
131 dprintk("NFS locking service started (ver " LOCKD_VERSION ").\n"); 130 dprintk("NFS locking service started (ver " LOCKD_VERSION ").\n");
132 131
133 /*
134 * FIXME: it would be nice if lockd didn't spend its entire life
135 * running under the BKL. At the very least, it would be good to
136 * have someone clarify what it's intended to protect here. I've
137 * seen some handwavy posts about posix locking needing to be
138 * done under the BKL, but it's far from clear.
139 */
140 lock_kernel();
141
142 if (!nlm_timeout) 132 if (!nlm_timeout)
143 nlm_timeout = LOCKD_DFLT_TIMEO; 133 nlm_timeout = LOCKD_DFLT_TIMEO;
144 nlmsvc_timeout = nlm_timeout * HZ; 134 nlmsvc_timeout = nlm_timeout * HZ;
@@ -195,7 +185,6 @@ lockd(void *vrqstp)
195 if (nlmsvc_ops) 185 if (nlmsvc_ops)
196 nlmsvc_invalidate_all(); 186 nlmsvc_invalidate_all();
197 nlm_shutdown_hosts(); 187 nlm_shutdown_hosts();
198 unlock_kernel();
199 return 0; 188 return 0;
200} 189}
201 190
@@ -206,7 +195,7 @@ static int create_lockd_listener(struct svc_serv *serv, const char *name,
206 195
207 xprt = svc_find_xprt(serv, name, family, 0); 196 xprt = svc_find_xprt(serv, name, family, 0);
208 if (xprt == NULL) 197 if (xprt == NULL)
209 return svc_create_xprt(serv, name, family, port, 198 return svc_create_xprt(serv, name, &init_net, family, port,
210 SVC_SOCK_DEFAULTS); 199 SVC_SOCK_DEFAULTS);
211 svc_xprt_put(xprt); 200 svc_xprt_put(xprt);
212 return 0; 201 return 0;
diff --git a/fs/lockd/svc4proc.c b/fs/lockd/svc4proc.c
index 031c6569a134..a336e832475d 100644
--- a/fs/lockd/svc4proc.c
+++ b/fs/lockd/svc4proc.c
@@ -230,9 +230,7 @@ static void nlm4svc_callback_exit(struct rpc_task *task, void *data)
230 230
231static void nlm4svc_callback_release(void *data) 231static void nlm4svc_callback_release(void *data)
232{ 232{
233 lock_kernel();
234 nlm_release_call(data); 233 nlm_release_call(data);
235 unlock_kernel();
236} 234}
237 235
238static const struct rpc_call_ops nlm4svc_callback_ops = { 236static const struct rpc_call_ops nlm4svc_callback_ops = {
diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c
index 84055d31bfc5..c462d346acbd 100644
--- a/fs/lockd/svclock.c
+++ b/fs/lockd/svclock.c
@@ -52,12 +52,13 @@ static const struct rpc_call_ops nlmsvc_grant_ops;
52 * The list of blocked locks to retry 52 * The list of blocked locks to retry
53 */ 53 */
54static LIST_HEAD(nlm_blocked); 54static LIST_HEAD(nlm_blocked);
55static DEFINE_SPINLOCK(nlm_blocked_lock);
55 56
56/* 57/*
57 * Insert a blocked lock into the global list 58 * Insert a blocked lock into the global list
58 */ 59 */
59static void 60static void
60nlmsvc_insert_block(struct nlm_block *block, unsigned long when) 61nlmsvc_insert_block_locked(struct nlm_block *block, unsigned long when)
61{ 62{
62 struct nlm_block *b; 63 struct nlm_block *b;
63 struct list_head *pos; 64 struct list_head *pos;
@@ -87,6 +88,13 @@ nlmsvc_insert_block(struct nlm_block *block, unsigned long when)
87 block->b_when = when; 88 block->b_when = when;
88} 89}
89 90
91static void nlmsvc_insert_block(struct nlm_block *block, unsigned long when)
92{
93 spin_lock(&nlm_blocked_lock);
94 nlmsvc_insert_block_locked(block, when);
95 spin_unlock(&nlm_blocked_lock);
96}
97
90/* 98/*
91 * Remove a block from the global list 99 * Remove a block from the global list
92 */ 100 */
@@ -94,7 +102,9 @@ static inline void
94nlmsvc_remove_block(struct nlm_block *block) 102nlmsvc_remove_block(struct nlm_block *block)
95{ 103{
96 if (!list_empty(&block->b_list)) { 104 if (!list_empty(&block->b_list)) {
105 spin_lock(&nlm_blocked_lock);
97 list_del_init(&block->b_list); 106 list_del_init(&block->b_list);
107 spin_unlock(&nlm_blocked_lock);
98 nlmsvc_release_block(block); 108 nlmsvc_release_block(block);
99 } 109 }
100} 110}
@@ -651,7 +661,7 @@ static int nlmsvc_grant_deferred(struct file_lock *fl, struct file_lock *conf,
651 struct nlm_block *block; 661 struct nlm_block *block;
652 int rc = -ENOENT; 662 int rc = -ENOENT;
653 663
654 lock_kernel(); 664 spin_lock(&nlm_blocked_lock);
655 list_for_each_entry(block, &nlm_blocked, b_list) { 665 list_for_each_entry(block, &nlm_blocked, b_list) {
656 if (nlm_compare_locks(&block->b_call->a_args.lock.fl, fl)) { 666 if (nlm_compare_locks(&block->b_call->a_args.lock.fl, fl)) {
657 dprintk("lockd: nlmsvc_notify_blocked block %p flags %d\n", 667 dprintk("lockd: nlmsvc_notify_blocked block %p flags %d\n",
@@ -665,13 +675,13 @@ static int nlmsvc_grant_deferred(struct file_lock *fl, struct file_lock *conf,
665 } else if (result == 0) 675 } else if (result == 0)
666 block->b_granted = 1; 676 block->b_granted = 1;
667 677
668 nlmsvc_insert_block(block, 0); 678 nlmsvc_insert_block_locked(block, 0);
669 svc_wake_up(block->b_daemon); 679 svc_wake_up(block->b_daemon);
670 rc = 0; 680 rc = 0;
671 break; 681 break;
672 } 682 }
673 } 683 }
674 unlock_kernel(); 684 spin_unlock(&nlm_blocked_lock);
675 if (rc == -ENOENT) 685 if (rc == -ENOENT)
676 printk(KERN_WARNING "lockd: grant for unknown block\n"); 686 printk(KERN_WARNING "lockd: grant for unknown block\n");
677 return rc; 687 return rc;
@@ -690,14 +700,16 @@ nlmsvc_notify_blocked(struct file_lock *fl)
690 struct nlm_block *block; 700 struct nlm_block *block;
691 701
692 dprintk("lockd: VFS unblock notification for block %p\n", fl); 702 dprintk("lockd: VFS unblock notification for block %p\n", fl);
703 spin_lock(&nlm_blocked_lock);
693 list_for_each_entry(block, &nlm_blocked, b_list) { 704 list_for_each_entry(block, &nlm_blocked, b_list) {
694 if (nlm_compare_locks(&block->b_call->a_args.lock.fl, fl)) { 705 if (nlm_compare_locks(&block->b_call->a_args.lock.fl, fl)) {
695 nlmsvc_insert_block(block, 0); 706 nlmsvc_insert_block_locked(block, 0);
707 spin_unlock(&nlm_blocked_lock);
696 svc_wake_up(block->b_daemon); 708 svc_wake_up(block->b_daemon);
697 return; 709 return;
698 } 710 }
699 } 711 }
700 712 spin_unlock(&nlm_blocked_lock);
701 printk(KERN_WARNING "lockd: notification for unknown block!\n"); 713 printk(KERN_WARNING "lockd: notification for unknown block!\n");
702} 714}
703 715
@@ -803,7 +815,7 @@ static void nlmsvc_grant_callback(struct rpc_task *task, void *data)
803 815
804 dprintk("lockd: GRANT_MSG RPC callback\n"); 816 dprintk("lockd: GRANT_MSG RPC callback\n");
805 817
806 lock_kernel(); 818 spin_lock(&nlm_blocked_lock);
807 /* if the block is not on a list at this point then it has 819 /* if the block is not on a list at this point then it has
808 * been invalidated. Don't try to requeue it. 820 * been invalidated. Don't try to requeue it.
809 * 821 *
@@ -825,19 +837,20 @@ static void nlmsvc_grant_callback(struct rpc_task *task, void *data)
825 /* Call was successful, now wait for client callback */ 837 /* Call was successful, now wait for client callback */
826 timeout = 60 * HZ; 838 timeout = 60 * HZ;
827 } 839 }
828 nlmsvc_insert_block(block, timeout); 840 nlmsvc_insert_block_locked(block, timeout);
829 svc_wake_up(block->b_daemon); 841 svc_wake_up(block->b_daemon);
830out: 842out:
831 unlock_kernel(); 843 spin_unlock(&nlm_blocked_lock);
832} 844}
833 845
846/*
847 * FIXME: nlmsvc_release_block() grabs a mutex. This is not allowed for an
848 * .rpc_release rpc_call_op
849 */
834static void nlmsvc_grant_release(void *data) 850static void nlmsvc_grant_release(void *data)
835{ 851{
836 struct nlm_rqst *call = data; 852 struct nlm_rqst *call = data;
837
838 lock_kernel();
839 nlmsvc_release_block(call->a_block); 853 nlmsvc_release_block(call->a_block);
840 unlock_kernel();
841} 854}
842 855
843static const struct rpc_call_ops nlmsvc_grant_ops = { 856static const struct rpc_call_ops nlmsvc_grant_ops = {
diff --git a/fs/lockd/svcproc.c b/fs/lockd/svcproc.c
index 0f2ab741ae7c..c3069f38d602 100644
--- a/fs/lockd/svcproc.c
+++ b/fs/lockd/svcproc.c
@@ -260,9 +260,7 @@ static void nlmsvc_callback_exit(struct rpc_task *task, void *data)
260 260
261static void nlmsvc_callback_release(void *data) 261static void nlmsvc_callback_release(void *data)
262{ 262{
263 lock_kernel();
264 nlm_release_call(data); 263 nlm_release_call(data);
265 unlock_kernel();
266} 264}
267 265
268static const struct rpc_call_ops nlmsvc_callback_ops = { 266static const struct rpc_call_ops nlmsvc_callback_ops = {
diff --git a/fs/lockd/svcsubs.c b/fs/lockd/svcsubs.c
index d0ef94cfb3da..1ca0679c80bf 100644
--- a/fs/lockd/svcsubs.c
+++ b/fs/lockd/svcsubs.c
@@ -170,6 +170,7 @@ nlm_traverse_locks(struct nlm_host *host, struct nlm_file *file,
170 170
171again: 171again:
172 file->f_locks = 0; 172 file->f_locks = 0;
173 lock_flocks(); /* protects i_flock list */
173 for (fl = inode->i_flock; fl; fl = fl->fl_next) { 174 for (fl = inode->i_flock; fl; fl = fl->fl_next) {
174 if (fl->fl_lmops != &nlmsvc_lock_operations) 175 if (fl->fl_lmops != &nlmsvc_lock_operations)
175 continue; 176 continue;
@@ -181,6 +182,7 @@ again:
181 if (match(lockhost, host)) { 182 if (match(lockhost, host)) {
182 struct file_lock lock = *fl; 183 struct file_lock lock = *fl;
183 184
185 unlock_flocks();
184 lock.fl_type = F_UNLCK; 186 lock.fl_type = F_UNLCK;
185 lock.fl_start = 0; 187 lock.fl_start = 0;
186 lock.fl_end = OFFSET_MAX; 188 lock.fl_end = OFFSET_MAX;
@@ -192,6 +194,7 @@ again:
192 goto again; 194 goto again;
193 } 195 }
194 } 196 }
197 unlock_flocks();
195 198
196 return 0; 199 return 0;
197} 200}
@@ -226,10 +229,14 @@ nlm_file_inuse(struct nlm_file *file)
226 if (file->f_count || !list_empty(&file->f_blocks) || file->f_shares) 229 if (file->f_count || !list_empty(&file->f_blocks) || file->f_shares)
227 return 1; 230 return 1;
228 231
232 lock_flocks();
229 for (fl = inode->i_flock; fl; fl = fl->fl_next) { 233 for (fl = inode->i_flock; fl; fl = fl->fl_next) {
230 if (fl->fl_lmops == &nlmsvc_lock_operations) 234 if (fl->fl_lmops == &nlmsvc_lock_operations) {
235 unlock_flocks();
231 return 1; 236 return 1;
237 }
232 } 238 }
239 unlock_flocks();
233 file->f_locks = 0; 240 file->f_locks = 0;
234 return 0; 241 return 0;
235} 242}
diff --git a/fs/locks.c b/fs/locks.c
index 8b2b6ad56a09..50ec15927aab 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -142,6 +142,7 @@ int lease_break_time = 45;
142 142
143static LIST_HEAD(file_lock_list); 143static LIST_HEAD(file_lock_list);
144static LIST_HEAD(blocked_list); 144static LIST_HEAD(blocked_list);
145static DEFINE_SPINLOCK(file_lock_lock);
145 146
146/* 147/*
147 * Protects the two list heads above, plus the inode->i_flock list 148 * Protects the two list heads above, plus the inode->i_flock list
@@ -149,23 +150,24 @@ static LIST_HEAD(blocked_list);
149 */ 150 */
150void lock_flocks(void) 151void lock_flocks(void)
151{ 152{
152 lock_kernel(); 153 spin_lock(&file_lock_lock);
153} 154}
154EXPORT_SYMBOL_GPL(lock_flocks); 155EXPORT_SYMBOL_GPL(lock_flocks);
155 156
156void unlock_flocks(void) 157void unlock_flocks(void)
157{ 158{
158 unlock_kernel(); 159 spin_unlock(&file_lock_lock);
159} 160}
160EXPORT_SYMBOL_GPL(unlock_flocks); 161EXPORT_SYMBOL_GPL(unlock_flocks);
161 162
162static struct kmem_cache *filelock_cache __read_mostly; 163static struct kmem_cache *filelock_cache __read_mostly;
163 164
164/* Allocate an empty lock structure. */ 165/* Allocate an empty lock structure. */
165static struct file_lock *locks_alloc_lock(void) 166struct file_lock *locks_alloc_lock(void)
166{ 167{
167 return kmem_cache_alloc(filelock_cache, GFP_KERNEL); 168 return kmem_cache_alloc(filelock_cache, GFP_KERNEL);
168} 169}
170EXPORT_SYMBOL_GPL(locks_alloc_lock);
169 171
170void locks_release_private(struct file_lock *fl) 172void locks_release_private(struct file_lock *fl)
171{ 173{
@@ -1365,7 +1367,6 @@ int fcntl_getlease(struct file *filp)
1365int generic_setlease(struct file *filp, long arg, struct file_lock **flp) 1367int generic_setlease(struct file *filp, long arg, struct file_lock **flp)
1366{ 1368{
1367 struct file_lock *fl, **before, **my_before = NULL, *lease; 1369 struct file_lock *fl, **before, **my_before = NULL, *lease;
1368 struct file_lock *new_fl = NULL;
1369 struct dentry *dentry = filp->f_path.dentry; 1370 struct dentry *dentry = filp->f_path.dentry;
1370 struct inode *inode = dentry->d_inode; 1371 struct inode *inode = dentry->d_inode;
1371 int error, rdlease_count = 0, wrlease_count = 0; 1372 int error, rdlease_count = 0, wrlease_count = 0;
@@ -1385,11 +1386,6 @@ int generic_setlease(struct file *filp, long arg, struct file_lock **flp)
1385 lease = *flp; 1386 lease = *flp;
1386 1387
1387 if (arg != F_UNLCK) { 1388 if (arg != F_UNLCK) {
1388 error = -ENOMEM;
1389 new_fl = locks_alloc_lock();
1390 if (new_fl == NULL)
1391 goto out;
1392
1393 error = -EAGAIN; 1389 error = -EAGAIN;
1394 if ((arg == F_RDLCK) && (atomic_read(&inode->i_writecount) > 0)) 1390 if ((arg == F_RDLCK) && (atomic_read(&inode->i_writecount) > 0))
1395 goto out; 1391 goto out;
@@ -1434,7 +1430,6 @@ int generic_setlease(struct file *filp, long arg, struct file_lock **flp)
1434 goto out; 1430 goto out;
1435 } 1431 }
1436 1432
1437 error = 0;
1438 if (arg == F_UNLCK) 1433 if (arg == F_UNLCK)
1439 goto out; 1434 goto out;
1440 1435
@@ -1442,15 +1437,11 @@ int generic_setlease(struct file *filp, long arg, struct file_lock **flp)
1442 if (!leases_enable) 1437 if (!leases_enable)
1443 goto out; 1438 goto out;
1444 1439
1445 locks_copy_lock(new_fl, lease); 1440 locks_insert_lock(before, lease);
1446 locks_insert_lock(before, new_fl);
1447
1448 *flp = new_fl;
1449 return 0; 1441 return 0;
1450 1442
1451out: 1443out:
1452 if (new_fl != NULL) 1444 locks_free_lock(lease);
1453 locks_free_lock(new_fl);
1454 return error; 1445 return error;
1455} 1446}
1456EXPORT_SYMBOL(generic_setlease); 1447EXPORT_SYMBOL(generic_setlease);
@@ -1514,26 +1505,38 @@ EXPORT_SYMBOL_GPL(vfs_setlease);
1514 */ 1505 */
1515int fcntl_setlease(unsigned int fd, struct file *filp, long arg) 1506int fcntl_setlease(unsigned int fd, struct file *filp, long arg)
1516{ 1507{
1517 struct file_lock fl, *flp = &fl; 1508 struct file_lock *fl;
1509 struct fasync_struct *new;
1518 struct inode *inode = filp->f_path.dentry->d_inode; 1510 struct inode *inode = filp->f_path.dentry->d_inode;
1519 int error; 1511 int error;
1520 1512
1521 locks_init_lock(&fl); 1513 fl = lease_alloc(filp, arg);
1522 error = lease_init(filp, arg, &fl); 1514 if (IS_ERR(fl))
1523 if (error) 1515 return PTR_ERR(fl);
1524 return error;
1525 1516
1517 new = fasync_alloc();
1518 if (!new) {
1519 locks_free_lock(fl);
1520 return -ENOMEM;
1521 }
1526 lock_flocks(); 1522 lock_flocks();
1527 1523 error = __vfs_setlease(filp, arg, &fl);
1528 error = __vfs_setlease(filp, arg, &flp);
1529 if (error || arg == F_UNLCK) 1524 if (error || arg == F_UNLCK)
1530 goto out_unlock; 1525 goto out_unlock;
1531 1526
1532 error = fasync_helper(fd, filp, 1, &flp->fl_fasync); 1527 /*
1528 * fasync_insert_entry() returns the old entry if any.
1529 * If there was no old entry, then it used 'new' and
1530 * inserted it into the fasync list. Clear new so that
1531 * we don't release it here.
1532 */
1533 if (!fasync_insert_entry(fd, filp, &fl->fl_fasync, new))
1534 new = NULL;
1535
1533 if (error < 0) { 1536 if (error < 0) {
1534 /* remove lease just inserted by setlease */ 1537 /* remove lease just inserted by setlease */
1535 flp->fl_type = F_UNLCK | F_INPROGRESS; 1538 fl->fl_type = F_UNLCK | F_INPROGRESS;
1536 flp->fl_break_time = jiffies - 10; 1539 fl->fl_break_time = jiffies - 10;
1537 time_out_leases(inode); 1540 time_out_leases(inode);
1538 goto out_unlock; 1541 goto out_unlock;
1539 } 1542 }
@@ -1541,6 +1544,8 @@ int fcntl_setlease(unsigned int fd, struct file *filp, long arg)
1541 error = __f_setown(filp, task_pid(current), PIDTYPE_PID, 0); 1544 error = __f_setown(filp, task_pid(current), PIDTYPE_PID, 0);
1542out_unlock: 1545out_unlock:
1543 unlock_flocks(); 1546 unlock_flocks();
1547 if (new)
1548 fasync_free(new);
1544 return error; 1549 return error;
1545} 1550}
1546 1551
@@ -2109,7 +2114,7 @@ EXPORT_SYMBOL_GPL(vfs_cancel_lock);
2109#include <linux/seq_file.h> 2114#include <linux/seq_file.h>
2110 2115
2111static void lock_get_status(struct seq_file *f, struct file_lock *fl, 2116static void lock_get_status(struct seq_file *f, struct file_lock *fl,
2112 int id, char *pfx) 2117 loff_t id, char *pfx)
2113{ 2118{
2114 struct inode *inode = NULL; 2119 struct inode *inode = NULL;
2115 unsigned int fl_pid; 2120 unsigned int fl_pid;
@@ -2122,7 +2127,7 @@ static void lock_get_status(struct seq_file *f, struct file_lock *fl,
2122 if (fl->fl_file != NULL) 2127 if (fl->fl_file != NULL)
2123 inode = fl->fl_file->f_path.dentry->d_inode; 2128 inode = fl->fl_file->f_path.dentry->d_inode;
2124 2129
2125 seq_printf(f, "%d:%s ", id, pfx); 2130 seq_printf(f, "%lld:%s ", id, pfx);
2126 if (IS_POSIX(fl)) { 2131 if (IS_POSIX(fl)) {
2127 seq_printf(f, "%6s %s ", 2132 seq_printf(f, "%6s %s ",
2128 (fl->fl_flags & FL_ACCESS) ? "ACCESS" : "POSIX ", 2133 (fl->fl_flags & FL_ACCESS) ? "ACCESS" : "POSIX ",
@@ -2185,24 +2190,27 @@ static int locks_show(struct seq_file *f, void *v)
2185 2190
2186 fl = list_entry(v, struct file_lock, fl_link); 2191 fl = list_entry(v, struct file_lock, fl_link);
2187 2192
2188 lock_get_status(f, fl, (long)f->private, ""); 2193 lock_get_status(f, fl, *((loff_t *)f->private), "");
2189 2194
2190 list_for_each_entry(bfl, &fl->fl_block, fl_block) 2195 list_for_each_entry(bfl, &fl->fl_block, fl_block)
2191 lock_get_status(f, bfl, (long)f->private, " ->"); 2196 lock_get_status(f, bfl, *((loff_t *)f->private), " ->");
2192 2197
2193 f->private++;
2194 return 0; 2198 return 0;
2195} 2199}
2196 2200
2197static void *locks_start(struct seq_file *f, loff_t *pos) 2201static void *locks_start(struct seq_file *f, loff_t *pos)
2198{ 2202{
2203 loff_t *p = f->private;
2204
2199 lock_flocks(); 2205 lock_flocks();
2200 f->private = (void *)1; 2206 *p = (*pos + 1);
2201 return seq_list_start(&file_lock_list, *pos); 2207 return seq_list_start(&file_lock_list, *pos);
2202} 2208}
2203 2209
2204static void *locks_next(struct seq_file *f, void *v, loff_t *pos) 2210static void *locks_next(struct seq_file *f, void *v, loff_t *pos)
2205{ 2211{
2212 loff_t *p = f->private;
2213 ++*p;
2206 return seq_list_next(v, &file_lock_list, pos); 2214 return seq_list_next(v, &file_lock_list, pos);
2207} 2215}
2208 2216
@@ -2220,14 +2228,14 @@ static const struct seq_operations locks_seq_operations = {
2220 2228
2221static int locks_open(struct inode *inode, struct file *filp) 2229static int locks_open(struct inode *inode, struct file *filp)
2222{ 2230{
2223 return seq_open(filp, &locks_seq_operations); 2231 return seq_open_private(filp, &locks_seq_operations, sizeof(loff_t));
2224} 2232}
2225 2233
2226static const struct file_operations proc_locks_operations = { 2234static const struct file_operations proc_locks_operations = {
2227 .open = locks_open, 2235 .open = locks_open,
2228 .read = seq_read, 2236 .read = seq_read,
2229 .llseek = seq_lseek, 2237 .llseek = seq_lseek,
2230 .release = seq_release, 2238 .release = seq_release_private,
2231}; 2239};
2232 2240
2233static int __init proc_locks_init(void) 2241static int __init proc_locks_init(void)
diff --git a/fs/logfs/dev_bdev.c b/fs/logfs/dev_bdev.c
index 9bd2ce2a3040..92ca6fbe09bd 100644
--- a/fs/logfs/dev_bdev.c
+++ b/fs/logfs/dev_bdev.c
@@ -298,9 +298,9 @@ static int bdev_write_sb(struct super_block *sb, struct page *page)
298 return sync_request(page, bdev, WRITE); 298 return sync_request(page, bdev, WRITE);
299} 299}
300 300
301static void bdev_put_device(struct super_block *sb) 301static void bdev_put_device(struct logfs_super *s)
302{ 302{
303 close_bdev_exclusive(logfs_super(sb)->s_bdev, FMODE_READ|FMODE_WRITE); 303 close_bdev_exclusive(s->s_bdev, FMODE_READ|FMODE_WRITE);
304} 304}
305 305
306static int bdev_can_write_buf(struct super_block *sb, u64 ofs) 306static int bdev_can_write_buf(struct super_block *sb, u64 ofs)
@@ -320,8 +320,8 @@ static const struct logfs_device_ops bd_devops = {
320 .put_device = bdev_put_device, 320 .put_device = bdev_put_device,
321}; 321};
322 322
323int logfs_get_sb_bdev(struct file_system_type *type, int flags, 323int logfs_get_sb_bdev(struct logfs_super *p, struct file_system_type *type,
324 const char *devname, struct vfsmount *mnt) 324 const char *devname)
325{ 325{
326 struct block_device *bdev; 326 struct block_device *bdev;
327 327
@@ -332,8 +332,11 @@ int logfs_get_sb_bdev(struct file_system_type *type, int flags,
332 if (MAJOR(bdev->bd_dev) == MTD_BLOCK_MAJOR) { 332 if (MAJOR(bdev->bd_dev) == MTD_BLOCK_MAJOR) {
333 int mtdnr = MINOR(bdev->bd_dev); 333 int mtdnr = MINOR(bdev->bd_dev);
334 close_bdev_exclusive(bdev, FMODE_READ|FMODE_WRITE); 334 close_bdev_exclusive(bdev, FMODE_READ|FMODE_WRITE);
335 return logfs_get_sb_mtd(type, flags, mtdnr, mnt); 335 return logfs_get_sb_mtd(p, mtdnr);
336 } 336 }
337 337
338 return logfs_get_sb_device(type, flags, NULL, bdev, &bd_devops, mnt); 338 p->s_bdev = bdev;
339 p->s_mtd = NULL;
340 p->s_devops = &bd_devops;
341 return 0;
339} 342}
diff --git a/fs/logfs/dev_mtd.c b/fs/logfs/dev_mtd.c
index a85d47d13e4b..7466e9dcc8c5 100644
--- a/fs/logfs/dev_mtd.c
+++ b/fs/logfs/dev_mtd.c
@@ -230,9 +230,9 @@ static void mtd_writeseg(struct super_block *sb, u64 ofs, size_t len)
230 __mtd_writeseg(sb, ofs, ofs >> PAGE_SHIFT, len >> PAGE_SHIFT); 230 __mtd_writeseg(sb, ofs, ofs >> PAGE_SHIFT, len >> PAGE_SHIFT);
231} 231}
232 232
233static void mtd_put_device(struct super_block *sb) 233static void mtd_put_device(struct logfs_super *s)
234{ 234{
235 put_mtd_device(logfs_super(sb)->s_mtd); 235 put_mtd_device(s->s_mtd);
236} 236}
237 237
238static int mtd_can_write_buf(struct super_block *sb, u64 ofs) 238static int mtd_can_write_buf(struct super_block *sb, u64 ofs)
@@ -265,14 +265,14 @@ static const struct logfs_device_ops mtd_devops = {
265 .put_device = mtd_put_device, 265 .put_device = mtd_put_device,
266}; 266};
267 267
268int logfs_get_sb_mtd(struct file_system_type *type, int flags, 268int logfs_get_sb_mtd(struct logfs_super *s, int mtdnr)
269 int mtdnr, struct vfsmount *mnt)
270{ 269{
271 struct mtd_info *mtd; 270 struct mtd_info *mtd = get_mtd_device(NULL, mtdnr);
272 const struct logfs_device_ops *devops = &mtd_devops;
273
274 mtd = get_mtd_device(NULL, mtdnr);
275 if (IS_ERR(mtd)) 271 if (IS_ERR(mtd))
276 return PTR_ERR(mtd); 272 return PTR_ERR(mtd);
277 return logfs_get_sb_device(type, flags, mtd, NULL, devops, mnt); 273
274 s->s_bdev = NULL;
275 s->s_mtd = mtd;
276 s->s_devops = &mtd_devops;
277 return 0;
278} 278}
diff --git a/fs/logfs/dir.c b/fs/logfs/dir.c
index 1eb4e89e045b..409dfd65e9a1 100644
--- a/fs/logfs/dir.c
+++ b/fs/logfs/dir.c
@@ -569,7 +569,7 @@ static int logfs_link(struct dentry *old_dentry, struct inode *dir,
569 return -EMLINK; 569 return -EMLINK;
570 570
571 inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME; 571 inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME;
572 atomic_inc(&inode->i_count); 572 ihold(inode);
573 inode->i_nlink++; 573 inode->i_nlink++;
574 mark_inode_dirty_sync(inode); 574 mark_inode_dirty_sync(inode);
575 575
diff --git a/fs/logfs/logfs.h b/fs/logfs/logfs.h
index b8786264d243..cd51a36b37f0 100644
--- a/fs/logfs/logfs.h
+++ b/fs/logfs/logfs.h
@@ -136,6 +136,7 @@ struct logfs_area_ops {
136 int (*erase_segment)(struct logfs_area *area); 136 int (*erase_segment)(struct logfs_area *area);
137}; 137};
138 138
139struct logfs_super; /* forward */
139/** 140/**
140 * struct logfs_device_ops - device access operations 141 * struct logfs_device_ops - device access operations
141 * 142 *
@@ -156,7 +157,7 @@ struct logfs_device_ops {
156 int ensure_write); 157 int ensure_write);
157 int (*can_write_buf)(struct super_block *sb, u64 ofs); 158 int (*can_write_buf)(struct super_block *sb, u64 ofs);
158 void (*sync)(struct super_block *sb); 159 void (*sync)(struct super_block *sb);
159 void (*put_device)(struct super_block *sb); 160 void (*put_device)(struct logfs_super *s);
160}; 161};
161 162
162/** 163/**
@@ -471,11 +472,13 @@ void logfs_compr_exit(void);
471 472
472/* dev_bdev.c */ 473/* dev_bdev.c */
473#ifdef CONFIG_BLOCK 474#ifdef CONFIG_BLOCK
474int logfs_get_sb_bdev(struct file_system_type *type, int flags, 475int logfs_get_sb_bdev(struct logfs_super *s,
475 const char *devname, struct vfsmount *mnt); 476 struct file_system_type *type,
477 const char *devname);
476#else 478#else
477static inline int logfs_get_sb_bdev(struct file_system_type *type, int flags, 479static inline int logfs_get_sb_bdev(struct logfs_super *s,
478 const char *devname, struct vfsmount *mnt) 480 struct file_system_type *type,
481 const char *devname)
479{ 482{
480 return -ENODEV; 483 return -ENODEV;
481} 484}
@@ -483,11 +486,9 @@ static inline int logfs_get_sb_bdev(struct file_system_type *type, int flags,
483 486
484/* dev_mtd.c */ 487/* dev_mtd.c */
485#ifdef CONFIG_MTD 488#ifdef CONFIG_MTD
486int logfs_get_sb_mtd(struct file_system_type *type, int flags, 489int logfs_get_sb_mtd(struct logfs_super *s, int mtdnr)
487 int mtdnr, struct vfsmount *mnt);
488#else 490#else
489static inline int logfs_get_sb_mtd(struct file_system_type *type, int flags, 491static inline int logfs_get_sb_mtd(struct logfs_super *s, int mtdnr)
490 int mtdnr, struct vfsmount *mnt)
491{ 492{
492 return -ENODEV; 493 return -ENODEV;
493} 494}
@@ -619,9 +620,6 @@ void emergency_read_end(struct page *page);
619void logfs_crash_dump(struct super_block *sb); 620void logfs_crash_dump(struct super_block *sb);
620void *memchr_inv(const void *s, int c, size_t n); 621void *memchr_inv(const void *s, int c, size_t n);
621int logfs_statfs(struct dentry *dentry, struct kstatfs *stats); 622int logfs_statfs(struct dentry *dentry, struct kstatfs *stats);
622int logfs_get_sb_device(struct file_system_type *type, int flags,
623 struct mtd_info *mtd, struct block_device *bdev,
624 const struct logfs_device_ops *devops, struct vfsmount *mnt);
625int logfs_check_ds(struct logfs_disk_super *ds); 623int logfs_check_ds(struct logfs_disk_super *ds);
626int logfs_write_sb(struct super_block *sb); 624int logfs_write_sb(struct super_block *sb);
627 625
diff --git a/fs/logfs/super.c b/fs/logfs/super.c
index 5336155c5d81..33435e4b14d2 100644
--- a/fs/logfs/super.c
+++ b/fs/logfs/super.c
@@ -325,7 +325,7 @@ static int logfs_make_writeable(struct super_block *sb)
325 return 0; 325 return 0;
326} 326}
327 327
328static int logfs_get_sb_final(struct super_block *sb, struct vfsmount *mnt) 328static int logfs_get_sb_final(struct super_block *sb)
329{ 329{
330 struct logfs_super *super = logfs_super(sb); 330 struct logfs_super *super = logfs_super(sb);
331 struct inode *rootdir; 331 struct inode *rootdir;
@@ -356,7 +356,6 @@ static int logfs_get_sb_final(struct super_block *sb, struct vfsmount *mnt)
356 } 356 }
357 357
358 log_super("LogFS: Finished mounting\n"); 358 log_super("LogFS: Finished mounting\n");
359 simple_set_mnt(mnt, sb);
360 return 0; 359 return 0;
361 360
362fail: 361fail:
@@ -529,43 +528,37 @@ static void logfs_kill_sb(struct super_block *sb)
529 logfs_cleanup_rw(sb); 528 logfs_cleanup_rw(sb);
530 if (super->s_erase_page) 529 if (super->s_erase_page)
531 __free_page(super->s_erase_page); 530 __free_page(super->s_erase_page);
532 super->s_devops->put_device(sb); 531 super->s_devops->put_device(super);
533 logfs_mempool_destroy(super->s_btree_pool); 532 logfs_mempool_destroy(super->s_btree_pool);
534 logfs_mempool_destroy(super->s_alias_pool); 533 logfs_mempool_destroy(super->s_alias_pool);
535 kfree(super); 534 kfree(super);
536 log_super("LogFS: Finished unmounting\n"); 535 log_super("LogFS: Finished unmounting\n");
537} 536}
538 537
539int logfs_get_sb_device(struct file_system_type *type, int flags, 538static struct dentry *logfs_get_sb_device(struct logfs_super *super,
540 struct mtd_info *mtd, struct block_device *bdev, 539 struct file_system_type *type, int flags)
541 const struct logfs_device_ops *devops, struct vfsmount *mnt)
542{ 540{
543 struct logfs_super *super;
544 struct super_block *sb; 541 struct super_block *sb;
545 int err = -ENOMEM; 542 int err = -ENOMEM;
546 static int mount_count; 543 static int mount_count;
547 544
548 log_super("LogFS: Start mount %x\n", mount_count++); 545 log_super("LogFS: Start mount %x\n", mount_count++);
549 super = kzalloc(sizeof(*super), GFP_KERNEL);
550 if (!super)
551 goto err0;
552 546
553 super->s_mtd = mtd;
554 super->s_bdev = bdev;
555 err = -EINVAL; 547 err = -EINVAL;
556 sb = sget(type, logfs_sb_test, logfs_sb_set, super); 548 sb = sget(type, logfs_sb_test, logfs_sb_set, super);
557 if (IS_ERR(sb)) 549 if (IS_ERR(sb)) {
558 goto err0; 550 super->s_devops->put_device(super);
551 kfree(super);
552 return ERR_CAST(sb);
553 }
559 554
560 if (sb->s_root) { 555 if (sb->s_root) {
561 /* Device is already in use */ 556 /* Device is already in use */
562 err = 0; 557 super->s_devops->put_device(super);
563 simple_set_mnt(mnt, sb); 558 kfree(super);
564 goto err0; 559 return dget(sb->s_root);
565 } 560 }
566 561
567 super->s_devops = devops;
568
569 /* 562 /*
570 * sb->s_maxbytes is limited to 8TB. On 32bit systems, the page cache 563 * sb->s_maxbytes is limited to 8TB. On 32bit systems, the page cache
571 * only covers 16TB and the upper 8TB are used for indirect blocks. 564 * only covers 16TB and the upper 8TB are used for indirect blocks.
@@ -581,10 +574,12 @@ int logfs_get_sb_device(struct file_system_type *type, int flags,
581 goto err1; 574 goto err1;
582 575
583 sb->s_flags |= MS_ACTIVE; 576 sb->s_flags |= MS_ACTIVE;
584 err = logfs_get_sb_final(sb, mnt); 577 err = logfs_get_sb_final(sb);
585 if (err) 578 if (err) {
586 deactivate_locked_super(sb); 579 deactivate_locked_super(sb);
587 return err; 580 return ERR_PTR(err);
581 }
582 return dget(sb->s_root);
588 583
589err1: 584err1:
590 /* no ->s_root, no ->put_super() */ 585 /* no ->s_root, no ->put_super() */
@@ -592,37 +587,45 @@ err1:
592 iput(super->s_segfile_inode); 587 iput(super->s_segfile_inode);
593 iput(super->s_mapping_inode); 588 iput(super->s_mapping_inode);
594 deactivate_locked_super(sb); 589 deactivate_locked_super(sb);
595 return err; 590 return ERR_PTR(err);
596err0:
597 kfree(super);
598 //devops->put_device(sb);
599 return err;
600} 591}
601 592
602static int logfs_get_sb(struct file_system_type *type, int flags, 593static struct dentry *logfs_mount(struct file_system_type *type, int flags,
603 const char *devname, void *data, struct vfsmount *mnt) 594 const char *devname, void *data)
604{ 595{
605 ulong mtdnr; 596 ulong mtdnr;
597 struct logfs_super *super;
598 int err;
606 599
607 if (!devname) 600 super = kzalloc(sizeof(*super), GFP_KERNEL);
608 return logfs_get_sb_bdev(type, flags, devname, mnt); 601 if (!super)
609 if (strncmp(devname, "mtd", 3)) 602 return ERR_PTR(-ENOMEM);
610 return logfs_get_sb_bdev(type, flags, devname, mnt);
611 603
612 { 604 if (!devname)
605 err = logfs_get_sb_bdev(super, type, devname);
606 else if (strncmp(devname, "mtd", 3))
607 err = logfs_get_sb_bdev(super, type, devname);
608 else {
613 char *garbage; 609 char *garbage;
614 mtdnr = simple_strtoul(devname+3, &garbage, 0); 610 mtdnr = simple_strtoul(devname+3, &garbage, 0);
615 if (*garbage) 611 if (*garbage)
616 return -EINVAL; 612 err = -EINVAL;
613 else
614 err = logfs_get_sb_mtd(super, mtdnr);
615 }
616
617 if (err) {
618 kfree(super);
619 return ERR_PTR(err);
617 } 620 }
618 621
619 return logfs_get_sb_mtd(type, flags, mtdnr, mnt); 622 return logfs_get_sb_device(super, type, flags);
620} 623}
621 624
622static struct file_system_type logfs_fs_type = { 625static struct file_system_type logfs_fs_type = {
623 .owner = THIS_MODULE, 626 .owner = THIS_MODULE,
624 .name = "logfs", 627 .name = "logfs",
625 .get_sb = logfs_get_sb, 628 .mount = logfs_mount,
626 .kill_sb = logfs_kill_sb, 629 .kill_sb = logfs_kill_sb,
627 .fs_flags = FS_REQUIRES_DEV, 630 .fs_flags = FS_REQUIRES_DEV,
628 631
diff --git a/fs/minix/inode.c b/fs/minix/inode.c
index e39d6bf2e8fb..fb2020858a34 100644
--- a/fs/minix/inode.c
+++ b/fs/minix/inode.c
@@ -614,17 +614,16 @@ void minix_truncate(struct inode * inode)
614 V2_minix_truncate(inode); 614 V2_minix_truncate(inode);
615} 615}
616 616
617static int minix_get_sb(struct file_system_type *fs_type, 617static struct dentry *minix_mount(struct file_system_type *fs_type,
618 int flags, const char *dev_name, void *data, struct vfsmount *mnt) 618 int flags, const char *dev_name, void *data)
619{ 619{
620 return get_sb_bdev(fs_type, flags, dev_name, data, minix_fill_super, 620 return mount_bdev(fs_type, flags, dev_name, data, minix_fill_super);
621 mnt);
622} 621}
623 622
624static struct file_system_type minix_fs_type = { 623static struct file_system_type minix_fs_type = {
625 .owner = THIS_MODULE, 624 .owner = THIS_MODULE,
626 .name = "minix", 625 .name = "minix",
627 .get_sb = minix_get_sb, 626 .mount = minix_mount,
628 .kill_sb = kill_block_super, 627 .kill_sb = kill_block_super,
629 .fs_flags = FS_REQUIRES_DEV, 628 .fs_flags = FS_REQUIRES_DEV,
630}; 629};
diff --git a/fs/minix/namei.c b/fs/minix/namei.c
index f3f3578393a4..c0d35a3accef 100644
--- a/fs/minix/namei.c
+++ b/fs/minix/namei.c
@@ -101,7 +101,7 @@ static int minix_link(struct dentry * old_dentry, struct inode * dir,
101 101
102 inode->i_ctime = CURRENT_TIME_SEC; 102 inode->i_ctime = CURRENT_TIME_SEC;
103 inode_inc_link_count(inode); 103 inode_inc_link_count(inode);
104 atomic_inc(&inode->i_count); 104 ihold(inode);
105 return add_nondir(dentry, inode); 105 return add_nondir(dentry, inode);
106} 106}
107 107
diff --git a/fs/namei.c b/fs/namei.c
index 24896e833565..5362af9b7372 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1121,11 +1121,13 @@ int vfs_path_lookup(struct dentry *dentry, struct vfsmount *mnt,
1121static struct dentry *__lookup_hash(struct qstr *name, 1121static struct dentry *__lookup_hash(struct qstr *name,
1122 struct dentry *base, struct nameidata *nd) 1122 struct dentry *base, struct nameidata *nd)
1123{ 1123{
1124 struct inode *inode = base->d_inode;
1124 struct dentry *dentry; 1125 struct dentry *dentry;
1125 struct inode *inode;
1126 int err; 1126 int err;
1127 1127
1128 inode = base->d_inode; 1128 err = exec_permission(inode);
1129 if (err)
1130 return ERR_PTR(err);
1129 1131
1130 /* 1132 /*
1131 * See if the low-level filesystem might want 1133 * See if the low-level filesystem might want
@@ -1161,11 +1163,6 @@ out:
1161 */ 1163 */
1162static struct dentry *lookup_hash(struct nameidata *nd) 1164static struct dentry *lookup_hash(struct nameidata *nd)
1163{ 1165{
1164 int err;
1165
1166 err = exec_permission(nd->path.dentry->d_inode);
1167 if (err)
1168 return ERR_PTR(err);
1169 return __lookup_hash(&nd->last, nd->path.dentry, nd); 1166 return __lookup_hash(&nd->last, nd->path.dentry, nd);
1170} 1167}
1171 1168
@@ -1213,9 +1210,6 @@ struct dentry *lookup_one_len(const char *name, struct dentry *base, int len)
1213 if (err) 1210 if (err)
1214 return ERR_PTR(err); 1211 return ERR_PTR(err);
1215 1212
1216 err = exec_permission(base->d_inode);
1217 if (err)
1218 return ERR_PTR(err);
1219 return __lookup_hash(&this, base, NULL); 1213 return __lookup_hash(&this, base, NULL);
1220} 1214}
1221 1215
@@ -1580,6 +1574,7 @@ static struct file *finish_open(struct nameidata *nd,
1580 */ 1574 */
1581 if (will_truncate) 1575 if (will_truncate)
1582 mnt_drop_write(nd->path.mnt); 1576 mnt_drop_write(nd->path.mnt);
1577 path_put(&nd->path);
1583 return filp; 1578 return filp;
1584 1579
1585exit: 1580exit:
@@ -1681,6 +1676,7 @@ static struct file *do_last(struct nameidata *nd, struct path *path,
1681 } 1676 }
1682 filp = nameidata_to_filp(nd); 1677 filp = nameidata_to_filp(nd);
1683 mnt_drop_write(nd->path.mnt); 1678 mnt_drop_write(nd->path.mnt);
1679 path_put(&nd->path);
1684 if (!IS_ERR(filp)) { 1680 if (!IS_ERR(filp)) {
1685 error = ima_file_check(filp, acc_mode); 1681 error = ima_file_check(filp, acc_mode);
1686 if (error) { 1682 if (error) {
@@ -2291,7 +2287,7 @@ static long do_unlinkat(int dfd, const char __user *pathname)
2291 goto slashes; 2287 goto slashes;
2292 inode = dentry->d_inode; 2288 inode = dentry->d_inode;
2293 if (inode) 2289 if (inode)
2294 atomic_inc(&inode->i_count); 2290 ihold(inode);
2295 error = mnt_want_write(nd.path.mnt); 2291 error = mnt_want_write(nd.path.mnt);
2296 if (error) 2292 if (error)
2297 goto exit2; 2293 goto exit2;
diff --git a/fs/namespace.c b/fs/namespace.c
index 7ca5182c0bed..8a415c9c5e55 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -595,7 +595,7 @@ static struct vfsmount *clone_mnt(struct vfsmount *old, struct dentry *root,
595 goto out_free; 595 goto out_free;
596 } 596 }
597 597
598 mnt->mnt_flags = old->mnt_flags; 598 mnt->mnt_flags = old->mnt_flags & ~MNT_WRITE_HOLD;
599 atomic_inc(&sb->s_active); 599 atomic_inc(&sb->s_active);
600 mnt->mnt_sb = sb; 600 mnt->mnt_sb = sb;
601 mnt->mnt_root = dget(root); 601 mnt->mnt_root = dget(root);
diff --git a/fs/ncpfs/inode.c b/fs/ncpfs/inode.c
index 985fabb26aca..d290545aa0c4 100644
--- a/fs/ncpfs/inode.c
+++ b/fs/ncpfs/inode.c
@@ -1020,16 +1020,16 @@ out:
1020 return result; 1020 return result;
1021} 1021}
1022 1022
1023static int ncp_get_sb(struct file_system_type *fs_type, 1023static struct dentry *ncp_mount(struct file_system_type *fs_type,
1024 int flags, const char *dev_name, void *data, struct vfsmount *mnt) 1024 int flags, const char *dev_name, void *data)
1025{ 1025{
1026 return get_sb_nodev(fs_type, flags, data, ncp_fill_super, mnt); 1026 return mount_nodev(fs_type, flags, data, ncp_fill_super);
1027} 1027}
1028 1028
1029static struct file_system_type ncp_fs_type = { 1029static struct file_system_type ncp_fs_type = {
1030 .owner = THIS_MODULE, 1030 .owner = THIS_MODULE,
1031 .name = "ncpfs", 1031 .name = "ncpfs",
1032 .get_sb = ncp_get_sb, 1032 .mount = ncp_mount,
1033 .kill_sb = kill_anon_super, 1033 .kill_sb = kill_anon_super,
1034 .fs_flags = FS_BINARY_MOUNTDATA, 1034 .fs_flags = FS_BINARY_MOUNTDATA,
1035}; 1035};
diff --git a/fs/nfs/Kconfig b/fs/nfs/Kconfig
index b950415d7c43..ba306658a6db 100644
--- a/fs/nfs/Kconfig
+++ b/fs/nfs/Kconfig
@@ -1,7 +1,6 @@
1config NFS_FS 1config NFS_FS
2 tristate "NFS client support" 2 tristate "NFS client support"
3 depends on INET && FILE_LOCKING 3 depends on INET && FILE_LOCKING
4 depends on BKL # fix as soon as lockd is done
5 select LOCKD 4 select LOCKD
6 select SUNRPC 5 select SUNRPC
7 select NFS_ACL_SUPPORT if NFS_V3_ACL 6 select NFS_ACL_SUPPORT if NFS_V3_ACL
@@ -77,13 +76,17 @@ config NFS_V4
77 76
78config NFS_V4_1 77config NFS_V4_1
79 bool "NFS client support for NFSv4.1 (EXPERIMENTAL)" 78 bool "NFS client support for NFSv4.1 (EXPERIMENTAL)"
80 depends on NFS_V4 && EXPERIMENTAL 79 depends on NFS_FS && NFS_V4 && EXPERIMENTAL
80 select PNFS_FILE_LAYOUT
81 help 81 help
82 This option enables support for minor version 1 of the NFSv4 protocol 82 This option enables support for minor version 1 of the NFSv4 protocol
83 (draft-ietf-nfsv4-minorversion1) in the kernel's NFS client. 83 (RFC 5661) in the kernel's NFS client.
84 84
85 If unsure, say N. 85 If unsure, say N.
86 86
87config PNFS_FILE_LAYOUT
88 tristate
89
87config ROOT_NFS 90config ROOT_NFS
88 bool "Root file system on NFS" 91 bool "Root file system on NFS"
89 depends on NFS_FS=y && IP_PNP 92 depends on NFS_FS=y && IP_PNP
@@ -118,3 +121,14 @@ config NFS_USE_KERNEL_DNS
118 select DNS_RESOLVER 121 select DNS_RESOLVER
119 select KEYS 122 select KEYS
120 default y 123 default y
124
125config NFS_USE_NEW_IDMAPPER
126 bool "Use the new idmapper upcall routine"
127 depends on NFS_V4 && KEYS
128 help
129 Say Y here if you want NFS to use the new idmapper upcall functions.
130 You will need /sbin/request-key (usually provided by the keyutils
131 package). For details, read
132 <file:Documentation/filesystems/nfs/idmapper.txt>.
133
134 If you are unsure, say N.
diff --git a/fs/nfs/Makefile b/fs/nfs/Makefile
index da7fda639eac..4776ff9e3814 100644
--- a/fs/nfs/Makefile
+++ b/fs/nfs/Makefile
@@ -15,5 +15,9 @@ nfs-$(CONFIG_NFS_V4) += nfs4proc.o nfs4xdr.o nfs4state.o nfs4renewd.o \
15 delegation.o idmap.o \ 15 delegation.o idmap.o \
16 callback.o callback_xdr.o callback_proc.o \ 16 callback.o callback_xdr.o callback_proc.o \
17 nfs4namespace.o 17 nfs4namespace.o
18nfs-$(CONFIG_NFS_V4_1) += pnfs.o
18nfs-$(CONFIG_SYSCTL) += sysctl.o 19nfs-$(CONFIG_SYSCTL) += sysctl.o
19nfs-$(CONFIG_NFS_FSCACHE) += fscache.o fscache-index.o 20nfs-$(CONFIG_NFS_FSCACHE) += fscache.o fscache-index.o
21
22obj-$(CONFIG_PNFS_FILE_LAYOUT) += nfs_layout_nfsv41_files.o
23nfs_layout_nfsv41_files-y := nfs4filelayout.o nfs4filelayoutdev.o
diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c
index e17b49e2eabd..aeec017fe814 100644
--- a/fs/nfs/callback.c
+++ b/fs/nfs/callback.c
@@ -109,7 +109,7 @@ nfs4_callback_up(struct svc_serv *serv)
109{ 109{
110 int ret; 110 int ret;
111 111
112 ret = svc_create_xprt(serv, "tcp", PF_INET, 112 ret = svc_create_xprt(serv, "tcp", &init_net, PF_INET,
113 nfs_callback_set_tcpport, SVC_SOCK_ANONYMOUS); 113 nfs_callback_set_tcpport, SVC_SOCK_ANONYMOUS);
114 if (ret <= 0) 114 if (ret <= 0)
115 goto out_err; 115 goto out_err;
@@ -117,7 +117,7 @@ nfs4_callback_up(struct svc_serv *serv)
117 dprintk("NFS: Callback listener port = %u (af %u)\n", 117 dprintk("NFS: Callback listener port = %u (af %u)\n",
118 nfs_callback_tcpport, PF_INET); 118 nfs_callback_tcpport, PF_INET);
119 119
120 ret = svc_create_xprt(serv, "tcp", PF_INET6, 120 ret = svc_create_xprt(serv, "tcp", &init_net, PF_INET6,
121 nfs_callback_set_tcpport, SVC_SOCK_ANONYMOUS); 121 nfs_callback_set_tcpport, SVC_SOCK_ANONYMOUS);
122 if (ret > 0) { 122 if (ret > 0) {
123 nfs_callback_tcpport6 = ret; 123 nfs_callback_tcpport6 = ret;
diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c
index 930d10fecdaf..2950fca0c61b 100644
--- a/fs/nfs/callback_proc.c
+++ b/fs/nfs/callback_proc.c
@@ -118,11 +118,11 @@ int nfs41_validate_delegation_stateid(struct nfs_delegation *delegation, const n
118 if (delegation == NULL) 118 if (delegation == NULL)
119 return 0; 119 return 0;
120 120
121 /* seqid is 4-bytes long */ 121 if (stateid->stateid.seqid != 0)
122 if (((u32 *) &stateid->data)[0] != 0)
123 return 0; 122 return 0;
124 if (memcmp(&delegation->stateid.data[4], &stateid->data[4], 123 if (memcmp(&delegation->stateid.stateid.other,
125 sizeof(stateid->data)-4)) 124 &stateid->stateid.other,
125 NFS4_STATEID_OTHER_SIZE))
126 return 0; 126 return 0;
127 127
128 return 1; 128 return 1;
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index e7340729af89..0870d0d4efc0 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -48,6 +48,7 @@
48#include "iostat.h" 48#include "iostat.h"
49#include "internal.h" 49#include "internal.h"
50#include "fscache.h" 50#include "fscache.h"
51#include "pnfs.h"
51 52
52#define NFSDBG_FACILITY NFSDBG_CLIENT 53#define NFSDBG_FACILITY NFSDBG_CLIENT
53 54
@@ -155,7 +156,9 @@ static struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *cl_
155 cred = rpc_lookup_machine_cred(); 156 cred = rpc_lookup_machine_cred();
156 if (!IS_ERR(cred)) 157 if (!IS_ERR(cred))
157 clp->cl_machine_cred = cred; 158 clp->cl_machine_cred = cred;
158 159#if defined(CONFIG_NFS_V4_1)
160 INIT_LIST_HEAD(&clp->cl_layouts);
161#endif
159 nfs_fscache_get_client_cookie(clp); 162 nfs_fscache_get_client_cookie(clp);
160 163
161 return clp; 164 return clp;
@@ -252,6 +255,7 @@ void nfs_put_client(struct nfs_client *clp)
252 nfs_free_client(clp); 255 nfs_free_client(clp);
253 } 256 }
254} 257}
258EXPORT_SYMBOL_GPL(nfs_put_client);
255 259
256#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) 260#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
257/* 261/*
@@ -601,6 +605,7 @@ static int nfs_create_rpc_client(struct nfs_client *clp,
601{ 605{
602 struct rpc_clnt *clnt = NULL; 606 struct rpc_clnt *clnt = NULL;
603 struct rpc_create_args args = { 607 struct rpc_create_args args = {
608 .net = &init_net,
604 .protocol = clp->cl_proto, 609 .protocol = clp->cl_proto,
605 .address = (struct sockaddr *)&clp->cl_addr, 610 .address = (struct sockaddr *)&clp->cl_addr,
606 .addrsize = clp->cl_addrlen, 611 .addrsize = clp->cl_addrlen,
@@ -635,7 +640,8 @@ static int nfs_create_rpc_client(struct nfs_client *clp,
635 */ 640 */
636static void nfs_destroy_server(struct nfs_server *server) 641static void nfs_destroy_server(struct nfs_server *server)
637{ 642{
638 if (!(server->flags & NFS_MOUNT_NONLM)) 643 if (!(server->flags & NFS_MOUNT_LOCAL_FLOCK) ||
644 !(server->flags & NFS_MOUNT_LOCAL_FCNTL))
639 nlmclnt_done(server->nlm_host); 645 nlmclnt_done(server->nlm_host);
640} 646}
641 647
@@ -657,7 +663,8 @@ static int nfs_start_lockd(struct nfs_server *server)
657 663
658 if (nlm_init.nfs_version > 3) 664 if (nlm_init.nfs_version > 3)
659 return 0; 665 return 0;
660 if (server->flags & NFS_MOUNT_NONLM) 666 if ((server->flags & NFS_MOUNT_LOCAL_FLOCK) &&
667 (server->flags & NFS_MOUNT_LOCAL_FCNTL))
661 return 0; 668 return 0;
662 669
663 switch (clp->cl_proto) { 670 switch (clp->cl_proto) {
@@ -898,11 +905,13 @@ static void nfs_server_set_fsinfo(struct nfs_server *server, struct nfs_fsinfo *
898 if (server->wsize > NFS_MAX_FILE_IO_SIZE) 905 if (server->wsize > NFS_MAX_FILE_IO_SIZE)
899 server->wsize = NFS_MAX_FILE_IO_SIZE; 906 server->wsize = NFS_MAX_FILE_IO_SIZE;
900 server->wpages = (server->wsize + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; 907 server->wpages = (server->wsize + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
908 set_pnfs_layoutdriver(server, fsinfo->layouttype);
909
901 server->wtmult = nfs_block_bits(fsinfo->wtmult, NULL); 910 server->wtmult = nfs_block_bits(fsinfo->wtmult, NULL);
902 911
903 server->dtsize = nfs_block_size(fsinfo->dtpref, NULL); 912 server->dtsize = nfs_block_size(fsinfo->dtpref, NULL);
904 if (server->dtsize > PAGE_CACHE_SIZE) 913 if (server->dtsize > PAGE_CACHE_SIZE * NFS_MAX_READDIR_PAGES)
905 server->dtsize = PAGE_CACHE_SIZE; 914 server->dtsize = PAGE_CACHE_SIZE * NFS_MAX_READDIR_PAGES;
906 if (server->dtsize > server->rsize) 915 if (server->dtsize > server->rsize)
907 server->dtsize = server->rsize; 916 server->dtsize = server->rsize;
908 917
@@ -913,6 +922,8 @@ static void nfs_server_set_fsinfo(struct nfs_server *server, struct nfs_fsinfo *
913 922
914 server->maxfilesize = fsinfo->maxfilesize; 923 server->maxfilesize = fsinfo->maxfilesize;
915 924
925 server->time_delta = fsinfo->time_delta;
926
916 /* We're airborne Set socket buffersize */ 927 /* We're airborne Set socket buffersize */
917 rpc_setbufsize(server->client, server->wsize + 100, server->rsize + 100); 928 rpc_setbufsize(server->client, server->wsize + 100, server->rsize + 100);
918} 929}
@@ -935,6 +946,7 @@ static int nfs_probe_fsinfo(struct nfs_server *server, struct nfs_fh *mntfh, str
935 } 946 }
936 947
937 fsinfo.fattr = fattr; 948 fsinfo.fattr = fattr;
949 fsinfo.layouttype = 0;
938 error = clp->rpc_ops->fsinfo(server, mntfh, &fsinfo); 950 error = clp->rpc_ops->fsinfo(server, mntfh, &fsinfo);
939 if (error < 0) 951 if (error < 0)
940 goto out_error; 952 goto out_error;
@@ -1017,6 +1029,7 @@ void nfs_free_server(struct nfs_server *server)
1017{ 1029{
1018 dprintk("--> nfs_free_server()\n"); 1030 dprintk("--> nfs_free_server()\n");
1019 1031
1032 unset_pnfs_layoutdriver(server);
1020 spin_lock(&nfs_client_lock); 1033 spin_lock(&nfs_client_lock);
1021 list_del(&server->client_link); 1034 list_del(&server->client_link);
1022 list_del(&server->master_link); 1035 list_del(&server->master_link);
@@ -1356,8 +1369,9 @@ static int nfs4_init_server(struct nfs_server *server,
1356 1369
1357 /* Initialise the client representation from the mount data */ 1370 /* Initialise the client representation from the mount data */
1358 server->flags = data->flags; 1371 server->flags = data->flags;
1359 server->caps |= NFS_CAP_ATOMIC_OPEN|NFS_CAP_CHANGE_ATTR| 1372 server->caps |= NFS_CAP_ATOMIC_OPEN|NFS_CAP_CHANGE_ATTR|NFS_CAP_POSIX_LOCK;
1360 NFS_CAP_POSIX_LOCK; 1373 if (!(data->flags & NFS_MOUNT_NORDIRPLUS))
1374 server->caps |= NFS_CAP_READDIRPLUS;
1361 server->options = data->options; 1375 server->options = data->options;
1362 1376
1363 /* Get a client record */ 1377 /* Get a client record */
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index e257172d438c..07ac3847e562 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -33,11 +33,12 @@
33#include <linux/namei.h> 33#include <linux/namei.h>
34#include <linux/mount.h> 34#include <linux/mount.h>
35#include <linux/sched.h> 35#include <linux/sched.h>
36#include <linux/vmalloc.h>
36 37
37#include "nfs4_fs.h"
38#include "delegation.h" 38#include "delegation.h"
39#include "iostat.h" 39#include "iostat.h"
40#include "internal.h" 40#include "internal.h"
41#include "fscache.h"
41 42
42/* #define NFS_DEBUG_VERBOSE 1 */ 43/* #define NFS_DEBUG_VERBOSE 1 */
43 44
@@ -55,6 +56,7 @@ static int nfs_rename(struct inode *, struct dentry *,
55 struct inode *, struct dentry *); 56 struct inode *, struct dentry *);
56static int nfs_fsync_dir(struct file *, int); 57static int nfs_fsync_dir(struct file *, int);
57static loff_t nfs_llseek_dir(struct file *, loff_t, int); 58static loff_t nfs_llseek_dir(struct file *, loff_t, int);
59static int nfs_readdir_clear_array(struct page*, gfp_t);
58 60
59const struct file_operations nfs_dir_operations = { 61const struct file_operations nfs_dir_operations = {
60 .llseek = nfs_llseek_dir, 62 .llseek = nfs_llseek_dir,
@@ -80,6 +82,10 @@ const struct inode_operations nfs_dir_inode_operations = {
80 .setattr = nfs_setattr, 82 .setattr = nfs_setattr,
81}; 83};
82 84
85const struct address_space_operations nfs_dir_addr_space_ops = {
86 .releasepage = nfs_readdir_clear_array,
87};
88
83#ifdef CONFIG_NFS_V3 89#ifdef CONFIG_NFS_V3
84const struct inode_operations nfs3_dir_inode_operations = { 90const struct inode_operations nfs3_dir_inode_operations = {
85 .create = nfs_create, 91 .create = nfs_create,
@@ -104,8 +110,9 @@ const struct inode_operations nfs3_dir_inode_operations = {
104#ifdef CONFIG_NFS_V4 110#ifdef CONFIG_NFS_V4
105 111
106static struct dentry *nfs_atomic_lookup(struct inode *, struct dentry *, struct nameidata *); 112static struct dentry *nfs_atomic_lookup(struct inode *, struct dentry *, struct nameidata *);
113static int nfs_open_create(struct inode *dir, struct dentry *dentry, int mode, struct nameidata *nd);
107const struct inode_operations nfs4_dir_inode_operations = { 114const struct inode_operations nfs4_dir_inode_operations = {
108 .create = nfs_create, 115 .create = nfs_open_create,
109 .lookup = nfs_atomic_lookup, 116 .lookup = nfs_atomic_lookup,
110 .link = nfs_link, 117 .link = nfs_link,
111 .unlink = nfs_unlink, 118 .unlink = nfs_unlink,
@@ -150,51 +157,197 @@ nfs_opendir(struct inode *inode, struct file *filp)
150 return res; 157 return res;
151} 158}
152 159
153typedef __be32 * (*decode_dirent_t)(__be32 *, struct nfs_entry *, int); 160struct nfs_cache_array_entry {
161 u64 cookie;
162 u64 ino;
163 struct qstr string;
164};
165
166struct nfs_cache_array {
167 unsigned int size;
168 int eof_index;
169 u64 last_cookie;
170 struct nfs_cache_array_entry array[0];
171};
172
173#define MAX_READDIR_ARRAY ((PAGE_SIZE - sizeof(struct nfs_cache_array)) / sizeof(struct nfs_cache_array_entry))
174
175typedef __be32 * (*decode_dirent_t)(struct xdr_stream *, struct nfs_entry *, struct nfs_server *, int);
154typedef struct { 176typedef struct {
155 struct file *file; 177 struct file *file;
156 struct page *page; 178 struct page *page;
157 unsigned long page_index; 179 unsigned long page_index;
158 __be32 *ptr;
159 u64 *dir_cookie; 180 u64 *dir_cookie;
160 loff_t current_index; 181 loff_t current_index;
161 struct nfs_entry *entry;
162 decode_dirent_t decode; 182 decode_dirent_t decode;
163 int plus; 183
164 unsigned long timestamp; 184 unsigned long timestamp;
165 unsigned long gencount; 185 unsigned long gencount;
166 int timestamp_valid; 186 unsigned int cache_entry_index;
187 unsigned int plus:1;
188 unsigned int eof:1;
167} nfs_readdir_descriptor_t; 189} nfs_readdir_descriptor_t;
168 190
169/* Now we cache directories properly, by stuffing the dirent 191/*
170 * data directly in the page cache. 192 * The caller is responsible for calling nfs_readdir_release_array(page)
171 *
172 * Inode invalidation due to refresh etc. takes care of
173 * _everything_, no sloppy entry flushing logic, no extraneous
174 * copying, network direct to page cache, the way it was meant
175 * to be.
176 *
177 * NOTE: Dirent information verification is done always by the
178 * page-in of the RPC reply, nowhere else, this simplies
179 * things substantially.
180 */ 193 */
181static 194static
182int nfs_readdir_filler(nfs_readdir_descriptor_t *desc, struct page *page) 195struct nfs_cache_array *nfs_readdir_get_array(struct page *page)
196{
197 if (page == NULL)
198 return ERR_PTR(-EIO);
199 return (struct nfs_cache_array *)kmap(page);
200}
201
202static
203void nfs_readdir_release_array(struct page *page)
204{
205 kunmap(page);
206}
207
208/*
209 * we are freeing strings created by nfs_add_to_readdir_array()
210 */
211static
212int nfs_readdir_clear_array(struct page *page, gfp_t mask)
213{
214 struct nfs_cache_array *array = nfs_readdir_get_array(page);
215 int i;
216 for (i = 0; i < array->size; i++)
217 kfree(array->array[i].string.name);
218 nfs_readdir_release_array(page);
219 return 0;
220}
221
222/*
223 * the caller is responsible for freeing qstr.name
224 * when called by nfs_readdir_add_to_array, the strings will be freed in
225 * nfs_clear_readdir_array()
226 */
227static
228int nfs_readdir_make_qstr(struct qstr *string, const char *name, unsigned int len)
229{
230 string->len = len;
231 string->name = kmemdup(name, len, GFP_KERNEL);
232 if (string->name == NULL)
233 return -ENOMEM;
234 string->hash = full_name_hash(name, len);
235 return 0;
236}
237
238static
239int nfs_readdir_add_to_array(struct nfs_entry *entry, struct page *page)
240{
241 struct nfs_cache_array *array = nfs_readdir_get_array(page);
242 struct nfs_cache_array_entry *cache_entry;
243 int ret;
244
245 if (IS_ERR(array))
246 return PTR_ERR(array);
247 ret = -EIO;
248 if (array->size >= MAX_READDIR_ARRAY)
249 goto out;
250
251 cache_entry = &array->array[array->size];
252 cache_entry->cookie = entry->prev_cookie;
253 cache_entry->ino = entry->ino;
254 ret = nfs_readdir_make_qstr(&cache_entry->string, entry->name, entry->len);
255 if (ret)
256 goto out;
257 array->last_cookie = entry->cookie;
258 if (entry->eof == 1)
259 array->eof_index = array->size;
260 array->size++;
261out:
262 nfs_readdir_release_array(page);
263 return ret;
264}
265
266static
267int nfs_readdir_search_for_pos(struct nfs_cache_array *array, nfs_readdir_descriptor_t *desc)
268{
269 loff_t diff = desc->file->f_pos - desc->current_index;
270 unsigned int index;
271
272 if (diff < 0)
273 goto out_eof;
274 if (diff >= array->size) {
275 if (array->eof_index > 0)
276 goto out_eof;
277 desc->current_index += array->size;
278 return -EAGAIN;
279 }
280
281 index = (unsigned int)diff;
282 *desc->dir_cookie = array->array[index].cookie;
283 desc->cache_entry_index = index;
284 if (index == array->eof_index)
285 desc->eof = 1;
286 return 0;
287out_eof:
288 desc->eof = 1;
289 return -EBADCOOKIE;
290}
291
292static
293int nfs_readdir_search_for_cookie(struct nfs_cache_array *array, nfs_readdir_descriptor_t *desc)
294{
295 int i;
296 int status = -EAGAIN;
297
298 for (i = 0; i < array->size; i++) {
299 if (i == array->eof_index) {
300 desc->eof = 1;
301 status = -EBADCOOKIE;
302 }
303 if (array->array[i].cookie == *desc->dir_cookie) {
304 desc->cache_entry_index = i;
305 status = 0;
306 break;
307 }
308 }
309
310 return status;
311}
312
313static
314int nfs_readdir_search_array(nfs_readdir_descriptor_t *desc)
315{
316 struct nfs_cache_array *array;
317 int status = -EBADCOOKIE;
318
319 if (desc->dir_cookie == NULL)
320 goto out;
321
322 array = nfs_readdir_get_array(desc->page);
323 if (IS_ERR(array)) {
324 status = PTR_ERR(array);
325 goto out;
326 }
327
328 if (*desc->dir_cookie == 0)
329 status = nfs_readdir_search_for_pos(array, desc);
330 else
331 status = nfs_readdir_search_for_cookie(array, desc);
332
333 nfs_readdir_release_array(desc->page);
334out:
335 return status;
336}
337
338/* Fill a page with xdr information before transferring to the cache page */
339static
340int nfs_readdir_xdr_filler(struct page **pages, nfs_readdir_descriptor_t *desc,
341 struct nfs_entry *entry, struct file *file, struct inode *inode)
183{ 342{
184 struct file *file = desc->file;
185 struct inode *inode = file->f_path.dentry->d_inode;
186 struct rpc_cred *cred = nfs_file_cred(file); 343 struct rpc_cred *cred = nfs_file_cred(file);
187 unsigned long timestamp, gencount; 344 unsigned long timestamp, gencount;
188 int error; 345 int error;
189 346
190 dfprintk(DIRCACHE, "NFS: %s: reading cookie %Lu into page %lu\n",
191 __func__, (long long)desc->entry->cookie,
192 page->index);
193
194 again: 347 again:
195 timestamp = jiffies; 348 timestamp = jiffies;
196 gencount = nfs_inc_attr_generation_counter(); 349 gencount = nfs_inc_attr_generation_counter();
197 error = NFS_PROTO(inode)->readdir(file->f_path.dentry, cred, desc->entry->cookie, page, 350 error = NFS_PROTO(inode)->readdir(file->f_path.dentry, cred, entry->cookie, pages,
198 NFS_SERVER(inode)->dtsize, desc->plus); 351 NFS_SERVER(inode)->dtsize, desc->plus);
199 if (error < 0) { 352 if (error < 0) {
200 /* We requested READDIRPLUS, but the server doesn't grok it */ 353 /* We requested READDIRPLUS, but the server doesn't grok it */
@@ -208,190 +361,292 @@ int nfs_readdir_filler(nfs_readdir_descriptor_t *desc, struct page *page)
208 } 361 }
209 desc->timestamp = timestamp; 362 desc->timestamp = timestamp;
210 desc->gencount = gencount; 363 desc->gencount = gencount;
211 desc->timestamp_valid = 1; 364error:
212 SetPageUptodate(page); 365 return error;
213 /* Ensure consistent page alignment of the data.
214 * Note: assumes we have exclusive access to this mapping either
215 * through inode->i_mutex or some other mechanism.
216 */
217 if (invalidate_inode_pages2_range(inode->i_mapping, page->index + 1, -1) < 0) {
218 /* Should never happen */
219 nfs_zap_mapping(inode, inode->i_mapping);
220 }
221 unlock_page(page);
222 return 0;
223 error:
224 unlock_page(page);
225 return -EIO;
226} 366}
227 367
228static inline 368/* Fill in an entry based on the xdr code stored in desc->page */
229int dir_decode(nfs_readdir_descriptor_t *desc) 369static
370int xdr_decode(nfs_readdir_descriptor_t *desc, struct nfs_entry *entry, struct xdr_stream *stream)
230{ 371{
231 __be32 *p = desc->ptr; 372 __be32 *p = desc->decode(stream, entry, NFS_SERVER(desc->file->f_path.dentry->d_inode), desc->plus);
232 p = desc->decode(p, desc->entry, desc->plus);
233 if (IS_ERR(p)) 373 if (IS_ERR(p))
234 return PTR_ERR(p); 374 return PTR_ERR(p);
235 desc->ptr = p; 375
236 if (desc->timestamp_valid) { 376 entry->fattr->time_start = desc->timestamp;
237 desc->entry->fattr->time_start = desc->timestamp; 377 entry->fattr->gencount = desc->gencount;
238 desc->entry->fattr->gencount = desc->gencount;
239 } else
240 desc->entry->fattr->valid &= ~NFS_ATTR_FATTR;
241 return 0; 378 return 0;
242} 379}
243 380
244static inline 381static
245void dir_page_release(nfs_readdir_descriptor_t *desc) 382int nfs_same_file(struct dentry *dentry, struct nfs_entry *entry)
246{ 383{
247 kunmap(desc->page); 384 struct nfs_inode *node;
248 page_cache_release(desc->page); 385 if (dentry->d_inode == NULL)
249 desc->page = NULL; 386 goto different;
250 desc->ptr = NULL; 387 node = NFS_I(dentry->d_inode);
388 if (node->fh.size != entry->fh->size)
389 goto different;
390 if (strncmp(node->fh.data, entry->fh->data, node->fh.size) != 0)
391 goto different;
392 return 1;
393different:
394 return 0;
251} 395}
252 396
253/* 397static
254 * Given a pointer to a buffer that has already been filled by a call 398void nfs_prime_dcache(struct dentry *parent, struct nfs_entry *entry)
255 * to readdir, find the next entry with cookie '*desc->dir_cookie'.
256 *
257 * If the end of the buffer has been reached, return -EAGAIN, if not,
258 * return the offset within the buffer of the next entry to be
259 * read.
260 */
261static inline
262int find_dirent(nfs_readdir_descriptor_t *desc)
263{ 399{
264 struct nfs_entry *entry = desc->entry; 400 struct qstr filename = {
265 int loop_count = 0, 401 .len = entry->len,
266 status; 402 .name = entry->name,
403 };
404 struct dentry *dentry;
405 struct dentry *alias;
406 struct inode *dir = parent->d_inode;
407 struct inode *inode;
267 408
268 while((status = dir_decode(desc)) == 0) { 409 if (filename.name[0] == '.') {
269 dfprintk(DIRCACHE, "NFS: %s: examining cookie %Lu\n", 410 if (filename.len == 1)
270 __func__, (unsigned long long)entry->cookie); 411 return;
271 if (entry->prev_cookie == *desc->dir_cookie) 412 if (filename.len == 2 && filename.name[1] == '.')
272 break; 413 return;
273 if (loop_count++ > 200) { 414 }
274 loop_count = 0; 415 filename.hash = full_name_hash(filename.name, filename.len);
275 schedule(); 416
417 dentry = d_lookup(parent, &filename);
418 if (dentry != NULL) {
419 if (nfs_same_file(dentry, entry)) {
420 nfs_refresh_inode(dentry->d_inode, entry->fattr);
421 goto out;
422 } else {
423 d_drop(dentry);
424 dput(dentry);
276 } 425 }
277 } 426 }
278 return status; 427
428 dentry = d_alloc(parent, &filename);
429 if (dentry == NULL)
430 return;
431
432 dentry->d_op = NFS_PROTO(dir)->dentry_ops;
433 inode = nfs_fhget(dentry->d_sb, entry->fh, entry->fattr);
434 if (IS_ERR(inode))
435 goto out;
436
437 alias = d_materialise_unique(dentry, inode);
438 if (IS_ERR(alias))
439 goto out;
440 else if (alias) {
441 nfs_set_verifier(alias, nfs_save_change_attribute(dir));
442 dput(alias);
443 } else
444 nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
445
446out:
447 dput(dentry);
448}
449
450/* Perform conversion from xdr to cache array */
451static
452void nfs_readdir_page_filler(nfs_readdir_descriptor_t *desc, struct nfs_entry *entry,
453 void *xdr_page, struct page *page, unsigned int buflen)
454{
455 struct xdr_stream stream;
456 struct xdr_buf buf;
457 __be32 *ptr = xdr_page;
458 int status;
459 struct nfs_cache_array *array;
460
461 buf.head->iov_base = xdr_page;
462 buf.head->iov_len = buflen;
463 buf.tail->iov_len = 0;
464 buf.page_base = 0;
465 buf.page_len = 0;
466 buf.buflen = buf.head->iov_len;
467 buf.len = buf.head->iov_len;
468
469 xdr_init_decode(&stream, &buf, ptr);
470
471
472 do {
473 status = xdr_decode(desc, entry, &stream);
474 if (status != 0)
475 break;
476
477 if (nfs_readdir_add_to_array(entry, page) == -1)
478 break;
479 if (desc->plus == 1)
480 nfs_prime_dcache(desc->file->f_path.dentry, entry);
481 } while (!entry->eof);
482
483 if (status == -EBADCOOKIE && entry->eof) {
484 array = nfs_readdir_get_array(page);
485 array->eof_index = array->size - 1;
486 status = 0;
487 nfs_readdir_release_array(page);
488 }
489}
490
491static
492void nfs_readdir_free_pagearray(struct page **pages, unsigned int npages)
493{
494 unsigned int i;
495 for (i = 0; i < npages; i++)
496 put_page(pages[i]);
497}
498
499static
500void nfs_readdir_free_large_page(void *ptr, struct page **pages,
501 unsigned int npages)
502{
503 vm_unmap_ram(ptr, npages);
504 nfs_readdir_free_pagearray(pages, npages);
279} 505}
280 506
281/* 507/*
282 * Given a pointer to a buffer that has already been filled by a call 508 * nfs_readdir_large_page will allocate pages that must be freed with a call
283 * to readdir, find the entry at offset 'desc->file->f_pos'. 509 * to nfs_readdir_free_large_page
284 *
285 * If the end of the buffer has been reached, return -EAGAIN, if not,
286 * return the offset within the buffer of the next entry to be
287 * read.
288 */ 510 */
289static inline 511static
290int find_dirent_index(nfs_readdir_descriptor_t *desc) 512void *nfs_readdir_large_page(struct page **pages, unsigned int npages)
291{ 513{
292 struct nfs_entry *entry = desc->entry; 514 void *ptr;
293 int loop_count = 0, 515 unsigned int i;
294 status; 516
517 for (i = 0; i < npages; i++) {
518 struct page *page = alloc_page(GFP_KERNEL);
519 if (page == NULL)
520 goto out_freepages;
521 pages[i] = page;
522 }
295 523
296 for(;;) { 524 ptr = vm_map_ram(pages, npages, 0, PAGE_KERNEL);
297 status = dir_decode(desc); 525 if (!IS_ERR_OR_NULL(ptr))
298 if (status) 526 return ptr;
299 break; 527out_freepages:
528 nfs_readdir_free_pagearray(pages, i);
529 return NULL;
530}
531
532static
533int nfs_readdir_xdr_to_array(nfs_readdir_descriptor_t *desc, struct page *page, struct inode *inode)
534{
535 struct page *pages[NFS_MAX_READDIR_PAGES];
536 void *pages_ptr = NULL;
537 struct nfs_entry entry;
538 struct file *file = desc->file;
539 struct nfs_cache_array *array;
540 int status = 0;
541 unsigned int array_size = ARRAY_SIZE(pages);
542
543 entry.prev_cookie = 0;
544 entry.cookie = *desc->dir_cookie;
545 entry.eof = 0;
546 entry.fh = nfs_alloc_fhandle();
547 entry.fattr = nfs_alloc_fattr();
548 if (entry.fh == NULL || entry.fattr == NULL)
549 goto out;
300 550
301 dfprintk(DIRCACHE, "NFS: found cookie %Lu at index %Ld\n", 551 array = nfs_readdir_get_array(page);
302 (unsigned long long)entry->cookie, desc->current_index); 552 memset(array, 0, sizeof(struct nfs_cache_array));
553 array->eof_index = -1;
303 554
304 if (desc->file->f_pos == desc->current_index) { 555 pages_ptr = nfs_readdir_large_page(pages, array_size);
305 *desc->dir_cookie = entry->cookie; 556 if (!pages_ptr)
557 goto out_release_array;
558 do {
559 status = nfs_readdir_xdr_filler(pages, desc, &entry, file, inode);
560
561 if (status < 0)
306 break; 562 break;
307 } 563 nfs_readdir_page_filler(desc, &entry, pages_ptr, page, array_size * PAGE_SIZE);
308 desc->current_index++; 564 } while (array->eof_index < 0 && array->size < MAX_READDIR_ARRAY);
309 if (loop_count++ > 200) { 565
310 loop_count = 0; 566 nfs_readdir_free_large_page(pages_ptr, pages, array_size);
311 schedule(); 567out_release_array:
312 } 568 nfs_readdir_release_array(page);
313 } 569out:
570 nfs_free_fattr(entry.fattr);
571 nfs_free_fhandle(entry.fh);
314 return status; 572 return status;
315} 573}
316 574
317/* 575/*
318 * Find the given page, and call find_dirent() or find_dirent_index in 576 * Now we cache directories properly, by converting xdr information
319 * order to try to return the next entry. 577 * to an array that can be used for lookups later. This results in
578 * fewer cache pages, since we can store more information on each page.
579 * We only need to convert from xdr once so future lookups are much simpler
320 */ 580 */
321static inline 581static
322int find_dirent_page(nfs_readdir_descriptor_t *desc) 582int nfs_readdir_filler(nfs_readdir_descriptor_t *desc, struct page* page)
323{ 583{
324 struct inode *inode = desc->file->f_path.dentry->d_inode; 584 struct inode *inode = desc->file->f_path.dentry->d_inode;
325 struct page *page;
326 int status;
327 585
328 dfprintk(DIRCACHE, "NFS: %s: searching page %ld for target %Lu\n", 586 if (nfs_readdir_xdr_to_array(desc, page, inode) < 0)
329 __func__, desc->page_index, 587 goto error;
330 (long long) *desc->dir_cookie); 588 SetPageUptodate(page);
331 589
332 /* If we find the page in the page_cache, we cannot be sure 590 if (invalidate_inode_pages2_range(inode->i_mapping, page->index + 1, -1) < 0) {
333 * how fresh the data is, so we will ignore readdir_plus attributes. 591 /* Should never happen */
334 */ 592 nfs_zap_mapping(inode, inode->i_mapping);
335 desc->timestamp_valid = 0;
336 page = read_cache_page(inode->i_mapping, desc->page_index,
337 (filler_t *)nfs_readdir_filler, desc);
338 if (IS_ERR(page)) {
339 status = PTR_ERR(page);
340 goto out;
341 } 593 }
594 unlock_page(page);
595 return 0;
596 error:
597 unlock_page(page);
598 return -EIO;
599}
342 600
343 /* NOTE: Someone else may have changed the READDIRPLUS flag */ 601static
344 desc->page = page; 602void cache_page_release(nfs_readdir_descriptor_t *desc)
345 desc->ptr = kmap(page); /* matching kunmap in nfs_do_filldir */ 603{
346 if (*desc->dir_cookie != 0) 604 page_cache_release(desc->page);
347 status = find_dirent(desc); 605 desc->page = NULL;
348 else 606}
349 status = find_dirent_index(desc); 607
350 if (status < 0) 608static
351 dir_page_release(desc); 609struct page *get_cache_page(nfs_readdir_descriptor_t *desc)
352 out: 610{
353 dfprintk(DIRCACHE, "NFS: %s: returns %d\n", __func__, status); 611 struct page *page;
354 return status; 612 page = read_cache_page(desc->file->f_path.dentry->d_inode->i_mapping,
613 desc->page_index, (filler_t *)nfs_readdir_filler, desc);
614 if (IS_ERR(page))
615 desc->eof = 1;
616 return page;
355} 617}
356 618
357/* 619/*
358 * Recurse through the page cache pages, and return a 620 * Returns 0 if desc->dir_cookie was found on page desc->page_index
359 * filled nfs_entry structure of the next directory entry if possible.
360 *
361 * The target for the search is '*desc->dir_cookie' if non-0,
362 * 'desc->file->f_pos' otherwise
363 */ 621 */
622static
623int find_cache_page(nfs_readdir_descriptor_t *desc)
624{
625 int res;
626
627 desc->page = get_cache_page(desc);
628 if (IS_ERR(desc->page))
629 return PTR_ERR(desc->page);
630
631 res = nfs_readdir_search_array(desc);
632 if (res == 0)
633 return 0;
634 cache_page_release(desc);
635 return res;
636}
637
638/* Search for desc->dir_cookie from the beginning of the page cache */
364static inline 639static inline
365int readdir_search_pagecache(nfs_readdir_descriptor_t *desc) 640int readdir_search_pagecache(nfs_readdir_descriptor_t *desc)
366{ 641{
367 int loop_count = 0; 642 int res = -EAGAIN;
368 int res;
369
370 /* Always search-by-index from the beginning of the cache */
371 if (*desc->dir_cookie == 0) {
372 dfprintk(DIRCACHE, "NFS: readdir_search_pagecache() searching for offset %Ld\n",
373 (long long)desc->file->f_pos);
374 desc->page_index = 0;
375 desc->entry->cookie = desc->entry->prev_cookie = 0;
376 desc->entry->eof = 0;
377 desc->current_index = 0;
378 } else
379 dfprintk(DIRCACHE, "NFS: readdir_search_pagecache() searching for cookie %Lu\n",
380 (unsigned long long)*desc->dir_cookie);
381 643
382 for (;;) { 644 while (1) {
383 res = find_dirent_page(desc); 645 res = find_cache_page(desc);
384 if (res != -EAGAIN) 646 if (res != -EAGAIN)
385 break; 647 break;
386 /* Align to beginning of next page */ 648 desc->page_index++;
387 desc->page_index ++;
388 if (loop_count++ > 200) {
389 loop_count = 0;
390 schedule();
391 }
392 } 649 }
393
394 dfprintk(DIRCACHE, "NFS: %s: returns %d\n", __func__, res);
395 return res; 650 return res;
396} 651}
397 652
@@ -400,8 +655,6 @@ static inline unsigned int dt_type(struct inode *inode)
400 return (inode->i_mode >> 12) & 15; 655 return (inode->i_mode >> 12) & 15;
401} 656}
402 657
403static struct dentry *nfs_readdir_lookup(nfs_readdir_descriptor_t *desc);
404
405/* 658/*
406 * Once we've found the start of the dirent within a page: fill 'er up... 659 * Once we've found the start of the dirent within a page: fill 'er up...
407 */ 660 */
@@ -410,49 +663,36 @@ int nfs_do_filldir(nfs_readdir_descriptor_t *desc, void *dirent,
410 filldir_t filldir) 663 filldir_t filldir)
411{ 664{
412 struct file *file = desc->file; 665 struct file *file = desc->file;
413 struct nfs_entry *entry = desc->entry; 666 int i = 0;
414 struct dentry *dentry = NULL; 667 int res = 0;
415 u64 fileid; 668 struct nfs_cache_array *array = NULL;
416 int loop_count = 0, 669 unsigned int d_type = DT_UNKNOWN;
417 res; 670 struct dentry *dentry = NULL;
418
419 dfprintk(DIRCACHE, "NFS: nfs_do_filldir() filling starting @ cookie %Lu\n",
420 (unsigned long long)entry->cookie);
421
422 for(;;) {
423 unsigned d_type = DT_UNKNOWN;
424 /* Note: entry->prev_cookie contains the cookie for
425 * retrieving the current dirent on the server */
426 fileid = entry->ino;
427
428 /* Get a dentry if we have one */
429 if (dentry != NULL)
430 dput(dentry);
431 dentry = nfs_readdir_lookup(desc);
432 671
433 /* Use readdirplus info */ 672 array = nfs_readdir_get_array(desc->page);
434 if (dentry != NULL && dentry->d_inode != NULL) {
435 d_type = dt_type(dentry->d_inode);
436 fileid = NFS_FILEID(dentry->d_inode);
437 }
438 673
439 res = filldir(dirent, entry->name, entry->len, 674 for (i = desc->cache_entry_index; i < array->size; i++) {
440 file->f_pos, nfs_compat_user_ino64(fileid), 675 d_type = DT_UNKNOWN;
441 d_type); 676
677 res = filldir(dirent, array->array[i].string.name,
678 array->array[i].string.len, file->f_pos,
679 nfs_compat_user_ino64(array->array[i].ino), d_type);
442 if (res < 0) 680 if (res < 0)
443 break; 681 break;
444 file->f_pos++; 682 file->f_pos++;
445 *desc->dir_cookie = entry->cookie; 683 desc->cache_entry_index = i;
446 if (dir_decode(desc) != 0) { 684 if (i < (array->size-1))
447 desc->page_index ++; 685 *desc->dir_cookie = array->array[i+1].cookie;
686 else
687 *desc->dir_cookie = array->last_cookie;
688 if (i == array->eof_index) {
689 desc->eof = 1;
448 break; 690 break;
449 } 691 }
450 if (loop_count++ > 200) {
451 loop_count = 0;
452 schedule();
453 }
454 } 692 }
455 dir_page_release(desc); 693
694 nfs_readdir_release_array(desc->page);
695 cache_page_release(desc);
456 if (dentry != NULL) 696 if (dentry != NULL)
457 dput(dentry); 697 dput(dentry);
458 dfprintk(DIRCACHE, "NFS: nfs_do_filldir() filling ended @ cookie %Lu; returning = %d\n", 698 dfprintk(DIRCACHE, "NFS: nfs_do_filldir() filling ended @ cookie %Lu; returning = %d\n",
@@ -476,12 +716,9 @@ static inline
476int uncached_readdir(nfs_readdir_descriptor_t *desc, void *dirent, 716int uncached_readdir(nfs_readdir_descriptor_t *desc, void *dirent,
477 filldir_t filldir) 717 filldir_t filldir)
478{ 718{
479 struct file *file = desc->file;
480 struct inode *inode = file->f_path.dentry->d_inode;
481 struct rpc_cred *cred = nfs_file_cred(file);
482 struct page *page = NULL; 719 struct page *page = NULL;
483 int status; 720 int status;
484 unsigned long timestamp, gencount; 721 struct inode *inode = desc->file->f_path.dentry->d_inode;
485 722
486 dfprintk(DIRCACHE, "NFS: uncached_readdir() searching for cookie %Lu\n", 723 dfprintk(DIRCACHE, "NFS: uncached_readdir() searching for cookie %Lu\n",
487 (unsigned long long)*desc->dir_cookie); 724 (unsigned long long)*desc->dir_cookie);
@@ -491,38 +728,22 @@ int uncached_readdir(nfs_readdir_descriptor_t *desc, void *dirent,
491 status = -ENOMEM; 728 status = -ENOMEM;
492 goto out; 729 goto out;
493 } 730 }
494 timestamp = jiffies; 731
495 gencount = nfs_inc_attr_generation_counter(); 732 if (nfs_readdir_xdr_to_array(desc, page, inode) == -1) {
496 status = NFS_PROTO(inode)->readdir(file->f_path.dentry, cred,
497 *desc->dir_cookie, page,
498 NFS_SERVER(inode)->dtsize,
499 desc->plus);
500 desc->page = page;
501 desc->ptr = kmap(page); /* matching kunmap in nfs_do_filldir */
502 if (status >= 0) {
503 desc->timestamp = timestamp;
504 desc->gencount = gencount;
505 desc->timestamp_valid = 1;
506 if ((status = dir_decode(desc)) == 0)
507 desc->entry->prev_cookie = *desc->dir_cookie;
508 } else
509 status = -EIO; 733 status = -EIO;
510 if (status < 0)
511 goto out_release; 734 goto out_release;
735 }
512 736
737 desc->page_index = 0;
738 desc->page = page;
513 status = nfs_do_filldir(desc, dirent, filldir); 739 status = nfs_do_filldir(desc, dirent, filldir);
514 740
515 /* Reset read descriptor so it searches the page cache from
516 * the start upon the next call to readdir_search_pagecache() */
517 desc->page_index = 0;
518 desc->entry->cookie = desc->entry->prev_cookie = 0;
519 desc->entry->eof = 0;
520 out: 741 out:
521 dfprintk(DIRCACHE, "NFS: %s: returns %d\n", 742 dfprintk(DIRCACHE, "NFS: %s: returns %d\n",
522 __func__, status); 743 __func__, status);
523 return status; 744 return status;
524 out_release: 745 out_release:
525 dir_page_release(desc); 746 cache_page_release(desc);
526 goto out; 747 goto out;
527} 748}
528 749
@@ -536,7 +757,6 @@ static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
536 struct inode *inode = dentry->d_inode; 757 struct inode *inode = dentry->d_inode;
537 nfs_readdir_descriptor_t my_desc, 758 nfs_readdir_descriptor_t my_desc,
538 *desc = &my_desc; 759 *desc = &my_desc;
539 struct nfs_entry my_entry;
540 int res = -ENOMEM; 760 int res = -ENOMEM;
541 761
542 dfprintk(FILE, "NFS: readdir(%s/%s) starting at cookie %llu\n", 762 dfprintk(FILE, "NFS: readdir(%s/%s) starting at cookie %llu\n",
@@ -557,26 +777,17 @@ static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
557 desc->decode = NFS_PROTO(inode)->decode_dirent; 777 desc->decode = NFS_PROTO(inode)->decode_dirent;
558 desc->plus = NFS_USE_READDIRPLUS(inode); 778 desc->plus = NFS_USE_READDIRPLUS(inode);
559 779
560 my_entry.cookie = my_entry.prev_cookie = 0;
561 my_entry.eof = 0;
562 my_entry.fh = nfs_alloc_fhandle();
563 my_entry.fattr = nfs_alloc_fattr();
564 if (my_entry.fh == NULL || my_entry.fattr == NULL)
565 goto out_alloc_failed;
566
567 desc->entry = &my_entry;
568
569 nfs_block_sillyrename(dentry); 780 nfs_block_sillyrename(dentry);
570 res = nfs_revalidate_mapping(inode, filp->f_mapping); 781 res = nfs_revalidate_mapping(inode, filp->f_mapping);
571 if (res < 0) 782 if (res < 0)
572 goto out; 783 goto out;
573 784
574 while(!desc->entry->eof) { 785 while (desc->eof != 1) {
575 res = readdir_search_pagecache(desc); 786 res = readdir_search_pagecache(desc);
576 787
577 if (res == -EBADCOOKIE) { 788 if (res == -EBADCOOKIE) {
578 /* This means either end of directory */ 789 /* This means either end of directory */
579 if (*desc->dir_cookie && desc->entry->cookie != *desc->dir_cookie) { 790 if (*desc->dir_cookie && desc->eof == 0) {
580 /* Or that the server has 'lost' a cookie */ 791 /* Or that the server has 'lost' a cookie */
581 res = uncached_readdir(desc, dirent, filldir); 792 res = uncached_readdir(desc, dirent, filldir);
582 if (res >= 0) 793 if (res >= 0)
@@ -588,8 +799,9 @@ static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
588 if (res == -ETOOSMALL && desc->plus) { 799 if (res == -ETOOSMALL && desc->plus) {
589 clear_bit(NFS_INO_ADVISE_RDPLUS, &NFS_I(inode)->flags); 800 clear_bit(NFS_INO_ADVISE_RDPLUS, &NFS_I(inode)->flags);
590 nfs_zap_caches(inode); 801 nfs_zap_caches(inode);
802 desc->page_index = 0;
591 desc->plus = 0; 803 desc->plus = 0;
592 desc->entry->eof = 0; 804 desc->eof = 0;
593 continue; 805 continue;
594 } 806 }
595 if (res < 0) 807 if (res < 0)
@@ -605,9 +817,6 @@ out:
605 nfs_unblock_sillyrename(dentry); 817 nfs_unblock_sillyrename(dentry);
606 if (res > 0) 818 if (res > 0)
607 res = 0; 819 res = 0;
608out_alloc_failed:
609 nfs_free_fattr(my_entry.fattr);
610 nfs_free_fhandle(my_entry.fh);
611 dfprintk(FILE, "NFS: readdir(%s/%s) returns %d\n", 820 dfprintk(FILE, "NFS: readdir(%s/%s) returns %d\n",
612 dentry->d_parent->d_name.name, dentry->d_name.name, 821 dentry->d_parent->d_name.name, dentry->d_name.name,
613 res); 822 res);
@@ -1029,10 +1238,63 @@ static int is_atomic_open(struct nameidata *nd)
1029 return 1; 1238 return 1;
1030} 1239}
1031 1240
1241static struct nfs_open_context *nameidata_to_nfs_open_context(struct dentry *dentry, struct nameidata *nd)
1242{
1243 struct path path = {
1244 .mnt = nd->path.mnt,
1245 .dentry = dentry,
1246 };
1247 struct nfs_open_context *ctx;
1248 struct rpc_cred *cred;
1249 fmode_t fmode = nd->intent.open.flags & (FMODE_READ | FMODE_WRITE | FMODE_EXEC);
1250
1251 cred = rpc_lookup_cred();
1252 if (IS_ERR(cred))
1253 return ERR_CAST(cred);
1254 ctx = alloc_nfs_open_context(&path, cred, fmode);
1255 put_rpccred(cred);
1256 if (ctx == NULL)
1257 return ERR_PTR(-ENOMEM);
1258 return ctx;
1259}
1260
1261static int do_open(struct inode *inode, struct file *filp)
1262{
1263 nfs_fscache_set_inode_cookie(inode, filp);
1264 return 0;
1265}
1266
1267static int nfs_intent_set_file(struct nameidata *nd, struct nfs_open_context *ctx)
1268{
1269 struct file *filp;
1270 int ret = 0;
1271
1272 /* If the open_intent is for execute, we have an extra check to make */
1273 if (ctx->mode & FMODE_EXEC) {
1274 ret = nfs_may_open(ctx->path.dentry->d_inode,
1275 ctx->cred,
1276 nd->intent.open.flags);
1277 if (ret < 0)
1278 goto out;
1279 }
1280 filp = lookup_instantiate_filp(nd, ctx->path.dentry, do_open);
1281 if (IS_ERR(filp))
1282 ret = PTR_ERR(filp);
1283 else
1284 nfs_file_set_open_context(filp, ctx);
1285out:
1286 put_nfs_open_context(ctx);
1287 return ret;
1288}
1289
1032static struct dentry *nfs_atomic_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd) 1290static struct dentry *nfs_atomic_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
1033{ 1291{
1292 struct nfs_open_context *ctx;
1293 struct iattr attr;
1034 struct dentry *res = NULL; 1294 struct dentry *res = NULL;
1035 int error; 1295 struct inode *inode;
1296 int open_flags;
1297 int err;
1036 1298
1037 dfprintk(VFS, "NFS: atomic_lookup(%s/%ld), %s\n", 1299 dfprintk(VFS, "NFS: atomic_lookup(%s/%ld), %s\n",
1038 dir->i_sb->s_id, dir->i_ino, dentry->d_name.name); 1300 dir->i_sb->s_id, dir->i_ino, dentry->d_name.name);
@@ -1054,13 +1316,32 @@ static struct dentry *nfs_atomic_lookup(struct inode *dir, struct dentry *dentry
1054 goto out; 1316 goto out;
1055 } 1317 }
1056 1318
1319 ctx = nameidata_to_nfs_open_context(dentry, nd);
1320 res = ERR_CAST(ctx);
1321 if (IS_ERR(ctx))
1322 goto out;
1323
1324 open_flags = nd->intent.open.flags;
1325 if (nd->flags & LOOKUP_CREATE) {
1326 attr.ia_mode = nd->intent.open.create_mode;
1327 attr.ia_valid = ATTR_MODE;
1328 if (!IS_POSIXACL(dir))
1329 attr.ia_mode &= ~current_umask();
1330 } else {
1331 open_flags &= ~(O_EXCL | O_CREAT);
1332 attr.ia_valid = 0;
1333 }
1334
1057 /* Open the file on the server */ 1335 /* Open the file on the server */
1058 res = nfs4_atomic_open(dir, dentry, nd); 1336 nfs_block_sillyrename(dentry->d_parent);
1059 if (IS_ERR(res)) { 1337 inode = NFS_PROTO(dir)->open_context(dir, ctx, open_flags, &attr);
1060 error = PTR_ERR(res); 1338 if (IS_ERR(inode)) {
1061 switch (error) { 1339 nfs_unblock_sillyrename(dentry->d_parent);
1340 put_nfs_open_context(ctx);
1341 switch (PTR_ERR(inode)) {
1062 /* Make a negative dentry */ 1342 /* Make a negative dentry */
1063 case -ENOENT: 1343 case -ENOENT:
1344 d_add(dentry, NULL);
1064 res = NULL; 1345 res = NULL;
1065 goto out; 1346 goto out;
1066 /* This turned out not to be a regular file */ 1347 /* This turned out not to be a regular file */
@@ -1072,11 +1353,25 @@ static struct dentry *nfs_atomic_lookup(struct inode *dir, struct dentry *dentry
1072 goto no_open; 1353 goto no_open;
1073 /* case -EINVAL: */ 1354 /* case -EINVAL: */
1074 default: 1355 default:
1356 res = ERR_CAST(inode);
1075 goto out; 1357 goto out;
1076 } 1358 }
1077 } else if (res != NULL) 1359 }
1360 res = d_add_unique(dentry, inode);
1361 nfs_unblock_sillyrename(dentry->d_parent);
1362 if (res != NULL) {
1363 dput(ctx->path.dentry);
1364 ctx->path.dentry = dget(res);
1078 dentry = res; 1365 dentry = res;
1366 }
1367 err = nfs_intent_set_file(nd, ctx);
1368 if (err < 0) {
1369 if (res != NULL)
1370 dput(res);
1371 return ERR_PTR(err);
1372 }
1079out: 1373out:
1374 nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
1080 return res; 1375 return res;
1081no_open: 1376no_open:
1082 return nfs_lookup(dir, dentry, nd); 1377 return nfs_lookup(dir, dentry, nd);
@@ -1087,12 +1382,15 @@ static int nfs_open_revalidate(struct dentry *dentry, struct nameidata *nd)
1087 struct dentry *parent = NULL; 1382 struct dentry *parent = NULL;
1088 struct inode *inode = dentry->d_inode; 1383 struct inode *inode = dentry->d_inode;
1089 struct inode *dir; 1384 struct inode *dir;
1385 struct nfs_open_context *ctx;
1090 int openflags, ret = 0; 1386 int openflags, ret = 0;
1091 1387
1092 if (!is_atomic_open(nd) || d_mountpoint(dentry)) 1388 if (!is_atomic_open(nd) || d_mountpoint(dentry))
1093 goto no_open; 1389 goto no_open;
1390
1094 parent = dget_parent(dentry); 1391 parent = dget_parent(dentry);
1095 dir = parent->d_inode; 1392 dir = parent->d_inode;
1393
1096 /* We can't create new files in nfs_open_revalidate(), so we 1394 /* We can't create new files in nfs_open_revalidate(), so we
1097 * optimize away revalidation of negative dentries. 1395 * optimize away revalidation of negative dentries.
1098 */ 1396 */
@@ -1112,99 +1410,96 @@ static int nfs_open_revalidate(struct dentry *dentry, struct nameidata *nd)
1112 /* We can't create new files, or truncate existing ones here */ 1410 /* We can't create new files, or truncate existing ones here */
1113 openflags &= ~(O_CREAT|O_EXCL|O_TRUNC); 1411 openflags &= ~(O_CREAT|O_EXCL|O_TRUNC);
1114 1412
1413 ctx = nameidata_to_nfs_open_context(dentry, nd);
1414 ret = PTR_ERR(ctx);
1415 if (IS_ERR(ctx))
1416 goto out;
1115 /* 1417 /*
1116 * Note: we're not holding inode->i_mutex and so may be racing with 1418 * Note: we're not holding inode->i_mutex and so may be racing with
1117 * operations that change the directory. We therefore save the 1419 * operations that change the directory. We therefore save the
1118 * change attribute *before* we do the RPC call. 1420 * change attribute *before* we do the RPC call.
1119 */ 1421 */
1120 ret = nfs4_open_revalidate(dir, dentry, openflags, nd); 1422 inode = NFS_PROTO(dir)->open_context(dir, ctx, openflags, NULL);
1423 if (IS_ERR(inode)) {
1424 ret = PTR_ERR(inode);
1425 switch (ret) {
1426 case -EPERM:
1427 case -EACCES:
1428 case -EDQUOT:
1429 case -ENOSPC:
1430 case -EROFS:
1431 goto out_put_ctx;
1432 default:
1433 goto out_drop;
1434 }
1435 }
1436 iput(inode);
1437 if (inode != dentry->d_inode)
1438 goto out_drop;
1439
1440 nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
1441 ret = nfs_intent_set_file(nd, ctx);
1442 if (ret >= 0)
1443 ret = 1;
1121out: 1444out:
1122 dput(parent); 1445 dput(parent);
1123 if (!ret)
1124 d_drop(dentry);
1125 return ret; 1446 return ret;
1447out_drop:
1448 d_drop(dentry);
1449 ret = 0;
1450out_put_ctx:
1451 put_nfs_open_context(ctx);
1452 goto out;
1453
1126no_open_dput: 1454no_open_dput:
1127 dput(parent); 1455 dput(parent);
1128no_open: 1456no_open:
1129 return nfs_lookup_revalidate(dentry, nd); 1457 return nfs_lookup_revalidate(dentry, nd);
1130} 1458}
1131#endif /* CONFIG_NFSV4 */
1132 1459
1133static struct dentry *nfs_readdir_lookup(nfs_readdir_descriptor_t *desc) 1460static int nfs_open_create(struct inode *dir, struct dentry *dentry, int mode,
1461 struct nameidata *nd)
1134{ 1462{
1135 struct dentry *parent = desc->file->f_path.dentry; 1463 struct nfs_open_context *ctx = NULL;
1136 struct inode *dir = parent->d_inode; 1464 struct iattr attr;
1137 struct nfs_entry *entry = desc->entry; 1465 int error;
1138 struct dentry *dentry, *alias; 1466 int open_flags = 0;
1139 struct qstr name = {
1140 .name = entry->name,
1141 .len = entry->len,
1142 };
1143 struct inode *inode;
1144 unsigned long verf = nfs_save_change_attribute(dir);
1145 1467
1146 switch (name.len) { 1468 dfprintk(VFS, "NFS: create(%s/%ld), %s\n",
1147 case 2: 1469 dir->i_sb->s_id, dir->i_ino, dentry->d_name.name);
1148 if (name.name[0] == '.' && name.name[1] == '.')
1149 return dget_parent(parent);
1150 break;
1151 case 1:
1152 if (name.name[0] == '.')
1153 return dget(parent);
1154 }
1155 1470
1156 spin_lock(&dir->i_lock); 1471 attr.ia_mode = mode;
1157 if (NFS_I(dir)->cache_validity & NFS_INO_INVALID_DATA) { 1472 attr.ia_valid = ATTR_MODE;
1158 spin_unlock(&dir->i_lock);
1159 return NULL;
1160 }
1161 spin_unlock(&dir->i_lock);
1162 1473
1163 name.hash = full_name_hash(name.name, name.len); 1474 if ((nd->flags & LOOKUP_CREATE) != 0) {
1164 dentry = d_lookup(parent, &name); 1475 open_flags = nd->intent.open.flags;
1165 if (dentry != NULL) {
1166 /* Is this a positive dentry that matches the readdir info? */
1167 if (dentry->d_inode != NULL &&
1168 (NFS_FILEID(dentry->d_inode) == entry->ino ||
1169 d_mountpoint(dentry))) {
1170 if (!desc->plus || entry->fh->size == 0)
1171 return dentry;
1172 if (nfs_compare_fh(NFS_FH(dentry->d_inode),
1173 entry->fh) == 0)
1174 goto out_renew;
1175 }
1176 /* No, so d_drop to allow one to be created */
1177 d_drop(dentry);
1178 dput(dentry);
1179 }
1180 if (!desc->plus || !(entry->fattr->valid & NFS_ATTR_FATTR))
1181 return NULL;
1182 if (name.len > NFS_SERVER(dir)->namelen)
1183 return NULL;
1184 /* Note: caller is already holding the dir->i_mutex! */
1185 dentry = d_alloc(parent, &name);
1186 if (dentry == NULL)
1187 return NULL;
1188 dentry->d_op = NFS_PROTO(dir)->dentry_ops;
1189 inode = nfs_fhget(dentry->d_sb, entry->fh, entry->fattr);
1190 if (IS_ERR(inode)) {
1191 dput(dentry);
1192 return NULL;
1193 }
1194 1476
1195 alias = d_materialise_unique(dentry, inode); 1477 ctx = nameidata_to_nfs_open_context(dentry, nd);
1196 if (alias != NULL) { 1478 error = PTR_ERR(ctx);
1197 dput(dentry); 1479 if (IS_ERR(ctx))
1198 if (IS_ERR(alias)) 1480 goto out_err_drop;
1199 return NULL;
1200 dentry = alias;
1201 } 1481 }
1202 1482
1203out_renew: 1483 error = NFS_PROTO(dir)->create(dir, dentry, &attr, open_flags, ctx);
1204 nfs_set_verifier(dentry, verf); 1484 if (error != 0)
1205 return dentry; 1485 goto out_put_ctx;
1486 if (ctx != NULL) {
1487 error = nfs_intent_set_file(nd, ctx);
1488 if (error < 0)
1489 goto out_err;
1490 }
1491 return 0;
1492out_put_ctx:
1493 if (ctx != NULL)
1494 put_nfs_open_context(ctx);
1495out_err_drop:
1496 d_drop(dentry);
1497out_err:
1498 return error;
1206} 1499}
1207 1500
1501#endif /* CONFIG_NFSV4 */
1502
1208/* 1503/*
1209 * Code common to create, mkdir, and mknod. 1504 * Code common to create, mkdir, and mknod.
1210 */ 1505 */
@@ -1258,7 +1553,6 @@ static int nfs_create(struct inode *dir, struct dentry *dentry, int mode,
1258{ 1553{
1259 struct iattr attr; 1554 struct iattr attr;
1260 int error; 1555 int error;
1261 int open_flags = 0;
1262 1556
1263 dfprintk(VFS, "NFS: create(%s/%ld), %s\n", 1557 dfprintk(VFS, "NFS: create(%s/%ld), %s\n",
1264 dir->i_sb->s_id, dir->i_ino, dentry->d_name.name); 1558 dir->i_sb->s_id, dir->i_ino, dentry->d_name.name);
@@ -1266,10 +1560,7 @@ static int nfs_create(struct inode *dir, struct dentry *dentry, int mode,
1266 attr.ia_mode = mode; 1560 attr.ia_mode = mode;
1267 attr.ia_valid = ATTR_MODE; 1561 attr.ia_valid = ATTR_MODE;
1268 1562
1269 if ((nd->flags & LOOKUP_CREATE) != 0) 1563 error = NFS_PROTO(dir)->create(dir, dentry, &attr, 0, NULL);
1270 open_flags = nd->intent.open.flags;
1271
1272 error = NFS_PROTO(dir)->create(dir, dentry, &attr, open_flags, nd);
1273 if (error != 0) 1564 if (error != 0)
1274 goto out_err; 1565 goto out_err;
1275 return 0; 1566 return 0;
@@ -1351,76 +1642,6 @@ static int nfs_rmdir(struct inode *dir, struct dentry *dentry)
1351 return error; 1642 return error;
1352} 1643}
1353 1644
1354static int nfs_sillyrename(struct inode *dir, struct dentry *dentry)
1355{
1356 static unsigned int sillycounter;
1357 const int fileidsize = sizeof(NFS_FILEID(dentry->d_inode))*2;
1358 const int countersize = sizeof(sillycounter)*2;
1359 const int slen = sizeof(".nfs")+fileidsize+countersize-1;
1360 char silly[slen+1];
1361 struct qstr qsilly;
1362 struct dentry *sdentry;
1363 int error = -EIO;
1364
1365 dfprintk(VFS, "NFS: silly-rename(%s/%s, ct=%d)\n",
1366 dentry->d_parent->d_name.name, dentry->d_name.name,
1367 atomic_read(&dentry->d_count));
1368 nfs_inc_stats(dir, NFSIOS_SILLYRENAME);
1369
1370 /*
1371 * We don't allow a dentry to be silly-renamed twice.
1372 */
1373 error = -EBUSY;
1374 if (dentry->d_flags & DCACHE_NFSFS_RENAMED)
1375 goto out;
1376
1377 sprintf(silly, ".nfs%*.*Lx",
1378 fileidsize, fileidsize,
1379 (unsigned long long)NFS_FILEID(dentry->d_inode));
1380
1381 /* Return delegation in anticipation of the rename */
1382 nfs_inode_return_delegation(dentry->d_inode);
1383
1384 sdentry = NULL;
1385 do {
1386 char *suffix = silly + slen - countersize;
1387
1388 dput(sdentry);
1389 sillycounter++;
1390 sprintf(suffix, "%*.*x", countersize, countersize, sillycounter);
1391
1392 dfprintk(VFS, "NFS: trying to rename %s to %s\n",
1393 dentry->d_name.name, silly);
1394
1395 sdentry = lookup_one_len(silly, dentry->d_parent, slen);
1396 /*
1397 * N.B. Better to return EBUSY here ... it could be
1398 * dangerous to delete the file while it's in use.
1399 */
1400 if (IS_ERR(sdentry))
1401 goto out;
1402 } while(sdentry->d_inode != NULL); /* need negative lookup */
1403
1404 qsilly.name = silly;
1405 qsilly.len = strlen(silly);
1406 if (dentry->d_inode) {
1407 error = NFS_PROTO(dir)->rename(dir, &dentry->d_name,
1408 dir, &qsilly);
1409 nfs_mark_for_revalidate(dentry->d_inode);
1410 } else
1411 error = NFS_PROTO(dir)->rename(dir, &dentry->d_name,
1412 dir, &qsilly);
1413 if (!error) {
1414 nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
1415 d_move(dentry, sdentry);
1416 error = nfs_async_unlink(dir, dentry);
1417 /* If we return 0 we don't unlink */
1418 }
1419 dput(sdentry);
1420out:
1421 return error;
1422}
1423
1424/* 1645/*
1425 * Remove a file after making sure there are no pending writes, 1646 * Remove a file after making sure there are no pending writes,
1426 * and after checking that the file has only one user. 1647 * and after checking that the file has only one user.
@@ -1580,7 +1801,7 @@ nfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry)
1580 d_drop(dentry); 1801 d_drop(dentry);
1581 error = NFS_PROTO(dir)->link(inode, dir, &dentry->d_name); 1802 error = NFS_PROTO(dir)->link(inode, dir, &dentry->d_name);
1582 if (error == 0) { 1803 if (error == 0) {
1583 atomic_inc(&inode->i_count); 1804 ihold(inode);
1584 d_add(dentry, inode); 1805 d_add(dentry, inode);
1585 } 1806 }
1586 return error; 1807 return error;
@@ -1711,14 +1932,14 @@ static void nfs_access_free_list(struct list_head *head)
1711int nfs_access_cache_shrinker(struct shrinker *shrink, int nr_to_scan, gfp_t gfp_mask) 1932int nfs_access_cache_shrinker(struct shrinker *shrink, int nr_to_scan, gfp_t gfp_mask)
1712{ 1933{
1713 LIST_HEAD(head); 1934 LIST_HEAD(head);
1714 struct nfs_inode *nfsi; 1935 struct nfs_inode *nfsi, *next;
1715 struct nfs_access_entry *cache; 1936 struct nfs_access_entry *cache;
1716 1937
1717 if ((gfp_mask & GFP_KERNEL) != GFP_KERNEL) 1938 if ((gfp_mask & GFP_KERNEL) != GFP_KERNEL)
1718 return (nr_to_scan == 0) ? 0 : -1; 1939 return (nr_to_scan == 0) ? 0 : -1;
1719 1940
1720 spin_lock(&nfs_access_lru_lock); 1941 spin_lock(&nfs_access_lru_lock);
1721 list_for_each_entry(nfsi, &nfs_access_lru_list, access_cache_inode_lru) { 1942 list_for_each_entry_safe(nfsi, next, &nfs_access_lru_list, access_cache_inode_lru) {
1722 struct inode *inode; 1943 struct inode *inode;
1723 1944
1724 if (nr_to_scan-- == 0) 1945 if (nr_to_scan-- == 0)
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index 064a80961677..84d3c8b90206 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -873,7 +873,7 @@ static ssize_t nfs_direct_write(struct kiocb *iocb, const struct iovec *iov,
873 dreq->inode = inode; 873 dreq->inode = inode;
874 dreq->ctx = get_nfs_open_context(nfs_file_open_context(iocb->ki_filp)); 874 dreq->ctx = get_nfs_open_context(nfs_file_open_context(iocb->ki_filp));
875 dreq->l_ctx = nfs_get_lock_context(dreq->ctx); 875 dreq->l_ctx = nfs_get_lock_context(dreq->ctx);
876 if (dreq->l_ctx != NULL) 876 if (dreq->l_ctx == NULL)
877 goto out_release; 877 goto out_release;
878 if (!is_sync_kiocb(iocb)) 878 if (!is_sync_kiocb(iocb))
879 dreq->iocb = iocb; 879 dreq->iocb = iocb;
diff --git a/fs/nfs/dns_resolve.c b/fs/nfs/dns_resolve.c
index dba50a5625db..a6e711ad130f 100644
--- a/fs/nfs/dns_resolve.c
+++ b/fs/nfs/dns_resolve.c
@@ -167,7 +167,7 @@ static int nfs_dns_show(struct seq_file *m, struct cache_detail *cd,
167 return 0; 167 return 0;
168 } 168 }
169 item = container_of(h, struct nfs_dns_ent, h); 169 item = container_of(h, struct nfs_dns_ent, h);
170 ttl = (long)item->h.expiry_time - (long)get_seconds(); 170 ttl = item->h.expiry_time - seconds_since_boot();
171 if (ttl < 0) 171 if (ttl < 0)
172 ttl = 0; 172 ttl = 0;
173 173
@@ -239,7 +239,7 @@ static int nfs_dns_parse(struct cache_detail *cd, char *buf, int buflen)
239 ttl = get_expiry(&buf); 239 ttl = get_expiry(&buf);
240 if (ttl == 0) 240 if (ttl == 0)
241 goto out; 241 goto out;
242 key.h.expiry_time = ttl + get_seconds(); 242 key.h.expiry_time = ttl + seconds_since_boot();
243 243
244 ret = -ENOMEM; 244 ret = -ENOMEM;
245 item = nfs_dns_lookup(cd, &key); 245 item = nfs_dns_lookup(cd, &key);
@@ -301,7 +301,7 @@ static int do_cache_lookup_nowait(struct cache_detail *cd,
301 goto out_err; 301 goto out_err;
302 ret = -ETIMEDOUT; 302 ret = -ETIMEDOUT;
303 if (!test_bit(CACHE_VALID, &(*item)->h.flags) 303 if (!test_bit(CACHE_VALID, &(*item)->h.flags)
304 || (*item)->h.expiry_time < get_seconds() 304 || (*item)->h.expiry_time < seconds_since_boot()
305 || cd->flush_time > (*item)->h.last_refresh) 305 || cd->flush_time > (*item)->h.last_refresh)
306 goto out_put; 306 goto out_put;
307 ret = -ENOENT; 307 ret = -ENOENT;
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 05bf3c0dc751..e756075637b0 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -36,6 +36,7 @@
36#include "internal.h" 36#include "internal.h"
37#include "iostat.h" 37#include "iostat.h"
38#include "fscache.h" 38#include "fscache.h"
39#include "pnfs.h"
39 40
40#define NFSDBG_FACILITY NFSDBG_FILE 41#define NFSDBG_FACILITY NFSDBG_FILE
41 42
@@ -386,6 +387,10 @@ static int nfs_write_begin(struct file *file, struct address_space *mapping,
386 file->f_path.dentry->d_name.name, 387 file->f_path.dentry->d_name.name,
387 mapping->host->i_ino, len, (long long) pos); 388 mapping->host->i_ino, len, (long long) pos);
388 389
390 pnfs_update_layout(mapping->host,
391 nfs_file_open_context(file),
392 IOMODE_RW);
393
389start: 394start:
390 /* 395 /*
391 * Prevent starvation issues if someone is doing a consistency 396 * Prevent starvation issues if someone is doing a consistency
@@ -551,7 +556,7 @@ static int nfs_vm_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
551 struct file *filp = vma->vm_file; 556 struct file *filp = vma->vm_file;
552 struct dentry *dentry = filp->f_path.dentry; 557 struct dentry *dentry = filp->f_path.dentry;
553 unsigned pagelen; 558 unsigned pagelen;
554 int ret = -EINVAL; 559 int ret = VM_FAULT_NOPAGE;
555 struct address_space *mapping; 560 struct address_space *mapping;
556 561
557 dfprintk(PAGECACHE, "NFS: vm_page_mkwrite(%s/%s(%ld), offset %lld)\n", 562 dfprintk(PAGECACHE, "NFS: vm_page_mkwrite(%s/%s(%ld), offset %lld)\n",
@@ -567,21 +572,20 @@ static int nfs_vm_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
567 if (mapping != dentry->d_inode->i_mapping) 572 if (mapping != dentry->d_inode->i_mapping)
568 goto out_unlock; 573 goto out_unlock;
569 574
570 ret = 0;
571 pagelen = nfs_page_length(page); 575 pagelen = nfs_page_length(page);
572 if (pagelen == 0) 576 if (pagelen == 0)
573 goto out_unlock; 577 goto out_unlock;
574 578
575 ret = nfs_flush_incompatible(filp, page); 579 ret = VM_FAULT_LOCKED;
576 if (ret != 0) 580 if (nfs_flush_incompatible(filp, page) == 0 &&
577 goto out_unlock; 581 nfs_updatepage(filp, page, 0, pagelen) == 0)
582 goto out;
578 583
579 ret = nfs_updatepage(filp, page, 0, pagelen); 584 ret = VM_FAULT_SIGBUS;
580out_unlock: 585out_unlock:
581 if (!ret)
582 return VM_FAULT_LOCKED;
583 unlock_page(page); 586 unlock_page(page);
584 return VM_FAULT_SIGBUS; 587out:
588 return ret;
585} 589}
586 590
587static const struct vm_operations_struct nfs_file_vm_ops = { 591static const struct vm_operations_struct nfs_file_vm_ops = {
@@ -684,7 +688,8 @@ static ssize_t nfs_file_splice_write(struct pipe_inode_info *pipe,
684 return ret; 688 return ret;
685} 689}
686 690
687static int do_getlk(struct file *filp, int cmd, struct file_lock *fl) 691static int
692do_getlk(struct file *filp, int cmd, struct file_lock *fl, int is_local)
688{ 693{
689 struct inode *inode = filp->f_mapping->host; 694 struct inode *inode = filp->f_mapping->host;
690 int status = 0; 695 int status = 0;
@@ -699,7 +704,7 @@ static int do_getlk(struct file *filp, int cmd, struct file_lock *fl)
699 if (nfs_have_delegation(inode, FMODE_READ)) 704 if (nfs_have_delegation(inode, FMODE_READ))
700 goto out_noconflict; 705 goto out_noconflict;
701 706
702 if (NFS_SERVER(inode)->flags & NFS_MOUNT_NONLM) 707 if (is_local)
703 goto out_noconflict; 708 goto out_noconflict;
704 709
705 status = NFS_PROTO(inode)->lock(filp, cmd, fl); 710 status = NFS_PROTO(inode)->lock(filp, cmd, fl);
@@ -726,7 +731,8 @@ static int do_vfs_lock(struct file *file, struct file_lock *fl)
726 return res; 731 return res;
727} 732}
728 733
729static int do_unlk(struct file *filp, int cmd, struct file_lock *fl) 734static int
735do_unlk(struct file *filp, int cmd, struct file_lock *fl, int is_local)
730{ 736{
731 struct inode *inode = filp->f_mapping->host; 737 struct inode *inode = filp->f_mapping->host;
732 int status; 738 int status;
@@ -741,15 +747,24 @@ static int do_unlk(struct file *filp, int cmd, struct file_lock *fl)
741 * If we're signalled while cleaning up locks on process exit, we 747 * If we're signalled while cleaning up locks on process exit, we
742 * still need to complete the unlock. 748 * still need to complete the unlock.
743 */ 749 */
744 /* Use local locking if mounted with "-onolock" */ 750 /*
745 if (!(NFS_SERVER(inode)->flags & NFS_MOUNT_NONLM)) 751 * Use local locking if mounted with "-onolock" or with appropriate
752 * "-olocal_lock="
753 */
754 if (!is_local)
746 status = NFS_PROTO(inode)->lock(filp, cmd, fl); 755 status = NFS_PROTO(inode)->lock(filp, cmd, fl);
747 else 756 else
748 status = do_vfs_lock(filp, fl); 757 status = do_vfs_lock(filp, fl);
749 return status; 758 return status;
750} 759}
751 760
752static int do_setlk(struct file *filp, int cmd, struct file_lock *fl) 761static int
762is_time_granular(struct timespec *ts) {
763 return ((ts->tv_sec == 0) && (ts->tv_nsec <= 1000));
764}
765
766static int
767do_setlk(struct file *filp, int cmd, struct file_lock *fl, int is_local)
753{ 768{
754 struct inode *inode = filp->f_mapping->host; 769 struct inode *inode = filp->f_mapping->host;
755 int status; 770 int status;
@@ -762,20 +777,31 @@ static int do_setlk(struct file *filp, int cmd, struct file_lock *fl)
762 if (status != 0) 777 if (status != 0)
763 goto out; 778 goto out;
764 779
765 /* Use local locking if mounted with "-onolock" */ 780 /*
766 if (!(NFS_SERVER(inode)->flags & NFS_MOUNT_NONLM)) 781 * Use local locking if mounted with "-onolock" or with appropriate
782 * "-olocal_lock="
783 */
784 if (!is_local)
767 status = NFS_PROTO(inode)->lock(filp, cmd, fl); 785 status = NFS_PROTO(inode)->lock(filp, cmd, fl);
768 else 786 else
769 status = do_vfs_lock(filp, fl); 787 status = do_vfs_lock(filp, fl);
770 if (status < 0) 788 if (status < 0)
771 goto out; 789 goto out;
790
772 /* 791 /*
773 * Make sure we clear the cache whenever we try to get the lock. 792 * Revalidate the cache if the server has time stamps granular
793 * enough to detect subsecond changes. Otherwise, clear the
794 * cache to prevent missing any changes.
795 *
774 * This makes locking act as a cache coherency point. 796 * This makes locking act as a cache coherency point.
775 */ 797 */
776 nfs_sync_mapping(filp->f_mapping); 798 nfs_sync_mapping(filp->f_mapping);
777 if (!nfs_have_delegation(inode, FMODE_READ)) 799 if (!nfs_have_delegation(inode, FMODE_READ)) {
778 nfs_zap_caches(inode); 800 if (is_time_granular(&NFS_SERVER(inode)->time_delta))
801 __nfs_revalidate_inode(NFS_SERVER(inode), inode);
802 else
803 nfs_zap_caches(inode);
804 }
779out: 805out:
780 return status; 806 return status;
781} 807}
@@ -787,6 +813,7 @@ static int nfs_lock(struct file *filp, int cmd, struct file_lock *fl)
787{ 813{
788 struct inode *inode = filp->f_mapping->host; 814 struct inode *inode = filp->f_mapping->host;
789 int ret = -ENOLCK; 815 int ret = -ENOLCK;
816 int is_local = 0;
790 817
791 dprintk("NFS: lock(%s/%s, t=%x, fl=%x, r=%lld:%lld)\n", 818 dprintk("NFS: lock(%s/%s, t=%x, fl=%x, r=%lld:%lld)\n",
792 filp->f_path.dentry->d_parent->d_name.name, 819 filp->f_path.dentry->d_parent->d_name.name,
@@ -800,6 +827,9 @@ static int nfs_lock(struct file *filp, int cmd, struct file_lock *fl)
800 if (__mandatory_lock(inode) && fl->fl_type != F_UNLCK) 827 if (__mandatory_lock(inode) && fl->fl_type != F_UNLCK)
801 goto out_err; 828 goto out_err;
802 829
830 if (NFS_SERVER(inode)->flags & NFS_MOUNT_LOCAL_FCNTL)
831 is_local = 1;
832
803 if (NFS_PROTO(inode)->lock_check_bounds != NULL) { 833 if (NFS_PROTO(inode)->lock_check_bounds != NULL) {
804 ret = NFS_PROTO(inode)->lock_check_bounds(fl); 834 ret = NFS_PROTO(inode)->lock_check_bounds(fl);
805 if (ret < 0) 835 if (ret < 0)
@@ -807,11 +837,11 @@ static int nfs_lock(struct file *filp, int cmd, struct file_lock *fl)
807 } 837 }
808 838
809 if (IS_GETLK(cmd)) 839 if (IS_GETLK(cmd))
810 ret = do_getlk(filp, cmd, fl); 840 ret = do_getlk(filp, cmd, fl, is_local);
811 else if (fl->fl_type == F_UNLCK) 841 else if (fl->fl_type == F_UNLCK)
812 ret = do_unlk(filp, cmd, fl); 842 ret = do_unlk(filp, cmd, fl, is_local);
813 else 843 else
814 ret = do_setlk(filp, cmd, fl); 844 ret = do_setlk(filp, cmd, fl, is_local);
815out_err: 845out_err:
816 return ret; 846 return ret;
817} 847}
@@ -821,6 +851,9 @@ out_err:
821 */ 851 */
822static int nfs_flock(struct file *filp, int cmd, struct file_lock *fl) 852static int nfs_flock(struct file *filp, int cmd, struct file_lock *fl)
823{ 853{
854 struct inode *inode = filp->f_mapping->host;
855 int is_local = 0;
856
824 dprintk("NFS: flock(%s/%s, t=%x, fl=%x)\n", 857 dprintk("NFS: flock(%s/%s, t=%x, fl=%x)\n",
825 filp->f_path.dentry->d_parent->d_name.name, 858 filp->f_path.dentry->d_parent->d_name.name,
826 filp->f_path.dentry->d_name.name, 859 filp->f_path.dentry->d_name.name,
@@ -829,14 +862,17 @@ static int nfs_flock(struct file *filp, int cmd, struct file_lock *fl)
829 if (!(fl->fl_flags & FL_FLOCK)) 862 if (!(fl->fl_flags & FL_FLOCK))
830 return -ENOLCK; 863 return -ENOLCK;
831 864
865 if (NFS_SERVER(inode)->flags & NFS_MOUNT_LOCAL_FLOCK)
866 is_local = 1;
867
832 /* We're simulating flock() locks using posix locks on the server */ 868 /* We're simulating flock() locks using posix locks on the server */
833 fl->fl_owner = (fl_owner_t)filp; 869 fl->fl_owner = (fl_owner_t)filp;
834 fl->fl_start = 0; 870 fl->fl_start = 0;
835 fl->fl_end = OFFSET_MAX; 871 fl->fl_end = OFFSET_MAX;
836 872
837 if (fl->fl_type == F_UNLCK) 873 if (fl->fl_type == F_UNLCK)
838 return do_unlk(filp, cmd, fl); 874 return do_unlk(filp, cmd, fl, is_local);
839 return do_setlk(filp, cmd, fl); 875 return do_setlk(filp, cmd, fl, is_local);
840} 876}
841 877
842/* 878/*
diff --git a/fs/nfs/getroot.c b/fs/nfs/getroot.c
index a70e446e1605..ac7b814ce162 100644
--- a/fs/nfs/getroot.c
+++ b/fs/nfs/getroot.c
@@ -54,8 +54,7 @@ static int nfs_superblock_set_dummy_root(struct super_block *sb, struct inode *i
54 iput(inode); 54 iput(inode);
55 return -ENOMEM; 55 return -ENOMEM;
56 } 56 }
57 /* Circumvent igrab(): we know the inode is not being freed */ 57 ihold(inode);
58 atomic_inc(&inode->i_count);
59 /* 58 /*
60 * Ensure that this dentry is invisible to d_find_alias(). 59 * Ensure that this dentry is invisible to d_find_alias().
61 * Otherwise, it may be spliced into the tree by 60 * Otherwise, it may be spliced into the tree by
diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c
index 21a84d45916f..4e2d9b6b1380 100644
--- a/fs/nfs/idmap.c
+++ b/fs/nfs/idmap.c
@@ -34,6 +34,212 @@
34 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 34 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
35 */ 35 */
36 36
37#ifdef CONFIG_NFS_USE_NEW_IDMAPPER
38
39#include <linux/slab.h>
40#include <linux/cred.h>
41#include <linux/nfs_idmap.h>
42#include <linux/keyctl.h>
43#include <linux/key-type.h>
44#include <linux/rcupdate.h>
45#include <linux/kernel.h>
46#include <linux/err.h>
47
48#include <keys/user-type.h>
49
50#define NFS_UINT_MAXLEN 11
51
52const struct cred *id_resolver_cache;
53
54struct key_type key_type_id_resolver = {
55 .name = "id_resolver",
56 .instantiate = user_instantiate,
57 .match = user_match,
58 .revoke = user_revoke,
59 .destroy = user_destroy,
60 .describe = user_describe,
61 .read = user_read,
62};
63
64int nfs_idmap_init(void)
65{
66 struct cred *cred;
67 struct key *keyring;
68 int ret = 0;
69
70 printk(KERN_NOTICE "Registering the %s key type\n", key_type_id_resolver.name);
71
72 cred = prepare_kernel_cred(NULL);
73 if (!cred)
74 return -ENOMEM;
75
76 keyring = key_alloc(&key_type_keyring, ".id_resolver", 0, 0, cred,
77 (KEY_POS_ALL & ~KEY_POS_SETATTR) |
78 KEY_USR_VIEW | KEY_USR_READ,
79 KEY_ALLOC_NOT_IN_QUOTA);
80 if (IS_ERR(keyring)) {
81 ret = PTR_ERR(keyring);
82 goto failed_put_cred;
83 }
84
85 ret = key_instantiate_and_link(keyring, NULL, 0, NULL, NULL);
86 if (ret < 0)
87 goto failed_put_key;
88
89 ret = register_key_type(&key_type_id_resolver);
90 if (ret < 0)
91 goto failed_put_key;
92
93 cred->thread_keyring = keyring;
94 cred->jit_keyring = KEY_REQKEY_DEFL_THREAD_KEYRING;
95 id_resolver_cache = cred;
96 return 0;
97
98failed_put_key:
99 key_put(keyring);
100failed_put_cred:
101 put_cred(cred);
102 return ret;
103}
104
105void nfs_idmap_quit(void)
106{
107 key_revoke(id_resolver_cache->thread_keyring);
108 unregister_key_type(&key_type_id_resolver);
109 put_cred(id_resolver_cache);
110}
111
112/*
113 * Assemble the description to pass to request_key()
114 * This function will allocate a new string and update *desc to point
115 * at it. The caller is responsible for freeing *desc.
116 *
117 * On allocation failure -ENOMEM is returned. Otherwise, the size of the allocated string (including the ':' separator and the terminating NUL) is returned.
118 */
119static ssize_t nfs_idmap_get_desc(const char *name, size_t namelen,
120 const char *type, size_t typelen, char **desc)
121{
122 char *cp;
123 size_t desclen = typelen + namelen + 2;
124
125 *desc = kmalloc(desclen, GFP_KERNEL);
126 if (!*desc)
127 return -ENOMEM;
128
129 cp = *desc;
130 memcpy(cp, type, typelen);
131 cp += typelen;
132 *cp++ = ':';
133
134 memcpy(cp, name, namelen);
135 cp += namelen;
136 *cp = '\0';
137 return desclen;
138}
139
140static ssize_t nfs_idmap_request_key(const char *name, size_t namelen,
141 const char *type, void *data, size_t data_size)
142{
143 const struct cred *saved_cred;
144 struct key *rkey;
145 char *desc;
146 struct user_key_payload *payload;
147 ssize_t ret;
148
149 ret = nfs_idmap_get_desc(name, namelen, type, strlen(type), &desc);
150 if (ret <= 0)
151 goto out;
152
153 saved_cred = override_creds(id_resolver_cache);
154 rkey = request_key(&key_type_id_resolver, desc, "");
155 revert_creds(saved_cred);
156 kfree(desc);
157 if (IS_ERR(rkey)) {
158 ret = PTR_ERR(rkey);
159 goto out;
160 }
161
162 rcu_read_lock();
163 rkey->perm |= KEY_USR_VIEW;
164
165 ret = key_validate(rkey);
166 if (ret < 0)
167 goto out_up;
168
169 payload = rcu_dereference(rkey->payload.data);
170 if (IS_ERR_OR_NULL(payload)) {
171 ret = PTR_ERR(payload);
172 goto out_up;
173 }
174
175 ret = payload->datalen;
176 if (ret > 0 && ret <= data_size)
177 memcpy(data, payload->data, ret);
178 else
179 ret = -EINVAL;
180
181out_up:
182 rcu_read_unlock();
183 key_put(rkey);
184out:
185 return ret;
186}
187
188
189/* ID -> Name */
190static ssize_t nfs_idmap_lookup_name(__u32 id, const char *type, char *buf, size_t buflen)
191{
192 char id_str[NFS_UINT_MAXLEN];
193 int id_len;
194 ssize_t ret;
195
196 id_len = snprintf(id_str, sizeof(id_str), "%u", id);
197 ret = nfs_idmap_request_key(id_str, id_len, type, buf, buflen);
198 if (ret < 0)
199 return -EINVAL;
200 return ret;
201}
202
203/* Name -> ID */
204static int nfs_idmap_lookup_id(const char *name, size_t namelen,
205 const char *type, __u32 *id)
206{
207 char id_str[NFS_UINT_MAXLEN];
208 long id_long;
209 ssize_t data_size;
210 int ret = 0;
211
212 data_size = nfs_idmap_request_key(name, namelen, type, id_str, NFS_UINT_MAXLEN);
213 if (data_size <= 0) {
214 ret = -EINVAL;
215 } else {
216 ret = strict_strtol(id_str, 10, &id_long);
217 *id = (__u32)id_long;
218 }
219 return ret;
220}
221
222int nfs_map_name_to_uid(struct nfs_client *clp, const char *name, size_t namelen, __u32 *uid)
223{
224 return nfs_idmap_lookup_id(name, namelen, "uid", uid);
225}
226
227int nfs_map_group_to_gid(struct nfs_client *clp, const char *name, size_t namelen, __u32 *gid)
228{
229 return nfs_idmap_lookup_id(name, namelen, "gid", gid);
230}
231
232int nfs_map_uid_to_name(struct nfs_client *clp, __u32 uid, char *buf, size_t buflen)
233{
234 return nfs_idmap_lookup_name(uid, "user", buf, buflen);
235}
236int nfs_map_gid_to_group(struct nfs_client *clp, __u32 gid, char *buf, size_t buflen)
237{
238 return nfs_idmap_lookup_name(gid, "group", buf, buflen);
239}
240
241#else /* CONFIG_NFS_USE_NEW_IDMAPPER not defined */
242
37#include <linux/module.h> 243#include <linux/module.h>
38#include <linux/mutex.h> 244#include <linux/mutex.h>
39#include <linux/init.h> 245#include <linux/init.h>
@@ -503,16 +709,17 @@ int nfs_map_group_to_gid(struct nfs_client *clp, const char *name, size_t namele
503 return nfs_idmap_id(idmap, &idmap->idmap_group_hash, name, namelen, uid); 709 return nfs_idmap_id(idmap, &idmap->idmap_group_hash, name, namelen, uid);
504} 710}
505 711
506int nfs_map_uid_to_name(struct nfs_client *clp, __u32 uid, char *buf) 712int nfs_map_uid_to_name(struct nfs_client *clp, __u32 uid, char *buf, size_t buflen)
507{ 713{
508 struct idmap *idmap = clp->cl_idmap; 714 struct idmap *idmap = clp->cl_idmap;
509 715
510 return nfs_idmap_name(idmap, &idmap->idmap_user_hash, uid, buf); 716 return nfs_idmap_name(idmap, &idmap->idmap_user_hash, uid, buf);
511} 717}
512int nfs_map_gid_to_group(struct nfs_client *clp, __u32 uid, char *buf) 718int nfs_map_gid_to_group(struct nfs_client *clp, __u32 uid, char *buf, size_t buflen)
513{ 719{
514 struct idmap *idmap = clp->cl_idmap; 720 struct idmap *idmap = clp->cl_idmap;
515 721
516 return nfs_idmap_name(idmap, &idmap->idmap_group_hash, uid, buf); 722 return nfs_idmap_name(idmap, &idmap->idmap_group_hash, uid, buf);
517} 723}
518 724
725#endif /* CONFIG_NFS_USE_NEW_IDMAPPER */
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 7d2d6c72aa78..314f57164602 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -48,6 +48,7 @@
48#include "internal.h" 48#include "internal.h"
49#include "fscache.h" 49#include "fscache.h"
50#include "dns_resolve.h" 50#include "dns_resolve.h"
51#include "pnfs.h"
51 52
52#define NFSDBG_FACILITY NFSDBG_VFS 53#define NFSDBG_FACILITY NFSDBG_VFS
53 54
@@ -234,9 +235,6 @@ nfs_init_locked(struct inode *inode, void *opaque)
234 return 0; 235 return 0;
235} 236}
236 237
237/* Don't use READDIRPLUS on directories that we believe are too large */
238#define NFS_LIMIT_READDIRPLUS (8*PAGE_SIZE)
239
240/* 238/*
241 * This is our front-end to iget that looks up inodes by file handle 239 * This is our front-end to iget that looks up inodes by file handle
242 * instead of inode number. 240 * instead of inode number.
@@ -291,8 +289,7 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr)
291 } else if (S_ISDIR(inode->i_mode)) { 289 } else if (S_ISDIR(inode->i_mode)) {
292 inode->i_op = NFS_SB(sb)->nfs_client->rpc_ops->dir_inode_ops; 290 inode->i_op = NFS_SB(sb)->nfs_client->rpc_ops->dir_inode_ops;
293 inode->i_fop = &nfs_dir_operations; 291 inode->i_fop = &nfs_dir_operations;
294 if (nfs_server_capable(inode, NFS_CAP_READDIRPLUS) 292 if (nfs_server_capable(inode, NFS_CAP_READDIRPLUS))
295 && fattr->size <= NFS_LIMIT_READDIRPLUS)
296 set_bit(NFS_INO_ADVISE_RDPLUS, &NFS_I(inode)->flags); 293 set_bit(NFS_INO_ADVISE_RDPLUS, &NFS_I(inode)->flags);
297 /* Deal with crossing mountpoints */ 294 /* Deal with crossing mountpoints */
298 if ((fattr->valid & NFS_ATTR_FATTR_FSID) 295 if ((fattr->valid & NFS_ATTR_FATTR_FSID)
@@ -623,7 +620,7 @@ void nfs_close_context(struct nfs_open_context *ctx, int is_sync)
623 nfs_revalidate_inode(server, inode); 620 nfs_revalidate_inode(server, inode);
624} 621}
625 622
626static struct nfs_open_context *alloc_nfs_open_context(struct path *path, struct rpc_cred *cred) 623struct nfs_open_context *alloc_nfs_open_context(struct path *path, struct rpc_cred *cred, fmode_t f_mode)
627{ 624{
628 struct nfs_open_context *ctx; 625 struct nfs_open_context *ctx;
629 626
@@ -633,11 +630,13 @@ static struct nfs_open_context *alloc_nfs_open_context(struct path *path, struct
633 path_get(&ctx->path); 630 path_get(&ctx->path);
634 ctx->cred = get_rpccred(cred); 631 ctx->cred = get_rpccred(cred);
635 ctx->state = NULL; 632 ctx->state = NULL;
633 ctx->mode = f_mode;
636 ctx->flags = 0; 634 ctx->flags = 0;
637 ctx->error = 0; 635 ctx->error = 0;
638 ctx->dir_cookie = 0; 636 ctx->dir_cookie = 0;
639 nfs_init_lock_context(&ctx->lock_context); 637 nfs_init_lock_context(&ctx->lock_context);
640 ctx->lock_context.open_context = ctx; 638 ctx->lock_context.open_context = ctx;
639 INIT_LIST_HEAD(&ctx->list);
641 } 640 }
642 return ctx; 641 return ctx;
643} 642}
@@ -653,11 +652,15 @@ static void __put_nfs_open_context(struct nfs_open_context *ctx, int is_sync)
653{ 652{
654 struct inode *inode = ctx->path.dentry->d_inode; 653 struct inode *inode = ctx->path.dentry->d_inode;
655 654
656 if (!atomic_dec_and_lock(&ctx->lock_context.count, &inode->i_lock)) 655 if (!list_empty(&ctx->list)) {
656 if (!atomic_dec_and_lock(&ctx->lock_context.count, &inode->i_lock))
657 return;
658 list_del(&ctx->list);
659 spin_unlock(&inode->i_lock);
660 } else if (!atomic_dec_and_test(&ctx->lock_context.count))
657 return; 661 return;
658 list_del(&ctx->list); 662 if (inode != NULL)
659 spin_unlock(&inode->i_lock); 663 NFS_PROTO(inode)->close_context(ctx, is_sync);
660 NFS_PROTO(inode)->close_context(ctx, is_sync);
661 if (ctx->cred != NULL) 664 if (ctx->cred != NULL)
662 put_rpccred(ctx->cred); 665 put_rpccred(ctx->cred);
663 path_put(&ctx->path); 666 path_put(&ctx->path);
@@ -673,7 +676,7 @@ void put_nfs_open_context(struct nfs_open_context *ctx)
673 * Ensure that mmap has a recent RPC credential for use when writing out 676 * Ensure that mmap has a recent RPC credential for use when writing out
674 * shared pages 677 * shared pages
675 */ 678 */
676static void nfs_file_set_open_context(struct file *filp, struct nfs_open_context *ctx) 679void nfs_file_set_open_context(struct file *filp, struct nfs_open_context *ctx)
677{ 680{
678 struct inode *inode = filp->f_path.dentry->d_inode; 681 struct inode *inode = filp->f_path.dentry->d_inode;
679 struct nfs_inode *nfsi = NFS_I(inode); 682 struct nfs_inode *nfsi = NFS_I(inode);
@@ -730,11 +733,10 @@ int nfs_open(struct inode *inode, struct file *filp)
730 cred = rpc_lookup_cred(); 733 cred = rpc_lookup_cred();
731 if (IS_ERR(cred)) 734 if (IS_ERR(cred))
732 return PTR_ERR(cred); 735 return PTR_ERR(cred);
733 ctx = alloc_nfs_open_context(&filp->f_path, cred); 736 ctx = alloc_nfs_open_context(&filp->f_path, cred, filp->f_mode);
734 put_rpccred(cred); 737 put_rpccred(cred);
735 if (ctx == NULL) 738 if (ctx == NULL)
736 return -ENOMEM; 739 return -ENOMEM;
737 ctx->mode = filp->f_mode;
738 nfs_file_set_open_context(filp, ctx); 740 nfs_file_set_open_context(filp, ctx);
739 put_nfs_open_context(ctx); 741 put_nfs_open_context(ctx);
740 nfs_fscache_set_inode_cookie(inode, filp); 742 nfs_fscache_set_inode_cookie(inode, filp);
@@ -1409,6 +1411,7 @@ void nfs4_evict_inode(struct inode *inode)
1409{ 1411{
1410 truncate_inode_pages(&inode->i_data, 0); 1412 truncate_inode_pages(&inode->i_data, 0);
1411 end_writeback(inode); 1413 end_writeback(inode);
1414 pnfs_destroy_layout(NFS_I(inode));
1412 /* If we are holding a delegation, return it! */ 1415 /* If we are holding a delegation, return it! */
1413 nfs_inode_return_delegation_noreclaim(inode); 1416 nfs_inode_return_delegation_noreclaim(inode);
1414 /* First call standard NFS clear_inode() code */ 1417 /* First call standard NFS clear_inode() code */
@@ -1446,6 +1449,7 @@ static inline void nfs4_init_once(struct nfs_inode *nfsi)
1446 nfsi->delegation = NULL; 1449 nfsi->delegation = NULL;
1447 nfsi->delegation_state = 0; 1450 nfsi->delegation_state = 0;
1448 init_rwsem(&nfsi->rwsem); 1451 init_rwsem(&nfsi->rwsem);
1452 nfsi->layout = NULL;
1449#endif 1453#endif
1450} 1454}
1451 1455
@@ -1493,7 +1497,7 @@ static int nfsiod_start(void)
1493{ 1497{
1494 struct workqueue_struct *wq; 1498 struct workqueue_struct *wq;
1495 dprintk("RPC: creating workqueue nfsiod\n"); 1499 dprintk("RPC: creating workqueue nfsiod\n");
1496 wq = create_singlethread_workqueue("nfsiod"); 1500 wq = alloc_workqueue("nfsiod", WQ_RESCUER, 0);
1497 if (wq == NULL) 1501 if (wq == NULL)
1498 return -ENOMEM; 1502 return -ENOMEM;
1499 nfsiod_workqueue = wq; 1503 nfsiod_workqueue = wq;
@@ -1521,6 +1525,10 @@ static int __init init_nfs_fs(void)
1521{ 1525{
1522 int err; 1526 int err;
1523 1527
1528 err = nfs_idmap_init();
1529 if (err < 0)
1530 goto out9;
1531
1524 err = nfs_dns_resolver_init(); 1532 err = nfs_dns_resolver_init();
1525 if (err < 0) 1533 if (err < 0)
1526 goto out8; 1534 goto out8;
@@ -1585,6 +1593,8 @@ out6:
1585out7: 1593out7:
1586 nfs_dns_resolver_destroy(); 1594 nfs_dns_resolver_destroy();
1587out8: 1595out8:
1596 nfs_idmap_quit();
1597out9:
1588 return err; 1598 return err;
1589} 1599}
1590 1600
@@ -1597,6 +1607,7 @@ static void __exit exit_nfs_fs(void)
1597 nfs_destroy_nfspagecache(); 1607 nfs_destroy_nfspagecache();
1598 nfs_fscache_unregister(); 1608 nfs_fscache_unregister();
1599 nfs_dns_resolver_destroy(); 1609 nfs_dns_resolver_destroy();
1610 nfs_idmap_quit();
1600#ifdef CONFIG_PROC_FS 1611#ifdef CONFIG_PROC_FS
1601 rpc_proc_unregister("nfs"); 1612 rpc_proc_unregister("nfs");
1602#endif 1613#endif
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index c961bc92c107..db08ff3ff454 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -63,6 +63,12 @@ struct nfs_clone_mount {
63#define NFS_UNSPEC_PORT (-1) 63#define NFS_UNSPEC_PORT (-1)
64 64
65/* 65/*
66 * Maximum number of pages that readdir can use for creating
67 * a vmapped array of pages.
68 */
69#define NFS_MAX_READDIR_PAGES 8
70
71/*
66 * In-kernel mount arguments 72 * In-kernel mount arguments
67 */ 73 */
68struct nfs_parsed_mount_data { 74struct nfs_parsed_mount_data {
@@ -181,15 +187,15 @@ extern void nfs_destroy_directcache(void);
181/* nfs2xdr.c */ 187/* nfs2xdr.c */
182extern int nfs_stat_to_errno(int); 188extern int nfs_stat_to_errno(int);
183extern struct rpc_procinfo nfs_procedures[]; 189extern struct rpc_procinfo nfs_procedures[];
184extern __be32 * nfs_decode_dirent(__be32 *, struct nfs_entry *, int); 190extern __be32 *nfs_decode_dirent(struct xdr_stream *, struct nfs_entry *, struct nfs_server *, int);
185 191
186/* nfs3xdr.c */ 192/* nfs3xdr.c */
187extern struct rpc_procinfo nfs3_procedures[]; 193extern struct rpc_procinfo nfs3_procedures[];
188extern __be32 *nfs3_decode_dirent(__be32 *, struct nfs_entry *, int); 194extern __be32 *nfs3_decode_dirent(struct xdr_stream *, struct nfs_entry *, struct nfs_server *, int);
189 195
190/* nfs4xdr.c */ 196/* nfs4xdr.c */
191#ifdef CONFIG_NFS_V4 197#ifdef CONFIG_NFS_V4
192extern __be32 *nfs4_decode_dirent(__be32 *p, struct nfs_entry *entry, int plus); 198extern __be32 *nfs4_decode_dirent(struct xdr_stream *, struct nfs_entry *, struct nfs_server *, int);
193#endif 199#endif
194#ifdef CONFIG_NFS_V4_1 200#ifdef CONFIG_NFS_V4_1
195extern const u32 nfs41_maxread_overhead; 201extern const u32 nfs41_maxread_overhead;
diff --git a/fs/nfs/mount_clnt.c b/fs/nfs/mount_clnt.c
index 59047f8d7d72..eceafe74f473 100644
--- a/fs/nfs/mount_clnt.c
+++ b/fs/nfs/mount_clnt.c
@@ -153,6 +153,7 @@ int nfs_mount(struct nfs_mount_request *info)
153 .rpc_resp = &result, 153 .rpc_resp = &result,
154 }; 154 };
155 struct rpc_create_args args = { 155 struct rpc_create_args args = {
156 .net = &init_net,
156 .protocol = info->protocol, 157 .protocol = info->protocol,
157 .address = info->sap, 158 .address = info->sap,
158 .addrsize = info->salen, 159 .addrsize = info->salen,
@@ -224,6 +225,7 @@ void nfs_umount(const struct nfs_mount_request *info)
224 .to_retries = 2, 225 .to_retries = 2,
225 }; 226 };
226 struct rpc_create_args args = { 227 struct rpc_create_args args = {
228 .net = &init_net,
227 .protocol = IPPROTO_UDP, 229 .protocol = IPPROTO_UDP,
228 .address = info->sap, 230 .address = info->sap,
229 .addrsize = info->salen, 231 .addrsize = info->salen,
@@ -436,7 +438,7 @@ static int decode_auth_flavors(struct xdr_stream *xdr, struct mountres *res)
436 438
437 for (i = 0; i < entries; i++) { 439 for (i = 0; i < entries; i++) {
438 flavors[i] = ntohl(*p++); 440 flavors[i] = ntohl(*p++);
439 dprintk("NFS:\tflavor %u: %d\n", i, flavors[i]); 441 dprintk("NFS: auth flavor[%u]: %d\n", i, flavors[i]);
440 } 442 }
441 *count = i; 443 *count = i;
442 444
diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c
index db8846a0e82e..e6bf45710cc7 100644
--- a/fs/nfs/nfs2xdr.c
+++ b/fs/nfs/nfs2xdr.c
@@ -337,10 +337,10 @@ nfs_xdr_createargs(struct rpc_rqst *req, __be32 *p, struct nfs_createargs *args)
337static int 337static int
338nfs_xdr_renameargs(struct rpc_rqst *req, __be32 *p, struct nfs_renameargs *args) 338nfs_xdr_renameargs(struct rpc_rqst *req, __be32 *p, struct nfs_renameargs *args)
339{ 339{
340 p = xdr_encode_fhandle(p, args->fromfh); 340 p = xdr_encode_fhandle(p, args->old_dir);
341 p = xdr_encode_array(p, args->fromname, args->fromlen); 341 p = xdr_encode_array(p, args->old_name->name, args->old_name->len);
342 p = xdr_encode_fhandle(p, args->tofh); 342 p = xdr_encode_fhandle(p, args->new_dir);
343 p = xdr_encode_array(p, args->toname, args->tolen); 343 p = xdr_encode_array(p, args->new_name->name, args->new_name->len);
344 req->rq_slen = xdr_adjust_iovec(req->rq_svec, p); 344 req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
345 return 0; 345 return 0;
346} 346}
@@ -423,9 +423,7 @@ nfs_xdr_readdirres(struct rpc_rqst *req, __be32 *p, void *dummy)
423 struct page **page; 423 struct page **page;
424 size_t hdrlen; 424 size_t hdrlen;
425 unsigned int pglen, recvd; 425 unsigned int pglen, recvd;
426 u32 len;
427 int status, nr = 0; 426 int status, nr = 0;
428 __be32 *end, *entry, *kaddr;
429 427
430 if ((status = ntohl(*p++))) 428 if ((status = ntohl(*p++)))
431 return nfs_stat_to_errno(status); 429 return nfs_stat_to_errno(status);
@@ -445,80 +443,59 @@ nfs_xdr_readdirres(struct rpc_rqst *req, __be32 *p, void *dummy)
445 if (pglen > recvd) 443 if (pglen > recvd)
446 pglen = recvd; 444 pglen = recvd;
447 page = rcvbuf->pages; 445 page = rcvbuf->pages;
448 kaddr = p = kmap_atomic(*page, KM_USER0);
449 end = (__be32 *)((char *)p + pglen);
450 entry = p;
451
452 /* Make sure the packet actually has a value_follows and EOF entry */
453 if ((entry + 1) > end)
454 goto short_pkt;
455
456 for (; *p++; nr++) {
457 if (p + 2 > end)
458 goto short_pkt;
459 p++; /* fileid */
460 len = ntohl(*p++);
461 p += XDR_QUADLEN(len) + 1; /* name plus cookie */
462 if (len > NFS2_MAXNAMLEN) {
463 dprintk("NFS: giant filename in readdir (len 0x%x)!\n",
464 len);
465 goto err_unmap;
466 }
467 if (p + 2 > end)
468 goto short_pkt;
469 entry = p;
470 }
471
472 /*
473 * Apparently some server sends responses that are a valid size, but
474 * contain no entries, and have value_follows==0 and EOF==0. For
475 * those, just set the EOF marker.
476 */
477 if (!nr && entry[1] == 0) {
478 dprintk("NFS: readdir reply truncated!\n");
479 entry[1] = 1;
480 }
481 out:
482 kunmap_atomic(kaddr, KM_USER0);
483 return nr; 446 return nr;
484 short_pkt: 447}
485 /* 448
486 * When we get a short packet there are 2 possibilities. We can 449static void print_overflow_msg(const char *func, const struct xdr_stream *xdr)
487 * return an error, or fix up the response to look like a valid 450{
488 * response and return what we have so far. If there are no 451 dprintk("nfs: %s: prematurely hit end of receive buffer. "
489 * entries and the packet was short, then return -EIO. If there 452 "Remaining buffer length is %tu words.\n",
490 * are valid entries in the response, return them and pretend that 453 func, xdr->end - xdr->p);
491 * the call was successful, but incomplete. The caller can retry the
492 * readdir starting at the last cookie.
493 */
494 entry[0] = entry[1] = 0;
495 if (!nr)
496 nr = -errno_NFSERR_IO;
497 goto out;
498err_unmap:
499 nr = -errno_NFSERR_IO;
500 goto out;
501} 454}
502 455
503__be32 * 456__be32 *
504nfs_decode_dirent(__be32 *p, struct nfs_entry *entry, int plus) 457nfs_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry, struct nfs_server *server, int plus)
505{ 458{
506 if (!*p++) { 459 __be32 *p;
507 if (!*p) 460 p = xdr_inline_decode(xdr, 4);
461 if (unlikely(!p))
462 goto out_overflow;
463 if (!ntohl(*p++)) {
464 p = xdr_inline_decode(xdr, 4);
465 if (unlikely(!p))
466 goto out_overflow;
467 if (!ntohl(*p++))
508 return ERR_PTR(-EAGAIN); 468 return ERR_PTR(-EAGAIN);
509 entry->eof = 1; 469 entry->eof = 1;
510 return ERR_PTR(-EBADCOOKIE); 470 return ERR_PTR(-EBADCOOKIE);
511 } 471 }
512 472
473 p = xdr_inline_decode(xdr, 8);
474 if (unlikely(!p))
475 goto out_overflow;
476
513 entry->ino = ntohl(*p++); 477 entry->ino = ntohl(*p++);
514 entry->len = ntohl(*p++); 478 entry->len = ntohl(*p++);
479
480 p = xdr_inline_decode(xdr, entry->len + 4);
481 if (unlikely(!p))
482 goto out_overflow;
515 entry->name = (const char *) p; 483 entry->name = (const char *) p;
516 p += XDR_QUADLEN(entry->len); 484 p += XDR_QUADLEN(entry->len);
517 entry->prev_cookie = entry->cookie; 485 entry->prev_cookie = entry->cookie;
518 entry->cookie = ntohl(*p++); 486 entry->cookie = ntohl(*p++);
519 entry->eof = !p[0] && p[1]; 487
488 p = xdr_inline_peek(xdr, 8);
489 if (p != NULL)
490 entry->eof = !p[0] && p[1];
491 else
492 entry->eof = 0;
520 493
521 return p; 494 return p;
495
496out_overflow:
497 print_overflow_msg(__func__, xdr);
498 return ERR_PTR(-EIO);
522} 499}
523 500
524/* 501/*
@@ -596,7 +573,6 @@ nfs_xdr_readlinkres(struct rpc_rqst *req, __be32 *p, void *dummy)
596 struct kvec *iov = rcvbuf->head; 573 struct kvec *iov = rcvbuf->head;
597 size_t hdrlen; 574 size_t hdrlen;
598 u32 len, recvd; 575 u32 len, recvd;
599 char *kaddr;
600 int status; 576 int status;
601 577
602 if ((status = ntohl(*p++))) 578 if ((status = ntohl(*p++)))
@@ -623,10 +599,7 @@ nfs_xdr_readlinkres(struct rpc_rqst *req, __be32 *p, void *dummy)
623 return -EIO; 599 return -EIO;
624 } 600 }
625 601
626 /* NULL terminate the string we got */ 602 xdr_terminate_string(rcvbuf, len);
627 kaddr = (char *)kmap_atomic(rcvbuf->pages[0], KM_USER0);
628 kaddr[len+rcvbuf->page_base] = '\0';
629 kunmap_atomic(kaddr, KM_USER0);
630 return 0; 603 return 0;
631} 604}
632 605
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c
index fabb4f2849a1..ce939c062a52 100644
--- a/fs/nfs/nfs3proc.c
+++ b/fs/nfs/nfs3proc.c
@@ -313,7 +313,7 @@ static void nfs3_free_createdata(struct nfs3_createdata *data)
313 */ 313 */
314static int 314static int
315nfs3_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr, 315nfs3_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
316 int flags, struct nameidata *nd) 316 int flags, struct nfs_open_context *ctx)
317{ 317{
318 struct nfs3_createdata *data; 318 struct nfs3_createdata *data;
319 mode_t mode = sattr->ia_mode; 319 mode_t mode = sattr->ia_mode;
@@ -438,19 +438,38 @@ nfs3_proc_unlink_done(struct rpc_task *task, struct inode *dir)
438 return 1; 438 return 1;
439} 439}
440 440
441static void
442nfs3_proc_rename_setup(struct rpc_message *msg, struct inode *dir)
443{
444 msg->rpc_proc = &nfs3_procedures[NFS3PROC_RENAME];
445}
446
447static int
448nfs3_proc_rename_done(struct rpc_task *task, struct inode *old_dir,
449 struct inode *new_dir)
450{
451 struct nfs_renameres *res;
452
453 if (nfs3_async_handle_jukebox(task, old_dir))
454 return 0;
455 res = task->tk_msg.rpc_resp;
456
457 nfs_post_op_update_inode(old_dir, res->old_fattr);
458 nfs_post_op_update_inode(new_dir, res->new_fattr);
459 return 1;
460}
461
441static int 462static int
442nfs3_proc_rename(struct inode *old_dir, struct qstr *old_name, 463nfs3_proc_rename(struct inode *old_dir, struct qstr *old_name,
443 struct inode *new_dir, struct qstr *new_name) 464 struct inode *new_dir, struct qstr *new_name)
444{ 465{
445 struct nfs3_renameargs arg = { 466 struct nfs_renameargs arg = {
446 .fromfh = NFS_FH(old_dir), 467 .old_dir = NFS_FH(old_dir),
447 .fromname = old_name->name, 468 .old_name = old_name,
448 .fromlen = old_name->len, 469 .new_dir = NFS_FH(new_dir),
449 .tofh = NFS_FH(new_dir), 470 .new_name = new_name,
450 .toname = new_name->name,
451 .tolen = new_name->len
452 }; 471 };
453 struct nfs3_renameres res; 472 struct nfs_renameres res;
454 struct rpc_message msg = { 473 struct rpc_message msg = {
455 .rpc_proc = &nfs3_procedures[NFS3PROC_RENAME], 474 .rpc_proc = &nfs3_procedures[NFS3PROC_RENAME],
456 .rpc_argp = &arg, 475 .rpc_argp = &arg,
@@ -460,17 +479,17 @@ nfs3_proc_rename(struct inode *old_dir, struct qstr *old_name,
460 479
461 dprintk("NFS call rename %s -> %s\n", old_name->name, new_name->name); 480 dprintk("NFS call rename %s -> %s\n", old_name->name, new_name->name);
462 481
463 res.fromattr = nfs_alloc_fattr(); 482 res.old_fattr = nfs_alloc_fattr();
464 res.toattr = nfs_alloc_fattr(); 483 res.new_fattr = nfs_alloc_fattr();
465 if (res.fromattr == NULL || res.toattr == NULL) 484 if (res.old_fattr == NULL || res.new_fattr == NULL)
466 goto out; 485 goto out;
467 486
468 status = rpc_call_sync(NFS_CLIENT(old_dir), &msg, 0); 487 status = rpc_call_sync(NFS_CLIENT(old_dir), &msg, 0);
469 nfs_post_op_update_inode(old_dir, res.fromattr); 488 nfs_post_op_update_inode(old_dir, res.old_fattr);
470 nfs_post_op_update_inode(new_dir, res.toattr); 489 nfs_post_op_update_inode(new_dir, res.new_fattr);
471out: 490out:
472 nfs_free_fattr(res.toattr); 491 nfs_free_fattr(res.old_fattr);
473 nfs_free_fattr(res.fromattr); 492 nfs_free_fattr(res.new_fattr);
474 dprintk("NFS reply rename: %d\n", status); 493 dprintk("NFS reply rename: %d\n", status);
475 return status; 494 return status;
476} 495}
@@ -611,7 +630,7 @@ out:
611 */ 630 */
612static int 631static int
613nfs3_proc_readdir(struct dentry *dentry, struct rpc_cred *cred, 632nfs3_proc_readdir(struct dentry *dentry, struct rpc_cred *cred,
614 u64 cookie, struct page *page, unsigned int count, int plus) 633 u64 cookie, struct page **pages, unsigned int count, int plus)
615{ 634{
616 struct inode *dir = dentry->d_inode; 635 struct inode *dir = dentry->d_inode;
617 __be32 *verf = NFS_COOKIEVERF(dir); 636 __be32 *verf = NFS_COOKIEVERF(dir);
@@ -621,7 +640,7 @@ nfs3_proc_readdir(struct dentry *dentry, struct rpc_cred *cred,
621 .verf = {verf[0], verf[1]}, 640 .verf = {verf[0], verf[1]},
622 .plus = plus, 641 .plus = plus,
623 .count = count, 642 .count = count,
624 .pages = &page 643 .pages = pages
625 }; 644 };
626 struct nfs3_readdirres res = { 645 struct nfs3_readdirres res = {
627 .verf = verf, 646 .verf = verf,
@@ -652,7 +671,8 @@ nfs3_proc_readdir(struct dentry *dentry, struct rpc_cred *cred,
652 671
653 nfs_free_fattr(res.dir_attr); 672 nfs_free_fattr(res.dir_attr);
654out: 673out:
655 dprintk("NFS reply readdir: %d\n", status); 674 dprintk("NFS reply readdir%s: %d\n",
675 plus? "plus" : "", status);
656 return status; 676 return status;
657} 677}
658 678
@@ -722,7 +742,7 @@ nfs3_proc_statfs(struct nfs_server *server, struct nfs_fh *fhandle,
722 dprintk("NFS call fsstat\n"); 742 dprintk("NFS call fsstat\n");
723 nfs_fattr_init(stat->fattr); 743 nfs_fattr_init(stat->fattr);
724 status = rpc_call_sync(server->client, &msg, 0); 744 status = rpc_call_sync(server->client, &msg, 0);
725 dprintk("NFS reply statfs: %d\n", status); 745 dprintk("NFS reply fsstat: %d\n", status);
726 return status; 746 return status;
727} 747}
728 748
@@ -844,6 +864,8 @@ const struct nfs_rpc_ops nfs_v3_clientops = {
844 .unlink_setup = nfs3_proc_unlink_setup, 864 .unlink_setup = nfs3_proc_unlink_setup,
845 .unlink_done = nfs3_proc_unlink_done, 865 .unlink_done = nfs3_proc_unlink_done,
846 .rename = nfs3_proc_rename, 866 .rename = nfs3_proc_rename,
867 .rename_setup = nfs3_proc_rename_setup,
868 .rename_done = nfs3_proc_rename_done,
847 .link = nfs3_proc_link, 869 .link = nfs3_proc_link,
848 .symlink = nfs3_proc_symlink, 870 .symlink = nfs3_proc_symlink,
849 .mkdir = nfs3_proc_mkdir, 871 .mkdir = nfs3_proc_mkdir,
diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c
index 9769704f8ce6..d9a5e832c257 100644
--- a/fs/nfs/nfs3xdr.c
+++ b/fs/nfs/nfs3xdr.c
@@ -100,6 +100,13 @@ static const umode_t nfs_type2fmt[] = {
100 [NF3FIFO] = S_IFIFO, 100 [NF3FIFO] = S_IFIFO,
101}; 101};
102 102
103static void print_overflow_msg(const char *func, const struct xdr_stream *xdr)
104{
105 dprintk("nfs: %s: prematurely hit end of receive buffer. "
106 "Remaining buffer length is %tu words.\n",
107 func, xdr->end - xdr->p);
108}
109
103/* 110/*
104 * Common NFS XDR functions as inlines 111 * Common NFS XDR functions as inlines
105 */ 112 */
@@ -119,6 +126,29 @@ xdr_decode_fhandle(__be32 *p, struct nfs_fh *fh)
119 return NULL; 126 return NULL;
120} 127}
121 128
129static inline __be32 *
130xdr_decode_fhandle_stream(struct xdr_stream *xdr, struct nfs_fh *fh)
131{
132 __be32 *p;
133 p = xdr_inline_decode(xdr, 4);
134 if (unlikely(!p))
135 goto out_overflow;
136 fh->size = ntohl(*p++);
137
138 if (fh->size <= NFS3_FHSIZE) {
139 p = xdr_inline_decode(xdr, fh->size);
140 if (unlikely(!p))
141 goto out_overflow;
142 memcpy(fh->data, p, fh->size);
143 return p + XDR_QUADLEN(fh->size);
144 }
145 return NULL;
146
147out_overflow:
148 print_overflow_msg(__func__, xdr);
149 return ERR_PTR(-EIO);
150}
151
122/* 152/*
123 * Encode/decode time. 153 * Encode/decode time.
124 */ 154 */
@@ -241,6 +271,26 @@ xdr_decode_post_op_attr(__be32 *p, struct nfs_fattr *fattr)
241} 271}
242 272
243static inline __be32 * 273static inline __be32 *
274xdr_decode_post_op_attr_stream(struct xdr_stream *xdr, struct nfs_fattr *fattr)
275{
276 __be32 *p;
277
278 p = xdr_inline_decode(xdr, 4);
279 if (unlikely(!p))
280 goto out_overflow;
281 if (ntohl(*p++)) {
282 p = xdr_inline_decode(xdr, 84);
283 if (unlikely(!p))
284 goto out_overflow;
285 p = xdr_decode_fattr(p, fattr);
286 }
287 return p;
288out_overflow:
289 print_overflow_msg(__func__, xdr);
290 return ERR_PTR(-EIO);
291}
292
293static inline __be32 *
244xdr_decode_pre_op_attr(__be32 *p, struct nfs_fattr *fattr) 294xdr_decode_pre_op_attr(__be32 *p, struct nfs_fattr *fattr)
245{ 295{
246 if (*p++) 296 if (*p++)
@@ -442,12 +492,12 @@ nfs3_xdr_mknodargs(struct rpc_rqst *req, __be32 *p, struct nfs3_mknodargs *args)
442 * Encode RENAME arguments 492 * Encode RENAME arguments
443 */ 493 */
444static int 494static int
445nfs3_xdr_renameargs(struct rpc_rqst *req, __be32 *p, struct nfs3_renameargs *args) 495nfs3_xdr_renameargs(struct rpc_rqst *req, __be32 *p, struct nfs_renameargs *args)
446{ 496{
447 p = xdr_encode_fhandle(p, args->fromfh); 497 p = xdr_encode_fhandle(p, args->old_dir);
448 p = xdr_encode_array(p, args->fromname, args->fromlen); 498 p = xdr_encode_array(p, args->old_name->name, args->old_name->len);
449 p = xdr_encode_fhandle(p, args->tofh); 499 p = xdr_encode_fhandle(p, args->new_dir);
450 p = xdr_encode_array(p, args->toname, args->tolen); 500 p = xdr_encode_array(p, args->new_name->name, args->new_name->len);
451 req->rq_slen = xdr_adjust_iovec(req->rq_svec, p); 501 req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
452 return 0; 502 return 0;
453} 503}
@@ -504,9 +554,8 @@ nfs3_xdr_readdirres(struct rpc_rqst *req, __be32 *p, struct nfs3_readdirres *res
504 struct kvec *iov = rcvbuf->head; 554 struct kvec *iov = rcvbuf->head;
505 struct page **page; 555 struct page **page;
506 size_t hdrlen; 556 size_t hdrlen;
507 u32 len, recvd, pglen; 557 u32 recvd, pglen;
508 int status, nr = 0; 558 int status, nr = 0;
509 __be32 *entry, *end, *kaddr;
510 559
511 status = ntohl(*p++); 560 status = ntohl(*p++);
512 /* Decode post_op_attrs */ 561 /* Decode post_op_attrs */
@@ -536,99 +585,38 @@ nfs3_xdr_readdirres(struct rpc_rqst *req, __be32 *p, struct nfs3_readdirres *res
536 if (pglen > recvd) 585 if (pglen > recvd)
537 pglen = recvd; 586 pglen = recvd;
538 page = rcvbuf->pages; 587 page = rcvbuf->pages;
539 kaddr = p = kmap_atomic(*page, KM_USER0);
540 end = (__be32 *)((char *)p + pglen);
541 entry = p;
542
543 /* Make sure the packet actually has a value_follows and EOF entry */
544 if ((entry + 1) > end)
545 goto short_pkt;
546
547 for (; *p++; nr++) {
548 if (p + 3 > end)
549 goto short_pkt;
550 p += 2; /* inode # */
551 len = ntohl(*p++); /* string length */
552 p += XDR_QUADLEN(len) + 2; /* name + cookie */
553 if (len > NFS3_MAXNAMLEN) {
554 dprintk("NFS: giant filename in readdir (len 0x%x)!\n",
555 len);
556 goto err_unmap;
557 }
558 588
559 if (res->plus) {
560 /* post_op_attr */
561 if (p + 2 > end)
562 goto short_pkt;
563 if (*p++) {
564 p += 21;
565 if (p + 1 > end)
566 goto short_pkt;
567 }
568 /* post_op_fh3 */
569 if (*p++) {
570 if (p + 1 > end)
571 goto short_pkt;
572 len = ntohl(*p++);
573 if (len > NFS3_FHSIZE) {
574 dprintk("NFS: giant filehandle in "
575 "readdir (len 0x%x)!\n", len);
576 goto err_unmap;
577 }
578 p += XDR_QUADLEN(len);
579 }
580 }
581
582 if (p + 2 > end)
583 goto short_pkt;
584 entry = p;
585 }
586
587 /*
588 * Apparently some server sends responses that are a valid size, but
589 * contain no entries, and have value_follows==0 and EOF==0. For
590 * those, just set the EOF marker.
591 */
592 if (!nr && entry[1] == 0) {
593 dprintk("NFS: readdir reply truncated!\n");
594 entry[1] = 1;
595 }
596 out:
597 kunmap_atomic(kaddr, KM_USER0);
598 return nr; 589 return nr;
599 short_pkt:
600 /*
601 * When we get a short packet there are 2 possibilities. We can
602 * return an error, or fix up the response to look like a valid
603 * response and return what we have so far. If there are no
604 * entries and the packet was short, then return -EIO. If there
605 * are valid entries in the response, return them and pretend that
606 * the call was successful, but incomplete. The caller can retry the
607 * readdir starting at the last cookie.
608 */
609 entry[0] = entry[1] = 0;
610 if (!nr)
611 nr = -errno_NFSERR_IO;
612 goto out;
613err_unmap:
614 nr = -errno_NFSERR_IO;
615 goto out;
616} 590}
617 591
618__be32 * 592__be32 *
619nfs3_decode_dirent(__be32 *p, struct nfs_entry *entry, int plus) 593nfs3_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry, struct nfs_server *server, int plus)
620{ 594{
595 __be32 *p;
621 struct nfs_entry old = *entry; 596 struct nfs_entry old = *entry;
622 597
623 if (!*p++) { 598 p = xdr_inline_decode(xdr, 4);
624 if (!*p) 599 if (unlikely(!p))
600 goto out_overflow;
601 if (!ntohl(*p++)) {
602 p = xdr_inline_decode(xdr, 4);
603 if (unlikely(!p))
604 goto out_overflow;
605 if (!ntohl(*p++))
625 return ERR_PTR(-EAGAIN); 606 return ERR_PTR(-EAGAIN);
626 entry->eof = 1; 607 entry->eof = 1;
627 return ERR_PTR(-EBADCOOKIE); 608 return ERR_PTR(-EBADCOOKIE);
628 } 609 }
629 610
611 p = xdr_inline_decode(xdr, 12);
612 if (unlikely(!p))
613 goto out_overflow;
630 p = xdr_decode_hyper(p, &entry->ino); 614 p = xdr_decode_hyper(p, &entry->ino);
631 entry->len = ntohl(*p++); 615 entry->len = ntohl(*p++);
616
617 p = xdr_inline_decode(xdr, entry->len + 8);
618 if (unlikely(!p))
619 goto out_overflow;
632 entry->name = (const char *) p; 620 entry->name = (const char *) p;
633 p += XDR_QUADLEN(entry->len); 621 p += XDR_QUADLEN(entry->len);
634 entry->prev_cookie = entry->cookie; 622 entry->prev_cookie = entry->cookie;
@@ -636,10 +624,17 @@ nfs3_decode_dirent(__be32 *p, struct nfs_entry *entry, int plus)
636 624
637 if (plus) { 625 if (plus) {
638 entry->fattr->valid = 0; 626 entry->fattr->valid = 0;
639 p = xdr_decode_post_op_attr(p, entry->fattr); 627 p = xdr_decode_post_op_attr_stream(xdr, entry->fattr);
628 if (IS_ERR(p))
629 goto out_overflow_exit;
640 /* In fact, a post_op_fh3: */ 630 /* In fact, a post_op_fh3: */
631 p = xdr_inline_decode(xdr, 4);
632 if (unlikely(!p))
633 goto out_overflow;
641 if (*p++) { 634 if (*p++) {
642 p = xdr_decode_fhandle(p, entry->fh); 635 p = xdr_decode_fhandle_stream(xdr, entry->fh);
636 if (IS_ERR(p))
637 goto out_overflow_exit;
643 /* Ugh -- server reply was truncated */ 638 /* Ugh -- server reply was truncated */
644 if (p == NULL) { 639 if (p == NULL) {
645 dprintk("NFS: FH truncated\n"); 640 dprintk("NFS: FH truncated\n");
@@ -650,8 +645,18 @@ nfs3_decode_dirent(__be32 *p, struct nfs_entry *entry, int plus)
650 memset((u8*)(entry->fh), 0, sizeof(*entry->fh)); 645 memset((u8*)(entry->fh), 0, sizeof(*entry->fh));
651 } 646 }
652 647
653 entry->eof = !p[0] && p[1]; 648 p = xdr_inline_peek(xdr, 8);
649 if (p != NULL)
650 entry->eof = !p[0] && p[1];
651 else
652 entry->eof = 0;
653
654 return p; 654 return p;
655
656out_overflow:
657 print_overflow_msg(__func__, xdr);
658out_overflow_exit:
659 return ERR_PTR(-EIO);
655} 660}
656 661
657/* 662/*
@@ -824,7 +829,6 @@ nfs3_xdr_readlinkres(struct rpc_rqst *req, __be32 *p, struct nfs_fattr *fattr)
824 struct kvec *iov = rcvbuf->head; 829 struct kvec *iov = rcvbuf->head;
825 size_t hdrlen; 830 size_t hdrlen;
826 u32 len, recvd; 831 u32 len, recvd;
827 char *kaddr;
828 int status; 832 int status;
829 833
830 status = ntohl(*p++); 834 status = ntohl(*p++);
@@ -857,10 +861,7 @@ nfs3_xdr_readlinkres(struct rpc_rqst *req, __be32 *p, struct nfs_fattr *fattr)
857 return -EIO; 861 return -EIO;
858 } 862 }
859 863
860 /* NULL terminate the string we got */ 864 xdr_terminate_string(rcvbuf, len);
861 kaddr = (char*)kmap_atomic(rcvbuf->pages[0], KM_USER0);
862 kaddr[len+rcvbuf->page_base] = '\0';
863 kunmap_atomic(kaddr, KM_USER0);
864 return 0; 865 return 0;
865} 866}
866 867
@@ -970,14 +971,14 @@ nfs3_xdr_createres(struct rpc_rqst *req, __be32 *p, struct nfs3_diropres *res)
970 * Decode RENAME reply 971 * Decode RENAME reply
971 */ 972 */
972static int 973static int
973nfs3_xdr_renameres(struct rpc_rqst *req, __be32 *p, struct nfs3_renameres *res) 974nfs3_xdr_renameres(struct rpc_rqst *req, __be32 *p, struct nfs_renameres *res)
974{ 975{
975 int status; 976 int status;
976 977
977 if ((status = ntohl(*p++)) != 0) 978 if ((status = ntohl(*p++)) != 0)
978 status = nfs_stat_to_errno(status); 979 status = nfs_stat_to_errno(status);
979 p = xdr_decode_wcc_data(p, res->fromattr); 980 p = xdr_decode_wcc_data(p, res->old_fattr);
980 p = xdr_decode_wcc_data(p, res->toattr); 981 p = xdr_decode_wcc_data(p, res->new_fattr);
981 return status; 982 return status;
982} 983}
983 984
@@ -1043,8 +1044,9 @@ nfs3_xdr_fsinfores(struct rpc_rqst *req, __be32 *p, struct nfs_fsinfo *res)
1043 res->wtmult = ntohl(*p++); 1044 res->wtmult = ntohl(*p++);
1044 res->dtpref = ntohl(*p++); 1045 res->dtpref = ntohl(*p++);
1045 p = xdr_decode_hyper(p, &res->maxfilesize); 1046 p = xdr_decode_hyper(p, &res->maxfilesize);
1047 p = xdr_decode_time3(p, &res->time_delta);
1046 1048
1047 /* ignore time_delta and properties */ 1049 /* ignore properties */
1048 res->lease_time = 0; 1050 res->lease_time = 0;
1049 return 0; 1051 return 0;
1050} 1052}
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index 311e15cc8af0..9fa496387fdf 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -242,8 +242,6 @@ extern int nfs4_proc_renew(struct nfs_client *, struct rpc_cred *);
242extern int nfs4_init_clientid(struct nfs_client *, struct rpc_cred *); 242extern int nfs4_init_clientid(struct nfs_client *, struct rpc_cred *);
243extern int nfs41_init_clientid(struct nfs_client *, struct rpc_cred *); 243extern int nfs41_init_clientid(struct nfs_client *, struct rpc_cred *);
244extern int nfs4_do_close(struct path *path, struct nfs4_state *state, gfp_t gfp_mask, int wait); 244extern int nfs4_do_close(struct path *path, struct nfs4_state *state, gfp_t gfp_mask, int wait);
245extern struct dentry *nfs4_atomic_open(struct inode *, struct dentry *, struct nameidata *);
246extern int nfs4_open_revalidate(struct inode *, struct dentry *, int, struct nameidata *);
247extern int nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle); 245extern int nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle);
248extern int nfs4_proc_fs_locations(struct inode *dir, const struct qstr *name, 246extern int nfs4_proc_fs_locations(struct inode *dir, const struct qstr *name,
249 struct nfs4_fs_locations *fs_locations, struct page *page); 247 struct nfs4_fs_locations *fs_locations, struct page *page);
@@ -333,7 +331,7 @@ extern void nfs_free_seqid(struct nfs_seqid *seqid);
333extern const nfs4_stateid zero_stateid; 331extern const nfs4_stateid zero_stateid;
334 332
335/* nfs4xdr.c */ 333/* nfs4xdr.c */
336extern __be32 *nfs4_decode_dirent(__be32 *p, struct nfs_entry *entry, int plus); 334extern __be32 *nfs4_decode_dirent(struct xdr_stream *, struct nfs_entry *, struct nfs_server *, int);
337extern struct rpc_procinfo nfs4_procedures[]; 335extern struct rpc_procinfo nfs4_procedures[];
338 336
339struct nfs4_mount_data; 337struct nfs4_mount_data;
diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c
new file mode 100644
index 000000000000..2e92f0d8d654
--- /dev/null
+++ b/fs/nfs/nfs4filelayout.c
@@ -0,0 +1,280 @@
1/*
2 * Module for the pnfs nfs4 file layout driver.
3 * Defines all I/O and Policy interface operations, plus code
4 * to register itself with the pNFS client.
5 *
6 * Copyright (c) 2002
7 * The Regents of the University of Michigan
8 * All Rights Reserved
9 *
10 * Dean Hildebrand <dhildebz@umich.edu>
11 *
12 * Permission is granted to use, copy, create derivative works, and
13 * redistribute this software and such derivative works for any purpose,
14 * so long as the name of the University of Michigan is not used in
15 * any advertising or publicity pertaining to the use or distribution
16 * of this software without specific, written prior authorization. If
17 * the above copyright notice or any other identification of the
18 * University of Michigan is included in any copy of any portion of
19 * this software, then the disclaimer below must also be included.
20 *
21 * This software is provided as is, without representation or warranty
22 * of any kind either express or implied, including without limitation
23 * the implied warranties of merchantability, fitness for a particular
24 * purpose, or noninfringement. The Regents of the University of
25 * Michigan shall not be liable for any damages, including special,
26 * indirect, incidental, or consequential damages, with respect to any
27 * claim arising out of or in connection with the use of the software,
28 * even if it has been or is hereafter advised of the possibility of
29 * such damages.
30 */
31
32#include <linux/nfs_fs.h>
33
34#include "internal.h"
35#include "nfs4filelayout.h"
36
37#define NFSDBG_FACILITY NFSDBG_PNFS_LD
38
39MODULE_LICENSE("GPL");
40MODULE_AUTHOR("Dean Hildebrand <dhildebz@umich.edu>");
41MODULE_DESCRIPTION("The NFSv4 file layout driver");
42
43static int
44filelayout_set_layoutdriver(struct nfs_server *nfss)
45{
46 int status = pnfs_alloc_init_deviceid_cache(nfss->nfs_client,
47 nfs4_fl_free_deviceid_callback);
48 if (status) {
49 printk(KERN_WARNING "%s: deviceid cache could not be "
50 "initialized\n", __func__);
51 return status;
52 }
53 dprintk("%s: deviceid cache has been initialized successfully\n",
54 __func__);
55 return 0;
56}
57
58/* Clear out the layout by destroying its device list */
59static int
60filelayout_clear_layoutdriver(struct nfs_server *nfss)
61{
62 dprintk("--> %s\n", __func__);
63
64 if (nfss->nfs_client->cl_devid_cache)
65 pnfs_put_deviceid_cache(nfss->nfs_client);
66 return 0;
67}
68
69/*
70 * filelayout_check_layout()
71 *
72 * Make sure layout segment parameters are sane WRT the device.
73 * At this point no generic layer initialization of the lseg has occurred,
74 * and nothing has been added to the layout_hdr cache.
75 *
76 */
77static int
78filelayout_check_layout(struct pnfs_layout_hdr *lo,
79 struct nfs4_filelayout_segment *fl,
80 struct nfs4_layoutget_res *lgr,
81 struct nfs4_deviceid *id)
82{
83 struct nfs4_file_layout_dsaddr *dsaddr;
84 int status = -EINVAL;
85 struct nfs_server *nfss = NFS_SERVER(lo->inode);
86
87 dprintk("--> %s\n", __func__);
88
89 if (fl->pattern_offset > lgr->range.offset) {
90 dprintk("%s pattern_offset %lld to large\n",
91 __func__, fl->pattern_offset);
92 goto out;
93 }
94
95 if (fl->stripe_unit % PAGE_SIZE) {
96 dprintk("%s Stripe unit (%u) not page aligned\n",
97 __func__, fl->stripe_unit);
98 goto out;
99 }
100
101 /* find and reference the deviceid */
102 dsaddr = nfs4_fl_find_get_deviceid(nfss->nfs_client, id);
103 if (dsaddr == NULL) {
104 dsaddr = get_device_info(lo->inode, id);
105 if (dsaddr == NULL)
106 goto out;
107 }
108 fl->dsaddr = dsaddr;
109
110 if (fl->first_stripe_index < 0 ||
111 fl->first_stripe_index >= dsaddr->stripe_count) {
112 dprintk("%s Bad first_stripe_index %d\n",
113 __func__, fl->first_stripe_index);
114 goto out_put;
115 }
116
117 if ((fl->stripe_type == STRIPE_SPARSE &&
118 fl->num_fh > 1 && fl->num_fh != dsaddr->ds_num) ||
119 (fl->stripe_type == STRIPE_DENSE &&
120 fl->num_fh != dsaddr->stripe_count)) {
121 dprintk("%s num_fh %u not valid for given packing\n",
122 __func__, fl->num_fh);
123 goto out_put;
124 }
125
126 if (fl->stripe_unit % nfss->rsize || fl->stripe_unit % nfss->wsize) {
127 dprintk("%s Stripe unit (%u) not aligned with rsize %u "
128 "wsize %u\n", __func__, fl->stripe_unit, nfss->rsize,
129 nfss->wsize);
130 }
131
132 status = 0;
133out:
134 dprintk("--> %s returns %d\n", __func__, status);
135 return status;
136out_put:
137 pnfs_put_deviceid(nfss->nfs_client->cl_devid_cache, &dsaddr->deviceid);
138 goto out;
139}
140
141static void filelayout_free_fh_array(struct nfs4_filelayout_segment *fl)
142{
143 int i;
144
145 for (i = 0; i < fl->num_fh; i++) {
146 if (!fl->fh_array[i])
147 break;
148 kfree(fl->fh_array[i]);
149 }
150 kfree(fl->fh_array);
151 fl->fh_array = NULL;
152}
153
/* Release a file layout segment: first its file handle array, then @fl. */
static void _filelayout_free_lseg(struct nfs4_filelayout_segment *fl)
{
	filelayout_free_fh_array(fl);

	kfree(fl);
}
160
161static int
162filelayout_decode_layout(struct pnfs_layout_hdr *flo,
163 struct nfs4_filelayout_segment *fl,
164 struct nfs4_layoutget_res *lgr,
165 struct nfs4_deviceid *id)
166{
167 uint32_t *p = (uint32_t *)lgr->layout.buf;
168 uint32_t nfl_util;
169 int i;
170
171 dprintk("%s: set_layout_map Begin\n", __func__);
172
173 memcpy(id, p, sizeof(*id));
174 p += XDR_QUADLEN(NFS4_DEVICEID4_SIZE);
175 print_deviceid(id);
176
177 nfl_util = be32_to_cpup(p++);
178 if (nfl_util & NFL4_UFLG_COMMIT_THRU_MDS)
179 fl->commit_through_mds = 1;
180 if (nfl_util & NFL4_UFLG_DENSE)
181 fl->stripe_type = STRIPE_DENSE;
182 else
183 fl->stripe_type = STRIPE_SPARSE;
184 fl->stripe_unit = nfl_util & ~NFL4_UFLG_MASK;
185
186 fl->first_stripe_index = be32_to_cpup(p++);
187 p = xdr_decode_hyper(p, &fl->pattern_offset);
188 fl->num_fh = be32_to_cpup(p++);
189
190 dprintk("%s: nfl_util 0x%X num_fh %u fsi %u po %llu\n",
191 __func__, nfl_util, fl->num_fh, fl->first_stripe_index,
192 fl->pattern_offset);
193
194 fl->fh_array = kzalloc(fl->num_fh * sizeof(struct nfs_fh *),
195 GFP_KERNEL);
196 if (!fl->fh_array)
197 return -ENOMEM;
198
199 for (i = 0; i < fl->num_fh; i++) {
200 /* Do we want to use a mempool here? */
201 fl->fh_array[i] = kmalloc(sizeof(struct nfs_fh), GFP_KERNEL);
202 if (!fl->fh_array[i]) {
203 filelayout_free_fh_array(fl);
204 return -ENOMEM;
205 }
206 fl->fh_array[i]->size = be32_to_cpup(p++);
207 if (sizeof(struct nfs_fh) < fl->fh_array[i]->size) {
208 printk(KERN_ERR "Too big fh %d received %d\n",
209 i, fl->fh_array[i]->size);
210 filelayout_free_fh_array(fl);
211 return -EIO;
212 }
213 memcpy(fl->fh_array[i]->data, p, fl->fh_array[i]->size);
214 p += XDR_QUADLEN(fl->fh_array[i]->size);
215 dprintk("DEBUG: %s: fh len %d\n", __func__,
216 fl->fh_array[i]->size);
217 }
218
219 return 0;
220}
221
222static struct pnfs_layout_segment *
223filelayout_alloc_lseg(struct pnfs_layout_hdr *layoutid,
224 struct nfs4_layoutget_res *lgr)
225{
226 struct nfs4_filelayout_segment *fl;
227 int rc;
228 struct nfs4_deviceid id;
229
230 dprintk("--> %s\n", __func__);
231 fl = kzalloc(sizeof(*fl), GFP_KERNEL);
232 if (!fl)
233 return NULL;
234
235 rc = filelayout_decode_layout(layoutid, fl, lgr, &id);
236 if (rc != 0 || filelayout_check_layout(layoutid, fl, lgr, &id)) {
237 _filelayout_free_lseg(fl);
238 return NULL;
239 }
240 return &fl->generic_hdr;
241}
242
243static void
244filelayout_free_lseg(struct pnfs_layout_segment *lseg)
245{
246 struct nfs_server *nfss = NFS_SERVER(lseg->layout->inode);
247 struct nfs4_filelayout_segment *fl = FILELAYOUT_LSEG(lseg);
248
249 dprintk("--> %s\n", __func__);
250 pnfs_put_deviceid(nfss->nfs_client->cl_devid_cache,
251 &fl->dsaddr->deviceid);
252 _filelayout_free_lseg(fl);
253}
254
255static struct pnfs_layoutdriver_type filelayout_type = {
256 .id = LAYOUT_NFSV4_1_FILES,
257 .name = "LAYOUT_NFSV4_1_FILES",
258 .owner = THIS_MODULE,
259 .set_layoutdriver = filelayout_set_layoutdriver,
260 .clear_layoutdriver = filelayout_clear_layoutdriver,
261 .alloc_lseg = filelayout_alloc_lseg,
262 .free_lseg = filelayout_free_lseg,
263};
264
265static int __init nfs4filelayout_init(void)
266{
267 printk(KERN_INFO "%s: NFSv4 File Layout Driver Registering...\n",
268 __func__);
269 return pnfs_register_layoutdriver(&filelayout_type);
270}
271
272static void __exit nfs4filelayout_exit(void)
273{
274 printk(KERN_INFO "%s: NFSv4 File Layout Driver Unregistering...\n",
275 __func__);
276 pnfs_unregister_layoutdriver(&filelayout_type);
277}
278
279module_init(nfs4filelayout_init);
280module_exit(nfs4filelayout_exit);
diff --git a/fs/nfs/nfs4filelayout.h b/fs/nfs/nfs4filelayout.h
new file mode 100644
index 000000000000..bbf60dd2ab9d
--- /dev/null
+++ b/fs/nfs/nfs4filelayout.h
@@ -0,0 +1,94 @@
1/*
2 * NFSv4 file layout driver data structures.
3 *
4 * Copyright (c) 2002
5 * The Regents of the University of Michigan
6 * All Rights Reserved
7 *
8 * Dean Hildebrand <dhildebz@umich.edu>
9 *
10 * Permission is granted to use, copy, create derivative works, and
11 * redistribute this software and such derivative works for any purpose,
12 * so long as the name of the University of Michigan is not used in
13 * any advertising or publicity pertaining to the use or distribution
14 * of this software without specific, written prior authorization. If
15 * the above copyright notice or any other identification of the
16 * University of Michigan is included in any copy of any portion of
17 * this software, then the disclaimer below must also be included.
18 *
19 * This software is provided as is, without representation or warranty
20 * of any kind either express or implied, including without limitation
21 * the implied warranties of merchantability, fitness for a particular
22 * purpose, or noninfringement. The Regents of the University of
23 * Michigan shall not be liable for any damages, including special,
24 * indirect, incidental, or consequential damages, with respect to any
25 * claim arising out of or in connection with the use of the software,
26 * even if it has been or is hereafter advised of the possibility of
27 * such damages.
28 */
29
30#ifndef FS_NFS_NFS4FILELAYOUT_H
31#define FS_NFS_NFS4FILELAYOUT_H
32
33#include "pnfs.h"
34
35/*
36 * Field testing shows we need to support up to 4096 stripe indices.
37 * We store each index as a u8 (u32 on the wire) to keep the memory footprint
38 * reasonable. This in turn means we support a maximum of 256
39 * RFC 5661 multipath_list4 structures.
40 */
41#define NFS4_PNFS_MAX_STRIPE_CNT 4096
42#define NFS4_PNFS_MAX_MULTI_CNT 256 /* 256 fit into a u8 stripe_index */
43
/* Values stored in nfs4_filelayout_segment.stripe_type. */
enum stripetype4 {
	STRIPE_SPARSE	= 1,
	STRIPE_DENSE	= 2,
};
48
49/* Individual ip address */
50struct nfs4_pnfs_ds {
51 struct list_head ds_node; /* nfs4_pnfs_dev_hlist dev_dslist */
52 u32 ds_ip_addr;
53 u32 ds_port;
54 struct nfs_client *ds_clp;
55 atomic_t ds_count;
56};
57
58struct nfs4_file_layout_dsaddr {
59 struct pnfs_deviceid_node deviceid;
60 u32 stripe_count;
61 u8 *stripe_indices;
62 u32 ds_num;
63 struct nfs4_pnfs_ds *ds_list[1];
64};
65
66struct nfs4_filelayout_segment {
67 struct pnfs_layout_segment generic_hdr;
68 u32 stripe_type;
69 u32 commit_through_mds;
70 u32 stripe_unit;
71 u32 first_stripe_index;
72 u64 pattern_offset;
73 struct nfs4_file_layout_dsaddr *dsaddr; /* Point to GETDEVINFO data */
74 unsigned int num_fh;
75 struct nfs_fh **fh_array;
76};
77
78static inline struct nfs4_filelayout_segment *
79FILELAYOUT_LSEG(struct pnfs_layout_segment *lseg)
80{
81 return container_of(lseg,
82 struct nfs4_filelayout_segment,
83 generic_hdr);
84}
85
86extern void nfs4_fl_free_deviceid_callback(struct pnfs_deviceid_node *);
87extern void print_ds(struct nfs4_pnfs_ds *ds);
88extern void print_deviceid(struct nfs4_deviceid *dev_id);
89extern struct nfs4_file_layout_dsaddr *
90nfs4_fl_find_get_deviceid(struct nfs_client *, struct nfs4_deviceid *dev_id);
91struct nfs4_file_layout_dsaddr *
92get_device_info(struct inode *inode, struct nfs4_deviceid *dev_id);
93
94#endif /* FS_NFS_NFS4FILELAYOUT_H */
diff --git a/fs/nfs/nfs4filelayoutdev.c b/fs/nfs/nfs4filelayoutdev.c
new file mode 100644
index 000000000000..51fe64ace55a
--- /dev/null
+++ b/fs/nfs/nfs4filelayoutdev.c
@@ -0,0 +1,448 @@
1/*
2 * Device operations for the pnfs nfs4 file layout driver.
3 *
4 * Copyright (c) 2002
5 * The Regents of the University of Michigan
6 * All Rights Reserved
7 *
8 * Dean Hildebrand <dhildebz@umich.edu>
9 * Garth Goodson <Garth.Goodson@netapp.com>
10 *
11 * Permission is granted to use, copy, create derivative works, and
12 * redistribute this software and such derivative works for any purpose,
13 * so long as the name of the University of Michigan is not used in
14 * any advertising or publicity pertaining to the use or distribution
15 * of this software without specific, written prior authorization. If
16 * the above copyright notice or any other identification of the
17 * University of Michigan is included in any copy of any portion of
18 * this software, then the disclaimer below must also be included.
19 *
20 * This software is provided as is, without representation or warranty
21 * of any kind either express or implied, including without limitation
22 * the implied warranties of merchantability, fitness for a particular
23 * purpose, or noninfringement. The Regents of the University of
24 * Michigan shall not be liable for any damages, including special,
25 * indirect, incidental, or consequential damages, with respect to any
26 * claim arising out of or in connection with the use of the software,
27 * even if it has been or is hereafter advised of the possibility of
28 * such damages.
29 */
30
31#include <linux/nfs_fs.h>
32#include <linux/vmalloc.h>
33
34#include "internal.h"
35#include "nfs4filelayout.h"
36
37#define NFSDBG_FACILITY NFSDBG_PNFS_LD
38
39/*
40 * Data server cache
41 *
42 * Data servers can be mapped to different device ids.
43 * nfs4_pnfs_ds reference counting
44 * - set to 1 on allocation
45 * - incremented when a device id maps a data server already in the cache.
46 * - decremented when deviceid is removed from the cache.
47 */
48DEFINE_SPINLOCK(nfs4_ds_cache_lock);
49static LIST_HEAD(nfs4_data_server_cache);
50
51/* Debug routines */
52void
53print_ds(struct nfs4_pnfs_ds *ds)
54{
55 if (ds == NULL) {
56 printk("%s NULL device\n", __func__);
57 return;
58 }
59 printk(" ip_addr %x port %hu\n"
60 " ref count %d\n"
61 " client %p\n"
62 " cl_exchange_flags %x\n",
63 ntohl(ds->ds_ip_addr), ntohs(ds->ds_port),
64 atomic_read(&ds->ds_count), ds->ds_clp,
65 ds->ds_clp ? ds->ds_clp->cl_exchange_flags : 0);
66}
67
68void
69print_ds_list(struct nfs4_file_layout_dsaddr *dsaddr)
70{
71 int i;
72
73 ifdebug(FACILITY) {
74 printk("%s dsaddr->ds_num %d\n", __func__,
75 dsaddr->ds_num);
76 for (i = 0; i < dsaddr->ds_num; i++)
77 print_ds(dsaddr->ds_list[i]);
78 }
79}
80
81void print_deviceid(struct nfs4_deviceid *id)
82{
83 u32 *p = (u32 *)id;
84
85 dprintk("%s: device id= [%x%x%x%x]\n", __func__,
86 p[0], p[1], p[2], p[3]);
87}
88
89/* nfs4_ds_cache_lock is held */
90static struct nfs4_pnfs_ds *
91_data_server_lookup_locked(u32 ip_addr, u32 port)
92{
93 struct nfs4_pnfs_ds *ds;
94
95 dprintk("_data_server_lookup: ip_addr=%x port=%hu\n",
96 ntohl(ip_addr), ntohs(port));
97
98 list_for_each_entry(ds, &nfs4_data_server_cache, ds_node) {
99 if (ds->ds_ip_addr == ip_addr &&
100 ds->ds_port == port) {
101 return ds;
102 }
103 }
104 return NULL;
105}
106
107static void
108destroy_ds(struct nfs4_pnfs_ds *ds)
109{
110 dprintk("--> %s\n", __func__);
111 ifdebug(FACILITY)
112 print_ds(ds);
113
114 if (ds->ds_clp)
115 nfs_put_client(ds->ds_clp);
116 kfree(ds);
117}
118
119static void
120nfs4_fl_free_deviceid(struct nfs4_file_layout_dsaddr *dsaddr)
121{
122 struct nfs4_pnfs_ds *ds;
123 int i;
124
125 print_deviceid(&dsaddr->deviceid.de_id);
126
127 for (i = 0; i < dsaddr->ds_num; i++) {
128 ds = dsaddr->ds_list[i];
129 if (ds != NULL) {
130 if (atomic_dec_and_lock(&ds->ds_count,
131 &nfs4_ds_cache_lock)) {
132 list_del_init(&ds->ds_node);
133 spin_unlock(&nfs4_ds_cache_lock);
134 destroy_ds(ds);
135 }
136 }
137 }
138 kfree(dsaddr->stripe_indices);
139 kfree(dsaddr);
140}
141
142void
143nfs4_fl_free_deviceid_callback(struct pnfs_deviceid_node *device)
144{
145 struct nfs4_file_layout_dsaddr *dsaddr =
146 container_of(device, struct nfs4_file_layout_dsaddr, deviceid);
147
148 nfs4_fl_free_deviceid(dsaddr);
149}
150
151static struct nfs4_pnfs_ds *
152nfs4_pnfs_ds_add(struct inode *inode, u32 ip_addr, u32 port)
153{
154 struct nfs4_pnfs_ds *tmp_ds, *ds;
155
156 ds = kzalloc(sizeof(*tmp_ds), GFP_KERNEL);
157 if (!ds)
158 goto out;
159
160 spin_lock(&nfs4_ds_cache_lock);
161 tmp_ds = _data_server_lookup_locked(ip_addr, port);
162 if (tmp_ds == NULL) {
163 ds->ds_ip_addr = ip_addr;
164 ds->ds_port = port;
165 atomic_set(&ds->ds_count, 1);
166 INIT_LIST_HEAD(&ds->ds_node);
167 ds->ds_clp = NULL;
168 list_add(&ds->ds_node, &nfs4_data_server_cache);
169 dprintk("%s add new data server ip 0x%x\n", __func__,
170 ds->ds_ip_addr);
171 } else {
172 kfree(ds);
173 atomic_inc(&tmp_ds->ds_count);
174 dprintk("%s data server found ip 0x%x, inc'ed ds_count to %d\n",
175 __func__, tmp_ds->ds_ip_addr,
176 atomic_read(&tmp_ds->ds_count));
177 ds = tmp_ds;
178 }
179 spin_unlock(&nfs4_ds_cache_lock);
180out:
181 return ds;
182}
183
184/*
185 * Currently only support ipv4, and one multi-path address.
186 */
187static struct nfs4_pnfs_ds *
188decode_and_add_ds(__be32 **pp, struct inode *inode)
189{
190 struct nfs4_pnfs_ds *ds = NULL;
191 char *buf;
192 const char *ipend, *pstr;
193 u32 ip_addr, port;
194 int nlen, rlen, i;
195 int tmp[2];
196 __be32 *r_netid, *r_addr, *p = *pp;
197
198 /* r_netid */
199 nlen = be32_to_cpup(p++);
200 r_netid = p;
201 p += XDR_QUADLEN(nlen);
202
203 /* r_addr */
204 rlen = be32_to_cpup(p++);
205 r_addr = p;
206 p += XDR_QUADLEN(rlen);
207 *pp = p;
208
209 /* Check that netid is "tcp" */
210 if (nlen != 3 || memcmp((char *)r_netid, "tcp", 3)) {
211 dprintk("%s: ERROR: non ipv4 TCP r_netid\n", __func__);
212 goto out_err;
213 }
214
215 /* ipv6 length plus port is legal */
216 if (rlen > INET6_ADDRSTRLEN + 8) {
217 dprintk("%s Invalid address, length %d\n", __func__,
218 rlen);
219 goto out_err;
220 }
221 buf = kmalloc(rlen + 1, GFP_KERNEL);
222 buf[rlen] = '\0';
223 memcpy(buf, r_addr, rlen);
224
225 /* replace the port dots with dashes for the in4_pton() delimiter*/
226 for (i = 0; i < 2; i++) {
227 char *res = strrchr(buf, '.');
228 *res = '-';
229 }
230
231 /* Currently only support ipv4 address */
232 if (in4_pton(buf, rlen, (u8 *)&ip_addr, '-', &ipend) == 0) {
233 dprintk("%s: Only ipv4 addresses supported\n", __func__);
234 goto out_free;
235 }
236
237 /* port */
238 pstr = ipend;
239 sscanf(pstr, "-%d-%d", &tmp[0], &tmp[1]);
240 port = htons((tmp[0] << 8) | (tmp[1]));
241
242 ds = nfs4_pnfs_ds_add(inode, ip_addr, port);
243 dprintk("%s Decoded address and port %s\n", __func__, buf);
244out_free:
245 kfree(buf);
246out_err:
247 return ds;
248}
249
250/* Decode opaque device data and return the result */
251static struct nfs4_file_layout_dsaddr*
252decode_device(struct inode *ino, struct pnfs_device *pdev)
253{
254 int i, dummy;
255 u32 cnt, num;
256 u8 *indexp;
257 __be32 *p = (__be32 *)pdev->area, *indicesp;
258 struct nfs4_file_layout_dsaddr *dsaddr;
259
260 /* Get the stripe count (number of stripe index) */
261 cnt = be32_to_cpup(p++);
262 dprintk("%s stripe count %d\n", __func__, cnt);
263 if (cnt > NFS4_PNFS_MAX_STRIPE_CNT) {
264 printk(KERN_WARNING "%s: stripe count %d greater than "
265 "supported maximum %d\n", __func__,
266 cnt, NFS4_PNFS_MAX_STRIPE_CNT);
267 goto out_err;
268 }
269
270 /* Check the multipath list count */
271 indicesp = p;
272 p += XDR_QUADLEN(cnt << 2);
273 num = be32_to_cpup(p++);
274 dprintk("%s ds_num %u\n", __func__, num);
275 if (num > NFS4_PNFS_MAX_MULTI_CNT) {
276 printk(KERN_WARNING "%s: multipath count %d greater than "
277 "supported maximum %d\n", __func__,
278 num, NFS4_PNFS_MAX_MULTI_CNT);
279 goto out_err;
280 }
281 dsaddr = kzalloc(sizeof(*dsaddr) +
282 (sizeof(struct nfs4_pnfs_ds *) * (num - 1)),
283 GFP_KERNEL);
284 if (!dsaddr)
285 goto out_err;
286
287 dsaddr->stripe_indices = kzalloc(sizeof(u8) * cnt, GFP_KERNEL);
288 if (!dsaddr->stripe_indices)
289 goto out_err_free;
290
291 dsaddr->stripe_count = cnt;
292 dsaddr->ds_num = num;
293
294 memcpy(&dsaddr->deviceid.de_id, &pdev->dev_id, sizeof(pdev->dev_id));
295
296 /* Go back an read stripe indices */
297 p = indicesp;
298 indexp = &dsaddr->stripe_indices[0];
299 for (i = 0; i < dsaddr->stripe_count; i++) {
300 *indexp = be32_to_cpup(p++);
301 if (*indexp >= num)
302 goto out_err_free;
303 indexp++;
304 }
305 /* Skip already read multipath list count */
306 p++;
307
308 for (i = 0; i < dsaddr->ds_num; i++) {
309 int j;
310
311 dummy = be32_to_cpup(p++); /* multipath count */
312 if (dummy > 1) {
313 printk(KERN_WARNING
314 "%s: Multipath count %d not supported, "
315 "skipping all greater than 1\n", __func__,
316 dummy);
317 }
318 for (j = 0; j < dummy; j++) {
319 if (j == 0) {
320 dsaddr->ds_list[i] = decode_and_add_ds(&p, ino);
321 if (dsaddr->ds_list[i] == NULL)
322 goto out_err_free;
323 } else {
324 u32 len;
325 /* skip extra multipath */
326 len = be32_to_cpup(p++);
327 p += XDR_QUADLEN(len);
328 len = be32_to_cpup(p++);
329 p += XDR_QUADLEN(len);
330 continue;
331 }
332 }
333 }
334 return dsaddr;
335
336out_err_free:
337 nfs4_fl_free_deviceid(dsaddr);
338out_err:
339 dprintk("%s ERROR: returning NULL\n", __func__);
340 return NULL;
341}
342
343/*
344 * Decode the opaque device specified in 'dev'
345 * and add it to the list of available devices.
346 * If the deviceid is already cached, nfs4_add_deviceid will return
347 * a pointer to the cached struct and throw away the new.
348 */
349static struct nfs4_file_layout_dsaddr*
350decode_and_add_device(struct inode *inode, struct pnfs_device *dev)
351{
352 struct nfs4_file_layout_dsaddr *dsaddr;
353 struct pnfs_deviceid_node *d;
354
355 dsaddr = decode_device(inode, dev);
356 if (!dsaddr) {
357 printk(KERN_WARNING "%s: Could not decode or add device\n",
358 __func__);
359 return NULL;
360 }
361
362 d = pnfs_add_deviceid(NFS_SERVER(inode)->nfs_client->cl_devid_cache,
363 &dsaddr->deviceid);
364
365 return container_of(d, struct nfs4_file_layout_dsaddr, deviceid);
366}
367
368/*
369 * Retrieve the information for dev_id, add it to the list
370 * of available devices, and return it.
371 */
372struct nfs4_file_layout_dsaddr *
373get_device_info(struct inode *inode, struct nfs4_deviceid *dev_id)
374{
375 struct pnfs_device *pdev = NULL;
376 u32 max_resp_sz;
377 int max_pages;
378 struct page **pages = NULL;
379 struct nfs4_file_layout_dsaddr *dsaddr = NULL;
380 int rc, i;
381 struct nfs_server *server = NFS_SERVER(inode);
382
383 /*
384 * Use the session max response size as the basis for setting
385 * GETDEVICEINFO's maxcount
386 */
387 max_resp_sz = server->nfs_client->cl_session->fc_attrs.max_resp_sz;
388 max_pages = max_resp_sz >> PAGE_SHIFT;
389 dprintk("%s inode %p max_resp_sz %u max_pages %d\n",
390 __func__, inode, max_resp_sz, max_pages);
391
392 pdev = kzalloc(sizeof(struct pnfs_device), GFP_KERNEL);
393 if (pdev == NULL)
394 return NULL;
395
396 pages = kzalloc(max_pages * sizeof(struct page *), GFP_KERNEL);
397 if (pages == NULL) {
398 kfree(pdev);
399 return NULL;
400 }
401 for (i = 0; i < max_pages; i++) {
402 pages[i] = alloc_page(GFP_KERNEL);
403 if (!pages[i])
404 goto out_free;
405 }
406
407 /* set pdev->area */
408 pdev->area = vmap(pages, max_pages, VM_MAP, PAGE_KERNEL);
409 if (!pdev->area)
410 goto out_free;
411
412 memcpy(&pdev->dev_id, dev_id, sizeof(*dev_id));
413 pdev->layout_type = LAYOUT_NFSV4_1_FILES;
414 pdev->pages = pages;
415 pdev->pgbase = 0;
416 pdev->pglen = PAGE_SIZE * max_pages;
417 pdev->mincount = 0;
418
419 rc = nfs4_proc_getdeviceinfo(server, pdev);
420 dprintk("%s getdevice info returns %d\n", __func__, rc);
421 if (rc)
422 goto out_free;
423
424 /*
425 * Found new device, need to decode it and then add it to the
426 * list of known devices for this mountpoint.
427 */
428 dsaddr = decode_and_add_device(inode, pdev);
429out_free:
430 if (pdev->area != NULL)
431 vunmap(pdev->area);
432 for (i = 0; i < max_pages; i++)
433 __free_page(pages[i]);
434 kfree(pages);
435 kfree(pdev);
436 dprintk("<-- %s dsaddr %p\n", __func__, dsaddr);
437 return dsaddr;
438}
439
440struct nfs4_file_layout_dsaddr *
441nfs4_fl_find_get_deviceid(struct nfs_client *clp, struct nfs4_deviceid *id)
442{
443 struct pnfs_deviceid_node *d;
444
445 d = pnfs_find_get_deviceid(clp->cl_devid_cache, id);
446 return (d == NULL) ? NULL :
447 container_of(d, struct nfs4_file_layout_dsaddr, deviceid);
448}
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 089da5b5d20a..0f24cdf2cb13 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -55,6 +55,7 @@
55#include "internal.h" 55#include "internal.h"
56#include "iostat.h" 56#include "iostat.h"
57#include "callback.h" 57#include "callback.h"
58#include "pnfs.h"
58 59
59#define NFSDBG_FACILITY NFSDBG_PROC 60#define NFSDBG_FACILITY NFSDBG_PROC
60 61
@@ -129,7 +130,8 @@ const u32 nfs4_fsinfo_bitmap[2] = { FATTR4_WORD0_MAXFILESIZE
129 | FATTR4_WORD0_MAXREAD 130 | FATTR4_WORD0_MAXREAD
130 | FATTR4_WORD0_MAXWRITE 131 | FATTR4_WORD0_MAXWRITE
131 | FATTR4_WORD0_LEASE_TIME, 132 | FATTR4_WORD0_LEASE_TIME,
132 0 133 FATTR4_WORD1_TIME_DELTA
134 | FATTR4_WORD1_FS_LAYOUT_TYPES
133}; 135};
134 136
135const u32 nfs4_fs_locations_bitmap[2] = { 137const u32 nfs4_fs_locations_bitmap[2] = {
@@ -255,9 +257,6 @@ static int nfs4_handle_exception(const struct nfs_server *server, int errorcode,
255 nfs4_state_mark_reclaim_nograce(clp, state); 257 nfs4_state_mark_reclaim_nograce(clp, state);
256 goto do_state_recovery; 258 goto do_state_recovery;
257 case -NFS4ERR_STALE_STATEID: 259 case -NFS4ERR_STALE_STATEID:
258 if (state == NULL)
259 break;
260 nfs4_state_mark_reclaim_reboot(clp, state);
261 case -NFS4ERR_STALE_CLIENTID: 260 case -NFS4ERR_STALE_CLIENTID:
262 case -NFS4ERR_EXPIRED: 261 case -NFS4ERR_EXPIRED:
263 goto do_state_recovery; 262 goto do_state_recovery;
@@ -334,10 +333,12 @@ static void renew_lease(const struct nfs_server *server, unsigned long timestamp
334 * Must be called while holding tbl->slot_tbl_lock 333 * Must be called while holding tbl->slot_tbl_lock
335 */ 334 */
336static void 335static void
337nfs4_free_slot(struct nfs4_slot_table *tbl, u8 free_slotid) 336nfs4_free_slot(struct nfs4_slot_table *tbl, struct nfs4_slot *free_slot)
338{ 337{
338 int free_slotid = free_slot - tbl->slots;
339 int slotid = free_slotid; 339 int slotid = free_slotid;
340 340
341 BUG_ON(slotid < 0 || slotid >= NFS4_MAX_SLOT_TABLE);
341 /* clear used bit in bitmap */ 342 /* clear used bit in bitmap */
342 __clear_bit(slotid, tbl->used_slots); 343 __clear_bit(slotid, tbl->used_slots);
343 344
@@ -379,7 +380,7 @@ static void nfs41_sequence_free_slot(struct nfs4_sequence_res *res)
379 struct nfs4_slot_table *tbl; 380 struct nfs4_slot_table *tbl;
380 381
381 tbl = &res->sr_session->fc_slot_table; 382 tbl = &res->sr_session->fc_slot_table;
382 if (res->sr_slotid == NFS4_MAX_SLOT_TABLE) { 383 if (!res->sr_slot) {
383 /* just wake up the next guy waiting since 384 /* just wake up the next guy waiting since
384 * we may have not consumed a slot after all */ 385 * we may have not consumed a slot after all */
385 dprintk("%s: No slot\n", __func__); 386 dprintk("%s: No slot\n", __func__);
@@ -387,17 +388,15 @@ static void nfs41_sequence_free_slot(struct nfs4_sequence_res *res)
387 } 388 }
388 389
389 spin_lock(&tbl->slot_tbl_lock); 390 spin_lock(&tbl->slot_tbl_lock);
390 nfs4_free_slot(tbl, res->sr_slotid); 391 nfs4_free_slot(tbl, res->sr_slot);
391 nfs41_check_drain_session_complete(res->sr_session); 392 nfs41_check_drain_session_complete(res->sr_session);
392 spin_unlock(&tbl->slot_tbl_lock); 393 spin_unlock(&tbl->slot_tbl_lock);
393 res->sr_slotid = NFS4_MAX_SLOT_TABLE; 394 res->sr_slot = NULL;
394} 395}
395 396
396static int nfs41_sequence_done(struct rpc_task *task, struct nfs4_sequence_res *res) 397static int nfs41_sequence_done(struct rpc_task *task, struct nfs4_sequence_res *res)
397{ 398{
398 unsigned long timestamp; 399 unsigned long timestamp;
399 struct nfs4_slot_table *tbl;
400 struct nfs4_slot *slot;
401 struct nfs_client *clp; 400 struct nfs_client *clp;
402 401
403 /* 402 /*
@@ -410,17 +409,14 @@ static int nfs41_sequence_done(struct rpc_task *task, struct nfs4_sequence_res *
410 res->sr_status = NFS_OK; 409 res->sr_status = NFS_OK;
411 410
412 /* -ERESTARTSYS can result in skipping nfs41_sequence_setup */ 411 /* -ERESTARTSYS can result in skipping nfs41_sequence_setup */
413 if (res->sr_slotid == NFS4_MAX_SLOT_TABLE) 412 if (!res->sr_slot)
414 goto out; 413 goto out;
415 414
416 tbl = &res->sr_session->fc_slot_table;
417 slot = tbl->slots + res->sr_slotid;
418
419 /* Check the SEQUENCE operation status */ 415 /* Check the SEQUENCE operation status */
420 switch (res->sr_status) { 416 switch (res->sr_status) {
421 case 0: 417 case 0:
422 /* Update the slot's sequence and clientid lease timer */ 418 /* Update the slot's sequence and clientid lease timer */
423 ++slot->seq_nr; 419 ++res->sr_slot->seq_nr;
424 timestamp = res->sr_renewal_time; 420 timestamp = res->sr_renewal_time;
425 clp = res->sr_session->clp; 421 clp = res->sr_session->clp;
426 do_renew_lease(clp, timestamp); 422 do_renew_lease(clp, timestamp);
@@ -433,12 +429,14 @@ static int nfs41_sequence_done(struct rpc_task *task, struct nfs4_sequence_res *
433 * returned NFS4ERR_DELAY as per Section 2.10.6.2 429 * returned NFS4ERR_DELAY as per Section 2.10.6.2
434 * of RFC5661. 430 * of RFC5661.
435 */ 431 */
436 dprintk("%s: slot=%d seq=%d: Operation in progress\n", 432 dprintk("%s: slot=%td seq=%d: Operation in progress\n",
437 __func__, res->sr_slotid, slot->seq_nr); 433 __func__,
434 res->sr_slot - res->sr_session->fc_slot_table.slots,
435 res->sr_slot->seq_nr);
438 goto out_retry; 436 goto out_retry;
439 default: 437 default:
440 /* Just update the slot sequence no. */ 438 /* Just update the slot sequence no. */
441 ++slot->seq_nr; 439 ++res->sr_slot->seq_nr;
442 } 440 }
443out: 441out:
444 /* The session may be reset by one of the error handlers. */ 442 /* The session may be reset by one of the error handlers. */
@@ -505,10 +503,9 @@ static int nfs41_setup_sequence(struct nfs4_session *session,
505 503
506 dprintk("--> %s\n", __func__); 504 dprintk("--> %s\n", __func__);
507 /* slot already allocated? */ 505 /* slot already allocated? */
508 if (res->sr_slotid != NFS4_MAX_SLOT_TABLE) 506 if (res->sr_slot != NULL)
509 return 0; 507 return 0;
510 508
511 res->sr_slotid = NFS4_MAX_SLOT_TABLE;
512 tbl = &session->fc_slot_table; 509 tbl = &session->fc_slot_table;
513 510
514 spin_lock(&tbl->slot_tbl_lock); 511 spin_lock(&tbl->slot_tbl_lock);
@@ -550,7 +547,7 @@ static int nfs41_setup_sequence(struct nfs4_session *session,
550 dprintk("<-- %s slotid=%d seqid=%d\n", __func__, slotid, slot->seq_nr); 547 dprintk("<-- %s slotid=%d seqid=%d\n", __func__, slotid, slot->seq_nr);
551 548
552 res->sr_session = session; 549 res->sr_session = session;
553 res->sr_slotid = slotid; 550 res->sr_slot = slot;
554 res->sr_renewal_time = jiffies; 551 res->sr_renewal_time = jiffies;
555 res->sr_status_flags = 0; 552 res->sr_status_flags = 0;
556 /* 553 /*
@@ -576,8 +573,9 @@ int nfs4_setup_sequence(const struct nfs_server *server,
576 goto out; 573 goto out;
577 } 574 }
578 575
579 dprintk("--> %s clp %p session %p sr_slotid %d\n", 576 dprintk("--> %s clp %p session %p sr_slot %td\n",
580 __func__, session->clp, session, res->sr_slotid); 577 __func__, session->clp, session, res->sr_slot ?
578 res->sr_slot - session->fc_slot_table.slots : -1);
581 579
582 ret = nfs41_setup_sequence(session, args, res, cache_reply, 580 ret = nfs41_setup_sequence(session, args, res, cache_reply,
583 task); 581 task);
@@ -650,7 +648,7 @@ static int nfs4_call_sync_sequence(struct nfs_server *server,
650 .callback_data = &data 648 .callback_data = &data
651 }; 649 };
652 650
653 res->sr_slotid = NFS4_MAX_SLOT_TABLE; 651 res->sr_slot = NULL;
654 if (privileged) 652 if (privileged)
655 task_setup.callback_ops = &nfs41_call_priv_sync_ops; 653 task_setup.callback_ops = &nfs41_call_priv_sync_ops;
656 task = rpc_run_task(&task_setup); 654 task = rpc_run_task(&task_setup);
@@ -735,7 +733,6 @@ static void nfs4_init_opendata_res(struct nfs4_opendata *p)
735 p->o_res.server = p->o_arg.server; 733 p->o_res.server = p->o_arg.server;
736 nfs_fattr_init(&p->f_attr); 734 nfs_fattr_init(&p->f_attr);
737 nfs_fattr_init(&p->dir_attr); 735 nfs_fattr_init(&p->dir_attr);
738 p->o_res.seq_res.sr_slotid = NFS4_MAX_SLOT_TABLE;
739} 736}
740 737
741static struct nfs4_opendata *nfs4_opendata_alloc(struct path *path, 738static struct nfs4_opendata *nfs4_opendata_alloc(struct path *path,
@@ -1120,6 +1117,7 @@ static int nfs4_open_recover(struct nfs4_opendata *opendata, struct nfs4_state *
1120 clear_bit(NFS_DELEGATED_STATE, &state->flags); 1117 clear_bit(NFS_DELEGATED_STATE, &state->flags);
1121 smp_rmb(); 1118 smp_rmb();
1122 if (state->n_rdwr != 0) { 1119 if (state->n_rdwr != 0) {
1120 clear_bit(NFS_O_RDWR_STATE, &state->flags);
1123 ret = nfs4_open_recover_helper(opendata, FMODE_READ|FMODE_WRITE, &newstate); 1121 ret = nfs4_open_recover_helper(opendata, FMODE_READ|FMODE_WRITE, &newstate);
1124 if (ret != 0) 1122 if (ret != 0)
1125 return ret; 1123 return ret;
@@ -1127,6 +1125,7 @@ static int nfs4_open_recover(struct nfs4_opendata *opendata, struct nfs4_state *
1127 return -ESTALE; 1125 return -ESTALE;
1128 } 1126 }
1129 if (state->n_wronly != 0) { 1127 if (state->n_wronly != 0) {
1128 clear_bit(NFS_O_WRONLY_STATE, &state->flags);
1130 ret = nfs4_open_recover_helper(opendata, FMODE_WRITE, &newstate); 1129 ret = nfs4_open_recover_helper(opendata, FMODE_WRITE, &newstate);
1131 if (ret != 0) 1130 if (ret != 0)
1132 return ret; 1131 return ret;
@@ -1134,6 +1133,7 @@ static int nfs4_open_recover(struct nfs4_opendata *opendata, struct nfs4_state *
1134 return -ESTALE; 1133 return -ESTALE;
1135 } 1134 }
1136 if (state->n_rdonly != 0) { 1135 if (state->n_rdonly != 0) {
1136 clear_bit(NFS_O_RDONLY_STATE, &state->flags);
1137 ret = nfs4_open_recover_helper(opendata, FMODE_READ, &newstate); 1137 ret = nfs4_open_recover_helper(opendata, FMODE_READ, &newstate);
1138 if (ret != 0) 1138 if (ret != 0)
1139 return ret; 1139 return ret;
@@ -1188,7 +1188,7 @@ static int nfs4_do_open_reclaim(struct nfs_open_context *ctx, struct nfs4_state
1188 int err; 1188 int err;
1189 do { 1189 do {
1190 err = _nfs4_do_open_reclaim(ctx, state); 1190 err = _nfs4_do_open_reclaim(ctx, state);
1191 if (err != -NFS4ERR_DELAY && err != -EKEYEXPIRED) 1191 if (err != -NFS4ERR_DELAY)
1192 break; 1192 break;
1193 nfs4_handle_exception(server, err, &exception); 1193 nfs4_handle_exception(server, err, &exception);
1194 } while (exception.retry); 1194 } while (exception.retry);
@@ -1258,6 +1258,13 @@ int nfs4_open_delegation_recall(struct nfs_open_context *ctx, struct nfs4_state
1258 case -NFS4ERR_ADMIN_REVOKED: 1258 case -NFS4ERR_ADMIN_REVOKED:
1259 case -NFS4ERR_BAD_STATEID: 1259 case -NFS4ERR_BAD_STATEID:
1260 nfs4_state_mark_reclaim_nograce(server->nfs_client, state); 1260 nfs4_state_mark_reclaim_nograce(server->nfs_client, state);
1261 case -EKEYEXPIRED:
1262 /*
1263 * User RPCSEC_GSS context has expired.
1264 * We cannot recover this stateid now, so
1265 * skip it and allow recovery thread to
1266 * proceed.
1267 */
1261 case -ENOMEM: 1268 case -ENOMEM:
1262 err = 0; 1269 err = 0;
1263 goto out; 1270 goto out;
@@ -1605,7 +1612,6 @@ static int nfs4_do_open_expired(struct nfs_open_context *ctx, struct nfs4_state
1605 goto out; 1612 goto out;
1606 case -NFS4ERR_GRACE: 1613 case -NFS4ERR_GRACE:
1607 case -NFS4ERR_DELAY: 1614 case -NFS4ERR_DELAY:
1608 case -EKEYEXPIRED:
1609 nfs4_handle_exception(server, err, &exception); 1615 nfs4_handle_exception(server, err, &exception);
1610 err = 0; 1616 err = 0;
1611 } 1617 }
@@ -1975,7 +1981,6 @@ int nfs4_do_close(struct path *path, struct nfs4_state *state, gfp_t gfp_mask, i
1975 calldata->res.fattr = &calldata->fattr; 1981 calldata->res.fattr = &calldata->fattr;
1976 calldata->res.seqid = calldata->arg.seqid; 1982 calldata->res.seqid = calldata->arg.seqid;
1977 calldata->res.server = server; 1983 calldata->res.server = server;
1978 calldata->res.seq_res.sr_slotid = NFS4_MAX_SLOT_TABLE;
1979 path_get(path); 1984 path_get(path);
1980 calldata->path = *path; 1985 calldata->path = *path;
1981 1986
@@ -1998,120 +2003,17 @@ out:
1998 return status; 2003 return status;
1999} 2004}
2000 2005
2001static int nfs4_intent_set_file(struct nameidata *nd, struct path *path, struct nfs4_state *state, fmode_t fmode) 2006static struct inode *
2007nfs4_atomic_open(struct inode *dir, struct nfs_open_context *ctx, int open_flags, struct iattr *attr)
2002{ 2008{
2003 struct file *filp;
2004 int ret;
2005
2006 /* If the open_intent is for execute, we have an extra check to make */
2007 if (fmode & FMODE_EXEC) {
2008 ret = nfs_may_open(state->inode,
2009 state->owner->so_cred,
2010 nd->intent.open.flags);
2011 if (ret < 0)
2012 goto out_close;
2013 }
2014 filp = lookup_instantiate_filp(nd, path->dentry, NULL);
2015 if (!IS_ERR(filp)) {
2016 struct nfs_open_context *ctx;
2017 ctx = nfs_file_open_context(filp);
2018 ctx->state = state;
2019 return 0;
2020 }
2021 ret = PTR_ERR(filp);
2022out_close:
2023 nfs4_close_sync(path, state, fmode & (FMODE_READ|FMODE_WRITE));
2024 return ret;
2025}
2026
2027struct dentry *
2028nfs4_atomic_open(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
2029{
2030 struct path path = {
2031 .mnt = nd->path.mnt,
2032 .dentry = dentry,
2033 };
2034 struct dentry *parent;
2035 struct iattr attr;
2036 struct rpc_cred *cred;
2037 struct nfs4_state *state; 2009 struct nfs4_state *state;
2038 struct dentry *res;
2039 int open_flags = nd->intent.open.flags;
2040 fmode_t fmode = open_flags & (FMODE_READ | FMODE_WRITE | FMODE_EXEC);
2041
2042 if (nd->flags & LOOKUP_CREATE) {
2043 attr.ia_mode = nd->intent.open.create_mode;
2044 attr.ia_valid = ATTR_MODE;
2045 if (!IS_POSIXACL(dir))
2046 attr.ia_mode &= ~current_umask();
2047 } else {
2048 open_flags &= ~O_EXCL;
2049 attr.ia_valid = 0;
2050 BUG_ON(open_flags & O_CREAT);
2051 }
2052 2010
2053 cred = rpc_lookup_cred();
2054 if (IS_ERR(cred))
2055 return (struct dentry *)cred;
2056 parent = dentry->d_parent;
2057 /* Protect against concurrent sillydeletes */ 2011 /* Protect against concurrent sillydeletes */
2058 nfs_block_sillyrename(parent); 2012 state = nfs4_do_open(dir, &ctx->path, ctx->mode, open_flags, attr, ctx->cred);
2059 state = nfs4_do_open(dir, &path, fmode, open_flags, &attr, cred); 2013 if (IS_ERR(state))
2060 put_rpccred(cred); 2014 return ERR_CAST(state);
2061 if (IS_ERR(state)) { 2015 ctx->state = state;
2062 if (PTR_ERR(state) == -ENOENT) { 2016 return igrab(state->inode);
2063 d_add(dentry, NULL);
2064 nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
2065 }
2066 nfs_unblock_sillyrename(parent);
2067 return (struct dentry *)state;
2068 }
2069 res = d_add_unique(dentry, igrab(state->inode));
2070 if (res != NULL)
2071 path.dentry = res;
2072 nfs_set_verifier(path.dentry, nfs_save_change_attribute(dir));
2073 nfs_unblock_sillyrename(parent);
2074 nfs4_intent_set_file(nd, &path, state, fmode);
2075 return res;
2076}
2077
2078int
2079nfs4_open_revalidate(struct inode *dir, struct dentry *dentry, int openflags, struct nameidata *nd)
2080{
2081 struct path path = {
2082 .mnt = nd->path.mnt,
2083 .dentry = dentry,
2084 };
2085 struct rpc_cred *cred;
2086 struct nfs4_state *state;
2087 fmode_t fmode = openflags & (FMODE_READ | FMODE_WRITE);
2088
2089 cred = rpc_lookup_cred();
2090 if (IS_ERR(cred))
2091 return PTR_ERR(cred);
2092 state = nfs4_do_open(dir, &path, fmode, openflags, NULL, cred);
2093 put_rpccred(cred);
2094 if (IS_ERR(state)) {
2095 switch (PTR_ERR(state)) {
2096 case -EPERM:
2097 case -EACCES:
2098 case -EDQUOT:
2099 case -ENOSPC:
2100 case -EROFS:
2101 return PTR_ERR(state);
2102 default:
2103 goto out_drop;
2104 }
2105 }
2106 if (state->inode == dentry->d_inode) {
2107 nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
2108 nfs4_intent_set_file(nd, &path, state, fmode);
2109 return 1;
2110 }
2111 nfs4_close_sync(&path, state, fmode);
2112out_drop:
2113 d_drop(dentry);
2114 return 0;
2115} 2017}
2116 2018
2117static void nfs4_close_context(struct nfs_open_context *ctx, int is_sync) 2019static void nfs4_close_context(struct nfs_open_context *ctx, int is_sync)
@@ -2568,36 +2470,34 @@ static int nfs4_proc_readlink(struct inode *inode, struct page *page,
2568 2470
2569static int 2471static int
2570nfs4_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr, 2472nfs4_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
2571 int flags, struct nameidata *nd) 2473 int flags, struct nfs_open_context *ctx)
2572{ 2474{
2573 struct path path = { 2475 struct path my_path = {
2574 .mnt = nd->path.mnt,
2575 .dentry = dentry, 2476 .dentry = dentry,
2576 }; 2477 };
2478 struct path *path = &my_path;
2577 struct nfs4_state *state; 2479 struct nfs4_state *state;
2578 struct rpc_cred *cred; 2480 struct rpc_cred *cred = NULL;
2579 fmode_t fmode = flags & (FMODE_READ | FMODE_WRITE); 2481 fmode_t fmode = 0;
2580 int status = 0; 2482 int status = 0;
2581 2483
2582 cred = rpc_lookup_cred(); 2484 if (ctx != NULL) {
2583 if (IS_ERR(cred)) { 2485 cred = ctx->cred;
2584 status = PTR_ERR(cred); 2486 path = &ctx->path;
2585 goto out; 2487 fmode = ctx->mode;
2586 } 2488 }
2587 state = nfs4_do_open(dir, &path, fmode, flags, sattr, cred); 2489 state = nfs4_do_open(dir, path, fmode, flags, sattr, cred);
2588 d_drop(dentry); 2490 d_drop(dentry);
2589 if (IS_ERR(state)) { 2491 if (IS_ERR(state)) {
2590 status = PTR_ERR(state); 2492 status = PTR_ERR(state);
2591 goto out_putcred; 2493 goto out;
2592 } 2494 }
2593 d_add(dentry, igrab(state->inode)); 2495 d_add(dentry, igrab(state->inode));
2594 nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); 2496 nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
2595 if (status == 0 && (nd->flags & LOOKUP_OPEN) != 0) 2497 if (ctx != NULL)
2596 status = nfs4_intent_set_file(nd, &path, state, fmode); 2498 ctx->state = state;
2597 else 2499 else
2598 nfs4_close_sync(&path, state, fmode); 2500 nfs4_close_sync(path, state, fmode);
2599out_putcred:
2600 put_rpccred(cred);
2601out: 2501out:
2602 return status; 2502 return status;
2603} 2503}
@@ -2655,6 +2555,7 @@ static void nfs4_proc_unlink_setup(struct rpc_message *msg, struct inode *dir)
2655 2555
2656 args->bitmask = server->cache_consistency_bitmask; 2556 args->bitmask = server->cache_consistency_bitmask;
2657 res->server = server; 2557 res->server = server;
2558 res->seq_res.sr_slot = NULL;
2658 msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_REMOVE]; 2559 msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_REMOVE];
2659} 2560}
2660 2561
@@ -2671,18 +2572,46 @@ static int nfs4_proc_unlink_done(struct rpc_task *task, struct inode *dir)
2671 return 1; 2572 return 1;
2672} 2573}
2673 2574
2575static void nfs4_proc_rename_setup(struct rpc_message *msg, struct inode *dir)
2576{
2577 struct nfs_server *server = NFS_SERVER(dir);
2578 struct nfs_renameargs *arg = msg->rpc_argp;
2579 struct nfs_renameres *res = msg->rpc_resp;
2580
2581 msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_RENAME];
2582 arg->bitmask = server->attr_bitmask;
2583 res->server = server;
2584}
2585
2586static int nfs4_proc_rename_done(struct rpc_task *task, struct inode *old_dir,
2587 struct inode *new_dir)
2588{
2589 struct nfs_renameres *res = task->tk_msg.rpc_resp;
2590
2591 if (!nfs4_sequence_done(task, &res->seq_res))
2592 return 0;
2593 if (nfs4_async_handle_error(task, res->server, NULL) == -EAGAIN)
2594 return 0;
2595
2596 update_changeattr(old_dir, &res->old_cinfo);
2597 nfs_post_op_update_inode(old_dir, res->old_fattr);
2598 update_changeattr(new_dir, &res->new_cinfo);
2599 nfs_post_op_update_inode(new_dir, res->new_fattr);
2600 return 1;
2601}
2602
2674static int _nfs4_proc_rename(struct inode *old_dir, struct qstr *old_name, 2603static int _nfs4_proc_rename(struct inode *old_dir, struct qstr *old_name,
2675 struct inode *new_dir, struct qstr *new_name) 2604 struct inode *new_dir, struct qstr *new_name)
2676{ 2605{
2677 struct nfs_server *server = NFS_SERVER(old_dir); 2606 struct nfs_server *server = NFS_SERVER(old_dir);
2678 struct nfs4_rename_arg arg = { 2607 struct nfs_renameargs arg = {
2679 .old_dir = NFS_FH(old_dir), 2608 .old_dir = NFS_FH(old_dir),
2680 .new_dir = NFS_FH(new_dir), 2609 .new_dir = NFS_FH(new_dir),
2681 .old_name = old_name, 2610 .old_name = old_name,
2682 .new_name = new_name, 2611 .new_name = new_name,
2683 .bitmask = server->attr_bitmask, 2612 .bitmask = server->attr_bitmask,
2684 }; 2613 };
2685 struct nfs4_rename_res res = { 2614 struct nfs_renameres res = {
2686 .server = server, 2615 .server = server,
2687 }; 2616 };
2688 struct rpc_message msg = { 2617 struct rpc_message msg = {
@@ -2896,15 +2825,16 @@ static int nfs4_proc_mkdir(struct inode *dir, struct dentry *dentry,
2896} 2825}
2897 2826
2898static int _nfs4_proc_readdir(struct dentry *dentry, struct rpc_cred *cred, 2827static int _nfs4_proc_readdir(struct dentry *dentry, struct rpc_cred *cred,
2899 u64 cookie, struct page *page, unsigned int count, int plus) 2828 u64 cookie, struct page **pages, unsigned int count, int plus)
2900{ 2829{
2901 struct inode *dir = dentry->d_inode; 2830 struct inode *dir = dentry->d_inode;
2902 struct nfs4_readdir_arg args = { 2831 struct nfs4_readdir_arg args = {
2903 .fh = NFS_FH(dir), 2832 .fh = NFS_FH(dir),
2904 .pages = &page, 2833 .pages = pages,
2905 .pgbase = 0, 2834 .pgbase = 0,
2906 .count = count, 2835 .count = count,
2907 .bitmask = NFS_SERVER(dentry->d_inode)->attr_bitmask, 2836 .bitmask = NFS_SERVER(dentry->d_inode)->attr_bitmask,
2837 .plus = plus,
2908 }; 2838 };
2909 struct nfs4_readdir_res res; 2839 struct nfs4_readdir_res res;
2910 struct rpc_message msg = { 2840 struct rpc_message msg = {
@@ -2932,14 +2862,14 @@ static int _nfs4_proc_readdir(struct dentry *dentry, struct rpc_cred *cred,
2932} 2862}
2933 2863
2934static int nfs4_proc_readdir(struct dentry *dentry, struct rpc_cred *cred, 2864static int nfs4_proc_readdir(struct dentry *dentry, struct rpc_cred *cred,
2935 u64 cookie, struct page *page, unsigned int count, int plus) 2865 u64 cookie, struct page **pages, unsigned int count, int plus)
2936{ 2866{
2937 struct nfs4_exception exception = { }; 2867 struct nfs4_exception exception = { };
2938 int err; 2868 int err;
2939 do { 2869 do {
2940 err = nfs4_handle_exception(NFS_SERVER(dentry->d_inode), 2870 err = nfs4_handle_exception(NFS_SERVER(dentry->d_inode),
2941 _nfs4_proc_readdir(dentry, cred, cookie, 2871 _nfs4_proc_readdir(dentry, cred, cookie,
2942 page, count, plus), 2872 pages, count, plus),
2943 &exception); 2873 &exception);
2944 } while (exception.retry); 2874 } while (exception.retry);
2945 return err; 2875 return err;
@@ -3490,9 +3420,6 @@ nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server,
3490 nfs4_state_mark_reclaim_nograce(clp, state); 3420 nfs4_state_mark_reclaim_nograce(clp, state);
3491 goto do_state_recovery; 3421 goto do_state_recovery;
3492 case -NFS4ERR_STALE_STATEID: 3422 case -NFS4ERR_STALE_STATEID:
3493 if (state == NULL)
3494 break;
3495 nfs4_state_mark_reclaim_reboot(clp, state);
3496 case -NFS4ERR_STALE_CLIENTID: 3423 case -NFS4ERR_STALE_CLIENTID:
3497 case -NFS4ERR_EXPIRED: 3424 case -NFS4ERR_EXPIRED:
3498 goto do_state_recovery; 3425 goto do_state_recovery;
@@ -3626,7 +3553,6 @@ int nfs4_proc_setclientid_confirm(struct nfs_client *clp,
3626 case -NFS4ERR_RESOURCE: 3553 case -NFS4ERR_RESOURCE:
3627 /* The IBM lawyers misread another document! */ 3554 /* The IBM lawyers misread another document! */
3628 case -NFS4ERR_DELAY: 3555 case -NFS4ERR_DELAY:
3629 case -EKEYEXPIRED:
3630 err = nfs4_delay(clp->cl_rpcclient, &timeout); 3556 err = nfs4_delay(clp->cl_rpcclient, &timeout);
3631 } 3557 }
3632 } while (err == 0); 3558 } while (err == 0);
@@ -3721,7 +3647,6 @@ static int _nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, co
3721 memcpy(&data->stateid, stateid, sizeof(data->stateid)); 3647 memcpy(&data->stateid, stateid, sizeof(data->stateid));
3722 data->res.fattr = &data->fattr; 3648 data->res.fattr = &data->fattr;
3723 data->res.server = server; 3649 data->res.server = server;
3724 data->res.seq_res.sr_slotid = NFS4_MAX_SLOT_TABLE;
3725 nfs_fattr_init(data->res.fattr); 3650 nfs_fattr_init(data->res.fattr);
3726 data->timestamp = jiffies; 3651 data->timestamp = jiffies;
3727 data->rpc_status = 0; 3652 data->rpc_status = 0;
@@ -3874,7 +3799,6 @@ static struct nfs4_unlockdata *nfs4_alloc_unlockdata(struct file_lock *fl,
3874 p->arg.fl = &p->fl; 3799 p->arg.fl = &p->fl;
3875 p->arg.seqid = seqid; 3800 p->arg.seqid = seqid;
3876 p->res.seqid = seqid; 3801 p->res.seqid = seqid;
3877 p->res.seq_res.sr_slotid = NFS4_MAX_SLOT_TABLE;
3878 p->arg.stateid = &lsp->ls_stateid; 3802 p->arg.stateid = &lsp->ls_stateid;
3879 p->lsp = lsp; 3803 p->lsp = lsp;
3880 atomic_inc(&lsp->ls_count); 3804 atomic_inc(&lsp->ls_count);
@@ -4054,7 +3978,6 @@ static struct nfs4_lockdata *nfs4_alloc_lockdata(struct file_lock *fl,
4054 p->arg.lock_owner.clientid = server->nfs_client->cl_clientid; 3978 p->arg.lock_owner.clientid = server->nfs_client->cl_clientid;
4055 p->arg.lock_owner.id = lsp->ls_id.id; 3979 p->arg.lock_owner.id = lsp->ls_id.id;
4056 p->res.lock_seqid = p->arg.lock_seqid; 3980 p->res.lock_seqid = p->arg.lock_seqid;
4057 p->res.seq_res.sr_slotid = NFS4_MAX_SLOT_TABLE;
4058 p->lsp = lsp; 3981 p->lsp = lsp;
4059 p->server = server; 3982 p->server = server;
4060 atomic_inc(&lsp->ls_count); 3983 atomic_inc(&lsp->ls_count);
@@ -4241,7 +4164,7 @@ static int nfs4_lock_reclaim(struct nfs4_state *state, struct file_lock *request
4241 if (test_bit(NFS_DELEGATED_STATE, &state->flags) != 0) 4164 if (test_bit(NFS_DELEGATED_STATE, &state->flags) != 0)
4242 return 0; 4165 return 0;
4243 err = _nfs4_do_setlk(state, F_SETLK, request, NFS_LOCK_RECLAIM); 4166 err = _nfs4_do_setlk(state, F_SETLK, request, NFS_LOCK_RECLAIM);
4244 if (err != -NFS4ERR_DELAY && err != -EKEYEXPIRED) 4167 if (err != -NFS4ERR_DELAY)
4245 break; 4168 break;
4246 nfs4_handle_exception(server, err, &exception); 4169 nfs4_handle_exception(server, err, &exception);
4247 } while (exception.retry); 4170 } while (exception.retry);
@@ -4266,7 +4189,6 @@ static int nfs4_lock_expired(struct nfs4_state *state, struct file_lock *request
4266 goto out; 4189 goto out;
4267 case -NFS4ERR_GRACE: 4190 case -NFS4ERR_GRACE:
4268 case -NFS4ERR_DELAY: 4191 case -NFS4ERR_DELAY:
4269 case -EKEYEXPIRED:
4270 nfs4_handle_exception(server, err, &exception); 4192 nfs4_handle_exception(server, err, &exception);
4271 err = 0; 4193 err = 0;
4272 } 4194 }
@@ -4412,13 +4334,21 @@ int nfs4_lock_delegation_recall(struct nfs4_state *state, struct file_lock *fl)
4412 nfs4_state_mark_reclaim_nograce(server->nfs_client, state); 4334 nfs4_state_mark_reclaim_nograce(server->nfs_client, state);
4413 err = 0; 4335 err = 0;
4414 goto out; 4336 goto out;
4337 case -EKEYEXPIRED:
4338 /*
4339 * User RPCSEC_GSS context has expired.
4340 * We cannot recover this stateid now, so
4341 * skip it and allow recovery thread to
4342 * proceed.
4343 */
4344 err = 0;
4345 goto out;
4415 case -ENOMEM: 4346 case -ENOMEM:
4416 case -NFS4ERR_DENIED: 4347 case -NFS4ERR_DENIED:
4417 /* kill_proc(fl->fl_pid, SIGLOST, 1); */ 4348 /* kill_proc(fl->fl_pid, SIGLOST, 1); */
4418 err = 0; 4349 err = 0;
4419 goto out; 4350 goto out;
4420 case -NFS4ERR_DELAY: 4351 case -NFS4ERR_DELAY:
4421 case -EKEYEXPIRED:
4422 break; 4352 break;
4423 } 4353 }
4424 err = nfs4_handle_exception(server, err, &exception); 4354 err = nfs4_handle_exception(server, err, &exception);
@@ -4647,7 +4577,6 @@ static void nfs4_get_lease_time_done(struct rpc_task *task, void *calldata)
4647 switch (task->tk_status) { 4577 switch (task->tk_status) {
4648 case -NFS4ERR_DELAY: 4578 case -NFS4ERR_DELAY:
4649 case -NFS4ERR_GRACE: 4579 case -NFS4ERR_GRACE:
4650 case -EKEYEXPIRED:
4651 dprintk("%s Retry: tk_status %d\n", __func__, task->tk_status); 4580 dprintk("%s Retry: tk_status %d\n", __func__, task->tk_status);
4652 rpc_delay(task, NFS4_POLL_RETRY_MIN); 4581 rpc_delay(task, NFS4_POLL_RETRY_MIN);
4653 task->tk_status = 0; 4582 task->tk_status = 0;
@@ -4687,7 +4616,6 @@ int nfs4_proc_get_lease_time(struct nfs_client *clp, struct nfs_fsinfo *fsinfo)
4687 }; 4616 };
4688 int status; 4617 int status;
4689 4618
4690 res.lr_seq_res.sr_slotid = NFS4_MAX_SLOT_TABLE;
4691 dprintk("--> %s\n", __func__); 4619 dprintk("--> %s\n", __func__);
4692 task = rpc_run_task(&task_setup); 4620 task = rpc_run_task(&task_setup);
4693 4621
@@ -4914,49 +4842,56 @@ static void nfs4_init_channel_attrs(struct nfs41_create_session_args *args)
4914 args->bc_attrs.max_reqs); 4842 args->bc_attrs.max_reqs);
4915} 4843}
4916 4844
4917static int _verify_channel_attr(char *chan, char *attr_name, u32 sent, u32 rcvd) 4845static int nfs4_verify_fore_channel_attrs(struct nfs41_create_session_args *args, struct nfs4_session *session)
4918{ 4846{
4919 if (rcvd <= sent) 4847 struct nfs4_channel_attrs *sent = &args->fc_attrs;
4920 return 0; 4848 struct nfs4_channel_attrs *rcvd = &session->fc_attrs;
4921 printk(KERN_WARNING "%s: Session INVALID: %s channel %s increased. " 4849
4922 "sent=%u rcvd=%u\n", __func__, chan, attr_name, sent, rcvd); 4850 if (rcvd->headerpadsz > sent->headerpadsz)
4923 return -EINVAL; 4851 return -EINVAL;
4852 if (rcvd->max_resp_sz > sent->max_resp_sz)
4853 return -EINVAL;
4854 /*
4855 * Our requested max_ops is the minimum we need; we're not
4856 * prepared to break up compounds into smaller pieces than that.
4857 * So, no point even trying to continue if the server won't
4858 * cooperate:
4859 */
4860 if (rcvd->max_ops < sent->max_ops)
4861 return -EINVAL;
4862 if (rcvd->max_reqs == 0)
4863 return -EINVAL;
4864 return 0;
4924} 4865}
4925 4866
4926#define _verify_fore_channel_attr(_name_) \ 4867static int nfs4_verify_back_channel_attrs(struct nfs41_create_session_args *args, struct nfs4_session *session)
4927 _verify_channel_attr("fore", #_name_, \ 4868{
4928 args->fc_attrs._name_, \ 4869 struct nfs4_channel_attrs *sent = &args->bc_attrs;
4929 session->fc_attrs._name_) 4870 struct nfs4_channel_attrs *rcvd = &session->bc_attrs;
4930 4871
4931#define _verify_back_channel_attr(_name_) \ 4872 if (rcvd->max_rqst_sz > sent->max_rqst_sz)
4932 _verify_channel_attr("back", #_name_, \ 4873 return -EINVAL;
4933 args->bc_attrs._name_, \ 4874 if (rcvd->max_resp_sz < sent->max_resp_sz)
4934 session->bc_attrs._name_) 4875 return -EINVAL;
4876 if (rcvd->max_resp_sz_cached > sent->max_resp_sz_cached)
4877 return -EINVAL;
4878 /* These would render the backchannel useless: */
4879 if (rcvd->max_ops == 0)
4880 return -EINVAL;
4881 if (rcvd->max_reqs == 0)
4882 return -EINVAL;
4883 return 0;
4884}
4935 4885
4936/*
4937 * The server is not allowed to increase the fore channel header pad size,
4938 * maximum response size, or maximum number of operations.
4939 *
4940 * The back channel attributes are only negotiatied down: We send what the
4941 * (back channel) server insists upon.
4942 */
4943static int nfs4_verify_channel_attrs(struct nfs41_create_session_args *args, 4886static int nfs4_verify_channel_attrs(struct nfs41_create_session_args *args,
4944 struct nfs4_session *session) 4887 struct nfs4_session *session)
4945{ 4888{
4946 int ret = 0; 4889 int ret;
4947
4948 ret |= _verify_fore_channel_attr(headerpadsz);
4949 ret |= _verify_fore_channel_attr(max_resp_sz);
4950 ret |= _verify_fore_channel_attr(max_ops);
4951
4952 ret |= _verify_back_channel_attr(headerpadsz);
4953 ret |= _verify_back_channel_attr(max_rqst_sz);
4954 ret |= _verify_back_channel_attr(max_resp_sz);
4955 ret |= _verify_back_channel_attr(max_resp_sz_cached);
4956 ret |= _verify_back_channel_attr(max_ops);
4957 ret |= _verify_back_channel_attr(max_reqs);
4958 4890
4959 return ret; 4891 ret = nfs4_verify_fore_channel_attrs(args, session);
4892 if (ret)
4893 return ret;
4894 return nfs4_verify_back_channel_attrs(args, session);
4960} 4895}
4961 4896
4962static int _nfs4_proc_create_session(struct nfs_client *clp) 4897static int _nfs4_proc_create_session(struct nfs_client *clp)
@@ -5111,7 +5046,6 @@ static int nfs41_sequence_handle_errors(struct rpc_task *task, struct nfs_client
5111{ 5046{
5112 switch(task->tk_status) { 5047 switch(task->tk_status) {
5113 case -NFS4ERR_DELAY: 5048 case -NFS4ERR_DELAY:
5114 case -EKEYEXPIRED:
5115 rpc_delay(task, NFS4_POLL_RETRY_MAX); 5049 rpc_delay(task, NFS4_POLL_RETRY_MAX);
5116 return -EAGAIN; 5050 return -EAGAIN;
5117 default: 5051 default:
@@ -5180,12 +5114,11 @@ static struct rpc_task *_nfs41_proc_sequence(struct nfs_client *clp, struct rpc_
5180 5114
5181 if (!atomic_inc_not_zero(&clp->cl_count)) 5115 if (!atomic_inc_not_zero(&clp->cl_count))
5182 return ERR_PTR(-EIO); 5116 return ERR_PTR(-EIO);
5183 calldata = kmalloc(sizeof(*calldata), GFP_NOFS); 5117 calldata = kzalloc(sizeof(*calldata), GFP_NOFS);
5184 if (calldata == NULL) { 5118 if (calldata == NULL) {
5185 nfs_put_client(clp); 5119 nfs_put_client(clp);
5186 return ERR_PTR(-ENOMEM); 5120 return ERR_PTR(-ENOMEM);
5187 } 5121 }
5188 calldata->res.sr_slotid = NFS4_MAX_SLOT_TABLE;
5189 msg.rpc_argp = &calldata->args; 5122 msg.rpc_argp = &calldata->args;
5190 msg.rpc_resp = &calldata->res; 5123 msg.rpc_resp = &calldata->res;
5191 calldata->clp = clp; 5124 calldata->clp = clp;
@@ -5254,7 +5187,6 @@ static int nfs41_reclaim_complete_handle_errors(struct rpc_task *task, struct nf
5254 case -NFS4ERR_WRONG_CRED: /* What to do here? */ 5187 case -NFS4ERR_WRONG_CRED: /* What to do here? */
5255 break; 5188 break;
5256 case -NFS4ERR_DELAY: 5189 case -NFS4ERR_DELAY:
5257 case -EKEYEXPIRED:
5258 rpc_delay(task, NFS4_POLL_RETRY_MAX); 5190 rpc_delay(task, NFS4_POLL_RETRY_MAX);
5259 return -EAGAIN; 5191 return -EAGAIN;
5260 default: 5192 default:
@@ -5317,7 +5249,6 @@ static int nfs41_proc_reclaim_complete(struct nfs_client *clp)
5317 goto out; 5249 goto out;
5318 calldata->clp = clp; 5250 calldata->clp = clp;
5319 calldata->arg.one_fs = 0; 5251 calldata->arg.one_fs = 0;
5320 calldata->res.seq_res.sr_slotid = NFS4_MAX_SLOT_TABLE;
5321 5252
5322 msg.rpc_argp = &calldata->arg; 5253 msg.rpc_argp = &calldata->arg;
5323 msg.rpc_resp = &calldata->res; 5254 msg.rpc_resp = &calldata->res;
@@ -5333,6 +5264,147 @@ out:
5333 dprintk("<-- %s status=%d\n", __func__, status); 5264 dprintk("<-- %s status=%d\n", __func__, status);
5334 return status; 5265 return status;
5335} 5266}
5267
5268static void
5269nfs4_layoutget_prepare(struct rpc_task *task, void *calldata)
5270{
5271 struct nfs4_layoutget *lgp = calldata;
5272 struct inode *ino = lgp->args.inode;
5273 struct nfs_server *server = NFS_SERVER(ino);
5274
5275 dprintk("--> %s\n", __func__);
5276 if (nfs4_setup_sequence(server, &lgp->args.seq_args,
5277 &lgp->res.seq_res, 0, task))
5278 return;
5279 rpc_call_start(task);
5280}
5281
5282static void nfs4_layoutget_done(struct rpc_task *task, void *calldata)
5283{
5284 struct nfs4_layoutget *lgp = calldata;
5285 struct nfs_server *server = NFS_SERVER(lgp->args.inode);
5286
5287 dprintk("--> %s\n", __func__);
5288
5289 if (!nfs4_sequence_done(task, &lgp->res.seq_res))
5290 return;
5291
5292 switch (task->tk_status) {
5293 case 0:
5294 break;
5295 case -NFS4ERR_LAYOUTTRYLATER:
5296 case -NFS4ERR_RECALLCONFLICT:
5297 task->tk_status = -NFS4ERR_DELAY;
5298 /* Fall through */
5299 default:
5300 if (nfs4_async_handle_error(task, server, NULL) == -EAGAIN) {
5301 rpc_restart_call_prepare(task);
5302 return;
5303 }
5304 }
5305 lgp->status = task->tk_status;
5306 dprintk("<-- %s\n", __func__);
5307}
5308
5309static void nfs4_layoutget_release(void *calldata)
5310{
5311 struct nfs4_layoutget *lgp = calldata;
5312
5313 dprintk("--> %s\n", __func__);
5314 put_layout_hdr(lgp->args.inode);
5315 if (lgp->res.layout.buf != NULL)
5316 free_page((unsigned long) lgp->res.layout.buf);
5317 put_nfs_open_context(lgp->args.ctx);
5318 kfree(calldata);
5319 dprintk("<-- %s\n", __func__);
5320}
5321
5322static const struct rpc_call_ops nfs4_layoutget_call_ops = {
5323 .rpc_call_prepare = nfs4_layoutget_prepare,
5324 .rpc_call_done = nfs4_layoutget_done,
5325 .rpc_release = nfs4_layoutget_release,
5326};
5327
5328int nfs4_proc_layoutget(struct nfs4_layoutget *lgp)
5329{
5330 struct nfs_server *server = NFS_SERVER(lgp->args.inode);
5331 struct rpc_task *task;
5332 struct rpc_message msg = {
5333 .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LAYOUTGET],
5334 .rpc_argp = &lgp->args,
5335 .rpc_resp = &lgp->res,
5336 };
5337 struct rpc_task_setup task_setup_data = {
5338 .rpc_client = server->client,
5339 .rpc_message = &msg,
5340 .callback_ops = &nfs4_layoutget_call_ops,
5341 .callback_data = lgp,
5342 .flags = RPC_TASK_ASYNC,
5343 };
5344 int status = 0;
5345
5346 dprintk("--> %s\n", __func__);
5347
5348 lgp->res.layout.buf = (void *)__get_free_page(GFP_NOFS);
5349 if (lgp->res.layout.buf == NULL) {
5350 nfs4_layoutget_release(lgp);
5351 return -ENOMEM;
5352 }
5353
5354 lgp->res.seq_res.sr_slot = NULL;
5355 task = rpc_run_task(&task_setup_data);
5356 if (IS_ERR(task))
5357 return PTR_ERR(task);
5358 status = nfs4_wait_for_completion_rpc_task(task);
5359 if (status != 0)
5360 goto out;
5361 status = lgp->status;
5362 if (status != 0)
5363 goto out;
5364 status = pnfs_layout_process(lgp);
5365out:
5366 rpc_put_task(task);
5367 dprintk("<-- %s status=%d\n", __func__, status);
5368 return status;
5369}
5370
5371static int
5372_nfs4_proc_getdeviceinfo(struct nfs_server *server, struct pnfs_device *pdev)
5373{
5374 struct nfs4_getdeviceinfo_args args = {
5375 .pdev = pdev,
5376 };
5377 struct nfs4_getdeviceinfo_res res = {
5378 .pdev = pdev,
5379 };
5380 struct rpc_message msg = {
5381 .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_GETDEVICEINFO],
5382 .rpc_argp = &args,
5383 .rpc_resp = &res,
5384 };
5385 int status;
5386
5387 dprintk("--> %s\n", __func__);
5388 status = nfs4_call_sync(server, &msg, &args, &res, 0);
5389 dprintk("<-- %s status=%d\n", __func__, status);
5390
5391 return status;
5392}
5393
5394int nfs4_proc_getdeviceinfo(struct nfs_server *server, struct pnfs_device *pdev)
5395{
5396 struct nfs4_exception exception = { };
5397 int err;
5398
5399 do {
5400 err = nfs4_handle_exception(server,
5401 _nfs4_proc_getdeviceinfo(server, pdev),
5402 &exception);
5403 } while (exception.retry);
5404 return err;
5405}
5406EXPORT_SYMBOL_GPL(nfs4_proc_getdeviceinfo);
5407
5336#endif /* CONFIG_NFS_V4_1 */ 5408#endif /* CONFIG_NFS_V4_1 */
5337 5409
5338struct nfs4_state_recovery_ops nfs40_reboot_recovery_ops = { 5410struct nfs4_state_recovery_ops nfs40_reboot_recovery_ops = {
@@ -5443,6 +5515,8 @@ const struct nfs_rpc_ops nfs_v4_clientops = {
5443 .unlink_setup = nfs4_proc_unlink_setup, 5515 .unlink_setup = nfs4_proc_unlink_setup,
5444 .unlink_done = nfs4_proc_unlink_done, 5516 .unlink_done = nfs4_proc_unlink_done,
5445 .rename = nfs4_proc_rename, 5517 .rename = nfs4_proc_rename,
5518 .rename_setup = nfs4_proc_rename_setup,
5519 .rename_done = nfs4_proc_rename_done,
5446 .link = nfs4_proc_link, 5520 .link = nfs4_proc_link,
5447 .symlink = nfs4_proc_symlink, 5521 .symlink = nfs4_proc_symlink,
5448 .mkdir = nfs4_proc_mkdir, 5522 .mkdir = nfs4_proc_mkdir,
@@ -5463,6 +5537,7 @@ const struct nfs_rpc_ops nfs_v4_clientops = {
5463 .lock = nfs4_proc_lock, 5537 .lock = nfs4_proc_lock,
5464 .clear_acl_cache = nfs4_zap_acl_attr, 5538 .clear_acl_cache = nfs4_zap_acl_attr,
5465 .close_context = nfs4_close_context, 5539 .close_context = nfs4_close_context,
5540 .open_context = nfs4_atomic_open,
5466}; 5541};
5467 5542
5468/* 5543/*
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index 96524c5dca6b..f575a3126737 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -46,6 +46,7 @@
46#include <linux/kthread.h> 46#include <linux/kthread.h>
47#include <linux/module.h> 47#include <linux/module.h>
48#include <linux/random.h> 48#include <linux/random.h>
49#include <linux/ratelimit.h>
49#include <linux/workqueue.h> 50#include <linux/workqueue.h>
50#include <linux/bitops.h> 51#include <linux/bitops.h>
51 52
@@ -53,6 +54,7 @@
53#include "callback.h" 54#include "callback.h"
54#include "delegation.h" 55#include "delegation.h"
55#include "internal.h" 56#include "internal.h"
57#include "pnfs.h"
56 58
57#define OPENOWNER_POOL_SIZE 8 59#define OPENOWNER_POOL_SIZE 8
58 60
@@ -1063,6 +1065,14 @@ restart:
1063 /* Mark the file as being 'closed' */ 1065 /* Mark the file as being 'closed' */
1064 state->state = 0; 1066 state->state = 0;
1065 break; 1067 break;
1068 case -EKEYEXPIRED:
1069 /*
1070 * User RPCSEC_GSS context has expired.
1071 * We cannot recover this stateid now, so
1072 * skip it and allow recovery thread to
1073 * proceed.
1074 */
1075 break;
1066 case -NFS4ERR_ADMIN_REVOKED: 1076 case -NFS4ERR_ADMIN_REVOKED:
1067 case -NFS4ERR_STALE_STATEID: 1077 case -NFS4ERR_STALE_STATEID:
1068 case -NFS4ERR_BAD_STATEID: 1078 case -NFS4ERR_BAD_STATEID:
@@ -1138,16 +1148,14 @@ static void nfs4_reclaim_complete(struct nfs_client *clp,
1138 (void)ops->reclaim_complete(clp); 1148 (void)ops->reclaim_complete(clp);
1139} 1149}
1140 1150
1141static void nfs4_state_end_reclaim_reboot(struct nfs_client *clp) 1151static int nfs4_state_clear_reclaim_reboot(struct nfs_client *clp)
1142{ 1152{
1143 struct nfs4_state_owner *sp; 1153 struct nfs4_state_owner *sp;
1144 struct rb_node *pos; 1154 struct rb_node *pos;
1145 struct nfs4_state *state; 1155 struct nfs4_state *state;
1146 1156
1147 if (!test_and_clear_bit(NFS4CLNT_RECLAIM_REBOOT, &clp->cl_state)) 1157 if (!test_and_clear_bit(NFS4CLNT_RECLAIM_REBOOT, &clp->cl_state))
1148 return; 1158 return 0;
1149
1150 nfs4_reclaim_complete(clp, clp->cl_mvops->reboot_recovery_ops);
1151 1159
1152 for (pos = rb_first(&clp->cl_state_owners); pos != NULL; pos = rb_next(pos)) { 1160 for (pos = rb_first(&clp->cl_state_owners); pos != NULL; pos = rb_next(pos)) {
1153 sp = rb_entry(pos, struct nfs4_state_owner, so_client_node); 1161 sp = rb_entry(pos, struct nfs4_state_owner, so_client_node);
@@ -1161,6 +1169,14 @@ static void nfs4_state_end_reclaim_reboot(struct nfs_client *clp)
1161 } 1169 }
1162 1170
1163 nfs_delegation_reap_unclaimed(clp); 1171 nfs_delegation_reap_unclaimed(clp);
1172 return 1;
1173}
1174
1175static void nfs4_state_end_reclaim_reboot(struct nfs_client *clp)
1176{
1177 if (!nfs4_state_clear_reclaim_reboot(clp))
1178 return;
1179 nfs4_reclaim_complete(clp, clp->cl_mvops->reboot_recovery_ops);
1164} 1180}
1165 1181
1166static void nfs_delegation_clear_all(struct nfs_client *clp) 1182static void nfs_delegation_clear_all(struct nfs_client *clp)
@@ -1175,6 +1191,14 @@ static void nfs4_state_start_reclaim_nograce(struct nfs_client *clp)
1175 nfs4_state_mark_reclaim_helper(clp, nfs4_state_mark_reclaim_nograce); 1191 nfs4_state_mark_reclaim_helper(clp, nfs4_state_mark_reclaim_nograce);
1176} 1192}
1177 1193
1194static void nfs4_warn_keyexpired(const char *s)
1195{
1196 printk_ratelimited(KERN_WARNING "Error: state manager"
1197 " encountered RPCSEC_GSS session"
1198 " expired against NFSv4 server %s.\n",
1199 s);
1200}
1201
1178static int nfs4_recovery_handle_error(struct nfs_client *clp, int error) 1202static int nfs4_recovery_handle_error(struct nfs_client *clp, int error)
1179{ 1203{
1180 switch (error) { 1204 switch (error) {
@@ -1187,7 +1211,7 @@ static int nfs4_recovery_handle_error(struct nfs_client *clp, int error)
1187 case -NFS4ERR_STALE_CLIENTID: 1211 case -NFS4ERR_STALE_CLIENTID:
1188 case -NFS4ERR_LEASE_MOVED: 1212 case -NFS4ERR_LEASE_MOVED:
1189 set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state); 1213 set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state);
1190 nfs4_state_end_reclaim_reboot(clp); 1214 nfs4_state_clear_reclaim_reboot(clp);
1191 nfs4_state_start_reclaim_reboot(clp); 1215 nfs4_state_start_reclaim_reboot(clp);
1192 break; 1216 break;
1193 case -NFS4ERR_EXPIRED: 1217 case -NFS4ERR_EXPIRED:
@@ -1204,6 +1228,10 @@ static int nfs4_recovery_handle_error(struct nfs_client *clp, int error)
1204 set_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state); 1228 set_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state);
1205 /* Zero session reset errors */ 1229 /* Zero session reset errors */
1206 return 0; 1230 return 0;
1231 case -EKEYEXPIRED:
1232 /* Nothing we can do */
1233 nfs4_warn_keyexpired(clp->cl_hostname);
1234 return 0;
1207 } 1235 }
1208 return error; 1236 return error;
1209} 1237}
@@ -1414,9 +1442,10 @@ static void nfs4_set_lease_expired(struct nfs_client *clp, int status)
1414 case -NFS4ERR_DELAY: 1442 case -NFS4ERR_DELAY:
1415 case -NFS4ERR_CLID_INUSE: 1443 case -NFS4ERR_CLID_INUSE:
1416 case -EAGAIN: 1444 case -EAGAIN:
1417 case -EKEYEXPIRED:
1418 break; 1445 break;
1419 1446
1447 case -EKEYEXPIRED:
1448 nfs4_warn_keyexpired(clp->cl_hostname);
1420 case -NFS4ERR_NOT_SAME: /* FixMe: implement recovery 1449 case -NFS4ERR_NOT_SAME: /* FixMe: implement recovery
1421 * in nfs4_exchange_id */ 1450 * in nfs4_exchange_id */
1422 default: 1451 default:
@@ -1447,6 +1476,7 @@ static void nfs4_state_manager(struct nfs_client *clp)
1447 } 1476 }
1448 clear_bit(NFS4CLNT_CHECK_LEASE, &clp->cl_state); 1477 clear_bit(NFS4CLNT_CHECK_LEASE, &clp->cl_state);
1449 set_bit(NFS4CLNT_RECLAIM_REBOOT, &clp->cl_state); 1478 set_bit(NFS4CLNT_RECLAIM_REBOOT, &clp->cl_state);
1479 pnfs_destroy_all_layouts(clp);
1450 } 1480 }
1451 1481
1452 if (test_and_clear_bit(NFS4CLNT_CHECK_LEASE, &clp->cl_state)) { 1482 if (test_and_clear_bit(NFS4CLNT_CHECK_LEASE, &clp->cl_state)) {
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 08ef91291132..f313c4cce7e4 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -52,6 +52,7 @@
52#include <linux/nfs_idmap.h> 52#include <linux/nfs_idmap.h>
53#include "nfs4_fs.h" 53#include "nfs4_fs.h"
54#include "internal.h" 54#include "internal.h"
55#include "pnfs.h"
55 56
56#define NFSDBG_FACILITY NFSDBG_XDR 57#define NFSDBG_FACILITY NFSDBG_XDR
57 58
@@ -310,6 +311,19 @@ static int nfs4_stat_to_errno(int);
310 XDR_QUADLEN(NFS4_MAX_SESSIONID_LEN) + 5) 311 XDR_QUADLEN(NFS4_MAX_SESSIONID_LEN) + 5)
311#define encode_reclaim_complete_maxsz (op_encode_hdr_maxsz + 4) 312#define encode_reclaim_complete_maxsz (op_encode_hdr_maxsz + 4)
312#define decode_reclaim_complete_maxsz (op_decode_hdr_maxsz + 4) 313#define decode_reclaim_complete_maxsz (op_decode_hdr_maxsz + 4)
314#define encode_getdeviceinfo_maxsz (op_encode_hdr_maxsz + 4 + \
315 XDR_QUADLEN(NFS4_DEVICEID4_SIZE))
316#define decode_getdeviceinfo_maxsz (op_decode_hdr_maxsz + \
317 1 /* layout type */ + \
318 1 /* opaque devaddr4 length */ + \
319 /* devaddr4 payload is read into page */ \
320 1 /* notification bitmap length */ + \
321 1 /* notification bitmap */)
322#define encode_layoutget_maxsz (op_encode_hdr_maxsz + 10 + \
323 encode_stateid_maxsz)
324#define decode_layoutget_maxsz (op_decode_hdr_maxsz + 8 + \
325 decode_stateid_maxsz + \
326 XDR_QUADLEN(PNFS_LAYOUT_MAXSIZE))
313#else /* CONFIG_NFS_V4_1 */ 327#else /* CONFIG_NFS_V4_1 */
314#define encode_sequence_maxsz 0 328#define encode_sequence_maxsz 0
315#define decode_sequence_maxsz 0 329#define decode_sequence_maxsz 0
@@ -699,6 +713,20 @@ static int nfs4_stat_to_errno(int);
699#define NFS4_dec_reclaim_complete_sz (compound_decode_hdr_maxsz + \ 713#define NFS4_dec_reclaim_complete_sz (compound_decode_hdr_maxsz + \
700 decode_sequence_maxsz + \ 714 decode_sequence_maxsz + \
701 decode_reclaim_complete_maxsz) 715 decode_reclaim_complete_maxsz)
716#define NFS4_enc_getdeviceinfo_sz (compound_encode_hdr_maxsz + \
717 encode_sequence_maxsz +\
718 encode_getdeviceinfo_maxsz)
719#define NFS4_dec_getdeviceinfo_sz (compound_decode_hdr_maxsz + \
720 decode_sequence_maxsz + \
721 decode_getdeviceinfo_maxsz)
722#define NFS4_enc_layoutget_sz (compound_encode_hdr_maxsz + \
723 encode_sequence_maxsz + \
724 encode_putfh_maxsz + \
725 encode_layoutget_maxsz)
726#define NFS4_dec_layoutget_sz (compound_decode_hdr_maxsz + \
727 decode_sequence_maxsz + \
728 decode_putfh_maxsz + \
729 decode_layoutget_maxsz)
702 730
703const u32 nfs41_maxwrite_overhead = ((RPC_MAX_HEADER_WITH_AUTH + 731const u32 nfs41_maxwrite_overhead = ((RPC_MAX_HEADER_WITH_AUTH +
704 compound_encode_hdr_maxsz + 732 compound_encode_hdr_maxsz +
@@ -816,7 +844,7 @@ static void encode_attrs(struct xdr_stream *xdr, const struct iattr *iap, const
816 if (iap->ia_valid & ATTR_MODE) 844 if (iap->ia_valid & ATTR_MODE)
817 len += 4; 845 len += 4;
818 if (iap->ia_valid & ATTR_UID) { 846 if (iap->ia_valid & ATTR_UID) {
819 owner_namelen = nfs_map_uid_to_name(server->nfs_client, iap->ia_uid, owner_name); 847 owner_namelen = nfs_map_uid_to_name(server->nfs_client, iap->ia_uid, owner_name, IDMAP_NAMESZ);
820 if (owner_namelen < 0) { 848 if (owner_namelen < 0) {
821 dprintk("nfs: couldn't resolve uid %d to string\n", 849 dprintk("nfs: couldn't resolve uid %d to string\n",
822 iap->ia_uid); 850 iap->ia_uid);
@@ -828,7 +856,7 @@ static void encode_attrs(struct xdr_stream *xdr, const struct iattr *iap, const
828 len += 4 + (XDR_QUADLEN(owner_namelen) << 2); 856 len += 4 + (XDR_QUADLEN(owner_namelen) << 2);
829 } 857 }
830 if (iap->ia_valid & ATTR_GID) { 858 if (iap->ia_valid & ATTR_GID) {
831 owner_grouplen = nfs_map_gid_to_group(server->nfs_client, iap->ia_gid, owner_group); 859 owner_grouplen = nfs_map_gid_to_group(server->nfs_client, iap->ia_gid, owner_group, IDMAP_NAMESZ);
832 if (owner_grouplen < 0) { 860 if (owner_grouplen < 0) {
833 dprintk("nfs: couldn't resolve gid %d to string\n", 861 dprintk("nfs: couldn't resolve gid %d to string\n",
834 iap->ia_gid); 862 iap->ia_gid);
@@ -1385,24 +1413,35 @@ static void encode_read(struct xdr_stream *xdr, const struct nfs_readargs *args,
1385 1413
1386static void encode_readdir(struct xdr_stream *xdr, const struct nfs4_readdir_arg *readdir, struct rpc_rqst *req, struct compound_hdr *hdr) 1414static void encode_readdir(struct xdr_stream *xdr, const struct nfs4_readdir_arg *readdir, struct rpc_rqst *req, struct compound_hdr *hdr)
1387{ 1415{
1388 uint32_t attrs[2] = { 1416 uint32_t attrs[2] = {0, 0};
1389 FATTR4_WORD0_RDATTR_ERROR|FATTR4_WORD0_FILEID, 1417 uint32_t dircount = readdir->count >> 1;
1390 FATTR4_WORD1_MOUNTED_ON_FILEID,
1391 };
1392 __be32 *p; 1418 __be32 *p;
1393 1419
1420 if (readdir->plus) {
1421 attrs[0] |= FATTR4_WORD0_TYPE|FATTR4_WORD0_CHANGE|FATTR4_WORD0_SIZE|
1422 FATTR4_WORD0_FSID|FATTR4_WORD0_FILEHANDLE;
1423 attrs[1] |= FATTR4_WORD1_MODE|FATTR4_WORD1_NUMLINKS|FATTR4_WORD1_OWNER|
1424 FATTR4_WORD1_OWNER_GROUP|FATTR4_WORD1_RAWDEV|
1425 FATTR4_WORD1_SPACE_USED|FATTR4_WORD1_TIME_ACCESS|
1426 FATTR4_WORD1_TIME_METADATA|FATTR4_WORD1_TIME_MODIFY;
1427 dircount >>= 1;
1428 }
1429 attrs[0] |= FATTR4_WORD0_RDATTR_ERROR|FATTR4_WORD0_FILEID;
1430 attrs[1] |= FATTR4_WORD1_MOUNTED_ON_FILEID;
1431 /* Switch to mounted_on_fileid if the server supports it */
1432 if (readdir->bitmask[1] & FATTR4_WORD1_MOUNTED_ON_FILEID)
1433 attrs[0] &= ~FATTR4_WORD0_FILEID;
1434 else
1435 attrs[1] &= ~FATTR4_WORD1_MOUNTED_ON_FILEID;
1436
1394 p = reserve_space(xdr, 12+NFS4_VERIFIER_SIZE+20); 1437 p = reserve_space(xdr, 12+NFS4_VERIFIER_SIZE+20);
1395 *p++ = cpu_to_be32(OP_READDIR); 1438 *p++ = cpu_to_be32(OP_READDIR);
1396 p = xdr_encode_hyper(p, readdir->cookie); 1439 p = xdr_encode_hyper(p, readdir->cookie);
1397 p = xdr_encode_opaque_fixed(p, readdir->verifier.data, NFS4_VERIFIER_SIZE); 1440 p = xdr_encode_opaque_fixed(p, readdir->verifier.data, NFS4_VERIFIER_SIZE);
1398 *p++ = cpu_to_be32(readdir->count >> 1); /* We're not doing readdirplus */ 1441 *p++ = cpu_to_be32(dircount);
1399 *p++ = cpu_to_be32(readdir->count); 1442 *p++ = cpu_to_be32(readdir->count);
1400 *p++ = cpu_to_be32(2); 1443 *p++ = cpu_to_be32(2);
1401 /* Switch to mounted_on_fileid if the server supports it */ 1444
1402 if (readdir->bitmask[1] & FATTR4_WORD1_MOUNTED_ON_FILEID)
1403 attrs[0] &= ~FATTR4_WORD0_FILEID;
1404 else
1405 attrs[1] &= ~FATTR4_WORD1_MOUNTED_ON_FILEID;
1406 *p++ = cpu_to_be32(attrs[0] & readdir->bitmask[0]); 1445 *p++ = cpu_to_be32(attrs[0] & readdir->bitmask[0]);
1407 *p = cpu_to_be32(attrs[1] & readdir->bitmask[1]); 1446 *p = cpu_to_be32(attrs[1] & readdir->bitmask[1]);
1408 hdr->nops++; 1447 hdr->nops++;
@@ -1726,6 +1765,58 @@ static void encode_sequence(struct xdr_stream *xdr,
1726#endif /* CONFIG_NFS_V4_1 */ 1765#endif /* CONFIG_NFS_V4_1 */
1727} 1766}
1728 1767
1768#ifdef CONFIG_NFS_V4_1
1769static void
1770encode_getdeviceinfo(struct xdr_stream *xdr,
1771 const struct nfs4_getdeviceinfo_args *args,
1772 struct compound_hdr *hdr)
1773{
1774 __be32 *p;
1775
1776 p = reserve_space(xdr, 16 + NFS4_DEVICEID4_SIZE);
1777 *p++ = cpu_to_be32(OP_GETDEVICEINFO);
1778 p = xdr_encode_opaque_fixed(p, args->pdev->dev_id.data,
1779 NFS4_DEVICEID4_SIZE);
1780 *p++ = cpu_to_be32(args->pdev->layout_type);
1781 *p++ = cpu_to_be32(args->pdev->pglen); /* gdia_maxcount */
1782 *p++ = cpu_to_be32(0); /* bitmap length 0 */
1783 hdr->nops++;
1784 hdr->replen += decode_getdeviceinfo_maxsz;
1785}
1786
1787static void
1788encode_layoutget(struct xdr_stream *xdr,
1789 const struct nfs4_layoutget_args *args,
1790 struct compound_hdr *hdr)
1791{
1792 nfs4_stateid stateid;
1793 __be32 *p;
1794
1795 p = reserve_space(xdr, 44 + NFS4_STATEID_SIZE);
1796 *p++ = cpu_to_be32(OP_LAYOUTGET);
1797 *p++ = cpu_to_be32(0); /* Signal layout available */
1798 *p++ = cpu_to_be32(args->type);
1799 *p++ = cpu_to_be32(args->range.iomode);
1800 p = xdr_encode_hyper(p, args->range.offset);
1801 p = xdr_encode_hyper(p, args->range.length);
1802 p = xdr_encode_hyper(p, args->minlength);
1803 pnfs_get_layout_stateid(&stateid, NFS_I(args->inode)->layout,
1804 args->ctx->state);
1805 p = xdr_encode_opaque_fixed(p, &stateid.data, NFS4_STATEID_SIZE);
1806 *p = cpu_to_be32(args->maxcount);
1807
1808 dprintk("%s: 1st type:0x%x iomode:%d off:%lu len:%lu mc:%d\n",
1809 __func__,
1810 args->type,
1811 args->range.iomode,
1812 (unsigned long)args->range.offset,
1813 (unsigned long)args->range.length,
1814 args->maxcount);
1815 hdr->nops++;
1816 hdr->replen += decode_layoutget_maxsz;
1817}
1818#endif /* CONFIG_NFS_V4_1 */
1819
1729/* 1820/*
1730 * END OF "GENERIC" ENCODE ROUTINES. 1821 * END OF "GENERIC" ENCODE ROUTINES.
1731 */ 1822 */
@@ -1823,7 +1914,7 @@ static int nfs4_xdr_enc_remove(struct rpc_rqst *req, __be32 *p, const struct nfs
1823/* 1914/*
1824 * Encode RENAME request 1915 * Encode RENAME request
1825 */ 1916 */
1826static int nfs4_xdr_enc_rename(struct rpc_rqst *req, __be32 *p, const struct nfs4_rename_arg *args) 1917static int nfs4_xdr_enc_rename(struct rpc_rqst *req, __be32 *p, const struct nfs_renameargs *args)
1827{ 1918{
1828 struct xdr_stream xdr; 1919 struct xdr_stream xdr;
1829 struct compound_hdr hdr = { 1920 struct compound_hdr hdr = {
@@ -2543,6 +2634,51 @@ static int nfs4_xdr_enc_reclaim_complete(struct rpc_rqst *req, uint32_t *p,
2543 return 0; 2634 return 0;
2544} 2635}
2545 2636
2637/*
2638 * Encode GETDEVICEINFO request
2639 */
2640static int nfs4_xdr_enc_getdeviceinfo(struct rpc_rqst *req, uint32_t *p,
2641 struct nfs4_getdeviceinfo_args *args)
2642{
2643 struct xdr_stream xdr;
2644 struct compound_hdr hdr = {
2645 .minorversion = nfs4_xdr_minorversion(&args->seq_args),
2646 };
2647
2648 xdr_init_encode(&xdr, &req->rq_snd_buf, p);
2649 encode_compound_hdr(&xdr, req, &hdr);
2650 encode_sequence(&xdr, &args->seq_args, &hdr);
2651 encode_getdeviceinfo(&xdr, args, &hdr);
2652
2653 /* set up reply kvec. Subtract notification bitmap max size (2)
2654 * so that notification bitmap is put in xdr_buf tail */
2655 xdr_inline_pages(&req->rq_rcv_buf, (hdr.replen - 2) << 2,
2656 args->pdev->pages, args->pdev->pgbase,
2657 args->pdev->pglen);
2658
2659 encode_nops(&hdr);
2660 return 0;
2661}
2662
2663/*
2664 * Encode LAYOUTGET request
2665 */
2666static int nfs4_xdr_enc_layoutget(struct rpc_rqst *req, uint32_t *p,
2667 struct nfs4_layoutget_args *args)
2668{
2669 struct xdr_stream xdr;
2670 struct compound_hdr hdr = {
2671 .minorversion = nfs4_xdr_minorversion(&args->seq_args),
2672 };
2673
2674 xdr_init_encode(&xdr, &req->rq_snd_buf, p);
2675 encode_compound_hdr(&xdr, req, &hdr);
2676 encode_sequence(&xdr, &args->seq_args, &hdr);
2677 encode_putfh(&xdr, NFS_FH(args->inode), &hdr);
2678 encode_layoutget(&xdr, args, &hdr);
2679 encode_nops(&hdr);
2680 return 0;
2681}
2546#endif /* CONFIG_NFS_V4_1 */ 2682#endif /* CONFIG_NFS_V4_1 */
2547 2683
2548static void print_overflow_msg(const char *func, const struct xdr_stream *xdr) 2684static void print_overflow_msg(const char *func, const struct xdr_stream *xdr)
@@ -2676,7 +2812,10 @@ out_overflow:
2676static int decode_attr_supported(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *bitmask) 2812static int decode_attr_supported(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *bitmask)
2677{ 2813{
2678 if (likely(bitmap[0] & FATTR4_WORD0_SUPPORTED_ATTRS)) { 2814 if (likely(bitmap[0] & FATTR4_WORD0_SUPPORTED_ATTRS)) {
2679 decode_attr_bitmap(xdr, bitmask); 2815 int ret;
2816 ret = decode_attr_bitmap(xdr, bitmask);
2817 if (unlikely(ret < 0))
2818 return ret;
2680 bitmap[0] &= ~FATTR4_WORD0_SUPPORTED_ATTRS; 2819 bitmap[0] &= ~FATTR4_WORD0_SUPPORTED_ATTRS;
2681 } else 2820 } else
2682 bitmask[0] = bitmask[1] = 0; 2821 bitmask[0] = bitmask[1] = 0;
@@ -2848,6 +2987,56 @@ out_overflow:
2848 return -EIO; 2987 return -EIO;
2849} 2988}
2850 2989
2990static int decode_attr_error(struct xdr_stream *xdr, uint32_t *bitmap)
2991{
2992 __be32 *p;
2993
2994 if (unlikely(bitmap[0] & (FATTR4_WORD0_RDATTR_ERROR - 1U)))
2995 return -EIO;
2996 if (likely(bitmap[0] & FATTR4_WORD0_RDATTR_ERROR)) {
2997 p = xdr_inline_decode(xdr, 4);
2998 if (unlikely(!p))
2999 goto out_overflow;
3000 bitmap[0] &= ~FATTR4_WORD0_RDATTR_ERROR;
3001 }
3002 return 0;
3003out_overflow:
3004 print_overflow_msg(__func__, xdr);
3005 return -EIO;
3006}
3007
3008static int decode_attr_filehandle(struct xdr_stream *xdr, uint32_t *bitmap, struct nfs_fh *fh)
3009{
3010 __be32 *p;
3011 int len;
3012
3013 if (fh != NULL)
3014 memset(fh, 0, sizeof(*fh));
3015
3016 if (unlikely(bitmap[0] & (FATTR4_WORD0_FILEHANDLE - 1U)))
3017 return -EIO;
3018 if (likely(bitmap[0] & FATTR4_WORD0_FILEHANDLE)) {
3019 p = xdr_inline_decode(xdr, 4);
3020 if (unlikely(!p))
3021 goto out_overflow;
3022 len = be32_to_cpup(p);
3023 if (len > NFS4_FHSIZE)
3024 return -EIO;
3025 p = xdr_inline_decode(xdr, len);
3026 if (unlikely(!p))
3027 goto out_overflow;
3028 if (fh != NULL) {
3029 memcpy(fh->data, p, len);
3030 fh->size = len;
3031 }
3032 bitmap[0] &= ~FATTR4_WORD0_FILEHANDLE;
3033 }
3034 return 0;
3035out_overflow:
3036 print_overflow_msg(__func__, xdr);
3037 return -EIO;
3038}
3039
2851static int decode_attr_aclsupport(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *res) 3040static int decode_attr_aclsupport(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *res)
2852{ 3041{
2853 __be32 *p; 3042 __be32 *p;
@@ -3521,6 +3710,24 @@ static int decode_attr_time_metadata(struct xdr_stream *xdr, uint32_t *bitmap, s
3521 return status; 3710 return status;
3522} 3711}
3523 3712
3713static int decode_attr_time_delta(struct xdr_stream *xdr, uint32_t *bitmap,
3714 struct timespec *time)
3715{
3716 int status = 0;
3717
3718 time->tv_sec = 0;
3719 time->tv_nsec = 0;
3720 if (unlikely(bitmap[1] & (FATTR4_WORD1_TIME_DELTA - 1U)))
3721 return -EIO;
3722 if (likely(bitmap[1] & FATTR4_WORD1_TIME_DELTA)) {
3723 status = decode_attr_time(xdr, time);
3724 bitmap[1] &= ~FATTR4_WORD1_TIME_DELTA;
3725 }
3726 dprintk("%s: time_delta=%ld %ld\n", __func__, (long)time->tv_sec,
3727 (long)time->tv_nsec);
3728 return status;
3729}
3730
3524static int decode_attr_time_modify(struct xdr_stream *xdr, uint32_t *bitmap, struct timespec *time) 3731static int decode_attr_time_modify(struct xdr_stream *xdr, uint32_t *bitmap, struct timespec *time)
3525{ 3732{
3526 int status = 0; 3733 int status = 0;
@@ -3744,29 +3951,14 @@ xdr_error:
3744 return status; 3951 return status;
3745} 3952}
3746 3953
3747static int decode_getfattr(struct xdr_stream *xdr, struct nfs_fattr *fattr, 3954static int decode_getfattr_attrs(struct xdr_stream *xdr, uint32_t *bitmap,
3955 struct nfs_fattr *fattr, struct nfs_fh *fh,
3748 const struct nfs_server *server, int may_sleep) 3956 const struct nfs_server *server, int may_sleep)
3749{ 3957{
3750 __be32 *savep;
3751 uint32_t attrlen,
3752 bitmap[2] = {0},
3753 type;
3754 int status; 3958 int status;
3755 umode_t fmode = 0; 3959 umode_t fmode = 0;
3756 uint64_t fileid; 3960 uint64_t fileid;
3757 3961 uint32_t type;
3758 status = decode_op_hdr(xdr, OP_GETATTR);
3759 if (status < 0)
3760 goto xdr_error;
3761
3762 status = decode_attr_bitmap(xdr, bitmap);
3763 if (status < 0)
3764 goto xdr_error;
3765
3766 status = decode_attr_length(xdr, &attrlen, &savep);
3767 if (status < 0)
3768 goto xdr_error;
3769
3770 3962
3771 status = decode_attr_type(xdr, bitmap, &type); 3963 status = decode_attr_type(xdr, bitmap, &type);
3772 if (status < 0) 3964 if (status < 0)
@@ -3792,6 +3984,14 @@ static int decode_getfattr(struct xdr_stream *xdr, struct nfs_fattr *fattr,
3792 goto xdr_error; 3984 goto xdr_error;
3793 fattr->valid |= status; 3985 fattr->valid |= status;
3794 3986
3987 status = decode_attr_error(xdr, bitmap);
3988 if (status < 0)
3989 goto xdr_error;
3990
3991 status = decode_attr_filehandle(xdr, bitmap, fh);
3992 if (status < 0)
3993 goto xdr_error;
3994
3795 status = decode_attr_fileid(xdr, bitmap, &fattr->fileid); 3995 status = decode_attr_fileid(xdr, bitmap, &fattr->fileid);
3796 if (status < 0) 3996 if (status < 0)
3797 goto xdr_error; 3997 goto xdr_error;
@@ -3862,12 +4062,101 @@ static int decode_getfattr(struct xdr_stream *xdr, struct nfs_fattr *fattr,
3862 fattr->valid |= status; 4062 fattr->valid |= status;
3863 } 4063 }
3864 4064
4065xdr_error:
4066 dprintk("%s: xdr returned %d\n", __func__, -status);
4067 return status;
4068}
4069
4070static int decode_getfattr_generic(struct xdr_stream *xdr, struct nfs_fattr *fattr,
4071 struct nfs_fh *fh, const struct nfs_server *server, int may_sleep)
4072{
4073 __be32 *savep;
4074 uint32_t attrlen,
4075 bitmap[2] = {0};
4076 int status;
4077
4078 status = decode_op_hdr(xdr, OP_GETATTR);
4079 if (status < 0)
4080 goto xdr_error;
4081
4082 status = decode_attr_bitmap(xdr, bitmap);
4083 if (status < 0)
4084 goto xdr_error;
4085
4086 status = decode_attr_length(xdr, &attrlen, &savep);
4087 if (status < 0)
4088 goto xdr_error;
4089
4090 status = decode_getfattr_attrs(xdr, bitmap, fattr, fh, server, may_sleep);
4091 if (status < 0)
4092 goto xdr_error;
4093
3865 status = verify_attr_len(xdr, savep, attrlen); 4094 status = verify_attr_len(xdr, savep, attrlen);
3866xdr_error: 4095xdr_error:
3867 dprintk("%s: xdr returned %d\n", __func__, -status); 4096 dprintk("%s: xdr returned %d\n", __func__, -status);
3868 return status; 4097 return status;
3869} 4098}
3870 4099
4100static int decode_getfattr(struct xdr_stream *xdr, struct nfs_fattr *fattr,
4101 const struct nfs_server *server, int may_sleep)
4102{
4103 return decode_getfattr_generic(xdr, fattr, NULL, server, may_sleep);
4104}
4105
4106/*
4107 * Decode potentially multiple layout types. Currently we only support
4108 * one layout driver per file system.
4109 */
4110static int decode_first_pnfs_layout_type(struct xdr_stream *xdr,
4111 uint32_t *layouttype)
4112{
4113 uint32_t *p;
4114 int num;
4115
4116 p = xdr_inline_decode(xdr, 4);
4117 if (unlikely(!p))
4118 goto out_overflow;
4119 num = be32_to_cpup(p);
4120
4121 /* pNFS is not supported by the underlying file system */
4122 if (num == 0) {
4123 *layouttype = 0;
4124 return 0;
4125 }
4126 if (num > 1)
4127 printk(KERN_INFO "%s: Warning: Multiple pNFS layout drivers "
4128 "per filesystem not supported\n", __func__);
4129
4130 /* Decode and set first layout type, move xdr->p past unused types */
4131 p = xdr_inline_decode(xdr, num * 4);
4132 if (unlikely(!p))
4133 goto out_overflow;
4134 *layouttype = be32_to_cpup(p);
4135 return 0;
4136out_overflow:
4137 print_overflow_msg(__func__, xdr);
4138 return -EIO;
4139}
4140
4141/*
4142 * The type of file system exported.
4143 * Note we must ensure that layouttype is set in any non-error case.
4144 */
4145static int decode_attr_pnfstype(struct xdr_stream *xdr, uint32_t *bitmap,
4146 uint32_t *layouttype)
4147{
4148 int status = 0;
4149
4150 dprintk("%s: bitmap is %x\n", __func__, bitmap[1]);
4151 if (unlikely(bitmap[1] & (FATTR4_WORD1_FS_LAYOUT_TYPES - 1U)))
4152 return -EIO;
4153 if (bitmap[1] & FATTR4_WORD1_FS_LAYOUT_TYPES) {
4154 status = decode_first_pnfs_layout_type(xdr, layouttype);
4155 bitmap[1] &= ~FATTR4_WORD1_FS_LAYOUT_TYPES;
4156 } else
4157 *layouttype = 0;
4158 return status;
4159}
3871 4160
3872static int decode_fsinfo(struct xdr_stream *xdr, struct nfs_fsinfo *fsinfo) 4161static int decode_fsinfo(struct xdr_stream *xdr, struct nfs_fsinfo *fsinfo)
3873{ 4162{
@@ -3894,6 +4183,12 @@ static int decode_fsinfo(struct xdr_stream *xdr, struct nfs_fsinfo *fsinfo)
3894 if ((status = decode_attr_maxwrite(xdr, bitmap, &fsinfo->wtmax)) != 0) 4183 if ((status = decode_attr_maxwrite(xdr, bitmap, &fsinfo->wtmax)) != 0)
3895 goto xdr_error; 4184 goto xdr_error;
3896 fsinfo->wtpref = fsinfo->wtmax; 4185 fsinfo->wtpref = fsinfo->wtmax;
4186 status = decode_attr_time_delta(xdr, bitmap, &fsinfo->time_delta);
4187 if (status != 0)
4188 goto xdr_error;
4189 status = decode_attr_pnfstype(xdr, bitmap, &fsinfo->layouttype);
4190 if (status != 0)
4191 goto xdr_error;
3897 4192
3898 status = verify_attr_len(xdr, savep, attrlen); 4193 status = verify_attr_len(xdr, savep, attrlen);
3899xdr_error: 4194xdr_error:
@@ -3950,13 +4245,13 @@ static int decode_lock_denied (struct xdr_stream *xdr, struct file_lock *fl)
3950 __be32 *p; 4245 __be32 *p;
3951 uint32_t namelen, type; 4246 uint32_t namelen, type;
3952 4247
3953 p = xdr_inline_decode(xdr, 32); 4248 p = xdr_inline_decode(xdr, 32); /* read 32 bytes */
3954 if (unlikely(!p)) 4249 if (unlikely(!p))
3955 goto out_overflow; 4250 goto out_overflow;
3956 p = xdr_decode_hyper(p, &offset); 4251 p = xdr_decode_hyper(p, &offset); /* read 2 8-byte long words */
3957 p = xdr_decode_hyper(p, &length); 4252 p = xdr_decode_hyper(p, &length);
3958 type = be32_to_cpup(p++); 4253 type = be32_to_cpup(p++); /* 4 byte read */
3959 if (fl != NULL) { 4254 if (fl != NULL) { /* manipulate file lock */
3960 fl->fl_start = (loff_t)offset; 4255 fl->fl_start = (loff_t)offset;
3961 fl->fl_end = fl->fl_start + (loff_t)length - 1; 4256 fl->fl_end = fl->fl_start + (loff_t)length - 1;
3962 if (length == ~(uint64_t)0) 4257 if (length == ~(uint64_t)0)
@@ -3966,9 +4261,9 @@ static int decode_lock_denied (struct xdr_stream *xdr, struct file_lock *fl)
3966 fl->fl_type = F_RDLCK; 4261 fl->fl_type = F_RDLCK;
3967 fl->fl_pid = 0; 4262 fl->fl_pid = 0;
3968 } 4263 }
3969 p = xdr_decode_hyper(p, &clientid); 4264 p = xdr_decode_hyper(p, &clientid); /* read 8 bytes */
3970 namelen = be32_to_cpup(p); 4265 namelen = be32_to_cpup(p); /* read 4 bytes */ /* have read all 32 bytes now */
3971 p = xdr_inline_decode(xdr, namelen); 4266 p = xdr_inline_decode(xdr, namelen); /* variable size field */
3972 if (likely(p)) 4267 if (likely(p))
3973 return -NFS4ERR_DENIED; 4268 return -NFS4ERR_DENIED;
3974out_overflow: 4269out_overflow:
@@ -4200,12 +4495,9 @@ out_overflow:
4200static int decode_readdir(struct xdr_stream *xdr, struct rpc_rqst *req, struct nfs4_readdir_res *readdir) 4495static int decode_readdir(struct xdr_stream *xdr, struct rpc_rqst *req, struct nfs4_readdir_res *readdir)
4201{ 4496{
4202 struct xdr_buf *rcvbuf = &req->rq_rcv_buf; 4497 struct xdr_buf *rcvbuf = &req->rq_rcv_buf;
4203 struct page *page = *rcvbuf->pages;
4204 struct kvec *iov = rcvbuf->head; 4498 struct kvec *iov = rcvbuf->head;
4205 size_t hdrlen; 4499 size_t hdrlen;
4206 u32 recvd, pglen = rcvbuf->page_len; 4500 u32 recvd, pglen = rcvbuf->page_len;
4207 __be32 *end, *entry, *p, *kaddr;
4208 unsigned int nr = 0;
4209 int status; 4501 int status;
4210 4502
4211 status = decode_op_hdr(xdr, OP_READDIR); 4503 status = decode_op_hdr(xdr, OP_READDIR);
@@ -4225,71 +4517,8 @@ static int decode_readdir(struct xdr_stream *xdr, struct rpc_rqst *req, struct n
4225 pglen = recvd; 4517 pglen = recvd;
4226 xdr_read_pages(xdr, pglen); 4518 xdr_read_pages(xdr, pglen);
4227 4519
4228 BUG_ON(pglen + readdir->pgbase > PAGE_CACHE_SIZE); 4520
4229 kaddr = p = kmap_atomic(page, KM_USER0);
4230 end = p + ((pglen + readdir->pgbase) >> 2);
4231 entry = p;
4232
4233 /* Make sure the packet actually has a value_follows and EOF entry */
4234 if ((entry + 1) > end)
4235 goto short_pkt;
4236
4237 for (; *p++; nr++) {
4238 u32 len, attrlen, xlen;
4239 if (end - p < 3)
4240 goto short_pkt;
4241 dprintk("cookie = %Lu, ", *((unsigned long long *)p));
4242 p += 2; /* cookie */
4243 len = ntohl(*p++); /* filename length */
4244 if (len > NFS4_MAXNAMLEN) {
4245 dprintk("NFS: giant filename in readdir (len 0x%x)\n",
4246 len);
4247 goto err_unmap;
4248 }
4249 xlen = XDR_QUADLEN(len);
4250 if (end - p < xlen + 1)
4251 goto short_pkt;
4252 dprintk("filename = %*s\n", len, (char *)p);
4253 p += xlen;
4254 len = ntohl(*p++); /* bitmap length */
4255 if (end - p < len + 1)
4256 goto short_pkt;
4257 p += len;
4258 attrlen = XDR_QUADLEN(ntohl(*p++));
4259 if (end - p < attrlen + 2)
4260 goto short_pkt;
4261 p += attrlen; /* attributes */
4262 entry = p;
4263 }
4264 /*
4265 * Apparently some server sends responses that are a valid size, but
4266 * contain no entries, and have value_follows==0 and EOF==0. For
4267 * those, just set the EOF marker.
4268 */
4269 if (!nr && entry[1] == 0) {
4270 dprintk("NFS: readdir reply truncated!\n");
4271 entry[1] = 1;
4272 }
4273out:
4274 kunmap_atomic(kaddr, KM_USER0);
4275 return 0; 4521 return 0;
4276short_pkt:
4277 /*
4278 * When we get a short packet there are 2 possibilities. We can
4279 * return an error, or fix up the response to look like a valid
4280 * response and return what we have so far. If there are no
4281 * entries and the packet was short, then return -EIO. If there
4282 * are valid entries in the response, return them and pretend that
4283 * the call was successful, but incomplete. The caller can retry the
4284 * readdir starting at the last cookie.
4285 */
4286 dprintk("%s: short packet at entry %d\n", __func__, nr);
4287 entry[0] = entry[1] = 0;
4288 if (nr)
4289 goto out;
4290err_unmap:
4291 kunmap_atomic(kaddr, KM_USER0);
4292 return -errno_NFSERR_IO;
4293} 4522}
4294 4523
4295static int decode_readlink(struct xdr_stream *xdr, struct rpc_rqst *req) 4524static int decode_readlink(struct xdr_stream *xdr, struct rpc_rqst *req)
@@ -4299,7 +4528,6 @@ static int decode_readlink(struct xdr_stream *xdr, struct rpc_rqst *req)
4299 size_t hdrlen; 4528 size_t hdrlen;
4300 u32 len, recvd; 4529 u32 len, recvd;
4301 __be32 *p; 4530 __be32 *p;
4302 char *kaddr;
4303 int status; 4531 int status;
4304 4532
4305 status = decode_op_hdr(xdr, OP_READLINK); 4533 status = decode_op_hdr(xdr, OP_READLINK);
@@ -4330,9 +4558,7 @@ static int decode_readlink(struct xdr_stream *xdr, struct rpc_rqst *req)
4330 * and and null-terminate the text (the VFS expects 4558 * and and null-terminate the text (the VFS expects
4331 * null-termination). 4559 * null-termination).
4332 */ 4560 */
4333 kaddr = (char *)kmap_atomic(rcvbuf->pages[0], KM_USER0); 4561 xdr_terminate_string(rcvbuf, len);
4334 kaddr[len+rcvbuf->page_base] = '\0';
4335 kunmap_atomic(kaddr, KM_USER0);
4336 return 0; 4562 return 0;
4337out_overflow: 4563out_overflow:
4338 print_overflow_msg(__func__, xdr); 4564 print_overflow_msg(__func__, xdr);
@@ -4668,7 +4894,6 @@ static int decode_sequence(struct xdr_stream *xdr,
4668 struct rpc_rqst *rqstp) 4894 struct rpc_rqst *rqstp)
4669{ 4895{
4670#if defined(CONFIG_NFS_V4_1) 4896#if defined(CONFIG_NFS_V4_1)
4671 struct nfs4_slot *slot;
4672 struct nfs4_sessionid id; 4897 struct nfs4_sessionid id;
4673 u32 dummy; 4898 u32 dummy;
4674 int status; 4899 int status;
@@ -4700,15 +4925,14 @@ static int decode_sequence(struct xdr_stream *xdr,
4700 goto out_overflow; 4925 goto out_overflow;
4701 4926
4702 /* seqid */ 4927 /* seqid */
4703 slot = &res->sr_session->fc_slot_table.slots[res->sr_slotid];
4704 dummy = be32_to_cpup(p++); 4928 dummy = be32_to_cpup(p++);
4705 if (dummy != slot->seq_nr) { 4929 if (dummy != res->sr_slot->seq_nr) {
4706 dprintk("%s Invalid sequence number\n", __func__); 4930 dprintk("%s Invalid sequence number\n", __func__);
4707 goto out_err; 4931 goto out_err;
4708 } 4932 }
4709 /* slot id */ 4933 /* slot id */
4710 dummy = be32_to_cpup(p++); 4934 dummy = be32_to_cpup(p++);
4711 if (dummy != res->sr_slotid) { 4935 if (dummy != res->sr_slot - res->sr_session->fc_slot_table.slots) {
4712 dprintk("%s Invalid slot id\n", __func__); 4936 dprintk("%s Invalid slot id\n", __func__);
4713 goto out_err; 4937 goto out_err;
4714 } 4938 }
@@ -4731,6 +4955,134 @@ out_overflow:
4731#endif /* CONFIG_NFS_V4_1 */ 4955#endif /* CONFIG_NFS_V4_1 */
4732} 4956}
4733 4957
4958#if defined(CONFIG_NFS_V4_1)
4959
4960static int decode_getdeviceinfo(struct xdr_stream *xdr,
4961 struct pnfs_device *pdev)
4962{
4963 __be32 *p;
4964 uint32_t len, type;
4965 int status;
4966
4967 status = decode_op_hdr(xdr, OP_GETDEVICEINFO);
4968 if (status) {
4969 if (status == -ETOOSMALL) {
4970 p = xdr_inline_decode(xdr, 4);
4971 if (unlikely(!p))
4972 goto out_overflow;
4973 pdev->mincount = be32_to_cpup(p);
4974 dprintk("%s: Min count too small. mincnt = %u\n",
4975 __func__, pdev->mincount);
4976 }
4977 return status;
4978 }
4979
4980 p = xdr_inline_decode(xdr, 8);
4981 if (unlikely(!p))
4982 goto out_overflow;
4983 type = be32_to_cpup(p++);
4984 if (type != pdev->layout_type) {
4985 dprintk("%s: layout mismatch req: %u pdev: %u\n",
4986 __func__, pdev->layout_type, type);
4987 return -EINVAL;
4988 }
4989 /*
4990 * Get the length of the opaque device_addr4. xdr_read_pages places
4991 * the opaque device_addr4 in the xdr_buf->pages (pnfs_device->pages)
4992 * and places the remaining xdr data in xdr_buf->tail
4993 */
4994 pdev->mincount = be32_to_cpup(p);
4995 xdr_read_pages(xdr, pdev->mincount); /* include space for the length */
4996
4997 /* Parse notification bitmap, verifying that it is zero. */
4998 p = xdr_inline_decode(xdr, 4);
4999 if (unlikely(!p))
5000 goto out_overflow;
5001 len = be32_to_cpup(p);
5002 if (len) {
5003 int i;
5004
5005 p = xdr_inline_decode(xdr, 4 * len);
5006 if (unlikely(!p))
5007 goto out_overflow;
5008 for (i = 0; i < len; i++, p++) {
5009 if (be32_to_cpup(p)) {
5010 dprintk("%s: notifications not supported\n",
5011 __func__);
5012 return -EIO;
5013 }
5014 }
5015 }
5016 return 0;
5017out_overflow:
5018 print_overflow_msg(__func__, xdr);
5019 return -EIO;
5020}
5021
5022static int decode_layoutget(struct xdr_stream *xdr, struct rpc_rqst *req,
5023 struct nfs4_layoutget_res *res)
5024{
5025 __be32 *p;
5026 int status;
5027 u32 layout_count;
5028
5029 status = decode_op_hdr(xdr, OP_LAYOUTGET);
5030 if (status)
5031 return status;
5032 p = xdr_inline_decode(xdr, 8 + NFS4_STATEID_SIZE);
5033 if (unlikely(!p))
5034 goto out_overflow;
5035 res->return_on_close = be32_to_cpup(p++);
5036 p = xdr_decode_opaque_fixed(p, res->stateid.data, NFS4_STATEID_SIZE);
5037 layout_count = be32_to_cpup(p);
5038 if (!layout_count) {
5039 dprintk("%s: server responded with empty layout array\n",
5040 __func__);
5041 return -EINVAL;
5042 }
5043
5044 p = xdr_inline_decode(xdr, 24);
5045 if (unlikely(!p))
5046 goto out_overflow;
5047 p = xdr_decode_hyper(p, &res->range.offset);
5048 p = xdr_decode_hyper(p, &res->range.length);
5049 res->range.iomode = be32_to_cpup(p++);
5050 res->type = be32_to_cpup(p++);
5051
5052 status = decode_opaque_inline(xdr, &res->layout.len, (char **)&p);
5053 if (unlikely(status))
5054 return status;
5055
5056 dprintk("%s roff:%lu rlen:%lu riomode:%d, lo_type:0x%x, lo.len:%d\n",
5057 __func__,
5058 (unsigned long)res->range.offset,
5059 (unsigned long)res->range.length,
5060 res->range.iomode,
5061 res->type,
5062 res->layout.len);
5063
5064 /* nfs4_proc_layoutget allocated a single page */
5065 if (res->layout.len > PAGE_SIZE)
5066 return -ENOMEM;
5067 memcpy(res->layout.buf, p, res->layout.len);
5068
5069 if (layout_count > 1) {
5070 /* We only handle a length one array at the moment. Any
5071 * further entries are just ignored. Note that this means
5072 * the client may see a response that is less than the
5073 * minimum it requested.
5074 */
5075 dprintk("%s: server responded with %d layouts, dropping tail\n",
5076 __func__, layout_count);
5077 }
5078
5079 return 0;
5080out_overflow:
5081 print_overflow_msg(__func__, xdr);
5082 return -EIO;
5083}
5084#endif /* CONFIG_NFS_V4_1 */
5085
4734/* 5086/*
4735 * END OF "GENERIC" DECODE ROUTINES. 5087 * END OF "GENERIC" DECODE ROUTINES.
4736 */ 5088 */
@@ -4873,7 +5225,7 @@ out:
4873/* 5225/*
4874 * Decode RENAME response 5226 * Decode RENAME response
4875 */ 5227 */
4876static int nfs4_xdr_dec_rename(struct rpc_rqst *rqstp, __be32 *p, struct nfs4_rename_res *res) 5228static int nfs4_xdr_dec_rename(struct rpc_rqst *rqstp, __be32 *p, struct nfs_renameres *res)
4877{ 5229{
4878 struct xdr_stream xdr; 5230 struct xdr_stream xdr;
4879 struct compound_hdr hdr; 5231 struct compound_hdr hdr;
@@ -5758,25 +6110,84 @@ static int nfs4_xdr_dec_reclaim_complete(struct rpc_rqst *rqstp, uint32_t *p,
5758 status = decode_reclaim_complete(&xdr, (void *)NULL); 6110 status = decode_reclaim_complete(&xdr, (void *)NULL);
5759 return status; 6111 return status;
5760} 6112}
6113
6114/*
6115 * Decode GETDEVINFO response
6116 */
6117static int nfs4_xdr_dec_getdeviceinfo(struct rpc_rqst *rqstp, uint32_t *p,
6118 struct nfs4_getdeviceinfo_res *res)
6119{
6120 struct xdr_stream xdr;
6121 struct compound_hdr hdr;
6122 int status;
6123
6124 xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
6125 status = decode_compound_hdr(&xdr, &hdr);
6126 if (status != 0)
6127 goto out;
6128 status = decode_sequence(&xdr, &res->seq_res, rqstp);
6129 if (status != 0)
6130 goto out;
6131 status = decode_getdeviceinfo(&xdr, res->pdev);
6132out:
6133 return status;
6134}
6135
6136/*
6137 * Decode LAYOUTGET response
6138 */
6139static int nfs4_xdr_dec_layoutget(struct rpc_rqst *rqstp, uint32_t *p,
6140 struct nfs4_layoutget_res *res)
6141{
6142 struct xdr_stream xdr;
6143 struct compound_hdr hdr;
6144 int status;
6145
6146 xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
6147 status = decode_compound_hdr(&xdr, &hdr);
6148 if (status)
6149 goto out;
6150 status = decode_sequence(&xdr, &res->seq_res, rqstp);
6151 if (status)
6152 goto out;
6153 status = decode_putfh(&xdr);
6154 if (status)
6155 goto out;
6156 status = decode_layoutget(&xdr, rqstp, res);
6157out:
6158 return status;
6159}
5761#endif /* CONFIG_NFS_V4_1 */ 6160#endif /* CONFIG_NFS_V4_1 */
5762 6161
5763__be32 *nfs4_decode_dirent(__be32 *p, struct nfs_entry *entry, int plus) 6162__be32 *nfs4_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry,
6163 struct nfs_server *server, int plus)
5764{ 6164{
5765 uint32_t bitmap[2] = {0}; 6165 uint32_t bitmap[2] = {0};
5766 uint32_t len; 6166 uint32_t len;
5767 6167 __be32 *p = xdr_inline_decode(xdr, 4);
5768 if (!*p++) { 6168 if (unlikely(!p))
5769 if (!*p) 6169 goto out_overflow;
6170 if (!ntohl(*p++)) {
6171 p = xdr_inline_decode(xdr, 4);
6172 if (unlikely(!p))
6173 goto out_overflow;
6174 if (!ntohl(*p++))
5770 return ERR_PTR(-EAGAIN); 6175 return ERR_PTR(-EAGAIN);
5771 entry->eof = 1; 6176 entry->eof = 1;
5772 return ERR_PTR(-EBADCOOKIE); 6177 return ERR_PTR(-EBADCOOKIE);
5773 } 6178 }
5774 6179
6180 p = xdr_inline_decode(xdr, 12);
6181 if (unlikely(!p))
6182 goto out_overflow;
5775 entry->prev_cookie = entry->cookie; 6183 entry->prev_cookie = entry->cookie;
5776 p = xdr_decode_hyper(p, &entry->cookie); 6184 p = xdr_decode_hyper(p, &entry->cookie);
5777 entry->len = ntohl(*p++); 6185 entry->len = ntohl(*p++);
6186
6187 p = xdr_inline_decode(xdr, entry->len);
6188 if (unlikely(!p))
6189 goto out_overflow;
5778 entry->name = (const char *) p; 6190 entry->name = (const char *) p;
5779 p += XDR_QUADLEN(entry->len);
5780 6191
5781 /* 6192 /*
5782 * In case the server doesn't return an inode number, 6193 * In case the server doesn't return an inode number,
@@ -5784,32 +6195,33 @@ __be32 *nfs4_decode_dirent(__be32 *p, struct nfs_entry *entry, int plus)
5784 * since glibc seems to choke on it...) 6195 * since glibc seems to choke on it...)
5785 */ 6196 */
5786 entry->ino = 1; 6197 entry->ino = 1;
6198 entry->fattr->valid = 0;
5787 6199
5788 len = ntohl(*p++); /* bitmap length */ 6200 if (decode_attr_bitmap(xdr, bitmap) < 0)
5789 if (len-- > 0) { 6201 goto out_overflow;
5790 bitmap[0] = ntohl(*p++); 6202
5791 if (len-- > 0) { 6203 if (decode_attr_length(xdr, &len, &p) < 0)
5792 bitmap[1] = ntohl(*p++); 6204 goto out_overflow;
5793 p += len; 6205
5794 } 6206 if (decode_getfattr_attrs(xdr, bitmap, entry->fattr, entry->fh, server, 1) < 0)
5795 } 6207 goto out_overflow;
5796 len = XDR_QUADLEN(ntohl(*p++)); /* attribute buffer length */ 6208 if (entry->fattr->valid & NFS_ATTR_FATTR_FILEID)
5797 if (len > 0) { 6209 entry->ino = entry->fattr->fileid;
5798 if (bitmap[0] & FATTR4_WORD0_RDATTR_ERROR) { 6210
5799 bitmap[0] &= ~FATTR4_WORD0_RDATTR_ERROR; 6211 if (verify_attr_len(xdr, p, len) < 0)
5800 /* Ignore the return value of rdattr_error for now */ 6212 goto out_overflow;
5801 p++; 6213
5802 len--; 6214 p = xdr_inline_peek(xdr, 8);
5803 } 6215 if (p != NULL)
5804 if (bitmap[0] == 0 && bitmap[1] == FATTR4_WORD1_MOUNTED_ON_FILEID) 6216 entry->eof = !p[0] && p[1];
5805 xdr_decode_hyper(p, &entry->ino); 6217 else
5806 else if (bitmap[0] == FATTR4_WORD0_FILEID) 6218 entry->eof = 0;
5807 xdr_decode_hyper(p, &entry->ino);
5808 p += len;
5809 }
5810 6219
5811 entry->eof = !p[0] && p[1];
5812 return p; 6220 return p;
6221
6222out_overflow:
6223 print_overflow_msg(__func__, xdr);
6224 return ERR_PTR(-EIO);
5813} 6225}
5814 6226
5815/* 6227/*
@@ -5936,6 +6348,8 @@ struct rpc_procinfo nfs4_procedures[] = {
5936 PROC(SEQUENCE, enc_sequence, dec_sequence), 6348 PROC(SEQUENCE, enc_sequence, dec_sequence),
5937 PROC(GET_LEASE_TIME, enc_get_lease_time, dec_get_lease_time), 6349 PROC(GET_LEASE_TIME, enc_get_lease_time, dec_get_lease_time),
5938 PROC(RECLAIM_COMPLETE, enc_reclaim_complete, dec_reclaim_complete), 6350 PROC(RECLAIM_COMPLETE, enc_reclaim_complete, dec_reclaim_complete),
6351 PROC(GETDEVICEINFO, enc_getdeviceinfo, dec_getdeviceinfo),
6352 PROC(LAYOUTGET, enc_layoutget, dec_layoutget),
5939#endif /* CONFIG_NFS_V4_1 */ 6353#endif /* CONFIG_NFS_V4_1 */
5940}; 6354};
5941 6355
diff --git a/fs/nfs/nfsroot.c b/fs/nfs/nfsroot.c
index df101d9f546a..903908a20023 100644
--- a/fs/nfs/nfsroot.c
+++ b/fs/nfs/nfsroot.c
@@ -3,9 +3,10 @@
3 * 3 *
4 * Allow an NFS filesystem to be mounted as root. The way this works is: 4 * Allow an NFS filesystem to be mounted as root. The way this works is:
5 * (1) Use the IP autoconfig mechanism to set local IP addresses and routes. 5 * (1) Use the IP autoconfig mechanism to set local IP addresses and routes.
6 * (2) Handle RPC negotiation with the system which replied to RARP or 6 * (2) Construct the device string and the options string using DHCP
7 * was reported as a boot server by BOOTP or manually. 7 * option 17 and/or kernel command line options.
8 * (3) The actual mounting is done later, when init() is running. 8 * (3) When mount_root() sets up the root file system, pass these strings
9 * to the NFS client's regular mount interface via sys_mount().
9 * 10 *
10 * 11 *
11 * Changes: 12 * Changes:
@@ -65,470 +66,245 @@
65 * Hua Qin : Support for mounting root file system via 66 * Hua Qin : Support for mounting root file system via
66 * NFS over TCP. 67 * NFS over TCP.
67 * Fabian Frederick: Option parser rebuilt (using parser lib) 68 * Fabian Frederick: Option parser rebuilt (using parser lib)
68*/ 69 * Chuck Lever : Use super.c's text-based mount option parsing
70 * Chuck Lever : Add "nfsrootdebug".
71 */
69 72
70#include <linux/types.h> 73#include <linux/types.h>
71#include <linux/string.h> 74#include <linux/string.h>
72#include <linux/kernel.h>
73#include <linux/time.h>
74#include <linux/fs.h>
75#include <linux/init.h> 75#include <linux/init.h>
76#include <linux/sunrpc/clnt.h>
77#include <linux/sunrpc/xprtsock.h>
78#include <linux/nfs.h> 76#include <linux/nfs.h>
79#include <linux/nfs_fs.h> 77#include <linux/nfs_fs.h>
80#include <linux/nfs_mount.h>
81#include <linux/in.h>
82#include <linux/major.h>
83#include <linux/utsname.h> 78#include <linux/utsname.h>
84#include <linux/inet.h>
85#include <linux/root_dev.h> 79#include <linux/root_dev.h>
86#include <net/ipconfig.h> 80#include <net/ipconfig.h>
87#include <linux/parser.h>
88 81
89#include "internal.h" 82#include "internal.h"
90 83
91/* Define this to allow debugging output */
92#undef NFSROOT_DEBUG
93#define NFSDBG_FACILITY NFSDBG_ROOT 84#define NFSDBG_FACILITY NFSDBG_ROOT
94 85
95/* Default port to use if server is not running a portmapper */
96#define NFS_MNT_PORT 627
97
98/* Default path we try to mount. "%s" gets replaced by our IP address */ 86/* Default path we try to mount. "%s" gets replaced by our IP address */
99#define NFS_ROOT "/tftpboot/%s" 87#define NFS_ROOT "/tftpboot/%s"
100 88
101/* Parameters passed from the kernel command line */ 89/* Parameters passed from the kernel command line */
102static char nfs_root_name[256] __initdata = ""; 90static char nfs_root_parms[256] __initdata = "";
91
92/* Text-based mount options passed to super.c */
93static char nfs_root_options[256] __initdata = "";
103 94
104/* Address of NFS server */ 95/* Address of NFS server */
105static __be32 servaddr __initdata = 0; 96static __be32 servaddr __initdata = htonl(INADDR_NONE);
106 97
107/* Name of directory to mount */ 98/* Name of directory to mount */
108static char nfs_export_path[NFS_MAXPATHLEN + 1] __initdata = { 0, }; 99static char nfs_export_path[NFS_MAXPATHLEN + 1] __initdata = "";
109
110/* NFS-related data */
111static struct nfs_mount_data nfs_data __initdata = { 0, };/* NFS mount info */
112static int nfs_port __initdata = 0; /* Port to connect to for NFS */
113static int mount_port __initdata = 0; /* Mount daemon port number */
114
115
116/***************************************************************************
117
118 Parsing of options
119
120 ***************************************************************************/
121
122enum {
123 /* Options that take integer arguments */
124 Opt_port, Opt_rsize, Opt_wsize, Opt_timeo, Opt_retrans, Opt_acregmin,
125 Opt_acregmax, Opt_acdirmin, Opt_acdirmax,
126 /* Options that take no arguments */
127 Opt_soft, Opt_hard, Opt_intr,
128 Opt_nointr, Opt_posix, Opt_noposix, Opt_cto, Opt_nocto, Opt_ac,
129 Opt_noac, Opt_lock, Opt_nolock, Opt_v2, Opt_v3, Opt_udp, Opt_tcp,
130 Opt_acl, Opt_noacl,
131 /* Error token */
132 Opt_err
133};
134
135static const match_table_t tokens __initconst = {
136 {Opt_port, "port=%u"},
137 {Opt_rsize, "rsize=%u"},
138 {Opt_wsize, "wsize=%u"},
139 {Opt_timeo, "timeo=%u"},
140 {Opt_retrans, "retrans=%u"},
141 {Opt_acregmin, "acregmin=%u"},
142 {Opt_acregmax, "acregmax=%u"},
143 {Opt_acdirmin, "acdirmin=%u"},
144 {Opt_acdirmax, "acdirmax=%u"},
145 {Opt_soft, "soft"},
146 {Opt_hard, "hard"},
147 {Opt_intr, "intr"},
148 {Opt_nointr, "nointr"},
149 {Opt_posix, "posix"},
150 {Opt_noposix, "noposix"},
151 {Opt_cto, "cto"},
152 {Opt_nocto, "nocto"},
153 {Opt_ac, "ac"},
154 {Opt_noac, "noac"},
155 {Opt_lock, "lock"},
156 {Opt_nolock, "nolock"},
157 {Opt_v2, "nfsvers=2"},
158 {Opt_v2, "v2"},
159 {Opt_v3, "nfsvers=3"},
160 {Opt_v3, "v3"},
161 {Opt_udp, "proto=udp"},
162 {Opt_udp, "udp"},
163 {Opt_tcp, "proto=tcp"},
164 {Opt_tcp, "tcp"},
165 {Opt_acl, "acl"},
166 {Opt_noacl, "noacl"},
167 {Opt_err, NULL}
168
169};
170 100
101/* server:export path string passed to super.c */
102static char nfs_root_device[NFS_MAXPATHLEN + 1] __initdata = "";
103
104#ifdef RPC_DEBUG
171/* 105/*
172 * Parse option string. 106 * When the "nfsrootdebug" kernel command line option is specified,
107 * enable debugging messages for NFSROOT.
173 */ 108 */
174 109static int __init nfs_root_debug(char *__unused)
175static int __init root_nfs_parse(char *name, char *buf)
176{ 110{
177 111 nfs_debug |= NFSDBG_ROOT | NFSDBG_MOUNT;
178 char *p;
179 substring_t args[MAX_OPT_ARGS];
180 int option;
181
182 if (!name)
183 return 1;
184
185 /* Set the NFS remote path */
186 p = strsep(&name, ",");
187 if (p[0] != '\0' && strcmp(p, "default") != 0)
188 strlcpy(buf, p, NFS_MAXPATHLEN);
189
190 while ((p = strsep (&name, ",")) != NULL) {
191 int token;
192 if (!*p)
193 continue;
194 token = match_token(p, tokens, args);
195
196 /* %u tokens only. Beware if you add new tokens! */
197 if (token < Opt_soft && match_int(&args[0], &option))
198 return 0;
199 switch (token) {
200 case Opt_port:
201 nfs_port = option;
202 break;
203 case Opt_rsize:
204 nfs_data.rsize = option;
205 break;
206 case Opt_wsize:
207 nfs_data.wsize = option;
208 break;
209 case Opt_timeo:
210 nfs_data.timeo = option;
211 break;
212 case Opt_retrans:
213 nfs_data.retrans = option;
214 break;
215 case Opt_acregmin:
216 nfs_data.acregmin = option;
217 break;
218 case Opt_acregmax:
219 nfs_data.acregmax = option;
220 break;
221 case Opt_acdirmin:
222 nfs_data.acdirmin = option;
223 break;
224 case Opt_acdirmax:
225 nfs_data.acdirmax = option;
226 break;
227 case Opt_soft:
228 nfs_data.flags |= NFS_MOUNT_SOFT;
229 break;
230 case Opt_hard:
231 nfs_data.flags &= ~NFS_MOUNT_SOFT;
232 break;
233 case Opt_intr:
234 case Opt_nointr:
235 break;
236 case Opt_posix:
237 nfs_data.flags |= NFS_MOUNT_POSIX;
238 break;
239 case Opt_noposix:
240 nfs_data.flags &= ~NFS_MOUNT_POSIX;
241 break;
242 case Opt_cto:
243 nfs_data.flags &= ~NFS_MOUNT_NOCTO;
244 break;
245 case Opt_nocto:
246 nfs_data.flags |= NFS_MOUNT_NOCTO;
247 break;
248 case Opt_ac:
249 nfs_data.flags &= ~NFS_MOUNT_NOAC;
250 break;
251 case Opt_noac:
252 nfs_data.flags |= NFS_MOUNT_NOAC;
253 break;
254 case Opt_lock:
255 nfs_data.flags &= ~NFS_MOUNT_NONLM;
256 break;
257 case Opt_nolock:
258 nfs_data.flags |= NFS_MOUNT_NONLM;
259 break;
260 case Opt_v2:
261 nfs_data.flags &= ~NFS_MOUNT_VER3;
262 break;
263 case Opt_v3:
264 nfs_data.flags |= NFS_MOUNT_VER3;
265 break;
266 case Opt_udp:
267 nfs_data.flags &= ~NFS_MOUNT_TCP;
268 break;
269 case Opt_tcp:
270 nfs_data.flags |= NFS_MOUNT_TCP;
271 break;
272 case Opt_acl:
273 nfs_data.flags &= ~NFS_MOUNT_NOACL;
274 break;
275 case Opt_noacl:
276 nfs_data.flags |= NFS_MOUNT_NOACL;
277 break;
278 default:
279 printk(KERN_WARNING "Root-NFS: unknown "
280 "option: %s\n", p);
281 return 0;
282 }
283 }
284
285 return 1; 112 return 1;
286} 113}
287 114
115__setup("nfsrootdebug", nfs_root_debug);
116#endif
117
288/* 118/*
289 * Prepare the NFS data structure and parse all options. 119 * Parse NFS server and directory information passed on the kernel
120 * command line.
121 *
122 * nfsroot=[<server-ip>:]<root-dir>[,<nfs-options>]
123 *
124 * If there is a "%s" token in the <root-dir> string, it is replaced
125 * by the ASCII-representation of the client's IP address.
290 */ 126 */
291static int __init root_nfs_name(char *name) 127static int __init nfs_root_setup(char *line)
292{ 128{
293 static char buf[NFS_MAXPATHLEN] __initdata; 129 ROOT_DEV = Root_NFS;
294 char *cp; 130
295 131 if (line[0] == '/' || line[0] == ',' || (line[0] >= '0' && line[0] <= '9')) {
296 /* Set some default values */ 132 strlcpy(nfs_root_parms, line, sizeof(nfs_root_parms));
297 memset(&nfs_data, 0, sizeof(nfs_data)); 133 } else {
298 nfs_port = -1; 134 size_t n = strlen(line) + sizeof(NFS_ROOT) - 1;
299 nfs_data.version = NFS_MOUNT_VERSION; 135 if (n >= sizeof(nfs_root_parms))
300 nfs_data.flags = NFS_MOUNT_NONLM; /* No lockd in nfs root yet */ 136 line[sizeof(nfs_root_parms) - sizeof(NFS_ROOT) - 2] = '\0';
301 nfs_data.rsize = NFS_DEF_FILE_IO_SIZE; 137 sprintf(nfs_root_parms, NFS_ROOT, line);
302 nfs_data.wsize = NFS_DEF_FILE_IO_SIZE;
303 nfs_data.acregmin = NFS_DEF_ACREGMIN;
304 nfs_data.acregmax = NFS_DEF_ACREGMAX;
305 nfs_data.acdirmin = NFS_DEF_ACDIRMIN;
306 nfs_data.acdirmax = NFS_DEF_ACDIRMAX;
307 strcpy(buf, NFS_ROOT);
308
309 /* Process options received from the remote server */
310 root_nfs_parse(root_server_path, buf);
311
312 /* Override them by options set on kernel command-line */
313 root_nfs_parse(name, buf);
314
315 cp = utsname()->nodename;
316 if (strlen(buf) + strlen(cp) > NFS_MAXPATHLEN) {
317 printk(KERN_ERR "Root-NFS: Pathname for remote directory too long.\n");
318 return -1;
319 } 138 }
320 sprintf(nfs_export_path, buf, cp); 139
140 /*
141 * Extract the IP address of the NFS server containing our
142 * root file system, if one was specified.
143 *
144 * Note: root_nfs_parse_addr() removes the server-ip from
145 * nfs_root_parms, if it exists.
146 */
147 root_server_addr = root_nfs_parse_addr(nfs_root_parms);
321 148
322 return 1; 149 return 1;
323} 150}
324 151
152__setup("nfsroot=", nfs_root_setup);
325 153
326/* 154static int __init root_nfs_copy(char *dest, const char *src,
327 * Get NFS server address. 155 const size_t destlen)
328 */
329static int __init root_nfs_addr(void)
330{ 156{
331 if ((servaddr = root_server_addr) == htonl(INADDR_NONE)) { 157 if (strlcpy(dest, src, destlen) > destlen)
332 printk(KERN_ERR "Root-NFS: No NFS server available, giving up.\n");
333 return -1; 158 return -1;
334 } 159 return 0;
160}
335 161
336 snprintf(nfs_data.hostname, sizeof(nfs_data.hostname), 162static int __init root_nfs_cat(char *dest, const char *src,
337 "%pI4", &servaddr); 163 const size_t destlen)
164{
165 if (strlcat(dest, src, destlen) > destlen)
166 return -1;
338 return 0; 167 return 0;
339} 168}
340 169
341/* 170/*
342 * Tell the user what's going on. 171 * Parse out root export path and mount options from
172 * passed-in string @incoming.
173 *
174 * Copy the export path into @exppath.
343 */ 175 */
344#ifdef NFSROOT_DEBUG 176static int __init root_nfs_parse_options(char *incoming, char *exppath,
345static void __init root_nfs_print(void) 177 const size_t exppathlen)
346{ 178{
347 printk(KERN_NOTICE "Root-NFS: Mounting %s on server %s as root\n", 179 char *p;
348 nfs_export_path, nfs_data.hostname);
349 printk(KERN_NOTICE "Root-NFS: rsize = %d, wsize = %d, timeo = %d, retrans = %d\n",
350 nfs_data.rsize, nfs_data.wsize, nfs_data.timeo, nfs_data.retrans);
351 printk(KERN_NOTICE "Root-NFS: acreg (min,max) = (%d,%d), acdir (min,max) = (%d,%d)\n",
352 nfs_data.acregmin, nfs_data.acregmax,
353 nfs_data.acdirmin, nfs_data.acdirmax);
354 printk(KERN_NOTICE "Root-NFS: nfsd port = %d, mountd port = %d, flags = %08x\n",
355 nfs_port, mount_port, nfs_data.flags);
356}
357#endif
358
359 180
360static int __init root_nfs_init(void) 181 /*
361{ 182 * Set the NFS remote path
362#ifdef NFSROOT_DEBUG 183 */
363 nfs_debug |= NFSDBG_ROOT; 184 p = strsep(&incoming, ",");
364#endif 185 if (*p != '\0' && strcmp(p, "default") != 0)
186 if (root_nfs_copy(exppath, p, exppathlen))
187 return -1;
365 188
366 /* 189 /*
367 * Decode the root directory path name and NFS options from 190 * @incoming now points to the rest of the string; if it
368 * the kernel command line. This has to go here in order to 191 * contains something, append it to our root options buffer
369 * be able to use the client IP address for the remote root
370 * directory (necessary for pure RARP booting).
371 */ 192 */
372 if (root_nfs_name(nfs_root_name) < 0 || 193 if (incoming != NULL && *incoming != '\0')
373 root_nfs_addr() < 0) 194 if (root_nfs_cat(nfs_root_options, incoming,
374 return -1; 195 sizeof(nfs_root_options)))
196 return -1;
375 197
376#ifdef NFSROOT_DEBUG 198 /*
377 root_nfs_print(); 199 * Possibly prepare for more options to be appended
378#endif 200 */
201 if (nfs_root_options[0] != '\0' &&
202 nfs_root_options[strlen(nfs_root_options)] != ',')
203 if (root_nfs_cat(nfs_root_options, ",",
204 sizeof(nfs_root_options)))
205 return -1;
379 206
380 return 0; 207 return 0;
381} 208}
382 209
383
384/* 210/*
385 * Parse NFS server and directory information passed on the kernel 211 * Decode the export directory path name and NFS options from
386 * command line. 212 * the kernel command line. This has to be done late in order to
213 * use a dynamically acquired client IP address for the remote
214 * root directory path.
215 *
216 * Returns zero if successful; otherwise -1 is returned.
387 */ 217 */
388static int __init nfs_root_setup(char *line) 218static int __init root_nfs_data(char *cmdline)
389{ 219{
390 ROOT_DEV = Root_NFS; 220 char addr_option[sizeof("nolock,addr=") + INET_ADDRSTRLEN + 1];
391 if (line[0] == '/' || line[0] == ',' || (line[0] >= '0' && line[0] <= '9')) { 221 int len, retval = -1;
392 strlcpy(nfs_root_name, line, sizeof(nfs_root_name)); 222 char *tmp = NULL;
393 } else { 223 const size_t tmplen = sizeof(nfs_export_path);
394 int n = strlen(line) + sizeof(NFS_ROOT) - 1; 224
395 if (n >= sizeof(nfs_root_name)) 225 tmp = kzalloc(tmplen, GFP_KERNEL);
396 line[sizeof(nfs_root_name) - sizeof(NFS_ROOT) - 2] = '\0'; 226 if (tmp == NULL)
397 sprintf(nfs_root_name, NFS_ROOT, line); 227 goto out_nomem;
228 strcpy(tmp, NFS_ROOT);
229
230 if (root_server_path[0] != '\0') {
231 dprintk("Root-NFS: DHCPv4 option 17: %s\n",
232 root_server_path);
233 if (root_nfs_parse_options(root_server_path, tmp, tmplen))
234 goto out_optionstoolong;
398 } 235 }
399 root_server_addr = root_nfs_parse_addr(nfs_root_name);
400 return 1;
401}
402
403__setup("nfsroot=", nfs_root_setup);
404
405/***************************************************************************
406 236
407 Routines to actually mount the root directory 237 if (cmdline[0] != '\0') {
238 dprintk("Root-NFS: nfsroot=%s\n", cmdline);
239 if (root_nfs_parse_options(cmdline, tmp, tmplen))
240 goto out_optionstoolong;
241 }
408 242
409 ***************************************************************************/ 243 /*
244 * Append mandatory options for nfsroot so they override
245 * what has come before
246 */
247 snprintf(addr_option, sizeof(addr_option), "nolock,addr=%pI4",
248 &servaddr);
249 if (root_nfs_cat(nfs_root_options, addr_option,
250 sizeof(nfs_root_options)))
251 goto out_optionstoolong;
410 252
411/* 253 /*
412 * Construct sockaddr_in from address and port number. 254 * Set up nfs_root_device. For NFS mounts, this looks like
413 */ 255 *
414static inline void 256 * server:/path
415set_sockaddr(struct sockaddr_in *sin, __be32 addr, __be16 port) 257 *
416{ 258 * At this point, utsname()->nodename contains our local
417 sin->sin_family = AF_INET; 259 * IP address or hostname, set by ipconfig. If "%s" exists
418 sin->sin_addr.s_addr = addr; 260 * in tmp, substitute the nodename, then shovel the whole
419 sin->sin_port = port; 261 * mess into nfs_root_device.
420} 262 */
263 len = snprintf(nfs_export_path, sizeof(nfs_export_path),
264 tmp, utsname()->nodename);
265 if (len > (int)sizeof(nfs_export_path))
266 goto out_devnametoolong;
267 len = snprintf(nfs_root_device, sizeof(nfs_root_device),
268 "%pI4:%s", &servaddr, nfs_export_path);
269 if (len > (int)sizeof(nfs_root_device))
270 goto out_devnametoolong;
421 271
422/* 272 retval = 0;
423 * Query server portmapper for the port of a daemon program.
424 */
425static int __init root_nfs_getport(int program, int version, int proto)
426{
427 struct sockaddr_in sin;
428 273
429 printk(KERN_NOTICE "Looking up port of RPC %d/%d on %pI4\n", 274out:
430 program, version, &servaddr); 275 kfree(tmp);
431 set_sockaddr(&sin, servaddr, 0); 276 return retval;
432 return rpcb_getport_sync(&sin, program, version, proto); 277out_nomem:
278 printk(KERN_ERR "Root-NFS: could not allocate memory\n");
279 goto out;
280out_optionstoolong:
281 printk(KERN_ERR "Root-NFS: mount options string too long\n");
282 goto out;
283out_devnametoolong:
284 printk(KERN_ERR "Root-NFS: root device name too long.\n");
285 goto out;
433} 286}
434 287
435 288/**
436/* 289 * nfs_root_data - Return prepared 'data' for NFSROOT mount
437 * Use portmapper to find mountd and nfsd port numbers if not overriden 290 * @root_device: OUT: address of string containing NFSROOT device
438 * by the user. Use defaults if portmapper is not available. 291 * @root_data: OUT: address of string containing NFSROOT mount options
439 * XXX: Is there any nfs server with no portmapper? 292 *
293 * Returns zero and sets @root_device and @root_data if successful,
294 * otherwise -1 is returned.
440 */ 295 */
441static int __init root_nfs_ports(void) 296int __init nfs_root_data(char **root_device, char **root_data)
442{ 297{
443 int port; 298 servaddr = root_server_addr;
444 int nfsd_ver, mountd_ver; 299 if (servaddr == htonl(INADDR_NONE)) {
445 int nfsd_port, mountd_port; 300 printk(KERN_ERR "Root-NFS: no NFS server address\n");
446 int proto; 301 return -1;
447
448 if (nfs_data.flags & NFS_MOUNT_VER3) {
449 nfsd_ver = NFS3_VERSION;
450 mountd_ver = NFS_MNT3_VERSION;
451 nfsd_port = NFS_PORT;
452 mountd_port = NFS_MNT_PORT;
453 } else {
454 nfsd_ver = NFS2_VERSION;
455 mountd_ver = NFS_MNT_VERSION;
456 nfsd_port = NFS_PORT;
457 mountd_port = NFS_MNT_PORT;
458 }
459
460 proto = (nfs_data.flags & NFS_MOUNT_TCP) ? IPPROTO_TCP : IPPROTO_UDP;
461
462 if (nfs_port < 0) {
463 if ((port = root_nfs_getport(NFS_PROGRAM, nfsd_ver, proto)) < 0) {
464 printk(KERN_ERR "Root-NFS: Unable to get nfsd port "
465 "number from server, using default\n");
466 port = nfsd_port;
467 }
468 nfs_port = port;
469 dprintk("Root-NFS: Portmapper on server returned %d "
470 "as nfsd port\n", port);
471 } 302 }
472 303
473 if ((port = root_nfs_getport(NFS_MNT_PROGRAM, mountd_ver, proto)) < 0) { 304 if (root_nfs_data(nfs_root_parms) < 0)
474 printk(KERN_ERR "Root-NFS: Unable to get mountd port " 305 return -1;
475 "number from server, using default\n");
476 port = mountd_port;
477 }
478 mount_port = port;
479 dprintk("Root-NFS: mountd port is %d\n", port);
480 306
307 *root_device = nfs_root_device;
308 *root_data = nfs_root_options;
481 return 0; 309 return 0;
482} 310}
483
484
485/*
486 * Get a file handle from the server for the directory which is to be
487 * mounted.
488 */
489static int __init root_nfs_get_handle(void)
490{
491 struct sockaddr_in sin;
492 unsigned int auth_flav_len = 0;
493 struct nfs_mount_request request = {
494 .sap = (struct sockaddr *)&sin,
495 .salen = sizeof(sin),
496 .dirpath = nfs_export_path,
497 .version = (nfs_data.flags & NFS_MOUNT_VER3) ?
498 NFS_MNT3_VERSION : NFS_MNT_VERSION,
499 .protocol = (nfs_data.flags & NFS_MOUNT_TCP) ?
500 XPRT_TRANSPORT_TCP : XPRT_TRANSPORT_UDP,
501 .auth_flav_len = &auth_flav_len,
502 };
503 int status = -ENOMEM;
504
505 request.fh = nfs_alloc_fhandle();
506 if (!request.fh)
507 goto out;
508 set_sockaddr(&sin, servaddr, htons(mount_port));
509 status = nfs_mount(&request);
510 if (status < 0)
511 printk(KERN_ERR "Root-NFS: Server returned error %d "
512 "while mounting %s\n", status, nfs_export_path);
513 else {
514 nfs_data.root.size = request.fh->size;
515 memcpy(&nfs_data.root.data, request.fh->data, request.fh->size);
516 }
517 nfs_free_fhandle(request.fh);
518out:
519 return status;
520}
521
522/*
523 * Get the NFS port numbers and file handle, and return the prepared 'data'
524 * argument for mount() if everything went OK. Return NULL otherwise.
525 */
526void * __init nfs_root_data(void)
527{
528 if (root_nfs_init() < 0
529 || root_nfs_ports() < 0
530 || root_nfs_get_handle() < 0)
531 return NULL;
532 set_sockaddr((struct sockaddr_in *) &nfs_data.addr, servaddr, htons(nfs_port));
533 return (void*)&nfs_data;
534}
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index 919490232e17..137b549e63db 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -65,6 +65,13 @@ nfs_create_request(struct nfs_open_context *ctx, struct inode *inode,
65 if (req == NULL) 65 if (req == NULL)
66 return ERR_PTR(-ENOMEM); 66 return ERR_PTR(-ENOMEM);
67 67
68 /* get lock context early so we can deal with alloc failures */
69 req->wb_lock_context = nfs_get_lock_context(ctx);
70 if (req->wb_lock_context == NULL) {
71 nfs_page_free(req);
72 return ERR_PTR(-ENOMEM);
73 }
74
68 /* Initialize the request struct. Initially, we assume a 75 /* Initialize the request struct. Initially, we assume a
69 * long write-back delay. This will be adjusted in 76 * long write-back delay. This will be adjusted in
70 * update_nfs_request below if the region is not locked. */ 77 * update_nfs_request below if the region is not locked. */
@@ -79,7 +86,6 @@ nfs_create_request(struct nfs_open_context *ctx, struct inode *inode,
79 req->wb_pgbase = offset; 86 req->wb_pgbase = offset;
80 req->wb_bytes = count; 87 req->wb_bytes = count;
81 req->wb_context = get_nfs_open_context(ctx); 88 req->wb_context = get_nfs_open_context(ctx);
82 req->wb_lock_context = nfs_get_lock_context(ctx);
83 kref_init(&req->wb_kref); 89 kref_init(&req->wb_kref);
84 return req; 90 return req;
85} 91}
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
new file mode 100644
index 000000000000..db773428f95f
--- /dev/null
+++ b/fs/nfs/pnfs.c
@@ -0,0 +1,783 @@
1/*
2 * pNFS functions to call and manage layout drivers.
3 *
4 * Copyright (c) 2002 [year of first publication]
5 * The Regents of the University of Michigan
6 * All Rights Reserved
7 *
8 * Dean Hildebrand <dhildebz@umich.edu>
9 *
10 * Permission is granted to use, copy, create derivative works, and
11 * redistribute this software and such derivative works for any purpose,
12 * so long as the name of the University of Michigan is not used in
13 * any advertising or publicity pertaining to the use or distribution
14 * of this software without specific, written prior authorization. If
15 * the above copyright notice or any other identification of the
16 * University of Michigan is included in any copy of any portion of
17 * this software, then the disclaimer below must also be included.
18 *
19 * This software is provided as is, without representation or warranty
20 * of any kind either express or implied, including without limitation
21 * the implied warranties of merchantability, fitness for a particular
22 * purpose, or noninfringement. The Regents of the University of
23 * Michigan shall not be liable for any damages, including special,
24 * indirect, incidental, or consequential damages, with respect to any
25 * claim arising out of or in connection with the use of the software,
26 * even if it has been or is hereafter advised of the possibility of
27 * such damages.
28 */
29
30#include <linux/nfs_fs.h>
31#include "internal.h"
32#include "pnfs.h"
33
34#define NFSDBG_FACILITY NFSDBG_PNFS
35
36/* Locking:
37 *
38 * pnfs_spinlock:
39 * protects pnfs_modules_tbl.
40 */
41static DEFINE_SPINLOCK(pnfs_spinlock);
42
43/*
44 * pnfs_modules_tbl holds all pnfs modules
45 */
46static LIST_HEAD(pnfs_modules_tbl);
47
48/* Return the registered pnfs layout driver module matching given id */
49static struct pnfs_layoutdriver_type *
50find_pnfs_driver_locked(u32 id)
51{
52 struct pnfs_layoutdriver_type *local;
53
54 list_for_each_entry(local, &pnfs_modules_tbl, pnfs_tblid)
55 if (local->id == id)
56 goto out;
57 local = NULL;
58out:
59 dprintk("%s: Searching for id %u, found %p\n", __func__, id, local);
60 return local;
61}
62
63static struct pnfs_layoutdriver_type *
64find_pnfs_driver(u32 id)
65{
66 struct pnfs_layoutdriver_type *local;
67
68 spin_lock(&pnfs_spinlock);
69 local = find_pnfs_driver_locked(id);
70 spin_unlock(&pnfs_spinlock);
71 return local;
72}
73
74void
75unset_pnfs_layoutdriver(struct nfs_server *nfss)
76{
77 if (nfss->pnfs_curr_ld) {
78 nfss->pnfs_curr_ld->clear_layoutdriver(nfss);
79 module_put(nfss->pnfs_curr_ld->owner);
80 }
81 nfss->pnfs_curr_ld = NULL;
82}
83
84/*
85 * Try to set the server's pnfs module to the pnfs layout type specified by id.
86 * Currently only one pNFS layout driver per filesystem is supported.
87 *
88 * @id layout type. Zero (illegal layout type) indicates pNFS not in use.
89 */
90void
91set_pnfs_layoutdriver(struct nfs_server *server, u32 id)
92{
93 struct pnfs_layoutdriver_type *ld_type = NULL;
94
95 if (id == 0)
96 goto out_no_driver;
97 if (!(server->nfs_client->cl_exchange_flags &
98 (EXCHGID4_FLAG_USE_NON_PNFS | EXCHGID4_FLAG_USE_PNFS_MDS))) {
99 printk(KERN_ERR "%s: id %u cl_exchange_flags 0x%x\n", __func__,
100 id, server->nfs_client->cl_exchange_flags);
101 goto out_no_driver;
102 }
103 ld_type = find_pnfs_driver(id);
104 if (!ld_type) {
105 request_module("%s-%u", LAYOUT_NFSV4_1_MODULE_PREFIX, id);
106 ld_type = find_pnfs_driver(id);
107 if (!ld_type) {
108 dprintk("%s: No pNFS module found for %u.\n",
109 __func__, id);
110 goto out_no_driver;
111 }
112 }
113 if (!try_module_get(ld_type->owner)) {
114 dprintk("%s: Could not grab reference on module\n", __func__);
115 goto out_no_driver;
116 }
117 server->pnfs_curr_ld = ld_type;
118 if (ld_type->set_layoutdriver(server)) {
119 printk(KERN_ERR
120 "%s: Error initializing mount point for layout driver %u.\n",
121 __func__, id);
122 module_put(ld_type->owner);
123 goto out_no_driver;
124 }
125 dprintk("%s: pNFS module for %u set\n", __func__, id);
126 return;
127
128out_no_driver:
129 dprintk("%s: Using NFSv4 I/O\n", __func__);
130 server->pnfs_curr_ld = NULL;
131}
132
133int
134pnfs_register_layoutdriver(struct pnfs_layoutdriver_type *ld_type)
135{
136 int status = -EINVAL;
137 struct pnfs_layoutdriver_type *tmp;
138
139 if (ld_type->id == 0) {
140 printk(KERN_ERR "%s id 0 is reserved\n", __func__);
141 return status;
142 }
143 if (!ld_type->alloc_lseg || !ld_type->free_lseg) {
144 printk(KERN_ERR "%s Layout driver must provide "
145 "alloc_lseg and free_lseg.\n", __func__);
146 return status;
147 }
148
149 spin_lock(&pnfs_spinlock);
150 tmp = find_pnfs_driver_locked(ld_type->id);
151 if (!tmp) {
152 list_add(&ld_type->pnfs_tblid, &pnfs_modules_tbl);
153 status = 0;
154 dprintk("%s Registering id:%u name:%s\n", __func__, ld_type->id,
155 ld_type->name);
156 } else {
157 printk(KERN_ERR "%s Module with id %d already loaded!\n",
158 __func__, ld_type->id);
159 }
160 spin_unlock(&pnfs_spinlock);
161
162 return status;
163}
164EXPORT_SYMBOL_GPL(pnfs_register_layoutdriver);
165
166void
167pnfs_unregister_layoutdriver(struct pnfs_layoutdriver_type *ld_type)
168{
169 dprintk("%s Deregistering id:%u\n", __func__, ld_type->id);
170 spin_lock(&pnfs_spinlock);
171 list_del(&ld_type->pnfs_tblid);
172 spin_unlock(&pnfs_spinlock);
173}
174EXPORT_SYMBOL_GPL(pnfs_unregister_layoutdriver);
175
176/*
177 * pNFS client layout cache
178 */
179
180static void
181get_layout_hdr_locked(struct pnfs_layout_hdr *lo)
182{
183 assert_spin_locked(&lo->inode->i_lock);
184 lo->refcount++;
185}
186
187static void
188put_layout_hdr_locked(struct pnfs_layout_hdr *lo)
189{
190 assert_spin_locked(&lo->inode->i_lock);
191 BUG_ON(lo->refcount == 0);
192
193 lo->refcount--;
194 if (!lo->refcount) {
195 dprintk("%s: freeing layout cache %p\n", __func__, lo);
196 BUG_ON(!list_empty(&lo->layouts));
197 NFS_I(lo->inode)->layout = NULL;
198 kfree(lo);
199 }
200}
201
202void
203put_layout_hdr(struct inode *inode)
204{
205 spin_lock(&inode->i_lock);
206 put_layout_hdr_locked(NFS_I(inode)->layout);
207 spin_unlock(&inode->i_lock);
208}
209
210static void
211init_lseg(struct pnfs_layout_hdr *lo, struct pnfs_layout_segment *lseg)
212{
213 INIT_LIST_HEAD(&lseg->fi_list);
214 kref_init(&lseg->kref);
215 lseg->layout = lo;
216}
217
218/* Called without i_lock held, as the free_lseg call may sleep */
219static void
220destroy_lseg(struct kref *kref)
221{
222 struct pnfs_layout_segment *lseg =
223 container_of(kref, struct pnfs_layout_segment, kref);
224 struct inode *ino = lseg->layout->inode;
225
226 dprintk("--> %s\n", __func__);
227 NFS_SERVER(ino)->pnfs_curr_ld->free_lseg(lseg);
228 /* Matched by get_layout_hdr_locked in pnfs_insert_layout */
229 put_layout_hdr(ino);
230}
231
232static void
233put_lseg(struct pnfs_layout_segment *lseg)
234{
235 if (!lseg)
236 return;
237
238 dprintk("%s: lseg %p ref %d\n", __func__, lseg,
239 atomic_read(&lseg->kref.refcount));
240 kref_put(&lseg->kref, destroy_lseg);
241}
242
243static void
244pnfs_clear_lseg_list(struct pnfs_layout_hdr *lo, struct list_head *tmp_list)
245{
246 struct pnfs_layout_segment *lseg, *next;
247 struct nfs_client *clp;
248
249 dprintk("%s:Begin lo %p\n", __func__, lo);
250
251 assert_spin_locked(&lo->inode->i_lock);
252 list_for_each_entry_safe(lseg, next, &lo->segs, fi_list) {
253 dprintk("%s: freeing lseg %p\n", __func__, lseg);
254 list_move(&lseg->fi_list, tmp_list);
255 }
256 clp = NFS_SERVER(lo->inode)->nfs_client;
257 spin_lock(&clp->cl_lock);
258 /* List does not take a reference, so no need for put here */
259 list_del_init(&lo->layouts);
260 spin_unlock(&clp->cl_lock);
261 write_seqlock(&lo->seqlock);
262 clear_bit(NFS_LAYOUT_STATEID_SET, &lo->state);
263 write_sequnlock(&lo->seqlock);
264
265 dprintk("%s:Return\n", __func__);
266}
267
268static void
269pnfs_free_lseg_list(struct list_head *tmp_list)
270{
271 struct pnfs_layout_segment *lseg;
272
273 while (!list_empty(tmp_list)) {
274 lseg = list_entry(tmp_list->next, struct pnfs_layout_segment,
275 fi_list);
276 dprintk("%s calling put_lseg on %p\n", __func__, lseg);
277 list_del(&lseg->fi_list);
278 put_lseg(lseg);
279 }
280}
281
282void
283pnfs_destroy_layout(struct nfs_inode *nfsi)
284{
285 struct pnfs_layout_hdr *lo;
286 LIST_HEAD(tmp_list);
287
288 spin_lock(&nfsi->vfs_inode.i_lock);
289 lo = nfsi->layout;
290 if (lo) {
291 pnfs_clear_lseg_list(lo, &tmp_list);
292 /* Matched by refcount set to 1 in alloc_init_layout_hdr */
293 put_layout_hdr_locked(lo);
294 }
295 spin_unlock(&nfsi->vfs_inode.i_lock);
296 pnfs_free_lseg_list(&tmp_list);
297}
298
299/*
300 * Called by the state manger to remove all layouts established under an
301 * expired lease.
302 */
303void
304pnfs_destroy_all_layouts(struct nfs_client *clp)
305{
306 struct pnfs_layout_hdr *lo;
307 LIST_HEAD(tmp_list);
308
309 spin_lock(&clp->cl_lock);
310 list_splice_init(&clp->cl_layouts, &tmp_list);
311 spin_unlock(&clp->cl_lock);
312
313 while (!list_empty(&tmp_list)) {
314 lo = list_entry(tmp_list.next, struct pnfs_layout_hdr,
315 layouts);
316 dprintk("%s freeing layout for inode %lu\n", __func__,
317 lo->inode->i_ino);
318 pnfs_destroy_layout(NFS_I(lo->inode));
319 }
320}
321
322/* update lo->stateid with new if is more recent
323 *
324 * lo->stateid could be the open stateid, in which case we just use what given.
325 */
326static void
327pnfs_set_layout_stateid(struct pnfs_layout_hdr *lo,
328 const nfs4_stateid *new)
329{
330 nfs4_stateid *old = &lo->stateid;
331 bool overwrite = false;
332
333 write_seqlock(&lo->seqlock);
334 if (!test_bit(NFS_LAYOUT_STATEID_SET, &lo->state) ||
335 memcmp(old->stateid.other, new->stateid.other, sizeof(new->stateid.other)))
336 overwrite = true;
337 else {
338 u32 oldseq, newseq;
339
340 oldseq = be32_to_cpu(old->stateid.seqid);
341 newseq = be32_to_cpu(new->stateid.seqid);
342 if ((int)(newseq - oldseq) > 0)
343 overwrite = true;
344 }
345 if (overwrite)
346 memcpy(&old->stateid, &new->stateid, sizeof(new->stateid));
347 write_sequnlock(&lo->seqlock);
348}
349
/*
 * Seed the layout stateid of @lo from the open stateid held in @state and
 * mark the layout stateid as set.
 *
 * Two seqlocks are involved: lo->seqlock is held for writing for the whole
 * operation, while state->seqlock is only sampled as a reader, so the copy
 * is repeated until it was not disturbed by a writer of the open stateid.
 */
350static void
351pnfs_layout_from_open_stateid(struct pnfs_layout_hdr *lo,
352 struct nfs4_state *state)
353{
354 int seq;
355
356 dprintk("--> %s\n", __func__);
357 write_seqlock(&lo->seqlock);
358 do {
359 seq = read_seqbegin(&state->seqlock);
360 memcpy(lo->stateid.data, state->stateid.data,
361 sizeof(state->stateid.data));
362 } while (read_seqretry(&state->seqlock, seq)); /* retry if the open stateid changed during the copy */
363 set_bit(NFS_LAYOUT_STATEID_SET, &lo->state);
364 write_sequnlock(&lo->seqlock);
365 dprintk("<-- %s\n", __func__);
366}
367
/*
 * Copy the current layout stateid of @lo into @dst.
 *
 * If no layout stateid has been recorded yet, the stateid is first seeded
 * from @open_state.  That seeding takes lo->seqlock for writing inside this
 * function's own read section, so the following read_seqretry() sees a
 * changed sequence and the loop runs again; the second pass then takes the
 * memcpy branch.  @dst is therefore only written on a pass where the
 * NFS_LAYOUT_STATEID_SET bit was already set.
 */
368void
369pnfs_get_layout_stateid(nfs4_stateid *dst, struct pnfs_layout_hdr *lo,
370 struct nfs4_state *open_state)
371{
372 int seq;
373
374 dprintk("--> %s\n", __func__);
375 do {
376 seq = read_seqbegin(&lo->seqlock);
377 if (!test_bit(NFS_LAYOUT_STATEID_SET, &lo->state)) {
378 /* This will trigger retry of the read */
379 pnfs_layout_from_open_stateid(lo, open_state);
380 } else
381 memcpy(dst->data, lo->stateid.data,
382 sizeof(lo->stateid.data));
383 } while (read_seqretry(&lo->seqlock, seq));
384 dprintk("<-- %s\n", __func__);
385}
386
387/*
388* Get layout from server.
389* for now, assume that whole file layouts are requested.
390* arg->offset: 0
391* arg->length: all ones
392*/
393static struct pnfs_layout_segment *
394send_layoutget(struct pnfs_layout_hdr *lo,
395 struct nfs_open_context *ctx,
396 u32 iomode)
397{
398 struct inode *ino = lo->inode;
399 struct nfs_server *server = NFS_SERVER(ino);
400 struct nfs4_layoutget *lgp;
401 struct pnfs_layout_segment *lseg = NULL;
402
403 dprintk("--> %s\n", __func__);
404
405 BUG_ON(ctx == NULL);
406 lgp = kzalloc(sizeof(*lgp), GFP_KERNEL);
407 if (lgp == NULL) {
408 put_layout_hdr(lo->inode);
409 return NULL;
410 }
411 lgp->args.minlength = NFS4_MAX_UINT64;
412 lgp->args.maxcount = PNFS_LAYOUT_MAXSIZE;
413 lgp->args.range.iomode = iomode;
414 lgp->args.range.offset = 0;
415 lgp->args.range.length = NFS4_MAX_UINT64;
416 lgp->args.type = server->pnfs_curr_ld->id;
417 lgp->args.inode = ino;
418 lgp->args.ctx = get_nfs_open_context(ctx);
419 lgp->lsegpp = &lseg;
420
421 /* Synchronously retrieve layout information from server and
422 * store in lseg.
423 */
424 nfs4_proc_layoutget(lgp);
425 if (!lseg) {
426 /* remember that LAYOUTGET failed and suspend trying */
427 set_bit(lo_fail_bit(iomode), &lo->state);
428 }
429 return lseg;
430}
431
432/*
433 * Compare two layout segments for sorting into layout cache.
434 * We want to preferentially return RW over RO layouts, so ensure those
435 * are seen first.
436 */
437static s64
438cmp_layout(u32 iomode1, u32 iomode2)
439{
440 /* read > read/write */
441 return (int)(iomode2 == IOMODE_READ) - (int)(iomode1 == IOMODE_READ);
442}
443
444static void
445pnfs_insert_layout(struct pnfs_layout_hdr *lo,
446 struct pnfs_layout_segment *lseg)
447{
448 struct pnfs_layout_segment *lp;
449 int found = 0;
450
451 dprintk("%s:Begin\n", __func__);
452
453 assert_spin_locked(&lo->inode->i_lock);
454 if (list_empty(&lo->segs)) {
455 struct nfs_client *clp = NFS_SERVER(lo->inode)->nfs_client;
456
457 spin_lock(&clp->cl_lock);
458 BUG_ON(!list_empty(&lo->layouts));
459 list_add_tail(&lo->layouts, &clp->cl_layouts);
460 spin_unlock(&clp->cl_lock);
461 }
462 list_for_each_entry(lp, &lo->segs, fi_list) {
463 if (cmp_layout(lp->range.iomode, lseg->range.iomode) > 0)
464 continue;
465 list_add_tail(&lseg->fi_list, &lp->fi_list);
466 dprintk("%s: inserted lseg %p "
467 "iomode %d offset %llu length %llu before "
468 "lp %p iomode %d offset %llu length %llu\n",
469 __func__, lseg, lseg->range.iomode,
470 lseg->range.offset, lseg->range.length,
471 lp, lp->range.iomode, lp->range.offset,
472 lp->range.length);
473 found = 1;
474 break;
475 }
476 if (!found) {
477 list_add_tail(&lseg->fi_list, &lo->segs);
478 dprintk("%s: inserted lseg %p "
479 "iomode %d offset %llu length %llu at tail\n",
480 __func__, lseg, lseg->range.iomode,
481 lseg->range.offset, lseg->range.length);
482 }
483 get_layout_hdr_locked(lo);
484
485 dprintk("%s:Return\n", __func__);
486}
487
488static struct pnfs_layout_hdr *
489alloc_init_layout_hdr(struct inode *ino)
490{
491 struct pnfs_layout_hdr *lo;
492
493 lo = kzalloc(sizeof(struct pnfs_layout_hdr), GFP_KERNEL);
494 if (!lo)
495 return NULL;
496 lo->refcount = 1;
497 INIT_LIST_HEAD(&lo->layouts);
498 INIT_LIST_HEAD(&lo->segs);
499 seqlock_init(&lo->seqlock);
500 lo->inode = ino;
501 return lo;
502}
503
504static struct pnfs_layout_hdr *
505pnfs_find_alloc_layout(struct inode *ino)
506{
507 struct nfs_inode *nfsi = NFS_I(ino);
508 struct pnfs_layout_hdr *new = NULL;
509
510 dprintk("%s Begin ino=%p layout=%p\n", __func__, ino, nfsi->layout);
511
512 assert_spin_locked(&ino->i_lock);
513 if (nfsi->layout)
514 return nfsi->layout;
515
516 spin_unlock(&ino->i_lock);
517 new = alloc_init_layout_hdr(ino);
518 spin_lock(&ino->i_lock);
519
520 if (likely(nfsi->layout == NULL)) /* Won the race? */
521 nfsi->layout = new;
522 else
523 kfree(new);
524 return nfsi->layout;
525}
526
527/*
528 * iomode matching rules:
529 * iomode lseg match
530 * ----- ----- -----
531 * ANY READ true
532 * ANY RW true
533 * RW READ false
534 * RW RW true
535 * READ READ true
536 * READ RW true
537 */
538static int
539is_matching_lseg(struct pnfs_layout_segment *lseg, u32 iomode)
540{
541 return (iomode != IOMODE_RW || lseg->range.iomode == IOMODE_RW);
542}
543
544/*
545 * lookup range in layout
546 */
547static struct pnfs_layout_segment *
548pnfs_has_layout(struct pnfs_layout_hdr *lo, u32 iomode)
549{
550 struct pnfs_layout_segment *lseg, *ret = NULL;
551
552 dprintk("%s:Begin\n", __func__);
553
554 assert_spin_locked(&lo->inode->i_lock);
555 list_for_each_entry(lseg, &lo->segs, fi_list) {
556 if (is_matching_lseg(lseg, iomode)) {
557 ret = lseg;
558 break;
559 }
560 if (cmp_layout(iomode, lseg->range.iomode) > 0)
561 break;
562 }
563
564 dprintk("%s:Return lseg %p ref %d\n",
565 __func__, ret, ret ? atomic_read(&ret->kref.refcount) : 0);
566 return ret;
567}
568
569/*
570 * Layout segment is retreived from the server if not cached.
571 * The appropriate layout segment is referenced and returned to the caller.
572 */
573struct pnfs_layout_segment *
574pnfs_update_layout(struct inode *ino,
575 struct nfs_open_context *ctx,
576 enum pnfs_iomode iomode)
577{
578 struct nfs_inode *nfsi = NFS_I(ino);
579 struct pnfs_layout_hdr *lo;
580 struct pnfs_layout_segment *lseg = NULL;
581
582 if (!pnfs_enabled_sb(NFS_SERVER(ino)))
583 return NULL;
584 spin_lock(&ino->i_lock);
585 lo = pnfs_find_alloc_layout(ino);
586 if (lo == NULL) {
587 dprintk("%s ERROR: can't get pnfs_layout_hdr\n", __func__);
588 goto out_unlock;
589 }
590
591 /* Check to see if the layout for the given range already exists */
592 lseg = pnfs_has_layout(lo, iomode);
593 if (lseg) {
594 dprintk("%s: Using cached lseg %p for iomode %d)\n",
595 __func__, lseg, iomode);
596 goto out_unlock;
597 }
598
599 /* if LAYOUTGET already failed once we don't try again */
600 if (test_bit(lo_fail_bit(iomode), &nfsi->layout->state))
601 goto out_unlock;
602
603 get_layout_hdr_locked(lo); /* Matched in nfs4_layoutget_release */
604 spin_unlock(&ino->i_lock);
605
606 lseg = send_layoutget(lo, ctx, iomode);
607out:
608 dprintk("%s end, state 0x%lx lseg %p\n", __func__,
609 nfsi->layout->state, lseg);
610 return lseg;
611out_unlock:
612 spin_unlock(&ino->i_lock);
613 goto out;
614}
615
616int
617pnfs_layout_process(struct nfs4_layoutget *lgp)
618{
619 struct pnfs_layout_hdr *lo = NFS_I(lgp->args.inode)->layout;
620 struct nfs4_layoutget_res *res = &lgp->res;
621 struct pnfs_layout_segment *lseg;
622 struct inode *ino = lo->inode;
623 int status = 0;
624
625 /* Inject layout blob into I/O device driver */
626 lseg = NFS_SERVER(ino)->pnfs_curr_ld->alloc_lseg(lo, res);
627 if (!lseg || IS_ERR(lseg)) {
628 if (!lseg)
629 status = -ENOMEM;
630 else
631 status = PTR_ERR(lseg);
632 dprintk("%s: Could not allocate layout: error %d\n",
633 __func__, status);
634 goto out;
635 }
636
637 spin_lock(&ino->i_lock);
638 init_lseg(lo, lseg);
639 lseg->range = res->range;
640 *lgp->lsegpp = lseg;
641 pnfs_insert_layout(lo, lseg);
642
643 /* Done processing layoutget. Set the layout stateid */
644 pnfs_set_layout_stateid(lo, &res->stateid);
645 spin_unlock(&ino->i_lock);
646out:
647 return status;
648}
649
650/*
651 * Device ID cache. Currently supports one layout type per struct nfs_client.
652 * Add layout type to the lookup key to expand to support multiple types.
653 */
654int
655pnfs_alloc_init_deviceid_cache(struct nfs_client *clp,
656 void (*free_callback)(struct pnfs_deviceid_node *))
657{
658 struct pnfs_deviceid_cache *c;
659
660 c = kzalloc(sizeof(struct pnfs_deviceid_cache), GFP_KERNEL);
661 if (!c)
662 return -ENOMEM;
663 spin_lock(&clp->cl_lock);
664 if (clp->cl_devid_cache != NULL) {
665 atomic_inc(&clp->cl_devid_cache->dc_ref);
666 dprintk("%s [kref [%d]]\n", __func__,
667 atomic_read(&clp->cl_devid_cache->dc_ref));
668 kfree(c);
669 } else {
670 /* kzalloc initializes hlists */
671 spin_lock_init(&c->dc_lock);
672 atomic_set(&c->dc_ref, 1);
673 c->dc_free_callback = free_callback;
674 clp->cl_devid_cache = c;
675 dprintk("%s [new]\n", __func__);
676 }
677 spin_unlock(&clp->cl_lock);
678 return 0;
679}
680EXPORT_SYMBOL_GPL(pnfs_alloc_init_deviceid_cache);
681
682/*
683 * Called from pnfs_layoutdriver_type->free_lseg
684 * last layout segment reference frees deviceid
685 */
686void
687pnfs_put_deviceid(struct pnfs_deviceid_cache *c,
688 struct pnfs_deviceid_node *devid)
689{
690 struct nfs4_deviceid *id = &devid->de_id;
691 struct pnfs_deviceid_node *d;
692 struct hlist_node *n;
693 long h = nfs4_deviceid_hash(id);
694
695 dprintk("%s [%d]\n", __func__, atomic_read(&devid->de_ref));
696 if (!atomic_dec_and_lock(&devid->de_ref, &c->dc_lock))
697 return;
698
699 hlist_for_each_entry_rcu(d, n, &c->dc_deviceids[h], de_node)
700 if (!memcmp(&d->de_id, id, sizeof(*id))) {
701 hlist_del_rcu(&d->de_node);
702 spin_unlock(&c->dc_lock);
703 synchronize_rcu();
704 c->dc_free_callback(devid);
705 return;
706 }
707 spin_unlock(&c->dc_lock);
708 /* Why wasn't it found in the list? */
709 BUG();
710}
711EXPORT_SYMBOL_GPL(pnfs_put_deviceid);
712
713/* Find and reference a deviceid */
714struct pnfs_deviceid_node *
715pnfs_find_get_deviceid(struct pnfs_deviceid_cache *c, struct nfs4_deviceid *id)
716{
717 struct pnfs_deviceid_node *d;
718 struct hlist_node *n;
719 long hash = nfs4_deviceid_hash(id);
720
721 dprintk("--> %s hash %ld\n", __func__, hash);
722 rcu_read_lock();
723 hlist_for_each_entry_rcu(d, n, &c->dc_deviceids[hash], de_node) {
724 if (!memcmp(&d->de_id, id, sizeof(*id))) {
725 if (!atomic_inc_not_zero(&d->de_ref)) {
726 goto fail;
727 } else {
728 rcu_read_unlock();
729 return d;
730 }
731 }
732 }
733fail:
734 rcu_read_unlock();
735 return NULL;
736}
737EXPORT_SYMBOL_GPL(pnfs_find_get_deviceid);
738
739/*
740 * Add a deviceid to the cache.
741 * GETDEVICEINFOs for same deviceid can race. If deviceid is found, discard new
742 */
743struct pnfs_deviceid_node *
744pnfs_add_deviceid(struct pnfs_deviceid_cache *c, struct pnfs_deviceid_node *new)
745{
746 struct pnfs_deviceid_node *d;
747 long hash = nfs4_deviceid_hash(&new->de_id);
748
749 dprintk("--> %s hash %ld\n", __func__, hash);
750 spin_lock(&c->dc_lock);
751 d = pnfs_find_get_deviceid(c, &new->de_id);
752 if (d) {
753 spin_unlock(&c->dc_lock);
754 dprintk("%s [discard]\n", __func__);
755 c->dc_free_callback(new);
756 return d;
757 }
758 INIT_HLIST_NODE(&new->de_node);
759 atomic_set(&new->de_ref, 1);
760 hlist_add_head_rcu(&new->de_node, &c->dc_deviceids[hash]);
761 spin_unlock(&c->dc_lock);
762 dprintk("%s [new]\n", __func__);
763 return new;
764}
765EXPORT_SYMBOL_GPL(pnfs_add_deviceid);
766
767void
768pnfs_put_deviceid_cache(struct nfs_client *clp)
769{
770 struct pnfs_deviceid_cache *local = clp->cl_devid_cache;
771
772 dprintk("--> %s cl_devid_cache %p\n", __func__, clp->cl_devid_cache);
773 if (atomic_dec_and_lock(&local->dc_ref, &clp->cl_lock)) {
774 int i;
775 /* Verify cache is empty */
776 for (i = 0; i < NFS4_DEVICE_ID_HASH_SIZE; i++)
777 BUG_ON(!hlist_empty(&local->dc_deviceids[i]));
778 clp->cl_devid_cache = NULL;
779 spin_unlock(&clp->cl_lock);
780 kfree(local);
781 }
782}
783EXPORT_SYMBOL_GPL(pnfs_put_deviceid_cache);
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
new file mode 100644
index 000000000000..e12367d50489
--- /dev/null
+++ b/fs/nfs/pnfs.h
@@ -0,0 +1,189 @@
1/*
2 * pNFS client data structures.
3 *
4 * Copyright (c) 2002
5 * The Regents of the University of Michigan
6 * All Rights Reserved
7 *
8 * Dean Hildebrand <dhildebz@umich.edu>
9 *
10 * Permission is granted to use, copy, create derivative works, and
11 * redistribute this software and such derivative works for any purpose,
12 * so long as the name of the University of Michigan is not used in
13 * any advertising or publicity pertaining to the use or distribution
14 * of this software without specific, written prior authorization. If
15 * the above copyright notice or any other identification of the
16 * University of Michigan is included in any copy of any portion of
17 * this software, then the disclaimer below must also be included.
18 *
19 * This software is provided as is, without representation or warranty
20 * of any kind either express or implied, including without limitation
21 * the implied warranties of merchantability, fitness for a particular
22 * purpose, or noninfringement. The Regents of the University of
23 * Michigan shall not be liable for any damages, including special,
24 * indirect, incidental, or consequential damages, with respect to any
25 * claim arising out of or in connection with the use of the software,
26 * even if it has been or is hereafter advised of the possibility of
27 * such damages.
28 */
29
30#ifndef FS_NFS_PNFS_H
31#define FS_NFS_PNFS_H
32
/* Generic part of one cached layout segment; declared outside the CONFIG_NFS_V4_1 section. */
33struct pnfs_layout_segment {
34 struct list_head fi_list; /* link in pnfs_layout_hdr.segs, or in a temporary list while being torn down */
35 struct pnfs_layout_range range; /* range (incl. iomode) copied from the LAYOUTGET result */
36 struct kref kref; /* reference count; the final put runs the driver's free_lseg */
37 struct pnfs_layout_hdr *layout; /* owning layout header */
38};
39
40#ifdef CONFIG_NFS_V4_1
41
42#define LAYOUT_NFSV4_1_MODULE_PREFIX "nfs-layouttype4"
43
44enum {
45 NFS_LAYOUT_RO_FAILED = 0, /* get ro layout failed stop trying */
46 NFS_LAYOUT_RW_FAILED, /* get rw layout failed stop trying */
47 NFS_LAYOUT_STATEID_SET, /* have a valid layout stateid */
48};
49
50/* Per-layout driver specific registration structure */
51struct pnfs_layoutdriver_type {
52 struct list_head pnfs_tblid; /* link in the table of registered drivers */
53 const u32 id; /* layout type; 0 is reserved and rejected at registration */
54 const char *name; /* printed in the registration debug message */
55 struct module *owner; /* module pinned while the driver is bound to a server */
56 int (*set_layoutdriver) (struct nfs_server *); /* called when bound to a server; non-zero return is treated as failure */
57 int (*clear_layoutdriver) (struct nfs_server *); /* called when unbound from a server */
58 struct pnfs_layout_segment * (*alloc_lseg) (struct pnfs_layout_hdr *layoutid, struct nfs4_layoutget_res *lgr); /* mandatory; NULL or ERR_PTR return means failure */
59 void (*free_lseg) (struct pnfs_layout_segment *lseg); /* mandatory; releases a segment on its last put */
60};
61
/* Per-inode layout state: the list of cached segments plus the layout stateid. */
62struct pnfs_layout_hdr {
63 unsigned long refcount; /* plain counter, changed only with inode->i_lock held */
64 struct list_head layouts; /* other client layouts */
65 struct list_head segs; /* layout segments list */
66 seqlock_t seqlock; /* Protects the stateid */
67 nfs4_stateid stateid;
68 unsigned long state; /* bit flags, numbered by the NFS_LAYOUT_* enum above */
69 struct inode *inode; /* inode this layout belongs to */
70};
71
72struct pnfs_device {
73 struct nfs4_deviceid dev_id;
74 unsigned int layout_type;
75 unsigned int mincount;
76 struct page **pages;
77 void *area;
78 unsigned int pgbase;
79 unsigned int pglen;
80};
81
82/*
83 * Device ID RCU cache. A device ID is unique per client ID and layout type.
84 */
85#define NFS4_DEVICE_ID_HASH_BITS 5
86#define NFS4_DEVICE_ID_HASH_SIZE (1 << NFS4_DEVICE_ID_HASH_BITS)
87#define NFS4_DEVICE_ID_HASH_MASK (NFS4_DEVICE_ID_HASH_SIZE - 1)
88
89static inline u32
90nfs4_deviceid_hash(struct nfs4_deviceid *id)
91{
92 unsigned char *cptr = (unsigned char *)id->data;
93 unsigned int nbytes = NFS4_DEVICEID4_SIZE;
94 u32 x = 0;
95
96 while (nbytes--) {
97 x *= 37;
98 x += *cptr++;
99 }
100 return x & NFS4_DEVICE_ID_HASH_MASK;
101}
102
/* One entry in the device ID cache. */
103struct pnfs_deviceid_node {
104 struct hlist_node de_node; /* link in a pnfs_deviceid_cache.dc_deviceids[] bucket */
105 struct nfs4_deviceid de_id; /* lookup key; hashed by nfs4_deviceid_hash() */
106 atomic_t de_ref; /* reference count */
107};
108
/* Hash table of device IDs, attached to a struct nfs_client. */
109struct pnfs_deviceid_cache {
110 spinlock_t dc_lock; /* taken when inserting into or removing from dc_deviceids */
111 atomic_t dc_ref; /* number of users of this cache */
112 void (*dc_free_callback)(struct pnfs_deviceid_node *); /* frees a node that was discarded or removed */
113 struct hlist_head dc_deviceids[NFS4_DEVICE_ID_HASH_SIZE]; /* buckets, indexed by nfs4_deviceid_hash() */
114};
115
116extern int pnfs_alloc_init_deviceid_cache(struct nfs_client *,
117 void (*free_callback)(struct pnfs_deviceid_node *));
118extern void pnfs_put_deviceid_cache(struct nfs_client *);
119extern struct pnfs_deviceid_node *pnfs_find_get_deviceid(
120 struct pnfs_deviceid_cache *,
121 struct nfs4_deviceid *);
122extern struct pnfs_deviceid_node *pnfs_add_deviceid(
123 struct pnfs_deviceid_cache *,
124 struct pnfs_deviceid_node *);
125extern void pnfs_put_deviceid(struct pnfs_deviceid_cache *c,
126 struct pnfs_deviceid_node *devid);
127
128extern int pnfs_register_layoutdriver(struct pnfs_layoutdriver_type *);
129extern void pnfs_unregister_layoutdriver(struct pnfs_layoutdriver_type *);
130
131/* nfs4proc.c */
132extern int nfs4_proc_getdeviceinfo(struct nfs_server *server,
133 struct pnfs_device *dev);
134extern int nfs4_proc_layoutget(struct nfs4_layoutget *lgp);
135
136/* pnfs.c */
137struct pnfs_layout_segment *
138pnfs_update_layout(struct inode *ino, struct nfs_open_context *ctx,
139 enum pnfs_iomode access_type);
140void set_pnfs_layoutdriver(struct nfs_server *, u32 id);
141void unset_pnfs_layoutdriver(struct nfs_server *);
142int pnfs_layout_process(struct nfs4_layoutget *lgp);
143void pnfs_destroy_layout(struct nfs_inode *);
144void pnfs_destroy_all_layouts(struct nfs_client *);
145void put_layout_hdr(struct inode *inode);
146void pnfs_get_layout_stateid(nfs4_stateid *dst, struct pnfs_layout_hdr *lo,
147 struct nfs4_state *open_state);
148
149
150static inline int lo_fail_bit(u32 iomode)
151{
152 return iomode == IOMODE_RW ?
153 NFS_LAYOUT_RW_FAILED : NFS_LAYOUT_RO_FAILED;
154}
155
156/* Return true if a layout driver is being used for this mountpoint */
157static inline int pnfs_enabled_sb(struct nfs_server *nfss)
158{
159 return nfss->pnfs_curr_ld != NULL;
160}
161
162#else /* CONFIG_NFS_V4_1 */
163
/*
 * Stubs used when CONFIG_NFS_V4_1 is not set: callers can use the pNFS
 * entry points unconditionally, and they do nothing.
 */
164static inline void pnfs_destroy_all_layouts(struct nfs_client *clp)
165{
166}
167
168static inline void pnfs_destroy_layout(struct nfs_inode *nfsi)
169{
170}
171
/* Without pNFS there is never a layout segment to hand out. */
172static inline struct pnfs_layout_segment *
173pnfs_update_layout(struct inode *ino, struct nfs_open_context *ctx,
174 enum pnfs_iomode access_type)
175{
176 return NULL;
177}
178
179static inline void set_pnfs_layoutdriver(struct nfs_server *s, u32 id)
180{
181}
182
183static inline void unset_pnfs_layoutdriver(struct nfs_server *s)
184{
185}
186
187#endif /* CONFIG_NFS_V4_1 */
188
189#endif /* FS_NFS_PNFS_H */
diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c
index 611bec22f552..58e7f84fc1fd 100644
--- a/fs/nfs/proc.c
+++ b/fs/nfs/proc.c
@@ -258,7 +258,7 @@ static void nfs_free_createdata(const struct nfs_createdata *data)
258 258
259static int 259static int
260nfs_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr, 260nfs_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
261 int flags, struct nameidata *nd) 261 int flags, struct nfs_open_context *ctx)
262{ 262{
263 struct nfs_createdata *data; 263 struct nfs_createdata *data;
264 struct rpc_message msg = { 264 struct rpc_message msg = {
@@ -365,17 +365,32 @@ static int nfs_proc_unlink_done(struct rpc_task *task, struct inode *dir)
365 return 1; 365 return 1;
366} 366}
367 367
368static void
369nfs_proc_rename_setup(struct rpc_message *msg, struct inode *dir)
370{
371 msg->rpc_proc = &nfs_procedures[NFSPROC_RENAME];
372}
373
374static int
375nfs_proc_rename_done(struct rpc_task *task, struct inode *old_dir,
376 struct inode *new_dir)
377{
378 if (nfs_async_handle_expired_key(task))
379 return 0;
380 nfs_mark_for_revalidate(old_dir);
381 nfs_mark_for_revalidate(new_dir);
382 return 1;
383}
384
368static int 385static int
369nfs_proc_rename(struct inode *old_dir, struct qstr *old_name, 386nfs_proc_rename(struct inode *old_dir, struct qstr *old_name,
370 struct inode *new_dir, struct qstr *new_name) 387 struct inode *new_dir, struct qstr *new_name)
371{ 388{
372 struct nfs_renameargs arg = { 389 struct nfs_renameargs arg = {
373 .fromfh = NFS_FH(old_dir), 390 .old_dir = NFS_FH(old_dir),
374 .fromname = old_name->name, 391 .old_name = old_name,
375 .fromlen = old_name->len, 392 .new_dir = NFS_FH(new_dir),
376 .tofh = NFS_FH(new_dir), 393 .new_name = new_name,
377 .toname = new_name->name,
378 .tolen = new_name->len
379 }; 394 };
380 struct rpc_message msg = { 395 struct rpc_message msg = {
381 .rpc_proc = &nfs_procedures[NFSPROC_RENAME], 396 .rpc_proc = &nfs_procedures[NFSPROC_RENAME],
@@ -519,14 +534,14 @@ nfs_proc_rmdir(struct inode *dir, struct qstr *name)
519 */ 534 */
520static int 535static int
521nfs_proc_readdir(struct dentry *dentry, struct rpc_cred *cred, 536nfs_proc_readdir(struct dentry *dentry, struct rpc_cred *cred,
522 u64 cookie, struct page *page, unsigned int count, int plus) 537 u64 cookie, struct page **pages, unsigned int count, int plus)
523{ 538{
524 struct inode *dir = dentry->d_inode; 539 struct inode *dir = dentry->d_inode;
525 struct nfs_readdirargs arg = { 540 struct nfs_readdirargs arg = {
526 .fh = NFS_FH(dir), 541 .fh = NFS_FH(dir),
527 .cookie = cookie, 542 .cookie = cookie,
528 .count = count, 543 .count = count,
529 .pages = &page, 544 .pages = pages,
530 }; 545 };
531 struct rpc_message msg = { 546 struct rpc_message msg = {
532 .rpc_proc = &nfs_procedures[NFSPROC_READDIR], 547 .rpc_proc = &nfs_procedures[NFSPROC_READDIR],
@@ -705,6 +720,8 @@ const struct nfs_rpc_ops nfs_v2_clientops = {
705 .unlink_setup = nfs_proc_unlink_setup, 720 .unlink_setup = nfs_proc_unlink_setup,
706 .unlink_done = nfs_proc_unlink_done, 721 .unlink_done = nfs_proc_unlink_done,
707 .rename = nfs_proc_rename, 722 .rename = nfs_proc_rename,
723 .rename_setup = nfs_proc_rename_setup,
724 .rename_done = nfs_proc_rename_done,
708 .link = nfs_proc_link, 725 .link = nfs_proc_link,
709 .symlink = nfs_proc_symlink, 726 .symlink = nfs_proc_symlink,
710 .mkdir = nfs_proc_mkdir, 727 .mkdir = nfs_proc_mkdir,
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index 87adc2744246..e4b62c6f5a6e 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -25,6 +25,7 @@
25#include "internal.h" 25#include "internal.h"
26#include "iostat.h" 26#include "iostat.h"
27#include "fscache.h" 27#include "fscache.h"
28#include "pnfs.h"
28 29
29#define NFSDBG_FACILITY NFSDBG_PAGECACHE 30#define NFSDBG_FACILITY NFSDBG_PAGECACHE
30 31
@@ -46,7 +47,6 @@ struct nfs_read_data *nfs_readdata_alloc(unsigned int pagecount)
46 memset(p, 0, sizeof(*p)); 47 memset(p, 0, sizeof(*p));
47 INIT_LIST_HEAD(&p->pages); 48 INIT_LIST_HEAD(&p->pages);
48 p->npages = pagecount; 49 p->npages = pagecount;
49 p->res.seq_res.sr_slotid = NFS4_MAX_SLOT_TABLE;
50 if (pagecount <= ARRAY_SIZE(p->page_array)) 50 if (pagecount <= ARRAY_SIZE(p->page_array))
51 p->pagevec = p->page_array; 51 p->pagevec = p->page_array;
52 else { 52 else {
@@ -121,6 +121,7 @@ int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode,
121 len = nfs_page_length(page); 121 len = nfs_page_length(page);
122 if (len == 0) 122 if (len == 0)
123 return nfs_return_empty_page(page); 123 return nfs_return_empty_page(page);
124 pnfs_update_layout(inode, ctx, IOMODE_READ);
124 new = nfs_create_request(ctx, inode, page, 0, len); 125 new = nfs_create_request(ctx, inode, page, 0, len);
125 if (IS_ERR(new)) { 126 if (IS_ERR(new)) {
126 unlock_page(page); 127 unlock_page(page);
@@ -625,6 +626,7 @@ int nfs_readpages(struct file *filp, struct address_space *mapping,
625 if (ret == 0) 626 if (ret == 0)
626 goto read_complete; /* all pages were read */ 627 goto read_complete; /* all pages were read */
627 628
629 pnfs_update_layout(inode, desc.ctx, IOMODE_READ);
628 if (rsize < PAGE_CACHE_SIZE) 630 if (rsize < PAGE_CACHE_SIZE)
629 nfs_pageio_init(&pgio, inode, nfs_pagein_multi, rsize, 0); 631 nfs_pageio_init(&pgio, inode, nfs_pagein_multi, rsize, 0);
630 else 632 else
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index f4cbf0c306c6..0a42e8f4adcb 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -100,6 +100,7 @@ enum {
100 Opt_addr, Opt_mountaddr, Opt_clientaddr, 100 Opt_addr, Opt_mountaddr, Opt_clientaddr,
101 Opt_lookupcache, 101 Opt_lookupcache,
102 Opt_fscache_uniq, 102 Opt_fscache_uniq,
103 Opt_local_lock,
103 104
104 /* Special mount options */ 105 /* Special mount options */
105 Opt_userspace, Opt_deprecated, Opt_sloppy, 106 Opt_userspace, Opt_deprecated, Opt_sloppy,
@@ -171,6 +172,7 @@ static const match_table_t nfs_mount_option_tokens = {
171 172
172 { Opt_lookupcache, "lookupcache=%s" }, 173 { Opt_lookupcache, "lookupcache=%s" },
173 { Opt_fscache_uniq, "fsc=%s" }, 174 { Opt_fscache_uniq, "fsc=%s" },
175 { Opt_local_lock, "local_lock=%s" },
174 176
175 { Opt_err, NULL } 177 { Opt_err, NULL }
176}; 178};
@@ -236,14 +238,30 @@ static match_table_t nfs_lookupcache_tokens = {
236 { Opt_lookupcache_err, NULL } 238 { Opt_lookupcache_err, NULL }
237}; 239};
238 240
241enum {
242 Opt_local_lock_all, Opt_local_lock_flock, Opt_local_lock_posix,
243 Opt_local_lock_none,
244
245 Opt_local_lock_err
246};
247
248static match_table_t nfs_local_lock_tokens = {
249 { Opt_local_lock_all, "all" },
250 { Opt_local_lock_flock, "flock" },
251 { Opt_local_lock_posix, "posix" },
252 { Opt_local_lock_none, "none" },
253
254 { Opt_local_lock_err, NULL }
255};
256
239 257
240static void nfs_umount_begin(struct super_block *); 258static void nfs_umount_begin(struct super_block *);
241static int nfs_statfs(struct dentry *, struct kstatfs *); 259static int nfs_statfs(struct dentry *, struct kstatfs *);
242static int nfs_show_options(struct seq_file *, struct vfsmount *); 260static int nfs_show_options(struct seq_file *, struct vfsmount *);
243static int nfs_show_stats(struct seq_file *, struct vfsmount *); 261static int nfs_show_stats(struct seq_file *, struct vfsmount *);
244static int nfs_get_sb(struct file_system_type *, int, const char *, void *, struct vfsmount *); 262static int nfs_get_sb(struct file_system_type *, int, const char *, void *, struct vfsmount *);
245static int nfs_xdev_get_sb(struct file_system_type *fs_type, 263static struct dentry *nfs_xdev_mount(struct file_system_type *fs_type,
246 int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt); 264 int flags, const char *dev_name, void *raw_data);
247static void nfs_put_super(struct super_block *); 265static void nfs_put_super(struct super_block *);
248static void nfs_kill_super(struct super_block *); 266static void nfs_kill_super(struct super_block *);
249static int nfs_remount(struct super_block *sb, int *flags, char *raw_data); 267static int nfs_remount(struct super_block *sb, int *flags, char *raw_data);
@@ -259,7 +277,7 @@ static struct file_system_type nfs_fs_type = {
259struct file_system_type nfs_xdev_fs_type = { 277struct file_system_type nfs_xdev_fs_type = {
260 .owner = THIS_MODULE, 278 .owner = THIS_MODULE,
261 .name = "nfs", 279 .name = "nfs",
262 .get_sb = nfs_xdev_get_sb, 280 .mount = nfs_xdev_mount,
263 .kill_sb = nfs_kill_super, 281 .kill_sb = nfs_kill_super,
264 .fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA, 282 .fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA,
265}; 283};
@@ -284,14 +302,14 @@ static int nfs4_try_mount(int flags, const char *dev_name,
284 struct nfs_parsed_mount_data *data, struct vfsmount *mnt); 302 struct nfs_parsed_mount_data *data, struct vfsmount *mnt);
285static int nfs4_get_sb(struct file_system_type *fs_type, 303static int nfs4_get_sb(struct file_system_type *fs_type,
286 int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt); 304 int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt);
287static int nfs4_remote_get_sb(struct file_system_type *fs_type, 305static struct dentry *nfs4_remote_mount(struct file_system_type *fs_type,
288 int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt); 306 int flags, const char *dev_name, void *raw_data);
289static int nfs4_xdev_get_sb(struct file_system_type *fs_type, 307static struct dentry *nfs4_xdev_mount(struct file_system_type *fs_type,
290 int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt); 308 int flags, const char *dev_name, void *raw_data);
291static int nfs4_referral_get_sb(struct file_system_type *fs_type, 309static int nfs4_referral_get_sb(struct file_system_type *fs_type,
292 int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt); 310 int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt);
293static int nfs4_remote_referral_get_sb(struct file_system_type *fs_type, 311static struct dentry *nfs4_remote_referral_mount(struct file_system_type *fs_type,
294 int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt); 312 int flags, const char *dev_name, void *raw_data);
295static void nfs4_kill_super(struct super_block *sb); 313static void nfs4_kill_super(struct super_block *sb);
296 314
297static struct file_system_type nfs4_fs_type = { 315static struct file_system_type nfs4_fs_type = {
@@ -305,7 +323,7 @@ static struct file_system_type nfs4_fs_type = {
305static struct file_system_type nfs4_remote_fs_type = { 323static struct file_system_type nfs4_remote_fs_type = {
306 .owner = THIS_MODULE, 324 .owner = THIS_MODULE,
307 .name = "nfs4", 325 .name = "nfs4",
308 .get_sb = nfs4_remote_get_sb, 326 .mount = nfs4_remote_mount,
309 .kill_sb = nfs4_kill_super, 327 .kill_sb = nfs4_kill_super,
310 .fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA, 328 .fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA,
311}; 329};
@@ -313,7 +331,7 @@ static struct file_system_type nfs4_remote_fs_type = {
313struct file_system_type nfs4_xdev_fs_type = { 331struct file_system_type nfs4_xdev_fs_type = {
314 .owner = THIS_MODULE, 332 .owner = THIS_MODULE,
315 .name = "nfs4", 333 .name = "nfs4",
316 .get_sb = nfs4_xdev_get_sb, 334 .mount = nfs4_xdev_mount,
317 .kill_sb = nfs4_kill_super, 335 .kill_sb = nfs4_kill_super,
318 .fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA, 336 .fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA,
319}; 337};
@@ -321,7 +339,7 @@ struct file_system_type nfs4_xdev_fs_type = {
321static struct file_system_type nfs4_remote_referral_fs_type = { 339static struct file_system_type nfs4_remote_referral_fs_type = {
322 .owner = THIS_MODULE, 340 .owner = THIS_MODULE,
323 .name = "nfs4", 341 .name = "nfs4",
324 .get_sb = nfs4_remote_referral_get_sb, 342 .mount = nfs4_remote_referral_mount,
325 .kill_sb = nfs4_kill_super, 343 .kill_sb = nfs4_kill_super,
326 .fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA, 344 .fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA,
327}; 345};
@@ -622,6 +640,7 @@ static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss,
622 const struct proc_nfs_info *nfs_infop; 640 const struct proc_nfs_info *nfs_infop;
623 struct nfs_client *clp = nfss->nfs_client; 641 struct nfs_client *clp = nfss->nfs_client;
624 u32 version = clp->rpc_ops->version; 642 u32 version = clp->rpc_ops->version;
643 int local_flock, local_fcntl;
625 644
626 seq_printf(m, ",vers=%u", version); 645 seq_printf(m, ",vers=%u", version);
627 seq_printf(m, ",rsize=%u", nfss->rsize); 646 seq_printf(m, ",rsize=%u", nfss->rsize);
@@ -670,6 +689,18 @@ static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss,
670 else 689 else
671 seq_printf(m, ",lookupcache=pos"); 690 seq_printf(m, ",lookupcache=pos");
672 } 691 }
692
693 local_flock = nfss->flags & NFS_MOUNT_LOCAL_FLOCK;
694 local_fcntl = nfss->flags & NFS_MOUNT_LOCAL_FCNTL;
695
696 if (!local_flock && !local_fcntl)
697 seq_printf(m, ",local_lock=none");
698 else if (local_flock && local_fcntl)
699 seq_printf(m, ",local_lock=all");
700 else if (local_flock)
701 seq_printf(m, ",local_lock=flock");
702 else
703 seq_printf(m, ",local_lock=posix");
673} 704}
674 705
675/* 706/*
@@ -1017,9 +1048,13 @@ static int nfs_parse_mount_options(char *raw,
1017 break; 1048 break;
1018 case Opt_lock: 1049 case Opt_lock:
1019 mnt->flags &= ~NFS_MOUNT_NONLM; 1050 mnt->flags &= ~NFS_MOUNT_NONLM;
1051 mnt->flags &= ~(NFS_MOUNT_LOCAL_FLOCK |
1052 NFS_MOUNT_LOCAL_FCNTL);
1020 break; 1053 break;
1021 case Opt_nolock: 1054 case Opt_nolock:
1022 mnt->flags |= NFS_MOUNT_NONLM; 1055 mnt->flags |= NFS_MOUNT_NONLM;
1056 mnt->flags |= (NFS_MOUNT_LOCAL_FLOCK |
1057 NFS_MOUNT_LOCAL_FCNTL);
1023 break; 1058 break;
1024 case Opt_v2: 1059 case Opt_v2:
1025 mnt->flags &= ~NFS_MOUNT_VER3; 1060 mnt->flags &= ~NFS_MOUNT_VER3;
@@ -1420,6 +1455,34 @@ static int nfs_parse_mount_options(char *raw,
1420 mnt->fscache_uniq = string; 1455 mnt->fscache_uniq = string;
1421 mnt->options |= NFS_OPTION_FSCACHE; 1456 mnt->options |= NFS_OPTION_FSCACHE;
1422 break; 1457 break;
1458 case Opt_local_lock:
1459 string = match_strdup(args);
1460 if (string == NULL)
1461 goto out_nomem;
1462 token = match_token(string, nfs_local_lock_tokens,
1463 args);
1464 kfree(string);
1465 switch (token) {
1466 case Opt_local_lock_all:
1467 mnt->flags |= (NFS_MOUNT_LOCAL_FLOCK |
1468 NFS_MOUNT_LOCAL_FCNTL);
1469 break;
1470 case Opt_local_lock_flock:
1471 mnt->flags |= NFS_MOUNT_LOCAL_FLOCK;
1472 break;
1473 case Opt_local_lock_posix:
1474 mnt->flags |= NFS_MOUNT_LOCAL_FCNTL;
1475 break;
1476 case Opt_local_lock_none:
1477 mnt->flags &= ~(NFS_MOUNT_LOCAL_FLOCK |
1478 NFS_MOUNT_LOCAL_FCNTL);
1479 break;
1480 default:
1481 dfprintk(MOUNT, "NFS: invalid "
1482 "local_lock argument\n");
1483 return 0;
1484 };
1485 break;
1423 1486
1424 /* 1487 /*
1425 * Special options 1488 * Special options
@@ -1825,6 +1888,12 @@ static int nfs_validate_mount_data(void *options,
1825 if (!args->nfs_server.hostname) 1888 if (!args->nfs_server.hostname)
1826 goto out_nomem; 1889 goto out_nomem;
1827 1890
1891 if (!(data->flags & NFS_MOUNT_NONLM))
1892 args->flags &= ~(NFS_MOUNT_LOCAL_FLOCK|
1893 NFS_MOUNT_LOCAL_FCNTL);
1894 else
1895 args->flags |= (NFS_MOUNT_LOCAL_FLOCK|
1896 NFS_MOUNT_LOCAL_FCNTL);
1828 /* 1897 /*
1829 * The legacy version 6 binary mount data from userspace has a 1898 * The legacy version 6 binary mount data from userspace has a
1830 * field used only to transport selinux information into the 1899 * field used only to transport selinux information into the
@@ -2328,9 +2397,9 @@ static void nfs_kill_super(struct super_block *s)
2328/* 2397/*
2329 * Clone an NFS2/3 server record on xdev traversal (FSID-change) 2398 * Clone an NFS2/3 server record on xdev traversal (FSID-change)
2330 */ 2399 */
2331static int nfs_xdev_get_sb(struct file_system_type *fs_type, int flags, 2400static struct dentry *
2332 const char *dev_name, void *raw_data, 2401nfs_xdev_mount(struct file_system_type *fs_type, int flags,
2333 struct vfsmount *mnt) 2402 const char *dev_name, void *raw_data)
2334{ 2403{
2335 struct nfs_clone_mount *data = raw_data; 2404 struct nfs_clone_mount *data = raw_data;
2336 struct super_block *s; 2405 struct super_block *s;
@@ -2342,7 +2411,7 @@ static int nfs_xdev_get_sb(struct file_system_type *fs_type, int flags,
2342 }; 2411 };
2343 int error; 2412 int error;
2344 2413
2345 dprintk("--> nfs_xdev_get_sb()\n"); 2414 dprintk("--> nfs_xdev_mount()\n");
2346 2415
2347 /* create a new volume representation */ 2416 /* create a new volume representation */
2348 server = nfs_clone_server(NFS_SB(data->sb), data->fh, data->fattr); 2417 server = nfs_clone_server(NFS_SB(data->sb), data->fh, data->fattr);
@@ -2389,28 +2458,26 @@ static int nfs_xdev_get_sb(struct file_system_type *fs_type, int flags,
2389 } 2458 }
2390 2459
2391 s->s_flags |= MS_ACTIVE; 2460 s->s_flags |= MS_ACTIVE;
2392 mnt->mnt_sb = s;
2393 mnt->mnt_root = mntroot;
2394 2461
2395 /* clone any lsm security options from the parent to the new sb */ 2462 /* clone any lsm security options from the parent to the new sb */
2396 security_sb_clone_mnt_opts(data->sb, s); 2463 security_sb_clone_mnt_opts(data->sb, s);
2397 2464
2398 dprintk("<-- nfs_xdev_get_sb() = 0\n"); 2465 dprintk("<-- nfs_xdev_mount() = 0\n");
2399 return 0; 2466 return mntroot;
2400 2467
2401out_err_nosb: 2468out_err_nosb:
2402 nfs_free_server(server); 2469 nfs_free_server(server);
2403out_err_noserver: 2470out_err_noserver:
2404 dprintk("<-- nfs_xdev_get_sb() = %d [error]\n", error); 2471 dprintk("<-- nfs_xdev_mount() = %d [error]\n", error);
2405 return error; 2472 return ERR_PTR(error);
2406 2473
2407error_splat_super: 2474error_splat_super:
2408 if (server && !s->s_root) 2475 if (server && !s->s_root)
2409 bdi_unregister(&server->backing_dev_info); 2476 bdi_unregister(&server->backing_dev_info);
2410error_splat_bdi: 2477error_splat_bdi:
2411 deactivate_locked_super(s); 2478 deactivate_locked_super(s);
2412 dprintk("<-- nfs_xdev_get_sb() = %d [splat]\n", error); 2479 dprintk("<-- nfs_xdev_mount() = %d [splat]\n", error);
2413 return error; 2480 return ERR_PTR(error);
2414} 2481}
2415 2482
2416#ifdef CONFIG_NFS_V4 2483#ifdef CONFIG_NFS_V4
@@ -2441,7 +2508,8 @@ static void nfs4_fill_super(struct super_block *sb)
2441 2508
2442static void nfs4_validate_mount_flags(struct nfs_parsed_mount_data *args) 2509static void nfs4_validate_mount_flags(struct nfs_parsed_mount_data *args)
2443{ 2510{
2444 args->flags &= ~(NFS_MOUNT_NONLM|NFS_MOUNT_NOACL|NFS_MOUNT_VER3); 2511 args->flags &= ~(NFS_MOUNT_NONLM|NFS_MOUNT_NOACL|NFS_MOUNT_VER3|
2512 NFS_MOUNT_LOCAL_FLOCK|NFS_MOUNT_LOCAL_FCNTL);
2445} 2513}
2446 2514
2447static int nfs4_validate_text_mount_data(void *options, 2515static int nfs4_validate_text_mount_data(void *options,
@@ -2579,8 +2647,9 @@ out_no_address:
2579/* 2647/*
2580 * Get the superblock for the NFS4 root partition 2648 * Get the superblock for the NFS4 root partition
2581 */ 2649 */
2582static int nfs4_remote_get_sb(struct file_system_type *fs_type, 2650static struct dentry *
2583 int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt) 2651nfs4_remote_mount(struct file_system_type *fs_type, int flags,
2652 const char *dev_name, void *raw_data)
2584{ 2653{
2585 struct nfs_parsed_mount_data *data = raw_data; 2654 struct nfs_parsed_mount_data *data = raw_data;
2586 struct super_block *s; 2655 struct super_block *s;
@@ -2644,15 +2713,16 @@ static int nfs4_remote_get_sb(struct file_system_type *fs_type,
2644 goto error_splat_root; 2713 goto error_splat_root;
2645 2714
2646 s->s_flags |= MS_ACTIVE; 2715 s->s_flags |= MS_ACTIVE;
2647 mnt->mnt_sb = s; 2716
2648 mnt->mnt_root = mntroot; 2717 security_free_mnt_opts(&data->lsm_opts);
2649 error = 0; 2718 nfs_free_fhandle(mntfh);
2719 return mntroot;
2650 2720
2651out: 2721out:
2652 security_free_mnt_opts(&data->lsm_opts); 2722 security_free_mnt_opts(&data->lsm_opts);
2653out_free_fh: 2723out_free_fh:
2654 nfs_free_fhandle(mntfh); 2724 nfs_free_fhandle(mntfh);
2655 return error; 2725 return ERR_PTR(error);
2656 2726
2657out_free: 2727out_free:
2658 nfs_free_server(server); 2728 nfs_free_server(server);
@@ -2898,9 +2968,9 @@ static void nfs4_kill_super(struct super_block *sb)
2898/* 2968/*
2899 * Clone an NFS4 server record on xdev traversal (FSID-change) 2969 * Clone an NFS4 server record on xdev traversal (FSID-change)
2900 */ 2970 */
2901static int nfs4_xdev_get_sb(struct file_system_type *fs_type, int flags, 2971static struct dentry *
2902 const char *dev_name, void *raw_data, 2972nfs4_xdev_mount(struct file_system_type *fs_type, int flags,
2903 struct vfsmount *mnt) 2973 const char *dev_name, void *raw_data)
2904{ 2974{
2905 struct nfs_clone_mount *data = raw_data; 2975 struct nfs_clone_mount *data = raw_data;
2906 struct super_block *s; 2976 struct super_block *s;
@@ -2912,7 +2982,7 @@ static int nfs4_xdev_get_sb(struct file_system_type *fs_type, int flags,
2912 }; 2982 };
2913 int error; 2983 int error;
2914 2984
2915 dprintk("--> nfs4_xdev_get_sb()\n"); 2985 dprintk("--> nfs4_xdev_mount()\n");
2916 2986
2917 /* create a new volume representation */ 2987 /* create a new volume representation */
2918 server = nfs_clone_server(NFS_SB(data->sb), data->fh, data->fattr); 2988 server = nfs_clone_server(NFS_SB(data->sb), data->fh, data->fattr);
@@ -2959,32 +3029,30 @@ static int nfs4_xdev_get_sb(struct file_system_type *fs_type, int flags,
2959 } 3029 }
2960 3030
2961 s->s_flags |= MS_ACTIVE; 3031 s->s_flags |= MS_ACTIVE;
2962 mnt->mnt_sb = s;
2963 mnt->mnt_root = mntroot;
2964 3032
2965 security_sb_clone_mnt_opts(data->sb, s); 3033 security_sb_clone_mnt_opts(data->sb, s);
2966 3034
2967 dprintk("<-- nfs4_xdev_get_sb() = 0\n"); 3035 dprintk("<-- nfs4_xdev_mount() = 0\n");
2968 return 0; 3036 return mntroot;
2969 3037
2970out_err_nosb: 3038out_err_nosb:
2971 nfs_free_server(server); 3039 nfs_free_server(server);
2972out_err_noserver: 3040out_err_noserver:
2973 dprintk("<-- nfs4_xdev_get_sb() = %d [error]\n", error); 3041 dprintk("<-- nfs4_xdev_mount() = %d [error]\n", error);
2974 return error; 3042 return ERR_PTR(error);
2975 3043
2976error_splat_super: 3044error_splat_super:
2977 if (server && !s->s_root) 3045 if (server && !s->s_root)
2978 bdi_unregister(&server->backing_dev_info); 3046 bdi_unregister(&server->backing_dev_info);
2979error_splat_bdi: 3047error_splat_bdi:
2980 deactivate_locked_super(s); 3048 deactivate_locked_super(s);
2981 dprintk("<-- nfs4_xdev_get_sb() = %d [splat]\n", error); 3049 dprintk("<-- nfs4_xdev_mount() = %d [splat]\n", error);
2982 return error; 3050 return ERR_PTR(error);
2983} 3051}
2984 3052
2985static int nfs4_remote_referral_get_sb(struct file_system_type *fs_type, 3053static struct dentry *
2986 int flags, const char *dev_name, void *raw_data, 3054nfs4_remote_referral_mount(struct file_system_type *fs_type, int flags,
2987 struct vfsmount *mnt) 3055 const char *dev_name, void *raw_data)
2988{ 3056{
2989 struct nfs_clone_mount *data = raw_data; 3057 struct nfs_clone_mount *data = raw_data;
2990 struct super_block *s; 3058 struct super_block *s;
@@ -3048,14 +3116,12 @@ static int nfs4_remote_referral_get_sb(struct file_system_type *fs_type,
3048 } 3116 }
3049 3117
3050 s->s_flags |= MS_ACTIVE; 3118 s->s_flags |= MS_ACTIVE;
3051 mnt->mnt_sb = s;
3052 mnt->mnt_root = mntroot;
3053 3119
3054 security_sb_clone_mnt_opts(data->sb, s); 3120 security_sb_clone_mnt_opts(data->sb, s);
3055 3121
3056 nfs_free_fhandle(mntfh); 3122 nfs_free_fhandle(mntfh);
3057 dprintk("<-- nfs4_referral_get_sb() = 0\n"); 3123 dprintk("<-- nfs4_referral_get_sb() = 0\n");
3058 return 0; 3124 return mntroot;
3059 3125
3060out_err_nosb: 3126out_err_nosb:
3061 nfs_free_server(server); 3127 nfs_free_server(server);
@@ -3063,7 +3129,7 @@ out_err_noserver:
3063 nfs_free_fhandle(mntfh); 3129 nfs_free_fhandle(mntfh);
3064out_err_nofh: 3130out_err_nofh:
3065 dprintk("<-- nfs4_referral_get_sb() = %d [error]\n", error); 3131 dprintk("<-- nfs4_referral_get_sb() = %d [error]\n", error);
3066 return error; 3132 return ERR_PTR(error);
3067 3133
3068error_splat_super: 3134error_splat_super:
3069 if (server && !s->s_root) 3135 if (server && !s->s_root)
@@ -3072,7 +3138,7 @@ error_splat_bdi:
3072 deactivate_locked_super(s); 3138 deactivate_locked_super(s);
3073 nfs_free_fhandle(mntfh); 3139 nfs_free_fhandle(mntfh);
3074 dprintk("<-- nfs4_referral_get_sb() = %d [splat]\n", error); 3140 dprintk("<-- nfs4_referral_get_sb() = %d [splat]\n", error);
3075 return error; 3141 return ERR_PTR(error);
3076} 3142}
3077 3143
3078/* 3144/*
diff --git a/fs/nfs/sysctl.c b/fs/nfs/sysctl.c
index ad4d2e787b20..978aaeb8a093 100644
--- a/fs/nfs/sysctl.c
+++ b/fs/nfs/sysctl.c
@@ -32,6 +32,7 @@ static ctl_table nfs_cb_sysctls[] = {
32 .extra1 = (int *)&nfs_set_port_min, 32 .extra1 = (int *)&nfs_set_port_min,
33 .extra2 = (int *)&nfs_set_port_max, 33 .extra2 = (int *)&nfs_set_port_max,
34 }, 34 },
35#ifndef CONFIG_NFS_USE_NEW_IDMAPPER
35 { 36 {
36 .procname = "idmap_cache_timeout", 37 .procname = "idmap_cache_timeout",
37 .data = &nfs_idmap_cache_timeout, 38 .data = &nfs_idmap_cache_timeout,
@@ -39,6 +40,7 @@ static ctl_table nfs_cb_sysctls[] = {
39 .mode = 0644, 40 .mode = 0644,
40 .proc_handler = proc_dointvec_jiffies, 41 .proc_handler = proc_dointvec_jiffies,
41 }, 42 },
43#endif /* CONFIG_NFS_USE_NEW_IDMAPPER */
42#endif 44#endif
43 { 45 {
44 .procname = "nfs_mountpoint_timeout", 46 .procname = "nfs_mountpoint_timeout",
diff --git a/fs/nfs/unlink.c b/fs/nfs/unlink.c
index 2f84adaad427..7bdec8531400 100644
--- a/fs/nfs/unlink.c
+++ b/fs/nfs/unlink.c
@@ -13,9 +13,12 @@
13#include <linux/nfs_fs.h> 13#include <linux/nfs_fs.h>
14#include <linux/sched.h> 14#include <linux/sched.h>
15#include <linux/wait.h> 15#include <linux/wait.h>
16#include <linux/namei.h>
16 17
17#include "internal.h" 18#include "internal.h"
18#include "nfs4_fs.h" 19#include "nfs4_fs.h"
20#include "iostat.h"
21#include "delegation.h"
19 22
20struct nfs_unlinkdata { 23struct nfs_unlinkdata {
21 struct hlist_node list; 24 struct hlist_node list;
@@ -244,7 +247,7 @@ void nfs_unblock_sillyrename(struct dentry *dentry)
244 * @dir: parent directory of dentry 247 * @dir: parent directory of dentry
245 * @dentry: dentry to unlink 248 * @dentry: dentry to unlink
246 */ 249 */
247int 250static int
248nfs_async_unlink(struct inode *dir, struct dentry *dentry) 251nfs_async_unlink(struct inode *dir, struct dentry *dentry)
249{ 252{
250 struct nfs_unlinkdata *data; 253 struct nfs_unlinkdata *data;
@@ -259,7 +262,6 @@ nfs_async_unlink(struct inode *dir, struct dentry *dentry)
259 status = PTR_ERR(data->cred); 262 status = PTR_ERR(data->cred);
260 goto out_free; 263 goto out_free;
261 } 264 }
262 data->res.seq_res.sr_slotid = NFS4_MAX_SLOT_TABLE;
263 data->res.dir_attr = &data->dir_attr; 265 data->res.dir_attr = &data->dir_attr;
264 266
265 status = -EBUSY; 267 status = -EBUSY;
@@ -303,3 +305,256 @@ nfs_complete_unlink(struct dentry *dentry, struct inode *inode)
303 if (data != NULL && (NFS_STALE(inode) || !nfs_call_unlink(dentry, data))) 305 if (data != NULL && (NFS_STALE(inode) || !nfs_call_unlink(dentry, data)))
304 nfs_free_unlinkdata(data); 306 nfs_free_unlinkdata(data);
305} 307}
308
309/* Cancel a queued async unlink. Called when a sillyrename run fails. */
310static void
311nfs_cancel_async_unlink(struct dentry *dentry)
312{
313 spin_lock(&dentry->d_lock);
314 if (dentry->d_flags & DCACHE_NFSFS_RENAMED) {
315 struct nfs_unlinkdata *data = dentry->d_fsdata;
316
317 dentry->d_flags &= ~DCACHE_NFSFS_RENAMED;
318 spin_unlock(&dentry->d_lock);
319 nfs_free_unlinkdata(data);
320 return;
321 }
322 spin_unlock(&dentry->d_lock);
323}
324
325struct nfs_renamedata {
326 struct nfs_renameargs args;
327 struct nfs_renameres res;
328 struct rpc_cred *cred;
329 struct inode *old_dir;
330 struct dentry *old_dentry;
331 struct nfs_fattr old_fattr;
332 struct inode *new_dir;
333 struct dentry *new_dentry;
334 struct nfs_fattr new_fattr;
335};
336
337/**
338 * nfs_async_rename_done - Sillyrename post-processing
339 * @task: rpc_task of the sillyrename
340 * @calldata: nfs_renamedata for the sillyrename
341 *
342 * Do the directory attribute updates and the d_move
343 */
344static void nfs_async_rename_done(struct rpc_task *task, void *calldata)
345{
346 struct nfs_renamedata *data = calldata;
347 struct inode *old_dir = data->old_dir;
348 struct inode *new_dir = data->new_dir;
349
350 if (!NFS_PROTO(old_dir)->rename_done(task, old_dir, new_dir)) {
351 nfs_restart_rpc(task, NFS_SERVER(old_dir)->nfs_client);
352 return;
353 }
354
355 if (task->tk_status != 0) {
356 nfs_cancel_async_unlink(data->old_dentry);
357 return;
358 }
359
360 nfs_set_verifier(data->old_dentry, nfs_save_change_attribute(old_dir));
361 d_move(data->old_dentry, data->new_dentry);
362}
363
364/**
365 * nfs_async_rename_release - Release the sillyrename data.
366 * @calldata: the struct nfs_renamedata to be released
367 */
368static void nfs_async_rename_release(void *calldata)
369{
370 struct nfs_renamedata *data = calldata;
371 struct super_block *sb = data->old_dir->i_sb;
372
373 if (data->old_dentry->d_inode)
374 nfs_mark_for_revalidate(data->old_dentry->d_inode);
375
376 dput(data->old_dentry);
377 dput(data->new_dentry);
378 iput(data->old_dir);
379 iput(data->new_dir);
380 nfs_sb_deactive(sb);
381 put_rpccred(data->cred);
382 kfree(data);
383}
384
385#if defined(CONFIG_NFS_V4_1)
386static void nfs_rename_prepare(struct rpc_task *task, void *calldata)
387{
388 struct nfs_renamedata *data = calldata;
389 struct nfs_server *server = NFS_SERVER(data->old_dir);
390
391 if (nfs4_setup_sequence(server, &data->args.seq_args,
392 &data->res.seq_res, 1, task))
393 return;
394 rpc_call_start(task);
395}
396#endif /* CONFIG_NFS_V4_1 */
397
398static const struct rpc_call_ops nfs_rename_ops = {
399 .rpc_call_done = nfs_async_rename_done,
400 .rpc_release = nfs_async_rename_release,
401#if defined(CONFIG_NFS_V4_1)
402 .rpc_call_prepare = nfs_rename_prepare,
403#endif /* CONFIG_NFS_V4_1 */
404};
405
406/**
407 * nfs_async_rename - perform an asynchronous rename operation
408 * @old_dir: directory that currently holds the dentry to be renamed
409 * @new_dir: target directory for the rename
410 * @old_dentry: original dentry to be renamed
411 * @new_dentry: dentry to which the old_dentry should be renamed
412 *
413 * It's expected that valid references to the dentries and inodes are held
414 */
415static struct rpc_task *
416nfs_async_rename(struct inode *old_dir, struct inode *new_dir,
417 struct dentry *old_dentry, struct dentry *new_dentry)
418{
419 struct nfs_renamedata *data;
420 struct rpc_message msg = { };
421 struct rpc_task_setup task_setup_data = {
422 .rpc_message = &msg,
423 .callback_ops = &nfs_rename_ops,
424 .workqueue = nfsiod_workqueue,
425 .rpc_client = NFS_CLIENT(old_dir),
426 .flags = RPC_TASK_ASYNC,
427 };
428
429 data = kzalloc(sizeof(*data), GFP_KERNEL);
430 if (data == NULL)
431 return ERR_PTR(-ENOMEM);
432 task_setup_data.callback_data = data,
433
434 data->cred = rpc_lookup_cred();
435 if (IS_ERR(data->cred)) {
436 struct rpc_task *task = ERR_CAST(data->cred);
437 kfree(data);
438 return task;
439 }
440
441 msg.rpc_argp = &data->args;
442 msg.rpc_resp = &data->res;
443 msg.rpc_cred = data->cred;
444
445 /* set up nfs_renamedata */
446 data->old_dir = old_dir;
447 ihold(old_dir);
448 data->new_dir = new_dir;
449 ihold(new_dir);
450 data->old_dentry = dget(old_dentry);
451 data->new_dentry = dget(new_dentry);
452 nfs_fattr_init(&data->old_fattr);
453 nfs_fattr_init(&data->new_fattr);
454
455 /* set up nfs_renameargs */
456 data->args.old_dir = NFS_FH(old_dir);
457 data->args.old_name = &old_dentry->d_name;
458 data->args.new_dir = NFS_FH(new_dir);
459 data->args.new_name = &new_dentry->d_name;
460
461 /* set up nfs_renameres */
462 data->res.old_fattr = &data->old_fattr;
463 data->res.new_fattr = &data->new_fattr;
464
465 nfs_sb_active(old_dir->i_sb);
466
467 NFS_PROTO(data->old_dir)->rename_setup(&msg, old_dir);
468
469 return rpc_run_task(&task_setup_data);
470}
471
472/**
473 * nfs_sillyrename - Perform a silly-rename of a dentry
474 * @dir: inode of directory that contains dentry
475 * @dentry: dentry to be sillyrenamed
476 *
477 * NFSv2/3 is stateless and the server doesn't know when the client is
478 * holding a file open. To prevent application problems when a file is
479 * unlinked while it's still open, the client performs a "silly-rename".
480 * That is, it renames the file to a hidden file in the same directory,
481 * and only performs the unlink once the last reference to it is put.
482 *
483 * The final cleanup is done during dentry_iput.
484 */
485int
486nfs_sillyrename(struct inode *dir, struct dentry *dentry)
487{
488 static unsigned int sillycounter;
489 const int fileidsize = sizeof(NFS_FILEID(dentry->d_inode))*2;
490 const int countersize = sizeof(sillycounter)*2;
491 const int slen = sizeof(".nfs")+fileidsize+countersize-1;
492 char silly[slen+1];
493 struct dentry *sdentry;
494 struct rpc_task *task;
495 int error = -EIO;
496
497 dfprintk(VFS, "NFS: silly-rename(%s/%s, ct=%d)\n",
498 dentry->d_parent->d_name.name, dentry->d_name.name,
499 atomic_read(&dentry->d_count));
500 nfs_inc_stats(dir, NFSIOS_SILLYRENAME);
501
502 /*
503 * We don't allow a dentry to be silly-renamed twice.
504 */
505 error = -EBUSY;
506 if (dentry->d_flags & DCACHE_NFSFS_RENAMED)
507 goto out;
508
509 sprintf(silly, ".nfs%*.*Lx",
510 fileidsize, fileidsize,
511 (unsigned long long)NFS_FILEID(dentry->d_inode));
512
513 /* Return delegation in anticipation of the rename */
514 nfs_inode_return_delegation(dentry->d_inode);
515
516 sdentry = NULL;
517 do {
518 char *suffix = silly + slen - countersize;
519
520 dput(sdentry);
521 sillycounter++;
522 sprintf(suffix, "%*.*x", countersize, countersize, sillycounter);
523
524 dfprintk(VFS, "NFS: trying to rename %s to %s\n",
525 dentry->d_name.name, silly);
526
527 sdentry = lookup_one_len(silly, dentry->d_parent, slen);
528 /*
529 * N.B. Better to return EBUSY here ... it could be
530 * dangerous to delete the file while it's in use.
531 */
532 if (IS_ERR(sdentry))
533 goto out;
534 } while (sdentry->d_inode != NULL); /* need negative lookup */
535
536 /* queue unlink first. Can't do this from rpc_release as it
537 * has to allocate memory
538 */
539 error = nfs_async_unlink(dir, dentry);
540 if (error)
541 goto out_dput;
542
543 /* run the rename task, undo unlink if it fails */
544 task = nfs_async_rename(dir, dir, dentry, sdentry);
545 if (IS_ERR(task)) {
546 error = -EBUSY;
547 nfs_cancel_async_unlink(dentry);
548 goto out_dput;
549 }
550
551 /* wait for the RPC task to complete, unless a SIGKILL intervenes */
552 error = rpc_wait_for_completion_task(task);
553 if (error == 0)
554 error = task->tk_status;
555 rpc_put_task(task);
556out_dput:
557 dput(sdentry);
558out:
559 return error;
560}
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 874972d9427c..4c14c17a5276 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -55,7 +55,6 @@ struct nfs_write_data *nfs_commitdata_alloc(void)
55 if (p) { 55 if (p) {
56 memset(p, 0, sizeof(*p)); 56 memset(p, 0, sizeof(*p));
57 INIT_LIST_HEAD(&p->pages); 57 INIT_LIST_HEAD(&p->pages);
58 p->res.seq_res.sr_slotid = NFS4_MAX_SLOT_TABLE;
59 } 58 }
60 return p; 59 return p;
61} 60}
@@ -75,7 +74,6 @@ struct nfs_write_data *nfs_writedata_alloc(unsigned int pagecount)
75 memset(p, 0, sizeof(*p)); 74 memset(p, 0, sizeof(*p));
76 INIT_LIST_HEAD(&p->pages); 75 INIT_LIST_HEAD(&p->pages);
77 p->npages = pagecount; 76 p->npages = pagecount;
78 p->res.seq_res.sr_slotid = NFS4_MAX_SLOT_TABLE;
79 if (pagecount <= ARRAY_SIZE(p->page_array)) 77 if (pagecount <= ARRAY_SIZE(p->page_array))
80 p->pagevec = p->page_array; 78 p->pagevec = p->page_array;
81 else { 79 else {
@@ -292,9 +290,7 @@ static int nfs_do_writepage(struct page *page, struct writeback_control *wbc, st
292 nfs_add_stats(inode, NFSIOS_WRITEPAGES, 1); 290 nfs_add_stats(inode, NFSIOS_WRITEPAGES, 1);
293 291
294 nfs_pageio_cond_complete(pgio, page->index); 292 nfs_pageio_cond_complete(pgio, page->index);
295 ret = nfs_page_async_flush(pgio, page, 293 ret = nfs_page_async_flush(pgio, page, wbc->sync_mode == WB_SYNC_NONE);
296 wbc->sync_mode == WB_SYNC_NONE ||
297 wbc->nonblocking != 0);
298 if (ret == -EAGAIN) { 294 if (ret == -EAGAIN) {
299 redirty_page_for_writepage(wbc, page); 295 redirty_page_for_writepage(wbc, page);
300 ret = 0; 296 ret = 0;
@@ -1433,15 +1429,17 @@ static int nfs_commit_unstable_pages(struct inode *inode, struct writeback_contr
1433 int flags = FLUSH_SYNC; 1429 int flags = FLUSH_SYNC;
1434 int ret = 0; 1430 int ret = 0;
1435 1431
1436 /* Don't commit yet if this is a non-blocking flush and there are 1432 if (wbc->sync_mode == WB_SYNC_NONE) {
1437 * lots of outstanding writes for this mapping. 1433 /* Don't commit yet if this is a non-blocking flush and there
1438 */ 1434 * are a lot of outstanding writes for this mapping.
1439 if (wbc->sync_mode == WB_SYNC_NONE && 1435 */
1440 nfsi->ncommit <= (nfsi->npages >> 1)) 1436 if (nfsi->ncommit <= (nfsi->npages >> 1))
1441 goto out_mark_dirty; 1437 goto out_mark_dirty;
1442 1438
1443 if (wbc->nonblocking || wbc->for_background) 1439 /* don't wait for the COMMIT response */
1444 flags = 0; 1440 flags = 0;
1441 }
1442
1445 ret = nfs_commit_inode(inode, flags); 1443 ret = nfs_commit_inode(inode, flags);
1446 if (ret >= 0) { 1444 if (ret >= 0) {
1447 if (wbc->sync_mode == WB_SYNC_NONE) { 1445 if (wbc->sync_mode == WB_SYNC_NONE) {
diff --git a/fs/nfsd/Kconfig b/fs/nfsd/Kconfig
index 7cf4ddafb4ab..18b3e8975fe0 100644
--- a/fs/nfsd/Kconfig
+++ b/fs/nfsd/Kconfig
@@ -2,7 +2,6 @@ config NFSD
2 tristate "NFS server support" 2 tristate "NFS server support"
3 depends on INET 3 depends on INET
4 depends on FILE_LOCKING 4 depends on FILE_LOCKING
5 depends on BKL # fix as soon as lockd is done
6 select LOCKD 5 select LOCKD
7 select SUNRPC 6 select SUNRPC
8 select EXPORTFS 7 select EXPORTFS
@@ -29,6 +28,18 @@ config NFSD
29 28
30 If unsure, say N. 29 If unsure, say N.
31 30
31config NFSD_DEPRECATED
32 bool "Include support for deprecated syscall interface to NFSD"
33 depends on NFSD
34 default y
35 help
36 The syscall interface to nfsd was obsoleted in 2.6.0 by a new
37 filesystem based interface. The old interface is due for removal
38 in 2.6.40. If you wish to remove the interface before then
39 say N.
40
41 If unsure, say Y.
42
32config NFSD_V2_ACL 43config NFSD_V2_ACL
33 bool 44 bool
34 depends on NFSD 45 depends on NFSD
diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c
index c2a4f71d87dd..c0fcb7ab7f6d 100644
--- a/fs/nfsd/export.c
+++ b/fs/nfsd/export.c
@@ -28,9 +28,6 @@
28typedef struct auth_domain svc_client; 28typedef struct auth_domain svc_client;
29typedef struct svc_export svc_export; 29typedef struct svc_export svc_export;
30 30
31static void exp_do_unexport(svc_export *unexp);
32static int exp_verify_string(char *cp, int max);
33
34/* 31/*
35 * We have two caches. 32 * We have two caches.
36 * One maps client+vfsmnt+dentry to export options - the export map 33 * One maps client+vfsmnt+dentry to export options - the export map
@@ -802,6 +799,7 @@ exp_find_key(svc_client *clp, int fsid_type, u32 *fsidv, struct cache_req *reqp)
802 return ek; 799 return ek;
803} 800}
804 801
802#ifdef CONFIG_NFSD_DEPRECATED
805static int exp_set_key(svc_client *clp, int fsid_type, u32 *fsidv, 803static int exp_set_key(svc_client *clp, int fsid_type, u32 *fsidv,
806 struct svc_export *exp) 804 struct svc_export *exp)
807{ 805{
@@ -852,6 +850,7 @@ exp_get_fsid_key(svc_client *clp, int fsid)
852 850
853 return exp_find_key(clp, FSID_NUM, fsidv, NULL); 851 return exp_find_key(clp, FSID_NUM, fsidv, NULL);
854} 852}
853#endif
855 854
856static svc_export *exp_get_by_name(svc_client *clp, const struct path *path, 855static svc_export *exp_get_by_name(svc_client *clp, const struct path *path,
857 struct cache_req *reqp) 856 struct cache_req *reqp)
@@ -893,6 +892,7 @@ static struct svc_export *exp_parent(svc_client *clp, struct path *path)
893 return exp; 892 return exp;
894} 893}
895 894
895#ifdef CONFIG_NFSD_DEPRECATED
896/* 896/*
897 * Hashtable locking. Write locks are placed only by user processes 897 * Hashtable locking. Write locks are placed only by user processes
898 * wanting to modify export information. 898 * wanting to modify export information.
@@ -925,6 +925,19 @@ exp_writeunlock(void)
925{ 925{
926 up_write(&hash_sem); 926 up_write(&hash_sem);
927} 927}
928#else
929
930/* hash_sem not needed once deprecated interface is removed */
931void exp_readlock(void) {}
932static inline void exp_writelock(void){}
933void exp_readunlock(void) {}
934static inline void exp_writeunlock(void){}
935
936#endif
937
938#ifdef CONFIG_NFSD_DEPRECATED
939static void exp_do_unexport(svc_export *unexp);
940static int exp_verify_string(char *cp, int max);
928 941
929static void exp_fsid_unhash(struct svc_export *exp) 942static void exp_fsid_unhash(struct svc_export *exp)
930{ 943{
@@ -935,10 +948,9 @@ static void exp_fsid_unhash(struct svc_export *exp)
935 948
936 ek = exp_get_fsid_key(exp->ex_client, exp->ex_fsid); 949 ek = exp_get_fsid_key(exp->ex_client, exp->ex_fsid);
937 if (!IS_ERR(ek)) { 950 if (!IS_ERR(ek)) {
938 ek->h.expiry_time = get_seconds()-1; 951 sunrpc_invalidate(&ek->h, &svc_expkey_cache);
939 cache_put(&ek->h, &svc_expkey_cache); 952 cache_put(&ek->h, &svc_expkey_cache);
940 } 953 }
941 svc_expkey_cache.nextcheck = get_seconds();
942} 954}
943 955
944static int exp_fsid_hash(svc_client *clp, struct svc_export *exp) 956static int exp_fsid_hash(svc_client *clp, struct svc_export *exp)
@@ -973,10 +985,9 @@ static void exp_unhash(struct svc_export *exp)
973 985
974 ek = exp_get_key(exp->ex_client, inode->i_sb->s_dev, inode->i_ino); 986 ek = exp_get_key(exp->ex_client, inode->i_sb->s_dev, inode->i_ino);
975 if (!IS_ERR(ek)) { 987 if (!IS_ERR(ek)) {
976 ek->h.expiry_time = get_seconds()-1; 988 sunrpc_invalidate(&ek->h, &svc_expkey_cache);
977 cache_put(&ek->h, &svc_expkey_cache); 989 cache_put(&ek->h, &svc_expkey_cache);
978 } 990 }
979 svc_expkey_cache.nextcheck = get_seconds();
980} 991}
981 992
982/* 993/*
@@ -1097,8 +1108,7 @@ out:
1097static void 1108static void
1098exp_do_unexport(svc_export *unexp) 1109exp_do_unexport(svc_export *unexp)
1099{ 1110{
1100 unexp->h.expiry_time = get_seconds()-1; 1111 sunrpc_invalidate(&unexp->h, &svc_export_cache);
1101 svc_export_cache.nextcheck = get_seconds();
1102 exp_unhash(unexp); 1112 exp_unhash(unexp);
1103 exp_fsid_unhash(unexp); 1113 exp_fsid_unhash(unexp);
1104} 1114}
@@ -1150,6 +1160,7 @@ out_unlock:
1150 exp_writeunlock(); 1160 exp_writeunlock();
1151 return err; 1161 return err;
1152} 1162}
1163#endif /* CONFIG_NFSD_DEPRECATED */
1153 1164
1154/* 1165/*
1155 * Obtain the root fh on behalf of a client. 1166 * Obtain the root fh on behalf of a client.
@@ -1459,25 +1470,43 @@ static void show_secinfo_flags(struct seq_file *m, int flags)
1459 show_expflags(m, flags, NFSEXP_SECINFO_FLAGS); 1470 show_expflags(m, flags, NFSEXP_SECINFO_FLAGS);
1460} 1471}
1461 1472
1473static bool secinfo_flags_equal(int f, int g)
1474{
1475 f &= NFSEXP_SECINFO_FLAGS;
1476 g &= NFSEXP_SECINFO_FLAGS;
1477 return f == g;
1478}
1479
1480static int show_secinfo_run(struct seq_file *m, struct exp_flavor_info **fp, struct exp_flavor_info *end)
1481{
1482 int flags;
1483
1484 flags = (*fp)->flags;
1485 seq_printf(m, ",sec=%d", (*fp)->pseudoflavor);
1486 (*fp)++;
1487 while (*fp != end && secinfo_flags_equal(flags, (*fp)->flags)) {
1488 seq_printf(m, ":%d", (*fp)->pseudoflavor);
1489 (*fp)++;
1490 }
1491 return flags;
1492}
1493
1462static void show_secinfo(struct seq_file *m, struct svc_export *exp) 1494static void show_secinfo(struct seq_file *m, struct svc_export *exp)
1463{ 1495{
1464 struct exp_flavor_info *f; 1496 struct exp_flavor_info *f;
1465 struct exp_flavor_info *end = exp->ex_flavors + exp->ex_nflavors; 1497 struct exp_flavor_info *end = exp->ex_flavors + exp->ex_nflavors;
1466 int lastflags = 0, first = 0; 1498 int flags;
1467 1499
1468 if (exp->ex_nflavors == 0) 1500 if (exp->ex_nflavors == 0)
1469 return; 1501 return;
1470 for (f = exp->ex_flavors; f < end; f++) { 1502 f = exp->ex_flavors;
1471 if (first || f->flags != lastflags) { 1503 flags = show_secinfo_run(m, &f, end);
1472 if (!first) 1504 if (!secinfo_flags_equal(flags, exp->ex_flags))
1473 show_secinfo_flags(m, lastflags); 1505 show_secinfo_flags(m, flags);
1474 seq_printf(m, ",sec=%d", f->pseudoflavor); 1506 while (f != end) {
1475 lastflags = f->flags; 1507 flags = show_secinfo_run(m, &f, end);
1476 } else { 1508 show_secinfo_flags(m, flags);
1477 seq_printf(m, ":%d", f->pseudoflavor);
1478 }
1479 } 1509 }
1480 show_secinfo_flags(m, lastflags);
1481} 1510}
1482 1511
1483static void exp_flags(struct seq_file *m, int flag, int fsid, 1512static void exp_flags(struct seq_file *m, int flag, int fsid,
@@ -1532,6 +1561,7 @@ const struct seq_operations nfs_exports_op = {
1532 .show = e_show, 1561 .show = e_show,
1533}; 1562};
1534 1563
1564#ifdef CONFIG_NFSD_DEPRECATED
1535/* 1565/*
1536 * Add or modify a client. 1566 * Add or modify a client.
1537 * Change requests may involve the list of host addresses. The list of 1567 * Change requests may involve the list of host addresses. The list of
@@ -1563,7 +1593,7 @@ exp_addclient(struct nfsctl_client *ncp)
1563 /* Insert client into hashtable. */ 1593 /* Insert client into hashtable. */
1564 for (i = 0; i < ncp->cl_naddr; i++) { 1594 for (i = 0; i < ncp->cl_naddr; i++) {
1565 ipv6_addr_set_v4mapped(ncp->cl_addrlist[i].s_addr, &addr6); 1595 ipv6_addr_set_v4mapped(ncp->cl_addrlist[i].s_addr, &addr6);
1566 auth_unix_add_addr(&addr6, dom); 1596 auth_unix_add_addr(&init_net, &addr6, dom);
1567 } 1597 }
1568 auth_unix_forget_old(dom); 1598 auth_unix_forget_old(dom);
1569 auth_domain_put(dom); 1599 auth_domain_put(dom);
@@ -1621,6 +1651,7 @@ exp_verify_string(char *cp, int max)
1621 printk(KERN_NOTICE "nfsd: couldn't validate string %s\n", cp); 1651 printk(KERN_NOTICE "nfsd: couldn't validate string %s\n", cp);
1622 return 0; 1652 return 0;
1623} 1653}
1654#endif /* CONFIG_NFSD_DEPRECATED */
1624 1655
1625/* 1656/*
1626 * Initialize the exports module. 1657 * Initialize the exports module.
diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index 988cbb3a19b6..143da2eecd7b 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -41,7 +41,6 @@
41 41
42#define NFSPROC4_CB_NULL 0 42#define NFSPROC4_CB_NULL 0
43#define NFSPROC4_CB_COMPOUND 1 43#define NFSPROC4_CB_COMPOUND 1
44#define NFS4_STATEID_SIZE 16
45 44
46/* Index of predefined Linux callback client operations */ 45/* Index of predefined Linux callback client operations */
47 46
@@ -248,10 +247,11 @@ encode_cb_recall(struct xdr_stream *xdr, struct nfs4_delegation *dp,
248} 247}
249 248
250static void 249static void
251encode_cb_sequence(struct xdr_stream *xdr, struct nfsd4_cb_sequence *args, 250encode_cb_sequence(struct xdr_stream *xdr, struct nfsd4_callback *cb,
252 struct nfs4_cb_compound_hdr *hdr) 251 struct nfs4_cb_compound_hdr *hdr)
253{ 252{
254 __be32 *p; 253 __be32 *p;
254 struct nfsd4_session *ses = cb->cb_clp->cl_cb_session;
255 255
256 if (hdr->minorversion == 0) 256 if (hdr->minorversion == 0)
257 return; 257 return;
@@ -259,8 +259,8 @@ encode_cb_sequence(struct xdr_stream *xdr, struct nfsd4_cb_sequence *args,
259 RESERVE_SPACE(1 + NFS4_MAX_SESSIONID_LEN + 20); 259 RESERVE_SPACE(1 + NFS4_MAX_SESSIONID_LEN + 20);
260 260
261 WRITE32(OP_CB_SEQUENCE); 261 WRITE32(OP_CB_SEQUENCE);
262 WRITEMEM(args->cbs_clp->cl_sessionid.data, NFS4_MAX_SESSIONID_LEN); 262 WRITEMEM(ses->se_sessionid.data, NFS4_MAX_SESSIONID_LEN);
263 WRITE32(args->cbs_clp->cl_cb_seq_nr); 263 WRITE32(ses->se_cb_seq_nr);
264 WRITE32(0); /* slotid, always 0 */ 264 WRITE32(0); /* slotid, always 0 */
265 WRITE32(0); /* highest slotid always 0 */ 265 WRITE32(0); /* highest slotid always 0 */
266 WRITE32(0); /* cachethis always 0 */ 266 WRITE32(0); /* cachethis always 0 */
@@ -280,18 +280,18 @@ nfs4_xdr_enc_cb_null(struct rpc_rqst *req, __be32 *p)
280 280
281static int 281static int
282nfs4_xdr_enc_cb_recall(struct rpc_rqst *req, __be32 *p, 282nfs4_xdr_enc_cb_recall(struct rpc_rqst *req, __be32 *p,
283 struct nfs4_rpc_args *rpc_args) 283 struct nfsd4_callback *cb)
284{ 284{
285 struct xdr_stream xdr; 285 struct xdr_stream xdr;
286 struct nfs4_delegation *args = rpc_args->args_op; 286 struct nfs4_delegation *args = cb->cb_op;
287 struct nfs4_cb_compound_hdr hdr = { 287 struct nfs4_cb_compound_hdr hdr = {
288 .ident = args->dl_ident, 288 .ident = cb->cb_clp->cl_cb_ident,
289 .minorversion = rpc_args->args_seq.cbs_minorversion, 289 .minorversion = cb->cb_minorversion,
290 }; 290 };
291 291
292 xdr_init_encode(&xdr, &req->rq_snd_buf, p); 292 xdr_init_encode(&xdr, &req->rq_snd_buf, p);
293 encode_cb_compound_hdr(&xdr, &hdr); 293 encode_cb_compound_hdr(&xdr, &hdr);
294 encode_cb_sequence(&xdr, &rpc_args->args_seq, &hdr); 294 encode_cb_sequence(&xdr, cb, &hdr);
295 encode_cb_recall(&xdr, args, &hdr); 295 encode_cb_recall(&xdr, args, &hdr);
296 encode_cb_nops(&hdr); 296 encode_cb_nops(&hdr);
297 return 0; 297 return 0;
@@ -339,15 +339,16 @@ decode_cb_op_hdr(struct xdr_stream *xdr, enum nfs_opnum4 expected)
339 * with a single slot. 339 * with a single slot.
340 */ 340 */
341static int 341static int
342decode_cb_sequence(struct xdr_stream *xdr, struct nfsd4_cb_sequence *res, 342decode_cb_sequence(struct xdr_stream *xdr, struct nfsd4_callback *cb,
343 struct rpc_rqst *rqstp) 343 struct rpc_rqst *rqstp)
344{ 344{
345 struct nfsd4_session *ses = cb->cb_clp->cl_cb_session;
345 struct nfs4_sessionid id; 346 struct nfs4_sessionid id;
346 int status; 347 int status;
347 u32 dummy; 348 u32 dummy;
348 __be32 *p; 349 __be32 *p;
349 350
350 if (res->cbs_minorversion == 0) 351 if (cb->cb_minorversion == 0)
351 return 0; 352 return 0;
352 353
353 status = decode_cb_op_hdr(xdr, OP_CB_SEQUENCE); 354 status = decode_cb_op_hdr(xdr, OP_CB_SEQUENCE);
@@ -363,13 +364,12 @@ decode_cb_sequence(struct xdr_stream *xdr, struct nfsd4_cb_sequence *res,
363 READ_BUF(NFS4_MAX_SESSIONID_LEN + 16); 364 READ_BUF(NFS4_MAX_SESSIONID_LEN + 16);
364 memcpy(id.data, p, NFS4_MAX_SESSIONID_LEN); 365 memcpy(id.data, p, NFS4_MAX_SESSIONID_LEN);
365 p += XDR_QUADLEN(NFS4_MAX_SESSIONID_LEN); 366 p += XDR_QUADLEN(NFS4_MAX_SESSIONID_LEN);
366 if (memcmp(id.data, res->cbs_clp->cl_sessionid.data, 367 if (memcmp(id.data, ses->se_sessionid.data, NFS4_MAX_SESSIONID_LEN)) {
367 NFS4_MAX_SESSIONID_LEN)) {
368 dprintk("%s Invalid session id\n", __func__); 368 dprintk("%s Invalid session id\n", __func__);
369 goto out; 369 goto out;
370 } 370 }
371 READ32(dummy); 371 READ32(dummy);
372 if (dummy != res->cbs_clp->cl_cb_seq_nr) { 372 if (dummy != ses->se_cb_seq_nr) {
373 dprintk("%s Invalid sequence number\n", __func__); 373 dprintk("%s Invalid sequence number\n", __func__);
374 goto out; 374 goto out;
375 } 375 }
@@ -393,7 +393,7 @@ nfs4_xdr_dec_cb_null(struct rpc_rqst *req, __be32 *p)
393 393
394static int 394static int
395nfs4_xdr_dec_cb_recall(struct rpc_rqst *rqstp, __be32 *p, 395nfs4_xdr_dec_cb_recall(struct rpc_rqst *rqstp, __be32 *p,
396 struct nfsd4_cb_sequence *seq) 396 struct nfsd4_callback *cb)
397{ 397{
398 struct xdr_stream xdr; 398 struct xdr_stream xdr;
399 struct nfs4_cb_compound_hdr hdr; 399 struct nfs4_cb_compound_hdr hdr;
@@ -403,8 +403,8 @@ nfs4_xdr_dec_cb_recall(struct rpc_rqst *rqstp, __be32 *p,
403 status = decode_cb_compound_hdr(&xdr, &hdr); 403 status = decode_cb_compound_hdr(&xdr, &hdr);
404 if (status) 404 if (status)
405 goto out; 405 goto out;
406 if (seq) { 406 if (cb) {
407 status = decode_cb_sequence(&xdr, seq, rqstp); 407 status = decode_cb_sequence(&xdr, cb, rqstp);
408 if (status) 408 if (status)
409 goto out; 409 goto out;
410 } 410 }
@@ -473,30 +473,34 @@ static int max_cb_time(void)
473/* Reference counting, callback cleanup, etc., all look racy as heck. 473/* Reference counting, callback cleanup, etc., all look racy as heck.
474 * And why is cl_cb_set an atomic? */ 474 * And why is cl_cb_set an atomic? */
475 475
476int setup_callback_client(struct nfs4_client *clp, struct nfs4_cb_conn *cb) 476int setup_callback_client(struct nfs4_client *clp, struct nfs4_cb_conn *conn)
477{ 477{
478 struct rpc_timeout timeparms = { 478 struct rpc_timeout timeparms = {
479 .to_initval = max_cb_time(), 479 .to_initval = max_cb_time(),
480 .to_retries = 0, 480 .to_retries = 0,
481 }; 481 };
482 struct rpc_create_args args = { 482 struct rpc_create_args args = {
483 .protocol = XPRT_TRANSPORT_TCP, 483 .net = &init_net,
484 .address = (struct sockaddr *) &cb->cb_addr, 484 .address = (struct sockaddr *) &conn->cb_addr,
485 .addrsize = cb->cb_addrlen, 485 .addrsize = conn->cb_addrlen,
486 .timeout = &timeparms, 486 .timeout = &timeparms,
487 .program = &cb_program, 487 .program = &cb_program,
488 .prognumber = cb->cb_prog,
489 .version = 0, 488 .version = 0,
490 .authflavor = clp->cl_flavor, 489 .authflavor = clp->cl_flavor,
491 .flags = (RPC_CLNT_CREATE_NOPING | RPC_CLNT_CREATE_QUIET), 490 .flags = (RPC_CLNT_CREATE_NOPING | RPC_CLNT_CREATE_QUIET),
492 .client_name = clp->cl_principal,
493 }; 491 };
494 struct rpc_clnt *client; 492 struct rpc_clnt *client;
495 493
496 if (!clp->cl_principal && (clp->cl_flavor >= RPC_AUTH_GSS_KRB5)) 494 if (clp->cl_minorversion == 0) {
497 return -EINVAL; 495 if (!clp->cl_principal && (clp->cl_flavor >= RPC_AUTH_GSS_KRB5))
498 if (cb->cb_minorversion) { 496 return -EINVAL;
499 args.bc_xprt = cb->cb_xprt; 497 args.client_name = clp->cl_principal;
498 args.prognumber = conn->cb_prog,
499 args.protocol = XPRT_TRANSPORT_TCP;
500 clp->cl_cb_ident = conn->cb_ident;
501 } else {
502 args.bc_xprt = conn->cb_xprt;
503 args.prognumber = clp->cl_cb_session->se_cb_prog;
500 args.protocol = XPRT_TRANSPORT_BC_TCP; 504 args.protocol = XPRT_TRANSPORT_BC_TCP;
501 } 505 }
502 /* Create RPC client */ 506 /* Create RPC client */
@@ -506,7 +510,7 @@ int setup_callback_client(struct nfs4_client *clp, struct nfs4_cb_conn *cb)
506 PTR_ERR(client)); 510 PTR_ERR(client));
507 return PTR_ERR(client); 511 return PTR_ERR(client);
508 } 512 }
509 nfsd4_set_callback_client(clp, client); 513 clp->cl_cb_client = client;
510 return 0; 514 return 0;
511 515
512} 516}
@@ -519,7 +523,7 @@ static void warn_no_callback_path(struct nfs4_client *clp, int reason)
519 523
520static void nfsd4_cb_probe_done(struct rpc_task *task, void *calldata) 524static void nfsd4_cb_probe_done(struct rpc_task *task, void *calldata)
521{ 525{
522 struct nfs4_client *clp = calldata; 526 struct nfs4_client *clp = container_of(calldata, struct nfs4_client, cl_cb_null);
523 527
524 if (task->tk_status) 528 if (task->tk_status)
525 warn_no_callback_path(clp, task->tk_status); 529 warn_no_callback_path(clp, task->tk_status);
@@ -528,6 +532,8 @@ static void nfsd4_cb_probe_done(struct rpc_task *task, void *calldata)
528} 532}
529 533
530static const struct rpc_call_ops nfsd4_cb_probe_ops = { 534static const struct rpc_call_ops nfsd4_cb_probe_ops = {
535 /* XXX: release method to ensure we set the cb channel down if
536 * necessary on early failure? */
531 .rpc_call_done = nfsd4_cb_probe_done, 537 .rpc_call_done = nfsd4_cb_probe_done,
532}; 538};
533 539
@@ -543,38 +549,42 @@ int set_callback_cred(void)
543 return 0; 549 return 0;
544} 550}
545 551
552static struct workqueue_struct *callback_wq;
546 553
547void do_probe_callback(struct nfs4_client *clp) 554static void do_probe_callback(struct nfs4_client *clp)
548{ 555{
549 struct rpc_message msg = { 556 struct nfsd4_callback *cb = &clp->cl_cb_null;
550 .rpc_proc = &nfs4_cb_procedures[NFSPROC4_CLNT_CB_NULL],
551 .rpc_argp = clp,
552 .rpc_cred = callback_cred
553 };
554 int status;
555 557
556 status = rpc_call_async(clp->cl_cb_client, &msg, 558 cb->cb_op = NULL;
557 RPC_TASK_SOFT | RPC_TASK_SOFTCONN, 559 cb->cb_clp = clp;
558 &nfsd4_cb_probe_ops, (void *)clp); 560
559 if (status) 561 cb->cb_msg.rpc_proc = &nfs4_cb_procedures[NFSPROC4_CLNT_CB_NULL];
560 warn_no_callback_path(clp, status); 562 cb->cb_msg.rpc_argp = NULL;
563 cb->cb_msg.rpc_resp = NULL;
564 cb->cb_msg.rpc_cred = callback_cred;
565
566 cb->cb_ops = &nfsd4_cb_probe_ops;
567
568 queue_work(callback_wq, &cb->cb_work);
561} 569}
562 570
563/* 571/*
564 * Set up the callback client and put a NFSPROC4_CB_NULL on the wire... 572 * Poke the callback thread to process any updates to the callback
573 * parameters, and send a null probe.
565 */ 574 */
566void nfsd4_probe_callback(struct nfs4_client *clp, struct nfs4_cb_conn *cb) 575void nfsd4_probe_callback(struct nfs4_client *clp)
567{ 576{
568 int status; 577 set_bit(NFSD4_CLIENT_CB_UPDATE, &clp->cl_cb_flags);
578 do_probe_callback(clp);
579}
569 580
581void nfsd4_change_callback(struct nfs4_client *clp, struct nfs4_cb_conn *conn)
582{
570 BUG_ON(atomic_read(&clp->cl_cb_set)); 583 BUG_ON(atomic_read(&clp->cl_cb_set));
571 584
572 status = setup_callback_client(clp, cb); 585 spin_lock(&clp->cl_lock);
573 if (status) { 586 memcpy(&clp->cl_cb_conn, conn, sizeof(struct nfs4_cb_conn));
574 warn_no_callback_path(clp, status); 587 spin_unlock(&clp->cl_lock);
575 return;
576 }
577 do_probe_callback(clp);
578} 588}
579 589
580/* 590/*
@@ -585,8 +595,7 @@ void nfsd4_probe_callback(struct nfs4_client *clp, struct nfs4_cb_conn *cb)
585static int nfsd41_cb_setup_sequence(struct nfs4_client *clp, 595static int nfsd41_cb_setup_sequence(struct nfs4_client *clp,
586 struct rpc_task *task) 596 struct rpc_task *task)
587{ 597{
588 struct nfs4_rpc_args *args = task->tk_msg.rpc_argp; 598 u32 *ptr = (u32 *)clp->cl_cb_session->se_sessionid.data;
589 u32 *ptr = (u32 *)clp->cl_sessionid.data;
590 int status = 0; 599 int status = 0;
591 600
592 dprintk("%s: %u:%u:%u:%u\n", __func__, 601 dprintk("%s: %u:%u:%u:%u\n", __func__,
@@ -598,14 +607,6 @@ static int nfsd41_cb_setup_sequence(struct nfs4_client *clp,
598 status = -EAGAIN; 607 status = -EAGAIN;
599 goto out; 608 goto out;
600 } 609 }
601
602 /*
603 * We'll need the clp during XDR encoding and decoding,
604 * and the sequence during decoding to verify the reply
605 */
606 args->args_seq.cbs_clp = clp;
607 task->tk_msg.rpc_resp = &args->args_seq;
608
609out: 610out:
610 dprintk("%s status=%d\n", __func__, status); 611 dprintk("%s status=%d\n", __func__, status);
611 return status; 612 return status;
@@ -617,13 +618,13 @@ out:
617 */ 618 */
618static void nfsd4_cb_prepare(struct rpc_task *task, void *calldata) 619static void nfsd4_cb_prepare(struct rpc_task *task, void *calldata)
619{ 620{
620 struct nfs4_delegation *dp = calldata; 621 struct nfsd4_callback *cb = calldata;
622 struct nfs4_delegation *dp = container_of(cb, struct nfs4_delegation, dl_recall);
621 struct nfs4_client *clp = dp->dl_client; 623 struct nfs4_client *clp = dp->dl_client;
622 struct nfs4_rpc_args *args = task->tk_msg.rpc_argp; 624 u32 minorversion = clp->cl_minorversion;
623 u32 minorversion = clp->cl_cb_conn.cb_minorversion;
624 int status = 0; 625 int status = 0;
625 626
626 args->args_seq.cbs_minorversion = minorversion; 627 cb->cb_minorversion = minorversion;
627 if (minorversion) { 628 if (minorversion) {
628 status = nfsd41_cb_setup_sequence(clp, task); 629 status = nfsd41_cb_setup_sequence(clp, task);
629 if (status) { 630 if (status) {
@@ -640,19 +641,20 @@ static void nfsd4_cb_prepare(struct rpc_task *task, void *calldata)
640 641
641static void nfsd4_cb_done(struct rpc_task *task, void *calldata) 642static void nfsd4_cb_done(struct rpc_task *task, void *calldata)
642{ 643{
643 struct nfs4_delegation *dp = calldata; 644 struct nfsd4_callback *cb = calldata;
645 struct nfs4_delegation *dp = container_of(cb, struct nfs4_delegation, dl_recall);
644 struct nfs4_client *clp = dp->dl_client; 646 struct nfs4_client *clp = dp->dl_client;
645 647
646 dprintk("%s: minorversion=%d\n", __func__, 648 dprintk("%s: minorversion=%d\n", __func__,
647 clp->cl_cb_conn.cb_minorversion); 649 clp->cl_minorversion);
648 650
649 if (clp->cl_cb_conn.cb_minorversion) { 651 if (clp->cl_minorversion) {
650 /* No need for lock, access serialized in nfsd4_cb_prepare */ 652 /* No need for lock, access serialized in nfsd4_cb_prepare */
651 ++clp->cl_cb_seq_nr; 653 ++clp->cl_cb_session->se_cb_seq_nr;
652 clear_bit(0, &clp->cl_cb_slot_busy); 654 clear_bit(0, &clp->cl_cb_slot_busy);
653 rpc_wake_up_next(&clp->cl_cb_waitq); 655 rpc_wake_up_next(&clp->cl_cb_waitq);
654 dprintk("%s: freed slot, new seqid=%d\n", __func__, 656 dprintk("%s: freed slot, new seqid=%d\n", __func__,
655 clp->cl_cb_seq_nr); 657 clp->cl_cb_session->se_cb_seq_nr);
656 658
657 /* We're done looking into the sequence information */ 659 /* We're done looking into the sequence information */
658 task->tk_msg.rpc_resp = NULL; 660 task->tk_msg.rpc_resp = NULL;
@@ -662,7 +664,8 @@ static void nfsd4_cb_done(struct rpc_task *task, void *calldata)
662 664
663static void nfsd4_cb_recall_done(struct rpc_task *task, void *calldata) 665static void nfsd4_cb_recall_done(struct rpc_task *task, void *calldata)
664{ 666{
665 struct nfs4_delegation *dp = calldata; 667 struct nfsd4_callback *cb = calldata;
668 struct nfs4_delegation *dp = container_of(cb, struct nfs4_delegation, dl_recall);
666 struct nfs4_client *clp = dp->dl_client; 669 struct nfs4_client *clp = dp->dl_client;
667 struct rpc_clnt *current_rpc_client = clp->cl_cb_client; 670 struct rpc_clnt *current_rpc_client = clp->cl_cb_client;
668 671
@@ -707,7 +710,8 @@ static void nfsd4_cb_recall_done(struct rpc_task *task, void *calldata)
707 710
708static void nfsd4_cb_recall_release(void *calldata) 711static void nfsd4_cb_recall_release(void *calldata)
709{ 712{
710 struct nfs4_delegation *dp = calldata; 713 struct nfsd4_callback *cb = calldata;
714 struct nfs4_delegation *dp = container_of(cb, struct nfs4_delegation, dl_recall);
711 715
712 nfs4_put_delegation(dp); 716 nfs4_put_delegation(dp);
713} 717}
@@ -718,8 +722,6 @@ static const struct rpc_call_ops nfsd4_cb_recall_ops = {
718 .rpc_release = nfsd4_cb_recall_release, 722 .rpc_release = nfsd4_cb_recall_release,
719}; 723};
720 724
721static struct workqueue_struct *callback_wq;
722
723int nfsd4_create_callback_queue(void) 725int nfsd4_create_callback_queue(void)
724{ 726{
725 callback_wq = create_singlethread_workqueue("nfsd4_callbacks"); 727 callback_wq = create_singlethread_workqueue("nfsd4_callbacks");
@@ -734,57 +736,88 @@ void nfsd4_destroy_callback_queue(void)
734} 736}
735 737
736/* must be called under the state lock */ 738/* must be called under the state lock */
737void nfsd4_set_callback_client(struct nfs4_client *clp, struct rpc_clnt *new) 739void nfsd4_shutdown_callback(struct nfs4_client *clp)
738{ 740{
739 struct rpc_clnt *old = clp->cl_cb_client; 741 set_bit(NFSD4_CLIENT_KILL, &clp->cl_cb_flags);
740
741 clp->cl_cb_client = new;
742 /* 742 /*
743 * After this, any work that saw the old value of cl_cb_client will 743 * Note this won't actually result in a null callback;
744 * be gone: 744 * instead, nfsd4_do_callback_rpc() will detect the killed
745 * client, destroy the rpc client, and stop:
745 */ 746 */
747 do_probe_callback(clp);
746 flush_workqueue(callback_wq); 748 flush_workqueue(callback_wq);
747 /* So we can safely shut it down: */
748 if (old)
749 rpc_shutdown_client(old);
750} 749}
751 750
752/* 751void nfsd4_release_cb(struct nfsd4_callback *cb)
753 * called with dp->dl_count inc'ed.
754 */
755static void _nfsd4_cb_recall(struct nfs4_delegation *dp)
756{ 752{
757 struct nfs4_client *clp = dp->dl_client; 753 if (cb->cb_ops->rpc_release)
758 struct rpc_clnt *clnt = clp->cl_cb_client; 754 cb->cb_ops->rpc_release(cb);
759 struct nfs4_rpc_args *args = &dp->dl_recall.cb_args; 755}
760 struct rpc_message msg = {
761 .rpc_proc = &nfs4_cb_procedures[NFSPROC4_CLNT_CB_RECALL],
762 .rpc_cred = callback_cred
763 };
764 756
765 if (clnt == NULL) { 757void nfsd4_process_cb_update(struct nfsd4_callback *cb)
766 nfs4_put_delegation(dp); 758{
767 return; /* Client is shutting down; give up. */ 759 struct nfs4_cb_conn conn;
760 struct nfs4_client *clp = cb->cb_clp;
761 int err;
762
763 /*
764 * This is either an update, or the client dying; in either case,
765 * kill the old client:
766 */
767 if (clp->cl_cb_client) {
768 rpc_shutdown_client(clp->cl_cb_client);
769 clp->cl_cb_client = NULL;
768 } 770 }
771 if (test_bit(NFSD4_CLIENT_KILL, &clp->cl_cb_flags))
772 return;
773 spin_lock(&clp->cl_lock);
774 /*
775 * Only serialized callback code is allowed to clear these
776 * flags; main nfsd code can only set them:
777 */
778 BUG_ON(!clp->cl_cb_flags);
779 clear_bit(NFSD4_CLIENT_CB_UPDATE, &clp->cl_cb_flags);
780 memcpy(&conn, &cb->cb_clp->cl_cb_conn, sizeof(struct nfs4_cb_conn));
781 spin_unlock(&clp->cl_lock);
769 782
770 args->args_op = dp; 783 err = setup_callback_client(clp, &conn);
771 msg.rpc_argp = args; 784 if (err)
772 dp->dl_retries = 1; 785 warn_no_callback_path(clp, err);
773 rpc_call_async(clnt, &msg, RPC_TASK_SOFT, &nfsd4_cb_recall_ops, dp);
774} 786}
775 787
776void nfsd4_do_callback_rpc(struct work_struct *w) 788void nfsd4_do_callback_rpc(struct work_struct *w)
777{ 789{
778 /* XXX: for now, just send off delegation recall. */ 790 struct nfsd4_callback *cb = container_of(w, struct nfsd4_callback, cb_work);
779 /* In future, generalize to handle any sort of callback. */ 791 struct nfs4_client *clp = cb->cb_clp;
780 struct nfsd4_callback *c = container_of(w, struct nfsd4_callback, cb_work); 792 struct rpc_clnt *clnt;
781 struct nfs4_delegation *dp = container_of(c, struct nfs4_delegation, dl_recall);
782 793
783 _nfsd4_cb_recall(dp); 794 if (clp->cl_cb_flags)
784} 795 nfsd4_process_cb_update(cb);
785 796
797 clnt = clp->cl_cb_client;
798 if (!clnt) {
799 /* Callback channel broken, or client killed; give up: */
800 nfsd4_release_cb(cb);
801 return;
802 }
803 rpc_call_async(clnt, &cb->cb_msg, RPC_TASK_SOFT | RPC_TASK_SOFTCONN,
804 cb->cb_ops, cb);
805}
786 806
787void nfsd4_cb_recall(struct nfs4_delegation *dp) 807void nfsd4_cb_recall(struct nfs4_delegation *dp)
788{ 808{
809 struct nfsd4_callback *cb = &dp->dl_recall;
810
811 dp->dl_retries = 1;
812 cb->cb_op = dp;
813 cb->cb_clp = dp->dl_client;
814 cb->cb_msg.rpc_proc = &nfs4_cb_procedures[NFSPROC4_CLNT_CB_RECALL];
815 cb->cb_msg.rpc_argp = cb;
816 cb->cb_msg.rpc_resp = cb;
817 cb->cb_msg.rpc_cred = callback_cred;
818
819 cb->cb_ops = &nfsd4_cb_recall_ops;
820 dp->dl_retries = 1;
821
789 queue_work(callback_wq, &dp->dl_recall.cb_work); 822 queue_work(callback_wq, &dp->dl_recall.cb_work);
790} 823}
diff --git a/fs/nfsd/nfs4idmap.c b/fs/nfsd/nfs4idmap.c
index c78dbf493424..f0695e815f0e 100644
--- a/fs/nfsd/nfs4idmap.c
+++ b/fs/nfsd/nfs4idmap.c
@@ -482,109 +482,26 @@ nfsd_idmap_shutdown(void)
482 cache_unregister(&nametoid_cache); 482 cache_unregister(&nametoid_cache);
483} 483}
484 484
485/*
486 * Deferred request handling
487 */
488
489struct idmap_defer_req {
490 struct cache_req req;
491 struct cache_deferred_req deferred_req;
492 wait_queue_head_t waitq;
493 atomic_t count;
494};
495
496static inline void
497put_mdr(struct idmap_defer_req *mdr)
498{
499 if (atomic_dec_and_test(&mdr->count))
500 kfree(mdr);
501}
502
503static inline void
504get_mdr(struct idmap_defer_req *mdr)
505{
506 atomic_inc(&mdr->count);
507}
508
509static void
510idmap_revisit(struct cache_deferred_req *dreq, int toomany)
511{
512 struct idmap_defer_req *mdr =
513 container_of(dreq, struct idmap_defer_req, deferred_req);
514
515 wake_up(&mdr->waitq);
516 put_mdr(mdr);
517}
518
519static struct cache_deferred_req *
520idmap_defer(struct cache_req *req)
521{
522 struct idmap_defer_req *mdr =
523 container_of(req, struct idmap_defer_req, req);
524
525 mdr->deferred_req.revisit = idmap_revisit;
526 get_mdr(mdr);
527 return (&mdr->deferred_req);
528}
529
530static inline int
531do_idmap_lookup(struct ent *(*lookup_fn)(struct ent *), struct ent *key,
532 struct cache_detail *detail, struct ent **item,
533 struct idmap_defer_req *mdr)
534{
535 *item = lookup_fn(key);
536 if (!*item)
537 return -ENOMEM;
538 return cache_check(detail, &(*item)->h, &mdr->req);
539}
540
541static inline int
542do_idmap_lookup_nowait(struct ent *(*lookup_fn)(struct ent *),
543 struct ent *key, struct cache_detail *detail,
544 struct ent **item)
545{
546 int ret = -ENOMEM;
547
548 *item = lookup_fn(key);
549 if (!*item)
550 goto out_err;
551 ret = -ETIMEDOUT;
552 if (!test_bit(CACHE_VALID, &(*item)->h.flags)
553 || (*item)->h.expiry_time < get_seconds()
554 || detail->flush_time > (*item)->h.last_refresh)
555 goto out_put;
556 ret = -ENOENT;
557 if (test_bit(CACHE_NEGATIVE, &(*item)->h.flags))
558 goto out_put;
559 return 0;
560out_put:
561 cache_put(&(*item)->h, detail);
562out_err:
563 *item = NULL;
564 return ret;
565}
566
567static int 485static int
568idmap_lookup(struct svc_rqst *rqstp, 486idmap_lookup(struct svc_rqst *rqstp,
569 struct ent *(*lookup_fn)(struct ent *), struct ent *key, 487 struct ent *(*lookup_fn)(struct ent *), struct ent *key,
570 struct cache_detail *detail, struct ent **item) 488 struct cache_detail *detail, struct ent **item)
571{ 489{
572 struct idmap_defer_req *mdr;
573 int ret; 490 int ret;
574 491
575 mdr = kzalloc(sizeof(*mdr), GFP_KERNEL); 492 *item = lookup_fn(key);
576 if (!mdr) 493 if (!*item)
577 return -ENOMEM; 494 return -ENOMEM;
578 atomic_set(&mdr->count, 1); 495 retry:
579 init_waitqueue_head(&mdr->waitq); 496 ret = cache_check(detail, &(*item)->h, &rqstp->rq_chandle);
580 mdr->req.defer = idmap_defer; 497
581 ret = do_idmap_lookup(lookup_fn, key, detail, item, mdr); 498 if (ret == -ETIMEDOUT) {
582 if (ret == -EAGAIN) { 499 struct ent *prev_item = *item;
583 wait_event_interruptible_timeout(mdr->waitq, 500 *item = lookup_fn(key);
584 test_bit(CACHE_VALID, &(*item)->h.flags), 1 * HZ); 501 if (*item != prev_item)
585 ret = do_idmap_lookup_nowait(lookup_fn, key, detail, item); 502 goto retry;
503 cache_put(&(*item)->h, detail);
586 } 504 }
587 put_mdr(mdr);
588 return ret; 505 return ret;
589} 506}
590 507
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index 59ec449b0c7f..0cdfd022bb7b 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -1031,8 +1031,11 @@ nfsd4_proc_compound(struct svc_rqst *rqstp,
1031 resp->cstate.session = NULL; 1031 resp->cstate.session = NULL;
1032 fh_init(&resp->cstate.current_fh, NFS4_FHSIZE); 1032 fh_init(&resp->cstate.current_fh, NFS4_FHSIZE);
1033 fh_init(&resp->cstate.save_fh, NFS4_FHSIZE); 1033 fh_init(&resp->cstate.save_fh, NFS4_FHSIZE);
1034 /* Use the deferral mechanism only for NFSv4.0 compounds */ 1034 /*
1035 rqstp->rq_usedeferral = (args->minorversion == 0); 1035 * Don't use the deferral mechanism for NFSv4; compounds make it
1036 * too hard to avoid non-idempotency problems.
1037 */
1038 rqstp->rq_usedeferral = 0;
1036 1039
1037 /* 1040 /*
1038 * According to RFC3010, this takes precedence over all other errors. 1041 * According to RFC3010, this takes precedence over all other errors.
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index a7292fcf7718..56347e0ac88d 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -207,7 +207,6 @@ alloc_init_deleg(struct nfs4_client *clp, struct nfs4_stateid *stp, struct svc_f
207{ 207{
208 struct nfs4_delegation *dp; 208 struct nfs4_delegation *dp;
209 struct nfs4_file *fp = stp->st_file; 209 struct nfs4_file *fp = stp->st_file;
210 struct nfs4_cb_conn *cb = &stp->st_stateowner->so_client->cl_cb_conn;
211 210
212 dprintk("NFSD alloc_init_deleg\n"); 211 dprintk("NFSD alloc_init_deleg\n");
213 /* 212 /*
@@ -234,7 +233,6 @@ alloc_init_deleg(struct nfs4_client *clp, struct nfs4_stateid *stp, struct svc_f
234 nfs4_file_get_access(fp, O_RDONLY); 233 nfs4_file_get_access(fp, O_RDONLY);
235 dp->dl_flock = NULL; 234 dp->dl_flock = NULL;
236 dp->dl_type = type; 235 dp->dl_type = type;
237 dp->dl_ident = cb->cb_ident;
238 dp->dl_stateid.si_boot = boot_time; 236 dp->dl_stateid.si_boot = boot_time;
239 dp->dl_stateid.si_stateownerid = current_delegid++; 237 dp->dl_stateid.si_stateownerid = current_delegid++;
240 dp->dl_stateid.si_fileid = 0; 238 dp->dl_stateid.si_fileid = 0;
@@ -535,171 +533,258 @@ gen_sessionid(struct nfsd4_session *ses)
535 */ 533 */
536#define NFSD_MIN_HDR_SEQ_SZ (24 + 12 + 44) 534#define NFSD_MIN_HDR_SEQ_SZ (24 + 12 + 44)
537 535
536static void
537free_session_slots(struct nfsd4_session *ses)
538{
539 int i;
540
541 for (i = 0; i < ses->se_fchannel.maxreqs; i++)
542 kfree(ses->se_slots[i]);
543}
544
538/* 545/*
539 * Give the client the number of ca_maxresponsesize_cached slots it 546 * We don't actually need to cache the rpc and session headers, so we
540 * requests, of size bounded by NFSD_SLOT_CACHE_SIZE, 547 * can allocate a little less for each slot:
541 * NFSD_MAX_MEM_PER_SESSION, and nfsd_drc_max_mem. Do not allow more 548 */
542 * than NFSD_MAX_SLOTS_PER_SESSION. 549static inline int slot_bytes(struct nfsd4_channel_attrs *ca)
543 * 550{
544 * If we run out of reserved DRC memory we should (up to a point) 551 return ca->maxresp_cached - NFSD_MIN_HDR_SEQ_SZ;
552}
553
554static int nfsd4_sanitize_slot_size(u32 size)
555{
556 size -= NFSD_MIN_HDR_SEQ_SZ; /* We don't cache the rpc header */
557 size = min_t(u32, size, NFSD_SLOT_CACHE_SIZE);
558
559 return size;
560}
561
562/*
563 * XXX: If we run out of reserved DRC memory we could (up to a point)
545 * re-negotiate active sessions and reduce their slot usage to make 564 * re-negotiate active sessions and reduce their slot usage to make
546 * rooom for new connections. For now we just fail the create session. 565 * rooom for new connections. For now we just fail the create session.
547 */ 566 */
548static int set_forechannel_drc_size(struct nfsd4_channel_attrs *fchan) 567static int nfsd4_get_drc_mem(int slotsize, u32 num)
549{ 568{
550 int mem, size = fchan->maxresp_cached; 569 int avail;
551 570
552 if (fchan->maxreqs < 1) 571 num = min_t(u32, num, NFSD_MAX_SLOTS_PER_SESSION);
553 return nfserr_inval;
554 572
555 if (size < NFSD_MIN_HDR_SEQ_SZ) 573 spin_lock(&nfsd_drc_lock);
556 size = NFSD_MIN_HDR_SEQ_SZ; 574 avail = min_t(int, NFSD_MAX_MEM_PER_SESSION,
557 size -= NFSD_MIN_HDR_SEQ_SZ; 575 nfsd_drc_max_mem - nfsd_drc_mem_used);
558 if (size > NFSD_SLOT_CACHE_SIZE) 576 num = min_t(int, num, avail / slotsize);
559 size = NFSD_SLOT_CACHE_SIZE; 577 nfsd_drc_mem_used += num * slotsize;
560 578 spin_unlock(&nfsd_drc_lock);
561 /* bound the maxreqs by NFSD_MAX_MEM_PER_SESSION */
562 mem = fchan->maxreqs * size;
563 if (mem > NFSD_MAX_MEM_PER_SESSION) {
564 fchan->maxreqs = NFSD_MAX_MEM_PER_SESSION / size;
565 if (fchan->maxreqs > NFSD_MAX_SLOTS_PER_SESSION)
566 fchan->maxreqs = NFSD_MAX_SLOTS_PER_SESSION;
567 mem = fchan->maxreqs * size;
568 }
569 579
580 return num;
581}
582
583static void nfsd4_put_drc_mem(int slotsize, int num)
584{
570 spin_lock(&nfsd_drc_lock); 585 spin_lock(&nfsd_drc_lock);
571 /* bound the total session drc memory ussage */ 586 nfsd_drc_mem_used -= slotsize * num;
572 if (mem + nfsd_drc_mem_used > nfsd_drc_max_mem) {
573 fchan->maxreqs = (nfsd_drc_max_mem - nfsd_drc_mem_used) / size;
574 mem = fchan->maxreqs * size;
575 }
576 nfsd_drc_mem_used += mem;
577 spin_unlock(&nfsd_drc_lock); 587 spin_unlock(&nfsd_drc_lock);
588}
578 589
579 if (fchan->maxreqs == 0) 590static struct nfsd4_session *alloc_session(int slotsize, int numslots)
580 return nfserr_jukebox; 591{
592 struct nfsd4_session *new;
593 int mem, i;
581 594
582 fchan->maxresp_cached = size + NFSD_MIN_HDR_SEQ_SZ; 595 BUILD_BUG_ON(NFSD_MAX_SLOTS_PER_SESSION * sizeof(struct nfsd4_slot *)
583 return 0; 596 + sizeof(struct nfsd4_session) > PAGE_SIZE);
597 mem = numslots * sizeof(struct nfsd4_slot *);
598
599 new = kzalloc(sizeof(*new) + mem, GFP_KERNEL);
600 if (!new)
601 return NULL;
602 /* allocate each struct nfsd4_slot and data cache in one piece */
603 for (i = 0; i < numslots; i++) {
604 mem = sizeof(struct nfsd4_slot) + slotsize;
605 new->se_slots[i] = kzalloc(mem, GFP_KERNEL);
606 if (!new->se_slots[i])
607 goto out_free;
608 }
609 return new;
610out_free:
611 while (i--)
612 kfree(new->se_slots[i]);
613 kfree(new);
614 return NULL;
584} 615}
585 616
586/* 617static void init_forechannel_attrs(struct nfsd4_channel_attrs *new, struct nfsd4_channel_attrs *req, int numslots, int slotsize)
587 * fchan holds the client values on input, and the server values on output
588 * sv_max_mesg is the maximum payload plus one page for overhead.
589 */
590static int init_forechannel_attrs(struct svc_rqst *rqstp,
591 struct nfsd4_channel_attrs *session_fchan,
592 struct nfsd4_channel_attrs *fchan)
593{ 618{
594 int status = 0; 619 u32 maxrpc = nfsd_serv->sv_max_mesg;
595 __u32 maxcount = nfsd_serv->sv_max_mesg;
596 620
597 /* headerpadsz set to zero in encode routine */ 621 new->maxreqs = numslots;
622 new->maxresp_cached = slotsize + NFSD_MIN_HDR_SEQ_SZ;
623 new->maxreq_sz = min_t(u32, req->maxreq_sz, maxrpc);
624 new->maxresp_sz = min_t(u32, req->maxresp_sz, maxrpc);
625 new->maxops = min_t(u32, req->maxops, NFSD_MAX_OPS_PER_COMPOUND);
626}
598 627
599 /* Use the client's max request and max response size if possible */ 628static void free_conn(struct nfsd4_conn *c)
600 if (fchan->maxreq_sz > maxcount) 629{
601 fchan->maxreq_sz = maxcount; 630 svc_xprt_put(c->cn_xprt);
602 session_fchan->maxreq_sz = fchan->maxreq_sz; 631 kfree(c);
632}
603 633
604 if (fchan->maxresp_sz > maxcount) 634static void nfsd4_conn_lost(struct svc_xpt_user *u)
605 fchan->maxresp_sz = maxcount; 635{
606 session_fchan->maxresp_sz = fchan->maxresp_sz; 636 struct nfsd4_conn *c = container_of(u, struct nfsd4_conn, cn_xpt_user);
637 struct nfs4_client *clp = c->cn_session->se_client;
607 638
608 /* Use the client's maxops if possible */ 639 spin_lock(&clp->cl_lock);
609 if (fchan->maxops > NFSD_MAX_OPS_PER_COMPOUND) 640 if (!list_empty(&c->cn_persession)) {
610 fchan->maxops = NFSD_MAX_OPS_PER_COMPOUND; 641 list_del(&c->cn_persession);
611 session_fchan->maxops = fchan->maxops; 642 free_conn(c);
643 }
644 spin_unlock(&clp->cl_lock);
645}
612 646
613 /* FIXME: Error means no more DRC pages so the server should 647static struct nfsd4_conn *alloc_conn(struct svc_rqst *rqstp, u32 flags)
614 * recover pages from existing sessions. For now fail session 648{
615 * creation. 649 struct nfsd4_conn *conn;
616 */
617 status = set_forechannel_drc_size(fchan);
618 650
619 session_fchan->maxresp_cached = fchan->maxresp_cached; 651 conn = kmalloc(sizeof(struct nfsd4_conn), GFP_KERNEL);
620 session_fchan->maxreqs = fchan->maxreqs; 652 if (!conn)
653 return NULL;
654 svc_xprt_get(rqstp->rq_xprt);
655 conn->cn_xprt = rqstp->rq_xprt;
656 conn->cn_flags = flags;
657 INIT_LIST_HEAD(&conn->cn_xpt_user.list);
658 return conn;
659}
621 660
622 dprintk("%s status %d\n", __func__, status); 661static void __nfsd4_hash_conn(struct nfsd4_conn *conn, struct nfsd4_session *ses)
623 return status; 662{
663 conn->cn_session = ses;
664 list_add(&conn->cn_persession, &ses->se_conns);
624} 665}
625 666
626static void 667static void nfsd4_hash_conn(struct nfsd4_conn *conn, struct nfsd4_session *ses)
627free_session_slots(struct nfsd4_session *ses)
628{ 668{
629 int i; 669 struct nfs4_client *clp = ses->se_client;
630 670
631 for (i = 0; i < ses->se_fchannel.maxreqs; i++) 671 spin_lock(&clp->cl_lock);
632 kfree(ses->se_slots[i]); 672 __nfsd4_hash_conn(conn, ses);
673 spin_unlock(&clp->cl_lock);
633} 674}
634 675
635/* 676static void nfsd4_register_conn(struct nfsd4_conn *conn)
636 * We don't actually need to cache the rpc and session headers, so we
637 * can allocate a little less for each slot:
638 */
639static inline int slot_bytes(struct nfsd4_channel_attrs *ca)
640{ 677{
641 return ca->maxresp_cached - NFSD_MIN_HDR_SEQ_SZ; 678 conn->cn_xpt_user.callback = nfsd4_conn_lost;
679 register_xpt_user(conn->cn_xprt, &conn->cn_xpt_user);
642} 680}
643 681
644static int 682static __be32 nfsd4_new_conn(struct svc_rqst *rqstp, struct nfsd4_session *ses)
645alloc_init_session(struct svc_rqst *rqstp, struct nfs4_client *clp,
646 struct nfsd4_create_session *cses)
647{ 683{
648 struct nfsd4_session *new, tmp; 684 struct nfsd4_conn *conn;
649 struct nfsd4_slot *sp; 685 u32 flags = NFS4_CDFC4_FORE;
650 int idx, slotsize, cachesize, i;
651 int status;
652 686
653 memset(&tmp, 0, sizeof(tmp)); 687 if (ses->se_flags & SESSION4_BACK_CHAN)
688 flags |= NFS4_CDFC4_BACK;
689 conn = alloc_conn(rqstp, flags);
690 if (!conn)
691 return nfserr_jukebox;
692 nfsd4_hash_conn(conn, ses);
693 nfsd4_register_conn(conn);
694 return nfs_ok;
695}
654 696
655 /* FIXME: For now, we just accept the client back channel attributes. */ 697static void nfsd4_del_conns(struct nfsd4_session *s)
656 tmp.se_bchannel = cses->back_channel; 698{
657 status = init_forechannel_attrs(rqstp, &tmp.se_fchannel, 699 struct nfs4_client *clp = s->se_client;
658 &cses->fore_channel); 700 struct nfsd4_conn *c;
659 if (status)
660 goto out;
661 701
662 BUILD_BUG_ON(NFSD_MAX_SLOTS_PER_SESSION * sizeof(struct nfsd4_slot) 702 spin_lock(&clp->cl_lock);
663 + sizeof(struct nfsd4_session) > PAGE_SIZE); 703 while (!list_empty(&s->se_conns)) {
704 c = list_first_entry(&s->se_conns, struct nfsd4_conn, cn_persession);
705 list_del_init(&c->cn_persession);
706 spin_unlock(&clp->cl_lock);
664 707
665 status = nfserr_jukebox; 708 unregister_xpt_user(c->cn_xprt, &c->cn_xpt_user);
666 /* allocate struct nfsd4_session and slot table pointers in one piece */ 709 free_conn(c);
667 slotsize = tmp.se_fchannel.maxreqs * sizeof(struct nfsd4_slot *);
668 new = kzalloc(sizeof(*new) + slotsize, GFP_KERNEL);
669 if (!new)
670 goto out;
671 710
672 memcpy(new, &tmp, sizeof(*new)); 711 spin_lock(&clp->cl_lock);
712 }
713 spin_unlock(&clp->cl_lock);
714}
673 715
674 /* allocate each struct nfsd4_slot and data cache in one piece */ 716void free_session(struct kref *kref)
675 cachesize = slot_bytes(&new->se_fchannel); 717{
676 for (i = 0; i < new->se_fchannel.maxreqs; i++) { 718 struct nfsd4_session *ses;
677 sp = kzalloc(sizeof(*sp) + cachesize, GFP_KERNEL); 719 int mem;
678 if (!sp) 720
679 goto out_free; 721 ses = container_of(kref, struct nfsd4_session, se_ref);
680 new->se_slots[i] = sp; 722 nfsd4_del_conns(ses);
723 spin_lock(&nfsd_drc_lock);
724 mem = ses->se_fchannel.maxreqs * slot_bytes(&ses->se_fchannel);
725 nfsd_drc_mem_used -= mem;
726 spin_unlock(&nfsd_drc_lock);
727 free_session_slots(ses);
728 kfree(ses);
729}
730
731static struct nfsd4_session *alloc_init_session(struct svc_rqst *rqstp, struct nfs4_client *clp, struct nfsd4_create_session *cses)
732{
733 struct nfsd4_session *new;
734 struct nfsd4_channel_attrs *fchan = &cses->fore_channel;
735 int numslots, slotsize;
736 int status;
737 int idx;
738
739 /*
740 * Note decreasing slot size below client's request may
741 * make it difficult for client to function correctly, whereas
742 * decreasing the number of slots will (just?) affect
743 * performance. When short on memory we therefore prefer to
744 * decrease number of slots instead of their size.
745 */
746 slotsize = nfsd4_sanitize_slot_size(fchan->maxresp_cached);
747 numslots = nfsd4_get_drc_mem(slotsize, fchan->maxreqs);
748
749 new = alloc_session(slotsize, numslots);
750 if (!new) {
751 nfsd4_put_drc_mem(slotsize, fchan->maxreqs);
752 return NULL;
681 } 753 }
754 init_forechannel_attrs(&new->se_fchannel, fchan, numslots, slotsize);
682 755
683 new->se_client = clp; 756 new->se_client = clp;
684 gen_sessionid(new); 757 gen_sessionid(new);
685 idx = hash_sessionid(&new->se_sessionid);
686 memcpy(clp->cl_sessionid.data, new->se_sessionid.data,
687 NFS4_MAX_SESSIONID_LEN);
688 758
759 INIT_LIST_HEAD(&new->se_conns);
760
761 new->se_cb_seq_nr = 1;
689 new->se_flags = cses->flags; 762 new->se_flags = cses->flags;
763 new->se_cb_prog = cses->callback_prog;
690 kref_init(&new->se_ref); 764 kref_init(&new->se_ref);
765 idx = hash_sessionid(&new->se_sessionid);
691 spin_lock(&client_lock); 766 spin_lock(&client_lock);
692 list_add(&new->se_hash, &sessionid_hashtbl[idx]); 767 list_add(&new->se_hash, &sessionid_hashtbl[idx]);
693 list_add(&new->se_perclnt, &clp->cl_sessions); 768 list_add(&new->se_perclnt, &clp->cl_sessions);
694 spin_unlock(&client_lock); 769 spin_unlock(&client_lock);
695 770
696 status = nfs_ok; 771 status = nfsd4_new_conn(rqstp, new);
697out: 772 /* whoops: benny points out, status is ignored! (err, or bogus) */
698 return status; 773 if (status) {
699out_free: 774 free_session(&new->se_ref);
700 free_session_slots(new); 775 return NULL;
701 kfree(new); 776 }
702 goto out; 777 if (!clp->cl_cb_session && (cses->flags & SESSION4_BACK_CHAN)) {
778 struct sockaddr *sa = svc_addr(rqstp);
779
780 clp->cl_cb_session = new;
781 clp->cl_cb_conn.cb_xprt = rqstp->rq_xprt;
782 svc_xprt_get(rqstp->rq_xprt);
783 rpc_copy_addr((struct sockaddr *)&clp->cl_cb_conn.cb_addr, sa);
784 clp->cl_cb_conn.cb_addrlen = svc_addr_len(sa);
785 nfsd4_probe_callback(clp);
786 }
787 return new;
703} 788}
704 789
705/* caller must hold client_lock */ 790/* caller must hold client_lock */
@@ -731,21 +816,6 @@ unhash_session(struct nfsd4_session *ses)
731 list_del(&ses->se_perclnt); 816 list_del(&ses->se_perclnt);
732} 817}
733 818
734void
735free_session(struct kref *kref)
736{
737 struct nfsd4_session *ses;
738 int mem;
739
740 ses = container_of(kref, struct nfsd4_session, se_ref);
741 spin_lock(&nfsd_drc_lock);
742 mem = ses->se_fchannel.maxreqs * slot_bytes(&ses->se_fchannel);
743 nfsd_drc_mem_used -= mem;
744 spin_unlock(&nfsd_drc_lock);
745 free_session_slots(ses);
746 kfree(ses);
747}
748
749/* must be called under the client_lock */ 819/* must be called under the client_lock */
750static inline void 820static inline void
751renew_client_locked(struct nfs4_client *clp) 821renew_client_locked(struct nfs4_client *clp)
@@ -812,6 +882,13 @@ static struct nfs4_client *alloc_client(struct xdr_netobj name)
812static inline void 882static inline void
813free_client(struct nfs4_client *clp) 883free_client(struct nfs4_client *clp)
814{ 884{
885 while (!list_empty(&clp->cl_sessions)) {
886 struct nfsd4_session *ses;
887 ses = list_entry(clp->cl_sessions.next, struct nfsd4_session,
888 se_perclnt);
889 list_del(&ses->se_perclnt);
890 nfsd4_put_session(ses);
891 }
815 if (clp->cl_cred.cr_group_info) 892 if (clp->cl_cred.cr_group_info)
816 put_group_info(clp->cl_cred.cr_group_info); 893 put_group_info(clp->cl_cred.cr_group_info);
817 kfree(clp->cl_principal); 894 kfree(clp->cl_principal);
@@ -838,15 +915,12 @@ release_session_client(struct nfsd4_session *session)
838static inline void 915static inline void
839unhash_client_locked(struct nfs4_client *clp) 916unhash_client_locked(struct nfs4_client *clp)
840{ 917{
918 struct nfsd4_session *ses;
919
841 mark_client_expired(clp); 920 mark_client_expired(clp);
842 list_del(&clp->cl_lru); 921 list_del(&clp->cl_lru);
843 while (!list_empty(&clp->cl_sessions)) { 922 list_for_each_entry(ses, &clp->cl_sessions, se_perclnt)
844 struct nfsd4_session *ses; 923 list_del_init(&ses->se_hash);
845 ses = list_entry(clp->cl_sessions.next, struct nfsd4_session,
846 se_perclnt);
847 unhash_session(ses);
848 nfsd4_put_session(ses);
849 }
850} 924}
851 925
852static void 926static void
@@ -875,7 +949,7 @@ expire_client(struct nfs4_client *clp)
875 sop = list_entry(clp->cl_openowners.next, struct nfs4_stateowner, so_perclient); 949 sop = list_entry(clp->cl_openowners.next, struct nfs4_stateowner, so_perclient);
876 release_openowner(sop); 950 release_openowner(sop);
877 } 951 }
878 nfsd4_set_callback_client(clp, NULL); 952 nfsd4_shutdown_callback(clp);
879 if (clp->cl_cb_conn.cb_xprt) 953 if (clp->cl_cb_conn.cb_xprt)
880 svc_xprt_put(clp->cl_cb_conn.cb_xprt); 954 svc_xprt_put(clp->cl_cb_conn.cb_xprt);
881 list_del(&clp->cl_idhash); 955 list_del(&clp->cl_idhash);
@@ -960,6 +1034,8 @@ static struct nfs4_client *create_client(struct xdr_netobj name, char *recdir,
960 if (clp == NULL) 1034 if (clp == NULL)
961 return NULL; 1035 return NULL;
962 1036
1037 INIT_LIST_HEAD(&clp->cl_sessions);
1038
963 princ = svc_gss_principal(rqstp); 1039 princ = svc_gss_principal(rqstp);
964 if (princ) { 1040 if (princ) {
965 clp->cl_principal = kstrdup(princ, GFP_KERNEL); 1041 clp->cl_principal = kstrdup(princ, GFP_KERNEL);
@@ -976,8 +1052,9 @@ static struct nfs4_client *create_client(struct xdr_netobj name, char *recdir,
976 INIT_LIST_HEAD(&clp->cl_strhash); 1052 INIT_LIST_HEAD(&clp->cl_strhash);
977 INIT_LIST_HEAD(&clp->cl_openowners); 1053 INIT_LIST_HEAD(&clp->cl_openowners);
978 INIT_LIST_HEAD(&clp->cl_delegations); 1054 INIT_LIST_HEAD(&clp->cl_delegations);
979 INIT_LIST_HEAD(&clp->cl_sessions);
980 INIT_LIST_HEAD(&clp->cl_lru); 1055 INIT_LIST_HEAD(&clp->cl_lru);
1056 spin_lock_init(&clp->cl_lock);
1057 INIT_WORK(&clp->cl_cb_null.cb_work, nfsd4_do_callback_rpc);
981 clp->cl_time = get_seconds(); 1058 clp->cl_time = get_seconds();
982 clear_bit(0, &clp->cl_cb_slot_busy); 1059 clear_bit(0, &clp->cl_cb_slot_busy);
983 rpc_init_wait_queue(&clp->cl_cb_waitq, "Backchannel slot table"); 1060 rpc_init_wait_queue(&clp->cl_cb_waitq, "Backchannel slot table");
@@ -986,7 +1063,7 @@ static struct nfs4_client *create_client(struct xdr_netobj name, char *recdir,
986 clp->cl_flavor = rqstp->rq_flavor; 1063 clp->cl_flavor = rqstp->rq_flavor;
987 copy_cred(&clp->cl_cred, &rqstp->rq_cred); 1064 copy_cred(&clp->cl_cred, &rqstp->rq_cred);
988 gen_confirm(clp); 1065 gen_confirm(clp);
989 1066 clp->cl_cb_session = NULL;
990 return clp; 1067 return clp;
991} 1068}
992 1069
@@ -1098,7 +1175,7 @@ find_unconfirmed_client_by_str(const char *dname, unsigned int hashval,
1098static void 1175static void
1099gen_callback(struct nfs4_client *clp, struct nfsd4_setclientid *se, u32 scopeid) 1176gen_callback(struct nfs4_client *clp, struct nfsd4_setclientid *se, u32 scopeid)
1100{ 1177{
1101 struct nfs4_cb_conn *cb = &clp->cl_cb_conn; 1178 struct nfs4_cb_conn *conn = &clp->cl_cb_conn;
1102 unsigned short expected_family; 1179 unsigned short expected_family;
1103 1180
1104 /* Currently, we only support tcp and tcp6 for the callback channel */ 1181 /* Currently, we only support tcp and tcp6 for the callback channel */
@@ -1111,24 +1188,23 @@ gen_callback(struct nfs4_client *clp, struct nfsd4_setclientid *se, u32 scopeid)
1111 else 1188 else
1112 goto out_err; 1189 goto out_err;
1113 1190
1114 cb->cb_addrlen = rpc_uaddr2sockaddr(se->se_callback_addr_val, 1191 conn->cb_addrlen = rpc_uaddr2sockaddr(se->se_callback_addr_val,
1115 se->se_callback_addr_len, 1192 se->se_callback_addr_len,
1116 (struct sockaddr *) &cb->cb_addr, 1193 (struct sockaddr *)&conn->cb_addr,
1117 sizeof(cb->cb_addr)); 1194 sizeof(conn->cb_addr));
1118 1195
1119 if (!cb->cb_addrlen || cb->cb_addr.ss_family != expected_family) 1196 if (!conn->cb_addrlen || conn->cb_addr.ss_family != expected_family)
1120 goto out_err; 1197 goto out_err;
1121 1198
1122 if (cb->cb_addr.ss_family == AF_INET6) 1199 if (conn->cb_addr.ss_family == AF_INET6)
1123 ((struct sockaddr_in6 *) &cb->cb_addr)->sin6_scope_id = scopeid; 1200 ((struct sockaddr_in6 *)&conn->cb_addr)->sin6_scope_id = scopeid;
1124 1201
1125 cb->cb_minorversion = 0; 1202 conn->cb_prog = se->se_callback_prog;
1126 cb->cb_prog = se->se_callback_prog; 1203 conn->cb_ident = se->se_callback_ident;
1127 cb->cb_ident = se->se_callback_ident;
1128 return; 1204 return;
1129out_err: 1205out_err:
1130 cb->cb_addr.ss_family = AF_UNSPEC; 1206 conn->cb_addr.ss_family = AF_UNSPEC;
1131 cb->cb_addrlen = 0; 1207 conn->cb_addrlen = 0;
1132 dprintk(KERN_INFO "NFSD: this client (clientid %08x/%08x) " 1208 dprintk(KERN_INFO "NFSD: this client (clientid %08x/%08x) "
1133 "will not receive delegations\n", 1209 "will not receive delegations\n",
1134 clp->cl_clientid.cl_boot, clp->cl_clientid.cl_id); 1210 clp->cl_clientid.cl_boot, clp->cl_clientid.cl_id);
@@ -1415,7 +1491,9 @@ nfsd4_create_session(struct svc_rqst *rqstp,
1415{ 1491{
1416 struct sockaddr *sa = svc_addr(rqstp); 1492 struct sockaddr *sa = svc_addr(rqstp);
1417 struct nfs4_client *conf, *unconf; 1493 struct nfs4_client *conf, *unconf;
1494 struct nfsd4_session *new;
1418 struct nfsd4_clid_slot *cs_slot = NULL; 1495 struct nfsd4_clid_slot *cs_slot = NULL;
1496 bool confirm_me = false;
1419 int status = 0; 1497 int status = 0;
1420 1498
1421 nfs4_lock_state(); 1499 nfs4_lock_state();
@@ -1438,7 +1516,6 @@ nfsd4_create_session(struct svc_rqst *rqstp,
1438 cs_slot->sl_seqid, cr_ses->seqid); 1516 cs_slot->sl_seqid, cr_ses->seqid);
1439 goto out; 1517 goto out;
1440 } 1518 }
1441 cs_slot->sl_seqid++;
1442 } else if (unconf) { 1519 } else if (unconf) {
1443 if (!same_creds(&unconf->cl_cred, &rqstp->rq_cred) || 1520 if (!same_creds(&unconf->cl_cred, &rqstp->rq_cred) ||
1444 !rpc_cmp_addr(sa, (struct sockaddr *) &unconf->cl_addr)) { 1521 !rpc_cmp_addr(sa, (struct sockaddr *) &unconf->cl_addr)) {
@@ -1451,25 +1528,10 @@ nfsd4_create_session(struct svc_rqst *rqstp,
1451 if (status) { 1528 if (status) {
1452 /* an unconfirmed replay returns misordered */ 1529 /* an unconfirmed replay returns misordered */
1453 status = nfserr_seq_misordered; 1530 status = nfserr_seq_misordered;
1454 goto out_cache; 1531 goto out;
1455 } 1532 }
1456 1533
1457 cs_slot->sl_seqid++; /* from 0 to 1 */ 1534 confirm_me = true;
1458 move_to_confirmed(unconf);
1459
1460 if (cr_ses->flags & SESSION4_BACK_CHAN) {
1461 unconf->cl_cb_conn.cb_xprt = rqstp->rq_xprt;
1462 svc_xprt_get(rqstp->rq_xprt);
1463 rpc_copy_addr(
1464 (struct sockaddr *)&unconf->cl_cb_conn.cb_addr,
1465 sa);
1466 unconf->cl_cb_conn.cb_addrlen = svc_addr_len(sa);
1467 unconf->cl_cb_conn.cb_minorversion =
1468 cstate->minorversion;
1469 unconf->cl_cb_conn.cb_prog = cr_ses->callback_prog;
1470 unconf->cl_cb_seq_nr = 1;
1471 nfsd4_probe_callback(unconf, &unconf->cl_cb_conn);
1472 }
1473 conf = unconf; 1535 conf = unconf;
1474 } else { 1536 } else {
1475 status = nfserr_stale_clientid; 1537 status = nfserr_stale_clientid;
@@ -1477,22 +1539,30 @@ nfsd4_create_session(struct svc_rqst *rqstp,
1477 } 1539 }
1478 1540
1479 /* 1541 /*
1542 * XXX: we should probably set this at creation time, and check
1543 * for consistent minorversion use throughout:
1544 */
1545 conf->cl_minorversion = 1;
1546 /*
1480 * We do not support RDMA or persistent sessions 1547 * We do not support RDMA or persistent sessions
1481 */ 1548 */
1482 cr_ses->flags &= ~SESSION4_PERSIST; 1549 cr_ses->flags &= ~SESSION4_PERSIST;
1483 cr_ses->flags &= ~SESSION4_RDMA; 1550 cr_ses->flags &= ~SESSION4_RDMA;
1484 1551
1485 status = alloc_init_session(rqstp, conf, cr_ses); 1552 status = nfserr_jukebox;
1486 if (status) 1553 new = alloc_init_session(rqstp, conf, cr_ses);
1554 if (!new)
1487 goto out; 1555 goto out;
1488 1556 status = nfs_ok;
1489 memcpy(cr_ses->sessionid.data, conf->cl_sessionid.data, 1557 memcpy(cr_ses->sessionid.data, new->se_sessionid.data,
1490 NFS4_MAX_SESSIONID_LEN); 1558 NFS4_MAX_SESSIONID_LEN);
1559 cs_slot->sl_seqid++;
1491 cr_ses->seqid = cs_slot->sl_seqid; 1560 cr_ses->seqid = cs_slot->sl_seqid;
1492 1561
1493out_cache:
1494 /* cache solo and embedded create sessions under the state lock */ 1562 /* cache solo and embedded create sessions under the state lock */
1495 nfsd4_cache_create_session(cr_ses, cs_slot, status); 1563 nfsd4_cache_create_session(cr_ses, cs_slot, status);
1564 if (confirm_me)
1565 move_to_confirmed(conf);
1496out: 1566out:
1497 nfs4_unlock_state(); 1567 nfs4_unlock_state();
1498 dprintk("%s returns %d\n", __func__, ntohl(status)); 1568 dprintk("%s returns %d\n", __func__, ntohl(status));
@@ -1546,8 +1616,11 @@ nfsd4_destroy_session(struct svc_rqst *r,
1546 1616
1547 nfs4_lock_state(); 1617 nfs4_lock_state();
1548 /* wait for callbacks */ 1618 /* wait for callbacks */
1549 nfsd4_set_callback_client(ses->se_client, NULL); 1619 nfsd4_shutdown_callback(ses->se_client);
1550 nfs4_unlock_state(); 1620 nfs4_unlock_state();
1621
1622 nfsd4_del_conns(ses);
1623
1551 nfsd4_put_session(ses); 1624 nfsd4_put_session(ses);
1552 status = nfs_ok; 1625 status = nfs_ok;
1553out: 1626out:
@@ -1555,6 +1628,36 @@ out:
1555 return status; 1628 return status;
1556} 1629}
1557 1630
1631static struct nfsd4_conn *__nfsd4_find_conn(struct svc_xprt *xpt, struct nfsd4_session *s)
1632{
1633 struct nfsd4_conn *c;
1634
1635 list_for_each_entry(c, &s->se_conns, cn_persession) {
1636 if (c->cn_xprt == xpt) {
1637 return c;
1638 }
1639 }
1640 return NULL;
1641}
1642
1643static void nfsd4_sequence_check_conn(struct nfsd4_conn *new, struct nfsd4_session *ses)
1644{
1645 struct nfs4_client *clp = ses->se_client;
1646 struct nfsd4_conn *c;
1647
1648 spin_lock(&clp->cl_lock);
1649 c = __nfsd4_find_conn(new->cn_xprt, ses);
1650 if (c) {
1651 spin_unlock(&clp->cl_lock);
1652 free_conn(new);
1653 return;
1654 }
1655 __nfsd4_hash_conn(new, ses);
1656 spin_unlock(&clp->cl_lock);
1657 nfsd4_register_conn(new);
1658 return;
1659}
1660
1558__be32 1661__be32
1559nfsd4_sequence(struct svc_rqst *rqstp, 1662nfsd4_sequence(struct svc_rqst *rqstp,
1560 struct nfsd4_compound_state *cstate, 1663 struct nfsd4_compound_state *cstate,
@@ -1563,11 +1666,20 @@ nfsd4_sequence(struct svc_rqst *rqstp,
1563 struct nfsd4_compoundres *resp = rqstp->rq_resp; 1666 struct nfsd4_compoundres *resp = rqstp->rq_resp;
1564 struct nfsd4_session *session; 1667 struct nfsd4_session *session;
1565 struct nfsd4_slot *slot; 1668 struct nfsd4_slot *slot;
1669 struct nfsd4_conn *conn;
1566 int status; 1670 int status;
1567 1671
1568 if (resp->opcnt != 1) 1672 if (resp->opcnt != 1)
1569 return nfserr_sequence_pos; 1673 return nfserr_sequence_pos;
1570 1674
1675 /*
1676 * Will be either used or freed by nfsd4_sequence_check_conn
1677 * below.
1678 */
1679 conn = alloc_conn(rqstp, NFS4_CDFC4_FORE);
1680 if (!conn)
1681 return nfserr_jukebox;
1682
1571 spin_lock(&client_lock); 1683 spin_lock(&client_lock);
1572 status = nfserr_badsession; 1684 status = nfserr_badsession;
1573 session = find_in_sessionid_hashtbl(&seq->sessionid); 1685 session = find_in_sessionid_hashtbl(&seq->sessionid);
@@ -1599,6 +1711,9 @@ nfsd4_sequence(struct svc_rqst *rqstp,
1599 if (status) 1711 if (status)
1600 goto out; 1712 goto out;
1601 1713
1714 nfsd4_sequence_check_conn(conn, session);
1715 conn = NULL;
1716
1602 /* Success! bump slot seqid */ 1717 /* Success! bump slot seqid */
1603 slot->sl_inuse = true; 1718 slot->sl_inuse = true;
1604 slot->sl_seqid = seq->seqid; 1719 slot->sl_seqid = seq->seqid;
@@ -1613,6 +1728,7 @@ out:
1613 nfsd4_get_session(cstate->session); 1728 nfsd4_get_session(cstate->session);
1614 atomic_inc(&session->se_client->cl_refcount); 1729 atomic_inc(&session->se_client->cl_refcount);
1615 } 1730 }
1731 kfree(conn);
1616 spin_unlock(&client_lock); 1732 spin_unlock(&client_lock);
1617 dprintk("%s: return %d\n", __func__, ntohl(status)); 1733 dprintk("%s: return %d\n", __func__, ntohl(status));
1618 return status; 1734 return status;
@@ -1747,6 +1863,11 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
1747 goto out; 1863 goto out;
1748 gen_clid(new); 1864 gen_clid(new);
1749 } 1865 }
1866 /*
1867 * XXX: we should probably set this at creation time, and check
1868 * for consistent minorversion use throughout:
1869 */
1870 new->cl_minorversion = 0;
1750 gen_callback(new, setclid, rpc_get_scope_id(sa)); 1871 gen_callback(new, setclid, rpc_get_scope_id(sa));
1751 add_to_unconfirmed(new, strhashval); 1872 add_to_unconfirmed(new, strhashval);
1752 setclid->se_clientid.cl_boot = new->cl_clientid.cl_boot; 1873 setclid->se_clientid.cl_boot = new->cl_clientid.cl_boot;
@@ -1807,7 +1928,8 @@ nfsd4_setclientid_confirm(struct svc_rqst *rqstp,
1807 status = nfserr_clid_inuse; 1928 status = nfserr_clid_inuse;
1808 else { 1929 else {
1809 atomic_set(&conf->cl_cb_set, 0); 1930 atomic_set(&conf->cl_cb_set, 0);
1810 nfsd4_probe_callback(conf, &unconf->cl_cb_conn); 1931 nfsd4_change_callback(conf, &unconf->cl_cb_conn);
1932 nfsd4_probe_callback(conf);
1811 expire_client(unconf); 1933 expire_client(unconf);
1812 status = nfs_ok; 1934 status = nfs_ok;
1813 1935
@@ -1841,7 +1963,7 @@ nfsd4_setclientid_confirm(struct svc_rqst *rqstp,
1841 } 1963 }
1842 move_to_confirmed(unconf); 1964 move_to_confirmed(unconf);
1843 conf = unconf; 1965 conf = unconf;
1844 nfsd4_probe_callback(conf, &conf->cl_cb_conn); 1966 nfsd4_probe_callback(conf);
1845 status = nfs_ok; 1967 status = nfs_ok;
1846 } 1968 }
1847 } else if ((!conf || (conf && !same_verf(&conf->cl_confirm, &confirm))) 1969 } else if ((!conf || (conf && !same_verf(&conf->cl_confirm, &confirm)))
@@ -2492,7 +2614,7 @@ nfs4_open_delegation(struct svc_fh *fh, struct nfsd4_open *open, struct nfs4_sta
2492 struct nfs4_delegation *dp; 2614 struct nfs4_delegation *dp;
2493 struct nfs4_stateowner *sop = stp->st_stateowner; 2615 struct nfs4_stateowner *sop = stp->st_stateowner;
2494 int cb_up = atomic_read(&sop->so_client->cl_cb_set); 2616 int cb_up = atomic_read(&sop->so_client->cl_cb_set);
2495 struct file_lock fl, *flp = &fl; 2617 struct file_lock *fl;
2496 int status, flag = 0; 2618 int status, flag = 0;
2497 2619
2498 flag = NFS4_OPEN_DELEGATE_NONE; 2620 flag = NFS4_OPEN_DELEGATE_NONE;
@@ -2526,20 +2648,24 @@ nfs4_open_delegation(struct svc_fh *fh, struct nfsd4_open *open, struct nfs4_sta
2526 flag = NFS4_OPEN_DELEGATE_NONE; 2648 flag = NFS4_OPEN_DELEGATE_NONE;
2527 goto out; 2649 goto out;
2528 } 2650 }
2529 locks_init_lock(&fl); 2651 status = -ENOMEM;
2530 fl.fl_lmops = &nfsd_lease_mng_ops; 2652 fl = locks_alloc_lock();
2531 fl.fl_flags = FL_LEASE; 2653 if (!fl)
2532 fl.fl_type = flag == NFS4_OPEN_DELEGATE_READ? F_RDLCK: F_WRLCK; 2654 goto out;
2533 fl.fl_end = OFFSET_MAX; 2655 locks_init_lock(fl);
2534 fl.fl_owner = (fl_owner_t)dp; 2656 fl->fl_lmops = &nfsd_lease_mng_ops;
2535 fl.fl_file = find_readable_file(stp->st_file); 2657 fl->fl_flags = FL_LEASE;
2536 BUG_ON(!fl.fl_file); 2658 fl->fl_type = flag == NFS4_OPEN_DELEGATE_READ? F_RDLCK: F_WRLCK;
2537 fl.fl_pid = current->tgid; 2659 fl->fl_end = OFFSET_MAX;
2660 fl->fl_owner = (fl_owner_t)dp;
2661 fl->fl_file = find_readable_file(stp->st_file);
2662 BUG_ON(!fl->fl_file);
2663 fl->fl_pid = current->tgid;
2538 2664
2539 /* vfs_setlease checks to see if delegation should be handed out. 2665 /* vfs_setlease checks to see if delegation should be handed out.
2540 * the lock_manager callbacks fl_mylease and fl_change are used 2666 * the lock_manager callbacks fl_mylease and fl_change are used
2541 */ 2667 */
2542 if ((status = vfs_setlease(fl.fl_file, fl.fl_type, &flp))) { 2668 if ((status = vfs_setlease(fl->fl_file, fl->fl_type, &fl))) {
2543 dprintk("NFSD: setlease failed [%d], no delegation\n", status); 2669 dprintk("NFSD: setlease failed [%d], no delegation\n", status);
2544 unhash_delegation(dp); 2670 unhash_delegation(dp);
2545 flag = NFS4_OPEN_DELEGATE_NONE; 2671 flag = NFS4_OPEN_DELEGATE_NONE;
@@ -2944,7 +3070,11 @@ nfs4_preprocess_stateid_op(struct nfsd4_compound_state *cstate,
2944 if (STALE_STATEID(stateid)) 3070 if (STALE_STATEID(stateid))
2945 goto out; 3071 goto out;
2946 3072
2947 status = nfserr_bad_stateid; 3073 /*
3074 * We assume that any stateid that has the current boot time,
3075 * but that we can't find, is expired:
3076 */
3077 status = nfserr_expired;
2948 if (is_delegation_stateid(stateid)) { 3078 if (is_delegation_stateid(stateid)) {
2949 dp = find_delegation_stateid(ino, stateid); 3079 dp = find_delegation_stateid(ino, stateid);
2950 if (!dp) 3080 if (!dp)
@@ -2964,6 +3094,7 @@ nfs4_preprocess_stateid_op(struct nfsd4_compound_state *cstate,
2964 stp = find_stateid(stateid, flags); 3094 stp = find_stateid(stateid, flags);
2965 if (!stp) 3095 if (!stp)
2966 goto out; 3096 goto out;
3097 status = nfserr_bad_stateid;
2967 if (nfs4_check_fh(current_fh, stp)) 3098 if (nfs4_check_fh(current_fh, stp))
2968 goto out; 3099 goto out;
2969 if (!stp->st_stateowner->so_confirmed) 3100 if (!stp->st_stateowner->so_confirmed)
@@ -3038,8 +3169,9 @@ nfs4_preprocess_seqid_op(struct nfsd4_compound_state *cstate, u32 seqid,
3038 * a replayed close: 3169 * a replayed close:
3039 */ 3170 */
3040 sop = search_close_lru(stateid->si_stateownerid, flags); 3171 sop = search_close_lru(stateid->si_stateownerid, flags);
3172 /* It's not stale; let's assume it's expired: */
3041 if (sop == NULL) 3173 if (sop == NULL)
3042 return nfserr_bad_stateid; 3174 return nfserr_expired;
3043 *sopp = sop; 3175 *sopp = sop;
3044 goto check_replay; 3176 goto check_replay;
3045 } 3177 }
@@ -3304,6 +3436,7 @@ nfsd4_delegreturn(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
3304 status = nfserr_bad_stateid; 3436 status = nfserr_bad_stateid;
3305 if (!is_delegation_stateid(stateid)) 3437 if (!is_delegation_stateid(stateid))
3306 goto out; 3438 goto out;
3439 status = nfserr_expired;
3307 dp = find_delegation_stateid(inode, stateid); 3440 dp = find_delegation_stateid(inode, stateid);
3308 if (!dp) 3441 if (!dp)
3309 goto out; 3442 goto out;
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 1a468bbd330f..f35a94a04026 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -1805,19 +1805,23 @@ nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp,
1805 goto out_nfserr; 1805 goto out_nfserr;
1806 } 1806 }
1807 } 1807 }
1808 if ((buflen -= 16) < 0)
1809 goto out_resource;
1810 1808
1811 if (unlikely(bmval2)) { 1809 if (bmval2) {
1810 if ((buflen -= 16) < 0)
1811 goto out_resource;
1812 WRITE32(3); 1812 WRITE32(3);
1813 WRITE32(bmval0); 1813 WRITE32(bmval0);
1814 WRITE32(bmval1); 1814 WRITE32(bmval1);
1815 WRITE32(bmval2); 1815 WRITE32(bmval2);
1816 } else if (likely(bmval1)) { 1816 } else if (bmval1) {
1817 if ((buflen -= 12) < 0)
1818 goto out_resource;
1817 WRITE32(2); 1819 WRITE32(2);
1818 WRITE32(bmval0); 1820 WRITE32(bmval0);
1819 WRITE32(bmval1); 1821 WRITE32(bmval1);
1820 } else { 1822 } else {
1823 if ((buflen -= 8) < 0)
1824 goto out_resource;
1821 WRITE32(1); 1825 WRITE32(1);
1822 WRITE32(bmval0); 1826 WRITE32(bmval0);
1823 } 1827 }
@@ -1828,15 +1832,17 @@ nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp,
1828 u32 word1 = nfsd_suppattrs1(minorversion); 1832 u32 word1 = nfsd_suppattrs1(minorversion);
1829 u32 word2 = nfsd_suppattrs2(minorversion); 1833 u32 word2 = nfsd_suppattrs2(minorversion);
1830 1834
1831 if ((buflen -= 12) < 0)
1832 goto out_resource;
1833 if (!aclsupport) 1835 if (!aclsupport)
1834 word0 &= ~FATTR4_WORD0_ACL; 1836 word0 &= ~FATTR4_WORD0_ACL;
1835 if (!word2) { 1837 if (!word2) {
1838 if ((buflen -= 12) < 0)
1839 goto out_resource;
1836 WRITE32(2); 1840 WRITE32(2);
1837 WRITE32(word0); 1841 WRITE32(word0);
1838 WRITE32(word1); 1842 WRITE32(word1);
1839 } else { 1843 } else {
1844 if ((buflen -= 16) < 0)
1845 goto out_resource;
1840 WRITE32(3); 1846 WRITE32(3);
1841 WRITE32(word0); 1847 WRITE32(word0);
1842 WRITE32(word1); 1848 WRITE32(word1);
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index 06fa87e52e82..4514ebbee4d6 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -22,6 +22,7 @@
22 */ 22 */
23enum { 23enum {
24 NFSD_Root = 1, 24 NFSD_Root = 1,
25#ifdef CONFIG_NFSD_DEPRECATED
25 NFSD_Svc, 26 NFSD_Svc,
26 NFSD_Add, 27 NFSD_Add,
27 NFSD_Del, 28 NFSD_Del,
@@ -29,6 +30,7 @@ enum {
29 NFSD_Unexport, 30 NFSD_Unexport,
30 NFSD_Getfd, 31 NFSD_Getfd,
31 NFSD_Getfs, 32 NFSD_Getfs,
33#endif
32 NFSD_List, 34 NFSD_List,
33 NFSD_Export_features, 35 NFSD_Export_features,
34 NFSD_Fh, 36 NFSD_Fh,
@@ -54,6 +56,7 @@ enum {
54/* 56/*
55 * write() for these nodes. 57 * write() for these nodes.
56 */ 58 */
59#ifdef CONFIG_NFSD_DEPRECATED
57static ssize_t write_svc(struct file *file, char *buf, size_t size); 60static ssize_t write_svc(struct file *file, char *buf, size_t size);
58static ssize_t write_add(struct file *file, char *buf, size_t size); 61static ssize_t write_add(struct file *file, char *buf, size_t size);
59static ssize_t write_del(struct file *file, char *buf, size_t size); 62static ssize_t write_del(struct file *file, char *buf, size_t size);
@@ -61,6 +64,7 @@ static ssize_t write_export(struct file *file, char *buf, size_t size);
61static ssize_t write_unexport(struct file *file, char *buf, size_t size); 64static ssize_t write_unexport(struct file *file, char *buf, size_t size);
62static ssize_t write_getfd(struct file *file, char *buf, size_t size); 65static ssize_t write_getfd(struct file *file, char *buf, size_t size);
63static ssize_t write_getfs(struct file *file, char *buf, size_t size); 66static ssize_t write_getfs(struct file *file, char *buf, size_t size);
67#endif
64static ssize_t write_filehandle(struct file *file, char *buf, size_t size); 68static ssize_t write_filehandle(struct file *file, char *buf, size_t size);
65static ssize_t write_unlock_ip(struct file *file, char *buf, size_t size); 69static ssize_t write_unlock_ip(struct file *file, char *buf, size_t size);
66static ssize_t write_unlock_fs(struct file *file, char *buf, size_t size); 70static ssize_t write_unlock_fs(struct file *file, char *buf, size_t size);
@@ -76,6 +80,7 @@ static ssize_t write_recoverydir(struct file *file, char *buf, size_t size);
76#endif 80#endif
77 81
78static ssize_t (*write_op[])(struct file *, char *, size_t) = { 82static ssize_t (*write_op[])(struct file *, char *, size_t) = {
83#ifdef CONFIG_NFSD_DEPRECATED
79 [NFSD_Svc] = write_svc, 84 [NFSD_Svc] = write_svc,
80 [NFSD_Add] = write_add, 85 [NFSD_Add] = write_add,
81 [NFSD_Del] = write_del, 86 [NFSD_Del] = write_del,
@@ -83,6 +88,7 @@ static ssize_t (*write_op[])(struct file *, char *, size_t) = {
83 [NFSD_Unexport] = write_unexport, 88 [NFSD_Unexport] = write_unexport,
84 [NFSD_Getfd] = write_getfd, 89 [NFSD_Getfd] = write_getfd,
85 [NFSD_Getfs] = write_getfs, 90 [NFSD_Getfs] = write_getfs,
91#endif
86 [NFSD_Fh] = write_filehandle, 92 [NFSD_Fh] = write_filehandle,
87 [NFSD_FO_UnlockIP] = write_unlock_ip, 93 [NFSD_FO_UnlockIP] = write_unlock_ip,
88 [NFSD_FO_UnlockFS] = write_unlock_fs, 94 [NFSD_FO_UnlockFS] = write_unlock_fs,
@@ -121,6 +127,14 @@ static ssize_t nfsctl_transaction_write(struct file *file, const char __user *bu
121 127
122static ssize_t nfsctl_transaction_read(struct file *file, char __user *buf, size_t size, loff_t *pos) 128static ssize_t nfsctl_transaction_read(struct file *file, char __user *buf, size_t size, loff_t *pos)
123{ 129{
130 static int warned;
131 if (file->f_dentry->d_name.name[0] == '.' && !warned) {
132 printk(KERN_INFO
133 "Warning: \"%s\" uses deprecated NFSD interface: %s."
134 " This will be removed in 2.6.40\n",
135 current->comm, file->f_dentry->d_name.name);
136 warned = 1;
137 }
124 if (! file->private_data) { 138 if (! file->private_data) {
125 /* An attempt to read a transaction file without writing 139 /* An attempt to read a transaction file without writing
126 * causes a 0-byte write so that the file can return 140 * causes a 0-byte write so that the file can return
@@ -187,6 +201,7 @@ static const struct file_operations pool_stats_operations = {
187 * payload - write methods 201 * payload - write methods
188 */ 202 */
189 203
204#ifdef CONFIG_NFSD_DEPRECATED
190/** 205/**
191 * write_svc - Start kernel's NFSD server 206 * write_svc - Start kernel's NFSD server
192 * 207 *
@@ -402,7 +417,7 @@ static ssize_t write_getfs(struct file *file, char *buf, size_t size)
402 417
403 ipv6_addr_set_v4mapped(sin->sin_addr.s_addr, &in6); 418 ipv6_addr_set_v4mapped(sin->sin_addr.s_addr, &in6);
404 419
405 clp = auth_unix_lookup(&in6); 420 clp = auth_unix_lookup(&init_net, &in6);
406 if (!clp) 421 if (!clp)
407 err = -EPERM; 422 err = -EPERM;
408 else { 423 else {
@@ -465,7 +480,7 @@ static ssize_t write_getfd(struct file *file, char *buf, size_t size)
465 480
466 ipv6_addr_set_v4mapped(sin->sin_addr.s_addr, &in6); 481 ipv6_addr_set_v4mapped(sin->sin_addr.s_addr, &in6);
467 482
468 clp = auth_unix_lookup(&in6); 483 clp = auth_unix_lookup(&init_net, &in6);
469 if (!clp) 484 if (!clp)
470 err = -EPERM; 485 err = -EPERM;
471 else { 486 else {
@@ -482,6 +497,7 @@ static ssize_t write_getfd(struct file *file, char *buf, size_t size)
482 out: 497 out:
483 return err; 498 return err;
484} 499}
500#endif /* CONFIG_NFSD_DEPRECATED */
485 501
486/** 502/**
487 * write_unlock_ip - Release all locks used by a client 503 * write_unlock_ip - Release all locks used by a client
@@ -1000,12 +1016,12 @@ static ssize_t __write_ports_addxprt(char *buf)
1000 if (err != 0) 1016 if (err != 0)
1001 return err; 1017 return err;
1002 1018
1003 err = svc_create_xprt(nfsd_serv, transport, 1019 err = svc_create_xprt(nfsd_serv, transport, &init_net,
1004 PF_INET, port, SVC_SOCK_ANONYMOUS); 1020 PF_INET, port, SVC_SOCK_ANONYMOUS);
1005 if (err < 0) 1021 if (err < 0)
1006 goto out_err; 1022 goto out_err;
1007 1023
1008 err = svc_create_xprt(nfsd_serv, transport, 1024 err = svc_create_xprt(nfsd_serv, transport, &init_net,
1009 PF_INET6, port, SVC_SOCK_ANONYMOUS); 1025 PF_INET6, port, SVC_SOCK_ANONYMOUS);
1010 if (err < 0 && err != -EAFNOSUPPORT) 1026 if (err < 0 && err != -EAFNOSUPPORT)
1011 goto out_close; 1027 goto out_close;
@@ -1356,6 +1372,7 @@ static ssize_t write_recoverydir(struct file *file, char *buf, size_t size)
1356static int nfsd_fill_super(struct super_block * sb, void * data, int silent) 1372static int nfsd_fill_super(struct super_block * sb, void * data, int silent)
1357{ 1373{
1358 static struct tree_descr nfsd_files[] = { 1374 static struct tree_descr nfsd_files[] = {
1375#ifdef CONFIG_NFSD_DEPRECATED
1359 [NFSD_Svc] = {".svc", &transaction_ops, S_IWUSR}, 1376 [NFSD_Svc] = {".svc", &transaction_ops, S_IWUSR},
1360 [NFSD_Add] = {".add", &transaction_ops, S_IWUSR}, 1377 [NFSD_Add] = {".add", &transaction_ops, S_IWUSR},
1361 [NFSD_Del] = {".del", &transaction_ops, S_IWUSR}, 1378 [NFSD_Del] = {".del", &transaction_ops, S_IWUSR},
@@ -1363,6 +1380,7 @@ static int nfsd_fill_super(struct super_block * sb, void * data, int silent)
1363 [NFSD_Unexport] = {".unexport", &transaction_ops, S_IWUSR}, 1380 [NFSD_Unexport] = {".unexport", &transaction_ops, S_IWUSR},
1364 [NFSD_Getfd] = {".getfd", &transaction_ops, S_IWUSR|S_IRUSR}, 1381 [NFSD_Getfd] = {".getfd", &transaction_ops, S_IWUSR|S_IRUSR},
1365 [NFSD_Getfs] = {".getfs", &transaction_ops, S_IWUSR|S_IRUSR}, 1382 [NFSD_Getfs] = {".getfs", &transaction_ops, S_IWUSR|S_IRUSR},
1383#endif
1366 [NFSD_List] = {"exports", &exports_operations, S_IRUGO}, 1384 [NFSD_List] = {"exports", &exports_operations, S_IRUGO},
1367 [NFSD_Export_features] = {"export_features", 1385 [NFSD_Export_features] = {"export_features",
1368 &export_features_operations, S_IRUGO}, 1386 &export_features_operations, S_IRUGO},
@@ -1387,16 +1405,16 @@ static int nfsd_fill_super(struct super_block * sb, void * data, int silent)
1387 return simple_fill_super(sb, 0x6e667364, nfsd_files); 1405 return simple_fill_super(sb, 0x6e667364, nfsd_files);
1388} 1406}
1389 1407
1390static int nfsd_get_sb(struct file_system_type *fs_type, 1408static struct dentry *nfsd_mount(struct file_system_type *fs_type,
1391 int flags, const char *dev_name, void *data, struct vfsmount *mnt) 1409 int flags, const char *dev_name, void *data)
1392{ 1410{
1393 return get_sb_single(fs_type, flags, data, nfsd_fill_super, mnt); 1411 return mount_single(fs_type, flags, data, nfsd_fill_super);
1394} 1412}
1395 1413
1396static struct file_system_type nfsd_fs_type = { 1414static struct file_system_type nfsd_fs_type = {
1397 .owner = THIS_MODULE, 1415 .owner = THIS_MODULE,
1398 .name = "nfsd", 1416 .name = "nfsd",
1399 .get_sb = nfsd_get_sb, 1417 .mount = nfsd_mount,
1400 .kill_sb = kill_litter_super, 1418 .kill_sb = kill_litter_super,
1401}; 1419};
1402 1420
diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h
index b76ac3a82e39..6b641cf2c19a 100644
--- a/fs/nfsd/nfsd.h
+++ b/fs/nfsd/nfsd.h
@@ -249,7 +249,7 @@ extern time_t nfsd4_grace;
249#define COMPOUND_SLACK_SPACE 140 /* OP_GETFH */ 249#define COMPOUND_SLACK_SPACE 140 /* OP_GETFH */
250#define COMPOUND_ERR_SLACK_SPACE 12 /* OP_SETATTR */ 250#define COMPOUND_ERR_SLACK_SPACE 12 /* OP_SETATTR */
251 251
252#define NFSD_LAUNDROMAT_MINTIMEOUT 10 /* seconds */ 252#define NFSD_LAUNDROMAT_MINTIMEOUT 1 /* seconds */
253 253
254/* 254/*
255 * The following attributes are currently not supported by the NFSv4 server: 255 * The following attributes are currently not supported by the NFSv4 server:
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index e2c43464f237..2bae1d86f5f2 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -16,6 +16,7 @@
16#include <linux/lockd/bind.h> 16#include <linux/lockd/bind.h>
17#include <linux/nfsacl.h> 17#include <linux/nfsacl.h>
18#include <linux/seq_file.h> 18#include <linux/seq_file.h>
19#include <net/net_namespace.h>
19#include "nfsd.h" 20#include "nfsd.h"
20#include "cache.h" 21#include "cache.h"
21#include "vfs.h" 22#include "vfs.h"
@@ -186,12 +187,12 @@ static int nfsd_init_socks(int port)
186 if (!list_empty(&nfsd_serv->sv_permsocks)) 187 if (!list_empty(&nfsd_serv->sv_permsocks))
187 return 0; 188 return 0;
188 189
189 error = svc_create_xprt(nfsd_serv, "udp", PF_INET, port, 190 error = svc_create_xprt(nfsd_serv, "udp", &init_net, PF_INET, port,
190 SVC_SOCK_DEFAULTS); 191 SVC_SOCK_DEFAULTS);
191 if (error < 0) 192 if (error < 0)
192 return error; 193 return error;
193 194
194 error = svc_create_xprt(nfsd_serv, "tcp", PF_INET, port, 195 error = svc_create_xprt(nfsd_serv, "tcp", &init_net, PF_INET, port,
195 SVC_SOCK_DEFAULTS); 196 SVC_SOCK_DEFAULTS);
196 if (error < 0) 197 if (error < 0)
197 return error; 198 return error;
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index 322518c88e4b..39adc27b0685 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -35,6 +35,7 @@
35#ifndef _NFSD4_STATE_H 35#ifndef _NFSD4_STATE_H
36#define _NFSD4_STATE_H 36#define _NFSD4_STATE_H
37 37
38#include <linux/sunrpc/svc_xprt.h>
38#include <linux/nfsd/nfsfh.h> 39#include <linux/nfsd/nfsfh.h>
39#include "nfsfh.h" 40#include "nfsfh.h"
40 41
@@ -64,19 +65,12 @@ typedef struct {
64 (s)->si_fileid, \ 65 (s)->si_fileid, \
65 (s)->si_generation 66 (s)->si_generation
66 67
67struct nfsd4_cb_sequence {
68 /* args/res */
69 u32 cbs_minorversion;
70 struct nfs4_client *cbs_clp;
71};
72
73struct nfs4_rpc_args {
74 void *args_op;
75 struct nfsd4_cb_sequence args_seq;
76};
77
78struct nfsd4_callback { 68struct nfsd4_callback {
79 struct nfs4_rpc_args cb_args; 69 void *cb_op;
70 struct nfs4_client *cb_clp;
71 u32 cb_minorversion;
72 struct rpc_message cb_msg;
73 const struct rpc_call_ops *cb_ops;
80 struct work_struct cb_work; 74 struct work_struct cb_work;
81}; 75};
82 76
@@ -91,7 +85,6 @@ struct nfs4_delegation {
91 u32 dl_type; 85 u32 dl_type;
92 time_t dl_time; 86 time_t dl_time;
93/* For recall: */ 87/* For recall: */
94 u32 dl_ident;
95 stateid_t dl_stateid; 88 stateid_t dl_stateid;
96 struct knfsd_fh dl_fh; 89 struct knfsd_fh dl_fh;
97 int dl_retries; 90 int dl_retries;
@@ -103,8 +96,8 @@ struct nfs4_cb_conn {
103 /* SETCLIENTID info */ 96 /* SETCLIENTID info */
104 struct sockaddr_storage cb_addr; 97 struct sockaddr_storage cb_addr;
105 size_t cb_addrlen; 98 size_t cb_addrlen;
106 u32 cb_prog; 99 u32 cb_prog; /* used only in 4.0 case;
107 u32 cb_minorversion; 100 per-session otherwise */
108 u32 cb_ident; /* minorversion 0 only */ 101 u32 cb_ident; /* minorversion 0 only */
109 struct svc_xprt *cb_xprt; /* minorversion 1 only */ 102 struct svc_xprt *cb_xprt; /* minorversion 1 only */
110}; 103};
@@ -160,6 +153,15 @@ struct nfsd4_clid_slot {
160 struct nfsd4_create_session sl_cr_ses; 153 struct nfsd4_create_session sl_cr_ses;
161}; 154};
162 155
156struct nfsd4_conn {
157 struct list_head cn_persession;
158 struct svc_xprt *cn_xprt;
159 struct svc_xpt_user cn_xpt_user;
160 struct nfsd4_session *cn_session;
161/* CDFC4_FORE, CDFC4_BACK: */
162 unsigned char cn_flags;
163};
164
163struct nfsd4_session { 165struct nfsd4_session {
164 struct kref se_ref; 166 struct kref se_ref;
165 struct list_head se_hash; /* hash by sessionid */ 167 struct list_head se_hash; /* hash by sessionid */
@@ -169,6 +171,9 @@ struct nfsd4_session {
169 struct nfs4_sessionid se_sessionid; 171 struct nfs4_sessionid se_sessionid;
170 struct nfsd4_channel_attrs se_fchannel; 172 struct nfsd4_channel_attrs se_fchannel;
171 struct nfsd4_channel_attrs se_bchannel; 173 struct nfsd4_channel_attrs se_bchannel;
174 struct list_head se_conns;
175 u32 se_cb_prog;
176 u32 se_cb_seq_nr;
172 struct nfsd4_slot *se_slots[]; /* forward channel slots */ 177 struct nfsd4_slot *se_slots[]; /* forward channel slots */
173}; 178};
174 179
@@ -221,24 +226,32 @@ struct nfs4_client {
221 clientid_t cl_clientid; /* generated by server */ 226 clientid_t cl_clientid; /* generated by server */
222 nfs4_verifier cl_confirm; /* generated by server */ 227 nfs4_verifier cl_confirm; /* generated by server */
223 u32 cl_firststate; /* recovery dir creation */ 228 u32 cl_firststate; /* recovery dir creation */
229 u32 cl_minorversion;
224 230
225 /* for v4.0 and v4.1 callbacks: */ 231 /* for v4.0 and v4.1 callbacks: */
226 struct nfs4_cb_conn cl_cb_conn; 232 struct nfs4_cb_conn cl_cb_conn;
233#define NFSD4_CLIENT_CB_UPDATE 1
234#define NFSD4_CLIENT_KILL 2
235 unsigned long cl_cb_flags;
227 struct rpc_clnt *cl_cb_client; 236 struct rpc_clnt *cl_cb_client;
237 u32 cl_cb_ident;
228 atomic_t cl_cb_set; 238 atomic_t cl_cb_set;
239 struct nfsd4_callback cl_cb_null;
240 struct nfsd4_session *cl_cb_session;
241
242 /* for all client information that callback code might need: */
243 spinlock_t cl_lock;
229 244
230 /* for nfs41 */ 245 /* for nfs41 */
231 struct list_head cl_sessions; 246 struct list_head cl_sessions;
232 struct nfsd4_clid_slot cl_cs_slot; /* create_session slot */ 247 struct nfsd4_clid_slot cl_cs_slot; /* create_session slot */
233 u32 cl_exchange_flags; 248 u32 cl_exchange_flags;
234 struct nfs4_sessionid cl_sessionid;
235 /* number of rpc's in progress over an associated session: */ 249 /* number of rpc's in progress over an associated session: */
236 atomic_t cl_refcount; 250 atomic_t cl_refcount;
237 251
238 /* for nfs41 callbacks */ 252 /* for nfs41 callbacks */
239 /* We currently support a single back channel with a single slot */ 253 /* We currently support a single back channel with a single slot */
240 unsigned long cl_cb_slot_busy; 254 unsigned long cl_cb_slot_busy;
241 u32 cl_cb_seq_nr;
242 struct rpc_wait_queue cl_cb_waitq; /* backchannel callers may */ 255 struct rpc_wait_queue cl_cb_waitq; /* backchannel callers may */
243 /* wait here for slots */ 256 /* wait here for slots */
244}; 257};
@@ -440,12 +453,13 @@ extern int nfs4_in_grace(void);
440extern __be32 nfs4_check_open_reclaim(clientid_t *clid); 453extern __be32 nfs4_check_open_reclaim(clientid_t *clid);
441extern void nfs4_free_stateowner(struct kref *kref); 454extern void nfs4_free_stateowner(struct kref *kref);
442extern int set_callback_cred(void); 455extern int set_callback_cred(void);
443extern void nfsd4_probe_callback(struct nfs4_client *clp, struct nfs4_cb_conn *); 456extern void nfsd4_probe_callback(struct nfs4_client *clp);
457extern void nfsd4_change_callback(struct nfs4_client *clp, struct nfs4_cb_conn *);
444extern void nfsd4_do_callback_rpc(struct work_struct *); 458extern void nfsd4_do_callback_rpc(struct work_struct *);
445extern void nfsd4_cb_recall(struct nfs4_delegation *dp); 459extern void nfsd4_cb_recall(struct nfs4_delegation *dp);
446extern int nfsd4_create_callback_queue(void); 460extern int nfsd4_create_callback_queue(void);
447extern void nfsd4_destroy_callback_queue(void); 461extern void nfsd4_destroy_callback_queue(void);
448extern void nfsd4_set_callback_client(struct nfs4_client *, struct rpc_clnt *); 462extern void nfsd4_shutdown_callback(struct nfs4_client *);
449extern void nfs4_put_delegation(struct nfs4_delegation *dp); 463extern void nfs4_put_delegation(struct nfs4_delegation *dp);
450extern __be32 nfs4_make_rec_clidname(char *clidname, struct xdr_netobj *clname); 464extern __be32 nfs4_make_rec_clidname(char *clidname, struct xdr_netobj *clname);
451extern void nfsd4_init_recdir(char *recdir_name); 465extern void nfsd4_init_recdir(char *recdir_name);
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 661a6cf8e826..184938fcff04 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -281,23 +281,13 @@ commit_metadata(struct svc_fh *fhp)
281{ 281{
282 struct inode *inode = fhp->fh_dentry->d_inode; 282 struct inode *inode = fhp->fh_dentry->d_inode;
283 const struct export_operations *export_ops = inode->i_sb->s_export_op; 283 const struct export_operations *export_ops = inode->i_sb->s_export_op;
284 int error = 0;
285 284
286 if (!EX_ISSYNC(fhp->fh_export)) 285 if (!EX_ISSYNC(fhp->fh_export))
287 return 0; 286 return 0;
288 287
289 if (export_ops->commit_metadata) { 288 if (export_ops->commit_metadata)
290 error = export_ops->commit_metadata(inode); 289 return export_ops->commit_metadata(inode);
291 } else { 290 return sync_inode_metadata(inode, 1);
292 struct writeback_control wbc = {
293 .sync_mode = WB_SYNC_ALL,
294 .nr_to_write = 0, /* metadata only */
295 };
296
297 error = sync_inode(inode, &wbc);
298 }
299
300 return error;
301} 291}
302 292
303/* 293/*
diff --git a/fs/nilfs2/namei.c b/fs/nilfs2/namei.c
index 185d1607cb00..6e9557ecf161 100644
--- a/fs/nilfs2/namei.c
+++ b/fs/nilfs2/namei.c
@@ -207,7 +207,7 @@ static int nilfs_link(struct dentry *old_dentry, struct inode *dir,
207 207
208 inode->i_ctime = CURRENT_TIME; 208 inode->i_ctime = CURRENT_TIME;
209 inode_inc_link_count(inode); 209 inode_inc_link_count(inode);
210 atomic_inc(&inode->i_count); 210 ihold(inode);
211 211
212 err = nilfs_add_nondir(dentry, inode); 212 err = nilfs_add_nondir(dentry, inode);
213 if (!err) 213 if (!err)
diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c
index d926af626177..687d090cea34 100644
--- a/fs/nilfs2/segment.c
+++ b/fs/nilfs2/segment.c
@@ -1609,7 +1609,7 @@ nilfs_copy_replace_page_buffers(struct page *page, struct list_head *out)
1609 kunmap_atomic(kaddr, KM_USER0); 1609 kunmap_atomic(kaddr, KM_USER0);
1610 1610
1611 if (!TestSetPageWriteback(clone_page)) 1611 if (!TestSetPageWriteback(clone_page))
1612 inc_zone_page_state(clone_page, NR_WRITEBACK); 1612 account_page_writeback(clone_page);
1613 unlock_page(clone_page); 1613 unlock_page(clone_page);
1614 1614
1615 return 0; 1615 return 0;
diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c
index 35ae03c0db86..f804d41ec9d3 100644
--- a/fs/nilfs2/super.c
+++ b/fs/nilfs2/super.c
@@ -1141,9 +1141,9 @@ static int nilfs_test_bdev_super(struct super_block *s, void *data)
1141 return (void *)s->s_bdev == data; 1141 return (void *)s->s_bdev == data;
1142} 1142}
1143 1143
1144static int 1144static struct dentry *
1145nilfs_get_sb(struct file_system_type *fs_type, int flags, 1145nilfs_mount(struct file_system_type *fs_type, int flags,
1146 const char *dev_name, void *data, struct vfsmount *mnt) 1146 const char *dev_name, void *data)
1147{ 1147{
1148 struct nilfs_super_data sd; 1148 struct nilfs_super_data sd;
1149 struct super_block *s; 1149 struct super_block *s;
@@ -1156,7 +1156,7 @@ nilfs_get_sb(struct file_system_type *fs_type, int flags,
1156 1156
1157 sd.bdev = open_bdev_exclusive(dev_name, mode, fs_type); 1157 sd.bdev = open_bdev_exclusive(dev_name, mode, fs_type);
1158 if (IS_ERR(sd.bdev)) 1158 if (IS_ERR(sd.bdev))
1159 return PTR_ERR(sd.bdev); 1159 return ERR_CAST(sd.bdev);
1160 1160
1161 sd.cno = 0; 1161 sd.cno = 0;
1162 sd.flags = flags; 1162 sd.flags = flags;
@@ -1235,9 +1235,7 @@ nilfs_get_sb(struct file_system_type *fs_type, int flags,
1235 if (!s_new) 1235 if (!s_new)
1236 close_bdev_exclusive(sd.bdev, mode); 1236 close_bdev_exclusive(sd.bdev, mode);
1237 1237
1238 mnt->mnt_sb = s; 1238 return root_dentry;
1239 mnt->mnt_root = root_dentry;
1240 return 0;
1241 1239
1242 failed_super: 1240 failed_super:
1243 deactivate_locked_super(s); 1241 deactivate_locked_super(s);
@@ -1245,13 +1243,13 @@ nilfs_get_sb(struct file_system_type *fs_type, int flags,
1245 failed: 1243 failed:
1246 if (!s_new) 1244 if (!s_new)
1247 close_bdev_exclusive(sd.bdev, mode); 1245 close_bdev_exclusive(sd.bdev, mode);
1248 return err; 1246 return ERR_PTR(err);
1249} 1247}
1250 1248
1251struct file_system_type nilfs_fs_type = { 1249struct file_system_type nilfs_fs_type = {
1252 .owner = THIS_MODULE, 1250 .owner = THIS_MODULE,
1253 .name = "nilfs2", 1251 .name = "nilfs2",
1254 .get_sb = nilfs_get_sb, 1252 .mount = nilfs_mount,
1255 .kill_sb = kill_block_super, 1253 .kill_sb = kill_block_super,
1256 .fs_flags = FS_REQUIRES_DEV, 1254 .fs_flags = FS_REQUIRES_DEV,
1257}; 1255};
diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c
index 36802420d69a..4498a208df94 100644
--- a/fs/notify/fsnotify.c
+++ b/fs/notify/fsnotify.c
@@ -88,8 +88,6 @@ void __fsnotify_parent(struct path *path, struct dentry *dentry, __u32 mask)
88{ 88{
89 struct dentry *parent; 89 struct dentry *parent;
90 struct inode *p_inode; 90 struct inode *p_inode;
91 bool send = false;
92 bool should_update_children = false;
93 91
94 if (!dentry) 92 if (!dentry)
95 dentry = path->dentry; 93 dentry = path->dentry;
@@ -97,29 +95,12 @@ void __fsnotify_parent(struct path *path, struct dentry *dentry, __u32 mask)
97 if (!(dentry->d_flags & DCACHE_FSNOTIFY_PARENT_WATCHED)) 95 if (!(dentry->d_flags & DCACHE_FSNOTIFY_PARENT_WATCHED))
98 return; 96 return;
99 97
100 spin_lock(&dentry->d_lock); 98 parent = dget_parent(dentry);
101 parent = dentry->d_parent;
102 p_inode = parent->d_inode; 99 p_inode = parent->d_inode;
103 100
104 if (fsnotify_inode_watches_children(p_inode)) { 101 if (unlikely(!fsnotify_inode_watches_children(p_inode)))
105 if (p_inode->i_fsnotify_mask & mask) { 102 __fsnotify_update_child_dentry_flags(p_inode);
106 dget(parent); 103 else if (p_inode->i_fsnotify_mask & mask) {
107 send = true;
108 }
109 } else {
110 /*
111 * The parent doesn't care about events on it's children but
112 * at least one child thought it did. We need to run all the
113 * children and update their d_flags to let them know p_inode
114 * doesn't care about them any more.
115 */
116 dget(parent);
117 should_update_children = true;
118 }
119
120 spin_unlock(&dentry->d_lock);
121
122 if (send) {
123 /* we are notifying a parent so come up with the new mask which 104 /* we are notifying a parent so come up with the new mask which
124 * specifies these are events which came from a child. */ 105 * specifies these are events which came from a child. */
125 mask |= FS_EVENT_ON_CHILD; 106 mask |= FS_EVENT_ON_CHILD;
@@ -130,13 +111,9 @@ void __fsnotify_parent(struct path *path, struct dentry *dentry, __u32 mask)
130 else 111 else
131 fsnotify(p_inode, mask, dentry->d_inode, FSNOTIFY_EVENT_INODE, 112 fsnotify(p_inode, mask, dentry->d_inode, FSNOTIFY_EVENT_INODE,
132 dentry->d_name.name, 0); 113 dentry->d_name.name, 0);
133 dput(parent);
134 } 114 }
135 115
136 if (unlikely(should_update_children)) { 116 dput(parent);
137 __fsnotify_update_child_dentry_flags(p_inode);
138 dput(parent);
139 }
140} 117}
141EXPORT_SYMBOL_GPL(__fsnotify_parent); 118EXPORT_SYMBOL_GPL(__fsnotify_parent);
142 119
diff --git a/fs/notify/inode_mark.c b/fs/notify/inode_mark.c
index 33297c005060..21ed10660b80 100644
--- a/fs/notify/inode_mark.c
+++ b/fs/notify/inode_mark.c
@@ -240,6 +240,7 @@ void fsnotify_unmount_inodes(struct list_head *list)
240{ 240{
241 struct inode *inode, *next_i, *need_iput = NULL; 241 struct inode *inode, *next_i, *need_iput = NULL;
242 242
243 spin_lock(&inode_lock);
243 list_for_each_entry_safe(inode, next_i, list, i_sb_list) { 244 list_for_each_entry_safe(inode, next_i, list, i_sb_list) {
244 struct inode *need_iput_tmp; 245 struct inode *need_iput_tmp;
245 246
@@ -297,4 +298,5 @@ void fsnotify_unmount_inodes(struct list_head *list)
297 298
298 spin_lock(&inode_lock); 299 spin_lock(&inode_lock);
299 } 300 }
301 spin_unlock(&inode_lock);
300} 302}
diff --git a/fs/ntfs/super.c b/fs/ntfs/super.c
index 19c5180f8a28..a30ecacc01f2 100644
--- a/fs/ntfs/super.c
+++ b/fs/ntfs/super.c
@@ -2911,8 +2911,8 @@ static int ntfs_fill_super(struct super_block *sb, void *opt, const int silent)
2911 goto unl_upcase_iput_tmp_ino_err_out_now; 2911 goto unl_upcase_iput_tmp_ino_err_out_now;
2912 } 2912 }
2913 if ((sb->s_root = d_alloc_root(vol->root_ino))) { 2913 if ((sb->s_root = d_alloc_root(vol->root_ino))) {
2914 /* We increment i_count simulating an ntfs_iget(). */ 2914 /* We grab a reference, simulating an ntfs_iget(). */
2915 atomic_inc(&vol->root_ino->i_count); 2915 ihold(vol->root_ino);
2916 ntfs_debug("Exiting, status successful."); 2916 ntfs_debug("Exiting, status successful.");
2917 /* Release the default upcase if it has no users. */ 2917 /* Release the default upcase if it has no users. */
2918 mutex_lock(&ntfs_lock); 2918 mutex_lock(&ntfs_lock);
@@ -3021,21 +3021,6 @@ iput_tmp_ino_err_out_now:
3021 if (vol->mft_ino && vol->mft_ino != tmp_ino) 3021 if (vol->mft_ino && vol->mft_ino != tmp_ino)
3022 iput(vol->mft_ino); 3022 iput(vol->mft_ino);
3023 vol->mft_ino = NULL; 3023 vol->mft_ino = NULL;
3024 /*
3025 * This is needed to get ntfs_clear_extent_inode() called for each
3026 * inode we have ever called ntfs_iget()/iput() on, otherwise we A)
3027 * leak resources and B) a subsequent mount fails automatically due to
3028 * ntfs_iget() never calling down into our ntfs_read_locked_inode()
3029 * method again... FIXME: Do we need to do this twice now because of
3030 * attribute inodes? I think not, so leave as is for now... (AIA)
3031 */
3032 if (invalidate_inodes(sb)) {
3033 ntfs_error(sb, "Busy inodes left. This is most likely a NTFS "
3034 "driver bug.");
3035 /* Copied from fs/super.c. I just love this message. (-; */
3036 printk("NTFS: Busy inodes after umount. Self-destruct in 5 "
3037 "seconds. Have a nice day...\n");
3038 }
3039 /* Errors at this stage are irrelevant. */ 3024 /* Errors at this stage are irrelevant. */
3040err_out_now: 3025err_out_now:
3041 sb->s_fs_info = NULL; 3026 sb->s_fs_info = NULL;
@@ -3074,17 +3059,16 @@ struct kmem_cache *ntfs_index_ctx_cache;
3074/* Driver wide mutex. */ 3059/* Driver wide mutex. */
3075DEFINE_MUTEX(ntfs_lock); 3060DEFINE_MUTEX(ntfs_lock);
3076 3061
3077static int ntfs_get_sb(struct file_system_type *fs_type, 3062static struct dentry *ntfs_mount(struct file_system_type *fs_type,
3078 int flags, const char *dev_name, void *data, struct vfsmount *mnt) 3063 int flags, const char *dev_name, void *data)
3079{ 3064{
3080 return get_sb_bdev(fs_type, flags, dev_name, data, ntfs_fill_super, 3065 return mount_bdev(fs_type, flags, dev_name, data, ntfs_fill_super);
3081 mnt);
3082} 3066}
3083 3067
3084static struct file_system_type ntfs_fs_type = { 3068static struct file_system_type ntfs_fs_type = {
3085 .owner = THIS_MODULE, 3069 .owner = THIS_MODULE,
3086 .name = "ntfs", 3070 .name = "ntfs",
3087 .get_sb = ntfs_get_sb, 3071 .mount = ntfs_mount,
3088 .kill_sb = kill_block_super, 3072 .kill_sb = kill_block_super,
3089 .fs_flags = FS_REQUIRES_DEV, 3073 .fs_flags = FS_REQUIRES_DEV,
3090}; 3074};
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index 5cfeee118158..f1e962cb3b73 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -165,7 +165,7 @@ int ocfs2_get_block(struct inode *inode, sector_t iblock,
165 * ocfs2 never allocates in this function - the only time we 165 * ocfs2 never allocates in this function - the only time we
166 * need to use BH_New is when we're extending i_size on a file 166 * need to use BH_New is when we're extending i_size on a file
167 * system which doesn't support holes, in which case BH_New 167 * system which doesn't support holes, in which case BH_New
168 * allows block_prepare_write() to zero. 168 * allows __block_write_begin() to zero.
169 * 169 *
170 * If we see this on a sparse file system, then a truncate has 170 * If we see this on a sparse file system, then a truncate has
171 * raced us and removed the cluster. In this case, we clear 171 * raced us and removed the cluster. In this case, we clear
@@ -407,21 +407,6 @@ static int ocfs2_writepage(struct page *page, struct writeback_control *wbc)
407 return ret; 407 return ret;
408} 408}
409 409
410/*
411 * This is called from ocfs2_write_zero_page() which has handled it's
412 * own cluster locking and has ensured allocation exists for those
413 * blocks to be written.
414 */
415int ocfs2_prepare_write_nolock(struct inode *inode, struct page *page,
416 unsigned from, unsigned to)
417{
418 int ret;
419
420 ret = block_prepare_write(page, from, to, ocfs2_get_block);
421
422 return ret;
423}
424
425/* Taken from ext3. We don't necessarily need the full blown 410/* Taken from ext3. We don't necessarily need the full blown
426 * functionality yet, but IMHO it's better to cut and paste the whole 411 * functionality yet, but IMHO it's better to cut and paste the whole
427 * thing so we can avoid introducing our own bugs (and easily pick up 412 * thing so we can avoid introducing our own bugs (and easily pick up
@@ -732,7 +717,7 @@ static int ocfs2_should_read_blk(struct inode *inode, struct page *page,
732} 717}
733 718
734/* 719/*
735 * Some of this taken from block_prepare_write(). We already have our 720 * Some of this taken from __block_write_begin(). We already have our
736 * mapping by now though, and the entire write will be allocating or 721 * mapping by now though, and the entire write will be allocating or
737 * it won't, so not much need to use BH_New. 722 * it won't, so not much need to use BH_New.
738 * 723 *
diff --git a/fs/ocfs2/aops.h b/fs/ocfs2/aops.h
index 7606f663da6d..76bfdfda691a 100644
--- a/fs/ocfs2/aops.h
+++ b/fs/ocfs2/aops.h
@@ -22,9 +22,6 @@
22#ifndef OCFS2_AOPS_H 22#ifndef OCFS2_AOPS_H
23#define OCFS2_AOPS_H 23#define OCFS2_AOPS_H
24 24
25int ocfs2_prepare_write_nolock(struct inode *inode, struct page *page,
26 unsigned from, unsigned to);
27
28handle_t *ocfs2_start_walk_page_trans(struct inode *inode, 25handle_t *ocfs2_start_walk_page_trans(struct inode *inode,
29 struct page *page, 26 struct page *page,
30 unsigned from, 27 unsigned from,
diff --git a/fs/ocfs2/dlmfs/dlmfs.c b/fs/ocfs2/dlmfs/dlmfs.c
index a7ebd9d42dc8..b2df490a19ed 100644
--- a/fs/ocfs2/dlmfs/dlmfs.c
+++ b/fs/ocfs2/dlmfs/dlmfs.c
@@ -400,6 +400,7 @@ static struct inode *dlmfs_get_root_inode(struct super_block *sb)
400 if (inode) { 400 if (inode) {
401 ip = DLMFS_I(inode); 401 ip = DLMFS_I(inode);
402 402
403 inode->i_ino = get_next_ino();
403 inode->i_mode = mode; 404 inode->i_mode = mode;
404 inode->i_uid = current_fsuid(); 405 inode->i_uid = current_fsuid();
405 inode->i_gid = current_fsgid(); 406 inode->i_gid = current_fsgid();
@@ -425,6 +426,7 @@ static struct inode *dlmfs_get_inode(struct inode *parent,
425 if (!inode) 426 if (!inode)
426 return NULL; 427 return NULL;
427 428
429 inode->i_ino = get_next_ino();
428 inode->i_mode = mode; 430 inode->i_mode = mode;
429 inode->i_uid = current_fsuid(); 431 inode->i_uid = current_fsuid();
430 inode->i_gid = current_fsgid(); 432 inode->i_gid = current_fsgid();
@@ -641,16 +643,16 @@ static const struct inode_operations dlmfs_file_inode_operations = {
641 .setattr = dlmfs_file_setattr, 643 .setattr = dlmfs_file_setattr,
642}; 644};
643 645
644static int dlmfs_get_sb(struct file_system_type *fs_type, 646static struct dentry *dlmfs_mount(struct file_system_type *fs_type,
645 int flags, const char *dev_name, void *data, struct vfsmount *mnt) 647 int flags, const char *dev_name, void *data)
646{ 648{
647 return get_sb_nodev(fs_type, flags, data, dlmfs_fill_super, mnt); 649 return mount_nodev(fs_type, flags, data, dlmfs_fill_super);
648} 650}
649 651
650static struct file_system_type dlmfs_fs_type = { 652static struct file_system_type dlmfs_fs_type = {
651 .owner = THIS_MODULE, 653 .owner = THIS_MODULE,
652 .name = "ocfs2_dlmfs", 654 .name = "ocfs2_dlmfs",
653 .get_sb = dlmfs_get_sb, 655 .mount = dlmfs_mount,
654 .kill_sb = kill_litter_super, 656 .kill_sb = kill_litter_super,
655}; 657};
656 658
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 1ca6867935bb..77b4c04a2809 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -796,13 +796,12 @@ static int ocfs2_write_zero_page(struct inode *inode, u64 abs_from,
796 block_end = block_start + (1 << inode->i_blkbits); 796 block_end = block_start + (1 << inode->i_blkbits);
797 797
798 /* 798 /*
799 * block_start is block-aligned. Bump it by one to 799 * block_start is block-aligned. Bump it by one to force
800 * force ocfs2_{prepare,commit}_write() to zero the 800 * __block_write_begin and block_commit_write to zero the
801 * whole block. 801 * whole block.
802 */ 802 */
803 ret = ocfs2_prepare_write_nolock(inode, page, 803 ret = __block_write_begin(page, block_start + 1, 0,
804 block_start + 1, 804 ocfs2_get_block);
805 block_start + 1);
806 if (ret < 0) { 805 if (ret < 0) {
807 mlog_errno(ret); 806 mlog_errno(ret);
808 goto out_unlock; 807 goto out_unlock;
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c
index e7bde21149ae..ff5744e1e36f 100644
--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -742,7 +742,7 @@ static int ocfs2_link(struct dentry *old_dentry,
742 goto out_commit; 742 goto out_commit;
743 } 743 }
744 744
745 atomic_inc(&inode->i_count); 745 ihold(inode);
746 dentry->d_op = &ocfs2_dentry_ops; 746 dentry->d_op = &ocfs2_dentry_ops;
747 d_instantiate(dentry, inode); 747 d_instantiate(dentry, inode);
748 748
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index 56f0cb395820..f02c0ef31578 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -1236,14 +1236,12 @@ read_super_error:
1236 return status; 1236 return status;
1237} 1237}
1238 1238
1239static int ocfs2_get_sb(struct file_system_type *fs_type, 1239static struct dentry *ocfs2_mount(struct file_system_type *fs_type,
1240 int flags, 1240 int flags,
1241 const char *dev_name, 1241 const char *dev_name,
1242 void *data, 1242 void *data)
1243 struct vfsmount *mnt)
1244{ 1243{
1245 return get_sb_bdev(fs_type, flags, dev_name, data, ocfs2_fill_super, 1244 return mount_bdev(fs_type, flags, dev_name, data, ocfs2_fill_super);
1246 mnt);
1247} 1245}
1248 1246
1249static void ocfs2_kill_sb(struct super_block *sb) 1247static void ocfs2_kill_sb(struct super_block *sb)
@@ -1267,8 +1265,7 @@ out:
1267static struct file_system_type ocfs2_fs_type = { 1265static struct file_system_type ocfs2_fs_type = {
1268 .owner = THIS_MODULE, 1266 .owner = THIS_MODULE,
1269 .name = "ocfs2", 1267 .name = "ocfs2",
1270 .get_sb = ocfs2_get_sb, /* is this called when we mount 1268 .mount = ocfs2_mount,
1271 * the fs? */
1272 .kill_sb = ocfs2_kill_sb, 1269 .kill_sb = ocfs2_kill_sb,
1273 1270
1274 .fs_flags = FS_REQUIRES_DEV|FS_RENAME_DOES_D_MOVE, 1271 .fs_flags = FS_REQUIRES_DEV|FS_RENAME_DOES_D_MOVE,
diff --git a/fs/omfs/inode.c b/fs/omfs/inode.c
index 14a22863291a..e043c4cb9a97 100644
--- a/fs/omfs/inode.c
+++ b/fs/omfs/inode.c
@@ -557,17 +557,16 @@ end:
557 return ret; 557 return ret;
558} 558}
559 559
560static int omfs_get_sb(struct file_system_type *fs_type, 560static struct dentry *omfs_mount(struct file_system_type *fs_type,
561 int flags, const char *dev_name, 561 int flags, const char *dev_name, void *data)
562 void *data, struct vfsmount *m)
563{ 562{
564 return get_sb_bdev(fs_type, flags, dev_name, data, omfs_fill_super, m); 563 return mount_bdev(fs_type, flags, dev_name, data, omfs_fill_super);
565} 564}
566 565
567static struct file_system_type omfs_fs_type = { 566static struct file_system_type omfs_fs_type = {
568 .owner = THIS_MODULE, 567 .owner = THIS_MODULE,
569 .name = "omfs", 568 .name = "omfs",
570 .get_sb = omfs_get_sb, 569 .mount = omfs_mount,
571 .kill_sb = kill_block_super, 570 .kill_sb = kill_block_super,
572 .fs_flags = FS_REQUIRES_DEV, 571 .fs_flags = FS_REQUIRES_DEV,
573}; 572};
diff --git a/fs/open.c b/fs/open.c
index d74e1983e8dc..4197b9ed023d 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -786,11 +786,11 @@ struct file *nameidata_to_filp(struct nameidata *nd)
786 /* Pick up the filp from the open intent */ 786 /* Pick up the filp from the open intent */
787 filp = nd->intent.open.file; 787 filp = nd->intent.open.file;
788 /* Has the filesystem initialised the file for us? */ 788 /* Has the filesystem initialised the file for us? */
789 if (filp->f_path.dentry == NULL) 789 if (filp->f_path.dentry == NULL) {
790 path_get(&nd->path);
790 filp = __dentry_open(nd->path.dentry, nd->path.mnt, filp, 791 filp = __dentry_open(nd->path.dentry, nd->path.mnt, filp,
791 NULL, cred); 792 NULL, cred);
792 else 793 }
793 path_put(&nd->path);
794 return filp; 794 return filp;
795} 795}
796 796
diff --git a/fs/openpromfs/inode.c b/fs/openpromfs/inode.c
index ffcd04f0012c..ddb1f41376e5 100644
--- a/fs/openpromfs/inode.c
+++ b/fs/openpromfs/inode.c
@@ -415,16 +415,16 @@ out_no_root:
415 return ret; 415 return ret;
416} 416}
417 417
418static int openprom_get_sb(struct file_system_type *fs_type, 418static struct dentry *openprom_mount(struct file_system_type *fs_type,
419 int flags, const char *dev_name, void *data, struct vfsmount *mnt) 419 int flags, const char *dev_name, void *data)
420{ 420{
421 return get_sb_single(fs_type, flags, data, openprom_fill_super, mnt); 421 return mount_single(fs_type, flags, data, openprom_fill_super);
422} 422}
423 423
424static struct file_system_type openprom_fs_type = { 424static struct file_system_type openprom_fs_type = {
425 .owner = THIS_MODULE, 425 .owner = THIS_MODULE,
426 .name = "openpromfs", 426 .name = "openpromfs",
427 .get_sb = openprom_get_sb, 427 .mount = openprom_mount,
428 .kill_sb = kill_anon_super, 428 .kill_sb = kill_anon_super,
429}; 429};
430 430
diff --git a/fs/partitions/check.c b/fs/partitions/check.c
index b81bfc016a05..0a8b0ad0c7e2 100644
--- a/fs/partitions/check.c
+++ b/fs/partitions/check.c
@@ -365,25 +365,17 @@ struct device_type part_type = {
365static void delete_partition_rcu_cb(struct rcu_head *head) 365static void delete_partition_rcu_cb(struct rcu_head *head)
366{ 366{
367 struct hd_struct *part = container_of(head, struct hd_struct, rcu_head); 367 struct hd_struct *part = container_of(head, struct hd_struct, rcu_head);
368 struct gendisk *disk = part_to_disk(part);
369 struct request_queue *q = disk->queue;
370 unsigned long flags;
371 368
372 part->start_sect = 0; 369 part->start_sect = 0;
373 part->nr_sects = 0; 370 part->nr_sects = 0;
374 part_stat_set_all(part, 0); 371 part_stat_set_all(part, 0);
375 put_device(part_to_dev(part)); 372 put_device(part_to_dev(part));
376
377 spin_lock_irqsave(q->queue_lock, flags);
378 elv_quiesce_end(q);
379 spin_unlock_irqrestore(q->queue_lock, flags);
380} 373}
381 374
382void delete_partition(struct gendisk *disk, int partno) 375void delete_partition(struct gendisk *disk, int partno)
383{ 376{
384 struct disk_part_tbl *ptbl = disk->part_tbl; 377 struct disk_part_tbl *ptbl = disk->part_tbl;
385 struct hd_struct *part; 378 struct hd_struct *part;
386 struct request_queue *q = disk->queue;
387 379
388 if (partno >= ptbl->len) 380 if (partno >= ptbl->len)
389 return; 381 return;
@@ -398,10 +390,6 @@ void delete_partition(struct gendisk *disk, int partno)
398 kobject_put(part->holder_dir); 390 kobject_put(part->holder_dir);
399 device_del(part_to_dev(part)); 391 device_del(part_to_dev(part));
400 392
401 spin_lock_irq(q->queue_lock);
402 elv_quiesce_start(q);
403 spin_unlock_irq(q->queue_lock);
404
405 call_rcu(&part->rcu_head, delete_partition_rcu_cb); 393 call_rcu(&part->rcu_head, delete_partition_rcu_cb);
406} 394}
407 395
diff --git a/fs/pipe.c b/fs/pipe.c
index 37eb1ebeaa90..a8012a955720 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -954,6 +954,8 @@ static struct inode * get_pipe_inode(void)
954 if (!inode) 954 if (!inode)
955 goto fail_inode; 955 goto fail_inode;
956 956
957 inode->i_ino = get_next_ino();
958
957 pipe = alloc_pipe_info(inode); 959 pipe = alloc_pipe_info(inode);
958 if (!pipe) 960 if (!pipe)
959 goto fail_iput; 961 goto fail_iput;
@@ -1245,16 +1247,15 @@ out:
1245 * any operations on the root directory. However, we need a non-trivial 1247 * any operations on the root directory. However, we need a non-trivial
1246 * d_name - pipe: will go nicely and kill the special-casing in procfs. 1248 * d_name - pipe: will go nicely and kill the special-casing in procfs.
1247 */ 1249 */
1248static int pipefs_get_sb(struct file_system_type *fs_type, 1250static struct dentry *pipefs_mount(struct file_system_type *fs_type,
1249 int flags, const char *dev_name, void *data, 1251 int flags, const char *dev_name, void *data)
1250 struct vfsmount *mnt)
1251{ 1252{
1252 return get_sb_pseudo(fs_type, "pipe:", NULL, PIPEFS_MAGIC, mnt); 1253 return mount_pseudo(fs_type, "pipe:", NULL, PIPEFS_MAGIC);
1253} 1254}
1254 1255
1255static struct file_system_type pipe_fs_type = { 1256static struct file_system_type pipe_fs_type = {
1256 .name = "pipefs", 1257 .name = "pipefs",
1257 .get_sb = pipefs_get_sb, 1258 .mount = pipefs_mount,
1258 .kill_sb = kill_anon_super, 1259 .kill_sb = kill_anon_super,
1259}; 1260};
1260 1261
diff --git a/fs/proc/Kconfig b/fs/proc/Kconfig
index 50f8f0600f06..6a0068841d96 100644
--- a/fs/proc/Kconfig
+++ b/fs/proc/Kconfig
@@ -33,8 +33,8 @@ config PROC_KCORE
33 depends on PROC_FS && MMU 33 depends on PROC_FS && MMU
34 34
35config PROC_VMCORE 35config PROC_VMCORE
36 bool "/proc/vmcore support (EXPERIMENTAL)" 36 bool "/proc/vmcore support"
37 depends on PROC_FS && CRASH_DUMP 37 depends on PROC_FS && CRASH_DUMP
38 default y 38 default y
39 help 39 help
40 Exports the dump image of crashed kernel in ELF format. 40 Exports the dump image of crashed kernel in ELF format.
diff --git a/fs/proc/base.c b/fs/proc/base.c
index dc5d5f51f3fe..f3d02ca461ec 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -226,7 +226,7 @@ struct mm_struct *mm_for_maps(struct task_struct *task)
226{ 226{
227 struct mm_struct *mm; 227 struct mm_struct *mm;
228 228
229 if (mutex_lock_killable(&task->cred_guard_mutex)) 229 if (mutex_lock_killable(&task->signal->cred_guard_mutex))
230 return NULL; 230 return NULL;
231 231
232 mm = get_task_mm(task); 232 mm = get_task_mm(task);
@@ -235,7 +235,7 @@ struct mm_struct *mm_for_maps(struct task_struct *task)
235 mmput(mm); 235 mmput(mm);
236 mm = NULL; 236 mm = NULL;
237 } 237 }
238 mutex_unlock(&task->cred_guard_mutex); 238 mutex_unlock(&task->signal->cred_guard_mutex);
239 239
240 return mm; 240 return mm;
241} 241}
@@ -771,6 +771,8 @@ static const struct file_operations proc_single_file_operations = {
771static int mem_open(struct inode* inode, struct file* file) 771static int mem_open(struct inode* inode, struct file* file)
772{ 772{
773 file->private_data = (void*)((long)current->self_exec_id); 773 file->private_data = (void*)((long)current->self_exec_id);
774 /* OK to pass negative loff_t, we can catch out-of-range */
775 file->f_mode |= FMODE_UNSIGNED_OFFSET;
774 return 0; 776 return 0;
775} 777}
776 778
@@ -1023,28 +1025,47 @@ static ssize_t oom_adjust_write(struct file *file, const char __user *buf,
1023 memset(buffer, 0, sizeof(buffer)); 1025 memset(buffer, 0, sizeof(buffer));
1024 if (count > sizeof(buffer) - 1) 1026 if (count > sizeof(buffer) - 1)
1025 count = sizeof(buffer) - 1; 1027 count = sizeof(buffer) - 1;
1026 if (copy_from_user(buffer, buf, count)) 1028 if (copy_from_user(buffer, buf, count)) {
1027 return -EFAULT; 1029 err = -EFAULT;
1030 goto out;
1031 }
1028 1032
1029 err = strict_strtol(strstrip(buffer), 0, &oom_adjust); 1033 err = strict_strtol(strstrip(buffer), 0, &oom_adjust);
1030 if (err) 1034 if (err)
1031 return -EINVAL; 1035 goto out;
1032 if ((oom_adjust < OOM_ADJUST_MIN || oom_adjust > OOM_ADJUST_MAX) && 1036 if ((oom_adjust < OOM_ADJUST_MIN || oom_adjust > OOM_ADJUST_MAX) &&
1033 oom_adjust != OOM_DISABLE) 1037 oom_adjust != OOM_DISABLE) {
1034 return -EINVAL; 1038 err = -EINVAL;
1039 goto out;
1040 }
1035 1041
1036 task = get_proc_task(file->f_path.dentry->d_inode); 1042 task = get_proc_task(file->f_path.dentry->d_inode);
1037 if (!task) 1043 if (!task) {
1038 return -ESRCH; 1044 err = -ESRCH;
1045 goto out;
1046 }
1047
1048 task_lock(task);
1049 if (!task->mm) {
1050 err = -EINVAL;
1051 goto err_task_lock;
1052 }
1053
1039 if (!lock_task_sighand(task, &flags)) { 1054 if (!lock_task_sighand(task, &flags)) {
1040 put_task_struct(task); 1055 err = -ESRCH;
1041 return -ESRCH; 1056 goto err_task_lock;
1042 } 1057 }
1043 1058
1044 if (oom_adjust < task->signal->oom_adj && !capable(CAP_SYS_RESOURCE)) { 1059 if (oom_adjust < task->signal->oom_adj && !capable(CAP_SYS_RESOURCE)) {
1045 unlock_task_sighand(task, &flags); 1060 err = -EACCES;
1046 put_task_struct(task); 1061 goto err_sighand;
1047 return -EACCES; 1062 }
1063
1064 if (oom_adjust != task->signal->oom_adj) {
1065 if (oom_adjust == OOM_DISABLE)
1066 atomic_inc(&task->mm->oom_disable_count);
1067 if (task->signal->oom_adj == OOM_DISABLE)
1068 atomic_dec(&task->mm->oom_disable_count);
1048 } 1069 }
1049 1070
1050 /* 1071 /*
@@ -1065,10 +1086,13 @@ static ssize_t oom_adjust_write(struct file *file, const char __user *buf,
1065 else 1086 else
1066 task->signal->oom_score_adj = (oom_adjust * OOM_SCORE_ADJ_MAX) / 1087 task->signal->oom_score_adj = (oom_adjust * OOM_SCORE_ADJ_MAX) /
1067 -OOM_DISABLE; 1088 -OOM_DISABLE;
1089err_sighand:
1068 unlock_task_sighand(task, &flags); 1090 unlock_task_sighand(task, &flags);
1091err_task_lock:
1092 task_unlock(task);
1069 put_task_struct(task); 1093 put_task_struct(task);
1070 1094out:
1071 return count; 1095 return err < 0 ? err : count;
1072} 1096}
1073 1097
1074static const struct file_operations proc_oom_adjust_operations = { 1098static const struct file_operations proc_oom_adjust_operations = {
@@ -1109,30 +1133,49 @@ static ssize_t oom_score_adj_write(struct file *file, const char __user *buf,
1109 memset(buffer, 0, sizeof(buffer)); 1133 memset(buffer, 0, sizeof(buffer));
1110 if (count > sizeof(buffer) - 1) 1134 if (count > sizeof(buffer) - 1)
1111 count = sizeof(buffer) - 1; 1135 count = sizeof(buffer) - 1;
1112 if (copy_from_user(buffer, buf, count)) 1136 if (copy_from_user(buffer, buf, count)) {
1113 return -EFAULT; 1137 err = -EFAULT;
1138 goto out;
1139 }
1114 1140
1115 err = strict_strtol(strstrip(buffer), 0, &oom_score_adj); 1141 err = strict_strtol(strstrip(buffer), 0, &oom_score_adj);
1116 if (err) 1142 if (err)
1117 return -EINVAL; 1143 goto out;
1118 if (oom_score_adj < OOM_SCORE_ADJ_MIN || 1144 if (oom_score_adj < OOM_SCORE_ADJ_MIN ||
1119 oom_score_adj > OOM_SCORE_ADJ_MAX) 1145 oom_score_adj > OOM_SCORE_ADJ_MAX) {
1120 return -EINVAL; 1146 err = -EINVAL;
1147 goto out;
1148 }
1121 1149
1122 task = get_proc_task(file->f_path.dentry->d_inode); 1150 task = get_proc_task(file->f_path.dentry->d_inode);
1123 if (!task) 1151 if (!task) {
1124 return -ESRCH; 1152 err = -ESRCH;
1153 goto out;
1154 }
1155
1156 task_lock(task);
1157 if (!task->mm) {
1158 err = -EINVAL;
1159 goto err_task_lock;
1160 }
1161
1125 if (!lock_task_sighand(task, &flags)) { 1162 if (!lock_task_sighand(task, &flags)) {
1126 put_task_struct(task); 1163 err = -ESRCH;
1127 return -ESRCH; 1164 goto err_task_lock;
1128 } 1165 }
1166
1129 if (oom_score_adj < task->signal->oom_score_adj && 1167 if (oom_score_adj < task->signal->oom_score_adj &&
1130 !capable(CAP_SYS_RESOURCE)) { 1168 !capable(CAP_SYS_RESOURCE)) {
1131 unlock_task_sighand(task, &flags); 1169 err = -EACCES;
1132 put_task_struct(task); 1170 goto err_sighand;
1133 return -EACCES;
1134 } 1171 }
1135 1172
1173 if (oom_score_adj != task->signal->oom_score_adj) {
1174 if (oom_score_adj == OOM_SCORE_ADJ_MIN)
1175 atomic_inc(&task->mm->oom_disable_count);
1176 if (task->signal->oom_score_adj == OOM_SCORE_ADJ_MIN)
1177 atomic_dec(&task->mm->oom_disable_count);
1178 }
1136 task->signal->oom_score_adj = oom_score_adj; 1179 task->signal->oom_score_adj = oom_score_adj;
1137 /* 1180 /*
1138 * Scale /proc/pid/oom_adj appropriately ensuring that OOM_DISABLE is 1181 * Scale /proc/pid/oom_adj appropriately ensuring that OOM_DISABLE is
@@ -1143,9 +1186,13 @@ static ssize_t oom_score_adj_write(struct file *file, const char __user *buf,
1143 else 1186 else
1144 task->signal->oom_adj = (oom_score_adj * OOM_ADJUST_MAX) / 1187 task->signal->oom_adj = (oom_score_adj * OOM_ADJUST_MAX) /
1145 OOM_SCORE_ADJ_MAX; 1188 OOM_SCORE_ADJ_MAX;
1189err_sighand:
1146 unlock_task_sighand(task, &flags); 1190 unlock_task_sighand(task, &flags);
1191err_task_lock:
1192 task_unlock(task);
1147 put_task_struct(task); 1193 put_task_struct(task);
1148 return count; 1194out:
1195 return err < 0 ? err : count;
1149} 1196}
1150 1197
1151static const struct file_operations proc_oom_score_adj_operations = { 1198static const struct file_operations proc_oom_score_adj_operations = {
@@ -1601,6 +1648,7 @@ static struct inode *proc_pid_make_inode(struct super_block * sb, struct task_st
1601 1648
1602 /* Common stuff */ 1649 /* Common stuff */
1603 ei = PROC_I(inode); 1650 ei = PROC_I(inode);
1651 inode->i_ino = get_next_ino();
1604 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; 1652 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
1605 inode->i_op = &proc_def_inode_operations; 1653 inode->i_op = &proc_def_inode_operations;
1606 1654
@@ -2306,14 +2354,14 @@ static ssize_t proc_pid_attr_write(struct file * file, const char __user * buf,
2306 goto out_free; 2354 goto out_free;
2307 2355
2308 /* Guard against adverse ptrace interaction */ 2356 /* Guard against adverse ptrace interaction */
2309 length = mutex_lock_interruptible(&task->cred_guard_mutex); 2357 length = mutex_lock_interruptible(&task->signal->cred_guard_mutex);
2310 if (length < 0) 2358 if (length < 0)
2311 goto out_free; 2359 goto out_free;
2312 2360
2313 length = security_setprocattr(task, 2361 length = security_setprocattr(task,
2314 (char*)file->f_path.dentry->d_name.name, 2362 (char*)file->f_path.dentry->d_name.name,
2315 (void*)page, count); 2363 (void*)page, count);
2316 mutex_unlock(&task->cred_guard_mutex); 2364 mutex_unlock(&task->signal->cred_guard_mutex);
2317out_free: 2365out_free:
2318 free_page((unsigned long) page); 2366 free_page((unsigned long) page);
2319out: 2367out:
@@ -2547,6 +2595,7 @@ static struct dentry *proc_base_instantiate(struct inode *dir,
2547 2595
2548 /* Initialize the inode */ 2596 /* Initialize the inode */
2549 ei = PROC_I(inode); 2597 ei = PROC_I(inode);
2598 inode->i_ino = get_next_ino();
2550 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; 2599 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
2551 2600
2552 /* 2601 /*
diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c
index 2fc52552271d..b652cb00906b 100644
--- a/fs/proc/proc_sysctl.c
+++ b/fs/proc/proc_sysctl.c
@@ -23,6 +23,8 @@ static struct inode *proc_sys_make_inode(struct super_block *sb,
23 if (!inode) 23 if (!inode)
24 goto out; 24 goto out;
25 25
26 inode->i_ino = get_next_ino();
27
26 sysctl_head_get(head); 28 sysctl_head_get(head);
27 ei = PROC_I(inode); 29 ei = PROC_I(inode);
28 ei->sysctl = head; 30 ei->sysctl = head;
diff --git a/fs/proc/root.c b/fs/proc/root.c
index 93d99b316325..ef9fa8e24ad6 100644
--- a/fs/proc/root.c
+++ b/fs/proc/root.c
@@ -35,8 +35,8 @@ static int proc_set_super(struct super_block *sb, void *data)
35 return set_anon_super(sb, NULL); 35 return set_anon_super(sb, NULL);
36} 36}
37 37
38static int proc_get_sb(struct file_system_type *fs_type, 38static struct dentry *proc_mount(struct file_system_type *fs_type,
39 int flags, const char *dev_name, void *data, struct vfsmount *mnt) 39 int flags, const char *dev_name, void *data)
40{ 40{
41 int err; 41 int err;
42 struct super_block *sb; 42 struct super_block *sb;
@@ -61,14 +61,14 @@ static int proc_get_sb(struct file_system_type *fs_type,
61 61
62 sb = sget(fs_type, proc_test_super, proc_set_super, ns); 62 sb = sget(fs_type, proc_test_super, proc_set_super, ns);
63 if (IS_ERR(sb)) 63 if (IS_ERR(sb))
64 return PTR_ERR(sb); 64 return ERR_CAST(sb);
65 65
66 if (!sb->s_root) { 66 if (!sb->s_root) {
67 sb->s_flags = flags; 67 sb->s_flags = flags;
68 err = proc_fill_super(sb); 68 err = proc_fill_super(sb);
69 if (err) { 69 if (err) {
70 deactivate_locked_super(sb); 70 deactivate_locked_super(sb);
71 return err; 71 return ERR_PTR(err);
72 } 72 }
73 73
74 ei = PROC_I(sb->s_root->d_inode); 74 ei = PROC_I(sb->s_root->d_inode);
@@ -79,11 +79,9 @@ static int proc_get_sb(struct file_system_type *fs_type,
79 } 79 }
80 80
81 sb->s_flags |= MS_ACTIVE; 81 sb->s_flags |= MS_ACTIVE;
82 ns->proc_mnt = mnt;
83 } 82 }
84 83
85 simple_set_mnt(mnt, sb); 84 return dget(sb->s_root);
86 return 0;
87} 85}
88 86
89static void proc_kill_sb(struct super_block *sb) 87static void proc_kill_sb(struct super_block *sb)
@@ -97,7 +95,7 @@ static void proc_kill_sb(struct super_block *sb)
97 95
98static struct file_system_type proc_fs_type = { 96static struct file_system_type proc_fs_type = {
99 .name = "proc", 97 .name = "proc",
100 .get_sb = proc_get_sb, 98 .mount = proc_mount,
101 .kill_sb = proc_kill_sb, 99 .kill_sb = proc_kill_sb,
102}; 100};
103 101
@@ -115,6 +113,7 @@ void __init proc_root_init(void)
115 return; 113 return;
116 } 114 }
117 115
116 init_pid_ns.proc_mnt = proc_mnt;
118 proc_symlink("mounts", NULL, "self/mounts"); 117 proc_symlink("mounts", NULL, "self/mounts");
119 118
120 proc_net_init(); 119 proc_net_init();
@@ -213,6 +212,7 @@ int pid_ns_prepare_proc(struct pid_namespace *ns)
213 if (IS_ERR(mnt)) 212 if (IS_ERR(mnt))
214 return PTR_ERR(mnt); 213 return PTR_ERR(mnt);
215 214
215 ns->proc_mnt = mnt;
216 return 0; 216 return 0;
217} 217}
218 218
diff --git a/fs/proc/softirqs.c b/fs/proc/softirqs.c
index 1807c2419f17..37994737c983 100644
--- a/fs/proc/softirqs.c
+++ b/fs/proc/softirqs.c
@@ -10,13 +10,13 @@ static int show_softirqs(struct seq_file *p, void *v)
10{ 10{
11 int i, j; 11 int i, j;
12 12
13 seq_printf(p, " "); 13 seq_printf(p, " ");
14 for_each_possible_cpu(i) 14 for_each_possible_cpu(i)
15 seq_printf(p, "CPU%-8d", i); 15 seq_printf(p, "CPU%-8d", i);
16 seq_printf(p, "\n"); 16 seq_printf(p, "\n");
17 17
18 for (i = 0; i < NR_SOFTIRQS; i++) { 18 for (i = 0; i < NR_SOFTIRQS; i++) {
19 seq_printf(p, "%8s:", softirq_to_name[i]); 19 seq_printf(p, "%12s:", softirq_to_name[i]);
20 for_each_possible_cpu(j) 20 for_each_possible_cpu(j)
21 seq_printf(p, " %10u", kstat_softirqs_cpu(i, j)); 21 seq_printf(p, " %10u", kstat_softirqs_cpu(i, j));
22 seq_printf(p, "\n"); 22 seq_printf(p, "\n");
diff --git a/fs/proc/stat.c b/fs/proc/stat.c
index bf31b03fc275..e15a19c93bae 100644
--- a/fs/proc/stat.c
+++ b/fs/proc/stat.c
@@ -31,7 +31,6 @@ static int show_stat(struct seq_file *p, void *v)
31 u64 sum_softirq = 0; 31 u64 sum_softirq = 0;
32 unsigned int per_softirq_sums[NR_SOFTIRQS] = {0}; 32 unsigned int per_softirq_sums[NR_SOFTIRQS] = {0};
33 struct timespec boottime; 33 struct timespec boottime;
34 unsigned int per_irq_sum;
35 34
36 user = nice = system = idle = iowait = 35 user = nice = system = idle = iowait =
37 irq = softirq = steal = cputime64_zero; 36 irq = softirq = steal = cputime64_zero;
@@ -52,9 +51,7 @@ static int show_stat(struct seq_file *p, void *v)
52 guest = cputime64_add(guest, kstat_cpu(i).cpustat.guest); 51 guest = cputime64_add(guest, kstat_cpu(i).cpustat.guest);
53 guest_nice = cputime64_add(guest_nice, 52 guest_nice = cputime64_add(guest_nice,
54 kstat_cpu(i).cpustat.guest_nice); 53 kstat_cpu(i).cpustat.guest_nice);
55 for_each_irq_nr(j) { 54 sum += kstat_cpu_irqs_sum(i);
56 sum += kstat_irqs_cpu(j, i);
57 }
58 sum += arch_irq_stat_cpu(i); 55 sum += arch_irq_stat_cpu(i);
59 56
60 for (j = 0; j < NR_SOFTIRQS; j++) { 57 for (j = 0; j < NR_SOFTIRQS; j++) {
@@ -110,13 +107,8 @@ static int show_stat(struct seq_file *p, void *v)
110 seq_printf(p, "intr %llu", (unsigned long long)sum); 107 seq_printf(p, "intr %llu", (unsigned long long)sum);
111 108
112 /* sum again ? it could be updated? */ 109 /* sum again ? it could be updated? */
113 for_each_irq_nr(j) { 110 for_each_irq_nr(j)
114 per_irq_sum = 0; 111 seq_printf(p, " %u", kstat_irqs(j));
115 for_each_possible_cpu(i)
116 per_irq_sum += kstat_irqs_cpu(j, i);
117
118 seq_printf(p, " %u", per_irq_sum);
119 }
120 112
121 seq_printf(p, 113 seq_printf(p,
122 "\nctxt %llu\n" 114 "\nctxt %llu\n"
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 871e25ed0069..da6b01d70f01 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -327,6 +327,7 @@ struct mem_size_stats {
327 unsigned long private_clean; 327 unsigned long private_clean;
328 unsigned long private_dirty; 328 unsigned long private_dirty;
329 unsigned long referenced; 329 unsigned long referenced;
330 unsigned long anonymous;
330 unsigned long swap; 331 unsigned long swap;
331 u64 pss; 332 u64 pss;
332}; 333};
@@ -357,6 +358,9 @@ static int smaps_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
357 if (!page) 358 if (!page)
358 continue; 359 continue;
359 360
361 if (PageAnon(page))
362 mss->anonymous += PAGE_SIZE;
363
360 mss->resident += PAGE_SIZE; 364 mss->resident += PAGE_SIZE;
361 /* Accumulate the size in pages that have been accessed. */ 365 /* Accumulate the size in pages that have been accessed. */
362 if (pte_young(ptent) || PageReferenced(page)) 366 if (pte_young(ptent) || PageReferenced(page))
@@ -410,6 +414,7 @@ static int show_smap(struct seq_file *m, void *v)
410 "Private_Clean: %8lu kB\n" 414 "Private_Clean: %8lu kB\n"
411 "Private_Dirty: %8lu kB\n" 415 "Private_Dirty: %8lu kB\n"
412 "Referenced: %8lu kB\n" 416 "Referenced: %8lu kB\n"
417 "Anonymous: %8lu kB\n"
413 "Swap: %8lu kB\n" 418 "Swap: %8lu kB\n"
414 "KernelPageSize: %8lu kB\n" 419 "KernelPageSize: %8lu kB\n"
415 "MMUPageSize: %8lu kB\n", 420 "MMUPageSize: %8lu kB\n",
@@ -421,6 +426,7 @@ static int show_smap(struct seq_file *m, void *v)
421 mss.private_clean >> 10, 426 mss.private_clean >> 10,
422 mss.private_dirty >> 10, 427 mss.private_dirty >> 10,
423 mss.referenced >> 10, 428 mss.referenced >> 10,
429 mss.anonymous >> 10,
424 mss.swap >> 10, 430 mss.swap >> 10,
425 vma_kernel_pagesize(vma) >> 10, 431 vma_kernel_pagesize(vma) >> 10,
426 vma_mmu_pagesize(vma) >> 10); 432 vma_mmu_pagesize(vma) >> 10);
diff --git a/fs/qnx4/inode.c b/fs/qnx4/inode.c
index 01bad30026fc..fcada42f1aa3 100644
--- a/fs/qnx4/inode.c
+++ b/fs/qnx4/inode.c
@@ -454,17 +454,16 @@ static void destroy_inodecache(void)
454 kmem_cache_destroy(qnx4_inode_cachep); 454 kmem_cache_destroy(qnx4_inode_cachep);
455} 455}
456 456
457static int qnx4_get_sb(struct file_system_type *fs_type, 457static struct dentry *qnx4_mount(struct file_system_type *fs_type,
458 int flags, const char *dev_name, void *data, struct vfsmount *mnt) 458 int flags, const char *dev_name, void *data)
459{ 459{
460 return get_sb_bdev(fs_type, flags, dev_name, data, qnx4_fill_super, 460 return mount_bdev(fs_type, flags, dev_name, data, qnx4_fill_super);
461 mnt);
462} 461}
463 462
464static struct file_system_type qnx4_fs_type = { 463static struct file_system_type qnx4_fs_type = {
465 .owner = THIS_MODULE, 464 .owner = THIS_MODULE,
466 .name = "qnx4", 465 .name = "qnx4",
467 .get_sb = qnx4_get_sb, 466 .mount = qnx4_mount,
468 .kill_sb = kill_block_super, 467 .kill_sb = kill_block_super,
469 .fs_flags = FS_REQUIRES_DEV, 468 .fs_flags = FS_REQUIRES_DEV,
470}; 469};
diff --git a/fs/quota/Kconfig b/fs/quota/Kconfig
index 3e21b1e2ad3a..880fd9884366 100644
--- a/fs/quota/Kconfig
+++ b/fs/quota/Kconfig
@@ -4,6 +4,7 @@
4 4
5config QUOTA 5config QUOTA
6 bool "Quota support" 6 bool "Quota support"
7 select QUOTACTL
7 help 8 help
8 If you say Y here, you will be able to set per user limits for disk 9 If you say Y here, you will be able to set per user limits for disk
9 usage (also called disk quotas). Currently, it works for the 10 usage (also called disk quotas). Currently, it works for the
@@ -65,8 +66,7 @@ config QFMT_V2
65 66
66config QUOTACTL 67config QUOTACTL
67 bool 68 bool
68 depends on XFS_QUOTA || QUOTA 69 default n
69 default y
70 70
71config QUOTACTL_COMPAT 71config QUOTACTL_COMPAT
72 bool 72 bool
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index aad1316a977f..0fed41e6efcd 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -1386,6 +1386,9 @@ static void __dquot_initialize(struct inode *inode, int type)
1386 /* Avoid races with quotaoff() */ 1386 /* Avoid races with quotaoff() */
1387 if (!sb_has_quota_active(sb, cnt)) 1387 if (!sb_has_quota_active(sb, cnt))
1388 continue; 1388 continue;
1389 /* We could race with quotaon or dqget() could have failed */
1390 if (!got[cnt])
1391 continue;
1389 if (!inode->i_dquot[cnt]) { 1392 if (!inode->i_dquot[cnt]) {
1390 inode->i_dquot[cnt] = got[cnt]; 1393 inode->i_dquot[cnt] = got[cnt];
1391 got[cnt] = NULL; 1394 got[cnt] = NULL;
@@ -1736,6 +1739,7 @@ int __dquot_transfer(struct inode *inode, struct dquot **transfer_to)
1736 qsize_t rsv_space = 0; 1739 qsize_t rsv_space = 0;
1737 struct dquot *transfer_from[MAXQUOTAS] = {}; 1740 struct dquot *transfer_from[MAXQUOTAS] = {};
1738 int cnt, ret = 0; 1741 int cnt, ret = 0;
1742 char is_valid[MAXQUOTAS] = {};
1739 char warntype_to[MAXQUOTAS]; 1743 char warntype_to[MAXQUOTAS];
1740 char warntype_from_inodes[MAXQUOTAS], warntype_from_space[MAXQUOTAS]; 1744 char warntype_from_inodes[MAXQUOTAS], warntype_from_space[MAXQUOTAS];
1741 1745
@@ -1757,8 +1761,15 @@ int __dquot_transfer(struct inode *inode, struct dquot **transfer_to)
1757 space = cur_space + rsv_space; 1761 space = cur_space + rsv_space;
1758 /* Build the transfer_from list and check the limits */ 1762 /* Build the transfer_from list and check the limits */
1759 for (cnt = 0; cnt < MAXQUOTAS; cnt++) { 1763 for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
1764 /*
1765 * Skip changes for same uid or gid or for turned off quota-type.
1766 */
1760 if (!transfer_to[cnt]) 1767 if (!transfer_to[cnt])
1761 continue; 1768 continue;
1769 /* Avoid races with quotaoff() */
1770 if (!sb_has_quota_active(inode->i_sb, cnt))
1771 continue;
1772 is_valid[cnt] = 1;
1762 transfer_from[cnt] = inode->i_dquot[cnt]; 1773 transfer_from[cnt] = inode->i_dquot[cnt];
1763 ret = check_idq(transfer_to[cnt], 1, warntype_to + cnt); 1774 ret = check_idq(transfer_to[cnt], 1, warntype_to + cnt);
1764 if (ret) 1775 if (ret)
@@ -1772,12 +1783,8 @@ int __dquot_transfer(struct inode *inode, struct dquot **transfer_to)
1772 * Finally perform the needed transfer from transfer_from to transfer_to 1783 * Finally perform the needed transfer from transfer_from to transfer_to
1773 */ 1784 */
1774 for (cnt = 0; cnt < MAXQUOTAS; cnt++) { 1785 for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
1775 /* 1786 if (!is_valid[cnt])
1776 * Skip changes for same uid or gid or for turned off quota-type.
1777 */
1778 if (!transfer_to[cnt])
1779 continue; 1787 continue;
1780
1781 /* Due to IO error we might not have transfer_from[] structure */ 1788 /* Due to IO error we might not have transfer_from[] structure */
1782 if (transfer_from[cnt]) { 1789 if (transfer_from[cnt]) {
1783 warntype_from_inodes[cnt] = 1790 warntype_from_inodes[cnt] =
@@ -1801,18 +1808,19 @@ int __dquot_transfer(struct inode *inode, struct dquot **transfer_to)
1801 1808
1802 mark_all_dquot_dirty(transfer_from); 1809 mark_all_dquot_dirty(transfer_from);
1803 mark_all_dquot_dirty(transfer_to); 1810 mark_all_dquot_dirty(transfer_to);
1804 /* Pass back references to put */
1805 for (cnt = 0; cnt < MAXQUOTAS; cnt++)
1806 transfer_to[cnt] = transfer_from[cnt];
1807warn:
1808 flush_warnings(transfer_to, warntype_to); 1811 flush_warnings(transfer_to, warntype_to);
1809 flush_warnings(transfer_from, warntype_from_inodes); 1812 flush_warnings(transfer_from, warntype_from_inodes);
1810 flush_warnings(transfer_from, warntype_from_space); 1813 flush_warnings(transfer_from, warntype_from_space);
1811 return ret; 1814 /* Pass back references to put */
1815 for (cnt = 0; cnt < MAXQUOTAS; cnt++)
1816 if (is_valid[cnt])
1817 transfer_to[cnt] = transfer_from[cnt];
1818 return 0;
1812over_quota: 1819over_quota:
1813 spin_unlock(&dq_data_lock); 1820 spin_unlock(&dq_data_lock);
1814 up_write(&sb_dqopt(inode->i_sb)->dqptr_sem); 1821 up_write(&sb_dqopt(inode->i_sb)->dqptr_sem);
1815 goto warn; 1822 flush_warnings(transfer_to, warntype_to);
1823 return ret;
1816} 1824}
1817EXPORT_SYMBOL(__dquot_transfer); 1825EXPORT_SYMBOL(__dquot_transfer);
1818 1826
diff --git a/fs/ramfs/inode.c b/fs/ramfs/inode.c
index a5ebae70dc6d..eacb166fb259 100644
--- a/fs/ramfs/inode.c
+++ b/fs/ramfs/inode.c
@@ -58,6 +58,7 @@ struct inode *ramfs_get_inode(struct super_block *sb,
58 struct inode * inode = new_inode(sb); 58 struct inode * inode = new_inode(sb);
59 59
60 if (inode) { 60 if (inode) {
61 inode->i_ino = get_next_ino();
61 inode_init_owner(inode, dir, mode); 62 inode_init_owner(inode, dir, mode);
62 inode->i_mapping->a_ops = &ramfs_aops; 63 inode->i_mapping->a_ops = &ramfs_aops;
63 inode->i_mapping->backing_dev_info = &ramfs_backing_dev_info; 64 inode->i_mapping->backing_dev_info = &ramfs_backing_dev_info;
@@ -254,17 +255,16 @@ fail:
254 return err; 255 return err;
255} 256}
256 257
257int ramfs_get_sb(struct file_system_type *fs_type, 258struct dentry *ramfs_mount(struct file_system_type *fs_type,
258 int flags, const char *dev_name, void *data, struct vfsmount *mnt) 259 int flags, const char *dev_name, void *data)
259{ 260{
260 return get_sb_nodev(fs_type, flags, data, ramfs_fill_super, mnt); 261 return mount_nodev(fs_type, flags, data, ramfs_fill_super);
261} 262}
262 263
263static int rootfs_get_sb(struct file_system_type *fs_type, 264static struct dentry *rootfs_mount(struct file_system_type *fs_type,
264 int flags, const char *dev_name, void *data, struct vfsmount *mnt) 265 int flags, const char *dev_name, void *data)
265{ 266{
266 return get_sb_nodev(fs_type, flags|MS_NOUSER, data, ramfs_fill_super, 267 return mount_nodev(fs_type, flags|MS_NOUSER, data, ramfs_fill_super);
267 mnt);
268} 268}
269 269
270static void ramfs_kill_sb(struct super_block *sb) 270static void ramfs_kill_sb(struct super_block *sb)
@@ -275,12 +275,12 @@ static void ramfs_kill_sb(struct super_block *sb)
275 275
276static struct file_system_type ramfs_fs_type = { 276static struct file_system_type ramfs_fs_type = {
277 .name = "ramfs", 277 .name = "ramfs",
278 .get_sb = ramfs_get_sb, 278 .mount = ramfs_mount,
279 .kill_sb = ramfs_kill_sb, 279 .kill_sb = ramfs_kill_sb,
280}; 280};
281static struct file_system_type rootfs_fs_type = { 281static struct file_system_type rootfs_fs_type = {
282 .name = "rootfs", 282 .name = "rootfs",
283 .get_sb = rootfs_get_sb, 283 .mount = rootfs_mount,
284 .kill_sb = kill_litter_super, 284 .kill_sb = kill_litter_super,
285}; 285};
286 286
diff --git a/fs/read_write.c b/fs/read_write.c
index e757ef26e4ce..431a0ed610c8 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -31,6 +31,20 @@ const struct file_operations generic_ro_fops = {
31 31
32EXPORT_SYMBOL(generic_ro_fops); 32EXPORT_SYMBOL(generic_ro_fops);
33 33
34static int
35__negative_fpos_check(struct file *file, loff_t pos, size_t count)
36{
37 /*
38 * pos or pos+count is negative here, check overflow.
39 * too big "count" will be caught in rw_verify_area().
40 */
41 if ((pos < 0) && (pos + count < pos))
42 return -EOVERFLOW;
43 if (file->f_mode & FMODE_UNSIGNED_OFFSET)
44 return 0;
45 return -EINVAL;
46}
47
34/** 48/**
35 * generic_file_llseek_unlocked - lockless generic llseek implementation 49 * generic_file_llseek_unlocked - lockless generic llseek implementation
36 * @file: file structure to seek on 50 * @file: file structure to seek on
@@ -62,7 +76,9 @@ generic_file_llseek_unlocked(struct file *file, loff_t offset, int origin)
62 break; 76 break;
63 } 77 }
64 78
65 if (offset < 0 || offset > inode->i_sb->s_maxbytes) 79 if (offset < 0 && __negative_fpos_check(file, offset, 0))
80 return -EINVAL;
81 if (offset > inode->i_sb->s_maxbytes)
66 return -EINVAL; 82 return -EINVAL;
67 83
68 /* Special lock needed here? */ 84 /* Special lock needed here? */
@@ -137,7 +153,7 @@ loff_t default_llseek(struct file *file, loff_t offset, int origin)
137 offset += file->f_pos; 153 offset += file->f_pos;
138 } 154 }
139 retval = -EINVAL; 155 retval = -EINVAL;
140 if (offset >= 0) { 156 if (offset >= 0 || !__negative_fpos_check(file, offset, 0)) {
141 if (offset != file->f_pos) { 157 if (offset != file->f_pos) {
142 file->f_pos = offset; 158 file->f_pos = offset;
143 file->f_version = 0; 159 file->f_version = 0;
@@ -221,13 +237,12 @@ bad:
221} 237}
222#endif 238#endif
223 239
240
224/* 241/*
225 * rw_verify_area doesn't like huge counts. We limit 242 * rw_verify_area doesn't like huge counts. We limit
226 * them to something that fits in "int" so that others 243 * them to something that fits in "int" so that others
227 * won't have to do range checks all the time. 244 * won't have to do range checks all the time.
228 */ 245 */
229#define MAX_RW_COUNT (INT_MAX & PAGE_CACHE_MASK)
230
231int rw_verify_area(int read_write, struct file *file, loff_t *ppos, size_t count) 246int rw_verify_area(int read_write, struct file *file, loff_t *ppos, size_t count)
232{ 247{
233 struct inode *inode; 248 struct inode *inode;
@@ -238,8 +253,11 @@ int rw_verify_area(int read_write, struct file *file, loff_t *ppos, size_t count
238 if (unlikely((ssize_t) count < 0)) 253 if (unlikely((ssize_t) count < 0))
239 return retval; 254 return retval;
240 pos = *ppos; 255 pos = *ppos;
241 if (unlikely((pos < 0) || (loff_t) (pos + count) < 0)) 256 if (unlikely((pos < 0) || (loff_t) (pos + count) < 0)) {
242 return retval; 257 retval = __negative_fpos_check(file, pos, count);
258 if (retval)
259 return retval;
260 }
243 261
244 if (unlikely(inode->i_flock && mandatory_lock(inode))) { 262 if (unlikely(inode->i_flock && mandatory_lock(inode))) {
245 retval = locks_mandatory_area( 263 retval = locks_mandatory_area(
@@ -564,65 +582,71 @@ ssize_t rw_copy_check_uvector(int type, const struct iovec __user * uvector,
564 unsigned long nr_segs, unsigned long fast_segs, 582 unsigned long nr_segs, unsigned long fast_segs,
565 struct iovec *fast_pointer, 583 struct iovec *fast_pointer,
566 struct iovec **ret_pointer) 584 struct iovec **ret_pointer)
567 { 585{
568 unsigned long seg; 586 unsigned long seg;
569 ssize_t ret; 587 ssize_t ret;
570 struct iovec *iov = fast_pointer; 588 struct iovec *iov = fast_pointer;
571 589
572 /* 590 /*
573 * SuS says "The readv() function *may* fail if the iovcnt argument 591 * SuS says "The readv() function *may* fail if the iovcnt argument
574 * was less than or equal to 0, or greater than {IOV_MAX}. Linux has 592 * was less than or equal to 0, or greater than {IOV_MAX}. Linux has
575 * traditionally returned zero for zero segments, so... 593 * traditionally returned zero for zero segments, so...
576 */ 594 */
577 if (nr_segs == 0) { 595 if (nr_segs == 0) {
578 ret = 0; 596 ret = 0;
579 goto out; 597 goto out;
580 } 598 }
581 599
582 /* 600 /*
583 * First get the "struct iovec" from user memory and 601 * First get the "struct iovec" from user memory and
584 * verify all the pointers 602 * verify all the pointers
585 */ 603 */
586 if (nr_segs > UIO_MAXIOV) { 604 if (nr_segs > UIO_MAXIOV) {
587 ret = -EINVAL; 605 ret = -EINVAL;
588 goto out; 606 goto out;
589 } 607 }
590 if (nr_segs > fast_segs) { 608 if (nr_segs > fast_segs) {
591 iov = kmalloc(nr_segs*sizeof(struct iovec), GFP_KERNEL); 609 iov = kmalloc(nr_segs*sizeof(struct iovec), GFP_KERNEL);
592 if (iov == NULL) { 610 if (iov == NULL) {
593 ret = -ENOMEM; 611 ret = -ENOMEM;
594 goto out; 612 goto out;
595 } 613 }
596 } 614 }
597 if (copy_from_user(iov, uvector, nr_segs*sizeof(*uvector))) { 615 if (copy_from_user(iov, uvector, nr_segs*sizeof(*uvector))) {
598 ret = -EFAULT; 616 ret = -EFAULT;
599 goto out; 617 goto out;
600 } 618 }
601 619
602 /* 620 /*
603 * According to the Single Unix Specification we should return EINVAL 621 * According to the Single Unix Specification we should return EINVAL
604 * if an element length is < 0 when cast to ssize_t or if the 622 * if an element length is < 0 when cast to ssize_t or if the
605 * total length would overflow the ssize_t return value of the 623 * total length would overflow the ssize_t return value of the
606 * system call. 624 * system call.
607 */ 625 *
626 * Linux caps all read/write calls to MAX_RW_COUNT, and avoids the
627 * overflow case.
628 */
608 ret = 0; 629 ret = 0;
609 for (seg = 0; seg < nr_segs; seg++) { 630 for (seg = 0; seg < nr_segs; seg++) {
610 void __user *buf = iov[seg].iov_base; 631 void __user *buf = iov[seg].iov_base;
611 ssize_t len = (ssize_t)iov[seg].iov_len; 632 ssize_t len = (ssize_t)iov[seg].iov_len;
612 633
613 /* see if we we're about to use an invalid len or if 634 /* see if we we're about to use an invalid len or if
614 * it's about to overflow ssize_t */ 635 * it's about to overflow ssize_t */
615 if (len < 0 || (ret + len < ret)) { 636 if (len < 0) {
616 ret = -EINVAL; 637 ret = -EINVAL;
617 goto out; 638 goto out;
618 } 639 }
619 if (unlikely(!access_ok(vrfy_dir(type), buf, len))) { 640 if (unlikely(!access_ok(vrfy_dir(type), buf, len))) {
620 ret = -EFAULT; 641 ret = -EFAULT;
621 goto out; 642 goto out;
643 }
644 if (len > MAX_RW_COUNT - ret) {
645 len = MAX_RW_COUNT - ret;
646 iov[seg].iov_len = len;
622 } 647 }
623
624 ret += len; 648 ret += len;
625 } 649 }
626out: 650out:
627 *ret_pointer = iov; 651 *ret_pointer = iov;
628 return ret; 652 return ret;
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index caa758377d66..41656d40dc5c 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -22,8 +22,6 @@
22 22
23int reiserfs_commit_write(struct file *f, struct page *page, 23int reiserfs_commit_write(struct file *f, struct page *page,
24 unsigned from, unsigned to); 24 unsigned from, unsigned to);
25int reiserfs_prepare_write(struct file *f, struct page *page,
26 unsigned from, unsigned to);
27 25
28void reiserfs_evict_inode(struct inode *inode) 26void reiserfs_evict_inode(struct inode *inode)
29{ 27{
@@ -165,7 +163,7 @@ inline void make_le_item_head(struct item_head *ih, const struct cpu_key *key,
165** but tail is still sitting in a direct item, and we can't write to 163** but tail is still sitting in a direct item, and we can't write to
166** it. So, look through this page, and check all the mapped buffers 164** it. So, look through this page, and check all the mapped buffers
167** to make sure they have valid block numbers. Any that don't need 165** to make sure they have valid block numbers. Any that don't need
168** to be unmapped, so that block_prepare_write will correctly call 166** to be unmapped, so that __block_write_begin will correctly call
169** reiserfs_get_block to convert the tail into an unformatted node 167** reiserfs_get_block to convert the tail into an unformatted node
170*/ 168*/
171static inline void fix_tail_page_for_writing(struct page *page) 169static inline void fix_tail_page_for_writing(struct page *page)
@@ -439,13 +437,13 @@ static int reiserfs_bmap(struct inode *inode, sector_t block,
439} 437}
440 438
441/* special version of get_block that is only used by grab_tail_page right 439/* special version of get_block that is only used by grab_tail_page right
442** now. It is sent to block_prepare_write, and when you try to get a 440** now. It is sent to __block_write_begin, and when you try to get a
443** block past the end of the file (or a block from a hole) it returns 441** block past the end of the file (or a block from a hole) it returns
444** -ENOENT instead of a valid buffer. block_prepare_write expects to 442** -ENOENT instead of a valid buffer. __block_write_begin expects to
445** be able to do i/o on the buffers returned, unless an error value 443** be able to do i/o on the buffers returned, unless an error value
446** is also returned. 444** is also returned.
447** 445**
448** So, this allows block_prepare_write to be used for reading a single block 446** So, this allows __block_write_begin to be used for reading a single block
449** in a page. Where it does not produce a valid page for holes, or past the 447** in a page. Where it does not produce a valid page for holes, or past the
450** end of the file. This turns out to be exactly what we need for reading 448** end of the file. This turns out to be exactly what we need for reading
451** tails for conversion. 449** tails for conversion.
@@ -558,11 +556,12 @@ static int convert_tail_for_hole(struct inode *inode,
558 ** 556 **
559 ** We must fix the tail page for writing because it might have buffers 557 ** We must fix the tail page for writing because it might have buffers
560 ** that are mapped, but have a block number of 0. This indicates tail 558 ** that are mapped, but have a block number of 0. This indicates tail
561 ** data that has been read directly into the page, and block_prepare_write 559 ** data that has been read directly into the page, and
562 ** won't trigger a get_block in this case. 560 ** __block_write_begin won't trigger a get_block in this case.
563 */ 561 */
564 fix_tail_page_for_writing(tail_page); 562 fix_tail_page_for_writing(tail_page);
565 retval = reiserfs_prepare_write(NULL, tail_page, tail_start, tail_end); 563 retval = __reiserfs_write_begin(tail_page, tail_start,
564 tail_end - tail_start);
566 if (retval) 565 if (retval)
567 goto unlock; 566 goto unlock;
568 567
@@ -2033,7 +2032,7 @@ static int grab_tail_page(struct inode *inode,
2033 /* start within the page of the last block in the file */ 2032 /* start within the page of the last block in the file */
2034 start = (offset / blocksize) * blocksize; 2033 start = (offset / blocksize) * blocksize;
2035 2034
2036 error = block_prepare_write(page, start, offset, 2035 error = __block_write_begin(page, start, offset - start,
2037 reiserfs_get_block_create_0); 2036 reiserfs_get_block_create_0);
2038 if (error) 2037 if (error)
2039 goto unlock; 2038 goto unlock;
@@ -2438,7 +2437,7 @@ static int reiserfs_write_full_page(struct page *page,
2438 /* from this point on, we know the buffer is mapped to a 2437 /* from this point on, we know the buffer is mapped to a
2439 * real block and not a direct item 2438 * real block and not a direct item
2440 */ 2439 */
2441 if (wbc->sync_mode != WB_SYNC_NONE || !wbc->nonblocking) { 2440 if (wbc->sync_mode != WB_SYNC_NONE) {
2442 lock_buffer(bh); 2441 lock_buffer(bh);
2443 } else { 2442 } else {
2444 if (!trylock_buffer(bh)) { 2443 if (!trylock_buffer(bh)) {
@@ -2628,8 +2627,7 @@ static int reiserfs_write_begin(struct file *file,
2628 return ret; 2627 return ret;
2629} 2628}
2630 2629
2631int reiserfs_prepare_write(struct file *f, struct page *page, 2630int __reiserfs_write_begin(struct page *page, unsigned from, unsigned len)
2632 unsigned from, unsigned to)
2633{ 2631{
2634 struct inode *inode = page->mapping->host; 2632 struct inode *inode = page->mapping->host;
2635 int ret; 2633 int ret;
@@ -2650,7 +2648,7 @@ int reiserfs_prepare_write(struct file *f, struct page *page,
2650 th->t_refcount++; 2648 th->t_refcount++;
2651 } 2649 }
2652 2650
2653 ret = block_prepare_write(page, from, to, reiserfs_get_block); 2651 ret = __block_write_begin(page, from, len, reiserfs_get_block);
2654 if (ret && reiserfs_transaction_running(inode->i_sb)) { 2652 if (ret && reiserfs_transaction_running(inode->i_sb)) {
2655 struct reiserfs_transaction_handle *th = current->journal_info; 2653 struct reiserfs_transaction_handle *th = current->journal_info;
2656 /* this gets a little ugly. If reiserfs_get_block returned an 2654 /* this gets a little ugly. If reiserfs_get_block returned an
diff --git a/fs/reiserfs/ioctl.c b/fs/reiserfs/ioctl.c
index 5cbb81e134ac..adf22b485cea 100644
--- a/fs/reiserfs/ioctl.c
+++ b/fs/reiserfs/ioctl.c
@@ -160,8 +160,6 @@ long reiserfs_compat_ioctl(struct file *file, unsigned int cmd,
160 160
161int reiserfs_commit_write(struct file *f, struct page *page, 161int reiserfs_commit_write(struct file *f, struct page *page,
162 unsigned from, unsigned to); 162 unsigned from, unsigned to);
163int reiserfs_prepare_write(struct file *f, struct page *page,
164 unsigned from, unsigned to);
165/* 163/*
166** reiserfs_unpack 164** reiserfs_unpack
167** Function try to convert tail from direct item into indirect. 165** Function try to convert tail from direct item into indirect.
@@ -200,7 +198,7 @@ int reiserfs_unpack(struct inode *inode, struct file *filp)
200 } 198 }
201 199
202 /* we unpack by finding the page with the tail, and calling 200 /* we unpack by finding the page with the tail, and calling
203 ** reiserfs_prepare_write on that page. This will force a 201 ** __reiserfs_write_begin on that page. This will force a
204 ** reiserfs_get_block to unpack the tail for us. 202 ** reiserfs_get_block to unpack the tail for us.
205 */ 203 */
206 index = inode->i_size >> PAGE_CACHE_SHIFT; 204 index = inode->i_size >> PAGE_CACHE_SHIFT;
@@ -210,7 +208,7 @@ int reiserfs_unpack(struct inode *inode, struct file *filp)
210 if (!page) { 208 if (!page) {
211 goto out; 209 goto out;
212 } 210 }
213 retval = reiserfs_prepare_write(NULL, page, write_from, write_from); 211 retval = __reiserfs_write_begin(page, write_from, 0);
214 if (retval) 212 if (retval)
215 goto out_unlock; 213 goto out_unlock;
216 214
diff --git a/fs/reiserfs/namei.c b/fs/reiserfs/namei.c
index ee78d4a0086a..ba5f51ec3458 100644
--- a/fs/reiserfs/namei.c
+++ b/fs/reiserfs/namei.c
@@ -1156,7 +1156,7 @@ static int reiserfs_link(struct dentry *old_dentry, struct inode *dir,
1156 inode->i_ctime = CURRENT_TIME_SEC; 1156 inode->i_ctime = CURRENT_TIME_SEC;
1157 reiserfs_update_sd(&th, inode); 1157 reiserfs_update_sd(&th, inode);
1158 1158
1159 atomic_inc(&inode->i_count); 1159 ihold(inode);
1160 d_instantiate(dentry, inode); 1160 d_instantiate(dentry, inode);
1161 retval = journal_end(&th, dir->i_sb, jbegin_count); 1161 retval = journal_end(&th, dir->i_sb, jbegin_count);
1162 reiserfs_write_unlock(dir->i_sb); 1162 reiserfs_write_unlock(dir->i_sb);
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index e15ff612002d..3bf7a6457f4d 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -2213,12 +2213,11 @@ out:
2213 2213
2214#endif 2214#endif
2215 2215
2216static int get_super_block(struct file_system_type *fs_type, 2216static struct dentry *get_super_block(struct file_system_type *fs_type,
2217 int flags, const char *dev_name, 2217 int flags, const char *dev_name,
2218 void *data, struct vfsmount *mnt) 2218 void *data)
2219{ 2219{
2220 return get_sb_bdev(fs_type, flags, dev_name, data, reiserfs_fill_super, 2220 return mount_bdev(fs_type, flags, dev_name, data, reiserfs_fill_super);
2221 mnt);
2222} 2221}
2223 2222
2224static int __init init_reiserfs_fs(void) 2223static int __init init_reiserfs_fs(void)
@@ -2253,7 +2252,7 @@ static void __exit exit_reiserfs_fs(void)
2253struct file_system_type reiserfs_fs_type = { 2252struct file_system_type reiserfs_fs_type = {
2254 .owner = THIS_MODULE, 2253 .owner = THIS_MODULE,
2255 .name = "reiserfs", 2254 .name = "reiserfs",
2256 .get_sb = get_super_block, 2255 .mount = get_super_block,
2257 .kill_sb = reiserfs_kill_sb, 2256 .kill_sb = reiserfs_kill_sb,
2258 .fs_flags = FS_REQUIRES_DEV, 2257 .fs_flags = FS_REQUIRES_DEV,
2259}; 2258};
diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c
index 8c4cf273c672..5d04a7828e7a 100644
--- a/fs/reiserfs/xattr.c
+++ b/fs/reiserfs/xattr.c
@@ -418,13 +418,11 @@ static inline __u32 xattr_hash(const char *msg, int len)
418 418
419int reiserfs_commit_write(struct file *f, struct page *page, 419int reiserfs_commit_write(struct file *f, struct page *page,
420 unsigned from, unsigned to); 420 unsigned from, unsigned to);
421int reiserfs_prepare_write(struct file *f, struct page *page,
422 unsigned from, unsigned to);
423 421
424static void update_ctime(struct inode *inode) 422static void update_ctime(struct inode *inode)
425{ 423{
426 struct timespec now = current_fs_time(inode->i_sb); 424 struct timespec now = current_fs_time(inode->i_sb);
427 if (hlist_unhashed(&inode->i_hash) || !inode->i_nlink || 425 if (inode_unhashed(inode) || !inode->i_nlink ||
428 timespec_equal(&inode->i_ctime, &now)) 426 timespec_equal(&inode->i_ctime, &now))
429 return; 427 return;
430 428
@@ -532,8 +530,7 @@ reiserfs_xattr_set_handle(struct reiserfs_transaction_handle *th,
532 rxh->h_hash = cpu_to_le32(xahash); 530 rxh->h_hash = cpu_to_le32(xahash);
533 } 531 }
534 532
535 err = reiserfs_prepare_write(NULL, page, page_offset, 533 err = __reiserfs_write_begin(page, page_offset, chunk + skip);
536 page_offset + chunk + skip);
537 if (!err) { 534 if (!err) {
538 if (buffer) 535 if (buffer)
539 memcpy(data + skip, buffer + buffer_pos, chunk); 536 memcpy(data + skip, buffer + buffer_pos, chunk);
diff --git a/fs/romfs/super.c b/fs/romfs/super.c
index 268580535c92..6647f90e55cd 100644
--- a/fs/romfs/super.c
+++ b/fs/romfs/super.c
@@ -552,20 +552,19 @@ error_rsb:
552/* 552/*
553 * get a superblock for mounting 553 * get a superblock for mounting
554 */ 554 */
555static int romfs_get_sb(struct file_system_type *fs_type, 555static struct dentry *romfs_mount(struct file_system_type *fs_type,
556 int flags, const char *dev_name, 556 int flags, const char *dev_name,
557 void *data, struct vfsmount *mnt) 557 void *data)
558{ 558{
559 int ret = -EINVAL; 559 struct dentry *ret = ERR_PTR(-EINVAL);
560 560
561#ifdef CONFIG_ROMFS_ON_MTD 561#ifdef CONFIG_ROMFS_ON_MTD
562 ret = get_sb_mtd(fs_type, flags, dev_name, data, romfs_fill_super, 562 ret = mount_mtd(fs_type, flags, dev_name, data, romfs_fill_super);
563 mnt);
564#endif 563#endif
565#ifdef CONFIG_ROMFS_ON_BLOCK 564#ifdef CONFIG_ROMFS_ON_BLOCK
566 if (ret == -EINVAL) 565 if (ret == ERR_PTR(-EINVAL))
567 ret = get_sb_bdev(fs_type, flags, dev_name, data, 566 ret = mount_bdev(fs_type, flags, dev_name, data,
568 romfs_fill_super, mnt); 567 romfs_fill_super);
569#endif 568#endif
570 return ret; 569 return ret;
571} 570}
@@ -592,7 +591,7 @@ static void romfs_kill_sb(struct super_block *sb)
592static struct file_system_type romfs_fs_type = { 591static struct file_system_type romfs_fs_type = {
593 .owner = THIS_MODULE, 592 .owner = THIS_MODULE,
594 .name = "romfs", 593 .name = "romfs",
595 .get_sb = romfs_get_sb, 594 .mount = romfs_mount,
596 .kill_sb = romfs_kill_sb, 595 .kill_sb = romfs_kill_sb,
597 .fs_flags = FS_REQUIRES_DEV, 596 .fs_flags = FS_REQUIRES_DEV,
598}; 597};
diff --git a/fs/select.c b/fs/select.c
index 500a669f7790..b7b10aa30861 100644
--- a/fs/select.c
+++ b/fs/select.c
@@ -67,7 +67,7 @@ static long __estimate_accuracy(struct timespec *tv)
67 return slack; 67 return slack;
68} 68}
69 69
70static long estimate_accuracy(struct timespec *tv) 70long select_estimate_accuracy(struct timespec *tv)
71{ 71{
72 unsigned long ret; 72 unsigned long ret;
73 struct timespec now; 73 struct timespec now;
@@ -417,7 +417,7 @@ int do_select(int n, fd_set_bits *fds, struct timespec *end_time)
417 } 417 }
418 418
419 if (end_time && !timed_out) 419 if (end_time && !timed_out)
420 slack = estimate_accuracy(end_time); 420 slack = select_estimate_accuracy(end_time);
421 421
422 retval = 0; 422 retval = 0;
423 for (;;) { 423 for (;;) {
@@ -769,7 +769,7 @@ static int do_poll(unsigned int nfds, struct poll_list *list,
769 } 769 }
770 770
771 if (end_time && !timed_out) 771 if (end_time && !timed_out)
772 slack = estimate_accuracy(end_time); 772 slack = select_estimate_accuracy(end_time);
773 773
774 for (;;) { 774 for (;;) {
775 struct poll_list *walk; 775 struct poll_list *walk;
diff --git a/fs/seq_file.c b/fs/seq_file.c
index 0e7cb1395a94..05d6b0e78c95 100644
--- a/fs/seq_file.c
+++ b/fs/seq_file.c
@@ -462,9 +462,7 @@ int seq_path_root(struct seq_file *m, struct path *path, struct path *root,
462 if (size) { 462 if (size) {
463 char *p; 463 char *p;
464 464
465 spin_lock(&dcache_lock);
466 p = __d_path(path, root, buf, size); 465 p = __d_path(path, root, buf, size);
467 spin_unlock(&dcache_lock);
468 res = PTR_ERR(p); 466 res = PTR_ERR(p);
469 if (!IS_ERR(p)) { 467 if (!IS_ERR(p)) {
470 char *end = mangle_path(buf, p, esc); 468 char *end = mangle_path(buf, p, esc);
diff --git a/fs/signalfd.c b/fs/signalfd.c
index 74047304b01a..492465b451dd 100644
--- a/fs/signalfd.c
+++ b/fs/signalfd.c
@@ -99,6 +99,16 @@ static int signalfd_copyinfo(struct signalfd_siginfo __user *uinfo,
99#ifdef __ARCH_SI_TRAPNO 99#ifdef __ARCH_SI_TRAPNO
100 err |= __put_user(kinfo->si_trapno, &uinfo->ssi_trapno); 100 err |= __put_user(kinfo->si_trapno, &uinfo->ssi_trapno);
101#endif 101#endif
102#ifdef BUS_MCEERR_AO
103 /*
104 * Other callers might not initialize the si_lsb field,
105 * so check explicitly for the right codes here.
106 */
107 if (kinfo->si_code == BUS_MCEERR_AR ||
108 kinfo->si_code == BUS_MCEERR_AO)
109 err |= __put_user((short) kinfo->si_addr_lsb,
110 &uinfo->ssi_addr_lsb);
111#endif
102 break; 112 break;
103 case __SI_CHLD: 113 case __SI_CHLD:
104 err |= __put_user(kinfo->si_pid, &uinfo->ssi_pid); 114 err |= __put_user(kinfo->si_pid, &uinfo->ssi_pid);
diff --git a/fs/smbfs/Kconfig b/fs/smbfs/Kconfig
deleted file mode 100644
index 2bc24a8c4039..000000000000
--- a/fs/smbfs/Kconfig
+++ /dev/null
@@ -1,56 +0,0 @@
1config SMB_FS
2 tristate "SMB file system support (OBSOLETE, please use CIFS)"
3 depends on BKL # probably unfixable
4 depends on INET
5 select NLS
6 help
7 SMB (Server Message Block) is the protocol Windows for Workgroups
8 (WfW), Windows 95/98, Windows NT and OS/2 Lan Manager use to share
9 files and printers over local networks. Saying Y here allows you to
10 mount their file systems (often called "shares" in this context) and
11 access them just like any other Unix directory. Currently, this
12 works only if the Windows machines use TCP/IP as the underlying
13 transport protocol, and not NetBEUI. For details, read
14 <file:Documentation/filesystems/smbfs.txt> and the SMB-HOWTO,
15 available from <http://www.tldp.org/docs.html#howto>.
16
17 Note: if you just want your box to act as an SMB *server* and make
18 files and printing services available to Windows clients (which need
19 to have a TCP/IP stack), you don't need to say Y here; you can use
20 the program SAMBA (available from <ftp://ftp.samba.org/pub/samba/>)
21 for that.
22
23 General information about how to connect Linux, Windows machines and
24 Macs is on the WWW at <http://www.eats.com/linux_mac_win.html>.
25
26 To compile the SMB support as a module, choose M here:
27 the module will be called smbfs. Most people say N, however.
28
29config SMB_NLS_DEFAULT
30 bool "Use a default NLS"
31 depends on SMB_FS
32 help
33 Enabling this will make smbfs use nls translations by default. You
34 need to specify the local charset (CONFIG_NLS_DEFAULT) in the nls
35 settings and you need to give the default nls for the SMB server as
36 CONFIG_SMB_NLS_REMOTE.
37
38 The nls settings can be changed at mount time, if your smbmount
39 supports that, using the codepage and iocharset parameters.
40
41 smbmount from samba 2.2.0 or later supports this.
42
43config SMB_NLS_REMOTE
44 string "Default Remote NLS Option"
45 depends on SMB_NLS_DEFAULT
46 default "cp437"
47 help
48 This setting allows you to specify a default value for which
49 codepage the server uses. If this field is left blank no
50 translations will be done by default. The local codepage/charset
51 default to CONFIG_NLS_DEFAULT.
52
53 The nls settings can be changed at mount time, if your smbmount
54 supports that, using the codepage and iocharset parameters.
55
56 smbmount from samba 2.2.0 or later supports this.
diff --git a/fs/smbfs/Makefile b/fs/smbfs/Makefile
deleted file mode 100644
index 4faf8c4722c3..000000000000
--- a/fs/smbfs/Makefile
+++ /dev/null
@@ -1,18 +0,0 @@
1#
2# Makefile for the linux smb-filesystem routines.
3#
4
5obj-$(CONFIG_SMB_FS) += smbfs.o
6
7smbfs-objs := proc.o dir.o cache.o sock.o inode.o file.o ioctl.o getopt.o \
8 symlink.o smbiod.o request.o
9
10# If you want debugging output, you may add these flags to the EXTRA_CFLAGS
11# SMBFS_PARANOIA should normally be enabled.
12
13EXTRA_CFLAGS += -DSMBFS_PARANOIA
14#EXTRA_CFLAGS += -DSMBFS_DEBUG
15#EXTRA_CFLAGS += -DSMBFS_DEBUG_VERBOSE
16#EXTRA_CFLAGS += -DDEBUG_SMB_TIMESTAMP
17#EXTRA_CFLAGS += -Werror
18
diff --git a/fs/smbfs/cache.c b/fs/smbfs/cache.c
deleted file mode 100644
index 8c177eb7e344..000000000000
--- a/fs/smbfs/cache.c
+++ /dev/null
@@ -1,208 +0,0 @@
1/*
2 * cache.c
3 *
4 * Copyright (C) 1997 by Bill Hawes
5 *
6 * Routines to support directory cacheing using the page cache.
7 * This cache code is almost directly taken from ncpfs.
8 *
9 * Please add a note about your changes to smbfs in the ChangeLog file.
10 */
11
12#include <linux/time.h>
13#include <linux/errno.h>
14#include <linux/kernel.h>
15#include <linux/mm.h>
16#include <linux/smb_fs.h>
17#include <linux/pagemap.h>
18#include <linux/net.h>
19
20#include <asm/page.h>
21
22#include "smb_debug.h"
23#include "proto.h"
24
25/*
26 * Force the next attempt to use the cache to be a timeout.
27 * If we can't find the page that's fine, it will cause a refresh.
28 */
29void
30smb_invalid_dir_cache(struct inode * dir)
31{
32 struct smb_sb_info *server = server_from_inode(dir);
33 union smb_dir_cache *cache = NULL;
34 struct page *page = NULL;
35
36 page = grab_cache_page(&dir->i_data, 0);
37 if (!page)
38 goto out;
39
40 if (!PageUptodate(page))
41 goto out_unlock;
42
43 cache = kmap(page);
44 cache->head.time = jiffies - SMB_MAX_AGE(server);
45
46 kunmap(page);
47 SetPageUptodate(page);
48out_unlock:
49 unlock_page(page);
50 page_cache_release(page);
51out:
52 return;
53}
54
55/*
56 * Mark all dentries for 'parent' as invalid, forcing them to be re-read
57 */
58void
59smb_invalidate_dircache_entries(struct dentry *parent)
60{
61 struct smb_sb_info *server = server_from_dentry(parent);
62 struct list_head *next;
63 struct dentry *dentry;
64
65 spin_lock(&dcache_lock);
66 next = parent->d_subdirs.next;
67 while (next != &parent->d_subdirs) {
68 dentry = list_entry(next, struct dentry, d_u.d_child);
69 dentry->d_fsdata = NULL;
70 smb_age_dentry(server, dentry);
71 next = next->next;
72 }
73 spin_unlock(&dcache_lock);
74}
75
76/*
77 * dget, but require that fpos and parent matches what the dentry contains.
78 * dentry is not known to be a valid pointer at entry.
79 */
80struct dentry *
81smb_dget_fpos(struct dentry *dentry, struct dentry *parent, unsigned long fpos)
82{
83 struct dentry *dent = dentry;
84 struct list_head *next;
85
86 if (d_validate(dent, parent)) {
87 if (dent->d_name.len <= SMB_MAXNAMELEN &&
88 (unsigned long)dent->d_fsdata == fpos) {
89 if (!dent->d_inode) {
90 dput(dent);
91 dent = NULL;
92 }
93 return dent;
94 }
95 dput(dent);
96 }
97
98 /* If a pointer is invalid, we search the dentry. */
99 spin_lock(&dcache_lock);
100 next = parent->d_subdirs.next;
101 while (next != &parent->d_subdirs) {
102 dent = list_entry(next, struct dentry, d_u.d_child);
103 if ((unsigned long)dent->d_fsdata == fpos) {
104 if (dent->d_inode)
105 dget_locked(dent);
106 else
107 dent = NULL;
108 goto out_unlock;
109 }
110 next = next->next;
111 }
112 dent = NULL;
113out_unlock:
114 spin_unlock(&dcache_lock);
115 return dent;
116}
117
118
119/*
120 * Create dentry/inode for this file and add it to the dircache.
121 */
122int
123smb_fill_cache(struct file *filp, void *dirent, filldir_t filldir,
124 struct smb_cache_control *ctrl, struct qstr *qname,
125 struct smb_fattr *entry)
126{
127 struct dentry *newdent, *dentry = filp->f_path.dentry;
128 struct inode *newino, *inode = dentry->d_inode;
129 struct smb_cache_control ctl = *ctrl;
130 int valid = 0;
131 int hashed = 0;
132 ino_t ino = 0;
133
134 qname->hash = full_name_hash(qname->name, qname->len);
135
136 if (dentry->d_op && dentry->d_op->d_hash)
137 if (dentry->d_op->d_hash(dentry, qname) != 0)
138 goto end_advance;
139
140 newdent = d_lookup(dentry, qname);
141
142 if (!newdent) {
143 newdent = d_alloc(dentry, qname);
144 if (!newdent)
145 goto end_advance;
146 } else {
147 hashed = 1;
148 memcpy((char *) newdent->d_name.name, qname->name,
149 newdent->d_name.len);
150 }
151
152 if (!newdent->d_inode) {
153 smb_renew_times(newdent);
154 entry->f_ino = iunique(inode->i_sb, 2);
155 newino = smb_iget(inode->i_sb, entry);
156 if (newino) {
157 smb_new_dentry(newdent);
158 d_instantiate(newdent, newino);
159 if (!hashed)
160 d_rehash(newdent);
161 }
162 } else
163 smb_set_inode_attr(newdent->d_inode, entry);
164
165 if (newdent->d_inode) {
166 ino = newdent->d_inode->i_ino;
167 newdent->d_fsdata = (void *) ctl.fpos;
168 smb_new_dentry(newdent);
169 }
170
171 if (ctl.idx >= SMB_DIRCACHE_SIZE) {
172 if (ctl.page) {
173 kunmap(ctl.page);
174 SetPageUptodate(ctl.page);
175 unlock_page(ctl.page);
176 page_cache_release(ctl.page);
177 }
178 ctl.cache = NULL;
179 ctl.idx -= SMB_DIRCACHE_SIZE;
180 ctl.ofs += 1;
181 ctl.page = grab_cache_page(&inode->i_data, ctl.ofs);
182 if (ctl.page)
183 ctl.cache = kmap(ctl.page);
184 }
185 if (ctl.cache) {
186 ctl.cache->dentry[ctl.idx] = newdent;
187 valid = 1;
188 }
189 dput(newdent);
190
191end_advance:
192 if (!valid)
193 ctl.valid = 0;
194 if (!ctl.filled && (ctl.fpos == filp->f_pos)) {
195 if (!ino)
196 ino = find_inode_number(dentry, qname);
197 if (!ino)
198 ino = iunique(inode->i_sb, 2);
199 ctl.filled = filldir(dirent, qname->name, qname->len,
200 filp->f_pos, ino, DT_UNKNOWN);
201 if (!ctl.filled)
202 filp->f_pos += 1;
203 }
204 ctl.fpos += 1;
205 ctl.idx += 1;
206 *ctrl = ctl;
207 return (ctl.valid || !ctl.filled);
208}
diff --git a/fs/smbfs/dir.c b/fs/smbfs/dir.c
deleted file mode 100644
index 00a70cab1f36..000000000000
--- a/fs/smbfs/dir.c
+++ /dev/null
@@ -1,702 +0,0 @@
1/*
2 * dir.c
3 *
4 * Copyright (C) 1995, 1996 by Paal-Kr. Engstad and Volker Lendecke
5 * Copyright (C) 1997 by Volker Lendecke
6 *
7 * Please add a note about your changes to smbfs in the ChangeLog file.
8 */
9
10#include <linux/time.h>
11#include <linux/errno.h>
12#include <linux/kernel.h>
13#include <linux/smp_lock.h>
14#include <linux/ctype.h>
15#include <linux/net.h>
16#include <linux/sched.h>
17
18#include <linux/smb_fs.h>
19#include <linux/smb_mount.h>
20#include <linux/smbno.h>
21
22#include "smb_debug.h"
23#include "proto.h"
24
25static int smb_readdir(struct file *, void *, filldir_t);
26static int smb_dir_open(struct inode *, struct file *);
27
28static struct dentry *smb_lookup(struct inode *, struct dentry *, struct nameidata *);
29static int smb_create(struct inode *, struct dentry *, int, struct nameidata *);
30static int smb_mkdir(struct inode *, struct dentry *, int);
31static int smb_rmdir(struct inode *, struct dentry *);
32static int smb_unlink(struct inode *, struct dentry *);
33static int smb_rename(struct inode *, struct dentry *,
34 struct inode *, struct dentry *);
35static int smb_make_node(struct inode *,struct dentry *,int,dev_t);
36static int smb_link(struct dentry *, struct inode *, struct dentry *);
37
38const struct file_operations smb_dir_operations =
39{
40 .llseek = generic_file_llseek,
41 .read = generic_read_dir,
42 .readdir = smb_readdir,
43 .unlocked_ioctl = smb_ioctl,
44 .open = smb_dir_open,
45};
46
47const struct inode_operations smb_dir_inode_operations =
48{
49 .create = smb_create,
50 .lookup = smb_lookup,
51 .unlink = smb_unlink,
52 .mkdir = smb_mkdir,
53 .rmdir = smb_rmdir,
54 .rename = smb_rename,
55 .getattr = smb_getattr,
56 .setattr = smb_notify_change,
57};
58
59const struct inode_operations smb_dir_inode_operations_unix =
60{
61 .create = smb_create,
62 .lookup = smb_lookup,
63 .unlink = smb_unlink,
64 .mkdir = smb_mkdir,
65 .rmdir = smb_rmdir,
66 .rename = smb_rename,
67 .getattr = smb_getattr,
68 .setattr = smb_notify_change,
69 .symlink = smb_symlink,
70 .mknod = smb_make_node,
71 .link = smb_link,
72};
73
74/*
75 * Read a directory, using filldir to fill the dirent memory.
76 * smb_proc_readdir does the actual reading from the smb server.
77 *
78 * The cache code is almost directly taken from ncpfs
79 */
80static int
81smb_readdir(struct file *filp, void *dirent, filldir_t filldir)
82{
83 struct dentry *dentry = filp->f_path.dentry;
84 struct inode *dir = dentry->d_inode;
85 struct smb_sb_info *server = server_from_dentry(dentry);
86 union smb_dir_cache *cache = NULL;
87 struct smb_cache_control ctl;
88 struct page *page = NULL;
89 int result;
90
91 ctl.page = NULL;
92 ctl.cache = NULL;
93
94 VERBOSE("reading %s/%s, f_pos=%d\n",
95 DENTRY_PATH(dentry), (int) filp->f_pos);
96
97 result = 0;
98
99 lock_kernel();
100
101 switch ((unsigned int) filp->f_pos) {
102 case 0:
103 if (filldir(dirent, ".", 1, 0, dir->i_ino, DT_DIR) < 0)
104 goto out;
105 filp->f_pos = 1;
106 /* fallthrough */
107 case 1:
108 if (filldir(dirent, "..", 2, 1, parent_ino(dentry), DT_DIR) < 0)
109 goto out;
110 filp->f_pos = 2;
111 }
112
113 /*
114 * Make sure our inode is up-to-date.
115 */
116 result = smb_revalidate_inode(dentry);
117 if (result)
118 goto out;
119
120
121 page = grab_cache_page(&dir->i_data, 0);
122 if (!page)
123 goto read_really;
124
125 ctl.cache = cache = kmap(page);
126 ctl.head = cache->head;
127
128 if (!PageUptodate(page) || !ctl.head.eof) {
129 VERBOSE("%s/%s, page uptodate=%d, eof=%d\n",
130 DENTRY_PATH(dentry), PageUptodate(page),ctl.head.eof);
131 goto init_cache;
132 }
133
134 if (filp->f_pos == 2) {
135 if (jiffies - ctl.head.time >= SMB_MAX_AGE(server))
136 goto init_cache;
137
138 /*
139 * N.B. ncpfs checks mtime of dentry too here, we don't.
140 * 1. common smb servers do not update mtime on dir changes
141 * 2. it requires an extra smb request
142 * (revalidate has the same timeout as ctl.head.time)
143 *
144 * Instead smbfs invalidates its own cache on local changes
145 * and remote changes are not seen until timeout.
146 */
147 }
148
149 if (filp->f_pos > ctl.head.end)
150 goto finished;
151
152 ctl.fpos = filp->f_pos + (SMB_DIRCACHE_START - 2);
153 ctl.ofs = ctl.fpos / SMB_DIRCACHE_SIZE;
154 ctl.idx = ctl.fpos % SMB_DIRCACHE_SIZE;
155
156 for (;;) {
157 if (ctl.ofs != 0) {
158 ctl.page = find_lock_page(&dir->i_data, ctl.ofs);
159 if (!ctl.page)
160 goto invalid_cache;
161 ctl.cache = kmap(ctl.page);
162 if (!PageUptodate(ctl.page))
163 goto invalid_cache;
164 }
165 while (ctl.idx < SMB_DIRCACHE_SIZE) {
166 struct dentry *dent;
167 int res;
168
169 dent = smb_dget_fpos(ctl.cache->dentry[ctl.idx],
170 dentry, filp->f_pos);
171 if (!dent)
172 goto invalid_cache;
173
174 res = filldir(dirent, dent->d_name.name,
175 dent->d_name.len, filp->f_pos,
176 dent->d_inode->i_ino, DT_UNKNOWN);
177 dput(dent);
178 if (res)
179 goto finished;
180 filp->f_pos += 1;
181 ctl.idx += 1;
182 if (filp->f_pos > ctl.head.end)
183 goto finished;
184 }
185 if (ctl.page) {
186 kunmap(ctl.page);
187 SetPageUptodate(ctl.page);
188 unlock_page(ctl.page);
189 page_cache_release(ctl.page);
190 ctl.page = NULL;
191 }
192 ctl.idx = 0;
193 ctl.ofs += 1;
194 }
195invalid_cache:
196 if (ctl.page) {
197 kunmap(ctl.page);
198 unlock_page(ctl.page);
199 page_cache_release(ctl.page);
200 ctl.page = NULL;
201 }
202 ctl.cache = cache;
203init_cache:
204 smb_invalidate_dircache_entries(dentry);
205 ctl.head.time = jiffies;
206 ctl.head.eof = 0;
207 ctl.fpos = 2;
208 ctl.ofs = 0;
209 ctl.idx = SMB_DIRCACHE_START;
210 ctl.filled = 0;
211 ctl.valid = 1;
212read_really:
213 result = server->ops->readdir(filp, dirent, filldir, &ctl);
214 if (result == -ERESTARTSYS && page)
215 ClearPageUptodate(page);
216 if (ctl.idx == -1)
217 goto invalid_cache; /* retry */
218 ctl.head.end = ctl.fpos - 1;
219 ctl.head.eof = ctl.valid;
220finished:
221 if (page) {
222 cache->head = ctl.head;
223 kunmap(page);
224 if (result != -ERESTARTSYS)
225 SetPageUptodate(page);
226 unlock_page(page);
227 page_cache_release(page);
228 }
229 if (ctl.page) {
230 kunmap(ctl.page);
231 SetPageUptodate(ctl.page);
232 unlock_page(ctl.page);
233 page_cache_release(ctl.page);
234 }
235out:
236 unlock_kernel();
237 return result;
238}
239
240static int
241smb_dir_open(struct inode *dir, struct file *file)
242{
243 struct dentry *dentry = file->f_path.dentry;
244 struct smb_sb_info *server;
245 int error = 0;
246
247 VERBOSE("(%s/%s)\n", dentry->d_parent->d_name.name,
248 file->f_path.dentry->d_name.name);
249
250 /*
251 * Directory timestamps in the core protocol aren't updated
252 * when a file is added, so we give them a very short TTL.
253 */
254 lock_kernel();
255 server = server_from_dentry(dentry);
256 if (server->opt.protocol < SMB_PROTOCOL_LANMAN2) {
257 unsigned long age = jiffies - SMB_I(dir)->oldmtime;
258 if (age > 2*HZ)
259 smb_invalid_dir_cache(dir);
260 }
261
262 /*
263 * Note: in order to allow the smbmount process to open the
264 * mount point, we only revalidate if the connection is valid or
265 * if the process is trying to access something other than the root.
266 */
267 if (server->state == CONN_VALID || !IS_ROOT(dentry))
268 error = smb_revalidate_inode(dentry);
269 unlock_kernel();
270 return error;
271}
272
273/*
274 * Dentry operations routines
275 */
276static int smb_lookup_validate(struct dentry *, struct nameidata *);
277static int smb_hash_dentry(struct dentry *, struct qstr *);
278static int smb_compare_dentry(struct dentry *, struct qstr *, struct qstr *);
279static int smb_delete_dentry(struct dentry *);
280
281static const struct dentry_operations smbfs_dentry_operations =
282{
283 .d_revalidate = smb_lookup_validate,
284 .d_hash = smb_hash_dentry,
285 .d_compare = smb_compare_dentry,
286 .d_delete = smb_delete_dentry,
287};
288
289static const struct dentry_operations smbfs_dentry_operations_case =
290{
291 .d_revalidate = smb_lookup_validate,
292 .d_delete = smb_delete_dentry,
293};
294
295
296/*
297 * This is the callback when the dcache has a lookup hit.
298 */
299static int
300smb_lookup_validate(struct dentry * dentry, struct nameidata *nd)
301{
302 struct smb_sb_info *server = server_from_dentry(dentry);
303 struct inode * inode = dentry->d_inode;
304 unsigned long age = jiffies - dentry->d_time;
305 int valid;
306
307 /*
308 * The default validation is based on dentry age:
309 * we believe in dentries for a few seconds. (But each
310 * successful server lookup renews the timestamp.)
311 */
312 valid = (age <= SMB_MAX_AGE(server));
313#ifdef SMBFS_DEBUG_VERBOSE
314 if (!valid)
315 VERBOSE("%s/%s not valid, age=%lu\n",
316 DENTRY_PATH(dentry), age);
317#endif
318
319 if (inode) {
320 lock_kernel();
321 if (is_bad_inode(inode)) {
322 PARANOIA("%s/%s has dud inode\n", DENTRY_PATH(dentry));
323 valid = 0;
324 } else if (!valid)
325 valid = (smb_revalidate_inode(dentry) == 0);
326 unlock_kernel();
327 } else {
328 /*
329 * What should we do for negative dentries?
330 */
331 }
332 return valid;
333}
334
335static int
336smb_hash_dentry(struct dentry *dir, struct qstr *this)
337{
338 unsigned long hash;
339 int i;
340
341 hash = init_name_hash();
342 for (i=0; i < this->len ; i++)
343 hash = partial_name_hash(tolower(this->name[i]), hash);
344 this->hash = end_name_hash(hash);
345
346 return 0;
347}
348
349static int
350smb_compare_dentry(struct dentry *dir, struct qstr *a, struct qstr *b)
351{
352 int i, result = 1;
353
354 if (a->len != b->len)
355 goto out;
356 for (i=0; i < a->len; i++) {
357 if (tolower(a->name[i]) != tolower(b->name[i]))
358 goto out;
359 }
360 result = 0;
361out:
362 return result;
363}
364
365/*
366 * This is the callback from dput() when d_count is going to 0.
367 * We use this to unhash dentries with bad inodes.
368 */
369static int
370smb_delete_dentry(struct dentry * dentry)
371{
372 if (dentry->d_inode) {
373 if (is_bad_inode(dentry->d_inode)) {
374 PARANOIA("bad inode, unhashing %s/%s\n",
375 DENTRY_PATH(dentry));
376 return 1;
377 }
378 } else {
379 /* N.B. Unhash negative dentries? */
380 }
381 return 0;
382}
383
384/*
385 * Initialize a new dentry
386 */
387void
388smb_new_dentry(struct dentry *dentry)
389{
390 struct smb_sb_info *server = server_from_dentry(dentry);
391
392 if (server->mnt->flags & SMB_MOUNT_CASE)
393 dentry->d_op = &smbfs_dentry_operations_case;
394 else
395 dentry->d_op = &smbfs_dentry_operations;
396 dentry->d_time = jiffies;
397}
398
399
400/*
401 * Whenever a lookup succeeds, we know the parent directories
402 * are all valid, so we want to update the dentry timestamps.
403 * N.B. Move this to dcache?
404 */
405void
406smb_renew_times(struct dentry * dentry)
407{
408 dget(dentry);
409 spin_lock(&dentry->d_lock);
410 for (;;) {
411 struct dentry *parent;
412
413 dentry->d_time = jiffies;
414 if (IS_ROOT(dentry))
415 break;
416 parent = dentry->d_parent;
417 dget(parent);
418 spin_unlock(&dentry->d_lock);
419 dput(dentry);
420 dentry = parent;
421 spin_lock(&dentry->d_lock);
422 }
423 spin_unlock(&dentry->d_lock);
424 dput(dentry);
425}
426
427static struct dentry *
428smb_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
429{
430 struct smb_fattr finfo;
431 struct inode *inode;
432 int error;
433 struct smb_sb_info *server;
434
435 error = -ENAMETOOLONG;
436 if (dentry->d_name.len > SMB_MAXNAMELEN)
437 goto out;
438
439 /* Do not allow lookup of names with backslashes in */
440 error = -EINVAL;
441 if (memchr(dentry->d_name.name, '\\', dentry->d_name.len))
442 goto out;
443
444 lock_kernel();
445 error = smb_proc_getattr(dentry, &finfo);
446#ifdef SMBFS_PARANOIA
447 if (error && error != -ENOENT)
448 PARANOIA("find %s/%s failed, error=%d\n",
449 DENTRY_PATH(dentry), error);
450#endif
451
452 inode = NULL;
453 if (error == -ENOENT)
454 goto add_entry;
455 if (!error) {
456 error = -EACCES;
457 finfo.f_ino = iunique(dentry->d_sb, 2);
458 inode = smb_iget(dir->i_sb, &finfo);
459 if (inode) {
460 add_entry:
461 server = server_from_dentry(dentry);
462 if (server->mnt->flags & SMB_MOUNT_CASE)
463 dentry->d_op = &smbfs_dentry_operations_case;
464 else
465 dentry->d_op = &smbfs_dentry_operations;
466
467 d_add(dentry, inode);
468 smb_renew_times(dentry);
469 error = 0;
470 }
471 }
472 unlock_kernel();
473out:
474 return ERR_PTR(error);
475}
476
477/*
478 * This code is common to all routines creating a new inode.
479 */
480static int
481smb_instantiate(struct dentry *dentry, __u16 fileid, int have_id)
482{
483 struct smb_sb_info *server = server_from_dentry(dentry);
484 struct inode *inode;
485 int error;
486 struct smb_fattr fattr;
487
488 VERBOSE("file %s/%s, fileid=%u\n", DENTRY_PATH(dentry), fileid);
489
490 error = smb_proc_getattr(dentry, &fattr);
491 if (error)
492 goto out_close;
493
494 smb_renew_times(dentry);
495 fattr.f_ino = iunique(dentry->d_sb, 2);
496 inode = smb_iget(dentry->d_sb, &fattr);
497 if (!inode)
498 goto out_no_inode;
499
500 if (have_id) {
501 struct smb_inode_info *ei = SMB_I(inode);
502 ei->fileid = fileid;
503 ei->access = SMB_O_RDWR;
504 ei->open = server->generation;
505 }
506 d_instantiate(dentry, inode);
507out:
508 return error;
509
510out_no_inode:
511 error = -EACCES;
512out_close:
513 if (have_id) {
514 PARANOIA("%s/%s failed, error=%d, closing %u\n",
515 DENTRY_PATH(dentry), error, fileid);
516 smb_close_fileid(dentry, fileid);
517 }
518 goto out;
519}
520
521/* N.B. How should the mode argument be used? */
522static int
523smb_create(struct inode *dir, struct dentry *dentry, int mode,
524 struct nameidata *nd)
525{
526 struct smb_sb_info *server = server_from_dentry(dentry);
527 __u16 fileid;
528 int error;
529 struct iattr attr;
530
531 VERBOSE("creating %s/%s, mode=%d\n", DENTRY_PATH(dentry), mode);
532
533 lock_kernel();
534 smb_invalid_dir_cache(dir);
535 error = smb_proc_create(dentry, 0, get_seconds(), &fileid);
536 if (!error) {
537 if (server->opt.capabilities & SMB_CAP_UNIX) {
538 /* Set attributes for new file */
539 attr.ia_valid = ATTR_MODE;
540 attr.ia_mode = mode;
541 error = smb_proc_setattr_unix(dentry, &attr, 0, 0);
542 }
543 error = smb_instantiate(dentry, fileid, 1);
544 } else {
545 PARANOIA("%s/%s failed, error=%d\n",
546 DENTRY_PATH(dentry), error);
547 }
548 unlock_kernel();
549 return error;
550}
551
552/* N.B. How should the mode argument be used? */
553static int
554smb_mkdir(struct inode *dir, struct dentry *dentry, int mode)
555{
556 struct smb_sb_info *server = server_from_dentry(dentry);
557 int error;
558 struct iattr attr;
559
560 lock_kernel();
561 smb_invalid_dir_cache(dir);
562 error = smb_proc_mkdir(dentry);
563 if (!error) {
564 if (server->opt.capabilities & SMB_CAP_UNIX) {
565 /* Set attributes for new directory */
566 attr.ia_valid = ATTR_MODE;
567 attr.ia_mode = mode;
568 error = smb_proc_setattr_unix(dentry, &attr, 0, 0);
569 }
570 error = smb_instantiate(dentry, 0, 0);
571 }
572 unlock_kernel();
573 return error;
574}
575
576static int
577smb_rmdir(struct inode *dir, struct dentry *dentry)
578{
579 struct inode *inode = dentry->d_inode;
580 int error;
581
582 /*
583 * Close the directory if it's open.
584 */
585 lock_kernel();
586 smb_close(inode);
587
588 /*
589 * Check that nobody else is using the directory..
590 */
591 error = -EBUSY;
592 if (!d_unhashed(dentry))
593 goto out;
594
595 smb_invalid_dir_cache(dir);
596 error = smb_proc_rmdir(dentry);
597
598out:
599 unlock_kernel();
600 return error;
601}
602
603static int
604smb_unlink(struct inode *dir, struct dentry *dentry)
605{
606 int error;
607
608 /*
609 * Close the file if it's open.
610 */
611 lock_kernel();
612 smb_close(dentry->d_inode);
613
614 smb_invalid_dir_cache(dir);
615 error = smb_proc_unlink(dentry);
616 if (!error)
617 smb_renew_times(dentry);
618 unlock_kernel();
619 return error;
620}
621
622static int
623smb_rename(struct inode *old_dir, struct dentry *old_dentry,
624 struct inode *new_dir, struct dentry *new_dentry)
625{
626 int error;
627
628 /*
629 * Close any open files, and check whether to delete the
630 * target before attempting the rename.
631 */
632 lock_kernel();
633 if (old_dentry->d_inode)
634 smb_close(old_dentry->d_inode);
635 if (new_dentry->d_inode) {
636 smb_close(new_dentry->d_inode);
637 error = smb_proc_unlink(new_dentry);
638 if (error) {
639 VERBOSE("unlink %s/%s, error=%d\n",
640 DENTRY_PATH(new_dentry), error);
641 goto out;
642 }
643 /* FIXME */
644 d_delete(new_dentry);
645 }
646
647 smb_invalid_dir_cache(old_dir);
648 smb_invalid_dir_cache(new_dir);
649 error = smb_proc_mv(old_dentry, new_dentry);
650 if (!error) {
651 smb_renew_times(old_dentry);
652 smb_renew_times(new_dentry);
653 }
654out:
655 unlock_kernel();
656 return error;
657}
658
659/*
660 * FIXME: samba servers won't let you create device nodes unless uid/gid
661 * matches the connection credentials (and we don't know which those are ...)
662 */
663static int
664smb_make_node(struct inode *dir, struct dentry *dentry, int mode, dev_t dev)
665{
666 int error;
667 struct iattr attr;
668
669 attr.ia_valid = ATTR_MODE | ATTR_UID | ATTR_GID;
670 attr.ia_mode = mode;
671 current_euid_egid(&attr.ia_uid, &attr.ia_gid);
672
673 if (!new_valid_dev(dev))
674 return -EINVAL;
675
676 smb_invalid_dir_cache(dir);
677 error = smb_proc_setattr_unix(dentry, &attr, MAJOR(dev), MINOR(dev));
678 if (!error) {
679 error = smb_instantiate(dentry, 0, 0);
680 }
681 return error;
682}
683
684/*
685 * dentry = existing file
686 * new_dentry = new file
687 */
688static int
689smb_link(struct dentry *dentry, struct inode *dir, struct dentry *new_dentry)
690{
691 int error;
692
693 DEBUG1("smb_link old=%s/%s new=%s/%s\n",
694 DENTRY_PATH(dentry), DENTRY_PATH(new_dentry));
695 smb_invalid_dir_cache(dir);
696 error = smb_proc_link(server_from_dentry(dentry), dentry, new_dentry);
697 if (!error) {
698 smb_renew_times(dentry);
699 error = smb_instantiate(new_dentry, 0, 0);
700 }
701 return error;
702}
diff --git a/fs/smbfs/file.c b/fs/smbfs/file.c
deleted file mode 100644
index 8e187a0f94bb..000000000000
--- a/fs/smbfs/file.c
+++ /dev/null
@@ -1,454 +0,0 @@
1/*
2 * file.c
3 *
4 * Copyright (C) 1995, 1996, 1997 by Paal-Kr. Engstad and Volker Lendecke
5 * Copyright (C) 1997 by Volker Lendecke
6 *
7 * Please add a note about your changes to smbfs in the ChangeLog file.
8 */
9
10#include <linux/time.h>
11#include <linux/kernel.h>
12#include <linux/errno.h>
13#include <linux/fcntl.h>
14#include <linux/stat.h>
15#include <linux/mm.h>
16#include <linux/pagemap.h>
17#include <linux/smp_lock.h>
18#include <linux/net.h>
19#include <linux/aio.h>
20
21#include <asm/uaccess.h>
22#include <asm/system.h>
23
24#include <linux/smbno.h>
25#include <linux/smb_fs.h>
26
27#include "smb_debug.h"
28#include "proto.h"
29
30static int
31smb_fsync(struct file *file, int datasync)
32{
33 struct dentry *dentry = file->f_path.dentry;
34 struct smb_sb_info *server = server_from_dentry(dentry);
35 int result;
36
37 VERBOSE("sync file %s/%s\n", DENTRY_PATH(dentry));
38
39 /*
40 * The VFS will writepage() all dirty pages for us, but we
41 * should send a SMBflush to the server, letting it know that
42 * we want things synchronized with actual storage.
43 *
44 * Note: this function requires all pages to have been written already
45 * (should be ok with writepage_sync)
46 */
47 result = smb_proc_flush(server, SMB_I(dentry->d_inode)->fileid);
48 return result;
49}
50
51/*
52 * Read a page synchronously.
53 */
54static int
55smb_readpage_sync(struct dentry *dentry, struct page *page)
56{
57 char *buffer = kmap(page);
58 loff_t offset = (loff_t)page->index << PAGE_CACHE_SHIFT;
59 struct smb_sb_info *server = server_from_dentry(dentry);
60 unsigned int rsize = smb_get_rsize(server);
61 int count = PAGE_SIZE;
62 int result;
63
64 VERBOSE("file %s/%s, count=%d@%Ld, rsize=%d\n",
65 DENTRY_PATH(dentry), count, offset, rsize);
66
67 result = smb_open(dentry, SMB_O_RDONLY);
68 if (result < 0)
69 goto io_error;
70
71 do {
72 if (count < rsize)
73 rsize = count;
74
75 result = server->ops->read(dentry->d_inode,offset,rsize,buffer);
76 if (result < 0)
77 goto io_error;
78
79 count -= result;
80 offset += result;
81 buffer += result;
82 dentry->d_inode->i_atime =
83 current_fs_time(dentry->d_inode->i_sb);
84 if (result < rsize)
85 break;
86 } while (count);
87
88 memset(buffer, 0, count);
89 flush_dcache_page(page);
90 SetPageUptodate(page);
91 result = 0;
92
93io_error:
94 kunmap(page);
95 unlock_page(page);
96 return result;
97}
98
99/*
100 * We are called with the page locked and we unlock it when done.
101 */
102static int
103smb_readpage(struct file *file, struct page *page)
104{
105 int error;
106 struct dentry *dentry = file->f_path.dentry;
107
108 page_cache_get(page);
109 error = smb_readpage_sync(dentry, page);
110 page_cache_release(page);
111 return error;
112}
113
114/*
115 * Write a page synchronously.
116 * Offset is the data offset within the page.
117 */
118static int
119smb_writepage_sync(struct inode *inode, struct page *page,
120 unsigned long pageoffset, unsigned int count)
121{
122 loff_t offset;
123 char *buffer = kmap(page) + pageoffset;
124 struct smb_sb_info *server = server_from_inode(inode);
125 unsigned int wsize = smb_get_wsize(server);
126 int ret = 0;
127
128 offset = ((loff_t)page->index << PAGE_CACHE_SHIFT) + pageoffset;
129 VERBOSE("file ino=%ld, fileid=%d, count=%d@%Ld, wsize=%d\n",
130 inode->i_ino, SMB_I(inode)->fileid, count, offset, wsize);
131
132 do {
133 int write_ret;
134
135 if (count < wsize)
136 wsize = count;
137
138 write_ret = server->ops->write(inode, offset, wsize, buffer);
139 if (write_ret < 0) {
140 PARANOIA("failed write, wsize=%d, write_ret=%d\n",
141 wsize, write_ret);
142 ret = write_ret;
143 break;
144 }
145 /* N.B. what if result < wsize?? */
146#ifdef SMBFS_PARANOIA
147 if (write_ret < wsize)
148 PARANOIA("short write, wsize=%d, write_ret=%d\n",
149 wsize, write_ret);
150#endif
151 buffer += wsize;
152 offset += wsize;
153 count -= wsize;
154 /*
155 * Update the inode now rather than waiting for a refresh.
156 */
157 inode->i_mtime = inode->i_atime = current_fs_time(inode->i_sb);
158 SMB_I(inode)->flags |= SMB_F_LOCALWRITE;
159 if (offset > inode->i_size)
160 inode->i_size = offset;
161 } while (count);
162
163 kunmap(page);
164 return ret;
165}
166
167/*
168 * Write a page to the server. This will be used for NFS swapping only
169 * (for now), and we currently do this synchronously only.
170 *
171 * We are called with the page locked and we unlock it when done.
172 */
173static int
174smb_writepage(struct page *page, struct writeback_control *wbc)
175{
176 struct address_space *mapping = page->mapping;
177 struct inode *inode;
178 unsigned long end_index;
179 unsigned offset = PAGE_CACHE_SIZE;
180 int err;
181
182 BUG_ON(!mapping);
183 inode = mapping->host;
184 BUG_ON(!inode);
185
186 end_index = inode->i_size >> PAGE_CACHE_SHIFT;
187
188 /* easy case */
189 if (page->index < end_index)
190 goto do_it;
191 /* things got complicated... */
192 offset = inode->i_size & (PAGE_CACHE_SIZE-1);
193 /* OK, are we completely out? */
194 if (page->index >= end_index+1 || !offset)
195 return 0; /* truncated - don't care */
196do_it:
197 page_cache_get(page);
198 err = smb_writepage_sync(inode, page, 0, offset);
199 SetPageUptodate(page);
200 unlock_page(page);
201 page_cache_release(page);
202 return err;
203}
204
205static int
206smb_updatepage(struct file *file, struct page *page, unsigned long offset,
207 unsigned int count)
208{
209 struct dentry *dentry = file->f_path.dentry;
210
211 DEBUG1("(%s/%s %d@%lld)\n", DENTRY_PATH(dentry), count,
212 ((unsigned long long)page->index << PAGE_CACHE_SHIFT) + offset);
213
214 return smb_writepage_sync(dentry->d_inode, page, offset, count);
215}
216
217static ssize_t
218smb_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
219 unsigned long nr_segs, loff_t pos)
220{
221 struct file * file = iocb->ki_filp;
222 struct dentry * dentry = file->f_path.dentry;
223 ssize_t status;
224
225 VERBOSE("file %s/%s, count=%lu@%lu\n", DENTRY_PATH(dentry),
226 (unsigned long) iocb->ki_left, (unsigned long) pos);
227
228 status = smb_revalidate_inode(dentry);
229 if (status) {
230 PARANOIA("%s/%s validation failed, error=%Zd\n",
231 DENTRY_PATH(dentry), status);
232 goto out;
233 }
234
235 VERBOSE("before read, size=%ld, flags=%x, atime=%ld\n",
236 (long)dentry->d_inode->i_size,
237 dentry->d_inode->i_flags, dentry->d_inode->i_atime.tv_sec);
238
239 status = generic_file_aio_read(iocb, iov, nr_segs, pos);
240out:
241 return status;
242}
243
244static int
245smb_file_mmap(struct file * file, struct vm_area_struct * vma)
246{
247 struct dentry * dentry = file->f_path.dentry;
248 int status;
249
250 VERBOSE("file %s/%s, address %lu - %lu\n",
251 DENTRY_PATH(dentry), vma->vm_start, vma->vm_end);
252
253 status = smb_revalidate_inode(dentry);
254 if (status) {
255 PARANOIA("%s/%s validation failed, error=%d\n",
256 DENTRY_PATH(dentry), status);
257 goto out;
258 }
259 status = generic_file_mmap(file, vma);
260out:
261 return status;
262}
263
264static ssize_t
265smb_file_splice_read(struct file *file, loff_t *ppos,
266 struct pipe_inode_info *pipe, size_t count,
267 unsigned int flags)
268{
269 struct dentry *dentry = file->f_path.dentry;
270 ssize_t status;
271
272 VERBOSE("file %s/%s, pos=%Ld, count=%lu\n",
273 DENTRY_PATH(dentry), *ppos, count);
274
275 status = smb_revalidate_inode(dentry);
276 if (status) {
277 PARANOIA("%s/%s validation failed, error=%Zd\n",
278 DENTRY_PATH(dentry), status);
279 goto out;
280 }
281 status = generic_file_splice_read(file, ppos, pipe, count, flags);
282out:
283 return status;
284}
285
286/*
287 * This does the "real" work of the write. The generic routine has
288 * allocated the page, locked it, done all the page alignment stuff
289 * calculations etc. Now we should just copy the data from user
290 * space and write it back to the real medium..
291 *
292 * If the writer ends up delaying the write, the writer needs to
293 * increment the page use counts until he is done with the page.
294 */
295static int smb_write_begin(struct file *file, struct address_space *mapping,
296 loff_t pos, unsigned len, unsigned flags,
297 struct page **pagep, void **fsdata)
298{
299 pgoff_t index = pos >> PAGE_CACHE_SHIFT;
300 *pagep = grab_cache_page_write_begin(mapping, index, flags);
301 if (!*pagep)
302 return -ENOMEM;
303 return 0;
304}
305
306static int smb_write_end(struct file *file, struct address_space *mapping,
307 loff_t pos, unsigned len, unsigned copied,
308 struct page *page, void *fsdata)
309{
310 int status;
311 unsigned offset = pos & (PAGE_CACHE_SIZE - 1);
312
313 lock_kernel();
314 status = smb_updatepage(file, page, offset, copied);
315 unlock_kernel();
316
317 if (!status) {
318 if (!PageUptodate(page) && copied == PAGE_CACHE_SIZE)
319 SetPageUptodate(page);
320 status = copied;
321 }
322
323 unlock_page(page);
324 page_cache_release(page);
325
326 return status;
327}
328
329const struct address_space_operations smb_file_aops = {
330 .readpage = smb_readpage,
331 .writepage = smb_writepage,
332 .write_begin = smb_write_begin,
333 .write_end = smb_write_end,
334};
335
336/*
337 * Write to a file (through the page cache).
338 */
339static ssize_t
340smb_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
341 unsigned long nr_segs, loff_t pos)
342{
343 struct file * file = iocb->ki_filp;
344 struct dentry * dentry = file->f_path.dentry;
345 ssize_t result;
346
347 VERBOSE("file %s/%s, count=%lu@%lu\n",
348 DENTRY_PATH(dentry),
349 (unsigned long) iocb->ki_left, (unsigned long) pos);
350
351 result = smb_revalidate_inode(dentry);
352 if (result) {
353 PARANOIA("%s/%s validation failed, error=%Zd\n",
354 DENTRY_PATH(dentry), result);
355 goto out;
356 }
357
358 result = smb_open(dentry, SMB_O_WRONLY);
359 if (result)
360 goto out;
361
362 if (iocb->ki_left > 0) {
363 result = generic_file_aio_write(iocb, iov, nr_segs, pos);
364 VERBOSE("pos=%ld, size=%ld, mtime=%ld, atime=%ld\n",
365 (long) file->f_pos, (long) dentry->d_inode->i_size,
366 dentry->d_inode->i_mtime.tv_sec,
367 dentry->d_inode->i_atime.tv_sec);
368 }
369out:
370 return result;
371}
372
373static int
374smb_file_open(struct inode *inode, struct file * file)
375{
376 int result;
377 struct dentry *dentry = file->f_path.dentry;
378 int smb_mode = (file->f_mode & O_ACCMODE) - 1;
379
380 lock_kernel();
381 result = smb_open(dentry, smb_mode);
382 if (result)
383 goto out;
384 SMB_I(inode)->openers++;
385out:
386 unlock_kernel();
387 return result;
388}
389
390static int
391smb_file_release(struct inode *inode, struct file * file)
392{
393 lock_kernel();
394 if (!--SMB_I(inode)->openers) {
395 /* We must flush any dirty pages now as we won't be able to
396 write anything after close. mmap can trigger this.
397 "openers" should perhaps include mmap'ers ... */
398 filemap_write_and_wait(inode->i_mapping);
399 smb_close(inode);
400 }
401 unlock_kernel();
402 return 0;
403}
404
405/*
406 * Check whether the required access is compatible with
407 * an inode's permission. SMB doesn't recognize superuser
408 * privileges, so we need our own check for this.
409 */
410static int
411smb_file_permission(struct inode *inode, int mask)
412{
413 int mode = inode->i_mode;
414 int error = 0;
415
416 VERBOSE("mode=%x, mask=%x\n", mode, mask);
417
418 /* Look at user permissions */
419 mode >>= 6;
420 if (mask & ~mode & (MAY_READ | MAY_WRITE | MAY_EXEC))
421 error = -EACCES;
422 return error;
423}
424
425static loff_t smb_remote_llseek(struct file *file, loff_t offset, int origin)
426{
427 loff_t ret;
428 lock_kernel();
429 ret = generic_file_llseek_unlocked(file, offset, origin);
430 unlock_kernel();
431 return ret;
432}
433
434const struct file_operations smb_file_operations =
435{
436 .llseek = smb_remote_llseek,
437 .read = do_sync_read,
438 .aio_read = smb_file_aio_read,
439 .write = do_sync_write,
440 .aio_write = smb_file_aio_write,
441 .unlocked_ioctl = smb_ioctl,
442 .mmap = smb_file_mmap,
443 .open = smb_file_open,
444 .release = smb_file_release,
445 .fsync = smb_fsync,
446 .splice_read = smb_file_splice_read,
447};
448
449const struct inode_operations smb_file_inode_operations =
450{
451 .permission = smb_file_permission,
452 .getattr = smb_getattr,
453 .setattr = smb_notify_change,
454};
diff --git a/fs/smbfs/getopt.c b/fs/smbfs/getopt.c
deleted file mode 100644
index 7ae0f5273ab1..000000000000
--- a/fs/smbfs/getopt.c
+++ /dev/null
@@ -1,64 +0,0 @@
1/*
2 * getopt.c
3 */
4
5#include <linux/kernel.h>
6#include <linux/string.h>
7#include <linux/net.h>
8
9#include "getopt.h"
10
11/**
12 * smb_getopt - option parser
13 * @caller: name of the caller, for error messages
14 * @options: the options string
15 * @opts: an array of &struct option entries controlling parser operations
16 * @optopt: output; will contain the current option
17 * @optarg: output; will contain the value (if one exists)
18 * @flag: output; may be NULL; should point to a long for or'ing flags
19 * @value: output; may be NULL; will be overwritten with the integer value
20 * of the current argument.
21 *
22 * Helper to parse options on the format used by mount ("a=b,c=d,e,f").
23 * Returns opts->val if a matching entry in the 'opts' array is found,
24 * 0 when no more tokens are found, -1 if an error is encountered.
25 */
26int smb_getopt(char *caller, char **options, struct option *opts,
27 char **optopt, char **optarg, unsigned long *flag,
28 unsigned long *value)
29{
30 char *token;
31 char *val;
32 int i;
33
34 do {
35 if ((token = strsep(options, ",")) == NULL)
36 return 0;
37 } while (*token == '\0');
38 *optopt = token;
39
40 *optarg = NULL;
41 if ((val = strchr (token, '=')) != NULL) {
42 *val++ = 0;
43 if (value)
44 *value = simple_strtoul(val, NULL, 0);
45 *optarg = val;
46 }
47
48 for (i = 0; opts[i].name != NULL; i++) {
49 if (!strcmp(opts[i].name, token)) {
50 if (!opts[i].flag && (!val || !*val)) {
51 printk("%s: the %s option requires an argument\n",
52 caller, token);
53 return -1;
54 }
55
56 if (flag && opts[i].flag)
57 *flag |= opts[i].flag;
58
59 return opts[i].val;
60 }
61 }
62 printk("%s: Unrecognized mount option %s\n", caller, token);
63 return -1;
64}
diff --git a/fs/smbfs/getopt.h b/fs/smbfs/getopt.h
deleted file mode 100644
index 146219ac7c46..000000000000
--- a/fs/smbfs/getopt.h
+++ /dev/null
@@ -1,14 +0,0 @@
1#ifndef _LINUX_GETOPT_H
2#define _LINUX_GETOPT_H
3
/* One entry of the option table consumed by smb_getopt(). */
struct option {
	const char *name;	/* option name; NULL terminates the table */
	unsigned long flag;	/* bits or'ed into the flag output; 0 means
				   the option requires an argument */
	int val;		/* value smb_getopt() returns on a match */
};
9
10extern int smb_getopt(char *caller, char **options, struct option *opts,
11 char **optopt, char **optarg, unsigned long *flag,
12 unsigned long *value);
13
14#endif /* _LINUX_GETOPT_H */
diff --git a/fs/smbfs/inode.c b/fs/smbfs/inode.c
deleted file mode 100644
index 8fc5e50e142f..000000000000
--- a/fs/smbfs/inode.c
+++ /dev/null
@@ -1,844 +0,0 @@
1/*
2 * inode.c
3 *
4 * Copyright (C) 1995, 1996 by Paal-Kr. Engstad and Volker Lendecke
5 * Copyright (C) 1997 by Volker Lendecke
6 *
7 * Please add a note about your changes to smbfs in the ChangeLog file.
8 */
9
10#include <linux/module.h>
11#include <linux/time.h>
12#include <linux/kernel.h>
13#include <linux/mm.h>
14#include <linux/string.h>
15#include <linux/stat.h>
16#include <linux/errno.h>
17#include <linux/slab.h>
18#include <linux/init.h>
19#include <linux/file.h>
20#include <linux/dcache.h>
21#include <linux/smp_lock.h>
22#include <linux/nls.h>
23#include <linux/seq_file.h>
24#include <linux/mount.h>
25#include <linux/net.h>
26#include <linux/vfs.h>
27#include <linux/highuid.h>
28#include <linux/sched.h>
29#include <linux/smb_fs.h>
30#include <linux/smbno.h>
31#include <linux/smb_mount.h>
32
33#include <asm/system.h>
34#include <asm/uaccess.h>
35
36#include "smb_debug.h"
37#include "getopt.h"
38#include "proto.h"
39
40/* Always pick a default string */
41#ifdef CONFIG_SMB_NLS_REMOTE
42#define SMB_NLS_REMOTE CONFIG_SMB_NLS_REMOTE
43#else
44#define SMB_NLS_REMOTE ""
45#endif
46
47#define SMB_TTL_DEFAULT 1000
48
49static void smb_evict_inode(struct inode *);
50static void smb_put_super(struct super_block *);
51static int smb_statfs(struct dentry *, struct kstatfs *);
52static int smb_show_options(struct seq_file *, struct vfsmount *);
53
54static struct kmem_cache *smb_inode_cachep;
55
56static struct inode *smb_alloc_inode(struct super_block *sb)
57{
58 struct smb_inode_info *ei;
59 ei = (struct smb_inode_info *)kmem_cache_alloc(smb_inode_cachep, GFP_KERNEL);
60 if (!ei)
61 return NULL;
62 return &ei->vfs_inode;
63}
64
65static void smb_destroy_inode(struct inode *inode)
66{
67 kmem_cache_free(smb_inode_cachep, SMB_I(inode));
68}
69
70static void init_once(void *foo)
71{
72 struct smb_inode_info *ei = (struct smb_inode_info *) foo;
73
74 inode_init_once(&ei->vfs_inode);
75}
76
77static int init_inodecache(void)
78{
79 smb_inode_cachep = kmem_cache_create("smb_inode_cache",
80 sizeof(struct smb_inode_info),
81 0, (SLAB_RECLAIM_ACCOUNT|
82 SLAB_MEM_SPREAD),
83 init_once);
84 if (smb_inode_cachep == NULL)
85 return -ENOMEM;
86 return 0;
87}
88
89static void destroy_inodecache(void)
90{
91 kmem_cache_destroy(smb_inode_cachep);
92}
93
94static int smb_remount(struct super_block *sb, int *flags, char *data)
95{
96 *flags |= MS_NODIRATIME;
97 return 0;
98}
99
100static const struct super_operations smb_sops =
101{
102 .alloc_inode = smb_alloc_inode,
103 .destroy_inode = smb_destroy_inode,
104 .drop_inode = generic_delete_inode,
105 .evict_inode = smb_evict_inode,
106 .put_super = smb_put_super,
107 .statfs = smb_statfs,
108 .show_options = smb_show_options,
109 .remount_fs = smb_remount,
110};
111
112
113/* We are always generating a new inode here */
114struct inode *
115smb_iget(struct super_block *sb, struct smb_fattr *fattr)
116{
117 struct smb_sb_info *server = SMB_SB(sb);
118 struct inode *result;
119
120 DEBUG1("smb_iget: %p\n", fattr);
121
122 result = new_inode(sb);
123 if (!result)
124 return result;
125 result->i_ino = fattr->f_ino;
126 SMB_I(result)->open = 0;
127 SMB_I(result)->fileid = 0;
128 SMB_I(result)->access = 0;
129 SMB_I(result)->flags = 0;
130 SMB_I(result)->closed = 0;
131 SMB_I(result)->openers = 0;
132 smb_set_inode_attr(result, fattr);
133 if (S_ISREG(result->i_mode)) {
134 result->i_op = &smb_file_inode_operations;
135 result->i_fop = &smb_file_operations;
136 result->i_data.a_ops = &smb_file_aops;
137 } else if (S_ISDIR(result->i_mode)) {
138 if (server->opt.capabilities & SMB_CAP_UNIX)
139 result->i_op = &smb_dir_inode_operations_unix;
140 else
141 result->i_op = &smb_dir_inode_operations;
142 result->i_fop = &smb_dir_operations;
143 } else if (S_ISLNK(result->i_mode)) {
144 result->i_op = &smb_link_inode_operations;
145 } else {
146 init_special_inode(result, result->i_mode, fattr->f_rdev);
147 }
148 insert_inode_hash(result);
149 return result;
150}
151
152/*
153 * Copy the inode data to a smb_fattr structure.
154 */
155void
156smb_get_inode_attr(struct inode *inode, struct smb_fattr *fattr)
157{
158 memset(fattr, 0, sizeof(struct smb_fattr));
159 fattr->f_mode = inode->i_mode;
160 fattr->f_nlink = inode->i_nlink;
161 fattr->f_ino = inode->i_ino;
162 fattr->f_uid = inode->i_uid;
163 fattr->f_gid = inode->i_gid;
164 fattr->f_size = inode->i_size;
165 fattr->f_mtime = inode->i_mtime;
166 fattr->f_ctime = inode->i_ctime;
167 fattr->f_atime = inode->i_atime;
168 fattr->f_blocks = inode->i_blocks;
169
170 fattr->attr = SMB_I(inode)->attr;
171 /*
172 * Keep the attributes in sync with the inode permissions.
173 */
174 if (fattr->f_mode & S_IWUSR)
175 fattr->attr &= ~aRONLY;
176 else
177 fattr->attr |= aRONLY;
178}
179
180/*
181 * Update the inode, possibly causing it to invalidate its pages if mtime/size
182 * is different from last time.
183 */
184void
185smb_set_inode_attr(struct inode *inode, struct smb_fattr *fattr)
186{
187 struct smb_inode_info *ei = SMB_I(inode);
188
189 /*
190 * A size change should have a different mtime, or same mtime
191 * but different size.
192 */
193 time_t last_time = inode->i_mtime.tv_sec;
194 loff_t last_sz = inode->i_size;
195
196 inode->i_mode = fattr->f_mode;
197 inode->i_nlink = fattr->f_nlink;
198 inode->i_uid = fattr->f_uid;
199 inode->i_gid = fattr->f_gid;
200 inode->i_ctime = fattr->f_ctime;
201 inode->i_blocks = fattr->f_blocks;
202 inode->i_size = fattr->f_size;
203 inode->i_mtime = fattr->f_mtime;
204 inode->i_atime = fattr->f_atime;
205 ei->attr = fattr->attr;
206
207 /*
208 * Update the "last time refreshed" field for revalidation.
209 */
210 ei->oldmtime = jiffies;
211
212 if (inode->i_mtime.tv_sec != last_time || inode->i_size != last_sz) {
213 VERBOSE("%ld changed, old=%ld, new=%ld, oz=%ld, nz=%ld\n",
214 inode->i_ino,
215 (long) last_time, (long) inode->i_mtime.tv_sec,
216 (long) last_sz, (long) inode->i_size);
217
218 if (!S_ISDIR(inode->i_mode))
219 invalidate_remote_inode(inode);
220 }
221}
222
/*
 * This is called if the connection has gone bad ...
 * try to kill off all the current inodes by shrinking the dentry cache of
 * the server's super block and then invalidating its inodes.
 */
void
smb_invalidate_inodes(struct smb_sb_info *server)
{
	struct super_block *sb = SB_of(server);

	VERBOSE("\n");
	shrink_dcache_sb(sb);
	invalidate_inodes(sb);
}
234
235/*
236 * This is called to update the inode attributes after
237 * we've made changes to a file or directory.
238 */
239static int
240smb_refresh_inode(struct dentry *dentry)
241{
242 struct inode *inode = dentry->d_inode;
243 int error;
244 struct smb_fattr fattr;
245
246 error = smb_proc_getattr(dentry, &fattr);
247 if (!error) {
248 smb_renew_times(dentry);
249 /*
250 * Check whether the type part of the mode changed,
251 * and don't update the attributes if it did.
252 *
253 * And don't dick with the root inode
254 */
255 if (inode->i_ino == 2)
256 return error;
257 if (S_ISLNK(inode->i_mode))
258 return error; /* VFS will deal with it */
259
260 if ((inode->i_mode & S_IFMT) == (fattr.f_mode & S_IFMT)) {
261 smb_set_inode_attr(inode, &fattr);
262 } else {
263 /*
264 * Big trouble! The inode has become a new object,
265 * so any operations attempted on it are invalid.
266 *
267 * To limit damage, mark the inode as bad so that
268 * subsequent lookup validations will fail.
269 */
270 PARANOIA("%s/%s changed mode, %07o to %07o\n",
271 DENTRY_PATH(dentry),
272 inode->i_mode, fattr.f_mode);
273
274 fattr.f_mode = inode->i_mode; /* save mode */
275 make_bad_inode(inode);
276 inode->i_mode = fattr.f_mode; /* restore mode */
277 /*
278 * No need to worry about unhashing the dentry: the
279 * lookup validation will see that the inode is bad.
280 * But we do want to invalidate the caches ...
281 */
282 if (!S_ISDIR(inode->i_mode))
283 invalidate_remote_inode(inode);
284 else
285 smb_invalid_dir_cache(inode);
286 error = -EIO;
287 }
288 }
289 return error;
290}
291
292/*
293 * This is called when we want to check whether the inode
294 * has changed on the server. If it has changed, we must
295 * invalidate our local caches.
296 */
297int
298smb_revalidate_inode(struct dentry *dentry)
299{
300 struct smb_sb_info *s = server_from_dentry(dentry);
301 struct inode *inode = dentry->d_inode;
302 int error = 0;
303
304 DEBUG1("smb_revalidate_inode\n");
305 lock_kernel();
306
307 /*
308 * Check whether we've recently refreshed the inode.
309 */
310 if (time_before(jiffies, SMB_I(inode)->oldmtime + SMB_MAX_AGE(s))) {
311 VERBOSE("up-to-date, ino=%ld, jiffies=%lu, oldtime=%lu\n",
312 inode->i_ino, jiffies, SMB_I(inode)->oldmtime);
313 goto out;
314 }
315
316 error = smb_refresh_inode(dentry);
317out:
318 unlock_kernel();
319 return error;
320}
321
322/*
323 * This routine is called when i_nlink == 0 and i_count goes to 0.
324 * All blocking cleanup operations need to go here to avoid races.
325 */
326static void
327smb_evict_inode(struct inode *ino)
328{
329 DEBUG1("ino=%ld\n", ino->i_ino);
330 truncate_inode_pages(&ino->i_data, 0);
331 end_writeback(ino);
332 lock_kernel();
333 if (smb_close(ino))
334 PARANOIA("could not close inode %ld\n", ino->i_ino);
335 unlock_kernel();
336}
337
338static struct option opts[] = {
339 { "version", 0, 'v' },
340 { "win95", SMB_MOUNT_WIN95, 1 },
341 { "oldattr", SMB_MOUNT_OLDATTR, 1 },
342 { "dirattr", SMB_MOUNT_DIRATTR, 1 },
343 { "case", SMB_MOUNT_CASE, 1 },
344 { "uid", 0, 'u' },
345 { "gid", 0, 'g' },
346 { "file_mode", 0, 'f' },
347 { "dir_mode", 0, 'd' },
348 { "iocharset", 0, 'i' },
349 { "codepage", 0, 'c' },
350 { "ttl", 0, 't' },
351 { NULL, 0, 0}
352};
353
354static int
355parse_options(struct smb_mount_data_kernel *mnt, char *options)
356{
357 int c;
358 unsigned long flags;
359 unsigned long value;
360 char *optarg;
361 char *optopt;
362
363 flags = 0;
364 while ( (c = smb_getopt("smbfs", &options, opts,
365 &optopt, &optarg, &flags, &value)) > 0) {
366
367 VERBOSE("'%s' -> '%s'\n", optopt, optarg ? optarg : "<none>");
368 switch (c) {
369 case 1:
370 /* got a "flag" option */
371 break;
372 case 'v':
373 if (value != SMB_MOUNT_VERSION) {
374 printk ("smbfs: Bad mount version %ld, expected %d\n",
375 value, SMB_MOUNT_VERSION);
376 return 0;
377 }
378 mnt->version = value;
379 break;
380 case 'u':
381 mnt->uid = value;
382 flags |= SMB_MOUNT_UID;
383 break;
384 case 'g':
385 mnt->gid = value;
386 flags |= SMB_MOUNT_GID;
387 break;
388 case 'f':
389 mnt->file_mode = (value & S_IRWXUGO) | S_IFREG;
390 flags |= SMB_MOUNT_FMODE;
391 break;
392 case 'd':
393 mnt->dir_mode = (value & S_IRWXUGO) | S_IFDIR;
394 flags |= SMB_MOUNT_DMODE;
395 break;
396 case 'i':
397 strlcpy(mnt->codepage.local_name, optarg,
398 SMB_NLS_MAXNAMELEN);
399 break;
400 case 'c':
401 strlcpy(mnt->codepage.remote_name, optarg,
402 SMB_NLS_MAXNAMELEN);
403 break;
404 case 't':
405 mnt->ttl = value;
406 break;
407 default:
408 printk ("smbfs: Unrecognized mount option %s\n",
409 optopt);
410 return -1;
411 }
412 }
413 mnt->flags = flags;
414 return c;
415}
416
417/*
418 * smb_show_options() is for displaying mount options in /proc/mounts.
419 * It tries to avoid showing settings that were not changed from their
420 * defaults.
421 */
422static int
423smb_show_options(struct seq_file *s, struct vfsmount *m)
424{
425 struct smb_mount_data_kernel *mnt = SMB_SB(m->mnt_sb)->mnt;
426 int i;
427
428 for (i = 0; opts[i].name != NULL; i++)
429 if (mnt->flags & opts[i].flag)
430 seq_printf(s, ",%s", opts[i].name);
431
432 if (mnt->flags & SMB_MOUNT_UID)
433 seq_printf(s, ",uid=%d", mnt->uid);
434 if (mnt->flags & SMB_MOUNT_GID)
435 seq_printf(s, ",gid=%d", mnt->gid);
436 if (mnt->mounted_uid != 0)
437 seq_printf(s, ",mounted_uid=%d", mnt->mounted_uid);
438
439 /*
440 * Defaults for file_mode and dir_mode are unknown to us; they
441 * depend on the current umask of the user doing the mount.
442 */
443 if (mnt->flags & SMB_MOUNT_FMODE)
444 seq_printf(s, ",file_mode=%04o", mnt->file_mode & S_IRWXUGO);
445 if (mnt->flags & SMB_MOUNT_DMODE)
446 seq_printf(s, ",dir_mode=%04o", mnt->dir_mode & S_IRWXUGO);
447
448 if (strcmp(mnt->codepage.local_name, CONFIG_NLS_DEFAULT))
449 seq_printf(s, ",iocharset=%s", mnt->codepage.local_name);
450 if (strcmp(mnt->codepage.remote_name, SMB_NLS_REMOTE))
451 seq_printf(s, ",codepage=%s", mnt->codepage.remote_name);
452
453 if (mnt->ttl != SMB_TTL_DEFAULT)
454 seq_printf(s, ",ttl=%d", mnt->ttl);
455
456 return 0;
457}
458
459static void
460smb_unload_nls(struct smb_sb_info *server)
461{
462 unload_nls(server->remote_nls);
463 unload_nls(server->local_nls);
464}
465
466static void
467smb_put_super(struct super_block *sb)
468{
469 struct smb_sb_info *server = SMB_SB(sb);
470
471 lock_kernel();
472
473 smb_lock_server(server);
474 server->state = CONN_INVALID;
475 smbiod_unregister_server(server);
476
477 smb_close_socket(server);
478
479 if (server->conn_pid)
480 kill_pid(server->conn_pid, SIGTERM, 1);
481
482 bdi_destroy(&server->bdi);
483 kfree(server->ops);
484 smb_unload_nls(server);
485 sb->s_fs_info = NULL;
486 smb_unlock_server(server);
487 put_pid(server->conn_pid);
488 kfree(server);
489
490 unlock_kernel();
491}
492
493static int smb_fill_super(struct super_block *sb, void *raw_data, int silent)
494{
495 struct smb_sb_info *server;
496 struct smb_mount_data_kernel *mnt;
497 struct smb_mount_data *oldmnt;
498 struct inode *root_inode;
499 struct smb_fattr root;
500 int ver;
501 void *mem;
502 static int warn_count;
503
504 lock_kernel();
505
506 if (warn_count < 5) {
507 warn_count++;
508 printk(KERN_EMERG "smbfs is deprecated and will be removed"
509 " from the 2.6.27 kernel. Please migrate to cifs\n");
510 }
511
512 if (!raw_data)
513 goto out_no_data;
514
515 oldmnt = (struct smb_mount_data *) raw_data;
516 ver = oldmnt->version;
517 if (ver != SMB_MOUNT_OLDVERSION && cpu_to_be32(ver) != SMB_MOUNT_ASCII)
518 goto out_wrong_data;
519
520 sb->s_flags |= MS_NODIRATIME;
521 sb->s_blocksize = 1024; /* Eh... Is this correct? */
522 sb->s_blocksize_bits = 10;
523 sb->s_magic = SMB_SUPER_MAGIC;
524 sb->s_op = &smb_sops;
525 sb->s_time_gran = 100;
526
527 server = kzalloc(sizeof(struct smb_sb_info), GFP_KERNEL);
528 if (!server)
529 goto out_no_server;
530 sb->s_fs_info = server;
531
532 if (bdi_setup_and_register(&server->bdi, "smbfs", BDI_CAP_MAP_COPY))
533 goto out_bdi;
534
535 sb->s_bdi = &server->bdi;
536
537 server->super_block = sb;
538 server->mnt = NULL;
539 server->sock_file = NULL;
540 init_waitqueue_head(&server->conn_wq);
541 init_MUTEX(&server->sem);
542 INIT_LIST_HEAD(&server->entry);
543 INIT_LIST_HEAD(&server->xmitq);
544 INIT_LIST_HEAD(&server->recvq);
545 server->conn_error = 0;
546 server->conn_pid = NULL;
547 server->state = CONN_INVALID; /* no connection yet */
548 server->generation = 0;
549
550 /* Allocate the global temp buffer and some superblock helper structs */
551 /* FIXME: move these to the smb_sb_info struct */
552 VERBOSE("alloc chunk = %lu\n", sizeof(struct smb_ops) +
553 sizeof(struct smb_mount_data_kernel));
554 mem = kmalloc(sizeof(struct smb_ops) +
555 sizeof(struct smb_mount_data_kernel), GFP_KERNEL);
556 if (!mem)
557 goto out_no_mem;
558
559 server->ops = mem;
560 smb_install_null_ops(server->ops);
561 server->mnt = mem + sizeof(struct smb_ops);
562
563 /* Setup NLS stuff */
564 server->remote_nls = NULL;
565 server->local_nls = NULL;
566
567 mnt = server->mnt;
568
569 memset(mnt, 0, sizeof(struct smb_mount_data_kernel));
570 strlcpy(mnt->codepage.local_name, CONFIG_NLS_DEFAULT,
571 SMB_NLS_MAXNAMELEN);
572 strlcpy(mnt->codepage.remote_name, SMB_NLS_REMOTE,
573 SMB_NLS_MAXNAMELEN);
574
575 mnt->ttl = SMB_TTL_DEFAULT;
576 if (ver == SMB_MOUNT_OLDVERSION) {
577 mnt->version = oldmnt->version;
578
579 SET_UID(mnt->uid, oldmnt->uid);
580 SET_GID(mnt->gid, oldmnt->gid);
581
582 mnt->file_mode = (oldmnt->file_mode & S_IRWXUGO) | S_IFREG;
583 mnt->dir_mode = (oldmnt->dir_mode & S_IRWXUGO) | S_IFDIR;
584
585 mnt->flags = (oldmnt->file_mode >> 9) | SMB_MOUNT_UID |
586 SMB_MOUNT_GID | SMB_MOUNT_FMODE | SMB_MOUNT_DMODE;
587 } else {
588 mnt->file_mode = S_IRWXU | S_IRGRP | S_IXGRP |
589 S_IROTH | S_IXOTH | S_IFREG;
590 mnt->dir_mode = S_IRWXU | S_IRGRP | S_IXGRP |
591 S_IROTH | S_IXOTH | S_IFDIR;
592 if (parse_options(mnt, raw_data))
593 goto out_bad_option;
594 }
595 mnt->mounted_uid = current_uid();
596 smb_setcodepage(server, &mnt->codepage);
597
598 /*
599 * Display the enabled options
600 * Note: smb_proc_getattr uses these in 2.4 (but was changed in 2.2)
601 */
602 if (mnt->flags & SMB_MOUNT_OLDATTR)
603 printk("SMBFS: Using core getattr (Win 95 speedup)\n");
604 else if (mnt->flags & SMB_MOUNT_DIRATTR)
605 printk("SMBFS: Using dir ff getattr\n");
606
607 if (smbiod_register_server(server) < 0) {
608 printk(KERN_ERR "smbfs: failed to start smbiod\n");
609 goto out_no_smbiod;
610 }
611
612 /*
613 * Keep the super block locked while we get the root inode.
614 */
615 smb_init_root_dirent(server, &root, sb);
616 root_inode = smb_iget(sb, &root);
617 if (!root_inode)
618 goto out_no_root;
619
620 sb->s_root = d_alloc_root(root_inode);
621 if (!sb->s_root)
622 goto out_no_root;
623
624 smb_new_dentry(sb->s_root);
625
626 unlock_kernel();
627 return 0;
628
629out_no_root:
630 iput(root_inode);
631out_no_smbiod:
632 smb_unload_nls(server);
633out_bad_option:
634 kfree(mem);
635out_no_mem:
636 bdi_destroy(&server->bdi);
637out_bdi:
638 if (!server->mnt)
639 printk(KERN_ERR "smb_fill_super: allocation failure\n");
640 sb->s_fs_info = NULL;
641 kfree(server);
642 goto out_fail;
643out_wrong_data:
644 printk(KERN_ERR "smbfs: mount_data version %d is not supported\n", ver);
645 goto out_fail;
646out_no_data:
647 printk(KERN_ERR "smb_fill_super: missing data argument\n");
648out_fail:
649 unlock_kernel();
650 return -EINVAL;
651out_no_server:
652 printk(KERN_ERR "smb_fill_super: cannot allocate struct smb_sb_info\n");
653 unlock_kernel();
654 return -ENOMEM;
655}
656
657static int
658smb_statfs(struct dentry *dentry, struct kstatfs *buf)
659{
660 int result;
661
662 lock_kernel();
663
664 result = smb_proc_dskattr(dentry, buf);
665
666 unlock_kernel();
667
668 buf->f_type = SMB_SUPER_MAGIC;
669 buf->f_namelen = SMB_MAXPATHLEN;
670 return result;
671}
672
673int smb_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
674{
675 int err = smb_revalidate_inode(dentry);
676 if (!err)
677 generic_fillattr(dentry->d_inode, stat);
678 return err;
679}
680
681int
682smb_notify_change(struct dentry *dentry, struct iattr *attr)
683{
684 struct inode *inode = dentry->d_inode;
685 struct smb_sb_info *server = server_from_dentry(dentry);
686 unsigned int mask = (S_IFREG | S_IFDIR | S_IRWXUGO);
687 int error, changed, refresh = 0;
688 struct smb_fattr fattr;
689
690 lock_kernel();
691
692 error = smb_revalidate_inode(dentry);
693 if (error)
694 goto out;
695
696 if ((error = inode_change_ok(inode, attr)) < 0)
697 goto out;
698
699 error = -EPERM;
700 if ((attr->ia_valid & ATTR_UID) && (attr->ia_uid != server->mnt->uid))
701 goto out;
702
703 if ((attr->ia_valid & ATTR_GID) && (attr->ia_uid != server->mnt->gid))
704 goto out;
705
706 if ((attr->ia_valid & ATTR_MODE) && (attr->ia_mode & ~mask))
707 goto out;
708
709 if ((attr->ia_valid & ATTR_SIZE) != 0) {
710 VERBOSE("changing %s/%s, old size=%ld, new size=%ld\n",
711 DENTRY_PATH(dentry),
712 (long) inode->i_size, (long) attr->ia_size);
713
714 filemap_write_and_wait(inode->i_mapping);
715
716 error = smb_open(dentry, O_WRONLY);
717 if (error)
718 goto out;
719 error = server->ops->truncate(inode, attr->ia_size);
720 if (error)
721 goto out;
722 truncate_setsize(inode, attr->ia_size);
723 refresh = 1;
724 }
725
726 if (server->opt.capabilities & SMB_CAP_UNIX) {
727 /* For now we don't want to set the size with setattr_unix */
728 attr->ia_valid &= ~ATTR_SIZE;
729 /* FIXME: only call if we actually want to set something? */
730 error = smb_proc_setattr_unix(dentry, attr, 0, 0);
731 if (!error)
732 refresh = 1;
733
734 goto out;
735 }
736
737 /*
738 * Initialize the fattr and check for changed fields.
739 * Note: CTIME under SMB is creation time rather than
740 * change time, so we don't attempt to change it.
741 */
742 smb_get_inode_attr(inode, &fattr);
743
744 changed = 0;
745 if ((attr->ia_valid & ATTR_MTIME) != 0) {
746 fattr.f_mtime = attr->ia_mtime;
747 changed = 1;
748 }
749 if ((attr->ia_valid & ATTR_ATIME) != 0) {
750 fattr.f_atime = attr->ia_atime;
751 /* Earlier protocols don't have an access time */
752 if (server->opt.protocol >= SMB_PROTOCOL_LANMAN2)
753 changed = 1;
754 }
755 if (changed) {
756 error = smb_proc_settime(dentry, &fattr);
757 if (error)
758 goto out;
759 refresh = 1;
760 }
761
762 /*
763 * Check for mode changes ... we're extremely limited in
764 * what can be set for SMB servers: just the read-only bit.
765 */
766 if ((attr->ia_valid & ATTR_MODE) != 0) {
767 VERBOSE("%s/%s mode change, old=%x, new=%x\n",
768 DENTRY_PATH(dentry), fattr.f_mode, attr->ia_mode);
769 changed = 0;
770 if (attr->ia_mode & S_IWUSR) {
771 if (fattr.attr & aRONLY) {
772 fattr.attr &= ~aRONLY;
773 changed = 1;
774 }
775 } else {
776 if (!(fattr.attr & aRONLY)) {
777 fattr.attr |= aRONLY;
778 changed = 1;
779 }
780 }
781 if (changed) {
782 error = smb_proc_setattr(dentry, &fattr);
783 if (error)
784 goto out;
785 refresh = 1;
786 }
787 }
788 error = 0;
789
790out:
791 if (refresh)
792 smb_refresh_inode(dentry);
793 unlock_kernel();
794 return error;
795}
796
797static int smb_get_sb(struct file_system_type *fs_type,
798 int flags, const char *dev_name, void *data, struct vfsmount *mnt)
799{
800 return get_sb_nodev(fs_type, flags, data, smb_fill_super, mnt);
801}
802
803static struct file_system_type smb_fs_type = {
804 .owner = THIS_MODULE,
805 .name = "smbfs",
806 .get_sb = smb_get_sb,
807 .kill_sb = kill_anon_super,
808 .fs_flags = FS_BINARY_MOUNTDATA,
809};
810
811static int __init init_smb_fs(void)
812{
813 int err;
814 DEBUG1("registering ...\n");
815
816 err = init_inodecache();
817 if (err)
818 goto out_inode;
819 err = smb_init_request_cache();
820 if (err)
821 goto out_request;
822 err = register_filesystem(&smb_fs_type);
823 if (err)
824 goto out;
825 return 0;
826out:
827 smb_destroy_request_cache();
828out_request:
829 destroy_inodecache();
830out_inode:
831 return err;
832}
833
834static void __exit exit_smb_fs(void)
835{
836 DEBUG1("unregistering ...\n");
837 unregister_filesystem(&smb_fs_type);
838 smb_destroy_request_cache();
839 destroy_inodecache();
840}
841
/* Name init_smb_fs/exit_smb_fs as the module entry and exit routines and declare the license. */
842module_init(init_smb_fs)
843module_exit(exit_smb_fs)
844MODULE_LICENSE("GPL");
diff --git a/fs/smbfs/ioctl.c b/fs/smbfs/ioctl.c
deleted file mode 100644
index 07215312ad39..000000000000
--- a/fs/smbfs/ioctl.c
+++ /dev/null
@@ -1,69 +0,0 @@
1/*
2 * ioctl.c
3 *
4 * Copyright (C) 1995, 1996 by Volker Lendecke
5 * Copyright (C) 1997 by Volker Lendecke
6 *
7 * Please add a note about your changes to smbfs in the ChangeLog file.
8 */
9
10#include <linux/errno.h>
11#include <linux/fs.h>
12#include <linux/ioctl.h>
13#include <linux/time.h>
14#include <linux/mm.h>
15#include <linux/highuid.h>
16#include <linux/smp_lock.h>
17#include <linux/net.h>
18
19#include <linux/smb_fs.h>
20#include <linux/smb_mount.h>
21
22#include <asm/uaccess.h>
23
24#include "proto.h"
25
26long
27smb_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
28{
29 struct smb_sb_info *server = server_from_inode(filp->f_path.dentry->d_inode);
30 struct smb_conn_opt opt;
31 int result = -EINVAL;
32
33 lock_kernel();
34 switch (cmd) {
35 uid16_t uid16;
36 uid_t uid32;
37 case SMB_IOC_GETMOUNTUID:
38 SET_UID(uid16, server->mnt->mounted_uid);
39 result = put_user(uid16, (uid16_t __user *) arg);
40 break;
41 case SMB_IOC_GETMOUNTUID32:
42 SET_UID(uid32, server->mnt->mounted_uid);
43 result = put_user(uid32, (uid_t __user *) arg);
44 break;
45
46 case SMB_IOC_NEWCONN:
47 /* arg is smb_conn_opt, or NULL if no connection was made */
48 if (!arg) {
49 result = 0;
50 smb_lock_server(server);
51 server->state = CONN_RETRIED;
52 printk(KERN_ERR "Connection attempt failed! [%d]\n",
53 server->conn_error);
54 smbiod_flush(server);
55 smb_unlock_server(server);
56 break;
57 }
58
59 result = -EFAULT;
60 if (!copy_from_user(&opt, (void __user *)arg, sizeof(opt)))
61 result = smb_newconn(server, &opt);
62 break;
63 default:
64 break;
65 }
66 unlock_kernel();
67
68 return result;
69}
diff --git a/fs/smbfs/proc.c b/fs/smbfs/proc.c
deleted file mode 100644
index 71c29b6670b4..000000000000
--- a/fs/smbfs/proc.c
+++ /dev/null
@@ -1,3507 +0,0 @@
1/*
2 * proc.c
3 *
4 * Copyright (C) 1995, 1996 by Paal-Kr. Engstad and Volker Lendecke
5 * Copyright (C) 1997 by Volker Lendecke
6 *
7 * Please add a note about your changes to smbfs in the ChangeLog file.
8 */
9
10#include <linux/types.h>
11#include <linux/capability.h>
12#include <linux/errno.h>
13#include <linux/slab.h>
14#include <linux/fs.h>
15#include <linux/file.h>
16#include <linux/stat.h>
17#include <linux/fcntl.h>
18#include <linux/dcache.h>
19#include <linux/nls.h>
20#include <linux/smp_lock.h>
21#include <linux/net.h>
22#include <linux/vfs.h>
23#include <linux/smb_fs.h>
24#include <linux/smbno.h>
25#include <linux/smb_mount.h>
26
27#include <net/sock.h>
28
29#include <asm/string.h>
30#include <asm/div64.h>
31
32#include "smb_debug.h"
33#include "proto.h"
34#include "request.h"
35
36
37/* Features. Undefine if they cause problems, this should perhaps be a
38 config option. */
39#define SMBFS_POSIX_UNLINK 1
40
41/* Allow smb_retry to be interrupted. */
42#define SMB_RETRY_INTR
43
/*
 * Accessors into a raw SMB packet buffer.  The argument is parenthesized
 * in every expansion so that an expression argument (for example a
 * conditional) is evaluated as a whole before the offset is applied.
 *
 *  SMB_VWV(packet)  address SMB_HEADER_LEN bytes into the packet
 *  SMB_CMD(packet)  the byte at offset 8
 *  SMB_WCT(packet)  the byte at offset SMB_HEADER_LEN - 1
 */
#define SMB_VWV(packet)  ((packet) + SMB_HEADER_LEN)
#define SMB_CMD(packet)  (*((packet) + 8))
#define SMB_WCT(packet)  (*((packet) + SMB_HEADER_LEN - 1))
47
48#define SMB_DIRINFO_SIZE 43
49#define SMB_STATUS_SIZE 21
50
51#define SMB_ST_BLKSIZE (PAGE_SIZE)
52#define SMB_ST_BLKSHIFT (PAGE_SHIFT)
53
54static struct smb_ops smb_ops_core;
55static struct smb_ops smb_ops_os2;
56static struct smb_ops smb_ops_win95;
57static struct smb_ops smb_ops_winNT;
58static struct smb_ops smb_ops_unix;
59static struct smb_ops smb_ops_null;
60
61static void
62smb_init_dirent(struct smb_sb_info *server, struct smb_fattr *fattr);
63static void
64smb_finish_dirent(struct smb_sb_info *server, struct smb_fattr *fattr);
65static int
66smb_proc_getattr_core(struct smb_sb_info *server, struct dentry *dir,
67 struct smb_fattr *fattr);
68static int
69smb_proc_getattr_ff(struct smb_sb_info *server, struct dentry *dentry,
70 struct smb_fattr *fattr);
71static int
72smb_proc_setattr_core(struct smb_sb_info *server, struct dentry *dentry,
73 u16 attr);
74static int
75smb_proc_setattr_ext(struct smb_sb_info *server,
76 struct inode *inode, struct smb_fattr *fattr);
77static int
78smb_proc_query_cifsunix(struct smb_sb_info *server);
79static void
80install_ops(struct smb_ops *dst, struct smb_ops *src);
81
82
/*
 * Convert the first @len bytes of @name to upper case in place.
 * Only the ASCII letters 'a'..'z' are changed.
 */
static void
str_upper(char *name, int len)
{
	int i;

	for (i = 0; i != len; i++) {
		char c = name[i];

		if (c >= 'a' && c <= 'z')
			name[i] = c - ('a' - 'A');
	}
}
93
94#if 0
/*
 * Convert the first @len bytes of @name to lower case in place.
 * Only the ASCII letters 'A'..'Z' are changed.
 */
static void
str_lower(char *name, int len)
{
	int i;

	for (i = 0; i != len; i++) {
		char c = name[i];

		if (c >= 'A' && c <= 'Z')
			name[i] = c + ('a' - 'A');
	}
}
105#endif
106
/*
 * Reverse the first @len bytes of @buf in place.
 * Used by the path-building code, which assembles paths back to front.
 */
static void reverse_string(char *buf, int len)
{
	int lo = 0;
	int hi = len - 1;

	while (lo < hi) {
		char tmp = buf[lo];

		buf[lo++] = buf[hi];
		buf[hi--] = tmp;
	}
}
119
/*
 * "Conversion" routine that does no conversion: copies @ilen bytes from
 * @input to @output.  The nls tables are ignored.
 *
 * Returns @ilen, or -ENAMETOOLONG when @olen is smaller than @ilen
 * (nothing is copied in that case).
 */
static int convert_memcpy(unsigned char *output, int olen,
			  const unsigned char *input, int ilen,
			  struct nls_table *nls_from,
			  struct nls_table *nls_to)
{
	if (ilen <= olen) {
		memcpy(output, input, ilen);
		return ilen;
	}
	return -ENAMETOOLONG;
}
131
/*
 * Emit the 4-character escape ":xHH" (HH = @ch as two lower-case hex
 * digits) for a byte that could not be converted.
 *
 * Exactly 4 bytes are stored in @output and no terminating NUL is
 * written, so a buffer with @olen == 4 is never overrun.
 *
 * Returns 4, or -ENAMETOOLONG when @olen is less than 4.
 */
static inline int write_char(unsigned char ch, char *output, int olen)
{
	char tmp[8];

	if (olen < 4)
		return -ENAMETOOLONG;
	/* Format into scratch space; only the 4 payload bytes are copied. */
	snprintf(tmp, sizeof(tmp), ":x%02x", ch);
	memcpy(output, tmp, 4);
	return 4;
}
139
/*
 * Emit the 5-character escape ":HHHH" (@ch as four lower-case hex
 * digits) for a character with no mapping in the target codepage.
 *
 * Exactly 5 bytes are stored in @output and no terminating NUL is
 * written, so a buffer with @olen == 5 is never overrun.
 *
 * Returns 5, or -ENAMETOOLONG when @olen is less than 5.
 */
static inline int write_unichar(wchar_t ch, char *output, int olen)
{
	char tmp[16];

	if (olen < 5)
		return -ENAMETOOLONG;
	/* Format into scratch space; only the 5 payload bytes are copied. */
	snprintf(tmp, sizeof(tmp), ":%04x", ch);
	memcpy(output, tmp, 5);
	return 5;
}
147
148/* convert from one "codepage" to another (possibly being utf8). */
149static int convert_cp(unsigned char *output, int olen,
150 const unsigned char *input, int ilen,
151 struct nls_table *nls_from,
152 struct nls_table *nls_to)
153{
154 int len = 0;
155 int n;
156 wchar_t ch;
157
158 while (ilen > 0) {
159 /* convert by changing to unicode and back to the new cp */
160 n = nls_from->char2uni(input, ilen, &ch);
161 if (n == -EINVAL) {
162 ilen--;
163 n = write_char(*input++, output, olen);
164 if (n < 0)
165 goto fail;
166 output += n;
167 olen -= n;
168 len += n;
169 continue;
170 } else if (n < 0)
171 goto fail;
172 input += n;
173 ilen -= n;
174
175 n = nls_to->uni2char(ch, output, olen);
176 if (n == -EINVAL)
177 n = write_unichar(ch, output, olen);
178 if (n < 0)
179 goto fail;
180 output += n;
181 olen -= n;
182
183 len += n;
184 }
185 return len;
186fail:
187 return n;
188}
189
190/* ----------------------------------------------------------- */
191
192/*
193 * nls_unicode
194 *
195 * This encodes/decodes little endian unicode format
196 */
197
/*
 * Store @uni in @out as two bytes, low byte first.
 * Returns 2, or -EINVAL when fewer than 2 bytes are available.
 */
static int uni2char(wchar_t uni, unsigned char *out, int boundlen)
{
	if (boundlen >= 2) {
		out[0] = uni & 0xff;
		out[1] = uni >> 8;
		return 2;
	}
	return -EINVAL;
}
206
/*
 * Read one character from @rawstring, stored as two bytes with the low
 * byte first, into *@uni.
 * Returns 2, or -EINVAL when fewer than 2 bytes are available.
 */
static int char2uni(const unsigned char *rawstring, int boundlen, wchar_t *uni)
{
	unsigned char lo, hi;

	if (boundlen >= 2) {
		lo = rawstring[0];
		hi = rawstring[1];
		*uni = (hi << 8) | lo;
		return 2;
	}
	return -EINVAL;
}
214
215static struct nls_table unicode_table = {
216 .charset = "unicode",
217 .uni2char = uni2char,
218 .char2uni = char2uni,
219};
220
221/* ----------------------------------------------------------- */
222
223static int setcodepage(struct nls_table **p, char *name)
224{
225 struct nls_table *nls;
226
227 if (!name || !*name) {
228 nls = NULL;
229 } else if ( (nls = load_nls(name)) == NULL) {
230 printk (KERN_ERR "smbfs: failed to load nls '%s'\n", name);
231 return -EINVAL;
232 }
233
234 /* if already set, unload the previous one. */
235 if (*p && *p != &unicode_table)
236 unload_nls(*p);
237 *p = nls;
238
239 return 0;
240}
241
242/* Handles all changes to codepage settings. */
243int smb_setcodepage(struct smb_sb_info *server, struct smb_nls_codepage *cp)
244{
245 int n = 0;
246
247 smb_lock_server(server);
248
249 /* Don't load any nls_* at all, if no remote is requested */
250 if (!*cp->remote_name)
251 goto out;
252
253 /* local */
254 n = setcodepage(&server->local_nls, cp->local_name);
255 if (n != 0)
256 goto out;
257
258 /* remote */
259 if (!strcmp(cp->remote_name, "unicode")) {
260 server->remote_nls = &unicode_table;
261 } else {
262 n = setcodepage(&server->remote_nls, cp->remote_name);
263 if (n != 0)
264 setcodepage(&server->local_nls, NULL);
265 }
266
267out:
268 if (server->local_nls != NULL && server->remote_nls != NULL)
269 server->ops->convert = convert_cp;
270 else
271 server->ops->convert = convert_memcpy;
272
273 smb_unlock_server(server);
274 return n;
275}
276
277
278/*****************************************************************************/
279/* */
280/* Encoding/Decoding section */
281/* */
282/*****************************************************************************/
283
284static __u8 *
285smb_encode_smb_length(__u8 * p, __u32 len)
286{
287 *p = 0;
288 *(p+1) = 0;
289 *(p+2) = (len & 0xFF00) >> 8;
290 *(p+3) = (len & 0xFF);
291 if (len > 0xFFFF)
292 {
293 *(p+1) = 1;
294 }
295 return p + 4;
296}
297
298/*
 * smb_build_path: build the path to entry and name storing it in buf.
 * The path returned will have the trailing '\0'.
 *
 * server: supplies the unicode mount flag, the convert routine and the
 *         nls tables
 * buf:    output buffer
 * maxlen: room in buf; clamped to SMB_MAXPATHLEN + 1
 * entry:  dentry to start from, may be NULL (then only name is encoded)
 * name:   optional final component appended after entry's path
 *
 * Returns the number of bytes stored in buf, terminator included, or a
 * negative error: -ENAMETOOLONG when space runs out, or whatever the
 * convert routine returned.
 */
302static int smb_build_path(struct smb_sb_info *server, unsigned char *buf,
303 int maxlen,
304 struct dentry *entry, struct qstr *name)
305{
306 unsigned char *path = buf;
307 int len;
308 int unicode = (server->mnt->flags & SMB_MOUNT_UNICODE) != 0;
309
 /* need room for a separator plus terminator: 2 bytes, or 4 in unicode mode */
310 if (maxlen < (2<<unicode))
311 return -ENAMETOOLONG;
312
313 if (maxlen > SMB_MAXPATHLEN + 1)
314 maxlen = SMB_MAXPATHLEN + 1;
315
316 if (entry == NULL)
317 goto test_name_and_out;
318
319 /*
320 * If IS_ROOT, we have to do no walking at all.
321 */
322 if (IS_ROOT(entry) && !name) {
 /* the root alone is a single backslash followed by the terminator */
323 *path++ = '\\';
324 if (unicode) *path++ = '\0';
325 *path++ = '\0';
326 if (unicode) *path++ = '\0';
327 return path-buf;
328 }
329
330 /*
331 * Build the path string walking the tree backward from end to ROOT
332 * and store it in reversed order [see reverse_string()]
333 */
 /*
  * Each dentry is pinned with dget() and its d_lock is held while its
  * name and parent pointer are read; the parent is pinned before the
  * child is released.
  */
334 dget(entry);
335 spin_lock(&entry->d_lock);
336 while (!IS_ROOT(entry)) {
337 struct dentry *parent;
338
339 if (maxlen < (3<<unicode)) {
340 spin_unlock(&entry->d_lock);
341 dput(entry);
342 return -ENAMETOOLONG;
343 }
344
345 len = server->ops->convert(path, maxlen-2,
346 entry->d_name.name, entry->d_name.len,
347 server->local_nls, server->remote_nls);
348 if (len < 0) {
349 spin_unlock(&entry->d_lock);
350 dput(entry);
351 return len;
352 }
 /*
  * Store this component byte-reversed; the whole-buffer reverse after
  * the loop puts its bytes back in order while flipping the order of
  * the components.
  */
353 reverse_string(path, len);
354 path += len;
355 if (unicode) {
356 /* Note: reverse order */
357 *path++ = '\0';
358 maxlen--;
359 }
360 *path++ = '\\';
361 maxlen -= len+1;
362
363 parent = entry->d_parent;
364 dget(parent);
365 spin_unlock(&entry->d_lock);
366 dput(entry);
367 entry = parent;
368 spin_lock(&entry->d_lock);
369 }
370 spin_unlock(&entry->d_lock);
371 dput(entry);
 /* undo the reversal: buf now reads from the root towards the start dentry */
372 reverse_string(buf, path-buf);
373
374 /* maxlen has space for at least one char */
375test_name_and_out:
376 if (name) {
377 if (maxlen < (3<<unicode))
378 return -ENAMETOOLONG;
 /* separator first, then the converted name (written in forward order) */
379 *path++ = '\\';
380 if (unicode) {
381 *path++ = '\0';
382 maxlen--;
383 }
384 len = server->ops->convert(path, maxlen-2,
385 name->name, name->len,
386 server->local_nls, server->remote_nls);
387 if (len < 0)
388 return len;
389 path += len;
390 maxlen -= len+1;
391 }
392 /* maxlen has space for at least one char */
393 *path++ = '\0';
394 if (unicode) *path++ = '\0';
395 return path-buf;
396}
397
398static int smb_encode_path(struct smb_sb_info *server, char *buf, int maxlen,
399 struct dentry *dir, struct qstr *name)
400{
401 int result;
402
403 result = smb_build_path(server, buf, maxlen, dir, name);
404 if (result < 0)
405 goto out;
406 if (server->opt.protocol <= SMB_PROTOCOL_COREPLUS)
407 str_upper(buf, result);
408out:
409 return result;
410}
411
412/* encode_path for non-trans2 request SMBs */
413static int smb_simple_encode_path(struct smb_request *req, char **p,
414 struct dentry * entry, struct qstr * name)
415{
416 struct smb_sb_info *server = req->rq_server;
417 char *s = *p;
418 int res;
419 int maxlen = ((char *)req->rq_buffer + req->rq_bufsize) - s;
420 int unicode = (server->mnt->flags & SMB_MOUNT_UNICODE);
421
422 if (!maxlen)
423 return -ENAMETOOLONG;
424 *s++ = 4; /* ASCII data format */
425
426 /*
427 * SMB Unicode strings must be 16bit aligned relative the start of the
428 * packet. If they are not they must be padded with 0.
429 */
430 if (unicode) {
431 int align = s - (char *)req->rq_buffer;
432 if (!(align & 1)) {
433 *s++ = '\0';
434 maxlen--;
435 }
436 }
437
438 res = smb_encode_path(server, s, maxlen-1, entry, name);
439 if (res < 0)
440 return res;
441 *p = s + res;
442 return 0;
443}
444
445/* The following are taken directly from msdos-fs */
446
447/* Linear day numbers of the respective 1sts in non-leap years. */
448
/*
 * Day number of the first of each month in a non-leap year, indexed by
 * month - 1.  Padded to 16 entries.
 */
static int day_n[] = {
	0,	/* Jan */
	31,	/* Feb */
	59,	/* Mar */
	90,	/* Apr */
	120,	/* May */
	151,	/* Jun */
	181,	/* Jul */
	212,	/* Aug */
	243,	/* Sep */
	273,	/* Oct */
	304,	/* Nov */
	334,	/* Dec */
	0, 0, 0, 0
};
452
453
454static time_t
455utc2local(struct smb_sb_info *server, time_t time)
456{
457 return time - server->opt.serverzone*60;
458}
459
460static time_t
461local2utc(struct smb_sb_info *server, time_t time)
462{
463 return time + server->opt.serverzone*60;
464}
465
466/* Convert a MS-DOS time/date pair to a UNIX date (seconds since 1 1 70). */
467
468static time_t
469date_dos2unix(struct smb_sb_info *server, __u16 date, __u16 time)
470{
471 int month, year;
472 time_t secs;
473
474 /* first subtract and mask after that... Otherwise, if
475 date == 0, bad things happen */
476 month = ((date >> 5) - 1) & 15;
477 year = date >> 9;
478 secs = (time & 31) * 2 + 60 * ((time >> 5) & 63) + (time >> 11) * 3600 + 86400 *
479 ((date & 31) - 1 + day_n[month] + (year / 4) + year * 365 - ((year & 3) == 0 &&
480 month < 2 ? 1 : 0) + 3653);
481 /* days since 1.1.70 plus 80's leap day */
482 return local2utc(server, secs);
483}
484
485
486/* Convert linear UNIX date to a MS-DOS time/date pair. */
487
488static void
489date_unix2dos(struct smb_sb_info *server,
490 int unix_date, __u16 *date, __u16 *time)
491{
492 int day, year, nl_day, month;
493
494 unix_date = utc2local(server, unix_date);
495 if (unix_date < 315532800)
496 unix_date = 315532800;
497
498 *time = (unix_date % 60) / 2 +
499 (((unix_date / 60) % 60) << 5) +
500 (((unix_date / 3600) % 24) << 11);
501
502 day = unix_date / 86400 - 3652;
503 year = day / 365;
504 if ((year + 3) / 4 + 365 * year > day)
505 year--;
506 day -= (year + 3) / 4 + 365 * year;
507 if (day == 59 && !(year & 3)) {
508 nl_day = day;
509 month = 2;
510 } else {
511 nl_day = (year & 3) || day <= 59 ? day : day - 1;
512 for (month = 1; month < 12; month++)
513 if (day_n[month] > nl_day)
514 break;
515 }
516 *date = nl_day - day_n[month - 1] + 1 + (month << 5) + (year << 9);
517}
518
519/* The following are taken from fs/ntfs/util.c */
520
521#define NTFS_TIME_OFFSET ((u64)(369*365 + 89) * 24 * 3600 * 10000000)
522
523/*
524 * Convert the NT UTC (based 1601-01-01, in hundred nanosecond units)
525 * into Unix UTC (based 1970-01-01, in seconds).
526 */
527static struct timespec
528smb_ntutc2unixutc(u64 ntutc)
529{
530 struct timespec ts;
531 /* FIXME: what about the timezone difference? */
532 /* Subtract the NTFS time offset, then convert to 1s intervals. */
533 u64 t = ntutc - NTFS_TIME_OFFSET;
534 ts.tv_nsec = do_div(t, 10000000) * 100;
535 ts.tv_sec = t;
536 return ts;
537}
538
539/* Convert the Unix UTC into NT time */
540static u64
541smb_unixutc2ntutc(struct timespec ts)
542{
543 /* Note: timezone conversion is probably wrong. */
544 /* return ((u64)utc2local(server, t)) * 10000000 + NTFS_TIME_OFFSET; */
545 return ((u64)ts.tv_sec) * 10000000 + ts.tv_nsec/100 + NTFS_TIME_OFFSET;
546}
547
548#define MAX_FILE_MODE 6
549static mode_t file_mode[] = {
550 S_IFREG, S_IFDIR, S_IFLNK, S_IFCHR, S_IFBLK, S_IFIFO, S_IFSOCK
551};
552
553static int smb_filetype_to_mode(u32 filetype)
554{
555 if (filetype > MAX_FILE_MODE) {
556 PARANOIA("Filetype out of range: %d\n", filetype);
557 return S_IFREG;
558 }
559 return file_mode[filetype];
560}
561
562static u32 smb_filetype_from_mode(int mode)
563{
564 if (S_ISREG(mode))
565 return UNIX_TYPE_FILE;
566 if (S_ISDIR(mode))
567 return UNIX_TYPE_DIR;
568 if (S_ISLNK(mode))
569 return UNIX_TYPE_SYMLINK;
570 if (S_ISCHR(mode))
571 return UNIX_TYPE_CHARDEV;
572 if (S_ISBLK(mode))
573 return UNIX_TYPE_BLKDEV;
574 if (S_ISFIFO(mode))
575 return UNIX_TYPE_FIFO;
576 if (S_ISSOCK(mode))
577 return UNIX_TYPE_SOCKET;
578 return UNIX_TYPE_UNKNOWN;
579}
580
581
582/*****************************************************************************/
583/* */
584/* Support section. */
585/* */
586/*****************************************************************************/
587
588__u32
589smb_len(__u8 * p)
590{
591 return ((*(p+1) & 0x1) << 16L) | (*(p+2) << 8L) | *(p+3);
592}
593
594static __u16
595smb_bcc(__u8 * packet)
596{
597 int pos = SMB_HEADER_LEN + SMB_WCT(packet) * sizeof(__u16);
598 return WVAL(packet, pos);
599}
600
601/* smb_valid_packet: We check if packet fulfills the basic
602 requirements of a smb packet */
603
604static int
605smb_valid_packet(__u8 * packet)
606{
607 return (packet[4] == 0xff
608 && packet[5] == 'S'
609 && packet[6] == 'M'
610 && packet[7] == 'B'
611 && (smb_len(packet) + 4 == SMB_HEADER_LEN
612 + SMB_WCT(packet) * 2 + smb_bcc(packet)));
613}
614
615/* smb_verify: We check if we got the answer we expected, and if we
616 got enough data. If bcc == -1, we don't care. */
617
618static int
619smb_verify(__u8 * packet, int command, int wct, int bcc)
620{
621 if (SMB_CMD(packet) != command)
622 goto bad_command;
623 if (SMB_WCT(packet) < wct)
624 goto bad_wct;
625 if (bcc != -1 && smb_bcc(packet) < bcc)
626 goto bad_bcc;
627 return 0;
628
629bad_command:
630 printk(KERN_ERR "smb_verify: command=%x, SMB_CMD=%x??\n",
631 command, SMB_CMD(packet));
632 goto fail;
633bad_wct:
634 printk(KERN_ERR "smb_verify: command=%x, wct=%d, SMB_WCT=%d??\n",
635 command, wct, SMB_WCT(packet));
636 goto fail;
637bad_bcc:
638 printk(KERN_ERR "smb_verify: command=%x, bcc=%d, SMB_BCC=%d??\n",
639 command, bcc, smb_bcc(packet));
640fail:
641 return -EIO;
642}
643
644/*
645 * Returns the maximum read or write size for the "payload". Making all of the
646 * packet fit within the negotiated max_xmit size.
647 *
648 * N.B. Since this value is usually computed before locking the server,
649 * the server's packet size must never be decreased!
650 */
651static inline int
652smb_get_xmitsize(struct smb_sb_info *server, int overhead)
653{
654 return server->opt.max_xmit - overhead;
655}
656
657/*
658 * Calculate the maximum read size
659 */
660int
661smb_get_rsize(struct smb_sb_info *server)
662{
663 /* readX has 12 parameters, read has 5 */
664 int overhead = SMB_HEADER_LEN + 12 * sizeof(__u16) + 2 + 1 + 2;
665 int size = smb_get_xmitsize(server, overhead);
666
667 VERBOSE("xmit=%d, size=%d\n", server->opt.max_xmit, size);
668
669 return size;
670}
671
672/*
673 * Calculate the maximum write size
674 */
675int
676smb_get_wsize(struct smb_sb_info *server)
677{
678 /* writeX has 14 parameters, write has 5 */
679 int overhead = SMB_HEADER_LEN + 14 * sizeof(__u16) + 2 + 1 + 2;
680 int size = smb_get_xmitsize(server, overhead);
681
682 VERBOSE("xmit=%d, size=%d\n", server->opt.max_xmit, size);
683
684 return size;
685}
686
687/*
688 * Convert SMB error codes to -E... errno values.
689 */
690int
691smb_errno(struct smb_request *req)
692{
693 int errcls = req->rq_rcls;
694 int error = req->rq_err;
695 char *class = "Unknown";
696
697 VERBOSE("errcls %d code %d from command 0x%x\n",
698 errcls, error, SMB_CMD(req->rq_header));
699
700 if (errcls == ERRDOS) {
701 switch (error) {
702 case ERRbadfunc:
703 return -EINVAL;
704 case ERRbadfile:
705 case ERRbadpath:
706 return -ENOENT;
707 case ERRnofids:
708 return -EMFILE;
709 case ERRnoaccess:
710 return -EACCES;
711 case ERRbadfid:
712 return -EBADF;
713 case ERRbadmcb:
714 return -EREMOTEIO;
715 case ERRnomem:
716 return -ENOMEM;
717 case ERRbadmem:
718 return -EFAULT;
719 case ERRbadenv:
720 case ERRbadformat:
721 return -EREMOTEIO;
722 case ERRbadaccess:
723 return -EACCES;
724 case ERRbaddata:
725 return -E2BIG;
726 case ERRbaddrive:
727 return -ENXIO;
728 case ERRremcd:
729 return -EREMOTEIO;
730 case ERRdiffdevice:
731 return -EXDEV;
732 case ERRnofiles:
733 return -ENOENT;
734 case ERRbadshare:
735 return -ETXTBSY;
736 case ERRlock:
737 return -EDEADLK;
738 case ERRfilexists:
739 return -EEXIST;
740 case ERROR_INVALID_PARAMETER:
741 return -EINVAL;
742 case ERROR_DISK_FULL:
743 return -ENOSPC;
744 case ERROR_INVALID_NAME:
745 return -ENOENT;
746 case ERROR_DIR_NOT_EMPTY:
747 return -ENOTEMPTY;
748 case ERROR_NOT_LOCKED:
749 return -ENOLCK;
750 case ERROR_ALREADY_EXISTS:
751 return -EEXIST;
752 default:
753 class = "ERRDOS";
754 goto err_unknown;
755 }
756 } else if (errcls == ERRSRV) {
757 switch (error) {
758 /* N.B. This is wrong ... EIO ? */
759 case ERRerror:
760 return -ENFILE;
761 case ERRbadpw:
762 return -EINVAL;
763 case ERRbadtype:
764 case ERRtimeout:
765 return -EIO;
766 case ERRaccess:
767 return -EACCES;
768 /*
769 * This is a fatal error, as it means the "tree ID"
770 * for this connection is no longer valid. We map
771 * to a special error code and get a new connection.
772 */
773 case ERRinvnid:
774 return -EBADSLT;
775 default:
776 class = "ERRSRV";
777 goto err_unknown;
778 }
779 } else if (errcls == ERRHRD) {
780 switch (error) {
781 case ERRnowrite:
782 return -EROFS;
783 case ERRbadunit:
784 return -ENODEV;
785 case ERRnotready:
786 return -EUCLEAN;
787 case ERRbadcmd:
788 case ERRdata:
789 return -EIO;
790 case ERRbadreq:
791 return -ERANGE;
792 case ERRbadshare:
793 return -ETXTBSY;
794 case ERRlock:
795 return -EDEADLK;
796 case ERRdiskfull:
797 return -ENOSPC;
798 default:
799 class = "ERRHRD";
800 goto err_unknown;
801 }
802 } else if (errcls == ERRCMD) {
803 class = "ERRCMD";
804 } else if (errcls == SUCCESS) {
805 return 0; /* This is the only valid 0 return */
806 }
807
808err_unknown:
809 printk(KERN_ERR "smb_errno: class %s, code %d from command 0x%x\n",
810 class, error, SMB_CMD(req->rq_header));
811 return -EIO;
812}
813
814/* smb_request_ok: We expect the server to be locked. Then we do the
815 request and check the answer completely. When smb_request_ok
816 returns 0, you can be quite sure that everything went well. When
817 the answer is <=0, the returned number is a valid unix errno. */
818
819static int
820smb_request_ok(struct smb_request *req, int command, int wct, int bcc)
821{
822 int result;
823
824 req->rq_resp_wct = wct;
825 req->rq_resp_bcc = bcc;
826
827 result = smb_add_request(req);
828 if (result != 0) {
829 DEBUG1("smb_request failed\n");
830 goto out;
831 }
832
833 if (smb_valid_packet(req->rq_header) != 0) {
834 PARANOIA("invalid packet!\n");
835 goto out;
836 }
837
838 result = smb_verify(req->rq_header, command, wct, bcc);
839
840out:
841 return result;
842}
843
844/*
 * This implements the NEWCONN ioctl. It installs the server pid,
 * sets server->state to CONN_VALID, and wakes up the waiting process.
 *
 * server: the mount whose connection is being (re)established
 * opt:    connection options from user space; opt->fd names the socket
 *
 * Returns 0 on success, -EINVAL if the connection is already valid,
 * -EACCES if the caller is neither the mounting uid nor has
 * CAP_SYS_ADMIN, and -EBADF if opt->fd is not an open file or fails
 * smb_valid_socket().
 */
848int
849smb_newconn(struct smb_sb_info *server, struct smb_conn_opt *opt)
850{
851 struct file *filp;
852 struct sock *sk;
853 int error;
854
855 VERBOSE("fd=%d, pid=%d\n", opt->fd, current->pid);
856
857 smb_lock_server(server);
858
859 /*
860 * Make sure we don't already have a valid connection ...
861 */
862 error = -EINVAL;
863 if (server->state == CONN_VALID)
864 goto out;
865
866 error = -EACCES;
867 if (current_uid() != server->mnt->mounted_uid &&
868 !capable(CAP_SYS_ADMIN))
869 goto out;
870
871 error = -EBADF;
 /* fget() takes a file reference; it is dropped again at out_putf if the socket check fails */
872 filp = fget(opt->fd);
873 if (!filp)
874 goto out;
875 if (!smb_valid_socket(filp->f_path.dentry->d_inode))
876 goto out_putf;
877
 /* From here on the call succeeds: record the new connection in the server state. */
878 server->sock_file = filp;
879 server->conn_pid = get_pid(task_pid(current));
880 server->opt = *opt;
881 server->generation += 1;
882 server->state = CONN_VALID;
883 error = 0;
884
885 if (server->conn_error) {
886 /*
887 * conn_error is the returncode we originally decided to
888 * drop the old connection on. This message should be positive
889 * and not make people ask questions on why smbfs is printing
890 * error messages ...
891 */
892 printk(KERN_INFO "SMB connection re-established (%d)\n",
893 server->conn_error);
894 server->conn_error = 0;
895 }
896
897 /*
898 * Store the server in sock user_data (Only used by sunrpc)
899 */
900 sk = SOCKET_I(filp->f_path.dentry->d_inode)->sk;
901 sk->sk_user_data = server;
902
903 /* chain into the data_ready callback */
 /* xchg() installs smb_data_ready and hands back the previous callback, kept in server->data_ready */
904 server->data_ready = xchg(&sk->sk_data_ready, smb_data_ready);
905
906 /* check if we have an old smbmount that uses seconds for the
907 serverzone */
 /* a value beyond +/- 12*60 is divided by 60 */
908 if (server->opt.serverzone > 12*60 || server->opt.serverzone < -12*60)
909 server->opt.serverzone /= 60;
910
911 /* now that we have an established connection we can detect the server
912 type and enable bug workarounds */
913 if (server->opt.protocol < SMB_PROTOCOL_LANMAN2)
914 install_ops(server->ops, &smb_ops_core);
915 else if (server->opt.protocol == SMB_PROTOCOL_LANMAN2)
916 install_ops(server->ops, &smb_ops_os2);
917 else if (server->opt.protocol == SMB_PROTOCOL_NT1 &&
918 (server->opt.max_xmit < 0x1000) &&
919 !(server->opt.capabilities & SMB_CAP_NT_SMBS)) {
920 /* FIXME: can we kill the WIN95 flag now? */
921 server->mnt->flags |= SMB_MOUNT_WIN95;
922 VERBOSE("detected WIN95 server\n");
923 install_ops(server->ops, &smb_ops_win95);
924 } else {
925 /*
926 * Samba has max_xmit 65535
927 * NT4spX has max_xmit 4536 (or something like that)
928 * win2k has ...
929 */
930 VERBOSE("detected NT1 (Samba, NT4/5) server\n");
931 install_ops(server->ops, &smb_ops_winNT);
932 }
933
934 /* FIXME: the win9x code wants to modify these ... (seek/trunc bug) */
 /* mount flags override the getattr operation chosen by install_ops() above */
935 if (server->mnt->flags & SMB_MOUNT_OLDATTR) {
936 server->ops->getattr = smb_proc_getattr_core;
937 } else if (server->mnt->flags & SMB_MOUNT_DIRATTR) {
938 server->ops->getattr = smb_proc_getattr_ff;
939 }
940
941 /* Decode server capabilities */
942 if (server->opt.capabilities & SMB_CAP_LARGE_FILES) {
943 /* Should be ok to set this now, as no one can access the
944 mount until the connection has been established. */
945 SB_of(server)->s_maxbytes = ~0ULL >> 1;
946 VERBOSE("LFS enabled\n");
947 }
948 if (server->opt.capabilities & SMB_CAP_UNICODE) {
949 server->mnt->flags |= SMB_MOUNT_UNICODE;
950 VERBOSE("Unicode enabled\n");
951 } else {
952 server->mnt->flags &= ~SMB_MOUNT_UNICODE;
953 }
954#if 0
955 /* flags we may test for other patches ... */
956 if (server->opt.capabilities & SMB_CAP_LARGE_READX) {
957 VERBOSE("Large reads enabled\n");
958 }
959 if (server->opt.capabilities & SMB_CAP_LARGE_WRITEX) {
960 VERBOSE("Large writes enabled\n");
961 }
962#endif
963 if (server->opt.capabilities & SMB_CAP_UNIX) {
964 struct inode *inode;
965 VERBOSE("Using UNIX CIFS extensions\n");
966 install_ops(server->ops, &smb_ops_unix);
 /* the root directory inode, if present, switches to the unix inode operations */
967 inode = SB_of(server)->s_root->d_inode;
968 if (inode)
969 inode->i_op = &smb_dir_inode_operations_unix;
970 }
971
972 VERBOSE("protocol=%d, max_xmit=%d, pid=%d capabilities=0x%x\n",
973 server->opt.protocol, server->opt.max_xmit,
974 pid_nr(server->conn_pid), server->opt.capabilities);
975
976 /* FIXME: this really should be done by smbmount. */
977 if (server->opt.max_xmit > SMB_MAX_PACKET_SIZE) {
978 server->opt.max_xmit = SMB_MAX_PACKET_SIZE;
979 }
980
 /* the remaining steps run with the server unlocked */
981 smb_unlock_server(server);
982 smbiod_wake_up();
983 if (server->opt.capabilities & SMB_CAP_UNIX)
984 smb_proc_query_cifsunix(server);
985
 /* bump the completion counter and wake everyone sleeping on conn_wq */
986 server->conn_complete++;
987 wake_up_interruptible_all(&server->conn_wq);
988 return error;
989
 /* failure exit: unlock the server, wake smbiod and return the error chosen above */
990out:
991 smb_unlock_server(server);
992 smbiod_wake_up();
993 return error;
994
995out_putf:
996 fput(filp);
997 goto out;
998}
999
1000/* smb_setup_header: We completely set up the packet. You only have to
1001 insert the command-specific fields */
1002
1003__u8 *
1004smb_setup_header(struct smb_request *req, __u8 command, __u16 wct, __u16 bcc)
1005{
1006 __u32 xmit_len = SMB_HEADER_LEN + wct * sizeof(__u16) + bcc + 2;
1007 __u8 *p = req->rq_header;
1008 struct smb_sb_info *server = req->rq_server;
1009
1010 p = smb_encode_smb_length(p, xmit_len - 4);
1011
1012 *p++ = 0xff;
1013 *p++ = 'S';
1014 *p++ = 'M';
1015 *p++ = 'B';
1016 *p++ = command;
1017
1018 memset(p, '\0', 19);
1019 p += 19;
1020 p += 8;
1021
1022 if (server->opt.protocol > SMB_PROTOCOL_CORE) {
1023 int flags = SMB_FLAGS_CASELESS_PATHNAMES;
1024 int flags2 = SMB_FLAGS2_LONG_PATH_COMPONENTS |
1025 SMB_FLAGS2_EXTENDED_ATTRIBUTES; /* EA? not really ... */
1026
1027 *(req->rq_header + smb_flg) = flags;
1028 if (server->mnt->flags & SMB_MOUNT_UNICODE)
1029 flags2 |= SMB_FLAGS2_UNICODE_STRINGS;
1030 WSET(req->rq_header, smb_flg2, flags2);
1031 }
1032 *p++ = wct; /* wct */
1033 p += 2 * wct;
1034 WSET(p, 0, bcc);
1035
1036 /* Include the header in the data to send */
1037 req->rq_iovlen = 1;
1038 req->rq_iov[0].iov_base = req->rq_header;
1039 req->rq_iov[0].iov_len = xmit_len - bcc;
1040
1041 return req->rq_buffer;
1042}
1043
1044static void
1045smb_setup_bcc(struct smb_request *req, __u8 *p)
1046{
1047 u16 bcc = p - req->rq_buffer;
1048 u8 *pbcc = req->rq_header + SMB_HEADER_LEN + 2*SMB_WCT(req->rq_header);
1049
1050 WSET(pbcc, 0, bcc);
1051
1052 smb_encode_smb_length(req->rq_header, SMB_HEADER_LEN +
1053 2*SMB_WCT(req->rq_header) - 2 + bcc);
1054
1055 /* Include the "bytes" in the data to send */
1056 req->rq_iovlen = 2;
1057 req->rq_iov[1].iov_base = req->rq_buffer;
1058 req->rq_iov[1].iov_len = bcc;
1059}
1060
1061static int
1062smb_proc_seek(struct smb_sb_info *server, __u16 fileid,
1063 __u16 mode, off_t offset)
1064{
1065 int result;
1066 struct smb_request *req;
1067
1068 result = -ENOMEM;
1069 if (! (req = smb_alloc_request(server, 0)))
1070 goto out;
1071
1072 smb_setup_header(req, SMBlseek, 4, 0);
1073 WSET(req->rq_header, smb_vwv0, fileid);
1074 WSET(req->rq_header, smb_vwv1, mode);
1075 DSET(req->rq_header, smb_vwv2, offset);
1076 req->rq_flags |= SMB_REQ_NORETRY;
1077
1078 result = smb_request_ok(req, SMBlseek, 2, 0);
1079 if (result < 0) {
1080 result = 0;
1081 goto out_free;
1082 }
1083
1084 result = DVAL(req->rq_header, smb_vwv0);
1085out_free:
1086 smb_rput(req);
1087out:
1088 return result;
1089}
1090
1091static int
1092smb_proc_open(struct smb_sb_info *server, struct dentry *dentry, int wish)
1093{
1094 struct inode *ino = dentry->d_inode;
1095 struct smb_inode_info *ei = SMB_I(ino);
1096 int mode, read_write = 0x42, read_only = 0x40;
1097 int res;
1098 char *p;
1099 struct smb_request *req;
1100
1101 /*
1102 * Attempt to open r/w, unless there are no write privileges.
1103 */
1104 mode = read_write;
1105 if (!(ino->i_mode & (S_IWUSR | S_IWGRP | S_IWOTH)))
1106 mode = read_only;
1107#if 0
1108 /* FIXME: why is this code not in? below we fix it so that a caller
1109 wanting RO doesn't get RW. smb_revalidate_inode does some
1110 optimization based on access mode. tail -f needs it to be correct.
1111
1112 We must open rw since we don't do the open if called a second time
1113 with different 'wish'. Is that not supported by smb servers? */
1114 if (!(wish & (O_WRONLY | O_RDWR)))
1115 mode = read_only;
1116#endif
1117
1118 res = -ENOMEM;
1119 if (! (req = smb_alloc_request(server, PAGE_SIZE)))
1120 goto out;
1121
1122 retry:
1123 p = smb_setup_header(req, SMBopen, 2, 0);
1124 WSET(req->rq_header, smb_vwv0, mode);
1125 WSET(req->rq_header, smb_vwv1, aSYSTEM | aHIDDEN | aDIR);
1126 res = smb_simple_encode_path(req, &p, dentry, NULL);
1127 if (res < 0)
1128 goto out_free;
1129 smb_setup_bcc(req, p);
1130
1131 res = smb_request_ok(req, SMBopen, 7, 0);
1132 if (res != 0) {
1133 if (mode == read_write &&
1134 (res == -EACCES || res == -ETXTBSY || res == -EROFS))
1135 {
1136 VERBOSE("%s/%s R/W failed, error=%d, retrying R/O\n",
1137 DENTRY_PATH(dentry), res);
1138 mode = read_only;
1139 req->rq_flags = 0;
1140 goto retry;
1141 }
1142 goto out_free;
1143 }
1144 /* We should now have data in vwv[0..6]. */
1145
1146 ei->fileid = WVAL(req->rq_header, smb_vwv0);
1147 ei->attr = WVAL(req->rq_header, smb_vwv1);
1148 /* smb_vwv2 has mtime */
1149 /* smb_vwv4 has size */
1150 ei->access = (WVAL(req->rq_header, smb_vwv6) & SMB_ACCMASK);
1151 ei->open = server->generation;
1152
1153out_free:
1154 smb_rput(req);
1155out:
1156 return res;
1157}
1158
1159/*
1160 * Make sure the file is open, and check that the access
1161 * is compatible with the desired access.
1162 */
1163int
1164smb_open(struct dentry *dentry, int wish)
1165{
1166 struct inode *inode = dentry->d_inode;
1167 int result;
1168 __u16 access;
1169
1170 result = -ENOENT;
1171 if (!inode) {
1172 printk(KERN_ERR "smb_open: no inode for dentry %s/%s\n",
1173 DENTRY_PATH(dentry));
1174 goto out;
1175 }
1176
1177 if (!smb_is_open(inode)) {
1178 struct smb_sb_info *server = server_from_inode(inode);
1179 result = 0;
1180 if (!smb_is_open(inode))
1181 result = smb_proc_open(server, dentry, wish);
1182 if (result)
1183 goto out;
1184 /*
1185 * A successful open means the path is still valid ...
1186 */
1187 smb_renew_times(dentry);
1188 }
1189
1190 /*
1191 * Check whether the access is compatible with the desired mode.
1192 */
1193 result = 0;
1194 access = SMB_I(inode)->access;
1195 if (access != wish && access != SMB_O_RDWR) {
1196 PARANOIA("%s/%s access denied, access=%x, wish=%x\n",
1197 DENTRY_PATH(dentry), access, wish);
1198 result = -EACCES;
1199 }
1200out:
1201 return result;
1202}
1203
1204static int
1205smb_proc_close(struct smb_sb_info *server, __u16 fileid, __u32 mtime)
1206{
1207 struct smb_request *req;
1208 int result = -ENOMEM;
1209
1210 if (! (req = smb_alloc_request(server, 0)))
1211 goto out;
1212
1213 smb_setup_header(req, SMBclose, 3, 0);
1214 WSET(req->rq_header, smb_vwv0, fileid);
1215 DSET(req->rq_header, smb_vwv1, utc2local(server, mtime));
1216 req->rq_flags |= SMB_REQ_NORETRY;
1217 result = smb_request_ok(req, SMBclose, 0, 0);
1218
1219 smb_rput(req);
1220out:
1221 return result;
1222}
1223
1224/*
1225 * Win NT 4.0 has an apparent bug in that it fails to update the
1226 * modify time when writing to a file. As a workaround, we update
1227 * both modify and access time locally, and post the times to the
1228 * server when closing the file.
1229 */
1230static int
1231smb_proc_close_inode(struct smb_sb_info *server, struct inode * ino)
1232{
1233 struct smb_inode_info *ei = SMB_I(ino);
1234 int result = 0;
1235 if (smb_is_open(ino))
1236 {
1237 /*
1238 * We clear the open flag in advance, in case another
1239 * process observes the value while we block below.
1240 */
1241 ei->open = 0;
1242
1243 /*
1244 * Kludge alert: SMB timestamps are accurate only to
1245 * two seconds ... round the times to avoid needless
1246 * cache invalidations!
1247 */
1248 if (ino->i_mtime.tv_sec & 1) {
1249 ino->i_mtime.tv_sec--;
1250 ino->i_mtime.tv_nsec = 0;
1251 }
1252 if (ino->i_atime.tv_sec & 1) {
1253 ino->i_atime.tv_sec--;
1254 ino->i_atime.tv_nsec = 0;
1255 }
1256 /*
1257 * If the file is open with write permissions,
1258 * update the time stamps to sync mtime and atime.
1259 */
1260 if ((server->opt.capabilities & SMB_CAP_UNIX) == 0 &&
1261 (server->opt.protocol >= SMB_PROTOCOL_LANMAN2) &&
1262 !(ei->access == SMB_O_RDONLY))
1263 {
1264 struct smb_fattr fattr;
1265 smb_get_inode_attr(ino, &fattr);
1266 smb_proc_setattr_ext(server, ino, &fattr);
1267 }
1268
1269 result = smb_proc_close(server, ei->fileid, ino->i_mtime.tv_sec);
1270 /*
1271 * Force a revalidation after closing ... some servers
1272 * don't post the size until the file has been closed.
1273 */
1274 if (server->opt.protocol < SMB_PROTOCOL_NT1)
1275 ei->oldmtime = 0;
1276 ei->closed = jiffies;
1277 }
1278 return result;
1279}
1280
/*
 * Close @ino on the server if it is currently open.
 * Returns 0 when there was nothing to close, otherwise the result of
 * smb_proc_close_inode().
 */
int
smb_close(struct inode *ino)
{
	struct smb_sb_info *server;

	if (!smb_is_open(ino))
		return 0;

	server = server_from_inode(ino);
	return smb_proc_close_inode(server, ino);
}
1292
1293/*
1294 * This is used to close a file following a failed instantiate.
1295 * Since we don't have an inode, we can't use any of the above.
1296 */
1297int
1298smb_close_fileid(struct dentry *dentry, __u16 fileid)
1299{
1300 struct smb_sb_info *server = server_from_dentry(dentry);
1301 int result;
1302
1303 result = smb_proc_close(server, fileid, get_seconds());
1304 return result;
1305}
1306
1307/* In smb_proc_read and smb_proc_write we do not retry, because the
1308 file-id would not be valid after a reconnection. */
1309
1310static void
1311smb_proc_read_data(struct smb_request *req)
1312{
1313 req->rq_iov[0].iov_base = req->rq_buffer;
1314 req->rq_iov[0].iov_len = 3;
1315
1316 req->rq_iov[1].iov_base = req->rq_page;
1317 req->rq_iov[1].iov_len = req->rq_rsize;
1318 req->rq_iovlen = 2;
1319
1320 req->rq_rlen = smb_len(req->rq_header) + 4 - req->rq_bytes_recvd;
1321}
1322
1323static int
1324smb_proc_read(struct inode *inode, loff_t offset, int count, char *data)
1325{
1326 struct smb_sb_info *server = server_from_inode(inode);
1327 __u16 returned_count, data_len;
1328 unsigned char *buf;
1329 int result;
1330 struct smb_request *req;
1331 u8 rbuf[4];
1332
1333 result = -ENOMEM;
1334 if (! (req = smb_alloc_request(server, 0)))
1335 goto out;
1336
1337 smb_setup_header(req, SMBread, 5, 0);
1338 buf = req->rq_header;
1339 WSET(buf, smb_vwv0, SMB_I(inode)->fileid);
1340 WSET(buf, smb_vwv1, count);
1341 DSET(buf, smb_vwv2, offset);
1342 WSET(buf, smb_vwv4, 0);
1343
1344 req->rq_page = data;
1345 req->rq_rsize = count;
1346 req->rq_callback = smb_proc_read_data;
1347 req->rq_buffer = rbuf;
1348 req->rq_flags |= SMB_REQ_NORETRY | SMB_REQ_STATIC;
1349
1350 result = smb_request_ok(req, SMBread, 5, -1);
1351 if (result < 0)
1352 goto out_free;
1353 returned_count = WVAL(req->rq_header, smb_vwv0);
1354
1355 data_len = WVAL(rbuf, 1);
1356
1357 if (returned_count != data_len) {
1358 printk(KERN_NOTICE "smb_proc_read: returned != data_len\n");
1359 printk(KERN_NOTICE "smb_proc_read: ret_c=%d, data_len=%d\n",
1360 returned_count, data_len);
1361 }
1362 result = data_len;
1363
1364out_free:
1365 smb_rput(req);
1366out:
1367 VERBOSE("ino=%ld, fileid=%d, count=%d, result=%d\n",
1368 inode->i_ino, SMB_I(inode)->fileid, count, result);
1369 return result;
1370}
1371
1372static int
1373smb_proc_write(struct inode *inode, loff_t offset, int count, const char *data)
1374{
1375 struct smb_sb_info *server = server_from_inode(inode);
1376 int result;
1377 u16 fileid = SMB_I(inode)->fileid;
1378 u8 buf[4];
1379 struct smb_request *req;
1380
1381 result = -ENOMEM;
1382 if (! (req = smb_alloc_request(server, 0)))
1383 goto out;
1384
1385 VERBOSE("ino=%ld, fileid=%d, count=%d@%Ld\n",
1386 inode->i_ino, fileid, count, offset);
1387
1388 smb_setup_header(req, SMBwrite, 5, count + 3);
1389 WSET(req->rq_header, smb_vwv0, fileid);
1390 WSET(req->rq_header, smb_vwv1, count);
1391 DSET(req->rq_header, smb_vwv2, offset);
1392 WSET(req->rq_header, smb_vwv4, 0);
1393
1394 buf[0] = 1;
1395 WSET(buf, 1, count); /* yes, again ... */
1396 req->rq_iov[1].iov_base = buf;
1397 req->rq_iov[1].iov_len = 3;
1398 req->rq_iov[2].iov_base = (char *) data;
1399 req->rq_iov[2].iov_len = count;
1400 req->rq_iovlen = 3;
1401 req->rq_flags |= SMB_REQ_NORETRY;
1402
1403 result = smb_request_ok(req, SMBwrite, 1, 0);
1404 if (result >= 0)
1405 result = WVAL(req->rq_header, smb_vwv0);
1406
1407 smb_rput(req);
1408out:
1409 return result;
1410}
1411
1412/*
1413 * In smb_proc_readX and smb_proc_writeX we do not retry, because the
1414 * file-id would not be valid after a reconnection.
1415 */
1416
1417#define SMB_READX_MAX_PAD 64
1418static void
1419smb_proc_readX_data(struct smb_request *req)
1420{
1421 /* header length, excluding the netbios length (-4) */
1422 int hdrlen = SMB_HEADER_LEN + req->rq_resp_wct*2 - 2;
1423 int data_off = WVAL(req->rq_header, smb_vwv6);
1424
1425 /*
1426 * Some genius made the padding to the data bytes arbitrary.
1427 * So we must first calculate the amount of padding used by the server.
1428 */
1429 data_off -= hdrlen;
1430 if (data_off > SMB_READX_MAX_PAD || data_off < 0) {
1431 PARANOIA("offset is larger than SMB_READX_MAX_PAD or negative!\n");
1432 PARANOIA("%d > %d || %d < 0\n", data_off, SMB_READX_MAX_PAD, data_off);
1433 req->rq_rlen = req->rq_bufsize + 1;
1434 return;
1435 }
1436 req->rq_iov[0].iov_base = req->rq_buffer;
1437 req->rq_iov[0].iov_len = data_off;
1438
1439 req->rq_iov[1].iov_base = req->rq_page;
1440 req->rq_iov[1].iov_len = req->rq_rsize;
1441 req->rq_iovlen = 2;
1442
1443 req->rq_rlen = smb_len(req->rq_header) + 4 - req->rq_bytes_recvd;
1444}
1445
1446static int
1447smb_proc_readX(struct inode *inode, loff_t offset, int count, char *data)
1448{
1449 struct smb_sb_info *server = server_from_inode(inode);
1450 unsigned char *buf;
1451 int result;
1452 struct smb_request *req;
1453 static char pad[SMB_READX_MAX_PAD];
1454
1455 result = -ENOMEM;
1456 if (! (req = smb_alloc_request(server, 0)))
1457 goto out;
1458
1459 smb_setup_header(req, SMBreadX, 12, 0);
1460 buf = req->rq_header;
1461 WSET(buf, smb_vwv0, 0x00ff);
1462 WSET(buf, smb_vwv1, 0);
1463 WSET(buf, smb_vwv2, SMB_I(inode)->fileid);
1464 DSET(buf, smb_vwv3, (u32)offset); /* low 32 bits */
1465 WSET(buf, smb_vwv5, count);
1466 WSET(buf, smb_vwv6, 0);
1467 DSET(buf, smb_vwv7, 0);
1468 WSET(buf, smb_vwv9, 0);
1469 DSET(buf, smb_vwv10, (u32)(offset >> 32)); /* high 32 bits */
1470 WSET(buf, smb_vwv11, 0);
1471
1472 req->rq_page = data;
1473 req->rq_rsize = count;
1474 req->rq_callback = smb_proc_readX_data;
1475 req->rq_buffer = pad;
1476 req->rq_bufsize = SMB_READX_MAX_PAD;
1477 req->rq_flags |= SMB_REQ_STATIC | SMB_REQ_NORETRY;
1478
1479 result = smb_request_ok(req, SMBreadX, 12, -1);
1480 if (result < 0)
1481 goto out_free;
1482 result = WVAL(req->rq_header, smb_vwv5);
1483
1484out_free:
1485 smb_rput(req);
1486out:
1487 VERBOSE("ino=%ld, fileid=%d, count=%d, result=%d\n",
1488 inode->i_ino, SMB_I(inode)->fileid, count, result);
1489 return result;
1490}
1491
1492static int
1493smb_proc_writeX(struct inode *inode, loff_t offset, int count, const char *data)
1494{
1495 struct smb_sb_info *server = server_from_inode(inode);
1496 int result;
1497 u8 *p;
1498 static u8 pad[4];
1499 struct smb_request *req;
1500
1501 result = -ENOMEM;
1502 if (! (req = smb_alloc_request(server, 0)))
1503 goto out;
1504
1505 VERBOSE("ino=%ld, fileid=%d, count=%d@%Ld\n",
1506 inode->i_ino, SMB_I(inode)->fileid, count, offset);
1507
1508 p = smb_setup_header(req, SMBwriteX, 14, count + 1);
1509 WSET(req->rq_header, smb_vwv0, 0x00ff);
1510 WSET(req->rq_header, smb_vwv1, 0);
1511 WSET(req->rq_header, smb_vwv2, SMB_I(inode)->fileid);
1512 DSET(req->rq_header, smb_vwv3, (u32)offset); /* low 32 bits */
1513 DSET(req->rq_header, smb_vwv5, 0);
1514 WSET(req->rq_header, smb_vwv7, 0); /* write mode */
1515 WSET(req->rq_header, smb_vwv8, 0);
1516 WSET(req->rq_header, smb_vwv9, 0);
1517 WSET(req->rq_header, smb_vwv10, count); /* data length */
1518 WSET(req->rq_header, smb_vwv11, smb_vwv12 + 2 + 1);
1519 DSET(req->rq_header, smb_vwv12, (u32)(offset >> 32));
1520
1521 req->rq_iov[1].iov_base = pad;
1522 req->rq_iov[1].iov_len = 1;
1523 req->rq_iov[2].iov_base = (char *) data;
1524 req->rq_iov[2].iov_len = count;
1525 req->rq_iovlen = 3;
1526 req->rq_flags |= SMB_REQ_NORETRY;
1527
1528 result = smb_request_ok(req, SMBwriteX, 6, 0);
1529 if (result >= 0)
1530 result = WVAL(req->rq_header, smb_vwv2);
1531
1532 smb_rput(req);
1533out:
1534 return result;
1535}
1536
1537int
1538smb_proc_create(struct dentry *dentry, __u16 attr, time_t ctime, __u16 *fileid)
1539{
1540 struct smb_sb_info *server = server_from_dentry(dentry);
1541 char *p;
1542 int result;
1543 struct smb_request *req;
1544
1545 result = -ENOMEM;
1546 if (! (req = smb_alloc_request(server, PAGE_SIZE)))
1547 goto out;
1548
1549 p = smb_setup_header(req, SMBcreate, 3, 0);
1550 WSET(req->rq_header, smb_vwv0, attr);
1551 DSET(req->rq_header, smb_vwv1, utc2local(server, ctime));
1552 result = smb_simple_encode_path(req, &p, dentry, NULL);
1553 if (result < 0)
1554 goto out_free;
1555 smb_setup_bcc(req, p);
1556
1557 result = smb_request_ok(req, SMBcreate, 1, 0);
1558 if (result < 0)
1559 goto out_free;
1560
1561 *fileid = WVAL(req->rq_header, smb_vwv0);
1562 result = 0;
1563
1564out_free:
1565 smb_rput(req);
1566out:
1567 return result;
1568}
1569
1570int
1571smb_proc_mv(struct dentry *old_dentry, struct dentry *new_dentry)
1572{
1573 struct smb_sb_info *server = server_from_dentry(old_dentry);
1574 char *p;
1575 int result;
1576 struct smb_request *req;
1577
1578 result = -ENOMEM;
1579 if (! (req = smb_alloc_request(server, PAGE_SIZE)))
1580 goto out;
1581
1582 p = smb_setup_header(req, SMBmv, 1, 0);
1583 WSET(req->rq_header, smb_vwv0, aSYSTEM | aHIDDEN | aDIR);
1584 result = smb_simple_encode_path(req, &p, old_dentry, NULL);
1585 if (result < 0)
1586 goto out_free;
1587 result = smb_simple_encode_path(req, &p, new_dentry, NULL);
1588 if (result < 0)
1589 goto out_free;
1590 smb_setup_bcc(req, p);
1591
1592 if ((result = smb_request_ok(req, SMBmv, 0, 0)) < 0)
1593 goto out_free;
1594 result = 0;
1595
1596out_free:
1597 smb_rput(req);
1598out:
1599 return result;
1600}
1601
1602/*
1603 * Code common to mkdir and rmdir.
1604 */
1605static int
1606smb_proc_generic_command(struct dentry *dentry, __u8 command)
1607{
1608 struct smb_sb_info *server = server_from_dentry(dentry);
1609 char *p;
1610 int result;
1611 struct smb_request *req;
1612
1613 result = -ENOMEM;
1614 if (! (req = smb_alloc_request(server, PAGE_SIZE)))
1615 goto out;
1616
1617 p = smb_setup_header(req, command, 0, 0);
1618 result = smb_simple_encode_path(req, &p, dentry, NULL);
1619 if (result < 0)
1620 goto out_free;
1621 smb_setup_bcc(req, p);
1622
1623 result = smb_request_ok(req, command, 0, 0);
1624 if (result < 0)
1625 goto out_free;
1626 result = 0;
1627
1628out_free:
1629 smb_rput(req);
1630out:
1631 return result;
1632}
1633
1634int
1635smb_proc_mkdir(struct dentry *dentry)
1636{
1637 return smb_proc_generic_command(dentry, SMBmkdir);
1638}
1639
1640int
1641smb_proc_rmdir(struct dentry *dentry)
1642{
1643 return smb_proc_generic_command(dentry, SMBrmdir);
1644}
1645
1646#if SMBFS_POSIX_UNLINK
1647/*
1648 * Removes readonly attribute from a file. Used by unlink to give posix
1649 * semantics.
1650 */
1651static int
1652smb_set_rw(struct dentry *dentry,struct smb_sb_info *server)
1653{
1654 int result;
1655 struct smb_fattr fattr;
1656
1657 /* FIXME: cifsUE should allow removing a readonly file. */
1658
1659 /* first get current attribute */
1660 smb_init_dirent(server, &fattr);
1661 result = server->ops->getattr(server, dentry, &fattr);
1662 smb_finish_dirent(server, &fattr);
1663 if (result < 0)
1664 return result;
1665
1666 /* if RONLY attribute is set, remove it */
1667 if (fattr.attr & aRONLY) { /* read only attribute is set */
1668 fattr.attr &= ~aRONLY;
1669 result = smb_proc_setattr_core(server, dentry, fattr.attr);
1670 }
1671 return result;
1672}
1673#endif
1674
1675int
1676smb_proc_unlink(struct dentry *dentry)
1677{
1678 struct smb_sb_info *server = server_from_dentry(dentry);
1679 int flag = 0;
1680 char *p;
1681 int result;
1682 struct smb_request *req;
1683
1684 result = -ENOMEM;
1685 if (! (req = smb_alloc_request(server, PAGE_SIZE)))
1686 goto out;
1687
1688 retry:
1689 p = smb_setup_header(req, SMBunlink, 1, 0);
1690 WSET(req->rq_header, smb_vwv0, aSYSTEM | aHIDDEN);
1691 result = smb_simple_encode_path(req, &p, dentry, NULL);
1692 if (result < 0)
1693 goto out_free;
1694 smb_setup_bcc(req, p);
1695
1696 if ((result = smb_request_ok(req, SMBunlink, 0, 0)) < 0) {
1697#if SMBFS_POSIX_UNLINK
1698 if (result == -EACCES && !flag) {
1699 /* Posix semantics is for the read-only state
1700 of a file to be ignored in unlink(). In the
1701 SMB world a unlink() is refused on a
1702 read-only file. To make things easier for
1703 unix users we try to override the files
1704 permission if the unlink fails with the
1705 right error.
1706 This introduces a race condition that could
1707 lead to a file being written by someone who
1708 shouldn't have access, but as far as I can
1709 tell that is unavoidable */
1710
1711 /* remove RONLY attribute and try again */
1712 result = smb_set_rw(dentry,server);
1713 if (result == 0) {
1714 flag = 1;
1715 req->rq_flags = 0;
1716 goto retry;
1717 }
1718 }
1719#endif
1720 goto out_free;
1721 }
1722 result = 0;
1723
1724out_free:
1725 smb_rput(req);
1726out:
1727 return result;
1728}
1729
1730int
1731smb_proc_flush(struct smb_sb_info *server, __u16 fileid)
1732{
1733 int result;
1734 struct smb_request *req;
1735
1736 result = -ENOMEM;
1737 if (! (req = smb_alloc_request(server, 0)))
1738 goto out;
1739
1740 smb_setup_header(req, SMBflush, 1, 0);
1741 WSET(req->rq_header, smb_vwv0, fileid);
1742 req->rq_flags |= SMB_REQ_NORETRY;
1743 result = smb_request_ok(req, SMBflush, 0, 0);
1744
1745 smb_rput(req);
1746out:
1747 return result;
1748}
1749
1750static int
1751smb_proc_trunc32(struct inode *inode, loff_t length)
1752{
1753 /*
1754 * Writing 0bytes is old-SMB magic for truncating files.
1755 * MAX_NON_LFS should prevent this from being called with a too
1756 * large offset.
1757 */
1758 return smb_proc_write(inode, length, 0, NULL);
1759}
1760
1761static int
1762smb_proc_trunc64(struct inode *inode, loff_t length)
1763{
1764 struct smb_sb_info *server = server_from_inode(inode);
1765 int result;
1766 char *param;
1767 char *data;
1768 struct smb_request *req;
1769
1770 result = -ENOMEM;
1771 if (! (req = smb_alloc_request(server, 14)))
1772 goto out;
1773
1774 param = req->rq_buffer;
1775 data = req->rq_buffer + 6;
1776
1777 /* FIXME: must we also set allocation size? winNT seems to do that */
1778 WSET(param, 0, SMB_I(inode)->fileid);
1779 WSET(param, 2, SMB_SET_FILE_END_OF_FILE_INFO);
1780 WSET(param, 4, 0);
1781 LSET(data, 0, length);
1782
1783 req->rq_trans2_command = TRANSACT2_SETFILEINFO;
1784 req->rq_ldata = 8;
1785 req->rq_data = data;
1786 req->rq_lparm = 6;
1787 req->rq_parm = param;
1788 req->rq_flags |= SMB_REQ_NORETRY;
1789 result = smb_add_request(req);
1790 if (result < 0)
1791 goto out_free;
1792
1793 result = 0;
1794 if (req->rq_rcls != 0)
1795 result = smb_errno(req);
1796
1797out_free:
1798 smb_rput(req);
1799out:
1800 return result;
1801}
1802
1803static int
1804smb_proc_trunc95(struct inode *inode, loff_t length)
1805{
1806 struct smb_sb_info *server = server_from_inode(inode);
1807 int result = smb_proc_trunc32(inode, length);
1808
1809 /*
1810 * win9x doesn't appear to update the size immediately.
1811 * It will return the old file size after the truncate,
1812 * confusing smbfs. So we force an update.
1813 *
1814 * FIXME: is this still necessary?
1815 */
1816 smb_proc_flush(server, SMB_I(inode)->fileid);
1817 return result;
1818}
1819
1820static void
1821smb_init_dirent(struct smb_sb_info *server, struct smb_fattr *fattr)
1822{
1823 memset(fattr, 0, sizeof(*fattr));
1824
1825 fattr->f_nlink = 1;
1826 fattr->f_uid = server->mnt->uid;
1827 fattr->f_gid = server->mnt->gid;
1828 fattr->f_unix = 0;
1829}
1830
1831static void
1832smb_finish_dirent(struct smb_sb_info *server, struct smb_fattr *fattr)
1833{
1834 if (fattr->f_unix)
1835 return;
1836
1837 fattr->f_mode = server->mnt->file_mode;
1838 if (fattr->attr & aDIR) {
1839 fattr->f_mode = server->mnt->dir_mode;
1840 fattr->f_size = SMB_ST_BLKSIZE;
1841 }
1842 /* Check the read-only flag */
1843 if (fattr->attr & aRONLY)
1844 fattr->f_mode &= ~(S_IWUSR | S_IWGRP | S_IWOTH);
1845
1846 /* How many 512 byte blocks do we need for this file? */
1847 fattr->f_blocks = 0;
1848 if (fattr->f_size != 0)
1849 fattr->f_blocks = 1 + ((fattr->f_size-1) >> 9);
1850 return;
1851}
1852
1853void
1854smb_init_root_dirent(struct smb_sb_info *server, struct smb_fattr *fattr,
1855 struct super_block *sb)
1856{
1857 smb_init_dirent(server, fattr);
1858 fattr->attr = aDIR;
1859 fattr->f_ino = 2; /* traditional root inode number */
1860 fattr->f_mtime = current_fs_time(sb);
1861 smb_finish_dirent(server, fattr);
1862}
1863
1864/*
1865 * Decode a dirent for old protocols
1866 *
1867 * qname is filled with the decoded, and possibly translated, name.
1868 * fattr receives decoded attributes
1869 *
1870 * Bugs Noted:
1871 * (1) Pathworks servers may pad the name with extra spaces.
1872 */
1873static char *
1874smb_decode_short_dirent(struct smb_sb_info *server, char *p,
1875 struct qstr *qname, struct smb_fattr *fattr,
1876 unsigned char *name_buf)
1877{
1878 int len;
1879
1880 /*
1881 * SMB doesn't have a concept of inode numbers ...
1882 */
1883 smb_init_dirent(server, fattr);
1884 fattr->f_ino = 0; /* FIXME: do we need this? */
1885
1886 p += SMB_STATUS_SIZE; /* reserved (search_status) */
1887 fattr->attr = *p;
1888 fattr->f_mtime.tv_sec = date_dos2unix(server, WVAL(p, 3), WVAL(p, 1));
1889 fattr->f_mtime.tv_nsec = 0;
1890 fattr->f_size = DVAL(p, 5);
1891 fattr->f_ctime = fattr->f_mtime;
1892 fattr->f_atime = fattr->f_mtime;
1893 qname->name = p + 9;
1894 len = strnlen(qname->name, 12);
1895
1896 /*
1897 * Trim trailing blanks for Pathworks servers
1898 */
1899 while (len > 2 && qname->name[len-1] == ' ')
1900 len--;
1901
1902 smb_finish_dirent(server, fattr);
1903
1904#if 0
1905 /* FIXME: These only work for ascii chars, and recent smbmount doesn't
1906 allow the flag to be set anyway. It kills const. Remove? */
1907 switch (server->opt.case_handling) {
1908 case SMB_CASE_UPPER:
1909 str_upper(entry->name, len);
1910 break;
1911 case SMB_CASE_LOWER:
1912 str_lower(entry->name, len);
1913 break;
1914 default:
1915 break;
1916 }
1917#endif
1918
1919 qname->len = 0;
1920 len = server->ops->convert(name_buf, SMB_MAXNAMELEN,
1921 qname->name, len,
1922 server->remote_nls, server->local_nls);
1923 if (len > 0) {
1924 qname->len = len;
1925 qname->name = name_buf;
1926 DEBUG1("len=%d, name=%.*s\n",qname->len,qname->len,qname->name);
1927 }
1928
1929 return p + 22;
1930}
1931
1932/*
1933 * This routine is used to read in directory entries from the network.
1934 * Note that it is for short directory name seeks, i.e.: protocol <
1935 * SMB_PROTOCOL_LANMAN2
1936 */
1937static int
1938smb_proc_readdir_short(struct file *filp, void *dirent, filldir_t filldir,
1939 struct smb_cache_control *ctl)
1940{
1941 struct dentry *dir = filp->f_path.dentry;
1942 struct smb_sb_info *server = server_from_dentry(dir);
1943 struct qstr qname;
1944 struct smb_fattr fattr;
1945 char *p;
1946 int result;
1947 int i, first, entries_seen, entries;
1948 int entries_asked = (server->opt.max_xmit - 100) / SMB_DIRINFO_SIZE;
1949 __u16 bcc;
1950 __u16 count;
1951 char status[SMB_STATUS_SIZE];
1952 static struct qstr mask = {
1953 .name = "*.*",
1954 .len = 3,
1955 };
1956 unsigned char *last_status;
1957 struct smb_request *req;
1958 unsigned char *name_buf;
1959
1960 VERBOSE("%s/%s\n", DENTRY_PATH(dir));
1961
1962 lock_kernel();
1963
1964 result = -ENOMEM;
1965 if (! (name_buf = kmalloc(SMB_MAXNAMELEN, GFP_KERNEL)))
1966 goto out;
1967
1968 first = 1;
1969 entries = 0;
1970 entries_seen = 2; /* implicit . and .. */
1971
1972 result = -ENOMEM;
1973 if (! (req = smb_alloc_request(server, server->opt.max_xmit)))
1974 goto out_name;
1975
1976 while (1) {
1977 p = smb_setup_header(req, SMBsearch, 2, 0);
1978 WSET(req->rq_header, smb_vwv0, entries_asked);
1979 WSET(req->rq_header, smb_vwv1, aDIR);
1980 if (first == 1) {
1981 result = smb_simple_encode_path(req, &p, dir, &mask);
1982 if (result < 0)
1983 goto out_free;
1984 if (p + 3 > (char *)req->rq_buffer + req->rq_bufsize) {
1985 result = -ENAMETOOLONG;
1986 goto out_free;
1987 }
1988 *p++ = 5;
1989 WSET(p, 0, 0);
1990 p += 2;
1991 first = 0;
1992 } else {
1993 if (p + 5 + SMB_STATUS_SIZE >
1994 (char *)req->rq_buffer + req->rq_bufsize) {
1995 result = -ENAMETOOLONG;
1996 goto out_free;
1997 }
1998
1999 *p++ = 4;
2000 *p++ = 0;
2001 *p++ = 5;
2002 WSET(p, 0, SMB_STATUS_SIZE);
2003 p += 2;
2004 memcpy(p, status, SMB_STATUS_SIZE);
2005 p += SMB_STATUS_SIZE;
2006 }
2007
2008 smb_setup_bcc(req, p);
2009
2010 result = smb_request_ok(req, SMBsearch, 1, -1);
2011 if (result < 0) {
2012 if ((req->rq_rcls == ERRDOS) &&
2013 (req->rq_err == ERRnofiles))
2014 break;
2015 goto out_free;
2016 }
2017 count = WVAL(req->rq_header, smb_vwv0);
2018 if (count <= 0)
2019 break;
2020
2021 result = -EIO;
2022 bcc = smb_bcc(req->rq_header);
2023 if (bcc != count * SMB_DIRINFO_SIZE + 3)
2024 goto out_free;
2025 p = req->rq_buffer + 3;
2026
2027
2028 /* Make sure the response fits in the buffer. Fixed sized
2029 entries means we don't have to check in the decode loop. */
2030
2031 last_status = req->rq_buffer + 3 + (count-1) * SMB_DIRINFO_SIZE;
2032
2033 if (last_status + SMB_DIRINFO_SIZE >=
2034 req->rq_buffer + req->rq_bufsize) {
2035 printk(KERN_ERR "smb_proc_readdir_short: "
2036 "last dir entry outside buffer! "
2037 "%d@%p %d@%p\n", SMB_DIRINFO_SIZE, last_status,
2038 req->rq_bufsize, req->rq_buffer);
2039 goto out_free;
2040 }
2041
2042 /* Read the last entry into the status field. */
2043 memcpy(status, last_status, SMB_STATUS_SIZE);
2044
2045
2046 /* Now we are ready to parse smb directory entries. */
2047
2048 for (i = 0; i < count; i++) {
2049 p = smb_decode_short_dirent(server, p,
2050 &qname, &fattr, name_buf);
2051 if (qname.len == 0)
2052 continue;
2053
2054 if (entries_seen == 2 && qname.name[0] == '.') {
2055 if (qname.len == 1)
2056 continue;
2057 if (qname.name[1] == '.' && qname.len == 2)
2058 continue;
2059 }
2060 if (!smb_fill_cache(filp, dirent, filldir, ctl,
2061 &qname, &fattr))
2062 ; /* stop reading? */
2063 entries_seen++;
2064 }
2065 }
2066 result = entries;
2067
2068out_free:
2069 smb_rput(req);
2070out_name:
2071 kfree(name_buf);
2072out:
2073 unlock_kernel();
2074 return result;
2075}
2076
2077static void smb_decode_unix_basic(struct smb_fattr *fattr, struct smb_sb_info *server, char *p)
2078{
2079 u64 size, disk_bytes;
2080
2081 /* FIXME: verify nls support. all is sent as utf8? */
2082
2083 fattr->f_unix = 1;
2084 fattr->f_mode = 0;
2085
2086 /* FIXME: use the uniqueID from the remote instead? */
2087 /* 0 L file size in bytes */
2088 /* 8 L file size on disk in bytes (block count) */
2089 /* 40 L uid */
2090 /* 48 L gid */
2091 /* 56 W file type */
2092 /* 60 L devmajor */
2093 /* 68 L devminor */
2094 /* 76 L unique ID (inode) */
2095 /* 84 L permissions */
2096 /* 92 L link count */
2097
2098 size = LVAL(p, 0);
2099 disk_bytes = LVAL(p, 8);
2100
2101 /*
2102 * Some samba versions round up on-disk byte usage
2103 * to 1MB boundaries, making it useless. When seeing
2104 * that, use the size instead.
2105 */
2106 if (!(disk_bytes & 0xfffff))
2107 disk_bytes = size+511;
2108
2109 fattr->f_size = size;
2110 fattr->f_blocks = disk_bytes >> 9;
2111 fattr->f_ctime = smb_ntutc2unixutc(LVAL(p, 16));
2112 fattr->f_atime = smb_ntutc2unixutc(LVAL(p, 24));
2113 fattr->f_mtime = smb_ntutc2unixutc(LVAL(p, 32));
2114
2115 if (server->mnt->flags & SMB_MOUNT_UID)
2116 fattr->f_uid = server->mnt->uid;
2117 else
2118 fattr->f_uid = LVAL(p, 40);
2119
2120 if (server->mnt->flags & SMB_MOUNT_GID)
2121 fattr->f_gid = server->mnt->gid;
2122 else
2123 fattr->f_gid = LVAL(p, 48);
2124
2125 fattr->f_mode |= smb_filetype_to_mode(WVAL(p, 56));
2126
2127 if (S_ISBLK(fattr->f_mode) || S_ISCHR(fattr->f_mode)) {
2128 __u64 major = LVAL(p, 60);
2129 __u64 minor = LVAL(p, 68);
2130
2131 fattr->f_rdev = MKDEV(major & 0xffffffff, minor & 0xffffffff);
2132 if (MAJOR(fattr->f_rdev) != (major & 0xffffffff) ||
2133 MINOR(fattr->f_rdev) != (minor & 0xffffffff))
2134 fattr->f_rdev = 0;
2135 }
2136
2137 fattr->f_mode |= LVAL(p, 84);
2138
2139 if ( (server->mnt->flags & SMB_MOUNT_DMODE) &&
2140 (S_ISDIR(fattr->f_mode)) )
2141 fattr->f_mode = (server->mnt->dir_mode & S_IRWXUGO) | S_IFDIR;
2142 else if ( (server->mnt->flags & SMB_MOUNT_FMODE) &&
2143 !(S_ISDIR(fattr->f_mode)) )
2144 fattr->f_mode = (server->mnt->file_mode & S_IRWXUGO) |
2145 (fattr->f_mode & S_IFMT);
2146
2147}
2148
2149/*
2150 * Interpret a long filename structure using the specified info level:
2151 * level 1 for anything below NT1 protocol
2152 * level 260 for NT1 protocol
2153 *
2154 * qname is filled with the decoded, and possibly translated, name
2155 * fattr receives decoded attributes.
2156 *
2157 * Bugs Noted:
2158 * (1) Win NT 4.0 appends a null byte to names and counts it in the length!
2159 */
2160static char *
2161smb_decode_long_dirent(struct smb_sb_info *server, char *p, int level,
2162 struct qstr *qname, struct smb_fattr *fattr,
2163 unsigned char *name_buf)
2164{
2165 char *result;
2166 unsigned int len = 0;
2167 int n;
2168 __u16 date, time;
2169 int unicode = (server->mnt->flags & SMB_MOUNT_UNICODE);
2170
2171 /*
2172 * SMB doesn't have a concept of inode numbers ...
2173 */
2174 smb_init_dirent(server, fattr);
2175 fattr->f_ino = 0; /* FIXME: do we need this? */
2176
2177 switch (level) {
2178 case 1:
2179 len = *((unsigned char *) p + 22);
2180 qname->name = p + 23;
2181 result = p + 24 + len;
2182
2183 date = WVAL(p, 0);
2184 time = WVAL(p, 2);
2185 fattr->f_ctime.tv_sec = date_dos2unix(server, date, time);
2186 fattr->f_ctime.tv_nsec = 0;
2187
2188 date = WVAL(p, 4);
2189 time = WVAL(p, 6);
2190 fattr->f_atime.tv_sec = date_dos2unix(server, date, time);
2191 fattr->f_atime.tv_nsec = 0;
2192
2193 date = WVAL(p, 8);
2194 time = WVAL(p, 10);
2195 fattr->f_mtime.tv_sec = date_dos2unix(server, date, time);
2196 fattr->f_mtime.tv_nsec = 0;
2197 fattr->f_size = DVAL(p, 12);
2198 /* ULONG allocation size */
2199 fattr->attr = WVAL(p, 20);
2200
2201 VERBOSE("info 1 at %p, len=%d, name=%.*s\n",
2202 p, len, len, qname->name);
2203 break;
2204 case 260:
2205 result = p + WVAL(p, 0);
2206 len = DVAL(p, 60);
2207 if (len > 255) len = 255;
2208 /* NT4 null terminates, unless we are using unicode ... */
2209 qname->name = p + 94;
2210 if (!unicode && len && qname->name[len-1] == '\0')
2211 len--;
2212
2213 fattr->f_ctime = smb_ntutc2unixutc(LVAL(p, 8));
2214 fattr->f_atime = smb_ntutc2unixutc(LVAL(p, 16));
2215 fattr->f_mtime = smb_ntutc2unixutc(LVAL(p, 24));
2216 /* change time (32) */
2217 fattr->f_size = LVAL(p, 40);
2218 /* alloc size (48) */
2219 fattr->attr = DVAL(p, 56);
2220
2221 VERBOSE("info 260 at %p, len=%d, name=%.*s\n",
2222 p, len, len, qname->name);
2223 break;
2224 case SMB_FIND_FILE_UNIX:
2225 result = p + WVAL(p, 0);
2226 qname->name = p + 108;
2227
2228 len = strlen(qname->name);
2229 /* FIXME: should we check the length?? */
2230
2231 p += 8;
2232 smb_decode_unix_basic(fattr, server, p);
2233 VERBOSE("info SMB_FIND_FILE_UNIX at %p, len=%d, name=%.*s\n",
2234 p, len, len, qname->name);
2235 break;
2236 default:
2237 PARANOIA("Unknown info level %d\n", level);
2238 result = p + WVAL(p, 0);
2239 goto out;
2240 }
2241
2242 smb_finish_dirent(server, fattr);
2243
2244#if 0
2245 /* FIXME: These only work for ascii chars, and recent smbmount doesn't
2246 allow the flag to be set anyway. Remove? */
2247 switch (server->opt.case_handling) {
2248 case SMB_CASE_UPPER:
2249 str_upper(qname->name, len);
2250 break;
2251 case SMB_CASE_LOWER:
2252 str_lower(qname->name, len);
2253 break;
2254 default:
2255 break;
2256 }
2257#endif
2258
2259 qname->len = 0;
2260 n = server->ops->convert(name_buf, SMB_MAXNAMELEN,
2261 qname->name, len,
2262 server->remote_nls, server->local_nls);
2263 if (n > 0) {
2264 qname->len = n;
2265 qname->name = name_buf;
2266 }
2267
2268out:
2269 return result;
2270}
2271
2272/* findfirst/findnext flags */
2273#define SMB_CLOSE_AFTER_FIRST (1<<0)
2274#define SMB_CLOSE_IF_END (1<<1)
2275#define SMB_REQUIRE_RESUME_KEY (1<<2)
2276#define SMB_CONTINUE_BIT (1<<3)
2277
2278/*
2279 * Note: samba-2.0.7 (at least) has a very similar routine, cli_list, in
2280 * source/libsmb/clilist.c. When looking for smb bugs in the readdir code,
2281 * go there for advise.
2282 *
2283 * Bugs Noted:
2284 * (1) When using Info Level 1 Win NT 4.0 truncates directory listings
2285 * for certain patterns of names and/or lengths. The breakage pattern
2286 * is completely reproducible and can be toggled by the creation of a
2287 * single file. (E.g. echo hi >foo breaks, rm -f foo works.)
2288 */
2289static int
2290smb_proc_readdir_long(struct file *filp, void *dirent, filldir_t filldir,
2291 struct smb_cache_control *ctl)
2292{
2293 struct dentry *dir = filp->f_path.dentry;
2294 struct smb_sb_info *server = server_from_dentry(dir);
2295 struct qstr qname;
2296 struct smb_fattr fattr;
2297
2298 unsigned char *p, *lastname;
2299 char *mask, *param;
2300 __u16 command;
2301 int first, entries_seen;
2302
2303 /* Both NT and OS/2 accept info level 1 (but see note below). */
2304 int info_level = 260;
2305 const int max_matches = 512;
2306
2307 unsigned int ff_searchcount = 0;
2308 unsigned int ff_eos = 0;
2309 unsigned int ff_lastname = 0;
2310 unsigned int ff_dir_handle = 0;
2311 unsigned int loop_count = 0;
2312 unsigned int mask_len, i;
2313 int result;
2314 struct smb_request *req;
2315 unsigned char *name_buf;
2316 static struct qstr star = {
2317 .name = "*",
2318 .len = 1,
2319 };
2320
2321 lock_kernel();
2322
2323 /*
2324 * We always prefer unix style. Use info level 1 for older
2325 * servers that don't do 260.
2326 */
2327 if (server->opt.capabilities & SMB_CAP_UNIX)
2328 info_level = SMB_FIND_FILE_UNIX;
2329 else if (server->opt.protocol < SMB_PROTOCOL_NT1)
2330 info_level = 1;
2331
2332 result = -ENOMEM;
2333 if (! (name_buf = kmalloc(SMB_MAXNAMELEN+2, GFP_KERNEL)))
2334 goto out;
2335 if (! (req = smb_alloc_request(server, server->opt.max_xmit)))
2336 goto out_name;
2337 param = req->rq_buffer;
2338
2339 /*
2340 * Encode the initial path
2341 */
2342 mask = param + 12;
2343
2344 result = smb_encode_path(server, mask, SMB_MAXPATHLEN+1, dir, &star);
2345 if (result <= 0)
2346 goto out_free;
2347 mask_len = result - 1; /* mask_len is strlen, not #bytes */
2348 result = 0;
2349 first = 1;
2350 VERBOSE("starting mask_len=%d, mask=%s\n", mask_len, mask);
2351
2352 entries_seen = 2;
2353 ff_eos = 0;
2354
2355 while (ff_eos == 0) {
2356 loop_count += 1;
2357 if (loop_count > 10) {
2358 printk(KERN_WARNING "smb_proc_readdir_long: "
2359 "Looping in FIND_NEXT??\n");
2360 result = -EIO;
2361 break;
2362 }
2363
2364 if (first != 0) {
2365 command = TRANSACT2_FINDFIRST;
2366 WSET(param, 0, aSYSTEM | aHIDDEN | aDIR);
2367 WSET(param, 2, max_matches); /* max count */
2368 WSET(param, 4, SMB_CLOSE_IF_END);
2369 WSET(param, 6, info_level);
2370 DSET(param, 8, 0);
2371 } else {
2372 command = TRANSACT2_FINDNEXT;
2373
2374 VERBOSE("handle=0x%X, lastname=%d, mask=%.*s\n",
2375 ff_dir_handle, ff_lastname, mask_len, mask);
2376
2377 WSET(param, 0, ff_dir_handle); /* search handle */
2378 WSET(param, 2, max_matches); /* max count */
2379 WSET(param, 4, info_level);
2380 DSET(param, 6, 0);
2381 WSET(param, 10, SMB_CONTINUE_BIT|SMB_CLOSE_IF_END);
2382 }
2383
2384 req->rq_trans2_command = command;
2385 req->rq_ldata = 0;
2386 req->rq_data = NULL;
2387 req->rq_lparm = 12 + mask_len + 1;
2388 req->rq_parm = param;
2389 req->rq_flags = 0;
2390 result = smb_add_request(req);
2391 if (result < 0) {
2392 PARANOIA("error=%d, breaking\n", result);
2393 break;
2394 }
2395
2396 if (req->rq_rcls == ERRSRV && req->rq_err == ERRerror) {
2397 /* a damn Win95 bug - sometimes it clags if you
2398 ask it too fast */
2399 schedule_timeout_interruptible(msecs_to_jiffies(200));
2400 continue;
2401 }
2402
2403 if (req->rq_rcls != 0) {
2404 result = smb_errno(req);
2405 PARANOIA("name=%s, result=%d, rcls=%d, err=%d\n",
2406 mask, result, req->rq_rcls, req->rq_err);
2407 break;
2408 }
2409
2410 /* parse out some important return info */
2411 if (first != 0) {
2412 ff_dir_handle = WVAL(req->rq_parm, 0);
2413 ff_searchcount = WVAL(req->rq_parm, 2);
2414 ff_eos = WVAL(req->rq_parm, 4);
2415 ff_lastname = WVAL(req->rq_parm, 8);
2416 } else {
2417 ff_searchcount = WVAL(req->rq_parm, 0);
2418 ff_eos = WVAL(req->rq_parm, 2);
2419 ff_lastname = WVAL(req->rq_parm, 6);
2420 }
2421
2422 if (ff_searchcount == 0)
2423 break;
2424
2425 /* Now we are ready to parse smb directory entries. */
2426
2427 /* point to the data bytes */
2428 p = req->rq_data;
2429 for (i = 0; i < ff_searchcount; i++) {
2430 /* make sure we stay within the buffer */
2431 if (p >= req->rq_data + req->rq_ldata) {
2432 printk(KERN_ERR "smb_proc_readdir_long: "
2433 "dirent pointer outside buffer! "
2434 "%p %d@%p\n",
2435 p, req->rq_ldata, req->rq_data);
2436 result = -EIO; /* always a comm. error? */
2437 goto out_free;
2438 }
2439
2440 p = smb_decode_long_dirent(server, p, info_level,
2441 &qname, &fattr, name_buf);
2442
2443 /* ignore . and .. from the server */
2444 if (entries_seen == 2 && qname.name[0] == '.') {
2445 if (qname.len == 1)
2446 continue;
2447 if (qname.name[1] == '.' && qname.len == 2)
2448 continue;
2449 }
2450
2451 if (!smb_fill_cache(filp, dirent, filldir, ctl,
2452 &qname, &fattr))
2453 ; /* stop reading? */
2454 entries_seen++;
2455 }
2456
2457 VERBOSE("received %d entries, eos=%d\n", ff_searchcount,ff_eos);
2458
2459 /*
2460 * We might need the lastname for continuations.
2461 *
2462 * Note that some servers (win95?) point to the filename and
2463 * others (NT4, Samba using NT1) to the dir entry. We assume
2464 * here that those who do not point to a filename do not need
2465 * this info to continue the listing.
2466 *
2467 * OS/2 needs this and talks infolevel 1.
2468 * NetApps want lastname with infolevel 260.
2469 * win2k want lastname with infolevel 260, and points to
2470 * the record not to the name.
2471 * Samba+CifsUnixExt doesn't need lastname.
2472 *
2473 * Both are happy if we return the data they point to. So we do.
2474 * (FIXME: above is not true with win2k)
2475 */
2476 mask_len = 0;
2477 if (info_level != SMB_FIND_FILE_UNIX &&
2478 ff_lastname > 0 && ff_lastname < req->rq_ldata) {
2479 lastname = req->rq_data + ff_lastname;
2480
2481 switch (info_level) {
2482 case 260:
2483 mask_len = req->rq_ldata - ff_lastname;
2484 break;
2485 case 1:
2486 /* lastname points to a length byte */
2487 mask_len = *lastname++;
2488 if (ff_lastname + 1 + mask_len > req->rq_ldata)
2489 mask_len = req->rq_ldata - ff_lastname - 1;
2490 break;
2491 }
2492
2493 /*
2494 * Update the mask string for the next message.
2495 */
2496 if (mask_len > 255)
2497 mask_len = 255;
2498 if (mask_len)
2499 strncpy(mask, lastname, mask_len);
2500 }
2501 mask_len = strnlen(mask, mask_len);
2502 VERBOSE("new mask, len=%d@%d of %d, mask=%.*s\n",
2503 mask_len, ff_lastname, req->rq_ldata, mask_len, mask);
2504
2505 first = 0;
2506 loop_count = 0;
2507 }
2508
2509out_free:
2510 smb_rput(req);
2511out_name:
2512 kfree(name_buf);
2513out:
2514 unlock_kernel();
2515 return result;
2516}
2517
2518/*
2519 * This version uses the trans2 TRANSACT2_FINDFIRST message
2520 * to get the attribute data.
2521 *
2522 * Bugs Noted:
2523 */
2524static int
2525smb_proc_getattr_ff(struct smb_sb_info *server, struct dentry *dentry,
2526 struct smb_fattr *fattr)
2527{
2528 char *param, *mask;
2529 __u16 date, time;
2530 int mask_len, result;
2531 struct smb_request *req;
2532
2533 result = -ENOMEM;
2534 if (! (req = smb_alloc_request(server, PAGE_SIZE)))
2535 goto out;
2536 param = req->rq_buffer;
2537 mask = param + 12;
2538
2539 mask_len = smb_encode_path(server, mask, SMB_MAXPATHLEN+1, dentry,NULL);
2540 if (mask_len < 0) {
2541 result = mask_len;
2542 goto out_free;
2543 }
2544 VERBOSE("name=%s, len=%d\n", mask, mask_len);
2545 WSET(param, 0, aSYSTEM | aHIDDEN | aDIR);
2546 WSET(param, 2, 1); /* max count */
2547 WSET(param, 4, 1); /* close after this call */
2548 WSET(param, 6, 1); /* info_level */
2549 DSET(param, 8, 0);
2550
2551 req->rq_trans2_command = TRANSACT2_FINDFIRST;
2552 req->rq_ldata = 0;
2553 req->rq_data = NULL;
2554 req->rq_lparm = 12 + mask_len;
2555 req->rq_parm = param;
2556 req->rq_flags = 0;
2557 result = smb_add_request(req);
2558 if (result < 0)
2559 goto out_free;
2560 if (req->rq_rcls != 0) {
2561 result = smb_errno(req);
2562#ifdef SMBFS_PARANOIA
2563 if (result != -ENOENT)
2564 PARANOIA("error for %s, rcls=%d, err=%d\n",
2565 mask, req->rq_rcls, req->rq_err);
2566#endif
2567 goto out_free;
2568 }
2569 /* Make sure we got enough data ... */
2570 result = -EINVAL;
2571 if (req->rq_ldata < 22 || WVAL(req->rq_parm, 2) != 1) {
2572 PARANOIA("bad result for %s, len=%d, count=%d\n",
2573 mask, req->rq_ldata, WVAL(req->rq_parm, 2));
2574 goto out_free;
2575 }
2576
2577 /*
2578 * Decode the response into the fattr ...
2579 */
2580 date = WVAL(req->rq_data, 0);
2581 time = WVAL(req->rq_data, 2);
2582 fattr->f_ctime.tv_sec = date_dos2unix(server, date, time);
2583 fattr->f_ctime.tv_nsec = 0;
2584
2585 date = WVAL(req->rq_data, 4);
2586 time = WVAL(req->rq_data, 6);
2587 fattr->f_atime.tv_sec = date_dos2unix(server, date, time);
2588 fattr->f_atime.tv_nsec = 0;
2589
2590 date = WVAL(req->rq_data, 8);
2591 time = WVAL(req->rq_data, 10);
2592 fattr->f_mtime.tv_sec = date_dos2unix(server, date, time);
2593 fattr->f_mtime.tv_nsec = 0;
2594 VERBOSE("name=%s, date=%x, time=%x, mtime=%ld\n",
2595 mask, date, time, fattr->f_mtime.tv_sec);
2596 fattr->f_size = DVAL(req->rq_data, 12);
2597 /* ULONG allocation size */
2598 fattr->attr = WVAL(req->rq_data, 20);
2599 result = 0;
2600
2601out_free:
2602 smb_rput(req);
2603out:
2604 return result;
2605}
2606
2607static int
2608smb_proc_getattr_core(struct smb_sb_info *server, struct dentry *dir,
2609 struct smb_fattr *fattr)
2610{
2611 int result;
2612 char *p;
2613 struct smb_request *req;
2614
2615 result = -ENOMEM;
2616 if (! (req = smb_alloc_request(server, PAGE_SIZE)))
2617 goto out;
2618
2619 p = smb_setup_header(req, SMBgetatr, 0, 0);
2620 result = smb_simple_encode_path(req, &p, dir, NULL);
2621 if (result < 0)
2622 goto out_free;
2623 smb_setup_bcc(req, p);
2624
2625 if ((result = smb_request_ok(req, SMBgetatr, 10, 0)) < 0)
2626 goto out_free;
2627 fattr->attr = WVAL(req->rq_header, smb_vwv0);
2628 fattr->f_mtime.tv_sec = local2utc(server, DVAL(req->rq_header, smb_vwv1));
2629 fattr->f_mtime.tv_nsec = 0;
2630 fattr->f_size = DVAL(req->rq_header, smb_vwv3);
2631 fattr->f_ctime = fattr->f_mtime;
2632 fattr->f_atime = fattr->f_mtime;
2633#ifdef SMBFS_DEBUG_TIMESTAMP
2634 printk("getattr_core: %s/%s, mtime=%ld\n",
2635 DENTRY_PATH(dir), fattr->f_mtime);
2636#endif
2637 result = 0;
2638
2639out_free:
2640 smb_rput(req);
2641out:
2642 return result;
2643}
2644
2645/*
2646 * Bugs Noted:
2647 * (1) Win 95 swaps the date and time fields in the standard info level.
2648 */
2649static int
2650smb_proc_getattr_trans2(struct smb_sb_info *server, struct dentry *dir,
2651 struct smb_request *req, int infolevel)
2652{
2653 char *p, *param;
2654 int result;
2655
2656 param = req->rq_buffer;
2657 WSET(param, 0, infolevel);
2658 DSET(param, 2, 0);
2659 result = smb_encode_path(server, param+6, SMB_MAXPATHLEN+1, dir, NULL);
2660 if (result < 0)
2661 goto out;
2662 p = param + 6 + result;
2663
2664 req->rq_trans2_command = TRANSACT2_QPATHINFO;
2665 req->rq_ldata = 0;
2666 req->rq_data = NULL;
2667 req->rq_lparm = p - param;
2668 req->rq_parm = param;
2669 req->rq_flags = 0;
2670 result = smb_add_request(req);
2671 if (result < 0)
2672 goto out;
2673 if (req->rq_rcls != 0) {
2674 VERBOSE("for %s: result=%d, rcls=%d, err=%d\n",
2675 &param[6], result, req->rq_rcls, req->rq_err);
2676 result = smb_errno(req);
2677 goto out;
2678 }
2679 result = -ENOENT;
2680 if (req->rq_ldata < 22) {
2681 PARANOIA("not enough data for %s, len=%d\n",
2682 &param[6], req->rq_ldata);
2683 goto out;
2684 }
2685
2686 result = 0;
2687out:
2688 return result;
2689}
2690
2691static int
2692smb_proc_getattr_trans2_std(struct smb_sb_info *server, struct dentry *dir,
2693 struct smb_fattr *attr)
2694{
2695 u16 date, time;
2696 int off_date = 0, off_time = 2;
2697 int result;
2698 struct smb_request *req;
2699
2700 result = -ENOMEM;
2701 if (! (req = smb_alloc_request(server, PAGE_SIZE)))
2702 goto out;
2703
2704 result = smb_proc_getattr_trans2(server, dir, req, SMB_INFO_STANDARD);
2705 if (result < 0)
2706 goto out_free;
2707
2708 /*
2709 * Kludge alert: Win 95 swaps the date and time field,
2710 * contrary to the CIFS docs and Win NT practice.
2711 */
2712 if (server->mnt->flags & SMB_MOUNT_WIN95) {
2713 off_date = 2;
2714 off_time = 0;
2715 }
2716 date = WVAL(req->rq_data, off_date);
2717 time = WVAL(req->rq_data, off_time);
2718 attr->f_ctime.tv_sec = date_dos2unix(server, date, time);
2719 attr->f_ctime.tv_nsec = 0;
2720
2721 date = WVAL(req->rq_data, 4 + off_date);
2722 time = WVAL(req->rq_data, 4 + off_time);
2723 attr->f_atime.tv_sec = date_dos2unix(server, date, time);
2724 attr->f_atime.tv_nsec = 0;
2725
2726 date = WVAL(req->rq_data, 8 + off_date);
2727 time = WVAL(req->rq_data, 8 + off_time);
2728 attr->f_mtime.tv_sec = date_dos2unix(server, date, time);
2729 attr->f_mtime.tv_nsec = 0;
2730#ifdef SMBFS_DEBUG_TIMESTAMP
2731 printk(KERN_DEBUG "getattr_trans2: %s/%s, date=%x, time=%x, mtime=%ld\n",
2732 DENTRY_PATH(dir), date, time, attr->f_mtime);
2733#endif
2734 attr->f_size = DVAL(req->rq_data, 12);
2735 attr->attr = WVAL(req->rq_data, 20);
2736
2737out_free:
2738 smb_rput(req);
2739out:
2740 return result;
2741}
2742
2743static int
2744smb_proc_getattr_trans2_all(struct smb_sb_info *server, struct dentry *dir,
2745 struct smb_fattr *attr)
2746{
2747 struct smb_request *req;
2748 int result;
2749
2750 result = -ENOMEM;
2751 if (! (req = smb_alloc_request(server, PAGE_SIZE)))
2752 goto out;
2753
2754 result = smb_proc_getattr_trans2(server, dir, req,
2755 SMB_QUERY_FILE_ALL_INFO);
2756 if (result < 0)
2757 goto out_free;
2758
2759 attr->f_ctime = smb_ntutc2unixutc(LVAL(req->rq_data, 0));
2760 attr->f_atime = smb_ntutc2unixutc(LVAL(req->rq_data, 8));
2761 attr->f_mtime = smb_ntutc2unixutc(LVAL(req->rq_data, 16));
2762 /* change (24) */
2763 attr->attr = WVAL(req->rq_data, 32);
2764 /* pad? (34) */
2765 /* allocated size (40) */
2766 attr->f_size = LVAL(req->rq_data, 48);
2767
2768out_free:
2769 smb_rput(req);
2770out:
2771 return result;
2772}
2773
2774static int
2775smb_proc_getattr_unix(struct smb_sb_info *server, struct dentry *dir,
2776 struct smb_fattr *attr)
2777{
2778 struct smb_request *req;
2779 int result;
2780
2781 result = -ENOMEM;
2782 if (! (req = smb_alloc_request(server, PAGE_SIZE)))
2783 goto out;
2784
2785 result = smb_proc_getattr_trans2(server, dir, req,
2786 SMB_QUERY_FILE_UNIX_BASIC);
2787 if (result < 0)
2788 goto out_free;
2789
2790 smb_decode_unix_basic(attr, server, req->rq_data);
2791
2792out_free:
2793 smb_rput(req);
2794out:
2795 return result;
2796}
2797
2798static int
2799smb_proc_getattr_95(struct smb_sb_info *server, struct dentry *dir,
2800 struct smb_fattr *attr)
2801{
2802 struct inode *inode = dir->d_inode;
2803 int result;
2804
2805 /* FIXME: why not use the "all" version? */
2806 result = smb_proc_getattr_trans2_std(server, dir, attr);
2807 if (result < 0)
2808 goto out;
2809
2810 /*
2811 * None of the getattr versions here can make win9x return the right
2812 * filesize if there are changes made to an open file.
2813 * A seek-to-end does return the right size, but we only need to do
2814 * that on files we have written.
2815 */
2816 if (inode && SMB_I(inode)->flags & SMB_F_LOCALWRITE &&
2817 smb_is_open(inode))
2818 {
2819 __u16 fileid = SMB_I(inode)->fileid;
2820 attr->f_size = smb_proc_seek(server, fileid, 2, 0);
2821 }
2822
2823out:
2824 return result;
2825}
2826
2827static int
2828smb_proc_ops_wait(struct smb_sb_info *server)
2829{
2830 int result;
2831
2832 result = wait_event_interruptible_timeout(server->conn_wq,
2833 server->conn_complete, 30*HZ);
2834
2835 if (!result || signal_pending(current))
2836 return -EIO;
2837
2838 return 0;
2839}
2840
2841static int
2842smb_proc_getattr_null(struct smb_sb_info *server, struct dentry *dir,
2843 struct smb_fattr *fattr)
2844{
2845 int result;
2846
2847 if (smb_proc_ops_wait(server) < 0)
2848 return -EIO;
2849
2850 smb_init_dirent(server, fattr);
2851 result = server->ops->getattr(server, dir, fattr);
2852 smb_finish_dirent(server, fattr);
2853
2854 return result;
2855}
2856
2857static int
2858smb_proc_readdir_null(struct file *filp, void *dirent, filldir_t filldir,
2859 struct smb_cache_control *ctl)
2860{
2861 struct smb_sb_info *server = server_from_dentry(filp->f_path.dentry);
2862
2863 if (smb_proc_ops_wait(server) < 0)
2864 return -EIO;
2865
2866 return server->ops->readdir(filp, dirent, filldir, ctl);
2867}
2868
2869int
2870smb_proc_getattr(struct dentry *dir, struct smb_fattr *fattr)
2871{
2872 struct smb_sb_info *server = server_from_dentry(dir);
2873 int result;
2874
2875 smb_init_dirent(server, fattr);
2876 result = server->ops->getattr(server, dir, fattr);
2877 smb_finish_dirent(server, fattr);
2878
2879 return result;
2880}
2881
2882
2883/*
2884 * Because of bugs in the core protocol, we use this only to set
2885 * attributes. See smb_proc_settime() below for timestamp handling.
2886 *
2887 * Bugs Noted:
2888 * (1) If mtime is non-zero, both Win 3.1 and Win 95 fail
2889 * with an undocumented error (ERRDOS code 50). Setting
2890 * mtime to 0 allows the attributes to be set.
2891 * (2) The extra parameters following the name string aren't
2892 * in the CIFS docs, but seem to be necessary for operation.
2893 */
2894static int
2895smb_proc_setattr_core(struct smb_sb_info *server, struct dentry *dentry,
2896 __u16 attr)
2897{
2898 char *p;
2899 int result;
2900 struct smb_request *req;
2901
2902 result = -ENOMEM;
2903 if (! (req = smb_alloc_request(server, PAGE_SIZE)))
2904 goto out;
2905
2906 p = smb_setup_header(req, SMBsetatr, 8, 0);
2907 WSET(req->rq_header, smb_vwv0, attr);
2908 DSET(req->rq_header, smb_vwv1, 0); /* mtime */
2909 WSET(req->rq_header, smb_vwv3, 0); /* reserved values */
2910 WSET(req->rq_header, smb_vwv4, 0);
2911 WSET(req->rq_header, smb_vwv5, 0);
2912 WSET(req->rq_header, smb_vwv6, 0);
2913 WSET(req->rq_header, smb_vwv7, 0);
2914 result = smb_simple_encode_path(req, &p, dentry, NULL);
2915 if (result < 0)
2916 goto out_free;
2917 if (p + 2 > (char *)req->rq_buffer + req->rq_bufsize) {
2918 result = -ENAMETOOLONG;
2919 goto out_free;
2920 }
2921 *p++ = 4;
2922 *p++ = 0;
2923 smb_setup_bcc(req, p);
2924
2925 result = smb_request_ok(req, SMBsetatr, 0, 0);
2926 if (result < 0)
2927 goto out_free;
2928 result = 0;
2929
2930out_free:
2931 smb_rput(req);
2932out:
2933 return result;
2934}
2935
2936/*
2937 * Because of bugs in the trans2 setattr messages, we must set
2938 * attributes and timestamps separately. The core SMBsetatr
2939 * message seems to be the only reliable way to set attributes.
2940 */
2941int
2942smb_proc_setattr(struct dentry *dir, struct smb_fattr *fattr)
2943{
2944 struct smb_sb_info *server = server_from_dentry(dir);
2945 int result;
2946
2947 VERBOSE("setting %s/%s, open=%d\n",
2948 DENTRY_PATH(dir), smb_is_open(dir->d_inode));
2949 result = smb_proc_setattr_core(server, dir, fattr->attr);
2950 return result;
2951}
2952
2953/*
2954 * Sets the timestamps for an file open with write permissions.
2955 */
2956static int
2957smb_proc_setattr_ext(struct smb_sb_info *server,
2958 struct inode *inode, struct smb_fattr *fattr)
2959{
2960 __u16 date, time;
2961 int result;
2962 struct smb_request *req;
2963
2964 result = -ENOMEM;
2965 if (! (req = smb_alloc_request(server, 0)))
2966 goto out;
2967
2968 smb_setup_header(req, SMBsetattrE, 7, 0);
2969 WSET(req->rq_header, smb_vwv0, SMB_I(inode)->fileid);
2970 /* We don't change the creation time */
2971 WSET(req->rq_header, smb_vwv1, 0);
2972 WSET(req->rq_header, smb_vwv2, 0);
2973 date_unix2dos(server, fattr->f_atime.tv_sec, &date, &time);
2974 WSET(req->rq_header, smb_vwv3, date);
2975 WSET(req->rq_header, smb_vwv4, time);
2976 date_unix2dos(server, fattr->f_mtime.tv_sec, &date, &time);
2977 WSET(req->rq_header, smb_vwv5, date);
2978 WSET(req->rq_header, smb_vwv6, time);
2979#ifdef SMBFS_DEBUG_TIMESTAMP
2980 printk(KERN_DEBUG "smb_proc_setattr_ext: date=%d, time=%d, mtime=%ld\n",
2981 date, time, fattr->f_mtime);
2982#endif
2983
2984 req->rq_flags |= SMB_REQ_NORETRY;
2985 result = smb_request_ok(req, SMBsetattrE, 0, 0);
2986 if (result < 0)
2987 goto out_free;
2988 result = 0;
2989out_free:
2990 smb_rput(req);
2991out:
2992 return result;
2993}
2994
2995/*
2996 * Bugs Noted:
2997 * (1) The TRANSACT2_SETPATHINFO message under Win NT 4.0 doesn't
2998 * set the file's attribute flags.
2999 */
3000static int
3001smb_proc_setattr_trans2(struct smb_sb_info *server,
3002 struct dentry *dir, struct smb_fattr *fattr)
3003{
3004 __u16 date, time;
3005 char *p, *param;
3006 int result;
3007 char data[26];
3008 struct smb_request *req;
3009
3010 result = -ENOMEM;
3011 if (! (req = smb_alloc_request(server, PAGE_SIZE)))
3012 goto out;
3013 param = req->rq_buffer;
3014
3015 WSET(param, 0, 1); /* Info level SMB_INFO_STANDARD */
3016 DSET(param, 2, 0);
3017 result = smb_encode_path(server, param+6, SMB_MAXPATHLEN+1, dir, NULL);
3018 if (result < 0)
3019 goto out_free;
3020 p = param + 6 + result;
3021
3022 WSET(data, 0, 0); /* creation time */
3023 WSET(data, 2, 0);
3024 date_unix2dos(server, fattr->f_atime.tv_sec, &date, &time);
3025 WSET(data, 4, date);
3026 WSET(data, 6, time);
3027 date_unix2dos(server, fattr->f_mtime.tv_sec, &date, &time);
3028 WSET(data, 8, date);
3029 WSET(data, 10, time);
3030#ifdef SMBFS_DEBUG_TIMESTAMP
3031 printk(KERN_DEBUG "setattr_trans2: %s/%s, date=%x, time=%x, mtime=%ld\n",
3032 DENTRY_PATH(dir), date, time, fattr->f_mtime);
3033#endif
3034 DSET(data, 12, 0); /* size */
3035 DSET(data, 16, 0); /* blksize */
3036 WSET(data, 20, 0); /* attr */
3037 DSET(data, 22, 0); /* ULONG EA size */
3038
3039 req->rq_trans2_command = TRANSACT2_SETPATHINFO;
3040 req->rq_ldata = 26;
3041 req->rq_data = data;
3042 req->rq_lparm = p - param;
3043 req->rq_parm = param;
3044 req->rq_flags = 0;
3045 result = smb_add_request(req);
3046 if (result < 0)
3047 goto out_free;
3048 result = 0;
3049 if (req->rq_rcls != 0)
3050 result = smb_errno(req);
3051
3052out_free:
3053 smb_rput(req);
3054out:
3055 return result;
3056}
3057
3058/*
3059 * ATTR_MODE 0x001
3060 * ATTR_UID 0x002
3061 * ATTR_GID 0x004
3062 * ATTR_SIZE 0x008
3063 * ATTR_ATIME 0x010
3064 * ATTR_MTIME 0x020
3065 * ATTR_CTIME 0x040
3066 * ATTR_ATIME_SET 0x080
3067 * ATTR_MTIME_SET 0x100
3068 * ATTR_FORCE 0x200
3069 * ATTR_ATTR_FLAG 0x400
3070 *
3071 * major/minor should only be set by mknod.
3072 */
3073int
3074smb_proc_setattr_unix(struct dentry *d, struct iattr *attr,
3075 unsigned int major, unsigned int minor)
3076{
3077 struct smb_sb_info *server = server_from_dentry(d);
3078 u64 nttime;
3079 char *p, *param;
3080 int result;
3081 char data[100];
3082 struct smb_request *req;
3083
3084 result = -ENOMEM;
3085 if (! (req = smb_alloc_request(server, PAGE_SIZE)))
3086 goto out;
3087 param = req->rq_buffer;
3088
3089 DEBUG1("valid flags = 0x%04x\n", attr->ia_valid);
3090
3091 WSET(param, 0, SMB_SET_FILE_UNIX_BASIC);
3092 DSET(param, 2, 0);
3093 result = smb_encode_path(server, param+6, SMB_MAXPATHLEN+1, d, NULL);
3094 if (result < 0)
3095 goto out_free;
3096 p = param + 6 + result;
3097
3098 /* 0 L file size in bytes */
3099 /* 8 L file size on disk in bytes (block count) */
3100 /* 40 L uid */
3101 /* 48 L gid */
3102 /* 56 W file type enum */
3103 /* 60 L devmajor */
3104 /* 68 L devminor */
3105 /* 76 L unique ID (inode) */
3106 /* 84 L permissions */
3107 /* 92 L link count */
3108 LSET(data, 0, SMB_SIZE_NO_CHANGE);
3109 LSET(data, 8, SMB_SIZE_NO_CHANGE);
3110 LSET(data, 16, SMB_TIME_NO_CHANGE);
3111 LSET(data, 24, SMB_TIME_NO_CHANGE);
3112 LSET(data, 32, SMB_TIME_NO_CHANGE);
3113 LSET(data, 40, SMB_UID_NO_CHANGE);
3114 LSET(data, 48, SMB_GID_NO_CHANGE);
3115 DSET(data, 56, smb_filetype_from_mode(attr->ia_mode));
3116 LSET(data, 60, major);
3117 LSET(data, 68, minor);
3118 LSET(data, 76, 0);
3119 LSET(data, 84, SMB_MODE_NO_CHANGE);
3120 LSET(data, 92, 0);
3121
3122 if (attr->ia_valid & ATTR_SIZE) {
3123 LSET(data, 0, attr->ia_size);
3124 LSET(data, 8, 0); /* can't set anyway */
3125 }
3126
3127 /*
3128 * FIXME: check the conversion function it the correct one
3129 *
3130 * we can't set ctime but we might as well pass this to the server
3131 * and let it ignore it.
3132 */
3133 if (attr->ia_valid & ATTR_CTIME) {
3134 nttime = smb_unixutc2ntutc(attr->ia_ctime);
3135 LSET(data, 16, nttime);
3136 }
3137 if (attr->ia_valid & ATTR_ATIME) {
3138 nttime = smb_unixutc2ntutc(attr->ia_atime);
3139 LSET(data, 24, nttime);
3140 }
3141 if (attr->ia_valid & ATTR_MTIME) {
3142 nttime = smb_unixutc2ntutc(attr->ia_mtime);
3143 LSET(data, 32, nttime);
3144 }
3145
3146 if (attr->ia_valid & ATTR_UID) {
3147 LSET(data, 40, attr->ia_uid);
3148 }
3149 if (attr->ia_valid & ATTR_GID) {
3150 LSET(data, 48, attr->ia_gid);
3151 }
3152
3153 if (attr->ia_valid & ATTR_MODE) {
3154 LSET(data, 84, attr->ia_mode);
3155 }
3156
3157 req->rq_trans2_command = TRANSACT2_SETPATHINFO;
3158 req->rq_ldata = 100;
3159 req->rq_data = data;
3160 req->rq_lparm = p - param;
3161 req->rq_parm = param;
3162 req->rq_flags = 0;
3163 result = smb_add_request(req);
3164
3165out_free:
3166 smb_rput(req);
3167out:
3168 return result;
3169}
3170
3171
3172/*
3173 * Set the modify and access timestamps for a file.
3174 *
3175 * Incredibly enough, in all of SMB there is no message to allow
3176 * setting both attributes and timestamps at once.
3177 *
3178 * Bugs Noted:
3179 * (1) Win 95 doesn't support the TRANSACT2_SETFILEINFO message
3180 * with info level 1 (INFO_STANDARD).
3181 * (2) Win 95 seems not to support setting directory timestamps.
3182 * (3) Under the core protocol apparently the only way to set the
3183 * timestamp is to open and close the file.
3184 */
3185int
3186smb_proc_settime(struct dentry *dentry, struct smb_fattr *fattr)
3187{
3188 struct smb_sb_info *server = server_from_dentry(dentry);
3189 struct inode *inode = dentry->d_inode;
3190 int result;
3191
3192 VERBOSE("setting %s/%s, open=%d\n",
3193 DENTRY_PATH(dentry), smb_is_open(inode));
3194
3195 /* setting the time on a Win95 server fails (tridge) */
3196 if (server->opt.protocol >= SMB_PROTOCOL_LANMAN2 &&
3197 !(server->mnt->flags & SMB_MOUNT_WIN95)) {
3198 if (smb_is_open(inode) && SMB_I(inode)->access != SMB_O_RDONLY)
3199 result = smb_proc_setattr_ext(server, inode, fattr);
3200 else
3201 result = smb_proc_setattr_trans2(server, dentry, fattr);
3202 } else {
3203 /*
3204 * Fail silently on directories ... timestamp can't be set?
3205 */
3206 result = 0;
3207 if (S_ISREG(inode->i_mode)) {
3208 /*
3209 * Set the mtime by opening and closing the file.
3210 * Note that the file is opened read-only, but this
3211 * still allows us to set the date (tridge)
3212 */
3213 result = -EACCES;
3214 if (!smb_is_open(inode))
3215 smb_proc_open(server, dentry, SMB_O_RDONLY);
3216 if (smb_is_open(inode)) {
3217 inode->i_mtime = fattr->f_mtime;
3218 result = smb_proc_close_inode(server, inode);
3219 }
3220 }
3221 }
3222
3223 return result;
3224}
3225
3226int
3227smb_proc_dskattr(struct dentry *dentry, struct kstatfs *attr)
3228{
3229 struct smb_sb_info *server = SMB_SB(dentry->d_sb);
3230 int result;
3231 char *p;
3232 long unit;
3233 struct smb_request *req;
3234
3235 result = -ENOMEM;
3236 if (! (req = smb_alloc_request(server, 0)))
3237 goto out;
3238
3239 smb_setup_header(req, SMBdskattr, 0, 0);
3240 if ((result = smb_request_ok(req, SMBdskattr, 5, 0)) < 0)
3241 goto out_free;
3242 p = SMB_VWV(req->rq_header);
3243 unit = (WVAL(p, 2) * WVAL(p, 4)) >> SMB_ST_BLKSHIFT;
3244 attr->f_blocks = WVAL(p, 0) * unit;
3245 attr->f_bsize = SMB_ST_BLKSIZE;
3246 attr->f_bavail = attr->f_bfree = WVAL(p, 6) * unit;
3247 result = 0;
3248
3249out_free:
3250 smb_rput(req);
3251out:
3252 return result;
3253}
3254
3255int
3256smb_proc_read_link(struct smb_sb_info *server, struct dentry *d,
3257 char *buffer, int len)
3258{
3259 char *p, *param;
3260 int result;
3261 struct smb_request *req;
3262
3263 DEBUG1("readlink of %s/%s\n", DENTRY_PATH(d));
3264
3265 result = -ENOMEM;
3266 if (! (req = smb_alloc_request(server, PAGE_SIZE)))
3267 goto out;
3268 param = req->rq_buffer;
3269
3270 WSET(param, 0, SMB_QUERY_FILE_UNIX_LINK);
3271 DSET(param, 2, 0);
3272 result = smb_encode_path(server, param+6, SMB_MAXPATHLEN+1, d, NULL);
3273 if (result < 0)
3274 goto out_free;
3275 p = param + 6 + result;
3276
3277 req->rq_trans2_command = TRANSACT2_QPATHINFO;
3278 req->rq_ldata = 0;
3279 req->rq_data = NULL;
3280 req->rq_lparm = p - param;
3281 req->rq_parm = param;
3282 req->rq_flags = 0;
3283 result = smb_add_request(req);
3284 if (result < 0)
3285 goto out_free;
3286 DEBUG1("for %s: result=%d, rcls=%d, err=%d\n",
3287 &param[6], result, req->rq_rcls, req->rq_err);
3288
3289 /* copy data up to the \0 or buffer length */
3290 result = len;
3291 if (req->rq_ldata < len)
3292 result = req->rq_ldata;
3293 strncpy(buffer, req->rq_data, result);
3294
3295out_free:
3296 smb_rput(req);
3297out:
3298 return result;
3299}
3300
3301
3302/*
3303 * Create a symlink object called dentry which points to oldpath.
3304 * Samba does not permit dangling links but returns a suitable error message.
3305 */
3306int
3307smb_proc_symlink(struct smb_sb_info *server, struct dentry *d,
3308 const char *oldpath)
3309{
3310 char *p, *param;
3311 int result;
3312 struct smb_request *req;
3313
3314 result = -ENOMEM;
3315 if (! (req = smb_alloc_request(server, PAGE_SIZE)))
3316 goto out;
3317 param = req->rq_buffer;
3318
3319 WSET(param, 0, SMB_SET_FILE_UNIX_LINK);
3320 DSET(param, 2, 0);
3321 result = smb_encode_path(server, param + 6, SMB_MAXPATHLEN+1, d, NULL);
3322 if (result < 0)
3323 goto out_free;
3324 p = param + 6 + result;
3325
3326 req->rq_trans2_command = TRANSACT2_SETPATHINFO;
3327 req->rq_ldata = strlen(oldpath) + 1;
3328 req->rq_data = (char *) oldpath;
3329 req->rq_lparm = p - param;
3330 req->rq_parm = param;
3331 req->rq_flags = 0;
3332 result = smb_add_request(req);
3333 if (result < 0)
3334 goto out_free;
3335
3336 DEBUG1("for %s: result=%d, rcls=%d, err=%d\n",
3337 &param[6], result, req->rq_rcls, req->rq_err);
3338 result = 0;
3339
3340out_free:
3341 smb_rput(req);
3342out:
3343 return result;
3344}
3345
3346/*
3347 * Create a hard link object called new_dentry which points to dentry.
3348 */
3349int
3350smb_proc_link(struct smb_sb_info *server, struct dentry *dentry,
3351 struct dentry *new_dentry)
3352{
3353 char *p, *param;
3354 int result;
3355 struct smb_request *req;
3356
3357 result = -ENOMEM;
3358 if (! (req = smb_alloc_request(server, PAGE_SIZE)))
3359 goto out;
3360 param = req->rq_buffer;
3361
3362 WSET(param, 0, SMB_SET_FILE_UNIX_HLINK);
3363 DSET(param, 2, 0);
3364 result = smb_encode_path(server, param + 6, SMB_MAXPATHLEN+1,
3365 new_dentry, NULL);
3366 if (result < 0)
3367 goto out_free;
3368 p = param + 6 + result;
3369
3370 /* Grr, pointless separation of parameters and data ... */
3371 req->rq_data = p;
3372 req->rq_ldata = smb_encode_path(server, p, SMB_MAXPATHLEN+1,
3373 dentry, NULL);
3374
3375 req->rq_trans2_command = TRANSACT2_SETPATHINFO;
3376 req->rq_lparm = p - param;
3377 req->rq_parm = param;
3378 req->rq_flags = 0;
3379 result = smb_add_request(req);
3380 if (result < 0)
3381 goto out_free;
3382
3383 DEBUG1("for %s: result=%d, rcls=%d, err=%d\n",
3384 &param[6], result, req->rq_rcls, req->rq_err);
3385 result = 0;
3386
3387out_free:
3388 smb_rput(req);
3389out:
3390 return result;
3391}
3392
3393static int
3394smb_proc_query_cifsunix(struct smb_sb_info *server)
3395{
3396 int result;
3397 int major, minor;
3398 u64 caps;
3399 char param[2];
3400 struct smb_request *req;
3401
3402 result = -ENOMEM;
3403 if (! (req = smb_alloc_request(server, 100)))
3404 goto out;
3405
3406 WSET(param, 0, SMB_QUERY_CIFS_UNIX_INFO);
3407
3408 req->rq_trans2_command = TRANSACT2_QFSINFO;
3409 req->rq_ldata = 0;
3410 req->rq_data = NULL;
3411 req->rq_lparm = 2;
3412 req->rq_parm = param;
3413 req->rq_flags = 0;
3414 result = smb_add_request(req);
3415 if (result < 0)
3416 goto out_free;
3417
3418 if (req->rq_ldata < 12) {
3419 PARANOIA("Not enough data\n");
3420 goto out_free;
3421 }
3422 major = WVAL(req->rq_data, 0);
3423 minor = WVAL(req->rq_data, 2);
3424
3425 DEBUG1("Server implements CIFS Extensions for UNIX systems v%d.%d\n",
3426 major, minor);
3427 /* FIXME: verify that we are ok with this major/minor? */
3428
3429 caps = LVAL(req->rq_data, 4);
3430 DEBUG1("Server capabilities 0x%016llx\n", caps);
3431
3432out_free:
3433 smb_rput(req);
3434out:
3435 return result;
3436}
3437
3438
3439static void
3440install_ops(struct smb_ops *dst, struct smb_ops *src)
3441{
3442 memcpy(dst, src, sizeof(void *) * SMB_OPS_NUM_STATIC);
3443}
3444
/*
 * Per-server-flavour operation tables. Members that are not named in an
 * initializer are left zero (NULL) by the designated-initializer rules.
 * NOTE(review): the code that picks a table for a connection is not in this
 * part of the file -- confirm the selection rules there.
 */
3445/* < LANMAN2: uses the "core" getattr and short readdir handlers */
3446static struct smb_ops smb_ops_core =
3447{
3448 .read = smb_proc_read,
3449 .write = smb_proc_write,
3450 .readdir = smb_proc_readdir_short,
3451 .getattr = smb_proc_getattr_core,
3452 .truncate = smb_proc_trunc32,
3453};
3454
3455/* LANMAN2, OS/2, others? Long readdir, trans2 "standard" getattr. */
3456static struct smb_ops smb_ops_os2 =
3457{
3458 .read = smb_proc_read,
3459 .write = smb_proc_write,
3460 .readdir = smb_proc_readdir_long,
3461 .getattr = smb_proc_getattr_trans2_std,
3462 .truncate = smb_proc_trunc32,
3463};
3464
3465/* Win95, and possibly some NetApp versions too */
3466static struct smb_ops smb_ops_win95 =
3467{
3468 .read = smb_proc_read, /* does not support 12word readX */
3469 .write = smb_proc_write,
3470 .readdir = smb_proc_readdir_long,
3471 .getattr = smb_proc_getattr_95,
3472 .truncate = smb_proc_trunc95,
3473};
3474
3475/* Samba, NT4 and NT5: readX/writeX and the 64-bit truncate handler */
3476static struct smb_ops smb_ops_winNT =
3477{
3478 .read = smb_proc_readX,
3479 .write = smb_proc_writeX,
3480 .readdir = smb_proc_readdir_long,
3481 .getattr = smb_proc_getattr_trans2_all,
3482 .truncate = smb_proc_trunc64,
3483};
3484
3485/* Samba w/ unix extensions. Others? Same as winNT except for getattr. */
3486static struct smb_ops smb_ops_unix =
3487{
3488 .read = smb_proc_readX,
3489 .write = smb_proc_writeX,
3490 .readdir = smb_proc_readdir_long,
3491 .getattr = smb_proc_getattr_unix,
3492 /* FIXME: core/ext/time setattr needs to be cleaned up! */
3493 /* .setattr = smb_proc_setattr_unix, */
3494 .truncate = smb_proc_trunc64,
3495};
3496
3497/* Place holder until real ops are in place; only readdir/getattr are set */
3498static struct smb_ops smb_ops_null =
3499{
3500 .readdir = smb_proc_readdir_null,
3501 .getattr = smb_proc_getattr_null,
3502};
3503
/*
 * Fill *ops from the placeholder table smb_ops_null (via install_ops()),
 * so that only the null readdir/getattr handlers are present.
 */
3504void smb_install_null_ops(struct smb_ops *ops)
3505{
3506 install_ops(ops, &smb_ops_null);
3507}
diff --git a/fs/smbfs/proto.h b/fs/smbfs/proto.h
deleted file mode 100644
index 05939a6f43e6..000000000000
--- a/fs/smbfs/proto.h
+++ /dev/null
@@ -1,87 +0,0 @@
1/*
2 * Autogenerated with cproto on: Sat Sep 13 17:18:51 CEST 2003
3 */
4
5struct smb_request;
6struct sock;
7struct statfs;
8
9/* proc.c */
10extern int smb_setcodepage(struct smb_sb_info *server, struct smb_nls_codepage *cp);
11extern __u32 smb_len(__u8 *p);
12extern int smb_get_rsize(struct smb_sb_info *server);
13extern int smb_get_wsize(struct smb_sb_info *server);
14extern int smb_errno(struct smb_request *req);
15extern int smb_newconn(struct smb_sb_info *server, struct smb_conn_opt *opt);
16extern __u8 *smb_setup_header(struct smb_request *req, __u8 command, __u16 wct, __u16 bcc);
17extern int smb_open(struct dentry *dentry, int wish);
18extern int smb_close(struct inode *ino);
19extern int smb_close_fileid(struct dentry *dentry, __u16 fileid);
20extern int smb_proc_create(struct dentry *dentry, __u16 attr, time_t ctime, __u16 *fileid);
21extern int smb_proc_mv(struct dentry *old_dentry, struct dentry *new_dentry);
22extern int smb_proc_mkdir(struct dentry *dentry);
23extern int smb_proc_rmdir(struct dentry *dentry);
24extern int smb_proc_unlink(struct dentry *dentry);
25extern int smb_proc_flush(struct smb_sb_info *server, __u16 fileid);
26extern void smb_init_root_dirent(struct smb_sb_info *server, struct smb_fattr *fattr,
27 struct super_block *sb);
28extern int smb_proc_getattr(struct dentry *dir, struct smb_fattr *fattr);
29extern int smb_proc_setattr(struct dentry *dir, struct smb_fattr *fattr);
30extern int smb_proc_setattr_unix(struct dentry *d, struct iattr *attr, unsigned int major, unsigned int minor);
31extern int smb_proc_settime(struct dentry *dentry, struct smb_fattr *fattr);
32extern int smb_proc_dskattr(struct dentry *dentry, struct kstatfs *attr);
33extern int smb_proc_read_link(struct smb_sb_info *server, struct dentry *d, char *buffer, int len);
34extern int smb_proc_symlink(struct smb_sb_info *server, struct dentry *d, const char *oldpath);
35extern int smb_proc_link(struct smb_sb_info *server, struct dentry *dentry, struct dentry *new_dentry);
36extern void smb_install_null_ops(struct smb_ops *ops);
37/* dir.c */
38extern const struct file_operations smb_dir_operations;
39extern const struct inode_operations smb_dir_inode_operations;
40extern const struct inode_operations smb_dir_inode_operations_unix;
41extern void smb_new_dentry(struct dentry *dentry);
42extern void smb_renew_times(struct dentry *dentry);
43/* cache.c */
44extern void smb_invalid_dir_cache(struct inode *dir);
45extern void smb_invalidate_dircache_entries(struct dentry *parent);
46extern struct dentry *smb_dget_fpos(struct dentry *dentry, struct dentry *parent, unsigned long fpos);
47extern int smb_fill_cache(struct file *filp, void *dirent, filldir_t filldir, struct smb_cache_control *ctrl, struct qstr *qname, struct smb_fattr *entry);
48/* sock.c */
49extern void smb_data_ready(struct sock *sk, int len);
50extern int smb_valid_socket(struct inode *inode);
51extern void smb_close_socket(struct smb_sb_info *server);
52extern int smb_recv_available(struct smb_sb_info *server);
53extern int smb_receive_header(struct smb_sb_info *server);
54extern int smb_receive_drop(struct smb_sb_info *server);
55extern int smb_receive(struct smb_sb_info *server, struct smb_request *req);
56extern int smb_send_request(struct smb_request *req);
57/* inode.c */
58extern struct inode *smb_iget(struct super_block *sb, struct smb_fattr *fattr);
59extern void smb_get_inode_attr(struct inode *inode, struct smb_fattr *fattr);
60extern void smb_set_inode_attr(struct inode *inode, struct smb_fattr *fattr);
61extern void smb_invalidate_inodes(struct smb_sb_info *server);
62extern int smb_revalidate_inode(struct dentry *dentry);
63extern int smb_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat);
64extern int smb_notify_change(struct dentry *dentry, struct iattr *attr);
65/* file.c */
66extern const struct address_space_operations smb_file_aops;
67extern const struct file_operations smb_file_operations;
68extern const struct inode_operations smb_file_inode_operations;
69/* ioctl.c */
70extern long smb_ioctl(struct file *filp, unsigned int cmd, unsigned long arg);
71/* smbiod.c */
72extern void smbiod_wake_up(void);
73extern int smbiod_register_server(struct smb_sb_info *server);
74extern void smbiod_unregister_server(struct smb_sb_info *server);
75extern void smbiod_flush(struct smb_sb_info *server);
76extern int smbiod_retry(struct smb_sb_info *server);
77/* request.c */
78extern int smb_init_request_cache(void);
79extern void smb_destroy_request_cache(void);
80extern struct smb_request *smb_alloc_request(struct smb_sb_info *server, int bufsize);
81extern void smb_rput(struct smb_request *req);
82extern int smb_add_request(struct smb_request *req);
83extern int smb_request_send_server(struct smb_sb_info *server);
84extern int smb_request_recv(struct smb_sb_info *server);
85/* symlink.c */
86extern int smb_symlink(struct inode *inode, struct dentry *dentry, const char *oldname);
87extern const struct inode_operations smb_link_inode_operations;
diff --git a/fs/smbfs/request.c b/fs/smbfs/request.c
deleted file mode 100644
index 45f45933e862..000000000000
--- a/fs/smbfs/request.c
+++ /dev/null
@@ -1,818 +0,0 @@
1/*
2 * request.c
3 *
4 * Copyright (C) 2001 by Urban Widmark
5 *
6 * Please add a note about your changes to smbfs in the ChangeLog file.
7 */
8
9#include <linux/kernel.h>
10#include <linux/types.h>
11#include <linux/fs.h>
12#include <linux/slab.h>
13#include <linux/net.h>
14#include <linux/sched.h>
15
16#include <linux/smb_fs.h>
17#include <linux/smbno.h>
18#include <linux/smb_mount.h>
19
20#include "smb_debug.h"
21#include "request.h"
22#include "proto.h"
23
24/* #define SMB_SLAB_DEBUG (SLAB_RED_ZONE | SLAB_POISON) */
25#define SMB_SLAB_DEBUG 0
26
27/* cache for request structures */
28static struct kmem_cache *req_cachep;
29
30static int smb_request_send_req(struct smb_request *req);
31
32/*
33 /proc/slabinfo:
34 name, active, num, objsize, active_slabs, num_slabs, #pages
35*/
36
37
38int smb_init_request_cache(void)
39{
40 req_cachep = kmem_cache_create("smb_request",
41 sizeof(struct smb_request), 0,
42 SMB_SLAB_DEBUG | SLAB_HWCACHE_ALIGN,
43 NULL);
44 if (req_cachep == NULL)
45 return -ENOMEM;
46
47 return 0;
48}
49
/* Destroy the request slab cache created by smb_init_request_cache(). */
50void smb_destroy_request_cache(void)
51{
52 kmem_cache_destroy(req_cachep);
53}
54
55/*
56 * Allocate and initialise a request structure
57 */
58static struct smb_request *smb_do_alloc_request(struct smb_sb_info *server,
59 int bufsize)
60{
61 struct smb_request *req;
62 unsigned char *buf = NULL;
63
64 req = kmem_cache_zalloc(req_cachep, GFP_KERNEL);
65 VERBOSE("allocating request: %p\n", req);
66 if (!req)
67 goto out;
68
69 if (bufsize > 0) {
70 buf = kmalloc(bufsize, GFP_NOFS);
71 if (!buf) {
72 kmem_cache_free(req_cachep, req);
73 return NULL;
74 }
75 }
76
77 req->rq_buffer = buf;
78 req->rq_bufsize = bufsize;
79 req->rq_server = server;
80 init_waitqueue_head(&req->rq_wait);
81 INIT_LIST_HEAD(&req->rq_queue);
82 atomic_set(&req->rq_count, 1);
83
84out:
85 return req;
86}
87
88struct smb_request *smb_alloc_request(struct smb_sb_info *server, int bufsize)
89{
90 struct smb_request *req = NULL;
91
92 for (;;) {
93 atomic_inc(&server->nr_requests);
94 if (atomic_read(&server->nr_requests) <= MAX_REQUEST_HARD) {
95 req = smb_do_alloc_request(server, bufsize);
96 if (req != NULL)
97 break;
98 }
99
100#if 0
101 /*
102 * Try to free up at least one request in order to stay
103 * below the hard limit
104 */
105 if (nfs_try_to_free_pages(server))
106 continue;
107
108 if (fatal_signal_pending(current))
109 return ERR_PTR(-ERESTARTSYS);
110 current->policy = SCHED_YIELD;
111 schedule();
112#else
113 /* FIXME: we want something like nfs does above, but that
114 requires changes to all callers and can wait. */
115 break;
116#endif
117 }
118 return req;
119}
120
121static void smb_free_request(struct smb_request *req)
122{
123 atomic_dec(&req->rq_server->nr_requests);
124 if (req->rq_buffer && !(req->rq_flags & SMB_REQ_STATIC))
125 kfree(req->rq_buffer);
126 kfree(req->rq_trans2buffer);
127 kmem_cache_free(req_cachep, req);
128}
129
130/*
131 * What prevents an rget from racing with an rput? The count must never drop
132 * to zero while the request is in use. Only rput when it is ok for the
133 * request to be freed.
 *
 * smb_rget() takes one additional reference on req.
133 */
134static void smb_rget(struct smb_request *req)
135{
136 atomic_inc(&req->rq_count);
137}
138void smb_rput(struct smb_request *req)
139{
140 if (atomic_dec_and_test(&req->rq_count)) {
141 list_del_init(&req->rq_queue);
142 smb_free_request(req);
143 }
144}
145
146/* setup to receive the data part of the SMB */
147static int smb_setup_bcc(struct smb_request *req)
148{
149 int result = 0;
150 req->rq_rlen = smb_len(req->rq_header) + 4 - req->rq_bytes_recvd;
151
152 if (req->rq_rlen > req->rq_bufsize) {
153 PARANOIA("Packet too large %d > %d\n",
154 req->rq_rlen, req->rq_bufsize);
155 return -ENOBUFS;
156 }
157
158 req->rq_iov[0].iov_base = req->rq_buffer;
159 req->rq_iov[0].iov_len = req->rq_rlen;
160 req->rq_iovlen = 1;
161
162 return result;
163}
164
165/*
166 * Prepare a "normal" request structure.
167 */
168static int smb_setup_request(struct smb_request *req)
169{
170 int len = smb_len(req->rq_header) + 4;
171 req->rq_slen = len;
172
173 /* if we expect a data part in the reply we set the iov's to read it */
174 if (req->rq_resp_bcc)
175 req->rq_setup_read = smb_setup_bcc;
176
177 /* This tries to support re-using the same request */
178 req->rq_bytes_sent = 0;
179 req->rq_rcls = 0;
180 req->rq_err = 0;
181 req->rq_errno = 0;
182 req->rq_fragment = 0;
183 kfree(req->rq_trans2buffer);
184 req->rq_trans2buffer = NULL;
185
186 return 0;
187}
188
189/*
190 * Prepare a transaction2 request structure
191 */
192static int smb_setup_trans2request(struct smb_request *req)
193{
194 struct smb_sb_info *server = req->rq_server;
195 int mparam, mdata;
196 static unsigned char padding[4];
197
198 /* I know the following is very ugly, but I want to build the
199 smb packet as efficiently as possible. */
200
201 const int smb_parameters = 15;
202 const int header = SMB_HEADER_LEN + 2 * smb_parameters + 2;
203 const int oparam = ALIGN(header + 3, sizeof(u32));
204 const int odata = ALIGN(oparam + req->rq_lparm, sizeof(u32));
205 const int bcc = (req->rq_data ? odata + req->rq_ldata :
206 oparam + req->rq_lparm) - header;
207
208 if ((bcc + oparam) > server->opt.max_xmit)
209 return -ENOMEM;
210 smb_setup_header(req, SMBtrans2, smb_parameters, bcc);
211
212 /*
213 * max parameters + max data + max setup == bufsize to make NT4 happy
214 * and not abort the transfer or split into multiple responses. It also
215 * makes smbfs happy as handling packets larger than the buffer size
216 * is extra work.
217 *
218 * OS/2 is probably going to hate me for this ...
219 */
220 mparam = SMB_TRANS2_MAX_PARAM;
221 mdata = req->rq_bufsize - mparam;
222
223 mdata = server->opt.max_xmit - mparam - 100;
224 if (mdata < 1024) {
225 mdata = 1024;
226 mparam = 20;
227 }
228
229#if 0
230 /* NT/win2k has ~4k max_xmit, so with this we request more than it wants
231 to return as one SMB. Useful for testing the fragmented trans2
232 handling. */
233 mdata = 8192;
234#endif
235
236 WSET(req->rq_header, smb_tpscnt, req->rq_lparm);
237 WSET(req->rq_header, smb_tdscnt, req->rq_ldata);
238 WSET(req->rq_header, smb_mprcnt, mparam);
239 WSET(req->rq_header, smb_mdrcnt, mdata);
240 WSET(req->rq_header, smb_msrcnt, 0); /* max setup always 0 ? */
241 WSET(req->rq_header, smb_flags, 0);
242 DSET(req->rq_header, smb_timeout, 0);
243 WSET(req->rq_header, smb_pscnt, req->rq_lparm);
244 WSET(req->rq_header, smb_psoff, oparam - 4);
245 WSET(req->rq_header, smb_dscnt, req->rq_ldata);
246 WSET(req->rq_header, smb_dsoff, req->rq_data ? odata - 4 : 0);
247 *(req->rq_header + smb_suwcnt) = 0x01; /* setup count */
248 *(req->rq_header + smb_suwcnt + 1) = 0x00; /* reserved */
249 WSET(req->rq_header, smb_setup0, req->rq_trans2_command);
250
251 req->rq_iovlen = 2;
252 req->rq_iov[0].iov_base = (void *) req->rq_header;
253 req->rq_iov[0].iov_len = oparam;
254 req->rq_iov[1].iov_base = (req->rq_parm==NULL) ? padding : req->rq_parm;
255 req->rq_iov[1].iov_len = req->rq_lparm;
256 req->rq_slen = oparam + req->rq_lparm;
257
258 if (req->rq_data) {
259 req->rq_iovlen += 2;
260 req->rq_iov[2].iov_base = padding;
261 req->rq_iov[2].iov_len = odata - oparam - req->rq_lparm;
262 req->rq_iov[3].iov_base = req->rq_data;
263 req->rq_iov[3].iov_len = req->rq_ldata;
264 req->rq_slen = odata + req->rq_ldata;
265 }
266
267 /* always a data part for trans2 replies */
268 req->rq_setup_read = smb_setup_bcc;
269
270 return 0;
271}
272
273/*
274 * Add a request and tell smbiod to process it
 *
 * Steps, in order:
 *  - reset the request (smb_setup_request) and, for trans2 commands, build
 *    the trans2 header; a trans2 request without rq_buffer fails with -EIO;
 *  - take the server lock (returns -EINTR if interrupted) and assign a
 *    multiplex id, restarting from 0 once server->mid exceeds 0xf000;
 *  - if the connection is valid and nothing is queued ahead, try to send
 *    directly; unless the request was sent completely it is appended to
 *    the xmitq;
 *  - take an extra reference for the queue, wake smbiod, and wait up to
 *    30 seconds for SMB_REQ_RECEIVED.
 *
 * Returns req->rq_errno: the translated server error if rq_rcls is set,
 * -ERESTARTSYS if a signal is pending, otherwise whatever rq_errno holds.
275 */
276int smb_add_request(struct smb_request *req)
277{
278 long timeleft;
279 struct smb_sb_info *server = req->rq_server;
280 int result = 0;
281
282 smb_setup_request(req);
283 if (req->rq_trans2_command) {
284 if (req->rq_buffer == NULL) {
285 PARANOIA("trans2 attempted without response buffer!\n");
286 return -EIO;
287 }
288 result = smb_setup_trans2request(req);
289 }
290 if (result < 0)
291 return result;
292
293#ifdef SMB_DEBUG_PACKET_SIZE
294 add_xmit_stats(req);
295#endif
296
297 /* add 'req' to the queue of requests */
298 if (smb_lock_server_interruptible(server))
299 return -EINTR;
300
301 /*
302 * Try to send the request as the process. If that fails we queue the
303 * request and let smbiod send it later.
304 */
305
306 /* FIXME: each server has a number on the maximum number of parallel
307 requests. 10, 50 or so. We should not allow more requests to be
308 active. */
309 if (server->mid > 0xf000)
310 server->mid = 0;
311 req->rq_mid = server->mid++;
312 WSET(req->rq_header, smb_mid, req->rq_mid);
313
314 result = 0;
315 if (server->state == CONN_VALID) {
316 if (list_empty(&server->xmitq))
317 result = smb_request_send_req(req);
318 if (result < 0) {
319 /* Connection lost? */
320 server->conn_error = result;
321 server->state = CONN_INVALID;
322 }
323 }
 /* result == 1 means smb_request_send_req() already moved req to recvq */
324 if (result != 1)
325 list_add_tail(&req->rq_queue, &server->xmitq);
 /* reference held on behalf of the queue the request now sits on */
326 smb_rget(req);
327
328 if (server->state != CONN_VALID)
329 smbiod_retry(server);
330
331 smb_unlock_server(server);
332
333 smbiod_wake_up();
334
 /* timeleft == 0 means the 30 second timeout expired */
335 timeleft = wait_event_interruptible_timeout(req->rq_wait,
336 req->rq_flags & SMB_REQ_RECEIVED, 30*HZ);
337 if (!timeleft || signal_pending(current)) {
338 /*
339 * On timeout or on interrupt we want to try and remove the
340 * request from the recvq/xmitq.
341 * First check if the request is still part of a queue. (May
342 * have been removed by some error condition)
343 */
344 smb_lock_server(server);
345 if (!list_empty(&req->rq_queue)) {
346 list_del_init(&req->rq_queue);
 /* drop the queue's reference taken by smb_rget() above */
347 smb_rput(req);
348 }
349 smb_unlock_server(server);
350 }
351
352 if (!timeleft) {
353 PARANOIA("request [%p, mid=%d] timed out!\n",
354 req, req->rq_mid);
355 VERBOSE("smb_com: %02x\n", *(req->rq_header + smb_com));
356 VERBOSE("smb_rcls: %02x\n", *(req->rq_header + smb_rcls));
357 VERBOSE("smb_flg: %02x\n", *(req->rq_header + smb_flg));
358 VERBOSE("smb_tid: %04x\n", WVAL(req->rq_header, smb_tid));
359 VERBOSE("smb_pid: %04x\n", WVAL(req->rq_header, smb_pid));
360 VERBOSE("smb_uid: %04x\n", WVAL(req->rq_header, smb_uid));
361 VERBOSE("smb_mid: %04x\n", WVAL(req->rq_header, smb_mid));
362 VERBOSE("smb_wct: %02x\n", *(req->rq_header + smb_wct));
363
 /* report the timeout as a server-class error */
364 req->rq_rcls = ERRSRV;
365 req->rq_err = ERRtimeout;
366
367 /* Just in case it was "stuck" */
368 smbiod_wake_up();
369 }
370 VERBOSE("woke up, rcls=%d\n", req->rq_rcls);
371
372 if (req->rq_rcls != 0)
373 req->rq_errno = smb_errno(req);
 /* a pending signal overrides any error computed above */
374 if (signal_pending(current))
375 req->rq_errno = -ERESTARTSYS;
376 return req->rq_errno;
377}
378
379/*
380 * Send a request and place it on the recvq if successfully sent.
381 * Must be called with the server lock held.
382 */
383static int smb_request_send_req(struct smb_request *req)
384{
385 struct smb_sb_info *server = req->rq_server;
386 int result;
387
388 if (req->rq_bytes_sent == 0) {
389 WSET(req->rq_header, smb_tid, server->opt.tid);
390 WSET(req->rq_header, smb_pid, 1);
391 WSET(req->rq_header, smb_uid, server->opt.server_uid);
392 }
393
394 result = smb_send_request(req);
395 if (result < 0 && result != -EAGAIN)
396 goto out;
397
398 result = 0;
399 if (!(req->rq_flags & SMB_REQ_TRANSMITTED))
400 goto out;
401
402 list_move_tail(&req->rq_queue, &server->recvq);
403 result = 1;
404out:
405 return result;
406}
407
408/*
409 * Sends one request for this server. (smbiod)
410 * Must be called with the server lock held.
411 * Returns: <0 on error
412 * 0 if no request could be completely sent
413 * 1 if all data for one request was sent
414 */
415int smb_request_send_server(struct smb_sb_info *server)
416{
417 struct list_head *head;
418 struct smb_request *req;
419 int result;
420
421 if (server->state != CONN_VALID)
422 return 0;
423
424 /* dequeue first request, if any */
425 req = NULL;
426 head = server->xmitq.next;
427 if (head != &server->xmitq) {
428 req = list_entry(head, struct smb_request, rq_queue);
429 }
430 if (!req)
431 return 0;
432
433 result = smb_request_send_req(req);
434 if (result < 0) {
435 server->conn_error = result;
436 list_move(&req->rq_queue, &server->xmitq);
437 result = -EIO;
438 goto out;
439 }
440
441out:
442 return result;
443}
444
445/*
446 * Try to find a request matching this "mid". Typically the first entry will
447 * be the matching one.
448 */
449static struct smb_request *find_request(struct smb_sb_info *server, int mid)
450{
451 struct list_head *tmp;
452 struct smb_request *req = NULL;
453
454 list_for_each(tmp, &server->recvq) {
455 req = list_entry(tmp, struct smb_request, rq_queue);
456 if (req->rq_mid == mid) {
457 break;
458 }
459 req = NULL;
460 }
461
462 if (!req) {
463 VERBOSE("received reply with mid %d but no request!\n",
464 WVAL(server->header, smb_mid));
465 server->rstate = SMB_RECV_DROP;
466 }
467
468 return req;
469}
470
471/*
472 * Called when we have read the smb header and believe this is a response.
473 */
474static int smb_init_request(struct smb_sb_info *server, struct smb_request *req)
475{
476 int hdrlen, wct;
477
478 memcpy(req->rq_header, server->header, SMB_HEADER_LEN);
479
480 wct = *(req->rq_header + smb_wct);
481 if (wct > 20) {
482 PARANOIA("wct too large, %d > 20\n", wct);
483 server->rstate = SMB_RECV_DROP;
484 return 0;
485 }
486
487 req->rq_resp_wct = wct;
488 hdrlen = SMB_HEADER_LEN + wct*2 + 2;
489 VERBOSE("header length: %d smb_wct: %2d\n", hdrlen, wct);
490
491 req->rq_bytes_recvd = SMB_HEADER_LEN;
492 req->rq_rlen = hdrlen;
493 req->rq_iov[0].iov_base = req->rq_header;
494 req->rq_iov[0].iov_len = hdrlen;
495 req->rq_iovlen = 1;
496 server->rstate = SMB_RECV_PARAM;
497
498#ifdef SMB_DEBUG_PACKET_SIZE
499 add_recv_stats(smb_len(server->header));
500#endif
501 return 0;
502}
503
504/*
505 * Reads the SMB parameters
506 */
507static int smb_recv_param(struct smb_sb_info *server, struct smb_request *req)
508{
509 int result;
510
511 result = smb_receive(server, req);
512 if (result < 0)
513 return result;
514 if (req->rq_bytes_recvd < req->rq_rlen)
515 return 0;
516
517 VERBOSE("result: %d smb_bcc: %04x\n", result,
518 WVAL(req->rq_header, SMB_HEADER_LEN +
519 (*(req->rq_header + smb_wct) * 2)));
520
521 result = 0;
522 req->rq_iov[0].iov_base = NULL;
523 req->rq_rlen = 0;
524 if (req->rq_callback)
525 req->rq_callback(req);
526 else if (req->rq_setup_read)
527 result = req->rq_setup_read(req);
528 if (result < 0) {
529 server->rstate = SMB_RECV_DROP;
530 return result;
531 }
532
533 server->rstate = req->rq_rlen > 0 ? SMB_RECV_DATA : SMB_RECV_END;
534
535 req->rq_bytes_recvd = 0; // recvd out of the iov
536
537 VERBOSE("rlen: %d\n", req->rq_rlen);
538 if (req->rq_rlen < 0) {
539 PARANOIA("Parameters read beyond end of packet!\n");
540 server->rstate = SMB_RECV_END;
541 return -EIO;
542 }
543 return 0;
544}
545
546/*
547 * Reads the SMB data
548 */
549static int smb_recv_data(struct smb_sb_info *server, struct smb_request *req)
550{
551 int result;
552
553 result = smb_receive(server, req);
554 if (result < 0)
555 goto out;
556 if (req->rq_bytes_recvd < req->rq_rlen)
557 goto out;
558 server->rstate = SMB_RECV_END;
559out:
560 VERBOSE("result: %d\n", result);
561 return result;
562}
563
564/*
565 * Receive a transaction2 response
 *
 * Parses the trans2 counts/offsets from the reply header in rq_header and
 * validates them against the received data length (rq_rlen). A reply that
 * arrives in one packet is used in place: rq_parm/rq_data point into
 * rq_buffer. A fragmented reply is reassembled into rq_trans2buffer, which
 * is allocated on the first fragment and sized from the advertised totals.
 *
566 * Return: 0 if the response has been fully read
567 * 1 if there are further "fragments" to read
568 * <0 if there is an error
 * (the error is also stored in req->rq_errno)
569 */
570static int smb_recv_trans2(struct smb_sb_info *server, struct smb_request *req)
571{
572 unsigned char *inbuf;
573 unsigned int parm_disp, parm_offset, parm_count, parm_tot;
574 unsigned int data_disp, data_offset, data_count, data_tot;
 /* amount subtracted from the wire offsets to index into rq_buffer */
575 int hdrlen = SMB_HEADER_LEN + req->rq_resp_wct*2 - 2;
576
577 VERBOSE("handling trans2\n");
578
579 inbuf = req->rq_header;
580 data_tot = WVAL(inbuf, smb_tdrcnt);
581 parm_tot = WVAL(inbuf, smb_tprcnt);
582 parm_disp = WVAL(inbuf, smb_prdisp);
583 parm_offset = WVAL(inbuf, smb_proff);
584 parm_count = WVAL(inbuf, smb_prcnt);
585 data_disp = WVAL(inbuf, smb_drdisp);
586 data_offset = WVAL(inbuf, smb_droff);
587 data_count = WVAL(inbuf, smb_drcnt);
588
589 /* Modify offset for the split header/buffer we use */
590 if (data_count || data_offset) {
591 if (unlikely(data_offset < hdrlen))
592 goto out_bad_data;
593 else
594 data_offset -= hdrlen;
595 }
596 if (parm_count || parm_offset) {
597 if (unlikely(parm_offset < hdrlen))
598 goto out_bad_parm;
599 else
600 parm_offset -= hdrlen;
601 }
602
603 if (parm_count == parm_tot && data_count == data_tot) {
604 /*
605 * This packet has all the trans2 data.
606 *
607 * We setup the request so that this will be the common
608 * case. It may be a server error to not return a
609 * response that fits.
610 */
611 VERBOSE("single trans2 response "
612 "dcnt=%u, pcnt=%u, doff=%u, poff=%u\n",
613 data_count, parm_count,
614 data_offset, parm_offset);
615 req->rq_ldata = data_count;
616 req->rq_lparm = parm_count;
617 req->rq_data = req->rq_buffer + data_offset;
618 req->rq_parm = req->rq_buffer + parm_offset;
 /* the pointers above are only valid if both ranges lie inside the data read */
619 if (unlikely(parm_offset + parm_count > req->rq_rlen))
620 goto out_bad_parm;
621 if (unlikely(data_offset + data_count > req->rq_rlen))
622 goto out_bad_data;
623 return 0;
624 }
625
626 VERBOSE("multi trans2 response "
627 "frag=%d, dcnt=%u, pcnt=%u, doff=%u, poff=%u\n",
628 req->rq_fragment,
629 data_count, parm_count,
630 data_offset, parm_offset);
631
632 if (!req->rq_fragment) {
633 int buf_len;
634
635 /* We got the first trans2 fragment */
636 req->rq_fragment = 1;
637 req->rq_total_data = data_tot;
638 req->rq_total_parm = parm_tot;
639 req->rq_ldata = 0;
640 req->rq_lparm = 0;
641
642 buf_len = data_tot + parm_tot;
643 if (buf_len > SMB_MAX_PACKET_SIZE)
644 goto out_too_long;
645
646 req->rq_trans2bufsize = buf_len;
647 req->rq_trans2buffer = kzalloc(buf_len, GFP_NOFS);
648 if (!req->rq_trans2buffer)
649 goto out_no_mem;
650
 /* reassembly layout: all parameters first, data right after them */
651 req->rq_parm = req->rq_trans2buffer;
652 req->rq_data = req->rq_trans2buffer + parm_tot;
653 } else if (unlikely(req->rq_total_data < data_tot ||
654 req->rq_total_parm < parm_tot))
655 goto out_data_grew;
656
 /* each fragment must fit both the reassembly buffer and this packet */
657 if (unlikely(parm_disp + parm_count > req->rq_total_parm ||
658 parm_offset + parm_count > req->rq_rlen))
659 goto out_bad_parm;
660 if (unlikely(data_disp + data_count > req->rq_total_data ||
661 data_offset + data_count > req->rq_rlen))
662 goto out_bad_data;
663
664 inbuf = req->rq_buffer;
665 memcpy(req->rq_parm + parm_disp, inbuf + parm_offset, parm_count);
666 memcpy(req->rq_data + data_disp, inbuf + data_offset, data_count);
667
668 req->rq_ldata += data_count;
669 req->rq_lparm += parm_count;
670
671 /*
672 * Check whether we've received all of the data. Note that
673 * we use the packet totals -- total lengths might shrink!
674 */
675 if (req->rq_ldata >= data_tot && req->rq_lparm >= parm_tot) {
676 req->rq_ldata = data_tot;
677 req->rq_lparm = parm_tot;
678 return 0;
679 }
680 return 1;
681
682out_too_long:
683 printk(KERN_ERR "smb_trans2: data/param too long, data=%u, parm=%u\n",
684 data_tot, parm_tot);
685 goto out_EIO;
686out_no_mem:
687 printk(KERN_ERR "smb_trans2: couldn't allocate data area of %d bytes\n",
688 req->rq_trans2bufsize);
689 req->rq_errno = -ENOMEM;
690 goto out;
691out_data_grew:
692 printk(KERN_ERR "smb_trans2: data/params grew!\n");
693 goto out_EIO;
694out_bad_parm:
695 printk(KERN_ERR "smb_trans2: invalid parms, disp=%u, cnt=%u, tot=%u, ofs=%u\n",
696 parm_disp, parm_count, parm_tot, parm_offset);
697 goto out_EIO;
698out_bad_data:
699 printk(KERN_ERR "smb_trans2: invalid data, disp=%u, cnt=%u, tot=%u, ofs=%u\n",
700 data_disp, data_count, data_tot, data_offset);
701out_EIO:
702 req->rq_errno = -EIO;
703out:
704 return req->rq_errno;
705}
706
707/*
708 * State machine for receiving responses. We handle the fact that we can't
709 * read the full response in one try by having states telling us how much we
710 * have read.
711 *
712 * Must be called with the server lock held (only called from smbiod).
713 *
 * Each call resumes at server->rstate and falls through the later states
 * for as long as the current one completes. When SMB_RECV_END is reached
 * the reply is finished: trans2 replies are post-processed, surplus bytes
 * are dropped, and the waiting request is completed and woken.
 *
714 * Return: <0 on error
 * 0 otherwise (whether or not a response was completed)
715 */
716int smb_request_recv(struct smb_sb_info *server)
717{
718 struct smb_request *req = NULL;
719 int result = 0;
720
721 if (smb_recv_available(server) <= 0)
722 return 0;
723
724 VERBOSE("state: %d\n", server->rstate);
725 switch (server->rstate) {
726 case SMB_RECV_DROP:
727 result = smb_receive_drop(server);
728 if (result < 0)
729 break;
730 if (server->rstate == SMB_RECV_DROP)
731 break;
732 server->rstate = SMB_RECV_START;
733 /* fallthrough */
734 case SMB_RECV_START:
735 server->smb_read = 0;
736 server->rstate = SMB_RECV_HEADER;
737 /* fallthrough */
738 case SMB_RECV_HEADER:
739 result = smb_receive_header(server);
740 if (result < 0)
741 break;
742 if (server->rstate == SMB_RECV_HEADER)
743 break;
 /* a packet without the reply flag is a request from the server */
744 if (! (*(server->header + smb_flg) & SMB_FLAGS_REPLY) ) {
745 server->rstate = SMB_RECV_REQUEST;
746 break;
747 }
748 if (server->rstate != SMB_RECV_HCOMPLETE)
749 break;
750 /* fallthrough */
751 case SMB_RECV_HCOMPLETE:
752 req = find_request(server, WVAL(server->header, smb_mid));
753 if (!req)
754 break;
755 smb_init_request(server, req);
756 req->rq_rcls = *(req->rq_header + smb_rcls);
757 req->rq_err = WVAL(req->rq_header, smb_err);
758 if (server->rstate != SMB_RECV_PARAM)
759 break;
760 /* fallthrough */
761 case SMB_RECV_PARAM:
 /* req is still NULL when this call resumed directly in this state */
762 if (!req)
763 req = find_request(server,WVAL(server->header,smb_mid));
764 if (!req)
765 break;
766 result = smb_recv_param(server, req);
767 if (result < 0)
768 break;
769 if (server->rstate != SMB_RECV_DATA)
770 break;
771 /* fallthrough */
772 case SMB_RECV_DATA:
773 if (!req)
774 req = find_request(server,WVAL(server->header,smb_mid));
775 if (!req)
776 break;
777 result = smb_recv_data(server, req);
778 if (result < 0)
779 break;
780 break;
781
782 /* We should never be called with any of these states */
783 case SMB_RECV_END:
784 case SMB_RECV_REQUEST:
785 BUG();
786 }
787
788 if (result < 0) {
789 /* We saw an error */
790 return result;
791 }
792
793 if (server->rstate != SMB_RECV_END)
794 return 0;
795
 /*
  * NOTE(review): req is dereferenced below without a NULL check; this
  * relies on SMB_RECV_END only being reached through the PARAM/DATA
  * cases above, which bail out when no request was found.
  */
796 result = 0;
797 if (req->rq_trans2_command && req->rq_rcls == SUCCESS)
798 result = smb_recv_trans2(server, req);
799
800 /*
801 * Response completely read. Drop any extra bytes sent by the server.
802 * (Yes, servers sometimes add extra bytes to responses)
803 */
804 VERBOSE("smb_len: %d smb_read: %d\n",
805 server->smb_len, server->smb_read);
806 if (server->smb_read < server->smb_len)
807 smb_receive_drop(server);
808
809 server->rstate = SMB_RECV_START;
810
 /*
  * Only a zero result completes the request. A non-zero result from
  * smb_recv_trans2() (more fragments expected, or an error) leaves the
  * request on its queue and does not wake the waiter here.
  */
811 if (!result) {
812 list_del_init(&req->rq_queue);
813 req->rq_flags |= SMB_REQ_RECEIVED;
814 smb_rput(req);
815 wake_up_interruptible(&req->rq_wait);
816 }
817 return 0;
818}
diff --git a/fs/smbfs/request.h b/fs/smbfs/request.h
deleted file mode 100644
index efb21451e7c9..000000000000
--- a/fs/smbfs/request.h
+++ /dev/null
@@ -1,70 +0,0 @@
1#include <linux/list.h>
2#include <linux/types.h>
3#include <linux/uio.h>
4#include <linux/wait.h>
5
/*
 * State of one SMB request as it moves through the server's transmit and
 * receive queues.
 */
6struct smb_request {
7 struct list_head rq_queue; /* recvq or xmitq for the server */
8
/* NOTE(review): presumably the reference count released by smb_rput() -- confirm in request.c */
9 atomic_t rq_count;
10
/* woken with wake_up_interruptible() when the request completes or is aborted */
11 wait_queue_head_t rq_wait;
/* bitmask of the SMB_REQ_* values defined below */
12 int rq_flags;
13 int rq_mid; /* multiplex ID, set by request.c */
14
/* server this request belongs to; smb_send_request() takes its socket from here */
15 struct smb_sb_info *rq_server;
16
17 /* header + word count + parameter words + byte count */
18 unsigned char rq_header[SMB_HEADER_LEN + 20*2 + 2];
19
20 int rq_bufsize;
21 unsigned char *rq_buffer;
22
23 /* FIXME: this is not good enough for merging IO requests. */
24 unsigned char *rq_page;
25 int rq_rsize;
26
27 int rq_resp_wct;
28 int rq_resp_bcc;
29
/* smb_receive() asks the socket for at most rq_rlen - rq_bytes_recvd bytes */
30 int rq_rlen;
31 int rq_bytes_recvd;
32
/* smb_send_request() sends rq_slen - rq_bytes_sent bytes and flags the
   request SMB_REQ_TRANSMITTED once rq_bytes_sent reaches rq_slen */
33 int rq_slen;
34 int rq_bytes_sent;
35
/* scatter/gather list handed to kernel_sendmsg()/kernel_recvmsg();
   rq_iovlen is the number of entries in use */
36 int rq_iovlen;
37 struct kvec rq_iov[4];
38
39 int (*rq_setup_read) (struct smb_request *);
40 void (*rq_callback) (struct smb_request *);
41
42 /* ------ trans2 stuff ------ */
43
44 u16 rq_trans2_command; /* 0 if not a trans2 request */
45 unsigned int rq_ldata;
46 unsigned char *rq_data;
47 unsigned int rq_lparm;
48 unsigned char *rq_parm;
49
50 int rq_fragment;
51 u32 rq_total_data;
52 u32 rq_total_parm;
53 int rq_trans2bufsize;
54 unsigned char *rq_trans2buffer;
55
56 /* ------ response ------ */
57
/* rq_rcls and rq_err are copied out of the response header by the receive path */
58 unsigned short rq_rcls;
59 unsigned short rq_err;
/* set to -EIO when smbiod_flush()/smbiod_retry() abort the request */
60 int rq_errno;
61};
62
63#define SMB_REQ_STATIC 0x0001 /* rq_buffer is static */
64#define SMB_REQ_NORETRY 0x0002 /* request is invalid after retry */
65
66#define SMB_REQ_TRANSMITTED 0x4000 /* all data has been sent */
67#define SMB_REQ_RECEIVED 0x8000 /* reply received, smbiod is done */
68
/* NOTE(review): the "x" prefix suggests these two flags are disabled/unused -- confirm */
69#define xSMB_REQ_NOREPLY 0x0004 /* we don't want the reply (if any) */
70#define xSMB_REQ_NORECEIVER 0x0008 /* caller doesn't wait for response */
diff --git a/fs/smbfs/smb_debug.h b/fs/smbfs/smb_debug.h
deleted file mode 100644
index fc4b1a5dd755..000000000000
--- a/fs/smbfs/smb_debug.h
+++ /dev/null
@@ -1,34 +0,0 @@
1/*
2 * Defines some debug macros for smbfs.
3 */
4
/*
 * DENTRY_PATH expands to TWO comma-separated arguments (parent name, own
 * name), so the format string it is used with needs two conversions,
 * e.g. "%s/%s".
 */
5/* This makes a dentry parent/child name pair. Useful for debugging printk's */
6#define DENTRY_PATH(dentry) \
7 (dentry)->d_parent->d_name.name,(dentry)->d_name.name
8
/*
 * The three logging macros below share one shape: when their controlling
 * symbol is defined they printk() the message prefixed with the calling
 * function's name, otherwise they expand to an empty statement.
 */
9/*
10 * safety checks that should never happen ???
11 * these are normally enabled.
12 */
13#ifdef SMBFS_PARANOIA
14# define PARANOIA(f, a...) printk(KERN_NOTICE "%s: " f, __func__ , ## a)
15#else
16# define PARANOIA(f, a...) do { ; } while(0)
17#endif
18
19/* lots of debug messages */
20#ifdef SMBFS_DEBUG_VERBOSE
21# define VERBOSE(f, a...) printk(KERN_DEBUG "%s: " f, __func__ , ## a)
22#else
23# define VERBOSE(f, a...) do { ; } while(0)
24#endif
25
26/*
27 * "normal" debug messages, but not with a normal DEBUG define ... way
28 * too common name.
29 */
30#ifdef SMBFS_DEBUG
31#define DEBUG1(f, a...) printk(KERN_DEBUG "%s: " f, __func__ , ## a)
32#else
33#define DEBUG1(f, a...) do { ; } while(0)
34#endif
diff --git a/fs/smbfs/smbiod.c b/fs/smbfs/smbiod.c
deleted file mode 100644
index 0e39a924f10a..000000000000
--- a/fs/smbfs/smbiod.c
+++ /dev/null
@@ -1,344 +0,0 @@
1/*
2 * smbiod.c
3 *
4 * Copyright (C) 2000, Charles Loep / Corel Corp.
5 * Copyright (C) 2001, Urban Widmark
6 */
7
8
9#include <linux/sched.h>
10#include <linux/kernel.h>
11#include <linux/mm.h>
12#include <linux/string.h>
13#include <linux/stat.h>
14#include <linux/errno.h>
15#include <linux/init.h>
16#include <linux/file.h>
17#include <linux/dcache.h>
18#include <linux/module.h>
19#include <linux/net.h>
20#include <linux/kthread.h>
21#include <net/ip.h>
22
23#include <linux/smb_fs.h>
24#include <linux/smbno.h>
25#include <linux/smb_mount.h>
26
27#include <asm/system.h>
28#include <asm/uaccess.h>
29
30#include "smb_debug.h"
31#include "request.h"
32#include "proto.h"
33
/*
 * Life cycle of the single smbiod kernel thread:
 *   DEAD     - no thread; smbiod_wake_up() returns without doing anything
 *   STARTING - smbiod_start() is in the middle of kthread_run()
 *   RUNNING  - kthread_run() succeeded
 */
34enum smbiod_state {
35 SMBIOD_DEAD,
36 SMBIOD_STARTING,
37 SMBIOD_RUNNING,
38};
39
40static enum smbiod_state smbiod_state = SMBIOD_DEAD;
/* task returned by kthread_run(), NULL when starting the thread failed */
41static struct task_struct *smbiod_thread;
/* the thread sleeps here until SMBIOD_DATA_READY is set in smbiod_flags */
42static DECLARE_WAIT_QUEUE_HEAD(smbiod_wait);
/* registered servers, linked through smb_sb_info.entry */
43static LIST_HEAD(smb_servers);
/* taken around every smb_servers update and every smbiod_state change in this file */
44static DEFINE_SPINLOCK(servers_lock);
45
/*
 * Bit NUMBER (not a mask) inside smbiod_flags.  It is only ever handed to
 * set_bit()/test_bit()/clear_bit(), which take a bit index; the previous
 * mask-style value (1<<0) therefore addressed bit 1 instead of bit 0.
 */
#define SMBIOD_DATA_READY	0
static unsigned long smbiod_flags;
48
49static int smbiod(void *);
50static int smbiod_start(void);
51
52/*
53 * called when there's work for us to do
54 */
55void smbiod_wake_up(void)
56{
57 if (smbiod_state == SMBIOD_DEAD)
58 return;
59 set_bit(SMBIOD_DATA_READY, &smbiod_flags);
60 wake_up_interruptible(&smbiod_wait);
61}
62
63/*
64 * start smbiod if none is running
65 */
66static int smbiod_start(void)
67{
68 struct task_struct *tsk;
69 int err = 0;
70
71 if (smbiod_state != SMBIOD_DEAD)
72 return 0;
73 smbiod_state = SMBIOD_STARTING;
74 __module_get(THIS_MODULE);
75 spin_unlock(&servers_lock);
76 tsk = kthread_run(smbiod, NULL, "smbiod");
77 if (IS_ERR(tsk)) {
78 err = PTR_ERR(tsk);
79 module_put(THIS_MODULE);
80 }
81
82 spin_lock(&servers_lock);
83 if (err < 0) {
84 smbiod_state = SMBIOD_DEAD;
85 smbiod_thread = NULL;
86 } else {
87 smbiod_state = SMBIOD_RUNNING;
88 smbiod_thread = tsk;
89 }
90 return err;
91}
92
93/*
94 * register a server & start smbiod if necessary
95 */
96int smbiod_register_server(struct smb_sb_info *server)
97{
98 int ret;
99 spin_lock(&servers_lock);
100 list_add(&server->entry, &smb_servers);
101 VERBOSE("%p\n", server);
102 ret = smbiod_start();
103 spin_unlock(&servers_lock);
104 return ret;
105}
106
107/*
108 * Unregister a server
109 * Must be called with the server lock held.
110 */
111void smbiod_unregister_server(struct smb_sb_info *server)
112{
113 spin_lock(&servers_lock);
114 list_del_init(&server->entry);
115 VERBOSE("%p\n", server);
116 spin_unlock(&servers_lock);
117
118 smbiod_wake_up();
119 smbiod_flush(server);
120}
121
122void smbiod_flush(struct smb_sb_info *server)
123{
124 struct list_head *tmp, *n;
125 struct smb_request *req;
126
127 list_for_each_safe(tmp, n, &server->xmitq) {
128 req = list_entry(tmp, struct smb_request, rq_queue);
129 req->rq_errno = -EIO;
130 list_del_init(&req->rq_queue);
131 smb_rput(req);
132 wake_up_interruptible(&req->rq_wait);
133 }
134 list_for_each_safe(tmp, n, &server->recvq) {
135 req = list_entry(tmp, struct smb_request, rq_queue);
136 req->rq_errno = -EIO;
137 list_del_init(&req->rq_queue);
138 smb_rput(req);
139 wake_up_interruptible(&req->rq_wait);
140 }
141}
142
143/*
144 * Wake up smbmount and make it reconnect to the server.
145 * This must be called with the server locked.
146 *
147 * FIXME: add smbconnect version to this
148 */
149int smbiod_retry(struct smb_sb_info *server)
150{
151 struct list_head *head;
152 struct smb_request *req;
153 struct pid *pid = get_pid(server->conn_pid);
154 int result = 0;
155
156 VERBOSE("state: %d\n", server->state);
157 if (server->state == CONN_VALID || server->state == CONN_RETRYING)
158 goto out;
159
160 smb_invalidate_inodes(server);
161
162 /*
163 * Some requests are meaningless after a retry, so we abort them.
164 * One example are all requests using 'fileid' since the files are
165 * closed on retry.
166 */
167 head = server->xmitq.next;
168 while (head != &server->xmitq) {
169 req = list_entry(head, struct smb_request, rq_queue);
170 head = head->next;
171
172 req->rq_bytes_sent = 0;
173 if (req->rq_flags & SMB_REQ_NORETRY) {
174 VERBOSE("aborting request %p on xmitq\n", req);
175 req->rq_errno = -EIO;
176 list_del_init(&req->rq_queue);
177 smb_rput(req);
178 wake_up_interruptible(&req->rq_wait);
179 }
180 }
181
182 /*
183 * FIXME: test the code for retrying request we already sent
184 */
185 head = server->recvq.next;
186 while (head != &server->recvq) {
187 req = list_entry(head, struct smb_request, rq_queue);
188 head = head->next;
189#if 0
190 if (req->rq_flags & SMB_REQ_RETRY) {
191 /* must move the request to the xmitq */
192 VERBOSE("retrying request %p on recvq\n", req);
193 list_move(&req->rq_queue, &server->xmitq);
194 continue;
195 }
196#endif
197
198 VERBOSE("aborting request %p on recvq\n", req);
199 /* req->rq_rcls = ???; */ /* FIXME: set smb error code too? */
200 req->rq_errno = -EIO;
201 list_del_init(&req->rq_queue);
202 smb_rput(req);
203 wake_up_interruptible(&req->rq_wait);
204 }
205
206 smb_close_socket(server);
207
208 if (!pid) {
209 /* FIXME: this is fatal, umount? */
210 printk(KERN_ERR "smb_retry: no connection process\n");
211 server->state = CONN_RETRIED;
212 goto out;
213 }
214
215 /*
216 * Change state so that only one retry per server will be started.
217 */
218 server->state = CONN_RETRYING;
219
220 /*
221 * Note: use the "priv" flag, as a user process may need to reconnect.
222 */
223 result = kill_pid(pid, SIGUSR1, 1);
224 if (result) {
225 /* FIXME: this is most likely fatal, umount? */
226 printk(KERN_ERR "smb_retry: signal failed [%d]\n", result);
227 goto out;
228 }
229 VERBOSE("signalled pid %d\n", pid_nr(pid));
230
231 /* FIXME: The retried requests should perhaps get a "time boost". */
232
233out:
234 put_pid(pid);
235 return result;
236}
237
238/*
239 * Currently handles lockingX packets.
240 */
241static void smbiod_handle_request(struct smb_sb_info *server)
242{
/* server-initiated requests are not implemented: complain (if PARANOIA is
   enabled) and switch the receive state machine to SMB_RECV_DROP */
243 PARANOIA("smbiod got a request ... and we don't implement oplocks!\n");
244 server->rstate = SMB_RECV_DROP;
245}
246
247/*
248 * Do some IO for one server.
249 */
250static void smbiod_doio(struct smb_sb_info *server)
251{
252 int result;
253 int maxwork = 7;
254
255 if (server->state != CONN_VALID)
256 goto out;
257
258 do {
259 result = smb_request_recv(server);
260 if (result < 0) {
261 server->state = CONN_INVALID;
262 smbiod_retry(server);
263 goto out; /* reconnecting is slow */
264 } else if (server->rstate == SMB_RECV_REQUEST)
265 smbiod_handle_request(server);
266 } while (result > 0 && maxwork-- > 0);
267
268 /*
269 * If there is more to read then we want to be sure to wake up again.
270 */
271 if (server->state != CONN_VALID)
272 goto out;
273 if (smb_recv_available(server) > 0)
274 set_bit(SMBIOD_DATA_READY, &smbiod_flags);
275
276 do {
277 result = smb_request_send_server(server);
278 if (result < 0) {
279 server->state = CONN_INVALID;
280 smbiod_retry(server);
281 goto out; /* reconnecting is slow */
282 }
283 } while (result > 0);
284
285 /*
286 * If the last request was not sent out we want to wake up again.
287 */
288 if (!list_empty(&server->xmitq))
289 set_bit(SMBIOD_DATA_READY, &smbiod_flags);
290
291out:
292 return;
293}
294
295/*
296 * smbiod kernel thread
297 */
298static int smbiod(void *unused)
299{
300 VERBOSE("SMB Kernel thread starting (%d) ...\n", current->pid);
301
302 for (;;) {
303 struct smb_sb_info *server;
304 struct list_head *pos, *n;
305
/* sleep until somebody sets SMBIOD_DATA_READY (see smbiod_wake_up) */
306 /* FIXME: Use poll? */
307 wait_event_interruptible(smbiod_wait,
308 test_bit(SMBIOD_DATA_READY, &smbiod_flags));
/* a pending signal ends the thread; record that under the list lock */
309 if (signal_pending(current)) {
310 spin_lock(&servers_lock);
311 smbiod_state = SMBIOD_DEAD;
312 spin_unlock(&servers_lock);
313 break;
314 }
315
/* clear the flag before scanning so a wake-up that arrives during the scan is not lost */
316 clear_bit(SMBIOD_DATA_READY, &smbiod_flags);
317
318 spin_lock(&servers_lock);
/* no servers registered any more: the thread is not needed and exits */
319 if (list_empty(&smb_servers)) {
320 smbiod_state = SMBIOD_DEAD;
321 spin_unlock(&servers_lock);
322 break;
323 }
324
325 list_for_each_safe(pos, n, &smb_servers) {
326 server = list_entry(pos, struct smb_sb_info, entry);
327 VERBOSE("checking server %p\n", server);
328
329 if (server->state == CONN_VALID) {
/* the spinlock is dropped while doing I/O under the per-server lock.
   NOTE(review): the saved next pointer `n` could be unlinked by
   smbiod_unregister_server() while the list lock is not held -- confirm */
330 spin_unlock(&servers_lock);
331
332 smb_lock_server(server);
333 smbiod_doio(server);
334 smb_unlock_server(server);
335
336 spin_lock(&servers_lock);
337 }
338 }
339 spin_unlock(&servers_lock);
340 }
341
342 VERBOSE("SMB Kernel thread exiting (%d) ...\n", current->pid);
/* gives back the module reference taken in smbiod_start() and ends the thread */
343 module_put_and_exit(0);
344}
diff --git a/fs/smbfs/sock.c b/fs/smbfs/sock.c
deleted file mode 100644
index e37fe4deebd0..000000000000
--- a/fs/smbfs/sock.c
+++ /dev/null
@@ -1,386 +0,0 @@
1/*
2 * sock.c
3 *
4 * Copyright (C) 1995, 1996 by Paal-Kr. Engstad and Volker Lendecke
5 * Copyright (C) 1997 by Volker Lendecke
6 *
7 * Please add a note about your changes to smbfs in the ChangeLog file.
8 */
9
10#include <linux/fs.h>
11#include <linux/time.h>
12#include <linux/errno.h>
13#include <linux/socket.h>
14#include <linux/fcntl.h>
15#include <linux/file.h>
16#include <linux/in.h>
17#include <linux/net.h>
18#include <linux/mm.h>
19#include <linux/netdevice.h>
20#include <linux/workqueue.h>
21#include <net/scm.h>
22#include <net/tcp_states.h>
23#include <net/ip.h>
24
25#include <linux/smb_fs.h>
26#include <linux/smb.h>
27#include <linux/smbno.h>
28
29#include <asm/uaccess.h>
30#include <asm/ioctls.h>
31
32#include "smb_debug.h"
33#include "proto.h"
34#include "request.h"
35
36
37static int
38_recvfrom(struct socket *socket, unsigned char *ubuf, int size, unsigned flags)
39{
40 struct kvec iov = {ubuf, size};
41 struct msghdr msg = {.msg_flags = flags};
42 msg.msg_flags |= MSG_DONTWAIT | MSG_NOSIGNAL;
43 return kernel_recvmsg(socket, &msg, &iov, 1, size, msg.msg_flags);
44}
45
46/*
47 * Return the server this socket belongs to
48 */
49static struct smb_sb_info *
50server_from_socket(struct socket *socket)
51{
52 return socket->sk->sk_user_data;
53}
54
55/*
56 * Called when there is data on the socket.
57 */
58void
59smb_data_ready(struct sock *sk, int len)
60{
61 struct smb_sb_info *server = server_from_socket(sk->sk_socket);
62 void (*data_ready)(struct sock *, int) = server->data_ready;
63
64 data_ready(sk, len);
65 VERBOSE("(%p, %d)\n", sk, len);
66 smbiod_wake_up();
67}
68
69int
70smb_valid_socket(struct inode * inode)
71{
72 return (inode && S_ISSOCK(inode->i_mode) &&
73 SOCKET_I(inode)->type == SOCK_STREAM);
74}
75
76static struct socket *
77server_sock(struct smb_sb_info *server)
78{
79 struct file *file;
80
81 if (server && (file = server->sock_file))
82 {
83#ifdef SMBFS_PARANOIA
84 if (!smb_valid_socket(file->f_path.dentry->d_inode))
85 PARANOIA("bad socket!\n");
86#endif
87 return SOCKET_I(file->f_path.dentry->d_inode);
88 }
89 return NULL;
90}
91
92void
93smb_close_socket(struct smb_sb_info *server)
94{
95 struct file * file = server->sock_file;
96
97 if (file) {
98 struct socket *sock = server_sock(server);
99
100 VERBOSE("closing socket %p\n", sock);
101 sock->sk->sk_data_ready = server->data_ready;
102 server->sock_file = NULL;
103 fput(file);
104 }
105}
106
107static int
108smb_get_length(struct socket *socket, unsigned char *header)
109{
110 int result;
111
112 result = _recvfrom(socket, header, 4, MSG_PEEK);
113 if (result == -EAGAIN)
114 return -ENODATA;
115 if (result < 0) {
116 PARANOIA("recv error = %d\n", -result);
117 return result;
118 }
119 if (result < 4)
120 return -ENODATA;
121
122 switch (header[0]) {
123 case 0x00:
124 case 0x82:
125 break;
126
127 case 0x85:
128 DEBUG1("Got SESSION KEEP ALIVE\n");
129 _recvfrom(socket, header, 4, 0); /* read away */
130 return -ENODATA;
131
132 default:
133 PARANOIA("Invalid NBT packet, code=%x\n", header[0]);
134 return -EIO;
135 }
136
137 /* The length in the RFC NB header is the raw data length */
138 return smb_len(header);
139}
140
141int
142smb_recv_available(struct smb_sb_info *server)
143{
144 mm_segment_t oldfs;
145 int avail, err;
146 struct socket *sock = server_sock(server);
147
148 oldfs = get_fs();
149 set_fs(get_ds());
150 err = sock->ops->ioctl(sock, SIOCINQ, (unsigned long) &avail);
151 set_fs(oldfs);
152 return (err >= 0) ? avail : err;
153}
154
155/*
156 * Adjust the kvec to move on 'n' bytes (from nfs/sunrpc)
157 */
158static int
159smb_move_iov(struct kvec **data, size_t *num, struct kvec *vec, unsigned amount)
160{
161 struct kvec *iv = *data;
162 int i;
163 int len;
164
165 /*
166 * Eat any sent kvecs
167 */
168 while (iv->iov_len <= amount) {
169 amount -= iv->iov_len;
170 iv++;
171 (*num)--;
172 }
173
174 /*
175 * And chew down the partial one
176 */
177 vec[0].iov_len = iv->iov_len-amount;
178 vec[0].iov_base =((unsigned char *)iv->iov_base)+amount;
179 iv++;
180
181 len = vec[0].iov_len;
182
183 /*
184 * And copy any others
185 */
186 for (i = 1; i < *num; i++) {
187 vec[i] = *iv++;
188 len += vec[i].iov_len;
189 }
190
191 *data = vec;
192 return len;
193}
194
195/*
196 * smb_receive_header
197 * Only called by the smbiod thread.
198 */
199int
200smb_receive_header(struct smb_sb_info *server)
201{
202 struct socket *sock;
203 int result = 0;
204 unsigned char peek_buf[4];
205
206 result = -EIO;
207 sock = server_sock(server);
208 if (!sock)
209 goto out;
210 if (sock->sk->sk_state != TCP_ESTABLISHED)
211 goto out;
212
213 if (!server->smb_read) {
214 result = smb_get_length(sock, peek_buf);
215 if (result < 0) {
216 if (result == -ENODATA)
217 result = 0;
218 goto out;
219 }
220 server->smb_len = result + 4;
221
222 if (server->smb_len < SMB_HEADER_LEN) {
223 PARANOIA("short packet: %d\n", result);
224 server->rstate = SMB_RECV_DROP;
225 result = -EIO;
226 goto out;
227 }
228 if (server->smb_len > SMB_MAX_PACKET_SIZE) {
229 PARANOIA("long packet: %d\n", result);
230 server->rstate = SMB_RECV_DROP;
231 result = -EIO;
232 goto out;
233 }
234 }
235
236 result = _recvfrom(sock, server->header + server->smb_read,
237 SMB_HEADER_LEN - server->smb_read, 0);
238 VERBOSE("_recvfrom: %d\n", result);
239 if (result < 0) {
240 VERBOSE("receive error: %d\n", result);
241 goto out;
242 }
243 server->smb_read += result;
244
245 if (server->smb_read == SMB_HEADER_LEN)
246 server->rstate = SMB_RECV_HCOMPLETE;
247out:
248 return result;
249}
250
251static char drop_buffer[PAGE_SIZE];
252
253/*
254 * smb_receive_drop - read and throw away the data
255 * Only called by the smbiod thread.
256 *
257 * FIXME: we are in the kernel, could we just tell the socket that we want
258 * to drop stuff from the buffer?
259 */
260int
261smb_receive_drop(struct smb_sb_info *server)
262{
263 struct socket *sock;
264 unsigned int flags;
265 struct kvec iov;
266 struct msghdr msg;
267 int rlen = smb_len(server->header) - server->smb_read + 4;
268 int result = -EIO;
269
270 if (rlen > PAGE_SIZE)
271 rlen = PAGE_SIZE;
272
273 sock = server_sock(server);
274 if (!sock)
275 goto out;
276 if (sock->sk->sk_state != TCP_ESTABLISHED)
277 goto out;
278
279 flags = MSG_DONTWAIT | MSG_NOSIGNAL;
280 iov.iov_base = drop_buffer;
281 iov.iov_len = PAGE_SIZE;
282 msg.msg_flags = flags;
283 msg.msg_name = NULL;
284 msg.msg_namelen = 0;
285 msg.msg_control = NULL;
286
287 result = kernel_recvmsg(sock, &msg, &iov, 1, rlen, flags);
288
289 VERBOSE("read: %d\n", result);
290 if (result < 0) {
291 VERBOSE("receive error: %d\n", result);
292 goto out;
293 }
294 server->smb_read += result;
295
296 if (server->smb_read >= server->smb_len)
297 server->rstate = SMB_RECV_END;
298
299out:
300 return result;
301}
302
303/*
304 * smb_receive
305 * Only called by the smbiod thread.
306 */
307int
308smb_receive(struct smb_sb_info *server, struct smb_request *req)
309{
310 struct socket *sock;
311 unsigned int flags;
312 struct kvec iov[4];
313 struct kvec *p = req->rq_iov;
314 size_t num = req->rq_iovlen;
315 struct msghdr msg;
316 int rlen;
317 int result = -EIO;
318
319 sock = server_sock(server);
320 if (!sock)
321 goto out;
322 if (sock->sk->sk_state != TCP_ESTABLISHED)
323 goto out;
324
325 flags = MSG_DONTWAIT | MSG_NOSIGNAL;
326 msg.msg_flags = flags;
327 msg.msg_name = NULL;
328 msg.msg_namelen = 0;
329 msg.msg_control = NULL;
330
331 /* Dont repeat bytes and count available bufferspace */
332 rlen = min_t(int, smb_move_iov(&p, &num, iov, req->rq_bytes_recvd),
333 (req->rq_rlen - req->rq_bytes_recvd));
334
335 result = kernel_recvmsg(sock, &msg, p, num, rlen, flags);
336
337 VERBOSE("read: %d\n", result);
338 if (result < 0) {
339 VERBOSE("receive error: %d\n", result);
340 goto out;
341 }
342 req->rq_bytes_recvd += result;
343 server->smb_read += result;
344
345out:
346 return result;
347}
348
349/*
350 * Try to send a SMB request. This may return after sending only parts of the
351 * request. SMB_REQ_TRANSMITTED will be set if a request was fully sent.
352 *
353 * Parts of this was taken from xprt_sendmsg from net/sunrpc/xprt.c
354 */
355int
356smb_send_request(struct smb_request *req)
357{
358 struct smb_sb_info *server = req->rq_server;
359 struct socket *sock;
360 struct msghdr msg = {.msg_flags = MSG_NOSIGNAL | MSG_DONTWAIT};
361 int slen = req->rq_slen - req->rq_bytes_sent;
362 int result = -EIO;
363 struct kvec iov[4];
364 struct kvec *p = req->rq_iov;
365 size_t num = req->rq_iovlen;
366
367 sock = server_sock(server);
368 if (!sock)
369 goto out;
370 if (sock->sk->sk_state != TCP_ESTABLISHED)
371 goto out;
372
373 /* Dont repeat bytes */
374 if (req->rq_bytes_sent)
375 smb_move_iov(&p, &num, iov, req->rq_bytes_sent);
376
377 result = kernel_sendmsg(sock, &msg, p, num, slen);
378
379 if (result >= 0) {
380 req->rq_bytes_sent += result;
381 if (req->rq_bytes_sent >= req->rq_slen)
382 req->rq_flags |= SMB_REQ_TRANSMITTED;
383 }
384out:
385 return result;
386}
diff --git a/fs/smbfs/symlink.c b/fs/smbfs/symlink.c
deleted file mode 100644
index 00b2909bd469..000000000000
--- a/fs/smbfs/symlink.c
+++ /dev/null
@@ -1,68 +0,0 @@
1/*
2 * symlink.c
3 *
4 * Copyright (C) 2002 by John Newbigin
5 *
6 * Please add a note about your changes to smbfs in the ChangeLog file.
7 */
8
9#include <linux/kernel.h>
10#include <linux/errno.h>
11#include <linux/fcntl.h>
12#include <linux/stat.h>
13#include <linux/mm.h>
14#include <linux/slab.h>
15#include <linux/pagemap.h>
16#include <linux/net.h>
17#include <linux/namei.h>
18
19#include <asm/uaccess.h>
20#include <asm/system.h>
21
22#include <linux/smbno.h>
23#include <linux/smb_fs.h>
24
25#include "smb_debug.h"
26#include "proto.h"
27
/*
 * Create the symbolic link 'dentry' pointing at 'oldname' by handing the
 * request to smb_proc_symlink(); its result is returned unchanged.
 */
int smb_symlink(struct inode *inode, struct dentry *dentry, const char *oldname)
{
	struct smb_sb_info *server;

	DEBUG1("create symlink %s -> %s/%s\n", oldname, DENTRY_PATH(dentry));

	server = server_from_dentry(dentry);
	return smb_proc_symlink(server, dentry, oldname);
}
34
35static void *smb_follow_link(struct dentry *dentry, struct nameidata *nd)
36{
37 char *link = __getname();
38 DEBUG1("followlink of %s/%s\n", DENTRY_PATH(dentry));
39
40 if (!link) {
41 link = ERR_PTR(-ENOMEM);
42 } else {
43 int len = smb_proc_read_link(server_from_dentry(dentry),
44 dentry, link, PATH_MAX - 1);
45 if (len < 0) {
46 __putname(link);
47 link = ERR_PTR(len);
48 } else {
49 link[len] = 0;
50 }
51 }
52 nd_set_link(nd, link);
53 return NULL;
54}
55
/*
 * Release the name buffer stored by smb_follow_link(); nothing to free
 * when the stored value is an error pointer.
 */
static void smb_put_link(struct dentry *dentry, struct nameidata *nd, void *p)
{
	char *target = nd_get_link(nd);

	if (IS_ERR(target))
		return;

	__putname(target);
}
62
/*
 * Inode operations for smbfs symbolic links: readlink uses the generic
 * helper, following and releasing a link use the two functions above.
 */
63const struct inode_operations smb_link_inode_operations =
64{
65 .readlink = generic_readlink,
66 .follow_link = smb_follow_link,
67 .put_link = smb_put_link,
68};
diff --git a/fs/squashfs/super.c b/fs/squashfs/super.c
index 07a4f1156048..24de30ba34c1 100644
--- a/fs/squashfs/super.c
+++ b/fs/squashfs/super.c
@@ -370,12 +370,10 @@ static void squashfs_put_super(struct super_block *sb)
370} 370}
371 371
372 372
373static int squashfs_get_sb(struct file_system_type *fs_type, int flags, 373static struct dentry *squashfs_mount(struct file_system_type *fs_type, int flags,
374 const char *dev_name, void *data, 374 const char *dev_name, void *data)
375 struct vfsmount *mnt)
376{ 375{
377 return get_sb_bdev(fs_type, flags, dev_name, data, squashfs_fill_super, 376 return mount_bdev(fs_type, flags, dev_name, data, squashfs_fill_super);
378 mnt);
379} 377}
380 378
381 379
@@ -451,7 +449,7 @@ static void squashfs_destroy_inode(struct inode *inode)
451static struct file_system_type squashfs_fs_type = { 449static struct file_system_type squashfs_fs_type = {
452 .owner = THIS_MODULE, 450 .owner = THIS_MODULE,
453 .name = "squashfs", 451 .name = "squashfs",
454 .get_sb = squashfs_get_sb, 452 .mount = squashfs_mount,
455 .kill_sb = kill_block_super, 453 .kill_sb = kill_block_super,
456 .fs_flags = FS_REQUIRES_DEV 454 .fs_flags = FS_REQUIRES_DEV
457}; 455};
diff --git a/fs/squashfs/xattr.c b/fs/squashfs/xattr.c
index 652b8541f9c6..3876c36699a1 100644
--- a/fs/squashfs/xattr.c
+++ b/fs/squashfs/xattr.c
@@ -158,17 +158,18 @@ static int squashfs_xattr_get(struct inode *inode, int name_index,
158 strncmp(target, name, name_size) == 0) { 158 strncmp(target, name, name_size) == 0) {
159 /* found xattr */ 159 /* found xattr */
160 if (type & SQUASHFS_XATTR_VALUE_OOL) { 160 if (type & SQUASHFS_XATTR_VALUE_OOL) {
161 __le64 xattr; 161 __le64 xattr_val;
162 u64 xattr;
162 /* val is a reference to the real location */ 163 /* val is a reference to the real location */
163 err = squashfs_read_metadata(sb, &val, &start, 164 err = squashfs_read_metadata(sb, &val, &start,
164 &offset, sizeof(val)); 165 &offset, sizeof(val));
165 if (err < 0) 166 if (err < 0)
166 goto failed; 167 goto failed;
167 err = squashfs_read_metadata(sb, &xattr, &start, 168 err = squashfs_read_metadata(sb, &xattr_val,
168 &offset, sizeof(xattr)); 169 &start, &offset, sizeof(xattr_val));
169 if (err < 0) 170 if (err < 0)
170 goto failed; 171 goto failed;
171 xattr = le64_to_cpu(xattr); 172 xattr = le64_to_cpu(xattr_val);
172 start = SQUASHFS_XATTR_BLK(xattr) + 173 start = SQUASHFS_XATTR_BLK(xattr) +
173 msblk->xattr_table; 174 msblk->xattr_table;
174 offset = SQUASHFS_XATTR_OFFSET(xattr); 175 offset = SQUASHFS_XATTR_OFFSET(xattr);
diff --git a/fs/squashfs/xattr.h b/fs/squashfs/xattr.h
index 49fe0d719fbf..b634efce4bde 100644
--- a/fs/squashfs/xattr.h
+++ b/fs/squashfs/xattr.h
@@ -25,7 +25,7 @@
25extern __le64 *squashfs_read_xattr_id_table(struct super_block *, u64, 25extern __le64 *squashfs_read_xattr_id_table(struct super_block *, u64,
26 u64 *, int *); 26 u64 *, int *);
27extern int squashfs_xattr_lookup(struct super_block *, unsigned int, int *, 27extern int squashfs_xattr_lookup(struct super_block *, unsigned int, int *,
28 int *, unsigned long long *); 28 unsigned int *, unsigned long long *);
29#else 29#else
30static inline __le64 *squashfs_read_xattr_id_table(struct super_block *sb, 30static inline __le64 *squashfs_read_xattr_id_table(struct super_block *sb,
31 u64 start, u64 *xattr_table_start, int *xattr_ids) 31 u64 start, u64 *xattr_table_start, int *xattr_ids)
@@ -35,7 +35,7 @@ static inline __le64 *squashfs_read_xattr_id_table(struct super_block *sb,
35} 35}
36 36
37static inline int squashfs_xattr_lookup(struct super_block *sb, 37static inline int squashfs_xattr_lookup(struct super_block *sb,
38 unsigned int index, int *count, int *size, 38 unsigned int index, int *count, unsigned int *size,
39 unsigned long long *xattr) 39 unsigned long long *xattr)
40{ 40{
41 return 0; 41 return 0;
diff --git a/fs/squashfs/xattr_id.c b/fs/squashfs/xattr_id.c
index cfb41106098f..d33be5dd6c32 100644
--- a/fs/squashfs/xattr_id.c
+++ b/fs/squashfs/xattr_id.c
@@ -34,6 +34,7 @@
34#include "squashfs_fs_sb.h" 34#include "squashfs_fs_sb.h"
35#include "squashfs_fs_i.h" 35#include "squashfs_fs_i.h"
36#include "squashfs.h" 36#include "squashfs.h"
37#include "xattr.h"
37 38
38/* 39/*
39 * Map xattr id using the xattr id look up table 40 * Map xattr id using the xattr id look up table
diff --git a/fs/super.c b/fs/super.c
index 8819e3a7ff20..ca696155cd9a 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -273,14 +273,14 @@ void generic_shutdown_super(struct super_block *sb)
273 get_fs_excl(); 273 get_fs_excl();
274 sb->s_flags &= ~MS_ACTIVE; 274 sb->s_flags &= ~MS_ACTIVE;
275 275
276 /* bad name - it should be evict_inodes() */ 276 fsnotify_unmount_inodes(&sb->s_inodes);
277 invalidate_inodes(sb); 277
278 evict_inodes(sb);
278 279
279 if (sop->put_super) 280 if (sop->put_super)
280 sop->put_super(sb); 281 sop->put_super(sb);
281 282
282 /* Forget any remaining inodes */ 283 if (!list_empty(&sb->s_inodes)) {
283 if (invalidate_inodes(sb)) {
284 printk("VFS: Busy inodes after unmount of %s. " 284 printk("VFS: Busy inodes after unmount of %s. "
285 "Self-destruct in 5 seconds. Have a nice day...\n", 285 "Self-destruct in 5 seconds. Have a nice day...\n",
286 sb->s_id); 286 sb->s_id);
@@ -715,15 +715,14 @@ static int ns_set_super(struct super_block *sb, void *data)
715 return set_anon_super(sb, NULL); 715 return set_anon_super(sb, NULL);
716} 716}
717 717
718int get_sb_ns(struct file_system_type *fs_type, int flags, void *data, 718struct dentry *mount_ns(struct file_system_type *fs_type, int flags,
719 int (*fill_super)(struct super_block *, void *, int), 719 void *data, int (*fill_super)(struct super_block *, void *, int))
720 struct vfsmount *mnt)
721{ 720{
722 struct super_block *sb; 721 struct super_block *sb;
723 722
724 sb = sget(fs_type, ns_test_super, ns_set_super, data); 723 sb = sget(fs_type, ns_test_super, ns_set_super, data);
725 if (IS_ERR(sb)) 724 if (IS_ERR(sb))
726 return PTR_ERR(sb); 725 return ERR_CAST(sb);
727 726
728 if (!sb->s_root) { 727 if (!sb->s_root) {
729 int err; 728 int err;
@@ -731,17 +730,16 @@ int get_sb_ns(struct file_system_type *fs_type, int flags, void *data,
731 err = fill_super(sb, data, flags & MS_SILENT ? 1 : 0); 730 err = fill_super(sb, data, flags & MS_SILENT ? 1 : 0);
732 if (err) { 731 if (err) {
733 deactivate_locked_super(sb); 732 deactivate_locked_super(sb);
734 return err; 733 return ERR_PTR(err);
735 } 734 }
736 735
737 sb->s_flags |= MS_ACTIVE; 736 sb->s_flags |= MS_ACTIVE;
738 } 737 }
739 738
740 simple_set_mnt(mnt, sb); 739 return dget(sb->s_root);
741 return 0;
742} 740}
743 741
744EXPORT_SYMBOL(get_sb_ns); 742EXPORT_SYMBOL(mount_ns);
745 743
746#ifdef CONFIG_BLOCK 744#ifdef CONFIG_BLOCK
747static int set_bdev_super(struct super_block *s, void *data) 745static int set_bdev_super(struct super_block *s, void *data)
@@ -762,10 +760,9 @@ static int test_bdev_super(struct super_block *s, void *data)
762 return (void *)s->s_bdev == data; 760 return (void *)s->s_bdev == data;
763} 761}
764 762
765int get_sb_bdev(struct file_system_type *fs_type, 763struct dentry *mount_bdev(struct file_system_type *fs_type,
766 int flags, const char *dev_name, void *data, 764 int flags, const char *dev_name, void *data,
767 int (*fill_super)(struct super_block *, void *, int), 765 int (*fill_super)(struct super_block *, void *, int))
768 struct vfsmount *mnt)
769{ 766{
770 struct block_device *bdev; 767 struct block_device *bdev;
771 struct super_block *s; 768 struct super_block *s;
@@ -777,7 +774,7 @@ int get_sb_bdev(struct file_system_type *fs_type,
777 774
778 bdev = open_bdev_exclusive(dev_name, mode, fs_type); 775 bdev = open_bdev_exclusive(dev_name, mode, fs_type);
779 if (IS_ERR(bdev)) 776 if (IS_ERR(bdev))
780 return PTR_ERR(bdev); 777 return ERR_CAST(bdev);
781 778
782 /* 779 /*
783 * once the super is inserted into the list by sget, s_umount 780 * once the super is inserted into the list by sget, s_umount
@@ -829,15 +826,30 @@ int get_sb_bdev(struct file_system_type *fs_type,
829 bdev->bd_super = s; 826 bdev->bd_super = s;
830 } 827 }
831 828
832 simple_set_mnt(mnt, s); 829 return dget(s->s_root);
833 return 0;
834 830
835error_s: 831error_s:
836 error = PTR_ERR(s); 832 error = PTR_ERR(s);
837error_bdev: 833error_bdev:
838 close_bdev_exclusive(bdev, mode); 834 close_bdev_exclusive(bdev, mode);
839error: 835error:
840 return error; 836 return ERR_PTR(error);
837}
838EXPORT_SYMBOL(mount_bdev);
839
840int get_sb_bdev(struct file_system_type *fs_type,
841 int flags, const char *dev_name, void *data,
842 int (*fill_super)(struct super_block *, void *, int),
843 struct vfsmount *mnt)
844{
845 struct dentry *root;
846
847 root = mount_bdev(fs_type, flags, dev_name, data, fill_super);
848 if (IS_ERR(root))
849 return PTR_ERR(root);
850 mnt->mnt_root = root;
851 mnt->mnt_sb = root->d_sb;
852 return 0;
841} 853}
842 854
843EXPORT_SYMBOL(get_sb_bdev); 855EXPORT_SYMBOL(get_sb_bdev);
@@ -856,29 +868,42 @@ void kill_block_super(struct super_block *sb)
856EXPORT_SYMBOL(kill_block_super); 868EXPORT_SYMBOL(kill_block_super);
857#endif 869#endif
858 870
859int get_sb_nodev(struct file_system_type *fs_type, 871struct dentry *mount_nodev(struct file_system_type *fs_type,
860 int flags, void *data, 872 int flags, void *data,
861 int (*fill_super)(struct super_block *, void *, int), 873 int (*fill_super)(struct super_block *, void *, int))
862 struct vfsmount *mnt)
863{ 874{
864 int error; 875 int error;
865 struct super_block *s = sget(fs_type, NULL, set_anon_super, NULL); 876 struct super_block *s = sget(fs_type, NULL, set_anon_super, NULL);
866 877
867 if (IS_ERR(s)) 878 if (IS_ERR(s))
868 return PTR_ERR(s); 879 return ERR_CAST(s);
869 880
870 s->s_flags = flags; 881 s->s_flags = flags;
871 882
872 error = fill_super(s, data, flags & MS_SILENT ? 1 : 0); 883 error = fill_super(s, data, flags & MS_SILENT ? 1 : 0);
873 if (error) { 884 if (error) {
874 deactivate_locked_super(s); 885 deactivate_locked_super(s);
875 return error; 886 return ERR_PTR(error);
876 } 887 }
877 s->s_flags |= MS_ACTIVE; 888 s->s_flags |= MS_ACTIVE;
878 simple_set_mnt(mnt, s); 889 return dget(s->s_root);
879 return 0;
880} 890}
891EXPORT_SYMBOL(mount_nodev);
892
893int get_sb_nodev(struct file_system_type *fs_type,
894 int flags, void *data,
895 int (*fill_super)(struct super_block *, void *, int),
896 struct vfsmount *mnt)
897{
898 struct dentry *root;
881 899
900 root = mount_nodev(fs_type, flags, data, fill_super);
901 if (IS_ERR(root))
902 return PTR_ERR(root);
903 mnt->mnt_root = root;
904 mnt->mnt_sb = root->d_sb;
905 return 0;
906}
882EXPORT_SYMBOL(get_sb_nodev); 907EXPORT_SYMBOL(get_sb_nodev);
883 908
884static int compare_single(struct super_block *s, void *p) 909static int compare_single(struct super_block *s, void *p)
@@ -886,29 +911,42 @@ static int compare_single(struct super_block *s, void *p)
886 return 1; 911 return 1;
887} 912}
888 913
889int get_sb_single(struct file_system_type *fs_type, 914struct dentry *mount_single(struct file_system_type *fs_type,
890 int flags, void *data, 915 int flags, void *data,
891 int (*fill_super)(struct super_block *, void *, int), 916 int (*fill_super)(struct super_block *, void *, int))
892 struct vfsmount *mnt)
893{ 917{
894 struct super_block *s; 918 struct super_block *s;
895 int error; 919 int error;
896 920
897 s = sget(fs_type, compare_single, set_anon_super, NULL); 921 s = sget(fs_type, compare_single, set_anon_super, NULL);
898 if (IS_ERR(s)) 922 if (IS_ERR(s))
899 return PTR_ERR(s); 923 return ERR_CAST(s);
900 if (!s->s_root) { 924 if (!s->s_root) {
901 s->s_flags = flags; 925 s->s_flags = flags;
902 error = fill_super(s, data, flags & MS_SILENT ? 1 : 0); 926 error = fill_super(s, data, flags & MS_SILENT ? 1 : 0);
903 if (error) { 927 if (error) {
904 deactivate_locked_super(s); 928 deactivate_locked_super(s);
905 return error; 929 return ERR_PTR(error);
906 } 930 }
907 s->s_flags |= MS_ACTIVE; 931 s->s_flags |= MS_ACTIVE;
908 } else { 932 } else {
909 do_remount_sb(s, flags, data, 0); 933 do_remount_sb(s, flags, data, 0);
910 } 934 }
911 simple_set_mnt(mnt, s); 935 return dget(s->s_root);
936}
937EXPORT_SYMBOL(mount_single);
938
939int get_sb_single(struct file_system_type *fs_type,
940 int flags, void *data,
941 int (*fill_super)(struct super_block *, void *, int),
942 struct vfsmount *mnt)
943{
944 struct dentry *root;
945 root = mount_single(fs_type, flags, data, fill_super);
946 if (IS_ERR(root))
947 return PTR_ERR(root);
948 mnt->mnt_root = root;
949 mnt->mnt_sb = root->d_sb;
912 return 0; 950 return 0;
913} 951}
914 952
@@ -918,6 +956,7 @@ struct vfsmount *
918vfs_kern_mount(struct file_system_type *type, int flags, const char *name, void *data) 956vfs_kern_mount(struct file_system_type *type, int flags, const char *name, void *data)
919{ 957{
920 struct vfsmount *mnt; 958 struct vfsmount *mnt;
959 struct dentry *root;
921 char *secdata = NULL; 960 char *secdata = NULL;
922 int error; 961 int error;
923 962
@@ -942,9 +981,19 @@ vfs_kern_mount(struct file_system_type *type, int flags, const char *name, void
942 goto out_free_secdata; 981 goto out_free_secdata;
943 } 982 }
944 983
945 error = type->get_sb(type, flags, name, data, mnt); 984 if (type->mount) {
946 if (error < 0) 985 root = type->mount(type, flags, name, data);
947 goto out_free_secdata; 986 if (IS_ERR(root)) {
987 error = PTR_ERR(root);
988 goto out_free_secdata;
989 }
990 mnt->mnt_root = root;
991 mnt->mnt_sb = root->d_sb;
992 } else {
993 error = type->get_sb(type, flags, name, data, mnt);
994 if (error < 0)
995 goto out_free_secdata;
996 }
948 BUG_ON(!mnt->mnt_sb); 997 BUG_ON(!mnt->mnt_sb);
949 WARN_ON(!mnt->mnt_sb->s_bdi); 998 WARN_ON(!mnt->mnt_sb->s_bdi);
950 mnt->mnt_sb->s_flags |= MS_BORN; 999 mnt->mnt_sb->s_flags |= MS_BORN;
diff --git a/fs/sysfs/mount.c b/fs/sysfs/mount.c
index f2af22574c50..266895783b47 100644
--- a/fs/sysfs/mount.c
+++ b/fs/sysfs/mount.c
@@ -23,7 +23,7 @@
23#include "sysfs.h" 23#include "sysfs.h"
24 24
25 25
26static struct vfsmount *sysfs_mount; 26static struct vfsmount *sysfs_mnt;
27struct kmem_cache *sysfs_dir_cachep; 27struct kmem_cache *sysfs_dir_cachep;
28 28
29static const struct super_operations sysfs_ops = { 29static const struct super_operations sysfs_ops = {
@@ -95,18 +95,17 @@ static int sysfs_set_super(struct super_block *sb, void *data)
95 return error; 95 return error;
96} 96}
97 97
98static int sysfs_get_sb(struct file_system_type *fs_type, 98static struct dentry *sysfs_mount(struct file_system_type *fs_type,
99 int flags, const char *dev_name, void *data, struct vfsmount *mnt) 99 int flags, const char *dev_name, void *data)
100{ 100{
101 struct sysfs_super_info *info; 101 struct sysfs_super_info *info;
102 enum kobj_ns_type type; 102 enum kobj_ns_type type;
103 struct super_block *sb; 103 struct super_block *sb;
104 int error; 104 int error;
105 105
106 error = -ENOMEM;
107 info = kzalloc(sizeof(*info), GFP_KERNEL); 106 info = kzalloc(sizeof(*info), GFP_KERNEL);
108 if (!info) 107 if (!info)
109 goto out; 108 return ERR_PTR(-ENOMEM);
110 109
111 for (type = KOBJ_NS_TYPE_NONE; type < KOBJ_NS_TYPES; type++) 110 for (type = KOBJ_NS_TYPE_NONE; type < KOBJ_NS_TYPES; type++)
112 info->ns[type] = kobj_ns_current(type); 111 info->ns[type] = kobj_ns_current(type);
@@ -114,24 +113,19 @@ static int sysfs_get_sb(struct file_system_type *fs_type,
114 sb = sget(fs_type, sysfs_test_super, sysfs_set_super, info); 113 sb = sget(fs_type, sysfs_test_super, sysfs_set_super, info);
115 if (IS_ERR(sb) || sb->s_fs_info != info) 114 if (IS_ERR(sb) || sb->s_fs_info != info)
116 kfree(info); 115 kfree(info);
117 if (IS_ERR(sb)) { 116 if (IS_ERR(sb))
118 error = PTR_ERR(sb); 117 return ERR_CAST(sb);
119 goto out;
120 }
121 if (!sb->s_root) { 118 if (!sb->s_root) {
122 sb->s_flags = flags; 119 sb->s_flags = flags;
123 error = sysfs_fill_super(sb, data, flags & MS_SILENT ? 1 : 0); 120 error = sysfs_fill_super(sb, data, flags & MS_SILENT ? 1 : 0);
124 if (error) { 121 if (error) {
125 deactivate_locked_super(sb); 122 deactivate_locked_super(sb);
126 goto out; 123 return ERR_PTR(error);
127 } 124 }
128 sb->s_flags |= MS_ACTIVE; 125 sb->s_flags |= MS_ACTIVE;
129 } 126 }
130 127
131 simple_set_mnt(mnt, sb); 128 return dget(sb->s_root);
132 error = 0;
133out:
134 return error;
135} 129}
136 130
137static void sysfs_kill_sb(struct super_block *sb) 131static void sysfs_kill_sb(struct super_block *sb)
@@ -147,7 +141,7 @@ static void sysfs_kill_sb(struct super_block *sb)
147 141
148static struct file_system_type sysfs_fs_type = { 142static struct file_system_type sysfs_fs_type = {
149 .name = "sysfs", 143 .name = "sysfs",
150 .get_sb = sysfs_get_sb, 144 .mount = sysfs_mount,
151 .kill_sb = sysfs_kill_sb, 145 .kill_sb = sysfs_kill_sb,
152}; 146};
153 147
@@ -189,11 +183,11 @@ int __init sysfs_init(void)
189 183
190 err = register_filesystem(&sysfs_fs_type); 184 err = register_filesystem(&sysfs_fs_type);
191 if (!err) { 185 if (!err) {
192 sysfs_mount = kern_mount(&sysfs_fs_type); 186 sysfs_mnt = kern_mount(&sysfs_fs_type);
193 if (IS_ERR(sysfs_mount)) { 187 if (IS_ERR(sysfs_mnt)) {
194 printk(KERN_ERR "sysfs: could not mount!\n"); 188 printk(KERN_ERR "sysfs: could not mount!\n");
195 err = PTR_ERR(sysfs_mount); 189 err = PTR_ERR(sysfs_mnt);
196 sysfs_mount = NULL; 190 sysfs_mnt = NULL;
197 unregister_filesystem(&sysfs_fs_type); 191 unregister_filesystem(&sysfs_fs_type);
198 goto out_err; 192 goto out_err;
199 } 193 }
diff --git a/fs/sysv/namei.c b/fs/sysv/namei.c
index 33e047b59b8d..11e7f7d11cd0 100644
--- a/fs/sysv/namei.c
+++ b/fs/sysv/namei.c
@@ -126,7 +126,7 @@ static int sysv_link(struct dentry * old_dentry, struct inode * dir,
126 126
127 inode->i_ctime = CURRENT_TIME_SEC; 127 inode->i_ctime = CURRENT_TIME_SEC;
128 inode_inc_link_count(inode); 128 inode_inc_link_count(inode);
129 atomic_inc(&inode->i_count); 129 ihold(inode);
130 130
131 return add_nondir(dentry, inode); 131 return add_nondir(dentry, inode);
132} 132}
diff --git a/fs/sysv/super.c b/fs/sysv/super.c
index a0b0cda6927e..3d9c62be0c10 100644
--- a/fs/sysv/super.c
+++ b/fs/sysv/super.c
@@ -526,23 +526,22 @@ failed:
526 526
527/* Every kernel module contains stuff like this. */ 527/* Every kernel module contains stuff like this. */
528 528
529static int sysv_get_sb(struct file_system_type *fs_type, 529static struct dentry *sysv_mount(struct file_system_type *fs_type,
530 int flags, const char *dev_name, void *data, struct vfsmount *mnt) 530 int flags, const char *dev_name, void *data)
531{ 531{
532 return get_sb_bdev(fs_type, flags, dev_name, data, sysv_fill_super, 532 return mount_bdev(fs_type, flags, dev_name, data, sysv_fill_super);
533 mnt);
534} 533}
535 534
536static int v7_get_sb(struct file_system_type *fs_type, 535static struct dentry *v7_mount(struct file_system_type *fs_type,
537 int flags, const char *dev_name, void *data, struct vfsmount *mnt) 536 int flags, const char *dev_name, void *data)
538{ 537{
539 return get_sb_bdev(fs_type, flags, dev_name, data, v7_fill_super, mnt); 538 return mount_bdev(fs_type, flags, dev_name, data, v7_fill_super);
540} 539}
541 540
542static struct file_system_type sysv_fs_type = { 541static struct file_system_type sysv_fs_type = {
543 .owner = THIS_MODULE, 542 .owner = THIS_MODULE,
544 .name = "sysv", 543 .name = "sysv",
545 .get_sb = sysv_get_sb, 544 .mount = sysv_mount,
546 .kill_sb = kill_block_super, 545 .kill_sb = kill_block_super,
547 .fs_flags = FS_REQUIRES_DEV, 546 .fs_flags = FS_REQUIRES_DEV,
548}; 547};
@@ -550,7 +549,7 @@ static struct file_system_type sysv_fs_type = {
550static struct file_system_type v7_fs_type = { 549static struct file_system_type v7_fs_type = {
551 .owner = THIS_MODULE, 550 .owner = THIS_MODULE,
552 .name = "v7", 551 .name = "v7",
553 .get_sb = v7_get_sb, 552 .mount = v7_mount,
554 .kill_sb = kill_block_super, 553 .kill_sb = kill_block_super,
555 .fs_flags = FS_REQUIRES_DEV, 554 .fs_flags = FS_REQUIRES_DEV,
556}; 555};
diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c
index 87ebcce72213..14f64b689d7f 100644
--- a/fs/ubifs/dir.c
+++ b/fs/ubifs/dir.c
@@ -550,7 +550,7 @@ static int ubifs_link(struct dentry *old_dentry, struct inode *dir,
550 550
551 lock_2_inodes(dir, inode); 551 lock_2_inodes(dir, inode);
552 inc_nlink(inode); 552 inc_nlink(inode);
553 atomic_inc(&inode->i_count); 553 ihold(inode);
554 inode->i_ctime = ubifs_current_time(inode); 554 inode->i_ctime = ubifs_current_time(inode);
555 dir->i_size += sz_change; 555 dir->i_size += sz_change;
556 dir_ui->ui_size = dir->i_size; 556 dir_ui->ui_size = dir->i_size;
diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c
index 9a47c9f0ad07..91fac54c70e3 100644
--- a/fs/ubifs/super.c
+++ b/fs/ubifs/super.c
@@ -2038,8 +2038,8 @@ static int sb_test(struct super_block *sb, void *data)
2038 return c->vi.cdev == *dev; 2038 return c->vi.cdev == *dev;
2039} 2039}
2040 2040
2041static int ubifs_get_sb(struct file_system_type *fs_type, int flags, 2041static struct dentry *ubifs_mount(struct file_system_type *fs_type, int flags,
2042 const char *name, void *data, struct vfsmount *mnt) 2042 const char *name, void *data)
2043{ 2043{
2044 struct ubi_volume_desc *ubi; 2044 struct ubi_volume_desc *ubi;
2045 struct ubi_volume_info vi; 2045 struct ubi_volume_info vi;
@@ -2057,7 +2057,7 @@ static int ubifs_get_sb(struct file_system_type *fs_type, int flags,
2057 if (IS_ERR(ubi)) { 2057 if (IS_ERR(ubi)) {
2058 dbg_err("cannot open \"%s\", error %d", 2058 dbg_err("cannot open \"%s\", error %d",
2059 name, (int)PTR_ERR(ubi)); 2059 name, (int)PTR_ERR(ubi));
2060 return PTR_ERR(ubi); 2060 return ERR_CAST(ubi);
2061 } 2061 }
2062 ubi_get_volume_info(ubi, &vi); 2062 ubi_get_volume_info(ubi, &vi);
2063 2063
@@ -2095,20 +2095,19 @@ static int ubifs_get_sb(struct file_system_type *fs_type, int flags,
2095 /* 'fill_super()' opens ubi again so we must close it here */ 2095 /* 'fill_super()' opens ubi again so we must close it here */
2096 ubi_close_volume(ubi); 2096 ubi_close_volume(ubi);
2097 2097
2098 simple_set_mnt(mnt, sb); 2098 return dget(sb->s_root);
2099 return 0;
2100 2099
2101out_deact: 2100out_deact:
2102 deactivate_locked_super(sb); 2101 deactivate_locked_super(sb);
2103out_close: 2102out_close:
2104 ubi_close_volume(ubi); 2103 ubi_close_volume(ubi);
2105 return err; 2104 return ERR_PTR(err);
2106} 2105}
2107 2106
2108static struct file_system_type ubifs_fs_type = { 2107static struct file_system_type ubifs_fs_type = {
2109 .name = "ubifs", 2108 .name = "ubifs",
2110 .owner = THIS_MODULE, 2109 .owner = THIS_MODULE,
2111 .get_sb = ubifs_get_sb, 2110 .mount = ubifs_mount,
2112 .kill_sb = kill_anon_super, 2111 .kill_sb = kill_anon_super,
2113}; 2112};
2114 2113
diff --git a/fs/udf/namei.c b/fs/udf/namei.c
index bf5fc674193c..6d8dc02baebb 100644
--- a/fs/udf/namei.c
+++ b/fs/udf/namei.c
@@ -1101,7 +1101,7 @@ static int udf_link(struct dentry *old_dentry, struct inode *dir,
1101 inc_nlink(inode); 1101 inc_nlink(inode);
1102 inode->i_ctime = current_fs_time(inode->i_sb); 1102 inode->i_ctime = current_fs_time(inode->i_sb);
1103 mark_inode_dirty(inode); 1103 mark_inode_dirty(inode);
1104 atomic_inc(&inode->i_count); 1104 ihold(inode);
1105 d_instantiate(dentry, inode); 1105 d_instantiate(dentry, inode);
1106 unlock_kernel(); 1106 unlock_kernel();
1107 1107
diff --git a/fs/udf/super.c b/fs/udf/super.c
index 76f3d6d97b40..4a5c7c61836a 100644
--- a/fs/udf/super.c
+++ b/fs/udf/super.c
@@ -107,17 +107,16 @@ struct logicalVolIntegrityDescImpUse *udf_sb_lvidiu(struct udf_sb_info *sbi)
107} 107}
108 108
109/* UDF filesystem type */ 109/* UDF filesystem type */
110static int udf_get_sb(struct file_system_type *fs_type, 110static struct dentry *udf_mount(struct file_system_type *fs_type,
111 int flags, const char *dev_name, void *data, 111 int flags, const char *dev_name, void *data)
112 struct vfsmount *mnt)
113{ 112{
114 return get_sb_bdev(fs_type, flags, dev_name, data, udf_fill_super, mnt); 113 return mount_bdev(fs_type, flags, dev_name, data, udf_fill_super);
115} 114}
116 115
117static struct file_system_type udf_fstype = { 116static struct file_system_type udf_fstype = {
118 .owner = THIS_MODULE, 117 .owner = THIS_MODULE,
119 .name = "udf", 118 .name = "udf",
120 .get_sb = udf_get_sb, 119 .mount = udf_mount,
121 .kill_sb = kill_block_super, 120 .kill_sb = kill_block_super,
122 .fs_flags = FS_REQUIRES_DEV, 121 .fs_flags = FS_REQUIRES_DEV,
123}; 122};
diff --git a/fs/ufs/namei.c b/fs/ufs/namei.c
index b056f02b1fb3..12f39b9e4437 100644
--- a/fs/ufs/namei.c
+++ b/fs/ufs/namei.c
@@ -180,7 +180,7 @@ static int ufs_link (struct dentry * old_dentry, struct inode * dir,
180 180
181 inode->i_ctime = CURRENT_TIME_SEC; 181 inode->i_ctime = CURRENT_TIME_SEC;
182 inode_inc_link_count(inode); 182 inode_inc_link_count(inode);
183 atomic_inc(&inode->i_count); 183 ihold(inode);
184 184
185 error = ufs_add_nondir(dentry, inode); 185 error = ufs_add_nondir(dentry, inode);
186 unlock_kernel(); 186 unlock_kernel();
diff --git a/fs/ufs/super.c b/fs/ufs/super.c
index 6b9be90dae7d..2c47daed56da 100644
--- a/fs/ufs/super.c
+++ b/fs/ufs/super.c
@@ -1454,16 +1454,16 @@ static const struct super_operations ufs_super_ops = {
1454 .show_options = ufs_show_options, 1454 .show_options = ufs_show_options,
1455}; 1455};
1456 1456
1457static int ufs_get_sb(struct file_system_type *fs_type, 1457static struct dentry *ufs_mount(struct file_system_type *fs_type,
1458 int flags, const char *dev_name, void *data, struct vfsmount *mnt) 1458 int flags, const char *dev_name, void *data)
1459{ 1459{
1460 return get_sb_bdev(fs_type, flags, dev_name, data, ufs_fill_super, mnt); 1460 return mount_bdev(fs_type, flags, dev_name, data, ufs_fill_super);
1461} 1461}
1462 1462
1463static struct file_system_type ufs_fs_type = { 1463static struct file_system_type ufs_fs_type = {
1464 .owner = THIS_MODULE, 1464 .owner = THIS_MODULE,
1465 .name = "ufs", 1465 .name = "ufs",
1466 .get_sb = ufs_get_sb, 1466 .mount = ufs_mount,
1467 .kill_sb = kill_block_super, 1467 .kill_sb = kill_block_super,
1468 .fs_flags = FS_REQUIRES_DEV, 1468 .fs_flags = FS_REQUIRES_DEV,
1469}; 1469};
diff --git a/fs/xfs/Kconfig b/fs/xfs/Kconfig
index 480f28127f09..6100ec0fa1d4 100644
--- a/fs/xfs/Kconfig
+++ b/fs/xfs/Kconfig
@@ -22,6 +22,7 @@ config XFS_FS
22config XFS_QUOTA 22config XFS_QUOTA
23 bool "XFS Quota support" 23 bool "XFS Quota support"
24 depends on XFS_FS 24 depends on XFS_FS
25 select QUOTACTL
25 help 26 help
26 If you say Y here, you will be able to set limits for disk usage on 27 If you say Y here, you will be able to set limits for disk usage on
27 a per user and/or a per group basis under XFS. XFS considers quota 28 a per user and/or a per group basis under XFS. XFS considers quota
diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c
index b552f816de15..c9af48fffcd7 100644
--- a/fs/xfs/linux-2.6/xfs_aops.c
+++ b/fs/xfs/linux-2.6/xfs_aops.c
@@ -1139,8 +1139,7 @@ xfs_vm_writepage(
1139 type = IO_DELAY; 1139 type = IO_DELAY;
1140 flags = BMAPI_ALLOCATE; 1140 flags = BMAPI_ALLOCATE;
1141 1141
1142 if (wbc->sync_mode == WB_SYNC_NONE && 1142 if (wbc->sync_mode == WB_SYNC_NONE)
1143 wbc->nonblocking)
1144 flags |= BMAPI_TRYLOCK; 1143 flags |= BMAPI_TRYLOCK;
1145 } 1144 }
1146 1145
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
index ba5312802aa9..63fd2c07cb57 100644
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ b/fs/xfs/linux-2.6/xfs_buf.c
@@ -1580,6 +1580,7 @@ xfs_mapping_buftarg(
1580 XFS_BUFTARG_NAME(btp)); 1580 XFS_BUFTARG_NAME(btp));
1581 return ENOMEM; 1581 return ENOMEM;
1582 } 1582 }
1583 inode->i_ino = get_next_ino();
1583 inode->i_mode = S_IFBLK; 1584 inode->i_mode = S_IFBLK;
1584 inode->i_bdev = bdev; 1585 inode->i_bdev = bdev;
1585 inode->i_rdev = bdev->bd_dev; 1586 inode->i_rdev = bdev->bd_dev;
diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c
index ec858e09d546..96107efc0c61 100644
--- a/fs/xfs/linux-2.6/xfs_iops.c
+++ b/fs/xfs/linux-2.6/xfs_iops.c
@@ -317,7 +317,7 @@ xfs_vn_link(
317 if (unlikely(error)) 317 if (unlikely(error))
318 return -error; 318 return -error;
319 319
320 atomic_inc(&inode->i_count); 320 ihold(inode);
321 d_instantiate(dentry, inode); 321 d_instantiate(dentry, inode);
322 return 0; 322 return 0;
323} 323}
@@ -760,7 +760,9 @@ xfs_setup_inode(
760 760
761 inode->i_ino = ip->i_ino; 761 inode->i_ino = ip->i_ino;
762 inode->i_state = I_NEW; 762 inode->i_state = I_NEW;
763 inode_add_to_lists(ip->i_mount->m_super, inode); 763
764 inode_sb_list_add(inode);
765 insert_inode_hash(inode);
764 766
765 inode->i_mode = ip->i_d.di_mode; 767 inode->i_mode = ip->i_d.di_mode;
766 inode->i_nlink = ip->i_d.di_nlink; 768 inode->i_nlink = ip->i_d.di_nlink;
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index ab31ce5aeaf9..9f3a78fe6ae4 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -576,7 +576,7 @@ xfs_max_file_offset(
576 576
577 /* Figure out maximum filesize, on Linux this can depend on 577 /* Figure out maximum filesize, on Linux this can depend on
578 * the filesystem blocksize (on 32 bit platforms). 578 * the filesystem blocksize (on 32 bit platforms).
579 * __block_prepare_write does this in an [unsigned] long... 579 * __block_write_begin does this in an [unsigned] long...
580 * page->index << (PAGE_CACHE_SHIFT - bbits) 580 * page->index << (PAGE_CACHE_SHIFT - bbits)
581 * So, for page sized blocks (4K on 32 bit platforms), 581 * So, for page sized blocks (4K on 32 bit platforms),
582 * this wraps at around 8Tb (hence MAX_LFS_FILESIZE which is 582 * this wraps at around 8Tb (hence MAX_LFS_FILESIZE which is
@@ -1609,16 +1609,14 @@ xfs_fs_fill_super(
1609 goto out_free_sb; 1609 goto out_free_sb;
1610} 1610}
1611 1611
1612STATIC int 1612STATIC struct dentry *
1613xfs_fs_get_sb( 1613xfs_fs_mount(
1614 struct file_system_type *fs_type, 1614 struct file_system_type *fs_type,
1615 int flags, 1615 int flags,
1616 const char *dev_name, 1616 const char *dev_name,
1617 void *data, 1617 void *data)
1618 struct vfsmount *mnt)
1619{ 1618{
1620 return get_sb_bdev(fs_type, flags, dev_name, data, xfs_fs_fill_super, 1619 return mount_bdev(fs_type, flags, dev_name, data, xfs_fs_fill_super);
1621 mnt);
1622} 1620}
1623 1621
1624static const struct super_operations xfs_super_operations = { 1622static const struct super_operations xfs_super_operations = {
@@ -1639,7 +1637,7 @@ static const struct super_operations xfs_super_operations = {
1639static struct file_system_type xfs_fs_type = { 1637static struct file_system_type xfs_fs_type = {
1640 .owner = THIS_MODULE, 1638 .owner = THIS_MODULE,
1641 .name = "xfs", 1639 .name = "xfs",
1642 .get_sb = xfs_fs_get_sb, 1640 .mount = xfs_fs_mount,
1643 .kill_sb = kill_block_super, 1641 .kill_sb = kill_block_super,
1644 .fs_flags = FS_REQUIRES_DEV, 1642 .fs_flags = FS_REQUIRES_DEV,
1645}; 1643};
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index fac52290de90..fb2ca2e4cdc9 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -500,7 +500,7 @@ void xfs_mark_inode_dirty_sync(xfs_inode_t *);
500#define IHOLD(ip) \ 500#define IHOLD(ip) \
501do { \ 501do { \
502 ASSERT(atomic_read(&VFS_I(ip)->i_count) > 0) ; \ 502 ASSERT(atomic_read(&VFS_I(ip)->i_count) > 0) ; \
503 atomic_inc(&(VFS_I(ip)->i_count)); \ 503 ihold(VFS_I(ip)); \
504 trace_xfs_ihold(ip, _THIS_IP_); \ 504 trace_xfs_ihold(ip, _THIS_IP_); \
505} while (0) 505} while (0)
506 506