aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/9p/fid.c2
-rw-r--r--fs/9p/v9fs.c8
-rw-r--r--fs/9p/v9fs.h23
-rw-r--r--fs/9p/vfs_dir.c2
-rw-r--r--fs/9p/vfs_file.c4
-rw-r--r--fs/9p/vfs_inode.c48
-rw-r--r--fs/adfs/adfs.h2
-rw-r--r--fs/adfs/inode.c5
-rw-r--r--fs/affs/affs.h3
-rw-r--r--fs/affs/inode.c2
-rw-r--r--fs/afs/internal.h1
-rw-r--r--fs/afs/super.c1
-rw-r--r--fs/afs/write.c21
-rw-r--r--fs/attr.c11
-rw-r--r--fs/autofs4/autofs_i.h7
-rw-r--r--fs/autofs4/dev-ioctl.c11
-rw-r--r--fs/autofs4/expire.c6
-rw-r--r--fs/autofs4/inode.c63
-rw-r--r--fs/autofs4/root.c474
-rw-r--r--fs/bfs/inode.c5
-rw-r--r--fs/btrfs/ctree.h2
-rw-r--r--fs/btrfs/inode.c4
-rw-r--r--fs/cifs/file.c4
-rw-r--r--fs/dcache.c70
-rw-r--r--fs/debugfs/inode.c2
-rw-r--r--fs/exofs/common.h39
-rw-r--r--fs/exofs/exofs.h55
-rw-r--r--fs/exofs/inode.c198
-rw-r--r--fs/exofs/ios.c575
-rw-r--r--fs/exofs/super.c121
-rw-r--r--fs/ext2/balloc.c12
-rw-r--r--fs/ext2/ext2.h2
-rw-r--r--fs/ext2/file.c5
-rw-r--r--fs/ext2/ialloc.c14
-rw-r--r--fs/ext2/inode.c18
-rw-r--r--fs/ext2/namei.c51
-rw-r--r--fs/ext2/super.c2
-rw-r--r--fs/ext2/xattr.c10
-rw-r--r--fs/ext3/balloc.c11
-rw-r--r--fs/ext3/file.c7
-rw-r--r--fs/ext3/ialloc.c16
-rw-r--r--fs/ext3/inode.c45
-rw-r--r--fs/ext3/namei.c24
-rw-r--r--fs/ext3/super.c246
-rw-r--r--fs/ext3/xattr.c22
-rw-r--r--fs/ext4/balloc.c35
-rw-r--r--fs/ext4/block_validity.c4
-rw-r--r--fs/ext4/dir.c14
-rw-r--r--fs/ext4/ext4.h108
-rw-r--r--fs/ext4/ext4_jbd2.c4
-rw-r--r--fs/ext4/ext4_jbd2.h24
-rw-r--r--fs/ext4/extents.c260
-rw-r--r--fs/ext4/file.c13
-rw-r--r--fs/ext4/fsync.c2
-rw-r--r--fs/ext4/ialloc.c48
-rw-r--r--fs/ext4/inode.c492
-rw-r--r--fs/ext4/ioctl.c12
-rw-r--r--fs/ext4/mballoc.c79
-rw-r--r--fs/ext4/mballoc.h9
-rw-r--r--fs/ext4/migrate.c35
-rw-r--r--fs/ext4/move_extent.c36
-rw-r--r--fs/ext4/namei.c86
-rw-r--r--fs/ext4/resize.c102
-rw-r--r--fs/ext4/super.c357
-rw-r--r--fs/ext4/xattr.c64
-rw-r--r--fs/fat/inode.c9
-rw-r--r--fs/fs-writeback.c22
-rw-r--r--fs/gfs2/ops_inode.c113
-rw-r--r--fs/gfs2/quota.c9
-rw-r--r--fs/gfs2/quota.h2
-rw-r--r--fs/gfs2/super.c7
-rw-r--r--fs/gfs2/sys.c2
-rw-r--r--fs/hfs/hfs_fs.h2
-rw-r--r--fs/hfs/inode.c2
-rw-r--r--fs/hfsplus/super.c3
-rw-r--r--fs/hpfs/anode.c2
-rw-r--r--fs/hpfs/dentry.c14
-rw-r--r--fs/hpfs/dir.c14
-rw-r--r--fs/hpfs/dnode.c21
-rw-r--r--fs/hpfs/ea.c7
-rw-r--r--fs/hpfs/hpfs_fn.h30
-rw-r--r--fs/hpfs/inode.c4
-rw-r--r--fs/hpfs/map.c6
-rw-r--r--fs/hpfs/name.c21
-rw-r--r--fs/hpfs/namei.c75
-rw-r--r--fs/hppfs/hppfs.c2
-rw-r--r--fs/inode.c4
-rw-r--r--fs/internal.h2
-rw-r--r--fs/jbd/commit.c10
-rw-r--r--fs/jbd/transaction.c43
-rw-r--r--fs/jbd2/checkpoint.c1
-rw-r--r--fs/jbd2/commit.c13
-rw-r--r--fs/jbd2/journal.c132
-rw-r--r--fs/jbd2/transaction.c43
-rw-r--r--fs/jfs/acl.c26
-rw-r--r--fs/jfs/file.c31
-rw-r--r--fs/jfs/inode.c14
-rw-r--r--fs/jfs/jfs_acl.h7
-rw-r--r--fs/jfs/jfs_dtree.c28
-rw-r--r--fs/jfs/jfs_extent.c16
-rw-r--r--fs/jfs/jfs_inode.c8
-rw-r--r--fs/jfs/jfs_inode.h3
-rw-r--r--fs/jfs/jfs_xtree.c21
-rw-r--r--fs/jfs/namei.c23
-rw-r--r--fs/jfs/super.c6
-rw-r--r--fs/jfs/xattr.c17
-rw-r--r--fs/libfs.c77
-rw-r--r--fs/locks.c5
-rw-r--r--fs/minix/inode.c8
-rw-r--r--fs/namei.c571
-rw-r--r--fs/namespace.c53
-rw-r--r--fs/nfs/callback.h8
-rw-r--r--fs/nfs/callback_proc.c165
-rw-r--r--fs/nfs/callback_xdr.c105
-rw-r--r--fs/nfs/client.c48
-rw-r--r--fs/nfs/dir.c2
-rw-r--r--fs/nfs/dns_resolve.c18
-rw-r--r--fs/nfs/file.c30
-rw-r--r--fs/nfs/inode.c102
-rw-r--r--fs/nfs/internal.h2
-rw-r--r--fs/nfs/nfs3proc.c9
-rw-r--r--fs/nfs/nfs4_fs.h2
-rw-r--r--fs/nfs/nfs4proc.c111
-rw-r--r--fs/nfs/nfs4renewd.c24
-rw-r--r--fs/nfs/nfs4state.c118
-rw-r--r--fs/nfs/nfs4xdr.c10
-rw-r--r--fs/nfs/proc.c41
-rw-r--r--fs/nfs/symlink.c2
-rw-r--r--fs/nfs/write.c247
-rw-r--r--fs/nfsctl.c5
-rw-r--r--fs/nfsd/nfs4xdr.c12
-rw-r--r--fs/nfsd/vfs.c8
-rw-r--r--fs/nilfs2/dir.c14
-rw-r--r--fs/nilfs2/namei.c13
-rw-r--r--fs/nilfs2/nilfs.h4
-rw-r--r--fs/notify/inotify/inotify_user.c59
-rw-r--r--fs/ntfs/dir.c2
-rw-r--r--fs/ntfs/file.c2
-rw-r--r--fs/ntfs/inode.c2
-rw-r--r--fs/ntfs/inode.h4
-rw-r--r--fs/ntfs/super.c8
-rw-r--r--fs/ocfs2/alloc.c13
-rw-r--r--fs/ocfs2/aops.c11
-rw-r--r--fs/ocfs2/dir.c37
-rw-r--r--fs/ocfs2/file.c20
-rw-r--r--fs/ocfs2/inode.c6
-rw-r--r--fs/ocfs2/namei.c52
-rw-r--r--fs/ocfs2/quota_global.c7
-rw-r--r--fs/ocfs2/refcounttree.c2
-rw-r--r--fs/omfs/inode.c10
-rw-r--r--fs/open.c7
-rw-r--r--fs/pnode.c28
-rw-r--r--fs/pnode.h5
-rw-r--r--fs/proc/base.c10
-rw-r--r--fs/proc/generic.c5
-rw-r--r--fs/proc/root.c6
-rw-r--r--fs/quota/Kconfig5
-rw-r--r--fs/quota/Makefile2
-rw-r--r--fs/quota/compat.c118
-rw-r--r--fs/quota/dquot.c412
-rw-r--r--fs/quota/netlink.c95
-rw-r--r--fs/quota/quota.c735
-rw-r--r--fs/reiserfs/bitmap.c10
-rw-r--r--fs/reiserfs/file.c2
-rw-r--r--fs/reiserfs/inode.c24
-rw-r--r--fs/reiserfs/namei.c23
-rw-r--r--fs/reiserfs/stree.c20
-rw-r--r--fs/reiserfs/super.c15
-rw-r--r--fs/reiserfs/xattr.c4
-rw-r--r--fs/squashfs/Makefile2
-rw-r--r--fs/squashfs/block.c76
-rw-r--r--fs/squashfs/cache.c1
-rw-r--r--fs/squashfs/decompressor.c68
-rw-r--r--fs/squashfs/decompressor.h55
-rw-r--r--fs/squashfs/dir.c1
-rw-r--r--fs/squashfs/export.c1
-rw-r--r--fs/squashfs/file.c1
-rw-r--r--fs/squashfs/fragment.c1
-rw-r--r--fs/squashfs/id.c1
-rw-r--r--fs/squashfs/inode.c1
-rw-r--r--fs/squashfs/namei.c1
-rw-r--r--fs/squashfs/squashfs.h8
-rw-r--r--fs/squashfs/squashfs_fs.h6
-rw-r--r--fs/squashfs/squashfs_fs_sb.h40
-rw-r--r--fs/squashfs/super.c49
-rw-r--r--fs/squashfs/symlink.c1
-rw-r--r--fs/squashfs/zlib_wrapper.c150
-rw-r--r--fs/super.c21
-rw-r--r--fs/sync.c14
-rw-r--r--fs/sysv/inode.c10
-rw-r--r--fs/sysv/sysv.h2
-rw-r--r--fs/ubifs/dir.c2
-rw-r--r--fs/ubifs/file.c8
-rw-r--r--fs/ubifs/super.c2
-rw-r--r--fs/udf/balloc.c37
-rw-r--r--fs/udf/dir.c4
-rw-r--r--fs/udf/file.c28
-rw-r--r--fs/udf/ialloc.c14
-rw-r--r--fs/udf/inode.c12
-rw-r--r--fs/udf/namei.c37
-rw-r--r--fs/udf/symlink.c10
-rw-r--r--fs/udf/udfdecl.h2
-rw-r--r--fs/ufs/balloc.c24
-rw-r--r--fs/ufs/dir.c10
-rw-r--r--fs/ufs/file.c3
-rw-r--r--fs/ufs/ialloc.c11
-rw-r--r--fs/ufs/inode.c9
-rw-r--r--fs/ufs/namei.c18
-rw-r--r--fs/ufs/super.c6
-rw-r--r--fs/ufs/truncate.c10
-rw-r--r--fs/ufs/ufs.h6
-rw-r--r--fs/xfs/linux-2.6/xfs_quotaops.c19
-rw-r--r--fs/xfs/linux-2.6/xfs_super.c8
213 files changed, 5348 insertions, 3870 deletions
diff --git a/fs/9p/fid.c b/fs/9p/fid.c
index 14d944204571..08b2eb157048 100644
--- a/fs/9p/fid.c
+++ b/fs/9p/fid.c
@@ -151,7 +151,7 @@ struct p9_fid *v9fs_fid_lookup(struct dentry *dentry)
151 if (access == V9FS_ACCESS_SINGLE) 151 if (access == V9FS_ACCESS_SINGLE)
152 return ERR_PTR(-EPERM); 152 return ERR_PTR(-EPERM);
153 153
154 if (v9fs_extended(v9ses)) 154 if (v9fs_proto_dotu(v9ses))
155 uname = NULL; 155 uname = NULL;
156 else 156 else
157 uname = v9ses->uname; 157 uname = v9ses->uname;
diff --git a/fs/9p/v9fs.c b/fs/9p/v9fs.c
index 7d6c2139891d..6c7f6a251115 100644
--- a/fs/9p/v9fs.c
+++ b/fs/9p/v9fs.c
@@ -241,7 +241,7 @@ struct p9_fid *v9fs_session_init(struct v9fs_session_info *v9ses,
241 list_add(&v9ses->slist, &v9fs_sessionlist); 241 list_add(&v9ses->slist, &v9fs_sessionlist);
242 spin_unlock(&v9fs_sessionlist_lock); 242 spin_unlock(&v9fs_sessionlist_lock);
243 243
244 v9ses->flags = V9FS_EXTENDED | V9FS_ACCESS_USER; 244 v9ses->flags = V9FS_PROTO_2000U | V9FS_ACCESS_USER;
245 strcpy(v9ses->uname, V9FS_DEFUSER); 245 strcpy(v9ses->uname, V9FS_DEFUSER);
246 strcpy(v9ses->aname, V9FS_DEFANAME); 246 strcpy(v9ses->aname, V9FS_DEFANAME);
247 v9ses->uid = ~0; 247 v9ses->uid = ~0;
@@ -262,13 +262,13 @@ struct p9_fid *v9fs_session_init(struct v9fs_session_info *v9ses,
262 goto error; 262 goto error;
263 } 263 }
264 264
265 if (!v9ses->clnt->dotu) 265 if (!p9_is_proto_dotu(v9ses->clnt))
266 v9ses->flags &= ~V9FS_EXTENDED; 266 v9ses->flags &= ~V9FS_PROTO_2000U;
267 267
268 v9ses->maxdata = v9ses->clnt->msize - P9_IOHDRSZ; 268 v9ses->maxdata = v9ses->clnt->msize - P9_IOHDRSZ;
269 269
270 /* for legacy mode, fall back to V9FS_ACCESS_ANY */ 270 /* for legacy mode, fall back to V9FS_ACCESS_ANY */
271 if (!v9fs_extended(v9ses) && 271 if (!v9fs_proto_dotu(v9ses) &&
272 ((v9ses->flags&V9FS_ACCESS_MASK) == V9FS_ACCESS_USER)) { 272 ((v9ses->flags&V9FS_ACCESS_MASK) == V9FS_ACCESS_USER)) {
273 273
274 v9ses->flags &= ~V9FS_ACCESS_MASK; 274 v9ses->flags &= ~V9FS_ACCESS_MASK;
diff --git a/fs/9p/v9fs.h b/fs/9p/v9fs.h
index 019f4ccb70c1..79000bf62491 100644
--- a/fs/9p/v9fs.h
+++ b/fs/9p/v9fs.h
@@ -23,7 +23,8 @@
23 23
24/** 24/**
25 * enum p9_session_flags - option flags for each 9P session 25 * enum p9_session_flags - option flags for each 9P session
26 * @V9FS_EXTENDED: whether or not to use 9P2000.u extensions 26 * @V9FS_PROTO_2000U: whether or not to use 9P2000.u extensions
27 * @V9FS_PROTO_2010L: whether or not to use 9P2010.l extensions
27 * @V9FS_ACCESS_SINGLE: only the mounting user can access the hierarchy 28 * @V9FS_ACCESS_SINGLE: only the mounting user can access the hierarchy
28 * @V9FS_ACCESS_USER: a new attach will be issued for every user (default) 29 * @V9FS_ACCESS_USER: a new attach will be issued for every user (default)
29 * @V9FS_ACCESS_ANY: use a single attach for all users 30 * @V9FS_ACCESS_ANY: use a single attach for all users
@@ -32,11 +33,12 @@
32 * Session flags reflect options selected by users at mount time 33 * Session flags reflect options selected by users at mount time
33 */ 34 */
34enum p9_session_flags { 35enum p9_session_flags {
35 V9FS_EXTENDED = 0x01, 36 V9FS_PROTO_2000U = 0x01,
36 V9FS_ACCESS_SINGLE = 0x02, 37 V9FS_PROTO_2010L = 0x02,
37 V9FS_ACCESS_USER = 0x04, 38 V9FS_ACCESS_SINGLE = 0x04,
38 V9FS_ACCESS_ANY = 0x06, 39 V9FS_ACCESS_USER = 0x08,
39 V9FS_ACCESS_MASK = 0x06, 40 V9FS_ACCESS_ANY = 0x0C,
41 V9FS_ACCESS_MASK = 0x0C,
40}; 42};
41 43
42/* possible values of ->cache */ 44/* possible values of ->cache */
@@ -121,7 +123,12 @@ static inline struct v9fs_session_info *v9fs_inode2v9ses(struct inode *inode)
121 return (inode->i_sb->s_fs_info); 123 return (inode->i_sb->s_fs_info);
122} 124}
123 125
124static inline int v9fs_extended(struct v9fs_session_info *v9ses) 126static inline int v9fs_proto_dotu(struct v9fs_session_info *v9ses)
125{ 127{
126 return v9ses->flags & V9FS_EXTENDED; 128 return v9ses->flags & V9FS_PROTO_2000U;
129}
130
131static inline int v9fs_proto_dotl(struct v9fs_session_info *v9ses)
132{
133 return v9ses->flags & V9FS_PROTO_2010L;
127} 134}
diff --git a/fs/9p/vfs_dir.c b/fs/9p/vfs_dir.c
index 15cce53bf61e..6580aa449541 100644
--- a/fs/9p/vfs_dir.c
+++ b/fs/9p/vfs_dir.c
@@ -135,7 +135,7 @@ static int v9fs_dir_readdir(struct file *filp, void *dirent, filldir_t filldir)
135 while (rdir->head < rdir->tail) { 135 while (rdir->head < rdir->tail) {
136 err = p9stat_read(rdir->buf + rdir->head, 136 err = p9stat_read(rdir->buf + rdir->head,
137 buflen - rdir->head, &st, 137 buflen - rdir->head, &st,
138 fid->clnt->dotu); 138 fid->clnt->proto_version);
139 if (err) { 139 if (err) {
140 P9_DPRINTK(P9_DEBUG_VFS, "returned %d\n", err); 140 P9_DPRINTK(P9_DEBUG_VFS, "returned %d\n", err);
141 err = -EIO; 141 err = -EIO;
diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c
index 74a0461a9ac0..36122683fae8 100644
--- a/fs/9p/vfs_file.c
+++ b/fs/9p/vfs_file.c
@@ -61,7 +61,7 @@ int v9fs_file_open(struct inode *inode, struct file *file)
61 61
62 P9_DPRINTK(P9_DEBUG_VFS, "inode: %p file: %p \n", inode, file); 62 P9_DPRINTK(P9_DEBUG_VFS, "inode: %p file: %p \n", inode, file);
63 v9ses = v9fs_inode2v9ses(inode); 63 v9ses = v9fs_inode2v9ses(inode);
64 omode = v9fs_uflags2omode(file->f_flags, v9fs_extended(v9ses)); 64 omode = v9fs_uflags2omode(file->f_flags, v9fs_proto_dotu(v9ses));
65 fid = file->private_data; 65 fid = file->private_data;
66 if (!fid) { 66 if (!fid) {
67 fid = v9fs_fid_clone(file->f_path.dentry); 67 fid = v9fs_fid_clone(file->f_path.dentry);
@@ -77,7 +77,7 @@ int v9fs_file_open(struct inode *inode, struct file *file)
77 i_size_write(inode, 0); 77 i_size_write(inode, 0);
78 inode->i_blocks = 0; 78 inode->i_blocks = 0;
79 } 79 }
80 if ((file->f_flags & O_APPEND) && (!v9fs_extended(v9ses))) 80 if ((file->f_flags & O_APPEND) && (!v9fs_proto_dotu(v9ses)))
81 generic_file_llseek(file, 0, SEEK_END); 81 generic_file_llseek(file, 0, SEEK_END);
82 } 82 }
83 83
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index a407fa3388c0..5fe45d692c9f 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -60,7 +60,7 @@ static int unixmode2p9mode(struct v9fs_session_info *v9ses, int mode)
60 res = mode & 0777; 60 res = mode & 0777;
61 if (S_ISDIR(mode)) 61 if (S_ISDIR(mode))
62 res |= P9_DMDIR; 62 res |= P9_DMDIR;
63 if (v9fs_extended(v9ses)) { 63 if (v9fs_proto_dotu(v9ses)) {
64 if (S_ISLNK(mode)) 64 if (S_ISLNK(mode))
65 res |= P9_DMSYMLINK; 65 res |= P9_DMSYMLINK;
66 if (v9ses->nodev == 0) { 66 if (v9ses->nodev == 0) {
@@ -102,21 +102,21 @@ static int p9mode2unixmode(struct v9fs_session_info *v9ses, int mode)
102 102
103 if ((mode & P9_DMDIR) == P9_DMDIR) 103 if ((mode & P9_DMDIR) == P9_DMDIR)
104 res |= S_IFDIR; 104 res |= S_IFDIR;
105 else if ((mode & P9_DMSYMLINK) && (v9fs_extended(v9ses))) 105 else if ((mode & P9_DMSYMLINK) && (v9fs_proto_dotu(v9ses)))
106 res |= S_IFLNK; 106 res |= S_IFLNK;
107 else if ((mode & P9_DMSOCKET) && (v9fs_extended(v9ses)) 107 else if ((mode & P9_DMSOCKET) && (v9fs_proto_dotu(v9ses))
108 && (v9ses->nodev == 0)) 108 && (v9ses->nodev == 0))
109 res |= S_IFSOCK; 109 res |= S_IFSOCK;
110 else if ((mode & P9_DMNAMEDPIPE) && (v9fs_extended(v9ses)) 110 else if ((mode & P9_DMNAMEDPIPE) && (v9fs_proto_dotu(v9ses))
111 && (v9ses->nodev == 0)) 111 && (v9ses->nodev == 0))
112 res |= S_IFIFO; 112 res |= S_IFIFO;
113 else if ((mode & P9_DMDEVICE) && (v9fs_extended(v9ses)) 113 else if ((mode & P9_DMDEVICE) && (v9fs_proto_dotu(v9ses))
114 && (v9ses->nodev == 0)) 114 && (v9ses->nodev == 0))
115 res |= S_IFBLK; 115 res |= S_IFBLK;
116 else 116 else
117 res |= S_IFREG; 117 res |= S_IFREG;
118 118
119 if (v9fs_extended(v9ses)) { 119 if (v9fs_proto_dotu(v9ses)) {
120 if ((mode & P9_DMSETUID) == P9_DMSETUID) 120 if ((mode & P9_DMSETUID) == P9_DMSETUID)
121 res |= S_ISUID; 121 res |= S_ISUID;
122 122
@@ -265,7 +265,7 @@ struct inode *v9fs_get_inode(struct super_block *sb, int mode)
265 case S_IFBLK: 265 case S_IFBLK:
266 case S_IFCHR: 266 case S_IFCHR:
267 case S_IFSOCK: 267 case S_IFSOCK:
268 if (!v9fs_extended(v9ses)) { 268 if (!v9fs_proto_dotu(v9ses)) {
269 P9_DPRINTK(P9_DEBUG_ERROR, 269 P9_DPRINTK(P9_DEBUG_ERROR,
270 "special files without extended mode\n"); 270 "special files without extended mode\n");
271 err = -EINVAL; 271 err = -EINVAL;
@@ -278,7 +278,7 @@ struct inode *v9fs_get_inode(struct super_block *sb, int mode)
278 inode->i_fop = &v9fs_file_operations; 278 inode->i_fop = &v9fs_file_operations;
279 break; 279 break;
280 case S_IFLNK: 280 case S_IFLNK:
281 if (!v9fs_extended(v9ses)) { 281 if (!v9fs_proto_dotu(v9ses)) {
282 P9_DPRINTK(P9_DEBUG_ERROR, 282 P9_DPRINTK(P9_DEBUG_ERROR,
283 "extended modes used w/o 9P2000.u\n"); 283 "extended modes used w/o 9P2000.u\n");
284 err = -EINVAL; 284 err = -EINVAL;
@@ -288,7 +288,7 @@ struct inode *v9fs_get_inode(struct super_block *sb, int mode)
288 break; 288 break;
289 case S_IFDIR: 289 case S_IFDIR:
290 inc_nlink(inode); 290 inc_nlink(inode);
291 if (v9fs_extended(v9ses)) 291 if (v9fs_proto_dotu(v9ses))
292 inode->i_op = &v9fs_dir_inode_operations_ext; 292 inode->i_op = &v9fs_dir_inode_operations_ext;
293 else 293 else
294 inode->i_op = &v9fs_dir_inode_operations; 294 inode->i_op = &v9fs_dir_inode_operations;
@@ -575,7 +575,8 @@ v9fs_vfs_create(struct inode *dir, struct dentry *dentry, int mode,
575 flags = O_RDWR; 575 flags = O_RDWR;
576 576
577 fid = v9fs_create(v9ses, dir, dentry, NULL, perm, 577 fid = v9fs_create(v9ses, dir, dentry, NULL, perm,
578 v9fs_uflags2omode(flags, v9fs_extended(v9ses))); 578 v9fs_uflags2omode(flags,
579 v9fs_proto_dotu(v9ses)));
579 if (IS_ERR(fid)) { 580 if (IS_ERR(fid)) {
580 err = PTR_ERR(fid); 581 err = PTR_ERR(fid);
581 fid = NULL; 582 fid = NULL;
@@ -858,7 +859,7 @@ static int v9fs_vfs_setattr(struct dentry *dentry, struct iattr *iattr)
858 if (iattr->ia_valid & ATTR_SIZE) 859 if (iattr->ia_valid & ATTR_SIZE)
859 wstat.length = iattr->ia_size; 860 wstat.length = iattr->ia_size;
860 861
861 if (v9fs_extended(v9ses)) { 862 if (v9fs_proto_dotu(v9ses)) {
862 if (iattr->ia_valid & ATTR_UID) 863 if (iattr->ia_valid & ATTR_UID)
863 wstat.n_uid = iattr->ia_uid; 864 wstat.n_uid = iattr->ia_uid;
864 865
@@ -886,6 +887,8 @@ v9fs_stat2inode(struct p9_wstat *stat, struct inode *inode,
886 struct super_block *sb) 887 struct super_block *sb)
887{ 888{
888 char ext[32]; 889 char ext[32];
890 char tag_name[14];
891 unsigned int i_nlink;
889 struct v9fs_session_info *v9ses = sb->s_fs_info; 892 struct v9fs_session_info *v9ses = sb->s_fs_info;
890 893
891 inode->i_nlink = 1; 894 inode->i_nlink = 1;
@@ -897,11 +900,26 @@ v9fs_stat2inode(struct p9_wstat *stat, struct inode *inode,
897 inode->i_uid = v9ses->dfltuid; 900 inode->i_uid = v9ses->dfltuid;
898 inode->i_gid = v9ses->dfltgid; 901 inode->i_gid = v9ses->dfltgid;
899 902
900 if (v9fs_extended(v9ses)) { 903 if (v9fs_proto_dotu(v9ses)) {
901 inode->i_uid = stat->n_uid; 904 inode->i_uid = stat->n_uid;
902 inode->i_gid = stat->n_gid; 905 inode->i_gid = stat->n_gid;
903 } 906 }
904 907 if ((S_ISREG(inode->i_mode)) || (S_ISDIR(inode->i_mode))) {
908 if (v9fs_proto_dotu(v9ses) && (stat->extension[0] != '\0')) {
909 /*
910 * Hadlink support got added later to
911 * to the .u extension. So there can be
912 * server out there that doesn't support
913 * this even with .u extension. So check
914 * for non NULL stat->extension
915 */
916 strncpy(ext, stat->extension, sizeof(ext));
917 /* HARDLINKCOUNT %u */
918 sscanf(ext, "%13s %u", tag_name, &i_nlink);
919 if (!strncmp(tag_name, "HARDLINKCOUNT", 13))
920 inode->i_nlink = i_nlink;
921 }
922 }
905 inode->i_mode = p9mode2unixmode(v9ses, stat->mode); 923 inode->i_mode = p9mode2unixmode(v9ses, stat->mode);
906 if ((S_ISBLK(inode->i_mode)) || (S_ISCHR(inode->i_mode))) { 924 if ((S_ISBLK(inode->i_mode)) || (S_ISCHR(inode->i_mode))) {
907 char type = 0; 925 char type = 0;
@@ -976,7 +994,7 @@ static int v9fs_readlink(struct dentry *dentry, char *buffer, int buflen)
976 if (IS_ERR(fid)) 994 if (IS_ERR(fid))
977 return PTR_ERR(fid); 995 return PTR_ERR(fid);
978 996
979 if (!v9fs_extended(v9ses)) 997 if (!v9fs_proto_dotu(v9ses))
980 return -EBADF; 998 return -EBADF;
981 999
982 st = p9_client_stat(fid); 1000 st = p9_client_stat(fid);
@@ -1066,7 +1084,7 @@ static int v9fs_vfs_mkspecial(struct inode *dir, struct dentry *dentry,
1066 struct p9_fid *fid; 1084 struct p9_fid *fid;
1067 1085
1068 v9ses = v9fs_inode2v9ses(dir); 1086 v9ses = v9fs_inode2v9ses(dir);
1069 if (!v9fs_extended(v9ses)) { 1087 if (!v9fs_proto_dotu(v9ses)) {
1070 P9_DPRINTK(P9_DEBUG_ERROR, "not extended\n"); 1088 P9_DPRINTK(P9_DEBUG_ERROR, "not extended\n");
1071 return -EPERM; 1089 return -EPERM;
1072 } 1090 }
diff --git a/fs/adfs/adfs.h b/fs/adfs/adfs.h
index 9cc18775b832..2ff622f6f547 100644
--- a/fs/adfs/adfs.h
+++ b/fs/adfs/adfs.h
@@ -121,7 +121,7 @@ struct adfs_discmap {
121 121
122/* Inode stuff */ 122/* Inode stuff */
123struct inode *adfs_iget(struct super_block *sb, struct object_info *obj); 123struct inode *adfs_iget(struct super_block *sb, struct object_info *obj);
124int adfs_write_inode(struct inode *inode,int unused); 124int adfs_write_inode(struct inode *inode, struct writeback_control *wbc);
125int adfs_notify_change(struct dentry *dentry, struct iattr *attr); 125int adfs_notify_change(struct dentry *dentry, struct iattr *attr);
126 126
127/* map.c */ 127/* map.c */
diff --git a/fs/adfs/inode.c b/fs/adfs/inode.c
index 3f57ce4bee5d..0f5e30978135 100644
--- a/fs/adfs/inode.c
+++ b/fs/adfs/inode.c
@@ -9,6 +9,7 @@
9 */ 9 */
10#include <linux/smp_lock.h> 10#include <linux/smp_lock.h>
11#include <linux/buffer_head.h> 11#include <linux/buffer_head.h>
12#include <linux/writeback.h>
12#include "adfs.h" 13#include "adfs.h"
13 14
14/* 15/*
@@ -360,7 +361,7 @@ out:
360 * The adfs-specific inode data has already been updated by 361 * The adfs-specific inode data has already been updated by
361 * adfs_notify_change() 362 * adfs_notify_change()
362 */ 363 */
363int adfs_write_inode(struct inode *inode, int wait) 364int adfs_write_inode(struct inode *inode, struct writeback_control *wbc)
364{ 365{
365 struct super_block *sb = inode->i_sb; 366 struct super_block *sb = inode->i_sb;
366 struct object_info obj; 367 struct object_info obj;
@@ -375,7 +376,7 @@ int adfs_write_inode(struct inode *inode, int wait)
375 obj.attr = ADFS_I(inode)->attr; 376 obj.attr = ADFS_I(inode)->attr;
376 obj.size = inode->i_size; 377 obj.size = inode->i_size;
377 378
378 ret = adfs_dir_update(sb, &obj, wait); 379 ret = adfs_dir_update(sb, &obj, wbc->sync_mode == WB_SYNC_ALL);
379 unlock_kernel(); 380 unlock_kernel();
380 return ret; 381 return ret;
381} 382}
diff --git a/fs/affs/affs.h b/fs/affs/affs.h
index 0e40caaba456..861dae68ac12 100644
--- a/fs/affs/affs.h
+++ b/fs/affs/affs.h
@@ -175,7 +175,8 @@ extern void affs_delete_inode(struct inode *inode);
175extern void affs_clear_inode(struct inode *inode); 175extern void affs_clear_inode(struct inode *inode);
176extern struct inode *affs_iget(struct super_block *sb, 176extern struct inode *affs_iget(struct super_block *sb,
177 unsigned long ino); 177 unsigned long ino);
178extern int affs_write_inode(struct inode *inode, int); 178extern int affs_write_inode(struct inode *inode,
179 struct writeback_control *wbc);
179extern int affs_add_entry(struct inode *dir, struct inode *inode, struct dentry *dentry, s32 type); 180extern int affs_add_entry(struct inode *dir, struct inode *inode, struct dentry *dentry, s32 type);
180 181
181/* file.c */ 182/* file.c */
diff --git a/fs/affs/inode.c b/fs/affs/inode.c
index 3c4ec7d864c4..c9744d771d98 100644
--- a/fs/affs/inode.c
+++ b/fs/affs/inode.c
@@ -166,7 +166,7 @@ bad_inode:
166} 166}
167 167
168int 168int
169affs_write_inode(struct inode *inode, int unused) 169affs_write_inode(struct inode *inode, struct writeback_control *wbc)
170{ 170{
171 struct super_block *sb = inode->i_sb; 171 struct super_block *sb = inode->i_sb;
172 struct buffer_head *bh; 172 struct buffer_head *bh;
diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index 6ece2a13bf71..c54dad4e6063 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -733,7 +733,6 @@ extern int afs_write_end(struct file *file, struct address_space *mapping,
733 struct page *page, void *fsdata); 733 struct page *page, void *fsdata);
734extern int afs_writepage(struct page *, struct writeback_control *); 734extern int afs_writepage(struct page *, struct writeback_control *);
735extern int afs_writepages(struct address_space *, struct writeback_control *); 735extern int afs_writepages(struct address_space *, struct writeback_control *);
736extern int afs_write_inode(struct inode *, int);
737extern void afs_pages_written_back(struct afs_vnode *, struct afs_call *); 736extern void afs_pages_written_back(struct afs_vnode *, struct afs_call *);
738extern ssize_t afs_file_write(struct kiocb *, const struct iovec *, 737extern ssize_t afs_file_write(struct kiocb *, const struct iovec *,
739 unsigned long, loff_t); 738 unsigned long, loff_t);
diff --git a/fs/afs/super.c b/fs/afs/super.c
index e1ea1c240b6a..14f6431598ad 100644
--- a/fs/afs/super.c
+++ b/fs/afs/super.c
@@ -48,7 +48,6 @@ struct file_system_type afs_fs_type = {
48static const struct super_operations afs_super_ops = { 48static const struct super_operations afs_super_ops = {
49 .statfs = afs_statfs, 49 .statfs = afs_statfs,
50 .alloc_inode = afs_alloc_inode, 50 .alloc_inode = afs_alloc_inode,
51 .write_inode = afs_write_inode,
52 .destroy_inode = afs_destroy_inode, 51 .destroy_inode = afs_destroy_inode,
53 .clear_inode = afs_clear_inode, 52 .clear_inode = afs_clear_inode,
54 .put_super = afs_put_super, 53 .put_super = afs_put_super,
diff --git a/fs/afs/write.c b/fs/afs/write.c
index 5e15a21dbf9f..3bed54a294d4 100644
--- a/fs/afs/write.c
+++ b/fs/afs/write.c
@@ -585,27 +585,6 @@ int afs_writepages(struct address_space *mapping,
585} 585}
586 586
587/* 587/*
588 * write an inode back
589 */
590int afs_write_inode(struct inode *inode, int sync)
591{
592 struct afs_vnode *vnode = AFS_FS_I(inode);
593 int ret;
594
595 _enter("{%x:%u},", vnode->fid.vid, vnode->fid.vnode);
596
597 ret = 0;
598 if (sync) {
599 ret = filemap_fdatawait(inode->i_mapping);
600 if (ret < 0)
601 __mark_inode_dirty(inode, I_DIRTY_DATASYNC);
602 }
603
604 _leave(" = %d", ret);
605 return ret;
606}
607
608/*
609 * completion of write to server 588 * completion of write to server
610 */ 589 */
611void afs_pages_written_back(struct afs_vnode *vnode, struct afs_call *call) 590void afs_pages_written_back(struct afs_vnode *vnode, struct afs_call *call)
diff --git a/fs/attr.c b/fs/attr.c
index 96d394bdaddf..0a6ea54cde7d 100644
--- a/fs/attr.c
+++ b/fs/attr.c
@@ -12,7 +12,6 @@
12#include <linux/capability.h> 12#include <linux/capability.h>
13#include <linux/fsnotify.h> 13#include <linux/fsnotify.h>
14#include <linux/fcntl.h> 14#include <linux/fcntl.h>
15#include <linux/quotaops.h>
16#include <linux/security.h> 15#include <linux/security.h>
17 16
18/* Taken over from the old code... */ 17/* Taken over from the old code... */
@@ -212,14 +211,8 @@ int notify_change(struct dentry * dentry, struct iattr * attr)
212 error = inode->i_op->setattr(dentry, attr); 211 error = inode->i_op->setattr(dentry, attr);
213 } else { 212 } else {
214 error = inode_change_ok(inode, attr); 213 error = inode_change_ok(inode, attr);
215 if (!error) { 214 if (!error)
216 if ((ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid) || 215 error = inode_setattr(inode, attr);
217 (ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid))
218 error = vfs_dq_transfer(inode, attr) ?
219 -EDQUOT : 0;
220 if (!error)
221 error = inode_setattr(inode, attr);
222 }
223 } 216 }
224 217
225 if (ia_valid & ATTR_SIZE) 218 if (ia_valid & ATTR_SIZE)
diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h
index 0118d67221b2..3d283abf67d7 100644
--- a/fs/autofs4/autofs_i.h
+++ b/fs/autofs4/autofs_i.h
@@ -60,11 +60,6 @@ do { \
60 current->pid, __func__, ##args); \ 60 current->pid, __func__, ##args); \
61} while (0) 61} while (0)
62 62
63struct rehash_entry {
64 struct task_struct *task;
65 struct list_head list;
66};
67
68/* Unified info structure. This is pointed to by both the dentry and 63/* Unified info structure. This is pointed to by both the dentry and
69 inode structures. Each file in the filesystem has an instance of this 64 inode structures. Each file in the filesystem has an instance of this
70 structure. It holds a reference to the dentry, so dentries are never 65 structure. It holds a reference to the dentry, so dentries are never
@@ -81,7 +76,6 @@ struct autofs_info {
81 76
82 struct list_head active; 77 struct list_head active;
83 int active_count; 78 int active_count;
84 struct list_head rehash_list;
85 79
86 struct list_head expiring; 80 struct list_head expiring;
87 81
@@ -104,7 +98,6 @@ struct autofs_info {
104#define AUTOFS_INF_EXPIRING (1<<0) /* dentry is in the process of expiring */ 98#define AUTOFS_INF_EXPIRING (1<<0) /* dentry is in the process of expiring */
105#define AUTOFS_INF_MOUNTPOINT (1<<1) /* mountpoint status for direct expire */ 99#define AUTOFS_INF_MOUNTPOINT (1<<1) /* mountpoint status for direct expire */
106#define AUTOFS_INF_PENDING (1<<2) /* dentry pending mount */ 100#define AUTOFS_INF_PENDING (1<<2) /* dentry pending mount */
107#define AUTOFS_INF_REHASH (1<<3) /* dentry in transit to ->lookup() */
108 101
109struct autofs_wait_queue { 102struct autofs_wait_queue {
110 wait_queue_head_t queue; 103 wait_queue_head_t queue;
diff --git a/fs/autofs4/dev-ioctl.c b/fs/autofs4/dev-ioctl.c
index 00bf8fcb245f..c8a80dffb455 100644
--- a/fs/autofs4/dev-ioctl.c
+++ b/fs/autofs4/dev-ioctl.c
@@ -544,10 +544,9 @@ static int autofs_dev_ioctl_ismountpoint(struct file *fp,
544 goto out; 544 goto out;
545 devid = new_encode_dev(path.mnt->mnt_sb->s_dev); 545 devid = new_encode_dev(path.mnt->mnt_sb->s_dev);
546 err = 0; 546 err = 0;
547 if (path.dentry->d_inode && 547 if (path.mnt->mnt_root == path.dentry) {
548 path.mnt->mnt_root == path.dentry) {
549 err = 1; 548 err = 1;
550 magic = path.dentry->d_inode->i_sb->s_magic; 549 magic = path.mnt->mnt_sb->s_magic;
551 } 550 }
552 } else { 551 } else {
553 dev_t dev = sbi->sb->s_dev; 552 dev_t dev = sbi->sb->s_dev;
@@ -560,10 +559,8 @@ static int autofs_dev_ioctl_ismountpoint(struct file *fp,
560 559
561 err = have_submounts(path.dentry); 560 err = have_submounts(path.dentry);
562 561
563 if (path.mnt->mnt_mountpoint != path.mnt->mnt_root) { 562 if (follow_down(&path))
564 if (follow_down(&path)) 563 magic = path.mnt->mnt_sb->s_magic;
565 magic = path.mnt->mnt_sb->s_magic;
566 }
567 } 564 }
568 565
569 param->ismountpoint.out.devid = devid; 566 param->ismountpoint.out.devid = devid;
diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c
index 74bc9aa6df31..a796c9417fb1 100644
--- a/fs/autofs4/expire.c
+++ b/fs/autofs4/expire.c
@@ -279,7 +279,6 @@ struct dentry *autofs4_expire_direct(struct super_block *sb,
279 root->d_mounted--; 279 root->d_mounted--;
280 } 280 }
281 ino->flags |= AUTOFS_INF_EXPIRING; 281 ino->flags |= AUTOFS_INF_EXPIRING;
282 autofs4_add_expiring(root);
283 init_completion(&ino->expire_complete); 282 init_completion(&ino->expire_complete);
284 spin_unlock(&sbi->fs_lock); 283 spin_unlock(&sbi->fs_lock);
285 return root; 284 return root;
@@ -407,7 +406,6 @@ found:
407 expired, (int)expired->d_name.len, expired->d_name.name); 406 expired, (int)expired->d_name.len, expired->d_name.name);
408 ino = autofs4_dentry_ino(expired); 407 ino = autofs4_dentry_ino(expired);
409 ino->flags |= AUTOFS_INF_EXPIRING; 408 ino->flags |= AUTOFS_INF_EXPIRING;
410 autofs4_add_expiring(expired);
411 init_completion(&ino->expire_complete); 409 init_completion(&ino->expire_complete);
412 spin_unlock(&sbi->fs_lock); 410 spin_unlock(&sbi->fs_lock);
413 spin_lock(&dcache_lock); 411 spin_lock(&dcache_lock);
@@ -435,7 +433,7 @@ int autofs4_expire_wait(struct dentry *dentry)
435 433
436 DPRINTK("expire done status=%d", status); 434 DPRINTK("expire done status=%d", status);
437 435
438 if (d_unhashed(dentry) && IS_DEADDIR(dentry->d_inode)) 436 if (d_unhashed(dentry))
439 return -EAGAIN; 437 return -EAGAIN;
440 438
441 return status; 439 return status;
@@ -475,7 +473,6 @@ int autofs4_expire_run(struct super_block *sb,
475 spin_lock(&sbi->fs_lock); 473 spin_lock(&sbi->fs_lock);
476 ino = autofs4_dentry_ino(dentry); 474 ino = autofs4_dentry_ino(dentry);
477 ino->flags &= ~AUTOFS_INF_EXPIRING; 475 ino->flags &= ~AUTOFS_INF_EXPIRING;
478 autofs4_del_expiring(dentry);
479 complete_all(&ino->expire_complete); 476 complete_all(&ino->expire_complete);
480 spin_unlock(&sbi->fs_lock); 477 spin_unlock(&sbi->fs_lock);
481 478
@@ -506,7 +503,6 @@ int autofs4_do_expire_multi(struct super_block *sb, struct vfsmount *mnt,
506 ino->flags &= ~AUTOFS_INF_MOUNTPOINT; 503 ino->flags &= ~AUTOFS_INF_MOUNTPOINT;
507 } 504 }
508 ino->flags &= ~AUTOFS_INF_EXPIRING; 505 ino->flags &= ~AUTOFS_INF_EXPIRING;
509 autofs4_del_expiring(dentry);
510 complete_all(&ino->expire_complete); 506 complete_all(&ino->expire_complete);
511 spin_unlock(&sbi->fs_lock); 507 spin_unlock(&sbi->fs_lock);
512 dput(dentry); 508 dput(dentry);
diff --git a/fs/autofs4/inode.c b/fs/autofs4/inode.c
index d0a3de247458..821b2b955dac 100644
--- a/fs/autofs4/inode.c
+++ b/fs/autofs4/inode.c
@@ -49,7 +49,6 @@ struct autofs_info *autofs4_init_ino(struct autofs_info *ino,
49 ino->dentry = NULL; 49 ino->dentry = NULL;
50 ino->size = 0; 50 ino->size = 0;
51 INIT_LIST_HEAD(&ino->active); 51 INIT_LIST_HEAD(&ino->active);
52 INIT_LIST_HEAD(&ino->rehash_list);
53 ino->active_count = 0; 52 ino->active_count = 0;
54 INIT_LIST_HEAD(&ino->expiring); 53 INIT_LIST_HEAD(&ino->expiring);
55 atomic_set(&ino->count, 0); 54 atomic_set(&ino->count, 0);
@@ -97,63 +96,6 @@ void autofs4_free_ino(struct autofs_info *ino)
97 kfree(ino); 96 kfree(ino);
98} 97}
99 98
100/*
101 * Deal with the infamous "Busy inodes after umount ..." message.
102 *
103 * Clean up the dentry tree. This happens with autofs if the user
104 * space program goes away due to a SIGKILL, SIGSEGV etc.
105 */
106static void autofs4_force_release(struct autofs_sb_info *sbi)
107{
108 struct dentry *this_parent = sbi->sb->s_root;
109 struct list_head *next;
110
111 if (!sbi->sb->s_root)
112 return;
113
114 spin_lock(&dcache_lock);
115repeat:
116 next = this_parent->d_subdirs.next;
117resume:
118 while (next != &this_parent->d_subdirs) {
119 struct dentry *dentry = list_entry(next, struct dentry, d_u.d_child);
120
121 /* Negative dentry - don`t care */
122 if (!simple_positive(dentry)) {
123 next = next->next;
124 continue;
125 }
126
127 if (!list_empty(&dentry->d_subdirs)) {
128 this_parent = dentry;
129 goto repeat;
130 }
131
132 next = next->next;
133 spin_unlock(&dcache_lock);
134
135 DPRINTK("dentry %p %.*s",
136 dentry, (int)dentry->d_name.len, dentry->d_name.name);
137
138 dput(dentry);
139 spin_lock(&dcache_lock);
140 }
141
142 if (this_parent != sbi->sb->s_root) {
143 struct dentry *dentry = this_parent;
144
145 next = this_parent->d_u.d_child.next;
146 this_parent = this_parent->d_parent;
147 spin_unlock(&dcache_lock);
148 DPRINTK("parent dentry %p %.*s",
149 dentry, (int)dentry->d_name.len, dentry->d_name.name);
150 dput(dentry);
151 spin_lock(&dcache_lock);
152 goto resume;
153 }
154 spin_unlock(&dcache_lock);
155}
156
157void autofs4_kill_sb(struct super_block *sb) 99void autofs4_kill_sb(struct super_block *sb)
158{ 100{
159 struct autofs_sb_info *sbi = autofs4_sbi(sb); 101 struct autofs_sb_info *sbi = autofs4_sbi(sb);
@@ -170,15 +112,12 @@ void autofs4_kill_sb(struct super_block *sb)
170 /* Free wait queues, close pipe */ 112 /* Free wait queues, close pipe */
171 autofs4_catatonic_mode(sbi); 113 autofs4_catatonic_mode(sbi);
172 114
173 /* Clean up and release dangling references */
174 autofs4_force_release(sbi);
175
176 sb->s_fs_info = NULL; 115 sb->s_fs_info = NULL;
177 kfree(sbi); 116 kfree(sbi);
178 117
179out_kill_sb: 118out_kill_sb:
180 DPRINTK("shutting down"); 119 DPRINTK("shutting down");
181 kill_anon_super(sb); 120 kill_litter_super(sb);
182} 121}
183 122
184static int autofs4_show_options(struct seq_file *m, struct vfsmount *mnt) 123static int autofs4_show_options(struct seq_file *m, struct vfsmount *mnt)
diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c
index 30cc9ddf4b70..a015b49891df 100644
--- a/fs/autofs4/root.c
+++ b/fs/autofs4/root.c
@@ -104,99 +104,6 @@ static void autofs4_del_active(struct dentry *dentry)
104 return; 104 return;
105} 105}
106 106
107static void autofs4_add_rehash_entry(struct autofs_info *ino,
108 struct rehash_entry *entry)
109{
110 entry->task = current;
111 INIT_LIST_HEAD(&entry->list);
112 list_add(&entry->list, &ino->rehash_list);
113 return;
114}
115
116static void autofs4_remove_rehash_entry(struct autofs_info *ino)
117{
118 struct list_head *head = &ino->rehash_list;
119 struct rehash_entry *entry;
120 list_for_each_entry(entry, head, list) {
121 if (entry->task == current) {
122 list_del(&entry->list);
123 kfree(entry);
124 break;
125 }
126 }
127 return;
128}
129
130static void autofs4_remove_rehash_entrys(struct autofs_info *ino)
131{
132 struct autofs_sb_info *sbi = ino->sbi;
133 struct rehash_entry *entry, *next;
134 struct list_head *head;
135
136 spin_lock(&sbi->fs_lock);
137 spin_lock(&sbi->lookup_lock);
138 if (!(ino->flags & AUTOFS_INF_REHASH)) {
139 spin_unlock(&sbi->lookup_lock);
140 spin_unlock(&sbi->fs_lock);
141 return;
142 }
143 ino->flags &= ~AUTOFS_INF_REHASH;
144 head = &ino->rehash_list;
145 list_for_each_entry_safe(entry, next, head, list) {
146 list_del(&entry->list);
147 kfree(entry);
148 }
149 spin_unlock(&sbi->lookup_lock);
150 spin_unlock(&sbi->fs_lock);
151 dput(ino->dentry);
152
153 return;
154}
155
156static void autofs4_revalidate_drop(struct dentry *dentry,
157 struct rehash_entry *entry)
158{
159 struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb);
160 struct autofs_info *ino = autofs4_dentry_ino(dentry);
161 /*
162 * Add to the active list so we can pick this up in
163 * ->lookup(). Also add an entry to a rehash list so
164 * we know when there are no dentrys in flight so we
165 * know when we can rehash the dentry.
166 */
167 spin_lock(&sbi->lookup_lock);
168 if (list_empty(&ino->active))
169 list_add(&ino->active, &sbi->active_list);
170 autofs4_add_rehash_entry(ino, entry);
171 spin_unlock(&sbi->lookup_lock);
172 if (!(ino->flags & AUTOFS_INF_REHASH)) {
173 ino->flags |= AUTOFS_INF_REHASH;
174 dget(dentry);
175 spin_lock(&dentry->d_lock);
176 __d_drop(dentry);
177 spin_unlock(&dentry->d_lock);
178 }
179 return;
180}
181
182static void autofs4_revalidate_rehash(struct dentry *dentry)
183{
184 struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb);
185 struct autofs_info *ino = autofs4_dentry_ino(dentry);
186 if (ino->flags & AUTOFS_INF_REHASH) {
187 spin_lock(&sbi->lookup_lock);
188 autofs4_remove_rehash_entry(ino);
189 if (list_empty(&ino->rehash_list)) {
190 spin_unlock(&sbi->lookup_lock);
191 ino->flags &= ~AUTOFS_INF_REHASH;
192 d_rehash(dentry);
193 dput(ino->dentry);
194 } else
195 spin_unlock(&sbi->lookup_lock);
196 }
197 return;
198}
199
200static unsigned int autofs4_need_mount(unsigned int flags) 107static unsigned int autofs4_need_mount(unsigned int flags)
201{ 108{
202 unsigned int res = 0; 109 unsigned int res = 0;
@@ -236,7 +143,7 @@ out:
236 return dcache_dir_open(inode, file); 143 return dcache_dir_open(inode, file);
237} 144}
238 145
239static int try_to_fill_dentry(struct dentry *dentry) 146static int try_to_fill_dentry(struct dentry *dentry, int flags)
240{ 147{
241 struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb); 148 struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb);
242 struct autofs_info *ino = autofs4_dentry_ino(dentry); 149 struct autofs_info *ino = autofs4_dentry_ino(dentry);
@@ -249,17 +156,55 @@ static int try_to_fill_dentry(struct dentry *dentry)
249 * Wait for a pending mount, triggering one if there 156 * Wait for a pending mount, triggering one if there
250 * isn't one already 157 * isn't one already
251 */ 158 */
252 DPRINTK("waiting for mount name=%.*s", 159 if (dentry->d_inode == NULL) {
253 dentry->d_name.len, dentry->d_name.name); 160 DPRINTK("waiting for mount name=%.*s",
161 dentry->d_name.len, dentry->d_name.name);
254 162
255 status = autofs4_wait(sbi, dentry, NFY_MOUNT); 163 status = autofs4_wait(sbi, dentry, NFY_MOUNT);
256 164
257 DPRINTK("mount done status=%d", status); 165 DPRINTK("mount done status=%d", status);
258 166
259 /* Update expiry counter */ 167 /* Turn this into a real negative dentry? */
260 ino->last_used = jiffies; 168 if (status == -ENOENT) {
169 spin_lock(&sbi->fs_lock);
170 ino->flags &= ~AUTOFS_INF_PENDING;
171 spin_unlock(&sbi->fs_lock);
172 return status;
173 } else if (status) {
174 /* Return a negative dentry, but leave it "pending" */
175 return status;
176 }
177 /* Trigger mount for path component or follow link */
178 } else if (ino->flags & AUTOFS_INF_PENDING ||
179 autofs4_need_mount(flags) ||
180 current->link_count) {
181 DPRINTK("waiting for mount name=%.*s",
182 dentry->d_name.len, dentry->d_name.name);
261 183
262 return status; 184 spin_lock(&sbi->fs_lock);
185 ino->flags |= AUTOFS_INF_PENDING;
186 spin_unlock(&sbi->fs_lock);
187 status = autofs4_wait(sbi, dentry, NFY_MOUNT);
188
189 DPRINTK("mount done status=%d", status);
190
191 if (status) {
192 spin_lock(&sbi->fs_lock);
193 ino->flags &= ~AUTOFS_INF_PENDING;
194 spin_unlock(&sbi->fs_lock);
195 return status;
196 }
197 }
198
199 /* Initialize expiry counter after successful mount */
200 if (ino)
201 ino->last_used = jiffies;
202
203 spin_lock(&sbi->fs_lock);
204 ino->flags &= ~AUTOFS_INF_PENDING;
205 spin_unlock(&sbi->fs_lock);
206
207 return 0;
263} 208}
264 209
265/* For autofs direct mounts the follow link triggers the mount */ 210/* For autofs direct mounts the follow link triggers the mount */
@@ -313,16 +258,10 @@ static void *autofs4_follow_link(struct dentry *dentry, struct nameidata *nd)
313 */ 258 */
314 if (ino->flags & AUTOFS_INF_PENDING || 259 if (ino->flags & AUTOFS_INF_PENDING ||
315 (!d_mountpoint(dentry) && list_empty(&dentry->d_subdirs))) { 260 (!d_mountpoint(dentry) && list_empty(&dentry->d_subdirs))) {
316 ino->flags |= AUTOFS_INF_PENDING;
317 spin_unlock(&dcache_lock); 261 spin_unlock(&dcache_lock);
318 spin_unlock(&sbi->fs_lock); 262 spin_unlock(&sbi->fs_lock);
319 263
320 status = try_to_fill_dentry(dentry); 264 status = try_to_fill_dentry(dentry, 0);
321
322 spin_lock(&sbi->fs_lock);
323 ino->flags &= ~AUTOFS_INF_PENDING;
324 spin_unlock(&sbi->fs_lock);
325
326 if (status) 265 if (status)
327 goto out_error; 266 goto out_error;
328 267
@@ -361,47 +300,18 @@ static int autofs4_revalidate(struct dentry *dentry, struct nameidata *nd)
361{ 300{
362 struct inode *dir = dentry->d_parent->d_inode; 301 struct inode *dir = dentry->d_parent->d_inode;
363 struct autofs_sb_info *sbi = autofs4_sbi(dir->i_sb); 302 struct autofs_sb_info *sbi = autofs4_sbi(dir->i_sb);
364 struct autofs_info *ino = autofs4_dentry_ino(dentry); 303 int oz_mode = autofs4_oz_mode(sbi);
365 struct rehash_entry *entry;
366 int flags = nd ? nd->flags : 0; 304 int flags = nd ? nd->flags : 0;
367 unsigned int mutex_aquired; 305 int status = 1;
368 306
369 DPRINTK("name = %.*s oz_mode = %d",
370 dentry->d_name.len, dentry->d_name.name, oz_mode);
371
372 /* Daemon never causes a mount to trigger */
373 if (autofs4_oz_mode(sbi))
374 return 1;
375
376 entry = kmalloc(sizeof(struct rehash_entry), GFP_KERNEL);
377 if (!entry)
378 return -ENOMEM;
379
380 mutex_aquired = mutex_trylock(&dir->i_mutex);
381
382 spin_lock(&sbi->fs_lock);
383 spin_lock(&dcache_lock);
384 /* Pending dentry */ 307 /* Pending dentry */
308 spin_lock(&sbi->fs_lock);
385 if (autofs4_ispending(dentry)) { 309 if (autofs4_ispending(dentry)) {
386 int status; 310 /* The daemon never causes a mount to trigger */
387
388 /*
389 * We can only unhash and send this to ->lookup() if
390 * the directory mutex is held over d_revalidate() and
391 * ->lookup(). This prevents the VFS from incorrectly
392 * seeing the dentry as non-existent.
393 */
394 ino->flags |= AUTOFS_INF_PENDING;
395 if (!mutex_aquired) {
396 autofs4_revalidate_drop(dentry, entry);
397 spin_unlock(&dcache_lock);
398 spin_unlock(&sbi->fs_lock);
399 return 0;
400 }
401 spin_unlock(&dcache_lock);
402 spin_unlock(&sbi->fs_lock); 311 spin_unlock(&sbi->fs_lock);
403 mutex_unlock(&dir->i_mutex); 312
404 kfree(entry); 313 if (oz_mode)
314 return 1;
405 315
406 /* 316 /*
407 * If the directory has gone away due to an expire 317 * If the directory has gone away due to an expire
@@ -415,82 +325,45 @@ static int autofs4_revalidate(struct dentry *dentry, struct nameidata *nd)
415 * A zero status is success otherwise we have a 325 * A zero status is success otherwise we have a
416 * negative error code. 326 * negative error code.
417 */ 327 */
418 status = try_to_fill_dentry(dentry); 328 status = try_to_fill_dentry(dentry, flags);
419
420 spin_lock(&sbi->fs_lock);
421 ino->flags &= ~AUTOFS_INF_PENDING;
422 spin_unlock(&sbi->fs_lock);
423
424 if (status == 0) 329 if (status == 0)
425 return 1; 330 return 1;
426 331
427 return status; 332 return status;
428 } 333 }
334 spin_unlock(&sbi->fs_lock);
335
336 /* Negative dentry.. invalidate if "old" */
337 if (dentry->d_inode == NULL)
338 return 0;
429 339
430 /* Check for a non-mountpoint directory with no contents */ 340 /* Check for a non-mountpoint directory with no contents */
341 spin_lock(&dcache_lock);
431 if (S_ISDIR(dentry->d_inode->i_mode) && 342 if (S_ISDIR(dentry->d_inode->i_mode) &&
432 !d_mountpoint(dentry) && list_empty(&dentry->d_subdirs)) { 343 !d_mountpoint(dentry) && list_empty(&dentry->d_subdirs)) {
433 DPRINTK("dentry=%p %.*s, emptydir", 344 DPRINTK("dentry=%p %.*s, emptydir",
434 dentry, dentry->d_name.len, dentry->d_name.name); 345 dentry, dentry->d_name.len, dentry->d_name.name);
346 spin_unlock(&dcache_lock);
435 347
436 if (autofs4_need_mount(flags) || current->link_count) { 348 /* The daemon never causes a mount to trigger */
437 int status; 349 if (oz_mode)
438 350 return 1;
439 /*
440 * We can only unhash and send this to ->lookup() if
441 * the directory mutex is held over d_revalidate() and
442 * ->lookup(). This prevents the VFS from incorrectly
443 * seeing the dentry as non-existent.
444 */
445 ino->flags |= AUTOFS_INF_PENDING;
446 if (!mutex_aquired) {
447 autofs4_revalidate_drop(dentry, entry);
448 spin_unlock(&dcache_lock);
449 spin_unlock(&sbi->fs_lock);
450 return 0;
451 }
452 spin_unlock(&dcache_lock);
453 spin_unlock(&sbi->fs_lock);
454 mutex_unlock(&dir->i_mutex);
455 kfree(entry);
456
457 /*
458 * A zero status is success otherwise we have a
459 * negative error code.
460 */
461 status = try_to_fill_dentry(dentry);
462
463 spin_lock(&sbi->fs_lock);
464 ino->flags &= ~AUTOFS_INF_PENDING;
465 spin_unlock(&sbi->fs_lock);
466 351
467 if (status == 0) 352 /*
468 return 1; 353 * A zero status is success otherwise we have a
354 * negative error code.
355 */
356 status = try_to_fill_dentry(dentry, flags);
357 if (status == 0)
358 return 1;
469 359
470 return status; 360 return status;
471 }
472 } 361 }
473 spin_unlock(&dcache_lock); 362 spin_unlock(&dcache_lock);
474 spin_unlock(&sbi->fs_lock);
475
476 if (mutex_aquired)
477 mutex_unlock(&dir->i_mutex);
478
479 kfree(entry);
480 363
481 return 1; 364 return 1;
482} 365}
483 366
484static void autofs4_free_rehash_entrys(struct autofs_info *inf)
485{
486 struct list_head *head = &inf->rehash_list;
487 struct rehash_entry *entry, *next;
488 list_for_each_entry_safe(entry, next, head, list) {
489 list_del(&entry->list);
490 kfree(entry);
491 }
492}
493
494void autofs4_dentry_release(struct dentry *de) 367void autofs4_dentry_release(struct dentry *de)
495{ 368{
496 struct autofs_info *inf; 369 struct autofs_info *inf;
@@ -509,8 +382,6 @@ void autofs4_dentry_release(struct dentry *de)
509 list_del(&inf->active); 382 list_del(&inf->active);
510 if (!list_empty(&inf->expiring)) 383 if (!list_empty(&inf->expiring))
511 list_del(&inf->expiring); 384 list_del(&inf->expiring);
512 if (!list_empty(&inf->rehash_list))
513 autofs4_free_rehash_entrys(inf);
514 spin_unlock(&sbi->lookup_lock); 385 spin_unlock(&sbi->lookup_lock);
515 } 386 }
516 387
@@ -543,7 +414,6 @@ static struct dentry *autofs4_lookup_active(struct dentry *dentry)
543 const unsigned char *str = name->name; 414 const unsigned char *str = name->name;
544 struct list_head *p, *head; 415 struct list_head *p, *head;
545 416
546restart:
547 spin_lock(&dcache_lock); 417 spin_lock(&dcache_lock);
548 spin_lock(&sbi->lookup_lock); 418 spin_lock(&sbi->lookup_lock);
549 head = &sbi->active_list; 419 head = &sbi->active_list;
@@ -561,19 +431,6 @@ restart:
561 if (atomic_read(&active->d_count) == 0) 431 if (atomic_read(&active->d_count) == 0)
562 goto next; 432 goto next;
563 433
564 if (active->d_inode && IS_DEADDIR(active->d_inode)) {
565 if (!list_empty(&ino->rehash_list)) {
566 dget(active);
567 spin_unlock(&active->d_lock);
568 spin_unlock(&sbi->lookup_lock);
569 spin_unlock(&dcache_lock);
570 autofs4_remove_rehash_entrys(ino);
571 dput(active);
572 goto restart;
573 }
574 goto next;
575 }
576
577 qstr = &active->d_name; 434 qstr = &active->d_name;
578 435
579 if (active->d_name.hash != hash) 436 if (active->d_name.hash != hash)
@@ -586,11 +443,13 @@ restart:
586 if (memcmp(qstr->name, str, len)) 443 if (memcmp(qstr->name, str, len))
587 goto next; 444 goto next;
588 445
589 dget(active); 446 if (d_unhashed(active)) {
590 spin_unlock(&active->d_lock); 447 dget(active);
591 spin_unlock(&sbi->lookup_lock); 448 spin_unlock(&active->d_lock);
592 spin_unlock(&dcache_lock); 449 spin_unlock(&sbi->lookup_lock);
593 return active; 450 spin_unlock(&dcache_lock);
451 return active;
452 }
594next: 453next:
595 spin_unlock(&active->d_lock); 454 spin_unlock(&active->d_lock);
596 } 455 }
@@ -639,11 +498,13 @@ static struct dentry *autofs4_lookup_expiring(struct dentry *dentry)
639 if (memcmp(qstr->name, str, len)) 498 if (memcmp(qstr->name, str, len))
640 goto next; 499 goto next;
641 500
642 dget(expiring); 501 if (d_unhashed(expiring)) {
643 spin_unlock(&expiring->d_lock); 502 dget(expiring);
644 spin_unlock(&sbi->lookup_lock); 503 spin_unlock(&expiring->d_lock);
645 spin_unlock(&dcache_lock); 504 spin_unlock(&sbi->lookup_lock);
646 return expiring; 505 spin_unlock(&dcache_lock);
506 return expiring;
507 }
647next: 508next:
648 spin_unlock(&expiring->d_lock); 509 spin_unlock(&expiring->d_lock);
649 } 510 }
@@ -653,48 +514,6 @@ next:
653 return NULL; 514 return NULL;
654} 515}
655 516
656static struct autofs_info *init_new_dentry(struct autofs_sb_info *sbi,
657 struct dentry *dentry, int oz_mode)
658{
659 struct autofs_info *ino;
660
661 /*
662 * Mark the dentry incomplete but don't hash it. We do this
663 * to serialize our inode creation operations (symlink and
664 * mkdir) which prevents deadlock during the callback to
665 * the daemon. Subsequent user space lookups for the same
666 * dentry are placed on the wait queue while the daemon
667 * itself is allowed passage unresticted so the create
668 * operation itself can then hash the dentry. Finally,
669 * we check for the hashed dentry and return the newly
670 * hashed dentry.
671 */
672 dentry->d_op = &autofs4_root_dentry_operations;
673
674 /*
675 * And we need to ensure that the same dentry is used for
676 * all following lookup calls until it is hashed so that
677 * the dentry flags are persistent throughout the request.
678 */
679 ino = autofs4_init_ino(NULL, sbi, 0555);
680 if (!ino)
681 return ERR_PTR(-ENOMEM);
682
683 dentry->d_fsdata = ino;
684 ino->dentry = dentry;
685
686 /*
687 * Only set the mount pending flag for new dentrys not created
688 * by the daemon.
689 */
690 if (!oz_mode)
691 ino->flags |= AUTOFS_INF_PENDING;
692
693 d_instantiate(dentry, NULL);
694
695 return ino;
696}
697
698/* Lookups in the root directory */ 517/* Lookups in the root directory */
699static struct dentry *autofs4_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd) 518static struct dentry *autofs4_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
700{ 519{
@@ -702,7 +521,6 @@ static struct dentry *autofs4_lookup(struct inode *dir, struct dentry *dentry, s
702 struct autofs_info *ino; 521 struct autofs_info *ino;
703 struct dentry *expiring, *active; 522 struct dentry *expiring, *active;
704 int oz_mode; 523 int oz_mode;
705 int status = 0;
706 524
707 DPRINTK("name = %.*s", 525 DPRINTK("name = %.*s",
708 dentry->d_name.len, dentry->d_name.name); 526 dentry->d_name.len, dentry->d_name.name);
@@ -717,26 +535,44 @@ static struct dentry *autofs4_lookup(struct inode *dir, struct dentry *dentry, s
717 DPRINTK("pid = %u, pgrp = %u, catatonic = %d, oz_mode = %d", 535 DPRINTK("pid = %u, pgrp = %u, catatonic = %d, oz_mode = %d",
718 current->pid, task_pgrp_nr(current), sbi->catatonic, oz_mode); 536 current->pid, task_pgrp_nr(current), sbi->catatonic, oz_mode);
719 537
720 spin_lock(&sbi->fs_lock);
721 active = autofs4_lookup_active(dentry); 538 active = autofs4_lookup_active(dentry);
722 if (active) { 539 if (active) {
723 dentry = active; 540 dentry = active;
724 ino = autofs4_dentry_ino(dentry); 541 ino = autofs4_dentry_ino(dentry);
725 /* If this came from revalidate, rehash it */
726 autofs4_revalidate_rehash(dentry);
727 spin_unlock(&sbi->fs_lock);
728 } else { 542 } else {
729 spin_unlock(&sbi->fs_lock); 543 /*
730 ino = init_new_dentry(sbi, dentry, oz_mode); 544 * Mark the dentry incomplete but don't hash it. We do this
731 if (IS_ERR(ino)) 545 * to serialize our inode creation operations (symlink and
732 return (struct dentry *) ino; 546 * mkdir) which prevents deadlock during the callback to
733 } 547 * the daemon. Subsequent user space lookups for the same
548 * dentry are placed on the wait queue while the daemon
549 * itself is allowed passage unresticted so the create
550 * operation itself can then hash the dentry. Finally,
551 * we check for the hashed dentry and return the newly
552 * hashed dentry.
553 */
554 dentry->d_op = &autofs4_root_dentry_operations;
555
556 /*
557 * And we need to ensure that the same dentry is used for
558 * all following lookup calls until it is hashed so that
559 * the dentry flags are persistent throughout the request.
560 */
561 ino = autofs4_init_ino(NULL, sbi, 0555);
562 if (!ino)
563 return ERR_PTR(-ENOMEM);
734 564
735 autofs4_add_active(dentry); 565 dentry->d_fsdata = ino;
566 ino->dentry = dentry;
567
568 autofs4_add_active(dentry);
569
570 d_instantiate(dentry, NULL);
571 }
736 572
737 if (!oz_mode) { 573 if (!oz_mode) {
738 expiring = autofs4_lookup_expiring(dentry);
739 mutex_unlock(&dir->i_mutex); 574 mutex_unlock(&dir->i_mutex);
575 expiring = autofs4_lookup_expiring(dentry);
740 if (expiring) { 576 if (expiring) {
741 /* 577 /*
742 * If we are racing with expire the request might not 578 * If we are racing with expire the request might not
@@ -744,22 +580,23 @@ static struct dentry *autofs4_lookup(struct inode *dir, struct dentry *dentry, s
744 * so it must have been successful, so just wait for it. 580 * so it must have been successful, so just wait for it.
745 */ 581 */
746 autofs4_expire_wait(expiring); 582 autofs4_expire_wait(expiring);
583 autofs4_del_expiring(expiring);
747 dput(expiring); 584 dput(expiring);
748 } 585 }
749 status = try_to_fill_dentry(dentry); 586
750 mutex_lock(&dir->i_mutex);
751 spin_lock(&sbi->fs_lock); 587 spin_lock(&sbi->fs_lock);
752 ino->flags &= ~AUTOFS_INF_PENDING; 588 ino->flags |= AUTOFS_INF_PENDING;
753 spin_unlock(&sbi->fs_lock); 589 spin_unlock(&sbi->fs_lock);
590 if (dentry->d_op && dentry->d_op->d_revalidate)
591 (dentry->d_op->d_revalidate)(dentry, nd);
592 mutex_lock(&dir->i_mutex);
754 } 593 }
755 594
756 autofs4_del_active(dentry);
757
758 /* 595 /*
759 * If we had a mount fail, check if we had to handle 596 * If we are still pending, check if we had to handle
760 * a signal. If so we can force a restart.. 597 * a signal. If so we can force a restart..
761 */ 598 */
762 if (status) { 599 if (ino->flags & AUTOFS_INF_PENDING) {
763 /* See if we were interrupted */ 600 /* See if we were interrupted */
764 if (signal_pending(current)) { 601 if (signal_pending(current)) {
765 sigset_t *sigset = &current->pending.signal; 602 sigset_t *sigset = &current->pending.signal;
@@ -771,46 +608,43 @@ static struct dentry *autofs4_lookup(struct inode *dir, struct dentry *dentry, s
771 return ERR_PTR(-ERESTARTNOINTR); 608 return ERR_PTR(-ERESTARTNOINTR);
772 } 609 }
773 } 610 }
774 } 611 if (!oz_mode) {
775 612 spin_lock(&sbi->fs_lock);
776 /* 613 ino->flags &= ~AUTOFS_INF_PENDING;
777 * User space can (and has done in the past) remove and re-create 614 spin_unlock(&sbi->fs_lock);
778 * this directory during the callback. This can leave us with an
779 * unhashed dentry, but a successful mount! So we need to
780 * perform another cached lookup in case the dentry now exists.
781 */
782 if (!oz_mode && !have_submounts(dentry)) {
783 struct dentry *new;
784 new = d_lookup(dentry->d_parent, &dentry->d_name);
785 if (new) {
786 if (active)
787 dput(active);
788 return new;
789 } else {
790 if (!status)
791 status = -ENOENT;
792 } 615 }
793 } 616 }
794 617
795 /* 618 /*
796 * If we had a mount failure, return status to user space. 619 * If this dentry is unhashed, then we shouldn't honour this
797 * If the mount succeeded and we used a dentry from the active queue 620 * lookup. Returning ENOENT here doesn't do the right thing
798 * return it. 621 * for all system calls, but it should be OK for the operations
622 * we permit from an autofs.
799 */ 623 */
800 if (status) { 624 if (!oz_mode && d_unhashed(dentry)) {
801 dentry = ERR_PTR(status);
802 if (active)
803 dput(active);
804 return dentry;
805 } else {
806 /* 625 /*
807 * Valid successful mount, return active dentry or NULL 626 * A user space application can (and has done in the past)
808 * for a new dentry. 627 * remove and re-create this directory during the callback.
628 * This can leave us with an unhashed dentry, but a
629 * successful mount! So we need to perform another
630 * cached lookup in case the dentry now exists.
809 */ 631 */
632 struct dentry *parent = dentry->d_parent;
633 struct dentry *new = d_lookup(parent, &dentry->d_name);
634 if (new != NULL)
635 dentry = new;
636 else
637 dentry = ERR_PTR(-ENOENT);
638
810 if (active) 639 if (active)
811 return active; 640 dput(active);
641
642 return dentry;
812 } 643 }
813 644
645 if (active)
646 return active;
647
814 return NULL; 648 return NULL;
815} 649}
816 650
@@ -834,6 +668,8 @@ static int autofs4_dir_symlink(struct inode *dir,
834 if (!ino) 668 if (!ino)
835 return -ENOMEM; 669 return -ENOMEM;
836 670
671 autofs4_del_active(dentry);
672
837 ino->size = strlen(symname); 673 ino->size = strlen(symname);
838 cp = kmalloc(ino->size + 1, GFP_KERNEL); 674 cp = kmalloc(ino->size + 1, GFP_KERNEL);
839 if (!cp) { 675 if (!cp) {
@@ -910,6 +746,7 @@ static int autofs4_dir_unlink(struct inode *dir, struct dentry *dentry)
910 dir->i_mtime = CURRENT_TIME; 746 dir->i_mtime = CURRENT_TIME;
911 747
912 spin_lock(&dcache_lock); 748 spin_lock(&dcache_lock);
749 autofs4_add_expiring(dentry);
913 spin_lock(&dentry->d_lock); 750 spin_lock(&dentry->d_lock);
914 __d_drop(dentry); 751 __d_drop(dentry);
915 spin_unlock(&dentry->d_lock); 752 spin_unlock(&dentry->d_lock);
@@ -935,6 +772,7 @@ static int autofs4_dir_rmdir(struct inode *dir, struct dentry *dentry)
935 spin_unlock(&dcache_lock); 772 spin_unlock(&dcache_lock);
936 return -ENOTEMPTY; 773 return -ENOTEMPTY;
937 } 774 }
775 autofs4_add_expiring(dentry);
938 spin_lock(&dentry->d_lock); 776 spin_lock(&dentry->d_lock);
939 __d_drop(dentry); 777 __d_drop(dentry);
940 spin_unlock(&dentry->d_lock); 778 spin_unlock(&dentry->d_lock);
@@ -972,6 +810,8 @@ static int autofs4_dir_mkdir(struct inode *dir, struct dentry *dentry, int mode)
972 if (!ino) 810 if (!ino)
973 return -ENOMEM; 811 return -ENOMEM;
974 812
813 autofs4_del_active(dentry);
814
975 inode = autofs4_get_inode(dir->i_sb, ino); 815 inode = autofs4_get_inode(dir->i_sb, ino);
976 if (!inode) { 816 if (!inode) {
977 if (!dentry->d_fsdata) 817 if (!dentry->d_fsdata)
diff --git a/fs/bfs/inode.c b/fs/bfs/inode.c
index 8f3d9fd89604..f22a7d3dc362 100644
--- a/fs/bfs/inode.c
+++ b/fs/bfs/inode.c
@@ -15,6 +15,7 @@
15#include <linux/smp_lock.h> 15#include <linux/smp_lock.h>
16#include <linux/buffer_head.h> 16#include <linux/buffer_head.h>
17#include <linux/vfs.h> 17#include <linux/vfs.h>
18#include <linux/writeback.h>
18#include <asm/uaccess.h> 19#include <asm/uaccess.h>
19#include "bfs.h" 20#include "bfs.h"
20 21
@@ -98,7 +99,7 @@ error:
98 return ERR_PTR(-EIO); 99 return ERR_PTR(-EIO);
99} 100}
100 101
101static int bfs_write_inode(struct inode *inode, int wait) 102static int bfs_write_inode(struct inode *inode, struct writeback_control *wbc)
102{ 103{
103 struct bfs_sb_info *info = BFS_SB(inode->i_sb); 104 struct bfs_sb_info *info = BFS_SB(inode->i_sb);
104 unsigned int ino = (u16)inode->i_ino; 105 unsigned int ino = (u16)inode->i_ino;
@@ -147,7 +148,7 @@ static int bfs_write_inode(struct inode *inode, int wait)
147 di->i_eoffset = cpu_to_le32(i_sblock * BFS_BSIZE + inode->i_size - 1); 148 di->i_eoffset = cpu_to_le32(i_sblock * BFS_BSIZE + inode->i_size - 1);
148 149
149 mark_buffer_dirty(bh); 150 mark_buffer_dirty(bh);
150 if (wait) { 151 if (wbc->sync_mode == WB_SYNC_ALL) {
151 sync_dirty_buffer(bh); 152 sync_dirty_buffer(bh);
152 if (buffer_req(bh) && !buffer_uptodate(bh)) 153 if (buffer_req(bh) && !buffer_uptodate(bh))
153 err = -EIO; 154 err = -EIO;
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 2aa8ec6a0981..8b5cfdd4bfc1 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -2326,7 +2326,7 @@ int btrfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf);
2326int btrfs_readpage(struct file *file, struct page *page); 2326int btrfs_readpage(struct file *file, struct page *page);
2327void btrfs_delete_inode(struct inode *inode); 2327void btrfs_delete_inode(struct inode *inode);
2328void btrfs_put_inode(struct inode *inode); 2328void btrfs_put_inode(struct inode *inode);
2329int btrfs_write_inode(struct inode *inode, int wait); 2329int btrfs_write_inode(struct inode *inode, struct writeback_control *wbc);
2330void btrfs_dirty_inode(struct inode *inode); 2330void btrfs_dirty_inode(struct inode *inode);
2331struct inode *btrfs_alloc_inode(struct super_block *sb); 2331struct inode *btrfs_alloc_inode(struct super_block *sb);
2332void btrfs_destroy_inode(struct inode *inode); 2332void btrfs_destroy_inode(struct inode *inode);
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 4deb280f8969..c41db6d45ab6 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -3968,7 +3968,7 @@ err:
3968 return ret; 3968 return ret;
3969} 3969}
3970 3970
3971int btrfs_write_inode(struct inode *inode, int wait) 3971int btrfs_write_inode(struct inode *inode, struct writeback_control *wbc)
3972{ 3972{
3973 struct btrfs_root *root = BTRFS_I(inode)->root; 3973 struct btrfs_root *root = BTRFS_I(inode)->root;
3974 struct btrfs_trans_handle *trans; 3974 struct btrfs_trans_handle *trans;
@@ -3977,7 +3977,7 @@ int btrfs_write_inode(struct inode *inode, int wait)
3977 if (root->fs_info->btree_inode == inode) 3977 if (root->fs_info->btree_inode == inode)
3978 return 0; 3978 return 0;
3979 3979
3980 if (wait) { 3980 if (wbc->sync_mode == WB_SYNC_ALL) {
3981 trans = btrfs_join_transaction(root, 1); 3981 trans = btrfs_join_transaction(root, 1);
3982 btrfs_set_trans_block_group(trans, inode); 3982 btrfs_set_trans_block_group(trans, inode);
3983 ret = btrfs_commit_transaction(trans, root); 3983 ret = btrfs_commit_transaction(trans, root);
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 057e1dae12ab..3d8f8a96f5a3 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -2289,9 +2289,9 @@ cifs_oplock_break(struct slow_work *work)
2289 if (inode && S_ISREG(inode->i_mode)) { 2289 if (inode && S_ISREG(inode->i_mode)) {
2290#ifdef CONFIG_CIFS_EXPERIMENTAL 2290#ifdef CONFIG_CIFS_EXPERIMENTAL
2291 if (cinode->clientCanCacheAll == 0) 2291 if (cinode->clientCanCacheAll == 0)
2292 break_lease(inode, FMODE_READ); 2292 break_lease(inode, O_RDONLY);
2293 else if (cinode->clientCanCacheRead == 0) 2293 else if (cinode->clientCanCacheRead == 0)
2294 break_lease(inode, FMODE_WRITE); 2294 break_lease(inode, O_WRONLY);
2295#endif 2295#endif
2296 rc = filemap_fdatawrite(inode->i_mapping); 2296 rc = filemap_fdatawrite(inode->i_mapping);
2297 if (cinode->clientCanCacheRead == 0) { 2297 if (cinode->clientCanCacheRead == 0) {
diff --git a/fs/dcache.c b/fs/dcache.c
index 953173a293a9..f1358e5c3a59 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -257,6 +257,7 @@ kill_it:
257 if (dentry) 257 if (dentry)
258 goto repeat; 258 goto repeat;
259} 259}
260EXPORT_SYMBOL(dput);
260 261
261/** 262/**
262 * d_invalidate - invalidate a dentry 263 * d_invalidate - invalidate a dentry
@@ -314,6 +315,7 @@ int d_invalidate(struct dentry * dentry)
314 spin_unlock(&dcache_lock); 315 spin_unlock(&dcache_lock);
315 return 0; 316 return 0;
316} 317}
318EXPORT_SYMBOL(d_invalidate);
317 319
318/* This should be called _only_ with dcache_lock held */ 320/* This should be called _only_ with dcache_lock held */
319 321
@@ -328,6 +330,7 @@ struct dentry * dget_locked(struct dentry *dentry)
328{ 330{
329 return __dget_locked(dentry); 331 return __dget_locked(dentry);
330} 332}
333EXPORT_SYMBOL(dget_locked);
331 334
332/** 335/**
333 * d_find_alias - grab a hashed alias of inode 336 * d_find_alias - grab a hashed alias of inode
@@ -384,6 +387,7 @@ struct dentry * d_find_alias(struct inode *inode)
384 } 387 }
385 return de; 388 return de;
386} 389}
390EXPORT_SYMBOL(d_find_alias);
387 391
388/* 392/*
389 * Try to kill dentries associated with this inode. 393 * Try to kill dentries associated with this inode.
@@ -408,6 +412,7 @@ restart:
408 } 412 }
409 spin_unlock(&dcache_lock); 413 spin_unlock(&dcache_lock);
410} 414}
415EXPORT_SYMBOL(d_prune_aliases);
411 416
412/* 417/*
413 * Throw away a dentry - free the inode, dput the parent. This requires that 418 * Throw away a dentry - free the inode, dput the parent. This requires that
@@ -610,6 +615,7 @@ void shrink_dcache_sb(struct super_block * sb)
610{ 615{
611 __shrink_dcache_sb(sb, NULL, 0); 616 __shrink_dcache_sb(sb, NULL, 0);
612} 617}
618EXPORT_SYMBOL(shrink_dcache_sb);
613 619
614/* 620/*
615 * destroy a single subtree of dentries for unmount 621 * destroy a single subtree of dentries for unmount
@@ -792,6 +798,7 @@ positive:
792 spin_unlock(&dcache_lock); 798 spin_unlock(&dcache_lock);
793 return 1; 799 return 1;
794} 800}
801EXPORT_SYMBOL(have_submounts);
795 802
796/* 803/*
797 * Search the dentry child list for the specified parent, 804 * Search the dentry child list for the specified parent,
@@ -876,6 +883,7 @@ void shrink_dcache_parent(struct dentry * parent)
876 while ((found = select_parent(parent)) != 0) 883 while ((found = select_parent(parent)) != 0)
877 __shrink_dcache_sb(sb, &found, 0); 884 __shrink_dcache_sb(sb, &found, 0);
878} 885}
886EXPORT_SYMBOL(shrink_dcache_parent);
879 887
880/* 888/*
881 * Scan `nr' dentries and return the number which remain. 889 * Scan `nr' dentries and return the number which remain.
@@ -968,6 +976,7 @@ struct dentry *d_alloc(struct dentry * parent, const struct qstr *name)
968 976
969 return dentry; 977 return dentry;
970} 978}
979EXPORT_SYMBOL(d_alloc);
971 980
972struct dentry *d_alloc_name(struct dentry *parent, const char *name) 981struct dentry *d_alloc_name(struct dentry *parent, const char *name)
973{ 982{
@@ -1012,6 +1021,7 @@ void d_instantiate(struct dentry *entry, struct inode * inode)
1012 spin_unlock(&dcache_lock); 1021 spin_unlock(&dcache_lock);
1013 security_d_instantiate(entry, inode); 1022 security_d_instantiate(entry, inode);
1014} 1023}
1024EXPORT_SYMBOL(d_instantiate);
1015 1025
1016/** 1026/**
1017 * d_instantiate_unique - instantiate a non-aliased dentry 1027 * d_instantiate_unique - instantiate a non-aliased dentry
@@ -1108,6 +1118,7 @@ struct dentry * d_alloc_root(struct inode * root_inode)
1108 } 1118 }
1109 return res; 1119 return res;
1110} 1120}
1121EXPORT_SYMBOL(d_alloc_root);
1111 1122
1112static inline struct hlist_head *d_hash(struct dentry *parent, 1123static inline struct hlist_head *d_hash(struct dentry *parent,
1113 unsigned long hash) 1124 unsigned long hash)
@@ -1211,7 +1222,6 @@ struct dentry *d_splice_alias(struct inode *inode, struct dentry *dentry)
1211 BUG_ON(!(new->d_flags & DCACHE_DISCONNECTED)); 1222 BUG_ON(!(new->d_flags & DCACHE_DISCONNECTED));
1212 spin_unlock(&dcache_lock); 1223 spin_unlock(&dcache_lock);
1213 security_d_instantiate(new, inode); 1224 security_d_instantiate(new, inode);
1214 d_rehash(dentry);
1215 d_move(new, dentry); 1225 d_move(new, dentry);
1216 iput(inode); 1226 iput(inode);
1217 } else { 1227 } else {
@@ -1225,6 +1235,7 @@ struct dentry *d_splice_alias(struct inode *inode, struct dentry *dentry)
1225 d_add(dentry, inode); 1235 d_add(dentry, inode);
1226 return new; 1236 return new;
1227} 1237}
1238EXPORT_SYMBOL(d_splice_alias);
1228 1239
1229/** 1240/**
1230 * d_add_ci - lookup or allocate new dentry with case-exact name 1241 * d_add_ci - lookup or allocate new dentry with case-exact name
@@ -1314,6 +1325,7 @@ err_out:
1314 iput(inode); 1325 iput(inode);
1315 return ERR_PTR(error); 1326 return ERR_PTR(error);
1316} 1327}
1328EXPORT_SYMBOL(d_add_ci);
1317 1329
1318/** 1330/**
1319 * d_lookup - search for a dentry 1331 * d_lookup - search for a dentry
@@ -1357,6 +1369,7 @@ struct dentry * d_lookup(struct dentry * parent, struct qstr * name)
1357 } while (read_seqretry(&rename_lock, seq)); 1369 } while (read_seqretry(&rename_lock, seq));
1358 return dentry; 1370 return dentry;
1359} 1371}
1372EXPORT_SYMBOL(d_lookup);
1360 1373
1361struct dentry * __d_lookup(struct dentry * parent, struct qstr * name) 1374struct dentry * __d_lookup(struct dentry * parent, struct qstr * name)
1362{ 1375{
@@ -1483,6 +1496,7 @@ int d_validate(struct dentry *dentry, struct dentry *dparent)
1483out: 1496out:
1484 return 0; 1497 return 0;
1485} 1498}
1499EXPORT_SYMBOL(d_validate);
1486 1500
1487/* 1501/*
1488 * When a file is deleted, we have two options: 1502 * When a file is deleted, we have two options:
@@ -1528,6 +1542,7 @@ void d_delete(struct dentry * dentry)
1528 1542
1529 fsnotify_nameremove(dentry, isdir); 1543 fsnotify_nameremove(dentry, isdir);
1530} 1544}
1545EXPORT_SYMBOL(d_delete);
1531 1546
1532static void __d_rehash(struct dentry * entry, struct hlist_head *list) 1547static void __d_rehash(struct dentry * entry, struct hlist_head *list)
1533{ 1548{
@@ -1556,6 +1571,7 @@ void d_rehash(struct dentry * entry)
1556 spin_unlock(&entry->d_lock); 1571 spin_unlock(&entry->d_lock);
1557 spin_unlock(&dcache_lock); 1572 spin_unlock(&dcache_lock);
1558} 1573}
1574EXPORT_SYMBOL(d_rehash);
1559 1575
1560/* 1576/*
1561 * When switching names, the actual string doesn't strictly have to 1577 * When switching names, the actual string doesn't strictly have to
@@ -1702,6 +1718,7 @@ void d_move(struct dentry * dentry, struct dentry * target)
1702 d_move_locked(dentry, target); 1718 d_move_locked(dentry, target);
1703 spin_unlock(&dcache_lock); 1719 spin_unlock(&dcache_lock);
1704} 1720}
1721EXPORT_SYMBOL(d_move);
1705 1722
1706/** 1723/**
1707 * d_ancestor - search for an ancestor 1724 * d_ancestor - search for an ancestor
@@ -1868,6 +1885,7 @@ shouldnt_be_hashed:
1868 spin_unlock(&dcache_lock); 1885 spin_unlock(&dcache_lock);
1869 BUG(); 1886 BUG();
1870} 1887}
1888EXPORT_SYMBOL_GPL(d_materialise_unique);
1871 1889
1872static int prepend(char **buffer, int *buflen, const char *str, int namelen) 1890static int prepend(char **buffer, int *buflen, const char *str, int namelen)
1873{ 1891{
@@ -2005,6 +2023,7 @@ char *d_path(const struct path *path, char *buf, int buflen)
2005 path_put(&root); 2023 path_put(&root);
2006 return res; 2024 return res;
2007} 2025}
2026EXPORT_SYMBOL(d_path);
2008 2027
2009/* 2028/*
2010 * Helper function for dentry_operations.d_dname() members 2029 * Helper function for dentry_operations.d_dname() members
@@ -2171,6 +2190,30 @@ int is_subdir(struct dentry *new_dentry, struct dentry *old_dentry)
2171 return result; 2190 return result;
2172} 2191}
2173 2192
2193int path_is_under(struct path *path1, struct path *path2)
2194{
2195 struct vfsmount *mnt = path1->mnt;
2196 struct dentry *dentry = path1->dentry;
2197 int res;
2198 spin_lock(&vfsmount_lock);
2199 if (mnt != path2->mnt) {
2200 for (;;) {
2201 if (mnt->mnt_parent == mnt) {
2202 spin_unlock(&vfsmount_lock);
2203 return 0;
2204 }
2205 if (mnt->mnt_parent == path2->mnt)
2206 break;
2207 mnt = mnt->mnt_parent;
2208 }
2209 dentry = mnt->mnt_mountpoint;
2210 }
2211 res = is_subdir(dentry, path2->dentry);
2212 spin_unlock(&vfsmount_lock);
2213 return res;
2214}
2215EXPORT_SYMBOL(path_is_under);
2216
2174void d_genocide(struct dentry *root) 2217void d_genocide(struct dentry *root)
2175{ 2218{
2176 struct dentry *this_parent = root; 2219 struct dentry *this_parent = root;
@@ -2228,6 +2271,7 @@ ino_t find_inode_number(struct dentry *dir, struct qstr *name)
2228 } 2271 }
2229 return ino; 2272 return ino;
2230} 2273}
2274EXPORT_SYMBOL(find_inode_number);
2231 2275
2232static __initdata unsigned long dhash_entries; 2276static __initdata unsigned long dhash_entries;
2233static int __init set_dhash_entries(char *str) 2277static int __init set_dhash_entries(char *str)
@@ -2297,6 +2341,7 @@ static void __init dcache_init(void)
2297 2341
2298/* SLAB cache for __getname() consumers */ 2342/* SLAB cache for __getname() consumers */
2299struct kmem_cache *names_cachep __read_mostly; 2343struct kmem_cache *names_cachep __read_mostly;
2344EXPORT_SYMBOL(names_cachep);
2300 2345
2301EXPORT_SYMBOL(d_genocide); 2346EXPORT_SYMBOL(d_genocide);
2302 2347
@@ -2326,26 +2371,3 @@ void __init vfs_caches_init(unsigned long mempages)
2326 bdev_cache_init(); 2371 bdev_cache_init();
2327 chrdev_init(); 2372 chrdev_init();
2328} 2373}
2329
2330EXPORT_SYMBOL(d_alloc);
2331EXPORT_SYMBOL(d_alloc_root);
2332EXPORT_SYMBOL(d_delete);
2333EXPORT_SYMBOL(d_find_alias);
2334EXPORT_SYMBOL(d_instantiate);
2335EXPORT_SYMBOL(d_invalidate);
2336EXPORT_SYMBOL(d_lookup);
2337EXPORT_SYMBOL(d_move);
2338EXPORT_SYMBOL_GPL(d_materialise_unique);
2339EXPORT_SYMBOL(d_path);
2340EXPORT_SYMBOL(d_prune_aliases);
2341EXPORT_SYMBOL(d_rehash);
2342EXPORT_SYMBOL(d_splice_alias);
2343EXPORT_SYMBOL(d_add_ci);
2344EXPORT_SYMBOL(d_validate);
2345EXPORT_SYMBOL(dget_locked);
2346EXPORT_SYMBOL(dput);
2347EXPORT_SYMBOL(find_inode_number);
2348EXPORT_SYMBOL(have_submounts);
2349EXPORT_SYMBOL(names_cachep);
2350EXPORT_SYMBOL(shrink_dcache_parent);
2351EXPORT_SYMBOL(shrink_dcache_sb);
diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c
index 274ac865bae8..049d6c36da09 100644
--- a/fs/debugfs/inode.c
+++ b/fs/debugfs/inode.c
@@ -496,7 +496,7 @@ struct dentry *debugfs_rename(struct dentry *old_dir, struct dentry *old_dentry,
496 } 496 }
497 d_move(old_dentry, dentry); 497 d_move(old_dentry, dentry);
498 fsnotify_move(old_dir->d_inode, new_dir->d_inode, old_name, 498 fsnotify_move(old_dir->d_inode, new_dir->d_inode, old_name,
499 old_dentry->d_name.name, S_ISDIR(old_dentry->d_inode->i_mode), 499 S_ISDIR(old_dentry->d_inode->i_mode),
500 NULL, old_dentry); 500 NULL, old_dentry);
501 fsnotify_oldname_free(old_name); 501 fsnotify_oldname_free(old_name);
502 unlock_rename(new_dir, old_dir); 502 unlock_rename(new_dir, old_dir);
diff --git a/fs/exofs/common.h b/fs/exofs/common.h
index b1b178e61718..f0d520312d8b 100644
--- a/fs/exofs/common.h
+++ b/fs/exofs/common.h
@@ -55,6 +55,8 @@
55/* exofs Application specific page/attribute */ 55/* exofs Application specific page/attribute */
56# define EXOFS_APAGE_FS_DATA (OSD_APAGE_APP_DEFINED_FIRST + 3) 56# define EXOFS_APAGE_FS_DATA (OSD_APAGE_APP_DEFINED_FIRST + 3)
57# define EXOFS_ATTR_INODE_DATA 1 57# define EXOFS_ATTR_INODE_DATA 1
58# define EXOFS_ATTR_INODE_FILE_LAYOUT 2
59# define EXOFS_ATTR_INODE_DIR_LAYOUT 3
58 60
59/* 61/*
60 * The maximum number of files we can have is limited by the size of the 62 * The maximum number of files we can have is limited by the size of the
@@ -206,4 +208,41 @@ enum {
206 (((name_len) + offsetof(struct exofs_dir_entry, name) + \ 208 (((name_len) + offsetof(struct exofs_dir_entry, name) + \
207 EXOFS_DIR_ROUND) & ~EXOFS_DIR_ROUND) 209 EXOFS_DIR_ROUND) & ~EXOFS_DIR_ROUND)
208 210
211/*
212 * The on-disk (optional) layout structure.
213 * sits in an EXOFS_ATTR_INODE_FILE_LAYOUT or EXOFS_ATTR_INODE_DIR_LAYOUT
214 * attribute, attached to any inode, usually to a directory.
215 */
216
217enum exofs_inode_layout_gen_functions {
218 LAYOUT_MOVING_WINDOW = 0,
219 LAYOUT_IMPLICT = 1,
220};
221
222struct exofs_on_disk_inode_layout {
223 __le16 gen_func; /* One of enum exofs_inode_layout_gen_functions */
224 __le16 pad;
225 union {
226 /* gen_func == LAYOUT_MOVING_WINDOW (default) */
227 struct exofs_layout_sliding_window {
228 __le32 num_devices; /* first n devices in global-table*/
229 } sliding_window __packed;
230
231 /* gen_func == LAYOUT_IMPLICT */
232 struct exofs_layout_implict_list {
233 struct exofs_dt_data_map data_map;
234 /* Variable array of size data_map.cb_num_comps. These
235 * are device indexes of the devices in the global table
236 */
237 __le32 dev_indexes[];
238 } implict __packed;
239 };
240} __packed;
241
242static inline size_t exofs_on_disk_inode_layout_size(unsigned max_devs)
243{
244 return sizeof(struct exofs_on_disk_inode_layout) +
245 max_devs * sizeof(__le32);
246}
247
209#endif /*ifndef __EXOFS_COM_H__*/ 248#endif /*ifndef __EXOFS_COM_H__*/
diff --git a/fs/exofs/exofs.h b/fs/exofs/exofs.h
index c35fd4623986..8442e353309f 100644
--- a/fs/exofs/exofs.h
+++ b/fs/exofs/exofs.h
@@ -55,12 +55,28 @@
55/* u64 has problems with printk this will cast it to unsigned long long */ 55/* u64 has problems with printk this will cast it to unsigned long long */
56#define _LLU(x) (unsigned long long)(x) 56#define _LLU(x) (unsigned long long)(x)
57 57
58struct exofs_layout {
59 osd_id s_pid; /* partition ID of file system*/
60
61 /* Our way of looking at the data_map */
62 unsigned stripe_unit;
63 unsigned mirrors_p1;
64
65 unsigned group_width;
66 u64 group_depth;
67 unsigned group_count;
68
69 enum exofs_inode_layout_gen_functions lay_func;
70
71 unsigned s_numdevs; /* Num of devices in array */
72 struct osd_dev *s_ods[0]; /* Variable length */
73};
74
58/* 75/*
59 * our extension to the in-memory superblock 76 * our extension to the in-memory superblock
60 */ 77 */
61struct exofs_sb_info { 78struct exofs_sb_info {
62 struct exofs_fscb s_fscb; /* Written often, pre-allocate*/ 79 struct exofs_fscb s_fscb; /* Written often, pre-allocate*/
63 osd_id s_pid; /* partition ID of file system*/
64 int s_timeout; /* timeout for OSD operations */ 80 int s_timeout; /* timeout for OSD operations */
65 uint64_t s_nextid; /* highest object ID used */ 81 uint64_t s_nextid; /* highest object ID used */
66 uint32_t s_numfiles; /* number of files on fs */ 82 uint32_t s_numfiles; /* number of files on fs */
@@ -69,22 +85,27 @@ struct exofs_sb_info {
69 atomic_t s_curr_pending; /* number of pending commands */ 85 atomic_t s_curr_pending; /* number of pending commands */
70 uint8_t s_cred[OSD_CAP_LEN]; /* credential for the fscb */ 86 uint8_t s_cred[OSD_CAP_LEN]; /* credential for the fscb */
71 87
72 struct pnfs_osd_data_map data_map; /* Default raid to use */ 88 struct pnfs_osd_data_map data_map; /* Default raid to use
73 unsigned s_numdevs; /* Num of devices in array */ 89 * FIXME: Needed ?
74 struct osd_dev *s_ods[1]; /* Variable length, minimum 1 */ 90 */
91/* struct exofs_layout dir_layout;*/ /* Default dir layout */
92 struct exofs_layout layout; /* Default files layout,
93 * contains the variable osd_dev
94 * array. Keep last */
95 struct osd_dev *_min_one_dev[1]; /* Place holder for one dev */
75}; 96};
76 97
77/* 98/*
78 * our extension to the in-memory inode 99 * our extension to the in-memory inode
79 */ 100 */
80struct exofs_i_info { 101struct exofs_i_info {
102 struct inode vfs_inode; /* normal in-memory inode */
103 wait_queue_head_t i_wq; /* wait queue for inode */
81 unsigned long i_flags; /* various atomic flags */ 104 unsigned long i_flags; /* various atomic flags */
82 uint32_t i_data[EXOFS_IDATA];/*short symlink names and device #s*/ 105 uint32_t i_data[EXOFS_IDATA];/*short symlink names and device #s*/
83 uint32_t i_dir_start_lookup; /* which page to start lookup */ 106 uint32_t i_dir_start_lookup; /* which page to start lookup */
84 wait_queue_head_t i_wq; /* wait queue for inode */
85 uint64_t i_commit_size; /* the object's written length */ 107 uint64_t i_commit_size; /* the object's written length */
86 uint8_t i_cred[OSD_CAP_LEN];/* all-powerful credential */ 108 uint8_t i_cred[OSD_CAP_LEN];/* all-powerful credential */
87 struct inode vfs_inode; /* normal in-memory inode */
88}; 109};
89 110
90static inline osd_id exofs_oi_objno(struct exofs_i_info *oi) 111static inline osd_id exofs_oi_objno(struct exofs_i_info *oi)
@@ -101,7 +122,7 @@ struct exofs_io_state {
101 void *private; 122 void *private;
102 exofs_io_done_fn done; 123 exofs_io_done_fn done;
103 124
104 struct exofs_sb_info *sbi; 125 struct exofs_layout *layout;
105 struct osd_obj_id obj; 126 struct osd_obj_id obj;
106 u8 *cred; 127 u8 *cred;
107 128
@@ -109,7 +130,11 @@ struct exofs_io_state {
109 loff_t offset; 130 loff_t offset;
110 unsigned long length; 131 unsigned long length;
111 void *kern_buff; 132 void *kern_buff;
112 struct bio *bio; 133
134 struct page **pages;
135 unsigned nr_pages;
136 unsigned pgbase;
137 unsigned pages_consumed;
113 138
114 /* Attributes */ 139 /* Attributes */
115 unsigned in_attr_len; 140 unsigned in_attr_len;
@@ -122,6 +147,9 @@ struct exofs_io_state {
122 struct exofs_per_dev_state { 147 struct exofs_per_dev_state {
123 struct osd_request *or; 148 struct osd_request *or;
124 struct bio *bio; 149 struct bio *bio;
150 loff_t offset;
151 unsigned length;
152 unsigned dev;
125 } per_dev[]; 153 } per_dev[];
126}; 154};
127 155
@@ -175,6 +203,12 @@ static inline struct exofs_i_info *exofs_i(struct inode *inode)
175} 203}
176 204
177/* 205/*
206 * Given a layout, object_number and stripe_index return the associated global
207 * dev_index
208 */
209unsigned exofs_layout_od_id(struct exofs_layout *layout,
210 osd_id obj_no, unsigned layout_index);
211/*
178 * Maximum count of links to a file 212 * Maximum count of links to a file
179 */ 213 */
180#define EXOFS_LINK_MAX 32000 214#define EXOFS_LINK_MAX 32000
@@ -189,7 +223,8 @@ void exofs_make_credential(u8 cred_a[OSD_CAP_LEN],
189int exofs_read_kern(struct osd_dev *od, u8 *cred, struct osd_obj_id *obj, 223int exofs_read_kern(struct osd_dev *od, u8 *cred, struct osd_obj_id *obj,
190 u64 offset, void *p, unsigned length); 224 u64 offset, void *p, unsigned length);
191 225
192int exofs_get_io_state(struct exofs_sb_info *sbi, struct exofs_io_state** ios); 226int exofs_get_io_state(struct exofs_layout *layout,
227 struct exofs_io_state **ios);
193void exofs_put_io_state(struct exofs_io_state *ios); 228void exofs_put_io_state(struct exofs_io_state *ios);
194 229
195int exofs_check_io(struct exofs_io_state *ios, u64 *resid); 230int exofs_check_io(struct exofs_io_state *ios, u64 *resid);
@@ -226,7 +261,7 @@ int exofs_write_begin(struct file *file, struct address_space *mapping,
226 struct page **pagep, void **fsdata); 261 struct page **pagep, void **fsdata);
227extern struct inode *exofs_iget(struct super_block *, unsigned long); 262extern struct inode *exofs_iget(struct super_block *, unsigned long);
228struct inode *exofs_new_inode(struct inode *, int); 263struct inode *exofs_new_inode(struct inode *, int);
229extern int exofs_write_inode(struct inode *, int); 264extern int exofs_write_inode(struct inode *, struct writeback_control *wbc);
230extern void exofs_delete_inode(struct inode *); 265extern void exofs_delete_inode(struct inode *);
231 266
232/* dir.c: */ 267/* dir.c: */
diff --git a/fs/exofs/inode.c b/fs/exofs/inode.c
index 2afbcebeda71..a17e4b733e35 100644
--- a/fs/exofs/inode.c
+++ b/fs/exofs/inode.c
@@ -41,16 +41,18 @@
41 41
42enum { BIO_MAX_PAGES_KMALLOC = 42enum { BIO_MAX_PAGES_KMALLOC =
43 (PAGE_SIZE - sizeof(struct bio)) / sizeof(struct bio_vec), 43 (PAGE_SIZE - sizeof(struct bio)) / sizeof(struct bio_vec),
44 MAX_PAGES_KMALLOC =
45 PAGE_SIZE / sizeof(struct page *),
44}; 46};
45 47
46struct page_collect { 48struct page_collect {
47 struct exofs_sb_info *sbi; 49 struct exofs_sb_info *sbi;
48 struct request_queue *req_q;
49 struct inode *inode; 50 struct inode *inode;
50 unsigned expected_pages; 51 unsigned expected_pages;
51 struct exofs_io_state *ios; 52 struct exofs_io_state *ios;
52 53
53 struct bio *bio; 54 struct page **pages;
55 unsigned alloc_pages;
54 unsigned nr_pages; 56 unsigned nr_pages;
55 unsigned long length; 57 unsigned long length;
56 loff_t pg_first; /* keep 64bit also in 32-arches */ 58 loff_t pg_first; /* keep 64bit also in 32-arches */
@@ -62,15 +64,12 @@ static void _pcol_init(struct page_collect *pcol, unsigned expected_pages,
62 struct exofs_sb_info *sbi = inode->i_sb->s_fs_info; 64 struct exofs_sb_info *sbi = inode->i_sb->s_fs_info;
63 65
64 pcol->sbi = sbi; 66 pcol->sbi = sbi;
65 /* Create master bios on first Q, later on cloning, each clone will be
66 * allocated on it's destination Q
67 */
68 pcol->req_q = osd_request_queue(sbi->s_ods[0]);
69 pcol->inode = inode; 67 pcol->inode = inode;
70 pcol->expected_pages = expected_pages; 68 pcol->expected_pages = expected_pages;
71 69
72 pcol->ios = NULL; 70 pcol->ios = NULL;
73 pcol->bio = NULL; 71 pcol->pages = NULL;
72 pcol->alloc_pages = 0;
74 pcol->nr_pages = 0; 73 pcol->nr_pages = 0;
75 pcol->length = 0; 74 pcol->length = 0;
76 pcol->pg_first = -1; 75 pcol->pg_first = -1;
@@ -80,7 +79,8 @@ static void _pcol_reset(struct page_collect *pcol)
80{ 79{
81 pcol->expected_pages -= min(pcol->nr_pages, pcol->expected_pages); 80 pcol->expected_pages -= min(pcol->nr_pages, pcol->expected_pages);
82 81
83 pcol->bio = NULL; 82 pcol->pages = NULL;
83 pcol->alloc_pages = 0;
84 pcol->nr_pages = 0; 84 pcol->nr_pages = 0;
85 pcol->length = 0; 85 pcol->length = 0;
86 pcol->pg_first = -1; 86 pcol->pg_first = -1;
@@ -90,38 +90,43 @@ static void _pcol_reset(struct page_collect *pcol)
90 * it might not end here. don't be left with nothing 90 * it might not end here. don't be left with nothing
91 */ 91 */
92 if (!pcol->expected_pages) 92 if (!pcol->expected_pages)
93 pcol->expected_pages = BIO_MAX_PAGES_KMALLOC; 93 pcol->expected_pages = MAX_PAGES_KMALLOC;
94} 94}
95 95
96static int pcol_try_alloc(struct page_collect *pcol) 96static int pcol_try_alloc(struct page_collect *pcol)
97{ 97{
98 int pages = min_t(unsigned, pcol->expected_pages, 98 unsigned pages = min_t(unsigned, pcol->expected_pages,
99 BIO_MAX_PAGES_KMALLOC); 99 MAX_PAGES_KMALLOC);
100 100
101 if (!pcol->ios) { /* First time allocate io_state */ 101 if (!pcol->ios) { /* First time allocate io_state */
102 int ret = exofs_get_io_state(pcol->sbi, &pcol->ios); 102 int ret = exofs_get_io_state(&pcol->sbi->layout, &pcol->ios);
103 103
104 if (ret) 104 if (ret)
105 return ret; 105 return ret;
106 } 106 }
107 107
108 /* TODO: easily support bio chaining */
109 pages = min_t(unsigned, pages,
110 pcol->sbi->layout.group_width * BIO_MAX_PAGES_KMALLOC);
111
108 for (; pages; pages >>= 1) { 112 for (; pages; pages >>= 1) {
109 pcol->bio = bio_kmalloc(GFP_KERNEL, pages); 113 pcol->pages = kmalloc(pages * sizeof(struct page *),
110 if (likely(pcol->bio)) 114 GFP_KERNEL);
115 if (likely(pcol->pages)) {
116 pcol->alloc_pages = pages;
111 return 0; 117 return 0;
118 }
112 } 119 }
113 120
114 EXOFS_ERR("Failed to bio_kmalloc expected_pages=%u\n", 121 EXOFS_ERR("Failed to kmalloc expected_pages=%u\n",
115 pcol->expected_pages); 122 pcol->expected_pages);
116 return -ENOMEM; 123 return -ENOMEM;
117} 124}
118 125
119static void pcol_free(struct page_collect *pcol) 126static void pcol_free(struct page_collect *pcol)
120{ 127{
121 if (pcol->bio) { 128 kfree(pcol->pages);
122 bio_put(pcol->bio); 129 pcol->pages = NULL;
123 pcol->bio = NULL;
124 }
125 130
126 if (pcol->ios) { 131 if (pcol->ios) {
127 exofs_put_io_state(pcol->ios); 132 exofs_put_io_state(pcol->ios);
@@ -132,11 +137,10 @@ static void pcol_free(struct page_collect *pcol)
132static int pcol_add_page(struct page_collect *pcol, struct page *page, 137static int pcol_add_page(struct page_collect *pcol, struct page *page,
133 unsigned len) 138 unsigned len)
134{ 139{
135 int added_len = bio_add_pc_page(pcol->req_q, pcol->bio, page, len, 0); 140 if (unlikely(pcol->nr_pages >= pcol->alloc_pages))
136 if (unlikely(len != added_len))
137 return -ENOMEM; 141 return -ENOMEM;
138 142
139 ++pcol->nr_pages; 143 pcol->pages[pcol->nr_pages++] = page;
140 pcol->length += len; 144 pcol->length += len;
141 return 0; 145 return 0;
142} 146}
@@ -181,7 +185,6 @@ static void update_write_page(struct page *page, int ret)
181 */ 185 */
182static int __readpages_done(struct page_collect *pcol, bool do_unlock) 186static int __readpages_done(struct page_collect *pcol, bool do_unlock)
183{ 187{
184 struct bio_vec *bvec;
185 int i; 188 int i;
186 u64 resid; 189 u64 resid;
187 u64 good_bytes; 190 u64 good_bytes;
@@ -193,13 +196,13 @@ static int __readpages_done(struct page_collect *pcol, bool do_unlock)
193 else 196 else
194 good_bytes = pcol->length - resid; 197 good_bytes = pcol->length - resid;
195 198
196 EXOFS_DBGMSG("readpages_done(0x%lx) good_bytes=0x%llx" 199 EXOFS_DBGMSG2("readpages_done(0x%lx) good_bytes=0x%llx"
197 " length=0x%lx nr_pages=%u\n", 200 " length=0x%lx nr_pages=%u\n",
198 pcol->inode->i_ino, _LLU(good_bytes), pcol->length, 201 pcol->inode->i_ino, _LLU(good_bytes), pcol->length,
199 pcol->nr_pages); 202 pcol->nr_pages);
200 203
201 __bio_for_each_segment(bvec, pcol->bio, i, 0) { 204 for (i = 0; i < pcol->nr_pages; i++) {
202 struct page *page = bvec->bv_page; 205 struct page *page = pcol->pages[i];
203 struct inode *inode = page->mapping->host; 206 struct inode *inode = page->mapping->host;
204 int page_stat; 207 int page_stat;
205 208
@@ -218,11 +221,11 @@ static int __readpages_done(struct page_collect *pcol, bool do_unlock)
218 ret = update_read_page(page, page_stat); 221 ret = update_read_page(page, page_stat);
219 if (do_unlock) 222 if (do_unlock)
220 unlock_page(page); 223 unlock_page(page);
221 length += bvec->bv_len; 224 length += PAGE_SIZE;
222 } 225 }
223 226
224 pcol_free(pcol); 227 pcol_free(pcol);
225 EXOFS_DBGMSG("readpages_done END\n"); 228 EXOFS_DBGMSG2("readpages_done END\n");
226 return ret; 229 return ret;
227} 230}
228 231
@@ -238,11 +241,10 @@ static void readpages_done(struct exofs_io_state *ios, void *p)
238 241
239static void _unlock_pcol_pages(struct page_collect *pcol, int ret, int rw) 242static void _unlock_pcol_pages(struct page_collect *pcol, int ret, int rw)
240{ 243{
241 struct bio_vec *bvec;
242 int i; 244 int i;
243 245
244 __bio_for_each_segment(bvec, pcol->bio, i, 0) { 246 for (i = 0; i < pcol->nr_pages; i++) {
245 struct page *page = bvec->bv_page; 247 struct page *page = pcol->pages[i];
246 248
247 if (rw == READ) 249 if (rw == READ)
248 update_read_page(page, ret); 250 update_read_page(page, ret);
@@ -260,13 +262,14 @@ static int read_exec(struct page_collect *pcol, bool is_sync)
260 struct page_collect *pcol_copy = NULL; 262 struct page_collect *pcol_copy = NULL;
261 int ret; 263 int ret;
262 264
263 if (!pcol->bio) 265 if (!pcol->pages)
264 return 0; 266 return 0;
265 267
266 /* see comment in _readpage() about sync reads */ 268 /* see comment in _readpage() about sync reads */
267 WARN_ON(is_sync && (pcol->nr_pages != 1)); 269 WARN_ON(is_sync && (pcol->nr_pages != 1));
268 270
269 ios->bio = pcol->bio; 271 ios->pages = pcol->pages;
272 ios->nr_pages = pcol->nr_pages;
270 ios->length = pcol->length; 273 ios->length = pcol->length;
271 ios->offset = pcol->pg_first << PAGE_CACHE_SHIFT; 274 ios->offset = pcol->pg_first << PAGE_CACHE_SHIFT;
272 275
@@ -290,7 +293,7 @@ static int read_exec(struct page_collect *pcol, bool is_sync)
290 293
291 atomic_inc(&pcol->sbi->s_curr_pending); 294 atomic_inc(&pcol->sbi->s_curr_pending);
292 295
293 EXOFS_DBGMSG("read_exec obj=0x%llx start=0x%llx length=0x%lx\n", 296 EXOFS_DBGMSG2("read_exec obj=0x%llx start=0x%llx length=0x%lx\n",
294 ios->obj.id, _LLU(ios->offset), pcol->length); 297 ios->obj.id, _LLU(ios->offset), pcol->length);
295 298
296 /* pages ownership was passed to pcol_copy */ 299 /* pages ownership was passed to pcol_copy */
@@ -366,7 +369,7 @@ try_again:
366 goto try_again; 369 goto try_again;
367 } 370 }
368 371
369 if (!pcol->bio) { 372 if (!pcol->pages) {
370 ret = pcol_try_alloc(pcol); 373 ret = pcol_try_alloc(pcol);
371 if (unlikely(ret)) 374 if (unlikely(ret))
372 goto fail; 375 goto fail;
@@ -448,7 +451,6 @@ static int exofs_readpage(struct file *file, struct page *page)
448static void writepages_done(struct exofs_io_state *ios, void *p) 451static void writepages_done(struct exofs_io_state *ios, void *p)
449{ 452{
450 struct page_collect *pcol = p; 453 struct page_collect *pcol = p;
451 struct bio_vec *bvec;
452 int i; 454 int i;
453 u64 resid; 455 u64 resid;
454 u64 good_bytes; 456 u64 good_bytes;
@@ -462,13 +464,13 @@ static void writepages_done(struct exofs_io_state *ios, void *p)
462 else 464 else
463 good_bytes = pcol->length - resid; 465 good_bytes = pcol->length - resid;
464 466
465 EXOFS_DBGMSG("writepages_done(0x%lx) good_bytes=0x%llx" 467 EXOFS_DBGMSG2("writepages_done(0x%lx) good_bytes=0x%llx"
466 " length=0x%lx nr_pages=%u\n", 468 " length=0x%lx nr_pages=%u\n",
467 pcol->inode->i_ino, _LLU(good_bytes), pcol->length, 469 pcol->inode->i_ino, _LLU(good_bytes), pcol->length,
468 pcol->nr_pages); 470 pcol->nr_pages);
469 471
470 __bio_for_each_segment(bvec, pcol->bio, i, 0) { 472 for (i = 0; i < pcol->nr_pages; i++) {
471 struct page *page = bvec->bv_page; 473 struct page *page = pcol->pages[i];
472 struct inode *inode = page->mapping->host; 474 struct inode *inode = page->mapping->host;
473 int page_stat; 475 int page_stat;
474 476
@@ -485,12 +487,12 @@ static void writepages_done(struct exofs_io_state *ios, void *p)
485 EXOFS_DBGMSG2(" writepages_done(0x%lx, 0x%lx) status=%d\n", 487 EXOFS_DBGMSG2(" writepages_done(0x%lx, 0x%lx) status=%d\n",
486 inode->i_ino, page->index, page_stat); 488 inode->i_ino, page->index, page_stat);
487 489
488 length += bvec->bv_len; 490 length += PAGE_SIZE;
489 } 491 }
490 492
491 pcol_free(pcol); 493 pcol_free(pcol);
492 kfree(pcol); 494 kfree(pcol);
493 EXOFS_DBGMSG("writepages_done END\n"); 495 EXOFS_DBGMSG2("writepages_done END\n");
494} 496}
495 497
496static int write_exec(struct page_collect *pcol) 498static int write_exec(struct page_collect *pcol)
@@ -500,7 +502,7 @@ static int write_exec(struct page_collect *pcol)
500 struct page_collect *pcol_copy = NULL; 502 struct page_collect *pcol_copy = NULL;
501 int ret; 503 int ret;
502 504
503 if (!pcol->bio) 505 if (!pcol->pages)
504 return 0; 506 return 0;
505 507
506 pcol_copy = kmalloc(sizeof(*pcol_copy), GFP_KERNEL); 508 pcol_copy = kmalloc(sizeof(*pcol_copy), GFP_KERNEL);
@@ -512,9 +514,8 @@ static int write_exec(struct page_collect *pcol)
512 514
513 *pcol_copy = *pcol; 515 *pcol_copy = *pcol;
514 516
515 pcol_copy->bio->bi_rw |= (1 << BIO_RW); /* FIXME: bio_set_dir() */ 517 ios->pages = pcol_copy->pages;
516 518 ios->nr_pages = pcol_copy->nr_pages;
517 ios->bio = pcol_copy->bio;
518 ios->offset = pcol_copy->pg_first << PAGE_CACHE_SHIFT; 519 ios->offset = pcol_copy->pg_first << PAGE_CACHE_SHIFT;
519 ios->length = pcol_copy->length; 520 ios->length = pcol_copy->length;
520 ios->done = writepages_done; 521 ios->done = writepages_done;
@@ -527,7 +528,7 @@ static int write_exec(struct page_collect *pcol)
527 } 528 }
528 529
529 atomic_inc(&pcol->sbi->s_curr_pending); 530 atomic_inc(&pcol->sbi->s_curr_pending);
530 EXOFS_DBGMSG("write_exec(0x%lx, 0x%llx) start=0x%llx length=0x%lx\n", 531 EXOFS_DBGMSG2("write_exec(0x%lx, 0x%llx) start=0x%llx length=0x%lx\n",
531 pcol->inode->i_ino, pcol->pg_first, _LLU(ios->offset), 532 pcol->inode->i_ino, pcol->pg_first, _LLU(ios->offset),
532 pcol->length); 533 pcol->length);
533 /* pages ownership was passed to pcol_copy */ 534 /* pages ownership was passed to pcol_copy */
@@ -605,7 +606,7 @@ try_again:
605 goto try_again; 606 goto try_again;
606 } 607 }
607 608
608 if (!pcol->bio) { 609 if (!pcol->pages) {
609 ret = pcol_try_alloc(pcol); 610 ret = pcol_try_alloc(pcol);
610 if (unlikely(ret)) 611 if (unlikely(ret))
611 goto fail; 612 goto fail;
@@ -616,7 +617,7 @@ try_again:
616 617
617 ret = pcol_add_page(pcol, page, len); 618 ret = pcol_add_page(pcol, page, len);
618 if (unlikely(ret)) { 619 if (unlikely(ret)) {
619 EXOFS_DBGMSG("Failed pcol_add_page " 620 EXOFS_DBGMSG2("Failed pcol_add_page "
620 "nr_pages=%u total_length=0x%lx\n", 621 "nr_pages=%u total_length=0x%lx\n",
621 pcol->nr_pages, pcol->length); 622 pcol->nr_pages, pcol->length);
622 623
@@ -663,7 +664,7 @@ static int exofs_writepages(struct address_space *mapping,
663 if (expected_pages < 32L) 664 if (expected_pages < 32L)
664 expected_pages = 32L; 665 expected_pages = 32L;
665 666
666 EXOFS_DBGMSG("inode(0x%lx) wbc->start=0x%llx wbc->end=0x%llx " 667 EXOFS_DBGMSG2("inode(0x%lx) wbc->start=0x%llx wbc->end=0x%llx "
667 "nrpages=%lu start=0x%lx end=0x%lx expected_pages=%ld\n", 668 "nrpages=%lu start=0x%lx end=0x%lx expected_pages=%ld\n",
668 mapping->host->i_ino, wbc->range_start, wbc->range_end, 669 mapping->host->i_ino, wbc->range_start, wbc->range_end,
669 mapping->nrpages, start, end, expected_pages); 670 mapping->nrpages, start, end, expected_pages);
@@ -859,20 +860,33 @@ int exofs_setattr(struct dentry *dentry, struct iattr *iattr)
859 return error; 860 return error;
860} 861}
861 862
863static const struct osd_attr g_attr_inode_file_layout = ATTR_DEF(
864 EXOFS_APAGE_FS_DATA,
865 EXOFS_ATTR_INODE_FILE_LAYOUT,
866 0);
867static const struct osd_attr g_attr_inode_dir_layout = ATTR_DEF(
868 EXOFS_APAGE_FS_DATA,
869 EXOFS_ATTR_INODE_DIR_LAYOUT,
870 0);
871
862/* 872/*
863 * Read an inode from the OSD, and return it as is. We also return the size 873 * Read the Linux inode info from the OSD, and return it as is. In exofs the
864 * attribute in the 'obj_size' argument. 874 * inode info is in an application specific page/attribute of the osd-object.
865 */ 875 */
866static int exofs_get_inode(struct super_block *sb, struct exofs_i_info *oi, 876static int exofs_get_inode(struct super_block *sb, struct exofs_i_info *oi,
867 struct exofs_fcb *inode, uint64_t *obj_size) 877 struct exofs_fcb *inode)
868{ 878{
869 struct exofs_sb_info *sbi = sb->s_fs_info; 879 struct exofs_sb_info *sbi = sb->s_fs_info;
870 struct osd_attr attrs[2]; 880 struct osd_attr attrs[] = {
881 [0] = g_attr_inode_data,
882 [1] = g_attr_inode_file_layout,
883 [2] = g_attr_inode_dir_layout,
884 };
871 struct exofs_io_state *ios; 885 struct exofs_io_state *ios;
886 struct exofs_on_disk_inode_layout *layout;
872 int ret; 887 int ret;
873 888
874 *obj_size = ~0; 889 ret = exofs_get_io_state(&sbi->layout, &ios);
875 ret = exofs_get_io_state(sbi, &ios);
876 if (unlikely(ret)) { 890 if (unlikely(ret)) {
877 EXOFS_ERR("%s: exofs_get_io_state failed.\n", __func__); 891 EXOFS_ERR("%s: exofs_get_io_state failed.\n", __func__);
878 return ret; 892 return ret;
@@ -882,14 +896,25 @@ static int exofs_get_inode(struct super_block *sb, struct exofs_i_info *oi,
882 exofs_make_credential(oi->i_cred, &ios->obj); 896 exofs_make_credential(oi->i_cred, &ios->obj);
883 ios->cred = oi->i_cred; 897 ios->cred = oi->i_cred;
884 898
885 attrs[0] = g_attr_inode_data; 899 attrs[1].len = exofs_on_disk_inode_layout_size(sbi->layout.s_numdevs);
886 attrs[1] = g_attr_logical_length; 900 attrs[2].len = exofs_on_disk_inode_layout_size(sbi->layout.s_numdevs);
901
887 ios->in_attr = attrs; 902 ios->in_attr = attrs;
888 ios->in_attr_len = ARRAY_SIZE(attrs); 903 ios->in_attr_len = ARRAY_SIZE(attrs);
889 904
890 ret = exofs_sbi_read(ios); 905 ret = exofs_sbi_read(ios);
891 if (ret) 906 if (unlikely(ret)) {
907 EXOFS_ERR("object(0x%llx) corrupted, return empty file=>%d\n",
908 _LLU(ios->obj.id), ret);
909 memset(inode, 0, sizeof(*inode));
910 inode->i_mode = 0040000 | (0777 & ~022);
911 /* If object is lost on target we might as well enable it's
912 * delete.
913 */
914 if ((ret == -ENOENT) || (ret == -EINVAL))
915 ret = 0;
892 goto out; 916 goto out;
917 }
893 918
894 ret = extract_attr_from_ios(ios, &attrs[0]); 919 ret = extract_attr_from_ios(ios, &attrs[0]);
895 if (ret) { 920 if (ret) {
@@ -901,11 +926,33 @@ static int exofs_get_inode(struct super_block *sb, struct exofs_i_info *oi,
901 926
902 ret = extract_attr_from_ios(ios, &attrs[1]); 927 ret = extract_attr_from_ios(ios, &attrs[1]);
903 if (ret) { 928 if (ret) {
904 EXOFS_ERR("%s: extract_attr of logical_length failed\n", 929 EXOFS_ERR("%s: extract_attr of inode_data failed\n", __func__);
905 __func__); 930 goto out;
931 }
932 if (attrs[1].len) {
933 layout = attrs[1].val_ptr;
934 if (layout->gen_func != cpu_to_le16(LAYOUT_MOVING_WINDOW)) {
935 EXOFS_ERR("%s: unsupported files layout %d\n",
936 __func__, layout->gen_func);
937 ret = -ENOTSUPP;
938 goto out;
939 }
940 }
941
942 ret = extract_attr_from_ios(ios, &attrs[2]);
943 if (ret) {
944 EXOFS_ERR("%s: extract_attr of inode_data failed\n", __func__);
906 goto out; 945 goto out;
907 } 946 }
908 *obj_size = get_unaligned_be64(attrs[1].val_ptr); 947 if (attrs[2].len) {
948 layout = attrs[2].val_ptr;
949 if (layout->gen_func != cpu_to_le16(LAYOUT_MOVING_WINDOW)) {
950 EXOFS_ERR("%s: unsupported meta-data layout %d\n",
951 __func__, layout->gen_func);
952 ret = -ENOTSUPP;
953 goto out;
954 }
955 }
909 956
910out: 957out:
911 exofs_put_io_state(ios); 958 exofs_put_io_state(ios);
@@ -925,7 +972,6 @@ struct inode *exofs_iget(struct super_block *sb, unsigned long ino)
925 struct exofs_i_info *oi; 972 struct exofs_i_info *oi;
926 struct exofs_fcb fcb; 973 struct exofs_fcb fcb;
927 struct inode *inode; 974 struct inode *inode;
928 uint64_t obj_size;
929 int ret; 975 int ret;
930 976
931 inode = iget_locked(sb, ino); 977 inode = iget_locked(sb, ino);
@@ -937,7 +983,7 @@ struct inode *exofs_iget(struct super_block *sb, unsigned long ino)
937 __oi_init(oi); 983 __oi_init(oi);
938 984
939 /* read the inode from the osd */ 985 /* read the inode from the osd */
940 ret = exofs_get_inode(sb, oi, &fcb, &obj_size); 986 ret = exofs_get_inode(sb, oi, &fcb);
941 if (ret) 987 if (ret)
942 goto bad_inode; 988 goto bad_inode;
943 989
@@ -958,13 +1004,6 @@ struct inode *exofs_iget(struct super_block *sb, unsigned long ino)
958 inode->i_blkbits = EXOFS_BLKSHIFT; 1004 inode->i_blkbits = EXOFS_BLKSHIFT;
959 inode->i_generation = le32_to_cpu(fcb.i_generation); 1005 inode->i_generation = le32_to_cpu(fcb.i_generation);
960 1006
961 if ((inode->i_size != obj_size) &&
962 (!exofs_inode_is_fast_symlink(inode))) {
963 EXOFS_ERR("WARNING: Size of inode=%llu != object=%llu\n",
964 inode->i_size, _LLU(obj_size));
965 /* FIXME: call exofs_inode_recovery() */
966 }
967
968 oi->i_dir_start_lookup = 0; 1007 oi->i_dir_start_lookup = 0;
969 1008
970 if ((inode->i_nlink == 0) && (inode->i_mode == 0)) { 1009 if ((inode->i_nlink == 0) && (inode->i_mode == 0)) {
@@ -1043,7 +1082,7 @@ static void create_done(struct exofs_io_state *ios, void *p)
1043 1082
1044 if (unlikely(ret)) { 1083 if (unlikely(ret)) {
1045 EXOFS_ERR("object=0x%llx creation faild in pid=0x%llx", 1084 EXOFS_ERR("object=0x%llx creation faild in pid=0x%llx",
1046 _LLU(exofs_oi_objno(oi)), _LLU(sbi->s_pid)); 1085 _LLU(exofs_oi_objno(oi)), _LLU(sbi->layout.s_pid));
1047 /*TODO: When FS is corrupted creation can fail, object already 1086 /*TODO: When FS is corrupted creation can fail, object already
1048 * exist. Get rid of this asynchronous creation, if exist 1087 * exist. Get rid of this asynchronous creation, if exist
1049 * increment the obj counter and try the next object. Until we 1088 * increment the obj counter and try the next object. Until we
@@ -1104,7 +1143,7 @@ struct inode *exofs_new_inode(struct inode *dir, int mode)
1104 1143
1105 mark_inode_dirty(inode); 1144 mark_inode_dirty(inode);
1106 1145
1107 ret = exofs_get_io_state(sbi, &ios); 1146 ret = exofs_get_io_state(&sbi->layout, &ios);
1108 if (unlikely(ret)) { 1147 if (unlikely(ret)) {
1109 EXOFS_ERR("exofs_new_inode: exofs_get_io_state failed\n"); 1148 EXOFS_ERR("exofs_new_inode: exofs_get_io_state failed\n");
1110 return ERR_PTR(ret); 1149 return ERR_PTR(ret);
@@ -1170,8 +1209,10 @@ static int exofs_update_inode(struct inode *inode, int do_sync)
1170 int ret; 1209 int ret;
1171 1210
1172 args = kzalloc(sizeof(*args), GFP_KERNEL); 1211 args = kzalloc(sizeof(*args), GFP_KERNEL);
1173 if (!args) 1212 if (!args) {
1213 EXOFS_DBGMSG("Faild kzalloc of args\n");
1174 return -ENOMEM; 1214 return -ENOMEM;
1215 }
1175 1216
1176 fcb = &args->fcb; 1217 fcb = &args->fcb;
1177 1218
@@ -1200,7 +1241,7 @@ static int exofs_update_inode(struct inode *inode, int do_sync)
1200 } else 1241 } else
1201 memcpy(fcb->i_data, oi->i_data, sizeof(fcb->i_data)); 1242 memcpy(fcb->i_data, oi->i_data, sizeof(fcb->i_data));
1202 1243
1203 ret = exofs_get_io_state(sbi, &ios); 1244 ret = exofs_get_io_state(&sbi->layout, &ios);
1204 if (unlikely(ret)) { 1245 if (unlikely(ret)) {
1205 EXOFS_ERR("%s: exofs_get_io_state failed.\n", __func__); 1246 EXOFS_ERR("%s: exofs_get_io_state failed.\n", __func__);
1206 goto free_args; 1247 goto free_args;
@@ -1234,13 +1275,14 @@ static int exofs_update_inode(struct inode *inode, int do_sync)
1234free_args: 1275free_args:
1235 kfree(args); 1276 kfree(args);
1236out: 1277out:
1237 EXOFS_DBGMSG("ret=>%d\n", ret); 1278 EXOFS_DBGMSG("(0x%lx) do_sync=%d ret=>%d\n",
1279 inode->i_ino, do_sync, ret);
1238 return ret; 1280 return ret;
1239} 1281}
1240 1282
1241int exofs_write_inode(struct inode *inode, int wait) 1283int exofs_write_inode(struct inode *inode, struct writeback_control *wbc)
1242{ 1284{
1243 return exofs_update_inode(inode, wait); 1285 return exofs_update_inode(inode, wbc->sync_mode == WB_SYNC_ALL);
1244} 1286}
1245 1287
1246/* 1288/*
@@ -1283,7 +1325,7 @@ void exofs_delete_inode(struct inode *inode)
1283 1325
1284 clear_inode(inode); 1326 clear_inode(inode);
1285 1327
1286 ret = exofs_get_io_state(sbi, &ios); 1328 ret = exofs_get_io_state(&sbi->layout, &ios);
1287 if (unlikely(ret)) { 1329 if (unlikely(ret)) {
1288 EXOFS_ERR("%s: exofs_get_io_state failed\n", __func__); 1330 EXOFS_ERR("%s: exofs_get_io_state failed\n", __func__);
1289 return; 1331 return;
diff --git a/fs/exofs/ios.c b/fs/exofs/ios.c
index 5bad01fa1f9f..5293bc411d17 100644
--- a/fs/exofs/ios.c
+++ b/fs/exofs/ios.c
@@ -23,9 +23,13 @@
23 */ 23 */
24 24
25#include <scsi/scsi_device.h> 25#include <scsi/scsi_device.h>
26#include <asm/div64.h>
26 27
27#include "exofs.h" 28#include "exofs.h"
28 29
30#define EXOFS_DBGMSG2(M...) do {} while (0)
31/* #define EXOFS_DBGMSG2 EXOFS_DBGMSG */
32
29void exofs_make_credential(u8 cred_a[OSD_CAP_LEN], const struct osd_obj_id *obj) 33void exofs_make_credential(u8 cred_a[OSD_CAP_LEN], const struct osd_obj_id *obj)
30{ 34{
31 osd_sec_init_nosec_doall_caps(cred_a, obj, false, true); 35 osd_sec_init_nosec_doall_caps(cred_a, obj, false, true);
@@ -64,21 +68,24 @@ out:
64 return ret; 68 return ret;
65} 69}
66 70
67int exofs_get_io_state(struct exofs_sb_info *sbi, struct exofs_io_state** pios) 71int exofs_get_io_state(struct exofs_layout *layout,
72 struct exofs_io_state **pios)
68{ 73{
69 struct exofs_io_state *ios; 74 struct exofs_io_state *ios;
70 75
71 /*TODO: Maybe use kmem_cach per sbi of size 76 /*TODO: Maybe use kmem_cach per sbi of size
72 * exofs_io_state_size(sbi->s_numdevs) 77 * exofs_io_state_size(layout->s_numdevs)
73 */ 78 */
74 ios = kzalloc(exofs_io_state_size(sbi->s_numdevs), GFP_KERNEL); 79 ios = kzalloc(exofs_io_state_size(layout->s_numdevs), GFP_KERNEL);
75 if (unlikely(!ios)) { 80 if (unlikely(!ios)) {
81 EXOFS_DBGMSG("Faild kzalloc bytes=%d\n",
82 exofs_io_state_size(layout->s_numdevs));
76 *pios = NULL; 83 *pios = NULL;
77 return -ENOMEM; 84 return -ENOMEM;
78 } 85 }
79 86
80 ios->sbi = sbi; 87 ios->layout = layout;
81 ios->obj.partition = sbi->s_pid; 88 ios->obj.partition = layout->s_pid;
82 *pios = ios; 89 *pios = ios;
83 return 0; 90 return 0;
84} 91}
@@ -101,6 +108,29 @@ void exofs_put_io_state(struct exofs_io_state *ios)
101 } 108 }
102} 109}
103 110
111unsigned exofs_layout_od_id(struct exofs_layout *layout,
112 osd_id obj_no, unsigned layout_index)
113{
114/* switch (layout->lay_func) {
115 case LAYOUT_MOVING_WINDOW:
116 {*/
117 unsigned dev_mod = obj_no;
118
119 return (layout_index + dev_mod * layout->mirrors_p1) %
120 layout->s_numdevs;
121/* }
122 case LAYOUT_FUNC_IMPLICT:
123 return layout->devs[layout_index];
124 }*/
125}
126
127static inline struct osd_dev *exofs_ios_od(struct exofs_io_state *ios,
128 unsigned layout_index)
129{
130 return ios->layout->s_ods[
131 exofs_layout_od_id(ios->layout, ios->obj.id, layout_index)];
132}
133
104static void _sync_done(struct exofs_io_state *ios, void *p) 134static void _sync_done(struct exofs_io_state *ios, void *p)
105{ 135{
106 struct completion *waiting = p; 136 struct completion *waiting = p;
@@ -168,6 +198,21 @@ static int exofs_io_execute(struct exofs_io_state *ios)
168 return ret; 198 return ret;
169} 199}
170 200
201static void _clear_bio(struct bio *bio)
202{
203 struct bio_vec *bv;
204 unsigned i;
205
206 __bio_for_each_segment(bv, bio, i, 0) {
207 unsigned this_count = bv->bv_len;
208
209 if (likely(PAGE_SIZE == this_count))
210 clear_highpage(bv->bv_page);
211 else
212 zero_user(bv->bv_page, bv->bv_offset, this_count);
213 }
214}
215
171int exofs_check_io(struct exofs_io_state *ios, u64 *resid) 216int exofs_check_io(struct exofs_io_state *ios, u64 *resid)
172{ 217{
173 enum osd_err_priority acumulated_osd_err = 0; 218 enum osd_err_priority acumulated_osd_err = 0;
@@ -176,16 +221,25 @@ int exofs_check_io(struct exofs_io_state *ios, u64 *resid)
176 221
177 for (i = 0; i < ios->numdevs; i++) { 222 for (i = 0; i < ios->numdevs; i++) {
178 struct osd_sense_info osi; 223 struct osd_sense_info osi;
179 int ret = osd_req_decode_sense(ios->per_dev[i].or, &osi); 224 struct osd_request *or = ios->per_dev[i].or;
225 int ret;
226
227 if (unlikely(!or))
228 continue;
180 229
230 ret = osd_req_decode_sense(or, &osi);
181 if (likely(!ret)) 231 if (likely(!ret))
182 continue; 232 continue;
183 233
184 if (unlikely(ret == -EFAULT)) { 234 if (OSD_ERR_PRI_CLEAR_PAGES == osi.osd_err_pri) {
185 EXOFS_DBGMSG("%s: EFAULT Need page clear\n", __func__); 235 /* start read offset passed endof file */
186 /*FIXME: All the pages in this device range should: 236 _clear_bio(ios->per_dev[i].bio);
187 * clear_highpage(page); 237 EXOFS_DBGMSG("start read offset passed end of file "
188 */ 238 "offset=0x%llx, length=0x%llx\n",
239 _LLU(ios->per_dev[i].offset),
240 _LLU(ios->per_dev[i].length));
241
242 continue; /* we recovered */
189 } 243 }
190 244
191 if (osi.osd_err_pri >= acumulated_osd_err) { 245 if (osi.osd_err_pri >= acumulated_osd_err) {
@@ -205,14 +259,259 @@ int exofs_check_io(struct exofs_io_state *ios, u64 *resid)
205 return acumulated_lin_err; 259 return acumulated_lin_err;
206} 260}
207 261
262/*
263 * L - logical offset into the file
264 *
265 * U - The number of bytes in a stripe within a group
266 *
267 * U = stripe_unit * group_width
268 *
269 * T - The number of bytes striped within a group of component objects
270 * (before advancing to the next group)
271 *
272 * T = stripe_unit * group_width * group_depth
273 *
274 * S - The number of bytes striped across all component objects
275 * before the pattern repeats
276 *
277 * S = stripe_unit * group_width * group_depth * group_count
278 *
279 * M - The "major" (i.e., across all components) stripe number
280 *
281 * M = L / S
282 *
283 * G - Counts the groups from the beginning of the major stripe
284 *
285 * G = (L - (M * S)) / T [or (L % S) / T]
286 *
287 * H - The byte offset within the group
288 *
289 * H = (L - (M * S)) % T [or (L % S) % T]
290 *
291 * N - The "minor" (i.e., across the group) stripe number
292 *
293 * N = H / U
294 *
295 * C - The component index coresponding to L
296 *
297 * C = (H - (N * U)) / stripe_unit + G * group_width
298 * [or (L % U) / stripe_unit + G * group_width]
299 *
300 * O - The component offset coresponding to L
301 *
302 * O = L % stripe_unit + N * stripe_unit + M * group_depth * stripe_unit
303 */
304struct _striping_info {
305 u64 obj_offset;
306 u64 group_length;
307 u64 total_group_length;
308 u64 Major;
309 unsigned dev;
310 unsigned unit_off;
311};
312
313static void _calc_stripe_info(struct exofs_io_state *ios, u64 file_offset,
314 struct _striping_info *si)
315{
316 u32 stripe_unit = ios->layout->stripe_unit;
317 u32 group_width = ios->layout->group_width;
318 u64 group_depth = ios->layout->group_depth;
319
320 u32 U = stripe_unit * group_width;
321 u64 T = U * group_depth;
322 u64 S = T * ios->layout->group_count;
323 u64 M = div64_u64(file_offset, S);
324
325 /*
326 G = (L - (M * S)) / T
327 H = (L - (M * S)) % T
328 */
329 u64 LmodS = file_offset - M * S;
330 u32 G = div64_u64(LmodS, T);
331 u64 H = LmodS - G * T;
332
333 u32 N = div_u64(H, U);
334
335 /* "H - (N * U)" is just "H % U" so it's bound to u32 */
336 si->dev = (u32)(H - (N * U)) / stripe_unit + G * group_width;
337 si->dev *= ios->layout->mirrors_p1;
338
339 div_u64_rem(file_offset, stripe_unit, &si->unit_off);
340
341 si->obj_offset = si->unit_off + (N * stripe_unit) +
342 (M * group_depth * stripe_unit);
343
344 si->group_length = T - H;
345 si->total_group_length = T;
346 si->Major = M;
347}
348
349static int _add_stripe_unit(struct exofs_io_state *ios, unsigned *cur_pg,
350 unsigned pgbase, struct exofs_per_dev_state *per_dev,
351 int cur_len)
352{
353 unsigned pg = *cur_pg;
354 struct request_queue *q =
355 osd_request_queue(exofs_ios_od(ios, per_dev->dev));
356
357 per_dev->length += cur_len;
358
359 if (per_dev->bio == NULL) {
360 unsigned pages_in_stripe = ios->layout->group_width *
361 (ios->layout->stripe_unit / PAGE_SIZE);
362 unsigned bio_size = (ios->nr_pages + pages_in_stripe) /
363 ios->layout->group_width;
364
365 per_dev->bio = bio_kmalloc(GFP_KERNEL, bio_size);
366 if (unlikely(!per_dev->bio)) {
367 EXOFS_DBGMSG("Faild to allocate BIO size=%u\n",
368 bio_size);
369 return -ENOMEM;
370 }
371 }
372
373 while (cur_len > 0) {
374 unsigned pglen = min_t(unsigned, PAGE_SIZE - pgbase, cur_len);
375 unsigned added_len;
376
377 BUG_ON(ios->nr_pages <= pg);
378 cur_len -= pglen;
379
380 added_len = bio_add_pc_page(q, per_dev->bio, ios->pages[pg],
381 pglen, pgbase);
382 if (unlikely(pglen != added_len))
383 return -ENOMEM;
384 pgbase = 0;
385 ++pg;
386 }
387 BUG_ON(cur_len);
388
389 *cur_pg = pg;
390 return 0;
391}
392
393static int _prepare_one_group(struct exofs_io_state *ios, u64 length,
394 struct _striping_info *si, unsigned first_comp)
395{
396 unsigned stripe_unit = ios->layout->stripe_unit;
397 unsigned mirrors_p1 = ios->layout->mirrors_p1;
398 unsigned devs_in_group = ios->layout->group_width * mirrors_p1;
399 unsigned dev = si->dev;
400 unsigned first_dev = dev - (dev % devs_in_group);
401 unsigned comp = first_comp + (dev - first_dev);
402 unsigned max_comp = ios->numdevs ? ios->numdevs - mirrors_p1 : 0;
403 unsigned cur_pg = ios->pages_consumed;
404 int ret = 0;
405
406 while (length) {
407 struct exofs_per_dev_state *per_dev = &ios->per_dev[comp];
408 unsigned cur_len, page_off = 0;
409
410 if (!per_dev->length) {
411 per_dev->dev = dev;
412 if (dev < si->dev) {
413 per_dev->offset = si->obj_offset + stripe_unit -
414 si->unit_off;
415 cur_len = stripe_unit;
416 } else if (dev == si->dev) {
417 per_dev->offset = si->obj_offset;
418 cur_len = stripe_unit - si->unit_off;
419 page_off = si->unit_off & ~PAGE_MASK;
420 BUG_ON(page_off && (page_off != ios->pgbase));
421 } else { /* dev > si->dev */
422 per_dev->offset = si->obj_offset - si->unit_off;
423 cur_len = stripe_unit;
424 }
425
426 if (max_comp < comp)
427 max_comp = comp;
428
429 dev += mirrors_p1;
430 dev = (dev % devs_in_group) + first_dev;
431 } else {
432 cur_len = stripe_unit;
433 }
434 if (cur_len >= length)
435 cur_len = length;
436
437 ret = _add_stripe_unit(ios, &cur_pg, page_off , per_dev,
438 cur_len);
439 if (unlikely(ret))
440 goto out;
441
442 comp += mirrors_p1;
443 comp = (comp % devs_in_group) + first_comp;
444
445 length -= cur_len;
446 }
447out:
448 ios->numdevs = max_comp + mirrors_p1;
449 ios->pages_consumed = cur_pg;
450 return ret;
451}
452
453static int _prepare_for_striping(struct exofs_io_state *ios)
454{
455 u64 length = ios->length;
456 struct _striping_info si;
457 unsigned devs_in_group = ios->layout->group_width *
458 ios->layout->mirrors_p1;
459 unsigned first_comp = 0;
460 int ret = 0;
461
462 _calc_stripe_info(ios, ios->offset, &si);
463
464 if (!ios->pages) {
465 if (ios->kern_buff) {
466 struct exofs_per_dev_state *per_dev = &ios->per_dev[0];
467
468 per_dev->offset = si.obj_offset;
469 per_dev->dev = si.dev;
470
471 /* no cross device without page array */
472 BUG_ON((ios->layout->group_width > 1) &&
473 (si.unit_off + ios->length >
474 ios->layout->stripe_unit));
475 }
476 ios->numdevs = ios->layout->mirrors_p1;
477 return 0;
478 }
479
480 while (length) {
481 if (length < si.group_length)
482 si.group_length = length;
483
484 ret = _prepare_one_group(ios, si.group_length, &si, first_comp);
485 if (unlikely(ret))
486 goto out;
487
488 length -= si.group_length;
489
490 si.group_length = si.total_group_length;
491 si.unit_off = 0;
492 ++si.Major;
493 si.obj_offset = si.Major * ios->layout->stripe_unit *
494 ios->layout->group_depth;
495
496 si.dev = (si.dev - (si.dev % devs_in_group)) + devs_in_group;
497 si.dev %= ios->layout->s_numdevs;
498
499 first_comp += devs_in_group;
500 first_comp %= ios->layout->s_numdevs;
501 }
502
503out:
504 return ret;
505}
506
208int exofs_sbi_create(struct exofs_io_state *ios) 507int exofs_sbi_create(struct exofs_io_state *ios)
209{ 508{
210 int i, ret; 509 int i, ret;
211 510
212 for (i = 0; i < ios->sbi->s_numdevs; i++) { 511 for (i = 0; i < ios->layout->s_numdevs; i++) {
213 struct osd_request *or; 512 struct osd_request *or;
214 513
215 or = osd_start_request(ios->sbi->s_ods[i], GFP_KERNEL); 514 or = osd_start_request(exofs_ios_od(ios, i), GFP_KERNEL);
216 if (unlikely(!or)) { 515 if (unlikely(!or)) {
217 EXOFS_ERR("%s: osd_start_request failed\n", __func__); 516 EXOFS_ERR("%s: osd_start_request failed\n", __func__);
218 ret = -ENOMEM; 517 ret = -ENOMEM;
@@ -233,10 +532,10 @@ int exofs_sbi_remove(struct exofs_io_state *ios)
233{ 532{
234 int i, ret; 533 int i, ret;
235 534
236 for (i = 0; i < ios->sbi->s_numdevs; i++) { 535 for (i = 0; i < ios->layout->s_numdevs; i++) {
237 struct osd_request *or; 536 struct osd_request *or;
238 537
239 or = osd_start_request(ios->sbi->s_ods[i], GFP_KERNEL); 538 or = osd_start_request(exofs_ios_od(ios, i), GFP_KERNEL);
240 if (unlikely(!or)) { 539 if (unlikely(!or)) {
241 EXOFS_ERR("%s: osd_start_request failed\n", __func__); 540 EXOFS_ERR("%s: osd_start_request failed\n", __func__);
242 ret = -ENOMEM; 541 ret = -ENOMEM;
@@ -253,51 +552,74 @@ out:
253 return ret; 552 return ret;
254} 553}
255 554
256int exofs_sbi_write(struct exofs_io_state *ios) 555static int _sbi_write_mirror(struct exofs_io_state *ios, int cur_comp)
257{ 556{
258 int i, ret; 557 struct exofs_per_dev_state *master_dev = &ios->per_dev[cur_comp];
558 unsigned dev = ios->per_dev[cur_comp].dev;
559 unsigned last_comp = cur_comp + ios->layout->mirrors_p1;
560 int ret = 0;
259 561
260 for (i = 0; i < ios->sbi->s_numdevs; i++) { 562 if (ios->pages && !master_dev->length)
563 return 0; /* Just an empty slot */
564
565 for (; cur_comp < last_comp; ++cur_comp, ++dev) {
566 struct exofs_per_dev_state *per_dev = &ios->per_dev[cur_comp];
261 struct osd_request *or; 567 struct osd_request *or;
262 568
263 or = osd_start_request(ios->sbi->s_ods[i], GFP_KERNEL); 569 or = osd_start_request(exofs_ios_od(ios, dev), GFP_KERNEL);
264 if (unlikely(!or)) { 570 if (unlikely(!or)) {
265 EXOFS_ERR("%s: osd_start_request failed\n", __func__); 571 EXOFS_ERR("%s: osd_start_request failed\n", __func__);
266 ret = -ENOMEM; 572 ret = -ENOMEM;
267 goto out; 573 goto out;
268 } 574 }
269 ios->per_dev[i].or = or; 575 per_dev->or = or;
270 ios->numdevs++; 576 per_dev->offset = master_dev->offset;
271 577
272 if (ios->bio) { 578 if (ios->pages) {
273 struct bio *bio; 579 struct bio *bio;
274 580
275 if (i != 0) { 581 if (per_dev != master_dev) {
276 bio = bio_kmalloc(GFP_KERNEL, 582 bio = bio_kmalloc(GFP_KERNEL,
277 ios->bio->bi_max_vecs); 583 master_dev->bio->bi_max_vecs);
278 if (unlikely(!bio)) { 584 if (unlikely(!bio)) {
585 EXOFS_DBGMSG(
586 "Faild to allocate BIO size=%u\n",
587 master_dev->bio->bi_max_vecs);
279 ret = -ENOMEM; 588 ret = -ENOMEM;
280 goto out; 589 goto out;
281 } 590 }
282 591
283 __bio_clone(bio, ios->bio); 592 __bio_clone(bio, master_dev->bio);
284 bio->bi_bdev = NULL; 593 bio->bi_bdev = NULL;
285 bio->bi_next = NULL; 594 bio->bi_next = NULL;
286 ios->per_dev[i].bio = bio; 595 per_dev->length = master_dev->length;
596 per_dev->bio = bio;
597 per_dev->dev = dev;
287 } else { 598 } else {
288 bio = ios->bio; 599 bio = master_dev->bio;
600 /* FIXME: bio_set_dir() */
601 bio->bi_rw |= (1 << BIO_RW);
289 } 602 }
290 603
291 osd_req_write(or, &ios->obj, ios->offset, bio, 604 osd_req_write(or, &ios->obj, per_dev->offset, bio,
292 ios->length); 605 per_dev->length);
293/* EXOFS_DBGMSG("write sync=%d\n", sync);*/ 606 EXOFS_DBGMSG("write(0x%llx) offset=0x%llx "
607 "length=0x%llx dev=%d\n",
608 _LLU(ios->obj.id), _LLU(per_dev->offset),
609 _LLU(per_dev->length), dev);
294 } else if (ios->kern_buff) { 610 } else if (ios->kern_buff) {
295 osd_req_write_kern(or, &ios->obj, ios->offset, 611 ret = osd_req_write_kern(or, &ios->obj, per_dev->offset,
296 ios->kern_buff, ios->length); 612 ios->kern_buff, ios->length);
297/* EXOFS_DBGMSG("write_kern sync=%d\n", sync);*/ 613 if (unlikely(ret))
614 goto out;
615 EXOFS_DBGMSG2("write_kern(0x%llx) offset=0x%llx "
616 "length=0x%llx dev=%d\n",
617 _LLU(ios->obj.id), _LLU(per_dev->offset),
618 _LLU(ios->length), dev);
298 } else { 619 } else {
299 osd_req_set_attributes(or, &ios->obj); 620 osd_req_set_attributes(or, &ios->obj);
300/* EXOFS_DBGMSG("set_attributes sync=%d\n", sync);*/ 621 EXOFS_DBGMSG2("obj(0x%llx) set_attributes=%d dev=%d\n",
622 _LLU(ios->obj.id), ios->out_attr_len, dev);
301 } 623 }
302 624
303 if (ios->out_attr) 625 if (ios->out_attr)
@@ -308,54 +630,93 @@ int exofs_sbi_write(struct exofs_io_state *ios)
308 osd_req_add_get_attr_list(or, ios->in_attr, 630 osd_req_add_get_attr_list(or, ios->in_attr,
309 ios->in_attr_len); 631 ios->in_attr_len);
310 } 632 }
311 ret = exofs_io_execute(ios);
312 633
313out: 634out:
314 return ret; 635 return ret;
315} 636}
316 637
317int exofs_sbi_read(struct exofs_io_state *ios) 638int exofs_sbi_write(struct exofs_io_state *ios)
318{ 639{
319 int i, ret; 640 int i;
641 int ret;
320 642
321 for (i = 0; i < 1; i++) { 643 ret = _prepare_for_striping(ios);
322 struct osd_request *or; 644 if (unlikely(ret))
323 unsigned first_dev = (unsigned)ios->obj.id; 645 return ret;
324 646
325 first_dev %= ios->sbi->s_numdevs; 647 for (i = 0; i < ios->numdevs; i += ios->layout->mirrors_p1) {
326 or = osd_start_request(ios->sbi->s_ods[first_dev], GFP_KERNEL); 648 ret = _sbi_write_mirror(ios, i);
327 if (unlikely(!or)) { 649 if (unlikely(ret))
328 EXOFS_ERR("%s: osd_start_request failed\n", __func__); 650 return ret;
329 ret = -ENOMEM; 651 }
330 goto out;
331 }
332 ios->per_dev[i].or = or;
333 ios->numdevs++;
334 652
335 if (ios->bio) { 653 ret = exofs_io_execute(ios);
336 osd_req_read(or, &ios->obj, ios->offset, ios->bio, 654 return ret;
337 ios->length); 655}
338/* EXOFS_DBGMSG("read sync=%d\n", sync);*/
339 } else if (ios->kern_buff) {
340 osd_req_read_kern(or, &ios->obj, ios->offset,
341 ios->kern_buff, ios->length);
342/* EXOFS_DBGMSG("read_kern sync=%d\n", sync);*/
343 } else {
344 osd_req_get_attributes(or, &ios->obj);
345/* EXOFS_DBGMSG("get_attributes sync=%d\n", sync);*/
346 }
347 656
348 if (ios->out_attr) 657static int _sbi_read_mirror(struct exofs_io_state *ios, unsigned cur_comp)
349 osd_req_add_set_attr_list(or, ios->out_attr, 658{
350 ios->out_attr_len); 659 struct osd_request *or;
660 struct exofs_per_dev_state *per_dev = &ios->per_dev[cur_comp];
661 unsigned first_dev = (unsigned)ios->obj.id;
351 662
352 if (ios->in_attr) 663 if (ios->pages && !per_dev->length)
353 osd_req_add_get_attr_list(or, ios->in_attr, 664 return 0; /* Just an empty slot */
354 ios->in_attr_len); 665
666 first_dev = per_dev->dev + first_dev % ios->layout->mirrors_p1;
667 or = osd_start_request(exofs_ios_od(ios, first_dev), GFP_KERNEL);
668 if (unlikely(!or)) {
669 EXOFS_ERR("%s: osd_start_request failed\n", __func__);
670 return -ENOMEM;
355 } 671 }
356 ret = exofs_io_execute(ios); 672 per_dev->or = or;
673
674 if (ios->pages) {
675 osd_req_read(or, &ios->obj, per_dev->offset,
676 per_dev->bio, per_dev->length);
677 EXOFS_DBGMSG("read(0x%llx) offset=0x%llx length=0x%llx"
678 " dev=%d\n", _LLU(ios->obj.id),
679 _LLU(per_dev->offset), _LLU(per_dev->length),
680 first_dev);
681 } else if (ios->kern_buff) {
682 int ret = osd_req_read_kern(or, &ios->obj, per_dev->offset,
683 ios->kern_buff, ios->length);
684 EXOFS_DBGMSG2("read_kern(0x%llx) offset=0x%llx "
685 "length=0x%llx dev=%d ret=>%d\n",
686 _LLU(ios->obj.id), _LLU(per_dev->offset),
687 _LLU(ios->length), first_dev, ret);
688 if (unlikely(ret))
689 return ret;
690 } else {
691 osd_req_get_attributes(or, &ios->obj);
692 EXOFS_DBGMSG2("obj(0x%llx) get_attributes=%d dev=%d\n",
693 _LLU(ios->obj.id), ios->in_attr_len, first_dev);
694 }
695 if (ios->out_attr)
696 osd_req_add_set_attr_list(or, ios->out_attr, ios->out_attr_len);
357 697
358out: 698 if (ios->in_attr)
699 osd_req_add_get_attr_list(or, ios->in_attr, ios->in_attr_len);
700
701 return 0;
702}
703
704int exofs_sbi_read(struct exofs_io_state *ios)
705{
706 int i;
707 int ret;
708
709 ret = _prepare_for_striping(ios);
710 if (unlikely(ret))
711 return ret;
712
713 for (i = 0; i < ios->numdevs; i += ios->layout->mirrors_p1) {
714 ret = _sbi_read_mirror(ios, i);
715 if (unlikely(ret))
716 return ret;
717 }
718
719 ret = exofs_io_execute(ios);
359 return ret; 720 return ret;
360} 721}
361 722
@@ -380,42 +741,82 @@ int extract_attr_from_ios(struct exofs_io_state *ios, struct osd_attr *attr)
380 return -EIO; 741 return -EIO;
381} 742}
382 743
744static int _truncate_mirrors(struct exofs_io_state *ios, unsigned cur_comp,
745 struct osd_attr *attr)
746{
747 int last_comp = cur_comp + ios->layout->mirrors_p1;
748
749 for (; cur_comp < last_comp; ++cur_comp) {
750 struct exofs_per_dev_state *per_dev = &ios->per_dev[cur_comp];
751 struct osd_request *or;
752
753 or = osd_start_request(exofs_ios_od(ios, cur_comp), GFP_KERNEL);
754 if (unlikely(!or)) {
755 EXOFS_ERR("%s: osd_start_request failed\n", __func__);
756 return -ENOMEM;
757 }
758 per_dev->or = or;
759
760 osd_req_set_attributes(or, &ios->obj);
761 osd_req_add_set_attr_list(or, attr, 1);
762 }
763
764 return 0;
765}
766
383int exofs_oi_truncate(struct exofs_i_info *oi, u64 size) 767int exofs_oi_truncate(struct exofs_i_info *oi, u64 size)
384{ 768{
385 struct exofs_sb_info *sbi = oi->vfs_inode.i_sb->s_fs_info; 769 struct exofs_sb_info *sbi = oi->vfs_inode.i_sb->s_fs_info;
386 struct exofs_io_state *ios; 770 struct exofs_io_state *ios;
387 struct osd_attr attr; 771 struct exofs_trunc_attr {
388 __be64 newsize; 772 struct osd_attr attr;
773 __be64 newsize;
774 } *size_attrs;
775 struct _striping_info si;
389 int i, ret; 776 int i, ret;
390 777
391 if (exofs_get_io_state(sbi, &ios)) 778 ret = exofs_get_io_state(&sbi->layout, &ios);
392 return -ENOMEM; 779 if (unlikely(ret))
780 return ret;
781
782 size_attrs = kcalloc(ios->layout->group_width, sizeof(*size_attrs),
783 GFP_KERNEL);
784 if (unlikely(!size_attrs)) {
785 ret = -ENOMEM;
786 goto out;
787 }
393 788
394 ios->obj.id = exofs_oi_objno(oi); 789 ios->obj.id = exofs_oi_objno(oi);
395 ios->cred = oi->i_cred; 790 ios->cred = oi->i_cred;
396 791
397 newsize = cpu_to_be64(size); 792 ios->numdevs = ios->layout->s_numdevs;
398 attr = g_attr_logical_length; 793 _calc_stripe_info(ios, size, &si);
399 attr.val_ptr = &newsize;
400 794
401 for (i = 0; i < sbi->s_numdevs; i++) { 795 for (i = 0; i < ios->layout->group_width; ++i) {
402 struct osd_request *or; 796 struct exofs_trunc_attr *size_attr = &size_attrs[i];
797 u64 obj_size;
403 798
404 or = osd_start_request(sbi->s_ods[i], GFP_KERNEL); 799 if (i < si.dev)
405 if (unlikely(!or)) { 800 obj_size = si.obj_offset +
406 EXOFS_ERR("%s: osd_start_request failed\n", __func__); 801 ios->layout->stripe_unit - si.unit_off;
407 ret = -ENOMEM; 802 else if (i == si.dev)
408 goto out; 803 obj_size = si.obj_offset;
409 } 804 else /* i > si.dev */
410 ios->per_dev[i].or = or; 805 obj_size = si.obj_offset - si.unit_off;
411 ios->numdevs++;
412 806
413 osd_req_set_attributes(or, &ios->obj); 807 size_attr->newsize = cpu_to_be64(obj_size);
414 osd_req_add_set_attr_list(or, &attr, 1); 808 size_attr->attr = g_attr_logical_length;
809 size_attr->attr.val_ptr = &size_attr->newsize;
810
811 ret = _truncate_mirrors(ios, i * ios->layout->mirrors_p1,
812 &size_attr->attr);
813 if (unlikely(ret))
814 goto out;
415 } 815 }
416 ret = exofs_io_execute(ios); 816 ret = exofs_io_execute(ios);
417 817
418out: 818out:
819 kfree(size_attrs);
419 exofs_put_io_state(ios); 820 exofs_put_io_state(ios);
420 return ret; 821 return ret;
421} 822}
diff --git a/fs/exofs/super.c b/fs/exofs/super.c
index a1d1e77b12eb..6cf5e4e84d61 100644
--- a/fs/exofs/super.c
+++ b/fs/exofs/super.c
@@ -210,7 +210,7 @@ int exofs_sync_fs(struct super_block *sb, int wait)
210 sbi = sb->s_fs_info; 210 sbi = sb->s_fs_info;
211 fscb = &sbi->s_fscb; 211 fscb = &sbi->s_fscb;
212 212
213 ret = exofs_get_io_state(sbi, &ios); 213 ret = exofs_get_io_state(&sbi->layout, &ios);
214 if (ret) 214 if (ret)
215 goto out; 215 goto out;
216 216
@@ -264,12 +264,12 @@ static void _exofs_print_device(const char *msg, const char *dev_path,
264 264
265void exofs_free_sbi(struct exofs_sb_info *sbi) 265void exofs_free_sbi(struct exofs_sb_info *sbi)
266{ 266{
267 while (sbi->s_numdevs) { 267 while (sbi->layout.s_numdevs) {
268 int i = --sbi->s_numdevs; 268 int i = --sbi->layout.s_numdevs;
269 struct osd_dev *od = sbi->s_ods[i]; 269 struct osd_dev *od = sbi->layout.s_ods[i];
270 270
271 if (od) { 271 if (od) {
272 sbi->s_ods[i] = NULL; 272 sbi->layout.s_ods[i] = NULL;
273 osduld_put_device(od); 273 osduld_put_device(od);
274 } 274 }
275 } 275 }
@@ -298,7 +298,8 @@ static void exofs_put_super(struct super_block *sb)
298 msecs_to_jiffies(100)); 298 msecs_to_jiffies(100));
299 } 299 }
300 300
301 _exofs_print_device("Unmounting", NULL, sbi->s_ods[0], sbi->s_pid); 301 _exofs_print_device("Unmounting", NULL, sbi->layout.s_ods[0],
302 sbi->layout.s_pid);
302 303
303 exofs_free_sbi(sbi); 304 exofs_free_sbi(sbi);
304 sb->s_fs_info = NULL; 305 sb->s_fs_info = NULL;
@@ -307,6 +308,8 @@ static void exofs_put_super(struct super_block *sb)
307static int _read_and_match_data_map(struct exofs_sb_info *sbi, unsigned numdevs, 308static int _read_and_match_data_map(struct exofs_sb_info *sbi, unsigned numdevs,
308 struct exofs_device_table *dt) 309 struct exofs_device_table *dt)
309{ 310{
311 u64 stripe_length;
312
310 sbi->data_map.odm_num_comps = 313 sbi->data_map.odm_num_comps =
311 le32_to_cpu(dt->dt_data_map.cb_num_comps); 314 le32_to_cpu(dt->dt_data_map.cb_num_comps);
312 sbi->data_map.odm_stripe_unit = 315 sbi->data_map.odm_stripe_unit =
@@ -320,14 +323,63 @@ static int _read_and_match_data_map(struct exofs_sb_info *sbi, unsigned numdevs,
320 sbi->data_map.odm_raid_algorithm = 323 sbi->data_map.odm_raid_algorithm =
321 le32_to_cpu(dt->dt_data_map.cb_raid_algorithm); 324 le32_to_cpu(dt->dt_data_map.cb_raid_algorithm);
322 325
323/* FIXME: Hard coded mirror only for now. if not so do not mount */ 326/* FIXME: Only raid0 for now. if not so, do not mount */
324 if ((sbi->data_map.odm_num_comps != numdevs) || 327 if (sbi->data_map.odm_num_comps != numdevs) {
325 (sbi->data_map.odm_stripe_unit != EXOFS_BLKSIZE) || 328 EXOFS_ERR("odm_num_comps(%u) != numdevs(%u)\n",
326 (sbi->data_map.odm_raid_algorithm != PNFS_OSD_RAID_0) || 329 sbi->data_map.odm_num_comps, numdevs);
327 (sbi->data_map.odm_mirror_cnt != (numdevs - 1)))
328 return -EINVAL; 330 return -EINVAL;
329 else 331 }
330 return 0; 332 if (sbi->data_map.odm_raid_algorithm != PNFS_OSD_RAID_0) {
333 EXOFS_ERR("Only RAID_0 for now\n");
334 return -EINVAL;
335 }
336 if (0 != (numdevs % (sbi->data_map.odm_mirror_cnt + 1))) {
337 EXOFS_ERR("Data Map wrong, numdevs=%d mirrors=%d\n",
338 numdevs, sbi->data_map.odm_mirror_cnt);
339 return -EINVAL;
340 }
341
342 if (0 != (sbi->data_map.odm_stripe_unit & ~PAGE_MASK)) {
343 EXOFS_ERR("Stripe Unit(0x%llx)"
344 " must be Multples of PAGE_SIZE(0x%lx)\n",
345 _LLU(sbi->data_map.odm_stripe_unit), PAGE_SIZE);
346 return -EINVAL;
347 }
348
349 sbi->layout.stripe_unit = sbi->data_map.odm_stripe_unit;
350 sbi->layout.mirrors_p1 = sbi->data_map.odm_mirror_cnt + 1;
351
352 if (sbi->data_map.odm_group_width) {
353 sbi->layout.group_width = sbi->data_map.odm_group_width;
354 sbi->layout.group_depth = sbi->data_map.odm_group_depth;
355 if (!sbi->layout.group_depth) {
356 EXOFS_ERR("group_depth == 0 && group_width != 0\n");
357 return -EINVAL;
358 }
359 sbi->layout.group_count = sbi->data_map.odm_num_comps /
360 sbi->layout.mirrors_p1 /
361 sbi->data_map.odm_group_width;
362 } else {
363 if (sbi->data_map.odm_group_depth) {
364 printk(KERN_NOTICE "Warning: group_depth ignored "
365 "group_width == 0 && group_depth == %d\n",
366 sbi->data_map.odm_group_depth);
367 sbi->data_map.odm_group_depth = 0;
368 }
369 sbi->layout.group_width = sbi->data_map.odm_num_comps /
370 sbi->layout.mirrors_p1;
371 sbi->layout.group_depth = -1;
372 sbi->layout.group_count = 1;
373 }
374
375 stripe_length = (u64)sbi->layout.group_width * sbi->layout.stripe_unit;
376 if (stripe_length >= (1ULL << 32)) {
377 EXOFS_ERR("Total Stripe length(0x%llx)"
378 " >= 32bit is not supported\n", _LLU(stripe_length));
379 return -EINVAL;
380 }
381
382 return 0;
331} 383}
332 384
333/* @odi is valid only as long as @fscb_dev is valid */ 385/* @odi is valid only as long as @fscb_dev is valid */
@@ -361,7 +413,7 @@ static int exofs_read_lookup_dev_table(struct exofs_sb_info **psbi,
361{ 413{
362 struct exofs_sb_info *sbi = *psbi; 414 struct exofs_sb_info *sbi = *psbi;
363 struct osd_dev *fscb_od; 415 struct osd_dev *fscb_od;
364 struct osd_obj_id obj = {.partition = sbi->s_pid, 416 struct osd_obj_id obj = {.partition = sbi->layout.s_pid,
365 .id = EXOFS_DEVTABLE_ID}; 417 .id = EXOFS_DEVTABLE_ID};
366 struct exofs_device_table *dt; 418 struct exofs_device_table *dt;
367 unsigned table_bytes = table_count * sizeof(dt->dt_dev_table[0]) + 419 unsigned table_bytes = table_count * sizeof(dt->dt_dev_table[0]) +
@@ -376,9 +428,9 @@ static int exofs_read_lookup_dev_table(struct exofs_sb_info **psbi,
376 return -ENOMEM; 428 return -ENOMEM;
377 } 429 }
378 430
379 fscb_od = sbi->s_ods[0]; 431 fscb_od = sbi->layout.s_ods[0];
380 sbi->s_ods[0] = NULL; 432 sbi->layout.s_ods[0] = NULL;
381 sbi->s_numdevs = 0; 433 sbi->layout.s_numdevs = 0;
382 ret = exofs_read_kern(fscb_od, sbi->s_cred, &obj, 0, dt, table_bytes); 434 ret = exofs_read_kern(fscb_od, sbi->s_cred, &obj, 0, dt, table_bytes);
383 if (unlikely(ret)) { 435 if (unlikely(ret)) {
384 EXOFS_ERR("ERROR: reading device table\n"); 436 EXOFS_ERR("ERROR: reading device table\n");
@@ -397,14 +449,15 @@ static int exofs_read_lookup_dev_table(struct exofs_sb_info **psbi,
397 goto out; 449 goto out;
398 450
399 if (likely(numdevs > 1)) { 451 if (likely(numdevs > 1)) {
400 unsigned size = numdevs * sizeof(sbi->s_ods[0]); 452 unsigned size = numdevs * sizeof(sbi->layout.s_ods[0]);
401 453
402 sbi = krealloc(sbi, sizeof(*sbi) + size, GFP_KERNEL); 454 sbi = krealloc(sbi, sizeof(*sbi) + size, GFP_KERNEL);
403 if (unlikely(!sbi)) { 455 if (unlikely(!sbi)) {
404 ret = -ENOMEM; 456 ret = -ENOMEM;
405 goto out; 457 goto out;
406 } 458 }
407 memset(&sbi->s_ods[1], 0, size - sizeof(sbi->s_ods[0])); 459 memset(&sbi->layout.s_ods[1], 0,
460 size - sizeof(sbi->layout.s_ods[0]));
408 *psbi = sbi; 461 *psbi = sbi;
409 } 462 }
410 463
@@ -427,8 +480,8 @@ static int exofs_read_lookup_dev_table(struct exofs_sb_info **psbi,
427 * line. We always keep them in device-table order. 480 * line. We always keep them in device-table order.
428 */ 481 */
429 if (fscb_od && osduld_device_same(fscb_od, &odi)) { 482 if (fscb_od && osduld_device_same(fscb_od, &odi)) {
430 sbi->s_ods[i] = fscb_od; 483 sbi->layout.s_ods[i] = fscb_od;
431 ++sbi->s_numdevs; 484 ++sbi->layout.s_numdevs;
432 fscb_od = NULL; 485 fscb_od = NULL;
433 continue; 486 continue;
434 } 487 }
@@ -441,8 +494,8 @@ static int exofs_read_lookup_dev_table(struct exofs_sb_info **psbi,
441 goto out; 494 goto out;
442 } 495 }
443 496
444 sbi->s_ods[i] = od; 497 sbi->layout.s_ods[i] = od;
445 ++sbi->s_numdevs; 498 ++sbi->layout.s_numdevs;
446 499
447 /* Read the fscb of the other devices to make sure the FS 500 /* Read the fscb of the other devices to make sure the FS
448 * partition is there. 501 * partition is there.
@@ -499,9 +552,15 @@ static int exofs_fill_super(struct super_block *sb, void *data, int silent)
499 goto free_sbi; 552 goto free_sbi;
500 } 553 }
501 554
502 sbi->s_ods[0] = od; 555 /* Default layout in case we do not have a device-table */
503 sbi->s_numdevs = 1; 556 sbi->layout.stripe_unit = PAGE_SIZE;
504 sbi->s_pid = opts->pid; 557 sbi->layout.mirrors_p1 = 1;
558 sbi->layout.group_width = 1;
559 sbi->layout.group_depth = -1;
560 sbi->layout.group_count = 1;
561 sbi->layout.s_ods[0] = od;
562 sbi->layout.s_numdevs = 1;
563 sbi->layout.s_pid = opts->pid;
505 sbi->s_timeout = opts->timeout; 564 sbi->s_timeout = opts->timeout;
506 565
507 /* fill in some other data by hand */ 566 /* fill in some other data by hand */
@@ -514,7 +573,7 @@ static int exofs_fill_super(struct super_block *sb, void *data, int silent)
514 sb->s_bdev = NULL; 573 sb->s_bdev = NULL;
515 sb->s_dev = 0; 574 sb->s_dev = 0;
516 575
517 obj.partition = sbi->s_pid; 576 obj.partition = sbi->layout.s_pid;
518 obj.id = EXOFS_SUPER_ID; 577 obj.id = EXOFS_SUPER_ID;
519 exofs_make_credential(sbi->s_cred, &obj); 578 exofs_make_credential(sbi->s_cred, &obj);
520 579
@@ -578,13 +637,13 @@ static int exofs_fill_super(struct super_block *sb, void *data, int silent)
578 goto free_sbi; 637 goto free_sbi;
579 } 638 }
580 639
581 _exofs_print_device("Mounting", opts->dev_name, sbi->s_ods[0], 640 _exofs_print_device("Mounting", opts->dev_name, sbi->layout.s_ods[0],
582 sbi->s_pid); 641 sbi->layout.s_pid);
583 return 0; 642 return 0;
584 643
585free_sbi: 644free_sbi:
586 EXOFS_ERR("Unable to mount exofs on %s pid=0x%llx err=%d\n", 645 EXOFS_ERR("Unable to mount exofs on %s pid=0x%llx err=%d\n",
587 opts->dev_name, sbi->s_pid, ret); 646 opts->dev_name, sbi->layout.s_pid, ret);
588 exofs_free_sbi(sbi); 647 exofs_free_sbi(sbi);
589 return ret; 648 return ret;
590} 649}
@@ -627,7 +686,7 @@ static int exofs_statfs(struct dentry *dentry, struct kstatfs *buf)
627 uint8_t cred_a[OSD_CAP_LEN]; 686 uint8_t cred_a[OSD_CAP_LEN];
628 int ret; 687 int ret;
629 688
630 ret = exofs_get_io_state(sbi, &ios); 689 ret = exofs_get_io_state(&sbi->layout, &ios);
631 if (ret) { 690 if (ret) {
632 EXOFS_DBGMSG("exofs_get_io_state failed.\n"); 691 EXOFS_DBGMSG("exofs_get_io_state failed.\n");
633 return ret; 692 return ret;
diff --git a/fs/ext2/balloc.c b/fs/ext2/balloc.c
index 7f8d2e5a7ea6..1d081f0cfec2 100644
--- a/fs/ext2/balloc.c
+++ b/fs/ext2/balloc.c
@@ -570,7 +570,7 @@ do_more:
570error_return: 570error_return:
571 brelse(bitmap_bh); 571 brelse(bitmap_bh);
572 release_blocks(sb, freed); 572 release_blocks(sb, freed);
573 vfs_dq_free_block(inode, freed); 573 dquot_free_block(inode, freed);
574} 574}
575 575
576/** 576/**
@@ -1236,6 +1236,7 @@ ext2_fsblk_t ext2_new_blocks(struct inode *inode, ext2_fsblk_t goal,
1236 unsigned short windowsz = 0; 1236 unsigned short windowsz = 0;
1237 unsigned long ngroups; 1237 unsigned long ngroups;
1238 unsigned long num = *count; 1238 unsigned long num = *count;
1239 int ret;
1239 1240
1240 *errp = -ENOSPC; 1241 *errp = -ENOSPC;
1241 sb = inode->i_sb; 1242 sb = inode->i_sb;
@@ -1247,8 +1248,9 @@ ext2_fsblk_t ext2_new_blocks(struct inode *inode, ext2_fsblk_t goal,
1247 /* 1248 /*
1248 * Check quota for allocation of this block. 1249 * Check quota for allocation of this block.
1249 */ 1250 */
1250 if (vfs_dq_alloc_block(inode, num)) { 1251 ret = dquot_alloc_block(inode, num);
1251 *errp = -EDQUOT; 1252 if (ret) {
1253 *errp = ret;
1252 return 0; 1254 return 0;
1253 } 1255 }
1254 1256
@@ -1409,7 +1411,7 @@ allocated:
1409 1411
1410 *errp = 0; 1412 *errp = 0;
1411 brelse(bitmap_bh); 1413 brelse(bitmap_bh);
1412 vfs_dq_free_block(inode, *count-num); 1414 dquot_free_block(inode, *count-num);
1413 *count = num; 1415 *count = num;
1414 return ret_block; 1416 return ret_block;
1415 1417
@@ -1420,7 +1422,7 @@ out:
1420 * Undo the block allocation 1422 * Undo the block allocation
1421 */ 1423 */
1422 if (!performed_allocation) 1424 if (!performed_allocation)
1423 vfs_dq_free_block(inode, *count); 1425 dquot_free_block(inode, *count);
1424 brelse(bitmap_bh); 1426 brelse(bitmap_bh);
1425 return 0; 1427 return 0;
1426} 1428}
diff --git a/fs/ext2/ext2.h b/fs/ext2/ext2.h
index 061914add3cf..0b038e47ad2f 100644
--- a/fs/ext2/ext2.h
+++ b/fs/ext2/ext2.h
@@ -118,7 +118,7 @@ extern unsigned long ext2_count_free (struct buffer_head *, unsigned);
118 118
119/* inode.c */ 119/* inode.c */
120extern struct inode *ext2_iget (struct super_block *, unsigned long); 120extern struct inode *ext2_iget (struct super_block *, unsigned long);
121extern int ext2_write_inode (struct inode *, int); 121extern int ext2_write_inode (struct inode *, struct writeback_control *);
122extern void ext2_delete_inode (struct inode *); 122extern void ext2_delete_inode (struct inode *);
123extern int ext2_sync_inode (struct inode *); 123extern int ext2_sync_inode (struct inode *);
124extern int ext2_get_block(struct inode *, sector_t, struct buffer_head *, int); 124extern int ext2_get_block(struct inode *, sector_t, struct buffer_head *, int);
diff --git a/fs/ext2/file.c b/fs/ext2/file.c
index 586e3589d4c2..5d198d0697fb 100644
--- a/fs/ext2/file.c
+++ b/fs/ext2/file.c
@@ -20,6 +20,7 @@
20 20
21#include <linux/time.h> 21#include <linux/time.h>
22#include <linux/pagemap.h> 22#include <linux/pagemap.h>
23#include <linux/quotaops.h>
23#include "ext2.h" 24#include "ext2.h"
24#include "xattr.h" 25#include "xattr.h"
25#include "acl.h" 26#include "acl.h"
@@ -70,7 +71,7 @@ const struct file_operations ext2_file_operations = {
70 .compat_ioctl = ext2_compat_ioctl, 71 .compat_ioctl = ext2_compat_ioctl,
71#endif 72#endif
72 .mmap = generic_file_mmap, 73 .mmap = generic_file_mmap,
73 .open = generic_file_open, 74 .open = dquot_file_open,
74 .release = ext2_release_file, 75 .release = ext2_release_file,
75 .fsync = ext2_fsync, 76 .fsync = ext2_fsync,
76 .splice_read = generic_file_splice_read, 77 .splice_read = generic_file_splice_read,
@@ -87,7 +88,7 @@ const struct file_operations ext2_xip_file_operations = {
87 .compat_ioctl = ext2_compat_ioctl, 88 .compat_ioctl = ext2_compat_ioctl,
88#endif 89#endif
89 .mmap = xip_file_mmap, 90 .mmap = xip_file_mmap,
90 .open = generic_file_open, 91 .open = dquot_file_open,
91 .release = ext2_release_file, 92 .release = ext2_release_file,
92 .fsync = ext2_fsync, 93 .fsync = ext2_fsync,
93}; 94};
diff --git a/fs/ext2/ialloc.c b/fs/ext2/ialloc.c
index 15387c9c17d8..ad7d572ee8dc 100644
--- a/fs/ext2/ialloc.c
+++ b/fs/ext2/ialloc.c
@@ -121,8 +121,8 @@ void ext2_free_inode (struct inode * inode)
121 if (!is_bad_inode(inode)) { 121 if (!is_bad_inode(inode)) {
122 /* Quota is already initialized in iput() */ 122 /* Quota is already initialized in iput() */
123 ext2_xattr_delete_inode(inode); 123 ext2_xattr_delete_inode(inode);
124 vfs_dq_free_inode(inode); 124 dquot_free_inode(inode);
125 vfs_dq_drop(inode); 125 dquot_drop(inode);
126 } 126 }
127 127
128 es = EXT2_SB(sb)->s_es; 128 es = EXT2_SB(sb)->s_es;
@@ -586,10 +586,10 @@ got:
586 goto fail_drop; 586 goto fail_drop;
587 } 587 }
588 588
589 if (vfs_dq_alloc_inode(inode)) { 589 dquot_initialize(inode);
590 err = -EDQUOT; 590 err = dquot_alloc_inode(inode);
591 if (err)
591 goto fail_drop; 592 goto fail_drop;
592 }
593 593
594 err = ext2_init_acl(inode, dir); 594 err = ext2_init_acl(inode, dir);
595 if (err) 595 if (err)
@@ -605,10 +605,10 @@ got:
605 return inode; 605 return inode;
606 606
607fail_free_drop: 607fail_free_drop:
608 vfs_dq_free_inode(inode); 608 dquot_free_inode(inode);
609 609
610fail_drop: 610fail_drop:
611 vfs_dq_drop(inode); 611 dquot_drop(inode);
612 inode->i_flags |= S_NOQUOTA; 612 inode->i_flags |= S_NOQUOTA;
613 inode->i_nlink = 0; 613 inode->i_nlink = 0;
614 unlock_new_inode(inode); 614 unlock_new_inode(inode);
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
index 71b032c65a02..fc13cc119aad 100644
--- a/fs/ext2/inode.c
+++ b/fs/ext2/inode.c
@@ -41,6 +41,8 @@ MODULE_AUTHOR("Remy Card and others");
41MODULE_DESCRIPTION("Second Extended Filesystem"); 41MODULE_DESCRIPTION("Second Extended Filesystem");
42MODULE_LICENSE("GPL"); 42MODULE_LICENSE("GPL");
43 43
44static int __ext2_write_inode(struct inode *inode, int do_sync);
45
44/* 46/*
45 * Test whether an inode is a fast symlink. 47 * Test whether an inode is a fast symlink.
46 */ 48 */
@@ -58,13 +60,15 @@ static inline int ext2_inode_is_fast_symlink(struct inode *inode)
58 */ 60 */
59void ext2_delete_inode (struct inode * inode) 61void ext2_delete_inode (struct inode * inode)
60{ 62{
63 if (!is_bad_inode(inode))
64 dquot_initialize(inode);
61 truncate_inode_pages(&inode->i_data, 0); 65 truncate_inode_pages(&inode->i_data, 0);
62 66
63 if (is_bad_inode(inode)) 67 if (is_bad_inode(inode))
64 goto no_delete; 68 goto no_delete;
65 EXT2_I(inode)->i_dtime = get_seconds(); 69 EXT2_I(inode)->i_dtime = get_seconds();
66 mark_inode_dirty(inode); 70 mark_inode_dirty(inode);
67 ext2_write_inode(inode, inode_needs_sync(inode)); 71 __ext2_write_inode(inode, inode_needs_sync(inode));
68 72
69 inode->i_size = 0; 73 inode->i_size = 0;
70 if (inode->i_blocks) 74 if (inode->i_blocks)
@@ -1335,7 +1339,7 @@ bad_inode:
1335 return ERR_PTR(ret); 1339 return ERR_PTR(ret);
1336} 1340}
1337 1341
1338int ext2_write_inode(struct inode *inode, int do_sync) 1342static int __ext2_write_inode(struct inode *inode, int do_sync)
1339{ 1343{
1340 struct ext2_inode_info *ei = EXT2_I(inode); 1344 struct ext2_inode_info *ei = EXT2_I(inode);
1341 struct super_block *sb = inode->i_sb; 1345 struct super_block *sb = inode->i_sb;
@@ -1440,6 +1444,11 @@ int ext2_write_inode(struct inode *inode, int do_sync)
1440 return err; 1444 return err;
1441} 1445}
1442 1446
1447int ext2_write_inode(struct inode *inode, struct writeback_control *wbc)
1448{
1449 return __ext2_write_inode(inode, wbc->sync_mode == WB_SYNC_ALL);
1450}
1451
1443int ext2_sync_inode(struct inode *inode) 1452int ext2_sync_inode(struct inode *inode)
1444{ 1453{
1445 struct writeback_control wbc = { 1454 struct writeback_control wbc = {
@@ -1457,9 +1466,12 @@ int ext2_setattr(struct dentry *dentry, struct iattr *iattr)
1457 error = inode_change_ok(inode, iattr); 1466 error = inode_change_ok(inode, iattr);
1458 if (error) 1467 if (error)
1459 return error; 1468 return error;
1469
1470 if (iattr->ia_valid & ATTR_SIZE)
1471 dquot_initialize(inode);
1460 if ((iattr->ia_valid & ATTR_UID && iattr->ia_uid != inode->i_uid) || 1472 if ((iattr->ia_valid & ATTR_UID && iattr->ia_uid != inode->i_uid) ||
1461 (iattr->ia_valid & ATTR_GID && iattr->ia_gid != inode->i_gid)) { 1473 (iattr->ia_valid & ATTR_GID && iattr->ia_gid != inode->i_gid)) {
1462 error = vfs_dq_transfer(inode, iattr) ? -EDQUOT : 0; 1474 error = dquot_transfer(inode, iattr);
1463 if (error) 1475 if (error)
1464 return error; 1476 return error;
1465 } 1477 }
diff --git a/fs/ext2/namei.c b/fs/ext2/namei.c
index dd7175ce5606..71efb0e9a3f2 100644
--- a/fs/ext2/namei.c
+++ b/fs/ext2/namei.c
@@ -31,6 +31,7 @@
31 */ 31 */
32 32
33#include <linux/pagemap.h> 33#include <linux/pagemap.h>
34#include <linux/quotaops.h>
34#include "ext2.h" 35#include "ext2.h"
35#include "xattr.h" 36#include "xattr.h"
36#include "acl.h" 37#include "acl.h"
@@ -99,24 +100,27 @@ struct dentry *ext2_get_parent(struct dentry *child)
99 */ 100 */
100static int ext2_create (struct inode * dir, struct dentry * dentry, int mode, struct nameidata *nd) 101static int ext2_create (struct inode * dir, struct dentry * dentry, int mode, struct nameidata *nd)
101{ 102{
102 struct inode * inode = ext2_new_inode (dir, mode); 103 struct inode *inode;
103 int err = PTR_ERR(inode); 104
104 if (!IS_ERR(inode)) { 105 dquot_initialize(dir);
105 inode->i_op = &ext2_file_inode_operations; 106
106 if (ext2_use_xip(inode->i_sb)) { 107 inode = ext2_new_inode(dir, mode);
107 inode->i_mapping->a_ops = &ext2_aops_xip; 108 if (IS_ERR(inode))
108 inode->i_fop = &ext2_xip_file_operations; 109 return PTR_ERR(inode);
109 } else if (test_opt(inode->i_sb, NOBH)) { 110
110 inode->i_mapping->a_ops = &ext2_nobh_aops; 111 inode->i_op = &ext2_file_inode_operations;
111 inode->i_fop = &ext2_file_operations; 112 if (ext2_use_xip(inode->i_sb)) {
112 } else { 113 inode->i_mapping->a_ops = &ext2_aops_xip;
113 inode->i_mapping->a_ops = &ext2_aops; 114 inode->i_fop = &ext2_xip_file_operations;
114 inode->i_fop = &ext2_file_operations; 115 } else if (test_opt(inode->i_sb, NOBH)) {
115 } 116 inode->i_mapping->a_ops = &ext2_nobh_aops;
116 mark_inode_dirty(inode); 117 inode->i_fop = &ext2_file_operations;
117 err = ext2_add_nondir(dentry, inode); 118 } else {
119 inode->i_mapping->a_ops = &ext2_aops;
120 inode->i_fop = &ext2_file_operations;
118 } 121 }
119 return err; 122 mark_inode_dirty(inode);
123 return ext2_add_nondir(dentry, inode);
120} 124}
121 125
122static int ext2_mknod (struct inode * dir, struct dentry *dentry, int mode, dev_t rdev) 126static int ext2_mknod (struct inode * dir, struct dentry *dentry, int mode, dev_t rdev)
@@ -127,6 +131,8 @@ static int ext2_mknod (struct inode * dir, struct dentry *dentry, int mode, dev_
127 if (!new_valid_dev(rdev)) 131 if (!new_valid_dev(rdev))
128 return -EINVAL; 132 return -EINVAL;
129 133
134 dquot_initialize(dir);
135
130 inode = ext2_new_inode (dir, mode); 136 inode = ext2_new_inode (dir, mode);
131 err = PTR_ERR(inode); 137 err = PTR_ERR(inode);
132 if (!IS_ERR(inode)) { 138 if (!IS_ERR(inode)) {
@@ -151,6 +157,8 @@ static int ext2_symlink (struct inode * dir, struct dentry * dentry,
151 if (l > sb->s_blocksize) 157 if (l > sb->s_blocksize)
152 goto out; 158 goto out;
153 159
160 dquot_initialize(dir);
161
154 inode = ext2_new_inode (dir, S_IFLNK | S_IRWXUGO); 162 inode = ext2_new_inode (dir, S_IFLNK | S_IRWXUGO);
155 err = PTR_ERR(inode); 163 err = PTR_ERR(inode);
156 if (IS_ERR(inode)) 164 if (IS_ERR(inode))
@@ -194,6 +202,8 @@ static int ext2_link (struct dentry * old_dentry, struct inode * dir,
194 if (inode->i_nlink >= EXT2_LINK_MAX) 202 if (inode->i_nlink >= EXT2_LINK_MAX)
195 return -EMLINK; 203 return -EMLINK;
196 204
205 dquot_initialize(dir);
206
197 inode->i_ctime = CURRENT_TIME_SEC; 207 inode->i_ctime = CURRENT_TIME_SEC;
198 inode_inc_link_count(inode); 208 inode_inc_link_count(inode);
199 atomic_inc(&inode->i_count); 209 atomic_inc(&inode->i_count);
@@ -216,6 +226,8 @@ static int ext2_mkdir(struct inode * dir, struct dentry * dentry, int mode)
216 if (dir->i_nlink >= EXT2_LINK_MAX) 226 if (dir->i_nlink >= EXT2_LINK_MAX)
217 goto out; 227 goto out;
218 228
229 dquot_initialize(dir);
230
219 inode_inc_link_count(dir); 231 inode_inc_link_count(dir);
220 232
221 inode = ext2_new_inode (dir, S_IFDIR | mode); 233 inode = ext2_new_inode (dir, S_IFDIR | mode);
@@ -262,6 +274,8 @@ static int ext2_unlink(struct inode * dir, struct dentry *dentry)
262 struct page * page; 274 struct page * page;
263 int err = -ENOENT; 275 int err = -ENOENT;
264 276
277 dquot_initialize(dir);
278
265 de = ext2_find_entry (dir, &dentry->d_name, &page); 279 de = ext2_find_entry (dir, &dentry->d_name, &page);
266 if (!de) 280 if (!de)
267 goto out; 281 goto out;
@@ -304,6 +318,9 @@ static int ext2_rename (struct inode * old_dir, struct dentry * old_dentry,
304 struct ext2_dir_entry_2 * old_de; 318 struct ext2_dir_entry_2 * old_de;
305 int err = -ENOENT; 319 int err = -ENOENT;
306 320
321 dquot_initialize(old_dir);
322 dquot_initialize(new_dir);
323
307 old_de = ext2_find_entry (old_dir, &old_dentry->d_name, &old_page); 324 old_de = ext2_find_entry (old_dir, &old_dentry->d_name, &old_page);
308 if (!old_de) 325 if (!old_de)
309 goto out; 326 goto out;
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index f9cb54a585ce..42e4a303b675 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -194,6 +194,8 @@ static void destroy_inodecache(void)
194static void ext2_clear_inode(struct inode *inode) 194static void ext2_clear_inode(struct inode *inode)
195{ 195{
196 struct ext2_block_alloc_info *rsv = EXT2_I(inode)->i_block_alloc_info; 196 struct ext2_block_alloc_info *rsv = EXT2_I(inode)->i_block_alloc_info;
197
198 dquot_drop(inode);
197 ext2_discard_reservation(inode); 199 ext2_discard_reservation(inode);
198 EXT2_I(inode)->i_block_alloc_info = NULL; 200 EXT2_I(inode)->i_block_alloc_info = NULL;
199 if (unlikely(rsv)) 201 if (unlikely(rsv))
diff --git a/fs/ext2/xattr.c b/fs/ext2/xattr.c
index 904f00642f84..e44dc92609be 100644
--- a/fs/ext2/xattr.c
+++ b/fs/ext2/xattr.c
@@ -644,8 +644,8 @@ ext2_xattr_set2(struct inode *inode, struct buffer_head *old_bh,
644 the inode. */ 644 the inode. */
645 ea_bdebug(new_bh, "reusing block"); 645 ea_bdebug(new_bh, "reusing block");
646 646
647 error = -EDQUOT; 647 error = dquot_alloc_block(inode, 1);
648 if (vfs_dq_alloc_block(inode, 1)) { 648 if (error) {
649 unlock_buffer(new_bh); 649 unlock_buffer(new_bh);
650 goto cleanup; 650 goto cleanup;
651 } 651 }
@@ -702,7 +702,7 @@ ext2_xattr_set2(struct inode *inode, struct buffer_head *old_bh,
702 * as if nothing happened and cleanup the unused block */ 702 * as if nothing happened and cleanup the unused block */
703 if (error && error != -ENOSPC) { 703 if (error && error != -ENOSPC) {
704 if (new_bh && new_bh != old_bh) 704 if (new_bh && new_bh != old_bh)
705 vfs_dq_free_block(inode, 1); 705 dquot_free_block(inode, 1);
706 goto cleanup; 706 goto cleanup;
707 } 707 }
708 } else 708 } else
@@ -734,7 +734,7 @@ ext2_xattr_set2(struct inode *inode, struct buffer_head *old_bh,
734 le32_add_cpu(&HDR(old_bh)->h_refcount, -1); 734 le32_add_cpu(&HDR(old_bh)->h_refcount, -1);
735 if (ce) 735 if (ce)
736 mb_cache_entry_release(ce); 736 mb_cache_entry_release(ce);
737 vfs_dq_free_block(inode, 1); 737 dquot_free_block(inode, 1);
738 mark_buffer_dirty(old_bh); 738 mark_buffer_dirty(old_bh);
739 ea_bdebug(old_bh, "refcount now=%d", 739 ea_bdebug(old_bh, "refcount now=%d",
740 le32_to_cpu(HDR(old_bh)->h_refcount)); 740 le32_to_cpu(HDR(old_bh)->h_refcount));
@@ -797,7 +797,7 @@ ext2_xattr_delete_inode(struct inode *inode)
797 mark_buffer_dirty(bh); 797 mark_buffer_dirty(bh);
798 if (IS_SYNC(inode)) 798 if (IS_SYNC(inode))
799 sync_dirty_buffer(bh); 799 sync_dirty_buffer(bh);
800 vfs_dq_free_block(inode, 1); 800 dquot_free_block(inode, 1);
801 } 801 }
802 EXT2_I(inode)->i_file_acl = 0; 802 EXT2_I(inode)->i_file_acl = 0;
803 803
diff --git a/fs/ext3/balloc.c b/fs/ext3/balloc.c
index 27967f92e820..161da2d3f890 100644
--- a/fs/ext3/balloc.c
+++ b/fs/ext3/balloc.c
@@ -676,7 +676,7 @@ void ext3_free_blocks(handle_t *handle, struct inode *inode,
676 } 676 }
677 ext3_free_blocks_sb(handle, sb, block, count, &dquot_freed_blocks); 677 ext3_free_blocks_sb(handle, sb, block, count, &dquot_freed_blocks);
678 if (dquot_freed_blocks) 678 if (dquot_freed_blocks)
679 vfs_dq_free_block(inode, dquot_freed_blocks); 679 dquot_free_block(inode, dquot_freed_blocks);
680 return; 680 return;
681} 681}
682 682
@@ -1502,8 +1502,9 @@ ext3_fsblk_t ext3_new_blocks(handle_t *handle, struct inode *inode,
1502 /* 1502 /*
1503 * Check quota for allocation of this block. 1503 * Check quota for allocation of this block.
1504 */ 1504 */
1505 if (vfs_dq_alloc_block(inode, num)) { 1505 err = dquot_alloc_block(inode, num);
1506 *errp = -EDQUOT; 1506 if (err) {
1507 *errp = err;
1507 return 0; 1508 return 0;
1508 } 1509 }
1509 1510
@@ -1713,7 +1714,7 @@ allocated:
1713 1714
1714 *errp = 0; 1715 *errp = 0;
1715 brelse(bitmap_bh); 1716 brelse(bitmap_bh);
1716 vfs_dq_free_block(inode, *count-num); 1717 dquot_free_block(inode, *count-num);
1717 *count = num; 1718 *count = num;
1718 return ret_block; 1719 return ret_block;
1719 1720
@@ -1728,7 +1729,7 @@ out:
1728 * Undo the block allocation 1729 * Undo the block allocation
1729 */ 1730 */
1730 if (!performed_allocation) 1731 if (!performed_allocation)
1731 vfs_dq_free_block(inode, *count); 1732 dquot_free_block(inode, *count);
1732 brelse(bitmap_bh); 1733 brelse(bitmap_bh);
1733 return 0; 1734 return 0;
1734} 1735}
diff --git a/fs/ext3/file.c b/fs/ext3/file.c
index 388bbdfa0b4e..f55df0e61cbd 100644
--- a/fs/ext3/file.c
+++ b/fs/ext3/file.c
@@ -21,6 +21,7 @@
21#include <linux/time.h> 21#include <linux/time.h>
22#include <linux/fs.h> 22#include <linux/fs.h>
23#include <linux/jbd.h> 23#include <linux/jbd.h>
24#include <linux/quotaops.h>
24#include <linux/ext3_fs.h> 25#include <linux/ext3_fs.h>
25#include <linux/ext3_jbd.h> 26#include <linux/ext3_jbd.h>
26#include "xattr.h" 27#include "xattr.h"
@@ -33,9 +34,9 @@
33 */ 34 */
34static int ext3_release_file (struct inode * inode, struct file * filp) 35static int ext3_release_file (struct inode * inode, struct file * filp)
35{ 36{
36 if (EXT3_I(inode)->i_state & EXT3_STATE_FLUSH_ON_CLOSE) { 37 if (ext3_test_inode_state(inode, EXT3_STATE_FLUSH_ON_CLOSE)) {
37 filemap_flush(inode->i_mapping); 38 filemap_flush(inode->i_mapping);
38 EXT3_I(inode)->i_state &= ~EXT3_STATE_FLUSH_ON_CLOSE; 39 ext3_clear_inode_state(inode, EXT3_STATE_FLUSH_ON_CLOSE);
39 } 40 }
40 /* if we are the last writer on the inode, drop the block reservation */ 41 /* if we are the last writer on the inode, drop the block reservation */
41 if ((filp->f_mode & FMODE_WRITE) && 42 if ((filp->f_mode & FMODE_WRITE) &&
@@ -62,7 +63,7 @@ const struct file_operations ext3_file_operations = {
62 .compat_ioctl = ext3_compat_ioctl, 63 .compat_ioctl = ext3_compat_ioctl,
63#endif 64#endif
64 .mmap = generic_file_mmap, 65 .mmap = generic_file_mmap,
65 .open = generic_file_open, 66 .open = dquot_file_open,
66 .release = ext3_release_file, 67 .release = ext3_release_file,
67 .fsync = ext3_sync_file, 68 .fsync = ext3_sync_file,
68 .splice_read = generic_file_splice_read, 69 .splice_read = generic_file_splice_read,
diff --git a/fs/ext3/ialloc.c b/fs/ext3/ialloc.c
index b39991285136..ef9008b885b5 100644
--- a/fs/ext3/ialloc.c
+++ b/fs/ext3/ialloc.c
@@ -123,10 +123,10 @@ void ext3_free_inode (handle_t *handle, struct inode * inode)
123 * Note: we must free any quota before locking the superblock, 123 * Note: we must free any quota before locking the superblock,
124 * as writing the quota to disk may need the lock as well. 124 * as writing the quota to disk may need the lock as well.
125 */ 125 */
126 vfs_dq_init(inode); 126 dquot_initialize(inode);
127 ext3_xattr_delete_inode(handle, inode); 127 ext3_xattr_delete_inode(handle, inode);
128 vfs_dq_free_inode(inode); 128 dquot_free_inode(inode);
129 vfs_dq_drop(inode); 129 dquot_drop(inode);
130 130
131 is_directory = S_ISDIR(inode->i_mode); 131 is_directory = S_ISDIR(inode->i_mode);
132 132
@@ -588,10 +588,10 @@ got:
588 sizeof(struct ext3_inode) - EXT3_GOOD_OLD_INODE_SIZE : 0; 588 sizeof(struct ext3_inode) - EXT3_GOOD_OLD_INODE_SIZE : 0;
589 589
590 ret = inode; 590 ret = inode;
591 if (vfs_dq_alloc_inode(inode)) { 591 dquot_initialize(inode);
592 err = -EDQUOT; 592 err = dquot_alloc_inode(inode);
593 if (err)
593 goto fail_drop; 594 goto fail_drop;
594 }
595 595
596 err = ext3_init_acl(handle, inode, dir); 596 err = ext3_init_acl(handle, inode, dir);
597 if (err) 597 if (err)
@@ -619,10 +619,10 @@ really_out:
619 return ret; 619 return ret;
620 620
621fail_free_drop: 621fail_free_drop:
622 vfs_dq_free_inode(inode); 622 dquot_free_inode(inode);
623 623
624fail_drop: 624fail_drop:
625 vfs_dq_drop(inode); 625 dquot_drop(inode);
626 inode->i_flags |= S_NOQUOTA; 626 inode->i_flags |= S_NOQUOTA;
627 inode->i_nlink = 0; 627 inode->i_nlink = 0;
628 unlock_new_inode(inode); 628 unlock_new_inode(inode);
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index 455e6e6e5cb9..7f920b7263a4 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -196,6 +196,9 @@ void ext3_delete_inode (struct inode * inode)
196{ 196{
197 handle_t *handle; 197 handle_t *handle;
198 198
199 if (!is_bad_inode(inode))
200 dquot_initialize(inode);
201
199 truncate_inode_pages(&inode->i_data, 0); 202 truncate_inode_pages(&inode->i_data, 0);
200 203
201 if (is_bad_inode(inode)) 204 if (is_bad_inode(inode))
@@ -1378,7 +1381,7 @@ static int ext3_journalled_write_end(struct file *file,
1378 */ 1381 */
1379 if (pos + len > inode->i_size && ext3_can_truncate(inode)) 1382 if (pos + len > inode->i_size && ext3_can_truncate(inode))
1380 ext3_orphan_add(handle, inode); 1383 ext3_orphan_add(handle, inode);
1381 EXT3_I(inode)->i_state |= EXT3_STATE_JDATA; 1384 ext3_set_inode_state(inode, EXT3_STATE_JDATA);
1382 if (inode->i_size > EXT3_I(inode)->i_disksize) { 1385 if (inode->i_size > EXT3_I(inode)->i_disksize) {
1383 EXT3_I(inode)->i_disksize = inode->i_size; 1386 EXT3_I(inode)->i_disksize = inode->i_size;
1384 ret2 = ext3_mark_inode_dirty(handle, inode); 1387 ret2 = ext3_mark_inode_dirty(handle, inode);
@@ -1417,7 +1420,7 @@ static sector_t ext3_bmap(struct address_space *mapping, sector_t block)
1417 journal_t *journal; 1420 journal_t *journal;
1418 int err; 1421 int err;
1419 1422
1420 if (EXT3_I(inode)->i_state & EXT3_STATE_JDATA) { 1423 if (ext3_test_inode_state(inode, EXT3_STATE_JDATA)) {
1421 /* 1424 /*
1422 * This is a REALLY heavyweight approach, but the use of 1425 * This is a REALLY heavyweight approach, but the use of
1423 * bmap on dirty files is expected to be extremely rare: 1426 * bmap on dirty files is expected to be extremely rare:
@@ -1436,7 +1439,7 @@ static sector_t ext3_bmap(struct address_space *mapping, sector_t block)
1436 * everything they get. 1439 * everything they get.
1437 */ 1440 */
1438 1441
1439 EXT3_I(inode)->i_state &= ~EXT3_STATE_JDATA; 1442 ext3_clear_inode_state(inode, EXT3_STATE_JDATA);
1440 journal = EXT3_JOURNAL(inode); 1443 journal = EXT3_JOURNAL(inode);
1441 journal_lock_updates(journal); 1444 journal_lock_updates(journal);
1442 err = journal_flush(journal); 1445 err = journal_flush(journal);
@@ -1528,6 +1531,7 @@ static int ext3_ordered_writepage(struct page *page,
1528 int err; 1531 int err;
1529 1532
1530 J_ASSERT(PageLocked(page)); 1533 J_ASSERT(PageLocked(page));
1534 WARN_ON_ONCE(IS_RDONLY(inode));
1531 1535
1532 /* 1536 /*
1533 * We give up here if we're reentered, because it might be for a 1537 * We give up here if we're reentered, because it might be for a
@@ -1600,6 +1604,9 @@ static int ext3_writeback_writepage(struct page *page,
1600 int ret = 0; 1604 int ret = 0;
1601 int err; 1605 int err;
1602 1606
1607 J_ASSERT(PageLocked(page));
1608 WARN_ON_ONCE(IS_RDONLY(inode));
1609
1603 if (ext3_journal_current_handle()) 1610 if (ext3_journal_current_handle())
1604 goto out_fail; 1611 goto out_fail;
1605 1612
@@ -1642,6 +1649,9 @@ static int ext3_journalled_writepage(struct page *page,
1642 int ret = 0; 1649 int ret = 0;
1643 int err; 1650 int err;
1644 1651
1652 J_ASSERT(PageLocked(page));
1653 WARN_ON_ONCE(IS_RDONLY(inode));
1654
1645 if (ext3_journal_current_handle()) 1655 if (ext3_journal_current_handle())
1646 goto no_write; 1656 goto no_write;
1647 1657
@@ -1670,7 +1680,7 @@ static int ext3_journalled_writepage(struct page *page,
1670 PAGE_CACHE_SIZE, NULL, write_end_fn); 1680 PAGE_CACHE_SIZE, NULL, write_end_fn);
1671 if (ret == 0) 1681 if (ret == 0)
1672 ret = err; 1682 ret = err;
1673 EXT3_I(inode)->i_state |= EXT3_STATE_JDATA; 1683 ext3_set_inode_state(inode, EXT3_STATE_JDATA);
1674 unlock_page(page); 1684 unlock_page(page);
1675 } else { 1685 } else {
1676 /* 1686 /*
@@ -1785,8 +1795,9 @@ retry:
1785 handle = ext3_journal_start(inode, 2); 1795 handle = ext3_journal_start(inode, 2);
1786 if (IS_ERR(handle)) { 1796 if (IS_ERR(handle)) {
1787 /* This is really bad luck. We've written the data 1797 /* This is really bad luck. We've written the data
1788 * but cannot extend i_size. Bail out and pretend 1798 * but cannot extend i_size. Truncate allocated blocks
1789 * the write failed... */ 1799 * and pretend the write failed... */
1800 ext3_truncate(inode);
1790 ret = PTR_ERR(handle); 1801 ret = PTR_ERR(handle);
1791 goto out; 1802 goto out;
1792 } 1803 }
@@ -2402,7 +2413,7 @@ void ext3_truncate(struct inode *inode)
2402 goto out_notrans; 2413 goto out_notrans;
2403 2414
2404 if (inode->i_size == 0 && ext3_should_writeback_data(inode)) 2415 if (inode->i_size == 0 && ext3_should_writeback_data(inode))
2405 ei->i_state |= EXT3_STATE_FLUSH_ON_CLOSE; 2416 ext3_set_inode_state(inode, EXT3_STATE_FLUSH_ON_CLOSE);
2406 2417
2407 /* 2418 /*
2408 * We have to lock the EOF page here, because lock_page() nests 2419 * We have to lock the EOF page here, because lock_page() nests
@@ -2721,7 +2732,7 @@ int ext3_get_inode_loc(struct inode *inode, struct ext3_iloc *iloc)
2721{ 2732{
2722 /* We have all inode data except xattrs in memory here. */ 2733 /* We have all inode data except xattrs in memory here. */
2723 return __ext3_get_inode_loc(inode, iloc, 2734 return __ext3_get_inode_loc(inode, iloc,
2724 !(EXT3_I(inode)->i_state & EXT3_STATE_XATTR)); 2735 !ext3_test_inode_state(inode, EXT3_STATE_XATTR));
2725} 2736}
2726 2737
2727void ext3_set_inode_flags(struct inode *inode) 2738void ext3_set_inode_flags(struct inode *inode)
@@ -2893,7 +2904,7 @@ struct inode *ext3_iget(struct super_block *sb, unsigned long ino)
2893 EXT3_GOOD_OLD_INODE_SIZE + 2904 EXT3_GOOD_OLD_INODE_SIZE +
2894 ei->i_extra_isize; 2905 ei->i_extra_isize;
2895 if (*magic == cpu_to_le32(EXT3_XATTR_MAGIC)) 2906 if (*magic == cpu_to_le32(EXT3_XATTR_MAGIC))
2896 ei->i_state |= EXT3_STATE_XATTR; 2907 ext3_set_inode_state(inode, EXT3_STATE_XATTR);
2897 } 2908 }
2898 } else 2909 } else
2899 ei->i_extra_isize = 0; 2910 ei->i_extra_isize = 0;
@@ -2955,7 +2966,7 @@ again:
2955 2966
2956 /* For fields not not tracking in the in-memory inode, 2967 /* For fields not not tracking in the in-memory inode,
2957 * initialise them to zero for new inodes. */ 2968 * initialise them to zero for new inodes. */
2958 if (ei->i_state & EXT3_STATE_NEW) 2969 if (ext3_test_inode_state(inode, EXT3_STATE_NEW))
2959 memset(raw_inode, 0, EXT3_SB(inode->i_sb)->s_inode_size); 2970 memset(raw_inode, 0, EXT3_SB(inode->i_sb)->s_inode_size);
2960 2971
2961 ext3_get_inode_flags(ei); 2972 ext3_get_inode_flags(ei);
@@ -3052,7 +3063,7 @@ again:
3052 rc = ext3_journal_dirty_metadata(handle, bh); 3063 rc = ext3_journal_dirty_metadata(handle, bh);
3053 if (!err) 3064 if (!err)
3054 err = rc; 3065 err = rc;
3055 ei->i_state &= ~EXT3_STATE_NEW; 3066 ext3_clear_inode_state(inode, EXT3_STATE_NEW);
3056 3067
3057 atomic_set(&ei->i_sync_tid, handle->h_transaction->t_tid); 3068 atomic_set(&ei->i_sync_tid, handle->h_transaction->t_tid);
3058out_brelse: 3069out_brelse:
@@ -3096,7 +3107,7 @@ out_brelse:
3096 * `stuff()' is running, and the new i_size will be lost. Plus the inode 3107 * `stuff()' is running, and the new i_size will be lost. Plus the inode
3097 * will no longer be on the superblock's dirty inode list. 3108 * will no longer be on the superblock's dirty inode list.
3098 */ 3109 */
3099int ext3_write_inode(struct inode *inode, int wait) 3110int ext3_write_inode(struct inode *inode, struct writeback_control *wbc)
3100{ 3111{
3101 if (current->flags & PF_MEMALLOC) 3112 if (current->flags & PF_MEMALLOC)
3102 return 0; 3113 return 0;
@@ -3107,7 +3118,7 @@ int ext3_write_inode(struct inode *inode, int wait)
3107 return -EIO; 3118 return -EIO;
3108 } 3119 }
3109 3120
3110 if (!wait) 3121 if (wbc->sync_mode != WB_SYNC_ALL)
3111 return 0; 3122 return 0;
3112 3123
3113 return ext3_force_commit(inode->i_sb); 3124 return ext3_force_commit(inode->i_sb);
@@ -3140,6 +3151,8 @@ int ext3_setattr(struct dentry *dentry, struct iattr *attr)
3140 if (error) 3151 if (error)
3141 return error; 3152 return error;
3142 3153
3154 if (ia_valid & ATTR_SIZE)
3155 dquot_initialize(inode);
3143 if ((ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid) || 3156 if ((ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid) ||
3144 (ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid)) { 3157 (ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid)) {
3145 handle_t *handle; 3158 handle_t *handle;
@@ -3152,7 +3165,7 @@ int ext3_setattr(struct dentry *dentry, struct iattr *attr)
3152 error = PTR_ERR(handle); 3165 error = PTR_ERR(handle);
3153 goto err_out; 3166 goto err_out;
3154 } 3167 }
3155 error = vfs_dq_transfer(inode, attr) ? -EDQUOT : 0; 3168 error = dquot_transfer(inode, attr);
3156 if (error) { 3169 if (error) {
3157 ext3_journal_stop(handle); 3170 ext3_journal_stop(handle);
3158 return error; 3171 return error;
@@ -3237,7 +3250,7 @@ static int ext3_writepage_trans_blocks(struct inode *inode)
3237 ret = 2 * (bpp + indirects) + 2; 3250 ret = 2 * (bpp + indirects) + 2;
3238 3251
3239#ifdef CONFIG_QUOTA 3252#ifdef CONFIG_QUOTA
3240 /* We know that structure was already allocated during vfs_dq_init so 3253 /* We know that structure was already allocated during dquot_initialize so
3241 * we will be updating only the data blocks + inodes */ 3254 * we will be updating only the data blocks + inodes */
3242 ret += EXT3_MAXQUOTAS_TRANS_BLOCKS(inode->i_sb); 3255 ret += EXT3_MAXQUOTAS_TRANS_BLOCKS(inode->i_sb);
3243#endif 3256#endif
@@ -3328,7 +3341,7 @@ int ext3_mark_inode_dirty(handle_t *handle, struct inode *inode)
3328 * i_size has been changed by generic_commit_write() and we thus need 3341 * i_size has been changed by generic_commit_write() and we thus need
3329 * to include the updated inode in the current transaction. 3342 * to include the updated inode in the current transaction.
3330 * 3343 *
3331 * Also, vfs_dq_alloc_space() will always dirty the inode when blocks 3344 * Also, dquot_alloc_space() will always dirty the inode when blocks
3332 * are allocated to the file. 3345 * are allocated to the file.
3333 * 3346 *
3334 * If the inode is marked synchronous, we don't honour that here - doing 3347 * If the inode is marked synchronous, we don't honour that here - doing
diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c
index 7b0e44f7d66f..ee184084ca42 100644
--- a/fs/ext3/namei.c
+++ b/fs/ext3/namei.c
@@ -1696,6 +1696,8 @@ static int ext3_create (struct inode * dir, struct dentry * dentry, int mode,
1696 struct inode * inode; 1696 struct inode * inode;
1697 int err, retries = 0; 1697 int err, retries = 0;
1698 1698
1699 dquot_initialize(dir);
1700
1699retry: 1701retry:
1700 handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS(dir->i_sb) + 1702 handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS(dir->i_sb) +
1701 EXT3_INDEX_EXTRA_TRANS_BLOCKS + 3 + 1703 EXT3_INDEX_EXTRA_TRANS_BLOCKS + 3 +
@@ -1730,6 +1732,8 @@ static int ext3_mknod (struct inode * dir, struct dentry *dentry,
1730 if (!new_valid_dev(rdev)) 1732 if (!new_valid_dev(rdev))
1731 return -EINVAL; 1733 return -EINVAL;
1732 1734
1735 dquot_initialize(dir);
1736
1733retry: 1737retry:
1734 handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS(dir->i_sb) + 1738 handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS(dir->i_sb) +
1735 EXT3_INDEX_EXTRA_TRANS_BLOCKS + 3 + 1739 EXT3_INDEX_EXTRA_TRANS_BLOCKS + 3 +
@@ -1766,6 +1770,8 @@ static int ext3_mkdir(struct inode * dir, struct dentry * dentry, int mode)
1766 if (dir->i_nlink >= EXT3_LINK_MAX) 1770 if (dir->i_nlink >= EXT3_LINK_MAX)
1767 return -EMLINK; 1771 return -EMLINK;
1768 1772
1773 dquot_initialize(dir);
1774
1769retry: 1775retry:
1770 handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS(dir->i_sb) + 1776 handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS(dir->i_sb) +
1771 EXT3_INDEX_EXTRA_TRANS_BLOCKS + 3 + 1777 EXT3_INDEX_EXTRA_TRANS_BLOCKS + 3 +
@@ -2060,7 +2066,9 @@ static int ext3_rmdir (struct inode * dir, struct dentry *dentry)
2060 2066
2061 /* Initialize quotas before so that eventual writes go in 2067 /* Initialize quotas before so that eventual writes go in
2062 * separate transaction */ 2068 * separate transaction */
2063 vfs_dq_init(dentry->d_inode); 2069 dquot_initialize(dir);
2070 dquot_initialize(dentry->d_inode);
2071
2064 handle = ext3_journal_start(dir, EXT3_DELETE_TRANS_BLOCKS(dir->i_sb)); 2072 handle = ext3_journal_start(dir, EXT3_DELETE_TRANS_BLOCKS(dir->i_sb));
2065 if (IS_ERR(handle)) 2073 if (IS_ERR(handle))
2066 return PTR_ERR(handle); 2074 return PTR_ERR(handle);
@@ -2119,7 +2127,9 @@ static int ext3_unlink(struct inode * dir, struct dentry *dentry)
2119 2127
2120 /* Initialize quotas before so that eventual writes go 2128 /* Initialize quotas before so that eventual writes go
2121 * in separate transaction */ 2129 * in separate transaction */
2122 vfs_dq_init(dentry->d_inode); 2130 dquot_initialize(dir);
2131 dquot_initialize(dentry->d_inode);
2132
2123 handle = ext3_journal_start(dir, EXT3_DELETE_TRANS_BLOCKS(dir->i_sb)); 2133 handle = ext3_journal_start(dir, EXT3_DELETE_TRANS_BLOCKS(dir->i_sb));
2124 if (IS_ERR(handle)) 2134 if (IS_ERR(handle))
2125 return PTR_ERR(handle); 2135 return PTR_ERR(handle);
@@ -2174,6 +2184,8 @@ static int ext3_symlink (struct inode * dir,
2174 if (l > dir->i_sb->s_blocksize) 2184 if (l > dir->i_sb->s_blocksize)
2175 return -ENAMETOOLONG; 2185 return -ENAMETOOLONG;
2176 2186
2187 dquot_initialize(dir);
2188
2177retry: 2189retry:
2178 handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS(dir->i_sb) + 2190 handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS(dir->i_sb) +
2179 EXT3_INDEX_EXTRA_TRANS_BLOCKS + 5 + 2191 EXT3_INDEX_EXTRA_TRANS_BLOCKS + 5 +
@@ -2228,6 +2240,9 @@ static int ext3_link (struct dentry * old_dentry,
2228 2240
2229 if (inode->i_nlink >= EXT3_LINK_MAX) 2241 if (inode->i_nlink >= EXT3_LINK_MAX)
2230 return -EMLINK; 2242 return -EMLINK;
2243
2244 dquot_initialize(dir);
2245
2231 /* 2246 /*
2232 * Return -ENOENT if we've raced with unlink and i_nlink is 0. Doing 2247 * Return -ENOENT if we've raced with unlink and i_nlink is 0. Doing
2233 * otherwise has the potential to corrupt the orphan inode list. 2248 * otherwise has the potential to corrupt the orphan inode list.
@@ -2278,12 +2293,15 @@ static int ext3_rename (struct inode * old_dir, struct dentry *old_dentry,
2278 struct ext3_dir_entry_2 * old_de, * new_de; 2293 struct ext3_dir_entry_2 * old_de, * new_de;
2279 int retval, flush_file = 0; 2294 int retval, flush_file = 0;
2280 2295
2296 dquot_initialize(old_dir);
2297 dquot_initialize(new_dir);
2298
2281 old_bh = new_bh = dir_bh = NULL; 2299 old_bh = new_bh = dir_bh = NULL;
2282 2300
2283 /* Initialize quotas before so that eventual writes go 2301 /* Initialize quotas before so that eventual writes go
2284 * in separate transaction */ 2302 * in separate transaction */
2285 if (new_dentry->d_inode) 2303 if (new_dentry->d_inode)
2286 vfs_dq_init(new_dentry->d_inode); 2304 dquot_initialize(new_dentry->d_inode);
2287 handle = ext3_journal_start(old_dir, 2 * 2305 handle = ext3_journal_start(old_dir, 2 *
2288 EXT3_DATA_TRANS_BLOCKS(old_dir->i_sb) + 2306 EXT3_DATA_TRANS_BLOCKS(old_dir->i_sb) +
2289 EXT3_INDEX_EXTRA_TRANS_BLOCKS + 2); 2307 EXT3_INDEX_EXTRA_TRANS_BLOCKS + 2);
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index afa2b569da10..e844accbf55d 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -181,7 +181,7 @@ static void ext3_handle_error(struct super_block *sb)
181 if (!test_opt (sb, ERRORS_CONT)) { 181 if (!test_opt (sb, ERRORS_CONT)) {
182 journal_t *journal = EXT3_SB(sb)->s_journal; 182 journal_t *journal = EXT3_SB(sb)->s_journal;
183 183
184 EXT3_SB(sb)->s_mount_opt |= EXT3_MOUNT_ABORT; 184 set_opt(EXT3_SB(sb)->s_mount_opt, ABORT);
185 if (journal) 185 if (journal)
186 journal_abort(journal, -EIO); 186 journal_abort(journal, -EIO);
187 } 187 }
@@ -296,7 +296,7 @@ void ext3_abort (struct super_block * sb, const char * function,
296 "error: remounting filesystem read-only"); 296 "error: remounting filesystem read-only");
297 EXT3_SB(sb)->s_mount_state |= EXT3_ERROR_FS; 297 EXT3_SB(sb)->s_mount_state |= EXT3_ERROR_FS;
298 sb->s_flags |= MS_RDONLY; 298 sb->s_flags |= MS_RDONLY;
299 EXT3_SB(sb)->s_mount_opt |= EXT3_MOUNT_ABORT; 299 set_opt(EXT3_SB(sb)->s_mount_opt, ABORT);
300 if (EXT3_SB(sb)->s_journal) 300 if (EXT3_SB(sb)->s_journal)
301 journal_abort(EXT3_SB(sb)->s_journal, -EIO); 301 journal_abort(EXT3_SB(sb)->s_journal, -EIO);
302} 302}
@@ -528,6 +528,8 @@ static void destroy_inodecache(void)
528static void ext3_clear_inode(struct inode *inode) 528static void ext3_clear_inode(struct inode *inode)
529{ 529{
530 struct ext3_block_alloc_info *rsv = EXT3_I(inode)->i_block_alloc_info; 530 struct ext3_block_alloc_info *rsv = EXT3_I(inode)->i_block_alloc_info;
531
532 dquot_drop(inode);
531 ext3_discard_reservation(inode); 533 ext3_discard_reservation(inode);
532 EXT3_I(inode)->i_block_alloc_info = NULL; 534 EXT3_I(inode)->i_block_alloc_info = NULL;
533 if (unlikely(rsv)) 535 if (unlikely(rsv))
@@ -562,10 +564,10 @@ static inline void ext3_show_quota_options(struct seq_file *seq, struct super_bl
562 if (sbi->s_qf_names[GRPQUOTA]) 564 if (sbi->s_qf_names[GRPQUOTA])
563 seq_printf(seq, ",grpjquota=%s", sbi->s_qf_names[GRPQUOTA]); 565 seq_printf(seq, ",grpjquota=%s", sbi->s_qf_names[GRPQUOTA]);
564 566
565 if (sbi->s_mount_opt & EXT3_MOUNT_USRQUOTA) 567 if (test_opt(sb, USRQUOTA))
566 seq_puts(seq, ",usrquota"); 568 seq_puts(seq, ",usrquota");
567 569
568 if (sbi->s_mount_opt & EXT3_MOUNT_GRPQUOTA) 570 if (test_opt(sb, GRPQUOTA))
569 seq_puts(seq, ",grpquota"); 571 seq_puts(seq, ",grpquota");
570#endif 572#endif
571} 573}
@@ -656,8 +658,7 @@ static int ext3_show_options(struct seq_file *seq, struct vfsmount *vfs)
656 if (test_opt(sb, NOBH)) 658 if (test_opt(sb, NOBH))
657 seq_puts(seq, ",nobh"); 659 seq_puts(seq, ",nobh");
658 660
659 seq_printf(seq, ",data=%s", data_mode_string(sbi->s_mount_opt & 661 seq_printf(seq, ",data=%s", data_mode_string(test_opt(sb, DATA_FLAGS)));
660 EXT3_MOUNT_DATA_FLAGS));
661 if (test_opt(sb, DATA_ERR_ABORT)) 662 if (test_opt(sb, DATA_ERR_ABORT))
662 seq_puts(seq, ",data_err=abort"); 663 seq_puts(seq, ",data_err=abort");
663 664
@@ -751,13 +752,6 @@ static ssize_t ext3_quota_write(struct super_block *sb, int type,
751 const char *data, size_t len, loff_t off); 752 const char *data, size_t len, loff_t off);
752 753
753static const struct dquot_operations ext3_quota_operations = { 754static const struct dquot_operations ext3_quota_operations = {
754 .initialize = dquot_initialize,
755 .drop = dquot_drop,
756 .alloc_space = dquot_alloc_space,
757 .alloc_inode = dquot_alloc_inode,
758 .free_space = dquot_free_space,
759 .free_inode = dquot_free_inode,
760 .transfer = dquot_transfer,
761 .write_dquot = ext3_write_dquot, 755 .write_dquot = ext3_write_dquot,
762 .acquire_dquot = ext3_acquire_dquot, 756 .acquire_dquot = ext3_acquire_dquot,
763 .release_dquot = ext3_release_dquot, 757 .release_dquot = ext3_release_dquot,
@@ -896,6 +890,63 @@ static ext3_fsblk_t get_sb_block(void **data, struct super_block *sb)
896 return sb_block; 890 return sb_block;
897} 891}
898 892
893#ifdef CONFIG_QUOTA
894static int set_qf_name(struct super_block *sb, int qtype, substring_t *args)
895{
896 struct ext3_sb_info *sbi = EXT3_SB(sb);
897 char *qname;
898
899 if (sb_any_quota_loaded(sb) &&
900 !sbi->s_qf_names[qtype]) {
901 ext3_msg(sb, KERN_ERR,
902 "Cannot change journaled "
903 "quota options when quota turned on");
904 return 0;
905 }
906 qname = match_strdup(args);
907 if (!qname) {
908 ext3_msg(sb, KERN_ERR,
909 "Not enough memory for storing quotafile name");
910 return 0;
911 }
912 if (sbi->s_qf_names[qtype] &&
913 strcmp(sbi->s_qf_names[qtype], qname)) {
914 ext3_msg(sb, KERN_ERR,
915 "%s quota file already specified", QTYPE2NAME(qtype));
916 kfree(qname);
917 return 0;
918 }
919 sbi->s_qf_names[qtype] = qname;
920 if (strchr(sbi->s_qf_names[qtype], '/')) {
921 ext3_msg(sb, KERN_ERR,
922 "quotafile must be on filesystem root");
923 kfree(sbi->s_qf_names[qtype]);
924 sbi->s_qf_names[qtype] = NULL;
925 return 0;
926 }
927 set_opt(sbi->s_mount_opt, QUOTA);
928 return 1;
929}
930
931static int clear_qf_name(struct super_block *sb, int qtype) {
932
933 struct ext3_sb_info *sbi = EXT3_SB(sb);
934
935 if (sb_any_quota_loaded(sb) &&
936 sbi->s_qf_names[qtype]) {
937 ext3_msg(sb, KERN_ERR, "Cannot change journaled quota options"
938 " when quota turned on");
939 return 0;
940 }
941 /*
942 * The space will be released later when all options are confirmed
943 * to be correct
944 */
945 sbi->s_qf_names[qtype] = NULL;
946 return 1;
947}
948#endif
949
899static int parse_options (char *options, struct super_block *sb, 950static int parse_options (char *options, struct super_block *sb,
900 unsigned int *inum, unsigned long *journal_devnum, 951 unsigned int *inum, unsigned long *journal_devnum,
901 ext3_fsblk_t *n_blocks_count, int is_remount) 952 ext3_fsblk_t *n_blocks_count, int is_remount)
@@ -906,8 +957,7 @@ static int parse_options (char *options, struct super_block *sb,
906 int data_opt = 0; 957 int data_opt = 0;
907 int option; 958 int option;
908#ifdef CONFIG_QUOTA 959#ifdef CONFIG_QUOTA
909 int qtype, qfmt; 960 int qfmt;
910 char *qname;
911#endif 961#endif
912 962
913 if (!options) 963 if (!options)
@@ -1065,20 +1115,19 @@ static int parse_options (char *options, struct super_block *sb,
1065 data_opt = EXT3_MOUNT_WRITEBACK_DATA; 1115 data_opt = EXT3_MOUNT_WRITEBACK_DATA;
1066 datacheck: 1116 datacheck:
1067 if (is_remount) { 1117 if (is_remount) {
1068 if ((sbi->s_mount_opt & EXT3_MOUNT_DATA_FLAGS) 1118 if (test_opt(sb, DATA_FLAGS) == data_opt)
1069 == data_opt)
1070 break; 1119 break;
1071 ext3_msg(sb, KERN_ERR, 1120 ext3_msg(sb, KERN_ERR,
1072 "error: cannot change " 1121 "error: cannot change "
1073 "data mode on remount. The filesystem " 1122 "data mode on remount. The filesystem "
1074 "is mounted in data=%s mode and you " 1123 "is mounted in data=%s mode and you "
1075 "try to remount it in data=%s mode.", 1124 "try to remount it in data=%s mode.",
1076 data_mode_string(sbi->s_mount_opt & 1125 data_mode_string(test_opt(sb,
1077 EXT3_MOUNT_DATA_FLAGS), 1126 DATA_FLAGS)),
1078 data_mode_string(data_opt)); 1127 data_mode_string(data_opt));
1079 return 0; 1128 return 0;
1080 } else { 1129 } else {
1081 sbi->s_mount_opt &= ~EXT3_MOUNT_DATA_FLAGS; 1130 clear_opt(sbi->s_mount_opt, DATA_FLAGS);
1082 sbi->s_mount_opt |= data_opt; 1131 sbi->s_mount_opt |= data_opt;
1083 } 1132 }
1084 break; 1133 break;
@@ -1090,62 +1139,20 @@ static int parse_options (char *options, struct super_block *sb,
1090 break; 1139 break;
1091#ifdef CONFIG_QUOTA 1140#ifdef CONFIG_QUOTA
1092 case Opt_usrjquota: 1141 case Opt_usrjquota:
1093 qtype = USRQUOTA; 1142 if (!set_qf_name(sb, USRQUOTA, &args[0]))
1094 goto set_qf_name;
1095 case Opt_grpjquota:
1096 qtype = GRPQUOTA;
1097set_qf_name:
1098 if (sb_any_quota_loaded(sb) &&
1099 !sbi->s_qf_names[qtype]) {
1100 ext3_msg(sb, KERN_ERR,
1101 "error: cannot change journaled "
1102 "quota options when quota turned on.");
1103 return 0;
1104 }
1105 qname = match_strdup(&args[0]);
1106 if (!qname) {
1107 ext3_msg(sb, KERN_ERR,
1108 "error: not enough memory for "
1109 "storing quotafile name.");
1110 return 0; 1143 return 0;
1111 } 1144 break;
1112 if (sbi->s_qf_names[qtype] && 1145 case Opt_grpjquota:
1113 strcmp(sbi->s_qf_names[qtype], qname)) { 1146 if (!set_qf_name(sb, GRPQUOTA, &args[0]))
1114 ext3_msg(sb, KERN_ERR,
1115 "error: %s quota file already "
1116 "specified.", QTYPE2NAME(qtype));
1117 kfree(qname);
1118 return 0;
1119 }
1120 sbi->s_qf_names[qtype] = qname;
1121 if (strchr(sbi->s_qf_names[qtype], '/')) {
1122 ext3_msg(sb, KERN_ERR,
1123 "error: quotafile must be on "
1124 "filesystem root.");
1125 kfree(sbi->s_qf_names[qtype]);
1126 sbi->s_qf_names[qtype] = NULL;
1127 return 0; 1147 return 0;
1128 }
1129 set_opt(sbi->s_mount_opt, QUOTA);
1130 break; 1148 break;
1131 case Opt_offusrjquota: 1149 case Opt_offusrjquota:
1132 qtype = USRQUOTA; 1150 if (!clear_qf_name(sb, USRQUOTA))
1133 goto clear_qf_name; 1151 return 0;
1152 break;
1134 case Opt_offgrpjquota: 1153 case Opt_offgrpjquota:
1135 qtype = GRPQUOTA; 1154 if (!clear_qf_name(sb, GRPQUOTA))
1136clear_qf_name:
1137 if (sb_any_quota_loaded(sb) &&
1138 sbi->s_qf_names[qtype]) {
1139 ext3_msg(sb, KERN_ERR, "error: cannot change "
1140 "journaled quota options when "
1141 "quota turned on.");
1142 return 0; 1155 return 0;
1143 }
1144 /*
1145 * The space will be released later when all options
1146 * are confirmed to be correct
1147 */
1148 sbi->s_qf_names[qtype] = NULL;
1149 break; 1156 break;
1150 case Opt_jqfmt_vfsold: 1157 case Opt_jqfmt_vfsold:
1151 qfmt = QFMT_VFS_OLD; 1158 qfmt = QFMT_VFS_OLD;
@@ -1244,18 +1251,12 @@ set_qf_format:
1244 } 1251 }
1245#ifdef CONFIG_QUOTA 1252#ifdef CONFIG_QUOTA
1246 if (sbi->s_qf_names[USRQUOTA] || sbi->s_qf_names[GRPQUOTA]) { 1253 if (sbi->s_qf_names[USRQUOTA] || sbi->s_qf_names[GRPQUOTA]) {
1247 if ((sbi->s_mount_opt & EXT3_MOUNT_USRQUOTA) && 1254 if (test_opt(sb, USRQUOTA) && sbi->s_qf_names[USRQUOTA])
1248 sbi->s_qf_names[USRQUOTA])
1249 clear_opt(sbi->s_mount_opt, USRQUOTA); 1255 clear_opt(sbi->s_mount_opt, USRQUOTA);
1250 1256 if (test_opt(sb, GRPQUOTA) && sbi->s_qf_names[GRPQUOTA])
1251 if ((sbi->s_mount_opt & EXT3_MOUNT_GRPQUOTA) &&
1252 sbi->s_qf_names[GRPQUOTA])
1253 clear_opt(sbi->s_mount_opt, GRPQUOTA); 1257 clear_opt(sbi->s_mount_opt, GRPQUOTA);
1254 1258
1255 if ((sbi->s_qf_names[USRQUOTA] && 1259 if (test_opt(sb, GRPQUOTA) || test_opt(sb, USRQUOTA)) {
1256 (sbi->s_mount_opt & EXT3_MOUNT_GRPQUOTA)) ||
1257 (sbi->s_qf_names[GRPQUOTA] &&
1258 (sbi->s_mount_opt & EXT3_MOUNT_USRQUOTA))) {
1259 ext3_msg(sb, KERN_ERR, "error: old and new quota " 1260 ext3_msg(sb, KERN_ERR, "error: old and new quota "
1260 "format mixing."); 1261 "format mixing.");
1261 return 0; 1262 return 0;
@@ -1478,7 +1479,7 @@ static void ext3_orphan_cleanup (struct super_block * sb,
1478 } 1479 }
1479 1480
1480 list_add(&EXT3_I(inode)->i_orphan, &EXT3_SB(sb)->s_orphan); 1481 list_add(&EXT3_I(inode)->i_orphan, &EXT3_SB(sb)->s_orphan);
1481 vfs_dq_init(inode); 1482 dquot_initialize(inode);
1482 if (inode->i_nlink) { 1483 if (inode->i_nlink) {
1483 printk(KERN_DEBUG 1484 printk(KERN_DEBUG
1484 "%s: truncating inode %lu to %Ld bytes\n", 1485 "%s: truncating inode %lu to %Ld bytes\n",
@@ -1671,11 +1672,11 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
1671 set_opt(sbi->s_mount_opt, POSIX_ACL); 1672 set_opt(sbi->s_mount_opt, POSIX_ACL);
1672#endif 1673#endif
1673 if ((def_mount_opts & EXT3_DEFM_JMODE) == EXT3_DEFM_JMODE_DATA) 1674 if ((def_mount_opts & EXT3_DEFM_JMODE) == EXT3_DEFM_JMODE_DATA)
1674 sbi->s_mount_opt |= EXT3_MOUNT_JOURNAL_DATA; 1675 set_opt(sbi->s_mount_opt, JOURNAL_DATA);
1675 else if ((def_mount_opts & EXT3_DEFM_JMODE) == EXT3_DEFM_JMODE_ORDERED) 1676 else if ((def_mount_opts & EXT3_DEFM_JMODE) == EXT3_DEFM_JMODE_ORDERED)
1676 sbi->s_mount_opt |= EXT3_MOUNT_ORDERED_DATA; 1677 set_opt(sbi->s_mount_opt, ORDERED_DATA);
1677 else if ((def_mount_opts & EXT3_DEFM_JMODE) == EXT3_DEFM_JMODE_WBACK) 1678 else if ((def_mount_opts & EXT3_DEFM_JMODE) == EXT3_DEFM_JMODE_WBACK)
1678 sbi->s_mount_opt |= EXT3_MOUNT_WRITEBACK_DATA; 1679 set_opt(sbi->s_mount_opt, WRITEBACK_DATA);
1679 1680
1680 if (le16_to_cpu(sbi->s_es->s_errors) == EXT3_ERRORS_PANIC) 1681 if (le16_to_cpu(sbi->s_es->s_errors) == EXT3_ERRORS_PANIC)
1681 set_opt(sbi->s_mount_opt, ERRORS_PANIC); 1682 set_opt(sbi->s_mount_opt, ERRORS_PANIC);
@@ -1694,7 +1695,7 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
1694 goto failed_mount; 1695 goto failed_mount;
1695 1696
1696 sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | 1697 sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
1697 ((sbi->s_mount_opt & EXT3_MOUNT_POSIX_ACL) ? MS_POSIXACL : 0); 1698 (test_opt(sb, POSIX_ACL) ? MS_POSIXACL : 0);
1698 1699
1699 if (le32_to_cpu(es->s_rev_level) == EXT3_GOOD_OLD_REV && 1700 if (le32_to_cpu(es->s_rev_level) == EXT3_GOOD_OLD_REV &&
1700 (EXT3_HAS_COMPAT_FEATURE(sb, ~0U) || 1701 (EXT3_HAS_COMPAT_FEATURE(sb, ~0U) ||
@@ -2561,11 +2562,11 @@ static int ext3_remount (struct super_block * sb, int * flags, char * data)
2561 goto restore_opts; 2562 goto restore_opts;
2562 } 2563 }
2563 2564
2564 if (sbi->s_mount_opt & EXT3_MOUNT_ABORT) 2565 if (test_opt(sb, ABORT))
2565 ext3_abort(sb, __func__, "Abort forced by user"); 2566 ext3_abort(sb, __func__, "Abort forced by user");
2566 2567
2567 sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | 2568 sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
2568 ((sbi->s_mount_opt & EXT3_MOUNT_POSIX_ACL) ? MS_POSIXACL : 0); 2569 (test_opt(sb, POSIX_ACL) ? MS_POSIXACL : 0);
2569 2570
2570 es = sbi->s_es; 2571 es = sbi->s_es;
2571 2572
@@ -2573,7 +2574,7 @@ static int ext3_remount (struct super_block * sb, int * flags, char * data)
2573 2574
2574 if ((*flags & MS_RDONLY) != (sb->s_flags & MS_RDONLY) || 2575 if ((*flags & MS_RDONLY) != (sb->s_flags & MS_RDONLY) ||
2575 n_blocks_count > le32_to_cpu(es->s_blocks_count)) { 2576 n_blocks_count > le32_to_cpu(es->s_blocks_count)) {
2576 if (sbi->s_mount_opt & EXT3_MOUNT_ABORT) { 2577 if (test_opt(sb, ABORT)) {
2577 err = -EROFS; 2578 err = -EROFS;
2578 goto restore_opts; 2579 goto restore_opts;
2579 } 2580 }
@@ -2734,7 +2735,7 @@ static int ext3_statfs (struct dentry * dentry, struct kstatfs * buf)
2734 * Process 1 Process 2 2735 * Process 1 Process 2
2735 * ext3_create() quota_sync() 2736 * ext3_create() quota_sync()
2736 * journal_start() write_dquot() 2737 * journal_start() write_dquot()
2737 * vfs_dq_init() down(dqio_mutex) 2738 * dquot_initialize() down(dqio_mutex)
2738 * down(dqio_mutex) journal_start() 2739 * down(dqio_mutex) journal_start()
2739 * 2740 *
2740 */ 2741 */
@@ -2942,9 +2943,7 @@ static ssize_t ext3_quota_write(struct super_block *sb, int type,
2942 sector_t blk = off >> EXT3_BLOCK_SIZE_BITS(sb); 2943 sector_t blk = off >> EXT3_BLOCK_SIZE_BITS(sb);
2943 int err = 0; 2944 int err = 0;
2944 int offset = off & (sb->s_blocksize - 1); 2945 int offset = off & (sb->s_blocksize - 1);
2945 int tocopy;
2946 int journal_quota = EXT3_SB(sb)->s_qf_names[type] != NULL; 2946 int journal_quota = EXT3_SB(sb)->s_qf_names[type] != NULL;
2947 size_t towrite = len;
2948 struct buffer_head *bh; 2947 struct buffer_head *bh;
2949 handle_t *handle = journal_current_handle(); 2948 handle_t *handle = journal_current_handle();
2950 2949
@@ -2955,53 +2954,54 @@ static ssize_t ext3_quota_write(struct super_block *sb, int type,
2955 (unsigned long long)off, (unsigned long long)len); 2954 (unsigned long long)off, (unsigned long long)len);
2956 return -EIO; 2955 return -EIO;
2957 } 2956 }
2957
2958 /*
2959 * Since we account only one data block in transaction credits,
2960 * then it is impossible to cross a block boundary.
2961 */
2962 if (sb->s_blocksize - offset < len) {
2963 ext3_msg(sb, KERN_WARNING, "Quota write (off=%llu, len=%llu)"
2964 " cancelled because not block aligned",
2965 (unsigned long long)off, (unsigned long long)len);
2966 return -EIO;
2967 }
2958 mutex_lock_nested(&inode->i_mutex, I_MUTEX_QUOTA); 2968 mutex_lock_nested(&inode->i_mutex, I_MUTEX_QUOTA);
2959 while (towrite > 0) { 2969 bh = ext3_bread(handle, inode, blk, 1, &err);
2960 tocopy = sb->s_blocksize - offset < towrite ? 2970 if (!bh)
2961 sb->s_blocksize - offset : towrite; 2971 goto out;
2962 bh = ext3_bread(handle, inode, blk, 1, &err); 2972 if (journal_quota) {
2963 if (!bh) 2973 err = ext3_journal_get_write_access(handle, bh);
2974 if (err) {
2975 brelse(bh);
2964 goto out; 2976 goto out;
2965 if (journal_quota) {
2966 err = ext3_journal_get_write_access(handle, bh);
2967 if (err) {
2968 brelse(bh);
2969 goto out;
2970 }
2971 }
2972 lock_buffer(bh);
2973 memcpy(bh->b_data+offset, data, tocopy);
2974 flush_dcache_page(bh->b_page);
2975 unlock_buffer(bh);
2976 if (journal_quota)
2977 err = ext3_journal_dirty_metadata(handle, bh);
2978 else {
2979 /* Always do at least ordered writes for quotas */
2980 err = ext3_journal_dirty_data(handle, bh);
2981 mark_buffer_dirty(bh);
2982 } 2977 }
2983 brelse(bh);
2984 if (err)
2985 goto out;
2986 offset = 0;
2987 towrite -= tocopy;
2988 data += tocopy;
2989 blk++;
2990 } 2978 }
2979 lock_buffer(bh);
2980 memcpy(bh->b_data+offset, data, len);
2981 flush_dcache_page(bh->b_page);
2982 unlock_buffer(bh);
2983 if (journal_quota)
2984 err = ext3_journal_dirty_metadata(handle, bh);
2985 else {
2986 /* Always do at least ordered writes for quotas */
2987 err = ext3_journal_dirty_data(handle, bh);
2988 mark_buffer_dirty(bh);
2989 }
2990 brelse(bh);
2991out: 2991out:
2992 if (len == towrite) { 2992 if (err) {
2993 mutex_unlock(&inode->i_mutex); 2993 mutex_unlock(&inode->i_mutex);
2994 return err; 2994 return err;
2995 } 2995 }
2996 if (inode->i_size < off+len-towrite) { 2996 if (inode->i_size < off + len) {
2997 i_size_write(inode, off+len-towrite); 2997 i_size_write(inode, off + len);
2998 EXT3_I(inode)->i_disksize = inode->i_size; 2998 EXT3_I(inode)->i_disksize = inode->i_size;
2999 } 2999 }
3000 inode->i_version++; 3000 inode->i_version++;
3001 inode->i_mtime = inode->i_ctime = CURRENT_TIME; 3001 inode->i_mtime = inode->i_ctime = CURRENT_TIME;
3002 ext3_mark_inode_dirty(handle, inode); 3002 ext3_mark_inode_dirty(handle, inode);
3003 mutex_unlock(&inode->i_mutex); 3003 mutex_unlock(&inode->i_mutex);
3004 return len - towrite; 3004 return len;
3005} 3005}
3006 3006
3007#endif 3007#endif
diff --git a/fs/ext3/xattr.c b/fs/ext3/xattr.c
index 66895ccf76c7..534a94c3a933 100644
--- a/fs/ext3/xattr.c
+++ b/fs/ext3/xattr.c
@@ -274,7 +274,7 @@ ext3_xattr_ibody_get(struct inode *inode, int name_index, const char *name,
274 void *end; 274 void *end;
275 int error; 275 int error;
276 276
277 if (!(EXT3_I(inode)->i_state & EXT3_STATE_XATTR)) 277 if (!ext3_test_inode_state(inode, EXT3_STATE_XATTR))
278 return -ENODATA; 278 return -ENODATA;
279 error = ext3_get_inode_loc(inode, &iloc); 279 error = ext3_get_inode_loc(inode, &iloc);
280 if (error) 280 if (error)
@@ -403,7 +403,7 @@ ext3_xattr_ibody_list(struct dentry *dentry, char *buffer, size_t buffer_size)
403 void *end; 403 void *end;
404 int error; 404 int error;
405 405
406 if (!(EXT3_I(inode)->i_state & EXT3_STATE_XATTR)) 406 if (!ext3_test_inode_state(inode, EXT3_STATE_XATTR))
407 return 0; 407 return 0;
408 error = ext3_get_inode_loc(inode, &iloc); 408 error = ext3_get_inode_loc(inode, &iloc);
409 if (error) 409 if (error)
@@ -500,7 +500,7 @@ ext3_xattr_release_block(handle_t *handle, struct inode *inode,
500 error = ext3_journal_dirty_metadata(handle, bh); 500 error = ext3_journal_dirty_metadata(handle, bh);
501 if (IS_SYNC(inode)) 501 if (IS_SYNC(inode))
502 handle->h_sync = 1; 502 handle->h_sync = 1;
503 vfs_dq_free_block(inode, 1); 503 dquot_free_block(inode, 1);
504 ea_bdebug(bh, "refcount now=%d; releasing", 504 ea_bdebug(bh, "refcount now=%d; releasing",
505 le32_to_cpu(BHDR(bh)->h_refcount)); 505 le32_to_cpu(BHDR(bh)->h_refcount));
506 if (ce) 506 if (ce)
@@ -775,8 +775,8 @@ inserted:
775 else { 775 else {
776 /* The old block is released after updating 776 /* The old block is released after updating
777 the inode. */ 777 the inode. */
778 error = -EDQUOT; 778 error = dquot_alloc_block(inode, 1);
779 if (vfs_dq_alloc_block(inode, 1)) 779 if (error)
780 goto cleanup; 780 goto cleanup;
781 error = ext3_journal_get_write_access(handle, 781 error = ext3_journal_get_write_access(handle,
782 new_bh); 782 new_bh);
@@ -850,7 +850,7 @@ cleanup:
850 return error; 850 return error;
851 851
852cleanup_dquot: 852cleanup_dquot:
853 vfs_dq_free_block(inode, 1); 853 dquot_free_block(inode, 1);
854 goto cleanup; 854 goto cleanup;
855 855
856bad_block: 856bad_block:
@@ -882,7 +882,7 @@ ext3_xattr_ibody_find(struct inode *inode, struct ext3_xattr_info *i,
882 is->s.base = is->s.first = IFIRST(header); 882 is->s.base = is->s.first = IFIRST(header);
883 is->s.here = is->s.first; 883 is->s.here = is->s.first;
884 is->s.end = (void *)raw_inode + EXT3_SB(inode->i_sb)->s_inode_size; 884 is->s.end = (void *)raw_inode + EXT3_SB(inode->i_sb)->s_inode_size;
885 if (EXT3_I(inode)->i_state & EXT3_STATE_XATTR) { 885 if (ext3_test_inode_state(inode, EXT3_STATE_XATTR)) {
886 error = ext3_xattr_check_names(IFIRST(header), is->s.end); 886 error = ext3_xattr_check_names(IFIRST(header), is->s.end);
887 if (error) 887 if (error)
888 return error; 888 return error;
@@ -914,10 +914,10 @@ ext3_xattr_ibody_set(handle_t *handle, struct inode *inode,
914 header = IHDR(inode, ext3_raw_inode(&is->iloc)); 914 header = IHDR(inode, ext3_raw_inode(&is->iloc));
915 if (!IS_LAST_ENTRY(s->first)) { 915 if (!IS_LAST_ENTRY(s->first)) {
916 header->h_magic = cpu_to_le32(EXT3_XATTR_MAGIC); 916 header->h_magic = cpu_to_le32(EXT3_XATTR_MAGIC);
917 EXT3_I(inode)->i_state |= EXT3_STATE_XATTR; 917 ext3_set_inode_state(inode, EXT3_STATE_XATTR);
918 } else { 918 } else {
919 header->h_magic = cpu_to_le32(0); 919 header->h_magic = cpu_to_le32(0);
920 EXT3_I(inode)->i_state &= ~EXT3_STATE_XATTR; 920 ext3_clear_inode_state(inode, EXT3_STATE_XATTR);
921 } 921 }
922 return 0; 922 return 0;
923} 923}
@@ -967,10 +967,10 @@ ext3_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index,
967 if (error) 967 if (error)
968 goto cleanup; 968 goto cleanup;
969 969
970 if (EXT3_I(inode)->i_state & EXT3_STATE_NEW) { 970 if (ext3_test_inode_state(inode, EXT3_STATE_NEW)) {
971 struct ext3_inode *raw_inode = ext3_raw_inode(&is.iloc); 971 struct ext3_inode *raw_inode = ext3_raw_inode(&is.iloc);
972 memset(raw_inode, 0, EXT3_SB(inode->i_sb)->s_inode_size); 972 memset(raw_inode, 0, EXT3_SB(inode->i_sb)->s_inode_size);
973 EXT3_I(inode)->i_state &= ~EXT3_STATE_NEW; 973 ext3_clear_inode_state(inode, EXT3_STATE_NEW);
974 } 974 }
975 975
976 error = ext3_xattr_ibody_find(inode, &i, &is); 976 error = ext3_xattr_ibody_find(inode, &i, &is);
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index 22bc7435d913..d2f37a5516c7 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -97,8 +97,8 @@ unsigned ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh,
97 /* If checksum is bad mark all blocks used to prevent allocation 97 /* If checksum is bad mark all blocks used to prevent allocation
98 * essentially implementing a per-group read-only flag. */ 98 * essentially implementing a per-group read-only flag. */
99 if (!ext4_group_desc_csum_verify(sbi, block_group, gdp)) { 99 if (!ext4_group_desc_csum_verify(sbi, block_group, gdp)) {
100 ext4_error(sb, __func__, 100 ext4_error(sb, "Checksum bad for group %u",
101 "Checksum bad for group %u", block_group); 101 block_group);
102 ext4_free_blks_set(sb, gdp, 0); 102 ext4_free_blks_set(sb, gdp, 0);
103 ext4_free_inodes_set(sb, gdp, 0); 103 ext4_free_inodes_set(sb, gdp, 0);
104 ext4_itable_unused_set(sb, gdp, 0); 104 ext4_itable_unused_set(sb, gdp, 0);
@@ -130,8 +130,7 @@ unsigned ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh,
130 * to make sure we calculate the right free blocks 130 * to make sure we calculate the right free blocks
131 */ 131 */
132 group_blocks = ext4_blocks_count(sbi->s_es) - 132 group_blocks = ext4_blocks_count(sbi->s_es) -
133 le32_to_cpu(sbi->s_es->s_first_data_block) - 133 ext4_group_first_block_no(sb, ngroups - 1);
134 (EXT4_BLOCKS_PER_GROUP(sb) * (ngroups - 1));
135 } else { 134 } else {
136 group_blocks = EXT4_BLOCKS_PER_GROUP(sb); 135 group_blocks = EXT4_BLOCKS_PER_GROUP(sb);
137 } 136 }
@@ -189,9 +188,6 @@ unsigned ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh,
189 * when a file system is mounted (see ext4_fill_super). 188 * when a file system is mounted (see ext4_fill_super).
190 */ 189 */
191 190
192
193#define in_range(b, first, len) ((b) >= (first) && (b) <= (first) + (len) - 1)
194
195/** 191/**
196 * ext4_get_group_desc() -- load group descriptor from disk 192 * ext4_get_group_desc() -- load group descriptor from disk
197 * @sb: super block 193 * @sb: super block
@@ -210,10 +206,8 @@ struct ext4_group_desc * ext4_get_group_desc(struct super_block *sb,
210 struct ext4_sb_info *sbi = EXT4_SB(sb); 206 struct ext4_sb_info *sbi = EXT4_SB(sb);
211 207
212 if (block_group >= ngroups) { 208 if (block_group >= ngroups) {
213 ext4_error(sb, "ext4_get_group_desc", 209 ext4_error(sb, "block_group >= groups_count - block_group = %u,"
214 "block_group >= groups_count - " 210 " groups_count = %u", block_group, ngroups);
215 "block_group = %u, groups_count = %u",
216 block_group, ngroups);
217 211
218 return NULL; 212 return NULL;
219 } 213 }
@@ -221,8 +215,7 @@ struct ext4_group_desc * ext4_get_group_desc(struct super_block *sb,
221 group_desc = block_group >> EXT4_DESC_PER_BLOCK_BITS(sb); 215 group_desc = block_group >> EXT4_DESC_PER_BLOCK_BITS(sb);
222 offset = block_group & (EXT4_DESC_PER_BLOCK(sb) - 1); 216 offset = block_group & (EXT4_DESC_PER_BLOCK(sb) - 1);
223 if (!sbi->s_group_desc[group_desc]) { 217 if (!sbi->s_group_desc[group_desc]) {
224 ext4_error(sb, "ext4_get_group_desc", 218 ext4_error(sb, "Group descriptor not loaded - "
225 "Group descriptor not loaded - "
226 "block_group = %u, group_desc = %u, desc = %u", 219 "block_group = %u, group_desc = %u, desc = %u",
227 block_group, group_desc, offset); 220 block_group, group_desc, offset);
228 return NULL; 221 return NULL;
@@ -282,9 +275,7 @@ static int ext4_valid_block_bitmap(struct super_block *sb,
282 return 1; 275 return 1;
283 276
284err_out: 277err_out:
285 ext4_error(sb, __func__, 278 ext4_error(sb, "Invalid block bitmap - block_group = %d, block = %llu",
286 "Invalid block bitmap - "
287 "block_group = %d, block = %llu",
288 block_group, bitmap_blk); 279 block_group, bitmap_blk);
289 return 0; 280 return 0;
290} 281}
@@ -311,8 +302,7 @@ ext4_read_block_bitmap(struct super_block *sb, ext4_group_t block_group)
311 bitmap_blk = ext4_block_bitmap(sb, desc); 302 bitmap_blk = ext4_block_bitmap(sb, desc);
312 bh = sb_getblk(sb, bitmap_blk); 303 bh = sb_getblk(sb, bitmap_blk);
313 if (unlikely(!bh)) { 304 if (unlikely(!bh)) {
314 ext4_error(sb, __func__, 305 ext4_error(sb, "Cannot read block bitmap - "
315 "Cannot read block bitmap - "
316 "block_group = %u, block_bitmap = %llu", 306 "block_group = %u, block_bitmap = %llu",
317 block_group, bitmap_blk); 307 block_group, bitmap_blk);
318 return NULL; 308 return NULL;
@@ -354,8 +344,7 @@ ext4_read_block_bitmap(struct super_block *sb, ext4_group_t block_group)
354 set_bitmap_uptodate(bh); 344 set_bitmap_uptodate(bh);
355 if (bh_submit_read(bh) < 0) { 345 if (bh_submit_read(bh) < 0) {
356 put_bh(bh); 346 put_bh(bh);
357 ext4_error(sb, __func__, 347 ext4_error(sb, "Cannot read block bitmap - "
358 "Cannot read block bitmap - "
359 "block_group = %u, block_bitmap = %llu", 348 "block_group = %u, block_bitmap = %llu",
360 block_group, bitmap_blk); 349 block_group, bitmap_blk);
361 return NULL; 350 return NULL;
@@ -419,8 +408,7 @@ void ext4_add_groupblocks(handle_t *handle, struct super_block *sb,
419 in_range(block, ext4_inode_table(sb, desc), sbi->s_itb_per_group) || 408 in_range(block, ext4_inode_table(sb, desc), sbi->s_itb_per_group) ||
420 in_range(block + count - 1, ext4_inode_table(sb, desc), 409 in_range(block + count - 1, ext4_inode_table(sb, desc),
421 sbi->s_itb_per_group)) { 410 sbi->s_itb_per_group)) {
422 ext4_error(sb, __func__, 411 ext4_error(sb, "Adding blocks in system zones - "
423 "Adding blocks in system zones - "
424 "Block = %llu, count = %lu", 412 "Block = %llu, count = %lu",
425 block, count); 413 block, count);
426 goto error_return; 414 goto error_return;
@@ -453,8 +441,7 @@ void ext4_add_groupblocks(handle_t *handle, struct super_block *sb,
453 BUFFER_TRACE(bitmap_bh, "clear bit"); 441 BUFFER_TRACE(bitmap_bh, "clear bit");
454 if (!ext4_clear_bit_atomic(ext4_group_lock_ptr(sb, block_group), 442 if (!ext4_clear_bit_atomic(ext4_group_lock_ptr(sb, block_group),
455 bit + i, bitmap_bh->b_data)) { 443 bit + i, bitmap_bh->b_data)) {
456 ext4_error(sb, __func__, 444 ext4_error(sb, "bit already cleared for block %llu",
457 "bit already cleared for block %llu",
458 (ext4_fsblk_t)(block + i)); 445 (ext4_fsblk_t)(block + i));
459 BUFFER_TRACE(bitmap_bh, "bit already cleared"); 446 BUFFER_TRACE(bitmap_bh, "bit already cleared");
460 } else { 447 } else {
diff --git a/fs/ext4/block_validity.c b/fs/ext4/block_validity.c
index a60ab9aad57d..983f0e127493 100644
--- a/fs/ext4/block_validity.c
+++ b/fs/ext4/block_validity.c
@@ -205,14 +205,14 @@ void ext4_release_system_zone(struct super_block *sb)
205 entry = rb_entry(n, struct ext4_system_zone, node); 205 entry = rb_entry(n, struct ext4_system_zone, node);
206 kmem_cache_free(ext4_system_zone_cachep, entry); 206 kmem_cache_free(ext4_system_zone_cachep, entry);
207 if (!parent) 207 if (!parent)
208 EXT4_SB(sb)->system_blks.rb_node = NULL; 208 EXT4_SB(sb)->system_blks = RB_ROOT;
209 else if (parent->rb_left == n) 209 else if (parent->rb_left == n)
210 parent->rb_left = NULL; 210 parent->rb_left = NULL;
211 else if (parent->rb_right == n) 211 else if (parent->rb_right == n)
212 parent->rb_right = NULL; 212 parent->rb_right = NULL;
213 n = parent; 213 n = parent;
214 } 214 }
215 EXT4_SB(sb)->system_blks.rb_node = NULL; 215 EXT4_SB(sb)->system_blks = RB_ROOT;
216} 216}
217 217
218/* 218/*
diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c
index 9dc93168e262..86cb6d86a048 100644
--- a/fs/ext4/dir.c
+++ b/fs/ext4/dir.c
@@ -83,10 +83,12 @@ int ext4_check_dir_entry(const char *function, struct inode *dir,
83 error_msg = "inode out of bounds"; 83 error_msg = "inode out of bounds";
84 84
85 if (error_msg != NULL) 85 if (error_msg != NULL)
86 ext4_error(dir->i_sb, function, 86 __ext4_error(dir->i_sb, function,
87 "bad entry in directory #%lu: %s - " 87 "bad entry in directory #%lu: %s - block=%llu"
88 "offset=%u, inode=%u, rec_len=%d, name_len=%d", 88 "offset=%u(%u), inode=%u, rec_len=%d, name_len=%d",
89 dir->i_ino, error_msg, offset, 89 dir->i_ino, error_msg,
90 (unsigned long long) bh->b_blocknr,
91 (unsigned) (offset%bh->b_size), offset,
90 le32_to_cpu(de->inode), 92 le32_to_cpu(de->inode),
91 rlen, de->name_len); 93 rlen, de->name_len);
92 return error_msg == NULL ? 1 : 0; 94 return error_msg == NULL ? 1 : 0;
@@ -150,7 +152,7 @@ static int ext4_readdir(struct file *filp,
150 */ 152 */
151 if (!bh) { 153 if (!bh) {
152 if (!dir_has_error) { 154 if (!dir_has_error) {
153 ext4_error(sb, __func__, "directory #%lu " 155 ext4_error(sb, "directory #%lu "
154 "contains a hole at offset %Lu", 156 "contains a hole at offset %Lu",
155 inode->i_ino, 157 inode->i_ino,
156 (unsigned long long) filp->f_pos); 158 (unsigned long long) filp->f_pos);
@@ -303,7 +305,7 @@ static void free_rb_tree_fname(struct rb_root *root)
303 kfree(old); 305 kfree(old);
304 } 306 }
305 if (!parent) 307 if (!parent)
306 root->rb_node = NULL; 308 *root = RB_ROOT;
307 else if (parent->rb_left == n) 309 else if (parent->rb_left == n)
308 parent->rb_left = NULL; 310 parent->rb_left = NULL;
309 else if (parent->rb_right == n) 311 else if (parent->rb_right == n)
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 4cedc91ec59d..bf938cf7c5f0 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -53,6 +53,12 @@
53#define ext4_debug(f, a...) do {} while (0) 53#define ext4_debug(f, a...) do {} while (0)
54#endif 54#endif
55 55
56#define EXT4_ERROR_INODE(inode, fmt, a...) \
57 ext4_error_inode(__func__, (inode), (fmt), ## a);
58
59#define EXT4_ERROR_FILE(file, fmt, a...) \
60 ext4_error_file(__func__, (file), (fmt), ## a);
61
56/* data type for block offset of block group */ 62/* data type for block offset of block group */
57typedef int ext4_grpblk_t; 63typedef int ext4_grpblk_t;
58 64
@@ -133,14 +139,14 @@ struct mpage_da_data {
133 int pages_written; 139 int pages_written;
134 int retval; 140 int retval;
135}; 141};
136#define DIO_AIO_UNWRITTEN 0x1 142#define EXT4_IO_UNWRITTEN 0x1
137typedef struct ext4_io_end { 143typedef struct ext4_io_end {
138 struct list_head list; /* per-file finished AIO list */ 144 struct list_head list; /* per-file finished AIO list */
139 struct inode *inode; /* file being written to */ 145 struct inode *inode; /* file being written to */
140 unsigned int flag; /* unwritten or not */ 146 unsigned int flag; /* unwritten or not */
141 int error; /* I/O error code */ 147 struct page *page; /* page struct for buffer write */
142 ext4_lblk_t offset; /* offset in the file */ 148 loff_t offset; /* offset in the file */
143 size_t size; /* size of the extent */ 149 ssize_t size; /* size of the extent */
144 struct work_struct work; /* data work queue */ 150 struct work_struct work; /* data work queue */
145} ext4_io_end_t; 151} ext4_io_end_t;
146 152
@@ -284,10 +290,12 @@ struct flex_groups {
284#define EXT4_TOPDIR_FL 0x00020000 /* Top of directory hierarchies*/ 290#define EXT4_TOPDIR_FL 0x00020000 /* Top of directory hierarchies*/
285#define EXT4_HUGE_FILE_FL 0x00040000 /* Set to each huge file */ 291#define EXT4_HUGE_FILE_FL 0x00040000 /* Set to each huge file */
286#define EXT4_EXTENTS_FL 0x00080000 /* Inode uses extents */ 292#define EXT4_EXTENTS_FL 0x00080000 /* Inode uses extents */
293#define EXT4_EA_INODE_FL 0x00200000 /* Inode used for large EA */
294#define EXT4_EOFBLOCKS_FL 0x00400000 /* Blocks allocated beyond EOF */
287#define EXT4_RESERVED_FL 0x80000000 /* reserved for ext4 lib */ 295#define EXT4_RESERVED_FL 0x80000000 /* reserved for ext4 lib */
288 296
289#define EXT4_FL_USER_VISIBLE 0x000BDFFF /* User visible flags */ 297#define EXT4_FL_USER_VISIBLE 0x004BDFFF /* User visible flags */
290#define EXT4_FL_USER_MODIFIABLE 0x000B80FF /* User modifiable flags */ 298#define EXT4_FL_USER_MODIFIABLE 0x004B80FF /* User modifiable flags */
291 299
292/* Flags that should be inherited by new inodes from their parent. */ 300/* Flags that should be inherited by new inodes from their parent. */
293#define EXT4_FL_INHERITED (EXT4_SECRM_FL | EXT4_UNRM_FL | EXT4_COMPR_FL |\ 301#define EXT4_FL_INHERITED (EXT4_SECRM_FL | EXT4_UNRM_FL | EXT4_COMPR_FL |\
@@ -313,17 +321,6 @@ static inline __u32 ext4_mask_flags(umode_t mode, __u32 flags)
313 return flags & EXT4_OTHER_FLMASK; 321 return flags & EXT4_OTHER_FLMASK;
314} 322}
315 323
316/*
317 * Inode dynamic state flags
318 */
319#define EXT4_STATE_JDATA 0x00000001 /* journaled data exists */
320#define EXT4_STATE_NEW 0x00000002 /* inode is newly created */
321#define EXT4_STATE_XATTR 0x00000004 /* has in-inode xattrs */
322#define EXT4_STATE_NO_EXPAND 0x00000008 /* No space for expansion */
323#define EXT4_STATE_DA_ALLOC_CLOSE 0x00000010 /* Alloc DA blks on close */
324#define EXT4_STATE_EXT_MIGRATE 0x00000020 /* Inode is migrating */
325#define EXT4_STATE_DIO_UNWRITTEN 0x00000040 /* need convert on dio done*/
326
327/* Used to pass group descriptor data when online resize is done */ 324/* Used to pass group descriptor data when online resize is done */
328struct ext4_new_group_input { 325struct ext4_new_group_input {
329 __u32 group; /* Group number for this data */ 326 __u32 group; /* Group number for this data */
@@ -364,19 +361,20 @@ struct ext4_new_group_data {
364 /* caller is from the direct IO path, request to creation of an 361 /* caller is from the direct IO path, request to creation of an
365 unitialized extents if not allocated, split the uninitialized 362 unitialized extents if not allocated, split the uninitialized
366 extent if blocks has been preallocated already*/ 363 extent if blocks has been preallocated already*/
367#define EXT4_GET_BLOCKS_DIO 0x0008 364#define EXT4_GET_BLOCKS_PRE_IO 0x0008
368#define EXT4_GET_BLOCKS_CONVERT 0x0010 365#define EXT4_GET_BLOCKS_CONVERT 0x0010
369#define EXT4_GET_BLOCKS_DIO_CREATE_EXT (EXT4_GET_BLOCKS_DIO|\ 366#define EXT4_GET_BLOCKS_IO_CREATE_EXT (EXT4_GET_BLOCKS_PRE_IO|\
367 EXT4_GET_BLOCKS_CREATE_UNINIT_EXT)
368 /* Convert extent to initialized after IO complete */
369#define EXT4_GET_BLOCKS_IO_CONVERT_EXT (EXT4_GET_BLOCKS_CONVERT|\
370 EXT4_GET_BLOCKS_CREATE_UNINIT_EXT) 370 EXT4_GET_BLOCKS_CREATE_UNINIT_EXT)
371 /* Convert extent to initialized after direct IO complete */
372#define EXT4_GET_BLOCKS_DIO_CONVERT_EXT (EXT4_GET_BLOCKS_CONVERT|\
373 EXT4_GET_BLOCKS_DIO_CREATE_EXT)
374 371
375/* 372/*
376 * Flags used by ext4_free_blocks 373 * Flags used by ext4_free_blocks
377 */ 374 */
378#define EXT4_FREE_BLOCKS_METADATA 0x0001 375#define EXT4_FREE_BLOCKS_METADATA 0x0001
379#define EXT4_FREE_BLOCKS_FORGET 0x0002 376#define EXT4_FREE_BLOCKS_FORGET 0x0002
377#define EXT4_FREE_BLOCKS_VALIDATED 0x0004
380 378
381/* 379/*
382 * ioctl commands 380 * ioctl commands
@@ -630,7 +628,7 @@ struct ext4_inode_info {
630 * near to their parent directory's inode. 628 * near to their parent directory's inode.
631 */ 629 */
632 ext4_group_t i_block_group; 630 ext4_group_t i_block_group;
633 __u32 i_state; /* Dynamic state flags for ext4 */ 631 unsigned long i_state_flags; /* Dynamic state flags */
634 632
635 ext4_lblk_t i_dir_start_lookup; 633 ext4_lblk_t i_dir_start_lookup;
636#ifdef CONFIG_EXT4_FS_XATTR 634#ifdef CONFIG_EXT4_FS_XATTR
@@ -708,8 +706,9 @@ struct ext4_inode_info {
708 qsize_t i_reserved_quota; 706 qsize_t i_reserved_quota;
709#endif 707#endif
710 708
711 /* completed async DIOs that might need unwritten extents handling */ 709 /* completed IOs that might need unwritten extents handling */
712 struct list_head i_aio_dio_complete_list; 710 struct list_head i_completed_io_list;
711 spinlock_t i_completed_io_lock;
713 /* current io_end structure for async DIO write*/ 712 /* current io_end structure for async DIO write*/
714 ext4_io_end_t *cur_aio_dio; 713 ext4_io_end_t *cur_aio_dio;
715 714
@@ -760,6 +759,7 @@ struct ext4_inode_info {
760#define EXT4_MOUNT_QUOTA 0x80000 /* Some quota option set */ 759#define EXT4_MOUNT_QUOTA 0x80000 /* Some quota option set */
761#define EXT4_MOUNT_USRQUOTA 0x100000 /* "old" user quota */ 760#define EXT4_MOUNT_USRQUOTA 0x100000 /* "old" user quota */
762#define EXT4_MOUNT_GRPQUOTA 0x200000 /* "old" group quota */ 761#define EXT4_MOUNT_GRPQUOTA 0x200000 /* "old" group quota */
762#define EXT4_MOUNT_DIOREAD_NOLOCK 0x400000 /* Enable support for dio read nolocking */
763#define EXT4_MOUNT_JOURNAL_CHECKSUM 0x800000 /* Journal checksums */ 763#define EXT4_MOUNT_JOURNAL_CHECKSUM 0x800000 /* Journal checksums */
764#define EXT4_MOUNT_JOURNAL_ASYNC_COMMIT 0x1000000 /* Journal Async Commit */ 764#define EXT4_MOUNT_JOURNAL_ASYNC_COMMIT 0x1000000 /* Journal Async Commit */
765#define EXT4_MOUNT_I_VERSION 0x2000000 /* i_version support */ 765#define EXT4_MOUNT_I_VERSION 0x2000000 /* i_version support */
@@ -1050,6 +1050,34 @@ static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino)
1050 (ino >= EXT4_FIRST_INO(sb) && 1050 (ino >= EXT4_FIRST_INO(sb) &&
1051 ino <= le32_to_cpu(EXT4_SB(sb)->s_es->s_inodes_count)); 1051 ino <= le32_to_cpu(EXT4_SB(sb)->s_es->s_inodes_count));
1052} 1052}
1053
1054/*
1055 * Inode dynamic state flags
1056 */
1057enum {
1058 EXT4_STATE_JDATA, /* journaled data exists */
1059 EXT4_STATE_NEW, /* inode is newly created */
1060 EXT4_STATE_XATTR, /* has in-inode xattrs */
1061 EXT4_STATE_NO_EXPAND, /* No space for expansion */
1062 EXT4_STATE_DA_ALLOC_CLOSE, /* Alloc DA blks on close */
1063 EXT4_STATE_EXT_MIGRATE, /* Inode is migrating */
1064 EXT4_STATE_DIO_UNWRITTEN, /* need convert on dio done*/
1065};
1066
1067static inline int ext4_test_inode_state(struct inode *inode, int bit)
1068{
1069 return test_bit(bit, &EXT4_I(inode)->i_state_flags);
1070}
1071
1072static inline void ext4_set_inode_state(struct inode *inode, int bit)
1073{
1074 set_bit(bit, &EXT4_I(inode)->i_state_flags);
1075}
1076
1077static inline void ext4_clear_inode_state(struct inode *inode, int bit)
1078{
1079 clear_bit(bit, &EXT4_I(inode)->i_state_flags);
1080}
1053#else 1081#else
1054/* Assume that user mode programs are passing in an ext4fs superblock, not 1082/* Assume that user mode programs are passing in an ext4fs superblock, not
1055 * a kernel struct super_block. This will allow us to call the feature-test 1083 * a kernel struct super_block. This will allow us to call the feature-test
@@ -1126,6 +1154,8 @@ static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino)
1126#define EXT4_FEATURE_INCOMPAT_64BIT 0x0080 1154#define EXT4_FEATURE_INCOMPAT_64BIT 0x0080
1127#define EXT4_FEATURE_INCOMPAT_MMP 0x0100 1155#define EXT4_FEATURE_INCOMPAT_MMP 0x0100
1128#define EXT4_FEATURE_INCOMPAT_FLEX_BG 0x0200 1156#define EXT4_FEATURE_INCOMPAT_FLEX_BG 0x0200
1157#define EXT4_FEATURE_INCOMPAT_EA_INODE 0x0400 /* EA in inode */
1158#define EXT4_FEATURE_INCOMPAT_DIRDATA 0x1000 /* data in dirent */
1129 1159
1130#define EXT4_FEATURE_COMPAT_SUPP EXT2_FEATURE_COMPAT_EXT_ATTR 1160#define EXT4_FEATURE_COMPAT_SUPP EXT2_FEATURE_COMPAT_EXT_ATTR
1131#define EXT4_FEATURE_INCOMPAT_SUPP (EXT4_FEATURE_INCOMPAT_FILETYPE| \ 1161#define EXT4_FEATURE_INCOMPAT_SUPP (EXT4_FEATURE_INCOMPAT_FILETYPE| \
@@ -1416,7 +1446,7 @@ int ext4_get_block(struct inode *inode, sector_t iblock,
1416 struct buffer_head *bh_result, int create); 1446 struct buffer_head *bh_result, int create);
1417 1447
1418extern struct inode *ext4_iget(struct super_block *, unsigned long); 1448extern struct inode *ext4_iget(struct super_block *, unsigned long);
1419extern int ext4_write_inode(struct inode *, int); 1449extern int ext4_write_inode(struct inode *, struct writeback_control *);
1420extern int ext4_setattr(struct dentry *, struct iattr *); 1450extern int ext4_setattr(struct dentry *, struct iattr *);
1421extern int ext4_getattr(struct vfsmount *mnt, struct dentry *dentry, 1451extern int ext4_getattr(struct vfsmount *mnt, struct dentry *dentry,
1422 struct kstat *stat); 1452 struct kstat *stat);
@@ -1439,7 +1469,7 @@ extern int ext4_block_truncate_page(handle_t *handle,
1439 struct address_space *mapping, loff_t from); 1469 struct address_space *mapping, loff_t from);
1440extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf); 1470extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf);
1441extern qsize_t *ext4_get_reserved_space(struct inode *inode); 1471extern qsize_t *ext4_get_reserved_space(struct inode *inode);
1442extern int flush_aio_dio_completed_IO(struct inode *inode); 1472extern int flush_completed_IO(struct inode *inode);
1443extern void ext4_da_update_reserve_space(struct inode *inode, 1473extern void ext4_da_update_reserve_space(struct inode *inode,
1444 int used, int quota_claim); 1474 int used, int quota_claim);
1445/* ioctl.c */ 1475/* ioctl.c */
@@ -1465,13 +1495,20 @@ extern int ext4_group_extend(struct super_block *sb,
1465 ext4_fsblk_t n_blocks_count); 1495 ext4_fsblk_t n_blocks_count);
1466 1496
1467/* super.c */ 1497/* super.c */
1468extern void ext4_error(struct super_block *, const char *, const char *, ...) 1498extern void __ext4_error(struct super_block *, const char *, const char *, ...)
1499 __attribute__ ((format (printf, 3, 4)));
1500#define ext4_error(sb, message...) __ext4_error(sb, __func__, ## message)
1501extern void ext4_error_inode(const char *, struct inode *, const char *, ...)
1502 __attribute__ ((format (printf, 3, 4)));
1503extern void ext4_error_file(const char *, struct file *, const char *, ...)
1469 __attribute__ ((format (printf, 3, 4))); 1504 __attribute__ ((format (printf, 3, 4)));
1470extern void __ext4_std_error(struct super_block *, const char *, int); 1505extern void __ext4_std_error(struct super_block *, const char *, int);
1471extern void ext4_abort(struct super_block *, const char *, const char *, ...) 1506extern void ext4_abort(struct super_block *, const char *, const char *, ...)
1472 __attribute__ ((format (printf, 3, 4))); 1507 __attribute__ ((format (printf, 3, 4)));
1473extern void ext4_warning(struct super_block *, const char *, const char *, ...) 1508extern void __ext4_warning(struct super_block *, const char *,
1509 const char *, ...)
1474 __attribute__ ((format (printf, 3, 4))); 1510 __attribute__ ((format (printf, 3, 4)));
1511#define ext4_warning(sb, message...) __ext4_warning(sb, __func__, ## message)
1475extern void ext4_msg(struct super_block *, const char *, const char *, ...) 1512extern void ext4_msg(struct super_block *, const char *, const char *, ...)
1476 __attribute__ ((format (printf, 3, 4))); 1513 __attribute__ ((format (printf, 3, 4)));
1477extern void ext4_grp_locked_error(struct super_block *, ext4_group_t, 1514extern void ext4_grp_locked_error(struct super_block *, ext4_group_t,
@@ -1744,7 +1781,7 @@ extern void ext4_ext_release(struct super_block *);
1744extern long ext4_fallocate(struct inode *inode, int mode, loff_t offset, 1781extern long ext4_fallocate(struct inode *inode, int mode, loff_t offset,
1745 loff_t len); 1782 loff_t len);
1746extern int ext4_convert_unwritten_extents(struct inode *inode, loff_t offset, 1783extern int ext4_convert_unwritten_extents(struct inode *inode, loff_t offset,
1747 loff_t len); 1784 ssize_t len);
1748extern int ext4_get_blocks(handle_t *handle, struct inode *inode, 1785extern int ext4_get_blocks(handle_t *handle, struct inode *inode,
1749 sector_t block, unsigned int max_blocks, 1786 sector_t block, unsigned int max_blocks,
1750 struct buffer_head *bh, int flags); 1787 struct buffer_head *bh, int flags);
@@ -1756,6 +1793,15 @@ extern int ext4_move_extents(struct file *o_filp, struct file *d_filp,
1756 __u64 len, __u64 *moved_len); 1793 __u64 len, __u64 *moved_len);
1757 1794
1758 1795
1796/* BH_Uninit flag: blocks are allocated but uninitialized on disk */
1797enum ext4_state_bits {
1798 BH_Uninit /* blocks are allocated but uninitialized on disk */
1799 = BH_JBDPrivateStart,
1800};
1801
1802BUFFER_FNS(Uninit, uninit)
1803TAS_BUFFER_FNS(Uninit, uninit)
1804
1759/* 1805/*
1760 * Add new method to test wether block and inode bitmaps are properly 1806 * Add new method to test wether block and inode bitmaps are properly
1761 * initialized. With uninit_bg reading the block from disk is not enough 1807 * initialized. With uninit_bg reading the block from disk is not enough
@@ -1773,6 +1819,8 @@ static inline void set_bitmap_uptodate(struct buffer_head *bh)
1773 set_bit(BH_BITMAP_UPTODATE, &(bh)->b_state); 1819 set_bit(BH_BITMAP_UPTODATE, &(bh)->b_state);
1774} 1820}
1775 1821
1822#define in_range(b, first, len) ((b) >= (first) && (b) <= (first) + (len) - 1)
1823
1776#endif /* __KERNEL__ */ 1824#endif /* __KERNEL__ */
1777 1825
1778#endif /* _EXT4_H */ 1826#endif /* _EXT4_H */
diff --git a/fs/ext4/ext4_jbd2.c b/fs/ext4/ext4_jbd2.c
index b57e5c711b6d..53d2764d71ca 100644
--- a/fs/ext4/ext4_jbd2.c
+++ b/fs/ext4/ext4_jbd2.c
@@ -125,14 +125,14 @@ int __ext4_handle_dirty_metadata(const char *where, handle_t *handle,
125 ext4_journal_abort_handle(where, __func__, bh, 125 ext4_journal_abort_handle(where, __func__, bh,
126 handle, err); 126 handle, err);
127 } else { 127 } else {
128 if (inode && bh) 128 if (inode)
129 mark_buffer_dirty_inode(bh, inode); 129 mark_buffer_dirty_inode(bh, inode);
130 else 130 else
131 mark_buffer_dirty(bh); 131 mark_buffer_dirty(bh);
132 if (inode && inode_needs_sync(inode)) { 132 if (inode && inode_needs_sync(inode)) {
133 sync_dirty_buffer(bh); 133 sync_dirty_buffer(bh);
134 if (buffer_req(bh) && !buffer_uptodate(bh)) { 134 if (buffer_req(bh) && !buffer_uptodate(bh)) {
135 ext4_error(inode->i_sb, __func__, 135 ext4_error(inode->i_sb,
136 "IO error syncing inode, " 136 "IO error syncing inode, "
137 "inode=%lu, block=%llu", 137 "inode=%lu, block=%llu",
138 inode->i_ino, 138 inode->i_ino,
diff --git a/fs/ext4/ext4_jbd2.h b/fs/ext4/ext4_jbd2.h
index 05eca817d704..b79ad5126468 100644
--- a/fs/ext4/ext4_jbd2.h
+++ b/fs/ext4/ext4_jbd2.h
@@ -304,4 +304,28 @@ static inline int ext4_should_writeback_data(struct inode *inode)
304 return 0; 304 return 0;
305} 305}
306 306
307/*
308 * This function controls whether or not we should try to go down the
309 * dioread_nolock code paths, which makes it safe to avoid taking
310 * i_mutex for direct I/O reads. This only works for extent-based
311 * files, and it doesn't work for nobh or if data journaling is
312 * enabled, since the dioread_nolock code uses b_private to pass
313 * information back to the I/O completion handler, and this conflicts
314 * with the jbd's use of b_private.
315 */
316static inline int ext4_should_dioread_nolock(struct inode *inode)
317{
318 if (!test_opt(inode->i_sb, DIOREAD_NOLOCK))
319 return 0;
320 if (test_opt(inode->i_sb, NOBH))
321 return 0;
322 if (!S_ISREG(inode->i_mode))
323 return 0;
324 if (!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL))
325 return 0;
326 if (ext4_should_journal_data(inode))
327 return 0;
328 return 1;
329}
330
307#endif /* _EXT4_JBD2_H */ 331#endif /* _EXT4_JBD2_H */
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 765a4826b118..94c8ee81f5e1 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -195,8 +195,7 @@ static ext4_fsblk_t ext4_ext_find_goal(struct inode *inode,
195 if (S_ISREG(inode->i_mode)) 195 if (S_ISREG(inode->i_mode))
196 block_group++; 196 block_group++;
197 } 197 }
198 bg_start = (block_group * EXT4_BLOCKS_PER_GROUP(inode->i_sb)) + 198 bg_start = ext4_group_first_block_no(inode->i_sb, block_group);
199 le32_to_cpu(EXT4_SB(inode->i_sb)->s_es->s_first_data_block);
200 last_block = ext4_blocks_count(EXT4_SB(inode->i_sb)->s_es) - 1; 199 last_block = ext4_blocks_count(EXT4_SB(inode->i_sb)->s_es) - 1;
201 200
202 /* 201 /*
@@ -440,7 +439,7 @@ static int __ext4_ext_check(const char *function, struct inode *inode,
440 return 0; 439 return 0;
441 440
442corrupted: 441corrupted:
443 ext4_error(inode->i_sb, function, 442 __ext4_error(inode->i_sb, function,
444 "bad header/extent in inode #%lu: %s - magic %x, " 443 "bad header/extent in inode #%lu: %s - magic %x, "
445 "entries %u, max %u(%u), depth %u(%u)", 444 "entries %u, max %u(%u), depth %u(%u)",
446 inode->i_ino, error_msg, le16_to_cpu(eh->eh_magic), 445 inode->i_ino, error_msg, le16_to_cpu(eh->eh_magic),
@@ -703,7 +702,12 @@ ext4_ext_find_extent(struct inode *inode, ext4_lblk_t block,
703 } 702 }
704 eh = ext_block_hdr(bh); 703 eh = ext_block_hdr(bh);
705 ppos++; 704 ppos++;
706 BUG_ON(ppos > depth); 705 if (unlikely(ppos > depth)) {
706 put_bh(bh);
707 EXT4_ERROR_INODE(inode,
708 "ppos %d > depth %d", ppos, depth);
709 goto err;
710 }
707 path[ppos].p_bh = bh; 711 path[ppos].p_bh = bh;
708 path[ppos].p_hdr = eh; 712 path[ppos].p_hdr = eh;
709 i--; 713 i--;
@@ -749,7 +753,12 @@ int ext4_ext_insert_index(handle_t *handle, struct inode *inode,
749 if (err) 753 if (err)
750 return err; 754 return err;
751 755
752 BUG_ON(logical == le32_to_cpu(curp->p_idx->ei_block)); 756 if (unlikely(logical == le32_to_cpu(curp->p_idx->ei_block))) {
757 EXT4_ERROR_INODE(inode,
758 "logical %d == ei_block %d!",
759 logical, le32_to_cpu(curp->p_idx->ei_block));
760 return -EIO;
761 }
753 len = EXT_MAX_INDEX(curp->p_hdr) - curp->p_idx; 762 len = EXT_MAX_INDEX(curp->p_hdr) - curp->p_idx;
754 if (logical > le32_to_cpu(curp->p_idx->ei_block)) { 763 if (logical > le32_to_cpu(curp->p_idx->ei_block)) {
755 /* insert after */ 764 /* insert after */
@@ -779,9 +788,17 @@ int ext4_ext_insert_index(handle_t *handle, struct inode *inode,
779 ext4_idx_store_pblock(ix, ptr); 788 ext4_idx_store_pblock(ix, ptr);
780 le16_add_cpu(&curp->p_hdr->eh_entries, 1); 789 le16_add_cpu(&curp->p_hdr->eh_entries, 1);
781 790
782 BUG_ON(le16_to_cpu(curp->p_hdr->eh_entries) 791 if (unlikely(le16_to_cpu(curp->p_hdr->eh_entries)
783 > le16_to_cpu(curp->p_hdr->eh_max)); 792 > le16_to_cpu(curp->p_hdr->eh_max))) {
784 BUG_ON(ix > EXT_LAST_INDEX(curp->p_hdr)); 793 EXT4_ERROR_INODE(inode,
794 "logical %d == ei_block %d!",
795 logical, le32_to_cpu(curp->p_idx->ei_block));
796 return -EIO;
797 }
798 if (unlikely(ix > EXT_LAST_INDEX(curp->p_hdr))) {
799 EXT4_ERROR_INODE(inode, "ix > EXT_LAST_INDEX!");
800 return -EIO;
801 }
785 802
786 err = ext4_ext_dirty(handle, inode, curp); 803 err = ext4_ext_dirty(handle, inode, curp);
787 ext4_std_error(inode->i_sb, err); 804 ext4_std_error(inode->i_sb, err);
@@ -819,7 +836,10 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode,
819 836
820 /* if current leaf will be split, then we should use 837 /* if current leaf will be split, then we should use
821 * border from split point */ 838 * border from split point */
822 BUG_ON(path[depth].p_ext > EXT_MAX_EXTENT(path[depth].p_hdr)); 839 if (unlikely(path[depth].p_ext > EXT_MAX_EXTENT(path[depth].p_hdr))) {
840 EXT4_ERROR_INODE(inode, "p_ext > EXT_MAX_EXTENT!");
841 return -EIO;
842 }
823 if (path[depth].p_ext != EXT_MAX_EXTENT(path[depth].p_hdr)) { 843 if (path[depth].p_ext != EXT_MAX_EXTENT(path[depth].p_hdr)) {
824 border = path[depth].p_ext[1].ee_block; 844 border = path[depth].p_ext[1].ee_block;
825 ext_debug("leaf will be split." 845 ext_debug("leaf will be split."
@@ -860,7 +880,11 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode,
860 880
861 /* initialize new leaf */ 881 /* initialize new leaf */
862 newblock = ablocks[--a]; 882 newblock = ablocks[--a];
863 BUG_ON(newblock == 0); 883 if (unlikely(newblock == 0)) {
884 EXT4_ERROR_INODE(inode, "newblock == 0!");
885 err = -EIO;
886 goto cleanup;
887 }
864 bh = sb_getblk(inode->i_sb, newblock); 888 bh = sb_getblk(inode->i_sb, newblock);
865 if (!bh) { 889 if (!bh) {
866 err = -EIO; 890 err = -EIO;
@@ -880,7 +904,14 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode,
880 ex = EXT_FIRST_EXTENT(neh); 904 ex = EXT_FIRST_EXTENT(neh);
881 905
882 /* move remainder of path[depth] to the new leaf */ 906 /* move remainder of path[depth] to the new leaf */
883 BUG_ON(path[depth].p_hdr->eh_entries != path[depth].p_hdr->eh_max); 907 if (unlikely(path[depth].p_hdr->eh_entries !=
908 path[depth].p_hdr->eh_max)) {
909 EXT4_ERROR_INODE(inode, "eh_entries %d != eh_max %d!",
910 path[depth].p_hdr->eh_entries,
911 path[depth].p_hdr->eh_max);
912 err = -EIO;
913 goto cleanup;
914 }
884 /* start copy from next extent */ 915 /* start copy from next extent */
885 /* TODO: we could do it by single memmove */ 916 /* TODO: we could do it by single memmove */
886 m = 0; 917 m = 0;
@@ -927,7 +958,11 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode,
927 958
928 /* create intermediate indexes */ 959 /* create intermediate indexes */
929 k = depth - at - 1; 960 k = depth - at - 1;
930 BUG_ON(k < 0); 961 if (unlikely(k < 0)) {
962 EXT4_ERROR_INODE(inode, "k %d < 0!", k);
963 err = -EIO;
964 goto cleanup;
965 }
931 if (k) 966 if (k)
932 ext_debug("create %d intermediate indices\n", k); 967 ext_debug("create %d intermediate indices\n", k);
933 /* insert new index into current index block */ 968 /* insert new index into current index block */
@@ -964,8 +999,14 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode,
964 999
965 ext_debug("cur 0x%p, last 0x%p\n", path[i].p_idx, 1000 ext_debug("cur 0x%p, last 0x%p\n", path[i].p_idx,
966 EXT_MAX_INDEX(path[i].p_hdr)); 1001 EXT_MAX_INDEX(path[i].p_hdr));
967 BUG_ON(EXT_MAX_INDEX(path[i].p_hdr) != 1002 if (unlikely(EXT_MAX_INDEX(path[i].p_hdr) !=
968 EXT_LAST_INDEX(path[i].p_hdr)); 1003 EXT_LAST_INDEX(path[i].p_hdr))) {
1004 EXT4_ERROR_INODE(inode,
1005 "EXT_MAX_INDEX != EXT_LAST_INDEX ee_block %d!",
1006 le32_to_cpu(path[i].p_ext->ee_block));
1007 err = -EIO;
1008 goto cleanup;
1009 }
969 while (path[i].p_idx <= EXT_MAX_INDEX(path[i].p_hdr)) { 1010 while (path[i].p_idx <= EXT_MAX_INDEX(path[i].p_hdr)) {
970 ext_debug("%d: move %d:%llu in new index %llu\n", i, 1011 ext_debug("%d: move %d:%llu in new index %llu\n", i,
971 le32_to_cpu(path[i].p_idx->ei_block), 1012 le32_to_cpu(path[i].p_idx->ei_block),
@@ -1203,7 +1244,10 @@ ext4_ext_search_left(struct inode *inode, struct ext4_ext_path *path,
1203 struct ext4_extent *ex; 1244 struct ext4_extent *ex;
1204 int depth, ee_len; 1245 int depth, ee_len;
1205 1246
1206 BUG_ON(path == NULL); 1247 if (unlikely(path == NULL)) {
1248 EXT4_ERROR_INODE(inode, "path == NULL *logical %d!", *logical);
1249 return -EIO;
1250 }
1207 depth = path->p_depth; 1251 depth = path->p_depth;
1208 *phys = 0; 1252 *phys = 0;
1209 1253
@@ -1217,15 +1261,33 @@ ext4_ext_search_left(struct inode *inode, struct ext4_ext_path *path,
1217 ex = path[depth].p_ext; 1261 ex = path[depth].p_ext;
1218 ee_len = ext4_ext_get_actual_len(ex); 1262 ee_len = ext4_ext_get_actual_len(ex);
1219 if (*logical < le32_to_cpu(ex->ee_block)) { 1263 if (*logical < le32_to_cpu(ex->ee_block)) {
1220 BUG_ON(EXT_FIRST_EXTENT(path[depth].p_hdr) != ex); 1264 if (unlikely(EXT_FIRST_EXTENT(path[depth].p_hdr) != ex)) {
1265 EXT4_ERROR_INODE(inode,
1266 "EXT_FIRST_EXTENT != ex *logical %d ee_block %d!",
1267 *logical, le32_to_cpu(ex->ee_block));
1268 return -EIO;
1269 }
1221 while (--depth >= 0) { 1270 while (--depth >= 0) {
1222 ix = path[depth].p_idx; 1271 ix = path[depth].p_idx;
1223 BUG_ON(ix != EXT_FIRST_INDEX(path[depth].p_hdr)); 1272 if (unlikely(ix != EXT_FIRST_INDEX(path[depth].p_hdr))) {
1273 EXT4_ERROR_INODE(inode,
1274 "ix (%d) != EXT_FIRST_INDEX (%d) (depth %d)!",
1275 ix != NULL ? ix->ei_block : 0,
1276 EXT_FIRST_INDEX(path[depth].p_hdr) != NULL ?
1277 EXT_FIRST_INDEX(path[depth].p_hdr)->ei_block : 0,
1278 depth);
1279 return -EIO;
1280 }
1224 } 1281 }
1225 return 0; 1282 return 0;
1226 } 1283 }
1227 1284
1228 BUG_ON(*logical < (le32_to_cpu(ex->ee_block) + ee_len)); 1285 if (unlikely(*logical < (le32_to_cpu(ex->ee_block) + ee_len))) {
1286 EXT4_ERROR_INODE(inode,
1287 "logical %d < ee_block %d + ee_len %d!",
1288 *logical, le32_to_cpu(ex->ee_block), ee_len);
1289 return -EIO;
1290 }
1229 1291
1230 *logical = le32_to_cpu(ex->ee_block) + ee_len - 1; 1292 *logical = le32_to_cpu(ex->ee_block) + ee_len - 1;
1231 *phys = ext_pblock(ex) + ee_len - 1; 1293 *phys = ext_pblock(ex) + ee_len - 1;
@@ -1251,7 +1313,10 @@ ext4_ext_search_right(struct inode *inode, struct ext4_ext_path *path,
1251 int depth; /* Note, NOT eh_depth; depth from top of tree */ 1313 int depth; /* Note, NOT eh_depth; depth from top of tree */
1252 int ee_len; 1314 int ee_len;
1253 1315
1254 BUG_ON(path == NULL); 1316 if (unlikely(path == NULL)) {
1317 EXT4_ERROR_INODE(inode, "path == NULL *logical %d!", *logical);
1318 return -EIO;
1319 }
1255 depth = path->p_depth; 1320 depth = path->p_depth;
1256 *phys = 0; 1321 *phys = 0;
1257 1322
@@ -1265,17 +1330,32 @@ ext4_ext_search_right(struct inode *inode, struct ext4_ext_path *path,
1265 ex = path[depth].p_ext; 1330 ex = path[depth].p_ext;
1266 ee_len = ext4_ext_get_actual_len(ex); 1331 ee_len = ext4_ext_get_actual_len(ex);
1267 if (*logical < le32_to_cpu(ex->ee_block)) { 1332 if (*logical < le32_to_cpu(ex->ee_block)) {
1268 BUG_ON(EXT_FIRST_EXTENT(path[depth].p_hdr) != ex); 1333 if (unlikely(EXT_FIRST_EXTENT(path[depth].p_hdr) != ex)) {
1334 EXT4_ERROR_INODE(inode,
1335 "first_extent(path[%d].p_hdr) != ex",
1336 depth);
1337 return -EIO;
1338 }
1269 while (--depth >= 0) { 1339 while (--depth >= 0) {
1270 ix = path[depth].p_idx; 1340 ix = path[depth].p_idx;
1271 BUG_ON(ix != EXT_FIRST_INDEX(path[depth].p_hdr)); 1341 if (unlikely(ix != EXT_FIRST_INDEX(path[depth].p_hdr))) {
1342 EXT4_ERROR_INODE(inode,
1343 "ix != EXT_FIRST_INDEX *logical %d!",
1344 *logical);
1345 return -EIO;
1346 }
1272 } 1347 }
1273 *logical = le32_to_cpu(ex->ee_block); 1348 *logical = le32_to_cpu(ex->ee_block);
1274 *phys = ext_pblock(ex); 1349 *phys = ext_pblock(ex);
1275 return 0; 1350 return 0;
1276 } 1351 }
1277 1352
1278 BUG_ON(*logical < (le32_to_cpu(ex->ee_block) + ee_len)); 1353 if (unlikely(*logical < (le32_to_cpu(ex->ee_block) + ee_len))) {
1354 EXT4_ERROR_INODE(inode,
1355 "logical %d < ee_block %d + ee_len %d!",
1356 *logical, le32_to_cpu(ex->ee_block), ee_len);
1357 return -EIO;
1358 }
1279 1359
1280 if (ex != EXT_LAST_EXTENT(path[depth].p_hdr)) { 1360 if (ex != EXT_LAST_EXTENT(path[depth].p_hdr)) {
1281 /* next allocated block in this leaf */ 1361 /* next allocated block in this leaf */
@@ -1414,8 +1494,12 @@ static int ext4_ext_correct_indexes(handle_t *handle, struct inode *inode,
1414 1494
1415 eh = path[depth].p_hdr; 1495 eh = path[depth].p_hdr;
1416 ex = path[depth].p_ext; 1496 ex = path[depth].p_ext;
1417 BUG_ON(ex == NULL); 1497
1418 BUG_ON(eh == NULL); 1498 if (unlikely(ex == NULL || eh == NULL)) {
1499 EXT4_ERROR_INODE(inode,
1500 "ex %p == NULL or eh %p == NULL", ex, eh);
1501 return -EIO;
1502 }
1419 1503
1420 if (depth == 0) { 1504 if (depth == 0) {
1421 /* there is no tree at all */ 1505 /* there is no tree at all */
@@ -1538,8 +1622,9 @@ int ext4_ext_try_to_merge(struct inode *inode,
1538 merge_done = 1; 1622 merge_done = 1;
1539 WARN_ON(eh->eh_entries == 0); 1623 WARN_ON(eh->eh_entries == 0);
1540 if (!eh->eh_entries) 1624 if (!eh->eh_entries)
1541 ext4_error(inode->i_sb, "ext4_ext_try_to_merge", 1625 ext4_error(inode->i_sb,
1542 "inode#%lu, eh->eh_entries = 0!", inode->i_ino); 1626 "inode#%lu, eh->eh_entries = 0!",
1627 inode->i_ino);
1543 } 1628 }
1544 1629
1545 return merge_done; 1630 return merge_done;
@@ -1612,13 +1697,19 @@ int ext4_ext_insert_extent(handle_t *handle, struct inode *inode,
1612 ext4_lblk_t next; 1697 ext4_lblk_t next;
1613 unsigned uninitialized = 0; 1698 unsigned uninitialized = 0;
1614 1699
1615 BUG_ON(ext4_ext_get_actual_len(newext) == 0); 1700 if (unlikely(ext4_ext_get_actual_len(newext) == 0)) {
1701 EXT4_ERROR_INODE(inode, "ext4_ext_get_actual_len(newext) == 0");
1702 return -EIO;
1703 }
1616 depth = ext_depth(inode); 1704 depth = ext_depth(inode);
1617 ex = path[depth].p_ext; 1705 ex = path[depth].p_ext;
1618 BUG_ON(path[depth].p_hdr == NULL); 1706 if (unlikely(path[depth].p_hdr == NULL)) {
1707 EXT4_ERROR_INODE(inode, "path[%d].p_hdr == NULL", depth);
1708 return -EIO;
1709 }
1619 1710
1620 /* try to insert block into found extent and return */ 1711 /* try to insert block into found extent and return */
1621 if (ex && (flag != EXT4_GET_BLOCKS_DIO_CREATE_EXT) 1712 if (ex && !(flag & EXT4_GET_BLOCKS_PRE_IO)
1622 && ext4_can_extents_be_merged(inode, ex, newext)) { 1713 && ext4_can_extents_be_merged(inode, ex, newext)) {
1623 ext_debug("append [%d]%d block to %d:[%d]%d (from %llu)\n", 1714 ext_debug("append [%d]%d block to %d:[%d]%d (from %llu)\n",
1624 ext4_ext_is_uninitialized(newext), 1715 ext4_ext_is_uninitialized(newext),
@@ -1739,7 +1830,7 @@ has_space:
1739 1830
1740merge: 1831merge:
1741 /* try to merge extents to the right */ 1832 /* try to merge extents to the right */
1742 if (flag != EXT4_GET_BLOCKS_DIO_CREATE_EXT) 1833 if (!(flag & EXT4_GET_BLOCKS_PRE_IO))
1743 ext4_ext_try_to_merge(inode, path, nearex); 1834 ext4_ext_try_to_merge(inode, path, nearex);
1744 1835
1745 /* try to merge extents to the left */ 1836 /* try to merge extents to the left */
@@ -1787,7 +1878,11 @@ int ext4_ext_walk_space(struct inode *inode, ext4_lblk_t block,
1787 } 1878 }
1788 1879
1789 depth = ext_depth(inode); 1880 depth = ext_depth(inode);
1790 BUG_ON(path[depth].p_hdr == NULL); 1881 if (unlikely(path[depth].p_hdr == NULL)) {
1882 EXT4_ERROR_INODE(inode, "path[%d].p_hdr == NULL", depth);
1883 err = -EIO;
1884 break;
1885 }
1791 ex = path[depth].p_ext; 1886 ex = path[depth].p_ext;
1792 next = ext4_ext_next_allocated_block(path); 1887 next = ext4_ext_next_allocated_block(path);
1793 1888
@@ -1838,7 +1933,11 @@ int ext4_ext_walk_space(struct inode *inode, ext4_lblk_t block,
1838 cbex.ec_type = EXT4_EXT_CACHE_EXTENT; 1933 cbex.ec_type = EXT4_EXT_CACHE_EXTENT;
1839 } 1934 }
1840 1935
1841 BUG_ON(cbex.ec_len == 0); 1936 if (unlikely(cbex.ec_len == 0)) {
1937 EXT4_ERROR_INODE(inode, "cbex.ec_len == 0");
1938 err = -EIO;
1939 break;
1940 }
1842 err = func(inode, path, &cbex, ex, cbdata); 1941 err = func(inode, path, &cbex, ex, cbdata);
1843 ext4_ext_drop_refs(path); 1942 ext4_ext_drop_refs(path);
1844 1943
@@ -1952,7 +2051,7 @@ ext4_ext_in_cache(struct inode *inode, ext4_lblk_t block,
1952 2051
1953 BUG_ON(cex->ec_type != EXT4_EXT_CACHE_GAP && 2052 BUG_ON(cex->ec_type != EXT4_EXT_CACHE_GAP &&
1954 cex->ec_type != EXT4_EXT_CACHE_EXTENT); 2053 cex->ec_type != EXT4_EXT_CACHE_EXTENT);
1955 if (block >= cex->ec_block && block < cex->ec_block + cex->ec_len) { 2054 if (in_range(block, cex->ec_block, cex->ec_len)) {
1956 ex->ee_block = cpu_to_le32(cex->ec_block); 2055 ex->ee_block = cpu_to_le32(cex->ec_block);
1957 ext4_ext_store_pblock(ex, cex->ec_start); 2056 ext4_ext_store_pblock(ex, cex->ec_start);
1958 ex->ee_len = cpu_to_le16(cex->ec_len); 2057 ex->ee_len = cpu_to_le16(cex->ec_len);
@@ -1981,7 +2080,10 @@ static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode,
1981 /* free index block */ 2080 /* free index block */
1982 path--; 2081 path--;
1983 leaf = idx_pblock(path->p_idx); 2082 leaf = idx_pblock(path->p_idx);
1984 BUG_ON(path->p_hdr->eh_entries == 0); 2083 if (unlikely(path->p_hdr->eh_entries == 0)) {
2084 EXT4_ERROR_INODE(inode, "path->p_hdr->eh_entries == 0");
2085 return -EIO;
2086 }
1985 err = ext4_ext_get_access(handle, inode, path); 2087 err = ext4_ext_get_access(handle, inode, path);
1986 if (err) 2088 if (err)
1987 return err; 2089 return err;
@@ -2119,8 +2221,10 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
2119 if (!path[depth].p_hdr) 2221 if (!path[depth].p_hdr)
2120 path[depth].p_hdr = ext_block_hdr(path[depth].p_bh); 2222 path[depth].p_hdr = ext_block_hdr(path[depth].p_bh);
2121 eh = path[depth].p_hdr; 2223 eh = path[depth].p_hdr;
2122 BUG_ON(eh == NULL); 2224 if (unlikely(path[depth].p_hdr == NULL)) {
2123 2225 EXT4_ERROR_INODE(inode, "path[%d].p_hdr == NULL", depth);
2226 return -EIO;
2227 }
2124 /* find where to start removing */ 2228 /* find where to start removing */
2125 ex = EXT_LAST_EXTENT(eh); 2229 ex = EXT_LAST_EXTENT(eh);
2126 2230
@@ -2983,7 +3087,7 @@ fix_extent_len:
2983 ext4_ext_dirty(handle, inode, path + depth); 3087 ext4_ext_dirty(handle, inode, path + depth);
2984 return err; 3088 return err;
2985} 3089}
2986static int ext4_convert_unwritten_extents_dio(handle_t *handle, 3090static int ext4_convert_unwritten_extents_endio(handle_t *handle,
2987 struct inode *inode, 3091 struct inode *inode,
2988 struct ext4_ext_path *path) 3092 struct ext4_ext_path *path)
2989{ 3093{
@@ -3063,8 +3167,8 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode,
3063 flags, allocated); 3167 flags, allocated);
3064 ext4_ext_show_leaf(inode, path); 3168 ext4_ext_show_leaf(inode, path);
3065 3169
3066 /* DIO get_block() before submit the IO, split the extent */ 3170 /* get_block() before submit the IO, split the extent */
3067 if (flags == EXT4_GET_BLOCKS_DIO_CREATE_EXT) { 3171 if ((flags & EXT4_GET_BLOCKS_PRE_IO)) {
3068 ret = ext4_split_unwritten_extents(handle, 3172 ret = ext4_split_unwritten_extents(handle,
3069 inode, path, iblock, 3173 inode, path, iblock,
3070 max_blocks, flags); 3174 max_blocks, flags);
@@ -3074,14 +3178,16 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode,
3074 * completed 3178 * completed
3075 */ 3179 */
3076 if (io) 3180 if (io)
3077 io->flag = DIO_AIO_UNWRITTEN; 3181 io->flag = EXT4_IO_UNWRITTEN;
3078 else 3182 else
3079 EXT4_I(inode)->i_state |= EXT4_STATE_DIO_UNWRITTEN; 3183 ext4_set_inode_state(inode, EXT4_STATE_DIO_UNWRITTEN);
3184 if (ext4_should_dioread_nolock(inode))
3185 set_buffer_uninit(bh_result);
3080 goto out; 3186 goto out;
3081 } 3187 }
3082 /* async DIO end_io complete, convert the filled extent to written */ 3188 /* IO end_io complete, convert the filled extent to written */
3083 if (flags == EXT4_GET_BLOCKS_DIO_CONVERT_EXT) { 3189 if ((flags & EXT4_GET_BLOCKS_CONVERT)) {
3084 ret = ext4_convert_unwritten_extents_dio(handle, inode, 3190 ret = ext4_convert_unwritten_extents_endio(handle, inode,
3085 path); 3191 path);
3086 if (ret >= 0) 3192 if (ret >= 0)
3087 ext4_update_inode_fsync_trans(handle, inode, 1); 3193 ext4_update_inode_fsync_trans(handle, inode, 1);
@@ -3185,7 +3291,7 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
3185{ 3291{
3186 struct ext4_ext_path *path = NULL; 3292 struct ext4_ext_path *path = NULL;
3187 struct ext4_extent_header *eh; 3293 struct ext4_extent_header *eh;
3188 struct ext4_extent newex, *ex; 3294 struct ext4_extent newex, *ex, *last_ex;
3189 ext4_fsblk_t newblock; 3295 ext4_fsblk_t newblock;
3190 int err = 0, depth, ret, cache_type; 3296 int err = 0, depth, ret, cache_type;
3191 unsigned int allocated = 0; 3297 unsigned int allocated = 0;
@@ -3237,10 +3343,10 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
3237 * this situation is possible, though, _during_ tree modification; 3343 * this situation is possible, though, _during_ tree modification;
3238 * this is why assert can't be put in ext4_ext_find_extent() 3344 * this is why assert can't be put in ext4_ext_find_extent()
3239 */ 3345 */
3240 if (path[depth].p_ext == NULL && depth != 0) { 3346 if (unlikely(path[depth].p_ext == NULL && depth != 0)) {
3241 ext4_error(inode->i_sb, __func__, "bad extent address " 3347 EXT4_ERROR_INODE(inode, "bad extent address "
3242 "inode: %lu, iblock: %d, depth: %d", 3348 "iblock: %d, depth: %d pblock %lld",
3243 inode->i_ino, iblock, depth); 3349 iblock, depth, path[depth].p_block);
3244 err = -EIO; 3350 err = -EIO;
3245 goto out2; 3351 goto out2;
3246 } 3352 }
@@ -3258,7 +3364,7 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
3258 */ 3364 */
3259 ee_len = ext4_ext_get_actual_len(ex); 3365 ee_len = ext4_ext_get_actual_len(ex);
3260 /* if found extent covers block, simply return it */ 3366 /* if found extent covers block, simply return it */
3261 if (iblock >= ee_block && iblock < ee_block + ee_len) { 3367 if (in_range(iblock, ee_block, ee_len)) {
3262 newblock = iblock - ee_block + ee_start; 3368 newblock = iblock - ee_block + ee_start;
3263 /* number of remaining blocks in the extent */ 3369 /* number of remaining blocks in the extent */
3264 allocated = ee_len - (iblock - ee_block); 3370 allocated = ee_len - (iblock - ee_block);
@@ -3350,21 +3456,35 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
3350 if (flags & EXT4_GET_BLOCKS_UNINIT_EXT){ 3456 if (flags & EXT4_GET_BLOCKS_UNINIT_EXT){
3351 ext4_ext_mark_uninitialized(&newex); 3457 ext4_ext_mark_uninitialized(&newex);
3352 /* 3458 /*
3353 * io_end structure was created for every async 3459 * io_end structure was created for every IO write to an
3354 * direct IO write to the middle of the file. 3460 * uninitialized extent. To avoid unecessary conversion,
3355 * To avoid unecessary convertion for every aio dio rewrite 3461 * here we flag the IO that really needs the conversion.
3356 * to the mid of file, here we flag the IO that is really
3357 * need the convertion.
3358 * For non asycn direct IO case, flag the inode state 3462 * For non asycn direct IO case, flag the inode state
3359 * that we need to perform convertion when IO is done. 3463 * that we need to perform convertion when IO is done.
3360 */ 3464 */
3361 if (flags == EXT4_GET_BLOCKS_DIO_CREATE_EXT) { 3465 if ((flags & EXT4_GET_BLOCKS_PRE_IO)) {
3362 if (io) 3466 if (io)
3363 io->flag = DIO_AIO_UNWRITTEN; 3467 io->flag = EXT4_IO_UNWRITTEN;
3364 else 3468 else
3365 EXT4_I(inode)->i_state |= 3469 ext4_set_inode_state(inode,
3366 EXT4_STATE_DIO_UNWRITTEN;; 3470 EXT4_STATE_DIO_UNWRITTEN);
3471 }
3472 if (ext4_should_dioread_nolock(inode))
3473 set_buffer_uninit(bh_result);
3474 }
3475
3476 if (unlikely(EXT4_I(inode)->i_flags & EXT4_EOFBLOCKS_FL)) {
3477 if (unlikely(!eh->eh_entries)) {
3478 EXT4_ERROR_INODE(inode,
3479 "eh->eh_entries == 0 ee_block %d",
3480 ex->ee_block);
3481 err = -EIO;
3482 goto out2;
3367 } 3483 }
3484 last_ex = EXT_LAST_EXTENT(eh);
3485 if (iblock + ar.len > le32_to_cpu(last_ex->ee_block)
3486 + ext4_ext_get_actual_len(last_ex))
3487 EXT4_I(inode)->i_flags &= ~EXT4_EOFBLOCKS_FL;
3368 } 3488 }
3369 err = ext4_ext_insert_extent(handle, inode, path, &newex, flags); 3489 err = ext4_ext_insert_extent(handle, inode, path, &newex, flags);
3370 if (err) { 3490 if (err) {
@@ -3499,6 +3619,13 @@ static void ext4_falloc_update_inode(struct inode *inode,
3499 i_size_write(inode, new_size); 3619 i_size_write(inode, new_size);
3500 if (new_size > EXT4_I(inode)->i_disksize) 3620 if (new_size > EXT4_I(inode)->i_disksize)
3501 ext4_update_i_disksize(inode, new_size); 3621 ext4_update_i_disksize(inode, new_size);
3622 } else {
3623 /*
3624 * Mark that we allocate beyond EOF so the subsequent truncate
3625 * can proceed even if the new size is the same as i_size.
3626 */
3627 if (new_size > i_size_read(inode))
3628 EXT4_I(inode)->i_flags |= EXT4_EOFBLOCKS_FL;
3502 } 3629 }
3503 3630
3504} 3631}
@@ -3603,7 +3730,7 @@ retry:
3603 * Returns 0 on success. 3730 * Returns 0 on success.
3604 */ 3731 */
3605int ext4_convert_unwritten_extents(struct inode *inode, loff_t offset, 3732int ext4_convert_unwritten_extents(struct inode *inode, loff_t offset,
3606 loff_t len) 3733 ssize_t len)
3607{ 3734{
3608 handle_t *handle; 3735 handle_t *handle;
3609 ext4_lblk_t block; 3736 ext4_lblk_t block;
@@ -3635,7 +3762,7 @@ int ext4_convert_unwritten_extents(struct inode *inode, loff_t offset,
3635 map_bh.b_state = 0; 3762 map_bh.b_state = 0;
3636 ret = ext4_get_blocks(handle, inode, block, 3763 ret = ext4_get_blocks(handle, inode, block,
3637 max_blocks, &map_bh, 3764 max_blocks, &map_bh,
3638 EXT4_GET_BLOCKS_DIO_CONVERT_EXT); 3765 EXT4_GET_BLOCKS_IO_CONVERT_EXT);
3639 if (ret <= 0) { 3766 if (ret <= 0) {
3640 WARN_ON(ret <= 0); 3767 WARN_ON(ret <= 0);
3641 printk(KERN_ERR "%s: ext4_ext_get_blocks " 3768 printk(KERN_ERR "%s: ext4_ext_get_blocks "
@@ -3739,7 +3866,7 @@ static int ext4_xattr_fiemap(struct inode *inode,
3739 int error = 0; 3866 int error = 0;
3740 3867
3741 /* in-inode? */ 3868 /* in-inode? */
3742 if (EXT4_I(inode)->i_state & EXT4_STATE_XATTR) { 3869 if (ext4_test_inode_state(inode, EXT4_STATE_XATTR)) {
3743 struct ext4_iloc iloc; 3870 struct ext4_iloc iloc;
3744 int offset; /* offset of xattr in inode */ 3871 int offset; /* offset of xattr in inode */
3745 3872
@@ -3767,7 +3894,6 @@ int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
3767 __u64 start, __u64 len) 3894 __u64 start, __u64 len)
3768{ 3895{
3769 ext4_lblk_t start_blk; 3896 ext4_lblk_t start_blk;
3770 ext4_lblk_t len_blks;
3771 int error = 0; 3897 int error = 0;
3772 3898
3773 /* fallback to generic here if not in extents fmt */ 3899 /* fallback to generic here if not in extents fmt */
@@ -3781,8 +3907,14 @@ int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
3781 if (fieinfo->fi_flags & FIEMAP_FLAG_XATTR) { 3907 if (fieinfo->fi_flags & FIEMAP_FLAG_XATTR) {
3782 error = ext4_xattr_fiemap(inode, fieinfo); 3908 error = ext4_xattr_fiemap(inode, fieinfo);
3783 } else { 3909 } else {
3910 ext4_lblk_t len_blks;
3911 __u64 last_blk;
3912
3784 start_blk = start >> inode->i_sb->s_blocksize_bits; 3913 start_blk = start >> inode->i_sb->s_blocksize_bits;
3785 len_blks = len >> inode->i_sb->s_blocksize_bits; 3914 last_blk = (start + len - 1) >> inode->i_sb->s_blocksize_bits;
3915 if (last_blk >= EXT_MAX_BLOCK)
3916 last_blk = EXT_MAX_BLOCK-1;
3917 len_blks = ((ext4_lblk_t) last_blk) - start_blk + 1;
3786 3918
3787 /* 3919 /*
3788 * Walk the extent tree gathering extent information. 3920 * Walk the extent tree gathering extent information.
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index 9630583cef28..d0776e410f34 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -23,6 +23,7 @@
23#include <linux/jbd2.h> 23#include <linux/jbd2.h>
24#include <linux/mount.h> 24#include <linux/mount.h>
25#include <linux/path.h> 25#include <linux/path.h>
26#include <linux/quotaops.h>
26#include "ext4.h" 27#include "ext4.h"
27#include "ext4_jbd2.h" 28#include "ext4_jbd2.h"
28#include "xattr.h" 29#include "xattr.h"
@@ -35,9 +36,9 @@
35 */ 36 */
36static int ext4_release_file(struct inode *inode, struct file *filp) 37static int ext4_release_file(struct inode *inode, struct file *filp)
37{ 38{
38 if (EXT4_I(inode)->i_state & EXT4_STATE_DA_ALLOC_CLOSE) { 39 if (ext4_test_inode_state(inode, EXT4_STATE_DA_ALLOC_CLOSE)) {
39 ext4_alloc_da_blocks(inode); 40 ext4_alloc_da_blocks(inode);
40 EXT4_I(inode)->i_state &= ~EXT4_STATE_DA_ALLOC_CLOSE; 41 ext4_clear_inode_state(inode, EXT4_STATE_DA_ALLOC_CLOSE);
41 } 42 }
42 /* if we are the last writer on the inode, drop the block reservation */ 43 /* if we are the last writer on the inode, drop the block reservation */
43 if ((filp->f_mode & FMODE_WRITE) && 44 if ((filp->f_mode & FMODE_WRITE) &&
@@ -116,18 +117,16 @@ static int ext4_file_open(struct inode * inode, struct file * filp)
116 * devices or filesystem images. 117 * devices or filesystem images.
117 */ 118 */
118 memset(buf, 0, sizeof(buf)); 119 memset(buf, 0, sizeof(buf));
119 path.mnt = mnt->mnt_parent; 120 path.mnt = mnt;
120 path.dentry = mnt->mnt_mountpoint; 121 path.dentry = mnt->mnt_root;
121 path_get(&path);
122 cp = d_path(&path, buf, sizeof(buf)); 122 cp = d_path(&path, buf, sizeof(buf));
123 path_put(&path);
124 if (!IS_ERR(cp)) { 123 if (!IS_ERR(cp)) {
125 memcpy(sbi->s_es->s_last_mounted, cp, 124 memcpy(sbi->s_es->s_last_mounted, cp,
126 sizeof(sbi->s_es->s_last_mounted)); 125 sizeof(sbi->s_es->s_last_mounted));
127 sb->s_dirt = 1; 126 sb->s_dirt = 1;
128 } 127 }
129 } 128 }
130 return generic_file_open(inode, filp); 129 return dquot_file_open(inode, filp);
131} 130}
132 131
133const struct file_operations ext4_file_operations = { 132const struct file_operations ext4_file_operations = {
diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c
index 98bd140aad01..0d0c3239c1cd 100644
--- a/fs/ext4/fsync.c
+++ b/fs/ext4/fsync.c
@@ -63,7 +63,7 @@ int ext4_sync_file(struct file *file, struct dentry *dentry, int datasync)
63 if (inode->i_sb->s_flags & MS_RDONLY) 63 if (inode->i_sb->s_flags & MS_RDONLY)
64 return 0; 64 return 0;
65 65
66 ret = flush_aio_dio_completed_IO(inode); 66 ret = flush_completed_IO(inode);
67 if (ret < 0) 67 if (ret < 0)
68 return ret; 68 return ret;
69 69
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index f3624ead4f6c..361c0b9962a8 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -76,8 +76,7 @@ unsigned ext4_init_inode_bitmap(struct super_block *sb, struct buffer_head *bh,
76 /* If checksum is bad mark all blocks and inodes use to prevent 76 /* If checksum is bad mark all blocks and inodes use to prevent
77 * allocation, essentially implementing a per-group read-only flag. */ 77 * allocation, essentially implementing a per-group read-only flag. */
78 if (!ext4_group_desc_csum_verify(sbi, block_group, gdp)) { 78 if (!ext4_group_desc_csum_verify(sbi, block_group, gdp)) {
79 ext4_error(sb, __func__, "Checksum bad for group %u", 79 ext4_error(sb, "Checksum bad for group %u", block_group);
80 block_group);
81 ext4_free_blks_set(sb, gdp, 0); 80 ext4_free_blks_set(sb, gdp, 0);
82 ext4_free_inodes_set(sb, gdp, 0); 81 ext4_free_inodes_set(sb, gdp, 0);
83 ext4_itable_unused_set(sb, gdp, 0); 82 ext4_itable_unused_set(sb, gdp, 0);
@@ -111,8 +110,7 @@ ext4_read_inode_bitmap(struct super_block *sb, ext4_group_t block_group)
111 bitmap_blk = ext4_inode_bitmap(sb, desc); 110 bitmap_blk = ext4_inode_bitmap(sb, desc);
112 bh = sb_getblk(sb, bitmap_blk); 111 bh = sb_getblk(sb, bitmap_blk);
113 if (unlikely(!bh)) { 112 if (unlikely(!bh)) {
114 ext4_error(sb, __func__, 113 ext4_error(sb, "Cannot read inode bitmap - "
115 "Cannot read inode bitmap - "
116 "block_group = %u, inode_bitmap = %llu", 114 "block_group = %u, inode_bitmap = %llu",
117 block_group, bitmap_blk); 115 block_group, bitmap_blk);
118 return NULL; 116 return NULL;
@@ -153,8 +151,7 @@ ext4_read_inode_bitmap(struct super_block *sb, ext4_group_t block_group)
153 set_bitmap_uptodate(bh); 151 set_bitmap_uptodate(bh);
154 if (bh_submit_read(bh) < 0) { 152 if (bh_submit_read(bh) < 0) {
155 put_bh(bh); 153 put_bh(bh);
156 ext4_error(sb, __func__, 154 ext4_error(sb, "Cannot read inode bitmap - "
157 "Cannot read inode bitmap - "
158 "block_group = %u, inode_bitmap = %llu", 155 "block_group = %u, inode_bitmap = %llu",
159 block_group, bitmap_blk); 156 block_group, bitmap_blk);
160 return NULL; 157 return NULL;
@@ -217,10 +214,10 @@ void ext4_free_inode(handle_t *handle, struct inode *inode)
217 * Note: we must free any quota before locking the superblock, 214 * Note: we must free any quota before locking the superblock,
218 * as writing the quota to disk may need the lock as well. 215 * as writing the quota to disk may need the lock as well.
219 */ 216 */
220 vfs_dq_init(inode); 217 dquot_initialize(inode);
221 ext4_xattr_delete_inode(handle, inode); 218 ext4_xattr_delete_inode(handle, inode);
222 vfs_dq_free_inode(inode); 219 dquot_free_inode(inode);
223 vfs_dq_drop(inode); 220 dquot_drop(inode);
224 221
225 is_directory = S_ISDIR(inode->i_mode); 222 is_directory = S_ISDIR(inode->i_mode);
226 223
@@ -229,8 +226,7 @@ void ext4_free_inode(handle_t *handle, struct inode *inode)
229 226
230 es = EXT4_SB(sb)->s_es; 227 es = EXT4_SB(sb)->s_es;
231 if (ino < EXT4_FIRST_INO(sb) || ino > le32_to_cpu(es->s_inodes_count)) { 228 if (ino < EXT4_FIRST_INO(sb) || ino > le32_to_cpu(es->s_inodes_count)) {
232 ext4_error(sb, "ext4_free_inode", 229 ext4_error(sb, "reserved or nonexistent inode %lu", ino);
233 "reserved or nonexistent inode %lu", ino);
234 goto error_return; 230 goto error_return;
235 } 231 }
236 block_group = (ino - 1) / EXT4_INODES_PER_GROUP(sb); 232 block_group = (ino - 1) / EXT4_INODES_PER_GROUP(sb);
@@ -248,8 +244,7 @@ void ext4_free_inode(handle_t *handle, struct inode *inode)
248 cleared = ext4_clear_bit_atomic(ext4_group_lock_ptr(sb, block_group), 244 cleared = ext4_clear_bit_atomic(ext4_group_lock_ptr(sb, block_group),
249 bit, bitmap_bh->b_data); 245 bit, bitmap_bh->b_data);
250 if (!cleared) 246 if (!cleared)
251 ext4_error(sb, "ext4_free_inode", 247 ext4_error(sb, "bit already cleared for inode %lu", ino);
252 "bit already cleared for inode %lu", ino);
253 else { 248 else {
254 gdp = ext4_get_group_desc(sb, block_group, &bh2); 249 gdp = ext4_get_group_desc(sb, block_group, &bh2);
255 250
@@ -736,8 +731,7 @@ static int ext4_claim_inode(struct super_block *sb,
736 if ((group == 0 && ino < EXT4_FIRST_INO(sb)) || 731 if ((group == 0 && ino < EXT4_FIRST_INO(sb)) ||
737 ino > EXT4_INODES_PER_GROUP(sb)) { 732 ino > EXT4_INODES_PER_GROUP(sb)) {
738 ext4_unlock_group(sb, group); 733 ext4_unlock_group(sb, group);
739 ext4_error(sb, __func__, 734 ext4_error(sb, "reserved inode or inode > inodes count - "
740 "reserved inode or inode > inodes count - "
741 "block_group = %u, inode=%lu", group, 735 "block_group = %u, inode=%lu", group,
742 ino + group * EXT4_INODES_PER_GROUP(sb)); 736 ino + group * EXT4_INODES_PER_GROUP(sb));
743 return 1; 737 return 1;
@@ -904,7 +898,7 @@ repeat_in_this_group:
904 BUFFER_TRACE(inode_bitmap_bh, 898 BUFFER_TRACE(inode_bitmap_bh,
905 "call ext4_handle_dirty_metadata"); 899 "call ext4_handle_dirty_metadata");
906 err = ext4_handle_dirty_metadata(handle, 900 err = ext4_handle_dirty_metadata(handle,
907 inode, 901 NULL,
908 inode_bitmap_bh); 902 inode_bitmap_bh);
909 if (err) 903 if (err)
910 goto fail; 904 goto fail;
@@ -1029,15 +1023,16 @@ got:
1029 inode->i_generation = sbi->s_next_generation++; 1023 inode->i_generation = sbi->s_next_generation++;
1030 spin_unlock(&sbi->s_next_gen_lock); 1024 spin_unlock(&sbi->s_next_gen_lock);
1031 1025
1032 ei->i_state = EXT4_STATE_NEW; 1026 ei->i_state_flags = 0;
1027 ext4_set_inode_state(inode, EXT4_STATE_NEW);
1033 1028
1034 ei->i_extra_isize = EXT4_SB(sb)->s_want_extra_isize; 1029 ei->i_extra_isize = EXT4_SB(sb)->s_want_extra_isize;
1035 1030
1036 ret = inode; 1031 ret = inode;
1037 if (vfs_dq_alloc_inode(inode)) { 1032 dquot_initialize(inode);
1038 err = -EDQUOT; 1033 err = dquot_alloc_inode(inode);
1034 if (err)
1039 goto fail_drop; 1035 goto fail_drop;
1040 }
1041 1036
1042 err = ext4_init_acl(handle, inode, dir); 1037 err = ext4_init_acl(handle, inode, dir);
1043 if (err) 1038 if (err)
@@ -1074,10 +1069,10 @@ really_out:
1074 return ret; 1069 return ret;
1075 1070
1076fail_free_drop: 1071fail_free_drop:
1077 vfs_dq_free_inode(inode); 1072 dquot_free_inode(inode);
1078 1073
1079fail_drop: 1074fail_drop:
1080 vfs_dq_drop(inode); 1075 dquot_drop(inode);
1081 inode->i_flags |= S_NOQUOTA; 1076 inode->i_flags |= S_NOQUOTA;
1082 inode->i_nlink = 0; 1077 inode->i_nlink = 0;
1083 unlock_new_inode(inode); 1078 unlock_new_inode(inode);
@@ -1098,8 +1093,7 @@ struct inode *ext4_orphan_get(struct super_block *sb, unsigned long ino)
1098 1093
1099 /* Error cases - e2fsck has already cleaned up for us */ 1094 /* Error cases - e2fsck has already cleaned up for us */
1100 if (ino > max_ino) { 1095 if (ino > max_ino) {
1101 ext4_warning(sb, __func__, 1096 ext4_warning(sb, "bad orphan ino %lu! e2fsck was run?", ino);
1102 "bad orphan ino %lu! e2fsck was run?", ino);
1103 goto error; 1097 goto error;
1104 } 1098 }
1105 1099
@@ -1107,8 +1101,7 @@ struct inode *ext4_orphan_get(struct super_block *sb, unsigned long ino)
1107 bit = (ino - 1) % EXT4_INODES_PER_GROUP(sb); 1101 bit = (ino - 1) % EXT4_INODES_PER_GROUP(sb);
1108 bitmap_bh = ext4_read_inode_bitmap(sb, block_group); 1102 bitmap_bh = ext4_read_inode_bitmap(sb, block_group);
1109 if (!bitmap_bh) { 1103 if (!bitmap_bh) {
1110 ext4_warning(sb, __func__, 1104 ext4_warning(sb, "inode bitmap error for orphan %lu", ino);
1111 "inode bitmap error for orphan %lu", ino);
1112 goto error; 1105 goto error;
1113 } 1106 }
1114 1107
@@ -1140,8 +1133,7 @@ iget_failed:
1140 err = PTR_ERR(inode); 1133 err = PTR_ERR(inode);
1141 inode = NULL; 1134 inode = NULL;
1142bad_orphan: 1135bad_orphan:
1143 ext4_warning(sb, __func__, 1136 ext4_warning(sb, "bad orphan inode %lu! e2fsck was run?", ino);
1144 "bad orphan inode %lu! e2fsck was run?", ino);
1145 printk(KERN_NOTICE "ext4_test_bit(bit=%d, block=%llu) = %d\n", 1137 printk(KERN_NOTICE "ext4_test_bit(bit=%d, block=%llu) = %d\n",
1146 bit, (unsigned long long)bitmap_bh->b_blocknr, 1138 bit, (unsigned long long)bitmap_bh->b_blocknr,
1147 ext4_test_bit(bit, bitmap_bh->b_data)); 1139 ext4_test_bit(bit, bitmap_bh->b_data));
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index e11952404e02..986120f30066 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -38,6 +38,7 @@
38#include <linux/uio.h> 38#include <linux/uio.h>
39#include <linux/bio.h> 39#include <linux/bio.h>
40#include <linux/workqueue.h> 40#include <linux/workqueue.h>
41#include <linux/kernel.h>
41 42
42#include "ext4_jbd2.h" 43#include "ext4_jbd2.h"
43#include "xattr.h" 44#include "xattr.h"
@@ -170,6 +171,9 @@ void ext4_delete_inode(struct inode *inode)
170 handle_t *handle; 171 handle_t *handle;
171 int err; 172 int err;
172 173
174 if (!is_bad_inode(inode))
175 dquot_initialize(inode);
176
173 if (ext4_should_order_data(inode)) 177 if (ext4_should_order_data(inode))
174 ext4_begin_ordered_truncate(inode, 0); 178 ext4_begin_ordered_truncate(inode, 0);
175 truncate_inode_pages(&inode->i_data, 0); 179 truncate_inode_pages(&inode->i_data, 0);
@@ -194,7 +198,7 @@ void ext4_delete_inode(struct inode *inode)
194 inode->i_size = 0; 198 inode->i_size = 0;
195 err = ext4_mark_inode_dirty(handle, inode); 199 err = ext4_mark_inode_dirty(handle, inode);
196 if (err) { 200 if (err) {
197 ext4_warning(inode->i_sb, __func__, 201 ext4_warning(inode->i_sb,
198 "couldn't mark inode dirty (err %d)", err); 202 "couldn't mark inode dirty (err %d)", err);
199 goto stop_handle; 203 goto stop_handle;
200 } 204 }
@@ -212,7 +216,7 @@ void ext4_delete_inode(struct inode *inode)
212 if (err > 0) 216 if (err > 0)
213 err = ext4_journal_restart(handle, 3); 217 err = ext4_journal_restart(handle, 3);
214 if (err != 0) { 218 if (err != 0) {
215 ext4_warning(inode->i_sb, __func__, 219 ext4_warning(inode->i_sb,
216 "couldn't extend journal (err %d)", err); 220 "couldn't extend journal (err %d)", err);
217 stop_handle: 221 stop_handle:
218 ext4_journal_stop(handle); 222 ext4_journal_stop(handle);
@@ -323,8 +327,7 @@ static int ext4_block_to_path(struct inode *inode,
323 offsets[n++] = i_block & (ptrs - 1); 327 offsets[n++] = i_block & (ptrs - 1);
324 final = ptrs; 328 final = ptrs;
325 } else { 329 } else {
326 ext4_warning(inode->i_sb, "ext4_block_to_path", 330 ext4_warning(inode->i_sb, "block %lu > max in inode %lu",
327 "block %lu > max in inode %lu",
328 i_block + direct_blocks + 331 i_block + direct_blocks +
329 indirect_blocks + double_blocks, inode->i_ino); 332 indirect_blocks + double_blocks, inode->i_ino);
330 } 333 }
@@ -344,7 +347,7 @@ static int __ext4_check_blockref(const char *function, struct inode *inode,
344 if (blk && 347 if (blk &&
345 unlikely(!ext4_data_block_valid(EXT4_SB(inode->i_sb), 348 unlikely(!ext4_data_block_valid(EXT4_SB(inode->i_sb),
346 blk, 1))) { 349 blk, 1))) {
347 ext4_error(inode->i_sb, function, 350 __ext4_error(inode->i_sb, function,
348 "invalid block reference %u " 351 "invalid block reference %u "
349 "in inode #%lu", blk, inode->i_ino); 352 "in inode #%lu", blk, inode->i_ino);
350 return -EIO; 353 return -EIO;
@@ -607,7 +610,14 @@ static int ext4_alloc_blocks(handle_t *handle, struct inode *inode,
607 if (*err) 610 if (*err)
608 goto failed_out; 611 goto failed_out;
609 612
610 BUG_ON(current_block + count > EXT4_MAX_BLOCK_FILE_PHYS); 613 if (unlikely(current_block + count > EXT4_MAX_BLOCK_FILE_PHYS)) {
614 EXT4_ERROR_INODE(inode,
615 "current_block %llu + count %lu > %d!",
616 current_block, count,
617 EXT4_MAX_BLOCK_FILE_PHYS);
618 *err = -EIO;
619 goto failed_out;
620 }
611 621
612 target -= count; 622 target -= count;
613 /* allocate blocks for indirect blocks */ 623 /* allocate blocks for indirect blocks */
@@ -643,7 +653,14 @@ static int ext4_alloc_blocks(handle_t *handle, struct inode *inode,
643 ar.flags = EXT4_MB_HINT_DATA; 653 ar.flags = EXT4_MB_HINT_DATA;
644 654
645 current_block = ext4_mb_new_blocks(handle, &ar, err); 655 current_block = ext4_mb_new_blocks(handle, &ar, err);
646 BUG_ON(current_block + ar.len > EXT4_MAX_BLOCK_FILE_PHYS); 656 if (unlikely(current_block + ar.len > EXT4_MAX_BLOCK_FILE_PHYS)) {
657 EXT4_ERROR_INODE(inode,
658 "current_block %llu + ar.len %d > %d!",
659 current_block, ar.len,
660 EXT4_MAX_BLOCK_FILE_PHYS);
661 *err = -EIO;
662 goto failed_out;
663 }
647 664
648 if (*err && (target == blks)) { 665 if (*err && (target == blks)) {
649 /* 666 /*
@@ -1061,6 +1078,7 @@ void ext4_da_update_reserve_space(struct inode *inode,
1061 int mdb_free = 0, allocated_meta_blocks = 0; 1078 int mdb_free = 0, allocated_meta_blocks = 0;
1062 1079
1063 spin_lock(&ei->i_block_reservation_lock); 1080 spin_lock(&ei->i_block_reservation_lock);
1081 trace_ext4_da_update_reserve_space(inode, used);
1064 if (unlikely(used > ei->i_reserved_data_blocks)) { 1082 if (unlikely(used > ei->i_reserved_data_blocks)) {
1065 ext4_msg(inode->i_sb, KERN_NOTICE, "%s: ino %lu, used %d " 1083 ext4_msg(inode->i_sb, KERN_NOTICE, "%s: ino %lu, used %d "
1066 "with only %d reserved data blocks\n", 1084 "with only %d reserved data blocks\n",
@@ -1093,9 +1111,9 @@ void ext4_da_update_reserve_space(struct inode *inode,
1093 1111
1094 /* Update quota subsystem */ 1112 /* Update quota subsystem */
1095 if (quota_claim) { 1113 if (quota_claim) {
1096 vfs_dq_claim_block(inode, used); 1114 dquot_claim_block(inode, used);
1097 if (mdb_free) 1115 if (mdb_free)
1098 vfs_dq_release_reservation_block(inode, mdb_free); 1116 dquot_release_reservation_block(inode, mdb_free);
1099 } else { 1117 } else {
1100 /* 1118 /*
1101 * We did fallocate with an offset that is already delayed 1119 * We did fallocate with an offset that is already delayed
@@ -1106,8 +1124,8 @@ void ext4_da_update_reserve_space(struct inode *inode,
1106 * that 1124 * that
1107 */ 1125 */
1108 if (allocated_meta_blocks) 1126 if (allocated_meta_blocks)
1109 vfs_dq_claim_block(inode, allocated_meta_blocks); 1127 dquot_claim_block(inode, allocated_meta_blocks);
1110 vfs_dq_release_reservation_block(inode, mdb_free + used); 1128 dquot_release_reservation_block(inode, mdb_free + used);
1111 } 1129 }
1112 1130
1113 /* 1131 /*
@@ -1124,7 +1142,7 @@ static int check_block_validity(struct inode *inode, const char *msg,
1124 sector_t logical, sector_t phys, int len) 1142 sector_t logical, sector_t phys, int len)
1125{ 1143{
1126 if (!ext4_data_block_valid(EXT4_SB(inode->i_sb), phys, len)) { 1144 if (!ext4_data_block_valid(EXT4_SB(inode->i_sb), phys, len)) {
1127 ext4_error(inode->i_sb, msg, 1145 __ext4_error(inode->i_sb, msg,
1128 "inode #%lu logical block %llu mapped to %llu " 1146 "inode #%lu logical block %llu mapped to %llu "
1129 "(size %d)", inode->i_ino, 1147 "(size %d)", inode->i_ino,
1130 (unsigned long long) logical, 1148 (unsigned long long) logical,
@@ -1306,7 +1324,7 @@ int ext4_get_blocks(handle_t *handle, struct inode *inode, sector_t block,
1306 * i_data's format changing. Force the migrate 1324 * i_data's format changing. Force the migrate
1307 * to fail by clearing migrate flags 1325 * to fail by clearing migrate flags
1308 */ 1326 */
1309 EXT4_I(inode)->i_state &= ~EXT4_STATE_EXT_MIGRATE; 1327 ext4_clear_inode_state(inode, EXT4_STATE_EXT_MIGRATE);
1310 } 1328 }
1311 1329
1312 /* 1330 /*
@@ -1534,6 +1552,8 @@ static void ext4_truncate_failed_write(struct inode *inode)
1534 ext4_truncate(inode); 1552 ext4_truncate(inode);
1535} 1553}
1536 1554
1555static int ext4_get_block_write(struct inode *inode, sector_t iblock,
1556 struct buffer_head *bh_result, int create);
1537static int ext4_write_begin(struct file *file, struct address_space *mapping, 1557static int ext4_write_begin(struct file *file, struct address_space *mapping,
1538 loff_t pos, unsigned len, unsigned flags, 1558 loff_t pos, unsigned len, unsigned flags,
1539 struct page **pagep, void **fsdata) 1559 struct page **pagep, void **fsdata)
@@ -1575,8 +1595,12 @@ retry:
1575 } 1595 }
1576 *pagep = page; 1596 *pagep = page;
1577 1597
1578 ret = block_write_begin(file, mapping, pos, len, flags, pagep, fsdata, 1598 if (ext4_should_dioread_nolock(inode))
1579 ext4_get_block); 1599 ret = block_write_begin(file, mapping, pos, len, flags, pagep,
1600 fsdata, ext4_get_block_write);
1601 else
1602 ret = block_write_begin(file, mapping, pos, len, flags, pagep,
1603 fsdata, ext4_get_block);
1580 1604
1581 if (!ret && ext4_should_journal_data(inode)) { 1605 if (!ret && ext4_should_journal_data(inode)) {
1582 ret = walk_page_buffers(handle, page_buffers(page), 1606 ret = walk_page_buffers(handle, page_buffers(page),
@@ -1793,7 +1817,7 @@ static int ext4_journalled_write_end(struct file *file,
1793 new_i_size = pos + copied; 1817 new_i_size = pos + copied;
1794 if (new_i_size > inode->i_size) 1818 if (new_i_size > inode->i_size)
1795 i_size_write(inode, pos+copied); 1819 i_size_write(inode, pos+copied);
1796 EXT4_I(inode)->i_state |= EXT4_STATE_JDATA; 1820 ext4_set_inode_state(inode, EXT4_STATE_JDATA);
1797 if (new_i_size > EXT4_I(inode)->i_disksize) { 1821 if (new_i_size > EXT4_I(inode)->i_disksize) {
1798 ext4_update_i_disksize(inode, new_i_size); 1822 ext4_update_i_disksize(inode, new_i_size);
1799 ret2 = ext4_mark_inode_dirty(handle, inode); 1823 ret2 = ext4_mark_inode_dirty(handle, inode);
@@ -1836,6 +1860,7 @@ static int ext4_da_reserve_space(struct inode *inode, sector_t lblock)
1836 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); 1860 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
1837 struct ext4_inode_info *ei = EXT4_I(inode); 1861 struct ext4_inode_info *ei = EXT4_I(inode);
1838 unsigned long md_needed, md_reserved; 1862 unsigned long md_needed, md_reserved;
1863 int ret;
1839 1864
1840 /* 1865 /*
1841 * recalculate the amount of metadata blocks to reserve 1866 * recalculate the amount of metadata blocks to reserve
@@ -1846,6 +1871,7 @@ repeat:
1846 spin_lock(&ei->i_block_reservation_lock); 1871 spin_lock(&ei->i_block_reservation_lock);
1847 md_reserved = ei->i_reserved_meta_blocks; 1872 md_reserved = ei->i_reserved_meta_blocks;
1848 md_needed = ext4_calc_metadata_amount(inode, lblock); 1873 md_needed = ext4_calc_metadata_amount(inode, lblock);
1874 trace_ext4_da_reserve_space(inode, md_needed);
1849 spin_unlock(&ei->i_block_reservation_lock); 1875 spin_unlock(&ei->i_block_reservation_lock);
1850 1876
1851 /* 1877 /*
@@ -1853,11 +1879,12 @@ repeat:
1853 * later. Real quota accounting is done at pages writeout 1879 * later. Real quota accounting is done at pages writeout
1854 * time. 1880 * time.
1855 */ 1881 */
1856 if (vfs_dq_reserve_block(inode, md_needed + 1)) 1882 ret = dquot_reserve_block(inode, md_needed + 1);
1857 return -EDQUOT; 1883 if (ret)
1884 return ret;
1858 1885
1859 if (ext4_claim_free_blocks(sbi, md_needed + 1)) { 1886 if (ext4_claim_free_blocks(sbi, md_needed + 1)) {
1860 vfs_dq_release_reservation_block(inode, md_needed + 1); 1887 dquot_release_reservation_block(inode, md_needed + 1);
1861 if (ext4_should_retry_alloc(inode->i_sb, &retries)) { 1888 if (ext4_should_retry_alloc(inode->i_sb, &retries)) {
1862 yield(); 1889 yield();
1863 goto repeat; 1890 goto repeat;
@@ -1914,7 +1941,7 @@ static void ext4_da_release_space(struct inode *inode, int to_free)
1914 1941
1915 spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); 1942 spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
1916 1943
1917 vfs_dq_release_reservation_block(inode, to_free); 1944 dquot_release_reservation_block(inode, to_free);
1918} 1945}
1919 1946
1920static void ext4_da_page_release_reservation(struct page *page, 1947static void ext4_da_page_release_reservation(struct page *page,
@@ -2091,6 +2118,8 @@ static void mpage_put_bnr_to_bhs(struct mpage_da_data *mpd, sector_t logical,
2091 } else if (buffer_mapped(bh)) 2118 } else if (buffer_mapped(bh))
2092 BUG_ON(bh->b_blocknr != pblock); 2119 BUG_ON(bh->b_blocknr != pblock);
2093 2120
2121 if (buffer_uninit(exbh))
2122 set_buffer_uninit(bh);
2094 cur_logical++; 2123 cur_logical++;
2095 pblock++; 2124 pblock++;
2096 } while ((bh = bh->b_this_page) != head); 2125 } while ((bh = bh->b_this_page) != head);
@@ -2133,17 +2162,16 @@ static void ext4_da_block_invalidatepages(struct mpage_da_data *mpd,
2133 break; 2162 break;
2134 for (i = 0; i < nr_pages; i++) { 2163 for (i = 0; i < nr_pages; i++) {
2135 struct page *page = pvec.pages[i]; 2164 struct page *page = pvec.pages[i];
2136 index = page->index; 2165 if (page->index > end)
2137 if (index > end)
2138 break; 2166 break;
2139 index++;
2140
2141 BUG_ON(!PageLocked(page)); 2167 BUG_ON(!PageLocked(page));
2142 BUG_ON(PageWriteback(page)); 2168 BUG_ON(PageWriteback(page));
2143 block_invalidatepage(page, 0); 2169 block_invalidatepage(page, 0);
2144 ClearPageUptodate(page); 2170 ClearPageUptodate(page);
2145 unlock_page(page); 2171 unlock_page(page);
2146 } 2172 }
2173 index = pvec.pages[nr_pages - 1]->index + 1;
2174 pagevec_release(&pvec);
2147 } 2175 }
2148 return; 2176 return;
2149} 2177}
@@ -2220,6 +2248,8 @@ static int mpage_da_map_blocks(struct mpage_da_data *mpd)
2220 */ 2248 */
2221 new.b_state = 0; 2249 new.b_state = 0;
2222 get_blocks_flags = EXT4_GET_BLOCKS_CREATE; 2250 get_blocks_flags = EXT4_GET_BLOCKS_CREATE;
2251 if (ext4_should_dioread_nolock(mpd->inode))
2252 get_blocks_flags |= EXT4_GET_BLOCKS_IO_CREATE_EXT;
2223 if (mpd->b_state & (1 << BH_Delay)) 2253 if (mpd->b_state & (1 << BH_Delay))
2224 get_blocks_flags |= EXT4_GET_BLOCKS_DELALLOC_RESERVE; 2254 get_blocks_flags |= EXT4_GET_BLOCKS_DELALLOC_RESERVE;
2225 2255
@@ -2630,11 +2660,14 @@ static int __ext4_journalled_writepage(struct page *page,
2630 ret = err; 2660 ret = err;
2631 2661
2632 walk_page_buffers(handle, page_bufs, 0, len, NULL, bput_one); 2662 walk_page_buffers(handle, page_bufs, 0, len, NULL, bput_one);
2633 EXT4_I(inode)->i_state |= EXT4_STATE_JDATA; 2663 ext4_set_inode_state(inode, EXT4_STATE_JDATA);
2634out: 2664out:
2635 return ret; 2665 return ret;
2636} 2666}
2637 2667
2668static int ext4_set_bh_endio(struct buffer_head *bh, struct inode *inode);
2669static void ext4_end_io_buffer_write(struct buffer_head *bh, int uptodate);
2670
2638/* 2671/*
2639 * Note that we don't need to start a transaction unless we're journaling data 2672 * Note that we don't need to start a transaction unless we're journaling data
2640 * because we should have holes filled from ext4_page_mkwrite(). We even don't 2673 * because we should have holes filled from ext4_page_mkwrite(). We even don't
@@ -2682,7 +2715,7 @@ static int ext4_writepage(struct page *page,
2682 int ret = 0; 2715 int ret = 0;
2683 loff_t size; 2716 loff_t size;
2684 unsigned int len; 2717 unsigned int len;
2685 struct buffer_head *page_bufs; 2718 struct buffer_head *page_bufs = NULL;
2686 struct inode *inode = page->mapping->host; 2719 struct inode *inode = page->mapping->host;
2687 2720
2688 trace_ext4_writepage(inode, page); 2721 trace_ext4_writepage(inode, page);
@@ -2758,7 +2791,11 @@ static int ext4_writepage(struct page *page,
2758 2791
2759 if (test_opt(inode->i_sb, NOBH) && ext4_should_writeback_data(inode)) 2792 if (test_opt(inode->i_sb, NOBH) && ext4_should_writeback_data(inode))
2760 ret = nobh_writepage(page, noalloc_get_block_write, wbc); 2793 ret = nobh_writepage(page, noalloc_get_block_write, wbc);
2761 else 2794 else if (page_bufs && buffer_uninit(page_bufs)) {
2795 ext4_set_bh_endio(page_bufs, inode);
2796 ret = block_write_full_page_endio(page, noalloc_get_block_write,
2797 wbc, ext4_end_io_buffer_write);
2798 } else
2762 ret = block_write_full_page(page, noalloc_get_block_write, 2799 ret = block_write_full_page(page, noalloc_get_block_write,
2763 wbc); 2800 wbc);
2764 2801
@@ -3301,7 +3338,8 @@ static sector_t ext4_bmap(struct address_space *mapping, sector_t block)
3301 filemap_write_and_wait(mapping); 3338 filemap_write_and_wait(mapping);
3302 } 3339 }
3303 3340
3304 if (EXT4_JOURNAL(inode) && EXT4_I(inode)->i_state & EXT4_STATE_JDATA) { 3341 if (EXT4_JOURNAL(inode) &&
3342 ext4_test_inode_state(inode, EXT4_STATE_JDATA)) {
3305 /* 3343 /*
3306 * This is a REALLY heavyweight approach, but the use of 3344 * This is a REALLY heavyweight approach, but the use of
3307 * bmap on dirty files is expected to be extremely rare: 3345 * bmap on dirty files is expected to be extremely rare:
@@ -3320,7 +3358,7 @@ static sector_t ext4_bmap(struct address_space *mapping, sector_t block)
3320 * everything they get. 3358 * everything they get.
3321 */ 3359 */
3322 3360
3323 EXT4_I(inode)->i_state &= ~EXT4_STATE_JDATA; 3361 ext4_clear_inode_state(inode, EXT4_STATE_JDATA);
3324 journal = EXT4_JOURNAL(inode); 3362 journal = EXT4_JOURNAL(inode);
3325 jbd2_journal_lock_updates(journal); 3363 jbd2_journal_lock_updates(journal);
3326 err = jbd2_journal_flush(journal); 3364 err = jbd2_journal_flush(journal);
@@ -3345,11 +3383,45 @@ ext4_readpages(struct file *file, struct address_space *mapping,
3345 return mpage_readpages(mapping, pages, nr_pages, ext4_get_block); 3383 return mpage_readpages(mapping, pages, nr_pages, ext4_get_block);
3346} 3384}
3347 3385
3386static void ext4_free_io_end(ext4_io_end_t *io)
3387{
3388 BUG_ON(!io);
3389 if (io->page)
3390 put_page(io->page);
3391 iput(io->inode);
3392 kfree(io);
3393}
3394
3395static void ext4_invalidatepage_free_endio(struct page *page, unsigned long offset)
3396{
3397 struct buffer_head *head, *bh;
3398 unsigned int curr_off = 0;
3399
3400 if (!page_has_buffers(page))
3401 return;
3402 head = bh = page_buffers(page);
3403 do {
3404 if (offset <= curr_off && test_clear_buffer_uninit(bh)
3405 && bh->b_private) {
3406 ext4_free_io_end(bh->b_private);
3407 bh->b_private = NULL;
3408 bh->b_end_io = NULL;
3409 }
3410 curr_off = curr_off + bh->b_size;
3411 bh = bh->b_this_page;
3412 } while (bh != head);
3413}
3414
3348static void ext4_invalidatepage(struct page *page, unsigned long offset) 3415static void ext4_invalidatepage(struct page *page, unsigned long offset)
3349{ 3416{
3350 journal_t *journal = EXT4_JOURNAL(page->mapping->host); 3417 journal_t *journal = EXT4_JOURNAL(page->mapping->host);
3351 3418
3352 /* 3419 /*
3420 * free any io_end structure allocated for buffers to be discarded
3421 */
3422 if (ext4_should_dioread_nolock(page->mapping->host))
3423 ext4_invalidatepage_free_endio(page, offset);
3424 /*
3353 * If it's a full truncate we just forget about the pending dirtying 3425 * If it's a full truncate we just forget about the pending dirtying
3354 */ 3426 */
3355 if (offset == 0) 3427 if (offset == 0)
@@ -3420,7 +3492,14 @@ static ssize_t ext4_ind_direct_IO(int rw, struct kiocb *iocb,
3420 } 3492 }
3421 3493
3422retry: 3494retry:
3423 ret = blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov, 3495 if (rw == READ && ext4_should_dioread_nolock(inode))
3496 ret = blockdev_direct_IO_no_locking(rw, iocb, inode,
3497 inode->i_sb->s_bdev, iov,
3498 offset, nr_segs,
3499 ext4_get_block, NULL);
3500 else
3501 ret = blockdev_direct_IO(rw, iocb, inode,
3502 inode->i_sb->s_bdev, iov,
3424 offset, nr_segs, 3503 offset, nr_segs,
3425 ext4_get_block, NULL); 3504 ext4_get_block, NULL);
3426 if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)) 3505 if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
@@ -3436,6 +3515,9 @@ retry:
3436 * but cannot extend i_size. Bail out and pretend 3515 * but cannot extend i_size. Bail out and pretend
3437 * the write failed... */ 3516 * the write failed... */
3438 ret = PTR_ERR(handle); 3517 ret = PTR_ERR(handle);
3518 if (inode->i_nlink)
3519 ext4_orphan_del(NULL, inode);
3520
3439 goto out; 3521 goto out;
3440 } 3522 }
3441 if (inode->i_nlink) 3523 if (inode->i_nlink)
@@ -3463,75 +3545,63 @@ out:
3463 return ret; 3545 return ret;
3464} 3546}
3465 3547
3466static int ext4_get_block_dio_write(struct inode *inode, sector_t iblock, 3548static int ext4_get_block_write(struct inode *inode, sector_t iblock,
3467 struct buffer_head *bh_result, int create) 3549 struct buffer_head *bh_result, int create)
3468{ 3550{
3469 handle_t *handle = NULL; 3551 handle_t *handle = ext4_journal_current_handle();
3470 int ret = 0; 3552 int ret = 0;
3471 unsigned max_blocks = bh_result->b_size >> inode->i_blkbits; 3553 unsigned max_blocks = bh_result->b_size >> inode->i_blkbits;
3472 int dio_credits; 3554 int dio_credits;
3555 int started = 0;
3473 3556
3474 ext4_debug("ext4_get_block_dio_write: inode %lu, create flag %d\n", 3557 ext4_debug("ext4_get_block_write: inode %lu, create flag %d\n",
3475 inode->i_ino, create); 3558 inode->i_ino, create);
3476 /* 3559 /*
3477 * DIO VFS code passes create = 0 flag for write to 3560 * ext4_get_block in prepare for a DIO write or buffer write.
3478 * the middle of file. It does this to avoid block 3561 * We allocate an uinitialized extent if blocks haven't been allocated.
3479 * allocation for holes, to prevent expose stale data 3562 * The extent will be converted to initialized after IO complete.
3480 * out when there is parallel buffered read (which does
3481 * not hold the i_mutex lock) while direct IO write has
3482 * not completed. DIO request on holes finally falls back
3483 * to buffered IO for this reason.
3484 *
3485 * For ext4 extent based file, since we support fallocate,
3486 * new allocated extent as uninitialized, for holes, we
3487 * could fallocate blocks for holes, thus parallel
3488 * buffered IO read will zero out the page when read on
3489 * a hole while parallel DIO write to the hole has not completed.
3490 *
3491 * when we come here, we know it's a direct IO write to
3492 * to the middle of file (<i_size)
3493 * so it's safe to override the create flag from VFS.
3494 */ 3563 */
3495 create = EXT4_GET_BLOCKS_DIO_CREATE_EXT; 3564 create = EXT4_GET_BLOCKS_IO_CREATE_EXT;
3496 3565
3497 if (max_blocks > DIO_MAX_BLOCKS) 3566 if (!handle) {
3498 max_blocks = DIO_MAX_BLOCKS; 3567 if (max_blocks > DIO_MAX_BLOCKS)
3499 dio_credits = ext4_chunk_trans_blocks(inode, max_blocks); 3568 max_blocks = DIO_MAX_BLOCKS;
3500 handle = ext4_journal_start(inode, dio_credits); 3569 dio_credits = ext4_chunk_trans_blocks(inode, max_blocks);
3501 if (IS_ERR(handle)) { 3570 handle = ext4_journal_start(inode, dio_credits);
3502 ret = PTR_ERR(handle); 3571 if (IS_ERR(handle)) {
3503 goto out; 3572 ret = PTR_ERR(handle);
3573 goto out;
3574 }
3575 started = 1;
3504 } 3576 }
3577
3505 ret = ext4_get_blocks(handle, inode, iblock, max_blocks, bh_result, 3578 ret = ext4_get_blocks(handle, inode, iblock, max_blocks, bh_result,
3506 create); 3579 create);
3507 if (ret > 0) { 3580 if (ret > 0) {
3508 bh_result->b_size = (ret << inode->i_blkbits); 3581 bh_result->b_size = (ret << inode->i_blkbits);
3509 ret = 0; 3582 ret = 0;
3510 } 3583 }
3511 ext4_journal_stop(handle); 3584 if (started)
3585 ext4_journal_stop(handle);
3512out: 3586out:
3513 return ret; 3587 return ret;
3514} 3588}
3515 3589
3516static void ext4_free_io_end(ext4_io_end_t *io) 3590static void dump_completed_IO(struct inode * inode)
3517{
3518 BUG_ON(!io);
3519 iput(io->inode);
3520 kfree(io);
3521}
3522static void dump_aio_dio_list(struct inode * inode)
3523{ 3591{
3524#ifdef EXT4_DEBUG 3592#ifdef EXT4_DEBUG
3525 struct list_head *cur, *before, *after; 3593 struct list_head *cur, *before, *after;
3526 ext4_io_end_t *io, *io0, *io1; 3594 ext4_io_end_t *io, *io0, *io1;
3595 unsigned long flags;
3527 3596
3528 if (list_empty(&EXT4_I(inode)->i_aio_dio_complete_list)){ 3597 if (list_empty(&EXT4_I(inode)->i_completed_io_list)){
3529 ext4_debug("inode %lu aio dio list is empty\n", inode->i_ino); 3598 ext4_debug("inode %lu completed_io list is empty\n", inode->i_ino);
3530 return; 3599 return;
3531 } 3600 }
3532 3601
3533 ext4_debug("Dump inode %lu aio_dio_completed_IO list \n", inode->i_ino); 3602 ext4_debug("Dump inode %lu completed_io list \n", inode->i_ino);
3534 list_for_each_entry(io, &EXT4_I(inode)->i_aio_dio_complete_list, list){ 3603 spin_lock_irqsave(&EXT4_I(inode)->i_completed_io_lock, flags);
3604 list_for_each_entry(io, &EXT4_I(inode)->i_completed_io_list, list){
3535 cur = &io->list; 3605 cur = &io->list;
3536 before = cur->prev; 3606 before = cur->prev;
3537 io0 = container_of(before, ext4_io_end_t, list); 3607 io0 = container_of(before, ext4_io_end_t, list);
@@ -3541,32 +3611,31 @@ static void dump_aio_dio_list(struct inode * inode)
3541 ext4_debug("io 0x%p from inode %lu,prev 0x%p,next 0x%p\n", 3611 ext4_debug("io 0x%p from inode %lu,prev 0x%p,next 0x%p\n",
3542 io, inode->i_ino, io0, io1); 3612 io, inode->i_ino, io0, io1);
3543 } 3613 }
3614 spin_unlock_irqrestore(&EXT4_I(inode)->i_completed_io_lock, flags);
3544#endif 3615#endif
3545} 3616}
3546 3617
3547/* 3618/*
3548 * check a range of space and convert unwritten extents to written. 3619 * check a range of space and convert unwritten extents to written.
3549 */ 3620 */
3550static int ext4_end_aio_dio_nolock(ext4_io_end_t *io) 3621static int ext4_end_io_nolock(ext4_io_end_t *io)
3551{ 3622{
3552 struct inode *inode = io->inode; 3623 struct inode *inode = io->inode;
3553 loff_t offset = io->offset; 3624 loff_t offset = io->offset;
3554 size_t size = io->size; 3625 ssize_t size = io->size;
3555 int ret = 0; 3626 int ret = 0;
3556 3627
3557 ext4_debug("end_aio_dio_onlock: io 0x%p from inode %lu,list->next 0x%p," 3628 ext4_debug("ext4_end_io_nolock: io 0x%p from inode %lu,list->next 0x%p,"
3558 "list->prev 0x%p\n", 3629 "list->prev 0x%p\n",
3559 io, inode->i_ino, io->list.next, io->list.prev); 3630 io, inode->i_ino, io->list.next, io->list.prev);
3560 3631
3561 if (list_empty(&io->list)) 3632 if (list_empty(&io->list))
3562 return ret; 3633 return ret;
3563 3634
3564 if (io->flag != DIO_AIO_UNWRITTEN) 3635 if (io->flag != EXT4_IO_UNWRITTEN)
3565 return ret; 3636 return ret;
3566 3637
3567 if (offset + size <= i_size_read(inode)) 3638 ret = ext4_convert_unwritten_extents(inode, offset, size);
3568 ret = ext4_convert_unwritten_extents(inode, offset, size);
3569
3570 if (ret < 0) { 3639 if (ret < 0) {
3571 printk(KERN_EMERG "%s: failed to convert unwritten" 3640 printk(KERN_EMERG "%s: failed to convert unwritten"
3572 "extents to written extents, error is %d" 3641 "extents to written extents, error is %d"
@@ -3579,50 +3648,64 @@ static int ext4_end_aio_dio_nolock(ext4_io_end_t *io)
3579 io->flag = 0; 3648 io->flag = 0;
3580 return ret; 3649 return ret;
3581} 3650}
3651
3582/* 3652/*
3583 * work on completed aio dio IO, to convert unwritten extents to extents 3653 * work on completed aio dio IO, to convert unwritten extents to extents
3584 */ 3654 */
3585static void ext4_end_aio_dio_work(struct work_struct *work) 3655static void ext4_end_io_work(struct work_struct *work)
3586{ 3656{
3587 ext4_io_end_t *io = container_of(work, ext4_io_end_t, work); 3657 ext4_io_end_t *io = container_of(work, ext4_io_end_t, work);
3588 struct inode *inode = io->inode; 3658 struct inode *inode = io->inode;
3589 int ret = 0; 3659 struct ext4_inode_info *ei = EXT4_I(inode);
3660 unsigned long flags;
3661 int ret;
3590 3662
3591 mutex_lock(&inode->i_mutex); 3663 mutex_lock(&inode->i_mutex);
3592 ret = ext4_end_aio_dio_nolock(io); 3664 ret = ext4_end_io_nolock(io);
3593 if (ret >= 0) { 3665 if (ret < 0) {
3594 if (!list_empty(&io->list)) 3666 mutex_unlock(&inode->i_mutex);
3595 list_del_init(&io->list); 3667 return;
3596 ext4_free_io_end(io);
3597 } 3668 }
3669
3670 spin_lock_irqsave(&ei->i_completed_io_lock, flags);
3671 if (!list_empty(&io->list))
3672 list_del_init(&io->list);
3673 spin_unlock_irqrestore(&ei->i_completed_io_lock, flags);
3598 mutex_unlock(&inode->i_mutex); 3674 mutex_unlock(&inode->i_mutex);
3675 ext4_free_io_end(io);
3599} 3676}
3677
3600/* 3678/*
3601 * This function is called from ext4_sync_file(). 3679 * This function is called from ext4_sync_file().
3602 * 3680 *
3603 * When AIO DIO IO is completed, the work to convert unwritten 3681 * When IO is completed, the work to convert unwritten extents to
3604 * extents to written is queued on workqueue but may not get immediately 3682 * written is queued on workqueue but may not get immediately
3605 * scheduled. When fsync is called, we need to ensure the 3683 * scheduled. When fsync is called, we need to ensure the
3606 * conversion is complete before fsync returns. 3684 * conversion is complete before fsync returns.
3607 * The inode keeps track of a list of completed AIO from DIO path 3685 * The inode keeps track of a list of pending/completed IO that
3608 * that might needs to do the conversion. This function walks through 3686 * might needs to do the conversion. This function walks through
3609 * the list and convert the related unwritten extents to written. 3687 * the list and convert the related unwritten extents for completed IO
3688 * to written.
3689 * The function return the number of pending IOs on success.
3610 */ 3690 */
3611int flush_aio_dio_completed_IO(struct inode *inode) 3691int flush_completed_IO(struct inode *inode)
3612{ 3692{
3613 ext4_io_end_t *io; 3693 ext4_io_end_t *io;
3694 struct ext4_inode_info *ei = EXT4_I(inode);
3695 unsigned long flags;
3614 int ret = 0; 3696 int ret = 0;
3615 int ret2 = 0; 3697 int ret2 = 0;
3616 3698
3617 if (list_empty(&EXT4_I(inode)->i_aio_dio_complete_list)) 3699 if (list_empty(&ei->i_completed_io_list))
3618 return ret; 3700 return ret;
3619 3701
3620 dump_aio_dio_list(inode); 3702 dump_completed_IO(inode);
3621 while (!list_empty(&EXT4_I(inode)->i_aio_dio_complete_list)){ 3703 spin_lock_irqsave(&ei->i_completed_io_lock, flags);
3622 io = list_entry(EXT4_I(inode)->i_aio_dio_complete_list.next, 3704 while (!list_empty(&ei->i_completed_io_list)){
3705 io = list_entry(ei->i_completed_io_list.next,
3623 ext4_io_end_t, list); 3706 ext4_io_end_t, list);
3624 /* 3707 /*
3625 * Calling ext4_end_aio_dio_nolock() to convert completed 3708 * Calling ext4_end_io_nolock() to convert completed
3626 * IO to written. 3709 * IO to written.
3627 * 3710 *
3628 * When ext4_sync_file() is called, run_queue() may already 3711 * When ext4_sync_file() is called, run_queue() may already
@@ -3635,20 +3718,23 @@ int flush_aio_dio_completed_IO(struct inode *inode)
3635 * avoid double converting from both fsync and background work 3718 * avoid double converting from both fsync and background work
3636 * queue work. 3719 * queue work.
3637 */ 3720 */
3638 ret = ext4_end_aio_dio_nolock(io); 3721 spin_unlock_irqrestore(&ei->i_completed_io_lock, flags);
3722 ret = ext4_end_io_nolock(io);
3723 spin_lock_irqsave(&ei->i_completed_io_lock, flags);
3639 if (ret < 0) 3724 if (ret < 0)
3640 ret2 = ret; 3725 ret2 = ret;
3641 else 3726 else
3642 list_del_init(&io->list); 3727 list_del_init(&io->list);
3643 } 3728 }
3729 spin_unlock_irqrestore(&ei->i_completed_io_lock, flags);
3644 return (ret2 < 0) ? ret2 : 0; 3730 return (ret2 < 0) ? ret2 : 0;
3645} 3731}
3646 3732
3647static ext4_io_end_t *ext4_init_io_end (struct inode *inode) 3733static ext4_io_end_t *ext4_init_io_end (struct inode *inode, gfp_t flags)
3648{ 3734{
3649 ext4_io_end_t *io = NULL; 3735 ext4_io_end_t *io = NULL;
3650 3736
3651 io = kmalloc(sizeof(*io), GFP_NOFS); 3737 io = kmalloc(sizeof(*io), flags);
3652 3738
3653 if (io) { 3739 if (io) {
3654 igrab(inode); 3740 igrab(inode);
@@ -3656,8 +3742,8 @@ static ext4_io_end_t *ext4_init_io_end (struct inode *inode)
3656 io->flag = 0; 3742 io->flag = 0;
3657 io->offset = 0; 3743 io->offset = 0;
3658 io->size = 0; 3744 io->size = 0;
3659 io->error = 0; 3745 io->page = NULL;
3660 INIT_WORK(&io->work, ext4_end_aio_dio_work); 3746 INIT_WORK(&io->work, ext4_end_io_work);
3661 INIT_LIST_HEAD(&io->list); 3747 INIT_LIST_HEAD(&io->list);
3662 } 3748 }
3663 3749
@@ -3669,6 +3755,8 @@ static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset,
3669{ 3755{
3670 ext4_io_end_t *io_end = iocb->private; 3756 ext4_io_end_t *io_end = iocb->private;
3671 struct workqueue_struct *wq; 3757 struct workqueue_struct *wq;
3758 unsigned long flags;
3759 struct ext4_inode_info *ei;
3672 3760
3673 /* if not async direct IO or dio with 0 bytes write, just return */ 3761 /* if not async direct IO or dio with 0 bytes write, just return */
3674 if (!io_end || !size) 3762 if (!io_end || !size)
@@ -3680,7 +3768,7 @@ static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset,
3680 size); 3768 size);
3681 3769
3682 /* if not aio dio with unwritten extents, just free io and return */ 3770 /* if not aio dio with unwritten extents, just free io and return */
3683 if (io_end->flag != DIO_AIO_UNWRITTEN){ 3771 if (io_end->flag != EXT4_IO_UNWRITTEN){
3684 ext4_free_io_end(io_end); 3772 ext4_free_io_end(io_end);
3685 iocb->private = NULL; 3773 iocb->private = NULL;
3686 return; 3774 return;
@@ -3688,16 +3776,85 @@ static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset,
3688 3776
3689 io_end->offset = offset; 3777 io_end->offset = offset;
3690 io_end->size = size; 3778 io_end->size = size;
3779 io_end->flag = EXT4_IO_UNWRITTEN;
3691 wq = EXT4_SB(io_end->inode->i_sb)->dio_unwritten_wq; 3780 wq = EXT4_SB(io_end->inode->i_sb)->dio_unwritten_wq;
3692 3781
3693 /* queue the work to convert unwritten extents to written */ 3782 /* queue the work to convert unwritten extents to written */
3694 queue_work(wq, &io_end->work); 3783 queue_work(wq, &io_end->work);
3695 3784
3696 /* Add the io_end to per-inode completed aio dio list*/ 3785 /* Add the io_end to per-inode completed aio dio list*/
3697 list_add_tail(&io_end->list, 3786 ei = EXT4_I(io_end->inode);
3698 &EXT4_I(io_end->inode)->i_aio_dio_complete_list); 3787 spin_lock_irqsave(&ei->i_completed_io_lock, flags);
3788 list_add_tail(&io_end->list, &ei->i_completed_io_list);
3789 spin_unlock_irqrestore(&ei->i_completed_io_lock, flags);
3699 iocb->private = NULL; 3790 iocb->private = NULL;
3700} 3791}
3792
3793static void ext4_end_io_buffer_write(struct buffer_head *bh, int uptodate)
3794{
3795 ext4_io_end_t *io_end = bh->b_private;
3796 struct workqueue_struct *wq;
3797 struct inode *inode;
3798 unsigned long flags;
3799
3800 if (!test_clear_buffer_uninit(bh) || !io_end)
3801 goto out;
3802
3803 if (!(io_end->inode->i_sb->s_flags & MS_ACTIVE)) {
3804 printk("sb umounted, discard end_io request for inode %lu\n",
3805 io_end->inode->i_ino);
3806 ext4_free_io_end(io_end);
3807 goto out;
3808 }
3809
3810 io_end->flag = EXT4_IO_UNWRITTEN;
3811 inode = io_end->inode;
3812
3813 /* Add the io_end to per-inode completed io list*/
3814 spin_lock_irqsave(&EXT4_I(inode)->i_completed_io_lock, flags);
3815 list_add_tail(&io_end->list, &EXT4_I(inode)->i_completed_io_list);
3816 spin_unlock_irqrestore(&EXT4_I(inode)->i_completed_io_lock, flags);
3817
3818 wq = EXT4_SB(inode->i_sb)->dio_unwritten_wq;
3819 /* queue the work to convert unwritten extents to written */
3820 queue_work(wq, &io_end->work);
3821out:
3822 bh->b_private = NULL;
3823 bh->b_end_io = NULL;
3824 clear_buffer_uninit(bh);
3825 end_buffer_async_write(bh, uptodate);
3826}
3827
3828static int ext4_set_bh_endio(struct buffer_head *bh, struct inode *inode)
3829{
3830 ext4_io_end_t *io_end;
3831 struct page *page = bh->b_page;
3832 loff_t offset = (sector_t)page->index << PAGE_CACHE_SHIFT;
3833 size_t size = bh->b_size;
3834
3835retry:
3836 io_end = ext4_init_io_end(inode, GFP_ATOMIC);
3837 if (!io_end) {
3838 if (printk_ratelimit())
3839 printk(KERN_WARNING "%s: allocation fail\n", __func__);
3840 schedule();
3841 goto retry;
3842 }
3843 io_end->offset = offset;
3844 io_end->size = size;
3845 /*
3846 * We need to hold a reference to the page to make sure it
3847 * doesn't get evicted before ext4_end_io_work() has a chance
3848 * to convert the extent from written to unwritten.
3849 */
3850 io_end->page = page;
3851 get_page(io_end->page);
3852
3853 bh->b_private = io_end;
3854 bh->b_end_io = ext4_end_io_buffer_write;
3855 return 0;
3856}
3857
3701/* 3858/*
3702 * For ext4 extent files, ext4 will do direct-io write to holes, 3859 * For ext4 extent files, ext4 will do direct-io write to holes,
3703 * preallocated extents, and those write extend the file, no need to 3860 * preallocated extents, and those write extend the file, no need to
@@ -3751,7 +3908,7 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb,
3751 iocb->private = NULL; 3908 iocb->private = NULL;
3752 EXT4_I(inode)->cur_aio_dio = NULL; 3909 EXT4_I(inode)->cur_aio_dio = NULL;
3753 if (!is_sync_kiocb(iocb)) { 3910 if (!is_sync_kiocb(iocb)) {
3754 iocb->private = ext4_init_io_end(inode); 3911 iocb->private = ext4_init_io_end(inode, GFP_NOFS);
3755 if (!iocb->private) 3912 if (!iocb->private)
3756 return -ENOMEM; 3913 return -ENOMEM;
3757 /* 3914 /*
@@ -3767,7 +3924,7 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb,
3767 ret = blockdev_direct_IO(rw, iocb, inode, 3924 ret = blockdev_direct_IO(rw, iocb, inode,
3768 inode->i_sb->s_bdev, iov, 3925 inode->i_sb->s_bdev, iov,
3769 offset, nr_segs, 3926 offset, nr_segs,
3770 ext4_get_block_dio_write, 3927 ext4_get_block_write,
3771 ext4_end_io_dio); 3928 ext4_end_io_dio);
3772 if (iocb->private) 3929 if (iocb->private)
3773 EXT4_I(inode)->cur_aio_dio = NULL; 3930 EXT4_I(inode)->cur_aio_dio = NULL;
@@ -3788,8 +3945,8 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb,
3788 if (ret != -EIOCBQUEUED && ret <= 0 && iocb->private) { 3945 if (ret != -EIOCBQUEUED && ret <= 0 && iocb->private) {
3789 ext4_free_io_end(iocb->private); 3946 ext4_free_io_end(iocb->private);
3790 iocb->private = NULL; 3947 iocb->private = NULL;
3791 } else if (ret > 0 && (EXT4_I(inode)->i_state & 3948 } else if (ret > 0 && ext4_test_inode_state(inode,
3792 EXT4_STATE_DIO_UNWRITTEN)) { 3949 EXT4_STATE_DIO_UNWRITTEN)) {
3793 int err; 3950 int err;
3794 /* 3951 /*
3795 * for non AIO case, since the IO is already 3952 * for non AIO case, since the IO is already
@@ -3799,7 +3956,7 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb,
3799 offset, ret); 3956 offset, ret);
3800 if (err < 0) 3957 if (err < 0)
3801 ret = err; 3958 ret = err;
3802 EXT4_I(inode)->i_state &= ~EXT4_STATE_DIO_UNWRITTEN; 3959 ext4_clear_inode_state(inode, EXT4_STATE_DIO_UNWRITTEN);
3803 } 3960 }
3804 return ret; 3961 return ret;
3805 } 3962 }
@@ -4130,18 +4287,27 @@ no_top:
4130 * We release `count' blocks on disk, but (last - first) may be greater 4287 * We release `count' blocks on disk, but (last - first) may be greater
4131 * than `count' because there can be holes in there. 4288 * than `count' because there can be holes in there.
4132 */ 4289 */
4133static void ext4_clear_blocks(handle_t *handle, struct inode *inode, 4290static int ext4_clear_blocks(handle_t *handle, struct inode *inode,
4134 struct buffer_head *bh, 4291 struct buffer_head *bh,
4135 ext4_fsblk_t block_to_free, 4292 ext4_fsblk_t block_to_free,
4136 unsigned long count, __le32 *first, 4293 unsigned long count, __le32 *first,
4137 __le32 *last) 4294 __le32 *last)
4138{ 4295{
4139 __le32 *p; 4296 __le32 *p;
4140 int flags = EXT4_FREE_BLOCKS_FORGET; 4297 int flags = EXT4_FREE_BLOCKS_FORGET | EXT4_FREE_BLOCKS_VALIDATED;
4141 4298
4142 if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) 4299 if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))
4143 flags |= EXT4_FREE_BLOCKS_METADATA; 4300 flags |= EXT4_FREE_BLOCKS_METADATA;
4144 4301
4302 if (!ext4_data_block_valid(EXT4_SB(inode->i_sb), block_to_free,
4303 count)) {
4304 ext4_error(inode->i_sb, "inode #%lu: "
4305 "attempt to clear blocks %llu len %lu, invalid",
4306 inode->i_ino, (unsigned long long) block_to_free,
4307 count);
4308 return 1;
4309 }
4310
4145 if (try_to_extend_transaction(handle, inode)) { 4311 if (try_to_extend_transaction(handle, inode)) {
4146 if (bh) { 4312 if (bh) {
4147 BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata"); 4313 BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata");
@@ -4160,6 +4326,7 @@ static void ext4_clear_blocks(handle_t *handle, struct inode *inode,
4160 *p = 0; 4326 *p = 0;
4161 4327
4162 ext4_free_blocks(handle, inode, 0, block_to_free, count, flags); 4328 ext4_free_blocks(handle, inode, 0, block_to_free, count, flags);
4329 return 0;
4163} 4330}
4164 4331
4165/** 4332/**
@@ -4215,9 +4382,10 @@ static void ext4_free_data(handle_t *handle, struct inode *inode,
4215 } else if (nr == block_to_free + count) { 4382 } else if (nr == block_to_free + count) {
4216 count++; 4383 count++;
4217 } else { 4384 } else {
4218 ext4_clear_blocks(handle, inode, this_bh, 4385 if (ext4_clear_blocks(handle, inode, this_bh,
4219 block_to_free, 4386 block_to_free, count,
4220 count, block_to_free_p, p); 4387 block_to_free_p, p))
4388 break;
4221 block_to_free = nr; 4389 block_to_free = nr;
4222 block_to_free_p = p; 4390 block_to_free_p = p;
4223 count = 1; 4391 count = 1;
@@ -4241,7 +4409,7 @@ static void ext4_free_data(handle_t *handle, struct inode *inode,
4241 if ((EXT4_JOURNAL(inode) == NULL) || bh2jh(this_bh)) 4409 if ((EXT4_JOURNAL(inode) == NULL) || bh2jh(this_bh))
4242 ext4_handle_dirty_metadata(handle, inode, this_bh); 4410 ext4_handle_dirty_metadata(handle, inode, this_bh);
4243 else 4411 else
4244 ext4_error(inode->i_sb, __func__, 4412 ext4_error(inode->i_sb,
4245 "circular indirect block detected, " 4413 "circular indirect block detected, "
4246 "inode=%lu, block=%llu", 4414 "inode=%lu, block=%llu",
4247 inode->i_ino, 4415 inode->i_ino,
@@ -4281,6 +4449,16 @@ static void ext4_free_branches(handle_t *handle, struct inode *inode,
4281 if (!nr) 4449 if (!nr)
4282 continue; /* A hole */ 4450 continue; /* A hole */
4283 4451
4452 if (!ext4_data_block_valid(EXT4_SB(inode->i_sb),
4453 nr, 1)) {
4454 ext4_error(inode->i_sb,
4455 "indirect mapped block in inode "
4456 "#%lu invalid (level %d, blk #%lu)",
4457 inode->i_ino, depth,
4458 (unsigned long) nr);
4459 break;
4460 }
4461
4284 /* Go read the buffer for the next level down */ 4462 /* Go read the buffer for the next level down */
4285 bh = sb_bread(inode->i_sb, nr); 4463 bh = sb_bread(inode->i_sb, nr);
4286 4464
@@ -4289,7 +4467,7 @@ static void ext4_free_branches(handle_t *handle, struct inode *inode,
4289 * (should be rare). 4467 * (should be rare).
4290 */ 4468 */
4291 if (!bh) { 4469 if (!bh) {
4292 ext4_error(inode->i_sb, "ext4_free_branches", 4470 ext4_error(inode->i_sb,
4293 "Read failure, inode=%lu, block=%llu", 4471 "Read failure, inode=%lu, block=%llu",
4294 inode->i_ino, nr); 4472 inode->i_ino, nr);
4295 continue; 4473 continue;
@@ -4433,8 +4611,10 @@ void ext4_truncate(struct inode *inode)
4433 if (!ext4_can_truncate(inode)) 4611 if (!ext4_can_truncate(inode))
4434 return; 4612 return;
4435 4613
4614 EXT4_I(inode)->i_flags &= ~EXT4_EOFBLOCKS_FL;
4615
4436 if (inode->i_size == 0 && !test_opt(inode->i_sb, NO_AUTO_DA_ALLOC)) 4616 if (inode->i_size == 0 && !test_opt(inode->i_sb, NO_AUTO_DA_ALLOC))
4437 ei->i_state |= EXT4_STATE_DA_ALLOC_CLOSE; 4617 ext4_set_inode_state(inode, EXT4_STATE_DA_ALLOC_CLOSE);
4438 4618
4439 if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) { 4619 if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) {
4440 ext4_ext_truncate(inode); 4620 ext4_ext_truncate(inode);
@@ -4604,9 +4784,8 @@ static int __ext4_get_inode_loc(struct inode *inode,
4604 4784
4605 bh = sb_getblk(sb, block); 4785 bh = sb_getblk(sb, block);
4606 if (!bh) { 4786 if (!bh) {
4607 ext4_error(sb, "ext4_get_inode_loc", "unable to read " 4787 ext4_error(sb, "unable to read inode block - "
4608 "inode block - inode=%lu, block=%llu", 4788 "inode=%lu, block=%llu", inode->i_ino, block);
4609 inode->i_ino, block);
4610 return -EIO; 4789 return -EIO;
4611 } 4790 }
4612 if (!buffer_uptodate(bh)) { 4791 if (!buffer_uptodate(bh)) {
@@ -4704,9 +4883,8 @@ make_io:
4704 submit_bh(READ_META, bh); 4883 submit_bh(READ_META, bh);
4705 wait_on_buffer(bh); 4884 wait_on_buffer(bh);
4706 if (!buffer_uptodate(bh)) { 4885 if (!buffer_uptodate(bh)) {
4707 ext4_error(sb, __func__, 4886 ext4_error(sb, "unable to read inode block - inode=%lu,"
4708 "unable to read inode block - inode=%lu, " 4887 " block=%llu", inode->i_ino, block);
4709 "block=%llu", inode->i_ino, block);
4710 brelse(bh); 4888 brelse(bh);
4711 return -EIO; 4889 return -EIO;
4712 } 4890 }
@@ -4720,7 +4898,7 @@ int ext4_get_inode_loc(struct inode *inode, struct ext4_iloc *iloc)
4720{ 4898{
4721 /* We have all inode data except xattrs in memory here. */ 4899 /* We have all inode data except xattrs in memory here. */
4722 return __ext4_get_inode_loc(inode, iloc, 4900 return __ext4_get_inode_loc(inode, iloc,
4723 !(EXT4_I(inode)->i_state & EXT4_STATE_XATTR)); 4901 !ext4_test_inode_state(inode, EXT4_STATE_XATTR));
4724} 4902}
4725 4903
4726void ext4_set_inode_flags(struct inode *inode) 4904void ext4_set_inode_flags(struct inode *inode)
@@ -4814,7 +4992,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
4814 } 4992 }
4815 inode->i_nlink = le16_to_cpu(raw_inode->i_links_count); 4993 inode->i_nlink = le16_to_cpu(raw_inode->i_links_count);
4816 4994
4817 ei->i_state = 0; 4995 ei->i_state_flags = 0;
4818 ei->i_dir_start_lookup = 0; 4996 ei->i_dir_start_lookup = 0;
4819 ei->i_dtime = le32_to_cpu(raw_inode->i_dtime); 4997 ei->i_dtime = le32_to_cpu(raw_inode->i_dtime);
4820 /* We now have enough fields to check if the inode was active or not. 4998 /* We now have enough fields to check if the inode was active or not.
@@ -4897,7 +5075,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
4897 EXT4_GOOD_OLD_INODE_SIZE + 5075 EXT4_GOOD_OLD_INODE_SIZE +
4898 ei->i_extra_isize; 5076 ei->i_extra_isize;
4899 if (*magic == cpu_to_le32(EXT4_XATTR_MAGIC)) 5077 if (*magic == cpu_to_le32(EXT4_XATTR_MAGIC))
4900 ei->i_state |= EXT4_STATE_XATTR; 5078 ext4_set_inode_state(inode, EXT4_STATE_XATTR);
4901 } 5079 }
4902 } else 5080 } else
4903 ei->i_extra_isize = 0; 5081 ei->i_extra_isize = 0;
@@ -4917,8 +5095,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
4917 ret = 0; 5095 ret = 0;
4918 if (ei->i_file_acl && 5096 if (ei->i_file_acl &&
4919 !ext4_data_block_valid(EXT4_SB(sb), ei->i_file_acl, 1)) { 5097 !ext4_data_block_valid(EXT4_SB(sb), ei->i_file_acl, 1)) {
4920 ext4_error(sb, __func__, 5098 ext4_error(sb, "bad extended attribute block %llu inode #%lu",
4921 "bad extended attribute block %llu in inode #%lu",
4922 ei->i_file_acl, inode->i_ino); 5099 ei->i_file_acl, inode->i_ino);
4923 ret = -EIO; 5100 ret = -EIO;
4924 goto bad_inode; 5101 goto bad_inode;
@@ -4964,8 +5141,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
4964 new_decode_dev(le32_to_cpu(raw_inode->i_block[1]))); 5141 new_decode_dev(le32_to_cpu(raw_inode->i_block[1])));
4965 } else { 5142 } else {
4966 ret = -EIO; 5143 ret = -EIO;
4967 ext4_error(inode->i_sb, __func__, 5144 ext4_error(inode->i_sb, "bogus i_mode (%o) for inode=%lu",
4968 "bogus i_mode (%o) for inode=%lu",
4969 inode->i_mode, inode->i_ino); 5145 inode->i_mode, inode->i_ino);
4970 goto bad_inode; 5146 goto bad_inode;
4971 } 5147 }
@@ -5037,7 +5213,7 @@ static int ext4_do_update_inode(handle_t *handle,
5037 5213
5038 /* For fields not not tracking in the in-memory inode, 5214 /* For fields not not tracking in the in-memory inode,
5039 * initialise them to zero for new inodes. */ 5215 * initialise them to zero for new inodes. */
5040 if (ei->i_state & EXT4_STATE_NEW) 5216 if (ext4_test_inode_state(inode, EXT4_STATE_NEW))
5041 memset(raw_inode, 0, EXT4_SB(inode->i_sb)->s_inode_size); 5217 memset(raw_inode, 0, EXT4_SB(inode->i_sb)->s_inode_size);
5042 5218
5043 ext4_get_inode_flags(ei); 5219 ext4_get_inode_flags(ei);
@@ -5101,7 +5277,7 @@ static int ext4_do_update_inode(handle_t *handle,
5101 EXT4_FEATURE_RO_COMPAT_LARGE_FILE); 5277 EXT4_FEATURE_RO_COMPAT_LARGE_FILE);
5102 sb->s_dirt = 1; 5278 sb->s_dirt = 1;
5103 ext4_handle_sync(handle); 5279 ext4_handle_sync(handle);
5104 err = ext4_handle_dirty_metadata(handle, inode, 5280 err = ext4_handle_dirty_metadata(handle, NULL,
5105 EXT4_SB(sb)->s_sbh); 5281 EXT4_SB(sb)->s_sbh);
5106 } 5282 }
5107 } 5283 }
@@ -5130,10 +5306,10 @@ static int ext4_do_update_inode(handle_t *handle,
5130 } 5306 }
5131 5307
5132 BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata"); 5308 BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata");
5133 rc = ext4_handle_dirty_metadata(handle, inode, bh); 5309 rc = ext4_handle_dirty_metadata(handle, NULL, bh);
5134 if (!err) 5310 if (!err)
5135 err = rc; 5311 err = rc;
5136 ei->i_state &= ~EXT4_STATE_NEW; 5312 ext4_clear_inode_state(inode, EXT4_STATE_NEW);
5137 5313
5138 ext4_update_inode_fsync_trans(handle, inode, 0); 5314 ext4_update_inode_fsync_trans(handle, inode, 0);
5139out_brelse: 5315out_brelse:
@@ -5177,7 +5353,7 @@ out_brelse:
5177 * `stuff()' is running, and the new i_size will be lost. Plus the inode 5353 * `stuff()' is running, and the new i_size will be lost. Plus the inode
5178 * will no longer be on the superblock's dirty inode list. 5354 * will no longer be on the superblock's dirty inode list.
5179 */ 5355 */
5180int ext4_write_inode(struct inode *inode, int wait) 5356int ext4_write_inode(struct inode *inode, struct writeback_control *wbc)
5181{ 5357{
5182 int err; 5358 int err;
5183 5359
@@ -5191,7 +5367,7 @@ int ext4_write_inode(struct inode *inode, int wait)
5191 return -EIO; 5367 return -EIO;
5192 } 5368 }
5193 5369
5194 if (!wait) 5370 if (wbc->sync_mode != WB_SYNC_ALL)
5195 return 0; 5371 return 0;
5196 5372
5197 err = ext4_force_commit(inode->i_sb); 5373 err = ext4_force_commit(inode->i_sb);
@@ -5201,13 +5377,11 @@ int ext4_write_inode(struct inode *inode, int wait)
5201 err = ext4_get_inode_loc(inode, &iloc); 5377 err = ext4_get_inode_loc(inode, &iloc);
5202 if (err) 5378 if (err)
5203 return err; 5379 return err;
5204 if (wait) 5380 if (wbc->sync_mode == WB_SYNC_ALL)
5205 sync_dirty_buffer(iloc.bh); 5381 sync_dirty_buffer(iloc.bh);
5206 if (buffer_req(iloc.bh) && !buffer_uptodate(iloc.bh)) { 5382 if (buffer_req(iloc.bh) && !buffer_uptodate(iloc.bh)) {
5207 ext4_error(inode->i_sb, __func__, 5383 ext4_error(inode->i_sb, "IO error syncing inode, "
5208 "IO error syncing inode, " 5384 "inode=%lu, block=%llu", inode->i_ino,
5209 "inode=%lu, block=%llu",
5210 inode->i_ino,
5211 (unsigned long long)iloc.bh->b_blocknr); 5385 (unsigned long long)iloc.bh->b_blocknr);
5212 err = -EIO; 5386 err = -EIO;
5213 } 5387 }
@@ -5249,6 +5423,8 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
5249 if (error) 5423 if (error)
5250 return error; 5424 return error;
5251 5425
5426 if (ia_valid & ATTR_SIZE)
5427 dquot_initialize(inode);
5252 if ((ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid) || 5428 if ((ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid) ||
5253 (ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid)) { 5429 (ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid)) {
5254 handle_t *handle; 5430 handle_t *handle;
@@ -5261,7 +5437,7 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
5261 error = PTR_ERR(handle); 5437 error = PTR_ERR(handle);
5262 goto err_out; 5438 goto err_out;
5263 } 5439 }
5264 error = vfs_dq_transfer(inode, attr) ? -EDQUOT : 0; 5440 error = dquot_transfer(inode, attr);
5265 if (error) { 5441 if (error) {
5266 ext4_journal_stop(handle); 5442 ext4_journal_stop(handle);
5267 return error; 5443 return error;
@@ -5288,7 +5464,9 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
5288 } 5464 }
5289 5465
5290 if (S_ISREG(inode->i_mode) && 5466 if (S_ISREG(inode->i_mode) &&
5291 attr->ia_valid & ATTR_SIZE && attr->ia_size < inode->i_size) { 5467 attr->ia_valid & ATTR_SIZE &&
5468 (attr->ia_size < inode->i_size ||
5469 (EXT4_I(inode)->i_flags & EXT4_EOFBLOCKS_FL))) {
5292 handle_t *handle; 5470 handle_t *handle;
5293 5471
5294 handle = ext4_journal_start(inode, 3); 5472 handle = ext4_journal_start(inode, 3);
@@ -5319,6 +5497,9 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
5319 goto err_out; 5497 goto err_out;
5320 } 5498 }
5321 } 5499 }
5500 /* ext4_truncate will clear the flag */
5501 if ((EXT4_I(inode)->i_flags & EXT4_EOFBLOCKS_FL))
5502 ext4_truncate(inode);
5322 } 5503 }
5323 5504
5324 rc = inode_setattr(inode, attr); 5505 rc = inode_setattr(inode, attr);
@@ -5557,8 +5738,8 @@ static int ext4_expand_extra_isize(struct inode *inode,
5557 entry = IFIRST(header); 5738 entry = IFIRST(header);
5558 5739
5559 /* No extended attributes present */ 5740 /* No extended attributes present */
5560 if (!(EXT4_I(inode)->i_state & EXT4_STATE_XATTR) || 5741 if (!ext4_test_inode_state(inode, EXT4_STATE_XATTR) ||
5561 header->h_magic != cpu_to_le32(EXT4_XATTR_MAGIC)) { 5742 header->h_magic != cpu_to_le32(EXT4_XATTR_MAGIC)) {
5562 memset((void *)raw_inode + EXT4_GOOD_OLD_INODE_SIZE, 0, 5743 memset((void *)raw_inode + EXT4_GOOD_OLD_INODE_SIZE, 0,
5563 new_extra_isize); 5744 new_extra_isize);
5564 EXT4_I(inode)->i_extra_isize = new_extra_isize; 5745 EXT4_I(inode)->i_extra_isize = new_extra_isize;
@@ -5602,7 +5783,7 @@ int ext4_mark_inode_dirty(handle_t *handle, struct inode *inode)
5602 err = ext4_reserve_inode_write(handle, inode, &iloc); 5783 err = ext4_reserve_inode_write(handle, inode, &iloc);
5603 if (ext4_handle_valid(handle) && 5784 if (ext4_handle_valid(handle) &&
5604 EXT4_I(inode)->i_extra_isize < sbi->s_want_extra_isize && 5785 EXT4_I(inode)->i_extra_isize < sbi->s_want_extra_isize &&
5605 !(EXT4_I(inode)->i_state & EXT4_STATE_NO_EXPAND)) { 5786 !ext4_test_inode_state(inode, EXT4_STATE_NO_EXPAND)) {
5606 /* 5787 /*
5607 * We need extra buffer credits since we may write into EA block 5788 * We need extra buffer credits since we may write into EA block
5608 * with this same handle. If journal_extend fails, then it will 5789 * with this same handle. If journal_extend fails, then it will
@@ -5616,10 +5797,11 @@ int ext4_mark_inode_dirty(handle_t *handle, struct inode *inode)
5616 sbi->s_want_extra_isize, 5797 sbi->s_want_extra_isize,
5617 iloc, handle); 5798 iloc, handle);
5618 if (ret) { 5799 if (ret) {
5619 EXT4_I(inode)->i_state |= EXT4_STATE_NO_EXPAND; 5800 ext4_set_inode_state(inode,
5801 EXT4_STATE_NO_EXPAND);
5620 if (mnt_count != 5802 if (mnt_count !=
5621 le16_to_cpu(sbi->s_es->s_mnt_count)) { 5803 le16_to_cpu(sbi->s_es->s_mnt_count)) {
5622 ext4_warning(inode->i_sb, __func__, 5804 ext4_warning(inode->i_sb,
5623 "Unable to expand inode %lu. Delete" 5805 "Unable to expand inode %lu. Delete"
5624 " some EAs or run e2fsck.", 5806 " some EAs or run e2fsck.",
5625 inode->i_ino); 5807 inode->i_ino);
@@ -5641,7 +5823,7 @@ int ext4_mark_inode_dirty(handle_t *handle, struct inode *inode)
5641 * i_size has been changed by generic_commit_write() and we thus need 5823 * i_size has been changed by generic_commit_write() and we thus need
5642 * to include the updated inode in the current transaction. 5824 * to include the updated inode in the current transaction.
5643 * 5825 *
5644 * Also, vfs_dq_alloc_block() will always dirty the inode when blocks 5826 * Also, dquot_alloc_block() will always dirty the inode when blocks
5645 * are allocated to the file. 5827 * are allocated to the file.
5646 * 5828 *
5647 * If the inode is marked synchronous, we don't honour that here - doing 5829 * If the inode is marked synchronous, we don't honour that here - doing
@@ -5683,7 +5865,7 @@ static int ext4_pin_inode(handle_t *handle, struct inode *inode)
5683 err = jbd2_journal_get_write_access(handle, iloc.bh); 5865 err = jbd2_journal_get_write_access(handle, iloc.bh);
5684 if (!err) 5866 if (!err)
5685 err = ext4_handle_dirty_metadata(handle, 5867 err = ext4_handle_dirty_metadata(handle,
5686 inode, 5868 NULL,
5687 iloc.bh); 5869 iloc.bh);
5688 brelse(iloc.bh); 5870 brelse(iloc.bh);
5689 } 5871 }
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index b63d193126db..016d0249294f 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -92,6 +92,15 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
92 flags &= ~EXT4_EXTENTS_FL; 92 flags &= ~EXT4_EXTENTS_FL;
93 } 93 }
94 94
95 if (flags & EXT4_EOFBLOCKS_FL) {
96 /* we don't support adding EOFBLOCKS flag */
97 if (!(oldflags & EXT4_EOFBLOCKS_FL)) {
98 err = -EOPNOTSUPP;
99 goto flags_out;
100 }
101 } else if (oldflags & EXT4_EOFBLOCKS_FL)
102 ext4_truncate(inode);
103
95 handle = ext4_journal_start(inode, 1); 104 handle = ext4_journal_start(inode, 1);
96 if (IS_ERR(handle)) { 105 if (IS_ERR(handle)) {
97 err = PTR_ERR(handle); 106 err = PTR_ERR(handle);
@@ -249,7 +258,8 @@ setversion_out:
249 if (me.moved_len > 0) 258 if (me.moved_len > 0)
250 file_remove_suid(donor_filp); 259 file_remove_suid(donor_filp);
251 260
252 if (copy_to_user((struct move_extent *)arg, &me, sizeof(me))) 261 if (copy_to_user((struct move_extent __user *)arg,
262 &me, sizeof(me)))
253 err = -EFAULT; 263 err = -EFAULT;
254mext_out: 264mext_out:
255 fput(donor_filp); 265 fput(donor_filp);
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index d34afad3e137..506713a2ebd8 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -441,10 +441,9 @@ static void mb_free_blocks_double(struct inode *inode, struct ext4_buddy *e4b,
441 for (i = 0; i < count; i++) { 441 for (i = 0; i < count; i++) {
442 if (!mb_test_bit(first + i, e4b->bd_info->bb_bitmap)) { 442 if (!mb_test_bit(first + i, e4b->bd_info->bb_bitmap)) {
443 ext4_fsblk_t blocknr; 443 ext4_fsblk_t blocknr;
444 blocknr = e4b->bd_group * EXT4_BLOCKS_PER_GROUP(sb); 444
445 blocknr = ext4_group_first_block_no(sb, e4b->bd_group);
445 blocknr += first + i; 446 blocknr += first + i;
446 blocknr +=
447 le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block);
448 ext4_grp_locked_error(sb, e4b->bd_group, 447 ext4_grp_locked_error(sb, e4b->bd_group,
449 __func__, "double-free of inode" 448 __func__, "double-free of inode"
450 " %lu's block %llu(bit %u in group %u)", 449 " %lu's block %llu(bit %u in group %u)",
@@ -1255,10 +1254,9 @@ static void mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b,
1255 1254
1256 if (!mb_test_bit(block, EXT4_MB_BITMAP(e4b))) { 1255 if (!mb_test_bit(block, EXT4_MB_BITMAP(e4b))) {
1257 ext4_fsblk_t blocknr; 1256 ext4_fsblk_t blocknr;
1258 blocknr = e4b->bd_group * EXT4_BLOCKS_PER_GROUP(sb); 1257
1258 blocknr = ext4_group_first_block_no(sb, e4b->bd_group);
1259 blocknr += block; 1259 blocknr += block;
1260 blocknr +=
1261 le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block);
1262 ext4_grp_locked_error(sb, e4b->bd_group, 1260 ext4_grp_locked_error(sb, e4b->bd_group,
1263 __func__, "double-free of inode" 1261 __func__, "double-free of inode"
1264 " %lu's block %llu(bit %u in group %u)", 1262 " %lu's block %llu(bit %u in group %u)",
@@ -1631,7 +1629,6 @@ int ext4_mb_find_by_goal(struct ext4_allocation_context *ac,
1631 int max; 1629 int max;
1632 int err; 1630 int err;
1633 struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb); 1631 struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
1634 struct ext4_super_block *es = sbi->s_es;
1635 struct ext4_free_extent ex; 1632 struct ext4_free_extent ex;
1636 1633
1637 if (!(ac->ac_flags & EXT4_MB_HINT_TRY_GOAL)) 1634 if (!(ac->ac_flags & EXT4_MB_HINT_TRY_GOAL))
@@ -1648,8 +1645,8 @@ int ext4_mb_find_by_goal(struct ext4_allocation_context *ac,
1648 if (max >= ac->ac_g_ex.fe_len && ac->ac_g_ex.fe_len == sbi->s_stripe) { 1645 if (max >= ac->ac_g_ex.fe_len && ac->ac_g_ex.fe_len == sbi->s_stripe) {
1649 ext4_fsblk_t start; 1646 ext4_fsblk_t start;
1650 1647
1651 start = (e4b->bd_group * EXT4_BLOCKS_PER_GROUP(ac->ac_sb)) + 1648 start = ext4_group_first_block_no(ac->ac_sb, e4b->bd_group) +
1652 ex.fe_start + le32_to_cpu(es->s_first_data_block); 1649 ex.fe_start;
1653 /* use do_div to get remainder (would be 64-bit modulo) */ 1650 /* use do_div to get remainder (would be 64-bit modulo) */
1654 if (do_div(start, sbi->s_stripe) == 0) { 1651 if (do_div(start, sbi->s_stripe) == 0) {
1655 ac->ac_found++; 1652 ac->ac_found++;
@@ -1803,8 +1800,8 @@ void ext4_mb_scan_aligned(struct ext4_allocation_context *ac,
1803 BUG_ON(sbi->s_stripe == 0); 1800 BUG_ON(sbi->s_stripe == 0);
1804 1801
1805 /* find first stripe-aligned block in group */ 1802 /* find first stripe-aligned block in group */
1806 first_group_block = e4b->bd_group * EXT4_BLOCKS_PER_GROUP(sb) 1803 first_group_block = ext4_group_first_block_no(sb, e4b->bd_group);
1807 + le32_to_cpu(sbi->s_es->s_first_data_block); 1804
1808 a = first_group_block + sbi->s_stripe - 1; 1805 a = first_group_block + sbi->s_stripe - 1;
1809 do_div(a, sbi->s_stripe); 1806 do_div(a, sbi->s_stripe);
1810 i = (a * sbi->s_stripe) - first_group_block; 1807 i = (a * sbi->s_stripe) - first_group_block;
@@ -2256,7 +2253,7 @@ int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group,
2256 2253
2257 INIT_LIST_HEAD(&meta_group_info[i]->bb_prealloc_list); 2254 INIT_LIST_HEAD(&meta_group_info[i]->bb_prealloc_list);
2258 init_rwsem(&meta_group_info[i]->alloc_sem); 2255 init_rwsem(&meta_group_info[i]->alloc_sem);
2259 meta_group_info[i]->bb_free_root.rb_node = NULL; 2256 meta_group_info[i]->bb_free_root = RB_ROOT;
2260 2257
2261#ifdef DOUBLE_CHECK 2258#ifdef DOUBLE_CHECK
2262 { 2259 {
@@ -2560,12 +2557,9 @@ static void release_blocks_on_commit(journal_t *journal, transaction_t *txn)
2560 ext4_unlock_group(sb, entry->group); 2557 ext4_unlock_group(sb, entry->group);
2561 if (test_opt(sb, DISCARD)) { 2558 if (test_opt(sb, DISCARD)) {
2562 ext4_fsblk_t discard_block; 2559 ext4_fsblk_t discard_block;
2563 struct ext4_super_block *es = EXT4_SB(sb)->s_es;
2564 2560
2565 discard_block = (ext4_fsblk_t)entry->group * 2561 discard_block = entry->start_blk +
2566 EXT4_BLOCKS_PER_GROUP(sb) 2562 ext4_group_first_block_no(sb, entry->group);
2567 + entry->start_blk
2568 + le32_to_cpu(es->s_first_data_block);
2569 trace_ext4_discard_blocks(sb, 2563 trace_ext4_discard_blocks(sb,
2570 (unsigned long long)discard_block, 2564 (unsigned long long)discard_block,
2571 entry->count); 2565 entry->count);
@@ -2703,14 +2697,11 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
2703 if (err) 2697 if (err)
2704 goto out_err; 2698 goto out_err;
2705 2699
2706 block = ac->ac_b_ex.fe_group * EXT4_BLOCKS_PER_GROUP(sb) 2700 block = ext4_grp_offs_to_block(sb, &ac->ac_b_ex);
2707 + ac->ac_b_ex.fe_start
2708 + le32_to_cpu(es->s_first_data_block);
2709 2701
2710 len = ac->ac_b_ex.fe_len; 2702 len = ac->ac_b_ex.fe_len;
2711 if (!ext4_data_block_valid(sbi, block, len)) { 2703 if (!ext4_data_block_valid(sbi, block, len)) {
2712 ext4_error(sb, __func__, 2704 ext4_error(sb, "Allocating blocks %llu-%llu which overlap "
2713 "Allocating blocks %llu-%llu which overlap "
2714 "fs metadata\n", block, block+len); 2705 "fs metadata\n", block, block+len);
2715 /* File system mounted not to panic on error 2706 /* File system mounted not to panic on error
2716 * Fix the bitmap and repeat the block allocation 2707 * Fix the bitmap and repeat the block allocation
@@ -3161,9 +3152,7 @@ ext4_mb_use_preallocated(struct ext4_allocation_context *ac)
3161 /* The max size of hash table is PREALLOC_TB_SIZE */ 3152 /* The max size of hash table is PREALLOC_TB_SIZE */
3162 order = PREALLOC_TB_SIZE - 1; 3153 order = PREALLOC_TB_SIZE - 1;
3163 3154
3164 goal_block = ac->ac_g_ex.fe_group * EXT4_BLOCKS_PER_GROUP(ac->ac_sb) + 3155 goal_block = ext4_grp_offs_to_block(ac->ac_sb, &ac->ac_g_ex);
3165 ac->ac_g_ex.fe_start +
3166 le32_to_cpu(EXT4_SB(ac->ac_sb)->s_es->s_first_data_block);
3167 /* 3156 /*
3168 * search for the prealloc space that is having 3157 * search for the prealloc space that is having
3169 * minimal distance from the goal block. 3158 * minimal distance from the goal block.
@@ -3526,8 +3515,7 @@ ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh,
3526 if (bit >= end) 3515 if (bit >= end)
3527 break; 3516 break;
3528 next = mb_find_next_bit(bitmap_bh->b_data, end, bit); 3517 next = mb_find_next_bit(bitmap_bh->b_data, end, bit);
3529 start = group * EXT4_BLOCKS_PER_GROUP(sb) + bit + 3518 start = ext4_group_first_block_no(sb, group) + bit;
3530 le32_to_cpu(sbi->s_es->s_first_data_block);
3531 mb_debug(1, " free preallocated %u/%u in group %u\n", 3519 mb_debug(1, " free preallocated %u/%u in group %u\n",
3532 (unsigned) start, (unsigned) next - bit, 3520 (unsigned) start, (unsigned) next - bit,
3533 (unsigned) group); 3521 (unsigned) group);
@@ -3623,15 +3611,13 @@ ext4_mb_discard_group_preallocations(struct super_block *sb,
3623 3611
3624 bitmap_bh = ext4_read_block_bitmap(sb, group); 3612 bitmap_bh = ext4_read_block_bitmap(sb, group);
3625 if (bitmap_bh == NULL) { 3613 if (bitmap_bh == NULL) {
3626 ext4_error(sb, __func__, "Error in reading block " 3614 ext4_error(sb, "Error reading block bitmap for %u", group);
3627 "bitmap for %u", group);
3628 return 0; 3615 return 0;
3629 } 3616 }
3630 3617
3631 err = ext4_mb_load_buddy(sb, group, &e4b); 3618 err = ext4_mb_load_buddy(sb, group, &e4b);
3632 if (err) { 3619 if (err) {
3633 ext4_error(sb, __func__, "Error in loading buddy " 3620 ext4_error(sb, "Error loading buddy information for %u", group);
3634 "information for %u", group);
3635 put_bh(bitmap_bh); 3621 put_bh(bitmap_bh);
3636 return 0; 3622 return 0;
3637 } 3623 }
@@ -3804,15 +3790,15 @@ repeat:
3804 3790
3805 err = ext4_mb_load_buddy(sb, group, &e4b); 3791 err = ext4_mb_load_buddy(sb, group, &e4b);
3806 if (err) { 3792 if (err) {
3807 ext4_error(sb, __func__, "Error in loading buddy " 3793 ext4_error(sb, "Error loading buddy information for %u",
3808 "information for %u", group); 3794 group);
3809 continue; 3795 continue;
3810 } 3796 }
3811 3797
3812 bitmap_bh = ext4_read_block_bitmap(sb, group); 3798 bitmap_bh = ext4_read_block_bitmap(sb, group);
3813 if (bitmap_bh == NULL) { 3799 if (bitmap_bh == NULL) {
3814 ext4_error(sb, __func__, "Error in reading block " 3800 ext4_error(sb, "Error reading block bitmap for %u",
3815 "bitmap for %u", group); 3801 group);
3816 ext4_mb_release_desc(&e4b); 3802 ext4_mb_release_desc(&e4b);
3817 continue; 3803 continue;
3818 } 3804 }
@@ -3938,7 +3924,7 @@ static void ext4_mb_group_or_file(struct ext4_allocation_context *ac)
3938 3924
3939 /* don't use group allocation for large files */ 3925 /* don't use group allocation for large files */
3940 size = max(size, isize); 3926 size = max(size, isize);
3941 if (size >= sbi->s_mb_stream_request) { 3927 if (size > sbi->s_mb_stream_request) {
3942 ac->ac_flags |= EXT4_MB_STREAM_ALLOC; 3928 ac->ac_flags |= EXT4_MB_STREAM_ALLOC;
3943 return; 3929 return;
3944 } 3930 }
@@ -4077,8 +4063,8 @@ ext4_mb_discard_lg_preallocations(struct super_block *sb,
4077 4063
4078 ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, NULL); 4064 ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, NULL);
4079 if (ext4_mb_load_buddy(sb, group, &e4b)) { 4065 if (ext4_mb_load_buddy(sb, group, &e4b)) {
4080 ext4_error(sb, __func__, "Error in loading buddy " 4066 ext4_error(sb, "Error loading buddy information for %u",
4081 "information for %u", group); 4067 group);
4082 continue; 4068 continue;
4083 } 4069 }
4084 ext4_lock_group(sb, group); 4070 ext4_lock_group(sb, group);
@@ -4254,7 +4240,7 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle,
4254 return 0; 4240 return 0;
4255 } 4241 }
4256 reserv_blks = ar->len; 4242 reserv_blks = ar->len;
4257 while (ar->len && vfs_dq_alloc_block(ar->inode, ar->len)) { 4243 while (ar->len && dquot_alloc_block(ar->inode, ar->len)) {
4258 ar->flags |= EXT4_MB_HINT_NOPREALLOC; 4244 ar->flags |= EXT4_MB_HINT_NOPREALLOC;
4259 ar->len--; 4245 ar->len--;
4260 } 4246 }
@@ -4331,7 +4317,7 @@ out2:
4331 kmem_cache_free(ext4_ac_cachep, ac); 4317 kmem_cache_free(ext4_ac_cachep, ac);
4332out1: 4318out1:
4333 if (inquota && ar->len < inquota) 4319 if (inquota && ar->len < inquota)
4334 vfs_dq_free_block(ar->inode, inquota - ar->len); 4320 dquot_free_block(ar->inode, inquota - ar->len);
4335out3: 4321out3:
4336 if (!ar->len) { 4322 if (!ar->len) {
4337 if (!EXT4_I(ar->inode)->i_delalloc_reserved_flag) 4323 if (!EXT4_I(ar->inode)->i_delalloc_reserved_flag)
@@ -4476,10 +4462,10 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode,
4476 4462
4477 sbi = EXT4_SB(sb); 4463 sbi = EXT4_SB(sb);
4478 es = EXT4_SB(sb)->s_es; 4464 es = EXT4_SB(sb)->s_es;
4479 if (!ext4_data_block_valid(sbi, block, count)) { 4465 if (!(flags & EXT4_FREE_BLOCKS_VALIDATED) &&
4480 ext4_error(sb, __func__, 4466 !ext4_data_block_valid(sbi, block, count)) {
4481 "Freeing blocks not in datazone - " 4467 ext4_error(sb, "Freeing blocks not in datazone - "
4482 "block = %llu, count = %lu", block, count); 4468 "block = %llu, count = %lu", block, count);
4483 goto error_return; 4469 goto error_return;
4484 } 4470 }
4485 4471
@@ -4547,8 +4533,7 @@ do_more:
4547 in_range(block + count - 1, ext4_inode_table(sb, gdp), 4533 in_range(block + count - 1, ext4_inode_table(sb, gdp),
4548 EXT4_SB(sb)->s_itb_per_group)) { 4534 EXT4_SB(sb)->s_itb_per_group)) {
4549 4535
4550 ext4_error(sb, __func__, 4536 ext4_error(sb, "Freeing blocks in system zone - "
4551 "Freeing blocks in system zone - "
4552 "Block = %llu, count = %lu", block, count); 4537 "Block = %llu, count = %lu", block, count);
4553 /* err = 0. ext4_std_error should be a no op */ 4538 /* err = 0. ext4_std_error should be a no op */
4554 goto error_return; 4539 goto error_return;
@@ -4646,7 +4631,7 @@ do_more:
4646 sb->s_dirt = 1; 4631 sb->s_dirt = 1;
4647error_return: 4632error_return:
4648 if (freed) 4633 if (freed)
4649 vfs_dq_free_block(inode, freed); 4634 dquot_free_block(inode, freed);
4650 brelse(bitmap_bh); 4635 brelse(bitmap_bh);
4651 ext4_std_error(sb, err); 4636 ext4_std_error(sb, err);
4652 if (ac) 4637 if (ac)
diff --git a/fs/ext4/mballoc.h b/fs/ext4/mballoc.h
index 436521cae456..b619322c76f0 100644
--- a/fs/ext4/mballoc.h
+++ b/fs/ext4/mballoc.h
@@ -220,16 +220,9 @@ struct ext4_buddy {
220#define EXT4_MB_BITMAP(e4b) ((e4b)->bd_bitmap) 220#define EXT4_MB_BITMAP(e4b) ((e4b)->bd_bitmap)
221#define EXT4_MB_BUDDY(e4b) ((e4b)->bd_buddy) 221#define EXT4_MB_BUDDY(e4b) ((e4b)->bd_buddy)
222 222
223#define in_range(b, first, len) ((b) >= (first) && (b) <= (first) + (len) - 1)
224
225static inline ext4_fsblk_t ext4_grp_offs_to_block(struct super_block *sb, 223static inline ext4_fsblk_t ext4_grp_offs_to_block(struct super_block *sb,
226 struct ext4_free_extent *fex) 224 struct ext4_free_extent *fex)
227{ 225{
228 ext4_fsblk_t block; 226 return ext4_group_first_block_no(sb, fex->fe_group) + fex->fe_start;
229
230 block = (ext4_fsblk_t) fex->fe_group * EXT4_BLOCKS_PER_GROUP(sb)
231 + fex->fe_start
232 + le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block);
233 return block;
234} 227}
235#endif 228#endif
diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c
index 81415814b00b..8b87bd0eac95 100644
--- a/fs/ext4/migrate.c
+++ b/fs/ext4/migrate.c
@@ -365,12 +365,12 @@ static int ext4_ext_swap_inode_data(handle_t *handle, struct inode *inode,
365 * happened after we started the migrate. We need to 365 * happened after we started the migrate. We need to
366 * fail the migrate 366 * fail the migrate
367 */ 367 */
368 if (!(EXT4_I(inode)->i_state & EXT4_STATE_EXT_MIGRATE)) { 368 if (!ext4_test_inode_state(inode, EXT4_STATE_EXT_MIGRATE)) {
369 retval = -EAGAIN; 369 retval = -EAGAIN;
370 up_write(&EXT4_I(inode)->i_data_sem); 370 up_write(&EXT4_I(inode)->i_data_sem);
371 goto err_out; 371 goto err_out;
372 } else 372 } else
373 EXT4_I(inode)->i_state &= ~EXT4_STATE_EXT_MIGRATE; 373 ext4_clear_inode_state(inode, EXT4_STATE_EXT_MIGRATE);
374 /* 374 /*
375 * We have the extent map build with the tmp inode. 375 * We have the extent map build with the tmp inode.
376 * Now copy the i_data across 376 * Now copy the i_data across
@@ -503,14 +503,10 @@ int ext4_ext_migrate(struct inode *inode)
503 } 503 }
504 i_size_write(tmp_inode, i_size_read(inode)); 504 i_size_write(tmp_inode, i_size_read(inode));
505 /* 505 /*
506 * We don't want the inode to be reclaimed 506 * Set the i_nlink to zero so it will be deleted later
507 * if we got interrupted in between. We have 507 * when we drop inode reference.
508 * this tmp inode carrying reference to the
509 * data blocks of the original file. We set
510 * the i_nlink to zero at the last stage after
511 * switching the original file to extent format
512 */ 508 */
513 tmp_inode->i_nlink = 1; 509 tmp_inode->i_nlink = 0;
514 510
515 ext4_ext_tree_init(handle, tmp_inode); 511 ext4_ext_tree_init(handle, tmp_inode);
516 ext4_orphan_add(handle, tmp_inode); 512 ext4_orphan_add(handle, tmp_inode);
@@ -533,10 +529,20 @@ int ext4_ext_migrate(struct inode *inode)
533 * allocation. 529 * allocation.
534 */ 530 */
535 down_read((&EXT4_I(inode)->i_data_sem)); 531 down_read((&EXT4_I(inode)->i_data_sem));
536 EXT4_I(inode)->i_state |= EXT4_STATE_EXT_MIGRATE; 532 ext4_set_inode_state(inode, EXT4_STATE_EXT_MIGRATE);
537 up_read((&EXT4_I(inode)->i_data_sem)); 533 up_read((&EXT4_I(inode)->i_data_sem));
538 534
539 handle = ext4_journal_start(inode, 1); 535 handle = ext4_journal_start(inode, 1);
536 if (IS_ERR(handle)) {
537 /*
538 * It is impossible to update on-disk structures without
539 * a handle, so just rollback in-core changes and live other
540 * work to orphan_list_cleanup()
541 */
542 ext4_orphan_del(NULL, tmp_inode);
543 retval = PTR_ERR(handle);
544 goto out;
545 }
540 546
541 ei = EXT4_I(inode); 547 ei = EXT4_I(inode);
542 i_data = ei->i_data; 548 i_data = ei->i_data;
@@ -618,15 +624,8 @@ err_out:
618 624
619 /* Reset the extent details */ 625 /* Reset the extent details */
620 ext4_ext_tree_init(handle, tmp_inode); 626 ext4_ext_tree_init(handle, tmp_inode);
621
622 /*
623 * Set the i_nlink to zero so that
624 * generic_drop_inode really deletes the
625 * inode
626 */
627 tmp_inode->i_nlink = 0;
628
629 ext4_journal_stop(handle); 627 ext4_journal_stop(handle);
628out:
630 unlock_new_inode(tmp_inode); 629 unlock_new_inode(tmp_inode);
631 iput(tmp_inode); 630 iput(tmp_inode);
632 631
diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c
index 82c415be87a4..aa5fe28d180f 100644
--- a/fs/ext4/move_extent.c
+++ b/fs/ext4/move_extent.c
@@ -152,12 +152,12 @@ mext_check_null_inode(struct inode *inode1, struct inode *inode2,
152 int ret = 0; 152 int ret = 0;
153 153
154 if (inode1 == NULL) { 154 if (inode1 == NULL) {
155 ext4_error(inode2->i_sb, function, 155 __ext4_error(inode2->i_sb, function,
156 "Both inodes should not be NULL: " 156 "Both inodes should not be NULL: "
157 "inode1 NULL inode2 %lu", inode2->i_ino); 157 "inode1 NULL inode2 %lu", inode2->i_ino);
158 ret = -EIO; 158 ret = -EIO;
159 } else if (inode2 == NULL) { 159 } else if (inode2 == NULL) {
160 ext4_error(inode1->i_sb, function, 160 __ext4_error(inode1->i_sb, function,
161 "Both inodes should not be NULL: " 161 "Both inodes should not be NULL: "
162 "inode1 %lu inode2 NULL", inode1->i_ino); 162 "inode1 %lu inode2 NULL", inode1->i_ino);
163 ret = -EIO; 163 ret = -EIO;
@@ -252,6 +252,7 @@ mext_insert_across_blocks(handle_t *handle, struct inode *orig_inode,
252 } 252 }
253 253
254 o_start->ee_len = start_ext->ee_len; 254 o_start->ee_len = start_ext->ee_len;
255 eblock = le32_to_cpu(start_ext->ee_block);
255 new_flag = 1; 256 new_flag = 1;
256 257
257 } else if (start_ext->ee_len && new_ext->ee_len && 258 } else if (start_ext->ee_len && new_ext->ee_len &&
@@ -262,6 +263,7 @@ mext_insert_across_blocks(handle_t *handle, struct inode *orig_inode,
262 * orig |------------------------------| 263 * orig |------------------------------|
263 */ 264 */
264 o_start->ee_len = start_ext->ee_len; 265 o_start->ee_len = start_ext->ee_len;
266 eblock = le32_to_cpu(start_ext->ee_block);
265 new_flag = 1; 267 new_flag = 1;
266 268
267 } else if (!start_ext->ee_len && new_ext->ee_len && 269 } else if (!start_ext->ee_len && new_ext->ee_len &&
@@ -475,7 +477,6 @@ mext_leaf_block(handle_t *handle, struct inode *orig_inode,
475 struct ext4_extent *oext, *o_start, *o_end, *prev_ext; 477 struct ext4_extent *oext, *o_start, *o_end, *prev_ext;
476 struct ext4_extent new_ext, start_ext, end_ext; 478 struct ext4_extent new_ext, start_ext, end_ext;
477 ext4_lblk_t new_ext_end; 479 ext4_lblk_t new_ext_end;
478 ext4_fsblk_t new_phys_end;
479 int oext_alen, new_ext_alen, end_ext_alen; 480 int oext_alen, new_ext_alen, end_ext_alen;
480 int depth = ext_depth(orig_inode); 481 int depth = ext_depth(orig_inode);
481 int ret; 482 int ret;
@@ -489,7 +490,6 @@ mext_leaf_block(handle_t *handle, struct inode *orig_inode,
489 new_ext.ee_len = dext->ee_len; 490 new_ext.ee_len = dext->ee_len;
490 new_ext_alen = ext4_ext_get_actual_len(&new_ext); 491 new_ext_alen = ext4_ext_get_actual_len(&new_ext);
491 new_ext_end = le32_to_cpu(new_ext.ee_block) + new_ext_alen - 1; 492 new_ext_end = le32_to_cpu(new_ext.ee_block) + new_ext_alen - 1;
492 new_phys_end = ext_pblock(&new_ext) + new_ext_alen - 1;
493 493
494 /* 494 /*
495 * Case: original extent is first 495 * Case: original extent is first
@@ -502,6 +502,7 @@ mext_leaf_block(handle_t *handle, struct inode *orig_inode,
502 le32_to_cpu(oext->ee_block) + oext_alen) { 502 le32_to_cpu(oext->ee_block) + oext_alen) {
503 start_ext.ee_len = cpu_to_le16(le32_to_cpu(new_ext.ee_block) - 503 start_ext.ee_len = cpu_to_le16(le32_to_cpu(new_ext.ee_block) -
504 le32_to_cpu(oext->ee_block)); 504 le32_to_cpu(oext->ee_block));
505 start_ext.ee_block = oext->ee_block;
505 copy_extent_status(oext, &start_ext); 506 copy_extent_status(oext, &start_ext);
506 } else if (oext > EXT_FIRST_EXTENT(orig_path[depth].p_hdr)) { 507 } else if (oext > EXT_FIRST_EXTENT(orig_path[depth].p_hdr)) {
507 prev_ext = oext - 1; 508 prev_ext = oext - 1;
@@ -515,6 +516,7 @@ mext_leaf_block(handle_t *handle, struct inode *orig_inode,
515 start_ext.ee_len = cpu_to_le16( 516 start_ext.ee_len = cpu_to_le16(
516 ext4_ext_get_actual_len(prev_ext) + 517 ext4_ext_get_actual_len(prev_ext) +
517 new_ext_alen); 518 new_ext_alen);
519 start_ext.ee_block = oext->ee_block;
518 copy_extent_status(prev_ext, &start_ext); 520 copy_extent_status(prev_ext, &start_ext);
519 new_ext.ee_len = 0; 521 new_ext.ee_len = 0;
520 } 522 }
@@ -526,7 +528,7 @@ mext_leaf_block(handle_t *handle, struct inode *orig_inode,
526 * new_ext |-------| 528 * new_ext |-------|
527 */ 529 */
528 if (le32_to_cpu(oext->ee_block) + oext_alen - 1 < new_ext_end) { 530 if (le32_to_cpu(oext->ee_block) + oext_alen - 1 < new_ext_end) {
529 ext4_error(orig_inode->i_sb, __func__, 531 ext4_error(orig_inode->i_sb,
530 "new_ext_end(%u) should be less than or equal to " 532 "new_ext_end(%u) should be less than or equal to "
531 "oext->ee_block(%u) + oext_alen(%d) - 1", 533 "oext->ee_block(%u) + oext_alen(%d) - 1",
532 new_ext_end, le32_to_cpu(oext->ee_block), 534 new_ext_end, le32_to_cpu(oext->ee_block),
@@ -689,12 +691,12 @@ mext_replace_branches(handle_t *handle, struct inode *orig_inode,
689 while (1) { 691 while (1) {
690 /* The extent for donor must be found. */ 692 /* The extent for donor must be found. */
691 if (!dext) { 693 if (!dext) {
692 ext4_error(donor_inode->i_sb, __func__, 694 ext4_error(donor_inode->i_sb,
693 "The extent for donor must be found"); 695 "The extent for donor must be found");
694 *err = -EIO; 696 *err = -EIO;
695 goto out; 697 goto out;
696 } else if (donor_off != le32_to_cpu(tmp_dext.ee_block)) { 698 } else if (donor_off != le32_to_cpu(tmp_dext.ee_block)) {
697 ext4_error(donor_inode->i_sb, __func__, 699 ext4_error(donor_inode->i_sb,
698 "Donor offset(%u) and the first block of donor " 700 "Donor offset(%u) and the first block of donor "
699 "extent(%u) should be equal", 701 "extent(%u) should be equal",
700 donor_off, 702 donor_off,
@@ -928,7 +930,7 @@ out2:
928} 930}
929 931
930/** 932/**
931 * mext_check_argumants - Check whether move extent can be done 933 * mext_check_arguments - Check whether move extent can be done
932 * 934 *
933 * @orig_inode: original inode 935 * @orig_inode: original inode
934 * @donor_inode: donor inode 936 * @donor_inode: donor inode
@@ -949,14 +951,6 @@ mext_check_arguments(struct inode *orig_inode,
949 unsigned int blkbits = orig_inode->i_blkbits; 951 unsigned int blkbits = orig_inode->i_blkbits;
950 unsigned int blocksize = 1 << blkbits; 952 unsigned int blocksize = 1 << blkbits;
951 953
952 /* Regular file check */
953 if (!S_ISREG(orig_inode->i_mode) || !S_ISREG(donor_inode->i_mode)) {
954 ext4_debug("ext4 move extent: The argument files should be "
955 "regular file [ino:orig %lu, donor %lu]\n",
956 orig_inode->i_ino, donor_inode->i_ino);
957 return -EINVAL;
958 }
959
960 if (donor_inode->i_mode & (S_ISUID|S_ISGID)) { 954 if (donor_inode->i_mode & (S_ISUID|S_ISGID)) {
961 ext4_debug("ext4 move extent: suid or sgid is set" 955 ext4_debug("ext4 move extent: suid or sgid is set"
962 " to donor file [ino:orig %lu, donor %lu]\n", 956 " to donor file [ino:orig %lu, donor %lu]\n",
@@ -1204,6 +1198,14 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp,
1204 return -EINVAL; 1198 return -EINVAL;
1205 } 1199 }
1206 1200
1201 /* Regular file check */
1202 if (!S_ISREG(orig_inode->i_mode) || !S_ISREG(donor_inode->i_mode)) {
1203 ext4_debug("ext4 move extent: The argument files should be "
1204 "regular file [ino:orig %lu, donor %lu]\n",
1205 orig_inode->i_ino, donor_inode->i_ino);
1206 return -EINVAL;
1207 }
1208
1207 /* Protect orig and donor inodes against a truncate */ 1209 /* Protect orig and donor inodes against a truncate */
1208 ret1 = mext_inode_double_lock(orig_inode, donor_inode); 1210 ret1 = mext_inode_double_lock(orig_inode, donor_inode);
1209 if (ret1 < 0) 1211 if (ret1 < 0)
@@ -1351,7 +1353,7 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp,
1351 if (ret1 < 0) 1353 if (ret1 < 0)
1352 break; 1354 break;
1353 if (*moved_len > len) { 1355 if (*moved_len > len) {
1354 ext4_error(orig_inode->i_sb, __func__, 1356 ext4_error(orig_inode->i_sb,
1355 "We replaced blocks too much! " 1357 "We replaced blocks too much! "
1356 "sum of replaced: %llu requested: %llu", 1358 "sum of replaced: %llu requested: %llu",
1357 *moved_len, len); 1359 *moved_len, len);
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 17a17e10dd60..0c070fabd108 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -383,8 +383,7 @@ dx_probe(const struct qstr *d_name, struct inode *dir,
383 if (root->info.hash_version != DX_HASH_TEA && 383 if (root->info.hash_version != DX_HASH_TEA &&
384 root->info.hash_version != DX_HASH_HALF_MD4 && 384 root->info.hash_version != DX_HASH_HALF_MD4 &&
385 root->info.hash_version != DX_HASH_LEGACY) { 385 root->info.hash_version != DX_HASH_LEGACY) {
386 ext4_warning(dir->i_sb, __func__, 386 ext4_warning(dir->i_sb, "Unrecognised inode hash code %d",
387 "Unrecognised inode hash code %d",
388 root->info.hash_version); 387 root->info.hash_version);
389 brelse(bh); 388 brelse(bh);
390 *err = ERR_BAD_DX_DIR; 389 *err = ERR_BAD_DX_DIR;
@@ -399,8 +398,7 @@ dx_probe(const struct qstr *d_name, struct inode *dir,
399 hash = hinfo->hash; 398 hash = hinfo->hash;
400 399
401 if (root->info.unused_flags & 1) { 400 if (root->info.unused_flags & 1) {
402 ext4_warning(dir->i_sb, __func__, 401 ext4_warning(dir->i_sb, "Unimplemented inode hash flags: %#06x",
403 "Unimplemented inode hash flags: %#06x",
404 root->info.unused_flags); 402 root->info.unused_flags);
405 brelse(bh); 403 brelse(bh);
406 *err = ERR_BAD_DX_DIR; 404 *err = ERR_BAD_DX_DIR;
@@ -408,8 +406,7 @@ dx_probe(const struct qstr *d_name, struct inode *dir,
408 } 406 }
409 407
410 if ((indirect = root->info.indirect_levels) > 1) { 408 if ((indirect = root->info.indirect_levels) > 1) {
411 ext4_warning(dir->i_sb, __func__, 409 ext4_warning(dir->i_sb, "Unimplemented inode hash depth: %#06x",
412 "Unimplemented inode hash depth: %#06x",
413 root->info.indirect_levels); 410 root->info.indirect_levels);
414 brelse(bh); 411 brelse(bh);
415 *err = ERR_BAD_DX_DIR; 412 *err = ERR_BAD_DX_DIR;
@@ -421,8 +418,7 @@ dx_probe(const struct qstr *d_name, struct inode *dir,
421 418
422 if (dx_get_limit(entries) != dx_root_limit(dir, 419 if (dx_get_limit(entries) != dx_root_limit(dir,
423 root->info.info_length)) { 420 root->info.info_length)) {
424 ext4_warning(dir->i_sb, __func__, 421 ext4_warning(dir->i_sb, "dx entry: limit != root limit");
425 "dx entry: limit != root limit");
426 brelse(bh); 422 brelse(bh);
427 *err = ERR_BAD_DX_DIR; 423 *err = ERR_BAD_DX_DIR;
428 goto fail; 424 goto fail;
@@ -433,7 +429,7 @@ dx_probe(const struct qstr *d_name, struct inode *dir,
433 { 429 {
434 count = dx_get_count(entries); 430 count = dx_get_count(entries);
435 if (!count || count > dx_get_limit(entries)) { 431 if (!count || count > dx_get_limit(entries)) {
436 ext4_warning(dir->i_sb, __func__, 432 ext4_warning(dir->i_sb,
437 "dx entry: no count or count > limit"); 433 "dx entry: no count or count > limit");
438 brelse(bh); 434 brelse(bh);
439 *err = ERR_BAD_DX_DIR; 435 *err = ERR_BAD_DX_DIR;
@@ -478,7 +474,7 @@ dx_probe(const struct qstr *d_name, struct inode *dir,
478 goto fail2; 474 goto fail2;
479 at = entries = ((struct dx_node *) bh->b_data)->entries; 475 at = entries = ((struct dx_node *) bh->b_data)->entries;
480 if (dx_get_limit(entries) != dx_node_limit (dir)) { 476 if (dx_get_limit(entries) != dx_node_limit (dir)) {
481 ext4_warning(dir->i_sb, __func__, 477 ext4_warning(dir->i_sb,
482 "dx entry: limit != node limit"); 478 "dx entry: limit != node limit");
483 brelse(bh); 479 brelse(bh);
484 *err = ERR_BAD_DX_DIR; 480 *err = ERR_BAD_DX_DIR;
@@ -494,7 +490,7 @@ fail2:
494 } 490 }
495fail: 491fail:
496 if (*err == ERR_BAD_DX_DIR) 492 if (*err == ERR_BAD_DX_DIR)
497 ext4_warning(dir->i_sb, __func__, 493 ext4_warning(dir->i_sb,
498 "Corrupt dir inode %ld, running e2fsck is " 494 "Corrupt dir inode %ld, running e2fsck is "
499 "recommended.", dir->i_ino); 495 "recommended.", dir->i_ino);
500 return NULL; 496 return NULL;
@@ -947,9 +943,8 @@ restart:
947 wait_on_buffer(bh); 943 wait_on_buffer(bh);
948 if (!buffer_uptodate(bh)) { 944 if (!buffer_uptodate(bh)) {
949 /* read error, skip block & hope for the best */ 945 /* read error, skip block & hope for the best */
950 ext4_error(sb, __func__, "reading directory #%lu " 946 ext4_error(sb, "reading directory #%lu offset %lu",
951 "offset %lu", dir->i_ino, 947 dir->i_ino, (unsigned long)block);
952 (unsigned long)block);
953 brelse(bh); 948 brelse(bh);
954 goto next; 949 goto next;
955 } 950 }
@@ -1041,7 +1036,7 @@ static struct buffer_head * ext4_dx_find_entry(struct inode *dir, const struct q
1041 retval = ext4_htree_next_block(dir, hash, frame, 1036 retval = ext4_htree_next_block(dir, hash, frame,
1042 frames, NULL); 1037 frames, NULL);
1043 if (retval < 0) { 1038 if (retval < 0) {
1044 ext4_warning(sb, __func__, 1039 ext4_warning(sb,
1045 "error reading index page in directory #%lu", 1040 "error reading index page in directory #%lu",
1046 dir->i_ino); 1041 dir->i_ino);
1047 *err = retval; 1042 *err = retval;
@@ -1071,14 +1066,13 @@ static struct dentry *ext4_lookup(struct inode *dir, struct dentry *dentry, stru
1071 __u32 ino = le32_to_cpu(de->inode); 1066 __u32 ino = le32_to_cpu(de->inode);
1072 brelse(bh); 1067 brelse(bh);
1073 if (!ext4_valid_inum(dir->i_sb, ino)) { 1068 if (!ext4_valid_inum(dir->i_sb, ino)) {
1074 ext4_error(dir->i_sb, "ext4_lookup", 1069 ext4_error(dir->i_sb, "bad inode number: %u", ino);
1075 "bad inode number: %u", ino);
1076 return ERR_PTR(-EIO); 1070 return ERR_PTR(-EIO);
1077 } 1071 }
1078 inode = ext4_iget(dir->i_sb, ino); 1072 inode = ext4_iget(dir->i_sb, ino);
1079 if (unlikely(IS_ERR(inode))) { 1073 if (unlikely(IS_ERR(inode))) {
1080 if (PTR_ERR(inode) == -ESTALE) { 1074 if (PTR_ERR(inode) == -ESTALE) {
1081 ext4_error(dir->i_sb, __func__, 1075 ext4_error(dir->i_sb,
1082 "deleted inode referenced: %u", 1076 "deleted inode referenced: %u",
1083 ino); 1077 ino);
1084 return ERR_PTR(-EIO); 1078 return ERR_PTR(-EIO);
@@ -1110,7 +1104,7 @@ struct dentry *ext4_get_parent(struct dentry *child)
1110 brelse(bh); 1104 brelse(bh);
1111 1105
1112 if (!ext4_valid_inum(child->d_inode->i_sb, ino)) { 1106 if (!ext4_valid_inum(child->d_inode->i_sb, ino)) {
1113 ext4_error(child->d_inode->i_sb, "ext4_get_parent", 1107 ext4_error(child->d_inode->i_sb,
1114 "bad inode number: %u", ino); 1108 "bad inode number: %u", ino);
1115 return ERR_PTR(-EIO); 1109 return ERR_PTR(-EIO);
1116 } 1110 }
@@ -1410,7 +1404,7 @@ static int make_indexed_dir(handle_t *handle, struct dentry *dentry,
1410 de = (struct ext4_dir_entry_2 *)((char *)fde + 1404 de = (struct ext4_dir_entry_2 *)((char *)fde +
1411 ext4_rec_len_from_disk(fde->rec_len, blocksize)); 1405 ext4_rec_len_from_disk(fde->rec_len, blocksize));
1412 if ((char *) de >= (((char *) root) + blocksize)) { 1406 if ((char *) de >= (((char *) root) + blocksize)) {
1413 ext4_error(dir->i_sb, __func__, 1407 ext4_error(dir->i_sb,
1414 "invalid rec_len for '..' in inode %lu", 1408 "invalid rec_len for '..' in inode %lu",
1415 dir->i_ino); 1409 dir->i_ino);
1416 brelse(bh); 1410 brelse(bh);
@@ -1575,8 +1569,7 @@ static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry,
1575 1569
1576 if (levels && (dx_get_count(frames->entries) == 1570 if (levels && (dx_get_count(frames->entries) ==
1577 dx_get_limit(frames->entries))) { 1571 dx_get_limit(frames->entries))) {
1578 ext4_warning(sb, __func__, 1572 ext4_warning(sb, "Directory index full!");
1579 "Directory index full!");
1580 err = -ENOSPC; 1573 err = -ENOSPC;
1581 goto cleanup; 1574 goto cleanup;
1582 } 1575 }
@@ -1766,6 +1759,8 @@ static int ext4_create(struct inode *dir, struct dentry *dentry, int mode,
1766 struct inode *inode; 1759 struct inode *inode;
1767 int err, retries = 0; 1760 int err, retries = 0;
1768 1761
1762 dquot_initialize(dir);
1763
1769retry: 1764retry:
1770 handle = ext4_journal_start(dir, EXT4_DATA_TRANS_BLOCKS(dir->i_sb) + 1765 handle = ext4_journal_start(dir, EXT4_DATA_TRANS_BLOCKS(dir->i_sb) +
1771 EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3 + 1766 EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3 +
@@ -1800,6 +1795,8 @@ static int ext4_mknod(struct inode *dir, struct dentry *dentry,
1800 if (!new_valid_dev(rdev)) 1795 if (!new_valid_dev(rdev))
1801 return -EINVAL; 1796 return -EINVAL;
1802 1797
1798 dquot_initialize(dir);
1799
1803retry: 1800retry:
1804 handle = ext4_journal_start(dir, EXT4_DATA_TRANS_BLOCKS(dir->i_sb) + 1801 handle = ext4_journal_start(dir, EXT4_DATA_TRANS_BLOCKS(dir->i_sb) +
1805 EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3 + 1802 EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3 +
@@ -1837,6 +1834,8 @@ static int ext4_mkdir(struct inode *dir, struct dentry *dentry, int mode)
1837 if (EXT4_DIR_LINK_MAX(dir)) 1834 if (EXT4_DIR_LINK_MAX(dir))
1838 return -EMLINK; 1835 return -EMLINK;
1839 1836
1837 dquot_initialize(dir);
1838
1840retry: 1839retry:
1841 handle = ext4_journal_start(dir, EXT4_DATA_TRANS_BLOCKS(dir->i_sb) + 1840 handle = ext4_journal_start(dir, EXT4_DATA_TRANS_BLOCKS(dir->i_sb) +
1842 EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3 + 1841 EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3 +
@@ -1916,11 +1915,11 @@ static int empty_dir(struct inode *inode)
1916 if (inode->i_size < EXT4_DIR_REC_LEN(1) + EXT4_DIR_REC_LEN(2) || 1915 if (inode->i_size < EXT4_DIR_REC_LEN(1) + EXT4_DIR_REC_LEN(2) ||
1917 !(bh = ext4_bread(NULL, inode, 0, 0, &err))) { 1916 !(bh = ext4_bread(NULL, inode, 0, 0, &err))) {
1918 if (err) 1917 if (err)
1919 ext4_error(inode->i_sb, __func__, 1918 ext4_error(inode->i_sb,
1920 "error %d reading directory #%lu offset 0", 1919 "error %d reading directory #%lu offset 0",
1921 err, inode->i_ino); 1920 err, inode->i_ino);
1922 else 1921 else
1923 ext4_warning(inode->i_sb, __func__, 1922 ext4_warning(inode->i_sb,
1924 "bad directory (dir #%lu) - no data block", 1923 "bad directory (dir #%lu) - no data block",
1925 inode->i_ino); 1924 inode->i_ino);
1926 return 1; 1925 return 1;
@@ -1931,7 +1930,7 @@ static int empty_dir(struct inode *inode)
1931 !le32_to_cpu(de1->inode) || 1930 !le32_to_cpu(de1->inode) ||
1932 strcmp(".", de->name) || 1931 strcmp(".", de->name) ||
1933 strcmp("..", de1->name)) { 1932 strcmp("..", de1->name)) {
1934 ext4_warning(inode->i_sb, "empty_dir", 1933 ext4_warning(inode->i_sb,
1935 "bad directory (dir #%lu) - no `.' or `..'", 1934 "bad directory (dir #%lu) - no `.' or `..'",
1936 inode->i_ino); 1935 inode->i_ino);
1937 brelse(bh); 1936 brelse(bh);
@@ -1949,7 +1948,7 @@ static int empty_dir(struct inode *inode)
1949 offset >> EXT4_BLOCK_SIZE_BITS(sb), 0, &err); 1948 offset >> EXT4_BLOCK_SIZE_BITS(sb), 0, &err);
1950 if (!bh) { 1949 if (!bh) {
1951 if (err) 1950 if (err)
1952 ext4_error(sb, __func__, 1951 ext4_error(sb,
1953 "error %d reading directory" 1952 "error %d reading directory"
1954 " #%lu offset %u", 1953 " #%lu offset %u",
1955 err, inode->i_ino, offset); 1954 err, inode->i_ino, offset);
@@ -2020,11 +2019,18 @@ int ext4_orphan_add(handle_t *handle, struct inode *inode)
2020 err = ext4_reserve_inode_write(handle, inode, &iloc); 2019 err = ext4_reserve_inode_write(handle, inode, &iloc);
2021 if (err) 2020 if (err)
2022 goto out_unlock; 2021 goto out_unlock;
2022 /*
2023 * Due to previous errors inode may be already a part of on-disk
2024 * orphan list. If so skip on-disk list modification.
2025 */
2026 if (NEXT_ORPHAN(inode) && NEXT_ORPHAN(inode) <=
2027 (le32_to_cpu(EXT4_SB(sb)->s_es->s_inodes_count)))
2028 goto mem_insert;
2023 2029
2024 /* Insert this inode at the head of the on-disk orphan list... */ 2030 /* Insert this inode at the head of the on-disk orphan list... */
2025 NEXT_ORPHAN(inode) = le32_to_cpu(EXT4_SB(sb)->s_es->s_last_orphan); 2031 NEXT_ORPHAN(inode) = le32_to_cpu(EXT4_SB(sb)->s_es->s_last_orphan);
2026 EXT4_SB(sb)->s_es->s_last_orphan = cpu_to_le32(inode->i_ino); 2032 EXT4_SB(sb)->s_es->s_last_orphan = cpu_to_le32(inode->i_ino);
2027 err = ext4_handle_dirty_metadata(handle, inode, EXT4_SB(sb)->s_sbh); 2033 err = ext4_handle_dirty_metadata(handle, NULL, EXT4_SB(sb)->s_sbh);
2028 rc = ext4_mark_iloc_dirty(handle, inode, &iloc); 2034 rc = ext4_mark_iloc_dirty(handle, inode, &iloc);
2029 if (!err) 2035 if (!err)
2030 err = rc; 2036 err = rc;
@@ -2037,6 +2043,7 @@ int ext4_orphan_add(handle_t *handle, struct inode *inode)
2037 * 2043 *
2038 * This is safe: on error we're going to ignore the orphan list 2044 * This is safe: on error we're going to ignore the orphan list
2039 * anyway on the next recovery. */ 2045 * anyway on the next recovery. */
2046mem_insert:
2040 if (!err) 2047 if (!err)
2041 list_add(&EXT4_I(inode)->i_orphan, &EXT4_SB(sb)->s_orphan); 2048 list_add(&EXT4_I(inode)->i_orphan, &EXT4_SB(sb)->s_orphan);
2042 2049
@@ -2096,7 +2103,7 @@ int ext4_orphan_del(handle_t *handle, struct inode *inode)
2096 if (err) 2103 if (err)
2097 goto out_brelse; 2104 goto out_brelse;
2098 sbi->s_es->s_last_orphan = cpu_to_le32(ino_next); 2105 sbi->s_es->s_last_orphan = cpu_to_le32(ino_next);
2099 err = ext4_handle_dirty_metadata(handle, inode, sbi->s_sbh); 2106 err = ext4_handle_dirty_metadata(handle, NULL, sbi->s_sbh);
2100 } else { 2107 } else {
2101 struct ext4_iloc iloc2; 2108 struct ext4_iloc iloc2;
2102 struct inode *i_prev = 2109 struct inode *i_prev =
@@ -2136,7 +2143,9 @@ static int ext4_rmdir(struct inode *dir, struct dentry *dentry)
2136 2143
2137 /* Initialize quotas before so that eventual writes go in 2144 /* Initialize quotas before so that eventual writes go in
2138 * separate transaction */ 2145 * separate transaction */
2139 vfs_dq_init(dentry->d_inode); 2146 dquot_initialize(dir);
2147 dquot_initialize(dentry->d_inode);
2148
2140 handle = ext4_journal_start(dir, EXT4_DELETE_TRANS_BLOCKS(dir->i_sb)); 2149 handle = ext4_journal_start(dir, EXT4_DELETE_TRANS_BLOCKS(dir->i_sb));
2141 if (IS_ERR(handle)) 2150 if (IS_ERR(handle))
2142 return PTR_ERR(handle); 2151 return PTR_ERR(handle);
@@ -2163,7 +2172,7 @@ static int ext4_rmdir(struct inode *dir, struct dentry *dentry)
2163 if (retval) 2172 if (retval)
2164 goto end_rmdir; 2173 goto end_rmdir;
2165 if (!EXT4_DIR_LINK_EMPTY(inode)) 2174 if (!EXT4_DIR_LINK_EMPTY(inode))
2166 ext4_warning(inode->i_sb, "ext4_rmdir", 2175 ext4_warning(inode->i_sb,
2167 "empty directory has too many links (%d)", 2176 "empty directory has too many links (%d)",
2168 inode->i_nlink); 2177 inode->i_nlink);
2169 inode->i_version++; 2178 inode->i_version++;
@@ -2195,7 +2204,9 @@ static int ext4_unlink(struct inode *dir, struct dentry *dentry)
2195 2204
2196 /* Initialize quotas before so that eventual writes go 2205 /* Initialize quotas before so that eventual writes go
2197 * in separate transaction */ 2206 * in separate transaction */
2198 vfs_dq_init(dentry->d_inode); 2207 dquot_initialize(dir);
2208 dquot_initialize(dentry->d_inode);
2209
2199 handle = ext4_journal_start(dir, EXT4_DELETE_TRANS_BLOCKS(dir->i_sb)); 2210 handle = ext4_journal_start(dir, EXT4_DELETE_TRANS_BLOCKS(dir->i_sb));
2200 if (IS_ERR(handle)) 2211 if (IS_ERR(handle))
2201 return PTR_ERR(handle); 2212 return PTR_ERR(handle);
@@ -2215,7 +2226,7 @@ static int ext4_unlink(struct inode *dir, struct dentry *dentry)
2215 goto end_unlink; 2226 goto end_unlink;
2216 2227
2217 if (!inode->i_nlink) { 2228 if (!inode->i_nlink) {
2218 ext4_warning(inode->i_sb, "ext4_unlink", 2229 ext4_warning(inode->i_sb,
2219 "Deleting nonexistent file (%lu), %d", 2230 "Deleting nonexistent file (%lu), %d",
2220 inode->i_ino, inode->i_nlink); 2231 inode->i_ino, inode->i_nlink);
2221 inode->i_nlink = 1; 2232 inode->i_nlink = 1;
@@ -2250,6 +2261,8 @@ static int ext4_symlink(struct inode *dir,
2250 if (l > dir->i_sb->s_blocksize) 2261 if (l > dir->i_sb->s_blocksize)
2251 return -ENAMETOOLONG; 2262 return -ENAMETOOLONG;
2252 2263
2264 dquot_initialize(dir);
2265
2253retry: 2266retry:
2254 handle = ext4_journal_start(dir, EXT4_DATA_TRANS_BLOCKS(dir->i_sb) + 2267 handle = ext4_journal_start(dir, EXT4_DATA_TRANS_BLOCKS(dir->i_sb) +
2255 EXT4_INDEX_EXTRA_TRANS_BLOCKS + 5 + 2268 EXT4_INDEX_EXTRA_TRANS_BLOCKS + 5 +
@@ -2308,6 +2321,8 @@ static int ext4_link(struct dentry *old_dentry,
2308 if (inode->i_nlink >= EXT4_LINK_MAX) 2321 if (inode->i_nlink >= EXT4_LINK_MAX)
2309 return -EMLINK; 2322 return -EMLINK;
2310 2323
2324 dquot_initialize(dir);
2325
2311 /* 2326 /*
2312 * Return -ENOENT if we've raced with unlink and i_nlink is 0. Doing 2327 * Return -ENOENT if we've raced with unlink and i_nlink is 0. Doing
2313 * otherwise has the potential to corrupt the orphan inode list. 2328 * otherwise has the potential to corrupt the orphan inode list.
@@ -2358,12 +2373,15 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
2358 struct ext4_dir_entry_2 *old_de, *new_de; 2373 struct ext4_dir_entry_2 *old_de, *new_de;
2359 int retval, force_da_alloc = 0; 2374 int retval, force_da_alloc = 0;
2360 2375
2376 dquot_initialize(old_dir);
2377 dquot_initialize(new_dir);
2378
2361 old_bh = new_bh = dir_bh = NULL; 2379 old_bh = new_bh = dir_bh = NULL;
2362 2380
2363 /* Initialize quotas before so that eventual writes go 2381 /* Initialize quotas before so that eventual writes go
2364 * in separate transaction */ 2382 * in separate transaction */
2365 if (new_dentry->d_inode) 2383 if (new_dentry->d_inode)
2366 vfs_dq_init(new_dentry->d_inode); 2384 dquot_initialize(new_dentry->d_inode);
2367 handle = ext4_journal_start(old_dir, 2 * 2385 handle = ext4_journal_start(old_dir, 2 *
2368 EXT4_DATA_TRANS_BLOCKS(old_dir->i_sb) + 2386 EXT4_DATA_TRANS_BLOCKS(old_dir->i_sb) +
2369 EXT4_INDEX_EXTRA_TRANS_BLOCKS + 2); 2387 EXT4_INDEX_EXTRA_TRANS_BLOCKS + 2);
@@ -2462,7 +2480,7 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
2462 } 2480 }
2463 } 2481 }
2464 if (retval) { 2482 if (retval) {
2465 ext4_warning(old_dir->i_sb, "ext4_rename", 2483 ext4_warning(old_dir->i_sb,
2466 "Deleting old file (%lu), %d, error=%d", 2484 "Deleting old file (%lu), %d, error=%d",
2467 old_dir->i_ino, old_dir->i_nlink, retval); 2485 old_dir->i_ino, old_dir->i_nlink, retval);
2468 } 2486 }
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c
index 3b2c5541d8a6..5692c48754a0 100644
--- a/fs/ext4/resize.c
+++ b/fs/ext4/resize.c
@@ -48,65 +48,54 @@ static int verify_group_input(struct super_block *sb,
48 48
49 ext4_get_group_no_and_offset(sb, start, NULL, &offset); 49 ext4_get_group_no_and_offset(sb, start, NULL, &offset);
50 if (group != sbi->s_groups_count) 50 if (group != sbi->s_groups_count)
51 ext4_warning(sb, __func__, 51 ext4_warning(sb, "Cannot add at group %u (only %u groups)",
52 "Cannot add at group %u (only %u groups)",
53 input->group, sbi->s_groups_count); 52 input->group, sbi->s_groups_count);
54 else if (offset != 0) 53 else if (offset != 0)
55 ext4_warning(sb, __func__, "Last group not full"); 54 ext4_warning(sb, "Last group not full");
56 else if (input->reserved_blocks > input->blocks_count / 5) 55 else if (input->reserved_blocks > input->blocks_count / 5)
57 ext4_warning(sb, __func__, "Reserved blocks too high (%u)", 56 ext4_warning(sb, "Reserved blocks too high (%u)",
58 input->reserved_blocks); 57 input->reserved_blocks);
59 else if (free_blocks_count < 0) 58 else if (free_blocks_count < 0)
60 ext4_warning(sb, __func__, "Bad blocks count %u", 59 ext4_warning(sb, "Bad blocks count %u",
61 input->blocks_count); 60 input->blocks_count);
62 else if (!(bh = sb_bread(sb, end - 1))) 61 else if (!(bh = sb_bread(sb, end - 1)))
63 ext4_warning(sb, __func__, 62 ext4_warning(sb, "Cannot read last block (%llu)",
64 "Cannot read last block (%llu)",
65 end - 1); 63 end - 1);
66 else if (outside(input->block_bitmap, start, end)) 64 else if (outside(input->block_bitmap, start, end))
67 ext4_warning(sb, __func__, 65 ext4_warning(sb, "Block bitmap not in group (block %llu)",
68 "Block bitmap not in group (block %llu)",
69 (unsigned long long)input->block_bitmap); 66 (unsigned long long)input->block_bitmap);
70 else if (outside(input->inode_bitmap, start, end)) 67 else if (outside(input->inode_bitmap, start, end))
71 ext4_warning(sb, __func__, 68 ext4_warning(sb, "Inode bitmap not in group (block %llu)",
72 "Inode bitmap not in group (block %llu)",
73 (unsigned long long)input->inode_bitmap); 69 (unsigned long long)input->inode_bitmap);
74 else if (outside(input->inode_table, start, end) || 70 else if (outside(input->inode_table, start, end) ||
75 outside(itend - 1, start, end)) 71 outside(itend - 1, start, end))
76 ext4_warning(sb, __func__, 72 ext4_warning(sb, "Inode table not in group (blocks %llu-%llu)",
77 "Inode table not in group (blocks %llu-%llu)",
78 (unsigned long long)input->inode_table, itend - 1); 73 (unsigned long long)input->inode_table, itend - 1);
79 else if (input->inode_bitmap == input->block_bitmap) 74 else if (input->inode_bitmap == input->block_bitmap)
80 ext4_warning(sb, __func__, 75 ext4_warning(sb, "Block bitmap same as inode bitmap (%llu)",
81 "Block bitmap same as inode bitmap (%llu)",
82 (unsigned long long)input->block_bitmap); 76 (unsigned long long)input->block_bitmap);
83 else if (inside(input->block_bitmap, input->inode_table, itend)) 77 else if (inside(input->block_bitmap, input->inode_table, itend))
84 ext4_warning(sb, __func__, 78 ext4_warning(sb, "Block bitmap (%llu) in inode table "
85 "Block bitmap (%llu) in inode table (%llu-%llu)", 79 "(%llu-%llu)",
86 (unsigned long long)input->block_bitmap, 80 (unsigned long long)input->block_bitmap,
87 (unsigned long long)input->inode_table, itend - 1); 81 (unsigned long long)input->inode_table, itend - 1);
88 else if (inside(input->inode_bitmap, input->inode_table, itend)) 82 else if (inside(input->inode_bitmap, input->inode_table, itend))
89 ext4_warning(sb, __func__, 83 ext4_warning(sb, "Inode bitmap (%llu) in inode table "
90 "Inode bitmap (%llu) in inode table (%llu-%llu)", 84 "(%llu-%llu)",
91 (unsigned long long)input->inode_bitmap, 85 (unsigned long long)input->inode_bitmap,
92 (unsigned long long)input->inode_table, itend - 1); 86 (unsigned long long)input->inode_table, itend - 1);
93 else if (inside(input->block_bitmap, start, metaend)) 87 else if (inside(input->block_bitmap, start, metaend))
94 ext4_warning(sb, __func__, 88 ext4_warning(sb, "Block bitmap (%llu) in GDT table (%llu-%llu)",
95 "Block bitmap (%llu) in GDT table"
96 " (%llu-%llu)",
97 (unsigned long long)input->block_bitmap, 89 (unsigned long long)input->block_bitmap,
98 start, metaend - 1); 90 start, metaend - 1);
99 else if (inside(input->inode_bitmap, start, metaend)) 91 else if (inside(input->inode_bitmap, start, metaend))
100 ext4_warning(sb, __func__, 92 ext4_warning(sb, "Inode bitmap (%llu) in GDT table (%llu-%llu)",
101 "Inode bitmap (%llu) in GDT table"
102 " (%llu-%llu)",
103 (unsigned long long)input->inode_bitmap, 93 (unsigned long long)input->inode_bitmap,
104 start, metaend - 1); 94 start, metaend - 1);
105 else if (inside(input->inode_table, start, metaend) || 95 else if (inside(input->inode_table, start, metaend) ||
106 inside(itend - 1, start, metaend)) 96 inside(itend - 1, start, metaend))
107 ext4_warning(sb, __func__, 97 ext4_warning(sb, "Inode table (%llu-%llu) overlaps GDT table "
108 "Inode table (%llu-%llu) overlaps" 98 "(%llu-%llu)",
109 "GDT table (%llu-%llu)",
110 (unsigned long long)input->inode_table, 99 (unsigned long long)input->inode_table,
111 itend - 1, start, metaend - 1); 100 itend - 1, start, metaend - 1);
112 else 101 else
@@ -364,8 +353,7 @@ static int verify_reserved_gdb(struct super_block *sb,
364 while ((grp = ext4_list_backups(sb, &three, &five, &seven)) < end) { 353 while ((grp = ext4_list_backups(sb, &three, &five, &seven)) < end) {
365 if (le32_to_cpu(*p++) != 354 if (le32_to_cpu(*p++) !=
366 grp * EXT4_BLOCKS_PER_GROUP(sb) + blk){ 355 grp * EXT4_BLOCKS_PER_GROUP(sb) + blk){
367 ext4_warning(sb, __func__, 356 ext4_warning(sb, "reserved GDT %llu"
368 "reserved GDT %llu"
369 " missing grp %d (%llu)", 357 " missing grp %d (%llu)",
370 blk, grp, 358 blk, grp,
371 grp * 359 grp *
@@ -420,8 +408,7 @@ static int add_new_gdb(handle_t *handle, struct inode *inode,
420 */ 408 */
421 if (EXT4_SB(sb)->s_sbh->b_blocknr != 409 if (EXT4_SB(sb)->s_sbh->b_blocknr !=
422 le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block)) { 410 le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block)) {
423 ext4_warning(sb, __func__, 411 ext4_warning(sb, "won't resize using backup superblock at %llu",
424 "won't resize using backup superblock at %llu",
425 (unsigned long long)EXT4_SB(sb)->s_sbh->b_blocknr); 412 (unsigned long long)EXT4_SB(sb)->s_sbh->b_blocknr);
426 return -EPERM; 413 return -EPERM;
427 } 414 }
@@ -444,8 +431,7 @@ static int add_new_gdb(handle_t *handle, struct inode *inode,
444 431
445 data = (__le32 *)dind->b_data; 432 data = (__le32 *)dind->b_data;
446 if (le32_to_cpu(data[gdb_num % EXT4_ADDR_PER_BLOCK(sb)]) != gdblock) { 433 if (le32_to_cpu(data[gdb_num % EXT4_ADDR_PER_BLOCK(sb)]) != gdblock) {
447 ext4_warning(sb, __func__, 434 ext4_warning(sb, "new group %u GDT block %llu not reserved",
448 "new group %u GDT block %llu not reserved",
449 input->group, gdblock); 435 input->group, gdblock);
450 err = -EINVAL; 436 err = -EINVAL;
451 goto exit_dind; 437 goto exit_dind;
@@ -468,7 +454,7 @@ static int add_new_gdb(handle_t *handle, struct inode *inode,
468 GFP_NOFS); 454 GFP_NOFS);
469 if (!n_group_desc) { 455 if (!n_group_desc) {
470 err = -ENOMEM; 456 err = -ENOMEM;
471 ext4_warning(sb, __func__, 457 ext4_warning(sb,
472 "not enough memory for %lu groups", gdb_num + 1); 458 "not enough memory for %lu groups", gdb_num + 1);
473 goto exit_inode; 459 goto exit_inode;
474 } 460 }
@@ -567,8 +553,7 @@ static int reserve_backup_gdb(handle_t *handle, struct inode *inode,
567 /* Get each reserved primary GDT block and verify it holds backups */ 553 /* Get each reserved primary GDT block and verify it holds backups */
568 for (res = 0; res < reserved_gdb; res++, blk++) { 554 for (res = 0; res < reserved_gdb; res++, blk++) {
569 if (le32_to_cpu(*data) != blk) { 555 if (le32_to_cpu(*data) != blk) {
570 ext4_warning(sb, __func__, 556 ext4_warning(sb, "reserved block %llu"
571 "reserved block %llu"
572 " not at offset %ld", 557 " not at offset %ld",
573 blk, 558 blk,
574 (long)(data - (__le32 *)dind->b_data)); 559 (long)(data - (__le32 *)dind->b_data));
@@ -713,8 +698,7 @@ static void update_backups(struct super_block *sb,
713 */ 698 */
714exit_err: 699exit_err:
715 if (err) { 700 if (err) {
716 ext4_warning(sb, __func__, 701 ext4_warning(sb, "can't update backup for group %u (err %d), "
717 "can't update backup for group %u (err %d), "
718 "forcing fsck on next reboot", group, err); 702 "forcing fsck on next reboot", group, err);
719 sbi->s_mount_state &= ~EXT4_VALID_FS; 703 sbi->s_mount_state &= ~EXT4_VALID_FS;
720 sbi->s_es->s_state &= cpu_to_le16(~EXT4_VALID_FS); 704 sbi->s_es->s_state &= cpu_to_le16(~EXT4_VALID_FS);
@@ -753,20 +737,19 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input)
753 737
754 if (gdb_off == 0 && !EXT4_HAS_RO_COMPAT_FEATURE(sb, 738 if (gdb_off == 0 && !EXT4_HAS_RO_COMPAT_FEATURE(sb,
755 EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER)) { 739 EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER)) {
756 ext4_warning(sb, __func__, 740 ext4_warning(sb, "Can't resize non-sparse filesystem further");
757 "Can't resize non-sparse filesystem further");
758 return -EPERM; 741 return -EPERM;
759 } 742 }
760 743
761 if (ext4_blocks_count(es) + input->blocks_count < 744 if (ext4_blocks_count(es) + input->blocks_count <
762 ext4_blocks_count(es)) { 745 ext4_blocks_count(es)) {
763 ext4_warning(sb, __func__, "blocks_count overflow"); 746 ext4_warning(sb, "blocks_count overflow");
764 return -EINVAL; 747 return -EINVAL;
765 } 748 }
766 749
767 if (le32_to_cpu(es->s_inodes_count) + EXT4_INODES_PER_GROUP(sb) < 750 if (le32_to_cpu(es->s_inodes_count) + EXT4_INODES_PER_GROUP(sb) <
768 le32_to_cpu(es->s_inodes_count)) { 751 le32_to_cpu(es->s_inodes_count)) {
769 ext4_warning(sb, __func__, "inodes_count overflow"); 752 ext4_warning(sb, "inodes_count overflow");
770 return -EINVAL; 753 return -EINVAL;
771 } 754 }
772 755
@@ -774,14 +757,13 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input)
774 if (!EXT4_HAS_COMPAT_FEATURE(sb, 757 if (!EXT4_HAS_COMPAT_FEATURE(sb,
775 EXT4_FEATURE_COMPAT_RESIZE_INODE) 758 EXT4_FEATURE_COMPAT_RESIZE_INODE)
776 || !le16_to_cpu(es->s_reserved_gdt_blocks)) { 759 || !le16_to_cpu(es->s_reserved_gdt_blocks)) {
777 ext4_warning(sb, __func__, 760 ext4_warning(sb,
778 "No reserved GDT blocks, can't resize"); 761 "No reserved GDT blocks, can't resize");
779 return -EPERM; 762 return -EPERM;
780 } 763 }
781 inode = ext4_iget(sb, EXT4_RESIZE_INO); 764 inode = ext4_iget(sb, EXT4_RESIZE_INO);
782 if (IS_ERR(inode)) { 765 if (IS_ERR(inode)) {
783 ext4_warning(sb, __func__, 766 ext4_warning(sb, "Error opening resize inode");
784 "Error opening resize inode");
785 return PTR_ERR(inode); 767 return PTR_ERR(inode);
786 } 768 }
787 } 769 }
@@ -810,8 +792,7 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input)
810 792
811 mutex_lock(&sbi->s_resize_lock); 793 mutex_lock(&sbi->s_resize_lock);
812 if (input->group != sbi->s_groups_count) { 794 if (input->group != sbi->s_groups_count) {
813 ext4_warning(sb, __func__, 795 ext4_warning(sb, "multiple resizers run on filesystem!");
814 "multiple resizers run on filesystem!");
815 err = -EBUSY; 796 err = -EBUSY;
816 goto exit_journal; 797 goto exit_journal;
817 } 798 }
@@ -997,13 +978,12 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es,
997 " too large to resize to %llu blocks safely\n", 978 " too large to resize to %llu blocks safely\n",
998 sb->s_id, n_blocks_count); 979 sb->s_id, n_blocks_count);
999 if (sizeof(sector_t) < 8) 980 if (sizeof(sector_t) < 8)
1000 ext4_warning(sb, __func__, "CONFIG_LBDAF not enabled"); 981 ext4_warning(sb, "CONFIG_LBDAF not enabled");
1001 return -EINVAL; 982 return -EINVAL;
1002 } 983 }
1003 984
1004 if (n_blocks_count < o_blocks_count) { 985 if (n_blocks_count < o_blocks_count) {
1005 ext4_warning(sb, __func__, 986 ext4_warning(sb, "can't shrink FS - resize aborted");
1006 "can't shrink FS - resize aborted");
1007 return -EBUSY; 987 return -EBUSY;
1008 } 988 }
1009 989
@@ -1011,15 +991,14 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es,
1011 ext4_get_group_no_and_offset(sb, o_blocks_count, &group, &last); 991 ext4_get_group_no_and_offset(sb, o_blocks_count, &group, &last);
1012 992
1013 if (last == 0) { 993 if (last == 0) {
1014 ext4_warning(sb, __func__, 994 ext4_warning(sb, "need to use ext2online to resize further");
1015 "need to use ext2online to resize further");
1016 return -EPERM; 995 return -EPERM;
1017 } 996 }
1018 997
1019 add = EXT4_BLOCKS_PER_GROUP(sb) - last; 998 add = EXT4_BLOCKS_PER_GROUP(sb) - last;
1020 999
1021 if (o_blocks_count + add < o_blocks_count) { 1000 if (o_blocks_count + add < o_blocks_count) {
1022 ext4_warning(sb, __func__, "blocks_count overflow"); 1001 ext4_warning(sb, "blocks_count overflow");
1023 return -EINVAL; 1002 return -EINVAL;
1024 } 1003 }
1025 1004
@@ -1027,16 +1006,13 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es,
1027 add = n_blocks_count - o_blocks_count; 1006 add = n_blocks_count - o_blocks_count;
1028 1007
1029 if (o_blocks_count + add < n_blocks_count) 1008 if (o_blocks_count + add < n_blocks_count)
1030 ext4_warning(sb, __func__, 1009 ext4_warning(sb, "will only finish group (%llu blocks, %u new)",
1031 "will only finish group (%llu"
1032 " blocks, %u new)",
1033 o_blocks_count + add, add); 1010 o_blocks_count + add, add);
1034 1011
1035 /* See if the device is actually as big as what was requested */ 1012 /* See if the device is actually as big as what was requested */
1036 bh = sb_bread(sb, o_blocks_count + add - 1); 1013 bh = sb_bread(sb, o_blocks_count + add - 1);
1037 if (!bh) { 1014 if (!bh) {
1038 ext4_warning(sb, __func__, 1015 ext4_warning(sb, "can't read last block, resize aborted");
1039 "can't read last block, resize aborted");
1040 return -ENOSPC; 1016 return -ENOSPC;
1041 } 1017 }
1042 brelse(bh); 1018 brelse(bh);
@@ -1047,14 +1023,13 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es,
1047 handle = ext4_journal_start_sb(sb, 3); 1023 handle = ext4_journal_start_sb(sb, 3);
1048 if (IS_ERR(handle)) { 1024 if (IS_ERR(handle)) {
1049 err = PTR_ERR(handle); 1025 err = PTR_ERR(handle);
1050 ext4_warning(sb, __func__, "error %d on journal start", err); 1026 ext4_warning(sb, "error %d on journal start", err);
1051 goto exit_put; 1027 goto exit_put;
1052 } 1028 }
1053 1029
1054 mutex_lock(&EXT4_SB(sb)->s_resize_lock); 1030 mutex_lock(&EXT4_SB(sb)->s_resize_lock);
1055 if (o_blocks_count != ext4_blocks_count(es)) { 1031 if (o_blocks_count != ext4_blocks_count(es)) {
1056 ext4_warning(sb, __func__, 1032 ext4_warning(sb, "multiple resizers run on filesystem!");
1057 "multiple resizers run on filesystem!");
1058 mutex_unlock(&EXT4_SB(sb)->s_resize_lock); 1033 mutex_unlock(&EXT4_SB(sb)->s_resize_lock);
1059 ext4_journal_stop(handle); 1034 ext4_journal_stop(handle);
1060 err = -EBUSY; 1035 err = -EBUSY;
@@ -1063,8 +1038,7 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es,
1063 1038
1064 if ((err = ext4_journal_get_write_access(handle, 1039 if ((err = ext4_journal_get_write_access(handle,
1065 EXT4_SB(sb)->s_sbh))) { 1040 EXT4_SB(sb)->s_sbh))) {
1066 ext4_warning(sb, __func__, 1041 ext4_warning(sb, "error %d on journal write access", err);
1067 "error %d on journal write access", err);
1068 mutex_unlock(&EXT4_SB(sb)->s_resize_lock); 1042 mutex_unlock(&EXT4_SB(sb)->s_resize_lock);
1069 ext4_journal_stop(handle); 1043 ext4_journal_stop(handle);
1070 goto exit_put; 1044 goto exit_put;
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 735c20d5fd56..2b83b96cb2eb 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -333,7 +333,7 @@ static void ext4_handle_error(struct super_block *sb)
333 sb->s_id); 333 sb->s_id);
334} 334}
335 335
336void ext4_error(struct super_block *sb, const char *function, 336void __ext4_error(struct super_block *sb, const char *function,
337 const char *fmt, ...) 337 const char *fmt, ...)
338{ 338{
339 va_list args; 339 va_list args;
@@ -347,6 +347,42 @@ void ext4_error(struct super_block *sb, const char *function,
347 ext4_handle_error(sb); 347 ext4_handle_error(sb);
348} 348}
349 349
350void ext4_error_inode(const char *function, struct inode *inode,
351 const char *fmt, ...)
352{
353 va_list args;
354
355 va_start(args, fmt);
356 printk(KERN_CRIT "EXT4-fs error (device %s): %s: inode #%lu: (comm %s) ",
357 inode->i_sb->s_id, function, inode->i_ino, current->comm);
358 vprintk(fmt, args);
359 printk("\n");
360 va_end(args);
361
362 ext4_handle_error(inode->i_sb);
363}
364
365void ext4_error_file(const char *function, struct file *file,
366 const char *fmt, ...)
367{
368 va_list args;
369 struct inode *inode = file->f_dentry->d_inode;
370 char pathname[80], *path;
371
372 va_start(args, fmt);
373 path = d_path(&(file->f_path), pathname, sizeof(pathname));
374 if (!path)
375 path = "(unknown)";
376 printk(KERN_CRIT
377 "EXT4-fs error (device %s): %s: inode #%lu (comm %s path %s): ",
378 inode->i_sb->s_id, function, inode->i_ino, current->comm, path);
379 vprintk(fmt, args);
380 printk("\n");
381 va_end(args);
382
383 ext4_handle_error(inode->i_sb);
384}
385
350static const char *ext4_decode_error(struct super_block *sb, int errno, 386static const char *ext4_decode_error(struct super_block *sb, int errno,
351 char nbuf[16]) 387 char nbuf[16])
352{ 388{
@@ -450,7 +486,7 @@ void ext4_msg (struct super_block * sb, const char *prefix,
450 va_end(args); 486 va_end(args);
451} 487}
452 488
453void ext4_warning(struct super_block *sb, const char *function, 489void __ext4_warning(struct super_block *sb, const char *function,
454 const char *fmt, ...) 490 const char *fmt, ...)
455{ 491{
456 va_list args; 492 va_list args;
@@ -507,7 +543,7 @@ void ext4_update_dynamic_rev(struct super_block *sb)
507 if (le32_to_cpu(es->s_rev_level) > EXT4_GOOD_OLD_REV) 543 if (le32_to_cpu(es->s_rev_level) > EXT4_GOOD_OLD_REV)
508 return; 544 return;
509 545
510 ext4_warning(sb, __func__, 546 ext4_warning(sb,
511 "updating to rev %d because of new feature flag, " 547 "updating to rev %d because of new feature flag, "
512 "running e2fsck is recommended", 548 "running e2fsck is recommended",
513 EXT4_DYNAMIC_REV); 549 EXT4_DYNAMIC_REV);
@@ -708,7 +744,8 @@ static struct inode *ext4_alloc_inode(struct super_block *sb)
708#ifdef CONFIG_QUOTA 744#ifdef CONFIG_QUOTA
709 ei->i_reserved_quota = 0; 745 ei->i_reserved_quota = 0;
710#endif 746#endif
711 INIT_LIST_HEAD(&ei->i_aio_dio_complete_list); 747 INIT_LIST_HEAD(&ei->i_completed_io_list);
748 spin_lock_init(&ei->i_completed_io_lock);
712 ei->cur_aio_dio = NULL; 749 ei->cur_aio_dio = NULL;
713 ei->i_sync_tid = 0; 750 ei->i_sync_tid = 0;
714 ei->i_datasync_tid = 0; 751 ei->i_datasync_tid = 0;
@@ -761,6 +798,7 @@ static void destroy_inodecache(void)
761 798
762static void ext4_clear_inode(struct inode *inode) 799static void ext4_clear_inode(struct inode *inode)
763{ 800{
801 dquot_drop(inode);
764 ext4_discard_preallocations(inode); 802 ext4_discard_preallocations(inode);
765 if (EXT4_JOURNAL(inode)) 803 if (EXT4_JOURNAL(inode))
766 jbd2_journal_release_jbd_inode(EXT4_SB(inode->i_sb)->s_journal, 804 jbd2_journal_release_jbd_inode(EXT4_SB(inode->i_sb)->s_journal,
@@ -796,10 +834,10 @@ static inline void ext4_show_quota_options(struct seq_file *seq,
796 if (sbi->s_qf_names[GRPQUOTA]) 834 if (sbi->s_qf_names[GRPQUOTA])
797 seq_printf(seq, ",grpjquota=%s", sbi->s_qf_names[GRPQUOTA]); 835 seq_printf(seq, ",grpjquota=%s", sbi->s_qf_names[GRPQUOTA]);
798 836
799 if (sbi->s_mount_opt & EXT4_MOUNT_USRQUOTA) 837 if (test_opt(sb, USRQUOTA))
800 seq_puts(seq, ",usrquota"); 838 seq_puts(seq, ",usrquota");
801 839
802 if (sbi->s_mount_opt & EXT4_MOUNT_GRPQUOTA) 840 if (test_opt(sb, GRPQUOTA))
803 seq_puts(seq, ",grpquota"); 841 seq_puts(seq, ",grpquota");
804#endif 842#endif
805} 843}
@@ -926,6 +964,9 @@ static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs)
926 if (test_opt(sb, NOLOAD)) 964 if (test_opt(sb, NOLOAD))
927 seq_puts(seq, ",norecovery"); 965 seq_puts(seq, ",norecovery");
928 966
967 if (test_opt(sb, DIOREAD_NOLOCK))
968 seq_puts(seq, ",dioread_nolock");
969
929 ext4_show_quota_options(seq, sb); 970 ext4_show_quota_options(seq, sb);
930 971
931 return 0; 972 return 0;
@@ -1012,19 +1053,9 @@ static ssize_t ext4_quota_write(struct super_block *sb, int type,
1012 const char *data, size_t len, loff_t off); 1053 const char *data, size_t len, loff_t off);
1013 1054
1014static const struct dquot_operations ext4_quota_operations = { 1055static const struct dquot_operations ext4_quota_operations = {
1015 .initialize = dquot_initialize,
1016 .drop = dquot_drop,
1017 .alloc_space = dquot_alloc_space,
1018 .reserve_space = dquot_reserve_space,
1019 .claim_space = dquot_claim_space,
1020 .release_rsv = dquot_release_reserved_space,
1021#ifdef CONFIG_QUOTA 1056#ifdef CONFIG_QUOTA
1022 .get_reserved_space = ext4_get_reserved_space, 1057 .get_reserved_space = ext4_get_reserved_space,
1023#endif 1058#endif
1024 .alloc_inode = dquot_alloc_inode,
1025 .free_space = dquot_free_space,
1026 .free_inode = dquot_free_inode,
1027 .transfer = dquot_transfer,
1028 .write_dquot = ext4_write_dquot, 1059 .write_dquot = ext4_write_dquot,
1029 .acquire_dquot = ext4_acquire_dquot, 1060 .acquire_dquot = ext4_acquire_dquot,
1030 .release_dquot = ext4_release_dquot, 1061 .release_dquot = ext4_release_dquot,
@@ -1109,6 +1140,7 @@ enum {
1109 Opt_stripe, Opt_delalloc, Opt_nodelalloc, 1140 Opt_stripe, Opt_delalloc, Opt_nodelalloc,
1110 Opt_block_validity, Opt_noblock_validity, 1141 Opt_block_validity, Opt_noblock_validity,
1111 Opt_inode_readahead_blks, Opt_journal_ioprio, 1142 Opt_inode_readahead_blks, Opt_journal_ioprio,
1143 Opt_dioread_nolock, Opt_dioread_lock,
1112 Opt_discard, Opt_nodiscard, 1144 Opt_discard, Opt_nodiscard,
1113}; 1145};
1114 1146
@@ -1176,6 +1208,8 @@ static const match_table_t tokens = {
1176 {Opt_auto_da_alloc, "auto_da_alloc=%u"}, 1208 {Opt_auto_da_alloc, "auto_da_alloc=%u"},
1177 {Opt_auto_da_alloc, "auto_da_alloc"}, 1209 {Opt_auto_da_alloc, "auto_da_alloc"},
1178 {Opt_noauto_da_alloc, "noauto_da_alloc"}, 1210 {Opt_noauto_da_alloc, "noauto_da_alloc"},
1211 {Opt_dioread_nolock, "dioread_nolock"},
1212 {Opt_dioread_lock, "dioread_lock"},
1179 {Opt_discard, "discard"}, 1213 {Opt_discard, "discard"},
1180 {Opt_nodiscard, "nodiscard"}, 1214 {Opt_nodiscard, "nodiscard"},
1181 {Opt_err, NULL}, 1215 {Opt_err, NULL},
@@ -1205,6 +1239,66 @@ static ext4_fsblk_t get_sb_block(void **data)
1205} 1239}
1206 1240
1207#define DEFAULT_JOURNAL_IOPRIO (IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 3)) 1241#define DEFAULT_JOURNAL_IOPRIO (IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 3))
1242static char deprecated_msg[] = "Mount option \"%s\" will be removed by %s\n"
1243 "Contact linux-ext4@vger.kernel.org if you think we should keep it.\n";
1244
1245#ifdef CONFIG_QUOTA
1246static int set_qf_name(struct super_block *sb, int qtype, substring_t *args)
1247{
1248 struct ext4_sb_info *sbi = EXT4_SB(sb);
1249 char *qname;
1250
1251 if (sb_any_quota_loaded(sb) &&
1252 !sbi->s_qf_names[qtype]) {
1253 ext4_msg(sb, KERN_ERR,
1254 "Cannot change journaled "
1255 "quota options when quota turned on");
1256 return 0;
1257 }
1258 qname = match_strdup(args);
1259 if (!qname) {
1260 ext4_msg(sb, KERN_ERR,
1261 "Not enough memory for storing quotafile name");
1262 return 0;
1263 }
1264 if (sbi->s_qf_names[qtype] &&
1265 strcmp(sbi->s_qf_names[qtype], qname)) {
1266 ext4_msg(sb, KERN_ERR,
1267 "%s quota file already specified", QTYPE2NAME(qtype));
1268 kfree(qname);
1269 return 0;
1270 }
1271 sbi->s_qf_names[qtype] = qname;
1272 if (strchr(sbi->s_qf_names[qtype], '/')) {
1273 ext4_msg(sb, KERN_ERR,
1274 "quotafile must be on filesystem root");
1275 kfree(sbi->s_qf_names[qtype]);
1276 sbi->s_qf_names[qtype] = NULL;
1277 return 0;
1278 }
1279 set_opt(sbi->s_mount_opt, QUOTA);
1280 return 1;
1281}
1282
1283static int clear_qf_name(struct super_block *sb, int qtype)
1284{
1285
1286 struct ext4_sb_info *sbi = EXT4_SB(sb);
1287
1288 if (sb_any_quota_loaded(sb) &&
1289 sbi->s_qf_names[qtype]) {
1290 ext4_msg(sb, KERN_ERR, "Cannot change journaled quota options"
1291 " when quota turned on");
1292 return 0;
1293 }
1294 /*
1295 * The space will be released later when all options are confirmed
1296 * to be correct
1297 */
1298 sbi->s_qf_names[qtype] = NULL;
1299 return 1;
1300}
1301#endif
1208 1302
1209static int parse_options(char *options, struct super_block *sb, 1303static int parse_options(char *options, struct super_block *sb,
1210 unsigned long *journal_devnum, 1304 unsigned long *journal_devnum,
@@ -1217,8 +1311,7 @@ static int parse_options(char *options, struct super_block *sb,
1217 int data_opt = 0; 1311 int data_opt = 0;
1218 int option; 1312 int option;
1219#ifdef CONFIG_QUOTA 1313#ifdef CONFIG_QUOTA
1220 int qtype, qfmt; 1314 int qfmt;
1221 char *qname;
1222#endif 1315#endif
1223 1316
1224 if (!options) 1317 if (!options)
@@ -1229,19 +1322,31 @@ static int parse_options(char *options, struct super_block *sb,
1229 if (!*p) 1322 if (!*p)
1230 continue; 1323 continue;
1231 1324
1325 /*
1326 * Initialize args struct so we know whether arg was
1327 * found; some options take optional arguments.
1328 */
1329 args[0].to = args[0].from = 0;
1232 token = match_token(p, tokens, args); 1330 token = match_token(p, tokens, args);
1233 switch (token) { 1331 switch (token) {
1234 case Opt_bsd_df: 1332 case Opt_bsd_df:
1333 ext4_msg(sb, KERN_WARNING, deprecated_msg, p, "2.6.38");
1235 clear_opt(sbi->s_mount_opt, MINIX_DF); 1334 clear_opt(sbi->s_mount_opt, MINIX_DF);
1236 break; 1335 break;
1237 case Opt_minix_df: 1336 case Opt_minix_df:
1337 ext4_msg(sb, KERN_WARNING, deprecated_msg, p, "2.6.38");
1238 set_opt(sbi->s_mount_opt, MINIX_DF); 1338 set_opt(sbi->s_mount_opt, MINIX_DF);
1339
1239 break; 1340 break;
1240 case Opt_grpid: 1341 case Opt_grpid:
1342 ext4_msg(sb, KERN_WARNING, deprecated_msg, p, "2.6.38");
1241 set_opt(sbi->s_mount_opt, GRPID); 1343 set_opt(sbi->s_mount_opt, GRPID);
1344
1242 break; 1345 break;
1243 case Opt_nogrpid: 1346 case Opt_nogrpid:
1347 ext4_msg(sb, KERN_WARNING, deprecated_msg, p, "2.6.38");
1244 clear_opt(sbi->s_mount_opt, GRPID); 1348 clear_opt(sbi->s_mount_opt, GRPID);
1349
1245 break; 1350 break;
1246 case Opt_resuid: 1351 case Opt_resuid:
1247 if (match_int(&args[0], &option)) 1352 if (match_int(&args[0], &option))
@@ -1378,14 +1483,13 @@ static int parse_options(char *options, struct super_block *sb,
1378 data_opt = EXT4_MOUNT_WRITEBACK_DATA; 1483 data_opt = EXT4_MOUNT_WRITEBACK_DATA;
1379 datacheck: 1484 datacheck:
1380 if (is_remount) { 1485 if (is_remount) {
1381 if ((sbi->s_mount_opt & EXT4_MOUNT_DATA_FLAGS) 1486 if (test_opt(sb, DATA_FLAGS) != data_opt) {
1382 != data_opt) {
1383 ext4_msg(sb, KERN_ERR, 1487 ext4_msg(sb, KERN_ERR,
1384 "Cannot change data mode on remount"); 1488 "Cannot change data mode on remount");
1385 return 0; 1489 return 0;
1386 } 1490 }
1387 } else { 1491 } else {
1388 sbi->s_mount_opt &= ~EXT4_MOUNT_DATA_FLAGS; 1492 clear_opt(sbi->s_mount_opt, DATA_FLAGS);
1389 sbi->s_mount_opt |= data_opt; 1493 sbi->s_mount_opt |= data_opt;
1390 } 1494 }
1391 break; 1495 break;
@@ -1397,63 +1501,22 @@ static int parse_options(char *options, struct super_block *sb,
1397 break; 1501 break;
1398#ifdef CONFIG_QUOTA 1502#ifdef CONFIG_QUOTA
1399 case Opt_usrjquota: 1503 case Opt_usrjquota:
1400 qtype = USRQUOTA; 1504 if (!set_qf_name(sb, USRQUOTA, &args[0]))
1401 goto set_qf_name;
1402 case Opt_grpjquota:
1403 qtype = GRPQUOTA;
1404set_qf_name:
1405 if (sb_any_quota_loaded(sb) &&
1406 !sbi->s_qf_names[qtype]) {
1407 ext4_msg(sb, KERN_ERR,
1408 "Cannot change journaled "
1409 "quota options when quota turned on");
1410 return 0; 1505 return 0;
1411 } 1506 break;
1412 qname = match_strdup(&args[0]); 1507 case Opt_grpjquota:
1413 if (!qname) { 1508 if (!set_qf_name(sb, GRPQUOTA, &args[0]))
1414 ext4_msg(sb, KERN_ERR,
1415 "Not enough memory for "
1416 "storing quotafile name");
1417 return 0;
1418 }
1419 if (sbi->s_qf_names[qtype] &&
1420 strcmp(sbi->s_qf_names[qtype], qname)) {
1421 ext4_msg(sb, KERN_ERR,
1422 "%s quota file already "
1423 "specified", QTYPE2NAME(qtype));
1424 kfree(qname);
1425 return 0;
1426 }
1427 sbi->s_qf_names[qtype] = qname;
1428 if (strchr(sbi->s_qf_names[qtype], '/')) {
1429 ext4_msg(sb, KERN_ERR,
1430 "quotafile must be on "
1431 "filesystem root");
1432 kfree(sbi->s_qf_names[qtype]);
1433 sbi->s_qf_names[qtype] = NULL;
1434 return 0; 1509 return 0;
1435 }
1436 set_opt(sbi->s_mount_opt, QUOTA);
1437 break; 1510 break;
1438 case Opt_offusrjquota: 1511 case Opt_offusrjquota:
1439 qtype = USRQUOTA; 1512 if (!clear_qf_name(sb, USRQUOTA))
1440 goto clear_qf_name; 1513 return 0;
1514 break;
1441 case Opt_offgrpjquota: 1515 case Opt_offgrpjquota:
1442 qtype = GRPQUOTA; 1516 if (!clear_qf_name(sb, GRPQUOTA))
1443clear_qf_name:
1444 if (sb_any_quota_loaded(sb) &&
1445 sbi->s_qf_names[qtype]) {
1446 ext4_msg(sb, KERN_ERR, "Cannot change "
1447 "journaled quota options when "
1448 "quota turned on");
1449 return 0; 1517 return 0;
1450 }
1451 /*
1452 * The space will be released later when all options
1453 * are confirmed to be correct
1454 */
1455 sbi->s_qf_names[qtype] = NULL;
1456 break; 1518 break;
1519
1457 case Opt_jqfmt_vfsold: 1520 case Opt_jqfmt_vfsold:
1458 qfmt = QFMT_VFS_OLD; 1521 qfmt = QFMT_VFS_OLD;
1459 goto set_qf_format; 1522 goto set_qf_format;
@@ -1518,10 +1581,11 @@ set_qf_format:
1518 clear_opt(sbi->s_mount_opt, BARRIER); 1581 clear_opt(sbi->s_mount_opt, BARRIER);
1519 break; 1582 break;
1520 case Opt_barrier: 1583 case Opt_barrier:
1521 if (match_int(&args[0], &option)) { 1584 if (args[0].from) {
1522 set_opt(sbi->s_mount_opt, BARRIER); 1585 if (match_int(&args[0], &option))
1523 break; 1586 return 0;
1524 } 1587 } else
1588 option = 1; /* No argument, default to 1 */
1525 if (option) 1589 if (option)
1526 set_opt(sbi->s_mount_opt, BARRIER); 1590 set_opt(sbi->s_mount_opt, BARRIER);
1527 else 1591 else
@@ -1594,10 +1658,11 @@ set_qf_format:
1594 set_opt(sbi->s_mount_opt,NO_AUTO_DA_ALLOC); 1658 set_opt(sbi->s_mount_opt,NO_AUTO_DA_ALLOC);
1595 break; 1659 break;
1596 case Opt_auto_da_alloc: 1660 case Opt_auto_da_alloc:
1597 if (match_int(&args[0], &option)) { 1661 if (args[0].from) {
1598 clear_opt(sbi->s_mount_opt, NO_AUTO_DA_ALLOC); 1662 if (match_int(&args[0], &option))
1599 break; 1663 return 0;
1600 } 1664 } else
1665 option = 1; /* No argument, default to 1 */
1601 if (option) 1666 if (option)
1602 clear_opt(sbi->s_mount_opt, NO_AUTO_DA_ALLOC); 1667 clear_opt(sbi->s_mount_opt, NO_AUTO_DA_ALLOC);
1603 else 1668 else
@@ -1609,6 +1674,12 @@ set_qf_format:
1609 case Opt_nodiscard: 1674 case Opt_nodiscard:
1610 clear_opt(sbi->s_mount_opt, DISCARD); 1675 clear_opt(sbi->s_mount_opt, DISCARD);
1611 break; 1676 break;
1677 case Opt_dioread_nolock:
1678 set_opt(sbi->s_mount_opt, DIOREAD_NOLOCK);
1679 break;
1680 case Opt_dioread_lock:
1681 clear_opt(sbi->s_mount_opt, DIOREAD_NOLOCK);
1682 break;
1612 default: 1683 default:
1613 ext4_msg(sb, KERN_ERR, 1684 ext4_msg(sb, KERN_ERR,
1614 "Unrecognized mount option \"%s\" " 1685 "Unrecognized mount option \"%s\" "
@@ -1618,18 +1689,13 @@ set_qf_format:
1618 } 1689 }
1619#ifdef CONFIG_QUOTA 1690#ifdef CONFIG_QUOTA
1620 if (sbi->s_qf_names[USRQUOTA] || sbi->s_qf_names[GRPQUOTA]) { 1691 if (sbi->s_qf_names[USRQUOTA] || sbi->s_qf_names[GRPQUOTA]) {
1621 if ((sbi->s_mount_opt & EXT4_MOUNT_USRQUOTA) && 1692 if (test_opt(sb, USRQUOTA) && sbi->s_qf_names[USRQUOTA])
1622 sbi->s_qf_names[USRQUOTA])
1623 clear_opt(sbi->s_mount_opt, USRQUOTA); 1693 clear_opt(sbi->s_mount_opt, USRQUOTA);
1624 1694
1625 if ((sbi->s_mount_opt & EXT4_MOUNT_GRPQUOTA) && 1695 if (test_opt(sb, GRPQUOTA) && sbi->s_qf_names[GRPQUOTA])
1626 sbi->s_qf_names[GRPQUOTA])
1627 clear_opt(sbi->s_mount_opt, GRPQUOTA); 1696 clear_opt(sbi->s_mount_opt, GRPQUOTA);
1628 1697
1629 if ((sbi->s_qf_names[USRQUOTA] && 1698 if (test_opt(sb, GRPQUOTA) || test_opt(sb, USRQUOTA)) {
1630 (sbi->s_mount_opt & EXT4_MOUNT_GRPQUOTA)) ||
1631 (sbi->s_qf_names[GRPQUOTA] &&
1632 (sbi->s_mount_opt & EXT4_MOUNT_USRQUOTA))) {
1633 ext4_msg(sb, KERN_ERR, "old and new quota " 1699 ext4_msg(sb, KERN_ERR, "old and new quota "
1634 "format mixing"); 1700 "format mixing");
1635 return 0; 1701 return 0;
@@ -1939,7 +2005,7 @@ static void ext4_orphan_cleanup(struct super_block *sb,
1939 } 2005 }
1940 2006
1941 list_add(&EXT4_I(inode)->i_orphan, &EXT4_SB(sb)->s_orphan); 2007 list_add(&EXT4_I(inode)->i_orphan, &EXT4_SB(sb)->s_orphan);
1942 vfs_dq_init(inode); 2008 dquot_initialize(inode);
1943 if (inode->i_nlink) { 2009 if (inode->i_nlink) {
1944 ext4_msg(sb, KERN_DEBUG, 2010 ext4_msg(sb, KERN_DEBUG,
1945 "%s: truncating inode %lu to %lld bytes", 2011 "%s: truncating inode %lu to %lld bytes",
@@ -2432,8 +2498,11 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
2432 def_mount_opts = le32_to_cpu(es->s_default_mount_opts); 2498 def_mount_opts = le32_to_cpu(es->s_default_mount_opts);
2433 if (def_mount_opts & EXT4_DEFM_DEBUG) 2499 if (def_mount_opts & EXT4_DEFM_DEBUG)
2434 set_opt(sbi->s_mount_opt, DEBUG); 2500 set_opt(sbi->s_mount_opt, DEBUG);
2435 if (def_mount_opts & EXT4_DEFM_BSDGROUPS) 2501 if (def_mount_opts & EXT4_DEFM_BSDGROUPS) {
2502 ext4_msg(sb, KERN_WARNING, deprecated_msg, "bsdgroups",
2503 "2.6.38");
2436 set_opt(sbi->s_mount_opt, GRPID); 2504 set_opt(sbi->s_mount_opt, GRPID);
2505 }
2437 if (def_mount_opts & EXT4_DEFM_UID16) 2506 if (def_mount_opts & EXT4_DEFM_UID16)
2438 set_opt(sbi->s_mount_opt, NO_UID32); 2507 set_opt(sbi->s_mount_opt, NO_UID32);
2439#ifdef CONFIG_EXT4_FS_XATTR 2508#ifdef CONFIG_EXT4_FS_XATTR
@@ -2445,11 +2514,11 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
2445 set_opt(sbi->s_mount_opt, POSIX_ACL); 2514 set_opt(sbi->s_mount_opt, POSIX_ACL);
2446#endif 2515#endif
2447 if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_DATA) 2516 if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_DATA)
2448 sbi->s_mount_opt |= EXT4_MOUNT_JOURNAL_DATA; 2517 set_opt(sbi->s_mount_opt, JOURNAL_DATA);
2449 else if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_ORDERED) 2518 else if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_ORDERED)
2450 sbi->s_mount_opt |= EXT4_MOUNT_ORDERED_DATA; 2519 set_opt(sbi->s_mount_opt, ORDERED_DATA);
2451 else if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_WBACK) 2520 else if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_WBACK)
2452 sbi->s_mount_opt |= EXT4_MOUNT_WRITEBACK_DATA; 2521 set_opt(sbi->s_mount_opt, WRITEBACK_DATA);
2453 2522
2454 if (le16_to_cpu(sbi->s_es->s_errors) == EXT4_ERRORS_PANIC) 2523 if (le16_to_cpu(sbi->s_es->s_errors) == EXT4_ERRORS_PANIC)
2455 set_opt(sbi->s_mount_opt, ERRORS_PANIC); 2524 set_opt(sbi->s_mount_opt, ERRORS_PANIC);
@@ -2477,7 +2546,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
2477 goto failed_mount; 2546 goto failed_mount;
2478 2547
2479 sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | 2548 sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
2480 ((sbi->s_mount_opt & EXT4_MOUNT_POSIX_ACL) ? MS_POSIXACL : 0); 2549 (test_opt(sb, POSIX_ACL) ? MS_POSIXACL : 0);
2481 2550
2482 if (le32_to_cpu(es->s_rev_level) == EXT4_GOOD_OLD_REV && 2551 if (le32_to_cpu(es->s_rev_level) == EXT4_GOOD_OLD_REV &&
2483 (EXT4_HAS_COMPAT_FEATURE(sb, ~0U) || 2552 (EXT4_HAS_COMPAT_FEATURE(sb, ~0U) ||
@@ -2766,7 +2835,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
2766 EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER)) { 2835 EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER)) {
2767 ext4_msg(sb, KERN_ERR, "required journal recovery " 2836 ext4_msg(sb, KERN_ERR, "required journal recovery "
2768 "suppressed and not mounted read-only"); 2837 "suppressed and not mounted read-only");
2769 goto failed_mount4; 2838 goto failed_mount_wq;
2770 } else { 2839 } else {
2771 clear_opt(sbi->s_mount_opt, DATA_FLAGS); 2840 clear_opt(sbi->s_mount_opt, DATA_FLAGS);
2772 set_opt(sbi->s_mount_opt, WRITEBACK_DATA); 2841 set_opt(sbi->s_mount_opt, WRITEBACK_DATA);
@@ -2779,7 +2848,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
2779 !jbd2_journal_set_features(EXT4_SB(sb)->s_journal, 0, 0, 2848 !jbd2_journal_set_features(EXT4_SB(sb)->s_journal, 0, 0,
2780 JBD2_FEATURE_INCOMPAT_64BIT)) { 2849 JBD2_FEATURE_INCOMPAT_64BIT)) {
2781 ext4_msg(sb, KERN_ERR, "Failed to set 64-bit journal feature"); 2850 ext4_msg(sb, KERN_ERR, "Failed to set 64-bit journal feature");
2782 goto failed_mount4; 2851 goto failed_mount_wq;
2783 } 2852 }
2784 2853
2785 if (test_opt(sb, JOURNAL_ASYNC_COMMIT)) { 2854 if (test_opt(sb, JOURNAL_ASYNC_COMMIT)) {
@@ -2818,7 +2887,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
2818 (sbi->s_journal, 0, 0, JBD2_FEATURE_INCOMPAT_REVOKE)) { 2887 (sbi->s_journal, 0, 0, JBD2_FEATURE_INCOMPAT_REVOKE)) {
2819 ext4_msg(sb, KERN_ERR, "Journal does not support " 2888 ext4_msg(sb, KERN_ERR, "Journal does not support "
2820 "requested data journaling mode"); 2889 "requested data journaling mode");
2821 goto failed_mount4; 2890 goto failed_mount_wq;
2822 } 2891 }
2823 default: 2892 default:
2824 break; 2893 break;
@@ -2826,13 +2895,17 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
2826 set_task_ioprio(sbi->s_journal->j_task, journal_ioprio); 2895 set_task_ioprio(sbi->s_journal->j_task, journal_ioprio);
2827 2896
2828no_journal: 2897no_journal:
2829
2830 if (test_opt(sb, NOBH)) { 2898 if (test_opt(sb, NOBH)) {
2831 if (!(test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA)) { 2899 if (!(test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA)) {
2832 ext4_msg(sb, KERN_WARNING, "Ignoring nobh option - " 2900 ext4_msg(sb, KERN_WARNING, "Ignoring nobh option - "
2833 "its supported only with writeback mode"); 2901 "its supported only with writeback mode");
2834 clear_opt(sbi->s_mount_opt, NOBH); 2902 clear_opt(sbi->s_mount_opt, NOBH);
2835 } 2903 }
2904 if (test_opt(sb, DIOREAD_NOLOCK)) {
2905 ext4_msg(sb, KERN_WARNING, "dioread_nolock option is "
2906 "not supported with nobh mode");
2907 goto failed_mount_wq;
2908 }
2836 } 2909 }
2837 EXT4_SB(sb)->dio_unwritten_wq = create_workqueue("ext4-dio-unwritten"); 2910 EXT4_SB(sb)->dio_unwritten_wq = create_workqueue("ext4-dio-unwritten");
2838 if (!EXT4_SB(sb)->dio_unwritten_wq) { 2911 if (!EXT4_SB(sb)->dio_unwritten_wq) {
@@ -2897,6 +2970,18 @@ no_journal:
2897 "requested data journaling mode"); 2970 "requested data journaling mode");
2898 clear_opt(sbi->s_mount_opt, DELALLOC); 2971 clear_opt(sbi->s_mount_opt, DELALLOC);
2899 } 2972 }
2973 if (test_opt(sb, DIOREAD_NOLOCK)) {
2974 if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) {
2975 ext4_msg(sb, KERN_WARNING, "Ignoring dioread_nolock "
2976 "option - requested data journaling mode");
2977 clear_opt(sbi->s_mount_opt, DIOREAD_NOLOCK);
2978 }
2979 if (sb->s_blocksize < PAGE_SIZE) {
2980 ext4_msg(sb, KERN_WARNING, "Ignoring dioread_nolock "
2981 "option - block size is too small");
2982 clear_opt(sbi->s_mount_opt, DIOREAD_NOLOCK);
2983 }
2984 }
2900 2985
2901 err = ext4_setup_system_zone(sb); 2986 err = ext4_setup_system_zone(sb);
2902 if (err) { 2987 if (err) {
@@ -3360,10 +3445,9 @@ static void ext4_clear_journal_err(struct super_block *sb,
3360 char nbuf[16]; 3445 char nbuf[16];
3361 3446
3362 errstr = ext4_decode_error(sb, j_errno, nbuf); 3447 errstr = ext4_decode_error(sb, j_errno, nbuf);
3363 ext4_warning(sb, __func__, "Filesystem error recorded " 3448 ext4_warning(sb, "Filesystem error recorded "
3364 "from previous mount: %s", errstr); 3449 "from previous mount: %s", errstr);
3365 ext4_warning(sb, __func__, "Marking fs in need of " 3450 ext4_warning(sb, "Marking fs in need of filesystem check.");
3366 "filesystem check.");
3367 3451
3368 EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS; 3452 EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
3369 es->s_state |= cpu_to_le16(EXT4_ERROR_FS); 3453 es->s_state |= cpu_to_le16(EXT4_ERROR_FS);
@@ -3514,7 +3598,7 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
3514 ext4_abort(sb, __func__, "Abort forced by user"); 3598 ext4_abort(sb, __func__, "Abort forced by user");
3515 3599
3516 sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | 3600 sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
3517 ((sbi->s_mount_opt & EXT4_MOUNT_POSIX_ACL) ? MS_POSIXACL : 0); 3601 (test_opt(sb, POSIX_ACL) ? MS_POSIXACL : 0);
3518 3602
3519 es = sbi->s_es; 3603 es = sbi->s_es;
3520 3604
@@ -3708,7 +3792,7 @@ static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf)
3708 * Process 1 Process 2 3792 * Process 1 Process 2
3709 * ext4_create() quota_sync() 3793 * ext4_create() quota_sync()
3710 * jbd2_journal_start() write_dquot() 3794 * jbd2_journal_start() write_dquot()
3711 * vfs_dq_init() down(dqio_mutex) 3795 * dquot_initialize() down(dqio_mutex)
3712 * down(dqio_mutex) jbd2_journal_start() 3796 * down(dqio_mutex) jbd2_journal_start()
3713 * 3797 *
3714 */ 3798 */
@@ -3917,9 +4001,7 @@ static ssize_t ext4_quota_write(struct super_block *sb, int type,
3917 ext4_lblk_t blk = off >> EXT4_BLOCK_SIZE_BITS(sb); 4001 ext4_lblk_t blk = off >> EXT4_BLOCK_SIZE_BITS(sb);
3918 int err = 0; 4002 int err = 0;
3919 int offset = off & (sb->s_blocksize - 1); 4003 int offset = off & (sb->s_blocksize - 1);
3920 int tocopy;
3921 int journal_quota = EXT4_SB(sb)->s_qf_names[type] != NULL; 4004 int journal_quota = EXT4_SB(sb)->s_qf_names[type] != NULL;
3922 size_t towrite = len;
3923 struct buffer_head *bh; 4005 struct buffer_head *bh;
3924 handle_t *handle = journal_current_handle(); 4006 handle_t *handle = journal_current_handle();
3925 4007
@@ -3929,52 +4011,53 @@ static ssize_t ext4_quota_write(struct super_block *sb, int type,
3929 (unsigned long long)off, (unsigned long long)len); 4011 (unsigned long long)off, (unsigned long long)len);
3930 return -EIO; 4012 return -EIO;
3931 } 4013 }
4014 /*
4015 * Since we account only one data block in transaction credits,
4016 * then it is impossible to cross a block boundary.
4017 */
4018 if (sb->s_blocksize - offset < len) {
4019 ext4_msg(sb, KERN_WARNING, "Quota write (off=%llu, len=%llu)"
4020 " cancelled because not block aligned",
4021 (unsigned long long)off, (unsigned long long)len);
4022 return -EIO;
4023 }
4024
3932 mutex_lock_nested(&inode->i_mutex, I_MUTEX_QUOTA); 4025 mutex_lock_nested(&inode->i_mutex, I_MUTEX_QUOTA);
3933 while (towrite > 0) { 4026 bh = ext4_bread(handle, inode, blk, 1, &err);
3934 tocopy = sb->s_blocksize - offset < towrite ? 4027 if (!bh)
3935 sb->s_blocksize - offset : towrite; 4028 goto out;
3936 bh = ext4_bread(handle, inode, blk, 1, &err); 4029 if (journal_quota) {
3937 if (!bh) 4030 err = ext4_journal_get_write_access(handle, bh);
4031 if (err) {
4032 brelse(bh);
3938 goto out; 4033 goto out;
3939 if (journal_quota) {
3940 err = ext4_journal_get_write_access(handle, bh);
3941 if (err) {
3942 brelse(bh);
3943 goto out;
3944 }
3945 } 4034 }
3946 lock_buffer(bh);
3947 memcpy(bh->b_data+offset, data, tocopy);
3948 flush_dcache_page(bh->b_page);
3949 unlock_buffer(bh);
3950 if (journal_quota)
3951 err = ext4_handle_dirty_metadata(handle, NULL, bh);
3952 else {
3953 /* Always do at least ordered writes for quotas */
3954 err = ext4_jbd2_file_inode(handle, inode);
3955 mark_buffer_dirty(bh);
3956 }
3957 brelse(bh);
3958 if (err)
3959 goto out;
3960 offset = 0;
3961 towrite -= tocopy;
3962 data += tocopy;
3963 blk++;
3964 } 4035 }
4036 lock_buffer(bh);
4037 memcpy(bh->b_data+offset, data, len);
4038 flush_dcache_page(bh->b_page);
4039 unlock_buffer(bh);
4040 if (journal_quota)
4041 err = ext4_handle_dirty_metadata(handle, NULL, bh);
4042 else {
4043 /* Always do at least ordered writes for quotas */
4044 err = ext4_jbd2_file_inode(handle, inode);
4045 mark_buffer_dirty(bh);
4046 }
4047 brelse(bh);
3965out: 4048out:
3966 if (len == towrite) { 4049 if (err) {
3967 mutex_unlock(&inode->i_mutex); 4050 mutex_unlock(&inode->i_mutex);
3968 return err; 4051 return err;
3969 } 4052 }
3970 if (inode->i_size < off+len-towrite) { 4053 if (inode->i_size < off + len) {
3971 i_size_write(inode, off+len-towrite); 4054 i_size_write(inode, off + len);
3972 EXT4_I(inode)->i_disksize = inode->i_size; 4055 EXT4_I(inode)->i_disksize = inode->i_size;
3973 } 4056 }
3974 inode->i_mtime = inode->i_ctime = CURRENT_TIME; 4057 inode->i_mtime = inode->i_ctime = CURRENT_TIME;
3975 ext4_mark_inode_dirty(handle, inode); 4058 ext4_mark_inode_dirty(handle, inode);
3976 mutex_unlock(&inode->i_mutex); 4059 mutex_unlock(&inode->i_mutex);
3977 return len - towrite; 4060 return len;
3978} 4061}
3979 4062
3980#endif 4063#endif
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index f3a2f7ed45aa..b4c5aa8489d8 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -227,7 +227,8 @@ ext4_xattr_block_get(struct inode *inode, int name_index, const char *name,
227 ea_bdebug(bh, "b_count=%d, refcount=%d", 227 ea_bdebug(bh, "b_count=%d, refcount=%d",
228 atomic_read(&(bh->b_count)), le32_to_cpu(BHDR(bh)->h_refcount)); 228 atomic_read(&(bh->b_count)), le32_to_cpu(BHDR(bh)->h_refcount));
229 if (ext4_xattr_check_block(bh)) { 229 if (ext4_xattr_check_block(bh)) {
230bad_block: ext4_error(inode->i_sb, __func__, 230bad_block:
231 ext4_error(inode->i_sb,
231 "inode %lu: bad block %llu", inode->i_ino, 232 "inode %lu: bad block %llu", inode->i_ino,
232 EXT4_I(inode)->i_file_acl); 233 EXT4_I(inode)->i_file_acl);
233 error = -EIO; 234 error = -EIO;
@@ -267,7 +268,7 @@ ext4_xattr_ibody_get(struct inode *inode, int name_index, const char *name,
267 void *end; 268 void *end;
268 int error; 269 int error;
269 270
270 if (!(EXT4_I(inode)->i_state & EXT4_STATE_XATTR)) 271 if (!ext4_test_inode_state(inode, EXT4_STATE_XATTR))
271 return -ENODATA; 272 return -ENODATA;
272 error = ext4_get_inode_loc(inode, &iloc); 273 error = ext4_get_inode_loc(inode, &iloc);
273 if (error) 274 if (error)
@@ -371,7 +372,7 @@ ext4_xattr_block_list(struct dentry *dentry, char *buffer, size_t buffer_size)
371 ea_bdebug(bh, "b_count=%d, refcount=%d", 372 ea_bdebug(bh, "b_count=%d, refcount=%d",
372 atomic_read(&(bh->b_count)), le32_to_cpu(BHDR(bh)->h_refcount)); 373 atomic_read(&(bh->b_count)), le32_to_cpu(BHDR(bh)->h_refcount));
373 if (ext4_xattr_check_block(bh)) { 374 if (ext4_xattr_check_block(bh)) {
374 ext4_error(inode->i_sb, __func__, 375 ext4_error(inode->i_sb,
375 "inode %lu: bad block %llu", inode->i_ino, 376 "inode %lu: bad block %llu", inode->i_ino,
376 EXT4_I(inode)->i_file_acl); 377 EXT4_I(inode)->i_file_acl);
377 error = -EIO; 378 error = -EIO;
@@ -396,7 +397,7 @@ ext4_xattr_ibody_list(struct dentry *dentry, char *buffer, size_t buffer_size)
396 void *end; 397 void *end;
397 int error; 398 int error;
398 399
399 if (!(EXT4_I(inode)->i_state & EXT4_STATE_XATTR)) 400 if (!ext4_test_inode_state(inode, EXT4_STATE_XATTR))
400 return 0; 401 return 0;
401 error = ext4_get_inode_loc(inode, &iloc); 402 error = ext4_get_inode_loc(inode, &iloc);
402 if (error) 403 if (error)
@@ -494,7 +495,7 @@ ext4_xattr_release_block(handle_t *handle, struct inode *inode,
494 error = ext4_handle_dirty_metadata(handle, inode, bh); 495 error = ext4_handle_dirty_metadata(handle, inode, bh);
495 if (IS_SYNC(inode)) 496 if (IS_SYNC(inode))
496 ext4_handle_sync(handle); 497 ext4_handle_sync(handle);
497 vfs_dq_free_block(inode, 1); 498 dquot_free_block(inode, 1);
498 ea_bdebug(bh, "refcount now=%d; releasing", 499 ea_bdebug(bh, "refcount now=%d; releasing",
499 le32_to_cpu(BHDR(bh)->h_refcount)); 500 le32_to_cpu(BHDR(bh)->h_refcount));
500 if (ce) 501 if (ce)
@@ -665,9 +666,8 @@ ext4_xattr_block_find(struct inode *inode, struct ext4_xattr_info *i,
665 atomic_read(&(bs->bh->b_count)), 666 atomic_read(&(bs->bh->b_count)),
666 le32_to_cpu(BHDR(bs->bh)->h_refcount)); 667 le32_to_cpu(BHDR(bs->bh)->h_refcount));
667 if (ext4_xattr_check_block(bs->bh)) { 668 if (ext4_xattr_check_block(bs->bh)) {
668 ext4_error(sb, __func__, 669 ext4_error(sb, "inode %lu: bad block %llu",
669 "inode %lu: bad block %llu", inode->i_ino, 670 inode->i_ino, EXT4_I(inode)->i_file_acl);
670 EXT4_I(inode)->i_file_acl);
671 error = -EIO; 671 error = -EIO;
672 goto cleanup; 672 goto cleanup;
673 } 673 }
@@ -787,8 +787,8 @@ inserted:
787 else { 787 else {
788 /* The old block is released after updating 788 /* The old block is released after updating
789 the inode. */ 789 the inode. */
790 error = -EDQUOT; 790 error = dquot_alloc_block(inode, 1);
791 if (vfs_dq_alloc_block(inode, 1)) 791 if (error)
792 goto cleanup; 792 goto cleanup;
793 error = ext4_journal_get_write_access(handle, 793 error = ext4_journal_get_write_access(handle,
794 new_bh); 794 new_bh);
@@ -876,13 +876,12 @@ cleanup:
876 return error; 876 return error;
877 877
878cleanup_dquot: 878cleanup_dquot:
879 vfs_dq_free_block(inode, 1); 879 dquot_free_block(inode, 1);
880 goto cleanup; 880 goto cleanup;
881 881
882bad_block: 882bad_block:
883 ext4_error(inode->i_sb, __func__, 883 ext4_error(inode->i_sb, "inode %lu: bad block %llu",
884 "inode %lu: bad block %llu", inode->i_ino, 884 inode->i_ino, EXT4_I(inode)->i_file_acl);
885 EXT4_I(inode)->i_file_acl);
886 goto cleanup; 885 goto cleanup;
887 886
888#undef header 887#undef header
@@ -908,7 +907,7 @@ ext4_xattr_ibody_find(struct inode *inode, struct ext4_xattr_info *i,
908 is->s.base = is->s.first = IFIRST(header); 907 is->s.base = is->s.first = IFIRST(header);
909 is->s.here = is->s.first; 908 is->s.here = is->s.first;
910 is->s.end = (void *)raw_inode + EXT4_SB(inode->i_sb)->s_inode_size; 909 is->s.end = (void *)raw_inode + EXT4_SB(inode->i_sb)->s_inode_size;
911 if (EXT4_I(inode)->i_state & EXT4_STATE_XATTR) { 910 if (ext4_test_inode_state(inode, EXT4_STATE_XATTR)) {
912 error = ext4_xattr_check_names(IFIRST(header), is->s.end); 911 error = ext4_xattr_check_names(IFIRST(header), is->s.end);
913 if (error) 912 if (error)
914 return error; 913 return error;
@@ -940,10 +939,10 @@ ext4_xattr_ibody_set(handle_t *handle, struct inode *inode,
940 header = IHDR(inode, ext4_raw_inode(&is->iloc)); 939 header = IHDR(inode, ext4_raw_inode(&is->iloc));
941 if (!IS_LAST_ENTRY(s->first)) { 940 if (!IS_LAST_ENTRY(s->first)) {
942 header->h_magic = cpu_to_le32(EXT4_XATTR_MAGIC); 941 header->h_magic = cpu_to_le32(EXT4_XATTR_MAGIC);
943 EXT4_I(inode)->i_state |= EXT4_STATE_XATTR; 942 ext4_set_inode_state(inode, EXT4_STATE_XATTR);
944 } else { 943 } else {
945 header->h_magic = cpu_to_le32(0); 944 header->h_magic = cpu_to_le32(0);
946 EXT4_I(inode)->i_state &= ~EXT4_STATE_XATTR; 945 ext4_clear_inode_state(inode, EXT4_STATE_XATTR);
947 } 946 }
948 return 0; 947 return 0;
949} 948}
@@ -986,8 +985,8 @@ ext4_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index,
986 if (strlen(name) > 255) 985 if (strlen(name) > 255)
987 return -ERANGE; 986 return -ERANGE;
988 down_write(&EXT4_I(inode)->xattr_sem); 987 down_write(&EXT4_I(inode)->xattr_sem);
989 no_expand = EXT4_I(inode)->i_state & EXT4_STATE_NO_EXPAND; 988 no_expand = ext4_test_inode_state(inode, EXT4_STATE_NO_EXPAND);
990 EXT4_I(inode)->i_state |= EXT4_STATE_NO_EXPAND; 989 ext4_set_inode_state(inode, EXT4_STATE_NO_EXPAND);
991 990
992 error = ext4_get_inode_loc(inode, &is.iloc); 991 error = ext4_get_inode_loc(inode, &is.iloc);
993 if (error) 992 if (error)
@@ -997,10 +996,10 @@ ext4_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index,
997 if (error) 996 if (error)
998 goto cleanup; 997 goto cleanup;
999 998
1000 if (EXT4_I(inode)->i_state & EXT4_STATE_NEW) { 999 if (ext4_test_inode_state(inode, EXT4_STATE_NEW)) {
1001 struct ext4_inode *raw_inode = ext4_raw_inode(&is.iloc); 1000 struct ext4_inode *raw_inode = ext4_raw_inode(&is.iloc);
1002 memset(raw_inode, 0, EXT4_SB(inode->i_sb)->s_inode_size); 1001 memset(raw_inode, 0, EXT4_SB(inode->i_sb)->s_inode_size);
1003 EXT4_I(inode)->i_state &= ~EXT4_STATE_NEW; 1002 ext4_clear_inode_state(inode, EXT4_STATE_NEW);
1004 } 1003 }
1005 1004
1006 error = ext4_xattr_ibody_find(inode, &i, &is); 1005 error = ext4_xattr_ibody_find(inode, &i, &is);
@@ -1052,7 +1051,7 @@ ext4_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index,
1052 ext4_xattr_update_super_block(handle, inode->i_sb); 1051 ext4_xattr_update_super_block(handle, inode->i_sb);
1053 inode->i_ctime = ext4_current_time(inode); 1052 inode->i_ctime = ext4_current_time(inode);
1054 if (!value) 1053 if (!value)
1055 EXT4_I(inode)->i_state &= ~EXT4_STATE_NO_EXPAND; 1054 ext4_clear_inode_state(inode, EXT4_STATE_NO_EXPAND);
1056 error = ext4_mark_iloc_dirty(handle, inode, &is.iloc); 1055 error = ext4_mark_iloc_dirty(handle, inode, &is.iloc);
1057 /* 1056 /*
1058 * The bh is consumed by ext4_mark_iloc_dirty, even with 1057 * The bh is consumed by ext4_mark_iloc_dirty, even with
@@ -1067,7 +1066,7 @@ cleanup:
1067 brelse(is.iloc.bh); 1066 brelse(is.iloc.bh);
1068 brelse(bs.bh); 1067 brelse(bs.bh);
1069 if (no_expand == 0) 1068 if (no_expand == 0)
1070 EXT4_I(inode)->i_state &= ~EXT4_STATE_NO_EXPAND; 1069 ext4_clear_inode_state(inode, EXT4_STATE_NO_EXPAND);
1071 up_write(&EXT4_I(inode)->xattr_sem); 1070 up_write(&EXT4_I(inode)->xattr_sem);
1072 return error; 1071 return error;
1073} 1072}
@@ -1195,9 +1194,8 @@ retry:
1195 if (!bh) 1194 if (!bh)
1196 goto cleanup; 1195 goto cleanup;
1197 if (ext4_xattr_check_block(bh)) { 1196 if (ext4_xattr_check_block(bh)) {
1198 ext4_error(inode->i_sb, __func__, 1197 ext4_error(inode->i_sb, "inode %lu: bad block %llu",
1199 "inode %lu: bad block %llu", inode->i_ino, 1198 inode->i_ino, EXT4_I(inode)->i_file_acl);
1200 EXT4_I(inode)->i_file_acl);
1201 error = -EIO; 1199 error = -EIO;
1202 goto cleanup; 1200 goto cleanup;
1203 } 1201 }
@@ -1302,6 +1300,8 @@ retry:
1302 1300
1303 /* Remove the chosen entry from the inode */ 1301 /* Remove the chosen entry from the inode */
1304 error = ext4_xattr_ibody_set(handle, inode, &i, is); 1302 error = ext4_xattr_ibody_set(handle, inode, &i, is);
1303 if (error)
1304 goto cleanup;
1305 1305
1306 entry = IFIRST(header); 1306 entry = IFIRST(header);
1307 if (entry_size + EXT4_XATTR_SIZE(size) >= new_extra_isize) 1307 if (entry_size + EXT4_XATTR_SIZE(size) >= new_extra_isize)
@@ -1372,16 +1372,14 @@ ext4_xattr_delete_inode(handle_t *handle, struct inode *inode)
1372 goto cleanup; 1372 goto cleanup;
1373 bh = sb_bread(inode->i_sb, EXT4_I(inode)->i_file_acl); 1373 bh = sb_bread(inode->i_sb, EXT4_I(inode)->i_file_acl);
1374 if (!bh) { 1374 if (!bh) {
1375 ext4_error(inode->i_sb, __func__, 1375 ext4_error(inode->i_sb, "inode %lu: block %llu read error",
1376 "inode %lu: block %llu read error", inode->i_ino, 1376 inode->i_ino, EXT4_I(inode)->i_file_acl);
1377 EXT4_I(inode)->i_file_acl);
1378 goto cleanup; 1377 goto cleanup;
1379 } 1378 }
1380 if (BHDR(bh)->h_magic != cpu_to_le32(EXT4_XATTR_MAGIC) || 1379 if (BHDR(bh)->h_magic != cpu_to_le32(EXT4_XATTR_MAGIC) ||
1381 BHDR(bh)->h_blocks != cpu_to_le32(1)) { 1380 BHDR(bh)->h_blocks != cpu_to_le32(1)) {
1382 ext4_error(inode->i_sb, __func__, 1381 ext4_error(inode->i_sb, "inode %lu: bad block %llu",
1383 "inode %lu: bad block %llu", inode->i_ino, 1382 inode->i_ino, EXT4_I(inode)->i_file_acl);
1384 EXT4_I(inode)->i_file_acl);
1385 goto cleanup; 1383 goto cleanup;
1386 } 1384 }
1387 ext4_xattr_release_block(handle, inode, bh); 1385 ext4_xattr_release_block(handle, inode, bh);
@@ -1506,7 +1504,7 @@ again:
1506 } 1504 }
1507 bh = sb_bread(inode->i_sb, ce->e_block); 1505 bh = sb_bread(inode->i_sb, ce->e_block);
1508 if (!bh) { 1506 if (!bh) {
1509 ext4_error(inode->i_sb, __func__, 1507 ext4_error(inode->i_sb,
1510 "inode %lu: block %lu read error", 1508 "inode %lu: block %lu read error",
1511 inode->i_ino, (unsigned long) ce->e_block); 1509 inode->i_ino, (unsigned long) ce->e_block);
1512 } else if (le32_to_cpu(BHDR(bh)->h_refcount) >= 1510 } else if (le32_to_cpu(BHDR(bh)->h_refcount) >=
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index 14da530b05ca..fbeecdc194dc 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -577,7 +577,7 @@ static inline loff_t fat_i_pos_read(struct msdos_sb_info *sbi,
577 return i_pos; 577 return i_pos;
578} 578}
579 579
580static int fat_write_inode(struct inode *inode, int wait) 580static int __fat_write_inode(struct inode *inode, int wait)
581{ 581{
582 struct super_block *sb = inode->i_sb; 582 struct super_block *sb = inode->i_sb;
583 struct msdos_sb_info *sbi = MSDOS_SB(sb); 583 struct msdos_sb_info *sbi = MSDOS_SB(sb);
@@ -634,9 +634,14 @@ retry:
634 return err; 634 return err;
635} 635}
636 636
637static int fat_write_inode(struct inode *inode, struct writeback_control *wbc)
638{
639 return __fat_write_inode(inode, wbc->sync_mode == WB_SYNC_ALL);
640}
641
637int fat_sync_inode(struct inode *inode) 642int fat_sync_inode(struct inode *inode)
638{ 643{
639 return fat_write_inode(inode, 1); 644 return __fat_write_inode(inode, 1);
640} 645}
641 646
642EXPORT_SYMBOL_GPL(fat_sync_inode); 647EXPORT_SYMBOL_GPL(fat_sync_inode);
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 1a7c42c64ff4..76fc4d594acb 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -381,10 +381,10 @@ static void queue_io(struct bdi_writeback *wb, unsigned long *older_than_this)
381 move_expired_inodes(&wb->b_dirty, &wb->b_io, older_than_this); 381 move_expired_inodes(&wb->b_dirty, &wb->b_io, older_than_this);
382} 382}
383 383
384static int write_inode(struct inode *inode, int sync) 384static int write_inode(struct inode *inode, struct writeback_control *wbc)
385{ 385{
386 if (inode->i_sb->s_op->write_inode && !is_bad_inode(inode)) 386 if (inode->i_sb->s_op->write_inode && !is_bad_inode(inode))
387 return inode->i_sb->s_op->write_inode(inode, sync); 387 return inode->i_sb->s_op->write_inode(inode, wbc);
388 return 0; 388 return 0;
389} 389}
390 390
@@ -421,7 +421,6 @@ static int
421writeback_single_inode(struct inode *inode, struct writeback_control *wbc) 421writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
422{ 422{
423 struct address_space *mapping = inode->i_mapping; 423 struct address_space *mapping = inode->i_mapping;
424 int wait = wbc->sync_mode == WB_SYNC_ALL;
425 unsigned dirty; 424 unsigned dirty;
426 int ret; 425 int ret;
427 426
@@ -439,7 +438,7 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
439 * We'll have another go at writing back this inode when we 438 * We'll have another go at writing back this inode when we
440 * completed a full scan of b_io. 439 * completed a full scan of b_io.
441 */ 440 */
442 if (!wait) { 441 if (wbc->sync_mode != WB_SYNC_ALL) {
443 requeue_io(inode); 442 requeue_io(inode);
444 return 0; 443 return 0;
445 } 444 }
@@ -461,15 +460,20 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
461 460
462 ret = do_writepages(mapping, wbc); 461 ret = do_writepages(mapping, wbc);
463 462
464 /* Don't write the inode if only I_DIRTY_PAGES was set */ 463 /*
465 if (dirty & (I_DIRTY_SYNC | I_DIRTY_DATASYNC)) { 464 * Make sure to wait on the data before writing out the metadata.
466 int err = write_inode(inode, wait); 465 * This is important for filesystems that modify metadata on data
466 * I/O completion.
467 */
468 if (wbc->sync_mode == WB_SYNC_ALL) {
469 int err = filemap_fdatawait(mapping);
467 if (ret == 0) 470 if (ret == 0)
468 ret = err; 471 ret = err;
469 } 472 }
470 473
471 if (wait) { 474 /* Don't write the inode if only I_DIRTY_PAGES was set */
472 int err = filemap_fdatawait(mapping); 475 if (dirty & (I_DIRTY_SYNC | I_DIRTY_DATASYNC)) {
476 int err = write_inode(inode, wbc);
473 if (ret == 0) 477 if (ret == 0)
474 ret = err; 478 ret = err;
475 } 479 }
diff --git a/fs/gfs2/ops_inode.c b/fs/gfs2/ops_inode.c
index 84350e1be66d..4e64352d49de 100644
--- a/fs/gfs2/ops_inode.c
+++ b/fs/gfs2/ops_inode.c
@@ -976,122 +976,62 @@ out:
976} 976}
977 977
978/** 978/**
979 * gfs2_readlinki - return the contents of a symlink 979 * gfs2_follow_link - Follow a symbolic link
980 * @ip: the symlink's inode 980 * @dentry: The dentry of the link
981 * @buf: a pointer to the buffer to be filled 981 * @nd: Data that we pass to vfs_follow_link()
982 * @len: a pointer to the length of @buf
983 * 982 *
984 * If @buf is too small, a piece of memory is kmalloc()ed and needs 983 * This can handle symlinks of any size.
985 * to be freed by the caller.
986 * 984 *
987 * Returns: errno 985 * Returns: 0 on success or error code
988 */ 986 */
989 987
990static int gfs2_readlinki(struct gfs2_inode *ip, char **buf, unsigned int *len) 988static void *gfs2_follow_link(struct dentry *dentry, struct nameidata *nd)
991{ 989{
990 struct gfs2_inode *ip = GFS2_I(dentry->d_inode);
992 struct gfs2_holder i_gh; 991 struct gfs2_holder i_gh;
993 struct buffer_head *dibh; 992 struct buffer_head *dibh;
994 unsigned int x; 993 unsigned int x;
994 char *buf;
995 int error; 995 int error;
996 996
997 gfs2_holder_init(ip->i_gl, LM_ST_SHARED, 0, &i_gh); 997 gfs2_holder_init(ip->i_gl, LM_ST_SHARED, 0, &i_gh);
998 error = gfs2_glock_nq(&i_gh); 998 error = gfs2_glock_nq(&i_gh);
999 if (error) { 999 if (error) {
1000 gfs2_holder_uninit(&i_gh); 1000 gfs2_holder_uninit(&i_gh);
1001 return error; 1001 nd_set_link(nd, ERR_PTR(error));
1002 return NULL;
1002 } 1003 }
1003 1004
1004 if (!ip->i_disksize) { 1005 if (!ip->i_disksize) {
1005 gfs2_consist_inode(ip); 1006 gfs2_consist_inode(ip);
1006 error = -EIO; 1007 buf = ERR_PTR(-EIO);
1007 goto out; 1008 goto out;
1008 } 1009 }
1009 1010
1010 error = gfs2_meta_inode_buffer(ip, &dibh); 1011 error = gfs2_meta_inode_buffer(ip, &dibh);
1011 if (error) 1012 if (error) {
1013 buf = ERR_PTR(error);
1012 goto out; 1014 goto out;
1013
1014 x = ip->i_disksize + 1;
1015 if (x > *len) {
1016 *buf = kmalloc(x, GFP_NOFS);
1017 if (!*buf) {
1018 error = -ENOMEM;
1019 goto out_brelse;
1020 }
1021 } 1015 }
1022 1016
1023 memcpy(*buf, dibh->b_data + sizeof(struct gfs2_dinode), x); 1017 x = ip->i_disksize + 1;
1024 *len = x; 1018 buf = kmalloc(x, GFP_NOFS);
1025 1019 if (!buf)
1026out_brelse: 1020 buf = ERR_PTR(-ENOMEM);
1021 else
1022 memcpy(buf, dibh->b_data + sizeof(struct gfs2_dinode), x);
1027 brelse(dibh); 1023 brelse(dibh);
1028out: 1024out:
1029 gfs2_glock_dq_uninit(&i_gh); 1025 gfs2_glock_dq_uninit(&i_gh);
1030 return error; 1026 nd_set_link(nd, buf);
1031} 1027 return NULL;
1032
1033/**
1034 * gfs2_readlink - Read the value of a symlink
1035 * @dentry: the symlink
1036 * @buf: the buffer to read the symlink data into
1037 * @size: the size of the buffer
1038 *
1039 * Returns: errno
1040 */
1041
1042static int gfs2_readlink(struct dentry *dentry, char __user *user_buf,
1043 int user_size)
1044{
1045 struct gfs2_inode *ip = GFS2_I(dentry->d_inode);
1046 char array[GFS2_FAST_NAME_SIZE], *buf = array;
1047 unsigned int len = GFS2_FAST_NAME_SIZE;
1048 int error;
1049
1050 error = gfs2_readlinki(ip, &buf, &len);
1051 if (error)
1052 return error;
1053
1054 if (user_size > len - 1)
1055 user_size = len - 1;
1056
1057 if (copy_to_user(user_buf, buf, user_size))
1058 error = -EFAULT;
1059 else
1060 error = user_size;
1061
1062 if (buf != array)
1063 kfree(buf);
1064
1065 return error;
1066} 1028}
1067 1029
1068/** 1030static void gfs2_put_link(struct dentry *dentry, struct nameidata *nd, void *p)
1069 * gfs2_follow_link - Follow a symbolic link
1070 * @dentry: The dentry of the link
1071 * @nd: Data that we pass to vfs_follow_link()
1072 *
1073 * This can handle symlinks of any size. It is optimised for symlinks
1074 * under GFS2_FAST_NAME_SIZE.
1075 *
1076 * Returns: 0 on success or error code
1077 */
1078
1079static void *gfs2_follow_link(struct dentry *dentry, struct nameidata *nd)
1080{ 1031{
1081 struct gfs2_inode *ip = GFS2_I(dentry->d_inode); 1032 char *s = nd_get_link(nd);
1082 char array[GFS2_FAST_NAME_SIZE], *buf = array; 1033 if (!IS_ERR(s))
1083 unsigned int len = GFS2_FAST_NAME_SIZE; 1034 kfree(s);
1084 int error;
1085
1086 error = gfs2_readlinki(ip, &buf, &len);
1087 if (!error) {
1088 error = vfs_follow_link(nd, buf);
1089 if (buf != array)
1090 kfree(buf);
1091 } else
1092 path_put(&nd->path);
1093
1094 return ERR_PTR(error);
1095} 1035}
1096 1036
1097/** 1037/**
@@ -1426,8 +1366,9 @@ const struct inode_operations gfs2_dir_iops = {
1426}; 1366};
1427 1367
1428const struct inode_operations gfs2_symlink_iops = { 1368const struct inode_operations gfs2_symlink_iops = {
1429 .readlink = gfs2_readlink, 1369 .readlink = generic_readlink,
1430 .follow_link = gfs2_follow_link, 1370 .follow_link = gfs2_follow_link,
1371 .put_link = gfs2_put_link,
1431 .permission = gfs2_permission, 1372 .permission = gfs2_permission,
1432 .setattr = gfs2_setattr, 1373 .setattr = gfs2_setattr,
1433 .getattr = gfs2_getattr, 1374 .getattr = gfs2_getattr,
diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c
index e3bf6eab8750..6dbcbad6ab17 100644
--- a/fs/gfs2/quota.c
+++ b/fs/gfs2/quota.c
@@ -1083,7 +1083,7 @@ void gfs2_quota_change(struct gfs2_inode *ip, s64 change,
1083 } 1083 }
1084} 1084}
1085 1085
1086int gfs2_quota_sync(struct super_block *sb, int type) 1086int gfs2_quota_sync(struct super_block *sb, int type, int wait)
1087{ 1087{
1088 struct gfs2_sbd *sdp = sb->s_fs_info; 1088 struct gfs2_sbd *sdp = sb->s_fs_info;
1089 struct gfs2_quota_data **qda; 1089 struct gfs2_quota_data **qda;
@@ -1127,6 +1127,11 @@ int gfs2_quota_sync(struct super_block *sb, int type)
1127 return error; 1127 return error;
1128} 1128}
1129 1129
1130static int gfs2_quota_sync_timeo(struct super_block *sb, int type)
1131{
1132 return gfs2_quota_sync(sb, type, 0);
1133}
1134
1130int gfs2_quota_refresh(struct gfs2_sbd *sdp, int user, u32 id) 1135int gfs2_quota_refresh(struct gfs2_sbd *sdp, int user, u32 id)
1131{ 1136{
1132 struct gfs2_quota_data *qd; 1137 struct gfs2_quota_data *qd;
@@ -1382,7 +1387,7 @@ int gfs2_quotad(void *data)
1382 &tune->gt_statfs_quantum); 1387 &tune->gt_statfs_quantum);
1383 1388
1384 /* Update quota file */ 1389 /* Update quota file */
1385 quotad_check_timeo(sdp, "sync", gfs2_quota_sync, t, 1390 quotad_check_timeo(sdp, "sync", gfs2_quota_sync_timeo, t,
1386 &quotad_timeo, &tune->gt_quota_quantum); 1391 &quotad_timeo, &tune->gt_quota_quantum);
1387 1392
1388 /* Check for & recover partially truncated inodes */ 1393 /* Check for & recover partially truncated inodes */
diff --git a/fs/gfs2/quota.h b/fs/gfs2/quota.h
index e271fa07ad02..195f60c8bd14 100644
--- a/fs/gfs2/quota.h
+++ b/fs/gfs2/quota.h
@@ -25,7 +25,7 @@ extern int gfs2_quota_check(struct gfs2_inode *ip, u32 uid, u32 gid);
25extern void gfs2_quota_change(struct gfs2_inode *ip, s64 change, 25extern void gfs2_quota_change(struct gfs2_inode *ip, s64 change,
26 u32 uid, u32 gid); 26 u32 uid, u32 gid);
27 27
28extern int gfs2_quota_sync(struct super_block *sb, int type); 28extern int gfs2_quota_sync(struct super_block *sb, int type, int wait);
29extern int gfs2_quota_refresh(struct gfs2_sbd *sdp, int user, u32 id); 29extern int gfs2_quota_refresh(struct gfs2_sbd *sdp, int user, u32 id);
30 30
31extern int gfs2_quota_init(struct gfs2_sbd *sdp); 31extern int gfs2_quota_init(struct gfs2_sbd *sdp);
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index e5e22629da67..50aac606b990 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -22,6 +22,7 @@
22#include <linux/crc32.h> 22#include <linux/crc32.h>
23#include <linux/time.h> 23#include <linux/time.h>
24#include <linux/wait.h> 24#include <linux/wait.h>
25#include <linux/writeback.h>
25 26
26#include "gfs2.h" 27#include "gfs2.h"
27#include "incore.h" 28#include "incore.h"
@@ -711,7 +712,7 @@ void gfs2_unfreeze_fs(struct gfs2_sbd *sdp)
711 * Returns: errno 712 * Returns: errno
712 */ 713 */
713 714
714static int gfs2_write_inode(struct inode *inode, int sync) 715static int gfs2_write_inode(struct inode *inode, struct writeback_control *wbc)
715{ 716{
716 struct gfs2_inode *ip = GFS2_I(inode); 717 struct gfs2_inode *ip = GFS2_I(inode);
717 struct gfs2_sbd *sdp = GFS2_SB(inode); 718 struct gfs2_sbd *sdp = GFS2_SB(inode);
@@ -745,7 +746,7 @@ static int gfs2_write_inode(struct inode *inode, int sync)
745do_unlock: 746do_unlock:
746 gfs2_glock_dq_uninit(&gh); 747 gfs2_glock_dq_uninit(&gh);
747do_flush: 748do_flush:
748 if (sync != 0) 749 if (wbc->sync_mode == WB_SYNC_ALL)
749 gfs2_log_flush(GFS2_SB(inode), ip->i_gl); 750 gfs2_log_flush(GFS2_SB(inode), ip->i_gl);
750 return ret; 751 return ret;
751} 752}
@@ -763,7 +764,7 @@ static int gfs2_make_fs_ro(struct gfs2_sbd *sdp)
763 int error; 764 int error;
764 765
765 flush_workqueue(gfs2_delete_workqueue); 766 flush_workqueue(gfs2_delete_workqueue);
766 gfs2_quota_sync(sdp->sd_vfs, 0); 767 gfs2_quota_sync(sdp->sd_vfs, 0, 1);
767 gfs2_statfs_sync(sdp->sd_vfs, 0); 768 gfs2_statfs_sync(sdp->sd_vfs, 0);
768 769
769 error = gfs2_glock_nq_init(sdp->sd_trans_gl, LM_ST_SHARED, GL_NOCACHE, 770 error = gfs2_glock_nq_init(sdp->sd_trans_gl, LM_ST_SHARED, GL_NOCACHE,
diff --git a/fs/gfs2/sys.c b/fs/gfs2/sys.c
index a0db1c94317d..b5f1a46133c8 100644
--- a/fs/gfs2/sys.c
+++ b/fs/gfs2/sys.c
@@ -167,7 +167,7 @@ static ssize_t quota_sync_store(struct gfs2_sbd *sdp, const char *buf,
167 if (simple_strtol(buf, NULL, 0) != 1) 167 if (simple_strtol(buf, NULL, 0) != 1)
168 return -EINVAL; 168 return -EINVAL;
169 169
170 gfs2_quota_sync(sdp->sd_vfs, 0); 170 gfs2_quota_sync(sdp->sd_vfs, 0, 1);
171 return len; 171 return len;
172} 172}
173 173
diff --git a/fs/hfs/hfs_fs.h b/fs/hfs/hfs_fs.h
index 052387e11671..fe35e3b626c4 100644
--- a/fs/hfs/hfs_fs.h
+++ b/fs/hfs/hfs_fs.h
@@ -188,7 +188,7 @@ extern const struct address_space_operations hfs_btree_aops;
188 188
189extern struct inode *hfs_new_inode(struct inode *, struct qstr *, int); 189extern struct inode *hfs_new_inode(struct inode *, struct qstr *, int);
190extern void hfs_inode_write_fork(struct inode *, struct hfs_extent *, __be32 *, __be32 *); 190extern void hfs_inode_write_fork(struct inode *, struct hfs_extent *, __be32 *, __be32 *);
191extern int hfs_write_inode(struct inode *, int); 191extern int hfs_write_inode(struct inode *, struct writeback_control *);
192extern int hfs_inode_setattr(struct dentry *, struct iattr *); 192extern int hfs_inode_setattr(struct dentry *, struct iattr *);
193extern void hfs_inode_read_fork(struct inode *inode, struct hfs_extent *ext, 193extern void hfs_inode_read_fork(struct inode *inode, struct hfs_extent *ext,
194 __be32 log_size, __be32 phys_size, u32 clump_size); 194 __be32 log_size, __be32 phys_size, u32 clump_size);
diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c
index a1cbff2b4d99..14f5cb1b9fdc 100644
--- a/fs/hfs/inode.c
+++ b/fs/hfs/inode.c
@@ -381,7 +381,7 @@ void hfs_inode_write_fork(struct inode *inode, struct hfs_extent *ext,
381 HFS_SB(inode->i_sb)->alloc_blksz); 381 HFS_SB(inode->i_sb)->alloc_blksz);
382} 382}
383 383
384int hfs_write_inode(struct inode *inode, int unused) 384int hfs_write_inode(struct inode *inode, struct writeback_control *wbc)
385{ 385{
386 struct inode *main_inode = inode; 386 struct inode *main_inode = inode;
387 struct hfs_find_data fd; 387 struct hfs_find_data fd;
diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c
index 43022f3d5148..74b473a8ef92 100644
--- a/fs/hfsplus/super.c
+++ b/fs/hfsplus/super.c
@@ -87,7 +87,8 @@ bad_inode:
87 return ERR_PTR(err); 87 return ERR_PTR(err);
88} 88}
89 89
90static int hfsplus_write_inode(struct inode *inode, int unused) 90static int hfsplus_write_inode(struct inode *inode,
91 struct writeback_control *wbc)
91{ 92{
92 struct hfsplus_vh *vhdr; 93 struct hfsplus_vh *vhdr;
93 int ret = 0; 94 int ret = 0;
diff --git a/fs/hpfs/anode.c b/fs/hpfs/anode.c
index 1aa88c4e0964..6a2f04bf3df0 100644
--- a/fs/hpfs/anode.c
+++ b/fs/hpfs/anode.c
@@ -353,7 +353,7 @@ int hpfs_ea_read(struct super_block *s, secno a, int ano, unsigned pos,
353} 353}
354 354
355int hpfs_ea_write(struct super_block *s, secno a, int ano, unsigned pos, 355int hpfs_ea_write(struct super_block *s, secno a, int ano, unsigned pos,
356 unsigned len, char *buf) 356 unsigned len, const char *buf)
357{ 357{
358 struct buffer_head *bh; 358 struct buffer_head *bh;
359 char *data; 359 char *data;
diff --git a/fs/hpfs/dentry.c b/fs/hpfs/dentry.c
index 940d6d150bee..67d9d36b3d5f 100644
--- a/fs/hpfs/dentry.c
+++ b/fs/hpfs/dentry.c
@@ -20,8 +20,8 @@ static int hpfs_hash_dentry(struct dentry *dentry, struct qstr *qstr)
20 20
21 if (l == 1) if (qstr->name[0]=='.') goto x; 21 if (l == 1) if (qstr->name[0]=='.') goto x;
22 if (l == 2) if (qstr->name[0]=='.' || qstr->name[1]=='.') goto x; 22 if (l == 2) if (qstr->name[0]=='.' || qstr->name[1]=='.') goto x;
23 hpfs_adjust_length((char *)qstr->name, &l); 23 hpfs_adjust_length(qstr->name, &l);
24 /*if (hpfs_chk_name((char *)qstr->name,&l))*/ 24 /*if (hpfs_chk_name(qstr->name,&l))*/
25 /*return -ENAMETOOLONG;*/ 25 /*return -ENAMETOOLONG;*/
26 /*return -ENOENT;*/ 26 /*return -ENOENT;*/
27 x: 27 x:
@@ -38,14 +38,16 @@ static int hpfs_compare_dentry(struct dentry *dentry, struct qstr *a, struct qst
38{ 38{
39 unsigned al=a->len; 39 unsigned al=a->len;
40 unsigned bl=b->len; 40 unsigned bl=b->len;
41 hpfs_adjust_length((char *)a->name, &al); 41 hpfs_adjust_length(a->name, &al);
42 /*hpfs_adjust_length((char *)b->name, &bl);*/ 42 /*hpfs_adjust_length(b->name, &bl);*/
43 /* 'a' is the qstr of an already existing dentry, so the name 43 /* 'a' is the qstr of an already existing dentry, so the name
44 * must be valid. 'b' must be validated first. 44 * must be valid. 'b' must be validated first.
45 */ 45 */
46 46
47 if (hpfs_chk_name((char *)b->name, &bl)) return 1; 47 if (hpfs_chk_name(b->name, &bl))
48 if (hpfs_compare_names(dentry->d_sb, (char *)a->name, al, (char *)b->name, bl, 0)) return 1; 48 return 1;
49 if (hpfs_compare_names(dentry->d_sb, a->name, al, b->name, bl, 0))
50 return 1;
49 return 0; 51 return 0;
50} 52}
51 53
diff --git a/fs/hpfs/dir.c b/fs/hpfs/dir.c
index 8865c94f55f6..26e3964a4b8c 100644
--- a/fs/hpfs/dir.c
+++ b/fs/hpfs/dir.c
@@ -59,7 +59,7 @@ static int hpfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
59 struct hpfs_dirent *de; 59 struct hpfs_dirent *de;
60 int lc; 60 int lc;
61 long old_pos; 61 long old_pos;
62 char *tempname; 62 unsigned char *tempname;
63 int c1, c2 = 0; 63 int c1, c2 = 0;
64 int ret = 0; 64 int ret = 0;
65 65
@@ -158,11 +158,11 @@ static int hpfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
158 tempname = hpfs_translate_name(inode->i_sb, de->name, de->namelen, lc, de->not_8x3); 158 tempname = hpfs_translate_name(inode->i_sb, de->name, de->namelen, lc, de->not_8x3);
159 if (filldir(dirent, tempname, de->namelen, old_pos, de->fnode, DT_UNKNOWN) < 0) { 159 if (filldir(dirent, tempname, de->namelen, old_pos, de->fnode, DT_UNKNOWN) < 0) {
160 filp->f_pos = old_pos; 160 filp->f_pos = old_pos;
161 if (tempname != (char *)de->name) kfree(tempname); 161 if (tempname != de->name) kfree(tempname);
162 hpfs_brelse4(&qbh); 162 hpfs_brelse4(&qbh);
163 goto out; 163 goto out;
164 } 164 }
165 if (tempname != (char *)de->name) kfree(tempname); 165 if (tempname != de->name) kfree(tempname);
166 hpfs_brelse4(&qbh); 166 hpfs_brelse4(&qbh);
167 } 167 }
168out: 168out:
@@ -187,7 +187,7 @@ out:
187 187
188struct dentry *hpfs_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd) 188struct dentry *hpfs_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
189{ 189{
190 const char *name = dentry->d_name.name; 190 const unsigned char *name = dentry->d_name.name;
191 unsigned len = dentry->d_name.len; 191 unsigned len = dentry->d_name.len;
192 struct quad_buffer_head qbh; 192 struct quad_buffer_head qbh;
193 struct hpfs_dirent *de; 193 struct hpfs_dirent *de;
@@ -197,7 +197,7 @@ struct dentry *hpfs_lookup(struct inode *dir, struct dentry *dentry, struct name
197 struct hpfs_inode_info *hpfs_result; 197 struct hpfs_inode_info *hpfs_result;
198 198
199 lock_kernel(); 199 lock_kernel();
200 if ((err = hpfs_chk_name((char *)name, &len))) { 200 if ((err = hpfs_chk_name(name, &len))) {
201 if (err == -ENAMETOOLONG) { 201 if (err == -ENAMETOOLONG) {
202 unlock_kernel(); 202 unlock_kernel();
203 return ERR_PTR(-ENAMETOOLONG); 203 return ERR_PTR(-ENAMETOOLONG);
@@ -209,7 +209,7 @@ struct dentry *hpfs_lookup(struct inode *dir, struct dentry *dentry, struct name
209 * '.' and '..' will never be passed here. 209 * '.' and '..' will never be passed here.
210 */ 210 */
211 211
212 de = map_dirent(dir, hpfs_i(dir)->i_dno, (char *) name, len, NULL, &qbh); 212 de = map_dirent(dir, hpfs_i(dir)->i_dno, name, len, NULL, &qbh);
213 213
214 /* 214 /*
215 * This is not really a bailout, just means file not found. 215 * This is not really a bailout, just means file not found.
@@ -250,7 +250,7 @@ struct dentry *hpfs_lookup(struct inode *dir, struct dentry *dentry, struct name
250 hpfs_result = hpfs_i(result); 250 hpfs_result = hpfs_i(result);
251 if (!de->directory) hpfs_result->i_parent_dir = dir->i_ino; 251 if (!de->directory) hpfs_result->i_parent_dir = dir->i_ino;
252 252
253 hpfs_decide_conv(result, (char *)name, len); 253 hpfs_decide_conv(result, name, len);
254 254
255 if (de->has_acl || de->has_xtd_perm) if (!(dir->i_sb->s_flags & MS_RDONLY)) { 255 if (de->has_acl || de->has_xtd_perm) if (!(dir->i_sb->s_flags & MS_RDONLY)) {
256 hpfs_error(result->i_sb, "ACLs or XPERM found. This is probably HPFS386. This driver doesn't support it now. Send me some info on these structures"); 256 hpfs_error(result->i_sb, "ACLs or XPERM found. This is probably HPFS386. This driver doesn't support it now. Send me some info on these structures");
diff --git a/fs/hpfs/dnode.c b/fs/hpfs/dnode.c
index fe83c2b7d2d8..9b2ffadfc8c4 100644
--- a/fs/hpfs/dnode.c
+++ b/fs/hpfs/dnode.c
@@ -158,7 +158,8 @@ static void set_last_pointer(struct super_block *s, struct dnode *d, dnode_secno
158 158
159/* Add an entry to dnode and don't care if it grows over 2048 bytes */ 159/* Add an entry to dnode and don't care if it grows over 2048 bytes */
160 160
161struct hpfs_dirent *hpfs_add_de(struct super_block *s, struct dnode *d, unsigned char *name, 161struct hpfs_dirent *hpfs_add_de(struct super_block *s, struct dnode *d,
162 const unsigned char *name,
162 unsigned namelen, secno down_ptr) 163 unsigned namelen, secno down_ptr)
163{ 164{
164 struct hpfs_dirent *de; 165 struct hpfs_dirent *de;
@@ -223,7 +224,7 @@ static void fix_up_ptrs(struct super_block *s, struct dnode *d)
223/* Add an entry to dnode and do dnode splitting if required */ 224/* Add an entry to dnode and do dnode splitting if required */
224 225
225static int hpfs_add_to_dnode(struct inode *i, dnode_secno dno, 226static int hpfs_add_to_dnode(struct inode *i, dnode_secno dno,
226 unsigned char *name, unsigned namelen, 227 const unsigned char *name, unsigned namelen,
227 struct hpfs_dirent *new_de, dnode_secno down_ptr) 228 struct hpfs_dirent *new_de, dnode_secno down_ptr)
228{ 229{
229 struct quad_buffer_head qbh, qbh1, qbh2; 230 struct quad_buffer_head qbh, qbh1, qbh2;
@@ -231,7 +232,7 @@ static int hpfs_add_to_dnode(struct inode *i, dnode_secno dno,
231 dnode_secno adno, rdno; 232 dnode_secno adno, rdno;
232 struct hpfs_dirent *de; 233 struct hpfs_dirent *de;
233 struct hpfs_dirent nde; 234 struct hpfs_dirent nde;
234 char *nname; 235 unsigned char *nname;
235 int h; 236 int h;
236 int pos; 237 int pos;
237 struct buffer_head *bh; 238 struct buffer_head *bh;
@@ -305,7 +306,9 @@ static int hpfs_add_to_dnode(struct inode *i, dnode_secno dno,
305 pos++; 306 pos++;
306 } 307 }
307 copy_de(new_de = &nde, de); 308 copy_de(new_de = &nde, de);
308 memcpy(name = nname, de->name, namelen = de->namelen); 309 memcpy(nname, de->name, de->namelen);
310 name = nname;
311 namelen = de->namelen;
309 for_all_poss(i, hpfs_pos_subst, ((loff_t)dno << 4) | pos, 4); 312 for_all_poss(i, hpfs_pos_subst, ((loff_t)dno << 4) | pos, 4);
310 down_ptr = adno; 313 down_ptr = adno;
311 set_last_pointer(i->i_sb, ad, de->down ? de_down_pointer(de) : 0); 314 set_last_pointer(i->i_sb, ad, de->down ? de_down_pointer(de) : 0);
@@ -368,7 +371,8 @@ static int hpfs_add_to_dnode(struct inode *i, dnode_secno dno,
368 * I hope, now it's finally bug-free. 371 * I hope, now it's finally bug-free.
369 */ 372 */
370 373
371int hpfs_add_dirent(struct inode *i, unsigned char *name, unsigned namelen, 374int hpfs_add_dirent(struct inode *i,
375 const unsigned char *name, unsigned namelen,
372 struct hpfs_dirent *new_de, int cdepth) 376 struct hpfs_dirent *new_de, int cdepth)
373{ 377{
374 struct hpfs_inode_info *hpfs_inode = hpfs_i(i); 378 struct hpfs_inode_info *hpfs_inode = hpfs_i(i);
@@ -897,7 +901,8 @@ struct hpfs_dirent *map_pos_dirent(struct inode *inode, loff_t *posp,
897 901
898/* Find a dirent in tree */ 902/* Find a dirent in tree */
899 903
900struct hpfs_dirent *map_dirent(struct inode *inode, dnode_secno dno, char *name, unsigned len, 904struct hpfs_dirent *map_dirent(struct inode *inode, dnode_secno dno,
905 const unsigned char *name, unsigned len,
901 dnode_secno *dd, struct quad_buffer_head *qbh) 906 dnode_secno *dd, struct quad_buffer_head *qbh)
902{ 907{
903 struct dnode *dnode; 908 struct dnode *dnode;
@@ -988,8 +993,8 @@ void hpfs_remove_dtree(struct super_block *s, dnode_secno dno)
988struct hpfs_dirent *map_fnode_dirent(struct super_block *s, fnode_secno fno, 993struct hpfs_dirent *map_fnode_dirent(struct super_block *s, fnode_secno fno,
989 struct fnode *f, struct quad_buffer_head *qbh) 994 struct fnode *f, struct quad_buffer_head *qbh)
990{ 995{
991 char *name1; 996 unsigned char *name1;
992 char *name2; 997 unsigned char *name2;
993 int name1len, name2len; 998 int name1len, name2len;
994 struct dnode *d; 999 struct dnode *d;
995 dnode_secno dno, downd; 1000 dnode_secno dno, downd;
diff --git a/fs/hpfs/ea.c b/fs/hpfs/ea.c
index 547a8384571f..45e53d972b42 100644
--- a/fs/hpfs/ea.c
+++ b/fs/hpfs/ea.c
@@ -62,8 +62,8 @@ static char *get_indirect_ea(struct super_block *s, int ano, secno a, int size)
62 return ret; 62 return ret;
63} 63}
64 64
65static void set_indirect_ea(struct super_block *s, int ano, secno a, char *data, 65static void set_indirect_ea(struct super_block *s, int ano, secno a,
66 int size) 66 const char *data, int size)
67{ 67{
68 hpfs_ea_write(s, a, ano, 0, size, data); 68 hpfs_ea_write(s, a, ano, 0, size, data);
69} 69}
@@ -186,7 +186,8 @@ char *hpfs_get_ea(struct super_block *s, struct fnode *fnode, char *key, int *si
186 * This driver can't change sizes of eas ('cause I just don't need it). 186 * This driver can't change sizes of eas ('cause I just don't need it).
187 */ 187 */
188 188
189void hpfs_set_ea(struct inode *inode, struct fnode *fnode, char *key, char *data, int size) 189void hpfs_set_ea(struct inode *inode, struct fnode *fnode, const char *key,
190 const char *data, int size)
190{ 191{
191 fnode_secno fno = inode->i_ino; 192 fnode_secno fno = inode->i_ino;
192 struct super_block *s = inode->i_sb; 193 struct super_block *s = inode->i_sb;
diff --git a/fs/hpfs/hpfs_fn.h b/fs/hpfs/hpfs_fn.h
index 701ca54c0867..97bf738cd5d6 100644
--- a/fs/hpfs/hpfs_fn.h
+++ b/fs/hpfs/hpfs_fn.h
@@ -215,7 +215,7 @@ secno hpfs_bplus_lookup(struct super_block *, struct inode *, struct bplus_heade
215secno hpfs_add_sector_to_btree(struct super_block *, secno, int, unsigned); 215secno hpfs_add_sector_to_btree(struct super_block *, secno, int, unsigned);
216void hpfs_remove_btree(struct super_block *, struct bplus_header *); 216void hpfs_remove_btree(struct super_block *, struct bplus_header *);
217int hpfs_ea_read(struct super_block *, secno, int, unsigned, unsigned, char *); 217int hpfs_ea_read(struct super_block *, secno, int, unsigned, unsigned, char *);
218int hpfs_ea_write(struct super_block *, secno, int, unsigned, unsigned, char *); 218int hpfs_ea_write(struct super_block *, secno, int, unsigned, unsigned, const char *);
219void hpfs_ea_remove(struct super_block *, secno, int, unsigned); 219void hpfs_ea_remove(struct super_block *, secno, int, unsigned);
220void hpfs_truncate_btree(struct super_block *, secno, int, unsigned); 220void hpfs_truncate_btree(struct super_block *, secno, int, unsigned);
221void hpfs_remove_fnode(struct super_block *, fnode_secno fno); 221void hpfs_remove_fnode(struct super_block *, fnode_secno fno);
@@ -244,13 +244,17 @@ extern const struct file_operations hpfs_dir_ops;
244 244
245void hpfs_add_pos(struct inode *, loff_t *); 245void hpfs_add_pos(struct inode *, loff_t *);
246void hpfs_del_pos(struct inode *, loff_t *); 246void hpfs_del_pos(struct inode *, loff_t *);
247struct hpfs_dirent *hpfs_add_de(struct super_block *, struct dnode *, unsigned char *, unsigned, secno); 247struct hpfs_dirent *hpfs_add_de(struct super_block *, struct dnode *,
248int hpfs_add_dirent(struct inode *, unsigned char *, unsigned, struct hpfs_dirent *, int); 248 const unsigned char *, unsigned, secno);
249int hpfs_add_dirent(struct inode *, const unsigned char *, unsigned,
250 struct hpfs_dirent *, int);
249int hpfs_remove_dirent(struct inode *, dnode_secno, struct hpfs_dirent *, struct quad_buffer_head *, int); 251int hpfs_remove_dirent(struct inode *, dnode_secno, struct hpfs_dirent *, struct quad_buffer_head *, int);
250void hpfs_count_dnodes(struct super_block *, dnode_secno, int *, int *, int *); 252void hpfs_count_dnodes(struct super_block *, dnode_secno, int *, int *, int *);
251dnode_secno hpfs_de_as_down_as_possible(struct super_block *, dnode_secno dno); 253dnode_secno hpfs_de_as_down_as_possible(struct super_block *, dnode_secno dno);
252struct hpfs_dirent *map_pos_dirent(struct inode *, loff_t *, struct quad_buffer_head *); 254struct hpfs_dirent *map_pos_dirent(struct inode *, loff_t *, struct quad_buffer_head *);
253struct hpfs_dirent *map_dirent(struct inode *, dnode_secno, char *, unsigned, dnode_secno *, struct quad_buffer_head *); 255struct hpfs_dirent *map_dirent(struct inode *, dnode_secno,
256 const unsigned char *, unsigned, dnode_secno *,
257 struct quad_buffer_head *);
254void hpfs_remove_dtree(struct super_block *, dnode_secno); 258void hpfs_remove_dtree(struct super_block *, dnode_secno);
255struct hpfs_dirent *map_fnode_dirent(struct super_block *, fnode_secno, struct fnode *, struct quad_buffer_head *); 259struct hpfs_dirent *map_fnode_dirent(struct super_block *, fnode_secno, struct fnode *, struct quad_buffer_head *);
256 260
@@ -259,7 +263,8 @@ struct hpfs_dirent *map_fnode_dirent(struct super_block *, fnode_secno, struct f
259void hpfs_ea_ext_remove(struct super_block *, secno, int, unsigned); 263void hpfs_ea_ext_remove(struct super_block *, secno, int, unsigned);
260int hpfs_read_ea(struct super_block *, struct fnode *, char *, char *, int); 264int hpfs_read_ea(struct super_block *, struct fnode *, char *, char *, int);
261char *hpfs_get_ea(struct super_block *, struct fnode *, char *, int *); 265char *hpfs_get_ea(struct super_block *, struct fnode *, char *, int *);
262void hpfs_set_ea(struct inode *, struct fnode *, char *, char *, int); 266void hpfs_set_ea(struct inode *, struct fnode *, const char *,
267 const char *, int);
263 268
264/* file.c */ 269/* file.c */
265 270
@@ -282,7 +287,7 @@ void hpfs_delete_inode(struct inode *);
282 287
283unsigned *hpfs_map_dnode_bitmap(struct super_block *, struct quad_buffer_head *); 288unsigned *hpfs_map_dnode_bitmap(struct super_block *, struct quad_buffer_head *);
284unsigned *hpfs_map_bitmap(struct super_block *, unsigned, struct quad_buffer_head *, char *); 289unsigned *hpfs_map_bitmap(struct super_block *, unsigned, struct quad_buffer_head *, char *);
285char *hpfs_load_code_page(struct super_block *, secno); 290unsigned char *hpfs_load_code_page(struct super_block *, secno);
286secno *hpfs_load_bitmap_directory(struct super_block *, secno bmp); 291secno *hpfs_load_bitmap_directory(struct super_block *, secno bmp);
287struct fnode *hpfs_map_fnode(struct super_block *s, ino_t, struct buffer_head **); 292struct fnode *hpfs_map_fnode(struct super_block *s, ino_t, struct buffer_head **);
288struct anode *hpfs_map_anode(struct super_block *s, anode_secno, struct buffer_head **); 293struct anode *hpfs_map_anode(struct super_block *s, anode_secno, struct buffer_head **);
@@ -292,12 +297,13 @@ dnode_secno hpfs_fnode_dno(struct super_block *s, ino_t ino);
292/* name.c */ 297/* name.c */
293 298
294unsigned char hpfs_upcase(unsigned char *, unsigned char); 299unsigned char hpfs_upcase(unsigned char *, unsigned char);
295int hpfs_chk_name(unsigned char *, unsigned *); 300int hpfs_chk_name(const unsigned char *, unsigned *);
296char *hpfs_translate_name(struct super_block *, unsigned char *, unsigned, int, int); 301unsigned char *hpfs_translate_name(struct super_block *, unsigned char *, unsigned, int, int);
297int hpfs_compare_names(struct super_block *, unsigned char *, unsigned, unsigned char *, unsigned, int); 302int hpfs_compare_names(struct super_block *, const unsigned char *, unsigned,
298int hpfs_is_name_long(unsigned char *, unsigned); 303 const unsigned char *, unsigned, int);
299void hpfs_adjust_length(unsigned char *, unsigned *); 304int hpfs_is_name_long(const unsigned char *, unsigned);
300void hpfs_decide_conv(struct inode *, unsigned char *, unsigned); 305void hpfs_adjust_length(const unsigned char *, unsigned *);
306void hpfs_decide_conv(struct inode *, const unsigned char *, unsigned);
301 307
302/* namei.c */ 308/* namei.c */
303 309
diff --git a/fs/hpfs/inode.c b/fs/hpfs/inode.c
index fe703ae46bc7..ff90affb94e1 100644
--- a/fs/hpfs/inode.c
+++ b/fs/hpfs/inode.c
@@ -46,7 +46,7 @@ void hpfs_read_inode(struct inode *i)
46 struct fnode *fnode; 46 struct fnode *fnode;
47 struct super_block *sb = i->i_sb; 47 struct super_block *sb = i->i_sb;
48 struct hpfs_inode_info *hpfs_inode = hpfs_i(i); 48 struct hpfs_inode_info *hpfs_inode = hpfs_i(i);
49 unsigned char *ea; 49 void *ea;
50 int ea_size; 50 int ea_size;
51 51
52 if (!(fnode = hpfs_map_fnode(sb, i->i_ino, &bh))) { 52 if (!(fnode = hpfs_map_fnode(sb, i->i_ino, &bh))) {
@@ -112,7 +112,7 @@ void hpfs_read_inode(struct inode *i)
112 } 112 }
113 } 113 }
114 if (fnode->dirflag) { 114 if (fnode->dirflag) {
115 unsigned n_dnodes, n_subdirs; 115 int n_dnodes, n_subdirs;
116 i->i_mode |= S_IFDIR; 116 i->i_mode |= S_IFDIR;
117 i->i_op = &hpfs_dir_iops; 117 i->i_op = &hpfs_dir_iops;
118 i->i_fop = &hpfs_dir_ops; 118 i->i_fop = &hpfs_dir_ops;
diff --git a/fs/hpfs/map.c b/fs/hpfs/map.c
index c4724589b2eb..840d033ecee8 100644
--- a/fs/hpfs/map.c
+++ b/fs/hpfs/map.c
@@ -35,7 +35,7 @@ unsigned int *hpfs_map_bitmap(struct super_block *s, unsigned bmp_block,
35 * lowercasing table 35 * lowercasing table
36 */ 36 */
37 37
38char *hpfs_load_code_page(struct super_block *s, secno cps) 38unsigned char *hpfs_load_code_page(struct super_block *s, secno cps)
39{ 39{
40 struct buffer_head *bh; 40 struct buffer_head *bh;
41 secno cpds; 41 secno cpds;
@@ -71,7 +71,7 @@ char *hpfs_load_code_page(struct super_block *s, secno cps)
71 brelse(bh); 71 brelse(bh);
72 return NULL; 72 return NULL;
73 } 73 }
74 ptr = (char *)cpd + cpd->offs[cpi] + 6; 74 ptr = (unsigned char *)cpd + cpd->offs[cpi] + 6;
75 if (!(cp_table = kmalloc(256, GFP_KERNEL))) { 75 if (!(cp_table = kmalloc(256, GFP_KERNEL))) {
76 printk("HPFS: out of memory for code page table\n"); 76 printk("HPFS: out of memory for code page table\n");
77 brelse(bh); 77 brelse(bh);
@@ -217,7 +217,7 @@ struct dnode *hpfs_map_dnode(struct super_block *s, unsigned secno,
217 if ((dnode = hpfs_map_4sectors(s, secno, qbh, DNODE_RD_AHEAD))) 217 if ((dnode = hpfs_map_4sectors(s, secno, qbh, DNODE_RD_AHEAD)))
218 if (hpfs_sb(s)->sb_chk) { 218 if (hpfs_sb(s)->sb_chk) {
219 unsigned p, pp = 0; 219 unsigned p, pp = 0;
220 unsigned char *d = (char *)dnode; 220 unsigned char *d = (unsigned char *)dnode;
221 int b = 0; 221 int b = 0;
222 if (dnode->magic != DNODE_MAGIC) { 222 if (dnode->magic != DNODE_MAGIC) {
223 hpfs_error(s, "bad magic on dnode %08x", secno); 223 hpfs_error(s, "bad magic on dnode %08x", secno);
diff --git a/fs/hpfs/name.c b/fs/hpfs/name.c
index 1f4a964384eb..f24736d7a439 100644
--- a/fs/hpfs/name.c
+++ b/fs/hpfs/name.c
@@ -8,16 +8,16 @@
8 8
9#include "hpfs_fn.h" 9#include "hpfs_fn.h"
10 10
11static char *text_postfix[]={ 11static const char *text_postfix[]={
12".ASM", ".BAS", ".BAT", ".C", ".CC", ".CFG", ".CMD", ".CON", ".CPP", ".DEF", 12".ASM", ".BAS", ".BAT", ".C", ".CC", ".CFG", ".CMD", ".CON", ".CPP", ".DEF",
13".DOC", ".DPR", ".ERX", ".H", ".HPP", ".HTM", ".HTML", ".JAVA", ".LOG", ".PAS", 13".DOC", ".DPR", ".ERX", ".H", ".HPP", ".HTM", ".HTML", ".JAVA", ".LOG", ".PAS",
14".RC", ".TEX", ".TXT", ".Y", ""}; 14".RC", ".TEX", ".TXT", ".Y", ""};
15 15
16static char *text_prefix[]={ 16static const char *text_prefix[]={
17"AUTOEXEC.", "CHANGES", "COPYING", "CONFIG.", "CREDITS", "FAQ", "FILE_ID.DIZ", 17"AUTOEXEC.", "CHANGES", "COPYING", "CONFIG.", "CREDITS", "FAQ", "FILE_ID.DIZ",
18"MAKEFILE", "READ.ME", "README", "TERMCAP", ""}; 18"MAKEFILE", "READ.ME", "README", "TERMCAP", ""};
19 19
20void hpfs_decide_conv(struct inode *inode, unsigned char *name, unsigned len) 20void hpfs_decide_conv(struct inode *inode, const unsigned char *name, unsigned len)
21{ 21{
22 struct hpfs_inode_info *hpfs_inode = hpfs_i(inode); 22 struct hpfs_inode_info *hpfs_inode = hpfs_i(inode);
23 int i; 23 int i;
@@ -71,7 +71,7 @@ static inline unsigned char locase(unsigned char *dir, unsigned char a)
71 return dir[a]; 71 return dir[a];
72} 72}
73 73
74int hpfs_chk_name(unsigned char *name, unsigned *len) 74int hpfs_chk_name(const unsigned char *name, unsigned *len)
75{ 75{
76 int i; 76 int i;
77 if (*len > 254) return -ENAMETOOLONG; 77 if (*len > 254) return -ENAMETOOLONG;
@@ -83,10 +83,10 @@ int hpfs_chk_name(unsigned char *name, unsigned *len)
83 return 0; 83 return 0;
84} 84}
85 85
86char *hpfs_translate_name(struct super_block *s, unsigned char *from, 86unsigned char *hpfs_translate_name(struct super_block *s, unsigned char *from,
87 unsigned len, int lc, int lng) 87 unsigned len, int lc, int lng)
88{ 88{
89 char *to; 89 unsigned char *to;
90 int i; 90 int i;
91 if (hpfs_sb(s)->sb_chk >= 2) if (hpfs_is_name_long(from, len) != lng) { 91 if (hpfs_sb(s)->sb_chk >= 2) if (hpfs_is_name_long(from, len) != lng) {
92 printk("HPFS: Long name flag mismatch - name "); 92 printk("HPFS: Long name flag mismatch - name ");
@@ -103,8 +103,9 @@ char *hpfs_translate_name(struct super_block *s, unsigned char *from,
103 return to; 103 return to;
104} 104}
105 105
106int hpfs_compare_names(struct super_block *s, unsigned char *n1, unsigned l1, 106int hpfs_compare_names(struct super_block *s,
107 unsigned char *n2, unsigned l2, int last) 107 const unsigned char *n1, unsigned l1,
108 const unsigned char *n2, unsigned l2, int last)
108{ 109{
109 unsigned l = l1 < l2 ? l1 : l2; 110 unsigned l = l1 < l2 ? l1 : l2;
110 unsigned i; 111 unsigned i;
@@ -120,7 +121,7 @@ int hpfs_compare_names(struct super_block *s, unsigned char *n1, unsigned l1,
120 return 0; 121 return 0;
121} 122}
122 123
123int hpfs_is_name_long(unsigned char *name, unsigned len) 124int hpfs_is_name_long(const unsigned char *name, unsigned len)
124{ 125{
125 int i,j; 126 int i,j;
126 for (i = 0; i < len && name[i] != '.'; i++) 127 for (i = 0; i < len && name[i] != '.'; i++)
@@ -134,7 +135,7 @@ int hpfs_is_name_long(unsigned char *name, unsigned len)
134 135
135/* OS/2 clears dots and spaces at the end of file name, so we have to */ 136/* OS/2 clears dots and spaces at the end of file name, so we have to */
136 137
137void hpfs_adjust_length(unsigned char *name, unsigned *len) 138void hpfs_adjust_length(const unsigned char *name, unsigned *len)
138{ 139{
139 if (!*len) return; 140 if (!*len) return;
140 if (*len == 1 && name[0] == '.') return; 141 if (*len == 1 && name[0] == '.') return;
diff --git a/fs/hpfs/namei.c b/fs/hpfs/namei.c
index 82b9c4ba9ed0..11c2b4080f65 100644
--- a/fs/hpfs/namei.c
+++ b/fs/hpfs/namei.c
@@ -11,7 +11,7 @@
11 11
12static int hpfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) 12static int hpfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
13{ 13{
14 const char *name = dentry->d_name.name; 14 const unsigned char *name = dentry->d_name.name;
15 unsigned len = dentry->d_name.len; 15 unsigned len = dentry->d_name.len;
16 struct quad_buffer_head qbh0; 16 struct quad_buffer_head qbh0;
17 struct buffer_head *bh; 17 struct buffer_head *bh;
@@ -24,7 +24,7 @@ static int hpfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
24 int r; 24 int r;
25 struct hpfs_dirent dee; 25 struct hpfs_dirent dee;
26 int err; 26 int err;
27 if ((err = hpfs_chk_name((char *)name, &len))) return err==-ENOENT ? -EINVAL : err; 27 if ((err = hpfs_chk_name(name, &len))) return err==-ENOENT ? -EINVAL : err;
28 lock_kernel(); 28 lock_kernel();
29 err = -ENOSPC; 29 err = -ENOSPC;
30 fnode = hpfs_alloc_fnode(dir->i_sb, hpfs_i(dir)->i_dno, &fno, &bh); 30 fnode = hpfs_alloc_fnode(dir->i_sb, hpfs_i(dir)->i_dno, &fno, &bh);
@@ -62,7 +62,7 @@ static int hpfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
62 result->i_mode &= ~0222; 62 result->i_mode &= ~0222;
63 63
64 mutex_lock(&hpfs_i(dir)->i_mutex); 64 mutex_lock(&hpfs_i(dir)->i_mutex);
65 r = hpfs_add_dirent(dir, (char *)name, len, &dee, 0); 65 r = hpfs_add_dirent(dir, name, len, &dee, 0);
66 if (r == 1) 66 if (r == 1)
67 goto bail3; 67 goto bail3;
68 if (r == -1) { 68 if (r == -1) {
@@ -121,7 +121,7 @@ bail:
121 121
122static int hpfs_create(struct inode *dir, struct dentry *dentry, int mode, struct nameidata *nd) 122static int hpfs_create(struct inode *dir, struct dentry *dentry, int mode, struct nameidata *nd)
123{ 123{
124 const char *name = dentry->d_name.name; 124 const unsigned char *name = dentry->d_name.name;
125 unsigned len = dentry->d_name.len; 125 unsigned len = dentry->d_name.len;
126 struct inode *result = NULL; 126 struct inode *result = NULL;
127 struct buffer_head *bh; 127 struct buffer_head *bh;
@@ -130,7 +130,7 @@ static int hpfs_create(struct inode *dir, struct dentry *dentry, int mode, struc
130 int r; 130 int r;
131 struct hpfs_dirent dee; 131 struct hpfs_dirent dee;
132 int err; 132 int err;
133 if ((err = hpfs_chk_name((char *)name, &len))) 133 if ((err = hpfs_chk_name(name, &len)))
134 return err==-ENOENT ? -EINVAL : err; 134 return err==-ENOENT ? -EINVAL : err;
135 lock_kernel(); 135 lock_kernel();
136 err = -ENOSPC; 136 err = -ENOSPC;
@@ -155,7 +155,7 @@ static int hpfs_create(struct inode *dir, struct dentry *dentry, int mode, struc
155 result->i_op = &hpfs_file_iops; 155 result->i_op = &hpfs_file_iops;
156 result->i_fop = &hpfs_file_ops; 156 result->i_fop = &hpfs_file_ops;
157 result->i_nlink = 1; 157 result->i_nlink = 1;
158 hpfs_decide_conv(result, (char *)name, len); 158 hpfs_decide_conv(result, name, len);
159 hpfs_i(result)->i_parent_dir = dir->i_ino; 159 hpfs_i(result)->i_parent_dir = dir->i_ino;
160 result->i_ctime.tv_sec = result->i_mtime.tv_sec = result->i_atime.tv_sec = local_to_gmt(dir->i_sb, dee.creation_date); 160 result->i_ctime.tv_sec = result->i_mtime.tv_sec = result->i_atime.tv_sec = local_to_gmt(dir->i_sb, dee.creation_date);
161 result->i_ctime.tv_nsec = 0; 161 result->i_ctime.tv_nsec = 0;
@@ -170,7 +170,7 @@ static int hpfs_create(struct inode *dir, struct dentry *dentry, int mode, struc
170 hpfs_i(result)->mmu_private = 0; 170 hpfs_i(result)->mmu_private = 0;
171 171
172 mutex_lock(&hpfs_i(dir)->i_mutex); 172 mutex_lock(&hpfs_i(dir)->i_mutex);
173 r = hpfs_add_dirent(dir, (char *)name, len, &dee, 0); 173 r = hpfs_add_dirent(dir, name, len, &dee, 0);
174 if (r == 1) 174 if (r == 1)
175 goto bail2; 175 goto bail2;
176 if (r == -1) { 176 if (r == -1) {
@@ -211,7 +211,7 @@ bail:
211 211
212static int hpfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t rdev) 212static int hpfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t rdev)
213{ 213{
214 const char *name = dentry->d_name.name; 214 const unsigned char *name = dentry->d_name.name;
215 unsigned len = dentry->d_name.len; 215 unsigned len = dentry->d_name.len;
216 struct buffer_head *bh; 216 struct buffer_head *bh;
217 struct fnode *fnode; 217 struct fnode *fnode;
@@ -220,7 +220,7 @@ static int hpfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t
220 struct hpfs_dirent dee; 220 struct hpfs_dirent dee;
221 struct inode *result = NULL; 221 struct inode *result = NULL;
222 int err; 222 int err;
223 if ((err = hpfs_chk_name((char *)name, &len))) return err==-ENOENT ? -EINVAL : err; 223 if ((err = hpfs_chk_name(name, &len))) return err==-ENOENT ? -EINVAL : err;
224 if (hpfs_sb(dir->i_sb)->sb_eas < 2) return -EPERM; 224 if (hpfs_sb(dir->i_sb)->sb_eas < 2) return -EPERM;
225 if (!new_valid_dev(rdev)) 225 if (!new_valid_dev(rdev))
226 return -EINVAL; 226 return -EINVAL;
@@ -256,7 +256,7 @@ static int hpfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t
256 init_special_inode(result, mode, rdev); 256 init_special_inode(result, mode, rdev);
257 257
258 mutex_lock(&hpfs_i(dir)->i_mutex); 258 mutex_lock(&hpfs_i(dir)->i_mutex);
259 r = hpfs_add_dirent(dir, (char *)name, len, &dee, 0); 259 r = hpfs_add_dirent(dir, name, len, &dee, 0);
260 if (r == 1) 260 if (r == 1)
261 goto bail2; 261 goto bail2;
262 if (r == -1) { 262 if (r == -1) {
@@ -289,7 +289,7 @@ bail:
289 289
290static int hpfs_symlink(struct inode *dir, struct dentry *dentry, const char *symlink) 290static int hpfs_symlink(struct inode *dir, struct dentry *dentry, const char *symlink)
291{ 291{
292 const char *name = dentry->d_name.name; 292 const unsigned char *name = dentry->d_name.name;
293 unsigned len = dentry->d_name.len; 293 unsigned len = dentry->d_name.len;
294 struct buffer_head *bh; 294 struct buffer_head *bh;
295 struct fnode *fnode; 295 struct fnode *fnode;
@@ -298,7 +298,7 @@ static int hpfs_symlink(struct inode *dir, struct dentry *dentry, const char *sy
298 struct hpfs_dirent dee; 298 struct hpfs_dirent dee;
299 struct inode *result; 299 struct inode *result;
300 int err; 300 int err;
301 if ((err = hpfs_chk_name((char *)name, &len))) return err==-ENOENT ? -EINVAL : err; 301 if ((err = hpfs_chk_name(name, &len))) return err==-ENOENT ? -EINVAL : err;
302 lock_kernel(); 302 lock_kernel();
303 if (hpfs_sb(dir->i_sb)->sb_eas < 2) { 303 if (hpfs_sb(dir->i_sb)->sb_eas < 2) {
304 unlock_kernel(); 304 unlock_kernel();
@@ -335,7 +335,7 @@ static int hpfs_symlink(struct inode *dir, struct dentry *dentry, const char *sy
335 result->i_data.a_ops = &hpfs_symlink_aops; 335 result->i_data.a_ops = &hpfs_symlink_aops;
336 336
337 mutex_lock(&hpfs_i(dir)->i_mutex); 337 mutex_lock(&hpfs_i(dir)->i_mutex);
338 r = hpfs_add_dirent(dir, (char *)name, len, &dee, 0); 338 r = hpfs_add_dirent(dir, name, len, &dee, 0);
339 if (r == 1) 339 if (r == 1)
340 goto bail2; 340 goto bail2;
341 if (r == -1) { 341 if (r == -1) {
@@ -345,7 +345,7 @@ static int hpfs_symlink(struct inode *dir, struct dentry *dentry, const char *sy
345 fnode->len = len; 345 fnode->len = len;
346 memcpy(fnode->name, name, len > 15 ? 15 : len); 346 memcpy(fnode->name, name, len > 15 ? 15 : len);
347 fnode->up = dir->i_ino; 347 fnode->up = dir->i_ino;
348 hpfs_set_ea(result, fnode, "SYMLINK", (char *)symlink, strlen(symlink)); 348 hpfs_set_ea(result, fnode, "SYMLINK", symlink, strlen(symlink));
349 mark_buffer_dirty(bh); 349 mark_buffer_dirty(bh);
350 brelse(bh); 350 brelse(bh);
351 351
@@ -369,7 +369,7 @@ bail:
369 369
370static int hpfs_unlink(struct inode *dir, struct dentry *dentry) 370static int hpfs_unlink(struct inode *dir, struct dentry *dentry)
371{ 371{
372 const char *name = dentry->d_name.name; 372 const unsigned char *name = dentry->d_name.name;
373 unsigned len = dentry->d_name.len; 373 unsigned len = dentry->d_name.len;
374 struct quad_buffer_head qbh; 374 struct quad_buffer_head qbh;
375 struct hpfs_dirent *de; 375 struct hpfs_dirent *de;
@@ -381,12 +381,12 @@ static int hpfs_unlink(struct inode *dir, struct dentry *dentry)
381 int err; 381 int err;
382 382
383 lock_kernel(); 383 lock_kernel();
384 hpfs_adjust_length((char *)name, &len); 384 hpfs_adjust_length(name, &len);
385again: 385again:
386 mutex_lock(&hpfs_i(inode)->i_parent_mutex); 386 mutex_lock(&hpfs_i(inode)->i_parent_mutex);
387 mutex_lock(&hpfs_i(dir)->i_mutex); 387 mutex_lock(&hpfs_i(dir)->i_mutex);
388 err = -ENOENT; 388 err = -ENOENT;
389 de = map_dirent(dir, hpfs_i(dir)->i_dno, (char *)name, len, &dno, &qbh); 389 de = map_dirent(dir, hpfs_i(dir)->i_dno, name, len, &dno, &qbh);
390 if (!de) 390 if (!de)
391 goto out; 391 goto out;
392 392
@@ -413,22 +413,25 @@ again:
413 413
414 mutex_unlock(&hpfs_i(dir)->i_mutex); 414 mutex_unlock(&hpfs_i(dir)->i_mutex);
415 mutex_unlock(&hpfs_i(inode)->i_parent_mutex); 415 mutex_unlock(&hpfs_i(inode)->i_parent_mutex);
416 d_drop(dentry); 416 dentry_unhash(dentry);
417 spin_lock(&dentry->d_lock); 417 if (!d_unhashed(dentry)) {
418 if (atomic_read(&dentry->d_count) > 1 || 418 dput(dentry);
419 generic_permission(inode, MAY_WRITE, NULL) || 419 unlock_kernel();
420 return -ENOSPC;
421 }
422 if (generic_permission(inode, MAY_WRITE, NULL) ||
420 !S_ISREG(inode->i_mode) || 423 !S_ISREG(inode->i_mode) ||
421 get_write_access(inode)) { 424 get_write_access(inode)) {
422 spin_unlock(&dentry->d_lock);
423 d_rehash(dentry); 425 d_rehash(dentry);
426 dput(dentry);
424 } else { 427 } else {
425 struct iattr newattrs; 428 struct iattr newattrs;
426 spin_unlock(&dentry->d_lock);
427 /*printk("HPFS: truncating file before delete.\n");*/ 429 /*printk("HPFS: truncating file before delete.\n");*/
428 newattrs.ia_size = 0; 430 newattrs.ia_size = 0;
429 newattrs.ia_valid = ATTR_SIZE | ATTR_CTIME; 431 newattrs.ia_valid = ATTR_SIZE | ATTR_CTIME;
430 err = notify_change(dentry, &newattrs); 432 err = notify_change(dentry, &newattrs);
431 put_write_access(inode); 433 put_write_access(inode);
434 dput(dentry);
432 if (!err) 435 if (!err)
433 goto again; 436 goto again;
434 } 437 }
@@ -451,7 +454,7 @@ out:
451 454
452static int hpfs_rmdir(struct inode *dir, struct dentry *dentry) 455static int hpfs_rmdir(struct inode *dir, struct dentry *dentry)
453{ 456{
454 const char *name = dentry->d_name.name; 457 const unsigned char *name = dentry->d_name.name;
455 unsigned len = dentry->d_name.len; 458 unsigned len = dentry->d_name.len;
456 struct quad_buffer_head qbh; 459 struct quad_buffer_head qbh;
457 struct hpfs_dirent *de; 460 struct hpfs_dirent *de;
@@ -462,12 +465,12 @@ static int hpfs_rmdir(struct inode *dir, struct dentry *dentry)
462 int err; 465 int err;
463 int r; 466 int r;
464 467
465 hpfs_adjust_length((char *)name, &len); 468 hpfs_adjust_length(name, &len);
466 lock_kernel(); 469 lock_kernel();
467 mutex_lock(&hpfs_i(inode)->i_parent_mutex); 470 mutex_lock(&hpfs_i(inode)->i_parent_mutex);
468 mutex_lock(&hpfs_i(dir)->i_mutex); 471 mutex_lock(&hpfs_i(dir)->i_mutex);
469 err = -ENOENT; 472 err = -ENOENT;
470 de = map_dirent(dir, hpfs_i(dir)->i_dno, (char *)name, len, &dno, &qbh); 473 de = map_dirent(dir, hpfs_i(dir)->i_dno, name, len, &dno, &qbh);
471 if (!de) 474 if (!de)
472 goto out; 475 goto out;
473 476
@@ -546,10 +549,10 @@ const struct address_space_operations hpfs_symlink_aops = {
546static int hpfs_rename(struct inode *old_dir, struct dentry *old_dentry, 549static int hpfs_rename(struct inode *old_dir, struct dentry *old_dentry,
547 struct inode *new_dir, struct dentry *new_dentry) 550 struct inode *new_dir, struct dentry *new_dentry)
548{ 551{
549 char *old_name = (char *)old_dentry->d_name.name; 552 const unsigned char *old_name = old_dentry->d_name.name;
550 int old_len = old_dentry->d_name.len; 553 unsigned old_len = old_dentry->d_name.len;
551 char *new_name = (char *)new_dentry->d_name.name; 554 const unsigned char *new_name = new_dentry->d_name.name;
552 int new_len = new_dentry->d_name.len; 555 unsigned new_len = new_dentry->d_name.len;
553 struct inode *i = old_dentry->d_inode; 556 struct inode *i = old_dentry->d_inode;
554 struct inode *new_inode = new_dentry->d_inode; 557 struct inode *new_inode = new_dentry->d_inode;
555 struct quad_buffer_head qbh, qbh1; 558 struct quad_buffer_head qbh, qbh1;
@@ -560,9 +563,9 @@ static int hpfs_rename(struct inode *old_dir, struct dentry *old_dentry,
560 struct buffer_head *bh; 563 struct buffer_head *bh;
561 struct fnode *fnode; 564 struct fnode *fnode;
562 int err; 565 int err;
563 if ((err = hpfs_chk_name((char *)new_name, &new_len))) return err; 566 if ((err = hpfs_chk_name(new_name, &new_len))) return err;
564 err = 0; 567 err = 0;
565 hpfs_adjust_length((char *)old_name, &old_len); 568 hpfs_adjust_length(old_name, &old_len);
566 569
567 lock_kernel(); 570 lock_kernel();
568 /* order doesn't matter, due to VFS exclusion */ 571 /* order doesn't matter, due to VFS exclusion */
@@ -579,7 +582,7 @@ static int hpfs_rename(struct inode *old_dir, struct dentry *old_dentry,
579 goto end1; 582 goto end1;
580 } 583 }
581 584
582 if (!(dep = map_dirent(old_dir, hpfs_i(old_dir)->i_dno, (char *)old_name, old_len, &dno, &qbh))) { 585 if (!(dep = map_dirent(old_dir, hpfs_i(old_dir)->i_dno, old_name, old_len, &dno, &qbh))) {
583 hpfs_error(i->i_sb, "lookup succeeded but map dirent failed"); 586 hpfs_error(i->i_sb, "lookup succeeded but map dirent failed");
584 err = -ENOENT; 587 err = -ENOENT;
585 goto end1; 588 goto end1;
@@ -590,7 +593,7 @@ static int hpfs_rename(struct inode *old_dir, struct dentry *old_dentry,
590 if (new_inode) { 593 if (new_inode) {
591 int r; 594 int r;
592 if ((r = hpfs_remove_dirent(old_dir, dno, dep, &qbh, 1)) != 2) { 595 if ((r = hpfs_remove_dirent(old_dir, dno, dep, &qbh, 1)) != 2) {
593 if ((nde = map_dirent(new_dir, hpfs_i(new_dir)->i_dno, (char *)new_name, new_len, NULL, &qbh1))) { 596 if ((nde = map_dirent(new_dir, hpfs_i(new_dir)->i_dno, new_name, new_len, NULL, &qbh1))) {
594 clear_nlink(new_inode); 597 clear_nlink(new_inode);
595 copy_de(nde, &de); 598 copy_de(nde, &de);
596 memcpy(nde->name, new_name, new_len); 599 memcpy(nde->name, new_name, new_len);
@@ -618,7 +621,7 @@ static int hpfs_rename(struct inode *old_dir, struct dentry *old_dentry,
618 } 621 }
619 622
620 if (new_dir == old_dir) 623 if (new_dir == old_dir)
621 if (!(dep = map_dirent(old_dir, hpfs_i(old_dir)->i_dno, (char *)old_name, old_len, &dno, &qbh))) { 624 if (!(dep = map_dirent(old_dir, hpfs_i(old_dir)->i_dno, old_name, old_len, &dno, &qbh))) {
622 hpfs_unlock_creation(i->i_sb); 625 hpfs_unlock_creation(i->i_sb);
623 hpfs_error(i->i_sb, "lookup succeeded but map dirent failed at #2"); 626 hpfs_error(i->i_sb, "lookup succeeded but map dirent failed at #2");
624 err = -ENOENT; 627 err = -ENOENT;
@@ -648,7 +651,7 @@ static int hpfs_rename(struct inode *old_dir, struct dentry *old_dentry,
648 brelse(bh); 651 brelse(bh);
649 } 652 }
650 hpfs_i(i)->i_conv = hpfs_sb(i->i_sb)->sb_conv; 653 hpfs_i(i)->i_conv = hpfs_sb(i->i_sb)->sb_conv;
651 hpfs_decide_conv(i, (char *)new_name, new_len); 654 hpfs_decide_conv(i, new_name, new_len);
652end1: 655end1:
653 if (old_dir != new_dir) 656 if (old_dir != new_dir)
654 mutex_unlock(&hpfs_i(new_dir)->i_mutex); 657 mutex_unlock(&hpfs_i(new_dir)->i_mutex);
diff --git a/fs/hppfs/hppfs.c b/fs/hppfs/hppfs.c
index 7239efc690d8..2e4dfa8593da 100644
--- a/fs/hppfs/hppfs.c
+++ b/fs/hppfs/hppfs.c
@@ -718,7 +718,7 @@ static int hppfs_fill_super(struct super_block *sb, void *d, int silent)
718 struct vfsmount *proc_mnt; 718 struct vfsmount *proc_mnt;
719 int err = -ENOENT; 719 int err = -ENOENT;
720 720
721 proc_mnt = do_kern_mount("proc", 0, "proc", NULL); 721 proc_mnt = mntget(current->nsproxy->pid_ns->proc_mnt);
722 if (IS_ERR(proc_mnt)) 722 if (IS_ERR(proc_mnt))
723 goto out; 723 goto out;
724 724
diff --git a/fs/inode.c b/fs/inode.c
index 03dfeb2e3928..407bf392e20a 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -8,7 +8,6 @@
8#include <linux/mm.h> 8#include <linux/mm.h>
9#include <linux/dcache.h> 9#include <linux/dcache.h>
10#include <linux/init.h> 10#include <linux/init.h>
11#include <linux/quotaops.h>
12#include <linux/slab.h> 11#include <linux/slab.h>
13#include <linux/writeback.h> 12#include <linux/writeback.h>
14#include <linux/module.h> 13#include <linux/module.h>
@@ -314,7 +313,6 @@ void clear_inode(struct inode *inode)
314 BUG_ON(!(inode->i_state & I_FREEING)); 313 BUG_ON(!(inode->i_state & I_FREEING));
315 BUG_ON(inode->i_state & I_CLEAR); 314 BUG_ON(inode->i_state & I_CLEAR);
316 inode_sync_wait(inode); 315 inode_sync_wait(inode);
317 vfs_dq_drop(inode);
318 if (inode->i_sb->s_op->clear_inode) 316 if (inode->i_sb->s_op->clear_inode)
319 inode->i_sb->s_op->clear_inode(inode); 317 inode->i_sb->s_op->clear_inode(inode);
320 if (S_ISBLK(inode->i_mode) && inode->i_bdev) 318 if (S_ISBLK(inode->i_mode) && inode->i_bdev)
@@ -1211,8 +1209,6 @@ void generic_delete_inode(struct inode *inode)
1211 1209
1212 if (op->delete_inode) { 1210 if (op->delete_inode) {
1213 void (*delete)(struct inode *) = op->delete_inode; 1211 void (*delete)(struct inode *) = op->delete_inode;
1214 if (!is_bad_inode(inode))
1215 vfs_dq_init(inode);
1216 /* Filesystems implementing their own 1212 /* Filesystems implementing their own
1217 * s_op->delete_inode are required to call 1213 * s_op->delete_inode are required to call
1218 * truncate_inode_pages and clear_inode() 1214 * truncate_inode_pages and clear_inode()
diff --git a/fs/internal.h b/fs/internal.h
index e96a1667d749..8a03a5447bdf 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -70,6 +70,8 @@ extern struct vfsmount *copy_tree(struct vfsmount *, struct dentry *, int);
70 70
71extern void __init mnt_init(void); 71extern void __init mnt_init(void);
72 72
73extern spinlock_t vfsmount_lock;
74
73/* 75/*
74 * fs_struct.c 76 * fs_struct.c
75 */ 77 */
diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c
index 4bd882548c45..2c90e3ef625f 100644
--- a/fs/jbd/commit.c
+++ b/fs/jbd/commit.c
@@ -862,12 +862,12 @@ restart_loop:
862 /* A buffer which has been freed while still being 862 /* A buffer which has been freed while still being
863 * journaled by a previous transaction may end up still 863 * journaled by a previous transaction may end up still
864 * being dirty here, but we want to avoid writing back 864 * being dirty here, but we want to avoid writing back
865 * that buffer in the future now that the last use has 865 * that buffer in the future after the "add to orphan"
866 * been committed. That's not only a performance gain, 866 * operation been committed, That's not only a performance
867 * it also stops aliasing problems if the buffer is left 867 * gain, it also stops aliasing problems if the buffer is
868 * behind for writeback and gets reallocated for another 868 * left behind for writeback and gets reallocated for another
869 * use in a different page. */ 869 * use in a different page. */
870 if (buffer_freed(bh)) { 870 if (buffer_freed(bh) && !jh->b_next_transaction) {
871 clear_buffer_freed(bh); 871 clear_buffer_freed(bh);
872 clear_buffer_jbddirty(bh); 872 clear_buffer_jbddirty(bh);
873 } 873 }
diff --git a/fs/jbd/transaction.c b/fs/jbd/transaction.c
index 006f9ad838a2..99e9fea11077 100644
--- a/fs/jbd/transaction.c
+++ b/fs/jbd/transaction.c
@@ -1864,6 +1864,21 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh)
1864 if (!jh) 1864 if (!jh)
1865 goto zap_buffer_no_jh; 1865 goto zap_buffer_no_jh;
1866 1866
1867 /*
1868 * We cannot remove the buffer from checkpoint lists until the
1869 * transaction adding inode to orphan list (let's call it T)
1870 * is committed. Otherwise if the transaction changing the
1871 * buffer would be cleaned from the journal before T is
1872 * committed, a crash will cause that the correct contents of
1873 * the buffer will be lost. On the other hand we have to
1874 * clear the buffer dirty bit at latest at the moment when the
1875 * transaction marking the buffer as freed in the filesystem
1876 * structures is committed because from that moment on the
1877 * buffer can be reallocated and used by a different page.
1878 * Since the block hasn't been freed yet but the inode has
1879 * already been added to orphan list, it is safe for us to add
1880 * the buffer to BJ_Forget list of the newest transaction.
1881 */
1867 transaction = jh->b_transaction; 1882 transaction = jh->b_transaction;
1868 if (transaction == NULL) { 1883 if (transaction == NULL) {
1869 /* First case: not on any transaction. If it 1884 /* First case: not on any transaction. If it
@@ -1929,16 +1944,15 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh)
1929 goto zap_buffer; 1944 goto zap_buffer;
1930 } 1945 }
1931 /* 1946 /*
1932 * If it is committing, we simply cannot touch it. We 1947 * The buffer is committing, we simply cannot touch
1933 * can remove it's next_transaction pointer from the 1948 * it. So we just set j_next_transaction to the
1934 * running transaction if that is set, but nothing 1949 * running transaction (if there is one) and mark
1935 * else. */ 1950 * buffer as freed so that commit code knows it should
1951 * clear dirty bits when it is done with the buffer.
1952 */
1936 set_buffer_freed(bh); 1953 set_buffer_freed(bh);
1937 if (jh->b_next_transaction) { 1954 if (journal->j_running_transaction && buffer_jbddirty(bh))
1938 J_ASSERT(jh->b_next_transaction == 1955 jh->b_next_transaction = journal->j_running_transaction;
1939 journal->j_running_transaction);
1940 jh->b_next_transaction = NULL;
1941 }
1942 journal_put_journal_head(jh); 1956 journal_put_journal_head(jh);
1943 spin_unlock(&journal->j_list_lock); 1957 spin_unlock(&journal->j_list_lock);
1944 jbd_unlock_bh_state(bh); 1958 jbd_unlock_bh_state(bh);
@@ -2120,7 +2134,7 @@ void journal_file_buffer(struct journal_head *jh,
2120 */ 2134 */
2121void __journal_refile_buffer(struct journal_head *jh) 2135void __journal_refile_buffer(struct journal_head *jh)
2122{ 2136{
2123 int was_dirty; 2137 int was_dirty, jlist;
2124 struct buffer_head *bh = jh2bh(jh); 2138 struct buffer_head *bh = jh2bh(jh);
2125 2139
2126 J_ASSERT_JH(jh, jbd_is_locked_bh_state(bh)); 2140 J_ASSERT_JH(jh, jbd_is_locked_bh_state(bh));
@@ -2142,8 +2156,13 @@ void __journal_refile_buffer(struct journal_head *jh)
2142 __journal_temp_unlink_buffer(jh); 2156 __journal_temp_unlink_buffer(jh);
2143 jh->b_transaction = jh->b_next_transaction; 2157 jh->b_transaction = jh->b_next_transaction;
2144 jh->b_next_transaction = NULL; 2158 jh->b_next_transaction = NULL;
2145 __journal_file_buffer(jh, jh->b_transaction, 2159 if (buffer_freed(bh))
2146 jh->b_modified ? BJ_Metadata : BJ_Reserved); 2160 jlist = BJ_Forget;
2161 else if (jh->b_modified)
2162 jlist = BJ_Metadata;
2163 else
2164 jlist = BJ_Reserved;
2165 __journal_file_buffer(jh, jh->b_transaction, jlist);
2147 J_ASSERT_JH(jh, jh->b_transaction->t_state == T_RUNNING); 2166 J_ASSERT_JH(jh, jh->b_transaction->t_state == T_RUNNING);
2148 2167
2149 if (was_dirty) 2168 if (was_dirty)
diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c
index 886849370950..30beb11ef928 100644
--- a/fs/jbd2/checkpoint.c
+++ b/fs/jbd2/checkpoint.c
@@ -507,6 +507,7 @@ int jbd2_cleanup_journal_tail(journal_t *journal)
507 if (blocknr < journal->j_tail) 507 if (blocknr < journal->j_tail)
508 freed = freed + journal->j_last - journal->j_first; 508 freed = freed + journal->j_last - journal->j_first;
509 509
510 trace_jbd2_cleanup_journal_tail(journal, first_tid, blocknr, freed);
510 jbd_debug(1, 511 jbd_debug(1,
511 "Cleaning journal tail from %d to %d (offset %lu), " 512 "Cleaning journal tail from %d to %d (offset %lu), "
512 "freeing %lu\n", 513 "freeing %lu\n",
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
index 1bc74b6f26d2..671da7fb7ffd 100644
--- a/fs/jbd2/commit.c
+++ b/fs/jbd2/commit.c
@@ -883,8 +883,7 @@ restart_loop:
883 spin_unlock(&journal->j_list_lock); 883 spin_unlock(&journal->j_list_lock);
884 bh = jh2bh(jh); 884 bh = jh2bh(jh);
885 jbd_lock_bh_state(bh); 885 jbd_lock_bh_state(bh);
886 J_ASSERT_JH(jh, jh->b_transaction == commit_transaction || 886 J_ASSERT_JH(jh, jh->b_transaction == commit_transaction);
887 jh->b_transaction == journal->j_running_transaction);
888 887
889 /* 888 /*
890 * If there is undo-protected committed data against 889 * If there is undo-protected committed data against
@@ -930,12 +929,12 @@ restart_loop:
930 /* A buffer which has been freed while still being 929 /* A buffer which has been freed while still being
931 * journaled by a previous transaction may end up still 930 * journaled by a previous transaction may end up still
932 * being dirty here, but we want to avoid writing back 931 * being dirty here, but we want to avoid writing back
933 * that buffer in the future now that the last use has 932 * that buffer in the future after the "add to orphan"
934 * been committed. That's not only a performance gain, 933 * operation been committed, That's not only a performance
935 * it also stops aliasing problems if the buffer is left 934 * gain, it also stops aliasing problems if the buffer is
936 * behind for writeback and gets reallocated for another 935 * left behind for writeback and gets reallocated for another
937 * use in a different page. */ 936 * use in a different page. */
938 if (buffer_freed(bh)) { 937 if (buffer_freed(bh) && !jh->b_next_transaction) {
939 clear_buffer_freed(bh); 938 clear_buffer_freed(bh);
940 clear_buffer_jbddirty(bh); 939 clear_buffer_jbddirty(bh);
941 } 940 }
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index ac0d027595d0..c03d4dce4d76 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -39,6 +39,8 @@
39#include <linux/seq_file.h> 39#include <linux/seq_file.h>
40#include <linux/math64.h> 40#include <linux/math64.h>
41#include <linux/hash.h> 41#include <linux/hash.h>
42#include <linux/log2.h>
43#include <linux/vmalloc.h>
42 44
43#define CREATE_TRACE_POINTS 45#define CREATE_TRACE_POINTS
44#include <trace/events/jbd2.h> 46#include <trace/events/jbd2.h>
@@ -93,6 +95,7 @@ EXPORT_SYMBOL(jbd2_journal_begin_ordered_truncate);
93 95
94static int journal_convert_superblock_v1(journal_t *, journal_superblock_t *); 96static int journal_convert_superblock_v1(journal_t *, journal_superblock_t *);
95static void __journal_abort_soft (journal_t *journal, int errno); 97static void __journal_abort_soft (journal_t *journal, int errno);
98static int jbd2_journal_create_slab(size_t slab_size);
96 99
97/* 100/*
98 * Helper function used to manage commit timeouts 101 * Helper function used to manage commit timeouts
@@ -1248,6 +1251,13 @@ int jbd2_journal_load(journal_t *journal)
1248 } 1251 }
1249 } 1252 }
1250 1253
1254 /*
1255 * Create a slab for this blocksize
1256 */
1257 err = jbd2_journal_create_slab(be32_to_cpu(sb->s_blocksize));
1258 if (err)
1259 return err;
1260
1251 /* Let the recovery code check whether it needs to recover any 1261 /* Let the recovery code check whether it needs to recover any
1252 * data from the journal. */ 1262 * data from the journal. */
1253 if (jbd2_journal_recover(journal)) 1263 if (jbd2_journal_recover(journal))
@@ -1807,6 +1817,127 @@ size_t journal_tag_bytes(journal_t *journal)
1807} 1817}
1808 1818
1809/* 1819/*
1820 * JBD memory management
1821 *
1822 * These functions are used to allocate block-sized chunks of memory
1823 * used for making copies of buffer_head data. Very often it will be
1824 * page-sized chunks of data, but sometimes it will be in
1825 * sub-page-size chunks. (For example, 16k pages on Power systems
1826 * with a 4k block file system.) For blocks smaller than a page, we
1827 * use a SLAB allocator. There are slab caches for each block size,
1828 * which are allocated at mount time, if necessary, and we only free
1829 * (all of) the slab caches when/if the jbd2 module is unloaded. For
1830 * this reason we don't need to a mutex to protect access to
1831 * jbd2_slab[] allocating or releasing memory; only in
1832 * jbd2_journal_create_slab().
1833 */
1834#define JBD2_MAX_SLABS 8
1835static struct kmem_cache *jbd2_slab[JBD2_MAX_SLABS];
1836static DECLARE_MUTEX(jbd2_slab_create_sem);
1837
1838static const char *jbd2_slab_names[JBD2_MAX_SLABS] = {
1839 "jbd2_1k", "jbd2_2k", "jbd2_4k", "jbd2_8k",
1840 "jbd2_16k", "jbd2_32k", "jbd2_64k", "jbd2_128k"
1841};
1842
1843
1844static void jbd2_journal_destroy_slabs(void)
1845{
1846 int i;
1847
1848 for (i = 0; i < JBD2_MAX_SLABS; i++) {
1849 if (jbd2_slab[i])
1850 kmem_cache_destroy(jbd2_slab[i]);
1851 jbd2_slab[i] = NULL;
1852 }
1853}
1854
1855static int jbd2_journal_create_slab(size_t size)
1856{
1857 int i = order_base_2(size) - 10;
1858 size_t slab_size;
1859
1860 if (size == PAGE_SIZE)
1861 return 0;
1862
1863 if (i >= JBD2_MAX_SLABS)
1864 return -EINVAL;
1865
1866 if (unlikely(i < 0))
1867 i = 0;
1868 down(&jbd2_slab_create_sem);
1869 if (jbd2_slab[i]) {
1870 up(&jbd2_slab_create_sem);
1871 return 0; /* Already created */
1872 }
1873
1874 slab_size = 1 << (i+10);
1875 jbd2_slab[i] = kmem_cache_create(jbd2_slab_names[i], slab_size,
1876 slab_size, 0, NULL);
1877 up(&jbd2_slab_create_sem);
1878 if (!jbd2_slab[i]) {
1879 printk(KERN_EMERG "JBD2: no memory for jbd2_slab cache\n");
1880 return -ENOMEM;
1881 }
1882 return 0;
1883}
1884
1885static struct kmem_cache *get_slab(size_t size)
1886{
1887 int i = order_base_2(size) - 10;
1888
1889 BUG_ON(i >= JBD2_MAX_SLABS);
1890 if (unlikely(i < 0))
1891 i = 0;
1892 BUG_ON(jbd2_slab[i] == 0);
1893 return jbd2_slab[i];
1894}
1895
1896void *jbd2_alloc(size_t size, gfp_t flags)
1897{
1898 void *ptr;
1899
1900 BUG_ON(size & (size-1)); /* Must be a power of 2 */
1901
1902 flags |= __GFP_REPEAT;
1903 if (size == PAGE_SIZE)
1904 ptr = (void *)__get_free_pages(flags, 0);
1905 else if (size > PAGE_SIZE) {
1906 int order = get_order(size);
1907
1908 if (order < 3)
1909 ptr = (void *)__get_free_pages(flags, order);
1910 else
1911 ptr = vmalloc(size);
1912 } else
1913 ptr = kmem_cache_alloc(get_slab(size), flags);
1914
1915 /* Check alignment; SLUB has gotten this wrong in the past,
1916 * and this can lead to user data corruption! */
1917 BUG_ON(((unsigned long) ptr) & (size-1));
1918
1919 return ptr;
1920}
1921
1922void jbd2_free(void *ptr, size_t size)
1923{
1924 if (size == PAGE_SIZE) {
1925 free_pages((unsigned long)ptr, 0);
1926 return;
1927 }
1928 if (size > PAGE_SIZE) {
1929 int order = get_order(size);
1930
1931 if (order < 3)
1932 free_pages((unsigned long)ptr, order);
1933 else
1934 vfree(ptr);
1935 return;
1936 }
1937 kmem_cache_free(get_slab(size), ptr);
1938};
1939
1940/*
1810 * Journal_head storage management 1941 * Journal_head storage management
1811 */ 1942 */
1812static struct kmem_cache *jbd2_journal_head_cache; 1943static struct kmem_cache *jbd2_journal_head_cache;
@@ -2204,6 +2335,7 @@ static void jbd2_journal_destroy_caches(void)
2204 jbd2_journal_destroy_revoke_caches(); 2335 jbd2_journal_destroy_revoke_caches();
2205 jbd2_journal_destroy_jbd2_journal_head_cache(); 2336 jbd2_journal_destroy_jbd2_journal_head_cache();
2206 jbd2_journal_destroy_handle_cache(); 2337 jbd2_journal_destroy_handle_cache();
2338 jbd2_journal_destroy_slabs();
2207} 2339}
2208 2340
2209static int __init journal_init(void) 2341static int __init journal_init(void)
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index a0512700542f..bfc70f57900f 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -1727,6 +1727,21 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh)
1727 if (!jh) 1727 if (!jh)
1728 goto zap_buffer_no_jh; 1728 goto zap_buffer_no_jh;
1729 1729
1730 /*
1731 * We cannot remove the buffer from checkpoint lists until the
1732 * transaction adding inode to orphan list (let's call it T)
1733 * is committed. Otherwise if the transaction changing the
1734 * buffer would be cleaned from the journal before T is
1735 * committed, a crash will cause that the correct contents of
1736 * the buffer will be lost. On the other hand we have to
1737 * clear the buffer dirty bit at latest at the moment when the
1738 * transaction marking the buffer as freed in the filesystem
1739 * structures is committed because from that moment on the
1740 * buffer can be reallocated and used by a different page.
1741 * Since the block hasn't been freed yet but the inode has
1742 * already been added to orphan list, it is safe for us to add
1743 * the buffer to BJ_Forget list of the newest transaction.
1744 */
1730 transaction = jh->b_transaction; 1745 transaction = jh->b_transaction;
1731 if (transaction == NULL) { 1746 if (transaction == NULL) {
1732 /* First case: not on any transaction. If it 1747 /* First case: not on any transaction. If it
@@ -1783,16 +1798,15 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh)
1783 } else if (transaction == journal->j_committing_transaction) { 1798 } else if (transaction == journal->j_committing_transaction) {
1784 JBUFFER_TRACE(jh, "on committing transaction"); 1799 JBUFFER_TRACE(jh, "on committing transaction");
1785 /* 1800 /*
1786 * If it is committing, we simply cannot touch it. We 1801 * The buffer is committing, we simply cannot touch
1787 * can remove it's next_transaction pointer from the 1802 * it. So we just set j_next_transaction to the
1788 * running transaction if that is set, but nothing 1803 * running transaction (if there is one) and mark
1789 * else. */ 1804 * buffer as freed so that commit code knows it should
1805 * clear dirty bits when it is done with the buffer.
1806 */
1790 set_buffer_freed(bh); 1807 set_buffer_freed(bh);
1791 if (jh->b_next_transaction) { 1808 if (journal->j_running_transaction && buffer_jbddirty(bh))
1792 J_ASSERT(jh->b_next_transaction == 1809 jh->b_next_transaction = journal->j_running_transaction;
1793 journal->j_running_transaction);
1794 jh->b_next_transaction = NULL;
1795 }
1796 jbd2_journal_put_journal_head(jh); 1810 jbd2_journal_put_journal_head(jh);
1797 spin_unlock(&journal->j_list_lock); 1811 spin_unlock(&journal->j_list_lock);
1798 jbd_unlock_bh_state(bh); 1812 jbd_unlock_bh_state(bh);
@@ -1969,7 +1983,7 @@ void jbd2_journal_file_buffer(struct journal_head *jh,
1969 */ 1983 */
1970void __jbd2_journal_refile_buffer(struct journal_head *jh) 1984void __jbd2_journal_refile_buffer(struct journal_head *jh)
1971{ 1985{
1972 int was_dirty; 1986 int was_dirty, jlist;
1973 struct buffer_head *bh = jh2bh(jh); 1987 struct buffer_head *bh = jh2bh(jh);
1974 1988
1975 J_ASSERT_JH(jh, jbd_is_locked_bh_state(bh)); 1989 J_ASSERT_JH(jh, jbd_is_locked_bh_state(bh));
@@ -1991,8 +2005,13 @@ void __jbd2_journal_refile_buffer(struct journal_head *jh)
1991 __jbd2_journal_temp_unlink_buffer(jh); 2005 __jbd2_journal_temp_unlink_buffer(jh);
1992 jh->b_transaction = jh->b_next_transaction; 2006 jh->b_transaction = jh->b_next_transaction;
1993 jh->b_next_transaction = NULL; 2007 jh->b_next_transaction = NULL;
1994 __jbd2_journal_file_buffer(jh, jh->b_transaction, 2008 if (buffer_freed(bh))
1995 jh->b_modified ? BJ_Metadata : BJ_Reserved); 2009 jlist = BJ_Forget;
2010 else if (jh->b_modified)
2011 jlist = BJ_Metadata;
2012 else
2013 jlist = BJ_Reserved;
2014 __jbd2_journal_file_buffer(jh, jh->b_transaction, jlist);
1996 J_ASSERT_JH(jh, jh->b_transaction->t_state == T_RUNNING); 2015 J_ASSERT_JH(jh, jh->b_transaction->t_state == T_RUNNING);
1997 2016
1998 if (was_dirty) 2017 if (was_dirty)
diff --git a/fs/jfs/acl.c b/fs/jfs/acl.c
index d66477c34306..213169780b6c 100644
--- a/fs/jfs/acl.c
+++ b/fs/jfs/acl.c
@@ -20,7 +20,6 @@
20 20
21#include <linux/sched.h> 21#include <linux/sched.h>
22#include <linux/fs.h> 22#include <linux/fs.h>
23#include <linux/quotaops.h>
24#include <linux/posix_acl_xattr.h> 23#include <linux/posix_acl_xattr.h>
25#include "jfs_incore.h" 24#include "jfs_incore.h"
26#include "jfs_txnmgr.h" 25#include "jfs_txnmgr.h"
@@ -174,7 +173,7 @@ cleanup:
174 return rc; 173 return rc;
175} 174}
176 175
177static int jfs_acl_chmod(struct inode *inode) 176int jfs_acl_chmod(struct inode *inode)
178{ 177{
179 struct posix_acl *acl, *clone; 178 struct posix_acl *acl, *clone;
180 int rc; 179 int rc;
@@ -205,26 +204,3 @@ static int jfs_acl_chmod(struct inode *inode)
205 posix_acl_release(clone); 204 posix_acl_release(clone);
206 return rc; 205 return rc;
207} 206}
208
209int jfs_setattr(struct dentry *dentry, struct iattr *iattr)
210{
211 struct inode *inode = dentry->d_inode;
212 int rc;
213
214 rc = inode_change_ok(inode, iattr);
215 if (rc)
216 return rc;
217
218 if ((iattr->ia_valid & ATTR_UID && iattr->ia_uid != inode->i_uid) ||
219 (iattr->ia_valid & ATTR_GID && iattr->ia_gid != inode->i_gid)) {
220 if (vfs_dq_transfer(inode, iattr))
221 return -EDQUOT;
222 }
223
224 rc = inode_setattr(inode, iattr);
225
226 if (!rc && (iattr->ia_valid & ATTR_MODE))
227 rc = jfs_acl_chmod(inode);
228
229 return rc;
230}
diff --git a/fs/jfs/file.c b/fs/jfs/file.c
index 2b70fa78e4a7..14ba982b3f24 100644
--- a/fs/jfs/file.c
+++ b/fs/jfs/file.c
@@ -18,6 +18,7 @@
18 */ 18 */
19 19
20#include <linux/fs.h> 20#include <linux/fs.h>
21#include <linux/quotaops.h>
21#include "jfs_incore.h" 22#include "jfs_incore.h"
22#include "jfs_inode.h" 23#include "jfs_inode.h"
23#include "jfs_dmap.h" 24#include "jfs_dmap.h"
@@ -47,7 +48,7 @@ static int jfs_open(struct inode *inode, struct file *file)
47{ 48{
48 int rc; 49 int rc;
49 50
50 if ((rc = generic_file_open(inode, file))) 51 if ((rc = dquot_file_open(inode, file)))
51 return rc; 52 return rc;
52 53
53 /* 54 /*
@@ -88,14 +89,40 @@ static int jfs_release(struct inode *inode, struct file *file)
88 return 0; 89 return 0;
89} 90}
90 91
92int jfs_setattr(struct dentry *dentry, struct iattr *iattr)
93{
94 struct inode *inode = dentry->d_inode;
95 int rc;
96
97 rc = inode_change_ok(inode, iattr);
98 if (rc)
99 return rc;
100
101 if (iattr->ia_valid & ATTR_SIZE)
102 dquot_initialize(inode);
103 if ((iattr->ia_valid & ATTR_UID && iattr->ia_uid != inode->i_uid) ||
104 (iattr->ia_valid & ATTR_GID && iattr->ia_gid != inode->i_gid)) {
105 rc = dquot_transfer(inode, iattr);
106 if (rc)
107 return rc;
108 }
109
110 rc = inode_setattr(inode, iattr);
111
112 if (!rc && (iattr->ia_valid & ATTR_MODE))
113 rc = jfs_acl_chmod(inode);
114
115 return rc;
116}
117
91const struct inode_operations jfs_file_inode_operations = { 118const struct inode_operations jfs_file_inode_operations = {
92 .truncate = jfs_truncate, 119 .truncate = jfs_truncate,
93 .setxattr = jfs_setxattr, 120 .setxattr = jfs_setxattr,
94 .getxattr = jfs_getxattr, 121 .getxattr = jfs_getxattr,
95 .listxattr = jfs_listxattr, 122 .listxattr = jfs_listxattr,
96 .removexattr = jfs_removexattr, 123 .removexattr = jfs_removexattr,
97#ifdef CONFIG_JFS_POSIX_ACL
98 .setattr = jfs_setattr, 124 .setattr = jfs_setattr,
125#ifdef CONFIG_JFS_POSIX_ACL
99 .check_acl = jfs_check_acl, 126 .check_acl = jfs_check_acl,
100#endif 127#endif
101}; 128};
diff --git a/fs/jfs/inode.c b/fs/jfs/inode.c
index b2ae190a77ba..9dd126276c9f 100644
--- a/fs/jfs/inode.c
+++ b/fs/jfs/inode.c
@@ -22,6 +22,7 @@
22#include <linux/buffer_head.h> 22#include <linux/buffer_head.h>
23#include <linux/pagemap.h> 23#include <linux/pagemap.h>
24#include <linux/quotaops.h> 24#include <linux/quotaops.h>
25#include <linux/writeback.h>
25#include "jfs_incore.h" 26#include "jfs_incore.h"
26#include "jfs_inode.h" 27#include "jfs_inode.h"
27#include "jfs_filsys.h" 28#include "jfs_filsys.h"
@@ -120,8 +121,10 @@ int jfs_commit_inode(struct inode *inode, int wait)
120 return rc; 121 return rc;
121} 122}
122 123
123int jfs_write_inode(struct inode *inode, int wait) 124int jfs_write_inode(struct inode *inode, struct writeback_control *wbc)
124{ 125{
126 int wait = wbc->sync_mode == WB_SYNC_ALL;
127
125 if (test_cflag(COMMIT_Nolink, inode)) 128 if (test_cflag(COMMIT_Nolink, inode))
126 return 0; 129 return 0;
127 /* 130 /*
@@ -146,6 +149,9 @@ void jfs_delete_inode(struct inode *inode)
146{ 149{
147 jfs_info("In jfs_delete_inode, inode = 0x%p", inode); 150 jfs_info("In jfs_delete_inode, inode = 0x%p", inode);
148 151
152 if (!is_bad_inode(inode))
153 dquot_initialize(inode);
154
149 if (!is_bad_inode(inode) && 155 if (!is_bad_inode(inode) &&
150 (JFS_IP(inode)->fileset == FILESYSTEM_I)) { 156 (JFS_IP(inode)->fileset == FILESYSTEM_I)) {
151 truncate_inode_pages(&inode->i_data, 0); 157 truncate_inode_pages(&inode->i_data, 0);
@@ -158,9 +164,9 @@ void jfs_delete_inode(struct inode *inode)
158 /* 164 /*
159 * Free the inode from the quota allocation. 165 * Free the inode from the quota allocation.
160 */ 166 */
161 vfs_dq_init(inode); 167 dquot_initialize(inode);
162 vfs_dq_free_inode(inode); 168 dquot_free_inode(inode);
163 vfs_dq_drop(inode); 169 dquot_drop(inode);
164 } 170 }
165 171
166 clear_inode(inode); 172 clear_inode(inode);
diff --git a/fs/jfs/jfs_acl.h b/fs/jfs/jfs_acl.h
index b07bd417ef85..54e07559878d 100644
--- a/fs/jfs/jfs_acl.h
+++ b/fs/jfs/jfs_acl.h
@@ -22,7 +22,7 @@
22 22
23int jfs_check_acl(struct inode *, int); 23int jfs_check_acl(struct inode *, int);
24int jfs_init_acl(tid_t, struct inode *, struct inode *); 24int jfs_init_acl(tid_t, struct inode *, struct inode *);
25int jfs_setattr(struct dentry *, struct iattr *); 25int jfs_acl_chmod(struct inode *inode);
26 26
27#else 27#else
28 28
@@ -32,5 +32,10 @@ static inline int jfs_init_acl(tid_t tid, struct inode *inode,
32 return 0; 32 return 0;
33} 33}
34 34
35static inline int jfs_acl_chmod(struct inode *inode)
36{
37 return 0;
38}
39
35#endif 40#endif
36#endif /* _H_JFS_ACL */ 41#endif /* _H_JFS_ACL */
diff --git a/fs/jfs/jfs_dtree.c b/fs/jfs/jfs_dtree.c
index 925871e9887b..0e4623be70ce 100644
--- a/fs/jfs/jfs_dtree.c
+++ b/fs/jfs/jfs_dtree.c
@@ -381,10 +381,10 @@ static u32 add_index(tid_t tid, struct inode *ip, s64 bn, int slot)
381 * It's time to move the inline table to an external 381 * It's time to move the inline table to an external
382 * page and begin to build the xtree 382 * page and begin to build the xtree
383 */ 383 */
384 if (vfs_dq_alloc_block(ip, sbi->nbperpage)) 384 if (dquot_alloc_block(ip, sbi->nbperpage))
385 goto clean_up; 385 goto clean_up;
386 if (dbAlloc(ip, 0, sbi->nbperpage, &xaddr)) { 386 if (dbAlloc(ip, 0, sbi->nbperpage, &xaddr)) {
387 vfs_dq_free_block(ip, sbi->nbperpage); 387 dquot_free_block(ip, sbi->nbperpage);
388 goto clean_up; 388 goto clean_up;
389 } 389 }
390 390
@@ -408,7 +408,7 @@ static u32 add_index(tid_t tid, struct inode *ip, s64 bn, int slot)
408 memcpy(&jfs_ip->i_dirtable, temp_table, 408 memcpy(&jfs_ip->i_dirtable, temp_table,
409 sizeof (temp_table)); 409 sizeof (temp_table));
410 dbFree(ip, xaddr, sbi->nbperpage); 410 dbFree(ip, xaddr, sbi->nbperpage);
411 vfs_dq_free_block(ip, sbi->nbperpage); 411 dquot_free_block(ip, sbi->nbperpage);
412 goto clean_up; 412 goto clean_up;
413 } 413 }
414 ip->i_size = PSIZE; 414 ip->i_size = PSIZE;
@@ -1027,10 +1027,9 @@ static int dtSplitUp(tid_t tid,
1027 n = xlen; 1027 n = xlen;
1028 1028
1029 /* Allocate blocks to quota. */ 1029 /* Allocate blocks to quota. */
1030 if (vfs_dq_alloc_block(ip, n)) { 1030 rc = dquot_alloc_block(ip, n);
1031 rc = -EDQUOT; 1031 if (rc)
1032 goto extendOut; 1032 goto extendOut;
1033 }
1034 quota_allocation += n; 1033 quota_allocation += n;
1035 1034
1036 if ((rc = dbReAlloc(sbi->ipbmap, xaddr, (s64) xlen, 1035 if ((rc = dbReAlloc(sbi->ipbmap, xaddr, (s64) xlen,
@@ -1308,7 +1307,7 @@ static int dtSplitUp(tid_t tid,
1308 1307
1309 /* Rollback quota allocation */ 1308 /* Rollback quota allocation */
1310 if (rc && quota_allocation) 1309 if (rc && quota_allocation)
1311 vfs_dq_free_block(ip, quota_allocation); 1310 dquot_free_block(ip, quota_allocation);
1312 1311
1313 dtSplitUp_Exit: 1312 dtSplitUp_Exit:
1314 1313
@@ -1369,9 +1368,10 @@ static int dtSplitPage(tid_t tid, struct inode *ip, struct dtsplit * split,
1369 return -EIO; 1368 return -EIO;
1370 1369
1371 /* Allocate blocks to quota. */ 1370 /* Allocate blocks to quota. */
1372 if (vfs_dq_alloc_block(ip, lengthPXD(pxd))) { 1371 rc = dquot_alloc_block(ip, lengthPXD(pxd));
1372 if (rc) {
1373 release_metapage(rmp); 1373 release_metapage(rmp);
1374 return -EDQUOT; 1374 return rc;
1375 } 1375 }
1376 1376
1377 jfs_info("dtSplitPage: ip:0x%p smp:0x%p rmp:0x%p", ip, smp, rmp); 1377 jfs_info("dtSplitPage: ip:0x%p smp:0x%p rmp:0x%p", ip, smp, rmp);
@@ -1892,6 +1892,7 @@ static int dtSplitRoot(tid_t tid,
1892 struct dt_lock *dtlck; 1892 struct dt_lock *dtlck;
1893 struct tlock *tlck; 1893 struct tlock *tlck;
1894 struct lv *lv; 1894 struct lv *lv;
1895 int rc;
1895 1896
1896 /* get split root page */ 1897 /* get split root page */
1897 smp = split->mp; 1898 smp = split->mp;
@@ -1916,9 +1917,10 @@ static int dtSplitRoot(tid_t tid,
1916 rp = rmp->data; 1917 rp = rmp->data;
1917 1918
1918 /* Allocate blocks to quota. */ 1919 /* Allocate blocks to quota. */
1919 if (vfs_dq_alloc_block(ip, lengthPXD(pxd))) { 1920 rc = dquot_alloc_block(ip, lengthPXD(pxd));
1921 if (rc) {
1920 release_metapage(rmp); 1922 release_metapage(rmp);
1921 return -EDQUOT; 1923 return rc;
1922 } 1924 }
1923 1925
1924 BT_MARK_DIRTY(rmp, ip); 1926 BT_MARK_DIRTY(rmp, ip);
@@ -2287,7 +2289,7 @@ static int dtDeleteUp(tid_t tid, struct inode *ip,
2287 xlen = lengthPXD(&fp->header.self); 2289 xlen = lengthPXD(&fp->header.self);
2288 2290
2289 /* Free quota allocation. */ 2291 /* Free quota allocation. */
2290 vfs_dq_free_block(ip, xlen); 2292 dquot_free_block(ip, xlen);
2291 2293
2292 /* free/invalidate its buffer page */ 2294 /* free/invalidate its buffer page */
2293 discard_metapage(fmp); 2295 discard_metapage(fmp);
@@ -2363,7 +2365,7 @@ static int dtDeleteUp(tid_t tid, struct inode *ip,
2363 xlen = lengthPXD(&p->header.self); 2365 xlen = lengthPXD(&p->header.self);
2364 2366
2365 /* Free quota allocation */ 2367 /* Free quota allocation */
2366 vfs_dq_free_block(ip, xlen); 2368 dquot_free_block(ip, xlen);
2367 2369
2368 /* free/invalidate its buffer page */ 2370 /* free/invalidate its buffer page */
2369 discard_metapage(mp); 2371 discard_metapage(mp);
diff --git a/fs/jfs/jfs_extent.c b/fs/jfs/jfs_extent.c
index 41d6045dbeb0..5d3bbd10f8db 100644
--- a/fs/jfs/jfs_extent.c
+++ b/fs/jfs/jfs_extent.c
@@ -141,10 +141,11 @@ extAlloc(struct inode *ip, s64 xlen, s64 pno, xad_t * xp, bool abnr)
141 } 141 }
142 142
143 /* Allocate blocks to quota. */ 143 /* Allocate blocks to quota. */
144 if (vfs_dq_alloc_block(ip, nxlen)) { 144 rc = dquot_alloc_block(ip, nxlen);
145 if (rc) {
145 dbFree(ip, nxaddr, (s64) nxlen); 146 dbFree(ip, nxaddr, (s64) nxlen);
146 mutex_unlock(&JFS_IP(ip)->commit_mutex); 147 mutex_unlock(&JFS_IP(ip)->commit_mutex);
147 return -EDQUOT; 148 return rc;
148 } 149 }
149 150
150 /* determine the value of the extent flag */ 151 /* determine the value of the extent flag */
@@ -164,7 +165,7 @@ extAlloc(struct inode *ip, s64 xlen, s64 pno, xad_t * xp, bool abnr)
164 */ 165 */
165 if (rc) { 166 if (rc) {
166 dbFree(ip, nxaddr, nxlen); 167 dbFree(ip, nxaddr, nxlen);
167 vfs_dq_free_block(ip, nxlen); 168 dquot_free_block(ip, nxlen);
168 mutex_unlock(&JFS_IP(ip)->commit_mutex); 169 mutex_unlock(&JFS_IP(ip)->commit_mutex);
169 return (rc); 170 return (rc);
170 } 171 }
@@ -256,10 +257,11 @@ int extRealloc(struct inode *ip, s64 nxlen, xad_t * xp, bool abnr)
256 goto exit; 257 goto exit;
257 258
258 /* Allocat blocks to quota. */ 259 /* Allocat blocks to quota. */
259 if (vfs_dq_alloc_block(ip, nxlen)) { 260 rc = dquot_alloc_block(ip, nxlen);
261 if (rc) {
260 dbFree(ip, nxaddr, (s64) nxlen); 262 dbFree(ip, nxaddr, (s64) nxlen);
261 mutex_unlock(&JFS_IP(ip)->commit_mutex); 263 mutex_unlock(&JFS_IP(ip)->commit_mutex);
262 return -EDQUOT; 264 return rc;
263 } 265 }
264 266
265 delta = nxlen - xlen; 267 delta = nxlen - xlen;
@@ -297,7 +299,7 @@ int extRealloc(struct inode *ip, s64 nxlen, xad_t * xp, bool abnr)
297 /* extend the extent */ 299 /* extend the extent */
298 if ((rc = xtExtend(0, ip, xoff + xlen, (int) nextend, 0))) { 300 if ((rc = xtExtend(0, ip, xoff + xlen, (int) nextend, 0))) {
299 dbFree(ip, xaddr + xlen, delta); 301 dbFree(ip, xaddr + xlen, delta);
300 vfs_dq_free_block(ip, nxlen); 302 dquot_free_block(ip, nxlen);
301 goto exit; 303 goto exit;
302 } 304 }
303 } else { 305 } else {
@@ -308,7 +310,7 @@ int extRealloc(struct inode *ip, s64 nxlen, xad_t * xp, bool abnr)
308 */ 310 */
309 if ((rc = xtTailgate(0, ip, xoff, (int) ntail, nxaddr, 0))) { 311 if ((rc = xtTailgate(0, ip, xoff, (int) ntail, nxaddr, 0))) {
310 dbFree(ip, nxaddr, nxlen); 312 dbFree(ip, nxaddr, nxlen);
311 vfs_dq_free_block(ip, nxlen); 313 dquot_free_block(ip, nxlen);
312 goto exit; 314 goto exit;
313 } 315 }
314 } 316 }
diff --git a/fs/jfs/jfs_inode.c b/fs/jfs/jfs_inode.c
index dc0e02159ac9..829921b67765 100644
--- a/fs/jfs/jfs_inode.c
+++ b/fs/jfs/jfs_inode.c
@@ -116,10 +116,10 @@ struct inode *ialloc(struct inode *parent, umode_t mode)
116 /* 116 /*
117 * Allocate inode to quota. 117 * Allocate inode to quota.
118 */ 118 */
119 if (vfs_dq_alloc_inode(inode)) { 119 dquot_initialize(inode);
120 rc = -EDQUOT; 120 rc = dquot_alloc_inode(inode);
121 if (rc)
121 goto fail_drop; 122 goto fail_drop;
122 }
123 123
124 inode->i_mode = mode; 124 inode->i_mode = mode;
125 /* inherit flags from parent */ 125 /* inherit flags from parent */
@@ -162,7 +162,7 @@ struct inode *ialloc(struct inode *parent, umode_t mode)
162 return inode; 162 return inode;
163 163
164fail_drop: 164fail_drop:
165 vfs_dq_drop(inode); 165 dquot_drop(inode);
166 inode->i_flags |= S_NOQUOTA; 166 inode->i_flags |= S_NOQUOTA;
167fail_unlock: 167fail_unlock:
168 inode->i_nlink = 0; 168 inode->i_nlink = 0;
diff --git a/fs/jfs/jfs_inode.h b/fs/jfs/jfs_inode.h
index 1eff7db34d63..79e2c79661df 100644
--- a/fs/jfs/jfs_inode.h
+++ b/fs/jfs/jfs_inode.h
@@ -26,7 +26,7 @@ extern long jfs_ioctl(struct file *, unsigned int, unsigned long);
26extern long jfs_compat_ioctl(struct file *, unsigned int, unsigned long); 26extern long jfs_compat_ioctl(struct file *, unsigned int, unsigned long);
27extern struct inode *jfs_iget(struct super_block *, unsigned long); 27extern struct inode *jfs_iget(struct super_block *, unsigned long);
28extern int jfs_commit_inode(struct inode *, int); 28extern int jfs_commit_inode(struct inode *, int);
29extern int jfs_write_inode(struct inode*, int); 29extern int jfs_write_inode(struct inode *, struct writeback_control *);
30extern void jfs_delete_inode(struct inode *); 30extern void jfs_delete_inode(struct inode *);
31extern void jfs_dirty_inode(struct inode *); 31extern void jfs_dirty_inode(struct inode *);
32extern void jfs_truncate(struct inode *); 32extern void jfs_truncate(struct inode *);
@@ -40,6 +40,7 @@ extern struct dentry *jfs_fh_to_parent(struct super_block *sb, struct fid *fid,
40 int fh_len, int fh_type); 40 int fh_len, int fh_type);
41extern void jfs_set_inode_flags(struct inode *); 41extern void jfs_set_inode_flags(struct inode *);
42extern int jfs_get_block(struct inode *, sector_t, struct buffer_head *, int); 42extern int jfs_get_block(struct inode *, sector_t, struct buffer_head *, int);
43extern int jfs_setattr(struct dentry *, struct iattr *);
43 44
44extern const struct address_space_operations jfs_aops; 45extern const struct address_space_operations jfs_aops;
45extern const struct inode_operations jfs_dir_inode_operations; 46extern const struct inode_operations jfs_dir_inode_operations;
diff --git a/fs/jfs/jfs_xtree.c b/fs/jfs/jfs_xtree.c
index d654a6458648..6c50871e6220 100644
--- a/fs/jfs/jfs_xtree.c
+++ b/fs/jfs/jfs_xtree.c
@@ -585,10 +585,10 @@ int xtInsert(tid_t tid, /* transaction id */
585 hint = addressXAD(xad) + lengthXAD(xad) - 1; 585 hint = addressXAD(xad) + lengthXAD(xad) - 1;
586 } else 586 } else
587 hint = 0; 587 hint = 0;
588 if ((rc = vfs_dq_alloc_block(ip, xlen))) 588 if ((rc = dquot_alloc_block(ip, xlen)))
589 goto out; 589 goto out;
590 if ((rc = dbAlloc(ip, hint, (s64) xlen, &xaddr))) { 590 if ((rc = dbAlloc(ip, hint, (s64) xlen, &xaddr))) {
591 vfs_dq_free_block(ip, xlen); 591 dquot_free_block(ip, xlen);
592 goto out; 592 goto out;
593 } 593 }
594 } 594 }
@@ -617,7 +617,7 @@ int xtInsert(tid_t tid, /* transaction id */
617 /* undo data extent allocation */ 617 /* undo data extent allocation */
618 if (*xaddrp == 0) { 618 if (*xaddrp == 0) {
619 dbFree(ip, xaddr, (s64) xlen); 619 dbFree(ip, xaddr, (s64) xlen);
620 vfs_dq_free_block(ip, xlen); 620 dquot_free_block(ip, xlen);
621 } 621 }
622 return rc; 622 return rc;
623 } 623 }
@@ -985,10 +985,9 @@ xtSplitPage(tid_t tid, struct inode *ip,
985 rbn = addressPXD(pxd); 985 rbn = addressPXD(pxd);
986 986
987 /* Allocate blocks to quota. */ 987 /* Allocate blocks to quota. */
988 if (vfs_dq_alloc_block(ip, lengthPXD(pxd))) { 988 rc = dquot_alloc_block(ip, lengthPXD(pxd));
989 rc = -EDQUOT; 989 if (rc)
990 goto clean_up; 990 goto clean_up;
991 }
992 991
993 quota_allocation += lengthPXD(pxd); 992 quota_allocation += lengthPXD(pxd);
994 993
@@ -1195,7 +1194,7 @@ xtSplitPage(tid_t tid, struct inode *ip,
1195 1194
1196 /* Rollback quota allocation. */ 1195 /* Rollback quota allocation. */
1197 if (quota_allocation) 1196 if (quota_allocation)
1198 vfs_dq_free_block(ip, quota_allocation); 1197 dquot_free_block(ip, quota_allocation);
1199 1198
1200 return (rc); 1199 return (rc);
1201} 1200}
@@ -1235,6 +1234,7 @@ xtSplitRoot(tid_t tid,
1235 struct pxdlist *pxdlist; 1234 struct pxdlist *pxdlist;
1236 struct tlock *tlck; 1235 struct tlock *tlck;
1237 struct xtlock *xtlck; 1236 struct xtlock *xtlck;
1237 int rc;
1238 1238
1239 sp = &JFS_IP(ip)->i_xtroot; 1239 sp = &JFS_IP(ip)->i_xtroot;
1240 1240
@@ -1252,9 +1252,10 @@ xtSplitRoot(tid_t tid,
1252 return -EIO; 1252 return -EIO;
1253 1253
1254 /* Allocate blocks to quota. */ 1254 /* Allocate blocks to quota. */
1255 if (vfs_dq_alloc_block(ip, lengthPXD(pxd))) { 1255 rc = dquot_alloc_block(ip, lengthPXD(pxd));
1256 if (rc) {
1256 release_metapage(rmp); 1257 release_metapage(rmp);
1257 return -EDQUOT; 1258 return rc;
1258 } 1259 }
1259 1260
1260 jfs_info("xtSplitRoot: ip:0x%p rmp:0x%p", ip, rmp); 1261 jfs_info("xtSplitRoot: ip:0x%p rmp:0x%p", ip, rmp);
@@ -3680,7 +3681,7 @@ s64 xtTruncate(tid_t tid, struct inode *ip, s64 newsize, int flag)
3680 ip->i_size = newsize; 3681 ip->i_size = newsize;
3681 3682
3682 /* update quota allocation to reflect freed blocks */ 3683 /* update quota allocation to reflect freed blocks */
3683 vfs_dq_free_block(ip, nfreed); 3684 dquot_free_block(ip, nfreed);
3684 3685
3685 /* 3686 /*
3686 * free tlock of invalidated pages 3687 * free tlock of invalidated pages
diff --git a/fs/jfs/namei.c b/fs/jfs/namei.c
index c79a4270f083..4a3e9f39c21d 100644
--- a/fs/jfs/namei.c
+++ b/fs/jfs/namei.c
@@ -85,6 +85,8 @@ static int jfs_create(struct inode *dip, struct dentry *dentry, int mode,
85 85
86 jfs_info("jfs_create: dip:0x%p name:%s", dip, dentry->d_name.name); 86 jfs_info("jfs_create: dip:0x%p name:%s", dip, dentry->d_name.name);
87 87
88 dquot_initialize(dip);
89
88 /* 90 /*
89 * search parent directory for entry/freespace 91 * search parent directory for entry/freespace
90 * (dtSearch() returns parent directory page pinned) 92 * (dtSearch() returns parent directory page pinned)
@@ -215,6 +217,8 @@ static int jfs_mkdir(struct inode *dip, struct dentry *dentry, int mode)
215 217
216 jfs_info("jfs_mkdir: dip:0x%p name:%s", dip, dentry->d_name.name); 218 jfs_info("jfs_mkdir: dip:0x%p name:%s", dip, dentry->d_name.name);
217 219
220 dquot_initialize(dip);
221
218 /* link count overflow on parent directory ? */ 222 /* link count overflow on parent directory ? */
219 if (dip->i_nlink == JFS_LINK_MAX) { 223 if (dip->i_nlink == JFS_LINK_MAX) {
220 rc = -EMLINK; 224 rc = -EMLINK;
@@ -356,7 +360,8 @@ static int jfs_rmdir(struct inode *dip, struct dentry *dentry)
356 jfs_info("jfs_rmdir: dip:0x%p name:%s", dip, dentry->d_name.name); 360 jfs_info("jfs_rmdir: dip:0x%p name:%s", dip, dentry->d_name.name);
357 361
358 /* Init inode for quota operations. */ 362 /* Init inode for quota operations. */
359 vfs_dq_init(ip); 363 dquot_initialize(dip);
364 dquot_initialize(ip);
360 365
361 /* directory must be empty to be removed */ 366 /* directory must be empty to be removed */
362 if (!dtEmpty(ip)) { 367 if (!dtEmpty(ip)) {
@@ -483,7 +488,8 @@ static int jfs_unlink(struct inode *dip, struct dentry *dentry)
483 jfs_info("jfs_unlink: dip:0x%p name:%s", dip, dentry->d_name.name); 488 jfs_info("jfs_unlink: dip:0x%p name:%s", dip, dentry->d_name.name);
484 489
485 /* Init inode for quota operations. */ 490 /* Init inode for quota operations. */
486 vfs_dq_init(ip); 491 dquot_initialize(dip);
492 dquot_initialize(ip);
487 493
488 if ((rc = get_UCSname(&dname, dentry))) 494 if ((rc = get_UCSname(&dname, dentry)))
489 goto out; 495 goto out;
@@ -805,6 +811,8 @@ static int jfs_link(struct dentry *old_dentry,
805 if (ip->i_nlink == 0) 811 if (ip->i_nlink == 0)
806 return -ENOENT; 812 return -ENOENT;
807 813
814 dquot_initialize(dir);
815
808 tid = txBegin(ip->i_sb, 0); 816 tid = txBegin(ip->i_sb, 0);
809 817
810 mutex_lock_nested(&JFS_IP(dir)->commit_mutex, COMMIT_MUTEX_PARENT); 818 mutex_lock_nested(&JFS_IP(dir)->commit_mutex, COMMIT_MUTEX_PARENT);
@@ -896,6 +904,8 @@ static int jfs_symlink(struct inode *dip, struct dentry *dentry,
896 904
897 jfs_info("jfs_symlink: dip:0x%p name:%s", dip, name); 905 jfs_info("jfs_symlink: dip:0x%p name:%s", dip, name);
898 906
907 dquot_initialize(dip);
908
899 ssize = strlen(name) + 1; 909 ssize = strlen(name) + 1;
900 910
901 /* 911 /*
@@ -1087,6 +1097,9 @@ static int jfs_rename(struct inode *old_dir, struct dentry *old_dentry,
1087 jfs_info("jfs_rename: %s %s", old_dentry->d_name.name, 1097 jfs_info("jfs_rename: %s %s", old_dentry->d_name.name,
1088 new_dentry->d_name.name); 1098 new_dentry->d_name.name);
1089 1099
1100 dquot_initialize(old_dir);
1101 dquot_initialize(new_dir);
1102
1090 old_ip = old_dentry->d_inode; 1103 old_ip = old_dentry->d_inode;
1091 new_ip = new_dentry->d_inode; 1104 new_ip = new_dentry->d_inode;
1092 1105
@@ -1136,7 +1149,7 @@ static int jfs_rename(struct inode *old_dir, struct dentry *old_dentry,
1136 } else if (new_ip) { 1149 } else if (new_ip) {
1137 IWRITE_LOCK(new_ip, RDWRLOCK_NORMAL); 1150 IWRITE_LOCK(new_ip, RDWRLOCK_NORMAL);
1138 /* Init inode for quota operations. */ 1151 /* Init inode for quota operations. */
1139 vfs_dq_init(new_ip); 1152 dquot_initialize(new_ip);
1140 } 1153 }
1141 1154
1142 /* 1155 /*
@@ -1360,6 +1373,8 @@ static int jfs_mknod(struct inode *dir, struct dentry *dentry,
1360 1373
1361 jfs_info("jfs_mknod: %s", dentry->d_name.name); 1374 jfs_info("jfs_mknod: %s", dentry->d_name.name);
1362 1375
1376 dquot_initialize(dir);
1377
1363 if ((rc = get_UCSname(&dname, dentry))) 1378 if ((rc = get_UCSname(&dname, dentry)))
1364 goto out; 1379 goto out;
1365 1380
@@ -1541,8 +1556,8 @@ const struct inode_operations jfs_dir_inode_operations = {
1541 .getxattr = jfs_getxattr, 1556 .getxattr = jfs_getxattr,
1542 .listxattr = jfs_listxattr, 1557 .listxattr = jfs_listxattr,
1543 .removexattr = jfs_removexattr, 1558 .removexattr = jfs_removexattr,
1544#ifdef CONFIG_JFS_POSIX_ACL
1545 .setattr = jfs_setattr, 1559 .setattr = jfs_setattr,
1560#ifdef CONFIG_JFS_POSIX_ACL
1546 .check_acl = jfs_check_acl, 1561 .check_acl = jfs_check_acl,
1547#endif 1562#endif
1548}; 1563};
diff --git a/fs/jfs/super.c b/fs/jfs/super.c
index d929a822a74e..266699deb1c6 100644
--- a/fs/jfs/super.c
+++ b/fs/jfs/super.c
@@ -131,6 +131,11 @@ static void jfs_destroy_inode(struct inode *inode)
131 kmem_cache_free(jfs_inode_cachep, ji); 131 kmem_cache_free(jfs_inode_cachep, ji);
132} 132}
133 133
134static void jfs_clear_inode(struct inode *inode)
135{
136 dquot_drop(inode);
137}
138
134static int jfs_statfs(struct dentry *dentry, struct kstatfs *buf) 139static int jfs_statfs(struct dentry *dentry, struct kstatfs *buf)
135{ 140{
136 struct jfs_sb_info *sbi = JFS_SBI(dentry->d_sb); 141 struct jfs_sb_info *sbi = JFS_SBI(dentry->d_sb);
@@ -745,6 +750,7 @@ static const struct super_operations jfs_super_operations = {
745 .dirty_inode = jfs_dirty_inode, 750 .dirty_inode = jfs_dirty_inode,
746 .write_inode = jfs_write_inode, 751 .write_inode = jfs_write_inode,
747 .delete_inode = jfs_delete_inode, 752 .delete_inode = jfs_delete_inode,
753 .clear_inode = jfs_clear_inode,
748 .put_super = jfs_put_super, 754 .put_super = jfs_put_super,
749 .sync_fs = jfs_sync_fs, 755 .sync_fs = jfs_sync_fs,
750 .freeze_fs = jfs_freeze, 756 .freeze_fs = jfs_freeze,
diff --git a/fs/jfs/xattr.c b/fs/jfs/xattr.c
index fad364548bc9..1f594ab21895 100644
--- a/fs/jfs/xattr.c
+++ b/fs/jfs/xattr.c
@@ -260,14 +260,14 @@ static int ea_write(struct inode *ip, struct jfs_ea_list *ealist, int size,
260 nblocks = (size + (sb->s_blocksize - 1)) >> sb->s_blocksize_bits; 260 nblocks = (size + (sb->s_blocksize - 1)) >> sb->s_blocksize_bits;
261 261
262 /* Allocate new blocks to quota. */ 262 /* Allocate new blocks to quota. */
263 if (vfs_dq_alloc_block(ip, nblocks)) { 263 rc = dquot_alloc_block(ip, nblocks);
264 return -EDQUOT; 264 if (rc)
265 } 265 return rc;
266 266
267 rc = dbAlloc(ip, INOHINT(ip), nblocks, &blkno); 267 rc = dbAlloc(ip, INOHINT(ip), nblocks, &blkno);
268 if (rc) { 268 if (rc) {
269 /*Rollback quota allocation. */ 269 /*Rollback quota allocation. */
270 vfs_dq_free_block(ip, nblocks); 270 dquot_free_block(ip, nblocks);
271 return rc; 271 return rc;
272 } 272 }
273 273
@@ -332,7 +332,7 @@ static int ea_write(struct inode *ip, struct jfs_ea_list *ealist, int size,
332 332
333 failed: 333 failed:
334 /* Rollback quota allocation. */ 334 /* Rollback quota allocation. */
335 vfs_dq_free_block(ip, nblocks); 335 dquot_free_block(ip, nblocks);
336 336
337 dbFree(ip, blkno, nblocks); 337 dbFree(ip, blkno, nblocks);
338 return rc; 338 return rc;
@@ -538,7 +538,8 @@ static int ea_get(struct inode *inode, struct ea_buffer *ea_buf, int min_size)
538 538
539 if (blocks_needed > current_blocks) { 539 if (blocks_needed > current_blocks) {
540 /* Allocate new blocks to quota. */ 540 /* Allocate new blocks to quota. */
541 if (vfs_dq_alloc_block(inode, blocks_needed)) 541 rc = dquot_alloc_block(inode, blocks_needed);
542 if (rc)
542 return -EDQUOT; 543 return -EDQUOT;
543 544
544 quota_allocation = blocks_needed; 545 quota_allocation = blocks_needed;
@@ -602,7 +603,7 @@ static int ea_get(struct inode *inode, struct ea_buffer *ea_buf, int min_size)
602 clean_up: 603 clean_up:
603 /* Rollback quota allocation */ 604 /* Rollback quota allocation */
604 if (quota_allocation) 605 if (quota_allocation)
605 vfs_dq_free_block(inode, quota_allocation); 606 dquot_free_block(inode, quota_allocation);
606 607
607 return (rc); 608 return (rc);
608} 609}
@@ -677,7 +678,7 @@ static int ea_put(tid_t tid, struct inode *inode, struct ea_buffer *ea_buf,
677 678
678 /* If old blocks exist, they must be removed from quota allocation. */ 679 /* If old blocks exist, they must be removed from quota allocation. */
679 if (old_blocks) 680 if (old_blocks)
680 vfs_dq_free_block(inode, old_blocks); 681 dquot_free_block(inode, old_blocks);
681 682
682 inode->i_ctime = CURRENT_TIME; 683 inode->i_ctime = CURRENT_TIME;
683 684
diff --git a/fs/libfs.c b/fs/libfs.c
index 6e8d17e1dc4c..9e50bcf55857 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -338,28 +338,14 @@ int simple_readpage(struct file *file, struct page *page)
338 return 0; 338 return 0;
339} 339}
340 340
341int simple_prepare_write(struct file *file, struct page *page,
342 unsigned from, unsigned to)
343{
344 if (!PageUptodate(page)) {
345 if (to - from != PAGE_CACHE_SIZE)
346 zero_user_segments(page,
347 0, from,
348 to, PAGE_CACHE_SIZE);
349 }
350 return 0;
351}
352
353int simple_write_begin(struct file *file, struct address_space *mapping, 341int simple_write_begin(struct file *file, struct address_space *mapping,
354 loff_t pos, unsigned len, unsigned flags, 342 loff_t pos, unsigned len, unsigned flags,
355 struct page **pagep, void **fsdata) 343 struct page **pagep, void **fsdata)
356{ 344{
357 struct page *page; 345 struct page *page;
358 pgoff_t index; 346 pgoff_t index;
359 unsigned from;
360 347
361 index = pos >> PAGE_CACHE_SHIFT; 348 index = pos >> PAGE_CACHE_SHIFT;
362 from = pos & (PAGE_CACHE_SIZE - 1);
363 349
364 page = grab_cache_page_write_begin(mapping, index, flags); 350 page = grab_cache_page_write_begin(mapping, index, flags);
365 if (!page) 351 if (!page)
@@ -367,43 +353,59 @@ int simple_write_begin(struct file *file, struct address_space *mapping,
367 353
368 *pagep = page; 354 *pagep = page;
369 355
370 return simple_prepare_write(file, page, from, from+len); 356 if (!PageUptodate(page) && (len != PAGE_CACHE_SIZE)) {
371} 357 unsigned from = pos & (PAGE_CACHE_SIZE - 1);
372
373static int simple_commit_write(struct file *file, struct page *page,
374 unsigned from, unsigned to)
375{
376 struct inode *inode = page->mapping->host;
377 loff_t pos = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to;
378 358
379 if (!PageUptodate(page)) 359 zero_user_segments(page, 0, from, from + len, PAGE_CACHE_SIZE);
380 SetPageUptodate(page); 360 }
381 /*
382 * No need to use i_size_read() here, the i_size
383 * cannot change under us because we hold the i_mutex.
384 */
385 if (pos > inode->i_size)
386 i_size_write(inode, pos);
387 set_page_dirty(page);
388 return 0; 361 return 0;
389} 362}
390 363
364/**
365 * simple_write_end - .write_end helper for non-block-device FSes
366 * @available: See .write_end of address_space_operations
367 * @file: "
368 * @mapping: "
369 * @pos: "
370 * @len: "
371 * @copied: "
372 * @page: "
373 * @fsdata: "
374 *
375 * simple_write_end does the minimum needed for updating a page after writing is
376 * done. It has the same API signature as the .write_end of
377 * address_space_operations vector. So it can just be set onto .write_end for
378 * FSes that don't need any other processing. i_mutex is assumed to be held.
379 * Block based filesystems should use generic_write_end().
380 * NOTE: Even though i_size might get updated by this function, mark_inode_dirty
381 * is not called, so a filesystem that actually does store data in .write_inode
382 * should extend on what's done here with a call to mark_inode_dirty() in the
383 * case that i_size has changed.
384 */
391int simple_write_end(struct file *file, struct address_space *mapping, 385int simple_write_end(struct file *file, struct address_space *mapping,
392 loff_t pos, unsigned len, unsigned copied, 386 loff_t pos, unsigned len, unsigned copied,
393 struct page *page, void *fsdata) 387 struct page *page, void *fsdata)
394{ 388{
395 unsigned from = pos & (PAGE_CACHE_SIZE - 1); 389 struct inode *inode = page->mapping->host;
390 loff_t last_pos = pos + copied;
396 391
397 /* zero the stale part of the page if we did a short copy */ 392 /* zero the stale part of the page if we did a short copy */
398 if (copied < len) { 393 if (copied < len) {
399 void *kaddr = kmap_atomic(page, KM_USER0); 394 unsigned from = pos & (PAGE_CACHE_SIZE - 1);
400 memset(kaddr + from + copied, 0, len - copied); 395
401 flush_dcache_page(page); 396 zero_user(page, from + copied, len - copied);
402 kunmap_atomic(kaddr, KM_USER0);
403 } 397 }
404 398
405 simple_commit_write(file, page, from, from+copied); 399 if (!PageUptodate(page))
400 SetPageUptodate(page);
401 /*
402 * No need to use i_size_read() here, the i_size
403 * cannot change under us because we hold the i_mutex.
404 */
405 if (last_pos > inode->i_size)
406 i_size_write(inode, last_pos);
406 407
408 set_page_dirty(page);
407 unlock_page(page); 409 unlock_page(page);
408 page_cache_release(page); 410 page_cache_release(page);
409 411
@@ -853,7 +855,6 @@ EXPORT_SYMBOL(simple_getattr);
853EXPORT_SYMBOL(simple_link); 855EXPORT_SYMBOL(simple_link);
854EXPORT_SYMBOL(simple_lookup); 856EXPORT_SYMBOL(simple_lookup);
855EXPORT_SYMBOL(simple_pin_fs); 857EXPORT_SYMBOL(simple_pin_fs);
856EXPORT_UNUSED_SYMBOL(simple_prepare_write);
857EXPORT_SYMBOL(simple_readpage); 858EXPORT_SYMBOL(simple_readpage);
858EXPORT_SYMBOL(simple_release_fs); 859EXPORT_SYMBOL(simple_release_fs);
859EXPORT_SYMBOL(simple_rename); 860EXPORT_SYMBOL(simple_rename);
diff --git a/fs/locks.c b/fs/locks.c
index a8794f233bc9..ae9ded026b7c 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -1182,8 +1182,9 @@ int __break_lease(struct inode *inode, unsigned int mode)
1182 struct file_lock *fl; 1182 struct file_lock *fl;
1183 unsigned long break_time; 1183 unsigned long break_time;
1184 int i_have_this_lease = 0; 1184 int i_have_this_lease = 0;
1185 int want_write = (mode & O_ACCMODE) != O_RDONLY;
1185 1186
1186 new_fl = lease_alloc(NULL, mode & FMODE_WRITE ? F_WRLCK : F_RDLCK); 1187 new_fl = lease_alloc(NULL, want_write ? F_WRLCK : F_RDLCK);
1187 1188
1188 lock_kernel(); 1189 lock_kernel();
1189 1190
@@ -1197,7 +1198,7 @@ int __break_lease(struct inode *inode, unsigned int mode)
1197 if (fl->fl_owner == current->files) 1198 if (fl->fl_owner == current->files)
1198 i_have_this_lease = 1; 1199 i_have_this_lease = 1;
1199 1200
1200 if (mode & FMODE_WRITE) { 1201 if (want_write) {
1201 /* If we want write access, we have to revoke any lease. */ 1202 /* If we want write access, we have to revoke any lease. */
1202 future = F_UNLCK | F_INPROGRESS; 1203 future = F_UNLCK | F_INPROGRESS;
1203 } else if (flock->fl_type & F_INPROGRESS) { 1204 } else if (flock->fl_type & F_INPROGRESS) {
diff --git a/fs/minix/inode.c b/fs/minix/inode.c
index 74ea82d72164..756f8c93780c 100644
--- a/fs/minix/inode.c
+++ b/fs/minix/inode.c
@@ -17,8 +17,10 @@
17#include <linux/init.h> 17#include <linux/init.h>
18#include <linux/highuid.h> 18#include <linux/highuid.h>
19#include <linux/vfs.h> 19#include <linux/vfs.h>
20#include <linux/writeback.h>
20 21
21static int minix_write_inode(struct inode * inode, int wait); 22static int minix_write_inode(struct inode *inode,
23 struct writeback_control *wbc);
22static int minix_statfs(struct dentry *dentry, struct kstatfs *buf); 24static int minix_statfs(struct dentry *dentry, struct kstatfs *buf);
23static int minix_remount (struct super_block * sb, int * flags, char * data); 25static int minix_remount (struct super_block * sb, int * flags, char * data);
24 26
@@ -552,7 +554,7 @@ static struct buffer_head * V2_minix_update_inode(struct inode * inode)
552 return bh; 554 return bh;
553} 555}
554 556
555static int minix_write_inode(struct inode *inode, int wait) 557static int minix_write_inode(struct inode *inode, struct writeback_control *wbc)
556{ 558{
557 int err = 0; 559 int err = 0;
558 struct buffer_head *bh; 560 struct buffer_head *bh;
@@ -563,7 +565,7 @@ static int minix_write_inode(struct inode *inode, int wait)
563 bh = V2_minix_update_inode(inode); 565 bh = V2_minix_update_inode(inode);
564 if (!bh) 566 if (!bh)
565 return -EIO; 567 return -EIO;
566 if (wait && buffer_dirty(bh)) { 568 if (wbc->sync_mode == WB_SYNC_ALL && buffer_dirty(bh)) {
567 sync_dirty_buffer(bh); 569 sync_dirty_buffer(bh);
568 if (buffer_req(bh) && !buffer_uptodate(bh)) { 570 if (buffer_req(bh) && !buffer_uptodate(bh)) {
569 printk("IO error syncing minix inode [%s:%08lx]\n", 571 printk("IO error syncing minix inode [%s:%08lx]\n",
diff --git a/fs/namei.c b/fs/namei.c
index a4855af776a8..3d9d2f965f84 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -19,7 +19,6 @@
19#include <linux/slab.h> 19#include <linux/slab.h>
20#include <linux/fs.h> 20#include <linux/fs.h>
21#include <linux/namei.h> 21#include <linux/namei.h>
22#include <linux/quotaops.h>
23#include <linux/pagemap.h> 22#include <linux/pagemap.h>
24#include <linux/fsnotify.h> 23#include <linux/fsnotify.h>
25#include <linux/personality.h> 24#include <linux/personality.h>
@@ -498,8 +497,6 @@ static int link_path_walk(const char *, struct nameidata *);
498 497
499static __always_inline int __vfs_follow_link(struct nameidata *nd, const char *link) 498static __always_inline int __vfs_follow_link(struct nameidata *nd, const char *link)
500{ 499{
501 int res = 0;
502 char *name;
503 if (IS_ERR(link)) 500 if (IS_ERR(link))
504 goto fail; 501 goto fail;
505 502
@@ -510,22 +507,7 @@ static __always_inline int __vfs_follow_link(struct nameidata *nd, const char *l
510 path_get(&nd->root); 507 path_get(&nd->root);
511 } 508 }
512 509
513 res = link_path_walk(link, nd); 510 return link_path_walk(link, nd);
514 if (nd->depth || res || nd->last_type!=LAST_NORM)
515 return res;
516 /*
517 * If it is an iterative symlinks resolution in open_namei() we
518 * have to copy the last component. And all that crap because of
519 * bloody create() on broken symlinks. Furrfu...
520 */
521 name = __getname();
522 if (unlikely(!name)) {
523 path_put(&nd->path);
524 return -ENOMEM;
525 }
526 strcpy(name, nd->last.name);
527 nd->last.name = name;
528 return 0;
529fail: 511fail:
530 path_put(&nd->path); 512 path_put(&nd->path);
531 return PTR_ERR(link); 513 return PTR_ERR(link);
@@ -547,10 +529,10 @@ static inline void path_to_nameidata(struct path *path, struct nameidata *nd)
547 nd->path.dentry = path->dentry; 529 nd->path.dentry = path->dentry;
548} 530}
549 531
550static __always_inline int __do_follow_link(struct path *path, struct nameidata *nd) 532static __always_inline int
533__do_follow_link(struct path *path, struct nameidata *nd, void **p)
551{ 534{
552 int error; 535 int error;
553 void *cookie;
554 struct dentry *dentry = path->dentry; 536 struct dentry *dentry = path->dentry;
555 537
556 touch_atime(path->mnt, dentry); 538 touch_atime(path->mnt, dentry);
@@ -562,9 +544,9 @@ static __always_inline int __do_follow_link(struct path *path, struct nameidata
562 } 544 }
563 mntget(path->mnt); 545 mntget(path->mnt);
564 nd->last_type = LAST_BIND; 546 nd->last_type = LAST_BIND;
565 cookie = dentry->d_inode->i_op->follow_link(dentry, nd); 547 *p = dentry->d_inode->i_op->follow_link(dentry, nd);
566 error = PTR_ERR(cookie); 548 error = PTR_ERR(*p);
567 if (!IS_ERR(cookie)) { 549 if (!IS_ERR(*p)) {
568 char *s = nd_get_link(nd); 550 char *s = nd_get_link(nd);
569 error = 0; 551 error = 0;
570 if (s) 552 if (s)
@@ -574,8 +556,6 @@ static __always_inline int __do_follow_link(struct path *path, struct nameidata
574 if (error) 556 if (error)
575 path_put(&nd->path); 557 path_put(&nd->path);
576 } 558 }
577 if (dentry->d_inode->i_op->put_link)
578 dentry->d_inode->i_op->put_link(dentry, nd, cookie);
579 } 559 }
580 return error; 560 return error;
581} 561}
@@ -589,6 +569,7 @@ static __always_inline int __do_follow_link(struct path *path, struct nameidata
589 */ 569 */
590static inline int do_follow_link(struct path *path, struct nameidata *nd) 570static inline int do_follow_link(struct path *path, struct nameidata *nd)
591{ 571{
572 void *cookie;
592 int err = -ELOOP; 573 int err = -ELOOP;
593 if (current->link_count >= MAX_NESTED_LINKS) 574 if (current->link_count >= MAX_NESTED_LINKS)
594 goto loop; 575 goto loop;
@@ -602,7 +583,9 @@ static inline int do_follow_link(struct path *path, struct nameidata *nd)
602 current->link_count++; 583 current->link_count++;
603 current->total_link_count++; 584 current->total_link_count++;
604 nd->depth++; 585 nd->depth++;
605 err = __do_follow_link(path, nd); 586 err = __do_follow_link(path, nd, &cookie);
587 if (!IS_ERR(cookie) && path->dentry->d_inode->i_op->put_link)
588 path->dentry->d_inode->i_op->put_link(path->dentry, nd, cookie);
606 path_put(path); 589 path_put(path);
607 current->link_count--; 590 current->link_count--;
608 nd->depth--; 591 nd->depth--;
@@ -689,33 +672,20 @@ static __always_inline void follow_dotdot(struct nameidata *nd)
689 set_root(nd); 672 set_root(nd);
690 673
691 while(1) { 674 while(1) {
692 struct vfsmount *parent;
693 struct dentry *old = nd->path.dentry; 675 struct dentry *old = nd->path.dentry;
694 676
695 if (nd->path.dentry == nd->root.dentry && 677 if (nd->path.dentry == nd->root.dentry &&
696 nd->path.mnt == nd->root.mnt) { 678 nd->path.mnt == nd->root.mnt) {
697 break; 679 break;
698 } 680 }
699 spin_lock(&dcache_lock);
700 if (nd->path.dentry != nd->path.mnt->mnt_root) { 681 if (nd->path.dentry != nd->path.mnt->mnt_root) {
701 nd->path.dentry = dget(nd->path.dentry->d_parent); 682 /* rare case of legitimate dget_parent()... */
702 spin_unlock(&dcache_lock); 683 nd->path.dentry = dget_parent(nd->path.dentry);
703 dput(old); 684 dput(old);
704 break; 685 break;
705 } 686 }
706 spin_unlock(&dcache_lock); 687 if (!follow_up(&nd->path))
707 spin_lock(&vfsmount_lock);
708 parent = nd->path.mnt->mnt_parent;
709 if (parent == nd->path.mnt) {
710 spin_unlock(&vfsmount_lock);
711 break; 688 break;
712 }
713 mntget(parent);
714 nd->path.dentry = dget(nd->path.mnt->mnt_mountpoint);
715 spin_unlock(&vfsmount_lock);
716 dput(old);
717 mntput(nd->path.mnt);
718 nd->path.mnt = parent;
719 } 689 }
720 follow_mount(&nd->path); 690 follow_mount(&nd->path);
721} 691}
@@ -1347,7 +1317,7 @@ static int may_delete(struct inode *dir,struct dentry *victim,int isdir)
1347 return -ENOENT; 1317 return -ENOENT;
1348 1318
1349 BUG_ON(victim->d_parent->d_inode != dir); 1319 BUG_ON(victim->d_parent->d_inode != dir);
1350 audit_inode_child(victim->d_name.name, victim, dir); 1320 audit_inode_child(victim, dir);
1351 1321
1352 error = inode_permission(dir, MAY_WRITE | MAY_EXEC); 1322 error = inode_permission(dir, MAY_WRITE | MAY_EXEC);
1353 if (error) 1323 if (error)
@@ -1388,22 +1358,6 @@ static inline int may_create(struct inode *dir, struct dentry *child)
1388 return inode_permission(dir, MAY_WRITE | MAY_EXEC); 1358 return inode_permission(dir, MAY_WRITE | MAY_EXEC);
1389} 1359}
1390 1360
1391/*
1392 * O_DIRECTORY translates into forcing a directory lookup.
1393 */
1394static inline int lookup_flags(unsigned int f)
1395{
1396 unsigned long retval = LOOKUP_FOLLOW;
1397
1398 if (f & O_NOFOLLOW)
1399 retval &= ~LOOKUP_FOLLOW;
1400
1401 if (f & O_DIRECTORY)
1402 retval |= LOOKUP_DIRECTORY;
1403
1404 return retval;
1405}
1406
1407/* 1361/*
1408 * p1 and p2 should be directories on the same fs. 1362 * p1 and p2 should be directories on the same fs.
1409 */ 1363 */
@@ -1461,7 +1415,6 @@ int vfs_create(struct inode *dir, struct dentry *dentry, int mode,
1461 error = security_inode_create(dir, dentry, mode); 1415 error = security_inode_create(dir, dentry, mode);
1462 if (error) 1416 if (error)
1463 return error; 1417 return error;
1464 vfs_dq_init(dir);
1465 error = dir->i_op->create(dir, dentry, mode, nd); 1418 error = dir->i_op->create(dir, dentry, mode, nd);
1466 if (!error) 1419 if (!error)
1467 fsnotify_create(dir, dentry); 1420 fsnotify_create(dir, dentry);
@@ -1503,7 +1456,7 @@ int may_open(struct path *path, int acc_mode, int flag)
1503 * An append-only file must be opened in append mode for writing. 1456 * An append-only file must be opened in append mode for writing.
1504 */ 1457 */
1505 if (IS_APPEND(inode)) { 1458 if (IS_APPEND(inode)) {
1506 if ((flag & FMODE_WRITE) && !(flag & O_APPEND)) 1459 if ((flag & O_ACCMODE) != O_RDONLY && !(flag & O_APPEND))
1507 return -EPERM; 1460 return -EPERM;
1508 if (flag & O_TRUNC) 1461 if (flag & O_TRUNC)
1509 return -EPERM; 1462 return -EPERM;
@@ -1547,7 +1500,7 @@ static int handle_truncate(struct path *path)
1547 * what get passed to sys_open(). 1500 * what get passed to sys_open().
1548 */ 1501 */
1549static int __open_namei_create(struct nameidata *nd, struct path *path, 1502static int __open_namei_create(struct nameidata *nd, struct path *path,
1550 int flag, int mode) 1503 int open_flag, int mode)
1551{ 1504{
1552 int error; 1505 int error;
1553 struct dentry *dir = nd->path.dentry; 1506 struct dentry *dir = nd->path.dentry;
@@ -1565,7 +1518,7 @@ out_unlock:
1565 if (error) 1518 if (error)
1566 return error; 1519 return error;
1567 /* Don't check for write permission, don't truncate */ 1520 /* Don't check for write permission, don't truncate */
1568 return may_open(&nd->path, 0, flag & ~O_TRUNC); 1521 return may_open(&nd->path, 0, open_flag & ~O_TRUNC);
1569} 1522}
1570 1523
1571/* 1524/*
@@ -1603,129 +1556,132 @@ static int open_will_truncate(int flag, struct inode *inode)
1603 return (flag & O_TRUNC); 1556 return (flag & O_TRUNC);
1604} 1557}
1605 1558
1606/* 1559static struct file *finish_open(struct nameidata *nd,
1607 * Note that the low bits of the passed in "open_flag" 1560 int open_flag, int acc_mode)
1608 * are not the same as in the local variable "flag". See
1609 * open_to_namei_flags() for more details.
1610 */
1611struct file *do_filp_open(int dfd, const char *pathname,
1612 int open_flag, int mode, int acc_mode)
1613{ 1561{
1614 struct file *filp; 1562 struct file *filp;
1615 struct nameidata nd;
1616 int error;
1617 struct path path;
1618 struct dentry *dir;
1619 int count = 0;
1620 int will_truncate; 1563 int will_truncate;
1621 int flag = open_to_namei_flags(open_flag); 1564 int error;
1622 int force_reval = 0;
1623 1565
1566 will_truncate = open_will_truncate(open_flag, nd->path.dentry->d_inode);
1567 if (will_truncate) {
1568 error = mnt_want_write(nd->path.mnt);
1569 if (error)
1570 goto exit;
1571 }
1572 error = may_open(&nd->path, acc_mode, open_flag);
1573 if (error) {
1574 if (will_truncate)
1575 mnt_drop_write(nd->path.mnt);
1576 goto exit;
1577 }
1578 filp = nameidata_to_filp(nd);
1579 if (!IS_ERR(filp)) {
1580 error = ima_file_check(filp, acc_mode);
1581 if (error) {
1582 fput(filp);
1583 filp = ERR_PTR(error);
1584 }
1585 }
1586 if (!IS_ERR(filp)) {
1587 if (will_truncate) {
1588 error = handle_truncate(&nd->path);
1589 if (error) {
1590 fput(filp);
1591 filp = ERR_PTR(error);
1592 }
1593 }
1594 }
1624 /* 1595 /*
1625 * O_SYNC is implemented as __O_SYNC|O_DSYNC. As many places only 1596 * It is now safe to drop the mnt write
1626 * check for O_DSYNC if the need any syncing at all we enforce it's 1597 * because the filp has had a write taken
1627 * always set instead of having to deal with possibly weird behaviour 1598 * on its behalf.
1628 * for malicious applications setting only __O_SYNC.
1629 */ 1599 */
1630 if (open_flag & __O_SYNC) 1600 if (will_truncate)
1631 open_flag |= O_DSYNC; 1601 mnt_drop_write(nd->path.mnt);
1632 1602 return filp;
1633 if (!acc_mode)
1634 acc_mode = MAY_OPEN | ACC_MODE(open_flag);
1635 1603
1636 /* O_TRUNC implies we need access checks for write permissions */ 1604exit:
1637 if (flag & O_TRUNC) 1605 if (!IS_ERR(nd->intent.open.file))
1638 acc_mode |= MAY_WRITE; 1606 release_open_intent(nd);
1607 path_put(&nd->path);
1608 return ERR_PTR(error);
1609}
1639 1610
1640 /* Allow the LSM permission hook to distinguish append 1611static struct file *do_last(struct nameidata *nd, struct path *path,
1641 access from general write access. */ 1612 int open_flag, int acc_mode,
1642 if (flag & O_APPEND) 1613 int mode, const char *pathname,
1643 acc_mode |= MAY_APPEND; 1614 int *want_dir)
1615{
1616 struct dentry *dir = nd->path.dentry;
1617 struct file *filp;
1618 int error = -EISDIR;
1644 1619
1645 /* 1620 switch (nd->last_type) {
1646 * The simplest case - just a plain lookup. 1621 case LAST_DOTDOT:
1647 */ 1622 follow_dotdot(nd);
1648 if (!(flag & O_CREAT)) { 1623 dir = nd->path.dentry;
1649 filp = get_empty_filp(); 1624 if (nd->path.mnt->mnt_sb->s_type->fs_flags & FS_REVAL_DOT) {
1650 1625 if (!dir->d_op->d_revalidate(dir, nd)) {
1651 if (filp == NULL) 1626 error = -ESTALE;
1652 return ERR_PTR(-ENFILE); 1627 goto exit;
1653 nd.intent.open.file = filp;
1654 filp->f_flags = open_flag;
1655 nd.intent.open.flags = flag;
1656 nd.intent.open.create_mode = 0;
1657 error = do_path_lookup(dfd, pathname,
1658 lookup_flags(flag)|LOOKUP_OPEN, &nd);
1659 if (IS_ERR(nd.intent.open.file)) {
1660 if (error == 0) {
1661 error = PTR_ERR(nd.intent.open.file);
1662 path_put(&nd.path);
1663 } 1628 }
1664 } else if (error) 1629 }
1665 release_open_intent(&nd); 1630 /* fallthrough */
1666 if (error) 1631 case LAST_DOT:
1667 return ERR_PTR(error); 1632 case LAST_ROOT:
1633 if (open_flag & O_CREAT)
1634 goto exit;
1635 /* fallthrough */
1636 case LAST_BIND:
1637 audit_inode(pathname, dir);
1668 goto ok; 1638 goto ok;
1669 } 1639 }
1670 1640
1671 /* 1641 /* trailing slashes? */
1672 * Create - we need to know the parent. 1642 if (nd->last.name[nd->last.len]) {
1673 */ 1643 if (open_flag & O_CREAT)
1674reval: 1644 goto exit;
1675 error = path_init(dfd, pathname, LOOKUP_PARENT, &nd); 1645 *want_dir = 1;
1676 if (error)
1677 return ERR_PTR(error);
1678 if (force_reval)
1679 nd.flags |= LOOKUP_REVAL;
1680 error = path_walk(pathname, &nd);
1681 if (error) {
1682 if (nd.root.mnt)
1683 path_put(&nd.root);
1684 return ERR_PTR(error);
1685 } 1646 }
1686 if (unlikely(!audit_dummy_context()))
1687 audit_inode(pathname, nd.path.dentry);
1688 1647
1689 /* 1648 /* just plain open? */
1690 * We have the parent and last component. First of all, check 1649 if (!(open_flag & O_CREAT)) {
1691 * that we are not asked to creat(2) an obvious directory - that 1650 error = do_lookup(nd, &nd->last, path);
1692 * will not do. 1651 if (error)
1693 */ 1652 goto exit;
1694 error = -EISDIR; 1653 error = -ENOENT;
1695 if (nd.last_type != LAST_NORM || nd.last.name[nd.last.len]) 1654 if (!path->dentry->d_inode)
1696 goto exit_parent; 1655 goto exit_dput;
1656 if (path->dentry->d_inode->i_op->follow_link)
1657 return NULL;
1658 error = -ENOTDIR;
1659 if (*want_dir & !path->dentry->d_inode->i_op->lookup)
1660 goto exit_dput;
1661 path_to_nameidata(path, nd);
1662 audit_inode(pathname, nd->path.dentry);
1663 goto ok;
1664 }
1697 1665
1698 error = -ENFILE; 1666 /* OK, it's O_CREAT */
1699 filp = get_empty_filp();
1700 if (filp == NULL)
1701 goto exit_parent;
1702 nd.intent.open.file = filp;
1703 filp->f_flags = open_flag;
1704 nd.intent.open.flags = flag;
1705 nd.intent.open.create_mode = mode;
1706 dir = nd.path.dentry;
1707 nd.flags &= ~LOOKUP_PARENT;
1708 nd.flags |= LOOKUP_CREATE | LOOKUP_OPEN;
1709 if (flag & O_EXCL)
1710 nd.flags |= LOOKUP_EXCL;
1711 mutex_lock(&dir->d_inode->i_mutex); 1667 mutex_lock(&dir->d_inode->i_mutex);
1712 path.dentry = lookup_hash(&nd);
1713 path.mnt = nd.path.mnt;
1714 1668
1715do_last: 1669 path->dentry = lookup_hash(nd);
1716 error = PTR_ERR(path.dentry); 1670 path->mnt = nd->path.mnt;
1717 if (IS_ERR(path.dentry)) { 1671
1672 error = PTR_ERR(path->dentry);
1673 if (IS_ERR(path->dentry)) {
1718 mutex_unlock(&dir->d_inode->i_mutex); 1674 mutex_unlock(&dir->d_inode->i_mutex);
1719 goto exit; 1675 goto exit;
1720 } 1676 }
1721 1677
1722 if (IS_ERR(nd.intent.open.file)) { 1678 if (IS_ERR(nd->intent.open.file)) {
1723 error = PTR_ERR(nd.intent.open.file); 1679 error = PTR_ERR(nd->intent.open.file);
1724 goto exit_mutex_unlock; 1680 goto exit_mutex_unlock;
1725 } 1681 }
1726 1682
1727 /* Negative dentry, just create the file */ 1683 /* Negative dentry, just create the file */
1728 if (!path.dentry->d_inode) { 1684 if (!path->dentry->d_inode) {
1729 /* 1685 /*
1730 * This write is needed to ensure that a 1686 * This write is needed to ensure that a
1731 * ro->rw transition does not occur between 1687 * ro->rw transition does not occur between
@@ -1733,18 +1689,16 @@ do_last:
1733 * a permanent write count is taken through 1689 * a permanent write count is taken through
1734 * the 'struct file' in nameidata_to_filp(). 1690 * the 'struct file' in nameidata_to_filp().
1735 */ 1691 */
1736 error = mnt_want_write(nd.path.mnt); 1692 error = mnt_want_write(nd->path.mnt);
1737 if (error) 1693 if (error)
1738 goto exit_mutex_unlock; 1694 goto exit_mutex_unlock;
1739 error = __open_namei_create(&nd, &path, flag, mode); 1695 error = __open_namei_create(nd, path, open_flag, mode);
1740 if (error) { 1696 if (error) {
1741 mnt_drop_write(nd.path.mnt); 1697 mnt_drop_write(nd->path.mnt);
1742 goto exit; 1698 goto exit;
1743 } 1699 }
1744 filp = nameidata_to_filp(&nd); 1700 filp = nameidata_to_filp(nd);
1745 mnt_drop_write(nd.path.mnt); 1701 mnt_drop_write(nd->path.mnt);
1746 if (nd.root.mnt)
1747 path_put(&nd.root);
1748 if (!IS_ERR(filp)) { 1702 if (!IS_ERR(filp)) {
1749 error = ima_file_check(filp, acc_mode); 1703 error = ima_file_check(filp, acc_mode);
1750 if (error) { 1704 if (error) {
@@ -1759,150 +1713,181 @@ do_last:
1759 * It already exists. 1713 * It already exists.
1760 */ 1714 */
1761 mutex_unlock(&dir->d_inode->i_mutex); 1715 mutex_unlock(&dir->d_inode->i_mutex);
1762 audit_inode(pathname, path.dentry); 1716 audit_inode(pathname, path->dentry);
1763 1717
1764 error = -EEXIST; 1718 error = -EEXIST;
1765 if (flag & O_EXCL) 1719 if (open_flag & O_EXCL)
1766 goto exit_dput; 1720 goto exit_dput;
1767 1721
1768 if (__follow_mount(&path)) { 1722 if (__follow_mount(path)) {
1769 error = -ELOOP; 1723 error = -ELOOP;
1770 if (flag & O_NOFOLLOW) 1724 if (open_flag & O_NOFOLLOW)
1771 goto exit_dput; 1725 goto exit_dput;
1772 } 1726 }
1773 1727
1774 error = -ENOENT; 1728 error = -ENOENT;
1775 if (!path.dentry->d_inode) 1729 if (!path->dentry->d_inode)
1776 goto exit_dput; 1730 goto exit_dput;
1777 if (path.dentry->d_inode->i_op->follow_link)
1778 goto do_link;
1779 1731
1780 path_to_nameidata(&path, &nd); 1732 if (path->dentry->d_inode->i_op->follow_link)
1733 return NULL;
1734
1735 path_to_nameidata(path, nd);
1781 error = -EISDIR; 1736 error = -EISDIR;
1782 if (S_ISDIR(path.dentry->d_inode->i_mode)) 1737 if (S_ISDIR(path->dentry->d_inode->i_mode))
1783 goto exit; 1738 goto exit;
1784ok: 1739ok:
1740 filp = finish_open(nd, open_flag, acc_mode);
1741 return filp;
1742
1743exit_mutex_unlock:
1744 mutex_unlock(&dir->d_inode->i_mutex);
1745exit_dput:
1746 path_put_conditional(path, nd);
1747exit:
1748 if (!IS_ERR(nd->intent.open.file))
1749 release_open_intent(nd);
1750 path_put(&nd->path);
1751 return ERR_PTR(error);
1752}
1753
1754/*
1755 * Note that the low bits of the passed in "open_flag"
1756 * are not the same as in the local variable "flag". See
1757 * open_to_namei_flags() for more details.
1758 */
1759struct file *do_filp_open(int dfd, const char *pathname,
1760 int open_flag, int mode, int acc_mode)
1761{
1762 struct file *filp;
1763 struct nameidata nd;
1764 int error;
1765 struct path path;
1766 int count = 0;
1767 int flag = open_to_namei_flags(open_flag);
1768 int force_reval = 0;
1769 int want_dir = open_flag & O_DIRECTORY;
1770
1771 if (!(open_flag & O_CREAT))
1772 mode = 0;
1773
1785 /* 1774 /*
1786 * Consider: 1775 * O_SYNC is implemented as __O_SYNC|O_DSYNC. As many places only
1787 * 1. may_open() truncates a file 1776 * check for O_DSYNC if the need any syncing at all we enforce it's
1788 * 2. a rw->ro mount transition occurs 1777 * always set instead of having to deal with possibly weird behaviour
1789 * 3. nameidata_to_filp() fails due to 1778 * for malicious applications setting only __O_SYNC.
1790 * the ro mount.
1791 * That would be inconsistent, and should
1792 * be avoided. Taking this mnt write here
1793 * ensures that (2) can not occur.
1794 */ 1779 */
1795 will_truncate = open_will_truncate(flag, nd.path.dentry->d_inode); 1780 if (open_flag & __O_SYNC)
1796 if (will_truncate) { 1781 open_flag |= O_DSYNC;
1797 error = mnt_want_write(nd.path.mnt); 1782
1798 if (error) 1783 if (!acc_mode)
1799 goto exit; 1784 acc_mode = MAY_OPEN | ACC_MODE(open_flag);
1800 } 1785
1801 error = may_open(&nd.path, acc_mode, flag); 1786 /* O_TRUNC implies we need access checks for write permissions */
1787 if (open_flag & O_TRUNC)
1788 acc_mode |= MAY_WRITE;
1789
1790 /* Allow the LSM permission hook to distinguish append
1791 access from general write access. */
1792 if (open_flag & O_APPEND)
1793 acc_mode |= MAY_APPEND;
1794
1795 /* find the parent */
1796reval:
1797 error = path_init(dfd, pathname, LOOKUP_PARENT, &nd);
1798 if (error)
1799 return ERR_PTR(error);
1800 if (force_reval)
1801 nd.flags |= LOOKUP_REVAL;
1802
1803 current->total_link_count = 0;
1804 error = link_path_walk(pathname, &nd);
1802 if (error) { 1805 if (error) {
1803 if (will_truncate) 1806 filp = ERR_PTR(error);
1804 mnt_drop_write(nd.path.mnt); 1807 goto out;
1805 goto exit;
1806 }
1807 filp = nameidata_to_filp(&nd);
1808 if (!IS_ERR(filp)) {
1809 error = ima_file_check(filp, acc_mode);
1810 if (error) {
1811 fput(filp);
1812 filp = ERR_PTR(error);
1813 }
1814 } 1808 }
1815 if (!IS_ERR(filp)) { 1809 if (unlikely(!audit_dummy_context()) && (open_flag & O_CREAT))
1816 if (acc_mode & MAY_WRITE) 1810 audit_inode(pathname, nd.path.dentry);
1817 vfs_dq_init(nd.path.dentry->d_inode);
1818 1811
1819 if (will_truncate) {
1820 error = handle_truncate(&nd.path);
1821 if (error) {
1822 fput(filp);
1823 filp = ERR_PTR(error);
1824 }
1825 }
1826 }
1827 /* 1812 /*
1828 * It is now safe to drop the mnt write 1813 * We have the parent and last component.
1829 * because the filp has had a write taken
1830 * on its behalf.
1831 */ 1814 */
1832 if (will_truncate) 1815
1833 mnt_drop_write(nd.path.mnt); 1816 error = -ENFILE;
1817 filp = get_empty_filp();
1818 if (filp == NULL)
1819 goto exit_parent;
1820 nd.intent.open.file = filp;
1821 filp->f_flags = open_flag;
1822 nd.intent.open.flags = flag;
1823 nd.intent.open.create_mode = mode;
1824 nd.flags &= ~LOOKUP_PARENT;
1825 nd.flags |= LOOKUP_OPEN;
1826 if (open_flag & O_CREAT) {
1827 nd.flags |= LOOKUP_CREATE;
1828 if (open_flag & O_EXCL)
1829 nd.flags |= LOOKUP_EXCL;
1830 }
1831 filp = do_last(&nd, &path, open_flag, acc_mode, mode, pathname, &want_dir);
1832 while (unlikely(!filp)) { /* trailing symlink */
1833 struct path holder;
1834 struct inode *inode = path.dentry->d_inode;
1835 void *cookie;
1836 error = -ELOOP;
1837 /* S_ISDIR part is a temporary automount kludge */
1838 if ((open_flag & O_NOFOLLOW) && !S_ISDIR(inode->i_mode))
1839 goto exit_dput;
1840 if (count++ == 32)
1841 goto exit_dput;
1842 /*
1843 * This is subtle. Instead of calling do_follow_link() we do
1844 * the thing by hands. The reason is that this way we have zero
1845 * link_count and path_walk() (called from ->follow_link)
1846 * honoring LOOKUP_PARENT. After that we have the parent and
1847 * last component, i.e. we are in the same situation as after
1848 * the first path_walk(). Well, almost - if the last component
1849 * is normal we get its copy stored in nd->last.name and we will
1850 * have to putname() it when we are done. Procfs-like symlinks
1851 * just set LAST_BIND.
1852 */
1853 nd.flags |= LOOKUP_PARENT;
1854 error = security_inode_follow_link(path.dentry, &nd);
1855 if (error)
1856 goto exit_dput;
1857 error = __do_follow_link(&path, &nd, &cookie);
1858 if (unlikely(error)) {
1859 /* nd.path had been dropped */
1860 if (!IS_ERR(cookie) && inode->i_op->put_link)
1861 inode->i_op->put_link(path.dentry, &nd, cookie);
1862 path_put(&path);
1863 release_open_intent(&nd);
1864 filp = ERR_PTR(error);
1865 goto out;
1866 }
1867 holder = path;
1868 nd.flags &= ~LOOKUP_PARENT;
1869 filp = do_last(&nd, &path, open_flag, acc_mode, mode, pathname, &want_dir);
1870 if (inode->i_op->put_link)
1871 inode->i_op->put_link(holder.dentry, &nd, cookie);
1872 path_put(&holder);
1873 }
1874out:
1834 if (nd.root.mnt) 1875 if (nd.root.mnt)
1835 path_put(&nd.root); 1876 path_put(&nd.root);
1877 if (filp == ERR_PTR(-ESTALE) && !force_reval) {
1878 force_reval = 1;
1879 goto reval;
1880 }
1836 return filp; 1881 return filp;
1837 1882
1838exit_mutex_unlock:
1839 mutex_unlock(&dir->d_inode->i_mutex);
1840exit_dput: 1883exit_dput:
1841 path_put_conditional(&path, &nd); 1884 path_put_conditional(&path, &nd);
1842exit:
1843 if (!IS_ERR(nd.intent.open.file)) 1885 if (!IS_ERR(nd.intent.open.file))
1844 release_open_intent(&nd); 1886 release_open_intent(&nd);
1845exit_parent: 1887exit_parent:
1846 if (nd.root.mnt)
1847 path_put(&nd.root);
1848 path_put(&nd.path); 1888 path_put(&nd.path);
1849 return ERR_PTR(error); 1889 filp = ERR_PTR(error);
1850 1890 goto out;
1851do_link:
1852 error = -ELOOP;
1853 if (flag & O_NOFOLLOW)
1854 goto exit_dput;
1855 /*
1856 * This is subtle. Instead of calling do_follow_link() we do the
1857 * thing by hands. The reason is that this way we have zero link_count
1858 * and path_walk() (called from ->follow_link) honoring LOOKUP_PARENT.
1859 * After that we have the parent and last component, i.e.
1860 * we are in the same situation as after the first path_walk().
1861 * Well, almost - if the last component is normal we get its copy
1862 * stored in nd->last.name and we will have to putname() it when we
1863 * are done. Procfs-like symlinks just set LAST_BIND.
1864 */
1865 nd.flags |= LOOKUP_PARENT;
1866 error = security_inode_follow_link(path.dentry, &nd);
1867 if (error)
1868 goto exit_dput;
1869 error = __do_follow_link(&path, &nd);
1870 path_put(&path);
1871 if (error) {
1872 /* Does someone understand code flow here? Or it is only
1873 * me so stupid? Anathema to whoever designed this non-sense
1874 * with "intent.open".
1875 */
1876 release_open_intent(&nd);
1877 if (nd.root.mnt)
1878 path_put(&nd.root);
1879 if (error == -ESTALE && !force_reval) {
1880 force_reval = 1;
1881 goto reval;
1882 }
1883 return ERR_PTR(error);
1884 }
1885 nd.flags &= ~LOOKUP_PARENT;
1886 if (nd.last_type == LAST_BIND)
1887 goto ok;
1888 error = -EISDIR;
1889 if (nd.last_type != LAST_NORM)
1890 goto exit;
1891 if (nd.last.name[nd.last.len]) {
1892 __putname(nd.last.name);
1893 goto exit;
1894 }
1895 error = -ELOOP;
1896 if (count++==32) {
1897 __putname(nd.last.name);
1898 goto exit;
1899 }
1900 dir = nd.path.dentry;
1901 mutex_lock(&dir->d_inode->i_mutex);
1902 path.dentry = lookup_hash(&nd);
1903 path.mnt = nd.path.mnt;
1904 __putname(nd.last.name);
1905 goto do_last;
1906} 1891}
1907 1892
1908/** 1893/**
@@ -1996,7 +1981,6 @@ int vfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev)
1996 if (error) 1981 if (error)
1997 return error; 1982 return error;
1998 1983
1999 vfs_dq_init(dir);
2000 error = dir->i_op->mknod(dir, dentry, mode, dev); 1984 error = dir->i_op->mknod(dir, dentry, mode, dev);
2001 if (!error) 1985 if (!error)
2002 fsnotify_create(dir, dentry); 1986 fsnotify_create(dir, dentry);
@@ -2095,7 +2079,6 @@ int vfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
2095 if (error) 2079 if (error)
2096 return error; 2080 return error;
2097 2081
2098 vfs_dq_init(dir);
2099 error = dir->i_op->mkdir(dir, dentry, mode); 2082 error = dir->i_op->mkdir(dir, dentry, mode);
2100 if (!error) 2083 if (!error)
2101 fsnotify_mkdir(dir, dentry); 2084 fsnotify_mkdir(dir, dentry);
@@ -2181,8 +2164,6 @@ int vfs_rmdir(struct inode *dir, struct dentry *dentry)
2181 if (!dir->i_op->rmdir) 2164 if (!dir->i_op->rmdir)
2182 return -EPERM; 2165 return -EPERM;
2183 2166
2184 vfs_dq_init(dir);
2185
2186 mutex_lock(&dentry->d_inode->i_mutex); 2167 mutex_lock(&dentry->d_inode->i_mutex);
2187 dentry_unhash(dentry); 2168 dentry_unhash(dentry);
2188 if (d_mountpoint(dentry)) 2169 if (d_mountpoint(dentry))
@@ -2268,15 +2249,16 @@ int vfs_unlink(struct inode *dir, struct dentry *dentry)
2268 if (!dir->i_op->unlink) 2249 if (!dir->i_op->unlink)
2269 return -EPERM; 2250 return -EPERM;
2270 2251
2271 vfs_dq_init(dir);
2272
2273 mutex_lock(&dentry->d_inode->i_mutex); 2252 mutex_lock(&dentry->d_inode->i_mutex);
2274 if (d_mountpoint(dentry)) 2253 if (d_mountpoint(dentry))
2275 error = -EBUSY; 2254 error = -EBUSY;
2276 else { 2255 else {
2277 error = security_inode_unlink(dir, dentry); 2256 error = security_inode_unlink(dir, dentry);
2278 if (!error) 2257 if (!error) {
2279 error = dir->i_op->unlink(dir, dentry); 2258 error = dir->i_op->unlink(dir, dentry);
2259 if (!error)
2260 dentry->d_inode->i_flags |= S_DEAD;
2261 }
2280 } 2262 }
2281 mutex_unlock(&dentry->d_inode->i_mutex); 2263 mutex_unlock(&dentry->d_inode->i_mutex);
2282 2264
@@ -2379,7 +2361,6 @@ int vfs_symlink(struct inode *dir, struct dentry *dentry, const char *oldname)
2379 if (error) 2361 if (error)
2380 return error; 2362 return error;
2381 2363
2382 vfs_dq_init(dir);
2383 error = dir->i_op->symlink(dir, dentry, oldname); 2364 error = dir->i_op->symlink(dir, dentry, oldname);
2384 if (!error) 2365 if (!error)
2385 fsnotify_create(dir, dentry); 2366 fsnotify_create(dir, dentry);
@@ -2463,7 +2444,6 @@ int vfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_de
2463 return error; 2444 return error;
2464 2445
2465 mutex_lock(&inode->i_mutex); 2446 mutex_lock(&inode->i_mutex);
2466 vfs_dq_init(dir);
2467 error = dir->i_op->link(old_dentry, dir, new_dentry); 2447 error = dir->i_op->link(old_dentry, dir, new_dentry);
2468 mutex_unlock(&inode->i_mutex); 2448 mutex_unlock(&inode->i_mutex);
2469 if (!error) 2449 if (!error)
@@ -2629,6 +2609,8 @@ static int vfs_rename_other(struct inode *old_dir, struct dentry *old_dentry,
2629 else 2609 else
2630 error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry); 2610 error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry);
2631 if (!error) { 2611 if (!error) {
2612 if (target)
2613 target->i_flags |= S_DEAD;
2632 if (!(old_dir->i_sb->s_type->fs_flags & FS_RENAME_DOES_D_MOVE)) 2614 if (!(old_dir->i_sb->s_type->fs_flags & FS_RENAME_DOES_D_MOVE))
2633 d_move(old_dentry, new_dentry); 2615 d_move(old_dentry, new_dentry);
2634 } 2616 }
@@ -2662,20 +2644,15 @@ int vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
2662 if (!old_dir->i_op->rename) 2644 if (!old_dir->i_op->rename)
2663 return -EPERM; 2645 return -EPERM;
2664 2646
2665 vfs_dq_init(old_dir);
2666 vfs_dq_init(new_dir);
2667
2668 old_name = fsnotify_oldname_init(old_dentry->d_name.name); 2647 old_name = fsnotify_oldname_init(old_dentry->d_name.name);
2669 2648
2670 if (is_dir) 2649 if (is_dir)
2671 error = vfs_rename_dir(old_dir,old_dentry,new_dir,new_dentry); 2650 error = vfs_rename_dir(old_dir,old_dentry,new_dir,new_dentry);
2672 else 2651 else
2673 error = vfs_rename_other(old_dir,old_dentry,new_dir,new_dentry); 2652 error = vfs_rename_other(old_dir,old_dentry,new_dir,new_dentry);
2674 if (!error) { 2653 if (!error)
2675 const char *new_name = old_dentry->d_name.name; 2654 fsnotify_move(old_dir, new_dir, old_name, is_dir,
2676 fsnotify_move(old_dir, new_dir, old_name, new_name, is_dir,
2677 new_dentry->d_inode, old_dentry); 2655 new_dentry->d_inode, old_dentry);
2678 }
2679 fsnotify_oldname_free(old_name); 2656 fsnotify_oldname_free(old_name);
2680 2657
2681 return error; 2658 return error;
diff --git a/fs/namespace.c b/fs/namespace.c
index c768f733c8d6..8174c8ab5c70 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -573,7 +573,7 @@ static struct vfsmount *clone_mnt(struct vfsmount *old, struct dentry *root,
573 mnt->mnt_master = old; 573 mnt->mnt_master = old;
574 CLEAR_MNT_SHARED(mnt); 574 CLEAR_MNT_SHARED(mnt);
575 } else if (!(flag & CL_PRIVATE)) { 575 } else if (!(flag & CL_PRIVATE)) {
576 if ((flag & CL_PROPAGATION) || IS_MNT_SHARED(old)) 576 if ((flag & CL_MAKE_SHARED) || IS_MNT_SHARED(old))
577 list_add(&mnt->mnt_share, &old->mnt_share); 577 list_add(&mnt->mnt_share, &old->mnt_share);
578 if (IS_MNT_SLAVE(old)) 578 if (IS_MNT_SLAVE(old))
579 list_add(&mnt->mnt_slave, &old->mnt_slave); 579 list_add(&mnt->mnt_slave, &old->mnt_slave);
@@ -737,6 +737,21 @@ static void m_stop(struct seq_file *m, void *v)
737 up_read(&namespace_sem); 737 up_read(&namespace_sem);
738} 738}
739 739
740int mnt_had_events(struct proc_mounts *p)
741{
742 struct mnt_namespace *ns = p->ns;
743 int res = 0;
744
745 spin_lock(&vfsmount_lock);
746 if (p->event != ns->event) {
747 p->event = ns->event;
748 res = 1;
749 }
750 spin_unlock(&vfsmount_lock);
751
752 return res;
753}
754
740struct proc_fs_info { 755struct proc_fs_info {
741 int flag; 756 int flag;
742 const char *str; 757 const char *str;
@@ -1121,8 +1136,15 @@ SYSCALL_DEFINE2(umount, char __user *, name, int, flags)
1121{ 1136{
1122 struct path path; 1137 struct path path;
1123 int retval; 1138 int retval;
1139 int lookup_flags = 0;
1124 1140
1125 retval = user_path(name, &path); 1141 if (flags & ~(MNT_FORCE | MNT_DETACH | MNT_EXPIRE | UMOUNT_NOFOLLOW))
1142 return -EINVAL;
1143
1144 if (!(flags & UMOUNT_NOFOLLOW))
1145 lookup_flags |= LOOKUP_FOLLOW;
1146
1147 retval = user_path_at(AT_FDCWD, name, lookup_flags, &path);
1126 if (retval) 1148 if (retval)
1127 goto out; 1149 goto out;
1128 retval = -EINVAL; 1150 retval = -EINVAL;
@@ -1246,6 +1268,21 @@ void drop_collected_mounts(struct vfsmount *mnt)
1246 release_mounts(&umount_list); 1268 release_mounts(&umount_list);
1247} 1269}
1248 1270
1271int iterate_mounts(int (*f)(struct vfsmount *, void *), void *arg,
1272 struct vfsmount *root)
1273{
1274 struct vfsmount *mnt;
1275 int res = f(root, arg);
1276 if (res)
1277 return res;
1278 list_for_each_entry(mnt, &root->mnt_list, mnt_list) {
1279 res = f(mnt, arg);
1280 if (res)
1281 return res;
1282 }
1283 return 0;
1284}
1285
1249static void cleanup_group_ids(struct vfsmount *mnt, struct vfsmount *end) 1286static void cleanup_group_ids(struct vfsmount *mnt, struct vfsmount *end)
1250{ 1287{
1251 struct vfsmount *p; 1288 struct vfsmount *p;
@@ -1538,7 +1575,7 @@ static int do_remount(struct path *path, int flags, int mnt_flags,
1538 err = do_remount_sb(sb, flags, data, 0); 1575 err = do_remount_sb(sb, flags, data, 0);
1539 if (!err) { 1576 if (!err) {
1540 spin_lock(&vfsmount_lock); 1577 spin_lock(&vfsmount_lock);
1541 mnt_flags |= path->mnt->mnt_flags & MNT_PNODE_MASK; 1578 mnt_flags |= path->mnt->mnt_flags & MNT_PROPAGATION_MASK;
1542 path->mnt->mnt_flags = mnt_flags; 1579 path->mnt->mnt_flags = mnt_flags;
1543 spin_unlock(&vfsmount_lock); 1580 spin_unlock(&vfsmount_lock);
1544 } 1581 }
@@ -1671,7 +1708,7 @@ int do_add_mount(struct vfsmount *newmnt, struct path *path,
1671{ 1708{
1672 int err; 1709 int err;
1673 1710
1674 mnt_flags &= ~(MNT_SHARED | MNT_WRITE_HOLD); 1711 mnt_flags &= ~(MNT_SHARED | MNT_WRITE_HOLD | MNT_INTERNAL);
1675 1712
1676 down_write(&namespace_sem); 1713 down_write(&namespace_sem);
1677 /* Something was mounted here while we slept */ 1714 /* Something was mounted here while we slept */
@@ -2314,17 +2351,13 @@ void __init mnt_init(void)
2314 2351
2315void put_mnt_ns(struct mnt_namespace *ns) 2352void put_mnt_ns(struct mnt_namespace *ns)
2316{ 2353{
2317 struct vfsmount *root;
2318 LIST_HEAD(umount_list); 2354 LIST_HEAD(umount_list);
2319 2355
2320 if (!atomic_dec_and_lock(&ns->count, &vfsmount_lock)) 2356 if (!atomic_dec_and_test(&ns->count))
2321 return; 2357 return;
2322 root = ns->root;
2323 ns->root = NULL;
2324 spin_unlock(&vfsmount_lock);
2325 down_write(&namespace_sem); 2358 down_write(&namespace_sem);
2326 spin_lock(&vfsmount_lock); 2359 spin_lock(&vfsmount_lock);
2327 umount_tree(root, 0, &umount_list); 2360 umount_tree(ns->root, 0, &umount_list);
2328 spin_unlock(&vfsmount_lock); 2361 spin_unlock(&vfsmount_lock);
2329 up_write(&namespace_sem); 2362 up_write(&namespace_sem);
2330 release_mounts(&umount_list); 2363 release_mounts(&umount_list);
diff --git a/fs/nfs/callback.h b/fs/nfs/callback.h
index d4036be0b589..85a7cfd1b8dd 100644
--- a/fs/nfs/callback.h
+++ b/fs/nfs/callback.h
@@ -119,6 +119,14 @@ struct cb_recallanyargs {
119}; 119};
120 120
121extern unsigned nfs4_callback_recallany(struct cb_recallanyargs *args, void *dummy); 121extern unsigned nfs4_callback_recallany(struct cb_recallanyargs *args, void *dummy);
122
123struct cb_recallslotargs {
124 struct sockaddr *crsa_addr;
125 uint32_t crsa_target_max_slots;
126};
127extern unsigned nfs4_callback_recallslot(struct cb_recallslotargs *args,
128 void *dummy);
129
122#endif /* CONFIG_NFS_V4_1 */ 130#endif /* CONFIG_NFS_V4_1 */
123 131
124extern __be32 nfs4_callback_getattr(struct cb_getattrargs *args, struct cb_getattrres *res); 132extern __be32 nfs4_callback_getattr(struct cb_getattrargs *args, struct cb_getattrres *res);
diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c
index defa9b4c470e..84761b5bb8e2 100644
--- a/fs/nfs/callback_proc.c
+++ b/fs/nfs/callback_proc.c
@@ -143,44 +143,49 @@ int nfs41_validate_delegation_stateid(struct nfs_delegation *delegation, const n
143 * Return success if the sequenceID is one more than what we last saw on 143 * Return success if the sequenceID is one more than what we last saw on
144 * this slot, accounting for wraparound. Increments the slot's sequence. 144 * this slot, accounting for wraparound. Increments the slot's sequence.
145 * 145 *
146 * We don't yet implement a duplicate request cache, so at this time 146 * We don't yet implement a duplicate request cache, instead we set the
147 * we will log replays, and process them as if we had not seen them before, 147 * back channel ca_maxresponsesize_cached to zero. This is OK for now
148 * but we don't bump the sequence in the slot. Not too worried about it,
149 * since we only currently implement idempotent callbacks anyway. 148 * since we only currently implement idempotent callbacks anyway.
150 * 149 *
151 * We have a single slot backchannel at this time, so we don't bother 150 * We have a single slot backchannel at this time, so we don't bother
152 * checking the used_slots bit array on the table. The lower layer guarantees 151 * checking the used_slots bit array on the table. The lower layer guarantees
153 * a single outstanding callback request at a time. 152 * a single outstanding callback request at a time.
154 */ 153 */
155static int 154static __be32
156validate_seqid(struct nfs4_slot_table *tbl, u32 slotid, u32 seqid) 155validate_seqid(struct nfs4_slot_table *tbl, struct cb_sequenceargs * args)
157{ 156{
158 struct nfs4_slot *slot; 157 struct nfs4_slot *slot;
159 158
160 dprintk("%s enter. slotid %d seqid %d\n", 159 dprintk("%s enter. slotid %d seqid %d\n",
161 __func__, slotid, seqid); 160 __func__, args->csa_slotid, args->csa_sequenceid);
162 161
163 if (slotid > NFS41_BC_MAX_CALLBACKS) 162 if (args->csa_slotid > NFS41_BC_MAX_CALLBACKS)
164 return htonl(NFS4ERR_BADSLOT); 163 return htonl(NFS4ERR_BADSLOT);
165 164
166 slot = tbl->slots + slotid; 165 slot = tbl->slots + args->csa_slotid;
167 dprintk("%s slot table seqid: %d\n", __func__, slot->seq_nr); 166 dprintk("%s slot table seqid: %d\n", __func__, slot->seq_nr);
168 167
169 /* Normal */ 168 /* Normal */
170 if (likely(seqid == slot->seq_nr + 1)) { 169 if (likely(args->csa_sequenceid == slot->seq_nr + 1)) {
171 slot->seq_nr++; 170 slot->seq_nr++;
172 return htonl(NFS4_OK); 171 return htonl(NFS4_OK);
173 } 172 }
174 173
175 /* Replay */ 174 /* Replay */
176 if (seqid == slot->seq_nr) { 175 if (args->csa_sequenceid == slot->seq_nr) {
177 dprintk("%s seqid %d is a replay - no DRC available\n", 176 dprintk("%s seqid %d is a replay\n",
178 __func__, seqid); 177 __func__, args->csa_sequenceid);
179 return htonl(NFS4_OK); 178 /* Signal process_op to set this error on next op */
179 if (args->csa_cachethis == 0)
180 return htonl(NFS4ERR_RETRY_UNCACHED_REP);
181
182 /* The ca_maxresponsesize_cached is 0 with no DRC */
183 else if (args->csa_cachethis == 1)
184 return htonl(NFS4ERR_REP_TOO_BIG_TO_CACHE);
180 } 185 }
181 186
182 /* Wraparound */ 187 /* Wraparound */
183 if (seqid == 1 && (slot->seq_nr + 1) == 0) { 188 if (args->csa_sequenceid == 1 && (slot->seq_nr + 1) == 0) {
184 slot->seq_nr = 1; 189 slot->seq_nr = 1;
185 return htonl(NFS4_OK); 190 return htonl(NFS4_OK);
186 } 191 }
@@ -225,27 +230,87 @@ validate_seqid(struct nfs4_slot_table *tbl, u32 slotid, u32 seqid)
225 return NULL; 230 return NULL;
226} 231}
227 232
228/* FIXME: referring calls should be processed */ 233/*
229unsigned nfs4_callback_sequence(struct cb_sequenceargs *args, 234 * For each referring call triple, check the session's slot table for
235 * a match. If the slot is in use and the sequence numbers match, the
236 * client is still waiting for a response to the original request.
237 */
238static bool referring_call_exists(struct nfs_client *clp,
239 uint32_t nrclists,
240 struct referring_call_list *rclists)
241{
242 bool status = 0;
243 int i, j;
244 struct nfs4_session *session;
245 struct nfs4_slot_table *tbl;
246 struct referring_call_list *rclist;
247 struct referring_call *ref;
248
249 /*
250 * XXX When client trunking is implemented, this becomes
251 * a session lookup from within the loop
252 */
253 session = clp->cl_session;
254 tbl = &session->fc_slot_table;
255
256 for (i = 0; i < nrclists; i++) {
257 rclist = &rclists[i];
258 if (memcmp(session->sess_id.data,
259 rclist->rcl_sessionid.data,
260 NFS4_MAX_SESSIONID_LEN) != 0)
261 continue;
262
263 for (j = 0; j < rclist->rcl_nrefcalls; j++) {
264 ref = &rclist->rcl_refcalls[j];
265
266 dprintk("%s: sessionid %x:%x:%x:%x sequenceid %u "
267 "slotid %u\n", __func__,
268 ((u32 *)&rclist->rcl_sessionid.data)[0],
269 ((u32 *)&rclist->rcl_sessionid.data)[1],
270 ((u32 *)&rclist->rcl_sessionid.data)[2],
271 ((u32 *)&rclist->rcl_sessionid.data)[3],
272 ref->rc_sequenceid, ref->rc_slotid);
273
274 spin_lock(&tbl->slot_tbl_lock);
275 status = (test_bit(ref->rc_slotid, tbl->used_slots) &&
276 tbl->slots[ref->rc_slotid].seq_nr ==
277 ref->rc_sequenceid);
278 spin_unlock(&tbl->slot_tbl_lock);
279 if (status)
280 goto out;
281 }
282 }
283
284out:
285 return status;
286}
287
288__be32 nfs4_callback_sequence(struct cb_sequenceargs *args,
230 struct cb_sequenceres *res) 289 struct cb_sequenceres *res)
231{ 290{
232 struct nfs_client *clp; 291 struct nfs_client *clp;
233 int i, status; 292 int i;
234 293 __be32 status;
235 for (i = 0; i < args->csa_nrclists; i++)
236 kfree(args->csa_rclists[i].rcl_refcalls);
237 kfree(args->csa_rclists);
238 294
239 status = htonl(NFS4ERR_BADSESSION); 295 status = htonl(NFS4ERR_BADSESSION);
240 clp = find_client_with_session(args->csa_addr, 4, &args->csa_sessionid); 296 clp = find_client_with_session(args->csa_addr, 4, &args->csa_sessionid);
241 if (clp == NULL) 297 if (clp == NULL)
242 goto out; 298 goto out;
243 299
244 status = validate_seqid(&clp->cl_session->bc_slot_table, 300 status = validate_seqid(&clp->cl_session->bc_slot_table, args);
245 args->csa_slotid, args->csa_sequenceid);
246 if (status) 301 if (status)
247 goto out_putclient; 302 goto out_putclient;
248 303
304 /*
305 * Check for pending referring calls. If a match is found, a
306 * related callback was received before the response to the original
307 * call.
308 */
309 if (referring_call_exists(clp, args->csa_nrclists, args->csa_rclists)) {
310 status = htonl(NFS4ERR_DELAY);
311 goto out_putclient;
312 }
313
249 memcpy(&res->csr_sessionid, &args->csa_sessionid, 314 memcpy(&res->csr_sessionid, &args->csa_sessionid,
250 sizeof(res->csr_sessionid)); 315 sizeof(res->csr_sessionid));
251 res->csr_sequenceid = args->csa_sequenceid; 316 res->csr_sequenceid = args->csa_sequenceid;
@@ -256,15 +321,23 @@ unsigned nfs4_callback_sequence(struct cb_sequenceargs *args,
256out_putclient: 321out_putclient:
257 nfs_put_client(clp); 322 nfs_put_client(clp);
258out: 323out:
259 dprintk("%s: exit with status = %d\n", __func__, ntohl(status)); 324 for (i = 0; i < args->csa_nrclists; i++)
260 res->csr_status = status; 325 kfree(args->csa_rclists[i].rcl_refcalls);
261 return res->csr_status; 326 kfree(args->csa_rclists);
327
328 if (status == htonl(NFS4ERR_RETRY_UNCACHED_REP))
329 res->csr_status = 0;
330 else
331 res->csr_status = status;
332 dprintk("%s: exit with status = %d res->csr_status %d\n", __func__,
333 ntohl(status), ntohl(res->csr_status));
334 return status;
262} 335}
263 336
264unsigned nfs4_callback_recallany(struct cb_recallanyargs *args, void *dummy) 337__be32 nfs4_callback_recallany(struct cb_recallanyargs *args, void *dummy)
265{ 338{
266 struct nfs_client *clp; 339 struct nfs_client *clp;
267 int status; 340 __be32 status;
268 fmode_t flags = 0; 341 fmode_t flags = 0;
269 342
270 status = htonl(NFS4ERR_OP_NOT_IN_SESSION); 343 status = htonl(NFS4ERR_OP_NOT_IN_SESSION);
@@ -289,4 +362,40 @@ out:
289 dprintk("%s: exit with status = %d\n", __func__, ntohl(status)); 362 dprintk("%s: exit with status = %d\n", __func__, ntohl(status));
290 return status; 363 return status;
291} 364}
365
366/* Reduce the fore channel's max_slots to the target value */
367__be32 nfs4_callback_recallslot(struct cb_recallslotargs *args, void *dummy)
368{
369 struct nfs_client *clp;
370 struct nfs4_slot_table *fc_tbl;
371 __be32 status;
372
373 status = htonl(NFS4ERR_OP_NOT_IN_SESSION);
374 clp = nfs_find_client(args->crsa_addr, 4);
375 if (clp == NULL)
376 goto out;
377
378 dprintk("NFS: CB_RECALL_SLOT request from %s target max slots %d\n",
379 rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_ADDR),
380 args->crsa_target_max_slots);
381
382 fc_tbl = &clp->cl_session->fc_slot_table;
383
384 status = htonl(NFS4ERR_BAD_HIGH_SLOT);
385 if (args->crsa_target_max_slots > fc_tbl->max_slots ||
386 args->crsa_target_max_slots < 1)
387 goto out_putclient;
388
389 status = htonl(NFS4_OK);
390 if (args->crsa_target_max_slots == fc_tbl->max_slots)
391 goto out_putclient;
392
393 fc_tbl->target_max_slots = args->crsa_target_max_slots;
394 nfs41_handle_recall_slot(clp);
395out_putclient:
396 nfs_put_client(clp); /* balance nfs_find_client */
397out:
398 dprintk("%s: exit with status = %d\n", __func__, ntohl(status));
399 return status;
400}
292#endif /* CONFIG_NFS_V4_1 */ 401#endif /* CONFIG_NFS_V4_1 */
diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c
index 8e1a2511c8be..db30c0b398b5 100644
--- a/fs/nfs/callback_xdr.c
+++ b/fs/nfs/callback_xdr.c
@@ -24,10 +24,14 @@
24#define CB_OP_SEQUENCE_RES_MAXSZ (CB_OP_HDR_RES_MAXSZ + \ 24#define CB_OP_SEQUENCE_RES_MAXSZ (CB_OP_HDR_RES_MAXSZ + \
25 4 + 1 + 3) 25 4 + 1 + 3)
26#define CB_OP_RECALLANY_RES_MAXSZ (CB_OP_HDR_RES_MAXSZ) 26#define CB_OP_RECALLANY_RES_MAXSZ (CB_OP_HDR_RES_MAXSZ)
27#define CB_OP_RECALLSLOT_RES_MAXSZ (CB_OP_HDR_RES_MAXSZ)
27#endif /* CONFIG_NFS_V4_1 */ 28#endif /* CONFIG_NFS_V4_1 */
28 29
29#define NFSDBG_FACILITY NFSDBG_CALLBACK 30#define NFSDBG_FACILITY NFSDBG_CALLBACK
30 31
32/* Internal error code */
33#define NFS4ERR_RESOURCE_HDR 11050
34
31typedef __be32 (*callback_process_op_t)(void *, void *); 35typedef __be32 (*callback_process_op_t)(void *, void *);
32typedef __be32 (*callback_decode_arg_t)(struct svc_rqst *, struct xdr_stream *, void *); 36typedef __be32 (*callback_decode_arg_t)(struct svc_rqst *, struct xdr_stream *, void *);
33typedef __be32 (*callback_encode_res_t)(struct svc_rqst *, struct xdr_stream *, void *); 37typedef __be32 (*callback_encode_res_t)(struct svc_rqst *, struct xdr_stream *, void *);
@@ -173,7 +177,7 @@ static __be32 decode_op_hdr(struct xdr_stream *xdr, unsigned int *op)
173 __be32 *p; 177 __be32 *p;
174 p = read_buf(xdr, 4); 178 p = read_buf(xdr, 4);
175 if (unlikely(p == NULL)) 179 if (unlikely(p == NULL))
176 return htonl(NFS4ERR_RESOURCE); 180 return htonl(NFS4ERR_RESOURCE_HDR);
177 *op = ntohl(*p); 181 *op = ntohl(*p);
178 return 0; 182 return 0;
179} 183}
@@ -215,10 +219,10 @@ out:
215 219
216#if defined(CONFIG_NFS_V4_1) 220#if defined(CONFIG_NFS_V4_1)
217 221
218static unsigned decode_sessionid(struct xdr_stream *xdr, 222static __be32 decode_sessionid(struct xdr_stream *xdr,
219 struct nfs4_sessionid *sid) 223 struct nfs4_sessionid *sid)
220{ 224{
221 uint32_t *p; 225 __be32 *p;
222 int len = NFS4_MAX_SESSIONID_LEN; 226 int len = NFS4_MAX_SESSIONID_LEN;
223 227
224 p = read_buf(xdr, len); 228 p = read_buf(xdr, len);
@@ -229,12 +233,12 @@ static unsigned decode_sessionid(struct xdr_stream *xdr,
229 return 0; 233 return 0;
230} 234}
231 235
232static unsigned decode_rc_list(struct xdr_stream *xdr, 236static __be32 decode_rc_list(struct xdr_stream *xdr,
233 struct referring_call_list *rc_list) 237 struct referring_call_list *rc_list)
234{ 238{
235 uint32_t *p; 239 __be32 *p;
236 int i; 240 int i;
237 unsigned status; 241 __be32 status;
238 242
239 status = decode_sessionid(xdr, &rc_list->rcl_sessionid); 243 status = decode_sessionid(xdr, &rc_list->rcl_sessionid);
240 if (status) 244 if (status)
@@ -267,13 +271,13 @@ out:
267 return status; 271 return status;
268} 272}
269 273
270static unsigned decode_cb_sequence_args(struct svc_rqst *rqstp, 274static __be32 decode_cb_sequence_args(struct svc_rqst *rqstp,
271 struct xdr_stream *xdr, 275 struct xdr_stream *xdr,
272 struct cb_sequenceargs *args) 276 struct cb_sequenceargs *args)
273{ 277{
274 uint32_t *p; 278 __be32 *p;
275 int i; 279 int i;
276 unsigned status; 280 __be32 status;
277 281
278 status = decode_sessionid(xdr, &args->csa_sessionid); 282 status = decode_sessionid(xdr, &args->csa_sessionid);
279 if (status) 283 if (status)
@@ -327,11 +331,11 @@ out_free:
327 goto out; 331 goto out;
328} 332}
329 333
330static unsigned decode_recallany_args(struct svc_rqst *rqstp, 334static __be32 decode_recallany_args(struct svc_rqst *rqstp,
331 struct xdr_stream *xdr, 335 struct xdr_stream *xdr,
332 struct cb_recallanyargs *args) 336 struct cb_recallanyargs *args)
333{ 337{
334 uint32_t *p; 338 __be32 *p;
335 339
336 args->craa_addr = svc_addr(rqstp); 340 args->craa_addr = svc_addr(rqstp);
337 p = read_buf(xdr, 4); 341 p = read_buf(xdr, 4);
@@ -346,6 +350,20 @@ static unsigned decode_recallany_args(struct svc_rqst *rqstp,
346 return 0; 350 return 0;
347} 351}
348 352
353static __be32 decode_recallslot_args(struct svc_rqst *rqstp,
354 struct xdr_stream *xdr,
355 struct cb_recallslotargs *args)
356{
357 __be32 *p;
358
359 args->crsa_addr = svc_addr(rqstp);
360 p = read_buf(xdr, 4);
361 if (unlikely(p == NULL))
362 return htonl(NFS4ERR_BADXDR);
363 args->crsa_target_max_slots = ntohl(*p++);
364 return 0;
365}
366
349#endif /* CONFIG_NFS_V4_1 */ 367#endif /* CONFIG_NFS_V4_1 */
350 368
351static __be32 encode_string(struct xdr_stream *xdr, unsigned int len, const char *str) 369static __be32 encode_string(struct xdr_stream *xdr, unsigned int len, const char *str)
@@ -465,7 +483,7 @@ static __be32 encode_op_hdr(struct xdr_stream *xdr, uint32_t op, __be32 res)
465 483
466 p = xdr_reserve_space(xdr, 8); 484 p = xdr_reserve_space(xdr, 8);
467 if (unlikely(p == NULL)) 485 if (unlikely(p == NULL))
468 return htonl(NFS4ERR_RESOURCE); 486 return htonl(NFS4ERR_RESOURCE_HDR);
469 *p++ = htonl(op); 487 *p++ = htonl(op);
470 *p = res; 488 *p = res;
471 return 0; 489 return 0;
@@ -499,10 +517,10 @@ out:
499 517
500#if defined(CONFIG_NFS_V4_1) 518#if defined(CONFIG_NFS_V4_1)
501 519
502static unsigned encode_sessionid(struct xdr_stream *xdr, 520static __be32 encode_sessionid(struct xdr_stream *xdr,
503 const struct nfs4_sessionid *sid) 521 const struct nfs4_sessionid *sid)
504{ 522{
505 uint32_t *p; 523 __be32 *p;
506 int len = NFS4_MAX_SESSIONID_LEN; 524 int len = NFS4_MAX_SESSIONID_LEN;
507 525
508 p = xdr_reserve_space(xdr, len); 526 p = xdr_reserve_space(xdr, len);
@@ -513,11 +531,11 @@ static unsigned encode_sessionid(struct xdr_stream *xdr,
513 return 0; 531 return 0;
514} 532}
515 533
516static unsigned encode_cb_sequence_res(struct svc_rqst *rqstp, 534static __be32 encode_cb_sequence_res(struct svc_rqst *rqstp,
517 struct xdr_stream *xdr, 535 struct xdr_stream *xdr,
518 const struct cb_sequenceres *res) 536 const struct cb_sequenceres *res)
519{ 537{
520 uint32_t *p; 538 __be32 *p;
521 unsigned status = res->csr_status; 539 unsigned status = res->csr_status;
522 540
523 if (unlikely(status != 0)) 541 if (unlikely(status != 0))
@@ -554,6 +572,7 @@ preprocess_nfs41_op(int nop, unsigned int op_nr, struct callback_op **op)
554 case OP_CB_RECALL: 572 case OP_CB_RECALL:
555 case OP_CB_SEQUENCE: 573 case OP_CB_SEQUENCE:
556 case OP_CB_RECALL_ANY: 574 case OP_CB_RECALL_ANY:
575 case OP_CB_RECALL_SLOT:
557 *op = &callback_ops[op_nr]; 576 *op = &callback_ops[op_nr];
558 break; 577 break;
559 578
@@ -562,7 +581,6 @@ preprocess_nfs41_op(int nop, unsigned int op_nr, struct callback_op **op)
562 case OP_CB_NOTIFY: 581 case OP_CB_NOTIFY:
563 case OP_CB_PUSH_DELEG: 582 case OP_CB_PUSH_DELEG:
564 case OP_CB_RECALLABLE_OBJ_AVAIL: 583 case OP_CB_RECALLABLE_OBJ_AVAIL:
565 case OP_CB_RECALL_SLOT:
566 case OP_CB_WANTS_CANCELLED: 584 case OP_CB_WANTS_CANCELLED:
567 case OP_CB_NOTIFY_LOCK: 585 case OP_CB_NOTIFY_LOCK:
568 return htonl(NFS4ERR_NOTSUPP); 586 return htonl(NFS4ERR_NOTSUPP);
@@ -602,20 +620,18 @@ preprocess_nfs4_op(unsigned int op_nr, struct callback_op **op)
602static __be32 process_op(uint32_t minorversion, int nop, 620static __be32 process_op(uint32_t minorversion, int nop,
603 struct svc_rqst *rqstp, 621 struct svc_rqst *rqstp,
604 struct xdr_stream *xdr_in, void *argp, 622 struct xdr_stream *xdr_in, void *argp,
605 struct xdr_stream *xdr_out, void *resp) 623 struct xdr_stream *xdr_out, void *resp, int* drc_status)
606{ 624{
607 struct callback_op *op = &callback_ops[0]; 625 struct callback_op *op = &callback_ops[0];
608 unsigned int op_nr = OP_CB_ILLEGAL; 626 unsigned int op_nr;
609 __be32 status; 627 __be32 status;
610 long maxlen; 628 long maxlen;
611 __be32 res; 629 __be32 res;
612 630
613 dprintk("%s: start\n", __func__); 631 dprintk("%s: start\n", __func__);
614 status = decode_op_hdr(xdr_in, &op_nr); 632 status = decode_op_hdr(xdr_in, &op_nr);
615 if (unlikely(status)) { 633 if (unlikely(status))
616 status = htonl(NFS4ERR_OP_ILLEGAL); 634 return status;
617 goto out;
618 }
619 635
620 dprintk("%s: minorversion=%d nop=%d op_nr=%u\n", 636 dprintk("%s: minorversion=%d nop=%d op_nr=%u\n",
621 __func__, minorversion, nop, op_nr); 637 __func__, minorversion, nop, op_nr);
@@ -624,19 +640,32 @@ static __be32 process_op(uint32_t minorversion, int nop,
624 preprocess_nfs4_op(op_nr, &op); 640 preprocess_nfs4_op(op_nr, &op);
625 if (status == htonl(NFS4ERR_OP_ILLEGAL)) 641 if (status == htonl(NFS4ERR_OP_ILLEGAL))
626 op_nr = OP_CB_ILLEGAL; 642 op_nr = OP_CB_ILLEGAL;
627out: 643 if (status)
644 goto encode_hdr;
645
646 if (*drc_status) {
647 status = *drc_status;
648 goto encode_hdr;
649 }
650
628 maxlen = xdr_out->end - xdr_out->p; 651 maxlen = xdr_out->end - xdr_out->p;
629 if (maxlen > 0 && maxlen < PAGE_SIZE) { 652 if (maxlen > 0 && maxlen < PAGE_SIZE) {
630 if (likely(status == 0 && op->decode_args != NULL)) 653 status = op->decode_args(rqstp, xdr_in, argp);
631 status = op->decode_args(rqstp, xdr_in, argp); 654 if (likely(status == 0))
632 if (likely(status == 0 && op->process_op != NULL))
633 status = op->process_op(argp, resp); 655 status = op->process_op(argp, resp);
634 } else 656 } else
635 status = htonl(NFS4ERR_RESOURCE); 657 status = htonl(NFS4ERR_RESOURCE);
636 658
659 /* Only set by OP_CB_SEQUENCE processing */
660 if (status == htonl(NFS4ERR_RETRY_UNCACHED_REP)) {
661 *drc_status = status;
662 status = 0;
663 }
664
665encode_hdr:
637 res = encode_op_hdr(xdr_out, op_nr, status); 666 res = encode_op_hdr(xdr_out, op_nr, status);
638 if (status == 0) 667 if (unlikely(res))
639 status = res; 668 return res;
640 if (op->encode_res != NULL && status == 0) 669 if (op->encode_res != NULL && status == 0)
641 status = op->encode_res(rqstp, xdr_out, resp); 670 status = op->encode_res(rqstp, xdr_out, resp);
642 dprintk("%s: done, status = %d\n", __func__, ntohl(status)); 671 dprintk("%s: done, status = %d\n", __func__, ntohl(status));
@@ -652,7 +681,7 @@ static __be32 nfs4_callback_compound(struct svc_rqst *rqstp, void *argp, void *r
652 struct cb_compound_hdr_res hdr_res = { NULL }; 681 struct cb_compound_hdr_res hdr_res = { NULL };
653 struct xdr_stream xdr_in, xdr_out; 682 struct xdr_stream xdr_in, xdr_out;
654 __be32 *p; 683 __be32 *p;
655 __be32 status; 684 __be32 status, drc_status = 0;
656 unsigned int nops = 0; 685 unsigned int nops = 0;
657 686
658 dprintk("%s: start\n", __func__); 687 dprintk("%s: start\n", __func__);
@@ -672,11 +701,18 @@ static __be32 nfs4_callback_compound(struct svc_rqst *rqstp, void *argp, void *r
672 return rpc_system_err; 701 return rpc_system_err;
673 702
674 while (status == 0 && nops != hdr_arg.nops) { 703 while (status == 0 && nops != hdr_arg.nops) {
675 status = process_op(hdr_arg.minorversion, nops, 704 status = process_op(hdr_arg.minorversion, nops, rqstp,
676 rqstp, &xdr_in, argp, &xdr_out, resp); 705 &xdr_in, argp, &xdr_out, resp, &drc_status);
677 nops++; 706 nops++;
678 } 707 }
679 708
709 /* Buffer overflow in decode_ops_hdr or encode_ops_hdr. Return
710 * resource error in cb_compound status without returning op */
711 if (unlikely(status == htonl(NFS4ERR_RESOURCE_HDR))) {
712 status = htonl(NFS4ERR_RESOURCE);
713 nops--;
714 }
715
680 *hdr_res.status = status; 716 *hdr_res.status = status;
681 *hdr_res.nops = htonl(nops); 717 *hdr_res.nops = htonl(nops);
682 dprintk("%s: done, status = %u\n", __func__, ntohl(status)); 718 dprintk("%s: done, status = %u\n", __func__, ntohl(status));
@@ -713,6 +749,11 @@ static struct callback_op callback_ops[] = {
713 .decode_args = (callback_decode_arg_t)decode_recallany_args, 749 .decode_args = (callback_decode_arg_t)decode_recallany_args,
714 .res_maxsize = CB_OP_RECALLANY_RES_MAXSZ, 750 .res_maxsize = CB_OP_RECALLANY_RES_MAXSZ,
715 }, 751 },
752 [OP_CB_RECALL_SLOT] = {
753 .process_op = (callback_process_op_t)nfs4_callback_recallslot,
754 .decode_args = (callback_decode_arg_t)decode_recallslot_args,
755 .res_maxsize = CB_OP_RECALLSLOT_RES_MAXSZ,
756 },
716#endif /* CONFIG_NFS_V4_1 */ 757#endif /* CONFIG_NFS_V4_1 */
717}; 758};
718 759
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index ee77713ce68b..2274f1737336 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -164,30 +164,7 @@ error_0:
164 return ERR_PTR(err); 164 return ERR_PTR(err);
165} 165}
166 166
167static void nfs4_shutdown_client(struct nfs_client *clp)
168{
169#ifdef CONFIG_NFS_V4
170 if (__test_and_clear_bit(NFS_CS_RENEWD, &clp->cl_res_state))
171 nfs4_kill_renewd(clp);
172 BUG_ON(!RB_EMPTY_ROOT(&clp->cl_state_owners));
173 if (__test_and_clear_bit(NFS_CS_IDMAP, &clp->cl_res_state))
174 nfs_idmap_delete(clp);
175
176 rpc_destroy_wait_queue(&clp->cl_rpcwaitq);
177#endif
178}
179
180/*
181 * Destroy the NFS4 callback service
182 */
183static void nfs4_destroy_callback(struct nfs_client *clp)
184{
185#ifdef CONFIG_NFS_V4 167#ifdef CONFIG_NFS_V4
186 if (__test_and_clear_bit(NFS_CS_CALLBACK, &clp->cl_res_state))
187 nfs_callback_down(clp->cl_minorversion);
188#endif /* CONFIG_NFS_V4 */
189}
190
191/* 168/*
192 * Clears/puts all minor version specific parts from an nfs_client struct 169 * Clears/puts all minor version specific parts from an nfs_client struct
193 * reverting it to minorversion 0. 170 * reverting it to minorversion 0.
@@ -202,9 +179,33 @@ static void nfs4_clear_client_minor_version(struct nfs_client *clp)
202 179
203 clp->cl_call_sync = _nfs4_call_sync; 180 clp->cl_call_sync = _nfs4_call_sync;
204#endif /* CONFIG_NFS_V4_1 */ 181#endif /* CONFIG_NFS_V4_1 */
182}
205 183
184/*
185 * Destroy the NFS4 callback service
186 */
187static void nfs4_destroy_callback(struct nfs_client *clp)
188{
189 if (__test_and_clear_bit(NFS_CS_CALLBACK, &clp->cl_res_state))
190 nfs_callback_down(clp->cl_minorversion);
191}
192
193static void nfs4_shutdown_client(struct nfs_client *clp)
194{
195 if (__test_and_clear_bit(NFS_CS_RENEWD, &clp->cl_res_state))
196 nfs4_kill_renewd(clp);
197 nfs4_clear_client_minor_version(clp);
206 nfs4_destroy_callback(clp); 198 nfs4_destroy_callback(clp);
199 if (__test_and_clear_bit(NFS_CS_IDMAP, &clp->cl_res_state))
200 nfs_idmap_delete(clp);
201
202 rpc_destroy_wait_queue(&clp->cl_rpcwaitq);
207} 203}
204#else
205static void nfs4_shutdown_client(struct nfs_client *clp)
206{
207}
208#endif /* CONFIG_NFS_V4 */
208 209
209/* 210/*
210 * Destroy a shared client record 211 * Destroy a shared client record
@@ -213,7 +214,6 @@ static void nfs_free_client(struct nfs_client *clp)
213{ 214{
214 dprintk("--> nfs_free_client(%u)\n", clp->rpc_ops->version); 215 dprintk("--> nfs_free_client(%u)\n", clp->rpc_ops->version);
215 216
216 nfs4_clear_client_minor_version(clp);
217 nfs4_shutdown_client(clp); 217 nfs4_shutdown_client(clp);
218 218
219 nfs_fscache_release_client_cookie(clp); 219 nfs_fscache_release_client_cookie(clp);
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 3c7f03b669fb..a1f6b4438fb1 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -560,7 +560,7 @@ static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
560 desc->entry = &my_entry; 560 desc->entry = &my_entry;
561 561
562 nfs_block_sillyrename(dentry); 562 nfs_block_sillyrename(dentry);
563 res = nfs_revalidate_mapping_nolock(inode, filp->f_mapping); 563 res = nfs_revalidate_mapping(inode, filp->f_mapping);
564 if (res < 0) 564 if (res < 0)
565 goto out; 565 goto out;
566 566
diff --git a/fs/nfs/dns_resolve.c b/fs/nfs/dns_resolve.c
index 95e1ca765d47..3f0cd4dfddaf 100644
--- a/fs/nfs/dns_resolve.c
+++ b/fs/nfs/dns_resolve.c
@@ -36,6 +36,19 @@ struct nfs_dns_ent {
36}; 36};
37 37
38 38
39static void nfs_dns_ent_update(struct cache_head *cnew,
40 struct cache_head *ckey)
41{
42 struct nfs_dns_ent *new;
43 struct nfs_dns_ent *key;
44
45 new = container_of(cnew, struct nfs_dns_ent, h);
46 key = container_of(ckey, struct nfs_dns_ent, h);
47
48 memcpy(&new->addr, &key->addr, key->addrlen);
49 new->addrlen = key->addrlen;
50}
51
39static void nfs_dns_ent_init(struct cache_head *cnew, 52static void nfs_dns_ent_init(struct cache_head *cnew,
40 struct cache_head *ckey) 53 struct cache_head *ckey)
41{ 54{
@@ -49,8 +62,7 @@ static void nfs_dns_ent_init(struct cache_head *cnew,
49 new->hostname = kstrndup(key->hostname, key->namelen, GFP_KERNEL); 62 new->hostname = kstrndup(key->hostname, key->namelen, GFP_KERNEL);
50 if (new->hostname) { 63 if (new->hostname) {
51 new->namelen = key->namelen; 64 new->namelen = key->namelen;
52 memcpy(&new->addr, &key->addr, key->addrlen); 65 nfs_dns_ent_update(cnew, ckey);
53 new->addrlen = key->addrlen;
54 } else { 66 } else {
55 new->namelen = 0; 67 new->namelen = 0;
56 new->addrlen = 0; 68 new->addrlen = 0;
@@ -234,7 +246,7 @@ static struct cache_detail nfs_dns_resolve = {
234 .cache_show = nfs_dns_show, 246 .cache_show = nfs_dns_show,
235 .match = nfs_dns_match, 247 .match = nfs_dns_match,
236 .init = nfs_dns_ent_init, 248 .init = nfs_dns_ent_init,
237 .update = nfs_dns_ent_init, 249 .update = nfs_dns_ent_update,
238 .alloc = nfs_dns_ent_alloc, 250 .alloc = nfs_dns_ent_alloc,
239}; 251};
240 252
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 63f2071d6445..ae8d02294e46 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -123,11 +123,11 @@ nfs_file_open(struct inode *inode, struct file *filp)
123 filp->f_path.dentry->d_parent->d_name.name, 123 filp->f_path.dentry->d_parent->d_name.name,
124 filp->f_path.dentry->d_name.name); 124 filp->f_path.dentry->d_name.name);
125 125
126 nfs_inc_stats(inode, NFSIOS_VFSOPEN);
126 res = nfs_check_flags(filp->f_flags); 127 res = nfs_check_flags(filp->f_flags);
127 if (res) 128 if (res)
128 return res; 129 return res;
129 130
130 nfs_inc_stats(inode, NFSIOS_VFSOPEN);
131 res = nfs_open(inode, filp); 131 res = nfs_open(inode, filp);
132 return res; 132 return res;
133} 133}
@@ -237,9 +237,9 @@ nfs_file_flush(struct file *file, fl_owner_t id)
237 dentry->d_parent->d_name.name, 237 dentry->d_parent->d_name.name,
238 dentry->d_name.name); 238 dentry->d_name.name);
239 239
240 nfs_inc_stats(inode, NFSIOS_VFSFLUSH);
240 if ((file->f_mode & FMODE_WRITE) == 0) 241 if ((file->f_mode & FMODE_WRITE) == 0)
241 return 0; 242 return 0;
242 nfs_inc_stats(inode, NFSIOS_VFSFLUSH);
243 243
244 /* Flush writes to the server and return any errors */ 244 /* Flush writes to the server and return any errors */
245 return nfs_do_fsync(ctx, inode); 245 return nfs_do_fsync(ctx, inode);
@@ -262,9 +262,11 @@ nfs_file_read(struct kiocb *iocb, const struct iovec *iov,
262 (unsigned long) count, (unsigned long) pos); 262 (unsigned long) count, (unsigned long) pos);
263 263
264 result = nfs_revalidate_mapping(inode, iocb->ki_filp->f_mapping); 264 result = nfs_revalidate_mapping(inode, iocb->ki_filp->f_mapping);
265 nfs_add_stats(inode, NFSIOS_NORMALREADBYTES, count); 265 if (!result) {
266 if (!result)
267 result = generic_file_aio_read(iocb, iov, nr_segs, pos); 266 result = generic_file_aio_read(iocb, iov, nr_segs, pos);
267 if (result > 0)
268 nfs_add_stats(inode, NFSIOS_NORMALREADBYTES, result);
269 }
268 return result; 270 return result;
269} 271}
270 272
@@ -282,8 +284,11 @@ nfs_file_splice_read(struct file *filp, loff_t *ppos,
282 (unsigned long) count, (unsigned long long) *ppos); 284 (unsigned long) count, (unsigned long long) *ppos);
283 285
284 res = nfs_revalidate_mapping(inode, filp->f_mapping); 286 res = nfs_revalidate_mapping(inode, filp->f_mapping);
285 if (!res) 287 if (!res) {
286 res = generic_file_splice_read(filp, ppos, pipe, count, flags); 288 res = generic_file_splice_read(filp, ppos, pipe, count, flags);
289 if (res > 0)
290 nfs_add_stats(inode, NFSIOS_NORMALREADBYTES, res);
291 }
287 return res; 292 return res;
288} 293}
289 294
@@ -596,6 +601,7 @@ static ssize_t nfs_file_write(struct kiocb *iocb, const struct iovec *iov,
596{ 601{
597 struct dentry * dentry = iocb->ki_filp->f_path.dentry; 602 struct dentry * dentry = iocb->ki_filp->f_path.dentry;
598 struct inode * inode = dentry->d_inode; 603 struct inode * inode = dentry->d_inode;
604 unsigned long written = 0;
599 ssize_t result; 605 ssize_t result;
600 size_t count = iov_length(iov, nr_segs); 606 size_t count = iov_length(iov, nr_segs);
601 607
@@ -622,14 +628,18 @@ static ssize_t nfs_file_write(struct kiocb *iocb, const struct iovec *iov,
622 if (!count) 628 if (!count)
623 goto out; 629 goto out;
624 630
625 nfs_add_stats(inode, NFSIOS_NORMALWRITTENBYTES, count);
626 result = generic_file_aio_write(iocb, iov, nr_segs, pos); 631 result = generic_file_aio_write(iocb, iov, nr_segs, pos);
632 if (result > 0)
633 written = result;
634
627 /* Return error values for O_DSYNC and IS_SYNC() */ 635 /* Return error values for O_DSYNC and IS_SYNC() */
628 if (result >= 0 && nfs_need_sync_write(iocb->ki_filp, inode)) { 636 if (result >= 0 && nfs_need_sync_write(iocb->ki_filp, inode)) {
629 int err = nfs_do_fsync(nfs_file_open_context(iocb->ki_filp), inode); 637 int err = nfs_do_fsync(nfs_file_open_context(iocb->ki_filp), inode);
630 if (err < 0) 638 if (err < 0)
631 result = err; 639 result = err;
632 } 640 }
641 if (result > 0)
642 nfs_add_stats(inode, NFSIOS_NORMALWRITTENBYTES, written);
633out: 643out:
634 return result; 644 return result;
635 645
@@ -644,6 +654,7 @@ static ssize_t nfs_file_splice_write(struct pipe_inode_info *pipe,
644{ 654{
645 struct dentry *dentry = filp->f_path.dentry; 655 struct dentry *dentry = filp->f_path.dentry;
646 struct inode *inode = dentry->d_inode; 656 struct inode *inode = dentry->d_inode;
657 unsigned long written = 0;
647 ssize_t ret; 658 ssize_t ret;
648 659
649 dprintk("NFS splice_write(%s/%s, %lu@%llu)\n", 660 dprintk("NFS splice_write(%s/%s, %lu@%llu)\n",
@@ -654,14 +665,17 @@ static ssize_t nfs_file_splice_write(struct pipe_inode_info *pipe,
654 * The combination of splice and an O_APPEND destination is disallowed. 665 * The combination of splice and an O_APPEND destination is disallowed.
655 */ 666 */
656 667
657 nfs_add_stats(inode, NFSIOS_NORMALWRITTENBYTES, count);
658
659 ret = generic_file_splice_write(pipe, filp, ppos, count, flags); 668 ret = generic_file_splice_write(pipe, filp, ppos, count, flags);
669 if (ret > 0)
670 written = ret;
671
660 if (ret >= 0 && nfs_need_sync_write(filp, inode)) { 672 if (ret >= 0 && nfs_need_sync_write(filp, inode)) {
661 int err = nfs_do_fsync(nfs_file_open_context(filp), inode); 673 int err = nfs_do_fsync(nfs_file_open_context(filp), inode);
662 if (err < 0) 674 if (err < 0)
663 ret = err; 675 ret = err;
664 } 676 }
677 if (ret > 0)
678 nfs_add_stats(inode, NFSIOS_NORMALWRITTENBYTES, written);
665 return ret; 679 return ret;
666} 680}
667 681
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index f141bde7756a..657201acda84 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -97,22 +97,6 @@ u64 nfs_compat_user_ino64(u64 fileid)
97 return ino; 97 return ino;
98} 98}
99 99
100int nfs_write_inode(struct inode *inode, int sync)
101{
102 int ret;
103
104 if (sync) {
105 ret = filemap_fdatawait(inode->i_mapping);
106 if (ret == 0)
107 ret = nfs_commit_inode(inode, FLUSH_SYNC);
108 } else
109 ret = nfs_commit_inode(inode, 0);
110 if (ret >= 0)
111 return 0;
112 __mark_inode_dirty(inode, I_DIRTY_DATASYNC);
113 return ret;
114}
115
116void nfs_clear_inode(struct inode *inode) 100void nfs_clear_inode(struct inode *inode)
117{ 101{
118 /* 102 /*
@@ -130,16 +114,12 @@ void nfs_clear_inode(struct inode *inode)
130 */ 114 */
131int nfs_sync_mapping(struct address_space *mapping) 115int nfs_sync_mapping(struct address_space *mapping)
132{ 116{
133 int ret; 117 int ret = 0;
134 118
135 if (mapping->nrpages == 0) 119 if (mapping->nrpages != 0) {
136 return 0; 120 unmap_mapping_range(mapping, 0, 0, 0);
137 unmap_mapping_range(mapping, 0, 0, 0); 121 ret = nfs_wb_all(mapping->host);
138 ret = filemap_write_and_wait(mapping); 122 }
139 if (ret != 0)
140 goto out;
141 ret = nfs_wb_all(mapping->host);
142out:
143 return ret; 123 return ret;
144} 124}
145 125
@@ -511,17 +491,11 @@ int nfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
511 int need_atime = NFS_I(inode)->cache_validity & NFS_INO_INVALID_ATIME; 491 int need_atime = NFS_I(inode)->cache_validity & NFS_INO_INVALID_ATIME;
512 int err; 492 int err;
513 493
514 /* 494 /* Flush out writes to the server in order to update c/mtime. */
515 * Flush out writes to the server in order to update c/mtime.
516 *
517 * Hold the i_mutex to suspend application writes temporarily;
518 * this prevents long-running writing applications from blocking
519 * nfs_wb_nocommit.
520 */
521 if (S_ISREG(inode->i_mode)) { 495 if (S_ISREG(inode->i_mode)) {
522 mutex_lock(&inode->i_mutex); 496 err = filemap_write_and_wait(inode->i_mapping);
523 nfs_wb_nocommit(inode); 497 if (err)
524 mutex_unlock(&inode->i_mutex); 498 goto out;
525 } 499 }
526 500
527 /* 501 /*
@@ -545,6 +519,7 @@ int nfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
545 generic_fillattr(inode, stat); 519 generic_fillattr(inode, stat);
546 stat->ino = nfs_compat_user_ino64(NFS_FILEID(inode)); 520 stat->ino = nfs_compat_user_ino64(NFS_FILEID(inode));
547 } 521 }
522out:
548 return err; 523 return err;
549} 524}
550 525
@@ -574,14 +549,14 @@ void nfs_close_context(struct nfs_open_context *ctx, int is_sync)
574 nfs_revalidate_inode(server, inode); 549 nfs_revalidate_inode(server, inode);
575} 550}
576 551
577static struct nfs_open_context *alloc_nfs_open_context(struct vfsmount *mnt, struct dentry *dentry, struct rpc_cred *cred) 552static struct nfs_open_context *alloc_nfs_open_context(struct path *path, struct rpc_cred *cred)
578{ 553{
579 struct nfs_open_context *ctx; 554 struct nfs_open_context *ctx;
580 555
581 ctx = kmalloc(sizeof(*ctx), GFP_KERNEL); 556 ctx = kmalloc(sizeof(*ctx), GFP_KERNEL);
582 if (ctx != NULL) { 557 if (ctx != NULL) {
583 ctx->path.dentry = dget(dentry); 558 ctx->path = *path;
584 ctx->path.mnt = mntget(mnt); 559 path_get(&ctx->path);
585 ctx->cred = get_rpccred(cred); 560 ctx->cred = get_rpccred(cred);
586 ctx->state = NULL; 561 ctx->state = NULL;
587 ctx->lockowner = current->files; 562 ctx->lockowner = current->files;
@@ -620,11 +595,6 @@ void put_nfs_open_context(struct nfs_open_context *ctx)
620 __put_nfs_open_context(ctx, 0); 595 __put_nfs_open_context(ctx, 0);
621} 596}
622 597
623static void put_nfs_open_context_sync(struct nfs_open_context *ctx)
624{
625 __put_nfs_open_context(ctx, 1);
626}
627
628/* 598/*
629 * Ensure that mmap has a recent RPC credential for use when writing out 599 * Ensure that mmap has a recent RPC credential for use when writing out
630 * shared pages 600 * shared pages
@@ -671,7 +641,7 @@ static void nfs_file_clear_open_context(struct file *filp)
671 spin_lock(&inode->i_lock); 641 spin_lock(&inode->i_lock);
672 list_move_tail(&ctx->list, &NFS_I(inode)->open_files); 642 list_move_tail(&ctx->list, &NFS_I(inode)->open_files);
673 spin_unlock(&inode->i_lock); 643 spin_unlock(&inode->i_lock);
674 put_nfs_open_context_sync(ctx); 644 __put_nfs_open_context(ctx, filp->f_flags & O_DIRECT ? 0 : 1);
675 } 645 }
676} 646}
677 647
@@ -686,7 +656,7 @@ int nfs_open(struct inode *inode, struct file *filp)
686 cred = rpc_lookup_cred(); 656 cred = rpc_lookup_cred();
687 if (IS_ERR(cred)) 657 if (IS_ERR(cred))
688 return PTR_ERR(cred); 658 return PTR_ERR(cred);
689 ctx = alloc_nfs_open_context(filp->f_path.mnt, filp->f_path.dentry, cred); 659 ctx = alloc_nfs_open_context(&filp->f_path, cred);
690 put_rpccred(cred); 660 put_rpccred(cred);
691 if (ctx == NULL) 661 if (ctx == NULL)
692 return -ENOMEM; 662 return -ENOMEM;
@@ -779,7 +749,7 @@ int nfs_revalidate_inode(struct nfs_server *server, struct inode *inode)
779 return __nfs_revalidate_inode(server, inode); 749 return __nfs_revalidate_inode(server, inode);
780} 750}
781 751
782static int nfs_invalidate_mapping_nolock(struct inode *inode, struct address_space *mapping) 752static int nfs_invalidate_mapping(struct inode *inode, struct address_space *mapping)
783{ 753{
784 struct nfs_inode *nfsi = NFS_I(inode); 754 struct nfs_inode *nfsi = NFS_I(inode);
785 755
@@ -800,49 +770,10 @@ static int nfs_invalidate_mapping_nolock(struct inode *inode, struct address_spa
800 return 0; 770 return 0;
801} 771}
802 772
803static int nfs_invalidate_mapping(struct inode *inode, struct address_space *mapping)
804{
805 int ret = 0;
806
807 mutex_lock(&inode->i_mutex);
808 if (NFS_I(inode)->cache_validity & NFS_INO_INVALID_DATA) {
809 ret = nfs_sync_mapping(mapping);
810 if (ret == 0)
811 ret = nfs_invalidate_mapping_nolock(inode, mapping);
812 }
813 mutex_unlock(&inode->i_mutex);
814 return ret;
815}
816
817/**
818 * nfs_revalidate_mapping_nolock - Revalidate the pagecache
819 * @inode - pointer to host inode
820 * @mapping - pointer to mapping
821 */
822int nfs_revalidate_mapping_nolock(struct inode *inode, struct address_space *mapping)
823{
824 struct nfs_inode *nfsi = NFS_I(inode);
825 int ret = 0;
826
827 if ((nfsi->cache_validity & NFS_INO_REVAL_PAGECACHE)
828 || nfs_attribute_timeout(inode) || NFS_STALE(inode)) {
829 ret = __nfs_revalidate_inode(NFS_SERVER(inode), inode);
830 if (ret < 0)
831 goto out;
832 }
833 if (nfsi->cache_validity & NFS_INO_INVALID_DATA)
834 ret = nfs_invalidate_mapping_nolock(inode, mapping);
835out:
836 return ret;
837}
838
839/** 773/**
840 * nfs_revalidate_mapping - Revalidate the pagecache 774 * nfs_revalidate_mapping - Revalidate the pagecache
841 * @inode - pointer to host inode 775 * @inode - pointer to host inode
842 * @mapping - pointer to mapping 776 * @mapping - pointer to mapping
843 *
844 * This version of the function will take the inode->i_mutex and attempt to
845 * flush out all dirty data if it needs to invalidate the page cache.
846 */ 777 */
847int nfs_revalidate_mapping(struct inode *inode, struct address_space *mapping) 778int nfs_revalidate_mapping(struct inode *inode, struct address_space *mapping)
848{ 779{
@@ -1420,6 +1351,7 @@ static void init_once(void *foo)
1420 INIT_LIST_HEAD(&nfsi->access_cache_inode_lru); 1351 INIT_LIST_HEAD(&nfsi->access_cache_inode_lru);
1421 INIT_RADIX_TREE(&nfsi->nfs_page_tree, GFP_ATOMIC); 1352 INIT_RADIX_TREE(&nfsi->nfs_page_tree, GFP_ATOMIC);
1422 nfsi->npages = 0; 1353 nfsi->npages = 0;
1354 nfsi->ncommit = 0;
1423 atomic_set(&nfsi->silly_count, 1); 1355 atomic_set(&nfsi->silly_count, 1);
1424 INIT_HLIST_HEAD(&nfsi->silly_list); 1356 INIT_HLIST_HEAD(&nfsi->silly_list);
1425 init_waitqueue_head(&nfsi->waitqueue); 1357 init_waitqueue_head(&nfsi->waitqueue);
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 29e464d23b32..11f82f03c5de 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -211,7 +211,7 @@ extern int nfs_access_cache_shrinker(int nr_to_scan, gfp_t gfp_mask);
211extern struct workqueue_struct *nfsiod_workqueue; 211extern struct workqueue_struct *nfsiod_workqueue;
212extern struct inode *nfs_alloc_inode(struct super_block *sb); 212extern struct inode *nfs_alloc_inode(struct super_block *sb);
213extern void nfs_destroy_inode(struct inode *); 213extern void nfs_destroy_inode(struct inode *);
214extern int nfs_write_inode(struct inode *,int); 214extern int nfs_write_inode(struct inode *, struct writeback_control *);
215extern void nfs_clear_inode(struct inode *); 215extern void nfs_clear_inode(struct inode *);
216#ifdef CONFIG_NFS_V4 216#ifdef CONFIG_NFS_V4
217extern void nfs4_clear_inode(struct inode *); 217extern void nfs4_clear_inode(struct inode *);
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c
index 3f8881d1a050..24992f0a29f2 100644
--- a/fs/nfs/nfs3proc.c
+++ b/fs/nfs/nfs3proc.c
@@ -22,14 +22,14 @@
22 22
23#define NFSDBG_FACILITY NFSDBG_PROC 23#define NFSDBG_FACILITY NFSDBG_PROC
24 24
25/* A wrapper to handle the EJUKEBOX error message */ 25/* A wrapper to handle the EJUKEBOX and EKEYEXPIRED error messages */
26static int 26static int
27nfs3_rpc_wrapper(struct rpc_clnt *clnt, struct rpc_message *msg, int flags) 27nfs3_rpc_wrapper(struct rpc_clnt *clnt, struct rpc_message *msg, int flags)
28{ 28{
29 int res; 29 int res;
30 do { 30 do {
31 res = rpc_call_sync(clnt, msg, flags); 31 res = rpc_call_sync(clnt, msg, flags);
32 if (res != -EJUKEBOX) 32 if (res != -EJUKEBOX && res != -EKEYEXPIRED)
33 break; 33 break;
34 schedule_timeout_killable(NFS_JUKEBOX_RETRY_TIME); 34 schedule_timeout_killable(NFS_JUKEBOX_RETRY_TIME);
35 res = -ERESTARTSYS; 35 res = -ERESTARTSYS;
@@ -42,9 +42,10 @@ nfs3_rpc_wrapper(struct rpc_clnt *clnt, struct rpc_message *msg, int flags)
42static int 42static int
43nfs3_async_handle_jukebox(struct rpc_task *task, struct inode *inode) 43nfs3_async_handle_jukebox(struct rpc_task *task, struct inode *inode)
44{ 44{
45 if (task->tk_status != -EJUKEBOX) 45 if (task->tk_status != -EJUKEBOX && task->tk_status != -EKEYEXPIRED)
46 return 0; 46 return 0;
47 nfs_inc_stats(inode, NFSIOS_DELAY); 47 if (task->tk_status == -EJUKEBOX)
48 nfs_inc_stats(inode, NFSIOS_DELAY);
48 task->tk_status = 0; 49 task->tk_status = 0;
49 rpc_restart_call(task); 50 rpc_restart_call(task);
50 rpc_delay(task, NFS_JUKEBOX_RETRY_TIME); 51 rpc_delay(task, NFS_JUKEBOX_RETRY_TIME);
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index 0c6fda33d66e..a187200a7aac 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -46,6 +46,7 @@ enum nfs4_client_state {
46 NFS4CLNT_DELEGRETURN, 46 NFS4CLNT_DELEGRETURN,
47 NFS4CLNT_SESSION_RESET, 47 NFS4CLNT_SESSION_RESET,
48 NFS4CLNT_SESSION_DRAINING, 48 NFS4CLNT_SESSION_DRAINING,
49 NFS4CLNT_RECALL_SLOT,
49}; 50};
50 51
51/* 52/*
@@ -280,6 +281,7 @@ extern void nfs4_schedule_state_manager(struct nfs_client *);
280extern int nfs4_state_mark_reclaim_nograce(struct nfs_client *clp, struct nfs4_state *state); 281extern int nfs4_state_mark_reclaim_nograce(struct nfs_client *clp, struct nfs4_state *state);
281extern int nfs4_state_mark_reclaim_reboot(struct nfs_client *clp, struct nfs4_state *state); 282extern int nfs4_state_mark_reclaim_reboot(struct nfs_client *clp, struct nfs4_state *state);
282extern void nfs41_handle_sequence_flag_errors(struct nfs_client *clp, u32 flags); 283extern void nfs41_handle_sequence_flag_errors(struct nfs_client *clp, u32 flags);
284extern void nfs41_handle_recall_slot(struct nfs_client *clp);
283extern void nfs4_put_lock_state(struct nfs4_lock_state *lsp); 285extern void nfs4_put_lock_state(struct nfs4_lock_state *lsp);
284extern int nfs4_set_lock_state(struct nfs4_state *state, struct file_lock *fl); 286extern int nfs4_set_lock_state(struct nfs4_state *state, struct file_lock *fl);
285extern void nfs4_copy_stateid(nfs4_stateid *, struct nfs4_state *, fl_owner_t); 287extern void nfs4_copy_stateid(nfs4_stateid *, struct nfs4_state *, fl_owner_t);
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 375f0fae2c6a..eda74c42d552 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -281,6 +281,7 @@ static int nfs4_handle_exception(const struct nfs_server *server, int errorcode,
281 } 281 }
282 case -NFS4ERR_GRACE: 282 case -NFS4ERR_GRACE:
283 case -NFS4ERR_DELAY: 283 case -NFS4ERR_DELAY:
284 case -EKEYEXPIRED:
284 ret = nfs4_delay(server->client, &exception->timeout); 285 ret = nfs4_delay(server->client, &exception->timeout);
285 if (ret != 0) 286 if (ret != 0)
286 break; 287 break;
@@ -418,7 +419,8 @@ static void nfs41_sequence_done(struct nfs_client *clp,
418 clp->cl_last_renewal = timestamp; 419 clp->cl_last_renewal = timestamp;
419 spin_unlock(&clp->cl_lock); 420 spin_unlock(&clp->cl_lock);
420 /* Check sequence flags */ 421 /* Check sequence flags */
421 nfs41_handle_sequence_flag_errors(clp, res->sr_status_flags); 422 if (atomic_read(&clp->cl_count) > 1)
423 nfs41_handle_sequence_flag_errors(clp, res->sr_status_flags);
422 } 424 }
423out: 425out:
424 /* The session may be reset by one of the error handlers. */ 426 /* The session may be reset by one of the error handlers. */
@@ -724,8 +726,8 @@ static struct nfs4_opendata *nfs4_opendata_alloc(struct path *path,
724 p->o_arg.seqid = nfs_alloc_seqid(&sp->so_seqid); 726 p->o_arg.seqid = nfs_alloc_seqid(&sp->so_seqid);
725 if (p->o_arg.seqid == NULL) 727 if (p->o_arg.seqid == NULL)
726 goto err_free; 728 goto err_free;
727 p->path.mnt = mntget(path->mnt); 729 path_get(path);
728 p->path.dentry = dget(path->dentry); 730 p->path = *path;
729 p->dir = parent; 731 p->dir = parent;
730 p->owner = sp; 732 p->owner = sp;
731 atomic_inc(&sp->so_count); 733 atomic_inc(&sp->so_count);
@@ -1163,7 +1165,7 @@ static int nfs4_do_open_reclaim(struct nfs_open_context *ctx, struct nfs4_state
1163 int err; 1165 int err;
1164 do { 1166 do {
1165 err = _nfs4_do_open_reclaim(ctx, state); 1167 err = _nfs4_do_open_reclaim(ctx, state);
1166 if (err != -NFS4ERR_DELAY) 1168 if (err != -NFS4ERR_DELAY && err != -EKEYEXPIRED)
1167 break; 1169 break;
1168 nfs4_handle_exception(server, err, &exception); 1170 nfs4_handle_exception(server, err, &exception);
1169 } while (exception.retry); 1171 } while (exception.retry);
@@ -1582,6 +1584,7 @@ static int nfs4_do_open_expired(struct nfs_open_context *ctx, struct nfs4_state
1582 goto out; 1584 goto out;
1583 case -NFS4ERR_GRACE: 1585 case -NFS4ERR_GRACE:
1584 case -NFS4ERR_DELAY: 1586 case -NFS4ERR_DELAY:
1587 case -EKEYEXPIRED:
1585 nfs4_handle_exception(server, err, &exception); 1588 nfs4_handle_exception(server, err, &exception);
1586 err = 0; 1589 err = 0;
1587 } 1590 }
@@ -1944,8 +1947,8 @@ int nfs4_do_close(struct path *path, struct nfs4_state *state, int wait)
1944 calldata->res.seqid = calldata->arg.seqid; 1947 calldata->res.seqid = calldata->arg.seqid;
1945 calldata->res.server = server; 1948 calldata->res.server = server;
1946 calldata->res.seq_res.sr_slotid = NFS4_MAX_SLOT_TABLE; 1949 calldata->res.seq_res.sr_slotid = NFS4_MAX_SLOT_TABLE;
1947 calldata->path.mnt = mntget(path->mnt); 1950 path_get(path);
1948 calldata->path.dentry = dget(path->dentry); 1951 calldata->path = *path;
1949 1952
1950 msg.rpc_argp = &calldata->arg, 1953 msg.rpc_argp = &calldata->arg,
1951 msg.rpc_resp = &calldata->res, 1954 msg.rpc_resp = &calldata->res,
@@ -3145,10 +3148,19 @@ static void nfs4_proc_commit_setup(struct nfs_write_data *data, struct rpc_messa
3145 * nfs4_proc_async_renew(): This is not one of the nfs_rpc_ops; it is a special 3148 * nfs4_proc_async_renew(): This is not one of the nfs_rpc_ops; it is a special
3146 * standalone procedure for queueing an asynchronous RENEW. 3149 * standalone procedure for queueing an asynchronous RENEW.
3147 */ 3150 */
3151static void nfs4_renew_release(void *data)
3152{
3153 struct nfs_client *clp = data;
3154
3155 if (atomic_read(&clp->cl_count) > 1)
3156 nfs4_schedule_state_renewal(clp);
3157 nfs_put_client(clp);
3158}
3159
3148static void nfs4_renew_done(struct rpc_task *task, void *data) 3160static void nfs4_renew_done(struct rpc_task *task, void *data)
3149{ 3161{
3150 struct nfs_client *clp = (struct nfs_client *)task->tk_msg.rpc_argp; 3162 struct nfs_client *clp = data;
3151 unsigned long timestamp = (unsigned long)data; 3163 unsigned long timestamp = task->tk_start;
3152 3164
3153 if (task->tk_status < 0) { 3165 if (task->tk_status < 0) {
3154 /* Unless we're shutting down, schedule state recovery! */ 3166 /* Unless we're shutting down, schedule state recovery! */
@@ -3164,6 +3176,7 @@ static void nfs4_renew_done(struct rpc_task *task, void *data)
3164 3176
3165static const struct rpc_call_ops nfs4_renew_ops = { 3177static const struct rpc_call_ops nfs4_renew_ops = {
3166 .rpc_call_done = nfs4_renew_done, 3178 .rpc_call_done = nfs4_renew_done,
3179 .rpc_release = nfs4_renew_release,
3167}; 3180};
3168 3181
3169int nfs4_proc_async_renew(struct nfs_client *clp, struct rpc_cred *cred) 3182int nfs4_proc_async_renew(struct nfs_client *clp, struct rpc_cred *cred)
@@ -3174,8 +3187,10 @@ int nfs4_proc_async_renew(struct nfs_client *clp, struct rpc_cred *cred)
3174 .rpc_cred = cred, 3187 .rpc_cred = cred,
3175 }; 3188 };
3176 3189
3190 if (!atomic_inc_not_zero(&clp->cl_count))
3191 return -EIO;
3177 return rpc_call_async(clp->cl_rpcclient, &msg, RPC_TASK_SOFT, 3192 return rpc_call_async(clp->cl_rpcclient, &msg, RPC_TASK_SOFT,
3178 &nfs4_renew_ops, (void *)jiffies); 3193 &nfs4_renew_ops, clp);
3179} 3194}
3180 3195
3181int nfs4_proc_renew(struct nfs_client *clp, struct rpc_cred *cred) 3196int nfs4_proc_renew(struct nfs_client *clp, struct rpc_cred *cred)
@@ -3452,6 +3467,7 @@ _nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server,
3452 if (server) 3467 if (server)
3453 nfs_inc_server_stats(server, NFSIOS_DELAY); 3468 nfs_inc_server_stats(server, NFSIOS_DELAY);
3454 case -NFS4ERR_GRACE: 3469 case -NFS4ERR_GRACE:
3470 case -EKEYEXPIRED:
3455 rpc_delay(task, NFS4_POLL_RETRY_MAX); 3471 rpc_delay(task, NFS4_POLL_RETRY_MAX);
3456 task->tk_status = 0; 3472 task->tk_status = 0;
3457 return -EAGAIN; 3473 return -EAGAIN;
@@ -3564,6 +3580,7 @@ int nfs4_proc_setclientid_confirm(struct nfs_client *clp, struct rpc_cred *cred)
3564 case -NFS4ERR_RESOURCE: 3580 case -NFS4ERR_RESOURCE:
3565 /* The IBM lawyers misread another document! */ 3581 /* The IBM lawyers misread another document! */
3566 case -NFS4ERR_DELAY: 3582 case -NFS4ERR_DELAY:
3583 case -EKEYEXPIRED:
3567 err = nfs4_delay(clp->cl_rpcclient, &timeout); 3584 err = nfs4_delay(clp->cl_rpcclient, &timeout);
3568 } 3585 }
3569 } while (err == 0); 3586 } while (err == 0);
@@ -4179,7 +4196,7 @@ static int nfs4_lock_reclaim(struct nfs4_state *state, struct file_lock *request
4179 if (test_bit(NFS_DELEGATED_STATE, &state->flags) != 0) 4196 if (test_bit(NFS_DELEGATED_STATE, &state->flags) != 0)
4180 return 0; 4197 return 0;
4181 err = _nfs4_do_setlk(state, F_SETLK, request, NFS_LOCK_RECLAIM); 4198 err = _nfs4_do_setlk(state, F_SETLK, request, NFS_LOCK_RECLAIM);
4182 if (err != -NFS4ERR_DELAY) 4199 if (err != -NFS4ERR_DELAY && err != -EKEYEXPIRED)
4183 break; 4200 break;
4184 nfs4_handle_exception(server, err, &exception); 4201 nfs4_handle_exception(server, err, &exception);
4185 } while (exception.retry); 4202 } while (exception.retry);
@@ -4204,6 +4221,7 @@ static int nfs4_lock_expired(struct nfs4_state *state, struct file_lock *request
4204 goto out; 4221 goto out;
4205 case -NFS4ERR_GRACE: 4222 case -NFS4ERR_GRACE:
4206 case -NFS4ERR_DELAY: 4223 case -NFS4ERR_DELAY:
4224 case -EKEYEXPIRED:
4207 nfs4_handle_exception(server, err, &exception); 4225 nfs4_handle_exception(server, err, &exception);
4208 err = 0; 4226 err = 0;
4209 } 4227 }
@@ -4355,6 +4373,7 @@ int nfs4_lock_delegation_recall(struct nfs4_state *state, struct file_lock *fl)
4355 err = 0; 4373 err = 0;
4356 goto out; 4374 goto out;
4357 case -NFS4ERR_DELAY: 4375 case -NFS4ERR_DELAY:
4376 case -EKEYEXPIRED:
4358 break; 4377 break;
4359 } 4378 }
4360 err = nfs4_handle_exception(server, err, &exception); 4379 err = nfs4_handle_exception(server, err, &exception);
@@ -4500,7 +4519,7 @@ int nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred)
4500 4519
4501 status = rpc_call_sync(clp->cl_rpcclient, &msg, 0); 4520 status = rpc_call_sync(clp->cl_rpcclient, &msg, 0);
4502 4521
4503 if (status != NFS4ERR_CLID_INUSE) 4522 if (status != -NFS4ERR_CLID_INUSE)
4504 break; 4523 break;
4505 4524
4506 if (signalled()) 4525 if (signalled())
@@ -4554,6 +4573,7 @@ static void nfs4_get_lease_time_done(struct rpc_task *task, void *calldata)
4554 switch (task->tk_status) { 4573 switch (task->tk_status) {
4555 case -NFS4ERR_DELAY: 4574 case -NFS4ERR_DELAY:
4556 case -NFS4ERR_GRACE: 4575 case -NFS4ERR_GRACE:
4576 case -EKEYEXPIRED:
4557 dprintk("%s Retry: tk_status %d\n", __func__, task->tk_status); 4577 dprintk("%s Retry: tk_status %d\n", __func__, task->tk_status);
4558 rpc_delay(task, NFS4_POLL_RETRY_MIN); 4578 rpc_delay(task, NFS4_POLL_RETRY_MIN);
4559 task->tk_status = 0; 4579 task->tk_status = 0;
@@ -4611,26 +4631,32 @@ int nfs4_proc_get_lease_time(struct nfs_client *clp, struct nfs_fsinfo *fsinfo)
4611/* 4631/*
4612 * Reset a slot table 4632 * Reset a slot table
4613 */ 4633 */
4614static int nfs4_reset_slot_table(struct nfs4_slot_table *tbl, int max_slots, 4634static int nfs4_reset_slot_table(struct nfs4_slot_table *tbl, u32 max_reqs,
4615 int old_max_slots, int ivalue) 4635 int ivalue)
4616{ 4636{
4637 struct nfs4_slot *new = NULL;
4617 int i; 4638 int i;
4618 int ret = 0; 4639 int ret = 0;
4619 4640
4620 dprintk("--> %s: max_reqs=%u, tbl %p\n", __func__, max_slots, tbl); 4641 dprintk("--> %s: max_reqs=%u, tbl->max_slots %d\n", __func__,
4642 max_reqs, tbl->max_slots);
4621 4643
4622 /* 4644 /* Does the newly negotiated max_reqs match the existing slot table? */
4623 * Until we have dynamic slot table adjustment, insist 4645 if (max_reqs != tbl->max_slots) {
4624 * upon the same slot table size 4646 ret = -ENOMEM;
4625 */ 4647 new = kmalloc(max_reqs * sizeof(struct nfs4_slot),
4626 if (max_slots != old_max_slots) { 4648 GFP_KERNEL);
4627 dprintk("%s reset slot table does't match old\n", 4649 if (!new)
4628 __func__); 4650 goto out;
4629 ret = -EINVAL; /*XXX NFS4ERR_REQ_TOO_BIG ? */ 4651 ret = 0;
4630 goto out; 4652 kfree(tbl->slots);
4631 } 4653 }
4632 spin_lock(&tbl->slot_tbl_lock); 4654 spin_lock(&tbl->slot_tbl_lock);
4633 for (i = 0; i < max_slots; ++i) 4655 if (new) {
4656 tbl->slots = new;
4657 tbl->max_slots = max_reqs;
4658 }
4659 for (i = 0; i < tbl->max_slots; ++i)
4634 tbl->slots[i].seq_nr = ivalue; 4660 tbl->slots[i].seq_nr = ivalue;
4635 spin_unlock(&tbl->slot_tbl_lock); 4661 spin_unlock(&tbl->slot_tbl_lock);
4636 dprintk("%s: tbl=%p slots=%p max_slots=%d\n", __func__, 4662 dprintk("%s: tbl=%p slots=%p max_slots=%d\n", __func__,
@@ -4648,16 +4674,12 @@ static int nfs4_reset_slot_tables(struct nfs4_session *session)
4648 int status; 4674 int status;
4649 4675
4650 status = nfs4_reset_slot_table(&session->fc_slot_table, 4676 status = nfs4_reset_slot_table(&session->fc_slot_table,
4651 session->fc_attrs.max_reqs, 4677 session->fc_attrs.max_reqs, 1);
4652 session->fc_slot_table.max_slots,
4653 1);
4654 if (status) 4678 if (status)
4655 return status; 4679 return status;
4656 4680
4657 status = nfs4_reset_slot_table(&session->bc_slot_table, 4681 status = nfs4_reset_slot_table(&session->bc_slot_table,
4658 session->bc_attrs.max_reqs, 4682 session->bc_attrs.max_reqs, 0);
4659 session->bc_slot_table.max_slots,
4660 0);
4661 return status; 4683 return status;
4662} 4684}
4663 4685
@@ -4798,16 +4820,14 @@ static void nfs4_init_channel_attrs(struct nfs41_create_session_args *args)
4798 args->fc_attrs.headerpadsz = 0; 4820 args->fc_attrs.headerpadsz = 0;
4799 args->fc_attrs.max_rqst_sz = mxrqst_sz; 4821 args->fc_attrs.max_rqst_sz = mxrqst_sz;
4800 args->fc_attrs.max_resp_sz = mxresp_sz; 4822 args->fc_attrs.max_resp_sz = mxresp_sz;
4801 args->fc_attrs.max_resp_sz_cached = mxresp_sz;
4802 args->fc_attrs.max_ops = NFS4_MAX_OPS; 4823 args->fc_attrs.max_ops = NFS4_MAX_OPS;
4803 args->fc_attrs.max_reqs = session->clp->cl_rpcclient->cl_xprt->max_reqs; 4824 args->fc_attrs.max_reqs = session->clp->cl_rpcclient->cl_xprt->max_reqs;
4804 4825
4805 dprintk("%s: Fore Channel : max_rqst_sz=%u max_resp_sz=%u " 4826 dprintk("%s: Fore Channel : max_rqst_sz=%u max_resp_sz=%u "
4806 "max_resp_sz_cached=%u max_ops=%u max_reqs=%u\n", 4827 "max_ops=%u max_reqs=%u\n",
4807 __func__, 4828 __func__,
4808 args->fc_attrs.max_rqst_sz, args->fc_attrs.max_resp_sz, 4829 args->fc_attrs.max_rqst_sz, args->fc_attrs.max_resp_sz,
4809 args->fc_attrs.max_resp_sz_cached, args->fc_attrs.max_ops, 4830 args->fc_attrs.max_ops, args->fc_attrs.max_reqs);
4810 args->fc_attrs.max_reqs);
4811 4831
4812 /* Back channel attributes */ 4832 /* Back channel attributes */
4813 args->bc_attrs.headerpadsz = 0; 4833 args->bc_attrs.headerpadsz = 0;
@@ -5016,7 +5036,16 @@ static int nfs4_proc_sequence(struct nfs_client *clp, struct rpc_cred *cred)
5016 &res, args.sa_cache_this, 1); 5036 &res, args.sa_cache_this, 1);
5017} 5037}
5018 5038
5019void nfs41_sequence_call_done(struct rpc_task *task, void *data) 5039static void nfs41_sequence_release(void *data)
5040{
5041 struct nfs_client *clp = (struct nfs_client *)data;
5042
5043 if (atomic_read(&clp->cl_count) > 1)
5044 nfs4_schedule_state_renewal(clp);
5045 nfs_put_client(clp);
5046}
5047
5048static void nfs41_sequence_call_done(struct rpc_task *task, void *data)
5020{ 5049{
5021 struct nfs_client *clp = (struct nfs_client *)data; 5050 struct nfs_client *clp = (struct nfs_client *)data;
5022 5051
@@ -5024,6 +5053,8 @@ void nfs41_sequence_call_done(struct rpc_task *task, void *data)
5024 5053
5025 if (task->tk_status < 0) { 5054 if (task->tk_status < 0) {
5026 dprintk("%s ERROR %d\n", __func__, task->tk_status); 5055 dprintk("%s ERROR %d\n", __func__, task->tk_status);
5056 if (atomic_read(&clp->cl_count) == 1)
5057 goto out;
5027 5058
5028 if (_nfs4_async_handle_error(task, NULL, clp, NULL) 5059 if (_nfs4_async_handle_error(task, NULL, clp, NULL)
5029 == -EAGAIN) { 5060 == -EAGAIN) {
@@ -5032,7 +5063,7 @@ void nfs41_sequence_call_done(struct rpc_task *task, void *data)
5032 } 5063 }
5033 } 5064 }
5034 dprintk("%s rpc_cred %p\n", __func__, task->tk_msg.rpc_cred); 5065 dprintk("%s rpc_cred %p\n", __func__, task->tk_msg.rpc_cred);
5035 5066out:
5036 kfree(task->tk_msg.rpc_argp); 5067 kfree(task->tk_msg.rpc_argp);
5037 kfree(task->tk_msg.rpc_resp); 5068 kfree(task->tk_msg.rpc_resp);
5038 5069
@@ -5057,6 +5088,7 @@ static void nfs41_sequence_prepare(struct rpc_task *task, void *data)
5057static const struct rpc_call_ops nfs41_sequence_ops = { 5088static const struct rpc_call_ops nfs41_sequence_ops = {
5058 .rpc_call_done = nfs41_sequence_call_done, 5089 .rpc_call_done = nfs41_sequence_call_done,
5059 .rpc_call_prepare = nfs41_sequence_prepare, 5090 .rpc_call_prepare = nfs41_sequence_prepare,
5091 .rpc_release = nfs41_sequence_release,
5060}; 5092};
5061 5093
5062static int nfs41_proc_async_sequence(struct nfs_client *clp, 5094static int nfs41_proc_async_sequence(struct nfs_client *clp,
@@ -5069,12 +5101,13 @@ static int nfs41_proc_async_sequence(struct nfs_client *clp,
5069 .rpc_cred = cred, 5101 .rpc_cred = cred,
5070 }; 5102 };
5071 5103
5104 if (!atomic_inc_not_zero(&clp->cl_count))
5105 return -EIO;
5072 args = kzalloc(sizeof(*args), GFP_KERNEL); 5106 args = kzalloc(sizeof(*args), GFP_KERNEL);
5073 if (!args)
5074 return -ENOMEM;
5075 res = kzalloc(sizeof(*res), GFP_KERNEL); 5107 res = kzalloc(sizeof(*res), GFP_KERNEL);
5076 if (!res) { 5108 if (!args || !res) {
5077 kfree(args); 5109 kfree(args);
5110 nfs_put_client(clp);
5078 return -ENOMEM; 5111 return -ENOMEM;
5079 } 5112 }
5080 res->sr_slotid = NFS4_MAX_SLOT_TABLE; 5113 res->sr_slotid = NFS4_MAX_SLOT_TABLE;
diff --git a/fs/nfs/nfs4renewd.c b/fs/nfs/nfs4renewd.c
index 0156c01c212c..d87f10327b72 100644
--- a/fs/nfs/nfs4renewd.c
+++ b/fs/nfs/nfs4renewd.c
@@ -36,11 +36,6 @@
36 * as an rpc_task, not a real kernel thread, so it always runs in rpciod's 36 * as an rpc_task, not a real kernel thread, so it always runs in rpciod's
37 * context. There is one renewd per nfs_server. 37 * context. There is one renewd per nfs_server.
38 * 38 *
39 * TODO: If the send queue gets backlogged (e.g., if the server goes down),
40 * we will keep filling the queue with periodic RENEW requests. We need a
41 * mechanism for ensuring that if renewd successfully sends off a request,
42 * then it only wakes up when the request is finished. Maybe use the
43 * child task framework of the RPC layer?
44 */ 39 */
45 40
46#include <linux/mm.h> 41#include <linux/mm.h>
@@ -63,7 +58,7 @@ nfs4_renew_state(struct work_struct *work)
63 struct nfs_client *clp = 58 struct nfs_client *clp =
64 container_of(work, struct nfs_client, cl_renewd.work); 59 container_of(work, struct nfs_client, cl_renewd.work);
65 struct rpc_cred *cred; 60 struct rpc_cred *cred;
66 long lease, timeout; 61 long lease;
67 unsigned long last, now; 62 unsigned long last, now;
68 63
69 ops = nfs4_state_renewal_ops[clp->cl_minorversion]; 64 ops = nfs4_state_renewal_ops[clp->cl_minorversion];
@@ -75,7 +70,6 @@ nfs4_renew_state(struct work_struct *work)
75 lease = clp->cl_lease_time; 70 lease = clp->cl_lease_time;
76 last = clp->cl_last_renewal; 71 last = clp->cl_last_renewal;
77 now = jiffies; 72 now = jiffies;
78 timeout = (2 * lease) / 3 + (long)last - (long)now;
79 /* Are we close to a lease timeout? */ 73 /* Are we close to a lease timeout? */
80 if (time_after(now, last + lease/3)) { 74 if (time_after(now, last + lease/3)) {
81 cred = ops->get_state_renewal_cred_locked(clp); 75 cred = ops->get_state_renewal_cred_locked(clp);
@@ -90,19 +84,15 @@ nfs4_renew_state(struct work_struct *work)
90 /* Queue an asynchronous RENEW. */ 84 /* Queue an asynchronous RENEW. */
91 ops->sched_state_renewal(clp, cred); 85 ops->sched_state_renewal(clp, cred);
92 put_rpccred(cred); 86 put_rpccred(cred);
87 goto out_exp;
93 } 88 }
94 timeout = (2 * lease) / 3; 89 } else {
95 spin_lock(&clp->cl_lock);
96 } else
97 dprintk("%s: failed to call renewd. Reason: lease not expired \n", 90 dprintk("%s: failed to call renewd. Reason: lease not expired \n",
98 __func__); 91 __func__);
99 if (timeout < 5 * HZ) /* safeguard */ 92 spin_unlock(&clp->cl_lock);
100 timeout = 5 * HZ; 93 }
101 dprintk("%s: requeueing work. Lease period = %ld\n", 94 nfs4_schedule_state_renewal(clp);
102 __func__, (timeout + HZ - 1) / HZ); 95out_exp:
103 cancel_delayed_work(&clp->cl_renewd);
104 schedule_delayed_work(&clp->cl_renewd, timeout);
105 spin_unlock(&clp->cl_lock);
106 nfs_expire_unreferenced_delegations(clp); 96 nfs_expire_unreferenced_delegations(clp);
107out: 97out:
108 dprintk("%s: done\n", __func__); 98 dprintk("%s: done\n", __func__);
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index c1e2733f4fa4..6c5ed51f105e 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -1249,26 +1249,65 @@ static int nfs4_reclaim_lease(struct nfs_client *clp)
1249} 1249}
1250 1250
1251#ifdef CONFIG_NFS_V4_1 1251#ifdef CONFIG_NFS_V4_1
1252void nfs41_handle_recall_slot(struct nfs_client *clp)
1253{
1254 set_bit(NFS4CLNT_RECALL_SLOT, &clp->cl_state);
1255 nfs4_schedule_state_recovery(clp);
1256}
1257
1258static void nfs4_reset_all_state(struct nfs_client *clp)
1259{
1260 if (test_and_set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state) == 0) {
1261 clp->cl_boot_time = CURRENT_TIME;
1262 nfs4_state_start_reclaim_nograce(clp);
1263 nfs4_schedule_state_recovery(clp);
1264 }
1265}
1266
1267static void nfs41_handle_server_reboot(struct nfs_client *clp)
1268{
1269 if (test_and_set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state) == 0) {
1270 nfs4_state_start_reclaim_reboot(clp);
1271 nfs4_schedule_state_recovery(clp);
1272 }
1273}
1274
1275static void nfs41_handle_state_revoked(struct nfs_client *clp)
1276{
1277 /* Temporary */
1278 nfs4_reset_all_state(clp);
1279}
1280
1281static void nfs41_handle_recallable_state_revoked(struct nfs_client *clp)
1282{
1283 /* This will need to handle layouts too */
1284 nfs_expire_all_delegations(clp);
1285}
1286
1287static void nfs41_handle_cb_path_down(struct nfs_client *clp)
1288{
1289 nfs_expire_all_delegations(clp);
1290 if (test_and_set_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state) == 0)
1291 nfs4_schedule_state_recovery(clp);
1292}
1293
1252void nfs41_handle_sequence_flag_errors(struct nfs_client *clp, u32 flags) 1294void nfs41_handle_sequence_flag_errors(struct nfs_client *clp, u32 flags)
1253{ 1295{
1254 if (!flags) 1296 if (!flags)
1255 return; 1297 return;
1256 else if (flags & SEQ4_STATUS_RESTART_RECLAIM_NEEDED) { 1298 else if (flags & SEQ4_STATUS_RESTART_RECLAIM_NEEDED)
1257 set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state); 1299 nfs41_handle_server_reboot(clp);
1258 nfs4_state_start_reclaim_reboot(clp); 1300 else if (flags & (SEQ4_STATUS_EXPIRED_ALL_STATE_REVOKED |
1259 nfs4_schedule_state_recovery(clp);
1260 } else if (flags & (SEQ4_STATUS_EXPIRED_ALL_STATE_REVOKED |
1261 SEQ4_STATUS_EXPIRED_SOME_STATE_REVOKED | 1301 SEQ4_STATUS_EXPIRED_SOME_STATE_REVOKED |
1262 SEQ4_STATUS_ADMIN_STATE_REVOKED | 1302 SEQ4_STATUS_ADMIN_STATE_REVOKED |
1263 SEQ4_STATUS_RECALLABLE_STATE_REVOKED | 1303 SEQ4_STATUS_LEASE_MOVED))
1264 SEQ4_STATUS_LEASE_MOVED)) { 1304 nfs41_handle_state_revoked(clp);
1265 set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state); 1305 else if (flags & SEQ4_STATUS_RECALLABLE_STATE_REVOKED)
1266 nfs4_state_start_reclaim_nograce(clp); 1306 nfs41_handle_recallable_state_revoked(clp);
1267 nfs4_schedule_state_recovery(clp); 1307 else if (flags & (SEQ4_STATUS_CB_PATH_DOWN |
1268 } else if (flags & (SEQ4_STATUS_CB_PATH_DOWN |
1269 SEQ4_STATUS_BACKCHANNEL_FAULT | 1308 SEQ4_STATUS_BACKCHANNEL_FAULT |
1270 SEQ4_STATUS_CB_PATH_DOWN_SESSION)) 1309 SEQ4_STATUS_CB_PATH_DOWN_SESSION))
1271 nfs_expire_all_delegations(clp); 1310 nfs41_handle_cb_path_down(clp);
1272} 1311}
1273 1312
1274static int nfs4_reset_session(struct nfs_client *clp) 1313static int nfs4_reset_session(struct nfs_client *clp)
@@ -1285,23 +1324,52 @@ static int nfs4_reset_session(struct nfs_client *clp)
1285 1324
1286 memset(clp->cl_session->sess_id.data, 0, NFS4_MAX_SESSIONID_LEN); 1325 memset(clp->cl_session->sess_id.data, 0, NFS4_MAX_SESSIONID_LEN);
1287 status = nfs4_proc_create_session(clp); 1326 status = nfs4_proc_create_session(clp);
1288 if (status) 1327 if (status) {
1289 status = nfs4_recovery_handle_error(clp, status); 1328 status = nfs4_recovery_handle_error(clp, status);
1329 goto out;
1330 }
1331 /* create_session negotiated new slot table */
1332 clear_bit(NFS4CLNT_RECALL_SLOT, &clp->cl_state);
1290 1333
1291out: 1334 /* Let the state manager reestablish state */
1292 /* 1335 if (!test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state))
1293 * Let the state manager reestablish state
1294 */
1295 if (!test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state) &&
1296 status == 0)
1297 nfs41_setup_state_renewal(clp); 1336 nfs41_setup_state_renewal(clp);
1298 1337out:
1299 return status; 1338 return status;
1300} 1339}
1301 1340
1341static int nfs4_recall_slot(struct nfs_client *clp)
1342{
1343 struct nfs4_slot_table *fc_tbl = &clp->cl_session->fc_slot_table;
1344 struct nfs4_channel_attrs *fc_attrs = &clp->cl_session->fc_attrs;
1345 struct nfs4_slot *new, *old;
1346 int i;
1347
1348 nfs4_begin_drain_session(clp);
1349 new = kmalloc(fc_tbl->target_max_slots * sizeof(struct nfs4_slot),
1350 GFP_KERNEL);
1351 if (!new)
1352 return -ENOMEM;
1353
1354 spin_lock(&fc_tbl->slot_tbl_lock);
1355 for (i = 0; i < fc_tbl->target_max_slots; i++)
1356 new[i].seq_nr = fc_tbl->slots[i].seq_nr;
1357 old = fc_tbl->slots;
1358 fc_tbl->slots = new;
1359 fc_tbl->max_slots = fc_tbl->target_max_slots;
1360 fc_tbl->target_max_slots = 0;
1361 fc_attrs->max_reqs = fc_tbl->max_slots;
1362 spin_unlock(&fc_tbl->slot_tbl_lock);
1363
1364 kfree(old);
1365 nfs4_end_drain_session(clp);
1366 return 0;
1367}
1368
1302#else /* CONFIG_NFS_V4_1 */ 1369#else /* CONFIG_NFS_V4_1 */
1303static int nfs4_reset_session(struct nfs_client *clp) { return 0; } 1370static int nfs4_reset_session(struct nfs_client *clp) { return 0; }
1304static int nfs4_end_drain_session(struct nfs_client *clp) { return 0; } 1371static int nfs4_end_drain_session(struct nfs_client *clp) { return 0; }
1372static int nfs4_recall_slot(struct nfs_client *clp) { return 0; }
1305#endif /* CONFIG_NFS_V4_1 */ 1373#endif /* CONFIG_NFS_V4_1 */
1306 1374
1307/* Set NFS4CLNT_LEASE_EXPIRED for all v4.0 errors and for recoverable errors 1375/* Set NFS4CLNT_LEASE_EXPIRED for all v4.0 errors and for recoverable errors
@@ -1314,6 +1382,7 @@ static void nfs4_set_lease_expired(struct nfs_client *clp, int status)
1314 case -NFS4ERR_DELAY: 1382 case -NFS4ERR_DELAY:
1315 case -NFS4ERR_CLID_INUSE: 1383 case -NFS4ERR_CLID_INUSE:
1316 case -EAGAIN: 1384 case -EAGAIN:
1385 case -EKEYEXPIRED:
1317 break; 1386 break;
1318 1387
1319 case -NFS4ERR_NOT_SAME: /* FixMe: implement recovery 1388 case -NFS4ERR_NOT_SAME: /* FixMe: implement recovery
@@ -1397,6 +1466,15 @@ static void nfs4_state_manager(struct nfs_client *clp)
1397 nfs_client_return_marked_delegations(clp); 1466 nfs_client_return_marked_delegations(clp);
1398 continue; 1467 continue;
1399 } 1468 }
1469 /* Recall session slots */
1470 if (test_and_clear_bit(NFS4CLNT_RECALL_SLOT, &clp->cl_state)
1471 && nfs4_has_session(clp)) {
1472 status = nfs4_recall_slot(clp);
1473 if (status < 0)
1474 goto out_error;
1475 continue;
1476 }
1477
1400 1478
1401 nfs4_clear_state_manager_bit(clp); 1479 nfs4_clear_state_manager_bit(clp);
1402 /* Did we race with an attempt to give us more work? */ 1480 /* Did we race with an attempt to give us more work? */
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 5cd5184b56db..4d338be492cb 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -1578,6 +1578,14 @@ static void encode_create_session(struct xdr_stream *xdr,
1578 char machine_name[NFS4_MAX_MACHINE_NAME_LEN]; 1578 char machine_name[NFS4_MAX_MACHINE_NAME_LEN];
1579 uint32_t len; 1579 uint32_t len;
1580 struct nfs_client *clp = args->client; 1580 struct nfs_client *clp = args->client;
1581 u32 max_resp_sz_cached;
1582
1583 /*
1584 * Assumes OPEN is the biggest non-idempotent compound.
1585 * 2 is the verifier.
1586 */
1587 max_resp_sz_cached = (NFS4_dec_open_sz + RPC_REPHDRSIZE +
1588 RPC_MAX_AUTH_SIZE + 2) * XDR_UNIT;
1581 1589
1582 len = scnprintf(machine_name, sizeof(machine_name), "%s", 1590 len = scnprintf(machine_name, sizeof(machine_name), "%s",
1583 clp->cl_ipaddr); 1591 clp->cl_ipaddr);
@@ -1592,7 +1600,7 @@ static void encode_create_session(struct xdr_stream *xdr,
1592 *p++ = cpu_to_be32(args->fc_attrs.headerpadsz); /* header padding size */ 1600 *p++ = cpu_to_be32(args->fc_attrs.headerpadsz); /* header padding size */
1593 *p++ = cpu_to_be32(args->fc_attrs.max_rqst_sz); /* max req size */ 1601 *p++ = cpu_to_be32(args->fc_attrs.max_rqst_sz); /* max req size */
1594 *p++ = cpu_to_be32(args->fc_attrs.max_resp_sz); /* max resp size */ 1602 *p++ = cpu_to_be32(args->fc_attrs.max_resp_sz); /* max resp size */
1595 *p++ = cpu_to_be32(args->fc_attrs.max_resp_sz_cached); /* Max resp sz cached */ 1603 *p++ = cpu_to_be32(max_resp_sz_cached); /* Max resp sz cached */
1596 *p++ = cpu_to_be32(args->fc_attrs.max_ops); /* max operations */ 1604 *p++ = cpu_to_be32(args->fc_attrs.max_ops); /* max operations */
1597 *p++ = cpu_to_be32(args->fc_attrs.max_reqs); /* max requests */ 1605 *p++ = cpu_to_be32(args->fc_attrs.max_reqs); /* max requests */
1598 *p++ = cpu_to_be32(0); /* rdmachannel_attrs */ 1606 *p++ = cpu_to_be32(0); /* rdmachannel_attrs */
diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c
index ef583854d8d0..c752d944fe9e 100644
--- a/fs/nfs/proc.c
+++ b/fs/nfs/proc.c
@@ -47,6 +47,39 @@
47#define NFSDBG_FACILITY NFSDBG_PROC 47#define NFSDBG_FACILITY NFSDBG_PROC
48 48
49/* 49/*
50 * wrapper to handle the -EKEYEXPIRED error message. This should generally
51 * only happen if using krb5 auth and a user's TGT expires. NFSv2 doesn't
52 * support the NFSERR_JUKEBOX error code, but we handle this situation in the
53 * same way that we handle that error with NFSv3.
54 */
55static int
56nfs_rpc_wrapper(struct rpc_clnt *clnt, struct rpc_message *msg, int flags)
57{
58 int res;
59 do {
60 res = rpc_call_sync(clnt, msg, flags);
61 if (res != -EKEYEXPIRED)
62 break;
63 schedule_timeout_killable(NFS_JUKEBOX_RETRY_TIME);
64 res = -ERESTARTSYS;
65 } while (!fatal_signal_pending(current));
66 return res;
67}
68
69#define rpc_call_sync(clnt, msg, flags) nfs_rpc_wrapper(clnt, msg, flags)
70
71static int
72nfs_async_handle_expired_key(struct rpc_task *task)
73{
74 if (task->tk_status != -EKEYEXPIRED)
75 return 0;
76 task->tk_status = 0;
77 rpc_restart_call(task);
78 rpc_delay(task, NFS_JUKEBOX_RETRY_TIME);
79 return 1;
80}
81
82/*
50 * Bare-bones access to getattr: this is for nfs_read_super. 83 * Bare-bones access to getattr: this is for nfs_read_super.
51 */ 84 */
52static int 85static int
@@ -307,6 +340,8 @@ nfs_proc_unlink_setup(struct rpc_message *msg, struct inode *dir)
307 340
308static int nfs_proc_unlink_done(struct rpc_task *task, struct inode *dir) 341static int nfs_proc_unlink_done(struct rpc_task *task, struct inode *dir)
309{ 342{
343 if (nfs_async_handle_expired_key(task))
344 return 0;
310 nfs_mark_for_revalidate(dir); 345 nfs_mark_for_revalidate(dir);
311 return 1; 346 return 1;
312} 347}
@@ -560,6 +595,9 @@ nfs_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle,
560 595
561static int nfs_read_done(struct rpc_task *task, struct nfs_read_data *data) 596static int nfs_read_done(struct rpc_task *task, struct nfs_read_data *data)
562{ 597{
598 if (nfs_async_handle_expired_key(task))
599 return -EAGAIN;
600
563 nfs_invalidate_atime(data->inode); 601 nfs_invalidate_atime(data->inode);
564 if (task->tk_status >= 0) { 602 if (task->tk_status >= 0) {
565 nfs_refresh_inode(data->inode, data->res.fattr); 603 nfs_refresh_inode(data->inode, data->res.fattr);
@@ -579,6 +617,9 @@ static void nfs_proc_read_setup(struct nfs_read_data *data, struct rpc_message *
579 617
580static int nfs_write_done(struct rpc_task *task, struct nfs_write_data *data) 618static int nfs_write_done(struct rpc_task *task, struct nfs_write_data *data)
581{ 619{
620 if (nfs_async_handle_expired_key(task))
621 return -EAGAIN;
622
582 if (task->tk_status >= 0) 623 if (task->tk_status >= 0)
583 nfs_post_op_update_inode_force_wcc(data->inode, data->res.fattr); 624 nfs_post_op_update_inode_force_wcc(data->inode, data->res.fattr);
584 return 0; 625 return 0;
diff --git a/fs/nfs/symlink.c b/fs/nfs/symlink.c
index 412738dbfbc7..2ea9e5c27e55 100644
--- a/fs/nfs/symlink.c
+++ b/fs/nfs/symlink.c
@@ -50,7 +50,7 @@ static void *nfs_follow_link(struct dentry *dentry, struct nameidata *nd)
50 struct page *page; 50 struct page *page;
51 void *err; 51 void *err;
52 52
53 err = ERR_PTR(nfs_revalidate_mapping_nolock(inode, inode->i_mapping)); 53 err = ERR_PTR(nfs_revalidate_mapping(inode, inode->i_mapping));
54 if (err) 54 if (err)
55 goto read_failed; 55 goto read_failed;
56 page = read_cache_page(&inode->i_data, 0, 56 page = read_cache_page(&inode->i_data, 0,
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index d63d964a0392..53ff70e23993 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -438,6 +438,7 @@ nfs_mark_request_commit(struct nfs_page *req)
438 radix_tree_tag_set(&nfsi->nfs_page_tree, 438 radix_tree_tag_set(&nfsi->nfs_page_tree,
439 req->wb_index, 439 req->wb_index,
440 NFS_PAGE_TAG_COMMIT); 440 NFS_PAGE_TAG_COMMIT);
441 nfsi->ncommit++;
441 spin_unlock(&inode->i_lock); 442 spin_unlock(&inode->i_lock);
442 inc_zone_page_state(req->wb_page, NR_UNSTABLE_NFS); 443 inc_zone_page_state(req->wb_page, NR_UNSTABLE_NFS);
443 inc_bdi_stat(req->wb_page->mapping->backing_dev_info, BDI_RECLAIMABLE); 444 inc_bdi_stat(req->wb_page->mapping->backing_dev_info, BDI_RECLAIMABLE);
@@ -501,57 +502,6 @@ int nfs_reschedule_unstable_write(struct nfs_page *req)
501} 502}
502#endif 503#endif
503 504
504/*
505 * Wait for a request to complete.
506 *
507 * Interruptible by fatal signals only.
508 */
509static int nfs_wait_on_requests_locked(struct inode *inode, pgoff_t idx_start, unsigned int npages)
510{
511 struct nfs_inode *nfsi = NFS_I(inode);
512 struct nfs_page *req;
513 pgoff_t idx_end, next;
514 unsigned int res = 0;
515 int error;
516
517 if (npages == 0)
518 idx_end = ~0;
519 else
520 idx_end = idx_start + npages - 1;
521
522 next = idx_start;
523 while (radix_tree_gang_lookup_tag(&nfsi->nfs_page_tree, (void **)&req, next, 1, NFS_PAGE_TAG_LOCKED)) {
524 if (req->wb_index > idx_end)
525 break;
526
527 next = req->wb_index + 1;
528 BUG_ON(!NFS_WBACK_BUSY(req));
529
530 kref_get(&req->wb_kref);
531 spin_unlock(&inode->i_lock);
532 error = nfs_wait_on_request(req);
533 nfs_release_request(req);
534 spin_lock(&inode->i_lock);
535 if (error < 0)
536 return error;
537 res++;
538 }
539 return res;
540}
541
542static void nfs_cancel_commit_list(struct list_head *head)
543{
544 struct nfs_page *req;
545
546 while(!list_empty(head)) {
547 req = nfs_list_entry(head->next);
548 nfs_list_remove_request(req);
549 nfs_clear_request_commit(req);
550 nfs_inode_remove_request(req);
551 nfs_unlock_request(req);
552 }
553}
554
555#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) 505#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
556static int 506static int
557nfs_need_commit(struct nfs_inode *nfsi) 507nfs_need_commit(struct nfs_inode *nfsi)
@@ -573,11 +523,17 @@ static int
573nfs_scan_commit(struct inode *inode, struct list_head *dst, pgoff_t idx_start, unsigned int npages) 523nfs_scan_commit(struct inode *inode, struct list_head *dst, pgoff_t idx_start, unsigned int npages)
574{ 524{
575 struct nfs_inode *nfsi = NFS_I(inode); 525 struct nfs_inode *nfsi = NFS_I(inode);
526 int ret;
576 527
577 if (!nfs_need_commit(nfsi)) 528 if (!nfs_need_commit(nfsi))
578 return 0; 529 return 0;
579 530
580 return nfs_scan_list(nfsi, dst, idx_start, npages, NFS_PAGE_TAG_COMMIT); 531 ret = nfs_scan_list(nfsi, dst, idx_start, npages, NFS_PAGE_TAG_COMMIT);
532 if (ret > 0)
533 nfsi->ncommit -= ret;
534 if (nfs_need_commit(NFS_I(inode)))
535 __mark_inode_dirty(inode, I_DIRTY_DATASYNC);
536 return ret;
581} 537}
582#else 538#else
583static inline int nfs_need_commit(struct nfs_inode *nfsi) 539static inline int nfs_need_commit(struct nfs_inode *nfsi)
@@ -642,9 +598,10 @@ static struct nfs_page *nfs_try_to_update_request(struct inode *inode,
642 spin_lock(&inode->i_lock); 598 spin_lock(&inode->i_lock);
643 } 599 }
644 600
645 if (nfs_clear_request_commit(req)) 601 if (nfs_clear_request_commit(req) &&
646 radix_tree_tag_clear(&NFS_I(inode)->nfs_page_tree, 602 radix_tree_tag_clear(&NFS_I(inode)->nfs_page_tree,
647 req->wb_index, NFS_PAGE_TAG_COMMIT); 603 req->wb_index, NFS_PAGE_TAG_COMMIT) != NULL)
604 NFS_I(inode)->ncommit--;
648 605
649 /* Okay, the request matches. Update the region */ 606 /* Okay, the request matches. Update the region */
650 if (offset < req->wb_offset) { 607 if (offset < req->wb_offset) {
@@ -1391,7 +1348,7 @@ static const struct rpc_call_ops nfs_commit_ops = {
1391 .rpc_release = nfs_commit_release, 1348 .rpc_release = nfs_commit_release,
1392}; 1349};
1393 1350
1394int nfs_commit_inode(struct inode *inode, int how) 1351static int nfs_commit_inode(struct inode *inode, int how)
1395{ 1352{
1396 LIST_HEAD(head); 1353 LIST_HEAD(head);
1397 int res; 1354 int res;
@@ -1406,92 +1363,51 @@ int nfs_commit_inode(struct inode *inode, int how)
1406 } 1363 }
1407 return res; 1364 return res;
1408} 1365}
1409#else
1410static inline int nfs_commit_list(struct inode *inode, struct list_head *head, int how)
1411{
1412 return 0;
1413}
1414#endif
1415 1366
1416long nfs_sync_mapping_wait(struct address_space *mapping, struct writeback_control *wbc, int how) 1367static int nfs_commit_unstable_pages(struct inode *inode, struct writeback_control *wbc)
1417{ 1368{
1418 struct inode *inode = mapping->host; 1369 struct nfs_inode *nfsi = NFS_I(inode);
1419 pgoff_t idx_start, idx_end; 1370 int flags = FLUSH_SYNC;
1420 unsigned int npages = 0; 1371 int ret = 0;
1421 LIST_HEAD(head); 1372
1422 int nocommit = how & FLUSH_NOCOMMIT; 1373 /* Don't commit yet if this is a non-blocking flush and there are
1423 long pages, ret; 1374 * lots of outstanding writes for this mapping.
1424 1375 */
1425 /* FIXME */ 1376 if (wbc->sync_mode == WB_SYNC_NONE &&
1426 if (wbc->range_cyclic) 1377 nfsi->ncommit <= (nfsi->npages >> 1))
1427 idx_start = 0; 1378 goto out_mark_dirty;
1428 else { 1379
1429 idx_start = wbc->range_start >> PAGE_CACHE_SHIFT; 1380 if (wbc->nonblocking || wbc->for_background)
1430 idx_end = wbc->range_end >> PAGE_CACHE_SHIFT; 1381 flags = 0;
1431 if (idx_end > idx_start) { 1382 ret = nfs_commit_inode(inode, flags);
1432 pgoff_t l_npages = 1 + idx_end - idx_start; 1383 if (ret >= 0) {
1433 npages = l_npages; 1384 if (wbc->sync_mode == WB_SYNC_NONE) {
1434 if (sizeof(npages) != sizeof(l_npages) && 1385 if (ret < wbc->nr_to_write)
1435 (pgoff_t)npages != l_npages) 1386 wbc->nr_to_write -= ret;
1436 npages = 0; 1387 else
1388 wbc->nr_to_write = 0;
1437 } 1389 }
1390 return 0;
1438 } 1391 }
1439 how &= ~FLUSH_NOCOMMIT; 1392out_mark_dirty:
1440 spin_lock(&inode->i_lock); 1393 __mark_inode_dirty(inode, I_DIRTY_DATASYNC);
1441 do {
1442 ret = nfs_wait_on_requests_locked(inode, idx_start, npages);
1443 if (ret != 0)
1444 continue;
1445 if (nocommit)
1446 break;
1447 pages = nfs_scan_commit(inode, &head, idx_start, npages);
1448 if (pages == 0)
1449 break;
1450 if (how & FLUSH_INVALIDATE) {
1451 spin_unlock(&inode->i_lock);
1452 nfs_cancel_commit_list(&head);
1453 ret = pages;
1454 spin_lock(&inode->i_lock);
1455 continue;
1456 }
1457 pages += nfs_scan_commit(inode, &head, 0, 0);
1458 spin_unlock(&inode->i_lock);
1459 ret = nfs_commit_list(inode, &head, how);
1460 spin_lock(&inode->i_lock);
1461
1462 } while (ret >= 0);
1463 spin_unlock(&inode->i_lock);
1464 return ret; 1394 return ret;
1465} 1395}
1466 1396#else
1467static int __nfs_write_mapping(struct address_space *mapping, struct writeback_control *wbc, int how) 1397static int nfs_commit_inode(struct inode *inode, int how)
1468{ 1398{
1469 int ret;
1470
1471 ret = nfs_writepages(mapping, wbc);
1472 if (ret < 0)
1473 goto out;
1474 ret = nfs_sync_mapping_wait(mapping, wbc, how);
1475 if (ret < 0)
1476 goto out;
1477 return 0; 1399 return 0;
1478out:
1479 __mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
1480 return ret;
1481} 1400}
1482 1401
1483/* Two pass sync: first using WB_SYNC_NONE, then WB_SYNC_ALL */ 1402static int nfs_commit_unstable_pages(struct inode *inode, struct writeback_control *wbc)
1484static int nfs_write_mapping(struct address_space *mapping, int how)
1485{ 1403{
1486 struct writeback_control wbc = { 1404 return 0;
1487 .bdi = mapping->backing_dev_info, 1405}
1488 .sync_mode = WB_SYNC_ALL, 1406#endif
1489 .nr_to_write = LONG_MAX,
1490 .range_start = 0,
1491 .range_end = LLONG_MAX,
1492 };
1493 1407
1494 return __nfs_write_mapping(mapping, &wbc, how); 1408int nfs_write_inode(struct inode *inode, struct writeback_control *wbc)
1409{
1410 return nfs_commit_unstable_pages(inode, wbc);
1495} 1411}
1496 1412
1497/* 1413/*
@@ -1499,37 +1415,26 @@ static int nfs_write_mapping(struct address_space *mapping, int how)
1499 */ 1415 */
1500int nfs_wb_all(struct inode *inode) 1416int nfs_wb_all(struct inode *inode)
1501{ 1417{
1502 return nfs_write_mapping(inode->i_mapping, 0); 1418 struct writeback_control wbc = {
1503} 1419 .sync_mode = WB_SYNC_ALL,
1420 .nr_to_write = LONG_MAX,
1421 .range_start = 0,
1422 .range_end = LLONG_MAX,
1423 };
1504 1424
1505int nfs_wb_nocommit(struct inode *inode) 1425 return sync_inode(inode, &wbc);
1506{
1507 return nfs_write_mapping(inode->i_mapping, FLUSH_NOCOMMIT);
1508} 1426}
1509 1427
1510int nfs_wb_page_cancel(struct inode *inode, struct page *page) 1428int nfs_wb_page_cancel(struct inode *inode, struct page *page)
1511{ 1429{
1512 struct nfs_page *req; 1430 struct nfs_page *req;
1513 loff_t range_start = page_offset(page);
1514 loff_t range_end = range_start + (loff_t)(PAGE_CACHE_SIZE - 1);
1515 struct writeback_control wbc = {
1516 .bdi = page->mapping->backing_dev_info,
1517 .sync_mode = WB_SYNC_ALL,
1518 .nr_to_write = LONG_MAX,
1519 .range_start = range_start,
1520 .range_end = range_end,
1521 };
1522 int ret = 0; 1431 int ret = 0;
1523 1432
1524 BUG_ON(!PageLocked(page)); 1433 BUG_ON(!PageLocked(page));
1525 for (;;) { 1434 for (;;) {
1526 req = nfs_page_find_request(page); 1435 req = nfs_page_find_request(page);
1527 if (req == NULL) 1436 if (req == NULL)
1528 goto out;
1529 if (test_bit(PG_CLEAN, &req->wb_flags)) {
1530 nfs_release_request(req);
1531 break; 1437 break;
1532 }
1533 if (nfs_lock_request_dontget(req)) { 1438 if (nfs_lock_request_dontget(req)) {
1534 nfs_inode_remove_request(req); 1439 nfs_inode_remove_request(req);
1535 /* 1440 /*
@@ -1543,54 +1448,54 @@ int nfs_wb_page_cancel(struct inode *inode, struct page *page)
1543 ret = nfs_wait_on_request(req); 1448 ret = nfs_wait_on_request(req);
1544 nfs_release_request(req); 1449 nfs_release_request(req);
1545 if (ret < 0) 1450 if (ret < 0)
1546 goto out; 1451 break;
1547 } 1452 }
1548 if (!PagePrivate(page))
1549 return 0;
1550 ret = nfs_sync_mapping_wait(page->mapping, &wbc, FLUSH_INVALIDATE);
1551out:
1552 return ret; 1453 return ret;
1553} 1454}
1554 1455
1555static int nfs_wb_page_priority(struct inode *inode, struct page *page, 1456/*
1556 int how) 1457 * Write back all requests on one page - we do this before reading it.
1458 */
1459int nfs_wb_page(struct inode *inode, struct page *page)
1557{ 1460{
1558 loff_t range_start = page_offset(page); 1461 loff_t range_start = page_offset(page);
1559 loff_t range_end = range_start + (loff_t)(PAGE_CACHE_SIZE - 1); 1462 loff_t range_end = range_start + (loff_t)(PAGE_CACHE_SIZE - 1);
1560 struct writeback_control wbc = { 1463 struct writeback_control wbc = {
1561 .bdi = page->mapping->backing_dev_info,
1562 .sync_mode = WB_SYNC_ALL, 1464 .sync_mode = WB_SYNC_ALL,
1563 .nr_to_write = LONG_MAX, 1465 .nr_to_write = 0,
1564 .range_start = range_start, 1466 .range_start = range_start,
1565 .range_end = range_end, 1467 .range_end = range_end,
1566 }; 1468 };
1469 struct nfs_page *req;
1470 int need_commit;
1567 int ret; 1471 int ret;
1568 1472
1569 do { 1473 while(PagePrivate(page)) {
1570 if (clear_page_dirty_for_io(page)) { 1474 if (clear_page_dirty_for_io(page)) {
1571 ret = nfs_writepage_locked(page, &wbc); 1475 ret = nfs_writepage_locked(page, &wbc);
1572 if (ret < 0) 1476 if (ret < 0)
1573 goto out_error; 1477 goto out_error;
1574 } else if (!PagePrivate(page)) 1478 }
1479 req = nfs_find_and_lock_request(page);
1480 if (!req)
1575 break; 1481 break;
1576 ret = nfs_sync_mapping_wait(page->mapping, &wbc, how); 1482 if (IS_ERR(req)) {
1577 if (ret < 0) 1483 ret = PTR_ERR(req);
1578 goto out_error; 1484 goto out_error;
1579 } while (PagePrivate(page)); 1485 }
1486 need_commit = test_bit(PG_CLEAN, &req->wb_flags);
1487 nfs_clear_page_tag_locked(req);
1488 if (need_commit) {
1489 ret = nfs_commit_inode(inode, FLUSH_SYNC);
1490 if (ret < 0)
1491 goto out_error;
1492 }
1493 }
1580 return 0; 1494 return 0;
1581out_error: 1495out_error:
1582 __mark_inode_dirty(inode, I_DIRTY_PAGES);
1583 return ret; 1496 return ret;
1584} 1497}
1585 1498
1586/*
1587 * Write back all requests on one page - we do this before reading it.
1588 */
1589int nfs_wb_page(struct inode *inode, struct page* page)
1590{
1591 return nfs_wb_page_priority(inode, page, FLUSH_STABLE);
1592}
1593
1594#ifdef CONFIG_MIGRATION 1499#ifdef CONFIG_MIGRATION
1595int nfs_migrate_page(struct address_space *mapping, struct page *newpage, 1500int nfs_migrate_page(struct address_space *mapping, struct page *newpage,
1596 struct page *page) 1501 struct page *page)
diff --git a/fs/nfsctl.c b/fs/nfsctl.c
index d3854d94b7cf..bf9cbd242ddd 100644
--- a/fs/nfsctl.c
+++ b/fs/nfsctl.c
@@ -36,10 +36,9 @@ static struct file *do_open(char *name, int flags)
36 return ERR_PTR(error); 36 return ERR_PTR(error);
37 37
38 if (flags == O_RDWR) 38 if (flags == O_RDWR)
39 error = may_open(&nd.path, MAY_READ|MAY_WRITE, 39 error = may_open(&nd.path, MAY_READ|MAY_WRITE, flags);
40 FMODE_READ|FMODE_WRITE);
41 else 40 else
42 error = may_open(&nd.path, MAY_WRITE, FMODE_WRITE); 41 error = may_open(&nd.path, MAY_WRITE, flags);
43 42
44 if (!error) 43 if (!error)
45 return dentry_open(nd.path.dentry, nd.path.mnt, flags, 44 return dentry_open(nd.path.dentry, nd.path.mnt, flags,
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index a8587e90fd5a..bbf72d8f9fc0 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -2121,9 +2121,15 @@ out_acl:
2121 * and this is the root of a cross-mounted filesystem. 2121 * and this is the root of a cross-mounted filesystem.
2122 */ 2122 */
2123 if (ignore_crossmnt == 0 && 2123 if (ignore_crossmnt == 0 &&
2124 exp->ex_path.mnt->mnt_root->d_inode == dentry->d_inode) { 2124 dentry == exp->ex_path.mnt->mnt_root) {
2125 err = vfs_getattr(exp->ex_path.mnt->mnt_parent, 2125 struct path path = exp->ex_path;
2126 exp->ex_path.mnt->mnt_mountpoint, &stat); 2126 path_get(&path);
2127 while (follow_up(&path)) {
2128 if (path.dentry != path.mnt->mnt_root)
2129 break;
2130 }
2131 err = vfs_getattr(path.mnt, path.dentry, &stat);
2132 path_put(&path);
2127 if (err) 2133 if (err)
2128 goto out_nfserr; 2134 goto out_nfserr;
2129 } 2135 }
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 8715d194561a..8eca17df4f63 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -20,7 +20,6 @@
20#include <linux/fcntl.h> 20#include <linux/fcntl.h>
21#include <linux/namei.h> 21#include <linux/namei.h>
22#include <linux/delay.h> 22#include <linux/delay.h>
23#include <linux/quotaops.h>
24#include <linux/fsnotify.h> 23#include <linux/fsnotify.h>
25#include <linux/posix_acl_xattr.h> 24#include <linux/posix_acl_xattr.h>
26#include <linux/xattr.h> 25#include <linux/xattr.h>
@@ -361,7 +360,7 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap,
361 * If we are changing the size of the file, then 360 * If we are changing the size of the file, then
362 * we need to break all leases. 361 * we need to break all leases.
363 */ 362 */
364 host_err = break_lease(inode, FMODE_WRITE | O_NONBLOCK); 363 host_err = break_lease(inode, O_WRONLY | O_NONBLOCK);
365 if (host_err == -EWOULDBLOCK) 364 if (host_err == -EWOULDBLOCK)
366 host_err = -ETIMEDOUT; 365 host_err = -ETIMEDOUT;
367 if (host_err) /* ENOMEM or EWOULDBLOCK */ 366 if (host_err) /* ENOMEM or EWOULDBLOCK */
@@ -377,7 +376,6 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap,
377 put_write_access(inode); 376 put_write_access(inode);
378 goto out_nfserr; 377 goto out_nfserr;
379 } 378 }
380 vfs_dq_init(inode);
381 } 379 }
382 380
383 /* sanitize the mode change */ 381 /* sanitize the mode change */
@@ -734,7 +732,7 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
734 * Check to see if there are any leases on this file. 732 * Check to see if there are any leases on this file.
735 * This may block while leases are broken. 733 * This may block while leases are broken.
736 */ 734 */
737 host_err = break_lease(inode, O_NONBLOCK | ((access & NFSD_MAY_WRITE) ? FMODE_WRITE : 0)); 735 host_err = break_lease(inode, O_NONBLOCK | ((access & NFSD_MAY_WRITE) ? O_WRONLY : 0));
738 if (host_err == -EWOULDBLOCK) 736 if (host_err == -EWOULDBLOCK)
739 host_err = -ETIMEDOUT; 737 host_err = -ETIMEDOUT;
740 if (host_err) /* NOMEM or WOULDBLOCK */ 738 if (host_err) /* NOMEM or WOULDBLOCK */
@@ -745,8 +743,6 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
745 flags = O_RDWR|O_LARGEFILE; 743 flags = O_RDWR|O_LARGEFILE;
746 else 744 else
747 flags = O_WRONLY|O_LARGEFILE; 745 flags = O_WRONLY|O_LARGEFILE;
748
749 vfs_dq_init(inode);
750 } 746 }
751 *filp = dentry_open(dget(dentry), mntget(fhp->fh_export->ex_path.mnt), 747 *filp = dentry_open(dget(dentry), mntget(fhp->fh_export->ex_path.mnt),
752 flags, current_cred()); 748 flags, current_cred());
diff --git a/fs/nilfs2/dir.c b/fs/nilfs2/dir.c
index 76d803e060a9..0092840492ee 100644
--- a/fs/nilfs2/dir.c
+++ b/fs/nilfs2/dir.c
@@ -224,7 +224,7 @@ fail:
224 * len <= NILFS_NAME_LEN and de != NULL are guaranteed by caller. 224 * len <= NILFS_NAME_LEN and de != NULL are guaranteed by caller.
225 */ 225 */
226static int 226static int
227nilfs_match(int len, const char * const name, struct nilfs_dir_entry *de) 227nilfs_match(int len, const unsigned char *name, struct nilfs_dir_entry *de)
228{ 228{
229 if (len != de->name_len) 229 if (len != de->name_len)
230 return 0; 230 return 0;
@@ -349,11 +349,11 @@ done:
349 * Entry is guaranteed to be valid. 349 * Entry is guaranteed to be valid.
350 */ 350 */
351struct nilfs_dir_entry * 351struct nilfs_dir_entry *
352nilfs_find_entry(struct inode *dir, struct dentry *dentry, 352nilfs_find_entry(struct inode *dir, const struct qstr *qstr,
353 struct page **res_page) 353 struct page **res_page)
354{ 354{
355 const char *name = dentry->d_name.name; 355 const unsigned char *name = qstr->name;
356 int namelen = dentry->d_name.len; 356 int namelen = qstr->len;
357 unsigned reclen = NILFS_DIR_REC_LEN(namelen); 357 unsigned reclen = NILFS_DIR_REC_LEN(namelen);
358 unsigned long start, n; 358 unsigned long start, n;
359 unsigned long npages = dir_pages(dir); 359 unsigned long npages = dir_pages(dir);
@@ -424,13 +424,13 @@ struct nilfs_dir_entry *nilfs_dotdot(struct inode *dir, struct page **p)
424 return de; 424 return de;
425} 425}
426 426
427ino_t nilfs_inode_by_name(struct inode *dir, struct dentry *dentry) 427ino_t nilfs_inode_by_name(struct inode *dir, const struct qstr *qstr)
428{ 428{
429 ino_t res = 0; 429 ino_t res = 0;
430 struct nilfs_dir_entry *de; 430 struct nilfs_dir_entry *de;
431 struct page *page; 431 struct page *page;
432 432
433 de = nilfs_find_entry(dir, dentry, &page); 433 de = nilfs_find_entry(dir, qstr, &page);
434 if (de) { 434 if (de) {
435 res = le64_to_cpu(de->inode); 435 res = le64_to_cpu(de->inode);
436 kunmap(page); 436 kunmap(page);
@@ -465,7 +465,7 @@ void nilfs_set_link(struct inode *dir, struct nilfs_dir_entry *de,
465int nilfs_add_link(struct dentry *dentry, struct inode *inode) 465int nilfs_add_link(struct dentry *dentry, struct inode *inode)
466{ 466{
467 struct inode *dir = dentry->d_parent->d_inode; 467 struct inode *dir = dentry->d_parent->d_inode;
468 const char *name = dentry->d_name.name; 468 const unsigned char *name = dentry->d_name.name;
469 int namelen = dentry->d_name.len; 469 int namelen = dentry->d_name.len;
470 unsigned chunk_size = nilfs_chunk_size(dir); 470 unsigned chunk_size = nilfs_chunk_size(dir);
471 unsigned reclen = NILFS_DIR_REC_LEN(namelen); 471 unsigned reclen = NILFS_DIR_REC_LEN(namelen);
diff --git a/fs/nilfs2/namei.c b/fs/nilfs2/namei.c
index 07ba838ef089..ad6ed2cf19b4 100644
--- a/fs/nilfs2/namei.c
+++ b/fs/nilfs2/namei.c
@@ -67,7 +67,7 @@ nilfs_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
67 if (dentry->d_name.len > NILFS_NAME_LEN) 67 if (dentry->d_name.len > NILFS_NAME_LEN)
68 return ERR_PTR(-ENAMETOOLONG); 68 return ERR_PTR(-ENAMETOOLONG);
69 69
70 ino = nilfs_inode_by_name(dir, dentry); 70 ino = nilfs_inode_by_name(dir, &dentry->d_name);
71 inode = NULL; 71 inode = NULL;
72 if (ino) { 72 if (ino) {
73 inode = nilfs_iget(dir->i_sb, ino); 73 inode = nilfs_iget(dir->i_sb, ino);
@@ -81,10 +81,7 @@ struct dentry *nilfs_get_parent(struct dentry *child)
81{ 81{
82 unsigned long ino; 82 unsigned long ino;
83 struct inode *inode; 83 struct inode *inode;
84 struct dentry dotdot; 84 struct qstr dotdot = {.name = "..", .len = 2};
85
86 dotdot.d_name.name = "..";
87 dotdot.d_name.len = 2;
88 85
89 ino = nilfs_inode_by_name(child->d_inode, &dotdot); 86 ino = nilfs_inode_by_name(child->d_inode, &dotdot);
90 if (!ino) 87 if (!ino)
@@ -296,7 +293,7 @@ static int nilfs_do_unlink(struct inode *dir, struct dentry *dentry)
296 int err; 293 int err;
297 294
298 err = -ENOENT; 295 err = -ENOENT;
299 de = nilfs_find_entry(dir, dentry, &page); 296 de = nilfs_find_entry(dir, &dentry->d_name, &page);
300 if (!de) 297 if (!de)
301 goto out; 298 goto out;
302 299
@@ -389,7 +386,7 @@ static int nilfs_rename(struct inode *old_dir, struct dentry *old_dentry,
389 return err; 386 return err;
390 387
391 err = -ENOENT; 388 err = -ENOENT;
392 old_de = nilfs_find_entry(old_dir, old_dentry, &old_page); 389 old_de = nilfs_find_entry(old_dir, &old_dentry->d_name, &old_page);
393 if (!old_de) 390 if (!old_de)
394 goto out; 391 goto out;
395 392
@@ -409,7 +406,7 @@ static int nilfs_rename(struct inode *old_dir, struct dentry *old_dentry,
409 goto out_dir; 406 goto out_dir;
410 407
411 err = -ENOENT; 408 err = -ENOENT;
412 new_de = nilfs_find_entry(new_dir, new_dentry, &new_page); 409 new_de = nilfs_find_entry(new_dir, &new_dentry->d_name, &new_page);
413 if (!new_de) 410 if (!new_de)
414 goto out_dir; 411 goto out_dir;
415 inc_nlink(old_inode); 412 inc_nlink(old_inode);
diff --git a/fs/nilfs2/nilfs.h b/fs/nilfs2/nilfs.h
index 4da6f67e9a91..8723e5bfd071 100644
--- a/fs/nilfs2/nilfs.h
+++ b/fs/nilfs2/nilfs.h
@@ -217,10 +217,10 @@ static inline int nilfs_init_acl(struct inode *inode, struct inode *dir)
217 217
218/* dir.c */ 218/* dir.c */
219extern int nilfs_add_link(struct dentry *, struct inode *); 219extern int nilfs_add_link(struct dentry *, struct inode *);
220extern ino_t nilfs_inode_by_name(struct inode *, struct dentry *); 220extern ino_t nilfs_inode_by_name(struct inode *, const struct qstr *);
221extern int nilfs_make_empty(struct inode *, struct inode *); 221extern int nilfs_make_empty(struct inode *, struct inode *);
222extern struct nilfs_dir_entry * 222extern struct nilfs_dir_entry *
223nilfs_find_entry(struct inode *, struct dentry *, struct page **); 223nilfs_find_entry(struct inode *, const struct qstr *, struct page **);
224extern int nilfs_delete_entry(struct nilfs_dir_entry *, struct page *); 224extern int nilfs_delete_entry(struct nilfs_dir_entry *, struct page *);
225extern int nilfs_empty_dir(struct inode *); 225extern int nilfs_empty_dir(struct inode *);
226extern struct nilfs_dir_entry *nilfs_dotdot(struct inode *, struct page **); 226extern struct nilfs_dir_entry *nilfs_dotdot(struct inode *, struct page **);
diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c
index a94e8bd8eb1f..472cdf29ef82 100644
--- a/fs/notify/inotify/inotify_user.c
+++ b/fs/notify/inotify/inotify_user.c
@@ -29,14 +29,12 @@
29#include <linux/init.h> /* module_init */ 29#include <linux/init.h> /* module_init */
30#include <linux/inotify.h> 30#include <linux/inotify.h>
31#include <linux/kernel.h> /* roundup() */ 31#include <linux/kernel.h> /* roundup() */
32#include <linux/magic.h> /* superblock magic number */
33#include <linux/mount.h> /* mntget */
34#include <linux/namei.h> /* LOOKUP_FOLLOW */ 32#include <linux/namei.h> /* LOOKUP_FOLLOW */
35#include <linux/path.h> /* struct path */
36#include <linux/sched.h> /* struct user */ 33#include <linux/sched.h> /* struct user */
37#include <linux/slab.h> /* struct kmem_cache */ 34#include <linux/slab.h> /* struct kmem_cache */
38#include <linux/syscalls.h> 35#include <linux/syscalls.h>
39#include <linux/types.h> 36#include <linux/types.h>
37#include <linux/anon_inodes.h>
40#include <linux/uaccess.h> 38#include <linux/uaccess.h>
41#include <linux/poll.h> 39#include <linux/poll.h>
42#include <linux/wait.h> 40#include <linux/wait.h>
@@ -45,8 +43,6 @@
45 43
46#include <asm/ioctls.h> 44#include <asm/ioctls.h>
47 45
48static struct vfsmount *inotify_mnt __read_mostly;
49
50/* these are configurable via /proc/sys/fs/inotify/ */ 46/* these are configurable via /proc/sys/fs/inotify/ */
51static int inotify_max_user_instances __read_mostly; 47static int inotify_max_user_instances __read_mostly;
52static int inotify_max_queued_events __read_mostly; 48static int inotify_max_queued_events __read_mostly;
@@ -645,9 +641,7 @@ SYSCALL_DEFINE1(inotify_init1, int, flags)
645{ 641{
646 struct fsnotify_group *group; 642 struct fsnotify_group *group;
647 struct user_struct *user; 643 struct user_struct *user;
648 struct file *filp; 644 int ret;
649 struct path path;
650 int fd, ret;
651 645
652 /* Check the IN_* constants for consistency. */ 646 /* Check the IN_* constants for consistency. */
653 BUILD_BUG_ON(IN_CLOEXEC != O_CLOEXEC); 647 BUILD_BUG_ON(IN_CLOEXEC != O_CLOEXEC);
@@ -656,10 +650,6 @@ SYSCALL_DEFINE1(inotify_init1, int, flags)
656 if (flags & ~(IN_CLOEXEC | IN_NONBLOCK)) 650 if (flags & ~(IN_CLOEXEC | IN_NONBLOCK))
657 return -EINVAL; 651 return -EINVAL;
658 652
659 fd = get_unused_fd_flags(flags & O_CLOEXEC);
660 if (fd < 0)
661 return fd;
662
663 user = get_current_user(); 653 user = get_current_user();
664 if (unlikely(atomic_read(&user->inotify_devs) >= 654 if (unlikely(atomic_read(&user->inotify_devs) >=
665 inotify_max_user_instances)) { 655 inotify_max_user_instances)) {
@@ -676,27 +666,14 @@ SYSCALL_DEFINE1(inotify_init1, int, flags)
676 666
677 atomic_inc(&user->inotify_devs); 667 atomic_inc(&user->inotify_devs);
678 668
679 path.mnt = inotify_mnt; 669 ret = anon_inode_getfd("inotify", &inotify_fops, group,
680 path.dentry = inotify_mnt->mnt_root; 670 O_RDONLY | flags);
681 path_get(&path); 671 if (ret >= 0)
682 filp = alloc_file(&path, FMODE_READ, &inotify_fops); 672 return ret;
683 if (!filp)
684 goto Enfile;
685 673
686 filp->f_flags = O_RDONLY | (flags & O_NONBLOCK);
687 filp->private_data = group;
688
689 fd_install(fd, filp);
690
691 return fd;
692
693Enfile:
694 ret = -ENFILE;
695 path_put(&path);
696 atomic_dec(&user->inotify_devs); 674 atomic_dec(&user->inotify_devs);
697out_free_uid: 675out_free_uid:
698 free_uid(user); 676 free_uid(user);
699 put_unused_fd(fd);
700 return ret; 677 return ret;
701} 678}
702 679
@@ -783,20 +760,6 @@ out:
783 return ret; 760 return ret;
784} 761}
785 762
786static int
787inotify_get_sb(struct file_system_type *fs_type, int flags,
788 const char *dev_name, void *data, struct vfsmount *mnt)
789{
790 return get_sb_pseudo(fs_type, "inotify", NULL,
791 INOTIFYFS_SUPER_MAGIC, mnt);
792}
793
794static struct file_system_type inotify_fs_type = {
795 .name = "inotifyfs",
796 .get_sb = inotify_get_sb,
797 .kill_sb = kill_anon_super,
798};
799
800/* 763/*
801 * inotify_user_setup - Our initialization function. Note that we cannnot return 764 * inotify_user_setup - Our initialization function. Note that we cannnot return
802 * error because we have compiled-in VFS hooks. So an (unlikely) failure here 765 * error because we have compiled-in VFS hooks. So an (unlikely) failure here
@@ -804,16 +767,6 @@ static struct file_system_type inotify_fs_type = {
804 */ 767 */
805static int __init inotify_user_setup(void) 768static int __init inotify_user_setup(void)
806{ 769{
807 int ret;
808
809 ret = register_filesystem(&inotify_fs_type);
810 if (unlikely(ret))
811 panic("inotify: register_filesystem returned %d!\n", ret);
812
813 inotify_mnt = kern_mount(&inotify_fs_type);
814 if (IS_ERR(inotify_mnt))
815 panic("inotify: kern_mount ret %ld!\n", PTR_ERR(inotify_mnt));
816
817 inotify_inode_mark_cachep = KMEM_CACHE(inotify_inode_mark_entry, SLAB_PANIC); 770 inotify_inode_mark_cachep = KMEM_CACHE(inotify_inode_mark_entry, SLAB_PANIC);
818 event_priv_cachep = KMEM_CACHE(inotify_event_private_data, SLAB_PANIC); 771 event_priv_cachep = KMEM_CACHE(inotify_event_private_data, SLAB_PANIC);
819 772
diff --git a/fs/ntfs/dir.c b/fs/ntfs/dir.c
index 5a9e34475e37..9173e82a45d1 100644
--- a/fs/ntfs/dir.c
+++ b/fs/ntfs/dir.c
@@ -1545,7 +1545,7 @@ static int ntfs_dir_fsync(struct file *filp, struct dentry *dentry,
1545 write_inode_now(bmp_vi, !datasync); 1545 write_inode_now(bmp_vi, !datasync);
1546 iput(bmp_vi); 1546 iput(bmp_vi);
1547 } 1547 }
1548 ret = ntfs_write_inode(vi, 1); 1548 ret = __ntfs_write_inode(vi, 1);
1549 write_inode_now(vi, !datasync); 1549 write_inode_now(vi, !datasync);
1550 err = sync_blockdev(vi->i_sb->s_bdev); 1550 err = sync_blockdev(vi->i_sb->s_bdev);
1551 if (unlikely(err && !ret)) 1551 if (unlikely(err && !ret))
diff --git a/fs/ntfs/file.c b/fs/ntfs/file.c
index 43179ddd336f..b681c71d7069 100644
--- a/fs/ntfs/file.c
+++ b/fs/ntfs/file.c
@@ -2182,7 +2182,7 @@ static int ntfs_file_fsync(struct file *filp, struct dentry *dentry,
2182 ntfs_debug("Entering for inode 0x%lx.", vi->i_ino); 2182 ntfs_debug("Entering for inode 0x%lx.", vi->i_ino);
2183 BUG_ON(S_ISDIR(vi->i_mode)); 2183 BUG_ON(S_ISDIR(vi->i_mode));
2184 if (!datasync || !NInoNonResident(NTFS_I(vi))) 2184 if (!datasync || !NInoNonResident(NTFS_I(vi)))
2185 ret = ntfs_write_inode(vi, 1); 2185 ret = __ntfs_write_inode(vi, 1);
2186 write_inode_now(vi, !datasync); 2186 write_inode_now(vi, !datasync);
2187 /* 2187 /*
2188 * NOTE: If we were to use mapping->private_list (see ext2 and 2188 * NOTE: If we were to use mapping->private_list (see ext2 and
diff --git a/fs/ntfs/inode.c b/fs/ntfs/inode.c
index dc2505abb6d7..4b57fb1eac2a 100644
--- a/fs/ntfs/inode.c
+++ b/fs/ntfs/inode.c
@@ -2957,7 +2957,7 @@ out:
2957 * 2957 *
2958 * Return 0 on success and -errno on error. 2958 * Return 0 on success and -errno on error.
2959 */ 2959 */
2960int ntfs_write_inode(struct inode *vi, int sync) 2960int __ntfs_write_inode(struct inode *vi, int sync)
2961{ 2961{
2962 sle64 nt; 2962 sle64 nt;
2963 ntfs_inode *ni = NTFS_I(vi); 2963 ntfs_inode *ni = NTFS_I(vi);
diff --git a/fs/ntfs/inode.h b/fs/ntfs/inode.h
index 117eaf8032a3..9a113544605d 100644
--- a/fs/ntfs/inode.h
+++ b/fs/ntfs/inode.h
@@ -307,12 +307,12 @@ extern void ntfs_truncate_vfs(struct inode *vi);
307 307
308extern int ntfs_setattr(struct dentry *dentry, struct iattr *attr); 308extern int ntfs_setattr(struct dentry *dentry, struct iattr *attr);
309 309
310extern int ntfs_write_inode(struct inode *vi, int sync); 310extern int __ntfs_write_inode(struct inode *vi, int sync);
311 311
312static inline void ntfs_commit_inode(struct inode *vi) 312static inline void ntfs_commit_inode(struct inode *vi)
313{ 313{
314 if (!is_bad_inode(vi)) 314 if (!is_bad_inode(vi))
315 ntfs_write_inode(vi, 1); 315 __ntfs_write_inode(vi, 1);
316 return; 316 return;
317} 317}
318 318
diff --git a/fs/ntfs/super.c b/fs/ntfs/super.c
index 80b04770e8e9..1cf39dfaee7a 100644
--- a/fs/ntfs/super.c
+++ b/fs/ntfs/super.c
@@ -39,6 +39,7 @@
39#include "dir.h" 39#include "dir.h"
40#include "debug.h" 40#include "debug.h"
41#include "index.h" 41#include "index.h"
42#include "inode.h"
42#include "aops.h" 43#include "aops.h"
43#include "layout.h" 44#include "layout.h"
44#include "malloc.h" 45#include "malloc.h"
@@ -2662,6 +2663,13 @@ static int ntfs_statfs(struct dentry *dentry, struct kstatfs *sfs)
2662 return 0; 2663 return 0;
2663} 2664}
2664 2665
2666#ifdef NTFS_RW
2667static int ntfs_write_inode(struct inode *vi, struct writeback_control *wbc)
2668{
2669 return __ntfs_write_inode(vi, wbc->sync_mode == WB_SYNC_ALL);
2670}
2671#endif
2672
2665/** 2673/**
2666 * The complete super operations. 2674 * The complete super operations.
2667 */ 2675 */
diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index 2bbe1ecc08c0..9f8bd913c51e 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -5713,7 +5713,7 @@ int ocfs2_remove_btree_range(struct inode *inode,
5713 goto out; 5713 goto out;
5714 } 5714 }
5715 5715
5716 vfs_dq_free_space_nodirty(inode, 5716 dquot_free_space_nodirty(inode,
5717 ocfs2_clusters_to_bytes(inode->i_sb, len)); 5717 ocfs2_clusters_to_bytes(inode->i_sb, len));
5718 5718
5719 ret = ocfs2_remove_extent(handle, et, cpos, len, meta_ac, dealloc); 5719 ret = ocfs2_remove_extent(handle, et, cpos, len, meta_ac, dealloc);
@@ -6936,7 +6936,7 @@ static int ocfs2_do_truncate(struct ocfs2_super *osb,
6936 goto bail; 6936 goto bail;
6937 } 6937 }
6938 6938
6939 vfs_dq_free_space_nodirty(inode, 6939 dquot_free_space_nodirty(inode,
6940 ocfs2_clusters_to_bytes(osb->sb, clusters_to_del)); 6940 ocfs2_clusters_to_bytes(osb->sb, clusters_to_del));
6941 spin_lock(&OCFS2_I(inode)->ip_lock); 6941 spin_lock(&OCFS2_I(inode)->ip_lock);
6942 OCFS2_I(inode)->ip_clusters = le32_to_cpu(fe->i_clusters) - 6942 OCFS2_I(inode)->ip_clusters = le32_to_cpu(fe->i_clusters) -
@@ -7301,11 +7301,10 @@ int ocfs2_convert_inline_data_to_extents(struct inode *inode,
7301 unsigned int page_end; 7301 unsigned int page_end;
7302 u64 phys; 7302 u64 phys;
7303 7303
7304 if (vfs_dq_alloc_space_nodirty(inode, 7304 ret = dquot_alloc_space_nodirty(inode,
7305 ocfs2_clusters_to_bytes(osb->sb, 1))) { 7305 ocfs2_clusters_to_bytes(osb->sb, 1));
7306 ret = -EDQUOT; 7306 if (ret)
7307 goto out_commit; 7307 goto out_commit;
7308 }
7309 did_quota = 1; 7308 did_quota = 1;
7310 7309
7311 ret = ocfs2_claim_clusters(osb, handle, data_ac, 1, &bit_off, 7310 ret = ocfs2_claim_clusters(osb, handle, data_ac, 1, &bit_off,
@@ -7381,7 +7380,7 @@ int ocfs2_convert_inline_data_to_extents(struct inode *inode,
7381 7380
7382out_commit: 7381out_commit:
7383 if (ret < 0 && did_quota) 7382 if (ret < 0 && did_quota)
7384 vfs_dq_free_space_nodirty(inode, 7383 dquot_free_space_nodirty(inode,
7385 ocfs2_clusters_to_bytes(osb->sb, 1)); 7384 ocfs2_clusters_to_bytes(osb->sb, 1));
7386 7385
7387 ocfs2_commit_trans(osb, handle); 7386 ocfs2_commit_trans(osb, handle);
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index 4c2a6d282c4d..21441ddb5506 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -1764,10 +1764,11 @@ int ocfs2_write_begin_nolock(struct address_space *mapping,
1764 1764
1765 wc->w_handle = handle; 1765 wc->w_handle = handle;
1766 1766
1767 if (clusters_to_alloc && vfs_dq_alloc_space_nodirty(inode, 1767 if (clusters_to_alloc) {
1768 ocfs2_clusters_to_bytes(osb->sb, clusters_to_alloc))) { 1768 ret = dquot_alloc_space_nodirty(inode,
1769 ret = -EDQUOT; 1769 ocfs2_clusters_to_bytes(osb->sb, clusters_to_alloc));
1770 goto out_commit; 1770 if (ret)
1771 goto out_commit;
1771 } 1772 }
1772 /* 1773 /*
1773 * We don't want this to fail in ocfs2_write_end(), so do it 1774 * We don't want this to fail in ocfs2_write_end(), so do it
@@ -1810,7 +1811,7 @@ success:
1810 return 0; 1811 return 0;
1811out_quota: 1812out_quota:
1812 if (clusters_to_alloc) 1813 if (clusters_to_alloc)
1813 vfs_dq_free_space(inode, 1814 dquot_free_space(inode,
1814 ocfs2_clusters_to_bytes(osb->sb, clusters_to_alloc)); 1815 ocfs2_clusters_to_bytes(osb->sb, clusters_to_alloc));
1815out_commit: 1816out_commit:
1816 ocfs2_commit_trans(osb, handle); 1817 ocfs2_commit_trans(osb, handle);
diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c
index 765d66c70989..efd77d071c80 100644
--- a/fs/ocfs2/dir.c
+++ b/fs/ocfs2/dir.c
@@ -2964,12 +2964,10 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh,
2964 goto out; 2964 goto out;
2965 } 2965 }
2966 2966
2967 if (vfs_dq_alloc_space_nodirty(dir, 2967 ret = dquot_alloc_space_nodirty(dir,
2968 ocfs2_clusters_to_bytes(osb->sb, 2968 ocfs2_clusters_to_bytes(osb->sb, alloc + dx_alloc));
2969 alloc + dx_alloc))) { 2969 if (ret)
2970 ret = -EDQUOT;
2971 goto out_commit; 2970 goto out_commit;
2972 }
2973 did_quota = 1; 2971 did_quota = 1;
2974 2972
2975 if (ocfs2_supports_indexed_dirs(osb) && !dx_inline) { 2973 if (ocfs2_supports_indexed_dirs(osb) && !dx_inline) {
@@ -3178,7 +3176,7 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh,
3178 3176
3179out_commit: 3177out_commit:
3180 if (ret < 0 && did_quota) 3178 if (ret < 0 && did_quota)
3181 vfs_dq_free_space_nodirty(dir, bytes_allocated); 3179 dquot_free_space_nodirty(dir, bytes_allocated);
3182 3180
3183 ocfs2_commit_trans(osb, handle); 3181 ocfs2_commit_trans(osb, handle);
3184 3182
@@ -3221,11 +3219,10 @@ static int ocfs2_do_extend_dir(struct super_block *sb,
3221 if (extend) { 3219 if (extend) {
3222 u32 offset = OCFS2_I(dir)->ip_clusters; 3220 u32 offset = OCFS2_I(dir)->ip_clusters;
3223 3221
3224 if (vfs_dq_alloc_space_nodirty(dir, 3222 status = dquot_alloc_space_nodirty(dir,
3225 ocfs2_clusters_to_bytes(sb, 1))) { 3223 ocfs2_clusters_to_bytes(sb, 1));
3226 status = -EDQUOT; 3224 if (status)
3227 goto bail; 3225 goto bail;
3228 }
3229 did_quota = 1; 3226 did_quota = 1;
3230 3227
3231 status = ocfs2_add_inode_data(OCFS2_SB(sb), dir, &offset, 3228 status = ocfs2_add_inode_data(OCFS2_SB(sb), dir, &offset,
@@ -3254,7 +3251,7 @@ static int ocfs2_do_extend_dir(struct super_block *sb,
3254 status = 0; 3251 status = 0;
3255bail: 3252bail:
3256 if (did_quota && status < 0) 3253 if (did_quota && status < 0)
3257 vfs_dq_free_space_nodirty(dir, ocfs2_clusters_to_bytes(sb, 1)); 3254 dquot_free_space_nodirty(dir, ocfs2_clusters_to_bytes(sb, 1));
3258 mlog_exit(status); 3255 mlog_exit(status);
3259 return status; 3256 return status;
3260} 3257}
@@ -3889,11 +3886,10 @@ static int ocfs2_dx_dir_rebalance(struct ocfs2_super *osb, struct inode *dir,
3889 goto out; 3886 goto out;
3890 } 3887 }
3891 3888
3892 if (vfs_dq_alloc_space_nodirty(dir, 3889 ret = dquot_alloc_space_nodirty(dir,
3893 ocfs2_clusters_to_bytes(dir->i_sb, 1))) { 3890 ocfs2_clusters_to_bytes(dir->i_sb, 1));
3894 ret = -EDQUOT; 3891 if (ret)
3895 goto out_commit; 3892 goto out_commit;
3896 }
3897 did_quota = 1; 3893 did_quota = 1;
3898 3894
3899 ret = ocfs2_journal_access_dl(handle, INODE_CACHE(dir), dx_leaf_bh, 3895 ret = ocfs2_journal_access_dl(handle, INODE_CACHE(dir), dx_leaf_bh,
@@ -3983,7 +3979,7 @@ static int ocfs2_dx_dir_rebalance(struct ocfs2_super *osb, struct inode *dir,
3983 3979
3984out_commit: 3980out_commit:
3985 if (ret < 0 && did_quota) 3981 if (ret < 0 && did_quota)
3986 vfs_dq_free_space_nodirty(dir, 3982 dquot_free_space_nodirty(dir,
3987 ocfs2_clusters_to_bytes(dir->i_sb, 1)); 3983 ocfs2_clusters_to_bytes(dir->i_sb, 1));
3988 3984
3989 ocfs2_commit_trans(osb, handle); 3985 ocfs2_commit_trans(osb, handle);
@@ -4165,11 +4161,10 @@ static int ocfs2_expand_inline_dx_root(struct inode *dir,
4165 goto out; 4161 goto out;
4166 } 4162 }
4167 4163
4168 if (vfs_dq_alloc_space_nodirty(dir, 4164 ret = dquot_alloc_space_nodirty(dir,
4169 ocfs2_clusters_to_bytes(osb->sb, 1))) { 4165 ocfs2_clusters_to_bytes(osb->sb, 1));
4170 ret = -EDQUOT; 4166 if (ret)
4171 goto out_commit; 4167 goto out_commit;
4172 }
4173 did_quota = 1; 4168 did_quota = 1;
4174 4169
4175 /* 4170 /*
@@ -4229,7 +4224,7 @@ static int ocfs2_expand_inline_dx_root(struct inode *dir,
4229 4224
4230out_commit: 4225out_commit:
4231 if (ret < 0 && did_quota) 4226 if (ret < 0 && did_quota)
4232 vfs_dq_free_space_nodirty(dir, 4227 dquot_free_space_nodirty(dir,
4233 ocfs2_clusters_to_bytes(dir->i_sb, 1)); 4228 ocfs2_clusters_to_bytes(dir->i_sb, 1));
4234 4229
4235 ocfs2_commit_trans(osb, handle); 4230 ocfs2_commit_trans(osb, handle);
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 5b52547d6299..17947dc8341e 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -107,6 +107,9 @@ static int ocfs2_file_open(struct inode *inode, struct file *file)
107 mlog_entry("(0x%p, 0x%p, '%.*s')\n", inode, file, 107 mlog_entry("(0x%p, 0x%p, '%.*s')\n", inode, file,
108 file->f_path.dentry->d_name.len, file->f_path.dentry->d_name.name); 108 file->f_path.dentry->d_name.len, file->f_path.dentry->d_name.name);
109 109
110 if (file->f_mode & FMODE_WRITE)
111 dquot_initialize(inode);
112
110 spin_lock(&oi->ip_lock); 113 spin_lock(&oi->ip_lock);
111 114
112 /* Check that the inode hasn't been wiped from disk by another 115 /* Check that the inode hasn't been wiped from disk by another
@@ -629,11 +632,10 @@ restart_all:
629 } 632 }
630 633
631restarted_transaction: 634restarted_transaction:
632 if (vfs_dq_alloc_space_nodirty(inode, ocfs2_clusters_to_bytes(osb->sb, 635 status = dquot_alloc_space_nodirty(inode,
633 clusters_to_add))) { 636 ocfs2_clusters_to_bytes(osb->sb, clusters_to_add));
634 status = -EDQUOT; 637 if (status)
635 goto leave; 638 goto leave;
636 }
637 did_quota = 1; 639 did_quota = 1;
638 640
639 /* reserve a write to the file entry early on - that we if we 641 /* reserve a write to the file entry early on - that we if we
@@ -674,7 +676,7 @@ restarted_transaction:
674 clusters_to_add -= (OCFS2_I(inode)->ip_clusters - prev_clusters); 676 clusters_to_add -= (OCFS2_I(inode)->ip_clusters - prev_clusters);
675 spin_unlock(&OCFS2_I(inode)->ip_lock); 677 spin_unlock(&OCFS2_I(inode)->ip_lock);
676 /* Release unused quota reservation */ 678 /* Release unused quota reservation */
677 vfs_dq_free_space(inode, 679 dquot_free_space(inode,
678 ocfs2_clusters_to_bytes(osb->sb, clusters_to_add)); 680 ocfs2_clusters_to_bytes(osb->sb, clusters_to_add));
679 did_quota = 0; 681 did_quota = 0;
680 682
@@ -710,7 +712,7 @@ restarted_transaction:
710 712
711leave: 713leave:
712 if (status < 0 && did_quota) 714 if (status < 0 && did_quota)
713 vfs_dq_free_space(inode, 715 dquot_free_space(inode,
714 ocfs2_clusters_to_bytes(osb->sb, clusters_to_add)); 716 ocfs2_clusters_to_bytes(osb->sb, clusters_to_add));
715 if (handle) { 717 if (handle) {
716 ocfs2_commit_trans(osb, handle); 718 ocfs2_commit_trans(osb, handle);
@@ -978,6 +980,8 @@ int ocfs2_setattr(struct dentry *dentry, struct iattr *attr)
978 980
979 size_change = S_ISREG(inode->i_mode) && attr->ia_valid & ATTR_SIZE; 981 size_change = S_ISREG(inode->i_mode) && attr->ia_valid & ATTR_SIZE;
980 if (size_change) { 982 if (size_change) {
983 dquot_initialize(inode);
984
981 status = ocfs2_rw_lock(inode, 1); 985 status = ocfs2_rw_lock(inode, 1);
982 if (status < 0) { 986 if (status < 0) {
983 mlog_errno(status); 987 mlog_errno(status);
@@ -1020,7 +1024,7 @@ int ocfs2_setattr(struct dentry *dentry, struct iattr *attr)
1020 /* 1024 /*
1021 * Gather pointers to quota structures so that allocation / 1025 * Gather pointers to quota structures so that allocation /
1022 * freeing of quota structures happens here and not inside 1026 * freeing of quota structures happens here and not inside
1023 * vfs_dq_transfer() where we have problems with lock ordering 1027 * dquot_transfer() where we have problems with lock ordering
1024 */ 1028 */
1025 if (attr->ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid 1029 if (attr->ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid
1026 && OCFS2_HAS_RO_COMPAT_FEATURE(sb, 1030 && OCFS2_HAS_RO_COMPAT_FEATURE(sb,
@@ -1053,7 +1057,7 @@ int ocfs2_setattr(struct dentry *dentry, struct iattr *attr)
1053 mlog_errno(status); 1057 mlog_errno(status);
1054 goto bail_unlock; 1058 goto bail_unlock;
1055 } 1059 }
1056 status = vfs_dq_transfer(inode, attr) ? -EDQUOT : 0; 1060 status = dquot_transfer(inode, attr);
1057 if (status < 0) 1061 if (status < 0)
1058 goto bail_commit; 1062 goto bail_commit;
1059 } else { 1063 } else {
diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c
index 88459bdd1ff3..278a223aae14 100644
--- a/fs/ocfs2/inode.c
+++ b/fs/ocfs2/inode.c
@@ -665,7 +665,7 @@ static int ocfs2_remove_inode(struct inode *inode,
665 } 665 }
666 666
667 ocfs2_remove_from_cache(INODE_CACHE(inode), di_bh); 667 ocfs2_remove_from_cache(INODE_CACHE(inode), di_bh);
668 vfs_dq_free_inode(inode); 668 dquot_free_inode(inode);
669 669
670 status = ocfs2_free_dinode(handle, inode_alloc_inode, 670 status = ocfs2_free_dinode(handle, inode_alloc_inode,
671 inode_alloc_bh, di); 671 inode_alloc_bh, di);
@@ -971,6 +971,8 @@ void ocfs2_delete_inode(struct inode *inode)
971 goto bail; 971 goto bail;
972 } 972 }
973 973
974 dquot_initialize(inode);
975
974 if (!ocfs2_inode_is_valid_to_delete(inode)) { 976 if (!ocfs2_inode_is_valid_to_delete(inode)) {
975 /* It's probably not necessary to truncate_inode_pages 977 /* It's probably not necessary to truncate_inode_pages
976 * here but we do it for safety anyway (it will most 978 * here but we do it for safety anyway (it will most
@@ -1087,6 +1089,8 @@ void ocfs2_clear_inode(struct inode *inode)
1087 mlog_bug_on_msg(OCFS2_SB(inode->i_sb) == NULL, 1089 mlog_bug_on_msg(OCFS2_SB(inode->i_sb) == NULL,
1088 "Inode=%lu\n", inode->i_ino); 1090 "Inode=%lu\n", inode->i_ino);
1089 1091
1092 dquot_drop(inode);
1093
1090 /* To preven remote deletes we hold open lock before, now it 1094 /* To preven remote deletes we hold open lock before, now it
1091 * is time to unlock PR and EX open locks. */ 1095 * is time to unlock PR and EX open locks. */
1092 ocfs2_open_unlock(inode); 1096 ocfs2_open_unlock(inode);
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c
index 50fb26a6a5f5..d9cd4e373a53 100644
--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -212,7 +212,7 @@ static struct inode *ocfs2_get_init_inode(struct inode *dir, int mode)
212 } else 212 } else
213 inode->i_gid = current_fsgid(); 213 inode->i_gid = current_fsgid();
214 inode->i_mode = mode; 214 inode->i_mode = mode;
215 vfs_dq_init(inode); 215 dquot_initialize(inode);
216 return inode; 216 return inode;
217} 217}
218 218
@@ -244,6 +244,8 @@ static int ocfs2_mknod(struct inode *dir,
244 (unsigned long)dev, dentry->d_name.len, 244 (unsigned long)dev, dentry->d_name.len,
245 dentry->d_name.name); 245 dentry->d_name.name);
246 246
247 dquot_initialize(dir);
248
247 /* get our super block */ 249 /* get our super block */
248 osb = OCFS2_SB(dir->i_sb); 250 osb = OCFS2_SB(dir->i_sb);
249 251
@@ -348,13 +350,9 @@ static int ocfs2_mknod(struct inode *dir,
348 goto leave; 350 goto leave;
349 } 351 }
350 352
351 /* We don't use standard VFS wrapper because we don't want vfs_dq_init 353 status = dquot_alloc_inode(inode);
352 * to be called. */ 354 if (status)
353 if (sb_any_quota_active(osb->sb) &&
354 osb->sb->dq_op->alloc_inode(inode, 1) == NO_QUOTA) {
355 status = -EDQUOT;
356 goto leave; 355 goto leave;
357 }
358 did_quota_inode = 1; 356 did_quota_inode = 1;
359 357
360 mlog_entry("(0x%p, 0x%p, %d, %lu, '%.*s')\n", dir, dentry, 358 mlog_entry("(0x%p, 0x%p, %d, %lu, '%.*s')\n", dir, dentry,
@@ -431,7 +429,7 @@ static int ocfs2_mknod(struct inode *dir,
431 status = 0; 429 status = 0;
432leave: 430leave:
433 if (status < 0 && did_quota_inode) 431 if (status < 0 && did_quota_inode)
434 vfs_dq_free_inode(inode); 432 dquot_free_inode(inode);
435 if (handle) 433 if (handle)
436 ocfs2_commit_trans(osb, handle); 434 ocfs2_commit_trans(osb, handle);
437 435
@@ -636,6 +634,8 @@ static int ocfs2_link(struct dentry *old_dentry,
636 if (S_ISDIR(inode->i_mode)) 634 if (S_ISDIR(inode->i_mode))
637 return -EPERM; 635 return -EPERM;
638 636
637 dquot_initialize(dir);
638
639 err = ocfs2_inode_lock_nested(dir, &parent_fe_bh, 1, OI_LS_PARENT); 639 err = ocfs2_inode_lock_nested(dir, &parent_fe_bh, 1, OI_LS_PARENT);
640 if (err < 0) { 640 if (err < 0) {
641 if (err != -ENOENT) 641 if (err != -ENOENT)
@@ -791,6 +791,8 @@ static int ocfs2_unlink(struct inode *dir,
791 mlog_entry("(0x%p, 0x%p, '%.*s')\n", dir, dentry, 791 mlog_entry("(0x%p, 0x%p, '%.*s')\n", dir, dentry,
792 dentry->d_name.len, dentry->d_name.name); 792 dentry->d_name.len, dentry->d_name.name);
793 793
794 dquot_initialize(dir);
795
794 BUG_ON(dentry->d_parent->d_inode != dir); 796 BUG_ON(dentry->d_parent->d_inode != dir);
795 797
796 mlog(0, "ino = %llu\n", (unsigned long long)OCFS2_I(inode)->ip_blkno); 798 mlog(0, "ino = %llu\n", (unsigned long long)OCFS2_I(inode)->ip_blkno);
@@ -1051,6 +1053,9 @@ static int ocfs2_rename(struct inode *old_dir,
1051 old_dentry->d_name.len, old_dentry->d_name.name, 1053 old_dentry->d_name.len, old_dentry->d_name.name,
1052 new_dentry->d_name.len, new_dentry->d_name.name); 1054 new_dentry->d_name.len, new_dentry->d_name.name);
1053 1055
1056 dquot_initialize(old_dir);
1057 dquot_initialize(new_dir);
1058
1054 osb = OCFS2_SB(old_dir->i_sb); 1059 osb = OCFS2_SB(old_dir->i_sb);
1055 1060
1056 if (new_inode) { 1061 if (new_inode) {
@@ -1599,6 +1604,8 @@ static int ocfs2_symlink(struct inode *dir,
1599 mlog_entry("(0x%p, 0x%p, symname='%s' actual='%.*s')\n", dir, 1604 mlog_entry("(0x%p, 0x%p, symname='%s' actual='%.*s')\n", dir,
1600 dentry, symname, dentry->d_name.len, dentry->d_name.name); 1605 dentry, symname, dentry->d_name.len, dentry->d_name.name);
1601 1606
1607 dquot_initialize(dir);
1608
1602 sb = dir->i_sb; 1609 sb = dir->i_sb;
1603 osb = OCFS2_SB(sb); 1610 osb = OCFS2_SB(sb);
1604 1611
@@ -1688,13 +1695,9 @@ static int ocfs2_symlink(struct inode *dir,
1688 goto bail; 1695 goto bail;
1689 } 1696 }
1690 1697
1691 /* We don't use standard VFS wrapper because we don't want vfs_dq_init 1698 status = dquot_alloc_inode(inode);
1692 * to be called. */ 1699 if (status)
1693 if (sb_any_quota_active(osb->sb) &&
1694 osb->sb->dq_op->alloc_inode(inode, 1) == NO_QUOTA) {
1695 status = -EDQUOT;
1696 goto bail; 1700 goto bail;
1697 }
1698 did_quota_inode = 1; 1701 did_quota_inode = 1;
1699 1702
1700 mlog_entry("(0x%p, 0x%p, %d, '%.*s')\n", dir, dentry, 1703 mlog_entry("(0x%p, 0x%p, %d, '%.*s')\n", dir, dentry,
@@ -1716,11 +1719,10 @@ static int ocfs2_symlink(struct inode *dir,
1716 u32 offset = 0; 1719 u32 offset = 0;
1717 1720
1718 inode->i_op = &ocfs2_symlink_inode_operations; 1721 inode->i_op = &ocfs2_symlink_inode_operations;
1719 if (vfs_dq_alloc_space_nodirty(inode, 1722 status = dquot_alloc_space_nodirty(inode,
1720 ocfs2_clusters_to_bytes(osb->sb, 1))) { 1723 ocfs2_clusters_to_bytes(osb->sb, 1));
1721 status = -EDQUOT; 1724 if (status)
1722 goto bail; 1725 goto bail;
1723 }
1724 did_quota = 1; 1726 did_quota = 1;
1725 status = ocfs2_add_inode_data(osb, inode, &offset, 1, 0, 1727 status = ocfs2_add_inode_data(osb, inode, &offset, 1, 0,
1726 new_fe_bh, 1728 new_fe_bh,
@@ -1788,10 +1790,10 @@ static int ocfs2_symlink(struct inode *dir,
1788 d_instantiate(dentry, inode); 1790 d_instantiate(dentry, inode);
1789bail: 1791bail:
1790 if (status < 0 && did_quota) 1792 if (status < 0 && did_quota)
1791 vfs_dq_free_space_nodirty(inode, 1793 dquot_free_space_nodirty(inode,
1792 ocfs2_clusters_to_bytes(osb->sb, 1)); 1794 ocfs2_clusters_to_bytes(osb->sb, 1));
1793 if (status < 0 && did_quota_inode) 1795 if (status < 0 && did_quota_inode)
1794 vfs_dq_free_inode(inode); 1796 dquot_free_inode(inode);
1795 if (handle) 1797 if (handle)
1796 ocfs2_commit_trans(osb, handle); 1798 ocfs2_commit_trans(osb, handle);
1797 1799
@@ -2099,13 +2101,9 @@ int ocfs2_create_inode_in_orphan(struct inode *dir,
2099 goto leave; 2101 goto leave;
2100 } 2102 }
2101 2103
2102 /* We don't use standard VFS wrapper because we don't want vfs_dq_init 2104 status = dquot_alloc_inode(inode);
2103 * to be called. */ 2105 if (status)
2104 if (sb_any_quota_active(osb->sb) &&
2105 osb->sb->dq_op->alloc_inode(inode, 1) == NO_QUOTA) {
2106 status = -EDQUOT;
2107 goto leave; 2106 goto leave;
2108 }
2109 did_quota_inode = 1; 2107 did_quota_inode = 1;
2110 2108
2111 inode->i_nlink = 0; 2109 inode->i_nlink = 0;
@@ -2140,7 +2138,7 @@ int ocfs2_create_inode_in_orphan(struct inode *dir,
2140 insert_inode_hash(inode); 2138 insert_inode_hash(inode);
2141leave: 2139leave:
2142 if (status < 0 && did_quota_inode) 2140 if (status < 0 && did_quota_inode)
2143 vfs_dq_free_inode(inode); 2141 dquot_free_inode(inode);
2144 if (handle) 2142 if (handle)
2145 ocfs2_commit_trans(osb, handle); 2143 ocfs2_commit_trans(osb, handle);
2146 2144
diff --git a/fs/ocfs2/quota_global.c b/fs/ocfs2/quota_global.c
index b437dc0c4cad..355f41d1d520 100644
--- a/fs/ocfs2/quota_global.c
+++ b/fs/ocfs2/quota_global.c
@@ -851,13 +851,6 @@ static void ocfs2_destroy_dquot(struct dquot *dquot)
851} 851}
852 852
853const struct dquot_operations ocfs2_quota_operations = { 853const struct dquot_operations ocfs2_quota_operations = {
854 .initialize = dquot_initialize,
855 .drop = dquot_drop,
856 .alloc_space = dquot_alloc_space,
857 .alloc_inode = dquot_alloc_inode,
858 .free_space = dquot_free_space,
859 .free_inode = dquot_free_inode,
860 .transfer = dquot_transfer,
861 .write_dquot = ocfs2_write_dquot, 854 .write_dquot = ocfs2_write_dquot,
862 .acquire_dquot = ocfs2_acquire_dquot, 855 .acquire_dquot = ocfs2_acquire_dquot,
863 .release_dquot = ocfs2_release_dquot, 856 .release_dquot = ocfs2_release_dquot,
diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c
index fb6aa7acf54b..9e96921dffda 100644
--- a/fs/ocfs2/refcounttree.c
+++ b/fs/ocfs2/refcounttree.c
@@ -4390,7 +4390,7 @@ static int ocfs2_vfs_reflink(struct dentry *old_dentry, struct inode *dir,
4390 } 4390 }
4391 4391
4392 mutex_lock(&inode->i_mutex); 4392 mutex_lock(&inode->i_mutex);
4393 vfs_dq_init(dir); 4393 dquot_initialize(dir);
4394 error = ocfs2_reflink(old_dentry, dir, new_dentry, preserve); 4394 error = ocfs2_reflink(old_dentry, dir, new_dentry, preserve);
4395 mutex_unlock(&inode->i_mutex); 4395 mutex_unlock(&inode->i_mutex);
4396 if (!error) 4396 if (!error)
diff --git a/fs/omfs/inode.c b/fs/omfs/inode.c
index f3b7c1541f3a..75d9b5ba1d45 100644
--- a/fs/omfs/inode.c
+++ b/fs/omfs/inode.c
@@ -11,6 +11,7 @@
11#include <linux/parser.h> 11#include <linux/parser.h>
12#include <linux/buffer_head.h> 12#include <linux/buffer_head.h>
13#include <linux/vmalloc.h> 13#include <linux/vmalloc.h>
14#include <linux/writeback.h>
14#include <linux/crc-itu-t.h> 15#include <linux/crc-itu-t.h>
15#include "omfs.h" 16#include "omfs.h"
16 17
@@ -89,7 +90,7 @@ static void omfs_update_checksums(struct omfs_inode *oi)
89 oi->i_head.h_check_xor = xor; 90 oi->i_head.h_check_xor = xor;
90} 91}
91 92
92static int omfs_write_inode(struct inode *inode, int wait) 93static int __omfs_write_inode(struct inode *inode, int wait)
93{ 94{
94 struct omfs_inode *oi; 95 struct omfs_inode *oi;
95 struct omfs_sb_info *sbi = OMFS_SB(inode->i_sb); 96 struct omfs_sb_info *sbi = OMFS_SB(inode->i_sb);
@@ -162,9 +163,14 @@ out:
162 return ret; 163 return ret;
163} 164}
164 165
166static int omfs_write_inode(struct inode *inode, struct writeback_control *wbc)
167{
168 return __omfs_write_inode(inode, wbc->sync_mode == WB_SYNC_ALL);
169}
170
165int omfs_sync_inode(struct inode *inode) 171int omfs_sync_inode(struct inode *inode)
166{ 172{
167 return omfs_write_inode(inode, 1); 173 return __omfs_write_inode(inode, 1);
168} 174}
169 175
170/* 176/*
diff --git a/fs/open.c b/fs/open.c
index 040cef72bc00..e17f54454b50 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -8,7 +8,6 @@
8#include <linux/mm.h> 8#include <linux/mm.h>
9#include <linux/file.h> 9#include <linux/file.h>
10#include <linux/fdtable.h> 10#include <linux/fdtable.h>
11#include <linux/quotaops.h>
12#include <linux/fsnotify.h> 11#include <linux/fsnotify.h>
13#include <linux/module.h> 12#include <linux/module.h>
14#include <linux/slab.h> 13#include <linux/slab.h>
@@ -271,17 +270,15 @@ static long do_sys_truncate(const char __user *pathname, loff_t length)
271 * Make sure that there are no leases. get_write_access() protects 270 * Make sure that there are no leases. get_write_access() protects
272 * against the truncate racing with a lease-granting setlease(). 271 * against the truncate racing with a lease-granting setlease().
273 */ 272 */
274 error = break_lease(inode, FMODE_WRITE); 273 error = break_lease(inode, O_WRONLY);
275 if (error) 274 if (error)
276 goto put_write_and_out; 275 goto put_write_and_out;
277 276
278 error = locks_verify_truncate(inode, NULL, length); 277 error = locks_verify_truncate(inode, NULL, length);
279 if (!error) 278 if (!error)
280 error = security_path_truncate(&path, length, 0); 279 error = security_path_truncate(&path, length, 0);
281 if (!error) { 280 if (!error)
282 vfs_dq_init(inode);
283 error = do_truncate(path.dentry, length, 0, NULL); 281 error = do_truncate(path.dentry, length, 0, NULL);
284 }
285 282
286put_write_and_out: 283put_write_and_out:
287 put_write_access(inode); 284 put_write_access(inode);
diff --git a/fs/pnode.c b/fs/pnode.c
index 8d5f392ec3d3..5cc564a83149 100644
--- a/fs/pnode.c
+++ b/fs/pnode.c
@@ -86,7 +86,7 @@ static int do_make_slave(struct vfsmount *mnt)
86 86
87 /* 87 /*
88 * slave 'mnt' to a peer mount that has the 88 * slave 'mnt' to a peer mount that has the
89 * same root dentry. If none is available than 89 * same root dentry. If none is available then
90 * slave it to anything that is available. 90 * slave it to anything that is available.
91 */ 91 */
92 while ((peer_mnt = next_peer(peer_mnt)) != mnt && 92 while ((peer_mnt = next_peer(peer_mnt)) != mnt &&
@@ -147,6 +147,11 @@ void change_mnt_propagation(struct vfsmount *mnt, int type)
147 * get the next mount in the propagation tree. 147 * get the next mount in the propagation tree.
148 * @m: the mount seen last 148 * @m: the mount seen last
149 * @origin: the original mount from where the tree walk initiated 149 * @origin: the original mount from where the tree walk initiated
150 *
151 * Note that peer groups form contiguous segments of slave lists.
152 * We rely on that in get_source() to be able to find out if
153 * vfsmount found while iterating with propagation_next() is
154 * a peer of one we'd found earlier.
150 */ 155 */
151static struct vfsmount *propagation_next(struct vfsmount *m, 156static struct vfsmount *propagation_next(struct vfsmount *m,
152 struct vfsmount *origin) 157 struct vfsmount *origin)
@@ -186,10 +191,6 @@ static struct vfsmount *get_source(struct vfsmount *dest,
186{ 191{
187 struct vfsmount *p_last_src = NULL; 192 struct vfsmount *p_last_src = NULL;
188 struct vfsmount *p_last_dest = NULL; 193 struct vfsmount *p_last_dest = NULL;
189 *type = CL_PROPAGATION;
190
191 if (IS_MNT_SHARED(dest))
192 *type |= CL_MAKE_SHARED;
193 194
194 while (last_dest != dest->mnt_master) { 195 while (last_dest != dest->mnt_master) {
195 p_last_dest = last_dest; 196 p_last_dest = last_dest;
@@ -202,13 +203,18 @@ static struct vfsmount *get_source(struct vfsmount *dest,
202 do { 203 do {
203 p_last_dest = next_peer(p_last_dest); 204 p_last_dest = next_peer(p_last_dest);
204 } while (IS_MNT_NEW(p_last_dest)); 205 } while (IS_MNT_NEW(p_last_dest));
206 /* is that a peer of the earlier? */
207 if (dest == p_last_dest) {
208 *type = CL_MAKE_SHARED;
209 return p_last_src;
210 }
205 } 211 }
206 212 /* slave of the earlier, then */
207 if (dest != p_last_dest) { 213 *type = CL_SLAVE;
208 *type |= CL_SLAVE; 214 /* beginning of peer group among the slaves? */
209 return last_src; 215 if (IS_MNT_SHARED(dest))
210 } else 216 *type |= CL_MAKE_SHARED;
211 return p_last_src; 217 return last_src;
212} 218}
213 219
214/* 220/*
diff --git a/fs/pnode.h b/fs/pnode.h
index 958665d662af..1ea4ae1efcd3 100644
--- a/fs/pnode.h
+++ b/fs/pnode.h
@@ -21,12 +21,11 @@
21#define CL_SLAVE 0x02 21#define CL_SLAVE 0x02
22#define CL_COPY_ALL 0x04 22#define CL_COPY_ALL 0x04
23#define CL_MAKE_SHARED 0x08 23#define CL_MAKE_SHARED 0x08
24#define CL_PROPAGATION 0x10 24#define CL_PRIVATE 0x10
25#define CL_PRIVATE 0x20
26 25
27static inline void set_mnt_shared(struct vfsmount *mnt) 26static inline void set_mnt_shared(struct vfsmount *mnt)
28{ 27{
29 mnt->mnt_flags &= ~MNT_PNODE_MASK; 28 mnt->mnt_flags &= ~MNT_SHARED_MASK;
30 mnt->mnt_flags |= MNT_SHARED; 29 mnt->mnt_flags |= MNT_SHARED;
31} 30}
32 31
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 623e2ffb5d2b..a7310841c831 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -647,17 +647,11 @@ static int mounts_release(struct inode *inode, struct file *file)
647static unsigned mounts_poll(struct file *file, poll_table *wait) 647static unsigned mounts_poll(struct file *file, poll_table *wait)
648{ 648{
649 struct proc_mounts *p = file->private_data; 649 struct proc_mounts *p = file->private_data;
650 struct mnt_namespace *ns = p->ns;
651 unsigned res = POLLIN | POLLRDNORM; 650 unsigned res = POLLIN | POLLRDNORM;
652 651
653 poll_wait(file, &ns->poll, wait); 652 poll_wait(file, &p->ns->poll, wait);
654 653 if (mnt_had_events(p))
655 spin_lock(&vfsmount_lock);
656 if (p->event != ns->event) {
657 p->event = ns->event;
658 res |= POLLERR | POLLPRI; 654 res |= POLLERR | POLLPRI;
659 }
660 spin_unlock(&vfsmount_lock);
661 655
662 return res; 656 return res;
663} 657}
diff --git a/fs/proc/generic.c b/fs/proc/generic.c
index 480cb1065eec..9580abeadeb3 100644
--- a/fs/proc/generic.c
+++ b/fs/proc/generic.c
@@ -662,6 +662,7 @@ struct proc_dir_entry *proc_symlink(const char *name,
662 } 662 }
663 return ent; 663 return ent;
664} 664}
665EXPORT_SYMBOL(proc_symlink);
665 666
666struct proc_dir_entry *proc_mkdir_mode(const char *name, mode_t mode, 667struct proc_dir_entry *proc_mkdir_mode(const char *name, mode_t mode,
667 struct proc_dir_entry *parent) 668 struct proc_dir_entry *parent)
@@ -700,6 +701,7 @@ struct proc_dir_entry *proc_mkdir(const char *name,
700{ 701{
701 return proc_mkdir_mode(name, S_IRUGO | S_IXUGO, parent); 702 return proc_mkdir_mode(name, S_IRUGO | S_IXUGO, parent);
702} 703}
704EXPORT_SYMBOL(proc_mkdir);
703 705
704struct proc_dir_entry *create_proc_entry(const char *name, mode_t mode, 706struct proc_dir_entry *create_proc_entry(const char *name, mode_t mode,
705 struct proc_dir_entry *parent) 707 struct proc_dir_entry *parent)
@@ -728,6 +730,7 @@ struct proc_dir_entry *create_proc_entry(const char *name, mode_t mode,
728 } 730 }
729 return ent; 731 return ent;
730} 732}
733EXPORT_SYMBOL(create_proc_entry);
731 734
732struct proc_dir_entry *proc_create_data(const char *name, mode_t mode, 735struct proc_dir_entry *proc_create_data(const char *name, mode_t mode,
733 struct proc_dir_entry *parent, 736 struct proc_dir_entry *parent,
@@ -762,6 +765,7 @@ out_free:
762out: 765out:
763 return NULL; 766 return NULL;
764} 767}
768EXPORT_SYMBOL(proc_create_data);
765 769
766static void free_proc_entry(struct proc_dir_entry *de) 770static void free_proc_entry(struct proc_dir_entry *de)
767{ 771{
@@ -853,3 +857,4 @@ continue_removing:
853 de->parent->name, de->name, de->subdir->name); 857 de->parent->name, de->name, de->subdir->name);
854 pde_put(de); 858 pde_put(de);
855} 859}
860EXPORT_SYMBOL(remove_proc_entry);
diff --git a/fs/proc/root.c b/fs/proc/root.c
index b080b791d9e3..757c069f2a65 100644
--- a/fs/proc/root.c
+++ b/fs/proc/root.c
@@ -220,9 +220,3 @@ void pid_ns_release_proc(struct pid_namespace *ns)
220{ 220{
221 mntput(ns->proc_mnt); 221 mntput(ns->proc_mnt);
222} 222}
223
224EXPORT_SYMBOL(proc_symlink);
225EXPORT_SYMBOL(proc_mkdir);
226EXPORT_SYMBOL(create_proc_entry);
227EXPORT_SYMBOL(proc_create_data);
228EXPORT_SYMBOL(remove_proc_entry);
diff --git a/fs/quota/Kconfig b/fs/quota/Kconfig
index efc02ebb8c70..dad7fb247ddc 100644
--- a/fs/quota/Kconfig
+++ b/fs/quota/Kconfig
@@ -59,3 +59,8 @@ config QUOTACTL
59 bool 59 bool
60 depends on XFS_QUOTA || QUOTA 60 depends on XFS_QUOTA || QUOTA
61 default y 61 default y
62
63config QUOTACTL_COMPAT
64 bool
65 depends on QUOTACTL && COMPAT_FOR_U64_ALIGNMENT
66 default y
diff --git a/fs/quota/Makefile b/fs/quota/Makefile
index 68d4f6dc0578..5f9e9e276af0 100644
--- a/fs/quota/Makefile
+++ b/fs/quota/Makefile
@@ -3,3 +3,5 @@ obj-$(CONFIG_QFMT_V1) += quota_v1.o
3obj-$(CONFIG_QFMT_V2) += quota_v2.o 3obj-$(CONFIG_QFMT_V2) += quota_v2.o
4obj-$(CONFIG_QUOTA_TREE) += quota_tree.o 4obj-$(CONFIG_QUOTA_TREE) += quota_tree.o
5obj-$(CONFIG_QUOTACTL) += quota.o 5obj-$(CONFIG_QUOTACTL) += quota.o
6obj-$(CONFIG_QUOTACTL_COMPAT) += compat.o
7obj-$(CONFIG_QUOTA_NETLINK_INTERFACE) += netlink.o
diff --git a/fs/quota/compat.c b/fs/quota/compat.c
new file mode 100644
index 000000000000..fb1892fe3e56
--- /dev/null
+++ b/fs/quota/compat.c
@@ -0,0 +1,118 @@
1
2#include <linux/syscalls.h>
3#include <linux/compat.h>
4#include <linux/quotaops.h>
5
6/*
7 * This code works only for 32 bit quota tools over 64 bit OS (x86_64, ia64)
8 * and is necessary due to alignment problems.
9 */
10struct compat_if_dqblk {
11 compat_u64 dqb_bhardlimit;
12 compat_u64 dqb_bsoftlimit;
13 compat_u64 dqb_curspace;
14 compat_u64 dqb_ihardlimit;
15 compat_u64 dqb_isoftlimit;
16 compat_u64 dqb_curinodes;
17 compat_u64 dqb_btime;
18 compat_u64 dqb_itime;
19 compat_uint_t dqb_valid;
20};
21
22/* XFS structures */
23struct compat_fs_qfilestat {
24 compat_u64 dqb_bhardlimit;
25 compat_u64 qfs_nblks;
26 compat_uint_t qfs_nextents;
27};
28
29struct compat_fs_quota_stat {
30 __s8 qs_version;
31 __u16 qs_flags;
32 __s8 qs_pad;
33 struct compat_fs_qfilestat qs_uquota;
34 struct compat_fs_qfilestat qs_gquota;
35 compat_uint_t qs_incoredqs;
36 compat_int_t qs_btimelimit;
37 compat_int_t qs_itimelimit;
38 compat_int_t qs_rtbtimelimit;
39 __u16 qs_bwarnlimit;
40 __u16 qs_iwarnlimit;
41};
42
43asmlinkage long sys32_quotactl(unsigned int cmd, const char __user *special,
44 qid_t id, void __user *addr)
45{
46 unsigned int cmds;
47 struct if_dqblk __user *dqblk;
48 struct compat_if_dqblk __user *compat_dqblk;
49 struct fs_quota_stat __user *fsqstat;
50 struct compat_fs_quota_stat __user *compat_fsqstat;
51 compat_uint_t data;
52 u16 xdata;
53 long ret;
54
55 cmds = cmd >> SUBCMDSHIFT;
56
57 switch (cmds) {
58 case Q_GETQUOTA:
59 dqblk = compat_alloc_user_space(sizeof(struct if_dqblk));
60 compat_dqblk = addr;
61 ret = sys_quotactl(cmd, special, id, dqblk);
62 if (ret)
63 break;
64 if (copy_in_user(compat_dqblk, dqblk, sizeof(*compat_dqblk)) ||
65 get_user(data, &dqblk->dqb_valid) ||
66 put_user(data, &compat_dqblk->dqb_valid))
67 ret = -EFAULT;
68 break;
69 case Q_SETQUOTA:
70 dqblk = compat_alloc_user_space(sizeof(struct if_dqblk));
71 compat_dqblk = addr;
72 ret = -EFAULT;
73 if (copy_in_user(dqblk, compat_dqblk, sizeof(*compat_dqblk)) ||
74 get_user(data, &compat_dqblk->dqb_valid) ||
75 put_user(data, &dqblk->dqb_valid))
76 break;
77 ret = sys_quotactl(cmd, special, id, dqblk);
78 break;
79 case Q_XGETQSTAT:
80 fsqstat = compat_alloc_user_space(sizeof(struct fs_quota_stat));
81 compat_fsqstat = addr;
82 ret = sys_quotactl(cmd, special, id, fsqstat);
83 if (ret)
84 break;
85 ret = -EFAULT;
86 /* Copying qs_version, qs_flags, qs_pad */
87 if (copy_in_user(compat_fsqstat, fsqstat,
88 offsetof(struct compat_fs_quota_stat, qs_uquota)))
89 break;
90 /* Copying qs_uquota */
91 if (copy_in_user(&compat_fsqstat->qs_uquota,
92 &fsqstat->qs_uquota,
93 sizeof(compat_fsqstat->qs_uquota)) ||
94 get_user(data, &fsqstat->qs_uquota.qfs_nextents) ||
95 put_user(data, &compat_fsqstat->qs_uquota.qfs_nextents))
96 break;
97 /* Copying qs_gquota */
98 if (copy_in_user(&compat_fsqstat->qs_gquota,
99 &fsqstat->qs_gquota,
100 sizeof(compat_fsqstat->qs_gquota)) ||
101 get_user(data, &fsqstat->qs_gquota.qfs_nextents) ||
102 put_user(data, &compat_fsqstat->qs_gquota.qfs_nextents))
103 break;
104 /* Copying the rest */
105 if (copy_in_user(&compat_fsqstat->qs_incoredqs,
106 &fsqstat->qs_incoredqs,
107 sizeof(struct compat_fs_quota_stat) -
108 offsetof(struct compat_fs_quota_stat, qs_incoredqs)) ||
109 get_user(xdata, &fsqstat->qs_iwarnlimit) ||
110 put_user(xdata, &compat_fsqstat->qs_iwarnlimit))
111 break;
112 ret = 0;
113 break;
114 default:
115 ret = sys_quotactl(cmd, special, id, addr);
116 }
117 return ret;
118}
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index 3fc62b097bed..e0b870f4749f 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -100,9 +100,13 @@
100 * 100 *
101 * Any operation working on dquots via inode pointers must hold dqptr_sem. If 101 * Any operation working on dquots via inode pointers must hold dqptr_sem. If
102 * operation is just reading pointers from inode (or not using them at all) the 102 * operation is just reading pointers from inode (or not using them at all) the
103 * read lock is enough. If pointers are altered function must hold write lock 103 * read lock is enough. If pointers are altered function must hold write lock.
104 * (these locking rules also apply for S_NOQUOTA flag in the inode - note that 104 * Special care needs to be taken about S_NOQUOTA inode flag (marking that
105 * for altering the flag i_mutex is also needed). 105 * inode is a quota file). Functions adding pointers from inode to dquots have
106 * to check this flag under dqptr_sem and then (if S_NOQUOTA is not set) they
107 * have to do all pointer modifications before dropping dqptr_sem. This makes
108 * sure they cannot race with quotaon which first sets S_NOQUOTA flag and
109 * then drops all pointers to dquots from an inode.
106 * 110 *
107 * Each dquot has its dq_lock mutex. Locked dquots might not be referenced 111 * Each dquot has its dq_lock mutex. Locked dquots might not be referenced
108 * from inodes (dquot_alloc_space() and such don't check the dq_lock). 112 * from inodes (dquot_alloc_space() and such don't check the dq_lock).
@@ -225,6 +229,9 @@ static struct hlist_head *dquot_hash;
225struct dqstats dqstats; 229struct dqstats dqstats;
226EXPORT_SYMBOL(dqstats); 230EXPORT_SYMBOL(dqstats);
227 231
232static qsize_t inode_get_rsv_space(struct inode *inode);
233static void __dquot_initialize(struct inode *inode, int type);
234
228static inline unsigned int 235static inline unsigned int
229hashfn(const struct super_block *sb, unsigned int id, int type) 236hashfn(const struct super_block *sb, unsigned int id, int type)
230{ 237{
@@ -564,7 +571,7 @@ out:
564} 571}
565EXPORT_SYMBOL(dquot_scan_active); 572EXPORT_SYMBOL(dquot_scan_active);
566 573
567int vfs_quota_sync(struct super_block *sb, int type) 574int vfs_quota_sync(struct super_block *sb, int type, int wait)
568{ 575{
569 struct list_head *dirty; 576 struct list_head *dirty;
570 struct dquot *dquot; 577 struct dquot *dquot;
@@ -609,6 +616,33 @@ int vfs_quota_sync(struct super_block *sb, int type)
609 spin_unlock(&dq_list_lock); 616 spin_unlock(&dq_list_lock);
610 mutex_unlock(&dqopt->dqonoff_mutex); 617 mutex_unlock(&dqopt->dqonoff_mutex);
611 618
619 if (!wait || (sb_dqopt(sb)->flags & DQUOT_QUOTA_SYS_FILE))
620 return 0;
621
622 /* This is not very clever (and fast) but currently I don't know about
623 * any other simple way of getting quota data to disk and we must get
624 * them there for userspace to be visible... */
625 if (sb->s_op->sync_fs)
626 sb->s_op->sync_fs(sb, 1);
627 sync_blockdev(sb->s_bdev);
628
629 /*
630 * Now when everything is written we can discard the pagecache so
631 * that userspace sees the changes.
632 */
633 mutex_lock(&sb_dqopt(sb)->dqonoff_mutex);
634 for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
635 if (type != -1 && cnt != type)
636 continue;
637 if (!sb_has_quota_active(sb, cnt))
638 continue;
639 mutex_lock_nested(&sb_dqopt(sb)->files[cnt]->i_mutex,
640 I_MUTEX_QUOTA);
641 truncate_inode_pages(&sb_dqopt(sb)->files[cnt]->i_data, 0);
642 mutex_unlock(&sb_dqopt(sb)->files[cnt]->i_mutex);
643 }
644 mutex_unlock(&sb_dqopt(sb)->dqonoff_mutex);
645
612 return 0; 646 return 0;
613} 647}
614EXPORT_SYMBOL(vfs_quota_sync); 648EXPORT_SYMBOL(vfs_quota_sync);
@@ -840,11 +874,14 @@ static int dqinit_needed(struct inode *inode, int type)
840static void add_dquot_ref(struct super_block *sb, int type) 874static void add_dquot_ref(struct super_block *sb, int type)
841{ 875{
842 struct inode *inode, *old_inode = NULL; 876 struct inode *inode, *old_inode = NULL;
877 int reserved = 0;
843 878
844 spin_lock(&inode_lock); 879 spin_lock(&inode_lock);
845 list_for_each_entry(inode, &sb->s_inodes, i_sb_list) { 880 list_for_each_entry(inode, &sb->s_inodes, i_sb_list) {
846 if (inode->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE|I_NEW)) 881 if (inode->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE|I_NEW))
847 continue; 882 continue;
883 if (unlikely(inode_get_rsv_space(inode) > 0))
884 reserved = 1;
848 if (!atomic_read(&inode->i_writecount)) 885 if (!atomic_read(&inode->i_writecount))
849 continue; 886 continue;
850 if (!dqinit_needed(inode, type)) 887 if (!dqinit_needed(inode, type))
@@ -854,7 +891,7 @@ static void add_dquot_ref(struct super_block *sb, int type)
854 spin_unlock(&inode_lock); 891 spin_unlock(&inode_lock);
855 892
856 iput(old_inode); 893 iput(old_inode);
857 sb->dq_op->initialize(inode, type); 894 __dquot_initialize(inode, type);
858 /* We hold a reference to 'inode' so it couldn't have been 895 /* We hold a reference to 'inode' so it couldn't have been
859 * removed from s_inodes list while we dropped the inode_lock. 896 * removed from s_inodes list while we dropped the inode_lock.
860 * We cannot iput the inode now as we can be holding the last 897 * We cannot iput the inode now as we can be holding the last
@@ -865,6 +902,12 @@ static void add_dquot_ref(struct super_block *sb, int type)
865 } 902 }
866 spin_unlock(&inode_lock); 903 spin_unlock(&inode_lock);
867 iput(old_inode); 904 iput(old_inode);
905
906 if (reserved) {
907 printk(KERN_WARNING "VFS (%s): Writes happened before quota"
908 " was turned on thus quota information is probably "
909 "inconsistent. Please run quotacheck(8).\n", sb->s_id);
910 }
868} 911}
869 912
870/* 913/*
@@ -978,10 +1021,12 @@ static inline void dquot_resv_space(struct dquot *dquot, qsize_t number)
978/* 1021/*
979 * Claim reserved quota space 1022 * Claim reserved quota space
980 */ 1023 */
981static void dquot_claim_reserved_space(struct dquot *dquot, 1024static void dquot_claim_reserved_space(struct dquot *dquot, qsize_t number)
982 qsize_t number)
983{ 1025{
984 WARN_ON(dquot->dq_dqb.dqb_rsvspace < number); 1026 if (dquot->dq_dqb.dqb_rsvspace < number) {
1027 WARN_ON_ONCE(1);
1028 number = dquot->dq_dqb.dqb_rsvspace;
1029 }
985 dquot->dq_dqb.dqb_curspace += number; 1030 dquot->dq_dqb.dqb_curspace += number;
986 dquot->dq_dqb.dqb_rsvspace -= number; 1031 dquot->dq_dqb.dqb_rsvspace -= number;
987} 1032}
@@ -989,7 +1034,12 @@ static void dquot_claim_reserved_space(struct dquot *dquot,
989static inline 1034static inline
990void dquot_free_reserved_space(struct dquot *dquot, qsize_t number) 1035void dquot_free_reserved_space(struct dquot *dquot, qsize_t number)
991{ 1036{
992 dquot->dq_dqb.dqb_rsvspace -= number; 1037 if (dquot->dq_dqb.dqb_rsvspace >= number)
1038 dquot->dq_dqb.dqb_rsvspace -= number;
1039 else {
1040 WARN_ON_ONCE(1);
1041 dquot->dq_dqb.dqb_rsvspace = 0;
1042 }
993} 1043}
994 1044
995static void dquot_decr_inodes(struct dquot *dquot, qsize_t number) 1045static void dquot_decr_inodes(struct dquot *dquot, qsize_t number)
@@ -1131,13 +1181,13 @@ static int check_idq(struct dquot *dquot, qsize_t inodes, char *warntype)
1131 *warntype = QUOTA_NL_NOWARN; 1181 *warntype = QUOTA_NL_NOWARN;
1132 if (!sb_has_quota_limits_enabled(dquot->dq_sb, dquot->dq_type) || 1182 if (!sb_has_quota_limits_enabled(dquot->dq_sb, dquot->dq_type) ||
1133 test_bit(DQ_FAKE_B, &dquot->dq_flags)) 1183 test_bit(DQ_FAKE_B, &dquot->dq_flags))
1134 return QUOTA_OK; 1184 return 0;
1135 1185
1136 if (dquot->dq_dqb.dqb_ihardlimit && 1186 if (dquot->dq_dqb.dqb_ihardlimit &&
1137 newinodes > dquot->dq_dqb.dqb_ihardlimit && 1187 newinodes > dquot->dq_dqb.dqb_ihardlimit &&
1138 !ignore_hardlimit(dquot)) { 1188 !ignore_hardlimit(dquot)) {
1139 *warntype = QUOTA_NL_IHARDWARN; 1189 *warntype = QUOTA_NL_IHARDWARN;
1140 return NO_QUOTA; 1190 return -EDQUOT;
1141 } 1191 }
1142 1192
1143 if (dquot->dq_dqb.dqb_isoftlimit && 1193 if (dquot->dq_dqb.dqb_isoftlimit &&
@@ -1146,7 +1196,7 @@ static int check_idq(struct dquot *dquot, qsize_t inodes, char *warntype)
1146 get_seconds() >= dquot->dq_dqb.dqb_itime && 1196 get_seconds() >= dquot->dq_dqb.dqb_itime &&
1147 !ignore_hardlimit(dquot)) { 1197 !ignore_hardlimit(dquot)) {
1148 *warntype = QUOTA_NL_ISOFTLONGWARN; 1198 *warntype = QUOTA_NL_ISOFTLONGWARN;
1149 return NO_QUOTA; 1199 return -EDQUOT;
1150 } 1200 }
1151 1201
1152 if (dquot->dq_dqb.dqb_isoftlimit && 1202 if (dquot->dq_dqb.dqb_isoftlimit &&
@@ -1157,7 +1207,7 @@ static int check_idq(struct dquot *dquot, qsize_t inodes, char *warntype)
1157 sb_dqopt(dquot->dq_sb)->info[dquot->dq_type].dqi_igrace; 1207 sb_dqopt(dquot->dq_sb)->info[dquot->dq_type].dqi_igrace;
1158 } 1208 }
1159 1209
1160 return QUOTA_OK; 1210 return 0;
1161} 1211}
1162 1212
1163/* needs dq_data_lock */ 1213/* needs dq_data_lock */
@@ -1169,7 +1219,7 @@ static int check_bdq(struct dquot *dquot, qsize_t space, int prealloc, char *war
1169 *warntype = QUOTA_NL_NOWARN; 1219 *warntype = QUOTA_NL_NOWARN;
1170 if (!sb_has_quota_limits_enabled(sb, dquot->dq_type) || 1220 if (!sb_has_quota_limits_enabled(sb, dquot->dq_type) ||
1171 test_bit(DQ_FAKE_B, &dquot->dq_flags)) 1221 test_bit(DQ_FAKE_B, &dquot->dq_flags))
1172 return QUOTA_OK; 1222 return 0;
1173 1223
1174 tspace = dquot->dq_dqb.dqb_curspace + dquot->dq_dqb.dqb_rsvspace 1224 tspace = dquot->dq_dqb.dqb_curspace + dquot->dq_dqb.dqb_rsvspace
1175 + space; 1225 + space;
@@ -1179,7 +1229,7 @@ static int check_bdq(struct dquot *dquot, qsize_t space, int prealloc, char *war
1179 !ignore_hardlimit(dquot)) { 1229 !ignore_hardlimit(dquot)) {
1180 if (!prealloc) 1230 if (!prealloc)
1181 *warntype = QUOTA_NL_BHARDWARN; 1231 *warntype = QUOTA_NL_BHARDWARN;
1182 return NO_QUOTA; 1232 return -EDQUOT;
1183 } 1233 }
1184 1234
1185 if (dquot->dq_dqb.dqb_bsoftlimit && 1235 if (dquot->dq_dqb.dqb_bsoftlimit &&
@@ -1189,7 +1239,7 @@ static int check_bdq(struct dquot *dquot, qsize_t space, int prealloc, char *war
1189 !ignore_hardlimit(dquot)) { 1239 !ignore_hardlimit(dquot)) {
1190 if (!prealloc) 1240 if (!prealloc)
1191 *warntype = QUOTA_NL_BSOFTLONGWARN; 1241 *warntype = QUOTA_NL_BSOFTLONGWARN;
1192 return NO_QUOTA; 1242 return -EDQUOT;
1193 } 1243 }
1194 1244
1195 if (dquot->dq_dqb.dqb_bsoftlimit && 1245 if (dquot->dq_dqb.dqb_bsoftlimit &&
@@ -1205,10 +1255,10 @@ static int check_bdq(struct dquot *dquot, qsize_t space, int prealloc, char *war
1205 * We don't allow preallocation to exceed softlimit so exceeding will 1255 * We don't allow preallocation to exceed softlimit so exceeding will
1206 * be always printed 1256 * be always printed
1207 */ 1257 */
1208 return NO_QUOTA; 1258 return -EDQUOT;
1209 } 1259 }
1210 1260
1211 return QUOTA_OK; 1261 return 0;
1212} 1262}
1213 1263
1214static int info_idq_free(struct dquot *dquot, qsize_t inodes) 1264static int info_idq_free(struct dquot *dquot, qsize_t inodes)
@@ -1242,25 +1292,32 @@ static int info_bdq_free(struct dquot *dquot, qsize_t space)
1242 return QUOTA_NL_BHARDBELOW; 1292 return QUOTA_NL_BHARDBELOW;
1243 return QUOTA_NL_NOWARN; 1293 return QUOTA_NL_NOWARN;
1244} 1294}
1295
1245/* 1296/*
1246 * Initialize quota pointers in inode 1297 * Initialize quota pointers in inode
1247 * We do things in a bit complicated way but by that we avoid calling 1298 *
1248 * dqget() and thus filesystem callbacks under dqptr_sem. 1299 * We do things in a bit complicated way but by that we avoid calling
1300 * dqget() and thus filesystem callbacks under dqptr_sem.
1301 *
1302 * It is better to call this function outside of any transaction as it
1303 * might need a lot of space in journal for dquot structure allocation.
1249 */ 1304 */
1250int dquot_initialize(struct inode *inode, int type) 1305static void __dquot_initialize(struct inode *inode, int type)
1251{ 1306{
1252 unsigned int id = 0; 1307 unsigned int id = 0;
1253 int cnt, ret = 0; 1308 int cnt;
1254 struct dquot *got[MAXQUOTAS] = { NULL, NULL }; 1309 struct dquot *got[MAXQUOTAS];
1255 struct super_block *sb = inode->i_sb; 1310 struct super_block *sb = inode->i_sb;
1311 qsize_t rsv;
1256 1312
1257 /* First test before acquiring mutex - solves deadlocks when we 1313 /* First test before acquiring mutex - solves deadlocks when we
1258 * re-enter the quota code and are already holding the mutex */ 1314 * re-enter the quota code and are already holding the mutex */
1259 if (IS_NOQUOTA(inode)) 1315 if (!sb_any_quota_active(inode->i_sb) || IS_NOQUOTA(inode))
1260 return 0; 1316 return;
1261 1317
1262 /* First get references to structures we might need. */ 1318 /* First get references to structures we might need. */
1263 for (cnt = 0; cnt < MAXQUOTAS; cnt++) { 1319 for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
1320 got[cnt] = NULL;
1264 if (type != -1 && cnt != type) 1321 if (type != -1 && cnt != type)
1265 continue; 1322 continue;
1266 switch (cnt) { 1323 switch (cnt) {
@@ -1275,7 +1332,6 @@ int dquot_initialize(struct inode *inode, int type)
1275 } 1332 }
1276 1333
1277 down_write(&sb_dqopt(sb)->dqptr_sem); 1334 down_write(&sb_dqopt(sb)->dqptr_sem);
1278 /* Having dqptr_sem we know NOQUOTA flags can't be altered... */
1279 if (IS_NOQUOTA(inode)) 1335 if (IS_NOQUOTA(inode))
1280 goto out_err; 1336 goto out_err;
1281 for (cnt = 0; cnt < MAXQUOTAS; cnt++) { 1337 for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
@@ -1287,20 +1343,31 @@ int dquot_initialize(struct inode *inode, int type)
1287 if (!inode->i_dquot[cnt]) { 1343 if (!inode->i_dquot[cnt]) {
1288 inode->i_dquot[cnt] = got[cnt]; 1344 inode->i_dquot[cnt] = got[cnt];
1289 got[cnt] = NULL; 1345 got[cnt] = NULL;
1346 /*
1347 * Make quota reservation system happy if someone
1348 * did a write before quota was turned on
1349 */
1350 rsv = inode_get_rsv_space(inode);
1351 if (unlikely(rsv))
1352 dquot_resv_space(inode->i_dquot[cnt], rsv);
1290 } 1353 }
1291 } 1354 }
1292out_err: 1355out_err:
1293 up_write(&sb_dqopt(sb)->dqptr_sem); 1356 up_write(&sb_dqopt(sb)->dqptr_sem);
1294 /* Drop unused references */ 1357 /* Drop unused references */
1295 dqput_all(got); 1358 dqput_all(got);
1296 return ret; 1359}
1360
1361void dquot_initialize(struct inode *inode)
1362{
1363 __dquot_initialize(inode, -1);
1297} 1364}
1298EXPORT_SYMBOL(dquot_initialize); 1365EXPORT_SYMBOL(dquot_initialize);
1299 1366
1300/* 1367/*
1301 * Release all quotas referenced by inode 1368 * Release all quotas referenced by inode
1302 */ 1369 */
1303int dquot_drop(struct inode *inode) 1370static void __dquot_drop(struct inode *inode)
1304{ 1371{
1305 int cnt; 1372 int cnt;
1306 struct dquot *put[MAXQUOTAS]; 1373 struct dquot *put[MAXQUOTAS];
@@ -1312,32 +1379,31 @@ int dquot_drop(struct inode *inode)
1312 } 1379 }
1313 up_write(&sb_dqopt(inode->i_sb)->dqptr_sem); 1380 up_write(&sb_dqopt(inode->i_sb)->dqptr_sem);
1314 dqput_all(put); 1381 dqput_all(put);
1315 return 0;
1316} 1382}
1317EXPORT_SYMBOL(dquot_drop);
1318 1383
1319/* Wrapper to remove references to quota structures from inode */ 1384void dquot_drop(struct inode *inode)
1320void vfs_dq_drop(struct inode *inode) 1385{
1321{ 1386 int cnt;
1322 /* Here we can get arbitrary inode from clear_inode() so we have 1387
1323 * to be careful. OTOH we don't need locking as quota operations 1388 if (IS_NOQUOTA(inode))
1324 * are allowed to change only at mount time */ 1389 return;
1325 if (!IS_NOQUOTA(inode) && inode->i_sb && inode->i_sb->dq_op 1390
1326 && inode->i_sb->dq_op->drop) { 1391 /*
1327 int cnt; 1392 * Test before calling to rule out calls from proc and such
1328 /* Test before calling to rule out calls from proc and such 1393 * where we are not allowed to block. Note that this is
1329 * where we are not allowed to block. Note that this is 1394 * actually reliable test even without the lock - the caller
1330 * actually reliable test even without the lock - the caller 1395 * must assure that nobody can come after the DQUOT_DROP and
1331 * must assure that nobody can come after the DQUOT_DROP and 1396 * add quota pointers back anyway.
1332 * add quota pointers back anyway */ 1397 */
1333 for (cnt = 0; cnt < MAXQUOTAS; cnt++) 1398 for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
1334 if (inode->i_dquot[cnt]) 1399 if (inode->i_dquot[cnt])
1335 break; 1400 break;
1336 if (cnt < MAXQUOTAS) 1401 }
1337 inode->i_sb->dq_op->drop(inode); 1402
1338 } 1403 if (cnt < MAXQUOTAS)
1339} 1404 __dquot_drop(inode);
1340EXPORT_SYMBOL(vfs_dq_drop); 1405}
1406EXPORT_SYMBOL(dquot_drop);
1341 1407
1342/* 1408/*
1343 * inode_reserved_space is managed internally by quota, and protected by 1409 * inode_reserved_space is managed internally by quota, and protected by
@@ -1351,28 +1417,30 @@ static qsize_t *inode_reserved_space(struct inode * inode)
1351 return inode->i_sb->dq_op->get_reserved_space(inode); 1417 return inode->i_sb->dq_op->get_reserved_space(inode);
1352} 1418}
1353 1419
1354static void inode_add_rsv_space(struct inode *inode, qsize_t number) 1420void inode_add_rsv_space(struct inode *inode, qsize_t number)
1355{ 1421{
1356 spin_lock(&inode->i_lock); 1422 spin_lock(&inode->i_lock);
1357 *inode_reserved_space(inode) += number; 1423 *inode_reserved_space(inode) += number;
1358 spin_unlock(&inode->i_lock); 1424 spin_unlock(&inode->i_lock);
1359} 1425}
1426EXPORT_SYMBOL(inode_add_rsv_space);
1360 1427
1361 1428void inode_claim_rsv_space(struct inode *inode, qsize_t number)
1362static void inode_claim_rsv_space(struct inode *inode, qsize_t number)
1363{ 1429{
1364 spin_lock(&inode->i_lock); 1430 spin_lock(&inode->i_lock);
1365 *inode_reserved_space(inode) -= number; 1431 *inode_reserved_space(inode) -= number;
1366 __inode_add_bytes(inode, number); 1432 __inode_add_bytes(inode, number);
1367 spin_unlock(&inode->i_lock); 1433 spin_unlock(&inode->i_lock);
1368} 1434}
1435EXPORT_SYMBOL(inode_claim_rsv_space);
1369 1436
1370static void inode_sub_rsv_space(struct inode *inode, qsize_t number) 1437void inode_sub_rsv_space(struct inode *inode, qsize_t number)
1371{ 1438{
1372 spin_lock(&inode->i_lock); 1439 spin_lock(&inode->i_lock);
1373 *inode_reserved_space(inode) -= number; 1440 *inode_reserved_space(inode) -= number;
1374 spin_unlock(&inode->i_lock); 1441 spin_unlock(&inode->i_lock);
1375} 1442}
1443EXPORT_SYMBOL(inode_sub_rsv_space);
1376 1444
1377static qsize_t inode_get_rsv_space(struct inode *inode) 1445static qsize_t inode_get_rsv_space(struct inode *inode)
1378{ 1446{
@@ -1404,38 +1472,34 @@ static void inode_decr_space(struct inode *inode, qsize_t number, int reserve)
1404} 1472}
1405 1473
1406/* 1474/*
1407 * Following four functions update i_blocks+i_bytes fields and 1475 * This functions updates i_blocks+i_bytes fields and quota information
1408 * quota information (together with appropriate checks) 1476 * (together with appropriate checks).
1409 * NOTE: We absolutely rely on the fact that caller dirties 1477 *
1410 * the inode (usually macros in quotaops.h care about this) and 1478 * NOTE: We absolutely rely on the fact that caller dirties the inode
1411 * holds a handle for the current transaction so that dquot write and 1479 * (usually helpers in quotaops.h care about this) and holds a handle for
1412 * inode write go into the same transaction. 1480 * the current transaction so that dquot write and inode write go into the
1481 * same transaction.
1413 */ 1482 */
1414 1483
1415/* 1484/*
1416 * This operation can block, but only after everything is updated 1485 * This operation can block, but only after everything is updated
1417 */ 1486 */
1418int __dquot_alloc_space(struct inode *inode, qsize_t number, 1487int __dquot_alloc_space(struct inode *inode, qsize_t number,
1419 int warn, int reserve) 1488 int warn, int reserve)
1420{ 1489{
1421 int cnt, ret = QUOTA_OK; 1490 int cnt, ret = 0;
1422 char warntype[MAXQUOTAS]; 1491 char warntype[MAXQUOTAS];
1423 1492
1424 /* 1493 /*
1425 * First test before acquiring mutex - solves deadlocks when we 1494 * First test before acquiring mutex - solves deadlocks when we
1426 * re-enter the quota code and are already holding the mutex 1495 * re-enter the quota code and are already holding the mutex
1427 */ 1496 */
1428 if (IS_NOQUOTA(inode)) { 1497 if (!sb_any_quota_active(inode->i_sb) || IS_NOQUOTA(inode)) {
1429 inode_incr_space(inode, number, reserve); 1498 inode_incr_space(inode, number, reserve);
1430 goto out; 1499 goto out;
1431 } 1500 }
1432 1501
1433 down_read(&sb_dqopt(inode->i_sb)->dqptr_sem); 1502 down_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
1434 if (IS_NOQUOTA(inode)) {
1435 inode_incr_space(inode, number, reserve);
1436 goto out_unlock;
1437 }
1438
1439 for (cnt = 0; cnt < MAXQUOTAS; cnt++) 1503 for (cnt = 0; cnt < MAXQUOTAS; cnt++)
1440 warntype[cnt] = QUOTA_NL_NOWARN; 1504 warntype[cnt] = QUOTA_NL_NOWARN;
1441 1505
@@ -1443,9 +1507,9 @@ int __dquot_alloc_space(struct inode *inode, qsize_t number,
1443 for (cnt = 0; cnt < MAXQUOTAS; cnt++) { 1507 for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
1444 if (!inode->i_dquot[cnt]) 1508 if (!inode->i_dquot[cnt])
1445 continue; 1509 continue;
1446 if (check_bdq(inode->i_dquot[cnt], number, warn, warntype+cnt) 1510 ret = check_bdq(inode->i_dquot[cnt], number, !warn,
1447 == NO_QUOTA) { 1511 warntype+cnt);
1448 ret = NO_QUOTA; 1512 if (ret) {
1449 spin_unlock(&dq_data_lock); 1513 spin_unlock(&dq_data_lock);
1450 goto out_flush_warn; 1514 goto out_flush_warn;
1451 } 1515 }
@@ -1466,61 +1530,45 @@ int __dquot_alloc_space(struct inode *inode, qsize_t number,
1466 mark_all_dquot_dirty(inode->i_dquot); 1530 mark_all_dquot_dirty(inode->i_dquot);
1467out_flush_warn: 1531out_flush_warn:
1468 flush_warnings(inode->i_dquot, warntype); 1532 flush_warnings(inode->i_dquot, warntype);
1469out_unlock:
1470 up_read(&sb_dqopt(inode->i_sb)->dqptr_sem); 1533 up_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
1471out: 1534out:
1472 return ret; 1535 return ret;
1473} 1536}
1474 1537EXPORT_SYMBOL(__dquot_alloc_space);
1475int dquot_alloc_space(struct inode *inode, qsize_t number, int warn)
1476{
1477 return __dquot_alloc_space(inode, number, warn, 0);
1478}
1479EXPORT_SYMBOL(dquot_alloc_space);
1480
1481int dquot_reserve_space(struct inode *inode, qsize_t number, int warn)
1482{
1483 return __dquot_alloc_space(inode, number, warn, 1);
1484}
1485EXPORT_SYMBOL(dquot_reserve_space);
1486 1538
1487/* 1539/*
1488 * This operation can block, but only after everything is updated 1540 * This operation can block, but only after everything is updated
1489 */ 1541 */
1490int dquot_alloc_inode(const struct inode *inode, qsize_t number) 1542int dquot_alloc_inode(const struct inode *inode)
1491{ 1543{
1492 int cnt, ret = NO_QUOTA; 1544 int cnt, ret = 0;
1493 char warntype[MAXQUOTAS]; 1545 char warntype[MAXQUOTAS];
1494 1546
1495 /* First test before acquiring mutex - solves deadlocks when we 1547 /* First test before acquiring mutex - solves deadlocks when we
1496 * re-enter the quota code and are already holding the mutex */ 1548 * re-enter the quota code and are already holding the mutex */
1497 if (IS_NOQUOTA(inode)) 1549 if (!sb_any_quota_active(inode->i_sb) || IS_NOQUOTA(inode))
1498 return QUOTA_OK; 1550 return 0;
1499 for (cnt = 0; cnt < MAXQUOTAS; cnt++) 1551 for (cnt = 0; cnt < MAXQUOTAS; cnt++)
1500 warntype[cnt] = QUOTA_NL_NOWARN; 1552 warntype[cnt] = QUOTA_NL_NOWARN;
1501 down_read(&sb_dqopt(inode->i_sb)->dqptr_sem); 1553 down_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
1502 if (IS_NOQUOTA(inode)) {
1503 up_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
1504 return QUOTA_OK;
1505 }
1506 spin_lock(&dq_data_lock); 1554 spin_lock(&dq_data_lock);
1507 for (cnt = 0; cnt < MAXQUOTAS; cnt++) { 1555 for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
1508 if (!inode->i_dquot[cnt]) 1556 if (!inode->i_dquot[cnt])
1509 continue; 1557 continue;
1510 if (check_idq(inode->i_dquot[cnt], number, warntype+cnt) 1558 ret = check_idq(inode->i_dquot[cnt], 1, warntype + cnt);
1511 == NO_QUOTA) 1559 if (ret)
1512 goto warn_put_all; 1560 goto warn_put_all;
1513 } 1561 }
1514 1562
1515 for (cnt = 0; cnt < MAXQUOTAS; cnt++) { 1563 for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
1516 if (!inode->i_dquot[cnt]) 1564 if (!inode->i_dquot[cnt])
1517 continue; 1565 continue;
1518 dquot_incr_inodes(inode->i_dquot[cnt], number); 1566 dquot_incr_inodes(inode->i_dquot[cnt], 1);
1519 } 1567 }
1520 ret = QUOTA_OK; 1568
1521warn_put_all: 1569warn_put_all:
1522 spin_unlock(&dq_data_lock); 1570 spin_unlock(&dq_data_lock);
1523 if (ret == QUOTA_OK) 1571 if (ret == 0)
1524 mark_all_dquot_dirty(inode->i_dquot); 1572 mark_all_dquot_dirty(inode->i_dquot);
1525 flush_warnings(inode->i_dquot, warntype); 1573 flush_warnings(inode->i_dquot, warntype);
1526 up_read(&sb_dqopt(inode->i_sb)->dqptr_sem); 1574 up_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
@@ -1528,23 +1576,19 @@ warn_put_all:
1528} 1576}
1529EXPORT_SYMBOL(dquot_alloc_inode); 1577EXPORT_SYMBOL(dquot_alloc_inode);
1530 1578
1531int dquot_claim_space(struct inode *inode, qsize_t number) 1579/*
1580 * Convert in-memory reserved quotas to real consumed quotas
1581 */
1582int dquot_claim_space_nodirty(struct inode *inode, qsize_t number)
1532{ 1583{
1533 int cnt; 1584 int cnt;
1534 int ret = QUOTA_OK;
1535 1585
1536 if (IS_NOQUOTA(inode)) { 1586 if (!sb_any_quota_active(inode->i_sb) || IS_NOQUOTA(inode)) {
1537 inode_claim_rsv_space(inode, number); 1587 inode_claim_rsv_space(inode, number);
1538 goto out; 1588 return 0;
1539 } 1589 }
1540 1590
1541 down_read(&sb_dqopt(inode->i_sb)->dqptr_sem); 1591 down_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
1542 if (IS_NOQUOTA(inode)) {
1543 up_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
1544 inode_claim_rsv_space(inode, number);
1545 goto out;
1546 }
1547
1548 spin_lock(&dq_data_lock); 1592 spin_lock(&dq_data_lock);
1549 /* Claim reserved quotas to allocated quotas */ 1593 /* Claim reserved quotas to allocated quotas */
1550 for (cnt = 0; cnt < MAXQUOTAS; cnt++) { 1594 for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
@@ -1557,33 +1601,26 @@ int dquot_claim_space(struct inode *inode, qsize_t number)
1557 spin_unlock(&dq_data_lock); 1601 spin_unlock(&dq_data_lock);
1558 mark_all_dquot_dirty(inode->i_dquot); 1602 mark_all_dquot_dirty(inode->i_dquot);
1559 up_read(&sb_dqopt(inode->i_sb)->dqptr_sem); 1603 up_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
1560out: 1604 return 0;
1561 return ret;
1562} 1605}
1563EXPORT_SYMBOL(dquot_claim_space); 1606EXPORT_SYMBOL(dquot_claim_space_nodirty);
1564 1607
1565/* 1608/*
1566 * This operation can block, but only after everything is updated 1609 * This operation can block, but only after everything is updated
1567 */ 1610 */
1568int __dquot_free_space(struct inode *inode, qsize_t number, int reserve) 1611void __dquot_free_space(struct inode *inode, qsize_t number, int reserve)
1569{ 1612{
1570 unsigned int cnt; 1613 unsigned int cnt;
1571 char warntype[MAXQUOTAS]; 1614 char warntype[MAXQUOTAS];
1572 1615
1573 /* First test before acquiring mutex - solves deadlocks when we 1616 /* First test before acquiring mutex - solves deadlocks when we
1574 * re-enter the quota code and are already holding the mutex */ 1617 * re-enter the quota code and are already holding the mutex */
1575 if (IS_NOQUOTA(inode)) { 1618 if (!sb_any_quota_active(inode->i_sb) || IS_NOQUOTA(inode)) {
1576out_sub:
1577 inode_decr_space(inode, number, reserve); 1619 inode_decr_space(inode, number, reserve);
1578 return QUOTA_OK; 1620 return;
1579 } 1621 }
1580 1622
1581 down_read(&sb_dqopt(inode->i_sb)->dqptr_sem); 1623 down_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
1582 /* Now recheck reliably when holding dqptr_sem */
1583 if (IS_NOQUOTA(inode)) {
1584 up_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
1585 goto out_sub;
1586 }
1587 spin_lock(&dq_data_lock); 1624 spin_lock(&dq_data_lock);
1588 for (cnt = 0; cnt < MAXQUOTAS; cnt++) { 1625 for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
1589 if (!inode->i_dquot[cnt]) 1626 if (!inode->i_dquot[cnt])
@@ -1603,56 +1640,34 @@ out_sub:
1603out_unlock: 1640out_unlock:
1604 flush_warnings(inode->i_dquot, warntype); 1641 flush_warnings(inode->i_dquot, warntype);
1605 up_read(&sb_dqopt(inode->i_sb)->dqptr_sem); 1642 up_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
1606 return QUOTA_OK;
1607}
1608
1609int dquot_free_space(struct inode *inode, qsize_t number)
1610{
1611 return __dquot_free_space(inode, number, 0);
1612} 1643}
1613EXPORT_SYMBOL(dquot_free_space); 1644EXPORT_SYMBOL(__dquot_free_space);
1614
1615/*
1616 * Release reserved quota space
1617 */
1618void dquot_release_reserved_space(struct inode *inode, qsize_t number)
1619{
1620 __dquot_free_space(inode, number, 1);
1621
1622}
1623EXPORT_SYMBOL(dquot_release_reserved_space);
1624 1645
1625/* 1646/*
1626 * This operation can block, but only after everything is updated 1647 * This operation can block, but only after everything is updated
1627 */ 1648 */
1628int dquot_free_inode(const struct inode *inode, qsize_t number) 1649void dquot_free_inode(const struct inode *inode)
1629{ 1650{
1630 unsigned int cnt; 1651 unsigned int cnt;
1631 char warntype[MAXQUOTAS]; 1652 char warntype[MAXQUOTAS];
1632 1653
1633 /* First test before acquiring mutex - solves deadlocks when we 1654 /* First test before acquiring mutex - solves deadlocks when we
1634 * re-enter the quota code and are already holding the mutex */ 1655 * re-enter the quota code and are already holding the mutex */
1635 if (IS_NOQUOTA(inode)) 1656 if (!sb_any_quota_active(inode->i_sb) || IS_NOQUOTA(inode))
1636 return QUOTA_OK; 1657 return;
1637 1658
1638 down_read(&sb_dqopt(inode->i_sb)->dqptr_sem); 1659 down_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
1639 /* Now recheck reliably when holding dqptr_sem */
1640 if (IS_NOQUOTA(inode)) {
1641 up_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
1642 return QUOTA_OK;
1643 }
1644 spin_lock(&dq_data_lock); 1660 spin_lock(&dq_data_lock);
1645 for (cnt = 0; cnt < MAXQUOTAS; cnt++) { 1661 for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
1646 if (!inode->i_dquot[cnt]) 1662 if (!inode->i_dquot[cnt])
1647 continue; 1663 continue;
1648 warntype[cnt] = info_idq_free(inode->i_dquot[cnt], number); 1664 warntype[cnt] = info_idq_free(inode->i_dquot[cnt], 1);
1649 dquot_decr_inodes(inode->i_dquot[cnt], number); 1665 dquot_decr_inodes(inode->i_dquot[cnt], 1);
1650 } 1666 }
1651 spin_unlock(&dq_data_lock); 1667 spin_unlock(&dq_data_lock);
1652 mark_all_dquot_dirty(inode->i_dquot); 1668 mark_all_dquot_dirty(inode->i_dquot);
1653 flush_warnings(inode->i_dquot, warntype); 1669 flush_warnings(inode->i_dquot, warntype);
1654 up_read(&sb_dqopt(inode->i_sb)->dqptr_sem); 1670 up_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
1655 return QUOTA_OK;
1656} 1671}
1657EXPORT_SYMBOL(dquot_free_inode); 1672EXPORT_SYMBOL(dquot_free_inode);
1658 1673
@@ -1662,37 +1677,31 @@ EXPORT_SYMBOL(dquot_free_inode);
1662 * This operation can block, but only after everything is updated 1677 * This operation can block, but only after everything is updated
1663 * A transaction must be started when entering this function. 1678 * A transaction must be started when entering this function.
1664 */ 1679 */
1665int dquot_transfer(struct inode *inode, struct iattr *iattr) 1680static int __dquot_transfer(struct inode *inode, qid_t *chid, unsigned long mask)
1666{ 1681{
1667 qsize_t space, cur_space; 1682 qsize_t space, cur_space;
1668 qsize_t rsv_space = 0; 1683 qsize_t rsv_space = 0;
1669 struct dquot *transfer_from[MAXQUOTAS]; 1684 struct dquot *transfer_from[MAXQUOTAS];
1670 struct dquot *transfer_to[MAXQUOTAS]; 1685 struct dquot *transfer_to[MAXQUOTAS];
1671 int cnt, ret = QUOTA_OK; 1686 int cnt, ret = 0;
1672 int chuid = iattr->ia_valid & ATTR_UID && inode->i_uid != iattr->ia_uid,
1673 chgid = iattr->ia_valid & ATTR_GID && inode->i_gid != iattr->ia_gid;
1674 char warntype_to[MAXQUOTAS]; 1687 char warntype_to[MAXQUOTAS];
1675 char warntype_from_inodes[MAXQUOTAS], warntype_from_space[MAXQUOTAS]; 1688 char warntype_from_inodes[MAXQUOTAS], warntype_from_space[MAXQUOTAS];
1676 1689
1677 /* First test before acquiring mutex - solves deadlocks when we 1690 /* First test before acquiring mutex - solves deadlocks when we
1678 * re-enter the quota code and are already holding the mutex */ 1691 * re-enter the quota code and are already holding the mutex */
1679 if (IS_NOQUOTA(inode)) 1692 if (IS_NOQUOTA(inode))
1680 return QUOTA_OK; 1693 return 0;
1681 /* Initialize the arrays */ 1694 /* Initialize the arrays */
1682 for (cnt = 0; cnt < MAXQUOTAS; cnt++) { 1695 for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
1683 transfer_from[cnt] = NULL; 1696 transfer_from[cnt] = NULL;
1684 transfer_to[cnt] = NULL; 1697 transfer_to[cnt] = NULL;
1685 warntype_to[cnt] = QUOTA_NL_NOWARN; 1698 warntype_to[cnt] = QUOTA_NL_NOWARN;
1686 } 1699 }
1687 if (chuid) 1700 for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
1688 transfer_to[USRQUOTA] = dqget(inode->i_sb, iattr->ia_uid, 1701 if (mask & (1 << cnt))
1689 USRQUOTA); 1702 transfer_to[cnt] = dqget(inode->i_sb, chid[cnt], cnt);
1690 if (chgid) 1703 }
1691 transfer_to[GRPQUOTA] = dqget(inode->i_sb, iattr->ia_gid,
1692 GRPQUOTA);
1693
1694 down_write(&sb_dqopt(inode->i_sb)->dqptr_sem); 1704 down_write(&sb_dqopt(inode->i_sb)->dqptr_sem);
1695 /* Now recheck reliably when holding dqptr_sem */
1696 if (IS_NOQUOTA(inode)) { /* File without quota accounting? */ 1705 if (IS_NOQUOTA(inode)) { /* File without quota accounting? */
1697 up_write(&sb_dqopt(inode->i_sb)->dqptr_sem); 1706 up_write(&sb_dqopt(inode->i_sb)->dqptr_sem);
1698 goto put_all; 1707 goto put_all;
@@ -1706,9 +1715,11 @@ int dquot_transfer(struct inode *inode, struct iattr *iattr)
1706 if (!transfer_to[cnt]) 1715 if (!transfer_to[cnt])
1707 continue; 1716 continue;
1708 transfer_from[cnt] = inode->i_dquot[cnt]; 1717 transfer_from[cnt] = inode->i_dquot[cnt];
1709 if (check_idq(transfer_to[cnt], 1, warntype_to + cnt) == 1718 ret = check_idq(transfer_to[cnt], 1, warntype_to + cnt);
1710 NO_QUOTA || check_bdq(transfer_to[cnt], space, 0, 1719 if (ret)
1711 warntype_to + cnt) == NO_QUOTA) 1720 goto over_quota;
1721 ret = check_bdq(transfer_to[cnt], space, 0, warntype_to + cnt);
1722 if (ret)
1712 goto over_quota; 1723 goto over_quota;
1713 } 1724 }
1714 1725
@@ -1762,22 +1773,32 @@ over_quota:
1762 /* Clear dquot pointers we don't want to dqput() */ 1773 /* Clear dquot pointers we don't want to dqput() */
1763 for (cnt = 0; cnt < MAXQUOTAS; cnt++) 1774 for (cnt = 0; cnt < MAXQUOTAS; cnt++)
1764 transfer_from[cnt] = NULL; 1775 transfer_from[cnt] = NULL;
1765 ret = NO_QUOTA;
1766 goto warn_put_all; 1776 goto warn_put_all;
1767} 1777}
1768EXPORT_SYMBOL(dquot_transfer);
1769 1778
1770/* Wrapper for transferring ownership of an inode */ 1779/* Wrapper for transferring ownership of an inode for uid/gid only
1771int vfs_dq_transfer(struct inode *inode, struct iattr *iattr) 1780 * Called from FSXXX_setattr()
1781 */
1782int dquot_transfer(struct inode *inode, struct iattr *iattr)
1772{ 1783{
1784 qid_t chid[MAXQUOTAS];
1785 unsigned long mask = 0;
1786
1787 if (iattr->ia_valid & ATTR_UID && iattr->ia_uid != inode->i_uid) {
1788 mask |= 1 << USRQUOTA;
1789 chid[USRQUOTA] = iattr->ia_uid;
1790 }
1791 if (iattr->ia_valid & ATTR_GID && iattr->ia_gid != inode->i_gid) {
1792 mask |= 1 << GRPQUOTA;
1793 chid[GRPQUOTA] = iattr->ia_gid;
1794 }
1773 if (sb_any_quota_active(inode->i_sb) && !IS_NOQUOTA(inode)) { 1795 if (sb_any_quota_active(inode->i_sb) && !IS_NOQUOTA(inode)) {
1774 vfs_dq_init(inode); 1796 dquot_initialize(inode);
1775 if (inode->i_sb->dq_op->transfer(inode, iattr) == NO_QUOTA) 1797 return __dquot_transfer(inode, chid, mask);
1776 return 1;
1777 } 1798 }
1778 return 0; 1799 return 0;
1779} 1800}
1780EXPORT_SYMBOL(vfs_dq_transfer); 1801EXPORT_SYMBOL(dquot_transfer);
1781 1802
1782/* 1803/*
1783 * Write info of quota file to disk 1804 * Write info of quota file to disk
@@ -1798,13 +1819,6 @@ EXPORT_SYMBOL(dquot_commit_info);
1798 * Definitions of diskquota operations. 1819 * Definitions of diskquota operations.
1799 */ 1820 */
1800const struct dquot_operations dquot_operations = { 1821const struct dquot_operations dquot_operations = {
1801 .initialize = dquot_initialize,
1802 .drop = dquot_drop,
1803 .alloc_space = dquot_alloc_space,
1804 .alloc_inode = dquot_alloc_inode,
1805 .free_space = dquot_free_space,
1806 .free_inode = dquot_free_inode,
1807 .transfer = dquot_transfer,
1808 .write_dquot = dquot_commit, 1822 .write_dquot = dquot_commit,
1809 .acquire_dquot = dquot_acquire, 1823 .acquire_dquot = dquot_acquire,
1810 .release_dquot = dquot_release, 1824 .release_dquot = dquot_release,
@@ -1815,6 +1829,20 @@ const struct dquot_operations dquot_operations = {
1815}; 1829};
1816 1830
1817/* 1831/*
1832 * Generic helper for ->open on filesystems supporting disk quotas.
1833 */
1834int dquot_file_open(struct inode *inode, struct file *file)
1835{
1836 int error;
1837
1838 error = generic_file_open(inode, file);
1839 if (!error && (file->f_mode & FMODE_WRITE))
1840 dquot_initialize(inode);
1841 return error;
1842}
1843EXPORT_SYMBOL(dquot_file_open);
1844
1845/*
1818 * Turn quota off on a device. type == -1 ==> quotaoff for all types (umount) 1846 * Turn quota off on a device. type == -1 ==> quotaoff for all types (umount)
1819 */ 1847 */
1820int vfs_quota_disable(struct super_block *sb, int type, unsigned int flags) 1848int vfs_quota_disable(struct super_block *sb, int type, unsigned int flags)
@@ -1993,11 +2021,13 @@ static int vfs_load_quota_inode(struct inode *inode, int type, int format_id,
1993 } 2021 }
1994 2022
1995 if (!(dqopt->flags & DQUOT_QUOTA_SYS_FILE)) { 2023 if (!(dqopt->flags & DQUOT_QUOTA_SYS_FILE)) {
1996 /* As we bypass the pagecache we must now flush the inode so 2024 /* As we bypass the pagecache we must now flush all the
1997 * that we see all the changes from userspace... */ 2025 * dirty data and invalidate caches so that kernel sees
1998 write_inode_now(inode, 1); 2026 * changes from userspace. It is not enough to just flush
1999 /* And now flush the block cache so that kernel sees the 2027 * the quota file since if blocksize < pagesize, invalidation
2000 * changes */ 2028 * of the cache could fail because of other unrelated dirty
2029 * data */
2030 sync_filesystem(sb);
2001 invalidate_bdev(sb->s_bdev); 2031 invalidate_bdev(sb->s_bdev);
2002 } 2032 }
2003 mutex_lock(&dqopt->dqonoff_mutex); 2033 mutex_lock(&dqopt->dqonoff_mutex);
@@ -2010,14 +2040,16 @@ static int vfs_load_quota_inode(struct inode *inode, int type, int format_id,
2010 /* We don't want quota and atime on quota files (deadlocks 2040 /* We don't want quota and atime on quota files (deadlocks
2011 * possible) Also nobody should write to the file - we use 2041 * possible) Also nobody should write to the file - we use
2012 * special IO operations which ignore the immutable bit. */ 2042 * special IO operations which ignore the immutable bit. */
2013 down_write(&dqopt->dqptr_sem);
2014 mutex_lock_nested(&inode->i_mutex, I_MUTEX_QUOTA); 2043 mutex_lock_nested(&inode->i_mutex, I_MUTEX_QUOTA);
2015 oldflags = inode->i_flags & (S_NOATIME | S_IMMUTABLE | 2044 oldflags = inode->i_flags & (S_NOATIME | S_IMMUTABLE |
2016 S_NOQUOTA); 2045 S_NOQUOTA);
2017 inode->i_flags |= S_NOQUOTA | S_NOATIME | S_IMMUTABLE; 2046 inode->i_flags |= S_NOQUOTA | S_NOATIME | S_IMMUTABLE;
2018 mutex_unlock(&inode->i_mutex); 2047 mutex_unlock(&inode->i_mutex);
2019 up_write(&dqopt->dqptr_sem); 2048 /*
2020 sb->dq_op->drop(inode); 2049 * When S_NOQUOTA is set, remove dquot references as no more
2050 * references can be added
2051 */
2052 __dquot_drop(inode);
2021 } 2053 }
2022 2054
2023 error = -EIO; 2055 error = -EIO;
@@ -2053,14 +2085,12 @@ out_file_init:
2053 iput(inode); 2085 iput(inode);
2054out_lock: 2086out_lock:
2055 if (oldflags != -1) { 2087 if (oldflags != -1) {
2056 down_write(&dqopt->dqptr_sem);
2057 mutex_lock_nested(&inode->i_mutex, I_MUTEX_QUOTA); 2088 mutex_lock_nested(&inode->i_mutex, I_MUTEX_QUOTA);
2058 /* Set the flags back (in the case of accidental quotaon() 2089 /* Set the flags back (in the case of accidental quotaon()
2059 * on a wrong file we don't want to mess up the flags) */ 2090 * on a wrong file we don't want to mess up the flags) */
2060 inode->i_flags &= ~(S_NOATIME | S_NOQUOTA | S_IMMUTABLE); 2091 inode->i_flags &= ~(S_NOATIME | S_NOQUOTA | S_IMMUTABLE);
2061 inode->i_flags |= oldflags; 2092 inode->i_flags |= oldflags;
2062 mutex_unlock(&inode->i_mutex); 2093 mutex_unlock(&inode->i_mutex);
2063 up_write(&dqopt->dqptr_sem);
2064 } 2094 }
2065 mutex_unlock(&dqopt->dqonoff_mutex); 2095 mutex_unlock(&dqopt->dqonoff_mutex);
2066out_fmt: 2096out_fmt:
diff --git a/fs/quota/netlink.c b/fs/quota/netlink.c
new file mode 100644
index 000000000000..2663ed90fb03
--- /dev/null
+++ b/fs/quota/netlink.c
@@ -0,0 +1,95 @@
1
2#include <linux/cred.h>
3#include <linux/init.h>
4#include <linux/module.h>
5#include <linux/kernel.h>
6#include <linux/quotaops.h>
7#include <linux/sched.h>
8#include <net/netlink.h>
9#include <net/genetlink.h>
10
11/* Netlink family structure for quota */
12static struct genl_family quota_genl_family = {
13 .id = GENL_ID_GENERATE,
14 .hdrsize = 0,
15 .name = "VFS_DQUOT",
16 .version = 1,
17 .maxattr = QUOTA_NL_A_MAX,
18};
19
20/**
21 * quota_send_warning - Send warning to userspace about exceeded quota
22 * @type: The quota type: USRQQUOTA, GRPQUOTA,...
23 * @id: The user or group id of the quota that was exceeded
24 * @dev: The device on which the fs is mounted (sb->s_dev)
25 * @warntype: The type of the warning: QUOTA_NL_...
26 *
27 * This can be used by filesystems (including those which don't use
28 * dquot) to send a message to userspace relating to quota limits.
29 *
30 */
31
32void quota_send_warning(short type, unsigned int id, dev_t dev,
33 const char warntype)
34{
35 static atomic_t seq;
36 struct sk_buff *skb;
37 void *msg_head;
38 int ret;
39 int msg_size = 4 * nla_total_size(sizeof(u32)) +
40 2 * nla_total_size(sizeof(u64));
41
42 /* We have to allocate using GFP_NOFS as we are called from a
43 * filesystem performing write and thus further recursion into
44 * the fs to free some data could cause deadlocks. */
45 skb = genlmsg_new(msg_size, GFP_NOFS);
46 if (!skb) {
47 printk(KERN_ERR
48 "VFS: Not enough memory to send quota warning.\n");
49 return;
50 }
51 msg_head = genlmsg_put(skb, 0, atomic_add_return(1, &seq),
52 &quota_genl_family, 0, QUOTA_NL_C_WARNING);
53 if (!msg_head) {
54 printk(KERN_ERR
55 "VFS: Cannot store netlink header in quota warning.\n");
56 goto err_out;
57 }
58 ret = nla_put_u32(skb, QUOTA_NL_A_QTYPE, type);
59 if (ret)
60 goto attr_err_out;
61 ret = nla_put_u64(skb, QUOTA_NL_A_EXCESS_ID, id);
62 if (ret)
63 goto attr_err_out;
64 ret = nla_put_u32(skb, QUOTA_NL_A_WARNING, warntype);
65 if (ret)
66 goto attr_err_out;
67 ret = nla_put_u32(skb, QUOTA_NL_A_DEV_MAJOR, MAJOR(dev));
68 if (ret)
69 goto attr_err_out;
70 ret = nla_put_u32(skb, QUOTA_NL_A_DEV_MINOR, MINOR(dev));
71 if (ret)
72 goto attr_err_out;
73 ret = nla_put_u64(skb, QUOTA_NL_A_CAUSED_ID, current_uid());
74 if (ret)
75 goto attr_err_out;
76 genlmsg_end(skb, msg_head);
77
78 genlmsg_multicast(skb, 0, quota_genl_family.id, GFP_NOFS);
79 return;
80attr_err_out:
81 printk(KERN_ERR "VFS: Not enough space to compose quota message!\n");
82err_out:
83 kfree_skb(skb);
84}
85EXPORT_SYMBOL(quota_send_warning);
86
87static int __init quota_init(void)
88{
89 if (genl_register_family(&quota_genl_family) != 0)
90 printk(KERN_ERR
91 "VFS: Failed to create quota netlink interface.\n");
92 return 0;
93};
94
95module_init(quota_init);
diff --git a/fs/quota/quota.c b/fs/quota/quota.c
index ee91e2756950..95388f9b7356 100644
--- a/fs/quota/quota.c
+++ b/fs/quota/quota.c
@@ -10,7 +10,6 @@
10#include <linux/slab.h> 10#include <linux/slab.h>
11#include <asm/current.h> 11#include <asm/current.h>
12#include <asm/uaccess.h> 12#include <asm/uaccess.h>
13#include <linux/compat.h>
14#include <linux/kernel.h> 13#include <linux/kernel.h>
15#include <linux/security.h> 14#include <linux/security.h>
16#include <linux/syscalls.h> 15#include <linux/syscalls.h>
@@ -18,220 +17,205 @@
18#include <linux/capability.h> 17#include <linux/capability.h>
19#include <linux/quotaops.h> 18#include <linux/quotaops.h>
20#include <linux/types.h> 19#include <linux/types.h>
21#include <net/netlink.h> 20#include <linux/writeback.h>
22#include <net/genetlink.h>
23 21
24/* Check validity of generic quotactl commands */ 22static int check_quotactl_permission(struct super_block *sb, int type, int cmd,
25static int generic_quotactl_valid(struct super_block *sb, int type, int cmd, 23 qid_t id)
26 qid_t id)
27{ 24{
28 if (type >= MAXQUOTAS)
29 return -EINVAL;
30 if (!sb && cmd != Q_SYNC)
31 return -ENODEV;
32 /* Is operation supported? */
33 if (sb && !sb->s_qcop)
34 return -ENOSYS;
35
36 switch (cmd) { 25 switch (cmd) {
37 case Q_GETFMT: 26 /* these commands do not require any special privilegues */
38 break; 27 case Q_GETFMT:
39 case Q_QUOTAON: 28 case Q_SYNC:
40 if (!sb->s_qcop->quota_on) 29 case Q_GETINFO:
41 return -ENOSYS; 30 case Q_XGETQSTAT:
42 break; 31 case Q_XQUOTASYNC:
43 case Q_QUOTAOFF: 32 break;
44 if (!sb->s_qcop->quota_off) 33 /* allow to query information for dquots we "own" */
45 return -ENOSYS; 34 case Q_GETQUOTA:
46 break; 35 case Q_XGETQUOTA:
47 case Q_SETINFO: 36 if ((type == USRQUOTA && current_euid() == id) ||
48 if (!sb->s_qcop->set_info) 37 (type == GRPQUOTA && in_egroup_p(id)))
49 return -ENOSYS;
50 break;
51 case Q_GETINFO:
52 if (!sb->s_qcop->get_info)
53 return -ENOSYS;
54 break;
55 case Q_SETQUOTA:
56 if (!sb->s_qcop->set_dqblk)
57 return -ENOSYS;
58 break;
59 case Q_GETQUOTA:
60 if (!sb->s_qcop->get_dqblk)
61 return -ENOSYS;
62 break;
63 case Q_SYNC:
64 if (sb && !sb->s_qcop->quota_sync)
65 return -ENOSYS;
66 break; 38 break;
67 default: 39 /*FALLTHROUGH*/
68 return -EINVAL; 40 default:
41 if (!capable(CAP_SYS_ADMIN))
42 return -EPERM;
69 } 43 }
70 44
71 /* Is quota turned on for commands which need it? */ 45 return security_quotactl(cmd, type, id, sb);
72 switch (cmd) { 46}
73 case Q_GETFMT:
74 case Q_GETINFO:
75 case Q_SETINFO:
76 case Q_SETQUOTA:
77 case Q_GETQUOTA:
78 /* This is just an informative test so we are satisfied
79 * without the lock */
80 if (!sb_has_quota_active(sb, type))
81 return -ESRCH;
82 }
83 47
84 /* Check privileges */ 48static int quota_sync_all(int type)
85 if (cmd == Q_GETQUOTA) { 49{
86 if (((type == USRQUOTA && current_euid() != id) || 50 struct super_block *sb;
87 (type == GRPQUOTA && !in_egroup_p(id))) && 51 int ret;
88 !capable(CAP_SYS_ADMIN)) 52
89 return -EPERM; 53 if (type >= MAXQUOTAS)
54 return -EINVAL;
55 ret = security_quotactl(Q_SYNC, type, 0, NULL);
56 if (ret)
57 return ret;
58
59 spin_lock(&sb_lock);
60restart:
61 list_for_each_entry(sb, &super_blocks, s_list) {
62 if (!sb->s_qcop || !sb->s_qcop->quota_sync)
63 continue;
64
65 sb->s_count++;
66 spin_unlock(&sb_lock);
67 down_read(&sb->s_umount);
68 if (sb->s_root)
69 sb->s_qcop->quota_sync(sb, type, 1);
70 up_read(&sb->s_umount);
71 spin_lock(&sb_lock);
72 if (__put_super_and_need_restart(sb))
73 goto restart;
90 } 74 }
91 else if (cmd != Q_GETFMT && cmd != Q_SYNC && cmd != Q_GETINFO) 75 spin_unlock(&sb_lock);
92 if (!capable(CAP_SYS_ADMIN))
93 return -EPERM;
94 76
95 return 0; 77 return 0;
96} 78}
97 79
98/* Check validity of XFS Quota Manager commands */ 80static int quota_quotaon(struct super_block *sb, int type, int cmd, qid_t id,
99static int xqm_quotactl_valid(struct super_block *sb, int type, int cmd, 81 void __user *addr)
100 qid_t id)
101{ 82{
102 if (type >= XQM_MAXQUOTAS) 83 char *pathname;
103 return -EINVAL; 84 int ret = -ENOSYS;
104 if (!sb) 85
105 return -ENODEV; 86 pathname = getname(addr);
106 if (!sb->s_qcop) 87 if (IS_ERR(pathname))
107 return -ENOSYS; 88 return PTR_ERR(pathname);
89 if (sb->s_qcop->quota_on)
90 ret = sb->s_qcop->quota_on(sb, type, id, pathname, 0);
91 putname(pathname);
92 return ret;
93}
108 94
109 switch (cmd) { 95static int quota_getfmt(struct super_block *sb, int type, void __user *addr)
110 case Q_XQUOTAON: 96{
111 case Q_XQUOTAOFF: 97 __u32 fmt;
112 case Q_XQUOTARM:
113 if (!sb->s_qcop->set_xstate)
114 return -ENOSYS;
115 break;
116 case Q_XGETQSTAT:
117 if (!sb->s_qcop->get_xstate)
118 return -ENOSYS;
119 break;
120 case Q_XSETQLIM:
121 if (!sb->s_qcop->set_xquota)
122 return -ENOSYS;
123 break;
124 case Q_XGETQUOTA:
125 if (!sb->s_qcop->get_xquota)
126 return -ENOSYS;
127 break;
128 case Q_XQUOTASYNC:
129 if (!sb->s_qcop->quota_sync)
130 return -ENOSYS;
131 break;
132 default:
133 return -EINVAL;
134 }
135 98
136 /* Check privileges */ 99 down_read(&sb_dqopt(sb)->dqptr_sem);
137 if (cmd == Q_XGETQUOTA) { 100 if (!sb_has_quota_active(sb, type)) {
138 if (((type == XQM_USRQUOTA && current_euid() != id) || 101 up_read(&sb_dqopt(sb)->dqptr_sem);
139 (type == XQM_GRPQUOTA && !in_egroup_p(id))) && 102 return -ESRCH;
140 !capable(CAP_SYS_ADMIN))
141 return -EPERM;
142 } else if (cmd != Q_XGETQSTAT && cmd != Q_XQUOTASYNC) {
143 if (!capable(CAP_SYS_ADMIN))
144 return -EPERM;
145 } 103 }
104 fmt = sb_dqopt(sb)->info[type].dqi_format->qf_fmt_id;
105 up_read(&sb_dqopt(sb)->dqptr_sem);
106 if (copy_to_user(addr, &fmt, sizeof(fmt)))
107 return -EFAULT;
108 return 0;
109}
146 110
111static int quota_getinfo(struct super_block *sb, int type, void __user *addr)
112{
113 struct if_dqinfo info;
114 int ret;
115
116 if (!sb_has_quota_active(sb, type))
117 return -ESRCH;
118 if (!sb->s_qcop->get_info)
119 return -ENOSYS;
120 ret = sb->s_qcop->get_info(sb, type, &info);
121 if (!ret && copy_to_user(addr, &info, sizeof(info)))
122 return -EFAULT;
123 return ret;
124}
125
126static int quota_setinfo(struct super_block *sb, int type, void __user *addr)
127{
128 struct if_dqinfo info;
129
130 if (copy_from_user(&info, addr, sizeof(info)))
131 return -EFAULT;
132 if (!sb_has_quota_active(sb, type))
133 return -ESRCH;
134 if (!sb->s_qcop->set_info)
135 return -ENOSYS;
136 return sb->s_qcop->set_info(sb, type, &info);
137}
138
139static int quota_getquota(struct super_block *sb, int type, qid_t id,
140 void __user *addr)
141{
142 struct if_dqblk idq;
143 int ret;
144
145 if (!sb_has_quota_active(sb, type))
146 return -ESRCH;
147 if (!sb->s_qcop->get_dqblk)
148 return -ENOSYS;
149 ret = sb->s_qcop->get_dqblk(sb, type, id, &idq);
150 if (ret)
151 return ret;
152 if (copy_to_user(addr, &idq, sizeof(idq)))
153 return -EFAULT;
147 return 0; 154 return 0;
148} 155}
149 156
150static int check_quotactl_valid(struct super_block *sb, int type, int cmd, 157static int quota_setquota(struct super_block *sb, int type, qid_t id,
151 qid_t id) 158 void __user *addr)
152{ 159{
153 int error; 160 struct if_dqblk idq;
154 161
155 if (XQM_COMMAND(cmd)) 162 if (copy_from_user(&idq, addr, sizeof(idq)))
156 error = xqm_quotactl_valid(sb, type, cmd, id); 163 return -EFAULT;
157 else 164 if (!sb_has_quota_active(sb, type))
158 error = generic_quotactl_valid(sb, type, cmd, id); 165 return -ESRCH;
159 if (!error) 166 if (!sb->s_qcop->set_dqblk)
160 error = security_quotactl(cmd, type, id, sb); 167 return -ENOSYS;
161 return error; 168 return sb->s_qcop->set_dqblk(sb, type, id, &idq);
162} 169}
163 170
164#ifdef CONFIG_QUOTA 171static int quota_setxstate(struct super_block *sb, int cmd, void __user *addr)
165void sync_quota_sb(struct super_block *sb, int type)
166{ 172{
167 int cnt; 173 __u32 flags;
168 174
169 if (!sb->s_qcop->quota_sync) 175 if (copy_from_user(&flags, addr, sizeof(flags)))
170 return; 176 return -EFAULT;
177 if (!sb->s_qcop->set_xstate)
178 return -ENOSYS;
179 return sb->s_qcop->set_xstate(sb, flags, cmd);
180}
171 181
172 sb->s_qcop->quota_sync(sb, type); 182static int quota_getxstate(struct super_block *sb, void __user *addr)
183{
184 struct fs_quota_stat fqs;
185 int ret;
173 186
174 if (sb_dqopt(sb)->flags & DQUOT_QUOTA_SYS_FILE) 187 if (!sb->s_qcop->get_xstate)
175 return; 188 return -ENOSYS;
176 /* This is not very clever (and fast) but currently I don't know about 189 ret = sb->s_qcop->get_xstate(sb, &fqs);
177 * any other simple way of getting quota data to disk and we must get 190 if (!ret && copy_to_user(addr, &fqs, sizeof(fqs)))
178 * them there for userspace to be visible... */ 191 return -EFAULT;
179 if (sb->s_op->sync_fs) 192 return ret;
180 sb->s_op->sync_fs(sb, 1); 193}
181 sync_blockdev(sb->s_bdev);
182 194
183 /* 195static int quota_setxquota(struct super_block *sb, int type, qid_t id,
184 * Now when everything is written we can discard the pagecache so 196 void __user *addr)
185 * that userspace sees the changes. 197{
186 */ 198 struct fs_disk_quota fdq;
187 mutex_lock(&sb_dqopt(sb)->dqonoff_mutex); 199
188 for (cnt = 0; cnt < MAXQUOTAS; cnt++) { 200 if (copy_from_user(&fdq, addr, sizeof(fdq)))
189 if (type != -1 && cnt != type) 201 return -EFAULT;
190 continue; 202 if (!sb->s_qcop->set_xquota)
191 if (!sb_has_quota_active(sb, cnt)) 203 return -ENOSYS;
192 continue; 204 return sb->s_qcop->set_xquota(sb, type, id, &fdq);
193 mutex_lock_nested(&sb_dqopt(sb)->files[cnt]->i_mutex,
194 I_MUTEX_QUOTA);
195 truncate_inode_pages(&sb_dqopt(sb)->files[cnt]->i_data, 0);
196 mutex_unlock(&sb_dqopt(sb)->files[cnt]->i_mutex);
197 }
198 mutex_unlock(&sb_dqopt(sb)->dqonoff_mutex);
199} 205}
200#endif
201 206
202static void sync_dquots(int type) 207static int quota_getxquota(struct super_block *sb, int type, qid_t id,
208 void __user *addr)
203{ 209{
204 struct super_block *sb; 210 struct fs_disk_quota fdq;
205 int cnt; 211 int ret;
206 212
207 spin_lock(&sb_lock); 213 if (!sb->s_qcop->get_xquota)
208restart: 214 return -ENOSYS;
209 list_for_each_entry(sb, &super_blocks, s_list) { 215 ret = sb->s_qcop->get_xquota(sb, type, id, &fdq);
210 /* This test just improves performance so it needn't be 216 if (!ret && copy_to_user(addr, &fdq, sizeof(fdq)))
211 * reliable... */ 217 return -EFAULT;
212 for (cnt = 0; cnt < MAXQUOTAS; cnt++) { 218 return ret;
213 if (type != -1 && type != cnt)
214 continue;
215 if (!sb_has_quota_active(sb, cnt))
216 continue;
217 if (!info_dirty(&sb_dqopt(sb)->info[cnt]) &&
218 list_empty(&sb_dqopt(sb)->info[cnt].dqi_dirty_list))
219 continue;
220 break;
221 }
222 if (cnt == MAXQUOTAS)
223 continue;
224 sb->s_count++;
225 spin_unlock(&sb_lock);
226 down_read(&sb->s_umount);
227 if (sb->s_root)
228 sync_quota_sb(sb, type);
229 up_read(&sb->s_umount);
230 spin_lock(&sb_lock);
231 if (__put_super_and_need_restart(sb))
232 goto restart;
233 }
234 spin_unlock(&sb_lock);
235} 219}
236 220
237/* Copy parameters and call proper function */ 221/* Copy parameters and call proper function */
@@ -240,117 +224,55 @@ static int do_quotactl(struct super_block *sb, int type, int cmd, qid_t id,
240{ 224{
241 int ret; 225 int ret;
242 226
227 if (type >= (XQM_COMMAND(cmd) ? XQM_MAXQUOTAS : MAXQUOTAS))
228 return -EINVAL;
229 if (!sb->s_qcop)
230 return -ENOSYS;
231
232 ret = check_quotactl_permission(sb, type, cmd, id);
233 if (ret < 0)
234 return ret;
235
243 switch (cmd) { 236 switch (cmd) {
244 case Q_QUOTAON: { 237 case Q_QUOTAON:
245 char *pathname; 238 return quota_quotaon(sb, type, cmd, id, addr);
246 239 case Q_QUOTAOFF:
247 pathname = getname(addr); 240 if (!sb->s_qcop->quota_off)
248 if (IS_ERR(pathname)) 241 return -ENOSYS;
249 return PTR_ERR(pathname); 242 return sb->s_qcop->quota_off(sb, type, 0);
250 ret = sb->s_qcop->quota_on(sb, type, id, pathname, 0); 243 case Q_GETFMT:
251 putname(pathname); 244 return quota_getfmt(sb, type, addr);
252 return ret; 245 case Q_GETINFO:
253 } 246 return quota_getinfo(sb, type, addr);
254 case Q_QUOTAOFF: 247 case Q_SETINFO:
255 return sb->s_qcop->quota_off(sb, type, 0); 248 return quota_setinfo(sb, type, addr);
256 249 case Q_GETQUOTA:
257 case Q_GETFMT: { 250 return quota_getquota(sb, type, id, addr);
258 __u32 fmt; 251 case Q_SETQUOTA:
259 252 return quota_setquota(sb, type, id, addr);
260 down_read(&sb_dqopt(sb)->dqptr_sem); 253 case Q_SYNC:
261 if (!sb_has_quota_active(sb, type)) { 254 if (!sb->s_qcop->quota_sync)
262 up_read(&sb_dqopt(sb)->dqptr_sem); 255 return -ENOSYS;
263 return -ESRCH; 256 return sb->s_qcop->quota_sync(sb, type, 1);
264 } 257 case Q_XQUOTAON:
265 fmt = sb_dqopt(sb)->info[type].dqi_format->qf_fmt_id; 258 case Q_XQUOTAOFF:
266 up_read(&sb_dqopt(sb)->dqptr_sem); 259 case Q_XQUOTARM:
267 if (copy_to_user(addr, &fmt, sizeof(fmt))) 260 return quota_setxstate(sb, cmd, addr);
268 return -EFAULT; 261 case Q_XGETQSTAT:
269 return 0; 262 return quota_getxstate(sb, addr);
270 } 263 case Q_XSETQLIM:
271 case Q_GETINFO: { 264 return quota_setxquota(sb, type, id, addr);
272 struct if_dqinfo info; 265 case Q_XGETQUOTA:
273 266 return quota_getxquota(sb, type, id, addr);
274 ret = sb->s_qcop->get_info(sb, type, &info); 267 case Q_XQUOTASYNC:
275 if (ret) 268 /* caller already holds s_umount */
276 return ret; 269 if (sb->s_flags & MS_RDONLY)
277 if (copy_to_user(addr, &info, sizeof(info))) 270 return -EROFS;
278 return -EFAULT; 271 writeback_inodes_sb(sb);
279 return 0; 272 return 0;
280 } 273 default:
281 case Q_SETINFO: { 274 return -EINVAL;
282 struct if_dqinfo info;
283
284 if (copy_from_user(&info, addr, sizeof(info)))
285 return -EFAULT;
286 return sb->s_qcop->set_info(sb, type, &info);
287 }
288 case Q_GETQUOTA: {
289 struct if_dqblk idq;
290
291 ret = sb->s_qcop->get_dqblk(sb, type, id, &idq);
292 if (ret)
293 return ret;
294 if (copy_to_user(addr, &idq, sizeof(idq)))
295 return -EFAULT;
296 return 0;
297 }
298 case Q_SETQUOTA: {
299 struct if_dqblk idq;
300
301 if (copy_from_user(&idq, addr, sizeof(idq)))
302 return -EFAULT;
303 return sb->s_qcop->set_dqblk(sb, type, id, &idq);
304 }
305 case Q_SYNC:
306 if (sb)
307 sync_quota_sb(sb, type);
308 else
309 sync_dquots(type);
310 return 0;
311
312 case Q_XQUOTAON:
313 case Q_XQUOTAOFF:
314 case Q_XQUOTARM: {
315 __u32 flags;
316
317 if (copy_from_user(&flags, addr, sizeof(flags)))
318 return -EFAULT;
319 return sb->s_qcop->set_xstate(sb, flags, cmd);
320 }
321 case Q_XGETQSTAT: {
322 struct fs_quota_stat fqs;
323
324 if ((ret = sb->s_qcop->get_xstate(sb, &fqs)))
325 return ret;
326 if (copy_to_user(addr, &fqs, sizeof(fqs)))
327 return -EFAULT;
328 return 0;
329 }
330 case Q_XSETQLIM: {
331 struct fs_disk_quota fdq;
332
333 if (copy_from_user(&fdq, addr, sizeof(fdq)))
334 return -EFAULT;
335 return sb->s_qcop->set_xquota(sb, type, id, &fdq);
336 }
337 case Q_XGETQUOTA: {
338 struct fs_disk_quota fdq;
339
340 ret = sb->s_qcop->get_xquota(sb, type, id, &fdq);
341 if (ret)
342 return ret;
343 if (copy_to_user(addr, &fdq, sizeof(fdq)))
344 return -EFAULT;
345 return 0;
346 }
347 case Q_XQUOTASYNC:
348 return sb->s_qcop->quota_sync(sb, type);
349 /* We never reach here unless validity check is broken */
350 default:
351 BUG();
352 } 275 }
353 return 0;
354} 276}
355 277
356/* 278/*
@@ -397,224 +319,23 @@ SYSCALL_DEFINE4(quotactl, unsigned int, cmd, const char __user *, special,
397 cmds = cmd >> SUBCMDSHIFT; 319 cmds = cmd >> SUBCMDSHIFT;
398 type = cmd & SUBCMDMASK; 320 type = cmd & SUBCMDMASK;
399 321
400 if (cmds != Q_SYNC || special) { 322 /*
401 sb = quotactl_block(special); 323 * As a special case Q_SYNC can be called without a specific device.
402 if (IS_ERR(sb)) 324 * It will iterate all superblocks that have quota enabled and call
403 return PTR_ERR(sb); 325 * the sync action on each of them.
326 */
327 if (!special) {
328 if (cmds == Q_SYNC)
329 return quota_sync_all(type);
330 return -ENODEV;
404 } 331 }
405 332
406 ret = check_quotactl_valid(sb, type, cmds, id); 333 sb = quotactl_block(special);
407 if (ret >= 0) 334 if (IS_ERR(sb))
408 ret = do_quotactl(sb, type, cmds, id, addr); 335 return PTR_ERR(sb);
409 if (sb)
410 drop_super(sb);
411 336
412 return ret; 337 ret = do_quotactl(sb, type, cmds, id, addr);
413}
414
415#if defined(CONFIG_COMPAT_FOR_U64_ALIGNMENT)
416/*
417 * This code works only for 32 bit quota tools over 64 bit OS (x86_64, ia64)
418 * and is necessary due to alignment problems.
419 */
420struct compat_if_dqblk {
421 compat_u64 dqb_bhardlimit;
422 compat_u64 dqb_bsoftlimit;
423 compat_u64 dqb_curspace;
424 compat_u64 dqb_ihardlimit;
425 compat_u64 dqb_isoftlimit;
426 compat_u64 dqb_curinodes;
427 compat_u64 dqb_btime;
428 compat_u64 dqb_itime;
429 compat_uint_t dqb_valid;
430};
431
432/* XFS structures */
433struct compat_fs_qfilestat {
434 compat_u64 dqb_bhardlimit;
435 compat_u64 qfs_nblks;
436 compat_uint_t qfs_nextents;
437};
438
439struct compat_fs_quota_stat {
440 __s8 qs_version;
441 __u16 qs_flags;
442 __s8 qs_pad;
443 struct compat_fs_qfilestat qs_uquota;
444 struct compat_fs_qfilestat qs_gquota;
445 compat_uint_t qs_incoredqs;
446 compat_int_t qs_btimelimit;
447 compat_int_t qs_itimelimit;
448 compat_int_t qs_rtbtimelimit;
449 __u16 qs_bwarnlimit;
450 __u16 qs_iwarnlimit;
451};
452
453asmlinkage long sys32_quotactl(unsigned int cmd, const char __user *special,
454 qid_t id, void __user *addr)
455{
456 unsigned int cmds;
457 struct if_dqblk __user *dqblk;
458 struct compat_if_dqblk __user *compat_dqblk;
459 struct fs_quota_stat __user *fsqstat;
460 struct compat_fs_quota_stat __user *compat_fsqstat;
461 compat_uint_t data;
462 u16 xdata;
463 long ret;
464 338
465 cmds = cmd >> SUBCMDSHIFT; 339 drop_super(sb);
466
467 switch (cmds) {
468 case Q_GETQUOTA:
469 dqblk = compat_alloc_user_space(sizeof(struct if_dqblk));
470 compat_dqblk = addr;
471 ret = sys_quotactl(cmd, special, id, dqblk);
472 if (ret)
473 break;
474 if (copy_in_user(compat_dqblk, dqblk, sizeof(*compat_dqblk)) ||
475 get_user(data, &dqblk->dqb_valid) ||
476 put_user(data, &compat_dqblk->dqb_valid))
477 ret = -EFAULT;
478 break;
479 case Q_SETQUOTA:
480 dqblk = compat_alloc_user_space(sizeof(struct if_dqblk));
481 compat_dqblk = addr;
482 ret = -EFAULT;
483 if (copy_in_user(dqblk, compat_dqblk, sizeof(*compat_dqblk)) ||
484 get_user(data, &compat_dqblk->dqb_valid) ||
485 put_user(data, &dqblk->dqb_valid))
486 break;
487 ret = sys_quotactl(cmd, special, id, dqblk);
488 break;
489 case Q_XGETQSTAT:
490 fsqstat = compat_alloc_user_space(sizeof(struct fs_quota_stat));
491 compat_fsqstat = addr;
492 ret = sys_quotactl(cmd, special, id, fsqstat);
493 if (ret)
494 break;
495 ret = -EFAULT;
496 /* Copying qs_version, qs_flags, qs_pad */
497 if (copy_in_user(compat_fsqstat, fsqstat,
498 offsetof(struct compat_fs_quota_stat, qs_uquota)))
499 break;
500 /* Copying qs_uquota */
501 if (copy_in_user(&compat_fsqstat->qs_uquota,
502 &fsqstat->qs_uquota,
503 sizeof(compat_fsqstat->qs_uquota)) ||
504 get_user(data, &fsqstat->qs_uquota.qfs_nextents) ||
505 put_user(data, &compat_fsqstat->qs_uquota.qfs_nextents))
506 break;
507 /* Copying qs_gquota */
508 if (copy_in_user(&compat_fsqstat->qs_gquota,
509 &fsqstat->qs_gquota,
510 sizeof(compat_fsqstat->qs_gquota)) ||
511 get_user(data, &fsqstat->qs_gquota.qfs_nextents) ||
512 put_user(data, &compat_fsqstat->qs_gquota.qfs_nextents))
513 break;
514 /* Copying the rest */
515 if (copy_in_user(&compat_fsqstat->qs_incoredqs,
516 &fsqstat->qs_incoredqs,
517 sizeof(struct compat_fs_quota_stat) -
518 offsetof(struct compat_fs_quota_stat, qs_incoredqs)) ||
519 get_user(xdata, &fsqstat->qs_iwarnlimit) ||
520 put_user(xdata, &compat_fsqstat->qs_iwarnlimit))
521 break;
522 ret = 0;
523 break;
524 default:
525 ret = sys_quotactl(cmd, special, id, addr);
526 }
527 return ret; 340 return ret;
528} 341}
529#endif
530
531
532#ifdef CONFIG_QUOTA_NETLINK_INTERFACE
533
534/* Netlink family structure for quota */
535static struct genl_family quota_genl_family = {
536 .id = GENL_ID_GENERATE,
537 .hdrsize = 0,
538 .name = "VFS_DQUOT",
539 .version = 1,
540 .maxattr = QUOTA_NL_A_MAX,
541};
542
543/**
544 * quota_send_warning - Send warning to userspace about exceeded quota
545 * @type: The quota type: USRQQUOTA, GRPQUOTA,...
546 * @id: The user or group id of the quota that was exceeded
547 * @dev: The device on which the fs is mounted (sb->s_dev)
548 * @warntype: The type of the warning: QUOTA_NL_...
549 *
550 * This can be used by filesystems (including those which don't use
551 * dquot) to send a message to userspace relating to quota limits.
552 *
553 */
554
555void quota_send_warning(short type, unsigned int id, dev_t dev,
556 const char warntype)
557{
558 static atomic_t seq;
559 struct sk_buff *skb;
560 void *msg_head;
561 int ret;
562 int msg_size = 4 * nla_total_size(sizeof(u32)) +
563 2 * nla_total_size(sizeof(u64));
564
565 /* We have to allocate using GFP_NOFS as we are called from a
566 * filesystem performing write and thus further recursion into
567 * the fs to free some data could cause deadlocks. */
568 skb = genlmsg_new(msg_size, GFP_NOFS);
569 if (!skb) {
570 printk(KERN_ERR
571 "VFS: Not enough memory to send quota warning.\n");
572 return;
573 }
574 msg_head = genlmsg_put(skb, 0, atomic_add_return(1, &seq),
575 &quota_genl_family, 0, QUOTA_NL_C_WARNING);
576 if (!msg_head) {
577 printk(KERN_ERR
578 "VFS: Cannot store netlink header in quota warning.\n");
579 goto err_out;
580 }
581 ret = nla_put_u32(skb, QUOTA_NL_A_QTYPE, type);
582 if (ret)
583 goto attr_err_out;
584 ret = nla_put_u64(skb, QUOTA_NL_A_EXCESS_ID, id);
585 if (ret)
586 goto attr_err_out;
587 ret = nla_put_u32(skb, QUOTA_NL_A_WARNING, warntype);
588 if (ret)
589 goto attr_err_out;
590 ret = nla_put_u32(skb, QUOTA_NL_A_DEV_MAJOR, MAJOR(dev));
591 if (ret)
592 goto attr_err_out;
593 ret = nla_put_u32(skb, QUOTA_NL_A_DEV_MINOR, MINOR(dev));
594 if (ret)
595 goto attr_err_out;
596 ret = nla_put_u64(skb, QUOTA_NL_A_CAUSED_ID, current_uid());
597 if (ret)
598 goto attr_err_out;
599 genlmsg_end(skb, msg_head);
600
601 genlmsg_multicast(skb, 0, quota_genl_family.id, GFP_NOFS);
602 return;
603attr_err_out:
604 printk(KERN_ERR "VFS: Not enough space to compose quota message!\n");
605err_out:
606 kfree_skb(skb);
607}
608EXPORT_SYMBOL(quota_send_warning);
609
610static int __init quota_init(void)
611{
612 if (genl_register_family(&quota_genl_family) != 0)
613 printk(KERN_ERR
614 "VFS: Failed to create quota netlink interface.\n");
615 return 0;
616};
617
618module_init(quota_init);
619#endif
620
diff --git a/fs/reiserfs/bitmap.c b/fs/reiserfs/bitmap.c
index 65c872761177..dc014f7def05 100644
--- a/fs/reiserfs/bitmap.c
+++ b/fs/reiserfs/bitmap.c
@@ -425,7 +425,7 @@ static void _reiserfs_free_block(struct reiserfs_transaction_handle *th,
425 425
426 journal_mark_dirty(th, s, sbh); 426 journal_mark_dirty(th, s, sbh);
427 if (for_unformatted) 427 if (for_unformatted)
428 vfs_dq_free_block_nodirty(inode, 1); 428 dquot_free_block_nodirty(inode, 1);
429} 429}
430 430
431void reiserfs_free_block(struct reiserfs_transaction_handle *th, 431void reiserfs_free_block(struct reiserfs_transaction_handle *th,
@@ -1049,7 +1049,7 @@ static inline int blocknrs_and_prealloc_arrays_from_search_start
1049 amount_needed, hint->inode->i_uid); 1049 amount_needed, hint->inode->i_uid);
1050#endif 1050#endif
1051 quota_ret = 1051 quota_ret =
1052 vfs_dq_alloc_block_nodirty(hint->inode, amount_needed); 1052 dquot_alloc_block_nodirty(hint->inode, amount_needed);
1053 if (quota_ret) /* Quota exceeded? */ 1053 if (quota_ret) /* Quota exceeded? */
1054 return QUOTA_EXCEEDED; 1054 return QUOTA_EXCEEDED;
1055 if (hint->preallocate && hint->prealloc_size) { 1055 if (hint->preallocate && hint->prealloc_size) {
@@ -1058,7 +1058,7 @@ static inline int blocknrs_and_prealloc_arrays_from_search_start
1058 "reiserquota: allocating (prealloc) %d blocks id=%u", 1058 "reiserquota: allocating (prealloc) %d blocks id=%u",
1059 hint->prealloc_size, hint->inode->i_uid); 1059 hint->prealloc_size, hint->inode->i_uid);
1060#endif 1060#endif
1061 quota_ret = vfs_dq_prealloc_block_nodirty(hint->inode, 1061 quota_ret = dquot_prealloc_block_nodirty(hint->inode,
1062 hint->prealloc_size); 1062 hint->prealloc_size);
1063 if (quota_ret) 1063 if (quota_ret)
1064 hint->preallocate = hint->prealloc_size = 0; 1064 hint->preallocate = hint->prealloc_size = 0;
@@ -1092,7 +1092,7 @@ static inline int blocknrs_and_prealloc_arrays_from_search_start
1092 hint->inode->i_uid); 1092 hint->inode->i_uid);
1093#endif 1093#endif
1094 /* Free not allocated blocks */ 1094 /* Free not allocated blocks */
1095 vfs_dq_free_block_nodirty(hint->inode, 1095 dquot_free_block_nodirty(hint->inode,
1096 amount_needed + hint->prealloc_size - 1096 amount_needed + hint->prealloc_size -
1097 nr_allocated); 1097 nr_allocated);
1098 } 1098 }
@@ -1125,7 +1125,7 @@ static inline int blocknrs_and_prealloc_arrays_from_search_start
1125 REISERFS_I(hint->inode)->i_prealloc_count, 1125 REISERFS_I(hint->inode)->i_prealloc_count,
1126 hint->inode->i_uid); 1126 hint->inode->i_uid);
1127#endif 1127#endif
1128 vfs_dq_free_block_nodirty(hint->inode, amount_needed + 1128 dquot_free_block_nodirty(hint->inode, amount_needed +
1129 hint->prealloc_size - nr_allocated - 1129 hint->prealloc_size - nr_allocated -
1130 REISERFS_I(hint->inode)-> 1130 REISERFS_I(hint->inode)->
1131 i_prealloc_count); 1131 i_prealloc_count);
diff --git a/fs/reiserfs/file.c b/fs/reiserfs/file.c
index da2dba082e2d..1d9c12714c5c 100644
--- a/fs/reiserfs/file.c
+++ b/fs/reiserfs/file.c
@@ -289,7 +289,7 @@ const struct file_operations reiserfs_file_operations = {
289 .compat_ioctl = reiserfs_compat_ioctl, 289 .compat_ioctl = reiserfs_compat_ioctl,
290#endif 290#endif
291 .mmap = reiserfs_file_mmap, 291 .mmap = reiserfs_file_mmap,
292 .open = generic_file_open, 292 .open = dquot_file_open,
293 .release = reiserfs_file_release, 293 .release = reiserfs_file_release,
294 .fsync = reiserfs_sync_file, 294 .fsync = reiserfs_sync_file,
295 .aio_read = generic_file_aio_read, 295 .aio_read = generic_file_aio_read,
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index 2df0f5c7c60b..d1da94b82d8f 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -34,6 +34,9 @@ void reiserfs_delete_inode(struct inode *inode)
34 int depth; 34 int depth;
35 int err; 35 int err;
36 36
37 if (!is_bad_inode(inode))
38 dquot_initialize(inode);
39
37 truncate_inode_pages(&inode->i_data, 0); 40 truncate_inode_pages(&inode->i_data, 0);
38 41
39 depth = reiserfs_write_lock_once(inode->i_sb); 42 depth = reiserfs_write_lock_once(inode->i_sb);
@@ -54,7 +57,7 @@ void reiserfs_delete_inode(struct inode *inode)
54 * after delete_object so that quota updates go into the same transaction as 57 * after delete_object so that quota updates go into the same transaction as
55 * stat data deletion */ 58 * stat data deletion */
56 if (!err) 59 if (!err)
57 vfs_dq_free_inode(inode); 60 dquot_free_inode(inode);
58 61
59 if (journal_end(&th, inode->i_sb, jbegin_count)) 62 if (journal_end(&th, inode->i_sb, jbegin_count))
60 goto out; 63 goto out;
@@ -1615,7 +1618,7 @@ int reiserfs_encode_fh(struct dentry *dentry, __u32 * data, int *lenp,
1615** to properly mark inodes for datasync and such, but only actually 1618** to properly mark inodes for datasync and such, but only actually
1616** does something when called for a synchronous update. 1619** does something when called for a synchronous update.
1617*/ 1620*/
1618int reiserfs_write_inode(struct inode *inode, int do_sync) 1621int reiserfs_write_inode(struct inode *inode, struct writeback_control *wbc)
1619{ 1622{
1620 struct reiserfs_transaction_handle th; 1623 struct reiserfs_transaction_handle th;
1621 int jbegin_count = 1; 1624 int jbegin_count = 1;
@@ -1627,7 +1630,7 @@ int reiserfs_write_inode(struct inode *inode, int do_sync)
1627 ** inode needs to reach disk for safety, and they can safely be 1630 ** inode needs to reach disk for safety, and they can safely be
1628 ** ignored because the altered inode has already been logged. 1631 ** ignored because the altered inode has already been logged.
1629 */ 1632 */
1630 if (do_sync && !(current->flags & PF_MEMALLOC)) { 1633 if (wbc->sync_mode == WB_SYNC_ALL && !(current->flags & PF_MEMALLOC)) {
1631 reiserfs_write_lock(inode->i_sb); 1634 reiserfs_write_lock(inode->i_sb);
1632 if (!journal_begin(&th, inode->i_sb, jbegin_count)) { 1635 if (!journal_begin(&th, inode->i_sb, jbegin_count)) {
1633 reiserfs_update_sd(&th, inode); 1636 reiserfs_update_sd(&th, inode);
@@ -1765,10 +1768,10 @@ int reiserfs_new_inode(struct reiserfs_transaction_handle *th,
1765 1768
1766 BUG_ON(!th->t_trans_id); 1769 BUG_ON(!th->t_trans_id);
1767 1770
1768 if (vfs_dq_alloc_inode(inode)) { 1771 dquot_initialize(inode);
1769 err = -EDQUOT; 1772 err = dquot_alloc_inode(inode);
1773 if (err)
1770 goto out_end_trans; 1774 goto out_end_trans;
1771 }
1772 if (!dir->i_nlink) { 1775 if (!dir->i_nlink) {
1773 err = -EPERM; 1776 err = -EPERM;
1774 goto out_bad_inode; 1777 goto out_bad_inode;
@@ -1959,12 +1962,12 @@ int reiserfs_new_inode(struct reiserfs_transaction_handle *th,
1959 INODE_PKEY(inode)->k_objectid = 0; 1962 INODE_PKEY(inode)->k_objectid = 0;
1960 1963
1961 /* Quota change must be inside a transaction for journaling */ 1964 /* Quota change must be inside a transaction for journaling */
1962 vfs_dq_free_inode(inode); 1965 dquot_free_inode(inode);
1963 1966
1964 out_end_trans: 1967 out_end_trans:
1965 journal_end(th, th->t_super, th->t_blocks_allocated); 1968 journal_end(th, th->t_super, th->t_blocks_allocated);
1966 /* Drop can be outside and it needs more credits so it's better to have it outside */ 1969 /* Drop can be outside and it needs more credits so it's better to have it outside */
1967 vfs_dq_drop(inode); 1970 dquot_drop(inode);
1968 inode->i_flags |= S_NOQUOTA; 1971 inode->i_flags |= S_NOQUOTA;
1969 make_bad_inode(inode); 1972 make_bad_inode(inode);
1970 1973
@@ -3073,6 +3076,8 @@ int reiserfs_setattr(struct dentry *dentry, struct iattr *attr)
3073 3076
3074 depth = reiserfs_write_lock_once(inode->i_sb); 3077 depth = reiserfs_write_lock_once(inode->i_sb);
3075 if (attr->ia_valid & ATTR_SIZE) { 3078 if (attr->ia_valid & ATTR_SIZE) {
3079 dquot_initialize(inode);
3080
3076 /* version 2 items will be caught by the s_maxbytes check 3081 /* version 2 items will be caught by the s_maxbytes check
3077 ** done for us in vmtruncate 3082 ** done for us in vmtruncate
3078 */ 3083 */
@@ -3134,8 +3139,7 @@ int reiserfs_setattr(struct dentry *dentry, struct iattr *attr)
3134 jbegin_count); 3139 jbegin_count);
3135 if (error) 3140 if (error)
3136 goto out; 3141 goto out;
3137 error = 3142 error = dquot_transfer(inode, attr);
3138 vfs_dq_transfer(inode, attr) ? -EDQUOT : 0;
3139 if (error) { 3143 if (error) {
3140 journal_end(&th, inode->i_sb, 3144 journal_end(&th, inode->i_sb,
3141 jbegin_count); 3145 jbegin_count);
diff --git a/fs/reiserfs/namei.c b/fs/reiserfs/namei.c
index 9d4dcf0b07cb..96e4cbbfaa18 100644
--- a/fs/reiserfs/namei.c
+++ b/fs/reiserfs/namei.c
@@ -546,7 +546,7 @@ static int reiserfs_add_entry(struct reiserfs_transaction_handle *th,
546*/ 546*/
547static int drop_new_inode(struct inode *inode) 547static int drop_new_inode(struct inode *inode)
548{ 548{
549 vfs_dq_drop(inode); 549 dquot_drop(inode);
550 make_bad_inode(inode); 550 make_bad_inode(inode);
551 inode->i_flags |= S_NOQUOTA; 551 inode->i_flags |= S_NOQUOTA;
552 iput(inode); 552 iput(inode);
@@ -554,7 +554,7 @@ static int drop_new_inode(struct inode *inode)
554} 554}
555 555
556/* utility function that does setup for reiserfs_new_inode. 556/* utility function that does setup for reiserfs_new_inode.
557** vfs_dq_init needs lots of credits so it's better to have it 557** dquot_initialize needs lots of credits so it's better to have it
558** outside of a transaction, so we had to pull some bits of 558** outside of a transaction, so we had to pull some bits of
559** reiserfs_new_inode out into this func. 559** reiserfs_new_inode out into this func.
560*/ 560*/
@@ -577,7 +577,7 @@ static int new_inode_init(struct inode *inode, struct inode *dir, int mode)
577 } else { 577 } else {
578 inode->i_gid = current_fsgid(); 578 inode->i_gid = current_fsgid();
579 } 579 }
580 vfs_dq_init(inode); 580 dquot_initialize(inode);
581 return 0; 581 return 0;
582} 582}
583 583
@@ -594,6 +594,8 @@ static int reiserfs_create(struct inode *dir, struct dentry *dentry, int mode,
594 struct reiserfs_transaction_handle th; 594 struct reiserfs_transaction_handle th;
595 struct reiserfs_security_handle security; 595 struct reiserfs_security_handle security;
596 596
597 dquot_initialize(dir);
598
597 if (!(inode = new_inode(dir->i_sb))) { 599 if (!(inode = new_inode(dir->i_sb))) {
598 return -ENOMEM; 600 return -ENOMEM;
599 } 601 }
@@ -666,6 +668,8 @@ static int reiserfs_mknod(struct inode *dir, struct dentry *dentry, int mode,
666 if (!new_valid_dev(rdev)) 668 if (!new_valid_dev(rdev))
667 return -EINVAL; 669 return -EINVAL;
668 670
671 dquot_initialize(dir);
672
669 if (!(inode = new_inode(dir->i_sb))) { 673 if (!(inode = new_inode(dir->i_sb))) {
670 return -ENOMEM; 674 return -ENOMEM;
671 } 675 }
@@ -739,6 +743,8 @@ static int reiserfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
739 2 * (REISERFS_QUOTA_INIT_BLOCKS(dir->i_sb) + 743 2 * (REISERFS_QUOTA_INIT_BLOCKS(dir->i_sb) +
740 REISERFS_QUOTA_TRANS_BLOCKS(dir->i_sb)); 744 REISERFS_QUOTA_TRANS_BLOCKS(dir->i_sb));
741 745
746 dquot_initialize(dir);
747
742#ifdef DISPLACE_NEW_PACKING_LOCALITIES 748#ifdef DISPLACE_NEW_PACKING_LOCALITIES
743 /* set flag that new packing locality created and new blocks for the content * of that directory are not displaced yet */ 749 /* set flag that new packing locality created and new blocks for the content * of that directory are not displaced yet */
744 REISERFS_I(dir)->new_packing_locality = 1; 750 REISERFS_I(dir)->new_packing_locality = 1;
@@ -842,6 +848,8 @@ static int reiserfs_rmdir(struct inode *dir, struct dentry *dentry)
842 JOURNAL_PER_BALANCE_CNT * 2 + 2 + 848 JOURNAL_PER_BALANCE_CNT * 2 + 2 +
843 4 * REISERFS_QUOTA_TRANS_BLOCKS(dir->i_sb); 849 4 * REISERFS_QUOTA_TRANS_BLOCKS(dir->i_sb);
844 850
851 dquot_initialize(dir);
852
845 reiserfs_write_lock(dir->i_sb); 853 reiserfs_write_lock(dir->i_sb);
846 retval = journal_begin(&th, dir->i_sb, jbegin_count); 854 retval = journal_begin(&th, dir->i_sb, jbegin_count);
847 if (retval) 855 if (retval)
@@ -923,6 +931,8 @@ static int reiserfs_unlink(struct inode *dir, struct dentry *dentry)
923 unsigned long savelink; 931 unsigned long savelink;
924 int depth; 932 int depth;
925 933
934 dquot_initialize(dir);
935
926 inode = dentry->d_inode; 936 inode = dentry->d_inode;
927 937
928 /* in this transaction we can be doing at max two balancings and update 938 /* in this transaction we can be doing at max two balancings and update
@@ -1024,6 +1034,8 @@ static int reiserfs_symlink(struct inode *parent_dir,
1024 2 * (REISERFS_QUOTA_INIT_BLOCKS(parent_dir->i_sb) + 1034 2 * (REISERFS_QUOTA_INIT_BLOCKS(parent_dir->i_sb) +
1025 REISERFS_QUOTA_TRANS_BLOCKS(parent_dir->i_sb)); 1035 REISERFS_QUOTA_TRANS_BLOCKS(parent_dir->i_sb));
1026 1036
1037 dquot_initialize(parent_dir);
1038
1027 if (!(inode = new_inode(parent_dir->i_sb))) { 1039 if (!(inode = new_inode(parent_dir->i_sb))) {
1028 return -ENOMEM; 1040 return -ENOMEM;
1029 } 1041 }
@@ -1111,6 +1123,8 @@ static int reiserfs_link(struct dentry *old_dentry, struct inode *dir,
1111 JOURNAL_PER_BALANCE_CNT * 3 + 1123 JOURNAL_PER_BALANCE_CNT * 3 +
1112 2 * REISERFS_QUOTA_TRANS_BLOCKS(dir->i_sb); 1124 2 * REISERFS_QUOTA_TRANS_BLOCKS(dir->i_sb);
1113 1125
1126 dquot_initialize(dir);
1127
1114 reiserfs_write_lock(dir->i_sb); 1128 reiserfs_write_lock(dir->i_sb);
1115 if (inode->i_nlink >= REISERFS_LINK_MAX) { 1129 if (inode->i_nlink >= REISERFS_LINK_MAX) {
1116 //FIXME: sd_nlink is 32 bit for new files 1130 //FIXME: sd_nlink is 32 bit for new files
@@ -1235,6 +1249,9 @@ static int reiserfs_rename(struct inode *old_dir, struct dentry *old_dentry,
1235 JOURNAL_PER_BALANCE_CNT * 3 + 5 + 1249 JOURNAL_PER_BALANCE_CNT * 3 + 5 +
1236 4 * REISERFS_QUOTA_TRANS_BLOCKS(old_dir->i_sb); 1250 4 * REISERFS_QUOTA_TRANS_BLOCKS(old_dir->i_sb);
1237 1251
1252 dquot_initialize(old_dir);
1253 dquot_initialize(new_dir);
1254
1238 old_inode = old_dentry->d_inode; 1255 old_inode = old_dentry->d_inode;
1239 new_dentry_inode = new_dentry->d_inode; 1256 new_dentry_inode = new_dentry->d_inode;
1240 1257
diff --git a/fs/reiserfs/stree.c b/fs/reiserfs/stree.c
index 5fa7118f04e1..313d39d639eb 100644
--- a/fs/reiserfs/stree.c
+++ b/fs/reiserfs/stree.c
@@ -1299,7 +1299,7 @@ int reiserfs_delete_item(struct reiserfs_transaction_handle *th,
1299 "reiserquota delete_item(): freeing %u, id=%u type=%c", 1299 "reiserquota delete_item(): freeing %u, id=%u type=%c",
1300 quota_cut_bytes, inode->i_uid, head2type(&s_ih)); 1300 quota_cut_bytes, inode->i_uid, head2type(&s_ih));
1301#endif 1301#endif
1302 vfs_dq_free_space_nodirty(inode, quota_cut_bytes); 1302 dquot_free_space_nodirty(inode, quota_cut_bytes);
1303 1303
1304 /* Return deleted body length */ 1304 /* Return deleted body length */
1305 return ret_value; 1305 return ret_value;
@@ -1383,7 +1383,7 @@ void reiserfs_delete_solid_item(struct reiserfs_transaction_handle *th,
1383 quota_cut_bytes, inode->i_uid, 1383 quota_cut_bytes, inode->i_uid,
1384 key2type(key)); 1384 key2type(key));
1385#endif 1385#endif
1386 vfs_dq_free_space_nodirty(inode, 1386 dquot_free_space_nodirty(inode,
1387 quota_cut_bytes); 1387 quota_cut_bytes);
1388 } 1388 }
1389 break; 1389 break;
@@ -1733,7 +1733,7 @@ int reiserfs_cut_from_item(struct reiserfs_transaction_handle *th,
1733 "reiserquota cut_from_item(): freeing %u id=%u type=%c", 1733 "reiserquota cut_from_item(): freeing %u id=%u type=%c",
1734 quota_cut_bytes, inode->i_uid, '?'); 1734 quota_cut_bytes, inode->i_uid, '?');
1735#endif 1735#endif
1736 vfs_dq_free_space_nodirty(inode, quota_cut_bytes); 1736 dquot_free_space_nodirty(inode, quota_cut_bytes);
1737 return ret_value; 1737 return ret_value;
1738} 1738}
1739 1739
@@ -1968,9 +1968,10 @@ int reiserfs_paste_into_item(struct reiserfs_transaction_handle *th, struct tree
1968 key2type(&(key->on_disk_key))); 1968 key2type(&(key->on_disk_key)));
1969#endif 1969#endif
1970 1970
1971 if (vfs_dq_alloc_space_nodirty(inode, pasted_size)) { 1971 retval = dquot_alloc_space_nodirty(inode, pasted_size);
1972 if (retval) {
1972 pathrelse(search_path); 1973 pathrelse(search_path);
1973 return -EDQUOT; 1974 return retval;
1974 } 1975 }
1975 init_tb_struct(th, &s_paste_balance, th->t_super, search_path, 1976 init_tb_struct(th, &s_paste_balance, th->t_super, search_path,
1976 pasted_size); 1977 pasted_size);
@@ -2024,7 +2025,7 @@ int reiserfs_paste_into_item(struct reiserfs_transaction_handle *th, struct tree
2024 pasted_size, inode->i_uid, 2025 pasted_size, inode->i_uid,
2025 key2type(&(key->on_disk_key))); 2026 key2type(&(key->on_disk_key)));
2026#endif 2027#endif
2027 vfs_dq_free_space_nodirty(inode, pasted_size); 2028 dquot_free_space_nodirty(inode, pasted_size);
2028 return retval; 2029 return retval;
2029} 2030}
2030 2031
@@ -2062,9 +2063,10 @@ int reiserfs_insert_item(struct reiserfs_transaction_handle *th,
2062#endif 2063#endif
2063 /* We can't dirty inode here. It would be immediately written but 2064 /* We can't dirty inode here. It would be immediately written but
2064 * appropriate stat item isn't inserted yet... */ 2065 * appropriate stat item isn't inserted yet... */
2065 if (vfs_dq_alloc_space_nodirty(inode, quota_bytes)) { 2066 retval = dquot_alloc_space_nodirty(inode, quota_bytes);
2067 if (retval) {
2066 pathrelse(path); 2068 pathrelse(path);
2067 return -EDQUOT; 2069 return retval;
2068 } 2070 }
2069 } 2071 }
2070 init_tb_struct(th, &s_ins_balance, th->t_super, path, 2072 init_tb_struct(th, &s_ins_balance, th->t_super, path,
@@ -2113,6 +2115,6 @@ int reiserfs_insert_item(struct reiserfs_transaction_handle *th,
2113 quota_bytes, inode->i_uid, head2type(ih)); 2115 quota_bytes, inode->i_uid, head2type(ih));
2114#endif 2116#endif
2115 if (inode) 2117 if (inode)
2116 vfs_dq_free_space_nodirty(inode, quota_bytes); 2118 dquot_free_space_nodirty(inode, quota_bytes);
2117 return retval; 2119 return retval;
2118} 2120}
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index b4a7dd03bdb9..04bf5d791bda 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -246,7 +246,7 @@ static int finish_unfinished(struct super_block *s)
246 retval = remove_save_link_only(s, &save_link_key, 0); 246 retval = remove_save_link_only(s, &save_link_key, 0);
247 continue; 247 continue;
248 } 248 }
249 vfs_dq_init(inode); 249 dquot_initialize(inode);
250 250
251 if (truncate && S_ISDIR(inode->i_mode)) { 251 if (truncate && S_ISDIR(inode->i_mode)) {
252 /* We got a truncate request for a dir which is impossible. 252 /* We got a truncate request for a dir which is impossible.
@@ -578,6 +578,11 @@ out:
578 reiserfs_write_unlock_once(inode->i_sb, lock_depth); 578 reiserfs_write_unlock_once(inode->i_sb, lock_depth);
579} 579}
580 580
581static void reiserfs_clear_inode(struct inode *inode)
582{
583 dquot_drop(inode);
584}
585
581#ifdef CONFIG_QUOTA 586#ifdef CONFIG_QUOTA
582static ssize_t reiserfs_quota_write(struct super_block *, int, const char *, 587static ssize_t reiserfs_quota_write(struct super_block *, int, const char *,
583 size_t, loff_t); 588 size_t, loff_t);
@@ -590,6 +595,7 @@ static const struct super_operations reiserfs_sops = {
590 .destroy_inode = reiserfs_destroy_inode, 595 .destroy_inode = reiserfs_destroy_inode,
591 .write_inode = reiserfs_write_inode, 596 .write_inode = reiserfs_write_inode,
592 .dirty_inode = reiserfs_dirty_inode, 597 .dirty_inode = reiserfs_dirty_inode,
598 .clear_inode = reiserfs_clear_inode,
593 .delete_inode = reiserfs_delete_inode, 599 .delete_inode = reiserfs_delete_inode,
594 .put_super = reiserfs_put_super, 600 .put_super = reiserfs_put_super,
595 .write_super = reiserfs_write_super, 601 .write_super = reiserfs_write_super,
@@ -616,13 +622,6 @@ static int reiserfs_write_info(struct super_block *, int);
616static int reiserfs_quota_on(struct super_block *, int, int, char *, int); 622static int reiserfs_quota_on(struct super_block *, int, int, char *, int);
617 623
618static const struct dquot_operations reiserfs_quota_operations = { 624static const struct dquot_operations reiserfs_quota_operations = {
619 .initialize = dquot_initialize,
620 .drop = dquot_drop,
621 .alloc_space = dquot_alloc_space,
622 .alloc_inode = dquot_alloc_inode,
623 .free_space = dquot_free_space,
624 .free_inode = dquot_free_inode,
625 .transfer = dquot_transfer,
626 .write_dquot = reiserfs_write_dquot, 625 .write_dquot = reiserfs_write_dquot,
627 .acquire_dquot = reiserfs_acquire_dquot, 626 .acquire_dquot = reiserfs_acquire_dquot,
628 .release_dquot = reiserfs_release_dquot, 627 .release_dquot = reiserfs_release_dquot,
diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c
index 81f09fab8ae4..37d034ca7d99 100644
--- a/fs/reiserfs/xattr.c
+++ b/fs/reiserfs/xattr.c
@@ -61,7 +61,6 @@
61static int xattr_create(struct inode *dir, struct dentry *dentry, int mode) 61static int xattr_create(struct inode *dir, struct dentry *dentry, int mode)
62{ 62{
63 BUG_ON(!mutex_is_locked(&dir->i_mutex)); 63 BUG_ON(!mutex_is_locked(&dir->i_mutex));
64 vfs_dq_init(dir);
65 return dir->i_op->create(dir, dentry, mode, NULL); 64 return dir->i_op->create(dir, dentry, mode, NULL);
66} 65}
67#endif 66#endif
@@ -69,7 +68,6 @@ static int xattr_create(struct inode *dir, struct dentry *dentry, int mode)
69static int xattr_mkdir(struct inode *dir, struct dentry *dentry, int mode) 68static int xattr_mkdir(struct inode *dir, struct dentry *dentry, int mode)
70{ 69{
71 BUG_ON(!mutex_is_locked(&dir->i_mutex)); 70 BUG_ON(!mutex_is_locked(&dir->i_mutex));
72 vfs_dq_init(dir);
73 return dir->i_op->mkdir(dir, dentry, mode); 71 return dir->i_op->mkdir(dir, dentry, mode);
74} 72}
75 73
@@ -81,7 +79,6 @@ static int xattr_unlink(struct inode *dir, struct dentry *dentry)
81{ 79{
82 int error; 80 int error;
83 BUG_ON(!mutex_is_locked(&dir->i_mutex)); 81 BUG_ON(!mutex_is_locked(&dir->i_mutex));
84 vfs_dq_init(dir);
85 82
86 reiserfs_mutex_lock_nested_safe(&dentry->d_inode->i_mutex, 83 reiserfs_mutex_lock_nested_safe(&dentry->d_inode->i_mutex,
87 I_MUTEX_CHILD, dir->i_sb); 84 I_MUTEX_CHILD, dir->i_sb);
@@ -97,7 +94,6 @@ static int xattr_rmdir(struct inode *dir, struct dentry *dentry)
97{ 94{
98 int error; 95 int error;
99 BUG_ON(!mutex_is_locked(&dir->i_mutex)); 96 BUG_ON(!mutex_is_locked(&dir->i_mutex));
100 vfs_dq_init(dir);
101 97
102 reiserfs_mutex_lock_nested_safe(&dentry->d_inode->i_mutex, 98 reiserfs_mutex_lock_nested_safe(&dentry->d_inode->i_mutex,
103 I_MUTEX_CHILD, dir->i_sb); 99 I_MUTEX_CHILD, dir->i_sb);
diff --git a/fs/squashfs/Makefile b/fs/squashfs/Makefile
index 70e3244fa30f..df8a19ef870d 100644
--- a/fs/squashfs/Makefile
+++ b/fs/squashfs/Makefile
@@ -4,4 +4,4 @@
4 4
5obj-$(CONFIG_SQUASHFS) += squashfs.o 5obj-$(CONFIG_SQUASHFS) += squashfs.o
6squashfs-y += block.o cache.o dir.o export.o file.o fragment.o id.o inode.o 6squashfs-y += block.o cache.o dir.o export.o file.o fragment.o id.o inode.o
7squashfs-y += namei.o super.o symlink.o 7squashfs-y += namei.o super.o symlink.o zlib_wrapper.o decompressor.o
diff --git a/fs/squashfs/block.c b/fs/squashfs/block.c
index 2a7960310349..1cb0d81b164b 100644
--- a/fs/squashfs/block.c
+++ b/fs/squashfs/block.c
@@ -29,15 +29,14 @@
29#include <linux/fs.h> 29#include <linux/fs.h>
30#include <linux/vfs.h> 30#include <linux/vfs.h>
31#include <linux/slab.h> 31#include <linux/slab.h>
32#include <linux/mutex.h>
33#include <linux/string.h> 32#include <linux/string.h>
34#include <linux/buffer_head.h> 33#include <linux/buffer_head.h>
35#include <linux/zlib.h>
36 34
37#include "squashfs_fs.h" 35#include "squashfs_fs.h"
38#include "squashfs_fs_sb.h" 36#include "squashfs_fs_sb.h"
39#include "squashfs_fs_i.h" 37#include "squashfs_fs_i.h"
40#include "squashfs.h" 38#include "squashfs.h"
39#include "decompressor.h"
41 40
42/* 41/*
43 * Read the metadata block length, this is stored in the first two 42 * Read the metadata block length, this is stored in the first two
@@ -153,72 +152,10 @@ int squashfs_read_data(struct super_block *sb, void **buffer, u64 index,
153 } 152 }
154 153
155 if (compressed) { 154 if (compressed) {
156 int zlib_err = 0, zlib_init = 0; 155 length = squashfs_decompress(msblk, buffer, bh, b, offset,
157 156 length, srclength, pages);
158 /* 157 if (length < 0)
159 * Uncompress block. 158 goto read_failure;
160 */
161
162 mutex_lock(&msblk->read_data_mutex);
163
164 msblk->stream.avail_out = 0;
165 msblk->stream.avail_in = 0;
166
167 bytes = length;
168 do {
169 if (msblk->stream.avail_in == 0 && k < b) {
170 avail = min(bytes, msblk->devblksize - offset);
171 bytes -= avail;
172 wait_on_buffer(bh[k]);
173 if (!buffer_uptodate(bh[k]))
174 goto release_mutex;
175
176 if (avail == 0) {
177 offset = 0;
178 put_bh(bh[k++]);
179 continue;
180 }
181
182 msblk->stream.next_in = bh[k]->b_data + offset;
183 msblk->stream.avail_in = avail;
184 offset = 0;
185 }
186
187 if (msblk->stream.avail_out == 0 && page < pages) {
188 msblk->stream.next_out = buffer[page++];
189 msblk->stream.avail_out = PAGE_CACHE_SIZE;
190 }
191
192 if (!zlib_init) {
193 zlib_err = zlib_inflateInit(&msblk->stream);
194 if (zlib_err != Z_OK) {
195 ERROR("zlib_inflateInit returned"
196 " unexpected result 0x%x,"
197 " srclength %d\n", zlib_err,
198 srclength);
199 goto release_mutex;
200 }
201 zlib_init = 1;
202 }
203
204 zlib_err = zlib_inflate(&msblk->stream, Z_SYNC_FLUSH);
205
206 if (msblk->stream.avail_in == 0 && k < b)
207 put_bh(bh[k++]);
208 } while (zlib_err == Z_OK);
209
210 if (zlib_err != Z_STREAM_END) {
211 ERROR("zlib_inflate error, data probably corrupt\n");
212 goto release_mutex;
213 }
214
215 zlib_err = zlib_inflateEnd(&msblk->stream);
216 if (zlib_err != Z_OK) {
217 ERROR("zlib_inflate error, data probably corrupt\n");
218 goto release_mutex;
219 }
220 length = msblk->stream.total_out;
221 mutex_unlock(&msblk->read_data_mutex);
222 } else { 159 } else {
223 /* 160 /*
224 * Block is uncompressed. 161 * Block is uncompressed.
@@ -255,9 +192,6 @@ int squashfs_read_data(struct super_block *sb, void **buffer, u64 index,
255 kfree(bh); 192 kfree(bh);
256 return length; 193 return length;
257 194
258release_mutex:
259 mutex_unlock(&msblk->read_data_mutex);
260
261block_release: 195block_release:
262 for (; k < b; k++) 196 for (; k < b; k++)
263 put_bh(bh[k]); 197 put_bh(bh[k]);
diff --git a/fs/squashfs/cache.c b/fs/squashfs/cache.c
index 40c98fa6b5d6..57314bee9059 100644
--- a/fs/squashfs/cache.c
+++ b/fs/squashfs/cache.c
@@ -51,7 +51,6 @@
51#include <linux/sched.h> 51#include <linux/sched.h>
52#include <linux/spinlock.h> 52#include <linux/spinlock.h>
53#include <linux/wait.h> 53#include <linux/wait.h>
54#include <linux/zlib.h>
55#include <linux/pagemap.h> 54#include <linux/pagemap.h>
56 55
57#include "squashfs_fs.h" 56#include "squashfs_fs.h"
diff --git a/fs/squashfs/decompressor.c b/fs/squashfs/decompressor.c
new file mode 100644
index 000000000000..157478da6ac9
--- /dev/null
+++ b/fs/squashfs/decompressor.c
@@ -0,0 +1,68 @@
1/*
2 * Squashfs - a compressed read only filesystem for Linux
3 *
4 * Copyright (c) 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009
5 * Phillip Lougher <phillip@lougher.demon.co.uk>
6 *
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation; either version 2,
10 * or (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
20 *
21 * decompressor.c
22 */
23
24#include <linux/types.h>
25#include <linux/mutex.h>
26#include <linux/buffer_head.h>
27
28#include "squashfs_fs.h"
29#include "squashfs_fs_sb.h"
30#include "squashfs_fs_i.h"
31#include "decompressor.h"
32#include "squashfs.h"
33
34/*
35 * This file (and decompressor.h) implements a decompressor framework for
36 * Squashfs, allowing multiple decompressors to be easily supported
37 */
38
39static const struct squashfs_decompressor squashfs_lzma_unsupported_comp_ops = {
40 NULL, NULL, NULL, LZMA_COMPRESSION, "lzma", 0
41};
42
43static const struct squashfs_decompressor squashfs_lzo_unsupported_comp_ops = {
44 NULL, NULL, NULL, LZO_COMPRESSION, "lzo", 0
45};
46
47static const struct squashfs_decompressor squashfs_unknown_comp_ops = {
48 NULL, NULL, NULL, 0, "unknown", 0
49};
50
51static const struct squashfs_decompressor *decompressor[] = {
52 &squashfs_zlib_comp_ops,
53 &squashfs_lzma_unsupported_comp_ops,
54 &squashfs_lzo_unsupported_comp_ops,
55 &squashfs_unknown_comp_ops
56};
57
58
59const struct squashfs_decompressor *squashfs_lookup_decompressor(int id)
60{
61 int i;
62
63 for (i = 0; decompressor[i]->id; i++)
64 if (id == decompressor[i]->id)
65 break;
66
67 return decompressor[i];
68}
diff --git a/fs/squashfs/decompressor.h b/fs/squashfs/decompressor.h
new file mode 100644
index 000000000000..7425f80783f6
--- /dev/null
+++ b/fs/squashfs/decompressor.h
@@ -0,0 +1,55 @@
1#ifndef DECOMPRESSOR_H
2#define DECOMPRESSOR_H
3/*
4 * Squashfs - a compressed read only filesystem for Linux
5 *
6 * Copyright (c) 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009
7 * Phillip Lougher <phillip@lougher.demon.co.uk>
8 *
9 * This program is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU General Public License
11 * as published by the Free Software Foundation; either version 2,
12 * or (at your option) any later version.
13 *
14 * This program is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU General Public License for more details.
18 *
19 * You should have received a copy of the GNU General Public License
20 * along with this program; if not, write to the Free Software
21 * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
22 *
23 * decompressor.h
24 */
25
26struct squashfs_decompressor {
27 void *(*init)(struct squashfs_sb_info *);
28 void (*free)(void *);
29 int (*decompress)(struct squashfs_sb_info *, void **,
30 struct buffer_head **, int, int, int, int, int);
31 int id;
32 char *name;
33 int supported;
34};
35
36static inline void *squashfs_decompressor_init(struct squashfs_sb_info *msblk)
37{
38 return msblk->decompressor->init(msblk);
39}
40
41static inline void squashfs_decompressor_free(struct squashfs_sb_info *msblk,
42 void *s)
43{
44 if (msblk->decompressor)
45 msblk->decompressor->free(s);
46}
47
48static inline int squashfs_decompress(struct squashfs_sb_info *msblk,
49 void **buffer, struct buffer_head **bh, int b, int offset, int length,
50 int srclength, int pages)
51{
52 return msblk->decompressor->decompress(msblk, buffer, bh, b, offset,
53 length, srclength, pages);
54}
55#endif
diff --git a/fs/squashfs/dir.c b/fs/squashfs/dir.c
index 566b0eaed868..12b933ac6585 100644
--- a/fs/squashfs/dir.c
+++ b/fs/squashfs/dir.c
@@ -30,7 +30,6 @@
30#include <linux/fs.h> 30#include <linux/fs.h>
31#include <linux/vfs.h> 31#include <linux/vfs.h>
32#include <linux/slab.h> 32#include <linux/slab.h>
33#include <linux/zlib.h>
34 33
35#include "squashfs_fs.h" 34#include "squashfs_fs.h"
36#include "squashfs_fs_sb.h" 35#include "squashfs_fs_sb.h"
diff --git a/fs/squashfs/export.c b/fs/squashfs/export.c
index 2b1b8fe5e037..7f93d5a9ee05 100644
--- a/fs/squashfs/export.c
+++ b/fs/squashfs/export.c
@@ -39,7 +39,6 @@
39#include <linux/vfs.h> 39#include <linux/vfs.h>
40#include <linux/dcache.h> 40#include <linux/dcache.h>
41#include <linux/exportfs.h> 41#include <linux/exportfs.h>
42#include <linux/zlib.h>
43#include <linux/slab.h> 42#include <linux/slab.h>
44 43
45#include "squashfs_fs.h" 44#include "squashfs_fs.h"
diff --git a/fs/squashfs/file.c b/fs/squashfs/file.c
index 717767d831df..a25c5060bdcb 100644
--- a/fs/squashfs/file.c
+++ b/fs/squashfs/file.c
@@ -47,7 +47,6 @@
47#include <linux/string.h> 47#include <linux/string.h>
48#include <linux/pagemap.h> 48#include <linux/pagemap.h>
49#include <linux/mutex.h> 49#include <linux/mutex.h>
50#include <linux/zlib.h>
51 50
52#include "squashfs_fs.h" 51#include "squashfs_fs.h"
53#include "squashfs_fs_sb.h" 52#include "squashfs_fs_sb.h"
diff --git a/fs/squashfs/fragment.c b/fs/squashfs/fragment.c
index b5a2c15bbbc7..7c90bbd6879d 100644
--- a/fs/squashfs/fragment.c
+++ b/fs/squashfs/fragment.c
@@ -36,7 +36,6 @@
36#include <linux/fs.h> 36#include <linux/fs.h>
37#include <linux/vfs.h> 37#include <linux/vfs.h>
38#include <linux/slab.h> 38#include <linux/slab.h>
39#include <linux/zlib.h>
40 39
41#include "squashfs_fs.h" 40#include "squashfs_fs.h"
42#include "squashfs_fs_sb.h" 41#include "squashfs_fs_sb.h"
diff --git a/fs/squashfs/id.c b/fs/squashfs/id.c
index 3795b837ba28..b7f64bcd2b70 100644
--- a/fs/squashfs/id.c
+++ b/fs/squashfs/id.c
@@ -34,7 +34,6 @@
34#include <linux/fs.h> 34#include <linux/fs.h>
35#include <linux/vfs.h> 35#include <linux/vfs.h>
36#include <linux/slab.h> 36#include <linux/slab.h>
37#include <linux/zlib.h>
38 37
39#include "squashfs_fs.h" 38#include "squashfs_fs.h"
40#include "squashfs_fs_sb.h" 39#include "squashfs_fs_sb.h"
diff --git a/fs/squashfs/inode.c b/fs/squashfs/inode.c
index 9101dbde39ec..49daaf669e41 100644
--- a/fs/squashfs/inode.c
+++ b/fs/squashfs/inode.c
@@ -40,7 +40,6 @@
40 40
41#include <linux/fs.h> 41#include <linux/fs.h>
42#include <linux/vfs.h> 42#include <linux/vfs.h>
43#include <linux/zlib.h>
44 43
45#include "squashfs_fs.h" 44#include "squashfs_fs.h"
46#include "squashfs_fs_sb.h" 45#include "squashfs_fs_sb.h"
diff --git a/fs/squashfs/namei.c b/fs/squashfs/namei.c
index 9e398653b22b..5266bd8ad932 100644
--- a/fs/squashfs/namei.c
+++ b/fs/squashfs/namei.c
@@ -57,7 +57,6 @@
57#include <linux/slab.h> 57#include <linux/slab.h>
58#include <linux/string.h> 58#include <linux/string.h>
59#include <linux/dcache.h> 59#include <linux/dcache.h>
60#include <linux/zlib.h>
61 60
62#include "squashfs_fs.h" 61#include "squashfs_fs.h"
63#include "squashfs_fs_sb.h" 62#include "squashfs_fs_sb.h"
diff --git a/fs/squashfs/squashfs.h b/fs/squashfs/squashfs.h
index 0e9feb6adf7e..fe2587af5512 100644
--- a/fs/squashfs/squashfs.h
+++ b/fs/squashfs/squashfs.h
@@ -51,6 +51,9 @@ extern struct squashfs_cache_entry *squashfs_get_datablock(struct super_block *,
51 u64, int); 51 u64, int);
52extern int squashfs_read_table(struct super_block *, void *, u64, int); 52extern int squashfs_read_table(struct super_block *, void *, u64, int);
53 53
54/* decompressor.c */
55extern const struct squashfs_decompressor *squashfs_lookup_decompressor(int);
56
54/* export.c */ 57/* export.c */
55extern __le64 *squashfs_read_inode_lookup_table(struct super_block *, u64, 58extern __le64 *squashfs_read_inode_lookup_table(struct super_block *, u64,
56 unsigned int); 59 unsigned int);
@@ -71,7 +74,7 @@ extern struct inode *squashfs_iget(struct super_block *, long long,
71extern int squashfs_read_inode(struct inode *, long long); 74extern int squashfs_read_inode(struct inode *, long long);
72 75
73/* 76/*
74 * Inodes and files operations 77 * Inodes, files and decompressor operations
75 */ 78 */
76 79
77/* dir.c */ 80/* dir.c */
@@ -88,3 +91,6 @@ extern const struct inode_operations squashfs_dir_inode_ops;
88 91
89/* symlink.c */ 92/* symlink.c */
90extern const struct address_space_operations squashfs_symlink_aops; 93extern const struct address_space_operations squashfs_symlink_aops;
94
95/* zlib_wrapper.c */
96extern const struct squashfs_decompressor squashfs_zlib_comp_ops;
diff --git a/fs/squashfs/squashfs_fs.h b/fs/squashfs/squashfs_fs.h
index 283daafc568e..79024245ea00 100644
--- a/fs/squashfs/squashfs_fs.h
+++ b/fs/squashfs/squashfs_fs.h
@@ -183,8 +183,6 @@
183#define SQUASHFS_MAX_FILE_SIZE (1LL << \ 183#define SQUASHFS_MAX_FILE_SIZE (1LL << \
184 (SQUASHFS_MAX_FILE_SIZE_LOG - 2)) 184 (SQUASHFS_MAX_FILE_SIZE_LOG - 2))
185 185
186#define SQUASHFS_MARKER_BYTE 0xff
187
188/* meta index cache */ 186/* meta index cache */
189#define SQUASHFS_META_INDEXES (SQUASHFS_METADATA_SIZE / sizeof(unsigned int)) 187#define SQUASHFS_META_INDEXES (SQUASHFS_METADATA_SIZE / sizeof(unsigned int))
190#define SQUASHFS_META_ENTRIES 127 188#define SQUASHFS_META_ENTRIES 127
@@ -211,7 +209,9 @@ struct meta_index {
211/* 209/*
212 * definitions for structures on disk 210 * definitions for structures on disk
213 */ 211 */
214#define ZLIB_COMPRESSION 1 212#define ZLIB_COMPRESSION 1
213#define LZMA_COMPRESSION 2
214#define LZO_COMPRESSION 3
215 215
216struct squashfs_super_block { 216struct squashfs_super_block {
217 __le32 s_magic; 217 __le32 s_magic;
diff --git a/fs/squashfs/squashfs_fs_sb.h b/fs/squashfs/squashfs_fs_sb.h
index c8c65614dd1c..2e77dc547e25 100644
--- a/fs/squashfs/squashfs_fs_sb.h
+++ b/fs/squashfs/squashfs_fs_sb.h
@@ -52,25 +52,25 @@ struct squashfs_cache_entry {
52}; 52};
53 53
54struct squashfs_sb_info { 54struct squashfs_sb_info {
55 int devblksize; 55 const struct squashfs_decompressor *decompressor;
56 int devblksize_log2; 56 int devblksize;
57 struct squashfs_cache *block_cache; 57 int devblksize_log2;
58 struct squashfs_cache *fragment_cache; 58 struct squashfs_cache *block_cache;
59 struct squashfs_cache *read_page; 59 struct squashfs_cache *fragment_cache;
60 int next_meta_index; 60 struct squashfs_cache *read_page;
61 __le64 *id_table; 61 int next_meta_index;
62 __le64 *fragment_index; 62 __le64 *id_table;
63 unsigned int *fragment_index_2; 63 __le64 *fragment_index;
64 struct mutex read_data_mutex; 64 struct mutex read_data_mutex;
65 struct mutex meta_index_mutex; 65 struct mutex meta_index_mutex;
66 struct meta_index *meta_index; 66 struct meta_index *meta_index;
67 z_stream stream; 67 void *stream;
68 __le64 *inode_lookup_table; 68 __le64 *inode_lookup_table;
69 u64 inode_table; 69 u64 inode_table;
70 u64 directory_table; 70 u64 directory_table;
71 unsigned int block_size; 71 unsigned int block_size;
72 unsigned short block_log; 72 unsigned short block_log;
73 long long bytes_used; 73 long long bytes_used;
74 unsigned int inodes; 74 unsigned int inodes;
75}; 75};
76#endif 76#endif
diff --git a/fs/squashfs/super.c b/fs/squashfs/super.c
index 6c197ef53add..3550aec2f655 100644
--- a/fs/squashfs/super.c
+++ b/fs/squashfs/super.c
@@ -35,34 +35,41 @@
35#include <linux/pagemap.h> 35#include <linux/pagemap.h>
36#include <linux/init.h> 36#include <linux/init.h>
37#include <linux/module.h> 37#include <linux/module.h>
38#include <linux/zlib.h>
39#include <linux/magic.h> 38#include <linux/magic.h>
40 39
41#include "squashfs_fs.h" 40#include "squashfs_fs.h"
42#include "squashfs_fs_sb.h" 41#include "squashfs_fs_sb.h"
43#include "squashfs_fs_i.h" 42#include "squashfs_fs_i.h"
44#include "squashfs.h" 43#include "squashfs.h"
44#include "decompressor.h"
45 45
46static struct file_system_type squashfs_fs_type; 46static struct file_system_type squashfs_fs_type;
47static const struct super_operations squashfs_super_ops; 47static const struct super_operations squashfs_super_ops;
48 48
49static int supported_squashfs_filesystem(short major, short minor, short comp) 49static const struct squashfs_decompressor *supported_squashfs_filesystem(short
50 major, short minor, short id)
50{ 51{
52 const struct squashfs_decompressor *decompressor;
53
51 if (major < SQUASHFS_MAJOR) { 54 if (major < SQUASHFS_MAJOR) {
52 ERROR("Major/Minor mismatch, older Squashfs %d.%d " 55 ERROR("Major/Minor mismatch, older Squashfs %d.%d "
53 "filesystems are unsupported\n", major, minor); 56 "filesystems are unsupported\n", major, minor);
54 return -EINVAL; 57 return NULL;
55 } else if (major > SQUASHFS_MAJOR || minor > SQUASHFS_MINOR) { 58 } else if (major > SQUASHFS_MAJOR || minor > SQUASHFS_MINOR) {
56 ERROR("Major/Minor mismatch, trying to mount newer " 59 ERROR("Major/Minor mismatch, trying to mount newer "
57 "%d.%d filesystem\n", major, minor); 60 "%d.%d filesystem\n", major, minor);
58 ERROR("Please update your kernel\n"); 61 ERROR("Please update your kernel\n");
59 return -EINVAL; 62 return NULL;
60 } 63 }
61 64
62 if (comp != ZLIB_COMPRESSION) 65 decompressor = squashfs_lookup_decompressor(id);
63 return -EINVAL; 66 if (!decompressor->supported) {
67 ERROR("Filesystem uses \"%s\" compression. This is not "
68 "supported\n", decompressor->name);
69 return NULL;
70 }
64 71
65 return 0; 72 return decompressor;
66} 73}
67 74
68 75
@@ -87,13 +94,6 @@ static int squashfs_fill_super(struct super_block *sb, void *data, int silent)
87 } 94 }
88 msblk = sb->s_fs_info; 95 msblk = sb->s_fs_info;
89 96
90 msblk->stream.workspace = kmalloc(zlib_inflate_workspacesize(),
91 GFP_KERNEL);
92 if (msblk->stream.workspace == NULL) {
93 ERROR("Failed to allocate zlib workspace\n");
94 goto failure;
95 }
96
97 sblk = kzalloc(sizeof(*sblk), GFP_KERNEL); 97 sblk = kzalloc(sizeof(*sblk), GFP_KERNEL);
98 if (sblk == NULL) { 98 if (sblk == NULL) {
99 ERROR("Failed to allocate squashfs_super_block\n"); 99 ERROR("Failed to allocate squashfs_super_block\n");
@@ -120,25 +120,25 @@ static int squashfs_fill_super(struct super_block *sb, void *data, int silent)
120 goto failed_mount; 120 goto failed_mount;
121 } 121 }
122 122
123 err = -EINVAL;
124
123 /* Check it is a SQUASHFS superblock */ 125 /* Check it is a SQUASHFS superblock */
124 sb->s_magic = le32_to_cpu(sblk->s_magic); 126 sb->s_magic = le32_to_cpu(sblk->s_magic);
125 if (sb->s_magic != SQUASHFS_MAGIC) { 127 if (sb->s_magic != SQUASHFS_MAGIC) {
126 if (!silent) 128 if (!silent)
127 ERROR("Can't find a SQUASHFS superblock on %s\n", 129 ERROR("Can't find a SQUASHFS superblock on %s\n",
128 bdevname(sb->s_bdev, b)); 130 bdevname(sb->s_bdev, b));
129 err = -EINVAL;
130 goto failed_mount; 131 goto failed_mount;
131 } 132 }
132 133
133 /* Check the MAJOR & MINOR versions and compression type */ 134 /* Check the MAJOR & MINOR versions and lookup compression type */
134 err = supported_squashfs_filesystem(le16_to_cpu(sblk->s_major), 135 msblk->decompressor = supported_squashfs_filesystem(
136 le16_to_cpu(sblk->s_major),
135 le16_to_cpu(sblk->s_minor), 137 le16_to_cpu(sblk->s_minor),
136 le16_to_cpu(sblk->compression)); 138 le16_to_cpu(sblk->compression));
137 if (err < 0) 139 if (msblk->decompressor == NULL)
138 goto failed_mount; 140 goto failed_mount;
139 141
140 err = -EINVAL;
141
142 /* 142 /*
143 * Check if there's xattrs in the filesystem. These are not 143 * Check if there's xattrs in the filesystem. These are not
144 * supported in this version, so warn that they will be ignored. 144 * supported in this version, so warn that they will be ignored.
@@ -205,6 +205,10 @@ static int squashfs_fill_super(struct super_block *sb, void *data, int silent)
205 205
206 err = -ENOMEM; 206 err = -ENOMEM;
207 207
208 msblk->stream = squashfs_decompressor_init(msblk);
209 if (msblk->stream == NULL)
210 goto failed_mount;
211
208 msblk->block_cache = squashfs_cache_init("metadata", 212 msblk->block_cache = squashfs_cache_init("metadata",
209 SQUASHFS_CACHED_BLKS, SQUASHFS_METADATA_SIZE); 213 SQUASHFS_CACHED_BLKS, SQUASHFS_METADATA_SIZE);
210 if (msblk->block_cache == NULL) 214 if (msblk->block_cache == NULL)
@@ -292,17 +296,16 @@ failed_mount:
292 squashfs_cache_delete(msblk->block_cache); 296 squashfs_cache_delete(msblk->block_cache);
293 squashfs_cache_delete(msblk->fragment_cache); 297 squashfs_cache_delete(msblk->fragment_cache);
294 squashfs_cache_delete(msblk->read_page); 298 squashfs_cache_delete(msblk->read_page);
299 squashfs_decompressor_free(msblk, msblk->stream);
295 kfree(msblk->inode_lookup_table); 300 kfree(msblk->inode_lookup_table);
296 kfree(msblk->fragment_index); 301 kfree(msblk->fragment_index);
297 kfree(msblk->id_table); 302 kfree(msblk->id_table);
298 kfree(msblk->stream.workspace);
299 kfree(sb->s_fs_info); 303 kfree(sb->s_fs_info);
300 sb->s_fs_info = NULL; 304 sb->s_fs_info = NULL;
301 kfree(sblk); 305 kfree(sblk);
302 return err; 306 return err;
303 307
304failure: 308failure:
305 kfree(msblk->stream.workspace);
306 kfree(sb->s_fs_info); 309 kfree(sb->s_fs_info);
307 sb->s_fs_info = NULL; 310 sb->s_fs_info = NULL;
308 return -ENOMEM; 311 return -ENOMEM;
@@ -346,10 +349,10 @@ static void squashfs_put_super(struct super_block *sb)
346 squashfs_cache_delete(sbi->block_cache); 349 squashfs_cache_delete(sbi->block_cache);
347 squashfs_cache_delete(sbi->fragment_cache); 350 squashfs_cache_delete(sbi->fragment_cache);
348 squashfs_cache_delete(sbi->read_page); 351 squashfs_cache_delete(sbi->read_page);
352 squashfs_decompressor_free(sbi, sbi->stream);
349 kfree(sbi->id_table); 353 kfree(sbi->id_table);
350 kfree(sbi->fragment_index); 354 kfree(sbi->fragment_index);
351 kfree(sbi->meta_index); 355 kfree(sbi->meta_index);
352 kfree(sbi->stream.workspace);
353 kfree(sb->s_fs_info); 356 kfree(sb->s_fs_info);
354 sb->s_fs_info = NULL; 357 sb->s_fs_info = NULL;
355 } 358 }
diff --git a/fs/squashfs/symlink.c b/fs/squashfs/symlink.c
index 83d87880aac8..e80be2022a7f 100644
--- a/fs/squashfs/symlink.c
+++ b/fs/squashfs/symlink.c
@@ -36,7 +36,6 @@
36#include <linux/slab.h> 36#include <linux/slab.h>
37#include <linux/string.h> 37#include <linux/string.h>
38#include <linux/pagemap.h> 38#include <linux/pagemap.h>
39#include <linux/zlib.h>
40 39
41#include "squashfs_fs.h" 40#include "squashfs_fs.h"
42#include "squashfs_fs_sb.h" 41#include "squashfs_fs_sb.h"
diff --git a/fs/squashfs/zlib_wrapper.c b/fs/squashfs/zlib_wrapper.c
new file mode 100644
index 000000000000..4dd70e04333b
--- /dev/null
+++ b/fs/squashfs/zlib_wrapper.c
@@ -0,0 +1,150 @@
1/*
2 * Squashfs - a compressed read only filesystem for Linux
3 *
4 * Copyright (c) 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009
5 * Phillip Lougher <phillip@lougher.demon.co.uk>
6 *
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation; either version 2,
10 * or (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
20 *
21 * zlib_wrapper.c
22 */
23
24
25#include <linux/mutex.h>
26#include <linux/buffer_head.h>
27#include <linux/zlib.h>
28
29#include "squashfs_fs.h"
30#include "squashfs_fs_sb.h"
31#include "squashfs_fs_i.h"
32#include "squashfs.h"
33#include "decompressor.h"
34
35static void *zlib_init(struct squashfs_sb_info *dummy)
36{
37 z_stream *stream = kmalloc(sizeof(z_stream), GFP_KERNEL);
38 if (stream == NULL)
39 goto failed;
40 stream->workspace = kmalloc(zlib_inflate_workspacesize(),
41 GFP_KERNEL);
42 if (stream->workspace == NULL)
43 goto failed;
44
45 return stream;
46
47failed:
48 ERROR("Failed to allocate zlib workspace\n");
49 kfree(stream);
50 return NULL;
51}
52
53
54static void zlib_free(void *strm)
55{
56 z_stream *stream = strm;
57
58 if (stream)
59 kfree(stream->workspace);
60 kfree(stream);
61}
62
63
64static int zlib_uncompress(struct squashfs_sb_info *msblk, void **buffer,
65 struct buffer_head **bh, int b, int offset, int length, int srclength,
66 int pages)
67{
68 int zlib_err = 0, zlib_init = 0;
69 int avail, bytes, k = 0, page = 0;
70 z_stream *stream = msblk->stream;
71
72 mutex_lock(&msblk->read_data_mutex);
73
74 stream->avail_out = 0;
75 stream->avail_in = 0;
76
77 bytes = length;
78 do {
79 if (stream->avail_in == 0 && k < b) {
80 avail = min(bytes, msblk->devblksize - offset);
81 bytes -= avail;
82 wait_on_buffer(bh[k]);
83 if (!buffer_uptodate(bh[k]))
84 goto release_mutex;
85
86 if (avail == 0) {
87 offset = 0;
88 put_bh(bh[k++]);
89 continue;
90 }
91
92 stream->next_in = bh[k]->b_data + offset;
93 stream->avail_in = avail;
94 offset = 0;
95 }
96
97 if (stream->avail_out == 0 && page < pages) {
98 stream->next_out = buffer[page++];
99 stream->avail_out = PAGE_CACHE_SIZE;
100 }
101
102 if (!zlib_init) {
103 zlib_err = zlib_inflateInit(stream);
104 if (zlib_err != Z_OK) {
105 ERROR("zlib_inflateInit returned unexpected "
106 "result 0x%x, srclength %d\n",
107 zlib_err, srclength);
108 goto release_mutex;
109 }
110 zlib_init = 1;
111 }
112
113 zlib_err = zlib_inflate(stream, Z_SYNC_FLUSH);
114
115 if (stream->avail_in == 0 && k < b)
116 put_bh(bh[k++]);
117 } while (zlib_err == Z_OK);
118
119 if (zlib_err != Z_STREAM_END) {
120 ERROR("zlib_inflate error, data probably corrupt\n");
121 goto release_mutex;
122 }
123
124 zlib_err = zlib_inflateEnd(stream);
125 if (zlib_err != Z_OK) {
126 ERROR("zlib_inflate error, data probably corrupt\n");
127 goto release_mutex;
128 }
129
130 mutex_unlock(&msblk->read_data_mutex);
131 return stream->total_out;
132
133release_mutex:
134 mutex_unlock(&msblk->read_data_mutex);
135
136 for (; k < b; k++)
137 put_bh(bh[k]);
138
139 return -EIO;
140}
141
142const struct squashfs_decompressor squashfs_zlib_comp_ops = {
143 .init = zlib_init,
144 .free = zlib_free,
145 .decompress = zlib_uncompress,
146 .id = ZLIB_COMPRESSION,
147 .name = "zlib",
148 .supported = 1
149};
150
diff --git a/fs/super.c b/fs/super.c
index aff046b0fe78..f35ac6022109 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -568,7 +568,7 @@ out:
568int do_remount_sb(struct super_block *sb, int flags, void *data, int force) 568int do_remount_sb(struct super_block *sb, int flags, void *data, int force)
569{ 569{
570 int retval; 570 int retval;
571 int remount_rw; 571 int remount_rw, remount_ro;
572 572
573 if (sb->s_frozen != SB_UNFROZEN) 573 if (sb->s_frozen != SB_UNFROZEN)
574 return -EBUSY; 574 return -EBUSY;
@@ -583,9 +583,12 @@ int do_remount_sb(struct super_block *sb, int flags, void *data, int force)
583 shrink_dcache_sb(sb); 583 shrink_dcache_sb(sb);
584 sync_filesystem(sb); 584 sync_filesystem(sb);
585 585
586 remount_ro = (flags & MS_RDONLY) && !(sb->s_flags & MS_RDONLY);
587 remount_rw = !(flags & MS_RDONLY) && (sb->s_flags & MS_RDONLY);
588
586 /* If we are remounting RDONLY and current sb is read/write, 589 /* If we are remounting RDONLY and current sb is read/write,
587 make sure there are no rw files opened */ 590 make sure there are no rw files opened */
588 if ((flags & MS_RDONLY) && !(sb->s_flags & MS_RDONLY)) { 591 if (remount_ro) {
589 if (force) 592 if (force)
590 mark_files_ro(sb); 593 mark_files_ro(sb);
591 else if (!fs_may_remount_ro(sb)) 594 else if (!fs_may_remount_ro(sb))
@@ -594,7 +597,6 @@ int do_remount_sb(struct super_block *sb, int flags, void *data, int force)
594 if (retval < 0 && retval != -ENOSYS) 597 if (retval < 0 && retval != -ENOSYS)
595 return -EBUSY; 598 return -EBUSY;
596 } 599 }
597 remount_rw = !(flags & MS_RDONLY) && (sb->s_flags & MS_RDONLY);
598 600
599 if (sb->s_op->remount_fs) { 601 if (sb->s_op->remount_fs) {
600 retval = sb->s_op->remount_fs(sb, &flags, data); 602 retval = sb->s_op->remount_fs(sb, &flags, data);
@@ -604,6 +606,16 @@ int do_remount_sb(struct super_block *sb, int flags, void *data, int force)
604 sb->s_flags = (sb->s_flags & ~MS_RMT_MASK) | (flags & MS_RMT_MASK); 606 sb->s_flags = (sb->s_flags & ~MS_RMT_MASK) | (flags & MS_RMT_MASK);
605 if (remount_rw) 607 if (remount_rw)
606 vfs_dq_quota_on_remount(sb); 608 vfs_dq_quota_on_remount(sb);
609 /*
610 * Some filesystems modify their metadata via some other path than the
611 * bdev buffer cache (eg. use a private mapping, or directories in
612 * pagecache, etc). Also file data modifications go via their own
613 * mappings. So If we try to mount readonly then copy the filesystem
614 * from bdev, we could get stale data, so invalidate it to give a best
615 * effort at coherency.
616 */
617 if (remount_ro && sb->s_bdev)
618 invalidate_bdev(sb->s_bdev);
607 return 0; 619 return 0;
608} 620}
609 621
@@ -925,6 +937,9 @@ vfs_kern_mount(struct file_system_type *type, int flags, const char *name, void
925 if (!mnt) 937 if (!mnt)
926 goto out; 938 goto out;
927 939
940 if (flags & MS_KERNMOUNT)
941 mnt->mnt_flags = MNT_INTERNAL;
942
928 if (data && !(type->fs_flags & FS_BINARY_MOUNTDATA)) { 943 if (data && !(type->fs_flags & FS_BINARY_MOUNTDATA)) {
929 secdata = alloc_secdata(); 944 secdata = alloc_secdata();
930 if (!secdata) 945 if (!secdata)
diff --git a/fs/sync.c b/fs/sync.c
index 418727a2a239..f557d71cb097 100644
--- a/fs/sync.c
+++ b/fs/sync.c
@@ -34,14 +34,14 @@ static int __sync_filesystem(struct super_block *sb, int wait)
34 if (!sb->s_bdi) 34 if (!sb->s_bdi)
35 return 0; 35 return 0;
36 36
37 /* Avoid doing twice syncing and cache pruning for quota sync */ 37 if (sb->s_qcop && sb->s_qcop->quota_sync)
38 if (!wait) { 38 sb->s_qcop->quota_sync(sb, -1, wait);
39 writeout_quota_sb(sb, -1); 39
40 writeback_inodes_sb(sb); 40 if (wait)
41 } else {
42 sync_quota_sb(sb, -1);
43 sync_inodes_sb(sb); 41 sync_inodes_sb(sb);
44 } 42 else
43 writeback_inodes_sb(sb);
44
45 if (sb->s_op->sync_fs) 45 if (sb->s_op->sync_fs)
46 sb->s_op->sync_fs(sb, wait); 46 sb->s_op->sync_fs(sb, wait);
47 return __sync_blockdev(sb->s_bdev, wait); 47 return __sync_blockdev(sb->s_bdev, wait);
diff --git a/fs/sysv/inode.c b/fs/sysv/inode.c
index 9824743832a7..4573734d723d 100644
--- a/fs/sysv/inode.c
+++ b/fs/sysv/inode.c
@@ -26,6 +26,7 @@
26#include <linux/init.h> 26#include <linux/init.h>
27#include <linux/buffer_head.h> 27#include <linux/buffer_head.h>
28#include <linux/vfs.h> 28#include <linux/vfs.h>
29#include <linux/writeback.h>
29#include <linux/namei.h> 30#include <linux/namei.h>
30#include <asm/byteorder.h> 31#include <asm/byteorder.h>
31#include "sysv.h" 32#include "sysv.h"
@@ -246,7 +247,7 @@ bad_inode:
246 return ERR_PTR(-EIO); 247 return ERR_PTR(-EIO);
247} 248}
248 249
249int sysv_write_inode(struct inode *inode, int wait) 250static int __sysv_write_inode(struct inode *inode, int wait)
250{ 251{
251 struct super_block * sb = inode->i_sb; 252 struct super_block * sb = inode->i_sb;
252 struct sysv_sb_info * sbi = SYSV_SB(sb); 253 struct sysv_sb_info * sbi = SYSV_SB(sb);
@@ -296,9 +297,14 @@ int sysv_write_inode(struct inode *inode, int wait)
296 return 0; 297 return 0;
297} 298}
298 299
300int sysv_write_inode(struct inode *inode, struct writeback_control *wbc)
301{
302 return __sysv_write_inode(inode, wbc->sync_mode == WB_SYNC_ALL);
303}
304
299int sysv_sync_inode(struct inode *inode) 305int sysv_sync_inode(struct inode *inode)
300{ 306{
301 return sysv_write_inode(inode, 1); 307 return __sysv_write_inode(inode, 1);
302} 308}
303 309
304static void sysv_delete_inode(struct inode *inode) 310static void sysv_delete_inode(struct inode *inode)
diff --git a/fs/sysv/sysv.h b/fs/sysv/sysv.h
index 53786eb5cf60..94cb9b4d76c2 100644
--- a/fs/sysv/sysv.h
+++ b/fs/sysv/sysv.h
@@ -142,7 +142,7 @@ extern int __sysv_write_begin(struct file *file, struct address_space *mapping,
142 142
143/* inode.c */ 143/* inode.c */
144extern struct inode *sysv_iget(struct super_block *, unsigned int); 144extern struct inode *sysv_iget(struct super_block *, unsigned int);
145extern int sysv_write_inode(struct inode *, int); 145extern int sysv_write_inode(struct inode *, struct writeback_control *wbc);
146extern int sysv_sync_inode(struct inode *); 146extern int sysv_sync_inode(struct inode *);
147extern void sysv_set_inode(struct inode *, dev_t); 147extern void sysv_set_inode(struct inode *, dev_t);
148extern int sysv_getattr(struct vfsmount *, struct dentry *, struct kstat *); 148extern int sysv_getattr(struct vfsmount *, struct dentry *, struct kstat *);
diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c
index 552fb0111fff..401e503d44a1 100644
--- a/fs/ubifs/dir.c
+++ b/fs/ubifs/dir.c
@@ -1120,7 +1120,7 @@ static int ubifs_rename(struct inode *old_dir, struct dentry *old_dentry,
1120 if (release) 1120 if (release)
1121 ubifs_release_budget(c, &ino_req); 1121 ubifs_release_budget(c, &ino_req);
1122 if (IS_SYNC(old_inode)) 1122 if (IS_SYNC(old_inode))
1123 err = old_inode->i_sb->s_op->write_inode(old_inode, 1); 1123 err = old_inode->i_sb->s_op->write_inode(old_inode, NULL);
1124 return err; 1124 return err;
1125 1125
1126out_cancel: 1126out_cancel:
diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c
index 16a6444330ec..e26c02ab6cd5 100644
--- a/fs/ubifs/file.c
+++ b/fs/ubifs/file.c
@@ -1011,7 +1011,7 @@ static int ubifs_writepage(struct page *page, struct writeback_control *wbc)
1011 /* Is the page fully inside @i_size? */ 1011 /* Is the page fully inside @i_size? */
1012 if (page->index < end_index) { 1012 if (page->index < end_index) {
1013 if (page->index >= synced_i_size >> PAGE_CACHE_SHIFT) { 1013 if (page->index >= synced_i_size >> PAGE_CACHE_SHIFT) {
1014 err = inode->i_sb->s_op->write_inode(inode, 1); 1014 err = inode->i_sb->s_op->write_inode(inode, NULL);
1015 if (err) 1015 if (err)
1016 goto out_unlock; 1016 goto out_unlock;
1017 /* 1017 /*
@@ -1039,7 +1039,7 @@ static int ubifs_writepage(struct page *page, struct writeback_control *wbc)
1039 kunmap_atomic(kaddr, KM_USER0); 1039 kunmap_atomic(kaddr, KM_USER0);
1040 1040
1041 if (i_size > synced_i_size) { 1041 if (i_size > synced_i_size) {
1042 err = inode->i_sb->s_op->write_inode(inode, 1); 1042 err = inode->i_sb->s_op->write_inode(inode, NULL);
1043 if (err) 1043 if (err)
1044 goto out_unlock; 1044 goto out_unlock;
1045 } 1045 }
@@ -1242,7 +1242,7 @@ static int do_setattr(struct ubifs_info *c, struct inode *inode,
1242 if (release) 1242 if (release)
1243 ubifs_release_budget(c, &req); 1243 ubifs_release_budget(c, &req);
1244 if (IS_SYNC(inode)) 1244 if (IS_SYNC(inode))
1245 err = inode->i_sb->s_op->write_inode(inode, 1); 1245 err = inode->i_sb->s_op->write_inode(inode, NULL);
1246 return err; 1246 return err;
1247 1247
1248out: 1248out:
@@ -1316,7 +1316,7 @@ int ubifs_fsync(struct file *file, struct dentry *dentry, int datasync)
1316 * the inode unless this is a 'datasync()' call. 1316 * the inode unless this is a 'datasync()' call.
1317 */ 1317 */
1318 if (!datasync || (inode->i_state & I_DIRTY_DATASYNC)) { 1318 if (!datasync || (inode->i_state & I_DIRTY_DATASYNC)) {
1319 err = inode->i_sb->s_op->write_inode(inode, 1); 1319 err = inode->i_sb->s_op->write_inode(inode, NULL);
1320 if (err) 1320 if (err)
1321 return err; 1321 return err;
1322 } 1322 }
diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c
index 43f9d19a6f33..4d2f2157dd3f 100644
--- a/fs/ubifs/super.c
+++ b/fs/ubifs/super.c
@@ -283,7 +283,7 @@ static void ubifs_destroy_inode(struct inode *inode)
283/* 283/*
284 * Note, Linux write-back code calls this without 'i_mutex'. 284 * Note, Linux write-back code calls this without 'i_mutex'.
285 */ 285 */
286static int ubifs_write_inode(struct inode *inode, int wait) 286static int ubifs_write_inode(struct inode *inode, struct writeback_control *wbc)
287{ 287{
288 int err = 0; 288 int err = 0;
289 struct ubifs_info *c = inode->i_sb->s_fs_info; 289 struct ubifs_info *c = inode->i_sb->s_fs_info;
diff --git a/fs/udf/balloc.c b/fs/udf/balloc.c
index 82372e332f08..ccc3ad7242d4 100644
--- a/fs/udf/balloc.c
+++ b/fs/udf/balloc.c
@@ -208,7 +208,7 @@ static void udf_bitmap_free_blocks(struct super_block *sb,
208 ((char *)bh->b_data)[(bit + i) >> 3]); 208 ((char *)bh->b_data)[(bit + i) >> 3]);
209 } else { 209 } else {
210 if (inode) 210 if (inode)
211 vfs_dq_free_block(inode, 1); 211 dquot_free_block(inode, 1);
212 udf_add_free_space(sb, sbi->s_partition, 1); 212 udf_add_free_space(sb, sbi->s_partition, 1);
213 } 213 }
214 } 214 }
@@ -260,11 +260,11 @@ static int udf_bitmap_prealloc_blocks(struct super_block *sb,
260 while (bit < (sb->s_blocksize << 3) && block_count > 0) { 260 while (bit < (sb->s_blocksize << 3) && block_count > 0) {
261 if (!udf_test_bit(bit, bh->b_data)) 261 if (!udf_test_bit(bit, bh->b_data))
262 goto out; 262 goto out;
263 else if (vfs_dq_prealloc_block(inode, 1)) 263 else if (dquot_prealloc_block(inode, 1))
264 goto out; 264 goto out;
265 else if (!udf_clear_bit(bit, bh->b_data)) { 265 else if (!udf_clear_bit(bit, bh->b_data)) {
266 udf_debug("bit already cleared for block %d\n", bit); 266 udf_debug("bit already cleared for block %d\n", bit);
267 vfs_dq_free_block(inode, 1); 267 dquot_free_block(inode, 1);
268 goto out; 268 goto out;
269 } 269 }
270 block_count--; 270 block_count--;
@@ -390,10 +390,14 @@ got_block:
390 /* 390 /*
391 * Check quota for allocation of this block. 391 * Check quota for allocation of this block.
392 */ 392 */
393 if (inode && vfs_dq_alloc_block(inode, 1)) { 393 if (inode) {
394 mutex_unlock(&sbi->s_alloc_mutex); 394 int ret = dquot_alloc_block(inode, 1);
395 *err = -EDQUOT; 395
396 return 0; 396 if (ret) {
397 mutex_unlock(&sbi->s_alloc_mutex);
398 *err = ret;
399 return 0;
400 }
397 } 401 }
398 402
399 newblock = bit + (block_group << (sb->s_blocksize_bits + 3)) - 403 newblock = bit + (block_group << (sb->s_blocksize_bits + 3)) -
@@ -449,7 +453,7 @@ static void udf_table_free_blocks(struct super_block *sb,
449 /* We do this up front - There are some error conditions that 453 /* We do this up front - There are some error conditions that
450 could occure, but.. oh well */ 454 could occure, but.. oh well */
451 if (inode) 455 if (inode)
452 vfs_dq_free_block(inode, count); 456 dquot_free_block(inode, count);
453 udf_add_free_space(sb, sbi->s_partition, count); 457 udf_add_free_space(sb, sbi->s_partition, count);
454 458
455 start = bloc->logicalBlockNum + offset; 459 start = bloc->logicalBlockNum + offset;
@@ -547,7 +551,7 @@ static void udf_table_free_blocks(struct super_block *sb,
547 } 551 }
548 552
549 if (epos.offset + (2 * adsize) > sb->s_blocksize) { 553 if (epos.offset + (2 * adsize) > sb->s_blocksize) {
550 char *sptr, *dptr; 554 unsigned char *sptr, *dptr;
551 int loffset; 555 int loffset;
552 556
553 brelse(oepos.bh); 557 brelse(oepos.bh);
@@ -694,7 +698,7 @@ static int udf_table_prealloc_blocks(struct super_block *sb,
694 epos.offset -= adsize; 698 epos.offset -= adsize;
695 699
696 alloc_count = (elen >> sb->s_blocksize_bits); 700 alloc_count = (elen >> sb->s_blocksize_bits);
697 if (inode && vfs_dq_prealloc_block(inode, 701 if (inode && dquot_prealloc_block(inode,
698 alloc_count > block_count ? block_count : alloc_count)) 702 alloc_count > block_count ? block_count : alloc_count))
699 alloc_count = 0; 703 alloc_count = 0;
700 else if (alloc_count > block_count) { 704 else if (alloc_count > block_count) {
@@ -797,12 +801,13 @@ static int udf_table_new_block(struct super_block *sb,
797 newblock = goal_eloc.logicalBlockNum; 801 newblock = goal_eloc.logicalBlockNum;
798 goal_eloc.logicalBlockNum++; 802 goal_eloc.logicalBlockNum++;
799 goal_elen -= sb->s_blocksize; 803 goal_elen -= sb->s_blocksize;
800 804 if (inode) {
801 if (inode && vfs_dq_alloc_block(inode, 1)) { 805 *err = dquot_alloc_block(inode, 1);
802 brelse(goal_epos.bh); 806 if (*err) {
803 mutex_unlock(&sbi->s_alloc_mutex); 807 brelse(goal_epos.bh);
804 *err = -EDQUOT; 808 mutex_unlock(&sbi->s_alloc_mutex);
805 return 0; 809 return 0;
810 }
806 } 811 }
807 812
808 if (goal_elen) 813 if (goal_elen)
diff --git a/fs/udf/dir.c b/fs/udf/dir.c
index 61d9a76a3a69..f0f2a436251e 100644
--- a/fs/udf/dir.c
+++ b/fs/udf/dir.c
@@ -45,8 +45,8 @@ static int do_udf_readdir(struct inode *dir, struct file *filp,
45 int block, iblock; 45 int block, iblock;
46 loff_t nf_pos = (filp->f_pos - 1) << 2; 46 loff_t nf_pos = (filp->f_pos - 1) << 2;
47 int flen; 47 int flen;
48 char *fname = NULL; 48 unsigned char *fname = NULL;
49 char *nameptr; 49 unsigned char *nameptr;
50 uint16_t liu; 50 uint16_t liu;
51 uint8_t lfi; 51 uint8_t lfi;
52 loff_t size = udf_ext0_offset(dir) + dir->i_size; 52 loff_t size = udf_ext0_offset(dir) + dir->i_size;
diff --git a/fs/udf/file.c b/fs/udf/file.c
index f311d509b6a3..1eb06774ed90 100644
--- a/fs/udf/file.c
+++ b/fs/udf/file.c
@@ -34,6 +34,7 @@
34#include <linux/errno.h> 34#include <linux/errno.h>
35#include <linux/smp_lock.h> 35#include <linux/smp_lock.h>
36#include <linux/pagemap.h> 36#include <linux/pagemap.h>
37#include <linux/quotaops.h>
37#include <linux/buffer_head.h> 38#include <linux/buffer_head.h>
38#include <linux/aio.h> 39#include <linux/aio.h>
39 40
@@ -207,7 +208,7 @@ const struct file_operations udf_file_operations = {
207 .read = do_sync_read, 208 .read = do_sync_read,
208 .aio_read = generic_file_aio_read, 209 .aio_read = generic_file_aio_read,
209 .ioctl = udf_ioctl, 210 .ioctl = udf_ioctl,
210 .open = generic_file_open, 211 .open = dquot_file_open,
211 .mmap = generic_file_mmap, 212 .mmap = generic_file_mmap,
212 .write = do_sync_write, 213 .write = do_sync_write,
213 .aio_write = udf_file_aio_write, 214 .aio_write = udf_file_aio_write,
@@ -217,6 +218,29 @@ const struct file_operations udf_file_operations = {
217 .llseek = generic_file_llseek, 218 .llseek = generic_file_llseek,
218}; 219};
219 220
221static int udf_setattr(struct dentry *dentry, struct iattr *iattr)
222{
223 struct inode *inode = dentry->d_inode;
224 int error;
225
226 error = inode_change_ok(inode, iattr);
227 if (error)
228 return error;
229
230 if (iattr->ia_valid & ATTR_SIZE)
231 dquot_initialize(inode);
232
233 if ((iattr->ia_valid & ATTR_UID && iattr->ia_uid != inode->i_uid) ||
234 (iattr->ia_valid & ATTR_GID && iattr->ia_gid != inode->i_gid)) {
235 error = dquot_transfer(inode, iattr);
236 if (error)
237 return error;
238 }
239
240 return inode_setattr(inode, iattr);
241}
242
220const struct inode_operations udf_file_inode_operations = { 243const struct inode_operations udf_file_inode_operations = {
221 .truncate = udf_truncate, 244 .truncate = udf_truncate,
245 .setattr = udf_setattr,
222}; 246};
diff --git a/fs/udf/ialloc.c b/fs/udf/ialloc.c
index c10fa39f97e2..fb68c9cd0c3e 100644
--- a/fs/udf/ialloc.c
+++ b/fs/udf/ialloc.c
@@ -36,8 +36,8 @@ void udf_free_inode(struct inode *inode)
36 * Note: we must free any quota before locking the superblock, 36 * Note: we must free any quota before locking the superblock,
37 * as writing the quota to disk may need the lock as well. 37 * as writing the quota to disk may need the lock as well.
38 */ 38 */
39 vfs_dq_free_inode(inode); 39 dquot_free_inode(inode);
40 vfs_dq_drop(inode); 40 dquot_drop(inode);
41 41
42 clear_inode(inode); 42 clear_inode(inode);
43 43
@@ -61,7 +61,7 @@ struct inode *udf_new_inode(struct inode *dir, int mode, int *err)
61 struct super_block *sb = dir->i_sb; 61 struct super_block *sb = dir->i_sb;
62 struct udf_sb_info *sbi = UDF_SB(sb); 62 struct udf_sb_info *sbi = UDF_SB(sb);
63 struct inode *inode; 63 struct inode *inode;
64 int block; 64 int block, ret;
65 uint32_t start = UDF_I(dir)->i_location.logicalBlockNum; 65 uint32_t start = UDF_I(dir)->i_location.logicalBlockNum;
66 struct udf_inode_info *iinfo; 66 struct udf_inode_info *iinfo;
67 struct udf_inode_info *dinfo = UDF_I(dir); 67 struct udf_inode_info *dinfo = UDF_I(dir);
@@ -153,12 +153,14 @@ struct inode *udf_new_inode(struct inode *dir, int mode, int *err)
153 insert_inode_hash(inode); 153 insert_inode_hash(inode);
154 mark_inode_dirty(inode); 154 mark_inode_dirty(inode);
155 155
156 if (vfs_dq_alloc_inode(inode)) { 156 dquot_initialize(inode);
157 vfs_dq_drop(inode); 157 ret = dquot_alloc_inode(inode);
158 if (ret) {
159 dquot_drop(inode);
158 inode->i_flags |= S_NOQUOTA; 160 inode->i_flags |= S_NOQUOTA;
159 inode->i_nlink = 0; 161 inode->i_nlink = 0;
160 iput(inode); 162 iput(inode);
161 *err = -EDQUOT; 163 *err = ret;
162 return NULL; 164 return NULL;
163 } 165 }
164 166
diff --git a/fs/udf/inode.c b/fs/udf/inode.c
index f90231eb2916..b57ab0402d89 100644
--- a/fs/udf/inode.c
+++ b/fs/udf/inode.c
@@ -36,6 +36,7 @@
36#include <linux/pagemap.h> 36#include <linux/pagemap.h>
37#include <linux/buffer_head.h> 37#include <linux/buffer_head.h>
38#include <linux/writeback.h> 38#include <linux/writeback.h>
39#include <linux/quotaops.h>
39#include <linux/slab.h> 40#include <linux/slab.h>
40#include <linux/crc-itu-t.h> 41#include <linux/crc-itu-t.h>
41 42
@@ -70,6 +71,9 @@ static int udf_get_block(struct inode *, sector_t, struct buffer_head *, int);
70 71
71void udf_delete_inode(struct inode *inode) 72void udf_delete_inode(struct inode *inode)
72{ 73{
74 if (!is_bad_inode(inode))
75 dquot_initialize(inode);
76
73 truncate_inode_pages(&inode->i_data, 0); 77 truncate_inode_pages(&inode->i_data, 0);
74 78
75 if (is_bad_inode(inode)) 79 if (is_bad_inode(inode))
@@ -108,6 +112,8 @@ void udf_clear_inode(struct inode *inode)
108 (unsigned long long)inode->i_size, 112 (unsigned long long)inode->i_size,
109 (unsigned long long)iinfo->i_lenExtents); 113 (unsigned long long)iinfo->i_lenExtents);
110 } 114 }
115
116 dquot_drop(inode);
111 kfree(iinfo->i_ext.i_data); 117 kfree(iinfo->i_ext.i_data);
112 iinfo->i_ext.i_data = NULL; 118 iinfo->i_ext.i_data = NULL;
113} 119}
@@ -1373,12 +1379,12 @@ static mode_t udf_convert_permissions(struct fileEntry *fe)
1373 return mode; 1379 return mode;
1374} 1380}
1375 1381
1376int udf_write_inode(struct inode *inode, int sync) 1382int udf_write_inode(struct inode *inode, struct writeback_control *wbc)
1377{ 1383{
1378 int ret; 1384 int ret;
1379 1385
1380 lock_kernel(); 1386 lock_kernel();
1381 ret = udf_update_inode(inode, sync); 1387 ret = udf_update_inode(inode, wbc->sync_mode == WB_SYNC_ALL);
1382 unlock_kernel(); 1388 unlock_kernel();
1383 1389
1384 return ret; 1390 return ret;
@@ -1672,7 +1678,7 @@ int8_t udf_add_aext(struct inode *inode, struct extent_position *epos,
1672 return -1; 1678 return -1;
1673 1679
1674 if (epos->offset + (2 * adsize) > inode->i_sb->s_blocksize) { 1680 if (epos->offset + (2 * adsize) > inode->i_sb->s_blocksize) {
1675 char *sptr, *dptr; 1681 unsigned char *sptr, *dptr;
1676 struct buffer_head *nbh; 1682 struct buffer_head *nbh;
1677 int err, loffset; 1683 int err, loffset;
1678 struct kernel_lb_addr obloc = epos->block; 1684 struct kernel_lb_addr obloc = epos->block;
diff --git a/fs/udf/namei.c b/fs/udf/namei.c
index cd2115060fdc..db423ab078b1 100644
--- a/fs/udf/namei.c
+++ b/fs/udf/namei.c
@@ -34,8 +34,8 @@
34#include <linux/crc-itu-t.h> 34#include <linux/crc-itu-t.h>
35#include <linux/exportfs.h> 35#include <linux/exportfs.h>
36 36
37static inline int udf_match(int len1, const char *name1, int len2, 37static inline int udf_match(int len1, const unsigned char *name1, int len2,
38 const char *name2) 38 const unsigned char *name2)
39{ 39{
40 if (len1 != len2) 40 if (len1 != len2)
41 return 0; 41 return 0;
@@ -142,15 +142,15 @@ int udf_write_fi(struct inode *inode, struct fileIdentDesc *cfi,
142} 142}
143 143
144static struct fileIdentDesc *udf_find_entry(struct inode *dir, 144static struct fileIdentDesc *udf_find_entry(struct inode *dir,
145 struct qstr *child, 145 const struct qstr *child,
146 struct udf_fileident_bh *fibh, 146 struct udf_fileident_bh *fibh,
147 struct fileIdentDesc *cfi) 147 struct fileIdentDesc *cfi)
148{ 148{
149 struct fileIdentDesc *fi = NULL; 149 struct fileIdentDesc *fi = NULL;
150 loff_t f_pos; 150 loff_t f_pos;
151 int block, flen; 151 int block, flen;
152 char *fname = NULL; 152 unsigned char *fname = NULL;
153 char *nameptr; 153 unsigned char *nameptr;
154 uint8_t lfi; 154 uint8_t lfi;
155 uint16_t liu; 155 uint16_t liu;
156 loff_t size; 156 loff_t size;
@@ -308,7 +308,7 @@ static struct fileIdentDesc *udf_add_entry(struct inode *dir,
308{ 308{
309 struct super_block *sb = dir->i_sb; 309 struct super_block *sb = dir->i_sb;
310 struct fileIdentDesc *fi = NULL; 310 struct fileIdentDesc *fi = NULL;
311 char *name = NULL; 311 unsigned char *name = NULL;
312 int namelen; 312 int namelen;
313 loff_t f_pos; 313 loff_t f_pos;
314 loff_t size = udf_ext0_offset(dir) + dir->i_size; 314 loff_t size = udf_ext0_offset(dir) + dir->i_size;
@@ -563,6 +563,8 @@ static int udf_create(struct inode *dir, struct dentry *dentry, int mode,
563 int err; 563 int err;
564 struct udf_inode_info *iinfo; 564 struct udf_inode_info *iinfo;
565 565
566 dquot_initialize(dir);
567
566 lock_kernel(); 568 lock_kernel();
567 inode = udf_new_inode(dir, mode, &err); 569 inode = udf_new_inode(dir, mode, &err);
568 if (!inode) { 570 if (!inode) {
@@ -616,6 +618,8 @@ static int udf_mknod(struct inode *dir, struct dentry *dentry, int mode,
616 if (!old_valid_dev(rdev)) 618 if (!old_valid_dev(rdev))
617 return -EINVAL; 619 return -EINVAL;
618 620
621 dquot_initialize(dir);
622
619 lock_kernel(); 623 lock_kernel();
620 err = -EIO; 624 err = -EIO;
621 inode = udf_new_inode(dir, mode, &err); 625 inode = udf_new_inode(dir, mode, &err);
@@ -662,6 +666,8 @@ static int udf_mkdir(struct inode *dir, struct dentry *dentry, int mode)
662 struct udf_inode_info *dinfo = UDF_I(dir); 666 struct udf_inode_info *dinfo = UDF_I(dir);
663 struct udf_inode_info *iinfo; 667 struct udf_inode_info *iinfo;
664 668
669 dquot_initialize(dir);
670
665 lock_kernel(); 671 lock_kernel();
666 err = -EMLINK; 672 err = -EMLINK;
667 if (dir->i_nlink >= (256 << sizeof(dir->i_nlink)) - 1) 673 if (dir->i_nlink >= (256 << sizeof(dir->i_nlink)) - 1)
@@ -799,6 +805,8 @@ static int udf_rmdir(struct inode *dir, struct dentry *dentry)
799 struct fileIdentDesc *fi, cfi; 805 struct fileIdentDesc *fi, cfi;
800 struct kernel_lb_addr tloc; 806 struct kernel_lb_addr tloc;
801 807
808 dquot_initialize(dir);
809
802 retval = -ENOENT; 810 retval = -ENOENT;
803 lock_kernel(); 811 lock_kernel();
804 fi = udf_find_entry(dir, &dentry->d_name, &fibh, &cfi); 812 fi = udf_find_entry(dir, &dentry->d_name, &fibh, &cfi);
@@ -845,6 +853,8 @@ static int udf_unlink(struct inode *dir, struct dentry *dentry)
845 struct fileIdentDesc cfi; 853 struct fileIdentDesc cfi;
846 struct kernel_lb_addr tloc; 854 struct kernel_lb_addr tloc;
847 855
856 dquot_initialize(dir);
857
848 retval = -ENOENT; 858 retval = -ENOENT;
849 lock_kernel(); 859 lock_kernel();
850 fi = udf_find_entry(dir, &dentry->d_name, &fibh, &cfi); 860 fi = udf_find_entry(dir, &dentry->d_name, &fibh, &cfi);
@@ -885,20 +895,22 @@ static int udf_symlink(struct inode *dir, struct dentry *dentry,
885{ 895{
886 struct inode *inode; 896 struct inode *inode;
887 struct pathComponent *pc; 897 struct pathComponent *pc;
888 char *compstart; 898 const char *compstart;
889 struct udf_fileident_bh fibh; 899 struct udf_fileident_bh fibh;
890 struct extent_position epos = {}; 900 struct extent_position epos = {};
891 int eoffset, elen = 0; 901 int eoffset, elen = 0;
892 struct fileIdentDesc *fi; 902 struct fileIdentDesc *fi;
893 struct fileIdentDesc cfi; 903 struct fileIdentDesc cfi;
894 char *ea; 904 uint8_t *ea;
895 int err; 905 int err;
896 int block; 906 int block;
897 char *name = NULL; 907 unsigned char *name = NULL;
898 int namelen; 908 int namelen;
899 struct buffer_head *bh; 909 struct buffer_head *bh;
900 struct udf_inode_info *iinfo; 910 struct udf_inode_info *iinfo;
901 911
912 dquot_initialize(dir);
913
902 lock_kernel(); 914 lock_kernel();
903 inode = udf_new_inode(dir, S_IFLNK, &err); 915 inode = udf_new_inode(dir, S_IFLNK, &err);
904 if (!inode) 916 if (!inode)
@@ -970,7 +982,7 @@ static int udf_symlink(struct inode *dir, struct dentry *dentry,
970 982
971 pc = (struct pathComponent *)(ea + elen); 983 pc = (struct pathComponent *)(ea + elen);
972 984
973 compstart = (char *)symname; 985 compstart = symname;
974 986
975 do { 987 do {
976 symname++; 988 symname++;
@@ -1069,6 +1081,8 @@ static int udf_link(struct dentry *old_dentry, struct inode *dir,
1069 int err; 1081 int err;
1070 struct buffer_head *bh; 1082 struct buffer_head *bh;
1071 1083
1084 dquot_initialize(dir);
1085
1072 lock_kernel(); 1086 lock_kernel();
1073 if (inode->i_nlink >= (256 << sizeof(inode->i_nlink)) - 1) { 1087 if (inode->i_nlink >= (256 << sizeof(inode->i_nlink)) - 1) {
1074 unlock_kernel(); 1088 unlock_kernel();
@@ -1131,6 +1145,9 @@ static int udf_rename(struct inode *old_dir, struct dentry *old_dentry,
1131 struct kernel_lb_addr tloc; 1145 struct kernel_lb_addr tloc;
1132 struct udf_inode_info *old_iinfo = UDF_I(old_inode); 1146 struct udf_inode_info *old_iinfo = UDF_I(old_inode);
1133 1147
1148 dquot_initialize(old_dir);
1149 dquot_initialize(new_dir);
1150
1134 lock_kernel(); 1151 lock_kernel();
1135 ofi = udf_find_entry(old_dir, &old_dentry->d_name, &ofibh, &ocfi); 1152 ofi = udf_find_entry(old_dir, &old_dentry->d_name, &ofibh, &ocfi);
1136 if (ofi) { 1153 if (ofi) {
diff --git a/fs/udf/symlink.c b/fs/udf/symlink.c
index c3265e1385d4..852e91845688 100644
--- a/fs/udf/symlink.c
+++ b/fs/udf/symlink.c
@@ -32,12 +32,12 @@
32#include <linux/buffer_head.h> 32#include <linux/buffer_head.h>
33#include "udf_i.h" 33#include "udf_i.h"
34 34
35static void udf_pc_to_char(struct super_block *sb, char *from, int fromlen, 35static void udf_pc_to_char(struct super_block *sb, unsigned char *from,
36 char *to) 36 int fromlen, unsigned char *to)
37{ 37{
38 struct pathComponent *pc; 38 struct pathComponent *pc;
39 int elen = 0; 39 int elen = 0;
40 char *p = to; 40 unsigned char *p = to;
41 41
42 while (elen < fromlen) { 42 while (elen < fromlen) {
43 pc = (struct pathComponent *)(from + elen); 43 pc = (struct pathComponent *)(from + elen);
@@ -75,9 +75,9 @@ static int udf_symlink_filler(struct file *file, struct page *page)
75{ 75{
76 struct inode *inode = page->mapping->host; 76 struct inode *inode = page->mapping->host;
77 struct buffer_head *bh = NULL; 77 struct buffer_head *bh = NULL;
78 char *symlink; 78 unsigned char *symlink;
79 int err = -EIO; 79 int err = -EIO;
80 char *p = kmap(page); 80 unsigned char *p = kmap(page);
81 struct udf_inode_info *iinfo; 81 struct udf_inode_info *iinfo;
82 82
83 lock_kernel(); 83 lock_kernel();
diff --git a/fs/udf/udfdecl.h b/fs/udf/udfdecl.h
index 8d46f4294ee7..4223ac855da9 100644
--- a/fs/udf/udfdecl.h
+++ b/fs/udf/udfdecl.h
@@ -142,7 +142,7 @@ extern void udf_truncate(struct inode *);
142extern void udf_read_inode(struct inode *); 142extern void udf_read_inode(struct inode *);
143extern void udf_delete_inode(struct inode *); 143extern void udf_delete_inode(struct inode *);
144extern void udf_clear_inode(struct inode *); 144extern void udf_clear_inode(struct inode *);
145extern int udf_write_inode(struct inode *, int); 145extern int udf_write_inode(struct inode *, struct writeback_control *wbc);
146extern long udf_block_map(struct inode *, sector_t); 146extern long udf_block_map(struct inode *, sector_t);
147extern int udf_extend_file(struct inode *, struct extent_position *, 147extern int udf_extend_file(struct inode *, struct extent_position *,
148 struct kernel_long_ad *, sector_t); 148 struct kernel_long_ad *, sector_t);
diff --git a/fs/ufs/balloc.c b/fs/ufs/balloc.c
index 54c16ec95dff..5cfa4d85ccf2 100644
--- a/fs/ufs/balloc.c
+++ b/fs/ufs/balloc.c
@@ -85,7 +85,7 @@ void ufs_free_fragments(struct inode *inode, u64 fragment, unsigned count)
85 "bit already cleared for fragment %u", i); 85 "bit already cleared for fragment %u", i);
86 } 86 }
87 87
88 vfs_dq_free_block(inode, count); 88 dquot_free_block(inode, count);
89 89
90 90
91 fs32_add(sb, &ucg->cg_cs.cs_nffree, count); 91 fs32_add(sb, &ucg->cg_cs.cs_nffree, count);
@@ -195,7 +195,7 @@ do_more:
195 ubh_setblock(UCPI_UBH(ucpi), ucpi->c_freeoff, blkno); 195 ubh_setblock(UCPI_UBH(ucpi), ucpi->c_freeoff, blkno);
196 if ((UFS_SB(sb)->s_flags & UFS_CG_MASK) == UFS_CG_44BSD) 196 if ((UFS_SB(sb)->s_flags & UFS_CG_MASK) == UFS_CG_44BSD)
197 ufs_clusteracct (sb, ucpi, blkno, 1); 197 ufs_clusteracct (sb, ucpi, blkno, 1);
198 vfs_dq_free_block(inode, uspi->s_fpb); 198 dquot_free_block(inode, uspi->s_fpb);
199 199
200 fs32_add(sb, &ucg->cg_cs.cs_nbfree, 1); 200 fs32_add(sb, &ucg->cg_cs.cs_nbfree, 1);
201 uspi->cs_total.cs_nbfree++; 201 uspi->cs_total.cs_nbfree++;
@@ -511,6 +511,7 @@ static u64 ufs_add_fragments(struct inode *inode, u64 fragment,
511 struct ufs_cg_private_info * ucpi; 511 struct ufs_cg_private_info * ucpi;
512 struct ufs_cylinder_group * ucg; 512 struct ufs_cylinder_group * ucg;
513 unsigned cgno, fragno, fragoff, count, fragsize, i; 513 unsigned cgno, fragno, fragoff, count, fragsize, i;
514 int ret;
514 515
515 UFSD("ENTER, fragment %llu, oldcount %u, newcount %u\n", 516 UFSD("ENTER, fragment %llu, oldcount %u, newcount %u\n",
516 (unsigned long long)fragment, oldcount, newcount); 517 (unsigned long long)fragment, oldcount, newcount);
@@ -556,8 +557,9 @@ static u64 ufs_add_fragments(struct inode *inode, u64 fragment,
556 fs32_add(sb, &ucg->cg_frsum[fragsize - count], 1); 557 fs32_add(sb, &ucg->cg_frsum[fragsize - count], 1);
557 for (i = oldcount; i < newcount; i++) 558 for (i = oldcount; i < newcount; i++)
558 ubh_clrbit (UCPI_UBH(ucpi), ucpi->c_freeoff, fragno + i); 559 ubh_clrbit (UCPI_UBH(ucpi), ucpi->c_freeoff, fragno + i);
559 if (vfs_dq_alloc_block(inode, count)) { 560 ret = dquot_alloc_block(inode, count);
560 *err = -EDQUOT; 561 if (ret) {
562 *err = ret;
561 return 0; 563 return 0;
562 } 564 }
563 565
@@ -596,6 +598,7 @@ static u64 ufs_alloc_fragments(struct inode *inode, unsigned cgno,
596 struct ufs_cylinder_group * ucg; 598 struct ufs_cylinder_group * ucg;
597 unsigned oldcg, i, j, k, allocsize; 599 unsigned oldcg, i, j, k, allocsize;
598 u64 result; 600 u64 result;
601 int ret;
599 602
600 UFSD("ENTER, ino %lu, cgno %u, goal %llu, count %u\n", 603 UFSD("ENTER, ino %lu, cgno %u, goal %llu, count %u\n",
601 inode->i_ino, cgno, (unsigned long long)goal, count); 604 inode->i_ino, cgno, (unsigned long long)goal, count);
@@ -664,7 +667,7 @@ cg_found:
664 for (i = count; i < uspi->s_fpb; i++) 667 for (i = count; i < uspi->s_fpb; i++)
665 ubh_setbit (UCPI_UBH(ucpi), ucpi->c_freeoff, goal + i); 668 ubh_setbit (UCPI_UBH(ucpi), ucpi->c_freeoff, goal + i);
666 i = uspi->s_fpb - count; 669 i = uspi->s_fpb - count;
667 vfs_dq_free_block(inode, i); 670 dquot_free_block(inode, i);
668 671
669 fs32_add(sb, &ucg->cg_cs.cs_nffree, i); 672 fs32_add(sb, &ucg->cg_cs.cs_nffree, i);
670 uspi->cs_total.cs_nffree += i; 673 uspi->cs_total.cs_nffree += i;
@@ -676,8 +679,9 @@ cg_found:
676 result = ufs_bitmap_search (sb, ucpi, goal, allocsize); 679 result = ufs_bitmap_search (sb, ucpi, goal, allocsize);
677 if (result == INVBLOCK) 680 if (result == INVBLOCK)
678 return 0; 681 return 0;
679 if (vfs_dq_alloc_block(inode, count)) { 682 ret = dquot_alloc_block(inode, count);
680 *err = -EDQUOT; 683 if (ret) {
684 *err = ret;
681 return 0; 685 return 0;
682 } 686 }
683 for (i = 0; i < count; i++) 687 for (i = 0; i < count; i++)
@@ -714,6 +718,7 @@ static u64 ufs_alloccg_block(struct inode *inode,
714 struct ufs_super_block_first * usb1; 718 struct ufs_super_block_first * usb1;
715 struct ufs_cylinder_group * ucg; 719 struct ufs_cylinder_group * ucg;
716 u64 result, blkno; 720 u64 result, blkno;
721 int ret;
717 722
718 UFSD("ENTER, goal %llu\n", (unsigned long long)goal); 723 UFSD("ENTER, goal %llu\n", (unsigned long long)goal);
719 724
@@ -747,8 +752,9 @@ gotit:
747 ubh_clrblock (UCPI_UBH(ucpi), ucpi->c_freeoff, blkno); 752 ubh_clrblock (UCPI_UBH(ucpi), ucpi->c_freeoff, blkno);
748 if ((UFS_SB(sb)->s_flags & UFS_CG_MASK) == UFS_CG_44BSD) 753 if ((UFS_SB(sb)->s_flags & UFS_CG_MASK) == UFS_CG_44BSD)
749 ufs_clusteracct (sb, ucpi, blkno, -1); 754 ufs_clusteracct (sb, ucpi, blkno, -1);
750 if (vfs_dq_alloc_block(inode, uspi->s_fpb)) { 755 ret = dquot_alloc_block(inode, uspi->s_fpb);
751 *err = -EDQUOT; 756 if (ret) {
757 *err = ret;
752 return INVBLOCK; 758 return INVBLOCK;
753 } 759 }
754 760
diff --git a/fs/ufs/dir.c b/fs/ufs/dir.c
index 22af68f8b682..317a0d444f6b 100644
--- a/fs/ufs/dir.c
+++ b/fs/ufs/dir.c
@@ -31,7 +31,7 @@
31 * len <= UFS_MAXNAMLEN and de != NULL are guaranteed by caller. 31 * len <= UFS_MAXNAMLEN and de != NULL are guaranteed by caller.
32 */ 32 */
33static inline int ufs_match(struct super_block *sb, int len, 33static inline int ufs_match(struct super_block *sb, int len,
34 const char * const name, struct ufs_dir_entry * de) 34 const unsigned char *name, struct ufs_dir_entry *de)
35{ 35{
36 if (len != ufs_get_de_namlen(sb, de)) 36 if (len != ufs_get_de_namlen(sb, de))
37 return 0; 37 return 0;
@@ -70,7 +70,7 @@ static inline unsigned long ufs_dir_pages(struct inode *inode)
70 return (inode->i_size+PAGE_CACHE_SIZE-1)>>PAGE_CACHE_SHIFT; 70 return (inode->i_size+PAGE_CACHE_SIZE-1)>>PAGE_CACHE_SHIFT;
71} 71}
72 72
73ino_t ufs_inode_by_name(struct inode *dir, struct qstr *qstr) 73ino_t ufs_inode_by_name(struct inode *dir, const struct qstr *qstr)
74{ 74{
75 ino_t res = 0; 75 ino_t res = 0;
76 struct ufs_dir_entry *de; 76 struct ufs_dir_entry *de;
@@ -249,11 +249,11 @@ struct ufs_dir_entry *ufs_dotdot(struct inode *dir, struct page **p)
249 * (as a parameter - res_dir). Page is returned mapped and unlocked. 249 * (as a parameter - res_dir). Page is returned mapped and unlocked.
250 * Entry is guaranteed to be valid. 250 * Entry is guaranteed to be valid.
251 */ 251 */
252struct ufs_dir_entry *ufs_find_entry(struct inode *dir, struct qstr *qstr, 252struct ufs_dir_entry *ufs_find_entry(struct inode *dir, const struct qstr *qstr,
253 struct page **res_page) 253 struct page **res_page)
254{ 254{
255 struct super_block *sb = dir->i_sb; 255 struct super_block *sb = dir->i_sb;
256 const char *name = qstr->name; 256 const unsigned char *name = qstr->name;
257 int namelen = qstr->len; 257 int namelen = qstr->len;
258 unsigned reclen = UFS_DIR_REC_LEN(namelen); 258 unsigned reclen = UFS_DIR_REC_LEN(namelen);
259 unsigned long start, n; 259 unsigned long start, n;
@@ -313,7 +313,7 @@ found:
313int ufs_add_link(struct dentry *dentry, struct inode *inode) 313int ufs_add_link(struct dentry *dentry, struct inode *inode)
314{ 314{
315 struct inode *dir = dentry->d_parent->d_inode; 315 struct inode *dir = dentry->d_parent->d_inode;
316 const char *name = dentry->d_name.name; 316 const unsigned char *name = dentry->d_name.name;
317 int namelen = dentry->d_name.len; 317 int namelen = dentry->d_name.len;
318 struct super_block *sb = dir->i_sb; 318 struct super_block *sb = dir->i_sb;
319 unsigned reclen = UFS_DIR_REC_LEN(namelen); 319 unsigned reclen = UFS_DIR_REC_LEN(namelen);
diff --git a/fs/ufs/file.c b/fs/ufs/file.c
index 73655c61240a..a8962cecde5b 100644
--- a/fs/ufs/file.c
+++ b/fs/ufs/file.c
@@ -24,6 +24,7 @@
24 */ 24 */
25 25
26#include <linux/fs.h> 26#include <linux/fs.h>
27#include <linux/quotaops.h>
27 28
28#include "ufs_fs.h" 29#include "ufs_fs.h"
29#include "ufs.h" 30#include "ufs.h"
@@ -40,7 +41,7 @@ const struct file_operations ufs_file_operations = {
40 .write = do_sync_write, 41 .write = do_sync_write,
41 .aio_write = generic_file_aio_write, 42 .aio_write = generic_file_aio_write,
42 .mmap = generic_file_mmap, 43 .mmap = generic_file_mmap,
43 .open = generic_file_open, 44 .open = dquot_file_open,
44 .fsync = simple_fsync, 45 .fsync = simple_fsync,
45 .splice_read = generic_file_splice_read, 46 .splice_read = generic_file_splice_read,
46}; 47};
diff --git a/fs/ufs/ialloc.c b/fs/ufs/ialloc.c
index 3527c00fef0d..230ecf608026 100644
--- a/fs/ufs/ialloc.c
+++ b/fs/ufs/ialloc.c
@@ -95,8 +95,8 @@ void ufs_free_inode (struct inode * inode)
95 95
96 is_directory = S_ISDIR(inode->i_mode); 96 is_directory = S_ISDIR(inode->i_mode);
97 97
98 vfs_dq_free_inode(inode); 98 dquot_free_inode(inode);
99 vfs_dq_drop(inode); 99 dquot_drop(inode);
100 100
101 clear_inode (inode); 101 clear_inode (inode);
102 102
@@ -355,9 +355,10 @@ cg_found:
355 355
356 unlock_super (sb); 356 unlock_super (sb);
357 357
358 if (vfs_dq_alloc_inode(inode)) { 358 dquot_initialize(inode);
359 vfs_dq_drop(inode); 359 err = dquot_alloc_inode(inode);
360 err = -EDQUOT; 360 if (err) {
361 dquot_drop(inode);
361 goto fail_without_unlock; 362 goto fail_without_unlock;
362 } 363 }
363 364
diff --git a/fs/ufs/inode.c b/fs/ufs/inode.c
index 7cf33379fd46..80b68c3702d1 100644
--- a/fs/ufs/inode.c
+++ b/fs/ufs/inode.c
@@ -36,6 +36,8 @@
36#include <linux/mm.h> 36#include <linux/mm.h>
37#include <linux/smp_lock.h> 37#include <linux/smp_lock.h>
38#include <linux/buffer_head.h> 38#include <linux/buffer_head.h>
39#include <linux/writeback.h>
40#include <linux/quotaops.h>
39 41
40#include "ufs_fs.h" 42#include "ufs_fs.h"
41#include "ufs.h" 43#include "ufs.h"
@@ -890,11 +892,11 @@ static int ufs_update_inode(struct inode * inode, int do_sync)
890 return 0; 892 return 0;
891} 893}
892 894
893int ufs_write_inode (struct inode * inode, int wait) 895int ufs_write_inode(struct inode *inode, struct writeback_control *wbc)
894{ 896{
895 int ret; 897 int ret;
896 lock_kernel(); 898 lock_kernel();
897 ret = ufs_update_inode (inode, wait); 899 ret = ufs_update_inode(inode, wbc->sync_mode == WB_SYNC_ALL);
898 unlock_kernel(); 900 unlock_kernel();
899 return ret; 901 return ret;
900} 902}
@@ -908,6 +910,9 @@ void ufs_delete_inode (struct inode * inode)
908{ 910{
909 loff_t old_i_size; 911 loff_t old_i_size;
910 912
913 if (!is_bad_inode(inode))
914 dquot_initialize(inode);
915
911 truncate_inode_pages(&inode->i_data, 0); 916 truncate_inode_pages(&inode->i_data, 0);
912 if (is_bad_inode(inode)) 917 if (is_bad_inode(inode))
913 goto no_delete; 918 goto no_delete;
diff --git a/fs/ufs/namei.c b/fs/ufs/namei.c
index 4c26d9e8bc94..118556243e7a 100644
--- a/fs/ufs/namei.c
+++ b/fs/ufs/namei.c
@@ -30,6 +30,7 @@
30#include <linux/time.h> 30#include <linux/time.h>
31#include <linux/fs.h> 31#include <linux/fs.h>
32#include <linux/smp_lock.h> 32#include <linux/smp_lock.h>
33#include <linux/quotaops.h>
33 34
34#include "ufs_fs.h" 35#include "ufs_fs.h"
35#include "ufs.h" 36#include "ufs.h"
@@ -84,6 +85,9 @@ static int ufs_create (struct inode * dir, struct dentry * dentry, int mode,
84 int err; 85 int err;
85 86
86 UFSD("BEGIN\n"); 87 UFSD("BEGIN\n");
88
89 dquot_initialize(dir);
90
87 inode = ufs_new_inode(dir, mode); 91 inode = ufs_new_inode(dir, mode);
88 err = PTR_ERR(inode); 92 err = PTR_ERR(inode);
89 93
@@ -107,6 +111,9 @@ static int ufs_mknod (struct inode * dir, struct dentry *dentry, int mode, dev_t
107 111
108 if (!old_valid_dev(rdev)) 112 if (!old_valid_dev(rdev))
109 return -EINVAL; 113 return -EINVAL;
114
115 dquot_initialize(dir);
116
110 inode = ufs_new_inode(dir, mode); 117 inode = ufs_new_inode(dir, mode);
111 err = PTR_ERR(inode); 118 err = PTR_ERR(inode);
112 if (!IS_ERR(inode)) { 119 if (!IS_ERR(inode)) {
@@ -131,6 +138,8 @@ static int ufs_symlink (struct inode * dir, struct dentry * dentry,
131 if (l > sb->s_blocksize) 138 if (l > sb->s_blocksize)
132 goto out_notlocked; 139 goto out_notlocked;
133 140
141 dquot_initialize(dir);
142
134 lock_kernel(); 143 lock_kernel();
135 inode = ufs_new_inode(dir, S_IFLNK | S_IRWXUGO); 144 inode = ufs_new_inode(dir, S_IFLNK | S_IRWXUGO);
136 err = PTR_ERR(inode); 145 err = PTR_ERR(inode);
@@ -176,6 +185,8 @@ static int ufs_link (struct dentry * old_dentry, struct inode * dir,
176 return -EMLINK; 185 return -EMLINK;
177 } 186 }
178 187
188 dquot_initialize(dir);
189
179 inode->i_ctime = CURRENT_TIME_SEC; 190 inode->i_ctime = CURRENT_TIME_SEC;
180 inode_inc_link_count(inode); 191 inode_inc_link_count(inode);
181 atomic_inc(&inode->i_count); 192 atomic_inc(&inode->i_count);
@@ -193,6 +204,8 @@ static int ufs_mkdir(struct inode * dir, struct dentry * dentry, int mode)
193 if (dir->i_nlink >= UFS_LINK_MAX) 204 if (dir->i_nlink >= UFS_LINK_MAX)
194 goto out; 205 goto out;
195 206
207 dquot_initialize(dir);
208
196 lock_kernel(); 209 lock_kernel();
197 inode_inc_link_count(dir); 210 inode_inc_link_count(dir);
198 211
@@ -237,6 +250,8 @@ static int ufs_unlink(struct inode *dir, struct dentry *dentry)
237 struct page *page; 250 struct page *page;
238 int err = -ENOENT; 251 int err = -ENOENT;
239 252
253 dquot_initialize(dir);
254
240 de = ufs_find_entry(dir, &dentry->d_name, &page); 255 de = ufs_find_entry(dir, &dentry->d_name, &page);
241 if (!de) 256 if (!de)
242 goto out; 257 goto out;
@@ -281,6 +296,9 @@ static int ufs_rename(struct inode *old_dir, struct dentry *old_dentry,
281 struct ufs_dir_entry *old_de; 296 struct ufs_dir_entry *old_de;
282 int err = -ENOENT; 297 int err = -ENOENT;
283 298
299 dquot_initialize(old_dir);
300 dquot_initialize(new_dir);
301
284 old_de = ufs_find_entry(old_dir, &old_dentry->d_name, &old_page); 302 old_de = ufs_find_entry(old_dir, &old_dentry->d_name, &old_page);
285 if (!old_de) 303 if (!old_de)
286 goto out; 304 goto out;
diff --git a/fs/ufs/super.c b/fs/ufs/super.c
index 143c20bfb04b..66b63a751615 100644
--- a/fs/ufs/super.c
+++ b/fs/ufs/super.c
@@ -1432,6 +1432,11 @@ static void destroy_inodecache(void)
1432 kmem_cache_destroy(ufs_inode_cachep); 1432 kmem_cache_destroy(ufs_inode_cachep);
1433} 1433}
1434 1434
1435static void ufs_clear_inode(struct inode *inode)
1436{
1437 dquot_drop(inode);
1438}
1439
1435#ifdef CONFIG_QUOTA 1440#ifdef CONFIG_QUOTA
1436static ssize_t ufs_quota_read(struct super_block *, int, char *,size_t, loff_t); 1441static ssize_t ufs_quota_read(struct super_block *, int, char *,size_t, loff_t);
1437static ssize_t ufs_quota_write(struct super_block *, int, const char *, size_t, loff_t); 1442static ssize_t ufs_quota_write(struct super_block *, int, const char *, size_t, loff_t);
@@ -1442,6 +1447,7 @@ static const struct super_operations ufs_super_ops = {
1442 .destroy_inode = ufs_destroy_inode, 1447 .destroy_inode = ufs_destroy_inode,
1443 .write_inode = ufs_write_inode, 1448 .write_inode = ufs_write_inode,
1444 .delete_inode = ufs_delete_inode, 1449 .delete_inode = ufs_delete_inode,
1450 .clear_inode = ufs_clear_inode,
1445 .put_super = ufs_put_super, 1451 .put_super = ufs_put_super,
1446 .write_super = ufs_write_super, 1452 .write_super = ufs_write_super,
1447 .sync_fs = ufs_sync_fs, 1453 .sync_fs = ufs_sync_fs,
diff --git a/fs/ufs/truncate.c b/fs/ufs/truncate.c
index 41dd431ce228..d3b6270cb377 100644
--- a/fs/ufs/truncate.c
+++ b/fs/ufs/truncate.c
@@ -44,6 +44,7 @@
44#include <linux/buffer_head.h> 44#include <linux/buffer_head.h>
45#include <linux/blkdev.h> 45#include <linux/blkdev.h>
46#include <linux/sched.h> 46#include <linux/sched.h>
47#include <linux/quotaops.h>
47 48
48#include "ufs_fs.h" 49#include "ufs_fs.h"
49#include "ufs.h" 50#include "ufs.h"
@@ -517,9 +518,18 @@ static int ufs_setattr(struct dentry *dentry, struct iattr *attr)
517 if (error) 518 if (error)
518 return error; 519 return error;
519 520
521 if ((ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid) ||
522 (ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid)) {
523 error = dquot_transfer(inode, attr);
524 if (error)
525 return error;
526 }
520 if (ia_valid & ATTR_SIZE && 527 if (ia_valid & ATTR_SIZE &&
521 attr->ia_size != i_size_read(inode)) { 528 attr->ia_size != i_size_read(inode)) {
522 loff_t old_i_size = inode->i_size; 529 loff_t old_i_size = inode->i_size;
530
531 dquot_initialize(inode);
532
523 error = vmtruncate(inode, attr->ia_size); 533 error = vmtruncate(inode, attr->ia_size);
524 if (error) 534 if (error)
525 return error; 535 return error;
diff --git a/fs/ufs/ufs.h b/fs/ufs/ufs.h
index 0b4c39bc0d9e..43f9f5d5670e 100644
--- a/fs/ufs/ufs.h
+++ b/fs/ufs/ufs.h
@@ -86,9 +86,9 @@ extern void ufs_put_cylinder (struct super_block *, unsigned);
86/* dir.c */ 86/* dir.c */
87extern const struct inode_operations ufs_dir_inode_operations; 87extern const struct inode_operations ufs_dir_inode_operations;
88extern int ufs_add_link (struct dentry *, struct inode *); 88extern int ufs_add_link (struct dentry *, struct inode *);
89extern ino_t ufs_inode_by_name(struct inode *, struct qstr *); 89extern ino_t ufs_inode_by_name(struct inode *, const struct qstr *);
90extern int ufs_make_empty(struct inode *, struct inode *); 90extern int ufs_make_empty(struct inode *, struct inode *);
91extern struct ufs_dir_entry *ufs_find_entry(struct inode *, struct qstr *, struct page **); 91extern struct ufs_dir_entry *ufs_find_entry(struct inode *, const struct qstr *, struct page **);
92extern int ufs_delete_entry(struct inode *, struct ufs_dir_entry *, struct page *); 92extern int ufs_delete_entry(struct inode *, struct ufs_dir_entry *, struct page *);
93extern int ufs_empty_dir (struct inode *); 93extern int ufs_empty_dir (struct inode *);
94extern struct ufs_dir_entry *ufs_dotdot(struct inode *, struct page **); 94extern struct ufs_dir_entry *ufs_dotdot(struct inode *, struct page **);
@@ -106,7 +106,7 @@ extern struct inode * ufs_new_inode (struct inode *, int);
106 106
107/* inode.c */ 107/* inode.c */
108extern struct inode *ufs_iget(struct super_block *, unsigned long); 108extern struct inode *ufs_iget(struct super_block *, unsigned long);
109extern int ufs_write_inode (struct inode *, int); 109extern int ufs_write_inode (struct inode *, struct writeback_control *);
110extern int ufs_sync_inode (struct inode *); 110extern int ufs_sync_inode (struct inode *);
111extern void ufs_delete_inode (struct inode *); 111extern void ufs_delete_inode (struct inode *);
112extern struct buffer_head * ufs_bread (struct inode *, unsigned, int, int *); 112extern struct buffer_head * ufs_bread (struct inode *, unsigned, int, int *);
diff --git a/fs/xfs/linux-2.6/xfs_quotaops.c b/fs/xfs/linux-2.6/xfs_quotaops.c
index 3d4a0c84d634..1947514ce1ad 100644
--- a/fs/xfs/linux-2.6/xfs_quotaops.c
+++ b/fs/xfs/linux-2.6/xfs_quotaops.c
@@ -44,20 +44,6 @@ xfs_quota_type(int type)
44} 44}
45 45
46STATIC int 46STATIC int
47xfs_fs_quota_sync(
48 struct super_block *sb,
49 int type)
50{
51 struct xfs_mount *mp = XFS_M(sb);
52
53 if (sb->s_flags & MS_RDONLY)
54 return -EROFS;
55 if (!XFS_IS_QUOTA_RUNNING(mp))
56 return -ENOSYS;
57 return -xfs_sync_data(mp, 0);
58}
59
60STATIC int
61xfs_fs_get_xstate( 47xfs_fs_get_xstate(
62 struct super_block *sb, 48 struct super_block *sb,
63 struct fs_quota_stat *fqs) 49 struct fs_quota_stat *fqs)
@@ -82,8 +68,6 @@ xfs_fs_set_xstate(
82 return -EROFS; 68 return -EROFS;
83 if (op != Q_XQUOTARM && !XFS_IS_QUOTA_RUNNING(mp)) 69 if (op != Q_XQUOTARM && !XFS_IS_QUOTA_RUNNING(mp))
84 return -ENOSYS; 70 return -ENOSYS;
85 if (!capable(CAP_SYS_ADMIN))
86 return -EPERM;
87 71
88 if (uflags & XFS_QUOTA_UDQ_ACCT) 72 if (uflags & XFS_QUOTA_UDQ_ACCT)
89 flags |= XFS_UQUOTA_ACCT; 73 flags |= XFS_UQUOTA_ACCT;
@@ -144,14 +128,11 @@ xfs_fs_set_xquota(
144 return -ENOSYS; 128 return -ENOSYS;
145 if (!XFS_IS_QUOTA_ON(mp)) 129 if (!XFS_IS_QUOTA_ON(mp))
146 return -ESRCH; 130 return -ESRCH;
147 if (!capable(CAP_SYS_ADMIN))
148 return -EPERM;
149 131
150 return -xfs_qm_scall_setqlim(mp, id, xfs_quota_type(type), fdq); 132 return -xfs_qm_scall_setqlim(mp, id, xfs_quota_type(type), fdq);
151} 133}
152 134
153const struct quotactl_ops xfs_quotactl_operations = { 135const struct quotactl_ops xfs_quotactl_operations = {
154 .quota_sync = xfs_fs_quota_sync,
155 .get_xstate = xfs_fs_get_xstate, 136 .get_xstate = xfs_fs_get_xstate,
156 .set_xstate = xfs_fs_set_xstate, 137 .set_xstate = xfs_fs_set_xstate,
157 .get_xquota = xfs_fs_get_xquota, 138 .get_xquota = xfs_fs_get_xquota,
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index 25ea2408118f..71345a370d9f 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -1063,7 +1063,7 @@ xfs_log_inode(
1063STATIC int 1063STATIC int
1064xfs_fs_write_inode( 1064xfs_fs_write_inode(
1065 struct inode *inode, 1065 struct inode *inode,
1066 int sync) 1066 struct writeback_control *wbc)
1067{ 1067{
1068 struct xfs_inode *ip = XFS_I(inode); 1068 struct xfs_inode *ip = XFS_I(inode);
1069 struct xfs_mount *mp = ip->i_mount; 1069 struct xfs_mount *mp = ip->i_mount;
@@ -1074,11 +1074,7 @@ xfs_fs_write_inode(
1074 if (XFS_FORCED_SHUTDOWN(mp)) 1074 if (XFS_FORCED_SHUTDOWN(mp))
1075 return XFS_ERROR(EIO); 1075 return XFS_ERROR(EIO);
1076 1076
1077 if (sync) { 1077 if (wbc->sync_mode == WB_SYNC_ALL) {
1078 error = xfs_wait_on_pages(ip, 0, -1);
1079 if (error)
1080 goto out;
1081
1082 /* 1078 /*
1083 * Make sure the inode has hit stable storage. By using the 1079 * Make sure the inode has hit stable storage. By using the
1084 * log and the fsync transactions we reduce the IOs we have 1080 * log and the fsync transactions we reduce the IOs we have