aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/9p/Kconfig3
-rw-r--r--fs/9p/fid.c17
-rw-r--r--fs/9p/v9fs.c34
-rw-r--r--fs/9p/v9fs.h10
-rw-r--r--fs/9p/vfs_dir.c92
-rw-r--r--fs/9p/vfs_file.c5
-rw-r--r--fs/9p/vfs_inode.c9
-rw-r--r--fs/9p/vfs_inode_dotl.c21
-rw-r--r--fs/Kconfig10
-rw-r--r--fs/adfs/Kconfig4
-rw-r--r--fs/affs/Kconfig4
-rw-r--r--fs/afs/Kconfig7
-rw-r--r--fs/afs/afs.h11
-rw-r--r--fs/afs/fsclient.c14
-rw-r--r--fs/afs/inode.c6
-rw-r--r--fs/afs/super.c6
-rw-r--r--fs/aio.c7
-rw-r--r--fs/befs/Kconfig4
-rw-r--r--fs/bfs/Kconfig4
-rw-r--r--fs/binfmt_elf.c14
-rw-r--r--fs/binfmt_elf_fdpic.c7
-rw-r--r--fs/block_dev.c1
-rw-r--r--fs/btrfs/Kconfig3
-rw-r--r--fs/btrfs/extent-tree.c34
-rw-r--r--fs/btrfs/extent_map.c14
-rw-r--r--fs/btrfs/extent_map.h1
-rw-r--r--fs/btrfs/file-item.c4
-rw-r--r--fs/btrfs/file.c35
-rw-r--r--fs/btrfs/free-space-cache.c20
-rw-r--r--fs/btrfs/inode.c137
-rw-r--r--fs/btrfs/ioctl.c134
-rw-r--r--fs/btrfs/ordered-data.c13
-rw-r--r--fs/btrfs/qgroup.c20
-rw-r--r--fs/btrfs/relocation.c4
-rw-r--r--fs/btrfs/scrub.c25
-rw-r--r--fs/btrfs/send.c4
-rw-r--r--fs/btrfs/super.c2
-rw-r--r--fs/btrfs/transaction.c47
-rw-r--r--fs/btrfs/tree-log.c10
-rw-r--r--fs/btrfs/volumes.c26
-rw-r--r--fs/buffer.c6
-rw-r--r--fs/ceph/Kconfig4
-rw-r--r--fs/ceph/caps.c17
-rw-r--r--fs/ceph/inode.c23
-rw-r--r--fs/ceph/mds_client.c4
-rw-r--r--fs/ceph/mds_client.h4
-rw-r--r--fs/ceph/super.h4
-rw-r--r--fs/cifs/Kconfig8
-rw-r--r--fs/cifs/cifs_dfs_ref.c2
-rw-r--r--fs/cifs/cifs_fs_sb.h8
-rw-r--r--fs/cifs/cifs_spnego.c6
-rw-r--r--fs/cifs/cifsacl.c47
-rw-r--r--fs/cifs/cifsfs.c14
-rw-r--r--fs/cifs/cifsglob.h22
-rw-r--r--fs/cifs/cifspdu.h1
-rw-r--r--fs/cifs/cifsproto.h9
-rw-r--r--fs/cifs/cifssmb.c10
-rw-r--r--fs/cifs/connect.c68
-rw-r--r--fs/cifs/dir.c18
-rw-r--r--fs/cifs/file.c8
-rw-r--r--fs/cifs/inode.c50
-rw-r--r--fs/cifs/link.c2
-rw-r--r--fs/cifs/misc.c2
-rw-r--r--fs/coda/cache.c4
-rw-r--r--fs/coda/coda_fs_i.h2
-rw-r--r--fs/coda/coda_linux.c8
-rw-r--r--fs/coda/inode.c6
-rw-r--r--fs/coda/psdev.c7
-rw-r--r--fs/coda/upcall.c10
-rw-r--r--fs/compat.c52
-rw-r--r--fs/configfs/dir.c5
-rw-r--r--fs/debugfs/inode.c1
-rw-r--r--fs/devpts/inode.c18
-rw-r--r--fs/dlm/dlm_internal.h3
-rw-r--r--fs/dlm/lock.c15
-rw-r--r--fs/dlm/user.c8
-rw-r--r--fs/ecryptfs/Kconfig4
-rw-r--r--fs/efs/Kconfig4
-rw-r--r--fs/ext3/super.c1
-rw-r--r--fs/ext4/inode.c2
-rw-r--r--fs/f2fs/acl.c13
-rw-r--r--fs/f2fs/checkpoint.c66
-rw-r--r--fs/f2fs/data.c17
-rw-r--r--fs/f2fs/debug.c54
-rw-r--r--fs/f2fs/dir.c31
-rw-r--r--fs/f2fs/f2fs.h62
-rw-r--r--fs/f2fs/file.c51
-rw-r--r--fs/f2fs/gc.c158
-rw-r--r--fs/f2fs/gc.h21
-rw-r--r--fs/f2fs/inode.c56
-rw-r--r--fs/f2fs/node.c39
-rw-r--r--fs/f2fs/recovery.c24
-rw-r--r--fs/f2fs/segment.c31
-rw-r--r--fs/f2fs/segment.h23
-rw-r--r--fs/f2fs/super.c177
-rw-r--r--fs/f2fs/xattr.c2
-rw-r--r--fs/file.c2
-rw-r--r--fs/fuse/Kconfig16
-rw-r--r--fs/fuse/cuse.c46
-rw-r--r--fs/fuse/dev.c133
-rw-r--r--fs/fuse/dir.c259
-rw-r--r--fs/fuse/file.c243
-rw-r--r--fs/fuse/fuse_i.h74
-rw-r--r--fs/fuse/inode.c16
-rw-r--r--fs/gfs2/acl.c2
-rw-r--r--fs/gfs2/aops.c17
-rw-r--r--fs/gfs2/bmap.c32
-rw-r--r--fs/gfs2/dir.c32
-rw-r--r--fs/gfs2/file.c6
-rw-r--r--fs/gfs2/glock.c116
-rw-r--r--fs/gfs2/glops.c4
-rw-r--r--fs/gfs2/incore.h11
-rw-r--r--fs/gfs2/inode.c40
-rw-r--r--fs/gfs2/lock_dlm.c7
-rw-r--r--fs/gfs2/log.c76
-rw-r--r--fs/gfs2/log.h12
-rw-r--r--fs/gfs2/lops.c83
-rw-r--r--fs/gfs2/lops.h14
-rw-r--r--fs/gfs2/meta_io.c35
-rw-r--r--fs/gfs2/meta_io.h3
-rw-r--r--fs/gfs2/ops_fstype.c4
-rw-r--r--fs/gfs2/quota.c142
-rw-r--r--fs/gfs2/quota.h15
-rw-r--r--fs/gfs2/rgrp.c18
-rw-r--r--fs/gfs2/super.c76
-rw-r--r--fs/gfs2/super.h3
-rw-r--r--fs/gfs2/sys.c62
-rw-r--r--fs/gfs2/trans.c124
-rw-r--r--fs/gfs2/trans.h3
-rw-r--r--fs/gfs2/util.c3
-rw-r--r--fs/gfs2/xattr.c40
-rw-r--r--fs/hfs/Kconfig4
-rw-r--r--fs/hpfs/inode.c2
-rw-r--r--fs/jffs2/Kconfig10
-rw-r--r--fs/jfs/super.c2
-rw-r--r--fs/lockd/clntproc.c3
-rw-r--r--fs/logfs/Kconfig4
-rw-r--r--fs/namespace.c29
-rw-r--r--fs/ncpfs/inode.c55
-rw-r--r--fs/ncpfs/ioctl.c25
-rw-r--r--fs/ncpfs/ncp_fs_sb.h6
-rw-r--r--fs/nfs/blocklayout/blocklayout.c1
-rw-r--r--fs/nfs/callback_proc.c61
-rw-r--r--fs/nfs/delegation.c154
-rw-r--r--fs/nfs/delegation.h1
-rw-r--r--fs/nfs/getroot.c3
-rw-r--r--fs/nfs/idmap.c53
-rw-r--r--fs/nfs/inode.c17
-rw-r--r--fs/nfs/internal.h1
-rw-r--r--fs/nfs/namespace.c20
-rw-r--r--fs/nfs/nfs2xdr.c19
-rw-r--r--fs/nfs/nfs3xdr.c18
-rw-r--r--fs/nfs/nfs4_fs.h4
-rw-r--r--fs/nfs/nfs4client.c62
-rw-r--r--fs/nfs/nfs4proc.c133
-rw-r--r--fs/nfs/nfs4state.c33
-rw-r--r--fs/nfs/nfs4xdr.c16
-rw-r--r--fs/nfs/objlayout/objio_osd.c1
-rw-r--r--fs/nfs/pnfs.c150
-rw-r--r--fs/nfs/pnfs.h7
-rw-r--r--fs/nfs/super.c71
-rw-r--r--fs/nfs/unlink.c5
-rw-r--r--fs/nfs_common/nfsacl.c41
-rw-r--r--fs/nfsd/Kconfig4
-rw-r--r--fs/nfsd/acl.h2
-rw-r--r--fs/nfsd/auth.c12
-rw-r--r--fs/nfsd/auth.h6
-rw-r--r--fs/nfsd/export.c22
-rw-r--r--fs/nfsd/idmap.h8
-rw-r--r--fs/nfsd/nfs3xdr.c14
-rw-r--r--fs/nfsd/nfs4acl.c63
-rw-r--r--fs/nfsd/nfs4idmap.c38
-rw-r--r--fs/nfsd/nfs4recover.c4
-rw-r--r--fs/nfsd/nfs4state.c12
-rw-r--r--fs/nfsd/nfs4xdr.c54
-rw-r--r--fs/nfsd/nfsd.h6
-rw-r--r--fs/nfsd/nfssvc.c6
-rw-r--r--fs/nfsd/nfsxdr.c14
-rw-r--r--fs/nfsd/state.h4
-rw-r--r--fs/nfsd/vfs.c8
-rw-r--r--fs/nilfs2/Kconfig3
-rw-r--r--fs/nilfs2/file.c2
-rw-r--r--fs/nilfs2/ioctl.c5
-rw-r--r--fs/notify/inotify/inotify_user.c4
-rw-r--r--fs/ocfs2/acl.c31
-rw-r--r--fs/ocfs2/alloc.c3
-rw-r--r--fs/ocfs2/aops.c1
-rw-r--r--fs/ocfs2/cluster/heartbeat.c6
-rw-r--r--fs/ocfs2/cluster/tcp.c8
-rw-r--r--fs/ocfs2/dlm/dlmdomain.c4
-rw-r--r--fs/ocfs2/dlmglue.c13
-rw-r--r--fs/ocfs2/extent_map.c3
-rw-r--r--fs/ocfs2/file.c11
-rw-r--r--fs/ocfs2/inode.c12
-rw-r--r--fs/ocfs2/journal.c10
-rw-r--r--fs/ocfs2/localalloc.c8
-rw-r--r--fs/ocfs2/namei.c4
-rw-r--r--fs/ocfs2/refcounttree.c2
-rw-r--r--fs/ocfs2/stack_o2cb.c2
-rw-r--r--fs/ocfs2/super.c6
-rw-r--r--fs/ocfs2/sysfile.c3
-rw-r--r--fs/proc/Makefile3
-rw-r--r--fs/proc/array.c4
-rw-r--r--fs/proc/meminfo.c6
-rw-r--r--fs/proc/proc_net.c14
-rw-r--r--fs/pstore/inode.c18
-rw-r--r--fs/pstore/platform.c35
-rw-r--r--fs/pstore/ram.c10
-rw-r--r--fs/qnx6/inode.c2
-rw-r--r--fs/ramfs/inode.c1
-rw-r--r--fs/select.c1
-rw-r--r--fs/sysfs/group.c42
-rw-r--r--fs/sysfs/mount.c2
-rw-r--r--fs/sysfs/symlink.c45
-rw-r--r--fs/sysfs/sysfs.h2
-rw-r--r--fs/timerfd.c85
-rw-r--r--fs/ubifs/debug.c8
-rw-r--r--fs/ubifs/file.c1
-rw-r--r--fs/ubifs/lpt_commit.c14
-rw-r--r--fs/ubifs/orphan.c12
-rw-r--r--fs/ubifs/tnc_commit.c2
-rw-r--r--fs/ubifs/ubifs.h6
-rw-r--r--fs/ufs/Kconfig2
-rw-r--r--fs/xfs/Kconfig4
-rw-r--r--fs/xfs/xfs_alloc.c2
-rw-r--r--fs/xfs/xfs_aops.c2
-rw-r--r--fs/xfs/xfs_attr.c9
-rw-r--r--fs/xfs/xfs_bmap.c124
-rw-r--r--fs/xfs/xfs_buf.c22
-rw-r--r--fs/xfs/xfs_buf_item.c130
-rw-r--r--fs/xfs/xfs_buf_item.h14
-rw-r--r--fs/xfs/xfs_dfrag.c4
-rw-r--r--fs/xfs/xfs_dquot.c12
-rw-r--r--fs/xfs/xfs_fsops.c4
-rw-r--r--fs/xfs/xfs_ialloc.c4
-rw-r--r--fs/xfs/xfs_inode.c6
-rw-r--r--fs/xfs/xfs_inode.h1
-rw-r--r--fs/xfs/xfs_inode_item.c16
-rw-r--r--fs/xfs/xfs_inode_item.h4
-rw-r--r--fs/xfs/xfs_iomap.c86
-rw-r--r--fs/xfs/xfs_log.c10
-rw-r--r--fs/xfs/xfs_mount.c14
-rw-r--r--fs/xfs/xfs_mount.h9
-rw-r--r--fs/xfs/xfs_qm.c7
-rw-r--r--fs/xfs/xfs_qm_bhv.c2
-rw-r--r--fs/xfs/xfs_qm_syscalls.c28
-rw-r--r--fs/xfs/xfs_super.c29
-rw-r--r--fs/xfs/xfs_trace.h1
-rw-r--r--fs/xfs/xfs_trans.c376
-rw-r--r--fs/xfs/xfs_trans.h18
-rw-r--r--fs/xfs/xfs_trans_ail.c14
-rw-r--r--fs/xfs/xfs_trans_dquot.c10
-rw-r--r--fs/xfs/xfs_trans_inode.c41
-rw-r--r--fs/xfs/xfs_types.h1
-rw-r--r--fs/xfs/xfs_vnodeops.c12
255 files changed, 3998 insertions, 2666 deletions
diff --git a/fs/9p/Kconfig b/fs/9p/Kconfig
index 0a93dc1cb4ac..55abfd62654a 100644
--- a/fs/9p/Kconfig
+++ b/fs/9p/Kconfig
@@ -11,8 +11,7 @@ config 9P_FS
11 11
12if 9P_FS 12if 9P_FS
13config 9P_FSCACHE 13config 9P_FSCACHE
14 bool "Enable 9P client caching support (EXPERIMENTAL)" 14 bool "Enable 9P client caching support"
15 depends on EXPERIMENTAL
16 depends on 9P_FS=m && FSCACHE || 9P_FS=y && FSCACHE=y 15 depends on 9P_FS=m && FSCACHE || 9P_FS=y && FSCACHE=y
17 help 16 help
18 Choose Y here to enable persistent, read-only local 17 Choose Y here to enable persistent, read-only local
diff --git a/fs/9p/fid.c b/fs/9p/fid.c
index da8eefbe830d..afd4724b2d92 100644
--- a/fs/9p/fid.c
+++ b/fs/9p/fid.c
@@ -74,19 +74,20 @@ int v9fs_fid_add(struct dentry *dentry, struct p9_fid *fid)
74 * 74 *
75 */ 75 */
76 76
77static struct p9_fid *v9fs_fid_find(struct dentry *dentry, u32 uid, int any) 77static struct p9_fid *v9fs_fid_find(struct dentry *dentry, kuid_t uid, int any)
78{ 78{
79 struct v9fs_dentry *dent; 79 struct v9fs_dentry *dent;
80 struct p9_fid *fid, *ret; 80 struct p9_fid *fid, *ret;
81 81
82 p9_debug(P9_DEBUG_VFS, " dentry: %s (%p) uid %d any %d\n", 82 p9_debug(P9_DEBUG_VFS, " dentry: %s (%p) uid %d any %d\n",
83 dentry->d_name.name, dentry, uid, any); 83 dentry->d_name.name, dentry, from_kuid(&init_user_ns, uid),
84 any);
84 dent = (struct v9fs_dentry *) dentry->d_fsdata; 85 dent = (struct v9fs_dentry *) dentry->d_fsdata;
85 ret = NULL; 86 ret = NULL;
86 if (dent) { 87 if (dent) {
87 spin_lock(&dent->lock); 88 spin_lock(&dent->lock);
88 list_for_each_entry(fid, &dent->fidlist, dlist) { 89 list_for_each_entry(fid, &dent->fidlist, dlist) {
89 if (any || fid->uid == uid) { 90 if (any || uid_eq(fid->uid, uid)) {
90 ret = fid; 91 ret = fid;
91 break; 92 break;
92 } 93 }
@@ -126,7 +127,7 @@ err_out:
126} 127}
127 128
128static struct p9_fid *v9fs_fid_lookup_with_uid(struct dentry *dentry, 129static struct p9_fid *v9fs_fid_lookup_with_uid(struct dentry *dentry,
129 uid_t uid, int any) 130 kuid_t uid, int any)
130{ 131{
131 struct dentry *ds; 132 struct dentry *ds;
132 char **wnames, *uname; 133 char **wnames, *uname;
@@ -233,7 +234,7 @@ err_out:
233 234
234struct p9_fid *v9fs_fid_lookup(struct dentry *dentry) 235struct p9_fid *v9fs_fid_lookup(struct dentry *dentry)
235{ 236{
236 uid_t uid; 237 kuid_t uid;
237 int any, access; 238 int any, access;
238 struct v9fs_session_info *v9ses; 239 struct v9fs_session_info *v9ses;
239 240
@@ -253,7 +254,7 @@ struct p9_fid *v9fs_fid_lookup(struct dentry *dentry)
253 break; 254 break;
254 255
255 default: 256 default:
256 uid = ~0; 257 uid = INVALID_UID;
257 any = 0; 258 any = 0;
258 break; 259 break;
259 } 260 }
@@ -272,7 +273,7 @@ struct p9_fid *v9fs_fid_clone(struct dentry *dentry)
272 return ret; 273 return ret;
273} 274}
274 275
275static struct p9_fid *v9fs_fid_clone_with_uid(struct dentry *dentry, uid_t uid) 276static struct p9_fid *v9fs_fid_clone_with_uid(struct dentry *dentry, kuid_t uid)
276{ 277{
277 struct p9_fid *fid, *ret; 278 struct p9_fid *fid, *ret;
278 279
@@ -289,7 +290,7 @@ struct p9_fid *v9fs_writeback_fid(struct dentry *dentry)
289 int err; 290 int err;
290 struct p9_fid *fid; 291 struct p9_fid *fid;
291 292
292 fid = v9fs_fid_clone_with_uid(dentry, 0); 293 fid = v9fs_fid_clone_with_uid(dentry, GLOBAL_ROOT_UID);
293 if (IS_ERR(fid)) 294 if (IS_ERR(fid))
294 goto error_out; 295 goto error_out;
295 /* 296 /*
diff --git a/fs/9p/v9fs.c b/fs/9p/v9fs.c
index d934f04e7736..58e6cbce4156 100644
--- a/fs/9p/v9fs.c
+++ b/fs/9p/v9fs.c
@@ -161,7 +161,13 @@ static int v9fs_parse_options(struct v9fs_session_info *v9ses, char *opts)
161 ret = r; 161 ret = r;
162 continue; 162 continue;
163 } 163 }
164 v9ses->dfltuid = option; 164 v9ses->dfltuid = make_kuid(current_user_ns(), option);
165 if (!uid_valid(v9ses->dfltuid)) {
166 p9_debug(P9_DEBUG_ERROR,
167 "uid field, but not a uid?\n");
168 ret = -EINVAL;
169 continue;
170 }
165 break; 171 break;
166 case Opt_dfltgid: 172 case Opt_dfltgid:
167 r = match_int(&args[0], &option); 173 r = match_int(&args[0], &option);
@@ -171,7 +177,13 @@ static int v9fs_parse_options(struct v9fs_session_info *v9ses, char *opts)
171 ret = r; 177 ret = r;
172 continue; 178 continue;
173 } 179 }
174 v9ses->dfltgid = option; 180 v9ses->dfltgid = make_kgid(current_user_ns(), option);
181 if (!gid_valid(v9ses->dfltgid)) {
182 p9_debug(P9_DEBUG_ERROR,
183 "gid field, but not a gid?\n");
184 ret = -EINVAL;
185 continue;
186 }
175 break; 187 break;
176 case Opt_afid: 188 case Opt_afid:
177 r = match_int(&args[0], &option); 189 r = match_int(&args[0], &option);
@@ -248,8 +260,9 @@ static int v9fs_parse_options(struct v9fs_session_info *v9ses, char *opts)
248 else if (strcmp(s, "client") == 0) { 260 else if (strcmp(s, "client") == 0) {
249 v9ses->flags |= V9FS_ACCESS_CLIENT; 261 v9ses->flags |= V9FS_ACCESS_CLIENT;
250 } else { 262 } else {
263 uid_t uid;
251 v9ses->flags |= V9FS_ACCESS_SINGLE; 264 v9ses->flags |= V9FS_ACCESS_SINGLE;
252 v9ses->uid = simple_strtoul(s, &e, 10); 265 uid = simple_strtoul(s, &e, 10);
253 if (*e != '\0') { 266 if (*e != '\0') {
254 ret = -EINVAL; 267 ret = -EINVAL;
255 pr_info("Unknown access argument %s\n", 268 pr_info("Unknown access argument %s\n",
@@ -257,6 +270,13 @@ static int v9fs_parse_options(struct v9fs_session_info *v9ses, char *opts)
257 kfree(s); 270 kfree(s);
258 goto free_and_return; 271 goto free_and_return;
259 } 272 }
273 v9ses->uid = make_kuid(current_user_ns(), uid);
274 if (!uid_valid(v9ses->uid)) {
275 ret = -EINVAL;
276 pr_info("Uknown uid %s\n", s);
277 kfree(s);
278 goto free_and_return;
279 }
260 } 280 }
261 281
262 kfree(s); 282 kfree(s);
@@ -319,7 +339,7 @@ struct p9_fid *v9fs_session_init(struct v9fs_session_info *v9ses,
319 list_add(&v9ses->slist, &v9fs_sessionlist); 339 list_add(&v9ses->slist, &v9fs_sessionlist);
320 spin_unlock(&v9fs_sessionlist_lock); 340 spin_unlock(&v9fs_sessionlist_lock);
321 341
322 v9ses->uid = ~0; 342 v9ses->uid = INVALID_UID;
323 v9ses->dfltuid = V9FS_DEFUID; 343 v9ses->dfltuid = V9FS_DEFUID;
324 v9ses->dfltgid = V9FS_DEFGID; 344 v9ses->dfltgid = V9FS_DEFGID;
325 345
@@ -364,7 +384,7 @@ struct p9_fid *v9fs_session_init(struct v9fs_session_info *v9ses,
364 384
365 v9ses->flags &= ~V9FS_ACCESS_MASK; 385 v9ses->flags &= ~V9FS_ACCESS_MASK;
366 v9ses->flags |= V9FS_ACCESS_ANY; 386 v9ses->flags |= V9FS_ACCESS_ANY;
367 v9ses->uid = ~0; 387 v9ses->uid = INVALID_UID;
368 } 388 }
369 if (!v9fs_proto_dotl(v9ses) || 389 if (!v9fs_proto_dotl(v9ses) ||
370 !((v9ses->flags & V9FS_ACCESS_MASK) == V9FS_ACCESS_CLIENT)) { 390 !((v9ses->flags & V9FS_ACCESS_MASK) == V9FS_ACCESS_CLIENT)) {
@@ -375,7 +395,7 @@ struct p9_fid *v9fs_session_init(struct v9fs_session_info *v9ses,
375 v9ses->flags &= ~V9FS_ACL_MASK; 395 v9ses->flags &= ~V9FS_ACL_MASK;
376 } 396 }
377 397
378 fid = p9_client_attach(v9ses->clnt, NULL, v9ses->uname, ~0, 398 fid = p9_client_attach(v9ses->clnt, NULL, v9ses->uname, INVALID_UID,
379 v9ses->aname); 399 v9ses->aname);
380 if (IS_ERR(fid)) { 400 if (IS_ERR(fid)) {
381 retval = PTR_ERR(fid); 401 retval = PTR_ERR(fid);
@@ -387,7 +407,7 @@ struct p9_fid *v9fs_session_init(struct v9fs_session_info *v9ses,
387 if ((v9ses->flags & V9FS_ACCESS_MASK) == V9FS_ACCESS_SINGLE) 407 if ((v9ses->flags & V9FS_ACCESS_MASK) == V9FS_ACCESS_SINGLE)
388 fid->uid = v9ses->uid; 408 fid->uid = v9ses->uid;
389 else 409 else
390 fid->uid = ~0; 410 fid->uid = INVALID_UID;
391 411
392#ifdef CONFIG_9P_FSCACHE 412#ifdef CONFIG_9P_FSCACHE
393 /* register the session for caching */ 413 /* register the session for caching */
diff --git a/fs/9p/v9fs.h b/fs/9p/v9fs.h
index 34c59f14a1c9..a8e127c89627 100644
--- a/fs/9p/v9fs.h
+++ b/fs/9p/v9fs.h
@@ -109,9 +109,9 @@ struct v9fs_session_info {
109 char *uname; /* user name to mount as */ 109 char *uname; /* user name to mount as */
110 char *aname; /* name of remote hierarchy being mounted */ 110 char *aname; /* name of remote hierarchy being mounted */
111 unsigned int maxdata; /* max data for client interface */ 111 unsigned int maxdata; /* max data for client interface */
112 unsigned int dfltuid; /* default uid/muid for legacy support */ 112 kuid_t dfltuid; /* default uid/muid for legacy support */
113 unsigned int dfltgid; /* default gid for legacy support */ 113 kgid_t dfltgid; /* default gid for legacy support */
114 u32 uid; /* if ACCESS_SINGLE, the uid that has access */ 114 kuid_t uid; /* if ACCESS_SINGLE, the uid that has access */
115 struct p9_client *clnt; /* 9p client */ 115 struct p9_client *clnt; /* 9p client */
116 struct list_head slist; /* list of sessions registered with v9fs */ 116 struct list_head slist; /* list of sessions registered with v9fs */
117 struct backing_dev_info bdi; 117 struct backing_dev_info bdi;
@@ -165,8 +165,8 @@ extern struct inode *v9fs_inode_from_fid_dotl(struct v9fs_session_info *v9ses,
165#define V9FS_PORT 564 165#define V9FS_PORT 564
166#define V9FS_DEFUSER "nobody" 166#define V9FS_DEFUSER "nobody"
167#define V9FS_DEFANAME "" 167#define V9FS_DEFANAME ""
168#define V9FS_DEFUID (-2) 168#define V9FS_DEFUID KUIDT_INIT(-2)
169#define V9FS_DEFGID (-2) 169#define V9FS_DEFGID KGIDT_INIT(-2)
170 170
171static inline struct v9fs_session_info *v9fs_inode2v9ses(struct inode *inode) 171static inline struct v9fs_session_info *v9fs_inode2v9ses(struct inode *inode)
172{ 172{
diff --git a/fs/9p/vfs_dir.c b/fs/9p/vfs_dir.c
index ff911e779651..be1e34adc3c6 100644
--- a/fs/9p/vfs_dir.c
+++ b/fs/9p/vfs_dir.c
@@ -52,10 +52,9 @@
52 */ 52 */
53 53
54struct p9_rdir { 54struct p9_rdir {
55 struct mutex mutex;
56 int head; 55 int head;
57 int tail; 56 int tail;
58 uint8_t *buf; 57 uint8_t buf[];
59}; 58};
60 59
61/** 60/**
@@ -93,33 +92,12 @@ static void p9stat_init(struct p9_wstat *stbuf)
93 * 92 *
94 */ 93 */
95 94
96static int v9fs_alloc_rdir_buf(struct file *filp, int buflen) 95static struct p9_rdir *v9fs_alloc_rdir_buf(struct file *filp, int buflen)
97{ 96{
98 struct p9_rdir *rdir; 97 struct p9_fid *fid = filp->private_data;
99 struct p9_fid *fid; 98 if (!fid->rdir)
100 int err = 0; 99 fid->rdir = kzalloc(sizeof(struct p9_rdir) + buflen, GFP_KERNEL);
101 100 return fid->rdir;
102 fid = filp->private_data;
103 if (!fid->rdir) {
104 rdir = kmalloc(sizeof(struct p9_rdir) + buflen, GFP_KERNEL);
105
106 if (rdir == NULL) {
107 err = -ENOMEM;
108 goto exit;
109 }
110 spin_lock(&filp->f_dentry->d_lock);
111 if (!fid->rdir) {
112 rdir->buf = (uint8_t *)rdir + sizeof(struct p9_rdir);
113 mutex_init(&rdir->mutex);
114 rdir->head = rdir->tail = 0;
115 fid->rdir = (void *) rdir;
116 rdir = NULL;
117 }
118 spin_unlock(&filp->f_dentry->d_lock);
119 kfree(rdir);
120 }
121exit:
122 return err;
123} 101}
124 102
125/** 103/**
@@ -145,20 +123,16 @@ static int v9fs_dir_readdir(struct file *filp, void *dirent, filldir_t filldir)
145 123
146 buflen = fid->clnt->msize - P9_IOHDRSZ; 124 buflen = fid->clnt->msize - P9_IOHDRSZ;
147 125
148 err = v9fs_alloc_rdir_buf(filp, buflen); 126 rdir = v9fs_alloc_rdir_buf(filp, buflen);
149 if (err) 127 if (!rdir)
150 goto exit; 128 return -ENOMEM;
151 rdir = (struct p9_rdir *) fid->rdir;
152 129
153 err = mutex_lock_interruptible(&rdir->mutex); 130 while (1) {
154 if (err)
155 return err;
156 while (err == 0) {
157 if (rdir->tail == rdir->head) { 131 if (rdir->tail == rdir->head) {
158 err = v9fs_file_readn(filp, rdir->buf, NULL, 132 err = v9fs_file_readn(filp, rdir->buf, NULL,
159 buflen, filp->f_pos); 133 buflen, filp->f_pos);
160 if (err <= 0) 134 if (err <= 0)
161 goto unlock_and_exit; 135 return err;
162 136
163 rdir->head = 0; 137 rdir->head = 0;
164 rdir->tail = err; 138 rdir->tail = err;
@@ -169,9 +143,8 @@ static int v9fs_dir_readdir(struct file *filp, void *dirent, filldir_t filldir)
169 rdir->tail - rdir->head, &st); 143 rdir->tail - rdir->head, &st);
170 if (err) { 144 if (err) {
171 p9_debug(P9_DEBUG_VFS, "returned %d\n", err); 145 p9_debug(P9_DEBUG_VFS, "returned %d\n", err);
172 err = -EIO;
173 p9stat_free(&st); 146 p9stat_free(&st);
174 goto unlock_and_exit; 147 return -EIO;
175 } 148 }
176 reclen = st.size+2; 149 reclen = st.size+2;
177 150
@@ -180,19 +153,13 @@ static int v9fs_dir_readdir(struct file *filp, void *dirent, filldir_t filldir)
180 153
181 p9stat_free(&st); 154 p9stat_free(&st);
182 155
183 if (over) { 156 if (over)
184 err = 0; 157 return 0;
185 goto unlock_and_exit; 158
186 }
187 rdir->head += reclen; 159 rdir->head += reclen;
188 filp->f_pos += reclen; 160 filp->f_pos += reclen;
189 } 161 }
190 } 162 }
191
192unlock_and_exit:
193 mutex_unlock(&rdir->mutex);
194exit:
195 return err;
196} 163}
197 164
198/** 165/**
@@ -218,21 +185,16 @@ static int v9fs_dir_readdir_dotl(struct file *filp, void *dirent,
218 185
219 buflen = fid->clnt->msize - P9_READDIRHDRSZ; 186 buflen = fid->clnt->msize - P9_READDIRHDRSZ;
220 187
221 err = v9fs_alloc_rdir_buf(filp, buflen); 188 rdir = v9fs_alloc_rdir_buf(filp, buflen);
222 if (err) 189 if (!rdir)
223 goto exit; 190 return -ENOMEM;
224 rdir = (struct p9_rdir *) fid->rdir;
225 191
226 err = mutex_lock_interruptible(&rdir->mutex); 192 while (1) {
227 if (err)
228 return err;
229
230 while (err == 0) {
231 if (rdir->tail == rdir->head) { 193 if (rdir->tail == rdir->head) {
232 err = p9_client_readdir(fid, rdir->buf, buflen, 194 err = p9_client_readdir(fid, rdir->buf, buflen,
233 filp->f_pos); 195 filp->f_pos);
234 if (err <= 0) 196 if (err <= 0)
235 goto unlock_and_exit; 197 return err;
236 198
237 rdir->head = 0; 199 rdir->head = 0;
238 rdir->tail = err; 200 rdir->tail = err;
@@ -245,8 +207,7 @@ static int v9fs_dir_readdir_dotl(struct file *filp, void *dirent,
245 &curdirent); 207 &curdirent);
246 if (err < 0) { 208 if (err < 0) {
247 p9_debug(P9_DEBUG_VFS, "returned %d\n", err); 209 p9_debug(P9_DEBUG_VFS, "returned %d\n", err);
248 err = -EIO; 210 return -EIO;
249 goto unlock_and_exit;
250 } 211 }
251 212
252 /* d_off in dirent structure tracks the offset into 213 /* d_off in dirent structure tracks the offset into
@@ -261,20 +222,13 @@ static int v9fs_dir_readdir_dotl(struct file *filp, void *dirent,
261 curdirent.d_type); 222 curdirent.d_type);
262 oldoffset = curdirent.d_off; 223 oldoffset = curdirent.d_off;
263 224
264 if (over) { 225 if (over)
265 err = 0; 226 return 0;
266 goto unlock_and_exit;
267 }
268 227
269 filp->f_pos = curdirent.d_off; 228 filp->f_pos = curdirent.d_off;
270 rdir->head += err; 229 rdir->head += err;
271 } 230 }
272 } 231 }
273
274unlock_and_exit:
275 mutex_unlock(&rdir->mutex);
276exit:
277 return err;
278} 232}
279 233
280 234
diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c
index c2483e97beee..c921ac92ea4c 100644
--- a/fs/9p/vfs_file.c
+++ b/fs/9p/vfs_file.c
@@ -80,10 +80,6 @@ int v9fs_file_open(struct inode *inode, struct file *file)
80 p9_client_clunk(fid); 80 p9_client_clunk(fid);
81 return err; 81 return err;
82 } 82 }
83 if (file->f_flags & O_TRUNC) {
84 i_size_write(inode, 0);
85 inode->i_blocks = 0;
86 }
87 if ((file->f_flags & O_APPEND) && 83 if ((file->f_flags & O_APPEND) &&
88 (!v9fs_proto_dotu(v9ses) && !v9fs_proto_dotl(v9ses))) 84 (!v9fs_proto_dotu(v9ses) && !v9fs_proto_dotl(v9ses)))
89 generic_file_llseek(file, 0, SEEK_END); 85 generic_file_llseek(file, 0, SEEK_END);
@@ -620,6 +616,7 @@ v9fs_vm_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
620 lock_page(page); 616 lock_page(page);
621 if (page->mapping != inode->i_mapping) 617 if (page->mapping != inode->i_mapping)
622 goto out_unlock; 618 goto out_unlock;
619 wait_for_stable_page(page);
623 620
624 return VM_FAULT_LOCKED; 621 return VM_FAULT_LOCKED;
625out_unlock: 622out_unlock:
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index 890bed538f9b..b5340c829de1 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -192,9 +192,6 @@ int v9fs_uflags2omode(int uflags, int extended)
192 break; 192 break;
193 } 193 }
194 194
195 if (uflags & O_TRUNC)
196 ret |= P9_OTRUNC;
197
198 if (extended) { 195 if (extended) {
199 if (uflags & O_EXCL) 196 if (uflags & O_EXCL)
200 ret |= P9_OEXCL; 197 ret |= P9_OEXCL;
@@ -228,9 +225,9 @@ v9fs_blank_wstat(struct p9_wstat *wstat)
228 wstat->uid = NULL; 225 wstat->uid = NULL;
229 wstat->gid = NULL; 226 wstat->gid = NULL;
230 wstat->muid = NULL; 227 wstat->muid = NULL;
231 wstat->n_uid = ~0; 228 wstat->n_uid = INVALID_UID;
232 wstat->n_gid = ~0; 229 wstat->n_gid = INVALID_GID;
233 wstat->n_muid = ~0; 230 wstat->n_muid = INVALID_UID;
234 wstat->extension = NULL; 231 wstat->extension = NULL;
235} 232}
236 233
diff --git a/fs/9p/vfs_inode_dotl.c b/fs/9p/vfs_inode_dotl.c
index 40895546e103..07f409288d1b 100644
--- a/fs/9p/vfs_inode_dotl.c
+++ b/fs/9p/vfs_inode_dotl.c
@@ -57,7 +57,7 @@ v9fs_vfs_mknod_dotl(struct inode *dir, struct dentry *dentry, umode_t omode,
57 * group of the new file system object. 57 * group of the new file system object.
58 */ 58 */
59 59
60static gid_t v9fs_get_fsgid_for_create(struct inode *dir_inode) 60static kgid_t v9fs_get_fsgid_for_create(struct inode *dir_inode)
61{ 61{
62 BUG_ON(dir_inode == NULL); 62 BUG_ON(dir_inode == NULL);
63 63
@@ -186,7 +186,6 @@ static int v9fs_mapped_dotl_flags(int flags)
186 { O_CREAT, P9_DOTL_CREATE }, 186 { O_CREAT, P9_DOTL_CREATE },
187 { O_EXCL, P9_DOTL_EXCL }, 187 { O_EXCL, P9_DOTL_EXCL },
188 { O_NOCTTY, P9_DOTL_NOCTTY }, 188 { O_NOCTTY, P9_DOTL_NOCTTY },
189 { O_TRUNC, P9_DOTL_TRUNC },
190 { O_APPEND, P9_DOTL_APPEND }, 189 { O_APPEND, P9_DOTL_APPEND },
191 { O_NONBLOCK, P9_DOTL_NONBLOCK }, 190 { O_NONBLOCK, P9_DOTL_NONBLOCK },
192 { O_DSYNC, P9_DOTL_DSYNC }, 191 { O_DSYNC, P9_DOTL_DSYNC },
@@ -246,7 +245,7 @@ v9fs_vfs_atomic_open_dotl(struct inode *dir, struct dentry *dentry,
246 int *opened) 245 int *opened)
247{ 246{
248 int err = 0; 247 int err = 0;
249 gid_t gid; 248 kgid_t gid;
250 umode_t mode; 249 umode_t mode;
251 char *name = NULL; 250 char *name = NULL;
252 struct p9_qid qid; 251 struct p9_qid qid;
@@ -268,8 +267,14 @@ v9fs_vfs_atomic_open_dotl(struct inode *dir, struct dentry *dentry,
268 } 267 }
269 268
270 /* Only creates */ 269 /* Only creates */
271 if (!(flags & O_CREAT) || dentry->d_inode) 270 if (!(flags & O_CREAT))
272 return finish_no_open(file, res); 271 return finish_no_open(file, res);
272 else if (dentry->d_inode) {
273 if ((flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))
274 return -EEXIST;
275 else
276 return finish_no_open(file, res);
277 }
273 278
274 v9ses = v9fs_inode2v9ses(dir); 279 v9ses = v9fs_inode2v9ses(dir);
275 280
@@ -391,7 +396,7 @@ static int v9fs_vfs_mkdir_dotl(struct inode *dir,
391 int err; 396 int err;
392 struct v9fs_session_info *v9ses; 397 struct v9fs_session_info *v9ses;
393 struct p9_fid *fid = NULL, *dfid = NULL; 398 struct p9_fid *fid = NULL, *dfid = NULL;
394 gid_t gid; 399 kgid_t gid;
395 char *name; 400 char *name;
396 umode_t mode; 401 umode_t mode;
397 struct inode *inode; 402 struct inode *inode;
@@ -692,7 +697,7 @@ v9fs_vfs_symlink_dotl(struct inode *dir, struct dentry *dentry,
692 const char *symname) 697 const char *symname)
693{ 698{
694 int err; 699 int err;
695 gid_t gid; 700 kgid_t gid;
696 char *name; 701 char *name;
697 struct p9_qid qid; 702 struct p9_qid qid;
698 struct inode *inode; 703 struct inode *inode;
@@ -832,7 +837,7 @@ v9fs_vfs_mknod_dotl(struct inode *dir, struct dentry *dentry, umode_t omode,
832 dev_t rdev) 837 dev_t rdev)
833{ 838{
834 int err; 839 int err;
835 gid_t gid; 840 kgid_t gid;
836 char *name; 841 char *name;
837 umode_t mode; 842 umode_t mode;
838 struct v9fs_session_info *v9ses; 843 struct v9fs_session_info *v9ses;
diff --git a/fs/Kconfig b/fs/Kconfig
index cfe512fd1caf..780725a463b1 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -68,16 +68,6 @@ source "fs/quota/Kconfig"
68source "fs/autofs4/Kconfig" 68source "fs/autofs4/Kconfig"
69source "fs/fuse/Kconfig" 69source "fs/fuse/Kconfig"
70 70
71config CUSE
72 tristate "Character device in Userspace support"
73 depends on FUSE_FS
74 help
75 This FUSE extension allows character devices to be
76 implemented in userspace.
77
78 If you want to develop or use userspace character device
79 based on CUSE, answer Y or M.
80
81config GENERIC_ACL 71config GENERIC_ACL
82 bool 72 bool
83 select FS_POSIX_ACL 73 select FS_POSIX_ACL
diff --git a/fs/adfs/Kconfig b/fs/adfs/Kconfig
index e55182a74605..c5a7787dd5e9 100644
--- a/fs/adfs/Kconfig
+++ b/fs/adfs/Kconfig
@@ -1,6 +1,6 @@
1config ADFS_FS 1config ADFS_FS
2 tristate "ADFS file system support (EXPERIMENTAL)" 2 tristate "ADFS file system support"
3 depends on BLOCK && EXPERIMENTAL 3 depends on BLOCK
4 help 4 help
5 The Acorn Disc Filing System is the standard file system of the 5 The Acorn Disc Filing System is the standard file system of the
6 RiscOS operating system which runs on Acorn's ARM-based Risc PC 6 RiscOS operating system which runs on Acorn's ARM-based Risc PC
diff --git a/fs/affs/Kconfig b/fs/affs/Kconfig
index cfad9afb4762..a04d9e848d05 100644
--- a/fs/affs/Kconfig
+++ b/fs/affs/Kconfig
@@ -1,6 +1,6 @@
1config AFFS_FS 1config AFFS_FS
2 tristate "Amiga FFS file system support (EXPERIMENTAL)" 2 tristate "Amiga FFS file system support"
3 depends on BLOCK && EXPERIMENTAL 3 depends on BLOCK
4 help 4 help
5 The Fast File System (FFS) is the common file system used on hard 5 The Fast File System (FFS) is the common file system used on hard
6 disks by Amiga(tm) systems since AmigaOS Version 1.3 (34.20). Say Y 6 disks by Amiga(tm) systems since AmigaOS Version 1.3 (34.20). Say Y
diff --git a/fs/afs/Kconfig b/fs/afs/Kconfig
index 8f975f25b486..ebba3b18e5da 100644
--- a/fs/afs/Kconfig
+++ b/fs/afs/Kconfig
@@ -1,6 +1,6 @@
1config AFS_FS 1config AFS_FS
2 tristate "Andrew File System support (AFS) (EXPERIMENTAL)" 2 tristate "Andrew File System support (AFS)"
3 depends on INET && EXPERIMENTAL 3 depends on INET
4 select AF_RXRPC 4 select AF_RXRPC
5 select DNS_RESOLVER 5 select DNS_RESOLVER
6 help 6 help
@@ -22,8 +22,7 @@ config AFS_DEBUG
22 If unsure, say N. 22 If unsure, say N.
23 23
24config AFS_FSCACHE 24config AFS_FSCACHE
25 bool "Provide AFS client caching support (EXPERIMENTAL)" 25 bool "Provide AFS client caching support"
26 depends on EXPERIMENTAL
27 depends on AFS_FS=m && FSCACHE || AFS_FS=y && FSCACHE=y 26 depends on AFS_FS=m && FSCACHE || AFS_FS=y && FSCACHE=y
28 help 27 help
29 Say Y here if you want AFS data to be cached locally on disk through 28 Say Y here if you want AFS data to be cached locally on disk through
diff --git a/fs/afs/afs.h b/fs/afs/afs.h
index c548aa346f0d..3c462ff6db63 100644
--- a/fs/afs/afs.h
+++ b/fs/afs/afs.h
@@ -119,8 +119,8 @@ struct afs_file_status {
119 u64 size; /* file size */ 119 u64 size; /* file size */
120 afs_dataversion_t data_version; /* current data version */ 120 afs_dataversion_t data_version; /* current data version */
121 u32 author; /* author ID */ 121 u32 author; /* author ID */
122 u32 owner; /* owner ID */ 122 kuid_t owner; /* owner ID */
123 u32 group; /* group ID */ 123 kgid_t group; /* group ID */
124 afs_access_t caller_access; /* access rights for authenticated caller */ 124 afs_access_t caller_access; /* access rights for authenticated caller */
125 afs_access_t anon_access; /* access rights for unauthenticated caller */ 125 afs_access_t anon_access; /* access rights for unauthenticated caller */
126 umode_t mode; /* UNIX mode */ 126 umode_t mode; /* UNIX mode */
@@ -133,13 +133,6 @@ struct afs_file_status {
133/* 133/*
134 * AFS file status change request 134 * AFS file status change request
135 */ 135 */
136struct afs_store_status {
137 u32 mask; /* which bits of the struct are set */
138 u32 mtime_client; /* last time client changed data */
139 u32 owner; /* owner ID */
140 u32 group; /* group ID */
141 umode_t mode; /* UNIX mode */
142};
143 136
144#define AFS_SET_MTIME 0x01 /* set the mtime */ 137#define AFS_SET_MTIME 0x01 /* set the mtime */
145#define AFS_SET_OWNER 0x02 /* set the owner ID */ 138#define AFS_SET_OWNER 0x02 /* set the owner ID */
diff --git a/fs/afs/fsclient.c b/fs/afs/fsclient.c
index b960ff05ea0b..c2e930ec2888 100644
--- a/fs/afs/fsclient.c
+++ b/fs/afs/fsclient.c
@@ -42,6 +42,8 @@ static void xdr_decode_AFSFetchStatus(const __be32 **_bp,
42 umode_t mode; 42 umode_t mode;
43 u64 data_version, size; 43 u64 data_version, size;
44 u32 changed = 0; /* becomes non-zero if ctime-type changes seen */ 44 u32 changed = 0; /* becomes non-zero if ctime-type changes seen */
45 kuid_t owner;
46 kgid_t group;
45 47
46#define EXTRACT(DST) \ 48#define EXTRACT(DST) \
47 do { \ 49 do { \
@@ -56,7 +58,9 @@ static void xdr_decode_AFSFetchStatus(const __be32 **_bp,
56 size = ntohl(*bp++); 58 size = ntohl(*bp++);
57 data_version = ntohl(*bp++); 59 data_version = ntohl(*bp++);
58 EXTRACT(status->author); 60 EXTRACT(status->author);
59 EXTRACT(status->owner); 61 owner = make_kuid(&init_user_ns, ntohl(*bp++));
62 changed |= !uid_eq(owner, status->owner);
63 status->owner = owner;
60 EXTRACT(status->caller_access); /* call ticket dependent */ 64 EXTRACT(status->caller_access); /* call ticket dependent */
61 EXTRACT(status->anon_access); 65 EXTRACT(status->anon_access);
62 EXTRACT(status->mode); 66 EXTRACT(status->mode);
@@ -65,7 +69,9 @@ static void xdr_decode_AFSFetchStatus(const __be32 **_bp,
65 bp++; /* seg size */ 69 bp++; /* seg size */
66 status->mtime_client = ntohl(*bp++); 70 status->mtime_client = ntohl(*bp++);
67 status->mtime_server = ntohl(*bp++); 71 status->mtime_server = ntohl(*bp++);
68 EXTRACT(status->group); 72 group = make_kgid(&init_user_ns, ntohl(*bp++));
73 changed |= !gid_eq(group, status->group);
74 status->group = group;
69 bp++; /* sync counter */ 75 bp++; /* sync counter */
70 data_version |= (u64) ntohl(*bp++) << 32; 76 data_version |= (u64) ntohl(*bp++) << 32;
71 EXTRACT(status->lock_count); 77 EXTRACT(status->lock_count);
@@ -181,12 +187,12 @@ static void xdr_encode_AFS_StoreStatus(__be32 **_bp, struct iattr *attr)
181 187
182 if (attr->ia_valid & ATTR_UID) { 188 if (attr->ia_valid & ATTR_UID) {
183 mask |= AFS_SET_OWNER; 189 mask |= AFS_SET_OWNER;
184 owner = attr->ia_uid; 190 owner = from_kuid(&init_user_ns, attr->ia_uid);
185 } 191 }
186 192
187 if (attr->ia_valid & ATTR_GID) { 193 if (attr->ia_valid & ATTR_GID) {
188 mask |= AFS_SET_GROUP; 194 mask |= AFS_SET_GROUP;
189 group = attr->ia_gid; 195 group = from_kgid(&init_user_ns, attr->ia_gid);
190 } 196 }
191 197
192 if (attr->ia_valid & ATTR_MODE) { 198 if (attr->ia_valid & ATTR_MODE) {
diff --git a/fs/afs/inode.c b/fs/afs/inode.c
index 95cffd38239f..789bc253b5f6 100644
--- a/fs/afs/inode.c
+++ b/fs/afs/inode.c
@@ -69,7 +69,7 @@ static int afs_inode_map_status(struct afs_vnode *vnode, struct key *key)
69 69
70 set_nlink(inode, vnode->status.nlink); 70 set_nlink(inode, vnode->status.nlink);
71 inode->i_uid = vnode->status.owner; 71 inode->i_uid = vnode->status.owner;
72 inode->i_gid = 0; 72 inode->i_gid = GLOBAL_ROOT_GID;
73 inode->i_size = vnode->status.size; 73 inode->i_size = vnode->status.size;
74 inode->i_ctime.tv_sec = vnode->status.mtime_server; 74 inode->i_ctime.tv_sec = vnode->status.mtime_server;
75 inode->i_ctime.tv_nsec = 0; 75 inode->i_ctime.tv_nsec = 0;
@@ -175,8 +175,8 @@ struct inode *afs_iget_autocell(struct inode *dir, const char *dev_name,
175 inode->i_mode = S_IFDIR | S_IRUGO | S_IXUGO; 175 inode->i_mode = S_IFDIR | S_IRUGO | S_IXUGO;
176 inode->i_op = &afs_autocell_inode_operations; 176 inode->i_op = &afs_autocell_inode_operations;
177 set_nlink(inode, 2); 177 set_nlink(inode, 2);
178 inode->i_uid = 0; 178 inode->i_uid = GLOBAL_ROOT_UID;
179 inode->i_gid = 0; 179 inode->i_gid = GLOBAL_ROOT_GID;
180 inode->i_ctime.tv_sec = get_seconds(); 180 inode->i_ctime.tv_sec = get_seconds();
181 inode->i_ctime.tv_nsec = 0; 181 inode->i_ctime.tv_nsec = 0;
182 inode->i_atime = inode->i_mtime = inode->i_ctime; 182 inode->i_atime = inode->i_mtime = inode->i_ctime;
diff --git a/fs/afs/super.c b/fs/afs/super.c
index 43165009428d..7c31ec399575 100644
--- a/fs/afs/super.c
+++ b/fs/afs/super.c
@@ -24,6 +24,8 @@
24#include <linux/parser.h> 24#include <linux/parser.h>
25#include <linux/statfs.h> 25#include <linux/statfs.h>
26#include <linux/sched.h> 26#include <linux/sched.h>
27#include <linux/nsproxy.h>
28#include <net/net_namespace.h>
27#include "internal.h" 29#include "internal.h"
28 30
29#define AFS_FS_MAGIC 0x6B414653 /* 'kAFS' */ 31#define AFS_FS_MAGIC 0x6B414653 /* 'kAFS' */
@@ -363,6 +365,10 @@ static struct dentry *afs_mount(struct file_system_type *fs_type,
363 365
364 memset(&params, 0, sizeof(params)); 366 memset(&params, 0, sizeof(params));
365 367
368 ret = -EINVAL;
369 if (current->nsproxy->net_ns != &init_net)
370 goto error;
371
366 /* parse the options and device name */ 372 /* parse the options and device name */
367 if (options) { 373 if (options) {
368 ret = afs_parse_options(&params, options, &dev_name); 374 ret = afs_parse_options(&params, options, &dev_name);
diff --git a/fs/aio.c b/fs/aio.c
index 71f613cf4a85..064bfbe37566 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -101,7 +101,7 @@ static int aio_setup_ring(struct kioctx *ctx)
101 struct aio_ring *ring; 101 struct aio_ring *ring;
102 struct aio_ring_info *info = &ctx->ring_info; 102 struct aio_ring_info *info = &ctx->ring_info;
103 unsigned nr_events = ctx->max_reqs; 103 unsigned nr_events = ctx->max_reqs;
104 unsigned long size; 104 unsigned long size, populate;
105 int nr_pages; 105 int nr_pages;
106 106
107 /* Compensate for the ring buffer's head/tail overlap entry */ 107 /* Compensate for the ring buffer's head/tail overlap entry */
@@ -129,7 +129,8 @@ static int aio_setup_ring(struct kioctx *ctx)
129 down_write(&ctx->mm->mmap_sem); 129 down_write(&ctx->mm->mmap_sem);
130 info->mmap_base = do_mmap_pgoff(NULL, 0, info->mmap_size, 130 info->mmap_base = do_mmap_pgoff(NULL, 0, info->mmap_size,
131 PROT_READ|PROT_WRITE, 131 PROT_READ|PROT_WRITE,
132 MAP_ANONYMOUS|MAP_PRIVATE, 0); 132 MAP_ANONYMOUS|MAP_PRIVATE, 0,
133 &populate);
133 if (IS_ERR((void *)info->mmap_base)) { 134 if (IS_ERR((void *)info->mmap_base)) {
134 up_write(&ctx->mm->mmap_sem); 135 up_write(&ctx->mm->mmap_sem);
135 info->mmap_size = 0; 136 info->mmap_size = 0;
@@ -147,6 +148,8 @@ static int aio_setup_ring(struct kioctx *ctx)
147 aio_free_ring(ctx); 148 aio_free_ring(ctx);
148 return -EAGAIN; 149 return -EAGAIN;
149 } 150 }
151 if (populate)
152 mm_populate(info->mmap_base, populate);
150 153
151 ctx->user_id = info->mmap_base; 154 ctx->user_id = info->mmap_base;
152 155
diff --git a/fs/befs/Kconfig b/fs/befs/Kconfig
index 7835d30f211f..edc5cc2aefad 100644
--- a/fs/befs/Kconfig
+++ b/fs/befs/Kconfig
@@ -1,6 +1,6 @@
1config BEFS_FS 1config BEFS_FS
2 tristate "BeOS file system (BeFS) support (read only) (EXPERIMENTAL)" 2 tristate "BeOS file system (BeFS) support (read only)"
3 depends on BLOCK && EXPERIMENTAL 3 depends on BLOCK
4 select NLS 4 select NLS
5 help 5 help
6 The BeOS File System (BeFS) is the native file system of Be, Inc's 6 The BeOS File System (BeFS) is the native file system of Be, Inc's
diff --git a/fs/bfs/Kconfig b/fs/bfs/Kconfig
index c2336c62024f..3728a6479c64 100644
--- a/fs/bfs/Kconfig
+++ b/fs/bfs/Kconfig
@@ -1,6 +1,6 @@
1config BFS_FS 1config BFS_FS
2 tristate "BFS file system support (EXPERIMENTAL)" 2 tristate "BFS file system support"
3 depends on BLOCK && EXPERIMENTAL 3 depends on BLOCK
4 help 4 help
5 Boot File System (BFS) is a file system used under SCO UnixWare to 5 Boot File System (BFS) is a file system used under SCO UnixWare to
6 allow the bootloader access to the kernel image and other important 6 allow the bootloader access to the kernel image and other important
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 0c42cdbabecf..ff9dbc630efa 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -33,6 +33,7 @@
33#include <linux/elf.h> 33#include <linux/elf.h>
34#include <linux/utsname.h> 34#include <linux/utsname.h>
35#include <linux/coredump.h> 35#include <linux/coredump.h>
36#include <linux/sched.h>
36#include <asm/uaccess.h> 37#include <asm/uaccess.h>
37#include <asm/param.h> 38#include <asm/param.h>
38#include <asm/page.h> 39#include <asm/page.h>
@@ -1248,7 +1249,7 @@ static int writenote(struct memelfnote *men, struct file *file,
1248#undef DUMP_WRITE 1249#undef DUMP_WRITE
1249 1250
1250static void fill_elf_header(struct elfhdr *elf, int segs, 1251static void fill_elf_header(struct elfhdr *elf, int segs,
1251 u16 machine, u32 flags, u8 osabi) 1252 u16 machine, u32 flags)
1252{ 1253{
1253 memset(elf, 0, sizeof(*elf)); 1254 memset(elf, 0, sizeof(*elf));
1254 1255
@@ -1320,8 +1321,11 @@ static void fill_prstatus(struct elf_prstatus *prstatus,
1320 cputime_to_timeval(cputime.utime, &prstatus->pr_utime); 1321 cputime_to_timeval(cputime.utime, &prstatus->pr_utime);
1321 cputime_to_timeval(cputime.stime, &prstatus->pr_stime); 1322 cputime_to_timeval(cputime.stime, &prstatus->pr_stime);
1322 } else { 1323 } else {
1323 cputime_to_timeval(p->utime, &prstatus->pr_utime); 1324 cputime_t utime, stime;
1324 cputime_to_timeval(p->stime, &prstatus->pr_stime); 1325
1326 task_cputime(p, &utime, &stime);
1327 cputime_to_timeval(utime, &prstatus->pr_utime);
1328 cputime_to_timeval(stime, &prstatus->pr_stime);
1325 } 1329 }
1326 cputime_to_timeval(p->signal->cutime, &prstatus->pr_cutime); 1330 cputime_to_timeval(p->signal->cutime, &prstatus->pr_cutime);
1327 cputime_to_timeval(p->signal->cstime, &prstatus->pr_cstime); 1331 cputime_to_timeval(p->signal->cstime, &prstatus->pr_cstime);
@@ -1630,7 +1634,7 @@ static int fill_note_info(struct elfhdr *elf, int phdrs,
1630 * Initialize the ELF file header. 1634 * Initialize the ELF file header.
1631 */ 1635 */
1632 fill_elf_header(elf, phdrs, 1636 fill_elf_header(elf, phdrs,
1633 view->e_machine, view->e_flags, view->ei_osabi); 1637 view->e_machine, view->e_flags);
1634 1638
1635 /* 1639 /*
1636 * Allocate a structure for each thread. 1640 * Allocate a structure for each thread.
@@ -1870,7 +1874,7 @@ static int fill_note_info(struct elfhdr *elf, int phdrs,
1870 elf_core_copy_regs(&info->prstatus->pr_reg, regs); 1874 elf_core_copy_regs(&info->prstatus->pr_reg, regs);
1871 1875
1872 /* Set up header */ 1876 /* Set up header */
1873 fill_elf_header(elf, phdrs, ELF_ARCH, ELF_CORE_EFLAGS, ELF_OSABI); 1877 fill_elf_header(elf, phdrs, ELF_ARCH, ELF_CORE_EFLAGS);
1874 1878
1875 /* 1879 /*
1876 * Set up the notes in similar form to SVR4 core dumps made 1880 * Set up the notes in similar form to SVR4 core dumps made
diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c
index dc84732e554f..cb240dd3b402 100644
--- a/fs/binfmt_elf_fdpic.c
+++ b/fs/binfmt_elf_fdpic.c
@@ -1375,8 +1375,11 @@ static void fill_prstatus(struct elf_prstatus *prstatus,
1375 cputime_to_timeval(cputime.utime, &prstatus->pr_utime); 1375 cputime_to_timeval(cputime.utime, &prstatus->pr_utime);
1376 cputime_to_timeval(cputime.stime, &prstatus->pr_stime); 1376 cputime_to_timeval(cputime.stime, &prstatus->pr_stime);
1377 } else { 1377 } else {
1378 cputime_to_timeval(p->utime, &prstatus->pr_utime); 1378 cputime_t utime, stime;
1379 cputime_to_timeval(p->stime, &prstatus->pr_stime); 1379
1380 task_cputime(p, &utime, &stime);
1381 cputime_to_timeval(utime, &prstatus->pr_utime);
1382 cputime_to_timeval(stime, &prstatus->pr_stime);
1380 } 1383 }
1381 cputime_to_timeval(p->signal->cutime, &prstatus->pr_cutime); 1384 cputime_to_timeval(p->signal->cutime, &prstatus->pr_cutime);
1382 cputime_to_timeval(p->signal->cstime, &prstatus->pr_cstime); 1385 cputime_to_timeval(p->signal->cstime, &prstatus->pr_cstime);
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 172f8491a2bd..78333a37f49d 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -994,6 +994,7 @@ int revalidate_disk(struct gendisk *disk)
994 994
995 mutex_lock(&bdev->bd_mutex); 995 mutex_lock(&bdev->bd_mutex);
996 check_disk_size_change(disk, bdev); 996 check_disk_size_change(disk, bdev);
997 bdev->bd_invalidated = 0;
997 mutex_unlock(&bdev->bd_mutex); 998 mutex_unlock(&bdev->bd_mutex);
998 bdput(bdev); 999 bdput(bdev);
999 return ret; 1000 return ret;
diff --git a/fs/btrfs/Kconfig b/fs/btrfs/Kconfig
index d33f01c08b60..ccd25ba7a9ac 100644
--- a/fs/btrfs/Kconfig
+++ b/fs/btrfs/Kconfig
@@ -1,6 +1,5 @@
1config BTRFS_FS 1config BTRFS_FS
2 tristate "Btrfs filesystem (EXPERIMENTAL) Unstable disk format" 2 tristate "Btrfs filesystem Unstable disk format"
3 depends on EXPERIMENTAL
4 select LIBCRC32C 3 select LIBCRC32C
5 select ZLIB_INFLATE 4 select ZLIB_INFLATE
6 select ZLIB_DEFLATE 5 select ZLIB_DEFLATE
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 521e9d4424f6..1e59ed575cc9 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -3997,7 +3997,7 @@ again:
3997 * We make the other tasks wait for the flush only when we can flush 3997 * We make the other tasks wait for the flush only when we can flush
3998 * all things. 3998 * all things.
3999 */ 3999 */
4000 if (ret && flush == BTRFS_RESERVE_FLUSH_ALL) { 4000 if (ret && flush != BTRFS_RESERVE_NO_FLUSH) {
4001 flushing = true; 4001 flushing = true;
4002 space_info->flush = 1; 4002 space_info->flush = 1;
4003 } 4003 }
@@ -4534,7 +4534,7 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
4534 unsigned nr_extents = 0; 4534 unsigned nr_extents = 0;
4535 int extra_reserve = 0; 4535 int extra_reserve = 0;
4536 enum btrfs_reserve_flush_enum flush = BTRFS_RESERVE_FLUSH_ALL; 4536 enum btrfs_reserve_flush_enum flush = BTRFS_RESERVE_FLUSH_ALL;
4537 int ret; 4537 int ret = 0;
4538 bool delalloc_lock = true; 4538 bool delalloc_lock = true;
4539 4539
4540 /* If we are a free space inode we need to not flush since we will be in 4540 /* If we are a free space inode we need to not flush since we will be in
@@ -4579,20 +4579,18 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
4579 csum_bytes = BTRFS_I(inode)->csum_bytes; 4579 csum_bytes = BTRFS_I(inode)->csum_bytes;
4580 spin_unlock(&BTRFS_I(inode)->lock); 4580 spin_unlock(&BTRFS_I(inode)->lock);
4581 4581
4582 if (root->fs_info->quota_enabled) { 4582 if (root->fs_info->quota_enabled)
4583 ret = btrfs_qgroup_reserve(root, num_bytes + 4583 ret = btrfs_qgroup_reserve(root, num_bytes +
4584 nr_extents * root->leafsize); 4584 nr_extents * root->leafsize);
4585 if (ret) {
4586 spin_lock(&BTRFS_I(inode)->lock);
4587 calc_csum_metadata_size(inode, num_bytes, 0);
4588 spin_unlock(&BTRFS_I(inode)->lock);
4589 if (delalloc_lock)
4590 mutex_unlock(&BTRFS_I(inode)->delalloc_mutex);
4591 return ret;
4592 }
4593 }
4594 4585
4595 ret = reserve_metadata_bytes(root, block_rsv, to_reserve, flush); 4586 /*
4587 * ret != 0 here means the qgroup reservation failed, we go straight to
4588 * the shared error handling then.
4589 */
4590 if (ret == 0)
4591 ret = reserve_metadata_bytes(root, block_rsv,
4592 to_reserve, flush);
4593
4596 if (ret) { 4594 if (ret) {
4597 u64 to_free = 0; 4595 u64 to_free = 0;
4598 unsigned dropped; 4596 unsigned dropped;
@@ -5560,7 +5558,7 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans,
5560 int empty_cluster = 2 * 1024 * 1024; 5558 int empty_cluster = 2 * 1024 * 1024;
5561 struct btrfs_space_info *space_info; 5559 struct btrfs_space_info *space_info;
5562 int loop = 0; 5560 int loop = 0;
5563 int index = 0; 5561 int index = __get_raid_index(data);
5564 int alloc_type = (data & BTRFS_BLOCK_GROUP_DATA) ? 5562 int alloc_type = (data & BTRFS_BLOCK_GROUP_DATA) ?
5565 RESERVE_ALLOC_NO_ACCOUNT : RESERVE_ALLOC; 5563 RESERVE_ALLOC_NO_ACCOUNT : RESERVE_ALLOC;
5566 bool found_uncached_bg = false; 5564 bool found_uncached_bg = false;
@@ -6524,7 +6522,7 @@ reada:
6524} 6522}
6525 6523
6526/* 6524/*
6527 * hepler to process tree block while walking down the tree. 6525 * helper to process tree block while walking down the tree.
6528 * 6526 *
6529 * when wc->stage == UPDATE_BACKREF, this function updates 6527 * when wc->stage == UPDATE_BACKREF, this function updates
6530 * back refs for pointers in the block. 6528 * back refs for pointers in the block.
@@ -6599,7 +6597,7 @@ static noinline int walk_down_proc(struct btrfs_trans_handle *trans,
6599} 6597}
6600 6598
6601/* 6599/*
6602 * hepler to process tree block pointer. 6600 * helper to process tree block pointer.
6603 * 6601 *
6604 * when wc->stage == DROP_REFERENCE, this function checks 6602 * when wc->stage == DROP_REFERENCE, this function checks
6605 * reference count of the block pointed to. if the block 6603 * reference count of the block pointed to. if the block
@@ -6737,7 +6735,7 @@ skip:
6737} 6735}
6738 6736
6739/* 6737/*
6740 * hepler to process tree block while walking up the tree. 6738 * helper to process tree block while walking up the tree.
6741 * 6739 *
6742 * when wc->stage == DROP_REFERENCE, this function drops 6740 * when wc->stage == DROP_REFERENCE, this function drops
6743 * reference count on the block. 6741 * reference count on the block.
@@ -6788,11 +6786,13 @@ static noinline int walk_up_proc(struct btrfs_trans_handle *trans,
6788 &wc->flags[level]); 6786 &wc->flags[level]);
6789 if (ret < 0) { 6787 if (ret < 0) {
6790 btrfs_tree_unlock_rw(eb, path->locks[level]); 6788 btrfs_tree_unlock_rw(eb, path->locks[level]);
6789 path->locks[level] = 0;
6791 return ret; 6790 return ret;
6792 } 6791 }
6793 BUG_ON(wc->refs[level] == 0); 6792 BUG_ON(wc->refs[level] == 0);
6794 if (wc->refs[level] == 1) { 6793 if (wc->refs[level] == 1) {
6795 btrfs_tree_unlock_rw(eb, path->locks[level]); 6794 btrfs_tree_unlock_rw(eb, path->locks[level]);
6795 path->locks[level] = 0;
6796 return 1; 6796 return 1;
6797 } 6797 }
6798 } 6798 }
diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c
index f169d6b11d7f..fdb7a8db3b57 100644
--- a/fs/btrfs/extent_map.c
+++ b/fs/btrfs/extent_map.c
@@ -171,6 +171,10 @@ static int mergable_maps(struct extent_map *prev, struct extent_map *next)
171 if (test_bit(EXTENT_FLAG_COMPRESSED, &prev->flags)) 171 if (test_bit(EXTENT_FLAG_COMPRESSED, &prev->flags))
172 return 0; 172 return 0;
173 173
174 if (test_bit(EXTENT_FLAG_LOGGING, &prev->flags) ||
175 test_bit(EXTENT_FLAG_LOGGING, &next->flags))
176 return 0;
177
174 if (extent_map_end(prev) == next->start && 178 if (extent_map_end(prev) == next->start &&
175 prev->flags == next->flags && 179 prev->flags == next->flags &&
176 prev->bdev == next->bdev && 180 prev->bdev == next->bdev &&
@@ -255,7 +259,8 @@ int unpin_extent_cache(struct extent_map_tree *tree, u64 start, u64 len,
255 if (!em) 259 if (!em)
256 goto out; 260 goto out;
257 261
258 list_move(&em->list, &tree->modified_extents); 262 if (!test_bit(EXTENT_FLAG_LOGGING, &em->flags))
263 list_move(&em->list, &tree->modified_extents);
259 em->generation = gen; 264 em->generation = gen;
260 clear_bit(EXTENT_FLAG_PINNED, &em->flags); 265 clear_bit(EXTENT_FLAG_PINNED, &em->flags);
261 em->mod_start = em->start; 266 em->mod_start = em->start;
@@ -280,6 +285,13 @@ out:
280 285
281} 286}
282 287
288void clear_em_logging(struct extent_map_tree *tree, struct extent_map *em)
289{
290 clear_bit(EXTENT_FLAG_LOGGING, &em->flags);
291 if (em->in_tree)
292 try_merge_map(tree, em);
293}
294
283/** 295/**
284 * add_extent_mapping - add new extent map to the extent tree 296 * add_extent_mapping - add new extent map to the extent tree
285 * @tree: tree to insert new map in 297 * @tree: tree to insert new map in
diff --git a/fs/btrfs/extent_map.h b/fs/btrfs/extent_map.h
index 922943ce29e8..c6598c89cff8 100644
--- a/fs/btrfs/extent_map.h
+++ b/fs/btrfs/extent_map.h
@@ -69,6 +69,7 @@ void free_extent_map(struct extent_map *em);
69int __init extent_map_init(void); 69int __init extent_map_init(void);
70void extent_map_exit(void); 70void extent_map_exit(void);
71int unpin_extent_cache(struct extent_map_tree *tree, u64 start, u64 len, u64 gen); 71int unpin_extent_cache(struct extent_map_tree *tree, u64 start, u64 len, u64 gen);
72void clear_em_logging(struct extent_map_tree *tree, struct extent_map *em);
72struct extent_map *search_extent_mapping(struct extent_map_tree *tree, 73struct extent_map *search_extent_mapping(struct extent_map_tree *tree,
73 u64 start, u64 len); 74 u64 start, u64 len);
74#endif 75#endif
diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c
index bd38cef42358..94aa53b38721 100644
--- a/fs/btrfs/file-item.c
+++ b/fs/btrfs/file-item.c
@@ -460,8 +460,8 @@ int btrfs_csum_one_bio(struct btrfs_root *root, struct inode *inode,
460 if (!contig) 460 if (!contig)
461 offset = page_offset(bvec->bv_page) + bvec->bv_offset; 461 offset = page_offset(bvec->bv_page) + bvec->bv_offset;
462 462
463 if (!contig && (offset >= ordered->file_offset + ordered->len || 463 if (offset >= ordered->file_offset + ordered->len ||
464 offset < ordered->file_offset)) { 464 offset < ordered->file_offset) {
465 unsigned long bytes_left; 465 unsigned long bytes_left;
466 sums->len = this_sum_bytes; 466 sums->len = this_sum_bytes;
467 this_sum_bytes = 0; 467 this_sum_bytes = 0;
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 77061bf43edb..aeb84469d2c4 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -293,15 +293,24 @@ static int __btrfs_run_defrag_inode(struct btrfs_fs_info *fs_info,
293 struct btrfs_key key; 293 struct btrfs_key key;
294 struct btrfs_ioctl_defrag_range_args range; 294 struct btrfs_ioctl_defrag_range_args range;
295 int num_defrag; 295 int num_defrag;
296 int index;
297 int ret;
296 298
297 /* get the inode */ 299 /* get the inode */
298 key.objectid = defrag->root; 300 key.objectid = defrag->root;
299 btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY); 301 btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
300 key.offset = (u64)-1; 302 key.offset = (u64)-1;
303
304 index = srcu_read_lock(&fs_info->subvol_srcu);
305
301 inode_root = btrfs_read_fs_root_no_name(fs_info, &key); 306 inode_root = btrfs_read_fs_root_no_name(fs_info, &key);
302 if (IS_ERR(inode_root)) { 307 if (IS_ERR(inode_root)) {
303 kmem_cache_free(btrfs_inode_defrag_cachep, defrag); 308 ret = PTR_ERR(inode_root);
304 return PTR_ERR(inode_root); 309 goto cleanup;
310 }
311 if (btrfs_root_refs(&inode_root->root_item) == 0) {
312 ret = -ENOENT;
313 goto cleanup;
305 } 314 }
306 315
307 key.objectid = defrag->ino; 316 key.objectid = defrag->ino;
@@ -309,9 +318,10 @@ static int __btrfs_run_defrag_inode(struct btrfs_fs_info *fs_info,
309 key.offset = 0; 318 key.offset = 0;
310 inode = btrfs_iget(fs_info->sb, &key, inode_root, NULL); 319 inode = btrfs_iget(fs_info->sb, &key, inode_root, NULL);
311 if (IS_ERR(inode)) { 320 if (IS_ERR(inode)) {
312 kmem_cache_free(btrfs_inode_defrag_cachep, defrag); 321 ret = PTR_ERR(inode);
313 return PTR_ERR(inode); 322 goto cleanup;
314 } 323 }
324 srcu_read_unlock(&fs_info->subvol_srcu, index);
315 325
316 /* do a chunk of defrag */ 326 /* do a chunk of defrag */
317 clear_bit(BTRFS_INODE_IN_DEFRAG, &BTRFS_I(inode)->runtime_flags); 327 clear_bit(BTRFS_INODE_IN_DEFRAG, &BTRFS_I(inode)->runtime_flags);
@@ -346,6 +356,10 @@ static int __btrfs_run_defrag_inode(struct btrfs_fs_info *fs_info,
346 356
347 iput(inode); 357 iput(inode);
348 return 0; 358 return 0;
359cleanup:
360 srcu_read_unlock(&fs_info->subvol_srcu, index);
361 kmem_cache_free(btrfs_inode_defrag_cachep, defrag);
362 return ret;
349} 363}
350 364
351/* 365/*
@@ -1594,9 +1608,10 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
1594 if (err < 0 && num_written > 0) 1608 if (err < 0 && num_written > 0)
1595 num_written = err; 1609 num_written = err;
1596 } 1610 }
1597out: 1611
1598 if (sync) 1612 if (sync)
1599 atomic_dec(&BTRFS_I(inode)->sync_writers); 1613 atomic_dec(&BTRFS_I(inode)->sync_writers);
1614out:
1600 sb_end_write(inode->i_sb); 1615 sb_end_write(inode->i_sb);
1601 current->backing_dev_info = NULL; 1616 current->backing_dev_info = NULL;
1602 return num_written ? num_written : err; 1617 return num_written ? num_written : err;
@@ -2241,6 +2256,7 @@ static int find_desired_extent(struct inode *inode, loff_t *offset, int whence)
2241 if (lockend <= lockstart) 2256 if (lockend <= lockstart)
2242 lockend = lockstart + root->sectorsize; 2257 lockend = lockstart + root->sectorsize;
2243 2258
2259 lockend--;
2244 len = lockend - lockstart + 1; 2260 len = lockend - lockstart + 1;
2245 2261
2246 len = max_t(u64, len, root->sectorsize); 2262 len = max_t(u64, len, root->sectorsize);
@@ -2307,9 +2323,12 @@ static int find_desired_extent(struct inode *inode, loff_t *offset, int whence)
2307 } 2323 }
2308 } 2324 }
2309 2325
2310 *offset = start; 2326 if (!test_bit(EXTENT_FLAG_PREALLOC,
2311 free_extent_map(em); 2327 &em->flags)) {
2312 break; 2328 *offset = start;
2329 free_extent_map(em);
2330 break;
2331 }
2313 } 2332 }
2314 } 2333 }
2315 2334
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index 59ea2e4349c9..0be7a8742a43 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -1862,11 +1862,13 @@ int btrfs_remove_free_space(struct btrfs_block_group_cache *block_group,
1862{ 1862{
1863 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 1863 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
1864 struct btrfs_free_space *info; 1864 struct btrfs_free_space *info;
1865 int ret = 0; 1865 int ret;
1866 bool re_search = false;
1866 1867
1867 spin_lock(&ctl->tree_lock); 1868 spin_lock(&ctl->tree_lock);
1868 1869
1869again: 1870again:
1871 ret = 0;
1870 if (!bytes) 1872 if (!bytes)
1871 goto out_lock; 1873 goto out_lock;
1872 1874
@@ -1879,17 +1881,17 @@ again:
1879 info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset), 1881 info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset),
1880 1, 0); 1882 1, 0);
1881 if (!info) { 1883 if (!info) {
1882 /* the tree logging code might be calling us before we 1884 /*
1883 * have fully loaded the free space rbtree for this 1885 * If we found a partial bit of our free space in a
1884 * block group. So it is possible the entry won't 1886 * bitmap but then couldn't find the other part this may
1885 * be in the rbtree yet at all. The caching code 1887 * be a problem, so WARN about it.
1886 * will make sure not to put it in the rbtree if
1887 * the logging code has pinned it.
1888 */ 1888 */
1889 WARN_ON(re_search);
1889 goto out_lock; 1890 goto out_lock;
1890 } 1891 }
1891 } 1892 }
1892 1893
1894 re_search = false;
1893 if (!info->bitmap) { 1895 if (!info->bitmap) {
1894 unlink_free_space(ctl, info); 1896 unlink_free_space(ctl, info);
1895 if (offset == info->offset) { 1897 if (offset == info->offset) {
@@ -1935,8 +1937,10 @@ again:
1935 } 1937 }
1936 1938
1937 ret = remove_from_bitmap(ctl, info, &offset, &bytes); 1939 ret = remove_from_bitmap(ctl, info, &offset, &bytes);
1938 if (ret == -EAGAIN) 1940 if (ret == -EAGAIN) {
1941 re_search = true;
1939 goto again; 1942 goto again;
1943 }
1940 BUG_ON(ret); /* logic error */ 1944 BUG_ON(ret); /* logic error */
1941out_lock: 1945out_lock:
1942 spin_unlock(&ctl->tree_lock); 1946 spin_unlock(&ctl->tree_lock);
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 16d9e8e191e6..cc93b23ca352 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -88,7 +88,7 @@ static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
88 [S_IFLNK >> S_SHIFT] = BTRFS_FT_SYMLINK, 88 [S_IFLNK >> S_SHIFT] = BTRFS_FT_SYMLINK,
89}; 89};
90 90
91static int btrfs_setsize(struct inode *inode, loff_t newsize); 91static int btrfs_setsize(struct inode *inode, struct iattr *attr);
92static int btrfs_truncate(struct inode *inode); 92static int btrfs_truncate(struct inode *inode);
93static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent); 93static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent);
94static noinline int cow_file_range(struct inode *inode, 94static noinline int cow_file_range(struct inode *inode,
@@ -2478,6 +2478,18 @@ int btrfs_orphan_cleanup(struct btrfs_root *root)
2478 continue; 2478 continue;
2479 } 2479 }
2480 nr_truncate++; 2480 nr_truncate++;
2481
2482 /* 1 for the orphan item deletion. */
2483 trans = btrfs_start_transaction(root, 1);
2484 if (IS_ERR(trans)) {
2485 ret = PTR_ERR(trans);
2486 goto out;
2487 }
2488 ret = btrfs_orphan_add(trans, inode);
2489 btrfs_end_transaction(trans, root);
2490 if (ret)
2491 goto out;
2492
2481 ret = btrfs_truncate(inode); 2493 ret = btrfs_truncate(inode);
2482 } else { 2494 } else {
2483 nr_unlink++; 2495 nr_unlink++;
@@ -3665,6 +3677,7 @@ int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size)
3665 block_end - cur_offset, 0); 3677 block_end - cur_offset, 0);
3666 if (IS_ERR(em)) { 3678 if (IS_ERR(em)) {
3667 err = PTR_ERR(em); 3679 err = PTR_ERR(em);
3680 em = NULL;
3668 break; 3681 break;
3669 } 3682 }
3670 last_byte = min(extent_map_end(em), block_end); 3683 last_byte = min(extent_map_end(em), block_end);
@@ -3748,16 +3761,27 @@ next:
3748 return err; 3761 return err;
3749} 3762}
3750 3763
3751static int btrfs_setsize(struct inode *inode, loff_t newsize) 3764static int btrfs_setsize(struct inode *inode, struct iattr *attr)
3752{ 3765{
3753 struct btrfs_root *root = BTRFS_I(inode)->root; 3766 struct btrfs_root *root = BTRFS_I(inode)->root;
3754 struct btrfs_trans_handle *trans; 3767 struct btrfs_trans_handle *trans;
3755 loff_t oldsize = i_size_read(inode); 3768 loff_t oldsize = i_size_read(inode);
3769 loff_t newsize = attr->ia_size;
3770 int mask = attr->ia_valid;
3756 int ret; 3771 int ret;
3757 3772
3758 if (newsize == oldsize) 3773 if (newsize == oldsize)
3759 return 0; 3774 return 0;
3760 3775
3776 /*
3777 * The regular truncate() case without ATTR_CTIME and ATTR_MTIME is a
3778 * special case where we need to update the times despite not having
3780 * these flags set. For all other operations the VFS sets these flags
3780 * explicitly if it wants a timestamp update.
3781 */
3782 if (newsize != oldsize && (!(mask & (ATTR_CTIME | ATTR_MTIME))))
3783 inode->i_ctime = inode->i_mtime = current_fs_time(inode->i_sb);
3784
3761 if (newsize > oldsize) { 3785 if (newsize > oldsize) {
3762 truncate_pagecache(inode, oldsize, newsize); 3786 truncate_pagecache(inode, oldsize, newsize);
3763 ret = btrfs_cont_expand(inode, oldsize, newsize); 3787 ret = btrfs_cont_expand(inode, oldsize, newsize);
@@ -3783,9 +3807,34 @@ static int btrfs_setsize(struct inode *inode, loff_t newsize)
3783 set_bit(BTRFS_INODE_ORDERED_DATA_CLOSE, 3807 set_bit(BTRFS_INODE_ORDERED_DATA_CLOSE,
3784 &BTRFS_I(inode)->runtime_flags); 3808 &BTRFS_I(inode)->runtime_flags);
3785 3809
3810 /*
3811 * 1 for the orphan item we're going to add
3812 * 1 for the orphan item deletion.
3813 */
3814 trans = btrfs_start_transaction(root, 2);
3815 if (IS_ERR(trans))
3816 return PTR_ERR(trans);
3817
3818 /*
3819 * We need to do this in case we fail at _any_ point during the
3820 * actual truncate. Once we do the truncate_setsize we could
3821 * invalidate pages which forces any outstanding ordered io to
3822 * be instantly completed which will give us extents that need
3823 * to be truncated. If we fail to get an orphan inode down we
3824 * could have left over extents that were never meant to live,
3825 * so we need to guarantee from this point on that everything
3826 * will be consistent.
3827 */
3828 ret = btrfs_orphan_add(trans, inode);
3829 btrfs_end_transaction(trans, root);
3830 if (ret)
3831 return ret;
3832
3786 /* we don't support swapfiles, so vmtruncate shouldn't fail */ 3833 /* we don't support swapfiles, so vmtruncate shouldn't fail */
3787 truncate_setsize(inode, newsize); 3834 truncate_setsize(inode, newsize);
3788 ret = btrfs_truncate(inode); 3835 ret = btrfs_truncate(inode);
3836 if (ret && inode->i_nlink)
3837 btrfs_orphan_del(NULL, inode);
3789 } 3838 }
3790 3839
3791 return ret; 3840 return ret;
@@ -3805,7 +3854,7 @@ static int btrfs_setattr(struct dentry *dentry, struct iattr *attr)
3805 return err; 3854 return err;
3806 3855
3807 if (S_ISREG(inode->i_mode) && (attr->ia_valid & ATTR_SIZE)) { 3856 if (S_ISREG(inode->i_mode) && (attr->ia_valid & ATTR_SIZE)) {
3808 err = btrfs_setsize(inode, attr->ia_size); 3857 err = btrfs_setsize(inode, attr);
3809 if (err) 3858 if (err)
3810 return err; 3859 return err;
3811 } 3860 }
@@ -5572,10 +5621,13 @@ struct extent_map *btrfs_get_extent_fiemap(struct inode *inode, struct page *pag
5572 return em; 5621 return em;
5573 if (em) { 5622 if (em) {
5574 /* 5623 /*
5575 * if our em maps to a hole, there might 5624 * if our em maps to
5576 * actually be delalloc bytes behind it 5625 * - a hole or
5626 * - a pre-alloc extent,
5627 * there might actually be delalloc bytes behind it.
5577 */ 5628 */
5578 if (em->block_start != EXTENT_MAP_HOLE) 5629 if (em->block_start != EXTENT_MAP_HOLE &&
5630 !test_bit(EXTENT_FLAG_PREALLOC, &em->flags))
5579 return em; 5631 return em;
5580 else 5632 else
5581 hole_em = em; 5633 hole_em = em;
@@ -5657,6 +5709,8 @@ struct extent_map *btrfs_get_extent_fiemap(struct inode *inode, struct page *pag
5657 */ 5709 */
5658 em->block_start = hole_em->block_start; 5710 em->block_start = hole_em->block_start;
5659 em->block_len = hole_len; 5711 em->block_len = hole_len;
5712 if (test_bit(EXTENT_FLAG_PREALLOC, &hole_em->flags))
5713 set_bit(EXTENT_FLAG_PREALLOC, &em->flags);
5660 } else { 5714 } else {
5661 em->start = range_start; 5715 em->start = range_start;
5662 em->len = found; 5716 em->len = found;
@@ -6915,11 +6969,9 @@ static int btrfs_truncate(struct inode *inode)
6915 6969
6916 /* 6970 /*
6917 * 1 for the truncate slack space 6971 * 1 for the truncate slack space
6918 * 1 for the orphan item we're going to add
6919 * 1 for the orphan item deletion
6920 * 1 for updating the inode. 6972 * 1 for updating the inode.
6921 */ 6973 */
6922 trans = btrfs_start_transaction(root, 4); 6974 trans = btrfs_start_transaction(root, 2);
6923 if (IS_ERR(trans)) { 6975 if (IS_ERR(trans)) {
6924 err = PTR_ERR(trans); 6976 err = PTR_ERR(trans);
6925 goto out; 6977 goto out;
@@ -6930,12 +6982,6 @@ static int btrfs_truncate(struct inode *inode)
6930 min_size); 6982 min_size);
6931 BUG_ON(ret); 6983 BUG_ON(ret);
6932 6984
6933 ret = btrfs_orphan_add(trans, inode);
6934 if (ret) {
6935 btrfs_end_transaction(trans, root);
6936 goto out;
6937 }
6938
6939 /* 6985 /*
6940 * setattr is responsible for setting the ordered_data_close flag, 6986 * setattr is responsible for setting the ordered_data_close flag,
6941 * but that is only tested during the last file release. That 6987 * but that is only tested during the last file release. That
@@ -7004,12 +7050,6 @@ static int btrfs_truncate(struct inode *inode)
7004 ret = btrfs_orphan_del(trans, inode); 7050 ret = btrfs_orphan_del(trans, inode);
7005 if (ret) 7051 if (ret)
7006 err = ret; 7052 err = ret;
7007 } else if (ret && inode->i_nlink > 0) {
7008 /*
7009 * Failed to do the truncate, remove us from the in memory
7010 * orphan list.
7011 */
7012 ret = btrfs_orphan_del(NULL, inode);
7013 } 7053 }
7014 7054
7015 if (trans) { 7055 if (trans) {
@@ -7531,41 +7571,61 @@ void btrfs_wait_and_free_delalloc_work(struct btrfs_delalloc_work *work)
7531 */ 7571 */
7532int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput) 7572int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput)
7533{ 7573{
7534 struct list_head *head = &root->fs_info->delalloc_inodes;
7535 struct btrfs_inode *binode; 7574 struct btrfs_inode *binode;
7536 struct inode *inode; 7575 struct inode *inode;
7537 struct btrfs_delalloc_work *work, *next; 7576 struct btrfs_delalloc_work *work, *next;
7538 struct list_head works; 7577 struct list_head works;
7578 struct list_head splice;
7539 int ret = 0; 7579 int ret = 0;
7540 7580
7541 if (root->fs_info->sb->s_flags & MS_RDONLY) 7581 if (root->fs_info->sb->s_flags & MS_RDONLY)
7542 return -EROFS; 7582 return -EROFS;
7543 7583
7544 INIT_LIST_HEAD(&works); 7584 INIT_LIST_HEAD(&works);
7545 7585 INIT_LIST_HEAD(&splice);
7586again:
7546 spin_lock(&root->fs_info->delalloc_lock); 7587 spin_lock(&root->fs_info->delalloc_lock);
7547 while (!list_empty(head)) { 7588 list_splice_init(&root->fs_info->delalloc_inodes, &splice);
7548 binode = list_entry(head->next, struct btrfs_inode, 7589 while (!list_empty(&splice)) {
7590 binode = list_entry(splice.next, struct btrfs_inode,
7549 delalloc_inodes); 7591 delalloc_inodes);
7592
7593 list_del_init(&binode->delalloc_inodes);
7594
7550 inode = igrab(&binode->vfs_inode); 7595 inode = igrab(&binode->vfs_inode);
7551 if (!inode) 7596 if (!inode)
7552 list_del_init(&binode->delalloc_inodes); 7597 continue;
7598
7599 list_add_tail(&binode->delalloc_inodes,
7600 &root->fs_info->delalloc_inodes);
7553 spin_unlock(&root->fs_info->delalloc_lock); 7601 spin_unlock(&root->fs_info->delalloc_lock);
7554 if (inode) { 7602
7555 work = btrfs_alloc_delalloc_work(inode, 0, delay_iput); 7603 work = btrfs_alloc_delalloc_work(inode, 0, delay_iput);
7556 if (!work) { 7604 if (unlikely(!work)) {
7557 ret = -ENOMEM; 7605 ret = -ENOMEM;
7558 goto out; 7606 goto out;
7559 }
7560 list_add_tail(&work->list, &works);
7561 btrfs_queue_worker(&root->fs_info->flush_workers,
7562 &work->work);
7563 } 7607 }
7608 list_add_tail(&work->list, &works);
7609 btrfs_queue_worker(&root->fs_info->flush_workers,
7610 &work->work);
7611
7564 cond_resched(); 7612 cond_resched();
7565 spin_lock(&root->fs_info->delalloc_lock); 7613 spin_lock(&root->fs_info->delalloc_lock);
7566 } 7614 }
7567 spin_unlock(&root->fs_info->delalloc_lock); 7615 spin_unlock(&root->fs_info->delalloc_lock);
7568 7616
7617 list_for_each_entry_safe(work, next, &works, list) {
7618 list_del_init(&work->list);
7619 btrfs_wait_and_free_delalloc_work(work);
7620 }
7621
7622 spin_lock(&root->fs_info->delalloc_lock);
7623 if (!list_empty(&root->fs_info->delalloc_inodes)) {
7624 spin_unlock(&root->fs_info->delalloc_lock);
7625 goto again;
7626 }
7627 spin_unlock(&root->fs_info->delalloc_lock);
7628
7569 /* the filemap_flush will queue IO into the worker threads, but 7629 /* the filemap_flush will queue IO into the worker threads, but
7570 * we have to make sure the IO is actually started and that 7630 * we have to make sure the IO is actually started and that
7571 * ordered extents get created before we return 7631 * ordered extents get created before we return
@@ -7578,11 +7638,18 @@ int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput)
7578 atomic_read(&root->fs_info->async_delalloc_pages) == 0)); 7638 atomic_read(&root->fs_info->async_delalloc_pages) == 0));
7579 } 7639 }
7580 atomic_dec(&root->fs_info->async_submit_draining); 7640 atomic_dec(&root->fs_info->async_submit_draining);
7641 return 0;
7581out: 7642out:
7582 list_for_each_entry_safe(work, next, &works, list) { 7643 list_for_each_entry_safe(work, next, &works, list) {
7583 list_del_init(&work->list); 7644 list_del_init(&work->list);
7584 btrfs_wait_and_free_delalloc_work(work); 7645 btrfs_wait_and_free_delalloc_work(work);
7585 } 7646 }
7647
7648 if (!list_empty_careful(&splice)) {
7649 spin_lock(&root->fs_info->delalloc_lock);
7650 list_splice_tail(&splice, &root->fs_info->delalloc_inodes);
7651 spin_unlock(&root->fs_info->delalloc_lock);
7652 }
7586 return ret; 7653 return ret;
7587} 7654}
7588 7655
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 4b4516770f05..338f2597bf7f 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -515,7 +515,6 @@ static noinline int create_subvol(struct btrfs_root *root,
515 515
516 BUG_ON(ret); 516 BUG_ON(ret);
517 517
518 d_instantiate(dentry, btrfs_lookup_dentry(dir, dentry));
519fail: 518fail:
520 if (async_transid) { 519 if (async_transid) {
521 *async_transid = trans->transid; 520 *async_transid = trans->transid;
@@ -525,6 +524,10 @@ fail:
525 } 524 }
526 if (err && !ret) 525 if (err && !ret)
527 ret = err; 526 ret = err;
527
528 if (!ret)
529 d_instantiate(dentry, btrfs_lookup_dentry(dir, dentry));
530
528 return ret; 531 return ret;
529} 532}
530 533
@@ -1339,7 +1342,8 @@ static noinline int btrfs_ioctl_resize(struct file *file,
1339 if (atomic_xchg(&root->fs_info->mutually_exclusive_operation_running, 1342 if (atomic_xchg(&root->fs_info->mutually_exclusive_operation_running,
1340 1)) { 1343 1)) {
1341 pr_info("btrfs: dev add/delete/balance/replace/resize operation in progress\n"); 1344 pr_info("btrfs: dev add/delete/balance/replace/resize operation in progress\n");
1342 return -EINPROGRESS; 1345 mnt_drop_write_file(file);
1346 return -EINVAL;
1343 } 1347 }
1344 1348
1345 mutex_lock(&root->fs_info->volume_mutex); 1349 mutex_lock(&root->fs_info->volume_mutex);
@@ -1362,6 +1366,7 @@ static noinline int btrfs_ioctl_resize(struct file *file,
1362 printk(KERN_INFO "btrfs: resizing devid %llu\n", 1366 printk(KERN_INFO "btrfs: resizing devid %llu\n",
1363 (unsigned long long)devid); 1367 (unsigned long long)devid);
1364 } 1368 }
1369
1365 device = btrfs_find_device(root->fs_info, devid, NULL, NULL); 1370 device = btrfs_find_device(root->fs_info, devid, NULL, NULL);
1366 if (!device) { 1371 if (!device) {
1367 printk(KERN_INFO "btrfs: resizer unable to find device %llu\n", 1372 printk(KERN_INFO "btrfs: resizer unable to find device %llu\n",
@@ -1369,9 +1374,10 @@ static noinline int btrfs_ioctl_resize(struct file *file,
1369 ret = -EINVAL; 1374 ret = -EINVAL;
1370 goto out_free; 1375 goto out_free;
1371 } 1376 }
1372 if (device->fs_devices && device->fs_devices->seeding) { 1377
1378 if (!device->writeable) {
1373 printk(KERN_INFO "btrfs: resizer unable to apply on " 1379 printk(KERN_INFO "btrfs: resizer unable to apply on "
1374 "seeding device %llu\n", 1380 "readonly device %llu\n",
1375 (unsigned long long)devid); 1381 (unsigned long long)devid);
1376 ret = -EINVAL; 1382 ret = -EINVAL;
1377 goto out_free; 1383 goto out_free;
@@ -1443,8 +1449,8 @@ out_free:
1443 kfree(vol_args); 1449 kfree(vol_args);
1444out: 1450out:
1445 mutex_unlock(&root->fs_info->volume_mutex); 1451 mutex_unlock(&root->fs_info->volume_mutex);
1446 mnt_drop_write_file(file);
1447 atomic_set(&root->fs_info->mutually_exclusive_operation_running, 0); 1452 atomic_set(&root->fs_info->mutually_exclusive_operation_running, 0);
1453 mnt_drop_write_file(file);
1448 return ret; 1454 return ret;
1449} 1455}
1450 1456
@@ -2095,13 +2101,13 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file,
2095 err = inode_permission(inode, MAY_WRITE | MAY_EXEC); 2101 err = inode_permission(inode, MAY_WRITE | MAY_EXEC);
2096 if (err) 2102 if (err)
2097 goto out_dput; 2103 goto out_dput;
2098
2099 /* check if subvolume may be deleted by a non-root user */
2100 err = btrfs_may_delete(dir, dentry, 1);
2101 if (err)
2102 goto out_dput;
2103 } 2104 }
2104 2105
2106 /* check if subvolume may be deleted by a user */
2107 err = btrfs_may_delete(dir, dentry, 1);
2108 if (err)
2109 goto out_dput;
2110
2105 if (btrfs_ino(inode) != BTRFS_FIRST_FREE_OBJECTID) { 2111 if (btrfs_ino(inode) != BTRFS_FIRST_FREE_OBJECTID) {
2106 err = -EINVAL; 2112 err = -EINVAL;
2107 goto out_dput; 2113 goto out_dput;
@@ -2183,19 +2189,20 @@ static int btrfs_ioctl_defrag(struct file *file, void __user *argp)
2183 struct btrfs_ioctl_defrag_range_args *range; 2189 struct btrfs_ioctl_defrag_range_args *range;
2184 int ret; 2190 int ret;
2185 2191
2186 if (btrfs_root_readonly(root)) 2192 ret = mnt_want_write_file(file);
2187 return -EROFS; 2193 if (ret)
2194 return ret;
2188 2195
2189 if (atomic_xchg(&root->fs_info->mutually_exclusive_operation_running, 2196 if (atomic_xchg(&root->fs_info->mutually_exclusive_operation_running,
2190 1)) { 2197 1)) {
2191 pr_info("btrfs: dev add/delete/balance/replace/resize operation in progress\n"); 2198 pr_info("btrfs: dev add/delete/balance/replace/resize operation in progress\n");
2192 return -EINPROGRESS; 2199 mnt_drop_write_file(file);
2200 return -EINVAL;
2193 } 2201 }
2194 ret = mnt_want_write_file(file); 2202
2195 if (ret) { 2203 if (btrfs_root_readonly(root)) {
2196 atomic_set(&root->fs_info->mutually_exclusive_operation_running, 2204 ret = -EROFS;
2197 0); 2205 goto out;
2198 return ret;
2199 } 2206 }
2200 2207
2201 switch (inode->i_mode & S_IFMT) { 2208 switch (inode->i_mode & S_IFMT) {
@@ -2247,8 +2254,8 @@ static int btrfs_ioctl_defrag(struct file *file, void __user *argp)
2247 ret = -EINVAL; 2254 ret = -EINVAL;
2248 } 2255 }
2249out: 2256out:
2250 mnt_drop_write_file(file);
2251 atomic_set(&root->fs_info->mutually_exclusive_operation_running, 0); 2257 atomic_set(&root->fs_info->mutually_exclusive_operation_running, 0);
2258 mnt_drop_write_file(file);
2252 return ret; 2259 return ret;
2253} 2260}
2254 2261
@@ -2263,7 +2270,7 @@ static long btrfs_ioctl_add_dev(struct btrfs_root *root, void __user *arg)
2263 if (atomic_xchg(&root->fs_info->mutually_exclusive_operation_running, 2270 if (atomic_xchg(&root->fs_info->mutually_exclusive_operation_running,
2264 1)) { 2271 1)) {
2265 pr_info("btrfs: dev add/delete/balance/replace/resize operation in progress\n"); 2272 pr_info("btrfs: dev add/delete/balance/replace/resize operation in progress\n");
2266 return -EINPROGRESS; 2273 return -EINVAL;
2267 } 2274 }
2268 2275
2269 mutex_lock(&root->fs_info->volume_mutex); 2276 mutex_lock(&root->fs_info->volume_mutex);
@@ -2300,7 +2307,7 @@ static long btrfs_ioctl_rm_dev(struct file *file, void __user *arg)
2300 1)) { 2307 1)) {
2301 pr_info("btrfs: dev add/delete/balance/replace/resize operation in progress\n"); 2308 pr_info("btrfs: dev add/delete/balance/replace/resize operation in progress\n");
2302 mnt_drop_write_file(file); 2309 mnt_drop_write_file(file);
2303 return -EINPROGRESS; 2310 return -EINVAL;
2304 } 2311 }
2305 2312
2306 mutex_lock(&root->fs_info->volume_mutex); 2313 mutex_lock(&root->fs_info->volume_mutex);
@@ -2316,8 +2323,8 @@ static long btrfs_ioctl_rm_dev(struct file *file, void __user *arg)
2316 kfree(vol_args); 2323 kfree(vol_args);
2317out: 2324out:
2318 mutex_unlock(&root->fs_info->volume_mutex); 2325 mutex_unlock(&root->fs_info->volume_mutex);
2319 mnt_drop_write_file(file);
2320 atomic_set(&root->fs_info->mutually_exclusive_operation_running, 0); 2326 atomic_set(&root->fs_info->mutually_exclusive_operation_running, 0);
2327 mnt_drop_write_file(file);
2321 return ret; 2328 return ret;
2322} 2329}
2323 2330
@@ -3437,8 +3444,8 @@ static long btrfs_ioctl_balance(struct file *file, void __user *arg)
3437 struct btrfs_fs_info *fs_info = root->fs_info; 3444 struct btrfs_fs_info *fs_info = root->fs_info;
3438 struct btrfs_ioctl_balance_args *bargs; 3445 struct btrfs_ioctl_balance_args *bargs;
3439 struct btrfs_balance_control *bctl; 3446 struct btrfs_balance_control *bctl;
3447 bool need_unlock; /* for mut. excl. ops lock */
3440 int ret; 3448 int ret;
3441 int need_to_clear_lock = 0;
3442 3449
3443 if (!capable(CAP_SYS_ADMIN)) 3450 if (!capable(CAP_SYS_ADMIN))
3444 return -EPERM; 3451 return -EPERM;
@@ -3447,14 +3454,61 @@ static long btrfs_ioctl_balance(struct file *file, void __user *arg)
3447 if (ret) 3454 if (ret)
3448 return ret; 3455 return ret;
3449 3456
3450 mutex_lock(&fs_info->volume_mutex); 3457again:
3458 if (!atomic_xchg(&fs_info->mutually_exclusive_operation_running, 1)) {
3459 mutex_lock(&fs_info->volume_mutex);
3460 mutex_lock(&fs_info->balance_mutex);
3461 need_unlock = true;
3462 goto locked;
3463 }
3464
3465 /*
3466 * mut. excl. ops lock is locked. Three possibilities:
3467 * (1) some other op is running
3468 * (2) balance is running
3469 * (3) balance is paused -- special case (think resume)
3470 */
3451 mutex_lock(&fs_info->balance_mutex); 3471 mutex_lock(&fs_info->balance_mutex);
3472 if (fs_info->balance_ctl) {
3473 /* this is either (2) or (3) */
3474 if (!atomic_read(&fs_info->balance_running)) {
3475 mutex_unlock(&fs_info->balance_mutex);
3476 if (!mutex_trylock(&fs_info->volume_mutex))
3477 goto again;
3478 mutex_lock(&fs_info->balance_mutex);
3479
3480 if (fs_info->balance_ctl &&
3481 !atomic_read(&fs_info->balance_running)) {
3482 /* this is (3) */
3483 need_unlock = false;
3484 goto locked;
3485 }
3486
3487 mutex_unlock(&fs_info->balance_mutex);
3488 mutex_unlock(&fs_info->volume_mutex);
3489 goto again;
3490 } else {
3491 /* this is (2) */
3492 mutex_unlock(&fs_info->balance_mutex);
3493 ret = -EINPROGRESS;
3494 goto out;
3495 }
3496 } else {
3497 /* this is (1) */
3498 mutex_unlock(&fs_info->balance_mutex);
3499 pr_info("btrfs: dev add/delete/balance/replace/resize operation in progress\n");
3500 ret = -EINVAL;
3501 goto out;
3502 }
3503
3504locked:
3505 BUG_ON(!atomic_read(&fs_info->mutually_exclusive_operation_running));
3452 3506
3453 if (arg) { 3507 if (arg) {
3454 bargs = memdup_user(arg, sizeof(*bargs)); 3508 bargs = memdup_user(arg, sizeof(*bargs));
3455 if (IS_ERR(bargs)) { 3509 if (IS_ERR(bargs)) {
3456 ret = PTR_ERR(bargs); 3510 ret = PTR_ERR(bargs);
3457 goto out; 3511 goto out_unlock;
3458 } 3512 }
3459 3513
3460 if (bargs->flags & BTRFS_BALANCE_RESUME) { 3514 if (bargs->flags & BTRFS_BALANCE_RESUME) {
@@ -3474,13 +3528,10 @@ static long btrfs_ioctl_balance(struct file *file, void __user *arg)
3474 bargs = NULL; 3528 bargs = NULL;
3475 } 3529 }
3476 3530
3477 if (atomic_xchg(&root->fs_info->mutually_exclusive_operation_running, 3531 if (fs_info->balance_ctl) {
3478 1)) {
3479 pr_info("btrfs: dev add/delete/balance/replace/resize operation in progress\n");
3480 ret = -EINPROGRESS; 3532 ret = -EINPROGRESS;
3481 goto out_bargs; 3533 goto out_bargs;
3482 } 3534 }
3483 need_to_clear_lock = 1;
3484 3535
3485 bctl = kzalloc(sizeof(*bctl), GFP_NOFS); 3536 bctl = kzalloc(sizeof(*bctl), GFP_NOFS);
3486 if (!bctl) { 3537 if (!bctl) {
@@ -3501,11 +3552,17 @@ static long btrfs_ioctl_balance(struct file *file, void __user *arg)
3501 } 3552 }
3502 3553
3503do_balance: 3554do_balance:
3504 ret = btrfs_balance(bctl, bargs);
3505 /* 3555 /*
3506 * bctl is freed in __cancel_balance or in free_fs_info if 3556 * Ownership of bctl and mutually_exclusive_operation_running
3507 * restriper was paused all the way until unmount 3557 * goes to btrfs_balance. bctl is freed in __cancel_balance,
3558 * or, if restriper was paused all the way until unmount, in
3559 * free_fs_info. mutually_exclusive_operation_running is
3560 * cleared in __cancel_balance.
3508 */ 3561 */
3562 need_unlock = false;
3563
3564 ret = btrfs_balance(bctl, bargs);
3565
3509 if (arg) { 3566 if (arg) {
3510 if (copy_to_user(arg, bargs, sizeof(*bargs))) 3567 if (copy_to_user(arg, bargs, sizeof(*bargs)))
3511 ret = -EFAULT; 3568 ret = -EFAULT;
@@ -3513,12 +3570,12 @@ do_balance:
3513 3570
3514out_bargs: 3571out_bargs:
3515 kfree(bargs); 3572 kfree(bargs);
3516out: 3573out_unlock:
3517 if (need_to_clear_lock)
3518 atomic_set(&root->fs_info->mutually_exclusive_operation_running,
3519 0);
3520 mutex_unlock(&fs_info->balance_mutex); 3574 mutex_unlock(&fs_info->balance_mutex);
3521 mutex_unlock(&fs_info->volume_mutex); 3575 mutex_unlock(&fs_info->volume_mutex);
3576 if (need_unlock)
3577 atomic_set(&fs_info->mutually_exclusive_operation_running, 0);
3578out:
3522 mnt_drop_write_file(file); 3579 mnt_drop_write_file(file);
3523 return ret; 3580 return ret;
3524} 3581}
@@ -3698,6 +3755,11 @@ static long btrfs_ioctl_qgroup_create(struct file *file, void __user *arg)
3698 goto drop_write; 3755 goto drop_write;
3699 } 3756 }
3700 3757
3758 if (!sa->qgroupid) {
3759 ret = -EINVAL;
3760 goto out;
3761 }
3762
3701 trans = btrfs_join_transaction(root); 3763 trans = btrfs_join_transaction(root);
3702 if (IS_ERR(trans)) { 3764 if (IS_ERR(trans)) {
3703 ret = PTR_ERR(trans); 3765 ret = PTR_ERR(trans);
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index f10731297040..e5ed56729607 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -836,9 +836,16 @@ int btrfs_ordered_update_i_size(struct inode *inode, u64 offset,
836 * if the disk i_size is already at the inode->i_size, or 836 * if the disk i_size is already at the inode->i_size, or
837 * this ordered extent is inside the disk i_size, we're done 837 * this ordered extent is inside the disk i_size, we're done
838 */ 838 */
839 if (disk_i_size == i_size || offset <= disk_i_size) { 839 if (disk_i_size == i_size)
840 goto out;
841
842 /*
843 * We still need to update disk_i_size if outstanding_isize is greater
844 * than disk_i_size.
845 */
846 if (offset <= disk_i_size &&
847 (!ordered || ordered->outstanding_isize <= disk_i_size))
840 goto out; 848 goto out;
841 }
842 849
843 /* 850 /*
844 * walk backward from this ordered extent to disk_i_size. 851 * walk backward from this ordered extent to disk_i_size.
@@ -870,7 +877,7 @@ int btrfs_ordered_update_i_size(struct inode *inode, u64 offset,
870 break; 877 break;
871 if (test->file_offset >= i_size) 878 if (test->file_offset >= i_size)
872 break; 879 break;
873 if (test->file_offset >= disk_i_size) { 880 if (entry_end(test) > disk_i_size) {
874 /* 881 /*
875 * we don't update disk_i_size now, so record this 882 * we don't update disk_i_size now, so record this
876 * undealt i_size. Or we will not know the real 883 * undealt i_size. Or we will not know the real
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index fe9d02c45f8e..a5c856234323 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -379,6 +379,13 @@ next1:
379 379
380 ret = add_relation_rb(fs_info, found_key.objectid, 380 ret = add_relation_rb(fs_info, found_key.objectid,
381 found_key.offset); 381 found_key.offset);
382 if (ret == -ENOENT) {
383 printk(KERN_WARNING
384 "btrfs: orphan qgroup relation 0x%llx->0x%llx\n",
385 (unsigned long long)found_key.objectid,
386 (unsigned long long)found_key.offset);
387 ret = 0; /* ignore the error */
388 }
382 if (ret) 389 if (ret)
383 goto out; 390 goto out;
384next2: 391next2:
@@ -956,17 +963,28 @@ int btrfs_remove_qgroup(struct btrfs_trans_handle *trans,
956 struct btrfs_fs_info *fs_info, u64 qgroupid) 963 struct btrfs_fs_info *fs_info, u64 qgroupid)
957{ 964{
958 struct btrfs_root *quota_root; 965 struct btrfs_root *quota_root;
966 struct btrfs_qgroup *qgroup;
959 int ret = 0; 967 int ret = 0;
960 968
961 quota_root = fs_info->quota_root; 969 quota_root = fs_info->quota_root;
962 if (!quota_root) 970 if (!quota_root)
963 return -EINVAL; 971 return -EINVAL;
964 972
973 /* check if there are no relations to this qgroup */
974 spin_lock(&fs_info->qgroup_lock);
975 qgroup = find_qgroup_rb(fs_info, qgroupid);
976 if (qgroup) {
977 if (!list_empty(&qgroup->groups) || !list_empty(&qgroup->members)) {
978 spin_unlock(&fs_info->qgroup_lock);
979 return -EBUSY;
980 }
981 }
982 spin_unlock(&fs_info->qgroup_lock);
983
965 ret = del_qgroup_item(trans, quota_root, qgroupid); 984 ret = del_qgroup_item(trans, quota_root, qgroupid);
966 985
967 spin_lock(&fs_info->qgroup_lock); 986 spin_lock(&fs_info->qgroup_lock);
968 del_qgroup_rb(quota_root->fs_info, qgroupid); 987 del_qgroup_rb(quota_root->fs_info, qgroupid);
969
970 spin_unlock(&fs_info->qgroup_lock); 988 spin_unlock(&fs_info->qgroup_lock);
971 989
972 return ret; 990 return ret;
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index 300e09ac3659..17c306bf177a 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -3472,7 +3472,7 @@ out:
3472} 3472}
3473 3473
3474/* 3474/*
3475 * hepler to find all tree blocks that reference a given data extent 3475 * helper to find all tree blocks that reference a given data extent
3476 */ 3476 */
3477static noinline_for_stack 3477static noinline_for_stack
3478int add_data_references(struct reloc_control *rc, 3478int add_data_references(struct reloc_control *rc,
@@ -3566,7 +3566,7 @@ int add_data_references(struct reloc_control *rc,
3566} 3566}
3567 3567
3568/* 3568/*
3569 * hepler to find next unprocessed extent 3569 * helper to find next unprocessed extent
3570 */ 3570 */
3571static noinline_for_stack 3571static noinline_for_stack
3572int find_next_extent(struct btrfs_trans_handle *trans, 3572int find_next_extent(struct btrfs_trans_handle *trans,
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index bdbb94f245c9..67783e03d121 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -580,20 +580,29 @@ static int scrub_fixup_readpage(u64 inum, u64 offset, u64 root, void *fixup_ctx)
580 int corrected = 0; 580 int corrected = 0;
581 struct btrfs_key key; 581 struct btrfs_key key;
582 struct inode *inode = NULL; 582 struct inode *inode = NULL;
583 struct btrfs_fs_info *fs_info;
583 u64 end = offset + PAGE_SIZE - 1; 584 u64 end = offset + PAGE_SIZE - 1;
584 struct btrfs_root *local_root; 585 struct btrfs_root *local_root;
586 int srcu_index;
585 587
586 key.objectid = root; 588 key.objectid = root;
587 key.type = BTRFS_ROOT_ITEM_KEY; 589 key.type = BTRFS_ROOT_ITEM_KEY;
588 key.offset = (u64)-1; 590 key.offset = (u64)-1;
589 local_root = btrfs_read_fs_root_no_name(fixup->root->fs_info, &key); 591
590 if (IS_ERR(local_root)) 592 fs_info = fixup->root->fs_info;
593 srcu_index = srcu_read_lock(&fs_info->subvol_srcu);
594
595 local_root = btrfs_read_fs_root_no_name(fs_info, &key);
596 if (IS_ERR(local_root)) {
597 srcu_read_unlock(&fs_info->subvol_srcu, srcu_index);
591 return PTR_ERR(local_root); 598 return PTR_ERR(local_root);
599 }
592 600
593 key.type = BTRFS_INODE_ITEM_KEY; 601 key.type = BTRFS_INODE_ITEM_KEY;
594 key.objectid = inum; 602 key.objectid = inum;
595 key.offset = 0; 603 key.offset = 0;
596 inode = btrfs_iget(fixup->root->fs_info->sb, &key, local_root, NULL); 604 inode = btrfs_iget(fs_info->sb, &key, local_root, NULL);
605 srcu_read_unlock(&fs_info->subvol_srcu, srcu_index);
597 if (IS_ERR(inode)) 606 if (IS_ERR(inode))
598 return PTR_ERR(inode); 607 return PTR_ERR(inode);
599 608
@@ -606,7 +615,6 @@ static int scrub_fixup_readpage(u64 inum, u64 offset, u64 root, void *fixup_ctx)
606 } 615 }
607 616
608 if (PageUptodate(page)) { 617 if (PageUptodate(page)) {
609 struct btrfs_fs_info *fs_info;
610 if (PageDirty(page)) { 618 if (PageDirty(page)) {
611 /* 619 /*
612 * we need to write the data to the defect sector. the 620 * we need to write the data to the defect sector. the
@@ -3180,18 +3188,25 @@ static int copy_nocow_pages_for_inode(u64 inum, u64 offset, u64 root, void *ctx)
3180 u64 physical_for_dev_replace; 3188 u64 physical_for_dev_replace;
3181 u64 len; 3189 u64 len;
3182 struct btrfs_fs_info *fs_info = nocow_ctx->sctx->dev_root->fs_info; 3190 struct btrfs_fs_info *fs_info = nocow_ctx->sctx->dev_root->fs_info;
3191 int srcu_index;
3183 3192
3184 key.objectid = root; 3193 key.objectid = root;
3185 key.type = BTRFS_ROOT_ITEM_KEY; 3194 key.type = BTRFS_ROOT_ITEM_KEY;
3186 key.offset = (u64)-1; 3195 key.offset = (u64)-1;
3196
3197 srcu_index = srcu_read_lock(&fs_info->subvol_srcu);
3198
3187 local_root = btrfs_read_fs_root_no_name(fs_info, &key); 3199 local_root = btrfs_read_fs_root_no_name(fs_info, &key);
3188 if (IS_ERR(local_root)) 3200 if (IS_ERR(local_root)) {
3201 srcu_read_unlock(&fs_info->subvol_srcu, srcu_index);
3189 return PTR_ERR(local_root); 3202 return PTR_ERR(local_root);
3203 }
3190 3204
3191 key.type = BTRFS_INODE_ITEM_KEY; 3205 key.type = BTRFS_INODE_ITEM_KEY;
3192 key.objectid = inum; 3206 key.objectid = inum;
3193 key.offset = 0; 3207 key.offset = 0;
3194 inode = btrfs_iget(fs_info->sb, &key, local_root, NULL); 3208 inode = btrfs_iget(fs_info->sb, &key, local_root, NULL);
3209 srcu_read_unlock(&fs_info->subvol_srcu, srcu_index);
3195 if (IS_ERR(inode)) 3210 if (IS_ERR(inode))
3196 return PTR_ERR(inode); 3211 return PTR_ERR(inode);
3197 3212
diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
index 54454542ad40..321b7fb4e441 100644
--- a/fs/btrfs/send.c
+++ b/fs/btrfs/send.c
@@ -1814,8 +1814,10 @@ static int name_cache_insert(struct send_ctx *sctx,
1814 (unsigned long)nce->ino); 1814 (unsigned long)nce->ino);
1815 if (!nce_head) { 1815 if (!nce_head) {
1816 nce_head = kmalloc(sizeof(*nce_head), GFP_NOFS); 1816 nce_head = kmalloc(sizeof(*nce_head), GFP_NOFS);
1817 if (!nce_head) 1817 if (!nce_head) {
1818 kfree(nce);
1818 return -ENOMEM; 1819 return -ENOMEM;
1820 }
1819 INIT_LIST_HEAD(nce_head); 1821 INIT_LIST_HEAD(nce_head);
1820 1822
1821 ret = radix_tree_insert(&sctx->name_cache, nce->ino, nce_head); 1823 ret = radix_tree_insert(&sctx->name_cache, nce->ino, nce_head);
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 99545df1b86c..d8982e9601d3 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -267,7 +267,7 @@ void __btrfs_abort_transaction(struct btrfs_trans_handle *trans,
267 function, line, errstr); 267 function, line, errstr);
268 return; 268 return;
269 } 269 }
270 trans->transaction->aborted = errno; 270 ACCESS_ONCE(trans->transaction->aborted) = errno;
271 __btrfs_std_error(root->fs_info, function, line, errno, NULL); 271 __btrfs_std_error(root->fs_info, function, line, errno, NULL);
272} 272}
273/* 273/*
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 87fac9a21ea5..4c0067c4f76d 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -112,7 +112,6 @@ loop:
112 * to redo the trans_no_join checks above 112 * to redo the trans_no_join checks above
113 */ 113 */
114 kmem_cache_free(btrfs_transaction_cachep, cur_trans); 114 kmem_cache_free(btrfs_transaction_cachep, cur_trans);
115 cur_trans = fs_info->running_transaction;
116 goto loop; 115 goto loop;
117 } else if (fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) { 116 } else if (fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) {
118 spin_unlock(&fs_info->trans_lock); 117 spin_unlock(&fs_info->trans_lock);
@@ -333,12 +332,14 @@ start_transaction(struct btrfs_root *root, u64 num_items, int type,
333 &root->fs_info->trans_block_rsv, 332 &root->fs_info->trans_block_rsv,
334 num_bytes, flush); 333 num_bytes, flush);
335 if (ret) 334 if (ret)
336 return ERR_PTR(ret); 335 goto reserve_fail;
337 } 336 }
338again: 337again:
339 h = kmem_cache_alloc(btrfs_trans_handle_cachep, GFP_NOFS); 338 h = kmem_cache_alloc(btrfs_trans_handle_cachep, GFP_NOFS);
340 if (!h) 339 if (!h) {
341 return ERR_PTR(-ENOMEM); 340 ret = -ENOMEM;
341 goto alloc_fail;
342 }
342 343
343 /* 344 /*
344 * If we are JOIN_NOLOCK we're already committing a transaction and 345 * If we are JOIN_NOLOCK we're already committing a transaction and
@@ -365,11 +366,7 @@ again:
365 if (ret < 0) { 366 if (ret < 0) {
366 /* We must get the transaction if we are JOIN_NOLOCK. */ 367 /* We must get the transaction if we are JOIN_NOLOCK. */
367 BUG_ON(type == TRANS_JOIN_NOLOCK); 368 BUG_ON(type == TRANS_JOIN_NOLOCK);
368 369 goto join_fail;
369 if (type < TRANS_JOIN_NOLOCK)
370 sb_end_intwrite(root->fs_info->sb);
371 kmem_cache_free(btrfs_trans_handle_cachep, h);
372 return ERR_PTR(ret);
373 } 370 }
374 371
375 cur_trans = root->fs_info->running_transaction; 372 cur_trans = root->fs_info->running_transaction;
@@ -410,6 +407,19 @@ got_it:
410 if (!current->journal_info && type != TRANS_USERSPACE) 407 if (!current->journal_info && type != TRANS_USERSPACE)
411 current->journal_info = h; 408 current->journal_info = h;
412 return h; 409 return h;
410
411join_fail:
412 if (type < TRANS_JOIN_NOLOCK)
413 sb_end_intwrite(root->fs_info->sb);
414 kmem_cache_free(btrfs_trans_handle_cachep, h);
415alloc_fail:
416 if (num_bytes)
417 btrfs_block_rsv_release(root, &root->fs_info->trans_block_rsv,
418 num_bytes);
419reserve_fail:
420 if (qgroup_reserved)
421 btrfs_qgroup_free(root, qgroup_reserved);
422 return ERR_PTR(ret);
413} 423}
414 424
415struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root, 425struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root,
@@ -1468,7 +1478,8 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
1468 goto cleanup_transaction; 1478 goto cleanup_transaction;
1469 } 1479 }
1470 1480
1471 if (cur_trans->aborted) { 1481 /* Stop the commit early if ->aborted is set */
1482 if (unlikely(ACCESS_ONCE(cur_trans->aborted))) {
1472 ret = cur_trans->aborted; 1483 ret = cur_trans->aborted;
1473 goto cleanup_transaction; 1484 goto cleanup_transaction;
1474 } 1485 }
@@ -1574,6 +1585,11 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
1574 wait_event(cur_trans->writer_wait, 1585 wait_event(cur_trans->writer_wait,
1575 atomic_read(&cur_trans->num_writers) == 1); 1586 atomic_read(&cur_trans->num_writers) == 1);
1576 1587
1588 /* ->aborted might be set after the previous check, so check it */
1589 if (unlikely(ACCESS_ONCE(cur_trans->aborted))) {
1590 ret = cur_trans->aborted;
1591 goto cleanup_transaction;
1592 }
1577 /* 1593 /*
1578 * the reloc mutex makes sure that we stop 1594 * the reloc mutex makes sure that we stop
1579 * the balancing code from coming in and moving 1595 * the balancing code from coming in and moving
@@ -1657,6 +1673,17 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
1657 goto cleanup_transaction; 1673 goto cleanup_transaction;
1658 } 1674 }
1659 1675
1676 /*
1677 * The tasks which save the space cache and inode cache may also
1678 * update ->aborted, check it.
1679 */
1680 if (unlikely(ACCESS_ONCE(cur_trans->aborted))) {
1681 ret = cur_trans->aborted;
1682 mutex_unlock(&root->fs_info->tree_log_mutex);
1683 mutex_unlock(&root->fs_info->reloc_mutex);
1684 goto cleanup_transaction;
1685 }
1686
1660 btrfs_prepare_extent_commit(trans, root); 1687 btrfs_prepare_extent_commit(trans, root);
1661 1688
1662 cur_trans = root->fs_info->running_transaction; 1689 cur_trans = root->fs_info->running_transaction;
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 83186c7e45d4..9027bb1e7466 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -3357,6 +3357,11 @@ static int log_one_extent(struct btrfs_trans_handle *trans,
3357 if (skip_csum) 3357 if (skip_csum)
3358 return 0; 3358 return 0;
3359 3359
3360 if (em->compress_type) {
3361 csum_offset = 0;
3362 csum_len = block_len;
3363 }
3364
3360 /* block start is already adjusted for the file extent offset. */ 3365 /* block start is already adjusted for the file extent offset. */
3361 ret = btrfs_lookup_csums_range(log->fs_info->csum_root, 3366 ret = btrfs_lookup_csums_range(log->fs_info->csum_root,
3362 em->block_start + csum_offset, 3367 em->block_start + csum_offset,
@@ -3410,13 +3415,13 @@ static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans,
3410 em = list_entry(extents.next, struct extent_map, list); 3415 em = list_entry(extents.next, struct extent_map, list);
3411 3416
3412 list_del_init(&em->list); 3417 list_del_init(&em->list);
3413 clear_bit(EXTENT_FLAG_LOGGING, &em->flags);
3414 3418
3415 /* 3419 /*
3416 * If we had an error we just need to delete everybody from our 3420 * If we had an error we just need to delete everybody from our
3417 * private list. 3421 * private list.
3418 */ 3422 */
3419 if (ret) { 3423 if (ret) {
3424 clear_em_logging(tree, em);
3420 free_extent_map(em); 3425 free_extent_map(em);
3421 continue; 3426 continue;
3422 } 3427 }
@@ -3424,8 +3429,9 @@ static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans,
3424 write_unlock(&tree->lock); 3429 write_unlock(&tree->lock);
3425 3430
3426 ret = log_one_extent(trans, inode, root, em, path); 3431 ret = log_one_extent(trans, inode, root, em, path);
3427 free_extent_map(em);
3428 write_lock(&tree->lock); 3432 write_lock(&tree->lock);
3433 clear_em_logging(tree, em);
3434 free_extent_map(em);
3429 } 3435 }
3430 WARN_ON(!list_empty(&extents)); 3436 WARN_ON(!list_empty(&extents));
3431 write_unlock(&tree->lock); 3437 write_unlock(&tree->lock);
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 5cce6aa74012..5cbb7f4b1672 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -1431,7 +1431,7 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path)
1431 } 1431 }
1432 } else { 1432 } else {
1433 ret = btrfs_get_bdev_and_sb(device_path, 1433 ret = btrfs_get_bdev_and_sb(device_path,
1434 FMODE_READ | FMODE_EXCL, 1434 FMODE_WRITE | FMODE_EXCL,
1435 root->fs_info->bdev_holder, 0, 1435 root->fs_info->bdev_holder, 0,
1436 &bdev, &bh); 1436 &bdev, &bh);
1437 if (ret) 1437 if (ret)
@@ -1556,7 +1556,8 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path)
1556 ret = 0; 1556 ret = 0;
1557 1557
1558 /* Notify udev that device has changed */ 1558 /* Notify udev that device has changed */
1559 btrfs_kobject_uevent(bdev, KOBJ_CHANGE); 1559 if (bdev)
1560 btrfs_kobject_uevent(bdev, KOBJ_CHANGE);
1560 1561
1561error_brelse: 1562error_brelse:
1562 brelse(bh); 1563 brelse(bh);
@@ -2614,7 +2615,14 @@ static int chunk_usage_filter(struct btrfs_fs_info *fs_info, u64 chunk_offset,
2614 cache = btrfs_lookup_block_group(fs_info, chunk_offset); 2615 cache = btrfs_lookup_block_group(fs_info, chunk_offset);
2615 chunk_used = btrfs_block_group_used(&cache->item); 2616 chunk_used = btrfs_block_group_used(&cache->item);
2616 2617
2617 user_thresh = div_factor_fine(cache->key.offset, bargs->usage); 2618 if (bargs->usage == 0)
2619 user_thresh = 0;
2620 else if (bargs->usage > 100)
2621 user_thresh = cache->key.offset;
2622 else
2623 user_thresh = div_factor_fine(cache->key.offset,
2624 bargs->usage);
2625
2618 if (chunk_used < user_thresh) 2626 if (chunk_used < user_thresh)
2619 ret = 0; 2627 ret = 0;
2620 2628
@@ -2959,6 +2967,8 @@ static void __cancel_balance(struct btrfs_fs_info *fs_info)
2959 unset_balance_control(fs_info); 2967 unset_balance_control(fs_info);
2960 ret = del_balance_item(fs_info->tree_root); 2968 ret = del_balance_item(fs_info->tree_root);
2961 BUG_ON(ret); 2969 BUG_ON(ret);
2970
2971 atomic_set(&fs_info->mutually_exclusive_operation_running, 0);
2962} 2972}
2963 2973
2964void update_ioctl_balance_args(struct btrfs_fs_info *fs_info, int lock, 2974void update_ioctl_balance_args(struct btrfs_fs_info *fs_info, int lock,
@@ -3138,8 +3148,10 @@ int btrfs_balance(struct btrfs_balance_control *bctl,
3138out: 3148out:
3139 if (bctl->flags & BTRFS_BALANCE_RESUME) 3149 if (bctl->flags & BTRFS_BALANCE_RESUME)
3140 __cancel_balance(fs_info); 3150 __cancel_balance(fs_info);
3141 else 3151 else {
3142 kfree(bctl); 3152 kfree(bctl);
3153 atomic_set(&fs_info->mutually_exclusive_operation_running, 0);
3154 }
3143 return ret; 3155 return ret;
3144} 3156}
3145 3157
@@ -3156,7 +3168,6 @@ static int balance_kthread(void *data)
3156 ret = btrfs_balance(fs_info->balance_ctl, NULL); 3168 ret = btrfs_balance(fs_info->balance_ctl, NULL);
3157 } 3169 }
3158 3170
3159 atomic_set(&fs_info->mutually_exclusive_operation_running, 0);
3160 mutex_unlock(&fs_info->balance_mutex); 3171 mutex_unlock(&fs_info->balance_mutex);
3161 mutex_unlock(&fs_info->volume_mutex); 3172 mutex_unlock(&fs_info->volume_mutex);
3162 3173
@@ -3179,7 +3190,6 @@ int btrfs_resume_balance_async(struct btrfs_fs_info *fs_info)
3179 return 0; 3190 return 0;
3180 } 3191 }
3181 3192
3182 WARN_ON(atomic_xchg(&fs_info->mutually_exclusive_operation_running, 1));
3183 tsk = kthread_run(balance_kthread, fs_info, "btrfs-balance"); 3193 tsk = kthread_run(balance_kthread, fs_info, "btrfs-balance");
3184 if (IS_ERR(tsk)) 3194 if (IS_ERR(tsk))
3185 return PTR_ERR(tsk); 3195 return PTR_ERR(tsk);
@@ -3233,6 +3243,8 @@ int btrfs_recover_balance(struct btrfs_fs_info *fs_info)
3233 btrfs_balance_sys(leaf, item, &disk_bargs); 3243 btrfs_balance_sys(leaf, item, &disk_bargs);
3234 btrfs_disk_balance_args_to_cpu(&bctl->sys, &disk_bargs); 3244 btrfs_disk_balance_args_to_cpu(&bctl->sys, &disk_bargs);
3235 3245
3246 WARN_ON(atomic_xchg(&fs_info->mutually_exclusive_operation_running, 1));
3247
3236 mutex_lock(&fs_info->volume_mutex); 3248 mutex_lock(&fs_info->volume_mutex);
3237 mutex_lock(&fs_info->balance_mutex); 3249 mutex_lock(&fs_info->balance_mutex);
3238 3250
@@ -3496,7 +3508,7 @@ struct btrfs_raid_attr btrfs_raid_array[BTRFS_NR_RAID_TYPES] = {
3496 { 1, 1, 2, 2, 2, 2 /* raid1 */ }, 3508 { 1, 1, 2, 2, 2, 2 /* raid1 */ },
3497 { 1, 2, 1, 1, 1, 2 /* dup */ }, 3509 { 1, 2, 1, 1, 1, 2 /* dup */ },
3498 { 1, 1, 0, 2, 1, 1 /* raid0 */ }, 3510 { 1, 1, 0, 2, 1, 1 /* raid0 */ },
3499 { 1, 1, 0, 1, 1, 1 /* single */ }, 3511 { 1, 1, 1, 1, 1, 1 /* single */ },
3500}; 3512};
3501 3513
3502static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans, 3514static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
diff --git a/fs/buffer.c b/fs/buffer.c
index 7a75c3e0fd58..62169c192c21 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -2359,7 +2359,7 @@ int __block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf,
2359 if (unlikely(ret < 0)) 2359 if (unlikely(ret < 0))
2360 goto out_unlock; 2360 goto out_unlock;
2361 set_page_dirty(page); 2361 set_page_dirty(page);
2362 wait_on_page_writeback(page); 2362 wait_for_stable_page(page);
2363 return 0; 2363 return 0;
2364out_unlock: 2364out_unlock:
2365 unlock_page(page); 2365 unlock_page(page);
@@ -3227,7 +3227,7 @@ static struct kmem_cache *bh_cachep __read_mostly;
3227 * Once the number of bh's in the machine exceeds this level, we start 3227 * Once the number of bh's in the machine exceeds this level, we start
3228 * stripping them in writeback. 3228 * stripping them in writeback.
3229 */ 3229 */
3230static int max_buffer_heads; 3230static unsigned long max_buffer_heads;
3231 3231
3232int buffer_heads_over_limit; 3232int buffer_heads_over_limit;
3233 3233
@@ -3343,7 +3343,7 @@ EXPORT_SYMBOL(bh_submit_read);
3343 3343
3344void __init buffer_init(void) 3344void __init buffer_init(void)
3345{ 3345{
3346 int nrpages; 3346 unsigned long nrpages;
3347 3347
3348 bh_cachep = kmem_cache_create("buffer_head", 3348 bh_cachep = kmem_cache_create("buffer_head",
3349 sizeof(struct buffer_head), 0, 3349 sizeof(struct buffer_head), 0,
diff --git a/fs/ceph/Kconfig b/fs/ceph/Kconfig
index 9eb134ea6eb2..49bc78243db9 100644
--- a/fs/ceph/Kconfig
+++ b/fs/ceph/Kconfig
@@ -1,6 +1,6 @@
1config CEPH_FS 1config CEPH_FS
2 tristate "Ceph distributed file system (EXPERIMENTAL)" 2 tristate "Ceph distributed file system"
3 depends on INET && EXPERIMENTAL 3 depends on INET
4 select CEPH_LIB 4 select CEPH_LIB
5 select LIBCRC32C 5 select LIBCRC32C
6 select CRYPTO_AES 6 select CRYPTO_AES
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index a1d9bb30c1bf..ae2be696eb5b 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -930,7 +930,7 @@ static int send_cap_msg(struct ceph_mds_session *session,
930 u64 size, u64 max_size, 930 u64 size, u64 max_size,
931 struct timespec *mtime, struct timespec *atime, 931 struct timespec *mtime, struct timespec *atime,
932 u64 time_warp_seq, 932 u64 time_warp_seq,
933 uid_t uid, gid_t gid, umode_t mode, 933 kuid_t uid, kgid_t gid, umode_t mode,
934 u64 xattr_version, 934 u64 xattr_version,
935 struct ceph_buffer *xattrs_buf, 935 struct ceph_buffer *xattrs_buf,
936 u64 follows) 936 u64 follows)
@@ -974,8 +974,8 @@ static int send_cap_msg(struct ceph_mds_session *session,
974 ceph_encode_timespec(&fc->atime, atime); 974 ceph_encode_timespec(&fc->atime, atime);
975 fc->time_warp_seq = cpu_to_le32(time_warp_seq); 975 fc->time_warp_seq = cpu_to_le32(time_warp_seq);
976 976
977 fc->uid = cpu_to_le32(uid); 977 fc->uid = cpu_to_le32(from_kuid(&init_user_ns, uid));
978 fc->gid = cpu_to_le32(gid); 978 fc->gid = cpu_to_le32(from_kgid(&init_user_ns, gid));
979 fc->mode = cpu_to_le32(mode); 979 fc->mode = cpu_to_le32(mode);
980 980
981 fc->xattr_version = cpu_to_le64(xattr_version); 981 fc->xattr_version = cpu_to_le64(xattr_version);
@@ -1081,8 +1081,8 @@ static int __send_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap,
1081 struct timespec mtime, atime; 1081 struct timespec mtime, atime;
1082 int wake = 0; 1082 int wake = 0;
1083 umode_t mode; 1083 umode_t mode;
1084 uid_t uid; 1084 kuid_t uid;
1085 gid_t gid; 1085 kgid_t gid;
1086 struct ceph_mds_session *session; 1086 struct ceph_mds_session *session;
1087 u64 xattr_version = 0; 1087 u64 xattr_version = 0;
1088 struct ceph_buffer *xattr_blob = NULL; 1088 struct ceph_buffer *xattr_blob = NULL;
@@ -2359,10 +2359,11 @@ static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant,
2359 2359
2360 if ((issued & CEPH_CAP_AUTH_EXCL) == 0) { 2360 if ((issued & CEPH_CAP_AUTH_EXCL) == 0) {
2361 inode->i_mode = le32_to_cpu(grant->mode); 2361 inode->i_mode = le32_to_cpu(grant->mode);
2362 inode->i_uid = le32_to_cpu(grant->uid); 2362 inode->i_uid = make_kuid(&init_user_ns, le32_to_cpu(grant->uid));
2363 inode->i_gid = le32_to_cpu(grant->gid); 2363 inode->i_gid = make_kgid(&init_user_ns, le32_to_cpu(grant->gid));
2364 dout("%p mode 0%o uid.gid %d.%d\n", inode, inode->i_mode, 2364 dout("%p mode 0%o uid.gid %d.%d\n", inode, inode->i_mode,
2365 inode->i_uid, inode->i_gid); 2365 from_kuid(&init_user_ns, inode->i_uid),
2366 from_kgid(&init_user_ns, inode->i_gid));
2366 } 2367 }
2367 2368
2368 if ((issued & CEPH_CAP_LINK_EXCL) == 0) 2369 if ((issued & CEPH_CAP_LINK_EXCL) == 0)
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index 2971eaa65cdc..d45895f4a04d 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -612,10 +612,11 @@ static int fill_inode(struct inode *inode,
612 612
613 if ((issued & CEPH_CAP_AUTH_EXCL) == 0) { 613 if ((issued & CEPH_CAP_AUTH_EXCL) == 0) {
614 inode->i_mode = le32_to_cpu(info->mode); 614 inode->i_mode = le32_to_cpu(info->mode);
615 inode->i_uid = le32_to_cpu(info->uid); 615 inode->i_uid = make_kuid(&init_user_ns, le32_to_cpu(info->uid));
616 inode->i_gid = le32_to_cpu(info->gid); 616 inode->i_gid = make_kgid(&init_user_ns, le32_to_cpu(info->gid));
617 dout("%p mode 0%o uid.gid %d.%d\n", inode, inode->i_mode, 617 dout("%p mode 0%o uid.gid %d.%d\n", inode, inode->i_mode,
618 inode->i_uid, inode->i_gid); 618 from_kuid(&init_user_ns, inode->i_uid),
619 from_kgid(&init_user_ns, inode->i_gid));
619 } 620 }
620 621
621 if ((issued & CEPH_CAP_LINK_EXCL) == 0) 622 if ((issued & CEPH_CAP_LINK_EXCL) == 0)
@@ -1565,26 +1566,30 @@ int ceph_setattr(struct dentry *dentry, struct iattr *attr)
1565 1566
1566 if (ia_valid & ATTR_UID) { 1567 if (ia_valid & ATTR_UID) {
1567 dout("setattr %p uid %d -> %d\n", inode, 1568 dout("setattr %p uid %d -> %d\n", inode,
1568 inode->i_uid, attr->ia_uid); 1569 from_kuid(&init_user_ns, inode->i_uid),
1570 from_kuid(&init_user_ns, attr->ia_uid));
1569 if (issued & CEPH_CAP_AUTH_EXCL) { 1571 if (issued & CEPH_CAP_AUTH_EXCL) {
1570 inode->i_uid = attr->ia_uid; 1572 inode->i_uid = attr->ia_uid;
1571 dirtied |= CEPH_CAP_AUTH_EXCL; 1573 dirtied |= CEPH_CAP_AUTH_EXCL;
1572 } else if ((issued & CEPH_CAP_AUTH_SHARED) == 0 || 1574 } else if ((issued & CEPH_CAP_AUTH_SHARED) == 0 ||
1573 attr->ia_uid != inode->i_uid) { 1575 !uid_eq(attr->ia_uid, inode->i_uid)) {
1574 req->r_args.setattr.uid = cpu_to_le32(attr->ia_uid); 1576 req->r_args.setattr.uid = cpu_to_le32(
1577 from_kuid(&init_user_ns, attr->ia_uid));
1575 mask |= CEPH_SETATTR_UID; 1578 mask |= CEPH_SETATTR_UID;
1576 release |= CEPH_CAP_AUTH_SHARED; 1579 release |= CEPH_CAP_AUTH_SHARED;
1577 } 1580 }
1578 } 1581 }
1579 if (ia_valid & ATTR_GID) { 1582 if (ia_valid & ATTR_GID) {
1580 dout("setattr %p gid %d -> %d\n", inode, 1583 dout("setattr %p gid %d -> %d\n", inode,
1581 inode->i_gid, attr->ia_gid); 1584 from_kgid(&init_user_ns, inode->i_gid),
1585 from_kgid(&init_user_ns, attr->ia_gid));
1582 if (issued & CEPH_CAP_AUTH_EXCL) { 1586 if (issued & CEPH_CAP_AUTH_EXCL) {
1583 inode->i_gid = attr->ia_gid; 1587 inode->i_gid = attr->ia_gid;
1584 dirtied |= CEPH_CAP_AUTH_EXCL; 1588 dirtied |= CEPH_CAP_AUTH_EXCL;
1585 } else if ((issued & CEPH_CAP_AUTH_SHARED) == 0 || 1589 } else if ((issued & CEPH_CAP_AUTH_SHARED) == 0 ||
1586 attr->ia_gid != inode->i_gid) { 1590 !gid_eq(attr->ia_gid, inode->i_gid)) {
1587 req->r_args.setattr.gid = cpu_to_le32(attr->ia_gid); 1591 req->r_args.setattr.gid = cpu_to_le32(
1592 from_kgid(&init_user_ns, attr->ia_gid));
1588 mask |= CEPH_SETATTR_GID; 1593 mask |= CEPH_SETATTR_GID;
1589 release |= CEPH_CAP_AUTH_SHARED; 1594 release |= CEPH_CAP_AUTH_SHARED;
1590 } 1595 }
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 9165eb8309eb..7a3dfe0a9a80 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -1658,8 +1658,8 @@ static struct ceph_msg *create_request_message(struct ceph_mds_client *mdsc,
1658 1658
1659 head->mdsmap_epoch = cpu_to_le32(mdsc->mdsmap->m_epoch); 1659 head->mdsmap_epoch = cpu_to_le32(mdsc->mdsmap->m_epoch);
1660 head->op = cpu_to_le32(req->r_op); 1660 head->op = cpu_to_le32(req->r_op);
1661 head->caller_uid = cpu_to_le32(req->r_uid); 1661 head->caller_uid = cpu_to_le32(from_kuid(&init_user_ns, req->r_uid));
1662 head->caller_gid = cpu_to_le32(req->r_gid); 1662 head->caller_gid = cpu_to_le32(from_kgid(&init_user_ns, req->r_gid));
1663 head->args = req->r_args; 1663 head->args = req->r_args;
1664 1664
1665 ceph_encode_filepath(&p, end, ino1, path1); 1665 ceph_encode_filepath(&p, end, ino1, path1);
diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h
index dd26846dd71d..ff4188bf6199 100644
--- a/fs/ceph/mds_client.h
+++ b/fs/ceph/mds_client.h
@@ -184,8 +184,8 @@ struct ceph_mds_request {
184 184
185 union ceph_mds_request_args r_args; 185 union ceph_mds_request_args r_args;
186 int r_fmode; /* file mode, if expecting cap */ 186 int r_fmode; /* file mode, if expecting cap */
187 uid_t r_uid; 187 kuid_t r_uid;
188 gid_t r_gid; 188 kgid_t r_gid;
189 189
190 /* for choosing which mds to send this request to */ 190 /* for choosing which mds to send this request to */
191 int r_direct_mode; 191 int r_direct_mode;
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index 66ebe720e40d..f053bbd1886f 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -138,8 +138,8 @@ struct ceph_cap_snap {
138 struct ceph_snap_context *context; 138 struct ceph_snap_context *context;
139 139
140 umode_t mode; 140 umode_t mode;
141 uid_t uid; 141 kuid_t uid;
142 gid_t gid; 142 kgid_t gid;
143 143
144 struct ceph_buffer *xattr_blob; 144 struct ceph_buffer *xattr_blob;
145 u64 xattr_version; 145 u64 xattr_version;
diff --git a/fs/cifs/Kconfig b/fs/cifs/Kconfig
index 21ff76c22a17..2906ee276408 100644
--- a/fs/cifs/Kconfig
+++ b/fs/cifs/Kconfig
@@ -155,14 +155,14 @@ config CIFS_DFS_UPCALL
155 points. If unsure, say N. 155 points. If unsure, say N.
156 156
157config CIFS_NFSD_EXPORT 157config CIFS_NFSD_EXPORT
158 bool "Allow nfsd to export CIFS file system (EXPERIMENTAL)" 158 bool "Allow nfsd to export CIFS file system"
159 depends on CIFS && EXPERIMENTAL && BROKEN 159 depends on CIFS && BROKEN
160 help 160 help
161 Allows NFS server to export a CIFS mounted share (nfsd over cifs) 161 Allows NFS server to export a CIFS mounted share (nfsd over cifs)
162 162
163config CIFS_SMB2 163config CIFS_SMB2
164 bool "SMB2 network file system support (EXPERIMENTAL)" 164 bool "SMB2 network file system support"
165 depends on CIFS && EXPERIMENTAL && INET 165 depends on CIFS && INET
166 select NLS 166 select NLS
167 select KEYS 167 select KEYS
168 select FSCACHE 168 select FSCACHE
diff --git a/fs/cifs/cifs_dfs_ref.c b/fs/cifs/cifs_dfs_ref.c
index ce5cbd717bfc..210fce2df308 100644
--- a/fs/cifs/cifs_dfs_ref.c
+++ b/fs/cifs/cifs_dfs_ref.c
@@ -226,6 +226,8 @@ compose_mount_options_out:
226compose_mount_options_err: 226compose_mount_options_err:
227 kfree(mountdata); 227 kfree(mountdata);
228 mountdata = ERR_PTR(rc); 228 mountdata = ERR_PTR(rc);
229 kfree(*devname);
230 *devname = NULL;
229 goto compose_mount_options_out; 231 goto compose_mount_options_out;
230} 232}
231 233
diff --git a/fs/cifs/cifs_fs_sb.h b/fs/cifs/cifs_fs_sb.h
index c865bfdfe819..37e4a72a7d1c 100644
--- a/fs/cifs/cifs_fs_sb.h
+++ b/fs/cifs/cifs_fs_sb.h
@@ -55,10 +55,10 @@ struct cifs_sb_info {
55 unsigned int wsize; 55 unsigned int wsize;
56 unsigned long actimeo; /* attribute cache timeout (jiffies) */ 56 unsigned long actimeo; /* attribute cache timeout (jiffies) */
57 atomic_t active; 57 atomic_t active;
58 uid_t mnt_uid; 58 kuid_t mnt_uid;
59 gid_t mnt_gid; 59 kgid_t mnt_gid;
60 uid_t mnt_backupuid; 60 kuid_t mnt_backupuid;
61 gid_t mnt_backupgid; 61 kgid_t mnt_backupgid;
62 umode_t mnt_file_mode; 62 umode_t mnt_file_mode;
63 umode_t mnt_dir_mode; 63 umode_t mnt_dir_mode;
64 unsigned int mnt_cifs_flags; 64 unsigned int mnt_cifs_flags;
diff --git a/fs/cifs/cifs_spnego.c b/fs/cifs/cifs_spnego.c
index 086f381d6489..10e774761299 100644
--- a/fs/cifs/cifs_spnego.c
+++ b/fs/cifs/cifs_spnego.c
@@ -149,10 +149,12 @@ cifs_get_spnego_key(struct cifs_ses *sesInfo)
149 goto out; 149 goto out;
150 150
151 dp = description + strlen(description); 151 dp = description + strlen(description);
152 sprintf(dp, ";uid=0x%x", sesInfo->linux_uid); 152 sprintf(dp, ";uid=0x%x",
153 from_kuid_munged(&init_user_ns, sesInfo->linux_uid));
153 154
154 dp = description + strlen(description); 155 dp = description + strlen(description);
155 sprintf(dp, ";creduid=0x%x", sesInfo->cred_uid); 156 sprintf(dp, ";creduid=0x%x",
157 from_kuid_munged(&init_user_ns, sesInfo->cred_uid));
156 158
157 if (sesInfo->user_name) { 159 if (sesInfo->user_name) {
158 dp = description + strlen(description); 160 dp = description + strlen(description);
diff --git a/fs/cifs/cifsacl.c b/fs/cifs/cifsacl.c
index 5cbd00e74067..f1e3f25fe004 100644
--- a/fs/cifs/cifsacl.c
+++ b/fs/cifs/cifsacl.c
@@ -266,8 +266,8 @@ sid_to_id(struct cifs_sb_info *cifs_sb, struct cifs_sid *psid,
266 struct key *sidkey; 266 struct key *sidkey;
267 char *sidstr; 267 char *sidstr;
268 const struct cred *saved_cred; 268 const struct cred *saved_cred;
269 uid_t fuid = cifs_sb->mnt_uid; 269 kuid_t fuid = cifs_sb->mnt_uid;
270 gid_t fgid = cifs_sb->mnt_gid; 270 kgid_t fgid = cifs_sb->mnt_gid;
271 271
272 /* 272 /*
273 * If we have too many subauthorities, then something is really wrong. 273 * If we have too many subauthorities, then something is really wrong.
@@ -297,6 +297,7 @@ sid_to_id(struct cifs_sb_info *cifs_sb, struct cifs_sid *psid,
297 * probably a safe assumption but might be better to check based on 297 * probably a safe assumption but might be better to check based on
298 * sidtype. 298 * sidtype.
299 */ 299 */
300 BUILD_BUG_ON(sizeof(uid_t) != sizeof(gid_t));
300 if (sidkey->datalen != sizeof(uid_t)) { 301 if (sidkey->datalen != sizeof(uid_t)) {
301 rc = -EIO; 302 rc = -EIO;
302 cFYI(1, "%s: Downcall contained malformed key " 303 cFYI(1, "%s: Downcall contained malformed key "
@@ -305,10 +306,21 @@ sid_to_id(struct cifs_sb_info *cifs_sb, struct cifs_sid *psid,
305 goto out_key_put; 306 goto out_key_put;
306 } 307 }
307 308
308 if (sidtype == SIDOWNER) 309 if (sidtype == SIDOWNER) {
309 memcpy(&fuid, &sidkey->payload.value, sizeof(uid_t)); 310 kuid_t uid;
310 else 311 uid_t id;
311 memcpy(&fgid, &sidkey->payload.value, sizeof(gid_t)); 312 memcpy(&id, &sidkey->payload.value, sizeof(uid_t));
313 uid = make_kuid(&init_user_ns, id);
314 if (uid_valid(uid))
315 fuid = uid;
316 } else {
317 kgid_t gid;
318 gid_t id;
319 memcpy(&id, &sidkey->payload.value, sizeof(gid_t));
320 gid = make_kgid(&init_user_ns, id);
321 if (gid_valid(gid))
322 fgid = gid;
323 }
312 324
313out_key_put: 325out_key_put:
314 key_put(sidkey); 326 key_put(sidkey);
@@ -346,7 +358,8 @@ init_cifs_idmap(void)
346 if (!cred) 358 if (!cred)
347 return -ENOMEM; 359 return -ENOMEM;
348 360
349 keyring = keyring_alloc(".cifs_idmap", 0, 0, cred, 361 keyring = keyring_alloc(".cifs_idmap",
362 GLOBAL_ROOT_UID, GLOBAL_ROOT_GID, cred,
350 (KEY_POS_ALL & ~KEY_POS_SETATTR) | 363 (KEY_POS_ALL & ~KEY_POS_SETATTR) |
351 KEY_USR_VIEW | KEY_USR_READ, 364 KEY_USR_VIEW | KEY_USR_READ,
352 KEY_ALLOC_NOT_IN_QUOTA, NULL); 365 KEY_ALLOC_NOT_IN_QUOTA, NULL);
@@ -774,7 +787,7 @@ static int parse_sec_desc(struct cifs_sb_info *cifs_sb,
774 787
775/* Convert permission bits from mode to equivalent CIFS ACL */ 788/* Convert permission bits from mode to equivalent CIFS ACL */
776static int build_sec_desc(struct cifs_ntsd *pntsd, struct cifs_ntsd *pnntsd, 789static int build_sec_desc(struct cifs_ntsd *pntsd, struct cifs_ntsd *pnntsd,
777 __u32 secdesclen, __u64 nmode, uid_t uid, gid_t gid, int *aclflag) 790 __u32 secdesclen, __u64 nmode, kuid_t uid, kgid_t gid, int *aclflag)
778{ 791{
779 int rc = 0; 792 int rc = 0;
780 __u32 dacloffset; 793 __u32 dacloffset;
@@ -806,17 +819,19 @@ static int build_sec_desc(struct cifs_ntsd *pntsd, struct cifs_ntsd *pnntsd,
806 *aclflag = CIFS_ACL_DACL; 819 *aclflag = CIFS_ACL_DACL;
807 } else { 820 } else {
808 memcpy(pnntsd, pntsd, secdesclen); 821 memcpy(pnntsd, pntsd, secdesclen);
809 if (uid != NO_CHANGE_32) { /* chown */ 822 if (uid_valid(uid)) { /* chown */
823 uid_t id;
810 owner_sid_ptr = (struct cifs_sid *)((char *)pnntsd + 824 owner_sid_ptr = (struct cifs_sid *)((char *)pnntsd +
811 le32_to_cpu(pnntsd->osidoffset)); 825 le32_to_cpu(pnntsd->osidoffset));
812 nowner_sid_ptr = kmalloc(sizeof(struct cifs_sid), 826 nowner_sid_ptr = kmalloc(sizeof(struct cifs_sid),
813 GFP_KERNEL); 827 GFP_KERNEL);
814 if (!nowner_sid_ptr) 828 if (!nowner_sid_ptr)
815 return -ENOMEM; 829 return -ENOMEM;
816 rc = id_to_sid(uid, SIDOWNER, nowner_sid_ptr); 830 id = from_kuid(&init_user_ns, uid);
831 rc = id_to_sid(id, SIDOWNER, nowner_sid_ptr);
817 if (rc) { 832 if (rc) {
818 cFYI(1, "%s: Mapping error %d for owner id %d", 833 cFYI(1, "%s: Mapping error %d for owner id %d",
819 __func__, rc, uid); 834 __func__, rc, id);
820 kfree(nowner_sid_ptr); 835 kfree(nowner_sid_ptr);
821 return rc; 836 return rc;
822 } 837 }
@@ -824,17 +839,19 @@ static int build_sec_desc(struct cifs_ntsd *pntsd, struct cifs_ntsd *pnntsd,
824 kfree(nowner_sid_ptr); 839 kfree(nowner_sid_ptr);
825 *aclflag = CIFS_ACL_OWNER; 840 *aclflag = CIFS_ACL_OWNER;
826 } 841 }
827 if (gid != NO_CHANGE_32) { /* chgrp */ 842 if (gid_valid(gid)) { /* chgrp */
843 gid_t id;
828 group_sid_ptr = (struct cifs_sid *)((char *)pnntsd + 844 group_sid_ptr = (struct cifs_sid *)((char *)pnntsd +
829 le32_to_cpu(pnntsd->gsidoffset)); 845 le32_to_cpu(pnntsd->gsidoffset));
830 ngroup_sid_ptr = kmalloc(sizeof(struct cifs_sid), 846 ngroup_sid_ptr = kmalloc(sizeof(struct cifs_sid),
831 GFP_KERNEL); 847 GFP_KERNEL);
832 if (!ngroup_sid_ptr) 848 if (!ngroup_sid_ptr)
833 return -ENOMEM; 849 return -ENOMEM;
834 rc = id_to_sid(gid, SIDGROUP, ngroup_sid_ptr); 850 id = from_kgid(&init_user_ns, gid);
851 rc = id_to_sid(id, SIDGROUP, ngroup_sid_ptr);
835 if (rc) { 852 if (rc) {
836 cFYI(1, "%s: Mapping error %d for group id %d", 853 cFYI(1, "%s: Mapping error %d for group id %d",
837 __func__, rc, gid); 854 __func__, rc, id);
838 kfree(ngroup_sid_ptr); 855 kfree(ngroup_sid_ptr);
839 return rc; 856 return rc;
840 } 857 }
@@ -1002,7 +1019,7 @@ cifs_acl_to_fattr(struct cifs_sb_info *cifs_sb, struct cifs_fattr *fattr,
1002/* Convert mode bits to an ACL so we can update the ACL on the server */ 1019/* Convert mode bits to an ACL so we can update the ACL on the server */
1003int 1020int
1004id_mode_to_cifs_acl(struct inode *inode, const char *path, __u64 nmode, 1021id_mode_to_cifs_acl(struct inode *inode, const char *path, __u64 nmode,
1005 uid_t uid, gid_t gid) 1022 kuid_t uid, kgid_t gid)
1006{ 1023{
1007 int rc = 0; 1024 int rc = 0;
1008 int aclflag = CIFS_ACL_DACL; /* default flag to set */ 1025 int aclflag = CIFS_ACL_DACL; /* default flag to set */
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index de7f9168a118..9be09b21b4e0 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -375,13 +375,15 @@ cifs_show_options(struct seq_file *s, struct dentry *root)
375 (int)(srcaddr->sa_family)); 375 (int)(srcaddr->sa_family));
376 } 376 }
377 377
378 seq_printf(s, ",uid=%u", cifs_sb->mnt_uid); 378 seq_printf(s, ",uid=%u",
379 from_kuid_munged(&init_user_ns, cifs_sb->mnt_uid));
379 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_OVERR_UID) 380 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_OVERR_UID)
380 seq_printf(s, ",forceuid"); 381 seq_printf(s, ",forceuid");
381 else 382 else
382 seq_printf(s, ",noforceuid"); 383 seq_printf(s, ",noforceuid");
383 384
384 seq_printf(s, ",gid=%u", cifs_sb->mnt_gid); 385 seq_printf(s, ",gid=%u",
386 from_kgid_munged(&init_user_ns, cifs_sb->mnt_gid));
385 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_OVERR_GID) 387 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_OVERR_GID)
386 seq_printf(s, ",forcegid"); 388 seq_printf(s, ",forcegid");
387 else 389 else
@@ -436,9 +438,13 @@ cifs_show_options(struct seq_file *s, struct dentry *root)
436 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_PERM) 438 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_PERM)
437 seq_printf(s, ",noperm"); 439 seq_printf(s, ",noperm");
438 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_BACKUPUID) 440 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_BACKUPUID)
439 seq_printf(s, ",backupuid=%u", cifs_sb->mnt_backupuid); 441 seq_printf(s, ",backupuid=%u",
442 from_kuid_munged(&init_user_ns,
443 cifs_sb->mnt_backupuid));
440 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_BACKUPGID) 444 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_BACKUPGID)
441 seq_printf(s, ",backupgid=%u", cifs_sb->mnt_backupgid); 445 seq_printf(s, ",backupgid=%u",
446 from_kgid_munged(&init_user_ns,
447 cifs_sb->mnt_backupgid));
442 448
443 seq_printf(s, ",rsize=%u", cifs_sb->rsize); 449 seq_printf(s, ",rsize=%u", cifs_sb->rsize);
444 seq_printf(s, ",wsize=%u", cifs_sb->wsize); 450 seq_printf(s, ",wsize=%u", cifs_sb->wsize);
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index e6899cea1c35..4f07f6fbe494 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -400,11 +400,11 @@ struct smb_vol {
400 char *iocharset; /* local code page for mapping to and from Unicode */ 400 char *iocharset; /* local code page for mapping to and from Unicode */
401 char source_rfc1001_name[RFC1001_NAME_LEN_WITH_NULL]; /* clnt nb name */ 401 char source_rfc1001_name[RFC1001_NAME_LEN_WITH_NULL]; /* clnt nb name */
402 char target_rfc1001_name[RFC1001_NAME_LEN_WITH_NULL]; /* srvr nb name */ 402 char target_rfc1001_name[RFC1001_NAME_LEN_WITH_NULL]; /* srvr nb name */
403 uid_t cred_uid; 403 kuid_t cred_uid;
404 uid_t linux_uid; 404 kuid_t linux_uid;
405 gid_t linux_gid; 405 kgid_t linux_gid;
406 uid_t backupuid; 406 kuid_t backupuid;
407 gid_t backupgid; 407 kgid_t backupgid;
408 umode_t file_mode; 408 umode_t file_mode;
409 umode_t dir_mode; 409 umode_t dir_mode;
410 unsigned secFlg; 410 unsigned secFlg;
@@ -703,8 +703,8 @@ struct cifs_ses {
703 char *serverNOS; /* name of network operating system of server */ 703 char *serverNOS; /* name of network operating system of server */
704 char *serverDomain; /* security realm of server */ 704 char *serverDomain; /* security realm of server */
705 __u64 Suid; /* remote smb uid */ 705 __u64 Suid; /* remote smb uid */
706 uid_t linux_uid; /* overriding owner of files on the mount */ 706 kuid_t linux_uid; /* overriding owner of files on the mount */
707 uid_t cred_uid; /* owner of credentials */ 707 kuid_t cred_uid; /* owner of credentials */
708 unsigned int capabilities; 708 unsigned int capabilities;
709 char serverName[SERVER_NAME_LEN_WITH_NULL * 2]; /* BB make bigger for 709 char serverName[SERVER_NAME_LEN_WITH_NULL * 2]; /* BB make bigger for
710 TCP names - will ipv6 and sctp addresses fit? */ 710 TCP names - will ipv6 and sctp addresses fit? */
@@ -838,7 +838,7 @@ struct cifs_tcon {
838 */ 838 */
839struct tcon_link { 839struct tcon_link {
840 struct rb_node tl_rbnode; 840 struct rb_node tl_rbnode;
841 uid_t tl_uid; 841 kuid_t tl_uid;
842 unsigned long tl_flags; 842 unsigned long tl_flags;
843#define TCON_LINK_MASTER 0 843#define TCON_LINK_MASTER 0
844#define TCON_LINK_PENDING 1 844#define TCON_LINK_PENDING 1
@@ -931,7 +931,7 @@ struct cifsFileInfo {
931 struct list_head tlist; /* pointer to next fid owned by tcon */ 931 struct list_head tlist; /* pointer to next fid owned by tcon */
932 struct list_head flist; /* next fid (file instance) for this inode */ 932 struct list_head flist; /* next fid (file instance) for this inode */
933 struct cifs_fid_locks *llist; /* brlocks held by this fid */ 933 struct cifs_fid_locks *llist; /* brlocks held by this fid */
934 unsigned int uid; /* allows finding which FileInfo structure */ 934 kuid_t uid; /* allows finding which FileInfo structure */
935 __u32 pid; /* process id who opened file */ 935 __u32 pid; /* process id who opened file */
936 struct cifs_fid fid; /* file id from remote */ 936 struct cifs_fid fid; /* file id from remote */
937 /* BB add lock scope info here if needed */ ; 937 /* BB add lock scope info here if needed */ ;
@@ -1245,8 +1245,8 @@ struct cifs_fattr {
1245 u64 cf_eof; 1245 u64 cf_eof;
1246 u64 cf_bytes; 1246 u64 cf_bytes;
1247 u64 cf_createtime; 1247 u64 cf_createtime;
1248 uid_t cf_uid; 1248 kuid_t cf_uid;
1249 gid_t cf_gid; 1249 kgid_t cf_gid;
1250 umode_t cf_mode; 1250 umode_t cf_mode;
1251 dev_t cf_rdev; 1251 dev_t cf_rdev;
1252 unsigned int cf_nlink; 1252 unsigned int cf_nlink;
diff --git a/fs/cifs/cifspdu.h b/fs/cifs/cifspdu.h
index b9d59a948a2c..e996ff6b26d1 100644
--- a/fs/cifs/cifspdu.h
+++ b/fs/cifs/cifspdu.h
@@ -277,7 +277,6 @@
277#define CIFS_NO_HANDLE 0xFFFF 277#define CIFS_NO_HANDLE 0xFFFF
278 278
279#define NO_CHANGE_64 0xFFFFFFFFFFFFFFFFULL 279#define NO_CHANGE_64 0xFFFFFFFFFFFFFFFFULL
280#define NO_CHANGE_32 0xFFFFFFFFUL
281 280
282/* IPC$ in ASCII */ 281/* IPC$ in ASCII */
283#define CIFS_IPC_RESOURCE "\x49\x50\x43\x24" 282#define CIFS_IPC_RESOURCE "\x49\x50\x43\x24"
diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h
index 1988c1baa224..f450f0683ddd 100644
--- a/fs/cifs/cifsproto.h
+++ b/fs/cifs/cifsproto.h
@@ -46,7 +46,8 @@ extern void _free_xid(unsigned int);
46({ \ 46({ \
47 unsigned int __xid = _get_xid(); \ 47 unsigned int __xid = _get_xid(); \
48 cFYI(1, "CIFS VFS: in %s as Xid: %u with uid: %d", \ 48 cFYI(1, "CIFS VFS: in %s as Xid: %u with uid: %d", \
49 __func__, __xid, current_fsuid()); \ 49 __func__, __xid, \
50 from_kuid(&init_user_ns, current_fsuid())); \
50 __xid; \ 51 __xid; \
51}) 52})
52 53
@@ -161,7 +162,7 @@ extern int cifs_acl_to_fattr(struct cifs_sb_info *cifs_sb,
161 struct cifs_fattr *fattr, struct inode *inode, 162 struct cifs_fattr *fattr, struct inode *inode,
162 const char *path, const __u16 *pfid); 163 const char *path, const __u16 *pfid);
163extern int id_mode_to_cifs_acl(struct inode *inode, const char *path, __u64, 164extern int id_mode_to_cifs_acl(struct inode *inode, const char *path, __u64,
164 uid_t, gid_t); 165 kuid_t, kgid_t);
165extern struct cifs_ntsd *get_cifs_acl(struct cifs_sb_info *, struct inode *, 166extern struct cifs_ntsd *get_cifs_acl(struct cifs_sb_info *, struct inode *,
166 const char *, u32 *); 167 const char *, u32 *);
167extern int set_cifs_acl(struct cifs_ntsd *, __u32, struct inode *, 168extern int set_cifs_acl(struct cifs_ntsd *, __u32, struct inode *,
@@ -304,8 +305,8 @@ struct cifs_unix_set_info_args {
304 __u64 atime; 305 __u64 atime;
305 __u64 mtime; 306 __u64 mtime;
306 __u64 mode; 307 __u64 mode;
307 __u64 uid; 308 kuid_t uid;
308 __u64 gid; 309 kgid_t gid;
309 dev_t device; 310 dev_t device;
310}; 311};
311 312
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index 76d0d2998850..00e12f2d626b 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -5819,8 +5819,14 @@ static void
5819cifs_fill_unix_set_info(FILE_UNIX_BASIC_INFO *data_offset, 5819cifs_fill_unix_set_info(FILE_UNIX_BASIC_INFO *data_offset,
5820 const struct cifs_unix_set_info_args *args) 5820 const struct cifs_unix_set_info_args *args)
5821{ 5821{
5822 u64 uid = NO_CHANGE_64, gid = NO_CHANGE_64;
5822 u64 mode = args->mode; 5823 u64 mode = args->mode;
5823 5824
5825 if (uid_valid(args->uid))
5826 uid = from_kuid(&init_user_ns, args->uid);
5827 if (gid_valid(args->gid))
5828 gid = from_kgid(&init_user_ns, args->gid);
5829
5824 /* 5830 /*
5825 * Samba server ignores set of file size to zero due to bugs in some 5831 * Samba server ignores set of file size to zero due to bugs in some
5826 * older clients, but we should be precise - we use SetFileSize to 5832 * older clients, but we should be precise - we use SetFileSize to
@@ -5833,8 +5839,8 @@ cifs_fill_unix_set_info(FILE_UNIX_BASIC_INFO *data_offset,
5833 data_offset->LastStatusChange = cpu_to_le64(args->ctime); 5839 data_offset->LastStatusChange = cpu_to_le64(args->ctime);
5834 data_offset->LastAccessTime = cpu_to_le64(args->atime); 5840 data_offset->LastAccessTime = cpu_to_le64(args->atime);
5835 data_offset->LastModificationTime = cpu_to_le64(args->mtime); 5841 data_offset->LastModificationTime = cpu_to_le64(args->mtime);
5836 data_offset->Uid = cpu_to_le64(args->uid); 5842 data_offset->Uid = cpu_to_le64(uid);
5837 data_offset->Gid = cpu_to_le64(args->gid); 5843 data_offset->Gid = cpu_to_le64(gid);
5838 /* better to leave device as zero when it is */ 5844 /* better to leave device as zero when it is */
5839 data_offset->DevMajor = cpu_to_le64(MAJOR(args->device)); 5845 data_offset->DevMajor = cpu_to_le64(MAJOR(args->device));
5840 data_offset->DevMinor = cpu_to_le64(MINOR(args->device)); 5846 data_offset->DevMinor = cpu_to_le64(MINOR(args->device));
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 17c3643e5950..4474a57f30ab 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -987,6 +987,41 @@ static int get_option_ul(substring_t args[], unsigned long *option)
987 return rc; 987 return rc;
988} 988}
989 989
990static int get_option_uid(substring_t args[], kuid_t *result)
991{
992 unsigned long value;
993 kuid_t uid;
994 int rc;
995
996 rc = get_option_ul(args, &value);
997 if (rc)
998 return rc;
999
1000 uid = make_kuid(current_user_ns(), value);
1001 if (!uid_valid(uid))
1002 return -EINVAL;
1003
1004 *result = uid;
1005 return 0;
1006}
1007
1008static int get_option_gid(substring_t args[], kgid_t *result)
1009{
1010 unsigned long value;
1011 kgid_t gid;
1012 int rc;
1013
1014 rc = get_option_ul(args, &value);
1015 if (rc)
1016 return rc;
1017
1018 gid = make_kgid(current_user_ns(), value);
1019 if (!gid_valid(gid))
1020 return -EINVAL;
1021
1022 *result = gid;
1023 return 0;
1024}
990 1025
991static int cifs_parse_security_flavors(char *value, 1026static int cifs_parse_security_flavors(char *value,
992 struct smb_vol *vol) 1027 struct smb_vol *vol)
@@ -1424,47 +1459,42 @@ cifs_parse_mount_options(const char *mountdata, const char *devname,
1424 1459
1425 /* Numeric Values */ 1460 /* Numeric Values */
1426 case Opt_backupuid: 1461 case Opt_backupuid:
1427 if (get_option_ul(args, &option)) { 1462 if (get_option_uid(args, &vol->backupuid)) {
1428 cERROR(1, "%s: Invalid backupuid value", 1463 cERROR(1, "%s: Invalid backupuid value",
1429 __func__); 1464 __func__);
1430 goto cifs_parse_mount_err; 1465 goto cifs_parse_mount_err;
1431 } 1466 }
1432 vol->backupuid = option;
1433 vol->backupuid_specified = true; 1467 vol->backupuid_specified = true;
1434 break; 1468 break;
1435 case Opt_backupgid: 1469 case Opt_backupgid:
1436 if (get_option_ul(args, &option)) { 1470 if (get_option_gid(args, &vol->backupgid)) {
1437 cERROR(1, "%s: Invalid backupgid value", 1471 cERROR(1, "%s: Invalid backupgid value",
1438 __func__); 1472 __func__);
1439 goto cifs_parse_mount_err; 1473 goto cifs_parse_mount_err;
1440 } 1474 }
1441 vol->backupgid = option;
1442 vol->backupgid_specified = true; 1475 vol->backupgid_specified = true;
1443 break; 1476 break;
1444 case Opt_uid: 1477 case Opt_uid:
1445 if (get_option_ul(args, &option)) { 1478 if (get_option_uid(args, &vol->linux_uid)) {
1446 cERROR(1, "%s: Invalid uid value", 1479 cERROR(1, "%s: Invalid uid value",
1447 __func__); 1480 __func__);
1448 goto cifs_parse_mount_err; 1481 goto cifs_parse_mount_err;
1449 } 1482 }
1450 vol->linux_uid = option;
1451 uid_specified = true; 1483 uid_specified = true;
1452 break; 1484 break;
1453 case Opt_cruid: 1485 case Opt_cruid:
1454 if (get_option_ul(args, &option)) { 1486 if (get_option_uid(args, &vol->cred_uid)) {
1455 cERROR(1, "%s: Invalid cruid value", 1487 cERROR(1, "%s: Invalid cruid value",
1456 __func__); 1488 __func__);
1457 goto cifs_parse_mount_err; 1489 goto cifs_parse_mount_err;
1458 } 1490 }
1459 vol->cred_uid = option;
1460 break; 1491 break;
1461 case Opt_gid: 1492 case Opt_gid:
1462 if (get_option_ul(args, &option)) { 1493 if (get_option_gid(args, &vol->linux_gid)) {
1463 cERROR(1, "%s: Invalid gid value", 1494 cERROR(1, "%s: Invalid gid value",
1464 __func__); 1495 __func__);
1465 goto cifs_parse_mount_err; 1496 goto cifs_parse_mount_err;
1466 } 1497 }
1467 vol->linux_gid = option;
1468 gid_specified = true; 1498 gid_specified = true;
1469 break; 1499 break;
1470 case Opt_file_mode: 1500 case Opt_file_mode:
@@ -1917,7 +1947,7 @@ srcip_matches(struct sockaddr *srcaddr, struct sockaddr *rhs)
1917 } 1947 }
1918 case AF_INET6: { 1948 case AF_INET6: {
1919 struct sockaddr_in6 *saddr6 = (struct sockaddr_in6 *)srcaddr; 1949 struct sockaddr_in6 *saddr6 = (struct sockaddr_in6 *)srcaddr;
1920 struct sockaddr_in6 *vaddr6 = (struct sockaddr_in6 *)&rhs; 1950 struct sockaddr_in6 *vaddr6 = (struct sockaddr_in6 *)rhs;
1921 return ipv6_addr_equal(&saddr6->sin6_addr, &vaddr6->sin6_addr); 1951 return ipv6_addr_equal(&saddr6->sin6_addr, &vaddr6->sin6_addr);
1922 } 1952 }
1923 default: 1953 default:
@@ -2241,7 +2271,7 @@ static int match_session(struct cifs_ses *ses, struct smb_vol *vol)
2241{ 2271{
2242 switch (ses->server->secType) { 2272 switch (ses->server->secType) {
2243 case Kerberos: 2273 case Kerberos:
2244 if (vol->cred_uid != ses->cred_uid) 2274 if (!uid_eq(vol->cred_uid, ses->cred_uid))
2245 return 0; 2275 return 0;
2246 break; 2276 break;
2247 default: 2277 default:
@@ -2713,7 +2743,7 @@ compare_mount_options(struct super_block *sb, struct cifs_mnt_data *mnt_data)
2713 if (new->rsize && new->rsize < old->rsize) 2743 if (new->rsize && new->rsize < old->rsize)
2714 return 0; 2744 return 0;
2715 2745
2716 if (old->mnt_uid != new->mnt_uid || old->mnt_gid != new->mnt_gid) 2746 if (!uid_eq(old->mnt_uid, new->mnt_uid) || !gid_eq(old->mnt_gid, new->mnt_gid))
2717 return 0; 2747 return 0;
2718 2748
2719 if (old->mnt_file_mode != new->mnt_file_mode || 2749 if (old->mnt_file_mode != new->mnt_file_mode ||
@@ -3919,7 +3949,7 @@ cifs_set_vol_auth(struct smb_vol *vol, struct cifs_ses *ses)
3919} 3949}
3920 3950
3921static struct cifs_tcon * 3951static struct cifs_tcon *
3922cifs_construct_tcon(struct cifs_sb_info *cifs_sb, uid_t fsuid) 3952cifs_construct_tcon(struct cifs_sb_info *cifs_sb, kuid_t fsuid)
3923{ 3953{
3924 int rc; 3954 int rc;
3925 struct cifs_tcon *master_tcon = cifs_sb_master_tcon(cifs_sb); 3955 struct cifs_tcon *master_tcon = cifs_sb_master_tcon(cifs_sb);
@@ -3989,7 +4019,7 @@ cifs_sb_tcon_pending_wait(void *unused)
3989 4019
3990/* find and return a tlink with given uid */ 4020/* find and return a tlink with given uid */
3991static struct tcon_link * 4021static struct tcon_link *
3992tlink_rb_search(struct rb_root *root, uid_t uid) 4022tlink_rb_search(struct rb_root *root, kuid_t uid)
3993{ 4023{
3994 struct rb_node *node = root->rb_node; 4024 struct rb_node *node = root->rb_node;
3995 struct tcon_link *tlink; 4025 struct tcon_link *tlink;
@@ -3997,9 +4027,9 @@ tlink_rb_search(struct rb_root *root, uid_t uid)
3997 while (node) { 4027 while (node) {
3998 tlink = rb_entry(node, struct tcon_link, tl_rbnode); 4028 tlink = rb_entry(node, struct tcon_link, tl_rbnode);
3999 4029
4000 if (tlink->tl_uid > uid) 4030 if (uid_gt(tlink->tl_uid, uid))
4001 node = node->rb_left; 4031 node = node->rb_left;
4002 else if (tlink->tl_uid < uid) 4032 else if (uid_lt(tlink->tl_uid, uid))
4003 node = node->rb_right; 4033 node = node->rb_right;
4004 else 4034 else
4005 return tlink; 4035 return tlink;
@@ -4018,7 +4048,7 @@ tlink_rb_insert(struct rb_root *root, struct tcon_link *new_tlink)
4018 tlink = rb_entry(*new, struct tcon_link, tl_rbnode); 4048 tlink = rb_entry(*new, struct tcon_link, tl_rbnode);
4019 parent = *new; 4049 parent = *new;
4020 4050
4021 if (tlink->tl_uid > new_tlink->tl_uid) 4051 if (uid_gt(tlink->tl_uid, new_tlink->tl_uid))
4022 new = &((*new)->rb_left); 4052 new = &((*new)->rb_left);
4023 else 4053 else
4024 new = &((*new)->rb_right); 4054 new = &((*new)->rb_right);
@@ -4048,7 +4078,7 @@ struct tcon_link *
4048cifs_sb_tlink(struct cifs_sb_info *cifs_sb) 4078cifs_sb_tlink(struct cifs_sb_info *cifs_sb)
4049{ 4079{
4050 int ret; 4080 int ret;
4051 uid_t fsuid = current_fsuid(); 4081 kuid_t fsuid = current_fsuid();
4052 struct tcon_link *tlink, *newtlink; 4082 struct tcon_link *tlink, *newtlink;
4053 4083
4054 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER)) 4084 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c
index 8719bbe0dcc3..1cd016217448 100644
--- a/fs/cifs/dir.c
+++ b/fs/cifs/dir.c
@@ -342,14 +342,14 @@ cifs_do_create(struct inode *inode, struct dentry *direntry, unsigned int xid,
342 342
343 *created |= FILE_CREATED; 343 *created |= FILE_CREATED;
344 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID) { 344 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID) {
345 args.uid = (__u64) current_fsuid(); 345 args.uid = current_fsuid();
346 if (inode->i_mode & S_ISGID) 346 if (inode->i_mode & S_ISGID)
347 args.gid = (__u64) inode->i_gid; 347 args.gid = inode->i_gid;
348 else 348 else
349 args.gid = (__u64) current_fsgid(); 349 args.gid = current_fsgid();
350 } else { 350 } else {
351 args.uid = NO_CHANGE_64; 351 args.uid = INVALID_UID; /* no change */
352 args.gid = NO_CHANGE_64; 352 args.gid = INVALID_GID; /* no change */
353 } 353 }
354 CIFSSMBUnixSetFileInfo(xid, tcon, &args, fid->netfid, 354 CIFSSMBUnixSetFileInfo(xid, tcon, &args, fid->netfid,
355 current->tgid); 355 current->tgid);
@@ -588,11 +588,11 @@ int cifs_mknod(struct inode *inode, struct dentry *direntry, umode_t mode,
588 .device = device_number, 588 .device = device_number,
589 }; 589 };
590 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID) { 590 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID) {
591 args.uid = (__u64) current_fsuid(); 591 args.uid = current_fsuid();
592 args.gid = (__u64) current_fsgid(); 592 args.gid = current_fsgid();
593 } else { 593 } else {
594 args.uid = NO_CHANGE_64; 594 args.uid = INVALID_UID; /* no change */
595 args.gid = NO_CHANGE_64; 595 args.gid = INVALID_GID; /* no change */
596 } 596 }
597 rc = CIFSSMBUnixSetPathInfo(xid, pTcon, full_path, &args, 597 rc = CIFSSMBUnixSetPathInfo(xid, pTcon, full_path, &args,
598 cifs_sb->local_nls, 598 cifs_sb->local_nls,
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 8ea6ca50a665..a8d8b589ee0e 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -515,8 +515,8 @@ int cifs_open(struct inode *inode, struct file *file)
515 */ 515 */
516 struct cifs_unix_set_info_args args = { 516 struct cifs_unix_set_info_args args = {
517 .mode = inode->i_mode, 517 .mode = inode->i_mode,
518 .uid = NO_CHANGE_64, 518 .uid = INVALID_UID, /* no change */
519 .gid = NO_CHANGE_64, 519 .gid = INVALID_GID, /* no change */
520 .ctime = NO_CHANGE_64, 520 .ctime = NO_CHANGE_64,
521 .atime = NO_CHANGE_64, 521 .atime = NO_CHANGE_64,
522 .mtime = NO_CHANGE_64, 522 .mtime = NO_CHANGE_64,
@@ -1693,7 +1693,7 @@ struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode,
1693 are always at the end of the list but since the first entry might 1693 are always at the end of the list but since the first entry might
1694 have a close pending, we go through the whole list */ 1694 have a close pending, we go through the whole list */
1695 list_for_each_entry(open_file, &cifs_inode->openFileList, flist) { 1695 list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
1696 if (fsuid_only && open_file->uid != current_fsuid()) 1696 if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
1697 continue; 1697 continue;
1698 if (OPEN_FMODE(open_file->f_flags) & FMODE_READ) { 1698 if (OPEN_FMODE(open_file->f_flags) & FMODE_READ) {
1699 if (!open_file->invalidHandle) { 1699 if (!open_file->invalidHandle) {
@@ -1746,7 +1746,7 @@ refind_writable:
1746 list_for_each_entry(open_file, &cifs_inode->openFileList, flist) { 1746 list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
1747 if (!any_available && open_file->pid != current->tgid) 1747 if (!any_available && open_file->pid != current->tgid)
1748 continue; 1748 continue;
1749 if (fsuid_only && open_file->uid != current_fsuid()) 1749 if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
1750 continue; 1750 continue;
1751 if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) { 1751 if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
1752 if (!open_file->invalidHandle) { 1752 if (!open_file->invalidHandle) {
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index ed6208ff85a7..9638233964fc 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -244,15 +244,25 @@ cifs_unix_basic_to_fattr(struct cifs_fattr *fattr, FILE_UNIX_BASIC_INFO *info,
244 break; 244 break;
245 } 245 }
246 246
247 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_OVERR_UID) 247 fattr->cf_uid = cifs_sb->mnt_uid;
248 fattr->cf_uid = cifs_sb->mnt_uid; 248 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_OVERR_UID)) {
249 else 249 u64 id = le64_to_cpu(info->Uid);
250 fattr->cf_uid = le64_to_cpu(info->Uid); 250 if (id < ((uid_t)-1)) {
251 251 kuid_t uid = make_kuid(&init_user_ns, id);
252 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_OVERR_GID) 252 if (uid_valid(uid))
253 fattr->cf_gid = cifs_sb->mnt_gid; 253 fattr->cf_uid = uid;
254 else 254 }
255 fattr->cf_gid = le64_to_cpu(info->Gid); 255 }
256
257 fattr->cf_gid = cifs_sb->mnt_gid;
258 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_OVERR_GID)) {
259 u64 id = le64_to_cpu(info->Gid);
260 if (id < ((gid_t)-1)) {
261 kgid_t gid = make_kgid(&init_user_ns, id);
262 if (gid_valid(gid))
263 fattr->cf_gid = gid;
264 }
265 }
256 266
257 fattr->cf_nlink = le64_to_cpu(info->Nlinks); 267 fattr->cf_nlink = le64_to_cpu(info->Nlinks);
258} 268}
@@ -1245,14 +1255,14 @@ cifs_mkdir_qinfo(struct inode *parent, struct dentry *dentry, umode_t mode,
1245 .device = 0, 1255 .device = 0,
1246 }; 1256 };
1247 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID) { 1257 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID) {
1248 args.uid = (__u64)current_fsuid(); 1258 args.uid = current_fsuid();
1249 if (parent->i_mode & S_ISGID) 1259 if (parent->i_mode & S_ISGID)
1250 args.gid = (__u64)parent->i_gid; 1260 args.gid = parent->i_gid;
1251 else 1261 else
1252 args.gid = (__u64)current_fsgid(); 1262 args.gid = current_fsgid();
1253 } else { 1263 } else {
1254 args.uid = NO_CHANGE_64; 1264 args.uid = INVALID_UID; /* no change */
1255 args.gid = NO_CHANGE_64; 1265 args.gid = INVALID_GID; /* no change */
1256 } 1266 }
1257 CIFSSMBUnixSetPathInfo(xid, tcon, full_path, &args, 1267 CIFSSMBUnixSetPathInfo(xid, tcon, full_path, &args,
1258 cifs_sb->local_nls, 1268 cifs_sb->local_nls,
@@ -2013,12 +2023,12 @@ cifs_setattr_unix(struct dentry *direntry, struct iattr *attrs)
2013 if (attrs->ia_valid & ATTR_UID) 2023 if (attrs->ia_valid & ATTR_UID)
2014 args->uid = attrs->ia_uid; 2024 args->uid = attrs->ia_uid;
2015 else 2025 else
2016 args->uid = NO_CHANGE_64; 2026 args->uid = INVALID_UID; /* no change */
2017 2027
2018 if (attrs->ia_valid & ATTR_GID) 2028 if (attrs->ia_valid & ATTR_GID)
2019 args->gid = attrs->ia_gid; 2029 args->gid = attrs->ia_gid;
2020 else 2030 else
2021 args->gid = NO_CHANGE_64; 2031 args->gid = INVALID_GID; /* no change */
2022 2032
2023 if (attrs->ia_valid & ATTR_ATIME) 2033 if (attrs->ia_valid & ATTR_ATIME)
2024 args->atime = cifs_UnixTimeToNT(attrs->ia_atime); 2034 args->atime = cifs_UnixTimeToNT(attrs->ia_atime);
@@ -2086,8 +2096,8 @@ static int
2086cifs_setattr_nounix(struct dentry *direntry, struct iattr *attrs) 2096cifs_setattr_nounix(struct dentry *direntry, struct iattr *attrs)
2087{ 2097{
2088 unsigned int xid; 2098 unsigned int xid;
2089 uid_t uid = NO_CHANGE_32; 2099 kuid_t uid = INVALID_UID;
2090 gid_t gid = NO_CHANGE_32; 2100 kgid_t gid = INVALID_GID;
2091 struct inode *inode = direntry->d_inode; 2101 struct inode *inode = direntry->d_inode;
2092 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); 2102 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
2093 struct cifsInodeInfo *cifsInode = CIFS_I(inode); 2103 struct cifsInodeInfo *cifsInode = CIFS_I(inode);
@@ -2146,7 +2156,7 @@ cifs_setattr_nounix(struct dentry *direntry, struct iattr *attrs)
2146 2156
2147#ifdef CONFIG_CIFS_ACL 2157#ifdef CONFIG_CIFS_ACL
2148 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_ACL) { 2158 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_ACL) {
2149 if (uid != NO_CHANGE_32 || gid != NO_CHANGE_32) { 2159 if (uid_valid(uid) || gid_valid(gid)) {
2150 rc = id_mode_to_cifs_acl(inode, full_path, NO_CHANGE_64, 2160 rc = id_mode_to_cifs_acl(inode, full_path, NO_CHANGE_64,
2151 uid, gid); 2161 uid, gid);
2152 if (rc) { 2162 if (rc) {
@@ -2170,7 +2180,7 @@ cifs_setattr_nounix(struct dentry *direntry, struct iattr *attrs)
2170#ifdef CONFIG_CIFS_ACL 2180#ifdef CONFIG_CIFS_ACL
2171 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_ACL) { 2181 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_ACL) {
2172 rc = id_mode_to_cifs_acl(inode, full_path, mode, 2182 rc = id_mode_to_cifs_acl(inode, full_path, mode,
2173 NO_CHANGE_32, NO_CHANGE_32); 2183 INVALID_UID, INVALID_GID);
2174 if (rc) { 2184 if (rc) {
2175 cFYI(1, "%s: Setting ACL failed with error: %d", 2185 cFYI(1, "%s: Setting ACL failed with error: %d",
2176 __func__, rc); 2186 __func__, rc);
diff --git a/fs/cifs/link.c b/fs/cifs/link.c
index 51dc2fb6e854..9f6c4c45d21e 100644
--- a/fs/cifs/link.c
+++ b/fs/cifs/link.c
@@ -76,7 +76,7 @@ symlink_hash(unsigned int link_len, const char *link_str, u8 *md5_hash)
76 } 76 }
77 rc = crypto_shash_update(&sdescmd5->shash, link_str, link_len); 77 rc = crypto_shash_update(&sdescmd5->shash, link_str, link_len);
78 if (rc) { 78 if (rc) {
79 cERROR(1, "%s: Could not update iwth link_str", __func__); 79 cERROR(1, "%s: Could not update with link_str", __func__);
80 goto symlink_hash_err; 80 goto symlink_hash_err;
81 } 81 }
82 rc = crypto_shash_final(&sdescmd5->shash, md5_hash); 82 rc = crypto_shash_final(&sdescmd5->shash, md5_hash);
diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c
index 3a00c0d0cead..1b15bf839f37 100644
--- a/fs/cifs/misc.c
+++ b/fs/cifs/misc.c
@@ -569,7 +569,7 @@ bool
569backup_cred(struct cifs_sb_info *cifs_sb) 569backup_cred(struct cifs_sb_info *cifs_sb)
570{ 570{
571 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_BACKUPUID) { 571 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_BACKUPUID) {
572 if (cifs_sb->mnt_backupuid == current_fsuid()) 572 if (uid_eq(cifs_sb->mnt_backupuid, current_fsuid()))
573 return true; 573 return true;
574 } 574 }
575 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_BACKUPGID) { 575 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_BACKUPGID) {
diff --git a/fs/coda/cache.c b/fs/coda/cache.c
index 958ae0e0ff8c..1da168c61d35 100644
--- a/fs/coda/cache.c
+++ b/fs/coda/cache.c
@@ -33,7 +33,7 @@ void coda_cache_enter(struct inode *inode, int mask)
33 33
34 spin_lock(&cii->c_lock); 34 spin_lock(&cii->c_lock);
35 cii->c_cached_epoch = atomic_read(&permission_epoch); 35 cii->c_cached_epoch = atomic_read(&permission_epoch);
36 if (cii->c_uid != current_fsuid()) { 36 if (!uid_eq(cii->c_uid, current_fsuid())) {
37 cii->c_uid = current_fsuid(); 37 cii->c_uid = current_fsuid();
38 cii->c_cached_perm = mask; 38 cii->c_cached_perm = mask;
39 } else 39 } else
@@ -65,7 +65,7 @@ int coda_cache_check(struct inode *inode, int mask)
65 65
66 spin_lock(&cii->c_lock); 66 spin_lock(&cii->c_lock);
67 hit = (mask & cii->c_cached_perm) == mask && 67 hit = (mask & cii->c_cached_perm) == mask &&
68 cii->c_uid == current_fsuid() && 68 uid_eq(cii->c_uid, current_fsuid()) &&
69 cii->c_cached_epoch == atomic_read(&permission_epoch); 69 cii->c_cached_epoch == atomic_read(&permission_epoch);
70 spin_unlock(&cii->c_lock); 70 spin_unlock(&cii->c_lock);
71 71
diff --git a/fs/coda/coda_fs_i.h b/fs/coda/coda_fs_i.h
index b24fdfd8a3f0..c64075213218 100644
--- a/fs/coda/coda_fs_i.h
+++ b/fs/coda/coda_fs_i.h
@@ -25,7 +25,7 @@ struct coda_inode_info {
25 u_short c_flags; /* flags (see below) */ 25 u_short c_flags; /* flags (see below) */
26 unsigned int c_mapcount; /* nr of times this inode is mapped */ 26 unsigned int c_mapcount; /* nr of times this inode is mapped */
27 unsigned int c_cached_epoch; /* epoch for cached permissions */ 27 unsigned int c_cached_epoch; /* epoch for cached permissions */
28 vuid_t c_uid; /* fsuid for cached permissions */ 28 kuid_t c_uid; /* fsuid for cached permissions */
29 unsigned int c_cached_perm; /* cached access permissions */ 29 unsigned int c_cached_perm; /* cached access permissions */
30 spinlock_t c_lock; 30 spinlock_t c_lock;
31 struct inode vfs_inode; 31 struct inode vfs_inode;
diff --git a/fs/coda/coda_linux.c b/fs/coda/coda_linux.c
index 854ace712685..2849f41e72a2 100644
--- a/fs/coda/coda_linux.c
+++ b/fs/coda/coda_linux.c
@@ -100,9 +100,9 @@ void coda_vattr_to_iattr(struct inode *inode, struct coda_vattr *attr)
100 if (attr->va_mode != (u_short) -1) 100 if (attr->va_mode != (u_short) -1)
101 inode->i_mode = attr->va_mode | inode_type; 101 inode->i_mode = attr->va_mode | inode_type;
102 if (attr->va_uid != -1) 102 if (attr->va_uid != -1)
103 inode->i_uid = (uid_t) attr->va_uid; 103 inode->i_uid = make_kuid(&init_user_ns, (uid_t) attr->va_uid);
104 if (attr->va_gid != -1) 104 if (attr->va_gid != -1)
105 inode->i_gid = (gid_t) attr->va_gid; 105 inode->i_gid = make_kgid(&init_user_ns, (gid_t) attr->va_gid);
106 if (attr->va_nlink != -1) 106 if (attr->va_nlink != -1)
107 set_nlink(inode, attr->va_nlink); 107 set_nlink(inode, attr->va_nlink);
108 if (attr->va_size != -1) 108 if (attr->va_size != -1)
@@ -171,10 +171,10 @@ void coda_iattr_to_vattr(struct iattr *iattr, struct coda_vattr *vattr)
171 vattr->va_mode = iattr->ia_mode; 171 vattr->va_mode = iattr->ia_mode;
172 } 172 }
173 if ( valid & ATTR_UID ) { 173 if ( valid & ATTR_UID ) {
174 vattr->va_uid = (vuid_t) iattr->ia_uid; 174 vattr->va_uid = (vuid_t) from_kuid(&init_user_ns, iattr->ia_uid);
175 } 175 }
176 if ( valid & ATTR_GID ) { 176 if ( valid & ATTR_GID ) {
177 vattr->va_gid = (vgid_t) iattr->ia_gid; 177 vattr->va_gid = (vgid_t) from_kgid(&init_user_ns, iattr->ia_gid);
178 } 178 }
179 if ( valid & ATTR_SIZE ) { 179 if ( valid & ATTR_SIZE ) {
180 vattr->va_size = iattr->ia_size; 180 vattr->va_size = iattr->ia_size;
diff --git a/fs/coda/inode.c b/fs/coda/inode.c
index be2aa4909487..cf674e9179a3 100644
--- a/fs/coda/inode.c
+++ b/fs/coda/inode.c
@@ -20,6 +20,7 @@
20#include <linux/file.h> 20#include <linux/file.h>
21#include <linux/vfs.h> 21#include <linux/vfs.h>
22#include <linux/slab.h> 22#include <linux/slab.h>
23#include <linux/pid_namespace.h>
23 24
24#include <asm/uaccess.h> 25#include <asm/uaccess.h>
25 26
@@ -48,7 +49,7 @@ static struct inode *coda_alloc_inode(struct super_block *sb)
48 return NULL; 49 return NULL;
49 memset(&ei->c_fid, 0, sizeof(struct CodaFid)); 50 memset(&ei->c_fid, 0, sizeof(struct CodaFid));
50 ei->c_flags = 0; 51 ei->c_flags = 0;
51 ei->c_uid = 0; 52 ei->c_uid = GLOBAL_ROOT_UID;
52 ei->c_cached_perm = 0; 53 ei->c_cached_perm = 0;
53 spin_lock_init(&ei->c_lock); 54 spin_lock_init(&ei->c_lock);
54 return &ei->vfs_inode; 55 return &ei->vfs_inode;
@@ -157,6 +158,9 @@ static int coda_fill_super(struct super_block *sb, void *data, int silent)
157 int error; 158 int error;
158 int idx; 159 int idx;
159 160
161 if (task_active_pid_ns(current) != &init_pid_ns)
162 return -EINVAL;
163
160 idx = get_device_index((struct coda_mount_data *) data); 164 idx = get_device_index((struct coda_mount_data *) data);
161 165
162 /* Ignore errors in data, for backward compatibility */ 166 /* Ignore errors in data, for backward compatibility */
diff --git a/fs/coda/psdev.c b/fs/coda/psdev.c
index 761d5b31b18d..ebc2bae6c289 100644
--- a/fs/coda/psdev.c
+++ b/fs/coda/psdev.c
@@ -37,6 +37,7 @@
37#include <linux/list.h> 37#include <linux/list.h>
38#include <linux/mutex.h> 38#include <linux/mutex.h>
39#include <linux/device.h> 39#include <linux/device.h>
40#include <linux/pid_namespace.h>
40#include <asm/io.h> 41#include <asm/io.h>
41#include <asm/poll.h> 42#include <asm/poll.h>
42#include <asm/uaccess.h> 43#include <asm/uaccess.h>
@@ -266,6 +267,12 @@ static int coda_psdev_open(struct inode * inode, struct file * file)
266 struct venus_comm *vcp; 267 struct venus_comm *vcp;
267 int idx, err; 268 int idx, err;
268 269
270 if (task_active_pid_ns(current) != &init_pid_ns)
271 return -EINVAL;
272
273 if (current_user_ns() != &init_user_ns)
274 return -EINVAL;
275
269 idx = iminor(inode); 276 idx = iminor(inode);
270 if (idx < 0 || idx >= MAX_CODADEVS) 277 if (idx < 0 || idx >= MAX_CODADEVS)
271 return -ENODEV; 278 return -ENODEV;
diff --git a/fs/coda/upcall.c b/fs/coda/upcall.c
index 0c68fd31fbf2..3a731976dc5e 100644
--- a/fs/coda/upcall.c
+++ b/fs/coda/upcall.c
@@ -50,9 +50,9 @@ static void *alloc_upcall(int opcode, int size)
50 return ERR_PTR(-ENOMEM); 50 return ERR_PTR(-ENOMEM);
51 51
52 inp->ih.opcode = opcode; 52 inp->ih.opcode = opcode;
53 inp->ih.pid = current->pid; 53 inp->ih.pid = task_pid_nr_ns(current, &init_pid_ns);
54 inp->ih.pgid = task_pgrp_nr(current); 54 inp->ih.pgid = task_pgrp_nr_ns(current, &init_pid_ns);
55 inp->ih.uid = current_fsuid(); 55 inp->ih.uid = from_kuid(&init_user_ns, current_fsuid());
56 56
57 return (void*)inp; 57 return (void*)inp;
58} 58}
@@ -157,7 +157,7 @@ int venus_lookup(struct super_block *sb, struct CodaFid *fid,
157} 157}
158 158
159int venus_close(struct super_block *sb, struct CodaFid *fid, int flags, 159int venus_close(struct super_block *sb, struct CodaFid *fid, int flags,
160 vuid_t uid) 160 kuid_t uid)
161{ 161{
162 union inputArgs *inp; 162 union inputArgs *inp;
163 union outputArgs *outp; 163 union outputArgs *outp;
@@ -166,7 +166,7 @@ int venus_close(struct super_block *sb, struct CodaFid *fid, int flags,
166 insize = SIZE(release); 166 insize = SIZE(release);
167 UPARG(CODA_CLOSE); 167 UPARG(CODA_CLOSE);
168 168
169 inp->ih.uid = uid; 169 inp->ih.uid = from_kuid(&init_user_ns, uid);
170 inp->coda_close.VFid = *fid; 170 inp->coda_close.VFid = *fid;
171 inp->coda_close.flags = flags; 171 inp->coda_close.flags = flags;
172 172
diff --git a/fs/compat.c b/fs/compat.c
index 015e1e1f87c6..fe40fde29111 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -1278,8 +1278,7 @@ compat_sys_vmsplice(int fd, const struct compat_iovec __user *iov32,
1278 * Exactly like fs/open.c:sys_open(), except that it doesn't set the 1278 * Exactly like fs/open.c:sys_open(), except that it doesn't set the
1279 * O_LARGEFILE flag. 1279 * O_LARGEFILE flag.
1280 */ 1280 */
1281asmlinkage long 1281COMPAT_SYSCALL_DEFINE3(open, const char __user *, filename, int, flags, umode_t, mode)
1282compat_sys_open(const char __user *filename, int flags, umode_t mode)
1283{ 1282{
1284 return do_sys_open(AT_FDCWD, filename, flags, mode); 1283 return do_sys_open(AT_FDCWD, filename, flags, mode);
1285} 1284}
@@ -1288,8 +1287,7 @@ compat_sys_open(const char __user *filename, int flags, umode_t mode)
1288 * Exactly like fs/open.c:sys_openat(), except that it doesn't set the 1287 * Exactly like fs/open.c:sys_openat(), except that it doesn't set the
1289 * O_LARGEFILE flag. 1288 * O_LARGEFILE flag.
1290 */ 1289 */
1291asmlinkage long 1290COMPAT_SYSCALL_DEFINE4(openat, int, dfd, const char __user *, filename, int, flags, umode_t, mode)
1292compat_sys_openat(unsigned int dfd, const char __user *filename, int flags, umode_t mode)
1293{ 1291{
1294 return do_sys_open(dfd, filename, flags, mode); 1292 return do_sys_open(dfd, filename, flags, mode);
1295} 1293}
@@ -1739,55 +1737,13 @@ asmlinkage long compat_sys_signalfd(int ufd,
1739} 1737}
1740#endif /* CONFIG_SIGNALFD */ 1738#endif /* CONFIG_SIGNALFD */
1741 1739
1742#ifdef CONFIG_TIMERFD
1743
1744asmlinkage long compat_sys_timerfd_settime(int ufd, int flags,
1745 const struct compat_itimerspec __user *utmr,
1746 struct compat_itimerspec __user *otmr)
1747{
1748 int error;
1749 struct itimerspec t;
1750 struct itimerspec __user *ut;
1751
1752 if (get_compat_itimerspec(&t, utmr))
1753 return -EFAULT;
1754 ut = compat_alloc_user_space(2 * sizeof(struct itimerspec));
1755 if (copy_to_user(&ut[0], &t, sizeof(t)))
1756 return -EFAULT;
1757 error = sys_timerfd_settime(ufd, flags, &ut[0], &ut[1]);
1758 if (!error && otmr)
1759 error = (copy_from_user(&t, &ut[1], sizeof(struct itimerspec)) ||
1760 put_compat_itimerspec(otmr, &t)) ? -EFAULT: 0;
1761
1762 return error;
1763}
1764
1765asmlinkage long compat_sys_timerfd_gettime(int ufd,
1766 struct compat_itimerspec __user *otmr)
1767{
1768 int error;
1769 struct itimerspec t;
1770 struct itimerspec __user *ut;
1771
1772 ut = compat_alloc_user_space(sizeof(struct itimerspec));
1773 error = sys_timerfd_gettime(ufd, ut);
1774 if (!error)
1775 error = (copy_from_user(&t, ut, sizeof(struct itimerspec)) ||
1776 put_compat_itimerspec(otmr, &t)) ? -EFAULT: 0;
1777
1778 return error;
1779}
1780
1781#endif /* CONFIG_TIMERFD */
1782
1783#ifdef CONFIG_FHANDLE 1740#ifdef CONFIG_FHANDLE
1784/* 1741/*
1785 * Exactly like fs/open.c:sys_open_by_handle_at(), except that it 1742 * Exactly like fs/open.c:sys_open_by_handle_at(), except that it
1786 * doesn't set the O_LARGEFILE flag. 1743 * doesn't set the O_LARGEFILE flag.
1787 */ 1744 */
1788asmlinkage long 1745COMPAT_SYSCALL_DEFINE3(open_by_handle_at, int, mountdirfd,
1789compat_sys_open_by_handle_at(int mountdirfd, 1746 struct file_handle __user *, handle, int, flags)
1790 struct file_handle __user *handle, int flags)
1791{ 1747{
1792 return do_handle_open(mountdirfd, handle, flags); 1748 return do_handle_open(mountdirfd, handle, flags);
1793} 1749}
diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c
index 712b10f64c70..e9dcfa3c208c 100644
--- a/fs/configfs/dir.c
+++ b/fs/configfs/dir.c
@@ -1037,10 +1037,11 @@ static int configfs_dump(struct configfs_dirent *sd, int level)
1037static int configfs_depend_prep(struct dentry *origin, 1037static int configfs_depend_prep(struct dentry *origin,
1038 struct config_item *target) 1038 struct config_item *target)
1039{ 1039{
1040 struct configfs_dirent *child_sd, *sd = origin->d_fsdata; 1040 struct configfs_dirent *child_sd, *sd;
1041 int ret = 0; 1041 int ret = 0;
1042 1042
1043 BUG_ON(!origin || !sd); 1043 BUG_ON(!origin || !origin->d_fsdata);
1044 sd = origin->d_fsdata;
1044 1045
1045 if (sd->s_element == target) /* Boo-yah */ 1046 if (sd->s_element == target) /* Boo-yah */
1046 goto out; 1047 goto out;
diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c
index a5f12b7e228d..0c4f80b447fb 100644
--- a/fs/debugfs/inode.c
+++ b/fs/debugfs/inode.c
@@ -322,7 +322,6 @@ static struct dentry *__create_file(const char *name, umode_t mode,
322 if (!parent) 322 if (!parent)
323 parent = debugfs_mount->mnt_root; 323 parent = debugfs_mount->mnt_root;
324 324
325 dentry = NULL;
326 mutex_lock(&parent->d_inode->i_mutex); 325 mutex_lock(&parent->d_inode->i_mutex);
327 dentry = lookup_one_len(name, parent, strlen(name)); 326 dentry = lookup_one_len(name, parent, strlen(name));
328 if (!IS_ERR(dentry)) { 327 if (!IS_ERR(dentry)) {
diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c
index 472e6befc54d..073d30b9d1ac 100644
--- a/fs/devpts/inode.c
+++ b/fs/devpts/inode.c
@@ -243,6 +243,13 @@ static int mknod_ptmx(struct super_block *sb)
243 struct dentry *root = sb->s_root; 243 struct dentry *root = sb->s_root;
244 struct pts_fs_info *fsi = DEVPTS_SB(sb); 244 struct pts_fs_info *fsi = DEVPTS_SB(sb);
245 struct pts_mount_opts *opts = &fsi->mount_opts; 245 struct pts_mount_opts *opts = &fsi->mount_opts;
246 kuid_t root_uid;
247 kgid_t root_gid;
248
249 root_uid = make_kuid(current_user_ns(), 0);
250 root_gid = make_kgid(current_user_ns(), 0);
251 if (!uid_valid(root_uid) || !gid_valid(root_gid))
252 return -EINVAL;
246 253
247 mutex_lock(&root->d_inode->i_mutex); 254 mutex_lock(&root->d_inode->i_mutex);
248 255
@@ -273,6 +280,8 @@ static int mknod_ptmx(struct super_block *sb)
273 280
274 mode = S_IFCHR|opts->ptmxmode; 281 mode = S_IFCHR|opts->ptmxmode;
275 init_special_inode(inode, mode, MKDEV(TTYAUX_MAJOR, 2)); 282 init_special_inode(inode, mode, MKDEV(TTYAUX_MAJOR, 2));
283 inode->i_uid = root_uid;
284 inode->i_gid = root_gid;
276 285
277 d_add(dentry, inode); 286 d_add(dentry, inode);
278 287
@@ -438,6 +447,12 @@ static struct dentry *devpts_mount(struct file_system_type *fs_type,
438 if (error) 447 if (error)
439 return ERR_PTR(error); 448 return ERR_PTR(error);
440 449
450 /* Require newinstance for all user namespace mounts to ensure
451 * the mount options are not changed.
452 */
453 if ((current_user_ns() != &init_user_ns) && !opts.newinstance)
454 return ERR_PTR(-EINVAL);
455
441 if (opts.newinstance) 456 if (opts.newinstance)
442 s = sget(fs_type, NULL, set_anon_super, flags, NULL); 457 s = sget(fs_type, NULL, set_anon_super, flags, NULL);
443 else 458 else
@@ -491,6 +506,9 @@ static struct file_system_type devpts_fs_type = {
491 .name = "devpts", 506 .name = "devpts",
492 .mount = devpts_mount, 507 .mount = devpts_mount,
493 .kill_sb = devpts_kill_sb, 508 .kill_sb = devpts_kill_sb,
509#ifdef CONFIG_DEVPTS_MULTIPLE_INSTANCES
510 .fs_flags = FS_USERNS_MOUNT | FS_USERNS_DEV_MOUNT,
511#endif
494}; 512};
495 513
496/* 514/*
diff --git a/fs/dlm/dlm_internal.h b/fs/dlm/dlm_internal.h
index 77c0f70f8fe8..e7665c31f7b1 100644
--- a/fs/dlm/dlm_internal.h
+++ b/fs/dlm/dlm_internal.h
@@ -96,10 +96,13 @@ do { \
96} 96}
97 97
98 98
99#define DLM_RTF_SHRINK 0x00000001
100
99struct dlm_rsbtable { 101struct dlm_rsbtable {
100 struct rb_root keep; 102 struct rb_root keep;
101 struct rb_root toss; 103 struct rb_root toss;
102 spinlock_t lock; 104 spinlock_t lock;
105 uint32_t flags;
103}; 106};
104 107
105 108
diff --git a/fs/dlm/lock.c b/fs/dlm/lock.c
index a579f30f237d..f7501651762d 100644
--- a/fs/dlm/lock.c
+++ b/fs/dlm/lock.c
@@ -1132,6 +1132,7 @@ static void toss_rsb(struct kref *kref)
1132 rb_erase(&r->res_hashnode, &ls->ls_rsbtbl[r->res_bucket].keep); 1132 rb_erase(&r->res_hashnode, &ls->ls_rsbtbl[r->res_bucket].keep);
1133 rsb_insert(r, &ls->ls_rsbtbl[r->res_bucket].toss); 1133 rsb_insert(r, &ls->ls_rsbtbl[r->res_bucket].toss);
1134 r->res_toss_time = jiffies; 1134 r->res_toss_time = jiffies;
1135 ls->ls_rsbtbl[r->res_bucket].flags |= DLM_RTF_SHRINK;
1135 if (r->res_lvbptr) { 1136 if (r->res_lvbptr) {
1136 dlm_free_lvb(r->res_lvbptr); 1137 dlm_free_lvb(r->res_lvbptr);
1137 r->res_lvbptr = NULL; 1138 r->res_lvbptr = NULL;
@@ -1659,11 +1660,18 @@ static void shrink_bucket(struct dlm_ls *ls, int b)
1659 char *name; 1660 char *name;
1660 int our_nodeid = dlm_our_nodeid(); 1661 int our_nodeid = dlm_our_nodeid();
1661 int remote_count = 0; 1662 int remote_count = 0;
1663 int need_shrink = 0;
1662 int i, len, rv; 1664 int i, len, rv;
1663 1665
1664 memset(&ls->ls_remove_lens, 0, sizeof(int) * DLM_REMOVE_NAMES_MAX); 1666 memset(&ls->ls_remove_lens, 0, sizeof(int) * DLM_REMOVE_NAMES_MAX);
1665 1667
1666 spin_lock(&ls->ls_rsbtbl[b].lock); 1668 spin_lock(&ls->ls_rsbtbl[b].lock);
1669
1670 if (!(ls->ls_rsbtbl[b].flags & DLM_RTF_SHRINK)) {
1671 spin_unlock(&ls->ls_rsbtbl[b].lock);
1672 return;
1673 }
1674
1667 for (n = rb_first(&ls->ls_rsbtbl[b].toss); n; n = next) { 1675 for (n = rb_first(&ls->ls_rsbtbl[b].toss); n; n = next) {
1668 next = rb_next(n); 1676 next = rb_next(n);
1669 r = rb_entry(n, struct dlm_rsb, res_hashnode); 1677 r = rb_entry(n, struct dlm_rsb, res_hashnode);
@@ -1679,6 +1687,8 @@ static void shrink_bucket(struct dlm_ls *ls, int b)
1679 continue; 1687 continue;
1680 } 1688 }
1681 1689
1690 need_shrink = 1;
1691
1682 if (!time_after_eq(jiffies, r->res_toss_time + 1692 if (!time_after_eq(jiffies, r->res_toss_time +
1683 dlm_config.ci_toss_secs * HZ)) { 1693 dlm_config.ci_toss_secs * HZ)) {
1684 continue; 1694 continue;
@@ -1710,6 +1720,11 @@ static void shrink_bucket(struct dlm_ls *ls, int b)
1710 rb_erase(&r->res_hashnode, &ls->ls_rsbtbl[b].toss); 1720 rb_erase(&r->res_hashnode, &ls->ls_rsbtbl[b].toss);
1711 dlm_free_rsb(r); 1721 dlm_free_rsb(r);
1712 } 1722 }
1723
1724 if (need_shrink)
1725 ls->ls_rsbtbl[b].flags |= DLM_RTF_SHRINK;
1726 else
1727 ls->ls_rsbtbl[b].flags &= ~DLM_RTF_SHRINK;
1713 spin_unlock(&ls->ls_rsbtbl[b].lock); 1728 spin_unlock(&ls->ls_rsbtbl[b].lock);
1714 1729
1715 /* 1730 /*
diff --git a/fs/dlm/user.c b/fs/dlm/user.c
index 7ff49852b0cb..911649a47dd5 100644
--- a/fs/dlm/user.c
+++ b/fs/dlm/user.c
@@ -503,11 +503,11 @@ static ssize_t device_write(struct file *file, const char __user *buf,
503#endif 503#endif
504 return -EINVAL; 504 return -EINVAL;
505 505
506#ifdef CONFIG_COMPAT 506 /*
507 if (count > sizeof(struct dlm_write_request32) + DLM_RESNAME_MAXLEN) 507 * can't compare against COMPAT/dlm_write_request32 because
508#else 508 * we don't yet know if is64bit is zero
509 */
509 if (count > sizeof(struct dlm_write_request) + DLM_RESNAME_MAXLEN) 510 if (count > sizeof(struct dlm_write_request) + DLM_RESNAME_MAXLEN)
510#endif
511 return -EINVAL; 511 return -EINVAL;
512 512
513 kbuf = kzalloc(count + 1, GFP_NOFS); 513 kbuf = kzalloc(count + 1, GFP_NOFS);
diff --git a/fs/ecryptfs/Kconfig b/fs/ecryptfs/Kconfig
index cc16562654de..e15ef38c24fa 100644
--- a/fs/ecryptfs/Kconfig
+++ b/fs/ecryptfs/Kconfig
@@ -1,6 +1,6 @@
1config ECRYPT_FS 1config ECRYPT_FS
2 tristate "eCrypt filesystem layer support (EXPERIMENTAL)" 2 tristate "eCrypt filesystem layer support"
3 depends on EXPERIMENTAL && KEYS && CRYPTO && (ENCRYPTED_KEYS || ENCRYPTED_KEYS=n) 3 depends on KEYS && CRYPTO && (ENCRYPTED_KEYS || ENCRYPTED_KEYS=n)
4 select CRYPTO_ECB 4 select CRYPTO_ECB
5 select CRYPTO_CBC 5 select CRYPTO_CBC
6 select CRYPTO_MD5 6 select CRYPTO_MD5
diff --git a/fs/efs/Kconfig b/fs/efs/Kconfig
index 6ebfc1c207a8..d020e3c30fea 100644
--- a/fs/efs/Kconfig
+++ b/fs/efs/Kconfig
@@ -1,6 +1,6 @@
1config EFS_FS 1config EFS_FS
2 tristate "EFS file system support (read only) (EXPERIMENTAL)" 2 tristate "EFS file system support (read only)"
3 depends on BLOCK && EXPERIMENTAL 3 depends on BLOCK
4 help 4 help
5 EFS is an older file system used for non-ISO9660 CD-ROMs and hard 5 EFS is an older file system used for non-ISO9660 CD-ROMs and hard
6 disk partitions by SGI's IRIX operating system (IRIX 6.0 and newer 6 disk partitions by SGI's IRIX operating system (IRIX 6.0 and newer
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index 0926fe46ae3e..5546ca225ffe 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -2067,6 +2067,7 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
2067 test_opt(sb,DATA_FLAGS) == EXT3_MOUNT_JOURNAL_DATA ? "journal": 2067 test_opt(sb,DATA_FLAGS) == EXT3_MOUNT_JOURNAL_DATA ? "journal":
2068 test_opt(sb,DATA_FLAGS) == EXT3_MOUNT_ORDERED_DATA ? "ordered": 2068 test_opt(sb,DATA_FLAGS) == EXT3_MOUNT_ORDERED_DATA ? "ordered":
2069 "writeback"); 2069 "writeback");
2070 sb->s_flags |= MS_SNAP_STABLE;
2070 2071
2071 return 0; 2072 return 0;
2072 2073
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index cbfe13bf5b2a..cd818d8bb221 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -4968,7 +4968,7 @@ int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
4968 0, len, NULL, 4968 0, len, NULL,
4969 ext4_bh_unmapped)) { 4969 ext4_bh_unmapped)) {
4970 /* Wait so that we don't change page under IO */ 4970 /* Wait so that we don't change page under IO */
4971 wait_on_page_writeback(page); 4971 wait_for_stable_page(page);
4972 ret = VM_FAULT_LOCKED; 4972 ret = VM_FAULT_LOCKED;
4973 goto out; 4973 goto out;
4974 } 4974 }
diff --git a/fs/f2fs/acl.c b/fs/f2fs/acl.c
index e95b94945d5f..137af4255da6 100644
--- a/fs/f2fs/acl.c
+++ b/fs/f2fs/acl.c
@@ -191,15 +191,14 @@ struct posix_acl *f2fs_get_acl(struct inode *inode, int type)
191 retval = f2fs_getxattr(inode, name_index, "", value, retval); 191 retval = f2fs_getxattr(inode, name_index, "", value, retval);
192 } 192 }
193 193
194 if (retval < 0) { 194 if (retval > 0)
195 if (retval == -ENODATA)
196 acl = NULL;
197 else
198 acl = ERR_PTR(retval);
199 } else {
200 acl = f2fs_acl_from_disk(value, retval); 195 acl = f2fs_acl_from_disk(value, retval);
201 } 196 else if (retval == -ENODATA)
197 acl = NULL;
198 else
199 acl = ERR_PTR(retval);
202 kfree(value); 200 kfree(value);
201
203 if (!IS_ERR(acl)) 202 if (!IS_ERR(acl))
204 set_cached_acl(inode, type, acl); 203 set_cached_acl(inode, type, acl);
205 204
diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
index 6ef36c37e2be..2b6fc131e2ce 100644
--- a/fs/f2fs/checkpoint.c
+++ b/fs/f2fs/checkpoint.c
@@ -72,22 +72,22 @@ static int f2fs_write_meta_page(struct page *page,
72{ 72{
73 struct inode *inode = page->mapping->host; 73 struct inode *inode = page->mapping->host;
74 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); 74 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
75 int err;
76 75
77 wait_on_page_writeback(page); 76 /* Should not write any meta pages, if any IO error was occurred */
78 77 if (wbc->for_reclaim ||
79 err = write_meta_page(sbi, page, wbc); 78 is_set_ckpt_flags(F2FS_CKPT(sbi), CP_ERROR_FLAG)) {
80 if (err) { 79 dec_page_count(sbi, F2FS_DIRTY_META);
81 wbc->pages_skipped++; 80 wbc->pages_skipped++;
82 set_page_dirty(page); 81 set_page_dirty(page);
82 return AOP_WRITEPAGE_ACTIVATE;
83 } 83 }
84 84
85 dec_page_count(sbi, F2FS_DIRTY_META); 85 wait_on_page_writeback(page);
86 86
87 /* In this case, we should not unlock this page */ 87 write_meta_page(sbi, page);
88 if (err != AOP_WRITEPAGE_ACTIVATE) 88 dec_page_count(sbi, F2FS_DIRTY_META);
89 unlock_page(page); 89 unlock_page(page);
90 return err; 90 return 0;
91} 91}
92 92
93static int f2fs_write_meta_pages(struct address_space *mapping, 93static int f2fs_write_meta_pages(struct address_space *mapping,
@@ -138,7 +138,10 @@ long sync_meta_pages(struct f2fs_sb_info *sbi, enum page_type type,
138 BUG_ON(page->mapping != mapping); 138 BUG_ON(page->mapping != mapping);
139 BUG_ON(!PageDirty(page)); 139 BUG_ON(!PageDirty(page));
140 clear_page_dirty_for_io(page); 140 clear_page_dirty_for_io(page);
141 f2fs_write_meta_page(page, &wbc); 141 if (f2fs_write_meta_page(page, &wbc)) {
142 unlock_page(page);
143 break;
144 }
142 if (nwritten++ >= nr_to_write) 145 if (nwritten++ >= nr_to_write)
143 break; 146 break;
144 } 147 }
@@ -161,7 +164,6 @@ static int f2fs_set_meta_page_dirty(struct page *page)
161 if (!PageDirty(page)) { 164 if (!PageDirty(page)) {
162 __set_page_dirty_nobuffers(page); 165 __set_page_dirty_nobuffers(page);
163 inc_page_count(sbi, F2FS_DIRTY_META); 166 inc_page_count(sbi, F2FS_DIRTY_META);
164 F2FS_SET_SB_DIRT(sbi);
165 return 1; 167 return 1;
166 } 168 }
167 return 0; 169 return 0;
@@ -214,22 +216,13 @@ retry:
214 goto retry; 216 goto retry;
215 } 217 }
216 new->ino = ino; 218 new->ino = ino;
217 INIT_LIST_HEAD(&new->list);
218 219
219 /* add new_oentry into list which is sorted by inode number */ 220 /* add new_oentry into list which is sorted by inode number */
220 if (orphan) { 221 if (orphan)
221 struct orphan_inode_entry *prev; 222 list_add(&new->list, this->prev);
222 223 else
223 /* get previous entry */
224 prev = list_entry(orphan->list.prev, typeof(*prev), list);
225 if (&prev->list != head)
226 /* insert new orphan inode entry */
227 list_add(&new->list, &prev->list);
228 else
229 list_add(&new->list, head);
230 } else {
231 list_add_tail(&new->list, head); 224 list_add_tail(&new->list, head);
232 } 225
233 sbi->n_orphans++; 226 sbi->n_orphans++;
234out: 227out:
235 mutex_unlock(&sbi->orphan_inode_mutex); 228 mutex_unlock(&sbi->orphan_inode_mutex);
@@ -546,7 +539,7 @@ retry:
546/* 539/*
547 * Freeze all the FS-operations for checkpoint. 540 * Freeze all the FS-operations for checkpoint.
548 */ 541 */
549void block_operations(struct f2fs_sb_info *sbi) 542static void block_operations(struct f2fs_sb_info *sbi)
550{ 543{
551 int t; 544 int t;
552 struct writeback_control wbc = { 545 struct writeback_control wbc = {
@@ -718,27 +711,24 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, bool is_umount)
718 sbi->alloc_valid_block_count = 0; 711 sbi->alloc_valid_block_count = 0;
719 712
720 /* Here, we only have one bio having CP pack */ 713 /* Here, we only have one bio having CP pack */
721 if (is_set_ckpt_flags(ckpt, CP_ERROR_FLAG)) 714 sync_meta_pages(sbi, META_FLUSH, LONG_MAX);
722 sbi->sb->s_flags |= MS_RDONLY;
723 else
724 sync_meta_pages(sbi, META_FLUSH, LONG_MAX);
725 715
726 clear_prefree_segments(sbi); 716 if (!is_set_ckpt_flags(ckpt, CP_ERROR_FLAG)) {
727 F2FS_RESET_SB_DIRT(sbi); 717 clear_prefree_segments(sbi);
718 F2FS_RESET_SB_DIRT(sbi);
719 }
728} 720}
729 721
730/* 722/*
731 * We guarantee that this checkpoint procedure should not fail. 723 * We guarantee that this checkpoint procedure should not fail.
732 */ 724 */
733void write_checkpoint(struct f2fs_sb_info *sbi, bool blocked, bool is_umount) 725void write_checkpoint(struct f2fs_sb_info *sbi, bool is_umount)
734{ 726{
735 struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); 727 struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
736 unsigned long long ckpt_ver; 728 unsigned long long ckpt_ver;
737 729
738 if (!blocked) { 730 mutex_lock(&sbi->cp_mutex);
739 mutex_lock(&sbi->cp_mutex); 731 block_operations(sbi);
740 block_operations(sbi);
741 }
742 732
743 f2fs_submit_bio(sbi, DATA, true); 733 f2fs_submit_bio(sbi, DATA, true);
744 f2fs_submit_bio(sbi, NODE, true); 734 f2fs_submit_bio(sbi, NODE, true);
@@ -772,7 +762,7 @@ void init_orphan_info(struct f2fs_sb_info *sbi)
772 sbi->n_orphans = 0; 762 sbi->n_orphans = 0;
773} 763}
774 764
775int create_checkpoint_caches(void) 765int __init create_checkpoint_caches(void)
776{ 766{
777 orphan_entry_slab = f2fs_kmem_cache_create("f2fs_orphan_entry", 767 orphan_entry_slab = f2fs_kmem_cache_create("f2fs_orphan_entry",
778 sizeof(struct orphan_inode_entry), NULL); 768 sizeof(struct orphan_inode_entry), NULL);
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 3aa5ce7cab83..7bd22a201125 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -547,6 +547,15 @@ redirty_out:
547 547
548#define MAX_DESIRED_PAGES_WP 4096 548#define MAX_DESIRED_PAGES_WP 4096
549 549
550static int __f2fs_writepage(struct page *page, struct writeback_control *wbc,
551 void *data)
552{
553 struct address_space *mapping = data;
554 int ret = mapping->a_ops->writepage(page, wbc);
555 mapping_set_error(mapping, ret);
556 return ret;
557}
558
550static int f2fs_write_data_pages(struct address_space *mapping, 559static int f2fs_write_data_pages(struct address_space *mapping,
551 struct writeback_control *wbc) 560 struct writeback_control *wbc)
552{ 561{
@@ -563,7 +572,7 @@ static int f2fs_write_data_pages(struct address_space *mapping,
563 572
564 if (!S_ISDIR(inode->i_mode)) 573 if (!S_ISDIR(inode->i_mode))
565 mutex_lock(&sbi->writepages); 574 mutex_lock(&sbi->writepages);
566 ret = generic_writepages(mapping, wbc); 575 ret = write_cache_pages(mapping, wbc, __f2fs_writepage, mapping);
567 if (!S_ISDIR(inode->i_mode)) 576 if (!S_ISDIR(inode->i_mode))
568 mutex_unlock(&sbi->writepages); 577 mutex_unlock(&sbi->writepages);
569 f2fs_submit_bio(sbi, DATA, (wbc->sync_mode == WB_SYNC_ALL)); 578 f2fs_submit_bio(sbi, DATA, (wbc->sync_mode == WB_SYNC_ALL));
@@ -689,6 +698,11 @@ static int f2fs_set_data_page_dirty(struct page *page)
689 return 0; 698 return 0;
690} 699}
691 700
701static sector_t f2fs_bmap(struct address_space *mapping, sector_t block)
702{
703 return generic_block_bmap(mapping, block, get_data_block_ro);
704}
705
692const struct address_space_operations f2fs_dblock_aops = { 706const struct address_space_operations f2fs_dblock_aops = {
693 .readpage = f2fs_read_data_page, 707 .readpage = f2fs_read_data_page,
694 .readpages = f2fs_read_data_pages, 708 .readpages = f2fs_read_data_pages,
@@ -700,4 +714,5 @@ const struct address_space_operations f2fs_dblock_aops = {
700 .invalidatepage = f2fs_invalidate_data_page, 714 .invalidatepage = f2fs_invalidate_data_page,
701 .releasepage = f2fs_release_data_page, 715 .releasepage = f2fs_release_data_page,
702 .direct_IO = f2fs_direct_IO, 716 .direct_IO = f2fs_direct_IO,
717 .bmap = f2fs_bmap,
703}; 718};
diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c
index 0e0380a588ad..025b9e2f935d 100644
--- a/fs/f2fs/debug.c
+++ b/fs/f2fs/debug.c
@@ -26,6 +26,7 @@
26 26
27static LIST_HEAD(f2fs_stat_list); 27static LIST_HEAD(f2fs_stat_list);
28static struct dentry *debugfs_root; 28static struct dentry *debugfs_root;
29static DEFINE_MUTEX(f2fs_stat_mutex);
29 30
30static void update_general_status(struct f2fs_sb_info *sbi) 31static void update_general_status(struct f2fs_sb_info *sbi)
31{ 32{
@@ -180,18 +181,16 @@ static int stat_show(struct seq_file *s, void *v)
180 int i = 0; 181 int i = 0;
181 int j; 182 int j;
182 183
184 mutex_lock(&f2fs_stat_mutex);
183 list_for_each_entry_safe(si, next, &f2fs_stat_list, stat_list) { 185 list_for_each_entry_safe(si, next, &f2fs_stat_list, stat_list) {
186 char devname[BDEVNAME_SIZE];
184 187
185 mutex_lock(&si->stat_lock);
186 if (!si->sbi) {
187 mutex_unlock(&si->stat_lock);
188 continue;
189 }
190 update_general_status(si->sbi); 188 update_general_status(si->sbi);
191 189
192 seq_printf(s, "\n=====[ partition info. #%d ]=====\n", i++); 190 seq_printf(s, "\n=====[ partition info(%s). #%d ]=====\n",
193 seq_printf(s, "[SB: 1] [CP: 2] [NAT: %d] [SIT: %d] ", 191 bdevname(si->sbi->sb->s_bdev, devname), i++);
194 si->nat_area_segs, si->sit_area_segs); 192 seq_printf(s, "[SB: 1] [CP: 2] [SIT: %d] [NAT: %d] ",
193 si->sit_area_segs, si->nat_area_segs);
195 seq_printf(s, "[SSA: %d] [MAIN: %d", 194 seq_printf(s, "[SSA: %d] [MAIN: %d",
196 si->ssa_area_segs, si->main_area_segs); 195 si->ssa_area_segs, si->main_area_segs);
197 seq_printf(s, "(OverProv:%d Resv:%d)]\n\n", 196 seq_printf(s, "(OverProv:%d Resv:%d)]\n\n",
@@ -286,8 +285,8 @@ static int stat_show(struct seq_file *s, void *v)
286 seq_printf(s, "\nMemory: %u KB = static: %u + cached: %u\n", 285 seq_printf(s, "\nMemory: %u KB = static: %u + cached: %u\n",
287 (si->base_mem + si->cache_mem) >> 10, 286 (si->base_mem + si->cache_mem) >> 10,
288 si->base_mem >> 10, si->cache_mem >> 10); 287 si->base_mem >> 10, si->cache_mem >> 10);
289 mutex_unlock(&si->stat_lock);
290 } 288 }
289 mutex_unlock(&f2fs_stat_mutex);
291 return 0; 290 return 0;
292} 291}
293 292
@@ -303,7 +302,7 @@ static const struct file_operations stat_fops = {
303 .release = single_release, 302 .release = single_release,
304}; 303};
305 304
306static int init_stats(struct f2fs_sb_info *sbi) 305int f2fs_build_stats(struct f2fs_sb_info *sbi)
307{ 306{
308 struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi); 307 struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi);
309 struct f2fs_stat_info *si; 308 struct f2fs_stat_info *si;
@@ -313,9 +312,6 @@ static int init_stats(struct f2fs_sb_info *sbi)
313 return -ENOMEM; 312 return -ENOMEM;
314 313
315 si = sbi->stat_info; 314 si = sbi->stat_info;
316 mutex_init(&si->stat_lock);
317 list_add_tail(&si->stat_list, &f2fs_stat_list);
318
319 si->all_area_segs = le32_to_cpu(raw_super->segment_count); 315 si->all_area_segs = le32_to_cpu(raw_super->segment_count);
320 si->sit_area_segs = le32_to_cpu(raw_super->segment_count_sit); 316 si->sit_area_segs = le32_to_cpu(raw_super->segment_count_sit);
321 si->nat_area_segs = le32_to_cpu(raw_super->segment_count_nat); 317 si->nat_area_segs = le32_to_cpu(raw_super->segment_count_nat);
@@ -325,21 +321,11 @@ static int init_stats(struct f2fs_sb_info *sbi)
325 si->main_area_zones = si->main_area_sections / 321 si->main_area_zones = si->main_area_sections /
326 le32_to_cpu(raw_super->secs_per_zone); 322 le32_to_cpu(raw_super->secs_per_zone);
327 si->sbi = sbi; 323 si->sbi = sbi;
328 return 0;
329}
330 324
331int f2fs_build_stats(struct f2fs_sb_info *sbi) 325 mutex_lock(&f2fs_stat_mutex);
332{ 326 list_add_tail(&si->stat_list, &f2fs_stat_list);
333 int retval; 327 mutex_unlock(&f2fs_stat_mutex);
334
335 retval = init_stats(sbi);
336 if (retval)
337 return retval;
338
339 if (!debugfs_root)
340 debugfs_root = debugfs_create_dir("f2fs", NULL);
341 328
342 debugfs_create_file("status", S_IRUGO, debugfs_root, NULL, &stat_fops);
343 return 0; 329 return 0;
344} 330}
345 331
@@ -347,14 +333,22 @@ void f2fs_destroy_stats(struct f2fs_sb_info *sbi)
347{ 333{
348 struct f2fs_stat_info *si = sbi->stat_info; 334 struct f2fs_stat_info *si = sbi->stat_info;
349 335
336 mutex_lock(&f2fs_stat_mutex);
350 list_del(&si->stat_list); 337 list_del(&si->stat_list);
351 mutex_lock(&si->stat_lock); 338 mutex_unlock(&f2fs_stat_mutex);
352 si->sbi = NULL; 339
353 mutex_unlock(&si->stat_lock);
354 kfree(sbi->stat_info); 340 kfree(sbi->stat_info);
355} 341}
356 342
357void destroy_root_stats(void) 343void __init f2fs_create_root_stats(void)
344{
345 debugfs_root = debugfs_create_dir("f2fs", NULL);
346 if (debugfs_root)
347 debugfs_create_file("status", S_IRUGO, debugfs_root,
348 NULL, &stat_fops);
349}
350
351void f2fs_destroy_root_stats(void)
358{ 352{
359 debugfs_remove_recursive(debugfs_root); 353 debugfs_remove_recursive(debugfs_root);
360 debugfs_root = NULL; 354 debugfs_root = NULL;
diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c
index 951ed52748f6..c395c5012973 100644
--- a/fs/f2fs/dir.c
+++ b/fs/f2fs/dir.c
@@ -265,7 +265,7 @@ void f2fs_set_link(struct inode *dir, struct f2fs_dir_entry *de,
265 mutex_unlock_op(sbi, DENTRY_OPS); 265 mutex_unlock_op(sbi, DENTRY_OPS);
266} 266}
267 267
268void init_dent_inode(struct dentry *dentry, struct page *ipage) 268void init_dent_inode(const struct qstr *name, struct page *ipage)
269{ 269{
270 struct f2fs_node *rn; 270 struct f2fs_node *rn;
271 271
@@ -274,20 +274,19 @@ void init_dent_inode(struct dentry *dentry, struct page *ipage)
274 274
275 wait_on_page_writeback(ipage); 275 wait_on_page_writeback(ipage);
276 276
277 /* copy dentry info. to this inode page */ 277 /* copy name info. to this inode page */
278 rn = (struct f2fs_node *)page_address(ipage); 278 rn = (struct f2fs_node *)page_address(ipage);
279 rn->i.i_namelen = cpu_to_le32(dentry->d_name.len); 279 rn->i.i_namelen = cpu_to_le32(name->len);
280 memcpy(rn->i.i_name, dentry->d_name.name, dentry->d_name.len); 280 memcpy(rn->i.i_name, name->name, name->len);
281 set_page_dirty(ipage); 281 set_page_dirty(ipage);
282} 282}
283 283
284static int init_inode_metadata(struct inode *inode, struct dentry *dentry) 284static int init_inode_metadata(struct inode *inode,
285 struct inode *dir, const struct qstr *name)
285{ 286{
286 struct inode *dir = dentry->d_parent->d_inode;
287
288 if (is_inode_flag_set(F2FS_I(inode), FI_NEW_INODE)) { 287 if (is_inode_flag_set(F2FS_I(inode), FI_NEW_INODE)) {
289 int err; 288 int err;
290 err = new_inode_page(inode, dentry); 289 err = new_inode_page(inode, name);
291 if (err) 290 if (err)
292 return err; 291 return err;
293 292
@@ -310,7 +309,7 @@ static int init_inode_metadata(struct inode *inode, struct dentry *dentry)
310 if (IS_ERR(ipage)) 309 if (IS_ERR(ipage))
311 return PTR_ERR(ipage); 310 return PTR_ERR(ipage);
312 set_cold_node(inode, ipage); 311 set_cold_node(inode, ipage);
313 init_dent_inode(dentry, ipage); 312 init_dent_inode(name, ipage);
314 f2fs_put_page(ipage, 1); 313 f2fs_put_page(ipage, 1);
315 } 314 }
316 if (is_inode_flag_set(F2FS_I(inode), FI_INC_LINK)) { 315 if (is_inode_flag_set(F2FS_I(inode), FI_INC_LINK)) {
@@ -371,7 +370,7 @@ next:
371 goto next; 370 goto next;
372} 371}
373 372
374int f2fs_add_link(struct dentry *dentry, struct inode *inode) 373int __f2fs_add_link(struct inode *dir, const struct qstr *name, struct inode *inode)
375{ 374{
376 unsigned int bit_pos; 375 unsigned int bit_pos;
377 unsigned int level; 376 unsigned int level;
@@ -380,17 +379,15 @@ int f2fs_add_link(struct dentry *dentry, struct inode *inode)
380 f2fs_hash_t dentry_hash; 379 f2fs_hash_t dentry_hash;
381 struct f2fs_dir_entry *de; 380 struct f2fs_dir_entry *de;
382 unsigned int nbucket, nblock; 381 unsigned int nbucket, nblock;
383 struct inode *dir = dentry->d_parent->d_inode;
384 struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb); 382 struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb);
385 const char *name = dentry->d_name.name; 383 size_t namelen = name->len;
386 size_t namelen = dentry->d_name.len;
387 struct page *dentry_page = NULL; 384 struct page *dentry_page = NULL;
388 struct f2fs_dentry_block *dentry_blk = NULL; 385 struct f2fs_dentry_block *dentry_blk = NULL;
389 int slots = GET_DENTRY_SLOTS(namelen); 386 int slots = GET_DENTRY_SLOTS(namelen);
390 int err = 0; 387 int err = 0;
391 int i; 388 int i;
392 389
393 dentry_hash = f2fs_dentry_hash(name, dentry->d_name.len); 390 dentry_hash = f2fs_dentry_hash(name->name, name->len);
394 level = 0; 391 level = 0;
395 current_depth = F2FS_I(dir)->i_current_depth; 392 current_depth = F2FS_I(dir)->i_current_depth;
396 if (F2FS_I(dir)->chash == dentry_hash) { 393 if (F2FS_I(dir)->chash == dentry_hash) {
@@ -433,7 +430,7 @@ start:
433 ++level; 430 ++level;
434 goto start; 431 goto start;
435add_dentry: 432add_dentry:
436 err = init_inode_metadata(inode, dentry); 433 err = init_inode_metadata(inode, dir, name);
437 if (err) 434 if (err)
438 goto fail; 435 goto fail;
439 436
@@ -442,7 +439,7 @@ add_dentry:
442 de = &dentry_blk->dentry[bit_pos]; 439 de = &dentry_blk->dentry[bit_pos];
443 de->hash_code = dentry_hash; 440 de->hash_code = dentry_hash;
444 de->name_len = cpu_to_le16(namelen); 441 de->name_len = cpu_to_le16(namelen);
445 memcpy(dentry_blk->filename[bit_pos], name, namelen); 442 memcpy(dentry_blk->filename[bit_pos], name->name, name->len);
446 de->ino = cpu_to_le32(inode->i_ino); 443 de->ino = cpu_to_le32(inode->i_ino);
447 set_de_type(de, inode); 444 set_de_type(de, inode);
448 for (i = 0; i < slots; i++) 445 for (i = 0; i < slots; i++)
@@ -503,7 +500,7 @@ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page,
503 } 500 }
504 501
505 if (inode) { 502 if (inode) {
506 inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME; 503 inode->i_ctime = CURRENT_TIME;
507 drop_nlink(inode); 504 drop_nlink(inode);
508 if (S_ISDIR(inode->i_mode)) { 505 if (S_ISDIR(inode->i_mode)) {
509 drop_nlink(inode); 506 drop_nlink(inode);
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 13c6dfbb7183..cc2213afdcc7 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -104,6 +104,20 @@ static inline int update_sits_in_cursum(struct f2fs_summary_block *rs, int i)
104} 104}
105 105
106/* 106/*
107 * ioctl commands
108 */
109#define F2FS_IOC_GETFLAGS FS_IOC_GETFLAGS
110#define F2FS_IOC_SETFLAGS FS_IOC_SETFLAGS
111
112#if defined(__KERNEL__) && defined(CONFIG_COMPAT)
113/*
114 * ioctl commands in 32 bit emulation
115 */
116#define F2FS_IOC32_GETFLAGS FS_IOC32_GETFLAGS
117#define F2FS_IOC32_SETFLAGS FS_IOC32_SETFLAGS
118#endif
119
120/*
107 * For INODE and NODE manager 121 * For INODE and NODE manager
108 */ 122 */
109#define XATTR_NODE_OFFSET (-1) /* 123#define XATTR_NODE_OFFSET (-1) /*
@@ -141,7 +155,7 @@ struct f2fs_inode_info {
141 155
142 /* Use below internally in f2fs*/ 156 /* Use below internally in f2fs*/
143 unsigned long flags; /* use to pass per-file flags */ 157 unsigned long flags; /* use to pass per-file flags */
144 unsigned long long data_version;/* lastes version of data for fsync */ 158 unsigned long long data_version;/* latest version of data for fsync */
145 atomic_t dirty_dents; /* # of dirty dentry pages */ 159 atomic_t dirty_dents; /* # of dirty dentry pages */
146 f2fs_hash_t chash; /* hash value of given file name */ 160 f2fs_hash_t chash; /* hash value of given file name */
147 unsigned int clevel; /* maximum level of given file name */ 161 unsigned int clevel; /* maximum level of given file name */
@@ -211,11 +225,11 @@ struct dnode_of_data {
211static inline void set_new_dnode(struct dnode_of_data *dn, struct inode *inode, 225static inline void set_new_dnode(struct dnode_of_data *dn, struct inode *inode,
212 struct page *ipage, struct page *npage, nid_t nid) 226 struct page *ipage, struct page *npage, nid_t nid)
213{ 227{
228 memset(dn, 0, sizeof(*dn));
214 dn->inode = inode; 229 dn->inode = inode;
215 dn->inode_page = ipage; 230 dn->inode_page = ipage;
216 dn->node_page = npage; 231 dn->node_page = npage;
217 dn->nid = nid; 232 dn->nid = nid;
218 dn->inode_page_locked = 0;
219} 233}
220 234
221/* 235/*
@@ -573,6 +587,14 @@ static inline int get_pages(struct f2fs_sb_info *sbi, int count_type)
573 return atomic_read(&sbi->nr_pages[count_type]); 587 return atomic_read(&sbi->nr_pages[count_type]);
574} 588}
575 589
590static inline int get_blocktype_secs(struct f2fs_sb_info *sbi, int block_type)
591{
592 unsigned int pages_per_sec = sbi->segs_per_sec *
593 (1 << sbi->log_blocks_per_seg);
594 return ((get_pages(sbi, block_type) + pages_per_sec - 1)
595 >> sbi->log_blocks_per_seg) / sbi->segs_per_sec;
596}
597
576static inline block_t valid_user_blocks(struct f2fs_sb_info *sbi) 598static inline block_t valid_user_blocks(struct f2fs_sb_info *sbi)
577{ 599{
578 block_t ret; 600 block_t ret;
@@ -842,12 +864,12 @@ void f2fs_truncate(struct inode *);
842int f2fs_setattr(struct dentry *, struct iattr *); 864int f2fs_setattr(struct dentry *, struct iattr *);
843int truncate_hole(struct inode *, pgoff_t, pgoff_t); 865int truncate_hole(struct inode *, pgoff_t, pgoff_t);
844long f2fs_ioctl(struct file *, unsigned int, unsigned long); 866long f2fs_ioctl(struct file *, unsigned int, unsigned long);
867long f2fs_compat_ioctl(struct file *, unsigned int, unsigned long);
845 868
846/* 869/*
847 * inode.c 870 * inode.c
848 */ 871 */
849void f2fs_set_inode_flags(struct inode *); 872void f2fs_set_inode_flags(struct inode *);
850struct inode *f2fs_iget_nowait(struct super_block *, unsigned long);
851struct inode *f2fs_iget(struct super_block *, unsigned long); 873struct inode *f2fs_iget(struct super_block *, unsigned long);
852void update_inode(struct inode *, struct page *); 874void update_inode(struct inode *, struct page *);
853int f2fs_write_inode(struct inode *, struct writeback_control *); 875int f2fs_write_inode(struct inode *, struct writeback_control *);
@@ -867,16 +889,24 @@ struct f2fs_dir_entry *f2fs_parent_dir(struct inode *, struct page **);
867ino_t f2fs_inode_by_name(struct inode *, struct qstr *); 889ino_t f2fs_inode_by_name(struct inode *, struct qstr *);
868void f2fs_set_link(struct inode *, struct f2fs_dir_entry *, 890void f2fs_set_link(struct inode *, struct f2fs_dir_entry *,
869 struct page *, struct inode *); 891 struct page *, struct inode *);
870void init_dent_inode(struct dentry *, struct page *); 892void init_dent_inode(const struct qstr *, struct page *);
871int f2fs_add_link(struct dentry *, struct inode *); 893int __f2fs_add_link(struct inode *, const struct qstr *, struct inode *);
872void f2fs_delete_entry(struct f2fs_dir_entry *, struct page *, struct inode *); 894void f2fs_delete_entry(struct f2fs_dir_entry *, struct page *, struct inode *);
873int f2fs_make_empty(struct inode *, struct inode *); 895int f2fs_make_empty(struct inode *, struct inode *);
874bool f2fs_empty_dir(struct inode *); 896bool f2fs_empty_dir(struct inode *);
875 897
898static inline int f2fs_add_link(struct dentry *dentry, struct inode *inode)
899{
900 return __f2fs_add_link(dentry->d_parent->d_inode, &dentry->d_name,
901 inode);
902}
903
876/* 904/*
877 * super.c 905 * super.c
878 */ 906 */
879int f2fs_sync_fs(struct super_block *, int); 907int f2fs_sync_fs(struct super_block *, int);
908extern __printf(3, 4)
909void f2fs_msg(struct super_block *, const char *, const char *, ...);
880 910
881/* 911/*
882 * hash.c 912 * hash.c
@@ -894,7 +924,7 @@ void get_node_info(struct f2fs_sb_info *, nid_t, struct node_info *);
894int get_dnode_of_data(struct dnode_of_data *, pgoff_t, int); 924int get_dnode_of_data(struct dnode_of_data *, pgoff_t, int);
895int truncate_inode_blocks(struct inode *, pgoff_t); 925int truncate_inode_blocks(struct inode *, pgoff_t);
896int remove_inode_page(struct inode *); 926int remove_inode_page(struct inode *);
897int new_inode_page(struct inode *, struct dentry *); 927int new_inode_page(struct inode *, const struct qstr *);
898struct page *new_node_page(struct dnode_of_data *, unsigned int); 928struct page *new_node_page(struct dnode_of_data *, unsigned int);
899void ra_node_page(struct f2fs_sb_info *, nid_t); 929void ra_node_page(struct f2fs_sb_info *, nid_t);
900struct page *get_node_page(struct f2fs_sb_info *, pgoff_t); 930struct page *get_node_page(struct f2fs_sb_info *, pgoff_t);
@@ -912,7 +942,7 @@ int restore_node_summary(struct f2fs_sb_info *, unsigned int,
912void flush_nat_entries(struct f2fs_sb_info *); 942void flush_nat_entries(struct f2fs_sb_info *);
913int build_node_manager(struct f2fs_sb_info *); 943int build_node_manager(struct f2fs_sb_info *);
914void destroy_node_manager(struct f2fs_sb_info *); 944void destroy_node_manager(struct f2fs_sb_info *);
915int create_node_manager_caches(void); 945int __init create_node_manager_caches(void);
916void destroy_node_manager_caches(void); 946void destroy_node_manager_caches(void);
917 947
918/* 948/*
@@ -927,8 +957,7 @@ void allocate_new_segments(struct f2fs_sb_info *);
927struct page *get_sum_page(struct f2fs_sb_info *, unsigned int); 957struct page *get_sum_page(struct f2fs_sb_info *, unsigned int);
928struct bio *f2fs_bio_alloc(struct block_device *, int); 958struct bio *f2fs_bio_alloc(struct block_device *, int);
929void f2fs_submit_bio(struct f2fs_sb_info *, enum page_type, bool sync); 959void f2fs_submit_bio(struct f2fs_sb_info *, enum page_type, bool sync);
930int write_meta_page(struct f2fs_sb_info *, struct page *, 960void write_meta_page(struct f2fs_sb_info *, struct page *);
931 struct writeback_control *);
932void write_node_page(struct f2fs_sb_info *, struct page *, unsigned int, 961void write_node_page(struct f2fs_sb_info *, struct page *, unsigned int,
933 block_t, block_t *); 962 block_t, block_t *);
934void write_data_page(struct inode *, struct page *, struct dnode_of_data*, 963void write_data_page(struct inode *, struct page *, struct dnode_of_data*,
@@ -961,10 +990,9 @@ int get_valid_checkpoint(struct f2fs_sb_info *);
961void set_dirty_dir_page(struct inode *, struct page *); 990void set_dirty_dir_page(struct inode *, struct page *);
962void remove_dirty_dir_inode(struct inode *); 991void remove_dirty_dir_inode(struct inode *);
963void sync_dirty_dir_inodes(struct f2fs_sb_info *); 992void sync_dirty_dir_inodes(struct f2fs_sb_info *);
964void block_operations(struct f2fs_sb_info *); 993void write_checkpoint(struct f2fs_sb_info *, bool);
965void write_checkpoint(struct f2fs_sb_info *, bool, bool);
966void init_orphan_info(struct f2fs_sb_info *); 994void init_orphan_info(struct f2fs_sb_info *);
967int create_checkpoint_caches(void); 995int __init create_checkpoint_caches(void);
968void destroy_checkpoint_caches(void); 996void destroy_checkpoint_caches(void);
969 997
970/* 998/*
@@ -984,9 +1012,9 @@ int do_write_data_page(struct page *);
984int start_gc_thread(struct f2fs_sb_info *); 1012int start_gc_thread(struct f2fs_sb_info *);
985void stop_gc_thread(struct f2fs_sb_info *); 1013void stop_gc_thread(struct f2fs_sb_info *);
986block_t start_bidx_of_node(unsigned int); 1014block_t start_bidx_of_node(unsigned int);
987int f2fs_gc(struct f2fs_sb_info *, int); 1015int f2fs_gc(struct f2fs_sb_info *);
988void build_gc_manager(struct f2fs_sb_info *); 1016void build_gc_manager(struct f2fs_sb_info *);
989int create_gc_caches(void); 1017int __init create_gc_caches(void);
990void destroy_gc_caches(void); 1018void destroy_gc_caches(void);
991 1019
992/* 1020/*
@@ -1058,7 +1086,8 @@ struct f2fs_stat_info {
1058 1086
1059int f2fs_build_stats(struct f2fs_sb_info *); 1087int f2fs_build_stats(struct f2fs_sb_info *);
1060void f2fs_destroy_stats(struct f2fs_sb_info *); 1088void f2fs_destroy_stats(struct f2fs_sb_info *);
1061void destroy_root_stats(void); 1089void __init f2fs_create_root_stats(void);
1090void f2fs_destroy_root_stats(void);
1062#else 1091#else
1063#define stat_inc_call_count(si) 1092#define stat_inc_call_count(si)
1064#define stat_inc_seg_count(si, type) 1093#define stat_inc_seg_count(si, type)
@@ -1068,7 +1097,8 @@ void destroy_root_stats(void);
1068 1097
1069static inline int f2fs_build_stats(struct f2fs_sb_info *sbi) { return 0; } 1098static inline int f2fs_build_stats(struct f2fs_sb_info *sbi) { return 0; }
1070static inline void f2fs_destroy_stats(struct f2fs_sb_info *sbi) { } 1099static inline void f2fs_destroy_stats(struct f2fs_sb_info *sbi) { }
1071static inline void destroy_root_stats(void) { } 1100static inline void __init f2fs_create_root_stats(void) { }
1101static inline void f2fs_destroy_root_stats(void) { }
1072#endif 1102#endif
1073 1103
1074extern const struct file_operations f2fs_dir_operations; 1104extern const struct file_operations f2fs_dir_operations;
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 7f9ea9271ebe..b7a053d4c6d3 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -15,6 +15,7 @@
15#include <linux/writeback.h> 15#include <linux/writeback.h>
16#include <linux/falloc.h> 16#include <linux/falloc.h>
17#include <linux/types.h> 17#include <linux/types.h>
18#include <linux/compat.h>
18#include <linux/uaccess.h> 19#include <linux/uaccess.h>
19#include <linux/mount.h> 20#include <linux/mount.h>
20 21
@@ -96,8 +97,9 @@ out:
96} 97}
97 98
98static const struct vm_operations_struct f2fs_file_vm_ops = { 99static const struct vm_operations_struct f2fs_file_vm_ops = {
99 .fault = filemap_fault, 100 .fault = filemap_fault,
100 .page_mkwrite = f2fs_vm_page_mkwrite, 101 .page_mkwrite = f2fs_vm_page_mkwrite,
102 .remap_pages = generic_file_remap_pages,
101}; 103};
102 104
103static int need_to_sync_dir(struct f2fs_sb_info *sbi, struct inode *inode) 105static int need_to_sync_dir(struct f2fs_sb_info *sbi, struct inode *inode)
@@ -137,6 +139,9 @@ int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
137 if (ret) 139 if (ret)
138 return ret; 140 return ret;
139 141
142 /* guarantee free sections for fsync */
143 f2fs_balance_fs(sbi);
144
140 mutex_lock(&inode->i_mutex); 145 mutex_lock(&inode->i_mutex);
141 146
142 if (datasync && !(inode->i_state & I_DIRTY_DATASYNC)) 147 if (datasync && !(inode->i_state & I_DIRTY_DATASYNC))
@@ -153,11 +158,11 @@ int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
153 158
154 if (!S_ISREG(inode->i_mode) || inode->i_nlink != 1) 159 if (!S_ISREG(inode->i_mode) || inode->i_nlink != 1)
155 need_cp = true; 160 need_cp = true;
156 if (is_inode_flag_set(F2FS_I(inode), FI_NEED_CP)) 161 else if (is_inode_flag_set(F2FS_I(inode), FI_NEED_CP))
157 need_cp = true; 162 need_cp = true;
158 if (!space_for_roll_forward(sbi)) 163 else if (!space_for_roll_forward(sbi))
159 need_cp = true; 164 need_cp = true;
160 if (need_to_sync_dir(sbi, inode)) 165 else if (need_to_sync_dir(sbi, inode))
161 need_cp = true; 166 need_cp = true;
162 167
163 if (need_cp) { 168 if (need_cp) {
@@ -294,8 +299,6 @@ void f2fs_truncate(struct inode *inode)
294 inode->i_mtime = inode->i_ctime = CURRENT_TIME; 299 inode->i_mtime = inode->i_ctime = CURRENT_TIME;
295 mark_inode_dirty(inode); 300 mark_inode_dirty(inode);
296 } 301 }
297
298 f2fs_balance_fs(F2FS_SB(inode->i_sb));
299} 302}
300 303
301static int f2fs_getattr(struct vfsmount *mnt, 304static int f2fs_getattr(struct vfsmount *mnt,
@@ -352,6 +355,7 @@ int f2fs_setattr(struct dentry *dentry, struct iattr *attr)
352 attr->ia_size != i_size_read(inode)) { 355 attr->ia_size != i_size_read(inode)) {
353 truncate_setsize(inode, attr->ia_size); 356 truncate_setsize(inode, attr->ia_size);
354 f2fs_truncate(inode); 357 f2fs_truncate(inode);
358 f2fs_balance_fs(F2FS_SB(inode->i_sb));
355 } 359 }
356 360
357 __setattr_copy(inode, attr); 361 __setattr_copy(inode, attr);
@@ -383,12 +387,17 @@ const struct inode_operations f2fs_file_inode_operations = {
383static void fill_zero(struct inode *inode, pgoff_t index, 387static void fill_zero(struct inode *inode, pgoff_t index,
384 loff_t start, loff_t len) 388 loff_t start, loff_t len)
385{ 389{
390 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
386 struct page *page; 391 struct page *page;
387 392
388 if (!len) 393 if (!len)
389 return; 394 return;
390 395
396 f2fs_balance_fs(sbi);
397
398 mutex_lock_op(sbi, DATA_NEW);
391 page = get_new_data_page(inode, index, false); 399 page = get_new_data_page(inode, index, false);
400 mutex_unlock_op(sbi, DATA_NEW);
392 401
393 if (!IS_ERR(page)) { 402 if (!IS_ERR(page)) {
394 wait_on_page_writeback(page); 403 wait_on_page_writeback(page);
@@ -407,6 +416,8 @@ int truncate_hole(struct inode *inode, pgoff_t pg_start, pgoff_t pg_end)
407 struct dnode_of_data dn; 416 struct dnode_of_data dn;
408 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); 417 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
409 418
419 f2fs_balance_fs(sbi);
420
410 mutex_lock_op(sbi, DATA_TRUNC); 421 mutex_lock_op(sbi, DATA_TRUNC);
411 set_new_dnode(&dn, inode, NULL, NULL, 0); 422 set_new_dnode(&dn, inode, NULL, NULL, 0);
412 err = get_dnode_of_data(&dn, index, RDONLY_NODE); 423 err = get_dnode_of_data(&dn, index, RDONLY_NODE);
@@ -534,7 +545,6 @@ static long f2fs_fallocate(struct file *file, int mode,
534 loff_t offset, loff_t len) 545 loff_t offset, loff_t len)
535{ 546{
536 struct inode *inode = file->f_path.dentry->d_inode; 547 struct inode *inode = file->f_path.dentry->d_inode;
537 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
538 long ret; 548 long ret;
539 549
540 if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE)) 550 if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE))
@@ -545,7 +555,10 @@ static long f2fs_fallocate(struct file *file, int mode,
545 else 555 else
546 ret = expand_inode_data(inode, offset, len, mode); 556 ret = expand_inode_data(inode, offset, len, mode);
547 557
548 f2fs_balance_fs(sbi); 558 if (!ret) {
559 inode->i_mtime = inode->i_ctime = CURRENT_TIME;
560 mark_inode_dirty(inode);
561 }
549 return ret; 562 return ret;
550} 563}
551 564
@@ -622,6 +635,23 @@ out:
622 } 635 }
623} 636}
624 637
638#ifdef CONFIG_COMPAT
639long f2fs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
640{
641 switch (cmd) {
642 case F2FS_IOC32_GETFLAGS:
643 cmd = F2FS_IOC_GETFLAGS;
644 break;
645 case F2FS_IOC32_SETFLAGS:
646 cmd = F2FS_IOC_SETFLAGS;
647 break;
648 default:
649 return -ENOIOCTLCMD;
650 }
651 return f2fs_ioctl(file, cmd, (unsigned long) compat_ptr(arg));
652}
653#endif
654
625const struct file_operations f2fs_file_operations = { 655const struct file_operations f2fs_file_operations = {
626 .llseek = generic_file_llseek, 656 .llseek = generic_file_llseek,
627 .read = do_sync_read, 657 .read = do_sync_read,
@@ -633,6 +663,9 @@ const struct file_operations f2fs_file_operations = {
633 .fsync = f2fs_sync_file, 663 .fsync = f2fs_sync_file,
634 .fallocate = f2fs_fallocate, 664 .fallocate = f2fs_fallocate,
635 .unlocked_ioctl = f2fs_ioctl, 665 .unlocked_ioctl = f2fs_ioctl,
666#ifdef CONFIG_COMPAT
667 .compat_ioctl = f2fs_compat_ioctl,
668#endif
636 .splice_read = generic_file_splice_read, 669 .splice_read = generic_file_splice_read,
637 .splice_write = generic_file_splice_write, 670 .splice_write = generic_file_splice_write,
638}; 671};
diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
index b0ec721e984a..94b8a0c48453 100644
--- a/fs/f2fs/gc.c
+++ b/fs/f2fs/gc.c
@@ -44,10 +44,10 @@ static int gc_thread_func(void *data)
44 if (kthread_should_stop()) 44 if (kthread_should_stop())
45 break; 45 break;
46 46
47 f2fs_balance_fs(sbi); 47 if (sbi->sb->s_writers.frozen >= SB_FREEZE_WRITE) {
48 48 wait_ms = GC_THREAD_MAX_SLEEP_TIME;
49 if (!test_opt(sbi, BG_GC))
50 continue; 49 continue;
50 }
51 51
52 /* 52 /*
53 * [GC triggering condition] 53 * [GC triggering condition]
@@ -78,7 +78,8 @@ static int gc_thread_func(void *data)
78 78
79 sbi->bg_gc++; 79 sbi->bg_gc++;
80 80
81 if (f2fs_gc(sbi, 1) == GC_NONE) 81 /* if return value is not zero, no victim was selected */
82 if (f2fs_gc(sbi))
82 wait_ms = GC_THREAD_NOGC_SLEEP_TIME; 83 wait_ms = GC_THREAD_NOGC_SLEEP_TIME;
83 else if (wait_ms == GC_THREAD_NOGC_SLEEP_TIME) 84 else if (wait_ms == GC_THREAD_NOGC_SLEEP_TIME)
84 wait_ms = GC_THREAD_MAX_SLEEP_TIME; 85 wait_ms = GC_THREAD_MAX_SLEEP_TIME;
@@ -90,7 +91,10 @@ static int gc_thread_func(void *data)
90int start_gc_thread(struct f2fs_sb_info *sbi) 91int start_gc_thread(struct f2fs_sb_info *sbi)
91{ 92{
92 struct f2fs_gc_kthread *gc_th; 93 struct f2fs_gc_kthread *gc_th;
94 dev_t dev = sbi->sb->s_bdev->bd_dev;
93 95
96 if (!test_opt(sbi, BG_GC))
97 return 0;
94 gc_th = kmalloc(sizeof(struct f2fs_gc_kthread), GFP_KERNEL); 98 gc_th = kmalloc(sizeof(struct f2fs_gc_kthread), GFP_KERNEL);
95 if (!gc_th) 99 if (!gc_th)
96 return -ENOMEM; 100 return -ENOMEM;
@@ -98,9 +102,10 @@ int start_gc_thread(struct f2fs_sb_info *sbi)
98 sbi->gc_thread = gc_th; 102 sbi->gc_thread = gc_th;
99 init_waitqueue_head(&sbi->gc_thread->gc_wait_queue_head); 103 init_waitqueue_head(&sbi->gc_thread->gc_wait_queue_head);
100 sbi->gc_thread->f2fs_gc_task = kthread_run(gc_thread_func, sbi, 104 sbi->gc_thread->f2fs_gc_task = kthread_run(gc_thread_func, sbi,
101 GC_THREAD_NAME); 105 "f2fs_gc-%u:%u", MAJOR(dev), MINOR(dev));
102 if (IS_ERR(gc_th->f2fs_gc_task)) { 106 if (IS_ERR(gc_th->f2fs_gc_task)) {
103 kfree(gc_th); 107 kfree(gc_th);
108 sbi->gc_thread = NULL;
104 return -ENOMEM; 109 return -ENOMEM;
105 } 110 }
106 return 0; 111 return 0;
@@ -141,6 +146,9 @@ static void select_policy(struct f2fs_sb_info *sbi, int gc_type,
141static unsigned int get_max_cost(struct f2fs_sb_info *sbi, 146static unsigned int get_max_cost(struct f2fs_sb_info *sbi,
142 struct victim_sel_policy *p) 147 struct victim_sel_policy *p)
143{ 148{
149 /* SSR allocates in a segment unit */
150 if (p->alloc_mode == SSR)
151 return 1 << sbi->log_blocks_per_seg;
144 if (p->gc_mode == GC_GREEDY) 152 if (p->gc_mode == GC_GREEDY)
145 return (1 << sbi->log_blocks_per_seg) * p->ofs_unit; 153 return (1 << sbi->log_blocks_per_seg) * p->ofs_unit;
146 else if (p->gc_mode == GC_CB) 154 else if (p->gc_mode == GC_CB)
@@ -356,7 +364,7 @@ static int check_valid_map(struct f2fs_sb_info *sbi,
356 sentry = get_seg_entry(sbi, segno); 364 sentry = get_seg_entry(sbi, segno);
357 ret = f2fs_test_bit(offset, sentry->cur_valid_map); 365 ret = f2fs_test_bit(offset, sentry->cur_valid_map);
358 mutex_unlock(&sit_i->sentry_lock); 366 mutex_unlock(&sit_i->sentry_lock);
359 return ret ? GC_OK : GC_NEXT; 367 return ret;
360} 368}
361 369
362/* 370/*
@@ -364,7 +372,7 @@ static int check_valid_map(struct f2fs_sb_info *sbi,
364 * On validity, copy that node with cold status, otherwise (invalid node) 372 * On validity, copy that node with cold status, otherwise (invalid node)
365 * ignore that. 373 * ignore that.
366 */ 374 */
367static int gc_node_segment(struct f2fs_sb_info *sbi, 375static void gc_node_segment(struct f2fs_sb_info *sbi,
368 struct f2fs_summary *sum, unsigned int segno, int gc_type) 376 struct f2fs_summary *sum, unsigned int segno, int gc_type)
369{ 377{
370 bool initial = true; 378 bool initial = true;
@@ -376,21 +384,12 @@ next_step:
376 for (off = 0; off < sbi->blocks_per_seg; off++, entry++) { 384 for (off = 0; off < sbi->blocks_per_seg; off++, entry++) {
377 nid_t nid = le32_to_cpu(entry->nid); 385 nid_t nid = le32_to_cpu(entry->nid);
378 struct page *node_page; 386 struct page *node_page;
379 int err;
380 387
381 /* 388 /* stop BG_GC if there is not enough free sections. */
382 * It makes sure that free segments are able to write 389 if (gc_type == BG_GC && has_not_enough_free_secs(sbi, 0))
383 * all the dirty node pages before CP after this CP. 390 return;
384 * So let's check the space of dirty node pages.
385 */
386 if (should_do_checkpoint(sbi)) {
387 mutex_lock(&sbi->cp_mutex);
388 block_operations(sbi);
389 return GC_BLOCKED;
390 }
391 391
392 err = check_valid_map(sbi, segno, off); 392 if (check_valid_map(sbi, segno, off) == 0)
393 if (err == GC_NEXT)
394 continue; 393 continue;
395 394
396 if (initial) { 395 if (initial) {
@@ -420,11 +419,14 @@ next_step:
420 }; 419 };
421 sync_node_pages(sbi, 0, &wbc); 420 sync_node_pages(sbi, 0, &wbc);
422 } 421 }
423 return GC_DONE;
424} 422}
425 423
426/* 424/*
427 * Calculate start block index that this node page contains 425 * Calculate start block index indicating the given node offset.
426 * Be careful, caller should give this node offset only indicating direct node
427 * blocks. If any node offsets, which point the other types of node blocks such
428 * as indirect or double indirect node blocks, are given, it must be a caller's
429 * bug.
428 */ 430 */
429block_t start_bidx_of_node(unsigned int node_ofs) 431block_t start_bidx_of_node(unsigned int node_ofs)
430{ 432{
@@ -459,13 +461,13 @@ static int check_dnode(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
459 461
460 node_page = get_node_page(sbi, nid); 462 node_page = get_node_page(sbi, nid);
461 if (IS_ERR(node_page)) 463 if (IS_ERR(node_page))
462 return GC_NEXT; 464 return 0;
463 465
464 get_node_info(sbi, nid, dni); 466 get_node_info(sbi, nid, dni);
465 467
466 if (sum->version != dni->version) { 468 if (sum->version != dni->version) {
467 f2fs_put_page(node_page, 1); 469 f2fs_put_page(node_page, 1);
468 return GC_NEXT; 470 return 0;
469 } 471 }
470 472
471 *nofs = ofs_of_node(node_page); 473 *nofs = ofs_of_node(node_page);
@@ -473,8 +475,8 @@ static int check_dnode(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
473 f2fs_put_page(node_page, 1); 475 f2fs_put_page(node_page, 1);
474 476
475 if (source_blkaddr != blkaddr) 477 if (source_blkaddr != blkaddr)
476 return GC_NEXT; 478 return 0;
477 return GC_OK; 479 return 1;
478} 480}
479 481
480static void move_data_page(struct inode *inode, struct page *page, int gc_type) 482static void move_data_page(struct inode *inode, struct page *page, int gc_type)
@@ -515,13 +517,13 @@ out:
515 * If the parent node is not valid or the data block address is different, 517 * If the parent node is not valid or the data block address is different,
516 * the victim data block is ignored. 518 * the victim data block is ignored.
517 */ 519 */
518static int gc_data_segment(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, 520static void gc_data_segment(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
519 struct list_head *ilist, unsigned int segno, int gc_type) 521 struct list_head *ilist, unsigned int segno, int gc_type)
520{ 522{
521 struct super_block *sb = sbi->sb; 523 struct super_block *sb = sbi->sb;
522 struct f2fs_summary *entry; 524 struct f2fs_summary *entry;
523 block_t start_addr; 525 block_t start_addr;
524 int err, off; 526 int off;
525 int phase = 0; 527 int phase = 0;
526 528
527 start_addr = START_BLOCK(sbi, segno); 529 start_addr = START_BLOCK(sbi, segno);
@@ -535,20 +537,11 @@ next_step:
535 unsigned int ofs_in_node, nofs; 537 unsigned int ofs_in_node, nofs;
536 block_t start_bidx; 538 block_t start_bidx;
537 539
538 /* 540 /* stop BG_GC if there is not enough free sections. */
539 * It makes sure that free segments are able to write 541 if (gc_type == BG_GC && has_not_enough_free_secs(sbi, 0))
540 * all the dirty node pages before CP after this CP. 542 return;
541 * So let's check the space of dirty node pages.
542 */
543 if (should_do_checkpoint(sbi)) {
544 mutex_lock(&sbi->cp_mutex);
545 block_operations(sbi);
546 err = GC_BLOCKED;
547 goto stop;
548 }
549 543
550 err = check_valid_map(sbi, segno, off); 544 if (check_valid_map(sbi, segno, off) == 0)
551 if (err == GC_NEXT)
552 continue; 545 continue;
553 546
554 if (phase == 0) { 547 if (phase == 0) {
@@ -557,8 +550,7 @@ next_step:
557 } 550 }
558 551
559 /* Get an inode by ino with checking validity */ 552 /* Get an inode by ino with checking validity */
560 err = check_dnode(sbi, entry, &dni, start_addr + off, &nofs); 553 if (check_dnode(sbi, entry, &dni, start_addr + off, &nofs) == 0)
561 if (err == GC_NEXT)
562 continue; 554 continue;
563 555
564 if (phase == 1) { 556 if (phase == 1) {
@@ -570,7 +562,7 @@ next_step:
570 ofs_in_node = le16_to_cpu(entry->ofs_in_node); 562 ofs_in_node = le16_to_cpu(entry->ofs_in_node);
571 563
572 if (phase == 2) { 564 if (phase == 2) {
573 inode = f2fs_iget_nowait(sb, dni.ino); 565 inode = f2fs_iget(sb, dni.ino);
574 if (IS_ERR(inode)) 566 if (IS_ERR(inode))
575 continue; 567 continue;
576 568
@@ -598,11 +590,9 @@ next_iput:
598 } 590 }
599 if (++phase < 4) 591 if (++phase < 4)
600 goto next_step; 592 goto next_step;
601 err = GC_DONE; 593
602stop:
603 if (gc_type == FG_GC) 594 if (gc_type == FG_GC)
604 f2fs_submit_bio(sbi, DATA, true); 595 f2fs_submit_bio(sbi, DATA, true);
605 return err;
606} 596}
607 597
608static int __get_victim(struct f2fs_sb_info *sbi, unsigned int *victim, 598static int __get_victim(struct f2fs_sb_info *sbi, unsigned int *victim,
@@ -616,17 +606,16 @@ static int __get_victim(struct f2fs_sb_info *sbi, unsigned int *victim,
616 return ret; 606 return ret;
617} 607}
618 608
619static int do_garbage_collect(struct f2fs_sb_info *sbi, unsigned int segno, 609static void do_garbage_collect(struct f2fs_sb_info *sbi, unsigned int segno,
620 struct list_head *ilist, int gc_type) 610 struct list_head *ilist, int gc_type)
621{ 611{
622 struct page *sum_page; 612 struct page *sum_page;
623 struct f2fs_summary_block *sum; 613 struct f2fs_summary_block *sum;
624 int ret = GC_DONE;
625 614
626 /* read segment summary of victim */ 615 /* read segment summary of victim */
627 sum_page = get_sum_page(sbi, segno); 616 sum_page = get_sum_page(sbi, segno);
628 if (IS_ERR(sum_page)) 617 if (IS_ERR(sum_page))
629 return GC_ERROR; 618 return;
630 619
631 /* 620 /*
632 * CP needs to lock sum_page. In this time, we don't need 621 * CP needs to lock sum_page. In this time, we don't need
@@ -638,76 +627,55 @@ static int do_garbage_collect(struct f2fs_sb_info *sbi, unsigned int segno,
638 627
639 switch (GET_SUM_TYPE((&sum->footer))) { 628 switch (GET_SUM_TYPE((&sum->footer))) {
640 case SUM_TYPE_NODE: 629 case SUM_TYPE_NODE:
641 ret = gc_node_segment(sbi, sum->entries, segno, gc_type); 630 gc_node_segment(sbi, sum->entries, segno, gc_type);
642 break; 631 break;
643 case SUM_TYPE_DATA: 632 case SUM_TYPE_DATA:
644 ret = gc_data_segment(sbi, sum->entries, ilist, segno, gc_type); 633 gc_data_segment(sbi, sum->entries, ilist, segno, gc_type);
645 break; 634 break;
646 } 635 }
647 stat_inc_seg_count(sbi, GET_SUM_TYPE((&sum->footer))); 636 stat_inc_seg_count(sbi, GET_SUM_TYPE((&sum->footer)));
648 stat_inc_call_count(sbi->stat_info); 637 stat_inc_call_count(sbi->stat_info);
649 638
650 f2fs_put_page(sum_page, 0); 639 f2fs_put_page(sum_page, 0);
651 return ret;
652} 640}
653 641
654int f2fs_gc(struct f2fs_sb_info *sbi, int nGC) 642int f2fs_gc(struct f2fs_sb_info *sbi)
655{ 643{
656 unsigned int segno;
657 int old_free_secs, cur_free_secs;
658 int gc_status, nfree;
659 struct list_head ilist; 644 struct list_head ilist;
645 unsigned int segno, i;
660 int gc_type = BG_GC; 646 int gc_type = BG_GC;
647 int nfree = 0;
648 int ret = -1;
661 649
662 INIT_LIST_HEAD(&ilist); 650 INIT_LIST_HEAD(&ilist);
663gc_more: 651gc_more:
664 nfree = 0; 652 if (!(sbi->sb->s_flags & MS_ACTIVE))
665 gc_status = GC_NONE; 653 goto stop;
666 654
667 if (has_not_enough_free_secs(sbi)) 655 if (gc_type == BG_GC && has_not_enough_free_secs(sbi, nfree))
668 old_free_secs = reserved_sections(sbi); 656 gc_type = FG_GC;
669 else
670 old_free_secs = free_sections(sbi);
671 657
672 while (sbi->sb->s_flags & MS_ACTIVE) { 658 if (!__get_victim(sbi, &segno, gc_type, NO_CHECK_TYPE))
673 int i; 659 goto stop;
674 if (has_not_enough_free_secs(sbi)) 660 ret = 0;
675 gc_type = FG_GC;
676 661
677 cur_free_secs = free_sections(sbi) + nfree; 662 for (i = 0; i < sbi->segs_per_sec; i++)
663 do_garbage_collect(sbi, segno + i, &ilist, gc_type);
678 664
679 /* We got free space successfully. */ 665 if (gc_type == FG_GC &&
680 if (nGC < cur_free_secs - old_free_secs) 666 get_valid_blocks(sbi, segno, sbi->segs_per_sec) == 0)
681 break; 667 nfree++;
682 668
683 if (!__get_victim(sbi, &segno, gc_type, NO_CHECK_TYPE)) 669 if (has_not_enough_free_secs(sbi, nfree))
684 break; 670 goto gc_more;
685 671
686 for (i = 0; i < sbi->segs_per_sec; i++) { 672 if (gc_type == FG_GC)
687 /* 673 write_checkpoint(sbi, false);
688 * do_garbage_collect will give us three gc_status:
689 * GC_ERROR, GC_DONE, and GC_BLOCKED.
690 * If GC is finished uncleanly, we have to return
691 * the victim to dirty segment list.
692 */
693 gc_status = do_garbage_collect(sbi, segno + i,
694 &ilist, gc_type);
695 if (gc_status != GC_DONE)
696 goto stop;
697 nfree++;
698 }
699 }
700stop: 674stop:
701 if (has_not_enough_free_secs(sbi) || gc_status == GC_BLOCKED) {
702 write_checkpoint(sbi, (gc_status == GC_BLOCKED), false);
703 if (nfree)
704 goto gc_more;
705 }
706 mutex_unlock(&sbi->gc_mutex); 675 mutex_unlock(&sbi->gc_mutex);
707 676
708 put_gc_inode(&ilist); 677 put_gc_inode(&ilist);
709 BUG_ON(!list_empty(&ilist)); 678 return ret;
710 return gc_status;
711} 679}
712 680
713void build_gc_manager(struct f2fs_sb_info *sbi) 681void build_gc_manager(struct f2fs_sb_info *sbi)
@@ -715,7 +683,7 @@ void build_gc_manager(struct f2fs_sb_info *sbi)
715 DIRTY_I(sbi)->v_ops = &default_v_ops; 683 DIRTY_I(sbi)->v_ops = &default_v_ops;
716} 684}
717 685
718int create_gc_caches(void) 686int __init create_gc_caches(void)
719{ 687{
720 winode_slab = f2fs_kmem_cache_create("f2fs_gc_inodes", 688 winode_slab = f2fs_kmem_cache_create("f2fs_gc_inodes",
721 sizeof(struct inode_entry), NULL); 689 sizeof(struct inode_entry), NULL);
diff --git a/fs/f2fs/gc.h b/fs/f2fs/gc.h
index b026d9354ccd..30b2db003acd 100644
--- a/fs/f2fs/gc.h
+++ b/fs/f2fs/gc.h
@@ -8,7 +8,6 @@
8 * it under the terms of the GNU General Public License version 2 as 8 * it under the terms of the GNU General Public License version 2 as
9 * published by the Free Software Foundation. 9 * published by the Free Software Foundation.
10 */ 10 */
11#define GC_THREAD_NAME "f2fs_gc_task"
12#define GC_THREAD_MIN_WB_PAGES 1 /* 11#define GC_THREAD_MIN_WB_PAGES 1 /*
13 * a threshold to determine 12 * a threshold to determine
14 * whether IO subsystem is idle 13 * whether IO subsystem is idle
@@ -23,15 +22,6 @@
23/* Search max. number of dirty segments to select a victim segment */ 22/* Search max. number of dirty segments to select a victim segment */
24#define MAX_VICTIM_SEARCH 20 23#define MAX_VICTIM_SEARCH 20
25 24
26enum {
27 GC_NONE = 0,
28 GC_ERROR,
29 GC_OK,
30 GC_NEXT,
31 GC_BLOCKED,
32 GC_DONE,
33};
34
35struct f2fs_gc_kthread { 25struct f2fs_gc_kthread {
36 struct task_struct *f2fs_gc_task; 26 struct task_struct *f2fs_gc_task;
37 wait_queue_head_t gc_wait_queue_head; 27 wait_queue_head_t gc_wait_queue_head;
@@ -104,14 +94,3 @@ static inline int is_idle(struct f2fs_sb_info *sbi)
104 struct request_list *rl = &q->root_rl; 94 struct request_list *rl = &q->root_rl;
105 return !(rl->count[BLK_RW_SYNC]) && !(rl->count[BLK_RW_ASYNC]); 95 return !(rl->count[BLK_RW_SYNC]) && !(rl->count[BLK_RW_ASYNC]);
106} 96}
107
108static inline bool should_do_checkpoint(struct f2fs_sb_info *sbi)
109{
110 unsigned int pages_per_sec = sbi->segs_per_sec *
111 (1 << sbi->log_blocks_per_seg);
112 int node_secs = ((get_pages(sbi, F2FS_DIRTY_NODES) + pages_per_sec - 1)
113 >> sbi->log_blocks_per_seg) / sbi->segs_per_sec;
114 int dent_secs = ((get_pages(sbi, F2FS_DIRTY_DENTS) + pages_per_sec - 1)
115 >> sbi->log_blocks_per_seg) / sbi->segs_per_sec;
116 return free_sections(sbi) <= (node_secs + 2 * dent_secs + 2);
117}
diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c
index bf20b4d03214..ddae412d30c8 100644
--- a/fs/f2fs/inode.c
+++ b/fs/f2fs/inode.c
@@ -16,11 +16,6 @@
16#include "f2fs.h" 16#include "f2fs.h"
17#include "node.h" 17#include "node.h"
18 18
19struct f2fs_iget_args {
20 u64 ino;
21 int on_free;
22};
23
24void f2fs_set_inode_flags(struct inode *inode) 19void f2fs_set_inode_flags(struct inode *inode)
25{ 20{
26 unsigned int flags = F2FS_I(inode)->i_flags; 21 unsigned int flags = F2FS_I(inode)->i_flags;
@@ -40,34 +35,6 @@ void f2fs_set_inode_flags(struct inode *inode)
40 inode->i_flags |= S_DIRSYNC; 35 inode->i_flags |= S_DIRSYNC;
41} 36}
42 37
43static int f2fs_iget_test(struct inode *inode, void *data)
44{
45 struct f2fs_iget_args *args = data;
46
47 if (inode->i_ino != args->ino)
48 return 0;
49 if (inode->i_state & (I_FREEING | I_WILL_FREE)) {
50 args->on_free = 1;
51 return 0;
52 }
53 return 1;
54}
55
56struct inode *f2fs_iget_nowait(struct super_block *sb, unsigned long ino)
57{
58 struct f2fs_iget_args args = {
59 .ino = ino,
60 .on_free = 0
61 };
62 struct inode *inode = ilookup5(sb, ino, f2fs_iget_test, &args);
63
64 if (inode)
65 return inode;
66 if (!args.on_free)
67 return f2fs_iget(sb, ino);
68 return ERR_PTR(-ENOENT);
69}
70
71static int do_read_inode(struct inode *inode) 38static int do_read_inode(struct inode *inode)
72{ 39{
73 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); 40 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
@@ -100,6 +67,10 @@ static int do_read_inode(struct inode *inode)
100 inode->i_ctime.tv_nsec = le32_to_cpu(ri->i_ctime_nsec); 67 inode->i_ctime.tv_nsec = le32_to_cpu(ri->i_ctime_nsec);
101 inode->i_mtime.tv_nsec = le32_to_cpu(ri->i_mtime_nsec); 68 inode->i_mtime.tv_nsec = le32_to_cpu(ri->i_mtime_nsec);
102 inode->i_generation = le32_to_cpu(ri->i_generation); 69 inode->i_generation = le32_to_cpu(ri->i_generation);
70 if (ri->i_addr[0])
71 inode->i_rdev = old_decode_dev(le32_to_cpu(ri->i_addr[0]));
72 else
73 inode->i_rdev = new_decode_dev(le32_to_cpu(ri->i_addr[1]));
103 74
104 fi->i_current_depth = le32_to_cpu(ri->i_current_depth); 75 fi->i_current_depth = le32_to_cpu(ri->i_current_depth);
105 fi->i_xattr_nid = le32_to_cpu(ri->i_xattr_nid); 76 fi->i_xattr_nid = le32_to_cpu(ri->i_xattr_nid);
@@ -203,6 +174,20 @@ void update_inode(struct inode *inode, struct page *node_page)
203 ri->i_flags = cpu_to_le32(F2FS_I(inode)->i_flags); 174 ri->i_flags = cpu_to_le32(F2FS_I(inode)->i_flags);
204 ri->i_pino = cpu_to_le32(F2FS_I(inode)->i_pino); 175 ri->i_pino = cpu_to_le32(F2FS_I(inode)->i_pino);
205 ri->i_generation = cpu_to_le32(inode->i_generation); 176 ri->i_generation = cpu_to_le32(inode->i_generation);
177
178 if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) {
179 if (old_valid_dev(inode->i_rdev)) {
180 ri->i_addr[0] =
181 cpu_to_le32(old_encode_dev(inode->i_rdev));
182 ri->i_addr[1] = 0;
183 } else {
184 ri->i_addr[0] = 0;
185 ri->i_addr[1] =
186 cpu_to_le32(new_encode_dev(inode->i_rdev));
187 ri->i_addr[2] = 0;
188 }
189 }
190
206 set_cold_node(inode, node_page); 191 set_cold_node(inode, node_page);
207 set_page_dirty(node_page); 192 set_page_dirty(node_page);
208} 193}
@@ -217,6 +202,9 @@ int f2fs_write_inode(struct inode *inode, struct writeback_control *wbc)
217 inode->i_ino == F2FS_META_INO(sbi)) 202 inode->i_ino == F2FS_META_INO(sbi))
218 return 0; 203 return 0;
219 204
205 if (wbc)
206 f2fs_balance_fs(sbi);
207
220 node_page = get_node_page(sbi, inode->i_ino); 208 node_page = get_node_page(sbi, inode->i_ino);
221 if (IS_ERR(node_page)) 209 if (IS_ERR(node_page))
222 return PTR_ERR(node_page); 210 return PTR_ERR(node_page);
@@ -257,6 +245,7 @@ void f2fs_evict_inode(struct inode *inode)
257 if (inode->i_nlink || is_bad_inode(inode)) 245 if (inode->i_nlink || is_bad_inode(inode))
258 goto no_delete; 246 goto no_delete;
259 247
248 sb_start_intwrite(inode->i_sb);
260 set_inode_flag(F2FS_I(inode), FI_NO_ALLOC); 249 set_inode_flag(F2FS_I(inode), FI_NO_ALLOC);
261 i_size_write(inode, 0); 250 i_size_write(inode, 0);
262 251
@@ -264,6 +253,7 @@ void f2fs_evict_inode(struct inode *inode)
264 f2fs_truncate(inode); 253 f2fs_truncate(inode);
265 254
266 remove_inode_page(inode); 255 remove_inode_page(inode);
256 sb_end_intwrite(inode->i_sb);
267no_delete: 257no_delete:
268 clear_inode(inode); 258 clear_inode(inode);
269} 259}
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index 5066bfd256c9..e275218904ed 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -104,7 +104,7 @@ static void ra_nat_pages(struct f2fs_sb_info *sbi, int nid)
104 f2fs_put_page(page, 1); 104 f2fs_put_page(page, 1);
105 continue; 105 continue;
106 } 106 }
107 page_cache_release(page); 107 f2fs_put_page(page, 0);
108 } 108 }
109} 109}
110 110
@@ -660,7 +660,7 @@ int truncate_inode_blocks(struct inode *inode, pgoff_t from)
660 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); 660 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
661 int err = 0, cont = 1; 661 int err = 0, cont = 1;
662 int level, offset[4], noffset[4]; 662 int level, offset[4], noffset[4];
663 unsigned int nofs; 663 unsigned int nofs = 0;
664 struct f2fs_node *rn; 664 struct f2fs_node *rn;
665 struct dnode_of_data dn; 665 struct dnode_of_data dn;
666 struct page *page; 666 struct page *page;
@@ -780,7 +780,7 @@ int remove_inode_page(struct inode *inode)
780 return 0; 780 return 0;
781} 781}
782 782
783int new_inode_page(struct inode *inode, struct dentry *dentry) 783int new_inode_page(struct inode *inode, const struct qstr *name)
784{ 784{
785 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); 785 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
786 struct page *page; 786 struct page *page;
@@ -790,7 +790,7 @@ int new_inode_page(struct inode *inode, struct dentry *dentry)
790 set_new_dnode(&dn, inode, NULL, NULL, inode->i_ino); 790 set_new_dnode(&dn, inode, NULL, NULL, inode->i_ino);
791 mutex_lock_op(sbi, NODE_NEW); 791 mutex_lock_op(sbi, NODE_NEW);
792 page = new_node_page(&dn, 0); 792 page = new_node_page(&dn, 0);
793 init_dent_inode(dentry, page); 793 init_dent_inode(name, page);
794 mutex_unlock_op(sbi, NODE_NEW); 794 mutex_unlock_op(sbi, NODE_NEW);
795 if (IS_ERR(page)) 795 if (IS_ERR(page))
796 return PTR_ERR(page); 796 return PTR_ERR(page);
@@ -874,15 +874,11 @@ void ra_node_page(struct f2fs_sb_info *sbi, nid_t nid)
874 return; 874 return;
875 875
876 if (read_node_page(apage, READA)) 876 if (read_node_page(apage, READA))
877 goto unlock_out; 877 unlock_page(apage);
878 878
879 page_cache_release(apage);
880 return;
881
882unlock_out:
883 unlock_page(apage);
884release_out: 879release_out:
885 page_cache_release(apage); 880 f2fs_put_page(apage, 0);
881 return;
886} 882}
887 883
888struct page *get_node_page(struct f2fs_sb_info *sbi, pgoff_t nid) 884struct page *get_node_page(struct f2fs_sb_info *sbi, pgoff_t nid)
@@ -1124,6 +1120,12 @@ static int f2fs_write_node_page(struct page *page,
1124 return 0; 1120 return 0;
1125} 1121}
1126 1122
1123/*
1124 * It is very important to gather dirty pages and write at once, so that we can
1125 * submit a big bio without interfering other data writes.
1126 * Be default, 512 pages (2MB), a segment size, is quite reasonable.
1127 */
1128#define COLLECT_DIRTY_NODES 512
1127static int f2fs_write_node_pages(struct address_space *mapping, 1129static int f2fs_write_node_pages(struct address_space *mapping,
1128 struct writeback_control *wbc) 1130 struct writeback_control *wbc)
1129{ 1131{
@@ -1131,17 +1133,16 @@ static int f2fs_write_node_pages(struct address_space *mapping,
1131 struct block_device *bdev = sbi->sb->s_bdev; 1133 struct block_device *bdev = sbi->sb->s_bdev;
1132 long nr_to_write = wbc->nr_to_write; 1134 long nr_to_write = wbc->nr_to_write;
1133 1135
1134 if (wbc->for_kupdate) 1136 /* First check balancing cached NAT entries */
1135 return 0;
1136
1137 if (get_pages(sbi, F2FS_DIRTY_NODES) == 0)
1138 return 0;
1139
1140 if (try_to_free_nats(sbi, NAT_ENTRY_PER_BLOCK)) { 1137 if (try_to_free_nats(sbi, NAT_ENTRY_PER_BLOCK)) {
1141 write_checkpoint(sbi, false, false); 1138 write_checkpoint(sbi, false);
1142 return 0; 1139 return 0;
1143 } 1140 }
1144 1141
1142 /* collect a number of dirty node pages and write together */
1143 if (get_pages(sbi, F2FS_DIRTY_NODES) < COLLECT_DIRTY_NODES)
1144 return 0;
1145
1145 /* if mounting is failed, skip writing node pages */ 1146 /* if mounting is failed, skip writing node pages */
1146 wbc->nr_to_write = bio_get_nr_vecs(bdev); 1147 wbc->nr_to_write = bio_get_nr_vecs(bdev);
1147 sync_node_pages(sbi, 0, wbc); 1148 sync_node_pages(sbi, 0, wbc);
@@ -1732,7 +1733,7 @@ void destroy_node_manager(struct f2fs_sb_info *sbi)
1732 kfree(nm_i); 1733 kfree(nm_i);
1733} 1734}
1734 1735
1735int create_node_manager_caches(void) 1736int __init create_node_manager_caches(void)
1736{ 1737{
1737 nat_entry_slab = f2fs_kmem_cache_create("nat_entry", 1738 nat_entry_slab = f2fs_kmem_cache_create("nat_entry",
1738 sizeof(struct nat_entry), NULL); 1739 sizeof(struct nat_entry), NULL);
diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c
index b571fee677d5..b235215ac138 100644
--- a/fs/f2fs/recovery.c
+++ b/fs/f2fs/recovery.c
@@ -42,7 +42,7 @@ static int recover_dentry(struct page *ipage, struct inode *inode)
42{ 42{
43 struct f2fs_node *raw_node = (struct f2fs_node *)kmap(ipage); 43 struct f2fs_node *raw_node = (struct f2fs_node *)kmap(ipage);
44 struct f2fs_inode *raw_inode = &(raw_node->i); 44 struct f2fs_inode *raw_inode = &(raw_node->i);
45 struct dentry dent, parent; 45 struct qstr name;
46 struct f2fs_dir_entry *de; 46 struct f2fs_dir_entry *de;
47 struct page *page; 47 struct page *page;
48 struct inode *dir; 48 struct inode *dir;
@@ -57,17 +57,15 @@ static int recover_dentry(struct page *ipage, struct inode *inode)
57 goto out; 57 goto out;
58 } 58 }
59 59
60 parent.d_inode = dir; 60 name.len = le32_to_cpu(raw_inode->i_namelen);
61 dent.d_parent = &parent; 61 name.name = raw_inode->i_name;
62 dent.d_name.len = le32_to_cpu(raw_inode->i_namelen);
63 dent.d_name.name = raw_inode->i_name;
64 62
65 de = f2fs_find_entry(dir, &dent.d_name, &page); 63 de = f2fs_find_entry(dir, &name, &page);
66 if (de) { 64 if (de) {
67 kunmap(page); 65 kunmap(page);
68 f2fs_put_page(page, 0); 66 f2fs_put_page(page, 0);
69 } else { 67 } else {
70 f2fs_add_link(&dent, inode); 68 err = __f2fs_add_link(dir, &name, inode);
71 } 69 }
72 iput(dir); 70 iput(dir);
73out: 71out:
@@ -151,7 +149,6 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head)
151 goto out; 149 goto out;
152 } 150 }
153 151
154 INIT_LIST_HEAD(&entry->list);
155 list_add_tail(&entry->list, head); 152 list_add_tail(&entry->list, head);
156 entry->blkaddr = blkaddr; 153 entry->blkaddr = blkaddr;
157 } 154 }
@@ -174,10 +171,9 @@ out:
174static void destroy_fsync_dnodes(struct f2fs_sb_info *sbi, 171static void destroy_fsync_dnodes(struct f2fs_sb_info *sbi,
175 struct list_head *head) 172 struct list_head *head)
176{ 173{
177 struct list_head *this; 174 struct fsync_inode_entry *entry, *tmp;
178 struct fsync_inode_entry *entry; 175
179 list_for_each(this, head) { 176 list_for_each_entry_safe(entry, tmp, head, list) {
180 entry = list_entry(this, struct fsync_inode_entry, list);
181 iput(entry->inode); 177 iput(entry->inode);
182 list_del(&entry->list); 178 list_del(&entry->list);
183 kmem_cache_free(fsync_entry_slab, entry); 179 kmem_cache_free(fsync_entry_slab, entry);
@@ -228,7 +224,7 @@ static void check_index_in_prev_nodes(struct f2fs_sb_info *sbi,
228 f2fs_put_page(node_page, 1); 224 f2fs_put_page(node_page, 1);
229 225
230 /* Deallocate previous index in the node page */ 226 /* Deallocate previous index in the node page */
231 inode = f2fs_iget_nowait(sbi->sb, ino); 227 inode = f2fs_iget(sbi->sb, ino);
232 if (IS_ERR(inode)) 228 if (IS_ERR(inode))
233 return; 229 return;
234 230
@@ -375,5 +371,5 @@ void recover_fsync_data(struct f2fs_sb_info *sbi)
375out: 371out:
376 destroy_fsync_dnodes(sbi, &inode_list); 372 destroy_fsync_dnodes(sbi, &inode_list);
377 kmem_cache_destroy(fsync_entry_slab); 373 kmem_cache_destroy(fsync_entry_slab);
378 write_checkpoint(sbi, false, false); 374 write_checkpoint(sbi, false);
379} 375}
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index de6240922b0a..777f17e496e6 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -29,9 +29,9 @@ void f2fs_balance_fs(struct f2fs_sb_info *sbi)
29 * We should do GC or end up with checkpoint, if there are so many dirty 29 * We should do GC or end up with checkpoint, if there are so many dirty
30 * dir/node pages without enough free segments. 30 * dir/node pages without enough free segments.
31 */ 31 */
32 if (has_not_enough_free_secs(sbi)) { 32 if (has_not_enough_free_secs(sbi, 0)) {
33 mutex_lock(&sbi->gc_mutex); 33 mutex_lock(&sbi->gc_mutex);
34 f2fs_gc(sbi, 1); 34 f2fs_gc(sbi);
35 } 35 }
36} 36}
37 37
@@ -308,7 +308,7 @@ static unsigned int check_prefree_segments(struct f2fs_sb_info *sbi,
308 * If there is not enough reserved sections, 308 * If there is not enough reserved sections,
309 * we should not reuse prefree segments. 309 * we should not reuse prefree segments.
310 */ 310 */
311 if (has_not_enough_free_secs(sbi)) 311 if (has_not_enough_free_secs(sbi, 0))
312 return NULL_SEGNO; 312 return NULL_SEGNO;
313 313
314 /* 314 /*
@@ -536,6 +536,23 @@ static void change_curseg(struct f2fs_sb_info *sbi, int type, bool reuse)
536 } 536 }
537} 537}
538 538
539static int get_ssr_segment(struct f2fs_sb_info *sbi, int type)
540{
541 struct curseg_info *curseg = CURSEG_I(sbi, type);
542 const struct victim_selection *v_ops = DIRTY_I(sbi)->v_ops;
543
544 if (IS_NODESEG(type) || !has_not_enough_free_secs(sbi, 0))
545 return v_ops->get_victim(sbi,
546 &(curseg)->next_segno, BG_GC, type, SSR);
547
548 /* For data segments, let's do SSR more intensively */
549 for (; type >= CURSEG_HOT_DATA; type--)
550 if (v_ops->get_victim(sbi, &(curseg)->next_segno,
551 BG_GC, type, SSR))
552 return 1;
553 return 0;
554}
555
539/* 556/*
540 * flush out current segment and replace it with new segment 557 * flush out current segment and replace it with new segment
541 * This function should be returned with success, otherwise BUG 558 * This function should be returned with success, otherwise BUG
@@ -600,6 +617,7 @@ static void f2fs_end_io_write(struct bio *bio, int err)
600 if (page->mapping) 617 if (page->mapping)
601 set_bit(AS_EIO, &page->mapping->flags); 618 set_bit(AS_EIO, &page->mapping->flags);
602 set_ckpt_flags(p->sbi->ckpt, CP_ERROR_FLAG); 619 set_ckpt_flags(p->sbi->ckpt, CP_ERROR_FLAG);
620 p->sbi->sb->s_flags |= MS_RDONLY;
603 } 621 }
604 end_page_writeback(page); 622 end_page_writeback(page);
605 dec_page_count(p->sbi, F2FS_WRITEBACK); 623 dec_page_count(p->sbi, F2FS_WRITEBACK);
@@ -815,15 +833,10 @@ static void do_write_page(struct f2fs_sb_info *sbi, struct page *page,
815 mutex_unlock(&curseg->curseg_mutex); 833 mutex_unlock(&curseg->curseg_mutex);
816} 834}
817 835
818int write_meta_page(struct f2fs_sb_info *sbi, struct page *page, 836void write_meta_page(struct f2fs_sb_info *sbi, struct page *page)
819 struct writeback_control *wbc)
820{ 837{
821 if (wbc->for_reclaim)
822 return AOP_WRITEPAGE_ACTIVATE;
823
824 set_page_writeback(page); 838 set_page_writeback(page);
825 submit_write_page(sbi, page, page->index, META); 839 submit_write_page(sbi, page, page->index, META);
826 return 0;
827} 840}
828 841
829void write_node_page(struct f2fs_sb_info *sbi, struct page *page, 842void write_node_page(struct f2fs_sb_info *sbi, struct page *page,
diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h
index 66a288a52fd3..552dadbb2327 100644
--- a/fs/f2fs/segment.h
+++ b/fs/f2fs/segment.h
@@ -450,29 +450,16 @@ static inline bool need_SSR(struct f2fs_sb_info *sbi)
450 return (free_sections(sbi) < overprovision_sections(sbi)); 450 return (free_sections(sbi) < overprovision_sections(sbi));
451} 451}
452 452
453static inline int get_ssr_segment(struct f2fs_sb_info *sbi, int type) 453static inline bool has_not_enough_free_secs(struct f2fs_sb_info *sbi, int freed)
454{ 454{
455 struct curseg_info *curseg = CURSEG_I(sbi, type); 455 int node_secs = get_blocktype_secs(sbi, F2FS_DIRTY_NODES);
456 return DIRTY_I(sbi)->v_ops->get_victim(sbi, 456 int dent_secs = get_blocktype_secs(sbi, F2FS_DIRTY_DENTS);
457 &(curseg)->next_segno, BG_GC, type, SSR);
458}
459
460static inline bool has_not_enough_free_secs(struct f2fs_sb_info *sbi)
461{
462 unsigned int pages_per_sec = (1 << sbi->log_blocks_per_seg) *
463 sbi->segs_per_sec;
464 int node_secs = ((get_pages(sbi, F2FS_DIRTY_NODES) + pages_per_sec - 1)
465 >> sbi->log_blocks_per_seg) / sbi->segs_per_sec;
466 int dent_secs = ((get_pages(sbi, F2FS_DIRTY_DENTS) + pages_per_sec - 1)
467 >> sbi->log_blocks_per_seg) / sbi->segs_per_sec;
468 457
469 if (sbi->por_doing) 458 if (sbi->por_doing)
470 return false; 459 return false;
471 460
472 if (free_sections(sbi) <= (node_secs + 2 * dent_secs + 461 return ((free_sections(sbi) + freed) <= (node_secs + 2 * dent_secs +
473 reserved_sections(sbi))) 462 reserved_sections(sbi)));
474 return true;
475 return false;
476} 463}
477 464
478static inline int utilization(struct f2fs_sb_info *sbi) 465static inline int utilization(struct f2fs_sb_info *sbi)
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index 08a94c814bdc..8c117649a035 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -53,6 +53,18 @@ static match_table_t f2fs_tokens = {
53 {Opt_err, NULL}, 53 {Opt_err, NULL},
54}; 54};
55 55
56void f2fs_msg(struct super_block *sb, const char *level, const char *fmt, ...)
57{
58 struct va_format vaf;
59 va_list args;
60
61 va_start(args, fmt);
62 vaf.fmt = fmt;
63 vaf.va = &args;
64 printk("%sF2FS-fs (%s): %pV\n", level, sb->s_id, &vaf);
65 va_end(args);
66}
67
56static void init_once(void *foo) 68static void init_once(void *foo)
57{ 69{
58 struct f2fs_inode_info *fi = (struct f2fs_inode_info *) foo; 70 struct f2fs_inode_info *fi = (struct f2fs_inode_info *) foo;
@@ -100,7 +112,7 @@ static void f2fs_put_super(struct super_block *sb)
100 f2fs_destroy_stats(sbi); 112 f2fs_destroy_stats(sbi);
101 stop_gc_thread(sbi); 113 stop_gc_thread(sbi);
102 114
103 write_checkpoint(sbi, false, true); 115 write_checkpoint(sbi, true);
104 116
105 iput(sbi->node_inode); 117 iput(sbi->node_inode);
106 iput(sbi->meta_inode); 118 iput(sbi->meta_inode);
@@ -124,11 +136,29 @@ int f2fs_sync_fs(struct super_block *sb, int sync)
124 return 0; 136 return 0;
125 137
126 if (sync) 138 if (sync)
127 write_checkpoint(sbi, false, false); 139 write_checkpoint(sbi, false);
140 else
141 f2fs_balance_fs(sbi);
128 142
129 return 0; 143 return 0;
130} 144}
131 145
146static int f2fs_freeze(struct super_block *sb)
147{
148 int err;
149
150 if (sb->s_flags & MS_RDONLY)
151 return 0;
152
153 err = f2fs_sync_fs(sb, 1);
154 return err;
155}
156
157static int f2fs_unfreeze(struct super_block *sb)
158{
159 return 0;
160}
161
132static int f2fs_statfs(struct dentry *dentry, struct kstatfs *buf) 162static int f2fs_statfs(struct dentry *dentry, struct kstatfs *buf)
133{ 163{
134 struct super_block *sb = dentry->d_sb; 164 struct super_block *sb = dentry->d_sb;
@@ -184,7 +214,7 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root)
184 seq_puts(seq, ",noacl"); 214 seq_puts(seq, ",noacl");
185#endif 215#endif
186 if (test_opt(sbi, DISABLE_EXT_IDENTIFY)) 216 if (test_opt(sbi, DISABLE_EXT_IDENTIFY))
187 seq_puts(seq, ",disable_ext_indentify"); 217 seq_puts(seq, ",disable_ext_identify");
188 218
189 seq_printf(seq, ",active_logs=%u", sbi->active_logs); 219 seq_printf(seq, ",active_logs=%u", sbi->active_logs);
190 220
@@ -199,6 +229,8 @@ static struct super_operations f2fs_sops = {
199 .evict_inode = f2fs_evict_inode, 229 .evict_inode = f2fs_evict_inode,
200 .put_super = f2fs_put_super, 230 .put_super = f2fs_put_super,
201 .sync_fs = f2fs_sync_fs, 231 .sync_fs = f2fs_sync_fs,
232 .freeze_fs = f2fs_freeze,
233 .unfreeze_fs = f2fs_unfreeze,
202 .statfs = f2fs_statfs, 234 .statfs = f2fs_statfs,
203}; 235};
204 236
@@ -247,7 +279,8 @@ static const struct export_operations f2fs_export_ops = {
247 .get_parent = f2fs_get_parent, 279 .get_parent = f2fs_get_parent,
248}; 280};
249 281
250static int parse_options(struct f2fs_sb_info *sbi, char *options) 282static int parse_options(struct super_block *sb, struct f2fs_sb_info *sbi,
283 char *options)
251{ 284{
252 substring_t args[MAX_OPT_ARGS]; 285 substring_t args[MAX_OPT_ARGS];
253 char *p; 286 char *p;
@@ -286,7 +319,8 @@ static int parse_options(struct f2fs_sb_info *sbi, char *options)
286 break; 319 break;
287#else 320#else
288 case Opt_nouser_xattr: 321 case Opt_nouser_xattr:
289 pr_info("nouser_xattr options not supported\n"); 322 f2fs_msg(sb, KERN_INFO,
323 "nouser_xattr options not supported");
290 break; 324 break;
291#endif 325#endif
292#ifdef CONFIG_F2FS_FS_POSIX_ACL 326#ifdef CONFIG_F2FS_FS_POSIX_ACL
@@ -295,7 +329,7 @@ static int parse_options(struct f2fs_sb_info *sbi, char *options)
295 break; 329 break;
296#else 330#else
297 case Opt_noacl: 331 case Opt_noacl:
298 pr_info("noacl options not supported\n"); 332 f2fs_msg(sb, KERN_INFO, "noacl options not supported");
299 break; 333 break;
300#endif 334#endif
301 case Opt_active_logs: 335 case Opt_active_logs:
@@ -309,8 +343,9 @@ static int parse_options(struct f2fs_sb_info *sbi, char *options)
309 set_opt(sbi, DISABLE_EXT_IDENTIFY); 343 set_opt(sbi, DISABLE_EXT_IDENTIFY);
310 break; 344 break;
311 default: 345 default:
312 pr_err("Unrecognized mount option \"%s\" or missing value\n", 346 f2fs_msg(sb, KERN_ERR,
313 p); 347 "Unrecognized mount option \"%s\" or missing value",
348 p);
314 return -EINVAL; 349 return -EINVAL;
315 } 350 }
316 } 351 }
@@ -337,30 +372,53 @@ static loff_t max_file_size(unsigned bits)
337 return result; 372 return result;
338} 373}
339 374
340static int sanity_check_raw_super(struct f2fs_super_block *raw_super) 375static int sanity_check_raw_super(struct super_block *sb,
376 struct f2fs_super_block *raw_super)
341{ 377{
342 unsigned int blocksize; 378 unsigned int blocksize;
343 379
344 if (F2FS_SUPER_MAGIC != le32_to_cpu(raw_super->magic)) 380 if (F2FS_SUPER_MAGIC != le32_to_cpu(raw_super->magic)) {
381 f2fs_msg(sb, KERN_INFO,
382 "Magic Mismatch, valid(0x%x) - read(0x%x)",
383 F2FS_SUPER_MAGIC, le32_to_cpu(raw_super->magic));
345 return 1; 384 return 1;
385 }
386
387 /* Currently, support only 4KB page cache size */
388 if (F2FS_BLKSIZE != PAGE_CACHE_SIZE) {
389 f2fs_msg(sb, KERN_INFO,
390 "Invalid page_cache_size (%lu), supports only 4KB\n",
391 PAGE_CACHE_SIZE);
392 return 1;
393 }
346 394
347 /* Currently, support only 4KB block size */ 395 /* Currently, support only 4KB block size */
348 blocksize = 1 << le32_to_cpu(raw_super->log_blocksize); 396 blocksize = 1 << le32_to_cpu(raw_super->log_blocksize);
349 if (blocksize != PAGE_CACHE_SIZE) 397 if (blocksize != F2FS_BLKSIZE) {
398 f2fs_msg(sb, KERN_INFO,
399 "Invalid blocksize (%u), supports only 4KB\n",
400 blocksize);
350 return 1; 401 return 1;
402 }
403
351 if (le32_to_cpu(raw_super->log_sectorsize) != 404 if (le32_to_cpu(raw_super->log_sectorsize) !=
352 F2FS_LOG_SECTOR_SIZE) 405 F2FS_LOG_SECTOR_SIZE) {
406 f2fs_msg(sb, KERN_INFO, "Invalid log sectorsize");
353 return 1; 407 return 1;
408 }
354 if (le32_to_cpu(raw_super->log_sectors_per_block) != 409 if (le32_to_cpu(raw_super->log_sectors_per_block) !=
355 F2FS_LOG_SECTORS_PER_BLOCK) 410 F2FS_LOG_SECTORS_PER_BLOCK) {
411 f2fs_msg(sb, KERN_INFO, "Invalid log sectors per block");
356 return 1; 412 return 1;
413 }
357 return 0; 414 return 0;
358} 415}
359 416
360static int sanity_check_ckpt(struct f2fs_super_block *raw_super, 417static int sanity_check_ckpt(struct f2fs_sb_info *sbi)
361 struct f2fs_checkpoint *ckpt)
362{ 418{
363 unsigned int total, fsmeta; 419 unsigned int total, fsmeta;
420 struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi);
421 struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
364 422
365 total = le32_to_cpu(raw_super->segment_count); 423 total = le32_to_cpu(raw_super->segment_count);
366 fsmeta = le32_to_cpu(raw_super->segment_count_ckpt); 424 fsmeta = le32_to_cpu(raw_super->segment_count_ckpt);
@@ -371,6 +429,11 @@ static int sanity_check_ckpt(struct f2fs_super_block *raw_super,
371 429
372 if (fsmeta >= total) 430 if (fsmeta >= total)
373 return 1; 431 return 1;
432
433 if (is_set_ckpt_flags(ckpt, CP_ERROR_FLAG)) {
434 f2fs_msg(sbi->sb, KERN_ERR, "A bug case: need to run fsck");
435 return 1;
436 }
374 return 0; 437 return 0;
375} 438}
376 439
@@ -399,6 +462,32 @@ static void init_sb_info(struct f2fs_sb_info *sbi)
399 atomic_set(&sbi->nr_pages[i], 0); 462 atomic_set(&sbi->nr_pages[i], 0);
400} 463}
401 464
465static int validate_superblock(struct super_block *sb,
466 struct f2fs_super_block **raw_super,
467 struct buffer_head **raw_super_buf, sector_t block)
468{
469 const char *super = (block == 0 ? "first" : "second");
470
471 /* read f2fs raw super block */
472 *raw_super_buf = sb_bread(sb, block);
473 if (!*raw_super_buf) {
474 f2fs_msg(sb, KERN_ERR, "unable to read %s superblock",
475 super);
476 return 1;
477 }
478
479 *raw_super = (struct f2fs_super_block *)
480 ((char *)(*raw_super_buf)->b_data + F2FS_SUPER_OFFSET);
481
482 /* sanity checking of raw super */
483 if (!sanity_check_raw_super(sb, *raw_super))
484 return 0;
485
486 f2fs_msg(sb, KERN_ERR, "Can't find a valid F2FS filesystem "
487 "in %s superblock", super);
488 return 1;
489}
490
402static int f2fs_fill_super(struct super_block *sb, void *data, int silent) 491static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
403{ 492{
404 struct f2fs_sb_info *sbi; 493 struct f2fs_sb_info *sbi;
@@ -413,19 +502,17 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
413 if (!sbi) 502 if (!sbi)
414 return -ENOMEM; 503 return -ENOMEM;
415 504
416 /* set a temporary block size */ 505 /* set a block size */
417 if (!sb_set_blocksize(sb, F2FS_BLKSIZE)) 506 if (!sb_set_blocksize(sb, F2FS_BLKSIZE)) {
418 goto free_sbi; 507 f2fs_msg(sb, KERN_ERR, "unable to set blocksize");
419
420 /* read f2fs raw super block */
421 raw_super_buf = sb_bread(sb, 0);
422 if (!raw_super_buf) {
423 err = -EIO;
424 goto free_sbi; 508 goto free_sbi;
425 } 509 }
426 raw_super = (struct f2fs_super_block *)
427 ((char *)raw_super_buf->b_data + F2FS_SUPER_OFFSET);
428 510
511 if (validate_superblock(sb, &raw_super, &raw_super_buf, 0)) {
512 brelse(raw_super_buf);
513 if (validate_superblock(sb, &raw_super, &raw_super_buf, 1))
514 goto free_sb_buf;
515 }
429 /* init some FS parameters */ 516 /* init some FS parameters */
430 sbi->active_logs = NR_CURSEG_TYPE; 517 sbi->active_logs = NR_CURSEG_TYPE;
431 518
@@ -438,11 +525,7 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
438 set_opt(sbi, POSIX_ACL); 525 set_opt(sbi, POSIX_ACL);
439#endif 526#endif
440 /* parse mount options */ 527 /* parse mount options */
441 if (parse_options(sbi, (char *)data)) 528 if (parse_options(sb, sbi, (char *)data))
442 goto free_sb_buf;
443
444 /* sanity checking of raw super */
445 if (sanity_check_raw_super(raw_super))
446 goto free_sb_buf; 529 goto free_sb_buf;
447 530
448 sb->s_maxbytes = max_file_size(le32_to_cpu(raw_super->log_blocksize)); 531 sb->s_maxbytes = max_file_size(le32_to_cpu(raw_super->log_blocksize));
@@ -477,18 +560,23 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
477 /* get an inode for meta space */ 560 /* get an inode for meta space */
478 sbi->meta_inode = f2fs_iget(sb, F2FS_META_INO(sbi)); 561 sbi->meta_inode = f2fs_iget(sb, F2FS_META_INO(sbi));
479 if (IS_ERR(sbi->meta_inode)) { 562 if (IS_ERR(sbi->meta_inode)) {
563 f2fs_msg(sb, KERN_ERR, "Failed to read F2FS meta data inode");
480 err = PTR_ERR(sbi->meta_inode); 564 err = PTR_ERR(sbi->meta_inode);
481 goto free_sb_buf; 565 goto free_sb_buf;
482 } 566 }
483 567
484 err = get_valid_checkpoint(sbi); 568 err = get_valid_checkpoint(sbi);
485 if (err) 569 if (err) {
570 f2fs_msg(sb, KERN_ERR, "Failed to get valid F2FS checkpoint");
486 goto free_meta_inode; 571 goto free_meta_inode;
572 }
487 573
488 /* sanity checking of checkpoint */ 574 /* sanity checking of checkpoint */
489 err = -EINVAL; 575 err = -EINVAL;
490 if (sanity_check_ckpt(raw_super, sbi->ckpt)) 576 if (sanity_check_ckpt(sbi)) {
577 f2fs_msg(sb, KERN_ERR, "Invalid F2FS checkpoint");
491 goto free_cp; 578 goto free_cp;
579 }
492 580
493 sbi->total_valid_node_count = 581 sbi->total_valid_node_count =
494 le32_to_cpu(sbi->ckpt->valid_node_count); 582 le32_to_cpu(sbi->ckpt->valid_node_count);
@@ -502,25 +590,28 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
502 INIT_LIST_HEAD(&sbi->dir_inode_list); 590 INIT_LIST_HEAD(&sbi->dir_inode_list);
503 spin_lock_init(&sbi->dir_inode_lock); 591 spin_lock_init(&sbi->dir_inode_lock);
504 592
505 /* init super block */
506 if (!sb_set_blocksize(sb, sbi->blocksize))
507 goto free_cp;
508
509 init_orphan_info(sbi); 593 init_orphan_info(sbi);
510 594
511 /* setup f2fs internal modules */ 595 /* setup f2fs internal modules */
512 err = build_segment_manager(sbi); 596 err = build_segment_manager(sbi);
513 if (err) 597 if (err) {
598 f2fs_msg(sb, KERN_ERR,
599 "Failed to initialize F2FS segment manager");
514 goto free_sm; 600 goto free_sm;
601 }
515 err = build_node_manager(sbi); 602 err = build_node_manager(sbi);
516 if (err) 603 if (err) {
604 f2fs_msg(sb, KERN_ERR,
605 "Failed to initialize F2FS node manager");
517 goto free_nm; 606 goto free_nm;
607 }
518 608
519 build_gc_manager(sbi); 609 build_gc_manager(sbi);
520 610
521 /* get an inode for node space */ 611 /* get an inode for node space */
522 sbi->node_inode = f2fs_iget(sb, F2FS_NODE_INO(sbi)); 612 sbi->node_inode = f2fs_iget(sb, F2FS_NODE_INO(sbi));
523 if (IS_ERR(sbi->node_inode)) { 613 if (IS_ERR(sbi->node_inode)) {
614 f2fs_msg(sb, KERN_ERR, "Failed to read node inode");
524 err = PTR_ERR(sbi->node_inode); 615 err = PTR_ERR(sbi->node_inode);
525 goto free_nm; 616 goto free_nm;
526 } 617 }
@@ -533,6 +624,7 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
533 /* read root inode and dentry */ 624 /* read root inode and dentry */
534 root = f2fs_iget(sb, F2FS_ROOT_INO(sbi)); 625 root = f2fs_iget(sb, F2FS_ROOT_INO(sbi));
535 if (IS_ERR(root)) { 626 if (IS_ERR(root)) {
627 f2fs_msg(sb, KERN_ERR, "Failed to read root inode");
536 err = PTR_ERR(root); 628 err = PTR_ERR(root);
537 goto free_node_inode; 629 goto free_node_inode;
538 } 630 }
@@ -596,7 +688,7 @@ static struct file_system_type f2fs_fs_type = {
596 .fs_flags = FS_REQUIRES_DEV, 688 .fs_flags = FS_REQUIRES_DEV,
597}; 689};
598 690
599static int init_inodecache(void) 691static int __init init_inodecache(void)
600{ 692{
601 f2fs_inode_cachep = f2fs_kmem_cache_create("f2fs_inode_cache", 693 f2fs_inode_cachep = f2fs_kmem_cache_create("f2fs_inode_cache",
602 sizeof(struct f2fs_inode_info), NULL); 694 sizeof(struct f2fs_inode_info), NULL);
@@ -631,14 +723,17 @@ static int __init init_f2fs_fs(void)
631 err = create_checkpoint_caches(); 723 err = create_checkpoint_caches();
632 if (err) 724 if (err)
633 goto fail; 725 goto fail;
634 return register_filesystem(&f2fs_fs_type); 726 err = register_filesystem(&f2fs_fs_type);
727 if (err)
728 goto fail;
729 f2fs_create_root_stats();
635fail: 730fail:
636 return err; 731 return err;
637} 732}
638 733
639static void __exit exit_f2fs_fs(void) 734static void __exit exit_f2fs_fs(void)
640{ 735{
641 destroy_root_stats(); 736 f2fs_destroy_root_stats();
642 unregister_filesystem(&f2fs_fs_type); 737 unregister_filesystem(&f2fs_fs_type);
643 destroy_checkpoint_caches(); 738 destroy_checkpoint_caches();
644 destroy_gc_caches(); 739 destroy_gc_caches();
diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c
index 940136a3d3a6..8038c0496504 100644
--- a/fs/f2fs/xattr.c
+++ b/fs/f2fs/xattr.c
@@ -318,6 +318,8 @@ int f2fs_setxattr(struct inode *inode, int name_index, const char *name,
318 if (name_len > 255 || value_len > MAX_VALUE_LEN) 318 if (name_len > 255 || value_len > MAX_VALUE_LEN)
319 return -ERANGE; 319 return -ERANGE;
320 320
321 f2fs_balance_fs(sbi);
322
321 mutex_lock_op(sbi, NODE_NEW); 323 mutex_lock_op(sbi, NODE_NEW);
322 if (!fi->i_xattr_nid) { 324 if (!fi->i_xattr_nid) {
323 /* Allocate new attribute block */ 325 /* Allocate new attribute block */
diff --git a/fs/file.c b/fs/file.c
index 2b3570b7caeb..3906d9577a18 100644
--- a/fs/file.c
+++ b/fs/file.c
@@ -516,7 +516,7 @@ struct files_struct init_files = {
516 .close_on_exec = init_files.close_on_exec_init, 516 .close_on_exec = init_files.close_on_exec_init,
517 .open_fds = init_files.open_fds_init, 517 .open_fds = init_files.open_fds_init,
518 }, 518 },
519 .file_lock = __SPIN_LOCK_UNLOCKED(init_task.file_lock), 519 .file_lock = __SPIN_LOCK_UNLOCKED(init_files.file_lock),
520}; 520};
521 521
522/* 522/*
diff --git a/fs/fuse/Kconfig b/fs/fuse/Kconfig
index 0cf160a94eda..1b2f6c2c3aaf 100644
--- a/fs/fuse/Kconfig
+++ b/fs/fuse/Kconfig
@@ -4,12 +4,24 @@ config FUSE_FS
4 With FUSE it is possible to implement a fully functional filesystem 4 With FUSE it is possible to implement a fully functional filesystem
5 in a userspace program. 5 in a userspace program.
6 6
7 There's also companion library: libfuse. This library along with 7 There's also a companion library: libfuse2. This library is available
8 utilities is available from the FUSE homepage: 8 from the FUSE homepage:
9 <http://fuse.sourceforge.net/> 9 <http://fuse.sourceforge.net/>
10 although chances are your distribution already has that library
11 installed if you've installed the "fuse" package itself.
10 12
11 See <file:Documentation/filesystems/fuse.txt> for more information. 13 See <file:Documentation/filesystems/fuse.txt> for more information.
12 See <file:Documentation/Changes> for needed library/utility version. 14 See <file:Documentation/Changes> for needed library/utility version.
13 15
14 If you want to develop a userspace FS, or if you want to use 16 If you want to develop a userspace FS, or if you want to use
15 a filesystem based on FUSE, answer Y or M. 17 a filesystem based on FUSE, answer Y or M.
18
19config CUSE
20 tristate "Character device in Userspace support"
21 depends on FUSE_FS
22 help
23 This FUSE extension allows character devices to be
24 implemented in userspace.
25
26 If you want to develop or use a userspace character device
27 based on CUSE, answer Y or M.
diff --git a/fs/fuse/cuse.c b/fs/fuse/cuse.c
index ee8d55042298..6f96a8def147 100644
--- a/fs/fuse/cuse.c
+++ b/fs/fuse/cuse.c
@@ -45,7 +45,6 @@
45#include <linux/miscdevice.h> 45#include <linux/miscdevice.h>
46#include <linux/mutex.h> 46#include <linux/mutex.h>
47#include <linux/slab.h> 47#include <linux/slab.h>
48#include <linux/spinlock.h>
49#include <linux/stat.h> 48#include <linux/stat.h>
50#include <linux/module.h> 49#include <linux/module.h>
51 50
@@ -63,7 +62,7 @@ struct cuse_conn {
63 bool unrestricted_ioctl; 62 bool unrestricted_ioctl;
64}; 63};
65 64
66static DEFINE_SPINLOCK(cuse_lock); /* protects cuse_conntbl */ 65static DEFINE_MUTEX(cuse_lock); /* protects registration */
67static struct list_head cuse_conntbl[CUSE_CONNTBL_LEN]; 66static struct list_head cuse_conntbl[CUSE_CONNTBL_LEN];
68static struct class *cuse_class; 67static struct class *cuse_class;
69 68
@@ -92,19 +91,22 @@ static ssize_t cuse_read(struct file *file, char __user *buf, size_t count,
92 loff_t *ppos) 91 loff_t *ppos)
93{ 92{
94 loff_t pos = 0; 93 loff_t pos = 0;
94 struct iovec iov = { .iov_base = buf, .iov_len = count };
95 95
96 return fuse_direct_io(file, buf, count, &pos, 0); 96 return fuse_direct_io(file, &iov, 1, count, &pos, 0);
97} 97}
98 98
99static ssize_t cuse_write(struct file *file, const char __user *buf, 99static ssize_t cuse_write(struct file *file, const char __user *buf,
100 size_t count, loff_t *ppos) 100 size_t count, loff_t *ppos)
101{ 101{
102 loff_t pos = 0; 102 loff_t pos = 0;
103 struct iovec iov = { .iov_base = (void __user *)buf, .iov_len = count };
104
103 /* 105 /*
104 * No locking or generic_write_checks(), the server is 106 * No locking or generic_write_checks(), the server is
105 * responsible for locking and sanity checks. 107 * responsible for locking and sanity checks.
106 */ 108 */
107 return fuse_direct_io(file, buf, count, &pos, 1); 109 return fuse_direct_io(file, &iov, 1, count, &pos, 1);
108} 110}
109 111
110static int cuse_open(struct inode *inode, struct file *file) 112static int cuse_open(struct inode *inode, struct file *file)
@@ -114,14 +116,14 @@ static int cuse_open(struct inode *inode, struct file *file)
114 int rc; 116 int rc;
115 117
116 /* look up and get the connection */ 118 /* look up and get the connection */
117 spin_lock(&cuse_lock); 119 mutex_lock(&cuse_lock);
118 list_for_each_entry(pos, cuse_conntbl_head(devt), list) 120 list_for_each_entry(pos, cuse_conntbl_head(devt), list)
119 if (pos->dev->devt == devt) { 121 if (pos->dev->devt == devt) {
120 fuse_conn_get(&pos->fc); 122 fuse_conn_get(&pos->fc);
121 cc = pos; 123 cc = pos;
122 break; 124 break;
123 } 125 }
124 spin_unlock(&cuse_lock); 126 mutex_unlock(&cuse_lock);
125 127
126 /* dead? */ 128 /* dead? */
127 if (!cc) 129 if (!cc)
@@ -267,7 +269,7 @@ static int cuse_parse_one(char **pp, char *end, char **keyp, char **valp)
267static int cuse_parse_devinfo(char *p, size_t len, struct cuse_devinfo *devinfo) 269static int cuse_parse_devinfo(char *p, size_t len, struct cuse_devinfo *devinfo)
268{ 270{
269 char *end = p + len; 271 char *end = p + len;
270 char *key, *val; 272 char *uninitialized_var(key), *uninitialized_var(val);
271 int rc; 273 int rc;
272 274
273 while (true) { 275 while (true) {
@@ -305,14 +307,14 @@ static void cuse_gendev_release(struct device *dev)
305 */ 307 */
306static void cuse_process_init_reply(struct fuse_conn *fc, struct fuse_req *req) 308static void cuse_process_init_reply(struct fuse_conn *fc, struct fuse_req *req)
307{ 309{
308 struct cuse_conn *cc = fc_to_cc(fc); 310 struct cuse_conn *cc = fc_to_cc(fc), *pos;
309 struct cuse_init_out *arg = req->out.args[0].value; 311 struct cuse_init_out *arg = req->out.args[0].value;
310 struct page *page = req->pages[0]; 312 struct page *page = req->pages[0];
311 struct cuse_devinfo devinfo = { }; 313 struct cuse_devinfo devinfo = { };
312 struct device *dev; 314 struct device *dev;
313 struct cdev *cdev; 315 struct cdev *cdev;
314 dev_t devt; 316 dev_t devt;
315 int rc; 317 int rc, i;
316 318
317 if (req->out.h.error || 319 if (req->out.h.error ||
318 arg->major != FUSE_KERNEL_VERSION || arg->minor < 11) { 320 arg->major != FUSE_KERNEL_VERSION || arg->minor < 11) {
@@ -356,15 +358,24 @@ static void cuse_process_init_reply(struct fuse_conn *fc, struct fuse_req *req)
356 dev_set_drvdata(dev, cc); 358 dev_set_drvdata(dev, cc);
357 dev_set_name(dev, "%s", devinfo.name); 359 dev_set_name(dev, "%s", devinfo.name);
358 360
361 mutex_lock(&cuse_lock);
362
363 /* make sure the device-name is unique */
364 for (i = 0; i < CUSE_CONNTBL_LEN; ++i) {
365 list_for_each_entry(pos, &cuse_conntbl[i], list)
366 if (!strcmp(dev_name(pos->dev), dev_name(dev)))
367 goto err_unlock;
368 }
369
359 rc = device_add(dev); 370 rc = device_add(dev);
360 if (rc) 371 if (rc)
361 goto err_device; 372 goto err_unlock;
362 373
363 /* register cdev */ 374 /* register cdev */
364 rc = -ENOMEM; 375 rc = -ENOMEM;
365 cdev = cdev_alloc(); 376 cdev = cdev_alloc();
366 if (!cdev) 377 if (!cdev)
367 goto err_device; 378 goto err_unlock;
368 379
369 cdev->owner = THIS_MODULE; 380 cdev->owner = THIS_MODULE;
370 cdev->ops = &cuse_frontend_fops; 381 cdev->ops = &cuse_frontend_fops;
@@ -377,9 +388,8 @@ static void cuse_process_init_reply(struct fuse_conn *fc, struct fuse_req *req)
377 cc->cdev = cdev; 388 cc->cdev = cdev;
378 389
379 /* make the device available */ 390 /* make the device available */
380 spin_lock(&cuse_lock);
381 list_add(&cc->list, cuse_conntbl_head(devt)); 391 list_add(&cc->list, cuse_conntbl_head(devt));
382 spin_unlock(&cuse_lock); 392 mutex_unlock(&cuse_lock);
383 393
384 /* announce device availability */ 394 /* announce device availability */
385 dev_set_uevent_suppress(dev, 0); 395 dev_set_uevent_suppress(dev, 0);
@@ -391,7 +401,8 @@ out:
391 401
392err_cdev: 402err_cdev:
393 cdev_del(cdev); 403 cdev_del(cdev);
394err_device: 404err_unlock:
405 mutex_unlock(&cuse_lock);
395 put_device(dev); 406 put_device(dev);
396err_region: 407err_region:
397 unregister_chrdev_region(devt, 1); 408 unregister_chrdev_region(devt, 1);
@@ -411,7 +422,7 @@ static int cuse_send_init(struct cuse_conn *cc)
411 422
412 BUILD_BUG_ON(CUSE_INIT_INFO_MAX > PAGE_SIZE); 423 BUILD_BUG_ON(CUSE_INIT_INFO_MAX > PAGE_SIZE);
413 424
414 req = fuse_get_req(fc); 425 req = fuse_get_req(fc, 1);
415 if (IS_ERR(req)) { 426 if (IS_ERR(req)) {
416 rc = PTR_ERR(req); 427 rc = PTR_ERR(req);
417 goto err; 428 goto err;
@@ -441,6 +452,7 @@ static int cuse_send_init(struct cuse_conn *cc)
441 req->out.argvar = 1; 452 req->out.argvar = 1;
442 req->out.argpages = 1; 453 req->out.argpages = 1;
443 req->pages[0] = page; 454 req->pages[0] = page;
455 req->page_descs[0].length = req->out.args[1].size;
444 req->num_pages = 1; 456 req->num_pages = 1;
445 req->end = cuse_process_init_reply; 457 req->end = cuse_process_init_reply;
446 fuse_request_send_background(fc, req); 458 fuse_request_send_background(fc, req);
@@ -520,9 +532,9 @@ static int cuse_channel_release(struct inode *inode, struct file *file)
520 int rc; 532 int rc;
521 533
522 /* remove from the conntbl, no more access from this point on */ 534 /* remove from the conntbl, no more access from this point on */
523 spin_lock(&cuse_lock); 535 mutex_lock(&cuse_lock);
524 list_del_init(&cc->list); 536 list_del_init(&cc->list);
525 spin_unlock(&cuse_lock); 537 mutex_unlock(&cuse_lock);
526 538
527 /* remove device */ 539 /* remove device */
528 if (cc->dev) 540 if (cc->dev)
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index c16335315e5d..e9bdec0b16d9 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -34,34 +34,67 @@ static struct fuse_conn *fuse_get_conn(struct file *file)
34 return file->private_data; 34 return file->private_data;
35} 35}
36 36
37static void fuse_request_init(struct fuse_req *req) 37static void fuse_request_init(struct fuse_req *req, struct page **pages,
38 struct fuse_page_desc *page_descs,
39 unsigned npages)
38{ 40{
39 memset(req, 0, sizeof(*req)); 41 memset(req, 0, sizeof(*req));
42 memset(pages, 0, sizeof(*pages) * npages);
43 memset(page_descs, 0, sizeof(*page_descs) * npages);
40 INIT_LIST_HEAD(&req->list); 44 INIT_LIST_HEAD(&req->list);
41 INIT_LIST_HEAD(&req->intr_entry); 45 INIT_LIST_HEAD(&req->intr_entry);
42 init_waitqueue_head(&req->waitq); 46 init_waitqueue_head(&req->waitq);
43 atomic_set(&req->count, 1); 47 atomic_set(&req->count, 1);
48 req->pages = pages;
49 req->page_descs = page_descs;
50 req->max_pages = npages;
44} 51}
45 52
46struct fuse_req *fuse_request_alloc(void) 53static struct fuse_req *__fuse_request_alloc(unsigned npages, gfp_t flags)
47{ 54{
48 struct fuse_req *req = kmem_cache_alloc(fuse_req_cachep, GFP_KERNEL); 55 struct fuse_req *req = kmem_cache_alloc(fuse_req_cachep, flags);
49 if (req) 56 if (req) {
50 fuse_request_init(req); 57 struct page **pages;
58 struct fuse_page_desc *page_descs;
59
60 if (npages <= FUSE_REQ_INLINE_PAGES) {
61 pages = req->inline_pages;
62 page_descs = req->inline_page_descs;
63 } else {
64 pages = kmalloc(sizeof(struct page *) * npages, flags);
65 page_descs = kmalloc(sizeof(struct fuse_page_desc) *
66 npages, flags);
67 }
68
69 if (!pages || !page_descs) {
70 kfree(pages);
71 kfree(page_descs);
72 kmem_cache_free(fuse_req_cachep, req);
73 return NULL;
74 }
75
76 fuse_request_init(req, pages, page_descs, npages);
77 }
51 return req; 78 return req;
52} 79}
80
81struct fuse_req *fuse_request_alloc(unsigned npages)
82{
83 return __fuse_request_alloc(npages, GFP_KERNEL);
84}
53EXPORT_SYMBOL_GPL(fuse_request_alloc); 85EXPORT_SYMBOL_GPL(fuse_request_alloc);
54 86
55struct fuse_req *fuse_request_alloc_nofs(void) 87struct fuse_req *fuse_request_alloc_nofs(unsigned npages)
56{ 88{
57 struct fuse_req *req = kmem_cache_alloc(fuse_req_cachep, GFP_NOFS); 89 return __fuse_request_alloc(npages, GFP_NOFS);
58 if (req)
59 fuse_request_init(req);
60 return req;
61} 90}
62 91
63void fuse_request_free(struct fuse_req *req) 92void fuse_request_free(struct fuse_req *req)
64{ 93{
94 if (req->pages != req->inline_pages) {
95 kfree(req->pages);
96 kfree(req->page_descs);
97 }
65 kmem_cache_free(fuse_req_cachep, req); 98 kmem_cache_free(fuse_req_cachep, req);
66} 99}
67 100
@@ -97,7 +130,7 @@ static void fuse_req_init_context(struct fuse_req *req)
97 req->in.h.pid = current->pid; 130 req->in.h.pid = current->pid;
98} 131}
99 132
100struct fuse_req *fuse_get_req(struct fuse_conn *fc) 133struct fuse_req *fuse_get_req(struct fuse_conn *fc, unsigned npages)
101{ 134{
102 struct fuse_req *req; 135 struct fuse_req *req;
103 sigset_t oldset; 136 sigset_t oldset;
@@ -116,7 +149,7 @@ struct fuse_req *fuse_get_req(struct fuse_conn *fc)
116 if (!fc->connected) 149 if (!fc->connected)
117 goto out; 150 goto out;
118 151
119 req = fuse_request_alloc(); 152 req = fuse_request_alloc(npages);
120 err = -ENOMEM; 153 err = -ENOMEM;
121 if (!req) 154 if (!req)
122 goto out; 155 goto out;
@@ -165,7 +198,7 @@ static void put_reserved_req(struct fuse_conn *fc, struct fuse_req *req)
165 struct fuse_file *ff = file->private_data; 198 struct fuse_file *ff = file->private_data;
166 199
167 spin_lock(&fc->lock); 200 spin_lock(&fc->lock);
168 fuse_request_init(req); 201 fuse_request_init(req, req->pages, req->page_descs, req->max_pages);
169 BUG_ON(ff->reserved_req); 202 BUG_ON(ff->reserved_req);
170 ff->reserved_req = req; 203 ff->reserved_req = req;
171 wake_up_all(&fc->reserved_req_waitq); 204 wake_up_all(&fc->reserved_req_waitq);
@@ -186,13 +219,14 @@ static void put_reserved_req(struct fuse_conn *fc, struct fuse_req *req)
186 * filesystem should not have it's own file open. If deadlock is 219 * filesystem should not have it's own file open. If deadlock is
187 * intentional, it can still be broken by "aborting" the filesystem. 220 * intentional, it can still be broken by "aborting" the filesystem.
188 */ 221 */
189struct fuse_req *fuse_get_req_nofail(struct fuse_conn *fc, struct file *file) 222struct fuse_req *fuse_get_req_nofail_nopages(struct fuse_conn *fc,
223 struct file *file)
190{ 224{
191 struct fuse_req *req; 225 struct fuse_req *req;
192 226
193 atomic_inc(&fc->num_waiting); 227 atomic_inc(&fc->num_waiting);
194 wait_event(fc->blocked_waitq, !fc->blocked); 228 wait_event(fc->blocked_waitq, !fc->blocked);
195 req = fuse_request_alloc(); 229 req = fuse_request_alloc(0);
196 if (!req) 230 if (!req)
197 req = get_reserved_req(fc, file); 231 req = get_reserved_req(fc, file);
198 232
@@ -406,9 +440,8 @@ __acquires(fc->lock)
406 } 440 }
407} 441}
408 442
409void fuse_request_send(struct fuse_conn *fc, struct fuse_req *req) 443static void __fuse_request_send(struct fuse_conn *fc, struct fuse_req *req)
410{ 444{
411 req->isreply = 1;
412 spin_lock(&fc->lock); 445 spin_lock(&fc->lock);
413 if (!fc->connected) 446 if (!fc->connected)
414 req->out.h.error = -ENOTCONN; 447 req->out.h.error = -ENOTCONN;
@@ -425,6 +458,12 @@ void fuse_request_send(struct fuse_conn *fc, struct fuse_req *req)
425 } 458 }
426 spin_unlock(&fc->lock); 459 spin_unlock(&fc->lock);
427} 460}
461
462void fuse_request_send(struct fuse_conn *fc, struct fuse_req *req)
463{
464 req->isreply = 1;
465 __fuse_request_send(fc, req);
466}
428EXPORT_SYMBOL_GPL(fuse_request_send); 467EXPORT_SYMBOL_GPL(fuse_request_send);
429 468
430static void fuse_request_send_nowait_locked(struct fuse_conn *fc, 469static void fuse_request_send_nowait_locked(struct fuse_conn *fc,
@@ -491,6 +530,27 @@ void fuse_request_send_background_locked(struct fuse_conn *fc,
491 fuse_request_send_nowait_locked(fc, req); 530 fuse_request_send_nowait_locked(fc, req);
492} 531}
493 532
533void fuse_force_forget(struct file *file, u64 nodeid)
534{
535 struct inode *inode = file->f_path.dentry->d_inode;
536 struct fuse_conn *fc = get_fuse_conn(inode);
537 struct fuse_req *req;
538 struct fuse_forget_in inarg;
539
540 memset(&inarg, 0, sizeof(inarg));
541 inarg.nlookup = 1;
542 req = fuse_get_req_nofail_nopages(fc, file);
543 req->in.h.opcode = FUSE_FORGET;
544 req->in.h.nodeid = nodeid;
545 req->in.numargs = 1;
546 req->in.args[0].size = sizeof(inarg);
547 req->in.args[0].value = &inarg;
548 req->isreply = 0;
549 __fuse_request_send(fc, req);
550 /* ignore errors */
551 fuse_put_request(fc, req);
552}
553
494/* 554/*
495 * Lock the request. Up to the next unlock_request() there mustn't be 555 * Lock the request. Up to the next unlock_request() there mustn't be
496 * anything that could cause a page-fault. If the request was already 556 * anything that could cause a page-fault. If the request was already
@@ -692,8 +752,6 @@ static int fuse_try_move_page(struct fuse_copy_state *cs, struct page **pagep)
692 struct page *oldpage = *pagep; 752 struct page *oldpage = *pagep;
693 struct page *newpage; 753 struct page *newpage;
694 struct pipe_buffer *buf = cs->pipebufs; 754 struct pipe_buffer *buf = cs->pipebufs;
695 struct address_space *mapping;
696 pgoff_t index;
697 755
698 unlock_request(cs->fc, cs->req); 756 unlock_request(cs->fc, cs->req);
699 fuse_copy_finish(cs); 757 fuse_copy_finish(cs);
@@ -724,9 +782,6 @@ static int fuse_try_move_page(struct fuse_copy_state *cs, struct page **pagep)
724 if (fuse_check_page(newpage) != 0) 782 if (fuse_check_page(newpage) != 0)
725 goto out_fallback_unlock; 783 goto out_fallback_unlock;
726 784
727 mapping = oldpage->mapping;
728 index = oldpage->index;
729
730 /* 785 /*
731 * This is a new and locked page, it shouldn't be mapped or 786 * This is a new and locked page, it shouldn't be mapped or
732 * have any special flags on it 787 * have any special flags on it
@@ -855,11 +910,11 @@ static int fuse_copy_pages(struct fuse_copy_state *cs, unsigned nbytes,
855{ 910{
856 unsigned i; 911 unsigned i;
857 struct fuse_req *req = cs->req; 912 struct fuse_req *req = cs->req;
858 unsigned offset = req->page_offset;
859 unsigned count = min(nbytes, (unsigned) PAGE_SIZE - offset);
860 913
861 for (i = 0; i < req->num_pages && (nbytes || zeroing); i++) { 914 for (i = 0; i < req->num_pages && (nbytes || zeroing); i++) {
862 int err; 915 int err;
916 unsigned offset = req->page_descs[i].offset;
917 unsigned count = min(nbytes, req->page_descs[i].length);
863 918
864 err = fuse_copy_page(cs, &req->pages[i], offset, count, 919 err = fuse_copy_page(cs, &req->pages[i], offset, count,
865 zeroing); 920 zeroing);
@@ -867,8 +922,6 @@ static int fuse_copy_pages(struct fuse_copy_state *cs, unsigned nbytes,
867 return err; 922 return err;
868 923
869 nbytes -= count; 924 nbytes -= count;
870 count = min(nbytes, (unsigned) PAGE_SIZE);
871 offset = 0;
872 } 925 }
873 return 0; 926 return 0;
874} 927}
@@ -1541,29 +1594,34 @@ static int fuse_retrieve(struct fuse_conn *fc, struct inode *inode,
1541 unsigned int num; 1594 unsigned int num;
1542 unsigned int offset; 1595 unsigned int offset;
1543 size_t total_len = 0; 1596 size_t total_len = 0;
1597 int num_pages;
1598
1599 offset = outarg->offset & ~PAGE_CACHE_MASK;
1600 file_size = i_size_read(inode);
1601
1602 num = outarg->size;
1603 if (outarg->offset > file_size)
1604 num = 0;
1605 else if (outarg->offset + num > file_size)
1606 num = file_size - outarg->offset;
1544 1607
1545 req = fuse_get_req(fc); 1608 num_pages = (num + offset + PAGE_SIZE - 1) >> PAGE_SHIFT;
1609 num_pages = min(num_pages, FUSE_MAX_PAGES_PER_REQ);
1610
1611 req = fuse_get_req(fc, num_pages);
1546 if (IS_ERR(req)) 1612 if (IS_ERR(req))
1547 return PTR_ERR(req); 1613 return PTR_ERR(req);
1548 1614
1549 offset = outarg->offset & ~PAGE_CACHE_MASK;
1550
1551 req->in.h.opcode = FUSE_NOTIFY_REPLY; 1615 req->in.h.opcode = FUSE_NOTIFY_REPLY;
1552 req->in.h.nodeid = outarg->nodeid; 1616 req->in.h.nodeid = outarg->nodeid;
1553 req->in.numargs = 2; 1617 req->in.numargs = 2;
1554 req->in.argpages = 1; 1618 req->in.argpages = 1;
1555 req->page_offset = offset; 1619 req->page_descs[0].offset = offset;
1556 req->end = fuse_retrieve_end; 1620 req->end = fuse_retrieve_end;
1557 1621
1558 index = outarg->offset >> PAGE_CACHE_SHIFT; 1622 index = outarg->offset >> PAGE_CACHE_SHIFT;
1559 file_size = i_size_read(inode);
1560 num = outarg->size;
1561 if (outarg->offset > file_size)
1562 num = 0;
1563 else if (outarg->offset + num > file_size)
1564 num = file_size - outarg->offset;
1565 1623
1566 while (num && req->num_pages < FUSE_MAX_PAGES_PER_REQ) { 1624 while (num && req->num_pages < num_pages) {
1567 struct page *page; 1625 struct page *page;
1568 unsigned int this_num; 1626 unsigned int this_num;
1569 1627
@@ -1573,6 +1631,7 @@ static int fuse_retrieve(struct fuse_conn *fc, struct inode *inode,
1573 1631
1574 this_num = min_t(unsigned, num, PAGE_CACHE_SIZE - offset); 1632 this_num = min_t(unsigned, num, PAGE_CACHE_SIZE - offset);
1575 req->pages[req->num_pages] = page; 1633 req->pages[req->num_pages] = page;
1634 req->page_descs[req->num_pages].length = this_num;
1576 req->num_pages++; 1635 req->num_pages++;
1577 1636
1578 offset = 0; 1637 offset = 0;
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index b7c09f9eb40c..85065221a58a 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -14,6 +14,29 @@
14#include <linux/namei.h> 14#include <linux/namei.h>
15#include <linux/slab.h> 15#include <linux/slab.h>
16 16
17static bool fuse_use_readdirplus(struct inode *dir, struct file *filp)
18{
19 struct fuse_conn *fc = get_fuse_conn(dir);
20 struct fuse_inode *fi = get_fuse_inode(dir);
21
22 if (!fc->do_readdirplus)
23 return false;
24 if (!fc->readdirplus_auto)
25 return true;
26 if (test_and_clear_bit(FUSE_I_ADVISE_RDPLUS, &fi->state))
27 return true;
28 if (filp->f_pos == 0)
29 return true;
30 return false;
31}
32
33static void fuse_advise_use_readdirplus(struct inode *dir)
34{
35 struct fuse_inode *fi = get_fuse_inode(dir);
36
37 set_bit(FUSE_I_ADVISE_RDPLUS, &fi->state);
38}
39
17#if BITS_PER_LONG >= 64 40#if BITS_PER_LONG >= 64
18static inline void fuse_dentry_settime(struct dentry *entry, u64 time) 41static inline void fuse_dentry_settime(struct dentry *entry, u64 time)
19{ 42{
@@ -178,7 +201,7 @@ static int fuse_dentry_revalidate(struct dentry *entry, unsigned int flags)
178 return -ECHILD; 201 return -ECHILD;
179 202
180 fc = get_fuse_conn(inode); 203 fc = get_fuse_conn(inode);
181 req = fuse_get_req(fc); 204 req = fuse_get_req_nopages(fc);
182 if (IS_ERR(req)) 205 if (IS_ERR(req))
183 return 0; 206 return 0;
184 207
@@ -219,6 +242,7 @@ static int fuse_dentry_revalidate(struct dentry *entry, unsigned int flags)
219 attr_version); 242 attr_version);
220 fuse_change_entry_timeout(entry, &outarg); 243 fuse_change_entry_timeout(entry, &outarg);
221 } 244 }
245 fuse_advise_use_readdirplus(inode);
222 return 1; 246 return 1;
223} 247}
224 248
@@ -271,7 +295,7 @@ int fuse_lookup_name(struct super_block *sb, u64 nodeid, struct qstr *name,
271 if (name->len > FUSE_NAME_MAX) 295 if (name->len > FUSE_NAME_MAX)
272 goto out; 296 goto out;
273 297
274 req = fuse_get_req(fc); 298 req = fuse_get_req_nopages(fc);
275 err = PTR_ERR(req); 299 err = PTR_ERR(req);
276 if (IS_ERR(req)) 300 if (IS_ERR(req))
277 goto out; 301 goto out;
@@ -355,6 +379,7 @@ static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry,
355 else 379 else
356 fuse_invalidate_entry_cache(entry); 380 fuse_invalidate_entry_cache(entry);
357 381
382 fuse_advise_use_readdirplus(dir);
358 return newent; 383 return newent;
359 384
360 out_iput: 385 out_iput:
@@ -391,7 +416,7 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry,
391 if (!forget) 416 if (!forget)
392 goto out_err; 417 goto out_err;
393 418
394 req = fuse_get_req(fc); 419 req = fuse_get_req_nopages(fc);
395 err = PTR_ERR(req); 420 err = PTR_ERR(req);
396 if (IS_ERR(req)) 421 if (IS_ERR(req))
397 goto out_put_forget_req; 422 goto out_put_forget_req;
@@ -592,7 +617,7 @@ static int fuse_mknod(struct inode *dir, struct dentry *entry, umode_t mode,
592{ 617{
593 struct fuse_mknod_in inarg; 618 struct fuse_mknod_in inarg;
594 struct fuse_conn *fc = get_fuse_conn(dir); 619 struct fuse_conn *fc = get_fuse_conn(dir);
595 struct fuse_req *req = fuse_get_req(fc); 620 struct fuse_req *req = fuse_get_req_nopages(fc);
596 if (IS_ERR(req)) 621 if (IS_ERR(req))
597 return PTR_ERR(req); 622 return PTR_ERR(req);
598 623
@@ -623,7 +648,7 @@ static int fuse_mkdir(struct inode *dir, struct dentry *entry, umode_t mode)
623{ 648{
624 struct fuse_mkdir_in inarg; 649 struct fuse_mkdir_in inarg;
625 struct fuse_conn *fc = get_fuse_conn(dir); 650 struct fuse_conn *fc = get_fuse_conn(dir);
626 struct fuse_req *req = fuse_get_req(fc); 651 struct fuse_req *req = fuse_get_req_nopages(fc);
627 if (IS_ERR(req)) 652 if (IS_ERR(req))
628 return PTR_ERR(req); 653 return PTR_ERR(req);
629 654
@@ -647,7 +672,7 @@ static int fuse_symlink(struct inode *dir, struct dentry *entry,
647{ 672{
648 struct fuse_conn *fc = get_fuse_conn(dir); 673 struct fuse_conn *fc = get_fuse_conn(dir);
649 unsigned len = strlen(link) + 1; 674 unsigned len = strlen(link) + 1;
650 struct fuse_req *req = fuse_get_req(fc); 675 struct fuse_req *req = fuse_get_req_nopages(fc);
651 if (IS_ERR(req)) 676 if (IS_ERR(req))
652 return PTR_ERR(req); 677 return PTR_ERR(req);
653 678
@@ -664,7 +689,7 @@ static int fuse_unlink(struct inode *dir, struct dentry *entry)
664{ 689{
665 int err; 690 int err;
666 struct fuse_conn *fc = get_fuse_conn(dir); 691 struct fuse_conn *fc = get_fuse_conn(dir);
667 struct fuse_req *req = fuse_get_req(fc); 692 struct fuse_req *req = fuse_get_req_nopages(fc);
668 if (IS_ERR(req)) 693 if (IS_ERR(req))
669 return PTR_ERR(req); 694 return PTR_ERR(req);
670 695
@@ -682,7 +707,14 @@ static int fuse_unlink(struct inode *dir, struct dentry *entry)
682 707
683 spin_lock(&fc->lock); 708 spin_lock(&fc->lock);
684 fi->attr_version = ++fc->attr_version; 709 fi->attr_version = ++fc->attr_version;
685 drop_nlink(inode); 710 /*
711 * If i_nlink == 0 then unlink doesn't make sense, yet this can
712 * happen if userspace filesystem is careless. It would be
713 * difficult to enforce correct nlink usage so just ignore this
714 * condition here
715 */
716 if (inode->i_nlink > 0)
717 drop_nlink(inode);
686 spin_unlock(&fc->lock); 718 spin_unlock(&fc->lock);
687 fuse_invalidate_attr(inode); 719 fuse_invalidate_attr(inode);
688 fuse_invalidate_attr(dir); 720 fuse_invalidate_attr(dir);
@@ -696,7 +728,7 @@ static int fuse_rmdir(struct inode *dir, struct dentry *entry)
696{ 728{
697 int err; 729 int err;
698 struct fuse_conn *fc = get_fuse_conn(dir); 730 struct fuse_conn *fc = get_fuse_conn(dir);
699 struct fuse_req *req = fuse_get_req(fc); 731 struct fuse_req *req = fuse_get_req_nopages(fc);
700 if (IS_ERR(req)) 732 if (IS_ERR(req))
701 return PTR_ERR(req); 733 return PTR_ERR(req);
702 734
@@ -723,7 +755,7 @@ static int fuse_rename(struct inode *olddir, struct dentry *oldent,
723 int err; 755 int err;
724 struct fuse_rename_in inarg; 756 struct fuse_rename_in inarg;
725 struct fuse_conn *fc = get_fuse_conn(olddir); 757 struct fuse_conn *fc = get_fuse_conn(olddir);
726 struct fuse_req *req = fuse_get_req(fc); 758 struct fuse_req *req = fuse_get_req_nopages(fc);
727 759
728 if (IS_ERR(req)) 760 if (IS_ERR(req))
729 return PTR_ERR(req); 761 return PTR_ERR(req);
@@ -776,7 +808,7 @@ static int fuse_link(struct dentry *entry, struct inode *newdir,
776 struct fuse_link_in inarg; 808 struct fuse_link_in inarg;
777 struct inode *inode = entry->d_inode; 809 struct inode *inode = entry->d_inode;
778 struct fuse_conn *fc = get_fuse_conn(inode); 810 struct fuse_conn *fc = get_fuse_conn(inode);
779 struct fuse_req *req = fuse_get_req(fc); 811 struct fuse_req *req = fuse_get_req_nopages(fc);
780 if (IS_ERR(req)) 812 if (IS_ERR(req))
781 return PTR_ERR(req); 813 return PTR_ERR(req);
782 814
@@ -848,7 +880,7 @@ static int fuse_do_getattr(struct inode *inode, struct kstat *stat,
848 struct fuse_req *req; 880 struct fuse_req *req;
849 u64 attr_version; 881 u64 attr_version;
850 882
851 req = fuse_get_req(fc); 883 req = fuse_get_req_nopages(fc);
852 if (IS_ERR(req)) 884 if (IS_ERR(req))
853 return PTR_ERR(req); 885 return PTR_ERR(req);
854 886
@@ -985,7 +1017,7 @@ int fuse_reverse_inval_entry(struct super_block *sb, u64 parent_nodeid,
985 1017
986/* 1018/*
987 * Calling into a user-controlled filesystem gives the filesystem 1019 * Calling into a user-controlled filesystem gives the filesystem
988 * daemon ptrace-like capabilities over the requester process. This 1020 * daemon ptrace-like capabilities over the current process. This
989 * means, that the filesystem daemon is able to record the exact 1021 * means, that the filesystem daemon is able to record the exact
990 * filesystem operations performed, and can also control the behavior 1022 * filesystem operations performed, and can also control the behavior
991 * of the requester process in otherwise impossible ways. For example 1023 * of the requester process in otherwise impossible ways. For example
@@ -996,27 +1028,23 @@ int fuse_reverse_inval_entry(struct super_block *sb, u64 parent_nodeid,
996 * for which the owner of the mount has ptrace privilege. This 1028 * for which the owner of the mount has ptrace privilege. This
997 * excludes processes started by other users, suid or sgid processes. 1029 * excludes processes started by other users, suid or sgid processes.
998 */ 1030 */
999int fuse_allow_task(struct fuse_conn *fc, struct task_struct *task) 1031int fuse_allow_current_process(struct fuse_conn *fc)
1000{ 1032{
1001 const struct cred *cred; 1033 const struct cred *cred;
1002 int ret;
1003 1034
1004 if (fc->flags & FUSE_ALLOW_OTHER) 1035 if (fc->flags & FUSE_ALLOW_OTHER)
1005 return 1; 1036 return 1;
1006 1037
1007 rcu_read_lock(); 1038 cred = current_cred();
1008 ret = 0;
1009 cred = __task_cred(task);
1010 if (uid_eq(cred->euid, fc->user_id) && 1039 if (uid_eq(cred->euid, fc->user_id) &&
1011 uid_eq(cred->suid, fc->user_id) && 1040 uid_eq(cred->suid, fc->user_id) &&
1012 uid_eq(cred->uid, fc->user_id) && 1041 uid_eq(cred->uid, fc->user_id) &&
1013 gid_eq(cred->egid, fc->group_id) && 1042 gid_eq(cred->egid, fc->group_id) &&
1014 gid_eq(cred->sgid, fc->group_id) && 1043 gid_eq(cred->sgid, fc->group_id) &&
1015 gid_eq(cred->gid, fc->group_id)) 1044 gid_eq(cred->gid, fc->group_id))
1016 ret = 1; 1045 return 1;
1017 rcu_read_unlock();
1018 1046
1019 return ret; 1047 return 0;
1020} 1048}
1021 1049
1022static int fuse_access(struct inode *inode, int mask) 1050static int fuse_access(struct inode *inode, int mask)
@@ -1029,7 +1057,7 @@ static int fuse_access(struct inode *inode, int mask)
1029 if (fc->no_access) 1057 if (fc->no_access)
1030 return 0; 1058 return 0;
1031 1059
1032 req = fuse_get_req(fc); 1060 req = fuse_get_req_nopages(fc);
1033 if (IS_ERR(req)) 1061 if (IS_ERR(req))
1034 return PTR_ERR(req); 1062 return PTR_ERR(req);
1035 1063
@@ -1077,7 +1105,7 @@ static int fuse_permission(struct inode *inode, int mask)
1077 bool refreshed = false; 1105 bool refreshed = false;
1078 int err = 0; 1106 int err = 0;
1079 1107
1080 if (!fuse_allow_task(fc, current)) 1108 if (!fuse_allow_current_process(fc))
1081 return -EACCES; 1109 return -EACCES;
1082 1110
1083 /* 1111 /*
@@ -1155,19 +1183,157 @@ static int parse_dirfile(char *buf, size_t nbytes, struct file *file,
1155 return 0; 1183 return 0;
1156} 1184}
1157 1185
1158static int fuse_readdir(struct file *file, void *dstbuf, filldir_t filldir) 1186static int fuse_direntplus_link(struct file *file,
1187 struct fuse_direntplus *direntplus,
1188 u64 attr_version)
1159{ 1189{
1160 int err; 1190 int err;
1191 struct fuse_entry_out *o = &direntplus->entry_out;
1192 struct fuse_dirent *dirent = &direntplus->dirent;
1193 struct dentry *parent = file->f_path.dentry;
1194 struct qstr name = QSTR_INIT(dirent->name, dirent->namelen);
1195 struct dentry *dentry;
1196 struct dentry *alias;
1197 struct inode *dir = parent->d_inode;
1198 struct fuse_conn *fc;
1199 struct inode *inode;
1200
1201 if (!o->nodeid) {
1202 /*
1203 * Unlike in the case of fuse_lookup, zero nodeid does not mean
1204 * ENOENT. Instead, it only means the userspace filesystem did
1205 * not want to return attributes/handle for this entry.
1206 *
1207 * So do nothing.
1208 */
1209 return 0;
1210 }
1211
1212 if (name.name[0] == '.') {
1213 /*
1214 * We could potentially refresh the attributes of the directory
1215 * and its parent?
1216 */
1217 if (name.len == 1)
1218 return 0;
1219 if (name.name[1] == '.' && name.len == 2)
1220 return 0;
1221 }
1222 fc = get_fuse_conn(dir);
1223
1224 name.hash = full_name_hash(name.name, name.len);
1225 dentry = d_lookup(parent, &name);
1226 if (dentry && dentry->d_inode) {
1227 inode = dentry->d_inode;
1228 if (get_node_id(inode) == o->nodeid) {
1229 struct fuse_inode *fi;
1230 fi = get_fuse_inode(inode);
1231 spin_lock(&fc->lock);
1232 fi->nlookup++;
1233 spin_unlock(&fc->lock);
1234
1235 /*
1236 * The other branch to 'found' comes via fuse_iget()
1237 * which bumps nlookup inside
1238 */
1239 goto found;
1240 }
1241 err = d_invalidate(dentry);
1242 if (err)
1243 goto out;
1244 dput(dentry);
1245 dentry = NULL;
1246 }
1247
1248 dentry = d_alloc(parent, &name);
1249 err = -ENOMEM;
1250 if (!dentry)
1251 goto out;
1252
1253 inode = fuse_iget(dir->i_sb, o->nodeid, o->generation,
1254 &o->attr, entry_attr_timeout(o), attr_version);
1255 if (!inode)
1256 goto out;
1257
1258 alias = d_materialise_unique(dentry, inode);
1259 err = PTR_ERR(alias);
1260 if (IS_ERR(alias))
1261 goto out;
1262 if (alias) {
1263 dput(dentry);
1264 dentry = alias;
1265 }
1266
1267found:
1268 fuse_change_attributes(inode, &o->attr, entry_attr_timeout(o),
1269 attr_version);
1270
1271 fuse_change_entry_timeout(dentry, o);
1272
1273 err = 0;
1274out:
1275 if (dentry)
1276 dput(dentry);
1277 return err;
1278}
1279
1280static int parse_dirplusfile(char *buf, size_t nbytes, struct file *file,
1281 void *dstbuf, filldir_t filldir, u64 attr_version)
1282{
1283 struct fuse_direntplus *direntplus;
1284 struct fuse_dirent *dirent;
1285 size_t reclen;
1286 int over = 0;
1287 int ret;
1288
1289 while (nbytes >= FUSE_NAME_OFFSET_DIRENTPLUS) {
1290 direntplus = (struct fuse_direntplus *) buf;
1291 dirent = &direntplus->dirent;
1292 reclen = FUSE_DIRENTPLUS_SIZE(direntplus);
1293
1294 if (!dirent->namelen || dirent->namelen > FUSE_NAME_MAX)
1295 return -EIO;
1296 if (reclen > nbytes)
1297 break;
1298
1299 if (!over) {
1300 /* We fill entries into dstbuf only as much as
1301 it can hold. But we still continue iterating
1302 over remaining entries to link them. If not,
1303 we need to send a FORGET for each of those
1304 which we did not link.
1305 */
1306 over = filldir(dstbuf, dirent->name, dirent->namelen,
1307 file->f_pos, dirent->ino,
1308 dirent->type);
1309 file->f_pos = dirent->off;
1310 }
1311
1312 buf += reclen;
1313 nbytes -= reclen;
1314
1315 ret = fuse_direntplus_link(file, direntplus, attr_version);
1316 if (ret)
1317 fuse_force_forget(file, direntplus->entry_out.nodeid);
1318 }
1319
1320 return 0;
1321}
1322
1323static int fuse_readdir(struct file *file, void *dstbuf, filldir_t filldir)
1324{
1325 int plus, err;
1161 size_t nbytes; 1326 size_t nbytes;
1162 struct page *page; 1327 struct page *page;
1163 struct inode *inode = file->f_path.dentry->d_inode; 1328 struct inode *inode = file->f_path.dentry->d_inode;
1164 struct fuse_conn *fc = get_fuse_conn(inode); 1329 struct fuse_conn *fc = get_fuse_conn(inode);
1165 struct fuse_req *req; 1330 struct fuse_req *req;
1331 u64 attr_version = 0;
1166 1332
1167 if (is_bad_inode(inode)) 1333 if (is_bad_inode(inode))
1168 return -EIO; 1334 return -EIO;
1169 1335
1170 req = fuse_get_req(fc); 1336 req = fuse_get_req(fc, 1);
1171 if (IS_ERR(req)) 1337 if (IS_ERR(req))
1172 return PTR_ERR(req); 1338 return PTR_ERR(req);
1173 1339
@@ -1176,17 +1342,34 @@ static int fuse_readdir(struct file *file, void *dstbuf, filldir_t filldir)
1176 fuse_put_request(fc, req); 1342 fuse_put_request(fc, req);
1177 return -ENOMEM; 1343 return -ENOMEM;
1178 } 1344 }
1345
1346 plus = fuse_use_readdirplus(inode, file);
1179 req->out.argpages = 1; 1347 req->out.argpages = 1;
1180 req->num_pages = 1; 1348 req->num_pages = 1;
1181 req->pages[0] = page; 1349 req->pages[0] = page;
1182 fuse_read_fill(req, file, file->f_pos, PAGE_SIZE, FUSE_READDIR); 1350 req->page_descs[0].length = PAGE_SIZE;
1351 if (plus) {
1352 attr_version = fuse_get_attr_version(fc);
1353 fuse_read_fill(req, file, file->f_pos, PAGE_SIZE,
1354 FUSE_READDIRPLUS);
1355 } else {
1356 fuse_read_fill(req, file, file->f_pos, PAGE_SIZE,
1357 FUSE_READDIR);
1358 }
1183 fuse_request_send(fc, req); 1359 fuse_request_send(fc, req);
1184 nbytes = req->out.args[0].size; 1360 nbytes = req->out.args[0].size;
1185 err = req->out.h.error; 1361 err = req->out.h.error;
1186 fuse_put_request(fc, req); 1362 fuse_put_request(fc, req);
1187 if (!err) 1363 if (!err) {
1188 err = parse_dirfile(page_address(page), nbytes, file, dstbuf, 1364 if (plus) {
1189 filldir); 1365 err = parse_dirplusfile(page_address(page), nbytes,
1366 file, dstbuf, filldir,
1367 attr_version);
1368 } else {
1369 err = parse_dirfile(page_address(page), nbytes, file,
1370 dstbuf, filldir);
1371 }
1372 }
1190 1373
1191 __free_page(page); 1374 __free_page(page);
1192 fuse_invalidate_attr(inode); /* atime changed */ 1375 fuse_invalidate_attr(inode); /* atime changed */
@@ -1197,7 +1380,7 @@ static char *read_link(struct dentry *dentry)
1197{ 1380{
1198 struct inode *inode = dentry->d_inode; 1381 struct inode *inode = dentry->d_inode;
1199 struct fuse_conn *fc = get_fuse_conn(inode); 1382 struct fuse_conn *fc = get_fuse_conn(inode);
1200 struct fuse_req *req = fuse_get_req(fc); 1383 struct fuse_req *req = fuse_get_req_nopages(fc);
1201 char *link; 1384 char *link;
1202 1385
1203 if (IS_ERR(req)) 1386 if (IS_ERR(req))
@@ -1391,7 +1574,7 @@ static int fuse_do_setattr(struct dentry *entry, struct iattr *attr,
1391 loff_t oldsize; 1574 loff_t oldsize;
1392 int err; 1575 int err;
1393 1576
1394 if (!fuse_allow_task(fc, current)) 1577 if (!fuse_allow_current_process(fc))
1395 return -EACCES; 1578 return -EACCES;
1396 1579
1397 if (!(fc->flags & FUSE_DEFAULT_PERMISSIONS)) 1580 if (!(fc->flags & FUSE_DEFAULT_PERMISSIONS))
@@ -1410,7 +1593,7 @@ static int fuse_do_setattr(struct dentry *entry, struct iattr *attr,
1410 if (attr->ia_valid & ATTR_SIZE) 1593 if (attr->ia_valid & ATTR_SIZE)
1411 is_truncate = true; 1594 is_truncate = true;
1412 1595
1413 req = fuse_get_req(fc); 1596 req = fuse_get_req_nopages(fc);
1414 if (IS_ERR(req)) 1597 if (IS_ERR(req))
1415 return PTR_ERR(req); 1598 return PTR_ERR(req);
1416 1599
@@ -1500,7 +1683,7 @@ static int fuse_getattr(struct vfsmount *mnt, struct dentry *entry,
1500 struct inode *inode = entry->d_inode; 1683 struct inode *inode = entry->d_inode;
1501 struct fuse_conn *fc = get_fuse_conn(inode); 1684 struct fuse_conn *fc = get_fuse_conn(inode);
1502 1685
1503 if (!fuse_allow_task(fc, current)) 1686 if (!fuse_allow_current_process(fc))
1504 return -EACCES; 1687 return -EACCES;
1505 1688
1506 return fuse_update_attributes(inode, stat, NULL, NULL); 1689 return fuse_update_attributes(inode, stat, NULL, NULL);
@@ -1518,7 +1701,7 @@ static int fuse_setxattr(struct dentry *entry, const char *name,
1518 if (fc->no_setxattr) 1701 if (fc->no_setxattr)
1519 return -EOPNOTSUPP; 1702 return -EOPNOTSUPP;
1520 1703
1521 req = fuse_get_req(fc); 1704 req = fuse_get_req_nopages(fc);
1522 if (IS_ERR(req)) 1705 if (IS_ERR(req))
1523 return PTR_ERR(req); 1706 return PTR_ERR(req);
1524 1707
@@ -1557,7 +1740,7 @@ static ssize_t fuse_getxattr(struct dentry *entry, const char *name,
1557 if (fc->no_getxattr) 1740 if (fc->no_getxattr)
1558 return -EOPNOTSUPP; 1741 return -EOPNOTSUPP;
1559 1742
1560 req = fuse_get_req(fc); 1743 req = fuse_get_req_nopages(fc);
1561 if (IS_ERR(req)) 1744 if (IS_ERR(req))
1562 return PTR_ERR(req); 1745 return PTR_ERR(req);
1563 1746
@@ -1603,13 +1786,13 @@ static ssize_t fuse_listxattr(struct dentry *entry, char *list, size_t size)
1603 struct fuse_getxattr_out outarg; 1786 struct fuse_getxattr_out outarg;
1604 ssize_t ret; 1787 ssize_t ret;
1605 1788
1606 if (!fuse_allow_task(fc, current)) 1789 if (!fuse_allow_current_process(fc))
1607 return -EACCES; 1790 return -EACCES;
1608 1791
1609 if (fc->no_listxattr) 1792 if (fc->no_listxattr)
1610 return -EOPNOTSUPP; 1793 return -EOPNOTSUPP;
1611 1794
1612 req = fuse_get_req(fc); 1795 req = fuse_get_req_nopages(fc);
1613 if (IS_ERR(req)) 1796 if (IS_ERR(req))
1614 return PTR_ERR(req); 1797 return PTR_ERR(req);
1615 1798
@@ -1654,7 +1837,7 @@ static int fuse_removexattr(struct dentry *entry, const char *name)
1654 if (fc->no_removexattr) 1837 if (fc->no_removexattr)
1655 return -EOPNOTSUPP; 1838 return -EOPNOTSUPP;
1656 1839
1657 req = fuse_get_req(fc); 1840 req = fuse_get_req_nopages(fc);
1658 if (IS_ERR(req)) 1841 if (IS_ERR(req))
1659 return PTR_ERR(req); 1842 return PTR_ERR(req);
1660 1843
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index e21d4d8f87e3..c8071768b950 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -25,7 +25,7 @@ static int fuse_send_open(struct fuse_conn *fc, u64 nodeid, struct file *file,
25 struct fuse_req *req; 25 struct fuse_req *req;
26 int err; 26 int err;
27 27
28 req = fuse_get_req(fc); 28 req = fuse_get_req_nopages(fc);
29 if (IS_ERR(req)) 29 if (IS_ERR(req))
30 return PTR_ERR(req); 30 return PTR_ERR(req);
31 31
@@ -57,7 +57,7 @@ struct fuse_file *fuse_file_alloc(struct fuse_conn *fc)
57 return NULL; 57 return NULL;
58 58
59 ff->fc = fc; 59 ff->fc = fc;
60 ff->reserved_req = fuse_request_alloc(); 60 ff->reserved_req = fuse_request_alloc(0);
61 if (unlikely(!ff->reserved_req)) { 61 if (unlikely(!ff->reserved_req)) {
62 kfree(ff); 62 kfree(ff);
63 return NULL; 63 return NULL;
@@ -368,7 +368,7 @@ static int fuse_flush(struct file *file, fl_owner_t id)
368 if (fc->no_flush) 368 if (fc->no_flush)
369 return 0; 369 return 0;
370 370
371 req = fuse_get_req_nofail(fc, file); 371 req = fuse_get_req_nofail_nopages(fc, file);
372 memset(&inarg, 0, sizeof(inarg)); 372 memset(&inarg, 0, sizeof(inarg));
373 inarg.fh = ff->fh; 373 inarg.fh = ff->fh;
374 inarg.lock_owner = fuse_lock_owner_id(fc, id); 374 inarg.lock_owner = fuse_lock_owner_id(fc, id);
@@ -436,7 +436,7 @@ int fuse_fsync_common(struct file *file, loff_t start, loff_t end,
436 436
437 fuse_sync_writes(inode); 437 fuse_sync_writes(inode);
438 438
439 req = fuse_get_req(fc); 439 req = fuse_get_req_nopages(fc);
440 if (IS_ERR(req)) { 440 if (IS_ERR(req)) {
441 err = PTR_ERR(req); 441 err = PTR_ERR(req);
442 goto out; 442 goto out;
@@ -544,7 +544,7 @@ static int fuse_readpage(struct file *file, struct page *page)
544 */ 544 */
545 fuse_wait_on_page_writeback(inode, page->index); 545 fuse_wait_on_page_writeback(inode, page->index);
546 546
547 req = fuse_get_req(fc); 547 req = fuse_get_req(fc, 1);
548 err = PTR_ERR(req); 548 err = PTR_ERR(req);
549 if (IS_ERR(req)) 549 if (IS_ERR(req))
550 goto out; 550 goto out;
@@ -555,6 +555,7 @@ static int fuse_readpage(struct file *file, struct page *page)
555 req->out.argpages = 1; 555 req->out.argpages = 1;
556 req->num_pages = 1; 556 req->num_pages = 1;
557 req->pages[0] = page; 557 req->pages[0] = page;
558 req->page_descs[0].length = count;
558 num_read = fuse_send_read(req, file, pos, count, NULL); 559 num_read = fuse_send_read(req, file, pos, count, NULL);
559 err = req->out.h.error; 560 err = req->out.h.error;
560 fuse_put_request(fc, req); 561 fuse_put_request(fc, req);
@@ -641,6 +642,7 @@ struct fuse_fill_data {
641 struct fuse_req *req; 642 struct fuse_req *req;
642 struct file *file; 643 struct file *file;
643 struct inode *inode; 644 struct inode *inode;
645 unsigned nr_pages;
644}; 646};
645 647
646static int fuse_readpages_fill(void *_data, struct page *page) 648static int fuse_readpages_fill(void *_data, struct page *page)
@@ -656,16 +658,26 @@ static int fuse_readpages_fill(void *_data, struct page *page)
656 (req->num_pages == FUSE_MAX_PAGES_PER_REQ || 658 (req->num_pages == FUSE_MAX_PAGES_PER_REQ ||
657 (req->num_pages + 1) * PAGE_CACHE_SIZE > fc->max_read || 659 (req->num_pages + 1) * PAGE_CACHE_SIZE > fc->max_read ||
658 req->pages[req->num_pages - 1]->index + 1 != page->index)) { 660 req->pages[req->num_pages - 1]->index + 1 != page->index)) {
661 int nr_alloc = min_t(unsigned, data->nr_pages,
662 FUSE_MAX_PAGES_PER_REQ);
659 fuse_send_readpages(req, data->file); 663 fuse_send_readpages(req, data->file);
660 data->req = req = fuse_get_req(fc); 664 data->req = req = fuse_get_req(fc, nr_alloc);
661 if (IS_ERR(req)) { 665 if (IS_ERR(req)) {
662 unlock_page(page); 666 unlock_page(page);
663 return PTR_ERR(req); 667 return PTR_ERR(req);
664 } 668 }
665 } 669 }
670
671 if (WARN_ON(req->num_pages >= req->max_pages)) {
672 fuse_put_request(fc, req);
673 return -EIO;
674 }
675
666 page_cache_get(page); 676 page_cache_get(page);
667 req->pages[req->num_pages] = page; 677 req->pages[req->num_pages] = page;
678 req->page_descs[req->num_pages].length = PAGE_SIZE;
668 req->num_pages++; 679 req->num_pages++;
680 data->nr_pages--;
669 return 0; 681 return 0;
670} 682}
671 683
@@ -676,6 +688,7 @@ static int fuse_readpages(struct file *file, struct address_space *mapping,
676 struct fuse_conn *fc = get_fuse_conn(inode); 688 struct fuse_conn *fc = get_fuse_conn(inode);
677 struct fuse_fill_data data; 689 struct fuse_fill_data data;
678 int err; 690 int err;
691 int nr_alloc = min_t(unsigned, nr_pages, FUSE_MAX_PAGES_PER_REQ);
679 692
680 err = -EIO; 693 err = -EIO;
681 if (is_bad_inode(inode)) 694 if (is_bad_inode(inode))
@@ -683,7 +696,8 @@ static int fuse_readpages(struct file *file, struct address_space *mapping,
683 696
684 data.file = file; 697 data.file = file;
685 data.inode = inode; 698 data.inode = inode;
686 data.req = fuse_get_req(fc); 699 data.req = fuse_get_req(fc, nr_alloc);
700 data.nr_pages = nr_pages;
687 err = PTR_ERR(data.req); 701 err = PTR_ERR(data.req);
688 if (IS_ERR(data.req)) 702 if (IS_ERR(data.req))
689 goto out; 703 goto out;
@@ -786,7 +800,7 @@ static size_t fuse_send_write_pages(struct fuse_req *req, struct file *file,
786 800
787 res = fuse_send_write(req, file, pos, count, NULL); 801 res = fuse_send_write(req, file, pos, count, NULL);
788 802
789 offset = req->page_offset; 803 offset = req->page_descs[0].offset;
790 count = res; 804 count = res;
791 for (i = 0; i < req->num_pages; i++) { 805 for (i = 0; i < req->num_pages; i++) {
792 struct page *page = req->pages[i]; 806 struct page *page = req->pages[i];
@@ -817,7 +831,7 @@ static ssize_t fuse_fill_write_pages(struct fuse_req *req,
817 int err; 831 int err;
818 832
819 req->in.argpages = 1; 833 req->in.argpages = 1;
820 req->page_offset = offset; 834 req->page_descs[0].offset = offset;
821 835
822 do { 836 do {
823 size_t tmp; 837 size_t tmp;
@@ -857,6 +871,7 @@ static ssize_t fuse_fill_write_pages(struct fuse_req *req,
857 871
858 err = 0; 872 err = 0;
859 req->pages[req->num_pages] = page; 873 req->pages[req->num_pages] = page;
874 req->page_descs[req->num_pages].length = tmp;
860 req->num_pages++; 875 req->num_pages++;
861 876
862 iov_iter_advance(ii, tmp); 877 iov_iter_advance(ii, tmp);
@@ -869,11 +884,19 @@ static ssize_t fuse_fill_write_pages(struct fuse_req *req,
869 if (!fc->big_writes) 884 if (!fc->big_writes)
870 break; 885 break;
871 } while (iov_iter_count(ii) && count < fc->max_write && 886 } while (iov_iter_count(ii) && count < fc->max_write &&
872 req->num_pages < FUSE_MAX_PAGES_PER_REQ && offset == 0); 887 req->num_pages < req->max_pages && offset == 0);
873 888
874 return count > 0 ? count : err; 889 return count > 0 ? count : err;
875} 890}
876 891
892static inline unsigned fuse_wr_pages(loff_t pos, size_t len)
893{
894 return min_t(unsigned,
895 ((pos + len - 1) >> PAGE_CACHE_SHIFT) -
896 (pos >> PAGE_CACHE_SHIFT) + 1,
897 FUSE_MAX_PAGES_PER_REQ);
898}
899
877static ssize_t fuse_perform_write(struct file *file, 900static ssize_t fuse_perform_write(struct file *file,
878 struct address_space *mapping, 901 struct address_space *mapping,
879 struct iov_iter *ii, loff_t pos) 902 struct iov_iter *ii, loff_t pos)
@@ -889,8 +912,9 @@ static ssize_t fuse_perform_write(struct file *file,
889 do { 912 do {
890 struct fuse_req *req; 913 struct fuse_req *req;
891 ssize_t count; 914 ssize_t count;
915 unsigned nr_pages = fuse_wr_pages(pos, iov_iter_count(ii));
892 916
893 req = fuse_get_req(fc); 917 req = fuse_get_req(fc, nr_pages);
894 if (IS_ERR(req)) { 918 if (IS_ERR(req)) {
895 err = PTR_ERR(req); 919 err = PTR_ERR(req);
896 break; 920 break;
@@ -1023,47 +1047,110 @@ static void fuse_release_user_pages(struct fuse_req *req, int write)
1023 } 1047 }
1024} 1048}
1025 1049
1026static int fuse_get_user_pages(struct fuse_req *req, const char __user *buf, 1050static inline void fuse_page_descs_length_init(struct fuse_req *req,
1051 unsigned index, unsigned nr_pages)
1052{
1053 int i;
1054
1055 for (i = index; i < index + nr_pages; i++)
1056 req->page_descs[i].length = PAGE_SIZE -
1057 req->page_descs[i].offset;
1058}
1059
1060static inline unsigned long fuse_get_user_addr(const struct iov_iter *ii)
1061{
1062 return (unsigned long)ii->iov->iov_base + ii->iov_offset;
1063}
1064
1065static inline size_t fuse_get_frag_size(const struct iov_iter *ii,
1066 size_t max_size)
1067{
1068 return min(iov_iter_single_seg_count(ii), max_size);
1069}
1070
1071static int fuse_get_user_pages(struct fuse_req *req, struct iov_iter *ii,
1027 size_t *nbytesp, int write) 1072 size_t *nbytesp, int write)
1028{ 1073{
1029 size_t nbytes = *nbytesp; 1074 size_t nbytes = 0; /* # bytes already packed in req */
1030 unsigned long user_addr = (unsigned long) buf;
1031 unsigned offset = user_addr & ~PAGE_MASK;
1032 int npages;
1033 1075
1034 /* Special case for kernel I/O: can copy directly into the buffer */ 1076 /* Special case for kernel I/O: can copy directly into the buffer */
1035 if (segment_eq(get_fs(), KERNEL_DS)) { 1077 if (segment_eq(get_fs(), KERNEL_DS)) {
1078 unsigned long user_addr = fuse_get_user_addr(ii);
1079 size_t frag_size = fuse_get_frag_size(ii, *nbytesp);
1080
1036 if (write) 1081 if (write)
1037 req->in.args[1].value = (void *) user_addr; 1082 req->in.args[1].value = (void *) user_addr;
1038 else 1083 else
1039 req->out.args[0].value = (void *) user_addr; 1084 req->out.args[0].value = (void *) user_addr;
1040 1085
1086 iov_iter_advance(ii, frag_size);
1087 *nbytesp = frag_size;
1041 return 0; 1088 return 0;
1042 } 1089 }
1043 1090
1044 nbytes = min_t(size_t, nbytes, FUSE_MAX_PAGES_PER_REQ << PAGE_SHIFT); 1091 while (nbytes < *nbytesp && req->num_pages < req->max_pages) {
1045 npages = (nbytes + offset + PAGE_SIZE - 1) >> PAGE_SHIFT; 1092 unsigned npages;
1046 npages = clamp(npages, 1, FUSE_MAX_PAGES_PER_REQ); 1093 unsigned long user_addr = fuse_get_user_addr(ii);
1047 npages = get_user_pages_fast(user_addr, npages, !write, req->pages); 1094 unsigned offset = user_addr & ~PAGE_MASK;
1048 if (npages < 0) 1095 size_t frag_size = fuse_get_frag_size(ii, *nbytesp - nbytes);
1049 return npages; 1096 int ret;
1097
1098 unsigned n = req->max_pages - req->num_pages;
1099 frag_size = min_t(size_t, frag_size, n << PAGE_SHIFT);
1100
1101 npages = (frag_size + offset + PAGE_SIZE - 1) >> PAGE_SHIFT;
1102 npages = clamp(npages, 1U, n);
1103
1104 ret = get_user_pages_fast(user_addr, npages, !write,
1105 &req->pages[req->num_pages]);
1106 if (ret < 0)
1107 return ret;
1050 1108
1051 req->num_pages = npages; 1109 npages = ret;
1052 req->page_offset = offset; 1110 frag_size = min_t(size_t, frag_size,
1111 (npages << PAGE_SHIFT) - offset);
1112 iov_iter_advance(ii, frag_size);
1113
1114 req->page_descs[req->num_pages].offset = offset;
1115 fuse_page_descs_length_init(req, req->num_pages, npages);
1116
1117 req->num_pages += npages;
1118 req->page_descs[req->num_pages - 1].length -=
1119 (npages << PAGE_SHIFT) - offset - frag_size;
1120
1121 nbytes += frag_size;
1122 }
1053 1123
1054 if (write) 1124 if (write)
1055 req->in.argpages = 1; 1125 req->in.argpages = 1;
1056 else 1126 else
1057 req->out.argpages = 1; 1127 req->out.argpages = 1;
1058 1128
1059 nbytes = (req->num_pages << PAGE_SHIFT) - req->page_offset; 1129 *nbytesp = nbytes;
1060 *nbytesp = min(*nbytesp, nbytes);
1061 1130
1062 return 0; 1131 return 0;
1063} 1132}
1064 1133
1065ssize_t fuse_direct_io(struct file *file, const char __user *buf, 1134static inline int fuse_iter_npages(const struct iov_iter *ii_p)
1066 size_t count, loff_t *ppos, int write) 1135{
1136 struct iov_iter ii = *ii_p;
1137 int npages = 0;
1138
1139 while (iov_iter_count(&ii) && npages < FUSE_MAX_PAGES_PER_REQ) {
1140 unsigned long user_addr = fuse_get_user_addr(&ii);
1141 unsigned offset = user_addr & ~PAGE_MASK;
1142 size_t frag_size = iov_iter_single_seg_count(&ii);
1143
1144 npages += (frag_size + offset + PAGE_SIZE - 1) >> PAGE_SHIFT;
1145 iov_iter_advance(&ii, frag_size);
1146 }
1147
1148 return min(npages, FUSE_MAX_PAGES_PER_REQ);
1149}
1150
1151ssize_t fuse_direct_io(struct file *file, const struct iovec *iov,
1152 unsigned long nr_segs, size_t count, loff_t *ppos,
1153 int write)
1067{ 1154{
1068 struct fuse_file *ff = file->private_data; 1155 struct fuse_file *ff = file->private_data;
1069 struct fuse_conn *fc = ff->fc; 1156 struct fuse_conn *fc = ff->fc;
@@ -1071,8 +1158,11 @@ ssize_t fuse_direct_io(struct file *file, const char __user *buf,
1071 loff_t pos = *ppos; 1158 loff_t pos = *ppos;
1072 ssize_t res = 0; 1159 ssize_t res = 0;
1073 struct fuse_req *req; 1160 struct fuse_req *req;
1161 struct iov_iter ii;
1162
1163 iov_iter_init(&ii, iov, nr_segs, count, 0);
1074 1164
1075 req = fuse_get_req(fc); 1165 req = fuse_get_req(fc, fuse_iter_npages(&ii));
1076 if (IS_ERR(req)) 1166 if (IS_ERR(req))
1077 return PTR_ERR(req); 1167 return PTR_ERR(req);
1078 1168
@@ -1080,7 +1170,7 @@ ssize_t fuse_direct_io(struct file *file, const char __user *buf,
1080 size_t nres; 1170 size_t nres;
1081 fl_owner_t owner = current->files; 1171 fl_owner_t owner = current->files;
1082 size_t nbytes = min(count, nmax); 1172 size_t nbytes = min(count, nmax);
1083 int err = fuse_get_user_pages(req, buf, &nbytes, write); 1173 int err = fuse_get_user_pages(req, &ii, &nbytes, write);
1084 if (err) { 1174 if (err) {
1085 res = err; 1175 res = err;
1086 break; 1176 break;
@@ -1103,12 +1193,11 @@ ssize_t fuse_direct_io(struct file *file, const char __user *buf,
1103 count -= nres; 1193 count -= nres;
1104 res += nres; 1194 res += nres;
1105 pos += nres; 1195 pos += nres;
1106 buf += nres;
1107 if (nres != nbytes) 1196 if (nres != nbytes)
1108 break; 1197 break;
1109 if (count) { 1198 if (count) {
1110 fuse_put_request(fc, req); 1199 fuse_put_request(fc, req);
1111 req = fuse_get_req(fc); 1200 req = fuse_get_req(fc, fuse_iter_npages(&ii));
1112 if (IS_ERR(req)) 1201 if (IS_ERR(req))
1113 break; 1202 break;
1114 } 1203 }
@@ -1122,8 +1211,8 @@ ssize_t fuse_direct_io(struct file *file, const char __user *buf,
1122} 1211}
1123EXPORT_SYMBOL_GPL(fuse_direct_io); 1212EXPORT_SYMBOL_GPL(fuse_direct_io);
1124 1213
1125static ssize_t fuse_direct_read(struct file *file, char __user *buf, 1214static ssize_t __fuse_direct_read(struct file *file, const struct iovec *iov,
1126 size_t count, loff_t *ppos) 1215 unsigned long nr_segs, loff_t *ppos)
1127{ 1216{
1128 ssize_t res; 1217 ssize_t res;
1129 struct inode *inode = file->f_path.dentry->d_inode; 1218 struct inode *inode = file->f_path.dentry->d_inode;
@@ -1131,22 +1220,31 @@ static ssize_t fuse_direct_read(struct file *file, char __user *buf,
1131 if (is_bad_inode(inode)) 1220 if (is_bad_inode(inode))
1132 return -EIO; 1221 return -EIO;
1133 1222
1134 res = fuse_direct_io(file, buf, count, ppos, 0); 1223 res = fuse_direct_io(file, iov, nr_segs, iov_length(iov, nr_segs),
1224 ppos, 0);
1135 1225
1136 fuse_invalidate_attr(inode); 1226 fuse_invalidate_attr(inode);
1137 1227
1138 return res; 1228 return res;
1139} 1229}
1140 1230
1141static ssize_t __fuse_direct_write(struct file *file, const char __user *buf, 1231static ssize_t fuse_direct_read(struct file *file, char __user *buf,
1142 size_t count, loff_t *ppos) 1232 size_t count, loff_t *ppos)
1233{
1234 struct iovec iov = { .iov_base = buf, .iov_len = count };
1235 return __fuse_direct_read(file, &iov, 1, ppos);
1236}
1237
1238static ssize_t __fuse_direct_write(struct file *file, const struct iovec *iov,
1239 unsigned long nr_segs, loff_t *ppos)
1143{ 1240{
1144 struct inode *inode = file->f_path.dentry->d_inode; 1241 struct inode *inode = file->f_path.dentry->d_inode;
1242 size_t count = iov_length(iov, nr_segs);
1145 ssize_t res; 1243 ssize_t res;
1146 1244
1147 res = generic_write_checks(file, ppos, &count, 0); 1245 res = generic_write_checks(file, ppos, &count, 0);
1148 if (!res) { 1246 if (!res) {
1149 res = fuse_direct_io(file, buf, count, ppos, 1); 1247 res = fuse_direct_io(file, iov, nr_segs, count, ppos, 1);
1150 if (res > 0) 1248 if (res > 0)
1151 fuse_write_update_size(inode, *ppos); 1249 fuse_write_update_size(inode, *ppos);
1152 } 1250 }
@@ -1159,6 +1257,7 @@ static ssize_t __fuse_direct_write(struct file *file, const char __user *buf,
1159static ssize_t fuse_direct_write(struct file *file, const char __user *buf, 1257static ssize_t fuse_direct_write(struct file *file, const char __user *buf,
1160 size_t count, loff_t *ppos) 1258 size_t count, loff_t *ppos)
1161{ 1259{
1260 struct iovec iov = { .iov_base = (void __user *)buf, .iov_len = count };
1162 struct inode *inode = file->f_path.dentry->d_inode; 1261 struct inode *inode = file->f_path.dentry->d_inode;
1163 ssize_t res; 1262 ssize_t res;
1164 1263
@@ -1167,7 +1266,7 @@ static ssize_t fuse_direct_write(struct file *file, const char __user *buf,
1167 1266
1168 /* Don't allow parallel writes to the same file */ 1267 /* Don't allow parallel writes to the same file */
1169 mutex_lock(&inode->i_mutex); 1268 mutex_lock(&inode->i_mutex);
1170 res = __fuse_direct_write(file, buf, count, ppos); 1269 res = __fuse_direct_write(file, &iov, 1, ppos);
1171 mutex_unlock(&inode->i_mutex); 1270 mutex_unlock(&inode->i_mutex);
1172 1271
1173 return res; 1272 return res;
@@ -1272,7 +1371,7 @@ static int fuse_writepage_locked(struct page *page)
1272 1371
1273 set_page_writeback(page); 1372 set_page_writeback(page);
1274 1373
1275 req = fuse_request_alloc_nofs(); 1374 req = fuse_request_alloc_nofs(1);
1276 if (!req) 1375 if (!req)
1277 goto err; 1376 goto err;
1278 1377
@@ -1293,7 +1392,8 @@ static int fuse_writepage_locked(struct page *page)
1293 req->in.argpages = 1; 1392 req->in.argpages = 1;
1294 req->num_pages = 1; 1393 req->num_pages = 1;
1295 req->pages[0] = tmp_page; 1394 req->pages[0] = tmp_page;
1296 req->page_offset = 0; 1395 req->page_descs[0].offset = 0;
1396 req->page_descs[0].length = PAGE_SIZE;
1297 req->end = fuse_writepage_end; 1397 req->end = fuse_writepage_end;
1298 req->inode = inode; 1398 req->inode = inode;
1299 1399
@@ -1471,7 +1571,7 @@ static int fuse_getlk(struct file *file, struct file_lock *fl)
1471 struct fuse_lk_out outarg; 1571 struct fuse_lk_out outarg;
1472 int err; 1572 int err;
1473 1573
1474 req = fuse_get_req(fc); 1574 req = fuse_get_req_nopages(fc);
1475 if (IS_ERR(req)) 1575 if (IS_ERR(req))
1476 return PTR_ERR(req); 1576 return PTR_ERR(req);
1477 1577
@@ -1506,7 +1606,7 @@ static int fuse_setlk(struct file *file, struct file_lock *fl, int flock)
1506 if (fl->fl_flags & FL_CLOSE) 1606 if (fl->fl_flags & FL_CLOSE)
1507 return 0; 1607 return 0;
1508 1608
1509 req = fuse_get_req(fc); 1609 req = fuse_get_req_nopages(fc);
1510 if (IS_ERR(req)) 1610 if (IS_ERR(req))
1511 return PTR_ERR(req); 1611 return PTR_ERR(req);
1512 1612
@@ -1575,7 +1675,7 @@ static sector_t fuse_bmap(struct address_space *mapping, sector_t block)
1575 if (!inode->i_sb->s_bdev || fc->no_bmap) 1675 if (!inode->i_sb->s_bdev || fc->no_bmap)
1576 return 0; 1676 return 0;
1577 1677
1578 req = fuse_get_req(fc); 1678 req = fuse_get_req_nopages(fc);
1579 if (IS_ERR(req)) 1679 if (IS_ERR(req))
1580 return 0; 1680 return 0;
1581 1681
@@ -1873,7 +1973,7 @@ long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg,
1873 num_pages++; 1973 num_pages++;
1874 } 1974 }
1875 1975
1876 req = fuse_get_req(fc); 1976 req = fuse_get_req(fc, num_pages);
1877 if (IS_ERR(req)) { 1977 if (IS_ERR(req)) {
1878 err = PTR_ERR(req); 1978 err = PTR_ERR(req);
1879 req = NULL; 1979 req = NULL;
@@ -1881,6 +1981,7 @@ long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg,
1881 } 1981 }
1882 memcpy(req->pages, pages, sizeof(req->pages[0]) * num_pages); 1982 memcpy(req->pages, pages, sizeof(req->pages[0]) * num_pages);
1883 req->num_pages = num_pages; 1983 req->num_pages = num_pages;
1984 fuse_page_descs_length_init(req, 0, req->num_pages);
1884 1985
1885 /* okay, let's send it to the client */ 1986 /* okay, let's send it to the client */
1886 req->in.h.opcode = FUSE_IOCTL; 1987 req->in.h.opcode = FUSE_IOCTL;
@@ -1981,7 +2082,7 @@ long fuse_ioctl_common(struct file *file, unsigned int cmd,
1981 struct inode *inode = file->f_dentry->d_inode; 2082 struct inode *inode = file->f_dentry->d_inode;
1982 struct fuse_conn *fc = get_fuse_conn(inode); 2083 struct fuse_conn *fc = get_fuse_conn(inode);
1983 2084
1984 if (!fuse_allow_task(fc, current)) 2085 if (!fuse_allow_current_process(fc))
1985 return -EACCES; 2086 return -EACCES;
1986 2087
1987 if (is_bad_inode(inode)) 2088 if (is_bad_inode(inode))
@@ -2066,6 +2167,7 @@ unsigned fuse_file_poll(struct file *file, poll_table *wait)
2066 return DEFAULT_POLLMASK; 2167 return DEFAULT_POLLMASK;
2067 2168
2068 poll_wait(file, &ff->poll_wait, wait); 2169 poll_wait(file, &ff->poll_wait, wait);
2170 inarg.events = (__u32)poll_requested_events(wait);
2069 2171
2070 /* 2172 /*
2071 * Ask for notification iff there's someone waiting for it. 2173 * Ask for notification iff there's someone waiting for it.
@@ -2076,7 +2178,7 @@ unsigned fuse_file_poll(struct file *file, poll_table *wait)
2076 fuse_register_polled_file(fc, ff); 2178 fuse_register_polled_file(fc, ff);
2077 } 2179 }
2078 2180
2079 req = fuse_get_req(fc); 2181 req = fuse_get_req_nopages(fc);
2080 if (IS_ERR(req)) 2182 if (IS_ERR(req))
2081 return POLLERR; 2183 return POLLERR;
2082 2184
@@ -2126,41 +2228,6 @@ int fuse_notify_poll_wakeup(struct fuse_conn *fc,
2126 return 0; 2228 return 0;
2127} 2229}
2128 2230
2129static ssize_t fuse_loop_dio(struct file *filp, const struct iovec *iov,
2130 unsigned long nr_segs, loff_t *ppos, int rw)
2131{
2132 const struct iovec *vector = iov;
2133 ssize_t ret = 0;
2134
2135 while (nr_segs > 0) {
2136 void __user *base;
2137 size_t len;
2138 ssize_t nr;
2139
2140 base = vector->iov_base;
2141 len = vector->iov_len;
2142 vector++;
2143 nr_segs--;
2144
2145 if (rw == WRITE)
2146 nr = __fuse_direct_write(filp, base, len, ppos);
2147 else
2148 nr = fuse_direct_read(filp, base, len, ppos);
2149
2150 if (nr < 0) {
2151 if (!ret)
2152 ret = nr;
2153 break;
2154 }
2155 ret += nr;
2156 if (nr != len)
2157 break;
2158 }
2159
2160 return ret;
2161}
2162
2163
2164static ssize_t 2231static ssize_t
2165fuse_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, 2232fuse_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
2166 loff_t offset, unsigned long nr_segs) 2233 loff_t offset, unsigned long nr_segs)
@@ -2172,13 +2239,16 @@ fuse_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
2172 file = iocb->ki_filp; 2239 file = iocb->ki_filp;
2173 pos = offset; 2240 pos = offset;
2174 2241
2175 ret = fuse_loop_dio(file, iov, nr_segs, &pos, rw); 2242 if (rw == WRITE)
2243 ret = __fuse_direct_write(file, iov, nr_segs, &pos);
2244 else
2245 ret = __fuse_direct_read(file, iov, nr_segs, &pos);
2176 2246
2177 return ret; 2247 return ret;
2178} 2248}
2179 2249
2180long fuse_file_fallocate(struct file *file, int mode, loff_t offset, 2250static long fuse_file_fallocate(struct file *file, int mode, loff_t offset,
2181 loff_t length) 2251 loff_t length)
2182{ 2252{
2183 struct fuse_file *ff = file->private_data; 2253 struct fuse_file *ff = file->private_data;
2184 struct fuse_conn *fc = ff->fc; 2254 struct fuse_conn *fc = ff->fc;
@@ -2194,7 +2264,7 @@ long fuse_file_fallocate(struct file *file, int mode, loff_t offset,
2194 if (fc->no_fallocate) 2264 if (fc->no_fallocate)
2195 return -EOPNOTSUPP; 2265 return -EOPNOTSUPP;
2196 2266
2197 req = fuse_get_req(fc); 2267 req = fuse_get_req_nopages(fc);
2198 if (IS_ERR(req)) 2268 if (IS_ERR(req))
2199 return PTR_ERR(req); 2269 return PTR_ERR(req);
2200 2270
@@ -2213,7 +2283,6 @@ long fuse_file_fallocate(struct file *file, int mode, loff_t offset,
2213 2283
2214 return err; 2284 return err;
2215} 2285}
2216EXPORT_SYMBOL_GPL(fuse_file_fallocate);
2217 2286
2218static const struct file_operations fuse_file_operations = { 2287static const struct file_operations fuse_file_operations = {
2219 .llseek = fuse_file_llseek, 2288 .llseek = fuse_file_llseek,
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index e105a53fc72d..6aeba864f070 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -44,6 +44,9 @@
44 doing the mount will be allowed to access the filesystem */ 44 doing the mount will be allowed to access the filesystem */
45#define FUSE_ALLOW_OTHER (1 << 1) 45#define FUSE_ALLOW_OTHER (1 << 1)
46 46
47/** Number of page pointers embedded in fuse_req */
48#define FUSE_REQ_INLINE_PAGES 1
49
47/** List of active connections */ 50/** List of active connections */
48extern struct list_head fuse_conn_list; 51extern struct list_head fuse_conn_list;
49 52
@@ -103,6 +106,15 @@ struct fuse_inode {
103 106
104 /** List of writepage requestst (pending or sent) */ 107 /** List of writepage requestst (pending or sent) */
105 struct list_head writepages; 108 struct list_head writepages;
109
110 /** Miscellaneous bits describing inode state */
111 unsigned long state;
112};
113
114/** FUSE inode state bits */
115enum {
116 /** Advise readdirplus */
117 FUSE_I_ADVISE_RDPLUS,
106}; 118};
107 119
108struct fuse_conn; 120struct fuse_conn;
@@ -200,6 +212,12 @@ struct fuse_out {
200 struct fuse_arg args[3]; 212 struct fuse_arg args[3];
201}; 213};
202 214
215/** FUSE page descriptor */
216struct fuse_page_desc {
217 unsigned int length;
218 unsigned int offset;
219};
220
203/** The request state */ 221/** The request state */
204enum fuse_req_state { 222enum fuse_req_state {
205 FUSE_REQ_INIT = 0, 223 FUSE_REQ_INIT = 0,
@@ -291,14 +309,23 @@ struct fuse_req {
291 } misc; 309 } misc;
292 310
293 /** page vector */ 311 /** page vector */
294 struct page *pages[FUSE_MAX_PAGES_PER_REQ]; 312 struct page **pages;
313
314 /** page-descriptor vector */
315 struct fuse_page_desc *page_descs;
316
317 /** size of the 'pages' array */
318 unsigned max_pages;
319
320 /** inline page vector */
321 struct page *inline_pages[FUSE_REQ_INLINE_PAGES];
322
323 /** inline page-descriptor vector */
324 struct fuse_page_desc inline_page_descs[FUSE_REQ_INLINE_PAGES];
295 325
296 /** number of pages in vector */ 326 /** number of pages in vector */
297 unsigned num_pages; 327 unsigned num_pages;
298 328
299 /** offset of data on first page */
300 unsigned page_offset;
301
302 /** File used in the request (or NULL) */ 329 /** File used in the request (or NULL) */
303 struct fuse_file *ff; 330 struct fuse_file *ff;
304 331
@@ -487,6 +514,12 @@ struct fuse_conn {
487 /** Use enhanced/automatic page cache invalidation. */ 514 /** Use enhanced/automatic page cache invalidation. */
488 unsigned auto_inval_data:1; 515 unsigned auto_inval_data:1;
489 516
517 /** Does the filesystem support readdirplus? */
518 unsigned do_readdirplus:1;
519
520 /** Does the filesystem want adaptive readdirplus? */
521 unsigned readdirplus_auto:1;
522
490 /** The number of requests waiting for completion */ 523 /** The number of requests waiting for completion */
491 atomic_t num_waiting; 524 atomic_t num_waiting;
492 525
@@ -578,6 +611,9 @@ void fuse_queue_forget(struct fuse_conn *fc, struct fuse_forget_link *forget,
578 611
579struct fuse_forget_link *fuse_alloc_forget(void); 612struct fuse_forget_link *fuse_alloc_forget(void);
580 613
614/* Used by READDIRPLUS */
615void fuse_force_forget(struct file *file, u64 nodeid);
616
581/** 617/**
582 * Initialize READ or READDIR request 618 * Initialize READ or READDIR request
583 */ 619 */
@@ -658,9 +694,9 @@ void fuse_ctl_cleanup(void);
658/** 694/**
659 * Allocate a request 695 * Allocate a request
660 */ 696 */
661struct fuse_req *fuse_request_alloc(void); 697struct fuse_req *fuse_request_alloc(unsigned npages);
662 698
663struct fuse_req *fuse_request_alloc_nofs(void); 699struct fuse_req *fuse_request_alloc_nofs(unsigned npages);
664 700
665/** 701/**
666 * Free a request 702 * Free a request
@@ -668,14 +704,25 @@ struct fuse_req *fuse_request_alloc_nofs(void);
668void fuse_request_free(struct fuse_req *req); 704void fuse_request_free(struct fuse_req *req);
669 705
670/** 706/**
671 * Get a request, may fail with -ENOMEM 707 * Get a request, may fail with -ENOMEM,
708 * caller should specify # elements in req->pages[] explicitly
672 */ 709 */
673struct fuse_req *fuse_get_req(struct fuse_conn *fc); 710struct fuse_req *fuse_get_req(struct fuse_conn *fc, unsigned npages);
711
712/**
713 * Get a request, may fail with -ENOMEM,
714 * useful for callers who don't use req->pages[]
715 */
716static inline struct fuse_req *fuse_get_req_nopages(struct fuse_conn *fc)
717{
718 return fuse_get_req(fc, 0);
719}
674 720
675/** 721/**
676 * Gets a request for a file operation, always succeeds 722 * Gets a request for a file operation, always succeeds
677 */ 723 */
678struct fuse_req *fuse_get_req_nofail(struct fuse_conn *fc, struct file *file); 724struct fuse_req *fuse_get_req_nofail_nopages(struct fuse_conn *fc,
725 struct file *file);
679 726
680/** 727/**
681 * Decrement reference count of a request. If count goes to zero free 728 * Decrement reference count of a request. If count goes to zero free
@@ -739,9 +786,9 @@ void fuse_ctl_remove_conn(struct fuse_conn *fc);
739int fuse_valid_type(int m); 786int fuse_valid_type(int m);
740 787
741/** 788/**
742 * Is task allowed to perform filesystem operation? 789 * Is current process allowed to perform filesystem operation?
743 */ 790 */
744int fuse_allow_task(struct fuse_conn *fc, struct task_struct *task); 791int fuse_allow_current_process(struct fuse_conn *fc);
745 792
746u64 fuse_lock_owner_id(struct fuse_conn *fc, fl_owner_t id); 793u64 fuse_lock_owner_id(struct fuse_conn *fc, fl_owner_t id);
747 794
@@ -776,8 +823,9 @@ int fuse_reverse_inval_entry(struct super_block *sb, u64 parent_nodeid,
776 823
777int fuse_do_open(struct fuse_conn *fc, u64 nodeid, struct file *file, 824int fuse_do_open(struct fuse_conn *fc, u64 nodeid, struct file *file,
778 bool isdir); 825 bool isdir);
779ssize_t fuse_direct_io(struct file *file, const char __user *buf, 826ssize_t fuse_direct_io(struct file *file, const struct iovec *iov,
780 size_t count, loff_t *ppos, int write); 827 unsigned long nr_segs, size_t count, loff_t *ppos,
828 int write);
781long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg, 829long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg,
782 unsigned int flags); 830 unsigned int flags);
783long fuse_ioctl_common(struct file *file, unsigned int cmd, 831long fuse_ioctl_common(struct file *file, unsigned int cmd,
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 73ca6b72beaf..01353ed75750 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -92,6 +92,7 @@ static struct inode *fuse_alloc_inode(struct super_block *sb)
92 fi->attr_version = 0; 92 fi->attr_version = 0;
93 fi->writectr = 0; 93 fi->writectr = 0;
94 fi->orig_ino = 0; 94 fi->orig_ino = 0;
95 fi->state = 0;
95 INIT_LIST_HEAD(&fi->write_files); 96 INIT_LIST_HEAD(&fi->write_files);
96 INIT_LIST_HEAD(&fi->queued_writes); 97 INIT_LIST_HEAD(&fi->queued_writes);
97 INIT_LIST_HEAD(&fi->writepages); 98 INIT_LIST_HEAD(&fi->writepages);
@@ -408,12 +409,12 @@ static int fuse_statfs(struct dentry *dentry, struct kstatfs *buf)
408 struct fuse_statfs_out outarg; 409 struct fuse_statfs_out outarg;
409 int err; 410 int err;
410 411
411 if (!fuse_allow_task(fc, current)) { 412 if (!fuse_allow_current_process(fc)) {
412 buf->f_type = FUSE_SUPER_MAGIC; 413 buf->f_type = FUSE_SUPER_MAGIC;
413 return 0; 414 return 0;
414 } 415 }
415 416
416 req = fuse_get_req(fc); 417 req = fuse_get_req_nopages(fc);
417 if (IS_ERR(req)) 418 if (IS_ERR(req))
418 return PTR_ERR(req); 419 return PTR_ERR(req);
419 420
@@ -863,6 +864,10 @@ static void process_init_reply(struct fuse_conn *fc, struct fuse_req *req)
863 fc->dont_mask = 1; 864 fc->dont_mask = 1;
864 if (arg->flags & FUSE_AUTO_INVAL_DATA) 865 if (arg->flags & FUSE_AUTO_INVAL_DATA)
865 fc->auto_inval_data = 1; 866 fc->auto_inval_data = 1;
867 if (arg->flags & FUSE_DO_READDIRPLUS)
868 fc->do_readdirplus = 1;
869 if (arg->flags & FUSE_READDIRPLUS_AUTO)
870 fc->readdirplus_auto = 1;
866 } else { 871 } else {
867 ra_pages = fc->max_read / PAGE_CACHE_SIZE; 872 ra_pages = fc->max_read / PAGE_CACHE_SIZE;
868 fc->no_lock = 1; 873 fc->no_lock = 1;
@@ -889,7 +894,8 @@ static void fuse_send_init(struct fuse_conn *fc, struct fuse_req *req)
889 arg->flags |= FUSE_ASYNC_READ | FUSE_POSIX_LOCKS | FUSE_ATOMIC_O_TRUNC | 894 arg->flags |= FUSE_ASYNC_READ | FUSE_POSIX_LOCKS | FUSE_ATOMIC_O_TRUNC |
890 FUSE_EXPORT_SUPPORT | FUSE_BIG_WRITES | FUSE_DONT_MASK | 895 FUSE_EXPORT_SUPPORT | FUSE_BIG_WRITES | FUSE_DONT_MASK |
891 FUSE_SPLICE_WRITE | FUSE_SPLICE_MOVE | FUSE_SPLICE_READ | 896 FUSE_SPLICE_WRITE | FUSE_SPLICE_MOVE | FUSE_SPLICE_READ |
892 FUSE_FLOCK_LOCKS | FUSE_IOCTL_DIR | FUSE_AUTO_INVAL_DATA; 897 FUSE_FLOCK_LOCKS | FUSE_IOCTL_DIR | FUSE_AUTO_INVAL_DATA |
898 FUSE_DO_READDIRPLUS | FUSE_READDIRPLUS_AUTO;
893 req->in.h.opcode = FUSE_INIT; 899 req->in.h.opcode = FUSE_INIT;
894 req->in.numargs = 1; 900 req->in.numargs = 1;
895 req->in.args[0].size = sizeof(*arg); 901 req->in.args[0].size = sizeof(*arg);
@@ -1034,12 +1040,12 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent)
1034 /* only now - we want root dentry with NULL ->d_op */ 1040 /* only now - we want root dentry with NULL ->d_op */
1035 sb->s_d_op = &fuse_dentry_operations; 1041 sb->s_d_op = &fuse_dentry_operations;
1036 1042
1037 init_req = fuse_request_alloc(); 1043 init_req = fuse_request_alloc(0);
1038 if (!init_req) 1044 if (!init_req)
1039 goto err_put_root; 1045 goto err_put_root;
1040 1046
1041 if (is_bdev) { 1047 if (is_bdev) {
1042 fc->destroy_req = fuse_request_alloc(); 1048 fc->destroy_req = fuse_request_alloc(0);
1043 if (!fc->destroy_req) 1049 if (!fc->destroy_req)
1044 goto err_free_init_req; 1050 goto err_free_init_req;
1045 } 1051 }
diff --git a/fs/gfs2/acl.c b/fs/gfs2/acl.c
index f850020ad906..f69ac0af5496 100644
--- a/fs/gfs2/acl.c
+++ b/fs/gfs2/acl.c
@@ -237,7 +237,7 @@ static int gfs2_xattr_system_set(struct dentry *dentry, const char *name,
237 return -EINVAL; 237 return -EINVAL;
238 if (type == ACL_TYPE_DEFAULT && !S_ISDIR(inode->i_mode)) 238 if (type == ACL_TYPE_DEFAULT && !S_ISDIR(inode->i_mode))
239 return value ? -EACCES : 0; 239 return value ? -EACCES : 0;
240 if ((current_fsuid() != inode->i_uid) && !capable(CAP_FOWNER)) 240 if (!uid_eq(current_fsuid(), inode->i_uid) && !capable(CAP_FOWNER))
241 return -EPERM; 241 return -EPERM;
242 if (S_ISLNK(inode->i_mode)) 242 if (S_ISLNK(inode->i_mode))
243 return -EOPNOTSUPP; 243 return -EOPNOTSUPP;
diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c
index 30de4f2a2ea9..24f414f0ce61 100644
--- a/fs/gfs2/aops.c
+++ b/fs/gfs2/aops.c
@@ -51,7 +51,7 @@ static void gfs2_page_add_databufs(struct gfs2_inode *ip, struct page *page,
51 continue; 51 continue;
52 if (gfs2_is_jdata(ip)) 52 if (gfs2_is_jdata(ip))
53 set_buffer_uptodate(bh); 53 set_buffer_uptodate(bh);
54 gfs2_trans_add_bh(ip->i_gl, bh, 0); 54 gfs2_trans_add_data(ip->i_gl, bh);
55 } 55 }
56} 56}
57 57
@@ -230,16 +230,14 @@ out_ignore:
230} 230}
231 231
232/** 232/**
233 * gfs2_writeback_writepages - Write a bunch of dirty pages back to disk 233 * gfs2_writepages - Write a bunch of dirty pages back to disk
234 * @mapping: The mapping to write 234 * @mapping: The mapping to write
235 * @wbc: Write-back control 235 * @wbc: Write-back control
236 * 236 *
237 * For the data=writeback case we can already ignore buffer heads 237 * Used for both ordered and writeback modes.
238 * and write whole extents at once. This is a big reduction in the
239 * number of I/O requests we send and the bmap calls we make in this case.
240 */ 238 */
241static int gfs2_writeback_writepages(struct address_space *mapping, 239static int gfs2_writepages(struct address_space *mapping,
242 struct writeback_control *wbc) 240 struct writeback_control *wbc)
243{ 241{
244 return mpage_writepages(mapping, wbc, gfs2_get_block_noalloc); 242 return mpage_writepages(mapping, wbc, gfs2_get_block_noalloc);
245} 243}
@@ -852,7 +850,7 @@ static int gfs2_write_end(struct file *file, struct address_space *mapping,
852 goto failed; 850 goto failed;
853 } 851 }
854 852
855 gfs2_trans_add_bh(ip->i_gl, dibh, 1); 853 gfs2_trans_add_meta(ip->i_gl, dibh);
856 854
857 if (gfs2_is_stuffed(ip)) 855 if (gfs2_is_stuffed(ip))
858 return gfs2_stuffed_write_end(inode, dibh, pos, len, copied, page); 856 return gfs2_stuffed_write_end(inode, dibh, pos, len, copied, page);
@@ -1102,7 +1100,7 @@ cannot_release:
1102 1100
1103static const struct address_space_operations gfs2_writeback_aops = { 1101static const struct address_space_operations gfs2_writeback_aops = {
1104 .writepage = gfs2_writeback_writepage, 1102 .writepage = gfs2_writeback_writepage,
1105 .writepages = gfs2_writeback_writepages, 1103 .writepages = gfs2_writepages,
1106 .readpage = gfs2_readpage, 1104 .readpage = gfs2_readpage,
1107 .readpages = gfs2_readpages, 1105 .readpages = gfs2_readpages,
1108 .write_begin = gfs2_write_begin, 1106 .write_begin = gfs2_write_begin,
@@ -1118,6 +1116,7 @@ static const struct address_space_operations gfs2_writeback_aops = {
1118 1116
1119static const struct address_space_operations gfs2_ordered_aops = { 1117static const struct address_space_operations gfs2_ordered_aops = {
1120 .writepage = gfs2_ordered_writepage, 1118 .writepage = gfs2_ordered_writepage,
1119 .writepages = gfs2_writepages,
1121 .readpage = gfs2_readpage, 1120 .readpage = gfs2_readpage,
1122 .readpages = gfs2_readpages, 1121 .readpages = gfs2_readpages,
1123 .write_begin = gfs2_write_begin, 1122 .write_begin = gfs2_write_begin,
diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index a68e91bcef3d..5e83657f046e 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -22,6 +22,7 @@
22#include "meta_io.h" 22#include "meta_io.h"
23#include "quota.h" 23#include "quota.h"
24#include "rgrp.h" 24#include "rgrp.h"
25#include "log.h"
25#include "super.h" 26#include "super.h"
26#include "trans.h" 27#include "trans.h"
27#include "dir.h" 28#include "dir.h"
@@ -93,7 +94,7 @@ static int gfs2_unstuffer_page(struct gfs2_inode *ip, struct buffer_head *dibh,
93 if (!gfs2_is_jdata(ip)) 94 if (!gfs2_is_jdata(ip))
94 mark_buffer_dirty(bh); 95 mark_buffer_dirty(bh);
95 if (!gfs2_is_writeback(ip)) 96 if (!gfs2_is_writeback(ip))
96 gfs2_trans_add_bh(ip->i_gl, bh, 0); 97 gfs2_trans_add_data(ip->i_gl, bh);
97 98
98 if (release) { 99 if (release) {
99 unlock_page(page); 100 unlock_page(page);
@@ -153,7 +154,7 @@ int gfs2_unstuff_dinode(struct gfs2_inode *ip, struct page *page)
153 154
154 /* Set up the pointer to the new block */ 155 /* Set up the pointer to the new block */
155 156
156 gfs2_trans_add_bh(ip->i_gl, dibh, 1); 157 gfs2_trans_add_meta(ip->i_gl, dibh);
157 di = (struct gfs2_dinode *)dibh->b_data; 158 di = (struct gfs2_dinode *)dibh->b_data;
158 gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode)); 159 gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode));
159 160
@@ -405,7 +406,7 @@ static inline __be64 *gfs2_indirect_init(struct metapath *mp,
405 BUG_ON(i < 1); 406 BUG_ON(i < 1);
406 BUG_ON(mp->mp_bh[i] != NULL); 407 BUG_ON(mp->mp_bh[i] != NULL);
407 mp->mp_bh[i] = gfs2_meta_new(gl, bn); 408 mp->mp_bh[i] = gfs2_meta_new(gl, bn);
408 gfs2_trans_add_bh(gl, mp->mp_bh[i], 1); 409 gfs2_trans_add_meta(gl, mp->mp_bh[i]);
409 gfs2_metatype_set(mp->mp_bh[i], GFS2_METATYPE_IN, GFS2_FORMAT_IN); 410 gfs2_metatype_set(mp->mp_bh[i], GFS2_METATYPE_IN, GFS2_FORMAT_IN);
410 gfs2_buffer_clear_tail(mp->mp_bh[i], sizeof(struct gfs2_meta_header)); 411 gfs2_buffer_clear_tail(mp->mp_bh[i], sizeof(struct gfs2_meta_header));
411 ptr += offset; 412 ptr += offset;
@@ -468,7 +469,7 @@ static int gfs2_bmap_alloc(struct inode *inode, const sector_t lblock,
468 BUG_ON(sheight < 1); 469 BUG_ON(sheight < 1);
469 BUG_ON(dibh == NULL); 470 BUG_ON(dibh == NULL);
470 471
471 gfs2_trans_add_bh(ip->i_gl, dibh, 1); 472 gfs2_trans_add_meta(ip->i_gl, dibh);
472 473
473 if (height == sheight) { 474 if (height == sheight) {
474 struct buffer_head *bh; 475 struct buffer_head *bh;
@@ -544,7 +545,7 @@ static int gfs2_bmap_alloc(struct inode *inode, const sector_t lblock,
544 /* Branching from existing tree */ 545 /* Branching from existing tree */
545 case ALLOC_GROW_DEPTH: 546 case ALLOC_GROW_DEPTH:
546 if (i > 1 && i < height) 547 if (i > 1 && i < height)
547 gfs2_trans_add_bh(ip->i_gl, mp->mp_bh[i-1], 1); 548 gfs2_trans_add_meta(ip->i_gl, mp->mp_bh[i-1]);
548 for (; i < height && n > 0; i++, n--) 549 for (; i < height && n > 0; i++, n--)
549 gfs2_indirect_init(mp, ip->i_gl, i, 550 gfs2_indirect_init(mp, ip->i_gl, i,
550 mp->mp_list[i-1], bn++); 551 mp->mp_list[i-1], bn++);
@@ -556,7 +557,7 @@ static int gfs2_bmap_alloc(struct inode *inode, const sector_t lblock,
556 case ALLOC_DATA: 557 case ALLOC_DATA:
557 BUG_ON(n > dblks); 558 BUG_ON(n > dblks);
558 BUG_ON(mp->mp_bh[end_of_metadata] == NULL); 559 BUG_ON(mp->mp_bh[end_of_metadata] == NULL);
559 gfs2_trans_add_bh(ip->i_gl, mp->mp_bh[end_of_metadata], 1); 560 gfs2_trans_add_meta(ip->i_gl, mp->mp_bh[end_of_metadata]);
560 dblks = n; 561 dblks = n;
561 ptr = metapointer(end_of_metadata, mp); 562 ptr = metapointer(end_of_metadata, mp);
562 dblock = bn; 563 dblock = bn;
@@ -796,8 +797,8 @@ static int do_strip(struct gfs2_inode *ip, struct buffer_head *dibh,
796 797
797 down_write(&ip->i_rw_mutex); 798 down_write(&ip->i_rw_mutex);
798 799
799 gfs2_trans_add_bh(ip->i_gl, dibh, 1); 800 gfs2_trans_add_meta(ip->i_gl, dibh);
800 gfs2_trans_add_bh(ip->i_gl, bh, 1); 801 gfs2_trans_add_meta(ip->i_gl, bh);
801 802
802 bstart = 0; 803 bstart = 0;
803 blen = 0; 804 blen = 0;
@@ -981,7 +982,7 @@ static int gfs2_block_truncate_page(struct address_space *mapping, loff_t from)
981 } 982 }
982 983
983 if (!gfs2_is_writeback(ip)) 984 if (!gfs2_is_writeback(ip))
984 gfs2_trans_add_bh(ip->i_gl, bh, 0); 985 gfs2_trans_add_data(ip->i_gl, bh);
985 986
986 zero_user(page, offset, length); 987 zero_user(page, offset, length);
987 mark_buffer_dirty(bh); 988 mark_buffer_dirty(bh);
@@ -1046,7 +1047,7 @@ static int trunc_start(struct inode *inode, u64 oldsize, u64 newsize)
1046 if (error) 1047 if (error)
1047 goto out; 1048 goto out;
1048 1049
1049 gfs2_trans_add_bh(ip->i_gl, dibh, 1); 1050 gfs2_trans_add_meta(ip->i_gl, dibh);
1050 1051
1051 if (gfs2_is_stuffed(ip)) { 1052 if (gfs2_is_stuffed(ip)) {
1052 gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode) + newsize); 1053 gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode) + newsize);
@@ -1098,7 +1099,7 @@ static int trunc_dealloc(struct gfs2_inode *ip, u64 size)
1098 if (error) 1099 if (error)
1099 return error; 1100 return error;
1100 1101
1101 error = gfs2_quota_hold(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE); 1102 error = gfs2_quota_hold(ip, NO_UID_QUOTA_CHANGE, NO_GID_QUOTA_CHANGE);
1102 if (error) 1103 if (error)
1103 return error; 1104 return error;
1104 1105
@@ -1137,11 +1138,12 @@ static int trunc_end(struct gfs2_inode *ip)
1137 ip->i_height = 0; 1138 ip->i_height = 0;
1138 ip->i_goal = ip->i_no_addr; 1139 ip->i_goal = ip->i_no_addr;
1139 gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode)); 1140 gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode));
1141 gfs2_ordered_del_inode(ip);
1140 } 1142 }
1141 ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME; 1143 ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME;
1142 ip->i_diskflags &= ~GFS2_DIF_TRUNC_IN_PROG; 1144 ip->i_diskflags &= ~GFS2_DIF_TRUNC_IN_PROG;
1143 1145
1144 gfs2_trans_add_bh(ip->i_gl, dibh, 1); 1146 gfs2_trans_add_meta(ip->i_gl, dibh);
1145 gfs2_dinode_out(ip, dibh->b_data); 1147 gfs2_dinode_out(ip, dibh->b_data);
1146 brelse(dibh); 1148 brelse(dibh);
1147 1149
@@ -1246,7 +1248,7 @@ static int do_grow(struct inode *inode, u64 size)
1246 1248
1247 i_size_write(inode, size); 1249 i_size_write(inode, size);
1248 ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME; 1250 ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME;
1249 gfs2_trans_add_bh(ip->i_gl, dibh, 1); 1251 gfs2_trans_add_meta(ip->i_gl, dibh);
1250 gfs2_dinode_out(ip, dibh->b_data); 1252 gfs2_dinode_out(ip, dibh->b_data);
1251 brelse(dibh); 1253 brelse(dibh);
1252 1254
@@ -1286,6 +1288,10 @@ int gfs2_setattr_size(struct inode *inode, u64 newsize)
1286 1288
1287 inode_dio_wait(inode); 1289 inode_dio_wait(inode);
1288 1290
1291 ret = gfs2_rs_alloc(GFS2_I(inode));
1292 if (ret)
1293 return ret;
1294
1289 oldsize = inode->i_size; 1295 oldsize = inode->i_size;
1290 if (newsize >= oldsize) 1296 if (newsize >= oldsize)
1291 return do_grow(inode, newsize); 1297 return do_grow(inode, newsize);
diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c
index 9a35670fdc38..c3e82bd23179 100644
--- a/fs/gfs2/dir.c
+++ b/fs/gfs2/dir.c
@@ -93,7 +93,7 @@ int gfs2_dir_get_new_buffer(struct gfs2_inode *ip, u64 block,
93 struct buffer_head *bh; 93 struct buffer_head *bh;
94 94
95 bh = gfs2_meta_new(ip->i_gl, block); 95 bh = gfs2_meta_new(ip->i_gl, block);
96 gfs2_trans_add_bh(ip->i_gl, bh, 1); 96 gfs2_trans_add_meta(ip->i_gl, bh);
97 gfs2_metatype_set(bh, GFS2_METATYPE_JD, GFS2_FORMAT_JD); 97 gfs2_metatype_set(bh, GFS2_METATYPE_JD, GFS2_FORMAT_JD);
98 gfs2_buffer_clear_tail(bh, sizeof(struct gfs2_meta_header)); 98 gfs2_buffer_clear_tail(bh, sizeof(struct gfs2_meta_header));
99 *bhp = bh; 99 *bhp = bh;
@@ -127,7 +127,7 @@ static int gfs2_dir_write_stuffed(struct gfs2_inode *ip, const char *buf,
127 if (error) 127 if (error)
128 return error; 128 return error;
129 129
130 gfs2_trans_add_bh(ip->i_gl, dibh, 1); 130 gfs2_trans_add_meta(ip->i_gl, dibh);
131 memcpy(dibh->b_data + offset + sizeof(struct gfs2_dinode), buf, size); 131 memcpy(dibh->b_data + offset + sizeof(struct gfs2_dinode), buf, size);
132 if (ip->i_inode.i_size < offset + size) 132 if (ip->i_inode.i_size < offset + size)
133 i_size_write(&ip->i_inode, offset + size); 133 i_size_write(&ip->i_inode, offset + size);
@@ -209,7 +209,7 @@ static int gfs2_dir_write_data(struct gfs2_inode *ip, const char *buf,
209 if (error) 209 if (error)
210 goto fail; 210 goto fail;
211 211
212 gfs2_trans_add_bh(ip->i_gl, bh, 1); 212 gfs2_trans_add_meta(ip->i_gl, bh);
213 memcpy(bh->b_data + o, buf, amount); 213 memcpy(bh->b_data + o, buf, amount);
214 brelse(bh); 214 brelse(bh);
215 215
@@ -231,7 +231,7 @@ out:
231 i_size_write(&ip->i_inode, offset + copied); 231 i_size_write(&ip->i_inode, offset + copied);
232 ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME; 232 ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME;
233 233
234 gfs2_trans_add_bh(ip->i_gl, dibh, 1); 234 gfs2_trans_add_meta(ip->i_gl, dibh);
235 gfs2_dinode_out(ip, dibh->b_data); 235 gfs2_dinode_out(ip, dibh->b_data);
236 brelse(dibh); 236 brelse(dibh);
237 237
@@ -647,7 +647,7 @@ static void dirent_del(struct gfs2_inode *dip, struct buffer_head *bh,
647 return; 647 return;
648 } 648 }
649 649
650 gfs2_trans_add_bh(dip->i_gl, bh, 1); 650 gfs2_trans_add_meta(dip->i_gl, bh);
651 651
652 /* If there is no prev entry, this is the first entry in the block. 652 /* If there is no prev entry, this is the first entry in the block.
653 The de_rec_len is already as big as it needs to be. Just zero 653 The de_rec_len is already as big as it needs to be. Just zero
@@ -690,7 +690,7 @@ static struct gfs2_dirent *gfs2_init_dirent(struct inode *inode,
690 offset = GFS2_DIRENT_SIZE(be16_to_cpu(dent->de_name_len)); 690 offset = GFS2_DIRENT_SIZE(be16_to_cpu(dent->de_name_len));
691 totlen = be16_to_cpu(dent->de_rec_len); 691 totlen = be16_to_cpu(dent->de_rec_len);
692 BUG_ON(offset + name->len > totlen); 692 BUG_ON(offset + name->len > totlen);
693 gfs2_trans_add_bh(ip->i_gl, bh, 1); 693 gfs2_trans_add_meta(ip->i_gl, bh);
694 ndent = (struct gfs2_dirent *)((char *)dent + offset); 694 ndent = (struct gfs2_dirent *)((char *)dent + offset);
695 dent->de_rec_len = cpu_to_be16(offset); 695 dent->de_rec_len = cpu_to_be16(offset);
696 gfs2_qstr2dirent(name, totlen - offset, ndent); 696 gfs2_qstr2dirent(name, totlen - offset, ndent);
@@ -831,7 +831,7 @@ static struct gfs2_leaf *new_leaf(struct inode *inode, struct buffer_head **pbh,
831 return NULL; 831 return NULL;
832 832
833 gfs2_trans_add_unrevoke(GFS2_SB(inode), bn, 1); 833 gfs2_trans_add_unrevoke(GFS2_SB(inode), bn, 1);
834 gfs2_trans_add_bh(ip->i_gl, bh, 1); 834 gfs2_trans_add_meta(ip->i_gl, bh);
835 gfs2_metatype_set(bh, GFS2_METATYPE_LF, GFS2_FORMAT_LF); 835 gfs2_metatype_set(bh, GFS2_METATYPE_LF, GFS2_FORMAT_LF);
836 leaf = (struct gfs2_leaf *)bh->b_data; 836 leaf = (struct gfs2_leaf *)bh->b_data;
837 leaf->lf_depth = cpu_to_be16(depth); 837 leaf->lf_depth = cpu_to_be16(depth);
@@ -916,7 +916,7 @@ static int dir_make_exhash(struct inode *inode)
916 /* We're done with the new leaf block, now setup the new 916 /* We're done with the new leaf block, now setup the new
917 hash table. */ 917 hash table. */
918 918
919 gfs2_trans_add_bh(dip->i_gl, dibh, 1); 919 gfs2_trans_add_meta(dip->i_gl, dibh);
920 gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode)); 920 gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode));
921 921
922 lp = (__be64 *)(dibh->b_data + sizeof(struct gfs2_dinode)); 922 lp = (__be64 *)(dibh->b_data + sizeof(struct gfs2_dinode));
@@ -976,7 +976,7 @@ static int dir_split_leaf(struct inode *inode, const struct qstr *name)
976 return 1; /* can't split */ 976 return 1; /* can't split */
977 } 977 }
978 978
979 gfs2_trans_add_bh(dip->i_gl, obh, 1); 979 gfs2_trans_add_meta(dip->i_gl, obh);
980 980
981 nleaf = new_leaf(inode, &nbh, be16_to_cpu(oleaf->lf_depth) + 1); 981 nleaf = new_leaf(inode, &nbh, be16_to_cpu(oleaf->lf_depth) + 1);
982 if (!nleaf) { 982 if (!nleaf) {
@@ -1069,7 +1069,7 @@ static int dir_split_leaf(struct inode *inode, const struct qstr *name)
1069 1069
1070 error = gfs2_meta_inode_buffer(dip, &dibh); 1070 error = gfs2_meta_inode_buffer(dip, &dibh);
1071 if (!gfs2_assert_withdraw(GFS2_SB(&dip->i_inode), !error)) { 1071 if (!gfs2_assert_withdraw(GFS2_SB(&dip->i_inode), !error)) {
1072 gfs2_trans_add_bh(dip->i_gl, dibh, 1); 1072 gfs2_trans_add_meta(dip->i_gl, dibh);
1073 gfs2_add_inode_blocks(&dip->i_inode, 1); 1073 gfs2_add_inode_blocks(&dip->i_inode, 1);
1074 gfs2_dinode_out(dip, dibh->b_data); 1074 gfs2_dinode_out(dip, dibh->b_data);
1075 brelse(dibh); 1075 brelse(dibh);
@@ -1622,7 +1622,7 @@ static int dir_new_leaf(struct inode *inode, const struct qstr *name)
1622 return error; 1622 return error;
1623 } while(1); 1623 } while(1);
1624 1624
1625 gfs2_trans_add_bh(ip->i_gl, obh, 1); 1625 gfs2_trans_add_meta(ip->i_gl, obh);
1626 1626
1627 leaf = new_leaf(inode, &bh, be16_to_cpu(oleaf->lf_depth)); 1627 leaf = new_leaf(inode, &bh, be16_to_cpu(oleaf->lf_depth));
1628 if (!leaf) { 1628 if (!leaf) {
@@ -1636,7 +1636,7 @@ static int dir_new_leaf(struct inode *inode, const struct qstr *name)
1636 error = gfs2_meta_inode_buffer(ip, &bh); 1636 error = gfs2_meta_inode_buffer(ip, &bh);
1637 if (error) 1637 if (error)
1638 return error; 1638 return error;
1639 gfs2_trans_add_bh(ip->i_gl, bh, 1); 1639 gfs2_trans_add_meta(ip->i_gl, bh);
1640 gfs2_add_inode_blocks(&ip->i_inode, 1); 1640 gfs2_add_inode_blocks(&ip->i_inode, 1);
1641 gfs2_dinode_out(ip, bh->b_data); 1641 gfs2_dinode_out(ip, bh->b_data);
1642 brelse(bh); 1642 brelse(bh);
@@ -1795,7 +1795,7 @@ int gfs2_dir_mvino(struct gfs2_inode *dip, const struct qstr *filename,
1795 if (IS_ERR(dent)) 1795 if (IS_ERR(dent))
1796 return PTR_ERR(dent); 1796 return PTR_ERR(dent);
1797 1797
1798 gfs2_trans_add_bh(dip->i_gl, bh, 1); 1798 gfs2_trans_add_meta(dip->i_gl, bh);
1799 gfs2_inum_out(nip, dent); 1799 gfs2_inum_out(nip, dent);
1800 dent->de_type = cpu_to_be16(new_type); 1800 dent->de_type = cpu_to_be16(new_type);
1801 1801
@@ -1804,7 +1804,7 @@ int gfs2_dir_mvino(struct gfs2_inode *dip, const struct qstr *filename,
1804 error = gfs2_meta_inode_buffer(dip, &bh); 1804 error = gfs2_meta_inode_buffer(dip, &bh);
1805 if (error) 1805 if (error)
1806 return error; 1806 return error;
1807 gfs2_trans_add_bh(dip->i_gl, bh, 1); 1807 gfs2_trans_add_meta(dip->i_gl, bh);
1808 } 1808 }
1809 1809
1810 dip->i_inode.i_mtime = dip->i_inode.i_ctime = CURRENT_TIME; 1810 dip->i_inode.i_mtime = dip->i_inode.i_ctime = CURRENT_TIME;
@@ -1849,7 +1849,7 @@ static int leaf_dealloc(struct gfs2_inode *dip, u32 index, u32 len,
1849 if (!ht) 1849 if (!ht)
1850 return -ENOMEM; 1850 return -ENOMEM;
1851 1851
1852 error = gfs2_quota_hold(dip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE); 1852 error = gfs2_quota_hold(dip, NO_UID_QUOTA_CHANGE, NO_GID_QUOTA_CHANGE);
1853 if (error) 1853 if (error)
1854 goto out; 1854 goto out;
1855 1855
@@ -1917,7 +1917,7 @@ static int leaf_dealloc(struct gfs2_inode *dip, u32 index, u32 len,
1917 if (error) 1917 if (error)
1918 goto out_end_trans; 1918 goto out_end_trans;
1919 1919
1920 gfs2_trans_add_bh(dip->i_gl, dibh, 1); 1920 gfs2_trans_add_meta(dip->i_gl, dibh);
1921 /* On the last dealloc, make this a regular file in case we crash. 1921 /* On the last dealloc, make this a regular file in case we crash.
1922 (We don't want to free these blocks a second time.) */ 1922 (We don't want to free these blocks a second time.) */
1923 if (last_dealloc) 1923 if (last_dealloc)
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index 991ab2d484dd..2687f50d98cb 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -276,7 +276,7 @@ static int do_gfs2_set_flags(struct file *filp, u32 reqflags, u32 mask)
276 error = gfs2_meta_inode_buffer(ip, &bh); 276 error = gfs2_meta_inode_buffer(ip, &bh);
277 if (error) 277 if (error)
278 goto out_trans_end; 278 goto out_trans_end;
279 gfs2_trans_add_bh(ip->i_gl, bh, 1); 279 gfs2_trans_add_meta(ip->i_gl, bh);
280 ip->i_diskflags = new_flags; 280 ip->i_diskflags = new_flags;
281 gfs2_dinode_out(ip, bh->b_data); 281 gfs2_dinode_out(ip, bh->b_data);
282 brelse(bh); 282 brelse(bh);
@@ -483,7 +483,7 @@ out:
483 gfs2_holder_uninit(&gh); 483 gfs2_holder_uninit(&gh);
484 if (ret == 0) { 484 if (ret == 0) {
485 set_page_dirty(page); 485 set_page_dirty(page);
486 wait_on_page_writeback(page); 486 wait_for_stable_page(page);
487 } 487 }
488 sb_end_pagefault(inode->i_sb); 488 sb_end_pagefault(inode->i_sb);
489 return block_page_mkwrite_return(ret); 489 return block_page_mkwrite_return(ret);
@@ -709,7 +709,7 @@ static int fallocate_chunk(struct inode *inode, loff_t offset, loff_t len,
709 if (unlikely(error)) 709 if (unlikely(error))
710 return error; 710 return error;
711 711
712 gfs2_trans_add_bh(ip->i_gl, dibh, 1); 712 gfs2_trans_add_meta(ip->i_gl, dibh);
713 713
714 if (gfs2_is_stuffed(ip)) { 714 if (gfs2_is_stuffed(ip)) {
715 error = gfs2_unstuff_dinode(ip, NULL); 715 error = gfs2_unstuff_dinode(ip, NULL);
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index 992c5c0cb504..cf3515546739 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -30,6 +30,7 @@
30#include <linux/rculist_bl.h> 30#include <linux/rculist_bl.h>
31#include <linux/bit_spinlock.h> 31#include <linux/bit_spinlock.h>
32#include <linux/percpu.h> 32#include <linux/percpu.h>
33#include <linux/list_sort.h>
33 34
34#include "gfs2.h" 35#include "gfs2.h"
35#include "incore.h" 36#include "incore.h"
@@ -1376,56 +1377,105 @@ void gfs2_glock_complete(struct gfs2_glock *gl, int ret)
1376 gfs2_glock_put(gl); 1377 gfs2_glock_put(gl);
1377} 1378}
1378 1379
1380static int glock_cmp(void *priv, struct list_head *a, struct list_head *b)
1381{
1382 struct gfs2_glock *gla, *glb;
1379 1383
1380static int gfs2_shrink_glock_memory(struct shrinker *shrink, 1384 gla = list_entry(a, struct gfs2_glock, gl_lru);
1381 struct shrink_control *sc) 1385 glb = list_entry(b, struct gfs2_glock, gl_lru);
1386
1387 if (gla->gl_name.ln_number > glb->gl_name.ln_number)
1388 return 1;
1389 if (gla->gl_name.ln_number < glb->gl_name.ln_number)
1390 return -1;
1391
1392 return 0;
1393}
1394
1395/**
1396 * gfs2_dispose_glock_lru - Demote a list of glocks
1397 * @list: The list to dispose of
1398 *
1399 * Disposing of glocks may involve disk accesses, so that here we sort
1400 * the glocks by number (i.e. disk location of the inodes) so that if
1401 * there are any such accesses, they'll be sent in order (mostly).
1402 *
1403 * Must be called under the lru_lock, but may drop and retake this
1404 * lock. While the lru_lock is dropped, entries may vanish from the
1405 * list, but no new entries will appear on the list (since it is
1406 * private)
1407 */
1408
1409static void gfs2_dispose_glock_lru(struct list_head *list)
1410__releases(&lru_lock)
1411__acquires(&lru_lock)
1382{ 1412{
1383 struct gfs2_glock *gl; 1413 struct gfs2_glock *gl;
1384 int may_demote;
1385 int nr_skipped = 0;
1386 int nr = sc->nr_to_scan;
1387 gfp_t gfp_mask = sc->gfp_mask;
1388 LIST_HEAD(skipped);
1389 1414
1390 if (nr == 0) 1415 list_sort(NULL, list, glock_cmp);
1391 goto out;
1392 1416
1393 if (!(gfp_mask & __GFP_FS)) 1417 while(!list_empty(list)) {
1394 return -1; 1418 gl = list_entry(list->next, struct gfs2_glock, gl_lru);
1419 list_del_init(&gl->gl_lru);
1420 clear_bit(GLF_LRU, &gl->gl_flags);
1421 gfs2_glock_hold(gl);
1422 spin_unlock(&lru_lock);
1423 spin_lock(&gl->gl_spin);
1424 if (demote_ok(gl))
1425 handle_callback(gl, LM_ST_UNLOCKED, 0);
1426 WARN_ON(!test_and_clear_bit(GLF_LOCK, &gl->gl_flags));
1427 smp_mb__after_clear_bit();
1428 if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0)
1429 gfs2_glock_put_nolock(gl);
1430 spin_unlock(&gl->gl_spin);
1431 spin_lock(&lru_lock);
1432 }
1433}
1434
1435/**
1436 * gfs2_scan_glock_lru - Scan the LRU looking for locks to demote
1437 * @nr: The number of entries to scan
1438 *
1439 * This function selects the entries on the LRU which are able to
1440 * be demoted, and then kicks off the process by calling
1441 * gfs2_dispose_glock_lru() above.
1442 */
1443
1444static void gfs2_scan_glock_lru(int nr)
1445{
1446 struct gfs2_glock *gl;
1447 LIST_HEAD(skipped);
1448 LIST_HEAD(dispose);
1395 1449
1396 spin_lock(&lru_lock); 1450 spin_lock(&lru_lock);
1397 while(nr && !list_empty(&lru_list)) { 1451 while(nr && !list_empty(&lru_list)) {
1398 gl = list_entry(lru_list.next, struct gfs2_glock, gl_lru); 1452 gl = list_entry(lru_list.next, struct gfs2_glock, gl_lru);
1399 list_del_init(&gl->gl_lru);
1400 clear_bit(GLF_LRU, &gl->gl_flags);
1401 atomic_dec(&lru_count);
1402 1453
1403 /* Test for being demotable */ 1454 /* Test for being demotable */
1404 if (!test_and_set_bit(GLF_LOCK, &gl->gl_flags)) { 1455 if (!test_and_set_bit(GLF_LOCK, &gl->gl_flags)) {
1405 gfs2_glock_hold(gl); 1456 list_move(&gl->gl_lru, &dispose);
1406 spin_unlock(&lru_lock); 1457 atomic_dec(&lru_count);
1407 spin_lock(&gl->gl_spin); 1458 nr--;
1408 may_demote = demote_ok(gl);
1409 if (may_demote) {
1410 handle_callback(gl, LM_ST_UNLOCKED, 0);
1411 nr--;
1412 }
1413 clear_bit(GLF_LOCK, &gl->gl_flags);
1414 smp_mb__after_clear_bit();
1415 if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0)
1416 gfs2_glock_put_nolock(gl);
1417 spin_unlock(&gl->gl_spin);
1418 spin_lock(&lru_lock);
1419 continue; 1459 continue;
1420 } 1460 }
1421 nr_skipped++; 1461
1422 list_add(&gl->gl_lru, &skipped); 1462 list_move(&gl->gl_lru, &skipped);
1423 set_bit(GLF_LRU, &gl->gl_flags);
1424 } 1463 }
1425 list_splice(&skipped, &lru_list); 1464 list_splice(&skipped, &lru_list);
1426 atomic_add(nr_skipped, &lru_count); 1465 if (!list_empty(&dispose))
1466 gfs2_dispose_glock_lru(&dispose);
1427 spin_unlock(&lru_lock); 1467 spin_unlock(&lru_lock);
1428out: 1468}
1469
1470static int gfs2_shrink_glock_memory(struct shrinker *shrink,
1471 struct shrink_control *sc)
1472{
1473 if (sc->nr_to_scan) {
1474 if (!(sc->gfp_mask & __GFP_FS))
1475 return -1;
1476 gfs2_scan_glock_lru(sc->nr_to_scan);
1477 }
1478
1429 return (atomic_read(&lru_count) / 100) * sysctl_vfs_cache_pressure; 1479 return (atomic_read(&lru_count) / 100) * sysctl_vfs_cache_pressure;
1430} 1480}
1431 1481
diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c
index 78d4184ffc7d..444b6503ebc4 100644
--- a/fs/gfs2/glops.c
+++ b/fs/gfs2/glops.c
@@ -322,8 +322,8 @@ static int gfs2_dinode_in(struct gfs2_inode *ip, const void *buf)
322 break; 322 break;
323 }; 323 };
324 324
325 ip->i_inode.i_uid = be32_to_cpu(str->di_uid); 325 i_uid_write(&ip->i_inode, be32_to_cpu(str->di_uid));
326 ip->i_inode.i_gid = be32_to_cpu(str->di_gid); 326 i_gid_write(&ip->i_inode, be32_to_cpu(str->di_gid));
327 gfs2_set_nlink(&ip->i_inode, be32_to_cpu(str->di_nlink)); 327 gfs2_set_nlink(&ip->i_inode, be32_to_cpu(str->di_nlink));
328 i_size_write(&ip->i_inode, be64_to_cpu(str->di_size)); 328 i_size_write(&ip->i_inode, be64_to_cpu(str->di_size));
329 gfs2_set_inode_blocks(&ip->i_inode, be64_to_cpu(str->di_blocks)); 329 gfs2_set_inode_blocks(&ip->i_inode, be64_to_cpu(str->di_blocks));
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index c373a24fedd9..156e42ec84ea 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -52,7 +52,6 @@ struct gfs2_log_header_host {
52 */ 52 */
53 53
54struct gfs2_log_operations { 54struct gfs2_log_operations {
55 void (*lo_add) (struct gfs2_sbd *sdp, struct gfs2_bufdata *bd);
56 void (*lo_before_commit) (struct gfs2_sbd *sdp); 55 void (*lo_before_commit) (struct gfs2_sbd *sdp);
57 void (*lo_after_commit) (struct gfs2_sbd *sdp, struct gfs2_ail *ai); 56 void (*lo_after_commit) (struct gfs2_sbd *sdp, struct gfs2_ail *ai);
58 void (*lo_before_scan) (struct gfs2_jdesc *jd, 57 void (*lo_before_scan) (struct gfs2_jdesc *jd,
@@ -341,6 +340,7 @@ enum {
341 GIF_QD_LOCKED = 1, 340 GIF_QD_LOCKED = 1,
342 GIF_ALLOC_FAILED = 2, 341 GIF_ALLOC_FAILED = 2,
343 GIF_SW_PAGED = 3, 342 GIF_SW_PAGED = 3,
343 GIF_ORDERED = 4,
344}; 344};
345 345
346struct gfs2_inode { 346struct gfs2_inode {
@@ -357,6 +357,7 @@ struct gfs2_inode {
357 struct gfs2_rgrpd *i_rgd; 357 struct gfs2_rgrpd *i_rgd;
358 u64 i_goal; /* goal block for allocations */ 358 u64 i_goal; /* goal block for allocations */
359 struct rw_semaphore i_rw_mutex; 359 struct rw_semaphore i_rw_mutex;
360 struct list_head i_ordered;
360 struct list_head i_trunc_list; 361 struct list_head i_trunc_list;
361 __be64 *i_hash_cache; 362 __be64 *i_hash_cache;
362 u32 i_entries; 363 u32 i_entries;
@@ -391,7 +392,6 @@ struct gfs2_revoke_replay {
391}; 392};
392 393
393enum { 394enum {
394 QDF_USER = 0,
395 QDF_CHANGE = 1, 395 QDF_CHANGE = 1,
396 QDF_LOCKED = 2, 396 QDF_LOCKED = 2,
397 QDF_REFRESH = 3, 397 QDF_REFRESH = 3,
@@ -403,7 +403,7 @@ struct gfs2_quota_data {
403 403
404 atomic_t qd_count; 404 atomic_t qd_count;
405 405
406 u32 qd_id; 406 struct kqid qd_id;
407 unsigned long qd_flags; /* QDF_... */ 407 unsigned long qd_flags; /* QDF_... */
408 408
409 s64 qd_change; 409 s64 qd_change;
@@ -641,6 +641,7 @@ struct gfs2_sbd {
641 wait_queue_head_t sd_glock_wait; 641 wait_queue_head_t sd_glock_wait;
642 atomic_t sd_glock_disposal; 642 atomic_t sd_glock_disposal;
643 struct completion sd_locking_init; 643 struct completion sd_locking_init;
644 struct completion sd_wdack;
644 struct delayed_work sd_control_work; 645 struct delayed_work sd_control_work;
645 646
646 /* Inode Stuff */ 647 /* Inode Stuff */
@@ -723,6 +724,7 @@ struct gfs2_sbd {
723 struct list_head sd_log_le_revoke; 724 struct list_head sd_log_le_revoke;
724 struct list_head sd_log_le_databuf; 725 struct list_head sd_log_le_databuf;
725 struct list_head sd_log_le_ordered; 726 struct list_head sd_log_le_ordered;
727 spinlock_t sd_ordered_lock;
726 728
727 atomic_t sd_log_thresh1; 729 atomic_t sd_log_thresh1;
728 atomic_t sd_log_thresh2; 730 atomic_t sd_log_thresh2;
@@ -758,10 +760,7 @@ struct gfs2_sbd {
758 unsigned int sd_replayed_blocks; 760 unsigned int sd_replayed_blocks;
759 761
760 /* For quiescing the filesystem */ 762 /* For quiescing the filesystem */
761
762 struct gfs2_holder sd_freeze_gh; 763 struct gfs2_holder sd_freeze_gh;
763 struct mutex sd_freeze_lock;
764 unsigned int sd_freeze_count;
765 764
766 char sd_fsname[GFS2_FSNAME_LEN]; 765 char sd_fsname[GFS2_FSNAME_LEN];
767 char sd_table_name[GFS2_FSNAME_LEN]; 766 char sd_table_name[GFS2_FSNAME_LEN];
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index 2b6f5698ef18..cc00bd1d1f87 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -368,10 +368,11 @@ static void munge_mode_uid_gid(const struct gfs2_inode *dip,
368 struct inode *inode) 368 struct inode *inode)
369{ 369{
370 if (GFS2_SB(&dip->i_inode)->sd_args.ar_suiddir && 370 if (GFS2_SB(&dip->i_inode)->sd_args.ar_suiddir &&
371 (dip->i_inode.i_mode & S_ISUID) && dip->i_inode.i_uid) { 371 (dip->i_inode.i_mode & S_ISUID) &&
372 !uid_eq(dip->i_inode.i_uid, GLOBAL_ROOT_UID)) {
372 if (S_ISDIR(inode->i_mode)) 373 if (S_ISDIR(inode->i_mode))
373 inode->i_mode |= S_ISUID; 374 inode->i_mode |= S_ISUID;
374 else if (dip->i_inode.i_uid != current_fsuid()) 375 else if (!uid_eq(dip->i_inode.i_uid, current_fsuid()))
375 inode->i_mode &= ~07111; 376 inode->i_mode &= ~07111;
376 inode->i_uid = dip->i_inode.i_uid; 377 inode->i_uid = dip->i_inode.i_uid;
377 } else 378 } else
@@ -447,7 +448,7 @@ static void init_dinode(struct gfs2_inode *dip, struct gfs2_inode *ip,
447 struct timespec tv = CURRENT_TIME; 448 struct timespec tv = CURRENT_TIME;
448 449
449 dibh = gfs2_meta_new(ip->i_gl, ip->i_no_addr); 450 dibh = gfs2_meta_new(ip->i_gl, ip->i_no_addr);
450 gfs2_trans_add_bh(ip->i_gl, dibh, 1); 451 gfs2_trans_add_meta(ip->i_gl, dibh);
451 gfs2_metatype_set(dibh, GFS2_METATYPE_DI, GFS2_FORMAT_DI); 452 gfs2_metatype_set(dibh, GFS2_METATYPE_DI, GFS2_FORMAT_DI);
452 gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode)); 453 gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode));
453 di = (struct gfs2_dinode *)dibh->b_data; 454 di = (struct gfs2_dinode *)dibh->b_data;
@@ -455,8 +456,8 @@ static void init_dinode(struct gfs2_inode *dip, struct gfs2_inode *ip,
455 di->di_num.no_formal_ino = cpu_to_be64(ip->i_no_formal_ino); 456 di->di_num.no_formal_ino = cpu_to_be64(ip->i_no_formal_ino);
456 di->di_num.no_addr = cpu_to_be64(ip->i_no_addr); 457 di->di_num.no_addr = cpu_to_be64(ip->i_no_addr);
457 di->di_mode = cpu_to_be32(ip->i_inode.i_mode); 458 di->di_mode = cpu_to_be32(ip->i_inode.i_mode);
458 di->di_uid = cpu_to_be32(ip->i_inode.i_uid); 459 di->di_uid = cpu_to_be32(i_uid_read(&ip->i_inode));
459 di->di_gid = cpu_to_be32(ip->i_inode.i_gid); 460 di->di_gid = cpu_to_be32(i_gid_read(&ip->i_inode));
460 di->di_nlink = 0; 461 di->di_nlink = 0;
461 di->di_size = cpu_to_be64(ip->i_inode.i_size); 462 di->di_size = cpu_to_be64(ip->i_inode.i_size);
462 di->di_blocks = cpu_to_be64(1); 463 di->di_blocks = cpu_to_be64(1);
@@ -548,7 +549,7 @@ static int link_dinode(struct gfs2_inode *dip, const struct qstr *name,
548 if (error) 549 if (error)
549 return error; 550 return error;
550 551
551 error = gfs2_quota_lock(dip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE); 552 error = gfs2_quota_lock(dip, NO_UID_QUOTA_CHANGE, NO_GID_QUOTA_CHANGE);
552 if (error) 553 if (error)
553 goto fail; 554 goto fail;
554 555
@@ -584,7 +585,7 @@ static int link_dinode(struct gfs2_inode *dip, const struct qstr *name,
584 if (error) 585 if (error)
585 goto fail_end_trans; 586 goto fail_end_trans;
586 set_nlink(&ip->i_inode, S_ISDIR(ip->i_inode.i_mode) ? 2 : 1); 587 set_nlink(&ip->i_inode, S_ISDIR(ip->i_inode.i_mode) ? 2 : 1);
587 gfs2_trans_add_bh(ip->i_gl, dibh, 1); 588 gfs2_trans_add_meta(ip->i_gl, dibh);
588 gfs2_dinode_out(ip, dibh->b_data); 589 gfs2_dinode_out(ip, dibh->b_data);
589 brelse(dibh); 590 brelse(dibh);
590 return 0; 591 return 0;
@@ -931,7 +932,7 @@ static int gfs2_link(struct dentry *old_dentry, struct inode *dir,
931 if (error) 932 if (error)
932 goto out_brelse; 933 goto out_brelse;
933 934
934 gfs2_trans_add_bh(ip->i_gl, dibh, 1); 935 gfs2_trans_add_meta(ip->i_gl, dibh);
935 inc_nlink(&ip->i_inode); 936 inc_nlink(&ip->i_inode);
936 ip->i_inode.i_ctime = CURRENT_TIME; 937 ip->i_inode.i_ctime = CURRENT_TIME;
937 ihold(inode); 938 ihold(inode);
@@ -978,8 +979,8 @@ static int gfs2_unlink_ok(struct gfs2_inode *dip, const struct qstr *name,
978 return -EPERM; 979 return -EPERM;
979 980
980 if ((dip->i_inode.i_mode & S_ISVTX) && 981 if ((dip->i_inode.i_mode & S_ISVTX) &&
981 dip->i_inode.i_uid != current_fsuid() && 982 !uid_eq(dip->i_inode.i_uid, current_fsuid()) &&
982 ip->i_inode.i_uid != current_fsuid() && !capable(CAP_FOWNER)) 983 !uid_eq(ip->i_inode.i_uid, current_fsuid()) && !capable(CAP_FOWNER))
983 return -EPERM; 984 return -EPERM;
984 985
985 if (IS_APPEND(&dip->i_inode)) 986 if (IS_APPEND(&dip->i_inode))
@@ -1412,7 +1413,7 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry,
1412 if (error) 1413 if (error)
1413 goto out_end_trans; 1414 goto out_end_trans;
1414 ip->i_inode.i_ctime = CURRENT_TIME; 1415 ip->i_inode.i_ctime = CURRENT_TIME;
1415 gfs2_trans_add_bh(ip->i_gl, dibh, 1); 1416 gfs2_trans_add_meta(ip->i_gl, dibh);
1416 gfs2_dinode_out(ip, dibh->b_data); 1417 gfs2_dinode_out(ip, dibh->b_data);
1417 brelse(dibh); 1418 brelse(dibh);
1418 } 1419 }
@@ -1580,7 +1581,8 @@ static int setattr_chown(struct inode *inode, struct iattr *attr)
1580{ 1581{
1581 struct gfs2_inode *ip = GFS2_I(inode); 1582 struct gfs2_inode *ip = GFS2_I(inode);
1582 struct gfs2_sbd *sdp = GFS2_SB(inode); 1583 struct gfs2_sbd *sdp = GFS2_SB(inode);
1583 u32 ouid, ogid, nuid, ngid; 1584 kuid_t ouid, nuid;
1585 kgid_t ogid, ngid;
1584 int error; 1586 int error;
1585 1587
1586 ouid = inode->i_uid; 1588 ouid = inode->i_uid;
@@ -1588,16 +1590,17 @@ static int setattr_chown(struct inode *inode, struct iattr *attr)
1588 nuid = attr->ia_uid; 1590 nuid = attr->ia_uid;
1589 ngid = attr->ia_gid; 1591 ngid = attr->ia_gid;
1590 1592
1591 if (!(attr->ia_valid & ATTR_UID) || ouid == nuid) 1593 if (!(attr->ia_valid & ATTR_UID) || uid_eq(ouid, nuid))
1592 ouid = nuid = NO_QUOTA_CHANGE; 1594 ouid = nuid = NO_UID_QUOTA_CHANGE;
1593 if (!(attr->ia_valid & ATTR_GID) || ogid == ngid) 1595 if (!(attr->ia_valid & ATTR_GID) || gid_eq(ogid, ngid))
1594 ogid = ngid = NO_QUOTA_CHANGE; 1596 ogid = ngid = NO_GID_QUOTA_CHANGE;
1595 1597
1596 error = gfs2_quota_lock(ip, nuid, ngid); 1598 error = gfs2_quota_lock(ip, nuid, ngid);
1597 if (error) 1599 if (error)
1598 return error; 1600 return error;
1599 1601
1600 if (ouid != NO_QUOTA_CHANGE || ogid != NO_QUOTA_CHANGE) { 1602 if (!uid_eq(ouid, NO_UID_QUOTA_CHANGE) ||
1603 !gid_eq(ogid, NO_GID_QUOTA_CHANGE)) {
1601 error = gfs2_quota_check(ip, nuid, ngid); 1604 error = gfs2_quota_check(ip, nuid, ngid);
1602 if (error) 1605 if (error)
1603 goto out_gunlock_q; 1606 goto out_gunlock_q;
@@ -1611,7 +1614,8 @@ static int setattr_chown(struct inode *inode, struct iattr *attr)
1611 if (error) 1614 if (error)
1612 goto out_end_trans; 1615 goto out_end_trans;
1613 1616
1614 if (ouid != NO_QUOTA_CHANGE || ogid != NO_QUOTA_CHANGE) { 1617 if (!uid_eq(ouid, NO_UID_QUOTA_CHANGE) ||
1618 !gid_eq(ogid, NO_GID_QUOTA_CHANGE)) {
1615 u64 blocks = gfs2_get_inode_blocks(&ip->i_inode); 1619 u64 blocks = gfs2_get_inode_blocks(&ip->i_inode);
1616 gfs2_quota_change(ip, -blocks, ouid, ogid); 1620 gfs2_quota_change(ip, -blocks, ouid, ogid);
1617 gfs2_quota_change(ip, blocks, nuid, ngid); 1621 gfs2_quota_change(ip, blocks, nuid, ngid);
diff --git a/fs/gfs2/lock_dlm.c b/fs/gfs2/lock_dlm.c
index b906ed17a839..9802de0f85e6 100644
--- a/fs/gfs2/lock_dlm.c
+++ b/fs/gfs2/lock_dlm.c
@@ -281,6 +281,7 @@ static void gdlm_put_lock(struct gfs2_glock *gl)
281{ 281{
282 struct gfs2_sbd *sdp = gl->gl_sbd; 282 struct gfs2_sbd *sdp = gl->gl_sbd;
283 struct lm_lockstruct *ls = &sdp->sd_lockstruct; 283 struct lm_lockstruct *ls = &sdp->sd_lockstruct;
284 int lvb_needs_unlock = 0;
284 int error; 285 int error;
285 286
286 if (gl->gl_lksb.sb_lkid == 0) { 287 if (gl->gl_lksb.sb_lkid == 0) {
@@ -294,8 +295,12 @@ static void gdlm_put_lock(struct gfs2_glock *gl)
294 gfs2_update_request_times(gl); 295 gfs2_update_request_times(gl);
295 296
296 /* don't want to skip dlm_unlock writing the lvb when lock is ex */ 297 /* don't want to skip dlm_unlock writing the lvb when lock is ex */
298
299 if (gl->gl_lksb.sb_lvbptr && (gl->gl_state == LM_ST_EXCLUSIVE))
300 lvb_needs_unlock = 1;
301
297 if (test_bit(SDF_SKIP_DLM_UNLOCK, &sdp->sd_flags) && 302 if (test_bit(SDF_SKIP_DLM_UNLOCK, &sdp->sd_flags) &&
298 gl->gl_lksb.sb_lvbptr && (gl->gl_state != LM_ST_EXCLUSIVE)) { 303 !lvb_needs_unlock) {
299 gfs2_glock_free(gl); 304 gfs2_glock_free(gl);
300 return; 305 return;
301 } 306 }
diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c
index f4beeb9c81c1..9a2ca8be7647 100644
--- a/fs/gfs2/log.c
+++ b/fs/gfs2/log.c
@@ -482,70 +482,66 @@ static void log_flush_wait(struct gfs2_sbd *sdp)
482 } 482 }
483} 483}
484 484
485static int bd_cmp(void *priv, struct list_head *a, struct list_head *b) 485static int ip_cmp(void *priv, struct list_head *a, struct list_head *b)
486{ 486{
487 struct gfs2_bufdata *bda, *bdb; 487 struct gfs2_inode *ipa, *ipb;
488 488
489 bda = list_entry(a, struct gfs2_bufdata, bd_list); 489 ipa = list_entry(a, struct gfs2_inode, i_ordered);
490 bdb = list_entry(b, struct gfs2_bufdata, bd_list); 490 ipb = list_entry(b, struct gfs2_inode, i_ordered);
491 491
492 if (bda->bd_bh->b_blocknr < bdb->bd_bh->b_blocknr) 492 if (ipa->i_no_addr < ipb->i_no_addr)
493 return -1; 493 return -1;
494 if (bda->bd_bh->b_blocknr > bdb->bd_bh->b_blocknr) 494 if (ipa->i_no_addr > ipb->i_no_addr)
495 return 1; 495 return 1;
496 return 0; 496 return 0;
497} 497}
498 498
499static void gfs2_ordered_write(struct gfs2_sbd *sdp) 499static void gfs2_ordered_write(struct gfs2_sbd *sdp)
500{ 500{
501 struct gfs2_bufdata *bd; 501 struct gfs2_inode *ip;
502 struct buffer_head *bh;
503 LIST_HEAD(written); 502 LIST_HEAD(written);
504 503
505 gfs2_log_lock(sdp); 504 spin_lock(&sdp->sd_ordered_lock);
506 list_sort(NULL, &sdp->sd_log_le_ordered, &bd_cmp); 505 list_sort(NULL, &sdp->sd_log_le_ordered, &ip_cmp);
507 while (!list_empty(&sdp->sd_log_le_ordered)) { 506 while (!list_empty(&sdp->sd_log_le_ordered)) {
508 bd = list_entry(sdp->sd_log_le_ordered.next, struct gfs2_bufdata, bd_list); 507 ip = list_entry(sdp->sd_log_le_ordered.next, struct gfs2_inode, i_ordered);
509 list_move(&bd->bd_list, &written); 508 list_move(&ip->i_ordered, &written);
510 bh = bd->bd_bh; 509 if (ip->i_inode.i_mapping->nrpages == 0)
511 if (!buffer_dirty(bh))
512 continue; 510 continue;
513 get_bh(bh); 511 spin_unlock(&sdp->sd_ordered_lock);
514 gfs2_log_unlock(sdp); 512 filemap_fdatawrite(ip->i_inode.i_mapping);
515 lock_buffer(bh); 513 spin_lock(&sdp->sd_ordered_lock);
516 if (buffer_mapped(bh) && test_clear_buffer_dirty(bh)) {
517 bh->b_end_io = end_buffer_write_sync;
518 submit_bh(WRITE_SYNC, bh);
519 } else {
520 unlock_buffer(bh);
521 brelse(bh);
522 }
523 gfs2_log_lock(sdp);
524 } 514 }
525 list_splice(&written, &sdp->sd_log_le_ordered); 515 list_splice(&written, &sdp->sd_log_le_ordered);
526 gfs2_log_unlock(sdp); 516 spin_unlock(&sdp->sd_ordered_lock);
527} 517}
528 518
529static void gfs2_ordered_wait(struct gfs2_sbd *sdp) 519static void gfs2_ordered_wait(struct gfs2_sbd *sdp)
530{ 520{
531 struct gfs2_bufdata *bd; 521 struct gfs2_inode *ip;
532 struct buffer_head *bh;
533 522
534 gfs2_log_lock(sdp); 523 spin_lock(&sdp->sd_ordered_lock);
535 while (!list_empty(&sdp->sd_log_le_ordered)) { 524 while (!list_empty(&sdp->sd_log_le_ordered)) {
536 bd = list_entry(sdp->sd_log_le_ordered.prev, struct gfs2_bufdata, bd_list); 525 ip = list_entry(sdp->sd_log_le_ordered.next, struct gfs2_inode, i_ordered);
537 bh = bd->bd_bh; 526 list_del(&ip->i_ordered);
538 if (buffer_locked(bh)) { 527 WARN_ON(!test_and_clear_bit(GIF_ORDERED, &ip->i_flags));
539 get_bh(bh); 528 if (ip->i_inode.i_mapping->nrpages == 0)
540 gfs2_log_unlock(sdp);
541 wait_on_buffer(bh);
542 brelse(bh);
543 gfs2_log_lock(sdp);
544 continue; 529 continue;
545 } 530 spin_unlock(&sdp->sd_ordered_lock);
546 list_del_init(&bd->bd_list); 531 filemap_fdatawait(ip->i_inode.i_mapping);
532 spin_lock(&sdp->sd_ordered_lock);
547 } 533 }
548 gfs2_log_unlock(sdp); 534 spin_unlock(&sdp->sd_ordered_lock);
535}
536
537void gfs2_ordered_del_inode(struct gfs2_inode *ip)
538{
539 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
540
541 spin_lock(&sdp->sd_ordered_lock);
542 if (test_and_clear_bit(GIF_ORDERED, &ip->i_flags))
543 list_del(&ip->i_ordered);
544 spin_unlock(&sdp->sd_ordered_lock);
549} 545}
550 546
551/** 547/**
diff --git a/fs/gfs2/log.h b/fs/gfs2/log.h
index 3fd5215ea25f..3566f35915e0 100644
--- a/fs/gfs2/log.h
+++ b/fs/gfs2/log.h
@@ -48,6 +48,18 @@ static inline void gfs2_log_pointers_init(struct gfs2_sbd *sdp,
48 sdp->sd_log_head = sdp->sd_log_tail = value; 48 sdp->sd_log_head = sdp->sd_log_tail = value;
49} 49}
50 50
51static inline void gfs2_ordered_add_inode(struct gfs2_inode *ip)
52{
53 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
54
55 if (!test_bit(GIF_ORDERED, &ip->i_flags)) {
56 spin_lock(&sdp->sd_ordered_lock);
57 if (!test_and_set_bit(GIF_ORDERED, &ip->i_flags))
58 list_add(&ip->i_ordered, &sdp->sd_log_le_ordered);
59 spin_unlock(&sdp->sd_ordered_lock);
60 }
61}
62extern void gfs2_ordered_del_inode(struct gfs2_inode *ip);
51extern unsigned int gfs2_struct2blk(struct gfs2_sbd *sdp, unsigned int nstruct, 63extern unsigned int gfs2_struct2blk(struct gfs2_sbd *sdp, unsigned int nstruct,
52 unsigned int ssize); 64 unsigned int ssize);
53 65
diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c
index 9ceccb1595a3..a5055977a214 100644
--- a/fs/gfs2/lops.c
+++ b/fs/gfs2/lops.c
@@ -37,7 +37,7 @@
37 * 37 *
38 * The log lock must be held when calling this function 38 * The log lock must be held when calling this function
39 */ 39 */
40static void gfs2_pin(struct gfs2_sbd *sdp, struct buffer_head *bh) 40void gfs2_pin(struct gfs2_sbd *sdp, struct buffer_head *bh)
41{ 41{
42 struct gfs2_bufdata *bd; 42 struct gfs2_bufdata *bd;
43 43
@@ -388,32 +388,6 @@ static struct page *gfs2_get_log_desc(struct gfs2_sbd *sdp, u32 ld_type,
388 return page; 388 return page;
389} 389}
390 390
391static void buf_lo_add(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd)
392{
393 struct gfs2_meta_header *mh;
394 struct gfs2_trans *tr;
395
396 tr = current->journal_info;
397 tr->tr_touched = 1;
398 if (!list_empty(&bd->bd_list))
399 return;
400 set_bit(GLF_LFLUSH, &bd->bd_gl->gl_flags);
401 set_bit(GLF_DIRTY, &bd->bd_gl->gl_flags);
402 mh = (struct gfs2_meta_header *)bd->bd_bh->b_data;
403 if (unlikely(mh->mh_magic != cpu_to_be32(GFS2_MAGIC))) {
404 printk(KERN_ERR
405 "Attempting to add uninitialised block to journal (inplace block=%lld)\n",
406 (unsigned long long)bd->bd_bh->b_blocknr);
407 BUG();
408 }
409 gfs2_pin(sdp, bd->bd_bh);
410 mh->__pad0 = cpu_to_be64(0);
411 mh->mh_jid = cpu_to_be32(sdp->sd_jdesc->jd_jid);
412 sdp->sd_log_num_buf++;
413 list_add(&bd->bd_list, &sdp->sd_log_le_buf);
414 tr->tr_num_buf_new++;
415}
416
417static void gfs2_check_magic(struct buffer_head *bh) 391static void gfs2_check_magic(struct buffer_head *bh)
418{ 392{
419 void *kaddr; 393 void *kaddr;
@@ -600,20 +574,6 @@ static void buf_lo_after_scan(struct gfs2_jdesc *jd, int error, int pass)
600 jd->jd_jid, sdp->sd_replayed_blocks, sdp->sd_found_blocks); 574 jd->jd_jid, sdp->sd_replayed_blocks, sdp->sd_found_blocks);
601} 575}
602 576
603static void revoke_lo_add(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd)
604{
605 struct gfs2_glock *gl = bd->bd_gl;
606 struct gfs2_trans *tr;
607
608 tr = current->journal_info;
609 tr->tr_touched = 1;
610 tr->tr_num_revoke++;
611 sdp->sd_log_num_revoke++;
612 atomic_inc(&gl->gl_revokes);
613 set_bit(GLF_LFLUSH, &gl->gl_flags);
614 list_add(&bd->bd_list, &sdp->sd_log_le_revoke);
615}
616
617static void revoke_lo_before_commit(struct gfs2_sbd *sdp) 577static void revoke_lo_before_commit(struct gfs2_sbd *sdp)
618{ 578{
619 struct gfs2_meta_header *mh; 579 struct gfs2_meta_header *mh;
@@ -749,44 +709,6 @@ static void revoke_lo_after_scan(struct gfs2_jdesc *jd, int error, int pass)
749} 709}
750 710
751/** 711/**
752 * databuf_lo_add - Add a databuf to the transaction.
753 *
754 * This is used in two distinct cases:
755 * i) In ordered write mode
756 * We put the data buffer on a list so that we can ensure that its
757 * synced to disk at the right time
758 * ii) In journaled data mode
759 * We need to journal the data block in the same way as metadata in
760 * the functions above. The difference is that here we have a tag
761 * which is two __be64's being the block number (as per meta data)
762 * and a flag which says whether the data block needs escaping or
763 * not. This means we need a new log entry for each 251 or so data
764 * blocks, which isn't an enormous overhead but twice as much as
765 * for normal metadata blocks.
766 */
767static void databuf_lo_add(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd)
768{
769 struct gfs2_trans *tr = current->journal_info;
770 struct address_space *mapping = bd->bd_bh->b_page->mapping;
771 struct gfs2_inode *ip = GFS2_I(mapping->host);
772
773 if (tr)
774 tr->tr_touched = 1;
775 if (!list_empty(&bd->bd_list))
776 return;
777 set_bit(GLF_LFLUSH, &bd->bd_gl->gl_flags);
778 set_bit(GLF_DIRTY, &bd->bd_gl->gl_flags);
779 if (gfs2_is_jdata(ip)) {
780 gfs2_pin(sdp, bd->bd_bh);
781 tr->tr_num_databuf_new++;
782 sdp->sd_log_num_databuf++;
783 list_add_tail(&bd->bd_list, &sdp->sd_log_le_databuf);
784 } else {
785 list_add_tail(&bd->bd_list, &sdp->sd_log_le_ordered);
786 }
787}
788
789/**
790 * databuf_lo_before_commit - Scan the data buffers, writing as we go 712 * databuf_lo_before_commit - Scan the data buffers, writing as we go
791 * 713 *
792 */ 714 */
@@ -885,7 +807,6 @@ static void databuf_lo_after_commit(struct gfs2_sbd *sdp, struct gfs2_ail *ai)
885 807
886 808
887const struct gfs2_log_operations gfs2_buf_lops = { 809const struct gfs2_log_operations gfs2_buf_lops = {
888 .lo_add = buf_lo_add,
889 .lo_before_commit = buf_lo_before_commit, 810 .lo_before_commit = buf_lo_before_commit,
890 .lo_after_commit = buf_lo_after_commit, 811 .lo_after_commit = buf_lo_after_commit,
891 .lo_before_scan = buf_lo_before_scan, 812 .lo_before_scan = buf_lo_before_scan,
@@ -895,7 +816,6 @@ const struct gfs2_log_operations gfs2_buf_lops = {
895}; 816};
896 817
897const struct gfs2_log_operations gfs2_revoke_lops = { 818const struct gfs2_log_operations gfs2_revoke_lops = {
898 .lo_add = revoke_lo_add,
899 .lo_before_commit = revoke_lo_before_commit, 819 .lo_before_commit = revoke_lo_before_commit,
900 .lo_after_commit = revoke_lo_after_commit, 820 .lo_after_commit = revoke_lo_after_commit,
901 .lo_before_scan = revoke_lo_before_scan, 821 .lo_before_scan = revoke_lo_before_scan,
@@ -909,7 +829,6 @@ const struct gfs2_log_operations gfs2_rg_lops = {
909}; 829};
910 830
911const struct gfs2_log_operations gfs2_databuf_lops = { 831const struct gfs2_log_operations gfs2_databuf_lops = {
912 .lo_add = databuf_lo_add,
913 .lo_before_commit = databuf_lo_before_commit, 832 .lo_before_commit = databuf_lo_before_commit,
914 .lo_after_commit = databuf_lo_after_commit, 833 .lo_after_commit = databuf_lo_after_commit,
915 .lo_scan_elements = databuf_lo_scan_elements, 834 .lo_scan_elements = databuf_lo_scan_elements,
diff --git a/fs/gfs2/lops.h b/fs/gfs2/lops.h
index 954a330585f4..ba77b7da8325 100644
--- a/fs/gfs2/lops.h
+++ b/fs/gfs2/lops.h
@@ -29,6 +29,7 @@ extern const struct gfs2_log_operations gfs2_databuf_lops;
29extern const struct gfs2_log_operations *gfs2_log_ops[]; 29extern const struct gfs2_log_operations *gfs2_log_ops[];
30extern void gfs2_log_write_page(struct gfs2_sbd *sdp, struct page *page); 30extern void gfs2_log_write_page(struct gfs2_sbd *sdp, struct page *page);
31extern void gfs2_log_flush_bio(struct gfs2_sbd *sdp, int rw); 31extern void gfs2_log_flush_bio(struct gfs2_sbd *sdp, int rw);
32extern void gfs2_pin(struct gfs2_sbd *sdp, struct buffer_head *bh);
32 33
33static inline unsigned int buf_limit(struct gfs2_sbd *sdp) 34static inline unsigned int buf_limit(struct gfs2_sbd *sdp)
34{ 35{
@@ -46,19 +47,6 @@ static inline unsigned int databuf_limit(struct gfs2_sbd *sdp)
46 return limit; 47 return limit;
47} 48}
48 49
49static inline void lops_init_le(struct gfs2_bufdata *bd,
50 const struct gfs2_log_operations *lops)
51{
52 INIT_LIST_HEAD(&bd->bd_list);
53 bd->bd_ops = lops;
54}
55
56static inline void lops_add(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd)
57{
58 if (bd->bd_ops->lo_add)
59 bd->bd_ops->lo_add(sdp, bd);
60}
61
62static inline void lops_before_commit(struct gfs2_sbd *sdp) 50static inline void lops_before_commit(struct gfs2_sbd *sdp)
63{ 51{
64 int x; 52 int x;
diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c
index 22255d96b27e..b059bbb5059e 100644
--- a/fs/gfs2/meta_io.c
+++ b/fs/gfs2/meta_io.c
@@ -271,41 +271,6 @@ int gfs2_meta_wait(struct gfs2_sbd *sdp, struct buffer_head *bh)
271 return 0; 271 return 0;
272} 272}
273 273
274/**
275 * gfs2_attach_bufdata - attach a struct gfs2_bufdata structure to a buffer
276 * @gl: the glock the buffer belongs to
277 * @bh: The buffer to be attached to
278 * @meta: Flag to indicate whether its metadata or not
279 */
280
281void gfs2_attach_bufdata(struct gfs2_glock *gl, struct buffer_head *bh,
282 int meta)
283{
284 struct gfs2_bufdata *bd;
285
286 if (meta)
287 lock_page(bh->b_page);
288
289 if (bh->b_private) {
290 if (meta)
291 unlock_page(bh->b_page);
292 return;
293 }
294
295 bd = kmem_cache_zalloc(gfs2_bufdata_cachep, GFP_NOFS | __GFP_NOFAIL);
296 bd->bd_bh = bh;
297 bd->bd_gl = gl;
298
299 if (meta)
300 lops_init_le(bd, &gfs2_buf_lops);
301 else
302 lops_init_le(bd, &gfs2_databuf_lops);
303 bh->b_private = bd;
304
305 if (meta)
306 unlock_page(bh->b_page);
307}
308
309void gfs2_remove_from_journal(struct buffer_head *bh, struct gfs2_trans *tr, int meta) 274void gfs2_remove_from_journal(struct buffer_head *bh, struct gfs2_trans *tr, int meta)
310{ 275{
311 struct address_space *mapping = bh->b_page->mapping; 276 struct address_space *mapping = bh->b_page->mapping;
diff --git a/fs/gfs2/meta_io.h b/fs/gfs2/meta_io.h
index c30973b07a7c..0d4c843b6f8e 100644
--- a/fs/gfs2/meta_io.h
+++ b/fs/gfs2/meta_io.h
@@ -56,9 +56,6 @@ int gfs2_meta_read(struct gfs2_glock *gl, u64 blkno,
56int gfs2_meta_wait(struct gfs2_sbd *sdp, struct buffer_head *bh); 56int gfs2_meta_wait(struct gfs2_sbd *sdp, struct buffer_head *bh);
57struct buffer_head *gfs2_getbuf(struct gfs2_glock *gl, u64 blkno, int create); 57struct buffer_head *gfs2_getbuf(struct gfs2_glock *gl, u64 blkno, int create);
58 58
59void gfs2_attach_bufdata(struct gfs2_glock *gl, struct buffer_head *bh,
60 int meta);
61
62void gfs2_remove_from_journal(struct buffer_head *bh, struct gfs2_trans *tr, 59void gfs2_remove_from_journal(struct buffer_head *bh, struct gfs2_trans *tr,
63 int meta); 60 int meta);
64 61
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index 0e3554edb8f2..1b612be4b873 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -81,6 +81,7 @@ static struct gfs2_sbd *init_sbd(struct super_block *sb)
81 init_waitqueue_head(&sdp->sd_glock_wait); 81 init_waitqueue_head(&sdp->sd_glock_wait);
82 atomic_set(&sdp->sd_glock_disposal, 0); 82 atomic_set(&sdp->sd_glock_disposal, 0);
83 init_completion(&sdp->sd_locking_init); 83 init_completion(&sdp->sd_locking_init);
84 init_completion(&sdp->sd_wdack);
84 spin_lock_init(&sdp->sd_statfs_spin); 85 spin_lock_init(&sdp->sd_statfs_spin);
85 86
86 spin_lock_init(&sdp->sd_rindex_spin); 87 spin_lock_init(&sdp->sd_rindex_spin);
@@ -102,6 +103,7 @@ static struct gfs2_sbd *init_sbd(struct super_block *sb)
102 INIT_LIST_HEAD(&sdp->sd_log_le_revoke); 103 INIT_LIST_HEAD(&sdp->sd_log_le_revoke);
103 INIT_LIST_HEAD(&sdp->sd_log_le_databuf); 104 INIT_LIST_HEAD(&sdp->sd_log_le_databuf);
104 INIT_LIST_HEAD(&sdp->sd_log_le_ordered); 105 INIT_LIST_HEAD(&sdp->sd_log_le_ordered);
106 spin_lock_init(&sdp->sd_ordered_lock);
105 107
106 init_waitqueue_head(&sdp->sd_log_waitq); 108 init_waitqueue_head(&sdp->sd_log_waitq);
107 init_waitqueue_head(&sdp->sd_logd_waitq); 109 init_waitqueue_head(&sdp->sd_logd_waitq);
@@ -115,8 +117,6 @@ static struct gfs2_sbd *init_sbd(struct super_block *sb)
115 117
116 INIT_LIST_HEAD(&sdp->sd_revoke_list); 118 INIT_LIST_HEAD(&sdp->sd_revoke_list);
117 119
118 mutex_init(&sdp->sd_freeze_lock);
119
120 return sdp; 120 return sdp;
121} 121}
122 122
diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c
index ae55e248c3b7..c7c840e916f8 100644
--- a/fs/gfs2/quota.c
+++ b/fs/gfs2/quota.c
@@ -65,13 +65,10 @@
65#include "inode.h" 65#include "inode.h"
66#include "util.h" 66#include "util.h"
67 67
68#define QUOTA_USER 1
69#define QUOTA_GROUP 0
70
71struct gfs2_quota_change_host { 68struct gfs2_quota_change_host {
72 u64 qc_change; 69 u64 qc_change;
73 u32 qc_flags; /* GFS2_QCF_... */ 70 u32 qc_flags; /* GFS2_QCF_... */
74 u32 qc_id; 71 struct kqid qc_id;
75}; 72};
76 73
77static LIST_HEAD(qd_lru_list); 74static LIST_HEAD(qd_lru_list);
@@ -120,17 +117,24 @@ out:
120 return (atomic_read(&qd_lru_count) * sysctl_vfs_cache_pressure) / 100; 117 return (atomic_read(&qd_lru_count) * sysctl_vfs_cache_pressure) / 100;
121} 118}
122 119
120static u64 qd2index(struct gfs2_quota_data *qd)
121{
122 struct kqid qid = qd->qd_id;
123 return (2 * (u64)from_kqid(&init_user_ns, qid)) +
124 (qid.type == USRQUOTA) ? 0 : 1;
125}
126
123static u64 qd2offset(struct gfs2_quota_data *qd) 127static u64 qd2offset(struct gfs2_quota_data *qd)
124{ 128{
125 u64 offset; 129 u64 offset;
126 130
127 offset = 2 * (u64)qd->qd_id + !test_bit(QDF_USER, &qd->qd_flags); 131 offset = qd2index(qd);
128 offset *= sizeof(struct gfs2_quota); 132 offset *= sizeof(struct gfs2_quota);
129 133
130 return offset; 134 return offset;
131} 135}
132 136
133static int qd_alloc(struct gfs2_sbd *sdp, int user, u32 id, 137static int qd_alloc(struct gfs2_sbd *sdp, struct kqid qid,
134 struct gfs2_quota_data **qdp) 138 struct gfs2_quota_data **qdp)
135{ 139{
136 struct gfs2_quota_data *qd; 140 struct gfs2_quota_data *qd;
@@ -141,13 +145,11 @@ static int qd_alloc(struct gfs2_sbd *sdp, int user, u32 id,
141 return -ENOMEM; 145 return -ENOMEM;
142 146
143 atomic_set(&qd->qd_count, 1); 147 atomic_set(&qd->qd_count, 1);
144 qd->qd_id = id; 148 qd->qd_id = qid;
145 if (user)
146 set_bit(QDF_USER, &qd->qd_flags);
147 qd->qd_slot = -1; 149 qd->qd_slot = -1;
148 INIT_LIST_HEAD(&qd->qd_reclaim); 150 INIT_LIST_HEAD(&qd->qd_reclaim);
149 151
150 error = gfs2_glock_get(sdp, 2 * (u64)id + !user, 152 error = gfs2_glock_get(sdp, qd2index(qd),
151 &gfs2_quota_glops, CREATE, &qd->qd_gl); 153 &gfs2_quota_glops, CREATE, &qd->qd_gl);
152 if (error) 154 if (error)
153 goto fail; 155 goto fail;
@@ -161,7 +163,7 @@ fail:
161 return error; 163 return error;
162} 164}
163 165
164static int qd_get(struct gfs2_sbd *sdp, int user, u32 id, 166static int qd_get(struct gfs2_sbd *sdp, struct kqid qid,
165 struct gfs2_quota_data **qdp) 167 struct gfs2_quota_data **qdp)
166{ 168{
167 struct gfs2_quota_data *qd = NULL, *new_qd = NULL; 169 struct gfs2_quota_data *qd = NULL, *new_qd = NULL;
@@ -173,8 +175,7 @@ static int qd_get(struct gfs2_sbd *sdp, int user, u32 id,
173 found = 0; 175 found = 0;
174 spin_lock(&qd_lru_lock); 176 spin_lock(&qd_lru_lock);
175 list_for_each_entry(qd, &sdp->sd_quota_list, qd_list) { 177 list_for_each_entry(qd, &sdp->sd_quota_list, qd_list) {
176 if (qd->qd_id == id && 178 if (qid_eq(qd->qd_id, qid)) {
177 !test_bit(QDF_USER, &qd->qd_flags) == !user) {
178 if (!atomic_read(&qd->qd_count) && 179 if (!atomic_read(&qd->qd_count) &&
179 !list_empty(&qd->qd_reclaim)) { 180 !list_empty(&qd->qd_reclaim)) {
180 /* Remove it from reclaim list */ 181 /* Remove it from reclaim list */
@@ -208,7 +209,7 @@ static int qd_get(struct gfs2_sbd *sdp, int user, u32 id,
208 return 0; 209 return 0;
209 } 210 }
210 211
211 error = qd_alloc(sdp, user, id, &new_qd); 212 error = qd_alloc(sdp, qid, &new_qd);
212 if (error) 213 if (error)
213 return error; 214 return error;
214 } 215 }
@@ -458,12 +459,12 @@ static void qd_unlock(struct gfs2_quota_data *qd)
458 qd_put(qd); 459 qd_put(qd);
459} 460}
460 461
461static int qdsb_get(struct gfs2_sbd *sdp, int user, u32 id, 462static int qdsb_get(struct gfs2_sbd *sdp, struct kqid qid,
462 struct gfs2_quota_data **qdp) 463 struct gfs2_quota_data **qdp)
463{ 464{
464 int error; 465 int error;
465 466
466 error = qd_get(sdp, user, id, qdp); 467 error = qd_get(sdp, qid, qdp);
467 if (error) 468 if (error)
468 return error; 469 return error;
469 470
@@ -491,7 +492,7 @@ static void qdsb_put(struct gfs2_quota_data *qd)
491 qd_put(qd); 492 qd_put(qd);
492} 493}
493 494
494int gfs2_quota_hold(struct gfs2_inode *ip, u32 uid, u32 gid) 495int gfs2_quota_hold(struct gfs2_inode *ip, kuid_t uid, kgid_t gid)
495{ 496{
496 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); 497 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
497 struct gfs2_quota_data **qd; 498 struct gfs2_quota_data **qd;
@@ -512,28 +513,30 @@ int gfs2_quota_hold(struct gfs2_inode *ip, u32 uid, u32 gid)
512 if (sdp->sd_args.ar_quota == GFS2_QUOTA_OFF) 513 if (sdp->sd_args.ar_quota == GFS2_QUOTA_OFF)
513 return 0; 514 return 0;
514 515
515 error = qdsb_get(sdp, QUOTA_USER, ip->i_inode.i_uid, qd); 516 error = qdsb_get(sdp, make_kqid_uid(ip->i_inode.i_uid), qd);
516 if (error) 517 if (error)
517 goto out; 518 goto out;
518 ip->i_res->rs_qa_qd_num++; 519 ip->i_res->rs_qa_qd_num++;
519 qd++; 520 qd++;
520 521
521 error = qdsb_get(sdp, QUOTA_GROUP, ip->i_inode.i_gid, qd); 522 error = qdsb_get(sdp, make_kqid_gid(ip->i_inode.i_gid), qd);
522 if (error) 523 if (error)
523 goto out; 524 goto out;
524 ip->i_res->rs_qa_qd_num++; 525 ip->i_res->rs_qa_qd_num++;
525 qd++; 526 qd++;
526 527
527 if (uid != NO_QUOTA_CHANGE && uid != ip->i_inode.i_uid) { 528 if (!uid_eq(uid, NO_UID_QUOTA_CHANGE) &&
528 error = qdsb_get(sdp, QUOTA_USER, uid, qd); 529 !uid_eq(uid, ip->i_inode.i_uid)) {
530 error = qdsb_get(sdp, make_kqid_uid(uid), qd);
529 if (error) 531 if (error)
530 goto out; 532 goto out;
531 ip->i_res->rs_qa_qd_num++; 533 ip->i_res->rs_qa_qd_num++;
532 qd++; 534 qd++;
533 } 535 }
534 536
535 if (gid != NO_QUOTA_CHANGE && gid != ip->i_inode.i_gid) { 537 if (!gid_eq(gid, NO_GID_QUOTA_CHANGE) &&
536 error = qdsb_get(sdp, QUOTA_GROUP, gid, qd); 538 !gid_eq(gid, ip->i_inode.i_gid)) {
539 error = qdsb_get(sdp, make_kqid_gid(gid), qd);
537 if (error) 540 if (error)
538 goto out; 541 goto out;
539 ip->i_res->rs_qa_qd_num++; 542 ip->i_res->rs_qa_qd_num++;
@@ -567,18 +570,10 @@ static int sort_qd(const void *a, const void *b)
567 const struct gfs2_quota_data *qd_a = *(const struct gfs2_quota_data **)a; 570 const struct gfs2_quota_data *qd_a = *(const struct gfs2_quota_data **)a;
568 const struct gfs2_quota_data *qd_b = *(const struct gfs2_quota_data **)b; 571 const struct gfs2_quota_data *qd_b = *(const struct gfs2_quota_data **)b;
569 572
570 if (!test_bit(QDF_USER, &qd_a->qd_flags) != 573 if (qid_lt(qd_a->qd_id, qd_b->qd_id))
571 !test_bit(QDF_USER, &qd_b->qd_flags)) {
572 if (test_bit(QDF_USER, &qd_a->qd_flags))
573 return -1;
574 else
575 return 1;
576 }
577 if (qd_a->qd_id < qd_b->qd_id)
578 return -1; 574 return -1;
579 if (qd_a->qd_id > qd_b->qd_id) 575 if (qid_lt(qd_b->qd_id, qd_a->qd_id))
580 return 1; 576 return 1;
581
582 return 0; 577 return 0;
583} 578}
584 579
@@ -590,14 +585,14 @@ static void do_qc(struct gfs2_quota_data *qd, s64 change)
590 s64 x; 585 s64 x;
591 586
592 mutex_lock(&sdp->sd_quota_mutex); 587 mutex_lock(&sdp->sd_quota_mutex);
593 gfs2_trans_add_bh(ip->i_gl, qd->qd_bh, 1); 588 gfs2_trans_add_meta(ip->i_gl, qd->qd_bh);
594 589
595 if (!test_bit(QDF_CHANGE, &qd->qd_flags)) { 590 if (!test_bit(QDF_CHANGE, &qd->qd_flags)) {
596 qc->qc_change = 0; 591 qc->qc_change = 0;
597 qc->qc_flags = 0; 592 qc->qc_flags = 0;
598 if (test_bit(QDF_USER, &qd->qd_flags)) 593 if (qd->qd_id.type == USRQUOTA)
599 qc->qc_flags = cpu_to_be32(GFS2_QCF_USER); 594 qc->qc_flags = cpu_to_be32(GFS2_QCF_USER);
600 qc->qc_id = cpu_to_be32(qd->qd_id); 595 qc->qc_id = cpu_to_be32(from_kqid(&init_user_ns, qd->qd_id));
601 } 596 }
602 597
603 x = be64_to_cpu(qc->qc_change) + change; 598 x = be64_to_cpu(qc->qc_change) + change;
@@ -726,7 +721,7 @@ get_a_page:
726 goto unlock_out; 721 goto unlock_out;
727 } 722 }
728 723
729 gfs2_trans_add_bh(ip->i_gl, bh, 0); 724 gfs2_trans_add_meta(ip->i_gl, bh);
730 725
731 kaddr = kmap_atomic(page); 726 kaddr = kmap_atomic(page);
732 if (offset + sizeof(struct gfs2_quota) > PAGE_CACHE_SIZE) 727 if (offset + sizeof(struct gfs2_quota) > PAGE_CACHE_SIZE)
@@ -925,7 +920,7 @@ fail:
925 return error; 920 return error;
926} 921}
927 922
928int gfs2_quota_lock(struct gfs2_inode *ip, u32 uid, u32 gid) 923int gfs2_quota_lock(struct gfs2_inode *ip, kuid_t uid, kgid_t gid)
929{ 924{
930 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); 925 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
931 struct gfs2_quota_data *qd; 926 struct gfs2_quota_data *qd;
@@ -1040,13 +1035,13 @@ static int print_message(struct gfs2_quota_data *qd, char *type)
1040 1035
1041 printk(KERN_INFO "GFS2: fsid=%s: quota %s for %s %u\n", 1036 printk(KERN_INFO "GFS2: fsid=%s: quota %s for %s %u\n",
1042 sdp->sd_fsname, type, 1037 sdp->sd_fsname, type,
1043 (test_bit(QDF_USER, &qd->qd_flags)) ? "user" : "group", 1038 (qd->qd_id.type == USRQUOTA) ? "user" : "group",
1044 qd->qd_id); 1039 from_kqid(&init_user_ns, qd->qd_id));
1045 1040
1046 return 0; 1041 return 0;
1047} 1042}
1048 1043
1049int gfs2_quota_check(struct gfs2_inode *ip, u32 uid, u32 gid) 1044int gfs2_quota_check(struct gfs2_inode *ip, kuid_t uid, kgid_t gid)
1050{ 1045{
1051 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); 1046 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
1052 struct gfs2_quota_data *qd; 1047 struct gfs2_quota_data *qd;
@@ -1063,8 +1058,8 @@ int gfs2_quota_check(struct gfs2_inode *ip, u32 uid, u32 gid)
1063 for (x = 0; x < ip->i_res->rs_qa_qd_num; x++) { 1058 for (x = 0; x < ip->i_res->rs_qa_qd_num; x++) {
1064 qd = ip->i_res->rs_qa_qd[x]; 1059 qd = ip->i_res->rs_qa_qd[x];
1065 1060
1066 if (!((qd->qd_id == uid && test_bit(QDF_USER, &qd->qd_flags)) || 1061 if (!(qid_eq(qd->qd_id, make_kqid_uid(uid)) ||
1067 (qd->qd_id == gid && !test_bit(QDF_USER, &qd->qd_flags)))) 1062 qid_eq(qd->qd_id, make_kqid_gid(gid))))
1068 continue; 1063 continue;
1069 1064
1070 value = (s64)be64_to_cpu(qd->qd_qb.qb_value); 1065 value = (s64)be64_to_cpu(qd->qd_qb.qb_value);
@@ -1074,10 +1069,7 @@ int gfs2_quota_check(struct gfs2_inode *ip, u32 uid, u32 gid)
1074 1069
1075 if (be64_to_cpu(qd->qd_qb.qb_limit) && (s64)be64_to_cpu(qd->qd_qb.qb_limit) < value) { 1070 if (be64_to_cpu(qd->qd_qb.qb_limit) && (s64)be64_to_cpu(qd->qd_qb.qb_limit) < value) {
1076 print_message(qd, "exceeded"); 1071 print_message(qd, "exceeded");
1077 quota_send_warning(make_kqid(&init_user_ns, 1072 quota_send_warning(qd->qd_id,
1078 test_bit(QDF_USER, &qd->qd_flags) ?
1079 USRQUOTA : GRPQUOTA,
1080 qd->qd_id),
1081 sdp->sd_vfs->s_dev, QUOTA_NL_BHARDWARN); 1073 sdp->sd_vfs->s_dev, QUOTA_NL_BHARDWARN);
1082 1074
1083 error = -EDQUOT; 1075 error = -EDQUOT;
@@ -1087,10 +1079,7 @@ int gfs2_quota_check(struct gfs2_inode *ip, u32 uid, u32 gid)
1087 time_after_eq(jiffies, qd->qd_last_warn + 1079 time_after_eq(jiffies, qd->qd_last_warn +
1088 gfs2_tune_get(sdp, 1080 gfs2_tune_get(sdp,
1089 gt_quota_warn_period) * HZ)) { 1081 gt_quota_warn_period) * HZ)) {
1090 quota_send_warning(make_kqid(&init_user_ns, 1082 quota_send_warning(qd->qd_id,
1091 test_bit(QDF_USER, &qd->qd_flags) ?
1092 USRQUOTA : GRPQUOTA,
1093 qd->qd_id),
1094 sdp->sd_vfs->s_dev, QUOTA_NL_BSOFTWARN); 1083 sdp->sd_vfs->s_dev, QUOTA_NL_BSOFTWARN);
1095 error = print_message(qd, "warning"); 1084 error = print_message(qd, "warning");
1096 qd->qd_last_warn = jiffies; 1085 qd->qd_last_warn = jiffies;
@@ -1101,7 +1090,7 @@ int gfs2_quota_check(struct gfs2_inode *ip, u32 uid, u32 gid)
1101} 1090}
1102 1091
1103void gfs2_quota_change(struct gfs2_inode *ip, s64 change, 1092void gfs2_quota_change(struct gfs2_inode *ip, s64 change,
1104 u32 uid, u32 gid) 1093 kuid_t uid, kgid_t gid)
1105{ 1094{
1106 struct gfs2_quota_data *qd; 1095 struct gfs2_quota_data *qd;
1107 unsigned int x; 1096 unsigned int x;
@@ -1114,8 +1103,8 @@ void gfs2_quota_change(struct gfs2_inode *ip, s64 change,
1114 for (x = 0; x < ip->i_res->rs_qa_qd_num; x++) { 1103 for (x = 0; x < ip->i_res->rs_qa_qd_num; x++) {
1115 qd = ip->i_res->rs_qa_qd[x]; 1104 qd = ip->i_res->rs_qa_qd[x];
1116 1105
1117 if ((qd->qd_id == uid && test_bit(QDF_USER, &qd->qd_flags)) || 1106 if (qid_eq(qd->qd_id, make_kqid_uid(uid)) ||
1118 (qd->qd_id == gid && !test_bit(QDF_USER, &qd->qd_flags))) { 1107 qid_eq(qd->qd_id, make_kqid_gid(gid))) {
1119 do_qc(qd, change); 1108 do_qc(qd, change);
1120 } 1109 }
1121 } 1110 }
@@ -1170,13 +1159,13 @@ static int gfs2_quota_sync_timeo(struct super_block *sb, int type)
1170 return gfs2_quota_sync(sb, type); 1159 return gfs2_quota_sync(sb, type);
1171} 1160}
1172 1161
1173int gfs2_quota_refresh(struct gfs2_sbd *sdp, int user, u32 id) 1162int gfs2_quota_refresh(struct gfs2_sbd *sdp, struct kqid qid)
1174{ 1163{
1175 struct gfs2_quota_data *qd; 1164 struct gfs2_quota_data *qd;
1176 struct gfs2_holder q_gh; 1165 struct gfs2_holder q_gh;
1177 int error; 1166 int error;
1178 1167
1179 error = qd_get(sdp, user, id, &qd); 1168 error = qd_get(sdp, qid, &qd);
1180 if (error) 1169 if (error)
1181 return error; 1170 return error;
1182 1171
@@ -1194,7 +1183,9 @@ static void gfs2_quota_change_in(struct gfs2_quota_change_host *qc, const void *
1194 1183
1195 qc->qc_change = be64_to_cpu(str->qc_change); 1184 qc->qc_change = be64_to_cpu(str->qc_change);
1196 qc->qc_flags = be32_to_cpu(str->qc_flags); 1185 qc->qc_flags = be32_to_cpu(str->qc_flags);
1197 qc->qc_id = be32_to_cpu(str->qc_id); 1186 qc->qc_id = make_kqid(&init_user_ns,
1187 (qc->qc_flags & GFS2_QCF_USER)?USRQUOTA:GRPQUOTA,
1188 be32_to_cpu(str->qc_id));
1198} 1189}
1199 1190
1200int gfs2_quota_init(struct gfs2_sbd *sdp) 1191int gfs2_quota_init(struct gfs2_sbd *sdp)
@@ -1257,8 +1248,7 @@ int gfs2_quota_init(struct gfs2_sbd *sdp)
1257 if (!qc.qc_change) 1248 if (!qc.qc_change)
1258 continue; 1249 continue;
1259 1250
1260 error = qd_alloc(sdp, (qc.qc_flags & GFS2_QCF_USER), 1251 error = qd_alloc(sdp, qc.qc_id, &qd);
1261 qc.qc_id, &qd);
1262 if (error) { 1252 if (error) {
1263 brelse(bh); 1253 brelse(bh);
1264 goto fail; 1254 goto fail;
@@ -1485,21 +1475,17 @@ static int gfs2_get_dqblk(struct super_block *sb, struct kqid qid,
1485 struct gfs2_quota_data *qd; 1475 struct gfs2_quota_data *qd;
1486 struct gfs2_holder q_gh; 1476 struct gfs2_holder q_gh;
1487 int error; 1477 int error;
1488 int type;
1489 1478
1490 memset(fdq, 0, sizeof(struct fs_disk_quota)); 1479 memset(fdq, 0, sizeof(struct fs_disk_quota));
1491 1480
1492 if (sdp->sd_args.ar_quota == GFS2_QUOTA_OFF) 1481 if (sdp->sd_args.ar_quota == GFS2_QUOTA_OFF)
1493 return -ESRCH; /* Crazy XFS error code */ 1482 return -ESRCH; /* Crazy XFS error code */
1494 1483
1495 if (qid.type == USRQUOTA) 1484 if ((qid.type != USRQUOTA) &&
1496 type = QUOTA_USER; 1485 (qid.type != GRPQUOTA))
1497 else if (qid.type == GRPQUOTA)
1498 type = QUOTA_GROUP;
1499 else
1500 return -EINVAL; 1486 return -EINVAL;
1501 1487
1502 error = qd_get(sdp, type, from_kqid(&init_user_ns, qid), &qd); 1488 error = qd_get(sdp, qid, &qd);
1503 if (error) 1489 if (error)
1504 return error; 1490 return error;
1505 error = do_glock(qd, FORCE, &q_gh); 1491 error = do_glock(qd, FORCE, &q_gh);
@@ -1508,8 +1494,8 @@ static int gfs2_get_dqblk(struct super_block *sb, struct kqid qid,
1508 1494
1509 qlvb = (struct gfs2_quota_lvb *)qd->qd_gl->gl_lksb.sb_lvbptr; 1495 qlvb = (struct gfs2_quota_lvb *)qd->qd_gl->gl_lksb.sb_lvbptr;
1510 fdq->d_version = FS_DQUOT_VERSION; 1496 fdq->d_version = FS_DQUOT_VERSION;
1511 fdq->d_flags = (type == QUOTA_USER) ? FS_USER_QUOTA : FS_GROUP_QUOTA; 1497 fdq->d_flags = (qid.type == USRQUOTA) ? FS_USER_QUOTA : FS_GROUP_QUOTA;
1512 fdq->d_id = from_kqid(&init_user_ns, qid); 1498 fdq->d_id = from_kqid_munged(current_user_ns(), qid);
1513 fdq->d_blk_hardlimit = be64_to_cpu(qlvb->qb_limit) << sdp->sd_fsb2bb_shift; 1499 fdq->d_blk_hardlimit = be64_to_cpu(qlvb->qb_limit) << sdp->sd_fsb2bb_shift;
1514 fdq->d_blk_softlimit = be64_to_cpu(qlvb->qb_warn) << sdp->sd_fsb2bb_shift; 1500 fdq->d_blk_softlimit = be64_to_cpu(qlvb->qb_warn) << sdp->sd_fsb2bb_shift;
1515 fdq->d_bcount = be64_to_cpu(qlvb->qb_value) << sdp->sd_fsb2bb_shift; 1501 fdq->d_bcount = be64_to_cpu(qlvb->qb_value) << sdp->sd_fsb2bb_shift;
@@ -1535,32 +1521,18 @@ static int gfs2_set_dqblk(struct super_block *sb, struct kqid qid,
1535 int alloc_required; 1521 int alloc_required;
1536 loff_t offset; 1522 loff_t offset;
1537 int error; 1523 int error;
1538 int type;
1539 1524
1540 if (sdp->sd_args.ar_quota == GFS2_QUOTA_OFF) 1525 if (sdp->sd_args.ar_quota == GFS2_QUOTA_OFF)
1541 return -ESRCH; /* Crazy XFS error code */ 1526 return -ESRCH; /* Crazy XFS error code */
1542 1527
1543 switch(qid.type) { 1528 if ((qid.type != USRQUOTA) &&
1544 case USRQUOTA: 1529 (qid.type != GRPQUOTA))
1545 type = QUOTA_USER;
1546 if (fdq->d_flags != FS_USER_QUOTA)
1547 return -EINVAL;
1548 break;
1549 case GRPQUOTA:
1550 type = QUOTA_GROUP;
1551 if (fdq->d_flags != FS_GROUP_QUOTA)
1552 return -EINVAL;
1553 break;
1554 default:
1555 return -EINVAL; 1530 return -EINVAL;
1556 }
1557 1531
1558 if (fdq->d_fieldmask & ~GFS2_FIELDMASK) 1532 if (fdq->d_fieldmask & ~GFS2_FIELDMASK)
1559 return -EINVAL; 1533 return -EINVAL;
1560 if (fdq->d_id != from_kqid(&init_user_ns, qid))
1561 return -EINVAL;
1562 1534
1563 error = qd_get(sdp, type, from_kqid(&init_user_ns, qid), &qd); 1535 error = qd_get(sdp, qid, &qd);
1564 if (error) 1536 if (error)
1565 return error; 1537 return error;
1566 1538
diff --git a/fs/gfs2/quota.h b/fs/gfs2/quota.h
index f25d98b87904..4f5e6e44ed83 100644
--- a/fs/gfs2/quota.h
+++ b/fs/gfs2/quota.h
@@ -14,20 +14,21 @@ struct gfs2_inode;
14struct gfs2_sbd; 14struct gfs2_sbd;
15struct shrink_control; 15struct shrink_control;
16 16
17#define NO_QUOTA_CHANGE ((u32)-1) 17#define NO_UID_QUOTA_CHANGE INVALID_UID
18#define NO_GID_QUOTA_CHANGE INVALID_GID
18 19
19extern int gfs2_quota_hold(struct gfs2_inode *ip, u32 uid, u32 gid); 20extern int gfs2_quota_hold(struct gfs2_inode *ip, kuid_t uid, kgid_t gid);
20extern void gfs2_quota_unhold(struct gfs2_inode *ip); 21extern void gfs2_quota_unhold(struct gfs2_inode *ip);
21 22
22extern int gfs2_quota_lock(struct gfs2_inode *ip, u32 uid, u32 gid); 23extern int gfs2_quota_lock(struct gfs2_inode *ip, kuid_t uid, kgid_t gid);
23extern void gfs2_quota_unlock(struct gfs2_inode *ip); 24extern void gfs2_quota_unlock(struct gfs2_inode *ip);
24 25
25extern int gfs2_quota_check(struct gfs2_inode *ip, u32 uid, u32 gid); 26extern int gfs2_quota_check(struct gfs2_inode *ip, kuid_t uid, kgid_t gid);
26extern void gfs2_quota_change(struct gfs2_inode *ip, s64 change, 27extern void gfs2_quota_change(struct gfs2_inode *ip, s64 change,
27 u32 uid, u32 gid); 28 kuid_t uid, kgid_t gid);
28 29
29extern int gfs2_quota_sync(struct super_block *sb, int type); 30extern int gfs2_quota_sync(struct super_block *sb, int type);
30extern int gfs2_quota_refresh(struct gfs2_sbd *sdp, int user, u32 id); 31extern int gfs2_quota_refresh(struct gfs2_sbd *sdp, struct kqid qid);
31 32
32extern int gfs2_quota_init(struct gfs2_sbd *sdp); 33extern int gfs2_quota_init(struct gfs2_sbd *sdp);
33extern void gfs2_quota_cleanup(struct gfs2_sbd *sdp); 34extern void gfs2_quota_cleanup(struct gfs2_sbd *sdp);
@@ -41,7 +42,7 @@ static inline int gfs2_quota_lock_check(struct gfs2_inode *ip)
41 int ret; 42 int ret;
42 if (sdp->sd_args.ar_quota == GFS2_QUOTA_OFF) 43 if (sdp->sd_args.ar_quota == GFS2_QUOTA_OFF)
43 return 0; 44 return 0;
44 ret = gfs2_quota_lock(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE); 45 ret = gfs2_quota_lock(ip, NO_UID_QUOTA_CHANGE, NO_GID_QUOTA_CHANGE);
45 if (ret) 46 if (ret)
46 return ret; 47 return ret;
47 if (sdp->sd_args.ar_quota != GFS2_QUOTA_ON) 48 if (sdp->sd_args.ar_quota != GFS2_QUOTA_ON)
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index b7eff078fe90..52c2aeaf45ce 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -1323,7 +1323,7 @@ int gfs2_fitrim(struct file *filp, void __user *argp)
1323 if (ret == 0) { 1323 if (ret == 0) {
1324 bh = rgd->rd_bits[0].bi_bh; 1324 bh = rgd->rd_bits[0].bi_bh;
1325 rgd->rd_flags |= GFS2_RGF_TRIMMED; 1325 rgd->rd_flags |= GFS2_RGF_TRIMMED;
1326 gfs2_trans_add_bh(rgd->rd_gl, bh, 1); 1326 gfs2_trans_add_meta(rgd->rd_gl, bh);
1327 gfs2_rgrp_out(rgd, bh->b_data); 1327 gfs2_rgrp_out(rgd, bh->b_data);
1328 gfs2_rgrp_ondisk2lvb(rgd->rd_rgl, bh->b_data); 1328 gfs2_rgrp_ondisk2lvb(rgd->rd_rgl, bh->b_data);
1329 gfs2_trans_end(sdp); 1329 gfs2_trans_end(sdp);
@@ -1968,14 +1968,14 @@ static void gfs2_alloc_extent(const struct gfs2_rbm *rbm, bool dinode,
1968 1968
1969 *n = 1; 1969 *n = 1;
1970 block = gfs2_rbm_to_block(rbm); 1970 block = gfs2_rbm_to_block(rbm);
1971 gfs2_trans_add_bh(rbm->rgd->rd_gl, rbm->bi->bi_bh, 1); 1971 gfs2_trans_add_meta(rbm->rgd->rd_gl, rbm->bi->bi_bh);
1972 gfs2_setbit(rbm, true, dinode ? GFS2_BLKST_DINODE : GFS2_BLKST_USED); 1972 gfs2_setbit(rbm, true, dinode ? GFS2_BLKST_DINODE : GFS2_BLKST_USED);
1973 block++; 1973 block++;
1974 while (*n < elen) { 1974 while (*n < elen) {
1975 ret = gfs2_rbm_from_block(&pos, block); 1975 ret = gfs2_rbm_from_block(&pos, block);
1976 if (ret || gfs2_testbit(&pos) != GFS2_BLKST_FREE) 1976 if (ret || gfs2_testbit(&pos) != GFS2_BLKST_FREE)
1977 break; 1977 break;
1978 gfs2_trans_add_bh(pos.rgd->rd_gl, pos.bi->bi_bh, 1); 1978 gfs2_trans_add_meta(pos.rgd->rd_gl, pos.bi->bi_bh);
1979 gfs2_setbit(&pos, true, GFS2_BLKST_USED); 1979 gfs2_setbit(&pos, true, GFS2_BLKST_USED);
1980 (*n)++; 1980 (*n)++;
1981 block++; 1981 block++;
@@ -2014,7 +2014,7 @@ static struct gfs2_rgrpd *rgblk_free(struct gfs2_sbd *sdp, u64 bstart,
2014 rbm.bi->bi_bh->b_data + rbm.bi->bi_offset, 2014 rbm.bi->bi_bh->b_data + rbm.bi->bi_offset,
2015 rbm.bi->bi_len); 2015 rbm.bi->bi_len);
2016 } 2016 }
2017 gfs2_trans_add_bh(rbm.rgd->rd_gl, rbm.bi->bi_bh, 1); 2017 gfs2_trans_add_meta(rbm.rgd->rd_gl, rbm.bi->bi_bh);
2018 gfs2_setbit(&rbm, false, new_state); 2018 gfs2_setbit(&rbm, false, new_state);
2019 } 2019 }
2020 2020
@@ -2157,7 +2157,7 @@ int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks,
2157 if (error == 0) { 2157 if (error == 0) {
2158 struct gfs2_dinode *di = 2158 struct gfs2_dinode *di =
2159 (struct gfs2_dinode *)dibh->b_data; 2159 (struct gfs2_dinode *)dibh->b_data;
2160 gfs2_trans_add_bh(ip->i_gl, dibh, 1); 2160 gfs2_trans_add_meta(ip->i_gl, dibh);
2161 di->di_goal_meta = di->di_goal_data = 2161 di->di_goal_meta = di->di_goal_data =
2162 cpu_to_be64(ip->i_goal); 2162 cpu_to_be64(ip->i_goal);
2163 brelse(dibh); 2163 brelse(dibh);
@@ -2176,7 +2176,7 @@ int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks,
2176 *generation = rbm.rgd->rd_igeneration++; 2176 *generation = rbm.rgd->rd_igeneration++;
2177 } 2177 }
2178 2178
2179 gfs2_trans_add_bh(rbm.rgd->rd_gl, rbm.rgd->rd_bits[0].bi_bh, 1); 2179 gfs2_trans_add_meta(rbm.rgd->rd_gl, rbm.rgd->rd_bits[0].bi_bh);
2180 gfs2_rgrp_out(rbm.rgd, rbm.rgd->rd_bits[0].bi_bh->b_data); 2180 gfs2_rgrp_out(rbm.rgd, rbm.rgd->rd_bits[0].bi_bh->b_data);
2181 gfs2_rgrp_ondisk2lvb(rbm.rgd->rd_rgl, rbm.rgd->rd_bits[0].bi_bh->b_data); 2181 gfs2_rgrp_ondisk2lvb(rbm.rgd->rd_rgl, rbm.rgd->rd_bits[0].bi_bh->b_data);
2182 2182
@@ -2223,7 +2223,7 @@ void __gfs2_free_blocks(struct gfs2_inode *ip, u64 bstart, u32 blen, int meta)
2223 trace_gfs2_block_alloc(ip, rgd, bstart, blen, GFS2_BLKST_FREE); 2223 trace_gfs2_block_alloc(ip, rgd, bstart, blen, GFS2_BLKST_FREE);
2224 rgd->rd_free += blen; 2224 rgd->rd_free += blen;
2225 rgd->rd_flags &= ~GFS2_RGF_TRIMMED; 2225 rgd->rd_flags &= ~GFS2_RGF_TRIMMED;
2226 gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1); 2226 gfs2_trans_add_meta(rgd->rd_gl, rgd->rd_bits[0].bi_bh);
2227 gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data); 2227 gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data);
2228 gfs2_rgrp_ondisk2lvb(rgd->rd_rgl, rgd->rd_bits[0].bi_bh->b_data); 2228 gfs2_rgrp_ondisk2lvb(rgd->rd_rgl, rgd->rd_bits[0].bi_bh->b_data);
2229 2229
@@ -2260,7 +2260,7 @@ void gfs2_unlink_di(struct inode *inode)
2260 if (!rgd) 2260 if (!rgd)
2261 return; 2261 return;
2262 trace_gfs2_block_alloc(ip, rgd, blkno, 1, GFS2_BLKST_UNLINKED); 2262 trace_gfs2_block_alloc(ip, rgd, blkno, 1, GFS2_BLKST_UNLINKED);
2263 gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1); 2263 gfs2_trans_add_meta(rgd->rd_gl, rgd->rd_bits[0].bi_bh);
2264 gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data); 2264 gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data);
2265 gfs2_rgrp_ondisk2lvb(rgd->rd_rgl, rgd->rd_bits[0].bi_bh->b_data); 2265 gfs2_rgrp_ondisk2lvb(rgd->rd_rgl, rgd->rd_bits[0].bi_bh->b_data);
2266 update_rgrp_lvb_unlinked(rgd, 1); 2266 update_rgrp_lvb_unlinked(rgd, 1);
@@ -2281,7 +2281,7 @@ static void gfs2_free_uninit_di(struct gfs2_rgrpd *rgd, u64 blkno)
2281 rgd->rd_dinodes--; 2281 rgd->rd_dinodes--;
2282 rgd->rd_free++; 2282 rgd->rd_free++;
2283 2283
2284 gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1); 2284 gfs2_trans_add_meta(rgd->rd_gl, rgd->rd_bits[0].bi_bh);
2285 gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data); 2285 gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data);
2286 gfs2_rgrp_ondisk2lvb(rgd->rd_rgl, rgd->rd_bits[0].bi_bh->b_data); 2286 gfs2_rgrp_ondisk2lvb(rgd->rd_rgl, rgd->rd_bits[0].bi_bh->b_data);
2287 update_rgrp_lvb_unlinked(rgd, -1); 2287 update_rgrp_lvb_unlinked(rgd, -1);
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index d6488674d916..cab77b8ba84f 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -500,7 +500,7 @@ void gfs2_statfs_change(struct gfs2_sbd *sdp, s64 total, s64 free,
500 if (error) 500 if (error)
501 return; 501 return;
502 502
503 gfs2_trans_add_bh(l_ip->i_gl, l_bh, 1); 503 gfs2_trans_add_meta(l_ip->i_gl, l_bh);
504 504
505 spin_lock(&sdp->sd_statfs_spin); 505 spin_lock(&sdp->sd_statfs_spin);
506 l_sc->sc_total += total; 506 l_sc->sc_total += total;
@@ -528,7 +528,7 @@ void update_statfs(struct gfs2_sbd *sdp, struct buffer_head *m_bh,
528 struct gfs2_statfs_change_host *m_sc = &sdp->sd_statfs_master; 528 struct gfs2_statfs_change_host *m_sc = &sdp->sd_statfs_master;
529 struct gfs2_statfs_change_host *l_sc = &sdp->sd_statfs_local; 529 struct gfs2_statfs_change_host *l_sc = &sdp->sd_statfs_local;
530 530
531 gfs2_trans_add_bh(l_ip->i_gl, l_bh, 1); 531 gfs2_trans_add_meta(l_ip->i_gl, l_bh);
532 532
533 spin_lock(&sdp->sd_statfs_spin); 533 spin_lock(&sdp->sd_statfs_spin);
534 m_sc->sc_total += l_sc->sc_total; 534 m_sc->sc_total += l_sc->sc_total;
@@ -539,7 +539,7 @@ void update_statfs(struct gfs2_sbd *sdp, struct buffer_head *m_bh,
539 0, sizeof(struct gfs2_statfs_change)); 539 0, sizeof(struct gfs2_statfs_change));
540 spin_unlock(&sdp->sd_statfs_spin); 540 spin_unlock(&sdp->sd_statfs_spin);
541 541
542 gfs2_trans_add_bh(m_ip->i_gl, m_bh, 1); 542 gfs2_trans_add_meta(m_ip->i_gl, m_bh);
543 gfs2_statfs_change_out(m_sc, m_bh->b_data + sizeof(struct gfs2_dinode)); 543 gfs2_statfs_change_out(m_sc, m_bh->b_data + sizeof(struct gfs2_dinode));
544} 544}
545 545
@@ -663,54 +663,6 @@ out:
663 return error; 663 return error;
664} 664}
665 665
666/**
667 * gfs2_freeze_fs - freezes the file system
668 * @sdp: the file system
669 *
670 * This function flushes data and meta data for all machines by
671 * acquiring the transaction log exclusively. All journals are
672 * ensured to be in a clean state as well.
673 *
674 * Returns: errno
675 */
676
677int gfs2_freeze_fs(struct gfs2_sbd *sdp)
678{
679 int error = 0;
680
681 mutex_lock(&sdp->sd_freeze_lock);
682
683 if (!sdp->sd_freeze_count++) {
684 error = gfs2_lock_fs_check_clean(sdp, &sdp->sd_freeze_gh);
685 if (error)
686 sdp->sd_freeze_count--;
687 }
688
689 mutex_unlock(&sdp->sd_freeze_lock);
690
691 return error;
692}
693
694/**
695 * gfs2_unfreeze_fs - unfreezes the file system
696 * @sdp: the file system
697 *
698 * This function allows the file system to proceed by unlocking
699 * the exclusively held transaction lock. Other GFS2 nodes are
700 * now free to acquire the lock shared and go on with their lives.
701 *
702 */
703
704void gfs2_unfreeze_fs(struct gfs2_sbd *sdp)
705{
706 mutex_lock(&sdp->sd_freeze_lock);
707
708 if (sdp->sd_freeze_count && !--sdp->sd_freeze_count)
709 gfs2_glock_dq_uninit(&sdp->sd_freeze_gh);
710
711 mutex_unlock(&sdp->sd_freeze_lock);
712}
713
714void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf) 666void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf)
715{ 667{
716 struct gfs2_dinode *str = buf; 668 struct gfs2_dinode *str = buf;
@@ -721,8 +673,8 @@ void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf)
721 str->di_num.no_addr = cpu_to_be64(ip->i_no_addr); 673 str->di_num.no_addr = cpu_to_be64(ip->i_no_addr);
722 str->di_num.no_formal_ino = cpu_to_be64(ip->i_no_formal_ino); 674 str->di_num.no_formal_ino = cpu_to_be64(ip->i_no_formal_ino);
723 str->di_mode = cpu_to_be32(ip->i_inode.i_mode); 675 str->di_mode = cpu_to_be32(ip->i_inode.i_mode);
724 str->di_uid = cpu_to_be32(ip->i_inode.i_uid); 676 str->di_uid = cpu_to_be32(i_uid_read(&ip->i_inode));
725 str->di_gid = cpu_to_be32(ip->i_inode.i_gid); 677 str->di_gid = cpu_to_be32(i_gid_read(&ip->i_inode));
726 str->di_nlink = cpu_to_be32(ip->i_inode.i_nlink); 678 str->di_nlink = cpu_to_be32(ip->i_inode.i_nlink);
727 str->di_size = cpu_to_be64(i_size_read(&ip->i_inode)); 679 str->di_size = cpu_to_be64(i_size_read(&ip->i_inode));
728 str->di_blocks = cpu_to_be64(gfs2_get_inode_blocks(&ip->i_inode)); 680 str->di_blocks = cpu_to_be64(gfs2_get_inode_blocks(&ip->i_inode));
@@ -824,7 +776,7 @@ static void gfs2_dirty_inode(struct inode *inode, int flags)
824 776
825 ret = gfs2_meta_inode_buffer(ip, &bh); 777 ret = gfs2_meta_inode_buffer(ip, &bh);
826 if (ret == 0) { 778 if (ret == 0) {
827 gfs2_trans_add_bh(ip->i_gl, bh, 1); 779 gfs2_trans_add_meta(ip->i_gl, bh);
828 gfs2_dinode_out(ip, bh->b_data); 780 gfs2_dinode_out(ip, bh->b_data);
829 brelse(bh); 781 brelse(bh);
830 } 782 }
@@ -888,13 +840,6 @@ static void gfs2_put_super(struct super_block *sb)
888 int error; 840 int error;
889 struct gfs2_jdesc *jd; 841 struct gfs2_jdesc *jd;
890 842
891 /* Unfreeze the filesystem, if we need to */
892
893 mutex_lock(&sdp->sd_freeze_lock);
894 if (sdp->sd_freeze_count)
895 gfs2_glock_dq_uninit(&sdp->sd_freeze_gh);
896 mutex_unlock(&sdp->sd_freeze_lock);
897
898 /* No more recovery requests */ 843 /* No more recovery requests */
899 set_bit(SDF_NORECOVERY, &sdp->sd_flags); 844 set_bit(SDF_NORECOVERY, &sdp->sd_flags);
900 smp_mb(); 845 smp_mb();
@@ -985,7 +930,7 @@ static int gfs2_freeze(struct super_block *sb)
985 return -EINVAL; 930 return -EINVAL;
986 931
987 for (;;) { 932 for (;;) {
988 error = gfs2_freeze_fs(sdp); 933 error = gfs2_lock_fs_check_clean(sdp, &sdp->sd_freeze_gh);
989 if (!error) 934 if (!error)
990 break; 935 break;
991 936
@@ -1013,7 +958,9 @@ static int gfs2_freeze(struct super_block *sb)
1013 958
1014static int gfs2_unfreeze(struct super_block *sb) 959static int gfs2_unfreeze(struct super_block *sb)
1015{ 960{
1016 gfs2_unfreeze_fs(sb->s_fs_info); 961 struct gfs2_sbd *sdp = sb->s_fs_info;
962
963 gfs2_glock_dq_uninit(&sdp->sd_freeze_gh);
1017 return 0; 964 return 0;
1018} 965}
1019 966
@@ -1429,7 +1376,7 @@ static int gfs2_dinode_dealloc(struct gfs2_inode *ip)
1429 if (error) 1376 if (error)
1430 return error; 1377 return error;
1431 1378
1432 error = gfs2_quota_hold(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE); 1379 error = gfs2_quota_hold(ip, NO_UID_QUOTA_CHANGE, NO_GID_QUOTA_CHANGE);
1433 if (error) 1380 if (error)
1434 return error; 1381 return error;
1435 1382
@@ -1577,6 +1524,7 @@ out:
1577 /* Case 3 starts here */ 1524 /* Case 3 starts here */
1578 truncate_inode_pages(&inode->i_data, 0); 1525 truncate_inode_pages(&inode->i_data, 0);
1579 gfs2_rs_delete(ip); 1526 gfs2_rs_delete(ip);
1527 gfs2_ordered_del_inode(ip);
1580 clear_inode(inode); 1528 clear_inode(inode);
1581 gfs2_dir_hash_inval(ip); 1529 gfs2_dir_hash_inval(ip);
1582 ip->i_gl->gl_object = NULL; 1530 ip->i_gl->gl_object = NULL;
diff --git a/fs/gfs2/super.h b/fs/gfs2/super.h
index a0464680af0b..90e3322ffa10 100644
--- a/fs/gfs2/super.h
+++ b/fs/gfs2/super.h
@@ -46,9 +46,6 @@ extern void update_statfs(struct gfs2_sbd *sdp, struct buffer_head *m_bh,
46 struct buffer_head *l_bh); 46 struct buffer_head *l_bh);
47extern int gfs2_statfs_sync(struct super_block *sb, int type); 47extern int gfs2_statfs_sync(struct super_block *sb, int type);
48 48
49extern int gfs2_freeze_fs(struct gfs2_sbd *sdp);
50extern void gfs2_unfreeze_fs(struct gfs2_sbd *sdp);
51
52extern struct file_system_type gfs2_fs_type; 49extern struct file_system_type gfs2_fs_type;
53extern struct file_system_type gfs2meta_fs_type; 50extern struct file_system_type gfs2meta_fs_type;
54extern const struct export_operations gfs2_export_ops; 51extern const struct export_operations gfs2_export_ops;
diff --git a/fs/gfs2/sys.c b/fs/gfs2/sys.c
index 8056b7b7238e..597a612834dc 100644
--- a/fs/gfs2/sys.c
+++ b/fs/gfs2/sys.c
@@ -91,19 +91,15 @@ static ssize_t uuid_show(struct gfs2_sbd *sdp, char *buf)
91 91
92static ssize_t freeze_show(struct gfs2_sbd *sdp, char *buf) 92static ssize_t freeze_show(struct gfs2_sbd *sdp, char *buf)
93{ 93{
94 unsigned int count; 94 struct super_block *sb = sdp->sd_vfs;
95 95 int frozen = (sb->s_writers.frozen == SB_UNFROZEN) ? 0 : 1;
96 mutex_lock(&sdp->sd_freeze_lock);
97 count = sdp->sd_freeze_count;
98 mutex_unlock(&sdp->sd_freeze_lock);
99 96
100 return snprintf(buf, PAGE_SIZE, "%u\n", count); 97 return snprintf(buf, PAGE_SIZE, "%u\n", frozen);
101} 98}
102 99
103static ssize_t freeze_store(struct gfs2_sbd *sdp, const char *buf, size_t len) 100static ssize_t freeze_store(struct gfs2_sbd *sdp, const char *buf, size_t len)
104{ 101{
105 ssize_t ret = len; 102 int error;
106 int error = 0;
107 int n = simple_strtol(buf, NULL, 0); 103 int n = simple_strtol(buf, NULL, 0);
108 104
109 if (!capable(CAP_SYS_ADMIN)) 105 if (!capable(CAP_SYS_ADMIN))
@@ -111,19 +107,21 @@ static ssize_t freeze_store(struct gfs2_sbd *sdp, const char *buf, size_t len)
111 107
112 switch (n) { 108 switch (n) {
113 case 0: 109 case 0:
114 gfs2_unfreeze_fs(sdp); 110 error = thaw_super(sdp->sd_vfs);
115 break; 111 break;
116 case 1: 112 case 1:
117 error = gfs2_freeze_fs(sdp); 113 error = freeze_super(sdp->sd_vfs);
118 break; 114 break;
119 default: 115 default:
120 ret = -EINVAL; 116 return -EINVAL;
121 } 117 }
122 118
123 if (error) 119 if (error) {
124 fs_warn(sdp, "freeze %d error %d", n, error); 120 fs_warn(sdp, "freeze %d error %d", n, error);
121 return error;
122 }
125 123
126 return ret; 124 return len;
127} 125}
128 126
129static ssize_t withdraw_show(struct gfs2_sbd *sdp, char *buf) 127static ssize_t withdraw_show(struct gfs2_sbd *sdp, char *buf)
@@ -175,6 +173,7 @@ static ssize_t quota_sync_store(struct gfs2_sbd *sdp, const char *buf,
175static ssize_t quota_refresh_user_store(struct gfs2_sbd *sdp, const char *buf, 173static ssize_t quota_refresh_user_store(struct gfs2_sbd *sdp, const char *buf,
176 size_t len) 174 size_t len)
177{ 175{
176 struct kqid qid;
178 int error; 177 int error;
179 u32 id; 178 u32 id;
180 179
@@ -183,13 +182,18 @@ static ssize_t quota_refresh_user_store(struct gfs2_sbd *sdp, const char *buf,
183 182
184 id = simple_strtoul(buf, NULL, 0); 183 id = simple_strtoul(buf, NULL, 0);
185 184
186 error = gfs2_quota_refresh(sdp, 1, id); 185 qid = make_kqid(current_user_ns(), USRQUOTA, id);
186 if (!qid_valid(qid))
187 return -EINVAL;
188
189 error = gfs2_quota_refresh(sdp, qid);
187 return error ? error : len; 190 return error ? error : len;
188} 191}
189 192
190static ssize_t quota_refresh_group_store(struct gfs2_sbd *sdp, const char *buf, 193static ssize_t quota_refresh_group_store(struct gfs2_sbd *sdp, const char *buf,
191 size_t len) 194 size_t len)
192{ 195{
196 struct kqid qid;
193 int error; 197 int error;
194 u32 id; 198 u32 id;
195 199
@@ -198,7 +202,11 @@ static ssize_t quota_refresh_group_store(struct gfs2_sbd *sdp, const char *buf,
198 202
199 id = simple_strtoul(buf, NULL, 0); 203 id = simple_strtoul(buf, NULL, 0);
200 204
201 error = gfs2_quota_refresh(sdp, 0, id); 205 qid = make_kqid(current_user_ns(), GRPQUOTA, id);
206 if (!qid_valid(qid))
207 return -EINVAL;
208
209 error = gfs2_quota_refresh(sdp, qid);
202 return error ? error : len; 210 return error ? error : len;
203} 211}
204 212
@@ -332,6 +340,28 @@ static ssize_t block_store(struct gfs2_sbd *sdp, const char *buf, size_t len)
332 return ret; 340 return ret;
333} 341}
334 342
343static ssize_t wdack_show(struct gfs2_sbd *sdp, char *buf)
344{
345 int val = completion_done(&sdp->sd_wdack) ? 1 : 0;
346
347 return sprintf(buf, "%d\n", val);
348}
349
350static ssize_t wdack_store(struct gfs2_sbd *sdp, const char *buf, size_t len)
351{
352 ssize_t ret = len;
353 int val;
354
355 val = simple_strtol(buf, NULL, 0);
356
357 if ((val == 1) &&
358 !strcmp(sdp->sd_lockstruct.ls_ops->lm_proto_name, "lock_dlm"))
359 complete(&sdp->sd_wdack);
360 else
361 ret = -EINVAL;
362 return ret;
363}
364
335static ssize_t lkfirst_show(struct gfs2_sbd *sdp, char *buf) 365static ssize_t lkfirst_show(struct gfs2_sbd *sdp, char *buf)
336{ 366{
337 struct lm_lockstruct *ls = &sdp->sd_lockstruct; 367 struct lm_lockstruct *ls = &sdp->sd_lockstruct;
@@ -463,7 +493,7 @@ static struct gfs2_attr gdlm_attr_##_name = __ATTR(_name,_mode,_show,_store)
463 493
464GDLM_ATTR(proto_name, 0444, proto_name_show, NULL); 494GDLM_ATTR(proto_name, 0444, proto_name_show, NULL);
465GDLM_ATTR(block, 0644, block_show, block_store); 495GDLM_ATTR(block, 0644, block_show, block_store);
466GDLM_ATTR(withdraw, 0644, withdraw_show, withdraw_store); 496GDLM_ATTR(withdraw, 0644, wdack_show, wdack_store);
467GDLM_ATTR(jid, 0644, jid_show, jid_store); 497GDLM_ATTR(jid, 0644, jid_show, jid_store);
468GDLM_ATTR(first, 0644, lkfirst_show, lkfirst_store); 498GDLM_ATTR(first, 0644, lkfirst_show, lkfirst_store);
469GDLM_ATTR(first_done, 0444, first_done_show, NULL); 499GDLM_ATTR(first_done, 0444, first_done_show, NULL);
diff --git a/fs/gfs2/trans.c b/fs/gfs2/trans.c
index 413627072f36..88162fae27a5 100644
--- a/fs/gfs2/trans.c
+++ b/fs/gfs2/trans.c
@@ -18,6 +18,7 @@
18#include "gfs2.h" 18#include "gfs2.h"
19#include "incore.h" 19#include "incore.h"
20#include "glock.h" 20#include "glock.h"
21#include "inode.h"
21#include "log.h" 22#include "log.h"
22#include "lops.h" 23#include "lops.h"
23#include "meta_io.h" 24#include "meta_io.h"
@@ -142,44 +143,143 @@ void gfs2_trans_end(struct gfs2_sbd *sdp)
142 sb_end_intwrite(sdp->sd_vfs); 143 sb_end_intwrite(sdp->sd_vfs);
143} 144}
144 145
146static struct gfs2_bufdata *gfs2_alloc_bufdata(struct gfs2_glock *gl,
147 struct buffer_head *bh,
148 const struct gfs2_log_operations *lops)
149{
150 struct gfs2_bufdata *bd;
151
152 bd = kmem_cache_zalloc(gfs2_bufdata_cachep, GFP_NOFS | __GFP_NOFAIL);
153 bd->bd_bh = bh;
154 bd->bd_gl = gl;
155 bd->bd_ops = lops;
156 INIT_LIST_HEAD(&bd->bd_list);
157 bh->b_private = bd;
158 return bd;
159}
160
145/** 161/**
146 * gfs2_trans_add_bh - Add a to-be-modified buffer to the current transaction 162 * gfs2_trans_add_data - Add a databuf to the transaction.
147 * @gl: the glock the buffer belongs to 163 * @gl: The inode glock associated with the buffer
148 * @bh: The buffer to add 164 * @bh: The buffer to add
149 * @meta: True in the case of adding metadata
150 * 165 *
166 * This is used in two distinct cases:
167 * i) In ordered write mode
168 * We put the data buffer on a list so that we can ensure that its
169 * synced to disk at the right time
170 * ii) In journaled data mode
171 * We need to journal the data block in the same way as metadata in
172 * the functions above. The difference is that here we have a tag
173 * which is two __be64's being the block number (as per meta data)
174 * and a flag which says whether the data block needs escaping or
175 * not. This means we need a new log entry for each 251 or so data
176 * blocks, which isn't an enormous overhead but twice as much as
177 * for normal metadata blocks.
151 */ 178 */
179void gfs2_trans_add_data(struct gfs2_glock *gl, struct buffer_head *bh)
180{
181 struct gfs2_trans *tr = current->journal_info;
182 struct gfs2_sbd *sdp = gl->gl_sbd;
183 struct address_space *mapping = bh->b_page->mapping;
184 struct gfs2_inode *ip = GFS2_I(mapping->host);
185 struct gfs2_bufdata *bd;
152 186
153void gfs2_trans_add_bh(struct gfs2_glock *gl, struct buffer_head *bh, int meta) 187 if (!gfs2_is_jdata(ip)) {
188 gfs2_ordered_add_inode(ip);
189 return;
190 }
191
192 lock_buffer(bh);
193 gfs2_log_lock(sdp);
194 bd = bh->b_private;
195 if (bd == NULL) {
196 gfs2_log_unlock(sdp);
197 unlock_buffer(bh);
198 if (bh->b_private == NULL)
199 bd = gfs2_alloc_bufdata(gl, bh, &gfs2_databuf_lops);
200 lock_buffer(bh);
201 gfs2_log_lock(sdp);
202 }
203 gfs2_assert(sdp, bd->bd_gl == gl);
204 tr->tr_touched = 1;
205 if (list_empty(&bd->bd_list)) {
206 set_bit(GLF_LFLUSH, &bd->bd_gl->gl_flags);
207 set_bit(GLF_DIRTY, &bd->bd_gl->gl_flags);
208 gfs2_pin(sdp, bd->bd_bh);
209 tr->tr_num_databuf_new++;
210 sdp->sd_log_num_databuf++;
211 list_add_tail(&bd->bd_list, &sdp->sd_log_le_databuf);
212 }
213 gfs2_log_unlock(sdp);
214 unlock_buffer(bh);
215}
216
217static void meta_lo_add(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd)
154{ 218{
219 struct gfs2_meta_header *mh;
220 struct gfs2_trans *tr;
221
222 tr = current->journal_info;
223 tr->tr_touched = 1;
224 if (!list_empty(&bd->bd_list))
225 return;
226 set_bit(GLF_LFLUSH, &bd->bd_gl->gl_flags);
227 set_bit(GLF_DIRTY, &bd->bd_gl->gl_flags);
228 mh = (struct gfs2_meta_header *)bd->bd_bh->b_data;
229 if (unlikely(mh->mh_magic != cpu_to_be32(GFS2_MAGIC))) {
230 printk(KERN_ERR
231 "Attempting to add uninitialised block to journal (inplace block=%lld)\n",
232 (unsigned long long)bd->bd_bh->b_blocknr);
233 BUG();
234 }
235 gfs2_pin(sdp, bd->bd_bh);
236 mh->__pad0 = cpu_to_be64(0);
237 mh->mh_jid = cpu_to_be32(sdp->sd_jdesc->jd_jid);
238 sdp->sd_log_num_buf++;
239 list_add(&bd->bd_list, &sdp->sd_log_le_buf);
240 tr->tr_num_buf_new++;
241}
242
243void gfs2_trans_add_meta(struct gfs2_glock *gl, struct buffer_head *bh)
244{
245
155 struct gfs2_sbd *sdp = gl->gl_sbd; 246 struct gfs2_sbd *sdp = gl->gl_sbd;
156 struct gfs2_bufdata *bd; 247 struct gfs2_bufdata *bd;
157 248
158 lock_buffer(bh); 249 lock_buffer(bh);
159 gfs2_log_lock(sdp); 250 gfs2_log_lock(sdp);
160 bd = bh->b_private; 251 bd = bh->b_private;
161 if (bd) 252 if (bd == NULL) {
162 gfs2_assert(sdp, bd->bd_gl == gl);
163 else {
164 gfs2_log_unlock(sdp); 253 gfs2_log_unlock(sdp);
165 unlock_buffer(bh); 254 unlock_buffer(bh);
166 gfs2_attach_bufdata(gl, bh, meta); 255 lock_page(bh->b_page);
167 bd = bh->b_private; 256 if (bh->b_private == NULL)
257 bd = gfs2_alloc_bufdata(gl, bh, &gfs2_buf_lops);
258 unlock_page(bh->b_page);
168 lock_buffer(bh); 259 lock_buffer(bh);
169 gfs2_log_lock(sdp); 260 gfs2_log_lock(sdp);
170 } 261 }
171 lops_add(sdp, bd); 262 gfs2_assert(sdp, bd->bd_gl == gl);
263 meta_lo_add(sdp, bd);
172 gfs2_log_unlock(sdp); 264 gfs2_log_unlock(sdp);
173 unlock_buffer(bh); 265 unlock_buffer(bh);
174} 266}
175 267
176void gfs2_trans_add_revoke(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd) 268void gfs2_trans_add_revoke(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd)
177{ 269{
270 struct gfs2_glock *gl = bd->bd_gl;
271 struct gfs2_trans *tr = current->journal_info;
272
178 BUG_ON(!list_empty(&bd->bd_list)); 273 BUG_ON(!list_empty(&bd->bd_list));
179 BUG_ON(!list_empty(&bd->bd_ail_st_list)); 274 BUG_ON(!list_empty(&bd->bd_ail_st_list));
180 BUG_ON(!list_empty(&bd->bd_ail_gl_list)); 275 BUG_ON(!list_empty(&bd->bd_ail_gl_list));
181 lops_init_le(bd, &gfs2_revoke_lops); 276 bd->bd_ops = &gfs2_revoke_lops;
182 lops_add(sdp, bd); 277 tr->tr_touched = 1;
278 tr->tr_num_revoke++;
279 sdp->sd_log_num_revoke++;
280 atomic_inc(&gl->gl_revokes);
281 set_bit(GLF_LFLUSH, &gl->gl_flags);
282 list_add(&bd->bd_list, &sdp->sd_log_le_revoke);
183} 283}
184 284
185void gfs2_trans_add_unrevoke(struct gfs2_sbd *sdp, u64 blkno, unsigned int len) 285void gfs2_trans_add_unrevoke(struct gfs2_sbd *sdp, u64 blkno, unsigned int len)
diff --git a/fs/gfs2/trans.h b/fs/gfs2/trans.h
index bf2ae9aeee7a..1e6e7da25a17 100644
--- a/fs/gfs2/trans.h
+++ b/fs/gfs2/trans.h
@@ -39,7 +39,8 @@ extern int gfs2_trans_begin(struct gfs2_sbd *sdp, unsigned int blocks,
39 unsigned int revokes); 39 unsigned int revokes);
40 40
41extern void gfs2_trans_end(struct gfs2_sbd *sdp); 41extern void gfs2_trans_end(struct gfs2_sbd *sdp);
42extern void gfs2_trans_add_bh(struct gfs2_glock *gl, struct buffer_head *bh, int meta); 42extern void gfs2_trans_add_data(struct gfs2_glock *gl, struct buffer_head *bh);
43extern void gfs2_trans_add_meta(struct gfs2_glock *gl, struct buffer_head *bh);
43extern void gfs2_trans_add_revoke(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd); 44extern void gfs2_trans_add_revoke(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd);
44extern void gfs2_trans_add_unrevoke(struct gfs2_sbd *sdp, u64 blkno, unsigned int len); 45extern void gfs2_trans_add_unrevoke(struct gfs2_sbd *sdp, u64 blkno, unsigned int len);
45 46
diff --git a/fs/gfs2/util.c b/fs/gfs2/util.c
index f00d7c5744f6..6402fb69d71b 100644
--- a/fs/gfs2/util.c
+++ b/fs/gfs2/util.c
@@ -54,6 +54,9 @@ int gfs2_lm_withdraw(struct gfs2_sbd *sdp, char *fmt, ...)
54 54
55 kobject_uevent(&sdp->sd_kobj, KOBJ_OFFLINE); 55 kobject_uevent(&sdp->sd_kobj, KOBJ_OFFLINE);
56 56
57 if (!strcmp(sdp->sd_lockstruct.ls_ops->lm_proto_name, "lock_dlm"))
58 wait_for_completion(&sdp->sd_wdack);
59
57 if (lm->lm_unmount) { 60 if (lm->lm_unmount) {
58 fs_err(sdp, "telling LM to unmount\n"); 61 fs_err(sdp, "telling LM to unmount\n");
59 lm->lm_unmount(sdp); 62 lm->lm_unmount(sdp);
diff --git a/fs/gfs2/xattr.c b/fs/gfs2/xattr.c
index 76c144b3c9bb..ecd37f30ab91 100644
--- a/fs/gfs2/xattr.c
+++ b/fs/gfs2/xattr.c
@@ -270,7 +270,7 @@ static int ea_dealloc_unstuffed(struct gfs2_inode *ip, struct buffer_head *bh,
270 if (error) 270 if (error)
271 goto out_gunlock; 271 goto out_gunlock;
272 272
273 gfs2_trans_add_bh(ip->i_gl, bh, 1); 273 gfs2_trans_add_meta(ip->i_gl, bh);
274 274
275 dataptrs = GFS2_EA2DATAPTRS(ea); 275 dataptrs = GFS2_EA2DATAPTRS(ea);
276 for (x = 0; x < ea->ea_num_ptrs; x++, dataptrs++) { 276 for (x = 0; x < ea->ea_num_ptrs; x++, dataptrs++) {
@@ -309,7 +309,7 @@ static int ea_dealloc_unstuffed(struct gfs2_inode *ip, struct buffer_head *bh,
309 error = gfs2_meta_inode_buffer(ip, &dibh); 309 error = gfs2_meta_inode_buffer(ip, &dibh);
310 if (!error) { 310 if (!error) {
311 ip->i_inode.i_ctime = CURRENT_TIME; 311 ip->i_inode.i_ctime = CURRENT_TIME;
312 gfs2_trans_add_bh(ip->i_gl, dibh, 1); 312 gfs2_trans_add_meta(ip->i_gl, dibh);
313 gfs2_dinode_out(ip, dibh->b_data); 313 gfs2_dinode_out(ip, dibh->b_data);
314 brelse(dibh); 314 brelse(dibh);
315 } 315 }
@@ -331,7 +331,7 @@ static int ea_remove_unstuffed(struct gfs2_inode *ip, struct buffer_head *bh,
331 if (error) 331 if (error)
332 return error; 332 return error;
333 333
334 error = gfs2_quota_hold(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE); 334 error = gfs2_quota_hold(ip, NO_UID_QUOTA_CHANGE, NO_GID_QUOTA_CHANGE);
335 if (error) 335 if (error)
336 goto out_alloc; 336 goto out_alloc;
337 337
@@ -509,7 +509,7 @@ static int gfs2_iter_unstuffed(struct gfs2_inode *ip, struct gfs2_ea_header *ea,
509 } 509 }
510 510
511 if (din) { 511 if (din) {
512 gfs2_trans_add_bh(ip->i_gl, bh[x], 1); 512 gfs2_trans_add_meta(ip->i_gl, bh[x]);
513 memcpy(pos, din, cp_size); 513 memcpy(pos, din, cp_size);
514 din += sdp->sd_jbsize; 514 din += sdp->sd_jbsize;
515 } 515 }
@@ -629,7 +629,7 @@ static int ea_alloc_blk(struct gfs2_inode *ip, struct buffer_head **bhp)
629 return error; 629 return error;
630 gfs2_trans_add_unrevoke(sdp, block, 1); 630 gfs2_trans_add_unrevoke(sdp, block, 1);
631 *bhp = gfs2_meta_new(ip->i_gl, block); 631 *bhp = gfs2_meta_new(ip->i_gl, block);
632 gfs2_trans_add_bh(ip->i_gl, *bhp, 1); 632 gfs2_trans_add_meta(ip->i_gl, *bhp);
633 gfs2_metatype_set(*bhp, GFS2_METATYPE_EA, GFS2_FORMAT_EA); 633 gfs2_metatype_set(*bhp, GFS2_METATYPE_EA, GFS2_FORMAT_EA);
634 gfs2_buffer_clear_tail(*bhp, sizeof(struct gfs2_meta_header)); 634 gfs2_buffer_clear_tail(*bhp, sizeof(struct gfs2_meta_header));
635 635
@@ -691,7 +691,7 @@ static int ea_write(struct gfs2_inode *ip, struct gfs2_ea_header *ea,
691 return error; 691 return error;
692 gfs2_trans_add_unrevoke(sdp, block, 1); 692 gfs2_trans_add_unrevoke(sdp, block, 1);
693 bh = gfs2_meta_new(ip->i_gl, block); 693 bh = gfs2_meta_new(ip->i_gl, block);
694 gfs2_trans_add_bh(ip->i_gl, bh, 1); 694 gfs2_trans_add_meta(ip->i_gl, bh);
695 gfs2_metatype_set(bh, GFS2_METATYPE_ED, GFS2_FORMAT_ED); 695 gfs2_metatype_set(bh, GFS2_METATYPE_ED, GFS2_FORMAT_ED);
696 696
697 gfs2_add_inode_blocks(&ip->i_inode, 1); 697 gfs2_add_inode_blocks(&ip->i_inode, 1);
@@ -751,7 +751,7 @@ static int ea_alloc_skeleton(struct gfs2_inode *ip, struct gfs2_ea_request *er,
751 error = gfs2_meta_inode_buffer(ip, &dibh); 751 error = gfs2_meta_inode_buffer(ip, &dibh);
752 if (!error) { 752 if (!error) {
753 ip->i_inode.i_ctime = CURRENT_TIME; 753 ip->i_inode.i_ctime = CURRENT_TIME;
754 gfs2_trans_add_bh(ip->i_gl, dibh, 1); 754 gfs2_trans_add_meta(ip->i_gl, dibh);
755 gfs2_dinode_out(ip, dibh->b_data); 755 gfs2_dinode_out(ip, dibh->b_data);
756 brelse(dibh); 756 brelse(dibh);
757 } 757 }
@@ -834,7 +834,7 @@ static void ea_set_remove_stuffed(struct gfs2_inode *ip,
834 struct gfs2_ea_header *prev = el->el_prev; 834 struct gfs2_ea_header *prev = el->el_prev;
835 u32 len; 835 u32 len;
836 836
837 gfs2_trans_add_bh(ip->i_gl, el->el_bh, 1); 837 gfs2_trans_add_meta(ip->i_gl, el->el_bh);
838 838
839 if (!prev || !GFS2_EA_IS_STUFFED(ea)) { 839 if (!prev || !GFS2_EA_IS_STUFFED(ea)) {
840 ea->ea_type = GFS2_EATYPE_UNUSED; 840 ea->ea_type = GFS2_EATYPE_UNUSED;
@@ -872,7 +872,7 @@ static int ea_set_simple_noalloc(struct gfs2_inode *ip, struct buffer_head *bh,
872 if (error) 872 if (error)
873 return error; 873 return error;
874 874
875 gfs2_trans_add_bh(ip->i_gl, bh, 1); 875 gfs2_trans_add_meta(ip->i_gl, bh);
876 876
877 if (es->ea_split) 877 if (es->ea_split)
878 ea = ea_split_ea(ea); 878 ea = ea_split_ea(ea);
@@ -886,7 +886,7 @@ static int ea_set_simple_noalloc(struct gfs2_inode *ip, struct buffer_head *bh,
886 if (error) 886 if (error)
887 goto out; 887 goto out;
888 ip->i_inode.i_ctime = CURRENT_TIME; 888 ip->i_inode.i_ctime = CURRENT_TIME;
889 gfs2_trans_add_bh(ip->i_gl, dibh, 1); 889 gfs2_trans_add_meta(ip->i_gl, dibh);
890 gfs2_dinode_out(ip, dibh->b_data); 890 gfs2_dinode_out(ip, dibh->b_data);
891 brelse(dibh); 891 brelse(dibh);
892out: 892out:
@@ -901,7 +901,7 @@ static int ea_set_simple_alloc(struct gfs2_inode *ip,
901 struct gfs2_ea_header *ea = es->es_ea; 901 struct gfs2_ea_header *ea = es->es_ea;
902 int error; 902 int error;
903 903
904 gfs2_trans_add_bh(ip->i_gl, es->es_bh, 1); 904 gfs2_trans_add_meta(ip->i_gl, es->es_bh);
905 905
906 if (es->ea_split) 906 if (es->ea_split)
907 ea = ea_split_ea(ea); 907 ea = ea_split_ea(ea);
@@ -997,7 +997,7 @@ static int ea_set_block(struct gfs2_inode *ip, struct gfs2_ea_request *er,
997 goto out; 997 goto out;
998 } 998 }
999 999
1000 gfs2_trans_add_bh(ip->i_gl, indbh, 1); 1000 gfs2_trans_add_meta(ip->i_gl, indbh);
1001 } else { 1001 } else {
1002 u64 blk; 1002 u64 blk;
1003 unsigned int n = 1; 1003 unsigned int n = 1;
@@ -1006,7 +1006,7 @@ static int ea_set_block(struct gfs2_inode *ip, struct gfs2_ea_request *er,
1006 return error; 1006 return error;
1007 gfs2_trans_add_unrevoke(sdp, blk, 1); 1007 gfs2_trans_add_unrevoke(sdp, blk, 1);
1008 indbh = gfs2_meta_new(ip->i_gl, blk); 1008 indbh = gfs2_meta_new(ip->i_gl, blk);
1009 gfs2_trans_add_bh(ip->i_gl, indbh, 1); 1009 gfs2_trans_add_meta(ip->i_gl, indbh);
1010 gfs2_metatype_set(indbh, GFS2_METATYPE_IN, GFS2_FORMAT_IN); 1010 gfs2_metatype_set(indbh, GFS2_METATYPE_IN, GFS2_FORMAT_IN);
1011 gfs2_buffer_clear_tail(indbh, mh_size); 1011 gfs2_buffer_clear_tail(indbh, mh_size);
1012 1012
@@ -1092,7 +1092,7 @@ static int ea_remove_stuffed(struct gfs2_inode *ip, struct gfs2_ea_location *el)
1092 if (error) 1092 if (error)
1093 return error; 1093 return error;
1094 1094
1095 gfs2_trans_add_bh(ip->i_gl, el->el_bh, 1); 1095 gfs2_trans_add_meta(ip->i_gl, el->el_bh);
1096 1096
1097 if (prev) { 1097 if (prev) {
1098 u32 len; 1098 u32 len;
@@ -1109,7 +1109,7 @@ static int ea_remove_stuffed(struct gfs2_inode *ip, struct gfs2_ea_location *el)
1109 error = gfs2_meta_inode_buffer(ip, &dibh); 1109 error = gfs2_meta_inode_buffer(ip, &dibh);
1110 if (!error) { 1110 if (!error) {
1111 ip->i_inode.i_ctime = CURRENT_TIME; 1111 ip->i_inode.i_ctime = CURRENT_TIME;
1112 gfs2_trans_add_bh(ip->i_gl, dibh, 1); 1112 gfs2_trans_add_meta(ip->i_gl, dibh);
1113 gfs2_dinode_out(ip, dibh->b_data); 1113 gfs2_dinode_out(ip, dibh->b_data);
1114 brelse(dibh); 1114 brelse(dibh);
1115 } 1115 }
@@ -1265,7 +1265,7 @@ int gfs2_xattr_acl_chmod(struct gfs2_inode *ip, struct iattr *attr, char *data)
1265 if (GFS2_EA_IS_STUFFED(el.el_ea)) { 1265 if (GFS2_EA_IS_STUFFED(el.el_ea)) {
1266 error = gfs2_trans_begin(sdp, RES_DINODE + RES_EATTR, 0); 1266 error = gfs2_trans_begin(sdp, RES_DINODE + RES_EATTR, 0);
1267 if (error == 0) { 1267 if (error == 0) {
1268 gfs2_trans_add_bh(ip->i_gl, el.el_bh, 1); 1268 gfs2_trans_add_meta(ip->i_gl, el.el_bh);
1269 memcpy(GFS2_EA2DATA(el.el_ea), data, 1269 memcpy(GFS2_EA2DATA(el.el_ea), data,
1270 GFS2_EA_DATA_LEN(el.el_ea)); 1270 GFS2_EA_DATA_LEN(el.el_ea));
1271 } 1271 }
@@ -1352,7 +1352,7 @@ static int ea_dealloc_indirect(struct gfs2_inode *ip)
1352 if (error) 1352 if (error)
1353 goto out_gunlock; 1353 goto out_gunlock;
1354 1354
1355 gfs2_trans_add_bh(ip->i_gl, indbh, 1); 1355 gfs2_trans_add_meta(ip->i_gl, indbh);
1356 1356
1357 eablk = (__be64 *)(indbh->b_data + sizeof(struct gfs2_meta_header)); 1357 eablk = (__be64 *)(indbh->b_data + sizeof(struct gfs2_meta_header));
1358 bstart = 0; 1358 bstart = 0;
@@ -1384,7 +1384,7 @@ static int ea_dealloc_indirect(struct gfs2_inode *ip)
1384 1384
1385 error = gfs2_meta_inode_buffer(ip, &dibh); 1385 error = gfs2_meta_inode_buffer(ip, &dibh);
1386 if (!error) { 1386 if (!error) {
1387 gfs2_trans_add_bh(ip->i_gl, dibh, 1); 1387 gfs2_trans_add_meta(ip->i_gl, dibh);
1388 gfs2_dinode_out(ip, dibh->b_data); 1388 gfs2_dinode_out(ip, dibh->b_data);
1389 brelse(dibh); 1389 brelse(dibh);
1390 } 1390 }
@@ -1434,7 +1434,7 @@ static int ea_dealloc_block(struct gfs2_inode *ip)
1434 1434
1435 error = gfs2_meta_inode_buffer(ip, &dibh); 1435 error = gfs2_meta_inode_buffer(ip, &dibh);
1436 if (!error) { 1436 if (!error) {
1437 gfs2_trans_add_bh(ip->i_gl, dibh, 1); 1437 gfs2_trans_add_meta(ip->i_gl, dibh);
1438 gfs2_dinode_out(ip, dibh->b_data); 1438 gfs2_dinode_out(ip, dibh->b_data);
1439 brelse(dibh); 1439 brelse(dibh);
1440 } 1440 }
@@ -1461,7 +1461,7 @@ int gfs2_ea_dealloc(struct gfs2_inode *ip)
1461 if (error) 1461 if (error)
1462 return error; 1462 return error;
1463 1463
1464 error = gfs2_quota_hold(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE); 1464 error = gfs2_quota_hold(ip, NO_UID_QUOTA_CHANGE, NO_GID_QUOTA_CHANGE);
1465 if (error) 1465 if (error)
1466 return error; 1466 return error;
1467 1467
diff --git a/fs/hfs/Kconfig b/fs/hfs/Kconfig
index b77c5bc20f8a..998e3a6decf3 100644
--- a/fs/hfs/Kconfig
+++ b/fs/hfs/Kconfig
@@ -1,6 +1,6 @@
1config HFS_FS 1config HFS_FS
2 tristate "Apple Macintosh file system support (EXPERIMENTAL)" 2 tristate "Apple Macintosh file system support"
3 depends on BLOCK && EXPERIMENTAL 3 depends on BLOCK
4 select NLS 4 select NLS
5 help 5 help
6 If you say Y here, you will be able to mount Macintosh-formatted 6 If you say Y here, you will be able to mount Macintosh-formatted
diff --git a/fs/hpfs/inode.c b/fs/hpfs/inode.c
index 5dc06c837105..9edeeb0ea97e 100644
--- a/fs/hpfs/inode.c
+++ b/fs/hpfs/inode.c
@@ -147,7 +147,7 @@ static void hpfs_write_inode_ea(struct inode *i, struct fnode *fnode)
147 /*if (le32_to_cpu(fnode->acl_size_l) || le16_to_cpu(fnode->acl_size_s)) { 147 /*if (le32_to_cpu(fnode->acl_size_l) || le16_to_cpu(fnode->acl_size_s)) {
148 Some unknown structures like ACL may be in fnode, 148 Some unknown structures like ACL may be in fnode,
149 we'd better not overwrite them 149 we'd better not overwrite them
150 hpfs_error(i->i_sb, "fnode %08x has some unknown HPFS386 stuctures", i->i_ino); 150 hpfs_error(i->i_sb, "fnode %08x has some unknown HPFS386 structures", i->i_ino);
151 } else*/ if (hpfs_sb(i->i_sb)->sb_eas >= 2) { 151 } else*/ if (hpfs_sb(i->i_sb)->sb_eas >= 2) {
152 __le32 ea; 152 __le32 ea;
153 if (!uid_eq(i->i_uid, hpfs_sb(i->i_sb)->sb_uid) || hpfs_inode->i_ea_uid) { 153 if (!uid_eq(i->i_uid, hpfs_sb(i->i_sb)->sb_uid) || hpfs_inode->i_ea_uid) {
diff --git a/fs/jffs2/Kconfig b/fs/jffs2/Kconfig
index 6ae169cd8faa..d8bb6c411e96 100644
--- a/fs/jffs2/Kconfig
+++ b/fs/jffs2/Kconfig
@@ -50,8 +50,8 @@ config JFFS2_FS_WBUF_VERIFY
50 write-buffer, and check for errors. 50 write-buffer, and check for errors.
51 51
52config JFFS2_SUMMARY 52config JFFS2_SUMMARY
53 bool "JFFS2 summary support (EXPERIMENTAL)" 53 bool "JFFS2 summary support"
54 depends on JFFS2_FS && EXPERIMENTAL 54 depends on JFFS2_FS
55 default n 55 default n
56 help 56 help
57 This feature makes it possible to use summary information 57 This feature makes it possible to use summary information
@@ -63,8 +63,8 @@ config JFFS2_SUMMARY
63 If unsure, say 'N'. 63 If unsure, say 'N'.
64 64
65config JFFS2_FS_XATTR 65config JFFS2_FS_XATTR
66 bool "JFFS2 XATTR support (EXPERIMENTAL)" 66 bool "JFFS2 XATTR support"
67 depends on JFFS2_FS && EXPERIMENTAL 67 depends on JFFS2_FS
68 default n 68 default n
69 help 69 help
70 Extended attributes are name:value pairs associated with inodes by 70 Extended attributes are name:value pairs associated with inodes by
@@ -173,7 +173,7 @@ config JFFS2_CMODE_PRIORITY
173 successful one. 173 successful one.
174 174
175config JFFS2_CMODE_SIZE 175config JFFS2_CMODE_SIZE
176 bool "size (EXPERIMENTAL)" 176 bool "size"
177 help 177 help
178 Tries all compressors and chooses the one which has the smallest 178 Tries all compressors and chooses the one which has the smallest
179 result. 179 result.
diff --git a/fs/jfs/super.c b/fs/jfs/super.c
index 1a543be09c79..060ba638becb 100644
--- a/fs/jfs/super.c
+++ b/fs/jfs/super.c
@@ -154,7 +154,7 @@ static int jfs_statfs(struct dentry *dentry, struct kstatfs *buf)
154 /* 154 /*
155 * If we really return the number of allocated & free inodes, some 155 * If we really return the number of allocated & free inodes, some
156 * applications will fail because they won't see enough free inodes. 156 * applications will fail because they won't see enough free inodes.
157 * We'll try to calculate some guess as to how may inodes we can 157 * We'll try to calculate some guess as to how many inodes we can
158 * really allocate 158 * really allocate
159 * 159 *
160 * buf->f_files = atomic_read(&imap->im_numinos); 160 * buf->f_files = atomic_read(&imap->im_numinos);
diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c
index 54f9e6ce0430..52e5120bb159 100644
--- a/fs/lockd/clntproc.c
+++ b/fs/lockd/clntproc.c
@@ -550,6 +550,9 @@ again:
550 status = nlmclnt_block(block, req, NLMCLNT_POLL_TIMEOUT); 550 status = nlmclnt_block(block, req, NLMCLNT_POLL_TIMEOUT);
551 if (status < 0) 551 if (status < 0)
552 break; 552 break;
553 /* Resend the blocking lock request after a server reboot */
554 if (resp->status == nlm_lck_denied_grace_period)
555 continue;
553 if (resp->status != nlm_lck_blocked) 556 if (resp->status != nlm_lck_blocked)
554 break; 557 break;
555 } 558 }
diff --git a/fs/logfs/Kconfig b/fs/logfs/Kconfig
index daf9a9b32dd3..09ed066c0221 100644
--- a/fs/logfs/Kconfig
+++ b/fs/logfs/Kconfig
@@ -1,6 +1,6 @@
1config LOGFS 1config LOGFS
2 tristate "LogFS file system (EXPERIMENTAL)" 2 tristate "LogFS file system"
3 depends on (MTD || BLOCK) && EXPERIMENTAL 3 depends on (MTD || BLOCK)
4 select ZLIB_INFLATE 4 select ZLIB_INFLATE
5 select ZLIB_DEFLATE 5 select ZLIB_DEFLATE
6 select CRC32 6 select CRC32
diff --git a/fs/namespace.c b/fs/namespace.c
index 55605c552787..edac42c6eff2 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -1237,6 +1237,14 @@ static int do_umount(struct mount *mnt, int flags)
1237 return retval; 1237 return retval;
1238} 1238}
1239 1239
1240/*
1241 * Is the caller allowed to modify his namespace?
1242 */
1243static inline bool may_mount(void)
1244{
1245 return ns_capable(current->nsproxy->mnt_ns->user_ns, CAP_SYS_ADMIN);
1246}
1247
1240/* 1248/*
1241 * Now umount can handle mount points as well as block devices. 1249 * Now umount can handle mount points as well as block devices.
1242 * This is important for filesystems which use unnamed block devices. 1250 * This is important for filesystems which use unnamed block devices.
@@ -1255,6 +1263,9 @@ SYSCALL_DEFINE2(umount, char __user *, name, int, flags)
1255 if (flags & ~(MNT_FORCE | MNT_DETACH | MNT_EXPIRE | UMOUNT_NOFOLLOW)) 1263 if (flags & ~(MNT_FORCE | MNT_DETACH | MNT_EXPIRE | UMOUNT_NOFOLLOW))
1256 return -EINVAL; 1264 return -EINVAL;
1257 1265
1266 if (!may_mount())
1267 return -EPERM;
1268
1258 if (!(flags & UMOUNT_NOFOLLOW)) 1269 if (!(flags & UMOUNT_NOFOLLOW))
1259 lookup_flags |= LOOKUP_FOLLOW; 1270 lookup_flags |= LOOKUP_FOLLOW;
1260 1271
@@ -1268,10 +1279,6 @@ SYSCALL_DEFINE2(umount, char __user *, name, int, flags)
1268 if (!check_mnt(mnt)) 1279 if (!check_mnt(mnt))
1269 goto dput_and_out; 1280 goto dput_and_out;
1270 1281
1271 retval = -EPERM;
1272 if (!ns_capable(mnt->mnt_ns->user_ns, CAP_SYS_ADMIN))
1273 goto dput_and_out;
1274
1275 retval = do_umount(mnt, flags); 1282 retval = do_umount(mnt, flags);
1276dput_and_out: 1283dput_and_out:
1277 /* we mustn't call path_put() as that would clear mnt_expiry_mark */ 1284 /* we mustn't call path_put() as that would clear mnt_expiry_mark */
@@ -1295,7 +1302,7 @@ SYSCALL_DEFINE1(oldumount, char __user *, name)
1295 1302
1296static int mount_is_safe(struct path *path) 1303static int mount_is_safe(struct path *path)
1297{ 1304{
1298 if (ns_capable(real_mount(path->mnt)->mnt_ns->user_ns, CAP_SYS_ADMIN)) 1305 if (may_mount())
1299 return 0; 1306 return 0;
1300 return -EPERM; 1307 return -EPERM;
1301#ifdef notyet 1308#ifdef notyet
@@ -1633,7 +1640,7 @@ static int do_change_type(struct path *path, int flag)
1633 int type; 1640 int type;
1634 int err = 0; 1641 int err = 0;
1635 1642
1636 if (!ns_capable(mnt->mnt_ns->user_ns, CAP_SYS_ADMIN)) 1643 if (!may_mount())
1637 return -EPERM; 1644 return -EPERM;
1638 1645
1639 if (path->dentry != path->mnt->mnt_root) 1646 if (path->dentry != path->mnt->mnt_root)
@@ -1797,7 +1804,7 @@ static int do_move_mount(struct path *path, const char *old_name)
1797 struct mount *p; 1804 struct mount *p;
1798 struct mount *old; 1805 struct mount *old;
1799 int err = 0; 1806 int err = 0;
1800 if (!ns_capable(real_mount(path->mnt)->mnt_ns->user_ns, CAP_SYS_ADMIN)) 1807 if (!may_mount())
1801 return -EPERM; 1808 return -EPERM;
1802 if (!old_name || !*old_name) 1809 if (!old_name || !*old_name)
1803 return -EINVAL; 1810 return -EINVAL;
@@ -1933,16 +1940,14 @@ static int do_new_mount(struct path *path, const char *fstype, int flags,
1933 int mnt_flags, const char *name, void *data) 1940 int mnt_flags, const char *name, void *data)
1934{ 1941{
1935 struct file_system_type *type; 1942 struct file_system_type *type;
1936 struct user_namespace *user_ns; 1943 struct user_namespace *user_ns = current->nsproxy->mnt_ns->user_ns;
1937 struct vfsmount *mnt; 1944 struct vfsmount *mnt;
1938 int err; 1945 int err;
1939 1946
1940 if (!fstype) 1947 if (!fstype)
1941 return -EINVAL; 1948 return -EINVAL;
1942 1949
1943 /* we need capabilities... */ 1950 if (!may_mount())
1944 user_ns = real_mount(path->mnt)->mnt_ns->user_ns;
1945 if (!ns_capable(user_ns, CAP_SYS_ADMIN))
1946 return -EPERM; 1951 return -EPERM;
1947 1952
1948 type = get_fs_type(fstype); 1953 type = get_fs_type(fstype);
@@ -2567,7 +2572,7 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root,
2567 struct mount *new_mnt, *root_mnt; 2572 struct mount *new_mnt, *root_mnt;
2568 int error; 2573 int error;
2569 2574
2570 if (!ns_capable(current->nsproxy->mnt_ns->user_ns, CAP_SYS_ADMIN)) 2575 if (!may_mount())
2571 return -EPERM; 2576 return -EPERM;
2572 2577
2573 error = user_path_dir(new_root, &new); 2578 error = user_path_dir(new_root, &new);
diff --git a/fs/ncpfs/inode.c b/fs/ncpfs/inode.c
index 1acdad7fcec7..e2be336d1c22 100644
--- a/fs/ncpfs/inode.c
+++ b/fs/ncpfs/inode.c
@@ -331,12 +331,15 @@ static int ncp_show_options(struct seq_file *seq, struct dentry *root)
331 struct ncp_server *server = NCP_SBP(root->d_sb); 331 struct ncp_server *server = NCP_SBP(root->d_sb);
332 unsigned int tmp; 332 unsigned int tmp;
333 333
334 if (server->m.uid != 0) 334 if (!uid_eq(server->m.uid, GLOBAL_ROOT_UID))
335 seq_printf(seq, ",uid=%u", server->m.uid); 335 seq_printf(seq, ",uid=%u",
336 if (server->m.gid != 0) 336 from_kuid_munged(&init_user_ns, server->m.uid));
337 seq_printf(seq, ",gid=%u", server->m.gid); 337 if (!gid_eq(server->m.gid, GLOBAL_ROOT_GID))
338 if (server->m.mounted_uid != 0) 338 seq_printf(seq, ",gid=%u",
339 seq_printf(seq, ",owner=%u", server->m.mounted_uid); 339 from_kgid_munged(&init_user_ns, server->m.gid));
340 if (!uid_eq(server->m.mounted_uid, GLOBAL_ROOT_UID))
341 seq_printf(seq, ",owner=%u",
342 from_kuid_munged(&init_user_ns, server->m.mounted_uid));
340 tmp = server->m.file_mode & S_IALLUGO; 343 tmp = server->m.file_mode & S_IALLUGO;
341 if (tmp != NCP_DEFAULT_FILE_MODE) 344 if (tmp != NCP_DEFAULT_FILE_MODE)
342 seq_printf(seq, ",mode=0%o", tmp); 345 seq_printf(seq, ",mode=0%o", tmp);
@@ -381,13 +384,13 @@ static int ncp_parse_options(struct ncp_mount_data_kernel *data, char *options)
381 384
382 data->flags = 0; 385 data->flags = 0;
383 data->int_flags = 0; 386 data->int_flags = 0;
384 data->mounted_uid = 0; 387 data->mounted_uid = GLOBAL_ROOT_UID;
385 data->wdog_pid = NULL; 388 data->wdog_pid = NULL;
386 data->ncp_fd = ~0; 389 data->ncp_fd = ~0;
387 data->time_out = NCP_DEFAULT_TIME_OUT; 390 data->time_out = NCP_DEFAULT_TIME_OUT;
388 data->retry_count = NCP_DEFAULT_RETRY_COUNT; 391 data->retry_count = NCP_DEFAULT_RETRY_COUNT;
389 data->uid = 0; 392 data->uid = GLOBAL_ROOT_UID;
390 data->gid = 0; 393 data->gid = GLOBAL_ROOT_GID;
391 data->file_mode = NCP_DEFAULT_FILE_MODE; 394 data->file_mode = NCP_DEFAULT_FILE_MODE;
392 data->dir_mode = NCP_DEFAULT_DIR_MODE; 395 data->dir_mode = NCP_DEFAULT_DIR_MODE;
393 data->info_fd = -1; 396 data->info_fd = -1;
@@ -399,13 +402,19 @@ static int ncp_parse_options(struct ncp_mount_data_kernel *data, char *options)
399 goto err; 402 goto err;
400 switch (optval) { 403 switch (optval) {
401 case 'u': 404 case 'u':
402 data->uid = optint; 405 data->uid = make_kuid(current_user_ns(), optint);
406 if (!uid_valid(data->uid))
407 goto err;
403 break; 408 break;
404 case 'g': 409 case 'g':
405 data->gid = optint; 410 data->gid = make_kgid(current_user_ns(), optint);
411 if (!gid_valid(data->gid))
412 goto err;
406 break; 413 break;
407 case 'o': 414 case 'o':
408 data->mounted_uid = optint; 415 data->mounted_uid = make_kuid(current_user_ns(), optint);
416 if (!uid_valid(data->mounted_uid))
417 goto err;
409 break; 418 break;
410 case 'm': 419 case 'm':
411 data->file_mode = optint; 420 data->file_mode = optint;
@@ -480,13 +489,13 @@ static int ncp_fill_super(struct super_block *sb, void *raw_data, int silent)
480 489
481 data.flags = md->flags; 490 data.flags = md->flags;
482 data.int_flags = NCP_IMOUNT_LOGGEDIN_POSSIBLE; 491 data.int_flags = NCP_IMOUNT_LOGGEDIN_POSSIBLE;
483 data.mounted_uid = md->mounted_uid; 492 data.mounted_uid = make_kuid(current_user_ns(), md->mounted_uid);
484 data.wdog_pid = find_get_pid(md->wdog_pid); 493 data.wdog_pid = find_get_pid(md->wdog_pid);
485 data.ncp_fd = md->ncp_fd; 494 data.ncp_fd = md->ncp_fd;
486 data.time_out = md->time_out; 495 data.time_out = md->time_out;
487 data.retry_count = md->retry_count; 496 data.retry_count = md->retry_count;
488 data.uid = md->uid; 497 data.uid = make_kuid(current_user_ns(), md->uid);
489 data.gid = md->gid; 498 data.gid = make_kgid(current_user_ns(), md->gid);
490 data.file_mode = md->file_mode; 499 data.file_mode = md->file_mode;
491 data.dir_mode = md->dir_mode; 500 data.dir_mode = md->dir_mode;
492 data.info_fd = -1; 501 data.info_fd = -1;
@@ -499,13 +508,13 @@ static int ncp_fill_super(struct super_block *sb, void *raw_data, int silent)
499 struct ncp_mount_data_v4* md = (struct ncp_mount_data_v4*)raw_data; 508 struct ncp_mount_data_v4* md = (struct ncp_mount_data_v4*)raw_data;
500 509
501 data.flags = md->flags; 510 data.flags = md->flags;
502 data.mounted_uid = md->mounted_uid; 511 data.mounted_uid = make_kuid(current_user_ns(), md->mounted_uid);
503 data.wdog_pid = find_get_pid(md->wdog_pid); 512 data.wdog_pid = find_get_pid(md->wdog_pid);
504 data.ncp_fd = md->ncp_fd; 513 data.ncp_fd = md->ncp_fd;
505 data.time_out = md->time_out; 514 data.time_out = md->time_out;
506 data.retry_count = md->retry_count; 515 data.retry_count = md->retry_count;
507 data.uid = md->uid; 516 data.uid = make_kuid(current_user_ns(), md->uid);
508 data.gid = md->gid; 517 data.gid = make_kgid(current_user_ns(), md->gid);
509 data.file_mode = md->file_mode; 518 data.file_mode = md->file_mode;
510 data.dir_mode = md->dir_mode; 519 data.dir_mode = md->dir_mode;
511 data.info_fd = -1; 520 data.info_fd = -1;
@@ -520,6 +529,10 @@ static int ncp_fill_super(struct super_block *sb, void *raw_data, int silent)
520 goto out; 529 goto out;
521 break; 530 break;
522 } 531 }
532 error = -EINVAL;
533 if (!uid_valid(data.mounted_uid) || !uid_valid(data.uid) ||
534 !gid_valid(data.gid))
535 goto out;
523 error = -EBADF; 536 error = -EBADF;
524 ncp_filp = fget(data.ncp_fd); 537 ncp_filp = fget(data.ncp_fd);
525 if (!ncp_filp) 538 if (!ncp_filp)
@@ -886,12 +899,10 @@ int ncp_notify_change(struct dentry *dentry, struct iattr *attr)
886 goto out; 899 goto out;
887 900
888 result = -EPERM; 901 result = -EPERM;
889 if (((attr->ia_valid & ATTR_UID) && 902 if ((attr->ia_valid & ATTR_UID) && !uid_eq(attr->ia_uid, server->m.uid))
890 (attr->ia_uid != server->m.uid)))
891 goto out; 903 goto out;
892 904
893 if (((attr->ia_valid & ATTR_GID) && 905 if ((attr->ia_valid & ATTR_GID) && !gid_eq(attr->ia_gid, server->m.gid))
894 (attr->ia_gid != server->m.gid)))
895 goto out; 906 goto out;
896 907
897 if (((attr->ia_valid & ATTR_MODE) && 908 if (((attr->ia_valid & ATTR_MODE) &&
diff --git a/fs/ncpfs/ioctl.c b/fs/ncpfs/ioctl.c
index 6958adfaff08..d44318d27504 100644
--- a/fs/ncpfs/ioctl.c
+++ b/fs/ncpfs/ioctl.c
@@ -45,7 +45,7 @@ ncp_get_fs_info(struct ncp_server * server, struct inode *inode,
45 return -EINVAL; 45 return -EINVAL;
46 } 46 }
47 /* TODO: info.addr = server->m.serv_addr; */ 47 /* TODO: info.addr = server->m.serv_addr; */
48 SET_UID(info.mounted_uid, server->m.mounted_uid); 48 SET_UID(info.mounted_uid, from_kuid_munged(current_user_ns(), server->m.mounted_uid));
49 info.connection = server->connection; 49 info.connection = server->connection;
50 info.buffer_size = server->buffer_size; 50 info.buffer_size = server->buffer_size;
51 info.volume_number = NCP_FINFO(inode)->volNumber; 51 info.volume_number = NCP_FINFO(inode)->volNumber;
@@ -69,7 +69,7 @@ ncp_get_fs_info_v2(struct ncp_server * server, struct inode *inode,
69 DPRINTK("info.version invalid: %d\n", info2.version); 69 DPRINTK("info.version invalid: %d\n", info2.version);
70 return -EINVAL; 70 return -EINVAL;
71 } 71 }
72 info2.mounted_uid = server->m.mounted_uid; 72 info2.mounted_uid = from_kuid_munged(current_user_ns(), server->m.mounted_uid);
73 info2.connection = server->connection; 73 info2.connection = server->connection;
74 info2.buffer_size = server->buffer_size; 74 info2.buffer_size = server->buffer_size;
75 info2.volume_number = NCP_FINFO(inode)->volNumber; 75 info2.volume_number = NCP_FINFO(inode)->volNumber;
@@ -135,7 +135,7 @@ ncp_get_compat_fs_info_v2(struct ncp_server * server, struct inode *inode,
135 DPRINTK("info.version invalid: %d\n", info2.version); 135 DPRINTK("info.version invalid: %d\n", info2.version);
136 return -EINVAL; 136 return -EINVAL;
137 } 137 }
138 info2.mounted_uid = server->m.mounted_uid; 138 info2.mounted_uid = from_kuid_munged(current_user_ns(), server->m.mounted_uid);
139 info2.connection = server->connection; 139 info2.connection = server->connection;
140 info2.buffer_size = server->buffer_size; 140 info2.buffer_size = server->buffer_size;
141 info2.volume_number = NCP_FINFO(inode)->volNumber; 141 info2.volume_number = NCP_FINFO(inode)->volNumber;
@@ -348,22 +348,25 @@ static long __ncp_ioctl(struct inode *inode, unsigned int cmd, unsigned long arg
348 { 348 {
349 u16 uid; 349 u16 uid;
350 350
351 SET_UID(uid, server->m.mounted_uid); 351 SET_UID(uid, from_kuid_munged(current_user_ns(), server->m.mounted_uid));
352 if (put_user(uid, (u16 __user *)argp)) 352 if (put_user(uid, (u16 __user *)argp))
353 return -EFAULT; 353 return -EFAULT;
354 return 0; 354 return 0;
355 } 355 }
356 case NCP_IOC_GETMOUNTUID32: 356 case NCP_IOC_GETMOUNTUID32:
357 if (put_user(server->m.mounted_uid, 357 {
358 (u32 __user *)argp)) 358 uid_t uid = from_kuid_munged(current_user_ns(), server->m.mounted_uid);
359 if (put_user(uid, (u32 __user *)argp))
359 return -EFAULT; 360 return -EFAULT;
360 return 0; 361 return 0;
362 }
361 case NCP_IOC_GETMOUNTUID64: 363 case NCP_IOC_GETMOUNTUID64:
362 if (put_user(server->m.mounted_uid, 364 {
363 (u64 __user *)argp)) 365 uid_t uid = from_kuid_munged(current_user_ns(), server->m.mounted_uid);
366 if (put_user(uid, (u64 __user *)argp))
364 return -EFAULT; 367 return -EFAULT;
365 return 0; 368 return 0;
366 369 }
367 case NCP_IOC_GETROOT: 370 case NCP_IOC_GETROOT:
368 { 371 {
369 struct ncp_setroot_ioctl sr; 372 struct ncp_setroot_ioctl sr;
@@ -810,7 +813,7 @@ long ncp_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
810{ 813{
811 struct inode *inode = filp->f_dentry->d_inode; 814 struct inode *inode = filp->f_dentry->d_inode;
812 struct ncp_server *server = NCP_SERVER(inode); 815 struct ncp_server *server = NCP_SERVER(inode);
813 uid_t uid = current_uid(); 816 kuid_t uid = current_uid();
814 int need_drop_write = 0; 817 int need_drop_write = 0;
815 long ret; 818 long ret;
816 819
@@ -824,7 +827,7 @@ long ncp_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
824 } 827 }
825 break; 828 break;
826 } 829 }
827 if (server->m.mounted_uid != uid) { 830 if (!uid_eq(server->m.mounted_uid, uid)) {
828 switch (cmd) { 831 switch (cmd) {
829 /* 832 /*
830 * Only mount owner can issue these ioctls. Information 833 * Only mount owner can issue these ioctls. Information
diff --git a/fs/ncpfs/ncp_fs_sb.h b/fs/ncpfs/ncp_fs_sb.h
index 54cc0cdb3dcb..c51b2c543539 100644
--- a/fs/ncpfs/ncp_fs_sb.h
+++ b/fs/ncpfs/ncp_fs_sb.h
@@ -23,15 +23,15 @@ struct ncp_mount_data_kernel {
23 unsigned long flags; /* NCP_MOUNT_* flags */ 23 unsigned long flags; /* NCP_MOUNT_* flags */
24 unsigned int int_flags; /* internal flags */ 24 unsigned int int_flags; /* internal flags */
25#define NCP_IMOUNT_LOGGEDIN_POSSIBLE 0x0001 25#define NCP_IMOUNT_LOGGEDIN_POSSIBLE 0x0001
26 uid_t mounted_uid; /* Who may umount() this filesystem? */ 26 kuid_t mounted_uid; /* Who may umount() this filesystem? */
27 struct pid *wdog_pid; /* Who cares for our watchdog packets? */ 27 struct pid *wdog_pid; /* Who cares for our watchdog packets? */
28 unsigned int ncp_fd; /* The socket to the ncp port */ 28 unsigned int ncp_fd; /* The socket to the ncp port */
29 unsigned int time_out; /* How long should I wait after 29 unsigned int time_out; /* How long should I wait after
30 sending a NCP request? */ 30 sending a NCP request? */
31 unsigned int retry_count; /* And how often should I retry? */ 31 unsigned int retry_count; /* And how often should I retry? */
32 unsigned char mounted_vol[NCP_VOLNAME_LEN + 1]; 32 unsigned char mounted_vol[NCP_VOLNAME_LEN + 1];
33 uid_t uid; 33 kuid_t uid;
34 gid_t gid; 34 kgid_t gid;
35 umode_t file_mode; 35 umode_t file_mode;
36 umode_t dir_mode; 36 umode_t dir_mode;
37 int info_fd; 37 int info_fd;
diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c
index 4fa788c93f46..434b93ec0970 100644
--- a/fs/nfs/blocklayout/blocklayout.c
+++ b/fs/nfs/blocklayout/blocklayout.c
@@ -1273,6 +1273,7 @@ static const struct nfs_pageio_ops bl_pg_write_ops = {
1273static struct pnfs_layoutdriver_type blocklayout_type = { 1273static struct pnfs_layoutdriver_type blocklayout_type = {
1274 .id = LAYOUT_BLOCK_VOLUME, 1274 .id = LAYOUT_BLOCK_VOLUME,
1275 .name = "LAYOUT_BLOCK_VOLUME", 1275 .name = "LAYOUT_BLOCK_VOLUME",
1276 .owner = THIS_MODULE,
1276 .read_pagelist = bl_read_pagelist, 1277 .read_pagelist = bl_read_pagelist,
1277 .write_pagelist = bl_write_pagelist, 1278 .write_pagelist = bl_write_pagelist,
1278 .alloc_layout_hdr = bl_alloc_layout_hdr, 1279 .alloc_layout_hdr = bl_alloc_layout_hdr,
diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c
index 264d1aa935f2..2960512792c2 100644
--- a/fs/nfs/callback_proc.c
+++ b/fs/nfs/callback_proc.c
@@ -183,60 +183,15 @@ static u32 initiate_file_draining(struct nfs_client *clp,
183static u32 initiate_bulk_draining(struct nfs_client *clp, 183static u32 initiate_bulk_draining(struct nfs_client *clp,
184 struct cb_layoutrecallargs *args) 184 struct cb_layoutrecallargs *args)
185{ 185{
186 struct nfs_server *server; 186 int stat;
187 struct pnfs_layout_hdr *lo;
188 struct inode *ino;
189 u32 rv = NFS4ERR_NOMATCHING_LAYOUT;
190 struct pnfs_layout_hdr *tmp;
191 LIST_HEAD(recall_list);
192 LIST_HEAD(free_me_list);
193 struct pnfs_layout_range range = {
194 .iomode = IOMODE_ANY,
195 .offset = 0,
196 .length = NFS4_MAX_UINT64,
197 };
198
199 spin_lock(&clp->cl_lock);
200 rcu_read_lock();
201 list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) {
202 if ((args->cbl_recall_type == RETURN_FSID) &&
203 memcmp(&server->fsid, &args->cbl_fsid,
204 sizeof(struct nfs_fsid)))
205 continue;
206 187
207 list_for_each_entry(lo, &server->layouts, plh_layouts) { 188 if (args->cbl_recall_type == RETURN_FSID)
208 ino = igrab(lo->plh_inode); 189 stat = pnfs_destroy_layouts_byfsid(clp, &args->cbl_fsid, true);
209 if (!ino) 190 else
210 continue; 191 stat = pnfs_destroy_layouts_byclid(clp, true);
211 spin_lock(&ino->i_lock); 192 if (stat != 0)
212 /* Is this layout in the process of being freed? */ 193 return NFS4ERR_DELAY;
213 if (NFS_I(ino)->layout != lo) { 194 return NFS4ERR_NOMATCHING_LAYOUT;
214 spin_unlock(&ino->i_lock);
215 iput(ino);
216 continue;
217 }
218 pnfs_get_layout_hdr(lo);
219 spin_unlock(&ino->i_lock);
220 list_add(&lo->plh_bulk_recall, &recall_list);
221 }
222 }
223 rcu_read_unlock();
224 spin_unlock(&clp->cl_lock);
225
226 list_for_each_entry_safe(lo, tmp,
227 &recall_list, plh_bulk_recall) {
228 ino = lo->plh_inode;
229 spin_lock(&ino->i_lock);
230 set_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags);
231 if (pnfs_mark_matching_lsegs_invalid(lo, &free_me_list, &range))
232 rv = NFS4ERR_DELAY;
233 list_del_init(&lo->plh_bulk_recall);
234 spin_unlock(&ino->i_lock);
235 pnfs_free_lseg_list(&free_me_list);
236 pnfs_put_layout_hdr(lo);
237 iput(ino);
238 }
239 return rv;
240} 195}
241 196
242static u32 do_callback_layoutrecall(struct nfs_client *clp, 197static u32 do_callback_layoutrecall(struct nfs_client *clp,
diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c
index 81c5eec3cf38..6390a4b5fee7 100644
--- a/fs/nfs/delegation.c
+++ b/fs/nfs/delegation.c
@@ -55,7 +55,8 @@ int nfs4_have_delegation(struct inode *inode, fmode_t flags)
55 flags &= FMODE_READ|FMODE_WRITE; 55 flags &= FMODE_READ|FMODE_WRITE;
56 rcu_read_lock(); 56 rcu_read_lock();
57 delegation = rcu_dereference(NFS_I(inode)->delegation); 57 delegation = rcu_dereference(NFS_I(inode)->delegation);
58 if (delegation != NULL && (delegation->type & flags) == flags) { 58 if (delegation != NULL && (delegation->type & flags) == flags &&
59 !test_bit(NFS_DELEGATION_RETURNING, &delegation->flags)) {
59 nfs_mark_delegation_referenced(delegation); 60 nfs_mark_delegation_referenced(delegation);
60 ret = 1; 61 ret = 1;
61 } 62 }
@@ -70,8 +71,10 @@ static int nfs_delegation_claim_locks(struct nfs_open_context *ctx, struct nfs4_
70 int status = 0; 71 int status = 0;
71 72
72 if (inode->i_flock == NULL) 73 if (inode->i_flock == NULL)
73 goto out; 74 return 0;
74 75
76 if (inode->i_flock == NULL)
77 goto out;
75 /* Protect inode->i_flock using the file locks lock */ 78 /* Protect inode->i_flock using the file locks lock */
76 lock_flocks(); 79 lock_flocks();
77 for (fl = inode->i_flock; fl != NULL; fl = fl->fl_next) { 80 for (fl = inode->i_flock; fl != NULL; fl = fl->fl_next) {
@@ -94,7 +97,9 @@ static int nfs_delegation_claim_opens(struct inode *inode, const nfs4_stateid *s
94{ 97{
95 struct nfs_inode *nfsi = NFS_I(inode); 98 struct nfs_inode *nfsi = NFS_I(inode);
96 struct nfs_open_context *ctx; 99 struct nfs_open_context *ctx;
100 struct nfs4_state_owner *sp;
97 struct nfs4_state *state; 101 struct nfs4_state *state;
102 unsigned int seq;
98 int err; 103 int err;
99 104
100again: 105again:
@@ -109,9 +114,16 @@ again:
109 continue; 114 continue;
110 get_nfs_open_context(ctx); 115 get_nfs_open_context(ctx);
111 spin_unlock(&inode->i_lock); 116 spin_unlock(&inode->i_lock);
117 sp = state->owner;
118 /* Block nfs4_proc_unlck */
119 mutex_lock(&sp->so_delegreturn_mutex);
120 seq = raw_seqcount_begin(&sp->so_reclaim_seqcount);
112 err = nfs4_open_delegation_recall(ctx, state, stateid); 121 err = nfs4_open_delegation_recall(ctx, state, stateid);
113 if (err >= 0) 122 if (!err)
114 err = nfs_delegation_claim_locks(ctx, state); 123 err = nfs_delegation_claim_locks(ctx, state);
124 if (!err && read_seqcount_retry(&sp->so_reclaim_seqcount, seq))
125 err = -EAGAIN;
126 mutex_unlock(&sp->so_delegreturn_mutex);
115 put_nfs_open_context(ctx); 127 put_nfs_open_context(ctx);
116 if (err != 0) 128 if (err != 0)
117 return err; 129 return err;
@@ -182,39 +194,91 @@ static struct inode *nfs_delegation_grab_inode(struct nfs_delegation *delegation
182} 194}
183 195
184static struct nfs_delegation * 196static struct nfs_delegation *
197nfs_start_delegation_return_locked(struct nfs_inode *nfsi)
198{
199 struct nfs_delegation *ret = NULL;
200 struct nfs_delegation *delegation = rcu_dereference(nfsi->delegation);
201
202 if (delegation == NULL)
203 goto out;
204 spin_lock(&delegation->lock);
205 if (!test_and_set_bit(NFS_DELEGATION_RETURNING, &delegation->flags))
206 ret = delegation;
207 spin_unlock(&delegation->lock);
208out:
209 return ret;
210}
211
212static struct nfs_delegation *
213nfs_start_delegation_return(struct nfs_inode *nfsi)
214{
215 struct nfs_delegation *delegation;
216
217 rcu_read_lock();
218 delegation = nfs_start_delegation_return_locked(nfsi);
219 rcu_read_unlock();
220 return delegation;
221}
222
223static void
224nfs_abort_delegation_return(struct nfs_delegation *delegation,
225 struct nfs_client *clp)
226{
227
228 spin_lock(&delegation->lock);
229 clear_bit(NFS_DELEGATION_RETURNING, &delegation->flags);
230 set_bit(NFS_DELEGATION_RETURN, &delegation->flags);
231 spin_unlock(&delegation->lock);
232 set_bit(NFS4CLNT_DELEGRETURN, &clp->cl_state);
233}
234
235static struct nfs_delegation *
185nfs_detach_delegation_locked(struct nfs_inode *nfsi, 236nfs_detach_delegation_locked(struct nfs_inode *nfsi,
186 struct nfs_server *server) 237 struct nfs_delegation *delegation,
238 struct nfs_client *clp)
187{ 239{
188 struct nfs_delegation *delegation = 240 struct nfs_delegation *deleg_cur =
189 rcu_dereference_protected(nfsi->delegation, 241 rcu_dereference_protected(nfsi->delegation,
190 lockdep_is_held(&server->nfs_client->cl_lock)); 242 lockdep_is_held(&clp->cl_lock));
191 243
192 if (delegation == NULL) 244 if (deleg_cur == NULL || delegation != deleg_cur)
193 goto nomatch; 245 return NULL;
194 246
195 spin_lock(&delegation->lock); 247 spin_lock(&delegation->lock);
248 set_bit(NFS_DELEGATION_RETURNING, &delegation->flags);
196 list_del_rcu(&delegation->super_list); 249 list_del_rcu(&delegation->super_list);
197 delegation->inode = NULL; 250 delegation->inode = NULL;
198 nfsi->delegation_state = 0; 251 nfsi->delegation_state = 0;
199 rcu_assign_pointer(nfsi->delegation, NULL); 252 rcu_assign_pointer(nfsi->delegation, NULL);
200 spin_unlock(&delegation->lock); 253 spin_unlock(&delegation->lock);
201 return delegation; 254 return delegation;
202nomatch:
203 return NULL;
204} 255}
205 256
206static struct nfs_delegation *nfs_detach_delegation(struct nfs_inode *nfsi, 257static struct nfs_delegation *nfs_detach_delegation(struct nfs_inode *nfsi,
207 struct nfs_server *server) 258 struct nfs_delegation *delegation,
259 struct nfs_server *server)
208{ 260{
209 struct nfs_client *clp = server->nfs_client; 261 struct nfs_client *clp = server->nfs_client;
210 struct nfs_delegation *delegation;
211 262
212 spin_lock(&clp->cl_lock); 263 spin_lock(&clp->cl_lock);
213 delegation = nfs_detach_delegation_locked(nfsi, server); 264 delegation = nfs_detach_delegation_locked(nfsi, delegation, clp);
214 spin_unlock(&clp->cl_lock); 265 spin_unlock(&clp->cl_lock);
215 return delegation; 266 return delegation;
216} 267}
217 268
269static struct nfs_delegation *
270nfs_inode_detach_delegation(struct inode *inode)
271{
272 struct nfs_inode *nfsi = NFS_I(inode);
273 struct nfs_server *server = NFS_SERVER(inode);
274 struct nfs_delegation *delegation;
275
276 delegation = nfs_start_delegation_return(nfsi);
277 if (delegation == NULL)
278 return NULL;
279 return nfs_detach_delegation(nfsi, delegation, server);
280}
281
218/** 282/**
219 * nfs_inode_set_delegation - set up a delegation on an inode 283 * nfs_inode_set_delegation - set up a delegation on an inode
220 * @inode: inode to which delegation applies 284 * @inode: inode to which delegation applies
@@ -268,7 +332,10 @@ int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct
268 delegation = NULL; 332 delegation = NULL;
269 goto out; 333 goto out;
270 } 334 }
271 freeme = nfs_detach_delegation_locked(nfsi, server); 335 freeme = nfs_detach_delegation_locked(nfsi,
336 old_delegation, clp);
337 if (freeme == NULL)
338 goto out;
272 } 339 }
273 list_add_rcu(&delegation->super_list, &server->delegations); 340 list_add_rcu(&delegation->super_list, &server->delegations);
274 nfsi->delegation_state = delegation->type; 341 nfsi->delegation_state = delegation->type;
@@ -292,19 +359,29 @@ out:
292/* 359/*
293 * Basic procedure for returning a delegation to the server 360 * Basic procedure for returning a delegation to the server
294 */ 361 */
295static int __nfs_inode_return_delegation(struct inode *inode, struct nfs_delegation *delegation, int issync) 362static int nfs_end_delegation_return(struct inode *inode, struct nfs_delegation *delegation, int issync)
296{ 363{
364 struct nfs_client *clp = NFS_SERVER(inode)->nfs_client;
297 struct nfs_inode *nfsi = NFS_I(inode); 365 struct nfs_inode *nfsi = NFS_I(inode);
298 int err; 366 int err;
299 367
300 /* 368 if (delegation == NULL)
301 * Guard against new delegated open/lock/unlock calls and against 369 return 0;
302 * state recovery 370 do {
303 */ 371 err = nfs_delegation_claim_opens(inode, &delegation->stateid);
304 down_write(&nfsi->rwsem); 372 if (!issync || err != -EAGAIN)
305 err = nfs_delegation_claim_opens(inode, &delegation->stateid); 373 break;
306 up_write(&nfsi->rwsem); 374 /*
307 if (err) 375 * Guard against state recovery
376 */
377 err = nfs4_wait_clnt_recover(clp);
378 } while (err == 0);
379
380 if (err) {
381 nfs_abort_delegation_return(delegation, clp);
382 goto out;
383 }
384 if (!nfs_detach_delegation(nfsi, delegation, NFS_SERVER(inode)))
308 goto out; 385 goto out;
309 386
310 err = nfs_do_return_delegation(inode, delegation, issync); 387 err = nfs_do_return_delegation(inode, delegation, issync);
@@ -340,13 +417,10 @@ restart:
340 inode = nfs_delegation_grab_inode(delegation); 417 inode = nfs_delegation_grab_inode(delegation);
341 if (inode == NULL) 418 if (inode == NULL)
342 continue; 419 continue;
343 delegation = nfs_detach_delegation(NFS_I(inode), 420 delegation = nfs_start_delegation_return_locked(NFS_I(inode));
344 server);
345 rcu_read_unlock(); 421 rcu_read_unlock();
346 422
347 if (delegation != NULL) 423 err = nfs_end_delegation_return(inode, delegation, 0);
348 err = __nfs_inode_return_delegation(inode,
349 delegation, 0);
350 iput(inode); 424 iput(inode);
351 if (!err) 425 if (!err)
352 goto restart; 426 goto restart;
@@ -367,15 +441,11 @@ restart:
367 */ 441 */
368void nfs_inode_return_delegation_noreclaim(struct inode *inode) 442void nfs_inode_return_delegation_noreclaim(struct inode *inode)
369{ 443{
370 struct nfs_server *server = NFS_SERVER(inode);
371 struct nfs_inode *nfsi = NFS_I(inode);
372 struct nfs_delegation *delegation; 444 struct nfs_delegation *delegation;
373 445
374 if (rcu_access_pointer(nfsi->delegation) != NULL) { 446 delegation = nfs_inode_detach_delegation(inode);
375 delegation = nfs_detach_delegation(nfsi, server); 447 if (delegation != NULL)
376 if (delegation != NULL) 448 nfs_do_return_delegation(inode, delegation, 0);
377 nfs_do_return_delegation(inode, delegation, 0);
378 }
379} 449}
380 450
381/** 451/**
@@ -390,18 +460,14 @@ void nfs_inode_return_delegation_noreclaim(struct inode *inode)
390 */ 460 */
391int nfs4_inode_return_delegation(struct inode *inode) 461int nfs4_inode_return_delegation(struct inode *inode)
392{ 462{
393 struct nfs_server *server = NFS_SERVER(inode);
394 struct nfs_inode *nfsi = NFS_I(inode); 463 struct nfs_inode *nfsi = NFS_I(inode);
395 struct nfs_delegation *delegation; 464 struct nfs_delegation *delegation;
396 int err = 0; 465 int err = 0;
397 466
398 nfs_wb_all(inode); 467 nfs_wb_all(inode);
399 if (rcu_access_pointer(nfsi->delegation) != NULL) { 468 delegation = nfs_start_delegation_return(nfsi);
400 delegation = nfs_detach_delegation(nfsi, server); 469 if (delegation != NULL)
401 if (delegation != NULL) { 470 err = nfs_end_delegation_return(inode, delegation, 1);
402 err = __nfs_inode_return_delegation(inode, delegation, 1);
403 }
404 }
405 return err; 471 return err;
406} 472}
407 473
@@ -471,7 +537,7 @@ void nfs_remove_bad_delegation(struct inode *inode)
471{ 537{
472 struct nfs_delegation *delegation; 538 struct nfs_delegation *delegation;
473 539
474 delegation = nfs_detach_delegation(NFS_I(inode), NFS_SERVER(inode)); 540 delegation = nfs_inode_detach_delegation(inode);
475 if (delegation) { 541 if (delegation) {
476 nfs_inode_find_state_and_recover(inode, &delegation->stateid); 542 nfs_inode_find_state_and_recover(inode, &delegation->stateid);
477 nfs_free_delegation(delegation); 543 nfs_free_delegation(delegation);
@@ -649,7 +715,7 @@ restart:
649 if (inode == NULL) 715 if (inode == NULL)
650 continue; 716 continue;
651 delegation = nfs_detach_delegation(NFS_I(inode), 717 delegation = nfs_detach_delegation(NFS_I(inode),
652 server); 718 delegation, server);
653 rcu_read_unlock(); 719 rcu_read_unlock();
654 720
655 if (delegation != NULL) 721 if (delegation != NULL)
diff --git a/fs/nfs/delegation.h b/fs/nfs/delegation.h
index bbc6a4dba0d8..d54d4fca6793 100644
--- a/fs/nfs/delegation.h
+++ b/fs/nfs/delegation.h
@@ -29,6 +29,7 @@ enum {
29 NFS_DELEGATION_NEED_RECLAIM = 0, 29 NFS_DELEGATION_NEED_RECLAIM = 0,
30 NFS_DELEGATION_RETURN, 30 NFS_DELEGATION_RETURN,
31 NFS_DELEGATION_REFERENCED, 31 NFS_DELEGATION_REFERENCED,
32 NFS_DELEGATION_RETURNING,
32}; 33};
33 34
34int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res); 35int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res);
diff --git a/fs/nfs/getroot.c b/fs/nfs/getroot.c
index 033803c36644..44efaa8c5f78 100644
--- a/fs/nfs/getroot.c
+++ b/fs/nfs/getroot.c
@@ -126,8 +126,7 @@ struct dentry *nfs_get_root(struct super_block *sb, struct nfs_fh *mntfh,
126 } 126 }
127 spin_unlock(&ret->d_lock); 127 spin_unlock(&ret->d_lock);
128out: 128out:
129 if (name) 129 kfree(name);
130 kfree(name);
131 nfs_free_fattr(fsinfo.fattr); 130 nfs_free_fattr(fsinfo.fattr);
132 return ret; 131 return ret;
133} 132}
diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c
index bc3968fa81e5..b9623d19d599 100644
--- a/fs/nfs/idmap.c
+++ b/fs/nfs/idmap.c
@@ -97,7 +97,7 @@ static void nfs_fattr_free_group_name(struct nfs_fattr *fattr)
97static bool nfs_fattr_map_owner_name(struct nfs_server *server, struct nfs_fattr *fattr) 97static bool nfs_fattr_map_owner_name(struct nfs_server *server, struct nfs_fattr *fattr)
98{ 98{
99 struct nfs4_string *owner = fattr->owner_name; 99 struct nfs4_string *owner = fattr->owner_name;
100 __u32 uid; 100 kuid_t uid;
101 101
102 if (!(fattr->valid & NFS_ATTR_FATTR_OWNER_NAME)) 102 if (!(fattr->valid & NFS_ATTR_FATTR_OWNER_NAME))
103 return false; 103 return false;
@@ -111,7 +111,7 @@ static bool nfs_fattr_map_owner_name(struct nfs_server *server, struct nfs_fattr
111static bool nfs_fattr_map_group_name(struct nfs_server *server, struct nfs_fattr *fattr) 111static bool nfs_fattr_map_group_name(struct nfs_server *server, struct nfs_fattr *fattr)
112{ 112{
113 struct nfs4_string *group = fattr->group_name; 113 struct nfs4_string *group = fattr->group_name;
114 __u32 gid; 114 kgid_t gid;
115 115
116 if (!(fattr->valid & NFS_ATTR_FATTR_GROUP_NAME)) 116 if (!(fattr->valid & NFS_ATTR_FATTR_GROUP_NAME))
117 return false; 117 return false;
@@ -193,7 +193,8 @@ static int nfs_idmap_init_keyring(void)
193 if (!cred) 193 if (!cred)
194 return -ENOMEM; 194 return -ENOMEM;
195 195
196 keyring = keyring_alloc(".id_resolver", 0, 0, cred, 196 keyring = keyring_alloc(".id_resolver",
197 GLOBAL_ROOT_UID, GLOBAL_ROOT_GID, cred,
197 (KEY_POS_ALL & ~KEY_POS_SETATTR) | 198 (KEY_POS_ALL & ~KEY_POS_SETATTR) |
198 KEY_USR_VIEW | KEY_USR_READ, 199 KEY_USR_VIEW | KEY_USR_READ,
199 KEY_ALLOC_NOT_IN_QUOTA, NULL); 200 KEY_ALLOC_NOT_IN_QUOTA, NULL);
@@ -836,43 +837,61 @@ idmap_release_pipe(struct inode *inode)
836 nfs_idmap_abort_pipe_upcall(idmap, -EPIPE); 837 nfs_idmap_abort_pipe_upcall(idmap, -EPIPE);
837} 838}
838 839
839int nfs_map_name_to_uid(const struct nfs_server *server, const char *name, size_t namelen, __u32 *uid) 840int nfs_map_name_to_uid(const struct nfs_server *server, const char *name, size_t namelen, kuid_t *uid)
840{ 841{
841 struct idmap *idmap = server->nfs_client->cl_idmap; 842 struct idmap *idmap = server->nfs_client->cl_idmap;
843 __u32 id = -1;
844 int ret = 0;
842 845
843 if (nfs_map_string_to_numeric(name, namelen, uid)) 846 if (!nfs_map_string_to_numeric(name, namelen, &id))
844 return 0; 847 ret = nfs_idmap_lookup_id(name, namelen, "uid", &id, idmap);
845 return nfs_idmap_lookup_id(name, namelen, "uid", uid, idmap); 848 if (ret == 0) {
849 *uid = make_kuid(&init_user_ns, id);
850 if (!uid_valid(*uid))
851 ret = -ERANGE;
852 }
853 return ret;
846} 854}
847 855
848int nfs_map_group_to_gid(const struct nfs_server *server, const char *name, size_t namelen, __u32 *gid) 856int nfs_map_group_to_gid(const struct nfs_server *server, const char *name, size_t namelen, kgid_t *gid)
849{ 857{
850 struct idmap *idmap = server->nfs_client->cl_idmap; 858 struct idmap *idmap = server->nfs_client->cl_idmap;
859 __u32 id = -1;
860 int ret = 0;
851 861
852 if (nfs_map_string_to_numeric(name, namelen, gid)) 862 if (!nfs_map_string_to_numeric(name, namelen, &id))
853 return 0; 863 ret = nfs_idmap_lookup_id(name, namelen, "gid", &id, idmap);
854 return nfs_idmap_lookup_id(name, namelen, "gid", gid, idmap); 864 if (ret == 0) {
865 *gid = make_kgid(&init_user_ns, id);
866 if (!gid_valid(*gid))
867 ret = -ERANGE;
868 }
869 return ret;
855} 870}
856 871
857int nfs_map_uid_to_name(const struct nfs_server *server, __u32 uid, char *buf, size_t buflen) 872int nfs_map_uid_to_name(const struct nfs_server *server, kuid_t uid, char *buf, size_t buflen)
858{ 873{
859 struct idmap *idmap = server->nfs_client->cl_idmap; 874 struct idmap *idmap = server->nfs_client->cl_idmap;
860 int ret = -EINVAL; 875 int ret = -EINVAL;
876 __u32 id;
861 877
878 id = from_kuid(&init_user_ns, uid);
862 if (!(server->caps & NFS_CAP_UIDGID_NOMAP)) 879 if (!(server->caps & NFS_CAP_UIDGID_NOMAP))
863 ret = nfs_idmap_lookup_name(uid, "user", buf, buflen, idmap); 880 ret = nfs_idmap_lookup_name(id, "user", buf, buflen, idmap);
864 if (ret < 0) 881 if (ret < 0)
865 ret = nfs_map_numeric_to_string(uid, buf, buflen); 882 ret = nfs_map_numeric_to_string(id, buf, buflen);
866 return ret; 883 return ret;
867} 884}
868int nfs_map_gid_to_group(const struct nfs_server *server, __u32 gid, char *buf, size_t buflen) 885int nfs_map_gid_to_group(const struct nfs_server *server, kgid_t gid, char *buf, size_t buflen)
869{ 886{
870 struct idmap *idmap = server->nfs_client->cl_idmap; 887 struct idmap *idmap = server->nfs_client->cl_idmap;
871 int ret = -EINVAL; 888 int ret = -EINVAL;
889 __u32 id;
872 890
891 id = from_kgid(&init_user_ns, gid);
873 if (!(server->caps & NFS_CAP_UIDGID_NOMAP)) 892 if (!(server->caps & NFS_CAP_UIDGID_NOMAP))
874 ret = nfs_idmap_lookup_name(gid, "group", buf, buflen, idmap); 893 ret = nfs_idmap_lookup_name(id, "group", buf, buflen, idmap);
875 if (ret < 0) 894 if (ret < 0)
876 ret = nfs_map_numeric_to_string(gid, buf, buflen); 895 ret = nfs_map_numeric_to_string(id, buf, buflen);
877 return ret; 896 return ret;
878} 897}
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index ebeb94ce1b0b..468ba8bf0f56 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -332,8 +332,8 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr)
332 inode->i_version = 0; 332 inode->i_version = 0;
333 inode->i_size = 0; 333 inode->i_size = 0;
334 clear_nlink(inode); 334 clear_nlink(inode);
335 inode->i_uid = -2; 335 inode->i_uid = make_kuid(&init_user_ns, -2);
336 inode->i_gid = -2; 336 inode->i_gid = make_kgid(&init_user_ns, -2);
337 inode->i_blocks = 0; 337 inode->i_blocks = 0;
338 memset(nfsi->cookieverf, 0, sizeof(nfsi->cookieverf)); 338 memset(nfsi->cookieverf, 0, sizeof(nfsi->cookieverf));
339 nfsi->write_io = 0; 339 nfsi->write_io = 0;
@@ -694,10 +694,7 @@ static void __put_nfs_open_context(struct nfs_open_context *ctx, int is_sync)
694 if (ctx->cred != NULL) 694 if (ctx->cred != NULL)
695 put_rpccred(ctx->cred); 695 put_rpccred(ctx->cred);
696 dput(ctx->dentry); 696 dput(ctx->dentry);
697 if (is_sync) 697 nfs_sb_deactive(sb);
698 nfs_sb_deactive(sb);
699 else
700 nfs_sb_deactive_async(sb);
701 kfree(ctx->mdsthreshold); 698 kfree(ctx->mdsthreshold);
702 kfree(ctx); 699 kfree(ctx);
703} 700}
@@ -1009,9 +1006,9 @@ static int nfs_check_inode_attributes(struct inode *inode, struct nfs_fattr *fat
1009 /* Have any file permissions changed? */ 1006 /* Have any file permissions changed? */
1010 if ((fattr->valid & NFS_ATTR_FATTR_MODE) && (inode->i_mode & S_IALLUGO) != (fattr->mode & S_IALLUGO)) 1007 if ((fattr->valid & NFS_ATTR_FATTR_MODE) && (inode->i_mode & S_IALLUGO) != (fattr->mode & S_IALLUGO))
1011 invalid |= NFS_INO_INVALID_ATTR | NFS_INO_INVALID_ACCESS | NFS_INO_INVALID_ACL; 1008 invalid |= NFS_INO_INVALID_ATTR | NFS_INO_INVALID_ACCESS | NFS_INO_INVALID_ACL;
1012 if ((fattr->valid & NFS_ATTR_FATTR_OWNER) && inode->i_uid != fattr->uid) 1009 if ((fattr->valid & NFS_ATTR_FATTR_OWNER) && !uid_eq(inode->i_uid, fattr->uid))
1013 invalid |= NFS_INO_INVALID_ATTR | NFS_INO_INVALID_ACCESS | NFS_INO_INVALID_ACL; 1010 invalid |= NFS_INO_INVALID_ATTR | NFS_INO_INVALID_ACCESS | NFS_INO_INVALID_ACL;
1014 if ((fattr->valid & NFS_ATTR_FATTR_GROUP) && inode->i_gid != fattr->gid) 1011 if ((fattr->valid & NFS_ATTR_FATTR_GROUP) && !gid_eq(inode->i_gid, fattr->gid))
1015 invalid |= NFS_INO_INVALID_ATTR | NFS_INO_INVALID_ACCESS | NFS_INO_INVALID_ACL; 1012 invalid |= NFS_INO_INVALID_ATTR | NFS_INO_INVALID_ACCESS | NFS_INO_INVALID_ACL;
1016 1013
1017 /* Has the link count changed? */ 1014 /* Has the link count changed? */
@@ -1440,7 +1437,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
1440 | NFS_INO_REVAL_FORCED); 1437 | NFS_INO_REVAL_FORCED);
1441 1438
1442 if (fattr->valid & NFS_ATTR_FATTR_OWNER) { 1439 if (fattr->valid & NFS_ATTR_FATTR_OWNER) {
1443 if (inode->i_uid != fattr->uid) { 1440 if (!uid_eq(inode->i_uid, fattr->uid)) {
1444 invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL; 1441 invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL;
1445 inode->i_uid = fattr->uid; 1442 inode->i_uid = fattr->uid;
1446 } 1443 }
@@ -1451,7 +1448,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
1451 | NFS_INO_REVAL_FORCED); 1448 | NFS_INO_REVAL_FORCED);
1452 1449
1453 if (fattr->valid & NFS_ATTR_FATTR_GROUP) { 1450 if (fattr->valid & NFS_ATTR_FATTR_GROUP) {
1454 if (inode->i_gid != fattr->gid) { 1451 if (!gid_eq(inode->i_gid, fattr->gid)) {
1455 invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL; 1452 invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL;
1456 inode->i_gid = fattr->gid; 1453 inode->i_gid = fattr->gid;
1457 } 1454 }
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index f0e6c7df1a07..541c9ebdbc5a 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -329,7 +329,6 @@ extern int __init register_nfs_fs(void);
329extern void __exit unregister_nfs_fs(void); 329extern void __exit unregister_nfs_fs(void);
330extern void nfs_sb_active(struct super_block *sb); 330extern void nfs_sb_active(struct super_block *sb);
331extern void nfs_sb_deactive(struct super_block *sb); 331extern void nfs_sb_deactive(struct super_block *sb);
332extern void nfs_sb_deactive_async(struct super_block *sb);
333 332
334/* namespace.c */ 333/* namespace.c */
335#define NFS_PATH_CANONICAL 1 334#define NFS_PATH_CANONICAL 1
diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c
index dd057bc6b65b..fc8dc20fdeb9 100644
--- a/fs/nfs/namespace.c
+++ b/fs/nfs/namespace.c
@@ -177,11 +177,31 @@ out_nofree:
177 return mnt; 177 return mnt;
178} 178}
179 179
180static int
181nfs_namespace_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
182{
183 if (NFS_FH(dentry->d_inode)->size != 0)
184 return nfs_getattr(mnt, dentry, stat);
185 generic_fillattr(dentry->d_inode, stat);
186 return 0;
187}
188
189static int
190nfs_namespace_setattr(struct dentry *dentry, struct iattr *attr)
191{
192 if (NFS_FH(dentry->d_inode)->size != 0)
193 return nfs_setattr(dentry, attr);
194 return -EACCES;
195}
196
180const struct inode_operations nfs_mountpoint_inode_operations = { 197const struct inode_operations nfs_mountpoint_inode_operations = {
181 .getattr = nfs_getattr, 198 .getattr = nfs_getattr,
199 .setattr = nfs_setattr,
182}; 200};
183 201
184const struct inode_operations nfs_referral_inode_operations = { 202const struct inode_operations nfs_referral_inode_operations = {
203 .getattr = nfs_namespace_getattr,
204 .setattr = nfs_namespace_setattr,
185}; 205};
186 206
187static void nfs_expire_automounts(struct work_struct *work) 207static void nfs_expire_automounts(struct work_struct *work)
diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c
index 06b9df49f7f7..62db136339ea 100644
--- a/fs/nfs/nfs2xdr.c
+++ b/fs/nfs/nfs2xdr.c
@@ -290,8 +290,13 @@ static int decode_fattr(struct xdr_stream *xdr, struct nfs_fattr *fattr)
290 290
291 fattr->mode = be32_to_cpup(p++); 291 fattr->mode = be32_to_cpup(p++);
292 fattr->nlink = be32_to_cpup(p++); 292 fattr->nlink = be32_to_cpup(p++);
293 fattr->uid = be32_to_cpup(p++); 293 fattr->uid = make_kuid(&init_user_ns, be32_to_cpup(p++));
294 fattr->gid = be32_to_cpup(p++); 294 if (!uid_valid(fattr->uid))
295 goto out_uid;
296 fattr->gid = make_kgid(&init_user_ns, be32_to_cpup(p++));
297 if (!gid_valid(fattr->gid))
298 goto out_gid;
299
295 fattr->size = be32_to_cpup(p++); 300 fattr->size = be32_to_cpup(p++);
296 fattr->du.nfs2.blocksize = be32_to_cpup(p++); 301 fattr->du.nfs2.blocksize = be32_to_cpup(p++);
297 302
@@ -313,6 +318,12 @@ static int decode_fattr(struct xdr_stream *xdr, struct nfs_fattr *fattr)
313 fattr->change_attr = nfs_timespec_to_change_attr(&fattr->ctime); 318 fattr->change_attr = nfs_timespec_to_change_attr(&fattr->ctime);
314 319
315 return 0; 320 return 0;
321out_uid:
322 dprintk("NFS: returned invalid uid\n");
323 return -EINVAL;
324out_gid:
325 dprintk("NFS: returned invalid gid\n");
326 return -EINVAL;
316out_overflow: 327out_overflow:
317 print_overflow_msg(__func__, xdr); 328 print_overflow_msg(__func__, xdr);
318 return -EIO; 329 return -EIO;
@@ -351,11 +362,11 @@ static void encode_sattr(struct xdr_stream *xdr, const struct iattr *attr)
351 else 362 else
352 *p++ = cpu_to_be32(NFS2_SATTR_NOT_SET); 363 *p++ = cpu_to_be32(NFS2_SATTR_NOT_SET);
353 if (attr->ia_valid & ATTR_UID) 364 if (attr->ia_valid & ATTR_UID)
354 *p++ = cpu_to_be32(attr->ia_uid); 365 *p++ = cpu_to_be32(from_kuid(&init_user_ns, attr->ia_uid));
355 else 366 else
356 *p++ = cpu_to_be32(NFS2_SATTR_NOT_SET); 367 *p++ = cpu_to_be32(NFS2_SATTR_NOT_SET);
357 if (attr->ia_valid & ATTR_GID) 368 if (attr->ia_valid & ATTR_GID)
358 *p++ = cpu_to_be32(attr->ia_gid); 369 *p++ = cpu_to_be32(from_kgid(&init_user_ns, attr->ia_gid));
359 else 370 else
360 *p++ = cpu_to_be32(NFS2_SATTR_NOT_SET); 371 *p++ = cpu_to_be32(NFS2_SATTR_NOT_SET);
361 if (attr->ia_valid & ATTR_SIZE) 372 if (attr->ia_valid & ATTR_SIZE)
diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c
index bffc32406fbf..fa6d72131c19 100644
--- a/fs/nfs/nfs3xdr.c
+++ b/fs/nfs/nfs3xdr.c
@@ -592,13 +592,13 @@ static void encode_sattr3(struct xdr_stream *xdr, const struct iattr *attr)
592 592
593 if (attr->ia_valid & ATTR_UID) { 593 if (attr->ia_valid & ATTR_UID) {
594 *p++ = xdr_one; 594 *p++ = xdr_one;
595 *p++ = cpu_to_be32(attr->ia_uid); 595 *p++ = cpu_to_be32(from_kuid(&init_user_ns, attr->ia_uid));
596 } else 596 } else
597 *p++ = xdr_zero; 597 *p++ = xdr_zero;
598 598
599 if (attr->ia_valid & ATTR_GID) { 599 if (attr->ia_valid & ATTR_GID) {
600 *p++ = xdr_one; 600 *p++ = xdr_one;
601 *p++ = cpu_to_be32(attr->ia_gid); 601 *p++ = cpu_to_be32(from_kgid(&init_user_ns, attr->ia_gid));
602 } else 602 } else
603 *p++ = xdr_zero; 603 *p++ = xdr_zero;
604 604
@@ -657,8 +657,12 @@ static int decode_fattr3(struct xdr_stream *xdr, struct nfs_fattr *fattr)
657 657
658 fattr->mode = (be32_to_cpup(p++) & ~S_IFMT) | fmode; 658 fattr->mode = (be32_to_cpup(p++) & ~S_IFMT) | fmode;
659 fattr->nlink = be32_to_cpup(p++); 659 fattr->nlink = be32_to_cpup(p++);
660 fattr->uid = be32_to_cpup(p++); 660 fattr->uid = make_kuid(&init_user_ns, be32_to_cpup(p++));
661 fattr->gid = be32_to_cpup(p++); 661 if (!uid_valid(fattr->uid))
662 goto out_uid;
663 fattr->gid = make_kgid(&init_user_ns, be32_to_cpup(p++));
664 if (!gid_valid(fattr->gid))
665 goto out_gid;
662 666
663 p = xdr_decode_size3(p, &fattr->size); 667 p = xdr_decode_size3(p, &fattr->size);
664 p = xdr_decode_size3(p, &fattr->du.nfs3.used); 668 p = xdr_decode_size3(p, &fattr->du.nfs3.used);
@@ -675,6 +679,12 @@ static int decode_fattr3(struct xdr_stream *xdr, struct nfs_fattr *fattr)
675 679
676 fattr->valid |= NFS_ATTR_FATTR_V3; 680 fattr->valid |= NFS_ATTR_FATTR_V3;
677 return 0; 681 return 0;
682out_uid:
683 dprintk("NFS: returned invalid uid\n");
684 return -EINVAL;
685out_gid:
686 dprintk("NFS: returned invalid gid\n");
687 return -EINVAL;
678out_overflow: 688out_overflow:
679 print_overflow_msg(__func__, xdr); 689 print_overflow_msg(__func__, xdr);
680 return -EIO; 690 return -EIO;
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index a3f488b074a2..944c9a5c1039 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -13,6 +13,8 @@
13 13
14#define NFS4_MAX_LOOP_ON_RECOVER (10) 14#define NFS4_MAX_LOOP_ON_RECOVER (10)
15 15
16#include <linux/seqlock.h>
17
16struct idmap; 18struct idmap;
17 19
18enum nfs4_client_state { 20enum nfs4_client_state {
@@ -90,6 +92,8 @@ struct nfs4_state_owner {
90 unsigned long so_flags; 92 unsigned long so_flags;
91 struct list_head so_states; 93 struct list_head so_states;
92 struct nfs_seqid_counter so_seqid; 94 struct nfs_seqid_counter so_seqid;
95 seqcount_t so_reclaim_seqcount;
96 struct mutex so_delegreturn_mutex;
93}; 97};
94 98
95enum { 99enum {
diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c
index acc347268124..2e9779b58b7a 100644
--- a/fs/nfs/nfs4client.c
+++ b/fs/nfs/nfs4client.c
@@ -236,11 +236,10 @@ struct nfs_client *nfs4_init_client(struct nfs_client *clp,
236 error = nfs4_discover_server_trunking(clp, &old); 236 error = nfs4_discover_server_trunking(clp, &old);
237 if (error < 0) 237 if (error < 0)
238 goto error; 238 goto error;
239 nfs_put_client(clp);
239 if (clp != old) { 240 if (clp != old) {
240 clp->cl_preserve_clid = true; 241 clp->cl_preserve_clid = true;
241 nfs_put_client(clp);
242 clp = old; 242 clp = old;
243 atomic_inc(&clp->cl_count);
244 } 243 }
245 244
246 return clp; 245 return clp;
@@ -306,7 +305,7 @@ int nfs40_walk_client_list(struct nfs_client *new,
306 .clientid = new->cl_clientid, 305 .clientid = new->cl_clientid,
307 .confirm = new->cl_confirm, 306 .confirm = new->cl_confirm,
308 }; 307 };
309 int status; 308 int status = -NFS4ERR_STALE_CLIENTID;
310 309
311 spin_lock(&nn->nfs_client_lock); 310 spin_lock(&nn->nfs_client_lock);
312 list_for_each_entry_safe(pos, n, &nn->nfs_client_list, cl_share_link) { 311 list_for_each_entry_safe(pos, n, &nn->nfs_client_list, cl_share_link) {
@@ -332,40 +331,33 @@ int nfs40_walk_client_list(struct nfs_client *new,
332 331
333 if (prev) 332 if (prev)
334 nfs_put_client(prev); 333 nfs_put_client(prev);
334 prev = pos;
335 335
336 status = nfs4_proc_setclientid_confirm(pos, &clid, cred); 336 status = nfs4_proc_setclientid_confirm(pos, &clid, cred);
337 if (status == 0) { 337 switch (status) {
338 case -NFS4ERR_STALE_CLIENTID:
339 break;
340 case 0:
338 nfs4_swap_callback_idents(pos, new); 341 nfs4_swap_callback_idents(pos, new);
339 342
340 nfs_put_client(pos); 343 prev = NULL;
341 *result = pos; 344 *result = pos;
342 dprintk("NFS: <-- %s using nfs_client = %p ({%d})\n", 345 dprintk("NFS: <-- %s using nfs_client = %p ({%d})\n",
343 __func__, pos, atomic_read(&pos->cl_count)); 346 __func__, pos, atomic_read(&pos->cl_count));
344 return 0; 347 default:
345 } 348 goto out;
346 if (status != -NFS4ERR_STALE_CLIENTID) {
347 nfs_put_client(pos);
348 dprintk("NFS: <-- %s status = %d, no result\n",
349 __func__, status);
350 return status;
351 } 349 }
352 350
353 spin_lock(&nn->nfs_client_lock); 351 spin_lock(&nn->nfs_client_lock);
354 prev = pos;
355 } 352 }
353 spin_unlock(&nn->nfs_client_lock);
356 354
357 /* 355 /* No match found. The server lost our clientid */
358 * No matching nfs_client found. This should be impossible, 356out:
359 * because the new nfs_client has already been added to
360 * nfs_client_list by nfs_get_client().
361 *
362 * Don't BUG(), since the caller is holding a mutex.
363 */
364 if (prev) 357 if (prev)
365 nfs_put_client(prev); 358 nfs_put_client(prev);
366 spin_unlock(&nn->nfs_client_lock); 359 dprintk("NFS: <-- %s status = %d\n", __func__, status);
367 pr_err("NFS: %s Error: no matching nfs_client found\n", __func__); 360 return status;
368 return -NFS4ERR_STALE_CLIENTID;
369} 361}
370 362
371#ifdef CONFIG_NFS_V4_1 363#ifdef CONFIG_NFS_V4_1
@@ -432,7 +424,7 @@ int nfs41_walk_client_list(struct nfs_client *new,
432{ 424{
433 struct nfs_net *nn = net_generic(new->cl_net, nfs_net_id); 425 struct nfs_net *nn = net_generic(new->cl_net, nfs_net_id);
434 struct nfs_client *pos, *n, *prev = NULL; 426 struct nfs_client *pos, *n, *prev = NULL;
435 int error; 427 int status = -NFS4ERR_STALE_CLIENTID;
436 428
437 spin_lock(&nn->nfs_client_lock); 429 spin_lock(&nn->nfs_client_lock);
438 list_for_each_entry_safe(pos, n, &nn->nfs_client_list, cl_share_link) { 430 list_for_each_entry_safe(pos, n, &nn->nfs_client_list, cl_share_link) {
@@ -448,14 +440,17 @@ int nfs41_walk_client_list(struct nfs_client *new,
448 nfs_put_client(prev); 440 nfs_put_client(prev);
449 prev = pos; 441 prev = pos;
450 442
451 error = nfs_wait_client_init_complete(pos); 443 nfs4_schedule_lease_recovery(pos);
452 if (error < 0) { 444 status = nfs_wait_client_init_complete(pos);
445 if (status < 0) {
453 nfs_put_client(pos); 446 nfs_put_client(pos);
454 spin_lock(&nn->nfs_client_lock); 447 spin_lock(&nn->nfs_client_lock);
455 continue; 448 continue;
456 } 449 }
457 450 status = pos->cl_cons_state;
458 spin_lock(&nn->nfs_client_lock); 451 spin_lock(&nn->nfs_client_lock);
452 if (status < 0)
453 continue;
459 } 454 }
460 455
461 if (pos->rpc_ops != new->rpc_ops) 456 if (pos->rpc_ops != new->rpc_ops)
@@ -473,6 +468,7 @@ int nfs41_walk_client_list(struct nfs_client *new,
473 if (!nfs4_match_serverowners(pos, new)) 468 if (!nfs4_match_serverowners(pos, new))
474 continue; 469 continue;
475 470
471 atomic_inc(&pos->cl_count);
476 spin_unlock(&nn->nfs_client_lock); 472 spin_unlock(&nn->nfs_client_lock);
477 dprintk("NFS: <-- %s using nfs_client = %p ({%d})\n", 473 dprintk("NFS: <-- %s using nfs_client = %p ({%d})\n",
478 __func__, pos, atomic_read(&pos->cl_count)); 474 __func__, pos, atomic_read(&pos->cl_count));
@@ -481,16 +477,10 @@ int nfs41_walk_client_list(struct nfs_client *new,
481 return 0; 477 return 0;
482 } 478 }
483 479
484 /* 480 /* No matching nfs_client found. */
485 * No matching nfs_client found. This should be impossible,
486 * because the new nfs_client has already been added to
487 * nfs_client_list by nfs_get_client().
488 *
489 * Don't BUG(), since the caller is holding a mutex.
490 */
491 spin_unlock(&nn->nfs_client_lock); 481 spin_unlock(&nn->nfs_client_lock);
492 pr_err("NFS: %s Error: no matching nfs_client found\n", __func__); 482 dprintk("NFS: <-- %s status = %d\n", __func__, status);
493 return -NFS4ERR_STALE_CLIENTID; 483 return status;
494} 484}
495#endif /* CONFIG_NFS_V4_1 */ 485#endif /* CONFIG_NFS_V4_1 */
496 486
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index cf747ef86650..eae83bf96c6d 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -896,6 +896,8 @@ static int can_open_delegated(struct nfs_delegation *delegation, fmode_t fmode)
896 return 0; 896 return 0;
897 if (test_bit(NFS_DELEGATION_NEED_RECLAIM, &delegation->flags)) 897 if (test_bit(NFS_DELEGATION_NEED_RECLAIM, &delegation->flags))
898 return 0; 898 return 0;
899 if (test_bit(NFS_DELEGATION_RETURNING, &delegation->flags))
900 return 0;
899 nfs_mark_delegation_referenced(delegation); 901 nfs_mark_delegation_referenced(delegation);
900 return 1; 902 return 1;
901} 903}
@@ -973,6 +975,7 @@ static int update_open_stateid(struct nfs4_state *state, nfs4_stateid *open_stat
973 975
974 spin_lock(&deleg_cur->lock); 976 spin_lock(&deleg_cur->lock);
975 if (nfsi->delegation != deleg_cur || 977 if (nfsi->delegation != deleg_cur ||
978 test_bit(NFS_DELEGATION_RETURNING, &deleg_cur->flags) ||
976 (deleg_cur->type & fmode) != fmode) 979 (deleg_cur->type & fmode) != fmode)
977 goto no_delegation_unlock; 980 goto no_delegation_unlock;
978 981
@@ -1352,19 +1355,18 @@ int nfs4_open_delegation_recall(struct nfs_open_context *ctx, struct nfs4_state
1352 case -NFS4ERR_BAD_HIGH_SLOT: 1355 case -NFS4ERR_BAD_HIGH_SLOT:
1353 case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION: 1356 case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION:
1354 case -NFS4ERR_DEADSESSION: 1357 case -NFS4ERR_DEADSESSION:
1358 set_bit(NFS_DELEGATED_STATE, &state->flags);
1355 nfs4_schedule_session_recovery(server->nfs_client->cl_session, err); 1359 nfs4_schedule_session_recovery(server->nfs_client->cl_session, err);
1360 err = -EAGAIN;
1356 goto out; 1361 goto out;
1357 case -NFS4ERR_STALE_CLIENTID: 1362 case -NFS4ERR_STALE_CLIENTID:
1358 case -NFS4ERR_STALE_STATEID: 1363 case -NFS4ERR_STALE_STATEID:
1364 set_bit(NFS_DELEGATED_STATE, &state->flags);
1359 case -NFS4ERR_EXPIRED: 1365 case -NFS4ERR_EXPIRED:
1360 /* Don't recall a delegation if it was lost */ 1366 /* Don't recall a delegation if it was lost */
1361 nfs4_schedule_lease_recovery(server->nfs_client); 1367 nfs4_schedule_lease_recovery(server->nfs_client);
1368 err = -EAGAIN;
1362 goto out; 1369 goto out;
1363 case -ERESTARTSYS:
1364 /*
1365 * The show must go on: exit, but mark the
1366 * stateid as needing recovery.
1367 */
1368 case -NFS4ERR_DELEG_REVOKED: 1370 case -NFS4ERR_DELEG_REVOKED:
1369 case -NFS4ERR_ADMIN_REVOKED: 1371 case -NFS4ERR_ADMIN_REVOKED:
1370 case -NFS4ERR_BAD_STATEID: 1372 case -NFS4ERR_BAD_STATEID:
@@ -1375,6 +1377,7 @@ int nfs4_open_delegation_recall(struct nfs_open_context *ctx, struct nfs4_state
1375 err = 0; 1377 err = 0;
1376 goto out; 1378 goto out;
1377 } 1379 }
1380 set_bit(NFS_DELEGATED_STATE, &state->flags);
1378 err = nfs4_handle_exception(server, err, &exception); 1381 err = nfs4_handle_exception(server, err, &exception);
1379 } while (exception.retry); 1382 } while (exception.retry);
1380out: 1383out:
@@ -1463,7 +1466,7 @@ static void nfs4_open_prepare(struct rpc_task *task, void *calldata)
1463 struct nfs4_state_owner *sp = data->owner; 1466 struct nfs4_state_owner *sp = data->owner;
1464 1467
1465 if (nfs_wait_on_sequence(data->o_arg.seqid, task) != 0) 1468 if (nfs_wait_on_sequence(data->o_arg.seqid, task) != 0)
1466 return; 1469 goto out_wait;
1467 /* 1470 /*
1468 * Check if we still need to send an OPEN call, or if we can use 1471 * Check if we still need to send an OPEN call, or if we can use
1469 * a delegation instead. 1472 * a delegation instead.
@@ -1498,6 +1501,7 @@ unlock_no_action:
1498 rcu_read_unlock(); 1501 rcu_read_unlock();
1499out_no_action: 1502out_no_action:
1500 task->tk_action = NULL; 1503 task->tk_action = NULL;
1504out_wait:
1501 nfs4_sequence_done(task, &data->o_res.seq_res); 1505 nfs4_sequence_done(task, &data->o_res.seq_res);
1502} 1506}
1503 1507
@@ -1845,6 +1849,43 @@ static inline void nfs4_exclusive_attrset(struct nfs4_opendata *opendata, struct
1845 sattr->ia_valid |= ATTR_MTIME; 1849 sattr->ia_valid |= ATTR_MTIME;
1846} 1850}
1847 1851
1852static int _nfs4_open_and_get_state(struct nfs4_opendata *opendata,
1853 fmode_t fmode,
1854 int flags,
1855 struct nfs4_state **res)
1856{
1857 struct nfs4_state_owner *sp = opendata->owner;
1858 struct nfs_server *server = sp->so_server;
1859 struct nfs4_state *state;
1860 unsigned int seq;
1861 int ret;
1862
1863 seq = raw_seqcount_begin(&sp->so_reclaim_seqcount);
1864
1865 ret = _nfs4_proc_open(opendata);
1866 if (ret != 0)
1867 goto out;
1868
1869 state = nfs4_opendata_to_nfs4_state(opendata);
1870 ret = PTR_ERR(state);
1871 if (IS_ERR(state))
1872 goto out;
1873 if (server->caps & NFS_CAP_POSIX_LOCK)
1874 set_bit(NFS_STATE_POSIX_LOCKS, &state->flags);
1875
1876 ret = nfs4_opendata_access(sp->so_cred, opendata, state, fmode, flags);
1877 if (ret != 0)
1878 goto out;
1879
1880 if (read_seqcount_retry(&sp->so_reclaim_seqcount, seq)) {
1881 nfs4_schedule_stateid_recovery(server, state);
1882 nfs4_wait_clnt_recover(server->nfs_client);
1883 }
1884 *res = state;
1885out:
1886 return ret;
1887}
1888
1848/* 1889/*
1849 * Returns a referenced nfs4_state 1890 * Returns a referenced nfs4_state
1850 */ 1891 */
@@ -1889,18 +1930,7 @@ static int _nfs4_do_open(struct inode *dir,
1889 if (dentry->d_inode != NULL) 1930 if (dentry->d_inode != NULL)
1890 opendata->state = nfs4_get_open_state(dentry->d_inode, sp); 1931 opendata->state = nfs4_get_open_state(dentry->d_inode, sp);
1891 1932
1892 status = _nfs4_proc_open(opendata); 1933 status = _nfs4_open_and_get_state(opendata, fmode, flags, &state);
1893 if (status != 0)
1894 goto err_opendata_put;
1895
1896 state = nfs4_opendata_to_nfs4_state(opendata);
1897 status = PTR_ERR(state);
1898 if (IS_ERR(state))
1899 goto err_opendata_put;
1900 if (server->caps & NFS_CAP_POSIX_LOCK)
1901 set_bit(NFS_STATE_POSIX_LOCKS, &state->flags);
1902
1903 status = nfs4_opendata_access(cred, opendata, state, fmode, flags);
1904 if (status != 0) 1934 if (status != 0)
1905 goto err_opendata_put; 1935 goto err_opendata_put;
1906 1936
@@ -2088,7 +2118,7 @@ static void nfs4_free_closedata(void *data)
2088 nfs4_put_open_state(calldata->state); 2118 nfs4_put_open_state(calldata->state);
2089 nfs_free_seqid(calldata->arg.seqid); 2119 nfs_free_seqid(calldata->arg.seqid);
2090 nfs4_put_state_owner(sp); 2120 nfs4_put_state_owner(sp);
2091 nfs_sb_deactive_async(sb); 2121 nfs_sb_deactive(sb);
2092 kfree(calldata); 2122 kfree(calldata);
2093} 2123}
2094 2124
@@ -2150,7 +2180,7 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data)
2150 2180
2151 dprintk("%s: begin!\n", __func__); 2181 dprintk("%s: begin!\n", __func__);
2152 if (nfs_wait_on_sequence(calldata->arg.seqid, task) != 0) 2182 if (nfs_wait_on_sequence(calldata->arg.seqid, task) != 0)
2153 return; 2183 goto out_wait;
2154 2184
2155 task->tk_msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_DOWNGRADE]; 2185 task->tk_msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_DOWNGRADE];
2156 calldata->arg.fmode = FMODE_READ|FMODE_WRITE; 2186 calldata->arg.fmode = FMODE_READ|FMODE_WRITE;
@@ -2172,16 +2202,14 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data)
2172 2202
2173 if (!call_close) { 2203 if (!call_close) {
2174 /* Note: exit _without_ calling nfs4_close_done */ 2204 /* Note: exit _without_ calling nfs4_close_done */
2175 task->tk_action = NULL; 2205 goto out_no_action;
2176 nfs4_sequence_done(task, &calldata->res.seq_res);
2177 goto out;
2178 } 2206 }
2179 2207
2180 if (calldata->arg.fmode == 0) { 2208 if (calldata->arg.fmode == 0) {
2181 task->tk_msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_CLOSE]; 2209 task->tk_msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_CLOSE];
2182 if (calldata->roc && 2210 if (calldata->roc &&
2183 pnfs_roc_drain(inode, &calldata->roc_barrier, task)) 2211 pnfs_roc_drain(inode, &calldata->roc_barrier, task))
2184 goto out; 2212 goto out_wait;
2185 } 2213 }
2186 2214
2187 nfs_fattr_init(calldata->res.fattr); 2215 nfs_fattr_init(calldata->res.fattr);
@@ -2191,8 +2219,12 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data)
2191 &calldata->res.seq_res, 2219 &calldata->res.seq_res,
2192 task) != 0) 2220 task) != 0)
2193 nfs_release_seqid(calldata->arg.seqid); 2221 nfs_release_seqid(calldata->arg.seqid);
2194out:
2195 dprintk("%s: done!\n", __func__); 2222 dprintk("%s: done!\n", __func__);
2223 return;
2224out_no_action:
2225 task->tk_action = NULL;
2226out_wait:
2227 nfs4_sequence_done(task, &calldata->res.seq_res);
2196} 2228}
2197 2229
2198static const struct rpc_call_ops nfs4_close_ops = { 2230static const struct rpc_call_ops nfs4_close_ops = {
@@ -4423,12 +4455,10 @@ static void nfs4_locku_prepare(struct rpc_task *task, void *data)
4423 struct nfs4_unlockdata *calldata = data; 4455 struct nfs4_unlockdata *calldata = data;
4424 4456
4425 if (nfs_wait_on_sequence(calldata->arg.seqid, task) != 0) 4457 if (nfs_wait_on_sequence(calldata->arg.seqid, task) != 0)
4426 return; 4458 goto out_wait;
4427 if (test_bit(NFS_LOCK_INITIALIZED, &calldata->lsp->ls_flags) == 0) { 4459 if (test_bit(NFS_LOCK_INITIALIZED, &calldata->lsp->ls_flags) == 0) {
4428 /* Note: exit _without_ running nfs4_locku_done */ 4460 /* Note: exit _without_ running nfs4_locku_done */
4429 task->tk_action = NULL; 4461 goto out_no_action;
4430 nfs4_sequence_done(task, &calldata->res.seq_res);
4431 return;
4432 } 4462 }
4433 calldata->timestamp = jiffies; 4463 calldata->timestamp = jiffies;
4434 if (nfs4_setup_sequence(calldata->server, 4464 if (nfs4_setup_sequence(calldata->server,
@@ -4436,6 +4466,11 @@ static void nfs4_locku_prepare(struct rpc_task *task, void *data)
4436 &calldata->res.seq_res, 4466 &calldata->res.seq_res,
4437 task) != 0) 4467 task) != 0)
4438 nfs_release_seqid(calldata->arg.seqid); 4468 nfs_release_seqid(calldata->arg.seqid);
4469 return;
4470out_no_action:
4471 task->tk_action = NULL;
4472out_wait:
4473 nfs4_sequence_done(task, &calldata->res.seq_res);
4439} 4474}
4440 4475
4441static const struct rpc_call_ops nfs4_locku_ops = { 4476static const struct rpc_call_ops nfs4_locku_ops = {
@@ -4482,7 +4517,9 @@ static struct rpc_task *nfs4_do_unlck(struct file_lock *fl,
4482 4517
4483static int nfs4_proc_unlck(struct nfs4_state *state, int cmd, struct file_lock *request) 4518static int nfs4_proc_unlck(struct nfs4_state *state, int cmd, struct file_lock *request)
4484{ 4519{
4485 struct nfs_inode *nfsi = NFS_I(state->inode); 4520 struct inode *inode = state->inode;
4521 struct nfs4_state_owner *sp = state->owner;
4522 struct nfs_inode *nfsi = NFS_I(inode);
4486 struct nfs_seqid *seqid; 4523 struct nfs_seqid *seqid;
4487 struct nfs4_lock_state *lsp; 4524 struct nfs4_lock_state *lsp;
4488 struct rpc_task *task; 4525 struct rpc_task *task;
@@ -4492,12 +4529,17 @@ static int nfs4_proc_unlck(struct nfs4_state *state, int cmd, struct file_lock *
4492 status = nfs4_set_lock_state(state, request); 4529 status = nfs4_set_lock_state(state, request);
4493 /* Unlock _before_ we do the RPC call */ 4530 /* Unlock _before_ we do the RPC call */
4494 request->fl_flags |= FL_EXISTS; 4531 request->fl_flags |= FL_EXISTS;
4532 /* Exclude nfs_delegation_claim_locks() */
4533 mutex_lock(&sp->so_delegreturn_mutex);
4534 /* Exclude nfs4_reclaim_open_stateid() - note nesting! */
4495 down_read(&nfsi->rwsem); 4535 down_read(&nfsi->rwsem);
4496 if (do_vfs_lock(request->fl_file, request) == -ENOENT) { 4536 if (do_vfs_lock(request->fl_file, request) == -ENOENT) {
4497 up_read(&nfsi->rwsem); 4537 up_read(&nfsi->rwsem);
4538 mutex_unlock(&sp->so_delegreturn_mutex);
4498 goto out; 4539 goto out;
4499 } 4540 }
4500 up_read(&nfsi->rwsem); 4541 up_read(&nfsi->rwsem);
4542 mutex_unlock(&sp->so_delegreturn_mutex);
4501 if (status != 0) 4543 if (status != 0)
4502 goto out; 4544 goto out;
4503 /* Is this a delegated lock? */ 4545 /* Is this a delegated lock? */
@@ -4576,7 +4618,7 @@ static void nfs4_lock_prepare(struct rpc_task *task, void *calldata)
4576 4618
4577 dprintk("%s: begin!\n", __func__); 4619 dprintk("%s: begin!\n", __func__);
4578 if (nfs_wait_on_sequence(data->arg.lock_seqid, task) != 0) 4620 if (nfs_wait_on_sequence(data->arg.lock_seqid, task) != 0)
4579 return; 4621 goto out_wait;
4580 /* Do we need to do an open_to_lock_owner? */ 4622 /* Do we need to do an open_to_lock_owner? */
4581 if (!(data->arg.lock_seqid->sequence->flags & NFS_SEQID_CONFIRMED)) { 4623 if (!(data->arg.lock_seqid->sequence->flags & NFS_SEQID_CONFIRMED)) {
4582 if (nfs_wait_on_sequence(data->arg.open_seqid, task) != 0) { 4624 if (nfs_wait_on_sequence(data->arg.open_seqid, task) != 0) {
@@ -4596,6 +4638,8 @@ static void nfs4_lock_prepare(struct rpc_task *task, void *calldata)
4596 nfs_release_seqid(data->arg.open_seqid); 4638 nfs_release_seqid(data->arg.open_seqid);
4597out_release_lock_seqid: 4639out_release_lock_seqid:
4598 nfs_release_seqid(data->arg.lock_seqid); 4640 nfs_release_seqid(data->arg.lock_seqid);
4641out_wait:
4642 nfs4_sequence_done(task, &data->res.seq_res);
4599 dprintk("%s: done!, ret = %d\n", __func__, data->rpc_status); 4643 dprintk("%s: done!, ret = %d\n", __func__, data->rpc_status);
4600} 4644}
4601 4645
@@ -4813,8 +4857,10 @@ static int nfs41_lock_expired(struct nfs4_state *state, struct file_lock *reques
4813 4857
4814static int _nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock *request) 4858static int _nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock *request)
4815{ 4859{
4860 struct nfs4_state_owner *sp = state->owner;
4816 struct nfs_inode *nfsi = NFS_I(state->inode); 4861 struct nfs_inode *nfsi = NFS_I(state->inode);
4817 unsigned char fl_flags = request->fl_flags; 4862 unsigned char fl_flags = request->fl_flags;
4863 unsigned int seq;
4818 int status = -ENOLCK; 4864 int status = -ENOLCK;
4819 4865
4820 if ((fl_flags & FL_POSIX) && 4866 if ((fl_flags & FL_POSIX) &&
@@ -4836,9 +4882,16 @@ static int _nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock
4836 status = do_vfs_lock(request->fl_file, request); 4882 status = do_vfs_lock(request->fl_file, request);
4837 goto out_unlock; 4883 goto out_unlock;
4838 } 4884 }
4885 seq = raw_seqcount_begin(&sp->so_reclaim_seqcount);
4886 up_read(&nfsi->rwsem);
4839 status = _nfs4_do_setlk(state, cmd, request, NFS_LOCK_NEW); 4887 status = _nfs4_do_setlk(state, cmd, request, NFS_LOCK_NEW);
4840 if (status != 0) 4888 if (status != 0)
4889 goto out;
4890 down_read(&nfsi->rwsem);
4891 if (read_seqcount_retry(&sp->so_reclaim_seqcount, seq)) {
4892 status = -NFS4ERR_DELAY;
4841 goto out_unlock; 4893 goto out_unlock;
4894 }
4842 /* Note: we always want to sleep here! */ 4895 /* Note: we always want to sleep here! */
4843 request->fl_flags = fl_flags | FL_SLEEP; 4896 request->fl_flags = fl_flags | FL_SLEEP;
4844 if (do_vfs_lock(request->fl_file, request) < 0) 4897 if (do_vfs_lock(request->fl_file, request) < 0)
@@ -4945,24 +4998,22 @@ int nfs4_lock_delegation_recall(struct nfs4_state *state, struct file_lock *fl)
4945 case 0: 4998 case 0:
4946 case -ESTALE: 4999 case -ESTALE:
4947 goto out; 5000 goto out;
4948 case -NFS4ERR_EXPIRED:
4949 nfs4_schedule_stateid_recovery(server, state);
4950 case -NFS4ERR_STALE_CLIENTID: 5001 case -NFS4ERR_STALE_CLIENTID:
4951 case -NFS4ERR_STALE_STATEID: 5002 case -NFS4ERR_STALE_STATEID:
5003 set_bit(NFS_DELEGATED_STATE, &state->flags);
5004 case -NFS4ERR_EXPIRED:
4952 nfs4_schedule_lease_recovery(server->nfs_client); 5005 nfs4_schedule_lease_recovery(server->nfs_client);
5006 err = -EAGAIN;
4953 goto out; 5007 goto out;
4954 case -NFS4ERR_BADSESSION: 5008 case -NFS4ERR_BADSESSION:
4955 case -NFS4ERR_BADSLOT: 5009 case -NFS4ERR_BADSLOT:
4956 case -NFS4ERR_BAD_HIGH_SLOT: 5010 case -NFS4ERR_BAD_HIGH_SLOT:
4957 case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION: 5011 case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION:
4958 case -NFS4ERR_DEADSESSION: 5012 case -NFS4ERR_DEADSESSION:
5013 set_bit(NFS_DELEGATED_STATE, &state->flags);
4959 nfs4_schedule_session_recovery(server->nfs_client->cl_session, err); 5014 nfs4_schedule_session_recovery(server->nfs_client->cl_session, err);
5015 err = -EAGAIN;
4960 goto out; 5016 goto out;
4961 case -ERESTARTSYS:
4962 /*
4963 * The show must go on: exit, but mark the
4964 * stateid as needing recovery.
4965 */
4966 case -NFS4ERR_DELEG_REVOKED: 5017 case -NFS4ERR_DELEG_REVOKED:
4967 case -NFS4ERR_ADMIN_REVOKED: 5018 case -NFS4ERR_ADMIN_REVOKED:
4968 case -NFS4ERR_BAD_STATEID: 5019 case -NFS4ERR_BAD_STATEID:
@@ -4975,9 +5026,8 @@ int nfs4_lock_delegation_recall(struct nfs4_state *state, struct file_lock *fl)
4975 /* kill_proc(fl->fl_pid, SIGLOST, 1); */ 5026 /* kill_proc(fl->fl_pid, SIGLOST, 1); */
4976 err = 0; 5027 err = 0;
4977 goto out; 5028 goto out;
4978 case -NFS4ERR_DELAY:
4979 break;
4980 } 5029 }
5030 set_bit(NFS_DELEGATED_STATE, &state->flags);
4981 err = nfs4_handle_exception(server, err, &exception); 5031 err = nfs4_handle_exception(server, err, &exception);
4982 } while (exception.retry); 5032 } while (exception.retry);
4983out: 5033out:
@@ -6134,7 +6184,8 @@ nfs4_proc_layoutget(struct nfs4_layoutget *lgp, gfp_t gfp_flags)
6134 status = nfs4_wait_for_completion_rpc_task(task); 6184 status = nfs4_wait_for_completion_rpc_task(task);
6135 if (status == 0) 6185 if (status == 0)
6136 status = task->tk_status; 6186 status = task->tk_status;
6137 if (status == 0) 6187 /* if layoutp->len is 0, nfs4_layoutget_prepare called rpc_exit */
6188 if (status == 0 && lgp->res.layoutp->len)
6138 lseg = pnfs_layout_process(lgp); 6189 lseg = pnfs_layout_process(lgp);
6139 rpc_put_task(task); 6190 rpc_put_task(task);
6140 dprintk("<-- %s status=%d\n", __func__, status); 6191 dprintk("<-- %s status=%d\n", __func__, status);
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index 9448c579d41a..6ace365c6334 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -136,16 +136,11 @@ int nfs40_discover_server_trunking(struct nfs_client *clp,
136 clp->cl_confirm = clid.confirm; 136 clp->cl_confirm = clid.confirm;
137 137
138 status = nfs40_walk_client_list(clp, result, cred); 138 status = nfs40_walk_client_list(clp, result, cred);
139 switch (status) { 139 if (status == 0) {
140 case -NFS4ERR_STALE_CLIENTID:
141 set_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state);
142 case 0:
143 /* Sustain the lease, even if it's empty. If the clientid4 140 /* Sustain the lease, even if it's empty. If the clientid4
144 * goes stale it's of no use for trunking discovery. */ 141 * goes stale it's of no use for trunking discovery. */
145 nfs4_schedule_state_renewal(*result); 142 nfs4_schedule_state_renewal(*result);
146 break;
147 } 143 }
148
149out: 144out:
150 return status; 145 return status;
151} 146}
@@ -523,6 +518,8 @@ nfs4_alloc_state_owner(struct nfs_server *server,
523 nfs4_init_seqid_counter(&sp->so_seqid); 518 nfs4_init_seqid_counter(&sp->so_seqid);
524 atomic_set(&sp->so_count, 1); 519 atomic_set(&sp->so_count, 1);
525 INIT_LIST_HEAD(&sp->so_lru); 520 INIT_LIST_HEAD(&sp->so_lru);
521 seqcount_init(&sp->so_reclaim_seqcount);
522 mutex_init(&sp->so_delegreturn_mutex);
526 return sp; 523 return sp;
527} 524}
528 525
@@ -1395,8 +1392,9 @@ static int nfs4_reclaim_open_state(struct nfs4_state_owner *sp, const struct nfs
1395 * recovering after a network partition or a reboot from a 1392 * recovering after a network partition or a reboot from a
1396 * server that doesn't support a grace period. 1393 * server that doesn't support a grace period.
1397 */ 1394 */
1398restart:
1399 spin_lock(&sp->so_lock); 1395 spin_lock(&sp->so_lock);
1396 write_seqcount_begin(&sp->so_reclaim_seqcount);
1397restart:
1400 list_for_each_entry(state, &sp->so_states, open_states) { 1398 list_for_each_entry(state, &sp->so_states, open_states) {
1401 if (!test_and_clear_bit(ops->state_flag_bit, &state->flags)) 1399 if (!test_and_clear_bit(ops->state_flag_bit, &state->flags))
1402 continue; 1400 continue;
@@ -1417,6 +1415,7 @@ restart:
1417 } 1415 }
1418 spin_unlock(&state->state_lock); 1416 spin_unlock(&state->state_lock);
1419 nfs4_put_open_state(state); 1417 nfs4_put_open_state(state);
1418 spin_lock(&sp->so_lock);
1420 goto restart; 1419 goto restart;
1421 } 1420 }
1422 } 1421 }
@@ -1454,12 +1453,17 @@ restart:
1454 goto out_err; 1453 goto out_err;
1455 } 1454 }
1456 nfs4_put_open_state(state); 1455 nfs4_put_open_state(state);
1456 spin_lock(&sp->so_lock);
1457 goto restart; 1457 goto restart;
1458 } 1458 }
1459 write_seqcount_end(&sp->so_reclaim_seqcount);
1459 spin_unlock(&sp->so_lock); 1460 spin_unlock(&sp->so_lock);
1460 return 0; 1461 return 0;
1461out_err: 1462out_err:
1462 nfs4_put_open_state(state); 1463 nfs4_put_open_state(state);
1464 spin_lock(&sp->so_lock);
1465 write_seqcount_end(&sp->so_reclaim_seqcount);
1466 spin_unlock(&sp->so_lock);
1463 return status; 1467 return status;
1464} 1468}
1465 1469
@@ -1863,6 +1867,7 @@ again:
1863 case -ETIMEDOUT: 1867 case -ETIMEDOUT:
1864 case -EAGAIN: 1868 case -EAGAIN:
1865 ssleep(1); 1869 ssleep(1);
1870 case -NFS4ERR_STALE_CLIENTID:
1866 dprintk("NFS: %s after status %d, retrying\n", 1871 dprintk("NFS: %s after status %d, retrying\n",
1867 __func__, status); 1872 __func__, status);
1868 goto again; 1873 goto again;
@@ -2022,8 +2027,18 @@ static int nfs4_reset_session(struct nfs_client *clp)
2022 nfs4_begin_drain_session(clp); 2027 nfs4_begin_drain_session(clp);
2023 cred = nfs4_get_exchange_id_cred(clp); 2028 cred = nfs4_get_exchange_id_cred(clp);
2024 status = nfs4_proc_destroy_session(clp->cl_session, cred); 2029 status = nfs4_proc_destroy_session(clp->cl_session, cred);
2025 if (status && status != -NFS4ERR_BADSESSION && 2030 switch (status) {
2026 status != -NFS4ERR_DEADSESSION) { 2031 case 0:
2032 case -NFS4ERR_BADSESSION:
2033 case -NFS4ERR_DEADSESSION:
2034 break;
2035 case -NFS4ERR_BACK_CHAN_BUSY:
2036 case -NFS4ERR_DELAY:
2037 set_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state);
2038 status = 0;
2039 ssleep(1);
2040 goto out;
2041 default:
2027 status = nfs4_recovery_handle_error(clp, status); 2042 status = nfs4_recovery_handle_error(clp, status);
2028 goto out; 2043 goto out;
2029 } 2044 }
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 26b143920433..e3edda554ac7 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -1002,7 +1002,7 @@ static void encode_attrs(struct xdr_stream *xdr, const struct iattr *iap, const
1002 owner_namelen = nfs_map_uid_to_name(server, iap->ia_uid, owner_name, IDMAP_NAMESZ); 1002 owner_namelen = nfs_map_uid_to_name(server, iap->ia_uid, owner_name, IDMAP_NAMESZ);
1003 if (owner_namelen < 0) { 1003 if (owner_namelen < 0) {
1004 dprintk("nfs: couldn't resolve uid %d to string\n", 1004 dprintk("nfs: couldn't resolve uid %d to string\n",
1005 iap->ia_uid); 1005 from_kuid(&init_user_ns, iap->ia_uid));
1006 /* XXX */ 1006 /* XXX */
1007 strcpy(owner_name, "nobody"); 1007 strcpy(owner_name, "nobody");
1008 owner_namelen = sizeof("nobody") - 1; 1008 owner_namelen = sizeof("nobody") - 1;
@@ -1014,7 +1014,7 @@ static void encode_attrs(struct xdr_stream *xdr, const struct iattr *iap, const
1014 owner_grouplen = nfs_map_gid_to_group(server, iap->ia_gid, owner_group, IDMAP_NAMESZ); 1014 owner_grouplen = nfs_map_gid_to_group(server, iap->ia_gid, owner_group, IDMAP_NAMESZ);
1015 if (owner_grouplen < 0) { 1015 if (owner_grouplen < 0) {
1016 dprintk("nfs: couldn't resolve gid %d to string\n", 1016 dprintk("nfs: couldn't resolve gid %d to string\n",
1017 iap->ia_gid); 1017 from_kgid(&init_user_ns, iap->ia_gid));
1018 strcpy(owner_group, "nobody"); 1018 strcpy(owner_group, "nobody");
1019 owner_grouplen = sizeof("nobody") - 1; 1019 owner_grouplen = sizeof("nobody") - 1;
1020 /* goto out; */ 1020 /* goto out; */
@@ -3778,14 +3778,14 @@ out_overflow:
3778} 3778}
3779 3779
3780static int decode_attr_owner(struct xdr_stream *xdr, uint32_t *bitmap, 3780static int decode_attr_owner(struct xdr_stream *xdr, uint32_t *bitmap,
3781 const struct nfs_server *server, uint32_t *uid, 3781 const struct nfs_server *server, kuid_t *uid,
3782 struct nfs4_string *owner_name) 3782 struct nfs4_string *owner_name)
3783{ 3783{
3784 uint32_t len; 3784 uint32_t len;
3785 __be32 *p; 3785 __be32 *p;
3786 int ret = 0; 3786 int ret = 0;
3787 3787
3788 *uid = -2; 3788 *uid = make_kuid(&init_user_ns, -2);
3789 if (unlikely(bitmap[1] & (FATTR4_WORD1_OWNER - 1U))) 3789 if (unlikely(bitmap[1] & (FATTR4_WORD1_OWNER - 1U)))
3790 return -EIO; 3790 return -EIO;
3791 if (likely(bitmap[1] & FATTR4_WORD1_OWNER)) { 3791 if (likely(bitmap[1] & FATTR4_WORD1_OWNER)) {
@@ -3813,7 +3813,7 @@ static int decode_attr_owner(struct xdr_stream *xdr, uint32_t *bitmap,
3813 __func__, len); 3813 __func__, len);
3814 bitmap[1] &= ~FATTR4_WORD1_OWNER; 3814 bitmap[1] &= ~FATTR4_WORD1_OWNER;
3815 } 3815 }
3816 dprintk("%s: uid=%d\n", __func__, (int)*uid); 3816 dprintk("%s: uid=%d\n", __func__, (int)from_kuid(&init_user_ns, *uid));
3817 return ret; 3817 return ret;
3818out_overflow: 3818out_overflow:
3819 print_overflow_msg(__func__, xdr); 3819 print_overflow_msg(__func__, xdr);
@@ -3821,14 +3821,14 @@ out_overflow:
3821} 3821}
3822 3822
3823static int decode_attr_group(struct xdr_stream *xdr, uint32_t *bitmap, 3823static int decode_attr_group(struct xdr_stream *xdr, uint32_t *bitmap,
3824 const struct nfs_server *server, uint32_t *gid, 3824 const struct nfs_server *server, kgid_t *gid,
3825 struct nfs4_string *group_name) 3825 struct nfs4_string *group_name)
3826{ 3826{
3827 uint32_t len; 3827 uint32_t len;
3828 __be32 *p; 3828 __be32 *p;
3829 int ret = 0; 3829 int ret = 0;
3830 3830
3831 *gid = -2; 3831 *gid = make_kgid(&init_user_ns, -2);
3832 if (unlikely(bitmap[1] & (FATTR4_WORD1_OWNER_GROUP - 1U))) 3832 if (unlikely(bitmap[1] & (FATTR4_WORD1_OWNER_GROUP - 1U)))
3833 return -EIO; 3833 return -EIO;
3834 if (likely(bitmap[1] & FATTR4_WORD1_OWNER_GROUP)) { 3834 if (likely(bitmap[1] & FATTR4_WORD1_OWNER_GROUP)) {
@@ -3856,7 +3856,7 @@ static int decode_attr_group(struct xdr_stream *xdr, uint32_t *bitmap,
3856 __func__, len); 3856 __func__, len);
3857 bitmap[1] &= ~FATTR4_WORD1_OWNER_GROUP; 3857 bitmap[1] &= ~FATTR4_WORD1_OWNER_GROUP;
3858 } 3858 }
3859 dprintk("%s: gid=%d\n", __func__, (int)*gid); 3859 dprintk("%s: gid=%d\n", __func__, (int)from_kgid(&init_user_ns, *gid));
3860 return ret; 3860 return ret;
3861out_overflow: 3861out_overflow:
3862 print_overflow_msg(__func__, xdr); 3862 print_overflow_msg(__func__, xdr);
diff --git a/fs/nfs/objlayout/objio_osd.c b/fs/nfs/objlayout/objio_osd.c
index c6f990656f89..88f9611a945c 100644
--- a/fs/nfs/objlayout/objio_osd.c
+++ b/fs/nfs/objlayout/objio_osd.c
@@ -647,6 +647,7 @@ static struct pnfs_layoutdriver_type objlayout_type = {
647 .flags = PNFS_LAYOUTRET_ON_SETATTR | 647 .flags = PNFS_LAYOUTRET_ON_SETATTR |
648 PNFS_LAYOUTRET_ON_ERROR, 648 PNFS_LAYOUTRET_ON_ERROR,
649 649
650 .owner = THIS_MODULE,
650 .alloc_layout_hdr = objlayout_alloc_layout_hdr, 651 .alloc_layout_hdr = objlayout_alloc_layout_hdr,
651 .free_layout_hdr = objlayout_free_layout_hdr, 652 .free_layout_hdr = objlayout_free_layout_hdr,
652 653
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index d00260b08103..6be70f622b62 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -505,37 +505,147 @@ pnfs_destroy_layout(struct nfs_inode *nfsi)
505} 505}
506EXPORT_SYMBOL_GPL(pnfs_destroy_layout); 506EXPORT_SYMBOL_GPL(pnfs_destroy_layout);
507 507
508/* 508static bool
509 * Called by the state manger to remove all layouts established under an 509pnfs_layout_add_bulk_destroy_list(struct inode *inode,
510 * expired lease. 510 struct list_head *layout_list)
511 */
512void
513pnfs_destroy_all_layouts(struct nfs_client *clp)
514{ 511{
515 struct nfs_server *server;
516 struct pnfs_layout_hdr *lo; 512 struct pnfs_layout_hdr *lo;
517 LIST_HEAD(tmp_list); 513 bool ret = false;
518 514
519 nfs4_deviceid_mark_client_invalid(clp); 515 spin_lock(&inode->i_lock);
520 nfs4_deviceid_purge_client(clp); 516 lo = NFS_I(inode)->layout;
517 if (lo != NULL && list_empty(&lo->plh_bulk_destroy)) {
518 pnfs_get_layout_hdr(lo);
519 list_add(&lo->plh_bulk_destroy, layout_list);
520 ret = true;
521 }
522 spin_unlock(&inode->i_lock);
523 return ret;
524}
525
526/* Caller must hold rcu_read_lock and clp->cl_lock */
527static int
528pnfs_layout_bulk_destroy_byserver_locked(struct nfs_client *clp,
529 struct nfs_server *server,
530 struct list_head *layout_list)
531{
532 struct pnfs_layout_hdr *lo, *next;
533 struct inode *inode;
534
535 list_for_each_entry_safe(lo, next, &server->layouts, plh_layouts) {
536 inode = igrab(lo->plh_inode);
537 if (inode == NULL)
538 continue;
539 list_del_init(&lo->plh_layouts);
540 if (pnfs_layout_add_bulk_destroy_list(inode, layout_list))
541 continue;
542 rcu_read_unlock();
543 spin_unlock(&clp->cl_lock);
544 iput(inode);
545 spin_lock(&clp->cl_lock);
546 rcu_read_lock();
547 return -EAGAIN;
548 }
549 return 0;
550}
551
552static int
553pnfs_layout_free_bulk_destroy_list(struct list_head *layout_list,
554 bool is_bulk_recall)
555{
556 struct pnfs_layout_hdr *lo;
557 struct inode *inode;
558 struct pnfs_layout_range range = {
559 .iomode = IOMODE_ANY,
560 .offset = 0,
561 .length = NFS4_MAX_UINT64,
562 };
563 LIST_HEAD(lseg_list);
564 int ret = 0;
565
566 while (!list_empty(layout_list)) {
567 lo = list_entry(layout_list->next, struct pnfs_layout_hdr,
568 plh_bulk_destroy);
569 dprintk("%s freeing layout for inode %lu\n", __func__,
570 lo->plh_inode->i_ino);
571 inode = lo->plh_inode;
572 spin_lock(&inode->i_lock);
573 list_del_init(&lo->plh_bulk_destroy);
574 lo->plh_block_lgets++; /* permanently block new LAYOUTGETs */
575 if (is_bulk_recall)
576 set_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags);
577 if (pnfs_mark_matching_lsegs_invalid(lo, &lseg_list, &range))
578 ret = -EAGAIN;
579 spin_unlock(&inode->i_lock);
580 pnfs_free_lseg_list(&lseg_list);
581 pnfs_put_layout_hdr(lo);
582 iput(inode);
583 }
584 return ret;
585}
586
587int
588pnfs_destroy_layouts_byfsid(struct nfs_client *clp,
589 struct nfs_fsid *fsid,
590 bool is_recall)
591{
592 struct nfs_server *server;
593 LIST_HEAD(layout_list);
521 594
522 spin_lock(&clp->cl_lock); 595 spin_lock(&clp->cl_lock);
523 rcu_read_lock(); 596 rcu_read_lock();
597restart:
524 list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) { 598 list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) {
525 if (!list_empty(&server->layouts)) 599 if (memcmp(&server->fsid, fsid, sizeof(*fsid)) != 0)
526 list_splice_init(&server->layouts, &tmp_list); 600 continue;
601 if (pnfs_layout_bulk_destroy_byserver_locked(clp,
602 server,
603 &layout_list) != 0)
604 goto restart;
527 } 605 }
528 rcu_read_unlock(); 606 rcu_read_unlock();
529 spin_unlock(&clp->cl_lock); 607 spin_unlock(&clp->cl_lock);
530 608
531 while (!list_empty(&tmp_list)) { 609 if (list_empty(&layout_list))
532 lo = list_entry(tmp_list.next, struct pnfs_layout_hdr, 610 return 0;
533 plh_layouts); 611 return pnfs_layout_free_bulk_destroy_list(&layout_list, is_recall);
534 dprintk("%s freeing layout for inode %lu\n", __func__, 612}
535 lo->plh_inode->i_ino); 613
536 list_del_init(&lo->plh_layouts); 614int
537 pnfs_destroy_layout(NFS_I(lo->plh_inode)); 615pnfs_destroy_layouts_byclid(struct nfs_client *clp,
616 bool is_recall)
617{
618 struct nfs_server *server;
619 LIST_HEAD(layout_list);
620
621 spin_lock(&clp->cl_lock);
622 rcu_read_lock();
623restart:
624 list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) {
625 if (pnfs_layout_bulk_destroy_byserver_locked(clp,
626 server,
627 &layout_list) != 0)
628 goto restart;
538 } 629 }
630 rcu_read_unlock();
631 spin_unlock(&clp->cl_lock);
632
633 if (list_empty(&layout_list))
634 return 0;
635 return pnfs_layout_free_bulk_destroy_list(&layout_list, is_recall);
636}
637
638/*
639 * Called by the state manger to remove all layouts established under an
640 * expired lease.
641 */
642void
643pnfs_destroy_all_layouts(struct nfs_client *clp)
644{
645 nfs4_deviceid_mark_client_invalid(clp);
646 nfs4_deviceid_purge_client(clp);
647
648 pnfs_destroy_layouts_byclid(clp, false);
539} 649}
540 650
541/* 651/*
@@ -888,7 +998,7 @@ alloc_init_layout_hdr(struct inode *ino,
888 atomic_set(&lo->plh_refcount, 1); 998 atomic_set(&lo->plh_refcount, 1);
889 INIT_LIST_HEAD(&lo->plh_layouts); 999 INIT_LIST_HEAD(&lo->plh_layouts);
890 INIT_LIST_HEAD(&lo->plh_segs); 1000 INIT_LIST_HEAD(&lo->plh_segs);
891 INIT_LIST_HEAD(&lo->plh_bulk_recall); 1001 INIT_LIST_HEAD(&lo->plh_bulk_destroy);
892 lo->plh_inode = ino; 1002 lo->plh_inode = ino;
893 lo->plh_lc_cred = get_rpccred(ctx->state->owner->so_cred); 1003 lo->plh_lc_cred = get_rpccred(ctx->state->owner->so_cred);
894 return lo; 1004 return lo;
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index dbf7bba52da0..97cb358bb882 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -132,7 +132,7 @@ struct pnfs_layoutdriver_type {
132struct pnfs_layout_hdr { 132struct pnfs_layout_hdr {
133 atomic_t plh_refcount; 133 atomic_t plh_refcount;
134 struct list_head plh_layouts; /* other client layouts */ 134 struct list_head plh_layouts; /* other client layouts */
135 struct list_head plh_bulk_recall; /* clnt list of bulk recalls */ 135 struct list_head plh_bulk_destroy;
136 struct list_head plh_segs; /* layout segments list */ 136 struct list_head plh_segs; /* layout segments list */
137 nfs4_stateid plh_stateid; 137 nfs4_stateid plh_stateid;
138 atomic_t plh_outstanding; /* number of RPCs out */ 138 atomic_t plh_outstanding; /* number of RPCs out */
@@ -196,6 +196,11 @@ struct pnfs_layout_segment *pnfs_layout_process(struct nfs4_layoutget *lgp);
196void pnfs_free_lseg_list(struct list_head *tmp_list); 196void pnfs_free_lseg_list(struct list_head *tmp_list);
197void pnfs_destroy_layout(struct nfs_inode *); 197void pnfs_destroy_layout(struct nfs_inode *);
198void pnfs_destroy_all_layouts(struct nfs_client *); 198void pnfs_destroy_all_layouts(struct nfs_client *);
199int pnfs_destroy_layouts_byfsid(struct nfs_client *clp,
200 struct nfs_fsid *fsid,
201 bool is_recall);
202int pnfs_destroy_layouts_byclid(struct nfs_client *clp,
203 bool is_recall);
199void pnfs_put_layout_hdr(struct pnfs_layout_hdr *lo); 204void pnfs_put_layout_hdr(struct pnfs_layout_hdr *lo);
200void pnfs_set_layout_stateid(struct pnfs_layout_hdr *lo, 205void pnfs_set_layout_stateid(struct pnfs_layout_hdr *lo,
201 const nfs4_stateid *new, 206 const nfs4_stateid *new,
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index 2e7e8c878e5d..befbae0cce41 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -54,7 +54,6 @@
54#include <linux/parser.h> 54#include <linux/parser.h>
55#include <linux/nsproxy.h> 55#include <linux/nsproxy.h>
56#include <linux/rcupdate.h> 56#include <linux/rcupdate.h>
57#include <linux/kthread.h>
58 57
59#include <asm/uaccess.h> 58#include <asm/uaccess.h>
60 59
@@ -418,54 +417,6 @@ void nfs_sb_deactive(struct super_block *sb)
418} 417}
419EXPORT_SYMBOL_GPL(nfs_sb_deactive); 418EXPORT_SYMBOL_GPL(nfs_sb_deactive);
420 419
421static int nfs_deactivate_super_async_work(void *ptr)
422{
423 struct super_block *sb = ptr;
424
425 deactivate_super(sb);
426 module_put_and_exit(0);
427 return 0;
428}
429
430/*
431 * same effect as deactivate_super, but will do final unmount in kthread
432 * context
433 */
434static void nfs_deactivate_super_async(struct super_block *sb)
435{
436 struct task_struct *task;
437 char buf[INET6_ADDRSTRLEN + 1];
438 struct nfs_server *server = NFS_SB(sb);
439 struct nfs_client *clp = server->nfs_client;
440
441 if (!atomic_add_unless(&sb->s_active, -1, 1)) {
442 rcu_read_lock();
443 snprintf(buf, sizeof(buf),
444 rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_ADDR));
445 rcu_read_unlock();
446
447 __module_get(THIS_MODULE);
448 task = kthread_run(nfs_deactivate_super_async_work, sb,
449 "%s-deactivate-super", buf);
450 if (IS_ERR(task)) {
451 pr_err("%s: kthread_run: %ld\n",
452 __func__, PTR_ERR(task));
453 /* make synchronous call and hope for the best */
454 deactivate_super(sb);
455 module_put(THIS_MODULE);
456 }
457 }
458}
459
460void nfs_sb_deactive_async(struct super_block *sb)
461{
462 struct nfs_server *server = NFS_SB(sb);
463
464 if (atomic_dec_and_test(&server->active))
465 nfs_deactivate_super_async(sb);
466}
467EXPORT_SYMBOL_GPL(nfs_sb_deactive_async);
468
469/* 420/*
470 * Deliver file system statistics to userspace 421 * Deliver file system statistics to userspace
471 */ 422 */
@@ -2589,27 +2540,23 @@ nfs_xdev_mount(struct file_system_type *fs_type, int flags,
2589 struct nfs_server *server; 2540 struct nfs_server *server;
2590 struct dentry *mntroot = ERR_PTR(-ENOMEM); 2541 struct dentry *mntroot = ERR_PTR(-ENOMEM);
2591 struct nfs_subversion *nfs_mod = NFS_SB(data->sb)->nfs_client->cl_nfs_mod; 2542 struct nfs_subversion *nfs_mod = NFS_SB(data->sb)->nfs_client->cl_nfs_mod;
2592 int error;
2593 2543
2594 dprintk("--> nfs_xdev_mount_common()\n"); 2544 dprintk("--> nfs_xdev_mount()\n");
2595 2545
2596 mount_info.mntfh = mount_info.cloned->fh; 2546 mount_info.mntfh = mount_info.cloned->fh;
2597 2547
2598 /* create a new volume representation */ 2548 /* create a new volume representation */
2599 server = nfs_mod->rpc_ops->clone_server(NFS_SB(data->sb), data->fh, data->fattr, data->authflavor); 2549 server = nfs_mod->rpc_ops->clone_server(NFS_SB(data->sb), data->fh, data->fattr, data->authflavor);
2600 if (IS_ERR(server)) {
2601 error = PTR_ERR(server);
2602 goto out_err;
2603 }
2604 2550
2605 mntroot = nfs_fs_mount_common(server, flags, dev_name, &mount_info, nfs_mod); 2551 if (IS_ERR(server))
2606 dprintk("<-- nfs_xdev_mount_common() = 0\n"); 2552 mntroot = ERR_CAST(server);
2607out: 2553 else
2608 return mntroot; 2554 mntroot = nfs_fs_mount_common(server, flags,
2555 dev_name, &mount_info, nfs_mod);
2609 2556
2610out_err: 2557 dprintk("<-- nfs_xdev_mount() = %ld\n",
2611 dprintk("<-- nfs_xdev_mount_common() = %d [error]\n", error); 2558 IS_ERR(mntroot) ? PTR_ERR(mntroot) : 0L);
2612 goto out; 2559 return mntroot;
2613} 2560}
2614 2561
2615#if IS_ENABLED(CONFIG_NFS_V4) 2562#if IS_ENABLED(CONFIG_NFS_V4)
diff --git a/fs/nfs/unlink.c b/fs/nfs/unlink.c
index 3f79c77153b8..d26a32f5b53b 100644
--- a/fs/nfs/unlink.c
+++ b/fs/nfs/unlink.c
@@ -95,7 +95,7 @@ static void nfs_async_unlink_release(void *calldata)
95 95
96 nfs_dec_sillycount(data->dir); 96 nfs_dec_sillycount(data->dir);
97 nfs_free_unlinkdata(data); 97 nfs_free_unlinkdata(data);
98 nfs_sb_deactive_async(sb); 98 nfs_sb_deactive(sb);
99} 99}
100 100
101static void nfs_unlink_prepare(struct rpc_task *task, void *calldata) 101static void nfs_unlink_prepare(struct rpc_task *task, void *calldata)
@@ -268,8 +268,7 @@ nfs_async_unlink(struct inode *dir, struct dentry *dentry)
268 * point dentry is definitely not a root, so we won't need 268 * point dentry is definitely not a root, so we won't need
269 * that anymore. 269 * that anymore.
270 */ 270 */
271 if (devname_garbage) 271 kfree(devname_garbage);
272 kfree(devname_garbage);
273 return 0; 272 return 0;
274out_unlock: 273out_unlock:
275 spin_unlock(&dentry->d_lock); 274 spin_unlock(&dentry->d_lock);
diff --git a/fs/nfs_common/nfsacl.c b/fs/nfs_common/nfsacl.c
index 6940439bd609..ed628f71274c 100644
--- a/fs/nfs_common/nfsacl.c
+++ b/fs/nfs_common/nfsacl.c
@@ -38,8 +38,8 @@ struct nfsacl_encode_desc {
38 unsigned int count; 38 unsigned int count;
39 struct posix_acl *acl; 39 struct posix_acl *acl;
40 int typeflag; 40 int typeflag;
41 uid_t uid; 41 kuid_t uid;
42 gid_t gid; 42 kgid_t gid;
43}; 43};
44 44
45struct nfsacl_simple_acl { 45struct nfsacl_simple_acl {
@@ -60,14 +60,16 @@ xdr_nfsace_encode(struct xdr_array2_desc *desc, void *elem)
60 *p++ = htonl(entry->e_tag | nfsacl_desc->typeflag); 60 *p++ = htonl(entry->e_tag | nfsacl_desc->typeflag);
61 switch(entry->e_tag) { 61 switch(entry->e_tag) {
62 case ACL_USER_OBJ: 62 case ACL_USER_OBJ:
63 *p++ = htonl(nfsacl_desc->uid); 63 *p++ = htonl(from_kuid(&init_user_ns, nfsacl_desc->uid));
64 break; 64 break;
65 case ACL_GROUP_OBJ: 65 case ACL_GROUP_OBJ:
66 *p++ = htonl(nfsacl_desc->gid); 66 *p++ = htonl(from_kgid(&init_user_ns, nfsacl_desc->gid));
67 break; 67 break;
68 case ACL_USER: 68 case ACL_USER:
69 *p++ = htonl(from_kuid(&init_user_ns, entry->e_uid));
70 break;
69 case ACL_GROUP: 71 case ACL_GROUP:
70 *p++ = htonl(entry->e_id); 72 *p++ = htonl(from_kgid(&init_user_ns, entry->e_gid));
71 break; 73 break;
72 default: /* Solaris depends on that! */ 74 default: /* Solaris depends on that! */
73 *p++ = 0; 75 *p++ = 0;
@@ -148,6 +150,7 @@ xdr_nfsace_decode(struct xdr_array2_desc *desc, void *elem)
148 (struct nfsacl_decode_desc *) desc; 150 (struct nfsacl_decode_desc *) desc;
149 __be32 *p = elem; 151 __be32 *p = elem;
150 struct posix_acl_entry *entry; 152 struct posix_acl_entry *entry;
153 unsigned int id;
151 154
152 if (!nfsacl_desc->acl) { 155 if (!nfsacl_desc->acl) {
153 if (desc->array_len > NFS_ACL_MAX_ENTRIES) 156 if (desc->array_len > NFS_ACL_MAX_ENTRIES)
@@ -160,14 +163,22 @@ xdr_nfsace_decode(struct xdr_array2_desc *desc, void *elem)
160 163
161 entry = &nfsacl_desc->acl->a_entries[nfsacl_desc->count++]; 164 entry = &nfsacl_desc->acl->a_entries[nfsacl_desc->count++];
162 entry->e_tag = ntohl(*p++) & ~NFS_ACL_DEFAULT; 165 entry->e_tag = ntohl(*p++) & ~NFS_ACL_DEFAULT;
163 entry->e_id = ntohl(*p++); 166 id = ntohl(*p++);
164 entry->e_perm = ntohl(*p++); 167 entry->e_perm = ntohl(*p++);
165 168
166 switch(entry->e_tag) { 169 switch(entry->e_tag) {
167 case ACL_USER_OBJ:
168 case ACL_USER: 170 case ACL_USER:
169 case ACL_GROUP_OBJ: 171 entry->e_uid = make_kuid(&init_user_ns, id);
172 if (!uid_valid(entry->e_uid))
173 return -EINVAL;
174 break;
170 case ACL_GROUP: 175 case ACL_GROUP:
176 entry->e_gid = make_kgid(&init_user_ns, id);
177 if (!gid_valid(entry->e_gid))
178 return -EINVAL;
179 break;
180 case ACL_USER_OBJ:
181 case ACL_GROUP_OBJ:
171 case ACL_OTHER: 182 case ACL_OTHER:
172 if (entry->e_perm & ~S_IRWXO) 183 if (entry->e_perm & ~S_IRWXO)
173 return -EINVAL; 184 return -EINVAL;
@@ -190,9 +201,13 @@ cmp_acl_entry(const void *x, const void *y)
190 201
191 if (a->e_tag != b->e_tag) 202 if (a->e_tag != b->e_tag)
192 return a->e_tag - b->e_tag; 203 return a->e_tag - b->e_tag;
193 else if (a->e_id > b->e_id) 204 else if ((a->e_tag == ACL_USER) && uid_gt(a->e_uid, b->e_uid))
205 return 1;
206 else if ((a->e_tag == ACL_USER) && uid_lt(a->e_uid, b->e_uid))
207 return -1;
208 else if ((a->e_tag == ACL_GROUP) && gid_gt(a->e_gid, b->e_gid))
194 return 1; 209 return 1;
195 else if (a->e_id < b->e_id) 210 else if ((a->e_tag == ACL_GROUP) && gid_lt(a->e_gid, b->e_gid))
196 return -1; 211 return -1;
197 else 212 else
198 return 0; 213 return 0;
@@ -213,22 +228,18 @@ posix_acl_from_nfsacl(struct posix_acl *acl)
213 sort(acl->a_entries, acl->a_count, sizeof(struct posix_acl_entry), 228 sort(acl->a_entries, acl->a_count, sizeof(struct posix_acl_entry),
214 cmp_acl_entry, NULL); 229 cmp_acl_entry, NULL);
215 230
216 /* Clear undefined identifier fields and find the ACL_GROUP_OBJ 231 /* Find the ACL_GROUP_OBJ and ACL_MASK entries. */
217 and ACL_MASK entries. */
218 FOREACH_ACL_ENTRY(pa, acl, pe) { 232 FOREACH_ACL_ENTRY(pa, acl, pe) {
219 switch(pa->e_tag) { 233 switch(pa->e_tag) {
220 case ACL_USER_OBJ: 234 case ACL_USER_OBJ:
221 pa->e_id = ACL_UNDEFINED_ID;
222 break; 235 break;
223 case ACL_GROUP_OBJ: 236 case ACL_GROUP_OBJ:
224 pa->e_id = ACL_UNDEFINED_ID;
225 group_obj = pa; 237 group_obj = pa;
226 break; 238 break;
227 case ACL_MASK: 239 case ACL_MASK:
228 mask = pa; 240 mask = pa;
229 /* fall through */ 241 /* fall through */
230 case ACL_OTHER: 242 case ACL_OTHER:
231 pa->e_id = ACL_UNDEFINED_ID;
232 break; 243 break;
233 } 244 }
234 } 245 }
diff --git a/fs/nfsd/Kconfig b/fs/nfsd/Kconfig
index 8df1ea4a6ff9..430b6872806f 100644
--- a/fs/nfsd/Kconfig
+++ b/fs/nfsd/Kconfig
@@ -65,8 +65,8 @@ config NFSD_V3_ACL
65 If unsure, say N. 65 If unsure, say N.
66 66
67config NFSD_V4 67config NFSD_V4
68 bool "NFS server support for NFS version 4 (EXPERIMENTAL)" 68 bool "NFS server support for NFS version 4"
69 depends on NFSD && PROC_FS && EXPERIMENTAL 69 depends on NFSD && PROC_FS
70 select NFSD_V3 70 select NFSD_V3
71 select FS_POSIX_ACL 71 select FS_POSIX_ACL
72 select SUNRPC_GSS 72 select SUNRPC_GSS
diff --git a/fs/nfsd/acl.h b/fs/nfsd/acl.h
index 34e5c40af5ef..8b186a4955cc 100644
--- a/fs/nfsd/acl.h
+++ b/fs/nfsd/acl.h
@@ -44,8 +44,6 @@
44struct nfs4_acl *nfs4_acl_new(int); 44struct nfs4_acl *nfs4_acl_new(int);
45int nfs4_acl_get_whotype(char *, u32); 45int nfs4_acl_get_whotype(char *, u32);
46int nfs4_acl_write_who(int who, char *p); 46int nfs4_acl_write_who(int who, char *p);
47int nfs4_acl_permission(struct nfs4_acl *acl, uid_t owner, gid_t group,
48 uid_t who, u32 mask);
49 47
50#define NFS4_ACL_TYPE_DEFAULT 0x01 48#define NFS4_ACL_TYPE_DEFAULT 0x01
51#define NFS4_ACL_DIR 0x02 49#define NFS4_ACL_DIR 0x02
diff --git a/fs/nfsd/auth.c b/fs/nfsd/auth.c
index 34a10d78b839..06cddd572264 100644
--- a/fs/nfsd/auth.c
+++ b/fs/nfsd/auth.c
@@ -47,9 +47,9 @@ int nfsd_setuser(struct svc_rqst *rqstp, struct svc_export *exp)
47 if (!gi) 47 if (!gi)
48 goto oom; 48 goto oom;
49 } else if (flags & NFSEXP_ROOTSQUASH) { 49 } else if (flags & NFSEXP_ROOTSQUASH) {
50 if (!new->fsuid) 50 if (uid_eq(new->fsuid, GLOBAL_ROOT_UID))
51 new->fsuid = exp->ex_anon_uid; 51 new->fsuid = exp->ex_anon_uid;
52 if (!new->fsgid) 52 if (gid_eq(new->fsgid, GLOBAL_ROOT_GID))
53 new->fsgid = exp->ex_anon_gid; 53 new->fsgid = exp->ex_anon_gid;
54 54
55 gi = groups_alloc(rqgi->ngroups); 55 gi = groups_alloc(rqgi->ngroups);
@@ -58,7 +58,7 @@ int nfsd_setuser(struct svc_rqst *rqstp, struct svc_export *exp)
58 58
59 for (i = 0; i < rqgi->ngroups; i++) { 59 for (i = 0; i < rqgi->ngroups; i++) {
60 if (gid_eq(GLOBAL_ROOT_GID, GROUP_AT(rqgi, i))) 60 if (gid_eq(GLOBAL_ROOT_GID, GROUP_AT(rqgi, i)))
61 GROUP_AT(gi, i) = make_kgid(&init_user_ns, exp->ex_anon_gid); 61 GROUP_AT(gi, i) = exp->ex_anon_gid;
62 else 62 else
63 GROUP_AT(gi, i) = GROUP_AT(rqgi, i); 63 GROUP_AT(gi, i) = GROUP_AT(rqgi, i);
64 } 64 }
@@ -66,9 +66,9 @@ int nfsd_setuser(struct svc_rqst *rqstp, struct svc_export *exp)
66 gi = get_group_info(rqgi); 66 gi = get_group_info(rqgi);
67 } 67 }
68 68
69 if (new->fsuid == (uid_t) -1) 69 if (uid_eq(new->fsuid, INVALID_UID))
70 new->fsuid = exp->ex_anon_uid; 70 new->fsuid = exp->ex_anon_uid;
71 if (new->fsgid == (gid_t) -1) 71 if (gid_eq(new->fsgid, INVALID_GID))
72 new->fsgid = exp->ex_anon_gid; 72 new->fsgid = exp->ex_anon_gid;
73 73
74 ret = set_groups(new, gi); 74 ret = set_groups(new, gi);
@@ -76,7 +76,7 @@ int nfsd_setuser(struct svc_rqst *rqstp, struct svc_export *exp)
76 if (ret < 0) 76 if (ret < 0)
77 goto error; 77 goto error;
78 78
79 if (new->fsuid) 79 if (!uid_eq(new->fsuid, GLOBAL_ROOT_UID))
80 new->cap_effective = cap_drop_nfsd_set(new->cap_effective); 80 new->cap_effective = cap_drop_nfsd_set(new->cap_effective);
81 else 81 else
82 new->cap_effective = cap_raise_nfsd_set(new->cap_effective, 82 new->cap_effective = cap_raise_nfsd_set(new->cap_effective,
diff --git a/fs/nfsd/auth.h b/fs/nfsd/auth.h
index 78b3c0e93822..53325a12ba62 100644
--- a/fs/nfsd/auth.h
+++ b/fs/nfsd/auth.h
@@ -1,6 +1,5 @@
1/* 1/*
2 * nfsd-specific authentication stuff. 2 * nfsd-specific authentication stuff.
3 * uid/gid mapping not yet implemented.
4 * 3 *
5 * Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de> 4 * Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de>
6 */ 5 */
@@ -8,11 +7,6 @@
8#ifndef LINUX_NFSD_AUTH_H 7#ifndef LINUX_NFSD_AUTH_H
9#define LINUX_NFSD_AUTH_H 8#define LINUX_NFSD_AUTH_H
10 9
11#define nfsd_luid(rq, uid) ((u32)(uid))
12#define nfsd_lgid(rq, gid) ((u32)(gid))
13#define nfsd_ruid(rq, uid) ((u32)(uid))
14#define nfsd_rgid(rq, gid) ((u32)(gid))
15
16/* 10/*
17 * Set the current process's fsuid/fsgid etc to those of the NFS 11 * Set the current process's fsuid/fsgid etc to those of the NFS
18 * client user 12 * client user
diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c
index a3946cf13fc8..5681c5906f08 100644
--- a/fs/nfsd/export.c
+++ b/fs/nfsd/export.c
@@ -544,13 +544,17 @@ static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen)
544 err = get_int(&mesg, &an_int); 544 err = get_int(&mesg, &an_int);
545 if (err) 545 if (err)
546 goto out3; 546 goto out3;
547 exp.ex_anon_uid= an_int; 547 exp.ex_anon_uid= make_kuid(&init_user_ns, an_int);
548 if (!uid_valid(exp.ex_anon_uid))
549 goto out3;
548 550
549 /* anon gid */ 551 /* anon gid */
550 err = get_int(&mesg, &an_int); 552 err = get_int(&mesg, &an_int);
551 if (err) 553 if (err)
552 goto out3; 554 goto out3;
553 exp.ex_anon_gid= an_int; 555 exp.ex_anon_gid= make_kgid(&init_user_ns, an_int);
556 if (!gid_valid(exp.ex_anon_gid))
557 goto out3;
554 558
555 /* fsid */ 559 /* fsid */
556 err = get_int(&mesg, &an_int); 560 err = get_int(&mesg, &an_int);
@@ -613,7 +617,7 @@ out:
613} 617}
614 618
615static void exp_flags(struct seq_file *m, int flag, int fsid, 619static void exp_flags(struct seq_file *m, int flag, int fsid,
616 uid_t anonu, uid_t anong, struct nfsd4_fs_locations *fslocs); 620 kuid_t anonu, kgid_t anong, struct nfsd4_fs_locations *fslocs);
617static void show_secinfo(struct seq_file *m, struct svc_export *exp); 621static void show_secinfo(struct seq_file *m, struct svc_export *exp);
618 622
619static int svc_export_show(struct seq_file *m, 623static int svc_export_show(struct seq_file *m,
@@ -1179,15 +1183,17 @@ static void show_secinfo(struct seq_file *m, struct svc_export *exp)
1179} 1183}
1180 1184
1181static void exp_flags(struct seq_file *m, int flag, int fsid, 1185static void exp_flags(struct seq_file *m, int flag, int fsid,
1182 uid_t anonu, uid_t anong, struct nfsd4_fs_locations *fsloc) 1186 kuid_t anonu, kgid_t anong, struct nfsd4_fs_locations *fsloc)
1183{ 1187{
1184 show_expflags(m, flag, NFSEXP_ALLFLAGS); 1188 show_expflags(m, flag, NFSEXP_ALLFLAGS);
1185 if (flag & NFSEXP_FSID) 1189 if (flag & NFSEXP_FSID)
1186 seq_printf(m, ",fsid=%d", fsid); 1190 seq_printf(m, ",fsid=%d", fsid);
1187 if (anonu != (uid_t)-2 && anonu != (0x10000-2)) 1191 if (!uid_eq(anonu, make_kuid(&init_user_ns, (uid_t)-2)) &&
1188 seq_printf(m, ",anonuid=%u", anonu); 1192 !uid_eq(anonu, make_kuid(&init_user_ns, 0x10000-2)))
1189 if (anong != (gid_t)-2 && anong != (0x10000-2)) 1193 seq_printf(m, ",anonuid=%u", from_kuid(&init_user_ns, anonu));
1190 seq_printf(m, ",anongid=%u", anong); 1194 if (!gid_eq(anong, make_kgid(&init_user_ns, (gid_t)-2)) &&
1195 !gid_eq(anong, make_kgid(&init_user_ns, 0x10000-2)))
1196 seq_printf(m, ",anongid=%u", from_kgid(&init_user_ns, anong));
1191 if (fsloc && fsloc->locations_count > 0) { 1197 if (fsloc && fsloc->locations_count > 0) {
1192 char *loctype = (fsloc->migrated) ? "refer" : "replicas"; 1198 char *loctype = (fsloc->migrated) ? "refer" : "replicas";
1193 int i; 1199 int i;
diff --git a/fs/nfsd/idmap.h b/fs/nfsd/idmap.h
index 9d513efc01ba..bf95f6b817a4 100644
--- a/fs/nfsd/idmap.h
+++ b/fs/nfsd/idmap.h
@@ -54,9 +54,9 @@ static inline void nfsd_idmap_shutdown(struct net *net)
54} 54}
55#endif 55#endif
56 56
57__be32 nfsd_map_name_to_uid(struct svc_rqst *, const char *, size_t, __u32 *); 57__be32 nfsd_map_name_to_uid(struct svc_rqst *, const char *, size_t, kuid_t *);
58__be32 nfsd_map_name_to_gid(struct svc_rqst *, const char *, size_t, __u32 *); 58__be32 nfsd_map_name_to_gid(struct svc_rqst *, const char *, size_t, kgid_t *);
59int nfsd_map_uid_to_name(struct svc_rqst *, __u32, char *); 59int nfsd_map_uid_to_name(struct svc_rqst *, kuid_t, char *);
60int nfsd_map_gid_to_name(struct svc_rqst *, __u32, char *); 60int nfsd_map_gid_to_name(struct svc_rqst *, kgid_t, char *);
61 61
62#endif /* LINUX_NFSD_IDMAP_H */ 62#endif /* LINUX_NFSD_IDMAP_H */
diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c
index 324c0baf7cda..925c944bc0bc 100644
--- a/fs/nfsd/nfs3xdr.c
+++ b/fs/nfsd/nfs3xdr.c
@@ -105,12 +105,14 @@ decode_sattr3(__be32 *p, struct iattr *iap)
105 iap->ia_mode = ntohl(*p++); 105 iap->ia_mode = ntohl(*p++);
106 } 106 }
107 if (*p++) { 107 if (*p++) {
108 iap->ia_valid |= ATTR_UID; 108 iap->ia_uid = make_kuid(&init_user_ns, ntohl(*p++));
109 iap->ia_uid = ntohl(*p++); 109 if (uid_valid(iap->ia_uid))
110 iap->ia_valid |= ATTR_UID;
110 } 111 }
111 if (*p++) { 112 if (*p++) {
112 iap->ia_valid |= ATTR_GID; 113 iap->ia_gid = make_kgid(&init_user_ns, ntohl(*p++));
113 iap->ia_gid = ntohl(*p++); 114 if (gid_valid(iap->ia_gid))
115 iap->ia_valid |= ATTR_GID;
114 } 116 }
115 if (*p++) { 117 if (*p++) {
116 u64 newsize; 118 u64 newsize;
@@ -167,8 +169,8 @@ encode_fattr3(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *fhp,
167 *p++ = htonl(nfs3_ftypes[(stat->mode & S_IFMT) >> 12]); 169 *p++ = htonl(nfs3_ftypes[(stat->mode & S_IFMT) >> 12]);
168 *p++ = htonl((u32) stat->mode); 170 *p++ = htonl((u32) stat->mode);
169 *p++ = htonl((u32) stat->nlink); 171 *p++ = htonl((u32) stat->nlink);
170 *p++ = htonl((u32) nfsd_ruid(rqstp, stat->uid)); 172 *p++ = htonl((u32) from_kuid(&init_user_ns, stat->uid));
171 *p++ = htonl((u32) nfsd_rgid(rqstp, stat->gid)); 173 *p++ = htonl((u32) from_kgid(&init_user_ns, stat->gid));
172 if (S_ISLNK(stat->mode) && stat->size > NFS3_MAXPATHLEN) { 174 if (S_ISLNK(stat->mode) && stat->size > NFS3_MAXPATHLEN) {
173 p = xdr_encode_hyper(p, (u64) NFS3_MAXPATHLEN); 175 p = xdr_encode_hyper(p, (u64) NFS3_MAXPATHLEN);
174 } else { 176 } else {
diff --git a/fs/nfsd/nfs4acl.c b/fs/nfsd/nfs4acl.c
index 9c51aff02ae2..8a50b3c18093 100644
--- a/fs/nfsd/nfs4acl.c
+++ b/fs/nfsd/nfs4acl.c
@@ -264,7 +264,7 @@ _posix_to_nfsv4_one(struct posix_acl *pacl, struct nfs4_acl *acl,
264 ace->flag = eflag; 264 ace->flag = eflag;
265 ace->access_mask = deny_mask_from_posix(deny, flags); 265 ace->access_mask = deny_mask_from_posix(deny, flags);
266 ace->whotype = NFS4_ACL_WHO_NAMED; 266 ace->whotype = NFS4_ACL_WHO_NAMED;
267 ace->who = pa->e_id; 267 ace->who_uid = pa->e_uid;
268 ace++; 268 ace++;
269 acl->naces++; 269 acl->naces++;
270 } 270 }
@@ -273,7 +273,7 @@ _posix_to_nfsv4_one(struct posix_acl *pacl, struct nfs4_acl *acl,
273 ace->access_mask = mask_from_posix(pa->e_perm & pas.mask, 273 ace->access_mask = mask_from_posix(pa->e_perm & pas.mask,
274 flags); 274 flags);
275 ace->whotype = NFS4_ACL_WHO_NAMED; 275 ace->whotype = NFS4_ACL_WHO_NAMED;
276 ace->who = pa->e_id; 276 ace->who_uid = pa->e_uid;
277 ace++; 277 ace++;
278 acl->naces++; 278 acl->naces++;
279 pa++; 279 pa++;
@@ -300,7 +300,7 @@ _posix_to_nfsv4_one(struct posix_acl *pacl, struct nfs4_acl *acl,
300 ace->access_mask = mask_from_posix(pa->e_perm & pas.mask, 300 ace->access_mask = mask_from_posix(pa->e_perm & pas.mask,
301 flags); 301 flags);
302 ace->whotype = NFS4_ACL_WHO_NAMED; 302 ace->whotype = NFS4_ACL_WHO_NAMED;
303 ace->who = pa->e_id; 303 ace->who_gid = pa->e_gid;
304 ace++; 304 ace++;
305 acl->naces++; 305 acl->naces++;
306 pa++; 306 pa++;
@@ -329,7 +329,7 @@ _posix_to_nfsv4_one(struct posix_acl *pacl, struct nfs4_acl *acl,
329 ace->flag = eflag | NFS4_ACE_IDENTIFIER_GROUP; 329 ace->flag = eflag | NFS4_ACE_IDENTIFIER_GROUP;
330 ace->access_mask = deny_mask_from_posix(deny, flags); 330 ace->access_mask = deny_mask_from_posix(deny, flags);
331 ace->whotype = NFS4_ACL_WHO_NAMED; 331 ace->whotype = NFS4_ACL_WHO_NAMED;
332 ace->who = pa->e_id; 332 ace->who_gid = pa->e_gid;
333 ace++; 333 ace++;
334 acl->naces++; 334 acl->naces++;
335 } 335 }
@@ -345,6 +345,18 @@ _posix_to_nfsv4_one(struct posix_acl *pacl, struct nfs4_acl *acl,
345 acl->naces++; 345 acl->naces++;
346} 346}
347 347
348static bool
349pace_gt(struct posix_acl_entry *pace1, struct posix_acl_entry *pace2)
350{
351 if (pace1->e_tag != pace2->e_tag)
352 return pace1->e_tag > pace2->e_tag;
353 if (pace1->e_tag == ACL_USER)
354 return uid_gt(pace1->e_uid, pace2->e_uid);
355 if (pace1->e_tag == ACL_GROUP)
356 return gid_gt(pace1->e_gid, pace2->e_gid);
357 return false;
358}
359
348static void 360static void
349sort_pacl_range(struct posix_acl *pacl, int start, int end) { 361sort_pacl_range(struct posix_acl *pacl, int start, int end) {
350 int sorted = 0, i; 362 int sorted = 0, i;
@@ -355,8 +367,8 @@ sort_pacl_range(struct posix_acl *pacl, int start, int end) {
355 while (!sorted) { 367 while (!sorted) {
356 sorted = 1; 368 sorted = 1;
357 for (i = start; i < end; i++) { 369 for (i = start; i < end; i++) {
358 if (pacl->a_entries[i].e_id 370 if (pace_gt(&pacl->a_entries[i],
359 > pacl->a_entries[i+1].e_id) { 371 &pacl->a_entries[i+1])) {
360 sorted = 0; 372 sorted = 0;
361 tmp = pacl->a_entries[i]; 373 tmp = pacl->a_entries[i];
362 pacl->a_entries[i] = pacl->a_entries[i+1]; 374 pacl->a_entries[i] = pacl->a_entries[i+1];
@@ -398,7 +410,10 @@ struct posix_ace_state {
398}; 410};
399 411
400struct posix_user_ace_state { 412struct posix_user_ace_state {
401 uid_t uid; 413 union {
414 kuid_t uid;
415 kgid_t gid;
416 };
402 struct posix_ace_state perms; 417 struct posix_ace_state perms;
403}; 418};
404 419
@@ -521,7 +536,6 @@ posix_state_to_acl(struct posix_acl_state *state, unsigned int flags)
521 if (error) 536 if (error)
522 goto out_err; 537 goto out_err;
523 low_mode_from_nfs4(state->owner.allow, &pace->e_perm, flags); 538 low_mode_from_nfs4(state->owner.allow, &pace->e_perm, flags);
524 pace->e_id = ACL_UNDEFINED_ID;
525 539
526 for (i=0; i < state->users->n; i++) { 540 for (i=0; i < state->users->n; i++) {
527 pace++; 541 pace++;
@@ -531,7 +545,7 @@ posix_state_to_acl(struct posix_acl_state *state, unsigned int flags)
531 goto out_err; 545 goto out_err;
532 low_mode_from_nfs4(state->users->aces[i].perms.allow, 546 low_mode_from_nfs4(state->users->aces[i].perms.allow,
533 &pace->e_perm, flags); 547 &pace->e_perm, flags);
534 pace->e_id = state->users->aces[i].uid; 548 pace->e_uid = state->users->aces[i].uid;
535 add_to_mask(state, &state->users->aces[i].perms); 549 add_to_mask(state, &state->users->aces[i].perms);
536 } 550 }
537 551
@@ -541,7 +555,6 @@ posix_state_to_acl(struct posix_acl_state *state, unsigned int flags)
541 if (error) 555 if (error)
542 goto out_err; 556 goto out_err;
543 low_mode_from_nfs4(state->group.allow, &pace->e_perm, flags); 557 low_mode_from_nfs4(state->group.allow, &pace->e_perm, flags);
544 pace->e_id = ACL_UNDEFINED_ID;
545 add_to_mask(state, &state->group); 558 add_to_mask(state, &state->group);
546 559
547 for (i=0; i < state->groups->n; i++) { 560 for (i=0; i < state->groups->n; i++) {
@@ -552,14 +565,13 @@ posix_state_to_acl(struct posix_acl_state *state, unsigned int flags)
552 goto out_err; 565 goto out_err;
553 low_mode_from_nfs4(state->groups->aces[i].perms.allow, 566 low_mode_from_nfs4(state->groups->aces[i].perms.allow,
554 &pace->e_perm, flags); 567 &pace->e_perm, flags);
555 pace->e_id = state->groups->aces[i].uid; 568 pace->e_gid = state->groups->aces[i].gid;
556 add_to_mask(state, &state->groups->aces[i].perms); 569 add_to_mask(state, &state->groups->aces[i].perms);
557 } 570 }
558 571
559 pace++; 572 pace++;
560 pace->e_tag = ACL_MASK; 573 pace->e_tag = ACL_MASK;
561 low_mode_from_nfs4(state->mask.allow, &pace->e_perm, flags); 574 low_mode_from_nfs4(state->mask.allow, &pace->e_perm, flags);
562 pace->e_id = ACL_UNDEFINED_ID;
563 575
564 pace++; 576 pace++;
565 pace->e_tag = ACL_OTHER; 577 pace->e_tag = ACL_OTHER;
@@ -567,7 +579,6 @@ posix_state_to_acl(struct posix_acl_state *state, unsigned int flags)
567 if (error) 579 if (error)
568 goto out_err; 580 goto out_err;
569 low_mode_from_nfs4(state->other.allow, &pace->e_perm, flags); 581 low_mode_from_nfs4(state->other.allow, &pace->e_perm, flags);
570 pace->e_id = ACL_UNDEFINED_ID;
571 582
572 return pacl; 583 return pacl;
573out_err: 584out_err:
@@ -587,12 +598,13 @@ static inline void deny_bits(struct posix_ace_state *astate, u32 mask)
587 astate->deny |= mask & ~astate->allow; 598 astate->deny |= mask & ~astate->allow;
588} 599}
589 600
590static int find_uid(struct posix_acl_state *state, struct posix_ace_state_array *a, uid_t uid) 601static int find_uid(struct posix_acl_state *state, kuid_t uid)
591{ 602{
603 struct posix_ace_state_array *a = state->users;
592 int i; 604 int i;
593 605
594 for (i = 0; i < a->n; i++) 606 for (i = 0; i < a->n; i++)
595 if (a->aces[i].uid == uid) 607 if (uid_eq(a->aces[i].uid, uid))
596 return i; 608 return i;
597 /* Not found: */ 609 /* Not found: */
598 a->n++; 610 a->n++;
@@ -603,6 +615,23 @@ static int find_uid(struct posix_acl_state *state, struct posix_ace_state_array
603 return i; 615 return i;
604} 616}
605 617
618static int find_gid(struct posix_acl_state *state, kgid_t gid)
619{
620 struct posix_ace_state_array *a = state->groups;
621 int i;
622
623 for (i = 0; i < a->n; i++)
624 if (gid_eq(a->aces[i].gid, gid))
625 return i;
626 /* Not found: */
627 a->n++;
628 a->aces[i].gid = gid;
629 a->aces[i].perms.allow = state->everyone.allow;
630 a->aces[i].perms.deny = state->everyone.deny;
631
632 return i;
633}
634
606static void deny_bits_array(struct posix_ace_state_array *a, u32 mask) 635static void deny_bits_array(struct posix_ace_state_array *a, u32 mask)
607{ 636{
608 int i; 637 int i;
@@ -636,7 +665,7 @@ static void process_one_v4_ace(struct posix_acl_state *state,
636 } 665 }
637 break; 666 break;
638 case ACL_USER: 667 case ACL_USER:
639 i = find_uid(state, state->users, ace->who); 668 i = find_uid(state, ace->who_uid);
640 if (ace->type == NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE) { 669 if (ace->type == NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE) {
641 allow_bits(&state->users->aces[i].perms, mask); 670 allow_bits(&state->users->aces[i].perms, mask);
642 } else { 671 } else {
@@ -658,7 +687,7 @@ static void process_one_v4_ace(struct posix_acl_state *state,
658 } 687 }
659 break; 688 break;
660 case ACL_GROUP: 689 case ACL_GROUP:
661 i = find_uid(state, state->groups, ace->who); 690 i = find_gid(state, ace->who_gid);
662 if (ace->type == NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE) { 691 if (ace->type == NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE) {
663 allow_bits(&state->groups->aces[i].perms, mask); 692 allow_bits(&state->groups->aces[i].perms, mask);
664 } else { 693 } else {
diff --git a/fs/nfsd/nfs4idmap.c b/fs/nfsd/nfs4idmap.c
index a1f10c0a6255..0ce12346df9c 100644
--- a/fs/nfsd/nfs4idmap.c
+++ b/fs/nfsd/nfs4idmap.c
@@ -65,7 +65,7 @@ MODULE_PARM_DESC(nfs4_disable_idmapping,
65struct ent { 65struct ent {
66 struct cache_head h; 66 struct cache_head h;
67 int type; /* User / Group */ 67 int type; /* User / Group */
68 uid_t id; 68 u32 id;
69 char name[IDMAP_NAMESZ]; 69 char name[IDMAP_NAMESZ];
70 char authname[IDMAP_NAMESZ]; 70 char authname[IDMAP_NAMESZ];
71}; 71};
@@ -540,7 +540,7 @@ rqst_authname(struct svc_rqst *rqstp)
540 540
541static __be32 541static __be32
542idmap_name_to_id(struct svc_rqst *rqstp, int type, const char *name, u32 namelen, 542idmap_name_to_id(struct svc_rqst *rqstp, int type, const char *name, u32 namelen,
543 uid_t *id) 543 u32 *id)
544{ 544{
545 struct ent *item, key = { 545 struct ent *item, key = {
546 .type = type, 546 .type = type,
@@ -564,7 +564,7 @@ idmap_name_to_id(struct svc_rqst *rqstp, int type, const char *name, u32 namelen
564} 564}
565 565
566static int 566static int
567idmap_id_to_name(struct svc_rqst *rqstp, int type, uid_t id, char *name) 567idmap_id_to_name(struct svc_rqst *rqstp, int type, u32 id, char *name)
568{ 568{
569 struct ent *item, key = { 569 struct ent *item, key = {
570 .id = id, 570 .id = id,
@@ -587,7 +587,7 @@ idmap_id_to_name(struct svc_rqst *rqstp, int type, uid_t id, char *name)
587} 587}
588 588
589static bool 589static bool
590numeric_name_to_id(struct svc_rqst *rqstp, int type, const char *name, u32 namelen, uid_t *id) 590numeric_name_to_id(struct svc_rqst *rqstp, int type, const char *name, u32 namelen, u32 *id)
591{ 591{
592 int ret; 592 int ret;
593 char buf[11]; 593 char buf[11];
@@ -603,7 +603,7 @@ numeric_name_to_id(struct svc_rqst *rqstp, int type, const char *name, u32 namel
603} 603}
604 604
605static __be32 605static __be32
606do_name_to_id(struct svc_rqst *rqstp, int type, const char *name, u32 namelen, uid_t *id) 606do_name_to_id(struct svc_rqst *rqstp, int type, const char *name, u32 namelen, u32 *id)
607{ 607{
608 if (nfs4_disable_idmapping && rqstp->rq_cred.cr_flavor < RPC_AUTH_GSS) 608 if (nfs4_disable_idmapping && rqstp->rq_cred.cr_flavor < RPC_AUTH_GSS)
609 if (numeric_name_to_id(rqstp, type, name, namelen, id)) 609 if (numeric_name_to_id(rqstp, type, name, namelen, id))
@@ -616,7 +616,7 @@ do_name_to_id(struct svc_rqst *rqstp, int type, const char *name, u32 namelen, u
616} 616}
617 617
618static int 618static int
619do_id_to_name(struct svc_rqst *rqstp, int type, uid_t id, char *name) 619do_id_to_name(struct svc_rqst *rqstp, int type, u32 id, char *name)
620{ 620{
621 if (nfs4_disable_idmapping && rqstp->rq_cred.cr_flavor < RPC_AUTH_GSS) 621 if (nfs4_disable_idmapping && rqstp->rq_cred.cr_flavor < RPC_AUTH_GSS)
622 return sprintf(name, "%u", id); 622 return sprintf(name, "%u", id);
@@ -625,26 +625,40 @@ do_id_to_name(struct svc_rqst *rqstp, int type, uid_t id, char *name)
625 625
626__be32 626__be32
627nfsd_map_name_to_uid(struct svc_rqst *rqstp, const char *name, size_t namelen, 627nfsd_map_name_to_uid(struct svc_rqst *rqstp, const char *name, size_t namelen,
628 __u32 *id) 628 kuid_t *uid)
629{ 629{
630 return do_name_to_id(rqstp, IDMAP_TYPE_USER, name, namelen, id); 630 __be32 status;
631 u32 id = -1;
632 status = do_name_to_id(rqstp, IDMAP_TYPE_USER, name, namelen, &id);
633 *uid = make_kuid(&init_user_ns, id);
634 if (!uid_valid(*uid))
635 status = nfserr_badowner;
636 return status;
631} 637}
632 638
633__be32 639__be32
634nfsd_map_name_to_gid(struct svc_rqst *rqstp, const char *name, size_t namelen, 640nfsd_map_name_to_gid(struct svc_rqst *rqstp, const char *name, size_t namelen,
635 __u32 *id) 641 kgid_t *gid)
636{ 642{
637 return do_name_to_id(rqstp, IDMAP_TYPE_GROUP, name, namelen, id); 643 __be32 status;
644 u32 id = -1;
645 status = do_name_to_id(rqstp, IDMAP_TYPE_GROUP, name, namelen, &id);
646 *gid = make_kgid(&init_user_ns, id);
647 if (!gid_valid(*gid))
648 status = nfserr_badowner;
649 return status;
638} 650}
639 651
640int 652int
641nfsd_map_uid_to_name(struct svc_rqst *rqstp, __u32 id, char *name) 653nfsd_map_uid_to_name(struct svc_rqst *rqstp, kuid_t uid, char *name)
642{ 654{
655 u32 id = from_kuid(&init_user_ns, uid);
643 return do_id_to_name(rqstp, IDMAP_TYPE_USER, id, name); 656 return do_id_to_name(rqstp, IDMAP_TYPE_USER, id, name);
644} 657}
645 658
646int 659int
647nfsd_map_gid_to_name(struct svc_rqst *rqstp, __u32 id, char *name) 660nfsd_map_gid_to_name(struct svc_rqst *rqstp, kgid_t gid, char *name)
648{ 661{
662 u32 id = from_kgid(&init_user_ns, gid);
649 return do_id_to_name(rqstp, IDMAP_TYPE_GROUP, id, name); 663 return do_id_to_name(rqstp, IDMAP_TYPE_GROUP, id, name);
650} 664}
diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c
index ba6fdd4a0455..4914af4a817e 100644
--- a/fs/nfsd/nfs4recover.c
+++ b/fs/nfsd/nfs4recover.c
@@ -73,8 +73,8 @@ nfs4_save_creds(const struct cred **original_creds)
73 if (!new) 73 if (!new)
74 return -ENOMEM; 74 return -ENOMEM;
75 75
76 new->fsuid = 0; 76 new->fsuid = GLOBAL_ROOT_UID;
77 new->fsgid = 0; 77 new->fsgid = GLOBAL_ROOT_GID;
78 *original_creds = override_creds(new); 78 *original_creds = override_creds(new);
79 put_cred(new); 79 put_cred(new);
80 return 0; 80 return 0;
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index ac8ed96c4199..9e7103b6e0ad 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -151,7 +151,7 @@ get_nfs4_file(struct nfs4_file *fi)
151} 151}
152 152
153static int num_delegations; 153static int num_delegations;
154unsigned int max_delegations; 154unsigned long max_delegations;
155 155
156/* 156/*
157 * Open owner state (share locks) 157 * Open owner state (share locks)
@@ -700,8 +700,8 @@ static int nfsd4_get_drc_mem(int slotsize, u32 num)
700 num = min_t(u32, num, NFSD_MAX_SLOTS_PER_SESSION); 700 num = min_t(u32, num, NFSD_MAX_SLOTS_PER_SESSION);
701 701
702 spin_lock(&nfsd_drc_lock); 702 spin_lock(&nfsd_drc_lock);
703 avail = min_t(int, NFSD_MAX_MEM_PER_SESSION, 703 avail = min((unsigned long)NFSD_MAX_MEM_PER_SESSION,
704 nfsd_drc_max_mem - nfsd_drc_mem_used); 704 nfsd_drc_max_mem - nfsd_drc_mem_used);
705 num = min_t(int, num, avail / slotsize); 705 num = min_t(int, num, avail / slotsize);
706 nfsd_drc_mem_used += num * slotsize; 706 nfsd_drc_mem_used += num * slotsize;
707 spin_unlock(&nfsd_drc_lock); 707 spin_unlock(&nfsd_drc_lock);
@@ -1202,7 +1202,7 @@ static bool groups_equal(struct group_info *g1, struct group_info *g2)
1202 if (g1->ngroups != g2->ngroups) 1202 if (g1->ngroups != g2->ngroups)
1203 return false; 1203 return false;
1204 for (i=0; i<g1->ngroups; i++) 1204 for (i=0; i<g1->ngroups; i++)
1205 if (GROUP_AT(g1, i) != GROUP_AT(g2, i)) 1205 if (!gid_eq(GROUP_AT(g1, i), GROUP_AT(g2, i)))
1206 return false; 1206 return false;
1207 return true; 1207 return true;
1208} 1208}
@@ -1227,8 +1227,8 @@ static bool
1227same_creds(struct svc_cred *cr1, struct svc_cred *cr2) 1227same_creds(struct svc_cred *cr1, struct svc_cred *cr2)
1228{ 1228{
1229 if ((is_gss_cred(cr1) != is_gss_cred(cr2)) 1229 if ((is_gss_cred(cr1) != is_gss_cred(cr2))
1230 || (cr1->cr_uid != cr2->cr_uid) 1230 || (!uid_eq(cr1->cr_uid, cr2->cr_uid))
1231 || (cr1->cr_gid != cr2->cr_gid) 1231 || (!gid_eq(cr1->cr_gid, cr2->cr_gid))
1232 || !groups_equal(cr1->cr_group_info, cr2->cr_group_info)) 1232 || !groups_equal(cr1->cr_group_info, cr2->cr_group_info))
1233 return false; 1233 return false;
1234 if (cr1->cr_principal == cr2->cr_principal) 1234 if (cr1->cr_principal == cr2->cr_principal)
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 0dc11586682f..2d1d06bae3a7 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -293,13 +293,13 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval,
293 ace->whotype = nfs4_acl_get_whotype(buf, dummy32); 293 ace->whotype = nfs4_acl_get_whotype(buf, dummy32);
294 status = nfs_ok; 294 status = nfs_ok;
295 if (ace->whotype != NFS4_ACL_WHO_NAMED) 295 if (ace->whotype != NFS4_ACL_WHO_NAMED)
296 ace->who = 0; 296 ;
297 else if (ace->flag & NFS4_ACE_IDENTIFIER_GROUP) 297 else if (ace->flag & NFS4_ACE_IDENTIFIER_GROUP)
298 status = nfsd_map_name_to_gid(argp->rqstp, 298 status = nfsd_map_name_to_gid(argp->rqstp,
299 buf, dummy32, &ace->who); 299 buf, dummy32, &ace->who_gid);
300 else 300 else
301 status = nfsd_map_name_to_uid(argp->rqstp, 301 status = nfsd_map_name_to_uid(argp->rqstp,
302 buf, dummy32, &ace->who); 302 buf, dummy32, &ace->who_uid);
303 if (status) 303 if (status)
304 return status; 304 return status;
305 } 305 }
@@ -464,9 +464,16 @@ static __be32 nfsd4_decode_cb_sec(struct nfsd4_compoundargs *argp, struct nfsd4_
464 READ32(dummy); 464 READ32(dummy);
465 READ_BUF(dummy * 4); 465 READ_BUF(dummy * 4);
466 if (cbs->flavor == (u32)(-1)) { 466 if (cbs->flavor == (u32)(-1)) {
467 cbs->uid = uid; 467 kuid_t kuid = make_kuid(&init_user_ns, uid);
468 cbs->gid = gid; 468 kgid_t kgid = make_kgid(&init_user_ns, gid);
469 cbs->flavor = RPC_AUTH_UNIX; 469 if (uid_valid(kuid) && gid_valid(kgid)) {
470 cbs->uid = kuid;
471 cbs->gid = kgid;
472 cbs->flavor = RPC_AUTH_UNIX;
473 } else {
474 dprintk("RPC_AUTH_UNIX with invalid"
475 "uid or gid ignoring!\n");
476 }
470 } 477 }
471 break; 478 break;
472 case RPC_AUTH_GSS: 479 case RPC_AUTH_GSS:
@@ -1926,7 +1933,7 @@ static u32 nfs4_file_type(umode_t mode)
1926} 1933}
1927 1934
1928static __be32 1935static __be32
1929nfsd4_encode_name(struct svc_rqst *rqstp, int whotype, uid_t id, int group, 1936nfsd4_encode_name(struct svc_rqst *rqstp, int whotype, kuid_t uid, kgid_t gid,
1930 __be32 **p, int *buflen) 1937 __be32 **p, int *buflen)
1931{ 1938{
1932 int status; 1939 int status;
@@ -1935,10 +1942,10 @@ nfsd4_encode_name(struct svc_rqst *rqstp, int whotype, uid_t id, int group,
1935 return nfserr_resource; 1942 return nfserr_resource;
1936 if (whotype != NFS4_ACL_WHO_NAMED) 1943 if (whotype != NFS4_ACL_WHO_NAMED)
1937 status = nfs4_acl_write_who(whotype, (u8 *)(*p + 1)); 1944 status = nfs4_acl_write_who(whotype, (u8 *)(*p + 1));
1938 else if (group) 1945 else if (gid_valid(gid))
1939 status = nfsd_map_gid_to_name(rqstp, id, (u8 *)(*p + 1)); 1946 status = nfsd_map_gid_to_name(rqstp, gid, (u8 *)(*p + 1));
1940 else 1947 else
1941 status = nfsd_map_uid_to_name(rqstp, id, (u8 *)(*p + 1)); 1948 status = nfsd_map_uid_to_name(rqstp, uid, (u8 *)(*p + 1));
1942 if (status < 0) 1949 if (status < 0)
1943 return nfserrno(status); 1950 return nfserrno(status);
1944 *p = xdr_encode_opaque(*p, NULL, status); 1951 *p = xdr_encode_opaque(*p, NULL, status);
@@ -1948,22 +1955,33 @@ nfsd4_encode_name(struct svc_rqst *rqstp, int whotype, uid_t id, int group,
1948} 1955}
1949 1956
1950static inline __be32 1957static inline __be32
1951nfsd4_encode_user(struct svc_rqst *rqstp, uid_t uid, __be32 **p, int *buflen) 1958nfsd4_encode_user(struct svc_rqst *rqstp, kuid_t user, __be32 **p, int *buflen)
1952{ 1959{
1953 return nfsd4_encode_name(rqstp, NFS4_ACL_WHO_NAMED, uid, 0, p, buflen); 1960 return nfsd4_encode_name(rqstp, NFS4_ACL_WHO_NAMED, user, INVALID_GID,
1961 p, buflen);
1954} 1962}
1955 1963
1956static inline __be32 1964static inline __be32
1957nfsd4_encode_group(struct svc_rqst *rqstp, uid_t gid, __be32 **p, int *buflen) 1965nfsd4_encode_group(struct svc_rqst *rqstp, kgid_t group, __be32 **p, int *buflen)
1958{ 1966{
1959 return nfsd4_encode_name(rqstp, NFS4_ACL_WHO_NAMED, gid, 1, p, buflen); 1967 return nfsd4_encode_name(rqstp, NFS4_ACL_WHO_NAMED, INVALID_UID, group,
1968 p, buflen);
1960} 1969}
1961 1970
1962static inline __be32 1971static inline __be32
1963nfsd4_encode_aclname(struct svc_rqst *rqstp, int whotype, uid_t id, int group, 1972nfsd4_encode_aclname(struct svc_rqst *rqstp, struct nfs4_ace *ace,
1964 __be32 **p, int *buflen) 1973 __be32 **p, int *buflen)
1965{ 1974{
1966 return nfsd4_encode_name(rqstp, whotype, id, group, p, buflen); 1975 kuid_t uid = INVALID_UID;
1976 kgid_t gid = INVALID_GID;
1977
1978 if (ace->whotype == NFS4_ACL_WHO_NAMED) {
1979 if (ace->flag & NFS4_ACE_IDENTIFIER_GROUP)
1980 gid = ace->who_gid;
1981 else
1982 uid = ace->who_uid;
1983 }
1984 return nfsd4_encode_name(rqstp, ace->whotype, uid, gid, p, buflen);
1967} 1985}
1968 1986
1969#define WORD0_ABSENT_FS_ATTRS (FATTR4_WORD0_FS_LOCATIONS | FATTR4_WORD0_FSID | \ 1987#define WORD0_ABSENT_FS_ATTRS (FATTR4_WORD0_FS_LOCATIONS | FATTR4_WORD0_FSID | \
@@ -2224,9 +2242,7 @@ nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp,
2224 WRITE32(ace->type); 2242 WRITE32(ace->type);
2225 WRITE32(ace->flag); 2243 WRITE32(ace->flag);
2226 WRITE32(ace->access_mask & NFS4_ACE_MASK_ALL); 2244 WRITE32(ace->access_mask & NFS4_ACE_MASK_ALL);
2227 status = nfsd4_encode_aclname(rqstp, ace->whotype, 2245 status = nfsd4_encode_aclname(rqstp, ace, &p, &buflen);
2228 ace->who, ace->flag & NFS4_ACE_IDENTIFIER_GROUP,
2229 &p, &buflen);
2230 if (status == nfserr_resource) 2246 if (status == nfserr_resource)
2231 goto out_resource; 2247 goto out_resource;
2232 if (status) 2248 if (status)
diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h
index de23db255c69..07a473fd49bc 100644
--- a/fs/nfsd/nfsd.h
+++ b/fs/nfsd/nfsd.h
@@ -56,8 +56,8 @@ extern struct svc_version nfsd_version2, nfsd_version3,
56extern u32 nfsd_supported_minorversion; 56extern u32 nfsd_supported_minorversion;
57extern struct mutex nfsd_mutex; 57extern struct mutex nfsd_mutex;
58extern spinlock_t nfsd_drc_lock; 58extern spinlock_t nfsd_drc_lock;
59extern unsigned int nfsd_drc_max_mem; 59extern unsigned long nfsd_drc_max_mem;
60extern unsigned int nfsd_drc_mem_used; 60extern unsigned long nfsd_drc_mem_used;
61 61
62extern const struct seq_operations nfs_exports_op; 62extern const struct seq_operations nfs_exports_op;
63 63
@@ -106,7 +106,7 @@ static inline int nfsd_v4client(struct svc_rqst *rq)
106 * NFSv4 State 106 * NFSv4 State
107 */ 107 */
108#ifdef CONFIG_NFSD_V4 108#ifdef CONFIG_NFSD_V4
109extern unsigned int max_delegations; 109extern unsigned long max_delegations;
110void nfs4_state_init(void); 110void nfs4_state_init(void);
111int nfsd4_init_slabs(void); 111int nfsd4_init_slabs(void);
112void nfsd4_free_slabs(void); 112void nfsd4_free_slabs(void);
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index cee62ab9d4a3..be7af509930c 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -59,8 +59,8 @@ DEFINE_MUTEX(nfsd_mutex);
59 * nfsd_drc_pages_used tracks the current version 4.1 DRC memory usage. 59 * nfsd_drc_pages_used tracks the current version 4.1 DRC memory usage.
60 */ 60 */
61spinlock_t nfsd_drc_lock; 61spinlock_t nfsd_drc_lock;
62unsigned int nfsd_drc_max_mem; 62unsigned long nfsd_drc_max_mem;
63unsigned int nfsd_drc_mem_used; 63unsigned long nfsd_drc_mem_used;
64 64
65#if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL) 65#if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL)
66static struct svc_stat nfsd_acl_svcstats; 66static struct svc_stat nfsd_acl_svcstats;
@@ -342,7 +342,7 @@ static void set_max_drc(void)
342 >> NFSD_DRC_SIZE_SHIFT) * PAGE_SIZE; 342 >> NFSD_DRC_SIZE_SHIFT) * PAGE_SIZE;
343 nfsd_drc_mem_used = 0; 343 nfsd_drc_mem_used = 0;
344 spin_lock_init(&nfsd_drc_lock); 344 spin_lock_init(&nfsd_drc_lock);
345 dprintk("%s nfsd_drc_max_mem %u \n", __func__, nfsd_drc_max_mem); 345 dprintk("%s nfsd_drc_max_mem %lu \n", __func__, nfsd_drc_max_mem);
346} 346}
347 347
348static int nfsd_get_default_max_blksize(void) 348static int nfsd_get_default_max_blksize(void)
diff --git a/fs/nfsd/nfsxdr.c b/fs/nfsd/nfsxdr.c
index 979b42106979..4201ede0ec91 100644
--- a/fs/nfsd/nfsxdr.c
+++ b/fs/nfsd/nfsxdr.c
@@ -100,12 +100,14 @@ decode_sattr(__be32 *p, struct iattr *iap)
100 iap->ia_mode = tmp; 100 iap->ia_mode = tmp;
101 } 101 }
102 if ((tmp = ntohl(*p++)) != (u32)-1) { 102 if ((tmp = ntohl(*p++)) != (u32)-1) {
103 iap->ia_valid |= ATTR_UID; 103 iap->ia_uid = make_kuid(&init_user_ns, tmp);
104 iap->ia_uid = tmp; 104 if (uid_valid(iap->ia_uid))
105 iap->ia_valid |= ATTR_UID;
105 } 106 }
106 if ((tmp = ntohl(*p++)) != (u32)-1) { 107 if ((tmp = ntohl(*p++)) != (u32)-1) {
107 iap->ia_valid |= ATTR_GID; 108 iap->ia_gid = make_kgid(&init_user_ns, tmp);
108 iap->ia_gid = tmp; 109 if (gid_valid(iap->ia_gid))
110 iap->ia_valid |= ATTR_GID;
109 } 111 }
110 if ((tmp = ntohl(*p++)) != (u32)-1) { 112 if ((tmp = ntohl(*p++)) != (u32)-1) {
111 iap->ia_valid |= ATTR_SIZE; 113 iap->ia_valid |= ATTR_SIZE;
@@ -151,8 +153,8 @@ encode_fattr(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *fhp,
151 *p++ = htonl(nfs_ftypes[type >> 12]); 153 *p++ = htonl(nfs_ftypes[type >> 12]);
152 *p++ = htonl((u32) stat->mode); 154 *p++ = htonl((u32) stat->mode);
153 *p++ = htonl((u32) stat->nlink); 155 *p++ = htonl((u32) stat->nlink);
154 *p++ = htonl((u32) nfsd_ruid(rqstp, stat->uid)); 156 *p++ = htonl((u32) from_kuid(&init_user_ns, stat->uid));
155 *p++ = htonl((u32) nfsd_rgid(rqstp, stat->gid)); 157 *p++ = htonl((u32) from_kgid(&init_user_ns, stat->gid));
156 158
157 if (S_ISLNK(type) && stat->size > NFS_MAXPATHLEN) { 159 if (S_ISLNK(type) && stat->size > NFS_MAXPATHLEN) {
158 *p++ = htonl(NFS_MAXPATHLEN); 160 *p++ = htonl(NFS_MAXPATHLEN);
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index d1c229feed52..1a8c7391f7ae 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -152,8 +152,8 @@ struct nfsd4_channel_attrs {
152 152
153struct nfsd4_cb_sec { 153struct nfsd4_cb_sec {
154 u32 flavor; /* (u32)(-1) used to mean "no valid flavor" */ 154 u32 flavor; /* (u32)(-1) used to mean "no valid flavor" */
155 u32 uid; 155 kuid_t uid;
156 u32 gid; 156 kgid_t gid;
157}; 157};
158 158
159struct nfsd4_create_session { 159struct nfsd4_create_session {
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index d586117fa94a..31ff1d642e31 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -401,8 +401,8 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap,
401 401
402 /* Revoke setuid/setgid on chown */ 402 /* Revoke setuid/setgid on chown */
403 if (!S_ISDIR(inode->i_mode) && 403 if (!S_ISDIR(inode->i_mode) &&
404 (((iap->ia_valid & ATTR_UID) && iap->ia_uid != inode->i_uid) || 404 (((iap->ia_valid & ATTR_UID) && !uid_eq(iap->ia_uid, inode->i_uid)) ||
405 ((iap->ia_valid & ATTR_GID) && iap->ia_gid != inode->i_gid))) { 405 ((iap->ia_valid & ATTR_GID) && !gid_eq(iap->ia_gid, inode->i_gid)))) {
406 iap->ia_valid |= ATTR_KILL_PRIV; 406 iap->ia_valid |= ATTR_KILL_PRIV;
407 if (iap->ia_valid & ATTR_MODE) { 407 if (iap->ia_valid & ATTR_MODE) {
408 /* we're setting mode too, just clear the s*id bits */ 408 /* we're setting mode too, just clear the s*id bits */
@@ -1205,7 +1205,7 @@ nfsd_create_setattr(struct svc_rqst *rqstp, struct svc_fh *resfhp,
1205 * send along the gid on create when it tries to implement 1205 * send along the gid on create when it tries to implement
1206 * setgid directories via NFS: 1206 * setgid directories via NFS:
1207 */ 1207 */
1208 if (current_fsuid() != 0) 1208 if (!uid_eq(current_fsuid(), GLOBAL_ROOT_UID))
1209 iap->ia_valid &= ~(ATTR_UID|ATTR_GID); 1209 iap->ia_valid &= ~(ATTR_UID|ATTR_GID);
1210 if (iap->ia_valid) 1210 if (iap->ia_valid)
1211 return nfsd_setattr(rqstp, resfhp, iap, 0, (time_t)0); 1211 return nfsd_setattr(rqstp, resfhp, iap, 0, (time_t)0);
@@ -2150,7 +2150,7 @@ nfsd_permission(struct svc_rqst *rqstp, struct svc_export *exp,
2150 * with NFSv3. 2150 * with NFSv3.
2151 */ 2151 */
2152 if ((acc & NFSD_MAY_OWNER_OVERRIDE) && 2152 if ((acc & NFSD_MAY_OWNER_OVERRIDE) &&
2153 inode->i_uid == current_fsuid()) 2153 uid_eq(inode->i_uid, current_fsuid()))
2154 return 0; 2154 return 0;
2155 2155
2156 /* This assumes NFSD_MAY_{READ,WRITE,EXEC} == MAY_{READ,WRITE,EXEC} */ 2156 /* This assumes NFSD_MAY_{READ,WRITE,EXEC} == MAY_{READ,WRITE,EXEC} */
diff --git a/fs/nilfs2/Kconfig b/fs/nilfs2/Kconfig
index 251da07b2a1d..80da8eb27393 100644
--- a/fs/nilfs2/Kconfig
+++ b/fs/nilfs2/Kconfig
@@ -1,6 +1,5 @@
1config NILFS2_FS 1config NILFS2_FS
2 tristate "NILFS2 file system support (EXPERIMENTAL)" 2 tristate "NILFS2 file system support"
3 depends on EXPERIMENTAL
4 select CRC32 3 select CRC32
5 help 4 help
6 NILFS2 is a log-structured file system (LFS) supporting continuous 5 NILFS2 is a log-structured file system (LFS) supporting continuous
diff --git a/fs/nilfs2/file.c b/fs/nilfs2/file.c
index 61946883025c..bec4af6eab13 100644
--- a/fs/nilfs2/file.c
+++ b/fs/nilfs2/file.c
@@ -126,7 +126,7 @@ static int nilfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
126 nilfs_transaction_commit(inode->i_sb); 126 nilfs_transaction_commit(inode->i_sb);
127 127
128 mapped: 128 mapped:
129 wait_on_page_writeback(page); 129 wait_for_stable_page(page);
130 out: 130 out:
131 sb_end_pagefault(inode->i_sb); 131 sb_end_pagefault(inode->i_sb);
132 return block_page_mkwrite_return(ret); 132 return block_page_mkwrite_return(ret);
diff --git a/fs/nilfs2/ioctl.c b/fs/nilfs2/ioctl.c
index fdb180769485..f3859354e41a 100644
--- a/fs/nilfs2/ioctl.c
+++ b/fs/nilfs2/ioctl.c
@@ -664,8 +664,11 @@ static int nilfs_ioctl_clean_segments(struct inode *inode, struct file *filp,
664 if (ret < 0) 664 if (ret < 0)
665 printk(KERN_ERR "NILFS: GC failed during preparation: " 665 printk(KERN_ERR "NILFS: GC failed during preparation: "
666 "cannot read source blocks: err=%d\n", ret); 666 "cannot read source blocks: err=%d\n", ret);
667 else 667 else {
668 if (nilfs_sb_need_update(nilfs))
669 set_nilfs_discontinued(nilfs);
668 ret = nilfs_clean_segments(inode->i_sb, argv, kbufs); 670 ret = nilfs_clean_segments(inode->i_sb, argv, kbufs);
671 }
669 672
670 nilfs_remove_all_gcinodes(nilfs); 673 nilfs_remove_all_gcinodes(nilfs);
671 clear_nilfs_gc_running(nilfs); 674 clear_nilfs_gc_running(nilfs);
diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c
index 228a2c2ad8d7..07f7a92fe88e 100644
--- a/fs/notify/inotify/inotify_user.c
+++ b/fs/notify/inotify/inotify_user.c
@@ -576,8 +576,6 @@ static int inotify_update_existing_watch(struct fsnotify_group *group,
576 576
577 /* don't allow invalid bits: we don't want flags set */ 577 /* don't allow invalid bits: we don't want flags set */
578 mask = inotify_arg_to_mask(arg); 578 mask = inotify_arg_to_mask(arg);
579 if (unlikely(!(mask & IN_ALL_EVENTS)))
580 return -EINVAL;
581 579
582 fsn_mark = fsnotify_find_inode_mark(group, inode); 580 fsn_mark = fsnotify_find_inode_mark(group, inode);
583 if (!fsn_mark) 581 if (!fsn_mark)
@@ -629,8 +627,6 @@ static int inotify_new_watch(struct fsnotify_group *group,
629 627
630 /* don't allow invalid bits: we don't want flags set */ 628 /* don't allow invalid bits: we don't want flags set */
631 mask = inotify_arg_to_mask(arg); 629 mask = inotify_arg_to_mask(arg);
632 if (unlikely(!(mask & IN_ALL_EVENTS)))
633 return -EINVAL;
634 630
635 tmp_i_mark = kmem_cache_alloc(inotify_inode_mark_cachep, GFP_KERNEL); 631 tmp_i_mark = kmem_cache_alloc(inotify_inode_mark_cachep, GFP_KERNEL);
636 if (unlikely(!tmp_i_mark)) 632 if (unlikely(!tmp_i_mark))
diff --git a/fs/ocfs2/acl.c b/fs/ocfs2/acl.c
index 260b16281fc3..8a404576fb26 100644
--- a/fs/ocfs2/acl.c
+++ b/fs/ocfs2/acl.c
@@ -65,7 +65,20 @@ static struct posix_acl *ocfs2_acl_from_xattr(const void *value, size_t size)
65 65
66 acl->a_entries[n].e_tag = le16_to_cpu(entry->e_tag); 66 acl->a_entries[n].e_tag = le16_to_cpu(entry->e_tag);
67 acl->a_entries[n].e_perm = le16_to_cpu(entry->e_perm); 67 acl->a_entries[n].e_perm = le16_to_cpu(entry->e_perm);
68 acl->a_entries[n].e_id = le32_to_cpu(entry->e_id); 68 switch(acl->a_entries[n].e_tag) {
69 case ACL_USER:
70 acl->a_entries[n].e_uid =
71 make_kuid(&init_user_ns,
72 le32_to_cpu(entry->e_id));
73 break;
74 case ACL_GROUP:
75 acl->a_entries[n].e_gid =
76 make_kgid(&init_user_ns,
77 le32_to_cpu(entry->e_id));
78 break;
79 default:
80 break;
81 }
69 value += sizeof(struct posix_acl_entry); 82 value += sizeof(struct posix_acl_entry);
70 83
71 } 84 }
@@ -91,7 +104,21 @@ static void *ocfs2_acl_to_xattr(const struct posix_acl *acl, size_t *size)
91 for (n = 0; n < acl->a_count; n++, entry++) { 104 for (n = 0; n < acl->a_count; n++, entry++) {
92 entry->e_tag = cpu_to_le16(acl->a_entries[n].e_tag); 105 entry->e_tag = cpu_to_le16(acl->a_entries[n].e_tag);
93 entry->e_perm = cpu_to_le16(acl->a_entries[n].e_perm); 106 entry->e_perm = cpu_to_le16(acl->a_entries[n].e_perm);
94 entry->e_id = cpu_to_le32(acl->a_entries[n].e_id); 107 switch(acl->a_entries[n].e_tag) {
108 case ACL_USER:
109 entry->e_id = cpu_to_le32(
110 from_kuid(&init_user_ns,
111 acl->a_entries[n].e_uid));
112 break;
113 case ACL_GROUP:
114 entry->e_id = cpu_to_le32(
115 from_kgid(&init_user_ns,
116 acl->a_entries[n].e_gid));
117 break;
118 default:
119 entry->e_id = cpu_to_le32(ACL_UNDEFINED_ID);
120 break;
121 }
95 } 122 }
96 return ocfs2_acl; 123 return ocfs2_acl;
97} 124}
diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index 31b9463fba1f..b8a9d87231b1 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -6751,8 +6751,7 @@ int ocfs2_zero_range_for_truncate(struct inode *inode, handle_t *handle,
6751 mlog_errno(ret); 6751 mlog_errno(ret);
6752 6752
6753out: 6753out:
6754 if (pages) 6754 kfree(pages);
6755 kfree(pages);
6756 6755
6757 return ret; 6756 return ret;
6758} 6757}
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index 657743254eb9..9796330d8f04 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -1194,6 +1194,7 @@ static int ocfs2_grab_pages_for_write(struct address_space *mapping,
1194 goto out; 1194 goto out;
1195 } 1195 }
1196 } 1196 }
1197 wait_for_stable_page(wc->w_pages[i]);
1197 1198
1198 if (index == target_index) 1199 if (index == target_index)
1199 wc->w_target_page = wc->w_pages[i]; 1200 wc->w_target_page = wc->w_pages[i];
diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c
index f7c648d7d6bf..42252bf64b51 100644
--- a/fs/ocfs2/cluster/heartbeat.c
+++ b/fs/ocfs2/cluster/heartbeat.c
@@ -1471,8 +1471,7 @@ static void o2hb_region_release(struct config_item *item)
1471 1471
1472 mlog(ML_HEARTBEAT, "hb region release (%s)\n", reg->hr_dev_name); 1472 mlog(ML_HEARTBEAT, "hb region release (%s)\n", reg->hr_dev_name);
1473 1473
1474 if (reg->hr_tmp_block) 1474 kfree(reg->hr_tmp_block);
1475 kfree(reg->hr_tmp_block);
1476 1475
1477 if (reg->hr_slot_data) { 1476 if (reg->hr_slot_data) {
1478 for (i = 0; i < reg->hr_num_pages; i++) { 1477 for (i = 0; i < reg->hr_num_pages; i++) {
@@ -1486,8 +1485,7 @@ static void o2hb_region_release(struct config_item *item)
1486 if (reg->hr_bdev) 1485 if (reg->hr_bdev)
1487 blkdev_put(reg->hr_bdev, FMODE_READ|FMODE_WRITE); 1486 blkdev_put(reg->hr_bdev, FMODE_READ|FMODE_WRITE);
1488 1487
1489 if (reg->hr_slots) 1488 kfree(reg->hr_slots);
1490 kfree(reg->hr_slots);
1491 1489
1492 kfree(reg->hr_db_regnum); 1490 kfree(reg->hr_db_regnum);
1493 kfree(reg->hr_db_livenodes); 1491 kfree(reg->hr_db_livenodes);
diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c
index 1bfe8802cc1e..0d2bf566e39a 100644
--- a/fs/ocfs2/cluster/tcp.c
+++ b/fs/ocfs2/cluster/tcp.c
@@ -870,7 +870,7 @@ int o2net_register_handler(u32 msg_type, u32 key, u32 max_len,
870 /* we've had some trouble with handlers seemingly vanishing. */ 870 /* we've had some trouble with handlers seemingly vanishing. */
871 mlog_bug_on_msg(o2net_handler_tree_lookup(msg_type, key, &p, 871 mlog_bug_on_msg(o2net_handler_tree_lookup(msg_type, key, &p,
872 &parent) == NULL, 872 &parent) == NULL,
873 "couldn't find handler we *just* registerd " 873 "couldn't find handler we *just* registered "
874 "for type %u key %08x\n", msg_type, key); 874 "for type %u key %08x\n", msg_type, key);
875 } 875 }
876 write_unlock(&o2net_handler_lock); 876 write_unlock(&o2net_handler_lock);
@@ -1165,10 +1165,8 @@ out:
1165 o2net_debug_del_nst(&nst); /* must be before dropping sc and node */ 1165 o2net_debug_del_nst(&nst); /* must be before dropping sc and node */
1166 if (sc) 1166 if (sc)
1167 sc_put(sc); 1167 sc_put(sc);
1168 if (vec) 1168 kfree(vec);
1169 kfree(vec); 1169 kfree(msg);
1170 if (msg)
1171 kfree(msg);
1172 o2net_complete_nsw(nn, &nsw, 0, 0, 0); 1170 o2net_complete_nsw(nn, &nsw, 0, 0, 0);
1173 return ret; 1171 return ret;
1174} 1172}
diff --git a/fs/ocfs2/dlm/dlmdomain.c b/fs/ocfs2/dlm/dlmdomain.c
index 9e89d70df337..dbb17c07656a 100644
--- a/fs/ocfs2/dlm/dlmdomain.c
+++ b/fs/ocfs2/dlm/dlmdomain.c
@@ -319,9 +319,7 @@ static void dlm_free_ctxt_mem(struct dlm_ctxt *dlm)
319 if (dlm->master_hash) 319 if (dlm->master_hash)
320 dlm_free_pagevec((void **)dlm->master_hash, DLM_HASH_PAGES); 320 dlm_free_pagevec((void **)dlm->master_hash, DLM_HASH_PAGES);
321 321
322 if (dlm->name) 322 kfree(dlm->name);
323 kfree(dlm->name);
324
325 kfree(dlm); 323 kfree(dlm);
326} 324}
327 325
diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c
index 4f7795fb5fc0..12ae194ac943 100644
--- a/fs/ocfs2/dlmglue.c
+++ b/fs/ocfs2/dlmglue.c
@@ -2045,8 +2045,8 @@ static void __ocfs2_stuff_meta_lvb(struct inode *inode)
2045 lvb->lvb_version = OCFS2_LVB_VERSION; 2045 lvb->lvb_version = OCFS2_LVB_VERSION;
2046 lvb->lvb_isize = cpu_to_be64(i_size_read(inode)); 2046 lvb->lvb_isize = cpu_to_be64(i_size_read(inode));
2047 lvb->lvb_iclusters = cpu_to_be32(oi->ip_clusters); 2047 lvb->lvb_iclusters = cpu_to_be32(oi->ip_clusters);
2048 lvb->lvb_iuid = cpu_to_be32(inode->i_uid); 2048 lvb->lvb_iuid = cpu_to_be32(i_uid_read(inode));
2049 lvb->lvb_igid = cpu_to_be32(inode->i_gid); 2049 lvb->lvb_igid = cpu_to_be32(i_gid_read(inode));
2050 lvb->lvb_imode = cpu_to_be16(inode->i_mode); 2050 lvb->lvb_imode = cpu_to_be16(inode->i_mode);
2051 lvb->lvb_inlink = cpu_to_be16(inode->i_nlink); 2051 lvb->lvb_inlink = cpu_to_be16(inode->i_nlink);
2052 lvb->lvb_iatime_packed = 2052 lvb->lvb_iatime_packed =
@@ -2095,8 +2095,8 @@ static void ocfs2_refresh_inode_from_lvb(struct inode *inode)
2095 else 2095 else
2096 inode->i_blocks = ocfs2_inode_sector_count(inode); 2096 inode->i_blocks = ocfs2_inode_sector_count(inode);
2097 2097
2098 inode->i_uid = be32_to_cpu(lvb->lvb_iuid); 2098 i_uid_write(inode, be32_to_cpu(lvb->lvb_iuid));
2099 inode->i_gid = be32_to_cpu(lvb->lvb_igid); 2099 i_gid_write(inode, be32_to_cpu(lvb->lvb_igid));
2100 inode->i_mode = be16_to_cpu(lvb->lvb_imode); 2100 inode->i_mode = be16_to_cpu(lvb->lvb_imode);
2101 set_nlink(inode, be16_to_cpu(lvb->lvb_inlink)); 2101 set_nlink(inode, be16_to_cpu(lvb->lvb_inlink));
2102 ocfs2_unpack_timespec(&inode->i_atime, 2102 ocfs2_unpack_timespec(&inode->i_atime,
@@ -2545,6 +2545,7 @@ int ocfs2_super_lock(struct ocfs2_super *osb,
2545 * everything is up to the caller :) */ 2545 * everything is up to the caller :) */
2546 status = ocfs2_should_refresh_lock_res(lockres); 2546 status = ocfs2_should_refresh_lock_res(lockres);
2547 if (status < 0) { 2547 if (status < 0) {
2548 ocfs2_cluster_unlock(osb, lockres, level);
2548 mlog_errno(status); 2549 mlog_errno(status);
2549 goto bail; 2550 goto bail;
2550 } 2551 }
@@ -2553,8 +2554,10 @@ int ocfs2_super_lock(struct ocfs2_super *osb,
2553 2554
2554 ocfs2_complete_lock_res_refresh(lockres, status); 2555 ocfs2_complete_lock_res_refresh(lockres, status);
2555 2556
2556 if (status < 0) 2557 if (status < 0) {
2558 ocfs2_cluster_unlock(osb, lockres, level);
2557 mlog_errno(status); 2559 mlog_errno(status);
2560 }
2558 ocfs2_track_lock_refresh(lockres); 2561 ocfs2_track_lock_refresh(lockres);
2559 } 2562 }
2560bail: 2563bail:
diff --git a/fs/ocfs2/extent_map.c b/fs/ocfs2/extent_map.c
index f487aa343442..1c39efb71bab 100644
--- a/fs/ocfs2/extent_map.c
+++ b/fs/ocfs2/extent_map.c
@@ -282,8 +282,7 @@ search:
282 spin_unlock(&oi->ip_lock); 282 spin_unlock(&oi->ip_lock);
283 283
284out: 284out:
285 if (new_emi) 285 kfree(new_emi);
286 kfree(new_emi);
287} 286}
288 287
289static int ocfs2_last_eb_is_empty(struct inode *inode, 288static int ocfs2_last_eb_is_empty(struct inode *inode,
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 37d313ede159..0a2924a2d9e6 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -1116,7 +1116,8 @@ int ocfs2_setattr(struct dentry *dentry, struct iattr *attr)
1116 (unsigned long long)OCFS2_I(inode)->ip_blkno, 1116 (unsigned long long)OCFS2_I(inode)->ip_blkno,
1117 dentry->d_name.len, dentry->d_name.name, 1117 dentry->d_name.len, dentry->d_name.name,
1118 attr->ia_valid, attr->ia_mode, 1118 attr->ia_valid, attr->ia_mode,
1119 attr->ia_uid, attr->ia_gid); 1119 from_kuid(&init_user_ns, attr->ia_uid),
1120 from_kgid(&init_user_ns, attr->ia_gid));
1120 1121
1121 /* ensuring we don't even attempt to truncate a symlink */ 1122 /* ensuring we don't even attempt to truncate a symlink */
1122 if (S_ISLNK(inode->i_mode)) 1123 if (S_ISLNK(inode->i_mode))
@@ -1174,14 +1175,14 @@ int ocfs2_setattr(struct dentry *dentry, struct iattr *attr)
1174 } 1175 }
1175 } 1176 }
1176 1177
1177 if ((attr->ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid) || 1178 if ((attr->ia_valid & ATTR_UID && !uid_eq(attr->ia_uid, inode->i_uid)) ||
1178 (attr->ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid)) { 1179 (attr->ia_valid & ATTR_GID && !gid_eq(attr->ia_gid, inode->i_gid))) {
1179 /* 1180 /*
1180 * Gather pointers to quota structures so that allocation / 1181 * Gather pointers to quota structures so that allocation /
1181 * freeing of quota structures happens here and not inside 1182 * freeing of quota structures happens here and not inside
1182 * dquot_transfer() where we have problems with lock ordering 1183 * dquot_transfer() where we have problems with lock ordering
1183 */ 1184 */
1184 if (attr->ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid 1185 if (attr->ia_valid & ATTR_UID && !uid_eq(attr->ia_uid, inode->i_uid)
1185 && OCFS2_HAS_RO_COMPAT_FEATURE(sb, 1186 && OCFS2_HAS_RO_COMPAT_FEATURE(sb,
1186 OCFS2_FEATURE_RO_COMPAT_USRQUOTA)) { 1187 OCFS2_FEATURE_RO_COMPAT_USRQUOTA)) {
1187 transfer_to[USRQUOTA] = dqget(sb, make_kqid_uid(attr->ia_uid)); 1188 transfer_to[USRQUOTA] = dqget(sb, make_kqid_uid(attr->ia_uid));
@@ -1190,7 +1191,7 @@ int ocfs2_setattr(struct dentry *dentry, struct iattr *attr)
1190 goto bail_unlock; 1191 goto bail_unlock;
1191 } 1192 }
1192 } 1193 }
1193 if (attr->ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid 1194 if (attr->ia_valid & ATTR_GID && !gid_eq(attr->ia_gid, inode->i_gid)
1194 && OCFS2_HAS_RO_COMPAT_FEATURE(sb, 1195 && OCFS2_HAS_RO_COMPAT_FEATURE(sb,
1195 OCFS2_FEATURE_RO_COMPAT_GRPQUOTA)) { 1196 OCFS2_FEATURE_RO_COMPAT_GRPQUOTA)) {
1196 transfer_to[GRPQUOTA] = dqget(sb, make_kqid_gid(attr->ia_gid)); 1197 transfer_to[GRPQUOTA] = dqget(sb, make_kqid_gid(attr->ia_gid));
diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c
index d89e08a81eda..f87f9bd1edff 100644
--- a/fs/ocfs2/inode.c
+++ b/fs/ocfs2/inode.c
@@ -269,8 +269,8 @@ void ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe,
269 inode->i_generation = le32_to_cpu(fe->i_generation); 269 inode->i_generation = le32_to_cpu(fe->i_generation);
270 inode->i_rdev = huge_decode_dev(le64_to_cpu(fe->id1.dev1.i_rdev)); 270 inode->i_rdev = huge_decode_dev(le64_to_cpu(fe->id1.dev1.i_rdev));
271 inode->i_mode = le16_to_cpu(fe->i_mode); 271 inode->i_mode = le16_to_cpu(fe->i_mode);
272 inode->i_uid = le32_to_cpu(fe->i_uid); 272 i_uid_write(inode, le32_to_cpu(fe->i_uid));
273 inode->i_gid = le32_to_cpu(fe->i_gid); 273 i_gid_write(inode, le32_to_cpu(fe->i_gid));
274 274
275 /* Fast symlinks will have i_size but no allocated clusters. */ 275 /* Fast symlinks will have i_size but no allocated clusters. */
276 if (S_ISLNK(inode->i_mode) && !fe->i_clusters) { 276 if (S_ISLNK(inode->i_mode) && !fe->i_clusters) {
@@ -1259,8 +1259,8 @@ int ocfs2_mark_inode_dirty(handle_t *handle,
1259 1259
1260 fe->i_size = cpu_to_le64(i_size_read(inode)); 1260 fe->i_size = cpu_to_le64(i_size_read(inode));
1261 ocfs2_set_links_count(fe, inode->i_nlink); 1261 ocfs2_set_links_count(fe, inode->i_nlink);
1262 fe->i_uid = cpu_to_le32(inode->i_uid); 1262 fe->i_uid = cpu_to_le32(i_uid_read(inode));
1263 fe->i_gid = cpu_to_le32(inode->i_gid); 1263 fe->i_gid = cpu_to_le32(i_gid_read(inode));
1264 fe->i_mode = cpu_to_le16(inode->i_mode); 1264 fe->i_mode = cpu_to_le16(inode->i_mode);
1265 fe->i_atime = cpu_to_le64(inode->i_atime.tv_sec); 1265 fe->i_atime = cpu_to_le64(inode->i_atime.tv_sec);
1266 fe->i_atime_nsec = cpu_to_le32(inode->i_atime.tv_nsec); 1266 fe->i_atime_nsec = cpu_to_le32(inode->i_atime.tv_nsec);
@@ -1290,8 +1290,8 @@ void ocfs2_refresh_inode(struct inode *inode,
1290 ocfs2_set_inode_flags(inode); 1290 ocfs2_set_inode_flags(inode);
1291 i_size_write(inode, le64_to_cpu(fe->i_size)); 1291 i_size_write(inode, le64_to_cpu(fe->i_size));
1292 set_nlink(inode, ocfs2_read_links_count(fe)); 1292 set_nlink(inode, ocfs2_read_links_count(fe));
1293 inode->i_uid = le32_to_cpu(fe->i_uid); 1293 i_uid_write(inode, le32_to_cpu(fe->i_uid));
1294 inode->i_gid = le32_to_cpu(fe->i_gid); 1294 i_gid_write(inode, le32_to_cpu(fe->i_gid));
1295 inode->i_mode = le16_to_cpu(fe->i_mode); 1295 inode->i_mode = le16_to_cpu(fe->i_mode);
1296 if (S_ISLNK(inode->i_mode) && le32_to_cpu(fe->i_clusters) == 0) 1296 if (S_ISLNK(inode->i_mode) && le32_to_cpu(fe->i_clusters) == 0)
1297 inode->i_blocks = 0; 1297 inode->i_blocks = 0;
diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c
index 2dd36af79e26..8eccfabcd12e 100644
--- a/fs/ocfs2/journal.c
+++ b/fs/ocfs2/journal.c
@@ -1234,11 +1234,8 @@ static void ocfs2_queue_recovery_completion(struct ocfs2_journal *journal,
1234 /* Though we wish to avoid it, we are in fact safe in 1234 /* Though we wish to avoid it, we are in fact safe in
1235 * skipping local alloc cleanup as fsck.ocfs2 is more 1235 * skipping local alloc cleanup as fsck.ocfs2 is more
1236 * than capable of reclaiming unused space. */ 1236 * than capable of reclaiming unused space. */
1237 if (la_dinode) 1237 kfree(la_dinode);
1238 kfree(la_dinode); 1238 kfree(tl_dinode);
1239
1240 if (tl_dinode)
1241 kfree(tl_dinode);
1242 1239
1243 if (qrec) 1240 if (qrec)
1244 ocfs2_free_quota_recovery(qrec); 1241 ocfs2_free_quota_recovery(qrec);
@@ -1408,8 +1405,7 @@ bail:
1408 1405
1409 mutex_unlock(&osb->recovery_lock); 1406 mutex_unlock(&osb->recovery_lock);
1410 1407
1411 if (rm_quota) 1408 kfree(rm_quota);
1412 kfree(rm_quota);
1413 1409
1414 /* no one is callint kthread_stop() for us so the kthread() api 1410 /* no one is callint kthread_stop() for us so the kthread() api
1415 * requires that we call do_exit(). And it isn't exported, but 1411 * requires that we call do_exit(). And it isn't exported, but
diff --git a/fs/ocfs2/localalloc.c b/fs/ocfs2/localalloc.c
index a9f78c74d687..aebeacd807c3 100644
--- a/fs/ocfs2/localalloc.c
+++ b/fs/ocfs2/localalloc.c
@@ -476,8 +476,7 @@ out:
476 if (local_alloc_inode) 476 if (local_alloc_inode)
477 iput(local_alloc_inode); 477 iput(local_alloc_inode);
478 478
479 if (alloc_copy) 479 kfree(alloc_copy);
480 kfree(alloc_copy);
481} 480}
482 481
483/* 482/*
@@ -534,7 +533,7 @@ int ocfs2_begin_local_alloc_recovery(struct ocfs2_super *osb,
534 mlog_errno(status); 533 mlog_errno(status);
535 534
536bail: 535bail:
537 if ((status < 0) && (*alloc_copy)) { 536 if (status < 0) {
538 kfree(*alloc_copy); 537 kfree(*alloc_copy);
539 *alloc_copy = NULL; 538 *alloc_copy = NULL;
540 } 539 }
@@ -1290,8 +1289,7 @@ bail:
1290 if (main_bm_inode) 1289 if (main_bm_inode)
1291 iput(main_bm_inode); 1290 iput(main_bm_inode);
1292 1291
1293 if (alloc_copy) 1292 kfree(alloc_copy);
1294 kfree(alloc_copy);
1295 1293
1296 if (ac) 1294 if (ac)
1297 ocfs2_free_alloc_context(ac); 1295 ocfs2_free_alloc_context(ac);
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c
index f1fd0741162b..04ee1b57c243 100644
--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -512,8 +512,8 @@ static int __ocfs2_mknod_locked(struct inode *dir,
512 fe->i_suballoc_loc = cpu_to_le64(suballoc_loc); 512 fe->i_suballoc_loc = cpu_to_le64(suballoc_loc);
513 fe->i_suballoc_bit = cpu_to_le16(suballoc_bit); 513 fe->i_suballoc_bit = cpu_to_le16(suballoc_bit);
514 fe->i_suballoc_slot = cpu_to_le16(inode_ac->ac_alloc_slot); 514 fe->i_suballoc_slot = cpu_to_le16(inode_ac->ac_alloc_slot);
515 fe->i_uid = cpu_to_le32(inode->i_uid); 515 fe->i_uid = cpu_to_le32(i_uid_read(inode));
516 fe->i_gid = cpu_to_le32(inode->i_gid); 516 fe->i_gid = cpu_to_le32(i_gid_read(inode));
517 fe->i_mode = cpu_to_le16(inode->i_mode); 517 fe->i_mode = cpu_to_le16(inode->i_mode);
518 if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) 518 if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode))
519 fe->id1.dev1.i_rdev = cpu_to_le64(huge_encode_dev(dev)); 519 fe->id1.dev1.i_rdev = cpu_to_le64(huge_encode_dev(dev));
diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c
index 30a055049e16..934a4ac3e7fc 100644
--- a/fs/ocfs2/refcounttree.c
+++ b/fs/ocfs2/refcounttree.c
@@ -4407,7 +4407,7 @@ static int ocfs2_vfs_reflink(struct dentry *old_dentry, struct inode *dir,
4407 * rights to do so. 4407 * rights to do so.
4408 */ 4408 */
4409 if (preserve) { 4409 if (preserve) {
4410 if ((current_fsuid() != inode->i_uid) && !capable(CAP_CHOWN)) 4410 if (!uid_eq(current_fsuid(), inode->i_uid) && !capable(CAP_CHOWN))
4411 return -EPERM; 4411 return -EPERM;
4412 if (!in_group_p(inode->i_gid) && !capable(CAP_CHOWN)) 4412 if (!in_group_p(inode->i_gid) && !capable(CAP_CHOWN))
4413 return -EPERM; 4413 return -EPERM;
diff --git a/fs/ocfs2/stack_o2cb.c b/fs/ocfs2/stack_o2cb.c
index 94368017edb3..bf1f8930456f 100644
--- a/fs/ocfs2/stack_o2cb.c
+++ b/fs/ocfs2/stack_o2cb.c
@@ -376,7 +376,7 @@ static int o2cb_cluster_connect(struct ocfs2_cluster_connection *conn)
376 dlm_register_eviction_cb(dlm, &priv->op_eviction_cb); 376 dlm_register_eviction_cb(dlm, &priv->op_eviction_cb);
377 377
378out_free: 378out_free:
379 if (rc && conn->cc_private) 379 if (rc)
380 kfree(conn->cc_private); 380 kfree(conn->cc_private);
381 381
382out: 382out:
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index 0e91ec22a940..9b6910dec4ba 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -2525,8 +2525,7 @@ static int ocfs2_check_volume(struct ocfs2_super *osb)
2525 mlog_errno(status); 2525 mlog_errno(status);
2526 2526
2527finally: 2527finally:
2528 if (local_alloc) 2528 kfree(local_alloc);
2529 kfree(local_alloc);
2530 2529
2531 if (status) 2530 if (status)
2532 mlog_errno(status); 2531 mlog_errno(status);
@@ -2553,8 +2552,7 @@ static void ocfs2_delete_osb(struct ocfs2_super *osb)
2553 * we free it here. 2552 * we free it here.
2554 */ 2553 */
2555 kfree(osb->journal); 2554 kfree(osb->journal);
2556 if (osb->local_alloc_copy) 2555 kfree(osb->local_alloc_copy);
2557 kfree(osb->local_alloc_copy);
2558 kfree(osb->uuid_str); 2556 kfree(osb->uuid_str);
2559 ocfs2_put_dlm_debug(osb->osb_dlm_debug); 2557 ocfs2_put_dlm_debug(osb->osb_dlm_debug);
2560 memset(osb, 0, sizeof(struct ocfs2_super)); 2558 memset(osb, 0, sizeof(struct ocfs2_super));
diff --git a/fs/ocfs2/sysfile.c b/fs/ocfs2/sysfile.c
index 3d635f4bbb20..f053688d22a3 100644
--- a/fs/ocfs2/sysfile.c
+++ b/fs/ocfs2/sysfile.c
@@ -91,8 +91,7 @@ static struct inode **get_local_system_inode(struct ocfs2_super *osb,
91 } else 91 } else
92 osb->local_system_inodes = local_system_inodes; 92 osb->local_system_inodes = local_system_inodes;
93 spin_unlock(&osb->osb_lock); 93 spin_unlock(&osb->osb_lock);
94 if (unlikely(free)) 94 kfree(free);
95 kfree(free);
96 } 95 }
97 96
98 index = (slot * NUM_LOCAL_SYSTEM_INODES) + 97 index = (slot * NUM_LOCAL_SYSTEM_INODES) +
diff --git a/fs/proc/Makefile b/fs/proc/Makefile
index 981b05601931..712f24db9600 100644
--- a/fs/proc/Makefile
+++ b/fs/proc/Makefile
@@ -8,7 +8,8 @@ proc-y := nommu.o task_nommu.o
8proc-$(CONFIG_MMU) := mmu.o task_mmu.o 8proc-$(CONFIG_MMU) := mmu.o task_mmu.o
9 9
10proc-y += inode.o root.o base.o generic.o array.o \ 10proc-y += inode.o root.o base.o generic.o array.o \
11 proc_tty.o fd.o 11 fd.o
12proc-$(CONFIG_TTY) += proc_tty.o
12proc-y += cmdline.o 13proc-y += cmdline.o
13proc-y += consoles.o 14proc-y += consoles.o
14proc-y += cpuinfo.o 15proc-y += cpuinfo.o
diff --git a/fs/proc/array.c b/fs/proc/array.c
index 6a91e6ffbcbd..f7ed9ee46eb9 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -449,7 +449,7 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
449 do { 449 do {
450 min_flt += t->min_flt; 450 min_flt += t->min_flt;
451 maj_flt += t->maj_flt; 451 maj_flt += t->maj_flt;
452 gtime += t->gtime; 452 gtime += task_gtime(t);
453 t = next_thread(t); 453 t = next_thread(t);
454 } while (t != task); 454 } while (t != task);
455 455
@@ -472,7 +472,7 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
472 min_flt = task->min_flt; 472 min_flt = task->min_flt;
473 maj_flt = task->maj_flt; 473 maj_flt = task->maj_flt;
474 task_cputime_adjusted(task, &utime, &stime); 474 task_cputime_adjusted(task, &utime, &stime);
475 gtime = task->gtime; 475 gtime = task_gtime(task);
476 } 476 }
477 477
478 /* scale priority and nice values from timeslices to -20..20 */ 478 /* scale priority and nice values from timeslices to -20..20 */
diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c
index 80e4645f7990..1efaaa19c4f3 100644
--- a/fs/proc/meminfo.c
+++ b/fs/proc/meminfo.c
@@ -40,7 +40,7 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
40 * sysctl_overcommit_ratio / 100) + total_swap_pages; 40 * sysctl_overcommit_ratio / 100) + total_swap_pages;
41 41
42 cached = global_page_state(NR_FILE_PAGES) - 42 cached = global_page_state(NR_FILE_PAGES) -
43 total_swapcache_pages - i.bufferram; 43 total_swapcache_pages() - i.bufferram;
44 if (cached < 0) 44 if (cached < 0)
45 cached = 0; 45 cached = 0;
46 46
@@ -109,7 +109,7 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
109 K(i.freeram), 109 K(i.freeram),
110 K(i.bufferram), 110 K(i.bufferram),
111 K(cached), 111 K(cached),
112 K(total_swapcache_pages), 112 K(total_swapcache_pages()),
113 K(pages[LRU_ACTIVE_ANON] + pages[LRU_ACTIVE_FILE]), 113 K(pages[LRU_ACTIVE_ANON] + pages[LRU_ACTIVE_FILE]),
114 K(pages[LRU_INACTIVE_ANON] + pages[LRU_INACTIVE_FILE]), 114 K(pages[LRU_INACTIVE_ANON] + pages[LRU_INACTIVE_FILE]),
115 K(pages[LRU_ACTIVE_ANON]), 115 K(pages[LRU_ACTIVE_ANON]),
@@ -158,7 +158,7 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
158 vmi.used >> 10, 158 vmi.used >> 10,
159 vmi.largest_chunk >> 10 159 vmi.largest_chunk >> 10
160#ifdef CONFIG_MEMORY_FAILURE 160#ifdef CONFIG_MEMORY_FAILURE
161 ,atomic_long_read(&mce_bad_pages) << (PAGE_SHIFT - 10) 161 ,atomic_long_read(&num_poisoned_pages) << (PAGE_SHIFT - 10)
162#endif 162#endif
163#ifdef CONFIG_TRANSPARENT_HUGEPAGE 163#ifdef CONFIG_TRANSPARENT_HUGEPAGE
164 ,K(global_page_state(NR_ANON_TRANSPARENT_HUGEPAGES) * 164 ,K(global_page_state(NR_ANON_TRANSPARENT_HUGEPAGES) *
diff --git a/fs/proc/proc_net.c b/fs/proc/proc_net.c
index fe72cd073dea..3131a03d7d37 100644
--- a/fs/proc/proc_net.c
+++ b/fs/proc/proc_net.c
@@ -177,20 +177,6 @@ const struct file_operations proc_net_operations = {
177 .readdir = proc_tgid_net_readdir, 177 .readdir = proc_tgid_net_readdir,
178}; 178};
179 179
180
181struct proc_dir_entry *proc_net_fops_create(struct net *net,
182 const char *name, umode_t mode, const struct file_operations *fops)
183{
184 return proc_create(name, mode, net->proc_net, fops);
185}
186EXPORT_SYMBOL_GPL(proc_net_fops_create);
187
188void proc_net_remove(struct net *net, const char *name)
189{
190 remove_proc_entry(name, net->proc_net);
191}
192EXPORT_SYMBOL_GPL(proc_net_remove);
193
194static __net_init int proc_net_ns_init(struct net *net) 180static __net_init int proc_net_ns_init(struct net *net)
195{ 181{
196 struct proc_dir_entry *netd, *net_statd; 182 struct proc_dir_entry *netd, *net_statd;
diff --git a/fs/pstore/inode.c b/fs/pstore/inode.c
index 67de74ca85f4..e4bcb2cf055a 100644
--- a/fs/pstore/inode.c
+++ b/fs/pstore/inode.c
@@ -418,9 +418,25 @@ static struct file_system_type pstore_fs_type = {
418 .kill_sb = pstore_kill_sb, 418 .kill_sb = pstore_kill_sb,
419}; 419};
420 420
421static struct kobject *pstore_kobj;
422
421static int __init init_pstore_fs(void) 423static int __init init_pstore_fs(void)
422{ 424{
423 return register_filesystem(&pstore_fs_type); 425 int err = 0;
426
427 /* Create a convenient mount point for people to access pstore */
428 pstore_kobj = kobject_create_and_add("pstore", fs_kobj);
429 if (!pstore_kobj) {
430 err = -ENOMEM;
431 goto out;
432 }
433
434 err = register_filesystem(&pstore_fs_type);
435 if (err < 0)
436 kobject_put(pstore_kobj);
437
438out:
439 return err;
424} 440}
425module_init(init_pstore_fs) 441module_init(init_pstore_fs)
426 442
diff --git a/fs/pstore/platform.c b/fs/pstore/platform.c
index 5ea2e77ff023..86d1038b5a12 100644
--- a/fs/pstore/platform.c
+++ b/fs/pstore/platform.c
@@ -96,6 +96,27 @@ static const char *get_reason_str(enum kmsg_dump_reason reason)
96 } 96 }
97} 97}
98 98
99bool pstore_cannot_block_path(enum kmsg_dump_reason reason)
100{
101 /*
102 * In case of NMI path, pstore shouldn't be blocked
103 * regardless of reason.
104 */
105 if (in_nmi())
106 return true;
107
108 switch (reason) {
109 /* In panic case, other cpus are stopped by smp_send_stop(). */
110 case KMSG_DUMP_PANIC:
111 /* Emergency restart shouldn't be blocked by spin lock. */
112 case KMSG_DUMP_EMERG:
113 return true;
114 default:
115 return false;
116 }
117}
118EXPORT_SYMBOL_GPL(pstore_cannot_block_path);
119
99/* 120/*
100 * callback from kmsg_dump. (s2,l2) has the most recently 121 * callback from kmsg_dump. (s2,l2) has the most recently
101 * written bytes, older bytes are in (s1,l1). Save as much 122 * written bytes, older bytes are in (s1,l1). Save as much
@@ -114,10 +135,12 @@ static void pstore_dump(struct kmsg_dumper *dumper,
114 135
115 why = get_reason_str(reason); 136 why = get_reason_str(reason);
116 137
117 if (in_nmi()) { 138 if (pstore_cannot_block_path(reason)) {
118 is_locked = spin_trylock(&psinfo->buf_lock); 139 is_locked = spin_trylock_irqsave(&psinfo->buf_lock, flags);
119 if (!is_locked) 140 if (!is_locked) {
120 pr_err("pstore dump routine blocked in NMI, may corrupt error record\n"); 141 pr_err("pstore dump routine blocked in %s path, may corrupt error record\n"
142 , in_nmi() ? "NMI" : why);
143 }
121 } else 144 } else
122 spin_lock_irqsave(&psinfo->buf_lock, flags); 145 spin_lock_irqsave(&psinfo->buf_lock, flags);
123 oopscount++; 146 oopscount++;
@@ -143,9 +166,9 @@ static void pstore_dump(struct kmsg_dumper *dumper,
143 total += hsize + len; 166 total += hsize + len;
144 part++; 167 part++;
145 } 168 }
146 if (in_nmi()) { 169 if (pstore_cannot_block_path(reason)) {
147 if (is_locked) 170 if (is_locked)
148 spin_unlock(&psinfo->buf_lock); 171 spin_unlock_irqrestore(&psinfo->buf_lock, flags);
149 } else 172 } else
150 spin_unlock_irqrestore(&psinfo->buf_lock, flags); 173 spin_unlock_irqrestore(&psinfo->buf_lock, flags);
151} 174}
diff --git a/fs/pstore/ram.c b/fs/pstore/ram.c
index 7003e5266f25..288f068740f6 100644
--- a/fs/pstore/ram.c
+++ b/fs/pstore/ram.c
@@ -167,12 +167,16 @@ static ssize_t ramoops_pstore_read(u64 *id, enum pstore_type_id *type,
167static size_t ramoops_write_kmsg_hdr(struct persistent_ram_zone *prz) 167static size_t ramoops_write_kmsg_hdr(struct persistent_ram_zone *prz)
168{ 168{
169 char *hdr; 169 char *hdr;
170 struct timeval timestamp; 170 struct timespec timestamp;
171 size_t len; 171 size_t len;
172 172
173 do_gettimeofday(&timestamp); 173 /* Report zeroed timestamp if called before timekeeping has resumed. */
174 if (__getnstimeofday(&timestamp)) {
175 timestamp.tv_sec = 0;
176 timestamp.tv_nsec = 0;
177 }
174 hdr = kasprintf(GFP_ATOMIC, RAMOOPS_KERNMSG_HDR "%lu.%lu\n", 178 hdr = kasprintf(GFP_ATOMIC, RAMOOPS_KERNMSG_HDR "%lu.%lu\n",
175 (long)timestamp.tv_sec, (long)timestamp.tv_usec); 179 (long)timestamp.tv_sec, (long)(timestamp.tv_nsec / 1000));
176 WARN_ON_ONCE(!hdr); 180 WARN_ON_ONCE(!hdr);
177 len = hdr ? strlen(hdr) : 0; 181 len = hdr ? strlen(hdr) : 0;
178 persistent_ram_write(prz, hdr, len); 182 persistent_ram_write(prz, hdr, len);
diff --git a/fs/qnx6/inode.c b/fs/qnx6/inode.c
index b6addf560483..57199a52a351 100644
--- a/fs/qnx6/inode.c
+++ b/fs/qnx6/inode.c
@@ -285,7 +285,7 @@ static struct buffer_head *qnx6_check_first_superblock(struct super_block *s,
285 if (fs32_to_cpu(sbi, sb->sb_magic) == QNX6_SUPER_MAGIC) { 285 if (fs32_to_cpu(sbi, sb->sb_magic) == QNX6_SUPER_MAGIC) {
286 /* we got a big endian fs */ 286 /* we got a big endian fs */
287 QNX6DEBUG((KERN_INFO "qnx6: fs got different" 287 QNX6DEBUG((KERN_INFO "qnx6: fs got different"
288 " endianess.\n")); 288 " endianness.\n"));
289 return bh; 289 return bh;
290 } else 290 } else
291 sbi->s_bytesex = BYTESEX_LE; 291 sbi->s_bytesex = BYTESEX_LE;
diff --git a/fs/ramfs/inode.c b/fs/ramfs/inode.c
index eab8c09d3801..c24f1e10b946 100644
--- a/fs/ramfs/inode.c
+++ b/fs/ramfs/inode.c
@@ -260,6 +260,7 @@ static struct file_system_type ramfs_fs_type = {
260 .name = "ramfs", 260 .name = "ramfs",
261 .mount = ramfs_mount, 261 .mount = ramfs_mount,
262 .kill_sb = ramfs_kill_sb, 262 .kill_sb = ramfs_kill_sb,
263 .fs_flags = FS_USERNS_MOUNT,
263}; 264};
264static struct file_system_type rootfs_fs_type = { 265static struct file_system_type rootfs_fs_type = {
265 .name = "rootfs", 266 .name = "rootfs",
diff --git a/fs/select.c b/fs/select.c
index 2ef72d965036..8c1c96c27062 100644
--- a/fs/select.c
+++ b/fs/select.c
@@ -26,6 +26,7 @@
26#include <linux/fs.h> 26#include <linux/fs.h>
27#include <linux/rcupdate.h> 27#include <linux/rcupdate.h>
28#include <linux/hrtimer.h> 28#include <linux/hrtimer.h>
29#include <linux/sched/rt.h>
29 30
30#include <asm/uaccess.h> 31#include <asm/uaccess.h>
31 32
diff --git a/fs/sysfs/group.c b/fs/sysfs/group.c
index 2df555c66d57..aec3d5c98c94 100644
--- a/fs/sysfs/group.c
+++ b/fs/sysfs/group.c
@@ -205,6 +205,48 @@ void sysfs_unmerge_group(struct kobject *kobj,
205} 205}
206EXPORT_SYMBOL_GPL(sysfs_unmerge_group); 206EXPORT_SYMBOL_GPL(sysfs_unmerge_group);
207 207
208/**
209 * sysfs_add_link_to_group - add a symlink to an attribute group.
210 * @kobj: The kobject containing the group.
211 * @group_name: The name of the group.
212 * @target: The target kobject of the symlink to create.
213 * @link_name: The name of the symlink to create.
214 */
215int sysfs_add_link_to_group(struct kobject *kobj, const char *group_name,
216 struct kobject *target, const char *link_name)
217{
218 struct sysfs_dirent *dir_sd;
219 int error = 0;
220
221 dir_sd = sysfs_get_dirent(kobj->sd, NULL, group_name);
222 if (!dir_sd)
223 return -ENOENT;
224
225 error = sysfs_create_link_sd(dir_sd, target, link_name);
226 sysfs_put(dir_sd);
227
228 return error;
229}
230EXPORT_SYMBOL_GPL(sysfs_add_link_to_group);
231
232/**
233 * sysfs_remove_link_from_group - remove a symlink from an attribute group.
234 * @kobj: The kobject containing the group.
235 * @group_name: The name of the group.
236 * @link_name: The name of the symlink to remove.
237 */
238void sysfs_remove_link_from_group(struct kobject *kobj, const char *group_name,
239 const char *link_name)
240{
241 struct sysfs_dirent *dir_sd;
242
243 dir_sd = sysfs_get_dirent(kobj->sd, NULL, group_name);
244 if (dir_sd) {
245 sysfs_hash_and_remove(dir_sd, NULL, link_name);
246 sysfs_put(dir_sd);
247 }
248}
249EXPORT_SYMBOL_GPL(sysfs_remove_link_from_group);
208 250
209EXPORT_SYMBOL_GPL(sysfs_create_group); 251EXPORT_SYMBOL_GPL(sysfs_create_group);
210EXPORT_SYMBOL_GPL(sysfs_update_group); 252EXPORT_SYMBOL_GPL(sysfs_update_group);
diff --git a/fs/sysfs/mount.c b/fs/sysfs/mount.c
index db940a9be045..8d924b5ec733 100644
--- a/fs/sysfs/mount.c
+++ b/fs/sysfs/mount.c
@@ -10,7 +10,7 @@
10 * Please see Documentation/filesystems/sysfs.txt for more information. 10 * Please see Documentation/filesystems/sysfs.txt for more information.
11 */ 11 */
12 12
13#define DEBUG 13#define DEBUG
14 14
15#include <linux/fs.h> 15#include <linux/fs.h>
16#include <linux/mount.h> 16#include <linux/mount.h>
diff --git a/fs/sysfs/symlink.c b/fs/sysfs/symlink.c
index 3c9eb5624f5e..8c940df97a52 100644
--- a/fs/sysfs/symlink.c
+++ b/fs/sysfs/symlink.c
@@ -21,26 +21,17 @@
21 21
22#include "sysfs.h" 22#include "sysfs.h"
23 23
24static int sysfs_do_create_link(struct kobject *kobj, struct kobject *target, 24static int sysfs_do_create_link_sd(struct sysfs_dirent *parent_sd,
25 const char *name, int warn) 25 struct kobject *target,
26 const char *name, int warn)
26{ 27{
27 struct sysfs_dirent *parent_sd = NULL;
28 struct sysfs_dirent *target_sd = NULL; 28 struct sysfs_dirent *target_sd = NULL;
29 struct sysfs_dirent *sd = NULL; 29 struct sysfs_dirent *sd = NULL;
30 struct sysfs_addrm_cxt acxt; 30 struct sysfs_addrm_cxt acxt;
31 enum kobj_ns_type ns_type; 31 enum kobj_ns_type ns_type;
32 int error; 32 int error;
33 33
34 BUG_ON(!name); 34 BUG_ON(!name || !parent_sd);
35
36 if (!kobj)
37 parent_sd = &sysfs_root;
38 else
39 parent_sd = kobj->sd;
40
41 error = -EFAULT;
42 if (!parent_sd)
43 goto out_put;
44 35
45 /* target->sd can go away beneath us but is protected with 36 /* target->sd can go away beneath us but is protected with
46 * sysfs_assoc_lock. Fetch target_sd from it. 37 * sysfs_assoc_lock. Fetch target_sd from it.
@@ -96,6 +87,34 @@ static int sysfs_do_create_link(struct kobject *kobj, struct kobject *target,
96} 87}
97 88
98/** 89/**
90 * sysfs_create_link_sd - create symlink to a given object.
91 * @sd: directory we're creating the link in.
92 * @target: object we're pointing to.
93 * @name: name of the symlink.
94 */
95int sysfs_create_link_sd(struct sysfs_dirent *sd, struct kobject *target,
96 const char *name)
97{
98 return sysfs_do_create_link_sd(sd, target, name, 1);
99}
100
101static int sysfs_do_create_link(struct kobject *kobj, struct kobject *target,
102 const char *name, int warn)
103{
104 struct sysfs_dirent *parent_sd = NULL;
105
106 if (!kobj)
107 parent_sd = &sysfs_root;
108 else
109 parent_sd = kobj->sd;
110
111 if (!parent_sd)
112 return -EFAULT;
113
114 return sysfs_do_create_link_sd(parent_sd, target, name, warn);
115}
116
117/**
99 * sysfs_create_link - create symlink between two objects. 118 * sysfs_create_link - create symlink between two objects.
100 * @kobj: object whose directory we're creating the link in. 119 * @kobj: object whose directory we're creating the link in.
101 * @target: object we're pointing to. 120 * @target: object we're pointing to.
diff --git a/fs/sysfs/sysfs.h b/fs/sysfs/sysfs.h
index d73c0932bbd6..d1e4043eb0c3 100644
--- a/fs/sysfs/sysfs.h
+++ b/fs/sysfs/sysfs.h
@@ -240,3 +240,5 @@ void unmap_bin_file(struct sysfs_dirent *attr_sd);
240 * symlink.c 240 * symlink.c
241 */ 241 */
242extern const struct inode_operations sysfs_symlink_inode_operations; 242extern const struct inode_operations sysfs_symlink_inode_operations;
243int sysfs_create_link_sd(struct sysfs_dirent *sd, struct kobject *target,
244 const char *name);
diff --git a/fs/timerfd.c b/fs/timerfd.c
index d03822bbf190..0e606b12a59d 100644
--- a/fs/timerfd.c
+++ b/fs/timerfd.c
@@ -22,6 +22,7 @@
22#include <linux/anon_inodes.h> 22#include <linux/anon_inodes.h>
23#include <linux/timerfd.h> 23#include <linux/timerfd.h>
24#include <linux/syscalls.h> 24#include <linux/syscalls.h>
25#include <linux/compat.h>
25#include <linux/rcupdate.h> 26#include <linux/rcupdate.h>
26 27
27struct timerfd_ctx { 28struct timerfd_ctx {
@@ -278,21 +279,17 @@ SYSCALL_DEFINE2(timerfd_create, int, clockid, int, flags)
278 return ufd; 279 return ufd;
279} 280}
280 281
281SYSCALL_DEFINE4(timerfd_settime, int, ufd, int, flags, 282static int do_timerfd_settime(int ufd, int flags,
282 const struct itimerspec __user *, utmr, 283 const struct itimerspec *new,
283 struct itimerspec __user *, otmr) 284 struct itimerspec *old)
284{ 285{
285 struct fd f; 286 struct fd f;
286 struct timerfd_ctx *ctx; 287 struct timerfd_ctx *ctx;
287 struct itimerspec ktmr, kotmr;
288 int ret; 288 int ret;
289 289
290 if (copy_from_user(&ktmr, utmr, sizeof(ktmr)))
291 return -EFAULT;
292
293 if ((flags & ~TFD_SETTIME_FLAGS) || 290 if ((flags & ~TFD_SETTIME_FLAGS) ||
294 !timespec_valid(&ktmr.it_value) || 291 !timespec_valid(&new->it_value) ||
295 !timespec_valid(&ktmr.it_interval)) 292 !timespec_valid(&new->it_interval))
296 return -EINVAL; 293 return -EINVAL;
297 294
298 ret = timerfd_fget(ufd, &f); 295 ret = timerfd_fget(ufd, &f);
@@ -323,27 +320,23 @@ SYSCALL_DEFINE4(timerfd_settime, int, ufd, int, flags,
323 if (ctx->expired && ctx->tintv.tv64) 320 if (ctx->expired && ctx->tintv.tv64)
324 hrtimer_forward_now(&ctx->tmr, ctx->tintv); 321 hrtimer_forward_now(&ctx->tmr, ctx->tintv);
325 322
326 kotmr.it_value = ktime_to_timespec(timerfd_get_remaining(ctx)); 323 old->it_value = ktime_to_timespec(timerfd_get_remaining(ctx));
327 kotmr.it_interval = ktime_to_timespec(ctx->tintv); 324 old->it_interval = ktime_to_timespec(ctx->tintv);
328 325
329 /* 326 /*
330 * Re-program the timer to the new value ... 327 * Re-program the timer to the new value ...
331 */ 328 */
332 ret = timerfd_setup(ctx, flags, &ktmr); 329 ret = timerfd_setup(ctx, flags, new);
333 330
334 spin_unlock_irq(&ctx->wqh.lock); 331 spin_unlock_irq(&ctx->wqh.lock);
335 fdput(f); 332 fdput(f);
336 if (otmr && copy_to_user(otmr, &kotmr, sizeof(kotmr)))
337 return -EFAULT;
338
339 return ret; 333 return ret;
340} 334}
341 335
342SYSCALL_DEFINE2(timerfd_gettime, int, ufd, struct itimerspec __user *, otmr) 336static int do_timerfd_gettime(int ufd, struct itimerspec *t)
343{ 337{
344 struct fd f; 338 struct fd f;
345 struct timerfd_ctx *ctx; 339 struct timerfd_ctx *ctx;
346 struct itimerspec kotmr;
347 int ret = timerfd_fget(ufd, &f); 340 int ret = timerfd_fget(ufd, &f);
348 if (ret) 341 if (ret)
349 return ret; 342 return ret;
@@ -356,11 +349,65 @@ SYSCALL_DEFINE2(timerfd_gettime, int, ufd, struct itimerspec __user *, otmr)
356 hrtimer_forward_now(&ctx->tmr, ctx->tintv) - 1; 349 hrtimer_forward_now(&ctx->tmr, ctx->tintv) - 1;
357 hrtimer_restart(&ctx->tmr); 350 hrtimer_restart(&ctx->tmr);
358 } 351 }
359 kotmr.it_value = ktime_to_timespec(timerfd_get_remaining(ctx)); 352 t->it_value = ktime_to_timespec(timerfd_get_remaining(ctx));
360 kotmr.it_interval = ktime_to_timespec(ctx->tintv); 353 t->it_interval = ktime_to_timespec(ctx->tintv);
361 spin_unlock_irq(&ctx->wqh.lock); 354 spin_unlock_irq(&ctx->wqh.lock);
362 fdput(f); 355 fdput(f);
356 return 0;
357}
358
359SYSCALL_DEFINE4(timerfd_settime, int, ufd, int, flags,
360 const struct itimerspec __user *, utmr,
361 struct itimerspec __user *, otmr)
362{
363 struct itimerspec new, old;
364 int ret;
365
366 if (copy_from_user(&new, utmr, sizeof(new)))
367 return -EFAULT;
368 ret = do_timerfd_settime(ufd, flags, &new, &old);
369 if (ret)
370 return ret;
371 if (otmr && copy_to_user(otmr, &old, sizeof(old)))
372 return -EFAULT;
373
374 return ret;
375}
363 376
377SYSCALL_DEFINE2(timerfd_gettime, int, ufd, struct itimerspec __user *, otmr)
378{
379 struct itimerspec kotmr;
380 int ret = do_timerfd_gettime(ufd, &kotmr);
381 if (ret)
382 return ret;
364 return copy_to_user(otmr, &kotmr, sizeof(kotmr)) ? -EFAULT: 0; 383 return copy_to_user(otmr, &kotmr, sizeof(kotmr)) ? -EFAULT: 0;
365} 384}
366 385
386#ifdef COMPAT
387COMPAT_SYSCALL_DEFINE4(timerfd_settime, int, ufd, int, flags,
388 const struct itimerspec __user *, utmr,
389 struct itimerspec __user *, otmr)
390{
391 struct itimerspec new, old;
392 int ret;
393
394 if (get_compat_itimerspec(&new, utmr))
395 return -EFAULT;
396 ret = do_timerfd_settime(ufd, flags, &new, &old);
397 if (ret)
398 return ret;
399 if (otmr && put_compat_itimerspec(otmr, &old))
400 return -EFAULT;
401 return ret;
402}
403
404COMPAT_SYSCALL_DEFINE2(timerfd_gettime, int, ufd,
405 struct itimerspec __user *, otmr)
406{
407 struct itimerspec kotmr;
408 int ret = do_timerfd_gettime(ufd, &kotmr);
409 if (ret)
410 return ret;
411 return put_compat_itimerspec(otmr, &t) ? -EFAULT: 0;
412}
413#endif
diff --git a/fs/ubifs/debug.c b/fs/ubifs/debug.c
index 12817ffc7345..7f60e900edff 100644
--- a/fs/ubifs/debug.c
+++ b/fs/ubifs/debug.c
@@ -2459,7 +2459,7 @@ error_dump:
2459 2459
2460static inline int chance(unsigned int n, unsigned int out_of) 2460static inline int chance(unsigned int n, unsigned int out_of)
2461{ 2461{
2462 return !!((random32() % out_of) + 1 <= n); 2462 return !!((prandom_u32() % out_of) + 1 <= n);
2463 2463
2464} 2464}
2465 2465
@@ -2477,13 +2477,13 @@ static int power_cut_emulated(struct ubifs_info *c, int lnum, int write)
2477 if (chance(1, 2)) { 2477 if (chance(1, 2)) {
2478 d->pc_delay = 1; 2478 d->pc_delay = 1;
2479 /* Fail withing 1 minute */ 2479 /* Fail withing 1 minute */
2480 delay = random32() % 60000; 2480 delay = prandom_u32() % 60000;
2481 d->pc_timeout = jiffies; 2481 d->pc_timeout = jiffies;
2482 d->pc_timeout += msecs_to_jiffies(delay); 2482 d->pc_timeout += msecs_to_jiffies(delay);
2483 ubifs_warn("failing after %lums", delay); 2483 ubifs_warn("failing after %lums", delay);
2484 } else { 2484 } else {
2485 d->pc_delay = 2; 2485 d->pc_delay = 2;
2486 delay = random32() % 10000; 2486 delay = prandom_u32() % 10000;
2487 /* Fail within 10000 operations */ 2487 /* Fail within 10000 operations */
2488 d->pc_cnt_max = delay; 2488 d->pc_cnt_max = delay;
2489 ubifs_warn("failing after %lu calls", delay); 2489 ubifs_warn("failing after %lu calls", delay);
@@ -2563,7 +2563,7 @@ static int corrupt_data(const struct ubifs_info *c, const void *buf,
2563 unsigned int from, to, ffs = chance(1, 2); 2563 unsigned int from, to, ffs = chance(1, 2);
2564 unsigned char *p = (void *)buf; 2564 unsigned char *p = (void *)buf;
2565 2565
2566 from = random32() % (len + 1); 2566 from = prandom_u32() % (len + 1);
2567 /* Corruption may only span one max. write unit */ 2567 /* Corruption may only span one max. write unit */
2568 to = min(len, ALIGN(from, c->max_write_size)); 2568 to = min(len, ALIGN(from, c->max_write_size));
2569 2569
diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c
index 5bc77817f382..4f6493c130e0 100644
--- a/fs/ubifs/file.c
+++ b/fs/ubifs/file.c
@@ -1522,6 +1522,7 @@ static int ubifs_vm_page_mkwrite(struct vm_area_struct *vma,
1522 ubifs_release_dirty_inode_budget(c, ui); 1522 ubifs_release_dirty_inode_budget(c, ui);
1523 } 1523 }
1524 1524
1525 wait_for_stable_page(page);
1525 unlock_page(page); 1526 unlock_page(page);
1526 return 0; 1527 return 0;
1527 1528
diff --git a/fs/ubifs/lpt_commit.c b/fs/ubifs/lpt_commit.c
index 9daaeef675dd..4b826abb1528 100644
--- a/fs/ubifs/lpt_commit.c
+++ b/fs/ubifs/lpt_commit.c
@@ -2007,28 +2007,28 @@ static int dbg_populate_lsave(struct ubifs_info *c)
2007 2007
2008 if (!dbg_is_chk_gen(c)) 2008 if (!dbg_is_chk_gen(c))
2009 return 0; 2009 return 0;
2010 if (random32() & 3) 2010 if (prandom_u32() & 3)
2011 return 0; 2011 return 0;
2012 2012
2013 for (i = 0; i < c->lsave_cnt; i++) 2013 for (i = 0; i < c->lsave_cnt; i++)
2014 c->lsave[i] = c->main_first; 2014 c->lsave[i] = c->main_first;
2015 2015
2016 list_for_each_entry(lprops, &c->empty_list, list) 2016 list_for_each_entry(lprops, &c->empty_list, list)
2017 c->lsave[random32() % c->lsave_cnt] = lprops->lnum; 2017 c->lsave[prandom_u32() % c->lsave_cnt] = lprops->lnum;
2018 list_for_each_entry(lprops, &c->freeable_list, list) 2018 list_for_each_entry(lprops, &c->freeable_list, list)
2019 c->lsave[random32() % c->lsave_cnt] = lprops->lnum; 2019 c->lsave[prandom_u32() % c->lsave_cnt] = lprops->lnum;
2020 list_for_each_entry(lprops, &c->frdi_idx_list, list) 2020 list_for_each_entry(lprops, &c->frdi_idx_list, list)
2021 c->lsave[random32() % c->lsave_cnt] = lprops->lnum; 2021 c->lsave[prandom_u32() % c->lsave_cnt] = lprops->lnum;
2022 2022
2023 heap = &c->lpt_heap[LPROPS_DIRTY_IDX - 1]; 2023 heap = &c->lpt_heap[LPROPS_DIRTY_IDX - 1];
2024 for (i = 0; i < heap->cnt; i++) 2024 for (i = 0; i < heap->cnt; i++)
2025 c->lsave[random32() % c->lsave_cnt] = heap->arr[i]->lnum; 2025 c->lsave[prandom_u32() % c->lsave_cnt] = heap->arr[i]->lnum;
2026 heap = &c->lpt_heap[LPROPS_DIRTY - 1]; 2026 heap = &c->lpt_heap[LPROPS_DIRTY - 1];
2027 for (i = 0; i < heap->cnt; i++) 2027 for (i = 0; i < heap->cnt; i++)
2028 c->lsave[random32() % c->lsave_cnt] = heap->arr[i]->lnum; 2028 c->lsave[prandom_u32() % c->lsave_cnt] = heap->arr[i]->lnum;
2029 heap = &c->lpt_heap[LPROPS_FREE - 1]; 2029 heap = &c->lpt_heap[LPROPS_FREE - 1];
2030 for (i = 0; i < heap->cnt; i++) 2030 for (i = 0; i < heap->cnt; i++)
2031 c->lsave[random32() % c->lsave_cnt] = heap->arr[i]->lnum; 2031 c->lsave[prandom_u32() % c->lsave_cnt] = heap->arr[i]->lnum;
2032 2032
2033 return 1; 2033 return 1;
2034} 2034}
diff --git a/fs/ubifs/orphan.c b/fs/ubifs/orphan.c
index 769701ccb5c9..ba32da3fe08a 100644
--- a/fs/ubifs/orphan.c
+++ b/fs/ubifs/orphan.c
@@ -126,13 +126,14 @@ void ubifs_delete_orphan(struct ubifs_info *c, ino_t inum)
126 else if (inum > o->inum) 126 else if (inum > o->inum)
127 p = p->rb_right; 127 p = p->rb_right;
128 else { 128 else {
129 if (o->dnext) { 129 if (o->del) {
130 spin_unlock(&c->orphan_lock); 130 spin_unlock(&c->orphan_lock);
131 dbg_gen("deleted twice ino %lu", 131 dbg_gen("deleted twice ino %lu",
132 (unsigned long)inum); 132 (unsigned long)inum);
133 return; 133 return;
134 } 134 }
135 if (o->cnext) { 135 if (o->cmt) {
136 o->del = 1;
136 o->dnext = c->orph_dnext; 137 o->dnext = c->orph_dnext;
137 c->orph_dnext = o; 138 c->orph_dnext = o;
138 spin_unlock(&c->orphan_lock); 139 spin_unlock(&c->orphan_lock);
@@ -172,7 +173,9 @@ int ubifs_orphan_start_commit(struct ubifs_info *c)
172 last = &c->orph_cnext; 173 last = &c->orph_cnext;
173 list_for_each_entry(orphan, &c->orph_new, new_list) { 174 list_for_each_entry(orphan, &c->orph_new, new_list) {
174 ubifs_assert(orphan->new); 175 ubifs_assert(orphan->new);
176 ubifs_assert(!orphan->cmt);
175 orphan->new = 0; 177 orphan->new = 0;
178 orphan->cmt = 1;
176 *last = orphan; 179 *last = orphan;
177 last = &orphan->cnext; 180 last = &orphan->cnext;
178 } 181 }
@@ -299,7 +302,9 @@ static int write_orph_node(struct ubifs_info *c, int atomic)
299 cnext = c->orph_cnext; 302 cnext = c->orph_cnext;
300 for (i = 0; i < cnt; i++) { 303 for (i = 0; i < cnt; i++) {
301 orphan = cnext; 304 orphan = cnext;
305 ubifs_assert(orphan->cmt);
302 orph->inos[i] = cpu_to_le64(orphan->inum); 306 orph->inos[i] = cpu_to_le64(orphan->inum);
307 orphan->cmt = 0;
303 cnext = orphan->cnext; 308 cnext = orphan->cnext;
304 orphan->cnext = NULL; 309 orphan->cnext = NULL;
305 } 310 }
@@ -378,6 +383,7 @@ static int consolidate(struct ubifs_info *c)
378 list_for_each_entry(orphan, &c->orph_list, list) { 383 list_for_each_entry(orphan, &c->orph_list, list) {
379 if (orphan->new) 384 if (orphan->new)
380 continue; 385 continue;
386 orphan->cmt = 1;
381 *last = orphan; 387 *last = orphan;
382 last = &orphan->cnext; 388 last = &orphan->cnext;
383 cnt += 1; 389 cnt += 1;
@@ -442,6 +448,7 @@ static void erase_deleted(struct ubifs_info *c)
442 orphan = dnext; 448 orphan = dnext;
443 dnext = orphan->dnext; 449 dnext = orphan->dnext;
444 ubifs_assert(!orphan->new); 450 ubifs_assert(!orphan->new);
451 ubifs_assert(orphan->del);
445 rb_erase(&orphan->rb, &c->orph_tree); 452 rb_erase(&orphan->rb, &c->orph_tree);
446 list_del(&orphan->list); 453 list_del(&orphan->list);
447 c->tot_orphans -= 1; 454 c->tot_orphans -= 1;
@@ -531,6 +538,7 @@ static int insert_dead_orphan(struct ubifs_info *c, ino_t inum)
531 rb_link_node(&orphan->rb, parent, p); 538 rb_link_node(&orphan->rb, parent, p);
532 rb_insert_color(&orphan->rb, &c->orph_tree); 539 rb_insert_color(&orphan->rb, &c->orph_tree);
533 list_add_tail(&orphan->list, &c->orph_list); 540 list_add_tail(&orphan->list, &c->orph_list);
541 orphan->del = 1;
534 orphan->dnext = c->orph_dnext; 542 orphan->dnext = c->orph_dnext;
535 c->orph_dnext = orphan; 543 c->orph_dnext = orphan;
536 dbg_mnt("ino %lu, new %d, tot %d", (unsigned long)inum, 544 dbg_mnt("ino %lu, new %d, tot %d", (unsigned long)inum,
diff --git a/fs/ubifs/tnc_commit.c b/fs/ubifs/tnc_commit.c
index 523bbad69c0c..52a6559275c4 100644
--- a/fs/ubifs/tnc_commit.c
+++ b/fs/ubifs/tnc_commit.c
@@ -683,7 +683,7 @@ static int alloc_idx_lebs(struct ubifs_info *c, int cnt)
683 c->ilebs[c->ileb_cnt++] = lnum; 683 c->ilebs[c->ileb_cnt++] = lnum;
684 dbg_cmt("LEB %d", lnum); 684 dbg_cmt("LEB %d", lnum);
685 } 685 }
686 if (dbg_is_chk_index(c) && !(random32() & 7)) 686 if (dbg_is_chk_index(c) && !(prandom_u32() & 7))
687 return -ENOSPC; 687 return -ENOSPC;
688 return 0; 688 return 0;
689} 689}
diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h
index d133c276fe05..b2babce4d70f 100644
--- a/fs/ubifs/ubifs.h
+++ b/fs/ubifs/ubifs.h
@@ -904,6 +904,8 @@ struct ubifs_budget_req {
904 * @dnext: next orphan to delete 904 * @dnext: next orphan to delete
905 * @inum: inode number 905 * @inum: inode number
906 * @new: %1 => added since the last commit, otherwise %0 906 * @new: %1 => added since the last commit, otherwise %0
907 * @cmt: %1 => commit pending, otherwise %0
908 * @del: %1 => delete pending, otherwise %0
907 */ 909 */
908struct ubifs_orphan { 910struct ubifs_orphan {
909 struct rb_node rb; 911 struct rb_node rb;
@@ -912,7 +914,9 @@ struct ubifs_orphan {
912 struct ubifs_orphan *cnext; 914 struct ubifs_orphan *cnext;
913 struct ubifs_orphan *dnext; 915 struct ubifs_orphan *dnext;
914 ino_t inum; 916 ino_t inum;
915 int new; 917 unsigned new:1;
918 unsigned cmt:1;
919 unsigned del:1;
916}; 920};
917 921
918/** 922/**
diff --git a/fs/ufs/Kconfig b/fs/ufs/Kconfig
index e4f10a40768a..0bf6e16f8d79 100644
--- a/fs/ufs/Kconfig
+++ b/fs/ufs/Kconfig
@@ -29,7 +29,7 @@ config UFS_FS
29 29
30config UFS_FS_WRITE 30config UFS_FS_WRITE
31 bool "UFS file system write support (DANGEROUS)" 31 bool "UFS file system write support (DANGEROUS)"
32 depends on UFS_FS && EXPERIMENTAL 32 depends on UFS_FS
33 help 33 help
34 Say Y here if you want to try writing to UFS partitions. This is 34 Say Y here if you want to try writing to UFS partitions. This is
35 experimental, so you should back up your UFS partitions beforehand. 35 experimental, so you should back up your UFS partitions beforehand.
diff --git a/fs/xfs/Kconfig b/fs/xfs/Kconfig
index 5a7ffe54f5d5..cc33aaf219f1 100644
--- a/fs/xfs/Kconfig
+++ b/fs/xfs/Kconfig
@@ -70,8 +70,8 @@ config XFS_RT
70 If unsure, say N. 70 If unsure, say N.
71 71
72config XFS_DEBUG 72config XFS_DEBUG
73 bool "XFS Debugging support (EXPERIMENTAL)" 73 bool "XFS Debugging support"
74 depends on XFS_FS && EXPERIMENTAL 74 depends on XFS_FS
75 help 75 help
76 Say Y here to get an XFS build with many debugging features, 76 Say Y here to get an XFS build with many debugging features,
77 including ASSERT checks, function wrappers around macros, 77 including ASSERT checks, function wrappers around macros,
diff --git a/fs/xfs/xfs_alloc.c b/fs/xfs/xfs_alloc.c
index 393055fe3aef..0ad23253e8b1 100644
--- a/fs/xfs/xfs_alloc.c
+++ b/fs/xfs/xfs_alloc.c
@@ -1925,8 +1925,6 @@ xfs_alloc_fix_freelist(
1925 targs.mp = mp; 1925 targs.mp = mp;
1926 targs.agbp = agbp; 1926 targs.agbp = agbp;
1927 targs.agno = args->agno; 1927 targs.agno = args->agno;
1928 targs.mod = targs.minleft = targs.wasdel = targs.userdata =
1929 targs.minalignslop = 0;
1930 targs.alignment = targs.minlen = targs.prod = targs.isfl = 1; 1928 targs.alignment = targs.minlen = targs.prod = targs.isfl = 1;
1931 targs.type = XFS_ALLOCTYPE_THIS_AG; 1929 targs.type = XFS_ALLOCTYPE_THIS_AG;
1932 targs.pag = pag; 1930 targs.pag = pag;
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 4111a40ebe1a..5f707e537171 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -86,11 +86,11 @@ xfs_destroy_ioend(
86 } 86 }
87 87
88 if (ioend->io_iocb) { 88 if (ioend->io_iocb) {
89 inode_dio_done(ioend->io_inode);
89 if (ioend->io_isasync) { 90 if (ioend->io_isasync) {
90 aio_complete(ioend->io_iocb, ioend->io_error ? 91 aio_complete(ioend->io_iocb, ioend->io_error ?
91 ioend->io_error : ioend->io_result, 0); 92 ioend->io_error : ioend->io_result, 0);
92 } 93 }
93 inode_dio_done(ioend->io_inode);
94 } 94 }
95 95
96 mempool_free(ioend, xfs_ioend_pool); 96 mempool_free(ioend, xfs_ioend_pool);
diff --git a/fs/xfs/xfs_attr.c b/fs/xfs/xfs_attr.c
index aaf472532b3c..888683844d98 100644
--- a/fs/xfs/xfs_attr.c
+++ b/fs/xfs/xfs_attr.c
@@ -300,9 +300,12 @@ xfs_attr_set_int(
300 if (rsvd) 300 if (rsvd)
301 args.trans->t_flags |= XFS_TRANS_RESERVE; 301 args.trans->t_flags |= XFS_TRANS_RESERVE;
302 302
303 if ((error = xfs_trans_reserve(args.trans, args.total, 303 error = xfs_trans_reserve(args.trans, args.total,
304 XFS_ATTRSET_LOG_RES(mp, args.total), 0, 304 XFS_ATTRSETM_LOG_RES(mp) +
305 XFS_TRANS_PERM_LOG_RES, XFS_ATTRSET_LOG_COUNT))) { 305 XFS_ATTRSETRT_LOG_RES(mp) * args.total,
306 0, XFS_TRANS_PERM_LOG_RES,
307 XFS_ATTRSET_LOG_COUNT);
308 if (error) {
306 xfs_trans_cancel(args.trans, 0); 309 xfs_trans_cancel(args.trans, 0);
307 return(error); 310 return(error);
308 } 311 }
diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c
index 0e92d12765d2..b44af9211bd9 100644
--- a/fs/xfs/xfs_bmap.c
+++ b/fs/xfs/xfs_bmap.c
@@ -147,7 +147,10 @@ xfs_bmap_local_to_extents(
147 xfs_fsblock_t *firstblock, /* first block allocated in xaction */ 147 xfs_fsblock_t *firstblock, /* first block allocated in xaction */
148 xfs_extlen_t total, /* total blocks needed by transaction */ 148 xfs_extlen_t total, /* total blocks needed by transaction */
149 int *logflagsp, /* inode logging flags */ 149 int *logflagsp, /* inode logging flags */
150 int whichfork); /* data or attr fork */ 150 int whichfork, /* data or attr fork */
151 void (*init_fn)(struct xfs_buf *bp,
152 struct xfs_inode *ip,
153 struct xfs_ifork *ifp));
151 154
152/* 155/*
153 * Search the extents list for the inode, for the extent containing bno. 156 * Search the extents list for the inode, for the extent containing bno.
@@ -357,7 +360,42 @@ xfs_bmap_add_attrfork_extents(
357} 360}
358 361
359/* 362/*
360 * Called from xfs_bmap_add_attrfork to handle local format files. 363 * Block initialisation functions for local to extent format conversion.
364 * As these get more complex, they will be moved to the relevant files,
365 * but for now they are too simple to worry about.
366 */
367STATIC void
368xfs_bmap_local_to_extents_init_fn(
369 struct xfs_buf *bp,
370 struct xfs_inode *ip,
371 struct xfs_ifork *ifp)
372{
373 bp->b_ops = &xfs_bmbt_buf_ops;
374 memcpy(bp->b_addr, ifp->if_u1.if_data, ifp->if_bytes);
375}
376
377STATIC void
378xfs_symlink_local_to_remote(
379 struct xfs_buf *bp,
380 struct xfs_inode *ip,
381 struct xfs_ifork *ifp)
382{
383 /* remote symlink blocks are not verifiable until CRCs come along */
384 bp->b_ops = NULL;
385 memcpy(bp->b_addr, ifp->if_u1.if_data, ifp->if_bytes);
386}
387
388/*
389 * Called from xfs_bmap_add_attrfork to handle local format files. Each
390 * different data fork content type needs a different callout to do the
391 * conversion. Some are basic and only require special block initialisation
392 * callouts for the data formating, others (directories) are so specialised they
393 * handle everything themselves.
394 *
395 * XXX (dgc): investigate whether directory conversion can use the generic
396 * formatting callout. It should be possible - it's just a very complex
397 * formatter. it would also require passing the transaction through to the init
398 * function.
361 */ 399 */
362STATIC int /* error */ 400STATIC int /* error */
363xfs_bmap_add_attrfork_local( 401xfs_bmap_add_attrfork_local(
@@ -368,25 +406,29 @@ xfs_bmap_add_attrfork_local(
368 int *flags) /* inode logging flags */ 406 int *flags) /* inode logging flags */
369{ 407{
370 xfs_da_args_t dargs; /* args for dir/attr code */ 408 xfs_da_args_t dargs; /* args for dir/attr code */
371 int error; /* error return value */
372 xfs_mount_t *mp; /* mount structure pointer */
373 409
374 if (ip->i_df.if_bytes <= XFS_IFORK_DSIZE(ip)) 410 if (ip->i_df.if_bytes <= XFS_IFORK_DSIZE(ip))
375 return 0; 411 return 0;
412
376 if (S_ISDIR(ip->i_d.di_mode)) { 413 if (S_ISDIR(ip->i_d.di_mode)) {
377 mp = ip->i_mount;
378 memset(&dargs, 0, sizeof(dargs)); 414 memset(&dargs, 0, sizeof(dargs));
379 dargs.dp = ip; 415 dargs.dp = ip;
380 dargs.firstblock = firstblock; 416 dargs.firstblock = firstblock;
381 dargs.flist = flist; 417 dargs.flist = flist;
382 dargs.total = mp->m_dirblkfsbs; 418 dargs.total = ip->i_mount->m_dirblkfsbs;
383 dargs.whichfork = XFS_DATA_FORK; 419 dargs.whichfork = XFS_DATA_FORK;
384 dargs.trans = tp; 420 dargs.trans = tp;
385 error = xfs_dir2_sf_to_block(&dargs); 421 return xfs_dir2_sf_to_block(&dargs);
386 } else 422 }
387 error = xfs_bmap_local_to_extents(tp, ip, firstblock, 1, flags, 423
388 XFS_DATA_FORK); 424 if (S_ISLNK(ip->i_d.di_mode))
389 return error; 425 return xfs_bmap_local_to_extents(tp, ip, firstblock, 1,
426 flags, XFS_DATA_FORK,
427 xfs_symlink_local_to_remote);
428
429 return xfs_bmap_local_to_extents(tp, ip, firstblock, 1, flags,
430 XFS_DATA_FORK,
431 xfs_bmap_local_to_extents_init_fn);
390} 432}
391 433
392/* 434/*
@@ -3099,8 +3141,6 @@ xfs_bmap_extents_to_btree(
3099 args.fsbno = *firstblock; 3141 args.fsbno = *firstblock;
3100 } 3142 }
3101 args.minlen = args.maxlen = args.prod = 1; 3143 args.minlen = args.maxlen = args.prod = 1;
3102 args.total = args.minleft = args.alignment = args.mod = args.isfl =
3103 args.minalignslop = 0;
3104 args.wasdel = wasdel; 3144 args.wasdel = wasdel;
3105 *logflagsp = 0; 3145 *logflagsp = 0;
3106 if ((error = xfs_alloc_vextent(&args))) { 3146 if ((error = xfs_alloc_vextent(&args))) {
@@ -3221,7 +3261,10 @@ xfs_bmap_local_to_extents(
3221 xfs_fsblock_t *firstblock, /* first block allocated in xaction */ 3261 xfs_fsblock_t *firstblock, /* first block allocated in xaction */
3222 xfs_extlen_t total, /* total blocks needed by transaction */ 3262 xfs_extlen_t total, /* total blocks needed by transaction */
3223 int *logflagsp, /* inode logging flags */ 3263 int *logflagsp, /* inode logging flags */
3224 int whichfork) /* data or attr fork */ 3264 int whichfork,
3265 void (*init_fn)(struct xfs_buf *bp,
3266 struct xfs_inode *ip,
3267 struct xfs_ifork *ifp))
3225{ 3268{
3226 int error; /* error return value */ 3269 int error; /* error return value */
3227 int flags; /* logging flags returned */ 3270 int flags; /* logging flags returned */
@@ -3241,12 +3284,12 @@ xfs_bmap_local_to_extents(
3241 xfs_buf_t *bp; /* buffer for extent block */ 3284 xfs_buf_t *bp; /* buffer for extent block */
3242 xfs_bmbt_rec_host_t *ep;/* extent record pointer */ 3285 xfs_bmbt_rec_host_t *ep;/* extent record pointer */
3243 3286
3287 ASSERT((ifp->if_flags &
3288 (XFS_IFINLINE|XFS_IFEXTENTS|XFS_IFEXTIREC)) == XFS_IFINLINE);
3244 memset(&args, 0, sizeof(args)); 3289 memset(&args, 0, sizeof(args));
3245 args.tp = tp; 3290 args.tp = tp;
3246 args.mp = ip->i_mount; 3291 args.mp = ip->i_mount;
3247 args.firstblock = *firstblock; 3292 args.firstblock = *firstblock;
3248 ASSERT((ifp->if_flags &
3249 (XFS_IFINLINE|XFS_IFEXTENTS|XFS_IFEXTIREC)) == XFS_IFINLINE);
3250 /* 3293 /*
3251 * Allocate a block. We know we need only one, since the 3294 * Allocate a block. We know we need only one, since the
3252 * file currently fits in an inode. 3295 * file currently fits in an inode.
@@ -3259,20 +3302,21 @@ xfs_bmap_local_to_extents(
3259 args.type = XFS_ALLOCTYPE_NEAR_BNO; 3302 args.type = XFS_ALLOCTYPE_NEAR_BNO;
3260 } 3303 }
3261 args.total = total; 3304 args.total = total;
3262 args.mod = args.minleft = args.alignment = args.wasdel =
3263 args.isfl = args.minalignslop = 0;
3264 args.minlen = args.maxlen = args.prod = 1; 3305 args.minlen = args.maxlen = args.prod = 1;
3265 if ((error = xfs_alloc_vextent(&args))) 3306 error = xfs_alloc_vextent(&args);
3307 if (error)
3266 goto done; 3308 goto done;
3267 /* 3309
3268 * Can't fail, the space was reserved. 3310 /* Can't fail, the space was reserved. */
3269 */
3270 ASSERT(args.fsbno != NULLFSBLOCK); 3311 ASSERT(args.fsbno != NULLFSBLOCK);
3271 ASSERT(args.len == 1); 3312 ASSERT(args.len == 1);
3272 *firstblock = args.fsbno; 3313 *firstblock = args.fsbno;
3273 bp = xfs_btree_get_bufl(args.mp, tp, args.fsbno, 0); 3314 bp = xfs_btree_get_bufl(args.mp, tp, args.fsbno, 0);
3274 bp->b_ops = &xfs_bmbt_buf_ops; 3315
3275 memcpy(bp->b_addr, ifp->if_u1.if_data, ifp->if_bytes); 3316 /* initialise the block and copy the data */
3317 init_fn(bp, ip, ifp);
3318
3319 /* account for the change in fork size and log everything */
3276 xfs_trans_log_buf(tp, bp, 0, ifp->if_bytes - 1); 3320 xfs_trans_log_buf(tp, bp, 0, ifp->if_bytes - 1);
3277 xfs_bmap_forkoff_reset(args.mp, ip, whichfork); 3321 xfs_bmap_forkoff_reset(args.mp, ip, whichfork);
3278 xfs_idata_realloc(ip, -ifp->if_bytes, whichfork); 3322 xfs_idata_realloc(ip, -ifp->if_bytes, whichfork);
@@ -4680,9 +4724,6 @@ __xfs_bmapi_allocate(
4680 return error; 4724 return error;
4681 } 4725 }
4682 4726
4683 if (bma->flags & XFS_BMAPI_STACK_SWITCH)
4684 bma->stack_switch = 1;
4685
4686 error = xfs_bmap_alloc(bma); 4727 error = xfs_bmap_alloc(bma);
4687 if (error) 4728 if (error)
4688 return error; 4729 return error;
@@ -4922,8 +4963,32 @@ xfs_bmapi_write(
4922 XFS_STATS_INC(xs_blk_mapw); 4963 XFS_STATS_INC(xs_blk_mapw);
4923 4964
4924 if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL) { 4965 if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL) {
4966 /*
4967 * XXX (dgc): This assumes we are only called for inodes that
4968 * contain content neutral data in local format. Anything that
4969 * contains caller-specific data in local format that needs
4970 * transformation to move to a block format needs to do the
4971 * conversion to extent format itself.
4972 *
4973 * Directory data forks and attribute forks handle this
4974 * themselves, but with the addition of metadata verifiers every
4975 * data fork in local format now contains caller specific data
4976 * and as such conversion through this function is likely to be
4977 * broken.
4978 *
4979 * The only likely user of this branch is for remote symlinks,
4980 * but we cannot overwrite the data fork contents of the symlink
4981 * (EEXIST occurs higher up the stack) and so it will never go
4982 * from local format to extent format here. Hence I don't think
4983 * this branch is ever executed intentionally and we should
4984 * consider removing it and asserting that xfs_bmapi_write()
4985 * cannot be called directly on local format forks. i.e. callers
4986 * are completely responsible for local to extent format
4987 * conversion, not xfs_bmapi_write().
4988 */
4925 error = xfs_bmap_local_to_extents(tp, ip, firstblock, total, 4989 error = xfs_bmap_local_to_extents(tp, ip, firstblock, total,
4926 &bma.logflags, whichfork); 4990 &bma.logflags, whichfork,
4991 xfs_bmap_local_to_extents_init_fn);
4927 if (error) 4992 if (error)
4928 goto error0; 4993 goto error0;
4929 } 4994 }
@@ -4956,6 +5021,9 @@ xfs_bmapi_write(
4956 bma.flist = flist; 5021 bma.flist = flist;
4957 bma.firstblock = firstblock; 5022 bma.firstblock = firstblock;
4958 5023
5024 if (flags & XFS_BMAPI_STACK_SWITCH)
5025 bma.stack_switch = 1;
5026
4959 while (bno < end && n < *nmap) { 5027 while (bno < end && n < *nmap) {
4960 inhole = eof || bma.got.br_startoff > bno; 5028 inhole = eof || bma.got.br_startoff > bno;
4961 wasdelay = !inhole && isnullstartblock(bma.got.br_startblock); 5029 wasdelay = !inhole && isnullstartblock(bma.got.br_startblock);
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index 56d1614760cf..4e8f0df82d02 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -487,6 +487,7 @@ _xfs_buf_find(
487 struct rb_node *parent; 487 struct rb_node *parent;
488 xfs_buf_t *bp; 488 xfs_buf_t *bp;
489 xfs_daddr_t blkno = map[0].bm_bn; 489 xfs_daddr_t blkno = map[0].bm_bn;
490 xfs_daddr_t eofs;
490 int numblks = 0; 491 int numblks = 0;
491 int i; 492 int i;
492 493
@@ -498,6 +499,23 @@ _xfs_buf_find(
498 ASSERT(!(numbytes < (1 << btp->bt_sshift))); 499 ASSERT(!(numbytes < (1 << btp->bt_sshift)));
499 ASSERT(!(BBTOB(blkno) & (xfs_off_t)btp->bt_smask)); 500 ASSERT(!(BBTOB(blkno) & (xfs_off_t)btp->bt_smask));
500 501
502 /*
503 * Corrupted block numbers can get through to here, unfortunately, so we
504 * have to check that the buffer falls within the filesystem bounds.
505 */
506 eofs = XFS_FSB_TO_BB(btp->bt_mount, btp->bt_mount->m_sb.sb_dblocks);
507 if (blkno >= eofs) {
508 /*
509 * XXX (dgc): we should really be returning EFSCORRUPTED here,
510 * but none of the higher level infrastructure supports
511 * returning a specific error on buffer lookup failures.
512 */
513 xfs_alert(btp->bt_mount,
514 "%s: Block out of range: block 0x%llx, EOFS 0x%llx ",
515 __func__, blkno, eofs);
516 return NULL;
517 }
518
501 /* get tree root */ 519 /* get tree root */
502 pag = xfs_perag_get(btp->bt_mount, 520 pag = xfs_perag_get(btp->bt_mount,
503 xfs_daddr_to_agno(btp->bt_mount, blkno)); 521 xfs_daddr_to_agno(btp->bt_mount, blkno));
@@ -933,8 +951,6 @@ xfs_buf_trylock(
933 locked = down_trylock(&bp->b_sema) == 0; 951 locked = down_trylock(&bp->b_sema) == 0;
934 if (locked) 952 if (locked)
935 XB_SET_OWNER(bp); 953 XB_SET_OWNER(bp);
936 else if (atomic_read(&bp->b_pin_count) && (bp->b_flags & XBF_STALE))
937 xfs_log_force(bp->b_target->bt_mount, 0);
938 954
939 trace_xfs_buf_trylock(bp, _RET_IP_); 955 trace_xfs_buf_trylock(bp, _RET_IP_);
940 return locked; 956 return locked;
@@ -1487,6 +1503,8 @@ restart:
1487 while (!list_empty(&btp->bt_lru)) { 1503 while (!list_empty(&btp->bt_lru)) {
1488 bp = list_first_entry(&btp->bt_lru, struct xfs_buf, b_lru); 1504 bp = list_first_entry(&btp->bt_lru, struct xfs_buf, b_lru);
1489 if (atomic_read(&bp->b_hold) > 1) { 1505 if (atomic_read(&bp->b_hold) > 1) {
1506 trace_xfs_buf_wait_buftarg(bp, _RET_IP_);
1507 list_move_tail(&bp->b_lru, &btp->bt_lru);
1490 spin_unlock(&btp->bt_lru_lock); 1508 spin_unlock(&btp->bt_lru_lock);
1491 delay(100); 1509 delay(100);
1492 goto restart; 1510 goto restart;
diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c
index 77b09750e92c..cf263476d6b4 100644
--- a/fs/xfs/xfs_buf_item.c
+++ b/fs/xfs/xfs_buf_item.c
@@ -37,109 +37,6 @@ static inline struct xfs_buf_log_item *BUF_ITEM(struct xfs_log_item *lip)
37 return container_of(lip, struct xfs_buf_log_item, bli_item); 37 return container_of(lip, struct xfs_buf_log_item, bli_item);
38} 38}
39 39
40
41#ifdef XFS_TRANS_DEBUG
42/*
43 * This function uses an alternate strategy for tracking the bytes
44 * that the user requests to be logged. This can then be used
45 * in conjunction with the bli_orig array in the buf log item to
46 * catch bugs in our callers' code.
47 *
48 * We also double check the bits set in xfs_buf_item_log using a
49 * simple algorithm to check that every byte is accounted for.
50 */
51STATIC void
52xfs_buf_item_log_debug(
53 xfs_buf_log_item_t *bip,
54 uint first,
55 uint last)
56{
57 uint x;
58 uint byte;
59 uint nbytes;
60 uint chunk_num;
61 uint word_num;
62 uint bit_num;
63 uint bit_set;
64 uint *wordp;
65
66 ASSERT(bip->bli_logged != NULL);
67 byte = first;
68 nbytes = last - first + 1;
69 bfset(bip->bli_logged, first, nbytes);
70 for (x = 0; x < nbytes; x++) {
71 chunk_num = byte >> XFS_BLF_SHIFT;
72 word_num = chunk_num >> BIT_TO_WORD_SHIFT;
73 bit_num = chunk_num & (NBWORD - 1);
74 wordp = &(bip->__bli_format.blf_data_map[word_num]);
75 bit_set = *wordp & (1 << bit_num);
76 ASSERT(bit_set);
77 byte++;
78 }
79}
80
81/*
82 * This function is called when we flush something into a buffer without
83 * logging it. This happens for things like inodes which are logged
84 * separately from the buffer.
85 */
86void
87xfs_buf_item_flush_log_debug(
88 xfs_buf_t *bp,
89 uint first,
90 uint last)
91{
92 xfs_buf_log_item_t *bip = bp->b_fspriv;
93 uint nbytes;
94
95 if (bip == NULL || (bip->bli_item.li_type != XFS_LI_BUF))
96 return;
97
98 ASSERT(bip->bli_logged != NULL);
99 nbytes = last - first + 1;
100 bfset(bip->bli_logged, first, nbytes);
101}
102
103/*
104 * This function is called to verify that our callers have logged
105 * all the bytes that they changed.
106 *
107 * It does this by comparing the original copy of the buffer stored in
108 * the buf log item's bli_orig array to the current copy of the buffer
109 * and ensuring that all bytes which mismatch are set in the bli_logged
110 * array of the buf log item.
111 */
112STATIC void
113xfs_buf_item_log_check(
114 xfs_buf_log_item_t *bip)
115{
116 char *orig;
117 char *buffer;
118 int x;
119 xfs_buf_t *bp;
120
121 ASSERT(bip->bli_orig != NULL);
122 ASSERT(bip->bli_logged != NULL);
123
124 bp = bip->bli_buf;
125 ASSERT(bp->b_length > 0);
126 ASSERT(bp->b_addr != NULL);
127 orig = bip->bli_orig;
128 buffer = bp->b_addr;
129 for (x = 0; x < BBTOB(bp->b_length); x++) {
130 if (orig[x] != buffer[x] && !btst(bip->bli_logged, x)) {
131 xfs_emerg(bp->b_mount,
132 "%s: bip %x buffer %x orig %x index %d",
133 __func__, bip, bp, orig, x);
134 ASSERT(0);
135 }
136 }
137}
138#else
139#define xfs_buf_item_log_debug(x,y,z)
140#define xfs_buf_item_log_check(x)
141#endif
142
143STATIC void xfs_buf_do_callbacks(struct xfs_buf *bp); 40STATIC void xfs_buf_do_callbacks(struct xfs_buf *bp);
144 41
145/* 42/*
@@ -429,7 +326,6 @@ xfs_buf_item_format(
429 * Check to make sure everything is consistent. 326 * Check to make sure everything is consistent.
430 */ 327 */
431 trace_xfs_buf_item_format(bip); 328 trace_xfs_buf_item_format(bip);
432 xfs_buf_item_log_check(bip);
433} 329}
434 330
435/* 331/*
@@ -573,8 +469,18 @@ xfs_buf_item_push(
573 469
574 if (xfs_buf_ispinned(bp)) 470 if (xfs_buf_ispinned(bp))
575 return XFS_ITEM_PINNED; 471 return XFS_ITEM_PINNED;
576 if (!xfs_buf_trylock(bp)) 472 if (!xfs_buf_trylock(bp)) {
473 /*
474 * If we have just raced with a buffer being pinned and it has
475 * been marked stale, we could end up stalling until someone else
476 * issues a log force to unpin the stale buffer. Check for the
477 * race condition here so xfsaild recognizes the buffer is pinned
478 * and queues a log force to move it along.
479 */
480 if (xfs_buf_ispinned(bp))
481 return XFS_ITEM_PINNED;
577 return XFS_ITEM_LOCKED; 482 return XFS_ITEM_LOCKED;
483 }
578 484
579 ASSERT(!(bip->bli_flags & XFS_BLI_STALE)); 485 ASSERT(!(bip->bli_flags & XFS_BLI_STALE));
580 486
@@ -652,7 +558,10 @@ xfs_buf_item_unlock(
652 558
653 /* 559 /*
654 * If the buf item isn't tracking any data, free it, otherwise drop the 560 * If the buf item isn't tracking any data, free it, otherwise drop the
655 * reference we hold to it. 561 * reference we hold to it. If we are aborting the transaction, this may
562 * be the only reference to the buf item, so we free it anyway
563 * regardless of whether it is dirty or not. A dirty abort implies a
564 * shutdown, anyway.
656 */ 565 */
657 clean = 1; 566 clean = 1;
658 for (i = 0; i < bip->bli_format_count; i++) { 567 for (i = 0; i < bip->bli_format_count; i++) {
@@ -664,7 +573,12 @@ xfs_buf_item_unlock(
664 } 573 }
665 if (clean) 574 if (clean)
666 xfs_buf_item_relse(bp); 575 xfs_buf_item_relse(bp);
667 else 576 else if (aborted) {
577 if (atomic_dec_and_test(&bip->bli_refcount)) {
578 ASSERT(XFS_FORCED_SHUTDOWN(lip->li_mountp));
579 xfs_buf_item_relse(bp);
580 }
581 } else
668 atomic_dec(&bip->bli_refcount); 582 atomic_dec(&bip->bli_refcount);
669 583
670 if (!hold) 584 if (!hold)
@@ -915,8 +829,6 @@ xfs_buf_item_log_segment(
915 mask = (1 << end_bit) - 1; 829 mask = (1 << end_bit) - 1;
916 *wordp |= mask; 830 *wordp |= mask;
917 } 831 }
918
919 xfs_buf_item_log_debug(bip, first, last);
920} 832}
921 833
922/* 834/*
diff --git a/fs/xfs/xfs_buf_item.h b/fs/xfs/xfs_buf_item.h
index 16def435944a..ee36c88ecfde 100644
--- a/fs/xfs/xfs_buf_item.h
+++ b/fs/xfs/xfs_buf_item.h
@@ -98,10 +98,6 @@ typedef struct xfs_buf_log_item {
98 unsigned int bli_flags; /* misc flags */ 98 unsigned int bli_flags; /* misc flags */
99 unsigned int bli_recur; /* lock recursion count */ 99 unsigned int bli_recur; /* lock recursion count */
100 atomic_t bli_refcount; /* cnt of tp refs */ 100 atomic_t bli_refcount; /* cnt of tp refs */
101#ifdef XFS_TRANS_DEBUG
102 char *bli_orig; /* original buffer copy */
103 char *bli_logged; /* bytes logged (bitmap) */
104#endif
105 int bli_format_count; /* count of headers */ 101 int bli_format_count; /* count of headers */
106 struct xfs_buf_log_format *bli_formats; /* array of in-log header ptrs */ 102 struct xfs_buf_log_format *bli_formats; /* array of in-log header ptrs */
107 struct xfs_buf_log_format __bli_format; /* embedded in-log header */ 103 struct xfs_buf_log_format __bli_format; /* embedded in-log header */
@@ -117,16 +113,6 @@ void xfs_buf_attach_iodone(struct xfs_buf *,
117void xfs_buf_iodone_callbacks(struct xfs_buf *); 113void xfs_buf_iodone_callbacks(struct xfs_buf *);
118void xfs_buf_iodone(struct xfs_buf *, struct xfs_log_item *); 114void xfs_buf_iodone(struct xfs_buf *, struct xfs_log_item *);
119 115
120#ifdef XFS_TRANS_DEBUG
121void
122xfs_buf_item_flush_log_debug(
123 struct xfs_buf *bp,
124 uint first,
125 uint last);
126#else
127#define xfs_buf_item_flush_log_debug(bp, first, last)
128#endif
129
130#endif /* __KERNEL__ */ 116#endif /* __KERNEL__ */
131 117
132#endif /* __XFS_BUF_ITEM_H__ */ 118#endif /* __XFS_BUF_ITEM_H__ */
diff --git a/fs/xfs/xfs_dfrag.c b/fs/xfs/xfs_dfrag.c
index d0e9c74d3d96..a8bd26b82ecb 100644
--- a/fs/xfs/xfs_dfrag.c
+++ b/fs/xfs/xfs_dfrag.c
@@ -246,10 +246,10 @@ xfs_swap_extents(
246 goto out_unlock; 246 goto out_unlock;
247 } 247 }
248 248
249 error = -filemap_write_and_wait(VFS_I(ip)->i_mapping); 249 error = -filemap_write_and_wait(VFS_I(tip)->i_mapping);
250 if (error) 250 if (error)
251 goto out_unlock; 251 goto out_unlock;
252 truncate_pagecache_range(VFS_I(ip), 0, -1); 252 truncate_pagecache_range(VFS_I(tip), 0, -1);
253 253
254 /* Verify O_DIRECT for ftmp */ 254 /* Verify O_DIRECT for ftmp */
255 if (VN_CACHED(VFS_I(tip)) != 0) { 255 if (VN_CACHED(VFS_I(tip)) != 0) {
diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c
index 9e1bf5294c91..8025eb23ad72 100644
--- a/fs/xfs/xfs_dquot.c
+++ b/fs/xfs/xfs_dquot.c
@@ -612,15 +612,9 @@ xfs_qm_dqread(
612 if (flags & XFS_QMOPT_DQALLOC) { 612 if (flags & XFS_QMOPT_DQALLOC) {
613 tp = xfs_trans_alloc(mp, XFS_TRANS_QM_DQALLOC); 613 tp = xfs_trans_alloc(mp, XFS_TRANS_QM_DQALLOC);
614 error = xfs_trans_reserve(tp, XFS_QM_DQALLOC_SPACE_RES(mp), 614 error = xfs_trans_reserve(tp, XFS_QM_DQALLOC_SPACE_RES(mp),
615 XFS_WRITE_LOG_RES(mp) + 615 XFS_QM_DQALLOC_LOG_RES(mp), 0,
616 /* 616 XFS_TRANS_PERM_LOG_RES,
617 * Round the chunklen up to the next multiple 617 XFS_WRITE_LOG_COUNT);
618 * of 128 (buf log item chunk size)).
619 */
620 BBTOB(mp->m_quotainfo->qi_dqchunklen) - 1 + 128,
621 0,
622 XFS_TRANS_PERM_LOG_RES,
623 XFS_WRITE_LOG_COUNT);
624 if (error) 618 if (error)
625 goto error1; 619 goto error1;
626 cancelflags = XFS_TRANS_RELEASE_LOG_RES; 620 cancelflags = XFS_TRANS_RELEASE_LOG_RES;
diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c
index 94eaeedc5498..2866b8c78b7a 100644
--- a/fs/xfs/xfs_fsops.c
+++ b/fs/xfs/xfs_fsops.c
@@ -709,8 +709,8 @@ xfs_fs_log_dummy(
709 int error; 709 int error;
710 710
711 tp = _xfs_trans_alloc(mp, XFS_TRANS_DUMMY1, KM_SLEEP); 711 tp = _xfs_trans_alloc(mp, XFS_TRANS_DUMMY1, KM_SLEEP);
712 error = xfs_trans_reserve(tp, 0, mp->m_sb.sb_sectsize + 128, 0, 0, 712 error = xfs_trans_reserve(tp, 0, XFS_SB_LOG_RES(mp), 0, 0,
713 XFS_DEFAULT_LOG_COUNT); 713 XFS_DEFAULT_LOG_COUNT);
714 if (error) { 714 if (error) {
715 xfs_trans_cancel(tp, 0); 715 xfs_trans_cancel(tp, 0);
716 return error; 716 return error;
diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c
index a815412eab80..515bf71ce01c 100644
--- a/fs/xfs/xfs_ialloc.c
+++ b/fs/xfs/xfs_ialloc.c
@@ -279,8 +279,6 @@ xfs_ialloc_ag_alloc(
279 (args.agbno < be32_to_cpu(agi->agi_length)))) { 279 (args.agbno < be32_to_cpu(agi->agi_length)))) {
280 args.fsbno = XFS_AGB_TO_FSB(args.mp, agno, args.agbno); 280 args.fsbno = XFS_AGB_TO_FSB(args.mp, agno, args.agbno);
281 args.type = XFS_ALLOCTYPE_THIS_BNO; 281 args.type = XFS_ALLOCTYPE_THIS_BNO;
282 args.mod = args.total = args.wasdel = args.isfl =
283 args.userdata = args.minalignslop = 0;
284 args.prod = 1; 282 args.prod = 1;
285 283
286 /* 284 /*
@@ -333,8 +331,6 @@ xfs_ialloc_ag_alloc(
333 * Allocate a fixed-size extent of inodes. 331 * Allocate a fixed-size extent of inodes.
334 */ 332 */
335 args.type = XFS_ALLOCTYPE_NEAR_BNO; 333 args.type = XFS_ALLOCTYPE_NEAR_BNO;
336 args.mod = args.total = args.wasdel = args.isfl =
337 args.userdata = args.minalignslop = 0;
338 args.prod = 1; 334 args.prod = 1;
339 /* 335 /*
340 * Allow space for the inode btree to split. 336 * Allow space for the inode btree to split.
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 66282dcb821b..4f201656d2d9 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -2379,9 +2379,6 @@ xfs_iflush_fork(
2379 char *cp; 2379 char *cp;
2380 xfs_ifork_t *ifp; 2380 xfs_ifork_t *ifp;
2381 xfs_mount_t *mp; 2381 xfs_mount_t *mp;
2382#ifdef XFS_TRANS_DEBUG
2383 int first;
2384#endif
2385 static const short brootflag[2] = 2382 static const short brootflag[2] =
2386 { XFS_ILOG_DBROOT, XFS_ILOG_ABROOT }; 2383 { XFS_ILOG_DBROOT, XFS_ILOG_ABROOT };
2387 static const short dataflag[2] = 2384 static const short dataflag[2] =
@@ -2724,9 +2721,6 @@ xfs_iflush_int(
2724 xfs_inode_log_item_t *iip; 2721 xfs_inode_log_item_t *iip;
2725 xfs_dinode_t *dip; 2722 xfs_dinode_t *dip;
2726 xfs_mount_t *mp; 2723 xfs_mount_t *mp;
2727#ifdef XFS_TRANS_DEBUG
2728 int first;
2729#endif
2730 2724
2731 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED)); 2725 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED));
2732 ASSERT(xfs_isiflocked(ip)); 2726 ASSERT(xfs_isiflocked(ip));
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index 22baf6ea4fac..237e7f6f2ab3 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -419,6 +419,7 @@ static inline void xfs_iflock(struct xfs_inode *ip)
419static inline void xfs_ifunlock(struct xfs_inode *ip) 419static inline void xfs_ifunlock(struct xfs_inode *ip)
420{ 420{
421 xfs_iflags_clear(ip, XFS_IFLOCK); 421 xfs_iflags_clear(ip, XFS_IFLOCK);
422 smp_mb();
422 wake_up_bit(&ip->i_flags, __XFS_IFLOCK_BIT); 423 wake_up_bit(&ip->i_flags, __XFS_IFLOCK_BIT);
423} 424}
424 425
diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c
index d041d47d9d86..f034bd1652f0 100644
--- a/fs/xfs/xfs_inode_item.c
+++ b/fs/xfs/xfs_inode_item.c
@@ -269,17 +269,6 @@ xfs_inode_item_format(
269 } else { 269 } else {
270 ASSERT(!(iip->ili_fields & 270 ASSERT(!(iip->ili_fields &
271 XFS_ILOG_DBROOT)); 271 XFS_ILOG_DBROOT));
272#ifdef XFS_TRANS_DEBUG
273 if (iip->ili_root_size > 0) {
274 ASSERT(iip->ili_root_size ==
275 ip->i_df.if_broot_bytes);
276 ASSERT(memcmp(iip->ili_orig_root,
277 ip->i_df.if_broot,
278 iip->ili_root_size) == 0);
279 } else {
280 ASSERT(ip->i_df.if_broot_bytes == 0);
281 }
282#endif
283 iip->ili_fields &= ~XFS_ILOG_DBROOT; 272 iip->ili_fields &= ~XFS_ILOG_DBROOT;
284 } 273 }
285 break; 274 break;
@@ -678,11 +667,6 @@ void
678xfs_inode_item_destroy( 667xfs_inode_item_destroy(
679 xfs_inode_t *ip) 668 xfs_inode_t *ip)
680{ 669{
681#ifdef XFS_TRANS_DEBUG
682 if (ip->i_itemp->ili_root_size != 0) {
683 kmem_free(ip->i_itemp->ili_orig_root);
684 }
685#endif
686 kmem_zone_free(xfs_ili_zone, ip->i_itemp); 670 kmem_zone_free(xfs_ili_zone, ip->i_itemp);
687} 671}
688 672
diff --git a/fs/xfs/xfs_inode_item.h b/fs/xfs/xfs_inode_item.h
index 376d4d0b2635..779812fb3d80 100644
--- a/fs/xfs/xfs_inode_item.h
+++ b/fs/xfs/xfs_inode_item.h
@@ -148,10 +148,6 @@ typedef struct xfs_inode_log_item {
148 data exts */ 148 data exts */
149 struct xfs_bmbt_rec *ili_aextents_buf; /* array of logged 149 struct xfs_bmbt_rec *ili_aextents_buf; /* array of logged
150 attr exts */ 150 attr exts */
151#ifdef XFS_TRANS_DEBUG
152 int ili_root_size;
153 char *ili_orig_root;
154#endif
155 xfs_inode_log_format_t ili_format; /* logged structure */ 151 xfs_inode_log_format_t ili_format; /* logged structure */
156} xfs_inode_log_item_t; 152} xfs_inode_log_item_t;
157 153
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index add06b4e9a63..912d83d8860a 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -311,6 +311,62 @@ xfs_iomap_eof_want_preallocate(
311} 311}
312 312
313/* 313/*
314 * Determine the initial size of the preallocation. We are beyond the current
315 * EOF here, but we need to take into account whether this is a sparse write or
316 * an extending write when determining the preallocation size. Hence we need to
317 * look up the extent that ends at the current write offset and use the result
318 * to determine the preallocation size.
319 *
320 * If the extent is a hole, then preallocation is essentially disabled.
321 * Otherwise we take the size of the preceding data extent as the basis for the
322 * preallocation size. If the size of the extent is greater than half the
323 * maximum extent length, then use the current offset as the basis. This ensures
324 * that for large files the preallocation size always extends to MAXEXTLEN
325 * rather than falling short due to things like stripe unit/width alignment of
326 * real extents.
327 */
328STATIC int
329xfs_iomap_eof_prealloc_initial_size(
330 struct xfs_mount *mp,
331 struct xfs_inode *ip,
332 xfs_off_t offset,
333 xfs_bmbt_irec_t *imap,
334 int nimaps)
335{
336 xfs_fileoff_t start_fsb;
337 int imaps = 1;
338 int error;
339
340 ASSERT(nimaps >= imaps);
341
342 /* if we are using a specific prealloc size, return now */
343 if (mp->m_flags & XFS_MOUNT_DFLT_IOSIZE)
344 return 0;
345
346 /*
347 * As we write multiple pages, the offset will always align to the
348 * start of a page and hence point to a hole at EOF. i.e. if the size is
349 * 4096 bytes, we only have one block at FSB 0, but XFS_B_TO_FSB(4096)
350 * will return FSB 1. Hence if there are blocks in the file, we want to
351 * point to the block prior to the EOF block and not the hole that maps
352 * directly at @offset.
353 */
354 start_fsb = XFS_B_TO_FSB(mp, offset);
355 if (start_fsb)
356 start_fsb--;
357 error = xfs_bmapi_read(ip, start_fsb, 1, imap, &imaps, XFS_BMAPI_ENTIRE);
358 if (error)
359 return 0;
360
361 ASSERT(imaps == 1);
362 if (imap[0].br_startblock == HOLESTARTBLOCK)
363 return 0;
364 if (imap[0].br_blockcount <= (MAXEXTLEN >> 1))
365 return imap[0].br_blockcount;
366 return XFS_B_TO_FSB(mp, offset);
367}
368
369/*
314 * If we don't have a user specified preallocation size, dynamically increase 370 * If we don't have a user specified preallocation size, dynamically increase
315 * the preallocation size as the size of the file grows. Cap the maximum size 371 * the preallocation size as the size of the file grows. Cap the maximum size
316 * at a single extent or less if the filesystem is near full. The closer the 372 * at a single extent or less if the filesystem is near full. The closer the
@@ -319,20 +375,19 @@ xfs_iomap_eof_want_preallocate(
319STATIC xfs_fsblock_t 375STATIC xfs_fsblock_t
320xfs_iomap_prealloc_size( 376xfs_iomap_prealloc_size(
321 struct xfs_mount *mp, 377 struct xfs_mount *mp,
322 struct xfs_inode *ip) 378 struct xfs_inode *ip,
379 xfs_off_t offset,
380 struct xfs_bmbt_irec *imap,
381 int nimaps)
323{ 382{
324 xfs_fsblock_t alloc_blocks = 0; 383 xfs_fsblock_t alloc_blocks = 0;
325 384
326 if (!(mp->m_flags & XFS_MOUNT_DFLT_IOSIZE)) { 385 alloc_blocks = xfs_iomap_eof_prealloc_initial_size(mp, ip, offset,
386 imap, nimaps);
387 if (alloc_blocks > 0) {
327 int shift = 0; 388 int shift = 0;
328 int64_t freesp; 389 int64_t freesp;
329 390
330 /*
331 * rounddown_pow_of_two() returns an undefined result
332 * if we pass in alloc_blocks = 0. Hence the "+ 1" to
333 * ensure we always pass in a non-zero value.
334 */
335 alloc_blocks = XFS_B_TO_FSB(mp, XFS_ISIZE(ip)) + 1;
336 alloc_blocks = XFS_FILEOFF_MIN(MAXEXTLEN, 391 alloc_blocks = XFS_FILEOFF_MIN(MAXEXTLEN,
337 rounddown_pow_of_two(alloc_blocks)); 392 rounddown_pow_of_two(alloc_blocks));
338 393
@@ -351,6 +406,15 @@ xfs_iomap_prealloc_size(
351 } 406 }
352 if (shift) 407 if (shift)
353 alloc_blocks >>= shift; 408 alloc_blocks >>= shift;
409
410 /*
411 * If we are still trying to allocate more space than is
412 * available, squash the prealloc hard. This can happen if we
413 * have a large file on a small filesystem and the above
414 * lowspace thresholds are smaller than MAXEXTLEN.
415 */
416 while (alloc_blocks >= freesp)
417 alloc_blocks >>= 4;
354 } 418 }
355 419
356 if (alloc_blocks < mp->m_writeio_blocks) 420 if (alloc_blocks < mp->m_writeio_blocks)
@@ -390,7 +454,6 @@ xfs_iomap_write_delay(
390 extsz = xfs_get_extsz_hint(ip); 454 extsz = xfs_get_extsz_hint(ip);
391 offset_fsb = XFS_B_TO_FSBT(mp, offset); 455 offset_fsb = XFS_B_TO_FSBT(mp, offset);
392 456
393
394 error = xfs_iomap_eof_want_preallocate(mp, ip, offset, count, 457 error = xfs_iomap_eof_want_preallocate(mp, ip, offset, count,
395 imap, XFS_WRITE_IMAPS, &prealloc); 458 imap, XFS_WRITE_IMAPS, &prealloc);
396 if (error) 459 if (error)
@@ -398,7 +461,10 @@ xfs_iomap_write_delay(
398 461
399retry: 462retry:
400 if (prealloc) { 463 if (prealloc) {
401 xfs_fsblock_t alloc_blocks = xfs_iomap_prealloc_size(mp, ip); 464 xfs_fsblock_t alloc_blocks;
465
466 alloc_blocks = xfs_iomap_prealloc_size(mp, ip, offset, imap,
467 XFS_WRITE_IMAPS);
402 468
403 aligned_offset = XFS_WRITEIO_ALIGN(mp, (offset + count - 1)); 469 aligned_offset = XFS_WRITEIO_ALIGN(mp, (offset + count - 1));
404 ioalign = XFS_B_TO_FSBT(mp, aligned_offset); 470 ioalign = XFS_B_TO_FSBT(mp, aligned_offset);
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index 46bd9d52ab51..eec226f78a40 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -120,7 +120,7 @@ xlog_verify_iclog(
120 struct xlog *log, 120 struct xlog *log,
121 struct xlog_in_core *iclog, 121 struct xlog_in_core *iclog,
122 int count, 122 int count,
123 boolean_t syncing); 123 bool syncing);
124STATIC void 124STATIC void
125xlog_verify_tail_lsn( 125xlog_verify_tail_lsn(
126 struct xlog *log, 126 struct xlog *log,
@@ -1737,7 +1737,7 @@ xlog_sync(
1737 ASSERT(XFS_BUF_ADDR(bp) <= log->l_logBBsize-1); 1737 ASSERT(XFS_BUF_ADDR(bp) <= log->l_logBBsize-1);
1738 ASSERT(XFS_BUF_ADDR(bp) + BTOBB(count) <= log->l_logBBsize); 1738 ASSERT(XFS_BUF_ADDR(bp) + BTOBB(count) <= log->l_logBBsize);
1739 1739
1740 xlog_verify_iclog(log, iclog, count, B_TRUE); 1740 xlog_verify_iclog(log, iclog, count, true);
1741 1741
1742 /* account for log which doesn't start at block #0 */ 1742 /* account for log which doesn't start at block #0 */
1743 XFS_BUF_SET_ADDR(bp, XFS_BUF_ADDR(bp) + log->l_logBBstart); 1743 XFS_BUF_SET_ADDR(bp, XFS_BUF_ADDR(bp) + log->l_logBBstart);
@@ -3611,7 +3611,7 @@ xlog_verify_iclog(
3611 struct xlog *log, 3611 struct xlog *log,
3612 struct xlog_in_core *iclog, 3612 struct xlog_in_core *iclog,
3613 int count, 3613 int count,
3614 boolean_t syncing) 3614 bool syncing)
3615{ 3615{
3616 xlog_op_header_t *ophead; 3616 xlog_op_header_t *ophead;
3617 xlog_in_core_t *icptr; 3617 xlog_in_core_t *icptr;
@@ -3659,7 +3659,7 @@ xlog_verify_iclog(
3659 /* clientid is only 1 byte */ 3659 /* clientid is only 1 byte */
3660 field_offset = (__psint_t) 3660 field_offset = (__psint_t)
3661 ((xfs_caddr_t)&(ophead->oh_clientid) - base_ptr); 3661 ((xfs_caddr_t)&(ophead->oh_clientid) - base_ptr);
3662 if (syncing == B_FALSE || (field_offset & 0x1ff)) { 3662 if (!syncing || (field_offset & 0x1ff)) {
3663 clientid = ophead->oh_clientid; 3663 clientid = ophead->oh_clientid;
3664 } else { 3664 } else {
3665 idx = BTOBBT((xfs_caddr_t)&(ophead->oh_clientid) - iclog->ic_datap); 3665 idx = BTOBBT((xfs_caddr_t)&(ophead->oh_clientid) - iclog->ic_datap);
@@ -3682,7 +3682,7 @@ xlog_verify_iclog(
3682 /* check length */ 3682 /* check length */
3683 field_offset = (__psint_t) 3683 field_offset = (__psint_t)
3684 ((xfs_caddr_t)&(ophead->oh_len) - base_ptr); 3684 ((xfs_caddr_t)&(ophead->oh_len) - base_ptr);
3685 if (syncing == B_FALSE || (field_offset & 0x1ff)) { 3685 if (!syncing || (field_offset & 0x1ff)) {
3686 op_len = be32_to_cpu(ophead->oh_len); 3686 op_len = be32_to_cpu(ophead->oh_len);
3687 } else { 3687 } else {
3688 idx = BTOBBT((__psint_t)&ophead->oh_len - 3688 idx = BTOBBT((__psint_t)&ophead->oh_len -
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index da508463ff10..3806088a8f77 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -658,7 +658,7 @@ xfs_sb_quiet_read_verify(
658 return; 658 return;
659 } 659 }
660 /* quietly fail */ 660 /* quietly fail */
661 xfs_buf_ioerror(bp, EFSCORRUPTED); 661 xfs_buf_ioerror(bp, EWRONGFS);
662} 662}
663 663
664static void 664static void
@@ -1109,8 +1109,8 @@ xfs_mount_reset_sbqflags(
1109 return 0; 1109 return 0;
1110 1110
1111 tp = xfs_trans_alloc(mp, XFS_TRANS_QM_SBCHANGE); 1111 tp = xfs_trans_alloc(mp, XFS_TRANS_QM_SBCHANGE);
1112 error = xfs_trans_reserve(tp, 0, mp->m_sb.sb_sectsize + 128, 0, 0, 1112 error = xfs_trans_reserve(tp, 0, XFS_QM_SBCHANGE_LOG_RES(mp),
1113 XFS_DEFAULT_LOG_COUNT); 1113 0, 0, XFS_DEFAULT_LOG_COUNT);
1114 if (error) { 1114 if (error) {
1115 xfs_trans_cancel(tp, 0); 1115 xfs_trans_cancel(tp, 0);
1116 xfs_alert(mp, "%s: Superblock update failed!", __func__); 1116 xfs_alert(mp, "%s: Superblock update failed!", __func__);
@@ -1583,8 +1583,8 @@ xfs_log_sbcount(xfs_mount_t *mp)
1583 return 0; 1583 return 0;
1584 1584
1585 tp = _xfs_trans_alloc(mp, XFS_TRANS_SB_COUNT, KM_SLEEP); 1585 tp = _xfs_trans_alloc(mp, XFS_TRANS_SB_COUNT, KM_SLEEP);
1586 error = xfs_trans_reserve(tp, 0, mp->m_sb.sb_sectsize + 128, 0, 0, 1586 error = xfs_trans_reserve(tp, 0, XFS_SB_LOG_RES(mp), 0, 0,
1587 XFS_DEFAULT_LOG_COUNT); 1587 XFS_DEFAULT_LOG_COUNT);
1588 if (error) { 1588 if (error) {
1589 xfs_trans_cancel(tp, 0); 1589 xfs_trans_cancel(tp, 0);
1590 return error; 1590 return error;
@@ -1945,8 +1945,8 @@ xfs_mount_log_sb(
1945 XFS_SB_VERSIONNUM)); 1945 XFS_SB_VERSIONNUM));
1946 1946
1947 tp = xfs_trans_alloc(mp, XFS_TRANS_SB_UNIT); 1947 tp = xfs_trans_alloc(mp, XFS_TRANS_SB_UNIT);
1948 error = xfs_trans_reserve(tp, 0, mp->m_sb.sb_sectsize + 128, 0, 0, 1948 error = xfs_trans_reserve(tp, 0, XFS_SB_LOG_RES(mp), 0, 0,
1949 XFS_DEFAULT_LOG_COUNT); 1949 XFS_DEFAULT_LOG_COUNT);
1950 if (error) { 1950 if (error) {
1951 xfs_trans_cancel(tp, 0); 1951 xfs_trans_cancel(tp, 0);
1952 return error; 1952 return error;
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index bab8314507e4..bc907061d392 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -34,12 +34,19 @@ typedef struct xfs_trans_reservations {
34 uint tr_addafork; /* cvt inode to attributed trans */ 34 uint tr_addafork; /* cvt inode to attributed trans */
35 uint tr_writeid; /* write setuid/setgid file */ 35 uint tr_writeid; /* write setuid/setgid file */
36 uint tr_attrinval; /* attr fork buffer invalidation */ 36 uint tr_attrinval; /* attr fork buffer invalidation */
37 uint tr_attrset; /* set/create an attribute */ 37 uint tr_attrsetm; /* set/create an attribute at mount time */
38 uint tr_attrsetrt; /* set/create an attribute at runtime */
38 uint tr_attrrm; /* remove an attribute */ 39 uint tr_attrrm; /* remove an attribute */
39 uint tr_clearagi; /* clear bad agi unlinked ino bucket */ 40 uint tr_clearagi; /* clear bad agi unlinked ino bucket */
40 uint tr_growrtalloc; /* grow realtime allocations */ 41 uint tr_growrtalloc; /* grow realtime allocations */
41 uint tr_growrtzero; /* grow realtime zeroing */ 42 uint tr_growrtzero; /* grow realtime zeroing */
42 uint tr_growrtfree; /* grow realtime freeing */ 43 uint tr_growrtfree; /* grow realtime freeing */
44 uint tr_qm_sbchange; /* change quota flags */
45 uint tr_qm_setqlim; /* adjust quota limits */
46 uint tr_qm_dqalloc; /* allocate quota on disk */
47 uint tr_qm_quotaoff; /* turn quota off */
48 uint tr_qm_equotaoff;/* end of turn quota off */
49 uint tr_sb; /* modify superblock */
43} xfs_trans_reservations_t; 50} xfs_trans_reservations_t;
44 51
45#ifndef __KERNEL__ 52#ifndef __KERNEL__
diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c
index 60eff4763156..e5b5cf973781 100644
--- a/fs/xfs/xfs_qm.c
+++ b/fs/xfs/xfs_qm.c
@@ -1584,10 +1584,9 @@ xfs_qm_write_sb_changes(
1584 int error; 1584 int error;
1585 1585
1586 tp = xfs_trans_alloc(mp, XFS_TRANS_QM_SBCHANGE); 1586 tp = xfs_trans_alloc(mp, XFS_TRANS_QM_SBCHANGE);
1587 if ((error = xfs_trans_reserve(tp, 0, 1587 error = xfs_trans_reserve(tp, 0, XFS_QM_SBCHANGE_LOG_RES(mp),
1588 mp->m_sb.sb_sectsize + 128, 0, 1588 0, 0, XFS_DEFAULT_LOG_COUNT);
1589 0, 1589 if (error) {
1590 XFS_DEFAULT_LOG_COUNT))) {
1591 xfs_trans_cancel(tp, 0); 1590 xfs_trans_cancel(tp, 0);
1592 return error; 1591 return error;
1593 } 1592 }
diff --git a/fs/xfs/xfs_qm_bhv.c b/fs/xfs/xfs_qm_bhv.c
index 6b39115bf145..2d02eac1c9a8 100644
--- a/fs/xfs/xfs_qm_bhv.c
+++ b/fs/xfs/xfs_qm_bhv.c
@@ -146,7 +146,7 @@ xfs_qm_newmount(
146 * inode goes inactive and wants to free blocks, 146 * inode goes inactive and wants to free blocks,
147 * or via xfs_log_mount_finish. 147 * or via xfs_log_mount_finish.
148 */ 148 */
149 *needquotamount = B_TRUE; 149 *needquotamount = true;
150 *quotaflags = mp->m_qflags; 150 *quotaflags = mp->m_qflags;
151 mp->m_qflags = 0; 151 mp->m_qflags = 0;
152 } 152 }
diff --git a/fs/xfs/xfs_qm_syscalls.c b/fs/xfs/xfs_qm_syscalls.c
index 8a59f8546552..cf9a34051e07 100644
--- a/fs/xfs/xfs_qm_syscalls.c
+++ b/fs/xfs/xfs_qm_syscalls.c
@@ -408,10 +408,10 @@ xfs_qm_scall_getqstat(
408{ 408{
409 struct xfs_quotainfo *q = mp->m_quotainfo; 409 struct xfs_quotainfo *q = mp->m_quotainfo;
410 struct xfs_inode *uip, *gip; 410 struct xfs_inode *uip, *gip;
411 boolean_t tempuqip, tempgqip; 411 bool tempuqip, tempgqip;
412 412
413 uip = gip = NULL; 413 uip = gip = NULL;
414 tempuqip = tempgqip = B_FALSE; 414 tempuqip = tempgqip = false;
415 memset(out, 0, sizeof(fs_quota_stat_t)); 415 memset(out, 0, sizeof(fs_quota_stat_t));
416 416
417 out->qs_version = FS_QSTAT_VERSION; 417 out->qs_version = FS_QSTAT_VERSION;
@@ -434,12 +434,12 @@ xfs_qm_scall_getqstat(
434 if (!uip && mp->m_sb.sb_uquotino != NULLFSINO) { 434 if (!uip && mp->m_sb.sb_uquotino != NULLFSINO) {
435 if (xfs_iget(mp, NULL, mp->m_sb.sb_uquotino, 435 if (xfs_iget(mp, NULL, mp->m_sb.sb_uquotino,
436 0, 0, &uip) == 0) 436 0, 0, &uip) == 0)
437 tempuqip = B_TRUE; 437 tempuqip = true;
438 } 438 }
439 if (!gip && mp->m_sb.sb_gquotino != NULLFSINO) { 439 if (!gip && mp->m_sb.sb_gquotino != NULLFSINO) {
440 if (xfs_iget(mp, NULL, mp->m_sb.sb_gquotino, 440 if (xfs_iget(mp, NULL, mp->m_sb.sb_gquotino,
441 0, 0, &gip) == 0) 441 0, 0, &gip) == 0)
442 tempgqip = B_TRUE; 442 tempgqip = true;
443 } 443 }
444 if (uip) { 444 if (uip) {
445 out->qs_uquota.qfs_nblks = uip->i_d.di_nblocks; 445 out->qs_uquota.qfs_nblks = uip->i_d.di_nblocks;
@@ -490,8 +490,9 @@ xfs_qm_scall_setqlim(
490 return 0; 490 return 0;
491 491
492 tp = xfs_trans_alloc(mp, XFS_TRANS_QM_SETQLIM); 492 tp = xfs_trans_alloc(mp, XFS_TRANS_QM_SETQLIM);
493 if ((error = xfs_trans_reserve(tp, 0, sizeof(xfs_disk_dquot_t) + 128, 493 error = xfs_trans_reserve(tp, 0, XFS_QM_SETQLIM_LOG_RES(mp),
494 0, 0, XFS_DEFAULT_LOG_COUNT))) { 494 0, 0, XFS_DEFAULT_LOG_COUNT);
495 if (error) {
495 xfs_trans_cancel(tp, 0); 496 xfs_trans_cancel(tp, 0);
496 return (error); 497 return (error);
497 } 498 }
@@ -638,8 +639,9 @@ xfs_qm_log_quotaoff_end(
638 639
639 tp = xfs_trans_alloc(mp, XFS_TRANS_QM_QUOTAOFF_END); 640 tp = xfs_trans_alloc(mp, XFS_TRANS_QM_QUOTAOFF_END);
640 641
641 if ((error = xfs_trans_reserve(tp, 0, sizeof(xfs_qoff_logitem_t) * 2, 642 error = xfs_trans_reserve(tp, 0, XFS_QM_QUOTAOFF_END_LOG_RES(mp),
642 0, 0, XFS_DEFAULT_LOG_COUNT))) { 643 0, 0, XFS_DEFAULT_LOG_COUNT);
644 if (error) {
643 xfs_trans_cancel(tp, 0); 645 xfs_trans_cancel(tp, 0);
644 return (error); 646 return (error);
645 } 647 }
@@ -671,14 +673,10 @@ xfs_qm_log_quotaoff(
671 uint oldsbqflag=0; 673 uint oldsbqflag=0;
672 674
673 tp = xfs_trans_alloc(mp, XFS_TRANS_QM_QUOTAOFF); 675 tp = xfs_trans_alloc(mp, XFS_TRANS_QM_QUOTAOFF);
674 if ((error = xfs_trans_reserve(tp, 0, 676 error = xfs_trans_reserve(tp, 0, XFS_QM_QUOTAOFF_LOG_RES(mp),
675 sizeof(xfs_qoff_logitem_t) * 2 + 677 0, 0, XFS_DEFAULT_LOG_COUNT);
676 mp->m_sb.sb_sectsize + 128, 678 if (error)
677 0,
678 0,
679 XFS_DEFAULT_LOG_COUNT))) {
680 goto error0; 679 goto error0;
681 }
682 680
683 qoffi = xfs_trans_get_qoff_item(tp, NULL, flags & XFS_ALL_QUOTA_ACCT); 681 qoffi = xfs_trans_get_qoff_item(tp, NULL, flags & XFS_ALL_QUOTA_ACCT);
684 xfs_trans_log_quotaoff_item(tp, qoffi); 682 xfs_trans_log_quotaoff_item(tp, qoffi);
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index ab8839b26272..c407121873b4 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -139,9 +139,9 @@ static const match_table_t tokens = {
139 139
140 140
141STATIC unsigned long 141STATIC unsigned long
142suffix_strtoul(char *s, char **endp, unsigned int base) 142suffix_kstrtoint(char *s, unsigned int base, int *res)
143{ 143{
144 int last, shift_left_factor = 0; 144 int last, shift_left_factor = 0, _res;
145 char *value = s; 145 char *value = s;
146 146
147 last = strlen(value) - 1; 147 last = strlen(value) - 1;
@@ -158,7 +158,10 @@ suffix_strtoul(char *s, char **endp, unsigned int base)
158 value[last] = '\0'; 158 value[last] = '\0';
159 } 159 }
160 160
161 return simple_strtoul((const char *)s, endp, base) << shift_left_factor; 161 if (kstrtoint(s, base, &_res))
162 return -EINVAL;
163 *res = _res << shift_left_factor;
164 return 0;
162} 165}
163 166
164/* 167/*
@@ -174,7 +177,7 @@ xfs_parseargs(
174 char *options) 177 char *options)
175{ 178{
176 struct super_block *sb = mp->m_super; 179 struct super_block *sb = mp->m_super;
177 char *this_char, *value, *eov; 180 char *this_char, *value;
178 int dsunit = 0; 181 int dsunit = 0;
179 int dswidth = 0; 182 int dswidth = 0;
180 int iosize = 0; 183 int iosize = 0;
@@ -230,14 +233,16 @@ xfs_parseargs(
230 this_char); 233 this_char);
231 return EINVAL; 234 return EINVAL;
232 } 235 }
233 mp->m_logbufs = simple_strtoul(value, &eov, 10); 236 if (kstrtoint(value, 10, &mp->m_logbufs))
237 return EINVAL;
234 } else if (!strcmp(this_char, MNTOPT_LOGBSIZE)) { 238 } else if (!strcmp(this_char, MNTOPT_LOGBSIZE)) {
235 if (!value || !*value) { 239 if (!value || !*value) {
236 xfs_warn(mp, "%s option requires an argument", 240 xfs_warn(mp, "%s option requires an argument",
237 this_char); 241 this_char);
238 return EINVAL; 242 return EINVAL;
239 } 243 }
240 mp->m_logbsize = suffix_strtoul(value, &eov, 10); 244 if (suffix_kstrtoint(value, 10, &mp->m_logbsize))
245 return EINVAL;
241 } else if (!strcmp(this_char, MNTOPT_LOGDEV)) { 246 } else if (!strcmp(this_char, MNTOPT_LOGDEV)) {
242 if (!value || !*value) { 247 if (!value || !*value) {
243 xfs_warn(mp, "%s option requires an argument", 248 xfs_warn(mp, "%s option requires an argument",
@@ -266,7 +271,8 @@ xfs_parseargs(
266 this_char); 271 this_char);
267 return EINVAL; 272 return EINVAL;
268 } 273 }
269 iosize = simple_strtoul(value, &eov, 10); 274 if (kstrtoint(value, 10, &iosize))
275 return EINVAL;
270 iosizelog = ffs(iosize) - 1; 276 iosizelog = ffs(iosize) - 1;
271 } else if (!strcmp(this_char, MNTOPT_ALLOCSIZE)) { 277 } else if (!strcmp(this_char, MNTOPT_ALLOCSIZE)) {
272 if (!value || !*value) { 278 if (!value || !*value) {
@@ -274,7 +280,8 @@ xfs_parseargs(
274 this_char); 280 this_char);
275 return EINVAL; 281 return EINVAL;
276 } 282 }
277 iosize = suffix_strtoul(value, &eov, 10); 283 if (suffix_kstrtoint(value, 10, &iosize))
284 return EINVAL;
278 iosizelog = ffs(iosize) - 1; 285 iosizelog = ffs(iosize) - 1;
279 } else if (!strcmp(this_char, MNTOPT_GRPID) || 286 } else if (!strcmp(this_char, MNTOPT_GRPID) ||
280 !strcmp(this_char, MNTOPT_BSDGROUPS)) { 287 !strcmp(this_char, MNTOPT_BSDGROUPS)) {
@@ -296,14 +303,16 @@ xfs_parseargs(
296 this_char); 303 this_char);
297 return EINVAL; 304 return EINVAL;
298 } 305 }
299 dsunit = simple_strtoul(value, &eov, 10); 306 if (kstrtoint(value, 10, &dsunit))
307 return EINVAL;
300 } else if (!strcmp(this_char, MNTOPT_SWIDTH)) { 308 } else if (!strcmp(this_char, MNTOPT_SWIDTH)) {
301 if (!value || !*value) { 309 if (!value || !*value) {
302 xfs_warn(mp, "%s option requires an argument", 310 xfs_warn(mp, "%s option requires an argument",
303 this_char); 311 this_char);
304 return EINVAL; 312 return EINVAL;
305 } 313 }
306 dswidth = simple_strtoul(value, &eov, 10); 314 if (kstrtoint(value, 10, &dswidth))
315 return EINVAL;
307 } else if (!strcmp(this_char, MNTOPT_32BITINODE)) { 316 } else if (!strcmp(this_char, MNTOPT_32BITINODE)) {
308 mp->m_flags |= XFS_MOUNT_SMALL_INUMS; 317 mp->m_flags |= XFS_MOUNT_SMALL_INUMS;
309 } else if (!strcmp(this_char, MNTOPT_64BITINODE)) { 318 } else if (!strcmp(this_char, MNTOPT_64BITINODE)) {
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index 2e137d4a85ae..16a812977eab 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -341,6 +341,7 @@ DEFINE_BUF_EVENT(xfs_buf_item_relse);
341DEFINE_BUF_EVENT(xfs_buf_item_iodone); 341DEFINE_BUF_EVENT(xfs_buf_item_iodone);
342DEFINE_BUF_EVENT(xfs_buf_item_iodone_async); 342DEFINE_BUF_EVENT(xfs_buf_item_iodone_async);
343DEFINE_BUF_EVENT(xfs_buf_error_relse); 343DEFINE_BUF_EVENT(xfs_buf_error_relse);
344DEFINE_BUF_EVENT(xfs_buf_wait_buftarg);
344DEFINE_BUF_EVENT(xfs_trans_read_buf_io); 345DEFINE_BUF_EVENT(xfs_trans_read_buf_io);
345DEFINE_BUF_EVENT(xfs_trans_read_buf_shut); 346DEFINE_BUF_EVENT(xfs_trans_read_buf_shut);
346 347
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c
index 06ed520a767f..2fd7c1ff1d21 100644
--- a/fs/xfs/xfs_trans.c
+++ b/fs/xfs/xfs_trans.c
@@ -37,14 +37,45 @@
37#include "xfs_extent_busy.h" 37#include "xfs_extent_busy.h"
38#include "xfs_bmap.h" 38#include "xfs_bmap.h"
39#include "xfs_quota.h" 39#include "xfs_quota.h"
40#include "xfs_qm.h"
40#include "xfs_trans_priv.h" 41#include "xfs_trans_priv.h"
41#include "xfs_trans_space.h" 42#include "xfs_trans_space.h"
42#include "xfs_inode_item.h" 43#include "xfs_inode_item.h"
44#include "xfs_log_priv.h"
45#include "xfs_buf_item.h"
43#include "xfs_trace.h" 46#include "xfs_trace.h"
44 47
45kmem_zone_t *xfs_trans_zone; 48kmem_zone_t *xfs_trans_zone;
46kmem_zone_t *xfs_log_item_desc_zone; 49kmem_zone_t *xfs_log_item_desc_zone;
47 50
51/*
52 * A buffer has a format structure overhead in the log in addition
53 * to the data, so we need to take this into account when reserving
54 * space in a transaction for a buffer. Round the space required up
55 * to a multiple of 128 bytes so that we don't change the historical
56 * reservation that has been used for this overhead.
57 */
58STATIC uint
59xfs_buf_log_overhead(void)
60{
61 return round_up(sizeof(struct xlog_op_header) +
62 sizeof(struct xfs_buf_log_format), 128);
63}
64
65/*
66 * Calculate the transaction log reservation per item in bytes.
67 *
68 * The nbufs argument is used to indicate the number of items that
69 * will be changed in a transaction. size is used to tell how many
70 * bytes should be reserved per item.
71 */
72STATIC uint
73xfs_calc_buf_res(
74 uint nbufs,
75 uint size)
76{
77 return nbufs * (size + xfs_buf_log_overhead());
78}
48 79
49/* 80/*
50 * Various log reservation values. 81 * Various log reservation values.
@@ -85,18 +116,15 @@ xfs_calc_write_reservation(
85 struct xfs_mount *mp) 116 struct xfs_mount *mp)
86{ 117{
87 return XFS_DQUOT_LOGRES(mp) + 118 return XFS_DQUOT_LOGRES(mp) +
88 MAX((mp->m_sb.sb_inodesize + 119 MAX((xfs_calc_buf_res(1, mp->m_sb.sb_inodesize) +
89 XFS_FSB_TO_B(mp, XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK)) + 120 xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK),
90 2 * mp->m_sb.sb_sectsize + 121 XFS_FSB_TO_B(mp, 1)) +
91 mp->m_sb.sb_sectsize + 122 xfs_calc_buf_res(3, mp->m_sb.sb_sectsize) +
92 XFS_ALLOCFREE_LOG_RES(mp, 2) + 123 xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 2),
93 128 * (4 + XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) + 124 XFS_FSB_TO_B(mp, 1))),
94 XFS_ALLOCFREE_LOG_COUNT(mp, 2))), 125 (xfs_calc_buf_res(5, mp->m_sb.sb_sectsize) +
95 (2 * mp->m_sb.sb_sectsize + 126 xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 2),
96 2 * mp->m_sb.sb_sectsize + 127 XFS_FSB_TO_B(mp, 1))));
97 mp->m_sb.sb_sectsize +
98 XFS_ALLOCFREE_LOG_RES(mp, 2) +
99 128 * (5 + XFS_ALLOCFREE_LOG_COUNT(mp, 2))));
100} 128}
101 129
102/* 130/*
@@ -117,18 +145,17 @@ xfs_calc_itruncate_reservation(
117 struct xfs_mount *mp) 145 struct xfs_mount *mp)
118{ 146{
119 return XFS_DQUOT_LOGRES(mp) + 147 return XFS_DQUOT_LOGRES(mp) +
120 MAX((mp->m_sb.sb_inodesize + 148 MAX((xfs_calc_buf_res(1, mp->m_sb.sb_inodesize) +
121 XFS_FSB_TO_B(mp, XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) + 1) + 149 xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) + 1,
122 128 * (2 + XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK))), 150 XFS_FSB_TO_B(mp, 1))),
123 (4 * mp->m_sb.sb_sectsize + 151 (xfs_calc_buf_res(9, mp->m_sb.sb_sectsize) +
124 4 * mp->m_sb.sb_sectsize + 152 xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 4),
125 mp->m_sb.sb_sectsize + 153 XFS_FSB_TO_B(mp, 1)) +
126 XFS_ALLOCFREE_LOG_RES(mp, 4) + 154 xfs_calc_buf_res(5, 0) +
127 128 * (9 + XFS_ALLOCFREE_LOG_COUNT(mp, 4)) + 155 xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1),
128 128 * 5 + 156 XFS_FSB_TO_B(mp, 1)) +
129 XFS_ALLOCFREE_LOG_RES(mp, 1) + 157 xfs_calc_buf_res(2 + XFS_IALLOC_BLOCKS(mp) +
130 128 * (2 + XFS_IALLOC_BLOCKS(mp) + mp->m_in_maxlevels + 158 mp->m_in_maxlevels, 0)));
131 XFS_ALLOCFREE_LOG_COUNT(mp, 1))));
132} 159}
133 160
134/* 161/*
@@ -148,14 +175,12 @@ xfs_calc_rename_reservation(
148 struct xfs_mount *mp) 175 struct xfs_mount *mp)
149{ 176{
150 return XFS_DQUOT_LOGRES(mp) + 177 return XFS_DQUOT_LOGRES(mp) +
151 MAX((4 * mp->m_sb.sb_inodesize + 178 MAX((xfs_calc_buf_res(4, mp->m_sb.sb_inodesize) +
152 2 * XFS_DIROP_LOG_RES(mp) + 179 xfs_calc_buf_res(2 * XFS_DIROP_LOG_COUNT(mp),
153 128 * (4 + 2 * XFS_DIROP_LOG_COUNT(mp))), 180 XFS_FSB_TO_B(mp, 1))),
154 (3 * mp->m_sb.sb_sectsize + 181 (xfs_calc_buf_res(7, mp->m_sb.sb_sectsize) +
155 3 * mp->m_sb.sb_sectsize + 182 xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 3),
156 mp->m_sb.sb_sectsize + 183 XFS_FSB_TO_B(mp, 1))));
157 XFS_ALLOCFREE_LOG_RES(mp, 3) +
158 128 * (7 + XFS_ALLOCFREE_LOG_COUNT(mp, 3))));
159} 184}
160 185
161/* 186/*
@@ -175,15 +200,12 @@ xfs_calc_link_reservation(
175 struct xfs_mount *mp) 200 struct xfs_mount *mp)
176{ 201{
177 return XFS_DQUOT_LOGRES(mp) + 202 return XFS_DQUOT_LOGRES(mp) +
178 MAX((mp->m_sb.sb_inodesize + 203 MAX((xfs_calc_buf_res(2, mp->m_sb.sb_inodesize) +
179 mp->m_sb.sb_inodesize + 204 xfs_calc_buf_res(XFS_DIROP_LOG_COUNT(mp),
180 XFS_DIROP_LOG_RES(mp) + 205 XFS_FSB_TO_B(mp, 1))),
181 128 * (2 + XFS_DIROP_LOG_COUNT(mp))), 206 (xfs_calc_buf_res(3, mp->m_sb.sb_sectsize) +
182 (mp->m_sb.sb_sectsize + 207 xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1),
183 mp->m_sb.sb_sectsize + 208 XFS_FSB_TO_B(mp, 1))));
184 mp->m_sb.sb_sectsize +
185 XFS_ALLOCFREE_LOG_RES(mp, 1) +
186 128 * (3 + XFS_ALLOCFREE_LOG_COUNT(mp, 1))));
187} 209}
188 210
189/* 211/*
@@ -203,15 +225,12 @@ xfs_calc_remove_reservation(
203 struct xfs_mount *mp) 225 struct xfs_mount *mp)
204{ 226{
205 return XFS_DQUOT_LOGRES(mp) + 227 return XFS_DQUOT_LOGRES(mp) +
206 MAX((mp->m_sb.sb_inodesize + 228 MAX((xfs_calc_buf_res(2, mp->m_sb.sb_inodesize) +
207 mp->m_sb.sb_inodesize + 229 xfs_calc_buf_res(XFS_DIROP_LOG_COUNT(mp),
208 XFS_DIROP_LOG_RES(mp) + 230 XFS_FSB_TO_B(mp, 1))),
209 128 * (2 + XFS_DIROP_LOG_COUNT(mp))), 231 (xfs_calc_buf_res(5, mp->m_sb.sb_sectsize) +
210 (2 * mp->m_sb.sb_sectsize + 232 xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 2),
211 2 * mp->m_sb.sb_sectsize + 233 XFS_FSB_TO_B(mp, 1))));
212 mp->m_sb.sb_sectsize +
213 XFS_ALLOCFREE_LOG_RES(mp, 2) +
214 128 * (5 + XFS_ALLOCFREE_LOG_COUNT(mp, 2))));
215} 234}
216 235
217/* 236/*
@@ -233,18 +252,18 @@ xfs_calc_symlink_reservation(
233 struct xfs_mount *mp) 252 struct xfs_mount *mp)
234{ 253{
235 return XFS_DQUOT_LOGRES(mp) + 254 return XFS_DQUOT_LOGRES(mp) +
236 MAX((mp->m_sb.sb_inodesize + 255 MAX((xfs_calc_buf_res(2, mp->m_sb.sb_inodesize) +
237 mp->m_sb.sb_inodesize + 256 xfs_calc_buf_res(1, XFS_FSB_TO_B(mp, 1)) +
238 XFS_FSB_TO_B(mp, 1) + 257 xfs_calc_buf_res(XFS_DIROP_LOG_COUNT(mp),
239 XFS_DIROP_LOG_RES(mp) + 258 XFS_FSB_TO_B(mp, 1)) +
240 1024 + 259 xfs_calc_buf_res(1, 1024)),
241 128 * (4 + XFS_DIROP_LOG_COUNT(mp))), 260 (xfs_calc_buf_res(2, mp->m_sb.sb_sectsize) +
242 (2 * mp->m_sb.sb_sectsize + 261 xfs_calc_buf_res(XFS_IALLOC_BLOCKS(mp),
243 XFS_FSB_TO_B(mp, XFS_IALLOC_BLOCKS(mp)) + 262 XFS_FSB_TO_B(mp, 1)) +
244 XFS_FSB_TO_B(mp, mp->m_in_maxlevels) + 263 xfs_calc_buf_res(mp->m_in_maxlevels,
245 XFS_ALLOCFREE_LOG_RES(mp, 1) + 264 XFS_FSB_TO_B(mp, 1)) +
246 128 * (2 + XFS_IALLOC_BLOCKS(mp) + mp->m_in_maxlevels + 265 xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1),
247 XFS_ALLOCFREE_LOG_COUNT(mp, 1)))); 266 XFS_FSB_TO_B(mp, 1))));
248} 267}
249 268
250/* 269/*
@@ -267,18 +286,19 @@ xfs_calc_create_reservation(
267 struct xfs_mount *mp) 286 struct xfs_mount *mp)
268{ 287{
269 return XFS_DQUOT_LOGRES(mp) + 288 return XFS_DQUOT_LOGRES(mp) +
270 MAX((mp->m_sb.sb_inodesize + 289 MAX((xfs_calc_buf_res(2, mp->m_sb.sb_inodesize) +
271 mp->m_sb.sb_inodesize + 290 xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) +
291 (uint)XFS_FSB_TO_B(mp, 1) +
292 xfs_calc_buf_res(XFS_DIROP_LOG_COUNT(mp),
293 XFS_FSB_TO_B(mp, 1))),
294 (xfs_calc_buf_res(2, mp->m_sb.sb_sectsize) +
272 mp->m_sb.sb_sectsize + 295 mp->m_sb.sb_sectsize +
273 XFS_FSB_TO_B(mp, 1) + 296 xfs_calc_buf_res(XFS_IALLOC_BLOCKS(mp),
274 XFS_DIROP_LOG_RES(mp) + 297 XFS_FSB_TO_B(mp, 1)) +
275 128 * (3 + XFS_DIROP_LOG_COUNT(mp))), 298 xfs_calc_buf_res(mp->m_in_maxlevels,
276 (3 * mp->m_sb.sb_sectsize + 299 XFS_FSB_TO_B(mp, 1)) +
277 XFS_FSB_TO_B(mp, XFS_IALLOC_BLOCKS(mp)) + 300 xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1),
278 XFS_FSB_TO_B(mp, mp->m_in_maxlevels) + 301 XFS_FSB_TO_B(mp, 1))));
279 XFS_ALLOCFREE_LOG_RES(mp, 1) +
280 128 * (2 + XFS_IALLOC_BLOCKS(mp) + mp->m_in_maxlevels +
281 XFS_ALLOCFREE_LOG_COUNT(mp, 1))));
282} 302}
283 303
284/* 304/*
@@ -306,16 +326,16 @@ xfs_calc_ifree_reservation(
306 struct xfs_mount *mp) 326 struct xfs_mount *mp)
307{ 327{
308 return XFS_DQUOT_LOGRES(mp) + 328 return XFS_DQUOT_LOGRES(mp) +
309 mp->m_sb.sb_inodesize + 329 xfs_calc_buf_res(1, mp->m_sb.sb_inodesize) +
310 mp->m_sb.sb_sectsize + 330 xfs_calc_buf_res(2, mp->m_sb.sb_sectsize) +
311 mp->m_sb.sb_sectsize + 331 xfs_calc_buf_res(1, XFS_FSB_TO_B(mp, 1)) +
312 XFS_FSB_TO_B(mp, 1) +
313 MAX((__uint16_t)XFS_FSB_TO_B(mp, 1), 332 MAX((__uint16_t)XFS_FSB_TO_B(mp, 1),
314 XFS_INODE_CLUSTER_SIZE(mp)) + 333 XFS_INODE_CLUSTER_SIZE(mp)) +
315 128 * 5 + 334 xfs_calc_buf_res(1, 0) +
316 XFS_ALLOCFREE_LOG_RES(mp, 1) + 335 xfs_calc_buf_res(2 + XFS_IALLOC_BLOCKS(mp) +
317 128 * (2 + XFS_IALLOC_BLOCKS(mp) + mp->m_in_maxlevels + 336 mp->m_in_maxlevels, 0) +
318 XFS_ALLOCFREE_LOG_COUNT(mp, 1)); 337 xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1),
338 XFS_FSB_TO_B(mp, 1));
319} 339}
320 340
321/* 341/*
@@ -343,9 +363,9 @@ STATIC uint
343xfs_calc_growdata_reservation( 363xfs_calc_growdata_reservation(
344 struct xfs_mount *mp) 364 struct xfs_mount *mp)
345{ 365{
346 return mp->m_sb.sb_sectsize * 3 + 366 return xfs_calc_buf_res(3, mp->m_sb.sb_sectsize) +
347 XFS_ALLOCFREE_LOG_RES(mp, 1) + 367 xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1),
348 128 * (3 + XFS_ALLOCFREE_LOG_COUNT(mp, 1)); 368 XFS_FSB_TO_B(mp, 1));
349} 369}
350 370
351/* 371/*
@@ -362,12 +382,12 @@ STATIC uint
362xfs_calc_growrtalloc_reservation( 382xfs_calc_growrtalloc_reservation(
363 struct xfs_mount *mp) 383 struct xfs_mount *mp)
364{ 384{
365 return 2 * mp->m_sb.sb_sectsize + 385 return xfs_calc_buf_res(2, mp->m_sb.sb_sectsize) +
366 XFS_FSB_TO_B(mp, XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK)) + 386 xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK),
367 mp->m_sb.sb_inodesize + 387 XFS_FSB_TO_B(mp, 1)) +
368 XFS_ALLOCFREE_LOG_RES(mp, 1) + 388 xfs_calc_buf_res(1, mp->m_sb.sb_inodesize) +
369 128 * (3 + XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) + 389 xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1),
370 XFS_ALLOCFREE_LOG_COUNT(mp, 1)); 390 XFS_FSB_TO_B(mp, 1));
371} 391}
372 392
373/* 393/*
@@ -379,7 +399,7 @@ STATIC uint
379xfs_calc_growrtzero_reservation( 399xfs_calc_growrtzero_reservation(
380 struct xfs_mount *mp) 400 struct xfs_mount *mp)
381{ 401{
382 return mp->m_sb.sb_blocksize + 128; 402 return xfs_calc_buf_res(1, mp->m_sb.sb_blocksize);
383} 403}
384 404
385/* 405/*
@@ -396,11 +416,10 @@ STATIC uint
396xfs_calc_growrtfree_reservation( 416xfs_calc_growrtfree_reservation(
397 struct xfs_mount *mp) 417 struct xfs_mount *mp)
398{ 418{
399 return mp->m_sb.sb_sectsize + 419 return xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) +
400 2 * mp->m_sb.sb_inodesize + 420 xfs_calc_buf_res(2, mp->m_sb.sb_inodesize) +
401 mp->m_sb.sb_blocksize + 421 xfs_calc_buf_res(1, mp->m_sb.sb_blocksize) +
402 mp->m_rsumsize + 422 xfs_calc_buf_res(1, mp->m_rsumsize);
403 128 * 5;
404} 423}
405 424
406/* 425/*
@@ -411,7 +430,7 @@ STATIC uint
411xfs_calc_swrite_reservation( 430xfs_calc_swrite_reservation(
412 struct xfs_mount *mp) 431 struct xfs_mount *mp)
413{ 432{
414 return mp->m_sb.sb_inodesize + 128; 433 return xfs_calc_buf_res(1, mp->m_sb.sb_inodesize);
415} 434}
416 435
417/* 436/*
@@ -421,7 +440,7 @@ xfs_calc_swrite_reservation(
421STATIC uint 440STATIC uint
422xfs_calc_writeid_reservation(xfs_mount_t *mp) 441xfs_calc_writeid_reservation(xfs_mount_t *mp)
423{ 442{
424 return mp->m_sb.sb_inodesize + 128; 443 return xfs_calc_buf_res(1, mp->m_sb.sb_inodesize);
425} 444}
426 445
427/* 446/*
@@ -437,13 +456,13 @@ xfs_calc_addafork_reservation(
437 struct xfs_mount *mp) 456 struct xfs_mount *mp)
438{ 457{
439 return XFS_DQUOT_LOGRES(mp) + 458 return XFS_DQUOT_LOGRES(mp) +
440 mp->m_sb.sb_inodesize + 459 xfs_calc_buf_res(1, mp->m_sb.sb_inodesize) +
441 mp->m_sb.sb_sectsize * 2 + 460 xfs_calc_buf_res(2, mp->m_sb.sb_sectsize) +
442 mp->m_dirblksize + 461 xfs_calc_buf_res(1, mp->m_dirblksize) +
443 XFS_FSB_TO_B(mp, XFS_DAENTER_BMAP1B(mp, XFS_DATA_FORK) + 1) + 462 xfs_calc_buf_res(XFS_DAENTER_BMAP1B(mp, XFS_DATA_FORK) + 1,
444 XFS_ALLOCFREE_LOG_RES(mp, 1) + 463 XFS_FSB_TO_B(mp, 1)) +
445 128 * (4 + XFS_DAENTER_BMAP1B(mp, XFS_DATA_FORK) + 1 + 464 xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1),
446 XFS_ALLOCFREE_LOG_COUNT(mp, 1)); 465 XFS_FSB_TO_B(mp, 1));
447} 466}
448 467
449/* 468/*
@@ -461,35 +480,51 @@ STATIC uint
461xfs_calc_attrinval_reservation( 480xfs_calc_attrinval_reservation(
462 struct xfs_mount *mp) 481 struct xfs_mount *mp)
463{ 482{
464 return MAX((mp->m_sb.sb_inodesize + 483 return MAX((xfs_calc_buf_res(1, mp->m_sb.sb_inodesize) +
465 XFS_FSB_TO_B(mp, XFS_BM_MAXLEVELS(mp, XFS_ATTR_FORK)) + 484 xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_ATTR_FORK),
466 128 * (1 + XFS_BM_MAXLEVELS(mp, XFS_ATTR_FORK))), 485 XFS_FSB_TO_B(mp, 1))),
467 (4 * mp->m_sb.sb_sectsize + 486 (xfs_calc_buf_res(9, mp->m_sb.sb_sectsize) +
468 4 * mp->m_sb.sb_sectsize + 487 xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 4),
469 mp->m_sb.sb_sectsize + 488 XFS_FSB_TO_B(mp, 1))));
470 XFS_ALLOCFREE_LOG_RES(mp, 4) +
471 128 * (9 + XFS_ALLOCFREE_LOG_COUNT(mp, 4))));
472} 489}
473 490
474/* 491/*
475 * Setting an attribute. 492 * Setting an attribute at mount time.
476 * the inode getting the attribute 493 * the inode getting the attribute
477 * the superblock for allocations 494 * the superblock for allocations
478 * the agfs extents are allocated from 495 * the agfs extents are allocated from
479 * the attribute btree * max depth 496 * the attribute btree * max depth
480 * the inode allocation btree 497 * the inode allocation btree
481 * Since attribute transaction space is dependent on the size of the attribute, 498 * Since attribute transaction space is dependent on the size of the attribute,
482 * the calculation is done partially at mount time and partially at runtime. 499 * the calculation is done partially at mount time and partially at runtime (see
500 * below).
483 */ 501 */
484STATIC uint 502STATIC uint
485xfs_calc_attrset_reservation( 503xfs_calc_attrsetm_reservation(
486 struct xfs_mount *mp) 504 struct xfs_mount *mp)
487{ 505{
488 return XFS_DQUOT_LOGRES(mp) + 506 return XFS_DQUOT_LOGRES(mp) +
489 mp->m_sb.sb_inodesize + 507 xfs_calc_buf_res(1, mp->m_sb.sb_inodesize) +
490 mp->m_sb.sb_sectsize + 508 xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) +
491 XFS_FSB_TO_B(mp, XFS_DA_NODE_MAXDEPTH) + 509 xfs_calc_buf_res(XFS_DA_NODE_MAXDEPTH, XFS_FSB_TO_B(mp, 1));
492 128 * (2 + XFS_DA_NODE_MAXDEPTH); 510}
511
512/*
513 * Setting an attribute at runtime, transaction space unit per block.
514 * the superblock for allocations: sector size
515 * the inode bmap btree could join or split: max depth * block size
516 * Since the runtime attribute transaction space is dependent on the total
517 * blocks needed for the 1st bmap, here we calculate the space unit for
518 * one block so that the caller could figure out the total space according
519 * to the attribute extent length in blocks by: ext * XFS_ATTRSETRT_LOG_RES(mp).
520 */
521STATIC uint
522xfs_calc_attrsetrt_reservation(
523 struct xfs_mount *mp)
524{
525 return xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) +
526 xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_ATTR_FORK),
527 XFS_FSB_TO_B(mp, 1));
493} 528}
494 529
495/* 530/*
@@ -508,16 +543,15 @@ xfs_calc_attrrm_reservation(
508 struct xfs_mount *mp) 543 struct xfs_mount *mp)
509{ 544{
510 return XFS_DQUOT_LOGRES(mp) + 545 return XFS_DQUOT_LOGRES(mp) +
511 MAX((mp->m_sb.sb_inodesize + 546 MAX((xfs_calc_buf_res(1, mp->m_sb.sb_inodesize) +
512 XFS_FSB_TO_B(mp, XFS_DA_NODE_MAXDEPTH) + 547 xfs_calc_buf_res(XFS_DA_NODE_MAXDEPTH,
513 XFS_FSB_TO_B(mp, XFS_BM_MAXLEVELS(mp, XFS_ATTR_FORK)) + 548 XFS_FSB_TO_B(mp, 1)) +
514 128 * (1 + XFS_DA_NODE_MAXDEPTH + 549 (uint)XFS_FSB_TO_B(mp,
515 XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK))), 550 XFS_BM_MAXLEVELS(mp, XFS_ATTR_FORK)) +
516 (2 * mp->m_sb.sb_sectsize + 551 xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK), 0)),
517 2 * mp->m_sb.sb_sectsize + 552 (xfs_calc_buf_res(5, mp->m_sb.sb_sectsize) +
518 mp->m_sb.sb_sectsize + 553 xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 2),
519 XFS_ALLOCFREE_LOG_RES(mp, 2) + 554 XFS_FSB_TO_B(mp, 1))));
520 128 * (5 + XFS_ALLOCFREE_LOG_COUNT(mp, 2))));
521} 555}
522 556
523/* 557/*
@@ -527,7 +561,78 @@ STATIC uint
527xfs_calc_clear_agi_bucket_reservation( 561xfs_calc_clear_agi_bucket_reservation(
528 struct xfs_mount *mp) 562 struct xfs_mount *mp)
529{ 563{
530 return mp->m_sb.sb_sectsize + 128; 564 return xfs_calc_buf_res(1, mp->m_sb.sb_sectsize);
565}
566
567/*
568 * Clearing the quotaflags in the superblock.
569 * the super block for changing quota flags: sector size
570 */
571STATIC uint
572xfs_calc_qm_sbchange_reservation(
573 struct xfs_mount *mp)
574{
575 return xfs_calc_buf_res(1, mp->m_sb.sb_sectsize);
576}
577
578/*
579 * Adjusting quota limits.
580 * the xfs_disk_dquot_t: sizeof(struct xfs_disk_dquot)
581 */
582STATIC uint
583xfs_calc_qm_setqlim_reservation(
584 struct xfs_mount *mp)
585{
586 return xfs_calc_buf_res(1, sizeof(struct xfs_disk_dquot));
587}
588
589/*
590 * Allocating quota on disk if needed.
591 * the write transaction log space: XFS_WRITE_LOG_RES(mp)
592 * the unit of quota allocation: one system block size
593 */
594STATIC uint
595xfs_calc_qm_dqalloc_reservation(
596 struct xfs_mount *mp)
597{
598 return XFS_WRITE_LOG_RES(mp) +
599 xfs_calc_buf_res(1,
600 XFS_FSB_TO_B(mp, XFS_DQUOT_CLUSTER_SIZE_FSB) - 1);
601}
602
603/*
604 * Turning off quotas.
605 * the xfs_qoff_logitem_t: sizeof(struct xfs_qoff_logitem) * 2
606 * the superblock for the quota flags: sector size
607 */
608STATIC uint
609xfs_calc_qm_quotaoff_reservation(
610 struct xfs_mount *mp)
611{
612 return sizeof(struct xfs_qoff_logitem) * 2 +
613 xfs_calc_buf_res(1, mp->m_sb.sb_sectsize);
614}
615
616/*
617 * End of turning off quotas.
618 * the xfs_qoff_logitem_t: sizeof(struct xfs_qoff_logitem) * 2
619 */
620STATIC uint
621xfs_calc_qm_quotaoff_end_reservation(
622 struct xfs_mount *mp)
623{
624 return sizeof(struct xfs_qoff_logitem) * 2;
625}
626
627/*
628 * Syncing the incore super block changes to disk.
629 * the super block to reflect the changes: sector size
630 */
631STATIC uint
632xfs_calc_sb_reservation(
633 struct xfs_mount *mp)
634{
635 return xfs_calc_buf_res(1, mp->m_sb.sb_sectsize);
531} 636}
532 637
533/* 638/*
@@ -555,12 +660,19 @@ xfs_trans_init(
555 resp->tr_writeid = xfs_calc_writeid_reservation(mp); 660 resp->tr_writeid = xfs_calc_writeid_reservation(mp);
556 resp->tr_addafork = xfs_calc_addafork_reservation(mp); 661 resp->tr_addafork = xfs_calc_addafork_reservation(mp);
557 resp->tr_attrinval = xfs_calc_attrinval_reservation(mp); 662 resp->tr_attrinval = xfs_calc_attrinval_reservation(mp);
558 resp->tr_attrset = xfs_calc_attrset_reservation(mp); 663 resp->tr_attrsetm = xfs_calc_attrsetm_reservation(mp);
664 resp->tr_attrsetrt = xfs_calc_attrsetrt_reservation(mp);
559 resp->tr_attrrm = xfs_calc_attrrm_reservation(mp); 665 resp->tr_attrrm = xfs_calc_attrrm_reservation(mp);
560 resp->tr_clearagi = xfs_calc_clear_agi_bucket_reservation(mp); 666 resp->tr_clearagi = xfs_calc_clear_agi_bucket_reservation(mp);
561 resp->tr_growrtalloc = xfs_calc_growrtalloc_reservation(mp); 667 resp->tr_growrtalloc = xfs_calc_growrtalloc_reservation(mp);
562 resp->tr_growrtzero = xfs_calc_growrtzero_reservation(mp); 668 resp->tr_growrtzero = xfs_calc_growrtzero_reservation(mp);
563 resp->tr_growrtfree = xfs_calc_growrtfree_reservation(mp); 669 resp->tr_growrtfree = xfs_calc_growrtfree_reservation(mp);
670 resp->tr_qm_sbchange = xfs_calc_qm_sbchange_reservation(mp);
671 resp->tr_qm_setqlim = xfs_calc_qm_setqlim_reservation(mp);
672 resp->tr_qm_dqalloc = xfs_calc_qm_dqalloc_reservation(mp);
673 resp->tr_qm_quotaoff = xfs_calc_qm_quotaoff_reservation(mp);
674 resp->tr_qm_equotaoff = xfs_calc_qm_quotaoff_end_reservation(mp);
675 resp->tr_sb = xfs_calc_sb_reservation(mp);
564} 676}
565 677
566/* 678/*
diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h
index c6c0601abd7a..cd29f6171021 100644
--- a/fs/xfs/xfs_trans.h
+++ b/fs/xfs/xfs_trans.h
@@ -252,17 +252,19 @@ struct xfs_log_item_desc {
252 * as long as SWRITE logs the entire inode core 252 * as long as SWRITE logs the entire inode core
253 */ 253 */
254#define XFS_FSYNC_TS_LOG_RES(mp) ((mp)->m_reservations.tr_swrite) 254#define XFS_FSYNC_TS_LOG_RES(mp) ((mp)->m_reservations.tr_swrite)
255#define XFS_WRITEID_LOG_RES(mp) ((mp)->m_reservations.tr_swrite) 255#define XFS_WRITEID_LOG_RES(mp) ((mp)->m_reservations.tr_swrite)
256#define XFS_ADDAFORK_LOG_RES(mp) ((mp)->m_reservations.tr_addafork) 256#define XFS_ADDAFORK_LOG_RES(mp) ((mp)->m_reservations.tr_addafork)
257#define XFS_ATTRINVAL_LOG_RES(mp) ((mp)->m_reservations.tr_attrinval) 257#define XFS_ATTRINVAL_LOG_RES(mp) ((mp)->m_reservations.tr_attrinval)
258#define XFS_ATTRSET_LOG_RES(mp, ext) \ 258#define XFS_ATTRSETM_LOG_RES(mp) ((mp)->m_reservations.tr_attrsetm)
259 ((mp)->m_reservations.tr_attrset + \ 259#define XFS_ATTRSETRT_LOG_RES(mp) ((mp)->m_reservations.tr_attrsetrt)
260 (ext * (mp)->m_sb.sb_sectsize) + \ 260#define XFS_ATTRRM_LOG_RES(mp) ((mp)->m_reservations.tr_attrrm)
261 (ext * XFS_FSB_TO_B((mp), XFS_BM_MAXLEVELS(mp, XFS_ATTR_FORK))) + \
262 (128 * (ext + (ext * XFS_BM_MAXLEVELS(mp, XFS_ATTR_FORK)))))
263#define XFS_ATTRRM_LOG_RES(mp) ((mp)->m_reservations.tr_attrrm)
264#define XFS_CLEAR_AGI_BUCKET_LOG_RES(mp) ((mp)->m_reservations.tr_clearagi) 261#define XFS_CLEAR_AGI_BUCKET_LOG_RES(mp) ((mp)->m_reservations.tr_clearagi)
265 262#define XFS_QM_SBCHANGE_LOG_RES(mp) ((mp)->m_reservations.tr_qm_sbchange)
263#define XFS_QM_SETQLIM_LOG_RES(mp) ((mp)->m_reservations.tr_qm_setqlim)
264#define XFS_QM_DQALLOC_LOG_RES(mp) ((mp)->m_reservations.tr_qm_dqalloc)
265#define XFS_QM_QUOTAOFF_LOG_RES(mp) ((mp)->m_reservations.tr_qm_quotaoff)
266#define XFS_QM_QUOTAOFF_END_LOG_RES(mp) ((mp)->m_reservations.tr_qm_equotaoff)
267#define XFS_SB_LOG_RES(mp) ((mp)->m_reservations.tr_sb)
266 268
267/* 269/*
268 * Various log count values. 270 * Various log count values.
diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c
index 6011ee661339..0eda7254305f 100644
--- a/fs/xfs/xfs_trans_ail.c
+++ b/fs/xfs/xfs_trans_ail.c
@@ -55,20 +55,6 @@ xfs_ail_check(
55 ASSERT(XFS_LSN_CMP(prev_lip->li_lsn, lip->li_lsn) >= 0); 55 ASSERT(XFS_LSN_CMP(prev_lip->li_lsn, lip->li_lsn) >= 0);
56 56
57 57
58#ifdef XFS_TRANS_DEBUG
59 /*
60 * Walk the list checking lsn ordering, and that every entry has the
61 * XFS_LI_IN_AIL flag set. This is really expensive, so only do it
62 * when specifically debugging the transaction subsystem.
63 */
64 prev_lip = list_entry(&ailp->xa_ail, xfs_log_item_t, li_ail);
65 list_for_each_entry(lip, &ailp->xa_ail, li_ail) {
66 if (&prev_lip->li_ail != &ailp->xa_ail)
67 ASSERT(XFS_LSN_CMP(prev_lip->li_lsn, lip->li_lsn) <= 0);
68 ASSERT((lip->li_flags & XFS_LI_IN_AIL) != 0);
69 prev_lip = lip;
70 }
71#endif /* XFS_TRANS_DEBUG */
72} 58}
73#else /* !DEBUG */ 59#else /* !DEBUG */
74#define xfs_ail_check(a,l) 60#define xfs_ail_check(a,l)
diff --git a/fs/xfs/xfs_trans_dquot.c b/fs/xfs/xfs_trans_dquot.c
index 0c7fa54f309e..642c2d6e1db1 100644
--- a/fs/xfs/xfs_trans_dquot.c
+++ b/fs/xfs/xfs_trans_dquot.c
@@ -516,7 +516,7 @@ xfs_trans_unreserve_and_mod_dquots(
516 int i, j; 516 int i, j;
517 xfs_dquot_t *dqp; 517 xfs_dquot_t *dqp;
518 xfs_dqtrx_t *qtrx, *qa; 518 xfs_dqtrx_t *qtrx, *qa;
519 boolean_t locked; 519 bool locked;
520 520
521 if (!tp->t_dqinfo || !(tp->t_flags & XFS_TRANS_DQ_DIRTY)) 521 if (!tp->t_dqinfo || !(tp->t_flags & XFS_TRANS_DQ_DIRTY))
522 return; 522 return;
@@ -537,17 +537,17 @@ xfs_trans_unreserve_and_mod_dquots(
537 * about the number of blocks used field, or deltas. 537 * about the number of blocks used field, or deltas.
538 * Also we don't bother to zero the fields. 538 * Also we don't bother to zero the fields.
539 */ 539 */
540 locked = B_FALSE; 540 locked = false;
541 if (qtrx->qt_blk_res) { 541 if (qtrx->qt_blk_res) {
542 xfs_dqlock(dqp); 542 xfs_dqlock(dqp);
543 locked = B_TRUE; 543 locked = true;
544 dqp->q_res_bcount -= 544 dqp->q_res_bcount -=
545 (xfs_qcnt_t)qtrx->qt_blk_res; 545 (xfs_qcnt_t)qtrx->qt_blk_res;
546 } 546 }
547 if (qtrx->qt_ino_res) { 547 if (qtrx->qt_ino_res) {
548 if (!locked) { 548 if (!locked) {
549 xfs_dqlock(dqp); 549 xfs_dqlock(dqp);
550 locked = B_TRUE; 550 locked = true;
551 } 551 }
552 dqp->q_res_icount -= 552 dqp->q_res_icount -=
553 (xfs_qcnt_t)qtrx->qt_ino_res; 553 (xfs_qcnt_t)qtrx->qt_ino_res;
@@ -556,7 +556,7 @@ xfs_trans_unreserve_and_mod_dquots(
556 if (qtrx->qt_rtblk_res) { 556 if (qtrx->qt_rtblk_res) {
557 if (!locked) { 557 if (!locked) {
558 xfs_dqlock(dqp); 558 xfs_dqlock(dqp);
559 locked = B_TRUE; 559 locked = true;
560 } 560 }
561 dqp->q_res_rtbcount -= 561 dqp->q_res_rtbcount -=
562 (xfs_qcnt_t)qtrx->qt_rtblk_res; 562 (xfs_qcnt_t)qtrx->qt_rtblk_res;
diff --git a/fs/xfs/xfs_trans_inode.c b/fs/xfs/xfs_trans_inode.c
index d2eee20d5f5b..ac6d567704db 100644
--- a/fs/xfs/xfs_trans_inode.c
+++ b/fs/xfs/xfs_trans_inode.c
@@ -33,14 +33,6 @@
33#include "xfs_inode_item.h" 33#include "xfs_inode_item.h"
34#include "xfs_trace.h" 34#include "xfs_trace.h"
35 35
36#ifdef XFS_TRANS_DEBUG
37STATIC void
38xfs_trans_inode_broot_debug(
39 xfs_inode_t *ip);
40#else
41#define xfs_trans_inode_broot_debug(ip)
42#endif
43
44/* 36/*
45 * Add a locked inode to the transaction. 37 * Add a locked inode to the transaction.
46 * 38 *
@@ -67,8 +59,6 @@ xfs_trans_ijoin(
67 * Get a log_item_desc to point at the new item. 59 * Get a log_item_desc to point at the new item.
68 */ 60 */
69 xfs_trans_add_item(tp, &iip->ili_item); 61 xfs_trans_add_item(tp, &iip->ili_item);
70
71 xfs_trans_inode_broot_debug(ip);
72} 62}
73 63
74/* 64/*
@@ -135,34 +125,3 @@ xfs_trans_log_inode(
135 flags |= ip->i_itemp->ili_last_fields; 125 flags |= ip->i_itemp->ili_last_fields;
136 ip->i_itemp->ili_fields |= flags; 126 ip->i_itemp->ili_fields |= flags;
137} 127}
138
139#ifdef XFS_TRANS_DEBUG
140/*
141 * Keep track of the state of the inode btree root to make sure we
142 * log it properly.
143 */
144STATIC void
145xfs_trans_inode_broot_debug(
146 xfs_inode_t *ip)
147{
148 xfs_inode_log_item_t *iip;
149
150 ASSERT(ip->i_itemp != NULL);
151 iip = ip->i_itemp;
152 if (iip->ili_root_size != 0) {
153 ASSERT(iip->ili_orig_root != NULL);
154 kmem_free(iip->ili_orig_root);
155 iip->ili_root_size = 0;
156 iip->ili_orig_root = NULL;
157 }
158 if (ip->i_d.di_format == XFS_DINODE_FMT_BTREE) {
159 ASSERT((ip->i_df.if_broot != NULL) &&
160 (ip->i_df.if_broot_bytes > 0));
161 iip->ili_root_size = ip->i_df.if_broot_bytes;
162 iip->ili_orig_root =
163 (char*)kmem_alloc(iip->ili_root_size, KM_SLEEP);
164 memcpy(iip->ili_orig_root, (char*)(ip->i_df.if_broot),
165 iip->ili_root_size);
166 }
167}
168#endif
diff --git a/fs/xfs/xfs_types.h b/fs/xfs/xfs_types.h
index 7a41874f4c20..61ba1cfa974c 100644
--- a/fs/xfs/xfs_types.h
+++ b/fs/xfs/xfs_types.h
@@ -32,7 +32,6 @@ typedef unsigned int __uint32_t;
32typedef signed long long int __int64_t; 32typedef signed long long int __int64_t;
33typedef unsigned long long int __uint64_t; 33typedef unsigned long long int __uint64_t;
34 34
35typedef enum { B_FALSE,B_TRUE } boolean_t;
36typedef __uint32_t prid_t; /* project ID */ 35typedef __uint32_t prid_t; /* project ID */
37typedef __uint32_t inst_t; /* an instruction */ 36typedef __uint32_t inst_t; /* an instruction */
38 37
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c
index d95f565a390e..77ad74834baa 100644
--- a/fs/xfs/xfs_vnodeops.c
+++ b/fs/xfs/xfs_vnodeops.c
@@ -725,7 +725,7 @@ xfs_create(
725 int error; 725 int error;
726 xfs_bmap_free_t free_list; 726 xfs_bmap_free_t free_list;
727 xfs_fsblock_t first_block; 727 xfs_fsblock_t first_block;
728 boolean_t unlock_dp_on_error = B_FALSE; 728 bool unlock_dp_on_error = false;
729 uint cancel_flags; 729 uint cancel_flags;
730 int committed; 730 int committed;
731 prid_t prid; 731 prid_t prid;
@@ -794,7 +794,7 @@ xfs_create(
794 } 794 }
795 795
796 xfs_ilock(dp, XFS_ILOCK_EXCL | XFS_ILOCK_PARENT); 796 xfs_ilock(dp, XFS_ILOCK_EXCL | XFS_ILOCK_PARENT);
797 unlock_dp_on_error = B_TRUE; 797 unlock_dp_on_error = true;
798 798
799 xfs_bmap_init(&free_list, &first_block); 799 xfs_bmap_init(&free_list, &first_block);
800 800
@@ -830,7 +830,7 @@ xfs_create(
830 * error path. 830 * error path.
831 */ 831 */
832 xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL); 832 xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL);
833 unlock_dp_on_error = B_FALSE; 833 unlock_dp_on_error = false;
834 834
835 error = xfs_dir_createname(tp, dp, name, ip->i_ino, 835 error = xfs_dir_createname(tp, dp, name, ip->i_ino,
836 &first_block, &free_list, resblks ? 836 &first_block, &free_list, resblks ?
@@ -1367,7 +1367,7 @@ xfs_symlink(
1367 int pathlen; 1367 int pathlen;
1368 xfs_bmap_free_t free_list; 1368 xfs_bmap_free_t free_list;
1369 xfs_fsblock_t first_block; 1369 xfs_fsblock_t first_block;
1370 boolean_t unlock_dp_on_error = B_FALSE; 1370 bool unlock_dp_on_error = false;
1371 uint cancel_flags; 1371 uint cancel_flags;
1372 int committed; 1372 int committed;
1373 xfs_fileoff_t first_fsb; 1373 xfs_fileoff_t first_fsb;
@@ -1438,7 +1438,7 @@ xfs_symlink(
1438 } 1438 }
1439 1439
1440 xfs_ilock(dp, XFS_ILOCK_EXCL | XFS_ILOCK_PARENT); 1440 xfs_ilock(dp, XFS_ILOCK_EXCL | XFS_ILOCK_PARENT);
1441 unlock_dp_on_error = B_TRUE; 1441 unlock_dp_on_error = true;
1442 1442
1443 /* 1443 /*
1444 * Check whether the directory allows new symlinks or not. 1444 * Check whether the directory allows new symlinks or not.
@@ -1484,7 +1484,7 @@ xfs_symlink(
1484 * error path. 1484 * error path.
1485 */ 1485 */
1486 xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL); 1486 xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL);
1487 unlock_dp_on_error = B_FALSE; 1487 unlock_dp_on_error = false;
1488 1488
1489 /* 1489 /*
1490 * Also attach the dquot(s) to it, if applicable. 1490 * Also attach the dquot(s) to it, if applicable.