aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
authorDavid Woodhouse <David.Woodhouse@intel.com>2010-02-26 14:04:15 -0500
committerDavid Woodhouse <David.Woodhouse@intel.com>2010-02-26 14:06:24 -0500
commita7790532f5b7358c33a6b1834dc2b318de209f31 (patch)
tree0ceb9e24b3f54cb5c8453fb5a218e2a94a0f1cce /fs
parent2764fb4244cc1bc08df3667924ca4a972e90ac70 (diff)
parent60b341b778cc2929df16c0a504c91621b3c6a4ad (diff)
Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux-2.6
The SmartMedia FTL code depends on new kfifo bits from 2.6.33
Diffstat (limited to 'fs')
-rw-r--r--fs/9p/v9fs.c33
-rw-r--r--fs/9p/v9fs_vfs.h1
-rw-r--r--fs/9p/vfs_file.c19
-rw-r--r--fs/9p/vfs_inode.c43
-rw-r--r--fs/9p/vfs_super.c3
-rw-r--r--fs/Kconfig4
-rw-r--r--fs/affs/affs.h2
-rw-r--r--fs/affs/namei.c7
-rw-r--r--fs/affs/super.c31
-rw-r--r--fs/affs/symlink.c7
-rw-r--r--fs/anon_inodes.c35
-rw-r--r--fs/befs/linuxvfs.c1
-rw-r--r--fs/bfs/inode.c43
-rw-r--r--fs/binfmt_aout.c14
-rw-r--r--fs/binfmt_elf.c51
-rw-r--r--fs/binfmt_elf_fdpic.c49
-rw-r--r--fs/binfmt_flat.c7
-rw-r--r--fs/binfmt_som.c3
-rw-r--r--fs/bio-integrity.c3
-rw-r--r--fs/bio.c9
-rw-r--r--fs/block_dev.c7
-rw-r--r--fs/btrfs/Kconfig1
-rw-r--r--fs/btrfs/acl.c81
-rw-r--r--fs/btrfs/btrfs_inode.h5
-rw-r--r--fs/btrfs/ctree.c229
-rw-r--r--fs/btrfs/ctree.h41
-rw-r--r--fs/btrfs/dir-item.c19
-rw-r--r--fs/btrfs/disk-io.c40
-rw-r--r--fs/btrfs/extent-tree.c112
-rw-r--r--fs/btrfs/extent_io.c3
-rw-r--r--fs/btrfs/extent_map.c14
-rw-r--r--fs/btrfs/file.c729
-rw-r--r--fs/btrfs/inode.c627
-rw-r--r--fs/btrfs/ioctl.c34
-rw-r--r--fs/btrfs/ordered-data.c117
-rw-r--r--fs/btrfs/ordered-data.h5
-rw-r--r--fs/btrfs/relocation.c43
-rw-r--r--fs/btrfs/super.c24
-rw-r--r--fs/btrfs/transaction.c44
-rw-r--r--fs/btrfs/transaction.h6
-rw-r--r--fs/btrfs/tree-log.c86
-rw-r--r--fs/btrfs/volumes.c19
-rw-r--r--fs/btrfs/xattr.c80
-rw-r--r--fs/btrfs/xattr.h9
-rw-r--r--fs/cachefiles/bind.c11
-rw-r--r--fs/cachefiles/namei.c12
-rw-r--r--fs/cachefiles/rdwr.c2
-rw-r--r--fs/cifs/CHANGES8
-rw-r--r--fs/cifs/cifs_dfs_ref.c3
-rw-r--r--fs/cifs/cifsfs.c3
-rw-r--r--fs/cifs/cifsfs.h2
-rw-r--r--fs/cifs/cifsglob.h1
-rw-r--r--fs/cifs/connect.c43
-rw-r--r--fs/cifs/export.c2
-rw-r--r--fs/cifs/inode.c12
-rw-r--r--fs/cifs/readdir.c8
-rw-r--r--fs/cifs/sess.c11
-rw-r--r--fs/compat.c2
-rw-r--r--fs/compat_ioctl.c14
-rw-r--r--fs/configfs/symlink.c4
-rw-r--r--fs/dcache.c1
-rw-r--r--fs/debugfs/inode.c11
-rw-r--r--fs/direct-io.c2
-rw-r--r--fs/ecryptfs/crypto.c4
-rw-r--r--fs/ecryptfs/dentry.c2
-rw-r--r--fs/ecryptfs/file.c17
-rw-r--r--fs/ecryptfs/inode.c164
-rw-r--r--fs/ecryptfs/main.c13
-rw-r--r--fs/eventfd.c91
-rw-r--r--fs/eventpoll.c2
-rw-r--r--fs/exec.c96
-rw-r--r--fs/exofs/inode.c17
-rw-r--r--fs/exofs/pnfs.h10
-rw-r--r--fs/exportfs/expfs.c2
-rw-r--r--fs/ext2/acl.c79
-rw-r--r--fs/ext2/xattr.c11
-rw-r--r--fs/ext2/xattr_security.c16
-rw-r--r--fs/ext2/xattr_trusted.c16
-rw-r--r--fs/ext2/xattr_user.c25
-rw-r--r--fs/ext3/acl.c74
-rw-r--r--fs/ext3/inode.c8
-rw-r--r--fs/ext3/namei.c28
-rw-r--r--fs/ext3/resize.c35
-rw-r--r--fs/ext3/super.c19
-rw-r--r--fs/ext3/xattr.c31
-rw-r--r--fs/ext3/xattr_security.c20
-rw-r--r--fs/ext3/xattr_trusted.c18
-rw-r--r--fs/ext3/xattr_user.c25
-rw-r--r--fs/ext4/Kconfig2
-rw-r--r--fs/ext4/acl.c74
-rw-r--r--fs/ext4/block_validity.c1
-rw-r--r--fs/ext4/ext4.h17
-rw-r--r--fs/ext4/ext4_extents.h3
-rw-r--r--fs/ext4/extents.c98
-rw-r--r--fs/ext4/fsync.c16
-rw-r--r--fs/ext4/inode.c289
-rw-r--r--fs/ext4/mballoc.c6
-rw-r--r--fs/ext4/mballoc.h1
-rw-r--r--fs/ext4/super.c12
-rw-r--r--fs/ext4/xattr.c33
-rw-r--r--fs/ext4/xattr_security.c20
-rw-r--r--fs/ext4/xattr_trusted.c20
-rw-r--r--fs/ext4/xattr_user.c25
-rw-r--r--fs/fat/fat.h3
-rw-r--r--fs/fat/fatent.c25
-rw-r--r--fs/fat/inode.c8
-rw-r--r--fs/fcntl.c102
-rw-r--r--fs/file_table.c51
-rw-r--r--fs/fs-writeback.c18
-rw-r--r--fs/fuse/file.c3
-rw-r--r--fs/generic_acl.c158
-rw-r--r--fs/gfs2/Kconfig1
-rw-r--r--fs/gfs2/acl.c16
-rw-r--r--fs/gfs2/bmap.c2
-rw-r--r--fs/gfs2/file.c38
-rw-r--r--fs/gfs2/glock.c4
-rw-r--r--fs/gfs2/glock.h2
-rw-r--r--fs/gfs2/incore.h2
-rw-r--r--fs/gfs2/inode.c5
-rw-r--r--fs/gfs2/lock_dlm.c11
-rw-r--r--fs/gfs2/meta_io.c2
-rw-r--r--fs/gfs2/ops_fstype.c14
-rw-r--r--fs/gfs2/ops_inode.c9
-rw-r--r--fs/gfs2/rgrp.c8
-rw-r--r--fs/gfs2/super.c1
-rw-r--r--fs/gfs2/xattr.c90
-rw-r--r--fs/gfs2/xattr.h7
-rw-r--r--fs/hppfs/hppfs.c18
-rw-r--r--fs/hugetlbfs/inode.c17
-rw-r--r--fs/inode.c26
-rw-r--r--fs/internal.h8
-rw-r--r--fs/isofs/export.c2
-rw-r--r--fs/jbd/Kconfig1
-rw-r--r--fs/jbd/journal.c2
-rw-r--r--fs/jbd2/Kconfig1
-rw-r--r--fs/jbd2/checkpoint.c15
-rw-r--r--fs/jbd2/commit.c19
-rw-r--r--fs/jbd2/journal.c5
-rw-r--r--fs/jffs2/acl.c65
-rw-r--r--fs/jffs2/security.c18
-rw-r--r--fs/jffs2/xattr.c6
-rw-r--r--fs/jffs2/xattr_trusted.c18
-rw-r--r--fs/jffs2/xattr_user.c18
-rw-r--r--fs/jfs/jfs_txnmgr.c2
-rw-r--r--fs/jfs/super.c2
-rw-r--r--fs/libfs.c1
-rw-r--r--fs/lockd/svc4proc.c4
-rw-r--r--fs/lockd/svcproc.c4
-rw-r--r--fs/namei.c486
-rw-r--r--fs/namespace.c14
-rw-r--r--fs/nfs/Kconfig2
-rw-r--r--fs/nfs/dir.c1
-rw-r--r--fs/nfs/direct.c3
-rw-r--r--fs/nfs/file.c2
-rw-r--r--fs/nfs/fscache.c9
-rw-r--r--fs/nfs/inode.c4
-rw-r--r--fs/nfs/mount_clnt.c2
-rw-r--r--fs/nfs/nfs2xdr.c2
-rw-r--r--fs/nfs/nfs4_fs.h7
-rw-r--r--fs/nfs/nfs4proc.c281
-rw-r--r--fs/nfs/nfs4state.c62
-rw-r--r--fs/nfs/nfs4xdr.c6
-rw-r--r--fs/nfs/pagelist.c17
-rw-r--r--fs/nfs/super.c15
-rw-r--r--fs/nfs/sysctl.c2
-rw-r--r--fs/nfs/write.c6
-rw-r--r--fs/nfsctl.c2
-rw-r--r--fs/nfsd/auth.c12
-rw-r--r--fs/nfsd/cache.h83
-rw-r--r--fs/nfsd/export.c57
-rw-r--r--fs/nfsd/lockd.c10
-rw-r--r--fs/nfsd/nfs2acl.c27
-rw-r--r--fs/nfsd/nfs3acl.c15
-rw-r--r--fs/nfsd/nfs3proc.c20
-rw-r--r--fs/nfsd/nfs3xdr.c15
-rw-r--r--fs/nfsd/nfs4acl.c12
-rw-r--r--fs/nfsd/nfs4callback.c19
-rw-r--r--fs/nfsd/nfs4idmap.c17
-rw-r--r--fs/nfsd/nfs4proc.c19
-rw-r--r--fs/nfsd/nfs4recover.c16
-rw-r--r--fs/nfsd/nfs4state.c84
-rw-r--r--fs/nfsd/nfs4xdr.c26
-rw-r--r--fs/nfsd/nfscache.c14
-rw-r--r--fs/nfsd/nfsctl.c51
-rw-r--r--fs/nfsd/nfsd.h338
-rw-r--r--fs/nfsd/nfsfh.c102
-rw-r--r--fs/nfsd/nfsfh.h208
-rw-r--r--fs/nfsd/nfsproc.c22
-rw-r--r--fs/nfsd/nfssvc.c22
-rw-r--r--fs/nfsd/nfsxdr.c12
-rw-r--r--fs/nfsd/state.h408
-rw-r--r--fs/nfsd/stats.c11
-rw-r--r--fs/nfsd/vfs.c143
-rw-r--r--fs/nfsd/vfs.h101
-rw-r--r--fs/nfsd/xdr.h173
-rw-r--r--fs/nfsd/xdr3.h344
-rw-r--r--fs/nfsd/xdr4.h562
-rw-r--r--fs/nilfs2/Kconfig1
-rw-r--r--fs/nilfs2/bmap.c4
-rw-r--r--fs/nilfs2/cpfile.c31
-rw-r--r--fs/nilfs2/direct.c17
-rw-r--r--fs/nilfs2/ioctl.c2
-rw-r--r--fs/nilfs2/segment.c2
-rw-r--r--fs/nilfs2/super.c3
-rw-r--r--fs/notify/inotify/inotify_fsnotify.c2
-rw-r--r--fs/notify/inotify/inotify_user.c33
-rw-r--r--fs/ntfs/inode.c6
-rw-r--r--fs/ocfs2/Kconfig10
-rw-r--r--fs/ocfs2/Makefile7
-rw-r--r--fs/ocfs2/acl.c91
-rw-r--r--fs/ocfs2/acl.h22
-rw-r--r--fs/ocfs2/alloc.c14
-rw-r--r--fs/ocfs2/alloc.h5
-rw-r--r--fs/ocfs2/aops.c4
-rw-r--r--fs/ocfs2/buffer_head_io.c2
-rw-r--r--fs/ocfs2/cluster/heartbeat.c12
-rw-r--r--fs/ocfs2/cluster/nodemanager.c51
-rw-r--r--fs/ocfs2/cluster/nodemanager.h7
-rw-r--r--fs/ocfs2/cluster/quorum.c16
-rw-r--r--fs/ocfs2/cluster/tcp.c10
-rw-r--r--fs/ocfs2/cluster/tcp_internal.h4
-rw-r--r--fs/ocfs2/dlm/dlmapi.h2
-rw-r--r--fs/ocfs2/dlm/dlmast.c2
-rw-r--r--fs/ocfs2/dlm/dlmconvert.c2
-rw-r--r--fs/ocfs2/dlm/dlmdebug.c2
-rw-r--r--fs/ocfs2/dlm/dlmdomain.c2
-rw-r--r--fs/ocfs2/dlm/dlmlock.c2
-rw-r--r--fs/ocfs2/dlm/dlmmaster.c38
-rw-r--r--fs/ocfs2/dlm/dlmrecovery.c163
-rw-r--r--fs/ocfs2/dlm/dlmunlock.c8
-rw-r--r--fs/ocfs2/dlmglue.c85
-rw-r--r--fs/ocfs2/export.c2
-rw-r--r--fs/ocfs2/extent_map.c27
-rw-r--r--fs/ocfs2/file.c35
-rw-r--r--fs/ocfs2/inode.c4
-rw-r--r--fs/ocfs2/ioctl.c14
-rw-r--r--fs/ocfs2/journal.c2
-rw-r--r--fs/ocfs2/namei.c6
-rw-r--r--fs/ocfs2/ocfs2.h12
-rw-r--r--fs/ocfs2/ocfs2_fs.h13
-rw-r--r--fs/ocfs2/refcounttree.c162
-rw-r--r--fs/ocfs2/stack_o2cb.c12
-rw-r--r--fs/ocfs2/stack_user.c2
-rw-r--r--fs/ocfs2/super.c97
-rw-r--r--fs/ocfs2/symlink.c12
-rw-r--r--fs/ocfs2/uptodate.c4
-rw-r--r--fs/ocfs2/xattr.c78
-rw-r--r--fs/ocfs2/xattr.h2
-rw-r--r--fs/open.c17
-rw-r--r--fs/pipe.c45
-rw-r--r--fs/proc/array.c108
-rw-r--r--fs/proc/base.c25
-rw-r--r--fs/proc/page.c45
-rw-r--r--fs/proc/task_mmu.c3
-rw-r--r--fs/quota/dquot.c291
-rw-r--r--fs/quota/quota_v2.c9
-rw-r--r--fs/ramfs/file-nommu.c28
-rw-r--r--fs/reiserfs/Kconfig1
-rw-r--r--fs/reiserfs/bitmap.c3
-rw-r--r--fs/reiserfs/inode.c44
-rw-r--r--fs/reiserfs/ioctl.c3
-rw-r--r--fs/reiserfs/journal.c20
-rw-r--r--fs/reiserfs/lock.c9
-rw-r--r--fs/reiserfs/namei.c7
-rw-r--r--fs/reiserfs/xattr.c74
-rw-r--r--fs/reiserfs/xattr_acl.c71
-rw-r--r--fs/reiserfs/xattr_security.c21
-rw-r--r--fs/reiserfs/xattr_trusted.c21
-rw-r--r--fs/reiserfs/xattr_user.c21
-rw-r--r--fs/romfs/super.c1
-rw-r--r--fs/signalfd.c2
-rw-r--r--fs/stack.c71
-rw-r--r--fs/stat.c10
-rw-r--r--fs/super.c3
-rw-r--r--fs/sync.c59
-rw-r--r--fs/sysfs/bin.c6
-rw-r--r--fs/sysfs/dir.c14
-rw-r--r--fs/sysfs/inode.c35
-rw-r--r--fs/sysfs/sysfs.h15
-rw-r--r--fs/timerfd.c2
-rw-r--r--fs/ubifs/file.c2
-rw-r--r--fs/ubifs/gc.c96
-rw-r--r--fs/xattr.c28
-rw-r--r--fs/xfs/linux-2.6/xfs_acl.c60
-rw-r--r--fs/xfs/linux-2.6/xfs_buf.c10
-rw-r--r--fs/xfs/linux-2.6/xfs_buf.h1
-rw-r--r--fs/xfs/linux-2.6/xfs_iops.c2
-rw-r--r--fs/xfs/linux-2.6/xfs_super.c14
-rw-r--r--fs/xfs/linux-2.6/xfs_sync.c183
-rw-r--r--fs/xfs/linux-2.6/xfs_sync.h2
-rw-r--r--fs/xfs/linux-2.6/xfs_trace.h1145
-rw-r--r--fs/xfs/linux-2.6/xfs_xattr.c71
-rw-r--r--fs/xfs/quota/xfs_qm_syscalls.c2
-rw-r--r--fs/xfs/xfs_acl.h3
-rw-r--r--fs/xfs/xfs_alloc.c44
-rw-r--r--fs/xfs/xfs_bmap_btree.h14
-rw-r--r--fs/xfs/xfs_dfrag.c106
-rw-r--r--fs/xfs/xfs_iget.c17
-rw-r--r--fs/xfs/xfs_inode.c17
-rw-r--r--fs/xfs/xfs_inode_item.h6
-rw-r--r--fs/xfs/xfs_log.c2
-rw-r--r--fs/xfs/xfs_rtalloc.c2
-rw-r--r--fs/xfs/xfs_vnodeops.c93
303 files changed, 8156 insertions, 5024 deletions
diff --git a/fs/9p/v9fs.c b/fs/9p/v9fs.c
index cf62b05e296a..7d6c2139891d 100644
--- a/fs/9p/v9fs.c
+++ b/fs/9p/v9fs.c
@@ -84,7 +84,7 @@ static const match_table_t tokens = {
84 84
85static int v9fs_parse_options(struct v9fs_session_info *v9ses, char *opts) 85static int v9fs_parse_options(struct v9fs_session_info *v9ses, char *opts)
86{ 86{
87 char *options; 87 char *options, *tmp_options;
88 substring_t args[MAX_OPT_ARGS]; 88 substring_t args[MAX_OPT_ARGS];
89 char *p; 89 char *p;
90 int option = 0; 90 int option = 0;
@@ -102,9 +102,12 @@ static int v9fs_parse_options(struct v9fs_session_info *v9ses, char *opts)
102 if (!opts) 102 if (!opts)
103 return 0; 103 return 0;
104 104
105 options = kstrdup(opts, GFP_KERNEL); 105 tmp_options = kstrdup(opts, GFP_KERNEL);
106 if (!options) 106 if (!tmp_options) {
107 ret = -ENOMEM;
107 goto fail_option_alloc; 108 goto fail_option_alloc;
109 }
110 options = tmp_options;
108 111
109 while ((p = strsep(&options, ",")) != NULL) { 112 while ((p = strsep(&options, ",")) != NULL) {
110 int token; 113 int token;
@@ -159,8 +162,12 @@ static int v9fs_parse_options(struct v9fs_session_info *v9ses, char *opts)
159 break; 162 break;
160 case Opt_cache: 163 case Opt_cache:
161 s = match_strdup(&args[0]); 164 s = match_strdup(&args[0]);
162 if (!s) 165 if (!s) {
163 goto fail_option_alloc; 166 ret = -ENOMEM;
167 P9_DPRINTK(P9_DEBUG_ERROR,
168 "problem allocating copy of cache arg\n");
169 goto free_and_return;
170 }
164 171
165 if (strcmp(s, "loose") == 0) 172 if (strcmp(s, "loose") == 0)
166 v9ses->cache = CACHE_LOOSE; 173 v9ses->cache = CACHE_LOOSE;
@@ -173,8 +180,12 @@ static int v9fs_parse_options(struct v9fs_session_info *v9ses, char *opts)
173 180
174 case Opt_access: 181 case Opt_access:
175 s = match_strdup(&args[0]); 182 s = match_strdup(&args[0]);
176 if (!s) 183 if (!s) {
177 goto fail_option_alloc; 184 ret = -ENOMEM;
185 P9_DPRINTK(P9_DEBUG_ERROR,
186 "problem allocating copy of access arg\n");
187 goto free_and_return;
188 }
178 189
179 v9ses->flags &= ~V9FS_ACCESS_MASK; 190 v9ses->flags &= ~V9FS_ACCESS_MASK;
180 if (strcmp(s, "user") == 0) 191 if (strcmp(s, "user") == 0)
@@ -194,13 +205,11 @@ static int v9fs_parse_options(struct v9fs_session_info *v9ses, char *opts)
194 continue; 205 continue;
195 } 206 }
196 } 207 }
197 kfree(options);
198 return ret;
199 208
209free_and_return:
210 kfree(tmp_options);
200fail_option_alloc: 211fail_option_alloc:
201 P9_DPRINTK(P9_DEBUG_ERROR, 212 return ret;
202 "failed to allocate copy of option argument\n");
203 return -ENOMEM;
204} 213}
205 214
206/** 215/**
diff --git a/fs/9p/v9fs_vfs.h b/fs/9p/v9fs_vfs.h
index 3a7560e35865..ed835836e0dc 100644
--- a/fs/9p/v9fs_vfs.h
+++ b/fs/9p/v9fs_vfs.h
@@ -60,3 +60,4 @@ void v9fs_dentry_release(struct dentry *);
60int v9fs_uflags2omode(int uflags, int extended); 60int v9fs_uflags2omode(int uflags, int extended);
61 61
62ssize_t v9fs_file_readn(struct file *, char *, char __user *, u32, u64); 62ssize_t v9fs_file_readn(struct file *, char *, char __user *, u32, u64);
63void v9fs_blank_wstat(struct p9_wstat *wstat);
diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c
index 3902bf43a088..74a0461a9ac0 100644
--- a/fs/9p/vfs_file.c
+++ b/fs/9p/vfs_file.c
@@ -257,6 +257,23 @@ v9fs_file_write(struct file *filp, const char __user * data,
257 return total; 257 return total;
258} 258}
259 259
260static int v9fs_file_fsync(struct file *filp, struct dentry *dentry,
261 int datasync)
262{
263 struct p9_fid *fid;
264 struct p9_wstat wstat;
265 int retval;
266
267 P9_DPRINTK(P9_DEBUG_VFS, "filp %p dentry %p datasync %x\n", filp,
268 dentry, datasync);
269
270 fid = filp->private_data;
271 v9fs_blank_wstat(&wstat);
272
273 retval = p9_client_wstat(fid, &wstat);
274 return retval;
275}
276
260static const struct file_operations v9fs_cached_file_operations = { 277static const struct file_operations v9fs_cached_file_operations = {
261 .llseek = generic_file_llseek, 278 .llseek = generic_file_llseek,
262 .read = do_sync_read, 279 .read = do_sync_read,
@@ -266,6 +283,7 @@ static const struct file_operations v9fs_cached_file_operations = {
266 .release = v9fs_dir_release, 283 .release = v9fs_dir_release,
267 .lock = v9fs_file_lock, 284 .lock = v9fs_file_lock,
268 .mmap = generic_file_readonly_mmap, 285 .mmap = generic_file_readonly_mmap,
286 .fsync = v9fs_file_fsync,
269}; 287};
270 288
271const struct file_operations v9fs_file_operations = { 289const struct file_operations v9fs_file_operations = {
@@ -276,4 +294,5 @@ const struct file_operations v9fs_file_operations = {
276 .release = v9fs_dir_release, 294 .release = v9fs_dir_release,
277 .lock = v9fs_file_lock, 295 .lock = v9fs_file_lock,
278 .mmap = generic_file_readonly_mmap, 296 .mmap = generic_file_readonly_mmap,
297 .fsync = v9fs_file_fsync,
279}; 298};
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index 18f74ec4dce9..a407fa3388c0 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -176,7 +176,7 @@ int v9fs_uflags2omode(int uflags, int extended)
176 * 176 *
177 */ 177 */
178 178
179static void 179void
180v9fs_blank_wstat(struct p9_wstat *wstat) 180v9fs_blank_wstat(struct p9_wstat *wstat)
181{ 181{
182 wstat->type = ~0; 182 wstat->type = ~0;
@@ -1001,44 +1001,6 @@ done:
1001} 1001}
1002 1002
1003/** 1003/**
1004 * v9fs_vfs_readlink - read a symlink's location
1005 * @dentry: dentry for symlink
1006 * @buffer: buffer to load symlink location into
1007 * @buflen: length of buffer
1008 *
1009 */
1010
1011static int v9fs_vfs_readlink(struct dentry *dentry, char __user * buffer,
1012 int buflen)
1013{
1014 int retval;
1015 int ret;
1016 char *link = __getname();
1017
1018 if (unlikely(!link))
1019 return -ENOMEM;
1020
1021 if (buflen > PATH_MAX)
1022 buflen = PATH_MAX;
1023
1024 P9_DPRINTK(P9_DEBUG_VFS, " dentry: %s (%p)\n", dentry->d_name.name,
1025 dentry);
1026
1027 retval = v9fs_readlink(dentry, link, buflen);
1028
1029 if (retval > 0) {
1030 if ((ret = copy_to_user(buffer, link, retval)) != 0) {
1031 P9_DPRINTK(P9_DEBUG_ERROR,
1032 "problem copying to user: %d\n", ret);
1033 retval = ret;
1034 }
1035 }
1036
1037 __putname(link);
1038 return retval;
1039}
1040
1041/**
1042 * v9fs_vfs_follow_link - follow a symlink path 1004 * v9fs_vfs_follow_link - follow a symlink path
1043 * @dentry: dentry for symlink 1005 * @dentry: dentry for symlink
1044 * @nd: nameidata 1006 * @nd: nameidata
@@ -1230,7 +1192,6 @@ static const struct inode_operations v9fs_dir_inode_operations_ext = {
1230 .rmdir = v9fs_vfs_rmdir, 1192 .rmdir = v9fs_vfs_rmdir,
1231 .mknod = v9fs_vfs_mknod, 1193 .mknod = v9fs_vfs_mknod,
1232 .rename = v9fs_vfs_rename, 1194 .rename = v9fs_vfs_rename,
1233 .readlink = v9fs_vfs_readlink,
1234 .getattr = v9fs_vfs_getattr, 1195 .getattr = v9fs_vfs_getattr,
1235 .setattr = v9fs_vfs_setattr, 1196 .setattr = v9fs_vfs_setattr,
1236}; 1197};
@@ -1253,7 +1214,7 @@ static const struct inode_operations v9fs_file_inode_operations = {
1253}; 1214};
1254 1215
1255static const struct inode_operations v9fs_symlink_inode_operations = { 1216static const struct inode_operations v9fs_symlink_inode_operations = {
1256 .readlink = v9fs_vfs_readlink, 1217 .readlink = generic_readlink,
1257 .follow_link = v9fs_vfs_follow_link, 1218 .follow_link = v9fs_vfs_follow_link,
1258 .put_link = v9fs_vfs_put_link, 1219 .put_link = v9fs_vfs_put_link,
1259 .getattr = v9fs_vfs_getattr, 1220 .getattr = v9fs_vfs_getattr,
diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c
index 14a86448572c..69357c0d9899 100644
--- a/fs/9p/vfs_super.c
+++ b/fs/9p/vfs_super.c
@@ -188,7 +188,8 @@ static void v9fs_kill_super(struct super_block *s)
188 188
189 P9_DPRINTK(P9_DEBUG_VFS, " %p\n", s); 189 P9_DPRINTK(P9_DEBUG_VFS, " %p\n", s);
190 190
191 v9fs_dentry_release(s->s_root); /* clunk root */ 191 if (s->s_root)
192 v9fs_dentry_release(s->s_root); /* clunk root */
192 193
193 kill_anon_super(s); 194 kill_anon_super(s);
194 195
diff --git a/fs/Kconfig b/fs/Kconfig
index f8fccaaad628..64d44efad7a5 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -6,10 +6,6 @@ menu "File systems"
6 6
7if BLOCK 7if BLOCK
8 8
9config FS_JOURNAL_INFO
10 bool
11 default n
12
13source "fs/ext2/Kconfig" 9source "fs/ext2/Kconfig"
14source "fs/ext3/Kconfig" 10source "fs/ext3/Kconfig"
15source "fs/ext4/Kconfig" 11source "fs/ext4/Kconfig"
diff --git a/fs/affs/affs.h b/fs/affs/affs.h
index e511dc621a2e..0e40caaba456 100644
--- a/fs/affs/affs.h
+++ b/fs/affs/affs.h
@@ -106,8 +106,8 @@ struct affs_sb_info {
106 u32 s_last_bmap; 106 u32 s_last_bmap;
107 struct buffer_head *s_bmap_bh; 107 struct buffer_head *s_bmap_bh;
108 char *s_prefix; /* Prefix for volumes and assigns. */ 108 char *s_prefix; /* Prefix for volumes and assigns. */
109 int s_prefix_len; /* Length of prefix. */
110 char s_volume[32]; /* Volume prefix for absolute symlinks. */ 109 char s_volume[32]; /* Volume prefix for absolute symlinks. */
110 spinlock_t symlink_lock; /* protects the previous two */
111}; 111};
112 112
113#define SF_INTL 0x0001 /* International filesystem. */ 113#define SF_INTL 0x0001 /* International filesystem. */
diff --git a/fs/affs/namei.c b/fs/affs/namei.c
index 960d336ec694..d70bbbac6b7b 100644
--- a/fs/affs/namei.c
+++ b/fs/affs/namei.c
@@ -341,10 +341,13 @@ affs_symlink(struct inode *dir, struct dentry *dentry, const char *symname)
341 p = (char *)AFFS_HEAD(bh)->table; 341 p = (char *)AFFS_HEAD(bh)->table;
342 lc = '/'; 342 lc = '/';
343 if (*symname == '/') { 343 if (*symname == '/') {
344 struct affs_sb_info *sbi = AFFS_SB(sb);
344 while (*symname == '/') 345 while (*symname == '/')
345 symname++; 346 symname++;
346 while (AFFS_SB(sb)->s_volume[i]) /* Cannot overflow */ 347 spin_lock(&sbi->symlink_lock);
347 *p++ = AFFS_SB(sb)->s_volume[i++]; 348 while (sbi->s_volume[i]) /* Cannot overflow */
349 *p++ = sbi->s_volume[i++];
350 spin_unlock(&sbi->symlink_lock);
348 } 351 }
349 while (i < maxlen && (c = *symname++)) { 352 while (i < maxlen && (c = *symname++)) {
350 if (c == '.' && lc == '/' && *symname == '.' && symname[1] == '/') { 353 if (c == '.' && lc == '/' && *symname == '.' && symname[1] == '/') {
diff --git a/fs/affs/super.c b/fs/affs/super.c
index 104fdcb3a7fc..d41e9673cd97 100644
--- a/fs/affs/super.c
+++ b/fs/affs/super.c
@@ -203,7 +203,7 @@ parse_options(char *options, uid_t *uid, gid_t *gid, int *mode, int *reserved, s
203 switch (token) { 203 switch (token) {
204 case Opt_bs: 204 case Opt_bs:
205 if (match_int(&args[0], &n)) 205 if (match_int(&args[0], &n))
206 return -EINVAL; 206 return 0;
207 if (n != 512 && n != 1024 && n != 2048 207 if (n != 512 && n != 1024 && n != 2048
208 && n != 4096) { 208 && n != 4096) {
209 printk ("AFFS: Invalid blocksize (512, 1024, 2048, 4096 allowed)\n"); 209 printk ("AFFS: Invalid blocksize (512, 1024, 2048, 4096 allowed)\n");
@@ -213,7 +213,7 @@ parse_options(char *options, uid_t *uid, gid_t *gid, int *mode, int *reserved, s
213 break; 213 break;
214 case Opt_mode: 214 case Opt_mode:
215 if (match_octal(&args[0], &option)) 215 if (match_octal(&args[0], &option))
216 return 1; 216 return 0;
217 *mode = option & 0777; 217 *mode = option & 0777;
218 *mount_opts |= SF_SETMODE; 218 *mount_opts |= SF_SETMODE;
219 break; 219 break;
@@ -221,8 +221,6 @@ parse_options(char *options, uid_t *uid, gid_t *gid, int *mode, int *reserved, s
221 *mount_opts |= SF_MUFS; 221 *mount_opts |= SF_MUFS;
222 break; 222 break;
223 case Opt_prefix: 223 case Opt_prefix:
224 /* Free any previous prefix */
225 kfree(*prefix);
226 *prefix = match_strdup(&args[0]); 224 *prefix = match_strdup(&args[0]);
227 if (!*prefix) 225 if (!*prefix)
228 return 0; 226 return 0;
@@ -233,21 +231,21 @@ parse_options(char *options, uid_t *uid, gid_t *gid, int *mode, int *reserved, s
233 break; 231 break;
234 case Opt_reserved: 232 case Opt_reserved:
235 if (match_int(&args[0], reserved)) 233 if (match_int(&args[0], reserved))
236 return 1; 234 return 0;
237 break; 235 break;
238 case Opt_root: 236 case Opt_root:
239 if (match_int(&args[0], root)) 237 if (match_int(&args[0], root))
240 return 1; 238 return 0;
241 break; 239 break;
242 case Opt_setgid: 240 case Opt_setgid:
243 if (match_int(&args[0], &option)) 241 if (match_int(&args[0], &option))
244 return 1; 242 return 0;
245 *gid = option; 243 *gid = option;
246 *mount_opts |= SF_SETGID; 244 *mount_opts |= SF_SETGID;
247 break; 245 break;
248 case Opt_setuid: 246 case Opt_setuid:
249 if (match_int(&args[0], &option)) 247 if (match_int(&args[0], &option))
250 return -EINVAL; 248 return 0;
251 *uid = option; 249 *uid = option;
252 *mount_opts |= SF_SETUID; 250 *mount_opts |= SF_SETUID;
253 break; 251 break;
@@ -311,11 +309,14 @@ static int affs_fill_super(struct super_block *sb, void *data, int silent)
311 return -ENOMEM; 309 return -ENOMEM;
312 sb->s_fs_info = sbi; 310 sb->s_fs_info = sbi;
313 mutex_init(&sbi->s_bmlock); 311 mutex_init(&sbi->s_bmlock);
312 spin_lock_init(&sbi->symlink_lock);
314 313
315 if (!parse_options(data,&uid,&gid,&i,&reserved,&root_block, 314 if (!parse_options(data,&uid,&gid,&i,&reserved,&root_block,
316 &blocksize,&sbi->s_prefix, 315 &blocksize,&sbi->s_prefix,
317 sbi->s_volume, &mount_flags)) { 316 sbi->s_volume, &mount_flags)) {
318 printk(KERN_ERR "AFFS: Error parsing options\n"); 317 printk(KERN_ERR "AFFS: Error parsing options\n");
318 kfree(sbi->s_prefix);
319 kfree(sbi);
319 return -EINVAL; 320 return -EINVAL;
320 } 321 }
321 /* N.B. after this point s_prefix must be released */ 322 /* N.B. after this point s_prefix must be released */
@@ -516,14 +517,18 @@ affs_remount(struct super_block *sb, int *flags, char *data)
516 unsigned long mount_flags; 517 unsigned long mount_flags;
517 int res = 0; 518 int res = 0;
518 char *new_opts = kstrdup(data, GFP_KERNEL); 519 char *new_opts = kstrdup(data, GFP_KERNEL);
520 char volume[32];
521 char *prefix = NULL;
519 522
520 pr_debug("AFFS: remount(flags=0x%x,opts=\"%s\")\n",*flags,data); 523 pr_debug("AFFS: remount(flags=0x%x,opts=\"%s\")\n",*flags,data);
521 524
522 *flags |= MS_NODIRATIME; 525 *flags |= MS_NODIRATIME;
523 526
527 memcpy(volume, sbi->s_volume, 32);
524 if (!parse_options(data, &uid, &gid, &mode, &reserved, &root_block, 528 if (!parse_options(data, &uid, &gid, &mode, &reserved, &root_block,
525 &blocksize, &sbi->s_prefix, sbi->s_volume, 529 &blocksize, &prefix, volume,
526 &mount_flags)) { 530 &mount_flags)) {
531 kfree(prefix);
527 kfree(new_opts); 532 kfree(new_opts);
528 return -EINVAL; 533 return -EINVAL;
529 } 534 }
@@ -534,6 +539,14 @@ affs_remount(struct super_block *sb, int *flags, char *data)
534 sbi->s_mode = mode; 539 sbi->s_mode = mode;
535 sbi->s_uid = uid; 540 sbi->s_uid = uid;
536 sbi->s_gid = gid; 541 sbi->s_gid = gid;
542 /* protect against readers */
543 spin_lock(&sbi->symlink_lock);
544 if (prefix) {
545 kfree(sbi->s_prefix);
546 sbi->s_prefix = prefix;
547 }
548 memcpy(sbi->s_volume, volume, 32);
549 spin_unlock(&sbi->symlink_lock);
537 550
538 if ((*flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY)) { 551 if ((*flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY)) {
539 unlock_kernel(); 552 unlock_kernel();
diff --git a/fs/affs/symlink.c b/fs/affs/symlink.c
index 41782539c907..ee00f08c4f53 100644
--- a/fs/affs/symlink.c
+++ b/fs/affs/symlink.c
@@ -20,7 +20,6 @@ static int affs_symlink_readpage(struct file *file, struct page *page)
20 int i, j; 20 int i, j;
21 char c; 21 char c;
22 char lc; 22 char lc;
23 char *pf;
24 23
25 pr_debug("AFFS: follow_link(ino=%lu)\n",inode->i_ino); 24 pr_debug("AFFS: follow_link(ino=%lu)\n",inode->i_ino);
26 25
@@ -32,11 +31,15 @@ static int affs_symlink_readpage(struct file *file, struct page *page)
32 j = 0; 31 j = 0;
33 lf = (struct slink_front *)bh->b_data; 32 lf = (struct slink_front *)bh->b_data;
34 lc = 0; 33 lc = 0;
35 pf = AFFS_SB(inode->i_sb)->s_prefix ? AFFS_SB(inode->i_sb)->s_prefix : "/";
36 34
37 if (strchr(lf->symname,':')) { /* Handle assign or volume name */ 35 if (strchr(lf->symname,':')) { /* Handle assign or volume name */
36 struct affs_sb_info *sbi = AFFS_SB(inode->i_sb);
37 char *pf;
38 spin_lock(&sbi->symlink_lock);
39 pf = sbi->s_prefix ? sbi->s_prefix : "/";
38 while (i < 1023 && (c = pf[i])) 40 while (i < 1023 && (c = pf[i]))
39 link[i++] = c; 41 link[i++] = c;
42 spin_unlock(&sbi->symlink_lock);
40 while (i < 1023 && lf->symname[j] != ':') 43 while (i < 1023 && lf->symname[j] != ':')
41 link[i++] = lf->symname[j++]; 44 link[i++] = lf->symname[j++];
42 if (i < 1023) 45 if (i < 1023)
diff --git a/fs/anon_inodes.c b/fs/anon_inodes.c
index 2ca7a7cafdbf..9f0bf13291e5 100644
--- a/fs/anon_inodes.c
+++ b/fs/anon_inodes.c
@@ -35,14 +35,13 @@ static int anon_inodefs_get_sb(struct file_system_type *fs_type, int flags,
35 mnt); 35 mnt);
36} 36}
37 37
38static int anon_inodefs_delete_dentry(struct dentry *dentry) 38/*
39 * anon_inodefs_dname() is called from d_path().
40 */
41static char *anon_inodefs_dname(struct dentry *dentry, char *buffer, int buflen)
39{ 42{
40 /* 43 return dynamic_dname(dentry, buffer, buflen, "anon_inode:%s",
41 * We faked vfs to believe the dentry was hashed when we created it. 44 dentry->d_name.name);
42 * Now we restore the flag so that dput() will work correctly.
43 */
44 dentry->d_flags |= DCACHE_UNHASHED;
45 return 1;
46} 45}
47 46
48static struct file_system_type anon_inode_fs_type = { 47static struct file_system_type anon_inode_fs_type = {
@@ -51,7 +50,7 @@ static struct file_system_type anon_inode_fs_type = {
51 .kill_sb = kill_anon_super, 50 .kill_sb = kill_anon_super,
52}; 51};
53static const struct dentry_operations anon_inodefs_dentry_operations = { 52static const struct dentry_operations anon_inodefs_dentry_operations = {
54 .d_delete = anon_inodefs_delete_dentry, 53 .d_dname = anon_inodefs_dname,
55}; 54};
56 55
57/* 56/*
@@ -88,7 +87,7 @@ struct file *anon_inode_getfile(const char *name,
88 void *priv, int flags) 87 void *priv, int flags)
89{ 88{
90 struct qstr this; 89 struct qstr this;
91 struct dentry *dentry; 90 struct path path;
92 struct file *file; 91 struct file *file;
93 int error; 92 int error;
94 93
@@ -106,10 +105,11 @@ struct file *anon_inode_getfile(const char *name,
106 this.name = name; 105 this.name = name;
107 this.len = strlen(name); 106 this.len = strlen(name);
108 this.hash = 0; 107 this.hash = 0;
109 dentry = d_alloc(anon_inode_mnt->mnt_sb->s_root, &this); 108 path.dentry = d_alloc(anon_inode_mnt->mnt_sb->s_root, &this);
110 if (!dentry) 109 if (!path.dentry)
111 goto err_module; 110 goto err_module;
112 111
112 path.mnt = mntget(anon_inode_mnt);
113 /* 113 /*
114 * We know the anon_inode inode count is always greater than zero, 114 * We know the anon_inode inode count is always greater than zero,
115 * so we can avoid doing an igrab() and we can use an open-coded 115 * so we can avoid doing an igrab() and we can use an open-coded
@@ -117,27 +117,24 @@ struct file *anon_inode_getfile(const char *name,
117 */ 117 */
118 atomic_inc(&anon_inode_inode->i_count); 118 atomic_inc(&anon_inode_inode->i_count);
119 119
120 dentry->d_op = &anon_inodefs_dentry_operations; 120 path.dentry->d_op = &anon_inodefs_dentry_operations;
121 /* Do not publish this dentry inside the global dentry hash table */ 121 d_instantiate(path.dentry, anon_inode_inode);
122 dentry->d_flags &= ~DCACHE_UNHASHED;
123 d_instantiate(dentry, anon_inode_inode);
124 122
125 error = -ENFILE; 123 error = -ENFILE;
126 file = alloc_file(anon_inode_mnt, dentry, 124 file = alloc_file(&path, OPEN_FMODE(flags), fops);
127 FMODE_READ | FMODE_WRITE, fops);
128 if (!file) 125 if (!file)
129 goto err_dput; 126 goto err_dput;
130 file->f_mapping = anon_inode_inode->i_mapping; 127 file->f_mapping = anon_inode_inode->i_mapping;
131 128
132 file->f_pos = 0; 129 file->f_pos = 0;
133 file->f_flags = O_RDWR | (flags & O_NONBLOCK); 130 file->f_flags = flags & (O_ACCMODE | O_NONBLOCK);
134 file->f_version = 0; 131 file->f_version = 0;
135 file->private_data = priv; 132 file->private_data = priv;
136 133
137 return file; 134 return file;
138 135
139err_dput: 136err_dput:
140 dput(dentry); 137 path_put(&path);
141err_module: 138err_module:
142 module_put(fops->owner); 139 module_put(fops->owner);
143 return ERR_PTR(error); 140 return ERR_PTR(error);
diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c
index 33baf27fac78..34ddda888e63 100644
--- a/fs/befs/linuxvfs.c
+++ b/fs/befs/linuxvfs.c
@@ -873,6 +873,7 @@ befs_fill_super(struct super_block *sb, void *data, int silent)
873 brelse(bh); 873 brelse(bh);
874 874
875 unacquire_priv_sbp: 875 unacquire_priv_sbp:
876 kfree(befs_sb->mount_opts.iocharset);
876 kfree(sb->s_fs_info); 877 kfree(sb->s_fs_info);
877 878
878 unacquire_none: 879 unacquire_none:
diff --git a/fs/bfs/inode.c b/fs/bfs/inode.c
index 6f60336c6628..8f3d9fd89604 100644
--- a/fs/bfs/inode.c
+++ b/fs/bfs/inode.c
@@ -353,35 +353,35 @@ static int bfs_fill_super(struct super_block *s, void *data, int silent)
353 struct inode *inode; 353 struct inode *inode;
354 unsigned i, imap_len; 354 unsigned i, imap_len;
355 struct bfs_sb_info *info; 355 struct bfs_sb_info *info;
356 long ret = -EINVAL; 356 int ret = -EINVAL;
357 unsigned long i_sblock, i_eblock, i_eoff, s_size; 357 unsigned long i_sblock, i_eblock, i_eoff, s_size;
358 358
359 info = kzalloc(sizeof(*info), GFP_KERNEL); 359 info = kzalloc(sizeof(*info), GFP_KERNEL);
360 if (!info) 360 if (!info)
361 return -ENOMEM; 361 return -ENOMEM;
362 mutex_init(&info->bfs_lock);
362 s->s_fs_info = info; 363 s->s_fs_info = info;
363 364
364 sb_set_blocksize(s, BFS_BSIZE); 365 sb_set_blocksize(s, BFS_BSIZE);
365 366
366 bh = sb_bread(s, 0); 367 info->si_sbh = sb_bread(s, 0);
367 if(!bh) 368 if (!info->si_sbh)
368 goto out; 369 goto out;
369 bfs_sb = (struct bfs_super_block *)bh->b_data; 370 bfs_sb = (struct bfs_super_block *)info->si_sbh->b_data;
370 if (le32_to_cpu(bfs_sb->s_magic) != BFS_MAGIC) { 371 if (le32_to_cpu(bfs_sb->s_magic) != BFS_MAGIC) {
371 if (!silent) 372 if (!silent)
372 printf("No BFS filesystem on %s (magic=%08x)\n", 373 printf("No BFS filesystem on %s (magic=%08x)\n",
373 s->s_id, le32_to_cpu(bfs_sb->s_magic)); 374 s->s_id, le32_to_cpu(bfs_sb->s_magic));
374 goto out; 375 goto out1;
375 } 376 }
376 if (BFS_UNCLEAN(bfs_sb, s) && !silent) 377 if (BFS_UNCLEAN(bfs_sb, s) && !silent)
377 printf("%s is unclean, continuing\n", s->s_id); 378 printf("%s is unclean, continuing\n", s->s_id);
378 379
379 s->s_magic = BFS_MAGIC; 380 s->s_magic = BFS_MAGIC;
380 info->si_sbh = bh;
381 381
382 if (le32_to_cpu(bfs_sb->s_start) > le32_to_cpu(bfs_sb->s_end)) { 382 if (le32_to_cpu(bfs_sb->s_start) > le32_to_cpu(bfs_sb->s_end)) {
383 printf("Superblock is corrupted\n"); 383 printf("Superblock is corrupted\n");
384 goto out; 384 goto out1;
385 } 385 }
386 386
387 info->si_lasti = (le32_to_cpu(bfs_sb->s_start) - BFS_BSIZE) / 387 info->si_lasti = (le32_to_cpu(bfs_sb->s_start) - BFS_BSIZE) /
@@ -390,7 +390,7 @@ static int bfs_fill_super(struct super_block *s, void *data, int silent)
390 imap_len = (info->si_lasti / 8) + 1; 390 imap_len = (info->si_lasti / 8) + 1;
391 info->si_imap = kzalloc(imap_len, GFP_KERNEL); 391 info->si_imap = kzalloc(imap_len, GFP_KERNEL);
392 if (!info->si_imap) 392 if (!info->si_imap)
393 goto out; 393 goto out1;
394 for (i = 0; i < BFS_ROOT_INO; i++) 394 for (i = 0; i < BFS_ROOT_INO; i++)
395 set_bit(i, info->si_imap); 395 set_bit(i, info->si_imap);
396 396
@@ -398,15 +398,13 @@ static int bfs_fill_super(struct super_block *s, void *data, int silent)
398 inode = bfs_iget(s, BFS_ROOT_INO); 398 inode = bfs_iget(s, BFS_ROOT_INO);
399 if (IS_ERR(inode)) { 399 if (IS_ERR(inode)) {
400 ret = PTR_ERR(inode); 400 ret = PTR_ERR(inode);
401 kfree(info->si_imap); 401 goto out2;
402 goto out;
403 } 402 }
404 s->s_root = d_alloc_root(inode); 403 s->s_root = d_alloc_root(inode);
405 if (!s->s_root) { 404 if (!s->s_root) {
406 iput(inode); 405 iput(inode);
407 ret = -ENOMEM; 406 ret = -ENOMEM;
408 kfree(info->si_imap); 407 goto out2;
409 goto out;
410 } 408 }
411 409
412 info->si_blocks = (le32_to_cpu(bfs_sb->s_end) + 1) >> BFS_BSIZE_BITS; 410 info->si_blocks = (le32_to_cpu(bfs_sb->s_end) + 1) >> BFS_BSIZE_BITS;
@@ -419,10 +417,8 @@ static int bfs_fill_super(struct super_block *s, void *data, int silent)
419 bh = sb_bread(s, info->si_blocks - 1); 417 bh = sb_bread(s, info->si_blocks - 1);
420 if (!bh) { 418 if (!bh) {
421 printf("Last block not available: %lu\n", info->si_blocks - 1); 419 printf("Last block not available: %lu\n", info->si_blocks - 1);
422 iput(inode);
423 ret = -EIO; 420 ret = -EIO;
424 kfree(info->si_imap); 421 goto out3;
425 goto out;
426 } 422 }
427 brelse(bh); 423 brelse(bh);
428 424
@@ -459,11 +455,8 @@ static int bfs_fill_super(struct super_block *s, void *data, int silent)
459 printf("Inode 0x%08x corrupted\n", i); 455 printf("Inode 0x%08x corrupted\n", i);
460 456
461 brelse(bh); 457 brelse(bh);
462 s->s_root = NULL; 458 ret = -EIO;
463 kfree(info->si_imap); 459 goto out3;
464 kfree(info);
465 s->s_fs_info = NULL;
466 return -EIO;
467 } 460 }
468 461
469 if (!di->i_ino) { 462 if (!di->i_ino) {
@@ -483,11 +476,17 @@ static int bfs_fill_super(struct super_block *s, void *data, int silent)
483 s->s_dirt = 1; 476 s->s_dirt = 1;
484 } 477 }
485 dump_imap("read_super", s); 478 dump_imap("read_super", s);
486 mutex_init(&info->bfs_lock);
487 return 0; 479 return 0;
488 480
481out3:
482 dput(s->s_root);
483 s->s_root = NULL;
484out2:
485 kfree(info->si_imap);
486out1:
487 brelse(info->si_sbh);
489out: 488out:
490 brelse(bh); 489 mutex_destroy(&info->bfs_lock);
491 kfree(info); 490 kfree(info);
492 s->s_fs_info = NULL; 491 s->s_fs_info = NULL;
493 return ret; 492 return ret;
diff --git a/fs/binfmt_aout.c b/fs/binfmt_aout.c
index b639dcf7c778..fdd397099172 100644
--- a/fs/binfmt_aout.c
+++ b/fs/binfmt_aout.c
@@ -32,7 +32,7 @@
32 32
33static int load_aout_binary(struct linux_binprm *, struct pt_regs * regs); 33static int load_aout_binary(struct linux_binprm *, struct pt_regs * regs);
34static int load_aout_library(struct file*); 34static int load_aout_library(struct file*);
35static int aout_core_dump(long signr, struct pt_regs *regs, struct file *file, unsigned long limit); 35static int aout_core_dump(struct coredump_params *cprm);
36 36
37static struct linux_binfmt aout_format = { 37static struct linux_binfmt aout_format = {
38 .module = THIS_MODULE, 38 .module = THIS_MODULE,
@@ -89,8 +89,9 @@ if (file->f_op->llseek) { \
89 * dumping of the process results in another error.. 89 * dumping of the process results in another error..
90 */ 90 */
91 91
92static int aout_core_dump(long signr, struct pt_regs *regs, struct file *file, unsigned long limit) 92static int aout_core_dump(struct coredump_params *cprm)
93{ 93{
94 struct file *file = cprm->file;
94 mm_segment_t fs; 95 mm_segment_t fs;
95 int has_dumped = 0; 96 int has_dumped = 0;
96 unsigned long dump_start, dump_size; 97 unsigned long dump_start, dump_size;
@@ -108,16 +109,16 @@ static int aout_core_dump(long signr, struct pt_regs *regs, struct file *file, u
108 current->flags |= PF_DUMPCORE; 109 current->flags |= PF_DUMPCORE;
109 strncpy(dump.u_comm, current->comm, sizeof(dump.u_comm)); 110 strncpy(dump.u_comm, current->comm, sizeof(dump.u_comm));
110 dump.u_ar0 = offsetof(struct user, regs); 111 dump.u_ar0 = offsetof(struct user, regs);
111 dump.signal = signr; 112 dump.signal = cprm->signr;
112 aout_dump_thread(regs, &dump); 113 aout_dump_thread(cprm->regs, &dump);
113 114
114/* If the size of the dump file exceeds the rlimit, then see what would happen 115/* If the size of the dump file exceeds the rlimit, then see what would happen
115 if we wrote the stack, but not the data area. */ 116 if we wrote the stack, but not the data area. */
116 if ((dump.u_dsize + dump.u_ssize+1) * PAGE_SIZE > limit) 117 if ((dump.u_dsize + dump.u_ssize+1) * PAGE_SIZE > cprm->limit)
117 dump.u_dsize = 0; 118 dump.u_dsize = 0;
118 119
119/* Make sure we have enough room to write the stack and data areas. */ 120/* Make sure we have enough room to write the stack and data areas. */
120 if ((dump.u_ssize + 1) * PAGE_SIZE > limit) 121 if ((dump.u_ssize + 1) * PAGE_SIZE > cprm->limit)
121 dump.u_ssize = 0; 122 dump.u_ssize = 0;
122 123
123/* make sure we actually have a data and stack area to dump */ 124/* make sure we actually have a data and stack area to dump */
@@ -263,6 +264,7 @@ static int load_aout_binary(struct linux_binprm * bprm, struct pt_regs * regs)
263#else 264#else
264 set_personality(PER_LINUX); 265 set_personality(PER_LINUX);
265#endif 266#endif
267 setup_new_exec(bprm);
266 268
267 current->mm->end_code = ex.a_text + 269 current->mm->end_code = ex.a_text +
268 (current->mm->start_code = N_TXTADDR(ex)); 270 (current->mm->start_code = N_TXTADDR(ex));
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 97b6e9efeb7f..fd5b2ea5d299 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -45,7 +45,7 @@ static unsigned long elf_map(struct file *, unsigned long, struct elf_phdr *,
45 * don't even try. 45 * don't even try.
46 */ 46 */
47#ifdef CONFIG_ELF_CORE 47#ifdef CONFIG_ELF_CORE
48static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file, unsigned long limit); 48static int elf_core_dump(struct coredump_params *cprm);
49#else 49#else
50#define elf_core_dump NULL 50#define elf_core_dump NULL
51#endif 51#endif
@@ -662,27 +662,6 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
662 if (elf_interpreter[elf_ppnt->p_filesz - 1] != '\0') 662 if (elf_interpreter[elf_ppnt->p_filesz - 1] != '\0')
663 goto out_free_interp; 663 goto out_free_interp;
664 664
665 /*
666 * The early SET_PERSONALITY here is so that the lookup
667 * for the interpreter happens in the namespace of the
668 * to-be-execed image. SET_PERSONALITY can select an
669 * alternate root.
670 *
671 * However, SET_PERSONALITY is NOT allowed to switch
672 * this task into the new images's memory mapping
673 * policy - that is, TASK_SIZE must still evaluate to
674 * that which is appropriate to the execing application.
675 * This is because exit_mmap() needs to have TASK_SIZE
676 * evaluate to the size of the old image.
677 *
678 * So if (say) a 64-bit application is execing a 32-bit
679 * application it is the architecture's responsibility
680 * to defer changing the value of TASK_SIZE until the
681 * switch really is going to happen - do this in
682 * flush_thread(). - akpm
683 */
684 SET_PERSONALITY(loc->elf_ex);
685
686 interpreter = open_exec(elf_interpreter); 665 interpreter = open_exec(elf_interpreter);
687 retval = PTR_ERR(interpreter); 666 retval = PTR_ERR(interpreter);
688 if (IS_ERR(interpreter)) 667 if (IS_ERR(interpreter))
@@ -730,9 +709,6 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
730 /* Verify the interpreter has a valid arch */ 709 /* Verify the interpreter has a valid arch */
731 if (!elf_check_arch(&loc->interp_elf_ex)) 710 if (!elf_check_arch(&loc->interp_elf_ex))
732 goto out_free_dentry; 711 goto out_free_dentry;
733 } else {
734 /* Executables without an interpreter also need a personality */
735 SET_PERSONALITY(loc->elf_ex);
736 } 712 }
737 713
738 /* Flush all traces of the currently running executable */ 714 /* Flush all traces of the currently running executable */
@@ -752,7 +728,8 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
752 728
753 if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space) 729 if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
754 current->flags |= PF_RANDOMIZE; 730 current->flags |= PF_RANDOMIZE;
755 arch_pick_mmap_layout(current->mm); 731
732 setup_new_exec(bprm);
756 733
757 /* Do this so that we can load the interpreter, if need be. We will 734 /* Do this so that we can load the interpreter, if need be. We will
758 change some of these later */ 735 change some of these later */
@@ -1272,8 +1249,9 @@ static int writenote(struct memelfnote *men, struct file *file,
1272} 1249}
1273#undef DUMP_WRITE 1250#undef DUMP_WRITE
1274 1251
1275#define DUMP_WRITE(addr, nr) \ 1252#define DUMP_WRITE(addr, nr) \
1276 if ((size += (nr)) > limit || !dump_write(file, (addr), (nr))) \ 1253 if ((size += (nr)) > cprm->limit || \
1254 !dump_write(cprm->file, (addr), (nr))) \
1277 goto end_coredump; 1255 goto end_coredump;
1278 1256
1279static void fill_elf_header(struct elfhdr *elf, int segs, 1257static void fill_elf_header(struct elfhdr *elf, int segs,
@@ -1901,7 +1879,7 @@ static struct vm_area_struct *next_vma(struct vm_area_struct *this_vma,
1901 * and then they are actually written out. If we run out of core limit 1879 * and then they are actually written out. If we run out of core limit
1902 * we just truncate. 1880 * we just truncate.
1903 */ 1881 */
1904static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file, unsigned long limit) 1882static int elf_core_dump(struct coredump_params *cprm)
1905{ 1883{
1906 int has_dumped = 0; 1884 int has_dumped = 0;
1907 mm_segment_t fs; 1885 mm_segment_t fs;
@@ -1947,7 +1925,7 @@ static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file, un
1947 * notes. This also sets up the file header. 1925 * notes. This also sets up the file header.
1948 */ 1926 */
1949 if (!fill_note_info(elf, segs + 1, /* including notes section */ 1927 if (!fill_note_info(elf, segs + 1, /* including notes section */
1950 &info, signr, regs)) 1928 &info, cprm->signr, cprm->regs))
1951 goto cleanup; 1929 goto cleanup;
1952 1930
1953 has_dumped = 1; 1931 has_dumped = 1;
@@ -2009,14 +1987,14 @@ static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file, un
2009#endif 1987#endif
2010 1988
2011 /* write out the notes section */ 1989 /* write out the notes section */
2012 if (!write_note_info(&info, file, &foffset)) 1990 if (!write_note_info(&info, cprm->file, &foffset))
2013 goto end_coredump; 1991 goto end_coredump;
2014 1992
2015 if (elf_coredump_extra_notes_write(file, &foffset)) 1993 if (elf_coredump_extra_notes_write(cprm->file, &foffset))
2016 goto end_coredump; 1994 goto end_coredump;
2017 1995
2018 /* Align to page */ 1996 /* Align to page */
2019 if (!dump_seek(file, dataoff - foffset)) 1997 if (!dump_seek(cprm->file, dataoff - foffset))
2020 goto end_coredump; 1998 goto end_coredump;
2021 1999
2022 for (vma = first_vma(current, gate_vma); vma != NULL; 2000 for (vma = first_vma(current, gate_vma); vma != NULL;
@@ -2033,12 +2011,13 @@ static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file, un
2033 page = get_dump_page(addr); 2011 page = get_dump_page(addr);
2034 if (page) { 2012 if (page) {
2035 void *kaddr = kmap(page); 2013 void *kaddr = kmap(page);
2036 stop = ((size += PAGE_SIZE) > limit) || 2014 stop = ((size += PAGE_SIZE) > cprm->limit) ||
2037 !dump_write(file, kaddr, PAGE_SIZE); 2015 !dump_write(cprm->file, kaddr,
2016 PAGE_SIZE);
2038 kunmap(page); 2017 kunmap(page);
2039 page_cache_release(page); 2018 page_cache_release(page);
2040 } else 2019 } else
2041 stop = !dump_seek(file, PAGE_SIZE); 2020 stop = !dump_seek(cprm->file, PAGE_SIZE);
2042 if (stop) 2021 if (stop)
2043 goto end_coredump; 2022 goto end_coredump;
2044 } 2023 }
diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c
index 7b055385db8e..18d77297ccc8 100644
--- a/fs/binfmt_elf_fdpic.c
+++ b/fs/binfmt_elf_fdpic.c
@@ -76,7 +76,7 @@ static int elf_fdpic_map_file_by_direct_mmap(struct elf_fdpic_params *,
76 struct file *, struct mm_struct *); 76 struct file *, struct mm_struct *);
77 77
78#ifdef CONFIG_ELF_CORE 78#ifdef CONFIG_ELF_CORE
79static int elf_fdpic_core_dump(long, struct pt_regs *, struct file *, unsigned long limit); 79static int elf_fdpic_core_dump(struct coredump_params *cprm);
80#endif 80#endif
81 81
82static struct linux_binfmt elf_fdpic_format = { 82static struct linux_binfmt elf_fdpic_format = {
@@ -171,6 +171,9 @@ static int load_elf_fdpic_binary(struct linux_binprm *bprm,
171#ifdef ELF_FDPIC_PLAT_INIT 171#ifdef ELF_FDPIC_PLAT_INIT
172 unsigned long dynaddr; 172 unsigned long dynaddr;
173#endif 173#endif
174#ifndef CONFIG_MMU
175 unsigned long stack_prot;
176#endif
174 struct file *interpreter = NULL; /* to shut gcc up */ 177 struct file *interpreter = NULL; /* to shut gcc up */
175 char *interpreter_name = NULL; 178 char *interpreter_name = NULL;
176 int executable_stack; 179 int executable_stack;
@@ -316,6 +319,11 @@ static int load_elf_fdpic_binary(struct linux_binprm *bprm,
316 * defunct, deceased, etc. after this point we have to exit via 319 * defunct, deceased, etc. after this point we have to exit via
317 * error_kill */ 320 * error_kill */
318 set_personality(PER_LINUX_FDPIC); 321 set_personality(PER_LINUX_FDPIC);
322 if (elf_read_implies_exec(&exec_params.hdr, executable_stack))
323 current->personality |= READ_IMPLIES_EXEC;
324
325 setup_new_exec(bprm);
326
319 set_binfmt(&elf_fdpic_format); 327 set_binfmt(&elf_fdpic_format);
320 328
321 current->mm->start_code = 0; 329 current->mm->start_code = 0;
@@ -377,9 +385,13 @@ static int load_elf_fdpic_binary(struct linux_binprm *bprm,
377 if (stack_size < PAGE_SIZE * 2) 385 if (stack_size < PAGE_SIZE * 2)
378 stack_size = PAGE_SIZE * 2; 386 stack_size = PAGE_SIZE * 2;
379 387
388 stack_prot = PROT_READ | PROT_WRITE;
389 if (executable_stack == EXSTACK_ENABLE_X ||
390 (executable_stack == EXSTACK_DEFAULT && VM_STACK_FLAGS & VM_EXEC))
391 stack_prot |= PROT_EXEC;
392
380 down_write(&current->mm->mmap_sem); 393 down_write(&current->mm->mmap_sem);
381 current->mm->start_brk = do_mmap(NULL, 0, stack_size, 394 current->mm->start_brk = do_mmap(NULL, 0, stack_size, stack_prot,
382 PROT_READ | PROT_WRITE | PROT_EXEC,
383 MAP_PRIVATE | MAP_ANONYMOUS | 395 MAP_PRIVATE | MAP_ANONYMOUS |
384 MAP_UNINITIALIZED | MAP_GROWSDOWN, 396 MAP_UNINITIALIZED | MAP_GROWSDOWN,
385 0); 397 0);
@@ -1326,8 +1338,9 @@ static int writenote(struct memelfnote *men, struct file *file)
1326#undef DUMP_WRITE 1338#undef DUMP_WRITE
1327#undef DUMP_SEEK 1339#undef DUMP_SEEK
1328 1340
1329#define DUMP_WRITE(addr, nr) \ 1341#define DUMP_WRITE(addr, nr) \
1330 if ((size += (nr)) > limit || !dump_write(file, (addr), (nr))) \ 1342 if ((size += (nr)) > cprm->limit || \
1343 !dump_write(cprm->file, (addr), (nr))) \
1331 goto end_coredump; 1344 goto end_coredump;
1332 1345
1333static inline void fill_elf_fdpic_header(struct elfhdr *elf, int segs) 1346static inline void fill_elf_fdpic_header(struct elfhdr *elf, int segs)
@@ -1582,8 +1595,7 @@ static int elf_fdpic_dump_segments(struct file *file, size_t *size,
1582 * and then they are actually written out. If we run out of core limit 1595 * and then they are actually written out. If we run out of core limit
1583 * we just truncate. 1596 * we just truncate.
1584 */ 1597 */
1585static int elf_fdpic_core_dump(long signr, struct pt_regs *regs, 1598static int elf_fdpic_core_dump(struct coredump_params *cprm)
1586 struct file *file, unsigned long limit)
1587{ 1599{
1588#define NUM_NOTES 6 1600#define NUM_NOTES 6
1589 int has_dumped = 0; 1601 int has_dumped = 0;
@@ -1642,7 +1654,7 @@ static int elf_fdpic_core_dump(long signr, struct pt_regs *regs,
1642 goto cleanup; 1654 goto cleanup;
1643#endif 1655#endif
1644 1656
1645 if (signr) { 1657 if (cprm->signr) {
1646 struct core_thread *ct; 1658 struct core_thread *ct;
1647 struct elf_thread_status *tmp; 1659 struct elf_thread_status *tmp;
1648 1660
@@ -1661,14 +1673,14 @@ static int elf_fdpic_core_dump(long signr, struct pt_regs *regs,
1661 int sz; 1673 int sz;
1662 1674
1663 tmp = list_entry(t, struct elf_thread_status, list); 1675 tmp = list_entry(t, struct elf_thread_status, list);
1664 sz = elf_dump_thread_status(signr, tmp); 1676 sz = elf_dump_thread_status(cprm->signr, tmp);
1665 thread_status_size += sz; 1677 thread_status_size += sz;
1666 } 1678 }
1667 } 1679 }
1668 1680
1669 /* now collect the dump for the current */ 1681 /* now collect the dump for the current */
1670 fill_prstatus(prstatus, current, signr); 1682 fill_prstatus(prstatus, current, cprm->signr);
1671 elf_core_copy_regs(&prstatus->pr_reg, regs); 1683 elf_core_copy_regs(&prstatus->pr_reg, cprm->regs);
1672 1684
1673 segs = current->mm->map_count; 1685 segs = current->mm->map_count;
1674#ifdef ELF_CORE_EXTRA_PHDRS 1686#ifdef ELF_CORE_EXTRA_PHDRS
@@ -1703,7 +1715,7 @@ static int elf_fdpic_core_dump(long signr, struct pt_regs *regs,
1703 1715
1704 /* Try to dump the FPU. */ 1716 /* Try to dump the FPU. */
1705 if ((prstatus->pr_fpvalid = 1717 if ((prstatus->pr_fpvalid =
1706 elf_core_copy_task_fpregs(current, regs, fpu))) 1718 elf_core_copy_task_fpregs(current, cprm->regs, fpu)))
1707 fill_note(notes + numnote++, 1719 fill_note(notes + numnote++,
1708 "CORE", NT_PRFPREG, sizeof(*fpu), fpu); 1720 "CORE", NT_PRFPREG, sizeof(*fpu), fpu);
1709#ifdef ELF_CORE_COPY_XFPREGS 1721#ifdef ELF_CORE_COPY_XFPREGS
@@ -1774,7 +1786,7 @@ static int elf_fdpic_core_dump(long signr, struct pt_regs *regs,
1774 1786
1775 /* write out the notes section */ 1787 /* write out the notes section */
1776 for (i = 0; i < numnote; i++) 1788 for (i = 0; i < numnote; i++)
1777 if (!writenote(notes + i, file)) 1789 if (!writenote(notes + i, cprm->file))
1778 goto end_coredump; 1790 goto end_coredump;
1779 1791
1780 /* write out the thread status notes section */ 1792 /* write out the thread status notes section */
@@ -1783,25 +1795,26 @@ static int elf_fdpic_core_dump(long signr, struct pt_regs *regs,
1783 list_entry(t, struct elf_thread_status, list); 1795 list_entry(t, struct elf_thread_status, list);
1784 1796
1785 for (i = 0; i < tmp->num_notes; i++) 1797 for (i = 0; i < tmp->num_notes; i++)
1786 if (!writenote(&tmp->notes[i], file)) 1798 if (!writenote(&tmp->notes[i], cprm->file))
1787 goto end_coredump; 1799 goto end_coredump;
1788 } 1800 }
1789 1801
1790 if (!dump_seek(file, dataoff)) 1802 if (!dump_seek(cprm->file, dataoff))
1791 goto end_coredump; 1803 goto end_coredump;
1792 1804
1793 if (elf_fdpic_dump_segments(file, &size, &limit, mm_flags) < 0) 1805 if (elf_fdpic_dump_segments(cprm->file, &size, &cprm->limit,
1806 mm_flags) < 0)
1794 goto end_coredump; 1807 goto end_coredump;
1795 1808
1796#ifdef ELF_CORE_WRITE_EXTRA_DATA 1809#ifdef ELF_CORE_WRITE_EXTRA_DATA
1797 ELF_CORE_WRITE_EXTRA_DATA; 1810 ELF_CORE_WRITE_EXTRA_DATA;
1798#endif 1811#endif
1799 1812
1800 if (file->f_pos != offset) { 1813 if (cprm->file->f_pos != offset) {
1801 /* Sanity check */ 1814 /* Sanity check */
1802 printk(KERN_WARNING 1815 printk(KERN_WARNING
1803 "elf_core_dump: file->f_pos (%lld) != offset (%lld)\n", 1816 "elf_core_dump: file->f_pos (%lld) != offset (%lld)\n",
1804 file->f_pos, offset); 1817 cprm->file->f_pos, offset);
1805 } 1818 }
1806 1819
1807end_coredump: 1820end_coredump:
diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c
index a2796651e756..42c6b4a54445 100644
--- a/fs/binfmt_flat.c
+++ b/fs/binfmt_flat.c
@@ -87,7 +87,7 @@ static int load_flat_shared_library(int id, struct lib_info *p);
87#endif 87#endif
88 88
89static int load_flat_binary(struct linux_binprm *, struct pt_regs * regs); 89static int load_flat_binary(struct linux_binprm *, struct pt_regs * regs);
90static int flat_core_dump(long signr, struct pt_regs *regs, struct file *file, unsigned long limit); 90static int flat_core_dump(struct coredump_params *cprm);
91 91
92static struct linux_binfmt flat_format = { 92static struct linux_binfmt flat_format = {
93 .module = THIS_MODULE, 93 .module = THIS_MODULE,
@@ -102,10 +102,10 @@ static struct linux_binfmt flat_format = {
102 * Currently only a stub-function. 102 * Currently only a stub-function.
103 */ 103 */
104 104
105static int flat_core_dump(long signr, struct pt_regs *regs, struct file *file, unsigned long limit) 105static int flat_core_dump(struct coredump_params *cprm)
106{ 106{
107 printk("Process %s:%d received signr %d and should have core dumped\n", 107 printk("Process %s:%d received signr %d and should have core dumped\n",
108 current->comm, current->pid, (int) signr); 108 current->comm, current->pid, (int) cprm->signr);
109 return(1); 109 return(1);
110} 110}
111 111
@@ -519,6 +519,7 @@ static int load_flat_file(struct linux_binprm * bprm,
519 519
520 /* OK, This is the point of no return */ 520 /* OK, This is the point of no return */
521 set_personality(PER_LINUX_32BIT); 521 set_personality(PER_LINUX_32BIT);
522 setup_new_exec(bprm);
522 } 523 }
523 524
524 /* 525 /*
diff --git a/fs/binfmt_som.c b/fs/binfmt_som.c
index eff74b9c9e77..cc8560f6c9b0 100644
--- a/fs/binfmt_som.c
+++ b/fs/binfmt_som.c
@@ -43,7 +43,7 @@ static int load_som_library(struct file *);
43 * don't even try. 43 * don't even try.
44 */ 44 */
45#if 0 45#if 0
46static int som_core_dump(long signr, struct pt_regs *regs, unsigned long limit); 46static int som_core_dump(struct coredump_params *cprm);
47#else 47#else
48#define som_core_dump NULL 48#define som_core_dump NULL
49#endif 49#endif
@@ -227,6 +227,7 @@ load_som_binary(struct linux_binprm * bprm, struct pt_regs * regs)
227 /* OK, This is the point of no return */ 227 /* OK, This is the point of no return */
228 current->flags &= ~PF_FORKNOEXEC; 228 current->flags &= ~PF_FORKNOEXEC;
229 current->personality = PER_HPUX; 229 current->personality = PER_HPUX;
230 setup_new_exec(bprm);
230 231
231 /* Set the task size for HP-UX processes such that 232 /* Set the task size for HP-UX processes such that
232 * the gateway page is outside the address space. 233 * the gateway page is outside the address space.
diff --git a/fs/bio-integrity.c b/fs/bio-integrity.c
index 49a34e7f7306..a16f29e888cd 100644
--- a/fs/bio-integrity.c
+++ b/fs/bio-integrity.c
@@ -61,7 +61,7 @@ static inline unsigned int vecs_to_idx(unsigned int nr)
61 61
62static inline int use_bip_pool(unsigned int idx) 62static inline int use_bip_pool(unsigned int idx)
63{ 63{
64 if (idx == BIOVEC_NR_POOLS) 64 if (idx == BIOVEC_MAX_IDX)
65 return 1; 65 return 1;
66 66
67 return 0; 67 return 0;
@@ -95,6 +95,7 @@ struct bio_integrity_payload *bio_integrity_alloc_bioset(struct bio *bio,
95 95
96 /* Use mempool if lower order alloc failed or max vecs were requested */ 96 /* Use mempool if lower order alloc failed or max vecs were requested */
97 if (bip == NULL) { 97 if (bip == NULL) {
98 idx = BIOVEC_MAX_IDX; /* so we free the payload properly later */
98 bip = mempool_alloc(bs->bio_integrity_pool, gfp_mask); 99 bip = mempool_alloc(bs->bio_integrity_pool, gfp_mask);
99 100
100 if (unlikely(bip == NULL)) { 101 if (unlikely(bip == NULL)) {
diff --git a/fs/bio.c b/fs/bio.c
index 76e6713abf94..88094afc29ea 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -78,7 +78,7 @@ static struct kmem_cache *bio_find_or_create_slab(unsigned int extra_size)
78 78
79 i = 0; 79 i = 0;
80 while (i < bio_slab_nr) { 80 while (i < bio_slab_nr) {
81 struct bio_slab *bslab = &bio_slabs[i]; 81 bslab = &bio_slabs[i];
82 82
83 if (!bslab->slab && entry == -1) 83 if (!bslab->slab && entry == -1)
84 entry = i; 84 entry = i;
@@ -542,13 +542,18 @@ static int __bio_add_page(struct request_queue *q, struct bio *bio, struct page
542 542
543 if (page == prev->bv_page && 543 if (page == prev->bv_page &&
544 offset == prev->bv_offset + prev->bv_len) { 544 offset == prev->bv_offset + prev->bv_len) {
545 unsigned int prev_bv_len = prev->bv_len;
545 prev->bv_len += len; 546 prev->bv_len += len;
546 547
547 if (q->merge_bvec_fn) { 548 if (q->merge_bvec_fn) {
548 struct bvec_merge_data bvm = { 549 struct bvec_merge_data bvm = {
550 /* prev_bvec is already charged in
551 bi_size, discharge it in order to
552 simulate merging updated prev_bvec
553 as new bvec. */
549 .bi_bdev = bio->bi_bdev, 554 .bi_bdev = bio->bi_bdev,
550 .bi_sector = bio->bi_sector, 555 .bi_sector = bio->bi_sector,
551 .bi_size = bio->bi_size, 556 .bi_size = bio->bi_size - prev_bv_len,
552 .bi_rw = bio->bi_rw, 557 .bi_rw = bio->bi_rw,
553 }; 558 };
554 559
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 73d6a735b8f3..d11d0289f3d2 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -246,7 +246,8 @@ struct super_block *freeze_bdev(struct block_device *bdev)
246 if (!sb) 246 if (!sb)
247 goto out; 247 goto out;
248 if (sb->s_flags & MS_RDONLY) { 248 if (sb->s_flags & MS_RDONLY) {
249 deactivate_locked_super(sb); 249 sb->s_frozen = SB_FREEZE_TRANS;
250 up_write(&sb->s_umount);
250 mutex_unlock(&bdev->bd_fsfreeze_mutex); 251 mutex_unlock(&bdev->bd_fsfreeze_mutex);
251 return sb; 252 return sb;
252 } 253 }
@@ -307,7 +308,7 @@ int thaw_bdev(struct block_device *bdev, struct super_block *sb)
307 BUG_ON(sb->s_bdev != bdev); 308 BUG_ON(sb->s_bdev != bdev);
308 down_write(&sb->s_umount); 309 down_write(&sb->s_umount);
309 if (sb->s_flags & MS_RDONLY) 310 if (sb->s_flags & MS_RDONLY)
310 goto out_deactivate; 311 goto out_unfrozen;
311 312
312 if (sb->s_op->unfreeze_fs) { 313 if (sb->s_op->unfreeze_fs) {
313 error = sb->s_op->unfreeze_fs(sb); 314 error = sb->s_op->unfreeze_fs(sb);
@@ -321,11 +322,11 @@ int thaw_bdev(struct block_device *bdev, struct super_block *sb)
321 } 322 }
322 } 323 }
323 324
325out_unfrozen:
324 sb->s_frozen = SB_UNFROZEN; 326 sb->s_frozen = SB_UNFROZEN;
325 smp_wmb(); 327 smp_wmb();
326 wake_up(&sb->s_wait_unfrozen); 328 wake_up(&sb->s_wait_unfrozen);
327 329
328out_deactivate:
329 if (sb) 330 if (sb)
330 deactivate_locked_super(sb); 331 deactivate_locked_super(sb);
331out_unlock: 332out_unlock:
diff --git a/fs/btrfs/Kconfig b/fs/btrfs/Kconfig
index 402afe0a0bfb..7bb3c020e570 100644
--- a/fs/btrfs/Kconfig
+++ b/fs/btrfs/Kconfig
@@ -4,7 +4,6 @@ config BTRFS_FS
4 select LIBCRC32C 4 select LIBCRC32C
5 select ZLIB_INFLATE 5 select ZLIB_INFLATE
6 select ZLIB_DEFLATE 6 select ZLIB_DEFLATE
7 select FS_JOURNAL_INFO
8 help 7 help
9 Btrfs is a new filesystem with extents, writable snapshotting, 8 Btrfs is a new filesystem with extents, writable snapshotting,
10 support for multiple devices and many more features. 9 support for multiple devices and many more features.
diff --git a/fs/btrfs/acl.c b/fs/btrfs/acl.c
index 361604244271..6df6d6ed74fd 100644
--- a/fs/btrfs/acl.c
+++ b/fs/btrfs/acl.c
@@ -73,13 +73,13 @@ static struct posix_acl *btrfs_get_acl(struct inode *inode, int type)
73 return acl; 73 return acl;
74} 74}
75 75
76static int btrfs_xattr_get_acl(struct inode *inode, int type, 76static int btrfs_xattr_acl_get(struct dentry *dentry, const char *name,
77 void *value, size_t size) 77 void *value, size_t size, int type)
78{ 78{
79 struct posix_acl *acl; 79 struct posix_acl *acl;
80 int ret = 0; 80 int ret = 0;
81 81
82 acl = btrfs_get_acl(inode, type); 82 acl = btrfs_get_acl(dentry->d_inode, type);
83 83
84 if (IS_ERR(acl)) 84 if (IS_ERR(acl))
85 return PTR_ERR(acl); 85 return PTR_ERR(acl);
@@ -94,7 +94,8 @@ static int btrfs_xattr_get_acl(struct inode *inode, int type,
94/* 94/*
95 * Needs to be called with fs_mutex held 95 * Needs to be called with fs_mutex held
96 */ 96 */
97static int btrfs_set_acl(struct inode *inode, struct posix_acl *acl, int type) 97static int btrfs_set_acl(struct btrfs_trans_handle *trans,
98 struct inode *inode, struct posix_acl *acl, int type)
98{ 99{
99 int ret, size = 0; 100 int ret, size = 0;
100 const char *name; 101 const char *name;
@@ -111,12 +112,14 @@ static int btrfs_set_acl(struct inode *inode, struct posix_acl *acl, int type)
111 switch (type) { 112 switch (type) {
112 case ACL_TYPE_ACCESS: 113 case ACL_TYPE_ACCESS:
113 mode = inode->i_mode; 114 mode = inode->i_mode;
114 ret = posix_acl_equiv_mode(acl, &mode);
115 if (ret < 0)
116 return ret;
117 ret = 0;
118 inode->i_mode = mode;
119 name = POSIX_ACL_XATTR_ACCESS; 115 name = POSIX_ACL_XATTR_ACCESS;
116 if (acl) {
117 ret = posix_acl_equiv_mode(acl, &mode);
118 if (ret < 0)
119 return ret;
120 inode->i_mode = mode;
121 }
122 ret = 0;
120 break; 123 break;
121 case ACL_TYPE_DEFAULT: 124 case ACL_TYPE_DEFAULT:
122 if (!S_ISDIR(inode->i_mode)) 125 if (!S_ISDIR(inode->i_mode))
@@ -140,8 +143,7 @@ static int btrfs_set_acl(struct inode *inode, struct posix_acl *acl, int type)
140 goto out; 143 goto out;
141 } 144 }
142 145
143 ret = __btrfs_setxattr(inode, name, value, size, 0); 146 ret = __btrfs_setxattr(trans, inode, name, value, size, 0);
144
145out: 147out:
146 kfree(value); 148 kfree(value);
147 149
@@ -151,10 +153,10 @@ out:
151 return ret; 153 return ret;
152} 154}
153 155
154static int btrfs_xattr_set_acl(struct inode *inode, int type, 156static int btrfs_xattr_acl_set(struct dentry *dentry, const char *name,
155 const void *value, size_t size) 157 const void *value, size_t size, int flags, int type)
156{ 158{
157 int ret = 0; 159 int ret;
158 struct posix_acl *acl = NULL; 160 struct posix_acl *acl = NULL;
159 161
160 if (value) { 162 if (value) {
@@ -167,38 +169,13 @@ static int btrfs_xattr_set_acl(struct inode *inode, int type,
167 } 169 }
168 } 170 }
169 171
170 ret = btrfs_set_acl(inode, acl, type); 172 ret = btrfs_set_acl(NULL, dentry->d_inode, acl, type);
171 173
172 posix_acl_release(acl); 174 posix_acl_release(acl);
173 175
174 return ret; 176 return ret;
175} 177}
176 178
177
178static int btrfs_xattr_acl_access_get(struct inode *inode, const char *name,
179 void *value, size_t size)
180{
181 return btrfs_xattr_get_acl(inode, ACL_TYPE_ACCESS, value, size);
182}
183
184static int btrfs_xattr_acl_access_set(struct inode *inode, const char *name,
185 const void *value, size_t size, int flags)
186{
187 return btrfs_xattr_set_acl(inode, ACL_TYPE_ACCESS, value, size);
188}
189
190static int btrfs_xattr_acl_default_get(struct inode *inode, const char *name,
191 void *value, size_t size)
192{
193 return btrfs_xattr_get_acl(inode, ACL_TYPE_DEFAULT, value, size);
194}
195
196static int btrfs_xattr_acl_default_set(struct inode *inode, const char *name,
197 const void *value, size_t size, int flags)
198{
199 return btrfs_xattr_set_acl(inode, ACL_TYPE_DEFAULT, value, size);
200}
201
202int btrfs_check_acl(struct inode *inode, int mask) 179int btrfs_check_acl(struct inode *inode, int mask)
203{ 180{
204 struct posix_acl *acl; 181 struct posix_acl *acl;
@@ -221,7 +198,8 @@ int btrfs_check_acl(struct inode *inode, int mask)
221 * stuff has been fixed to work with that. If the locking stuff changes, we 198 * stuff has been fixed to work with that. If the locking stuff changes, we
222 * need to re-evaluate the acl locking stuff. 199 * need to re-evaluate the acl locking stuff.
223 */ 200 */
224int btrfs_init_acl(struct inode *inode, struct inode *dir) 201int btrfs_init_acl(struct btrfs_trans_handle *trans,
202 struct inode *inode, struct inode *dir)
225{ 203{
226 struct posix_acl *acl = NULL; 204 struct posix_acl *acl = NULL;
227 int ret = 0; 205 int ret = 0;
@@ -246,7 +224,8 @@ int btrfs_init_acl(struct inode *inode, struct inode *dir)
246 mode_t mode; 224 mode_t mode;
247 225
248 if (S_ISDIR(inode->i_mode)) { 226 if (S_ISDIR(inode->i_mode)) {
249 ret = btrfs_set_acl(inode, acl, ACL_TYPE_DEFAULT); 227 ret = btrfs_set_acl(trans, inode, acl,
228 ACL_TYPE_DEFAULT);
250 if (ret) 229 if (ret)
251 goto failed; 230 goto failed;
252 } 231 }
@@ -261,10 +240,11 @@ int btrfs_init_acl(struct inode *inode, struct inode *dir)
261 inode->i_mode = mode; 240 inode->i_mode = mode;
262 if (ret > 0) { 241 if (ret > 0) {
263 /* we need an acl */ 242 /* we need an acl */
264 ret = btrfs_set_acl(inode, clone, 243 ret = btrfs_set_acl(trans, inode, clone,
265 ACL_TYPE_ACCESS); 244 ACL_TYPE_ACCESS);
266 } 245 }
267 } 246 }
247 posix_acl_release(clone);
268 } 248 }
269failed: 249failed:
270 posix_acl_release(acl); 250 posix_acl_release(acl);
@@ -294,7 +274,7 @@ int btrfs_acl_chmod(struct inode *inode)
294 274
295 ret = posix_acl_chmod_masq(clone, inode->i_mode); 275 ret = posix_acl_chmod_masq(clone, inode->i_mode);
296 if (!ret) 276 if (!ret)
297 ret = btrfs_set_acl(inode, clone, ACL_TYPE_ACCESS); 277 ret = btrfs_set_acl(NULL, inode, clone, ACL_TYPE_ACCESS);
298 278
299 posix_acl_release(clone); 279 posix_acl_release(clone);
300 280
@@ -303,14 +283,16 @@ int btrfs_acl_chmod(struct inode *inode)
303 283
304struct xattr_handler btrfs_xattr_acl_default_handler = { 284struct xattr_handler btrfs_xattr_acl_default_handler = {
305 .prefix = POSIX_ACL_XATTR_DEFAULT, 285 .prefix = POSIX_ACL_XATTR_DEFAULT,
306 .get = btrfs_xattr_acl_default_get, 286 .flags = ACL_TYPE_DEFAULT,
307 .set = btrfs_xattr_acl_default_set, 287 .get = btrfs_xattr_acl_get,
288 .set = btrfs_xattr_acl_set,
308}; 289};
309 290
310struct xattr_handler btrfs_xattr_acl_access_handler = { 291struct xattr_handler btrfs_xattr_acl_access_handler = {
311 .prefix = POSIX_ACL_XATTR_ACCESS, 292 .prefix = POSIX_ACL_XATTR_ACCESS,
312 .get = btrfs_xattr_acl_access_get, 293 .flags = ACL_TYPE_ACCESS,
313 .set = btrfs_xattr_acl_access_set, 294 .get = btrfs_xattr_acl_get,
295 .set = btrfs_xattr_acl_set,
314}; 296};
315 297
316#else /* CONFIG_BTRFS_FS_POSIX_ACL */ 298#else /* CONFIG_BTRFS_FS_POSIX_ACL */
@@ -320,7 +302,8 @@ int btrfs_acl_chmod(struct inode *inode)
320 return 0; 302 return 0;
321} 303}
322 304
323int btrfs_init_acl(struct inode *inode, struct inode *dir) 305int btrfs_init_acl(struct btrfs_trans_handle *trans,
306 struct inode *inode, struct inode *dir)
324{ 307{
325 return 0; 308 return 0;
326} 309}
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
index f6783a42f010..3f1f50d9d916 100644
--- a/fs/btrfs/btrfs_inode.h
+++ b/fs/btrfs/btrfs_inode.h
@@ -44,9 +44,6 @@ struct btrfs_inode {
44 */ 44 */
45 struct extent_io_tree io_failure_tree; 45 struct extent_io_tree io_failure_tree;
46 46
47 /* held while inesrting or deleting extents from files */
48 struct mutex extent_mutex;
49
50 /* held while logging the inode in tree-log.c */ 47 /* held while logging the inode in tree-log.c */
51 struct mutex log_mutex; 48 struct mutex log_mutex;
52 49
@@ -166,7 +163,7 @@ static inline struct btrfs_inode *BTRFS_I(struct inode *inode)
166 163
167static inline void btrfs_i_size_write(struct inode *inode, u64 size) 164static inline void btrfs_i_size_write(struct inode *inode, u64 size)
168{ 165{
169 inode->i_size = size; 166 i_size_write(inode, size);
170 BTRFS_I(inode)->disk_i_size = size; 167 BTRFS_I(inode)->disk_i_size = size;
171} 168}
172 169
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index ec96f3a6d536..c4bc570a396e 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -37,6 +37,11 @@ static int balance_node_right(struct btrfs_trans_handle *trans,
37 struct extent_buffer *src_buf); 37 struct extent_buffer *src_buf);
38static int del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root, 38static int del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root,
39 struct btrfs_path *path, int level, int slot); 39 struct btrfs_path *path, int level, int slot);
40static int setup_items_for_insert(struct btrfs_trans_handle *trans,
41 struct btrfs_root *root, struct btrfs_path *path,
42 struct btrfs_key *cpu_key, u32 *data_size,
43 u32 total_data, u32 total_size, int nr);
44
40 45
41struct btrfs_path *btrfs_alloc_path(void) 46struct btrfs_path *btrfs_alloc_path(void)
42{ 47{
@@ -451,9 +456,8 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
451 extent_buffer_get(cow); 456 extent_buffer_get(cow);
452 spin_unlock(&root->node_lock); 457 spin_unlock(&root->node_lock);
453 458
454 btrfs_free_extent(trans, root, buf->start, buf->len, 459 btrfs_free_tree_block(trans, root, buf->start, buf->len,
455 parent_start, root->root_key.objectid, 460 parent_start, root->root_key.objectid, level);
456 level, 0);
457 free_extent_buffer(buf); 461 free_extent_buffer(buf);
458 add_root_to_dirty_list(root); 462 add_root_to_dirty_list(root);
459 } else { 463 } else {
@@ -468,9 +472,8 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
468 btrfs_set_node_ptr_generation(parent, parent_slot, 472 btrfs_set_node_ptr_generation(parent, parent_slot,
469 trans->transid); 473 trans->transid);
470 btrfs_mark_buffer_dirty(parent); 474 btrfs_mark_buffer_dirty(parent);
471 btrfs_free_extent(trans, root, buf->start, buf->len, 475 btrfs_free_tree_block(trans, root, buf->start, buf->len,
472 parent_start, root->root_key.objectid, 476 parent_start, root->root_key.objectid, level);
473 level, 0);
474 } 477 }
475 if (unlock_orig) 478 if (unlock_orig)
476 btrfs_tree_unlock(buf); 479 btrfs_tree_unlock(buf);
@@ -1030,8 +1033,8 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
1030 btrfs_tree_unlock(mid); 1033 btrfs_tree_unlock(mid);
1031 /* once for the path */ 1034 /* once for the path */
1032 free_extent_buffer(mid); 1035 free_extent_buffer(mid);
1033 ret = btrfs_free_extent(trans, root, mid->start, mid->len, 1036 ret = btrfs_free_tree_block(trans, root, mid->start, mid->len,
1034 0, root->root_key.objectid, level, 1); 1037 0, root->root_key.objectid, level);
1035 /* once for the root ptr */ 1038 /* once for the root ptr */
1036 free_extent_buffer(mid); 1039 free_extent_buffer(mid);
1037 return ret; 1040 return ret;
@@ -1095,10 +1098,10 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
1095 1); 1098 1);
1096 if (wret) 1099 if (wret)
1097 ret = wret; 1100 ret = wret;
1098 wret = btrfs_free_extent(trans, root, bytenr, 1101 wret = btrfs_free_tree_block(trans, root,
1099 blocksize, 0, 1102 bytenr, blocksize, 0,
1100 root->root_key.objectid, 1103 root->root_key.objectid,
1101 level, 0); 1104 level);
1102 if (wret) 1105 if (wret)
1103 ret = wret; 1106 ret = wret;
1104 } else { 1107 } else {
@@ -1143,9 +1146,8 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
1143 wret = del_ptr(trans, root, path, level + 1, pslot); 1146 wret = del_ptr(trans, root, path, level + 1, pslot);
1144 if (wret) 1147 if (wret)
1145 ret = wret; 1148 ret = wret;
1146 wret = btrfs_free_extent(trans, root, bytenr, blocksize, 1149 wret = btrfs_free_tree_block(trans, root, bytenr, blocksize,
1147 0, root->root_key.objectid, 1150 0, root->root_key.objectid, level);
1148 level, 0);
1149 if (wret) 1151 if (wret)
1150 ret = wret; 1152 ret = wret;
1151 } else { 1153 } else {
@@ -2997,75 +2999,85 @@ again:
2997 return ret; 2999 return ret;
2998} 3000}
2999 3001
3000/* 3002static noinline int setup_leaf_for_split(struct btrfs_trans_handle *trans,
3001 * This function splits a single item into two items, 3003 struct btrfs_root *root,
3002 * giving 'new_key' to the new item and splitting the 3004 struct btrfs_path *path, int ins_len)
3003 * old one at split_offset (from the start of the item).
3004 *
3005 * The path may be released by this operation. After
3006 * the split, the path is pointing to the old item. The
3007 * new item is going to be in the same node as the old one.
3008 *
3009 * Note, the item being split must be smaller enough to live alone on
3010 * a tree block with room for one extra struct btrfs_item
3011 *
3012 * This allows us to split the item in place, keeping a lock on the
3013 * leaf the entire time.
3014 */
3015int btrfs_split_item(struct btrfs_trans_handle *trans,
3016 struct btrfs_root *root,
3017 struct btrfs_path *path,
3018 struct btrfs_key *new_key,
3019 unsigned long split_offset)
3020{ 3005{
3021 u32 item_size; 3006 struct btrfs_key key;
3022 struct extent_buffer *leaf; 3007 struct extent_buffer *leaf;
3023 struct btrfs_key orig_key; 3008 struct btrfs_file_extent_item *fi;
3024 struct btrfs_item *item; 3009 u64 extent_len = 0;
3025 struct btrfs_item *new_item; 3010 u32 item_size;
3026 int ret = 0; 3011 int ret;
3027 int slot;
3028 u32 nritems;
3029 u32 orig_offset;
3030 struct btrfs_disk_key disk_key;
3031 char *buf;
3032 3012
3033 leaf = path->nodes[0]; 3013 leaf = path->nodes[0];
3034 btrfs_item_key_to_cpu(leaf, &orig_key, path->slots[0]); 3014 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
3035 if (btrfs_leaf_free_space(root, leaf) >= sizeof(struct btrfs_item)) 3015
3036 goto split; 3016 BUG_ON(key.type != BTRFS_EXTENT_DATA_KEY &&
3017 key.type != BTRFS_EXTENT_CSUM_KEY);
3018
3019 if (btrfs_leaf_free_space(root, leaf) >= ins_len)
3020 return 0;
3037 3021
3038 item_size = btrfs_item_size_nr(leaf, path->slots[0]); 3022 item_size = btrfs_item_size_nr(leaf, path->slots[0]);
3023 if (key.type == BTRFS_EXTENT_DATA_KEY) {
3024 fi = btrfs_item_ptr(leaf, path->slots[0],
3025 struct btrfs_file_extent_item);
3026 extent_len = btrfs_file_extent_num_bytes(leaf, fi);
3027 }
3039 btrfs_release_path(root, path); 3028 btrfs_release_path(root, path);
3040 3029
3041 path->search_for_split = 1;
3042 path->keep_locks = 1; 3030 path->keep_locks = 1;
3043 3031 path->search_for_split = 1;
3044 ret = btrfs_search_slot(trans, root, &orig_key, path, 0, 1); 3032 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
3045 path->search_for_split = 0; 3033 path->search_for_split = 0;
3034 if (ret < 0)
3035 goto err;
3046 3036
3037 ret = -EAGAIN;
3038 leaf = path->nodes[0];
3047 /* if our item isn't there or got smaller, return now */ 3039 /* if our item isn't there or got smaller, return now */
3048 if (ret != 0 || item_size != btrfs_item_size_nr(path->nodes[0], 3040 if (ret > 0 || item_size != btrfs_item_size_nr(leaf, path->slots[0]))
3049 path->slots[0])) { 3041 goto err;
3050 path->keep_locks = 0; 3042
3051 return -EAGAIN; 3043 if (key.type == BTRFS_EXTENT_DATA_KEY) {
3044 fi = btrfs_item_ptr(leaf, path->slots[0],
3045 struct btrfs_file_extent_item);
3046 if (extent_len != btrfs_file_extent_num_bytes(leaf, fi))
3047 goto err;
3052 } 3048 }
3053 3049
3054 btrfs_set_path_blocking(path); 3050 btrfs_set_path_blocking(path);
3055 ret = split_leaf(trans, root, &orig_key, path, 3051 ret = split_leaf(trans, root, &key, path, ins_len, 1);
3056 sizeof(struct btrfs_item), 1);
3057 path->keep_locks = 0;
3058 BUG_ON(ret); 3052 BUG_ON(ret);
3059 3053
3054 path->keep_locks = 0;
3060 btrfs_unlock_up_safe(path, 1); 3055 btrfs_unlock_up_safe(path, 1);
3056 return 0;
3057err:
3058 path->keep_locks = 0;
3059 return ret;
3060}
3061
3062static noinline int split_item(struct btrfs_trans_handle *trans,
3063 struct btrfs_root *root,
3064 struct btrfs_path *path,
3065 struct btrfs_key *new_key,
3066 unsigned long split_offset)
3067{
3068 struct extent_buffer *leaf;
3069 struct btrfs_item *item;
3070 struct btrfs_item *new_item;
3071 int slot;
3072 char *buf;
3073 u32 nritems;
3074 u32 item_size;
3075 u32 orig_offset;
3076 struct btrfs_disk_key disk_key;
3077
3061 leaf = path->nodes[0]; 3078 leaf = path->nodes[0];
3062 BUG_ON(btrfs_leaf_free_space(root, leaf) < sizeof(struct btrfs_item)); 3079 BUG_ON(btrfs_leaf_free_space(root, leaf) < sizeof(struct btrfs_item));
3063 3080
3064split:
3065 /*
3066 * make sure any changes to the path from split_leaf leave it
3067 * in a blocking state
3068 */
3069 btrfs_set_path_blocking(path); 3081 btrfs_set_path_blocking(path);
3070 3082
3071 item = btrfs_item_nr(leaf, path->slots[0]); 3083 item = btrfs_item_nr(leaf, path->slots[0]);
@@ -3073,19 +3085,19 @@ split:
3073 item_size = btrfs_item_size(leaf, item); 3085 item_size = btrfs_item_size(leaf, item);
3074 3086
3075 buf = kmalloc(item_size, GFP_NOFS); 3087 buf = kmalloc(item_size, GFP_NOFS);
3088 if (!buf)
3089 return -ENOMEM;
3090
3076 read_extent_buffer(leaf, buf, btrfs_item_ptr_offset(leaf, 3091 read_extent_buffer(leaf, buf, btrfs_item_ptr_offset(leaf,
3077 path->slots[0]), item_size); 3092 path->slots[0]), item_size);
3078 slot = path->slots[0] + 1;
3079 leaf = path->nodes[0];
3080 3093
3094 slot = path->slots[0] + 1;
3081 nritems = btrfs_header_nritems(leaf); 3095 nritems = btrfs_header_nritems(leaf);
3082
3083 if (slot != nritems) { 3096 if (slot != nritems) {
3084 /* shift the items */ 3097 /* shift the items */
3085 memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot + 1), 3098 memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot + 1),
3086 btrfs_item_nr_offset(slot), 3099 btrfs_item_nr_offset(slot),
3087 (nritems - slot) * sizeof(struct btrfs_item)); 3100 (nritems - slot) * sizeof(struct btrfs_item));
3088
3089 } 3101 }
3090 3102
3091 btrfs_cpu_key_to_disk(&disk_key, new_key); 3103 btrfs_cpu_key_to_disk(&disk_key, new_key);
@@ -3113,16 +3125,81 @@ split:
3113 item_size - split_offset); 3125 item_size - split_offset);
3114 btrfs_mark_buffer_dirty(leaf); 3126 btrfs_mark_buffer_dirty(leaf);
3115 3127
3116 ret = 0; 3128 BUG_ON(btrfs_leaf_free_space(root, leaf) < 0);
3117 if (btrfs_leaf_free_space(root, leaf) < 0) {
3118 btrfs_print_leaf(root, leaf);
3119 BUG();
3120 }
3121 kfree(buf); 3129 kfree(buf);
3130 return 0;
3131}
3132
3133/*
3134 * This function splits a single item into two items,
3135 * giving 'new_key' to the new item and splitting the
3136 * old one at split_offset (from the start of the item).
3137 *
3138 * The path may be released by this operation. After
3139 * the split, the path is pointing to the old item. The
3140 * new item is going to be in the same node as the old one.
3141 *
3142 * Note, the item being split must be smaller enough to live alone on
3143 * a tree block with room for one extra struct btrfs_item
3144 *
3145 * This allows us to split the item in place, keeping a lock on the
3146 * leaf the entire time.
3147 */
3148int btrfs_split_item(struct btrfs_trans_handle *trans,
3149 struct btrfs_root *root,
3150 struct btrfs_path *path,
3151 struct btrfs_key *new_key,
3152 unsigned long split_offset)
3153{
3154 int ret;
3155 ret = setup_leaf_for_split(trans, root, path,
3156 sizeof(struct btrfs_item));
3157 if (ret)
3158 return ret;
3159
3160 ret = split_item(trans, root, path, new_key, split_offset);
3122 return ret; 3161 return ret;
3123} 3162}
3124 3163
3125/* 3164/*
3165 * This function duplicate a item, giving 'new_key' to the new item.
3166 * It guarantees both items live in the same tree leaf and the new item
3167 * is contiguous with the original item.
3168 *
3169 * This allows us to split file extent in place, keeping a lock on the
3170 * leaf the entire time.
3171 */
3172int btrfs_duplicate_item(struct btrfs_trans_handle *trans,
3173 struct btrfs_root *root,
3174 struct btrfs_path *path,
3175 struct btrfs_key *new_key)
3176{
3177 struct extent_buffer *leaf;
3178 int ret;
3179 u32 item_size;
3180
3181 leaf = path->nodes[0];
3182 item_size = btrfs_item_size_nr(leaf, path->slots[0]);
3183 ret = setup_leaf_for_split(trans, root, path,
3184 item_size + sizeof(struct btrfs_item));
3185 if (ret)
3186 return ret;
3187
3188 path->slots[0]++;
3189 ret = setup_items_for_insert(trans, root, path, new_key, &item_size,
3190 item_size, item_size +
3191 sizeof(struct btrfs_item), 1);
3192 BUG_ON(ret);
3193
3194 leaf = path->nodes[0];
3195 memcpy_extent_buffer(leaf,
3196 btrfs_item_ptr_offset(leaf, path->slots[0]),
3197 btrfs_item_ptr_offset(leaf, path->slots[0] - 1),
3198 item_size);
3199 return 0;
3200}
3201
3202/*
3126 * make the item pointed to by the path smaller. new_size indicates 3203 * make the item pointed to by the path smaller. new_size indicates
3127 * how small to make it, and from_end tells us if we just chop bytes 3204 * how small to make it, and from_end tells us if we just chop bytes
3128 * off the end of the item or if we shift the item to chop bytes off 3205 * off the end of the item or if we shift the item to chop bytes off
@@ -3714,8 +3791,8 @@ static noinline int btrfs_del_leaf(struct btrfs_trans_handle *trans,
3714 */ 3791 */
3715 btrfs_unlock_up_safe(path, 0); 3792 btrfs_unlock_up_safe(path, 0);
3716 3793
3717 ret = btrfs_free_extent(trans, root, leaf->start, leaf->len, 3794 ret = btrfs_free_tree_block(trans, root, leaf->start, leaf->len,
3718 0, root->root_key.objectid, 0, 0); 3795 0, root->root_key.objectid, 0);
3719 return ret; 3796 return ret;
3720} 3797}
3721/* 3798/*
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 444b3e9b92a4..2aa8ec6a0981 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -310,6 +310,9 @@ struct btrfs_header {
310#define BTRFS_MAX_INLINE_DATA_SIZE(r) (BTRFS_LEAF_DATA_SIZE(r) - \ 310#define BTRFS_MAX_INLINE_DATA_SIZE(r) (BTRFS_LEAF_DATA_SIZE(r) - \
311 sizeof(struct btrfs_item) - \ 311 sizeof(struct btrfs_item) - \
312 sizeof(struct btrfs_file_extent_item)) 312 sizeof(struct btrfs_file_extent_item))
313#define BTRFS_MAX_XATTR_SIZE(r) (BTRFS_LEAF_DATA_SIZE(r) - \
314 sizeof(struct btrfs_item) -\
315 sizeof(struct btrfs_dir_item))
313 316
314 317
315/* 318/*
@@ -859,8 +862,9 @@ struct btrfs_fs_info {
859 struct mutex ordered_operations_mutex; 862 struct mutex ordered_operations_mutex;
860 struct rw_semaphore extent_commit_sem; 863 struct rw_semaphore extent_commit_sem;
861 864
862 struct rw_semaphore subvol_sem; 865 struct rw_semaphore cleanup_work_sem;
863 866
867 struct rw_semaphore subvol_sem;
864 struct srcu_struct subvol_srcu; 868 struct srcu_struct subvol_srcu;
865 869
866 struct list_head trans_list; 870 struct list_head trans_list;
@@ -868,6 +872,9 @@ struct btrfs_fs_info {
868 struct list_head dead_roots; 872 struct list_head dead_roots;
869 struct list_head caching_block_groups; 873 struct list_head caching_block_groups;
870 874
875 spinlock_t delayed_iput_lock;
876 struct list_head delayed_iputs;
877
871 atomic_t nr_async_submits; 878 atomic_t nr_async_submits;
872 atomic_t async_submit_draining; 879 atomic_t async_submit_draining;
873 atomic_t nr_async_bios; 880 atomic_t nr_async_bios;
@@ -1034,12 +1041,12 @@ struct btrfs_root {
1034 int ref_cows; 1041 int ref_cows;
1035 int track_dirty; 1042 int track_dirty;
1036 int in_radix; 1043 int in_radix;
1044 int clean_orphans;
1037 1045
1038 u64 defrag_trans_start; 1046 u64 defrag_trans_start;
1039 struct btrfs_key defrag_progress; 1047 struct btrfs_key defrag_progress;
1040 struct btrfs_key defrag_max; 1048 struct btrfs_key defrag_max;
1041 int defrag_running; 1049 int defrag_running;
1042 int defrag_level;
1043 char *name; 1050 char *name;
1044 int in_sysfs; 1051 int in_sysfs;
1045 1052
@@ -1154,6 +1161,7 @@ struct btrfs_root {
1154#define BTRFS_MOUNT_SSD_SPREAD (1 << 8) 1161#define BTRFS_MOUNT_SSD_SPREAD (1 << 8)
1155#define BTRFS_MOUNT_NOSSD (1 << 9) 1162#define BTRFS_MOUNT_NOSSD (1 << 9)
1156#define BTRFS_MOUNT_DISCARD (1 << 10) 1163#define BTRFS_MOUNT_DISCARD (1 << 10)
1164#define BTRFS_MOUNT_FORCE_COMPRESS (1 << 11)
1157 1165
1158#define btrfs_clear_opt(o, opt) ((o) &= ~BTRFS_MOUNT_##opt) 1166#define btrfs_clear_opt(o, opt) ((o) &= ~BTRFS_MOUNT_##opt)
1159#define btrfs_set_opt(o, opt) ((o) |= BTRFS_MOUNT_##opt) 1167#define btrfs_set_opt(o, opt) ((o) |= BTRFS_MOUNT_##opt)
@@ -1975,6 +1983,10 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
1975 u64 parent, u64 root_objectid, 1983 u64 parent, u64 root_objectid,
1976 struct btrfs_disk_key *key, int level, 1984 struct btrfs_disk_key *key, int level,
1977 u64 hint, u64 empty_size); 1985 u64 hint, u64 empty_size);
1986int btrfs_free_tree_block(struct btrfs_trans_handle *trans,
1987 struct btrfs_root *root,
1988 u64 bytenr, u32 blocksize,
1989 u64 parent, u64 root_objectid, int level);
1978struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans, 1990struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans,
1979 struct btrfs_root *root, 1991 struct btrfs_root *root,
1980 u64 bytenr, u32 blocksize, 1992 u64 bytenr, u32 blocksize,
@@ -2089,6 +2101,10 @@ int btrfs_split_item(struct btrfs_trans_handle *trans,
2089 struct btrfs_path *path, 2101 struct btrfs_path *path,
2090 struct btrfs_key *new_key, 2102 struct btrfs_key *new_key,
2091 unsigned long split_offset); 2103 unsigned long split_offset);
2104int btrfs_duplicate_item(struct btrfs_trans_handle *trans,
2105 struct btrfs_root *root,
2106 struct btrfs_path *path,
2107 struct btrfs_key *new_key);
2092int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root 2108int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root
2093 *root, struct btrfs_key *key, struct btrfs_path *p, int 2109 *root, struct btrfs_key *key, struct btrfs_path *p, int
2094 ins_len, int cow); 2110 ins_len, int cow);
@@ -2196,9 +2212,10 @@ int btrfs_delete_one_dir_name(struct btrfs_trans_handle *trans,
2196 struct btrfs_path *path, 2212 struct btrfs_path *path,
2197 struct btrfs_dir_item *di); 2213 struct btrfs_dir_item *di);
2198int btrfs_insert_xattr_item(struct btrfs_trans_handle *trans, 2214int btrfs_insert_xattr_item(struct btrfs_trans_handle *trans,
2199 struct btrfs_root *root, const char *name, 2215 struct btrfs_root *root,
2200 u16 name_len, const void *data, u16 data_len, 2216 struct btrfs_path *path, u64 objectid,
2201 u64 dir); 2217 const char *name, u16 name_len,
2218 const void *data, u16 data_len);
2202struct btrfs_dir_item *btrfs_lookup_xattr(struct btrfs_trans_handle *trans, 2219struct btrfs_dir_item *btrfs_lookup_xattr(struct btrfs_trans_handle *trans,
2203 struct btrfs_root *root, 2220 struct btrfs_root *root,
2204 struct btrfs_path *path, u64 dir, 2221 struct btrfs_path *path, u64 dir,
@@ -2292,7 +2309,7 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
2292 struct inode *inode, u64 new_size, 2309 struct inode *inode, u64 new_size,
2293 u32 min_type); 2310 u32 min_type);
2294 2311
2295int btrfs_start_delalloc_inodes(struct btrfs_root *root); 2312int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput);
2296int btrfs_set_extent_delalloc(struct inode *inode, u64 start, u64 end); 2313int btrfs_set_extent_delalloc(struct inode *inode, u64 start, u64 end);
2297int btrfs_writepages(struct address_space *mapping, 2314int btrfs_writepages(struct address_space *mapping,
2298 struct writeback_control *wbc); 2315 struct writeback_control *wbc);
@@ -2332,6 +2349,8 @@ int btrfs_orphan_del(struct btrfs_trans_handle *trans, struct inode *inode);
2332void btrfs_orphan_cleanup(struct btrfs_root *root); 2349void btrfs_orphan_cleanup(struct btrfs_root *root);
2333int btrfs_cont_expand(struct inode *inode, loff_t size); 2350int btrfs_cont_expand(struct inode *inode, loff_t size);
2334int btrfs_invalidate_inodes(struct btrfs_root *root); 2351int btrfs_invalidate_inodes(struct btrfs_root *root);
2352void btrfs_add_delayed_iput(struct inode *inode);
2353void btrfs_run_delayed_iputs(struct btrfs_root *root);
2335extern const struct dentry_operations btrfs_dentry_operations; 2354extern const struct dentry_operations btrfs_dentry_operations;
2336 2355
2337/* ioctl.c */ 2356/* ioctl.c */
@@ -2345,12 +2364,9 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end,
2345 int skip_pinned); 2364 int skip_pinned);
2346int btrfs_check_file(struct btrfs_root *root, struct inode *inode); 2365int btrfs_check_file(struct btrfs_root *root, struct inode *inode);
2347extern const struct file_operations btrfs_file_operations; 2366extern const struct file_operations btrfs_file_operations;
2348int btrfs_drop_extents(struct btrfs_trans_handle *trans, 2367int btrfs_drop_extents(struct btrfs_trans_handle *trans, struct inode *inode,
2349 struct btrfs_root *root, struct inode *inode, 2368 u64 start, u64 end, u64 *hint_byte, int drop_cache);
2350 u64 start, u64 end, u64 locked_end,
2351 u64 inline_limit, u64 *hint_block, int drop_cache);
2352int btrfs_mark_extent_written(struct btrfs_trans_handle *trans, 2369int btrfs_mark_extent_written(struct btrfs_trans_handle *trans,
2353 struct btrfs_root *root,
2354 struct inode *inode, u64 start, u64 end); 2370 struct inode *inode, u64 start, u64 end);
2355int btrfs_release_file(struct inode *inode, struct file *file); 2371int btrfs_release_file(struct inode *inode, struct file *file);
2356 2372
@@ -2380,7 +2396,8 @@ int btrfs_check_acl(struct inode *inode, int mask);
2380#else 2396#else
2381#define btrfs_check_acl NULL 2397#define btrfs_check_acl NULL
2382#endif 2398#endif
2383int btrfs_init_acl(struct inode *inode, struct inode *dir); 2399int btrfs_init_acl(struct btrfs_trans_handle *trans,
2400 struct inode *inode, struct inode *dir);
2384int btrfs_acl_chmod(struct inode *inode); 2401int btrfs_acl_chmod(struct inode *inode);
2385 2402
2386/* relocation.c */ 2403/* relocation.c */
diff --git a/fs/btrfs/dir-item.c b/fs/btrfs/dir-item.c
index f3a6075519cc..e9103b3baa49 100644
--- a/fs/btrfs/dir-item.c
+++ b/fs/btrfs/dir-item.c
@@ -68,12 +68,12 @@ static struct btrfs_dir_item *insert_with_overflow(struct btrfs_trans_handle
68 * into the tree 68 * into the tree
69 */ 69 */
70int btrfs_insert_xattr_item(struct btrfs_trans_handle *trans, 70int btrfs_insert_xattr_item(struct btrfs_trans_handle *trans,
71 struct btrfs_root *root, const char *name, 71 struct btrfs_root *root,
72 u16 name_len, const void *data, u16 data_len, 72 struct btrfs_path *path, u64 objectid,
73 u64 dir) 73 const char *name, u16 name_len,
74 const void *data, u16 data_len)
74{ 75{
75 int ret = 0; 76 int ret = 0;
76 struct btrfs_path *path;
77 struct btrfs_dir_item *dir_item; 77 struct btrfs_dir_item *dir_item;
78 unsigned long name_ptr, data_ptr; 78 unsigned long name_ptr, data_ptr;
79 struct btrfs_key key, location; 79 struct btrfs_key key, location;
@@ -81,15 +81,11 @@ int btrfs_insert_xattr_item(struct btrfs_trans_handle *trans,
81 struct extent_buffer *leaf; 81 struct extent_buffer *leaf;
82 u32 data_size; 82 u32 data_size;
83 83
84 key.objectid = dir; 84 BUG_ON(name_len + data_len > BTRFS_MAX_XATTR_SIZE(root));
85
86 key.objectid = objectid;
85 btrfs_set_key_type(&key, BTRFS_XATTR_ITEM_KEY); 87 btrfs_set_key_type(&key, BTRFS_XATTR_ITEM_KEY);
86 key.offset = btrfs_name_hash(name, name_len); 88 key.offset = btrfs_name_hash(name, name_len);
87 path = btrfs_alloc_path();
88 if (!path)
89 return -ENOMEM;
90 if (name_len + data_len + sizeof(struct btrfs_dir_item) >
91 BTRFS_LEAF_DATA_SIZE(root) - sizeof(struct btrfs_item))
92 return -ENOSPC;
93 89
94 data_size = sizeof(*dir_item) + name_len + data_len; 90 data_size = sizeof(*dir_item) + name_len + data_len;
95 dir_item = insert_with_overflow(trans, root, path, &key, data_size, 91 dir_item = insert_with_overflow(trans, root, path, &key, data_size,
@@ -117,7 +113,6 @@ int btrfs_insert_xattr_item(struct btrfs_trans_handle *trans,
117 write_extent_buffer(leaf, data, data_ptr, data_len); 113 write_extent_buffer(leaf, data, data_ptr, data_len);
118 btrfs_mark_buffer_dirty(path->nodes[0]); 114 btrfs_mark_buffer_dirty(path->nodes[0]);
119 115
120 btrfs_free_path(path);
121 return ret; 116 return ret;
122} 117}
123 118
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 02b6afbd7450..2b59201b955c 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -892,6 +892,8 @@ static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize,
892 root->stripesize = stripesize; 892 root->stripesize = stripesize;
893 root->ref_cows = 0; 893 root->ref_cows = 0;
894 root->track_dirty = 0; 894 root->track_dirty = 0;
895 root->in_radix = 0;
896 root->clean_orphans = 0;
895 897
896 root->fs_info = fs_info; 898 root->fs_info = fs_info;
897 root->objectid = objectid; 899 root->objectid = objectid;
@@ -928,7 +930,6 @@ static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize,
928 root->defrag_trans_start = fs_info->generation; 930 root->defrag_trans_start = fs_info->generation;
929 init_completion(&root->kobj_unregister); 931 init_completion(&root->kobj_unregister);
930 root->defrag_running = 0; 932 root->defrag_running = 0;
931 root->defrag_level = 0;
932 root->root_key.objectid = objectid; 933 root->root_key.objectid = objectid;
933 root->anon_super.s_root = NULL; 934 root->anon_super.s_root = NULL;
934 root->anon_super.s_dev = 0; 935 root->anon_super.s_dev = 0;
@@ -980,12 +981,12 @@ int btrfs_free_log_root_tree(struct btrfs_trans_handle *trans,
980 981
981 while (1) { 982 while (1) {
982 ret = find_first_extent_bit(&log_root_tree->dirty_log_pages, 983 ret = find_first_extent_bit(&log_root_tree->dirty_log_pages,
983 0, &start, &end, EXTENT_DIRTY); 984 0, &start, &end, EXTENT_DIRTY | EXTENT_NEW);
984 if (ret) 985 if (ret)
985 break; 986 break;
986 987
987 clear_extent_dirty(&log_root_tree->dirty_log_pages, 988 clear_extent_bits(&log_root_tree->dirty_log_pages, start, end,
988 start, end, GFP_NOFS); 989 EXTENT_DIRTY | EXTENT_NEW, GFP_NOFS);
989 } 990 }
990 eb = fs_info->log_root_tree->node; 991 eb = fs_info->log_root_tree->node;
991 992
@@ -1210,8 +1211,10 @@ again:
1210 ret = radix_tree_insert(&fs_info->fs_roots_radix, 1211 ret = radix_tree_insert(&fs_info->fs_roots_radix,
1211 (unsigned long)root->root_key.objectid, 1212 (unsigned long)root->root_key.objectid,
1212 root); 1213 root);
1213 if (ret == 0) 1214 if (ret == 0) {
1214 root->in_radix = 1; 1215 root->in_radix = 1;
1216 root->clean_orphans = 1;
1217 }
1215 spin_unlock(&fs_info->fs_roots_radix_lock); 1218 spin_unlock(&fs_info->fs_roots_radix_lock);
1216 radix_tree_preload_end(); 1219 radix_tree_preload_end();
1217 if (ret) { 1220 if (ret) {
@@ -1225,10 +1228,6 @@ again:
1225 ret = btrfs_find_dead_roots(fs_info->tree_root, 1228 ret = btrfs_find_dead_roots(fs_info->tree_root,
1226 root->root_key.objectid); 1229 root->root_key.objectid);
1227 WARN_ON(ret); 1230 WARN_ON(ret);
1228
1229 if (!(fs_info->sb->s_flags & MS_RDONLY))
1230 btrfs_orphan_cleanup(root);
1231
1232 return root; 1231 return root;
1233fail: 1232fail:
1234 free_fs_root(root); 1233 free_fs_root(root);
@@ -1477,6 +1476,7 @@ static int cleaner_kthread(void *arg)
1477 1476
1478 if (!(root->fs_info->sb->s_flags & MS_RDONLY) && 1477 if (!(root->fs_info->sb->s_flags & MS_RDONLY) &&
1479 mutex_trylock(&root->fs_info->cleaner_mutex)) { 1478 mutex_trylock(&root->fs_info->cleaner_mutex)) {
1479 btrfs_run_delayed_iputs(root);
1480 btrfs_clean_old_snapshots(root); 1480 btrfs_clean_old_snapshots(root);
1481 mutex_unlock(&root->fs_info->cleaner_mutex); 1481 mutex_unlock(&root->fs_info->cleaner_mutex);
1482 } 1482 }
@@ -1606,6 +1606,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1606 INIT_RADIX_TREE(&fs_info->fs_roots_radix, GFP_ATOMIC); 1606 INIT_RADIX_TREE(&fs_info->fs_roots_radix, GFP_ATOMIC);
1607 INIT_LIST_HEAD(&fs_info->trans_list); 1607 INIT_LIST_HEAD(&fs_info->trans_list);
1608 INIT_LIST_HEAD(&fs_info->dead_roots); 1608 INIT_LIST_HEAD(&fs_info->dead_roots);
1609 INIT_LIST_HEAD(&fs_info->delayed_iputs);
1609 INIT_LIST_HEAD(&fs_info->hashers); 1610 INIT_LIST_HEAD(&fs_info->hashers);
1610 INIT_LIST_HEAD(&fs_info->delalloc_inodes); 1611 INIT_LIST_HEAD(&fs_info->delalloc_inodes);
1611 INIT_LIST_HEAD(&fs_info->ordered_operations); 1612 INIT_LIST_HEAD(&fs_info->ordered_operations);
@@ -1614,6 +1615,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1614 spin_lock_init(&fs_info->new_trans_lock); 1615 spin_lock_init(&fs_info->new_trans_lock);
1615 spin_lock_init(&fs_info->ref_cache_lock); 1616 spin_lock_init(&fs_info->ref_cache_lock);
1616 spin_lock_init(&fs_info->fs_roots_radix_lock); 1617 spin_lock_init(&fs_info->fs_roots_radix_lock);
1618 spin_lock_init(&fs_info->delayed_iput_lock);
1617 1619
1618 init_completion(&fs_info->kobj_unregister); 1620 init_completion(&fs_info->kobj_unregister);
1619 fs_info->tree_root = tree_root; 1621 fs_info->tree_root = tree_root;
@@ -1689,6 +1691,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1689 mutex_init(&fs_info->cleaner_mutex); 1691 mutex_init(&fs_info->cleaner_mutex);
1690 mutex_init(&fs_info->volume_mutex); 1692 mutex_init(&fs_info->volume_mutex);
1691 init_rwsem(&fs_info->extent_commit_sem); 1693 init_rwsem(&fs_info->extent_commit_sem);
1694 init_rwsem(&fs_info->cleanup_work_sem);
1692 init_rwsem(&fs_info->subvol_sem); 1695 init_rwsem(&fs_info->subvol_sem);
1693 1696
1694 btrfs_init_free_cluster(&fs_info->meta_alloc_cluster); 1697 btrfs_init_free_cluster(&fs_info->meta_alloc_cluster);
@@ -1979,7 +1982,12 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1979 1982
1980 if (!(sb->s_flags & MS_RDONLY)) { 1983 if (!(sb->s_flags & MS_RDONLY)) {
1981 ret = btrfs_recover_relocation(tree_root); 1984 ret = btrfs_recover_relocation(tree_root);
1982 BUG_ON(ret); 1985 if (ret < 0) {
1986 printk(KERN_WARNING
1987 "btrfs: failed to recover relocation\n");
1988 err = -EINVAL;
1989 goto fail_trans_kthread;
1990 }
1983 } 1991 }
1984 1992
1985 location.objectid = BTRFS_FS_TREE_OBJECTID; 1993 location.objectid = BTRFS_FS_TREE_OBJECTID;
@@ -1990,6 +1998,12 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1990 if (!fs_info->fs_root) 1998 if (!fs_info->fs_root)
1991 goto fail_trans_kthread; 1999 goto fail_trans_kthread;
1992 2000
2001 if (!(sb->s_flags & MS_RDONLY)) {
2002 down_read(&fs_info->cleanup_work_sem);
2003 btrfs_orphan_cleanup(fs_info->fs_root);
2004 up_read(&fs_info->cleanup_work_sem);
2005 }
2006
1993 return tree_root; 2007 return tree_root;
1994 2008
1995fail_trans_kthread: 2009fail_trans_kthread:
@@ -2386,8 +2400,14 @@ int btrfs_commit_super(struct btrfs_root *root)
2386 int ret; 2400 int ret;
2387 2401
2388 mutex_lock(&root->fs_info->cleaner_mutex); 2402 mutex_lock(&root->fs_info->cleaner_mutex);
2403 btrfs_run_delayed_iputs(root);
2389 btrfs_clean_old_snapshots(root); 2404 btrfs_clean_old_snapshots(root);
2390 mutex_unlock(&root->fs_info->cleaner_mutex); 2405 mutex_unlock(&root->fs_info->cleaner_mutex);
2406
2407 /* wait until ongoing cleanup work done */
2408 down_write(&root->fs_info->cleanup_work_sem);
2409 up_write(&root->fs_info->cleanup_work_sem);
2410
2391 trans = btrfs_start_transaction(root, 1); 2411 trans = btrfs_start_transaction(root, 1);
2392 ret = btrfs_commit_transaction(trans, root); 2412 ret = btrfs_commit_transaction(trans, root);
2393 BUG_ON(ret); 2413 BUG_ON(ret);
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 94627c4cc193..559f72489b3b 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -83,6 +83,17 @@ static int block_group_bits(struct btrfs_block_group_cache *cache, u64 bits)
83 return (cache->flags & bits) == bits; 83 return (cache->flags & bits) == bits;
84} 84}
85 85
86void btrfs_get_block_group(struct btrfs_block_group_cache *cache)
87{
88 atomic_inc(&cache->count);
89}
90
91void btrfs_put_block_group(struct btrfs_block_group_cache *cache)
92{
93 if (atomic_dec_and_test(&cache->count))
94 kfree(cache);
95}
96
86/* 97/*
87 * this adds the block group to the fs_info rb tree for the block group 98 * this adds the block group to the fs_info rb tree for the block group
88 * cache 99 * cache
@@ -156,7 +167,7 @@ block_group_cache_tree_search(struct btrfs_fs_info *info, u64 bytenr,
156 } 167 }
157 } 168 }
158 if (ret) 169 if (ret)
159 atomic_inc(&ret->count); 170 btrfs_get_block_group(ret);
160 spin_unlock(&info->block_group_cache_lock); 171 spin_unlock(&info->block_group_cache_lock);
161 172
162 return ret; 173 return ret;
@@ -195,6 +206,14 @@ static int exclude_super_stripes(struct btrfs_root *root,
195 int stripe_len; 206 int stripe_len;
196 int i, nr, ret; 207 int i, nr, ret;
197 208
209 if (cache->key.objectid < BTRFS_SUPER_INFO_OFFSET) {
210 stripe_len = BTRFS_SUPER_INFO_OFFSET - cache->key.objectid;
211 cache->bytes_super += stripe_len;
212 ret = add_excluded_extent(root, cache->key.objectid,
213 stripe_len);
214 BUG_ON(ret);
215 }
216
198 for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) { 217 for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
199 bytenr = btrfs_sb_offset(i); 218 bytenr = btrfs_sb_offset(i);
200 ret = btrfs_rmap_block(&root->fs_info->mapping_tree, 219 ret = btrfs_rmap_block(&root->fs_info->mapping_tree,
@@ -255,7 +274,7 @@ static u64 add_new_free_space(struct btrfs_block_group_cache *block_group,
255 if (ret) 274 if (ret)
256 break; 275 break;
257 276
258 if (extent_start == start) { 277 if (extent_start <= start) {
259 start = extent_end + 1; 278 start = extent_end + 1;
260 } else if (extent_start > start && extent_start < end) { 279 } else if (extent_start > start && extent_start < end) {
261 size = extent_start - start; 280 size = extent_start - start;
@@ -399,6 +418,8 @@ err:
399 418
400 put_caching_control(caching_ctl); 419 put_caching_control(caching_ctl);
401 atomic_dec(&block_group->space_info->caching_threads); 420 atomic_dec(&block_group->space_info->caching_threads);
421 btrfs_put_block_group(block_group);
422
402 return 0; 423 return 0;
403} 424}
404 425
@@ -439,6 +460,7 @@ static int cache_block_group(struct btrfs_block_group_cache *cache)
439 up_write(&fs_info->extent_commit_sem); 460 up_write(&fs_info->extent_commit_sem);
440 461
441 atomic_inc(&cache->space_info->caching_threads); 462 atomic_inc(&cache->space_info->caching_threads);
463 btrfs_get_block_group(cache);
442 464
443 tsk = kthread_run(caching_kthread, cache, "btrfs-cache-%llu\n", 465 tsk = kthread_run(caching_kthread, cache, "btrfs-cache-%llu\n",
444 cache->key.objectid); 466 cache->key.objectid);
@@ -478,12 +500,6 @@ struct btrfs_block_group_cache *btrfs_lookup_block_group(
478 return cache; 500 return cache;
479} 501}
480 502
481void btrfs_put_block_group(struct btrfs_block_group_cache *cache)
482{
483 if (atomic_dec_and_test(&cache->count))
484 kfree(cache);
485}
486
487static struct btrfs_space_info *__find_space_info(struct btrfs_fs_info *info, 503static struct btrfs_space_info *__find_space_info(struct btrfs_fs_info *info,
488 u64 flags) 504 u64 flags)
489{ 505{
@@ -2574,7 +2590,7 @@ next_block_group(struct btrfs_root *root,
2574 if (node) { 2590 if (node) {
2575 cache = rb_entry(node, struct btrfs_block_group_cache, 2591 cache = rb_entry(node, struct btrfs_block_group_cache,
2576 cache_node); 2592 cache_node);
2577 atomic_inc(&cache->count); 2593 btrfs_get_block_group(cache);
2578 } else 2594 } else
2579 cache = NULL; 2595 cache = NULL;
2580 spin_unlock(&root->fs_info->block_group_cache_lock); 2596 spin_unlock(&root->fs_info->block_group_cache_lock);
@@ -2880,9 +2896,9 @@ static noinline void flush_delalloc_async(struct btrfs_work *work)
2880 root = async->root; 2896 root = async->root;
2881 info = async->info; 2897 info = async->info;
2882 2898
2883 btrfs_start_delalloc_inodes(root); 2899 btrfs_start_delalloc_inodes(root, 0);
2884 wake_up(&info->flush_wait); 2900 wake_up(&info->flush_wait);
2885 btrfs_wait_ordered_extents(root, 0); 2901 btrfs_wait_ordered_extents(root, 0, 0);
2886 2902
2887 spin_lock(&info->lock); 2903 spin_lock(&info->lock);
2888 info->flushing = 0; 2904 info->flushing = 0;
@@ -2956,8 +2972,8 @@ static void flush_delalloc(struct btrfs_root *root,
2956 return; 2972 return;
2957 2973
2958flush: 2974flush:
2959 btrfs_start_delalloc_inodes(root); 2975 btrfs_start_delalloc_inodes(root, 0);
2960 btrfs_wait_ordered_extents(root, 0); 2976 btrfs_wait_ordered_extents(root, 0, 0);
2961 2977
2962 spin_lock(&info->lock); 2978 spin_lock(&info->lock);
2963 info->flushing = 0; 2979 info->flushing = 0;
@@ -3454,14 +3470,6 @@ static int update_block_group(struct btrfs_trans_handle *trans,
3454 else 3470 else
3455 old_val -= num_bytes; 3471 old_val -= num_bytes;
3456 btrfs_set_super_bytes_used(&info->super_copy, old_val); 3472 btrfs_set_super_bytes_used(&info->super_copy, old_val);
3457
3458 /* block accounting for root item */
3459 old_val = btrfs_root_used(&root->root_item);
3460 if (alloc)
3461 old_val += num_bytes;
3462 else
3463 old_val -= num_bytes;
3464 btrfs_set_root_used(&root->root_item, old_val);
3465 spin_unlock(&info->delalloc_lock); 3473 spin_unlock(&info->delalloc_lock);
3466 3474
3467 while (total) { 3475 while (total) {
@@ -4049,6 +4057,21 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans,
4049 return ret; 4057 return ret;
4050} 4058}
4051 4059
4060int btrfs_free_tree_block(struct btrfs_trans_handle *trans,
4061 struct btrfs_root *root,
4062 u64 bytenr, u32 blocksize,
4063 u64 parent, u64 root_objectid, int level)
4064{
4065 u64 used;
4066 spin_lock(&root->node_lock);
4067 used = btrfs_root_used(&root->root_item) - blocksize;
4068 btrfs_set_root_used(&root->root_item, used);
4069 spin_unlock(&root->node_lock);
4070
4071 return btrfs_free_extent(trans, root, bytenr, blocksize,
4072 parent, root_objectid, level, 0);
4073}
4074
4052static u64 stripe_align(struct btrfs_root *root, u64 val) 4075static u64 stripe_align(struct btrfs_root *root, u64 val)
4053{ 4076{
4054 u64 mask = ((u64)root->stripesize - 1); 4077 u64 mask = ((u64)root->stripesize - 1);
@@ -4212,7 +4235,7 @@ search:
4212 u64 offset; 4235 u64 offset;
4213 int cached; 4236 int cached;
4214 4237
4215 atomic_inc(&block_group->count); 4238 btrfs_get_block_group(block_group);
4216 search_start = block_group->key.objectid; 4239 search_start = block_group->key.objectid;
4217 4240
4218have_block_group: 4241have_block_group:
@@ -4300,7 +4323,7 @@ have_block_group:
4300 4323
4301 btrfs_put_block_group(block_group); 4324 btrfs_put_block_group(block_group);
4302 block_group = last_ptr->block_group; 4325 block_group = last_ptr->block_group;
4303 atomic_inc(&block_group->count); 4326 btrfs_get_block_group(block_group);
4304 spin_unlock(&last_ptr->lock); 4327 spin_unlock(&last_ptr->lock);
4305 spin_unlock(&last_ptr->refill_lock); 4328 spin_unlock(&last_ptr->refill_lock);
4306 4329
@@ -4578,7 +4601,6 @@ int btrfs_reserve_extent(struct btrfs_trans_handle *trans,
4578{ 4601{
4579 int ret; 4602 int ret;
4580 u64 search_start = 0; 4603 u64 search_start = 0;
4581 struct btrfs_fs_info *info = root->fs_info;
4582 4604
4583 data = btrfs_get_alloc_profile(root, data); 4605 data = btrfs_get_alloc_profile(root, data);
4584again: 4606again:
@@ -4586,17 +4608,9 @@ again:
4586 * the only place that sets empty_size is btrfs_realloc_node, which 4608 * the only place that sets empty_size is btrfs_realloc_node, which
4587 * is not called recursively on allocations 4609 * is not called recursively on allocations
4588 */ 4610 */
4589 if (empty_size || root->ref_cows) { 4611 if (empty_size || root->ref_cows)
4590 if (!(data & BTRFS_BLOCK_GROUP_METADATA)) {
4591 ret = do_chunk_alloc(trans, root->fs_info->extent_root,
4592 2 * 1024 * 1024,
4593 BTRFS_BLOCK_GROUP_METADATA |
4594 (info->metadata_alloc_profile &
4595 info->avail_metadata_alloc_bits), 0);
4596 }
4597 ret = do_chunk_alloc(trans, root->fs_info->extent_root, 4612 ret = do_chunk_alloc(trans, root->fs_info->extent_root,
4598 num_bytes + 2 * 1024 * 1024, data, 0); 4613 num_bytes + 2 * 1024 * 1024, data, 0);
4599 }
4600 4614
4601 WARN_ON(num_bytes < root->sectorsize); 4615 WARN_ON(num_bytes < root->sectorsize);
4602 ret = find_free_extent(trans, root, num_bytes, empty_size, 4616 ret = find_free_extent(trans, root, num_bytes, empty_size,
@@ -4897,6 +4911,14 @@ static int alloc_tree_block(struct btrfs_trans_handle *trans,
4897 extent_op); 4911 extent_op);
4898 BUG_ON(ret); 4912 BUG_ON(ret);
4899 } 4913 }
4914
4915 if (root_objectid == root->root_key.objectid) {
4916 u64 used;
4917 spin_lock(&root->node_lock);
4918 used = btrfs_root_used(&root->root_item) + num_bytes;
4919 btrfs_set_root_used(&root->root_item, used);
4920 spin_unlock(&root->node_lock);
4921 }
4900 return ret; 4922 return ret;
4901} 4923}
4902 4924
@@ -4919,8 +4941,16 @@ struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans,
4919 btrfs_set_buffer_uptodate(buf); 4941 btrfs_set_buffer_uptodate(buf);
4920 4942
4921 if (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID) { 4943 if (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID) {
4922 set_extent_dirty(&root->dirty_log_pages, buf->start, 4944 /*
4923 buf->start + buf->len - 1, GFP_NOFS); 4945 * we allow two log transactions at a time, use different
4946 * EXENT bit to differentiate dirty pages.
4947 */
4948 if (root->log_transid % 2 == 0)
4949 set_extent_dirty(&root->dirty_log_pages, buf->start,
4950 buf->start + buf->len - 1, GFP_NOFS);
4951 else
4952 set_extent_new(&root->dirty_log_pages, buf->start,
4953 buf->start + buf->len - 1, GFP_NOFS);
4924 } else { 4954 } else {
4925 set_extent_dirty(&trans->transaction->dirty_pages, buf->start, 4955 set_extent_dirty(&trans->transaction->dirty_pages, buf->start,
4926 buf->start + buf->len - 1, GFP_NOFS); 4956 buf->start + buf->len - 1, GFP_NOFS);
@@ -5372,10 +5402,6 @@ static noinline int walk_down_tree(struct btrfs_trans_handle *trans,
5372 int ret; 5402 int ret;
5373 5403
5374 while (level >= 0) { 5404 while (level >= 0) {
5375 if (path->slots[level] >=
5376 btrfs_header_nritems(path->nodes[level]))
5377 break;
5378
5379 ret = walk_down_proc(trans, root, path, wc, lookup_info); 5405 ret = walk_down_proc(trans, root, path, wc, lookup_info);
5380 if (ret > 0) 5406 if (ret > 0)
5381 break; 5407 break;
@@ -5383,6 +5409,10 @@ static noinline int walk_down_tree(struct btrfs_trans_handle *trans,
5383 if (level == 0) 5409 if (level == 0)
5384 break; 5410 break;
5385 5411
5412 if (path->slots[level] >=
5413 btrfs_header_nritems(path->nodes[level]))
5414 break;
5415
5386 ret = do_walk_down(trans, root, path, wc, &lookup_info); 5416 ret = do_walk_down(trans, root, path, wc, &lookup_info);
5387 if (ret > 0) { 5417 if (ret > 0) {
5388 path->slots[level]++; 5418 path->slots[level]++;
@@ -7373,9 +7403,7 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info)
7373 wait_block_group_cache_done(block_group); 7403 wait_block_group_cache_done(block_group);
7374 7404
7375 btrfs_remove_free_space_cache(block_group); 7405 btrfs_remove_free_space_cache(block_group);
7376 7406 btrfs_put_block_group(block_group);
7377 WARN_ON(atomic_read(&block_group->count) != 1);
7378 kfree(block_group);
7379 7407
7380 spin_lock(&info->block_group_cache_lock); 7408 spin_lock(&info->block_group_cache_lock);
7381 } 7409 }
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 96577e8bf9fd..b177ed319612 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -3165,10 +3165,9 @@ struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree,
3165 spin_unlock(&tree->buffer_lock); 3165 spin_unlock(&tree->buffer_lock);
3166 goto free_eb; 3166 goto free_eb;
3167 } 3167 }
3168 spin_unlock(&tree->buffer_lock);
3169
3170 /* add one reference for the tree */ 3168 /* add one reference for the tree */
3171 atomic_inc(&eb->refs); 3169 atomic_inc(&eb->refs);
3170 spin_unlock(&tree->buffer_lock);
3172 return eb; 3171 return eb;
3173 3172
3174free_eb: 3173free_eb:
diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c
index 46bea0f4dc7b..428fcac45f90 100644
--- a/fs/btrfs/extent_map.c
+++ b/fs/btrfs/extent_map.c
@@ -155,20 +155,6 @@ static struct rb_node *__tree_search(struct rb_root *root, u64 offset,
155 return NULL; 155 return NULL;
156} 156}
157 157
158/*
159 * look for an offset in the tree, and if it can't be found, return
160 * the first offset we can find smaller than 'offset'.
161 */
162static inline struct rb_node *tree_search(struct rb_root *root, u64 offset)
163{
164 struct rb_node *prev;
165 struct rb_node *ret;
166 ret = __tree_search(root, offset, &prev, NULL);
167 if (!ret)
168 return prev;
169 return ret;
170}
171
172/* check to see if two extent_map structs are adjacent and safe to merge */ 158/* check to see if two extent_map structs are adjacent and safe to merge */
173static int mergable_maps(struct extent_map *prev, struct extent_map *next) 159static int mergable_maps(struct extent_map *prev, struct extent_map *next)
174{ 160{
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 77f759302e12..6ed434ac037f 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -179,18 +179,14 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end,
179 } 179 }
180 flags = em->flags; 180 flags = em->flags;
181 if (skip_pinned && test_bit(EXTENT_FLAG_PINNED, &em->flags)) { 181 if (skip_pinned && test_bit(EXTENT_FLAG_PINNED, &em->flags)) {
182 if (em->start <= start && 182 if (testend && em->start + em->len >= start + len) {
183 (!testend || em->start + em->len >= start + len)) {
184 free_extent_map(em); 183 free_extent_map(em);
185 write_unlock(&em_tree->lock); 184 write_unlock(&em_tree->lock);
186 break; 185 break;
187 } 186 }
188 if (start < em->start) { 187 start = em->start + em->len;
189 len = em->start - start; 188 if (testend)
190 } else {
191 len = start + len - (em->start + em->len); 189 len = start + len - (em->start + em->len);
192 start = em->start + em->len;
193 }
194 free_extent_map(em); 190 free_extent_map(em);
195 write_unlock(&em_tree->lock); 191 write_unlock(&em_tree->lock);
196 continue; 192 continue;
@@ -265,324 +261,253 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end,
265 * If an extent intersects the range but is not entirely inside the range 261 * If an extent intersects the range but is not entirely inside the range
266 * it is either truncated or split. Anything entirely inside the range 262 * it is either truncated or split. Anything entirely inside the range
267 * is deleted from the tree. 263 * is deleted from the tree.
268 *
269 * inline_limit is used to tell this code which offsets in the file to keep
270 * if they contain inline extents.
271 */ 264 */
272noinline int btrfs_drop_extents(struct btrfs_trans_handle *trans, 265int btrfs_drop_extents(struct btrfs_trans_handle *trans, struct inode *inode,
273 struct btrfs_root *root, struct inode *inode, 266 u64 start, u64 end, u64 *hint_byte, int drop_cache)
274 u64 start, u64 end, u64 locked_end,
275 u64 inline_limit, u64 *hint_byte, int drop_cache)
276{ 267{
277 u64 extent_end = 0; 268 struct btrfs_root *root = BTRFS_I(inode)->root;
278 u64 search_start = start;
279 u64 ram_bytes = 0;
280 u64 disk_bytenr = 0;
281 u64 orig_locked_end = locked_end;
282 u8 compression;
283 u8 encryption;
284 u16 other_encoding = 0;
285 struct extent_buffer *leaf; 269 struct extent_buffer *leaf;
286 struct btrfs_file_extent_item *extent; 270 struct btrfs_file_extent_item *fi;
287 struct btrfs_path *path; 271 struct btrfs_path *path;
288 struct btrfs_key key; 272 struct btrfs_key key;
289 struct btrfs_file_extent_item old; 273 struct btrfs_key new_key;
290 int keep; 274 u64 search_start = start;
291 int slot; 275 u64 disk_bytenr = 0;
292 int bookend; 276 u64 num_bytes = 0;
293 int found_type = 0; 277 u64 extent_offset = 0;
294 int found_extent; 278 u64 extent_end = 0;
295 int found_inline; 279 int del_nr = 0;
280 int del_slot = 0;
281 int extent_type;
296 int recow; 282 int recow;
297 int ret; 283 int ret;
298 284
299 inline_limit = 0;
300 if (drop_cache) 285 if (drop_cache)
301 btrfs_drop_extent_cache(inode, start, end - 1, 0); 286 btrfs_drop_extent_cache(inode, start, end - 1, 0);
302 287
303 path = btrfs_alloc_path(); 288 path = btrfs_alloc_path();
304 if (!path) 289 if (!path)
305 return -ENOMEM; 290 return -ENOMEM;
291
306 while (1) { 292 while (1) {
307 recow = 0; 293 recow = 0;
308 btrfs_release_path(root, path);
309 ret = btrfs_lookup_file_extent(trans, root, path, inode->i_ino, 294 ret = btrfs_lookup_file_extent(trans, root, path, inode->i_ino,
310 search_start, -1); 295 search_start, -1);
311 if (ret < 0) 296 if (ret < 0)
312 goto out; 297 break;
313 if (ret > 0) { 298 if (ret > 0 && path->slots[0] > 0 && search_start == start) {
314 if (path->slots[0] == 0) { 299 leaf = path->nodes[0];
315 ret = 0; 300 btrfs_item_key_to_cpu(leaf, &key, path->slots[0] - 1);
316 goto out; 301 if (key.objectid == inode->i_ino &&
317 } 302 key.type == BTRFS_EXTENT_DATA_KEY)
318 path->slots[0]--; 303 path->slots[0]--;
319 } 304 }
305 ret = 0;
320next_slot: 306next_slot:
321 keep = 0;
322 bookend = 0;
323 found_extent = 0;
324 found_inline = 0;
325 compression = 0;
326 encryption = 0;
327 extent = NULL;
328 leaf = path->nodes[0]; 307 leaf = path->nodes[0];
329 slot = path->slots[0]; 308 if (path->slots[0] >= btrfs_header_nritems(leaf)) {
330 ret = 0; 309 BUG_ON(del_nr > 0);
331 btrfs_item_key_to_cpu(leaf, &key, slot); 310 ret = btrfs_next_leaf(root, path);
332 if (btrfs_key_type(&key) == BTRFS_EXTENT_DATA_KEY && 311 if (ret < 0)
333 key.offset >= end) { 312 break;
334 goto out; 313 if (ret > 0) {
335 } 314 ret = 0;
336 if (btrfs_key_type(&key) > BTRFS_EXTENT_DATA_KEY || 315 break;
337 key.objectid != inode->i_ino) {
338 goto out;
339 }
340 if (recow) {
341 search_start = max(key.offset, start);
342 continue;
343 }
344 if (btrfs_key_type(&key) == BTRFS_EXTENT_DATA_KEY) {
345 extent = btrfs_item_ptr(leaf, slot,
346 struct btrfs_file_extent_item);
347 found_type = btrfs_file_extent_type(leaf, extent);
348 compression = btrfs_file_extent_compression(leaf,
349 extent);
350 encryption = btrfs_file_extent_encryption(leaf,
351 extent);
352 other_encoding = btrfs_file_extent_other_encoding(leaf,
353 extent);
354 if (found_type == BTRFS_FILE_EXTENT_REG ||
355 found_type == BTRFS_FILE_EXTENT_PREALLOC) {
356 extent_end =
357 btrfs_file_extent_disk_bytenr(leaf,
358 extent);
359 if (extent_end)
360 *hint_byte = extent_end;
361
362 extent_end = key.offset +
363 btrfs_file_extent_num_bytes(leaf, extent);
364 ram_bytes = btrfs_file_extent_ram_bytes(leaf,
365 extent);
366 found_extent = 1;
367 } else if (found_type == BTRFS_FILE_EXTENT_INLINE) {
368 found_inline = 1;
369 extent_end = key.offset +
370 btrfs_file_extent_inline_len(leaf, extent);
371 } 316 }
317 leaf = path->nodes[0];
318 recow = 1;
319 }
320
321 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
322 if (key.objectid > inode->i_ino ||
323 key.type > BTRFS_EXTENT_DATA_KEY || key.offset >= end)
324 break;
325
326 fi = btrfs_item_ptr(leaf, path->slots[0],
327 struct btrfs_file_extent_item);
328 extent_type = btrfs_file_extent_type(leaf, fi);
329
330 if (extent_type == BTRFS_FILE_EXTENT_REG ||
331 extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
332 disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
333 num_bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
334 extent_offset = btrfs_file_extent_offset(leaf, fi);
335 extent_end = key.offset +
336 btrfs_file_extent_num_bytes(leaf, fi);
337 } else if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
338 extent_end = key.offset +
339 btrfs_file_extent_inline_len(leaf, fi);
372 } else { 340 } else {
341 WARN_ON(1);
373 extent_end = search_start; 342 extent_end = search_start;
374 } 343 }
375 344
376 /* we found nothing we can drop */ 345 if (extent_end <= search_start) {
377 if ((!found_extent && !found_inline) || 346 path->slots[0]++;
378 search_start >= extent_end) {
379 int nextret;
380 u32 nritems;
381 nritems = btrfs_header_nritems(leaf);
382 if (slot >= nritems - 1) {
383 nextret = btrfs_next_leaf(root, path);
384 if (nextret)
385 goto out;
386 recow = 1;
387 } else {
388 path->slots[0]++;
389 }
390 goto next_slot; 347 goto next_slot;
391 } 348 }
392 349
393 if (end <= extent_end && start >= key.offset && found_inline) 350 search_start = max(key.offset, start);
394 *hint_byte = EXTENT_MAP_INLINE; 351 if (recow) {
395 352 btrfs_release_path(root, path);
396 if (found_extent) { 353 continue;
397 read_extent_buffer(leaf, &old, (unsigned long)extent,
398 sizeof(old));
399 }
400
401 if (end < extent_end && end >= key.offset) {
402 bookend = 1;
403 if (found_inline && start <= key.offset)
404 keep = 1;
405 } 354 }
406 355
407 if (bookend && found_extent) { 356 /*
408 if (locked_end < extent_end) { 357 * | - range to drop - |
409 ret = try_lock_extent(&BTRFS_I(inode)->io_tree, 358 * | -------- extent -------- |
410 locked_end, extent_end - 1, 359 */
411 GFP_NOFS); 360 if (start > key.offset && end < extent_end) {
412 if (!ret) { 361 BUG_ON(del_nr > 0);
413 btrfs_release_path(root, path); 362 BUG_ON(extent_type == BTRFS_FILE_EXTENT_INLINE);
414 lock_extent(&BTRFS_I(inode)->io_tree, 363
415 locked_end, extent_end - 1, 364 memcpy(&new_key, &key, sizeof(new_key));
416 GFP_NOFS); 365 new_key.offset = start;
417 locked_end = extent_end; 366 ret = btrfs_duplicate_item(trans, root, path,
418 continue; 367 &new_key);
419 } 368 if (ret == -EAGAIN) {
420 locked_end = extent_end; 369 btrfs_release_path(root, path);
370 continue;
421 } 371 }
422 disk_bytenr = le64_to_cpu(old.disk_bytenr); 372 if (ret < 0)
423 if (disk_bytenr != 0) { 373 break;
374
375 leaf = path->nodes[0];
376 fi = btrfs_item_ptr(leaf, path->slots[0] - 1,
377 struct btrfs_file_extent_item);
378 btrfs_set_file_extent_num_bytes(leaf, fi,
379 start - key.offset);
380
381 fi = btrfs_item_ptr(leaf, path->slots[0],
382 struct btrfs_file_extent_item);
383
384 extent_offset += start - key.offset;
385 btrfs_set_file_extent_offset(leaf, fi, extent_offset);
386 btrfs_set_file_extent_num_bytes(leaf, fi,
387 extent_end - start);
388 btrfs_mark_buffer_dirty(leaf);
389
390 if (disk_bytenr > 0) {
424 ret = btrfs_inc_extent_ref(trans, root, 391 ret = btrfs_inc_extent_ref(trans, root,
425 disk_bytenr, 392 disk_bytenr, num_bytes, 0,
426 le64_to_cpu(old.disk_num_bytes), 0, 393 root->root_key.objectid,
427 root->root_key.objectid, 394 new_key.objectid,
428 key.objectid, key.offset - 395 start - extent_offset);
429 le64_to_cpu(old.offset));
430 BUG_ON(ret); 396 BUG_ON(ret);
397 *hint_byte = disk_bytenr;
431 } 398 }
399 key.offset = start;
432 } 400 }
401 /*
402 * | ---- range to drop ----- |
403 * | -------- extent -------- |
404 */
405 if (start <= key.offset && end < extent_end) {
406 BUG_ON(extent_type == BTRFS_FILE_EXTENT_INLINE);
433 407
434 if (found_inline) { 408 memcpy(&new_key, &key, sizeof(new_key));
435 u64 mask = root->sectorsize - 1; 409 new_key.offset = end;
436 search_start = (extent_end + mask) & ~mask; 410 btrfs_set_item_key_safe(trans, root, path, &new_key);
437 } else 411
438 search_start = extent_end; 412 extent_offset += end - key.offset;
439 413 btrfs_set_file_extent_offset(leaf, fi, extent_offset);
440 /* truncate existing extent */ 414 btrfs_set_file_extent_num_bytes(leaf, fi,
441 if (start > key.offset) { 415 extent_end - end);
442 u64 new_num; 416 btrfs_mark_buffer_dirty(leaf);
443 u64 old_num; 417 if (disk_bytenr > 0) {
444 keep = 1; 418 inode_sub_bytes(inode, end - key.offset);
445 WARN_ON(start & (root->sectorsize - 1)); 419 *hint_byte = disk_bytenr;
446 if (found_extent) {
447 new_num = start - key.offset;
448 old_num = btrfs_file_extent_num_bytes(leaf,
449 extent);
450 *hint_byte =
451 btrfs_file_extent_disk_bytenr(leaf,
452 extent);
453 if (btrfs_file_extent_disk_bytenr(leaf,
454 extent)) {
455 inode_sub_bytes(inode, old_num -
456 new_num);
457 }
458 btrfs_set_file_extent_num_bytes(leaf,
459 extent, new_num);
460 btrfs_mark_buffer_dirty(leaf);
461 } else if (key.offset < inline_limit &&
462 (end > extent_end) &&
463 (inline_limit < extent_end)) {
464 u32 new_size;
465 new_size = btrfs_file_extent_calc_inline_size(
466 inline_limit - key.offset);
467 inode_sub_bytes(inode, extent_end -
468 inline_limit);
469 btrfs_set_file_extent_ram_bytes(leaf, extent,
470 new_size);
471 if (!compression && !encryption) {
472 btrfs_truncate_item(trans, root, path,
473 new_size, 1);
474 }
475 } 420 }
421 break;
476 } 422 }
477 /* delete the entire extent */
478 if (!keep) {
479 if (found_inline)
480 inode_sub_bytes(inode, extent_end -
481 key.offset);
482 ret = btrfs_del_item(trans, root, path);
483 /* TODO update progress marker and return */
484 BUG_ON(ret);
485 extent = NULL;
486 btrfs_release_path(root, path);
487 /* the extent will be freed later */
488 }
489 if (bookend && found_inline && start <= key.offset) {
490 u32 new_size;
491 new_size = btrfs_file_extent_calc_inline_size(
492 extent_end - end);
493 inode_sub_bytes(inode, end - key.offset);
494 btrfs_set_file_extent_ram_bytes(leaf, extent,
495 new_size);
496 if (!compression && !encryption)
497 ret = btrfs_truncate_item(trans, root, path,
498 new_size, 0);
499 BUG_ON(ret);
500 }
501 /* create bookend, splitting the extent in two */
502 if (bookend && found_extent) {
503 struct btrfs_key ins;
504 ins.objectid = inode->i_ino;
505 ins.offset = end;
506 btrfs_set_key_type(&ins, BTRFS_EXTENT_DATA_KEY);
507 423
508 btrfs_release_path(root, path); 424 search_start = extent_end;
509 path->leave_spinning = 1; 425 /*
510 ret = btrfs_insert_empty_item(trans, root, path, &ins, 426 * | ---- range to drop ----- |
511 sizeof(*extent)); 427 * | -------- extent -------- |
512 BUG_ON(ret); 428 */
429 if (start > key.offset && end >= extent_end) {
430 BUG_ON(del_nr > 0);
431 BUG_ON(extent_type == BTRFS_FILE_EXTENT_INLINE);
513 432
514 leaf = path->nodes[0]; 433 btrfs_set_file_extent_num_bytes(leaf, fi,
515 extent = btrfs_item_ptr(leaf, path->slots[0], 434 start - key.offset);
516 struct btrfs_file_extent_item); 435 btrfs_mark_buffer_dirty(leaf);
517 write_extent_buffer(leaf, &old, 436 if (disk_bytenr > 0) {
518 (unsigned long)extent, sizeof(old)); 437 inode_sub_bytes(inode, extent_end - start);
519 438 *hint_byte = disk_bytenr;
520 btrfs_set_file_extent_compression(leaf, extent, 439 }
521 compression); 440 if (end == extent_end)
522 btrfs_set_file_extent_encryption(leaf, extent, 441 break;
523 encryption);
524 btrfs_set_file_extent_other_encoding(leaf, extent,
525 other_encoding);
526 btrfs_set_file_extent_offset(leaf, extent,
527 le64_to_cpu(old.offset) + end - key.offset);
528 WARN_ON(le64_to_cpu(old.num_bytes) <
529 (extent_end - end));
530 btrfs_set_file_extent_num_bytes(leaf, extent,
531 extent_end - end);
532 442
533 /* 443 path->slots[0]++;
534 * set the ram bytes to the size of the full extent 444 goto next_slot;
535 * before splitting. This is a worst case flag,
536 * but its the best we can do because we don't know
537 * how splitting affects compression
538 */
539 btrfs_set_file_extent_ram_bytes(leaf, extent,
540 ram_bytes);
541 btrfs_set_file_extent_type(leaf, extent, found_type);
542
543 btrfs_unlock_up_safe(path, 1);
544 btrfs_mark_buffer_dirty(path->nodes[0]);
545 btrfs_set_lock_blocking(path->nodes[0]);
546
547 path->leave_spinning = 0;
548 btrfs_release_path(root, path);
549 if (disk_bytenr != 0)
550 inode_add_bytes(inode, extent_end - end);
551 } 445 }
552 446
553 if (found_extent && !keep) { 447 /*
554 u64 old_disk_bytenr = le64_to_cpu(old.disk_bytenr); 448 * | ---- range to drop ----- |
449 * | ------ extent ------ |
450 */
451 if (start <= key.offset && end >= extent_end) {
452 if (del_nr == 0) {
453 del_slot = path->slots[0];
454 del_nr = 1;
455 } else {
456 BUG_ON(del_slot + del_nr != path->slots[0]);
457 del_nr++;
458 }
555 459
556 if (old_disk_bytenr != 0) { 460 if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
557 inode_sub_bytes(inode, 461 inode_sub_bytes(inode,
558 le64_to_cpu(old.num_bytes)); 462 extent_end - key.offset);
463 extent_end = ALIGN(extent_end,
464 root->sectorsize);
465 } else if (disk_bytenr > 0) {
559 ret = btrfs_free_extent(trans, root, 466 ret = btrfs_free_extent(trans, root,
560 old_disk_bytenr, 467 disk_bytenr, num_bytes, 0,
561 le64_to_cpu(old.disk_num_bytes), 468 root->root_key.objectid,
562 0, root->root_key.objectid,
563 key.objectid, key.offset - 469 key.objectid, key.offset -
564 le64_to_cpu(old.offset)); 470 extent_offset);
565 BUG_ON(ret); 471 BUG_ON(ret);
566 *hint_byte = old_disk_bytenr; 472 inode_sub_bytes(inode,
473 extent_end - key.offset);
474 *hint_byte = disk_bytenr;
567 } 475 }
568 }
569 476
570 if (search_start >= end) { 477 if (end == extent_end)
571 ret = 0; 478 break;
572 goto out; 479
480 if (path->slots[0] + 1 < btrfs_header_nritems(leaf)) {
481 path->slots[0]++;
482 goto next_slot;
483 }
484
485 ret = btrfs_del_items(trans, root, path, del_slot,
486 del_nr);
487 BUG_ON(ret);
488
489 del_nr = 0;
490 del_slot = 0;
491
492 btrfs_release_path(root, path);
493 continue;
573 } 494 }
495
496 BUG_ON(1);
574 } 497 }
575out: 498
576 btrfs_free_path(path); 499 if (del_nr > 0) {
577 if (locked_end > orig_locked_end) { 500 ret = btrfs_del_items(trans, root, path, del_slot, del_nr);
578 unlock_extent(&BTRFS_I(inode)->io_tree, orig_locked_end, 501 BUG_ON(ret);
579 locked_end - 1, GFP_NOFS);
580 } 502 }
503
504 btrfs_free_path(path);
581 return ret; 505 return ret;
582} 506}
583 507
584static int extent_mergeable(struct extent_buffer *leaf, int slot, 508static int extent_mergeable(struct extent_buffer *leaf, int slot,
585 u64 objectid, u64 bytenr, u64 *start, u64 *end) 509 u64 objectid, u64 bytenr, u64 orig_offset,
510 u64 *start, u64 *end)
586{ 511{
587 struct btrfs_file_extent_item *fi; 512 struct btrfs_file_extent_item *fi;
588 struct btrfs_key key; 513 struct btrfs_key key;
@@ -598,6 +523,7 @@ static int extent_mergeable(struct extent_buffer *leaf, int slot,
598 fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item); 523 fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
599 if (btrfs_file_extent_type(leaf, fi) != BTRFS_FILE_EXTENT_REG || 524 if (btrfs_file_extent_type(leaf, fi) != BTRFS_FILE_EXTENT_REG ||
600 btrfs_file_extent_disk_bytenr(leaf, fi) != bytenr || 525 btrfs_file_extent_disk_bytenr(leaf, fi) != bytenr ||
526 btrfs_file_extent_offset(leaf, fi) != key.offset - orig_offset ||
601 btrfs_file_extent_compression(leaf, fi) || 527 btrfs_file_extent_compression(leaf, fi) ||
602 btrfs_file_extent_encryption(leaf, fi) || 528 btrfs_file_extent_encryption(leaf, fi) ||
603 btrfs_file_extent_other_encoding(leaf, fi)) 529 btrfs_file_extent_other_encoding(leaf, fi))
@@ -620,23 +546,24 @@ static int extent_mergeable(struct extent_buffer *leaf, int slot,
620 * two or three. 546 * two or three.
621 */ 547 */
622int btrfs_mark_extent_written(struct btrfs_trans_handle *trans, 548int btrfs_mark_extent_written(struct btrfs_trans_handle *trans,
623 struct btrfs_root *root,
624 struct inode *inode, u64 start, u64 end) 549 struct inode *inode, u64 start, u64 end)
625{ 550{
551 struct btrfs_root *root = BTRFS_I(inode)->root;
626 struct extent_buffer *leaf; 552 struct extent_buffer *leaf;
627 struct btrfs_path *path; 553 struct btrfs_path *path;
628 struct btrfs_file_extent_item *fi; 554 struct btrfs_file_extent_item *fi;
629 struct btrfs_key key; 555 struct btrfs_key key;
556 struct btrfs_key new_key;
630 u64 bytenr; 557 u64 bytenr;
631 u64 num_bytes; 558 u64 num_bytes;
632 u64 extent_end; 559 u64 extent_end;
633 u64 orig_offset; 560 u64 orig_offset;
634 u64 other_start; 561 u64 other_start;
635 u64 other_end; 562 u64 other_end;
636 u64 split = start; 563 u64 split;
637 u64 locked_end = end; 564 int del_nr = 0;
638 int extent_type; 565 int del_slot = 0;
639 int split_end = 1; 566 int recow;
640 int ret; 567 int ret;
641 568
642 btrfs_drop_extent_cache(inode, start, end - 1, 0); 569 btrfs_drop_extent_cache(inode, start, end - 1, 0);
@@ -644,12 +571,11 @@ int btrfs_mark_extent_written(struct btrfs_trans_handle *trans,
644 path = btrfs_alloc_path(); 571 path = btrfs_alloc_path();
645 BUG_ON(!path); 572 BUG_ON(!path);
646again: 573again:
574 recow = 0;
575 split = start;
647 key.objectid = inode->i_ino; 576 key.objectid = inode->i_ino;
648 key.type = BTRFS_EXTENT_DATA_KEY; 577 key.type = BTRFS_EXTENT_DATA_KEY;
649 if (split == start) 578 key.offset = split;
650 key.offset = split;
651 else
652 key.offset = split - 1;
653 579
654 ret = btrfs_search_slot(trans, root, &key, path, -1, 1); 580 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
655 if (ret > 0 && path->slots[0] > 0) 581 if (ret > 0 && path->slots[0] > 0)
@@ -661,159 +587,158 @@ again:
661 key.type != BTRFS_EXTENT_DATA_KEY); 587 key.type != BTRFS_EXTENT_DATA_KEY);
662 fi = btrfs_item_ptr(leaf, path->slots[0], 588 fi = btrfs_item_ptr(leaf, path->slots[0],
663 struct btrfs_file_extent_item); 589 struct btrfs_file_extent_item);
664 extent_type = btrfs_file_extent_type(leaf, fi); 590 BUG_ON(btrfs_file_extent_type(leaf, fi) !=
665 BUG_ON(extent_type != BTRFS_FILE_EXTENT_PREALLOC); 591 BTRFS_FILE_EXTENT_PREALLOC);
666 extent_end = key.offset + btrfs_file_extent_num_bytes(leaf, fi); 592 extent_end = key.offset + btrfs_file_extent_num_bytes(leaf, fi);
667 BUG_ON(key.offset > start || extent_end < end); 593 BUG_ON(key.offset > start || extent_end < end);
668 594
669 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi); 595 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
670 num_bytes = btrfs_file_extent_disk_num_bytes(leaf, fi); 596 num_bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
671 orig_offset = key.offset - btrfs_file_extent_offset(leaf, fi); 597 orig_offset = key.offset - btrfs_file_extent_offset(leaf, fi);
598 memcpy(&new_key, &key, sizeof(new_key));
672 599
673 if (key.offset == start) 600 if (start == key.offset && end < extent_end) {
674 split = end;
675
676 if (key.offset == start && extent_end == end) {
677 int del_nr = 0;
678 int del_slot = 0;
679 other_start = end;
680 other_end = 0;
681 if (extent_mergeable(leaf, path->slots[0] + 1, inode->i_ino,
682 bytenr, &other_start, &other_end)) {
683 extent_end = other_end;
684 del_slot = path->slots[0] + 1;
685 del_nr++;
686 ret = btrfs_free_extent(trans, root, bytenr, num_bytes,
687 0, root->root_key.objectid,
688 inode->i_ino, orig_offset);
689 BUG_ON(ret);
690 }
691 other_start = 0; 601 other_start = 0;
692 other_end = start; 602 other_end = start;
693 if (extent_mergeable(leaf, path->slots[0] - 1, inode->i_ino, 603 if (extent_mergeable(leaf, path->slots[0] - 1,
694 bytenr, &other_start, &other_end)) { 604 inode->i_ino, bytenr, orig_offset,
695 key.offset = other_start; 605 &other_start, &other_end)) {
696 del_slot = path->slots[0]; 606 new_key.offset = end;
697 del_nr++; 607 btrfs_set_item_key_safe(trans, root, path, &new_key);
698 ret = btrfs_free_extent(trans, root, bytenr, num_bytes, 608 fi = btrfs_item_ptr(leaf, path->slots[0],
699 0, root->root_key.objectid, 609 struct btrfs_file_extent_item);
700 inode->i_ino, orig_offset); 610 btrfs_set_file_extent_num_bytes(leaf, fi,
701 BUG_ON(ret); 611 extent_end - end);
702 } 612 btrfs_set_file_extent_offset(leaf, fi,
703 split_end = 0; 613 end - orig_offset);
704 if (del_nr == 0) { 614 fi = btrfs_item_ptr(leaf, path->slots[0] - 1,
705 btrfs_set_file_extent_type(leaf, fi, 615 struct btrfs_file_extent_item);
706 BTRFS_FILE_EXTENT_REG); 616 btrfs_set_file_extent_num_bytes(leaf, fi,
707 goto done; 617 end - other_start);
708 } 618 btrfs_mark_buffer_dirty(leaf);
709 619 goto out;
710 fi = btrfs_item_ptr(leaf, del_slot - 1,
711 struct btrfs_file_extent_item);
712 btrfs_set_file_extent_type(leaf, fi, BTRFS_FILE_EXTENT_REG);
713 btrfs_set_file_extent_num_bytes(leaf, fi,
714 extent_end - key.offset);
715 btrfs_mark_buffer_dirty(leaf);
716
717 ret = btrfs_del_items(trans, root, path, del_slot, del_nr);
718 BUG_ON(ret);
719 goto release;
720 } else if (split == start) {
721 if (locked_end < extent_end) {
722 ret = try_lock_extent(&BTRFS_I(inode)->io_tree,
723 locked_end, extent_end - 1, GFP_NOFS);
724 if (!ret) {
725 btrfs_release_path(root, path);
726 lock_extent(&BTRFS_I(inode)->io_tree,
727 locked_end, extent_end - 1, GFP_NOFS);
728 locked_end = extent_end;
729 goto again;
730 }
731 locked_end = extent_end;
732 } 620 }
733 btrfs_set_file_extent_num_bytes(leaf, fi, split - key.offset);
734 } else {
735 BUG_ON(key.offset != start);
736 key.offset = split;
737 btrfs_set_file_extent_offset(leaf, fi, key.offset -
738 orig_offset);
739 btrfs_set_file_extent_num_bytes(leaf, fi, extent_end - split);
740 btrfs_set_item_key_safe(trans, root, path, &key);
741 extent_end = split;
742 } 621 }
743 622
744 if (extent_end == end) { 623 if (start > key.offset && end == extent_end) {
745 split_end = 0;
746 extent_type = BTRFS_FILE_EXTENT_REG;
747 }
748 if (extent_end == end && split == start) {
749 other_start = end; 624 other_start = end;
750 other_end = 0; 625 other_end = 0;
751 if (extent_mergeable(leaf, path->slots[0] + 1, inode->i_ino, 626 if (extent_mergeable(leaf, path->slots[0] + 1,
752 bytenr, &other_start, &other_end)) { 627 inode->i_ino, bytenr, orig_offset,
753 path->slots[0]++; 628 &other_start, &other_end)) {
754 fi = btrfs_item_ptr(leaf, path->slots[0], 629 fi = btrfs_item_ptr(leaf, path->slots[0],
755 struct btrfs_file_extent_item); 630 struct btrfs_file_extent_item);
756 key.offset = split;
757 btrfs_set_item_key_safe(trans, root, path, &key);
758 btrfs_set_file_extent_offset(leaf, fi, key.offset -
759 orig_offset);
760 btrfs_set_file_extent_num_bytes(leaf, fi, 631 btrfs_set_file_extent_num_bytes(leaf, fi,
761 other_end - split); 632 start - key.offset);
762 goto done; 633 path->slots[0]++;
763 } 634 new_key.offset = start;
764 } 635 btrfs_set_item_key_safe(trans, root, path, &new_key);
765 if (extent_end == end && split == end) { 636
766 other_start = 0;
767 other_end = start;
768 if (extent_mergeable(leaf, path->slots[0] - 1 , inode->i_ino,
769 bytenr, &other_start, &other_end)) {
770 path->slots[0]--;
771 fi = btrfs_item_ptr(leaf, path->slots[0], 637 fi = btrfs_item_ptr(leaf, path->slots[0],
772 struct btrfs_file_extent_item); 638 struct btrfs_file_extent_item);
773 btrfs_set_file_extent_num_bytes(leaf, fi, extent_end - 639 btrfs_set_file_extent_num_bytes(leaf, fi,
774 other_start); 640 other_end - start);
775 goto done; 641 btrfs_set_file_extent_offset(leaf, fi,
642 start - orig_offset);
643 btrfs_mark_buffer_dirty(leaf);
644 goto out;
776 } 645 }
777 } 646 }
778 647
779 btrfs_mark_buffer_dirty(leaf); 648 while (start > key.offset || end < extent_end) {
649 if (key.offset == start)
650 split = end;
780 651
781 ret = btrfs_inc_extent_ref(trans, root, bytenr, num_bytes, 0, 652 new_key.offset = split;
782 root->root_key.objectid, 653 ret = btrfs_duplicate_item(trans, root, path, &new_key);
783 inode->i_ino, orig_offset); 654 if (ret == -EAGAIN) {
784 BUG_ON(ret); 655 btrfs_release_path(root, path);
785 btrfs_release_path(root, path); 656 goto again;
657 }
658 BUG_ON(ret < 0);
786 659
787 key.offset = start; 660 leaf = path->nodes[0];
788 ret = btrfs_insert_empty_item(trans, root, path, &key, sizeof(*fi)); 661 fi = btrfs_item_ptr(leaf, path->slots[0] - 1,
789 BUG_ON(ret); 662 struct btrfs_file_extent_item);
663 btrfs_set_file_extent_num_bytes(leaf, fi,
664 split - key.offset);
790 665
791 leaf = path->nodes[0]; 666 fi = btrfs_item_ptr(leaf, path->slots[0],
792 fi = btrfs_item_ptr(leaf, path->slots[0], 667 struct btrfs_file_extent_item);
793 struct btrfs_file_extent_item); 668
794 btrfs_set_file_extent_generation(leaf, fi, trans->transid); 669 btrfs_set_file_extent_offset(leaf, fi, split - orig_offset);
795 btrfs_set_file_extent_type(leaf, fi, extent_type); 670 btrfs_set_file_extent_num_bytes(leaf, fi,
796 btrfs_set_file_extent_disk_bytenr(leaf, fi, bytenr); 671 extent_end - split);
797 btrfs_set_file_extent_disk_num_bytes(leaf, fi, num_bytes); 672 btrfs_mark_buffer_dirty(leaf);
798 btrfs_set_file_extent_offset(leaf, fi, key.offset - orig_offset); 673
799 btrfs_set_file_extent_num_bytes(leaf, fi, extent_end - key.offset); 674 ret = btrfs_inc_extent_ref(trans, root, bytenr, num_bytes, 0,
800 btrfs_set_file_extent_ram_bytes(leaf, fi, num_bytes); 675 root->root_key.objectid,
801 btrfs_set_file_extent_compression(leaf, fi, 0); 676 inode->i_ino, orig_offset);
802 btrfs_set_file_extent_encryption(leaf, fi, 0); 677 BUG_ON(ret);
803 btrfs_set_file_extent_other_encoding(leaf, fi, 0); 678
804done: 679 if (split == start) {
805 btrfs_mark_buffer_dirty(leaf); 680 key.offset = start;
806 681 } else {
807release: 682 BUG_ON(start != key.offset);
808 btrfs_release_path(root, path); 683 path->slots[0]--;
809 if (split_end && split == start) { 684 extent_end = end;
810 split = end; 685 }
811 goto again; 686 recow = 1;
687 }
688
689 other_start = end;
690 other_end = 0;
691 if (extent_mergeable(leaf, path->slots[0] + 1,
692 inode->i_ino, bytenr, orig_offset,
693 &other_start, &other_end)) {
694 if (recow) {
695 btrfs_release_path(root, path);
696 goto again;
697 }
698 extent_end = other_end;
699 del_slot = path->slots[0] + 1;
700 del_nr++;
701 ret = btrfs_free_extent(trans, root, bytenr, num_bytes,
702 0, root->root_key.objectid,
703 inode->i_ino, orig_offset);
704 BUG_ON(ret);
705 }
706 other_start = 0;
707 other_end = start;
708 if (extent_mergeable(leaf, path->slots[0] - 1,
709 inode->i_ino, bytenr, orig_offset,
710 &other_start, &other_end)) {
711 if (recow) {
712 btrfs_release_path(root, path);
713 goto again;
714 }
715 key.offset = other_start;
716 del_slot = path->slots[0];
717 del_nr++;
718 ret = btrfs_free_extent(trans, root, bytenr, num_bytes,
719 0, root->root_key.objectid,
720 inode->i_ino, orig_offset);
721 BUG_ON(ret);
812 } 722 }
813 if (locked_end > end) { 723 if (del_nr == 0) {
814 unlock_extent(&BTRFS_I(inode)->io_tree, end, locked_end - 1, 724 fi = btrfs_item_ptr(leaf, path->slots[0],
815 GFP_NOFS); 725 struct btrfs_file_extent_item);
726 btrfs_set_file_extent_type(leaf, fi,
727 BTRFS_FILE_EXTENT_REG);
728 btrfs_mark_buffer_dirty(leaf);
729 } else {
730 fi = btrfs_item_ptr(leaf, del_slot - 1,
731 struct btrfs_file_extent_item);
732 btrfs_set_file_extent_type(leaf, fi,
733 BTRFS_FILE_EXTENT_REG);
734 btrfs_set_file_extent_num_bytes(leaf, fi,
735 extent_end - key.offset);
736 btrfs_mark_buffer_dirty(leaf);
737
738 ret = btrfs_del_items(trans, root, path, del_slot, del_nr);
739 BUG_ON(ret);
816 } 740 }
741out:
817 btrfs_free_path(path); 742 btrfs_free_path(path);
818 return 0; 743 return 0;
819} 744}
@@ -1210,7 +1135,7 @@ int btrfs_sync_file(struct file *file, struct dentry *dentry, int datasync)
1210 } 1135 }
1211 mutex_lock(&dentry->d_inode->i_mutex); 1136 mutex_lock(&dentry->d_inode->i_mutex);
1212out: 1137out:
1213 return ret > 0 ? EIO : ret; 1138 return ret > 0 ? -EIO : ret;
1214} 1139}
1215 1140
1216static const struct vm_operations_struct btrfs_file_vm_ops = { 1141static const struct vm_operations_struct btrfs_file_vm_ops = {
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index b3ad168a0bfc..4deb280f8969 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -88,13 +88,14 @@ static noinline int cow_file_range(struct inode *inode,
88 u64 start, u64 end, int *page_started, 88 u64 start, u64 end, int *page_started,
89 unsigned long *nr_written, int unlock); 89 unsigned long *nr_written, int unlock);
90 90
91static int btrfs_init_inode_security(struct inode *inode, struct inode *dir) 91static int btrfs_init_inode_security(struct btrfs_trans_handle *trans,
92 struct inode *inode, struct inode *dir)
92{ 93{
93 int err; 94 int err;
94 95
95 err = btrfs_init_acl(inode, dir); 96 err = btrfs_init_acl(trans, inode, dir);
96 if (!err) 97 if (!err)
97 err = btrfs_xattr_security_init(inode, dir); 98 err = btrfs_xattr_security_init(trans, inode, dir);
98 return err; 99 return err;
99} 100}
100 101
@@ -188,8 +189,18 @@ static noinline int insert_inline_extent(struct btrfs_trans_handle *trans,
188 btrfs_mark_buffer_dirty(leaf); 189 btrfs_mark_buffer_dirty(leaf);
189 btrfs_free_path(path); 190 btrfs_free_path(path);
190 191
192 /*
193 * we're an inline extent, so nobody can
194 * extend the file past i_size without locking
195 * a page we already have locked.
196 *
197 * We must do any isize and inode updates
198 * before we unlock the pages. Otherwise we
199 * could end up racing with unlink.
200 */
191 BTRFS_I(inode)->disk_i_size = inode->i_size; 201 BTRFS_I(inode)->disk_i_size = inode->i_size;
192 btrfs_update_inode(trans, root, inode); 202 btrfs_update_inode(trans, root, inode);
203
193 return 0; 204 return 0;
194fail: 205fail:
195 btrfs_free_path(path); 206 btrfs_free_path(path);
@@ -230,8 +241,7 @@ static noinline int cow_file_range_inline(struct btrfs_trans_handle *trans,
230 return 1; 241 return 1;
231 } 242 }
232 243
233 ret = btrfs_drop_extents(trans, root, inode, start, 244 ret = btrfs_drop_extents(trans, inode, start, aligned_end,
234 aligned_end, aligned_end, start,
235 &hint_byte, 1); 245 &hint_byte, 1);
236 BUG_ON(ret); 246 BUG_ON(ret);
237 247
@@ -416,7 +426,6 @@ again:
416 start, end, 426 start, end,
417 total_compressed, pages); 427 total_compressed, pages);
418 } 428 }
419 btrfs_end_transaction(trans, root);
420 if (ret == 0) { 429 if (ret == 0) {
421 /* 430 /*
422 * inline extent creation worked, we don't need 431 * inline extent creation worked, we don't need
@@ -430,9 +439,11 @@ again:
430 EXTENT_CLEAR_DELALLOC | 439 EXTENT_CLEAR_DELALLOC |
431 EXTENT_CLEAR_ACCOUNTING | 440 EXTENT_CLEAR_ACCOUNTING |
432 EXTENT_SET_WRITEBACK | EXTENT_END_WRITEBACK); 441 EXTENT_SET_WRITEBACK | EXTENT_END_WRITEBACK);
433 ret = 0; 442
443 btrfs_end_transaction(trans, root);
434 goto free_pages_out; 444 goto free_pages_out;
435 } 445 }
446 btrfs_end_transaction(trans, root);
436 } 447 }
437 448
438 if (will_compress) { 449 if (will_compress) {
@@ -472,7 +483,8 @@ again:
472 nr_pages_ret = 0; 483 nr_pages_ret = 0;
473 484
474 /* flag the file so we don't compress in the future */ 485 /* flag the file so we don't compress in the future */
475 BTRFS_I(inode)->flags |= BTRFS_INODE_NOCOMPRESS; 486 if (!btrfs_test_opt(root, FORCE_COMPRESS))
487 BTRFS_I(inode)->flags |= BTRFS_INODE_NOCOMPRESS;
476 } 488 }
477 if (will_compress) { 489 if (will_compress) {
478 *num_added += 1; 490 *num_added += 1;
@@ -543,7 +555,6 @@ static noinline int submit_compressed_extents(struct inode *inode,
543 if (list_empty(&async_cow->extents)) 555 if (list_empty(&async_cow->extents))
544 return 0; 556 return 0;
545 557
546 trans = btrfs_join_transaction(root, 1);
547 558
548 while (!list_empty(&async_cow->extents)) { 559 while (!list_empty(&async_cow->extents)) {
549 async_extent = list_entry(async_cow->extents.next, 560 async_extent = list_entry(async_cow->extents.next,
@@ -590,19 +601,15 @@ retry:
590 lock_extent(io_tree, async_extent->start, 601 lock_extent(io_tree, async_extent->start,
591 async_extent->start + async_extent->ram_size - 1, 602 async_extent->start + async_extent->ram_size - 1,
592 GFP_NOFS); 603 GFP_NOFS);
593 /*
594 * here we're doing allocation and writeback of the
595 * compressed pages
596 */
597 btrfs_drop_extent_cache(inode, async_extent->start,
598 async_extent->start +
599 async_extent->ram_size - 1, 0);
600 604
605 trans = btrfs_join_transaction(root, 1);
601 ret = btrfs_reserve_extent(trans, root, 606 ret = btrfs_reserve_extent(trans, root,
602 async_extent->compressed_size, 607 async_extent->compressed_size,
603 async_extent->compressed_size, 608 async_extent->compressed_size,
604 0, alloc_hint, 609 0, alloc_hint,
605 (u64)-1, &ins, 1); 610 (u64)-1, &ins, 1);
611 btrfs_end_transaction(trans, root);
612
606 if (ret) { 613 if (ret) {
607 int i; 614 int i;
608 for (i = 0; i < async_extent->nr_pages; i++) { 615 for (i = 0; i < async_extent->nr_pages; i++) {
@@ -618,6 +625,14 @@ retry:
618 goto retry; 625 goto retry;
619 } 626 }
620 627
628 /*
629 * here we're doing allocation and writeback of the
630 * compressed pages
631 */
632 btrfs_drop_extent_cache(inode, async_extent->start,
633 async_extent->start +
634 async_extent->ram_size - 1, 0);
635
621 em = alloc_extent_map(GFP_NOFS); 636 em = alloc_extent_map(GFP_NOFS);
622 em->start = async_extent->start; 637 em->start = async_extent->start;
623 em->len = async_extent->ram_size; 638 em->len = async_extent->ram_size;
@@ -649,8 +664,6 @@ retry:
649 BTRFS_ORDERED_COMPRESSED); 664 BTRFS_ORDERED_COMPRESSED);
650 BUG_ON(ret); 665 BUG_ON(ret);
651 666
652 btrfs_end_transaction(trans, root);
653
654 /* 667 /*
655 * clear dirty, set writeback and unlock the pages. 668 * clear dirty, set writeback and unlock the pages.
656 */ 669 */
@@ -672,13 +685,11 @@ retry:
672 async_extent->nr_pages); 685 async_extent->nr_pages);
673 686
674 BUG_ON(ret); 687 BUG_ON(ret);
675 trans = btrfs_join_transaction(root, 1);
676 alloc_hint = ins.objectid + ins.offset; 688 alloc_hint = ins.objectid + ins.offset;
677 kfree(async_extent); 689 kfree(async_extent);
678 cond_resched(); 690 cond_resched();
679 } 691 }
680 692
681 btrfs_end_transaction(trans, root);
682 return 0; 693 return 0;
683} 694}
684 695
@@ -742,6 +753,7 @@ static noinline int cow_file_range(struct inode *inode,
742 EXTENT_CLEAR_DIRTY | 753 EXTENT_CLEAR_DIRTY |
743 EXTENT_SET_WRITEBACK | 754 EXTENT_SET_WRITEBACK |
744 EXTENT_END_WRITEBACK); 755 EXTENT_END_WRITEBACK);
756
745 *nr_written = *nr_written + 757 *nr_written = *nr_written +
746 (end - start + PAGE_CACHE_SIZE) / PAGE_CACHE_SIZE; 758 (end - start + PAGE_CACHE_SIZE) / PAGE_CACHE_SIZE;
747 *page_started = 1; 759 *page_started = 1;
@@ -1596,7 +1608,6 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans,
1596 struct inode *inode, u64 file_pos, 1608 struct inode *inode, u64 file_pos,
1597 u64 disk_bytenr, u64 disk_num_bytes, 1609 u64 disk_bytenr, u64 disk_num_bytes,
1598 u64 num_bytes, u64 ram_bytes, 1610 u64 num_bytes, u64 ram_bytes,
1599 u64 locked_end,
1600 u8 compression, u8 encryption, 1611 u8 compression, u8 encryption,
1601 u16 other_encoding, int extent_type) 1612 u16 other_encoding, int extent_type)
1602{ 1613{
@@ -1622,9 +1633,8 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans,
1622 * the caller is expected to unpin it and allow it to be merged 1633 * the caller is expected to unpin it and allow it to be merged
1623 * with the others. 1634 * with the others.
1624 */ 1635 */
1625 ret = btrfs_drop_extents(trans, root, inode, file_pos, 1636 ret = btrfs_drop_extents(trans, inode, file_pos, file_pos + num_bytes,
1626 file_pos + num_bytes, locked_end, 1637 &hint, 0);
1627 file_pos, &hint, 0);
1628 BUG_ON(ret); 1638 BUG_ON(ret);
1629 1639
1630 ins.objectid = inode->i_ino; 1640 ins.objectid = inode->i_ino;
@@ -1671,24 +1681,6 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans,
1671 * before we start the transaction. It limits the amount of btree 1681 * before we start the transaction. It limits the amount of btree
1672 * reads required while inside the transaction. 1682 * reads required while inside the transaction.
1673 */ 1683 */
1674static noinline void reada_csum(struct btrfs_root *root,
1675 struct btrfs_path *path,
1676 struct btrfs_ordered_extent *ordered_extent)
1677{
1678 struct btrfs_ordered_sum *sum;
1679 u64 bytenr;
1680
1681 sum = list_entry(ordered_extent->list.next, struct btrfs_ordered_sum,
1682 list);
1683 bytenr = sum->sums[0].bytenr;
1684
1685 /*
1686 * we don't care about the results, the point of this search is
1687 * just to get the btree leaves into ram
1688 */
1689 btrfs_lookup_csum(NULL, root->fs_info->csum_root, path, bytenr, 0);
1690}
1691
1692/* as ordered data IO finishes, this gets called so we can finish 1684/* as ordered data IO finishes, this gets called so we can finish
1693 * an ordered extent if the range of bytes in the file it covers are 1685 * an ordered extent if the range of bytes in the file it covers are
1694 * fully written. 1686 * fully written.
@@ -1699,7 +1691,6 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
1699 struct btrfs_trans_handle *trans; 1691 struct btrfs_trans_handle *trans;
1700 struct btrfs_ordered_extent *ordered_extent = NULL; 1692 struct btrfs_ordered_extent *ordered_extent = NULL;
1701 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; 1693 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
1702 struct btrfs_path *path;
1703 int compressed = 0; 1694 int compressed = 0;
1704 int ret; 1695 int ret;
1705 1696
@@ -1707,46 +1698,32 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
1707 if (!ret) 1698 if (!ret)
1708 return 0; 1699 return 0;
1709 1700
1710 /* 1701 ordered_extent = btrfs_lookup_ordered_extent(inode, start);
1711 * before we join the transaction, try to do some of our IO. 1702 BUG_ON(!ordered_extent);
1712 * This will limit the amount of IO that we have to do with 1703
1713 * the transaction running. We're unlikely to need to do any 1704 if (test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags)) {
1714 * IO if the file extents are new, the disk_i_size checks 1705 BUG_ON(!list_empty(&ordered_extent->list));
1715 * covers the most common case. 1706 ret = btrfs_ordered_update_i_size(inode, 0, ordered_extent);
1716 */ 1707 if (!ret) {
1717 if (start < BTRFS_I(inode)->disk_i_size) { 1708 trans = btrfs_join_transaction(root, 1);
1718 path = btrfs_alloc_path(); 1709 ret = btrfs_update_inode(trans, root, inode);
1719 if (path) { 1710 BUG_ON(ret);
1720 ret = btrfs_lookup_file_extent(NULL, root, path, 1711 btrfs_end_transaction(trans, root);
1721 inode->i_ino,
1722 start, 0);
1723 ordered_extent = btrfs_lookup_ordered_extent(inode,
1724 start);
1725 if (!list_empty(&ordered_extent->list)) {
1726 btrfs_release_path(root, path);
1727 reada_csum(root, path, ordered_extent);
1728 }
1729 btrfs_free_path(path);
1730 } 1712 }
1713 goto out;
1731 } 1714 }
1732 1715
1733 trans = btrfs_join_transaction(root, 1);
1734
1735 if (!ordered_extent)
1736 ordered_extent = btrfs_lookup_ordered_extent(inode, start);
1737 BUG_ON(!ordered_extent);
1738 if (test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags))
1739 goto nocow;
1740
1741 lock_extent(io_tree, ordered_extent->file_offset, 1716 lock_extent(io_tree, ordered_extent->file_offset,
1742 ordered_extent->file_offset + ordered_extent->len - 1, 1717 ordered_extent->file_offset + ordered_extent->len - 1,
1743 GFP_NOFS); 1718 GFP_NOFS);
1744 1719
1720 trans = btrfs_join_transaction(root, 1);
1721
1745 if (test_bit(BTRFS_ORDERED_COMPRESSED, &ordered_extent->flags)) 1722 if (test_bit(BTRFS_ORDERED_COMPRESSED, &ordered_extent->flags))
1746 compressed = 1; 1723 compressed = 1;
1747 if (test_bit(BTRFS_ORDERED_PREALLOC, &ordered_extent->flags)) { 1724 if (test_bit(BTRFS_ORDERED_PREALLOC, &ordered_extent->flags)) {
1748 BUG_ON(compressed); 1725 BUG_ON(compressed);
1749 ret = btrfs_mark_extent_written(trans, root, inode, 1726 ret = btrfs_mark_extent_written(trans, inode,
1750 ordered_extent->file_offset, 1727 ordered_extent->file_offset,
1751 ordered_extent->file_offset + 1728 ordered_extent->file_offset +
1752 ordered_extent->len); 1729 ordered_extent->len);
@@ -1758,8 +1735,6 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
1758 ordered_extent->disk_len, 1735 ordered_extent->disk_len,
1759 ordered_extent->len, 1736 ordered_extent->len,
1760 ordered_extent->len, 1737 ordered_extent->len,
1761 ordered_extent->file_offset +
1762 ordered_extent->len,
1763 compressed, 0, 0, 1738 compressed, 0, 0,
1764 BTRFS_FILE_EXTENT_REG); 1739 BTRFS_FILE_EXTENT_REG);
1765 unpin_extent_cache(&BTRFS_I(inode)->extent_tree, 1740 unpin_extent_cache(&BTRFS_I(inode)->extent_tree,
@@ -1770,22 +1745,20 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
1770 unlock_extent(io_tree, ordered_extent->file_offset, 1745 unlock_extent(io_tree, ordered_extent->file_offset,
1771 ordered_extent->file_offset + ordered_extent->len - 1, 1746 ordered_extent->file_offset + ordered_extent->len - 1,
1772 GFP_NOFS); 1747 GFP_NOFS);
1773nocow:
1774 add_pending_csums(trans, inode, ordered_extent->file_offset, 1748 add_pending_csums(trans, inode, ordered_extent->file_offset,
1775 &ordered_extent->list); 1749 &ordered_extent->list);
1776 1750
1777 mutex_lock(&BTRFS_I(inode)->extent_mutex); 1751 /* this also removes the ordered extent from the tree */
1778 btrfs_ordered_update_i_size(inode, ordered_extent); 1752 btrfs_ordered_update_i_size(inode, 0, ordered_extent);
1779 btrfs_update_inode(trans, root, inode); 1753 ret = btrfs_update_inode(trans, root, inode);
1780 btrfs_remove_ordered_extent(inode, ordered_extent); 1754 BUG_ON(ret);
1781 mutex_unlock(&BTRFS_I(inode)->extent_mutex); 1755 btrfs_end_transaction(trans, root);
1782 1756out:
1783 /* once for us */ 1757 /* once for us */
1784 btrfs_put_ordered_extent(ordered_extent); 1758 btrfs_put_ordered_extent(ordered_extent);
1785 /* once for the tree */ 1759 /* once for the tree */
1786 btrfs_put_ordered_extent(ordered_extent); 1760 btrfs_put_ordered_extent(ordered_extent);
1787 1761
1788 btrfs_end_transaction(trans, root);
1789 return 0; 1762 return 0;
1790} 1763}
1791 1764
@@ -2008,6 +1981,54 @@ zeroit:
2008 return -EIO; 1981 return -EIO;
2009} 1982}
2010 1983
1984struct delayed_iput {
1985 struct list_head list;
1986 struct inode *inode;
1987};
1988
1989void btrfs_add_delayed_iput(struct inode *inode)
1990{
1991 struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info;
1992 struct delayed_iput *delayed;
1993
1994 if (atomic_add_unless(&inode->i_count, -1, 1))
1995 return;
1996
1997 delayed = kmalloc(sizeof(*delayed), GFP_NOFS | __GFP_NOFAIL);
1998 delayed->inode = inode;
1999
2000 spin_lock(&fs_info->delayed_iput_lock);
2001 list_add_tail(&delayed->list, &fs_info->delayed_iputs);
2002 spin_unlock(&fs_info->delayed_iput_lock);
2003}
2004
2005void btrfs_run_delayed_iputs(struct btrfs_root *root)
2006{
2007 LIST_HEAD(list);
2008 struct btrfs_fs_info *fs_info = root->fs_info;
2009 struct delayed_iput *delayed;
2010 int empty;
2011
2012 spin_lock(&fs_info->delayed_iput_lock);
2013 empty = list_empty(&fs_info->delayed_iputs);
2014 spin_unlock(&fs_info->delayed_iput_lock);
2015 if (empty)
2016 return;
2017
2018 down_read(&root->fs_info->cleanup_work_sem);
2019 spin_lock(&fs_info->delayed_iput_lock);
2020 list_splice_init(&fs_info->delayed_iputs, &list);
2021 spin_unlock(&fs_info->delayed_iput_lock);
2022
2023 while (!list_empty(&list)) {
2024 delayed = list_entry(list.next, struct delayed_iput, list);
2025 list_del(&delayed->list);
2026 iput(delayed->inode);
2027 kfree(delayed);
2028 }
2029 up_read(&root->fs_info->cleanup_work_sem);
2030}
2031
2011/* 2032/*
2012 * This creates an orphan entry for the given inode in case something goes 2033 * This creates an orphan entry for the given inode in case something goes
2013 * wrong in the middle of an unlink/truncate. 2034 * wrong in the middle of an unlink/truncate.
@@ -2080,16 +2101,17 @@ void btrfs_orphan_cleanup(struct btrfs_root *root)
2080 struct inode *inode; 2101 struct inode *inode;
2081 int ret = 0, nr_unlink = 0, nr_truncate = 0; 2102 int ret = 0, nr_unlink = 0, nr_truncate = 0;
2082 2103
2083 path = btrfs_alloc_path(); 2104 if (!xchg(&root->clean_orphans, 0))
2084 if (!path)
2085 return; 2105 return;
2106
2107 path = btrfs_alloc_path();
2108 BUG_ON(!path);
2086 path->reada = -1; 2109 path->reada = -1;
2087 2110
2088 key.objectid = BTRFS_ORPHAN_OBJECTID; 2111 key.objectid = BTRFS_ORPHAN_OBJECTID;
2089 btrfs_set_key_type(&key, BTRFS_ORPHAN_ITEM_KEY); 2112 btrfs_set_key_type(&key, BTRFS_ORPHAN_ITEM_KEY);
2090 key.offset = (u64)-1; 2113 key.offset = (u64)-1;
2091 2114
2092
2093 while (1) { 2115 while (1) {
2094 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 2116 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
2095 if (ret < 0) { 2117 if (ret < 0) {
@@ -2834,37 +2856,40 @@ out:
2834 * min_type is the minimum key type to truncate down to. If set to 0, this 2856 * min_type is the minimum key type to truncate down to. If set to 0, this
2835 * will kill all the items on this inode, including the INODE_ITEM_KEY. 2857 * will kill all the items on this inode, including the INODE_ITEM_KEY.
2836 */ 2858 */
2837noinline int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans, 2859int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
2838 struct btrfs_root *root, 2860 struct btrfs_root *root,
2839 struct inode *inode, 2861 struct inode *inode,
2840 u64 new_size, u32 min_type) 2862 u64 new_size, u32 min_type)
2841{ 2863{
2842 int ret;
2843 struct btrfs_path *path; 2864 struct btrfs_path *path;
2844 struct btrfs_key key;
2845 struct btrfs_key found_key;
2846 u32 found_type = (u8)-1;
2847 struct extent_buffer *leaf; 2865 struct extent_buffer *leaf;
2848 struct btrfs_file_extent_item *fi; 2866 struct btrfs_file_extent_item *fi;
2867 struct btrfs_key key;
2868 struct btrfs_key found_key;
2849 u64 extent_start = 0; 2869 u64 extent_start = 0;
2850 u64 extent_num_bytes = 0; 2870 u64 extent_num_bytes = 0;
2851 u64 extent_offset = 0; 2871 u64 extent_offset = 0;
2852 u64 item_end = 0; 2872 u64 item_end = 0;
2873 u64 mask = root->sectorsize - 1;
2874 u32 found_type = (u8)-1;
2853 int found_extent; 2875 int found_extent;
2854 int del_item; 2876 int del_item;
2855 int pending_del_nr = 0; 2877 int pending_del_nr = 0;
2856 int pending_del_slot = 0; 2878 int pending_del_slot = 0;
2857 int extent_type = -1; 2879 int extent_type = -1;
2858 int encoding; 2880 int encoding;
2859 u64 mask = root->sectorsize - 1; 2881 int ret;
2882 int err = 0;
2883
2884 BUG_ON(new_size > 0 && min_type != BTRFS_EXTENT_DATA_KEY);
2860 2885
2861 if (root->ref_cows) 2886 if (root->ref_cows)
2862 btrfs_drop_extent_cache(inode, new_size & (~mask), (u64)-1, 0); 2887 btrfs_drop_extent_cache(inode, new_size & (~mask), (u64)-1, 0);
2888
2863 path = btrfs_alloc_path(); 2889 path = btrfs_alloc_path();
2864 BUG_ON(!path); 2890 BUG_ON(!path);
2865 path->reada = -1; 2891 path->reada = -1;
2866 2892
2867 /* FIXME, add redo link to tree so we don't leak on crash */
2868 key.objectid = inode->i_ino; 2893 key.objectid = inode->i_ino;
2869 key.offset = (u64)-1; 2894 key.offset = (u64)-1;
2870 key.type = (u8)-1; 2895 key.type = (u8)-1;
@@ -2872,17 +2897,17 @@ noinline int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
2872search_again: 2897search_again:
2873 path->leave_spinning = 1; 2898 path->leave_spinning = 1;
2874 ret = btrfs_search_slot(trans, root, &key, path, -1, 1); 2899 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
2875 if (ret < 0) 2900 if (ret < 0) {
2876 goto error; 2901 err = ret;
2902 goto out;
2903 }
2877 2904
2878 if (ret > 0) { 2905 if (ret > 0) {
2879 /* there are no items in the tree for us to truncate, we're 2906 /* there are no items in the tree for us to truncate, we're
2880 * done 2907 * done
2881 */ 2908 */
2882 if (path->slots[0] == 0) { 2909 if (path->slots[0] == 0)
2883 ret = 0; 2910 goto out;
2884 goto error;
2885 }
2886 path->slots[0]--; 2911 path->slots[0]--;
2887 } 2912 }
2888 2913
@@ -2917,28 +2942,17 @@ search_again:
2917 } 2942 }
2918 item_end--; 2943 item_end--;
2919 } 2944 }
2920 if (item_end < new_size) { 2945 if (found_type > min_type) {
2921 if (found_type == BTRFS_DIR_ITEM_KEY) 2946 del_item = 1;
2922 found_type = BTRFS_INODE_ITEM_KEY; 2947 } else {
2923 else if (found_type == BTRFS_EXTENT_ITEM_KEY) 2948 if (item_end < new_size)
2924 found_type = BTRFS_EXTENT_DATA_KEY;
2925 else if (found_type == BTRFS_EXTENT_DATA_KEY)
2926 found_type = BTRFS_XATTR_ITEM_KEY;
2927 else if (found_type == BTRFS_XATTR_ITEM_KEY)
2928 found_type = BTRFS_INODE_REF_KEY;
2929 else if (found_type)
2930 found_type--;
2931 else
2932 break; 2949 break;
2933 btrfs_set_key_type(&key, found_type); 2950 if (found_key.offset >= new_size)
2934 goto next; 2951 del_item = 1;
2952 else
2953 del_item = 0;
2935 } 2954 }
2936 if (found_key.offset >= new_size)
2937 del_item = 1;
2938 else
2939 del_item = 0;
2940 found_extent = 0; 2955 found_extent = 0;
2941
2942 /* FIXME, shrink the extent if the ref count is only 1 */ 2956 /* FIXME, shrink the extent if the ref count is only 1 */
2943 if (found_type != BTRFS_EXTENT_DATA_KEY) 2957 if (found_type != BTRFS_EXTENT_DATA_KEY)
2944 goto delete; 2958 goto delete;
@@ -3025,42 +3039,36 @@ delete:
3025 inode->i_ino, extent_offset); 3039 inode->i_ino, extent_offset);
3026 BUG_ON(ret); 3040 BUG_ON(ret);
3027 } 3041 }
3028next:
3029 if (path->slots[0] == 0) {
3030 if (pending_del_nr)
3031 goto del_pending;
3032 btrfs_release_path(root, path);
3033 if (found_type == BTRFS_INODE_ITEM_KEY)
3034 break;
3035 goto search_again;
3036 }
3037 3042
3038 path->slots[0]--; 3043 if (found_type == BTRFS_INODE_ITEM_KEY)
3039 if (pending_del_nr && 3044 break;
3040 path->slots[0] + 1 != pending_del_slot) { 3045
3041 struct btrfs_key debug; 3046 if (path->slots[0] == 0 ||
3042del_pending: 3047 path->slots[0] != pending_del_slot) {
3043 btrfs_item_key_to_cpu(path->nodes[0], &debug, 3048 if (root->ref_cows) {
3044 pending_del_slot); 3049 err = -EAGAIN;
3045 ret = btrfs_del_items(trans, root, path, 3050 goto out;
3046 pending_del_slot, 3051 }
3047 pending_del_nr); 3052 if (pending_del_nr) {
3048 BUG_ON(ret); 3053 ret = btrfs_del_items(trans, root, path,
3049 pending_del_nr = 0; 3054 pending_del_slot,
3055 pending_del_nr);
3056 BUG_ON(ret);
3057 pending_del_nr = 0;
3058 }
3050 btrfs_release_path(root, path); 3059 btrfs_release_path(root, path);
3051 if (found_type == BTRFS_INODE_ITEM_KEY)
3052 break;
3053 goto search_again; 3060 goto search_again;
3061 } else {
3062 path->slots[0]--;
3054 } 3063 }
3055 } 3064 }
3056 ret = 0; 3065out:
3057error:
3058 if (pending_del_nr) { 3066 if (pending_del_nr) {
3059 ret = btrfs_del_items(trans, root, path, pending_del_slot, 3067 ret = btrfs_del_items(trans, root, path, pending_del_slot,
3060 pending_del_nr); 3068 pending_del_nr);
3061 } 3069 }
3062 btrfs_free_path(path); 3070 btrfs_free_path(path);
3063 return ret; 3071 return err;
3064} 3072}
3065 3073
3066/* 3074/*
@@ -3180,10 +3188,6 @@ int btrfs_cont_expand(struct inode *inode, loff_t size)
3180 if (size <= hole_start) 3188 if (size <= hole_start)
3181 return 0; 3189 return 0;
3182 3190
3183 err = btrfs_truncate_page(inode->i_mapping, inode->i_size);
3184 if (err)
3185 return err;
3186
3187 while (1) { 3191 while (1) {
3188 struct btrfs_ordered_extent *ordered; 3192 struct btrfs_ordered_extent *ordered;
3189 btrfs_wait_ordered_range(inode, hole_start, 3193 btrfs_wait_ordered_range(inode, hole_start,
@@ -3196,9 +3200,6 @@ int btrfs_cont_expand(struct inode *inode, loff_t size)
3196 btrfs_put_ordered_extent(ordered); 3200 btrfs_put_ordered_extent(ordered);
3197 } 3201 }
3198 3202
3199 trans = btrfs_start_transaction(root, 1);
3200 btrfs_set_trans_block_group(trans, inode);
3201
3202 cur_offset = hole_start; 3203 cur_offset = hole_start;
3203 while (1) { 3204 while (1) {
3204 em = btrfs_get_extent(inode, NULL, 0, cur_offset, 3205 em = btrfs_get_extent(inode, NULL, 0, cur_offset,
@@ -3206,40 +3207,120 @@ int btrfs_cont_expand(struct inode *inode, loff_t size)
3206 BUG_ON(IS_ERR(em) || !em); 3207 BUG_ON(IS_ERR(em) || !em);
3207 last_byte = min(extent_map_end(em), block_end); 3208 last_byte = min(extent_map_end(em), block_end);
3208 last_byte = (last_byte + mask) & ~mask; 3209 last_byte = (last_byte + mask) & ~mask;
3209 if (test_bit(EXTENT_FLAG_VACANCY, &em->flags)) { 3210 if (!test_bit(EXTENT_FLAG_PREALLOC, &em->flags)) {
3210 u64 hint_byte = 0; 3211 u64 hint_byte = 0;
3211 hole_size = last_byte - cur_offset; 3212 hole_size = last_byte - cur_offset;
3212 err = btrfs_drop_extents(trans, root, inode,
3213 cur_offset,
3214 cur_offset + hole_size,
3215 block_end,
3216 cur_offset, &hint_byte, 1);
3217 if (err)
3218 break;
3219 3213
3220 err = btrfs_reserve_metadata_space(root, 1); 3214 err = btrfs_reserve_metadata_space(root, 2);
3221 if (err) 3215 if (err)
3222 break; 3216 break;
3223 3217
3218 trans = btrfs_start_transaction(root, 1);
3219 btrfs_set_trans_block_group(trans, inode);
3220
3221 err = btrfs_drop_extents(trans, inode, cur_offset,
3222 cur_offset + hole_size,
3223 &hint_byte, 1);
3224 BUG_ON(err);
3225
3224 err = btrfs_insert_file_extent(trans, root, 3226 err = btrfs_insert_file_extent(trans, root,
3225 inode->i_ino, cur_offset, 0, 3227 inode->i_ino, cur_offset, 0,
3226 0, hole_size, 0, hole_size, 3228 0, hole_size, 0, hole_size,
3227 0, 0, 0); 3229 0, 0, 0);
3230 BUG_ON(err);
3231
3228 btrfs_drop_extent_cache(inode, hole_start, 3232 btrfs_drop_extent_cache(inode, hole_start,
3229 last_byte - 1, 0); 3233 last_byte - 1, 0);
3230 btrfs_unreserve_metadata_space(root, 1); 3234
3235 btrfs_end_transaction(trans, root);
3236 btrfs_unreserve_metadata_space(root, 2);
3231 } 3237 }
3232 free_extent_map(em); 3238 free_extent_map(em);
3233 cur_offset = last_byte; 3239 cur_offset = last_byte;
3234 if (err || cur_offset >= block_end) 3240 if (cur_offset >= block_end)
3235 break; 3241 break;
3236 } 3242 }
3237 3243
3238 btrfs_end_transaction(trans, root);
3239 unlock_extent(io_tree, hole_start, block_end - 1, GFP_NOFS); 3244 unlock_extent(io_tree, hole_start, block_end - 1, GFP_NOFS);
3240 return err; 3245 return err;
3241} 3246}
3242 3247
3248static int btrfs_setattr_size(struct inode *inode, struct iattr *attr)
3249{
3250 struct btrfs_root *root = BTRFS_I(inode)->root;
3251 struct btrfs_trans_handle *trans;
3252 unsigned long nr;
3253 int ret;
3254
3255 if (attr->ia_size == inode->i_size)
3256 return 0;
3257
3258 if (attr->ia_size > inode->i_size) {
3259 unsigned long limit;
3260 limit = current->signal->rlim[RLIMIT_FSIZE].rlim_cur;
3261 if (attr->ia_size > inode->i_sb->s_maxbytes)
3262 return -EFBIG;
3263 if (limit != RLIM_INFINITY && attr->ia_size > limit) {
3264 send_sig(SIGXFSZ, current, 0);
3265 return -EFBIG;
3266 }
3267 }
3268
3269 ret = btrfs_reserve_metadata_space(root, 1);
3270 if (ret)
3271 return ret;
3272
3273 trans = btrfs_start_transaction(root, 1);
3274 btrfs_set_trans_block_group(trans, inode);
3275
3276 ret = btrfs_orphan_add(trans, inode);
3277 BUG_ON(ret);
3278
3279 nr = trans->blocks_used;
3280 btrfs_end_transaction(trans, root);
3281 btrfs_unreserve_metadata_space(root, 1);
3282 btrfs_btree_balance_dirty(root, nr);
3283
3284 if (attr->ia_size > inode->i_size) {
3285 ret = btrfs_cont_expand(inode, attr->ia_size);
3286 if (ret) {
3287 btrfs_truncate(inode);
3288 return ret;
3289 }
3290
3291 i_size_write(inode, attr->ia_size);
3292 btrfs_ordered_update_i_size(inode, inode->i_size, NULL);
3293
3294 trans = btrfs_start_transaction(root, 1);
3295 btrfs_set_trans_block_group(trans, inode);
3296
3297 ret = btrfs_update_inode(trans, root, inode);
3298 BUG_ON(ret);
3299 if (inode->i_nlink > 0) {
3300 ret = btrfs_orphan_del(trans, inode);
3301 BUG_ON(ret);
3302 }
3303 nr = trans->blocks_used;
3304 btrfs_end_transaction(trans, root);
3305 btrfs_btree_balance_dirty(root, nr);
3306 return 0;
3307 }
3308
3309 /*
3310 * We're truncating a file that used to have good data down to
3311 * zero. Make sure it gets into the ordered flush list so that
3312 * any new writes get down to disk quickly.
3313 */
3314 if (attr->ia_size == 0)
3315 BTRFS_I(inode)->ordered_data_close = 1;
3316
3317 /* we don't support swapfiles, so vmtruncate shouldn't fail */
3318 ret = vmtruncate(inode, attr->ia_size);
3319 BUG_ON(ret);
3320
3321 return 0;
3322}
3323
3243static int btrfs_setattr(struct dentry *dentry, struct iattr *attr) 3324static int btrfs_setattr(struct dentry *dentry, struct iattr *attr)
3244{ 3325{
3245 struct inode *inode = dentry->d_inode; 3326 struct inode *inode = dentry->d_inode;
@@ -3250,23 +3331,14 @@ static int btrfs_setattr(struct dentry *dentry, struct iattr *attr)
3250 return err; 3331 return err;
3251 3332
3252 if (S_ISREG(inode->i_mode) && (attr->ia_valid & ATTR_SIZE)) { 3333 if (S_ISREG(inode->i_mode) && (attr->ia_valid & ATTR_SIZE)) {
3253 if (attr->ia_size > inode->i_size) { 3334 err = btrfs_setattr_size(inode, attr);
3254 err = btrfs_cont_expand(inode, attr->ia_size); 3335 if (err)
3255 if (err) 3336 return err;
3256 return err;
3257 } else if (inode->i_size > 0 &&
3258 attr->ia_size == 0) {
3259
3260 /* we're truncating a file that used to have good
3261 * data down to zero. Make sure it gets into
3262 * the ordered flush list so that any new writes
3263 * get down to disk quickly.
3264 */
3265 BTRFS_I(inode)->ordered_data_close = 1;
3266 }
3267 } 3337 }
3338 attr->ia_valid &= ~ATTR_SIZE;
3268 3339
3269 err = inode_setattr(inode, attr); 3340 if (attr->ia_valid)
3341 err = inode_setattr(inode, attr);
3270 3342
3271 if (!err && ((attr->ia_valid & ATTR_MODE))) 3343 if (!err && ((attr->ia_valid & ATTR_MODE)))
3272 err = btrfs_acl_chmod(inode); 3344 err = btrfs_acl_chmod(inode);
@@ -3287,36 +3359,43 @@ void btrfs_delete_inode(struct inode *inode)
3287 } 3359 }
3288 btrfs_wait_ordered_range(inode, 0, (u64)-1); 3360 btrfs_wait_ordered_range(inode, 0, (u64)-1);
3289 3361
3362 if (root->fs_info->log_root_recovering) {
3363 BUG_ON(!list_empty(&BTRFS_I(inode)->i_orphan));
3364 goto no_delete;
3365 }
3366
3290 if (inode->i_nlink > 0) { 3367 if (inode->i_nlink > 0) {
3291 BUG_ON(btrfs_root_refs(&root->root_item) != 0); 3368 BUG_ON(btrfs_root_refs(&root->root_item) != 0);
3292 goto no_delete; 3369 goto no_delete;
3293 } 3370 }
3294 3371
3295 btrfs_i_size_write(inode, 0); 3372 btrfs_i_size_write(inode, 0);
3296 trans = btrfs_join_transaction(root, 1);
3297 3373
3298 btrfs_set_trans_block_group(trans, inode); 3374 while (1) {
3299 ret = btrfs_truncate_inode_items(trans, root, inode, inode->i_size, 0); 3375 trans = btrfs_start_transaction(root, 1);
3300 if (ret) { 3376 btrfs_set_trans_block_group(trans, inode);
3301 btrfs_orphan_del(NULL, inode); 3377 ret = btrfs_truncate_inode_items(trans, root, inode, 0, 0);
3302 goto no_delete_lock;
3303 }
3304 3378
3305 btrfs_orphan_del(trans, inode); 3379 if (ret != -EAGAIN)
3380 break;
3306 3381
3307 nr = trans->blocks_used; 3382 nr = trans->blocks_used;
3308 clear_inode(inode); 3383 btrfs_end_transaction(trans, root);
3384 trans = NULL;
3385 btrfs_btree_balance_dirty(root, nr);
3386 }
3309 3387
3310 btrfs_end_transaction(trans, root); 3388 if (ret == 0) {
3311 btrfs_btree_balance_dirty(root, nr); 3389 ret = btrfs_orphan_del(trans, inode);
3312 return; 3390 BUG_ON(ret);
3391 }
3313 3392
3314no_delete_lock:
3315 nr = trans->blocks_used; 3393 nr = trans->blocks_used;
3316 btrfs_end_transaction(trans, root); 3394 btrfs_end_transaction(trans, root);
3317 btrfs_btree_balance_dirty(root, nr); 3395 btrfs_btree_balance_dirty(root, nr);
3318no_delete: 3396no_delete:
3319 clear_inode(inode); 3397 clear_inode(inode);
3398 return;
3320} 3399}
3321 3400
3322/* 3401/*
@@ -3569,7 +3648,6 @@ static noinline void init_btrfs_i(struct inode *inode)
3569 INIT_LIST_HEAD(&BTRFS_I(inode)->ordered_operations); 3648 INIT_LIST_HEAD(&BTRFS_I(inode)->ordered_operations);
3570 RB_CLEAR_NODE(&BTRFS_I(inode)->rb_node); 3649 RB_CLEAR_NODE(&BTRFS_I(inode)->rb_node);
3571 btrfs_ordered_inode_tree_init(&BTRFS_I(inode)->ordered_tree); 3650 btrfs_ordered_inode_tree_init(&BTRFS_I(inode)->ordered_tree);
3572 mutex_init(&BTRFS_I(inode)->extent_mutex);
3573 mutex_init(&BTRFS_I(inode)->log_mutex); 3651 mutex_init(&BTRFS_I(inode)->log_mutex);
3574} 3652}
3575 3653
@@ -3695,6 +3773,13 @@ struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry)
3695 } 3773 }
3696 srcu_read_unlock(&root->fs_info->subvol_srcu, index); 3774 srcu_read_unlock(&root->fs_info->subvol_srcu, index);
3697 3775
3776 if (root != sub_root) {
3777 down_read(&root->fs_info->cleanup_work_sem);
3778 if (!(inode->i_sb->s_flags & MS_RDONLY))
3779 btrfs_orphan_cleanup(sub_root);
3780 up_read(&root->fs_info->cleanup_work_sem);
3781 }
3782
3698 return inode; 3783 return inode;
3699} 3784}
3700 3785
@@ -3869,7 +3954,11 @@ skip:
3869 3954
3870 /* Reached end of directory/root. Bump pos past the last item. */ 3955 /* Reached end of directory/root. Bump pos past the last item. */
3871 if (key_type == BTRFS_DIR_INDEX_KEY) 3956 if (key_type == BTRFS_DIR_INDEX_KEY)
3872 filp->f_pos = INT_LIMIT(off_t); 3957 /*
3958 * 32-bit glibc will use getdents64, but then strtol -
3959 * so the last number we can serve is this.
3960 */
3961 filp->f_pos = 0x7fffffff;
3873 else 3962 else
3874 filp->f_pos++; 3963 filp->f_pos++;
3875nopos: 3964nopos:
@@ -4219,7 +4308,7 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry,
4219 if (IS_ERR(inode)) 4308 if (IS_ERR(inode))
4220 goto out_unlock; 4309 goto out_unlock;
4221 4310
4222 err = btrfs_init_inode_security(inode, dir); 4311 err = btrfs_init_inode_security(trans, inode, dir);
4223 if (err) { 4312 if (err) {
4224 drop_inode = 1; 4313 drop_inode = 1;
4225 goto out_unlock; 4314 goto out_unlock;
@@ -4290,7 +4379,7 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry,
4290 if (IS_ERR(inode)) 4379 if (IS_ERR(inode))
4291 goto out_unlock; 4380 goto out_unlock;
4292 4381
4293 err = btrfs_init_inode_security(inode, dir); 4382 err = btrfs_init_inode_security(trans, inode, dir);
4294 if (err) { 4383 if (err) {
4295 drop_inode = 1; 4384 drop_inode = 1;
4296 goto out_unlock; 4385 goto out_unlock;
@@ -4336,6 +4425,10 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
4336 if (inode->i_nlink == 0) 4425 if (inode->i_nlink == 0)
4337 return -ENOENT; 4426 return -ENOENT;
4338 4427
4428 /* do not allow sys_link's with other subvols of the same device */
4429 if (root->objectid != BTRFS_I(inode)->root->objectid)
4430 return -EPERM;
4431
4339 /* 4432 /*
4340 * 1 item for inode ref 4433 * 1 item for inode ref
4341 * 2 items for dir items 4434 * 2 items for dir items
@@ -4423,7 +4516,7 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
4423 4516
4424 drop_on_err = 1; 4517 drop_on_err = 1;
4425 4518
4426 err = btrfs_init_inode_security(inode, dir); 4519 err = btrfs_init_inode_security(trans, inode, dir);
4427 if (err) 4520 if (err)
4428 goto out_fail; 4521 goto out_fail;
4429 4522
@@ -5074,17 +5167,20 @@ static void btrfs_truncate(struct inode *inode)
5074 unsigned long nr; 5167 unsigned long nr;
5075 u64 mask = root->sectorsize - 1; 5168 u64 mask = root->sectorsize - 1;
5076 5169
5077 if (!S_ISREG(inode->i_mode)) 5170 if (!S_ISREG(inode->i_mode)) {
5078 return; 5171 WARN_ON(1);
5079 if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
5080 return; 5172 return;
5173 }
5081 5174
5082 ret = btrfs_truncate_page(inode->i_mapping, inode->i_size); 5175 ret = btrfs_truncate_page(inode->i_mapping, inode->i_size);
5083 if (ret) 5176 if (ret)
5084 return; 5177 return;
5178
5085 btrfs_wait_ordered_range(inode, inode->i_size & (~mask), (u64)-1); 5179 btrfs_wait_ordered_range(inode, inode->i_size & (~mask), (u64)-1);
5180 btrfs_ordered_update_i_size(inode, inode->i_size, NULL);
5086 5181
5087 trans = btrfs_start_transaction(root, 1); 5182 trans = btrfs_start_transaction(root, 1);
5183 btrfs_set_trans_block_group(trans, inode);
5088 5184
5089 /* 5185 /*
5090 * setattr is responsible for setting the ordered_data_close flag, 5186 * setattr is responsible for setting the ordered_data_close flag,
@@ -5106,21 +5202,32 @@ static void btrfs_truncate(struct inode *inode)
5106 if (inode->i_size == 0 && BTRFS_I(inode)->ordered_data_close) 5202 if (inode->i_size == 0 && BTRFS_I(inode)->ordered_data_close)
5107 btrfs_add_ordered_operation(trans, root, inode); 5203 btrfs_add_ordered_operation(trans, root, inode);
5108 5204
5109 btrfs_set_trans_block_group(trans, inode); 5205 while (1) {
5110 btrfs_i_size_write(inode, inode->i_size); 5206 ret = btrfs_truncate_inode_items(trans, root, inode,
5207 inode->i_size,
5208 BTRFS_EXTENT_DATA_KEY);
5209 if (ret != -EAGAIN)
5210 break;
5111 5211
5112 ret = btrfs_orphan_add(trans, inode); 5212 ret = btrfs_update_inode(trans, root, inode);
5113 if (ret) 5213 BUG_ON(ret);
5114 goto out;
5115 /* FIXME, add redo link to tree so we don't leak on crash */
5116 ret = btrfs_truncate_inode_items(trans, root, inode, inode->i_size,
5117 BTRFS_EXTENT_DATA_KEY);
5118 btrfs_update_inode(trans, root, inode);
5119 5214
5120 ret = btrfs_orphan_del(trans, inode); 5215 nr = trans->blocks_used;
5216 btrfs_end_transaction(trans, root);
5217 btrfs_btree_balance_dirty(root, nr);
5218
5219 trans = btrfs_start_transaction(root, 1);
5220 btrfs_set_trans_block_group(trans, inode);
5221 }
5222
5223 if (ret == 0 && inode->i_nlink > 0) {
5224 ret = btrfs_orphan_del(trans, inode);
5225 BUG_ON(ret);
5226 }
5227
5228 ret = btrfs_update_inode(trans, root, inode);
5121 BUG_ON(ret); 5229 BUG_ON(ret);
5122 5230
5123out:
5124 nr = trans->blocks_used; 5231 nr = trans->blocks_used;
5125 ret = btrfs_end_transaction_throttle(trans, root); 5232 ret = btrfs_end_transaction_throttle(trans, root);
5126 BUG_ON(ret); 5233 BUG_ON(ret);
@@ -5217,9 +5324,9 @@ void btrfs_destroy_inode(struct inode *inode)
5217 5324
5218 spin_lock(&root->list_lock); 5325 spin_lock(&root->list_lock);
5219 if (!list_empty(&BTRFS_I(inode)->i_orphan)) { 5326 if (!list_empty(&BTRFS_I(inode)->i_orphan)) {
5220 printk(KERN_ERR "BTRFS: inode %lu: inode still on the orphan" 5327 printk(KERN_INFO "BTRFS: inode %lu still on the orphan list\n",
5221 " list\n", inode->i_ino); 5328 inode->i_ino);
5222 dump_stack(); 5329 list_del_init(&BTRFS_I(inode)->i_orphan);
5223 } 5330 }
5224 spin_unlock(&root->list_lock); 5331 spin_unlock(&root->list_lock);
5225 5332
@@ -5476,7 +5583,7 @@ out_fail:
5476 * some fairly slow code that needs optimization. This walks the list 5583 * some fairly slow code that needs optimization. This walks the list
5477 * of all the inodes with pending delalloc and forces them to disk. 5584 * of all the inodes with pending delalloc and forces them to disk.
5478 */ 5585 */
5479int btrfs_start_delalloc_inodes(struct btrfs_root *root) 5586int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput)
5480{ 5587{
5481 struct list_head *head = &root->fs_info->delalloc_inodes; 5588 struct list_head *head = &root->fs_info->delalloc_inodes;
5482 struct btrfs_inode *binode; 5589 struct btrfs_inode *binode;
@@ -5495,7 +5602,10 @@ int btrfs_start_delalloc_inodes(struct btrfs_root *root)
5495 spin_unlock(&root->fs_info->delalloc_lock); 5602 spin_unlock(&root->fs_info->delalloc_lock);
5496 if (inode) { 5603 if (inode) {
5497 filemap_flush(inode->i_mapping); 5604 filemap_flush(inode->i_mapping);
5498 iput(inode); 5605 if (delay_iput)
5606 btrfs_add_delayed_iput(inode);
5607 else
5608 iput(inode);
5499 } 5609 }
5500 cond_resched(); 5610 cond_resched();
5501 spin_lock(&root->fs_info->delalloc_lock); 5611 spin_lock(&root->fs_info->delalloc_lock);
@@ -5569,7 +5679,7 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
5569 if (IS_ERR(inode)) 5679 if (IS_ERR(inode))
5570 goto out_unlock; 5680 goto out_unlock;
5571 5681
5572 err = btrfs_init_inode_security(inode, dir); 5682 err = btrfs_init_inode_security(trans, inode, dir);
5573 if (err) { 5683 if (err) {
5574 drop_inode = 1; 5684 drop_inode = 1;
5575 goto out_unlock; 5685 goto out_unlock;
@@ -5641,57 +5751,77 @@ out_fail:
5641 return err; 5751 return err;
5642} 5752}
5643 5753
5644static int prealloc_file_range(struct btrfs_trans_handle *trans, 5754static int prealloc_file_range(struct inode *inode, u64 start, u64 end,
5645 struct inode *inode, u64 start, u64 end, 5755 u64 alloc_hint, int mode, loff_t actual_len)
5646 u64 locked_end, u64 alloc_hint, int mode)
5647{ 5756{
5757 struct btrfs_trans_handle *trans;
5648 struct btrfs_root *root = BTRFS_I(inode)->root; 5758 struct btrfs_root *root = BTRFS_I(inode)->root;
5649 struct btrfs_key ins; 5759 struct btrfs_key ins;
5650 u64 alloc_size; 5760 u64 alloc_size;
5651 u64 cur_offset = start; 5761 u64 cur_offset = start;
5652 u64 num_bytes = end - start; 5762 u64 num_bytes = end - start;
5653 int ret = 0; 5763 int ret = 0;
5764 u64 i_size;
5654 5765
5655 while (num_bytes > 0) { 5766 while (num_bytes > 0) {
5656 alloc_size = min(num_bytes, root->fs_info->max_extent); 5767 alloc_size = min(num_bytes, root->fs_info->max_extent);
5657 5768
5658 ret = btrfs_reserve_metadata_space(root, 1); 5769 trans = btrfs_start_transaction(root, 1);
5659 if (ret)
5660 goto out;
5661 5770
5662 ret = btrfs_reserve_extent(trans, root, alloc_size, 5771 ret = btrfs_reserve_extent(trans, root, alloc_size,
5663 root->sectorsize, 0, alloc_hint, 5772 root->sectorsize, 0, alloc_hint,
5664 (u64)-1, &ins, 1); 5773 (u64)-1, &ins, 1);
5665 if (ret) { 5774 if (ret) {
5666 WARN_ON(1); 5775 WARN_ON(1);
5667 goto out; 5776 goto stop_trans;
5668 } 5777 }
5778
5779 ret = btrfs_reserve_metadata_space(root, 3);
5780 if (ret) {
5781 btrfs_free_reserved_extent(root, ins.objectid,
5782 ins.offset);
5783 goto stop_trans;
5784 }
5785
5669 ret = insert_reserved_file_extent(trans, inode, 5786 ret = insert_reserved_file_extent(trans, inode,
5670 cur_offset, ins.objectid, 5787 cur_offset, ins.objectid,
5671 ins.offset, ins.offset, 5788 ins.offset, ins.offset,
5672 ins.offset, locked_end, 5789 ins.offset, 0, 0, 0,
5673 0, 0, 0,
5674 BTRFS_FILE_EXTENT_PREALLOC); 5790 BTRFS_FILE_EXTENT_PREALLOC);
5675 BUG_ON(ret); 5791 BUG_ON(ret);
5676 btrfs_drop_extent_cache(inode, cur_offset, 5792 btrfs_drop_extent_cache(inode, cur_offset,
5677 cur_offset + ins.offset -1, 0); 5793 cur_offset + ins.offset -1, 0);
5794
5678 num_bytes -= ins.offset; 5795 num_bytes -= ins.offset;
5679 cur_offset += ins.offset; 5796 cur_offset += ins.offset;
5680 alloc_hint = ins.objectid + ins.offset; 5797 alloc_hint = ins.objectid + ins.offset;
5681 btrfs_unreserve_metadata_space(root, 1); 5798
5682 }
5683out:
5684 if (cur_offset > start) {
5685 inode->i_ctime = CURRENT_TIME; 5799 inode->i_ctime = CURRENT_TIME;
5686 BTRFS_I(inode)->flags |= BTRFS_INODE_PREALLOC; 5800 BTRFS_I(inode)->flags |= BTRFS_INODE_PREALLOC;
5687 if (!(mode & FALLOC_FL_KEEP_SIZE) && 5801 if (!(mode & FALLOC_FL_KEEP_SIZE) &&
5688 cur_offset > i_size_read(inode)) 5802 (actual_len > inode->i_size) &&
5689 btrfs_i_size_write(inode, cur_offset); 5803 (cur_offset > inode->i_size)) {
5804
5805 if (cur_offset > actual_len)
5806 i_size = actual_len;
5807 else
5808 i_size = cur_offset;
5809 i_size_write(inode, i_size);
5810 btrfs_ordered_update_i_size(inode, i_size, NULL);
5811 }
5812
5690 ret = btrfs_update_inode(trans, root, inode); 5813 ret = btrfs_update_inode(trans, root, inode);
5691 BUG_ON(ret); 5814 BUG_ON(ret);
5815
5816 btrfs_end_transaction(trans, root);
5817 btrfs_unreserve_metadata_space(root, 3);
5692 } 5818 }
5819 return ret;
5693 5820
5821stop_trans:
5822 btrfs_end_transaction(trans, root);
5694 return ret; 5823 return ret;
5824
5695} 5825}
5696 5826
5697static long btrfs_fallocate(struct inode *inode, int mode, 5827static long btrfs_fallocate(struct inode *inode, int mode,
@@ -5705,8 +5835,6 @@ static long btrfs_fallocate(struct inode *inode, int mode,
5705 u64 locked_end; 5835 u64 locked_end;
5706 u64 mask = BTRFS_I(inode)->root->sectorsize - 1; 5836 u64 mask = BTRFS_I(inode)->root->sectorsize - 1;
5707 struct extent_map *em; 5837 struct extent_map *em;
5708 struct btrfs_trans_handle *trans;
5709 struct btrfs_root *root;
5710 int ret; 5838 int ret;
5711 5839
5712 alloc_start = offset & ~mask; 5840 alloc_start = offset & ~mask;
@@ -5725,9 +5853,7 @@ static long btrfs_fallocate(struct inode *inode, int mode,
5725 goto out; 5853 goto out;
5726 } 5854 }
5727 5855
5728 root = BTRFS_I(inode)->root; 5856 ret = btrfs_check_data_free_space(BTRFS_I(inode)->root, inode,
5729
5730 ret = btrfs_check_data_free_space(root, inode,
5731 alloc_end - alloc_start); 5857 alloc_end - alloc_start);
5732 if (ret) 5858 if (ret)
5733 goto out; 5859 goto out;
@@ -5736,12 +5862,6 @@ static long btrfs_fallocate(struct inode *inode, int mode,
5736 while (1) { 5862 while (1) {
5737 struct btrfs_ordered_extent *ordered; 5863 struct btrfs_ordered_extent *ordered;
5738 5864
5739 trans = btrfs_start_transaction(BTRFS_I(inode)->root, 1);
5740 if (!trans) {
5741 ret = -EIO;
5742 goto out_free;
5743 }
5744
5745 /* the extent lock is ordered inside the running 5865 /* the extent lock is ordered inside the running
5746 * transaction 5866 * transaction
5747 */ 5867 */
@@ -5755,8 +5875,6 @@ static long btrfs_fallocate(struct inode *inode, int mode,
5755 btrfs_put_ordered_extent(ordered); 5875 btrfs_put_ordered_extent(ordered);
5756 unlock_extent(&BTRFS_I(inode)->io_tree, 5876 unlock_extent(&BTRFS_I(inode)->io_tree,
5757 alloc_start, locked_end, GFP_NOFS); 5877 alloc_start, locked_end, GFP_NOFS);
5758 btrfs_end_transaction(trans, BTRFS_I(inode)->root);
5759
5760 /* 5878 /*
5761 * we can't wait on the range with the transaction 5879 * we can't wait on the range with the transaction
5762 * running or with the extent lock held 5880 * running or with the extent lock held
@@ -5777,10 +5895,12 @@ static long btrfs_fallocate(struct inode *inode, int mode,
5777 BUG_ON(IS_ERR(em) || !em); 5895 BUG_ON(IS_ERR(em) || !em);
5778 last_byte = min(extent_map_end(em), alloc_end); 5896 last_byte = min(extent_map_end(em), alloc_end);
5779 last_byte = (last_byte + mask) & ~mask; 5897 last_byte = (last_byte + mask) & ~mask;
5780 if (em->block_start == EXTENT_MAP_HOLE) { 5898 if (em->block_start == EXTENT_MAP_HOLE ||
5781 ret = prealloc_file_range(trans, inode, cur_offset, 5899 (cur_offset >= inode->i_size &&
5782 last_byte, locked_end + 1, 5900 !test_bit(EXTENT_FLAG_PREALLOC, &em->flags))) {
5783 alloc_hint, mode); 5901 ret = prealloc_file_range(inode,
5902 cur_offset, last_byte,
5903 alloc_hint, mode, offset+len);
5784 if (ret < 0) { 5904 if (ret < 0) {
5785 free_extent_map(em); 5905 free_extent_map(em);
5786 break; 5906 break;
@@ -5799,9 +5919,8 @@ static long btrfs_fallocate(struct inode *inode, int mode,
5799 unlock_extent(&BTRFS_I(inode)->io_tree, alloc_start, locked_end, 5919 unlock_extent(&BTRFS_I(inode)->io_tree, alloc_start, locked_end,
5800 GFP_NOFS); 5920 GFP_NOFS);
5801 5921
5802 btrfs_end_transaction(trans, BTRFS_I(inode)->root); 5922 btrfs_free_reserved_data_space(BTRFS_I(inode)->root, inode,
5803out_free: 5923 alloc_end - alloc_start);
5804 btrfs_free_reserved_data_space(root, inode, alloc_end - alloc_start);
5805out: 5924out:
5806 mutex_unlock(&inode->i_mutex); 5925 mutex_unlock(&inode->i_mutex);
5807 return ret; 5926 return ret;
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index cdbb054102b9..645a17927a8f 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -237,7 +237,6 @@ static noinline int create_subvol(struct btrfs_root *root,
237 u64 objectid; 237 u64 objectid;
238 u64 new_dirid = BTRFS_FIRST_FREE_OBJECTID; 238 u64 new_dirid = BTRFS_FIRST_FREE_OBJECTID;
239 u64 index = 0; 239 u64 index = 0;
240 unsigned long nr = 1;
241 240
242 /* 241 /*
243 * 1 - inode item 242 * 1 - inode item
@@ -290,7 +289,7 @@ static noinline int create_subvol(struct btrfs_root *root,
290 btrfs_set_root_generation(&root_item, trans->transid); 289 btrfs_set_root_generation(&root_item, trans->transid);
291 btrfs_set_root_level(&root_item, 0); 290 btrfs_set_root_level(&root_item, 0);
292 btrfs_set_root_refs(&root_item, 1); 291 btrfs_set_root_refs(&root_item, 1);
293 btrfs_set_root_used(&root_item, 0); 292 btrfs_set_root_used(&root_item, leaf->len);
294 btrfs_set_root_last_snapshot(&root_item, 0); 293 btrfs_set_root_last_snapshot(&root_item, 0);
295 294
296 memset(&root_item.drop_progress, 0, sizeof(root_item.drop_progress)); 295 memset(&root_item.drop_progress, 0, sizeof(root_item.drop_progress));
@@ -342,24 +341,21 @@ static noinline int create_subvol(struct btrfs_root *root,
342 341
343 d_instantiate(dentry, btrfs_lookup_dentry(dir, dentry)); 342 d_instantiate(dentry, btrfs_lookup_dentry(dir, dentry));
344fail: 343fail:
345 nr = trans->blocks_used;
346 err = btrfs_commit_transaction(trans, root); 344 err = btrfs_commit_transaction(trans, root);
347 if (err && !ret) 345 if (err && !ret)
348 ret = err; 346 ret = err;
349 347
350 btrfs_unreserve_metadata_space(root, 6); 348 btrfs_unreserve_metadata_space(root, 6);
351 btrfs_btree_balance_dirty(root, nr);
352 return ret; 349 return ret;
353} 350}
354 351
355static int create_snapshot(struct btrfs_root *root, struct dentry *dentry, 352static int create_snapshot(struct btrfs_root *root, struct dentry *dentry,
356 char *name, int namelen) 353 char *name, int namelen)
357{ 354{
355 struct inode *inode;
358 struct btrfs_pending_snapshot *pending_snapshot; 356 struct btrfs_pending_snapshot *pending_snapshot;
359 struct btrfs_trans_handle *trans; 357 struct btrfs_trans_handle *trans;
360 int ret = 0; 358 int ret;
361 int err;
362 unsigned long nr = 0;
363 359
364 if (!root->ref_cows) 360 if (!root->ref_cows)
365 return -EINVAL; 361 return -EINVAL;
@@ -372,20 +368,20 @@ static int create_snapshot(struct btrfs_root *root, struct dentry *dentry,
372 */ 368 */
373 ret = btrfs_reserve_metadata_space(root, 6); 369 ret = btrfs_reserve_metadata_space(root, 6);
374 if (ret) 370 if (ret)
375 goto fail_unlock; 371 goto fail;
376 372
377 pending_snapshot = kzalloc(sizeof(*pending_snapshot), GFP_NOFS); 373 pending_snapshot = kzalloc(sizeof(*pending_snapshot), GFP_NOFS);
378 if (!pending_snapshot) { 374 if (!pending_snapshot) {
379 ret = -ENOMEM; 375 ret = -ENOMEM;
380 btrfs_unreserve_metadata_space(root, 6); 376 btrfs_unreserve_metadata_space(root, 6);
381 goto fail_unlock; 377 goto fail;
382 } 378 }
383 pending_snapshot->name = kmalloc(namelen + 1, GFP_NOFS); 379 pending_snapshot->name = kmalloc(namelen + 1, GFP_NOFS);
384 if (!pending_snapshot->name) { 380 if (!pending_snapshot->name) {
385 ret = -ENOMEM; 381 ret = -ENOMEM;
386 kfree(pending_snapshot); 382 kfree(pending_snapshot);
387 btrfs_unreserve_metadata_space(root, 6); 383 btrfs_unreserve_metadata_space(root, 6);
388 goto fail_unlock; 384 goto fail;
389 } 385 }
390 memcpy(pending_snapshot->name, name, namelen); 386 memcpy(pending_snapshot->name, name, namelen);
391 pending_snapshot->name[namelen] = '\0'; 387 pending_snapshot->name[namelen] = '\0';
@@ -395,10 +391,19 @@ static int create_snapshot(struct btrfs_root *root, struct dentry *dentry,
395 pending_snapshot->root = root; 391 pending_snapshot->root = root;
396 list_add(&pending_snapshot->list, 392 list_add(&pending_snapshot->list,
397 &trans->transaction->pending_snapshots); 393 &trans->transaction->pending_snapshots);
398 err = btrfs_commit_transaction(trans, root); 394 ret = btrfs_commit_transaction(trans, root);
395 BUG_ON(ret);
396 btrfs_unreserve_metadata_space(root, 6);
399 397
400fail_unlock: 398 inode = btrfs_lookup_dentry(dentry->d_parent->d_inode, dentry);
401 btrfs_btree_balance_dirty(root, nr); 399 if (IS_ERR(inode)) {
400 ret = PTR_ERR(inode);
401 goto fail;
402 }
403 BUG_ON(!inode);
404 d_instantiate(dentry, inode);
405 ret = 0;
406fail:
402 return ret; 407 return ret;
403} 408}
404 409
@@ -1027,8 +1032,7 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
1027 BUG_ON(!trans); 1032 BUG_ON(!trans);
1028 1033
1029 /* punch hole in destination first */ 1034 /* punch hole in destination first */
1030 btrfs_drop_extents(trans, root, inode, off, off + len, 1035 btrfs_drop_extents(trans, inode, off, off + len, &hint_byte, 1);
1031 off + len, 0, &hint_byte, 1);
1032 1036
1033 /* clone data */ 1037 /* clone data */
1034 key.objectid = src->i_ino; 1038 key.objectid = src->i_ino;
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index 5799bc46a309..5c2a9e78a949 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -291,16 +291,16 @@ int btrfs_put_ordered_extent(struct btrfs_ordered_extent *entry)
291 291
292/* 292/*
293 * remove an ordered extent from the tree. No references are dropped 293 * remove an ordered extent from the tree. No references are dropped
294 * but, anyone waiting on this extent is woken up. 294 * and you must wake_up entry->wait. You must hold the tree mutex
295 * while you call this function.
295 */ 296 */
296int btrfs_remove_ordered_extent(struct inode *inode, 297static int __btrfs_remove_ordered_extent(struct inode *inode,
297 struct btrfs_ordered_extent *entry) 298 struct btrfs_ordered_extent *entry)
298{ 299{
299 struct btrfs_ordered_inode_tree *tree; 300 struct btrfs_ordered_inode_tree *tree;
300 struct rb_node *node; 301 struct rb_node *node;
301 302
302 tree = &BTRFS_I(inode)->ordered_tree; 303 tree = &BTRFS_I(inode)->ordered_tree;
303 mutex_lock(&tree->mutex);
304 node = &entry->rb_node; 304 node = &entry->rb_node;
305 rb_erase(node, &tree->tree); 305 rb_erase(node, &tree->tree);
306 tree->last = NULL; 306 tree->last = NULL;
@@ -326,16 +326,34 @@ int btrfs_remove_ordered_extent(struct inode *inode,
326 } 326 }
327 spin_unlock(&BTRFS_I(inode)->root->fs_info->ordered_extent_lock); 327 spin_unlock(&BTRFS_I(inode)->root->fs_info->ordered_extent_lock);
328 328
329 return 0;
330}
331
332/*
333 * remove an ordered extent from the tree. No references are dropped
334 * but any waiters are woken.
335 */
336int btrfs_remove_ordered_extent(struct inode *inode,
337 struct btrfs_ordered_extent *entry)
338{
339 struct btrfs_ordered_inode_tree *tree;
340 int ret;
341
342 tree = &BTRFS_I(inode)->ordered_tree;
343 mutex_lock(&tree->mutex);
344 ret = __btrfs_remove_ordered_extent(inode, entry);
329 mutex_unlock(&tree->mutex); 345 mutex_unlock(&tree->mutex);
330 wake_up(&entry->wait); 346 wake_up(&entry->wait);
331 return 0; 347
348 return ret;
332} 349}
333 350
334/* 351/*
335 * wait for all the ordered extents in a root. This is done when balancing 352 * wait for all the ordered extents in a root. This is done when balancing
336 * space between drives. 353 * space between drives.
337 */ 354 */
338int btrfs_wait_ordered_extents(struct btrfs_root *root, int nocow_only) 355int btrfs_wait_ordered_extents(struct btrfs_root *root,
356 int nocow_only, int delay_iput)
339{ 357{
340 struct list_head splice; 358 struct list_head splice;
341 struct list_head *cur; 359 struct list_head *cur;
@@ -372,7 +390,10 @@ int btrfs_wait_ordered_extents(struct btrfs_root *root, int nocow_only)
372 if (inode) { 390 if (inode) {
373 btrfs_start_ordered_extent(inode, ordered, 1); 391 btrfs_start_ordered_extent(inode, ordered, 1);
374 btrfs_put_ordered_extent(ordered); 392 btrfs_put_ordered_extent(ordered);
375 iput(inode); 393 if (delay_iput)
394 btrfs_add_delayed_iput(inode);
395 else
396 iput(inode);
376 } else { 397 } else {
377 btrfs_put_ordered_extent(ordered); 398 btrfs_put_ordered_extent(ordered);
378 } 399 }
@@ -430,7 +451,7 @@ again:
430 btrfs_wait_ordered_range(inode, 0, (u64)-1); 451 btrfs_wait_ordered_range(inode, 0, (u64)-1);
431 else 452 else
432 filemap_flush(inode->i_mapping); 453 filemap_flush(inode->i_mapping);
433 iput(inode); 454 btrfs_add_delayed_iput(inode);
434 } 455 }
435 456
436 cond_resched(); 457 cond_resched();
@@ -589,7 +610,7 @@ out:
589 * After an extent is done, call this to conditionally update the on disk 610 * After an extent is done, call this to conditionally update the on disk
590 * i_size. i_size is updated to cover any fully written part of the file. 611 * i_size. i_size is updated to cover any fully written part of the file.
591 */ 612 */
592int btrfs_ordered_update_i_size(struct inode *inode, 613int btrfs_ordered_update_i_size(struct inode *inode, u64 offset,
593 struct btrfs_ordered_extent *ordered) 614 struct btrfs_ordered_extent *ordered)
594{ 615{
595 struct btrfs_ordered_inode_tree *tree = &BTRFS_I(inode)->ordered_tree; 616 struct btrfs_ordered_inode_tree *tree = &BTRFS_I(inode)->ordered_tree;
@@ -597,18 +618,32 @@ int btrfs_ordered_update_i_size(struct inode *inode,
597 u64 disk_i_size; 618 u64 disk_i_size;
598 u64 new_i_size; 619 u64 new_i_size;
599 u64 i_size_test; 620 u64 i_size_test;
621 u64 i_size = i_size_read(inode);
600 struct rb_node *node; 622 struct rb_node *node;
623 struct rb_node *prev = NULL;
601 struct btrfs_ordered_extent *test; 624 struct btrfs_ordered_extent *test;
625 int ret = 1;
626
627 if (ordered)
628 offset = entry_end(ordered);
629 else
630 offset = ALIGN(offset, BTRFS_I(inode)->root->sectorsize);
602 631
603 mutex_lock(&tree->mutex); 632 mutex_lock(&tree->mutex);
604 disk_i_size = BTRFS_I(inode)->disk_i_size; 633 disk_i_size = BTRFS_I(inode)->disk_i_size;
605 634
635 /* truncate file */
636 if (disk_i_size > i_size) {
637 BTRFS_I(inode)->disk_i_size = i_size;
638 ret = 0;
639 goto out;
640 }
641
606 /* 642 /*
607 * if the disk i_size is already at the inode->i_size, or 643 * if the disk i_size is already at the inode->i_size, or
608 * this ordered extent is inside the disk i_size, we're done 644 * this ordered extent is inside the disk i_size, we're done
609 */ 645 */
610 if (disk_i_size >= inode->i_size || 646 if (disk_i_size == i_size || offset <= disk_i_size) {
611 ordered->file_offset + ordered->len <= disk_i_size) {
612 goto out; 647 goto out;
613 } 648 }
614 649
@@ -616,8 +651,7 @@ int btrfs_ordered_update_i_size(struct inode *inode,
616 * we can't update the disk_isize if there are delalloc bytes 651 * we can't update the disk_isize if there are delalloc bytes
617 * between disk_i_size and this ordered extent 652 * between disk_i_size and this ordered extent
618 */ 653 */
619 if (test_range_bit(io_tree, disk_i_size, 654 if (test_range_bit(io_tree, disk_i_size, offset - 1,
620 ordered->file_offset + ordered->len - 1,
621 EXTENT_DELALLOC, 0, NULL)) { 655 EXTENT_DELALLOC, 0, NULL)) {
622 goto out; 656 goto out;
623 } 657 }
@@ -626,20 +660,32 @@ int btrfs_ordered_update_i_size(struct inode *inode,
626 * if we find an ordered extent then we can't update disk i_size 660 * if we find an ordered extent then we can't update disk i_size
627 * yet 661 * yet
628 */ 662 */
629 node = &ordered->rb_node; 663 if (ordered) {
630 while (1) { 664 node = rb_prev(&ordered->rb_node);
631 node = rb_prev(node); 665 } else {
632 if (!node) 666 prev = tree_search(tree, offset);
633 break; 667 /*
668 * we insert file extents without involving ordered struct,
669 * so there should be no ordered struct cover this offset
670 */
671 if (prev) {
672 test = rb_entry(prev, struct btrfs_ordered_extent,
673 rb_node);
674 BUG_ON(offset_in_entry(test, offset));
675 }
676 node = prev;
677 }
678 while (node) {
634 test = rb_entry(node, struct btrfs_ordered_extent, rb_node); 679 test = rb_entry(node, struct btrfs_ordered_extent, rb_node);
635 if (test->file_offset + test->len <= disk_i_size) 680 if (test->file_offset + test->len <= disk_i_size)
636 break; 681 break;
637 if (test->file_offset >= inode->i_size) 682 if (test->file_offset >= i_size)
638 break; 683 break;
639 if (test->file_offset >= disk_i_size) 684 if (test->file_offset >= disk_i_size)
640 goto out; 685 goto out;
686 node = rb_prev(node);
641 } 687 }
642 new_i_size = min_t(u64, entry_end(ordered), i_size_read(inode)); 688 new_i_size = min_t(u64, offset, i_size);
643 689
644 /* 690 /*
645 * at this point, we know we can safely update i_size to at least 691 * at this point, we know we can safely update i_size to at least
@@ -647,7 +693,14 @@ int btrfs_ordered_update_i_size(struct inode *inode,
647 * walk forward and see if ios from higher up in the file have 693 * walk forward and see if ios from higher up in the file have
648 * finished. 694 * finished.
649 */ 695 */
650 node = rb_next(&ordered->rb_node); 696 if (ordered) {
697 node = rb_next(&ordered->rb_node);
698 } else {
699 if (prev)
700 node = rb_next(prev);
701 else
702 node = rb_first(&tree->tree);
703 }
651 i_size_test = 0; 704 i_size_test = 0;
652 if (node) { 705 if (node) {
653 /* 706 /*
@@ -655,10 +708,10 @@ int btrfs_ordered_update_i_size(struct inode *inode,
655 * between our ordered extent and the next one. 708 * between our ordered extent and the next one.
656 */ 709 */
657 test = rb_entry(node, struct btrfs_ordered_extent, rb_node); 710 test = rb_entry(node, struct btrfs_ordered_extent, rb_node);
658 if (test->file_offset > entry_end(ordered)) 711 if (test->file_offset > offset)
659 i_size_test = test->file_offset; 712 i_size_test = test->file_offset;
660 } else { 713 } else {
661 i_size_test = i_size_read(inode); 714 i_size_test = i_size;
662 } 715 }
663 716
664 /* 717 /*
@@ -667,15 +720,25 @@ int btrfs_ordered_update_i_size(struct inode *inode,
667 * are no delalloc bytes in this area, it is safe to update 720 * are no delalloc bytes in this area, it is safe to update
668 * disk_i_size to the end of the region. 721 * disk_i_size to the end of the region.
669 */ 722 */
670 if (i_size_test > entry_end(ordered) && 723 if (i_size_test > offset &&
671 !test_range_bit(io_tree, entry_end(ordered), i_size_test - 1, 724 !test_range_bit(io_tree, offset, i_size_test - 1,
672 EXTENT_DELALLOC, 0, NULL)) { 725 EXTENT_DELALLOC, 0, NULL)) {
673 new_i_size = min_t(u64, i_size_test, i_size_read(inode)); 726 new_i_size = min_t(u64, i_size_test, i_size);
674 } 727 }
675 BTRFS_I(inode)->disk_i_size = new_i_size; 728 BTRFS_I(inode)->disk_i_size = new_i_size;
729 ret = 0;
676out: 730out:
731 /*
732 * we need to remove the ordered extent with the tree lock held
733 * so that other people calling this function don't find our fully
734 * processed ordered entry and skip updating the i_size
735 */
736 if (ordered)
737 __btrfs_remove_ordered_extent(inode, ordered);
677 mutex_unlock(&tree->mutex); 738 mutex_unlock(&tree->mutex);
678 return 0; 739 if (ordered)
740 wake_up(&ordered->wait);
741 return ret;
679} 742}
680 743
681/* 744/*
diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h
index f82e87488ca8..1fe1282ef47c 100644
--- a/fs/btrfs/ordered-data.h
+++ b/fs/btrfs/ordered-data.h
@@ -150,12 +150,13 @@ void btrfs_start_ordered_extent(struct inode *inode,
150int btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len); 150int btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len);
151struct btrfs_ordered_extent * 151struct btrfs_ordered_extent *
152btrfs_lookup_first_ordered_extent(struct inode * inode, u64 file_offset); 152btrfs_lookup_first_ordered_extent(struct inode * inode, u64 file_offset);
153int btrfs_ordered_update_i_size(struct inode *inode, 153int btrfs_ordered_update_i_size(struct inode *inode, u64 offset,
154 struct btrfs_ordered_extent *ordered); 154 struct btrfs_ordered_extent *ordered);
155int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u64 disk_bytenr, u32 *sum); 155int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u64 disk_bytenr, u32 *sum);
156int btrfs_wait_ordered_extents(struct btrfs_root *root, int nocow_only);
157int btrfs_run_ordered_operations(struct btrfs_root *root, int wait); 156int btrfs_run_ordered_operations(struct btrfs_root *root, int wait);
158int btrfs_add_ordered_operation(struct btrfs_trans_handle *trans, 157int btrfs_add_ordered_operation(struct btrfs_trans_handle *trans,
159 struct btrfs_root *root, 158 struct btrfs_root *root,
160 struct inode *inode); 159 struct inode *inode);
160int btrfs_wait_ordered_extents(struct btrfs_root *root,
161 int nocow_only, int delay_iput);
161#endif 162#endif
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index cfcc93c93a7b..ab7ab5318745 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -1561,6 +1561,20 @@ static int invalidate_extent_cache(struct btrfs_root *root,
1561 return 0; 1561 return 0;
1562} 1562}
1563 1563
1564static void put_inodes(struct list_head *list)
1565{
1566 struct inodevec *ivec;
1567 while (!list_empty(list)) {
1568 ivec = list_entry(list->next, struct inodevec, list);
1569 list_del(&ivec->list);
1570 while (ivec->nr > 0) {
1571 ivec->nr--;
1572 iput(ivec->inode[ivec->nr]);
1573 }
1574 kfree(ivec);
1575 }
1576}
1577
1564static int find_next_key(struct btrfs_path *path, int level, 1578static int find_next_key(struct btrfs_path *path, int level,
1565 struct btrfs_key *key) 1579 struct btrfs_key *key)
1566 1580
@@ -1723,6 +1737,11 @@ static noinline_for_stack int merge_reloc_root(struct reloc_control *rc,
1723 1737
1724 btrfs_btree_balance_dirty(root, nr); 1738 btrfs_btree_balance_dirty(root, nr);
1725 1739
1740 /*
1741 * put inodes outside transaction, otherwise we may deadlock.
1742 */
1743 put_inodes(&inode_list);
1744
1726 if (replaced && rc->stage == UPDATE_DATA_PTRS) 1745 if (replaced && rc->stage == UPDATE_DATA_PTRS)
1727 invalidate_extent_cache(root, &key, &next_key); 1746 invalidate_extent_cache(root, &key, &next_key);
1728 } 1747 }
@@ -1752,19 +1771,7 @@ out:
1752 1771
1753 btrfs_btree_balance_dirty(root, nr); 1772 btrfs_btree_balance_dirty(root, nr);
1754 1773
1755 /* 1774 put_inodes(&inode_list);
1756 * put inodes while we aren't holding the tree locks
1757 */
1758 while (!list_empty(&inode_list)) {
1759 struct inodevec *ivec;
1760 ivec = list_entry(inode_list.next, struct inodevec, list);
1761 list_del(&ivec->list);
1762 while (ivec->nr > 0) {
1763 ivec->nr--;
1764 iput(ivec->inode[ivec->nr]);
1765 }
1766 kfree(ivec);
1767 }
1768 1775
1769 if (replaced && rc->stage == UPDATE_DATA_PTRS) 1776 if (replaced && rc->stage == UPDATE_DATA_PTRS)
1770 invalidate_extent_cache(root, &key, &next_key); 1777 invalidate_extent_cache(root, &key, &next_key);
@@ -3274,8 +3281,10 @@ static noinline_for_stack int relocate_block_group(struct reloc_control *rc)
3274 return -ENOMEM; 3281 return -ENOMEM;
3275 3282
3276 path = btrfs_alloc_path(); 3283 path = btrfs_alloc_path();
3277 if (!path) 3284 if (!path) {
3285 kfree(cluster);
3278 return -ENOMEM; 3286 return -ENOMEM;
3287 }
3279 3288
3280 rc->extents_found = 0; 3289 rc->extents_found = 0;
3281 rc->extents_skipped = 0; 3290 rc->extents_skipped = 0;
@@ -3534,8 +3543,8 @@ int btrfs_relocate_block_group(struct btrfs_root *extent_root, u64 group_start)
3534 (unsigned long long)rc->block_group->key.objectid, 3543 (unsigned long long)rc->block_group->key.objectid,
3535 (unsigned long long)rc->block_group->flags); 3544 (unsigned long long)rc->block_group->flags);
3536 3545
3537 btrfs_start_delalloc_inodes(fs_info->tree_root); 3546 btrfs_start_delalloc_inodes(fs_info->tree_root, 0);
3538 btrfs_wait_ordered_extents(fs_info->tree_root, 0); 3547 btrfs_wait_ordered_extents(fs_info->tree_root, 0, 0);
3539 3548
3540 while (1) { 3549 while (1) {
3541 rc->extents_found = 0; 3550 rc->extents_found = 0;
@@ -3755,6 +3764,8 @@ out:
3755 BTRFS_DATA_RELOC_TREE_OBJECTID); 3764 BTRFS_DATA_RELOC_TREE_OBJECTID);
3756 if (IS_ERR(fs_root)) 3765 if (IS_ERR(fs_root))
3757 err = PTR_ERR(fs_root); 3766 err = PTR_ERR(fs_root);
3767 else
3768 btrfs_orphan_cleanup(fs_root);
3758 } 3769 }
3759 return err; 3770 return err;
3760} 3771}
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 752a5463bf53..8a1ea6e64575 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -66,7 +66,8 @@ enum {
66 Opt_degraded, Opt_subvol, Opt_device, Opt_nodatasum, Opt_nodatacow, 66 Opt_degraded, Opt_subvol, Opt_device, Opt_nodatasum, Opt_nodatacow,
67 Opt_max_extent, Opt_max_inline, Opt_alloc_start, Opt_nobarrier, 67 Opt_max_extent, Opt_max_inline, Opt_alloc_start, Opt_nobarrier,
68 Opt_ssd, Opt_nossd, Opt_ssd_spread, Opt_thread_pool, Opt_noacl, 68 Opt_ssd, Opt_nossd, Opt_ssd_spread, Opt_thread_pool, Opt_noacl,
69 Opt_compress, Opt_notreelog, Opt_ratio, Opt_flushoncommit, 69 Opt_compress, Opt_compress_force, Opt_notreelog, Opt_ratio,
70 Opt_flushoncommit,
70 Opt_discard, Opt_err, 71 Opt_discard, Opt_err,
71}; 72};
72 73
@@ -82,6 +83,7 @@ static match_table_t tokens = {
82 {Opt_alloc_start, "alloc_start=%s"}, 83 {Opt_alloc_start, "alloc_start=%s"},
83 {Opt_thread_pool, "thread_pool=%d"}, 84 {Opt_thread_pool, "thread_pool=%d"},
84 {Opt_compress, "compress"}, 85 {Opt_compress, "compress"},
86 {Opt_compress_force, "compress-force"},
85 {Opt_ssd, "ssd"}, 87 {Opt_ssd, "ssd"},
86 {Opt_ssd_spread, "ssd_spread"}, 88 {Opt_ssd_spread, "ssd_spread"},
87 {Opt_nossd, "nossd"}, 89 {Opt_nossd, "nossd"},
@@ -128,6 +130,7 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
128 substring_t args[MAX_OPT_ARGS]; 130 substring_t args[MAX_OPT_ARGS];
129 char *p, *num; 131 char *p, *num;
130 int intarg; 132 int intarg;
133 int ret = 0;
131 134
132 if (!options) 135 if (!options)
133 return 0; 136 return 0;
@@ -172,6 +175,11 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
172 printk(KERN_INFO "btrfs: use compression\n"); 175 printk(KERN_INFO "btrfs: use compression\n");
173 btrfs_set_opt(info->mount_opt, COMPRESS); 176 btrfs_set_opt(info->mount_opt, COMPRESS);
174 break; 177 break;
178 case Opt_compress_force:
179 printk(KERN_INFO "btrfs: forcing compression\n");
180 btrfs_set_opt(info->mount_opt, FORCE_COMPRESS);
181 btrfs_set_opt(info->mount_opt, COMPRESS);
182 break;
175 case Opt_ssd: 183 case Opt_ssd:
176 printk(KERN_INFO "btrfs: use ssd allocation scheme\n"); 184 printk(KERN_INFO "btrfs: use ssd allocation scheme\n");
177 btrfs_set_opt(info->mount_opt, SSD); 185 btrfs_set_opt(info->mount_opt, SSD);
@@ -262,12 +270,18 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
262 case Opt_discard: 270 case Opt_discard:
263 btrfs_set_opt(info->mount_opt, DISCARD); 271 btrfs_set_opt(info->mount_opt, DISCARD);
264 break; 272 break;
273 case Opt_err:
274 printk(KERN_INFO "btrfs: unrecognized mount option "
275 "'%s'\n", p);
276 ret = -EINVAL;
277 goto out;
265 default: 278 default:
266 break; 279 break;
267 } 280 }
268 } 281 }
282out:
269 kfree(options); 283 kfree(options);
270 return 0; 284 return ret;
271} 285}
272 286
273/* 287/*
@@ -405,8 +419,8 @@ int btrfs_sync_fs(struct super_block *sb, int wait)
405 return 0; 419 return 0;
406 } 420 }
407 421
408 btrfs_start_delalloc_inodes(root); 422 btrfs_start_delalloc_inodes(root, 0);
409 btrfs_wait_ordered_extents(root, 0); 423 btrfs_wait_ordered_extents(root, 0, 0);
410 424
411 trans = btrfs_start_transaction(root, 1); 425 trans = btrfs_start_transaction(root, 1);
412 ret = btrfs_commit_transaction(trans, root); 426 ret = btrfs_commit_transaction(trans, root);
@@ -450,6 +464,8 @@ static int btrfs_show_options(struct seq_file *seq, struct vfsmount *vfs)
450 seq_puts(seq, ",notreelog"); 464 seq_puts(seq, ",notreelog");
451 if (btrfs_test_opt(root, FLUSHONCOMMIT)) 465 if (btrfs_test_opt(root, FLUSHONCOMMIT))
452 seq_puts(seq, ",flushoncommit"); 466 seq_puts(seq, ",flushoncommit");
467 if (btrfs_test_opt(root, DISCARD))
468 seq_puts(seq, ",discard");
453 if (!(root->fs_info->sb->s_flags & MS_POSIXACL)) 469 if (!(root->fs_info->sb->s_flags & MS_POSIXACL))
454 seq_puts(seq, ",noacl"); 470 seq_puts(seq, ",noacl");
455 return 0; 471 return 0;
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index c207e8c32c9b..b2acc79f1b34 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -333,6 +333,9 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
333 memset(trans, 0, sizeof(*trans)); 333 memset(trans, 0, sizeof(*trans));
334 kmem_cache_free(btrfs_trans_handle_cachep, trans); 334 kmem_cache_free(btrfs_trans_handle_cachep, trans);
335 335
336 if (throttle)
337 btrfs_run_delayed_iputs(root);
338
336 return 0; 339 return 0;
337} 340}
338 341
@@ -354,7 +357,7 @@ int btrfs_end_transaction_throttle(struct btrfs_trans_handle *trans,
354 * those extents are sent to disk but does not wait on them 357 * those extents are sent to disk but does not wait on them
355 */ 358 */
356int btrfs_write_marked_extents(struct btrfs_root *root, 359int btrfs_write_marked_extents(struct btrfs_root *root,
357 struct extent_io_tree *dirty_pages) 360 struct extent_io_tree *dirty_pages, int mark)
358{ 361{
359 int ret; 362 int ret;
360 int err = 0; 363 int err = 0;
@@ -367,7 +370,7 @@ int btrfs_write_marked_extents(struct btrfs_root *root,
367 370
368 while (1) { 371 while (1) {
369 ret = find_first_extent_bit(dirty_pages, start, &start, &end, 372 ret = find_first_extent_bit(dirty_pages, start, &start, &end,
370 EXTENT_DIRTY); 373 mark);
371 if (ret) 374 if (ret)
372 break; 375 break;
373 while (start <= end) { 376 while (start <= end) {
@@ -413,7 +416,7 @@ int btrfs_write_marked_extents(struct btrfs_root *root,
413 * on all the pages and clear them from the dirty pages state tree 416 * on all the pages and clear them from the dirty pages state tree
414 */ 417 */
415int btrfs_wait_marked_extents(struct btrfs_root *root, 418int btrfs_wait_marked_extents(struct btrfs_root *root,
416 struct extent_io_tree *dirty_pages) 419 struct extent_io_tree *dirty_pages, int mark)
417{ 420{
418 int ret; 421 int ret;
419 int err = 0; 422 int err = 0;
@@ -425,12 +428,12 @@ int btrfs_wait_marked_extents(struct btrfs_root *root,
425 unsigned long index; 428 unsigned long index;
426 429
427 while (1) { 430 while (1) {
428 ret = find_first_extent_bit(dirty_pages, 0, &start, &end, 431 ret = find_first_extent_bit(dirty_pages, start, &start, &end,
429 EXTENT_DIRTY); 432 mark);
430 if (ret) 433 if (ret)
431 break; 434 break;
432 435
433 clear_extent_dirty(dirty_pages, start, end, GFP_NOFS); 436 clear_extent_bits(dirty_pages, start, end, mark, GFP_NOFS);
434 while (start <= end) { 437 while (start <= end) {
435 index = start >> PAGE_CACHE_SHIFT; 438 index = start >> PAGE_CACHE_SHIFT;
436 start = (u64)(index + 1) << PAGE_CACHE_SHIFT; 439 start = (u64)(index + 1) << PAGE_CACHE_SHIFT;
@@ -460,13 +463,13 @@ int btrfs_wait_marked_extents(struct btrfs_root *root,
460 * those extents are on disk for transaction or log commit 463 * those extents are on disk for transaction or log commit
461 */ 464 */
462int btrfs_write_and_wait_marked_extents(struct btrfs_root *root, 465int btrfs_write_and_wait_marked_extents(struct btrfs_root *root,
463 struct extent_io_tree *dirty_pages) 466 struct extent_io_tree *dirty_pages, int mark)
464{ 467{
465 int ret; 468 int ret;
466 int ret2; 469 int ret2;
467 470
468 ret = btrfs_write_marked_extents(root, dirty_pages); 471 ret = btrfs_write_marked_extents(root, dirty_pages, mark);
469 ret2 = btrfs_wait_marked_extents(root, dirty_pages); 472 ret2 = btrfs_wait_marked_extents(root, dirty_pages, mark);
470 return ret || ret2; 473 return ret || ret2;
471} 474}
472 475
@@ -479,7 +482,8 @@ int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans,
479 return filemap_write_and_wait(btree_inode->i_mapping); 482 return filemap_write_and_wait(btree_inode->i_mapping);
480 } 483 }
481 return btrfs_write_and_wait_marked_extents(root, 484 return btrfs_write_and_wait_marked_extents(root,
482 &trans->transaction->dirty_pages); 485 &trans->transaction->dirty_pages,
486 EXTENT_DIRTY);
483} 487}
484 488
485/* 489/*
@@ -497,13 +501,16 @@ static int update_cowonly_root(struct btrfs_trans_handle *trans,
497{ 501{
498 int ret; 502 int ret;
499 u64 old_root_bytenr; 503 u64 old_root_bytenr;
504 u64 old_root_used;
500 struct btrfs_root *tree_root = root->fs_info->tree_root; 505 struct btrfs_root *tree_root = root->fs_info->tree_root;
501 506
507 old_root_used = btrfs_root_used(&root->root_item);
502 btrfs_write_dirty_block_groups(trans, root); 508 btrfs_write_dirty_block_groups(trans, root);
503 509
504 while (1) { 510 while (1) {
505 old_root_bytenr = btrfs_root_bytenr(&root->root_item); 511 old_root_bytenr = btrfs_root_bytenr(&root->root_item);
506 if (old_root_bytenr == root->node->start) 512 if (old_root_bytenr == root->node->start &&
513 old_root_used == btrfs_root_used(&root->root_item))
507 break; 514 break;
508 515
509 btrfs_set_root_node(&root->root_item, root->node); 516 btrfs_set_root_node(&root->root_item, root->node);
@@ -512,6 +519,7 @@ static int update_cowonly_root(struct btrfs_trans_handle *trans,
512 &root->root_item); 519 &root->root_item);
513 BUG_ON(ret); 520 BUG_ON(ret);
514 521
522 old_root_used = btrfs_root_used(&root->root_item);
515 ret = btrfs_write_dirty_block_groups(trans, root); 523 ret = btrfs_write_dirty_block_groups(trans, root);
516 BUG_ON(ret); 524 BUG_ON(ret);
517 } 525 }
@@ -795,7 +803,6 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
795 memcpy(&pending->root_key, &key, sizeof(key)); 803 memcpy(&pending->root_key, &key, sizeof(key));
796fail: 804fail:
797 kfree(new_root_item); 805 kfree(new_root_item);
798 btrfs_unreserve_metadata_space(root, 6);
799 return ret; 806 return ret;
800} 807}
801 808
@@ -807,7 +814,6 @@ static noinline int finish_pending_snapshot(struct btrfs_fs_info *fs_info,
807 u64 index = 0; 814 u64 index = 0;
808 struct btrfs_trans_handle *trans; 815 struct btrfs_trans_handle *trans;
809 struct inode *parent_inode; 816 struct inode *parent_inode;
810 struct inode *inode;
811 struct btrfs_root *parent_root; 817 struct btrfs_root *parent_root;
812 818
813 parent_inode = pending->dentry->d_parent->d_inode; 819 parent_inode = pending->dentry->d_parent->d_inode;
@@ -839,8 +845,6 @@ static noinline int finish_pending_snapshot(struct btrfs_fs_info *fs_info,
839 845
840 BUG_ON(ret); 846 BUG_ON(ret);
841 847
842 inode = btrfs_lookup_dentry(parent_inode, pending->dentry);
843 d_instantiate(pending->dentry, inode);
844fail: 848fail:
845 btrfs_end_transaction(trans, fs_info->fs_root); 849 btrfs_end_transaction(trans, fs_info->fs_root);
846 return ret; 850 return ret;
@@ -994,11 +998,11 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
994 mutex_unlock(&root->fs_info->trans_mutex); 998 mutex_unlock(&root->fs_info->trans_mutex);
995 999
996 if (flush_on_commit) { 1000 if (flush_on_commit) {
997 btrfs_start_delalloc_inodes(root); 1001 btrfs_start_delalloc_inodes(root, 1);
998 ret = btrfs_wait_ordered_extents(root, 0); 1002 ret = btrfs_wait_ordered_extents(root, 0, 1);
999 BUG_ON(ret); 1003 BUG_ON(ret);
1000 } else if (snap_pending) { 1004 } else if (snap_pending) {
1001 ret = btrfs_wait_ordered_extents(root, 1); 1005 ret = btrfs_wait_ordered_extents(root, 0, 1);
1002 BUG_ON(ret); 1006 BUG_ON(ret);
1003 } 1007 }
1004 1008
@@ -1116,6 +1120,10 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
1116 current->journal_info = NULL; 1120 current->journal_info = NULL;
1117 1121
1118 kmem_cache_free(btrfs_trans_handle_cachep, trans); 1122 kmem_cache_free(btrfs_trans_handle_cachep, trans);
1123
1124 if (current != root->fs_info->transaction_kthread)
1125 btrfs_run_delayed_iputs(root);
1126
1119 return ret; 1127 return ret;
1120} 1128}
1121 1129
diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h
index d4e3e7a6938c..93c7ccb33118 100644
--- a/fs/btrfs/transaction.h
+++ b/fs/btrfs/transaction.h
@@ -107,10 +107,10 @@ void btrfs_throttle(struct btrfs_root *root);
107int btrfs_record_root_in_trans(struct btrfs_trans_handle *trans, 107int btrfs_record_root_in_trans(struct btrfs_trans_handle *trans,
108 struct btrfs_root *root); 108 struct btrfs_root *root);
109int btrfs_write_and_wait_marked_extents(struct btrfs_root *root, 109int btrfs_write_and_wait_marked_extents(struct btrfs_root *root,
110 struct extent_io_tree *dirty_pages); 110 struct extent_io_tree *dirty_pages, int mark);
111int btrfs_write_marked_extents(struct btrfs_root *root, 111int btrfs_write_marked_extents(struct btrfs_root *root,
112 struct extent_io_tree *dirty_pages); 112 struct extent_io_tree *dirty_pages, int mark);
113int btrfs_wait_marked_extents(struct btrfs_root *root, 113int btrfs_wait_marked_extents(struct btrfs_root *root,
114 struct extent_io_tree *dirty_pages); 114 struct extent_io_tree *dirty_pages, int mark);
115int btrfs_transaction_in_commit(struct btrfs_fs_info *info); 115int btrfs_transaction_in_commit(struct btrfs_fs_info *info);
116#endif 116#endif
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 741666a7676a..4a9434b622ec 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -542,8 +542,8 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans,
542 542
543 saved_nbytes = inode_get_bytes(inode); 543 saved_nbytes = inode_get_bytes(inode);
544 /* drop any overlapping extents */ 544 /* drop any overlapping extents */
545 ret = btrfs_drop_extents(trans, root, inode, 545 ret = btrfs_drop_extents(trans, inode, start, extent_end,
546 start, extent_end, extent_end, start, &alloc_hint, 1); 546 &alloc_hint, 1);
547 BUG_ON(ret); 547 BUG_ON(ret);
548 548
549 if (found_type == BTRFS_FILE_EXTENT_REG || 549 if (found_type == BTRFS_FILE_EXTENT_REG ||
@@ -930,6 +930,17 @@ out_nowrite:
930 return 0; 930 return 0;
931} 931}
932 932
933static int insert_orphan_item(struct btrfs_trans_handle *trans,
934 struct btrfs_root *root, u64 offset)
935{
936 int ret;
937 ret = btrfs_find_orphan_item(root, offset);
938 if (ret > 0)
939 ret = btrfs_insert_orphan_item(trans, root, offset);
940 return ret;
941}
942
943
933/* 944/*
934 * There are a few corners where the link count of the file can't 945 * There are a few corners where the link count of the file can't
935 * be properly maintained during replay. So, instead of adding 946 * be properly maintained during replay. So, instead of adding
@@ -997,9 +1008,13 @@ static noinline int fixup_inode_link_count(struct btrfs_trans_handle *trans,
997 } 1008 }
998 BTRFS_I(inode)->index_cnt = (u64)-1; 1009 BTRFS_I(inode)->index_cnt = (u64)-1;
999 1010
1000 if (inode->i_nlink == 0 && S_ISDIR(inode->i_mode)) { 1011 if (inode->i_nlink == 0) {
1001 ret = replay_dir_deletes(trans, root, NULL, path, 1012 if (S_ISDIR(inode->i_mode)) {
1002 inode->i_ino, 1); 1013 ret = replay_dir_deletes(trans, root, NULL, path,
1014 inode->i_ino, 1);
1015 BUG_ON(ret);
1016 }
1017 ret = insert_orphan_item(trans, root, inode->i_ino);
1003 BUG_ON(ret); 1018 BUG_ON(ret);
1004 } 1019 }
1005 btrfs_free_path(path); 1020 btrfs_free_path(path);
@@ -1587,7 +1602,6 @@ static int replay_one_buffer(struct btrfs_root *log, struct extent_buffer *eb,
1587 /* inode keys are done during the first stage */ 1602 /* inode keys are done during the first stage */
1588 if (key.type == BTRFS_INODE_ITEM_KEY && 1603 if (key.type == BTRFS_INODE_ITEM_KEY &&
1589 wc->stage == LOG_WALK_REPLAY_INODES) { 1604 wc->stage == LOG_WALK_REPLAY_INODES) {
1590 struct inode *inode;
1591 struct btrfs_inode_item *inode_item; 1605 struct btrfs_inode_item *inode_item;
1592 u32 mode; 1606 u32 mode;
1593 1607
@@ -1603,31 +1617,16 @@ static int replay_one_buffer(struct btrfs_root *log, struct extent_buffer *eb,
1603 eb, i, &key); 1617 eb, i, &key);
1604 BUG_ON(ret); 1618 BUG_ON(ret);
1605 1619
1606 /* for regular files, truncate away 1620 /* for regular files, make sure corresponding
1607 * extents past the new EOF 1621 * orhpan item exist. extents past the new EOF
1622 * will be truncated later by orphan cleanup.
1608 */ 1623 */
1609 if (S_ISREG(mode)) { 1624 if (S_ISREG(mode)) {
1610 inode = read_one_inode(root, 1625 ret = insert_orphan_item(wc->trans, root,
1611 key.objectid); 1626 key.objectid);
1612 BUG_ON(!inode);
1613
1614 ret = btrfs_truncate_inode_items(wc->trans,
1615 root, inode, inode->i_size,
1616 BTRFS_EXTENT_DATA_KEY);
1617 BUG_ON(ret); 1627 BUG_ON(ret);
1618
1619 /* if the nlink count is zero here, the iput
1620 * will free the inode. We bump it to make
1621 * sure it doesn't get freed until the link
1622 * count fixup is done
1623 */
1624 if (inode->i_nlink == 0) {
1625 btrfs_inc_nlink(inode);
1626 btrfs_update_inode(wc->trans,
1627 root, inode);
1628 }
1629 iput(inode);
1630 } 1628 }
1629
1631 ret = link_to_fixup_dir(wc->trans, root, 1630 ret = link_to_fixup_dir(wc->trans, root,
1632 path, key.objectid); 1631 path, key.objectid);
1633 BUG_ON(ret); 1632 BUG_ON(ret);
@@ -1977,10 +1976,11 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
1977{ 1976{
1978 int index1; 1977 int index1;
1979 int index2; 1978 int index2;
1979 int mark;
1980 int ret; 1980 int ret;
1981 struct btrfs_root *log = root->log_root; 1981 struct btrfs_root *log = root->log_root;
1982 struct btrfs_root *log_root_tree = root->fs_info->log_root_tree; 1982 struct btrfs_root *log_root_tree = root->fs_info->log_root_tree;
1983 u64 log_transid = 0; 1983 unsigned long log_transid = 0;
1984 1984
1985 mutex_lock(&root->log_mutex); 1985 mutex_lock(&root->log_mutex);
1986 index1 = root->log_transid % 2; 1986 index1 = root->log_transid % 2;
@@ -2014,24 +2014,29 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
2014 goto out; 2014 goto out;
2015 } 2015 }
2016 2016
2017 log_transid = root->log_transid;
2018 if (log_transid % 2 == 0)
2019 mark = EXTENT_DIRTY;
2020 else
2021 mark = EXTENT_NEW;
2022
2017 /* we start IO on all the marked extents here, but we don't actually 2023 /* we start IO on all the marked extents here, but we don't actually
2018 * wait for them until later. 2024 * wait for them until later.
2019 */ 2025 */
2020 ret = btrfs_write_marked_extents(log, &log->dirty_log_pages); 2026 ret = btrfs_write_marked_extents(log, &log->dirty_log_pages, mark);
2021 BUG_ON(ret); 2027 BUG_ON(ret);
2022 2028
2023 btrfs_set_root_node(&log->root_item, log->node); 2029 btrfs_set_root_node(&log->root_item, log->node);
2024 2030
2025 root->log_batch = 0; 2031 root->log_batch = 0;
2026 log_transid = root->log_transid;
2027 root->log_transid++; 2032 root->log_transid++;
2028 log->log_transid = root->log_transid; 2033 log->log_transid = root->log_transid;
2029 root->log_start_pid = 0; 2034 root->log_start_pid = 0;
2030 smp_mb(); 2035 smp_mb();
2031 /* 2036 /*
2032 * log tree has been flushed to disk, new modifications of 2037 * IO has been started, blocks of the log tree have WRITTEN flag set
2033 * the log will be written to new positions. so it's safe to 2038 * in their headers. new modifications of the log will be written to
2034 * allow log writers to go in. 2039 * new positions. so it's safe to allow log writers to go in.
2035 */ 2040 */
2036 mutex_unlock(&root->log_mutex); 2041 mutex_unlock(&root->log_mutex);
2037 2042
@@ -2052,7 +2057,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
2052 2057
2053 index2 = log_root_tree->log_transid % 2; 2058 index2 = log_root_tree->log_transid % 2;
2054 if (atomic_read(&log_root_tree->log_commit[index2])) { 2059 if (atomic_read(&log_root_tree->log_commit[index2])) {
2055 btrfs_wait_marked_extents(log, &log->dirty_log_pages); 2060 btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark);
2056 wait_log_commit(trans, log_root_tree, 2061 wait_log_commit(trans, log_root_tree,
2057 log_root_tree->log_transid); 2062 log_root_tree->log_transid);
2058 mutex_unlock(&log_root_tree->log_mutex); 2063 mutex_unlock(&log_root_tree->log_mutex);
@@ -2072,16 +2077,17 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
2072 * check the full commit flag again 2077 * check the full commit flag again
2073 */ 2078 */
2074 if (root->fs_info->last_trans_log_full_commit == trans->transid) { 2079 if (root->fs_info->last_trans_log_full_commit == trans->transid) {
2075 btrfs_wait_marked_extents(log, &log->dirty_log_pages); 2080 btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark);
2076 mutex_unlock(&log_root_tree->log_mutex); 2081 mutex_unlock(&log_root_tree->log_mutex);
2077 ret = -EAGAIN; 2082 ret = -EAGAIN;
2078 goto out_wake_log_root; 2083 goto out_wake_log_root;
2079 } 2084 }
2080 2085
2081 ret = btrfs_write_and_wait_marked_extents(log_root_tree, 2086 ret = btrfs_write_and_wait_marked_extents(log_root_tree,
2082 &log_root_tree->dirty_log_pages); 2087 &log_root_tree->dirty_log_pages,
2088 EXTENT_DIRTY | EXTENT_NEW);
2083 BUG_ON(ret); 2089 BUG_ON(ret);
2084 btrfs_wait_marked_extents(log, &log->dirty_log_pages); 2090 btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark);
2085 2091
2086 btrfs_set_super_log_root(&root->fs_info->super_for_commit, 2092 btrfs_set_super_log_root(&root->fs_info->super_for_commit,
2087 log_root_tree->node->start); 2093 log_root_tree->node->start);
@@ -2147,12 +2153,12 @@ int btrfs_free_log(struct btrfs_trans_handle *trans, struct btrfs_root *root)
2147 2153
2148 while (1) { 2154 while (1) {
2149 ret = find_first_extent_bit(&log->dirty_log_pages, 2155 ret = find_first_extent_bit(&log->dirty_log_pages,
2150 0, &start, &end, EXTENT_DIRTY); 2156 0, &start, &end, EXTENT_DIRTY | EXTENT_NEW);
2151 if (ret) 2157 if (ret)
2152 break; 2158 break;
2153 2159
2154 clear_extent_dirty(&log->dirty_log_pages, 2160 clear_extent_bits(&log->dirty_log_pages, start, end,
2155 start, end, GFP_NOFS); 2161 EXTENT_DIRTY | EXTENT_NEW, GFP_NOFS);
2156 } 2162 }
2157 2163
2158 if (log->log_transid > 0) { 2164 if (log->log_transid > 0) {
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 7eda483d7b5a..41ecbb2347f2 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -1135,7 +1135,7 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path)
1135 root->fs_info->avail_metadata_alloc_bits; 1135 root->fs_info->avail_metadata_alloc_bits;
1136 1136
1137 if ((all_avail & BTRFS_BLOCK_GROUP_RAID10) && 1137 if ((all_avail & BTRFS_BLOCK_GROUP_RAID10) &&
1138 root->fs_info->fs_devices->rw_devices <= 4) { 1138 root->fs_info->fs_devices->num_devices <= 4) {
1139 printk(KERN_ERR "btrfs: unable to go below four devices " 1139 printk(KERN_ERR "btrfs: unable to go below four devices "
1140 "on raid10\n"); 1140 "on raid10\n");
1141 ret = -EINVAL; 1141 ret = -EINVAL;
@@ -1143,7 +1143,7 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path)
1143 } 1143 }
1144 1144
1145 if ((all_avail & BTRFS_BLOCK_GROUP_RAID1) && 1145 if ((all_avail & BTRFS_BLOCK_GROUP_RAID1) &&
1146 root->fs_info->fs_devices->rw_devices <= 2) { 1146 root->fs_info->fs_devices->num_devices <= 2) {
1147 printk(KERN_ERR "btrfs: unable to go below two " 1147 printk(KERN_ERR "btrfs: unable to go below two "
1148 "devices on raid1\n"); 1148 "devices on raid1\n");
1149 ret = -EINVAL; 1149 ret = -EINVAL;
@@ -1434,8 +1434,8 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
1434 return -EINVAL; 1434 return -EINVAL;
1435 1435
1436 bdev = open_bdev_exclusive(device_path, 0, root->fs_info->bdev_holder); 1436 bdev = open_bdev_exclusive(device_path, 0, root->fs_info->bdev_holder);
1437 if (!bdev) 1437 if (IS_ERR(bdev))
1438 return -EIO; 1438 return PTR_ERR(bdev);
1439 1439
1440 if (root->fs_info->fs_devices->seeding) { 1440 if (root->fs_info->fs_devices->seeding) {
1441 seeding_dev = 1; 1441 seeding_dev = 1;
@@ -2209,7 +2209,7 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
2209 max_chunk_size = 10 * calc_size; 2209 max_chunk_size = 10 * calc_size;
2210 min_stripe_size = 64 * 1024 * 1024; 2210 min_stripe_size = 64 * 1024 * 1024;
2211 } else if (type & BTRFS_BLOCK_GROUP_METADATA) { 2211 } else if (type & BTRFS_BLOCK_GROUP_METADATA) {
2212 max_chunk_size = 4 * calc_size; 2212 max_chunk_size = 256 * 1024 * 1024;
2213 min_stripe_size = 32 * 1024 * 1024; 2213 min_stripe_size = 32 * 1024 * 1024;
2214 } else if (type & BTRFS_BLOCK_GROUP_SYSTEM) { 2214 } else if (type & BTRFS_BLOCK_GROUP_SYSTEM) {
2215 calc_size = 8 * 1024 * 1024; 2215 calc_size = 8 * 1024 * 1024;
@@ -2538,6 +2538,11 @@ int btrfs_chunk_readonly(struct btrfs_root *root, u64 chunk_offset)
2538 if (!em) 2538 if (!em)
2539 return 1; 2539 return 1;
2540 2540
2541 if (btrfs_test_opt(root, DEGRADED)) {
2542 free_extent_map(em);
2543 return 0;
2544 }
2545
2541 map = (struct map_lookup *)em->bdev; 2546 map = (struct map_lookup *)em->bdev;
2542 for (i = 0; i < map->num_stripes; i++) { 2547 for (i = 0; i < map->num_stripes; i++) {
2543 if (!map->stripes[i].dev->writeable) { 2548 if (!map->stripes[i].dev->writeable) {
@@ -2649,8 +2654,10 @@ again:
2649 em = lookup_extent_mapping(em_tree, logical, *length); 2654 em = lookup_extent_mapping(em_tree, logical, *length);
2650 read_unlock(&em_tree->lock); 2655 read_unlock(&em_tree->lock);
2651 2656
2652 if (!em && unplug_page) 2657 if (!em && unplug_page) {
2658 kfree(multi);
2653 return 0; 2659 return 0;
2660 }
2654 2661
2655 if (!em) { 2662 if (!em) {
2656 printk(KERN_CRIT "unable to find logical %llu len %llu\n", 2663 printk(KERN_CRIT "unable to find logical %llu len %llu\n",
diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c
index b6dd5967c48a..193b58f7d3f3 100644
--- a/fs/btrfs/xattr.c
+++ b/fs/btrfs/xattr.c
@@ -85,22 +85,23 @@ out:
85 return ret; 85 return ret;
86} 86}
87 87
88int __btrfs_setxattr(struct inode *inode, const char *name, 88static int do_setxattr(struct btrfs_trans_handle *trans,
89 const void *value, size_t size, int flags) 89 struct inode *inode, const char *name,
90 const void *value, size_t size, int flags)
90{ 91{
91 struct btrfs_dir_item *di; 92 struct btrfs_dir_item *di;
92 struct btrfs_root *root = BTRFS_I(inode)->root; 93 struct btrfs_root *root = BTRFS_I(inode)->root;
93 struct btrfs_trans_handle *trans;
94 struct btrfs_path *path; 94 struct btrfs_path *path;
95 int ret = 0, mod = 0; 95 size_t name_len = strlen(name);
96 int ret = 0;
97
98 if (name_len + size > BTRFS_MAX_XATTR_SIZE(root))
99 return -ENOSPC;
96 100
97 path = btrfs_alloc_path(); 101 path = btrfs_alloc_path();
98 if (!path) 102 if (!path)
99 return -ENOMEM; 103 return -ENOMEM;
100 104
101 trans = btrfs_join_transaction(root, 1);
102 btrfs_set_trans_block_group(trans, inode);
103
104 /* first lets see if we already have this xattr */ 105 /* first lets see if we already have this xattr */
105 di = btrfs_lookup_xattr(trans, root, path, inode->i_ino, name, 106 di = btrfs_lookup_xattr(trans, root, path, inode->i_ino, name,
106 strlen(name), -1); 107 strlen(name), -1);
@@ -118,15 +119,12 @@ int __btrfs_setxattr(struct inode *inode, const char *name,
118 } 119 }
119 120
120 ret = btrfs_delete_one_dir_name(trans, root, path, di); 121 ret = btrfs_delete_one_dir_name(trans, root, path, di);
121 if (ret) 122 BUG_ON(ret);
122 goto out;
123 btrfs_release_path(root, path); 123 btrfs_release_path(root, path);
124 124
125 /* if we don't have a value then we are removing the xattr */ 125 /* if we don't have a value then we are removing the xattr */
126 if (!value) { 126 if (!value)
127 mod = 1;
128 goto out; 127 goto out;
129 }
130 } else { 128 } else {
131 btrfs_release_path(root, path); 129 btrfs_release_path(root, path);
132 130
@@ -138,20 +136,45 @@ int __btrfs_setxattr(struct inode *inode, const char *name,
138 } 136 }
139 137
140 /* ok we have to create a completely new xattr */ 138 /* ok we have to create a completely new xattr */
141 ret = btrfs_insert_xattr_item(trans, root, name, strlen(name), 139 ret = btrfs_insert_xattr_item(trans, root, path, inode->i_ino,
142 value, size, inode->i_ino); 140 name, name_len, value, size);
141 BUG_ON(ret);
142out:
143 btrfs_free_path(path);
144 return ret;
145}
146
147int __btrfs_setxattr(struct btrfs_trans_handle *trans,
148 struct inode *inode, const char *name,
149 const void *value, size_t size, int flags)
150{
151 struct btrfs_root *root = BTRFS_I(inode)->root;
152 int ret;
153
154 if (trans)
155 return do_setxattr(trans, inode, name, value, size, flags);
156
157 ret = btrfs_reserve_metadata_space(root, 2);
143 if (ret) 158 if (ret)
144 goto out; 159 return ret;
145 mod = 1;
146 160
147out: 161 trans = btrfs_start_transaction(root, 1);
148 if (mod) { 162 if (!trans) {
149 inode->i_ctime = CURRENT_TIME; 163 ret = -ENOMEM;
150 ret = btrfs_update_inode(trans, root, inode); 164 goto out;
151 } 165 }
166 btrfs_set_trans_block_group(trans, inode);
152 167
153 btrfs_end_transaction(trans, root); 168 ret = do_setxattr(trans, inode, name, value, size, flags);
154 btrfs_free_path(path); 169 if (ret)
170 goto out;
171
172 inode->i_ctime = CURRENT_TIME;
173 ret = btrfs_update_inode(trans, root, inode);
174 BUG_ON(ret);
175out:
176 btrfs_end_transaction_throttle(trans, root);
177 btrfs_unreserve_metadata_space(root, 2);
155 return ret; 178 return ret;
156} 179}
157 180
@@ -314,7 +337,9 @@ int btrfs_setxattr(struct dentry *dentry, const char *name, const void *value,
314 337
315 if (size == 0) 338 if (size == 0)
316 value = ""; /* empty EA, do not remove */ 339 value = ""; /* empty EA, do not remove */
317 return __btrfs_setxattr(dentry->d_inode, name, value, size, flags); 340
341 return __btrfs_setxattr(NULL, dentry->d_inode, name, value, size,
342 flags);
318} 343}
319 344
320int btrfs_removexattr(struct dentry *dentry, const char *name) 345int btrfs_removexattr(struct dentry *dentry, const char *name)
@@ -329,10 +354,13 @@ int btrfs_removexattr(struct dentry *dentry, const char *name)
329 354
330 if (!btrfs_is_valid_xattr(name)) 355 if (!btrfs_is_valid_xattr(name))
331 return -EOPNOTSUPP; 356 return -EOPNOTSUPP;
332 return __btrfs_setxattr(dentry->d_inode, name, NULL, 0, XATTR_REPLACE); 357
358 return __btrfs_setxattr(NULL, dentry->d_inode, name, NULL, 0,
359 XATTR_REPLACE);
333} 360}
334 361
335int btrfs_xattr_security_init(struct inode *inode, struct inode *dir) 362int btrfs_xattr_security_init(struct btrfs_trans_handle *trans,
363 struct inode *inode, struct inode *dir)
336{ 364{
337 int err; 365 int err;
338 size_t len; 366 size_t len;
@@ -354,7 +382,7 @@ int btrfs_xattr_security_init(struct inode *inode, struct inode *dir)
354 } else { 382 } else {
355 strcpy(name, XATTR_SECURITY_PREFIX); 383 strcpy(name, XATTR_SECURITY_PREFIX);
356 strcpy(name + XATTR_SECURITY_PREFIX_LEN, suffix); 384 strcpy(name + XATTR_SECURITY_PREFIX_LEN, suffix);
357 err = __btrfs_setxattr(inode, name, value, len, 0); 385 err = __btrfs_setxattr(trans, inode, name, value, len, 0);
358 kfree(name); 386 kfree(name);
359 } 387 }
360 388
diff --git a/fs/btrfs/xattr.h b/fs/btrfs/xattr.h
index c71e9c3cf3f7..721efa0346e0 100644
--- a/fs/btrfs/xattr.h
+++ b/fs/btrfs/xattr.h
@@ -27,15 +27,16 @@ extern struct xattr_handler *btrfs_xattr_handlers[];
27 27
28extern ssize_t __btrfs_getxattr(struct inode *inode, const char *name, 28extern ssize_t __btrfs_getxattr(struct inode *inode, const char *name,
29 void *buffer, size_t size); 29 void *buffer, size_t size);
30extern int __btrfs_setxattr(struct inode *inode, const char *name, 30extern int __btrfs_setxattr(struct btrfs_trans_handle *trans,
31 const void *value, size_t size, int flags); 31 struct inode *inode, const char *name,
32 32 const void *value, size_t size, int flags);
33extern ssize_t btrfs_getxattr(struct dentry *dentry, const char *name, 33extern ssize_t btrfs_getxattr(struct dentry *dentry, const char *name,
34 void *buffer, size_t size); 34 void *buffer, size_t size);
35extern int btrfs_setxattr(struct dentry *dentry, const char *name, 35extern int btrfs_setxattr(struct dentry *dentry, const char *name,
36 const void *value, size_t size, int flags); 36 const void *value, size_t size, int flags);
37extern int btrfs_removexattr(struct dentry *dentry, const char *name); 37extern int btrfs_removexattr(struct dentry *dentry, const char *name);
38 38
39extern int btrfs_xattr_security_init(struct inode *inode, struct inode *dir); 39extern int btrfs_xattr_security_init(struct btrfs_trans_handle *trans,
40 struct inode *inode, struct inode *dir);
40 41
41#endif /* __XATTR__ */ 42#endif /* __XATTR__ */
diff --git a/fs/cachefiles/bind.c b/fs/cachefiles/bind.c
index 3797e0077b35..2906077ac798 100644
--- a/fs/cachefiles/bind.c
+++ b/fs/cachefiles/bind.c
@@ -84,7 +84,7 @@ int cachefiles_daemon_bind(struct cachefiles_cache *cache, char *args)
84static int cachefiles_daemon_add_cache(struct cachefiles_cache *cache) 84static int cachefiles_daemon_add_cache(struct cachefiles_cache *cache)
85{ 85{
86 struct cachefiles_object *fsdef; 86 struct cachefiles_object *fsdef;
87 struct nameidata nd; 87 struct path path;
88 struct kstatfs stats; 88 struct kstatfs stats;
89 struct dentry *graveyard, *cachedir, *root; 89 struct dentry *graveyard, *cachedir, *root;
90 const struct cred *saved_cred; 90 const struct cred *saved_cred;
@@ -114,15 +114,12 @@ static int cachefiles_daemon_add_cache(struct cachefiles_cache *cache)
114 _debug("- fsdef %p", fsdef); 114 _debug("- fsdef %p", fsdef);
115 115
116 /* look up the directory at the root of the cache */ 116 /* look up the directory at the root of the cache */
117 memset(&nd, 0, sizeof(nd)); 117 ret = kern_path(cache->rootdirname, LOOKUP_DIRECTORY, &path);
118
119 ret = path_lookup(cache->rootdirname, LOOKUP_DIRECTORY, &nd);
120 if (ret < 0) 118 if (ret < 0)
121 goto error_open_root; 119 goto error_open_root;
122 120
123 cache->mnt = mntget(nd.path.mnt); 121 cache->mnt = path.mnt;
124 root = dget(nd.path.dentry); 122 root = path.dentry;
125 path_put(&nd.path);
126 123
127 /* check parameters */ 124 /* check parameters */
128 ret = -EOPNOTSUPP; 125 ret = -EOPNOTSUPP;
diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c
index 14ac4806e291..eeb4986ea7db 100644
--- a/fs/cachefiles/namei.c
+++ b/fs/cachefiles/namei.c
@@ -348,7 +348,17 @@ int cachefiles_delete_object(struct cachefiles_cache *cache,
348 dir = dget_parent(object->dentry); 348 dir = dget_parent(object->dentry);
349 349
350 mutex_lock_nested(&dir->d_inode->i_mutex, I_MUTEX_PARENT); 350 mutex_lock_nested(&dir->d_inode->i_mutex, I_MUTEX_PARENT);
351 ret = cachefiles_bury_object(cache, dir, object->dentry); 351
352 /* we need to check that our parent is _still_ our parent - it may have
353 * been renamed */
354 if (dir == object->dentry->d_parent) {
355 ret = cachefiles_bury_object(cache, dir, object->dentry);
356 } else {
357 /* it got moved, presumably by cachefilesd culling it, so it's
358 * no longer in the key path and we can ignore it */
359 mutex_unlock(&dir->d_inode->i_mutex);
360 ret = 0;
361 }
352 362
353 dput(dir); 363 dput(dir);
354 _leave(" = %d", ret); 364 _leave(" = %d", ret);
diff --git a/fs/cachefiles/rdwr.c b/fs/cachefiles/rdwr.c
index a6c8c6fe8df9..1d8332563863 100644
--- a/fs/cachefiles/rdwr.c
+++ b/fs/cachefiles/rdwr.c
@@ -11,7 +11,6 @@
11 11
12#include <linux/mount.h> 12#include <linux/mount.h>
13#include <linux/file.h> 13#include <linux/file.h>
14#include <linux/ima.h>
15#include "internal.h" 14#include "internal.h"
16 15
17/* 16/*
@@ -923,7 +922,6 @@ int cachefiles_write_page(struct fscache_storage *op, struct page *page)
923 if (IS_ERR(file)) { 922 if (IS_ERR(file)) {
924 ret = PTR_ERR(file); 923 ret = PTR_ERR(file);
925 } else { 924 } else {
926 ima_counts_get(file);
927 ret = -EIO; 925 ret = -EIO;
928 if (file->f_op->write) { 926 if (file->f_op->write) {
929 pos = (loff_t) page->index << PAGE_SHIFT; 927 pos = (loff_t) page->index << PAGE_SHIFT;
diff --git a/fs/cifs/CHANGES b/fs/cifs/CHANGES
index 094ea65afc85..49503d2edc7e 100644
--- a/fs/cifs/CHANGES
+++ b/fs/cifs/CHANGES
@@ -1,3 +1,7 @@
1Version 1.62
2------------
3Add sockopt=TCP_NODELAY mount option.
4
1Version 1.61 5Version 1.61
2------------ 6------------
3Fix append problem to Samba servers (files opened with O_APPEND could 7Fix append problem to Samba servers (files opened with O_APPEND could
@@ -5,7 +9,9 @@ have duplicated data). Fix oops in cifs_lookup. Workaround problem
5mounting to OS/400 Netserve. Fix oops in cifs_get_tcp_session. 9mounting to OS/400 Netserve. Fix oops in cifs_get_tcp_session.
6Disable use of server inode numbers when server only 10Disable use of server inode numbers when server only
7partially supports them (e.g. for one server querying inode numbers on 11partially supports them (e.g. for one server querying inode numbers on
8FindFirst fails but QPathInfo queries works). 12FindFirst fails but QPathInfo queries works). Fix oops with dfs in
13cifs_put_smb_ses. Fix mmap to work on directio mounts (needed
14for OpenOffice when on forcedirectio mount e.g.)
9 15
10Version 1.60 16Version 1.60
11------------- 17-------------
diff --git a/fs/cifs/cifs_dfs_ref.c b/fs/cifs/cifs_dfs_ref.c
index fea9e898c4ba..b44ce0a0711c 100644
--- a/fs/cifs/cifs_dfs_ref.c
+++ b/fs/cifs/cifs_dfs_ref.c
@@ -269,7 +269,7 @@ static int add_mount_helper(struct vfsmount *newmnt, struct nameidata *nd,
269 int err; 269 int err;
270 270
271 mntget(newmnt); 271 mntget(newmnt);
272 err = do_add_mount(newmnt, &nd->path, nd->path.mnt->mnt_flags, mntlist); 272 err = do_add_mount(newmnt, &nd->path, nd->path.mnt->mnt_flags | MNT_SHRINKABLE, mntlist);
273 switch (err) { 273 switch (err) {
274 case 0: 274 case 0:
275 path_put(&nd->path); 275 path_put(&nd->path);
@@ -371,7 +371,6 @@ cifs_dfs_follow_mountpoint(struct dentry *dentry, struct nameidata *nd)
371 if (IS_ERR(mnt)) 371 if (IS_ERR(mnt))
372 goto out_err; 372 goto out_err;
373 373
374 nd->path.mnt->mnt_flags |= MNT_SHRINKABLE;
375 rc = add_mount_helper(mnt, nd, &cifs_dfs_automount_list); 374 rc = add_mount_helper(mnt, nd, &cifs_dfs_automount_list);
376 375
377out: 376out:
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 29f1da761bbf..8c6a03627176 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -758,7 +758,7 @@ const struct file_operations cifs_file_ops = {
758}; 758};
759 759
760const struct file_operations cifs_file_direct_ops = { 760const struct file_operations cifs_file_direct_ops = {
761 /* no mmap, no aio, no readv - 761 /* no aio, no readv -
762 BB reevaluate whether they can be done with directio, no cache */ 762 BB reevaluate whether they can be done with directio, no cache */
763 .read = cifs_user_read, 763 .read = cifs_user_read,
764 .write = cifs_user_write, 764 .write = cifs_user_write,
@@ -767,6 +767,7 @@ const struct file_operations cifs_file_direct_ops = {
767 .lock = cifs_lock, 767 .lock = cifs_lock,
768 .fsync = cifs_fsync, 768 .fsync = cifs_fsync,
769 .flush = cifs_flush, 769 .flush = cifs_flush,
770 .mmap = cifs_file_mmap,
770 .splice_read = generic_file_splice_read, 771 .splice_read = generic_file_splice_read,
771#ifdef CONFIG_CIFS_POSIX 772#ifdef CONFIG_CIFS_POSIX
772 .unlocked_ioctl = cifs_ioctl, 773 .unlocked_ioctl = cifs_ioctl,
diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h
index ac2b24c192f8..78c1b86d55f6 100644
--- a/fs/cifs/cifsfs.h
+++ b/fs/cifs/cifsfs.h
@@ -113,5 +113,5 @@ extern long cifs_ioctl(struct file *filep, unsigned int cmd, unsigned long arg);
113extern const struct export_operations cifs_export_ops; 113extern const struct export_operations cifs_export_ops;
114#endif /* EXPERIMENTAL */ 114#endif /* EXPERIMENTAL */
115 115
116#define CIFS_VERSION "1.61" 116#define CIFS_VERSION "1.62"
117#endif /* _CIFSFS_H */ 117#endif /* _CIFSFS_H */
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index 4b35f7ec0583..ed751bb657db 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -149,6 +149,7 @@ struct TCP_Server_Info {
149 bool svlocal:1; /* local server or remote */ 149 bool svlocal:1; /* local server or remote */
150 bool noblocksnd; /* use blocking sendmsg */ 150 bool noblocksnd; /* use blocking sendmsg */
151 bool noautotune; /* do not autotune send buf sizes */ 151 bool noautotune; /* do not autotune send buf sizes */
152 bool tcp_nodelay;
152 atomic_t inFlight; /* number of requests on the wire to server */ 153 atomic_t inFlight; /* number of requests on the wire to server */
153#ifdef CONFIG_CIFS_STATS2 154#ifdef CONFIG_CIFS_STATS2
154 atomic_t inSend; /* requests trying to send */ 155 atomic_t inSend; /* requests trying to send */
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 63ea83ff687f..2e9e09ca0e30 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -98,7 +98,7 @@ struct smb_vol {
98 bool nostrictsync:1; /* do not force expensive SMBflush on every sync */ 98 bool nostrictsync:1; /* do not force expensive SMBflush on every sync */
99 unsigned int rsize; 99 unsigned int rsize;
100 unsigned int wsize; 100 unsigned int wsize;
101 unsigned int sockopt; 101 bool sockopt_tcp_nodelay:1;
102 unsigned short int port; 102 unsigned short int port;
103 char *prepath; 103 char *prepath;
104}; 104};
@@ -1142,9 +1142,11 @@ cifs_parse_mount_options(char *options, const char *devname,
1142 simple_strtoul(value, &value, 0); 1142 simple_strtoul(value, &value, 0);
1143 } 1143 }
1144 } else if (strnicmp(data, "sockopt", 5) == 0) { 1144 } else if (strnicmp(data, "sockopt", 5) == 0) {
1145 if (value && *value) { 1145 if (!value || !*value) {
1146 vol->sockopt = 1146 cERROR(1, ("no socket option specified"));
1147 simple_strtoul(value, &value, 0); 1147 continue;
1148 } else if (strnicmp(value, "TCP_NODELAY", 11) == 0) {
1149 vol->sockopt_tcp_nodelay = 1;
1148 } 1150 }
1149 } else if (strnicmp(data, "netbiosname", 4) == 0) { 1151 } else if (strnicmp(data, "netbiosname", 4) == 0) {
1150 if (!value || !*value || (*value == ' ')) { 1152 if (!value || !*value || (*value == ' ')) {
@@ -1514,6 +1516,7 @@ cifs_get_tcp_session(struct smb_vol *volume_info)
1514 1516
1515 tcp_ses->noblocksnd = volume_info->noblocksnd; 1517 tcp_ses->noblocksnd = volume_info->noblocksnd;
1516 tcp_ses->noautotune = volume_info->noautotune; 1518 tcp_ses->noautotune = volume_info->noautotune;
1519 tcp_ses->tcp_nodelay = volume_info->sockopt_tcp_nodelay;
1517 atomic_set(&tcp_ses->inFlight, 0); 1520 atomic_set(&tcp_ses->inFlight, 0);
1518 init_waitqueue_head(&tcp_ses->response_q); 1521 init_waitqueue_head(&tcp_ses->response_q);
1519 init_waitqueue_head(&tcp_ses->request_q); 1522 init_waitqueue_head(&tcp_ses->request_q);
@@ -1764,6 +1767,7 @@ static int
1764ipv4_connect(struct TCP_Server_Info *server) 1767ipv4_connect(struct TCP_Server_Info *server)
1765{ 1768{
1766 int rc = 0; 1769 int rc = 0;
1770 int val;
1767 bool connected = false; 1771 bool connected = false;
1768 __be16 orig_port = 0; 1772 __be16 orig_port = 0;
1769 struct socket *socket = server->ssocket; 1773 struct socket *socket = server->ssocket;
@@ -1845,6 +1849,14 @@ ipv4_connect(struct TCP_Server_Info *server)
1845 socket->sk->sk_rcvbuf = 140 * 1024; 1849 socket->sk->sk_rcvbuf = 140 * 1024;
1846 } 1850 }
1847 1851
1852 if (server->tcp_nodelay) {
1853 val = 1;
1854 rc = kernel_setsockopt(socket, SOL_TCP, TCP_NODELAY,
1855 (char *)&val, sizeof(val));
1856 if (rc)
1857 cFYI(1, ("set TCP_NODELAY socket option error %d", rc));
1858 }
1859
1848 cFYI(1, ("sndbuf %d rcvbuf %d rcvtimeo 0x%lx", 1860 cFYI(1, ("sndbuf %d rcvbuf %d rcvtimeo 0x%lx",
1849 socket->sk->sk_sndbuf, 1861 socket->sk->sk_sndbuf,
1850 socket->sk->sk_rcvbuf, socket->sk->sk_rcvtimeo)); 1862 socket->sk->sk_rcvbuf, socket->sk->sk_rcvtimeo));
@@ -1916,6 +1928,7 @@ static int
1916ipv6_connect(struct TCP_Server_Info *server) 1928ipv6_connect(struct TCP_Server_Info *server)
1917{ 1929{
1918 int rc = 0; 1930 int rc = 0;
1931 int val;
1919 bool connected = false; 1932 bool connected = false;
1920 __be16 orig_port = 0; 1933 __be16 orig_port = 0;
1921 struct socket *socket = server->ssocket; 1934 struct socket *socket = server->ssocket;
@@ -1987,6 +2000,15 @@ ipv6_connect(struct TCP_Server_Info *server)
1987 */ 2000 */
1988 socket->sk->sk_rcvtimeo = 7 * HZ; 2001 socket->sk->sk_rcvtimeo = 7 * HZ;
1989 socket->sk->sk_sndtimeo = 5 * HZ; 2002 socket->sk->sk_sndtimeo = 5 * HZ;
2003
2004 if (server->tcp_nodelay) {
2005 val = 1;
2006 rc = kernel_setsockopt(socket, SOL_TCP, TCP_NODELAY,
2007 (char *)&val, sizeof(val));
2008 if (rc)
2009 cFYI(1, ("set TCP_NODELAY socket option error %d", rc));
2010 }
2011
1990 server->ssocket = socket; 2012 server->ssocket = socket;
1991 2013
1992 return rc; 2014 return rc;
@@ -2287,12 +2309,12 @@ int
2287cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb, 2309cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb,
2288 char *mount_data_global, const char *devname) 2310 char *mount_data_global, const char *devname)
2289{ 2311{
2290 int rc = 0; 2312 int rc;
2291 int xid; 2313 int xid;
2292 struct smb_vol *volume_info; 2314 struct smb_vol *volume_info;
2293 struct cifsSesInfo *pSesInfo = NULL; 2315 struct cifsSesInfo *pSesInfo;
2294 struct cifsTconInfo *tcon = NULL; 2316 struct cifsTconInfo *tcon;
2295 struct TCP_Server_Info *srvTcp = NULL; 2317 struct TCP_Server_Info *srvTcp;
2296 char *full_path; 2318 char *full_path;
2297 char *mount_data = mount_data_global; 2319 char *mount_data = mount_data_global;
2298#ifdef CONFIG_CIFS_DFS_UPCALL 2320#ifdef CONFIG_CIFS_DFS_UPCALL
@@ -2301,6 +2323,10 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb,
2301 int referral_walks_count = 0; 2323 int referral_walks_count = 0;
2302try_mount_again: 2324try_mount_again:
2303#endif 2325#endif
2326 rc = 0;
2327 tcon = NULL;
2328 pSesInfo = NULL;
2329 srvTcp = NULL;
2304 full_path = NULL; 2330 full_path = NULL;
2305 2331
2306 xid = GetXid(); 2332 xid = GetXid();
@@ -2597,6 +2623,7 @@ remote_path_check:
2597 2623
2598 cleanup_volume_info(&volume_info); 2624 cleanup_volume_info(&volume_info);
2599 referral_walks_count++; 2625 referral_walks_count++;
2626 FreeXid(xid);
2600 goto try_mount_again; 2627 goto try_mount_again;
2601 } 2628 }
2602#else /* No DFS support, return error on mount */ 2629#else /* No DFS support, return error on mount */
diff --git a/fs/cifs/export.c b/fs/cifs/export.c
index 75949d6a5f1b..6177f7cca16a 100644
--- a/fs/cifs/export.c
+++ b/fs/cifs/export.c
@@ -24,7 +24,7 @@
24 */ 24 */
25 25
26 /* 26 /*
27 * See Documentation/filesystems/Exporting 27 * See Documentation/filesystems/nfs/Exporting
28 * and examples in fs/exportfs 28 * and examples in fs/exportfs
29 * 29 *
30 * Since cifs is a network file system, an "fsid" must be included for 30 * Since cifs is a network file system, an "fsid" must be included for
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index cf18ee765590..e3fda978f481 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -1762,8 +1762,18 @@ cifs_setattr_unix(struct dentry *direntry, struct iattr *attrs)
1762 CIFS_MOUNT_MAP_SPECIAL_CHR); 1762 CIFS_MOUNT_MAP_SPECIAL_CHR);
1763 } 1763 }
1764 1764
1765 if (!rc) 1765 if (!rc) {
1766 rc = inode_setattr(inode, attrs); 1766 rc = inode_setattr(inode, attrs);
1767
1768 /* force revalidate when any of these times are set since some
1769 of the fs types (eg ext3, fat) do not have fine enough
1770 time granularity to match protocol, and we do not have a
1771 a way (yet) to query the server fs's time granularity (and
1772 whether it rounds times down).
1773 */
1774 if (!rc && (attrs->ia_valid & (ATTR_MTIME | ATTR_CTIME)))
1775 cifsInode->time = 0;
1776 }
1767out: 1777out:
1768 kfree(args); 1778 kfree(args);
1769 kfree(full_path); 1779 kfree(full_path);
diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c
index f84062f9a985..c343b14ba2d3 100644
--- a/fs/cifs/readdir.c
+++ b/fs/cifs/readdir.c
@@ -77,6 +77,11 @@ cifs_readdir_lookup(struct dentry *parent, struct qstr *name,
77 77
78 cFYI(1, ("For %s", name->name)); 78 cFYI(1, ("For %s", name->name));
79 79
80 if (parent->d_op && parent->d_op->d_hash)
81 parent->d_op->d_hash(parent, name);
82 else
83 name->hash = full_name_hash(name->name, name->len);
84
80 dentry = d_lookup(parent, name); 85 dentry = d_lookup(parent, name);
81 if (dentry) { 86 if (dentry) {
82 /* FIXME: check for inode number changes? */ 87 /* FIXME: check for inode number changes? */
@@ -666,12 +671,11 @@ static int cifs_get_name_from_search_buf(struct qstr *pqst,
666 min(len, max_len), nlt, 671 min(len, max_len), nlt,
667 cifs_sb->mnt_cifs_flags & 672 cifs_sb->mnt_cifs_flags &
668 CIFS_MOUNT_MAP_SPECIAL_CHR); 673 CIFS_MOUNT_MAP_SPECIAL_CHR);
674 pqst->len -= nls_nullsize(nlt);
669 } else { 675 } else {
670 pqst->name = filename; 676 pqst->name = filename;
671 pqst->len = len; 677 pqst->len = len;
672 } 678 }
673 pqst->hash = full_name_hash(pqst->name, pqst->len);
674/* cFYI(1, ("filldir on %s",pqst->name)); */
675 return rc; 679 return rc;
676} 680}
677 681
diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c
index 7085a6275c4c..aaa9c1c5a5bd 100644
--- a/fs/cifs/sess.c
+++ b/fs/cifs/sess.c
@@ -223,9 +223,9 @@ static void unicode_ssetup_strings(char **pbcc_area, struct cifsSesInfo *ses,
223 /* null user mount */ 223 /* null user mount */
224 *bcc_ptr = 0; 224 *bcc_ptr = 0;
225 *(bcc_ptr+1) = 0; 225 *(bcc_ptr+1) = 0;
226 } else { /* 300 should be long enough for any conceivable user name */ 226 } else {
227 bytes_ret = cifs_strtoUCS((__le16 *) bcc_ptr, ses->userName, 227 bytes_ret = cifs_strtoUCS((__le16 *) bcc_ptr, ses->userName,
228 300, nls_cp); 228 MAX_USERNAME_SIZE, nls_cp);
229 } 229 }
230 bcc_ptr += 2 * bytes_ret; 230 bcc_ptr += 2 * bytes_ret;
231 bcc_ptr += 2; /* account for null termination */ 231 bcc_ptr += 2; /* account for null termination */
@@ -246,11 +246,10 @@ static void ascii_ssetup_strings(char **pbcc_area, struct cifsSesInfo *ses,
246 /* copy user */ 246 /* copy user */
247 if (ses->userName == NULL) { 247 if (ses->userName == NULL) {
248 /* BB what about null user mounts - check that we do this BB */ 248 /* BB what about null user mounts - check that we do this BB */
249 } else { /* 300 should be long enough for any conceivable user name */ 249 } else {
250 strncpy(bcc_ptr, ses->userName, 300); 250 strncpy(bcc_ptr, ses->userName, MAX_USERNAME_SIZE);
251 } 251 }
252 /* BB improve check for overflow */ 252 bcc_ptr += strnlen(ses->userName, MAX_USERNAME_SIZE);
253 bcc_ptr += strnlen(ses->userName, 300);
254 *bcc_ptr = 0; 253 *bcc_ptr = 0;
255 bcc_ptr++; /* account for null termination */ 254 bcc_ptr++; /* account for null termination */
256 255
diff --git a/fs/compat.c b/fs/compat.c
index 6c19040ffeef..00d90c2e66f0 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -38,8 +38,6 @@
38#include <linux/dirent.h> 38#include <linux/dirent.h>
39#include <linux/fsnotify.h> 39#include <linux/fsnotify.h>
40#include <linux/highuid.h> 40#include <linux/highuid.h>
41#include <linux/sunrpc/svc.h>
42#include <linux/nfsd/nfsd.h>
43#include <linux/nfsd/syscall.h> 41#include <linux/nfsd/syscall.h>
44#include <linux/personality.h> 42#include <linux/personality.h>
45#include <linux/rwsem.h> 43#include <linux/rwsem.h>
diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c
index 14cbc831422a..0ca9ec4a79c3 100644
--- a/fs/compat_ioctl.c
+++ b/fs/compat_ioctl.c
@@ -301,6 +301,12 @@ static int sg_ioctl_trans(unsigned int fd, unsigned int cmd,
301 u32 data; 301 u32 data;
302 void __user *dxferp; 302 void __user *dxferp;
303 int err; 303 int err;
304 int interface_id;
305
306 if (get_user(interface_id, &sgio32->interface_id))
307 return -EFAULT;
308 if (interface_id != 'S')
309 return sys_ioctl(fd, cmd, (unsigned long)sgio32);
304 310
305 if (get_user(iovec_count, &sgio32->iovec_count)) 311 if (get_user(iovec_count, &sgio32->iovec_count))
306 return -EFAULT; 312 return -EFAULT;
@@ -936,6 +942,7 @@ COMPATIBLE_IOCTL(TCSETSF)
936COMPATIBLE_IOCTL(TIOCLINUX) 942COMPATIBLE_IOCTL(TIOCLINUX)
937COMPATIBLE_IOCTL(TIOCSBRK) 943COMPATIBLE_IOCTL(TIOCSBRK)
938COMPATIBLE_IOCTL(TIOCCBRK) 944COMPATIBLE_IOCTL(TIOCCBRK)
945COMPATIBLE_IOCTL(TIOCGSID)
939COMPATIBLE_IOCTL(TIOCGICOUNT) 946COMPATIBLE_IOCTL(TIOCGICOUNT)
940/* Little t */ 947/* Little t */
941COMPATIBLE_IOCTL(TIOCGETD) 948COMPATIBLE_IOCTL(TIOCGETD)
@@ -1005,6 +1012,9 @@ COMPATIBLE_IOCTL(SCSI_IOCTL_SEND_COMMAND)
1005COMPATIBLE_IOCTL(SCSI_IOCTL_PROBE_HOST) 1012COMPATIBLE_IOCTL(SCSI_IOCTL_PROBE_HOST)
1006COMPATIBLE_IOCTL(SCSI_IOCTL_GET_PCI) 1013COMPATIBLE_IOCTL(SCSI_IOCTL_GET_PCI)
1007#endif 1014#endif
1015/* Big V (don't complain on serial console) */
1016IGNORE_IOCTL(VT_OPENQRY)
1017IGNORE_IOCTL(VT_GETMODE)
1008/* Little p (/dev/rtc, /dev/envctrl, etc.) */ 1018/* Little p (/dev/rtc, /dev/envctrl, etc.) */
1009COMPATIBLE_IOCTL(RTC_AIE_ON) 1019COMPATIBLE_IOCTL(RTC_AIE_ON)
1010COMPATIBLE_IOCTL(RTC_AIE_OFF) 1020COMPATIBLE_IOCTL(RTC_AIE_OFF)
@@ -1035,6 +1045,8 @@ COMPATIBLE_IOCTL(FIOQSIZE)
1035#ifdef CONFIG_BLOCK 1045#ifdef CONFIG_BLOCK
1036/* loop */ 1046/* loop */
1037IGNORE_IOCTL(LOOP_CLR_FD) 1047IGNORE_IOCTL(LOOP_CLR_FD)
1048/* md calls this on random blockdevs */
1049IGNORE_IOCTL(RAID_VERSION)
1038/* SG stuff */ 1050/* SG stuff */
1039COMPATIBLE_IOCTL(SG_SET_TIMEOUT) 1051COMPATIBLE_IOCTL(SG_SET_TIMEOUT)
1040COMPATIBLE_IOCTL(SG_GET_TIMEOUT) 1052COMPATIBLE_IOCTL(SG_GET_TIMEOUT)
@@ -1600,8 +1612,6 @@ static long do_ioctl_trans(int fd, unsigned int cmd,
1600 case KDSKBMETA: 1612 case KDSKBMETA:
1601 case KDSKBLED: 1613 case KDSKBLED:
1602 case KDSETLED: 1614 case KDSETLED:
1603 /* SG stuff */
1604 case SG_SET_TRANSFORM:
1605 /* AUTOFS */ 1615 /* AUTOFS */
1606 case AUTOFS_IOC_READY: 1616 case AUTOFS_IOC_READY:
1607 case AUTOFS_IOC_FAIL: 1617 case AUTOFS_IOC_FAIL:
diff --git a/fs/configfs/symlink.c b/fs/configfs/symlink.c
index c8afa6b1d91d..32a5f46b1157 100644
--- a/fs/configfs/symlink.c
+++ b/fs/configfs/symlink.c
@@ -121,8 +121,10 @@ static int get_target(const char *symname, struct path *path,
121 ret = -ENOENT; 121 ret = -ENOENT;
122 path_put(path); 122 path_put(path);
123 } 123 }
124 } else 124 } else {
125 ret = -EPERM; 125 ret = -EPERM;
126 path_put(path);
127 }
126 } 128 }
127 129
128 return ret; 130 return ret;
diff --git a/fs/dcache.c b/fs/dcache.c
index a100fa35a48f..953173a293a9 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -978,6 +978,7 @@ struct dentry *d_alloc_name(struct dentry *parent, const char *name)
978 q.hash = full_name_hash(q.name, q.len); 978 q.hash = full_name_hash(q.name, q.len);
979 return d_alloc(parent, &q); 979 return d_alloc(parent, &q);
980} 980}
981EXPORT_SYMBOL(d_alloc_name);
981 982
982/* the caller must hold dcache_lock */ 983/* the caller must hold dcache_lock */
983static void __d_instantiate(struct dentry *dentry, struct inode *inode) 984static void __d_instantiate(struct dentry *dentry, struct inode *inode)
diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c
index b486169f42bf..274ac865bae8 100644
--- a/fs/debugfs/inode.c
+++ b/fs/debugfs/inode.c
@@ -160,15 +160,8 @@ static int debugfs_create_by_name(const char *name, mode_t mode,
160 * block. A pointer to that is in the struct vfsmount that we 160 * block. A pointer to that is in the struct vfsmount that we
161 * have around. 161 * have around.
162 */ 162 */
163 if (!parent) { 163 if (!parent)
164 if (debugfs_mount && debugfs_mount->mnt_sb) { 164 parent = debugfs_mount->mnt_sb->s_root;
165 parent = debugfs_mount->mnt_sb->s_root;
166 }
167 }
168 if (!parent) {
169 pr_debug("debugfs: Ah! can not find a parent!\n");
170 return -EFAULT;
171 }
172 165
173 *dentry = NULL; 166 *dentry = NULL;
174 mutex_lock(&parent->d_inode->i_mutex); 167 mutex_lock(&parent->d_inode->i_mutex);
diff --git a/fs/direct-io.c b/fs/direct-io.c
index 4012885d027f..e82adc2debb7 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -1206,7 +1206,7 @@ __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
1206 * NOTE: filesystems with their own locking have to handle this 1206 * NOTE: filesystems with their own locking have to handle this
1207 * on their own. 1207 * on their own.
1208 */ 1208 */
1209 if (dio->flags & DIO_LOCKING) { 1209 if (flags & DIO_LOCKING) {
1210 if (unlikely((rw & WRITE) && retval < 0)) { 1210 if (unlikely((rw & WRITE) && retval < 0)) {
1211 loff_t isize = i_size_read(inode); 1211 loff_t isize = i_size_read(inode);
1212 if (end > isize) 1212 if (end > isize)
diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c
index fbb6e5eed697..7cb0a59f4b9d 100644
--- a/fs/ecryptfs/crypto.c
+++ b/fs/ecryptfs/crypto.c
@@ -1748,7 +1748,7 @@ ecryptfs_process_key_cipher(struct crypto_blkcipher **key_tfm,
1748 char *cipher_name, size_t *key_size) 1748 char *cipher_name, size_t *key_size)
1749{ 1749{
1750 char dummy_key[ECRYPTFS_MAX_KEY_BYTES]; 1750 char dummy_key[ECRYPTFS_MAX_KEY_BYTES];
1751 char *full_alg_name; 1751 char *full_alg_name = NULL;
1752 int rc; 1752 int rc;
1753 1753
1754 *key_tfm = NULL; 1754 *key_tfm = NULL;
@@ -1763,7 +1763,6 @@ ecryptfs_process_key_cipher(struct crypto_blkcipher **key_tfm,
1763 if (rc) 1763 if (rc)
1764 goto out; 1764 goto out;
1765 *key_tfm = crypto_alloc_blkcipher(full_alg_name, 0, CRYPTO_ALG_ASYNC); 1765 *key_tfm = crypto_alloc_blkcipher(full_alg_name, 0, CRYPTO_ALG_ASYNC);
1766 kfree(full_alg_name);
1767 if (IS_ERR(*key_tfm)) { 1766 if (IS_ERR(*key_tfm)) {
1768 rc = PTR_ERR(*key_tfm); 1767 rc = PTR_ERR(*key_tfm);
1769 printk(KERN_ERR "Unable to allocate crypto cipher with name " 1768 printk(KERN_ERR "Unable to allocate crypto cipher with name "
@@ -1786,6 +1785,7 @@ ecryptfs_process_key_cipher(struct crypto_blkcipher **key_tfm,
1786 goto out; 1785 goto out;
1787 } 1786 }
1788out: 1787out:
1788 kfree(full_alg_name);
1789 return rc; 1789 return rc;
1790} 1790}
1791 1791
diff --git a/fs/ecryptfs/dentry.c b/fs/ecryptfs/dentry.c
index 2dda5ade75bc..8f006a0d6076 100644
--- a/fs/ecryptfs/dentry.c
+++ b/fs/ecryptfs/dentry.c
@@ -62,7 +62,7 @@ static int ecryptfs_d_revalidate(struct dentry *dentry, struct nameidata *nd)
62 struct inode *lower_inode = 62 struct inode *lower_inode =
63 ecryptfs_inode_to_lower(dentry->d_inode); 63 ecryptfs_inode_to_lower(dentry->d_inode);
64 64
65 fsstack_copy_attr_all(dentry->d_inode, lower_inode, NULL); 65 fsstack_copy_attr_all(dentry->d_inode, lower_inode);
66 } 66 }
67out: 67out:
68 return rc; 68 return rc;
diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c
index 9e944057001b..678172b61be2 100644
--- a/fs/ecryptfs/file.c
+++ b/fs/ecryptfs/file.c
@@ -158,7 +158,7 @@ static int ecryptfs_open(struct inode *inode, struct file *file)
158 struct dentry *ecryptfs_dentry = file->f_path.dentry; 158 struct dentry *ecryptfs_dentry = file->f_path.dentry;
159 /* Private value of ecryptfs_dentry allocated in 159 /* Private value of ecryptfs_dentry allocated in
160 * ecryptfs_lookup() */ 160 * ecryptfs_lookup() */
161 struct dentry *lower_dentry = ecryptfs_dentry_to_lower(ecryptfs_dentry); 161 struct dentry *lower_dentry;
162 struct ecryptfs_file_info *file_info; 162 struct ecryptfs_file_info *file_info;
163 163
164 mount_crypt_stat = &ecryptfs_superblock_to_private( 164 mount_crypt_stat = &ecryptfs_superblock_to_private(
@@ -191,13 +191,6 @@ static int ecryptfs_open(struct inode *inode, struct file *file)
191 | ECRYPTFS_ENCRYPTED); 191 | ECRYPTFS_ENCRYPTED);
192 } 192 }
193 mutex_unlock(&crypt_stat->cs_mutex); 193 mutex_unlock(&crypt_stat->cs_mutex);
194 if ((ecryptfs_inode_to_private(inode)->lower_file->f_flags & O_RDONLY)
195 && !(file->f_flags & O_RDONLY)) {
196 rc = -EPERM;
197 printk(KERN_WARNING "%s: Lower persistent file is RO; eCryptfs "
198 "file must hence be opened RO\n", __func__);
199 goto out;
200 }
201 if (!ecryptfs_inode_to_private(inode)->lower_file) { 194 if (!ecryptfs_inode_to_private(inode)->lower_file) {
202 rc = ecryptfs_init_persistent_file(ecryptfs_dentry); 195 rc = ecryptfs_init_persistent_file(ecryptfs_dentry);
203 if (rc) { 196 if (rc) {
@@ -208,6 +201,13 @@ static int ecryptfs_open(struct inode *inode, struct file *file)
208 goto out; 201 goto out;
209 } 202 }
210 } 203 }
204 if ((ecryptfs_inode_to_private(inode)->lower_file->f_flags & O_RDONLY)
205 && !(file->f_flags & O_RDONLY)) {
206 rc = -EPERM;
207 printk(KERN_WARNING "%s: Lower persistent file is RO; eCryptfs "
208 "file must hence be opened RO\n", __func__);
209 goto out;
210 }
211 ecryptfs_set_file_lower( 211 ecryptfs_set_file_lower(
212 file, ecryptfs_inode_to_private(inode)->lower_file); 212 file, ecryptfs_inode_to_private(inode)->lower_file);
213 if (S_ISDIR(ecryptfs_dentry->d_inode->i_mode)) { 213 if (S_ISDIR(ecryptfs_dentry->d_inode->i_mode)) {
@@ -299,7 +299,6 @@ static int ecryptfs_ioctl(struct inode *inode, struct file *file,
299const struct file_operations ecryptfs_dir_fops = { 299const struct file_operations ecryptfs_dir_fops = {
300 .readdir = ecryptfs_readdir, 300 .readdir = ecryptfs_readdir,
301 .ioctl = ecryptfs_ioctl, 301 .ioctl = ecryptfs_ioctl,
302 .mmap = generic_file_mmap,
303 .open = ecryptfs_open, 302 .open = ecryptfs_open,
304 .flush = ecryptfs_flush, 303 .flush = ecryptfs_flush,
305 .release = ecryptfs_release, 304 .release = ecryptfs_release,
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c
index 056fed62d0de..4a430ab4115c 100644
--- a/fs/ecryptfs/inode.c
+++ b/fs/ecryptfs/inode.c
@@ -282,7 +282,8 @@ int ecryptfs_lookup_and_interpose_lower(struct dentry *ecryptfs_dentry,
282 goto out; 282 goto out;
283 } 283 }
284 rc = ecryptfs_interpose(lower_dentry, ecryptfs_dentry, 284 rc = ecryptfs_interpose(lower_dentry, ecryptfs_dentry,
285 ecryptfs_dir_inode->i_sb, 1); 285 ecryptfs_dir_inode->i_sb,
286 ECRYPTFS_INTERPOSE_FLAG_D_ADD);
286 if (rc) { 287 if (rc) {
287 printk(KERN_ERR "%s: Error interposing; rc = [%d]\n", 288 printk(KERN_ERR "%s: Error interposing; rc = [%d]\n",
288 __func__, rc); 289 __func__, rc);
@@ -463,9 +464,6 @@ out_lock:
463 unlock_dir(lower_dir_dentry); 464 unlock_dir(lower_dir_dentry);
464 dput(lower_new_dentry); 465 dput(lower_new_dentry);
465 dput(lower_old_dentry); 466 dput(lower_old_dentry);
466 d_drop(lower_old_dentry);
467 d_drop(new_dentry);
468 d_drop(old_dentry);
469 return rc; 467 return rc;
470} 468}
471 469
@@ -614,6 +612,7 @@ ecryptfs_rename(struct inode *old_dir, struct dentry *old_dentry,
614 struct dentry *lower_new_dentry; 612 struct dentry *lower_new_dentry;
615 struct dentry *lower_old_dir_dentry; 613 struct dentry *lower_old_dir_dentry;
616 struct dentry *lower_new_dir_dentry; 614 struct dentry *lower_new_dir_dentry;
615 struct dentry *trap = NULL;
617 616
618 lower_old_dentry = ecryptfs_dentry_to_lower(old_dentry); 617 lower_old_dentry = ecryptfs_dentry_to_lower(old_dentry);
619 lower_new_dentry = ecryptfs_dentry_to_lower(new_dentry); 618 lower_new_dentry = ecryptfs_dentry_to_lower(new_dentry);
@@ -621,14 +620,24 @@ ecryptfs_rename(struct inode *old_dir, struct dentry *old_dentry,
621 dget(lower_new_dentry); 620 dget(lower_new_dentry);
622 lower_old_dir_dentry = dget_parent(lower_old_dentry); 621 lower_old_dir_dentry = dget_parent(lower_old_dentry);
623 lower_new_dir_dentry = dget_parent(lower_new_dentry); 622 lower_new_dir_dentry = dget_parent(lower_new_dentry);
624 lock_rename(lower_old_dir_dentry, lower_new_dir_dentry); 623 trap = lock_rename(lower_old_dir_dentry, lower_new_dir_dentry);
624 /* source should not be ancestor of target */
625 if (trap == lower_old_dentry) {
626 rc = -EINVAL;
627 goto out_lock;
628 }
629 /* target should not be ancestor of source */
630 if (trap == lower_new_dentry) {
631 rc = -ENOTEMPTY;
632 goto out_lock;
633 }
625 rc = vfs_rename(lower_old_dir_dentry->d_inode, lower_old_dentry, 634 rc = vfs_rename(lower_old_dir_dentry->d_inode, lower_old_dentry,
626 lower_new_dir_dentry->d_inode, lower_new_dentry); 635 lower_new_dir_dentry->d_inode, lower_new_dentry);
627 if (rc) 636 if (rc)
628 goto out_lock; 637 goto out_lock;
629 fsstack_copy_attr_all(new_dir, lower_new_dir_dentry->d_inode, NULL); 638 fsstack_copy_attr_all(new_dir, lower_new_dir_dentry->d_inode);
630 if (new_dir != old_dir) 639 if (new_dir != old_dir)
631 fsstack_copy_attr_all(old_dir, lower_old_dir_dentry->d_inode, NULL); 640 fsstack_copy_attr_all(old_dir, lower_old_dir_dentry->d_inode);
632out_lock: 641out_lock:
633 unlock_rename(lower_old_dir_dentry, lower_new_dir_dentry); 642 unlock_rename(lower_old_dir_dentry, lower_new_dir_dentry);
634 dput(lower_new_dentry->d_parent); 643 dput(lower_new_dentry->d_parent);
@@ -715,31 +724,31 @@ static void *ecryptfs_follow_link(struct dentry *dentry, struct nameidata *nd)
715 /* Released in ecryptfs_put_link(); only release here on error */ 724 /* Released in ecryptfs_put_link(); only release here on error */
716 buf = kmalloc(len, GFP_KERNEL); 725 buf = kmalloc(len, GFP_KERNEL);
717 if (!buf) { 726 if (!buf) {
718 rc = -ENOMEM; 727 buf = ERR_PTR(-ENOMEM);
719 goto out; 728 goto out;
720 } 729 }
721 old_fs = get_fs(); 730 old_fs = get_fs();
722 set_fs(get_ds()); 731 set_fs(get_ds());
723 rc = dentry->d_inode->i_op->readlink(dentry, (char __user *)buf, len); 732 rc = dentry->d_inode->i_op->readlink(dentry, (char __user *)buf, len);
724 set_fs(old_fs); 733 set_fs(old_fs);
725 if (rc < 0) 734 if (rc < 0) {
726 goto out_free; 735 kfree(buf);
727 else 736 buf = ERR_PTR(rc);
737 } else
728 buf[rc] = '\0'; 738 buf[rc] = '\0';
729 rc = 0;
730 nd_set_link(nd, buf);
731 goto out;
732out_free:
733 kfree(buf);
734out: 739out:
735 return ERR_PTR(rc); 740 nd_set_link(nd, buf);
741 return NULL;
736} 742}
737 743
738static void 744static void
739ecryptfs_put_link(struct dentry *dentry, struct nameidata *nd, void *ptr) 745ecryptfs_put_link(struct dentry *dentry, struct nameidata *nd, void *ptr)
740{ 746{
741 /* Free the char* */ 747 char *buf = nd_get_link(nd);
742 kfree(nd_get_link(nd)); 748 if (!IS_ERR(buf)) {
749 /* Free the char* */
750 kfree(buf);
751 }
743} 752}
744 753
745/** 754/**
@@ -772,18 +781,23 @@ upper_size_to_lower_size(struct ecryptfs_crypt_stat *crypt_stat,
772} 781}
773 782
774/** 783/**
775 * ecryptfs_truncate 784 * truncate_upper
776 * @dentry: The ecryptfs layer dentry 785 * @dentry: The ecryptfs layer dentry
777 * @new_length: The length to expand the file to 786 * @ia: Address of the ecryptfs inode's attributes
787 * @lower_ia: Address of the lower inode's attributes
778 * 788 *
779 * Function to handle truncations modifying the size of the file. Note 789 * Function to handle truncations modifying the size of the file. Note
780 * that the file sizes are interpolated. When expanding, we are simply 790 * that the file sizes are interpolated. When expanding, we are simply
781 * writing strings of 0's out. When truncating, we need to modify the 791 * writing strings of 0's out. When truncating, we truncate the upper
782 * underlying file size according to the page index interpolations. 792 * inode and update the lower_ia according to the page index
793 * interpolations. If ATTR_SIZE is set in lower_ia->ia_valid upon return,
794 * the caller must use lower_ia in a call to notify_change() to perform
795 * the truncation of the lower inode.
783 * 796 *
784 * Returns zero on success; non-zero otherwise 797 * Returns zero on success; non-zero otherwise
785 */ 798 */
786int ecryptfs_truncate(struct dentry *dentry, loff_t new_length) 799static int truncate_upper(struct dentry *dentry, struct iattr *ia,
800 struct iattr *lower_ia)
787{ 801{
788 int rc = 0; 802 int rc = 0;
789 struct inode *inode = dentry->d_inode; 803 struct inode *inode = dentry->d_inode;
@@ -794,8 +808,10 @@ int ecryptfs_truncate(struct dentry *dentry, loff_t new_length)
794 loff_t lower_size_before_truncate; 808 loff_t lower_size_before_truncate;
795 loff_t lower_size_after_truncate; 809 loff_t lower_size_after_truncate;
796 810
797 if (unlikely((new_length == i_size))) 811 if (unlikely((ia->ia_size == i_size))) {
812 lower_ia->ia_valid &= ~ATTR_SIZE;
798 goto out; 813 goto out;
814 }
799 crypt_stat = &ecryptfs_inode_to_private(dentry->d_inode)->crypt_stat; 815 crypt_stat = &ecryptfs_inode_to_private(dentry->d_inode)->crypt_stat;
800 /* Set up a fake ecryptfs file, this is used to interface with 816 /* Set up a fake ecryptfs file, this is used to interface with
801 * the file in the underlying filesystem so that the 817 * the file in the underlying filesystem so that the
@@ -815,28 +831,30 @@ int ecryptfs_truncate(struct dentry *dentry, loff_t new_length)
815 &fake_ecryptfs_file, 831 &fake_ecryptfs_file,
816 ecryptfs_inode_to_private(dentry->d_inode)->lower_file); 832 ecryptfs_inode_to_private(dentry->d_inode)->lower_file);
817 /* Switch on growing or shrinking file */ 833 /* Switch on growing or shrinking file */
818 if (new_length > i_size) { 834 if (ia->ia_size > i_size) {
819 char zero[] = { 0x00 }; 835 char zero[] = { 0x00 };
820 836
837 lower_ia->ia_valid &= ~ATTR_SIZE;
821 /* Write a single 0 at the last position of the file; 838 /* Write a single 0 at the last position of the file;
822 * this triggers code that will fill in 0's throughout 839 * this triggers code that will fill in 0's throughout
823 * the intermediate portion of the previous end of the 840 * the intermediate portion of the previous end of the
824 * file and the new and of the file */ 841 * file and the new and of the file */
825 rc = ecryptfs_write(&fake_ecryptfs_file, zero, 842 rc = ecryptfs_write(&fake_ecryptfs_file, zero,
826 (new_length - 1), 1); 843 (ia->ia_size - 1), 1);
827 } else { /* new_length < i_size_read(inode) */ 844 } else { /* ia->ia_size < i_size_read(inode) */
828 /* We're chopping off all the pages down do the page 845 /* We're chopping off all the pages down to the page
829 * in which new_length is located. Fill in the end of 846 * in which ia->ia_size is located. Fill in the end of
830 * that page from (new_length & ~PAGE_CACHE_MASK) to 847 * that page from (ia->ia_size & ~PAGE_CACHE_MASK) to
831 * PAGE_CACHE_SIZE with zeros. */ 848 * PAGE_CACHE_SIZE with zeros. */
832 size_t num_zeros = (PAGE_CACHE_SIZE 849 size_t num_zeros = (PAGE_CACHE_SIZE
833 - (new_length & ~PAGE_CACHE_MASK)); 850 - (ia->ia_size & ~PAGE_CACHE_MASK));
834 851
835 if (!(crypt_stat->flags & ECRYPTFS_ENCRYPTED)) { 852 if (!(crypt_stat->flags & ECRYPTFS_ENCRYPTED)) {
836 rc = vmtruncate(inode, new_length); 853 rc = vmtruncate(inode, ia->ia_size);
837 if (rc) 854 if (rc)
838 goto out_free; 855 goto out_free;
839 rc = vmtruncate(lower_dentry->d_inode, new_length); 856 lower_ia->ia_size = ia->ia_size;
857 lower_ia->ia_valid |= ATTR_SIZE;
840 goto out_free; 858 goto out_free;
841 } 859 }
842 if (num_zeros) { 860 if (num_zeros) {
@@ -848,7 +866,7 @@ int ecryptfs_truncate(struct dentry *dentry, loff_t new_length)
848 goto out_free; 866 goto out_free;
849 } 867 }
850 rc = ecryptfs_write(&fake_ecryptfs_file, zeros_virt, 868 rc = ecryptfs_write(&fake_ecryptfs_file, zeros_virt,
851 new_length, num_zeros); 869 ia->ia_size, num_zeros);
852 kfree(zeros_virt); 870 kfree(zeros_virt);
853 if (rc) { 871 if (rc) {
854 printk(KERN_ERR "Error attempting to zero out " 872 printk(KERN_ERR "Error attempting to zero out "
@@ -857,7 +875,7 @@ int ecryptfs_truncate(struct dentry *dentry, loff_t new_length)
857 goto out_free; 875 goto out_free;
858 } 876 }
859 } 877 }
860 vmtruncate(inode, new_length); 878 vmtruncate(inode, ia->ia_size);
861 rc = ecryptfs_write_inode_size_to_metadata(inode); 879 rc = ecryptfs_write_inode_size_to_metadata(inode);
862 if (rc) { 880 if (rc) {
863 printk(KERN_ERR "Problem with " 881 printk(KERN_ERR "Problem with "
@@ -870,10 +888,12 @@ int ecryptfs_truncate(struct dentry *dentry, loff_t new_length)
870 lower_size_before_truncate = 888 lower_size_before_truncate =
871 upper_size_to_lower_size(crypt_stat, i_size); 889 upper_size_to_lower_size(crypt_stat, i_size);
872 lower_size_after_truncate = 890 lower_size_after_truncate =
873 upper_size_to_lower_size(crypt_stat, new_length); 891 upper_size_to_lower_size(crypt_stat, ia->ia_size);
874 if (lower_size_after_truncate < lower_size_before_truncate) 892 if (lower_size_after_truncate < lower_size_before_truncate) {
875 vmtruncate(lower_dentry->d_inode, 893 lower_ia->ia_size = lower_size_after_truncate;
876 lower_size_after_truncate); 894 lower_ia->ia_valid |= ATTR_SIZE;
895 } else
896 lower_ia->ia_valid &= ~ATTR_SIZE;
877 } 897 }
878out_free: 898out_free:
879 if (ecryptfs_file_to_private(&fake_ecryptfs_file)) 899 if (ecryptfs_file_to_private(&fake_ecryptfs_file))
@@ -883,6 +903,33 @@ out:
883 return rc; 903 return rc;
884} 904}
885 905
906/**
907 * ecryptfs_truncate
908 * @dentry: The ecryptfs layer dentry
909 * @new_length: The length to expand the file to
910 *
911 * Simple function that handles the truncation of an eCryptfs inode and
912 * its corresponding lower inode.
913 *
914 * Returns zero on success; non-zero otherwise
915 */
916int ecryptfs_truncate(struct dentry *dentry, loff_t new_length)
917{
918 struct iattr ia = { .ia_valid = ATTR_SIZE, .ia_size = new_length };
919 struct iattr lower_ia = { .ia_valid = 0 };
920 int rc;
921
922 rc = truncate_upper(dentry, &ia, &lower_ia);
923 if (!rc && lower_ia.ia_valid & ATTR_SIZE) {
924 struct dentry *lower_dentry = ecryptfs_dentry_to_lower(dentry);
925
926 mutex_lock(&lower_dentry->d_inode->i_mutex);
927 rc = notify_change(lower_dentry, &lower_ia);
928 mutex_unlock(&lower_dentry->d_inode->i_mutex);
929 }
930 return rc;
931}
932
886static int 933static int
887ecryptfs_permission(struct inode *inode, int mask) 934ecryptfs_permission(struct inode *inode, int mask)
888{ 935{
@@ -905,6 +952,7 @@ static int ecryptfs_setattr(struct dentry *dentry, struct iattr *ia)
905{ 952{
906 int rc = 0; 953 int rc = 0;
907 struct dentry *lower_dentry; 954 struct dentry *lower_dentry;
955 struct iattr lower_ia;
908 struct inode *inode; 956 struct inode *inode;
909 struct inode *lower_inode; 957 struct inode *lower_inode;
910 struct ecryptfs_crypt_stat *crypt_stat; 958 struct ecryptfs_crypt_stat *crypt_stat;
@@ -943,15 +991,11 @@ static int ecryptfs_setattr(struct dentry *dentry, struct iattr *ia)
943 } 991 }
944 } 992 }
945 mutex_unlock(&crypt_stat->cs_mutex); 993 mutex_unlock(&crypt_stat->cs_mutex);
994 memcpy(&lower_ia, ia, sizeof(lower_ia));
995 if (ia->ia_valid & ATTR_FILE)
996 lower_ia.ia_file = ecryptfs_file_to_lower(ia->ia_file);
946 if (ia->ia_valid & ATTR_SIZE) { 997 if (ia->ia_valid & ATTR_SIZE) {
947 ecryptfs_printk(KERN_DEBUG, 998 rc = truncate_upper(dentry, ia, &lower_ia);
948 "ia->ia_valid = [0x%x] ATTR_SIZE" " = [0x%x]\n",
949 ia->ia_valid, ATTR_SIZE);
950 rc = ecryptfs_truncate(dentry, ia->ia_size);
951 /* ecryptfs_truncate handles resizing of the lower file */
952 ia->ia_valid &= ~ATTR_SIZE;
953 ecryptfs_printk(KERN_DEBUG, "ia->ia_valid = [%x]\n",
954 ia->ia_valid);
955 if (rc < 0) 999 if (rc < 0)
956 goto out; 1000 goto out;
957 } 1001 }
@@ -960,14 +1004,29 @@ static int ecryptfs_setattr(struct dentry *dentry, struct iattr *ia)
960 * mode change is for clearing setuid/setgid bits. Allow lower fs 1004 * mode change is for clearing setuid/setgid bits. Allow lower fs
961 * to interpret this in its own way. 1005 * to interpret this in its own way.
962 */ 1006 */
963 if (ia->ia_valid & (ATTR_KILL_SUID | ATTR_KILL_SGID)) 1007 if (lower_ia.ia_valid & (ATTR_KILL_SUID | ATTR_KILL_SGID))
964 ia->ia_valid &= ~ATTR_MODE; 1008 lower_ia.ia_valid &= ~ATTR_MODE;
965 1009
966 mutex_lock(&lower_dentry->d_inode->i_mutex); 1010 mutex_lock(&lower_dentry->d_inode->i_mutex);
967 rc = notify_change(lower_dentry, ia); 1011 rc = notify_change(lower_dentry, &lower_ia);
968 mutex_unlock(&lower_dentry->d_inode->i_mutex); 1012 mutex_unlock(&lower_dentry->d_inode->i_mutex);
969out: 1013out:
970 fsstack_copy_attr_all(inode, lower_inode, NULL); 1014 fsstack_copy_attr_all(inode, lower_inode);
1015 return rc;
1016}
1017
1018int ecryptfs_getattr(struct vfsmount *mnt, struct dentry *dentry,
1019 struct kstat *stat)
1020{
1021 struct kstat lower_stat;
1022 int rc;
1023
1024 rc = vfs_getattr(ecryptfs_dentry_to_lower_mnt(dentry),
1025 ecryptfs_dentry_to_lower(dentry), &lower_stat);
1026 if (!rc) {
1027 generic_fillattr(dentry->d_inode, stat);
1028 stat->blocks = lower_stat.blocks;
1029 }
971 return rc; 1030 return rc;
972} 1031}
973 1032
@@ -1100,6 +1159,7 @@ const struct inode_operations ecryptfs_dir_iops = {
1100const struct inode_operations ecryptfs_main_iops = { 1159const struct inode_operations ecryptfs_main_iops = {
1101 .permission = ecryptfs_permission, 1160 .permission = ecryptfs_permission,
1102 .setattr = ecryptfs_setattr, 1161 .setattr = ecryptfs_setattr,
1162 .getattr = ecryptfs_getattr,
1103 .setxattr = ecryptfs_setxattr, 1163 .setxattr = ecryptfs_setxattr,
1104 .getxattr = ecryptfs_getxattr, 1164 .getxattr = ecryptfs_getxattr,
1105 .listxattr = ecryptfs_listxattr, 1165 .listxattr = ecryptfs_listxattr,
diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c
index c6ac85d6c701..ea2f92101dfe 100644
--- a/fs/ecryptfs/main.c
+++ b/fs/ecryptfs/main.c
@@ -35,7 +35,6 @@
35#include <linux/key.h> 35#include <linux/key.h>
36#include <linux/parser.h> 36#include <linux/parser.h>
37#include <linux/fs_stack.h> 37#include <linux/fs_stack.h>
38#include <linux/ima.h>
39#include "ecryptfs_kernel.h" 38#include "ecryptfs_kernel.h"
40 39
41/** 40/**
@@ -119,7 +118,6 @@ int ecryptfs_init_persistent_file(struct dentry *ecryptfs_dentry)
119 const struct cred *cred = current_cred(); 118 const struct cred *cred = current_cred();
120 struct ecryptfs_inode_info *inode_info = 119 struct ecryptfs_inode_info *inode_info =
121 ecryptfs_inode_to_private(ecryptfs_dentry->d_inode); 120 ecryptfs_inode_to_private(ecryptfs_dentry->d_inode);
122 int opened_lower_file = 0;
123 int rc = 0; 121 int rc = 0;
124 122
125 mutex_lock(&inode_info->lower_file_mutex); 123 mutex_lock(&inode_info->lower_file_mutex);
@@ -136,12 +134,9 @@ int ecryptfs_init_persistent_file(struct dentry *ecryptfs_dentry)
136 "for lower_dentry [0x%p] and lower_mnt [0x%p]; " 134 "for lower_dentry [0x%p] and lower_mnt [0x%p]; "
137 "rc = [%d]\n", lower_dentry, lower_mnt, rc); 135 "rc = [%d]\n", lower_dentry, lower_mnt, rc);
138 inode_info->lower_file = NULL; 136 inode_info->lower_file = NULL;
139 } else 137 }
140 opened_lower_file = 1;
141 } 138 }
142 mutex_unlock(&inode_info->lower_file_mutex); 139 mutex_unlock(&inode_info->lower_file_mutex);
143 if (opened_lower_file)
144 ima_counts_get(inode_info->lower_file);
145 return rc; 140 return rc;
146} 141}
147 142
@@ -194,7 +189,7 @@ int ecryptfs_interpose(struct dentry *lower_dentry, struct dentry *dentry,
194 init_special_inode(inode, lower_inode->i_mode, 189 init_special_inode(inode, lower_inode->i_mode,
195 lower_inode->i_rdev); 190 lower_inode->i_rdev);
196 dentry->d_op = &ecryptfs_dops; 191 dentry->d_op = &ecryptfs_dops;
197 fsstack_copy_attr_all(inode, lower_inode, NULL); 192 fsstack_copy_attr_all(inode, lower_inode);
198 /* This size will be overwritten for real files w/ headers and 193 /* This size will be overwritten for real files w/ headers and
199 * other metadata */ 194 * other metadata */
200 fsstack_copy_inode_size(inode, lower_inode); 195 fsstack_copy_inode_size(inode, lower_inode);
@@ -590,8 +585,8 @@ out:
590 * with as much information as it can before needing 585 * with as much information as it can before needing
591 * the lower filesystem. 586 * the lower filesystem.
592 * ecryptfs_read_super(): this accesses the lower filesystem and uses 587 * ecryptfs_read_super(): this accesses the lower filesystem and uses
593 * ecryptfs_interpolate to perform most of the linking 588 * ecryptfs_interpose to perform most of the linking
594 * ecryptfs_interpolate(): links the lower filesystem into ecryptfs 589 * ecryptfs_interpose(): links the lower filesystem into ecryptfs (inode.c)
595 */ 590 */
596static int ecryptfs_get_sb(struct file_system_type *fs_type, int flags, 591static int ecryptfs_get_sb(struct file_system_type *fs_type, int flags,
597 const char *dev_name, void *raw_data, 592 const char *dev_name, void *raw_data,
diff --git a/fs/eventfd.c b/fs/eventfd.c
index 8b47e4200e65..7758cc382ef0 100644
--- a/fs/eventfd.c
+++ b/fs/eventfd.c
@@ -135,26 +135,71 @@ static unsigned int eventfd_poll(struct file *file, poll_table *wait)
135 return events; 135 return events;
136} 136}
137 137
138static ssize_t eventfd_read(struct file *file, char __user *buf, size_t count, 138static void eventfd_ctx_do_read(struct eventfd_ctx *ctx, __u64 *cnt)
139 loff_t *ppos) 139{
140 *cnt = (ctx->flags & EFD_SEMAPHORE) ? 1 : ctx->count;
141 ctx->count -= *cnt;
142}
143
144/**
145 * eventfd_ctx_remove_wait_queue - Read the current counter and removes wait queue.
146 * @ctx: [in] Pointer to eventfd context.
147 * @wait: [in] Wait queue to be removed.
148 * @cnt: [out] Pointer to the 64bit conter value.
149 *
150 * Returns zero if successful, or the following error codes:
151 *
152 * -EAGAIN : The operation would have blocked.
153 *
154 * This is used to atomically remove a wait queue entry from the eventfd wait
155 * queue head, and read/reset the counter value.
156 */
157int eventfd_ctx_remove_wait_queue(struct eventfd_ctx *ctx, wait_queue_t *wait,
158 __u64 *cnt)
159{
160 unsigned long flags;
161
162 spin_lock_irqsave(&ctx->wqh.lock, flags);
163 eventfd_ctx_do_read(ctx, cnt);
164 __remove_wait_queue(&ctx->wqh, wait);
165 if (*cnt != 0 && waitqueue_active(&ctx->wqh))
166 wake_up_locked_poll(&ctx->wqh, POLLOUT);
167 spin_unlock_irqrestore(&ctx->wqh.lock, flags);
168
169 return *cnt != 0 ? 0 : -EAGAIN;
170}
171EXPORT_SYMBOL_GPL(eventfd_ctx_remove_wait_queue);
172
173/**
174 * eventfd_ctx_read - Reads the eventfd counter or wait if it is zero.
175 * @ctx: [in] Pointer to eventfd context.
176 * @no_wait: [in] Different from zero if the operation should not block.
177 * @cnt: [out] Pointer to the 64bit conter value.
178 *
179 * Returns zero if successful, or the following error codes:
180 *
181 * -EAGAIN : The operation would have blocked but @no_wait was nonzero.
182 * -ERESTARTSYS : A signal interrupted the wait operation.
183 *
184 * If @no_wait is zero, the function might sleep until the eventfd internal
185 * counter becomes greater than zero.
186 */
187ssize_t eventfd_ctx_read(struct eventfd_ctx *ctx, int no_wait, __u64 *cnt)
140{ 188{
141 struct eventfd_ctx *ctx = file->private_data;
142 ssize_t res; 189 ssize_t res;
143 __u64 ucnt = 0;
144 DECLARE_WAITQUEUE(wait, current); 190 DECLARE_WAITQUEUE(wait, current);
145 191
146 if (count < sizeof(ucnt))
147 return -EINVAL;
148 spin_lock_irq(&ctx->wqh.lock); 192 spin_lock_irq(&ctx->wqh.lock);
193 *cnt = 0;
149 res = -EAGAIN; 194 res = -EAGAIN;
150 if (ctx->count > 0) 195 if (ctx->count > 0)
151 res = sizeof(ucnt); 196 res = 0;
152 else if (!(file->f_flags & O_NONBLOCK)) { 197 else if (!no_wait) {
153 __add_wait_queue(&ctx->wqh, &wait); 198 __add_wait_queue(&ctx->wqh, &wait);
154 for (res = 0;;) { 199 for (;;) {
155 set_current_state(TASK_INTERRUPTIBLE); 200 set_current_state(TASK_INTERRUPTIBLE);
156 if (ctx->count > 0) { 201 if (ctx->count > 0) {
157 res = sizeof(ucnt); 202 res = 0;
158 break; 203 break;
159 } 204 }
160 if (signal_pending(current)) { 205 if (signal_pending(current)) {
@@ -168,18 +213,32 @@ static ssize_t eventfd_read(struct file *file, char __user *buf, size_t count,
168 __remove_wait_queue(&ctx->wqh, &wait); 213 __remove_wait_queue(&ctx->wqh, &wait);
169 __set_current_state(TASK_RUNNING); 214 __set_current_state(TASK_RUNNING);
170 } 215 }
171 if (likely(res > 0)) { 216 if (likely(res == 0)) {
172 ucnt = (ctx->flags & EFD_SEMAPHORE) ? 1 : ctx->count; 217 eventfd_ctx_do_read(ctx, cnt);
173 ctx->count -= ucnt;
174 if (waitqueue_active(&ctx->wqh)) 218 if (waitqueue_active(&ctx->wqh))
175 wake_up_locked_poll(&ctx->wqh, POLLOUT); 219 wake_up_locked_poll(&ctx->wqh, POLLOUT);
176 } 220 }
177 spin_unlock_irq(&ctx->wqh.lock); 221 spin_unlock_irq(&ctx->wqh.lock);
178 if (res > 0 && put_user(ucnt, (__u64 __user *) buf))
179 return -EFAULT;
180 222
181 return res; 223 return res;
182} 224}
225EXPORT_SYMBOL_GPL(eventfd_ctx_read);
226
227static ssize_t eventfd_read(struct file *file, char __user *buf, size_t count,
228 loff_t *ppos)
229{
230 struct eventfd_ctx *ctx = file->private_data;
231 ssize_t res;
232 __u64 cnt;
233
234 if (count < sizeof(cnt))
235 return -EINVAL;
236 res = eventfd_ctx_read(ctx, file->f_flags & O_NONBLOCK, &cnt);
237 if (res < 0)
238 return res;
239
240 return put_user(cnt, (__u64 __user *) buf) ? -EFAULT : sizeof(cnt);
241}
183 242
184static ssize_t eventfd_write(struct file *file, const char __user *buf, size_t count, 243static ssize_t eventfd_write(struct file *file, const char __user *buf, size_t count,
185 loff_t *ppos) 244 loff_t *ppos)
@@ -339,7 +398,7 @@ struct file *eventfd_file_create(unsigned int count, int flags)
339 ctx->flags = flags; 398 ctx->flags = flags;
340 399
341 file = anon_inode_getfile("[eventfd]", &eventfd_fops, ctx, 400 file = anon_inode_getfile("[eventfd]", &eventfd_fops, ctx,
342 flags & EFD_SHARED_FCNTL_FLAGS); 401 O_RDWR | (flags & EFD_SHARED_FCNTL_FLAGS));
343 if (IS_ERR(file)) 402 if (IS_ERR(file))
344 eventfd_free_ctx(ctx); 403 eventfd_free_ctx(ctx);
345 404
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 366c503f9657..bd056a5b4efc 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -1206,7 +1206,7 @@ SYSCALL_DEFINE1(epoll_create1, int, flags)
1206 * a file structure and a free file descriptor. 1206 * a file structure and a free file descriptor.
1207 */ 1207 */
1208 error = anon_inode_getfd("[eventpoll]", &eventpoll_fops, ep, 1208 error = anon_inode_getfd("[eventpoll]", &eventpoll_fops, ep,
1209 flags & O_CLOEXEC); 1209 O_RDWR | (flags & O_CLOEXEC));
1210 if (error < 0) 1210 if (error < 0)
1211 ep_free(ep); 1211 ep_free(ep);
1212 1212
diff --git a/fs/exec.c b/fs/exec.c
index 623a5cc3076a..cce6bbdbdbb1 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -571,6 +571,9 @@ int setup_arg_pages(struct linux_binprm *bprm,
571 struct vm_area_struct *prev = NULL; 571 struct vm_area_struct *prev = NULL;
572 unsigned long vm_flags; 572 unsigned long vm_flags;
573 unsigned long stack_base; 573 unsigned long stack_base;
574 unsigned long stack_size;
575 unsigned long stack_expand;
576 unsigned long rlim_stack;
574 577
575#ifdef CONFIG_STACK_GROWSUP 578#ifdef CONFIG_STACK_GROWSUP
576 /* Limit stack size to 1GB */ 579 /* Limit stack size to 1GB */
@@ -627,10 +630,23 @@ int setup_arg_pages(struct linux_binprm *bprm,
627 goto out_unlock; 630 goto out_unlock;
628 } 631 }
629 632
633 stack_expand = EXTRA_STACK_VM_PAGES * PAGE_SIZE;
634 stack_size = vma->vm_end - vma->vm_start;
635 /*
636 * Align this down to a page boundary as expand_stack
637 * will align it up.
638 */
639 rlim_stack = rlimit(RLIMIT_STACK) & PAGE_MASK;
630#ifdef CONFIG_STACK_GROWSUP 640#ifdef CONFIG_STACK_GROWSUP
631 stack_base = vma->vm_end + EXTRA_STACK_VM_PAGES * PAGE_SIZE; 641 if (stack_size + stack_expand > rlim_stack)
642 stack_base = vma->vm_start + rlim_stack;
643 else
644 stack_base = vma->vm_end + stack_expand;
632#else 645#else
633 stack_base = vma->vm_start - EXTRA_STACK_VM_PAGES * PAGE_SIZE; 646 if (stack_size + stack_expand > rlim_stack)
647 stack_base = vma->vm_end - rlim_stack;
648 else
649 stack_base = vma->vm_start - stack_expand;
634#endif 650#endif
635 ret = expand_stack(vma, stack_base); 651 ret = expand_stack(vma, stack_base);
636 if (ret) 652 if (ret)
@@ -826,7 +842,9 @@ static int de_thread(struct task_struct *tsk)
826 attach_pid(tsk, PIDTYPE_PID, task_pid(leader)); 842 attach_pid(tsk, PIDTYPE_PID, task_pid(leader));
827 transfer_pid(leader, tsk, PIDTYPE_PGID); 843 transfer_pid(leader, tsk, PIDTYPE_PGID);
828 transfer_pid(leader, tsk, PIDTYPE_SID); 844 transfer_pid(leader, tsk, PIDTYPE_SID);
845
829 list_replace_rcu(&leader->tasks, &tsk->tasks); 846 list_replace_rcu(&leader->tasks, &tsk->tasks);
847 list_replace_init(&leader->sibling, &tsk->sibling);
830 848
831 tsk->group_leader = tsk; 849 tsk->group_leader = tsk;
832 leader->group_leader = tsk; 850 leader->group_leader = tsk;
@@ -939,9 +957,7 @@ void set_task_comm(struct task_struct *tsk, char *buf)
939 957
940int flush_old_exec(struct linux_binprm * bprm) 958int flush_old_exec(struct linux_binprm * bprm)
941{ 959{
942 char * name; 960 int retval;
943 int i, ch, retval;
944 char tcomm[sizeof(current->comm)];
945 961
946 /* 962 /*
947 * Make sure we have a private signal table and that 963 * Make sure we have a private signal table and that
@@ -962,6 +978,25 @@ int flush_old_exec(struct linux_binprm * bprm)
962 978
963 bprm->mm = NULL; /* We're using it now */ 979 bprm->mm = NULL; /* We're using it now */
964 980
981 current->flags &= ~PF_RANDOMIZE;
982 flush_thread();
983 current->personality &= ~bprm->per_clear;
984
985 return 0;
986
987out:
988 return retval;
989}
990EXPORT_SYMBOL(flush_old_exec);
991
992void setup_new_exec(struct linux_binprm * bprm)
993{
994 int i, ch;
995 char * name;
996 char tcomm[sizeof(current->comm)];
997
998 arch_pick_mmap_layout(current->mm);
999
965 /* This is the point of no return */ 1000 /* This is the point of no return */
966 current->sas_ss_sp = current->sas_ss_size = 0; 1001 current->sas_ss_sp = current->sas_ss_size = 0;
967 1002
@@ -983,9 +1018,6 @@ int flush_old_exec(struct linux_binprm * bprm)
983 tcomm[i] = '\0'; 1018 tcomm[i] = '\0';
984 set_task_comm(current, tcomm); 1019 set_task_comm(current, tcomm);
985 1020
986 current->flags &= ~PF_RANDOMIZE;
987 flush_thread();
988
989 /* Set the new mm task size. We have to do that late because it may 1021 /* Set the new mm task size. We have to do that late because it may
990 * depend on TIF_32BIT which is only updated in flush_thread() on 1022 * depend on TIF_32BIT which is only updated in flush_thread() on
991 * some architectures like powerpc 1023 * some architectures like powerpc
@@ -1001,8 +1033,6 @@ int flush_old_exec(struct linux_binprm * bprm)
1001 set_dumpable(current->mm, suid_dumpable); 1033 set_dumpable(current->mm, suid_dumpable);
1002 } 1034 }
1003 1035
1004 current->personality &= ~bprm->per_clear;
1005
1006 /* 1036 /*
1007 * Flush performance counters when crossing a 1037 * Flush performance counters when crossing a
1008 * security domain: 1038 * security domain:
@@ -1017,14 +1047,8 @@ int flush_old_exec(struct linux_binprm * bprm)
1017 1047
1018 flush_signal_handlers(current, 0); 1048 flush_signal_handlers(current, 0);
1019 flush_old_files(current->files); 1049 flush_old_files(current->files);
1020
1021 return 0;
1022
1023out:
1024 return retval;
1025} 1050}
1026 1051EXPORT_SYMBOL(setup_new_exec);
1027EXPORT_SYMBOL(flush_old_exec);
1028 1052
1029/* 1053/*
1030 * Prepare credentials and lock ->cred_guard_mutex. 1054 * Prepare credentials and lock ->cred_guard_mutex.
@@ -1761,17 +1785,20 @@ void do_coredump(long signr, int exit_code, struct pt_regs *regs)
1761 struct mm_struct *mm = current->mm; 1785 struct mm_struct *mm = current->mm;
1762 struct linux_binfmt * binfmt; 1786 struct linux_binfmt * binfmt;
1763 struct inode * inode; 1787 struct inode * inode;
1764 struct file * file;
1765 const struct cred *old_cred; 1788 const struct cred *old_cred;
1766 struct cred *cred; 1789 struct cred *cred;
1767 int retval = 0; 1790 int retval = 0;
1768 int flag = 0; 1791 int flag = 0;
1769 int ispipe = 0; 1792 int ispipe = 0;
1770 unsigned long core_limit = current->signal->rlim[RLIMIT_CORE].rlim_cur;
1771 char **helper_argv = NULL; 1793 char **helper_argv = NULL;
1772 int helper_argc = 0; 1794 int helper_argc = 0;
1773 int dump_count = 0; 1795 int dump_count = 0;
1774 static atomic_t core_dump_count = ATOMIC_INIT(0); 1796 static atomic_t core_dump_count = ATOMIC_INIT(0);
1797 struct coredump_params cprm = {
1798 .signr = signr,
1799 .regs = regs,
1800 .limit = current->signal->rlim[RLIMIT_CORE].rlim_cur,
1801 };
1775 1802
1776 audit_core_dumps(signr); 1803 audit_core_dumps(signr);
1777 1804
@@ -1827,15 +1854,15 @@ void do_coredump(long signr, int exit_code, struct pt_regs *regs)
1827 ispipe = format_corename(corename, signr); 1854 ispipe = format_corename(corename, signr);
1828 unlock_kernel(); 1855 unlock_kernel();
1829 1856
1830 if ((!ispipe) && (core_limit < binfmt->min_coredump)) 1857 if ((!ispipe) && (cprm.limit < binfmt->min_coredump))
1831 goto fail_unlock; 1858 goto fail_unlock;
1832 1859
1833 if (ispipe) { 1860 if (ispipe) {
1834 if (core_limit == 0) { 1861 if (cprm.limit == 0) {
1835 /* 1862 /*
1836 * Normally core limits are irrelevant to pipes, since 1863 * Normally core limits are irrelevant to pipes, since
1837 * we're not writing to the file system, but we use 1864 * we're not writing to the file system, but we use
1838 * core_limit of 0 here as a speacial value. Any 1865 * cprm.limit of 0 here as a speacial value. Any
1839 * non-zero limit gets set to RLIM_INFINITY below, but 1866 * non-zero limit gets set to RLIM_INFINITY below, but
1840 * a limit of 0 skips the dump. This is a consistent 1867 * a limit of 0 skips the dump. This is a consistent
1841 * way to catch recursive crashes. We can still crash 1868 * way to catch recursive crashes. We can still crash
@@ -1868,25 +1895,25 @@ void do_coredump(long signr, int exit_code, struct pt_regs *regs)
1868 goto fail_dropcount; 1895 goto fail_dropcount;
1869 } 1896 }
1870 1897
1871 core_limit = RLIM_INFINITY; 1898 cprm.limit = RLIM_INFINITY;
1872 1899
1873 /* SIGPIPE can happen, but it's just never processed */ 1900 /* SIGPIPE can happen, but it's just never processed */
1874 if (call_usermodehelper_pipe(helper_argv[0], helper_argv, NULL, 1901 if (call_usermodehelper_pipe(helper_argv[0], helper_argv, NULL,
1875 &file)) { 1902 &cprm.file)) {
1876 printk(KERN_INFO "Core dump to %s pipe failed\n", 1903 printk(KERN_INFO "Core dump to %s pipe failed\n",
1877 corename); 1904 corename);
1878 goto fail_dropcount; 1905 goto fail_dropcount;
1879 } 1906 }
1880 } else 1907 } else
1881 file = filp_open(corename, 1908 cprm.file = filp_open(corename,
1882 O_CREAT | 2 | O_NOFOLLOW | O_LARGEFILE | flag, 1909 O_CREAT | 2 | O_NOFOLLOW | O_LARGEFILE | flag,
1883 0600); 1910 0600);
1884 if (IS_ERR(file)) 1911 if (IS_ERR(cprm.file))
1885 goto fail_dropcount; 1912 goto fail_dropcount;
1886 inode = file->f_path.dentry->d_inode; 1913 inode = cprm.file->f_path.dentry->d_inode;
1887 if (inode->i_nlink > 1) 1914 if (inode->i_nlink > 1)
1888 goto close_fail; /* multiple links - don't dump */ 1915 goto close_fail; /* multiple links - don't dump */
1889 if (!ispipe && d_unhashed(file->f_path.dentry)) 1916 if (!ispipe && d_unhashed(cprm.file->f_path.dentry))
1890 goto close_fail; 1917 goto close_fail;
1891 1918
1892 /* AK: actually i see no reason to not allow this for named pipes etc., 1919 /* AK: actually i see no reason to not allow this for named pipes etc.,
@@ -1899,21 +1926,22 @@ void do_coredump(long signr, int exit_code, struct pt_regs *regs)
1899 */ 1926 */
1900 if (inode->i_uid != current_fsuid()) 1927 if (inode->i_uid != current_fsuid())
1901 goto close_fail; 1928 goto close_fail;
1902 if (!file->f_op) 1929 if (!cprm.file->f_op)
1903 goto close_fail; 1930 goto close_fail;
1904 if (!file->f_op->write) 1931 if (!cprm.file->f_op->write)
1905 goto close_fail; 1932 goto close_fail;
1906 if (!ispipe && do_truncate(file->f_path.dentry, 0, 0, file) != 0) 1933 if (!ispipe &&
1934 do_truncate(cprm.file->f_path.dentry, 0, 0, cprm.file) != 0)
1907 goto close_fail; 1935 goto close_fail;
1908 1936
1909 retval = binfmt->core_dump(signr, regs, file, core_limit); 1937 retval = binfmt->core_dump(&cprm);
1910 1938
1911 if (retval) 1939 if (retval)
1912 current->signal->group_exit_code |= 0x80; 1940 current->signal->group_exit_code |= 0x80;
1913close_fail: 1941close_fail:
1914 if (ispipe && core_pipe_limit) 1942 if (ispipe && core_pipe_limit)
1915 wait_for_dump_helpers(file); 1943 wait_for_dump_helpers(cprm.file);
1916 filp_close(file, NULL); 1944 filp_close(cprm.file, NULL);
1917fail_dropcount: 1945fail_dropcount:
1918 if (dump_count) 1946 if (dump_count)
1919 atomic_dec(&core_dump_count); 1947 atomic_dec(&core_dump_count);
diff --git a/fs/exofs/inode.c b/fs/exofs/inode.c
index 698a8636d39c..2afbcebeda71 100644
--- a/fs/exofs/inode.c
+++ b/fs/exofs/inode.c
@@ -738,13 +738,28 @@ static int exofs_write_begin_export(struct file *file,
738 fsdata); 738 fsdata);
739} 739}
740 740
741static int exofs_write_end(struct file *file, struct address_space *mapping,
742 loff_t pos, unsigned len, unsigned copied,
743 struct page *page, void *fsdata)
744{
745 struct inode *inode = mapping->host;
746 /* According to comment in simple_write_end i_mutex is held */
747 loff_t i_size = inode->i_size;
748 int ret;
749
750 ret = simple_write_end(file, mapping,pos, len, copied, page, fsdata);
751 if (i_size != inode->i_size)
752 mark_inode_dirty(inode);
753 return ret;
754}
755
741const struct address_space_operations exofs_aops = { 756const struct address_space_operations exofs_aops = {
742 .readpage = exofs_readpage, 757 .readpage = exofs_readpage,
743 .readpages = exofs_readpages, 758 .readpages = exofs_readpages,
744 .writepage = exofs_writepage, 759 .writepage = exofs_writepage,
745 .writepages = exofs_writepages, 760 .writepages = exofs_writepages,
746 .write_begin = exofs_write_begin_export, 761 .write_begin = exofs_write_begin_export,
747 .write_end = simple_write_end, 762 .write_end = exofs_write_end,
748}; 763};
749 764
750/****************************************************************************** 765/******************************************************************************
diff --git a/fs/exofs/pnfs.h b/fs/exofs/pnfs.h
index 423033addd1f..c52e9888b8ab 100644
--- a/fs/exofs/pnfs.h
+++ b/fs/exofs/pnfs.h
@@ -15,13 +15,7 @@
15#ifndef __EXOFS_PNFS_H__ 15#ifndef __EXOFS_PNFS_H__
16#define __EXOFS_PNFS_H__ 16#define __EXOFS_PNFS_H__
17 17
18#if defined(CONFIG_PNFS) 18#if ! defined(__PNFS_OSD_XDR_H__)
19
20
21/* FIXME: move this file to: linux/exportfs/pnfs_osd_xdr.h */
22#include "../nfs/objlayout/pnfs_osd_xdr.h"
23
24#else /* defined(CONFIG_PNFS) */
25 19
26enum pnfs_iomode { 20enum pnfs_iomode {
27 IOMODE_READ = 1, 21 IOMODE_READ = 1,
@@ -46,6 +40,6 @@ struct pnfs_osd_data_map {
46 u32 odm_raid_algorithm; 40 u32 odm_raid_algorithm;
47}; 41};
48 42
49#endif /* else defined(CONFIG_PNFS) */ 43#endif /* ! defined(__PNFS_OSD_XDR_H__) */
50 44
51#endif /* __EXOFS_PNFS_H__ */ 45#endif /* __EXOFS_PNFS_H__ */
diff --git a/fs/exportfs/expfs.c b/fs/exportfs/expfs.c
index 197c7db583c7..e9e175949a63 100644
--- a/fs/exportfs/expfs.c
+++ b/fs/exportfs/expfs.c
@@ -6,7 +6,7 @@
6 * and for mapping back from file handles to dentries. 6 * and for mapping back from file handles to dentries.
7 * 7 *
8 * For details on why we do all the strange and hairy things in here 8 * For details on why we do all the strange and hairy things in here
9 * take a look at Documentation/filesystems/Exporting. 9 * take a look at Documentation/filesystems/nfs/Exporting.
10 */ 10 */
11#include <linux/exportfs.h> 11#include <linux/exportfs.h>
12#include <linux/fs.h> 12#include <linux/fs.h>
diff --git a/fs/ext2/acl.c b/fs/ext2/acl.c
index a63d44256a70..a99e54318c3d 100644
--- a/fs/ext2/acl.c
+++ b/fs/ext2/acl.c
@@ -339,12 +339,12 @@ ext2_acl_chmod(struct inode *inode)
339 * Extended attribut handlers 339 * Extended attribut handlers
340 */ 340 */
341static size_t 341static size_t
342ext2_xattr_list_acl_access(struct inode *inode, char *list, size_t list_size, 342ext2_xattr_list_acl_access(struct dentry *dentry, char *list, size_t list_size,
343 const char *name, size_t name_len) 343 const char *name, size_t name_len, int type)
344{ 344{
345 const size_t size = sizeof(POSIX_ACL_XATTR_ACCESS); 345 const size_t size = sizeof(POSIX_ACL_XATTR_ACCESS);
346 346
347 if (!test_opt(inode->i_sb, POSIX_ACL)) 347 if (!test_opt(dentry->d_sb, POSIX_ACL))
348 return 0; 348 return 0;
349 if (list && size <= list_size) 349 if (list && size <= list_size)
350 memcpy(list, POSIX_ACL_XATTR_ACCESS, size); 350 memcpy(list, POSIX_ACL_XATTR_ACCESS, size);
@@ -352,12 +352,12 @@ ext2_xattr_list_acl_access(struct inode *inode, char *list, size_t list_size,
352} 352}
353 353
354static size_t 354static size_t
355ext2_xattr_list_acl_default(struct inode *inode, char *list, size_t list_size, 355ext2_xattr_list_acl_default(struct dentry *dentry, char *list, size_t list_size,
356 const char *name, size_t name_len) 356 const char *name, size_t name_len, int type)
357{ 357{
358 const size_t size = sizeof(POSIX_ACL_XATTR_DEFAULT); 358 const size_t size = sizeof(POSIX_ACL_XATTR_DEFAULT);
359 359
360 if (!test_opt(inode->i_sb, POSIX_ACL)) 360 if (!test_opt(dentry->d_sb, POSIX_ACL))
361 return 0; 361 return 0;
362 if (list && size <= list_size) 362 if (list && size <= list_size)
363 memcpy(list, POSIX_ACL_XATTR_DEFAULT, size); 363 memcpy(list, POSIX_ACL_XATTR_DEFAULT, size);
@@ -365,15 +365,18 @@ ext2_xattr_list_acl_default(struct inode *inode, char *list, size_t list_size,
365} 365}
366 366
367static int 367static int
368ext2_xattr_get_acl(struct inode *inode, int type, void *buffer, size_t size) 368ext2_xattr_get_acl(struct dentry *dentry, const char *name, void *buffer,
369 size_t size, int type)
369{ 370{
370 struct posix_acl *acl; 371 struct posix_acl *acl;
371 int error; 372 int error;
372 373
373 if (!test_opt(inode->i_sb, POSIX_ACL)) 374 if (strcmp(name, "") != 0)
375 return -EINVAL;
376 if (!test_opt(dentry->d_sb, POSIX_ACL))
374 return -EOPNOTSUPP; 377 return -EOPNOTSUPP;
375 378
376 acl = ext2_get_acl(inode, type); 379 acl = ext2_get_acl(dentry->d_inode, type);
377 if (IS_ERR(acl)) 380 if (IS_ERR(acl))
378 return PTR_ERR(acl); 381 return PTR_ERR(acl);
379 if (acl == NULL) 382 if (acl == NULL)
@@ -385,33 +388,17 @@ ext2_xattr_get_acl(struct inode *inode, int type, void *buffer, size_t size)
385} 388}
386 389
387static int 390static int
388ext2_xattr_get_acl_access(struct inode *inode, const char *name, 391ext2_xattr_set_acl(struct dentry *dentry, const char *name, const void *value,
389 void *buffer, size_t size) 392 size_t size, int flags, int type)
390{
391 if (strcmp(name, "") != 0)
392 return -EINVAL;
393 return ext2_xattr_get_acl(inode, ACL_TYPE_ACCESS, buffer, size);
394}
395
396static int
397ext2_xattr_get_acl_default(struct inode *inode, const char *name,
398 void *buffer, size_t size)
399{
400 if (strcmp(name, "") != 0)
401 return -EINVAL;
402 return ext2_xattr_get_acl(inode, ACL_TYPE_DEFAULT, buffer, size);
403}
404
405static int
406ext2_xattr_set_acl(struct inode *inode, int type, const void *value,
407 size_t size)
408{ 393{
409 struct posix_acl *acl; 394 struct posix_acl *acl;
410 int error; 395 int error;
411 396
412 if (!test_opt(inode->i_sb, POSIX_ACL)) 397 if (strcmp(name, "") != 0)
398 return -EINVAL;
399 if (!test_opt(dentry->d_sb, POSIX_ACL))
413 return -EOPNOTSUPP; 400 return -EOPNOTSUPP;
414 if (!is_owner_or_cap(inode)) 401 if (!is_owner_or_cap(dentry->d_inode))
415 return -EPERM; 402 return -EPERM;
416 403
417 if (value) { 404 if (value) {
@@ -426,41 +413,25 @@ ext2_xattr_set_acl(struct inode *inode, int type, const void *value,
426 } else 413 } else
427 acl = NULL; 414 acl = NULL;
428 415
429 error = ext2_set_acl(inode, type, acl); 416 error = ext2_set_acl(dentry->d_inode, type, acl);
430 417
431release_and_out: 418release_and_out:
432 posix_acl_release(acl); 419 posix_acl_release(acl);
433 return error; 420 return error;
434} 421}
435 422
436static int
437ext2_xattr_set_acl_access(struct inode *inode, const char *name,
438 const void *value, size_t size, int flags)
439{
440 if (strcmp(name, "") != 0)
441 return -EINVAL;
442 return ext2_xattr_set_acl(inode, ACL_TYPE_ACCESS, value, size);
443}
444
445static int
446ext2_xattr_set_acl_default(struct inode *inode, const char *name,
447 const void *value, size_t size, int flags)
448{
449 if (strcmp(name, "") != 0)
450 return -EINVAL;
451 return ext2_xattr_set_acl(inode, ACL_TYPE_DEFAULT, value, size);
452}
453
454struct xattr_handler ext2_xattr_acl_access_handler = { 423struct xattr_handler ext2_xattr_acl_access_handler = {
455 .prefix = POSIX_ACL_XATTR_ACCESS, 424 .prefix = POSIX_ACL_XATTR_ACCESS,
425 .flags = ACL_TYPE_ACCESS,
456 .list = ext2_xattr_list_acl_access, 426 .list = ext2_xattr_list_acl_access,
457 .get = ext2_xattr_get_acl_access, 427 .get = ext2_xattr_get_acl,
458 .set = ext2_xattr_set_acl_access, 428 .set = ext2_xattr_set_acl,
459}; 429};
460 430
461struct xattr_handler ext2_xattr_acl_default_handler = { 431struct xattr_handler ext2_xattr_acl_default_handler = {
462 .prefix = POSIX_ACL_XATTR_DEFAULT, 432 .prefix = POSIX_ACL_XATTR_DEFAULT,
433 .flags = ACL_TYPE_DEFAULT,
463 .list = ext2_xattr_list_acl_default, 434 .list = ext2_xattr_list_acl_default,
464 .get = ext2_xattr_get_acl_default, 435 .get = ext2_xattr_get_acl,
465 .set = ext2_xattr_set_acl_default, 436 .set = ext2_xattr_set_acl,
466}; 437};
diff --git a/fs/ext2/xattr.c b/fs/ext2/xattr.c
index 7913531ec6d5..904f00642f84 100644
--- a/fs/ext2/xattr.c
+++ b/fs/ext2/xattr.c
@@ -60,6 +60,7 @@
60#include <linux/mbcache.h> 60#include <linux/mbcache.h>
61#include <linux/quotaops.h> 61#include <linux/quotaops.h>
62#include <linux/rwsem.h> 62#include <linux/rwsem.h>
63#include <linux/security.h>
63#include "ext2.h" 64#include "ext2.h"
64#include "xattr.h" 65#include "xattr.h"
65#include "acl.h" 66#include "acl.h"
@@ -249,8 +250,9 @@ cleanup:
249 * used / required on success. 250 * used / required on success.
250 */ 251 */
251static int 252static int
252ext2_xattr_list(struct inode *inode, char *buffer, size_t buffer_size) 253ext2_xattr_list(struct dentry *dentry, char *buffer, size_t buffer_size)
253{ 254{
255 struct inode *inode = dentry->d_inode;
254 struct buffer_head *bh = NULL; 256 struct buffer_head *bh = NULL;
255 struct ext2_xattr_entry *entry; 257 struct ext2_xattr_entry *entry;
256 char *end; 258 char *end;
@@ -300,9 +302,10 @@ bad_block: ext2_error(inode->i_sb, "ext2_xattr_list",
300 ext2_xattr_handler(entry->e_name_index); 302 ext2_xattr_handler(entry->e_name_index);
301 303
302 if (handler) { 304 if (handler) {
303 size_t size = handler->list(inode, buffer, rest, 305 size_t size = handler->list(dentry, buffer, rest,
304 entry->e_name, 306 entry->e_name,
305 entry->e_name_len); 307 entry->e_name_len,
308 handler->flags);
306 if (buffer) { 309 if (buffer) {
307 if (size > rest) { 310 if (size > rest) {
308 error = -ERANGE; 311 error = -ERANGE;
@@ -330,7 +333,7 @@ cleanup:
330ssize_t 333ssize_t
331ext2_listxattr(struct dentry *dentry, char *buffer, size_t size) 334ext2_listxattr(struct dentry *dentry, char *buffer, size_t size)
332{ 335{
333 return ext2_xattr_list(dentry->d_inode, buffer, size); 336 return ext2_xattr_list(dentry, buffer, size);
334} 337}
335 338
336/* 339/*
diff --git a/fs/ext2/xattr_security.c b/fs/ext2/xattr_security.c
index 70c0dbdcdcb7..c8155845ac05 100644
--- a/fs/ext2/xattr_security.c
+++ b/fs/ext2/xattr_security.c
@@ -11,8 +11,8 @@
11#include "xattr.h" 11#include "xattr.h"
12 12
13static size_t 13static size_t
14ext2_xattr_security_list(struct inode *inode, char *list, size_t list_size, 14ext2_xattr_security_list(struct dentry *dentry, char *list, size_t list_size,
15 const char *name, size_t name_len) 15 const char *name, size_t name_len, int type)
16{ 16{
17 const int prefix_len = XATTR_SECURITY_PREFIX_LEN; 17 const int prefix_len = XATTR_SECURITY_PREFIX_LEN;
18 const size_t total_len = prefix_len + name_len + 1; 18 const size_t total_len = prefix_len + name_len + 1;
@@ -26,22 +26,22 @@ ext2_xattr_security_list(struct inode *inode, char *list, size_t list_size,
26} 26}
27 27
28static int 28static int
29ext2_xattr_security_get(struct inode *inode, const char *name, 29ext2_xattr_security_get(struct dentry *dentry, const char *name,
30 void *buffer, size_t size) 30 void *buffer, size_t size, int type)
31{ 31{
32 if (strcmp(name, "") == 0) 32 if (strcmp(name, "") == 0)
33 return -EINVAL; 33 return -EINVAL;
34 return ext2_xattr_get(inode, EXT2_XATTR_INDEX_SECURITY, name, 34 return ext2_xattr_get(dentry->d_inode, EXT2_XATTR_INDEX_SECURITY, name,
35 buffer, size); 35 buffer, size);
36} 36}
37 37
38static int 38static int
39ext2_xattr_security_set(struct inode *inode, const char *name, 39ext2_xattr_security_set(struct dentry *dentry, const char *name,
40 const void *value, size_t size, int flags) 40 const void *value, size_t size, int flags, int type)
41{ 41{
42 if (strcmp(name, "") == 0) 42 if (strcmp(name, "") == 0)
43 return -EINVAL; 43 return -EINVAL;
44 return ext2_xattr_set(inode, EXT2_XATTR_INDEX_SECURITY, name, 44 return ext2_xattr_set(dentry->d_inode, EXT2_XATTR_INDEX_SECURITY, name,
45 value, size, flags); 45 value, size, flags);
46} 46}
47 47
diff --git a/fs/ext2/xattr_trusted.c b/fs/ext2/xattr_trusted.c
index e8219f8eae9f..2a26d71f4771 100644
--- a/fs/ext2/xattr_trusted.c
+++ b/fs/ext2/xattr_trusted.c
@@ -13,8 +13,8 @@
13#include "xattr.h" 13#include "xattr.h"
14 14
15static size_t 15static size_t
16ext2_xattr_trusted_list(struct inode *inode, char *list, size_t list_size, 16ext2_xattr_trusted_list(struct dentry *dentry, char *list, size_t list_size,
17 const char *name, size_t name_len) 17 const char *name, size_t name_len, int type)
18{ 18{
19 const int prefix_len = XATTR_TRUSTED_PREFIX_LEN; 19 const int prefix_len = XATTR_TRUSTED_PREFIX_LEN;
20 const size_t total_len = prefix_len + name_len + 1; 20 const size_t total_len = prefix_len + name_len + 1;
@@ -31,22 +31,22 @@ ext2_xattr_trusted_list(struct inode *inode, char *list, size_t list_size,
31} 31}
32 32
33static int 33static int
34ext2_xattr_trusted_get(struct inode *inode, const char *name, 34ext2_xattr_trusted_get(struct dentry *dentry, const char *name,
35 void *buffer, size_t size) 35 void *buffer, size_t size, int type)
36{ 36{
37 if (strcmp(name, "") == 0) 37 if (strcmp(name, "") == 0)
38 return -EINVAL; 38 return -EINVAL;
39 return ext2_xattr_get(inode, EXT2_XATTR_INDEX_TRUSTED, name, 39 return ext2_xattr_get(dentry->d_inode, EXT2_XATTR_INDEX_TRUSTED, name,
40 buffer, size); 40 buffer, size);
41} 41}
42 42
43static int 43static int
44ext2_xattr_trusted_set(struct inode *inode, const char *name, 44ext2_xattr_trusted_set(struct dentry *dentry, const char *name,
45 const void *value, size_t size, int flags) 45 const void *value, size_t size, int flags, int type)
46{ 46{
47 if (strcmp(name, "") == 0) 47 if (strcmp(name, "") == 0)
48 return -EINVAL; 48 return -EINVAL;
49 return ext2_xattr_set(inode, EXT2_XATTR_INDEX_TRUSTED, name, 49 return ext2_xattr_set(dentry->d_inode, EXT2_XATTR_INDEX_TRUSTED, name,
50 value, size, flags); 50 value, size, flags);
51} 51}
52 52
diff --git a/fs/ext2/xattr_user.c b/fs/ext2/xattr_user.c
index 92495d28c62f..3f6caf3684b4 100644
--- a/fs/ext2/xattr_user.c
+++ b/fs/ext2/xattr_user.c
@@ -12,13 +12,13 @@
12#include "xattr.h" 12#include "xattr.h"
13 13
14static size_t 14static size_t
15ext2_xattr_user_list(struct inode *inode, char *list, size_t list_size, 15ext2_xattr_user_list(struct dentry *dentry, char *list, size_t list_size,
16 const char *name, size_t name_len) 16 const char *name, size_t name_len, int type)
17{ 17{
18 const size_t prefix_len = XATTR_USER_PREFIX_LEN; 18 const size_t prefix_len = XATTR_USER_PREFIX_LEN;
19 const size_t total_len = prefix_len + name_len + 1; 19 const size_t total_len = prefix_len + name_len + 1;
20 20
21 if (!test_opt(inode->i_sb, XATTR_USER)) 21 if (!test_opt(dentry->d_sb, XATTR_USER))
22 return 0; 22 return 0;
23 23
24 if (list && total_len <= list_size) { 24 if (list && total_len <= list_size) {
@@ -30,27 +30,28 @@ ext2_xattr_user_list(struct inode *inode, char *list, size_t list_size,
30} 30}
31 31
32static int 32static int
33ext2_xattr_user_get(struct inode *inode, const char *name, 33ext2_xattr_user_get(struct dentry *dentry, const char *name,
34 void *buffer, size_t size) 34 void *buffer, size_t size, int type)
35{ 35{
36 if (strcmp(name, "") == 0) 36 if (strcmp(name, "") == 0)
37 return -EINVAL; 37 return -EINVAL;
38 if (!test_opt(inode->i_sb, XATTR_USER)) 38 if (!test_opt(dentry->d_sb, XATTR_USER))
39 return -EOPNOTSUPP; 39 return -EOPNOTSUPP;
40 return ext2_xattr_get(inode, EXT2_XATTR_INDEX_USER, name, buffer, size); 40 return ext2_xattr_get(dentry->d_inode, EXT2_XATTR_INDEX_USER,
41 name, buffer, size);
41} 42}
42 43
43static int 44static int
44ext2_xattr_user_set(struct inode *inode, const char *name, 45ext2_xattr_user_set(struct dentry *dentry, const char *name,
45 const void *value, size_t size, int flags) 46 const void *value, size_t size, int flags, int type)
46{ 47{
47 if (strcmp(name, "") == 0) 48 if (strcmp(name, "") == 0)
48 return -EINVAL; 49 return -EINVAL;
49 if (!test_opt(inode->i_sb, XATTR_USER)) 50 if (!test_opt(dentry->d_sb, XATTR_USER))
50 return -EOPNOTSUPP; 51 return -EOPNOTSUPP;
51 52
52 return ext2_xattr_set(inode, EXT2_XATTR_INDEX_USER, name, 53 return ext2_xattr_set(dentry->d_inode, EXT2_XATTR_INDEX_USER,
53 value, size, flags); 54 name, value, size, flags);
54} 55}
55 56
56struct xattr_handler ext2_xattr_user_handler = { 57struct xattr_handler ext2_xattr_user_handler = {
diff --git a/fs/ext3/acl.c b/fs/ext3/acl.c
index c9b0df376b5f..82ba34158661 100644
--- a/fs/ext3/acl.c
+++ b/fs/ext3/acl.c
@@ -366,12 +366,12 @@ out:
366 * Extended attribute handlers 366 * Extended attribute handlers
367 */ 367 */
368static size_t 368static size_t
369ext3_xattr_list_acl_access(struct inode *inode, char *list, size_t list_len, 369ext3_xattr_list_acl_access(struct dentry *dentry, char *list, size_t list_len,
370 const char *name, size_t name_len) 370 const char *name, size_t name_len, int type)
371{ 371{
372 const size_t size = sizeof(POSIX_ACL_XATTR_ACCESS); 372 const size_t size = sizeof(POSIX_ACL_XATTR_ACCESS);
373 373
374 if (!test_opt(inode->i_sb, POSIX_ACL)) 374 if (!test_opt(dentry->d_sb, POSIX_ACL))
375 return 0; 375 return 0;
376 if (list && size <= list_len) 376 if (list && size <= list_len)
377 memcpy(list, POSIX_ACL_XATTR_ACCESS, size); 377 memcpy(list, POSIX_ACL_XATTR_ACCESS, size);
@@ -379,12 +379,12 @@ ext3_xattr_list_acl_access(struct inode *inode, char *list, size_t list_len,
379} 379}
380 380
381static size_t 381static size_t
382ext3_xattr_list_acl_default(struct inode *inode, char *list, size_t list_len, 382ext3_xattr_list_acl_default(struct dentry *dentry, char *list, size_t list_len,
383 const char *name, size_t name_len) 383 const char *name, size_t name_len, int type)
384{ 384{
385 const size_t size = sizeof(POSIX_ACL_XATTR_DEFAULT); 385 const size_t size = sizeof(POSIX_ACL_XATTR_DEFAULT);
386 386
387 if (!test_opt(inode->i_sb, POSIX_ACL)) 387 if (!test_opt(dentry->d_sb, POSIX_ACL))
388 return 0; 388 return 0;
389 if (list && size <= list_len) 389 if (list && size <= list_len)
390 memcpy(list, POSIX_ACL_XATTR_DEFAULT, size); 390 memcpy(list, POSIX_ACL_XATTR_DEFAULT, size);
@@ -392,15 +392,18 @@ ext3_xattr_list_acl_default(struct inode *inode, char *list, size_t list_len,
392} 392}
393 393
394static int 394static int
395ext3_xattr_get_acl(struct inode *inode, int type, void *buffer, size_t size) 395ext3_xattr_get_acl(struct dentry *dentry, const char *name, void *buffer,
396 size_t size, int type)
396{ 397{
397 struct posix_acl *acl; 398 struct posix_acl *acl;
398 int error; 399 int error;
399 400
400 if (!test_opt(inode->i_sb, POSIX_ACL)) 401 if (strcmp(name, "") != 0)
402 return -EINVAL;
403 if (!test_opt(dentry->d_sb, POSIX_ACL))
401 return -EOPNOTSUPP; 404 return -EOPNOTSUPP;
402 405
403 acl = ext3_get_acl(inode, type); 406 acl = ext3_get_acl(dentry->d_inode, type);
404 if (IS_ERR(acl)) 407 if (IS_ERR(acl))
405 return PTR_ERR(acl); 408 return PTR_ERR(acl);
406 if (acl == NULL) 409 if (acl == NULL)
@@ -412,31 +415,16 @@ ext3_xattr_get_acl(struct inode *inode, int type, void *buffer, size_t size)
412} 415}
413 416
414static int 417static int
415ext3_xattr_get_acl_access(struct inode *inode, const char *name, 418ext3_xattr_set_acl(struct dentry *dentry, const char *name, const void *value,
416 void *buffer, size_t size) 419 size_t size, int flags, int type)
417{
418 if (strcmp(name, "") != 0)
419 return -EINVAL;
420 return ext3_xattr_get_acl(inode, ACL_TYPE_ACCESS, buffer, size);
421}
422
423static int
424ext3_xattr_get_acl_default(struct inode *inode, const char *name,
425 void *buffer, size_t size)
426{
427 if (strcmp(name, "") != 0)
428 return -EINVAL;
429 return ext3_xattr_get_acl(inode, ACL_TYPE_DEFAULT, buffer, size);
430}
431
432static int
433ext3_xattr_set_acl(struct inode *inode, int type, const void *value,
434 size_t size)
435{ 420{
421 struct inode *inode = dentry->d_inode;
436 handle_t *handle; 422 handle_t *handle;
437 struct posix_acl *acl; 423 struct posix_acl *acl;
438 int error, retries = 0; 424 int error, retries = 0;
439 425
426 if (strcmp(name, "") != 0)
427 return -EINVAL;
440 if (!test_opt(inode->i_sb, POSIX_ACL)) 428 if (!test_opt(inode->i_sb, POSIX_ACL))
441 return -EOPNOTSUPP; 429 return -EOPNOTSUPP;
442 if (!is_owner_or_cap(inode)) 430 if (!is_owner_or_cap(inode))
@@ -468,34 +456,18 @@ release_and_out:
468 return error; 456 return error;
469} 457}
470 458
471static int
472ext3_xattr_set_acl_access(struct inode *inode, const char *name,
473 const void *value, size_t size, int flags)
474{
475 if (strcmp(name, "") != 0)
476 return -EINVAL;
477 return ext3_xattr_set_acl(inode, ACL_TYPE_ACCESS, value, size);
478}
479
480static int
481ext3_xattr_set_acl_default(struct inode *inode, const char *name,
482 const void *value, size_t size, int flags)
483{
484 if (strcmp(name, "") != 0)
485 return -EINVAL;
486 return ext3_xattr_set_acl(inode, ACL_TYPE_DEFAULT, value, size);
487}
488
489struct xattr_handler ext3_xattr_acl_access_handler = { 459struct xattr_handler ext3_xattr_acl_access_handler = {
490 .prefix = POSIX_ACL_XATTR_ACCESS, 460 .prefix = POSIX_ACL_XATTR_ACCESS,
461 .flags = ACL_TYPE_ACCESS,
491 .list = ext3_xattr_list_acl_access, 462 .list = ext3_xattr_list_acl_access,
492 .get = ext3_xattr_get_acl_access, 463 .get = ext3_xattr_get_acl,
493 .set = ext3_xattr_set_acl_access, 464 .set = ext3_xattr_set_acl,
494}; 465};
495 466
496struct xattr_handler ext3_xattr_acl_default_handler = { 467struct xattr_handler ext3_xattr_acl_default_handler = {
497 .prefix = POSIX_ACL_XATTR_DEFAULT, 468 .prefix = POSIX_ACL_XATTR_DEFAULT,
469 .flags = ACL_TYPE_DEFAULT,
498 .list = ext3_xattr_list_acl_default, 470 .list = ext3_xattr_list_acl_default,
499 .get = ext3_xattr_get_acl_default, 471 .get = ext3_xattr_get_acl,
500 .set = ext3_xattr_set_acl_default, 472 .set = ext3_xattr_set_acl,
501}; 473};
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index ad14227f509e..455e6e6e5cb9 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -970,7 +970,7 @@ static int ext3_get_block(struct inode *inode, sector_t iblock,
970 if (max_blocks > DIO_MAX_BLOCKS) 970 if (max_blocks > DIO_MAX_BLOCKS)
971 max_blocks = DIO_MAX_BLOCKS; 971 max_blocks = DIO_MAX_BLOCKS;
972 handle = ext3_journal_start(inode, DIO_CREDITS + 972 handle = ext3_journal_start(inode, DIO_CREDITS +
973 2 * EXT3_QUOTA_TRANS_BLOCKS(inode->i_sb)); 973 EXT3_MAXQUOTAS_TRANS_BLOCKS(inode->i_sb));
974 if (IS_ERR(handle)) { 974 if (IS_ERR(handle)) {
975 ret = PTR_ERR(handle); 975 ret = PTR_ERR(handle);
976 goto out; 976 goto out;
@@ -3146,8 +3146,8 @@ int ext3_setattr(struct dentry *dentry, struct iattr *attr)
3146 3146
3147 /* (user+group)*(old+new) structure, inode write (sb, 3147 /* (user+group)*(old+new) structure, inode write (sb,
3148 * inode block, ? - but truncate inode update has it) */ 3148 * inode block, ? - but truncate inode update has it) */
3149 handle = ext3_journal_start(inode, 2*(EXT3_QUOTA_INIT_BLOCKS(inode->i_sb)+ 3149 handle = ext3_journal_start(inode, EXT3_MAXQUOTAS_INIT_BLOCKS(inode->i_sb)+
3150 EXT3_QUOTA_DEL_BLOCKS(inode->i_sb))+3); 3150 EXT3_MAXQUOTAS_DEL_BLOCKS(inode->i_sb)+3);
3151 if (IS_ERR(handle)) { 3151 if (IS_ERR(handle)) {
3152 error = PTR_ERR(handle); 3152 error = PTR_ERR(handle);
3153 goto err_out; 3153 goto err_out;
@@ -3239,7 +3239,7 @@ static int ext3_writepage_trans_blocks(struct inode *inode)
3239#ifdef CONFIG_QUOTA 3239#ifdef CONFIG_QUOTA
3240 /* We know that structure was already allocated during vfs_dq_init so 3240 /* We know that structure was already allocated during vfs_dq_init so
3241 * we will be updating only the data blocks + inodes */ 3241 * we will be updating only the data blocks + inodes */
3242 ret += 2*EXT3_QUOTA_TRANS_BLOCKS(inode->i_sb); 3242 ret += EXT3_MAXQUOTAS_TRANS_BLOCKS(inode->i_sb);
3243#endif 3243#endif
3244 3244
3245 return ret; 3245 return ret;
diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c
index aad6400c9b77..7b0e44f7d66f 100644
--- a/fs/ext3/namei.c
+++ b/fs/ext3/namei.c
@@ -1699,7 +1699,7 @@ static int ext3_create (struct inode * dir, struct dentry * dentry, int mode,
1699retry: 1699retry:
1700 handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS(dir->i_sb) + 1700 handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS(dir->i_sb) +
1701 EXT3_INDEX_EXTRA_TRANS_BLOCKS + 3 + 1701 EXT3_INDEX_EXTRA_TRANS_BLOCKS + 3 +
1702 2*EXT3_QUOTA_INIT_BLOCKS(dir->i_sb)); 1702 EXT3_MAXQUOTAS_INIT_BLOCKS(dir->i_sb));
1703 if (IS_ERR(handle)) 1703 if (IS_ERR(handle))
1704 return PTR_ERR(handle); 1704 return PTR_ERR(handle);
1705 1705
@@ -1733,7 +1733,7 @@ static int ext3_mknod (struct inode * dir, struct dentry *dentry,
1733retry: 1733retry:
1734 handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS(dir->i_sb) + 1734 handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS(dir->i_sb) +
1735 EXT3_INDEX_EXTRA_TRANS_BLOCKS + 3 + 1735 EXT3_INDEX_EXTRA_TRANS_BLOCKS + 3 +
1736 2*EXT3_QUOTA_INIT_BLOCKS(dir->i_sb)); 1736 EXT3_MAXQUOTAS_INIT_BLOCKS(dir->i_sb));
1737 if (IS_ERR(handle)) 1737 if (IS_ERR(handle))
1738 return PTR_ERR(handle); 1738 return PTR_ERR(handle);
1739 1739
@@ -1769,7 +1769,7 @@ static int ext3_mkdir(struct inode * dir, struct dentry * dentry, int mode)
1769retry: 1769retry:
1770 handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS(dir->i_sb) + 1770 handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS(dir->i_sb) +
1771 EXT3_INDEX_EXTRA_TRANS_BLOCKS + 3 + 1771 EXT3_INDEX_EXTRA_TRANS_BLOCKS + 3 +
1772 2*EXT3_QUOTA_INIT_BLOCKS(dir->i_sb)); 1772 EXT3_MAXQUOTAS_INIT_BLOCKS(dir->i_sb));
1773 if (IS_ERR(handle)) 1773 if (IS_ERR(handle))
1774 return PTR_ERR(handle); 1774 return PTR_ERR(handle);
1775 1775
@@ -1920,7 +1920,7 @@ int ext3_orphan_add(handle_t *handle, struct inode *inode)
1920 struct ext3_iloc iloc; 1920 struct ext3_iloc iloc;
1921 int err = 0, rc; 1921 int err = 0, rc;
1922 1922
1923 lock_super(sb); 1923 mutex_lock(&EXT3_SB(sb)->s_orphan_lock);
1924 if (!list_empty(&EXT3_I(inode)->i_orphan)) 1924 if (!list_empty(&EXT3_I(inode)->i_orphan))
1925 goto out_unlock; 1925 goto out_unlock;
1926 1926
@@ -1929,9 +1929,13 @@ int ext3_orphan_add(handle_t *handle, struct inode *inode)
1929 1929
1930 /* @@@ FIXME: Observation from aviro: 1930 /* @@@ FIXME: Observation from aviro:
1931 * I think I can trigger J_ASSERT in ext3_orphan_add(). We block 1931 * I think I can trigger J_ASSERT in ext3_orphan_add(). We block
1932 * here (on lock_super()), so race with ext3_link() which might bump 1932 * here (on s_orphan_lock), so race with ext3_link() which might bump
1933 * ->i_nlink. For, say it, character device. Not a regular file, 1933 * ->i_nlink. For, say it, character device. Not a regular file,
1934 * not a directory, not a symlink and ->i_nlink > 0. 1934 * not a directory, not a symlink and ->i_nlink > 0.
1935 *
1936 * tytso, 4/25/2009: I'm not sure how that could happen;
1937 * shouldn't the fs core protect us from these sort of
1938 * unlink()/link() races?
1935 */ 1939 */
1936 J_ASSERT ((S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || 1940 J_ASSERT ((S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
1937 S_ISLNK(inode->i_mode)) || inode->i_nlink == 0); 1941 S_ISLNK(inode->i_mode)) || inode->i_nlink == 0);
@@ -1968,7 +1972,7 @@ int ext3_orphan_add(handle_t *handle, struct inode *inode)
1968 jbd_debug(4, "orphan inode %lu will point to %d\n", 1972 jbd_debug(4, "orphan inode %lu will point to %d\n",
1969 inode->i_ino, NEXT_ORPHAN(inode)); 1973 inode->i_ino, NEXT_ORPHAN(inode));
1970out_unlock: 1974out_unlock:
1971 unlock_super(sb); 1975 mutex_unlock(&EXT3_SB(sb)->s_orphan_lock);
1972 ext3_std_error(inode->i_sb, err); 1976 ext3_std_error(inode->i_sb, err);
1973 return err; 1977 return err;
1974} 1978}
@@ -1986,11 +1990,9 @@ int ext3_orphan_del(handle_t *handle, struct inode *inode)
1986 struct ext3_iloc iloc; 1990 struct ext3_iloc iloc;
1987 int err = 0; 1991 int err = 0;
1988 1992
1989 lock_super(inode->i_sb); 1993 mutex_lock(&EXT3_SB(inode->i_sb)->s_orphan_lock);
1990 if (list_empty(&ei->i_orphan)) { 1994 if (list_empty(&ei->i_orphan))
1991 unlock_super(inode->i_sb); 1995 goto out;
1992 return 0;
1993 }
1994 1996
1995 ino_next = NEXT_ORPHAN(inode); 1997 ino_next = NEXT_ORPHAN(inode);
1996 prev = ei->i_orphan.prev; 1998 prev = ei->i_orphan.prev;
@@ -2040,7 +2042,7 @@ int ext3_orphan_del(handle_t *handle, struct inode *inode)
2040out_err: 2042out_err:
2041 ext3_std_error(inode->i_sb, err); 2043 ext3_std_error(inode->i_sb, err);
2042out: 2044out:
2043 unlock_super(inode->i_sb); 2045 mutex_unlock(&EXT3_SB(inode->i_sb)->s_orphan_lock);
2044 return err; 2046 return err;
2045 2047
2046out_brelse: 2048out_brelse:
@@ -2175,7 +2177,7 @@ static int ext3_symlink (struct inode * dir,
2175retry: 2177retry:
2176 handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS(dir->i_sb) + 2178 handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS(dir->i_sb) +
2177 EXT3_INDEX_EXTRA_TRANS_BLOCKS + 5 + 2179 EXT3_INDEX_EXTRA_TRANS_BLOCKS + 5 +
2178 2*EXT3_QUOTA_INIT_BLOCKS(dir->i_sb)); 2180 EXT3_MAXQUOTAS_INIT_BLOCKS(dir->i_sb));
2179 if (IS_ERR(handle)) 2181 if (IS_ERR(handle))
2180 return PTR_ERR(handle); 2182 return PTR_ERR(handle);
2181 2183
diff --git a/fs/ext3/resize.c b/fs/ext3/resize.c
index 5f83b6179178..54351ac7cef9 100644
--- a/fs/ext3/resize.c
+++ b/fs/ext3/resize.c
@@ -209,7 +209,7 @@ static int setup_new_group_blocks(struct super_block *sb,
209 if (IS_ERR(handle)) 209 if (IS_ERR(handle))
210 return PTR_ERR(handle); 210 return PTR_ERR(handle);
211 211
212 lock_super(sb); 212 mutex_lock(&sbi->s_resize_lock);
213 if (input->group != sbi->s_groups_count) { 213 if (input->group != sbi->s_groups_count) {
214 err = -EBUSY; 214 err = -EBUSY;
215 goto exit_journal; 215 goto exit_journal;
@@ -324,7 +324,7 @@ exit_bh:
324 brelse(bh); 324 brelse(bh);
325 325
326exit_journal: 326exit_journal:
327 unlock_super(sb); 327 mutex_unlock(&sbi->s_resize_lock);
328 if ((err2 = ext3_journal_stop(handle)) && !err) 328 if ((err2 = ext3_journal_stop(handle)) && !err)
329 err = err2; 329 err = err2;
330 330
@@ -662,11 +662,12 @@ exit_free:
662 * important part is that the new block and inode counts are in the backup 662 * important part is that the new block and inode counts are in the backup
663 * superblocks, and the location of the new group metadata in the GDT backups. 663 * superblocks, and the location of the new group metadata in the GDT backups.
664 * 664 *
665 * We do not need lock_super() for this, because these blocks are not 665 * We do not need take the s_resize_lock for this, because these
666 * otherwise touched by the filesystem code when it is mounted. We don't 666 * blocks are not otherwise touched by the filesystem code when it is
667 * need to worry about last changing from sbi->s_groups_count, because the 667 * mounted. We don't need to worry about last changing from
668 * worst that can happen is that we do not copy the full number of backups 668 * sbi->s_groups_count, because the worst that can happen is that we
669 * at this time. The resize which changed s_groups_count will backup again. 669 * do not copy the full number of backups at this time. The resize
670 * which changed s_groups_count will backup again.
670 */ 671 */
671static void update_backups(struct super_block *sb, 672static void update_backups(struct super_block *sb,
672 int blk_off, char *data, int size) 673 int blk_off, char *data, int size)
@@ -825,7 +826,7 @@ int ext3_group_add(struct super_block *sb, struct ext3_new_group_data *input)
825 goto exit_put; 826 goto exit_put;
826 } 827 }
827 828
828 lock_super(sb); 829 mutex_lock(&sbi->s_resize_lock);
829 if (input->group != sbi->s_groups_count) { 830 if (input->group != sbi->s_groups_count) {
830 ext3_warning(sb, __func__, 831 ext3_warning(sb, __func__,
831 "multiple resizers run on filesystem!"); 832 "multiple resizers run on filesystem!");
@@ -856,7 +857,7 @@ int ext3_group_add(struct super_block *sb, struct ext3_new_group_data *input)
856 /* 857 /*
857 * OK, now we've set up the new group. Time to make it active. 858 * OK, now we've set up the new group. Time to make it active.
858 * 859 *
859 * Current kernels don't lock all allocations via lock_super(), 860 * We do not lock all allocations via s_resize_lock
860 * so we have to be safe wrt. concurrent accesses the group 861 * so we have to be safe wrt. concurrent accesses the group
861 * data. So we need to be careful to set all of the relevant 862 * data. So we need to be careful to set all of the relevant
862 * group descriptor data etc. *before* we enable the group. 863 * group descriptor data etc. *before* we enable the group.
@@ -900,12 +901,12 @@ int ext3_group_add(struct super_block *sb, struct ext3_new_group_data *input)
900 * 901 *
901 * The precise rules we use are: 902 * The precise rules we use are:
902 * 903 *
903 * * Writers of s_groups_count *must* hold lock_super 904 * * Writers of s_groups_count *must* hold s_resize_lock
904 * AND 905 * AND
905 * * Writers must perform a smp_wmb() after updating all dependent 906 * * Writers must perform a smp_wmb() after updating all dependent
906 * data and before modifying the groups count 907 * data and before modifying the groups count
907 * 908 *
908 * * Readers must hold lock_super() over the access 909 * * Readers must hold s_resize_lock over the access
909 * OR 910 * OR
910 * * Readers must perform an smp_rmb() after reading the groups count 911 * * Readers must perform an smp_rmb() after reading the groups count
911 * and before reading any dependent data. 912 * and before reading any dependent data.
@@ -936,7 +937,7 @@ int ext3_group_add(struct super_block *sb, struct ext3_new_group_data *input)
936 ext3_journal_dirty_metadata(handle, sbi->s_sbh); 937 ext3_journal_dirty_metadata(handle, sbi->s_sbh);
937 938
938exit_journal: 939exit_journal:
939 unlock_super(sb); 940 mutex_unlock(&sbi->s_resize_lock);
940 if ((err2 = ext3_journal_stop(handle)) && !err) 941 if ((err2 = ext3_journal_stop(handle)) && !err)
941 err = err2; 942 err = err2;
942 if (!err) { 943 if (!err) {
@@ -973,7 +974,7 @@ int ext3_group_extend(struct super_block *sb, struct ext3_super_block *es,
973 974
974 /* We don't need to worry about locking wrt other resizers just 975 /* We don't need to worry about locking wrt other resizers just
975 * yet: we're going to revalidate es->s_blocks_count after 976 * yet: we're going to revalidate es->s_blocks_count after
976 * taking lock_super() below. */ 977 * taking the s_resize_lock below. */
977 o_blocks_count = le32_to_cpu(es->s_blocks_count); 978 o_blocks_count = le32_to_cpu(es->s_blocks_count);
978 o_groups_count = EXT3_SB(sb)->s_groups_count; 979 o_groups_count = EXT3_SB(sb)->s_groups_count;
979 980
@@ -1045,11 +1046,11 @@ int ext3_group_extend(struct super_block *sb, struct ext3_super_block *es,
1045 goto exit_put; 1046 goto exit_put;
1046 } 1047 }
1047 1048
1048 lock_super(sb); 1049 mutex_lock(&EXT3_SB(sb)->s_resize_lock);
1049 if (o_blocks_count != le32_to_cpu(es->s_blocks_count)) { 1050 if (o_blocks_count != le32_to_cpu(es->s_blocks_count)) {
1050 ext3_warning(sb, __func__, 1051 ext3_warning(sb, __func__,
1051 "multiple resizers run on filesystem!"); 1052 "multiple resizers run on filesystem!");
1052 unlock_super(sb); 1053 mutex_unlock(&EXT3_SB(sb)->s_resize_lock);
1053 ext3_journal_stop(handle); 1054 ext3_journal_stop(handle);
1054 err = -EBUSY; 1055 err = -EBUSY;
1055 goto exit_put; 1056 goto exit_put;
@@ -1059,13 +1060,13 @@ int ext3_group_extend(struct super_block *sb, struct ext3_super_block *es,
1059 EXT3_SB(sb)->s_sbh))) { 1060 EXT3_SB(sb)->s_sbh))) {
1060 ext3_warning(sb, __func__, 1061 ext3_warning(sb, __func__,
1061 "error %d on journal write access", err); 1062 "error %d on journal write access", err);
1062 unlock_super(sb); 1063 mutex_unlock(&EXT3_SB(sb)->s_resize_lock);
1063 ext3_journal_stop(handle); 1064 ext3_journal_stop(handle);
1064 goto exit_put; 1065 goto exit_put;
1065 } 1066 }
1066 es->s_blocks_count = cpu_to_le32(o_blocks_count + add); 1067 es->s_blocks_count = cpu_to_le32(o_blocks_count + add);
1067 ext3_journal_dirty_metadata(handle, EXT3_SB(sb)->s_sbh); 1068 ext3_journal_dirty_metadata(handle, EXT3_SB(sb)->s_sbh);
1068 unlock_super(sb); 1069 mutex_unlock(&EXT3_SB(sb)->s_resize_lock);
1069 ext3_debug("freeing blocks %lu through "E3FSBLK"\n", o_blocks_count, 1070 ext3_debug("freeing blocks %lu through "E3FSBLK"\n", o_blocks_count,
1070 o_blocks_count + add); 1071 o_blocks_count + add);
1071 ext3_free_blocks_sb(handle, sb, o_blocks_count, add, &freed_blocks); 1072 ext3_free_blocks_sb(handle, sb, o_blocks_count, add, &freed_blocks);
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index 7ad1e8c30bd0..afa2b569da10 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -1928,6 +1928,8 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
1928 sb->dq_op = &ext3_quota_operations; 1928 sb->dq_op = &ext3_quota_operations;
1929#endif 1929#endif
1930 INIT_LIST_HEAD(&sbi->s_orphan); /* unlinked but open files */ 1930 INIT_LIST_HEAD(&sbi->s_orphan); /* unlinked but open files */
1931 mutex_init(&sbi->s_orphan_lock);
1932 mutex_init(&sbi->s_resize_lock);
1931 1933
1932 sb->s_root = NULL; 1934 sb->s_root = NULL;
1933 1935
@@ -2014,14 +2016,7 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
2014 } 2016 }
2015 2017
2016 ext3_setup_super (sb, es, sb->s_flags & MS_RDONLY); 2018 ext3_setup_super (sb, es, sb->s_flags & MS_RDONLY);
2017 /* 2019
2018 * akpm: core read_super() calls in here with the superblock locked.
2019 * That deadlocks, because orphan cleanup needs to lock the superblock
2020 * in numerous places. Here we just pop the lock - it's relatively
2021 * harmless, because we are now ready to accept write_super() requests,
2022 * and aviro says that's the only reason for hanging onto the
2023 * superblock lock.
2024 */
2025 EXT3_SB(sb)->s_mount_state |= EXT3_ORPHAN_FS; 2020 EXT3_SB(sb)->s_mount_state |= EXT3_ORPHAN_FS;
2026 ext3_orphan_cleanup(sb, es); 2021 ext3_orphan_cleanup(sb, es);
2027 EXT3_SB(sb)->s_mount_state &= ~EXT3_ORPHAN_FS; 2022 EXT3_SB(sb)->s_mount_state &= ~EXT3_ORPHAN_FS;
@@ -2403,13 +2398,11 @@ static void ext3_mark_recovery_complete(struct super_block * sb,
2403 if (journal_flush(journal) < 0) 2398 if (journal_flush(journal) < 0)
2404 goto out; 2399 goto out;
2405 2400
2406 lock_super(sb);
2407 if (EXT3_HAS_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER) && 2401 if (EXT3_HAS_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER) &&
2408 sb->s_flags & MS_RDONLY) { 2402 sb->s_flags & MS_RDONLY) {
2409 EXT3_CLEAR_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER); 2403 EXT3_CLEAR_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER);
2410 ext3_commit_super(sb, es, 1); 2404 ext3_commit_super(sb, es, 1);
2411 } 2405 }
2412 unlock_super(sb);
2413 2406
2414out: 2407out:
2415 journal_unlock_updates(journal); 2408 journal_unlock_updates(journal);
@@ -2601,13 +2594,7 @@ static int ext3_remount (struct super_block * sb, int * flags, char * data)
2601 (sbi->s_mount_state & EXT3_VALID_FS)) 2594 (sbi->s_mount_state & EXT3_VALID_FS))
2602 es->s_state = cpu_to_le16(sbi->s_mount_state); 2595 es->s_state = cpu_to_le16(sbi->s_mount_state);
2603 2596
2604 /*
2605 * We have to unlock super so that we can wait for
2606 * transactions.
2607 */
2608 unlock_super(sb);
2609 ext3_mark_recovery_complete(sb, es); 2597 ext3_mark_recovery_complete(sb, es);
2610 lock_super(sb);
2611 } else { 2598 } else {
2612 __le32 ret; 2599 __le32 ret;
2613 if ((ret = EXT3_HAS_RO_COMPAT_FEATURE(sb, 2600 if ((ret = EXT3_HAS_RO_COMPAT_FEATURE(sb,
diff --git a/fs/ext3/xattr.c b/fs/ext3/xattr.c
index 387d92d00b97..66895ccf76c7 100644
--- a/fs/ext3/xattr.c
+++ b/fs/ext3/xattr.c
@@ -99,7 +99,7 @@ static struct buffer_head *ext3_xattr_cache_find(struct inode *,
99 struct mb_cache_entry **); 99 struct mb_cache_entry **);
100static void ext3_xattr_rehash(struct ext3_xattr_header *, 100static void ext3_xattr_rehash(struct ext3_xattr_header *,
101 struct ext3_xattr_entry *); 101 struct ext3_xattr_entry *);
102static int ext3_xattr_list(struct inode *inode, char *buffer, 102static int ext3_xattr_list(struct dentry *dentry, char *buffer,
103 size_t buffer_size); 103 size_t buffer_size);
104 104
105static struct mb_cache *ext3_xattr_cache; 105static struct mb_cache *ext3_xattr_cache;
@@ -147,7 +147,7 @@ ext3_xattr_handler(int name_index)
147ssize_t 147ssize_t
148ext3_listxattr(struct dentry *dentry, char *buffer, size_t size) 148ext3_listxattr(struct dentry *dentry, char *buffer, size_t size)
149{ 149{
150 return ext3_xattr_list(dentry->d_inode, buffer, size); 150 return ext3_xattr_list(dentry, buffer, size);
151} 151}
152 152
153static int 153static int
@@ -332,7 +332,7 @@ ext3_xattr_get(struct inode *inode, int name_index, const char *name,
332} 332}
333 333
334static int 334static int
335ext3_xattr_list_entries(struct inode *inode, struct ext3_xattr_entry *entry, 335ext3_xattr_list_entries(struct dentry *dentry, struct ext3_xattr_entry *entry,
336 char *buffer, size_t buffer_size) 336 char *buffer, size_t buffer_size)
337{ 337{
338 size_t rest = buffer_size; 338 size_t rest = buffer_size;
@@ -342,9 +342,10 @@ ext3_xattr_list_entries(struct inode *inode, struct ext3_xattr_entry *entry,
342 ext3_xattr_handler(entry->e_name_index); 342 ext3_xattr_handler(entry->e_name_index);
343 343
344 if (handler) { 344 if (handler) {
345 size_t size = handler->list(inode, buffer, rest, 345 size_t size = handler->list(dentry, buffer, rest,
346 entry->e_name, 346 entry->e_name,
347 entry->e_name_len); 347 entry->e_name_len,
348 handler->flags);
348 if (buffer) { 349 if (buffer) {
349 if (size > rest) 350 if (size > rest)
350 return -ERANGE; 351 return -ERANGE;
@@ -357,8 +358,9 @@ ext3_xattr_list_entries(struct inode *inode, struct ext3_xattr_entry *entry,
357} 358}
358 359
359static int 360static int
360ext3_xattr_block_list(struct inode *inode, char *buffer, size_t buffer_size) 361ext3_xattr_block_list(struct dentry *dentry, char *buffer, size_t buffer_size)
361{ 362{
363 struct inode *inode = dentry->d_inode;
362 struct buffer_head *bh = NULL; 364 struct buffer_head *bh = NULL;
363 int error; 365 int error;
364 366
@@ -383,7 +385,7 @@ ext3_xattr_block_list(struct inode *inode, char *buffer, size_t buffer_size)
383 goto cleanup; 385 goto cleanup;
384 } 386 }
385 ext3_xattr_cache_insert(bh); 387 ext3_xattr_cache_insert(bh);
386 error = ext3_xattr_list_entries(inode, BFIRST(bh), buffer, buffer_size); 388 error = ext3_xattr_list_entries(dentry, BFIRST(bh), buffer, buffer_size);
387 389
388cleanup: 390cleanup:
389 brelse(bh); 391 brelse(bh);
@@ -392,8 +394,9 @@ cleanup:
392} 394}
393 395
394static int 396static int
395ext3_xattr_ibody_list(struct inode *inode, char *buffer, size_t buffer_size) 397ext3_xattr_ibody_list(struct dentry *dentry, char *buffer, size_t buffer_size)
396{ 398{
399 struct inode *inode = dentry->d_inode;
397 struct ext3_xattr_ibody_header *header; 400 struct ext3_xattr_ibody_header *header;
398 struct ext3_inode *raw_inode; 401 struct ext3_inode *raw_inode;
399 struct ext3_iloc iloc; 402 struct ext3_iloc iloc;
@@ -411,7 +414,7 @@ ext3_xattr_ibody_list(struct inode *inode, char *buffer, size_t buffer_size)
411 error = ext3_xattr_check_names(IFIRST(header), end); 414 error = ext3_xattr_check_names(IFIRST(header), end);
412 if (error) 415 if (error)
413 goto cleanup; 416 goto cleanup;
414 error = ext3_xattr_list_entries(inode, IFIRST(header), 417 error = ext3_xattr_list_entries(dentry, IFIRST(header),
415 buffer, buffer_size); 418 buffer, buffer_size);
416 419
417cleanup: 420cleanup:
@@ -430,12 +433,12 @@ cleanup:
430 * used / required on success. 433 * used / required on success.
431 */ 434 */
432static int 435static int
433ext3_xattr_list(struct inode *inode, char *buffer, size_t buffer_size) 436ext3_xattr_list(struct dentry *dentry, char *buffer, size_t buffer_size)
434{ 437{
435 int i_error, b_error; 438 int i_error, b_error;
436 439
437 down_read(&EXT3_I(inode)->xattr_sem); 440 down_read(&EXT3_I(dentry->d_inode)->xattr_sem);
438 i_error = ext3_xattr_ibody_list(inode, buffer, buffer_size); 441 i_error = ext3_xattr_ibody_list(dentry, buffer, buffer_size);
439 if (i_error < 0) { 442 if (i_error < 0) {
440 b_error = 0; 443 b_error = 0;
441 } else { 444 } else {
@@ -443,11 +446,11 @@ ext3_xattr_list(struct inode *inode, char *buffer, size_t buffer_size)
443 buffer += i_error; 446 buffer += i_error;
444 buffer_size -= i_error; 447 buffer_size -= i_error;
445 } 448 }
446 b_error = ext3_xattr_block_list(inode, buffer, buffer_size); 449 b_error = ext3_xattr_block_list(dentry, buffer, buffer_size);
447 if (b_error < 0) 450 if (b_error < 0)
448 i_error = 0; 451 i_error = 0;
449 } 452 }
450 up_read(&EXT3_I(inode)->xattr_sem); 453 up_read(&EXT3_I(dentry->d_inode)->xattr_sem);
451 return i_error + b_error; 454 return i_error + b_error;
452} 455}
453 456
diff --git a/fs/ext3/xattr_security.c b/fs/ext3/xattr_security.c
index 37b81097bdf2..474348788dd9 100644
--- a/fs/ext3/xattr_security.c
+++ b/fs/ext3/xattr_security.c
@@ -12,8 +12,8 @@
12#include "xattr.h" 12#include "xattr.h"
13 13
14static size_t 14static size_t
15ext3_xattr_security_list(struct inode *inode, char *list, size_t list_size, 15ext3_xattr_security_list(struct dentry *dentry, char *list, size_t list_size,
16 const char *name, size_t name_len) 16 const char *name, size_t name_len, int type)
17{ 17{
18 const size_t prefix_len = XATTR_SECURITY_PREFIX_LEN; 18 const size_t prefix_len = XATTR_SECURITY_PREFIX_LEN;
19 const size_t total_len = prefix_len + name_len + 1; 19 const size_t total_len = prefix_len + name_len + 1;
@@ -28,23 +28,23 @@ ext3_xattr_security_list(struct inode *inode, char *list, size_t list_size,
28} 28}
29 29
30static int 30static int
31ext3_xattr_security_get(struct inode *inode, const char *name, 31ext3_xattr_security_get(struct dentry *dentry, const char *name,
32 void *buffer, size_t size) 32 void *buffer, size_t size, int type)
33{ 33{
34 if (strcmp(name, "") == 0) 34 if (strcmp(name, "") == 0)
35 return -EINVAL; 35 return -EINVAL;
36 return ext3_xattr_get(inode, EXT3_XATTR_INDEX_SECURITY, name, 36 return ext3_xattr_get(dentry->d_inode, EXT3_XATTR_INDEX_SECURITY,
37 buffer, size); 37 name, buffer, size);
38} 38}
39 39
40static int 40static int
41ext3_xattr_security_set(struct inode *inode, const char *name, 41ext3_xattr_security_set(struct dentry *dentry, const char *name,
42 const void *value, size_t size, int flags) 42 const void *value, size_t size, int flags, int type)
43{ 43{
44 if (strcmp(name, "") == 0) 44 if (strcmp(name, "") == 0)
45 return -EINVAL; 45 return -EINVAL;
46 return ext3_xattr_set(inode, EXT3_XATTR_INDEX_SECURITY, name, 46 return ext3_xattr_set(dentry->d_inode, EXT3_XATTR_INDEX_SECURITY,
47 value, size, flags); 47 name, value, size, flags);
48} 48}
49 49
50int 50int
diff --git a/fs/ext3/xattr_trusted.c b/fs/ext3/xattr_trusted.c
index c7c41a410c4b..e5562845ed96 100644
--- a/fs/ext3/xattr_trusted.c
+++ b/fs/ext3/xattr_trusted.c
@@ -14,8 +14,8 @@
14#include "xattr.h" 14#include "xattr.h"
15 15
16static size_t 16static size_t
17ext3_xattr_trusted_list(struct inode *inode, char *list, size_t list_size, 17ext3_xattr_trusted_list(struct dentry *dentry, char *list, size_t list_size,
18 const char *name, size_t name_len) 18 const char *name, size_t name_len, int type)
19{ 19{
20 const size_t prefix_len = XATTR_TRUSTED_PREFIX_LEN; 20 const size_t prefix_len = XATTR_TRUSTED_PREFIX_LEN;
21 const size_t total_len = prefix_len + name_len + 1; 21 const size_t total_len = prefix_len + name_len + 1;
@@ -32,22 +32,22 @@ ext3_xattr_trusted_list(struct inode *inode, char *list, size_t list_size,
32} 32}
33 33
34static int 34static int
35ext3_xattr_trusted_get(struct inode *inode, const char *name, 35ext3_xattr_trusted_get(struct dentry *dentry, const char *name,
36 void *buffer, size_t size) 36 void *buffer, size_t size, int type)
37{ 37{
38 if (strcmp(name, "") == 0) 38 if (strcmp(name, "") == 0)
39 return -EINVAL; 39 return -EINVAL;
40 return ext3_xattr_get(inode, EXT3_XATTR_INDEX_TRUSTED, name, 40 return ext3_xattr_get(dentry->d_inode, EXT3_XATTR_INDEX_TRUSTED,
41 buffer, size); 41 name, buffer, size);
42} 42}
43 43
44static int 44static int
45ext3_xattr_trusted_set(struct inode *inode, const char *name, 45ext3_xattr_trusted_set(struct dentry *dentry, const char *name,
46 const void *value, size_t size, int flags) 46 const void *value, size_t size, int flags, int type)
47{ 47{
48 if (strcmp(name, "") == 0) 48 if (strcmp(name, "") == 0)
49 return -EINVAL; 49 return -EINVAL;
50 return ext3_xattr_set(inode, EXT3_XATTR_INDEX_TRUSTED, name, 50 return ext3_xattr_set(dentry->d_inode, EXT3_XATTR_INDEX_TRUSTED, name,
51 value, size, flags); 51 value, size, flags);
52} 52}
53 53
diff --git a/fs/ext3/xattr_user.c b/fs/ext3/xattr_user.c
index 430fe63b31b3..3bcfe9ee0a68 100644
--- a/fs/ext3/xattr_user.c
+++ b/fs/ext3/xattr_user.c
@@ -13,13 +13,13 @@
13#include "xattr.h" 13#include "xattr.h"
14 14
15static size_t 15static size_t
16ext3_xattr_user_list(struct inode *inode, char *list, size_t list_size, 16ext3_xattr_user_list(struct dentry *dentry, char *list, size_t list_size,
17 const char *name, size_t name_len) 17 const char *name, size_t name_len, int type)
18{ 18{
19 const size_t prefix_len = XATTR_USER_PREFIX_LEN; 19 const size_t prefix_len = XATTR_USER_PREFIX_LEN;
20 const size_t total_len = prefix_len + name_len + 1; 20 const size_t total_len = prefix_len + name_len + 1;
21 21
22 if (!test_opt(inode->i_sb, XATTR_USER)) 22 if (!test_opt(dentry->d_sb, XATTR_USER))
23 return 0; 23 return 0;
24 24
25 if (list && total_len <= list_size) { 25 if (list && total_len <= list_size) {
@@ -31,26 +31,27 @@ ext3_xattr_user_list(struct inode *inode, char *list, size_t list_size,
31} 31}
32 32
33static int 33static int
34ext3_xattr_user_get(struct inode *inode, const char *name, 34ext3_xattr_user_get(struct dentry *dentry, const char *name, void *buffer,
35 void *buffer, size_t size) 35 size_t size, int type)
36{ 36{
37 if (strcmp(name, "") == 0) 37 if (strcmp(name, "") == 0)
38 return -EINVAL; 38 return -EINVAL;
39 if (!test_opt(inode->i_sb, XATTR_USER)) 39 if (!test_opt(dentry->d_sb, XATTR_USER))
40 return -EOPNOTSUPP; 40 return -EOPNOTSUPP;
41 return ext3_xattr_get(inode, EXT3_XATTR_INDEX_USER, name, buffer, size); 41 return ext3_xattr_get(dentry->d_inode, EXT3_XATTR_INDEX_USER,
42 name, buffer, size);
42} 43}
43 44
44static int 45static int
45ext3_xattr_user_set(struct inode *inode, const char *name, 46ext3_xattr_user_set(struct dentry *dentry, const char *name,
46 const void *value, size_t size, int flags) 47 const void *value, size_t size, int flags, int type)
47{ 48{
48 if (strcmp(name, "") == 0) 49 if (strcmp(name, "") == 0)
49 return -EINVAL; 50 return -EINVAL;
50 if (!test_opt(inode->i_sb, XATTR_USER)) 51 if (!test_opt(dentry->d_sb, XATTR_USER))
51 return -EOPNOTSUPP; 52 return -EOPNOTSUPP;
52 return ext3_xattr_set(inode, EXT3_XATTR_INDEX_USER, name, 53 return ext3_xattr_set(dentry->d_inode, EXT3_XATTR_INDEX_USER,
53 value, size, flags); 54 name, value, size, flags);
54} 55}
55 56
56struct xattr_handler ext3_xattr_user_handler = { 57struct xattr_handler ext3_xattr_user_handler = {
diff --git a/fs/ext4/Kconfig b/fs/ext4/Kconfig
index e5f6774846e4..9ed1bb1f319f 100644
--- a/fs/ext4/Kconfig
+++ b/fs/ext4/Kconfig
@@ -2,7 +2,6 @@ config EXT4_FS
2 tristate "The Extended 4 (ext4) filesystem" 2 tristate "The Extended 4 (ext4) filesystem"
3 select JBD2 3 select JBD2
4 select CRC16 4 select CRC16
5 select FS_JOURNAL_INFO
6 help 5 help
7 This is the next generation of the ext3 filesystem. 6 This is the next generation of the ext3 filesystem.
8 7
@@ -29,6 +28,7 @@ config EXT4_FS
29 28
30config EXT4_USE_FOR_EXT23 29config EXT4_USE_FOR_EXT23
31 bool "Use ext4 for ext2/ext3 file systems" 30 bool "Use ext4 for ext2/ext3 file systems"
31 depends on EXT4_FS
32 depends on EXT3_FS=n || EXT2_FS=n 32 depends on EXT3_FS=n || EXT2_FS=n
33 default y 33 default y
34 help 34 help
diff --git a/fs/ext4/acl.c b/fs/ext4/acl.c
index 0df88b2a69b0..8a2a29d35a6f 100644
--- a/fs/ext4/acl.c
+++ b/fs/ext4/acl.c
@@ -364,12 +364,12 @@ out:
364 * Extended attribute handlers 364 * Extended attribute handlers
365 */ 365 */
366static size_t 366static size_t
367ext4_xattr_list_acl_access(struct inode *inode, char *list, size_t list_len, 367ext4_xattr_list_acl_access(struct dentry *dentry, char *list, size_t list_len,
368 const char *name, size_t name_len) 368 const char *name, size_t name_len, int type)
369{ 369{
370 const size_t size = sizeof(POSIX_ACL_XATTR_ACCESS); 370 const size_t size = sizeof(POSIX_ACL_XATTR_ACCESS);
371 371
372 if (!test_opt(inode->i_sb, POSIX_ACL)) 372 if (!test_opt(dentry->d_sb, POSIX_ACL))
373 return 0; 373 return 0;
374 if (list && size <= list_len) 374 if (list && size <= list_len)
375 memcpy(list, POSIX_ACL_XATTR_ACCESS, size); 375 memcpy(list, POSIX_ACL_XATTR_ACCESS, size);
@@ -377,12 +377,12 @@ ext4_xattr_list_acl_access(struct inode *inode, char *list, size_t list_len,
377} 377}
378 378
379static size_t 379static size_t
380ext4_xattr_list_acl_default(struct inode *inode, char *list, size_t list_len, 380ext4_xattr_list_acl_default(struct dentry *dentry, char *list, size_t list_len,
381 const char *name, size_t name_len) 381 const char *name, size_t name_len, int type)
382{ 382{
383 const size_t size = sizeof(POSIX_ACL_XATTR_DEFAULT); 383 const size_t size = sizeof(POSIX_ACL_XATTR_DEFAULT);
384 384
385 if (!test_opt(inode->i_sb, POSIX_ACL)) 385 if (!test_opt(dentry->d_sb, POSIX_ACL))
386 return 0; 386 return 0;
387 if (list && size <= list_len) 387 if (list && size <= list_len)
388 memcpy(list, POSIX_ACL_XATTR_DEFAULT, size); 388 memcpy(list, POSIX_ACL_XATTR_DEFAULT, size);
@@ -390,15 +390,18 @@ ext4_xattr_list_acl_default(struct inode *inode, char *list, size_t list_len,
390} 390}
391 391
392static int 392static int
393ext4_xattr_get_acl(struct inode *inode, int type, void *buffer, size_t size) 393ext4_xattr_get_acl(struct dentry *dentry, const char *name, void *buffer,
394 size_t size, int type)
394{ 395{
395 struct posix_acl *acl; 396 struct posix_acl *acl;
396 int error; 397 int error;
397 398
398 if (!test_opt(inode->i_sb, POSIX_ACL)) 399 if (strcmp(name, "") != 0)
400 return -EINVAL;
401 if (!test_opt(dentry->d_sb, POSIX_ACL))
399 return -EOPNOTSUPP; 402 return -EOPNOTSUPP;
400 403
401 acl = ext4_get_acl(inode, type); 404 acl = ext4_get_acl(dentry->d_inode, type);
402 if (IS_ERR(acl)) 405 if (IS_ERR(acl))
403 return PTR_ERR(acl); 406 return PTR_ERR(acl);
404 if (acl == NULL) 407 if (acl == NULL)
@@ -410,31 +413,16 @@ ext4_xattr_get_acl(struct inode *inode, int type, void *buffer, size_t size)
410} 413}
411 414
412static int 415static int
413ext4_xattr_get_acl_access(struct inode *inode, const char *name, 416ext4_xattr_set_acl(struct dentry *dentry, const char *name, const void *value,
414 void *buffer, size_t size) 417 size_t size, int flags, int type)
415{
416 if (strcmp(name, "") != 0)
417 return -EINVAL;
418 return ext4_xattr_get_acl(inode, ACL_TYPE_ACCESS, buffer, size);
419}
420
421static int
422ext4_xattr_get_acl_default(struct inode *inode, const char *name,
423 void *buffer, size_t size)
424{
425 if (strcmp(name, "") != 0)
426 return -EINVAL;
427 return ext4_xattr_get_acl(inode, ACL_TYPE_DEFAULT, buffer, size);
428}
429
430static int
431ext4_xattr_set_acl(struct inode *inode, int type, const void *value,
432 size_t size)
433{ 418{
419 struct inode *inode = dentry->d_inode;
434 handle_t *handle; 420 handle_t *handle;
435 struct posix_acl *acl; 421 struct posix_acl *acl;
436 int error, retries = 0; 422 int error, retries = 0;
437 423
424 if (strcmp(name, "") != 0)
425 return -EINVAL;
438 if (!test_opt(inode->i_sb, POSIX_ACL)) 426 if (!test_opt(inode->i_sb, POSIX_ACL))
439 return -EOPNOTSUPP; 427 return -EOPNOTSUPP;
440 if (!is_owner_or_cap(inode)) 428 if (!is_owner_or_cap(inode))
@@ -466,34 +454,18 @@ release_and_out:
466 return error; 454 return error;
467} 455}
468 456
469static int
470ext4_xattr_set_acl_access(struct inode *inode, const char *name,
471 const void *value, size_t size, int flags)
472{
473 if (strcmp(name, "") != 0)
474 return -EINVAL;
475 return ext4_xattr_set_acl(inode, ACL_TYPE_ACCESS, value, size);
476}
477
478static int
479ext4_xattr_set_acl_default(struct inode *inode, const char *name,
480 const void *value, size_t size, int flags)
481{
482 if (strcmp(name, "") != 0)
483 return -EINVAL;
484 return ext4_xattr_set_acl(inode, ACL_TYPE_DEFAULT, value, size);
485}
486
487struct xattr_handler ext4_xattr_acl_access_handler = { 457struct xattr_handler ext4_xattr_acl_access_handler = {
488 .prefix = POSIX_ACL_XATTR_ACCESS, 458 .prefix = POSIX_ACL_XATTR_ACCESS,
459 .flags = ACL_TYPE_ACCESS,
489 .list = ext4_xattr_list_acl_access, 460 .list = ext4_xattr_list_acl_access,
490 .get = ext4_xattr_get_acl_access, 461 .get = ext4_xattr_get_acl,
491 .set = ext4_xattr_set_acl_access, 462 .set = ext4_xattr_set_acl,
492}; 463};
493 464
494struct xattr_handler ext4_xattr_acl_default_handler = { 465struct xattr_handler ext4_xattr_acl_default_handler = {
495 .prefix = POSIX_ACL_XATTR_DEFAULT, 466 .prefix = POSIX_ACL_XATTR_DEFAULT,
467 .flags = ACL_TYPE_DEFAULT,
496 .list = ext4_xattr_list_acl_default, 468 .list = ext4_xattr_list_acl_default,
497 .get = ext4_xattr_get_acl_default, 469 .get = ext4_xattr_get_acl,
498 .set = ext4_xattr_set_acl_default, 470 .set = ext4_xattr_set_acl,
499}; 471};
diff --git a/fs/ext4/block_validity.c b/fs/ext4/block_validity.c
index 4df8621ec31c..a60ab9aad57d 100644
--- a/fs/ext4/block_validity.c
+++ b/fs/ext4/block_validity.c
@@ -16,7 +16,6 @@
16#include <linux/module.h> 16#include <linux/module.h>
17#include <linux/swap.h> 17#include <linux/swap.h>
18#include <linux/pagemap.h> 18#include <linux/pagemap.h>
19#include <linux/version.h>
20#include <linux/blkdev.h> 19#include <linux/blkdev.h>
21#include <linux/mutex.h> 20#include <linux/mutex.h>
22#include "ext4.h" 21#include "ext4.h"
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index ab31e65d46d0..874d169a193e 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -361,14 +361,11 @@ struct ext4_new_group_data {
361 so set the magic i_delalloc_reserve_flag after taking the 361 so set the magic i_delalloc_reserve_flag after taking the
362 inode allocation semaphore for */ 362 inode allocation semaphore for */
363#define EXT4_GET_BLOCKS_DELALLOC_RESERVE 0x0004 363#define EXT4_GET_BLOCKS_DELALLOC_RESERVE 0x0004
364 /* Call ext4_da_update_reserve_space() after successfully
365 allocating the blocks */
366#define EXT4_GET_BLOCKS_UPDATE_RESERVE_SPACE 0x0008
367 /* caller is from the direct IO path, request to creation of an 364 /* caller is from the direct IO path, request to creation of an
368 unitialized extents if not allocated, split the uninitialized 365 unitialized extents if not allocated, split the uninitialized
369 extent if blocks has been preallocated already*/ 366 extent if blocks has been preallocated already*/
370#define EXT4_GET_BLOCKS_DIO 0x0010 367#define EXT4_GET_BLOCKS_DIO 0x0008
371#define EXT4_GET_BLOCKS_CONVERT 0x0020 368#define EXT4_GET_BLOCKS_CONVERT 0x0010
372#define EXT4_GET_BLOCKS_DIO_CREATE_EXT (EXT4_GET_BLOCKS_DIO|\ 369#define EXT4_GET_BLOCKS_DIO_CREATE_EXT (EXT4_GET_BLOCKS_DIO|\
373 EXT4_GET_BLOCKS_CREATE_UNINIT_EXT) 370 EXT4_GET_BLOCKS_CREATE_UNINIT_EXT)
374 /* Convert extent to initialized after direct IO complete */ 371 /* Convert extent to initialized after direct IO complete */
@@ -699,11 +696,17 @@ struct ext4_inode_info {
699 unsigned int i_reserved_meta_blocks; 696 unsigned int i_reserved_meta_blocks;
700 unsigned int i_allocated_meta_blocks; 697 unsigned int i_allocated_meta_blocks;
701 unsigned short i_delalloc_reserved_flag; 698 unsigned short i_delalloc_reserved_flag;
699 sector_t i_da_metadata_calc_last_lblock;
700 int i_da_metadata_calc_len;
702 701
703 /* on-disk additional length */ 702 /* on-disk additional length */
704 __u16 i_extra_isize; 703 __u16 i_extra_isize;
705 704
706 spinlock_t i_block_reservation_lock; 705 spinlock_t i_block_reservation_lock;
706#ifdef CONFIG_QUOTA
707 /* quota space reservation, managed internally by quota code */
708 qsize_t i_reserved_quota;
709#endif
707 710
708 /* completed async DIOs that might need unwritten extents handling */ 711 /* completed async DIOs that might need unwritten extents handling */
709 struct list_head i_aio_dio_complete_list; 712 struct list_head i_aio_dio_complete_list;
@@ -1435,8 +1438,10 @@ extern int ext4_chunk_trans_blocks(struct inode *, int nrblocks);
1435extern int ext4_block_truncate_page(handle_t *handle, 1438extern int ext4_block_truncate_page(handle_t *handle,
1436 struct address_space *mapping, loff_t from); 1439 struct address_space *mapping, loff_t from);
1437extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf); 1440extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf);
1438extern qsize_t ext4_get_reserved_space(struct inode *inode); 1441extern qsize_t *ext4_get_reserved_space(struct inode *inode);
1439extern int flush_aio_dio_completed_IO(struct inode *inode); 1442extern int flush_aio_dio_completed_IO(struct inode *inode);
1443extern void ext4_da_update_reserve_space(struct inode *inode,
1444 int used, int quota_claim);
1440/* ioctl.c */ 1445/* ioctl.c */
1441extern long ext4_ioctl(struct file *, unsigned int, unsigned long); 1446extern long ext4_ioctl(struct file *, unsigned int, unsigned long);
1442extern long ext4_compat_ioctl(struct file *, unsigned int, unsigned long); 1447extern long ext4_compat_ioctl(struct file *, unsigned int, unsigned long);
diff --git a/fs/ext4/ext4_extents.h b/fs/ext4/ext4_extents.h
index 2ca686454e87..bdb6ce7e2eb4 100644
--- a/fs/ext4/ext4_extents.h
+++ b/fs/ext4/ext4_extents.h
@@ -225,7 +225,8 @@ static inline void ext4_ext_mark_initialized(struct ext4_extent *ext)
225 ext->ee_len = cpu_to_le16(ext4_ext_get_actual_len(ext)); 225 ext->ee_len = cpu_to_le16(ext4_ext_get_actual_len(ext));
226} 226}
227 227
228extern int ext4_ext_calc_metadata_amount(struct inode *inode, int blocks); 228extern int ext4_ext_calc_metadata_amount(struct inode *inode,
229 sector_t lblocks);
229extern ext4_fsblk_t ext_pblock(struct ext4_extent *ex); 230extern ext4_fsblk_t ext_pblock(struct ext4_extent *ex);
230extern ext4_fsblk_t idx_pblock(struct ext4_extent_idx *); 231extern ext4_fsblk_t idx_pblock(struct ext4_extent_idx *);
231extern void ext4_ext_store_pblock(struct ext4_extent *, ext4_fsblk_t); 232extern void ext4_ext_store_pblock(struct ext4_extent *, ext4_fsblk_t);
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 3a7928f825e4..765a4826b118 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -296,29 +296,44 @@ static inline int ext4_ext_space_root_idx(struct inode *inode, int check)
296 * to allocate @blocks 296 * to allocate @blocks
297 * Worse case is one block per extent 297 * Worse case is one block per extent
298 */ 298 */
299int ext4_ext_calc_metadata_amount(struct inode *inode, int blocks) 299int ext4_ext_calc_metadata_amount(struct inode *inode, sector_t lblock)
300{ 300{
301 int lcap, icap, rcap, leafs, idxs, num; 301 struct ext4_inode_info *ei = EXT4_I(inode);
302 int newextents = blocks; 302 int idxs, num = 0;
303
304 rcap = ext4_ext_space_root_idx(inode, 0);
305 lcap = ext4_ext_space_block(inode, 0);
306 icap = ext4_ext_space_block_idx(inode, 0);
307 303
308 /* number of new leaf blocks needed */ 304 idxs = ((inode->i_sb->s_blocksize - sizeof(struct ext4_extent_header))
309 num = leafs = (newextents + lcap - 1) / lcap; 305 / sizeof(struct ext4_extent_idx));
310 306
311 /* 307 /*
312 * Worse case, we need separate index block(s) 308 * If the new delayed allocation block is contiguous with the
313 * to link all new leaf blocks 309 * previous da block, it can share index blocks with the
310 * previous block, so we only need to allocate a new index
311 * block every idxs leaf blocks. At ldxs**2 blocks, we need
312 * an additional index block, and at ldxs**3 blocks, yet
313 * another index blocks.
314 */ 314 */
315 idxs = (leafs + icap - 1) / icap; 315 if (ei->i_da_metadata_calc_len &&
316 do { 316 ei->i_da_metadata_calc_last_lblock+1 == lblock) {
317 num += idxs; 317 if ((ei->i_da_metadata_calc_len % idxs) == 0)
318 idxs = (idxs + icap - 1) / icap; 318 num++;
319 } while (idxs > rcap); 319 if ((ei->i_da_metadata_calc_len % (idxs*idxs)) == 0)
320 num++;
321 if ((ei->i_da_metadata_calc_len % (idxs*idxs*idxs)) == 0) {
322 num++;
323 ei->i_da_metadata_calc_len = 0;
324 } else
325 ei->i_da_metadata_calc_len++;
326 ei->i_da_metadata_calc_last_lblock++;
327 return num;
328 }
320 329
321 return num; 330 /*
331 * In the worst case we need a new set of index blocks at
332 * every level of the inode's extent tree.
333 */
334 ei->i_da_metadata_calc_len = 1;
335 ei->i_da_metadata_calc_last_lblock = lblock;
336 return ext_depth(inode) + 1;
322} 337}
323 338
324static int 339static int
@@ -3023,6 +3038,14 @@ out:
3023 return err; 3038 return err;
3024} 3039}
3025 3040
3041static void unmap_underlying_metadata_blocks(struct block_device *bdev,
3042 sector_t block, int count)
3043{
3044 int i;
3045 for (i = 0; i < count; i++)
3046 unmap_underlying_metadata(bdev, block + i);
3047}
3048
3026static int 3049static int
3027ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode, 3050ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode,
3028 ext4_lblk_t iblock, unsigned int max_blocks, 3051 ext4_lblk_t iblock, unsigned int max_blocks,
@@ -3098,6 +3121,30 @@ out:
3098 } else 3121 } else
3099 allocated = ret; 3122 allocated = ret;
3100 set_buffer_new(bh_result); 3123 set_buffer_new(bh_result);
3124 /*
3125 * if we allocated more blocks than requested
3126 * we need to make sure we unmap the extra block
3127 * allocated. The actual needed block will get
3128 * unmapped later when we find the buffer_head marked
3129 * new.
3130 */
3131 if (allocated > max_blocks) {
3132 unmap_underlying_metadata_blocks(inode->i_sb->s_bdev,
3133 newblock + max_blocks,
3134 allocated - max_blocks);
3135 allocated = max_blocks;
3136 }
3137
3138 /*
3139 * If we have done fallocate with the offset that is already
3140 * delayed allocated, we would have block reservation
3141 * and quota reservation done in the delayed write path.
3142 * But fallocate would have already updated quota and block
3143 * count for this offset. So cancel these reservation
3144 */
3145 if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE)
3146 ext4_da_update_reserve_space(inode, allocated, 0);
3147
3101map_out: 3148map_out:
3102 set_buffer_mapped(bh_result); 3149 set_buffer_mapped(bh_result);
3103out1: 3150out1:
@@ -3190,7 +3237,13 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
3190 * this situation is possible, though, _during_ tree modification; 3237 * this situation is possible, though, _during_ tree modification;
3191 * this is why assert can't be put in ext4_ext_find_extent() 3238 * this is why assert can't be put in ext4_ext_find_extent()
3192 */ 3239 */
3193 BUG_ON(path[depth].p_ext == NULL && depth != 0); 3240 if (path[depth].p_ext == NULL && depth != 0) {
3241 ext4_error(inode->i_sb, __func__, "bad extent address "
3242 "inode: %lu, iblock: %d, depth: %d",
3243 inode->i_ino, iblock, depth);
3244 err = -EIO;
3245 goto out2;
3246 }
3194 eh = path[depth].p_hdr; 3247 eh = path[depth].p_hdr;
3195 3248
3196 ex = path[depth].p_ext; 3249 ex = path[depth].p_ext;
@@ -3327,9 +3380,18 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
3327 /* previous routine could use block we allocated */ 3380 /* previous routine could use block we allocated */
3328 newblock = ext_pblock(&newex); 3381 newblock = ext_pblock(&newex);
3329 allocated = ext4_ext_get_actual_len(&newex); 3382 allocated = ext4_ext_get_actual_len(&newex);
3383 if (allocated > max_blocks)
3384 allocated = max_blocks;
3330 set_buffer_new(bh_result); 3385 set_buffer_new(bh_result);
3331 3386
3332 /* 3387 /*
3388 * Update reserved blocks/metadata blocks after successful
3389 * block allocation which had been deferred till now.
3390 */
3391 if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE)
3392 ext4_da_update_reserve_space(inode, allocated, 1);
3393
3394 /*
3333 * Cache the extent and update transaction to commit on fdatasync only 3395 * Cache the extent and update transaction to commit on fdatasync only
3334 * when it is _not_ an uninitialized extent. 3396 * when it is _not_ an uninitialized extent.
3335 */ 3397 */
diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c
index 0b22497d92e1..98bd140aad01 100644
--- a/fs/ext4/fsync.c
+++ b/fs/ext4/fsync.c
@@ -88,9 +88,21 @@ int ext4_sync_file(struct file *file, struct dentry *dentry, int datasync)
88 return ext4_force_commit(inode->i_sb); 88 return ext4_force_commit(inode->i_sb);
89 89
90 commit_tid = datasync ? ei->i_datasync_tid : ei->i_sync_tid; 90 commit_tid = datasync ? ei->i_datasync_tid : ei->i_sync_tid;
91 if (jbd2_log_start_commit(journal, commit_tid)) 91 if (jbd2_log_start_commit(journal, commit_tid)) {
92 /*
93 * When the journal is on a different device than the
94 * fs data disk, we need to issue the barrier in
95 * writeback mode. (In ordered mode, the jbd2 layer
96 * will take care of issuing the barrier. In
97 * data=journal, all of the data blocks are written to
98 * the journal device.)
99 */
100 if (ext4_should_writeback_data(inode) &&
101 (journal->j_fs_dev != journal->j_dev) &&
102 (journal->j_flags & JBD2_BARRIER))
103 blkdev_issue_flush(inode->i_sb->s_bdev, NULL);
92 jbd2_log_wait_commit(journal, commit_tid); 104 jbd2_log_wait_commit(journal, commit_tid);
93 else if (journal->j_flags & JBD2_BARRIER) 105 } else if (journal->j_flags & JBD2_BARRIER)
94 blkdev_issue_flush(inode->i_sb->s_bdev, NULL); 106 blkdev_issue_flush(inode->i_sb->s_bdev, NULL);
95 return ret; 107 return ret;
96} 108}
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 5352db1a3086..e11952404e02 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -1003,92 +1003,120 @@ out:
1003 return err; 1003 return err;
1004} 1004}
1005 1005
1006qsize_t ext4_get_reserved_space(struct inode *inode) 1006#ifdef CONFIG_QUOTA
1007qsize_t *ext4_get_reserved_space(struct inode *inode)
1007{ 1008{
1008 unsigned long long total; 1009 return &EXT4_I(inode)->i_reserved_quota;
1009
1010 spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
1011 total = EXT4_I(inode)->i_reserved_data_blocks +
1012 EXT4_I(inode)->i_reserved_meta_blocks;
1013 spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
1014
1015 return (total << inode->i_blkbits);
1016} 1010}
1011#endif
1012
1017/* 1013/*
1018 * Calculate the number of metadata blocks need to reserve 1014 * Calculate the number of metadata blocks need to reserve
1019 * to allocate @blocks for non extent file based file 1015 * to allocate a new block at @lblocks for non extent file based file
1020 */ 1016 */
1021static int ext4_indirect_calc_metadata_amount(struct inode *inode, int blocks) 1017static int ext4_indirect_calc_metadata_amount(struct inode *inode,
1018 sector_t lblock)
1022{ 1019{
1023 int icap = EXT4_ADDR_PER_BLOCK(inode->i_sb); 1020 struct ext4_inode_info *ei = EXT4_I(inode);
1024 int ind_blks, dind_blks, tind_blks; 1021 int dind_mask = EXT4_ADDR_PER_BLOCK(inode->i_sb) - 1;
1025 1022 int blk_bits;
1026 /* number of new indirect blocks needed */
1027 ind_blks = (blocks + icap - 1) / icap;
1028 1023
1029 dind_blks = (ind_blks + icap - 1) / icap; 1024 if (lblock < EXT4_NDIR_BLOCKS)
1025 return 0;
1030 1026
1031 tind_blks = 1; 1027 lblock -= EXT4_NDIR_BLOCKS;
1032 1028
1033 return ind_blks + dind_blks + tind_blks; 1029 if (ei->i_da_metadata_calc_len &&
1030 (lblock & dind_mask) == ei->i_da_metadata_calc_last_lblock) {
1031 ei->i_da_metadata_calc_len++;
1032 return 0;
1033 }
1034 ei->i_da_metadata_calc_last_lblock = lblock & dind_mask;
1035 ei->i_da_metadata_calc_len = 1;
1036 blk_bits = roundup_pow_of_two(lblock + 1);
1037 return (blk_bits / EXT4_ADDR_PER_BLOCK_BITS(inode->i_sb)) + 1;
1034} 1038}
1035 1039
1036/* 1040/*
1037 * Calculate the number of metadata blocks need to reserve 1041 * Calculate the number of metadata blocks need to reserve
1038 * to allocate given number of blocks 1042 * to allocate a block located at @lblock
1039 */ 1043 */
1040static int ext4_calc_metadata_amount(struct inode *inode, int blocks) 1044static int ext4_calc_metadata_amount(struct inode *inode, sector_t lblock)
1041{ 1045{
1042 if (!blocks)
1043 return 0;
1044
1045 if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) 1046 if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL)
1046 return ext4_ext_calc_metadata_amount(inode, blocks); 1047 return ext4_ext_calc_metadata_amount(inode, lblock);
1047 1048
1048 return ext4_indirect_calc_metadata_amount(inode, blocks); 1049 return ext4_indirect_calc_metadata_amount(inode, lblock);
1049} 1050}
1050 1051
1051static void ext4_da_update_reserve_space(struct inode *inode, int used) 1052/*
1053 * Called with i_data_sem down, which is important since we can call
1054 * ext4_discard_preallocations() from here.
1055 */
1056void ext4_da_update_reserve_space(struct inode *inode,
1057 int used, int quota_claim)
1052{ 1058{
1053 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); 1059 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
1054 int total, mdb, mdb_free; 1060 struct ext4_inode_info *ei = EXT4_I(inode);
1055 1061 int mdb_free = 0, allocated_meta_blocks = 0;
1056 spin_lock(&EXT4_I(inode)->i_block_reservation_lock); 1062
1057 /* recalculate the number of metablocks still need to be reserved */ 1063 spin_lock(&ei->i_block_reservation_lock);
1058 total = EXT4_I(inode)->i_reserved_data_blocks - used; 1064 if (unlikely(used > ei->i_reserved_data_blocks)) {
1059 mdb = ext4_calc_metadata_amount(inode, total); 1065 ext4_msg(inode->i_sb, KERN_NOTICE, "%s: ino %lu, used %d "
1060 1066 "with only %d reserved data blocks\n",
1061 /* figure out how many metablocks to release */ 1067 __func__, inode->i_ino, used,
1062 BUG_ON(mdb > EXT4_I(inode)->i_reserved_meta_blocks); 1068 ei->i_reserved_data_blocks);
1063 mdb_free = EXT4_I(inode)->i_reserved_meta_blocks - mdb; 1069 WARN_ON(1);
1064 1070 used = ei->i_reserved_data_blocks;
1065 if (mdb_free) { 1071 }
1066 /* Account for allocated meta_blocks */ 1072
1067 mdb_free -= EXT4_I(inode)->i_allocated_meta_blocks; 1073 /* Update per-inode reservations */
1068 1074 ei->i_reserved_data_blocks -= used;
1069 /* update fs dirty blocks counter */ 1075 used += ei->i_allocated_meta_blocks;
1076 ei->i_reserved_meta_blocks -= ei->i_allocated_meta_blocks;
1077 allocated_meta_blocks = ei->i_allocated_meta_blocks;
1078 ei->i_allocated_meta_blocks = 0;
1079 percpu_counter_sub(&sbi->s_dirtyblocks_counter, used);
1080
1081 if (ei->i_reserved_data_blocks == 0) {
1082 /*
1083 * We can release all of the reserved metadata blocks
1084 * only when we have written all of the delayed
1085 * allocation blocks.
1086 */
1087 mdb_free = ei->i_reserved_meta_blocks;
1088 ei->i_reserved_meta_blocks = 0;
1089 ei->i_da_metadata_calc_len = 0;
1070 percpu_counter_sub(&sbi->s_dirtyblocks_counter, mdb_free); 1090 percpu_counter_sub(&sbi->s_dirtyblocks_counter, mdb_free);
1071 EXT4_I(inode)->i_allocated_meta_blocks = 0;
1072 EXT4_I(inode)->i_reserved_meta_blocks = mdb;
1073 } 1091 }
1074
1075 /* update per-inode reservations */
1076 BUG_ON(used > EXT4_I(inode)->i_reserved_data_blocks);
1077 EXT4_I(inode)->i_reserved_data_blocks -= used;
1078 spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); 1092 spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
1079 1093
1080 /* 1094 /* Update quota subsystem */
1081 * free those over-booking quota for metadata blocks 1095 if (quota_claim) {
1082 */ 1096 vfs_dq_claim_block(inode, used);
1083 if (mdb_free) 1097 if (mdb_free)
1084 vfs_dq_release_reservation_block(inode, mdb_free); 1098 vfs_dq_release_reservation_block(inode, mdb_free);
1099 } else {
1100 /*
1101 * We did fallocate with an offset that is already delayed
1102 * allocated. So on delayed allocated writeback we should
1103 * not update the quota for allocated blocks. But then
1104 * converting an fallocate region to initialized region would
1105 * have caused a metadata allocation. So claim quota for
1106 * that
1107 */
1108 if (allocated_meta_blocks)
1109 vfs_dq_claim_block(inode, allocated_meta_blocks);
1110 vfs_dq_release_reservation_block(inode, mdb_free + used);
1111 }
1085 1112
1086 /* 1113 /*
1087 * If we have done all the pending block allocations and if 1114 * If we have done all the pending block allocations and if
1088 * there aren't any writers on the inode, we can discard the 1115 * there aren't any writers on the inode, we can discard the
1089 * inode's preallocations. 1116 * inode's preallocations.
1090 */ 1117 */
1091 if (!total && (atomic_read(&inode->i_writecount) == 0)) 1118 if ((ei->i_reserved_data_blocks == 0) &&
1119 (atomic_read(&inode->i_writecount) == 0))
1092 ext4_discard_preallocations(inode); 1120 ext4_discard_preallocations(inode);
1093} 1121}
1094 1122
@@ -1280,18 +1308,20 @@ int ext4_get_blocks(handle_t *handle, struct inode *inode, sector_t block,
1280 */ 1308 */
1281 EXT4_I(inode)->i_state &= ~EXT4_STATE_EXT_MIGRATE; 1309 EXT4_I(inode)->i_state &= ~EXT4_STATE_EXT_MIGRATE;
1282 } 1310 }
1283 }
1284 1311
1312 /*
1313 * Update reserved blocks/metadata blocks after successful
1314 * block allocation which had been deferred till now. We don't
1315 * support fallocate for non extent files. So we can update
1316 * reserve space here.
1317 */
1318 if ((retval > 0) &&
1319 (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE))
1320 ext4_da_update_reserve_space(inode, retval, 1);
1321 }
1285 if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) 1322 if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE)
1286 EXT4_I(inode)->i_delalloc_reserved_flag = 0; 1323 EXT4_I(inode)->i_delalloc_reserved_flag = 0;
1287 1324
1288 /*
1289 * Update reserved blocks/metadata blocks after successful
1290 * block allocation which had been deferred till now.
1291 */
1292 if ((retval > 0) && (flags & EXT4_GET_BLOCKS_UPDATE_RESERVE_SPACE))
1293 ext4_da_update_reserve_space(inode, retval);
1294
1295 up_write((&EXT4_I(inode)->i_data_sem)); 1325 up_write((&EXT4_I(inode)->i_data_sem));
1296 if (retval > 0 && buffer_mapped(bh)) { 1326 if (retval > 0 && buffer_mapped(bh)) {
1297 int ret = check_block_validity(inode, "file system " 1327 int ret = check_block_validity(inode, "file system "
@@ -1797,11 +1827,15 @@ static int ext4_journalled_write_end(struct file *file,
1797 return ret ? ret : copied; 1827 return ret ? ret : copied;
1798} 1828}
1799 1829
1800static int ext4_da_reserve_space(struct inode *inode, int nrblocks) 1830/*
1831 * Reserve a single block located at lblock
1832 */
1833static int ext4_da_reserve_space(struct inode *inode, sector_t lblock)
1801{ 1834{
1802 int retries = 0; 1835 int retries = 0;
1803 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); 1836 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
1804 unsigned long md_needed, mdblocks, total = 0; 1837 struct ext4_inode_info *ei = EXT4_I(inode);
1838 unsigned long md_needed, md_reserved;
1805 1839
1806 /* 1840 /*
1807 * recalculate the amount of metadata blocks to reserve 1841 * recalculate the amount of metadata blocks to reserve
@@ -1809,86 +1843,78 @@ static int ext4_da_reserve_space(struct inode *inode, int nrblocks)
1809 * worse case is one extent per block 1843 * worse case is one extent per block
1810 */ 1844 */
1811repeat: 1845repeat:
1812 spin_lock(&EXT4_I(inode)->i_block_reservation_lock); 1846 spin_lock(&ei->i_block_reservation_lock);
1813 total = EXT4_I(inode)->i_reserved_data_blocks + nrblocks; 1847 md_reserved = ei->i_reserved_meta_blocks;
1814 mdblocks = ext4_calc_metadata_amount(inode, total); 1848 md_needed = ext4_calc_metadata_amount(inode, lblock);
1815 BUG_ON(mdblocks < EXT4_I(inode)->i_reserved_meta_blocks); 1849 spin_unlock(&ei->i_block_reservation_lock);
1816
1817 md_needed = mdblocks - EXT4_I(inode)->i_reserved_meta_blocks;
1818 total = md_needed + nrblocks;
1819 1850
1820 /* 1851 /*
1821 * Make quota reservation here to prevent quota overflow 1852 * Make quota reservation here to prevent quota overflow
1822 * later. Real quota accounting is done at pages writeout 1853 * later. Real quota accounting is done at pages writeout
1823 * time. 1854 * time.
1824 */ 1855 */
1825 if (vfs_dq_reserve_block(inode, total)) { 1856 if (vfs_dq_reserve_block(inode, md_needed + 1))
1826 spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
1827 return -EDQUOT; 1857 return -EDQUOT;
1828 }
1829 1858
1830 if (ext4_claim_free_blocks(sbi, total)) { 1859 if (ext4_claim_free_blocks(sbi, md_needed + 1)) {
1831 spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); 1860 vfs_dq_release_reservation_block(inode, md_needed + 1);
1832 vfs_dq_release_reservation_block(inode, total);
1833 if (ext4_should_retry_alloc(inode->i_sb, &retries)) { 1861 if (ext4_should_retry_alloc(inode->i_sb, &retries)) {
1834 yield(); 1862 yield();
1835 goto repeat; 1863 goto repeat;
1836 } 1864 }
1837 return -ENOSPC; 1865 return -ENOSPC;
1838 } 1866 }
1839 EXT4_I(inode)->i_reserved_data_blocks += nrblocks; 1867 spin_lock(&ei->i_block_reservation_lock);
1840 EXT4_I(inode)->i_reserved_meta_blocks = mdblocks; 1868 ei->i_reserved_data_blocks++;
1869 ei->i_reserved_meta_blocks += md_needed;
1870 spin_unlock(&ei->i_block_reservation_lock);
1841 1871
1842 spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
1843 return 0; /* success */ 1872 return 0; /* success */
1844} 1873}
1845 1874
1846static void ext4_da_release_space(struct inode *inode, int to_free) 1875static void ext4_da_release_space(struct inode *inode, int to_free)
1847{ 1876{
1848 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); 1877 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
1849 int total, mdb, mdb_free, release; 1878 struct ext4_inode_info *ei = EXT4_I(inode);
1850 1879
1851 if (!to_free) 1880 if (!to_free)
1852 return; /* Nothing to release, exit */ 1881 return; /* Nothing to release, exit */
1853 1882
1854 spin_lock(&EXT4_I(inode)->i_block_reservation_lock); 1883 spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
1855 1884
1856 if (!EXT4_I(inode)->i_reserved_data_blocks) { 1885 if (unlikely(to_free > ei->i_reserved_data_blocks)) {
1857 /* 1886 /*
1858 * if there is no reserved blocks, but we try to free some 1887 * if there aren't enough reserved blocks, then the
1859 * then the counter is messed up somewhere. 1888 * counter is messed up somewhere. Since this
1860 * but since this function is called from invalidate 1889 * function is called from invalidate page, it's
1861 * page, it's harmless to return without any action 1890 * harmless to return without any action.
1862 */ 1891 */
1863 printk(KERN_INFO "ext4 delalloc try to release %d reserved " 1892 ext4_msg(inode->i_sb, KERN_NOTICE, "ext4_da_release_space: "
1864 "blocks for inode %lu, but there is no reserved " 1893 "ino %lu, to_free %d with only %d reserved "
1865 "data blocks\n", to_free, inode->i_ino); 1894 "data blocks\n", inode->i_ino, to_free,
1866 spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); 1895 ei->i_reserved_data_blocks);
1867 return; 1896 WARN_ON(1);
1897 to_free = ei->i_reserved_data_blocks;
1868 } 1898 }
1899 ei->i_reserved_data_blocks -= to_free;
1869 1900
1870 /* recalculate the number of metablocks still need to be reserved */ 1901 if (ei->i_reserved_data_blocks == 0) {
1871 total = EXT4_I(inode)->i_reserved_data_blocks - to_free; 1902 /*
1872 mdb = ext4_calc_metadata_amount(inode, total); 1903 * We can release all of the reserved metadata blocks
1873 1904 * only when we have written all of the delayed
1874 /* figure out how many metablocks to release */ 1905 * allocation blocks.
1875 BUG_ON(mdb > EXT4_I(inode)->i_reserved_meta_blocks); 1906 */
1876 mdb_free = EXT4_I(inode)->i_reserved_meta_blocks - mdb; 1907 to_free += ei->i_reserved_meta_blocks;
1877 1908 ei->i_reserved_meta_blocks = 0;
1878 release = to_free + mdb_free; 1909 ei->i_da_metadata_calc_len = 0;
1879 1910 }
1880 /* update fs dirty blocks counter for truncate case */
1881 percpu_counter_sub(&sbi->s_dirtyblocks_counter, release);
1882 1911
1883 /* update per-inode reservations */ 1912 /* update fs dirty blocks counter */
1884 BUG_ON(to_free > EXT4_I(inode)->i_reserved_data_blocks); 1913 percpu_counter_sub(&sbi->s_dirtyblocks_counter, to_free);
1885 EXT4_I(inode)->i_reserved_data_blocks -= to_free;
1886 1914
1887 BUG_ON(mdb > EXT4_I(inode)->i_reserved_meta_blocks);
1888 EXT4_I(inode)->i_reserved_meta_blocks = mdb;
1889 spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); 1915 spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
1890 1916
1891 vfs_dq_release_reservation_block(inode, release); 1917 vfs_dq_release_reservation_block(inode, to_free);
1892} 1918}
1893 1919
1894static void ext4_da_page_release_reservation(struct page *page, 1920static void ext4_da_page_release_reservation(struct page *page,
@@ -2193,10 +2219,10 @@ static int mpage_da_map_blocks(struct mpage_da_data *mpd)
2193 * variables are updated after the blocks have been allocated. 2219 * variables are updated after the blocks have been allocated.
2194 */ 2220 */
2195 new.b_state = 0; 2221 new.b_state = 0;
2196 get_blocks_flags = (EXT4_GET_BLOCKS_CREATE | 2222 get_blocks_flags = EXT4_GET_BLOCKS_CREATE;
2197 EXT4_GET_BLOCKS_DELALLOC_RESERVE);
2198 if (mpd->b_state & (1 << BH_Delay)) 2223 if (mpd->b_state & (1 << BH_Delay))
2199 get_blocks_flags |= EXT4_GET_BLOCKS_UPDATE_RESERVE_SPACE; 2224 get_blocks_flags |= EXT4_GET_BLOCKS_DELALLOC_RESERVE;
2225
2200 blks = ext4_get_blocks(handle, mpd->inode, next, max_blocks, 2226 blks = ext4_get_blocks(handle, mpd->inode, next, max_blocks,
2201 &new, get_blocks_flags); 2227 &new, get_blocks_flags);
2202 if (blks < 0) { 2228 if (blks < 0) {
@@ -2494,7 +2520,7 @@ static int ext4_da_get_block_prep(struct inode *inode, sector_t iblock,
2494 * XXX: __block_prepare_write() unmaps passed block, 2520 * XXX: __block_prepare_write() unmaps passed block,
2495 * is it OK? 2521 * is it OK?
2496 */ 2522 */
2497 ret = ext4_da_reserve_space(inode, 1); 2523 ret = ext4_da_reserve_space(inode, iblock);
2498 if (ret) 2524 if (ret)
2499 /* not enough space to reserve */ 2525 /* not enough space to reserve */
2500 return ret; 2526 return ret;
@@ -2968,8 +2994,7 @@ retry:
2968out_writepages: 2994out_writepages:
2969 if (!no_nrwrite_index_update) 2995 if (!no_nrwrite_index_update)
2970 wbc->no_nrwrite_index_update = 0; 2996 wbc->no_nrwrite_index_update = 0;
2971 if (wbc->nr_to_write > nr_to_writebump) 2997 wbc->nr_to_write -= nr_to_writebump;
2972 wbc->nr_to_write -= nr_to_writebump;
2973 wbc->range_start = range_start; 2998 wbc->range_start = range_start;
2974 trace_ext4_da_writepages_result(inode, wbc, ret, pages_written); 2999 trace_ext4_da_writepages_result(inode, wbc, ret, pages_written);
2975 return ret; 3000 return ret;
@@ -2994,11 +3019,18 @@ static int ext4_nonda_switch(struct super_block *sb)
2994 if (2 * free_blocks < 3 * dirty_blocks || 3019 if (2 * free_blocks < 3 * dirty_blocks ||
2995 free_blocks < (dirty_blocks + EXT4_FREEBLOCKS_WATERMARK)) { 3020 free_blocks < (dirty_blocks + EXT4_FREEBLOCKS_WATERMARK)) {
2996 /* 3021 /*
2997 * free block count is less that 150% of dirty blocks 3022 * free block count is less than 150% of dirty blocks
2998 * or free blocks is less that watermark 3023 * or free blocks is less than watermark
2999 */ 3024 */
3000 return 1; 3025 return 1;
3001 } 3026 }
3027 /*
3028 * Even if we don't switch but are nearing capacity,
3029 * start pushing delalloc when 1/2 of free blocks are dirty.
3030 */
3031 if (free_blocks < 2 * dirty_blocks)
3032 writeback_inodes_sb_if_idle(sb);
3033
3002 return 0; 3034 return 0;
3003} 3035}
3004 3036
@@ -3006,7 +3038,7 @@ static int ext4_da_write_begin(struct file *file, struct address_space *mapping,
3006 loff_t pos, unsigned len, unsigned flags, 3038 loff_t pos, unsigned len, unsigned flags,
3007 struct page **pagep, void **fsdata) 3039 struct page **pagep, void **fsdata)
3008{ 3040{
3009 int ret, retries = 0; 3041 int ret, retries = 0, quota_retries = 0;
3010 struct page *page; 3042 struct page *page;
3011 pgoff_t index; 3043 pgoff_t index;
3012 unsigned from, to; 3044 unsigned from, to;
@@ -3065,6 +3097,22 @@ retry:
3065 3097
3066 if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)) 3098 if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
3067 goto retry; 3099 goto retry;
3100
3101 if ((ret == -EDQUOT) &&
3102 EXT4_I(inode)->i_reserved_meta_blocks &&
3103 (quota_retries++ < 3)) {
3104 /*
3105 * Since we often over-estimate the number of meta
3106 * data blocks required, we may sometimes get a
3107 * spurios out of quota error even though there would
3108 * be enough space once we write the data blocks and
3109 * find out how many meta data blocks were _really_
3110 * required. So try forcing the inode write to see if
3111 * that helps.
3112 */
3113 write_inode_now(inode, (quota_retries == 3));
3114 goto retry;
3115 }
3068out: 3116out:
3069 return ret; 3117 return ret;
3070} 3118}
@@ -4794,6 +4842,9 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
4794 ((__u64)le16_to_cpu(raw_inode->i_file_acl_high)) << 32; 4842 ((__u64)le16_to_cpu(raw_inode->i_file_acl_high)) << 32;
4795 inode->i_size = ext4_isize(raw_inode); 4843 inode->i_size = ext4_isize(raw_inode);
4796 ei->i_disksize = inode->i_size; 4844 ei->i_disksize = inode->i_size;
4845#ifdef CONFIG_QUOTA
4846 ei->i_reserved_quota = 0;
4847#endif
4797 inode->i_generation = le32_to_cpu(raw_inode->i_generation); 4848 inode->i_generation = le32_to_cpu(raw_inode->i_generation);
4798 ei->i_block_group = iloc.block_group; 4849 ei->i_block_group = iloc.block_group;
4799 ei->i_last_alloc_group = ~0; 4850 ei->i_last_alloc_group = ~0;
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index b1fd3daadc9c..d34afad3e137 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -2755,12 +2755,6 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
2755 if (!(ac->ac_flags & EXT4_MB_DELALLOC_RESERVED)) 2755 if (!(ac->ac_flags & EXT4_MB_DELALLOC_RESERVED))
2756 /* release all the reserved blocks if non delalloc */ 2756 /* release all the reserved blocks if non delalloc */
2757 percpu_counter_sub(&sbi->s_dirtyblocks_counter, reserv_blks); 2757 percpu_counter_sub(&sbi->s_dirtyblocks_counter, reserv_blks);
2758 else {
2759 percpu_counter_sub(&sbi->s_dirtyblocks_counter,
2760 ac->ac_b_ex.fe_len);
2761 /* convert reserved quota blocks to real quota blocks */
2762 vfs_dq_claim_block(ac->ac_inode, ac->ac_b_ex.fe_len);
2763 }
2764 2758
2765 if (sbi->s_log_groups_per_flex) { 2759 if (sbi->s_log_groups_per_flex) {
2766 ext4_group_t flex_group = ext4_flex_group(sbi, 2760 ext4_group_t flex_group = ext4_flex_group(sbi,
diff --git a/fs/ext4/mballoc.h b/fs/ext4/mballoc.h
index 0ca811061bc7..436521cae456 100644
--- a/fs/ext4/mballoc.h
+++ b/fs/ext4/mballoc.h
@@ -17,7 +17,6 @@
17#include <linux/proc_fs.h> 17#include <linux/proc_fs.h>
18#include <linux/pagemap.h> 18#include <linux/pagemap.h>
19#include <linux/seq_file.h> 19#include <linux/seq_file.h>
20#include <linux/version.h>
21#include <linux/blkdev.h> 20#include <linux/blkdev.h>
22#include <linux/mutex.h> 21#include <linux/mutex.h>
23#include "ext4_jbd2.h" 22#include "ext4_jbd2.h"
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 827bde1f2594..735c20d5fd56 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -702,8 +702,12 @@ static struct inode *ext4_alloc_inode(struct super_block *sb)
702 ei->i_reserved_data_blocks = 0; 702 ei->i_reserved_data_blocks = 0;
703 ei->i_reserved_meta_blocks = 0; 703 ei->i_reserved_meta_blocks = 0;
704 ei->i_allocated_meta_blocks = 0; 704 ei->i_allocated_meta_blocks = 0;
705 ei->i_da_metadata_calc_len = 0;
705 ei->i_delalloc_reserved_flag = 0; 706 ei->i_delalloc_reserved_flag = 0;
706 spin_lock_init(&(ei->i_block_reservation_lock)); 707 spin_lock_init(&(ei->i_block_reservation_lock));
708#ifdef CONFIG_QUOTA
709 ei->i_reserved_quota = 0;
710#endif
707 INIT_LIST_HEAD(&ei->i_aio_dio_complete_list); 711 INIT_LIST_HEAD(&ei->i_aio_dio_complete_list);
708 ei->cur_aio_dio = NULL; 712 ei->cur_aio_dio = NULL;
709 ei->i_sync_tid = 0; 713 ei->i_sync_tid = 0;
@@ -1014,7 +1018,9 @@ static const struct dquot_operations ext4_quota_operations = {
1014 .reserve_space = dquot_reserve_space, 1018 .reserve_space = dquot_reserve_space,
1015 .claim_space = dquot_claim_space, 1019 .claim_space = dquot_claim_space,
1016 .release_rsv = dquot_release_reserved_space, 1020 .release_rsv = dquot_release_reserved_space,
1021#ifdef CONFIG_QUOTA
1017 .get_reserved_space = ext4_get_reserved_space, 1022 .get_reserved_space = ext4_get_reserved_space,
1023#endif
1018 .alloc_inode = dquot_alloc_inode, 1024 .alloc_inode = dquot_alloc_inode,
1019 .free_space = dquot_free_space, 1025 .free_space = dquot_free_space,
1020 .free_inode = dquot_free_inode, 1026 .free_inode = dquot_free_inode,
@@ -2169,9 +2175,9 @@ static ssize_t lifetime_write_kbytes_show(struct ext4_attr *a,
2169 struct super_block *sb = sbi->s_buddy_cache->i_sb; 2175 struct super_block *sb = sbi->s_buddy_cache->i_sb;
2170 2176
2171 return snprintf(buf, PAGE_SIZE, "%llu\n", 2177 return snprintf(buf, PAGE_SIZE, "%llu\n",
2172 sbi->s_kbytes_written + 2178 (unsigned long long)(sbi->s_kbytes_written +
2173 ((part_stat_read(sb->s_bdev->bd_part, sectors[1]) - 2179 ((part_stat_read(sb->s_bdev->bd_part, sectors[1]) -
2174 EXT4_SB(sb)->s_sectors_written_start) >> 1)); 2180 EXT4_SB(sb)->s_sectors_written_start) >> 1)));
2175} 2181}
2176 2182
2177static ssize_t inode_readahead_blks_store(struct ext4_attr *a, 2183static ssize_t inode_readahead_blks_store(struct ext4_attr *a,
@@ -4000,6 +4006,7 @@ static inline void unregister_as_ext2(void)
4000{ 4006{
4001 unregister_filesystem(&ext2_fs_type); 4007 unregister_filesystem(&ext2_fs_type);
4002} 4008}
4009MODULE_ALIAS("ext2");
4003#else 4010#else
4004static inline void register_as_ext2(void) { } 4011static inline void register_as_ext2(void) { }
4005static inline void unregister_as_ext2(void) { } 4012static inline void unregister_as_ext2(void) { }
@@ -4026,6 +4033,7 @@ static inline void unregister_as_ext3(void)
4026{ 4033{
4027 unregister_filesystem(&ext3_fs_type); 4034 unregister_filesystem(&ext3_fs_type);
4028} 4035}
4036MODULE_ALIAS("ext3");
4029#else 4037#else
4030static inline void register_as_ext3(void) { } 4038static inline void register_as_ext3(void) { }
4031static inline void unregister_as_ext3(void) { } 4039static inline void unregister_as_ext3(void) { }
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index 910bf9a59cb3..f3a2f7ed45aa 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -92,7 +92,7 @@ static struct buffer_head *ext4_xattr_cache_find(struct inode *,
92 struct mb_cache_entry **); 92 struct mb_cache_entry **);
93static void ext4_xattr_rehash(struct ext4_xattr_header *, 93static void ext4_xattr_rehash(struct ext4_xattr_header *,
94 struct ext4_xattr_entry *); 94 struct ext4_xattr_entry *);
95static int ext4_xattr_list(struct inode *inode, char *buffer, 95static int ext4_xattr_list(struct dentry *dentry, char *buffer,
96 size_t buffer_size); 96 size_t buffer_size);
97 97
98static struct mb_cache *ext4_xattr_cache; 98static struct mb_cache *ext4_xattr_cache;
@@ -140,7 +140,7 @@ ext4_xattr_handler(int name_index)
140ssize_t 140ssize_t
141ext4_listxattr(struct dentry *dentry, char *buffer, size_t size) 141ext4_listxattr(struct dentry *dentry, char *buffer, size_t size)
142{ 142{
143 return ext4_xattr_list(dentry->d_inode, buffer, size); 143 return ext4_xattr_list(dentry, buffer, size);
144} 144}
145 145
146static int 146static int
@@ -325,7 +325,7 @@ ext4_xattr_get(struct inode *inode, int name_index, const char *name,
325} 325}
326 326
327static int 327static int
328ext4_xattr_list_entries(struct inode *inode, struct ext4_xattr_entry *entry, 328ext4_xattr_list_entries(struct dentry *dentry, struct ext4_xattr_entry *entry,
329 char *buffer, size_t buffer_size) 329 char *buffer, size_t buffer_size)
330{ 330{
331 size_t rest = buffer_size; 331 size_t rest = buffer_size;
@@ -335,9 +335,10 @@ ext4_xattr_list_entries(struct inode *inode, struct ext4_xattr_entry *entry,
335 ext4_xattr_handler(entry->e_name_index); 335 ext4_xattr_handler(entry->e_name_index);
336 336
337 if (handler) { 337 if (handler) {
338 size_t size = handler->list(inode, buffer, rest, 338 size_t size = handler->list(dentry, buffer, rest,
339 entry->e_name, 339 entry->e_name,
340 entry->e_name_len); 340 entry->e_name_len,
341 handler->flags);
341 if (buffer) { 342 if (buffer) {
342 if (size > rest) 343 if (size > rest)
343 return -ERANGE; 344 return -ERANGE;
@@ -350,8 +351,9 @@ ext4_xattr_list_entries(struct inode *inode, struct ext4_xattr_entry *entry,
350} 351}
351 352
352static int 353static int
353ext4_xattr_block_list(struct inode *inode, char *buffer, size_t buffer_size) 354ext4_xattr_block_list(struct dentry *dentry, char *buffer, size_t buffer_size)
354{ 355{
356 struct inode *inode = dentry->d_inode;
355 struct buffer_head *bh = NULL; 357 struct buffer_head *bh = NULL;
356 int error; 358 int error;
357 359
@@ -376,7 +378,7 @@ ext4_xattr_block_list(struct inode *inode, char *buffer, size_t buffer_size)
376 goto cleanup; 378 goto cleanup;
377 } 379 }
378 ext4_xattr_cache_insert(bh); 380 ext4_xattr_cache_insert(bh);
379 error = ext4_xattr_list_entries(inode, BFIRST(bh), buffer, buffer_size); 381 error = ext4_xattr_list_entries(dentry, BFIRST(bh), buffer, buffer_size);
380 382
381cleanup: 383cleanup:
382 brelse(bh); 384 brelse(bh);
@@ -385,8 +387,9 @@ cleanup:
385} 387}
386 388
387static int 389static int
388ext4_xattr_ibody_list(struct inode *inode, char *buffer, size_t buffer_size) 390ext4_xattr_ibody_list(struct dentry *dentry, char *buffer, size_t buffer_size)
389{ 391{
392 struct inode *inode = dentry->d_inode;
390 struct ext4_xattr_ibody_header *header; 393 struct ext4_xattr_ibody_header *header;
391 struct ext4_inode *raw_inode; 394 struct ext4_inode *raw_inode;
392 struct ext4_iloc iloc; 395 struct ext4_iloc iloc;
@@ -404,7 +407,7 @@ ext4_xattr_ibody_list(struct inode *inode, char *buffer, size_t buffer_size)
404 error = ext4_xattr_check_names(IFIRST(header), end); 407 error = ext4_xattr_check_names(IFIRST(header), end);
405 if (error) 408 if (error)
406 goto cleanup; 409 goto cleanup;
407 error = ext4_xattr_list_entries(inode, IFIRST(header), 410 error = ext4_xattr_list_entries(dentry, IFIRST(header),
408 buffer, buffer_size); 411 buffer, buffer_size);
409 412
410cleanup: 413cleanup:
@@ -423,12 +426,12 @@ cleanup:
423 * used / required on success. 426 * used / required on success.
424 */ 427 */
425static int 428static int
426ext4_xattr_list(struct inode *inode, char *buffer, size_t buffer_size) 429ext4_xattr_list(struct dentry *dentry, char *buffer, size_t buffer_size)
427{ 430{
428 int i_error, b_error; 431 int i_error, b_error;
429 432
430 down_read(&EXT4_I(inode)->xattr_sem); 433 down_read(&EXT4_I(dentry->d_inode)->xattr_sem);
431 i_error = ext4_xattr_ibody_list(inode, buffer, buffer_size); 434 i_error = ext4_xattr_ibody_list(dentry, buffer, buffer_size);
432 if (i_error < 0) { 435 if (i_error < 0) {
433 b_error = 0; 436 b_error = 0;
434 } else { 437 } else {
@@ -436,11 +439,11 @@ ext4_xattr_list(struct inode *inode, char *buffer, size_t buffer_size)
436 buffer += i_error; 439 buffer += i_error;
437 buffer_size -= i_error; 440 buffer_size -= i_error;
438 } 441 }
439 b_error = ext4_xattr_block_list(inode, buffer, buffer_size); 442 b_error = ext4_xattr_block_list(dentry, buffer, buffer_size);
440 if (b_error < 0) 443 if (b_error < 0)
441 i_error = 0; 444 i_error = 0;
442 } 445 }
443 up_read(&EXT4_I(inode)->xattr_sem); 446 up_read(&EXT4_I(dentry->d_inode)->xattr_sem);
444 return i_error + b_error; 447 return i_error + b_error;
445} 448}
446 449
@@ -1329,6 +1332,8 @@ retry:
1329 goto cleanup; 1332 goto cleanup;
1330 kfree(b_entry_name); 1333 kfree(b_entry_name);
1331 kfree(buffer); 1334 kfree(buffer);
1335 b_entry_name = NULL;
1336 buffer = NULL;
1332 brelse(is->iloc.bh); 1337 brelse(is->iloc.bh);
1333 kfree(is); 1338 kfree(is);
1334 kfree(bs); 1339 kfree(bs);
diff --git a/fs/ext4/xattr_security.c b/fs/ext4/xattr_security.c
index ca5f89fc6cae..983c253999a7 100644
--- a/fs/ext4/xattr_security.c
+++ b/fs/ext4/xattr_security.c
@@ -12,8 +12,8 @@
12#include "xattr.h" 12#include "xattr.h"
13 13
14static size_t 14static size_t
15ext4_xattr_security_list(struct inode *inode, char *list, size_t list_size, 15ext4_xattr_security_list(struct dentry *dentry, char *list, size_t list_size,
16 const char *name, size_t name_len) 16 const char *name, size_t name_len, int type)
17{ 17{
18 const size_t prefix_len = sizeof(XATTR_SECURITY_PREFIX)-1; 18 const size_t prefix_len = sizeof(XATTR_SECURITY_PREFIX)-1;
19 const size_t total_len = prefix_len + name_len + 1; 19 const size_t total_len = prefix_len + name_len + 1;
@@ -28,23 +28,23 @@ ext4_xattr_security_list(struct inode *inode, char *list, size_t list_size,
28} 28}
29 29
30static int 30static int
31ext4_xattr_security_get(struct inode *inode, const char *name, 31ext4_xattr_security_get(struct dentry *dentry, const char *name,
32 void *buffer, size_t size) 32 void *buffer, size_t size, int type)
33{ 33{
34 if (strcmp(name, "") == 0) 34 if (strcmp(name, "") == 0)
35 return -EINVAL; 35 return -EINVAL;
36 return ext4_xattr_get(inode, EXT4_XATTR_INDEX_SECURITY, name, 36 return ext4_xattr_get(dentry->d_inode, EXT4_XATTR_INDEX_SECURITY,
37 buffer, size); 37 name, buffer, size);
38} 38}
39 39
40static int 40static int
41ext4_xattr_security_set(struct inode *inode, const char *name, 41ext4_xattr_security_set(struct dentry *dentry, const char *name,
42 const void *value, size_t size, int flags) 42 const void *value, size_t size, int flags, int type)
43{ 43{
44 if (strcmp(name, "") == 0) 44 if (strcmp(name, "") == 0)
45 return -EINVAL; 45 return -EINVAL;
46 return ext4_xattr_set(inode, EXT4_XATTR_INDEX_SECURITY, name, 46 return ext4_xattr_set(dentry->d_inode, EXT4_XATTR_INDEX_SECURITY,
47 value, size, flags); 47 name, value, size, flags);
48} 48}
49 49
50int 50int
diff --git a/fs/ext4/xattr_trusted.c b/fs/ext4/xattr_trusted.c
index ac1a52cf2a37..15b50edc6587 100644
--- a/fs/ext4/xattr_trusted.c
+++ b/fs/ext4/xattr_trusted.c
@@ -14,8 +14,8 @@
14#include "xattr.h" 14#include "xattr.h"
15 15
16static size_t 16static size_t
17ext4_xattr_trusted_list(struct inode *inode, char *list, size_t list_size, 17ext4_xattr_trusted_list(struct dentry *dentry, char *list, size_t list_size,
18 const char *name, size_t name_len) 18 const char *name, size_t name_len, int type)
19{ 19{
20 const size_t prefix_len = XATTR_TRUSTED_PREFIX_LEN; 20 const size_t prefix_len = XATTR_TRUSTED_PREFIX_LEN;
21 const size_t total_len = prefix_len + name_len + 1; 21 const size_t total_len = prefix_len + name_len + 1;
@@ -32,23 +32,23 @@ ext4_xattr_trusted_list(struct inode *inode, char *list, size_t list_size,
32} 32}
33 33
34static int 34static int
35ext4_xattr_trusted_get(struct inode *inode, const char *name, 35ext4_xattr_trusted_get(struct dentry *dentry, const char *name, void *buffer,
36 void *buffer, size_t size) 36 size_t size, int type)
37{ 37{
38 if (strcmp(name, "") == 0) 38 if (strcmp(name, "") == 0)
39 return -EINVAL; 39 return -EINVAL;
40 return ext4_xattr_get(inode, EXT4_XATTR_INDEX_TRUSTED, name, 40 return ext4_xattr_get(dentry->d_inode, EXT4_XATTR_INDEX_TRUSTED,
41 buffer, size); 41 name, buffer, size);
42} 42}
43 43
44static int 44static int
45ext4_xattr_trusted_set(struct inode *inode, const char *name, 45ext4_xattr_trusted_set(struct dentry *dentry, const char *name,
46 const void *value, size_t size, int flags) 46 const void *value, size_t size, int flags, int type)
47{ 47{
48 if (strcmp(name, "") == 0) 48 if (strcmp(name, "") == 0)
49 return -EINVAL; 49 return -EINVAL;
50 return ext4_xattr_set(inode, EXT4_XATTR_INDEX_TRUSTED, name, 50 return ext4_xattr_set(dentry->d_inode, EXT4_XATTR_INDEX_TRUSTED,
51 value, size, flags); 51 name, value, size, flags);
52} 52}
53 53
54struct xattr_handler ext4_xattr_trusted_handler = { 54struct xattr_handler ext4_xattr_trusted_handler = {
diff --git a/fs/ext4/xattr_user.c b/fs/ext4/xattr_user.c
index d91aa61b42aa..c4ce05746ce1 100644
--- a/fs/ext4/xattr_user.c
+++ b/fs/ext4/xattr_user.c
@@ -13,13 +13,13 @@
13#include "xattr.h" 13#include "xattr.h"
14 14
15static size_t 15static size_t
16ext4_xattr_user_list(struct inode *inode, char *list, size_t list_size, 16ext4_xattr_user_list(struct dentry *dentry, char *list, size_t list_size,
17 const char *name, size_t name_len) 17 const char *name, size_t name_len, int type)
18{ 18{
19 const size_t prefix_len = XATTR_USER_PREFIX_LEN; 19 const size_t prefix_len = XATTR_USER_PREFIX_LEN;
20 const size_t total_len = prefix_len + name_len + 1; 20 const size_t total_len = prefix_len + name_len + 1;
21 21
22 if (!test_opt(inode->i_sb, XATTR_USER)) 22 if (!test_opt(dentry->d_sb, XATTR_USER))
23 return 0; 23 return 0;
24 24
25 if (list && total_len <= list_size) { 25 if (list && total_len <= list_size) {
@@ -31,26 +31,27 @@ ext4_xattr_user_list(struct inode *inode, char *list, size_t list_size,
31} 31}
32 32
33static int 33static int
34ext4_xattr_user_get(struct inode *inode, const char *name, 34ext4_xattr_user_get(struct dentry *dentry, const char *name,
35 void *buffer, size_t size) 35 void *buffer, size_t size, int type)
36{ 36{
37 if (strcmp(name, "") == 0) 37 if (strcmp(name, "") == 0)
38 return -EINVAL; 38 return -EINVAL;
39 if (!test_opt(inode->i_sb, XATTR_USER)) 39 if (!test_opt(dentry->d_sb, XATTR_USER))
40 return -EOPNOTSUPP; 40 return -EOPNOTSUPP;
41 return ext4_xattr_get(inode, EXT4_XATTR_INDEX_USER, name, buffer, size); 41 return ext4_xattr_get(dentry->d_inode, EXT4_XATTR_INDEX_USER,
42 name, buffer, size);
42} 43}
43 44
44static int 45static int
45ext4_xattr_user_set(struct inode *inode, const char *name, 46ext4_xattr_user_set(struct dentry *dentry, const char *name,
46 const void *value, size_t size, int flags) 47 const void *value, size_t size, int flags, int type)
47{ 48{
48 if (strcmp(name, "") == 0) 49 if (strcmp(name, "") == 0)
49 return -EINVAL; 50 return -EINVAL;
50 if (!test_opt(inode->i_sb, XATTR_USER)) 51 if (!test_opt(dentry->d_sb, XATTR_USER))
51 return -EOPNOTSUPP; 52 return -EOPNOTSUPP;
52 return ext4_xattr_set(inode, EXT4_XATTR_INDEX_USER, name, 53 return ext4_xattr_set(dentry->d_inode, EXT4_XATTR_INDEX_USER,
53 value, size, flags); 54 name, value, size, flags);
54} 55}
55 56
56struct xattr_handler ext4_xattr_user_handler = { 57struct xattr_handler ext4_xattr_user_handler = {
diff --git a/fs/fat/fat.h b/fs/fat/fat.h
index 7db0979c6b72..e6efdfa0f6db 100644
--- a/fs/fat/fat.h
+++ b/fs/fat/fat.h
@@ -44,7 +44,8 @@ struct fat_mount_options {
44 nocase:1, /* Does this need case conversion? 0=need case conversion*/ 44 nocase:1, /* Does this need case conversion? 0=need case conversion*/
45 usefree:1, /* Use free_clusters for FAT32 */ 45 usefree:1, /* Use free_clusters for FAT32 */
46 tz_utc:1, /* Filesystem timestamps are in UTC */ 46 tz_utc:1, /* Filesystem timestamps are in UTC */
47 rodir:1; /* allow ATTR_RO for directory */ 47 rodir:1, /* allow ATTR_RO for directory */
48 discard:1; /* Issue discard requests on deletions */
48}; 49};
49 50
50#define FAT_HASH_BITS 8 51#define FAT_HASH_BITS 8
diff --git a/fs/fat/fatent.c b/fs/fat/fatent.c
index a81037721a6f..81184d3b75a3 100644
--- a/fs/fat/fatent.c
+++ b/fs/fat/fatent.c
@@ -566,16 +566,21 @@ int fat_free_clusters(struct inode *inode, int cluster)
566 goto error; 566 goto error;
567 } 567 }
568 568
569 /* 569 if (sbi->options.discard) {
570 * Issue discard for the sectors we no longer care about, 570 /*
571 * batching contiguous clusters into one request 571 * Issue discard for the sectors we no longer
572 */ 572 * care about, batching contiguous clusters
573 if (cluster != fatent.entry + 1) { 573 * into one request
574 int nr_clus = fatent.entry - first_cl + 1; 574 */
575 575 if (cluster != fatent.entry + 1) {
576 sb_issue_discard(sb, fat_clus_to_blknr(sbi, first_cl), 576 int nr_clus = fatent.entry - first_cl + 1;
577 nr_clus * sbi->sec_per_clus); 577
578 first_cl = cluster; 578 sb_issue_discard(sb,
579 fat_clus_to_blknr(sbi, first_cl),
580 nr_clus * sbi->sec_per_clus);
581
582 first_cl = cluster;
583 }
579 } 584 }
580 585
581 ops->ent_put(&fatent, FAT_ENT_FREE); 586 ops->ent_put(&fatent, FAT_ENT_FREE);
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index 76b7961ab663..14da530b05ca 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -858,6 +858,8 @@ static int fat_show_options(struct seq_file *m, struct vfsmount *mnt)
858 seq_puts(m, ",errors=panic"); 858 seq_puts(m, ",errors=panic");
859 else 859 else
860 seq_puts(m, ",errors=remount-ro"); 860 seq_puts(m, ",errors=remount-ro");
861 if (opts->discard)
862 seq_puts(m, ",discard");
861 863
862 return 0; 864 return 0;
863} 865}
@@ -871,7 +873,7 @@ enum {
871 Opt_shortname_winnt, Opt_shortname_mixed, Opt_utf8_no, Opt_utf8_yes, 873 Opt_shortname_winnt, Opt_shortname_mixed, Opt_utf8_no, Opt_utf8_yes,
872 Opt_uni_xl_no, Opt_uni_xl_yes, Opt_nonumtail_no, Opt_nonumtail_yes, 874 Opt_uni_xl_no, Opt_uni_xl_yes, Opt_nonumtail_no, Opt_nonumtail_yes,
873 Opt_obsolate, Opt_flush, Opt_tz_utc, Opt_rodir, Opt_err_cont, 875 Opt_obsolate, Opt_flush, Opt_tz_utc, Opt_rodir, Opt_err_cont,
874 Opt_err_panic, Opt_err_ro, Opt_err, 876 Opt_err_panic, Opt_err_ro, Opt_discard, Opt_err,
875}; 877};
876 878
877static const match_table_t fat_tokens = { 879static const match_table_t fat_tokens = {
@@ -899,6 +901,7 @@ static const match_table_t fat_tokens = {
899 {Opt_err_cont, "errors=continue"}, 901 {Opt_err_cont, "errors=continue"},
900 {Opt_err_panic, "errors=panic"}, 902 {Opt_err_panic, "errors=panic"},
901 {Opt_err_ro, "errors=remount-ro"}, 903 {Opt_err_ro, "errors=remount-ro"},
904 {Opt_discard, "discard"},
902 {Opt_obsolate, "conv=binary"}, 905 {Opt_obsolate, "conv=binary"},
903 {Opt_obsolate, "conv=text"}, 906 {Opt_obsolate, "conv=text"},
904 {Opt_obsolate, "conv=auto"}, 907 {Opt_obsolate, "conv=auto"},
@@ -1136,6 +1139,9 @@ static int parse_options(char *options, int is_vfat, int silent, int *debug,
1136 case Opt_rodir: 1139 case Opt_rodir:
1137 opts->rodir = 1; 1140 opts->rodir = 1;
1138 break; 1141 break;
1142 case Opt_discard:
1143 opts->discard = 1;
1144 break;
1139 1145
1140 /* obsolete mount options */ 1146 /* obsolete mount options */
1141 case Opt_obsolate: 1147 case Opt_obsolate:
diff --git a/fs/fcntl.c b/fs/fcntl.c
index 2cf93ec40a67..97e01dc0d95f 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -618,60 +618,90 @@ static DEFINE_RWLOCK(fasync_lock);
618static struct kmem_cache *fasync_cache __read_mostly; 618static struct kmem_cache *fasync_cache __read_mostly;
619 619
620/* 620/*
621 * fasync_helper() is used by almost all character device drivers 621 * Remove a fasync entry. If successfully removed, return
622 * to set up the fasync queue. It returns negative on error, 0 if it did 622 * positive and clear the FASYNC flag. If no entry exists,
623 * no changes and positive if it added/deleted the entry. 623 * do nothing and return 0.
624 *
625 * NOTE! It is very important that the FASYNC flag always
626 * match the state "is the filp on a fasync list".
627 *
628 * We always take the 'filp->f_lock', in since fasync_lock
629 * needs to be irq-safe.
624 */ 630 */
625int fasync_helper(int fd, struct file * filp, int on, struct fasync_struct **fapp) 631static int fasync_remove_entry(struct file *filp, struct fasync_struct **fapp)
626{ 632{
627 struct fasync_struct *fa, **fp; 633 struct fasync_struct *fa, **fp;
628 struct fasync_struct *new = NULL;
629 int result = 0; 634 int result = 0;
630 635
631 if (on) { 636 spin_lock(&filp->f_lock);
632 new = kmem_cache_alloc(fasync_cache, GFP_KERNEL); 637 write_lock_irq(&fasync_lock);
633 if (!new) 638 for (fp = fapp; (fa = *fp) != NULL; fp = &fa->fa_next) {
634 return -ENOMEM; 639 if (fa->fa_file != filp)
640 continue;
641 *fp = fa->fa_next;
642 kmem_cache_free(fasync_cache, fa);
643 filp->f_flags &= ~FASYNC;
644 result = 1;
645 break;
635 } 646 }
647 write_unlock_irq(&fasync_lock);
648 spin_unlock(&filp->f_lock);
649 return result;
650}
651
652/*
653 * Add a fasync entry. Return negative on error, positive if
654 * added, and zero if did nothing but change an existing one.
655 *
656 * NOTE! It is very important that the FASYNC flag always
657 * match the state "is the filp on a fasync list".
658 */
659static int fasync_add_entry(int fd, struct file *filp, struct fasync_struct **fapp)
660{
661 struct fasync_struct *new, *fa, **fp;
662 int result = 0;
663
664 new = kmem_cache_alloc(fasync_cache, GFP_KERNEL);
665 if (!new)
666 return -ENOMEM;
636 667
637 /*
638 * We need to take f_lock first since it's not an IRQ-safe
639 * lock.
640 */
641 spin_lock(&filp->f_lock); 668 spin_lock(&filp->f_lock);
642 write_lock_irq(&fasync_lock); 669 write_lock_irq(&fasync_lock);
643 for (fp = fapp; (fa = *fp) != NULL; fp = &fa->fa_next) { 670 for (fp = fapp; (fa = *fp) != NULL; fp = &fa->fa_next) {
644 if (fa->fa_file == filp) { 671 if (fa->fa_file != filp)
645 if(on) { 672 continue;
646 fa->fa_fd = fd; 673 fa->fa_fd = fd;
647 kmem_cache_free(fasync_cache, new); 674 kmem_cache_free(fasync_cache, new);
648 } else { 675 goto out;
649 *fp = fa->fa_next;
650 kmem_cache_free(fasync_cache, fa);
651 result = 1;
652 }
653 goto out;
654 }
655 } 676 }
656 677
657 if (on) { 678 new->magic = FASYNC_MAGIC;
658 new->magic = FASYNC_MAGIC; 679 new->fa_file = filp;
659 new->fa_file = filp; 680 new->fa_fd = fd;
660 new->fa_fd = fd; 681 new->fa_next = *fapp;
661 new->fa_next = *fapp; 682 *fapp = new;
662 *fapp = new; 683 result = 1;
663 result = 1; 684 filp->f_flags |= FASYNC;
664 } 685
665out: 686out:
666 if (on)
667 filp->f_flags |= FASYNC;
668 else
669 filp->f_flags &= ~FASYNC;
670 write_unlock_irq(&fasync_lock); 687 write_unlock_irq(&fasync_lock);
671 spin_unlock(&filp->f_lock); 688 spin_unlock(&filp->f_lock);
672 return result; 689 return result;
673} 690}
674 691
692/*
693 * fasync_helper() is used by almost all character device drivers
694 * to set up the fasync queue, and for regular files by the file
695 * lease code. It returns negative on error, 0 if it did no changes
696 * and positive if it added/deleted the entry.
697 */
698int fasync_helper(int fd, struct file * filp, int on, struct fasync_struct **fapp)
699{
700 if (!on)
701 return fasync_remove_entry(filp, fapp);
702 return fasync_add_entry(fd, filp, fapp);
703}
704
675EXPORT_SYMBOL(fasync_helper); 705EXPORT_SYMBOL(fasync_helper);
676 706
677void __kill_fasync(struct fasync_struct *fa, int sig, int band) 707void __kill_fasync(struct fasync_struct *fa, int sig, int band)
diff --git a/fs/file_table.c b/fs/file_table.c
index 4bef4c01ec6f..b98404b54383 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -21,9 +21,12 @@
21#include <linux/fsnotify.h> 21#include <linux/fsnotify.h>
22#include <linux/sysctl.h> 22#include <linux/sysctl.h>
23#include <linux/percpu_counter.h> 23#include <linux/percpu_counter.h>
24#include <linux/ima.h>
24 25
25#include <asm/atomic.h> 26#include <asm/atomic.h>
26 27
28#include "internal.h"
29
27/* sysctl tunables... */ 30/* sysctl tunables... */
28struct files_stat_struct files_stat = { 31struct files_stat_struct files_stat = {
29 .max_files = NR_FILE 32 .max_files = NR_FILE
@@ -147,8 +150,6 @@ fail:
147 return NULL; 150 return NULL;
148} 151}
149 152
150EXPORT_SYMBOL(get_empty_filp);
151
152/** 153/**
153 * alloc_file - allocate and initialize a 'struct file' 154 * alloc_file - allocate and initialize a 'struct file'
154 * @mnt: the vfsmount on which the file will reside 155 * @mnt: the vfsmount on which the file will reside
@@ -164,8 +165,8 @@ EXPORT_SYMBOL(get_empty_filp);
164 * If all the callers of init_file() are eliminated, its 165 * If all the callers of init_file() are eliminated, its
165 * code should be moved into this function. 166 * code should be moved into this function.
166 */ 167 */
167struct file *alloc_file(struct vfsmount *mnt, struct dentry *dentry, 168struct file *alloc_file(struct path *path, fmode_t mode,
168 fmode_t mode, const struct file_operations *fop) 169 const struct file_operations *fop)
169{ 170{
170 struct file *file; 171 struct file *file;
171 172
@@ -173,35 +174,8 @@ struct file *alloc_file(struct vfsmount *mnt, struct dentry *dentry,
173 if (!file) 174 if (!file)
174 return NULL; 175 return NULL;
175 176
176 init_file(file, mnt, dentry, mode, fop); 177 file->f_path = *path;
177 return file; 178 file->f_mapping = path->dentry->d_inode->i_mapping;
178}
179EXPORT_SYMBOL(alloc_file);
180
181/**
182 * init_file - initialize a 'struct file'
183 * @file: the already allocated 'struct file' to initialized
184 * @mnt: the vfsmount on which the file resides
185 * @dentry: the dentry representing this file
186 * @mode: the mode the file is opened with
187 * @fop: the 'struct file_operations' for this file
188 *
189 * Use this instead of setting the members directly. Doing so
190 * avoids making mistakes like forgetting the mntget() or
191 * forgetting to take a write on the mnt.
192 *
193 * Note: This is a crappy interface. It is here to make
194 * merging with the existing users of get_empty_filp()
195 * who have complex failure logic easier. All users
196 * of this should be moving to alloc_file().
197 */
198int init_file(struct file *file, struct vfsmount *mnt, struct dentry *dentry,
199 fmode_t mode, const struct file_operations *fop)
200{
201 int error = 0;
202 file->f_path.dentry = dentry;
203 file->f_path.mnt = mntget(mnt);
204 file->f_mapping = dentry->d_inode->i_mapping;
205 file->f_mode = mode; 179 file->f_mode = mode;
206 file->f_op = fop; 180 file->f_op = fop;
207 181
@@ -211,14 +185,14 @@ int init_file(struct file *file, struct vfsmount *mnt, struct dentry *dentry,
211 * visible. We do this for consistency, and so 185 * visible. We do this for consistency, and so
212 * that we can do debugging checks at __fput() 186 * that we can do debugging checks at __fput()
213 */ 187 */
214 if ((mode & FMODE_WRITE) && !special_file(dentry->d_inode->i_mode)) { 188 if ((mode & FMODE_WRITE) && !special_file(path->dentry->d_inode->i_mode)) {
215 file_take_write(file); 189 file_take_write(file);
216 error = mnt_clone_write(mnt); 190 WARN_ON(mnt_clone_write(path->mnt));
217 WARN_ON(error);
218 } 191 }
219 return error; 192 ima_counts_get(file);
193 return file;
220} 194}
221EXPORT_SYMBOL(init_file); 195EXPORT_SYMBOL(alloc_file);
222 196
223void fput(struct file *file) 197void fput(struct file *file)
224{ 198{
@@ -279,6 +253,7 @@ void __fput(struct file *file)
279 if (file->f_op && file->f_op->release) 253 if (file->f_op && file->f_op->release)
280 file->f_op->release(inode, file); 254 file->f_op->release(inode, file);
281 security_file_free(file); 255 security_file_free(file);
256 ima_file_free(file);
282 if (unlikely(S_ISCHR(inode->i_mode) && inode->i_cdev != NULL)) 257 if (unlikely(S_ISCHR(inode->i_mode) && inode->i_cdev != NULL))
283 cdev_put(inode->i_cdev); 258 cdev_put(inode->i_cdev);
284 fops_put(file->f_op); 259 fops_put(file->f_op);
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 49bc1b8e8f19..1a7c42c64ff4 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -242,6 +242,7 @@ static void bdi_sync_writeback(struct backing_dev_info *bdi,
242/** 242/**
243 * bdi_start_writeback - start writeback 243 * bdi_start_writeback - start writeback
244 * @bdi: the backing device to write from 244 * @bdi: the backing device to write from
245 * @sb: write inodes from this super_block
245 * @nr_pages: the number of pages to write 246 * @nr_pages: the number of pages to write
246 * 247 *
247 * Description: 248 * Description:
@@ -1187,6 +1188,23 @@ void writeback_inodes_sb(struct super_block *sb)
1187EXPORT_SYMBOL(writeback_inodes_sb); 1188EXPORT_SYMBOL(writeback_inodes_sb);
1188 1189
1189/** 1190/**
1191 * writeback_inodes_sb_if_idle - start writeback if none underway
1192 * @sb: the superblock
1193 *
1194 * Invoke writeback_inodes_sb if no writeback is currently underway.
1195 * Returns 1 if writeback was started, 0 if not.
1196 */
1197int writeback_inodes_sb_if_idle(struct super_block *sb)
1198{
1199 if (!writeback_in_progress(sb->s_bdi)) {
1200 writeback_inodes_sb(sb);
1201 return 1;
1202 } else
1203 return 0;
1204}
1205EXPORT_SYMBOL(writeback_inodes_sb_if_idle);
1206
1207/**
1190 * sync_inodes_sb - sync sb inode pages 1208 * sync_inodes_sb - sync sb inode pages
1191 * @sb: the superblock 1209 * @sb: the superblock
1192 * 1210 *
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index c18913a777ae..a9f5e137f1d3 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -828,6 +828,9 @@ static ssize_t fuse_fill_write_pages(struct fuse_req *req,
828 if (!page) 828 if (!page)
829 break; 829 break;
830 830
831 if (mapping_writably_mapped(mapping))
832 flush_dcache_page(page);
833
831 pagefault_disable(); 834 pagefault_disable();
832 tmp = iov_iter_copy_from_user_atomic(page, ii, offset, bytes); 835 tmp = iov_iter_copy_from_user_atomic(page, ii, offset, bytes);
833 pagefault_enable(); 836 pagefault_enable();
diff --git a/fs/generic_acl.c b/fs/generic_acl.c
index e0b53aa7bbec..55458031e501 100644
--- a/fs/generic_acl.c
+++ b/fs/generic_acl.c
@@ -1,62 +1,58 @@
1/* 1/*
2 * fs/generic_acl.c
3 *
4 * (C) 2005 Andreas Gruenbacher <agruen@suse.de> 2 * (C) 2005 Andreas Gruenbacher <agruen@suse.de>
5 * 3 *
6 * This file is released under the GPL. 4 * This file is released under the GPL.
5 *
6 * Generic ACL support for in-memory filesystems.
7 */ 7 */
8 8
9#include <linux/sched.h> 9#include <linux/sched.h>
10#include <linux/fs.h> 10#include <linux/fs.h>
11#include <linux/generic_acl.h> 11#include <linux/generic_acl.h>
12#include <linux/posix_acl.h>
13#include <linux/posix_acl_xattr.h>
12 14
13/** 15
14 * generic_acl_list - Generic xattr_handler->list() operation 16static size_t
15 * @ops: Filesystem specific getacl and setacl callbacks 17generic_acl_list(struct dentry *dentry, char *list, size_t list_size,
16 */ 18 const char *name, size_t name_len, int type)
17size_t
18generic_acl_list(struct inode *inode, struct generic_acl_operations *ops,
19 int type, char *list, size_t list_size)
20{ 19{
21 struct posix_acl *acl; 20 struct posix_acl *acl;
22 const char *name; 21 const char *xname;
23 size_t size; 22 size_t size;
24 23
25 acl = ops->getacl(inode, type); 24 acl = get_cached_acl(dentry->d_inode, type);
26 if (!acl) 25 if (!acl)
27 return 0; 26 return 0;
28 posix_acl_release(acl); 27 posix_acl_release(acl);
29 28
30 switch(type) { 29 switch (type) {
31 case ACL_TYPE_ACCESS: 30 case ACL_TYPE_ACCESS:
32 name = POSIX_ACL_XATTR_ACCESS; 31 xname = POSIX_ACL_XATTR_ACCESS;
33 break; 32 break;
34 33 case ACL_TYPE_DEFAULT:
35 case ACL_TYPE_DEFAULT: 34 xname = POSIX_ACL_XATTR_DEFAULT;
36 name = POSIX_ACL_XATTR_DEFAULT; 35 break;
37 break; 36 default:
38 37 return 0;
39 default:
40 return 0;
41 } 38 }
42 size = strlen(name) + 1; 39 size = strlen(xname) + 1;
43 if (list && size <= list_size) 40 if (list && size <= list_size)
44 memcpy(list, name, size); 41 memcpy(list, xname, size);
45 return size; 42 return size;
46} 43}
47 44
48/** 45static int
49 * generic_acl_get - Generic xattr_handler->get() operation 46generic_acl_get(struct dentry *dentry, const char *name, void *buffer,
50 * @ops: Filesystem specific getacl and setacl callbacks 47 size_t size, int type)
51 */
52int
53generic_acl_get(struct inode *inode, struct generic_acl_operations *ops,
54 int type, void *buffer, size_t size)
55{ 48{
56 struct posix_acl *acl; 49 struct posix_acl *acl;
57 int error; 50 int error;
58 51
59 acl = ops->getacl(inode, type); 52 if (strcmp(name, "") != 0)
53 return -EINVAL;
54
55 acl = get_cached_acl(dentry->d_inode, type);
60 if (!acl) 56 if (!acl)
61 return -ENODATA; 57 return -ENODATA;
62 error = posix_acl_to_xattr(acl, buffer, size); 58 error = posix_acl_to_xattr(acl, buffer, size);
@@ -65,17 +61,16 @@ generic_acl_get(struct inode *inode, struct generic_acl_operations *ops,
65 return error; 61 return error;
66} 62}
67 63
68/** 64static int
69 * generic_acl_set - Generic xattr_handler->set() operation 65generic_acl_set(struct dentry *dentry, const char *name, const void *value,
70 * @ops: Filesystem specific getacl and setacl callbacks 66 size_t size, int flags, int type)
71 */
72int
73generic_acl_set(struct inode *inode, struct generic_acl_operations *ops,
74 int type, const void *value, size_t size)
75{ 67{
68 struct inode *inode = dentry->d_inode;
76 struct posix_acl *acl = NULL; 69 struct posix_acl *acl = NULL;
77 int error; 70 int error;
78 71
72 if (strcmp(name, "") != 0)
73 return -EINVAL;
79 if (S_ISLNK(inode->i_mode)) 74 if (S_ISLNK(inode->i_mode))
80 return -EOPNOTSUPP; 75 return -EOPNOTSUPP;
81 if (!is_owner_or_cap(inode)) 76 if (!is_owner_or_cap(inode))
@@ -91,28 +86,27 @@ generic_acl_set(struct inode *inode, struct generic_acl_operations *ops,
91 error = posix_acl_valid(acl); 86 error = posix_acl_valid(acl);
92 if (error) 87 if (error)
93 goto failed; 88 goto failed;
94 switch(type) { 89 switch (type) {
95 case ACL_TYPE_ACCESS: 90 case ACL_TYPE_ACCESS:
96 mode = inode->i_mode; 91 mode = inode->i_mode;
97 error = posix_acl_equiv_mode(acl, &mode); 92 error = posix_acl_equiv_mode(acl, &mode);
98 if (error < 0) 93 if (error < 0)
99 goto failed; 94 goto failed;
100 inode->i_mode = mode; 95 inode->i_mode = mode;
101 if (error == 0) { 96 if (error == 0) {
102 posix_acl_release(acl); 97 posix_acl_release(acl);
103 acl = NULL; 98 acl = NULL;
104 } 99 }
105 break; 100 break;
106 101 case ACL_TYPE_DEFAULT:
107 case ACL_TYPE_DEFAULT: 102 if (!S_ISDIR(inode->i_mode)) {
108 if (!S_ISDIR(inode->i_mode)) { 103 error = -EINVAL;
109 error = -EINVAL; 104 goto failed;
110 goto failed; 105 }
111 } 106 break;
112 break;
113 } 107 }
114 } 108 }
115 ops->setacl(inode, type, acl); 109 set_cached_acl(inode, type, acl);
116 error = 0; 110 error = 0;
117failed: 111failed:
118 posix_acl_release(acl); 112 posix_acl_release(acl);
@@ -121,14 +115,12 @@ failed:
121 115
122/** 116/**
123 * generic_acl_init - Take care of acl inheritance at @inode create time 117 * generic_acl_init - Take care of acl inheritance at @inode create time
124 * @ops: Filesystem specific getacl and setacl callbacks
125 * 118 *
126 * Files created inside a directory with a default ACL inherit the 119 * Files created inside a directory with a default ACL inherit the
127 * directory's default ACL. 120 * directory's default ACL.
128 */ 121 */
129int 122int
130generic_acl_init(struct inode *inode, struct inode *dir, 123generic_acl_init(struct inode *inode, struct inode *dir)
131 struct generic_acl_operations *ops)
132{ 124{
133 struct posix_acl *acl = NULL; 125 struct posix_acl *acl = NULL;
134 mode_t mode = inode->i_mode; 126 mode_t mode = inode->i_mode;
@@ -136,7 +128,7 @@ generic_acl_init(struct inode *inode, struct inode *dir,
136 128
137 inode->i_mode = mode & ~current_umask(); 129 inode->i_mode = mode & ~current_umask();
138 if (!S_ISLNK(inode->i_mode)) 130 if (!S_ISLNK(inode->i_mode))
139 acl = ops->getacl(dir, ACL_TYPE_DEFAULT); 131 acl = get_cached_acl(dir, ACL_TYPE_DEFAULT);
140 if (acl) { 132 if (acl) {
141 struct posix_acl *clone; 133 struct posix_acl *clone;
142 134
@@ -145,7 +137,7 @@ generic_acl_init(struct inode *inode, struct inode *dir,
145 error = -ENOMEM; 137 error = -ENOMEM;
146 if (!clone) 138 if (!clone)
147 goto cleanup; 139 goto cleanup;
148 ops->setacl(inode, ACL_TYPE_DEFAULT, clone); 140 set_cached_acl(inode, ACL_TYPE_DEFAULT, clone);
149 posix_acl_release(clone); 141 posix_acl_release(clone);
150 } 142 }
151 clone = posix_acl_clone(acl, GFP_KERNEL); 143 clone = posix_acl_clone(acl, GFP_KERNEL);
@@ -156,7 +148,7 @@ generic_acl_init(struct inode *inode, struct inode *dir,
156 if (error >= 0) { 148 if (error >= 0) {
157 inode->i_mode = mode; 149 inode->i_mode = mode;
158 if (error > 0) 150 if (error > 0)
159 ops->setacl(inode, ACL_TYPE_ACCESS, clone); 151 set_cached_acl(inode, ACL_TYPE_ACCESS, clone);
160 } 152 }
161 posix_acl_release(clone); 153 posix_acl_release(clone);
162 } 154 }
@@ -169,20 +161,19 @@ cleanup:
169 161
170/** 162/**
171 * generic_acl_chmod - change the access acl of @inode upon chmod() 163 * generic_acl_chmod - change the access acl of @inode upon chmod()
172 * @ops: FIlesystem specific getacl and setacl callbacks
173 * 164 *
174 * A chmod also changes the permissions of the owner, group/mask, and 165 * A chmod also changes the permissions of the owner, group/mask, and
175 * other ACL entries. 166 * other ACL entries.
176 */ 167 */
177int 168int
178generic_acl_chmod(struct inode *inode, struct generic_acl_operations *ops) 169generic_acl_chmod(struct inode *inode)
179{ 170{
180 struct posix_acl *acl, *clone; 171 struct posix_acl *acl, *clone;
181 int error = 0; 172 int error = 0;
182 173
183 if (S_ISLNK(inode->i_mode)) 174 if (S_ISLNK(inode->i_mode))
184 return -EOPNOTSUPP; 175 return -EOPNOTSUPP;
185 acl = ops->getacl(inode, ACL_TYPE_ACCESS); 176 acl = get_cached_acl(inode, ACL_TYPE_ACCESS);
186 if (acl) { 177 if (acl) {
187 clone = posix_acl_clone(acl, GFP_KERNEL); 178 clone = posix_acl_clone(acl, GFP_KERNEL);
188 posix_acl_release(acl); 179 posix_acl_release(acl);
@@ -190,8 +181,37 @@ generic_acl_chmod(struct inode *inode, struct generic_acl_operations *ops)
190 return -ENOMEM; 181 return -ENOMEM;
191 error = posix_acl_chmod_masq(clone, inode->i_mode); 182 error = posix_acl_chmod_masq(clone, inode->i_mode);
192 if (!error) 183 if (!error)
193 ops->setacl(inode, ACL_TYPE_ACCESS, clone); 184 set_cached_acl(inode, ACL_TYPE_ACCESS, clone);
194 posix_acl_release(clone); 185 posix_acl_release(clone);
195 } 186 }
196 return error; 187 return error;
197} 188}
189
190int
191generic_check_acl(struct inode *inode, int mask)
192{
193 struct posix_acl *acl = get_cached_acl(inode, ACL_TYPE_ACCESS);
194
195 if (acl) {
196 int error = posix_acl_permission(inode, acl, mask);
197 posix_acl_release(acl);
198 return error;
199 }
200 return -EAGAIN;
201}
202
203struct xattr_handler generic_acl_access_handler = {
204 .prefix = POSIX_ACL_XATTR_ACCESS,
205 .flags = ACL_TYPE_ACCESS,
206 .list = generic_acl_list,
207 .get = generic_acl_get,
208 .set = generic_acl_set,
209};
210
211struct xattr_handler generic_acl_default_handler = {
212 .prefix = POSIX_ACL_XATTR_DEFAULT,
213 .flags = ACL_TYPE_DEFAULT,
214 .list = generic_acl_list,
215 .get = generic_acl_get,
216 .set = generic_acl_set,
217};
diff --git a/fs/gfs2/Kconfig b/fs/gfs2/Kconfig
index b192c661caa6..4dcddf83326f 100644
--- a/fs/gfs2/Kconfig
+++ b/fs/gfs2/Kconfig
@@ -10,7 +10,6 @@ config GFS2_FS
10 select SLOW_WORK 10 select SLOW_WORK
11 select QUOTA 11 select QUOTA
12 select QUOTACTL 12 select QUOTACTL
13 select FS_JOURNAL_INFO
14 help 13 help
15 A cluster filesystem. 14 A cluster filesystem.
16 15
diff --git a/fs/gfs2/acl.c b/fs/gfs2/acl.c
index 3eb1ea846173..87ee309d4c24 100644
--- a/fs/gfs2/acl.c
+++ b/fs/gfs2/acl.c
@@ -126,7 +126,7 @@ static int gfs2_acl_set(struct inode *inode, int type, struct posix_acl *acl)
126 error = posix_acl_to_xattr(acl, data, len); 126 error = posix_acl_to_xattr(acl, data, len);
127 if (error < 0) 127 if (error < 0)
128 goto out; 128 goto out;
129 error = gfs2_xattr_set(inode, GFS2_EATYPE_SYS, name, data, len, 0); 129 error = __gfs2_xattr_set(inode, name, data, len, 0, GFS2_EATYPE_SYS);
130 if (!error) 130 if (!error)
131 set_cached_acl(inode, type, acl); 131 set_cached_acl(inode, type, acl);
132out: 132out:
@@ -232,9 +232,10 @@ static int gfs2_acl_type(const char *name)
232 return -EINVAL; 232 return -EINVAL;
233} 233}
234 234
235static int gfs2_xattr_system_get(struct inode *inode, const char *name, 235static int gfs2_xattr_system_get(struct dentry *dentry, const char *name,
236 void *buffer, size_t size) 236 void *buffer, size_t size, int xtype)
237{ 237{
238 struct inode *inode = dentry->d_inode;
238 struct posix_acl *acl; 239 struct posix_acl *acl;
239 int type; 240 int type;
240 int error; 241 int error;
@@ -255,9 +256,11 @@ static int gfs2_xattr_system_get(struct inode *inode, const char *name,
255 return error; 256 return error;
256} 257}
257 258
258static int gfs2_xattr_system_set(struct inode *inode, const char *name, 259static int gfs2_xattr_system_set(struct dentry *dentry, const char *name,
259 const void *value, size_t size, int flags) 260 const void *value, size_t size, int flags,
261 int xtype)
260{ 262{
263 struct inode *inode = dentry->d_inode;
261 struct gfs2_sbd *sdp = GFS2_SB(inode); 264 struct gfs2_sbd *sdp = GFS2_SB(inode);
262 struct posix_acl *acl = NULL; 265 struct posix_acl *acl = NULL;
263 int error = 0, type; 266 int error = 0, type;
@@ -319,7 +322,7 @@ static int gfs2_xattr_system_set(struct inode *inode, const char *name,
319 } 322 }
320 323
321set_acl: 324set_acl:
322 error = gfs2_xattr_set(inode, GFS2_EATYPE_SYS, name, value, size, 0); 325 error = __gfs2_xattr_set(inode, name, value, size, 0, GFS2_EATYPE_SYS);
323 if (!error) { 326 if (!error) {
324 if (acl) 327 if (acl)
325 set_cached_acl(inode, type, acl); 328 set_cached_acl(inode, type, acl);
@@ -334,6 +337,7 @@ out:
334 337
335struct xattr_handler gfs2_xattr_system_handler = { 338struct xattr_handler gfs2_xattr_system_handler = {
336 .prefix = XATTR_SYSTEM_PREFIX, 339 .prefix = XATTR_SYSTEM_PREFIX,
340 .flags = GFS2_EATYPE_SYS,
337 .get = gfs2_xattr_system_get, 341 .get = gfs2_xattr_system_get,
338 .set = gfs2_xattr_system_set, 342 .set = gfs2_xattr_system_set,
339}; 343};
diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index 6d47379e794b..583e823307ae 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -541,7 +541,7 @@ static int gfs2_bmap_alloc(struct inode *inode, const sector_t lblock,
541 *ptr++ = cpu_to_be64(bn++); 541 *ptr++ = cpu_to_be64(bn++);
542 break; 542 break;
543 } 543 }
544 } while (state != ALLOC_DATA); 544 } while ((state != ALLOC_DATA) || !dblock);
545 545
546 ip->i_height = height; 546 ip->i_height = height;
547 gfs2_add_inode_blocks(&ip->i_inode, alloced); 547 gfs2_add_inode_blocks(&ip->i_inode, alloced);
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index 4eb308aa3234..a6abbae8a278 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -569,6 +569,40 @@ static int gfs2_fsync(struct file *file, struct dentry *dentry, int datasync)
569 return ret; 569 return ret;
570} 570}
571 571
572/**
573 * gfs2_file_aio_write - Perform a write to a file
574 * @iocb: The io context
575 * @iov: The data to write
576 * @nr_segs: Number of @iov segments
577 * @pos: The file position
578 *
579 * We have to do a lock/unlock here to refresh the inode size for
580 * O_APPEND writes, otherwise we can land up writing at the wrong
581 * offset. There is still a race, but provided the app is using its
582 * own file locking, this will make O_APPEND work as expected.
583 *
584 */
585
586static ssize_t gfs2_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
587 unsigned long nr_segs, loff_t pos)
588{
589 struct file *file = iocb->ki_filp;
590
591 if (file->f_flags & O_APPEND) {
592 struct dentry *dentry = file->f_dentry;
593 struct gfs2_inode *ip = GFS2_I(dentry->d_inode);
594 struct gfs2_holder gh;
595 int ret;
596
597 ret = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, 0, &gh);
598 if (ret)
599 return ret;
600 gfs2_glock_dq_uninit(&gh);
601 }
602
603 return generic_file_aio_write(iocb, iov, nr_segs, pos);
604}
605
572#ifdef CONFIG_GFS2_FS_LOCKING_DLM 606#ifdef CONFIG_GFS2_FS_LOCKING_DLM
573 607
574/** 608/**
@@ -711,7 +745,7 @@ const struct file_operations gfs2_file_fops = {
711 .read = do_sync_read, 745 .read = do_sync_read,
712 .aio_read = generic_file_aio_read, 746 .aio_read = generic_file_aio_read,
713 .write = do_sync_write, 747 .write = do_sync_write,
714 .aio_write = generic_file_aio_write, 748 .aio_write = gfs2_file_aio_write,
715 .unlocked_ioctl = gfs2_ioctl, 749 .unlocked_ioctl = gfs2_ioctl,
716 .mmap = gfs2_mmap, 750 .mmap = gfs2_mmap,
717 .open = gfs2_open, 751 .open = gfs2_open,
@@ -741,7 +775,7 @@ const struct file_operations gfs2_file_fops_nolock = {
741 .read = do_sync_read, 775 .read = do_sync_read,
742 .aio_read = generic_file_aio_read, 776 .aio_read = generic_file_aio_read,
743 .write = do_sync_write, 777 .write = do_sync_write,
744 .aio_write = generic_file_aio_write, 778 .aio_write = gfs2_file_aio_write,
745 .unlocked_ioctl = gfs2_ioctl, 779 .unlocked_ioctl = gfs2_ioctl,
746 .mmap = gfs2_mmap, 780 .mmap = gfs2_mmap,
747 .open = gfs2_open, 781 .open = gfs2_open,
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index f455a03a09e2..f42663325931 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -769,6 +769,7 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number,
769 if (!gl) 769 if (!gl)
770 return -ENOMEM; 770 return -ENOMEM;
771 771
772 atomic_inc(&sdp->sd_glock_disposal);
772 gl->gl_flags = 0; 773 gl->gl_flags = 0;
773 gl->gl_name = name; 774 gl->gl_name = name;
774 atomic_set(&gl->gl_ref, 1); 775 atomic_set(&gl->gl_ref, 1);
@@ -1538,6 +1539,9 @@ void gfs2_gl_hash_clear(struct gfs2_sbd *sdp)
1538 up_write(&gfs2_umount_flush_sem); 1539 up_write(&gfs2_umount_flush_sem);
1539 msleep(10); 1540 msleep(10);
1540 } 1541 }
1542 flush_workqueue(glock_workqueue);
1543 wait_event(sdp->sd_glock_wait, atomic_read(&sdp->sd_glock_disposal) == 0);
1544 gfs2_dump_lockstate(sdp);
1541} 1545}
1542 1546
1543void gfs2_glock_finish_truncate(struct gfs2_inode *ip) 1547void gfs2_glock_finish_truncate(struct gfs2_inode *ip)
diff --git a/fs/gfs2/glock.h b/fs/gfs2/glock.h
index 13f0bd228132..c0262faf4725 100644
--- a/fs/gfs2/glock.h
+++ b/fs/gfs2/glock.h
@@ -123,7 +123,7 @@ struct lm_lockops {
123 int (*lm_mount) (struct gfs2_sbd *sdp, const char *fsname); 123 int (*lm_mount) (struct gfs2_sbd *sdp, const char *fsname);
124 void (*lm_unmount) (struct gfs2_sbd *sdp); 124 void (*lm_unmount) (struct gfs2_sbd *sdp);
125 void (*lm_withdraw) (struct gfs2_sbd *sdp); 125 void (*lm_withdraw) (struct gfs2_sbd *sdp);
126 void (*lm_put_lock) (struct kmem_cache *cachep, void *gl); 126 void (*lm_put_lock) (struct kmem_cache *cachep, struct gfs2_glock *gl);
127 unsigned int (*lm_lock) (struct gfs2_glock *gl, 127 unsigned int (*lm_lock) (struct gfs2_glock *gl,
128 unsigned int req_state, unsigned int flags); 128 unsigned int req_state, unsigned int flags);
129 void (*lm_cancel) (struct gfs2_glock *gl); 129 void (*lm_cancel) (struct gfs2_glock *gl);
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index 4792200978c8..bc0ad158e6b4 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -544,6 +544,8 @@ struct gfs2_sbd {
544 struct gfs2_holder sd_live_gh; 544 struct gfs2_holder sd_live_gh;
545 struct gfs2_glock *sd_rename_gl; 545 struct gfs2_glock *sd_rename_gl;
546 struct gfs2_glock *sd_trans_gl; 546 struct gfs2_glock *sd_trans_gl;
547 wait_queue_head_t sd_glock_wait;
548 atomic_t sd_glock_disposal;
547 549
548 /* Inode Stuff */ 550 /* Inode Stuff */
549 551
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index 26ba2a4c4a2d..6e220f4eee7d 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -125,7 +125,7 @@ static struct inode *gfs2_iget_skip(struct super_block *sb,
125 * directory entry when gfs2_inode_lookup() is invoked. Part of the code 125 * directory entry when gfs2_inode_lookup() is invoked. Part of the code
126 * segment inside gfs2_inode_lookup code needs to get moved around. 126 * segment inside gfs2_inode_lookup code needs to get moved around.
127 * 127 *
128 * Clean up I_LOCK and I_NEW as well. 128 * Clears I_NEW as well.
129 **/ 129 **/
130 130
131void gfs2_set_iop(struct inode *inode) 131void gfs2_set_iop(struct inode *inode)
@@ -801,7 +801,8 @@ static int gfs2_security_init(struct gfs2_inode *dip, struct gfs2_inode *ip)
801 return err; 801 return err;
802 } 802 }
803 803
804 err = gfs2_xattr_set(&ip->i_inode, GFS2_EATYPE_SECURITY, name, value, len, 0); 804 err = __gfs2_xattr_set(&ip->i_inode, name, value, len, 0,
805 GFS2_EATYPE_SECURITY);
805 kfree(value); 806 kfree(value);
806 kfree(name); 807 kfree(name);
807 808
diff --git a/fs/gfs2/lock_dlm.c b/fs/gfs2/lock_dlm.c
index 46df988323bc..0e5e0e7022e5 100644
--- a/fs/gfs2/lock_dlm.c
+++ b/fs/gfs2/lock_dlm.c
@@ -21,6 +21,7 @@ static void gdlm_ast(void *arg)
21{ 21{
22 struct gfs2_glock *gl = arg; 22 struct gfs2_glock *gl = arg;
23 unsigned ret = gl->gl_state; 23 unsigned ret = gl->gl_state;
24 struct gfs2_sbd *sdp = gl->gl_sbd;
24 25
25 BUG_ON(gl->gl_lksb.sb_flags & DLM_SBF_DEMOTED); 26 BUG_ON(gl->gl_lksb.sb_flags & DLM_SBF_DEMOTED);
26 27
@@ -30,6 +31,8 @@ static void gdlm_ast(void *arg)
30 switch (gl->gl_lksb.sb_status) { 31 switch (gl->gl_lksb.sb_status) {
31 case -DLM_EUNLOCK: /* Unlocked, so glock can be freed */ 32 case -DLM_EUNLOCK: /* Unlocked, so glock can be freed */
32 kmem_cache_free(gfs2_glock_cachep, gl); 33 kmem_cache_free(gfs2_glock_cachep, gl);
34 if (atomic_dec_and_test(&sdp->sd_glock_disposal))
35 wake_up(&sdp->sd_glock_wait);
33 return; 36 return;
34 case -DLM_ECANCEL: /* Cancel while getting lock */ 37 case -DLM_ECANCEL: /* Cancel while getting lock */
35 ret |= LM_OUT_CANCELED; 38 ret |= LM_OUT_CANCELED;
@@ -164,14 +167,16 @@ static unsigned int gdlm_lock(struct gfs2_glock *gl,
164 return LM_OUT_ASYNC; 167 return LM_OUT_ASYNC;
165} 168}
166 169
167static void gdlm_put_lock(struct kmem_cache *cachep, void *ptr) 170static void gdlm_put_lock(struct kmem_cache *cachep, struct gfs2_glock *gl)
168{ 171{
169 struct gfs2_glock *gl = ptr; 172 struct gfs2_sbd *sdp = gl->gl_sbd;
170 struct lm_lockstruct *ls = &gl->gl_sbd->sd_lockstruct; 173 struct lm_lockstruct *ls = &sdp->sd_lockstruct;
171 int error; 174 int error;
172 175
173 if (gl->gl_lksb.sb_lkid == 0) { 176 if (gl->gl_lksb.sb_lkid == 0) {
174 kmem_cache_free(cachep, gl); 177 kmem_cache_free(cachep, gl);
178 if (atomic_dec_and_test(&sdp->sd_glock_disposal))
179 wake_up(&sdp->sd_glock_wait);
175 return; 180 return;
176 } 181 }
177 182
diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c
index cb8d7a93d5ec..6f68a5f18eb8 100644
--- a/fs/gfs2/meta_io.c
+++ b/fs/gfs2/meta_io.c
@@ -121,7 +121,7 @@ struct inode *gfs2_aspace_get(struct gfs2_sbd *sdp)
121 if (aspace) { 121 if (aspace) {
122 mapping_set_gfp_mask(aspace->i_mapping, GFP_NOFS); 122 mapping_set_gfp_mask(aspace->i_mapping, GFP_NOFS);
123 aspace->i_mapping->a_ops = &aspace_aops; 123 aspace->i_mapping->a_ops = &aspace_aops;
124 aspace->i_size = ~0ULL; 124 aspace->i_size = MAX_LFS_FILESIZE;
125 ip = GFS2_I(aspace); 125 ip = GFS2_I(aspace);
126 clear_bit(GIF_USER, &ip->i_flags); 126 clear_bit(GIF_USER, &ip->i_flags);
127 insert_inode_hash(aspace); 127 insert_inode_hash(aspace);
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index edfee24f3636..a86ed6381566 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -82,6 +82,8 @@ static struct gfs2_sbd *init_sbd(struct super_block *sb)
82 82
83 gfs2_tune_init(&sdp->sd_tune); 83 gfs2_tune_init(&sdp->sd_tune);
84 84
85 init_waitqueue_head(&sdp->sd_glock_wait);
86 atomic_set(&sdp->sd_glock_disposal, 0);
85 spin_lock_init(&sdp->sd_statfs_spin); 87 spin_lock_init(&sdp->sd_statfs_spin);
86 88
87 spin_lock_init(&sdp->sd_rindex_spin); 89 spin_lock_init(&sdp->sd_rindex_spin);
@@ -723,7 +725,7 @@ static int init_journal(struct gfs2_sbd *sdp, int undo)
723 goto fail; 725 goto fail;
724 } 726 }
725 727
726 error = -EINVAL; 728 error = -EUSERS;
727 if (!gfs2_jindex_size(sdp)) { 729 if (!gfs2_jindex_size(sdp)) {
728 fs_err(sdp, "no journals!\n"); 730 fs_err(sdp, "no journals!\n");
729 goto fail_jindex; 731 goto fail_jindex;
@@ -983,9 +985,17 @@ static const match_table_t nolock_tokens = {
983 { Opt_err, NULL }, 985 { Opt_err, NULL },
984}; 986};
985 987
988static void nolock_put_lock(struct kmem_cache *cachep, struct gfs2_glock *gl)
989{
990 struct gfs2_sbd *sdp = gl->gl_sbd;
991 kmem_cache_free(cachep, gl);
992 if (atomic_dec_and_test(&sdp->sd_glock_disposal))
993 wake_up(&sdp->sd_glock_wait);
994}
995
986static const struct lm_lockops nolock_ops = { 996static const struct lm_lockops nolock_ops = {
987 .lm_proto_name = "lock_nolock", 997 .lm_proto_name = "lock_nolock",
988 .lm_put_lock = kmem_cache_free, 998 .lm_put_lock = nolock_put_lock,
989 .lm_tokens = &nolock_tokens, 999 .lm_tokens = &nolock_tokens,
990}; 1000};
991 1001
diff --git a/fs/gfs2/ops_inode.c b/fs/gfs2/ops_inode.c
index 247436c10deb..84350e1be66d 100644
--- a/fs/gfs2/ops_inode.c
+++ b/fs/gfs2/ops_inode.c
@@ -748,7 +748,7 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry,
748 struct gfs2_rgrpd *nrgd; 748 struct gfs2_rgrpd *nrgd;
749 unsigned int num_gh; 749 unsigned int num_gh;
750 int dir_rename = 0; 750 int dir_rename = 0;
751 int alloc_required; 751 int alloc_required = 0;
752 unsigned int x; 752 unsigned int x;
753 int error; 753 int error;
754 754
@@ -867,7 +867,9 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry,
867 goto out_gunlock; 867 goto out_gunlock;
868 } 868 }
869 869
870 alloc_required = error = gfs2_diradd_alloc_required(ndir, &ndentry->d_name); 870 if (nip == NULL)
871 alloc_required = gfs2_diradd_alloc_required(ndir, &ndentry->d_name);
872 error = alloc_required;
871 if (error < 0) 873 if (error < 0)
872 goto out_gunlock; 874 goto out_gunlock;
873 error = 0; 875 error = 0;
@@ -1086,7 +1088,8 @@ static void *gfs2_follow_link(struct dentry *dentry, struct nameidata *nd)
1086 error = vfs_follow_link(nd, buf); 1088 error = vfs_follow_link(nd, buf);
1087 if (buf != array) 1089 if (buf != array)
1088 kfree(buf); 1090 kfree(buf);
1089 } 1091 } else
1092 path_put(&nd->path);
1090 1093
1091 return ERR_PTR(error); 1094 return ERR_PTR(error);
1092} 1095}
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index 0608f490c295..503b842f3ba2 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -591,11 +591,7 @@ static int gfs2_ri_update(struct gfs2_inode *ip)
591 u64 rgrp_count = ip->i_disksize; 591 u64 rgrp_count = ip->i_disksize;
592 int error; 592 int error;
593 593
594 if (do_div(rgrp_count, sizeof(struct gfs2_rindex))) { 594 do_div(rgrp_count, sizeof(struct gfs2_rindex));
595 gfs2_consist_inode(ip);
596 return -EIO;
597 }
598
599 clear_rgrpdi(sdp); 595 clear_rgrpdi(sdp);
600 596
601 file_ra_state_init(&ra_state, inode->i_mapping); 597 file_ra_state_init(&ra_state, inode->i_mapping);
@@ -915,7 +911,7 @@ void gfs2_rgrp_repolish_clones(struct gfs2_rgrpd *rgd)
915struct gfs2_alloc *gfs2_alloc_get(struct gfs2_inode *ip) 911struct gfs2_alloc *gfs2_alloc_get(struct gfs2_inode *ip)
916{ 912{
917 BUG_ON(ip->i_alloc != NULL); 913 BUG_ON(ip->i_alloc != NULL);
918 ip->i_alloc = kzalloc(sizeof(struct gfs2_alloc), GFP_KERNEL); 914 ip->i_alloc = kzalloc(sizeof(struct gfs2_alloc), GFP_NOFS);
919 return ip->i_alloc; 915 return ip->i_alloc;
920} 916}
921 917
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index c282ad41f3d1..b9dd3da22c0a 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -21,6 +21,7 @@
21#include <linux/gfs2_ondisk.h> 21#include <linux/gfs2_ondisk.h>
22#include <linux/crc32.h> 22#include <linux/crc32.h>
23#include <linux/time.h> 23#include <linux/time.h>
24#include <linux/wait.h>
24 25
25#include "gfs2.h" 26#include "gfs2.h"
26#include "incore.h" 27#include "incore.h"
diff --git a/fs/gfs2/xattr.c b/fs/gfs2/xattr.c
index 912f5cbc4740..c2ebdf2c01d4 100644
--- a/fs/gfs2/xattr.c
+++ b/fs/gfs2/xattr.c
@@ -567,18 +567,17 @@ out:
567/** 567/**
568 * gfs2_xattr_get - Get a GFS2 extended attribute 568 * gfs2_xattr_get - Get a GFS2 extended attribute
569 * @inode: The inode 569 * @inode: The inode
570 * @type: The type of extended attribute
571 * @name: The name of the extended attribute 570 * @name: The name of the extended attribute
572 * @buffer: The buffer to write the result into 571 * @buffer: The buffer to write the result into
573 * @size: The size of the buffer 572 * @size: The size of the buffer
573 * @type: The type of extended attribute
574 * 574 *
575 * Returns: actual size of data on success, -errno on error 575 * Returns: actual size of data on success, -errno on error
576 */ 576 */
577 577static int gfs2_xattr_get(struct dentry *dentry, const char *name,
578int gfs2_xattr_get(struct inode *inode, int type, const char *name, 578 void *buffer, size_t size, int type)
579 void *buffer, size_t size)
580{ 579{
581 struct gfs2_inode *ip = GFS2_I(inode); 580 struct gfs2_inode *ip = GFS2_I(dentry->d_inode);
582 struct gfs2_ea_location el; 581 struct gfs2_ea_location el;
583 int error; 582 int error;
584 583
@@ -1119,7 +1118,7 @@ static int ea_remove_stuffed(struct gfs2_inode *ip, struct gfs2_ea_location *el)
1119 1118
1120/** 1119/**
1121 * gfs2_xattr_remove - Remove a GFS2 extended attribute 1120 * gfs2_xattr_remove - Remove a GFS2 extended attribute
1122 * @inode: The inode 1121 * @ip: The inode
1123 * @type: The type of the extended attribute 1122 * @type: The type of the extended attribute
1124 * @name: The name of the extended attribute 1123 * @name: The name of the extended attribute
1125 * 1124 *
@@ -1130,9 +1129,8 @@ static int ea_remove_stuffed(struct gfs2_inode *ip, struct gfs2_ea_location *el)
1130 * Returns: 0, or errno on failure 1129 * Returns: 0, or errno on failure
1131 */ 1130 */
1132 1131
1133static int gfs2_xattr_remove(struct inode *inode, int type, const char *name) 1132static int gfs2_xattr_remove(struct gfs2_inode *ip, int type, const char *name)
1134{ 1133{
1135 struct gfs2_inode *ip = GFS2_I(inode);
1136 struct gfs2_ea_location el; 1134 struct gfs2_ea_location el;
1137 int error; 1135 int error;
1138 1136
@@ -1156,24 +1154,24 @@ static int gfs2_xattr_remove(struct inode *inode, int type, const char *name)
1156} 1154}
1157 1155
1158/** 1156/**
1159 * gfs2_xattr_set - Set (or remove) a GFS2 extended attribute 1157 * __gfs2_xattr_set - Set (or remove) a GFS2 extended attribute
1160 * @inode: The inode 1158 * @ip: The inode
1161 * @type: The type of the extended attribute
1162 * @name: The name of the extended attribute 1159 * @name: The name of the extended attribute
1163 * @value: The value of the extended attribute (NULL for remove) 1160 * @value: The value of the extended attribute (NULL for remove)
1164 * @size: The size of the @value argument 1161 * @size: The size of the @value argument
1165 * @flags: Create or Replace 1162 * @flags: Create or Replace
1163 * @type: The type of the extended attribute
1166 * 1164 *
1167 * See gfs2_xattr_remove() for details of the removal of xattrs. 1165 * See gfs2_xattr_remove() for details of the removal of xattrs.
1168 * 1166 *
1169 * Returns: 0 or errno on failure 1167 * Returns: 0 or errno on failure
1170 */ 1168 */
1171 1169
1172int gfs2_xattr_set(struct inode *inode, int type, const char *name, 1170int __gfs2_xattr_set(struct inode *inode, const char *name,
1173 const void *value, size_t size, int flags) 1171 const void *value, size_t size, int flags, int type)
1174{ 1172{
1175 struct gfs2_sbd *sdp = GFS2_SB(inode);
1176 struct gfs2_inode *ip = GFS2_I(inode); 1173 struct gfs2_inode *ip = GFS2_I(inode);
1174 struct gfs2_sbd *sdp = GFS2_SB(inode);
1177 struct gfs2_ea_location el; 1175 struct gfs2_ea_location el;
1178 unsigned int namel = strlen(name); 1176 unsigned int namel = strlen(name);
1179 int error; 1177 int error;
@@ -1184,7 +1182,7 @@ int gfs2_xattr_set(struct inode *inode, int type, const char *name,
1184 return -ERANGE; 1182 return -ERANGE;
1185 1183
1186 if (value == NULL) 1184 if (value == NULL)
1187 return gfs2_xattr_remove(inode, type, name); 1185 return gfs2_xattr_remove(ip, type, name);
1188 1186
1189 if (ea_check_size(sdp, namel, size)) 1187 if (ea_check_size(sdp, namel, size))
1190 return -ERANGE; 1188 return -ERANGE;
@@ -1224,6 +1222,13 @@ int gfs2_xattr_set(struct inode *inode, int type, const char *name,
1224 return error; 1222 return error;
1225} 1223}
1226 1224
1225static int gfs2_xattr_set(struct dentry *dentry, const char *name,
1226 const void *value, size_t size, int flags, int type)
1227{
1228 return __gfs2_xattr_set(dentry->d_inode, name, value,
1229 size, flags, type);
1230}
1231
1227static int ea_acl_chmod_unstuffed(struct gfs2_inode *ip, 1232static int ea_acl_chmod_unstuffed(struct gfs2_inode *ip,
1228 struct gfs2_ea_header *ea, char *data) 1233 struct gfs2_ea_header *ea, char *data)
1229{ 1234{
@@ -1291,6 +1296,7 @@ fail:
1291 1296
1292int gfs2_xattr_acl_chmod(struct gfs2_inode *ip, struct iattr *attr, char *data) 1297int gfs2_xattr_acl_chmod(struct gfs2_inode *ip, struct iattr *attr, char *data)
1293{ 1298{
1299 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
1294 struct gfs2_ea_location el; 1300 struct gfs2_ea_location el;
1295 struct buffer_head *dibh; 1301 struct buffer_head *dibh;
1296 int error; 1302 int error;
@@ -1300,16 +1306,17 @@ int gfs2_xattr_acl_chmod(struct gfs2_inode *ip, struct iattr *attr, char *data)
1300 return error; 1306 return error;
1301 1307
1302 if (GFS2_EA_IS_STUFFED(el.el_ea)) { 1308 if (GFS2_EA_IS_STUFFED(el.el_ea)) {
1303 error = gfs2_trans_begin(GFS2_SB(&ip->i_inode), RES_DINODE + RES_EATTR, 0); 1309 error = gfs2_trans_begin(sdp, RES_DINODE + RES_EATTR, 0);
1304 if (error) 1310 if (error == 0) {
1305 return error; 1311 gfs2_trans_add_bh(ip->i_gl, el.el_bh, 1);
1306 1312 memcpy(GFS2_EA2DATA(el.el_ea), data,
1307 gfs2_trans_add_bh(ip->i_gl, el.el_bh, 1); 1313 GFS2_EA_DATA_LEN(el.el_ea));
1308 memcpy(GFS2_EA2DATA(el.el_ea), data, 1314 }
1309 GFS2_EA_DATA_LEN(el.el_ea)); 1315 } else {
1310 } else
1311 error = ea_acl_chmod_unstuffed(ip, el.el_ea, data); 1316 error = ea_acl_chmod_unstuffed(ip, el.el_ea, data);
1317 }
1312 1318
1319 brelse(el.el_bh);
1313 if (error) 1320 if (error)
1314 return error; 1321 return error;
1315 1322
@@ -1322,8 +1329,7 @@ int gfs2_xattr_acl_chmod(struct gfs2_inode *ip, struct iattr *attr, char *data)
1322 brelse(dibh); 1329 brelse(dibh);
1323 } 1330 }
1324 1331
1325 gfs2_trans_end(GFS2_SB(&ip->i_inode)); 1332 gfs2_trans_end(sdp);
1326
1327 return error; 1333 return error;
1328} 1334}
1329 1335
@@ -1529,40 +1535,18 @@ out_alloc:
1529 return error; 1535 return error;
1530} 1536}
1531 1537
1532static int gfs2_xattr_user_get(struct inode *inode, const char *name,
1533 void *buffer, size_t size)
1534{
1535 return gfs2_xattr_get(inode, GFS2_EATYPE_USR, name, buffer, size);
1536}
1537
1538static int gfs2_xattr_user_set(struct inode *inode, const char *name,
1539 const void *value, size_t size, int flags)
1540{
1541 return gfs2_xattr_set(inode, GFS2_EATYPE_USR, name, value, size, flags);
1542}
1543
1544static int gfs2_xattr_security_get(struct inode *inode, const char *name,
1545 void *buffer, size_t size)
1546{
1547 return gfs2_xattr_get(inode, GFS2_EATYPE_SECURITY, name, buffer, size);
1548}
1549
1550static int gfs2_xattr_security_set(struct inode *inode, const char *name,
1551 const void *value, size_t size, int flags)
1552{
1553 return gfs2_xattr_set(inode, GFS2_EATYPE_SECURITY, name, value, size, flags);
1554}
1555
1556static struct xattr_handler gfs2_xattr_user_handler = { 1538static struct xattr_handler gfs2_xattr_user_handler = {
1557 .prefix = XATTR_USER_PREFIX, 1539 .prefix = XATTR_USER_PREFIX,
1558 .get = gfs2_xattr_user_get, 1540 .flags = GFS2_EATYPE_USR,
1559 .set = gfs2_xattr_user_set, 1541 .get = gfs2_xattr_get,
1542 .set = gfs2_xattr_set,
1560}; 1543};
1561 1544
1562static struct xattr_handler gfs2_xattr_security_handler = { 1545static struct xattr_handler gfs2_xattr_security_handler = {
1563 .prefix = XATTR_SECURITY_PREFIX, 1546 .prefix = XATTR_SECURITY_PREFIX,
1564 .get = gfs2_xattr_security_get, 1547 .flags = GFS2_EATYPE_SECURITY,
1565 .set = gfs2_xattr_security_set, 1548 .get = gfs2_xattr_get,
1549 .set = gfs2_xattr_set,
1566}; 1550};
1567 1551
1568struct xattr_handler *gfs2_xattr_handlers[] = { 1552struct xattr_handler *gfs2_xattr_handlers[] = {
diff --git a/fs/gfs2/xattr.h b/fs/gfs2/xattr.h
index 8d6ae5813c4d..d392f8358f2f 100644
--- a/fs/gfs2/xattr.h
+++ b/fs/gfs2/xattr.h
@@ -53,10 +53,9 @@ struct gfs2_ea_location {
53 struct gfs2_ea_header *el_prev; 53 struct gfs2_ea_header *el_prev;
54}; 54};
55 55
56extern int gfs2_xattr_get(struct inode *inode, int type, const char *name, 56extern int __gfs2_xattr_set(struct inode *inode, const char *name,
57 void *buffer, size_t size); 57 const void *value, size_t size,
58extern int gfs2_xattr_set(struct inode *inode, int type, const char *name, 58 int flags, int type);
59 const void *value, size_t size, int flags);
60extern ssize_t gfs2_listxattr(struct dentry *dentry, char *buffer, size_t size); 59extern ssize_t gfs2_listxattr(struct dentry *dentry, char *buffer, size_t size);
61extern int gfs2_ea_dealloc(struct gfs2_inode *ip); 60extern int gfs2_ea_dealloc(struct gfs2_inode *ip);
62 61
diff --git a/fs/hppfs/hppfs.c b/fs/hppfs/hppfs.c
index a5089a6dd67a..7239efc690d8 100644
--- a/fs/hppfs/hppfs.c
+++ b/fs/hppfs/hppfs.c
@@ -646,22 +646,27 @@ static const struct super_operations hppfs_sbops = {
646static int hppfs_readlink(struct dentry *dentry, char __user *buffer, 646static int hppfs_readlink(struct dentry *dentry, char __user *buffer,
647 int buflen) 647 int buflen)
648{ 648{
649 struct dentry *proc_dentry; 649 struct dentry *proc_dentry = HPPFS_I(dentry->d_inode)->proc_dentry;
650
651 proc_dentry = HPPFS_I(dentry->d_inode)->proc_dentry;
652 return proc_dentry->d_inode->i_op->readlink(proc_dentry, buffer, 650 return proc_dentry->d_inode->i_op->readlink(proc_dentry, buffer,
653 buflen); 651 buflen);
654} 652}
655 653
656static void *hppfs_follow_link(struct dentry *dentry, struct nameidata *nd) 654static void *hppfs_follow_link(struct dentry *dentry, struct nameidata *nd)
657{ 655{
658 struct dentry *proc_dentry; 656 struct dentry *proc_dentry = HPPFS_I(dentry->d_inode)->proc_dentry;
659
660 proc_dentry = HPPFS_I(dentry->d_inode)->proc_dentry;
661 657
662 return proc_dentry->d_inode->i_op->follow_link(proc_dentry, nd); 658 return proc_dentry->d_inode->i_op->follow_link(proc_dentry, nd);
663} 659}
664 660
661static void hppfs_put_link(struct dentry *dentry, struct nameidata *nd,
662 void *cookie)
663{
664 struct dentry *proc_dentry = HPPFS_I(dentry->d_inode)->proc_dentry;
665
666 if (proc_dentry->d_inode->i_op->put_link)
667 proc_dentry->d_inode->i_op->put_link(proc_dentry, nd, cookie);
668}
669
665static const struct inode_operations hppfs_dir_iops = { 670static const struct inode_operations hppfs_dir_iops = {
666 .lookup = hppfs_lookup, 671 .lookup = hppfs_lookup,
667}; 672};
@@ -669,6 +674,7 @@ static const struct inode_operations hppfs_dir_iops = {
669static const struct inode_operations hppfs_link_iops = { 674static const struct inode_operations hppfs_link_iops = {
670 .readlink = hppfs_readlink, 675 .readlink = hppfs_readlink,
671 .follow_link = hppfs_follow_link, 676 .follow_link = hppfs_follow_link,
677 .put_link = hppfs_put_link,
672}; 678};
673 679
674static struct inode *get_inode(struct super_block *sb, struct dentry *dentry) 680static struct inode *get_inode(struct super_block *sb, struct dentry *dentry)
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index 87a1258953b8..a0bbd3d1b41a 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -30,7 +30,6 @@
30#include <linux/dnotify.h> 30#include <linux/dnotify.h>
31#include <linux/statfs.h> 31#include <linux/statfs.h>
32#include <linux/security.h> 32#include <linux/security.h>
33#include <linux/ima.h>
34#include <linux/magic.h> 33#include <linux/magic.h>
35 34
36#include <asm/uaccess.h> 35#include <asm/uaccess.h>
@@ -922,7 +921,8 @@ struct file *hugetlb_file_setup(const char *name, size_t size, int acctflag,
922 int error = -ENOMEM; 921 int error = -ENOMEM;
923 struct file *file; 922 struct file *file;
924 struct inode *inode; 923 struct inode *inode;
925 struct dentry *dentry, *root; 924 struct path path;
925 struct dentry *root;
926 struct qstr quick_string; 926 struct qstr quick_string;
927 927
928 *user = NULL; 928 *user = NULL;
@@ -944,10 +944,11 @@ struct file *hugetlb_file_setup(const char *name, size_t size, int acctflag,
944 quick_string.name = name; 944 quick_string.name = name;
945 quick_string.len = strlen(quick_string.name); 945 quick_string.len = strlen(quick_string.name);
946 quick_string.hash = 0; 946 quick_string.hash = 0;
947 dentry = d_alloc(root, &quick_string); 947 path.dentry = d_alloc(root, &quick_string);
948 if (!dentry) 948 if (!path.dentry)
949 goto out_shm_unlock; 949 goto out_shm_unlock;
950 950
951 path.mnt = mntget(hugetlbfs_vfsmount);
951 error = -ENOSPC; 952 error = -ENOSPC;
952 inode = hugetlbfs_get_inode(root->d_sb, current_fsuid(), 953 inode = hugetlbfs_get_inode(root->d_sb, current_fsuid(),
953 current_fsgid(), S_IFREG | S_IRWXUGO, 0); 954 current_fsgid(), S_IFREG | S_IRWXUGO, 0);
@@ -960,24 +961,22 @@ struct file *hugetlb_file_setup(const char *name, size_t size, int acctflag,
960 acctflag)) 961 acctflag))
961 goto out_inode; 962 goto out_inode;
962 963
963 d_instantiate(dentry, inode); 964 d_instantiate(path.dentry, inode);
964 inode->i_size = size; 965 inode->i_size = size;
965 inode->i_nlink = 0; 966 inode->i_nlink = 0;
966 967
967 error = -ENFILE; 968 error = -ENFILE;
968 file = alloc_file(hugetlbfs_vfsmount, dentry, 969 file = alloc_file(&path, FMODE_WRITE | FMODE_READ,
969 FMODE_WRITE | FMODE_READ,
970 &hugetlbfs_file_operations); 970 &hugetlbfs_file_operations);
971 if (!file) 971 if (!file)
972 goto out_dentry; /* inode is already attached */ 972 goto out_dentry; /* inode is already attached */
973 ima_counts_get(file);
974 973
975 return file; 974 return file;
976 975
977out_inode: 976out_inode:
978 iput(inode); 977 iput(inode);
979out_dentry: 978out_dentry:
980 dput(dentry); 979 path_put(&path);
981out_shm_unlock: 980out_shm_unlock:
982 if (*user) { 981 if (*user) {
983 user_shm_unlock(size, *user); 982 user_shm_unlock(size, *user);
diff --git a/fs/inode.c b/fs/inode.c
index 06c1f02de611..03dfeb2e3928 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -113,7 +113,7 @@ static void wake_up_inode(struct inode *inode)
113 * Prevent speculative execution through spin_unlock(&inode_lock); 113 * Prevent speculative execution through spin_unlock(&inode_lock);
114 */ 114 */
115 smp_mb(); 115 smp_mb();
116 wake_up_bit(&inode->i_state, __I_LOCK); 116 wake_up_bit(&inode->i_state, __I_NEW);
117} 117}
118 118
119/** 119/**
@@ -690,17 +690,17 @@ void unlock_new_inode(struct inode *inode)
690 } 690 }
691#endif 691#endif
692 /* 692 /*
693 * This is special! We do not need the spinlock when clearing I_LOCK, 693 * This is special! We do not need the spinlock when clearing I_NEW,
694 * because we're guaranteed that nobody else tries to do anything about 694 * because we're guaranteed that nobody else tries to do anything about
695 * the state of the inode when it is locked, as we just created it (so 695 * the state of the inode when it is locked, as we just created it (so
696 * there can be no old holders that haven't tested I_LOCK). 696 * there can be no old holders that haven't tested I_NEW).
697 * However we must emit the memory barrier so that other CPUs reliably 697 * However we must emit the memory barrier so that other CPUs reliably
698 * see the clearing of I_LOCK after the other inode initialisation has 698 * see the clearing of I_NEW after the other inode initialisation has
699 * completed. 699 * completed.
700 */ 700 */
701 smp_mb(); 701 smp_mb();
702 WARN_ON((inode->i_state & (I_LOCK|I_NEW)) != (I_LOCK|I_NEW)); 702 WARN_ON(!(inode->i_state & I_NEW));
703 inode->i_state &= ~(I_LOCK|I_NEW); 703 inode->i_state &= ~I_NEW;
704 wake_up_inode(inode); 704 wake_up_inode(inode);
705} 705}
706EXPORT_SYMBOL(unlock_new_inode); 706EXPORT_SYMBOL(unlock_new_inode);
@@ -731,7 +731,7 @@ static struct inode *get_new_inode(struct super_block *sb,
731 goto set_failed; 731 goto set_failed;
732 732
733 __inode_add_to_lists(sb, head, inode); 733 __inode_add_to_lists(sb, head, inode);
734 inode->i_state = I_LOCK|I_NEW; 734 inode->i_state = I_NEW;
735 spin_unlock(&inode_lock); 735 spin_unlock(&inode_lock);
736 736
737 /* Return the locked inode with I_NEW set, the 737 /* Return the locked inode with I_NEW set, the
@@ -778,7 +778,7 @@ static struct inode *get_new_inode_fast(struct super_block *sb,
778 if (!old) { 778 if (!old) {
779 inode->i_ino = ino; 779 inode->i_ino = ino;
780 __inode_add_to_lists(sb, head, inode); 780 __inode_add_to_lists(sb, head, inode);
781 inode->i_state = I_LOCK|I_NEW; 781 inode->i_state = I_NEW;
782 spin_unlock(&inode_lock); 782 spin_unlock(&inode_lock);
783 783
784 /* Return the locked inode with I_NEW set, the 784 /* Return the locked inode with I_NEW set, the
@@ -1083,7 +1083,7 @@ int insert_inode_locked(struct inode *inode)
1083 ino_t ino = inode->i_ino; 1083 ino_t ino = inode->i_ino;
1084 struct hlist_head *head = inode_hashtable + hash(sb, ino); 1084 struct hlist_head *head = inode_hashtable + hash(sb, ino);
1085 1085
1086 inode->i_state |= I_LOCK|I_NEW; 1086 inode->i_state |= I_NEW;
1087 while (1) { 1087 while (1) {
1088 struct hlist_node *node; 1088 struct hlist_node *node;
1089 struct inode *old = NULL; 1089 struct inode *old = NULL;
@@ -1120,7 +1120,7 @@ int insert_inode_locked4(struct inode *inode, unsigned long hashval,
1120 struct super_block *sb = inode->i_sb; 1120 struct super_block *sb = inode->i_sb;
1121 struct hlist_head *head = inode_hashtable + hash(sb, hashval); 1121 struct hlist_head *head = inode_hashtable + hash(sb, hashval);
1122 1122
1123 inode->i_state |= I_LOCK|I_NEW; 1123 inode->i_state |= I_NEW;
1124 1124
1125 while (1) { 1125 while (1) {
1126 struct hlist_node *node; 1126 struct hlist_node *node;
@@ -1510,7 +1510,7 @@ EXPORT_SYMBOL(inode_wait);
1510 * until the deletion _might_ have completed. Callers are responsible 1510 * until the deletion _might_ have completed. Callers are responsible
1511 * to recheck inode state. 1511 * to recheck inode state.
1512 * 1512 *
1513 * It doesn't matter if I_LOCK is not set initially, a call to 1513 * It doesn't matter if I_NEW is not set initially, a call to
1514 * wake_up_inode() after removing from the hash list will DTRT. 1514 * wake_up_inode() after removing from the hash list will DTRT.
1515 * 1515 *
1516 * This is called with inode_lock held. 1516 * This is called with inode_lock held.
@@ -1518,8 +1518,8 @@ EXPORT_SYMBOL(inode_wait);
1518static void __wait_on_freeing_inode(struct inode *inode) 1518static void __wait_on_freeing_inode(struct inode *inode)
1519{ 1519{
1520 wait_queue_head_t *wq; 1520 wait_queue_head_t *wq;
1521 DEFINE_WAIT_BIT(wait, &inode->i_state, __I_LOCK); 1521 DEFINE_WAIT_BIT(wait, &inode->i_state, __I_NEW);
1522 wq = bit_waitqueue(&inode->i_state, __I_LOCK); 1522 wq = bit_waitqueue(&inode->i_state, __I_NEW);
1523 prepare_to_wait(wq, &wait.wait, TASK_UNINTERRUPTIBLE); 1523 prepare_to_wait(wq, &wait.wait, TASK_UNINTERRUPTIBLE);
1524 spin_unlock(&inode_lock); 1524 spin_unlock(&inode_lock);
1525 schedule(); 1525 schedule();
diff --git a/fs/internal.h b/fs/internal.h
index 515175b8b72e..e96a1667d749 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -79,8 +79,16 @@ extern void chroot_fs_refs(struct path *, struct path *);
79 * file_table.c 79 * file_table.c
80 */ 80 */
81extern void mark_files_ro(struct super_block *); 81extern void mark_files_ro(struct super_block *);
82extern struct file *get_empty_filp(void);
82 83
83/* 84/*
84 * super.c 85 * super.c
85 */ 86 */
86extern int do_remount_sb(struct super_block *, int, void *, int); 87extern int do_remount_sb(struct super_block *, int, void *, int);
88
89/*
90 * open.c
91 */
92struct nameidata;
93extern struct file *nameidata_to_filp(struct nameidata *);
94extern void release_open_intent(struct nameidata *);
diff --git a/fs/isofs/export.c b/fs/isofs/export.c
index e81a30593ba9..ed752cb38474 100644
--- a/fs/isofs/export.c
+++ b/fs/isofs/export.c
@@ -9,7 +9,7 @@
9 * 9 *
10 * The following files are helpful: 10 * The following files are helpful:
11 * 11 *
12 * Documentation/filesystems/Exporting 12 * Documentation/filesystems/nfs/Exporting
13 * fs/exportfs/expfs.c. 13 * fs/exportfs/expfs.c.
14 */ 14 */
15 15
diff --git a/fs/jbd/Kconfig b/fs/jbd/Kconfig
index a8408983abd4..4e28beeed157 100644
--- a/fs/jbd/Kconfig
+++ b/fs/jbd/Kconfig
@@ -1,6 +1,5 @@
1config JBD 1config JBD
2 tristate 2 tristate
3 select FS_JOURNAL_INFO
4 help 3 help
5 This is a generic journalling layer for block devices. It is 4 This is a generic journalling layer for block devices. It is
6 currently used by the ext3 file system, but it could also be 5 currently used by the ext3 file system, but it could also be
diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c
index 4160afad6d00..bd224eec9b07 100644
--- a/fs/jbd/journal.c
+++ b/fs/jbd/journal.c
@@ -1913,7 +1913,7 @@ static void __init jbd_create_debugfs_entry(void)
1913{ 1913{
1914 jbd_debugfs_dir = debugfs_create_dir("jbd", NULL); 1914 jbd_debugfs_dir = debugfs_create_dir("jbd", NULL);
1915 if (jbd_debugfs_dir) 1915 if (jbd_debugfs_dir)
1916 jbd_debug = debugfs_create_u8("jbd-debug", S_IRUGO, 1916 jbd_debug = debugfs_create_u8("jbd-debug", S_IRUGO | S_IWUSR,
1917 jbd_debugfs_dir, 1917 jbd_debugfs_dir,
1918 &journal_enable_debug); 1918 &journal_enable_debug);
1919} 1919}
diff --git a/fs/jbd2/Kconfig b/fs/jbd2/Kconfig
index 0f7d1ceafdfd..f32f346f4b0a 100644
--- a/fs/jbd2/Kconfig
+++ b/fs/jbd2/Kconfig
@@ -1,7 +1,6 @@
1config JBD2 1config JBD2
2 tristate 2 tristate
3 select CRC32 3 select CRC32
4 select FS_JOURNAL_INFO
5 help 4 help
6 This is a generic journaling layer for block devices that support 5 This is a generic journaling layer for block devices that support
7 both 32-bit and 64-bit block numbers. It is currently used by 6 both 32-bit and 64-bit block numbers. It is currently used by
diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c
index ca0f5eb62b20..886849370950 100644
--- a/fs/jbd2/checkpoint.c
+++ b/fs/jbd2/checkpoint.c
@@ -22,6 +22,7 @@
22#include <linux/jbd2.h> 22#include <linux/jbd2.h>
23#include <linux/errno.h> 23#include <linux/errno.h>
24#include <linux/slab.h> 24#include <linux/slab.h>
25#include <linux/blkdev.h>
25#include <trace/events/jbd2.h> 26#include <trace/events/jbd2.h>
26 27
27/* 28/*
@@ -515,6 +516,20 @@ int jbd2_cleanup_journal_tail(journal_t *journal)
515 journal->j_tail_sequence = first_tid; 516 journal->j_tail_sequence = first_tid;
516 journal->j_tail = blocknr; 517 journal->j_tail = blocknr;
517 spin_unlock(&journal->j_state_lock); 518 spin_unlock(&journal->j_state_lock);
519
520 /*
521 * If there is an external journal, we need to make sure that
522 * any data blocks that were recently written out --- perhaps
523 * by jbd2_log_do_checkpoint() --- are flushed out before we
524 * drop the transactions from the external journal. It's
525 * unlikely this will be necessary, especially with a
526 * appropriately sized journal, but we need this to guarantee
527 * correctness. Fortunately jbd2_cleanup_journal_tail()
528 * doesn't get called all that often.
529 */
530 if ((journal->j_fs_dev != journal->j_dev) &&
531 (journal->j_flags & JBD2_BARRIER))
532 blkdev_issue_flush(journal->j_fs_dev, NULL);
518 if (!(journal->j_flags & JBD2_ABORT)) 533 if (!(journal->j_flags & JBD2_ABORT))
519 jbd2_journal_update_superblock(journal, 1); 534 jbd2_journal_update_superblock(journal, 1);
520 return 0; 535 return 0;
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
index 6a10238d2c63..1bc74b6f26d2 100644
--- a/fs/jbd2/commit.c
+++ b/fs/jbd2/commit.c
@@ -259,6 +259,7 @@ static int journal_submit_data_buffers(journal_t *journal,
259 ret = err; 259 ret = err;
260 spin_lock(&journal->j_list_lock); 260 spin_lock(&journal->j_list_lock);
261 J_ASSERT(jinode->i_transaction == commit_transaction); 261 J_ASSERT(jinode->i_transaction == commit_transaction);
262 commit_transaction->t_flushed_data_blocks = 1;
262 jinode->i_flags &= ~JI_COMMIT_RUNNING; 263 jinode->i_flags &= ~JI_COMMIT_RUNNING;
263 wake_up_bit(&jinode->i_flags, __JI_COMMIT_RUNNING); 264 wake_up_bit(&jinode->i_flags, __JI_COMMIT_RUNNING);
264 } 265 }
@@ -708,8 +709,17 @@ start_journal_io:
708 } 709 }
709 } 710 }
710 711
711 /* Done it all: now write the commit record asynchronously. */ 712 /*
713 * If the journal is not located on the file system device,
714 * then we must flush the file system device before we issue
715 * the commit record
716 */
717 if (commit_transaction->t_flushed_data_blocks &&
718 (journal->j_fs_dev != journal->j_dev) &&
719 (journal->j_flags & JBD2_BARRIER))
720 blkdev_issue_flush(journal->j_fs_dev, NULL);
712 721
722 /* Done it all: now write the commit record asynchronously. */
713 if (JBD2_HAS_INCOMPAT_FEATURE(journal, 723 if (JBD2_HAS_INCOMPAT_FEATURE(journal,
714 JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)) { 724 JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)) {
715 err = journal_submit_commit_record(journal, commit_transaction, 725 err = journal_submit_commit_record(journal, commit_transaction,
@@ -720,13 +730,6 @@ start_journal_io:
720 blkdev_issue_flush(journal->j_dev, NULL); 730 blkdev_issue_flush(journal->j_dev, NULL);
721 } 731 }
722 732
723 /*
724 * This is the right place to wait for data buffers both for ASYNC
725 * and !ASYNC commit. If commit is ASYNC, we need to wait only after
726 * the commit block went to disk (which happens above). If commit is
727 * SYNC, we need to wait for data buffers before we start writing
728 * commit block, which happens below in such setting.
729 */
730 err = journal_finish_inode_data_buffers(journal, commit_transaction); 733 err = journal_finish_inode_data_buffers(journal, commit_transaction);
731 if (err) { 734 if (err) {
732 printk(KERN_WARNING 735 printk(KERN_WARNING
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index b7ca3a92a4db..ac0d027595d0 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -814,7 +814,7 @@ static journal_t * journal_init_common (void)
814 journal_t *journal; 814 journal_t *journal;
815 int err; 815 int err;
816 816
817 journal = kzalloc(sizeof(*journal), GFP_KERNEL|__GFP_NOFAIL); 817 journal = kzalloc(sizeof(*journal), GFP_KERNEL);
818 if (!journal) 818 if (!journal)
819 goto fail; 819 goto fail;
820 820
@@ -2115,7 +2115,8 @@ static void __init jbd2_create_debugfs_entry(void)
2115{ 2115{
2116 jbd2_debugfs_dir = debugfs_create_dir("jbd2", NULL); 2116 jbd2_debugfs_dir = debugfs_create_dir("jbd2", NULL);
2117 if (jbd2_debugfs_dir) 2117 if (jbd2_debugfs_dir)
2118 jbd2_debug = debugfs_create_u8(JBD2_DEBUG_NAME, S_IRUGO, 2118 jbd2_debug = debugfs_create_u8(JBD2_DEBUG_NAME,
2119 S_IRUGO | S_IWUSR,
2119 jbd2_debugfs_dir, 2120 jbd2_debugfs_dir,
2120 &jbd2_journal_enable_debug); 2121 &jbd2_journal_enable_debug);
2121} 2122}
diff --git a/fs/jffs2/acl.c b/fs/jffs2/acl.c
index 7edb62e97419..7cdc3196476a 100644
--- a/fs/jffs2/acl.c
+++ b/fs/jffs2/acl.c
@@ -350,8 +350,8 @@ int jffs2_acl_chmod(struct inode *inode)
350 return rc; 350 return rc;
351} 351}
352 352
353static size_t jffs2_acl_access_listxattr(struct inode *inode, char *list, size_t list_size, 353static size_t jffs2_acl_access_listxattr(struct dentry *dentry, char *list,
354 const char *name, size_t name_len) 354 size_t list_size, const char *name, size_t name_len, int type)
355{ 355{
356 const int retlen = sizeof(POSIX_ACL_XATTR_ACCESS); 356 const int retlen = sizeof(POSIX_ACL_XATTR_ACCESS);
357 357
@@ -360,8 +360,8 @@ static size_t jffs2_acl_access_listxattr(struct inode *inode, char *list, size_t
360 return retlen; 360 return retlen;
361} 361}
362 362
363static size_t jffs2_acl_default_listxattr(struct inode *inode, char *list, size_t list_size, 363static size_t jffs2_acl_default_listxattr(struct dentry *dentry, char *list,
364 const char *name, size_t name_len) 364 size_t list_size, const char *name, size_t name_len, int type)
365{ 365{
366 const int retlen = sizeof(POSIX_ACL_XATTR_DEFAULT); 366 const int retlen = sizeof(POSIX_ACL_XATTR_DEFAULT);
367 367
@@ -370,12 +370,16 @@ static size_t jffs2_acl_default_listxattr(struct inode *inode, char *list, size_
370 return retlen; 370 return retlen;
371} 371}
372 372
373static int jffs2_acl_getxattr(struct inode *inode, int type, void *buffer, size_t size) 373static int jffs2_acl_getxattr(struct dentry *dentry, const char *name,
374 void *buffer, size_t size, int type)
374{ 375{
375 struct posix_acl *acl; 376 struct posix_acl *acl;
376 int rc; 377 int rc;
377 378
378 acl = jffs2_get_acl(inode, type); 379 if (name[0] != '\0')
380 return -EINVAL;
381
382 acl = jffs2_get_acl(dentry->d_inode, type);
379 if (IS_ERR(acl)) 383 if (IS_ERR(acl))
380 return PTR_ERR(acl); 384 return PTR_ERR(acl);
381 if (!acl) 385 if (!acl)
@@ -386,26 +390,15 @@ static int jffs2_acl_getxattr(struct inode *inode, int type, void *buffer, size_
386 return rc; 390 return rc;
387} 391}
388 392
389static int jffs2_acl_access_getxattr(struct inode *inode, const char *name, void *buffer, size_t size) 393static int jffs2_acl_setxattr(struct dentry *dentry, const char *name,
390{ 394 const void *value, size_t size, int flags, int type)
391 if (name[0] != '\0')
392 return -EINVAL;
393 return jffs2_acl_getxattr(inode, ACL_TYPE_ACCESS, buffer, size);
394}
395
396static int jffs2_acl_default_getxattr(struct inode *inode, const char *name, void *buffer, size_t size)
397{
398 if (name[0] != '\0')
399 return -EINVAL;
400 return jffs2_acl_getxattr(inode, ACL_TYPE_DEFAULT, buffer, size);
401}
402
403static int jffs2_acl_setxattr(struct inode *inode, int type, const void *value, size_t size)
404{ 395{
405 struct posix_acl *acl; 396 struct posix_acl *acl;
406 int rc; 397 int rc;
407 398
408 if (!is_owner_or_cap(inode)) 399 if (name[0] != '\0')
400 return -EINVAL;
401 if (!is_owner_or_cap(dentry->d_inode))
409 return -EPERM; 402 return -EPERM;
410 403
411 if (value) { 404 if (value) {
@@ -420,38 +413,24 @@ static int jffs2_acl_setxattr(struct inode *inode, int type, const void *value,
420 } else { 413 } else {
421 acl = NULL; 414 acl = NULL;
422 } 415 }
423 rc = jffs2_set_acl(inode, type, acl); 416 rc = jffs2_set_acl(dentry->d_inode, type, acl);
424 out: 417 out:
425 posix_acl_release(acl); 418 posix_acl_release(acl);
426 return rc; 419 return rc;
427} 420}
428 421
429static int jffs2_acl_access_setxattr(struct inode *inode, const char *name,
430 const void *buffer, size_t size, int flags)
431{
432 if (name[0] != '\0')
433 return -EINVAL;
434 return jffs2_acl_setxattr(inode, ACL_TYPE_ACCESS, buffer, size);
435}
436
437static int jffs2_acl_default_setxattr(struct inode *inode, const char *name,
438 const void *buffer, size_t size, int flags)
439{
440 if (name[0] != '\0')
441 return -EINVAL;
442 return jffs2_acl_setxattr(inode, ACL_TYPE_DEFAULT, buffer, size);
443}
444
445struct xattr_handler jffs2_acl_access_xattr_handler = { 422struct xattr_handler jffs2_acl_access_xattr_handler = {
446 .prefix = POSIX_ACL_XATTR_ACCESS, 423 .prefix = POSIX_ACL_XATTR_ACCESS,
424 .flags = ACL_TYPE_DEFAULT,
447 .list = jffs2_acl_access_listxattr, 425 .list = jffs2_acl_access_listxattr,
448 .get = jffs2_acl_access_getxattr, 426 .get = jffs2_acl_getxattr,
449 .set = jffs2_acl_access_setxattr, 427 .set = jffs2_acl_setxattr,
450}; 428};
451 429
452struct xattr_handler jffs2_acl_default_xattr_handler = { 430struct xattr_handler jffs2_acl_default_xattr_handler = {
453 .prefix = POSIX_ACL_XATTR_DEFAULT, 431 .prefix = POSIX_ACL_XATTR_DEFAULT,
432 .flags = ACL_TYPE_DEFAULT,
454 .list = jffs2_acl_default_listxattr, 433 .list = jffs2_acl_default_listxattr,
455 .get = jffs2_acl_default_getxattr, 434 .get = jffs2_acl_getxattr,
456 .set = jffs2_acl_default_setxattr, 435 .set = jffs2_acl_setxattr,
457}; 436};
diff --git a/fs/jffs2/security.c b/fs/jffs2/security.c
index 02c39c64ecb3..eaccee058583 100644
--- a/fs/jffs2/security.c
+++ b/fs/jffs2/security.c
@@ -44,26 +44,28 @@ int jffs2_init_security(struct inode *inode, struct inode *dir)
44} 44}
45 45
46/* ---- XATTR Handler for "security.*" ----------------- */ 46/* ---- XATTR Handler for "security.*" ----------------- */
47static int jffs2_security_getxattr(struct inode *inode, const char *name, 47static int jffs2_security_getxattr(struct dentry *dentry, const char *name,
48 void *buffer, size_t size) 48 void *buffer, size_t size, int type)
49{ 49{
50 if (!strcmp(name, "")) 50 if (!strcmp(name, ""))
51 return -EINVAL; 51 return -EINVAL;
52 52
53 return do_jffs2_getxattr(inode, JFFS2_XPREFIX_SECURITY, name, buffer, size); 53 return do_jffs2_getxattr(dentry->d_inode, JFFS2_XPREFIX_SECURITY,
54 name, buffer, size);
54} 55}
55 56
56static int jffs2_security_setxattr(struct inode *inode, const char *name, const void *buffer, 57static int jffs2_security_setxattr(struct dentry *dentry, const char *name,
57 size_t size, int flags) 58 const void *buffer, size_t size, int flags, int type)
58{ 59{
59 if (!strcmp(name, "")) 60 if (!strcmp(name, ""))
60 return -EINVAL; 61 return -EINVAL;
61 62
62 return do_jffs2_setxattr(inode, JFFS2_XPREFIX_SECURITY, name, buffer, size, flags); 63 return do_jffs2_setxattr(dentry->d_inode, JFFS2_XPREFIX_SECURITY,
64 name, buffer, size, flags);
63} 65}
64 66
65static size_t jffs2_security_listxattr(struct inode *inode, char *list, size_t list_size, 67static size_t jffs2_security_listxattr(struct dentry *dentry, char *list,
66 const char *name, size_t name_len) 68 size_t list_size, const char *name, size_t name_len, int type)
67{ 69{
68 size_t retlen = XATTR_SECURITY_PREFIX_LEN + name_len + 1; 70 size_t retlen = XATTR_SECURITY_PREFIX_LEN + name_len + 1;
69 71
diff --git a/fs/jffs2/xattr.c b/fs/jffs2/xattr.c
index 4b107881acd5..9e75c62c85d6 100644
--- a/fs/jffs2/xattr.c
+++ b/fs/jffs2/xattr.c
@@ -990,9 +990,11 @@ ssize_t jffs2_listxattr(struct dentry *dentry, char *buffer, size_t size)
990 if (!xhandle) 990 if (!xhandle)
991 continue; 991 continue;
992 if (buffer) { 992 if (buffer) {
993 rc = xhandle->list(inode, buffer+len, size-len, xd->xname, xd->name_len); 993 rc = xhandle->list(dentry, buffer+len, size-len,
994 xd->xname, xd->name_len, xd->flags);
994 } else { 995 } else {
995 rc = xhandle->list(inode, NULL, 0, xd->xname, xd->name_len); 996 rc = xhandle->list(dentry, NULL, 0, xd->xname,
997 xd->name_len, xd->flags);
996 } 998 }
997 if (rc < 0) 999 if (rc < 0)
998 goto out; 1000 goto out;
diff --git a/fs/jffs2/xattr_trusted.c b/fs/jffs2/xattr_trusted.c
index 8ec5765ef348..3e5a5e356e05 100644
--- a/fs/jffs2/xattr_trusted.c
+++ b/fs/jffs2/xattr_trusted.c
@@ -16,24 +16,26 @@
16#include <linux/mtd/mtd.h> 16#include <linux/mtd/mtd.h>
17#include "nodelist.h" 17#include "nodelist.h"
18 18
19static int jffs2_trusted_getxattr(struct inode *inode, const char *name, 19static int jffs2_trusted_getxattr(struct dentry *dentry, const char *name,
20 void *buffer, size_t size) 20 void *buffer, size_t size, int type)
21{ 21{
22 if (!strcmp(name, "")) 22 if (!strcmp(name, ""))
23 return -EINVAL; 23 return -EINVAL;
24 return do_jffs2_getxattr(inode, JFFS2_XPREFIX_TRUSTED, name, buffer, size); 24 return do_jffs2_getxattr(dentry->d_inode, JFFS2_XPREFIX_TRUSTED,
25 name, buffer, size);
25} 26}
26 27
27static int jffs2_trusted_setxattr(struct inode *inode, const char *name, const void *buffer, 28static int jffs2_trusted_setxattr(struct dentry *dentry, const char *name,
28 size_t size, int flags) 29 const void *buffer, size_t size, int flags, int type)
29{ 30{
30 if (!strcmp(name, "")) 31 if (!strcmp(name, ""))
31 return -EINVAL; 32 return -EINVAL;
32 return do_jffs2_setxattr(inode, JFFS2_XPREFIX_TRUSTED, name, buffer, size, flags); 33 return do_jffs2_setxattr(dentry->d_inode, JFFS2_XPREFIX_TRUSTED,
34 name, buffer, size, flags);
33} 35}
34 36
35static size_t jffs2_trusted_listxattr(struct inode *inode, char *list, size_t list_size, 37static size_t jffs2_trusted_listxattr(struct dentry *dentry, char *list,
36 const char *name, size_t name_len) 38 size_t list_size, const char *name, size_t name_len, int type)
37{ 39{
38 size_t retlen = XATTR_TRUSTED_PREFIX_LEN + name_len + 1; 40 size_t retlen = XATTR_TRUSTED_PREFIX_LEN + name_len + 1;
39 41
diff --git a/fs/jffs2/xattr_user.c b/fs/jffs2/xattr_user.c
index 8bbeab90ada1..8544af67dffe 100644
--- a/fs/jffs2/xattr_user.c
+++ b/fs/jffs2/xattr_user.c
@@ -16,24 +16,26 @@
16#include <linux/mtd/mtd.h> 16#include <linux/mtd/mtd.h>
17#include "nodelist.h" 17#include "nodelist.h"
18 18
19static int jffs2_user_getxattr(struct inode *inode, const char *name, 19static int jffs2_user_getxattr(struct dentry *dentry, const char *name,
20 void *buffer, size_t size) 20 void *buffer, size_t size, int type)
21{ 21{
22 if (!strcmp(name, "")) 22 if (!strcmp(name, ""))
23 return -EINVAL; 23 return -EINVAL;
24 return do_jffs2_getxattr(inode, JFFS2_XPREFIX_USER, name, buffer, size); 24 return do_jffs2_getxattr(dentry->d_inode, JFFS2_XPREFIX_USER,
25 name, buffer, size);
25} 26}
26 27
27static int jffs2_user_setxattr(struct inode *inode, const char *name, const void *buffer, 28static int jffs2_user_setxattr(struct dentry *dentry, const char *name,
28 size_t size, int flags) 29 const void *buffer, size_t size, int flags, int type)
29{ 30{
30 if (!strcmp(name, "")) 31 if (!strcmp(name, ""))
31 return -EINVAL; 32 return -EINVAL;
32 return do_jffs2_setxattr(inode, JFFS2_XPREFIX_USER, name, buffer, size, flags); 33 return do_jffs2_setxattr(dentry->d_inode, JFFS2_XPREFIX_USER,
34 name, buffer, size, flags);
33} 35}
34 36
35static size_t jffs2_user_listxattr(struct inode *inode, char *list, size_t list_size, 37static size_t jffs2_user_listxattr(struct dentry *dentry, char *list,
36 const char *name, size_t name_len) 38 size_t list_size, const char *name, size_t name_len, int type)
37{ 39{
38 size_t retlen = XATTR_USER_PREFIX_LEN + name_len + 1; 40 size_t retlen = XATTR_USER_PREFIX_LEN + name_len + 1;
39 41
diff --git a/fs/jfs/jfs_txnmgr.c b/fs/jfs/jfs_txnmgr.c
index f26e4d03ada5..d945ea76b445 100644
--- a/fs/jfs/jfs_txnmgr.c
+++ b/fs/jfs/jfs_txnmgr.c
@@ -1292,7 +1292,7 @@ int txCommit(tid_t tid, /* transaction identifier */
1292 */ 1292 */
1293 /* 1293 /*
1294 * I believe this code is no longer needed. Splitting I_LOCK 1294 * I believe this code is no longer needed. Splitting I_LOCK
1295 * into two bits, I_LOCK and I_SYNC should prevent this 1295 * into two bits, I_NEW and I_SYNC should prevent this
1296 * deadlock as well. But since I don't have a JFS testload 1296 * deadlock as well. But since I don't have a JFS testload
1297 * to verify this, only a trivial s/I_LOCK/I_SYNC/ was done. 1297 * to verify this, only a trivial s/I_LOCK/I_SYNC/ was done.
1298 * Joern 1298 * Joern
diff --git a/fs/jfs/super.c b/fs/jfs/super.c
index 2234c73fc577..d929a822a74e 100644
--- a/fs/jfs/super.c
+++ b/fs/jfs/super.c
@@ -524,7 +524,7 @@ static int jfs_fill_super(struct super_block *sb, void *data, int silent)
524 * Page cache is indexed by long. 524 * Page cache is indexed by long.
525 * I would use MAX_LFS_FILESIZE, but it's only half as big 525 * I would use MAX_LFS_FILESIZE, but it's only half as big
526 */ 526 */
527 sb->s_maxbytes = min(((u64) PAGE_CACHE_SIZE << 32) - 1, sb->s_maxbytes); 527 sb->s_maxbytes = min(((u64) PAGE_CACHE_SIZE << 32) - 1, (u64)sb->s_maxbytes);
528#endif 528#endif
529 sb->s_time_gran = 1; 529 sb->s_time_gran = 1;
530 return 0; 530 return 0;
diff --git a/fs/libfs.c b/fs/libfs.c
index 219576c52d80..6e8d17e1dc4c 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -848,7 +848,6 @@ EXPORT_SYMBOL(simple_write_end);
848EXPORT_SYMBOL(simple_dir_inode_operations); 848EXPORT_SYMBOL(simple_dir_inode_operations);
849EXPORT_SYMBOL(simple_dir_operations); 849EXPORT_SYMBOL(simple_dir_operations);
850EXPORT_SYMBOL(simple_empty); 850EXPORT_SYMBOL(simple_empty);
851EXPORT_SYMBOL(d_alloc_name);
852EXPORT_SYMBOL(simple_fill_super); 851EXPORT_SYMBOL(simple_fill_super);
853EXPORT_SYMBOL(simple_getattr); 852EXPORT_SYMBOL(simple_getattr);
854EXPORT_SYMBOL(simple_link); 853EXPORT_SYMBOL(simple_link);
diff --git a/fs/lockd/svc4proc.c b/fs/lockd/svc4proc.c
index bd173a6ca3b1..a7966eed3c17 100644
--- a/fs/lockd/svc4proc.c
+++ b/fs/lockd/svc4proc.c
@@ -11,10 +11,6 @@
11#include <linux/time.h> 11#include <linux/time.h>
12#include <linux/slab.h> 12#include <linux/slab.h>
13#include <linux/smp_lock.h> 13#include <linux/smp_lock.h>
14#include <linux/in.h>
15#include <linux/sunrpc/svc.h>
16#include <linux/sunrpc/clnt.h>
17#include <linux/nfsd/nfsd.h>
18#include <linux/lockd/lockd.h> 14#include <linux/lockd/lockd.h>
19#include <linux/lockd/share.h> 15#include <linux/lockd/share.h>
20 16
diff --git a/fs/lockd/svcproc.c b/fs/lockd/svcproc.c
index e1d28ddd2169..56c9519d900a 100644
--- a/fs/lockd/svcproc.c
+++ b/fs/lockd/svcproc.c
@@ -11,10 +11,6 @@
11#include <linux/time.h> 11#include <linux/time.h>
12#include <linux/slab.h> 12#include <linux/slab.h>
13#include <linux/smp_lock.h> 13#include <linux/smp_lock.h>
14#include <linux/in.h>
15#include <linux/sunrpc/svc.h>
16#include <linux/sunrpc/clnt.h>
17#include <linux/nfsd/nfsd.h>
18#include <linux/lockd/lockd.h> 14#include <linux/lockd/lockd.h>
19#include <linux/lockd/share.h> 15#include <linux/lockd/share.h>
20 16
diff --git a/fs/namei.c b/fs/namei.c
index 87f97ba90ad1..a4855af776a8 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -35,7 +35,7 @@
35#include <linux/fs_struct.h> 35#include <linux/fs_struct.h>
36#include <asm/uaccess.h> 36#include <asm/uaccess.h>
37 37
38#define ACC_MODE(x) ("\000\004\002\006"[(x)&O_ACCMODE]) 38#include "internal.h"
39 39
40/* [Feb-1997 T. Schoebel-Theuer] 40/* [Feb-1997 T. Schoebel-Theuer]
41 * Fundamental changes in the pathname lookup mechanisms (namei) 41 * Fundamental changes in the pathname lookup mechanisms (namei)
@@ -108,8 +108,6 @@
108 * any extra contention... 108 * any extra contention...
109 */ 109 */
110 110
111static int __link_path_walk(const char *name, struct nameidata *nd);
112
113/* In order to reduce some races, while at the same time doing additional 111/* In order to reduce some races, while at the same time doing additional
114 * checking and hopefully speeding things up, we copy filenames to the 112 * checking and hopefully speeding things up, we copy filenames to the
115 * kernel data space before using them.. 113 * kernel data space before using them..
@@ -234,6 +232,7 @@ int generic_permission(struct inode *inode, int mask,
234 /* 232 /*
235 * Searching includes executable on directories, else just read. 233 * Searching includes executable on directories, else just read.
236 */ 234 */
235 mask &= MAY_READ | MAY_WRITE | MAY_EXEC;
237 if (mask == MAY_READ || (S_ISDIR(inode->i_mode) && !(mask & MAY_WRITE))) 236 if (mask == MAY_READ || (S_ISDIR(inode->i_mode) && !(mask & MAY_WRITE)))
238 if (capable(CAP_DAC_READ_SEARCH)) 237 if (capable(CAP_DAC_READ_SEARCH))
239 return 0; 238 return 0;
@@ -414,36 +413,55 @@ do_revalidate(struct dentry *dentry, struct nameidata *nd)
414} 413}
415 414
416/* 415/*
417 * Internal lookup() using the new generic dcache. 416 * force_reval_path - force revalidation of a dentry
418 * SMP-safe 417 *
418 * In some situations the path walking code will trust dentries without
419 * revalidating them. This causes problems for filesystems that depend on
420 * d_revalidate to handle file opens (e.g. NFSv4). When FS_REVAL_DOT is set
421 * (which indicates that it's possible for the dentry to go stale), force
422 * a d_revalidate call before proceeding.
423 *
424 * Returns 0 if the revalidation was successful. If the revalidation fails,
425 * either return the error returned by d_revalidate or -ESTALE if the
426 * revalidation it just returned 0. If d_revalidate returns 0, we attempt to
427 * invalidate the dentry. It's up to the caller to handle putting references
428 * to the path if necessary.
419 */ 429 */
420static struct dentry * cached_lookup(struct dentry * parent, struct qstr * name, struct nameidata *nd) 430static int
431force_reval_path(struct path *path, struct nameidata *nd)
421{ 432{
422 struct dentry * dentry = __d_lookup(parent, name); 433 int status;
434 struct dentry *dentry = path->dentry;
423 435
424 /* lockess __d_lookup may fail due to concurrent d_move() 436 /*
425 * in some unrelated directory, so try with d_lookup 437 * only check on filesystems where it's possible for the dentry to
438 * become stale. It's assumed that if this flag is set then the
439 * d_revalidate op will also be defined.
426 */ 440 */
427 if (!dentry) 441 if (!(dentry->d_sb->s_type->fs_flags & FS_REVAL_DOT))
428 dentry = d_lookup(parent, name); 442 return 0;
429 443
430 if (dentry && dentry->d_op && dentry->d_op->d_revalidate) 444 status = dentry->d_op->d_revalidate(dentry, nd);
431 dentry = do_revalidate(dentry, nd); 445 if (status > 0)
446 return 0;
432 447
433 return dentry; 448 if (!status) {
449 d_invalidate(dentry);
450 status = -ESTALE;
451 }
452 return status;
434} 453}
435 454
436/* 455/*
437 * Short-cut version of permission(), for calling by 456 * Short-cut version of permission(), for calling on directories
438 * path_walk(), when dcache lock is held. Combines parts 457 * during pathname resolution. Combines parts of permission()
439 * of permission() and generic_permission(), and tests ONLY for 458 * and generic_permission(), and tests ONLY for MAY_EXEC permission.
440 * MAY_EXEC permission.
441 * 459 *
442 * If appropriate, check DAC only. If not appropriate, or 460 * If appropriate, check DAC only. If not appropriate, or
443 * short-cut DAC fails, then call permission() to do more 461 * short-cut DAC fails, then call ->permission() to do more
444 * complete permission check. 462 * complete permission check.
445 */ 463 */
446static int exec_permission_lite(struct inode *inode) 464static int exec_permission(struct inode *inode)
447{ 465{
448 int ret; 466 int ret;
449 467
@@ -465,99 +483,6 @@ ok:
465 return security_inode_permission(inode, MAY_EXEC); 483 return security_inode_permission(inode, MAY_EXEC);
466} 484}
467 485
468/*
469 * This is called when everything else fails, and we actually have
470 * to go to the low-level filesystem to find out what we should do..
471 *
472 * We get the directory semaphore, and after getting that we also
473 * make sure that nobody added the entry to the dcache in the meantime..
474 * SMP-safe
475 */
476static struct dentry * real_lookup(struct dentry * parent, struct qstr * name, struct nameidata *nd)
477{
478 struct dentry * result;
479 struct inode *dir = parent->d_inode;
480
481 mutex_lock(&dir->i_mutex);
482 /*
483 * First re-do the cached lookup just in case it was created
484 * while we waited for the directory semaphore..
485 *
486 * FIXME! This could use version numbering or similar to
487 * avoid unnecessary cache lookups.
488 *
489 * The "dcache_lock" is purely to protect the RCU list walker
490 * from concurrent renames at this point (we mustn't get false
491 * negatives from the RCU list walk here, unlike the optimistic
492 * fast walk).
493 *
494 * so doing d_lookup() (with seqlock), instead of lockfree __d_lookup
495 */
496 result = d_lookup(parent, name);
497 if (!result) {
498 struct dentry *dentry;
499
500 /* Don't create child dentry for a dead directory. */
501 result = ERR_PTR(-ENOENT);
502 if (IS_DEADDIR(dir))
503 goto out_unlock;
504
505 dentry = d_alloc(parent, name);
506 result = ERR_PTR(-ENOMEM);
507 if (dentry) {
508 result = dir->i_op->lookup(dir, dentry, nd);
509 if (result)
510 dput(dentry);
511 else
512 result = dentry;
513 }
514out_unlock:
515 mutex_unlock(&dir->i_mutex);
516 return result;
517 }
518
519 /*
520 * Uhhuh! Nasty case: the cache was re-populated while
521 * we waited on the semaphore. Need to revalidate.
522 */
523 mutex_unlock(&dir->i_mutex);
524 if (result->d_op && result->d_op->d_revalidate) {
525 result = do_revalidate(result, nd);
526 if (!result)
527 result = ERR_PTR(-ENOENT);
528 }
529 return result;
530}
531
532/*
533 * Wrapper to retry pathname resolution whenever the underlying
534 * file system returns an ESTALE.
535 *
536 * Retry the whole path once, forcing real lookup requests
537 * instead of relying on the dcache.
538 */
539static __always_inline int link_path_walk(const char *name, struct nameidata *nd)
540{
541 struct path save = nd->path;
542 int result;
543
544 /* make sure the stuff we saved doesn't go away */
545 path_get(&save);
546
547 result = __link_path_walk(name, nd);
548 if (result == -ESTALE) {
549 /* nd->path had been dropped */
550 nd->path = save;
551 path_get(&nd->path);
552 nd->flags |= LOOKUP_REVAL;
553 result = __link_path_walk(name, nd);
554 }
555
556 path_put(&save);
557
558 return result;
559}
560
561static __always_inline void set_root(struct nameidata *nd) 486static __always_inline void set_root(struct nameidata *nd)
562{ 487{
563 if (!nd->root.mnt) { 488 if (!nd->root.mnt) {
@@ -569,6 +494,8 @@ static __always_inline void set_root(struct nameidata *nd)
569 } 494 }
570} 495}
571 496
497static int link_path_walk(const char *, struct nameidata *);
498
572static __always_inline int __vfs_follow_link(struct nameidata *nd, const char *link) 499static __always_inline int __vfs_follow_link(struct nameidata *nd, const char *link)
573{ 500{
574 int res = 0; 501 int res = 0;
@@ -634,6 +561,7 @@ static __always_inline int __do_follow_link(struct path *path, struct nameidata
634 dget(dentry); 561 dget(dentry);
635 } 562 }
636 mntget(path->mnt); 563 mntget(path->mnt);
564 nd->last_type = LAST_BIND;
637 cookie = dentry->d_inode->i_op->follow_link(dentry, nd); 565 cookie = dentry->d_inode->i_op->follow_link(dentry, nd);
638 error = PTR_ERR(cookie); 566 error = PTR_ERR(cookie);
639 if (!IS_ERR(cookie)) { 567 if (!IS_ERR(cookie)) {
@@ -641,11 +569,14 @@ static __always_inline int __do_follow_link(struct path *path, struct nameidata
641 error = 0; 569 error = 0;
642 if (s) 570 if (s)
643 error = __vfs_follow_link(nd, s); 571 error = __vfs_follow_link(nd, s);
572 else if (nd->last_type == LAST_BIND) {
573 error = force_reval_path(&nd->path, nd);
574 if (error)
575 path_put(&nd->path);
576 }
644 if (dentry->d_inode->i_op->put_link) 577 if (dentry->d_inode->i_op->put_link)
645 dentry->d_inode->i_op->put_link(dentry, nd, cookie); 578 dentry->d_inode->i_op->put_link(dentry, nd, cookie);
646 } 579 }
647 path_put(path);
648
649 return error; 580 return error;
650} 581}
651 582
@@ -672,6 +603,7 @@ static inline int do_follow_link(struct path *path, struct nameidata *nd)
672 current->total_link_count++; 603 current->total_link_count++;
673 nd->depth++; 604 nd->depth++;
674 err = __do_follow_link(path, nd); 605 err = __do_follow_link(path, nd);
606 path_put(path);
675 current->link_count--; 607 current->link_count--;
676 nd->depth--; 608 nd->depth--;
677 return err; 609 return err;
@@ -797,8 +729,19 @@ static int do_lookup(struct nameidata *nd, struct qstr *name,
797 struct path *path) 729 struct path *path)
798{ 730{
799 struct vfsmount *mnt = nd->path.mnt; 731 struct vfsmount *mnt = nd->path.mnt;
800 struct dentry *dentry = __d_lookup(nd->path.dentry, name); 732 struct dentry *dentry, *parent;
733 struct inode *dir;
734 /*
735 * See if the low-level filesystem might want
736 * to use its own hash..
737 */
738 if (nd->path.dentry->d_op && nd->path.dentry->d_op->d_hash) {
739 int err = nd->path.dentry->d_op->d_hash(nd->path.dentry, name);
740 if (err < 0)
741 return err;
742 }
801 743
744 dentry = __d_lookup(nd->path.dentry, name);
802 if (!dentry) 745 if (!dentry)
803 goto need_lookup; 746 goto need_lookup;
804 if (dentry->d_op && dentry->d_op->d_revalidate) 747 if (dentry->d_op && dentry->d_op->d_revalidate)
@@ -810,7 +753,59 @@ done:
810 return 0; 753 return 0;
811 754
812need_lookup: 755need_lookup:
813 dentry = real_lookup(nd->path.dentry, name, nd); 756 parent = nd->path.dentry;
757 dir = parent->d_inode;
758
759 mutex_lock(&dir->i_mutex);
760 /*
761 * First re-do the cached lookup just in case it was created
762 * while we waited for the directory semaphore..
763 *
764 * FIXME! This could use version numbering or similar to
765 * avoid unnecessary cache lookups.
766 *
767 * The "dcache_lock" is purely to protect the RCU list walker
768 * from concurrent renames at this point (we mustn't get false
769 * negatives from the RCU list walk here, unlike the optimistic
770 * fast walk).
771 *
772 * so doing d_lookup() (with seqlock), instead of lockfree __d_lookup
773 */
774 dentry = d_lookup(parent, name);
775 if (!dentry) {
776 struct dentry *new;
777
778 /* Don't create child dentry for a dead directory. */
779 dentry = ERR_PTR(-ENOENT);
780 if (IS_DEADDIR(dir))
781 goto out_unlock;
782
783 new = d_alloc(parent, name);
784 dentry = ERR_PTR(-ENOMEM);
785 if (new) {
786 dentry = dir->i_op->lookup(dir, new, nd);
787 if (dentry)
788 dput(new);
789 else
790 dentry = new;
791 }
792out_unlock:
793 mutex_unlock(&dir->i_mutex);
794 if (IS_ERR(dentry))
795 goto fail;
796 goto done;
797 }
798
799 /*
800 * Uhhuh! Nasty case: the cache was re-populated while
801 * we waited on the semaphore. Need to revalidate.
802 */
803 mutex_unlock(&dir->i_mutex);
804 if (dentry->d_op && dentry->d_op->d_revalidate) {
805 dentry = do_revalidate(dentry, nd);
806 if (!dentry)
807 dentry = ERR_PTR(-ENOENT);
808 }
814 if (IS_ERR(dentry)) 809 if (IS_ERR(dentry))
815 goto fail; 810 goto fail;
816 goto done; 811 goto done;
@@ -828,6 +823,17 @@ fail:
828} 823}
829 824
830/* 825/*
826 * This is a temporary kludge to deal with "automount" symlinks; proper
827 * solution is to trigger them on follow_mount(), so that do_lookup()
828 * would DTRT. To be killed before 2.6.34-final.
829 */
830static inline int follow_on_final(struct inode *inode, unsigned lookup_flags)
831{
832 return inode && unlikely(inode->i_op->follow_link) &&
833 ((lookup_flags & LOOKUP_FOLLOW) || S_ISDIR(inode->i_mode));
834}
835
836/*
831 * Name resolution. 837 * Name resolution.
832 * This is the basic name resolution function, turning a pathname into 838 * This is the basic name resolution function, turning a pathname into
833 * the final dentry. We expect 'base' to be positive and a directory. 839 * the final dentry. We expect 'base' to be positive and a directory.
@@ -835,7 +841,7 @@ fail:
835 * Returns 0 and nd will have valid dentry and mnt on success. 841 * Returns 0 and nd will have valid dentry and mnt on success.
836 * Returns error and drops reference to input namei data on failure. 842 * Returns error and drops reference to input namei data on failure.
837 */ 843 */
838static int __link_path_walk(const char *name, struct nameidata *nd) 844static int link_path_walk(const char *name, struct nameidata *nd)
839{ 845{
840 struct path next; 846 struct path next;
841 struct inode *inode; 847 struct inode *inode;
@@ -858,7 +864,7 @@ static int __link_path_walk(const char *name, struct nameidata *nd)
858 unsigned int c; 864 unsigned int c;
859 865
860 nd->flags |= LOOKUP_CONTINUE; 866 nd->flags |= LOOKUP_CONTINUE;
861 err = exec_permission_lite(inode); 867 err = exec_permission(inode);
862 if (err) 868 if (err)
863 break; 869 break;
864 870
@@ -898,16 +904,6 @@ static int __link_path_walk(const char *name, struct nameidata *nd)
898 case 1: 904 case 1:
899 continue; 905 continue;
900 } 906 }
901 /*
902 * See if the low-level filesystem might want
903 * to use its own hash..
904 */
905 if (nd->path.dentry->d_op && nd->path.dentry->d_op->d_hash) {
906 err = nd->path.dentry->d_op->d_hash(nd->path.dentry,
907 &this);
908 if (err < 0)
909 break;
910 }
911 /* This does the actual lookups.. */ 907 /* This does the actual lookups.. */
912 err = do_lookup(nd, &this, &next); 908 err = do_lookup(nd, &this, &next);
913 if (err) 909 if (err)
@@ -953,18 +949,11 @@ last_component:
953 case 1: 949 case 1:
954 goto return_reval; 950 goto return_reval;
955 } 951 }
956 if (nd->path.dentry->d_op && nd->path.dentry->d_op->d_hash) {
957 err = nd->path.dentry->d_op->d_hash(nd->path.dentry,
958 &this);
959 if (err < 0)
960 break;
961 }
962 err = do_lookup(nd, &this, &next); 952 err = do_lookup(nd, &this, &next);
963 if (err) 953 if (err)
964 break; 954 break;
965 inode = next.dentry->d_inode; 955 inode = next.dentry->d_inode;
966 if ((lookup_flags & LOOKUP_FOLLOW) 956 if (follow_on_final(inode, lookup_flags)) {
967 && inode && inode->i_op->follow_link) {
968 err = do_follow_link(&next, nd); 957 err = do_follow_link(&next, nd);
969 if (err) 958 if (err)
970 goto return_err; 959 goto return_err;
@@ -1017,8 +1006,27 @@ return_err:
1017 1006
1018static int path_walk(const char *name, struct nameidata *nd) 1007static int path_walk(const char *name, struct nameidata *nd)
1019{ 1008{
1009 struct path save = nd->path;
1010 int result;
1011
1020 current->total_link_count = 0; 1012 current->total_link_count = 0;
1021 return link_path_walk(name, nd); 1013
1014 /* make sure the stuff we saved doesn't go away */
1015 path_get(&save);
1016
1017 result = link_path_walk(name, nd);
1018 if (result == -ESTALE) {
1019 /* nd->path had been dropped */
1020 current->total_link_count = 0;
1021 nd->path = save;
1022 path_get(&nd->path);
1023 nd->flags |= LOOKUP_REVAL;
1024 result = link_path_walk(name, nd);
1025 }
1026
1027 path_put(&save);
1028
1029 return result;
1022} 1030}
1023 1031
1024static int path_init(int dfd, const char *name, unsigned int flags, struct nameidata *nd) 1032static int path_init(int dfd, const char *name, unsigned int flags, struct nameidata *nd)
@@ -1141,36 +1149,6 @@ int vfs_path_lookup(struct dentry *dentry, struct vfsmount *mnt,
1141 return retval; 1149 return retval;
1142} 1150}
1143 1151
1144/**
1145 * path_lookup_open - lookup a file path with open intent
1146 * @dfd: the directory to use as base, or AT_FDCWD
1147 * @name: pointer to file name
1148 * @lookup_flags: lookup intent flags
1149 * @nd: pointer to nameidata
1150 * @open_flags: open intent flags
1151 */
1152static int path_lookup_open(int dfd, const char *name,
1153 unsigned int lookup_flags, struct nameidata *nd, int open_flags)
1154{
1155 struct file *filp = get_empty_filp();
1156 int err;
1157
1158 if (filp == NULL)
1159 return -ENFILE;
1160 nd->intent.open.file = filp;
1161 nd->intent.open.flags = open_flags;
1162 nd->intent.open.create_mode = 0;
1163 err = do_path_lookup(dfd, name, lookup_flags|LOOKUP_OPEN, nd);
1164 if (IS_ERR(nd->intent.open.file)) {
1165 if (err == 0) {
1166 err = PTR_ERR(nd->intent.open.file);
1167 path_put(&nd->path);
1168 }
1169 } else if (err != 0)
1170 release_open_intent(nd);
1171 return err;
1172}
1173
1174static struct dentry *__lookup_hash(struct qstr *name, 1152static struct dentry *__lookup_hash(struct qstr *name,
1175 struct dentry *base, struct nameidata *nd) 1153 struct dentry *base, struct nameidata *nd)
1176{ 1154{
@@ -1191,7 +1169,17 @@ static struct dentry *__lookup_hash(struct qstr *name,
1191 goto out; 1169 goto out;
1192 } 1170 }
1193 1171
1194 dentry = cached_lookup(base, name, nd); 1172 dentry = __d_lookup(base, name);
1173
1174 /* lockess __d_lookup may fail due to concurrent d_move()
1175 * in some unrelated directory, so try with d_lookup
1176 */
1177 if (!dentry)
1178 dentry = d_lookup(base, name);
1179
1180 if (dentry && dentry->d_op && dentry->d_op->d_revalidate)
1181 dentry = do_revalidate(dentry, nd);
1182
1195 if (!dentry) { 1183 if (!dentry) {
1196 struct dentry *new; 1184 struct dentry *new;
1197 1185
@@ -1223,7 +1211,7 @@ static struct dentry *lookup_hash(struct nameidata *nd)
1223{ 1211{
1224 int err; 1212 int err;
1225 1213
1226 err = inode_permission(nd->path.dentry->d_inode, MAY_EXEC); 1214 err = exec_permission(nd->path.dentry->d_inode);
1227 if (err) 1215 if (err)
1228 return ERR_PTR(err); 1216 return ERR_PTR(err);
1229 return __lookup_hash(&nd->last, nd->path.dentry, nd); 1217 return __lookup_hash(&nd->last, nd->path.dentry, nd);
@@ -1273,7 +1261,7 @@ struct dentry *lookup_one_len(const char *name, struct dentry *base, int len)
1273 if (err) 1261 if (err)
1274 return ERR_PTR(err); 1262 return ERR_PTR(err);
1275 1263
1276 err = inode_permission(base->d_inode, MAY_EXEC); 1264 err = exec_permission(base->d_inode);
1277 if (err) 1265 if (err)
1278 return ERR_PTR(err); 1266 return ERR_PTR(err);
1279 return __lookup_hash(&this, base, NULL); 1267 return __lookup_hash(&this, base, NULL);
@@ -1511,69 +1499,45 @@ int may_open(struct path *path, int acc_mode, int flag)
1511 if (error) 1499 if (error)
1512 return error; 1500 return error;
1513 1501
1514 error = ima_path_check(path, acc_mode ?
1515 acc_mode & (MAY_READ | MAY_WRITE | MAY_EXEC) :
1516 ACC_MODE(flag) & (MAY_READ | MAY_WRITE),
1517 IMA_COUNT_UPDATE);
1518
1519 if (error)
1520 return error;
1521 /* 1502 /*
1522 * An append-only file must be opened in append mode for writing. 1503 * An append-only file must be opened in append mode for writing.
1523 */ 1504 */
1524 if (IS_APPEND(inode)) { 1505 if (IS_APPEND(inode)) {
1525 error = -EPERM;
1526 if ((flag & FMODE_WRITE) && !(flag & O_APPEND)) 1506 if ((flag & FMODE_WRITE) && !(flag & O_APPEND))
1527 goto err_out; 1507 return -EPERM;
1528 if (flag & O_TRUNC) 1508 if (flag & O_TRUNC)
1529 goto err_out; 1509 return -EPERM;
1530 } 1510 }
1531 1511
1532 /* O_NOATIME can only be set by the owner or superuser */ 1512 /* O_NOATIME can only be set by the owner or superuser */
1533 if (flag & O_NOATIME) 1513 if (flag & O_NOATIME && !is_owner_or_cap(inode))
1534 if (!is_owner_or_cap(inode)) { 1514 return -EPERM;
1535 error = -EPERM;
1536 goto err_out;
1537 }
1538 1515
1539 /* 1516 /*
1540 * Ensure there are no outstanding leases on the file. 1517 * Ensure there are no outstanding leases on the file.
1541 */ 1518 */
1542 error = break_lease(inode, flag); 1519 return break_lease(inode, flag);
1543 if (error) 1520}
1544 goto err_out;
1545
1546 if (flag & O_TRUNC) {
1547 error = get_write_access(inode);
1548 if (error)
1549 goto err_out;
1550
1551 /*
1552 * Refuse to truncate files with mandatory locks held on them.
1553 */
1554 error = locks_verify_locked(inode);
1555 if (!error)
1556 error = security_path_truncate(path, 0,
1557 ATTR_MTIME|ATTR_CTIME|ATTR_OPEN);
1558 if (!error) {
1559 vfs_dq_init(inode);
1560
1561 error = do_truncate(dentry, 0,
1562 ATTR_MTIME|ATTR_CTIME|ATTR_OPEN,
1563 NULL);
1564 }
1565 put_write_access(inode);
1566 if (error)
1567 goto err_out;
1568 } else
1569 if (flag & FMODE_WRITE)
1570 vfs_dq_init(inode);
1571 1521
1572 return 0; 1522static int handle_truncate(struct path *path)
1573err_out: 1523{
1574 ima_counts_put(path, acc_mode ? 1524 struct inode *inode = path->dentry->d_inode;
1575 acc_mode & (MAY_READ | MAY_WRITE | MAY_EXEC) : 1525 int error = get_write_access(inode);
1576 ACC_MODE(flag) & (MAY_READ | MAY_WRITE)); 1526 if (error)
1527 return error;
1528 /*
1529 * Refuse to truncate files with mandatory locks held on them.
1530 */
1531 error = locks_verify_locked(inode);
1532 if (!error)
1533 error = security_path_truncate(path, 0,
1534 ATTR_MTIME|ATTR_CTIME|ATTR_OPEN);
1535 if (!error) {
1536 error = do_truncate(path->dentry, 0,
1537 ATTR_MTIME|ATTR_CTIME|ATTR_OPEN,
1538 NULL);
1539 }
1540 put_write_access(inode);
1577 return error; 1541 return error;
1578} 1542}
1579 1543
@@ -1628,7 +1592,7 @@ static inline int open_to_namei_flags(int flag)
1628 return flag; 1592 return flag;
1629} 1593}
1630 1594
1631static int open_will_write_to_fs(int flag, struct inode *inode) 1595static int open_will_truncate(int flag, struct inode *inode)
1632{ 1596{
1633 /* 1597 /*
1634 * We'll never write to the fs underlying 1598 * We'll never write to the fs underlying
@@ -1653,8 +1617,9 @@ struct file *do_filp_open(int dfd, const char *pathname,
1653 struct path path; 1617 struct path path;
1654 struct dentry *dir; 1618 struct dentry *dir;
1655 int count = 0; 1619 int count = 0;
1656 int will_write; 1620 int will_truncate;
1657 int flag = open_to_namei_flags(open_flag); 1621 int flag = open_to_namei_flags(open_flag);
1622 int force_reval = 0;
1658 1623
1659 /* 1624 /*
1660 * O_SYNC is implemented as __O_SYNC|O_DSYNC. As many places only 1625 * O_SYNC is implemented as __O_SYNC|O_DSYNC. As many places only
@@ -1666,7 +1631,7 @@ struct file *do_filp_open(int dfd, const char *pathname,
1666 open_flag |= O_DSYNC; 1631 open_flag |= O_DSYNC;
1667 1632
1668 if (!acc_mode) 1633 if (!acc_mode)
1669 acc_mode = MAY_OPEN | ACC_MODE(flag); 1634 acc_mode = MAY_OPEN | ACC_MODE(open_flag);
1670 1635
1671 /* O_TRUNC implies we need access checks for write permissions */ 1636 /* O_TRUNC implies we need access checks for write permissions */
1672 if (flag & O_TRUNC) 1637 if (flag & O_TRUNC)
@@ -1681,8 +1646,23 @@ struct file *do_filp_open(int dfd, const char *pathname,
1681 * The simplest case - just a plain lookup. 1646 * The simplest case - just a plain lookup.
1682 */ 1647 */
1683 if (!(flag & O_CREAT)) { 1648 if (!(flag & O_CREAT)) {
1684 error = path_lookup_open(dfd, pathname, lookup_flags(flag), 1649 filp = get_empty_filp();
1685 &nd, flag); 1650
1651 if (filp == NULL)
1652 return ERR_PTR(-ENFILE);
1653 nd.intent.open.file = filp;
1654 filp->f_flags = open_flag;
1655 nd.intent.open.flags = flag;
1656 nd.intent.open.create_mode = 0;
1657 error = do_path_lookup(dfd, pathname,
1658 lookup_flags(flag)|LOOKUP_OPEN, &nd);
1659 if (IS_ERR(nd.intent.open.file)) {
1660 if (error == 0) {
1661 error = PTR_ERR(nd.intent.open.file);
1662 path_put(&nd.path);
1663 }
1664 } else if (error)
1665 release_open_intent(&nd);
1686 if (error) 1666 if (error)
1687 return ERR_PTR(error); 1667 return ERR_PTR(error);
1688 goto ok; 1668 goto ok;
@@ -1691,9 +1671,12 @@ struct file *do_filp_open(int dfd, const char *pathname,
1691 /* 1671 /*
1692 * Create - we need to know the parent. 1672 * Create - we need to know the parent.
1693 */ 1673 */
1674reval:
1694 error = path_init(dfd, pathname, LOOKUP_PARENT, &nd); 1675 error = path_init(dfd, pathname, LOOKUP_PARENT, &nd);
1695 if (error) 1676 if (error)
1696 return ERR_PTR(error); 1677 return ERR_PTR(error);
1678 if (force_reval)
1679 nd.flags |= LOOKUP_REVAL;
1697 error = path_walk(pathname, &nd); 1680 error = path_walk(pathname, &nd);
1698 if (error) { 1681 if (error) {
1699 if (nd.root.mnt) 1682 if (nd.root.mnt)
@@ -1717,6 +1700,7 @@ struct file *do_filp_open(int dfd, const char *pathname,
1717 if (filp == NULL) 1700 if (filp == NULL)
1718 goto exit_parent; 1701 goto exit_parent;
1719 nd.intent.open.file = filp; 1702 nd.intent.open.file = filp;
1703 filp->f_flags = open_flag;
1720 nd.intent.open.flags = flag; 1704 nd.intent.open.flags = flag;
1721 nd.intent.open.create_mode = mode; 1705 nd.intent.open.create_mode = mode;
1722 dir = nd.path.dentry; 1706 dir = nd.path.dentry;
@@ -1757,14 +1741,17 @@ do_last:
1757 mnt_drop_write(nd.path.mnt); 1741 mnt_drop_write(nd.path.mnt);
1758 goto exit; 1742 goto exit;
1759 } 1743 }
1760 filp = nameidata_to_filp(&nd, open_flag); 1744 filp = nameidata_to_filp(&nd);
1761 if (IS_ERR(filp))
1762 ima_counts_put(&nd.path,
1763 acc_mode & (MAY_READ | MAY_WRITE |
1764 MAY_EXEC));
1765 mnt_drop_write(nd.path.mnt); 1745 mnt_drop_write(nd.path.mnt);
1766 if (nd.root.mnt) 1746 if (nd.root.mnt)
1767 path_put(&nd.root); 1747 path_put(&nd.root);
1748 if (!IS_ERR(filp)) {
1749 error = ima_file_check(filp, acc_mode);
1750 if (error) {
1751 fput(filp);
1752 filp = ERR_PTR(error);
1753 }
1754 }
1768 return filp; 1755 return filp;
1769 } 1756 }
1770 1757
@@ -1792,7 +1779,7 @@ do_last:
1792 1779
1793 path_to_nameidata(&path, &nd); 1780 path_to_nameidata(&path, &nd);
1794 error = -EISDIR; 1781 error = -EISDIR;
1795 if (path.dentry->d_inode && S_ISDIR(path.dentry->d_inode->i_mode)) 1782 if (S_ISDIR(path.dentry->d_inode->i_mode))
1796 goto exit; 1783 goto exit;
1797ok: 1784ok:
1798 /* 1785 /*
@@ -1805,28 +1792,44 @@ ok:
1805 * be avoided. Taking this mnt write here 1792 * be avoided. Taking this mnt write here
1806 * ensures that (2) can not occur. 1793 * ensures that (2) can not occur.
1807 */ 1794 */
1808 will_write = open_will_write_to_fs(flag, nd.path.dentry->d_inode); 1795 will_truncate = open_will_truncate(flag, nd.path.dentry->d_inode);
1809 if (will_write) { 1796 if (will_truncate) {
1810 error = mnt_want_write(nd.path.mnt); 1797 error = mnt_want_write(nd.path.mnt);
1811 if (error) 1798 if (error)
1812 goto exit; 1799 goto exit;
1813 } 1800 }
1814 error = may_open(&nd.path, acc_mode, flag); 1801 error = may_open(&nd.path, acc_mode, flag);
1815 if (error) { 1802 if (error) {
1816 if (will_write) 1803 if (will_truncate)
1817 mnt_drop_write(nd.path.mnt); 1804 mnt_drop_write(nd.path.mnt);
1818 goto exit; 1805 goto exit;
1819 } 1806 }
1820 filp = nameidata_to_filp(&nd, open_flag); 1807 filp = nameidata_to_filp(&nd);
1821 if (IS_ERR(filp)) 1808 if (!IS_ERR(filp)) {
1822 ima_counts_put(&nd.path, 1809 error = ima_file_check(filp, acc_mode);
1823 acc_mode & (MAY_READ | MAY_WRITE | MAY_EXEC)); 1810 if (error) {
1811 fput(filp);
1812 filp = ERR_PTR(error);
1813 }
1814 }
1815 if (!IS_ERR(filp)) {
1816 if (acc_mode & MAY_WRITE)
1817 vfs_dq_init(nd.path.dentry->d_inode);
1818
1819 if (will_truncate) {
1820 error = handle_truncate(&nd.path);
1821 if (error) {
1822 fput(filp);
1823 filp = ERR_PTR(error);
1824 }
1825 }
1826 }
1824 /* 1827 /*
1825 * It is now safe to drop the mnt write 1828 * It is now safe to drop the mnt write
1826 * because the filp has had a write taken 1829 * because the filp has had a write taken
1827 * on its behalf. 1830 * on its behalf.
1828 */ 1831 */
1829 if (will_write) 1832 if (will_truncate)
1830 mnt_drop_write(nd.path.mnt); 1833 mnt_drop_write(nd.path.mnt);
1831 if (nd.root.mnt) 1834 if (nd.root.mnt)
1832 path_put(&nd.root); 1835 path_put(&nd.root);
@@ -1864,6 +1867,7 @@ do_link:
1864 if (error) 1867 if (error)
1865 goto exit_dput; 1868 goto exit_dput;
1866 error = __do_follow_link(&path, &nd); 1869 error = __do_follow_link(&path, &nd);
1870 path_put(&path);
1867 if (error) { 1871 if (error) {
1868 /* Does someone understand code flow here? Or it is only 1872 /* Does someone understand code flow here? Or it is only
1869 * me so stupid? Anathema to whoever designed this non-sense 1873 * me so stupid? Anathema to whoever designed this non-sense
@@ -1872,6 +1876,10 @@ do_link:
1872 release_open_intent(&nd); 1876 release_open_intent(&nd);
1873 if (nd.root.mnt) 1877 if (nd.root.mnt)
1874 path_put(&nd.root); 1878 path_put(&nd.root);
1879 if (error == -ESTALE && !force_reval) {
1880 force_reval = 1;
1881 goto reval;
1882 }
1875 return ERR_PTR(error); 1883 return ERR_PTR(error);
1876 } 1884 }
1877 nd.flags &= ~LOOKUP_PARENT; 1885 nd.flags &= ~LOOKUP_PARENT;
diff --git a/fs/namespace.c b/fs/namespace.c
index 7d70d63ceb29..c768f733c8d6 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -965,10 +965,12 @@ EXPORT_SYMBOL(may_umount_tree);
965int may_umount(struct vfsmount *mnt) 965int may_umount(struct vfsmount *mnt)
966{ 966{
967 int ret = 1; 967 int ret = 1;
968 down_read(&namespace_sem);
968 spin_lock(&vfsmount_lock); 969 spin_lock(&vfsmount_lock);
969 if (propagate_mount_busy(mnt, 2)) 970 if (propagate_mount_busy(mnt, 2))
970 ret = 0; 971 ret = 0;
971 spin_unlock(&vfsmount_lock); 972 spin_unlock(&vfsmount_lock);
973 up_read(&namespace_sem);
972 return ret; 974 return ret;
973} 975}
974 976
@@ -1352,12 +1354,12 @@ static int attach_recursive_mnt(struct vfsmount *source_mnt,
1352 if (err) 1354 if (err)
1353 goto out_cleanup_ids; 1355 goto out_cleanup_ids;
1354 1356
1357 spin_lock(&vfsmount_lock);
1358
1355 if (IS_MNT_SHARED(dest_mnt)) { 1359 if (IS_MNT_SHARED(dest_mnt)) {
1356 for (p = source_mnt; p; p = next_mnt(p, source_mnt)) 1360 for (p = source_mnt; p; p = next_mnt(p, source_mnt))
1357 set_mnt_shared(p); 1361 set_mnt_shared(p);
1358 } 1362 }
1359
1360 spin_lock(&vfsmount_lock);
1361 if (parent_path) { 1363 if (parent_path) {
1362 detach_mnt(source_mnt, parent_path); 1364 detach_mnt(source_mnt, parent_path);
1363 attach_mnt(source_mnt, path); 1365 attach_mnt(source_mnt, path);
@@ -1534,8 +1536,12 @@ static int do_remount(struct path *path, int flags, int mnt_flags,
1534 err = change_mount_flags(path->mnt, flags); 1536 err = change_mount_flags(path->mnt, flags);
1535 else 1537 else
1536 err = do_remount_sb(sb, flags, data, 0); 1538 err = do_remount_sb(sb, flags, data, 0);
1537 if (!err) 1539 if (!err) {
1540 spin_lock(&vfsmount_lock);
1541 mnt_flags |= path->mnt->mnt_flags & MNT_PNODE_MASK;
1538 path->mnt->mnt_flags = mnt_flags; 1542 path->mnt->mnt_flags = mnt_flags;
1543 spin_unlock(&vfsmount_lock);
1544 }
1539 up_write(&sb->s_umount); 1545 up_write(&sb->s_umount);
1540 if (!err) { 1546 if (!err) {
1541 security_sb_post_remount(path->mnt, flags, data); 1547 security_sb_post_remount(path->mnt, flags, data);
@@ -1665,6 +1671,8 @@ int do_add_mount(struct vfsmount *newmnt, struct path *path,
1665{ 1671{
1666 int err; 1672 int err;
1667 1673
1674 mnt_flags &= ~(MNT_SHARED | MNT_WRITE_HOLD);
1675
1668 down_write(&namespace_sem); 1676 down_write(&namespace_sem);
1669 /* Something was mounted here while we slept */ 1677 /* Something was mounted here while we slept */
1670 while (d_mountpoint(path->dentry) && 1678 while (d_mountpoint(path->dentry) &&
diff --git a/fs/nfs/Kconfig b/fs/nfs/Kconfig
index 2a77bc25d5af..59e5673b4597 100644
--- a/fs/nfs/Kconfig
+++ b/fs/nfs/Kconfig
@@ -90,7 +90,7 @@ config ROOT_NFS
90 If you want your system to mount its root file system via NFS, 90 If you want your system to mount its root file system via NFS,
91 choose Y here. This is common practice for managing systems 91 choose Y here. This is common practice for managing systems
92 without local permanent storage. For details, read 92 without local permanent storage. For details, read
93 <file:Documentation/filesystems/nfsroot.txt>. 93 <file:Documentation/filesystems/nfs/nfsroot.txt>.
94 94
95 Most people say N here. 95 Most people say N here.
96 96
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 2c5ace4f00a7..3c7f03b669fb 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -1615,6 +1615,7 @@ static int nfs_rename(struct inode *old_dir, struct dentry *old_dentry,
1615 goto out; 1615 goto out;
1616 1616
1617 new_dentry = dentry; 1617 new_dentry = dentry;
1618 rehash = NULL;
1618 new_inode = NULL; 1619 new_inode = NULL;
1619 } 1620 }
1620 } 1621 }
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index e1d415e97849..0d289823e856 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -342,6 +342,7 @@ static ssize_t nfs_direct_read_schedule_segment(struct nfs_direct_req *dreq,
342 data->res.fattr = &data->fattr; 342 data->res.fattr = &data->fattr;
343 data->res.eof = 0; 343 data->res.eof = 0;
344 data->res.count = bytes; 344 data->res.count = bytes;
345 nfs_fattr_init(&data->fattr);
345 msg.rpc_argp = &data->args; 346 msg.rpc_argp = &data->args;
346 msg.rpc_resp = &data->res; 347 msg.rpc_resp = &data->res;
347 348
@@ -575,6 +576,7 @@ static void nfs_direct_commit_schedule(struct nfs_direct_req *dreq)
575 data->res.count = 0; 576 data->res.count = 0;
576 data->res.fattr = &data->fattr; 577 data->res.fattr = &data->fattr;
577 data->res.verf = &data->verf; 578 data->res.verf = &data->verf;
579 nfs_fattr_init(&data->fattr);
578 580
579 NFS_PROTO(data->inode)->commit_setup(data, &msg); 581 NFS_PROTO(data->inode)->commit_setup(data, &msg);
580 582
@@ -766,6 +768,7 @@ static ssize_t nfs_direct_write_schedule_segment(struct nfs_direct_req *dreq,
766 data->res.fattr = &data->fattr; 768 data->res.fattr = &data->fattr;
767 data->res.count = bytes; 769 data->res.count = bytes;
768 data->res.verf = &data->verf; 770 data->res.verf = &data->verf;
771 nfs_fattr_init(&data->fattr);
769 772
770 task_setup_data.task = &data->task; 773 task_setup_data.task = &data->task;
771 task_setup_data.callback_data = data; 774 task_setup_data.callback_data = data;
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 6b891328f332..63f2071d6445 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -486,6 +486,8 @@ static int nfs_release_page(struct page *page, gfp_t gfp)
486{ 486{
487 dfprintk(PAGECACHE, "NFS: release_page(%p)\n", page); 487 dfprintk(PAGECACHE, "NFS: release_page(%p)\n", page);
488 488
489 if (gfp & __GFP_WAIT)
490 nfs_wb_page(page->mapping->host, page);
489 /* If PagePrivate() is set, then the page is not freeable */ 491 /* If PagePrivate() is set, then the page is not freeable */
490 if (PagePrivate(page)) 492 if (PagePrivate(page))
491 return 0; 493 return 0;
diff --git a/fs/nfs/fscache.c b/fs/nfs/fscache.c
index fa588006588d..237874f1af23 100644
--- a/fs/nfs/fscache.c
+++ b/fs/nfs/fscache.c
@@ -354,12 +354,11 @@ void nfs_fscache_reset_inode_cookie(struct inode *inode)
354 */ 354 */
355int nfs_fscache_release_page(struct page *page, gfp_t gfp) 355int nfs_fscache_release_page(struct page *page, gfp_t gfp)
356{ 356{
357 struct nfs_inode *nfsi = NFS_I(page->mapping->host);
358 struct fscache_cookie *cookie = nfsi->fscache;
359
360 BUG_ON(!cookie);
361
362 if (PageFsCache(page)) { 357 if (PageFsCache(page)) {
358 struct nfs_inode *nfsi = NFS_I(page->mapping->host);
359 struct fscache_cookie *cookie = nfsi->fscache;
360
361 BUG_ON(!cookie);
363 dfprintk(FSCACHE, "NFS: fscache releasepage (0x%p/0x%p/0x%p)\n", 362 dfprintk(FSCACHE, "NFS: fscache releasepage (0x%p/0x%p/0x%p)\n",
364 cookie, page, nfsi); 363 cookie, page, nfsi);
365 364
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index faa091865ad0..f141bde7756a 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -1261,8 +1261,10 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
1261 1261
1262 if (fattr->valid & NFS_ATTR_FATTR_MODE) { 1262 if (fattr->valid & NFS_ATTR_FATTR_MODE) {
1263 if ((inode->i_mode & S_IALLUGO) != (fattr->mode & S_IALLUGO)) { 1263 if ((inode->i_mode & S_IALLUGO) != (fattr->mode & S_IALLUGO)) {
1264 umode_t newmode = inode->i_mode & S_IFMT;
1265 newmode |= fattr->mode & S_IALLUGO;
1266 inode->i_mode = newmode;
1264 invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL; 1267 invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL;
1265 inode->i_mode = fattr->mode;
1266 } 1268 }
1267 } else if (server->caps & NFS_CAP_MODE) 1269 } else if (server->caps & NFS_CAP_MODE)
1268 invalid |= save_cache_validity & (NFS_INO_INVALID_ATTR 1270 invalid |= save_cache_validity & (NFS_INO_INVALID_ATTR
diff --git a/fs/nfs/mount_clnt.c b/fs/nfs/mount_clnt.c
index 0adefc40cc89..59047f8d7d72 100644
--- a/fs/nfs/mount_clnt.c
+++ b/fs/nfs/mount_clnt.c
@@ -120,7 +120,7 @@ static struct {
120 { .status = MNT3ERR_INVAL, .errno = -EINVAL, }, 120 { .status = MNT3ERR_INVAL, .errno = -EINVAL, },
121 { .status = MNT3ERR_NAMETOOLONG, .errno = -ENAMETOOLONG, }, 121 { .status = MNT3ERR_NAMETOOLONG, .errno = -ENAMETOOLONG, },
122 { .status = MNT3ERR_NOTSUPP, .errno = -ENOTSUPP, }, 122 { .status = MNT3ERR_NOTSUPP, .errno = -ENOTSUPP, },
123 { .status = MNT3ERR_SERVERFAULT, .errno = -ESERVERFAULT, }, 123 { .status = MNT3ERR_SERVERFAULT, .errno = -EREMOTEIO, },
124}; 124};
125 125
126struct mountres { 126struct mountres {
diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c
index 5e078b222b4e..7bc2da8efd4a 100644
--- a/fs/nfs/nfs2xdr.c
+++ b/fs/nfs/nfs2xdr.c
@@ -699,7 +699,7 @@ static struct {
699 { NFSERR_BAD_COOKIE, -EBADCOOKIE }, 699 { NFSERR_BAD_COOKIE, -EBADCOOKIE },
700 { NFSERR_NOTSUPP, -ENOTSUPP }, 700 { NFSERR_NOTSUPP, -ENOTSUPP },
701 { NFSERR_TOOSMALL, -ETOOSMALL }, 701 { NFSERR_TOOSMALL, -ETOOSMALL },
702 { NFSERR_SERVERFAULT, -ESERVERFAULT }, 702 { NFSERR_SERVERFAULT, -EREMOTEIO },
703 { NFSERR_BADTYPE, -EBADTYPE }, 703 { NFSERR_BADTYPE, -EBADTYPE },
704 { NFSERR_JUKEBOX, -EJUKEBOX }, 704 { NFSERR_JUKEBOX, -EJUKEBOX },
705 { -1, -EIO } 705 { -1, -EIO }
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index 7e57b04e4014..0c6fda33d66e 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -108,6 +108,10 @@ enum {
108 NFS_OWNER_RECLAIM_NOGRACE 108 NFS_OWNER_RECLAIM_NOGRACE
109}; 109};
110 110
111#define NFS_LOCK_NEW 0
112#define NFS_LOCK_RECLAIM 1
113#define NFS_LOCK_EXPIRED 2
114
111/* 115/*
112 * struct nfs4_state maintains the client-side state for a given 116 * struct nfs4_state maintains the client-side state for a given
113 * (state_owner,inode) tuple (OPEN) or state_owner (LOCK). 117 * (state_owner,inode) tuple (OPEN) or state_owner (LOCK).
@@ -142,6 +146,7 @@ enum {
142 NFS_O_RDWR_STATE, /* OPEN stateid has read/write state */ 146 NFS_O_RDWR_STATE, /* OPEN stateid has read/write state */
143 NFS_STATE_RECLAIM_REBOOT, /* OPEN stateid server rebooted */ 147 NFS_STATE_RECLAIM_REBOOT, /* OPEN stateid server rebooted */
144 NFS_STATE_RECLAIM_NOGRACE, /* OPEN stateid needs to recover state */ 148 NFS_STATE_RECLAIM_NOGRACE, /* OPEN stateid needs to recover state */
149 NFS_STATE_POSIX_LOCKS, /* Posix locks are supported */
145}; 150};
146 151
147struct nfs4_state { 152struct nfs4_state {
@@ -273,6 +278,7 @@ extern void nfs4_state_set_mode_locked(struct nfs4_state *, fmode_t);
273extern void nfs4_schedule_state_recovery(struct nfs_client *); 278extern void nfs4_schedule_state_recovery(struct nfs_client *);
274extern void nfs4_schedule_state_manager(struct nfs_client *); 279extern void nfs4_schedule_state_manager(struct nfs_client *);
275extern int nfs4_state_mark_reclaim_nograce(struct nfs_client *clp, struct nfs4_state *state); 280extern int nfs4_state_mark_reclaim_nograce(struct nfs_client *clp, struct nfs4_state *state);
281extern int nfs4_state_mark_reclaim_reboot(struct nfs_client *clp, struct nfs4_state *state);
276extern void nfs41_handle_sequence_flag_errors(struct nfs_client *clp, u32 flags); 282extern void nfs41_handle_sequence_flag_errors(struct nfs_client *clp, u32 flags);
277extern void nfs4_put_lock_state(struct nfs4_lock_state *lsp); 283extern void nfs4_put_lock_state(struct nfs4_lock_state *lsp);
278extern int nfs4_set_lock_state(struct nfs4_state *state, struct file_lock *fl); 284extern int nfs4_set_lock_state(struct nfs4_state *state, struct file_lock *fl);
@@ -282,6 +288,7 @@ extern struct nfs_seqid *nfs_alloc_seqid(struct nfs_seqid_counter *counter);
282extern int nfs_wait_on_sequence(struct nfs_seqid *seqid, struct rpc_task *task); 288extern int nfs_wait_on_sequence(struct nfs_seqid *seqid, struct rpc_task *task);
283extern void nfs_increment_open_seqid(int status, struct nfs_seqid *seqid); 289extern void nfs_increment_open_seqid(int status, struct nfs_seqid *seqid);
284extern void nfs_increment_lock_seqid(int status, struct nfs_seqid *seqid); 290extern void nfs_increment_lock_seqid(int status, struct nfs_seqid *seqid);
291extern void nfs_release_seqid(struct nfs_seqid *seqid);
285extern void nfs_free_seqid(struct nfs_seqid *seqid); 292extern void nfs_free_seqid(struct nfs_seqid *seqid);
286 293
287extern const nfs4_stateid zero_stateid; 294extern const nfs4_stateid zero_stateid;
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 9f5f11ecfd93..375f0fae2c6a 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -64,6 +64,7 @@
64 64
65struct nfs4_opendata; 65struct nfs4_opendata;
66static int _nfs4_proc_open(struct nfs4_opendata *data); 66static int _nfs4_proc_open(struct nfs4_opendata *data);
67static int _nfs4_recover_proc_open(struct nfs4_opendata *data);
67static int nfs4_do_fsinfo(struct nfs_server *, struct nfs_fh *, struct nfs_fsinfo *); 68static int nfs4_do_fsinfo(struct nfs_server *, struct nfs_fh *, struct nfs_fsinfo *);
68static int nfs4_async_handle_error(struct rpc_task *, const struct nfs_server *, struct nfs4_state *); 69static int nfs4_async_handle_error(struct rpc_task *, const struct nfs_server *, struct nfs4_state *);
69static int _nfs4_proc_lookup(struct inode *dir, const struct qstr *name, struct nfs_fh *fhandle, struct nfs_fattr *fattr); 70static int _nfs4_proc_lookup(struct inode *dir, const struct qstr *name, struct nfs_fh *fhandle, struct nfs_fattr *fattr);
@@ -248,19 +249,15 @@ static int nfs4_handle_exception(const struct nfs_server *server, int errorcode,
248 if (state == NULL) 249 if (state == NULL)
249 break; 250 break;
250 nfs4_state_mark_reclaim_nograce(clp, state); 251 nfs4_state_mark_reclaim_nograce(clp, state);
251 case -NFS4ERR_STALE_CLIENTID: 252 goto do_state_recovery;
252 case -NFS4ERR_STALE_STATEID: 253 case -NFS4ERR_STALE_STATEID:
253 case -NFS4ERR_EXPIRED: 254 if (state == NULL)
254 nfs4_schedule_state_recovery(clp);
255 ret = nfs4_wait_clnt_recover(clp);
256 if (ret == 0)
257 exception->retry = 1;
258#if !defined(CONFIG_NFS_V4_1)
259 break;
260#else /* !defined(CONFIG_NFS_V4_1) */
261 if (!nfs4_has_session(server->nfs_client))
262 break; 255 break;
263 /* FALLTHROUGH */ 256 nfs4_state_mark_reclaim_reboot(clp, state);
257 case -NFS4ERR_STALE_CLIENTID:
258 case -NFS4ERR_EXPIRED:
259 goto do_state_recovery;
260#if defined(CONFIG_NFS_V4_1)
264 case -NFS4ERR_BADSESSION: 261 case -NFS4ERR_BADSESSION:
265 case -NFS4ERR_BADSLOT: 262 case -NFS4ERR_BADSLOT:
266 case -NFS4ERR_BAD_HIGH_SLOT: 263 case -NFS4ERR_BAD_HIGH_SLOT:
@@ -273,7 +270,7 @@ static int nfs4_handle_exception(const struct nfs_server *server, int errorcode,
273 nfs4_schedule_state_recovery(clp); 270 nfs4_schedule_state_recovery(clp);
274 exception->retry = 1; 271 exception->retry = 1;
275 break; 272 break;
276#endif /* !defined(CONFIG_NFS_V4_1) */ 273#endif /* defined(CONFIG_NFS_V4_1) */
277 case -NFS4ERR_FILE_OPEN: 274 case -NFS4ERR_FILE_OPEN:
278 if (exception->timeout > HZ) { 275 if (exception->timeout > HZ) {
279 /* We have retried a decent amount, time to 276 /* We have retried a decent amount, time to
@@ -292,6 +289,12 @@ static int nfs4_handle_exception(const struct nfs_server *server, int errorcode,
292 } 289 }
293 /* We failed to handle the error */ 290 /* We failed to handle the error */
294 return nfs4_map_errors(ret); 291 return nfs4_map_errors(ret);
292do_state_recovery:
293 nfs4_schedule_state_recovery(clp);
294 ret = nfs4_wait_clnt_recover(clp);
295 if (ret == 0)
296 exception->retry = 1;
297 return ret;
295} 298}
296 299
297 300
@@ -341,6 +344,27 @@ nfs4_free_slot(struct nfs4_slot_table *tbl, u8 free_slotid)
341 free_slotid, tbl->highest_used_slotid); 344 free_slotid, tbl->highest_used_slotid);
342} 345}
343 346
347/*
348 * Signal state manager thread if session is drained
349 */
350static void nfs41_check_drain_session_complete(struct nfs4_session *ses)
351{
352 struct rpc_task *task;
353
354 if (!test_bit(NFS4CLNT_SESSION_DRAINING, &ses->clp->cl_state)) {
355 task = rpc_wake_up_next(&ses->fc_slot_table.slot_tbl_waitq);
356 if (task)
357 rpc_task_set_priority(task, RPC_PRIORITY_PRIVILEGED);
358 return;
359 }
360
361 if (ses->fc_slot_table.highest_used_slotid != -1)
362 return;
363
364 dprintk("%s COMPLETE: Session Drained\n", __func__);
365 complete(&ses->complete);
366}
367
344static void nfs41_sequence_free_slot(const struct nfs_client *clp, 368static void nfs41_sequence_free_slot(const struct nfs_client *clp,
345 struct nfs4_sequence_res *res) 369 struct nfs4_sequence_res *res)
346{ 370{
@@ -356,15 +380,7 @@ static void nfs41_sequence_free_slot(const struct nfs_client *clp,
356 380
357 spin_lock(&tbl->slot_tbl_lock); 381 spin_lock(&tbl->slot_tbl_lock);
358 nfs4_free_slot(tbl, res->sr_slotid); 382 nfs4_free_slot(tbl, res->sr_slotid);
359 383 nfs41_check_drain_session_complete(clp->cl_session);
360 /* Signal state manager thread if session is drained */
361 if (test_bit(NFS4CLNT_SESSION_DRAINING, &clp->cl_state)) {
362 if (tbl->highest_used_slotid == -1) {
363 dprintk("%s COMPLETE: Session Drained\n", __func__);
364 complete(&clp->cl_session->complete);
365 }
366 } else
367 rpc_wake_up_next(&tbl->slot_tbl_waitq);
368 spin_unlock(&tbl->slot_tbl_lock); 384 spin_unlock(&tbl->slot_tbl_lock);
369 res->sr_slotid = NFS4_MAX_SLOT_TABLE; 385 res->sr_slotid = NFS4_MAX_SLOT_TABLE;
370} 386}
@@ -421,7 +437,7 @@ out:
421 * Note: must be called with under the slot_tbl_lock. 437 * Note: must be called with under the slot_tbl_lock.
422 */ 438 */
423static u8 439static u8
424nfs4_find_slot(struct nfs4_slot_table *tbl, struct rpc_task *task) 440nfs4_find_slot(struct nfs4_slot_table *tbl)
425{ 441{
426 int slotid; 442 int slotid;
427 u8 ret_id = NFS4_MAX_SLOT_TABLE; 443 u8 ret_id = NFS4_MAX_SLOT_TABLE;
@@ -463,7 +479,8 @@ static int nfs41_setup_sequence(struct nfs4_session *session,
463 tbl = &session->fc_slot_table; 479 tbl = &session->fc_slot_table;
464 480
465 spin_lock(&tbl->slot_tbl_lock); 481 spin_lock(&tbl->slot_tbl_lock);
466 if (test_bit(NFS4CLNT_SESSION_DRAINING, &session->clp->cl_state)) { 482 if (test_bit(NFS4CLNT_SESSION_DRAINING, &session->clp->cl_state) &&
483 !rpc_task_has_priority(task, RPC_PRIORITY_PRIVILEGED)) {
467 /* 484 /*
468 * The state manager will wait until the slot table is empty. 485 * The state manager will wait until the slot table is empty.
469 * Schedule the reset thread 486 * Schedule the reset thread
@@ -474,7 +491,15 @@ static int nfs41_setup_sequence(struct nfs4_session *session,
474 return -EAGAIN; 491 return -EAGAIN;
475 } 492 }
476 493
477 slotid = nfs4_find_slot(tbl, task); 494 if (!rpc_queue_empty(&tbl->slot_tbl_waitq) &&
495 !rpc_task_has_priority(task, RPC_PRIORITY_PRIVILEGED)) {
496 rpc_sleep_on(&tbl->slot_tbl_waitq, task, NULL);
497 spin_unlock(&tbl->slot_tbl_lock);
498 dprintk("%s enforce FIFO order\n", __func__);
499 return -EAGAIN;
500 }
501
502 slotid = nfs4_find_slot(tbl);
478 if (slotid == NFS4_MAX_SLOT_TABLE) { 503 if (slotid == NFS4_MAX_SLOT_TABLE) {
479 rpc_sleep_on(&tbl->slot_tbl_waitq, task, NULL); 504 rpc_sleep_on(&tbl->slot_tbl_waitq, task, NULL);
480 spin_unlock(&tbl->slot_tbl_lock); 505 spin_unlock(&tbl->slot_tbl_lock);
@@ -483,6 +508,7 @@ static int nfs41_setup_sequence(struct nfs4_session *session,
483 } 508 }
484 spin_unlock(&tbl->slot_tbl_lock); 509 spin_unlock(&tbl->slot_tbl_lock);
485 510
511 rpc_task_set_priority(task, RPC_PRIORITY_NORMAL);
486 slot = tbl->slots + slotid; 512 slot = tbl->slots + slotid;
487 args->sa_session = session; 513 args->sa_session = session;
488 args->sa_slotid = slotid; 514 args->sa_slotid = slotid;
@@ -545,6 +571,12 @@ static void nfs41_call_sync_prepare(struct rpc_task *task, void *calldata)
545 rpc_call_start(task); 571 rpc_call_start(task);
546} 572}
547 573
574static void nfs41_call_priv_sync_prepare(struct rpc_task *task, void *calldata)
575{
576 rpc_task_set_priority(task, RPC_PRIORITY_PRIVILEGED);
577 nfs41_call_sync_prepare(task, calldata);
578}
579
548static void nfs41_call_sync_done(struct rpc_task *task, void *calldata) 580static void nfs41_call_sync_done(struct rpc_task *task, void *calldata)
549{ 581{
550 struct nfs41_call_sync_data *data = calldata; 582 struct nfs41_call_sync_data *data = calldata;
@@ -557,12 +589,18 @@ struct rpc_call_ops nfs41_call_sync_ops = {
557 .rpc_call_done = nfs41_call_sync_done, 589 .rpc_call_done = nfs41_call_sync_done,
558}; 590};
559 591
592struct rpc_call_ops nfs41_call_priv_sync_ops = {
593 .rpc_call_prepare = nfs41_call_priv_sync_prepare,
594 .rpc_call_done = nfs41_call_sync_done,
595};
596
560static int nfs4_call_sync_sequence(struct nfs_client *clp, 597static int nfs4_call_sync_sequence(struct nfs_client *clp,
561 struct rpc_clnt *clnt, 598 struct rpc_clnt *clnt,
562 struct rpc_message *msg, 599 struct rpc_message *msg,
563 struct nfs4_sequence_args *args, 600 struct nfs4_sequence_args *args,
564 struct nfs4_sequence_res *res, 601 struct nfs4_sequence_res *res,
565 int cache_reply) 602 int cache_reply,
603 int privileged)
566{ 604{
567 int ret; 605 int ret;
568 struct rpc_task *task; 606 struct rpc_task *task;
@@ -580,6 +618,8 @@ static int nfs4_call_sync_sequence(struct nfs_client *clp,
580 }; 618 };
581 619
582 res->sr_slotid = NFS4_MAX_SLOT_TABLE; 620 res->sr_slotid = NFS4_MAX_SLOT_TABLE;
621 if (privileged)
622 task_setup.callback_ops = &nfs41_call_priv_sync_ops;
583 task = rpc_run_task(&task_setup); 623 task = rpc_run_task(&task_setup);
584 if (IS_ERR(task)) 624 if (IS_ERR(task))
585 ret = PTR_ERR(task); 625 ret = PTR_ERR(task);
@@ -597,7 +637,7 @@ int _nfs4_call_sync_session(struct nfs_server *server,
597 int cache_reply) 637 int cache_reply)
598{ 638{
599 return nfs4_call_sync_sequence(server->nfs_client, server->client, 639 return nfs4_call_sync_sequence(server->nfs_client, server->client,
600 msg, args, res, cache_reply); 640 msg, args, res, cache_reply, 0);
601} 641}
602 642
603#endif /* CONFIG_NFS_V4_1 */ 643#endif /* CONFIG_NFS_V4_1 */
@@ -1035,7 +1075,7 @@ static int nfs4_open_recover_helper(struct nfs4_opendata *opendata, fmode_t fmod
1035 memset(&opendata->o_res, 0, sizeof(opendata->o_res)); 1075 memset(&opendata->o_res, 0, sizeof(opendata->o_res));
1036 memset(&opendata->c_res, 0, sizeof(opendata->c_res)); 1076 memset(&opendata->c_res, 0, sizeof(opendata->c_res));
1037 nfs4_init_opendata_res(opendata); 1077 nfs4_init_opendata_res(opendata);
1038 ret = _nfs4_proc_open(opendata); 1078 ret = _nfs4_recover_proc_open(opendata);
1039 if (ret != 0) 1079 if (ret != 0)
1040 return ret; 1080 return ret;
1041 newstate = nfs4_opendata_to_nfs4_state(opendata); 1081 newstate = nfs4_opendata_to_nfs4_state(opendata);
@@ -1326,6 +1366,12 @@ out_no_action:
1326 1366
1327} 1367}
1328 1368
1369static void nfs4_recover_open_prepare(struct rpc_task *task, void *calldata)
1370{
1371 rpc_task_set_priority(task, RPC_PRIORITY_PRIVILEGED);
1372 nfs4_open_prepare(task, calldata);
1373}
1374
1329static void nfs4_open_done(struct rpc_task *task, void *calldata) 1375static void nfs4_open_done(struct rpc_task *task, void *calldata)
1330{ 1376{
1331 struct nfs4_opendata *data = calldata; 1377 struct nfs4_opendata *data = calldata;
@@ -1384,10 +1430,13 @@ static const struct rpc_call_ops nfs4_open_ops = {
1384 .rpc_release = nfs4_open_release, 1430 .rpc_release = nfs4_open_release,
1385}; 1431};
1386 1432
1387/* 1433static const struct rpc_call_ops nfs4_recover_open_ops = {
1388 * Note: On error, nfs4_proc_open will free the struct nfs4_opendata 1434 .rpc_call_prepare = nfs4_recover_open_prepare,
1389 */ 1435 .rpc_call_done = nfs4_open_done,
1390static int _nfs4_proc_open(struct nfs4_opendata *data) 1436 .rpc_release = nfs4_open_release,
1437};
1438
1439static int nfs4_run_open_task(struct nfs4_opendata *data, int isrecover)
1391{ 1440{
1392 struct inode *dir = data->dir->d_inode; 1441 struct inode *dir = data->dir->d_inode;
1393 struct nfs_server *server = NFS_SERVER(dir); 1442 struct nfs_server *server = NFS_SERVER(dir);
@@ -1414,21 +1463,57 @@ static int _nfs4_proc_open(struct nfs4_opendata *data)
1414 data->rpc_done = 0; 1463 data->rpc_done = 0;
1415 data->rpc_status = 0; 1464 data->rpc_status = 0;
1416 data->cancelled = 0; 1465 data->cancelled = 0;
1466 if (isrecover)
1467 task_setup_data.callback_ops = &nfs4_recover_open_ops;
1417 task = rpc_run_task(&task_setup_data); 1468 task = rpc_run_task(&task_setup_data);
1418 if (IS_ERR(task)) 1469 if (IS_ERR(task))
1419 return PTR_ERR(task); 1470 return PTR_ERR(task);
1420 status = nfs4_wait_for_completion_rpc_task(task); 1471 status = nfs4_wait_for_completion_rpc_task(task);
1421 if (status != 0) { 1472 if (status != 0) {
1422 data->cancelled = 1; 1473 data->cancelled = 1;
1423 smp_wmb(); 1474 smp_wmb();
1424 } else 1475 } else
1425 status = data->rpc_status; 1476 status = data->rpc_status;
1426 rpc_put_task(task); 1477 rpc_put_task(task);
1478
1479 return status;
1480}
1481
1482static int _nfs4_recover_proc_open(struct nfs4_opendata *data)
1483{
1484 struct inode *dir = data->dir->d_inode;
1485 struct nfs_openres *o_res = &data->o_res;
1486 int status;
1487
1488 status = nfs4_run_open_task(data, 1);
1427 if (status != 0 || !data->rpc_done) 1489 if (status != 0 || !data->rpc_done)
1428 return status; 1490 return status;
1429 1491
1430 if (o_res->fh.size == 0) 1492 nfs_refresh_inode(dir, o_res->dir_attr);
1431 _nfs4_proc_lookup(dir, o_arg->name, &o_res->fh, o_res->f_attr); 1493
1494 if (o_res->rflags & NFS4_OPEN_RESULT_CONFIRM) {
1495 status = _nfs4_proc_open_confirm(data);
1496 if (status != 0)
1497 return status;
1498 }
1499
1500 return status;
1501}
1502
1503/*
1504 * Note: On error, nfs4_proc_open will free the struct nfs4_opendata
1505 */
1506static int _nfs4_proc_open(struct nfs4_opendata *data)
1507{
1508 struct inode *dir = data->dir->d_inode;
1509 struct nfs_server *server = NFS_SERVER(dir);
1510 struct nfs_openargs *o_arg = &data->o_arg;
1511 struct nfs_openres *o_res = &data->o_res;
1512 int status;
1513
1514 status = nfs4_run_open_task(data, 0);
1515 if (status != 0 || !data->rpc_done)
1516 return status;
1432 1517
1433 if (o_arg->open_flags & O_CREAT) { 1518 if (o_arg->open_flags & O_CREAT) {
1434 update_changeattr(dir, &o_res->cinfo); 1519 update_changeattr(dir, &o_res->cinfo);
@@ -1575,6 +1660,8 @@ static int _nfs4_do_open(struct inode *dir, struct path *path, fmode_t fmode, in
1575 status = PTR_ERR(state); 1660 status = PTR_ERR(state);
1576 if (IS_ERR(state)) 1661 if (IS_ERR(state))
1577 goto err_opendata_put; 1662 goto err_opendata_put;
1663 if ((opendata->o_res.rflags & NFS4_OPEN_RESULT_LOCKTYPE_POSIX) != 0)
1664 set_bit(NFS_STATE_POSIX_LOCKS, &state->flags);
1578 nfs4_opendata_put(opendata); 1665 nfs4_opendata_put(opendata);
1579 nfs4_put_state_owner(sp); 1666 nfs4_put_state_owner(sp);
1580 *res = state; 1667 *res = state;
@@ -1752,11 +1839,10 @@ static void nfs4_close_done(struct rpc_task *task, void *data)
1752 if (calldata->arg.fmode == 0) 1839 if (calldata->arg.fmode == 0)
1753 break; 1840 break;
1754 default: 1841 default:
1755 if (nfs4_async_handle_error(task, server, state) == -EAGAIN) { 1842 if (nfs4_async_handle_error(task, server, state) == -EAGAIN)
1756 nfs_restart_rpc(task, server->nfs_client); 1843 rpc_restart_call_prepare(task);
1757 return;
1758 }
1759 } 1844 }
1845 nfs_release_seqid(calldata->arg.seqid);
1760 nfs_refresh_inode(calldata->inode, calldata->res.fattr); 1846 nfs_refresh_inode(calldata->inode, calldata->res.fattr);
1761} 1847}
1762 1848
@@ -1848,8 +1934,6 @@ int nfs4_do_close(struct path *path, struct nfs4_state *state, int wait)
1848 calldata->state = state; 1934 calldata->state = state;
1849 calldata->arg.fh = NFS_FH(state->inode); 1935 calldata->arg.fh = NFS_FH(state->inode);
1850 calldata->arg.stateid = &state->open_stateid; 1936 calldata->arg.stateid = &state->open_stateid;
1851 if (nfs4_has_session(server->nfs_client))
1852 memset(calldata->arg.stateid->data, 0, 4); /* clear seqid */
1853 /* Serialization for the sequence id */ 1937 /* Serialization for the sequence id */
1854 calldata->arg.seqid = nfs_alloc_seqid(&state->owner->so_seqid); 1938 calldata->arg.seqid = nfs_alloc_seqid(&state->owner->so_seqid);
1855 if (calldata->arg.seqid == NULL) 1939 if (calldata->arg.seqid == NULL)
@@ -3342,15 +3426,14 @@ _nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server,
3342 if (state == NULL) 3426 if (state == NULL)
3343 break; 3427 break;
3344 nfs4_state_mark_reclaim_nograce(clp, state); 3428 nfs4_state_mark_reclaim_nograce(clp, state);
3345 case -NFS4ERR_STALE_CLIENTID: 3429 goto do_state_recovery;
3346 case -NFS4ERR_STALE_STATEID: 3430 case -NFS4ERR_STALE_STATEID:
3431 if (state == NULL)
3432 break;
3433 nfs4_state_mark_reclaim_reboot(clp, state);
3434 case -NFS4ERR_STALE_CLIENTID:
3347 case -NFS4ERR_EXPIRED: 3435 case -NFS4ERR_EXPIRED:
3348 rpc_sleep_on(&clp->cl_rpcwaitq, task, NULL); 3436 goto do_state_recovery;
3349 nfs4_schedule_state_recovery(clp);
3350 if (test_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state) == 0)
3351 rpc_wake_up_queued_task(&clp->cl_rpcwaitq, task);
3352 task->tk_status = 0;
3353 return -EAGAIN;
3354#if defined(CONFIG_NFS_V4_1) 3437#if defined(CONFIG_NFS_V4_1)
3355 case -NFS4ERR_BADSESSION: 3438 case -NFS4ERR_BADSESSION:
3356 case -NFS4ERR_BADSLOT: 3439 case -NFS4ERR_BADSLOT:
@@ -3378,6 +3461,13 @@ _nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server,
3378 } 3461 }
3379 task->tk_status = nfs4_map_errors(task->tk_status); 3462 task->tk_status = nfs4_map_errors(task->tk_status);
3380 return 0; 3463 return 0;
3464do_state_recovery:
3465 rpc_sleep_on(&clp->cl_rpcwaitq, task, NULL);
3466 nfs4_schedule_state_recovery(clp);
3467 if (test_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state) == 0)
3468 rpc_wake_up_queued_task(&clp->cl_rpcwaitq, task);
3469 task->tk_status = 0;
3470 return -EAGAIN;
3381} 3471}
3382 3472
3383static int 3473static int
@@ -3941,6 +4031,12 @@ static void nfs4_lock_prepare(struct rpc_task *task, void *calldata)
3941 dprintk("%s: done!, ret = %d\n", __func__, data->rpc_status); 4031 dprintk("%s: done!, ret = %d\n", __func__, data->rpc_status);
3942} 4032}
3943 4033
4034static void nfs4_recover_lock_prepare(struct rpc_task *task, void *calldata)
4035{
4036 rpc_task_set_priority(task, RPC_PRIORITY_PRIVILEGED);
4037 nfs4_lock_prepare(task, calldata);
4038}
4039
3944static void nfs4_lock_done(struct rpc_task *task, void *calldata) 4040static void nfs4_lock_done(struct rpc_task *task, void *calldata)
3945{ 4041{
3946 struct nfs4_lockdata *data = calldata; 4042 struct nfs4_lockdata *data = calldata;
@@ -3996,7 +4092,35 @@ static const struct rpc_call_ops nfs4_lock_ops = {
3996 .rpc_release = nfs4_lock_release, 4092 .rpc_release = nfs4_lock_release,
3997}; 4093};
3998 4094
3999static int _nfs4_do_setlk(struct nfs4_state *state, int cmd, struct file_lock *fl, int reclaim) 4095static const struct rpc_call_ops nfs4_recover_lock_ops = {
4096 .rpc_call_prepare = nfs4_recover_lock_prepare,
4097 .rpc_call_done = nfs4_lock_done,
4098 .rpc_release = nfs4_lock_release,
4099};
4100
4101static void nfs4_handle_setlk_error(struct nfs_server *server, struct nfs4_lock_state *lsp, int new_lock_owner, int error)
4102{
4103 struct nfs_client *clp = server->nfs_client;
4104 struct nfs4_state *state = lsp->ls_state;
4105
4106 switch (error) {
4107 case -NFS4ERR_ADMIN_REVOKED:
4108 case -NFS4ERR_BAD_STATEID:
4109 case -NFS4ERR_EXPIRED:
4110 if (new_lock_owner != 0 ||
4111 (lsp->ls_flags & NFS_LOCK_INITIALIZED) != 0)
4112 nfs4_state_mark_reclaim_nograce(clp, state);
4113 lsp->ls_seqid.flags &= ~NFS_SEQID_CONFIRMED;
4114 break;
4115 case -NFS4ERR_STALE_STATEID:
4116 if (new_lock_owner != 0 ||
4117 (lsp->ls_flags & NFS_LOCK_INITIALIZED) != 0)
4118 nfs4_state_mark_reclaim_reboot(clp, state);
4119 lsp->ls_seqid.flags &= ~NFS_SEQID_CONFIRMED;
4120 };
4121}
4122
4123static int _nfs4_do_setlk(struct nfs4_state *state, int cmd, struct file_lock *fl, int recovery_type)
4000{ 4124{
4001 struct nfs4_lockdata *data; 4125 struct nfs4_lockdata *data;
4002 struct rpc_task *task; 4126 struct rpc_task *task;
@@ -4020,8 +4144,11 @@ static int _nfs4_do_setlk(struct nfs4_state *state, int cmd, struct file_lock *f
4020 return -ENOMEM; 4144 return -ENOMEM;
4021 if (IS_SETLKW(cmd)) 4145 if (IS_SETLKW(cmd))
4022 data->arg.block = 1; 4146 data->arg.block = 1;
4023 if (reclaim != 0) 4147 if (recovery_type > NFS_LOCK_NEW) {
4024 data->arg.reclaim = 1; 4148 if (recovery_type == NFS_LOCK_RECLAIM)
4149 data->arg.reclaim = NFS_LOCK_RECLAIM;
4150 task_setup_data.callback_ops = &nfs4_recover_lock_ops;
4151 }
4025 msg.rpc_argp = &data->arg, 4152 msg.rpc_argp = &data->arg,
4026 msg.rpc_resp = &data->res, 4153 msg.rpc_resp = &data->res,
4027 task_setup_data.callback_data = data; 4154 task_setup_data.callback_data = data;
@@ -4031,6 +4158,9 @@ static int _nfs4_do_setlk(struct nfs4_state *state, int cmd, struct file_lock *f
4031 ret = nfs4_wait_for_completion_rpc_task(task); 4158 ret = nfs4_wait_for_completion_rpc_task(task);
4032 if (ret == 0) { 4159 if (ret == 0) {
4033 ret = data->rpc_status; 4160 ret = data->rpc_status;
4161 if (ret)
4162 nfs4_handle_setlk_error(data->server, data->lsp,
4163 data->arg.new_lock_owner, ret);
4034 } else 4164 } else
4035 data->cancelled = 1; 4165 data->cancelled = 1;
4036 rpc_put_task(task); 4166 rpc_put_task(task);
@@ -4048,7 +4178,7 @@ static int nfs4_lock_reclaim(struct nfs4_state *state, struct file_lock *request
4048 /* Cache the lock if possible... */ 4178 /* Cache the lock if possible... */
4049 if (test_bit(NFS_DELEGATED_STATE, &state->flags) != 0) 4179 if (test_bit(NFS_DELEGATED_STATE, &state->flags) != 0)
4050 return 0; 4180 return 0;
4051 err = _nfs4_do_setlk(state, F_SETLK, request, 1); 4181 err = _nfs4_do_setlk(state, F_SETLK, request, NFS_LOCK_RECLAIM);
4052 if (err != -NFS4ERR_DELAY) 4182 if (err != -NFS4ERR_DELAY)
4053 break; 4183 break;
4054 nfs4_handle_exception(server, err, &exception); 4184 nfs4_handle_exception(server, err, &exception);
@@ -4068,7 +4198,7 @@ static int nfs4_lock_expired(struct nfs4_state *state, struct file_lock *request
4068 do { 4198 do {
4069 if (test_bit(NFS_DELEGATED_STATE, &state->flags) != 0) 4199 if (test_bit(NFS_DELEGATED_STATE, &state->flags) != 0)
4070 return 0; 4200 return 0;
4071 err = _nfs4_do_setlk(state, F_SETLK, request, 0); 4201 err = _nfs4_do_setlk(state, F_SETLK, request, NFS_LOCK_EXPIRED);
4072 switch (err) { 4202 switch (err) {
4073 default: 4203 default:
4074 goto out; 4204 goto out;
@@ -4086,8 +4216,11 @@ static int _nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock
4086{ 4216{
4087 struct nfs_inode *nfsi = NFS_I(state->inode); 4217 struct nfs_inode *nfsi = NFS_I(state->inode);
4088 unsigned char fl_flags = request->fl_flags; 4218 unsigned char fl_flags = request->fl_flags;
4089 int status; 4219 int status = -ENOLCK;
4090 4220
4221 if ((fl_flags & FL_POSIX) &&
4222 !test_bit(NFS_STATE_POSIX_LOCKS, &state->flags))
4223 goto out;
4091 /* Is this a delegated open? */ 4224 /* Is this a delegated open? */
4092 status = nfs4_set_lock_state(state, request); 4225 status = nfs4_set_lock_state(state, request);
4093 if (status != 0) 4226 if (status != 0)
@@ -4104,7 +4237,7 @@ static int _nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock
4104 status = do_vfs_lock(request->fl_file, request); 4237 status = do_vfs_lock(request->fl_file, request);
4105 goto out_unlock; 4238 goto out_unlock;
4106 } 4239 }
4107 status = _nfs4_do_setlk(state, cmd, request, 0); 4240 status = _nfs4_do_setlk(state, cmd, request, NFS_LOCK_NEW);
4108 if (status != 0) 4241 if (status != 0)
4109 goto out_unlock; 4242 goto out_unlock;
4110 /* Note: we always want to sleep here! */ 4243 /* Note: we always want to sleep here! */
@@ -4187,7 +4320,7 @@ int nfs4_lock_delegation_recall(struct nfs4_state *state, struct file_lock *fl)
4187 if (err != 0) 4320 if (err != 0)
4188 goto out; 4321 goto out;
4189 do { 4322 do {
4190 err = _nfs4_do_setlk(state, F_SETLK, fl, 0); 4323 err = _nfs4_do_setlk(state, F_SETLK, fl, NFS_LOCK_NEW);
4191 switch (err) { 4324 switch (err) {
4192 default: 4325 default:
4193 printk(KERN_ERR "%s: unhandled error %d.\n", 4326 printk(KERN_ERR "%s: unhandled error %d.\n",
@@ -4395,11 +4528,12 @@ static void nfs4_get_lease_time_prepare(struct rpc_task *task,
4395 (struct nfs4_get_lease_time_data *)calldata; 4528 (struct nfs4_get_lease_time_data *)calldata;
4396 4529
4397 dprintk("--> %s\n", __func__); 4530 dprintk("--> %s\n", __func__);
4531 rpc_task_set_priority(task, RPC_PRIORITY_PRIVILEGED);
4398 /* just setup sequence, do not trigger session recovery 4532 /* just setup sequence, do not trigger session recovery
4399 since we're invoked within one */ 4533 since we're invoked within one */
4400 ret = nfs41_setup_sequence(data->clp->cl_session, 4534 ret = nfs41_setup_sequence(data->clp->cl_session,
4401 &data->args->la_seq_args, 4535 &data->args->la_seq_args,
4402 &data->res->lr_seq_res, 0, task); 4536 &data->res->lr_seq_res, 0, task);
4403 4537
4404 BUG_ON(ret == -EAGAIN); 4538 BUG_ON(ret == -EAGAIN);
4405 rpc_call_start(task); 4539 rpc_call_start(task);
@@ -4619,7 +4753,7 @@ struct nfs4_session *nfs4_alloc_session(struct nfs_client *clp)
4619 tbl = &session->fc_slot_table; 4753 tbl = &session->fc_slot_table;
4620 tbl->highest_used_slotid = -1; 4754 tbl->highest_used_slotid = -1;
4621 spin_lock_init(&tbl->slot_tbl_lock); 4755 spin_lock_init(&tbl->slot_tbl_lock);
4622 rpc_init_wait_queue(&tbl->slot_tbl_waitq, "ForeChannel Slot table"); 4756 rpc_init_priority_wait_queue(&tbl->slot_tbl_waitq, "ForeChannel Slot table");
4623 4757
4624 tbl = &session->bc_slot_table; 4758 tbl = &session->bc_slot_table;
4625 tbl->highest_used_slotid = -1; 4759 tbl->highest_used_slotid = -1;
@@ -4838,14 +4972,22 @@ int nfs4_init_session(struct nfs_server *server)
4838{ 4972{
4839 struct nfs_client *clp = server->nfs_client; 4973 struct nfs_client *clp = server->nfs_client;
4840 struct nfs4_session *session; 4974 struct nfs4_session *session;
4975 unsigned int rsize, wsize;
4841 int ret; 4976 int ret;
4842 4977
4843 if (!nfs4_has_session(clp)) 4978 if (!nfs4_has_session(clp))
4844 return 0; 4979 return 0;
4845 4980
4981 rsize = server->rsize;
4982 if (rsize == 0)
4983 rsize = NFS_MAX_FILE_IO_SIZE;
4984 wsize = server->wsize;
4985 if (wsize == 0)
4986 wsize = NFS_MAX_FILE_IO_SIZE;
4987
4846 session = clp->cl_session; 4988 session = clp->cl_session;
4847 session->fc_attrs.max_rqst_sz = server->wsize + nfs41_maxwrite_overhead; 4989 session->fc_attrs.max_rqst_sz = wsize + nfs41_maxwrite_overhead;
4848 session->fc_attrs.max_resp_sz = server->rsize + nfs41_maxread_overhead; 4990 session->fc_attrs.max_resp_sz = rsize + nfs41_maxread_overhead;
4849 4991
4850 ret = nfs4_recover_expired_lease(server); 4992 ret = nfs4_recover_expired_lease(server);
4851 if (!ret) 4993 if (!ret)
@@ -4871,7 +5013,7 @@ static int nfs4_proc_sequence(struct nfs_client *clp, struct rpc_cred *cred)
4871 args.sa_cache_this = 0; 5013 args.sa_cache_this = 0;
4872 5014
4873 return nfs4_call_sync_sequence(clp, clp->cl_rpcclient, &msg, &args, 5015 return nfs4_call_sync_sequence(clp, clp->cl_rpcclient, &msg, &args,
4874 &res, 0); 5016 &res, args.sa_cache_this, 1);
4875} 5017}
4876 5018
4877void nfs41_sequence_call_done(struct rpc_task *task, void *data) 5019void nfs41_sequence_call_done(struct rpc_task *task, void *data)
@@ -4953,6 +5095,7 @@ static void nfs4_reclaim_complete_prepare(struct rpc_task *task, void *data)
4953{ 5095{
4954 struct nfs4_reclaim_complete_data *calldata = data; 5096 struct nfs4_reclaim_complete_data *calldata = data;
4955 5097
5098 rpc_task_set_priority(task, RPC_PRIORITY_PRIVILEGED);
4956 if (nfs4_setup_sequence(calldata->clp, &calldata->arg.seq_args, 5099 if (nfs4_setup_sequence(calldata->clp, &calldata->arg.seq_args,
4957 &calldata->res.seq_res, 0, task)) 5100 &calldata->res.seq_res, 0, task))
4958 return; 5101 return;
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index e76427e6346f..c1e2733f4fa4 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -135,16 +135,30 @@ static int nfs41_setup_state_renewal(struct nfs_client *clp)
135 return status; 135 return status;
136} 136}
137 137
138static void nfs41_end_drain_session(struct nfs_client *clp, 138static void nfs4_end_drain_session(struct nfs_client *clp)
139 struct nfs4_session *ses)
140{ 139{
141 if (test_and_clear_bit(NFS4CLNT_SESSION_DRAINING, &clp->cl_state)) 140 struct nfs4_session *ses = clp->cl_session;
142 rpc_wake_up(&ses->fc_slot_table.slot_tbl_waitq); 141 int max_slots;
142
143 if (test_and_clear_bit(NFS4CLNT_SESSION_DRAINING, &clp->cl_state)) {
144 spin_lock(&ses->fc_slot_table.slot_tbl_lock);
145 max_slots = ses->fc_slot_table.max_slots;
146 while (max_slots--) {
147 struct rpc_task *task;
148
149 task = rpc_wake_up_next(&ses->fc_slot_table.
150 slot_tbl_waitq);
151 if (!task)
152 break;
153 rpc_task_set_priority(task, RPC_PRIORITY_PRIVILEGED);
154 }
155 spin_unlock(&ses->fc_slot_table.slot_tbl_lock);
156 }
143} 157}
144 158
145static int nfs41_begin_drain_session(struct nfs_client *clp, 159static int nfs4_begin_drain_session(struct nfs_client *clp)
146 struct nfs4_session *ses)
147{ 160{
161 struct nfs4_session *ses = clp->cl_session;
148 struct nfs4_slot_table *tbl = &ses->fc_slot_table; 162 struct nfs4_slot_table *tbl = &ses->fc_slot_table;
149 163
150 spin_lock(&tbl->slot_tbl_lock); 164 spin_lock(&tbl->slot_tbl_lock);
@@ -162,16 +176,13 @@ int nfs41_init_clientid(struct nfs_client *clp, struct rpc_cred *cred)
162{ 176{
163 int status; 177 int status;
164 178
165 status = nfs41_begin_drain_session(clp, clp->cl_session); 179 nfs4_begin_drain_session(clp);
166 if (status != 0)
167 goto out;
168 status = nfs4_proc_exchange_id(clp, cred); 180 status = nfs4_proc_exchange_id(clp, cred);
169 if (status != 0) 181 if (status != 0)
170 goto out; 182 goto out;
171 status = nfs4_proc_create_session(clp); 183 status = nfs4_proc_create_session(clp);
172 if (status != 0) 184 if (status != 0)
173 goto out; 185 goto out;
174 nfs41_end_drain_session(clp, clp->cl_session);
175 nfs41_setup_state_renewal(clp); 186 nfs41_setup_state_renewal(clp);
176 nfs_mark_client_ready(clp, NFS_CS_READY); 187 nfs_mark_client_ready(clp, NFS_CS_READY);
177out: 188out:
@@ -755,16 +766,21 @@ struct nfs_seqid *nfs_alloc_seqid(struct nfs_seqid_counter *counter)
755 return new; 766 return new;
756} 767}
757 768
758void nfs_free_seqid(struct nfs_seqid *seqid) 769void nfs_release_seqid(struct nfs_seqid *seqid)
759{ 770{
760 if (!list_empty(&seqid->list)) { 771 if (!list_empty(&seqid->list)) {
761 struct rpc_sequence *sequence = seqid->sequence->sequence; 772 struct rpc_sequence *sequence = seqid->sequence->sequence;
762 773
763 spin_lock(&sequence->lock); 774 spin_lock(&sequence->lock);
764 list_del(&seqid->list); 775 list_del_init(&seqid->list);
765 spin_unlock(&sequence->lock); 776 spin_unlock(&sequence->lock);
766 rpc_wake_up(&sequence->wait); 777 rpc_wake_up(&sequence->wait);
767 } 778 }
779}
780
781void nfs_free_seqid(struct nfs_seqid *seqid)
782{
783 nfs_release_seqid(seqid);
768 kfree(seqid); 784 kfree(seqid);
769} 785}
770 786
@@ -885,7 +901,7 @@ void nfs4_schedule_state_recovery(struct nfs_client *clp)
885 nfs4_schedule_state_manager(clp); 901 nfs4_schedule_state_manager(clp);
886} 902}
887 903
888static int nfs4_state_mark_reclaim_reboot(struct nfs_client *clp, struct nfs4_state *state) 904int nfs4_state_mark_reclaim_reboot(struct nfs_client *clp, struct nfs4_state *state)
889{ 905{
890 906
891 set_bit(NFS_STATE_RECLAIM_REBOOT, &state->flags); 907 set_bit(NFS_STATE_RECLAIM_REBOOT, &state->flags);
@@ -1257,13 +1273,9 @@ void nfs41_handle_sequence_flag_errors(struct nfs_client *clp, u32 flags)
1257 1273
1258static int nfs4_reset_session(struct nfs_client *clp) 1274static int nfs4_reset_session(struct nfs_client *clp)
1259{ 1275{
1260 struct nfs4_session *ses = clp->cl_session;
1261 int status; 1276 int status;
1262 1277
1263 status = nfs41_begin_drain_session(clp, ses); 1278 nfs4_begin_drain_session(clp);
1264 if (status != 0)
1265 return status;
1266
1267 status = nfs4_proc_destroy_session(clp->cl_session); 1279 status = nfs4_proc_destroy_session(clp->cl_session);
1268 if (status && status != -NFS4ERR_BADSESSION && 1280 if (status && status != -NFS4ERR_BADSESSION &&
1269 status != -NFS4ERR_DEADSESSION) { 1281 status != -NFS4ERR_DEADSESSION) {
@@ -1279,19 +1291,17 @@ static int nfs4_reset_session(struct nfs_client *clp)
1279out: 1291out:
1280 /* 1292 /*
1281 * Let the state manager reestablish state 1293 * Let the state manager reestablish state
1282 * without waking other tasks yet.
1283 */ 1294 */
1284 if (!test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state)) { 1295 if (!test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state) &&
1285 /* Wake up the next rpc task */ 1296 status == 0)
1286 nfs41_end_drain_session(clp, ses); 1297 nfs41_setup_state_renewal(clp);
1287 if (status == 0) 1298
1288 nfs41_setup_state_renewal(clp);
1289 }
1290 return status; 1299 return status;
1291} 1300}
1292 1301
1293#else /* CONFIG_NFS_V4_1 */ 1302#else /* CONFIG_NFS_V4_1 */
1294static int nfs4_reset_session(struct nfs_client *clp) { return 0; } 1303static int nfs4_reset_session(struct nfs_client *clp) { return 0; }
1304static int nfs4_end_drain_session(struct nfs_client *clp) { return 0; }
1295#endif /* CONFIG_NFS_V4_1 */ 1305#endif /* CONFIG_NFS_V4_1 */
1296 1306
1297/* Set NFS4CLNT_LEASE_EXPIRED for all v4.0 errors and for recoverable errors 1307/* Set NFS4CLNT_LEASE_EXPIRED for all v4.0 errors and for recoverable errors
@@ -1382,6 +1392,7 @@ static void nfs4_state_manager(struct nfs_client *clp)
1382 goto out_error; 1392 goto out_error;
1383 } 1393 }
1384 1394
1395 nfs4_end_drain_session(clp);
1385 if (test_and_clear_bit(NFS4CLNT_DELEGRETURN, &clp->cl_state)) { 1396 if (test_and_clear_bit(NFS4CLNT_DELEGRETURN, &clp->cl_state)) {
1386 nfs_client_return_marked_delegations(clp); 1397 nfs_client_return_marked_delegations(clp);
1387 continue; 1398 continue;
@@ -1398,6 +1409,7 @@ static void nfs4_state_manager(struct nfs_client *clp)
1398out_error: 1409out_error:
1399 printk(KERN_WARNING "Error: state manager failed on NFSv4 server %s" 1410 printk(KERN_WARNING "Error: state manager failed on NFSv4 server %s"
1400 " with error %d\n", clp->cl_hostname, -status); 1411 " with error %d\n", clp->cl_hostname, -status);
1412 nfs4_end_drain_session(clp);
1401 nfs4_clear_state_manager_bit(clp); 1413 nfs4_clear_state_manager_bit(clp);
1402} 1414}
1403 1415
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index e437fd6a819f..5cd5184b56db 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -4631,7 +4631,7 @@ static int decode_sequence(struct xdr_stream *xdr,
4631 * If the server returns different values for sessionID, slotID or 4631 * If the server returns different values for sessionID, slotID or
4632 * sequence number, the server is looney tunes. 4632 * sequence number, the server is looney tunes.
4633 */ 4633 */
4634 status = -ESERVERFAULT; 4634 status = -EREMOTEIO;
4635 4635
4636 if (memcmp(id.data, res->sr_session->sess_id.data, 4636 if (memcmp(id.data, res->sr_session->sess_id.data,
4637 NFS4_MAX_SESSIONID_LEN)) { 4637 NFS4_MAX_SESSIONID_LEN)) {
@@ -5774,7 +5774,7 @@ static struct {
5774 { NFS4ERR_BAD_COOKIE, -EBADCOOKIE }, 5774 { NFS4ERR_BAD_COOKIE, -EBADCOOKIE },
5775 { NFS4ERR_NOTSUPP, -ENOTSUPP }, 5775 { NFS4ERR_NOTSUPP, -ENOTSUPP },
5776 { NFS4ERR_TOOSMALL, -ETOOSMALL }, 5776 { NFS4ERR_TOOSMALL, -ETOOSMALL },
5777 { NFS4ERR_SERVERFAULT, -ESERVERFAULT }, 5777 { NFS4ERR_SERVERFAULT, -EREMOTEIO },
5778 { NFS4ERR_BADTYPE, -EBADTYPE }, 5778 { NFS4ERR_BADTYPE, -EBADTYPE },
5779 { NFS4ERR_LOCKED, -EAGAIN }, 5779 { NFS4ERR_LOCKED, -EAGAIN },
5780 { NFS4ERR_SYMLINK, -ELOOP }, 5780 { NFS4ERR_SYMLINK, -ELOOP },
@@ -5801,7 +5801,7 @@ nfs4_stat_to_errno(int stat)
5801 } 5801 }
5802 if (stat <= 10000 || stat > 10100) { 5802 if (stat <= 10000 || stat > 10100) {
5803 /* The server is looney tunes. */ 5803 /* The server is looney tunes. */
5804 return -ESERVERFAULT; 5804 return -EREMOTEIO;
5805 } 5805 }
5806 /* If we cannot translate the error, the recovery routines should 5806 /* If we cannot translate the error, the recovery routines should
5807 * handle it. 5807 * handle it.
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index e2975939126a..a12c45b65dd4 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -176,6 +176,12 @@ void nfs_release_request(struct nfs_page *req)
176 kref_put(&req->wb_kref, nfs_free_request); 176 kref_put(&req->wb_kref, nfs_free_request);
177} 177}
178 178
179static int nfs_wait_bit_uninterruptible(void *word)
180{
181 io_schedule();
182 return 0;
183}
184
179/** 185/**
180 * nfs_wait_on_request - Wait for a request to complete. 186 * nfs_wait_on_request - Wait for a request to complete.
181 * @req: request to wait upon. 187 * @req: request to wait upon.
@@ -186,14 +192,9 @@ void nfs_release_request(struct nfs_page *req)
186int 192int
187nfs_wait_on_request(struct nfs_page *req) 193nfs_wait_on_request(struct nfs_page *req)
188{ 194{
189 int ret = 0; 195 return wait_on_bit(&req->wb_flags, PG_BUSY,
190 196 nfs_wait_bit_uninterruptible,
191 if (!test_bit(PG_BUSY, &req->wb_flags)) 197 TASK_UNINTERRUPTIBLE);
192 goto out;
193 ret = out_of_line_wait_on_bit(&req->wb_flags, PG_BUSY,
194 nfs_wait_bit_killable, TASK_KILLABLE);
195out:
196 return ret;
197} 198}
198 199
199/** 200/**
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index ce907efc5508..f1afee4eea77 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -243,6 +243,7 @@ static int nfs_show_stats(struct seq_file *, struct vfsmount *);
243static int nfs_get_sb(struct file_system_type *, int, const char *, void *, struct vfsmount *); 243static int nfs_get_sb(struct file_system_type *, int, const char *, void *, struct vfsmount *);
244static int nfs_xdev_get_sb(struct file_system_type *fs_type, 244static int nfs_xdev_get_sb(struct file_system_type *fs_type,
245 int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt); 245 int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt);
246static void nfs_put_super(struct super_block *);
246static void nfs_kill_super(struct super_block *); 247static void nfs_kill_super(struct super_block *);
247static int nfs_remount(struct super_block *sb, int *flags, char *raw_data); 248static int nfs_remount(struct super_block *sb, int *flags, char *raw_data);
248 249
@@ -266,6 +267,7 @@ static const struct super_operations nfs_sops = {
266 .alloc_inode = nfs_alloc_inode, 267 .alloc_inode = nfs_alloc_inode,
267 .destroy_inode = nfs_destroy_inode, 268 .destroy_inode = nfs_destroy_inode,
268 .write_inode = nfs_write_inode, 269 .write_inode = nfs_write_inode,
270 .put_super = nfs_put_super,
269 .statfs = nfs_statfs, 271 .statfs = nfs_statfs,
270 .clear_inode = nfs_clear_inode, 272 .clear_inode = nfs_clear_inode,
271 .umount_begin = nfs_umount_begin, 273 .umount_begin = nfs_umount_begin,
@@ -335,6 +337,7 @@ static const struct super_operations nfs4_sops = {
335 .alloc_inode = nfs_alloc_inode, 337 .alloc_inode = nfs_alloc_inode,
336 .destroy_inode = nfs_destroy_inode, 338 .destroy_inode = nfs_destroy_inode,
337 .write_inode = nfs_write_inode, 339 .write_inode = nfs_write_inode,
340 .put_super = nfs_put_super,
338 .statfs = nfs_statfs, 341 .statfs = nfs_statfs,
339 .clear_inode = nfs4_clear_inode, 342 .clear_inode = nfs4_clear_inode,
340 .umount_begin = nfs_umount_begin, 343 .umount_begin = nfs_umount_begin,
@@ -2258,6 +2261,17 @@ error_splat_super:
2258} 2261}
2259 2262
2260/* 2263/*
2264 * Ensure that we unregister the bdi before kill_anon_super
2265 * releases the device name
2266 */
2267static void nfs_put_super(struct super_block *s)
2268{
2269 struct nfs_server *server = NFS_SB(s);
2270
2271 bdi_unregister(&server->backing_dev_info);
2272}
2273
2274/*
2261 * Destroy an NFS2/3 superblock 2275 * Destroy an NFS2/3 superblock
2262 */ 2276 */
2263static void nfs_kill_super(struct super_block *s) 2277static void nfs_kill_super(struct super_block *s)
@@ -2265,7 +2279,6 @@ static void nfs_kill_super(struct super_block *s)
2265 struct nfs_server *server = NFS_SB(s); 2279 struct nfs_server *server = NFS_SB(s);
2266 2280
2267 kill_anon_super(s); 2281 kill_anon_super(s);
2268 bdi_unregister(&server->backing_dev_info);
2269 nfs_fscache_release_super_cookie(s); 2282 nfs_fscache_release_super_cookie(s);
2270 nfs_free_server(server); 2283 nfs_free_server(server);
2271} 2284}
diff --git a/fs/nfs/sysctl.c b/fs/nfs/sysctl.c
index 70e1fbbaaeab..ad4d2e787b20 100644
--- a/fs/nfs/sysctl.c
+++ b/fs/nfs/sysctl.c
@@ -15,8 +15,10 @@
15 15
16#include "callback.h" 16#include "callback.h"
17 17
18#ifdef CONFIG_NFS_V4
18static const int nfs_set_port_min = 0; 19static const int nfs_set_port_min = 0;
19static const int nfs_set_port_max = 65535; 20static const int nfs_set_port_max = 65535;
21#endif
20static struct ctl_table_header *nfs_callback_sysctl_table; 22static struct ctl_table_header *nfs_callback_sysctl_table;
21 23
22static ctl_table nfs_cb_sysctls[] = { 24static ctl_table nfs_cb_sysctls[] = {
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index d171696017f4..d63d964a0392 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -1233,7 +1233,7 @@ int nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data)
1233 1233
1234 1234
1235#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) 1235#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
1236void nfs_commitdata_release(void *data) 1236static void nfs_commitdata_release(void *data)
1237{ 1237{
1238 struct nfs_write_data *wdata = data; 1238 struct nfs_write_data *wdata = data;
1239 1239
@@ -1541,6 +1541,7 @@ int nfs_wb_page_cancel(struct inode *inode, struct page *page)
1541 break; 1541 break;
1542 } 1542 }
1543 ret = nfs_wait_on_request(req); 1543 ret = nfs_wait_on_request(req);
1544 nfs_release_request(req);
1544 if (ret < 0) 1545 if (ret < 0)
1545 goto out; 1546 goto out;
1546 } 1547 }
@@ -1597,8 +1598,7 @@ int nfs_migrate_page(struct address_space *mapping, struct page *newpage,
1597 struct nfs_page *req; 1598 struct nfs_page *req;
1598 int ret; 1599 int ret;
1599 1600
1600 if (PageFsCache(page)) 1601 nfs_fscache_release_page(page, GFP_KERNEL);
1601 nfs_fscache_release_page(page, GFP_KERNEL);
1602 1602
1603 req = nfs_find_and_lock_request(page); 1603 req = nfs_find_and_lock_request(page);
1604 ret = PTR_ERR(req); 1604 ret = PTR_ERR(req);
diff --git a/fs/nfsctl.c b/fs/nfsctl.c
index 8f9a20556f79..d3854d94b7cf 100644
--- a/fs/nfsctl.c
+++ b/fs/nfsctl.c
@@ -7,8 +7,6 @@
7#include <linux/types.h> 7#include <linux/types.h>
8#include <linux/file.h> 8#include <linux/file.h>
9#include <linux/fs.h> 9#include <linux/fs.h>
10#include <linux/sunrpc/svc.h>
11#include <linux/nfsd/nfsd.h>
12#include <linux/nfsd/syscall.h> 10#include <linux/nfsd/syscall.h>
13#include <linux/cred.h> 11#include <linux/cred.h>
14#include <linux/sched.h> 12#include <linux/sched.h>
diff --git a/fs/nfsd/auth.c b/fs/nfsd/auth.c
index 36fcabbf5186..79717a40daba 100644
--- a/fs/nfsd/auth.c
+++ b/fs/nfsd/auth.c
@@ -1,15 +1,7 @@
1/* 1/* Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de> */
2 * linux/fs/nfsd/auth.c
3 *
4 * Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de>
5 */
6 2
7#include <linux/types.h>
8#include <linux/sched.h> 3#include <linux/sched.h>
9#include <linux/sunrpc/svc.h> 4#include "nfsd.h"
10#include <linux/sunrpc/svcauth.h>
11#include <linux/nfsd/nfsd.h>
12#include <linux/nfsd/export.h>
13#include "auth.h" 5#include "auth.h"
14 6
15int nfsexp_flags(struct svc_rqst *rqstp, struct svc_export *exp) 7int nfsexp_flags(struct svc_rqst *rqstp, struct svc_export *exp)
diff --git a/fs/nfsd/cache.h b/fs/nfsd/cache.h
new file mode 100644
index 000000000000..d892be61016c
--- /dev/null
+++ b/fs/nfsd/cache.h
@@ -0,0 +1,83 @@
1/*
2 * Request reply cache. This was heavily inspired by the
3 * implementation in 4.3BSD/4.4BSD.
4 *
5 * Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de>
6 */
7
8#ifndef NFSCACHE_H
9#define NFSCACHE_H
10
11#include <linux/sunrpc/svc.h>
12
13/*
14 * Representation of a reply cache entry.
15 */
16struct svc_cacherep {
17 struct hlist_node c_hash;
18 struct list_head c_lru;
19
20 unsigned char c_state, /* unused, inprog, done */
21 c_type, /* status, buffer */
22 c_secure : 1; /* req came from port < 1024 */
23 struct sockaddr_in c_addr;
24 __be32 c_xid;
25 u32 c_prot;
26 u32 c_proc;
27 u32 c_vers;
28 unsigned long c_timestamp;
29 union {
30 struct kvec u_vec;
31 __be32 u_status;
32 } c_u;
33};
34
35#define c_replvec c_u.u_vec
36#define c_replstat c_u.u_status
37
38/* cache entry states */
39enum {
40 RC_UNUSED,
41 RC_INPROG,
42 RC_DONE
43};
44
45/* return values */
46enum {
47 RC_DROPIT,
48 RC_REPLY,
49 RC_DOIT,
50 RC_INTR
51};
52
53/*
54 * Cache types.
55 * We may want to add more types one day, e.g. for diropres and
56 * attrstat replies. Using cache entries with fixed length instead
57 * of buffer pointers may be more efficient.
58 */
59enum {
60 RC_NOCACHE,
61 RC_REPLSTAT,
62 RC_REPLBUFF,
63};
64
65/*
66 * If requests are retransmitted within this interval, they're dropped.
67 */
68#define RC_DELAY (HZ/5)
69
70int nfsd_reply_cache_init(void);
71void nfsd_reply_cache_shutdown(void);
72int nfsd_cache_lookup(struct svc_rqst *, int);
73void nfsd_cache_update(struct svc_rqst *, int, __be32 *);
74
75#ifdef CONFIG_NFSD_V4
76void nfsd4_set_statp(struct svc_rqst *rqstp, __be32 *statp);
77#else /* CONFIG_NFSD_V4 */
78static inline void nfsd4_set_statp(struct svc_rqst *rqstp, __be32 *statp)
79{
80}
81#endif /* CONFIG_NFSD_V4 */
82
83#endif /* NFSCACHE_H */
diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c
index c1c9e035d4a4..a0c4016413f1 100644
--- a/fs/nfsd/export.c
+++ b/fs/nfsd/export.c
@@ -1,7 +1,5 @@
1#define MSNFS /* HACK HACK */ 1#define MSNFS /* HACK HACK */
2/* 2/*
3 * linux/fs/nfsd/export.c
4 *
5 * NFS exporting and validation. 3 * NFS exporting and validation.
6 * 4 *
7 * We maintain a list of clients, each of which has a list of 5 * We maintain a list of clients, each of which has a list of
@@ -14,29 +12,16 @@
14 * Copyright (C) 1995, 1996 Olaf Kirch, <okir@monad.swb.de> 12 * Copyright (C) 1995, 1996 Olaf Kirch, <okir@monad.swb.de>
15 */ 13 */
16 14
17#include <linux/unistd.h>
18#include <linux/slab.h>
19#include <linux/stat.h>
20#include <linux/in.h>
21#include <linux/seq_file.h>
22#include <linux/syscalls.h>
23#include <linux/rwsem.h>
24#include <linux/dcache.h>
25#include <linux/namei.h> 15#include <linux/namei.h>
26#include <linux/mount.h>
27#include <linux/hash.h>
28#include <linux/module.h> 16#include <linux/module.h>
29#include <linux/exportfs.h> 17#include <linux/exportfs.h>
30 18
31#include <linux/sunrpc/svc.h>
32#include <linux/nfsd/nfsd.h>
33#include <linux/nfsd/nfsfh.h>
34#include <linux/nfsd/syscall.h> 19#include <linux/nfsd/syscall.h>
35#include <linux/lockd/bind.h>
36#include <linux/sunrpc/msg_prot.h>
37#include <linux/sunrpc/gss_api.h>
38#include <net/ipv6.h> 20#include <net/ipv6.h>
39 21
22#include "nfsd.h"
23#include "nfsfh.h"
24
40#define NFSDDBG_FACILITY NFSDDBG_EXPORT 25#define NFSDDBG_FACILITY NFSDDBG_EXPORT
41 26
42typedef struct auth_domain svc_client; 27typedef struct auth_domain svc_client;
@@ -369,16 +354,25 @@ static struct svc_export *svc_export_update(struct svc_export *new,
369 struct svc_export *old); 354 struct svc_export *old);
370static struct svc_export *svc_export_lookup(struct svc_export *); 355static struct svc_export *svc_export_lookup(struct svc_export *);
371 356
372static int check_export(struct inode *inode, int flags, unsigned char *uuid) 357static int check_export(struct inode *inode, int *flags, unsigned char *uuid)
373{ 358{
374 359
375 /* We currently export only dirs and regular files. 360 /*
376 * This is what umountd does. 361 * We currently export only dirs, regular files, and (for v4
362 * pseudoroot) symlinks.
377 */ 363 */
378 if (!S_ISDIR(inode->i_mode) && 364 if (!S_ISDIR(inode->i_mode) &&
365 !S_ISLNK(inode->i_mode) &&
379 !S_ISREG(inode->i_mode)) 366 !S_ISREG(inode->i_mode))
380 return -ENOTDIR; 367 return -ENOTDIR;
381 368
369 /*
370 * Mountd should never pass down a writeable V4ROOT export, but,
371 * just to make sure:
372 */
373 if (*flags & NFSEXP_V4ROOT)
374 *flags |= NFSEXP_READONLY;
375
382 /* There are two requirements on a filesystem to be exportable. 376 /* There are two requirements on a filesystem to be exportable.
383 * 1: We must be able to identify the filesystem from a number. 377 * 1: We must be able to identify the filesystem from a number.
384 * either a device number (so FS_REQUIRES_DEV needed) 378 * either a device number (so FS_REQUIRES_DEV needed)
@@ -387,7 +381,7 @@ static int check_export(struct inode *inode, int flags, unsigned char *uuid)
387 * This means that s_export_op must be set. 381 * This means that s_export_op must be set.
388 */ 382 */
389 if (!(inode->i_sb->s_type->fs_flags & FS_REQUIRES_DEV) && 383 if (!(inode->i_sb->s_type->fs_flags & FS_REQUIRES_DEV) &&
390 !(flags & NFSEXP_FSID) && 384 !(*flags & NFSEXP_FSID) &&
391 uuid == NULL) { 385 uuid == NULL) {
392 dprintk("exp_export: export of non-dev fs without fsid\n"); 386 dprintk("exp_export: export of non-dev fs without fsid\n");
393 return -EINVAL; 387 return -EINVAL;
@@ -602,7 +596,7 @@ static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen)
602 goto out4; 596 goto out4;
603 } 597 }
604 598
605 err = check_export(exp.ex_path.dentry->d_inode, exp.ex_flags, 599 err = check_export(exp.ex_path.dentry->d_inode, &exp.ex_flags,
606 exp.ex_uuid); 600 exp.ex_uuid);
607 if (err) 601 if (err)
608 goto out4; 602 goto out4;
@@ -1041,7 +1035,7 @@ exp_export(struct nfsctl_export *nxp)
1041 goto finish; 1035 goto finish;
1042 } 1036 }
1043 1037
1044 err = check_export(path.dentry->d_inode, nxp->ex_flags, NULL); 1038 err = check_export(path.dentry->d_inode, &nxp->ex_flags, NULL);
1045 if (err) goto finish; 1039 if (err) goto finish;
1046 1040
1047 err = -ENOMEM; 1041 err = -ENOMEM;
@@ -1320,6 +1314,15 @@ rqst_exp_parent(struct svc_rqst *rqstp, struct path *path)
1320 return exp; 1314 return exp;
1321} 1315}
1322 1316
1317static struct svc_export *find_fsidzero_export(struct svc_rqst *rqstp)
1318{
1319 u32 fsidv[2];
1320
1321 mk_fsid(FSID_NUM, fsidv, 0, 0, 0, NULL);
1322
1323 return rqst_exp_find(rqstp, FSID_NUM, fsidv);
1324}
1325
1323/* 1326/*
1324 * Called when we need the filehandle for the root of the pseudofs, 1327 * Called when we need the filehandle for the root of the pseudofs,
1325 * for a given NFSv4 client. The root is defined to be the 1328 * for a given NFSv4 client. The root is defined to be the
@@ -1330,11 +1333,8 @@ exp_pseudoroot(struct svc_rqst *rqstp, struct svc_fh *fhp)
1330{ 1333{
1331 struct svc_export *exp; 1334 struct svc_export *exp;
1332 __be32 rv; 1335 __be32 rv;
1333 u32 fsidv[2];
1334
1335 mk_fsid(FSID_NUM, fsidv, 0, 0, 0, NULL);
1336 1336
1337 exp = rqst_exp_find(rqstp, FSID_NUM, fsidv); 1337 exp = find_fsidzero_export(rqstp);
1338 if (IS_ERR(exp)) 1338 if (IS_ERR(exp))
1339 return nfserrno(PTR_ERR(exp)); 1339 return nfserrno(PTR_ERR(exp));
1340 rv = fh_compose(fhp, exp, exp->ex_path.dentry, NULL); 1340 rv = fh_compose(fhp, exp, exp->ex_path.dentry, NULL);
@@ -1425,6 +1425,7 @@ static struct flags {
1425 { NFSEXP_CROSSMOUNT, {"crossmnt", ""}}, 1425 { NFSEXP_CROSSMOUNT, {"crossmnt", ""}},
1426 { NFSEXP_NOSUBTREECHECK, {"no_subtree_check", ""}}, 1426 { NFSEXP_NOSUBTREECHECK, {"no_subtree_check", ""}},
1427 { NFSEXP_NOAUTHNLM, {"insecure_locks", ""}}, 1427 { NFSEXP_NOAUTHNLM, {"insecure_locks", ""}},
1428 { NFSEXP_V4ROOT, {"v4root", ""}},
1428#ifdef MSNFS 1429#ifdef MSNFS
1429 { NFSEXP_MSNFS, {"msnfs", ""}}, 1430 { NFSEXP_MSNFS, {"msnfs", ""}},
1430#endif 1431#endif
diff --git a/fs/nfsd/lockd.c b/fs/nfsd/lockd.c
index b2786a5f9afe..0c6d81670137 100644
--- a/fs/nfsd/lockd.c
+++ b/fs/nfsd/lockd.c
@@ -1,6 +1,4 @@
1/* 1/*
2 * linux/fs/nfsd/lockd.c
3 *
4 * This file contains all the stubs needed when communicating with lockd. 2 * This file contains all the stubs needed when communicating with lockd.
5 * This level of indirection is necessary so we can run nfsd+lockd without 3 * This level of indirection is necessary so we can run nfsd+lockd without
6 * requiring the nfs client to be compiled in/loaded, and vice versa. 4 * requiring the nfs client to be compiled in/loaded, and vice versa.
@@ -8,14 +6,10 @@
8 * Copyright (C) 1996, Olaf Kirch <okir@monad.swb.de> 6 * Copyright (C) 1996, Olaf Kirch <okir@monad.swb.de>
9 */ 7 */
10 8
11#include <linux/types.h>
12#include <linux/fs.h>
13#include <linux/file.h> 9#include <linux/file.h>
14#include <linux/mount.h>
15#include <linux/sunrpc/clnt.h>
16#include <linux/sunrpc/svc.h>
17#include <linux/nfsd/nfsd.h>
18#include <linux/lockd/bind.h> 10#include <linux/lockd/bind.h>
11#include "nfsd.h"
12#include "vfs.h"
19 13
20#define NFSDDBG_FACILITY NFSDDBG_LOCKD 14#define NFSDDBG_FACILITY NFSDDBG_LOCKD
21 15
diff --git a/fs/nfsd/nfs2acl.c b/fs/nfsd/nfs2acl.c
index 4e3219e84116..f20589d2ae27 100644
--- a/fs/nfsd/nfs2acl.c
+++ b/fs/nfsd/nfs2acl.c
@@ -1,19 +1,15 @@
1/* 1/*
2 * linux/fs/nfsd/nfs2acl.c
3 *
4 * Process version 2 NFSACL requests. 2 * Process version 2 NFSACL requests.
5 * 3 *
6 * Copyright (C) 2002-2003 Andreas Gruenbacher <agruen@suse.de> 4 * Copyright (C) 2002-2003 Andreas Gruenbacher <agruen@suse.de>
7 */ 5 */
8 6
9#include <linux/sunrpc/svc.h> 7#include "nfsd.h"
10#include <linux/nfs.h> 8/* FIXME: nfsacl.h is a broken header */
11#include <linux/nfsd/nfsd.h>
12#include <linux/nfsd/cache.h>
13#include <linux/nfsd/xdr.h>
14#include <linux/nfsd/xdr3.h>
15#include <linux/posix_acl.h>
16#include <linux/nfsacl.h> 9#include <linux/nfsacl.h>
10#include "cache.h"
11#include "xdr3.h"
12#include "vfs.h"
17 13
18#define NFSDDBG_FACILITY NFSDDBG_PROC 14#define NFSDDBG_FACILITY NFSDDBG_PROC
19#define RETURN_STATUS(st) { resp->status = (st); return (st); } 15#define RETURN_STATUS(st) { resp->status = (st); return (st); }
@@ -217,6 +213,16 @@ static int nfsaclsvc_decode_accessargs(struct svc_rqst *rqstp, __be32 *p,
217 * XDR encode functions 213 * XDR encode functions
218 */ 214 */
219 215
216/*
217 * There must be an encoding function for void results so svc_process
218 * will work properly.
219 */
220int
221nfsaclsvc_encode_voidres(struct svc_rqst *rqstp, __be32 *p, void *dummy)
222{
223 return xdr_ressize_check(rqstp, p);
224}
225
220/* GETACL */ 226/* GETACL */
221static int nfsaclsvc_encode_getaclres(struct svc_rqst *rqstp, __be32 *p, 227static int nfsaclsvc_encode_getaclres(struct svc_rqst *rqstp, __be32 *p,
222 struct nfsd3_getaclres *resp) 228 struct nfsd3_getaclres *resp)
@@ -308,7 +314,6 @@ static int nfsaclsvc_release_access(struct svc_rqst *rqstp, __be32 *p,
308} 314}
309 315
310#define nfsaclsvc_decode_voidargs NULL 316#define nfsaclsvc_decode_voidargs NULL
311#define nfsaclsvc_encode_voidres NULL
312#define nfsaclsvc_release_void NULL 317#define nfsaclsvc_release_void NULL
313#define nfsd3_fhandleargs nfsd_fhandle 318#define nfsd3_fhandleargs nfsd_fhandle
314#define nfsd3_attrstatres nfsd_attrstat 319#define nfsd3_attrstatres nfsd_attrstat
@@ -346,5 +351,5 @@ struct svc_version nfsd_acl_version2 = {
346 .vs_proc = nfsd_acl_procedures2, 351 .vs_proc = nfsd_acl_procedures2,
347 .vs_dispatch = nfsd_dispatch, 352 .vs_dispatch = nfsd_dispatch,
348 .vs_xdrsize = NFS3_SVC_XDRSIZE, 353 .vs_xdrsize = NFS3_SVC_XDRSIZE,
349 .vs_hidden = 1, 354 .vs_hidden = 0,
350}; 355};
diff --git a/fs/nfsd/nfs3acl.c b/fs/nfsd/nfs3acl.c
index 9981dbb377a3..e0c4846bad92 100644
--- a/fs/nfsd/nfs3acl.c
+++ b/fs/nfsd/nfs3acl.c
@@ -1,18 +1,15 @@
1/* 1/*
2 * linux/fs/nfsd/nfs3acl.c
3 *
4 * Process version 3 NFSACL requests. 2 * Process version 3 NFSACL requests.
5 * 3 *
6 * Copyright (C) 2002-2003 Andreas Gruenbacher <agruen@suse.de> 4 * Copyright (C) 2002-2003 Andreas Gruenbacher <agruen@suse.de>
7 */ 5 */
8 6
9#include <linux/sunrpc/svc.h> 7#include "nfsd.h"
10#include <linux/nfs3.h> 8/* FIXME: nfsacl.h is a broken header */
11#include <linux/nfsd/nfsd.h>
12#include <linux/nfsd/cache.h>
13#include <linux/nfsd/xdr3.h>
14#include <linux/posix_acl.h>
15#include <linux/nfsacl.h> 9#include <linux/nfsacl.h>
10#include "cache.h"
11#include "xdr3.h"
12#include "vfs.h"
16 13
17#define RETURN_STATUS(st) { resp->status = (st); return (st); } 14#define RETURN_STATUS(st) { resp->status = (st); return (st); }
18 15
@@ -264,6 +261,6 @@ struct svc_version nfsd_acl_version3 = {
264 .vs_proc = nfsd_acl_procedures3, 261 .vs_proc = nfsd_acl_procedures3,
265 .vs_dispatch = nfsd_dispatch, 262 .vs_dispatch = nfsd_dispatch,
266 .vs_xdrsize = NFS3_SVC_XDRSIZE, 263 .vs_xdrsize = NFS3_SVC_XDRSIZE,
267 .vs_hidden = 1, 264 .vs_hidden = 0,
268}; 265};
269 266
diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c
index a713c418a922..3d68f45a37b9 100644
--- a/fs/nfsd/nfs3proc.c
+++ b/fs/nfsd/nfs3proc.c
@@ -1,30 +1,16 @@
1/* 1/*
2 * linux/fs/nfsd/nfs3proc.c
3 *
4 * Process version 3 NFS requests. 2 * Process version 3 NFS requests.
5 * 3 *
6 * Copyright (C) 1996, 1997, 1998 Olaf Kirch <okir@monad.swb.de> 4 * Copyright (C) 1996, 1997, 1998 Olaf Kirch <okir@monad.swb.de>
7 */ 5 */
8 6
9#include <linux/linkage.h>
10#include <linux/time.h>
11#include <linux/errno.h>
12#include <linux/fs.h> 7#include <linux/fs.h>
13#include <linux/ext2_fs.h> 8#include <linux/ext2_fs.h>
14#include <linux/stat.h>
15#include <linux/fcntl.h>
16#include <linux/net.h>
17#include <linux/in.h>
18#include <linux/unistd.h>
19#include <linux/slab.h>
20#include <linux/major.h>
21#include <linux/magic.h> 9#include <linux/magic.h>
22 10
23#include <linux/sunrpc/svc.h> 11#include "cache.h"
24#include <linux/nfsd/nfsd.h> 12#include "xdr3.h"
25#include <linux/nfsd/cache.h> 13#include "vfs.h"
26#include <linux/nfsd/xdr3.h>
27#include <linux/nfs3.h>
28 14
29#define NFSDDBG_FACILITY NFSDDBG_PROC 15#define NFSDDBG_FACILITY NFSDDBG_PROC
30 16
diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c
index d0a2ce1b4324..2a533a0af2a9 100644
--- a/fs/nfsd/nfs3xdr.c
+++ b/fs/nfsd/nfs3xdr.c
@@ -1,6 +1,4 @@
1/* 1/*
2 * linux/fs/nfsd/nfs3xdr.c
3 *
4 * XDR support for nfsd/protocol version 3. 2 * XDR support for nfsd/protocol version 3.
5 * 3 *
6 * Copyright (C) 1995, 1996, 1997 Olaf Kirch <okir@monad.swb.de> 4 * Copyright (C) 1995, 1996, 1997 Olaf Kirch <okir@monad.swb.de>
@@ -8,19 +6,8 @@
8 * 2003-08-09 Jamie Lokier: Use htonl() for nanoseconds, not htons()! 6 * 2003-08-09 Jamie Lokier: Use htonl() for nanoseconds, not htons()!
9 */ 7 */
10 8
11#include <linux/types.h>
12#include <linux/time.h>
13#include <linux/nfs3.h>
14#include <linux/list.h>
15#include <linux/spinlock.h>
16#include <linux/dcache.h>
17#include <linux/namei.h> 9#include <linux/namei.h>
18#include <linux/mm.h> 10#include "xdr3.h"
19#include <linux/vfs.h>
20#include <linux/sunrpc/xdr.h>
21#include <linux/sunrpc/svc.h>
22#include <linux/nfsd/nfsd.h>
23#include <linux/nfsd/xdr3.h>
24#include "auth.h" 11#include "auth.h"
25 12
26#define NFSDDBG_FACILITY NFSDDBG_XDR 13#define NFSDDBG_FACILITY NFSDDBG_XDR
diff --git a/fs/nfsd/nfs4acl.c b/fs/nfsd/nfs4acl.c
index 725d02f210e2..88150685df34 100644
--- a/fs/nfsd/nfs4acl.c
+++ b/fs/nfsd/nfs4acl.c
@@ -1,6 +1,4 @@
1/* 1/*
2 * fs/nfs4acl/acl.c
3 *
4 * Common NFSv4 ACL handling code. 2 * Common NFSv4 ACL handling code.
5 * 3 *
6 * Copyright (c) 2002, 2003 The Regents of the University of Michigan. 4 * Copyright (c) 2002, 2003 The Regents of the University of Michigan.
@@ -36,15 +34,7 @@
36 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 34 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37 */ 35 */
38 36
39#include <linux/string.h>
40#include <linux/slab.h>
41#include <linux/list.h>
42#include <linux/types.h>
43#include <linux/fs.h>
44#include <linux/module.h>
45#include <linux/nfs_fs.h> 37#include <linux/nfs_fs.h>
46#include <linux/posix_acl.h>
47#include <linux/nfs4.h>
48#include <linux/nfs4_acl.h> 38#include <linux/nfs4_acl.h>
49 39
50 40
@@ -389,7 +379,7 @@ sort_pacl(struct posix_acl *pacl)
389 sort_pacl_range(pacl, 1, i-1); 379 sort_pacl_range(pacl, 1, i-1);
390 380
391 BUG_ON(pacl->a_entries[i].e_tag != ACL_GROUP_OBJ); 381 BUG_ON(pacl->a_entries[i].e_tag != ACL_GROUP_OBJ);
392 j = i++; 382 j = ++i;
393 while (pacl->a_entries[j].e_tag == ACL_GROUP) 383 while (pacl->a_entries[j].e_tag == ACL_GROUP)
394 j++; 384 j++;
395 sort_pacl_range(pacl, i, j-1); 385 sort_pacl_range(pacl, i, j-1);
diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index 24e8d78f8dde..c6eed2a3b093 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -1,6 +1,4 @@
1/* 1/*
2 * linux/fs/nfsd/nfs4callback.c
3 *
4 * Copyright (c) 2001 The Regents of the University of Michigan. 2 * Copyright (c) 2001 The Regents of the University of Michigan.
5 * All rights reserved. 3 * All rights reserved.
6 * 4 *
@@ -33,22 +31,9 @@
33 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 31 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34 */ 32 */
35 33
36#include <linux/module.h>
37#include <linux/list.h>
38#include <linux/inet.h>
39#include <linux/errno.h>
40#include <linux/delay.h>
41#include <linux/sched.h>
42#include <linux/kthread.h>
43#include <linux/sunrpc/xdr.h>
44#include <linux/sunrpc/svc.h>
45#include <linux/sunrpc/clnt.h> 34#include <linux/sunrpc/clnt.h>
46#include <linux/sunrpc/svcsock.h> 35#include "nfsd.h"
47#include <linux/nfsd/nfsd.h> 36#include "state.h"
48#include <linux/nfsd/state.h>
49#include <linux/sunrpc/sched.h>
50#include <linux/nfs4.h>
51#include <linux/sunrpc/xprtsock.h>
52 37
53#define NFSDDBG_FACILITY NFSDDBG_PROC 38#define NFSDDBG_FACILITY NFSDDBG_PROC
54 39
diff --git a/fs/nfsd/nfs4idmap.c b/fs/nfsd/nfs4idmap.c
index ba2c199592fd..6e2983b27f3c 100644
--- a/fs/nfsd/nfs4idmap.c
+++ b/fs/nfsd/nfs4idmap.c
@@ -1,6 +1,4 @@
1/* 1/*
2 * fs/nfsd/nfs4idmap.c
3 *
4 * Mapping of UID/GIDs to name and vice versa. 2 * Mapping of UID/GIDs to name and vice versa.
5 * 3 *
6 * Copyright (c) 2002, 2003 The Regents of the University of 4 * Copyright (c) 2002, 2003 The Regents of the University of
@@ -35,22 +33,9 @@
35 */ 33 */
36 34
37#include <linux/module.h> 35#include <linux/module.h>
38#include <linux/init.h>
39
40#include <linux/mm.h>
41#include <linux/errno.h>
42#include <linux/string.h>
43#include <linux/sunrpc/clnt.h>
44#include <linux/nfs.h>
45#include <linux/nfs4.h>
46#include <linux/nfs_fs.h>
47#include <linux/nfs_page.h>
48#include <linux/sunrpc/cache.h>
49#include <linux/nfsd_idmap.h> 36#include <linux/nfsd_idmap.h>
50#include <linux/list.h>
51#include <linux/time.h>
52#include <linux/seq_file.h> 37#include <linux/seq_file.h>
53#include <linux/sunrpc/svcauth.h> 38#include <linux/sched.h>
54 39
55/* 40/*
56 * Cache entry 41 * Cache entry
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index bebc0c2e1b0a..37514c469846 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -1,6 +1,4 @@
1/* 1/*
2 * fs/nfsd/nfs4proc.c
3 *
4 * Server-side procedures for NFSv4. 2 * Server-side procedures for NFSv4.
5 * 3 *
6 * Copyright (c) 2002 The Regents of the University of Michigan. 4 * Copyright (c) 2002 The Regents of the University of Michigan.
@@ -34,20 +32,11 @@
34 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 32 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 33 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 */ 34 */
37
38#include <linux/param.h>
39#include <linux/major.h>
40#include <linux/slab.h>
41#include <linux/file.h> 35#include <linux/file.h>
42 36
43#include <linux/sunrpc/svc.h> 37#include "cache.h"
44#include <linux/nfsd/nfsd.h> 38#include "xdr4.h"
45#include <linux/nfsd/cache.h> 39#include "vfs.h"
46#include <linux/nfs4.h>
47#include <linux/nfsd/state.h>
48#include <linux/nfsd/xdr4.h>
49#include <linux/nfs4_acl.h>
50#include <linux/sunrpc/gss_api.h>
51 40
52#define NFSDDBG_FACILITY NFSDDBG_PROC 41#define NFSDDBG_FACILITY NFSDDBG_PROC
53 42
@@ -170,7 +159,7 @@ do_open_permission(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfs
170 accmode |= NFSD_MAY_READ; 159 accmode |= NFSD_MAY_READ;
171 if (open->op_share_access & NFS4_SHARE_ACCESS_WRITE) 160 if (open->op_share_access & NFS4_SHARE_ACCESS_WRITE)
172 accmode |= (NFSD_MAY_WRITE | NFSD_MAY_TRUNC); 161 accmode |= (NFSD_MAY_WRITE | NFSD_MAY_TRUNC);
173 if (open->op_share_deny & NFS4_SHARE_DENY_WRITE) 162 if (open->op_share_deny & NFS4_SHARE_DENY_READ)
174 accmode |= NFSD_MAY_WRITE; 163 accmode |= NFSD_MAY_WRITE;
175 164
176 status = fh_verify(rqstp, current_fh, S_IFREG, accmode); 165 status = fh_verify(rqstp, current_fh, S_IFREG, accmode);
diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c
index b5348405046b..5a754f7b71ed 100644
--- a/fs/nfsd/nfs4recover.c
+++ b/fs/nfsd/nfs4recover.c
@@ -1,6 +1,4 @@
1/* 1/*
2* linux/fs/nfsd/nfs4recover.c
3*
4* Copyright (c) 2004 The Regents of the University of Michigan. 2* Copyright (c) 2004 The Regents of the University of Michigan.
5* All rights reserved. 3* All rights reserved.
6* 4*
@@ -33,20 +31,14 @@
33* 31*
34*/ 32*/
35 33
36#include <linux/err.h>
37#include <linux/sunrpc/svc.h>
38#include <linux/nfsd/nfsd.h>
39#include <linux/nfs4.h>
40#include <linux/nfsd/state.h>
41#include <linux/nfsd/xdr4.h>
42#include <linux/param.h>
43#include <linux/file.h> 34#include <linux/file.h>
44#include <linux/namei.h> 35#include <linux/namei.h>
45#include <asm/uaccess.h>
46#include <linux/scatterlist.h>
47#include <linux/crypto.h> 36#include <linux/crypto.h>
48#include <linux/sched.h> 37#include <linux/sched.h>
49#include <linux/mount.h> 38
39#include "nfsd.h"
40#include "state.h"
41#include "vfs.h"
50 42
51#define NFSDDBG_FACILITY NFSDDBG_PROC 43#define NFSDDBG_FACILITY NFSDDBG_PROC
52 44
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 2153f9bdbebd..f19ed866c95f 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -1,6 +1,4 @@
1/* 1/*
2* linux/fs/nfsd/nfs4state.c
3*
4* Copyright (c) 2001 The Regents of the University of Michigan. 2* Copyright (c) 2001 The Regents of the University of Michigan.
5* All rights reserved. 3* All rights reserved.
6* 4*
@@ -34,28 +32,14 @@
34* 32*
35*/ 33*/
36 34
37#include <linux/param.h>
38#include <linux/major.h>
39#include <linux/slab.h>
40
41#include <linux/sunrpc/svc.h>
42#include <linux/nfsd/nfsd.h>
43#include <linux/nfsd/cache.h>
44#include <linux/file.h> 35#include <linux/file.h>
45#include <linux/mount.h>
46#include <linux/workqueue.h>
47#include <linux/smp_lock.h> 36#include <linux/smp_lock.h>
48#include <linux/kthread.h>
49#include <linux/nfs4.h>
50#include <linux/nfsd/state.h>
51#include <linux/nfsd/xdr4.h>
52#include <linux/namei.h> 37#include <linux/namei.h>
53#include <linux/swap.h> 38#include <linux/swap.h>
54#include <linux/mutex.h>
55#include <linux/lockd/bind.h>
56#include <linux/module.h>
57#include <linux/sunrpc/svcauth_gss.h> 39#include <linux/sunrpc/svcauth_gss.h>
58#include <linux/sunrpc/clnt.h> 40#include <linux/sunrpc/clnt.h>
41#include "xdr4.h"
42#include "vfs.h"
59 43
60#define NFSDDBG_FACILITY NFSDDBG_PROC 44#define NFSDDBG_FACILITY NFSDDBG_PROC
61 45
@@ -477,13 +461,14 @@ static int set_forechannel_drc_size(struct nfsd4_channel_attrs *fchan)
477 461
478/* 462/*
479 * fchan holds the client values on input, and the server values on output 463 * fchan holds the client values on input, and the server values on output
464 * sv_max_mesg is the maximum payload plus one page for overhead.
480 */ 465 */
481static int init_forechannel_attrs(struct svc_rqst *rqstp, 466static int init_forechannel_attrs(struct svc_rqst *rqstp,
482 struct nfsd4_channel_attrs *session_fchan, 467 struct nfsd4_channel_attrs *session_fchan,
483 struct nfsd4_channel_attrs *fchan) 468 struct nfsd4_channel_attrs *fchan)
484{ 469{
485 int status = 0; 470 int status = 0;
486 __u32 maxcount = svc_max_payload(rqstp); 471 __u32 maxcount = nfsd_serv->sv_max_mesg;
487 472
488 /* headerpadsz set to zero in encode routine */ 473 /* headerpadsz set to zero in encode routine */
489 474
@@ -523,6 +508,15 @@ free_session_slots(struct nfsd4_session *ses)
523 kfree(ses->se_slots[i]); 508 kfree(ses->se_slots[i]);
524} 509}
525 510
511/*
512 * We don't actually need to cache the rpc and session headers, so we
513 * can allocate a little less for each slot:
514 */
515static inline int slot_bytes(struct nfsd4_channel_attrs *ca)
516{
517 return ca->maxresp_cached - NFSD_MIN_HDR_SEQ_SZ;
518}
519
526static int 520static int
527alloc_init_session(struct svc_rqst *rqstp, struct nfs4_client *clp, 521alloc_init_session(struct svc_rqst *rqstp, struct nfs4_client *clp,
528 struct nfsd4_create_session *cses) 522 struct nfsd4_create_session *cses)
@@ -554,7 +548,7 @@ alloc_init_session(struct svc_rqst *rqstp, struct nfs4_client *clp,
554 memcpy(new, &tmp, sizeof(*new)); 548 memcpy(new, &tmp, sizeof(*new));
555 549
556 /* allocate each struct nfsd4_slot and data cache in one piece */ 550 /* allocate each struct nfsd4_slot and data cache in one piece */
557 cachesize = new->se_fchannel.maxresp_cached - NFSD_MIN_HDR_SEQ_SZ; 551 cachesize = slot_bytes(&new->se_fchannel);
558 for (i = 0; i < new->se_fchannel.maxreqs; i++) { 552 for (i = 0; i < new->se_fchannel.maxreqs; i++) {
559 sp = kzalloc(sizeof(*sp) + cachesize, GFP_KERNEL); 553 sp = kzalloc(sizeof(*sp) + cachesize, GFP_KERNEL);
560 if (!sp) 554 if (!sp)
@@ -628,10 +622,12 @@ void
628free_session(struct kref *kref) 622free_session(struct kref *kref)
629{ 623{
630 struct nfsd4_session *ses; 624 struct nfsd4_session *ses;
625 int mem;
631 626
632 ses = container_of(kref, struct nfsd4_session, se_ref); 627 ses = container_of(kref, struct nfsd4_session, se_ref);
633 spin_lock(&nfsd_drc_lock); 628 spin_lock(&nfsd_drc_lock);
634 nfsd_drc_mem_used -= ses->se_fchannel.maxreqs * NFSD_SLOT_CACHE_SIZE; 629 mem = ses->se_fchannel.maxreqs * slot_bytes(&ses->se_fchannel);
630 nfsd_drc_mem_used -= mem;
635 spin_unlock(&nfsd_drc_lock); 631 spin_unlock(&nfsd_drc_lock);
636 free_session_slots(ses); 632 free_session_slots(ses);
637 kfree(ses); 633 kfree(ses);
@@ -2404,11 +2400,8 @@ nfs4_open_delegation(struct svc_fh *fh, struct nfsd4_open *open, struct nfs4_sta
2404 2400
2405 memcpy(&open->op_delegate_stateid, &dp->dl_stateid, sizeof(dp->dl_stateid)); 2401 memcpy(&open->op_delegate_stateid, &dp->dl_stateid, sizeof(dp->dl_stateid));
2406 2402
2407 dprintk("NFSD: delegation stateid=(%08x/%08x/%08x/%08x)\n\n", 2403 dprintk("NFSD: delegation stateid=" STATEID_FMT "\n",
2408 dp->dl_stateid.si_boot, 2404 STATEID_VAL(&dp->dl_stateid));
2409 dp->dl_stateid.si_stateownerid,
2410 dp->dl_stateid.si_fileid,
2411 dp->dl_stateid.si_generation);
2412out: 2405out:
2413 if (open->op_claim_type == NFS4_OPEN_CLAIM_PREVIOUS 2406 if (open->op_claim_type == NFS4_OPEN_CLAIM_PREVIOUS
2414 && flag == NFS4_OPEN_DELEGATE_NONE 2407 && flag == NFS4_OPEN_DELEGATE_NONE
@@ -2498,9 +2491,8 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf
2498 2491
2499 status = nfs_ok; 2492 status = nfs_ok;
2500 2493
2501 dprintk("nfs4_process_open2: stateid=(%08x/%08x/%08x/%08x)\n", 2494 dprintk("%s: stateid=" STATEID_FMT "\n", __func__,
2502 stp->st_stateid.si_boot, stp->st_stateid.si_stateownerid, 2495 STATEID_VAL(&stp->st_stateid));
2503 stp->st_stateid.si_fileid, stp->st_stateid.si_generation);
2504out: 2496out:
2505 if (fp) 2497 if (fp)
2506 put_nfs4_file(fp); 2498 put_nfs4_file(fp);
@@ -2666,9 +2658,8 @@ STALE_STATEID(stateid_t *stateid)
2666{ 2658{
2667 if (time_after((unsigned long)boot_time, 2659 if (time_after((unsigned long)boot_time,
2668 (unsigned long)stateid->si_boot)) { 2660 (unsigned long)stateid->si_boot)) {
2669 dprintk("NFSD: stale stateid (%08x/%08x/%08x/%08x)!\n", 2661 dprintk("NFSD: stale stateid " STATEID_FMT "!\n",
2670 stateid->si_boot, stateid->si_stateownerid, 2662 STATEID_VAL(stateid));
2671 stateid->si_fileid, stateid->si_generation);
2672 return 1; 2663 return 1;
2673 } 2664 }
2674 return 0; 2665 return 0;
@@ -2680,9 +2671,8 @@ EXPIRED_STATEID(stateid_t *stateid)
2680 if (time_before((unsigned long)boot_time, 2671 if (time_before((unsigned long)boot_time,
2681 ((unsigned long)stateid->si_boot)) && 2672 ((unsigned long)stateid->si_boot)) &&
2682 time_before((unsigned long)(stateid->si_boot + lease_time), get_seconds())) { 2673 time_before((unsigned long)(stateid->si_boot + lease_time), get_seconds())) {
2683 dprintk("NFSD: expired stateid (%08x/%08x/%08x/%08x)!\n", 2674 dprintk("NFSD: expired stateid " STATEID_FMT "!\n",
2684 stateid->si_boot, stateid->si_stateownerid, 2675 STATEID_VAL(stateid));
2685 stateid->si_fileid, stateid->si_generation);
2686 return 1; 2676 return 1;
2687 } 2677 }
2688 return 0; 2678 return 0;
@@ -2696,9 +2686,8 @@ stateid_error_map(stateid_t *stateid)
2696 if (EXPIRED_STATEID(stateid)) 2686 if (EXPIRED_STATEID(stateid))
2697 return nfserr_expired; 2687 return nfserr_expired;
2698 2688
2699 dprintk("NFSD: bad stateid (%08x/%08x/%08x/%08x)!\n", 2689 dprintk("NFSD: bad stateid " STATEID_FMT "!\n",
2700 stateid->si_boot, stateid->si_stateownerid, 2690 STATEID_VAL(stateid));
2701 stateid->si_fileid, stateid->si_generation);
2702 return nfserr_bad_stateid; 2691 return nfserr_bad_stateid;
2703} 2692}
2704 2693
@@ -2884,10 +2873,8 @@ nfs4_preprocess_seqid_op(struct nfsd4_compound_state *cstate, u32 seqid,
2884 struct svc_fh *current_fh = &cstate->current_fh; 2873 struct svc_fh *current_fh = &cstate->current_fh;
2885 __be32 status; 2874 __be32 status;
2886 2875
2887 dprintk("NFSD: preprocess_seqid_op: seqid=%d " 2876 dprintk("NFSD: %s: seqid=%d stateid = " STATEID_FMT "\n", __func__,
2888 "stateid = (%08x/%08x/%08x/%08x)\n", seqid, 2877 seqid, STATEID_VAL(stateid));
2889 stateid->si_boot, stateid->si_stateownerid, stateid->si_fileid,
2890 stateid->si_generation);
2891 2878
2892 *stpp = NULL; 2879 *stpp = NULL;
2893 *sopp = NULL; 2880 *sopp = NULL;
@@ -3019,12 +3006,8 @@ nfsd4_open_confirm(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
3019 sop->so_confirmed = 1; 3006 sop->so_confirmed = 1;
3020 update_stateid(&stp->st_stateid); 3007 update_stateid(&stp->st_stateid);
3021 memcpy(&oc->oc_resp_stateid, &stp->st_stateid, sizeof(stateid_t)); 3008 memcpy(&oc->oc_resp_stateid, &stp->st_stateid, sizeof(stateid_t));
3022 dprintk("NFSD: nfsd4_open_confirm: success, seqid=%d " 3009 dprintk("NFSD: %s: success, seqid=%d stateid=" STATEID_FMT "\n",
3023 "stateid=(%08x/%08x/%08x/%08x)\n", oc->oc_seqid, 3010 __func__, oc->oc_seqid, STATEID_VAL(&stp->st_stateid));
3024 stp->st_stateid.si_boot,
3025 stp->st_stateid.si_stateownerid,
3026 stp->st_stateid.si_fileid,
3027 stp->st_stateid.si_generation);
3028 3011
3029 nfsd4_create_clid_dir(sop->so_client); 3012 nfsd4_create_clid_dir(sop->so_client);
3030out: 3013out:
@@ -3283,9 +3266,8 @@ find_delegation_stateid(struct inode *ino, stateid_t *stid)
3283 struct nfs4_file *fp; 3266 struct nfs4_file *fp;
3284 struct nfs4_delegation *dl; 3267 struct nfs4_delegation *dl;
3285 3268
3286 dprintk("NFSD:find_delegation_stateid stateid=(%08x/%08x/%08x/%08x)\n", 3269 dprintk("NFSD: %s: stateid=" STATEID_FMT "\n", __func__,
3287 stid->si_boot, stid->si_stateownerid, 3270 STATEID_VAL(stid));
3288 stid->si_fileid, stid->si_generation);
3289 3271
3290 fp = find_file(ino); 3272 fp = find_file(ino);
3291 if (!fp) 3273 if (!fp)
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 0fbd50cee1f6..a8587e90fd5a 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -40,24 +40,16 @@
40 * at the end of nfs4svc_decode_compoundargs. 40 * at the end of nfs4svc_decode_compoundargs.
41 */ 41 */
42 42
43#include <linux/param.h>
44#include <linux/smp.h>
45#include <linux/fs.h>
46#include <linux/namei.h> 43#include <linux/namei.h>
47#include <linux/vfs.h> 44#include <linux/statfs.h>
48#include <linux/utsname.h> 45#include <linux/utsname.h>
49#include <linux/sunrpc/xdr.h>
50#include <linux/sunrpc/svc.h>
51#include <linux/sunrpc/clnt.h>
52#include <linux/nfsd/nfsd.h>
53#include <linux/nfsd/state.h>
54#include <linux/nfsd/xdr4.h>
55#include <linux/nfsd_idmap.h> 46#include <linux/nfsd_idmap.h>
56#include <linux/nfs4.h>
57#include <linux/nfs4_acl.h> 47#include <linux/nfs4_acl.h>
58#include <linux/sunrpc/gss_api.h>
59#include <linux/sunrpc/svcauth_gss.h> 48#include <linux/sunrpc/svcauth_gss.h>
60 49
50#include "xdr4.h"
51#include "vfs.h"
52
61#define NFSDDBG_FACILITY NFSDDBG_XDR 53#define NFSDDBG_FACILITY NFSDDBG_XDR
62 54
63/* 55/*
@@ -2204,11 +2196,14 @@ nfsd4_encode_dirent_fattr(struct nfsd4_readdir *cd,
2204 * we will not follow the cross mount and will fill the attribtutes 2196 * we will not follow the cross mount and will fill the attribtutes
2205 * directly from the mountpoint dentry. 2197 * directly from the mountpoint dentry.
2206 */ 2198 */
2207 if (d_mountpoint(dentry) && !attributes_need_mount(cd->rd_bmval)) 2199 if (nfsd_mountpoint(dentry, exp)) {
2208 ignore_crossmnt = 1;
2209 else if (d_mountpoint(dentry)) {
2210 int err; 2200 int err;
2211 2201
2202 if (!(exp->ex_flags & NFSEXP_V4ROOT)
2203 && !attributes_need_mount(cd->rd_bmval)) {
2204 ignore_crossmnt = 1;
2205 goto out_encode;
2206 }
2212 /* 2207 /*
2213 * Why the heck aren't we just using nfsd_lookup?? 2208 * Why the heck aren't we just using nfsd_lookup??
2214 * Different "."/".." handling? Something else? 2209 * Different "."/".." handling? Something else?
@@ -2224,6 +2219,7 @@ nfsd4_encode_dirent_fattr(struct nfsd4_readdir *cd,
2224 goto out_put; 2219 goto out_put;
2225 2220
2226 } 2221 }
2222out_encode:
2227 nfserr = nfsd4_encode_fattr(NULL, exp, dentry, p, buflen, cd->rd_bmval, 2223 nfserr = nfsd4_encode_fattr(NULL, exp, dentry, p, buflen, cd->rd_bmval,
2228 cd->rd_rqstp, ignore_crossmnt); 2224 cd->rd_rqstp, ignore_crossmnt);
2229out_put: 2225out_put:
diff --git a/fs/nfsd/nfscache.c b/fs/nfsd/nfscache.c
index 4638635c5d87..da08560c4818 100644
--- a/fs/nfsd/nfscache.c
+++ b/fs/nfsd/nfscache.c
@@ -1,6 +1,4 @@
1/* 1/*
2 * linux/fs/nfsd/nfscache.c
3 *
4 * Request reply cache. This is currently a global cache, but this may 2 * Request reply cache. This is currently a global cache, but this may
5 * change in the future and be a per-client cache. 3 * change in the future and be a per-client cache.
6 * 4 *
@@ -10,16 +8,8 @@
10 * Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de> 8 * Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de>
11 */ 9 */
12 10
13#include <linux/kernel.h> 11#include "nfsd.h"
14#include <linux/time.h> 12#include "cache.h"
15#include <linux/slab.h>
16#include <linux/string.h>
17#include <linux/spinlock.h>
18#include <linux/list.h>
19
20#include <linux/sunrpc/svc.h>
21#include <linux/nfsd/nfsd.h>
22#include <linux/nfsd/cache.h>
23 13
24/* Size of reply cache. Common values are: 14/* Size of reply cache. Common values are:
25 * 4.3BSD: 128 15 * 4.3BSD: 128
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index 5c01fc148ce8..2604c3e70ea5 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -1,46 +1,20 @@
1/* 1/*
2 * linux/fs/nfsd/nfsctl.c
3 *
4 * Syscall interface to knfsd. 2 * Syscall interface to knfsd.
5 * 3 *
6 * Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de> 4 * Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de>
7 */ 5 */
8 6
9#include <linux/module.h>
10
11#include <linux/linkage.h>
12#include <linux/time.h>
13#include <linux/errno.h>
14#include <linux/fs.h>
15#include <linux/namei.h> 7#include <linux/namei.h>
16#include <linux/fcntl.h>
17#include <linux/net.h>
18#include <linux/in.h>
19#include <linux/syscalls.h>
20#include <linux/unistd.h>
21#include <linux/slab.h>
22#include <linux/proc_fs.h>
23#include <linux/seq_file.h>
24#include <linux/pagemap.h>
25#include <linux/init.h>
26#include <linux/inet.h>
27#include <linux/string.h>
28#include <linux/ctype.h> 8#include <linux/ctype.h>
29 9
30#include <linux/nfs.h>
31#include <linux/nfsd_idmap.h> 10#include <linux/nfsd_idmap.h>
32#include <linux/lockd/bind.h>
33#include <linux/sunrpc/svc.h>
34#include <linux/sunrpc/svcsock.h> 11#include <linux/sunrpc/svcsock.h>
35#include <linux/nfsd/nfsd.h>
36#include <linux/nfsd/cache.h>
37#include <linux/nfsd/xdr.h>
38#include <linux/nfsd/syscall.h> 12#include <linux/nfsd/syscall.h>
39#include <linux/lockd/lockd.h> 13#include <linux/lockd/lockd.h>
40#include <linux/sunrpc/clnt.h> 14#include <linux/sunrpc/clnt.h>
41 15
42#include <asm/uaccess.h> 16#include "nfsd.h"
43#include <net/ipv6.h> 17#include "cache.h"
44 18
45/* 19/*
46 * We have a single directory with 9 nodes in it. 20 * We have a single directory with 9 nodes in it.
@@ -55,6 +29,7 @@ enum {
55 NFSD_Getfd, 29 NFSD_Getfd,
56 NFSD_Getfs, 30 NFSD_Getfs,
57 NFSD_List, 31 NFSD_List,
32 NFSD_Export_features,
58 NFSD_Fh, 33 NFSD_Fh,
59 NFSD_FO_UnlockIP, 34 NFSD_FO_UnlockIP,
60 NFSD_FO_UnlockFS, 35 NFSD_FO_UnlockFS,
@@ -173,6 +148,24 @@ static const struct file_operations exports_operations = {
173 .owner = THIS_MODULE, 148 .owner = THIS_MODULE,
174}; 149};
175 150
151static int export_features_show(struct seq_file *m, void *v)
152{
153 seq_printf(m, "0x%x 0x%x\n", NFSEXP_ALLFLAGS, NFSEXP_SECINFO_FLAGS);
154 return 0;
155}
156
157static int export_features_open(struct inode *inode, struct file *file)
158{
159 return single_open(file, export_features_show, NULL);
160}
161
162static struct file_operations export_features_operations = {
163 .open = export_features_open,
164 .read = seq_read,
165 .llseek = seq_lseek,
166 .release = single_release,
167};
168
176extern int nfsd_pool_stats_open(struct inode *inode, struct file *file); 169extern int nfsd_pool_stats_open(struct inode *inode, struct file *file);
177extern int nfsd_pool_stats_release(struct inode *inode, struct file *file); 170extern int nfsd_pool_stats_release(struct inode *inode, struct file *file);
178 171
@@ -1330,6 +1323,8 @@ static int nfsd_fill_super(struct super_block * sb, void * data, int silent)
1330 [NFSD_Getfd] = {".getfd", &transaction_ops, S_IWUSR|S_IRUSR}, 1323 [NFSD_Getfd] = {".getfd", &transaction_ops, S_IWUSR|S_IRUSR},
1331 [NFSD_Getfs] = {".getfs", &transaction_ops, S_IWUSR|S_IRUSR}, 1324 [NFSD_Getfs] = {".getfs", &transaction_ops, S_IWUSR|S_IRUSR},
1332 [NFSD_List] = {"exports", &exports_operations, S_IRUGO}, 1325 [NFSD_List] = {"exports", &exports_operations, S_IRUGO},
1326 [NFSD_Export_features] = {"export_features",
1327 &export_features_operations, S_IRUGO},
1333 [NFSD_FO_UnlockIP] = {"unlock_ip", 1328 [NFSD_FO_UnlockIP] = {"unlock_ip",
1334 &transaction_ops, S_IWUSR|S_IRUSR}, 1329 &transaction_ops, S_IWUSR|S_IRUSR},
1335 [NFSD_FO_UnlockFS] = {"unlock_filesystem", 1330 [NFSD_FO_UnlockFS] = {"unlock_filesystem",
diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h
new file mode 100644
index 000000000000..e942a1aaac92
--- /dev/null
+++ b/fs/nfsd/nfsd.h
@@ -0,0 +1,338 @@
1/*
2 * Hodge-podge collection of knfsd-related stuff.
3 * I will sort this out later.
4 *
5 * Copyright (C) 1995-1997 Olaf Kirch <okir@monad.swb.de>
6 */
7
8#ifndef LINUX_NFSD_NFSD_H
9#define LINUX_NFSD_NFSD_H
10
11#include <linux/types.h>
12#include <linux/mount.h>
13
14#include <linux/nfsd/debug.h>
15#include <linux/nfsd/export.h>
16#include <linux/nfsd/stats.h>
17/*
18 * nfsd version
19 */
20#define NFSD_SUPPORTED_MINOR_VERSION 1
21
22struct readdir_cd {
23 __be32 err; /* 0, nfserr, or nfserr_eof */
24};
25
26
27extern struct svc_program nfsd_program;
28extern struct svc_version nfsd_version2, nfsd_version3,
29 nfsd_version4;
30extern u32 nfsd_supported_minorversion;
31extern struct mutex nfsd_mutex;
32extern struct svc_serv *nfsd_serv;
33extern spinlock_t nfsd_drc_lock;
34extern unsigned int nfsd_drc_max_mem;
35extern unsigned int nfsd_drc_mem_used;
36
37extern const struct seq_operations nfs_exports_op;
38
39/*
40 * Function prototypes.
41 */
42int nfsd_svc(unsigned short port, int nrservs);
43int nfsd_dispatch(struct svc_rqst *rqstp, __be32 *statp);
44
45int nfsd_nrthreads(void);
46int nfsd_nrpools(void);
47int nfsd_get_nrthreads(int n, int *);
48int nfsd_set_nrthreads(int n, int *);
49
50#if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL)
51#ifdef CONFIG_NFSD_V2_ACL
52extern struct svc_version nfsd_acl_version2;
53#else
54#define nfsd_acl_version2 NULL
55#endif
56#ifdef CONFIG_NFSD_V3_ACL
57extern struct svc_version nfsd_acl_version3;
58#else
59#define nfsd_acl_version3 NULL
60#endif
61#endif
62
63enum vers_op {NFSD_SET, NFSD_CLEAR, NFSD_TEST, NFSD_AVAIL };
64int nfsd_vers(int vers, enum vers_op change);
65int nfsd_minorversion(u32 minorversion, enum vers_op change);
66void nfsd_reset_versions(void);
67int nfsd_create_serv(void);
68
69extern int nfsd_max_blksize;
70
71static inline int nfsd_v4client(struct svc_rqst *rq)
72{
73 return rq->rq_prog == NFS_PROGRAM && rq->rq_vers == 4;
74}
75
76/*
77 * NFSv4 State
78 */
79#ifdef CONFIG_NFSD_V4
80extern unsigned int max_delegations;
81int nfs4_state_init(void);
82void nfsd4_free_slabs(void);
83int nfs4_state_start(void);
84void nfs4_state_shutdown(void);
85time_t nfs4_lease_time(void);
86void nfs4_reset_lease(time_t leasetime);
87int nfs4_reset_recoverydir(char *recdir);
88#else
89static inline int nfs4_state_init(void) { return 0; }
90static inline void nfsd4_free_slabs(void) { }
91static inline int nfs4_state_start(void) { return 0; }
92static inline void nfs4_state_shutdown(void) { }
93static inline time_t nfs4_lease_time(void) { return 0; }
94static inline void nfs4_reset_lease(time_t leasetime) { }
95static inline int nfs4_reset_recoverydir(char *recdir) { return 0; }
96#endif
97
98/*
99 * lockd binding
100 */
101void nfsd_lockd_init(void);
102void nfsd_lockd_shutdown(void);
103
104
105/*
106 * These macros provide pre-xdr'ed values for faster operation.
107 */
108#define nfs_ok cpu_to_be32(NFS_OK)
109#define nfserr_perm cpu_to_be32(NFSERR_PERM)
110#define nfserr_noent cpu_to_be32(NFSERR_NOENT)
111#define nfserr_io cpu_to_be32(NFSERR_IO)
112#define nfserr_nxio cpu_to_be32(NFSERR_NXIO)
113#define nfserr_eagain cpu_to_be32(NFSERR_EAGAIN)
114#define nfserr_acces cpu_to_be32(NFSERR_ACCES)
115#define nfserr_exist cpu_to_be32(NFSERR_EXIST)
116#define nfserr_xdev cpu_to_be32(NFSERR_XDEV)
117#define nfserr_nodev cpu_to_be32(NFSERR_NODEV)
118#define nfserr_notdir cpu_to_be32(NFSERR_NOTDIR)
119#define nfserr_isdir cpu_to_be32(NFSERR_ISDIR)
120#define nfserr_inval cpu_to_be32(NFSERR_INVAL)
121#define nfserr_fbig cpu_to_be32(NFSERR_FBIG)
122#define nfserr_nospc cpu_to_be32(NFSERR_NOSPC)
123#define nfserr_rofs cpu_to_be32(NFSERR_ROFS)
124#define nfserr_mlink cpu_to_be32(NFSERR_MLINK)
125#define nfserr_opnotsupp cpu_to_be32(NFSERR_OPNOTSUPP)
126#define nfserr_nametoolong cpu_to_be32(NFSERR_NAMETOOLONG)
127#define nfserr_notempty cpu_to_be32(NFSERR_NOTEMPTY)
128#define nfserr_dquot cpu_to_be32(NFSERR_DQUOT)
129#define nfserr_stale cpu_to_be32(NFSERR_STALE)
130#define nfserr_remote cpu_to_be32(NFSERR_REMOTE)
131#define nfserr_wflush cpu_to_be32(NFSERR_WFLUSH)
132#define nfserr_badhandle cpu_to_be32(NFSERR_BADHANDLE)
133#define nfserr_notsync cpu_to_be32(NFSERR_NOT_SYNC)
134#define nfserr_badcookie cpu_to_be32(NFSERR_BAD_COOKIE)
135#define nfserr_notsupp cpu_to_be32(NFSERR_NOTSUPP)
136#define nfserr_toosmall cpu_to_be32(NFSERR_TOOSMALL)
137#define nfserr_serverfault cpu_to_be32(NFSERR_SERVERFAULT)
138#define nfserr_badtype cpu_to_be32(NFSERR_BADTYPE)
139#define nfserr_jukebox cpu_to_be32(NFSERR_JUKEBOX)
140#define nfserr_denied cpu_to_be32(NFSERR_DENIED)
141#define nfserr_deadlock cpu_to_be32(NFSERR_DEADLOCK)
142#define nfserr_expired cpu_to_be32(NFSERR_EXPIRED)
143#define nfserr_bad_cookie cpu_to_be32(NFSERR_BAD_COOKIE)
144#define nfserr_same cpu_to_be32(NFSERR_SAME)
145#define nfserr_clid_inuse cpu_to_be32(NFSERR_CLID_INUSE)
146#define nfserr_stale_clientid cpu_to_be32(NFSERR_STALE_CLIENTID)
147#define nfserr_resource cpu_to_be32(NFSERR_RESOURCE)
148#define nfserr_moved cpu_to_be32(NFSERR_MOVED)
149#define nfserr_nofilehandle cpu_to_be32(NFSERR_NOFILEHANDLE)
150#define nfserr_minor_vers_mismatch cpu_to_be32(NFSERR_MINOR_VERS_MISMATCH)
151#define nfserr_share_denied cpu_to_be32(NFSERR_SHARE_DENIED)
152#define nfserr_stale_stateid cpu_to_be32(NFSERR_STALE_STATEID)
153#define nfserr_old_stateid cpu_to_be32(NFSERR_OLD_STATEID)
154#define nfserr_bad_stateid cpu_to_be32(NFSERR_BAD_STATEID)
155#define nfserr_bad_seqid cpu_to_be32(NFSERR_BAD_SEQID)
156#define nfserr_symlink cpu_to_be32(NFSERR_SYMLINK)
157#define nfserr_not_same cpu_to_be32(NFSERR_NOT_SAME)
158#define nfserr_restorefh cpu_to_be32(NFSERR_RESTOREFH)
159#define nfserr_attrnotsupp cpu_to_be32(NFSERR_ATTRNOTSUPP)
160#define nfserr_bad_xdr cpu_to_be32(NFSERR_BAD_XDR)
161#define nfserr_openmode cpu_to_be32(NFSERR_OPENMODE)
162#define nfserr_locks_held cpu_to_be32(NFSERR_LOCKS_HELD)
163#define nfserr_op_illegal cpu_to_be32(NFSERR_OP_ILLEGAL)
164#define nfserr_grace cpu_to_be32(NFSERR_GRACE)
165#define nfserr_no_grace cpu_to_be32(NFSERR_NO_GRACE)
166#define nfserr_reclaim_bad cpu_to_be32(NFSERR_RECLAIM_BAD)
167#define nfserr_badname cpu_to_be32(NFSERR_BADNAME)
168#define nfserr_cb_path_down cpu_to_be32(NFSERR_CB_PATH_DOWN)
169#define nfserr_locked cpu_to_be32(NFSERR_LOCKED)
170#define nfserr_wrongsec cpu_to_be32(NFSERR_WRONGSEC)
171#define nfserr_badiomode cpu_to_be32(NFS4ERR_BADIOMODE)
172#define nfserr_badlayout cpu_to_be32(NFS4ERR_BADLAYOUT)
173#define nfserr_bad_session_digest cpu_to_be32(NFS4ERR_BAD_SESSION_DIGEST)
174#define nfserr_badsession cpu_to_be32(NFS4ERR_BADSESSION)
175#define nfserr_badslot cpu_to_be32(NFS4ERR_BADSLOT)
176#define nfserr_complete_already cpu_to_be32(NFS4ERR_COMPLETE_ALREADY)
177#define nfserr_conn_not_bound_to_session cpu_to_be32(NFS4ERR_CONN_NOT_BOUND_TO_SESSION)
178#define nfserr_deleg_already_wanted cpu_to_be32(NFS4ERR_DELEG_ALREADY_WANTED)
179#define nfserr_back_chan_busy cpu_to_be32(NFS4ERR_BACK_CHAN_BUSY)
180#define nfserr_layouttrylater cpu_to_be32(NFS4ERR_LAYOUTTRYLATER)
181#define nfserr_layoutunavailable cpu_to_be32(NFS4ERR_LAYOUTUNAVAILABLE)
182#define nfserr_nomatching_layout cpu_to_be32(NFS4ERR_NOMATCHING_LAYOUT)
183#define nfserr_recallconflict cpu_to_be32(NFS4ERR_RECALLCONFLICT)
184#define nfserr_unknown_layouttype cpu_to_be32(NFS4ERR_UNKNOWN_LAYOUTTYPE)
185#define nfserr_seq_misordered cpu_to_be32(NFS4ERR_SEQ_MISORDERED)
186#define nfserr_sequence_pos cpu_to_be32(NFS4ERR_SEQUENCE_POS)
187#define nfserr_req_too_big cpu_to_be32(NFS4ERR_REQ_TOO_BIG)
188#define nfserr_rep_too_big cpu_to_be32(NFS4ERR_REP_TOO_BIG)
189#define nfserr_rep_too_big_to_cache cpu_to_be32(NFS4ERR_REP_TOO_BIG_TO_CACHE)
190#define nfserr_retry_uncached_rep cpu_to_be32(NFS4ERR_RETRY_UNCACHED_REP)
191#define nfserr_unsafe_compound cpu_to_be32(NFS4ERR_UNSAFE_COMPOUND)
192#define nfserr_too_many_ops cpu_to_be32(NFS4ERR_TOO_MANY_OPS)
193#define nfserr_op_not_in_session cpu_to_be32(NFS4ERR_OP_NOT_IN_SESSION)
194#define nfserr_hash_alg_unsupp cpu_to_be32(NFS4ERR_HASH_ALG_UNSUPP)
195#define nfserr_clientid_busy cpu_to_be32(NFS4ERR_CLIENTID_BUSY)
196#define nfserr_pnfs_io_hole cpu_to_be32(NFS4ERR_PNFS_IO_HOLE)
197#define nfserr_seq_false_retry cpu_to_be32(NFS4ERR_SEQ_FALSE_RETRY)
198#define nfserr_bad_high_slot cpu_to_be32(NFS4ERR_BAD_HIGH_SLOT)
199#define nfserr_deadsession cpu_to_be32(NFS4ERR_DEADSESSION)
200#define nfserr_encr_alg_unsupp cpu_to_be32(NFS4ERR_ENCR_ALG_UNSUPP)
201#define nfserr_pnfs_no_layout cpu_to_be32(NFS4ERR_PNFS_NO_LAYOUT)
202#define nfserr_not_only_op cpu_to_be32(NFS4ERR_NOT_ONLY_OP)
203#define nfserr_wrong_cred cpu_to_be32(NFS4ERR_WRONG_CRED)
204#define nfserr_wrong_type cpu_to_be32(NFS4ERR_WRONG_TYPE)
205#define nfserr_dirdeleg_unavail cpu_to_be32(NFS4ERR_DIRDELEG_UNAVAIL)
206#define nfserr_reject_deleg cpu_to_be32(NFS4ERR_REJECT_DELEG)
207#define nfserr_returnconflict cpu_to_be32(NFS4ERR_RETURNCONFLICT)
208#define nfserr_deleg_revoked cpu_to_be32(NFS4ERR_DELEG_REVOKED)
209
210/* error codes for internal use */
211/* if a request fails due to kmalloc failure, it gets dropped.
212 * Client should resend eventually
213 */
214#define nfserr_dropit cpu_to_be32(30000)
215/* end-of-file indicator in readdir */
216#define nfserr_eof cpu_to_be32(30001)
217/* replay detected */
218#define nfserr_replay_me cpu_to_be32(11001)
219/* nfs41 replay detected */
220#define nfserr_replay_cache cpu_to_be32(11002)
221
222/* Check for dir entries '.' and '..' */
223#define isdotent(n, l) (l < 3 && n[0] == '.' && (l == 1 || n[1] == '.'))
224
225/*
226 * Time of server startup
227 */
228extern struct timeval nfssvc_boot;
229
230#ifdef CONFIG_NFSD_V4
231
232/* before processing a COMPOUND operation, we have to check that there
233 * is enough space in the buffer for XDR encode to succeed. otherwise,
234 * we might process an operation with side effects, and be unable to
235 * tell the client that the operation succeeded.
236 *
237 * COMPOUND_SLACK_SPACE - this is the minimum bytes of buffer space
238 * needed to encode an "ordinary" _successful_ operation. (GETATTR,
239 * READ, READDIR, and READLINK have their own buffer checks.) if we
240 * fall below this level, we fail the next operation with NFS4ERR_RESOURCE.
241 *
242 * COMPOUND_ERR_SLACK_SPACE - this is the minimum bytes of buffer space
243 * needed to encode an operation which has failed with NFS4ERR_RESOURCE.
244 * care is taken to ensure that we never fall below this level for any
245 * reason.
246 */
247#define COMPOUND_SLACK_SPACE 140 /* OP_GETFH */
248#define COMPOUND_ERR_SLACK_SPACE 12 /* OP_SETATTR */
249
250#define NFSD_LEASE_TIME (nfs4_lease_time())
251#define NFSD_LAUNDROMAT_MINTIMEOUT 10 /* seconds */
252
253/*
254 * The following attributes are currently not supported by the NFSv4 server:
255 * ARCHIVE (deprecated anyway)
256 * HIDDEN (unlikely to be supported any time soon)
257 * MIMETYPE (unlikely to be supported any time soon)
258 * QUOTA_* (will be supported in a forthcoming patch)
259 * SYSTEM (unlikely to be supported any time soon)
260 * TIME_BACKUP (unlikely to be supported any time soon)
261 * TIME_CREATE (unlikely to be supported any time soon)
262 */
263#define NFSD4_SUPPORTED_ATTRS_WORD0 \
264(FATTR4_WORD0_SUPPORTED_ATTRS | FATTR4_WORD0_TYPE | FATTR4_WORD0_FH_EXPIRE_TYPE \
265 | FATTR4_WORD0_CHANGE | FATTR4_WORD0_SIZE | FATTR4_WORD0_LINK_SUPPORT \
266 | FATTR4_WORD0_SYMLINK_SUPPORT | FATTR4_WORD0_NAMED_ATTR | FATTR4_WORD0_FSID \
267 | FATTR4_WORD0_UNIQUE_HANDLES | FATTR4_WORD0_LEASE_TIME | FATTR4_WORD0_RDATTR_ERROR \
268 | FATTR4_WORD0_ACLSUPPORT | FATTR4_WORD0_CANSETTIME | FATTR4_WORD0_CASE_INSENSITIVE \
269 | FATTR4_WORD0_CASE_PRESERVING | FATTR4_WORD0_CHOWN_RESTRICTED \
270 | FATTR4_WORD0_FILEHANDLE | FATTR4_WORD0_FILEID | FATTR4_WORD0_FILES_AVAIL \
271 | FATTR4_WORD0_FILES_FREE | FATTR4_WORD0_FILES_TOTAL | FATTR4_WORD0_FS_LOCATIONS | FATTR4_WORD0_HOMOGENEOUS \
272 | FATTR4_WORD0_MAXFILESIZE | FATTR4_WORD0_MAXLINK | FATTR4_WORD0_MAXNAME \
273 | FATTR4_WORD0_MAXREAD | FATTR4_WORD0_MAXWRITE | FATTR4_WORD0_ACL)
274
275#define NFSD4_SUPPORTED_ATTRS_WORD1 \
276(FATTR4_WORD1_MODE | FATTR4_WORD1_NO_TRUNC | FATTR4_WORD1_NUMLINKS \
277 | FATTR4_WORD1_OWNER | FATTR4_WORD1_OWNER_GROUP | FATTR4_WORD1_RAWDEV \
278 | FATTR4_WORD1_SPACE_AVAIL | FATTR4_WORD1_SPACE_FREE | FATTR4_WORD1_SPACE_TOTAL \
279 | FATTR4_WORD1_SPACE_USED | FATTR4_WORD1_TIME_ACCESS | FATTR4_WORD1_TIME_ACCESS_SET \
280 | FATTR4_WORD1_TIME_DELTA | FATTR4_WORD1_TIME_METADATA \
281 | FATTR4_WORD1_TIME_MODIFY | FATTR4_WORD1_TIME_MODIFY_SET | FATTR4_WORD1_MOUNTED_ON_FILEID)
282
283#define NFSD4_SUPPORTED_ATTRS_WORD2 0
284
285#define NFSD4_1_SUPPORTED_ATTRS_WORD0 \
286 NFSD4_SUPPORTED_ATTRS_WORD0
287
288#define NFSD4_1_SUPPORTED_ATTRS_WORD1 \
289 NFSD4_SUPPORTED_ATTRS_WORD1
290
291#define NFSD4_1_SUPPORTED_ATTRS_WORD2 \
292 (NFSD4_SUPPORTED_ATTRS_WORD2 | FATTR4_WORD2_SUPPATTR_EXCLCREAT)
293
294static inline u32 nfsd_suppattrs0(u32 minorversion)
295{
296 return minorversion ? NFSD4_1_SUPPORTED_ATTRS_WORD0
297 : NFSD4_SUPPORTED_ATTRS_WORD0;
298}
299
300static inline u32 nfsd_suppattrs1(u32 minorversion)
301{
302 return minorversion ? NFSD4_1_SUPPORTED_ATTRS_WORD1
303 : NFSD4_SUPPORTED_ATTRS_WORD1;
304}
305
306static inline u32 nfsd_suppattrs2(u32 minorversion)
307{
308 return minorversion ? NFSD4_1_SUPPORTED_ATTRS_WORD2
309 : NFSD4_SUPPORTED_ATTRS_WORD2;
310}
311
312/* These will return ERR_INVAL if specified in GETATTR or READDIR. */
313#define NFSD_WRITEONLY_ATTRS_WORD1 \
314(FATTR4_WORD1_TIME_ACCESS_SET | FATTR4_WORD1_TIME_MODIFY_SET)
315
316/* These are the only attrs allowed in CREATE/OPEN/SETATTR. */
317#define NFSD_WRITEABLE_ATTRS_WORD0 \
318(FATTR4_WORD0_SIZE | FATTR4_WORD0_ACL )
319#define NFSD_WRITEABLE_ATTRS_WORD1 \
320(FATTR4_WORD1_MODE | FATTR4_WORD1_OWNER | FATTR4_WORD1_OWNER_GROUP \
321 | FATTR4_WORD1_TIME_ACCESS_SET | FATTR4_WORD1_TIME_MODIFY_SET)
322#define NFSD_WRITEABLE_ATTRS_WORD2 0
323
324#define NFSD_SUPPATTR_EXCLCREAT_WORD0 \
325 NFSD_WRITEABLE_ATTRS_WORD0
326/*
327 * we currently store the exclusive create verifier in the v_{a,m}time
328 * attributes so the client can't set these at create time using EXCLUSIVE4_1
329 */
330#define NFSD_SUPPATTR_EXCLCREAT_WORD1 \
331 (NFSD_WRITEABLE_ATTRS_WORD1 & \
332 ~(FATTR4_WORD1_TIME_ACCESS_SET | FATTR4_WORD1_TIME_MODIFY_SET))
333#define NFSD_SUPPATTR_EXCLCREAT_WORD2 \
334 NFSD_WRITEABLE_ATTRS_WORD2
335
336#endif /* CONFIG_NFSD_V4 */
337
338#endif /* LINUX_NFSD_NFSD_H */
diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c
index 01965b2f3a76..55c8e63af0be 100644
--- a/fs/nfsd/nfsfh.c
+++ b/fs/nfsd/nfsfh.c
@@ -1,6 +1,4 @@
1/* 1/*
2 * linux/fs/nfsd/nfsfh.c
3 *
4 * NFS server file handle treatment. 2 * NFS server file handle treatment.
5 * 3 *
6 * Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de> 4 * Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de>
@@ -9,19 +7,11 @@
9 * ... and again Southern-Winter 2001 to support export_operations 7 * ... and again Southern-Winter 2001 to support export_operations
10 */ 8 */
11 9
12#include <linux/slab.h>
13#include <linux/fs.h>
14#include <linux/unistd.h>
15#include <linux/string.h>
16#include <linux/stat.h>
17#include <linux/dcache.h>
18#include <linux/exportfs.h> 10#include <linux/exportfs.h>
19#include <linux/mount.h>
20 11
21#include <linux/sunrpc/clnt.h>
22#include <linux/sunrpc/svc.h>
23#include <linux/sunrpc/svcauth_gss.h> 12#include <linux/sunrpc/svcauth_gss.h>
24#include <linux/nfsd/nfsd.h> 13#include "nfsd.h"
14#include "vfs.h"
25#include "auth.h" 15#include "auth.h"
26 16
27#define NFSDDBG_FACILITY NFSDDBG_FH 17#define NFSDDBG_FACILITY NFSDDBG_FH
@@ -96,8 +86,10 @@ nfsd_mode_check(struct svc_rqst *rqstp, umode_t mode, int type)
96static __be32 nfsd_setuser_and_check_port(struct svc_rqst *rqstp, 86static __be32 nfsd_setuser_and_check_port(struct svc_rqst *rqstp,
97 struct svc_export *exp) 87 struct svc_export *exp)
98{ 88{
89 int flags = nfsexp_flags(rqstp, exp);
90
99 /* Check if the request originated from a secure port. */ 91 /* Check if the request originated from a secure port. */
100 if (!rqstp->rq_secure && EX_SECURE(exp)) { 92 if (!rqstp->rq_secure && !(flags & NFSEXP_INSECURE_PORT)) {
101 RPC_IFDEBUG(char buf[RPC_MAX_ADDRBUFLEN]); 93 RPC_IFDEBUG(char buf[RPC_MAX_ADDRBUFLEN]);
102 dprintk(KERN_WARNING 94 dprintk(KERN_WARNING
103 "nfsd: request from insecure port %s!\n", 95 "nfsd: request from insecure port %s!\n",
@@ -109,6 +101,36 @@ static __be32 nfsd_setuser_and_check_port(struct svc_rqst *rqstp,
109 return nfserrno(nfsd_setuser(rqstp, exp)); 101 return nfserrno(nfsd_setuser(rqstp, exp));
110} 102}
111 103
104static inline __be32 check_pseudo_root(struct svc_rqst *rqstp,
105 struct dentry *dentry, struct svc_export *exp)
106{
107 if (!(exp->ex_flags & NFSEXP_V4ROOT))
108 return nfs_ok;
109 /*
110 * v2/v3 clients have no need for the V4ROOT export--they use
111 * the mount protocl instead; also, further V4ROOT checks may be
112 * in v4-specific code, in which case v2/v3 clients could bypass
113 * them.
114 */
115 if (!nfsd_v4client(rqstp))
116 return nfserr_stale;
117 /*
118 * We're exposing only the directories and symlinks that have to be
119 * traversed on the way to real exports:
120 */
121 if (unlikely(!S_ISDIR(dentry->d_inode->i_mode) &&
122 !S_ISLNK(dentry->d_inode->i_mode)))
123 return nfserr_stale;
124 /*
125 * A pseudoroot export gives permission to access only one
126 * single directory; the kernel has to make another upcall
127 * before granting access to anything else under it:
128 */
129 if (unlikely(dentry != exp->ex_path.dentry))
130 return nfserr_stale;
131 return nfs_ok;
132}
133
112/* 134/*
113 * Use the given filehandle to look up the corresponding export and 135 * Use the given filehandle to look up the corresponding export and
114 * dentry. On success, the results are used to set fh_export and 136 * dentry. On success, the results are used to set fh_export and
@@ -232,14 +254,6 @@ static __be32 nfsd_set_fh_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp)
232 goto out; 254 goto out;
233 } 255 }
234 256
235 if (exp->ex_flags & NFSEXP_NOSUBTREECHECK) {
236 error = nfsd_setuser_and_check_port(rqstp, exp);
237 if (error) {
238 dput(dentry);
239 goto out;
240 }
241 }
242
243 if (S_ISDIR(dentry->d_inode->i_mode) && 257 if (S_ISDIR(dentry->d_inode->i_mode) &&
244 (dentry->d_flags & DCACHE_DISCONNECTED)) { 258 (dentry->d_flags & DCACHE_DISCONNECTED)) {
245 printk("nfsd: find_fh_dentry returned a DISCONNECTED directory: %s/%s\n", 259 printk("nfsd: find_fh_dentry returned a DISCONNECTED directory: %s/%s\n",
@@ -294,28 +308,32 @@ fh_verify(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, int access)
294 error = nfsd_set_fh_dentry(rqstp, fhp); 308 error = nfsd_set_fh_dentry(rqstp, fhp);
295 if (error) 309 if (error)
296 goto out; 310 goto out;
297 dentry = fhp->fh_dentry;
298 exp = fhp->fh_export;
299 } else {
300 /*
301 * just rechecking permissions
302 * (e.g. nfsproc_create calls fh_verify, then nfsd_create
303 * does as well)
304 */
305 dprintk("nfsd: fh_verify - just checking\n");
306 dentry = fhp->fh_dentry;
307 exp = fhp->fh_export;
308 /*
309 * Set user creds for this exportpoint; necessary even
310 * in the "just checking" case because this may be a
311 * filehandle that was created by fh_compose, and that
312 * is about to be used in another nfsv4 compound
313 * operation.
314 */
315 error = nfsd_setuser_and_check_port(rqstp, exp);
316 if (error)
317 goto out;
318 } 311 }
312 dentry = fhp->fh_dentry;
313 exp = fhp->fh_export;
314 /*
315 * We still have to do all these permission checks, even when
316 * fh_dentry is already set:
317 * - fh_verify may be called multiple times with different
318 * "access" arguments (e.g. nfsd_proc_create calls
319 * fh_verify(...,NFSD_MAY_EXEC) first, then later (in
320 * nfsd_create) calls fh_verify(...,NFSD_MAY_CREATE).
321 * - in the NFSv4 case, the filehandle may have been filled
322 * in by fh_compose, and given a dentry, but further
323 * compound operations performed with that filehandle
324 * still need permissions checks. In the worst case, a
325 * mountpoint crossing may have changed the export
326 * options, and we may now need to use a different uid
327 * (for example, if different id-squashing options are in
328 * effect on the new filesystem).
329 */
330 error = check_pseudo_root(rqstp, dentry, exp);
331 if (error)
332 goto out;
333
334 error = nfsd_setuser_and_check_port(rqstp, exp);
335 if (error)
336 goto out;
319 337
320 error = nfsd_mode_check(rqstp, dentry->d_inode->i_mode, type); 338 error = nfsd_mode_check(rqstp, dentry->d_inode->i_mode, type);
321 if (error) 339 if (error)
diff --git a/fs/nfsd/nfsfh.h b/fs/nfsd/nfsfh.h
new file mode 100644
index 000000000000..cdfb8c6a4206
--- /dev/null
+++ b/fs/nfsd/nfsfh.h
@@ -0,0 +1,208 @@
1/* Copyright (C) 1995, 1996, 1997 Olaf Kirch <okir@monad.swb.de> */
2
3#ifndef _LINUX_NFSD_FH_INT_H
4#define _LINUX_NFSD_FH_INT_H
5
6#include <linux/nfsd/nfsfh.h>
7
8enum nfsd_fsid {
9 FSID_DEV = 0,
10 FSID_NUM,
11 FSID_MAJOR_MINOR,
12 FSID_ENCODE_DEV,
13 FSID_UUID4_INUM,
14 FSID_UUID8,
15 FSID_UUID16,
16 FSID_UUID16_INUM,
17};
18
19enum fsid_source {
20 FSIDSOURCE_DEV,
21 FSIDSOURCE_FSID,
22 FSIDSOURCE_UUID,
23};
24extern enum fsid_source fsid_source(struct svc_fh *fhp);
25
26
27/* This might look a little large to "inline" but in all calls except
28 * one, 'vers' is constant so moste of the function disappears.
29 */
30static inline void mk_fsid(int vers, u32 *fsidv, dev_t dev, ino_t ino,
31 u32 fsid, unsigned char *uuid)
32{
33 u32 *up;
34 switch(vers) {
35 case FSID_DEV:
36 fsidv[0] = htonl((MAJOR(dev)<<16) |
37 MINOR(dev));
38 fsidv[1] = ino_t_to_u32(ino);
39 break;
40 case FSID_NUM:
41 fsidv[0] = fsid;
42 break;
43 case FSID_MAJOR_MINOR:
44 fsidv[0] = htonl(MAJOR(dev));
45 fsidv[1] = htonl(MINOR(dev));
46 fsidv[2] = ino_t_to_u32(ino);
47 break;
48
49 case FSID_ENCODE_DEV:
50 fsidv[0] = new_encode_dev(dev);
51 fsidv[1] = ino_t_to_u32(ino);
52 break;
53
54 case FSID_UUID4_INUM:
55 /* 4 byte fsid and inode number */
56 up = (u32*)uuid;
57 fsidv[0] = ino_t_to_u32(ino);
58 fsidv[1] = up[0] ^ up[1] ^ up[2] ^ up[3];
59 break;
60
61 case FSID_UUID8:
62 /* 8 byte fsid */
63 up = (u32*)uuid;
64 fsidv[0] = up[0] ^ up[2];
65 fsidv[1] = up[1] ^ up[3];
66 break;
67
68 case FSID_UUID16:
69 /* 16 byte fsid - NFSv3+ only */
70 memcpy(fsidv, uuid, 16);
71 break;
72
73 case FSID_UUID16_INUM:
74 /* 8 byte inode and 16 byte fsid */
75 *(u64*)fsidv = (u64)ino;
76 memcpy(fsidv+2, uuid, 16);
77 break;
78 default: BUG();
79 }
80}
81
82static inline int key_len(int type)
83{
84 switch(type) {
85 case FSID_DEV: return 8;
86 case FSID_NUM: return 4;
87 case FSID_MAJOR_MINOR: return 12;
88 case FSID_ENCODE_DEV: return 8;
89 case FSID_UUID4_INUM: return 8;
90 case FSID_UUID8: return 8;
91 case FSID_UUID16: return 16;
92 case FSID_UUID16_INUM: return 24;
93 default: return 0;
94 }
95}
96
97/*
98 * Shorthand for dprintk()'s
99 */
100extern char * SVCFH_fmt(struct svc_fh *fhp);
101
102/*
103 * Function prototypes
104 */
105__be32 fh_verify(struct svc_rqst *, struct svc_fh *, int, int);
106__be32 fh_compose(struct svc_fh *, struct svc_export *, struct dentry *, struct svc_fh *);
107__be32 fh_update(struct svc_fh *);
108void fh_put(struct svc_fh *);
109
110static __inline__ struct svc_fh *
111fh_copy(struct svc_fh *dst, struct svc_fh *src)
112{
113 WARN_ON(src->fh_dentry || src->fh_locked);
114
115 *dst = *src;
116 return dst;
117}
118
119static inline void
120fh_copy_shallow(struct knfsd_fh *dst, struct knfsd_fh *src)
121{
122 dst->fh_size = src->fh_size;
123 memcpy(&dst->fh_base, &src->fh_base, src->fh_size);
124}
125
126static __inline__ struct svc_fh *
127fh_init(struct svc_fh *fhp, int maxsize)
128{
129 memset(fhp, 0, sizeof(*fhp));
130 fhp->fh_maxsize = maxsize;
131 return fhp;
132}
133
134#ifdef CONFIG_NFSD_V3
135/*
136 * Fill in the pre_op attr for the wcc data
137 */
138static inline void
139fill_pre_wcc(struct svc_fh *fhp)
140{
141 struct inode *inode;
142
143 inode = fhp->fh_dentry->d_inode;
144 if (!fhp->fh_pre_saved) {
145 fhp->fh_pre_mtime = inode->i_mtime;
146 fhp->fh_pre_ctime = inode->i_ctime;
147 fhp->fh_pre_size = inode->i_size;
148 fhp->fh_pre_change = inode->i_version;
149 fhp->fh_pre_saved = 1;
150 }
151}
152
153extern void fill_post_wcc(struct svc_fh *);
154#else
155#define fill_pre_wcc(ignored)
156#define fill_post_wcc(notused)
157#endif /* CONFIG_NFSD_V3 */
158
159
160/*
161 * Lock a file handle/inode
162 * NOTE: both fh_lock and fh_unlock are done "by hand" in
163 * vfs.c:nfsd_rename as it needs to grab 2 i_mutex's at once
164 * so, any changes here should be reflected there.
165 */
166
167static inline void
168fh_lock_nested(struct svc_fh *fhp, unsigned int subclass)
169{
170 struct dentry *dentry = fhp->fh_dentry;
171 struct inode *inode;
172
173 BUG_ON(!dentry);
174
175 if (fhp->fh_locked) {
176 printk(KERN_WARNING "fh_lock: %s/%s already locked!\n",
177 dentry->d_parent->d_name.name, dentry->d_name.name);
178 return;
179 }
180
181 inode = dentry->d_inode;
182 mutex_lock_nested(&inode->i_mutex, subclass);
183 fill_pre_wcc(fhp);
184 fhp->fh_locked = 1;
185}
186
187static inline void
188fh_lock(struct svc_fh *fhp)
189{
190 fh_lock_nested(fhp, I_MUTEX_NORMAL);
191}
192
193/*
194 * Unlock a file handle/inode
195 */
196static inline void
197fh_unlock(struct svc_fh *fhp)
198{
199 BUG_ON(!fhp->fh_dentry);
200
201 if (fhp->fh_locked) {
202 fill_post_wcc(fhp);
203 mutex_unlock(&fhp->fh_dentry->d_inode->i_mutex);
204 fhp->fh_locked = 0;
205 }
206}
207
208#endif /* _LINUX_NFSD_FH_INT_H */
diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c
index 0eb9c820b7a6..a047ad6111ef 100644
--- a/fs/nfsd/nfsproc.c
+++ b/fs/nfsd/nfsproc.c
@@ -1,29 +1,14 @@
1/* 1/*
2 * nfsproc2.c Process version 2 NFS requests.
3 * linux/fs/nfsd/nfs2proc.c
4 *
5 * Process version 2 NFS requests. 2 * Process version 2 NFS requests.
6 * 3 *
7 * Copyright (C) 1995-1997 Olaf Kirch <okir@monad.swb.de> 4 * Copyright (C) 1995-1997 Olaf Kirch <okir@monad.swb.de>
8 */ 5 */
9 6
10#include <linux/linkage.h>
11#include <linux/time.h>
12#include <linux/errno.h>
13#include <linux/fs.h>
14#include <linux/stat.h>
15#include <linux/fcntl.h>
16#include <linux/net.h>
17#include <linux/in.h>
18#include <linux/namei.h> 7#include <linux/namei.h>
19#include <linux/unistd.h>
20#include <linux/slab.h>
21 8
22#include <linux/sunrpc/clnt.h> 9#include "cache.h"
23#include <linux/sunrpc/svc.h> 10#include "xdr.h"
24#include <linux/nfsd/nfsd.h> 11#include "vfs.h"
25#include <linux/nfsd/cache.h>
26#include <linux/nfsd/xdr.h>
27 12
28typedef struct svc_rqst svc_rqst; 13typedef struct svc_rqst svc_rqst;
29typedef struct svc_buf svc_buf; 14typedef struct svc_buf svc_buf;
@@ -758,6 +743,7 @@ nfserrno (int errno)
758 { nfserr_io, -ETXTBSY }, 743 { nfserr_io, -ETXTBSY },
759 { nfserr_notsupp, -EOPNOTSUPP }, 744 { nfserr_notsupp, -EOPNOTSUPP },
760 { nfserr_toosmall, -ETOOSMALL }, 745 { nfserr_toosmall, -ETOOSMALL },
746 { nfserr_serverfault, -ESERVERFAULT },
761 }; 747 };
762 int i; 748 int i;
763 749
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index 67ea83eedd43..171699eb07c8 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -1,6 +1,4 @@
1/* 1/*
2 * linux/fs/nfsd/nfssvc.c
3 *
4 * Central processing for nfsd. 2 * Central processing for nfsd.
5 * 3 *
6 * Authors: Olaf Kirch (okir@monad.swb.de) 4 * Authors: Olaf Kirch (okir@monad.swb.de)
@@ -8,33 +6,19 @@
8 * Copyright (C) 1995, 1996, 1997 Olaf Kirch <okir@monad.swb.de> 6 * Copyright (C) 1995, 1996, 1997 Olaf Kirch <okir@monad.swb.de>
9 */ 7 */
10 8
11#include <linux/module.h>
12#include <linux/sched.h> 9#include <linux/sched.h>
13#include <linux/time.h>
14#include <linux/errno.h>
15#include <linux/nfs.h>
16#include <linux/in.h>
17#include <linux/uio.h>
18#include <linux/unistd.h>
19#include <linux/slab.h>
20#include <linux/smp.h>
21#include <linux/freezer.h> 10#include <linux/freezer.h>
22#include <linux/fs_struct.h> 11#include <linux/fs_struct.h>
23#include <linux/kthread.h>
24#include <linux/swap.h> 12#include <linux/swap.h>
25 13
26#include <linux/sunrpc/types.h>
27#include <linux/sunrpc/stats.h> 14#include <linux/sunrpc/stats.h>
28#include <linux/sunrpc/svc.h>
29#include <linux/sunrpc/svcsock.h> 15#include <linux/sunrpc/svcsock.h>
30#include <linux/sunrpc/cache.h>
31#include <linux/nfsd/nfsd.h>
32#include <linux/nfsd/stats.h>
33#include <linux/nfsd/cache.h>
34#include <linux/nfsd/syscall.h>
35#include <linux/lockd/bind.h> 16#include <linux/lockd/bind.h>
36#include <linux/nfsacl.h> 17#include <linux/nfsacl.h>
37#include <linux/seq_file.h> 18#include <linux/seq_file.h>
19#include "nfsd.h"
20#include "cache.h"
21#include "vfs.h"
38 22
39#define NFSDDBG_FACILITY NFSDDBG_SVC 23#define NFSDDBG_FACILITY NFSDDBG_SVC
40 24
diff --git a/fs/nfsd/nfsxdr.c b/fs/nfsd/nfsxdr.c
index afd08e2c90a5..4ce005dbf3e6 100644
--- a/fs/nfsd/nfsxdr.c
+++ b/fs/nfsd/nfsxdr.c
@@ -1,20 +1,10 @@
1/* 1/*
2 * linux/fs/nfsd/nfsxdr.c
3 *
4 * XDR support for nfsd 2 * XDR support for nfsd
5 * 3 *
6 * Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de> 4 * Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de>
7 */ 5 */
8 6
9#include <linux/types.h> 7#include "xdr.h"
10#include <linux/time.h>
11#include <linux/nfs.h>
12#include <linux/vfs.h>
13#include <linux/sunrpc/xdr.h>
14#include <linux/sunrpc/svc.h>
15#include <linux/nfsd/nfsd.h>
16#include <linux/nfsd/xdr.h>
17#include <linux/mm.h>
18#include "auth.h" 8#include "auth.h"
19 9
20#define NFSDDBG_FACILITY NFSDDBG_XDR 10#define NFSDDBG_FACILITY NFSDDBG_XDR
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
new file mode 100644
index 000000000000..fefeae27f25e
--- /dev/null
+++ b/fs/nfsd/state.h
@@ -0,0 +1,408 @@
1/*
2 * Copyright (c) 2001 The Regents of the University of Michigan.
3 * All rights reserved.
4 *
5 * Kendrick Smith <kmsmith@umich.edu>
6 * Andy Adamson <andros@umich.edu>
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 *
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 * 3. Neither the name of the University nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
22 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
23 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
24 * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
28 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
29 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
30 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
31 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32 *
33 */
34
35#ifndef _NFSD4_STATE_H
36#define _NFSD4_STATE_H
37
38#include <linux/nfsd/nfsfh.h>
39#include "nfsfh.h"
40
41typedef struct {
42 u32 cl_boot;
43 u32 cl_id;
44} clientid_t;
45
46typedef struct {
47 u32 so_boot;
48 u32 so_stateownerid;
49 u32 so_fileid;
50} stateid_opaque_t;
51
52typedef struct {
53 u32 si_generation;
54 stateid_opaque_t si_opaque;
55} stateid_t;
56#define si_boot si_opaque.so_boot
57#define si_stateownerid si_opaque.so_stateownerid
58#define si_fileid si_opaque.so_fileid
59
60#define STATEID_FMT "(%08x/%08x/%08x/%08x)"
61#define STATEID_VAL(s) \
62 (s)->si_boot, \
63 (s)->si_stateownerid, \
64 (s)->si_fileid, \
65 (s)->si_generation
66
67struct nfsd4_cb_sequence {
68 /* args/res */
69 u32 cbs_minorversion;
70 struct nfs4_client *cbs_clp;
71};
72
73struct nfs4_delegation {
74 struct list_head dl_perfile;
75 struct list_head dl_perclnt;
76 struct list_head dl_recall_lru; /* delegation recalled */
77 atomic_t dl_count; /* ref count */
78 struct nfs4_client *dl_client;
79 struct nfs4_file *dl_file;
80 struct file_lock *dl_flock;
81 struct file *dl_vfs_file;
82 u32 dl_type;
83 time_t dl_time;
84/* For recall: */
85 u32 dl_ident;
86 stateid_t dl_stateid;
87 struct knfsd_fh dl_fh;
88 int dl_retries;
89};
90
91/* client delegation callback info */
92struct nfs4_cb_conn {
93 /* SETCLIENTID info */
94 struct sockaddr_storage cb_addr;
95 size_t cb_addrlen;
96 u32 cb_prog;
97 u32 cb_minorversion;
98 u32 cb_ident; /* minorversion 0 only */
99 /* RPC client info */
100 atomic_t cb_set; /* successful CB_NULL call */
101 struct rpc_clnt * cb_client;
102};
103
104/* Maximum number of slots per session. 160 is useful for long haul TCP */
105#define NFSD_MAX_SLOTS_PER_SESSION 160
106/* Maximum number of operations per session compound */
107#define NFSD_MAX_OPS_PER_COMPOUND 16
108/* Maximum session per slot cache size */
109#define NFSD_SLOT_CACHE_SIZE 1024
110/* Maximum number of NFSD_SLOT_CACHE_SIZE slots per session */
111#define NFSD_CACHE_SIZE_SLOTS_PER_SESSION 32
112#define NFSD_MAX_MEM_PER_SESSION \
113 (NFSD_CACHE_SIZE_SLOTS_PER_SESSION * NFSD_SLOT_CACHE_SIZE)
114
115struct nfsd4_slot {
116 bool sl_inuse;
117 bool sl_cachethis;
118 u16 sl_opcnt;
119 u32 sl_seqid;
120 __be32 sl_status;
121 u32 sl_datalen;
122 char sl_data[];
123};
124
125struct nfsd4_channel_attrs {
126 u32 headerpadsz;
127 u32 maxreq_sz;
128 u32 maxresp_sz;
129 u32 maxresp_cached;
130 u32 maxops;
131 u32 maxreqs;
132 u32 nr_rdma_attrs;
133 u32 rdma_attrs;
134};
135
136struct nfsd4_create_session {
137 clientid_t clientid;
138 struct nfs4_sessionid sessionid;
139 u32 seqid;
140 u32 flags;
141 struct nfsd4_channel_attrs fore_channel;
142 struct nfsd4_channel_attrs back_channel;
143 u32 callback_prog;
144 u32 uid;
145 u32 gid;
146};
147
148/* The single slot clientid cache structure */
149struct nfsd4_clid_slot {
150 u32 sl_seqid;
151 __be32 sl_status;
152 struct nfsd4_create_session sl_cr_ses;
153};
154
155struct nfsd4_session {
156 struct kref se_ref;
157 struct list_head se_hash; /* hash by sessionid */
158 struct list_head se_perclnt;
159 u32 se_flags;
160 struct nfs4_client *se_client; /* for expire_client */
161 struct nfs4_sessionid se_sessionid;
162 struct nfsd4_channel_attrs se_fchannel;
163 struct nfsd4_channel_attrs se_bchannel;
164 struct nfsd4_slot *se_slots[]; /* forward channel slots */
165};
166
167static inline void
168nfsd4_put_session(struct nfsd4_session *ses)
169{
170 extern void free_session(struct kref *kref);
171 kref_put(&ses->se_ref, free_session);
172}
173
174static inline void
175nfsd4_get_session(struct nfsd4_session *ses)
176{
177 kref_get(&ses->se_ref);
178}
179
180/* formatted contents of nfs4_sessionid */
181struct nfsd4_sessionid {
182 clientid_t clientid;
183 u32 sequence;
184 u32 reserved;
185};
186
187#define HEXDIR_LEN 33 /* hex version of 16 byte md5 of cl_name plus '\0' */
188
189/*
190 * struct nfs4_client - one per client. Clientids live here.
191 * o Each nfs4_client is hashed by clientid.
192 *
193 * o Each nfs4_clients is also hashed by name
194 * (the opaque quantity initially sent by the client to identify itself).
195 *
196 * o cl_perclient list is used to ensure no dangling stateowner references
197 * when we expire the nfs4_client
198 */
199struct nfs4_client {
200 struct list_head cl_idhash; /* hash by cl_clientid.id */
201 struct list_head cl_strhash; /* hash by cl_name */
202 struct list_head cl_openowners;
203 struct list_head cl_delegations;
204 struct list_head cl_lru; /* tail queue */
205 struct xdr_netobj cl_name; /* id generated by client */
206 char cl_recdir[HEXDIR_LEN]; /* recovery dir */
207 nfs4_verifier cl_verifier; /* generated by client */
208 time_t cl_time; /* time of last lease renewal */
209 struct sockaddr_storage cl_addr; /* client ipaddress */
210 u32 cl_flavor; /* setclientid pseudoflavor */
211 char *cl_principal; /* setclientid principal name */
212 struct svc_cred cl_cred; /* setclientid principal */
213 clientid_t cl_clientid; /* generated by server */
214 nfs4_verifier cl_confirm; /* generated by server */
215 struct nfs4_cb_conn cl_cb_conn; /* callback info */
216 atomic_t cl_count; /* ref count */
217 u32 cl_firststate; /* recovery dir creation */
218
219 /* for nfs41 */
220 struct list_head cl_sessions;
221 struct nfsd4_clid_slot cl_cs_slot; /* create_session slot */
222 u32 cl_exchange_flags;
223 struct nfs4_sessionid cl_sessionid;
224
225 /* for nfs41 callbacks */
226 /* We currently support a single back channel with a single slot */
227 unsigned long cl_cb_slot_busy;
228 u32 cl_cb_seq_nr;
229 struct svc_xprt *cl_cb_xprt; /* 4.1 callback transport */
230 struct rpc_wait_queue cl_cb_waitq; /* backchannel callers may */
231 /* wait here for slots */
232};
233
234/* struct nfs4_client_reset
235 * one per old client. Populates reset_str_hashtbl. Filled from conf_id_hashtbl
236 * upon lease reset, or from upcall to state_daemon (to read in state
237 * from non-volitile storage) upon reboot.
238 */
239struct nfs4_client_reclaim {
240 struct list_head cr_strhash; /* hash by cr_name */
241 char cr_recdir[HEXDIR_LEN]; /* recover dir */
242};
243
244static inline void
245update_stateid(stateid_t *stateid)
246{
247 stateid->si_generation++;
248}
249
250/* A reasonable value for REPLAY_ISIZE was estimated as follows:
251 * The OPEN response, typically the largest, requires
252 * 4(status) + 8(stateid) + 20(changeinfo) + 4(rflags) + 8(verifier) +
253 * 4(deleg. type) + 8(deleg. stateid) + 4(deleg. recall flag) +
254 * 20(deleg. space limit) + ~32(deleg. ace) = 112 bytes
255 */
256
257#define NFSD4_REPLAY_ISIZE 112
258
259/*
260 * Replay buffer, where the result of the last seqid-mutating operation
261 * is cached.
262 */
263struct nfs4_replay {
264 __be32 rp_status;
265 unsigned int rp_buflen;
266 char *rp_buf;
267 unsigned intrp_allocated;
268 struct knfsd_fh rp_openfh;
269 char rp_ibuf[NFSD4_REPLAY_ISIZE];
270};
271
272/*
273* nfs4_stateowner can either be an open_owner, or a lock_owner
274*
275* so_idhash: stateid_hashtbl[] for open owner, lockstateid_hashtbl[]
276* for lock_owner
277* so_strhash: ownerstr_hashtbl[] for open_owner, lock_ownerstr_hashtbl[]
278* for lock_owner
279* so_perclient: nfs4_client->cl_perclient entry - used when nfs4_client
280* struct is reaped.
281* so_perfilestate: heads the list of nfs4_stateid (either open or lock)
282* and is used to ensure no dangling nfs4_stateid references when we
283* release a stateowner.
284* so_perlockowner: (open) nfs4_stateid->st_perlockowner entry - used when
285* close is called to reap associated byte-range locks
286* so_close_lru: (open) stateowner is placed on this list instead of being
287* reaped (when so_perfilestate is empty) to hold the last close replay.
288* reaped by laundramat thread after lease period.
289*/
290struct nfs4_stateowner {
291 struct kref so_ref;
292 struct list_head so_idhash; /* hash by so_id */
293 struct list_head so_strhash; /* hash by op_name */
294 struct list_head so_perclient;
295 struct list_head so_stateids;
296 struct list_head so_perstateid; /* for lockowners only */
297 struct list_head so_close_lru; /* tail queue */
298 time_t so_time; /* time of placement on so_close_lru */
299 int so_is_open_owner; /* 1=openowner,0=lockowner */
300 u32 so_id;
301 struct nfs4_client * so_client;
302 /* after increment in ENCODE_SEQID_OP_TAIL, represents the next
303 * sequence id expected from the client: */
304 u32 so_seqid;
305 struct xdr_netobj so_owner; /* open owner name */
306 int so_confirmed; /* successful OPEN_CONFIRM? */
307 struct nfs4_replay so_replay;
308};
309
310/*
311* nfs4_file: a file opened by some number of (open) nfs4_stateowners.
312* o fi_perfile list is used to search for conflicting
313* share_acces, share_deny on the file.
314*/
315struct nfs4_file {
316 atomic_t fi_ref;
317 struct list_head fi_hash; /* hash by "struct inode *" */
318 struct list_head fi_stateids;
319 struct list_head fi_delegations;
320 struct inode *fi_inode;
321 u32 fi_id; /* used with stateowner->so_id
322 * for stateid_hashtbl hash */
323 bool fi_had_conflict;
324};
325
326/*
327* nfs4_stateid can either be an open stateid or (eventually) a lock stateid
328*
329* (open)nfs4_stateid: one per (open)nfs4_stateowner, nfs4_file
330*
331* st_hash: stateid_hashtbl[] entry or lockstateid_hashtbl entry
332* st_perfile: file_hashtbl[] entry.
333* st_perfile_state: nfs4_stateowner->so_perfilestate
334* st_perlockowner: (open stateid) list of lock nfs4_stateowners
335* st_access_bmap: used only for open stateid
336* st_deny_bmap: used only for open stateid
337* st_openstp: open stateid lock stateid was derived from
338*
339* XXX: open stateids and lock stateids have diverged sufficiently that
340* we should consider defining separate structs for the two cases.
341*/
342
343struct nfs4_stateid {
344 struct list_head st_hash;
345 struct list_head st_perfile;
346 struct list_head st_perstateowner;
347 struct list_head st_lockowners;
348 struct nfs4_stateowner * st_stateowner;
349 struct nfs4_file * st_file;
350 stateid_t st_stateid;
351 struct file * st_vfs_file;
352 unsigned long st_access_bmap;
353 unsigned long st_deny_bmap;
354 struct nfs4_stateid * st_openstp;
355};
356
357/* flags for preprocess_seqid_op() */
358#define HAS_SESSION 0x00000001
359#define CONFIRM 0x00000002
360#define OPEN_STATE 0x00000004
361#define LOCK_STATE 0x00000008
362#define RD_STATE 0x00000010
363#define WR_STATE 0x00000020
364#define CLOSE_STATE 0x00000040
365
366#define seqid_mutating_err(err) \
367 (((err) != nfserr_stale_clientid) && \
368 ((err) != nfserr_bad_seqid) && \
369 ((err) != nfserr_stale_stateid) && \
370 ((err) != nfserr_bad_stateid))
371
372struct nfsd4_compound_state;
373
374extern __be32 nfs4_preprocess_stateid_op(struct nfsd4_compound_state *cstate,
375 stateid_t *stateid, int flags, struct file **filp);
376extern void nfs4_lock_state(void);
377extern void nfs4_unlock_state(void);
378extern int nfs4_in_grace(void);
379extern __be32 nfs4_check_open_reclaim(clientid_t *clid);
380extern void put_nfs4_client(struct nfs4_client *clp);
381extern void nfs4_free_stateowner(struct kref *kref);
382extern int set_callback_cred(void);
383extern void nfsd4_probe_callback(struct nfs4_client *clp);
384extern void nfsd4_cb_recall(struct nfs4_delegation *dp);
385extern void nfs4_put_delegation(struct nfs4_delegation *dp);
386extern __be32 nfs4_make_rec_clidname(char *clidname, struct xdr_netobj *clname);
387extern void nfsd4_init_recdir(char *recdir_name);
388extern int nfsd4_recdir_load(void);
389extern void nfsd4_shutdown_recdir(void);
390extern int nfs4_client_to_reclaim(const char *name);
391extern int nfs4_has_reclaimed_state(const char *name, bool use_exchange_id);
392extern void nfsd4_recdir_purge_old(void);
393extern int nfsd4_create_clid_dir(struct nfs4_client *clp);
394extern void nfsd4_remove_clid_dir(struct nfs4_client *clp);
395
396static inline void
397nfs4_put_stateowner(struct nfs4_stateowner *so)
398{
399 kref_put(&so->so_ref, nfs4_free_stateowner);
400}
401
402static inline void
403nfs4_get_stateowner(struct nfs4_stateowner *so)
404{
405 kref_get(&so->so_ref);
406}
407
408#endif /* NFSD4_STATE_H */
diff --git a/fs/nfsd/stats.c b/fs/nfsd/stats.c
index 71944cddf680..5232d3e8fb2f 100644
--- a/fs/nfsd/stats.c
+++ b/fs/nfsd/stats.c
@@ -1,6 +1,4 @@
1/* 1/*
2 * linux/fs/nfsd/stats.c
3 *
4 * procfs-based user access to knfsd statistics 2 * procfs-based user access to knfsd statistics
5 * 3 *
6 * /proc/net/rpc/nfsd 4 * /proc/net/rpc/nfsd
@@ -23,18 +21,13 @@
23 * Copyright (C) 1995, 1996, 1997 Olaf Kirch <okir@monad.swb.de> 21 * Copyright (C) 1995, 1996, 1997 Olaf Kirch <okir@monad.swb.de>
24 */ 22 */
25 23
26#include <linux/kernel.h>
27#include <linux/time.h>
28#include <linux/proc_fs.h>
29#include <linux/seq_file.h> 24#include <linux/seq_file.h>
30#include <linux/stat.h>
31#include <linux/module.h> 25#include <linux/module.h>
32
33#include <linux/sunrpc/svc.h>
34#include <linux/sunrpc/stats.h> 26#include <linux/sunrpc/stats.h>
35#include <linux/nfsd/nfsd.h>
36#include <linux/nfsd/stats.h> 27#include <linux/nfsd/stats.h>
37 28
29#include "nfsd.h"
30
38struct nfsd_stats nfsdstats; 31struct nfsd_stats nfsdstats;
39struct svc_stat nfsd_svcstats = { 32struct svc_stat nfsd_svcstats = {
40 .program = &nfsd_program, 33 .program = &nfsd_program,
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index a293f0273263..8715d194561a 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -1,7 +1,5 @@
1#define MSNFS /* HACK HACK */ 1#define MSNFS /* HACK HACK */
2/* 2/*
3 * linux/fs/nfsd/vfs.c
4 *
5 * File operations used by nfsd. Some of these have been ripped from 3 * File operations used by nfsd. Some of these have been ripped from
6 * other parts of the kernel because they weren't exported, others 4 * other parts of the kernel because they weren't exported, others
7 * are partial duplicates with added or changed functionality. 5 * are partial duplicates with added or changed functionality.
@@ -16,48 +14,31 @@
16 * Zerocpy NFS support (C) 2002 Hirokazu Takahashi <taka@valinux.co.jp> 14 * Zerocpy NFS support (C) 2002 Hirokazu Takahashi <taka@valinux.co.jp>
17 */ 15 */
18 16
19#include <linux/string.h>
20#include <linux/time.h>
21#include <linux/errno.h>
22#include <linux/fs.h> 17#include <linux/fs.h>
23#include <linux/file.h> 18#include <linux/file.h>
24#include <linux/mount.h>
25#include <linux/major.h>
26#include <linux/splice.h> 19#include <linux/splice.h>
27#include <linux/proc_fs.h>
28#include <linux/stat.h>
29#include <linux/fcntl.h> 20#include <linux/fcntl.h>
30#include <linux/net.h>
31#include <linux/unistd.h>
32#include <linux/slab.h>
33#include <linux/pagemap.h>
34#include <linux/in.h>
35#include <linux/module.h>
36#include <linux/namei.h> 21#include <linux/namei.h>
37#include <linux/vfs.h>
38#include <linux/delay.h> 22#include <linux/delay.h>
39#include <linux/sunrpc/svc.h>
40#include <linux/nfsd/nfsd.h>
41#ifdef CONFIG_NFSD_V3
42#include <linux/nfs3.h>
43#include <linux/nfsd/xdr3.h>
44#endif /* CONFIG_NFSD_V3 */
45#include <linux/nfsd/nfsfh.h>
46#include <linux/quotaops.h> 23#include <linux/quotaops.h>
47#include <linux/fsnotify.h> 24#include <linux/fsnotify.h>
48#include <linux/posix_acl.h>
49#include <linux/posix_acl_xattr.h> 25#include <linux/posix_acl_xattr.h>
50#include <linux/xattr.h> 26#include <linux/xattr.h>
27#include <linux/jhash.h>
28#include <linux/ima.h>
29#include <asm/uaccess.h>
30
31#ifdef CONFIG_NFSD_V3
32#include "xdr3.h"
33#endif /* CONFIG_NFSD_V3 */
34
51#ifdef CONFIG_NFSD_V4 35#ifdef CONFIG_NFSD_V4
52#include <linux/nfs4.h>
53#include <linux/nfs4_acl.h> 36#include <linux/nfs4_acl.h>
54#include <linux/nfsd_idmap.h> 37#include <linux/nfsd_idmap.h>
55#include <linux/security.h>
56#endif /* CONFIG_NFSD_V4 */ 38#endif /* CONFIG_NFSD_V4 */
57#include <linux/jhash.h>
58#include <linux/ima.h>
59 39
60#include <asm/uaccess.h> 40#include "nfsd.h"
41#include "vfs.h"
61 42
62#define NFSDDBG_FACILITY NFSDDBG_FILEOP 43#define NFSDDBG_FACILITY NFSDDBG_FILEOP
63 44
@@ -89,12 +70,6 @@ struct raparm_hbucket {
89#define RAPARM_HASH_MASK (RAPARM_HASH_SIZE-1) 70#define RAPARM_HASH_MASK (RAPARM_HASH_SIZE-1)
90static struct raparm_hbucket raparm_hash[RAPARM_HASH_SIZE]; 71static struct raparm_hbucket raparm_hash[RAPARM_HASH_SIZE];
91 72
92static inline int
93nfsd_v4client(struct svc_rqst *rq)
94{
95 return rq->rq_prog == NFS_PROGRAM && rq->rq_vers == 4;
96}
97
98/* 73/*
99 * Called from nfsd_lookup and encode_dirent. Check if we have crossed 74 * Called from nfsd_lookup and encode_dirent. Check if we have crossed
100 * a mount point. 75 * a mount point.
@@ -116,8 +91,16 @@ nfsd_cross_mnt(struct svc_rqst *rqstp, struct dentry **dpp,
116 91
117 exp2 = rqst_exp_get_by_name(rqstp, &path); 92 exp2 = rqst_exp_get_by_name(rqstp, &path);
118 if (IS_ERR(exp2)) { 93 if (IS_ERR(exp2)) {
119 if (PTR_ERR(exp2) != -ENOENT) 94 err = PTR_ERR(exp2);
120 err = PTR_ERR(exp2); 95 /*
96 * We normally allow NFS clients to continue
97 * "underneath" a mountpoint that is not exported.
98 * The exception is V4ROOT, where no traversal is ever
99 * allowed without an explicit export of the new
100 * directory.
101 */
102 if (err == -ENOENT && !(exp->ex_flags & NFSEXP_V4ROOT))
103 err = 0;
121 path_put(&path); 104 path_put(&path);
122 goto out; 105 goto out;
123 } 106 }
@@ -141,6 +124,53 @@ out:
141 return err; 124 return err;
142} 125}
143 126
127static void follow_to_parent(struct path *path)
128{
129 struct dentry *dp;
130
131 while (path->dentry == path->mnt->mnt_root && follow_up(path))
132 ;
133 dp = dget_parent(path->dentry);
134 dput(path->dentry);
135 path->dentry = dp;
136}
137
138static int nfsd_lookup_parent(struct svc_rqst *rqstp, struct dentry *dparent, struct svc_export **exp, struct dentry **dentryp)
139{
140 struct svc_export *exp2;
141 struct path path = {.mnt = mntget((*exp)->ex_path.mnt),
142 .dentry = dget(dparent)};
143
144 follow_to_parent(&path);
145
146 exp2 = rqst_exp_parent(rqstp, &path);
147 if (PTR_ERR(exp2) == -ENOENT) {
148 *dentryp = dget(dparent);
149 } else if (IS_ERR(exp2)) {
150 path_put(&path);
151 return PTR_ERR(exp2);
152 } else {
153 *dentryp = dget(path.dentry);
154 exp_put(*exp);
155 *exp = exp2;
156 }
157 path_put(&path);
158 return 0;
159}
160
161/*
162 * For nfsd purposes, we treat V4ROOT exports as though there was an
163 * export at *every* directory.
164 */
165int nfsd_mountpoint(struct dentry *dentry, struct svc_export *exp)
166{
167 if (d_mountpoint(dentry))
168 return 1;
169 if (!(exp->ex_flags & NFSEXP_V4ROOT))
170 return 0;
171 return dentry->d_inode != NULL;
172}
173
144__be32 174__be32
145nfsd_lookup_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp, 175nfsd_lookup_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp,
146 const char *name, unsigned int len, 176 const char *name, unsigned int len,
@@ -169,35 +199,13 @@ nfsd_lookup_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp,
169 dentry = dget(dparent); 199 dentry = dget(dparent);
170 else if (dparent != exp->ex_path.dentry) 200 else if (dparent != exp->ex_path.dentry)
171 dentry = dget_parent(dparent); 201 dentry = dget_parent(dparent);
172 else if (!EX_NOHIDE(exp)) 202 else if (!EX_NOHIDE(exp) && !nfsd_v4client(rqstp))
173 dentry = dget(dparent); /* .. == . just like at / */ 203 dentry = dget(dparent); /* .. == . just like at / */
174 else { 204 else {
175 /* checking mountpoint crossing is very different when stepping up */ 205 /* checking mountpoint crossing is very different when stepping up */
176 struct svc_export *exp2 = NULL; 206 host_err = nfsd_lookup_parent(rqstp, dparent, &exp, &dentry);
177 struct dentry *dp; 207 if (host_err)
178 struct path path = {.mnt = mntget(exp->ex_path.mnt),
179 .dentry = dget(dparent)};
180
181 while (path.dentry == path.mnt->mnt_root &&
182 follow_up(&path))
183 ;
184 dp = dget_parent(path.dentry);
185 dput(path.dentry);
186 path.dentry = dp;
187
188 exp2 = rqst_exp_parent(rqstp, &path);
189 if (PTR_ERR(exp2) == -ENOENT) {
190 dentry = dget(dparent);
191 } else if (IS_ERR(exp2)) {
192 host_err = PTR_ERR(exp2);
193 path_put(&path);
194 goto out_nfserr; 208 goto out_nfserr;
195 } else {
196 dentry = dget(path.dentry);
197 exp_put(exp);
198 exp = exp2;
199 }
200 path_put(&path);
201 } 209 }
202 } else { 210 } else {
203 fh_lock(fhp); 211 fh_lock(fhp);
@@ -208,7 +216,7 @@ nfsd_lookup_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp,
208 /* 216 /*
209 * check if we have crossed a mount point ... 217 * check if we have crossed a mount point ...
210 */ 218 */
211 if (d_mountpoint(dentry)) { 219 if (nfsd_mountpoint(dentry, exp)) {
212 if ((host_err = nfsd_cross_mnt(rqstp, &dentry, &exp))) { 220 if ((host_err = nfsd_cross_mnt(rqstp, &dentry, &exp))) {
213 dput(dentry); 221 dput(dentry);
214 goto out_nfserr; 222 goto out_nfserr;
@@ -745,7 +753,7 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
745 if (IS_ERR(*filp)) 753 if (IS_ERR(*filp))
746 host_err = PTR_ERR(*filp); 754 host_err = PTR_ERR(*filp);
747 else 755 else
748 ima_counts_get(*filp); 756 host_err = ima_file_check(*filp, access);
749out_nfserr: 757out_nfserr:
750 err = nfserrno(host_err); 758 err = nfserrno(host_err);
751out: 759out:
@@ -774,12 +782,9 @@ static inline int nfsd_dosync(struct file *filp, struct dentry *dp,
774 int (*fsync) (struct file *, struct dentry *, int); 782 int (*fsync) (struct file *, struct dentry *, int);
775 int err; 783 int err;
776 784
777 err = filemap_fdatawrite(inode->i_mapping); 785 err = filemap_write_and_wait(inode->i_mapping);
778 if (err == 0 && fop && (fsync = fop->fsync)) 786 if (err == 0 && fop && (fsync = fop->fsync))
779 err = fsync(filp, dp, 0); 787 err = fsync(filp, dp, 0);
780 if (err == 0)
781 err = filemap_fdatawait(inode->i_mapping);
782
783 return err; 788 return err;
784} 789}
785 790
@@ -2124,8 +2129,6 @@ nfsd_permission(struct svc_rqst *rqstp, struct svc_export *exp,
2124 */ 2129 */
2125 path.mnt = exp->ex_path.mnt; 2130 path.mnt = exp->ex_path.mnt;
2126 path.dentry = dentry; 2131 path.dentry = dentry;
2127 err = ima_path_check(&path, acc & (MAY_READ | MAY_WRITE | MAY_EXEC),
2128 IMA_COUNT_LEAVE);
2129nfsd_out: 2132nfsd_out:
2130 return err? nfserrno(err) : 0; 2133 return err? nfserrno(err) : 0;
2131} 2134}
diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h
new file mode 100644
index 000000000000..4b1de0a9ea75
--- /dev/null
+++ b/fs/nfsd/vfs.h
@@ -0,0 +1,101 @@
1/*
2 * Copyright (C) 1995-1997 Olaf Kirch <okir@monad.swb.de>
3 */
4
5#ifndef LINUX_NFSD_VFS_H
6#define LINUX_NFSD_VFS_H
7
8#include "nfsfh.h"
9
10/*
11 * Flags for nfsd_permission
12 */
13#define NFSD_MAY_NOP 0
14#define NFSD_MAY_EXEC 1 /* == MAY_EXEC */
15#define NFSD_MAY_WRITE 2 /* == MAY_WRITE */
16#define NFSD_MAY_READ 4 /* == MAY_READ */
17#define NFSD_MAY_SATTR 8
18#define NFSD_MAY_TRUNC 16
19#define NFSD_MAY_LOCK 32
20#define NFSD_MAY_OWNER_OVERRIDE 64
21#define NFSD_MAY_LOCAL_ACCESS 128 /* IRIX doing local access check on device special file*/
22#define NFSD_MAY_BYPASS_GSS_ON_ROOT 256
23
24#define NFSD_MAY_CREATE (NFSD_MAY_EXEC|NFSD_MAY_WRITE)
25#define NFSD_MAY_REMOVE (NFSD_MAY_EXEC|NFSD_MAY_WRITE|NFSD_MAY_TRUNC)
26
27/*
28 * Callback function for readdir
29 */
30typedef int (*nfsd_dirop_t)(struct inode *, struct dentry *, int, int);
31
32/* nfsd/vfs.c */
33int fh_lock_parent(struct svc_fh *, struct dentry *);
34int nfsd_racache_init(int);
35void nfsd_racache_shutdown(void);
36int nfsd_cross_mnt(struct svc_rqst *rqstp, struct dentry **dpp,
37 struct svc_export **expp);
38__be32 nfsd_lookup(struct svc_rqst *, struct svc_fh *,
39 const char *, unsigned int, struct svc_fh *);
40__be32 nfsd_lookup_dentry(struct svc_rqst *, struct svc_fh *,
41 const char *, unsigned int,
42 struct svc_export **, struct dentry **);
43__be32 nfsd_setattr(struct svc_rqst *, struct svc_fh *,
44 struct iattr *, int, time_t);
45int nfsd_mountpoint(struct dentry *, struct svc_export *);
46#ifdef CONFIG_NFSD_V4
47__be32 nfsd4_set_nfs4_acl(struct svc_rqst *, struct svc_fh *,
48 struct nfs4_acl *);
49int nfsd4_get_nfs4_acl(struct svc_rqst *, struct dentry *, struct nfs4_acl **);
50#endif /* CONFIG_NFSD_V4 */
51__be32 nfsd_create(struct svc_rqst *, struct svc_fh *,
52 char *name, int len, struct iattr *attrs,
53 int type, dev_t rdev, struct svc_fh *res);
54#ifdef CONFIG_NFSD_V3
55__be32 nfsd_access(struct svc_rqst *, struct svc_fh *, u32 *, u32 *);
56__be32 nfsd_create_v3(struct svc_rqst *, struct svc_fh *,
57 char *name, int len, struct iattr *attrs,
58 struct svc_fh *res, int createmode,
59 u32 *verifier, int *truncp, int *created);
60__be32 nfsd_commit(struct svc_rqst *, struct svc_fh *,
61 loff_t, unsigned long);
62#endif /* CONFIG_NFSD_V3 */
63__be32 nfsd_open(struct svc_rqst *, struct svc_fh *, int,
64 int, struct file **);
65void nfsd_close(struct file *);
66__be32 nfsd_read(struct svc_rqst *, struct svc_fh *, struct file *,
67 loff_t, struct kvec *, int, unsigned long *);
68__be32 nfsd_write(struct svc_rqst *, struct svc_fh *,struct file *,
69 loff_t, struct kvec *,int, unsigned long *, int *);
70__be32 nfsd_readlink(struct svc_rqst *, struct svc_fh *,
71 char *, int *);
72__be32 nfsd_symlink(struct svc_rqst *, struct svc_fh *,
73 char *name, int len, char *path, int plen,
74 struct svc_fh *res, struct iattr *);
75__be32 nfsd_link(struct svc_rqst *, struct svc_fh *,
76 char *, int, struct svc_fh *);
77__be32 nfsd_rename(struct svc_rqst *,
78 struct svc_fh *, char *, int,
79 struct svc_fh *, char *, int);
80__be32 nfsd_remove(struct svc_rqst *,
81 struct svc_fh *, char *, int);
82__be32 nfsd_unlink(struct svc_rqst *, struct svc_fh *, int type,
83 char *name, int len);
84int nfsd_truncate(struct svc_rqst *, struct svc_fh *,
85 unsigned long size);
86__be32 nfsd_readdir(struct svc_rqst *, struct svc_fh *,
87 loff_t *, struct readdir_cd *, filldir_t);
88__be32 nfsd_statfs(struct svc_rqst *, struct svc_fh *,
89 struct kstatfs *, int access);
90
91int nfsd_notify_change(struct inode *, struct iattr *);
92__be32 nfsd_permission(struct svc_rqst *, struct svc_export *,
93 struct dentry *, int);
94int nfsd_sync_dir(struct dentry *dp);
95
96#if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL)
97struct posix_acl *nfsd_get_posix_acl(struct svc_fh *, int);
98int nfsd_set_posix_acl(struct svc_fh *, int, struct posix_acl *);
99#endif
100
101#endif /* LINUX_NFSD_VFS_H */
diff --git a/fs/nfsd/xdr.h b/fs/nfsd/xdr.h
new file mode 100644
index 000000000000..53b1863dd8f6
--- /dev/null
+++ b/fs/nfsd/xdr.h
@@ -0,0 +1,173 @@
1/* XDR types for nfsd. This is mainly a typing exercise. */
2
3#ifndef LINUX_NFSD_H
4#define LINUX_NFSD_H
5
6#include <linux/vfs.h>
7#include "nfsd.h"
8#include "nfsfh.h"
9
10struct nfsd_fhandle {
11 struct svc_fh fh;
12};
13
14struct nfsd_sattrargs {
15 struct svc_fh fh;
16 struct iattr attrs;
17};
18
19struct nfsd_diropargs {
20 struct svc_fh fh;
21 char * name;
22 unsigned int len;
23};
24
25struct nfsd_readargs {
26 struct svc_fh fh;
27 __u32 offset;
28 __u32 count;
29 int vlen;
30};
31
32struct nfsd_writeargs {
33 svc_fh fh;
34 __u32 offset;
35 int len;
36 int vlen;
37};
38
39struct nfsd_createargs {
40 struct svc_fh fh;
41 char * name;
42 unsigned int len;
43 struct iattr attrs;
44};
45
46struct nfsd_renameargs {
47 struct svc_fh ffh;
48 char * fname;
49 unsigned int flen;
50 struct svc_fh tfh;
51 char * tname;
52 unsigned int tlen;
53};
54
55struct nfsd_readlinkargs {
56 struct svc_fh fh;
57 char * buffer;
58};
59
60struct nfsd_linkargs {
61 struct svc_fh ffh;
62 struct svc_fh tfh;
63 char * tname;
64 unsigned int tlen;
65};
66
67struct nfsd_symlinkargs {
68 struct svc_fh ffh;
69 char * fname;
70 unsigned int flen;
71 char * tname;
72 unsigned int tlen;
73 struct iattr attrs;
74};
75
76struct nfsd_readdirargs {
77 struct svc_fh fh;
78 __u32 cookie;
79 __u32 count;
80 __be32 * buffer;
81};
82
83struct nfsd_attrstat {
84 struct svc_fh fh;
85 struct kstat stat;
86};
87
88struct nfsd_diropres {
89 struct svc_fh fh;
90 struct kstat stat;
91};
92
93struct nfsd_readlinkres {
94 int len;
95};
96
97struct nfsd_readres {
98 struct svc_fh fh;
99 unsigned long count;
100 struct kstat stat;
101};
102
103struct nfsd_readdirres {
104 int count;
105
106 struct readdir_cd common;
107 __be32 * buffer;
108 int buflen;
109 __be32 * offset;
110};
111
112struct nfsd_statfsres {
113 struct kstatfs stats;
114};
115
116/*
117 * Storage requirements for XDR arguments and results.
118 */
119union nfsd_xdrstore {
120 struct nfsd_sattrargs sattr;
121 struct nfsd_diropargs dirop;
122 struct nfsd_readargs read;
123 struct nfsd_writeargs write;
124 struct nfsd_createargs create;
125 struct nfsd_renameargs rename;
126 struct nfsd_linkargs link;
127 struct nfsd_symlinkargs symlink;
128 struct nfsd_readdirargs readdir;
129};
130
131#define NFS2_SVC_XDRSIZE sizeof(union nfsd_xdrstore)
132
133
134int nfssvc_decode_void(struct svc_rqst *, __be32 *, void *);
135int nfssvc_decode_fhandle(struct svc_rqst *, __be32 *, struct nfsd_fhandle *);
136int nfssvc_decode_sattrargs(struct svc_rqst *, __be32 *,
137 struct nfsd_sattrargs *);
138int nfssvc_decode_diropargs(struct svc_rqst *, __be32 *,
139 struct nfsd_diropargs *);
140int nfssvc_decode_readargs(struct svc_rqst *, __be32 *,
141 struct nfsd_readargs *);
142int nfssvc_decode_writeargs(struct svc_rqst *, __be32 *,
143 struct nfsd_writeargs *);
144int nfssvc_decode_createargs(struct svc_rqst *, __be32 *,
145 struct nfsd_createargs *);
146int nfssvc_decode_renameargs(struct svc_rqst *, __be32 *,
147 struct nfsd_renameargs *);
148int nfssvc_decode_readlinkargs(struct svc_rqst *, __be32 *,
149 struct nfsd_readlinkargs *);
150int nfssvc_decode_linkargs(struct svc_rqst *, __be32 *,
151 struct nfsd_linkargs *);
152int nfssvc_decode_symlinkargs(struct svc_rqst *, __be32 *,
153 struct nfsd_symlinkargs *);
154int nfssvc_decode_readdirargs(struct svc_rqst *, __be32 *,
155 struct nfsd_readdirargs *);
156int nfssvc_encode_void(struct svc_rqst *, __be32 *, void *);
157int nfssvc_encode_attrstat(struct svc_rqst *, __be32 *, struct nfsd_attrstat *);
158int nfssvc_encode_diropres(struct svc_rqst *, __be32 *, struct nfsd_diropres *);
159int nfssvc_encode_readlinkres(struct svc_rqst *, __be32 *, struct nfsd_readlinkres *);
160int nfssvc_encode_readres(struct svc_rqst *, __be32 *, struct nfsd_readres *);
161int nfssvc_encode_statfsres(struct svc_rqst *, __be32 *, struct nfsd_statfsres *);
162int nfssvc_encode_readdirres(struct svc_rqst *, __be32 *, struct nfsd_readdirres *);
163
164int nfssvc_encode_entry(void *, const char *name,
165 int namlen, loff_t offset, u64 ino, unsigned int);
166
167int nfssvc_release_fhandle(struct svc_rqst *, __be32 *, struct nfsd_fhandle *);
168
169/* Helper functions for NFSv2 ACL code */
170__be32 *nfs2svc_encode_fattr(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *fhp);
171__be32 *nfs2svc_decode_fh(__be32 *p, struct svc_fh *fhp);
172
173#endif /* LINUX_NFSD_H */
diff --git a/fs/nfsd/xdr3.h b/fs/nfsd/xdr3.h
new file mode 100644
index 000000000000..7df980eb0562
--- /dev/null
+++ b/fs/nfsd/xdr3.h
@@ -0,0 +1,344 @@
1/*
2 * XDR types for NFSv3 in nfsd.
3 *
4 * Copyright (C) 1996-1998, Olaf Kirch <okir@monad.swb.de>
5 */
6
7#ifndef _LINUX_NFSD_XDR3_H
8#define _LINUX_NFSD_XDR3_H
9
10#include "xdr.h"
11
12struct nfsd3_sattrargs {
13 struct svc_fh fh;
14 struct iattr attrs;
15 int check_guard;
16 time_t guardtime;
17};
18
19struct nfsd3_diropargs {
20 struct svc_fh fh;
21 char * name;
22 unsigned int len;
23};
24
25struct nfsd3_accessargs {
26 struct svc_fh fh;
27 unsigned int access;
28};
29
30struct nfsd3_readargs {
31 struct svc_fh fh;
32 __u64 offset;
33 __u32 count;
34 int vlen;
35};
36
37struct nfsd3_writeargs {
38 svc_fh fh;
39 __u64 offset;
40 __u32 count;
41 int stable;
42 __u32 len;
43 int vlen;
44};
45
46struct nfsd3_createargs {
47 struct svc_fh fh;
48 char * name;
49 unsigned int len;
50 int createmode;
51 struct iattr attrs;
52 __be32 * verf;
53};
54
55struct nfsd3_mknodargs {
56 struct svc_fh fh;
57 char * name;
58 unsigned int len;
59 __u32 ftype;
60 __u32 major, minor;
61 struct iattr attrs;
62};
63
64struct nfsd3_renameargs {
65 struct svc_fh ffh;
66 char * fname;
67 unsigned int flen;
68 struct svc_fh tfh;
69 char * tname;
70 unsigned int tlen;
71};
72
73struct nfsd3_readlinkargs {
74 struct svc_fh fh;
75 char * buffer;
76};
77
78struct nfsd3_linkargs {
79 struct svc_fh ffh;
80 struct svc_fh tfh;
81 char * tname;
82 unsigned int tlen;
83};
84
85struct nfsd3_symlinkargs {
86 struct svc_fh ffh;
87 char * fname;
88 unsigned int flen;
89 char * tname;
90 unsigned int tlen;
91 struct iattr attrs;
92};
93
94struct nfsd3_readdirargs {
95 struct svc_fh fh;
96 __u64 cookie;
97 __u32 dircount;
98 __u32 count;
99 __be32 * verf;
100 __be32 * buffer;
101};
102
103struct nfsd3_commitargs {
104 struct svc_fh fh;
105 __u64 offset;
106 __u32 count;
107};
108
109struct nfsd3_getaclargs {
110 struct svc_fh fh;
111 int mask;
112};
113
114struct posix_acl;
115struct nfsd3_setaclargs {
116 struct svc_fh fh;
117 int mask;
118 struct posix_acl *acl_access;
119 struct posix_acl *acl_default;
120};
121
122struct nfsd3_attrstat {
123 __be32 status;
124 struct svc_fh fh;
125 struct kstat stat;
126};
127
128/* LOOKUP, CREATE, MKDIR, SYMLINK, MKNOD */
129struct nfsd3_diropres {
130 __be32 status;
131 struct svc_fh dirfh;
132 struct svc_fh fh;
133};
134
135struct nfsd3_accessres {
136 __be32 status;
137 struct svc_fh fh;
138 __u32 access;
139};
140
141struct nfsd3_readlinkres {
142 __be32 status;
143 struct svc_fh fh;
144 __u32 len;
145};
146
147struct nfsd3_readres {
148 __be32 status;
149 struct svc_fh fh;
150 unsigned long count;
151 int eof;
152};
153
154struct nfsd3_writeres {
155 __be32 status;
156 struct svc_fh fh;
157 unsigned long count;
158 int committed;
159};
160
161struct nfsd3_renameres {
162 __be32 status;
163 struct svc_fh ffh;
164 struct svc_fh tfh;
165};
166
167struct nfsd3_linkres {
168 __be32 status;
169 struct svc_fh tfh;
170 struct svc_fh fh;
171};
172
173struct nfsd3_readdirres {
174 __be32 status;
175 struct svc_fh fh;
176 int count;
177 __be32 verf[2];
178
179 struct readdir_cd common;
180 __be32 * buffer;
181 int buflen;
182 __be32 * offset;
183 __be32 * offset1;
184 struct svc_rqst * rqstp;
185
186};
187
188struct nfsd3_fsstatres {
189 __be32 status;
190 struct kstatfs stats;
191 __u32 invarsec;
192};
193
194struct nfsd3_fsinfores {
195 __be32 status;
196 __u32 f_rtmax;
197 __u32 f_rtpref;
198 __u32 f_rtmult;
199 __u32 f_wtmax;
200 __u32 f_wtpref;
201 __u32 f_wtmult;
202 __u32 f_dtpref;
203 __u64 f_maxfilesize;
204 __u32 f_properties;
205};
206
207struct nfsd3_pathconfres {
208 __be32 status;
209 __u32 p_link_max;
210 __u32 p_name_max;
211 __u32 p_no_trunc;
212 __u32 p_chown_restricted;
213 __u32 p_case_insensitive;
214 __u32 p_case_preserving;
215};
216
217struct nfsd3_commitres {
218 __be32 status;
219 struct svc_fh fh;
220};
221
222struct nfsd3_getaclres {
223 __be32 status;
224 struct svc_fh fh;
225 int mask;
226 struct posix_acl *acl_access;
227 struct posix_acl *acl_default;
228};
229
230/* dummy type for release */
231struct nfsd3_fhandle_pair {
232 __u32 dummy;
233 struct svc_fh fh1;
234 struct svc_fh fh2;
235};
236
237/*
238 * Storage requirements for XDR arguments and results.
239 */
240union nfsd3_xdrstore {
241 struct nfsd3_sattrargs sattrargs;
242 struct nfsd3_diropargs diropargs;
243 struct nfsd3_readargs readargs;
244 struct nfsd3_writeargs writeargs;
245 struct nfsd3_createargs createargs;
246 struct nfsd3_renameargs renameargs;
247 struct nfsd3_linkargs linkargs;
248 struct nfsd3_symlinkargs symlinkargs;
249 struct nfsd3_readdirargs readdirargs;
250 struct nfsd3_diropres diropres;
251 struct nfsd3_accessres accessres;
252 struct nfsd3_readlinkres readlinkres;
253 struct nfsd3_readres readres;
254 struct nfsd3_writeres writeres;
255 struct nfsd3_renameres renameres;
256 struct nfsd3_linkres linkres;
257 struct nfsd3_readdirres readdirres;
258 struct nfsd3_fsstatres fsstatres;
259 struct nfsd3_fsinfores fsinfores;
260 struct nfsd3_pathconfres pathconfres;
261 struct nfsd3_commitres commitres;
262 struct nfsd3_getaclres getaclres;
263};
264
265#define NFS3_SVC_XDRSIZE sizeof(union nfsd3_xdrstore)
266
267int nfs3svc_decode_fhandle(struct svc_rqst *, __be32 *, struct nfsd_fhandle *);
268int nfs3svc_decode_sattrargs(struct svc_rqst *, __be32 *,
269 struct nfsd3_sattrargs *);
270int nfs3svc_decode_diropargs(struct svc_rqst *, __be32 *,
271 struct nfsd3_diropargs *);
272int nfs3svc_decode_accessargs(struct svc_rqst *, __be32 *,
273 struct nfsd3_accessargs *);
274int nfs3svc_decode_readargs(struct svc_rqst *, __be32 *,
275 struct nfsd3_readargs *);
276int nfs3svc_decode_writeargs(struct svc_rqst *, __be32 *,
277 struct nfsd3_writeargs *);
278int nfs3svc_decode_createargs(struct svc_rqst *, __be32 *,
279 struct nfsd3_createargs *);
280int nfs3svc_decode_mkdirargs(struct svc_rqst *, __be32 *,
281 struct nfsd3_createargs *);
282int nfs3svc_decode_mknodargs(struct svc_rqst *, __be32 *,
283 struct nfsd3_mknodargs *);
284int nfs3svc_decode_renameargs(struct svc_rqst *, __be32 *,
285 struct nfsd3_renameargs *);
286int nfs3svc_decode_readlinkargs(struct svc_rqst *, __be32 *,
287 struct nfsd3_readlinkargs *);
288int nfs3svc_decode_linkargs(struct svc_rqst *, __be32 *,
289 struct nfsd3_linkargs *);
290int nfs3svc_decode_symlinkargs(struct svc_rqst *, __be32 *,
291 struct nfsd3_symlinkargs *);
292int nfs3svc_decode_readdirargs(struct svc_rqst *, __be32 *,
293 struct nfsd3_readdirargs *);
294int nfs3svc_decode_readdirplusargs(struct svc_rqst *, __be32 *,
295 struct nfsd3_readdirargs *);
296int nfs3svc_decode_commitargs(struct svc_rqst *, __be32 *,
297 struct nfsd3_commitargs *);
298int nfs3svc_encode_voidres(struct svc_rqst *, __be32 *, void *);
299int nfs3svc_encode_attrstat(struct svc_rqst *, __be32 *,
300 struct nfsd3_attrstat *);
301int nfs3svc_encode_wccstat(struct svc_rqst *, __be32 *,
302 struct nfsd3_attrstat *);
303int nfs3svc_encode_diropres(struct svc_rqst *, __be32 *,
304 struct nfsd3_diropres *);
305int nfs3svc_encode_accessres(struct svc_rqst *, __be32 *,
306 struct nfsd3_accessres *);
307int nfs3svc_encode_readlinkres(struct svc_rqst *, __be32 *,
308 struct nfsd3_readlinkres *);
309int nfs3svc_encode_readres(struct svc_rqst *, __be32 *, struct nfsd3_readres *);
310int nfs3svc_encode_writeres(struct svc_rqst *, __be32 *, struct nfsd3_writeres *);
311int nfs3svc_encode_createres(struct svc_rqst *, __be32 *,
312 struct nfsd3_diropres *);
313int nfs3svc_encode_renameres(struct svc_rqst *, __be32 *,
314 struct nfsd3_renameres *);
315int nfs3svc_encode_linkres(struct svc_rqst *, __be32 *,
316 struct nfsd3_linkres *);
317int nfs3svc_encode_readdirres(struct svc_rqst *, __be32 *,
318 struct nfsd3_readdirres *);
319int nfs3svc_encode_fsstatres(struct svc_rqst *, __be32 *,
320 struct nfsd3_fsstatres *);
321int nfs3svc_encode_fsinfores(struct svc_rqst *, __be32 *,
322 struct nfsd3_fsinfores *);
323int nfs3svc_encode_pathconfres(struct svc_rqst *, __be32 *,
324 struct nfsd3_pathconfres *);
325int nfs3svc_encode_commitres(struct svc_rqst *, __be32 *,
326 struct nfsd3_commitres *);
327
328int nfs3svc_release_fhandle(struct svc_rqst *, __be32 *,
329 struct nfsd3_attrstat *);
330int nfs3svc_release_fhandle2(struct svc_rqst *, __be32 *,
331 struct nfsd3_fhandle_pair *);
332int nfs3svc_encode_entry(void *, const char *name,
333 int namlen, loff_t offset, u64 ino,
334 unsigned int);
335int nfs3svc_encode_entry_plus(void *, const char *name,
336 int namlen, loff_t offset, u64 ino,
337 unsigned int);
338/* Helper functions for NFSv3 ACL code */
339__be32 *nfs3svc_encode_post_op_attr(struct svc_rqst *rqstp, __be32 *p,
340 struct svc_fh *fhp);
341__be32 *nfs3svc_decode_fh(__be32 *p, struct svc_fh *fhp);
342
343
344#endif /* _LINUX_NFSD_XDR3_H */
diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
new file mode 100644
index 000000000000..efa337739534
--- /dev/null
+++ b/fs/nfsd/xdr4.h
@@ -0,0 +1,562 @@
1/*
2 * Server-side types for NFSv4.
3 *
4 * Copyright (c) 2002 The Regents of the University of Michigan.
5 * All rights reserved.
6 *
7 * Kendrick Smith <kmsmith@umich.edu>
8 * Andy Adamson <andros@umich.edu>
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 *
14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution.
19 * 3. Neither the name of the University nor the names of its
20 * contributors may be used to endorse or promote products derived
21 * from this software without specific prior written permission.
22 *
23 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
24 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
25 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
26 * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
28 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
29 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
30 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
31 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
32 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
33 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34 *
35 */
36
37#ifndef _LINUX_NFSD_XDR4_H
38#define _LINUX_NFSD_XDR4_H
39
40#include "state.h"
41#include "nfsd.h"
42
43#define NFSD4_MAX_TAGLEN 128
44#define XDR_LEN(n) (((n) + 3) & ~3)
45
46struct nfsd4_compound_state {
47 struct svc_fh current_fh;
48 struct svc_fh save_fh;
49 struct nfs4_stateowner *replay_owner;
50 /* For sessions DRC */
51 struct nfsd4_session *session;
52 struct nfsd4_slot *slot;
53 __be32 *datap;
54 size_t iovlen;
55 u32 minorversion;
56 u32 status;
57};
58
59static inline bool nfsd4_has_session(struct nfsd4_compound_state *cs)
60{
61 return cs->slot != NULL;
62}
63
64struct nfsd4_change_info {
65 u32 atomic;
66 bool change_supported;
67 u32 before_ctime_sec;
68 u32 before_ctime_nsec;
69 u64 before_change;
70 u32 after_ctime_sec;
71 u32 after_ctime_nsec;
72 u64 after_change;
73};
74
75struct nfsd4_access {
76 u32 ac_req_access; /* request */
77 u32 ac_supported; /* response */
78 u32 ac_resp_access; /* response */
79};
80
81struct nfsd4_close {
82 u32 cl_seqid; /* request */
83 stateid_t cl_stateid; /* request+response */
84 struct nfs4_stateowner * cl_stateowner; /* response */
85};
86
87struct nfsd4_commit {
88 u64 co_offset; /* request */
89 u32 co_count; /* request */
90 nfs4_verifier co_verf; /* response */
91};
92
93struct nfsd4_create {
94 u32 cr_namelen; /* request */
95 char * cr_name; /* request */
96 u32 cr_type; /* request */
97 union { /* request */
98 struct {
99 u32 namelen;
100 char *name;
101 } link; /* NF4LNK */
102 struct {
103 u32 specdata1;
104 u32 specdata2;
105 } dev; /* NF4BLK, NF4CHR */
106 } u;
107 u32 cr_bmval[3]; /* request */
108 struct iattr cr_iattr; /* request */
109 struct nfsd4_change_info cr_cinfo; /* response */
110 struct nfs4_acl *cr_acl;
111};
112#define cr_linklen u.link.namelen
113#define cr_linkname u.link.name
114#define cr_specdata1 u.dev.specdata1
115#define cr_specdata2 u.dev.specdata2
116
117struct nfsd4_delegreturn {
118 stateid_t dr_stateid;
119};
120
121struct nfsd4_getattr {
122 u32 ga_bmval[3]; /* request */
123 struct svc_fh *ga_fhp; /* response */
124};
125
126struct nfsd4_link {
127 u32 li_namelen; /* request */
128 char * li_name; /* request */
129 struct nfsd4_change_info li_cinfo; /* response */
130};
131
132struct nfsd4_lock_denied {
133 clientid_t ld_clientid;
134 struct nfs4_stateowner *ld_sop;
135 u64 ld_start;
136 u64 ld_length;
137 u32 ld_type;
138};
139
140struct nfsd4_lock {
141 /* request */
142 u32 lk_type;
143 u32 lk_reclaim; /* boolean */
144 u64 lk_offset;
145 u64 lk_length;
146 u32 lk_is_new;
147 union {
148 struct {
149 u32 open_seqid;
150 stateid_t open_stateid;
151 u32 lock_seqid;
152 clientid_t clientid;
153 struct xdr_netobj owner;
154 } new;
155 struct {
156 stateid_t lock_stateid;
157 u32 lock_seqid;
158 } old;
159 } v;
160
161 /* response */
162 union {
163 struct {
164 stateid_t stateid;
165 } ok;
166 struct nfsd4_lock_denied denied;
167 } u;
168 /* The lk_replay_owner is the open owner in the open_to_lock_owner
169 * case and the lock owner otherwise: */
170 struct nfs4_stateowner *lk_replay_owner;
171};
172#define lk_new_open_seqid v.new.open_seqid
173#define lk_new_open_stateid v.new.open_stateid
174#define lk_new_lock_seqid v.new.lock_seqid
175#define lk_new_clientid v.new.clientid
176#define lk_new_owner v.new.owner
177#define lk_old_lock_stateid v.old.lock_stateid
178#define lk_old_lock_seqid v.old.lock_seqid
179
180#define lk_rflags u.ok.rflags
181#define lk_resp_stateid u.ok.stateid
182#define lk_denied u.denied
183
184
185struct nfsd4_lockt {
186 u32 lt_type;
187 clientid_t lt_clientid;
188 struct xdr_netobj lt_owner;
189 u64 lt_offset;
190 u64 lt_length;
191 struct nfs4_stateowner * lt_stateowner;
192 struct nfsd4_lock_denied lt_denied;
193};
194
195
196struct nfsd4_locku {
197 u32 lu_type;
198 u32 lu_seqid;
199 stateid_t lu_stateid;
200 u64 lu_offset;
201 u64 lu_length;
202 struct nfs4_stateowner *lu_stateowner;
203};
204
205
206struct nfsd4_lookup {
207 u32 lo_len; /* request */
208 char * lo_name; /* request */
209};
210
211struct nfsd4_putfh {
212 u32 pf_fhlen; /* request */
213 char *pf_fhval; /* request */
214};
215
216struct nfsd4_open {
217 u32 op_claim_type; /* request */
218 struct xdr_netobj op_fname; /* request - everything but CLAIM_PREV */
219 u32 op_delegate_type; /* request - CLAIM_PREV only */
220 stateid_t op_delegate_stateid; /* request - response */
221 u32 op_create; /* request */
222 u32 op_createmode; /* request */
223 u32 op_bmval[3]; /* request */
224 struct iattr iattr; /* UNCHECKED4, GUARDED4, EXCLUSIVE4_1 */
225 nfs4_verifier verf; /* EXCLUSIVE4 */
226 clientid_t op_clientid; /* request */
227 struct xdr_netobj op_owner; /* request */
228 u32 op_seqid; /* request */
229 u32 op_share_access; /* request */
230 u32 op_share_deny; /* request */
231 stateid_t op_stateid; /* response */
232 u32 op_recall; /* recall */
233 struct nfsd4_change_info op_cinfo; /* response */
234 u32 op_rflags; /* response */
235 int op_truncate; /* used during processing */
236 struct nfs4_stateowner *op_stateowner; /* used during processing */
237 struct nfs4_acl *op_acl;
238};
239#define op_iattr iattr
240#define op_verf verf
241
242struct nfsd4_open_confirm {
243 stateid_t oc_req_stateid /* request */;
244 u32 oc_seqid /* request */;
245 stateid_t oc_resp_stateid /* response */;
246 struct nfs4_stateowner * oc_stateowner; /* response */
247};
248
249struct nfsd4_open_downgrade {
250 stateid_t od_stateid;
251 u32 od_seqid;
252 u32 od_share_access;
253 u32 od_share_deny;
254 struct nfs4_stateowner *od_stateowner;
255};
256
257
258struct nfsd4_read {
259 stateid_t rd_stateid; /* request */
260 u64 rd_offset; /* request */
261 u32 rd_length; /* request */
262 int rd_vlen;
263 struct file *rd_filp;
264
265 struct svc_rqst *rd_rqstp; /* response */
266 struct svc_fh * rd_fhp; /* response */
267};
268
269struct nfsd4_readdir {
270 u64 rd_cookie; /* request */
271 nfs4_verifier rd_verf; /* request */
272 u32 rd_dircount; /* request */
273 u32 rd_maxcount; /* request */
274 u32 rd_bmval[3]; /* request */
275 struct svc_rqst *rd_rqstp; /* response */
276 struct svc_fh * rd_fhp; /* response */
277
278 struct readdir_cd common;
279 __be32 * buffer;
280 int buflen;
281 __be32 * offset;
282};
283
284struct nfsd4_release_lockowner {
285 clientid_t rl_clientid;
286 struct xdr_netobj rl_owner;
287};
288struct nfsd4_readlink {
289 struct svc_rqst *rl_rqstp; /* request */
290 struct svc_fh * rl_fhp; /* request */
291};
292
293struct nfsd4_remove {
294 u32 rm_namelen; /* request */
295 char * rm_name; /* request */
296 struct nfsd4_change_info rm_cinfo; /* response */
297};
298
299struct nfsd4_rename {
300 u32 rn_snamelen; /* request */
301 char * rn_sname; /* request */
302 u32 rn_tnamelen; /* request */
303 char * rn_tname; /* request */
304 struct nfsd4_change_info rn_sinfo; /* response */
305 struct nfsd4_change_info rn_tinfo; /* response */
306};
307
308struct nfsd4_secinfo {
309 u32 si_namelen; /* request */
310 char *si_name; /* request */
311 struct svc_export *si_exp; /* response */
312};
313
314struct nfsd4_setattr {
315 stateid_t sa_stateid; /* request */
316 u32 sa_bmval[3]; /* request */
317 struct iattr sa_iattr; /* request */
318 struct nfs4_acl *sa_acl;
319};
320
321struct nfsd4_setclientid {
322 nfs4_verifier se_verf; /* request */
323 u32 se_namelen; /* request */
324 char * se_name; /* request */
325 u32 se_callback_prog; /* request */
326 u32 se_callback_netid_len; /* request */
327 char * se_callback_netid_val; /* request */
328 u32 se_callback_addr_len; /* request */
329 char * se_callback_addr_val; /* request */
330 u32 se_callback_ident; /* request */
331 clientid_t se_clientid; /* response */
332 nfs4_verifier se_confirm; /* response */
333};
334
335struct nfsd4_setclientid_confirm {
336 clientid_t sc_clientid;
337 nfs4_verifier sc_confirm;
338};
339
340/* also used for NVERIFY */
341struct nfsd4_verify {
342 u32 ve_bmval[3]; /* request */
343 u32 ve_attrlen; /* request */
344 char * ve_attrval; /* request */
345};
346
347struct nfsd4_write {
348 stateid_t wr_stateid; /* request */
349 u64 wr_offset; /* request */
350 u32 wr_stable_how; /* request */
351 u32 wr_buflen; /* request */
352 int wr_vlen;
353
354 u32 wr_bytes_written; /* response */
355 u32 wr_how_written; /* response */
356 nfs4_verifier wr_verifier; /* response */
357};
358
359struct nfsd4_exchange_id {
360 nfs4_verifier verifier;
361 struct xdr_netobj clname;
362 u32 flags;
363 clientid_t clientid;
364 u32 seqid;
365 int spa_how;
366};
367
368struct nfsd4_sequence {
369 struct nfs4_sessionid sessionid; /* request/response */
370 u32 seqid; /* request/response */
371 u32 slotid; /* request/response */
372 u32 maxslots; /* request/response */
373 u32 cachethis; /* request */
374#if 0
375 u32 target_maxslots; /* response */
376 u32 status_flags; /* response */
377#endif /* not yet */
378};
379
380struct nfsd4_destroy_session {
381 struct nfs4_sessionid sessionid;
382};
383
384struct nfsd4_op {
385 int opnum;
386 __be32 status;
387 union {
388 struct nfsd4_access access;
389 struct nfsd4_close close;
390 struct nfsd4_commit commit;
391 struct nfsd4_create create;
392 struct nfsd4_delegreturn delegreturn;
393 struct nfsd4_getattr getattr;
394 struct svc_fh * getfh;
395 struct nfsd4_link link;
396 struct nfsd4_lock lock;
397 struct nfsd4_lockt lockt;
398 struct nfsd4_locku locku;
399 struct nfsd4_lookup lookup;
400 struct nfsd4_verify nverify;
401 struct nfsd4_open open;
402 struct nfsd4_open_confirm open_confirm;
403 struct nfsd4_open_downgrade open_downgrade;
404 struct nfsd4_putfh putfh;
405 struct nfsd4_read read;
406 struct nfsd4_readdir readdir;
407 struct nfsd4_readlink readlink;
408 struct nfsd4_remove remove;
409 struct nfsd4_rename rename;
410 clientid_t renew;
411 struct nfsd4_secinfo secinfo;
412 struct nfsd4_setattr setattr;
413 struct nfsd4_setclientid setclientid;
414 struct nfsd4_setclientid_confirm setclientid_confirm;
415 struct nfsd4_verify verify;
416 struct nfsd4_write write;
417 struct nfsd4_release_lockowner release_lockowner;
418
419 /* NFSv4.1 */
420 struct nfsd4_exchange_id exchange_id;
421 struct nfsd4_create_session create_session;
422 struct nfsd4_destroy_session destroy_session;
423 struct nfsd4_sequence sequence;
424 } u;
425 struct nfs4_replay * replay;
426};
427
428struct nfsd4_compoundargs {
429 /* scratch variables for XDR decode */
430 __be32 * p;
431 __be32 * end;
432 struct page ** pagelist;
433 int pagelen;
434 __be32 tmp[8];
435 __be32 * tmpp;
436 struct tmpbuf {
437 struct tmpbuf *next;
438 void (*release)(const void *);
439 void *buf;
440 } *to_free;
441
442 struct svc_rqst *rqstp;
443
444 u32 taglen;
445 char * tag;
446 u32 minorversion;
447 u32 opcnt;
448 struct nfsd4_op *ops;
449 struct nfsd4_op iops[8];
450};
451
452struct nfsd4_compoundres {
453 /* scratch variables for XDR encode */
454 __be32 * p;
455 __be32 * end;
456 struct xdr_buf * xbuf;
457 struct svc_rqst * rqstp;
458
459 u32 taglen;
460 char * tag;
461 u32 opcnt;
462 __be32 * tagp; /* tag, opcount encode location */
463 struct nfsd4_compound_state cstate;
464};
465
466static inline bool nfsd4_is_solo_sequence(struct nfsd4_compoundres *resp)
467{
468 struct nfsd4_compoundargs *args = resp->rqstp->rq_argp;
469 return resp->opcnt == 1 && args->ops[0].opnum == OP_SEQUENCE;
470}
471
472static inline bool nfsd4_not_cached(struct nfsd4_compoundres *resp)
473{
474 return !resp->cstate.slot->sl_cachethis || nfsd4_is_solo_sequence(resp);
475}
476
477#define NFS4_SVC_XDRSIZE sizeof(struct nfsd4_compoundargs)
478
479static inline void
480set_change_info(struct nfsd4_change_info *cinfo, struct svc_fh *fhp)
481{
482 BUG_ON(!fhp->fh_pre_saved || !fhp->fh_post_saved);
483 cinfo->atomic = 1;
484 cinfo->change_supported = IS_I_VERSION(fhp->fh_dentry->d_inode);
485 if (cinfo->change_supported) {
486 cinfo->before_change = fhp->fh_pre_change;
487 cinfo->after_change = fhp->fh_post_change;
488 } else {
489 cinfo->before_ctime_sec = fhp->fh_pre_ctime.tv_sec;
490 cinfo->before_ctime_nsec = fhp->fh_pre_ctime.tv_nsec;
491 cinfo->after_ctime_sec = fhp->fh_post_attr.ctime.tv_sec;
492 cinfo->after_ctime_nsec = fhp->fh_post_attr.ctime.tv_nsec;
493 }
494}
495
496int nfs4svc_encode_voidres(struct svc_rqst *, __be32 *, void *);
497int nfs4svc_decode_compoundargs(struct svc_rqst *, __be32 *,
498 struct nfsd4_compoundargs *);
499int nfs4svc_encode_compoundres(struct svc_rqst *, __be32 *,
500 struct nfsd4_compoundres *);
501void nfsd4_encode_operation(struct nfsd4_compoundres *, struct nfsd4_op *);
502void nfsd4_encode_replay(struct nfsd4_compoundres *resp, struct nfsd4_op *op);
503__be32 nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp,
504 struct dentry *dentry, __be32 *buffer, int *countp,
505 u32 *bmval, struct svc_rqst *, int ignore_crossmnt);
506extern __be32 nfsd4_setclientid(struct svc_rqst *rqstp,
507 struct nfsd4_compound_state *,
508 struct nfsd4_setclientid *setclid);
509extern __be32 nfsd4_setclientid_confirm(struct svc_rqst *rqstp,
510 struct nfsd4_compound_state *,
511 struct nfsd4_setclientid_confirm *setclientid_confirm);
512extern void nfsd4_store_cache_entry(struct nfsd4_compoundres *resp);
513extern __be32 nfsd4_replay_cache_entry(struct nfsd4_compoundres *resp,
514 struct nfsd4_sequence *seq);
515extern __be32 nfsd4_exchange_id(struct svc_rqst *rqstp,
516 struct nfsd4_compound_state *,
517struct nfsd4_exchange_id *);
518 extern __be32 nfsd4_create_session(struct svc_rqst *,
519 struct nfsd4_compound_state *,
520 struct nfsd4_create_session *);
521extern __be32 nfsd4_sequence(struct svc_rqst *,
522 struct nfsd4_compound_state *,
523 struct nfsd4_sequence *);
524extern __be32 nfsd4_destroy_session(struct svc_rqst *,
525 struct nfsd4_compound_state *,
526 struct nfsd4_destroy_session *);
527extern __be32 nfsd4_process_open1(struct nfsd4_compound_state *,
528 struct nfsd4_open *open);
529extern __be32 nfsd4_process_open2(struct svc_rqst *rqstp,
530 struct svc_fh *current_fh, struct nfsd4_open *open);
531extern __be32 nfsd4_open_confirm(struct svc_rqst *rqstp,
532 struct nfsd4_compound_state *, struct nfsd4_open_confirm *oc);
533extern __be32 nfsd4_close(struct svc_rqst *rqstp,
534 struct nfsd4_compound_state *,
535 struct nfsd4_close *close);
536extern __be32 nfsd4_open_downgrade(struct svc_rqst *rqstp,
537 struct nfsd4_compound_state *,
538 struct nfsd4_open_downgrade *od);
539extern __be32 nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *,
540 struct nfsd4_lock *lock);
541extern __be32 nfsd4_lockt(struct svc_rqst *rqstp,
542 struct nfsd4_compound_state *,
543 struct nfsd4_lockt *lockt);
544extern __be32 nfsd4_locku(struct svc_rqst *rqstp,
545 struct nfsd4_compound_state *,
546 struct nfsd4_locku *locku);
547extern __be32
548nfsd4_release_lockowner(struct svc_rqst *rqstp,
549 struct nfsd4_compound_state *,
550 struct nfsd4_release_lockowner *rlockowner);
551extern void nfsd4_release_compoundargs(struct nfsd4_compoundargs *);
552extern __be32 nfsd4_delegreturn(struct svc_rqst *rqstp,
553 struct nfsd4_compound_state *, struct nfsd4_delegreturn *dr);
554extern __be32 nfsd4_renew(struct svc_rqst *rqstp,
555 struct nfsd4_compound_state *, clientid_t *clid);
556#endif
557
558/*
559 * Local variables:
560 * c-basic-offset: 8
561 * End:
562 */
diff --git a/fs/nilfs2/Kconfig b/fs/nilfs2/Kconfig
index 1225af7b2166..251da07b2a1d 100644
--- a/fs/nilfs2/Kconfig
+++ b/fs/nilfs2/Kconfig
@@ -2,7 +2,6 @@ config NILFS2_FS
2 tristate "NILFS2 file system support (EXPERIMENTAL)" 2 tristate "NILFS2 file system support (EXPERIMENTAL)"
3 depends on EXPERIMENTAL 3 depends on EXPERIMENTAL
4 select CRC32 4 select CRC32
5 select FS_JOURNAL_INFO
6 help 5 help
7 NILFS2 is a log-structured file system (LFS) supporting continuous 6 NILFS2 is a log-structured file system (LFS) supporting continuous
8 snapshotting. In addition to versioning capability of the entire 7 snapshotting. In addition to versioning capability of the entire
diff --git a/fs/nilfs2/bmap.c b/fs/nilfs2/bmap.c
index f4a14ea2ed9c..effdbdbe6c11 100644
--- a/fs/nilfs2/bmap.c
+++ b/fs/nilfs2/bmap.c
@@ -417,8 +417,8 @@ __u64 nilfs_bmap_data_get_key(const struct nilfs_bmap *bmap,
417 417
418 key = page_index(bh->b_page) << (PAGE_CACHE_SHIFT - 418 key = page_index(bh->b_page) << (PAGE_CACHE_SHIFT -
419 bmap->b_inode->i_blkbits); 419 bmap->b_inode->i_blkbits);
420 for (pbh = page_buffers(bh->b_page); pbh != bh; 420 for (pbh = page_buffers(bh->b_page); pbh != bh; pbh = pbh->b_this_page)
421 pbh = pbh->b_this_page, key++); 421 key++;
422 422
423 return key; 423 return key;
424} 424}
diff --git a/fs/nilfs2/cpfile.c b/fs/nilfs2/cpfile.c
index d5ad54e204a5..18737818db63 100644
--- a/fs/nilfs2/cpfile.c
+++ b/fs/nilfs2/cpfile.c
@@ -328,19 +328,24 @@ int nilfs_cpfile_delete_checkpoints(struct inode *cpfile,
328 tnicps += nicps; 328 tnicps += nicps;
329 nilfs_mdt_mark_buffer_dirty(cp_bh); 329 nilfs_mdt_mark_buffer_dirty(cp_bh);
330 nilfs_mdt_mark_dirty(cpfile); 330 nilfs_mdt_mark_dirty(cpfile);
331 if (!nilfs_cpfile_is_in_first(cpfile, cno) && 331 if (!nilfs_cpfile_is_in_first(cpfile, cno)) {
332 (count = nilfs_cpfile_block_sub_valid_checkpoints( 332 count =
333 cpfile, cp_bh, kaddr, nicps)) == 0) { 333 nilfs_cpfile_block_sub_valid_checkpoints(
334 /* make hole */ 334 cpfile, cp_bh, kaddr, nicps);
335 kunmap_atomic(kaddr, KM_USER0); 335 if (count == 0) {
336 brelse(cp_bh); 336 /* make hole */
337 ret = nilfs_cpfile_delete_checkpoint_block( 337 kunmap_atomic(kaddr, KM_USER0);
338 cpfile, cno); 338 brelse(cp_bh);
339 if (ret == 0) 339 ret =
340 continue; 340 nilfs_cpfile_delete_checkpoint_block(
341 printk(KERN_ERR "%s: cannot delete block\n", 341 cpfile, cno);
342 __func__); 342 if (ret == 0)
343 break; 343 continue;
344 printk(KERN_ERR
345 "%s: cannot delete block\n",
346 __func__);
347 break;
348 }
344 } 349 }
345 } 350 }
346 351
diff --git a/fs/nilfs2/direct.c b/fs/nilfs2/direct.c
index d369ac718277..236753df5cdf 100644
--- a/fs/nilfs2/direct.c
+++ b/fs/nilfs2/direct.c
@@ -51,11 +51,11 @@ static int nilfs_direct_lookup(const struct nilfs_bmap *bmap,
51 struct nilfs_direct *direct; 51 struct nilfs_direct *direct;
52 __u64 ptr; 52 __u64 ptr;
53 53
54 direct = (struct nilfs_direct *)bmap; 54 direct = (struct nilfs_direct *)bmap; /* XXX: use macro for level 1 */
55 if ((key > NILFS_DIRECT_KEY_MAX) || 55 if (key > NILFS_DIRECT_KEY_MAX || level != 1)
56 (level != 1) || /* XXX: use macro for level 1 */ 56 return -ENOENT;
57 ((ptr = nilfs_direct_get_ptr(direct, key)) == 57 ptr = nilfs_direct_get_ptr(direct, key);
58 NILFS_BMAP_INVALID_PTR)) 58 if (ptr == NILFS_BMAP_INVALID_PTR)
59 return -ENOENT; 59 return -ENOENT;
60 60
61 if (ptrp != NULL) 61 if (ptrp != NULL)
@@ -73,9 +73,10 @@ static int nilfs_direct_lookup_contig(const struct nilfs_bmap *bmap,
73 sector_t blocknr; 73 sector_t blocknr;
74 int ret, cnt; 74 int ret, cnt;
75 75
76 if (key > NILFS_DIRECT_KEY_MAX || 76 if (key > NILFS_DIRECT_KEY_MAX)
77 (ptr = nilfs_direct_get_ptr(direct, key)) == 77 return -ENOENT;
78 NILFS_BMAP_INVALID_PTR) 78 ptr = nilfs_direct_get_ptr(direct, key);
79 if (ptr == NILFS_BMAP_INVALID_PTR)
79 return -ENOENT; 80 return -ENOENT;
80 81
81 if (NILFS_BMAP_USE_VBN(bmap)) { 82 if (NILFS_BMAP_USE_VBN(bmap)) {
diff --git a/fs/nilfs2/ioctl.c b/fs/nilfs2/ioctl.c
index f6af76042d80..d6b2b83de363 100644
--- a/fs/nilfs2/ioctl.c
+++ b/fs/nilfs2/ioctl.c
@@ -480,7 +480,7 @@ static int nilfs_ioctl_clean_segments(struct inode *inode, struct file *filp,
480 unsigned int cmd, void __user *argp) 480 unsigned int cmd, void __user *argp)
481{ 481{
482 struct nilfs_argv argv[5]; 482 struct nilfs_argv argv[5];
483 const static size_t argsz[5] = { 483 static const size_t argsz[5] = {
484 sizeof(struct nilfs_vdesc), 484 sizeof(struct nilfs_vdesc),
485 sizeof(struct nilfs_period), 485 sizeof(struct nilfs_period),
486 sizeof(__u64), 486 sizeof(__u64),
diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c
index 17584c524486..105b508b47a8 100644
--- a/fs/nilfs2/segment.c
+++ b/fs/nilfs2/segment.c
@@ -2829,7 +2829,7 @@ static void nilfs_segctor_destroy(struct nilfs_sc_info *sci)
2829 || sci->sc_seq_request != sci->sc_seq_done); 2829 || sci->sc_seq_request != sci->sc_seq_done);
2830 spin_unlock(&sci->sc_state_lock); 2830 spin_unlock(&sci->sc_state_lock);
2831 2831
2832 if (flag || nilfs_segctor_confirm(sci)) 2832 if (flag || !nilfs_segctor_confirm(sci))
2833 nilfs_segctor_write_out(sci); 2833 nilfs_segctor_write_out(sci);
2834 2834
2835 WARN_ON(!list_empty(&sci->sc_copied_buffers)); 2835 WARN_ON(!list_empty(&sci->sc_copied_buffers));
diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c
index 5403b3ef3a42..8173faee31e6 100644
--- a/fs/nilfs2/super.c
+++ b/fs/nilfs2/super.c
@@ -1118,8 +1118,7 @@ nilfs_get_sb(struct file_system_type *fs_type, int flags,
1118 /* Abandoning the newly allocated superblock */ 1118 /* Abandoning the newly allocated superblock */
1119 mutex_unlock(&nilfs->ns_mount_mutex); 1119 mutex_unlock(&nilfs->ns_mount_mutex);
1120 put_nilfs(nilfs); 1120 put_nilfs(nilfs);
1121 up_write(&s->s_umount); 1121 deactivate_locked_super(s);
1122 deactivate_super(s);
1123 /* 1122 /*
1124 * deactivate_super() invokes close_bdev_exclusive(). 1123 * deactivate_super() invokes close_bdev_exclusive().
1125 * We must finish all post-cleaning before this call; 1124 * We must finish all post-cleaning before this call;
diff --git a/fs/notify/inotify/inotify_fsnotify.c b/fs/notify/inotify/inotify_fsnotify.c
index c9ee67b442e1..1afb0a10229f 100644
--- a/fs/notify/inotify/inotify_fsnotify.c
+++ b/fs/notify/inotify/inotify_fsnotify.c
@@ -121,7 +121,7 @@ static int idr_callback(int id, void *p, void *data)
121 if (warned) 121 if (warned)
122 return 0; 122 return 0;
123 123
124 warned = false; 124 warned = true;
125 entry = p; 125 entry = p;
126 ientry = container_of(entry, struct inotify_inode_mark_entry, fsn_entry); 126 ientry = container_of(entry, struct inotify_inode_mark_entry, fsn_entry);
127 127
diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c
index 5ef5f365a5c8..a94e8bd8eb1f 100644
--- a/fs/notify/inotify/inotify_user.c
+++ b/fs/notify/inotify/inotify_user.c
@@ -552,7 +552,7 @@ retry:
552 552
553 spin_lock(&group->inotify_data.idr_lock); 553 spin_lock(&group->inotify_data.idr_lock);
554 ret = idr_get_new_above(&group->inotify_data.idr, &tmp_ientry->fsn_entry, 554 ret = idr_get_new_above(&group->inotify_data.idr, &tmp_ientry->fsn_entry,
555 group->inotify_data.last_wd, 555 group->inotify_data.last_wd+1,
556 &tmp_ientry->wd); 556 &tmp_ientry->wd);
557 spin_unlock(&group->inotify_data.idr_lock); 557 spin_unlock(&group->inotify_data.idr_lock);
558 if (ret) { 558 if (ret) {
@@ -632,7 +632,7 @@ static struct fsnotify_group *inotify_new_group(struct user_struct *user, unsign
632 632
633 spin_lock_init(&group->inotify_data.idr_lock); 633 spin_lock_init(&group->inotify_data.idr_lock);
634 idr_init(&group->inotify_data.idr); 634 idr_init(&group->inotify_data.idr);
635 group->inotify_data.last_wd = 1; 635 group->inotify_data.last_wd = 0;
636 group->inotify_data.user = user; 636 group->inotify_data.user = user;
637 group->inotify_data.fa = NULL; 637 group->inotify_data.fa = NULL;
638 638
@@ -646,6 +646,7 @@ SYSCALL_DEFINE1(inotify_init1, int, flags)
646 struct fsnotify_group *group; 646 struct fsnotify_group *group;
647 struct user_struct *user; 647 struct user_struct *user;
648 struct file *filp; 648 struct file *filp;
649 struct path path;
649 int fd, ret; 650 int fd, ret;
650 651
651 /* Check the IN_* constants for consistency. */ 652 /* Check the IN_* constants for consistency. */
@@ -659,12 +660,6 @@ SYSCALL_DEFINE1(inotify_init1, int, flags)
659 if (fd < 0) 660 if (fd < 0)
660 return fd; 661 return fd;
661 662
662 filp = get_empty_filp();
663 if (!filp) {
664 ret = -ENFILE;
665 goto out_put_fd;
666 }
667
668 user = get_current_user(); 663 user = get_current_user();
669 if (unlikely(atomic_read(&user->inotify_devs) >= 664 if (unlikely(atomic_read(&user->inotify_devs) >=
670 inotify_max_user_instances)) { 665 inotify_max_user_instances)) {
@@ -679,24 +674,28 @@ SYSCALL_DEFINE1(inotify_init1, int, flags)
679 goto out_free_uid; 674 goto out_free_uid;
680 } 675 }
681 676
682 filp->f_op = &inotify_fops; 677 atomic_inc(&user->inotify_devs);
683 filp->f_path.mnt = mntget(inotify_mnt); 678
684 filp->f_path.dentry = dget(inotify_mnt->mnt_root); 679 path.mnt = inotify_mnt;
685 filp->f_mapping = filp->f_path.dentry->d_inode->i_mapping; 680 path.dentry = inotify_mnt->mnt_root;
686 filp->f_mode = FMODE_READ; 681 path_get(&path);
682 filp = alloc_file(&path, FMODE_READ, &inotify_fops);
683 if (!filp)
684 goto Enfile;
685
687 filp->f_flags = O_RDONLY | (flags & O_NONBLOCK); 686 filp->f_flags = O_RDONLY | (flags & O_NONBLOCK);
688 filp->private_data = group; 687 filp->private_data = group;
689 688
690 atomic_inc(&user->inotify_devs);
691
692 fd_install(fd, filp); 689 fd_install(fd, filp);
693 690
694 return fd; 691 return fd;
695 692
693Enfile:
694 ret = -ENFILE;
695 path_put(&path);
696 atomic_dec(&user->inotify_devs);
696out_free_uid: 697out_free_uid:
697 free_uid(user); 698 free_uid(user);
698 put_filp(filp);
699out_put_fd:
700 put_unused_fd(fd); 699 put_unused_fd(fd);
701 return ret; 700 return ret;
702} 701}
diff --git a/fs/ntfs/inode.c b/fs/ntfs/inode.c
index 9938034762cc..dc2505abb6d7 100644
--- a/fs/ntfs/inode.c
+++ b/fs/ntfs/inode.c
@@ -530,7 +530,7 @@ err_corrupt_attr:
530 * the ntfs inode. 530 * the ntfs inode.
531 * 531 *
532 * Q: What locks are held when the function is called? 532 * Q: What locks are held when the function is called?
533 * A: i_state has I_LOCK set, hence the inode is locked, also 533 * A: i_state has I_NEW set, hence the inode is locked, also
534 * i_count is set to 1, so it is not going to go away 534 * i_count is set to 1, so it is not going to go away
535 * i_flags is set to 0 and we have no business touching it. Only an ioctl() 535 * i_flags is set to 0 and we have no business touching it. Only an ioctl()
536 * is allowed to write to them. We should of course be honouring them but 536 * is allowed to write to them. We should of course be honouring them but
@@ -1207,7 +1207,7 @@ err_out:
1207 * necessary fields in @vi as well as initializing the ntfs inode. 1207 * necessary fields in @vi as well as initializing the ntfs inode.
1208 * 1208 *
1209 * Q: What locks are held when the function is called? 1209 * Q: What locks are held when the function is called?
1210 * A: i_state has I_LOCK set, hence the inode is locked, also 1210 * A: i_state has I_NEW set, hence the inode is locked, also
1211 * i_count is set to 1, so it is not going to go away 1211 * i_count is set to 1, so it is not going to go away
1212 * 1212 *
1213 * Return 0 on success and -errno on error. In the error case, the inode will 1213 * Return 0 on success and -errno on error. In the error case, the inode will
@@ -1474,7 +1474,7 @@ err_out:
1474 * normal directory inodes. 1474 * normal directory inodes.
1475 * 1475 *
1476 * Q: What locks are held when the function is called? 1476 * Q: What locks are held when the function is called?
1477 * A: i_state has I_LOCK set, hence the inode is locked, also 1477 * A: i_state has I_NEW set, hence the inode is locked, also
1478 * i_count is set to 1, so it is not going to go away 1478 * i_count is set to 1, so it is not going to go away
1479 * 1479 *
1480 * Return 0 on success and -errno on error. In the error case, the inode will 1480 * Return 0 on success and -errno on error. In the error case, the inode will
diff --git a/fs/ocfs2/Kconfig b/fs/ocfs2/Kconfig
index 701b7a3a872e..0d840669698e 100644
--- a/fs/ocfs2/Kconfig
+++ b/fs/ocfs2/Kconfig
@@ -6,6 +6,7 @@ config OCFS2_FS
6 select CRC32 6 select CRC32
7 select QUOTA 7 select QUOTA
8 select QUOTA_TREE 8 select QUOTA_TREE
9 select FS_POSIX_ACL
9 help 10 help
10 OCFS2 is a general purpose extent based shared disk cluster file 11 OCFS2 is a general purpose extent based shared disk cluster file
11 system with many similarities to ext3. It supports 64 bit inode 12 system with many similarities to ext3. It supports 64 bit inode
@@ -74,12 +75,3 @@ config OCFS2_DEBUG_FS
74 This option will enable expensive consistency checks. Enable 75 This option will enable expensive consistency checks. Enable
75 this option for debugging only as it is likely to decrease 76 this option for debugging only as it is likely to decrease
76 performance of the filesystem. 77 performance of the filesystem.
77
78config OCFS2_FS_POSIX_ACL
79 bool "OCFS2 POSIX Access Control Lists"
80 depends on OCFS2_FS
81 select FS_POSIX_ACL
82 default n
83 help
84 Posix Access Control Lists (ACLs) support permissions for users and
85 groups beyond the owner/group/world scheme.
diff --git a/fs/ocfs2/Makefile b/fs/ocfs2/Makefile
index 31f25ce32c97..600d2d2ade11 100644
--- a/fs/ocfs2/Makefile
+++ b/fs/ocfs2/Makefile
@@ -39,11 +39,8 @@ ocfs2-objs := \
39 ver.o \ 39 ver.o \
40 quota_local.o \ 40 quota_local.o \
41 quota_global.o \ 41 quota_global.o \
42 xattr.o 42 xattr.o \
43 43 acl.o
44ifeq ($(CONFIG_OCFS2_FS_POSIX_ACL),y)
45ocfs2-objs += acl.o
46endif
47 44
48ocfs2_stackglue-objs := stackglue.o 45ocfs2_stackglue-objs := stackglue.o
49ocfs2_stack_o2cb-objs := stack_o2cb.o 46ocfs2_stack_o2cb-objs := stack_o2cb.o
diff --git a/fs/ocfs2/acl.c b/fs/ocfs2/acl.c
index fbeaec762103..0501974bedd0 100644
--- a/fs/ocfs2/acl.c
+++ b/fs/ocfs2/acl.c
@@ -98,15 +98,11 @@ static struct posix_acl *ocfs2_get_acl_nolock(struct inode *inode,
98 int type, 98 int type,
99 struct buffer_head *di_bh) 99 struct buffer_head *di_bh)
100{ 100{
101 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
102 int name_index; 101 int name_index;
103 char *value = NULL; 102 char *value = NULL;
104 struct posix_acl *acl; 103 struct posix_acl *acl;
105 int retval; 104 int retval;
106 105
107 if (!(osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL))
108 return NULL;
109
110 switch (type) { 106 switch (type) {
111 case ACL_TYPE_ACCESS: 107 case ACL_TYPE_ACCESS:
112 name_index = OCFS2_XATTR_INDEX_POSIX_ACL_ACCESS; 108 name_index = OCFS2_XATTR_INDEX_POSIX_ACL_ACCESS;
@@ -331,13 +327,14 @@ cleanup:
331 return ret; 327 return ret;
332} 328}
333 329
334static size_t ocfs2_xattr_list_acl_access(struct inode *inode, 330static size_t ocfs2_xattr_list_acl_access(struct dentry *dentry,
335 char *list, 331 char *list,
336 size_t list_len, 332 size_t list_len,
337 const char *name, 333 const char *name,
338 size_t name_len) 334 size_t name_len,
335 int type)
339{ 336{
340 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 337 struct ocfs2_super *osb = OCFS2_SB(dentry->d_sb);
341 const size_t size = sizeof(POSIX_ACL_XATTR_ACCESS); 338 const size_t size = sizeof(POSIX_ACL_XATTR_ACCESS);
342 339
343 if (!(osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL)) 340 if (!(osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL))
@@ -348,13 +345,14 @@ static size_t ocfs2_xattr_list_acl_access(struct inode *inode,
348 return size; 345 return size;
349} 346}
350 347
351static size_t ocfs2_xattr_list_acl_default(struct inode *inode, 348static size_t ocfs2_xattr_list_acl_default(struct dentry *dentry,
352 char *list, 349 char *list,
353 size_t list_len, 350 size_t list_len,
354 const char *name, 351 const char *name,
355 size_t name_len) 352 size_t name_len,
353 int type)
356{ 354{
357 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 355 struct ocfs2_super *osb = OCFS2_SB(dentry->d_sb);
358 const size_t size = sizeof(POSIX_ACL_XATTR_DEFAULT); 356 const size_t size = sizeof(POSIX_ACL_XATTR_DEFAULT);
359 357
360 if (!(osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL)) 358 if (!(osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL))
@@ -365,19 +363,19 @@ static size_t ocfs2_xattr_list_acl_default(struct inode *inode,
365 return size; 363 return size;
366} 364}
367 365
368static int ocfs2_xattr_get_acl(struct inode *inode, 366static int ocfs2_xattr_get_acl(struct dentry *dentry, const char *name,
369 int type, 367 void *buffer, size_t size, int type)
370 void *buffer,
371 size_t size)
372{ 368{
373 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 369 struct ocfs2_super *osb = OCFS2_SB(dentry->d_sb);
374 struct posix_acl *acl; 370 struct posix_acl *acl;
375 int ret; 371 int ret;
376 372
373 if (strcmp(name, "") != 0)
374 return -EINVAL;
377 if (!(osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL)) 375 if (!(osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL))
378 return -EOPNOTSUPP; 376 return -EOPNOTSUPP;
379 377
380 acl = ocfs2_get_acl(inode, type); 378 acl = ocfs2_get_acl(dentry->d_inode, type);
381 if (IS_ERR(acl)) 379 if (IS_ERR(acl))
382 return PTR_ERR(acl); 380 return PTR_ERR(acl);
383 if (acl == NULL) 381 if (acl == NULL)
@@ -388,35 +386,16 @@ static int ocfs2_xattr_get_acl(struct inode *inode,
388 return ret; 386 return ret;
389} 387}
390 388
391static int ocfs2_xattr_get_acl_access(struct inode *inode, 389static int ocfs2_xattr_set_acl(struct dentry *dentry, const char *name,
392 const char *name, 390 const void *value, size_t size, int flags, int type)
393 void *buffer,
394 size_t size)
395{
396 if (strcmp(name, "") != 0)
397 return -EINVAL;
398 return ocfs2_xattr_get_acl(inode, ACL_TYPE_ACCESS, buffer, size);
399}
400
401static int ocfs2_xattr_get_acl_default(struct inode *inode,
402 const char *name,
403 void *buffer,
404 size_t size)
405{
406 if (strcmp(name, "") != 0)
407 return -EINVAL;
408 return ocfs2_xattr_get_acl(inode, ACL_TYPE_DEFAULT, buffer, size);
409}
410
411static int ocfs2_xattr_set_acl(struct inode *inode,
412 int type,
413 const void *value,
414 size_t size)
415{ 391{
392 struct inode *inode = dentry->d_inode;
416 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 393 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
417 struct posix_acl *acl; 394 struct posix_acl *acl;
418 int ret = 0; 395 int ret = 0;
419 396
397 if (strcmp(name, "") != 0)
398 return -EINVAL;
420 if (!(osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL)) 399 if (!(osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL))
421 return -EOPNOTSUPP; 400 return -EOPNOTSUPP;
422 401
@@ -442,38 +421,18 @@ cleanup:
442 return ret; 421 return ret;
443} 422}
444 423
445static int ocfs2_xattr_set_acl_access(struct inode *inode,
446 const char *name,
447 const void *value,
448 size_t size,
449 int flags)
450{
451 if (strcmp(name, "") != 0)
452 return -EINVAL;
453 return ocfs2_xattr_set_acl(inode, ACL_TYPE_ACCESS, value, size);
454}
455
456static int ocfs2_xattr_set_acl_default(struct inode *inode,
457 const char *name,
458 const void *value,
459 size_t size,
460 int flags)
461{
462 if (strcmp(name, "") != 0)
463 return -EINVAL;
464 return ocfs2_xattr_set_acl(inode, ACL_TYPE_DEFAULT, value, size);
465}
466
467struct xattr_handler ocfs2_xattr_acl_access_handler = { 424struct xattr_handler ocfs2_xattr_acl_access_handler = {
468 .prefix = POSIX_ACL_XATTR_ACCESS, 425 .prefix = POSIX_ACL_XATTR_ACCESS,
426 .flags = ACL_TYPE_ACCESS,
469 .list = ocfs2_xattr_list_acl_access, 427 .list = ocfs2_xattr_list_acl_access,
470 .get = ocfs2_xattr_get_acl_access, 428 .get = ocfs2_xattr_get_acl,
471 .set = ocfs2_xattr_set_acl_access, 429 .set = ocfs2_xattr_set_acl,
472}; 430};
473 431
474struct xattr_handler ocfs2_xattr_acl_default_handler = { 432struct xattr_handler ocfs2_xattr_acl_default_handler = {
475 .prefix = POSIX_ACL_XATTR_DEFAULT, 433 .prefix = POSIX_ACL_XATTR_DEFAULT,
434 .flags = ACL_TYPE_DEFAULT,
476 .list = ocfs2_xattr_list_acl_default, 435 .list = ocfs2_xattr_list_acl_default,
477 .get = ocfs2_xattr_get_acl_default, 436 .get = ocfs2_xattr_get_acl,
478 .set = ocfs2_xattr_set_acl_default, 437 .set = ocfs2_xattr_set_acl,
479}; 438};
diff --git a/fs/ocfs2/acl.h b/fs/ocfs2/acl.h
index 8f6389ed4da5..5c5d31f05853 100644
--- a/fs/ocfs2/acl.h
+++ b/fs/ocfs2/acl.h
@@ -26,8 +26,6 @@ struct ocfs2_acl_entry {
26 __le32 e_id; 26 __le32 e_id;
27}; 27};
28 28
29#ifdef CONFIG_OCFS2_FS_POSIX_ACL
30
31extern int ocfs2_check_acl(struct inode *, int); 29extern int ocfs2_check_acl(struct inode *, int);
32extern int ocfs2_acl_chmod(struct inode *); 30extern int ocfs2_acl_chmod(struct inode *);
33extern int ocfs2_init_acl(handle_t *, struct inode *, struct inode *, 31extern int ocfs2_init_acl(handle_t *, struct inode *, struct inode *,
@@ -35,24 +33,4 @@ extern int ocfs2_init_acl(handle_t *, struct inode *, struct inode *,
35 struct ocfs2_alloc_context *, 33 struct ocfs2_alloc_context *,
36 struct ocfs2_alloc_context *); 34 struct ocfs2_alloc_context *);
37 35
38#else /* CONFIG_OCFS2_FS_POSIX_ACL*/
39
40#define ocfs2_check_acl NULL
41static inline int ocfs2_acl_chmod(struct inode *inode)
42{
43 return 0;
44}
45static inline int ocfs2_init_acl(handle_t *handle,
46 struct inode *inode,
47 struct inode *dir,
48 struct buffer_head *di_bh,
49 struct buffer_head *dir_bh,
50 struct ocfs2_alloc_context *meta_ac,
51 struct ocfs2_alloc_context *data_ac)
52{
53 return 0;
54}
55
56#endif /* CONFIG_OCFS2_FS_POSIX_ACL*/
57
58#endif /* OCFS2_ACL_H */ 36#endif /* OCFS2_ACL_H */
diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index 7c7198a5bc90..d17bdc718f74 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -1765,9 +1765,9 @@ set_and_inc:
1765 * 1765 *
1766 * The array index of the subtree root is passed back. 1766 * The array index of the subtree root is passed back.
1767 */ 1767 */
1768static int ocfs2_find_subtree_root(struct ocfs2_extent_tree *et, 1768int ocfs2_find_subtree_root(struct ocfs2_extent_tree *et,
1769 struct ocfs2_path *left, 1769 struct ocfs2_path *left,
1770 struct ocfs2_path *right) 1770 struct ocfs2_path *right)
1771{ 1771{
1772 int i = 0; 1772 int i = 0;
1773 1773
@@ -2872,8 +2872,8 @@ out:
2872 * This looks similar, but is subtly different to 2872 * This looks similar, but is subtly different to
2873 * ocfs2_find_cpos_for_left_leaf(). 2873 * ocfs2_find_cpos_for_left_leaf().
2874 */ 2874 */
2875static int ocfs2_find_cpos_for_right_leaf(struct super_block *sb, 2875int ocfs2_find_cpos_for_right_leaf(struct super_block *sb,
2876 struct ocfs2_path *path, u32 *cpos) 2876 struct ocfs2_path *path, u32 *cpos)
2877{ 2877{
2878 int i, j, ret = 0; 2878 int i, j, ret = 0;
2879 u64 blkno; 2879 u64 blkno;
@@ -7190,8 +7190,8 @@ int ocfs2_zero_range_for_truncate(struct inode *inode, handle_t *handle,
7190 * wait on them - the truncate_inode_pages() call later will 7190 * wait on them - the truncate_inode_pages() call later will
7191 * do that for us. 7191 * do that for us.
7192 */ 7192 */
7193 ret = do_sync_mapping_range(inode->i_mapping, range_start, 7193 ret = filemap_fdatawrite_range(inode->i_mapping, range_start,
7194 range_end - 1, SYNC_FILE_RANGE_WRITE); 7194 range_end - 1);
7195 if (ret) 7195 if (ret)
7196 mlog_errno(ret); 7196 mlog_errno(ret);
7197 7197
diff --git a/fs/ocfs2/alloc.h b/fs/ocfs2/alloc.h
index 9c122d574464..1db4359ccb90 100644
--- a/fs/ocfs2/alloc.h
+++ b/fs/ocfs2/alloc.h
@@ -317,4 +317,9 @@ int ocfs2_path_bh_journal_access(handle_t *handle,
317int ocfs2_journal_access_path(struct ocfs2_caching_info *ci, 317int ocfs2_journal_access_path(struct ocfs2_caching_info *ci,
318 handle_t *handle, 318 handle_t *handle,
319 struct ocfs2_path *path); 319 struct ocfs2_path *path);
320int ocfs2_find_cpos_for_right_leaf(struct super_block *sb,
321 struct ocfs2_path *path, u32 *cpos);
322int ocfs2_find_subtree_root(struct ocfs2_extent_tree *et,
323 struct ocfs2_path *left,
324 struct ocfs2_path *right);
320#endif /* OCFS2_ALLOC_H */ 325#endif /* OCFS2_ALLOC_H */
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index 3dae4a13f6e4..7e9df11260f4 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -599,7 +599,7 @@ bail:
599 return ret; 599 return ret;
600} 600}
601 601
602/* 602/*
603 * ocfs2_dio_end_io is called by the dio core when a dio is finished. We're 603 * ocfs2_dio_end_io is called by the dio core when a dio is finished. We're
604 * particularly interested in the aio/dio case. Like the core uses 604 * particularly interested in the aio/dio case. Like the core uses
605 * i_alloc_sem, we use the rw_lock DLM lock to protect io on one node from 605 * i_alloc_sem, we use the rw_lock DLM lock to protect io on one node from
@@ -670,7 +670,7 @@ static ssize_t ocfs2_direct_IO(int rw,
670 670
671 ret = blockdev_direct_IO_no_locking(rw, iocb, inode, 671 ret = blockdev_direct_IO_no_locking(rw, iocb, inode,
672 inode->i_sb->s_bdev, iov, offset, 672 inode->i_sb->s_bdev, iov, offset,
673 nr_segs, 673 nr_segs,
674 ocfs2_direct_IO_get_blocks, 674 ocfs2_direct_IO_get_blocks,
675 ocfs2_dio_end_io); 675 ocfs2_dio_end_io);
676 676
diff --git a/fs/ocfs2/buffer_head_io.c b/fs/ocfs2/buffer_head_io.c
index d43d34a1dd31..21c808f752d8 100644
--- a/fs/ocfs2/buffer_head_io.c
+++ b/fs/ocfs2/buffer_head_io.c
@@ -368,7 +368,7 @@ int ocfs2_read_blocks(struct ocfs2_caching_info *ci, u64 block, int nr,
368 } 368 }
369 ocfs2_metadata_cache_io_unlock(ci); 369 ocfs2_metadata_cache_io_unlock(ci);
370 370
371 mlog(ML_BH_IO, "block=(%llu), nr=(%d), cached=%s, flags=0x%x\n", 371 mlog(ML_BH_IO, "block=(%llu), nr=(%d), cached=%s, flags=0x%x\n",
372 (unsigned long long)block, nr, 372 (unsigned long long)block, nr,
373 ((flags & OCFS2_BH_IGNORE_CACHE) || ignore_cache) ? "no" : "yes", 373 ((flags & OCFS2_BH_IGNORE_CACHE) || ignore_cache) ? "no" : "yes",
374 flags); 374 flags);
diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c
index c452d116b892..5c9890006708 100644
--- a/fs/ocfs2/cluster/heartbeat.c
+++ b/fs/ocfs2/cluster/heartbeat.c
@@ -78,7 +78,7 @@ static struct o2hb_callback *hbcall_from_type(enum o2hb_callback_type type);
78 78
79unsigned int o2hb_dead_threshold = O2HB_DEFAULT_DEAD_THRESHOLD; 79unsigned int o2hb_dead_threshold = O2HB_DEFAULT_DEAD_THRESHOLD;
80 80
81/* Only sets a new threshold if there are no active regions. 81/* Only sets a new threshold if there are no active regions.
82 * 82 *
83 * No locking or otherwise interesting code is required for reading 83 * No locking or otherwise interesting code is required for reading
84 * o2hb_dead_threshold as it can't change once regions are active and 84 * o2hb_dead_threshold as it can't change once regions are active and
@@ -170,13 +170,14 @@ static void o2hb_write_timeout(struct work_struct *work)
170 170
171 mlog(ML_ERROR, "Heartbeat write timeout to device %s after %u " 171 mlog(ML_ERROR, "Heartbeat write timeout to device %s after %u "
172 "milliseconds\n", reg->hr_dev_name, 172 "milliseconds\n", reg->hr_dev_name,
173 jiffies_to_msecs(jiffies - reg->hr_last_timeout_start)); 173 jiffies_to_msecs(jiffies - reg->hr_last_timeout_start));
174 o2quo_disk_timeout(); 174 o2quo_disk_timeout();
175} 175}
176 176
177static void o2hb_arm_write_timeout(struct o2hb_region *reg) 177static void o2hb_arm_write_timeout(struct o2hb_region *reg)
178{ 178{
179 mlog(0, "Queue write timeout for %u ms\n", O2HB_MAX_WRITE_TIMEOUT_MS); 179 mlog(ML_HEARTBEAT, "Queue write timeout for %u ms\n",
180 O2HB_MAX_WRITE_TIMEOUT_MS);
180 181
181 cancel_delayed_work(&reg->hr_write_timeout_work); 182 cancel_delayed_work(&reg->hr_write_timeout_work);
182 reg->hr_last_timeout_start = jiffies; 183 reg->hr_last_timeout_start = jiffies;
@@ -623,7 +624,7 @@ static int o2hb_check_slot(struct o2hb_region *reg,
623 "seq %llu last %llu changed %u equal %u\n", 624 "seq %llu last %llu changed %u equal %u\n",
624 slot->ds_node_num, (long long)slot->ds_last_generation, 625 slot->ds_node_num, (long long)slot->ds_last_generation,
625 le32_to_cpu(hb_block->hb_cksum), 626 le32_to_cpu(hb_block->hb_cksum),
626 (unsigned long long)le64_to_cpu(hb_block->hb_seq), 627 (unsigned long long)le64_to_cpu(hb_block->hb_seq),
627 (unsigned long long)slot->ds_last_time, slot->ds_changed_samples, 628 (unsigned long long)slot->ds_last_time, slot->ds_changed_samples,
628 slot->ds_equal_samples); 629 slot->ds_equal_samples);
629 630
@@ -874,7 +875,8 @@ static int o2hb_thread(void *data)
874 do_gettimeofday(&after_hb); 875 do_gettimeofday(&after_hb);
875 elapsed_msec = o2hb_elapsed_msecs(&before_hb, &after_hb); 876 elapsed_msec = o2hb_elapsed_msecs(&before_hb, &after_hb);
876 877
877 mlog(0, "start = %lu.%lu, end = %lu.%lu, msec = %u\n", 878 mlog(ML_HEARTBEAT,
879 "start = %lu.%lu, end = %lu.%lu, msec = %u\n",
878 before_hb.tv_sec, (unsigned long) before_hb.tv_usec, 880 before_hb.tv_sec, (unsigned long) before_hb.tv_usec,
879 after_hb.tv_sec, (unsigned long) after_hb.tv_usec, 881 after_hb.tv_sec, (unsigned long) after_hb.tv_usec,
880 elapsed_msec); 882 elapsed_msec);
diff --git a/fs/ocfs2/cluster/nodemanager.c b/fs/ocfs2/cluster/nodemanager.c
index 7ee6188bc79a..c81142e3ef84 100644
--- a/fs/ocfs2/cluster/nodemanager.c
+++ b/fs/ocfs2/cluster/nodemanager.c
@@ -35,6 +35,10 @@
35 * cluster references throughout where nodes are looked up */ 35 * cluster references throughout where nodes are looked up */
36struct o2nm_cluster *o2nm_single_cluster = NULL; 36struct o2nm_cluster *o2nm_single_cluster = NULL;
37 37
38char *o2nm_fence_method_desc[O2NM_FENCE_METHODS] = {
39 "reset", /* O2NM_FENCE_RESET */
40 "panic", /* O2NM_FENCE_PANIC */
41};
38 42
39struct o2nm_node *o2nm_get_node_by_num(u8 node_num) 43struct o2nm_node *o2nm_get_node_by_num(u8 node_num)
40{ 44{
@@ -579,6 +583,43 @@ static ssize_t o2nm_cluster_attr_reconnect_delay_ms_write(
579 return o2nm_cluster_attr_write(page, count, 583 return o2nm_cluster_attr_write(page, count,
580 &cluster->cl_reconnect_delay_ms); 584 &cluster->cl_reconnect_delay_ms);
581} 585}
586
587static ssize_t o2nm_cluster_attr_fence_method_read(
588 struct o2nm_cluster *cluster, char *page)
589{
590 ssize_t ret = 0;
591
592 if (cluster)
593 ret = sprintf(page, "%s\n",
594 o2nm_fence_method_desc[cluster->cl_fence_method]);
595 return ret;
596}
597
598static ssize_t o2nm_cluster_attr_fence_method_write(
599 struct o2nm_cluster *cluster, const char *page, size_t count)
600{
601 unsigned int i;
602
603 if (page[count - 1] != '\n')
604 goto bail;
605
606 for (i = 0; i < O2NM_FENCE_METHODS; ++i) {
607 if (count != strlen(o2nm_fence_method_desc[i]) + 1)
608 continue;
609 if (strncasecmp(page, o2nm_fence_method_desc[i], count - 1))
610 continue;
611 if (cluster->cl_fence_method != i) {
612 printk(KERN_INFO "ocfs2: Changing fence method to %s\n",
613 o2nm_fence_method_desc[i]);
614 cluster->cl_fence_method = i;
615 }
616 return count;
617 }
618
619bail:
620 return -EINVAL;
621}
622
582static struct o2nm_cluster_attribute o2nm_cluster_attr_idle_timeout_ms = { 623static struct o2nm_cluster_attribute o2nm_cluster_attr_idle_timeout_ms = {
583 .attr = { .ca_owner = THIS_MODULE, 624 .attr = { .ca_owner = THIS_MODULE,
584 .ca_name = "idle_timeout_ms", 625 .ca_name = "idle_timeout_ms",
@@ -603,10 +644,19 @@ static struct o2nm_cluster_attribute o2nm_cluster_attr_reconnect_delay_ms = {
603 .store = o2nm_cluster_attr_reconnect_delay_ms_write, 644 .store = o2nm_cluster_attr_reconnect_delay_ms_write,
604}; 645};
605 646
647static struct o2nm_cluster_attribute o2nm_cluster_attr_fence_method = {
648 .attr = { .ca_owner = THIS_MODULE,
649 .ca_name = "fence_method",
650 .ca_mode = S_IRUGO | S_IWUSR },
651 .show = o2nm_cluster_attr_fence_method_read,
652 .store = o2nm_cluster_attr_fence_method_write,
653};
654
606static struct configfs_attribute *o2nm_cluster_attrs[] = { 655static struct configfs_attribute *o2nm_cluster_attrs[] = {
607 &o2nm_cluster_attr_idle_timeout_ms.attr, 656 &o2nm_cluster_attr_idle_timeout_ms.attr,
608 &o2nm_cluster_attr_keepalive_delay_ms.attr, 657 &o2nm_cluster_attr_keepalive_delay_ms.attr,
609 &o2nm_cluster_attr_reconnect_delay_ms.attr, 658 &o2nm_cluster_attr_reconnect_delay_ms.attr,
659 &o2nm_cluster_attr_fence_method.attr,
610 NULL, 660 NULL,
611}; 661};
612static ssize_t o2nm_cluster_show(struct config_item *item, 662static ssize_t o2nm_cluster_show(struct config_item *item,
@@ -778,6 +828,7 @@ static struct config_group *o2nm_cluster_group_make_group(struct config_group *g
778 cluster->cl_reconnect_delay_ms = O2NET_RECONNECT_DELAY_MS_DEFAULT; 828 cluster->cl_reconnect_delay_ms = O2NET_RECONNECT_DELAY_MS_DEFAULT;
779 cluster->cl_idle_timeout_ms = O2NET_IDLE_TIMEOUT_MS_DEFAULT; 829 cluster->cl_idle_timeout_ms = O2NET_IDLE_TIMEOUT_MS_DEFAULT;
780 cluster->cl_keepalive_delay_ms = O2NET_KEEPALIVE_DELAY_MS_DEFAULT; 830 cluster->cl_keepalive_delay_ms = O2NET_KEEPALIVE_DELAY_MS_DEFAULT;
831 cluster->cl_fence_method = O2NM_FENCE_RESET;
781 832
782 ret = &cluster->cl_group; 833 ret = &cluster->cl_group;
783 o2nm_single_cluster = cluster; 834 o2nm_single_cluster = cluster;
diff --git a/fs/ocfs2/cluster/nodemanager.h b/fs/ocfs2/cluster/nodemanager.h
index c992ea0da4ad..09ea2d388bbb 100644
--- a/fs/ocfs2/cluster/nodemanager.h
+++ b/fs/ocfs2/cluster/nodemanager.h
@@ -33,6 +33,12 @@
33#include <linux/configfs.h> 33#include <linux/configfs.h>
34#include <linux/rbtree.h> 34#include <linux/rbtree.h>
35 35
36enum o2nm_fence_method {
37 O2NM_FENCE_RESET = 0,
38 O2NM_FENCE_PANIC,
39 O2NM_FENCE_METHODS, /* Number of fence methods */
40};
41
36struct o2nm_node { 42struct o2nm_node {
37 spinlock_t nd_lock; 43 spinlock_t nd_lock;
38 struct config_item nd_item; 44 struct config_item nd_item;
@@ -58,6 +64,7 @@ struct o2nm_cluster {
58 unsigned int cl_idle_timeout_ms; 64 unsigned int cl_idle_timeout_ms;
59 unsigned int cl_keepalive_delay_ms; 65 unsigned int cl_keepalive_delay_ms;
60 unsigned int cl_reconnect_delay_ms; 66 unsigned int cl_reconnect_delay_ms;
67 enum o2nm_fence_method cl_fence_method;
61 68
62 /* this bitmap is part of a hack for disk bitmap.. will go eventually. - zab */ 69 /* this bitmap is part of a hack for disk bitmap.. will go eventually. - zab */
63 unsigned long cl_nodes_bitmap[BITS_TO_LONGS(O2NM_MAX_NODES)]; 70 unsigned long cl_nodes_bitmap[BITS_TO_LONGS(O2NM_MAX_NODES)];
diff --git a/fs/ocfs2/cluster/quorum.c b/fs/ocfs2/cluster/quorum.c
index bbacf7da48a4..639024033fce 100644
--- a/fs/ocfs2/cluster/quorum.c
+++ b/fs/ocfs2/cluster/quorum.c
@@ -74,8 +74,20 @@ static void o2quo_fence_self(void)
74 * threads can still schedule, etc, etc */ 74 * threads can still schedule, etc, etc */
75 o2hb_stop_all_regions(); 75 o2hb_stop_all_regions();
76 76
77 printk("ocfs2 is very sorry to be fencing this system by restarting\n"); 77 switch (o2nm_single_cluster->cl_fence_method) {
78 emergency_restart(); 78 case O2NM_FENCE_PANIC:
79 panic("*** ocfs2 is very sorry to be fencing this system by "
80 "panicing ***\n");
81 break;
82 default:
83 WARN_ON(o2nm_single_cluster->cl_fence_method >=
84 O2NM_FENCE_METHODS);
85 case O2NM_FENCE_RESET:
86 printk(KERN_ERR "*** ocfs2 is very sorry to be fencing this "
87 "system by restarting ***\n");
88 emergency_restart();
89 break;
90 };
79} 91}
80 92
81/* Indicate that a timeout occured on a hearbeat region write. The 93/* Indicate that a timeout occured on a hearbeat region write. The
diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c
index 334f231a422c..d8d0c65ac03c 100644
--- a/fs/ocfs2/cluster/tcp.c
+++ b/fs/ocfs2/cluster/tcp.c
@@ -485,7 +485,7 @@ static void o2net_set_nn_state(struct o2net_node *nn,
485 } 485 }
486 486
487 if (was_valid && !valid) { 487 if (was_valid && !valid) {
488 printk(KERN_INFO "o2net: no longer connected to " 488 printk(KERN_NOTICE "o2net: no longer connected to "
489 SC_NODEF_FMT "\n", SC_NODEF_ARGS(old_sc)); 489 SC_NODEF_FMT "\n", SC_NODEF_ARGS(old_sc));
490 o2net_complete_nodes_nsw(nn); 490 o2net_complete_nodes_nsw(nn);
491 } 491 }
@@ -493,7 +493,7 @@ static void o2net_set_nn_state(struct o2net_node *nn,
493 if (!was_valid && valid) { 493 if (!was_valid && valid) {
494 o2quo_conn_up(o2net_num_from_nn(nn)); 494 o2quo_conn_up(o2net_num_from_nn(nn));
495 cancel_delayed_work(&nn->nn_connect_expired); 495 cancel_delayed_work(&nn->nn_connect_expired);
496 printk(KERN_INFO "o2net: %s " SC_NODEF_FMT "\n", 496 printk(KERN_NOTICE "o2net: %s " SC_NODEF_FMT "\n",
497 o2nm_this_node() > sc->sc_node->nd_num ? 497 o2nm_this_node() > sc->sc_node->nd_num ?
498 "connected to" : "accepted connection from", 498 "connected to" : "accepted connection from",
499 SC_NODEF_ARGS(sc)); 499 SC_NODEF_ARGS(sc));
@@ -930,7 +930,7 @@ static void o2net_sendpage(struct o2net_sock_container *sc,
930 cond_resched(); 930 cond_resched();
931 continue; 931 continue;
932 } 932 }
933 mlog(ML_ERROR, "sendpage of size %zu to " SC_NODEF_FMT 933 mlog(ML_ERROR, "sendpage of size %zu to " SC_NODEF_FMT
934 " failed with %zd\n", size, SC_NODEF_ARGS(sc), ret); 934 " failed with %zd\n", size, SC_NODEF_ARGS(sc), ret);
935 o2net_ensure_shutdown(nn, sc, 0); 935 o2net_ensure_shutdown(nn, sc, 0);
936 break; 936 break;
@@ -1476,14 +1476,14 @@ static void o2net_idle_timer(unsigned long data)
1476 1476
1477 do_gettimeofday(&now); 1477 do_gettimeofday(&now);
1478 1478
1479 printk(KERN_INFO "o2net: connection to " SC_NODEF_FMT " has been idle for %u.%u " 1479 printk(KERN_NOTICE "o2net: connection to " SC_NODEF_FMT " has been idle for %u.%u "
1480 "seconds, shutting it down.\n", SC_NODEF_ARGS(sc), 1480 "seconds, shutting it down.\n", SC_NODEF_ARGS(sc),
1481 o2net_idle_timeout() / 1000, 1481 o2net_idle_timeout() / 1000,
1482 o2net_idle_timeout() % 1000); 1482 o2net_idle_timeout() % 1000);
1483 mlog(ML_NOTICE, "here are some times that might help debug the " 1483 mlog(ML_NOTICE, "here are some times that might help debug the "
1484 "situation: (tmr %ld.%ld now %ld.%ld dr %ld.%ld adv " 1484 "situation: (tmr %ld.%ld now %ld.%ld dr %ld.%ld adv "
1485 "%ld.%ld:%ld.%ld func (%08x:%u) %ld.%ld:%ld.%ld)\n", 1485 "%ld.%ld:%ld.%ld func (%08x:%u) %ld.%ld:%ld.%ld)\n",
1486 sc->sc_tv_timer.tv_sec, (long) sc->sc_tv_timer.tv_usec, 1486 sc->sc_tv_timer.tv_sec, (long) sc->sc_tv_timer.tv_usec,
1487 now.tv_sec, (long) now.tv_usec, 1487 now.tv_sec, (long) now.tv_usec,
1488 sc->sc_tv_data_ready.tv_sec, (long) sc->sc_tv_data_ready.tv_usec, 1488 sc->sc_tv_data_ready.tv_sec, (long) sc->sc_tv_data_ready.tv_usec,
1489 sc->sc_tv_advance_start.tv_sec, 1489 sc->sc_tv_advance_start.tv_sec,
diff --git a/fs/ocfs2/cluster/tcp_internal.h b/fs/ocfs2/cluster/tcp_internal.h
index 8d58cfe410b1..96fa7ebc530c 100644
--- a/fs/ocfs2/cluster/tcp_internal.h
+++ b/fs/ocfs2/cluster/tcp_internal.h
@@ -32,10 +32,10 @@
32 * on their number */ 32 * on their number */
33#define O2NET_QUORUM_DELAY_MS ((o2hb_dead_threshold + 2) * O2HB_REGION_TIMEOUT_MS) 33#define O2NET_QUORUM_DELAY_MS ((o2hb_dead_threshold + 2) * O2HB_REGION_TIMEOUT_MS)
34 34
35/* 35/*
36 * This version number represents quite a lot, unfortunately. It not 36 * This version number represents quite a lot, unfortunately. It not
37 * only represents the raw network message protocol on the wire but also 37 * only represents the raw network message protocol on the wire but also
38 * locking semantics of the file system using the protocol. It should 38 * locking semantics of the file system using the protocol. It should
39 * be somewhere else, I'm sure, but right now it isn't. 39 * be somewhere else, I'm sure, but right now it isn't.
40 * 40 *
41 * With version 11, we separate out the filesystem locking portion. The 41 * With version 11, we separate out the filesystem locking portion. The
diff --git a/fs/ocfs2/dlm/dlmapi.h b/fs/ocfs2/dlm/dlmapi.h
index b5786a787fab..3cfa114aa391 100644
--- a/fs/ocfs2/dlm/dlmapi.h
+++ b/fs/ocfs2/dlm/dlmapi.h
@@ -95,7 +95,7 @@ const char *dlm_errname(enum dlm_status err);
95 mlog(ML_ERROR, "dlm status = %s\n", dlm_errname((st))); \ 95 mlog(ML_ERROR, "dlm status = %s\n", dlm_errname((st))); \
96} while (0) 96} while (0)
97 97
98#define DLM_LKSB_UNUSED1 0x01 98#define DLM_LKSB_UNUSED1 0x01
99#define DLM_LKSB_PUT_LVB 0x02 99#define DLM_LKSB_PUT_LVB 0x02
100#define DLM_LKSB_GET_LVB 0x04 100#define DLM_LKSB_GET_LVB 0x04
101#define DLM_LKSB_UNUSED2 0x08 101#define DLM_LKSB_UNUSED2 0x08
diff --git a/fs/ocfs2/dlm/dlmast.c b/fs/ocfs2/dlm/dlmast.c
index 01cf8cc3d286..dccc439fa087 100644
--- a/fs/ocfs2/dlm/dlmast.c
+++ b/fs/ocfs2/dlm/dlmast.c
@@ -123,7 +123,7 @@ static void __dlm_queue_ast(struct dlm_ctxt *dlm, struct dlm_lock *lock)
123 dlm_lock_put(lock); 123 dlm_lock_put(lock);
124 /* free up the reserved bast that we are cancelling. 124 /* free up the reserved bast that we are cancelling.
125 * guaranteed that this will not be the last reserved 125 * guaranteed that this will not be the last reserved
126 * ast because *both* an ast and a bast were reserved 126 * ast because *both* an ast and a bast were reserved
127 * to get to this point. the res->spinlock will not be 127 * to get to this point. the res->spinlock will not be
128 * taken here */ 128 * taken here */
129 dlm_lockres_release_ast(dlm, res); 129 dlm_lockres_release_ast(dlm, res);
diff --git a/fs/ocfs2/dlm/dlmconvert.c b/fs/ocfs2/dlm/dlmconvert.c
index ca96bce50e18..f283bce776b4 100644
--- a/fs/ocfs2/dlm/dlmconvert.c
+++ b/fs/ocfs2/dlm/dlmconvert.c
@@ -396,7 +396,7 @@ static enum dlm_status dlm_send_remote_convert_request(struct dlm_ctxt *dlm,
396 /* instead of logging the same network error over 396 /* instead of logging the same network error over
397 * and over, sleep here and wait for the heartbeat 397 * and over, sleep here and wait for the heartbeat
398 * to notice the node is dead. times out after 5s. */ 398 * to notice the node is dead. times out after 5s. */
399 dlm_wait_for_node_death(dlm, res->owner, 399 dlm_wait_for_node_death(dlm, res->owner,
400 DLM_NODE_DEATH_WAIT_MAX); 400 DLM_NODE_DEATH_WAIT_MAX);
401 ret = DLM_RECOVERING; 401 ret = DLM_RECOVERING;
402 mlog(0, "node %u died so returning DLM_RECOVERING " 402 mlog(0, "node %u died so returning DLM_RECOVERING "
diff --git a/fs/ocfs2/dlm/dlmdebug.c b/fs/ocfs2/dlm/dlmdebug.c
index 42b0bad7a612..0cd24cf54396 100644
--- a/fs/ocfs2/dlm/dlmdebug.c
+++ b/fs/ocfs2/dlm/dlmdebug.c
@@ -102,7 +102,7 @@ void __dlm_print_one_lock_resource(struct dlm_lock_resource *res)
102 assert_spin_locked(&res->spinlock); 102 assert_spin_locked(&res->spinlock);
103 103
104 stringify_lockname(res->lockname.name, res->lockname.len, 104 stringify_lockname(res->lockname.name, res->lockname.len,
105 buf, sizeof(buf) - 1); 105 buf, sizeof(buf));
106 printk("lockres: %s, owner=%u, state=%u\n", 106 printk("lockres: %s, owner=%u, state=%u\n",
107 buf, res->owner, res->state); 107 buf, res->owner, res->state);
108 printk(" last used: %lu, refcnt: %u, on purge list: %s\n", 108 printk(" last used: %lu, refcnt: %u, on purge list: %s\n",
diff --git a/fs/ocfs2/dlm/dlmdomain.c b/fs/ocfs2/dlm/dlmdomain.c
index 0334000676d3..988c9055fd4e 100644
--- a/fs/ocfs2/dlm/dlmdomain.c
+++ b/fs/ocfs2/dlm/dlmdomain.c
@@ -816,7 +816,7 @@ static int dlm_query_join_handler(struct o2net_msg *msg, u32 len, void *data,
816 } 816 }
817 817
818 /* Once the dlm ctxt is marked as leaving then we don't want 818 /* Once the dlm ctxt is marked as leaving then we don't want
819 * to be put in someone's domain map. 819 * to be put in someone's domain map.
820 * Also, explicitly disallow joining at certain troublesome 820 * Also, explicitly disallow joining at certain troublesome
821 * times (ie. during recovery). */ 821 * times (ie. during recovery). */
822 if (dlm && dlm->dlm_state != DLM_CTXT_LEAVING) { 822 if (dlm && dlm->dlm_state != DLM_CTXT_LEAVING) {
diff --git a/fs/ocfs2/dlm/dlmlock.c b/fs/ocfs2/dlm/dlmlock.c
index 437698e9465f..733337772671 100644
--- a/fs/ocfs2/dlm/dlmlock.c
+++ b/fs/ocfs2/dlm/dlmlock.c
@@ -269,7 +269,7 @@ static enum dlm_status dlmlock_remote(struct dlm_ctxt *dlm,
269 } 269 }
270 dlm_revert_pending_lock(res, lock); 270 dlm_revert_pending_lock(res, lock);
271 dlm_lock_put(lock); 271 dlm_lock_put(lock);
272 } else if (dlm_is_recovery_lock(res->lockname.name, 272 } else if (dlm_is_recovery_lock(res->lockname.name,
273 res->lockname.len)) { 273 res->lockname.len)) {
274 /* special case for the $RECOVERY lock. 274 /* special case for the $RECOVERY lock.
275 * there will never be an AST delivered to put 275 * there will never be an AST delivered to put
diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c
index 03ccf9a7b1f4..a659606dcb95 100644
--- a/fs/ocfs2/dlm/dlmmaster.c
+++ b/fs/ocfs2/dlm/dlmmaster.c
@@ -366,7 +366,7 @@ void dlm_hb_event_notify_attached(struct dlm_ctxt *dlm, int idx, int node_up)
366 struct dlm_master_list_entry *mle; 366 struct dlm_master_list_entry *mle;
367 367
368 assert_spin_locked(&dlm->spinlock); 368 assert_spin_locked(&dlm->spinlock);
369 369
370 list_for_each_entry(mle, &dlm->mle_hb_events, hb_events) { 370 list_for_each_entry(mle, &dlm->mle_hb_events, hb_events) {
371 if (node_up) 371 if (node_up)
372 dlm_mle_node_up(dlm, mle, NULL, idx); 372 dlm_mle_node_up(dlm, mle, NULL, idx);
@@ -833,7 +833,7 @@ lookup:
833 __dlm_insert_mle(dlm, mle); 833 __dlm_insert_mle(dlm, mle);
834 834
835 /* still holding the dlm spinlock, check the recovery map 835 /* still holding the dlm spinlock, check the recovery map
836 * to see if there are any nodes that still need to be 836 * to see if there are any nodes that still need to be
837 * considered. these will not appear in the mle nodemap 837 * considered. these will not appear in the mle nodemap
838 * but they might own this lockres. wait on them. */ 838 * but they might own this lockres. wait on them. */
839 bit = find_next_bit(dlm->recovery_map, O2NM_MAX_NODES, 0); 839 bit = find_next_bit(dlm->recovery_map, O2NM_MAX_NODES, 0);
@@ -883,7 +883,7 @@ redo_request:
883 msleep(500); 883 msleep(500);
884 } 884 }
885 continue; 885 continue;
886 } 886 }
887 887
888 dlm_kick_recovery_thread(dlm); 888 dlm_kick_recovery_thread(dlm);
889 msleep(1000); 889 msleep(1000);
@@ -939,8 +939,8 @@ wait:
939 res->lockname.name, blocked); 939 res->lockname.name, blocked);
940 if (++tries > 20) { 940 if (++tries > 20) {
941 mlog(ML_ERROR, "%s:%.*s: spinning on " 941 mlog(ML_ERROR, "%s:%.*s: spinning on "
942 "dlm_wait_for_lock_mastery, blocked=%d\n", 942 "dlm_wait_for_lock_mastery, blocked=%d\n",
943 dlm->name, res->lockname.len, 943 dlm->name, res->lockname.len,
944 res->lockname.name, blocked); 944 res->lockname.name, blocked);
945 dlm_print_one_lock_resource(res); 945 dlm_print_one_lock_resource(res);
946 dlm_print_one_mle(mle); 946 dlm_print_one_mle(mle);
@@ -1029,7 +1029,7 @@ recheck:
1029 ret = dlm_restart_lock_mastery(dlm, res, mle, *blocked); 1029 ret = dlm_restart_lock_mastery(dlm, res, mle, *blocked);
1030 b = (mle->type == DLM_MLE_BLOCK); 1030 b = (mle->type == DLM_MLE_BLOCK);
1031 if ((*blocked && !b) || (!*blocked && b)) { 1031 if ((*blocked && !b) || (!*blocked && b)) {
1032 mlog(0, "%s:%.*s: status change: old=%d new=%d\n", 1032 mlog(0, "%s:%.*s: status change: old=%d new=%d\n",
1033 dlm->name, res->lockname.len, res->lockname.name, 1033 dlm->name, res->lockname.len, res->lockname.name,
1034 *blocked, b); 1034 *blocked, b);
1035 *blocked = b; 1035 *blocked = b;
@@ -1602,7 +1602,7 @@ send_response:
1602 } 1602 }
1603 mlog(0, "%u is the owner of %.*s, cleaning everyone else\n", 1603 mlog(0, "%u is the owner of %.*s, cleaning everyone else\n",
1604 dlm->node_num, res->lockname.len, res->lockname.name); 1604 dlm->node_num, res->lockname.len, res->lockname.name);
1605 ret = dlm_dispatch_assert_master(dlm, res, 0, request->node_idx, 1605 ret = dlm_dispatch_assert_master(dlm, res, 0, request->node_idx,
1606 DLM_ASSERT_MASTER_MLE_CLEANUP); 1606 DLM_ASSERT_MASTER_MLE_CLEANUP);
1607 if (ret < 0) { 1607 if (ret < 0) {
1608 mlog(ML_ERROR, "failed to dispatch assert master work\n"); 1608 mlog(ML_ERROR, "failed to dispatch assert master work\n");
@@ -1701,7 +1701,7 @@ again:
1701 1701
1702 if (r & DLM_ASSERT_RESPONSE_REASSERT) { 1702 if (r & DLM_ASSERT_RESPONSE_REASSERT) {
1703 mlog(0, "%.*s: node %u create mles on other " 1703 mlog(0, "%.*s: node %u create mles on other "
1704 "nodes and requests a re-assert\n", 1704 "nodes and requests a re-assert\n",
1705 namelen, lockname, to); 1705 namelen, lockname, to);
1706 reassert = 1; 1706 reassert = 1;
1707 } 1707 }
@@ -1812,7 +1812,7 @@ int dlm_assert_master_handler(struct o2net_msg *msg, u32 len, void *data,
1812 spin_unlock(&dlm->master_lock); 1812 spin_unlock(&dlm->master_lock);
1813 spin_unlock(&dlm->spinlock); 1813 spin_unlock(&dlm->spinlock);
1814 goto done; 1814 goto done;
1815 } 1815 }
1816 } 1816 }
1817 } 1817 }
1818 spin_unlock(&dlm->master_lock); 1818 spin_unlock(&dlm->master_lock);
@@ -1883,7 +1883,7 @@ ok:
1883 int extra_ref = 0; 1883 int extra_ref = 0;
1884 int nn = -1; 1884 int nn = -1;
1885 int rr, err = 0; 1885 int rr, err = 0;
1886 1886
1887 spin_lock(&mle->spinlock); 1887 spin_lock(&mle->spinlock);
1888 if (mle->type == DLM_MLE_BLOCK || mle->type == DLM_MLE_MIGRATION) 1888 if (mle->type == DLM_MLE_BLOCK || mle->type == DLM_MLE_MIGRATION)
1889 extra_ref = 1; 1889 extra_ref = 1;
@@ -1891,7 +1891,7 @@ ok:
1891 /* MASTER mle: if any bits set in the response map 1891 /* MASTER mle: if any bits set in the response map
1892 * then the calling node needs to re-assert to clear 1892 * then the calling node needs to re-assert to clear
1893 * up nodes that this node contacted */ 1893 * up nodes that this node contacted */
1894 while ((nn = find_next_bit (mle->response_map, O2NM_MAX_NODES, 1894 while ((nn = find_next_bit (mle->response_map, O2NM_MAX_NODES,
1895 nn+1)) < O2NM_MAX_NODES) { 1895 nn+1)) < O2NM_MAX_NODES) {
1896 if (nn != dlm->node_num && nn != assert->node_idx) 1896 if (nn != dlm->node_num && nn != assert->node_idx)
1897 master_request = 1; 1897 master_request = 1;
@@ -2002,7 +2002,7 @@ kill:
2002 __dlm_print_one_lock_resource(res); 2002 __dlm_print_one_lock_resource(res);
2003 spin_unlock(&res->spinlock); 2003 spin_unlock(&res->spinlock);
2004 spin_unlock(&dlm->spinlock); 2004 spin_unlock(&dlm->spinlock);
2005 *ret_data = (void *)res; 2005 *ret_data = (void *)res;
2006 dlm_put(dlm); 2006 dlm_put(dlm);
2007 return -EINVAL; 2007 return -EINVAL;
2008} 2008}
@@ -2040,10 +2040,10 @@ int dlm_dispatch_assert_master(struct dlm_ctxt *dlm,
2040 item->u.am.request_from = request_from; 2040 item->u.am.request_from = request_from;
2041 item->u.am.flags = flags; 2041 item->u.am.flags = flags;
2042 2042
2043 if (ignore_higher) 2043 if (ignore_higher)
2044 mlog(0, "IGNORE HIGHER: %.*s\n", res->lockname.len, 2044 mlog(0, "IGNORE HIGHER: %.*s\n", res->lockname.len,
2045 res->lockname.name); 2045 res->lockname.name);
2046 2046
2047 spin_lock(&dlm->work_lock); 2047 spin_lock(&dlm->work_lock);
2048 list_add_tail(&item->list, &dlm->work_list); 2048 list_add_tail(&item->list, &dlm->work_list);
2049 spin_unlock(&dlm->work_lock); 2049 spin_unlock(&dlm->work_lock);
@@ -2133,7 +2133,7 @@ put:
2133 * think that $RECOVERY is currently mastered by a dead node. If so, 2133 * think that $RECOVERY is currently mastered by a dead node. If so,
2134 * we wait a short time to allow that node to get notified by its own 2134 * we wait a short time to allow that node to get notified by its own
2135 * heartbeat stack, then check again. All $RECOVERY lock resources 2135 * heartbeat stack, then check again. All $RECOVERY lock resources
2136 * mastered by dead nodes are purged when the hearbeat callback is 2136 * mastered by dead nodes are purged when the hearbeat callback is
2137 * fired, so we can know for sure that it is safe to continue once 2137 * fired, so we can know for sure that it is safe to continue once
2138 * the node returns a live node or no node. */ 2138 * the node returns a live node or no node. */
2139static int dlm_pre_master_reco_lockres(struct dlm_ctxt *dlm, 2139static int dlm_pre_master_reco_lockres(struct dlm_ctxt *dlm,
@@ -2174,7 +2174,7 @@ static int dlm_pre_master_reco_lockres(struct dlm_ctxt *dlm,
2174 ret = -EAGAIN; 2174 ret = -EAGAIN;
2175 } 2175 }
2176 spin_unlock(&dlm->spinlock); 2176 spin_unlock(&dlm->spinlock);
2177 mlog(0, "%s: reco lock master is %u\n", dlm->name, 2177 mlog(0, "%s: reco lock master is %u\n", dlm->name,
2178 master); 2178 master);
2179 break; 2179 break;
2180 } 2180 }
@@ -2602,7 +2602,7 @@ fail:
2602 2602
2603 mlog(0, "%s:%.*s: timed out during migration\n", 2603 mlog(0, "%s:%.*s: timed out during migration\n",
2604 dlm->name, res->lockname.len, res->lockname.name); 2604 dlm->name, res->lockname.len, res->lockname.name);
2605 /* avoid hang during shutdown when migrating lockres 2605 /* avoid hang during shutdown when migrating lockres
2606 * to a node which also goes down */ 2606 * to a node which also goes down */
2607 if (dlm_is_node_dead(dlm, target)) { 2607 if (dlm_is_node_dead(dlm, target)) {
2608 mlog(0, "%s:%.*s: expected migration " 2608 mlog(0, "%s:%.*s: expected migration "
@@ -2738,7 +2738,7 @@ static int dlm_migration_can_proceed(struct dlm_ctxt *dlm,
2738 can_proceed = !!(res->state & DLM_LOCK_RES_MIGRATING); 2738 can_proceed = !!(res->state & DLM_LOCK_RES_MIGRATING);
2739 spin_unlock(&res->spinlock); 2739 spin_unlock(&res->spinlock);
2740 2740
2741 /* target has died, so make the caller break out of the 2741 /* target has died, so make the caller break out of the
2742 * wait_event, but caller must recheck the domain_map */ 2742 * wait_event, but caller must recheck the domain_map */
2743 spin_lock(&dlm->spinlock); 2743 spin_lock(&dlm->spinlock);
2744 if (!test_bit(mig_target, dlm->domain_map)) 2744 if (!test_bit(mig_target, dlm->domain_map))
diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c
index d9fa3d22e17c..344bcf90cbf4 100644
--- a/fs/ocfs2/dlm/dlmrecovery.c
+++ b/fs/ocfs2/dlm/dlmrecovery.c
@@ -1050,7 +1050,7 @@ static void dlm_move_reco_locks_to_list(struct dlm_ctxt *dlm,
1050 if (lock->ml.node == dead_node) { 1050 if (lock->ml.node == dead_node) {
1051 mlog(0, "AHA! there was " 1051 mlog(0, "AHA! there was "
1052 "a $RECOVERY lock for dead " 1052 "a $RECOVERY lock for dead "
1053 "node %u (%s)!\n", 1053 "node %u (%s)!\n",
1054 dead_node, dlm->name); 1054 dead_node, dlm->name);
1055 list_del_init(&lock->list); 1055 list_del_init(&lock->list);
1056 dlm_lock_put(lock); 1056 dlm_lock_put(lock);
@@ -1164,6 +1164,39 @@ static void dlm_init_migratable_lockres(struct dlm_migratable_lockres *mres,
1164 mres->master = master; 1164 mres->master = master;
1165} 1165}
1166 1166
1167static void dlm_prepare_lvb_for_migration(struct dlm_lock *lock,
1168 struct dlm_migratable_lockres *mres,
1169 int queue)
1170{
1171 if (!lock->lksb)
1172 return;
1173
1174 /* Ignore lvb in all locks in the blocked list */
1175 if (queue == DLM_BLOCKED_LIST)
1176 return;
1177
1178 /* Only consider lvbs in locks with granted EX or PR lock levels */
1179 if (lock->ml.type != LKM_EXMODE && lock->ml.type != LKM_PRMODE)
1180 return;
1181
1182 if (dlm_lvb_is_empty(mres->lvb)) {
1183 memcpy(mres->lvb, lock->lksb->lvb, DLM_LVB_LEN);
1184 return;
1185 }
1186
1187 /* Ensure the lvb copied for migration matches in other valid locks */
1188 if (!memcmp(mres->lvb, lock->lksb->lvb, DLM_LVB_LEN))
1189 return;
1190
1191 mlog(ML_ERROR, "Mismatched lvb in lock cookie=%u:%llu, name=%.*s, "
1192 "node=%u\n",
1193 dlm_get_lock_cookie_node(be64_to_cpu(lock->ml.cookie)),
1194 dlm_get_lock_cookie_seq(be64_to_cpu(lock->ml.cookie)),
1195 lock->lockres->lockname.len, lock->lockres->lockname.name,
1196 lock->ml.node);
1197 dlm_print_one_lock_resource(lock->lockres);
1198 BUG();
1199}
1167 1200
1168/* returns 1 if this lock fills the network structure, 1201/* returns 1 if this lock fills the network structure,
1169 * 0 otherwise */ 1202 * 0 otherwise */
@@ -1181,20 +1214,7 @@ static int dlm_add_lock_to_array(struct dlm_lock *lock,
1181 ml->list = queue; 1214 ml->list = queue;
1182 if (lock->lksb) { 1215 if (lock->lksb) {
1183 ml->flags = lock->lksb->flags; 1216 ml->flags = lock->lksb->flags;
1184 /* send our current lvb */ 1217 dlm_prepare_lvb_for_migration(lock, mres, queue);
1185 if (ml->type == LKM_EXMODE ||
1186 ml->type == LKM_PRMODE) {
1187 /* if it is already set, this had better be a PR
1188 * and it has to match */
1189 if (!dlm_lvb_is_empty(mres->lvb) &&
1190 (ml->type == LKM_EXMODE ||
1191 memcmp(mres->lvb, lock->lksb->lvb, DLM_LVB_LEN))) {
1192 mlog(ML_ERROR, "mismatched lvbs!\n");
1193 dlm_print_one_lock_resource(lock->lockres);
1194 BUG();
1195 }
1196 memcpy(mres->lvb, lock->lksb->lvb, DLM_LVB_LEN);
1197 }
1198 } 1218 }
1199 ml->node = lock->ml.node; 1219 ml->node = lock->ml.node;
1200 mres->num_locks++; 1220 mres->num_locks++;
@@ -1730,6 +1750,7 @@ static int dlm_process_recovery_data(struct dlm_ctxt *dlm,
1730 struct dlm_lock *lock = NULL; 1750 struct dlm_lock *lock = NULL;
1731 u8 from = O2NM_MAX_NODES; 1751 u8 from = O2NM_MAX_NODES;
1732 unsigned int added = 0; 1752 unsigned int added = 0;
1753 __be64 c;
1733 1754
1734 mlog(0, "running %d locks for this lockres\n", mres->num_locks); 1755 mlog(0, "running %d locks for this lockres\n", mres->num_locks);
1735 for (i=0; i<mres->num_locks; i++) { 1756 for (i=0; i<mres->num_locks; i++) {
@@ -1777,19 +1798,48 @@ static int dlm_process_recovery_data(struct dlm_ctxt *dlm,
1777 /* lock is always created locally first, and 1798 /* lock is always created locally first, and
1778 * destroyed locally last. it must be on the list */ 1799 * destroyed locally last. it must be on the list */
1779 if (!lock) { 1800 if (!lock) {
1780 __be64 c = ml->cookie; 1801 c = ml->cookie;
1781 mlog(ML_ERROR, "could not find local lock " 1802 mlog(ML_ERROR, "Could not find local lock "
1782 "with cookie %u:%llu!\n", 1803 "with cookie %u:%llu, node %u, "
1804 "list %u, flags 0x%x, type %d, "
1805 "conv %d, highest blocked %d\n",
1783 dlm_get_lock_cookie_node(be64_to_cpu(c)), 1806 dlm_get_lock_cookie_node(be64_to_cpu(c)),
1784 dlm_get_lock_cookie_seq(be64_to_cpu(c))); 1807 dlm_get_lock_cookie_seq(be64_to_cpu(c)),
1808 ml->node, ml->list, ml->flags, ml->type,
1809 ml->convert_type, ml->highest_blocked);
1810 __dlm_print_one_lock_resource(res);
1811 BUG();
1812 }
1813
1814 if (lock->ml.node != ml->node) {
1815 c = lock->ml.cookie;
1816 mlog(ML_ERROR, "Mismatched node# in lock "
1817 "cookie %u:%llu, name %.*s, node %u\n",
1818 dlm_get_lock_cookie_node(be64_to_cpu(c)),
1819 dlm_get_lock_cookie_seq(be64_to_cpu(c)),
1820 res->lockname.len, res->lockname.name,
1821 lock->ml.node);
1822 c = ml->cookie;
1823 mlog(ML_ERROR, "Migrate lock cookie %u:%llu, "
1824 "node %u, list %u, flags 0x%x, type %d, "
1825 "conv %d, highest blocked %d\n",
1826 dlm_get_lock_cookie_node(be64_to_cpu(c)),
1827 dlm_get_lock_cookie_seq(be64_to_cpu(c)),
1828 ml->node, ml->list, ml->flags, ml->type,
1829 ml->convert_type, ml->highest_blocked);
1785 __dlm_print_one_lock_resource(res); 1830 __dlm_print_one_lock_resource(res);
1786 BUG(); 1831 BUG();
1787 } 1832 }
1788 BUG_ON(lock->ml.node != ml->node);
1789 1833
1790 if (tmpq != queue) { 1834 if (tmpq != queue) {
1791 mlog(0, "lock was on %u instead of %u for %.*s\n", 1835 c = ml->cookie;
1792 j, ml->list, res->lockname.len, res->lockname.name); 1836 mlog(0, "Lock cookie %u:%llu was on list %u "
1837 "instead of list %u for %.*s\n",
1838 dlm_get_lock_cookie_node(be64_to_cpu(c)),
1839 dlm_get_lock_cookie_seq(be64_to_cpu(c)),
1840 j, ml->list, res->lockname.len,
1841 res->lockname.name);
1842 __dlm_print_one_lock_resource(res);
1793 spin_unlock(&res->spinlock); 1843 spin_unlock(&res->spinlock);
1794 continue; 1844 continue;
1795 } 1845 }
@@ -1839,7 +1889,7 @@ static int dlm_process_recovery_data(struct dlm_ctxt *dlm,
1839 * the lvb. */ 1889 * the lvb. */
1840 memcpy(res->lvb, mres->lvb, DLM_LVB_LEN); 1890 memcpy(res->lvb, mres->lvb, DLM_LVB_LEN);
1841 } else { 1891 } else {
1842 /* otherwise, the node is sending its 1892 /* otherwise, the node is sending its
1843 * most recent valid lvb info */ 1893 * most recent valid lvb info */
1844 BUG_ON(ml->type != LKM_EXMODE && 1894 BUG_ON(ml->type != LKM_EXMODE &&
1845 ml->type != LKM_PRMODE); 1895 ml->type != LKM_PRMODE);
@@ -1886,7 +1936,7 @@ skip_lvb:
1886 spin_lock(&res->spinlock); 1936 spin_lock(&res->spinlock);
1887 list_for_each_entry(lock, queue, list) { 1937 list_for_each_entry(lock, queue, list) {
1888 if (lock->ml.cookie == ml->cookie) { 1938 if (lock->ml.cookie == ml->cookie) {
1889 __be64 c = lock->ml.cookie; 1939 c = lock->ml.cookie;
1890 mlog(ML_ERROR, "%s:%.*s: %u:%llu: lock already " 1940 mlog(ML_ERROR, "%s:%.*s: %u:%llu: lock already "
1891 "exists on this lockres!\n", dlm->name, 1941 "exists on this lockres!\n", dlm->name,
1892 res->lockname.len, res->lockname.name, 1942 res->lockname.len, res->lockname.name,
@@ -2114,7 +2164,7 @@ static void dlm_revalidate_lvb(struct dlm_ctxt *dlm,
2114 assert_spin_locked(&res->spinlock); 2164 assert_spin_locked(&res->spinlock);
2115 2165
2116 if (res->owner == dlm->node_num) 2166 if (res->owner == dlm->node_num)
2117 /* if this node owned the lockres, and if the dead node 2167 /* if this node owned the lockres, and if the dead node
2118 * had an EX when he died, blank out the lvb */ 2168 * had an EX when he died, blank out the lvb */
2119 search_node = dead_node; 2169 search_node = dead_node;
2120 else { 2170 else {
@@ -2152,7 +2202,7 @@ static void dlm_free_dead_locks(struct dlm_ctxt *dlm,
2152 2202
2153 /* this node is the lockres master: 2203 /* this node is the lockres master:
2154 * 1) remove any stale locks for the dead node 2204 * 1) remove any stale locks for the dead node
2155 * 2) if the dead node had an EX when he died, blank out the lvb 2205 * 2) if the dead node had an EX when he died, blank out the lvb
2156 */ 2206 */
2157 assert_spin_locked(&dlm->spinlock); 2207 assert_spin_locked(&dlm->spinlock);
2158 assert_spin_locked(&res->spinlock); 2208 assert_spin_locked(&res->spinlock);
@@ -2193,7 +2243,12 @@ static void dlm_free_dead_locks(struct dlm_ctxt *dlm,
2193 mlog(0, "%s:%.*s: freed %u locks for dead node %u, " 2243 mlog(0, "%s:%.*s: freed %u locks for dead node %u, "
2194 "dropping ref from lockres\n", dlm->name, 2244 "dropping ref from lockres\n", dlm->name,
2195 res->lockname.len, res->lockname.name, freed, dead_node); 2245 res->lockname.len, res->lockname.name, freed, dead_node);
2196 BUG_ON(!test_bit(dead_node, res->refmap)); 2246 if(!test_bit(dead_node, res->refmap)) {
2247 mlog(ML_ERROR, "%s:%.*s: freed %u locks for dead node %u, "
2248 "but ref was not set\n", dlm->name,
2249 res->lockname.len, res->lockname.name, freed, dead_node);
2250 __dlm_print_one_lock_resource(res);
2251 }
2197 dlm_lockres_clear_refmap_bit(dead_node, res); 2252 dlm_lockres_clear_refmap_bit(dead_node, res);
2198 } else if (test_bit(dead_node, res->refmap)) { 2253 } else if (test_bit(dead_node, res->refmap)) {
2199 mlog(0, "%s:%.*s: dead node %u had a ref, but had " 2254 mlog(0, "%s:%.*s: dead node %u had a ref, but had "
@@ -2260,7 +2315,7 @@ static void dlm_do_local_recovery_cleanup(struct dlm_ctxt *dlm, u8 dead_node)
2260 } 2315 }
2261 spin_unlock(&res->spinlock); 2316 spin_unlock(&res->spinlock);
2262 continue; 2317 continue;
2263 } 2318 }
2264 spin_lock(&res->spinlock); 2319 spin_lock(&res->spinlock);
2265 /* zero the lvb if necessary */ 2320 /* zero the lvb if necessary */
2266 dlm_revalidate_lvb(dlm, res, dead_node); 2321 dlm_revalidate_lvb(dlm, res, dead_node);
@@ -2411,7 +2466,7 @@ static void dlm_reco_unlock_ast(void *astdata, enum dlm_status st)
2411 * this function on each node racing to become the recovery 2466 * this function on each node racing to become the recovery
2412 * master will not stop attempting this until either: 2467 * master will not stop attempting this until either:
2413 * a) this node gets the EX (and becomes the recovery master), 2468 * a) this node gets the EX (and becomes the recovery master),
2414 * or b) dlm->reco.new_master gets set to some nodenum 2469 * or b) dlm->reco.new_master gets set to some nodenum
2415 * != O2NM_INVALID_NODE_NUM (another node will do the reco). 2470 * != O2NM_INVALID_NODE_NUM (another node will do the reco).
2416 * so each time a recovery master is needed, the entire cluster 2471 * so each time a recovery master is needed, the entire cluster
2417 * will sync at this point. if the new master dies, that will 2472 * will sync at this point. if the new master dies, that will
@@ -2424,7 +2479,7 @@ static int dlm_pick_recovery_master(struct dlm_ctxt *dlm)
2424 2479
2425 mlog(0, "starting recovery of %s at %lu, dead=%u, this=%u\n", 2480 mlog(0, "starting recovery of %s at %lu, dead=%u, this=%u\n",
2426 dlm->name, jiffies, dlm->reco.dead_node, dlm->node_num); 2481 dlm->name, jiffies, dlm->reco.dead_node, dlm->node_num);
2427again: 2482again:
2428 memset(&lksb, 0, sizeof(lksb)); 2483 memset(&lksb, 0, sizeof(lksb));
2429 2484
2430 ret = dlmlock(dlm, LKM_EXMODE, &lksb, LKM_NOQUEUE|LKM_RECOVERY, 2485 ret = dlmlock(dlm, LKM_EXMODE, &lksb, LKM_NOQUEUE|LKM_RECOVERY,
@@ -2437,8 +2492,8 @@ again:
2437 if (ret == DLM_NORMAL) { 2492 if (ret == DLM_NORMAL) {
2438 mlog(0, "dlm=%s dlmlock says I got it (this=%u)\n", 2493 mlog(0, "dlm=%s dlmlock says I got it (this=%u)\n",
2439 dlm->name, dlm->node_num); 2494 dlm->name, dlm->node_num);
2440 2495
2441 /* got the EX lock. check to see if another node 2496 /* got the EX lock. check to see if another node
2442 * just became the reco master */ 2497 * just became the reco master */
2443 if (dlm_reco_master_ready(dlm)) { 2498 if (dlm_reco_master_ready(dlm)) {
2444 mlog(0, "%s: got reco EX lock, but %u will " 2499 mlog(0, "%s: got reco EX lock, but %u will "
@@ -2451,12 +2506,12 @@ again:
2451 /* see if recovery was already finished elsewhere */ 2506 /* see if recovery was already finished elsewhere */
2452 spin_lock(&dlm->spinlock); 2507 spin_lock(&dlm->spinlock);
2453 if (dlm->reco.dead_node == O2NM_INVALID_NODE_NUM) { 2508 if (dlm->reco.dead_node == O2NM_INVALID_NODE_NUM) {
2454 status = -EINVAL; 2509 status = -EINVAL;
2455 mlog(0, "%s: got reco EX lock, but " 2510 mlog(0, "%s: got reco EX lock, but "
2456 "node got recovered already\n", dlm->name); 2511 "node got recovered already\n", dlm->name);
2457 if (dlm->reco.new_master != O2NM_INVALID_NODE_NUM) { 2512 if (dlm->reco.new_master != O2NM_INVALID_NODE_NUM) {
2458 mlog(ML_ERROR, "%s: new master is %u " 2513 mlog(ML_ERROR, "%s: new master is %u "
2459 "but no dead node!\n", 2514 "but no dead node!\n",
2460 dlm->name, dlm->reco.new_master); 2515 dlm->name, dlm->reco.new_master);
2461 BUG(); 2516 BUG();
2462 } 2517 }
@@ -2468,7 +2523,7 @@ again:
2468 * set the master and send the messages to begin recovery */ 2523 * set the master and send the messages to begin recovery */
2469 if (!status) { 2524 if (!status) {
2470 mlog(0, "%s: dead=%u, this=%u, sending " 2525 mlog(0, "%s: dead=%u, this=%u, sending "
2471 "begin_reco now\n", dlm->name, 2526 "begin_reco now\n", dlm->name,
2472 dlm->reco.dead_node, dlm->node_num); 2527 dlm->reco.dead_node, dlm->node_num);
2473 status = dlm_send_begin_reco_message(dlm, 2528 status = dlm_send_begin_reco_message(dlm,
2474 dlm->reco.dead_node); 2529 dlm->reco.dead_node);
@@ -2501,7 +2556,7 @@ again:
2501 mlog(0, "dlm=%s dlmlock says another node got it (this=%u)\n", 2556 mlog(0, "dlm=%s dlmlock says another node got it (this=%u)\n",
2502 dlm->name, dlm->node_num); 2557 dlm->name, dlm->node_num);
2503 /* another node is master. wait on 2558 /* another node is master. wait on
2504 * reco.new_master != O2NM_INVALID_NODE_NUM 2559 * reco.new_master != O2NM_INVALID_NODE_NUM
2505 * for at most one second */ 2560 * for at most one second */
2506 wait_event_timeout(dlm->dlm_reco_thread_wq, 2561 wait_event_timeout(dlm->dlm_reco_thread_wq,
2507 dlm_reco_master_ready(dlm), 2562 dlm_reco_master_ready(dlm),
@@ -2589,9 +2644,23 @@ retry:
2589 "begin reco msg (%d)\n", dlm->name, nodenum, ret); 2644 "begin reco msg (%d)\n", dlm->name, nodenum, ret);
2590 ret = 0; 2645 ret = 0;
2591 } 2646 }
2647
2648 /*
2649 * Prior to commit aad1b15310b9bcd59fa81ab8f2b1513b59553ea8,
2650 * dlm_begin_reco_handler() returned EAGAIN and not -EAGAIN.
2651 * We are handling both for compatibility reasons.
2652 */
2653 if (ret == -EAGAIN || ret == EAGAIN) {
2654 mlog(0, "%s: trying to start recovery of node "
2655 "%u, but node %u is waiting for last recovery "
2656 "to complete, backoff for a bit\n", dlm->name,
2657 dead_node, nodenum);
2658 msleep(100);
2659 goto retry;
2660 }
2592 if (ret < 0) { 2661 if (ret < 0) {
2593 struct dlm_lock_resource *res; 2662 struct dlm_lock_resource *res;
2594 /* this is now a serious problem, possibly ENOMEM 2663 /* this is now a serious problem, possibly ENOMEM
2595 * in the network stack. must retry */ 2664 * in the network stack. must retry */
2596 mlog_errno(ret); 2665 mlog_errno(ret);
2597 mlog(ML_ERROR, "begin reco of dlm %s to node %u " 2666 mlog(ML_ERROR, "begin reco of dlm %s to node %u "
@@ -2604,18 +2673,10 @@ retry:
2604 } else { 2673 } else {
2605 mlog(ML_ERROR, "recovery lock not found\n"); 2674 mlog(ML_ERROR, "recovery lock not found\n");
2606 } 2675 }
2607 /* sleep for a bit in hopes that we can avoid 2676 /* sleep for a bit in hopes that we can avoid
2608 * another ENOMEM */ 2677 * another ENOMEM */
2609 msleep(100); 2678 msleep(100);
2610 goto retry; 2679 goto retry;
2611 } else if (ret == EAGAIN) {
2612 mlog(0, "%s: trying to start recovery of node "
2613 "%u, but node %u is waiting for last recovery "
2614 "to complete, backoff for a bit\n", dlm->name,
2615 dead_node, nodenum);
2616 /* TODO Look into replacing msleep with cond_resched() */
2617 msleep(100);
2618 goto retry;
2619 } 2680 }
2620 } 2681 }
2621 2682
@@ -2639,7 +2700,7 @@ int dlm_begin_reco_handler(struct o2net_msg *msg, u32 len, void *data,
2639 dlm->name, br->node_idx, br->dead_node, 2700 dlm->name, br->node_idx, br->dead_node,
2640 dlm->reco.dead_node, dlm->reco.new_master); 2701 dlm->reco.dead_node, dlm->reco.new_master);
2641 spin_unlock(&dlm->spinlock); 2702 spin_unlock(&dlm->spinlock);
2642 return EAGAIN; 2703 return -EAGAIN;
2643 } 2704 }
2644 spin_unlock(&dlm->spinlock); 2705 spin_unlock(&dlm->spinlock);
2645 2706
@@ -2664,7 +2725,7 @@ int dlm_begin_reco_handler(struct o2net_msg *msg, u32 len, void *data,
2664 } 2725 }
2665 if (dlm->reco.dead_node != O2NM_INVALID_NODE_NUM) { 2726 if (dlm->reco.dead_node != O2NM_INVALID_NODE_NUM) {
2666 mlog(ML_NOTICE, "%s: dead_node previously set to %u, " 2727 mlog(ML_NOTICE, "%s: dead_node previously set to %u, "
2667 "node %u changing it to %u\n", dlm->name, 2728 "node %u changing it to %u\n", dlm->name,
2668 dlm->reco.dead_node, br->node_idx, br->dead_node); 2729 dlm->reco.dead_node, br->node_idx, br->dead_node);
2669 } 2730 }
2670 dlm_set_reco_master(dlm, br->node_idx); 2731 dlm_set_reco_master(dlm, br->node_idx);
@@ -2730,8 +2791,8 @@ stage2:
2730 if (ret < 0) { 2791 if (ret < 0) {
2731 mlog_errno(ret); 2792 mlog_errno(ret);
2732 if (dlm_is_host_down(ret)) { 2793 if (dlm_is_host_down(ret)) {
2733 /* this has no effect on this recovery 2794 /* this has no effect on this recovery
2734 * session, so set the status to zero to 2795 * session, so set the status to zero to
2735 * finish out the last recovery */ 2796 * finish out the last recovery */
2736 mlog(ML_ERROR, "node %u went down after this " 2797 mlog(ML_ERROR, "node %u went down after this "
2737 "node finished recovery.\n", nodenum); 2798 "node finished recovery.\n", nodenum);
@@ -2768,7 +2829,7 @@ int dlm_finalize_reco_handler(struct o2net_msg *msg, u32 len, void *data,
2768 mlog(0, "%s: node %u finalizing recovery stage%d of " 2829 mlog(0, "%s: node %u finalizing recovery stage%d of "
2769 "node %u (%u:%u)\n", dlm->name, fr->node_idx, stage, 2830 "node %u (%u:%u)\n", dlm->name, fr->node_idx, stage,
2770 fr->dead_node, dlm->reco.dead_node, dlm->reco.new_master); 2831 fr->dead_node, dlm->reco.dead_node, dlm->reco.new_master);
2771 2832
2772 spin_lock(&dlm->spinlock); 2833 spin_lock(&dlm->spinlock);
2773 2834
2774 if (dlm->reco.new_master != fr->node_idx) { 2835 if (dlm->reco.new_master != fr->node_idx) {
diff --git a/fs/ocfs2/dlm/dlmunlock.c b/fs/ocfs2/dlm/dlmunlock.c
index 00f53b2aea76..49e29ecd0201 100644
--- a/fs/ocfs2/dlm/dlmunlock.c
+++ b/fs/ocfs2/dlm/dlmunlock.c
@@ -190,8 +190,8 @@ static enum dlm_status dlmunlock_common(struct dlm_ctxt *dlm,
190 actions &= ~(DLM_UNLOCK_REMOVE_LOCK| 190 actions &= ~(DLM_UNLOCK_REMOVE_LOCK|
191 DLM_UNLOCK_REGRANT_LOCK| 191 DLM_UNLOCK_REGRANT_LOCK|
192 DLM_UNLOCK_CLEAR_CONVERT_TYPE); 192 DLM_UNLOCK_CLEAR_CONVERT_TYPE);
193 } else if (status == DLM_RECOVERING || 193 } else if (status == DLM_RECOVERING ||
194 status == DLM_MIGRATING || 194 status == DLM_MIGRATING ||
195 status == DLM_FORWARD) { 195 status == DLM_FORWARD) {
196 /* must clear the actions because this unlock 196 /* must clear the actions because this unlock
197 * is about to be retried. cannot free or do 197 * is about to be retried. cannot free or do
@@ -661,14 +661,14 @@ retry:
661 if (call_ast) { 661 if (call_ast) {
662 mlog(0, "calling unlockast(%p, %d)\n", data, status); 662 mlog(0, "calling unlockast(%p, %d)\n", data, status);
663 if (is_master) { 663 if (is_master) {
664 /* it is possible that there is one last bast 664 /* it is possible that there is one last bast
665 * pending. make sure it is flushed, then 665 * pending. make sure it is flushed, then
666 * call the unlockast. 666 * call the unlockast.
667 * not an issue if this is a mastered remotely, 667 * not an issue if this is a mastered remotely,
668 * since this lock has been removed from the 668 * since this lock has been removed from the
669 * lockres queues and cannot be found. */ 669 * lockres queues and cannot be found. */
670 dlm_kick_thread(dlm, NULL); 670 dlm_kick_thread(dlm, NULL);
671 wait_event(dlm->ast_wq, 671 wait_event(dlm->ast_wq,
672 dlm_lock_basts_flushed(dlm, lock)); 672 dlm_lock_basts_flushed(dlm, lock));
673 } 673 }
674 (*unlockast)(data, status); 674 (*unlockast)(data, status);
diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c
index c5e4a49e3a12..e044019cb3b1 100644
--- a/fs/ocfs2/dlmglue.c
+++ b/fs/ocfs2/dlmglue.c
@@ -875,6 +875,14 @@ static inline void ocfs2_generic_handle_convert_action(struct ocfs2_lock_res *lo
875 lockres_or_flags(lockres, OCFS2_LOCK_NEEDS_REFRESH); 875 lockres_or_flags(lockres, OCFS2_LOCK_NEEDS_REFRESH);
876 876
877 lockres->l_level = lockres->l_requested; 877 lockres->l_level = lockres->l_requested;
878
879 /*
880 * We set the OCFS2_LOCK_UPCONVERT_FINISHING flag before clearing
881 * the OCFS2_LOCK_BUSY flag to prevent the dc thread from
882 * downconverting the lock before the upconvert has fully completed.
883 */
884 lockres_or_flags(lockres, OCFS2_LOCK_UPCONVERT_FINISHING);
885
878 lockres_clear_flags(lockres, OCFS2_LOCK_BUSY); 886 lockres_clear_flags(lockres, OCFS2_LOCK_BUSY);
879 887
880 mlog_exit_void(); 888 mlog_exit_void();
@@ -907,8 +915,6 @@ static int ocfs2_generic_handle_bast(struct ocfs2_lock_res *lockres,
907 915
908 assert_spin_locked(&lockres->l_lock); 916 assert_spin_locked(&lockres->l_lock);
909 917
910 lockres_or_flags(lockres, OCFS2_LOCK_BLOCKED);
911
912 if (level > lockres->l_blocking) { 918 if (level > lockres->l_blocking) {
913 /* only schedule a downconvert if we haven't already scheduled 919 /* only schedule a downconvert if we haven't already scheduled
914 * one that goes low enough to satisfy the level we're 920 * one that goes low enough to satisfy the level we're
@@ -921,6 +927,9 @@ static int ocfs2_generic_handle_bast(struct ocfs2_lock_res *lockres,
921 lockres->l_blocking = level; 927 lockres->l_blocking = level;
922 } 928 }
923 929
930 if (needs_downconvert)
931 lockres_or_flags(lockres, OCFS2_LOCK_BLOCKED);
932
924 mlog_exit(needs_downconvert); 933 mlog_exit(needs_downconvert);
925 return needs_downconvert; 934 return needs_downconvert;
926} 935}
@@ -1133,6 +1142,7 @@ static inline void ocfs2_recover_from_dlm_error(struct ocfs2_lock_res *lockres,
1133 mlog_entry_void(); 1142 mlog_entry_void();
1134 spin_lock_irqsave(&lockres->l_lock, flags); 1143 spin_lock_irqsave(&lockres->l_lock, flags);
1135 lockres_clear_flags(lockres, OCFS2_LOCK_BUSY); 1144 lockres_clear_flags(lockres, OCFS2_LOCK_BUSY);
1145 lockres_clear_flags(lockres, OCFS2_LOCK_UPCONVERT_FINISHING);
1136 if (convert) 1146 if (convert)
1137 lockres->l_action = OCFS2_AST_INVALID; 1147 lockres->l_action = OCFS2_AST_INVALID;
1138 else 1148 else
@@ -1323,13 +1333,13 @@ static int __ocfs2_cluster_lock(struct ocfs2_super *osb,
1323again: 1333again:
1324 wait = 0; 1334 wait = 0;
1325 1335
1336 spin_lock_irqsave(&lockres->l_lock, flags);
1337
1326 if (catch_signals && signal_pending(current)) { 1338 if (catch_signals && signal_pending(current)) {
1327 ret = -ERESTARTSYS; 1339 ret = -ERESTARTSYS;
1328 goto out; 1340 goto unlock;
1329 } 1341 }
1330 1342
1331 spin_lock_irqsave(&lockres->l_lock, flags);
1332
1333 mlog_bug_on_msg(lockres->l_flags & OCFS2_LOCK_FREEING, 1343 mlog_bug_on_msg(lockres->l_flags & OCFS2_LOCK_FREEING,
1334 "Cluster lock called on freeing lockres %s! flags " 1344 "Cluster lock called on freeing lockres %s! flags "
1335 "0x%lx\n", lockres->l_name, lockres->l_flags); 1345 "0x%lx\n", lockres->l_name, lockres->l_flags);
@@ -1346,6 +1356,25 @@ again:
1346 goto unlock; 1356 goto unlock;
1347 } 1357 }
1348 1358
1359 if (lockres->l_flags & OCFS2_LOCK_UPCONVERT_FINISHING) {
1360 /*
1361 * We've upconverted. If the lock now has a level we can
1362 * work with, we take it. If, however, the lock is not at the
1363 * required level, we go thru the full cycle. One way this could
1364 * happen is if a process requesting an upconvert to PR is
1365 * closely followed by another requesting upconvert to an EX.
1366 * If the process requesting EX lands here, we want it to
1367 * continue attempting to upconvert and let the process
1368 * requesting PR take the lock.
1369 * If multiple processes request upconvert to PR, the first one
1370 * here will take the lock. The others will have to go thru the
1371 * OCFS2_LOCK_BLOCKED check to ensure that there is no pending
1372 * downconvert request.
1373 */
1374 if (level <= lockres->l_level)
1375 goto update_holders;
1376 }
1377
1349 if (lockres->l_flags & OCFS2_LOCK_BLOCKED && 1378 if (lockres->l_flags & OCFS2_LOCK_BLOCKED &&
1350 !ocfs2_may_continue_on_blocked_lock(lockres, level)) { 1379 !ocfs2_may_continue_on_blocked_lock(lockres, level)) {
1351 /* is the lock is currently blocked on behalf of 1380 /* is the lock is currently blocked on behalf of
@@ -1416,11 +1445,14 @@ again:
1416 goto again; 1445 goto again;
1417 } 1446 }
1418 1447
1448update_holders:
1419 /* Ok, if we get here then we're good to go. */ 1449 /* Ok, if we get here then we're good to go. */
1420 ocfs2_inc_holders(lockres, level); 1450 ocfs2_inc_holders(lockres, level);
1421 1451
1422 ret = 0; 1452 ret = 0;
1423unlock: 1453unlock:
1454 lockres_clear_flags(lockres, OCFS2_LOCK_UPCONVERT_FINISHING);
1455
1424 spin_unlock_irqrestore(&lockres->l_lock, flags); 1456 spin_unlock_irqrestore(&lockres->l_lock, flags);
1425out: 1457out:
1426 /* 1458 /*
@@ -3155,7 +3187,7 @@ out:
3155/* Mark the lockres as being dropped. It will no longer be 3187/* Mark the lockres as being dropped. It will no longer be
3156 * queued if blocking, but we still may have to wait on it 3188 * queued if blocking, but we still may have to wait on it
3157 * being dequeued from the downconvert thread before we can consider 3189 * being dequeued from the downconvert thread before we can consider
3158 * it safe to drop. 3190 * it safe to drop.
3159 * 3191 *
3160 * You can *not* attempt to call cluster_lock on this lockres anymore. */ 3192 * You can *not* attempt to call cluster_lock on this lockres anymore. */
3161void ocfs2_mark_lockres_freeing(struct ocfs2_lock_res *lockres) 3193void ocfs2_mark_lockres_freeing(struct ocfs2_lock_res *lockres)
@@ -3352,6 +3384,7 @@ static int ocfs2_unblock_lock(struct ocfs2_super *osb,
3352 unsigned long flags; 3384 unsigned long flags;
3353 int blocking; 3385 int blocking;
3354 int new_level; 3386 int new_level;
3387 int level;
3355 int ret = 0; 3388 int ret = 0;
3356 int set_lvb = 0; 3389 int set_lvb = 0;
3357 unsigned int gen; 3390 unsigned int gen;
@@ -3360,9 +3393,17 @@ static int ocfs2_unblock_lock(struct ocfs2_super *osb,
3360 3393
3361 spin_lock_irqsave(&lockres->l_lock, flags); 3394 spin_lock_irqsave(&lockres->l_lock, flags);
3362 3395
3363 BUG_ON(!(lockres->l_flags & OCFS2_LOCK_BLOCKED));
3364
3365recheck: 3396recheck:
3397 /*
3398 * Is it still blocking? If not, we have no more work to do.
3399 */
3400 if (!(lockres->l_flags & OCFS2_LOCK_BLOCKED)) {
3401 BUG_ON(lockres->l_blocking != DLM_LOCK_NL);
3402 spin_unlock_irqrestore(&lockres->l_lock, flags);
3403 ret = 0;
3404 goto leave;
3405 }
3406
3366 if (lockres->l_flags & OCFS2_LOCK_BUSY) { 3407 if (lockres->l_flags & OCFS2_LOCK_BUSY) {
3367 /* XXX 3408 /* XXX
3368 * This is a *big* race. The OCFS2_LOCK_PENDING flag 3409 * This is a *big* race. The OCFS2_LOCK_PENDING flag
@@ -3401,6 +3442,31 @@ recheck:
3401 goto leave; 3442 goto leave;
3402 } 3443 }
3403 3444
3445 /*
3446 * This prevents livelocks. OCFS2_LOCK_UPCONVERT_FINISHING flag is
3447 * set when the ast is received for an upconvert just before the
3448 * OCFS2_LOCK_BUSY flag is cleared. Now if the fs received a bast
3449 * on the heels of the ast, we want to delay the downconvert just
3450 * enough to allow the up requestor to do its task. Because this
3451 * lock is in the blocked queue, the lock will be downconverted
3452 * as soon as the requestor is done with the lock.
3453 */
3454 if (lockres->l_flags & OCFS2_LOCK_UPCONVERT_FINISHING)
3455 goto leave_requeue;
3456
3457 /*
3458 * How can we block and yet be at NL? We were trying to upconvert
3459 * from NL and got canceled. The code comes back here, and now
3460 * we notice and clear BLOCKING.
3461 */
3462 if (lockres->l_level == DLM_LOCK_NL) {
3463 BUG_ON(lockres->l_ex_holders || lockres->l_ro_holders);
3464 lockres->l_blocking = DLM_LOCK_NL;
3465 lockres_clear_flags(lockres, OCFS2_LOCK_BLOCKED);
3466 spin_unlock_irqrestore(&lockres->l_lock, flags);
3467 goto leave;
3468 }
3469
3404 /* if we're blocking an exclusive and we have *any* holders, 3470 /* if we're blocking an exclusive and we have *any* holders,
3405 * then requeue. */ 3471 * then requeue. */
3406 if ((lockres->l_blocking == DLM_LOCK_EX) 3472 if ((lockres->l_blocking == DLM_LOCK_EX)
@@ -3438,6 +3504,7 @@ recheck:
3438 * may sleep, so we save off a copy of what we're blocking as 3504 * may sleep, so we save off a copy of what we're blocking as
3439 * it may change while we're not holding the spin lock. */ 3505 * it may change while we're not holding the spin lock. */
3440 blocking = lockres->l_blocking; 3506 blocking = lockres->l_blocking;
3507 level = lockres->l_level;
3441 spin_unlock_irqrestore(&lockres->l_lock, flags); 3508 spin_unlock_irqrestore(&lockres->l_lock, flags);
3442 3509
3443 ctl->unblock_action = lockres->l_ops->downconvert_worker(lockres, blocking); 3510 ctl->unblock_action = lockres->l_ops->downconvert_worker(lockres, blocking);
@@ -3446,7 +3513,7 @@ recheck:
3446 goto leave; 3513 goto leave;
3447 3514
3448 spin_lock_irqsave(&lockres->l_lock, flags); 3515 spin_lock_irqsave(&lockres->l_lock, flags);
3449 if (blocking != lockres->l_blocking) { 3516 if ((blocking != lockres->l_blocking) || (level != lockres->l_level)) {
3450 /* If this changed underneath us, then we can't drop 3517 /* If this changed underneath us, then we can't drop
3451 * it just yet. */ 3518 * it just yet. */
3452 goto recheck; 3519 goto recheck;
diff --git a/fs/ocfs2/export.c b/fs/ocfs2/export.c
index 15713cbb865c..19ad145d2af3 100644
--- a/fs/ocfs2/export.c
+++ b/fs/ocfs2/export.c
@@ -239,7 +239,7 @@ static int ocfs2_encode_fh(struct dentry *dentry, u32 *fh_in, int *max_len,
239 mlog(0, "Encoding parent: blkno: %llu, generation: %u\n", 239 mlog(0, "Encoding parent: blkno: %llu, generation: %u\n",
240 (unsigned long long)blkno, generation); 240 (unsigned long long)blkno, generation);
241 } 241 }
242 242
243 *max_len = len; 243 *max_len = len;
244 244
245bail: 245bail:
diff --git a/fs/ocfs2/extent_map.c b/fs/ocfs2/extent_map.c
index 843db64e9d4a..5328529e7fd2 100644
--- a/fs/ocfs2/extent_map.c
+++ b/fs/ocfs2/extent_map.c
@@ -37,6 +37,7 @@
37#include "extent_map.h" 37#include "extent_map.h"
38#include "inode.h" 38#include "inode.h"
39#include "super.h" 39#include "super.h"
40#include "symlink.h"
40 41
41#include "buffer_head_io.h" 42#include "buffer_head_io.h"
42 43
@@ -191,7 +192,7 @@ static int ocfs2_try_to_merge_extent_map(struct ocfs2_extent_map_item *emi,
191 emi->ei_clusters += ins->ei_clusters; 192 emi->ei_clusters += ins->ei_clusters;
192 return 1; 193 return 1;
193 } else if ((ins->ei_phys + ins->ei_clusters) == emi->ei_phys && 194 } else if ((ins->ei_phys + ins->ei_clusters) == emi->ei_phys &&
194 (ins->ei_cpos + ins->ei_clusters) == emi->ei_phys && 195 (ins->ei_cpos + ins->ei_clusters) == emi->ei_cpos &&
195 ins->ei_flags == emi->ei_flags) { 196 ins->ei_flags == emi->ei_flags) {
196 emi->ei_phys = ins->ei_phys; 197 emi->ei_phys = ins->ei_phys;
197 emi->ei_cpos = ins->ei_cpos; 198 emi->ei_cpos = ins->ei_cpos;
@@ -703,6 +704,12 @@ out:
703 return ret; 704 return ret;
704} 705}
705 706
707/*
708 * The ocfs2_fiemap_inline() may be a little bit misleading, since
709 * it not only handles the fiemap for inlined files, but also deals
710 * with the fast symlink, cause they have no difference for extent
711 * mapping per se.
712 */
706static int ocfs2_fiemap_inline(struct inode *inode, struct buffer_head *di_bh, 713static int ocfs2_fiemap_inline(struct inode *inode, struct buffer_head *di_bh,
707 struct fiemap_extent_info *fieinfo, 714 struct fiemap_extent_info *fieinfo,
708 u64 map_start) 715 u64 map_start)
@@ -715,11 +722,18 @@ static int ocfs2_fiemap_inline(struct inode *inode, struct buffer_head *di_bh,
715 struct ocfs2_inode_info *oi = OCFS2_I(inode); 722 struct ocfs2_inode_info *oi = OCFS2_I(inode);
716 723
717 di = (struct ocfs2_dinode *)di_bh->b_data; 724 di = (struct ocfs2_dinode *)di_bh->b_data;
718 id_count = le16_to_cpu(di->id2.i_data.id_count); 725 if (ocfs2_inode_is_fast_symlink(inode))
726 id_count = ocfs2_fast_symlink_chars(inode->i_sb);
727 else
728 id_count = le16_to_cpu(di->id2.i_data.id_count);
719 729
720 if (map_start < id_count) { 730 if (map_start < id_count) {
721 phys = oi->ip_blkno << inode->i_sb->s_blocksize_bits; 731 phys = oi->ip_blkno << inode->i_sb->s_blocksize_bits;
722 phys += offsetof(struct ocfs2_dinode, id2.i_data.id_data); 732 if (ocfs2_inode_is_fast_symlink(inode))
733 phys += offsetof(struct ocfs2_dinode, id2.i_symlink);
734 else
735 phys += offsetof(struct ocfs2_dinode,
736 id2.i_data.id_data);
723 737
724 ret = fiemap_fill_next_extent(fieinfo, 0, phys, id_count, 738 ret = fiemap_fill_next_extent(fieinfo, 0, phys, id_count,
725 flags); 739 flags);
@@ -756,9 +770,10 @@ int ocfs2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
756 down_read(&OCFS2_I(inode)->ip_alloc_sem); 770 down_read(&OCFS2_I(inode)->ip_alloc_sem);
757 771
758 /* 772 /*
759 * Handle inline-data separately. 773 * Handle inline-data and fast symlink separately.
760 */ 774 */
761 if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) { 775 if ((OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) ||
776 ocfs2_inode_is_fast_symlink(inode)) {
762 ret = ocfs2_fiemap_inline(inode, di_bh, fieinfo, map_start); 777 ret = ocfs2_fiemap_inline(inode, di_bh, fieinfo, map_start);
763 goto out_unlock; 778 goto out_unlock;
764 } 779 }
@@ -786,6 +801,8 @@ int ocfs2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
786 fe_flags = 0; 801 fe_flags = 0;
787 if (rec.e_flags & OCFS2_EXT_UNWRITTEN) 802 if (rec.e_flags & OCFS2_EXT_UNWRITTEN)
788 fe_flags |= FIEMAP_EXTENT_UNWRITTEN; 803 fe_flags |= FIEMAP_EXTENT_UNWRITTEN;
804 if (rec.e_flags & OCFS2_EXT_REFCOUNTED)
805 fe_flags |= FIEMAP_EXTENT_SHARED;
789 if (is_last) 806 if (is_last)
790 fe_flags |= FIEMAP_EXTENT_LAST; 807 fe_flags |= FIEMAP_EXTENT_LAST;
791 len_bytes = (u64)le16_to_cpu(rec.e_leaf_clusters) << osb->s_clustersize_bits; 808 len_bytes = (u64)le16_to_cpu(rec.e_leaf_clusters) << osb->s_clustersize_bits;
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 3d30a1c974a8..558ce0312421 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -749,7 +749,7 @@ static int ocfs2_write_zero_page(struct inode *inode,
749 int ret; 749 int ret;
750 750
751 offset = (size & (PAGE_CACHE_SIZE-1)); /* Within page */ 751 offset = (size & (PAGE_CACHE_SIZE-1)); /* Within page */
752 /* ugh. in prepare/commit_write, if from==to==start of block, we 752 /* ugh. in prepare/commit_write, if from==to==start of block, we
753 ** skip the prepare. make sure we never send an offset for the start 753 ** skip the prepare. make sure we never send an offset for the start
754 ** of a block 754 ** of a block
755 */ 755 */
@@ -1772,13 +1772,14 @@ static int ocfs2_prepare_inode_for_write(struct dentry *dentry,
1772 loff_t *ppos, 1772 loff_t *ppos,
1773 size_t count, 1773 size_t count,
1774 int appending, 1774 int appending,
1775 int *direct_io) 1775 int *direct_io,
1776 int *has_refcount)
1776{ 1777{
1777 int ret = 0, meta_level = 0; 1778 int ret = 0, meta_level = 0;
1778 struct inode *inode = dentry->d_inode; 1779 struct inode *inode = dentry->d_inode;
1779 loff_t saved_pos, end; 1780 loff_t saved_pos, end;
1780 1781
1781 /* 1782 /*
1782 * We start with a read level meta lock and only jump to an ex 1783 * We start with a read level meta lock and only jump to an ex
1783 * if we need to make modifications here. 1784 * if we need to make modifications here.
1784 */ 1785 */
@@ -1833,6 +1834,8 @@ static int ocfs2_prepare_inode_for_write(struct dentry *dentry,
1833 saved_pos, 1834 saved_pos,
1834 count, 1835 count,
1835 &meta_level); 1836 &meta_level);
1837 if (has_refcount)
1838 *has_refcount = 1;
1836 } 1839 }
1837 1840
1838 if (ret < 0) { 1841 if (ret < 0) {
@@ -1856,6 +1859,10 @@ static int ocfs2_prepare_inode_for_write(struct dentry *dentry,
1856 break; 1859 break;
1857 } 1860 }
1858 1861
1862 if (has_refcount && *has_refcount == 1) {
1863 *direct_io = 0;
1864 break;
1865 }
1859 /* 1866 /*
1860 * Allowing concurrent direct writes means 1867 * Allowing concurrent direct writes means
1861 * i_size changes wouldn't be synchronized, so 1868 * i_size changes wouldn't be synchronized, so
@@ -1899,7 +1906,7 @@ static ssize_t ocfs2_file_aio_write(struct kiocb *iocb,
1899 loff_t pos) 1906 loff_t pos)
1900{ 1907{
1901 int ret, direct_io, appending, rw_level, have_alloc_sem = 0; 1908 int ret, direct_io, appending, rw_level, have_alloc_sem = 0;
1902 int can_do_direct; 1909 int can_do_direct, has_refcount = 0;
1903 ssize_t written = 0; 1910 ssize_t written = 0;
1904 size_t ocount; /* original count */ 1911 size_t ocount; /* original count */
1905 size_t count; /* after file limit checks */ 1912 size_t count; /* after file limit checks */
@@ -1942,7 +1949,7 @@ relock:
1942 can_do_direct = direct_io; 1949 can_do_direct = direct_io;
1943 ret = ocfs2_prepare_inode_for_write(file->f_path.dentry, ppos, 1950 ret = ocfs2_prepare_inode_for_write(file->f_path.dentry, ppos,
1944 iocb->ki_left, appending, 1951 iocb->ki_left, appending,
1945 &can_do_direct); 1952 &can_do_direct, &has_refcount);
1946 if (ret < 0) { 1953 if (ret < 0) {
1947 mlog_errno(ret); 1954 mlog_errno(ret);
1948 goto out; 1955 goto out;
@@ -2006,14 +2013,16 @@ out_dio:
2006 /* buffered aio wouldn't have proper lock coverage today */ 2013 /* buffered aio wouldn't have proper lock coverage today */
2007 BUG_ON(ret == -EIOCBQUEUED && !(file->f_flags & O_DIRECT)); 2014 BUG_ON(ret == -EIOCBQUEUED && !(file->f_flags & O_DIRECT));
2008 2015
2009 if ((file->f_flags & O_DSYNC && !direct_io) || IS_SYNC(inode)) { 2016 if (((file->f_flags & O_DSYNC) && !direct_io) || IS_SYNC(inode) ||
2017 ((file->f_flags & O_DIRECT) && has_refcount)) {
2010 ret = filemap_fdatawrite_range(file->f_mapping, pos, 2018 ret = filemap_fdatawrite_range(file->f_mapping, pos,
2011 pos + count - 1); 2019 pos + count - 1);
2012 if (ret < 0) 2020 if (ret < 0)
2013 written = ret; 2021 written = ret;
2014 2022
2015 if (!ret && (old_size != i_size_read(inode) || 2023 if (!ret && (old_size != i_size_read(inode) ||
2016 old_clusters != OCFS2_I(inode)->ip_clusters)) { 2024 old_clusters != OCFS2_I(inode)->ip_clusters ||
2025 has_refcount)) {
2017 ret = jbd2_journal_force_commit(osb->journal->j_journal); 2026 ret = jbd2_journal_force_commit(osb->journal->j_journal);
2018 if (ret < 0) 2027 if (ret < 0)
2019 written = ret; 2028 written = ret;
@@ -2024,7 +2033,7 @@ out_dio:
2024 pos + count - 1); 2033 pos + count - 1);
2025 } 2034 }
2026 2035
2027 /* 2036 /*
2028 * deep in g_f_a_w_n()->ocfs2_direct_IO we pass in a ocfs2_dio_end_io 2037 * deep in g_f_a_w_n()->ocfs2_direct_IO we pass in a ocfs2_dio_end_io
2029 * function pointer which is called when o_direct io completes so that 2038 * function pointer which is called when o_direct io completes so that
2030 * it can unlock our rw lock. (it's the clustered equivalent of 2039 * it can unlock our rw lock. (it's the clustered equivalent of
@@ -2062,7 +2071,7 @@ static int ocfs2_splice_to_file(struct pipe_inode_info *pipe,
2062 int ret; 2071 int ret;
2063 2072
2064 ret = ocfs2_prepare_inode_for_write(out->f_path.dentry, &sd->pos, 2073 ret = ocfs2_prepare_inode_for_write(out->f_path.dentry, &sd->pos,
2065 sd->total_len, 0, NULL); 2074 sd->total_len, 0, NULL, NULL);
2066 if (ret < 0) { 2075 if (ret < 0) {
2067 mlog_errno(ret); 2076 mlog_errno(ret);
2068 return ret; 2077 return ret;
@@ -2189,7 +2198,7 @@ static ssize_t ocfs2_file_aio_read(struct kiocb *iocb,
2189 goto bail; 2198 goto bail;
2190 } 2199 }
2191 2200
2192 /* 2201 /*
2193 * buffered reads protect themselves in ->readpage(). O_DIRECT reads 2202 * buffered reads protect themselves in ->readpage(). O_DIRECT reads
2194 * need locks to protect pending reads from racing with truncate. 2203 * need locks to protect pending reads from racing with truncate.
2195 */ 2204 */
@@ -2211,10 +2220,10 @@ static ssize_t ocfs2_file_aio_read(struct kiocb *iocb,
2211 * We're fine letting folks race truncates and extending 2220 * We're fine letting folks race truncates and extending
2212 * writes with read across the cluster, just like they can 2221 * writes with read across the cluster, just like they can
2213 * locally. Hence no rw_lock during read. 2222 * locally. Hence no rw_lock during read.
2214 * 2223 *
2215 * Take and drop the meta data lock to update inode fields 2224 * Take and drop the meta data lock to update inode fields
2216 * like i_size. This allows the checks down below 2225 * like i_size. This allows the checks down below
2217 * generic_file_aio_read() a chance of actually working. 2226 * generic_file_aio_read() a chance of actually working.
2218 */ 2227 */
2219 ret = ocfs2_inode_lock_atime(inode, filp->f_vfsmnt, &lock_level); 2228 ret = ocfs2_inode_lock_atime(inode, filp->f_vfsmnt, &lock_level);
2220 if (ret < 0) { 2229 if (ret < 0) {
@@ -2239,7 +2248,7 @@ static ssize_t ocfs2_file_aio_read(struct kiocb *iocb,
2239bail: 2248bail:
2240 if (have_alloc_sem) 2249 if (have_alloc_sem)
2241 up_read(&inode->i_alloc_sem); 2250 up_read(&inode->i_alloc_sem);
2242 if (rw_level != -1) 2251 if (rw_level != -1)
2243 ocfs2_rw_unlock(inode, rw_level); 2252 ocfs2_rw_unlock(inode, rw_level);
2244 mlog_exit(ret); 2253 mlog_exit(ret);
2245 2254
diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c
index 0297fb8982b8..88459bdd1ff3 100644
--- a/fs/ocfs2/inode.c
+++ b/fs/ocfs2/inode.c
@@ -475,7 +475,7 @@ static int ocfs2_read_locked_inode(struct inode *inode,
475 if (args->fi_flags & OCFS2_FI_FLAG_ORPHAN_RECOVERY) { 475 if (args->fi_flags & OCFS2_FI_FLAG_ORPHAN_RECOVERY) {
476 status = ocfs2_try_open_lock(inode, 0); 476 status = ocfs2_try_open_lock(inode, 0);
477 if (status) { 477 if (status) {
478 make_bad_inode(inode); 478 make_bad_inode(inode);
479 return status; 479 return status;
480 } 480 }
481 } 481 }
@@ -684,7 +684,7 @@ bail:
684 return status; 684 return status;
685} 685}
686 686
687/* 687/*
688 * Serialize with orphan dir recovery. If the process doing 688 * Serialize with orphan dir recovery. If the process doing
689 * recovery on this orphan dir does an iget() with the dir 689 * recovery on this orphan dir does an iget() with the dir
690 * i_mutex held, we'll deadlock here. Instead we detect this 690 * i_mutex held, we'll deadlock here. Instead we detect this
diff --git a/fs/ocfs2/ioctl.c b/fs/ocfs2/ioctl.c
index 31fbb0619510..7d9d9c132cef 100644
--- a/fs/ocfs2/ioctl.c
+++ b/fs/ocfs2/ioctl.c
@@ -7,6 +7,7 @@
7 7
8#include <linux/fs.h> 8#include <linux/fs.h>
9#include <linux/mount.h> 9#include <linux/mount.h>
10#include <linux/compat.h>
10 11
11#define MLOG_MASK_PREFIX ML_INODE 12#define MLOG_MASK_PREFIX ML_INODE
12#include <cluster/masklog.h> 13#include <cluster/masklog.h>
@@ -181,6 +182,10 @@ long ocfs2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
181#ifdef CONFIG_COMPAT 182#ifdef CONFIG_COMPAT
182long ocfs2_compat_ioctl(struct file *file, unsigned cmd, unsigned long arg) 183long ocfs2_compat_ioctl(struct file *file, unsigned cmd, unsigned long arg)
183{ 184{
185 bool preserve;
186 struct reflink_arguments args;
187 struct inode *inode = file->f_path.dentry->d_inode;
188
184 switch (cmd) { 189 switch (cmd) {
185 case OCFS2_IOC32_GETFLAGS: 190 case OCFS2_IOC32_GETFLAGS:
186 cmd = OCFS2_IOC_GETFLAGS; 191 cmd = OCFS2_IOC_GETFLAGS;
@@ -195,8 +200,15 @@ long ocfs2_compat_ioctl(struct file *file, unsigned cmd, unsigned long arg)
195 case OCFS2_IOC_GROUP_EXTEND: 200 case OCFS2_IOC_GROUP_EXTEND:
196 case OCFS2_IOC_GROUP_ADD: 201 case OCFS2_IOC_GROUP_ADD:
197 case OCFS2_IOC_GROUP_ADD64: 202 case OCFS2_IOC_GROUP_ADD64:
198 case OCFS2_IOC_REFLINK:
199 break; 203 break;
204 case OCFS2_IOC_REFLINK:
205 if (copy_from_user(&args, (struct reflink_arguments *)arg,
206 sizeof(args)))
207 return -EFAULT;
208 preserve = (args.preserve != 0);
209
210 return ocfs2_reflink_ioctl(inode, compat_ptr(args.old_path),
211 compat_ptr(args.new_path), preserve);
200 default: 212 default:
201 return -ENOIOCTLCMD; 213 return -ENOIOCTLCMD;
202 } 214 }
diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c
index bf34c491ae96..9336c60e3a36 100644
--- a/fs/ocfs2/journal.c
+++ b/fs/ocfs2/journal.c
@@ -2034,7 +2034,7 @@ static int ocfs2_queue_orphans(struct ocfs2_super *osb,
2034 status = -ENOENT; 2034 status = -ENOENT;
2035 mlog_errno(status); 2035 mlog_errno(status);
2036 return status; 2036 return status;
2037 } 2037 }
2038 2038
2039 mutex_lock(&orphan_dir_inode->i_mutex); 2039 mutex_lock(&orphan_dir_inode->i_mutex);
2040 status = ocfs2_inode_lock(orphan_dir_inode, NULL, 0); 2040 status = ocfs2_inode_lock(orphan_dir_inode, NULL, 0);
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c
index f010b22b1c44..50fb26a6a5f5 100644
--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -2108,6 +2108,7 @@ int ocfs2_create_inode_in_orphan(struct inode *dir,
2108 } 2108 }
2109 did_quota_inode = 1; 2109 did_quota_inode = 1;
2110 2110
2111 inode->i_nlink = 0;
2111 /* do the real work now. */ 2112 /* do the real work now. */
2112 status = ocfs2_mknod_locked(osb, dir, inode, 2113 status = ocfs2_mknod_locked(osb, dir, inode,
2113 0, &new_di_bh, parent_di_bh, handle, 2114 0, &new_di_bh, parent_di_bh, handle,
@@ -2136,6 +2137,7 @@ int ocfs2_create_inode_in_orphan(struct inode *dir,
2136 if (status < 0) 2137 if (status < 0)
2137 mlog_errno(status); 2138 mlog_errno(status);
2138 2139
2140 insert_inode_hash(inode);
2139leave: 2141leave:
2140 if (status < 0 && did_quota_inode) 2142 if (status < 0 && did_quota_inode)
2141 vfs_dq_free_inode(inode); 2143 vfs_dq_free_inode(inode);
@@ -2267,6 +2269,8 @@ int ocfs2_mv_orphaned_inode_to_new(struct inode *dir,
2267 di = (struct ocfs2_dinode *)di_bh->b_data; 2269 di = (struct ocfs2_dinode *)di_bh->b_data;
2268 le32_add_cpu(&di->i_flags, -OCFS2_ORPHANED_FL); 2270 le32_add_cpu(&di->i_flags, -OCFS2_ORPHANED_FL);
2269 di->i_orphaned_slot = 0; 2271 di->i_orphaned_slot = 0;
2272 inode->i_nlink = 1;
2273 ocfs2_set_links_count(di, inode->i_nlink);
2270 ocfs2_journal_dirty(handle, di_bh); 2274 ocfs2_journal_dirty(handle, di_bh);
2271 2275
2272 status = ocfs2_add_entry(handle, dentry, inode, 2276 status = ocfs2_add_entry(handle, dentry, inode,
@@ -2284,7 +2288,6 @@ int ocfs2_mv_orphaned_inode_to_new(struct inode *dir,
2284 goto out_commit; 2288 goto out_commit;
2285 } 2289 }
2286 2290
2287 insert_inode_hash(inode);
2288 dentry->d_op = &ocfs2_dentry_ops; 2291 dentry->d_op = &ocfs2_dentry_ops;
2289 d_instantiate(dentry, inode); 2292 d_instantiate(dentry, inode);
2290 status = 0; 2293 status = 0;
@@ -2326,4 +2329,5 @@ const struct inode_operations ocfs2_dir_iops = {
2326 .getxattr = generic_getxattr, 2329 .getxattr = generic_getxattr,
2327 .listxattr = ocfs2_listxattr, 2330 .listxattr = ocfs2_listxattr,
2328 .removexattr = generic_removexattr, 2331 .removexattr = generic_removexattr,
2332 .fiemap = ocfs2_fiemap,
2329}; 2333};
diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h
index d963d8638709..740f448041e2 100644
--- a/fs/ocfs2/ocfs2.h
+++ b/fs/ocfs2/ocfs2.h
@@ -136,6 +136,10 @@ enum ocfs2_unlock_action {
136#define OCFS2_LOCK_PENDING (0x00000400) /* This lockres is pending a 136#define OCFS2_LOCK_PENDING (0x00000400) /* This lockres is pending a
137 call to dlm_lock. Only 137 call to dlm_lock. Only
138 exists with BUSY set. */ 138 exists with BUSY set. */
139#define OCFS2_LOCK_UPCONVERT_FINISHING (0x00000800) /* blocks the dc thread
140 * from downconverting
141 * before the upconvert
142 * has completed */
139 143
140struct ocfs2_lock_res_ops; 144struct ocfs2_lock_res_ops;
141 145
@@ -245,9 +249,11 @@ enum ocfs2_mount_options
245 OCFS2_MOUNT_LOCALFLOCKS = 1 << 5, /* No cluster aware user file locks */ 249 OCFS2_MOUNT_LOCALFLOCKS = 1 << 5, /* No cluster aware user file locks */
246 OCFS2_MOUNT_NOUSERXATTR = 1 << 6, /* No user xattr */ 250 OCFS2_MOUNT_NOUSERXATTR = 1 << 6, /* No user xattr */
247 OCFS2_MOUNT_INODE64 = 1 << 7, /* Allow inode numbers > 2^32 */ 251 OCFS2_MOUNT_INODE64 = 1 << 7, /* Allow inode numbers > 2^32 */
248 OCFS2_MOUNT_POSIX_ACL = 1 << 8, /* POSIX access control lists */ 252 OCFS2_MOUNT_POSIX_ACL = 1 << 8, /* Force POSIX access control lists */
249 OCFS2_MOUNT_USRQUOTA = 1 << 9, /* We support user quotas */ 253 OCFS2_MOUNT_NO_POSIX_ACL = 1 << 9, /* Disable POSIX access
250 OCFS2_MOUNT_GRPQUOTA = 1 << 10, /* We support group quotas */ 254 control lists */
255 OCFS2_MOUNT_USRQUOTA = 1 << 10, /* We support user quotas */
256 OCFS2_MOUNT_GRPQUOTA = 1 << 11, /* We support group quotas */
251}; 257};
252 258
253#define OCFS2_OSB_SOFT_RO 0x0001 259#define OCFS2_OSB_SOFT_RO 0x0001
diff --git a/fs/ocfs2/ocfs2_fs.h b/fs/ocfs2/ocfs2_fs.h
index e9431e4a5e7c..7638a38c32bc 100644
--- a/fs/ocfs2/ocfs2_fs.h
+++ b/fs/ocfs2/ocfs2_fs.h
@@ -1202,7 +1202,7 @@ struct ocfs2_local_disk_dqinfo {
1202/* Header of one chunk of a quota file */ 1202/* Header of one chunk of a quota file */
1203struct ocfs2_local_disk_chunk { 1203struct ocfs2_local_disk_chunk {
1204 __le32 dqc_free; /* Number of free entries in the bitmap */ 1204 __le32 dqc_free; /* Number of free entries in the bitmap */
1205 u8 dqc_bitmap[0]; /* Bitmap of entries in the corresponding 1205 __u8 dqc_bitmap[0]; /* Bitmap of entries in the corresponding
1206 * chunk of quota file */ 1206 * chunk of quota file */
1207}; 1207};
1208 1208
@@ -1417,9 +1417,16 @@ static inline int ocfs2_fast_symlink_chars(int blocksize)
1417 return blocksize - offsetof(struct ocfs2_dinode, id2.i_symlink); 1417 return blocksize - offsetof(struct ocfs2_dinode, id2.i_symlink);
1418} 1418}
1419 1419
1420static inline int ocfs2_max_inline_data(int blocksize) 1420static inline int ocfs2_max_inline_data_with_xattr(int blocksize,
1421 struct ocfs2_dinode *di)
1421{ 1422{
1422 return blocksize - offsetof(struct ocfs2_dinode, id2.i_data.id_data); 1423 if (di && (di->i_dyn_features & OCFS2_INLINE_XATTR_FL))
1424 return blocksize -
1425 offsetof(struct ocfs2_dinode, id2.i_data.id_data) -
1426 di->i_xattr_inline_size;
1427 else
1428 return blocksize -
1429 offsetof(struct ocfs2_dinode, id2.i_data.id_data);
1423} 1430}
1424 1431
1425static inline int ocfs2_extent_recs_per_inode(int blocksize) 1432static inline int ocfs2_extent_recs_per_inode(int blocksize)
diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c
index 30967e3f5e43..8ae65c9c020c 100644
--- a/fs/ocfs2/refcounttree.c
+++ b/fs/ocfs2/refcounttree.c
@@ -276,7 +276,7 @@ static void ocfs2_erase_refcount_tree_from_list(struct ocfs2_super *osb,
276 spin_unlock(&osb->osb_lock); 276 spin_unlock(&osb->osb_lock);
277} 277}
278 278
279void ocfs2_kref_remove_refcount_tree(struct kref *kref) 279static void ocfs2_kref_remove_refcount_tree(struct kref *kref)
280{ 280{
281 struct ocfs2_refcount_tree *tree = 281 struct ocfs2_refcount_tree *tree =
282 container_of(kref, struct ocfs2_refcount_tree, rf_getcnt); 282 container_of(kref, struct ocfs2_refcount_tree, rf_getcnt);
@@ -524,23 +524,6 @@ out:
524 return ret; 524 return ret;
525} 525}
526 526
527int ocfs2_lock_refcount_tree_by_inode(struct inode *inode, int rw,
528 struct ocfs2_refcount_tree **ret_tree,
529 struct buffer_head **ref_bh)
530{
531 int ret;
532 u64 ref_blkno;
533
534 ret = ocfs2_get_refcount_block(inode, &ref_blkno);
535 if (ret) {
536 mlog_errno(ret);
537 return ret;
538 }
539
540 return ocfs2_lock_refcount_tree(OCFS2_SB(inode->i_sb), ref_blkno,
541 rw, ret_tree, ref_bh);
542}
543
544void ocfs2_unlock_refcount_tree(struct ocfs2_super *osb, 527void ocfs2_unlock_refcount_tree(struct ocfs2_super *osb,
545 struct ocfs2_refcount_tree *tree, int rw) 528 struct ocfs2_refcount_tree *tree, int rw)
546{ 529{
@@ -969,6 +952,103 @@ out:
969} 952}
970 953
971/* 954/*
955 * Find the end range for a leaf refcount block indicated by
956 * el->l_recs[index].e_blkno.
957 */
958static int ocfs2_get_refcount_cpos_end(struct ocfs2_caching_info *ci,
959 struct buffer_head *ref_root_bh,
960 struct ocfs2_extent_block *eb,
961 struct ocfs2_extent_list *el,
962 int index, u32 *cpos_end)
963{
964 int ret, i, subtree_root;
965 u32 cpos;
966 u64 blkno;
967 struct super_block *sb = ocfs2_metadata_cache_get_super(ci);
968 struct ocfs2_path *left_path = NULL, *right_path = NULL;
969 struct ocfs2_extent_tree et;
970 struct ocfs2_extent_list *tmp_el;
971
972 if (index < le16_to_cpu(el->l_next_free_rec) - 1) {
973 /*
974 * We have a extent rec after index, so just use the e_cpos
975 * of the next extent rec.
976 */
977 *cpos_end = le32_to_cpu(el->l_recs[index+1].e_cpos);
978 return 0;
979 }
980
981 if (!eb || (eb && !eb->h_next_leaf_blk)) {
982 /*
983 * We are the last extent rec, so any high cpos should
984 * be stored in this leaf refcount block.
985 */
986 *cpos_end = UINT_MAX;
987 return 0;
988 }
989
990 /*
991 * If the extent block isn't the last one, we have to find
992 * the subtree root between this extent block and the next
993 * leaf extent block and get the corresponding e_cpos from
994 * the subroot. Otherwise we may corrupt the b-tree.
995 */
996 ocfs2_init_refcount_extent_tree(&et, ci, ref_root_bh);
997
998 left_path = ocfs2_new_path_from_et(&et);
999 if (!left_path) {
1000 ret = -ENOMEM;
1001 mlog_errno(ret);
1002 goto out;
1003 }
1004
1005 cpos = le32_to_cpu(eb->h_list.l_recs[index].e_cpos);
1006 ret = ocfs2_find_path(ci, left_path, cpos);
1007 if (ret) {
1008 mlog_errno(ret);
1009 goto out;
1010 }
1011
1012 right_path = ocfs2_new_path_from_path(left_path);
1013 if (!right_path) {
1014 ret = -ENOMEM;
1015 mlog_errno(ret);
1016 goto out;
1017 }
1018
1019 ret = ocfs2_find_cpos_for_right_leaf(sb, left_path, &cpos);
1020 if (ret) {
1021 mlog_errno(ret);
1022 goto out;
1023 }
1024
1025 ret = ocfs2_find_path(ci, right_path, cpos);
1026 if (ret) {
1027 mlog_errno(ret);
1028 goto out;
1029 }
1030
1031 subtree_root = ocfs2_find_subtree_root(&et, left_path,
1032 right_path);
1033
1034 tmp_el = left_path->p_node[subtree_root].el;
1035 blkno = left_path->p_node[subtree_root+1].bh->b_blocknr;
1036 for (i = 0; i < le32_to_cpu(tmp_el->l_next_free_rec); i++) {
1037 if (le64_to_cpu(tmp_el->l_recs[i].e_blkno) == blkno) {
1038 *cpos_end = le32_to_cpu(tmp_el->l_recs[i+1].e_cpos);
1039 break;
1040 }
1041 }
1042
1043 BUG_ON(i == le32_to_cpu(tmp_el->l_next_free_rec));
1044
1045out:
1046 ocfs2_free_path(left_path);
1047 ocfs2_free_path(right_path);
1048 return ret;
1049}
1050
1051/*
972 * Given a cpos and len, try to find the refcount record which contains cpos. 1052 * Given a cpos and len, try to find the refcount record which contains cpos.
973 * 1. If cpos can be found in one refcount record, return the record. 1053 * 1. If cpos can be found in one refcount record, return the record.
974 * 2. If cpos can't be found, return a fake record which start from cpos 1054 * 2. If cpos can't be found, return a fake record which start from cpos
@@ -983,10 +1063,10 @@ static int ocfs2_get_refcount_rec(struct ocfs2_caching_info *ci,
983 struct buffer_head **ret_bh) 1063 struct buffer_head **ret_bh)
984{ 1064{
985 int ret = 0, i, found; 1065 int ret = 0, i, found;
986 u32 low_cpos; 1066 u32 low_cpos, uninitialized_var(cpos_end);
987 struct ocfs2_extent_list *el; 1067 struct ocfs2_extent_list *el;
988 struct ocfs2_extent_rec *tmp, *rec = NULL; 1068 struct ocfs2_extent_rec *rec = NULL;
989 struct ocfs2_extent_block *eb; 1069 struct ocfs2_extent_block *eb = NULL;
990 struct buffer_head *eb_bh = NULL, *ref_leaf_bh = NULL; 1070 struct buffer_head *eb_bh = NULL, *ref_leaf_bh = NULL;
991 struct super_block *sb = ocfs2_metadata_cache_get_super(ci); 1071 struct super_block *sb = ocfs2_metadata_cache_get_super(ci);
992 struct ocfs2_refcount_block *rb = 1072 struct ocfs2_refcount_block *rb =
@@ -1034,12 +1114,16 @@ static int ocfs2_get_refcount_rec(struct ocfs2_caching_info *ci,
1034 } 1114 }
1035 } 1115 }
1036 1116
1037 /* adjust len when we have ocfs2_extent_rec after it. */ 1117 if (found) {
1038 if (found && i < le16_to_cpu(el->l_next_free_rec) - 1) { 1118 ret = ocfs2_get_refcount_cpos_end(ci, ref_root_bh,
1039 tmp = &el->l_recs[i+1]; 1119 eb, el, i, &cpos_end);
1120 if (ret) {
1121 mlog_errno(ret);
1122 goto out;
1123 }
1040 1124
1041 if (le32_to_cpu(tmp->e_cpos) < cpos + len) 1125 if (cpos_end < low_cpos + len)
1042 len = le32_to_cpu(tmp->e_cpos) - cpos; 1126 len = cpos_end - low_cpos;
1043 } 1127 }
1044 1128
1045 ret = ocfs2_read_refcount_block(ci, le64_to_cpu(rec->e_blkno), 1129 ret = ocfs2_read_refcount_block(ci, le64_to_cpu(rec->e_blkno),
@@ -1418,7 +1502,7 @@ static int ocfs2_divide_leaf_refcount_block(struct buffer_head *ref_leaf_bh,
1418 1502
1419 /* change old and new rl_used accordingly. */ 1503 /* change old and new rl_used accordingly. */
1420 le16_add_cpu(&rl->rl_used, -num_moved); 1504 le16_add_cpu(&rl->rl_used, -num_moved);
1421 new_rl->rl_used = cpu_to_le32(num_moved); 1505 new_rl->rl_used = cpu_to_le16(num_moved);
1422 1506
1423 sort(&rl->rl_recs, le16_to_cpu(rl->rl_used), 1507 sort(&rl->rl_recs, le16_to_cpu(rl->rl_used),
1424 sizeof(struct ocfs2_refcount_rec), 1508 sizeof(struct ocfs2_refcount_rec),
@@ -1797,7 +1881,8 @@ static int ocfs2_split_refcount_rec(handle_t *handle,
1797 recs_need++; 1881 recs_need++;
1798 1882
1799 /* If the leaf block don't have enough record, expand it. */ 1883 /* If the leaf block don't have enough record, expand it. */
1800 if (le16_to_cpu(rf_list->rl_used) + recs_need > rf_list->rl_count) { 1884 if (le16_to_cpu(rf_list->rl_used) + recs_need >
1885 le16_to_cpu(rf_list->rl_count)) {
1801 struct ocfs2_refcount_rec tmp_rec; 1886 struct ocfs2_refcount_rec tmp_rec;
1802 u64 cpos = le64_to_cpu(orig_rec->r_cpos); 1887 u64 cpos = le64_to_cpu(orig_rec->r_cpos);
1803 len = le32_to_cpu(orig_rec->r_clusters); 1888 len = le32_to_cpu(orig_rec->r_clusters);
@@ -1859,7 +1944,7 @@ static int ocfs2_split_refcount_rec(handle_t *handle,
1859 memcpy(tail_rec, orig_rec, sizeof(struct ocfs2_refcount_rec)); 1944 memcpy(tail_rec, orig_rec, sizeof(struct ocfs2_refcount_rec));
1860 le64_add_cpu(&tail_rec->r_cpos, 1945 le64_add_cpu(&tail_rec->r_cpos,
1861 le32_to_cpu(tail_rec->r_clusters) - len); 1946 le32_to_cpu(tail_rec->r_clusters) - len);
1862 tail_rec->r_clusters = le32_to_cpu(len); 1947 tail_rec->r_clusters = cpu_to_le32(len);
1863 } 1948 }
1864 1949
1865 /* 1950 /*
@@ -2860,7 +2945,7 @@ static int ocfs2_duplicate_clusters_by_page(handle_t *handle,
2860 2945
2861 while (offset < end) { 2946 while (offset < end) {
2862 page_index = offset >> PAGE_CACHE_SHIFT; 2947 page_index = offset >> PAGE_CACHE_SHIFT;
2863 map_end = (page_index + 1) << PAGE_CACHE_SHIFT; 2948 map_end = ((loff_t)page_index + 1) << PAGE_CACHE_SHIFT;
2864 if (map_end > end) 2949 if (map_end > end)
2865 map_end = end; 2950 map_end = end;
2866 2951
@@ -2872,8 +2957,12 @@ static int ocfs2_duplicate_clusters_by_page(handle_t *handle,
2872 2957
2873 page = grab_cache_page(mapping, page_index); 2958 page = grab_cache_page(mapping, page_index);
2874 2959
2875 /* This page can't be dirtied before we CoW it out. */ 2960 /*
2876 BUG_ON(PageDirty(page)); 2961 * In case PAGE_CACHE_SIZE <= CLUSTER_SIZE, This page
2962 * can't be dirtied before we CoW it out.
2963 */
2964 if (PAGE_CACHE_SIZE <= OCFS2_SB(sb)->s_clustersize)
2965 BUG_ON(PageDirty(page));
2877 2966
2878 if (!PageUptodate(page)) { 2967 if (!PageUptodate(page)) {
2879 ret = block_read_full_page(page, ocfs2_get_block); 2968 ret = block_read_full_page(page, ocfs2_get_block);
@@ -3085,7 +3174,7 @@ static int ocfs2_cow_sync_writeback(struct super_block *sb,
3085 3174
3086 while (offset < end) { 3175 while (offset < end) {
3087 page_index = offset >> PAGE_CACHE_SHIFT; 3176 page_index = offset >> PAGE_CACHE_SHIFT;
3088 map_end = (page_index + 1) << PAGE_CACHE_SHIFT; 3177 map_end = ((loff_t)page_index + 1) << PAGE_CACHE_SHIFT;
3089 if (map_end > end) 3178 if (map_end > end)
3090 map_end = end; 3179 map_end = end;
3091 3180
@@ -3840,8 +3929,7 @@ static int ocfs2_add_refcounted_extent(struct inode *inode,
3840 } 3929 }
3841 3930
3842 ret = ocfs2_insert_extent(handle, et, cpos, 3931 ret = ocfs2_insert_extent(handle, et, cpos,
3843 cpu_to_le64(ocfs2_clusters_to_blocks(inode->i_sb, 3932 ocfs2_clusters_to_blocks(inode->i_sb, p_cluster),
3844 p_cluster)),
3845 num_clusters, ext_flags, meta_ac); 3933 num_clusters, ext_flags, meta_ac);
3846 if (ret) { 3934 if (ret) {
3847 mlog_errno(ret); 3935 mlog_errno(ret);
@@ -4253,8 +4341,8 @@ static int ocfs2_user_path_parent(const char __user *path,
4253 * @new_dentry: target dentry 4341 * @new_dentry: target dentry
4254 * @preserve: if true, preserve all file attributes 4342 * @preserve: if true, preserve all file attributes
4255 */ 4343 */
4256int ocfs2_vfs_reflink(struct dentry *old_dentry, struct inode *dir, 4344static int ocfs2_vfs_reflink(struct dentry *old_dentry, struct inode *dir,
4257 struct dentry *new_dentry, bool preserve) 4345 struct dentry *new_dentry, bool preserve)
4258{ 4346{
4259 struct inode *inode = old_dentry->d_inode; 4347 struct inode *inode = old_dentry->d_inode;
4260 int error; 4348 int error;
diff --git a/fs/ocfs2/stack_o2cb.c b/fs/ocfs2/stack_o2cb.c
index e49c41050264..3038c92af493 100644
--- a/fs/ocfs2/stack_o2cb.c
+++ b/fs/ocfs2/stack_o2cb.c
@@ -277,7 +277,7 @@ static int o2cb_cluster_connect(struct ocfs2_cluster_connection *conn)
277 u32 dlm_key; 277 u32 dlm_key;
278 struct dlm_ctxt *dlm; 278 struct dlm_ctxt *dlm;
279 struct o2dlm_private *priv; 279 struct o2dlm_private *priv;
280 struct dlm_protocol_version dlm_version; 280 struct dlm_protocol_version fs_version;
281 281
282 BUG_ON(conn == NULL); 282 BUG_ON(conn == NULL);
283 BUG_ON(o2cb_stack.sp_proto == NULL); 283 BUG_ON(o2cb_stack.sp_proto == NULL);
@@ -304,18 +304,18 @@ static int o2cb_cluster_connect(struct ocfs2_cluster_connection *conn)
304 /* used by the dlm code to make message headers unique, each 304 /* used by the dlm code to make message headers unique, each
305 * node in this domain must agree on this. */ 305 * node in this domain must agree on this. */
306 dlm_key = crc32_le(0, conn->cc_name, conn->cc_namelen); 306 dlm_key = crc32_le(0, conn->cc_name, conn->cc_namelen);
307 dlm_version.pv_major = conn->cc_version.pv_major; 307 fs_version.pv_major = conn->cc_version.pv_major;
308 dlm_version.pv_minor = conn->cc_version.pv_minor; 308 fs_version.pv_minor = conn->cc_version.pv_minor;
309 309
310 dlm = dlm_register_domain(conn->cc_name, dlm_key, &dlm_version); 310 dlm = dlm_register_domain(conn->cc_name, dlm_key, &fs_version);
311 if (IS_ERR(dlm)) { 311 if (IS_ERR(dlm)) {
312 rc = PTR_ERR(dlm); 312 rc = PTR_ERR(dlm);
313 mlog_errno(rc); 313 mlog_errno(rc);
314 goto out_free; 314 goto out_free;
315 } 315 }
316 316
317 conn->cc_version.pv_major = dlm_version.pv_major; 317 conn->cc_version.pv_major = fs_version.pv_major;
318 conn->cc_version.pv_minor = dlm_version.pv_minor; 318 conn->cc_version.pv_minor = fs_version.pv_minor;
319 conn->cc_lockspace = dlm; 319 conn->cc_lockspace = dlm;
320 320
321 dlm_register_eviction_cb(dlm, &priv->op_eviction_cb); 321 dlm_register_eviction_cb(dlm, &priv->op_eviction_cb);
diff --git a/fs/ocfs2/stack_user.c b/fs/ocfs2/stack_user.c
index ff4c798a5635..da78a2a334fd 100644
--- a/fs/ocfs2/stack_user.c
+++ b/fs/ocfs2/stack_user.c
@@ -814,7 +814,7 @@ static int fs_protocol_compare(struct ocfs2_protocol_version *existing,
814static int user_cluster_connect(struct ocfs2_cluster_connection *conn) 814static int user_cluster_connect(struct ocfs2_cluster_connection *conn)
815{ 815{
816 dlm_lockspace_t *fsdlm; 816 dlm_lockspace_t *fsdlm;
817 struct ocfs2_live_connection *control; 817 struct ocfs2_live_connection *uninitialized_var(control);
818 int rc = 0; 818 int rc = 0;
819 819
820 BUG_ON(conn == NULL); 820 BUG_ON(conn == NULL);
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index 14f47d2bfe02..755cd49a5ef3 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -100,6 +100,8 @@ struct mount_options
100static int ocfs2_parse_options(struct super_block *sb, char *options, 100static int ocfs2_parse_options(struct super_block *sb, char *options,
101 struct mount_options *mopt, 101 struct mount_options *mopt,
102 int is_remount); 102 int is_remount);
103static int ocfs2_check_set_options(struct super_block *sb,
104 struct mount_options *options);
103static int ocfs2_show_options(struct seq_file *s, struct vfsmount *mnt); 105static int ocfs2_show_options(struct seq_file *s, struct vfsmount *mnt);
104static void ocfs2_put_super(struct super_block *sb); 106static void ocfs2_put_super(struct super_block *sb);
105static int ocfs2_mount_volume(struct super_block *sb); 107static int ocfs2_mount_volume(struct super_block *sb);
@@ -600,7 +602,8 @@ static int ocfs2_remount(struct super_block *sb, int *flags, char *data)
600 602
601 lock_kernel(); 603 lock_kernel();
602 604
603 if (!ocfs2_parse_options(sb, data, &parsed_options, 1)) { 605 if (!ocfs2_parse_options(sb, data, &parsed_options, 1) ||
606 !ocfs2_check_set_options(sb, &parsed_options)) {
604 ret = -EINVAL; 607 ret = -EINVAL;
605 goto out; 608 goto out;
606 } 609 }
@@ -691,8 +694,6 @@ unlock_osb:
691 if (!ret) { 694 if (!ret) {
692 /* Only save off the new mount options in case of a successful 695 /* Only save off the new mount options in case of a successful
693 * remount. */ 696 * remount. */
694 if (!(osb->s_feature_incompat & OCFS2_FEATURE_INCOMPAT_XATTR))
695 parsed_options.mount_opt &= ~OCFS2_MOUNT_POSIX_ACL;
696 osb->s_mount_opt = parsed_options.mount_opt; 697 osb->s_mount_opt = parsed_options.mount_opt;
697 osb->s_atime_quantum = parsed_options.atime_quantum; 698 osb->s_atime_quantum = parsed_options.atime_quantum;
698 osb->preferred_slot = parsed_options.slot; 699 osb->preferred_slot = parsed_options.slot;
@@ -701,6 +702,10 @@ unlock_osb:
701 702
702 if (!ocfs2_is_hard_readonly(osb)) 703 if (!ocfs2_is_hard_readonly(osb))
703 ocfs2_set_journal_params(osb); 704 ocfs2_set_journal_params(osb);
705
706 sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
707 ((osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL) ?
708 MS_POSIXACL : 0);
704 } 709 }
705out: 710out:
706 unlock_kernel(); 711 unlock_kernel();
@@ -1011,31 +1016,16 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent)
1011 brelse(bh); 1016 brelse(bh);
1012 bh = NULL; 1017 bh = NULL;
1013 1018
1014 if (!(osb->s_feature_incompat & OCFS2_FEATURE_INCOMPAT_XATTR)) 1019 if (!ocfs2_check_set_options(sb, &parsed_options)) {
1015 parsed_options.mount_opt &= ~OCFS2_MOUNT_POSIX_ACL; 1020 status = -EINVAL;
1016 1021 goto read_super_error;
1022 }
1017 osb->s_mount_opt = parsed_options.mount_opt; 1023 osb->s_mount_opt = parsed_options.mount_opt;
1018 osb->s_atime_quantum = parsed_options.atime_quantum; 1024 osb->s_atime_quantum = parsed_options.atime_quantum;
1019 osb->preferred_slot = parsed_options.slot; 1025 osb->preferred_slot = parsed_options.slot;
1020 osb->osb_commit_interval = parsed_options.commit_interval; 1026 osb->osb_commit_interval = parsed_options.commit_interval;
1021 osb->local_alloc_default_bits = ocfs2_megabytes_to_clusters(sb, parsed_options.localalloc_opt); 1027 osb->local_alloc_default_bits = ocfs2_megabytes_to_clusters(sb, parsed_options.localalloc_opt);
1022 osb->local_alloc_bits = osb->local_alloc_default_bits; 1028 osb->local_alloc_bits = osb->local_alloc_default_bits;
1023 if (osb->s_mount_opt & OCFS2_MOUNT_USRQUOTA &&
1024 !OCFS2_HAS_RO_COMPAT_FEATURE(sb,
1025 OCFS2_FEATURE_RO_COMPAT_USRQUOTA)) {
1026 status = -EINVAL;
1027 mlog(ML_ERROR, "User quotas were requested, but this "
1028 "filesystem does not have the feature enabled.\n");
1029 goto read_super_error;
1030 }
1031 if (osb->s_mount_opt & OCFS2_MOUNT_GRPQUOTA &&
1032 !OCFS2_HAS_RO_COMPAT_FEATURE(sb,
1033 OCFS2_FEATURE_RO_COMPAT_GRPQUOTA)) {
1034 status = -EINVAL;
1035 mlog(ML_ERROR, "Group quotas were requested, but this "
1036 "filesystem does not have the feature enabled.\n");
1037 goto read_super_error;
1038 }
1039 1029
1040 status = ocfs2_verify_userspace_stack(osb, &parsed_options); 1030 status = ocfs2_verify_userspace_stack(osb, &parsed_options);
1041 if (status) 1031 if (status)
@@ -1072,7 +1062,7 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent)
1072 "file system, but write access is " 1062 "file system, but write access is "
1073 "unavailable.\n"); 1063 "unavailable.\n");
1074 else 1064 else
1075 mlog_errno(status); 1065 mlog_errno(status);
1076 goto read_super_error; 1066 goto read_super_error;
1077 } 1067 }
1078 1068
@@ -1245,6 +1235,40 @@ static struct file_system_type ocfs2_fs_type = {
1245 .next = NULL 1235 .next = NULL
1246}; 1236};
1247 1237
1238static int ocfs2_check_set_options(struct super_block *sb,
1239 struct mount_options *options)
1240{
1241 if (options->mount_opt & OCFS2_MOUNT_USRQUOTA &&
1242 !OCFS2_HAS_RO_COMPAT_FEATURE(sb,
1243 OCFS2_FEATURE_RO_COMPAT_USRQUOTA)) {
1244 mlog(ML_ERROR, "User quotas were requested, but this "
1245 "filesystem does not have the feature enabled.\n");
1246 return 0;
1247 }
1248 if (options->mount_opt & OCFS2_MOUNT_GRPQUOTA &&
1249 !OCFS2_HAS_RO_COMPAT_FEATURE(sb,
1250 OCFS2_FEATURE_RO_COMPAT_GRPQUOTA)) {
1251 mlog(ML_ERROR, "Group quotas were requested, but this "
1252 "filesystem does not have the feature enabled.\n");
1253 return 0;
1254 }
1255 if (options->mount_opt & OCFS2_MOUNT_POSIX_ACL &&
1256 !OCFS2_HAS_INCOMPAT_FEATURE(sb, OCFS2_FEATURE_INCOMPAT_XATTR)) {
1257 mlog(ML_ERROR, "ACL support requested but extended attributes "
1258 "feature is not enabled\n");
1259 return 0;
1260 }
1261 /* No ACL setting specified? Use XATTR feature... */
1262 if (!(options->mount_opt & (OCFS2_MOUNT_POSIX_ACL |
1263 OCFS2_MOUNT_NO_POSIX_ACL))) {
1264 if (OCFS2_HAS_INCOMPAT_FEATURE(sb, OCFS2_FEATURE_INCOMPAT_XATTR))
1265 options->mount_opt |= OCFS2_MOUNT_POSIX_ACL;
1266 else
1267 options->mount_opt |= OCFS2_MOUNT_NO_POSIX_ACL;
1268 }
1269 return 1;
1270}
1271
1248static int ocfs2_parse_options(struct super_block *sb, 1272static int ocfs2_parse_options(struct super_block *sb,
1249 char *options, 1273 char *options,
1250 struct mount_options *mopt, 1274 struct mount_options *mopt,
@@ -1392,40 +1416,19 @@ static int ocfs2_parse_options(struct super_block *sb,
1392 mopt->mount_opt |= OCFS2_MOUNT_INODE64; 1416 mopt->mount_opt |= OCFS2_MOUNT_INODE64;
1393 break; 1417 break;
1394 case Opt_usrquota: 1418 case Opt_usrquota:
1395 /* We check only on remount, otherwise features
1396 * aren't yet initialized. */
1397 if (is_remount && !OCFS2_HAS_RO_COMPAT_FEATURE(sb,
1398 OCFS2_FEATURE_RO_COMPAT_USRQUOTA)) {
1399 mlog(ML_ERROR, "User quota requested but "
1400 "filesystem feature is not set\n");
1401 status = 0;
1402 goto bail;
1403 }
1404 mopt->mount_opt |= OCFS2_MOUNT_USRQUOTA; 1419 mopt->mount_opt |= OCFS2_MOUNT_USRQUOTA;
1405 break; 1420 break;
1406 case Opt_grpquota: 1421 case Opt_grpquota:
1407 if (is_remount && !OCFS2_HAS_RO_COMPAT_FEATURE(sb,
1408 OCFS2_FEATURE_RO_COMPAT_GRPQUOTA)) {
1409 mlog(ML_ERROR, "Group quota requested but "
1410 "filesystem feature is not set\n");
1411 status = 0;
1412 goto bail;
1413 }
1414 mopt->mount_opt |= OCFS2_MOUNT_GRPQUOTA; 1422 mopt->mount_opt |= OCFS2_MOUNT_GRPQUOTA;
1415 break; 1423 break;
1416#ifdef CONFIG_OCFS2_FS_POSIX_ACL
1417 case Opt_acl: 1424 case Opt_acl:
1418 mopt->mount_opt |= OCFS2_MOUNT_POSIX_ACL; 1425 mopt->mount_opt |= OCFS2_MOUNT_POSIX_ACL;
1426 mopt->mount_opt &= ~OCFS2_MOUNT_NO_POSIX_ACL;
1419 break; 1427 break;
1420 case Opt_noacl: 1428 case Opt_noacl:
1429 mopt->mount_opt |= OCFS2_MOUNT_NO_POSIX_ACL;
1421 mopt->mount_opt &= ~OCFS2_MOUNT_POSIX_ACL; 1430 mopt->mount_opt &= ~OCFS2_MOUNT_POSIX_ACL;
1422 break; 1431 break;
1423#else
1424 case Opt_acl:
1425 case Opt_noacl:
1426 printk(KERN_INFO "ocfs2 (no)acl options not supported\n");
1427 break;
1428#endif
1429 default: 1432 default:
1430 mlog(ML_ERROR, 1433 mlog(ML_ERROR,
1431 "Unrecognized mount option \"%s\" " 1434 "Unrecognized mount option \"%s\" "
@@ -1502,12 +1505,10 @@ static int ocfs2_show_options(struct seq_file *s, struct vfsmount *mnt)
1502 if (opts & OCFS2_MOUNT_INODE64) 1505 if (opts & OCFS2_MOUNT_INODE64)
1503 seq_printf(s, ",inode64"); 1506 seq_printf(s, ",inode64");
1504 1507
1505#ifdef CONFIG_OCFS2_FS_POSIX_ACL
1506 if (opts & OCFS2_MOUNT_POSIX_ACL) 1508 if (opts & OCFS2_MOUNT_POSIX_ACL)
1507 seq_printf(s, ",acl"); 1509 seq_printf(s, ",acl");
1508 else 1510 else
1509 seq_printf(s, ",noacl"); 1511 seq_printf(s, ",noacl");
1510#endif
1511 1512
1512 return 0; 1513 return 0;
1513} 1514}
diff --git a/fs/ocfs2/symlink.c b/fs/ocfs2/symlink.c
index e3421030a69f..32499d213fc4 100644
--- a/fs/ocfs2/symlink.c
+++ b/fs/ocfs2/symlink.c
@@ -137,20 +137,20 @@ static void *ocfs2_fast_follow_link(struct dentry *dentry,
137 } 137 }
138 138
139 memcpy(link, target, len); 139 memcpy(link, target, len);
140 nd_set_link(nd, link);
141 140
142bail: 141bail:
142 nd_set_link(nd, status ? ERR_PTR(status) : link);
143 brelse(bh); 143 brelse(bh);
144 144
145 mlog_exit(status); 145 mlog_exit(status);
146 return status ? ERR_PTR(status) : link; 146 return NULL;
147} 147}
148 148
149static void ocfs2_fast_put_link(struct dentry *dentry, struct nameidata *nd, void *cookie) 149static void ocfs2_fast_put_link(struct dentry *dentry, struct nameidata *nd, void *cookie)
150{ 150{
151 char *link = cookie; 151 char *link = nd_get_link(nd);
152 152 if (!IS_ERR(link))
153 kfree(link); 153 kfree(link);
154} 154}
155 155
156const struct inode_operations ocfs2_symlink_inode_operations = { 156const struct inode_operations ocfs2_symlink_inode_operations = {
@@ -163,6 +163,7 @@ const struct inode_operations ocfs2_symlink_inode_operations = {
163 .getxattr = generic_getxattr, 163 .getxattr = generic_getxattr,
164 .listxattr = ocfs2_listxattr, 164 .listxattr = ocfs2_listxattr,
165 .removexattr = generic_removexattr, 165 .removexattr = generic_removexattr,
166 .fiemap = ocfs2_fiemap,
166}; 167};
167const struct inode_operations ocfs2_fast_symlink_inode_operations = { 168const struct inode_operations ocfs2_fast_symlink_inode_operations = {
168 .readlink = ocfs2_readlink, 169 .readlink = ocfs2_readlink,
@@ -174,4 +175,5 @@ const struct inode_operations ocfs2_fast_symlink_inode_operations = {
174 .getxattr = generic_getxattr, 175 .getxattr = generic_getxattr,
175 .listxattr = ocfs2_listxattr, 176 .listxattr = ocfs2_listxattr,
176 .removexattr = generic_removexattr, 177 .removexattr = generic_removexattr,
178 .fiemap = ocfs2_fiemap,
177}; 179};
diff --git a/fs/ocfs2/uptodate.c b/fs/ocfs2/uptodate.c
index c61369342a27..a0a120e82b97 100644
--- a/fs/ocfs2/uptodate.c
+++ b/fs/ocfs2/uptodate.c
@@ -267,8 +267,8 @@ static int ocfs2_buffer_cached(struct ocfs2_caching_info *ci,
267} 267}
268 268
269/* Warning: even if it returns true, this does *not* guarantee that 269/* Warning: even if it returns true, this does *not* guarantee that
270 * the block is stored in our inode metadata cache. 270 * the block is stored in our inode metadata cache.
271 * 271 *
272 * This can be called under lock_buffer() 272 * This can be called under lock_buffer()
273 */ 273 */
274int ocfs2_buffer_uptodate(struct ocfs2_caching_info *ci, 274int ocfs2_buffer_uptodate(struct ocfs2_caching_info *ci,
diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c
index fe3419068df2..8fc6fb071c6d 100644
--- a/fs/ocfs2/xattr.c
+++ b/fs/ocfs2/xattr.c
@@ -98,10 +98,8 @@ static struct ocfs2_xattr_def_value_root def_xv = {
98 98
99struct xattr_handler *ocfs2_xattr_handlers[] = { 99struct xattr_handler *ocfs2_xattr_handlers[] = {
100 &ocfs2_xattr_user_handler, 100 &ocfs2_xattr_user_handler,
101#ifdef CONFIG_OCFS2_FS_POSIX_ACL
102 &ocfs2_xattr_acl_access_handler, 101 &ocfs2_xattr_acl_access_handler,
103 &ocfs2_xattr_acl_default_handler, 102 &ocfs2_xattr_acl_default_handler,
104#endif
105 &ocfs2_xattr_trusted_handler, 103 &ocfs2_xattr_trusted_handler,
106 &ocfs2_xattr_security_handler, 104 &ocfs2_xattr_security_handler,
107 NULL 105 NULL
@@ -109,12 +107,10 @@ struct xattr_handler *ocfs2_xattr_handlers[] = {
109 107
110static struct xattr_handler *ocfs2_xattr_handler_map[OCFS2_XATTR_MAX] = { 108static struct xattr_handler *ocfs2_xattr_handler_map[OCFS2_XATTR_MAX] = {
111 [OCFS2_XATTR_INDEX_USER] = &ocfs2_xattr_user_handler, 109 [OCFS2_XATTR_INDEX_USER] = &ocfs2_xattr_user_handler,
112#ifdef CONFIG_OCFS2_FS_POSIX_ACL
113 [OCFS2_XATTR_INDEX_POSIX_ACL_ACCESS] 110 [OCFS2_XATTR_INDEX_POSIX_ACL_ACCESS]
114 = &ocfs2_xattr_acl_access_handler, 111 = &ocfs2_xattr_acl_access_handler,
115 [OCFS2_XATTR_INDEX_POSIX_ACL_DEFAULT] 112 [OCFS2_XATTR_INDEX_POSIX_ACL_DEFAULT]
116 = &ocfs2_xattr_acl_default_handler, 113 = &ocfs2_xattr_acl_default_handler,
117#endif
118 [OCFS2_XATTR_INDEX_TRUSTED] = &ocfs2_xattr_trusted_handler, 114 [OCFS2_XATTR_INDEX_TRUSTED] = &ocfs2_xattr_trusted_handler,
119 [OCFS2_XATTR_INDEX_SECURITY] = &ocfs2_xattr_security_handler, 115 [OCFS2_XATTR_INDEX_SECURITY] = &ocfs2_xattr_security_handler,
120}; 116};
@@ -205,8 +201,6 @@ static int ocfs2_get_xattr_tree_value_root(struct super_block *sb,
205 int offset, 201 int offset,
206 struct ocfs2_xattr_value_root **xv, 202 struct ocfs2_xattr_value_root **xv,
207 struct buffer_head **bh); 203 struct buffer_head **bh);
208static int ocfs2_xattr_security_set(struct inode *inode, const char *name,
209 const void *value, size_t size, int flags);
210 204
211static inline u16 ocfs2_xattr_buckets_per_cluster(struct ocfs2_super *osb) 205static inline u16 ocfs2_xattr_buckets_per_cluster(struct ocfs2_super *osb)
212{ 206{
@@ -6066,7 +6060,7 @@ static int ocfs2_value_metas_in_xattr_header(struct super_block *sb,
6066 * to the extent block, so just calculate a maximum record num. 6060 * to the extent block, so just calculate a maximum record num.
6067 */ 6061 */
6068 if (!xv->xr_list.l_tree_depth) 6062 if (!xv->xr_list.l_tree_depth)
6069 *num_recs += xv->xr_list.l_next_free_rec; 6063 *num_recs += le16_to_cpu(xv->xr_list.l_next_free_rec);
6070 else 6064 else
6071 *num_recs += ocfs2_clusters_for_bytes(sb, 6065 *num_recs += ocfs2_clusters_for_bytes(sb,
6072 XATTR_SIZE_MAX); 6066 XATTR_SIZE_MAX);
@@ -6978,9 +6972,9 @@ int ocfs2_init_security_and_acl(struct inode *dir,
6978 6972
6979 ret = ocfs2_init_security_get(inode, dir, &si); 6973 ret = ocfs2_init_security_get(inode, dir, &si);
6980 if (!ret) { 6974 if (!ret) {
6981 ret = ocfs2_xattr_security_set(inode, si.name, 6975 ret = ocfs2_xattr_set(inode, OCFS2_XATTR_INDEX_SECURITY,
6982 si.value, si.value_len, 6976 si.name, si.value, si.value_len,
6983 XATTR_CREATE); 6977 XATTR_CREATE);
6984 if (ret) { 6978 if (ret) {
6985 mlog_errno(ret); 6979 mlog_errno(ret);
6986 goto leave; 6980 goto leave;
@@ -7008,9 +7002,9 @@ leave:
7008/* 7002/*
7009 * 'security' attributes support 7003 * 'security' attributes support
7010 */ 7004 */
7011static size_t ocfs2_xattr_security_list(struct inode *inode, char *list, 7005static size_t ocfs2_xattr_security_list(struct dentry *dentry, char *list,
7012 size_t list_size, const char *name, 7006 size_t list_size, const char *name,
7013 size_t name_len) 7007 size_t name_len, int type)
7014{ 7008{
7015 const size_t prefix_len = XATTR_SECURITY_PREFIX_LEN; 7009 const size_t prefix_len = XATTR_SECURITY_PREFIX_LEN;
7016 const size_t total_len = prefix_len + name_len + 1; 7010 const size_t total_len = prefix_len + name_len + 1;
@@ -7023,23 +7017,23 @@ static size_t ocfs2_xattr_security_list(struct inode *inode, char *list,
7023 return total_len; 7017 return total_len;
7024} 7018}
7025 7019
7026static int ocfs2_xattr_security_get(struct inode *inode, const char *name, 7020static int ocfs2_xattr_security_get(struct dentry *dentry, const char *name,
7027 void *buffer, size_t size) 7021 void *buffer, size_t size, int type)
7028{ 7022{
7029 if (strcmp(name, "") == 0) 7023 if (strcmp(name, "") == 0)
7030 return -EINVAL; 7024 return -EINVAL;
7031 return ocfs2_xattr_get(inode, OCFS2_XATTR_INDEX_SECURITY, name, 7025 return ocfs2_xattr_get(dentry->d_inode, OCFS2_XATTR_INDEX_SECURITY,
7032 buffer, size); 7026 name, buffer, size);
7033} 7027}
7034 7028
7035static int ocfs2_xattr_security_set(struct inode *inode, const char *name, 7029static int ocfs2_xattr_security_set(struct dentry *dentry, const char *name,
7036 const void *value, size_t size, int flags) 7030 const void *value, size_t size, int flags, int type)
7037{ 7031{
7038 if (strcmp(name, "") == 0) 7032 if (strcmp(name, "") == 0)
7039 return -EINVAL; 7033 return -EINVAL;
7040 7034
7041 return ocfs2_xattr_set(inode, OCFS2_XATTR_INDEX_SECURITY, name, value, 7035 return ocfs2_xattr_set(dentry->d_inode, OCFS2_XATTR_INDEX_SECURITY,
7042 size, flags); 7036 name, value, size, flags);
7043} 7037}
7044 7038
7045int ocfs2_init_security_get(struct inode *inode, 7039int ocfs2_init_security_get(struct inode *inode,
@@ -7076,9 +7070,9 @@ struct xattr_handler ocfs2_xattr_security_handler = {
7076/* 7070/*
7077 * 'trusted' attributes support 7071 * 'trusted' attributes support
7078 */ 7072 */
7079static size_t ocfs2_xattr_trusted_list(struct inode *inode, char *list, 7073static size_t ocfs2_xattr_trusted_list(struct dentry *dentry, char *list,
7080 size_t list_size, const char *name, 7074 size_t list_size, const char *name,
7081 size_t name_len) 7075 size_t name_len, int type)
7082{ 7076{
7083 const size_t prefix_len = XATTR_TRUSTED_PREFIX_LEN; 7077 const size_t prefix_len = XATTR_TRUSTED_PREFIX_LEN;
7084 const size_t total_len = prefix_len + name_len + 1; 7078 const size_t total_len = prefix_len + name_len + 1;
@@ -7091,23 +7085,23 @@ static size_t ocfs2_xattr_trusted_list(struct inode *inode, char *list,
7091 return total_len; 7085 return total_len;
7092} 7086}
7093 7087
7094static int ocfs2_xattr_trusted_get(struct inode *inode, const char *name, 7088static int ocfs2_xattr_trusted_get(struct dentry *dentry, const char *name,
7095 void *buffer, size_t size) 7089 void *buffer, size_t size, int type)
7096{ 7090{
7097 if (strcmp(name, "") == 0) 7091 if (strcmp(name, "") == 0)
7098 return -EINVAL; 7092 return -EINVAL;
7099 return ocfs2_xattr_get(inode, OCFS2_XATTR_INDEX_TRUSTED, name, 7093 return ocfs2_xattr_get(dentry->d_inode, OCFS2_XATTR_INDEX_TRUSTED,
7100 buffer, size); 7094 name, buffer, size);
7101} 7095}
7102 7096
7103static int ocfs2_xattr_trusted_set(struct inode *inode, const char *name, 7097static int ocfs2_xattr_trusted_set(struct dentry *dentry, const char *name,
7104 const void *value, size_t size, int flags) 7098 const void *value, size_t size, int flags, int type)
7105{ 7099{
7106 if (strcmp(name, "") == 0) 7100 if (strcmp(name, "") == 0)
7107 return -EINVAL; 7101 return -EINVAL;
7108 7102
7109 return ocfs2_xattr_set(inode, OCFS2_XATTR_INDEX_TRUSTED, name, value, 7103 return ocfs2_xattr_set(dentry->d_inode, OCFS2_XATTR_INDEX_TRUSTED,
7110 size, flags); 7104 name, value, size, flags);
7111} 7105}
7112 7106
7113struct xattr_handler ocfs2_xattr_trusted_handler = { 7107struct xattr_handler ocfs2_xattr_trusted_handler = {
@@ -7120,13 +7114,13 @@ struct xattr_handler ocfs2_xattr_trusted_handler = {
7120/* 7114/*
7121 * 'user' attributes support 7115 * 'user' attributes support
7122 */ 7116 */
7123static size_t ocfs2_xattr_user_list(struct inode *inode, char *list, 7117static size_t ocfs2_xattr_user_list(struct dentry *dentry, char *list,
7124 size_t list_size, const char *name, 7118 size_t list_size, const char *name,
7125 size_t name_len) 7119 size_t name_len, int type)
7126{ 7120{
7127 const size_t prefix_len = XATTR_USER_PREFIX_LEN; 7121 const size_t prefix_len = XATTR_USER_PREFIX_LEN;
7128 const size_t total_len = prefix_len + name_len + 1; 7122 const size_t total_len = prefix_len + name_len + 1;
7129 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 7123 struct ocfs2_super *osb = OCFS2_SB(dentry->d_sb);
7130 7124
7131 if (osb->s_mount_opt & OCFS2_MOUNT_NOUSERXATTR) 7125 if (osb->s_mount_opt & OCFS2_MOUNT_NOUSERXATTR)
7132 return 0; 7126 return 0;
@@ -7139,31 +7133,31 @@ static size_t ocfs2_xattr_user_list(struct inode *inode, char *list,
7139 return total_len; 7133 return total_len;
7140} 7134}
7141 7135
7142static int ocfs2_xattr_user_get(struct inode *inode, const char *name, 7136static int ocfs2_xattr_user_get(struct dentry *dentry, const char *name,
7143 void *buffer, size_t size) 7137 void *buffer, size_t size, int type)
7144{ 7138{
7145 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 7139 struct ocfs2_super *osb = OCFS2_SB(dentry->d_sb);
7146 7140
7147 if (strcmp(name, "") == 0) 7141 if (strcmp(name, "") == 0)
7148 return -EINVAL; 7142 return -EINVAL;
7149 if (osb->s_mount_opt & OCFS2_MOUNT_NOUSERXATTR) 7143 if (osb->s_mount_opt & OCFS2_MOUNT_NOUSERXATTR)
7150 return -EOPNOTSUPP; 7144 return -EOPNOTSUPP;
7151 return ocfs2_xattr_get(inode, OCFS2_XATTR_INDEX_USER, name, 7145 return ocfs2_xattr_get(dentry->d_inode, OCFS2_XATTR_INDEX_USER, name,
7152 buffer, size); 7146 buffer, size);
7153} 7147}
7154 7148
7155static int ocfs2_xattr_user_set(struct inode *inode, const char *name, 7149static int ocfs2_xattr_user_set(struct dentry *dentry, const char *name,
7156 const void *value, size_t size, int flags) 7150 const void *value, size_t size, int flags, int type)
7157{ 7151{
7158 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 7152 struct ocfs2_super *osb = OCFS2_SB(dentry->d_sb);
7159 7153
7160 if (strcmp(name, "") == 0) 7154 if (strcmp(name, "") == 0)
7161 return -EINVAL; 7155 return -EINVAL;
7162 if (osb->s_mount_opt & OCFS2_MOUNT_NOUSERXATTR) 7156 if (osb->s_mount_opt & OCFS2_MOUNT_NOUSERXATTR)
7163 return -EOPNOTSUPP; 7157 return -EOPNOTSUPP;
7164 7158
7165 return ocfs2_xattr_set(inode, OCFS2_XATTR_INDEX_USER, name, value, 7159 return ocfs2_xattr_set(dentry->d_inode, OCFS2_XATTR_INDEX_USER,
7166 size, flags); 7160 name, value, size, flags);
7167} 7161}
7168 7162
7169struct xattr_handler ocfs2_xattr_user_handler = { 7163struct xattr_handler ocfs2_xattr_user_handler = {
diff --git a/fs/ocfs2/xattr.h b/fs/ocfs2/xattr.h
index 08e36389f56d..abd72a47f520 100644
--- a/fs/ocfs2/xattr.h
+++ b/fs/ocfs2/xattr.h
@@ -40,10 +40,8 @@ struct ocfs2_security_xattr_info {
40extern struct xattr_handler ocfs2_xattr_user_handler; 40extern struct xattr_handler ocfs2_xattr_user_handler;
41extern struct xattr_handler ocfs2_xattr_trusted_handler; 41extern struct xattr_handler ocfs2_xattr_trusted_handler;
42extern struct xattr_handler ocfs2_xattr_security_handler; 42extern struct xattr_handler ocfs2_xattr_security_handler;
43#ifdef CONFIG_OCFS2_FS_POSIX_ACL
44extern struct xattr_handler ocfs2_xattr_acl_access_handler; 43extern struct xattr_handler ocfs2_xattr_acl_access_handler;
45extern struct xattr_handler ocfs2_xattr_acl_default_handler; 44extern struct xattr_handler ocfs2_xattr_acl_default_handler;
46#endif
47extern struct xattr_handler *ocfs2_xattr_handlers[]; 45extern struct xattr_handler *ocfs2_xattr_handlers[];
48 46
49ssize_t ocfs2_listxattr(struct dentry *, char *, size_t); 47ssize_t ocfs2_listxattr(struct dentry *, char *, size_t);
diff --git a/fs/open.c b/fs/open.c
index b4b31d277f3a..040cef72bc00 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -30,6 +30,9 @@
30#include <linux/audit.h> 30#include <linux/audit.h>
31#include <linux/falloc.h> 31#include <linux/falloc.h>
32#include <linux/fs_struct.h> 32#include <linux/fs_struct.h>
33#include <linux/ima.h>
34
35#include "internal.h"
33 36
34int vfs_statfs(struct dentry *dentry, struct kstatfs *buf) 37int vfs_statfs(struct dentry *dentry, struct kstatfs *buf)
35{ 38{
@@ -818,15 +821,14 @@ static inline int __get_file_write_access(struct inode *inode,
818} 821}
819 822
820static struct file *__dentry_open(struct dentry *dentry, struct vfsmount *mnt, 823static struct file *__dentry_open(struct dentry *dentry, struct vfsmount *mnt,
821 int flags, struct file *f, 824 struct file *f,
822 int (*open)(struct inode *, struct file *), 825 int (*open)(struct inode *, struct file *),
823 const struct cred *cred) 826 const struct cred *cred)
824{ 827{
825 struct inode *inode; 828 struct inode *inode;
826 int error; 829 int error;
827 830
828 f->f_flags = flags; 831 f->f_mode = OPEN_FMODE(f->f_flags) | FMODE_LSEEK |
829 f->f_mode = (__force fmode_t)((flags+1) & O_ACCMODE) | FMODE_LSEEK |
830 FMODE_PREAD | FMODE_PWRITE; 832 FMODE_PREAD | FMODE_PWRITE;
831 inode = dentry->d_inode; 833 inode = dentry->d_inode;
832 if (f->f_mode & FMODE_WRITE) { 834 if (f->f_mode & FMODE_WRITE) {
@@ -855,6 +857,7 @@ static struct file *__dentry_open(struct dentry *dentry, struct vfsmount *mnt,
855 if (error) 857 if (error)
856 goto cleanup_all; 858 goto cleanup_all;
857 } 859 }
860 ima_counts_get(f);
858 861
859 f->f_flags &= ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC); 862 f->f_flags &= ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC);
860 863
@@ -926,7 +929,6 @@ struct file *lookup_instantiate_filp(struct nameidata *nd, struct dentry *dentry
926 if (IS_ERR(dentry)) 929 if (IS_ERR(dentry))
927 goto out_err; 930 goto out_err;
928 nd->intent.open.file = __dentry_open(dget(dentry), mntget(nd->path.mnt), 931 nd->intent.open.file = __dentry_open(dget(dentry), mntget(nd->path.mnt),
929 nd->intent.open.flags - 1,
930 nd->intent.open.file, 932 nd->intent.open.file,
931 open, cred); 933 open, cred);
932out: 934out:
@@ -945,7 +947,7 @@ EXPORT_SYMBOL_GPL(lookup_instantiate_filp);
945 * 947 *
946 * Note that this function destroys the original nameidata 948 * Note that this function destroys the original nameidata
947 */ 949 */
948struct file *nameidata_to_filp(struct nameidata *nd, int flags) 950struct file *nameidata_to_filp(struct nameidata *nd)
949{ 951{
950 const struct cred *cred = current_cred(); 952 const struct cred *cred = current_cred();
951 struct file *filp; 953 struct file *filp;
@@ -954,7 +956,7 @@ struct file *nameidata_to_filp(struct nameidata *nd, int flags)
954 filp = nd->intent.open.file; 956 filp = nd->intent.open.file;
955 /* Has the filesystem initialised the file for us? */ 957 /* Has the filesystem initialised the file for us? */
956 if (filp->f_path.dentry == NULL) 958 if (filp->f_path.dentry == NULL)
957 filp = __dentry_open(nd->path.dentry, nd->path.mnt, flags, filp, 959 filp = __dentry_open(nd->path.dentry, nd->path.mnt, filp,
958 NULL, cred); 960 NULL, cred);
959 else 961 else
960 path_put(&nd->path); 962 path_put(&nd->path);
@@ -993,7 +995,8 @@ struct file *dentry_open(struct dentry *dentry, struct vfsmount *mnt, int flags,
993 return ERR_PTR(error); 995 return ERR_PTR(error);
994 } 996 }
995 997
996 return __dentry_open(dentry, mnt, flags, f, NULL, cred); 998 f->f_flags = flags;
999 return __dentry_open(dentry, mnt, f, NULL, cred);
997} 1000}
998EXPORT_SYMBOL(dentry_open); 1001EXPORT_SYMBOL(dentry_open);
999 1002
diff --git a/fs/pipe.c b/fs/pipe.c
index ae17d026aaa3..37ba29ff3158 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -906,17 +906,6 @@ void free_pipe_info(struct inode *inode)
906} 906}
907 907
908static struct vfsmount *pipe_mnt __read_mostly; 908static struct vfsmount *pipe_mnt __read_mostly;
909static int pipefs_delete_dentry(struct dentry *dentry)
910{
911 /*
912 * At creation time, we pretended this dentry was hashed
913 * (by clearing DCACHE_UNHASHED bit in d_flags)
914 * At delete time, we restore the truth : not hashed.
915 * (so that dput() can proceed correctly)
916 */
917 dentry->d_flags |= DCACHE_UNHASHED;
918 return 0;
919}
920 909
921/* 910/*
922 * pipefs_dname() is called from d_path(). 911 * pipefs_dname() is called from d_path().
@@ -928,7 +917,6 @@ static char *pipefs_dname(struct dentry *dentry, char *buffer, int buflen)
928} 917}
929 918
930static const struct dentry_operations pipefs_dentry_operations = { 919static const struct dentry_operations pipefs_dentry_operations = {
931 .d_delete = pipefs_delete_dentry,
932 .d_dname = pipefs_dname, 920 .d_dname = pipefs_dname,
933}; 921};
934 922
@@ -974,7 +962,7 @@ struct file *create_write_pipe(int flags)
974 int err; 962 int err;
975 struct inode *inode; 963 struct inode *inode;
976 struct file *f; 964 struct file *f;
977 struct dentry *dentry; 965 struct path path;
978 struct qstr name = { .name = "" }; 966 struct qstr name = { .name = "" };
979 967
980 err = -ENFILE; 968 err = -ENFILE;
@@ -983,21 +971,16 @@ struct file *create_write_pipe(int flags)
983 goto err; 971 goto err;
984 972
985 err = -ENOMEM; 973 err = -ENOMEM;
986 dentry = d_alloc(pipe_mnt->mnt_sb->s_root, &name); 974 path.dentry = d_alloc(pipe_mnt->mnt_sb->s_root, &name);
987 if (!dentry) 975 if (!path.dentry)
988 goto err_inode; 976 goto err_inode;
977 path.mnt = mntget(pipe_mnt);
989 978
990 dentry->d_op = &pipefs_dentry_operations; 979 path.dentry->d_op = &pipefs_dentry_operations;
991 /* 980 d_instantiate(path.dentry, inode);
992 * We dont want to publish this dentry into global dentry hash table.
993 * We pretend dentry is already hashed, by unsetting DCACHE_UNHASHED
994 * This permits a working /proc/$pid/fd/XXX on pipes
995 */
996 dentry->d_flags &= ~DCACHE_UNHASHED;
997 d_instantiate(dentry, inode);
998 981
999 err = -ENFILE; 982 err = -ENFILE;
1000 f = alloc_file(pipe_mnt, dentry, FMODE_WRITE, &write_pipefifo_fops); 983 f = alloc_file(&path, FMODE_WRITE, &write_pipefifo_fops);
1001 if (!f) 984 if (!f)
1002 goto err_dentry; 985 goto err_dentry;
1003 f->f_mapping = inode->i_mapping; 986 f->f_mapping = inode->i_mapping;
@@ -1009,7 +992,7 @@ struct file *create_write_pipe(int flags)
1009 992
1010 err_dentry: 993 err_dentry:
1011 free_pipe_info(inode); 994 free_pipe_info(inode);
1012 dput(dentry); 995 path_put(&path);
1013 return ERR_PTR(err); 996 return ERR_PTR(err);
1014 997
1015 err_inode: 998 err_inode:
@@ -1028,20 +1011,14 @@ void free_write_pipe(struct file *f)
1028 1011
1029struct file *create_read_pipe(struct file *wrf, int flags) 1012struct file *create_read_pipe(struct file *wrf, int flags)
1030{ 1013{
1031 struct file *f = get_empty_filp(); 1014 /* Grab pipe from the writer */
1015 struct file *f = alloc_file(&wrf->f_path, FMODE_READ,
1016 &read_pipefifo_fops);
1032 if (!f) 1017 if (!f)
1033 return ERR_PTR(-ENFILE); 1018 return ERR_PTR(-ENFILE);
1034 1019
1035 /* Grab pipe from the writer */
1036 f->f_path = wrf->f_path;
1037 path_get(&wrf->f_path); 1020 path_get(&wrf->f_path);
1038 f->f_mapping = wrf->f_path.dentry->d_inode->i_mapping;
1039
1040 f->f_pos = 0;
1041 f->f_flags = O_RDONLY | (flags & O_NONBLOCK); 1021 f->f_flags = O_RDONLY | (flags & O_NONBLOCK);
1042 f->f_op = &read_pipefifo_fops;
1043 f->f_mode = FMODE_READ;
1044 f->f_version = 0;
1045 1022
1046 return f; 1023 return f;
1047} 1024}
diff --git a/fs/proc/array.c b/fs/proc/array.c
index 4badde179b18..13b5d0708175 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -134,13 +134,16 @@ static inline void task_name(struct seq_file *m, struct task_struct *p)
134 * simple bit tests. 134 * simple bit tests.
135 */ 135 */
136static const char *task_state_array[] = { 136static const char *task_state_array[] = {
137 "R (running)", /* 0 */ 137 "R (running)", /* 0 */
138 "S (sleeping)", /* 1 */ 138 "S (sleeping)", /* 1 */
139 "D (disk sleep)", /* 2 */ 139 "D (disk sleep)", /* 2 */
140 "T (stopped)", /* 4 */ 140 "T (stopped)", /* 4 */
141 "T (tracing stop)", /* 8 */ 141 "t (tracing stop)", /* 8 */
142 "Z (zombie)", /* 16 */ 142 "Z (zombie)", /* 16 */
143 "X (dead)" /* 32 */ 143 "X (dead)", /* 32 */
144 "x (dead)", /* 64 */
145 "K (wakekill)", /* 128 */
146 "W (waking)", /* 256 */
144}; 147};
145 148
146static inline const char *get_task_state(struct task_struct *tsk) 149static inline const char *get_task_state(struct task_struct *tsk)
@@ -148,6 +151,8 @@ static inline const char *get_task_state(struct task_struct *tsk)
148 unsigned int state = (tsk->state & TASK_REPORT) | tsk->exit_state; 151 unsigned int state = (tsk->state & TASK_REPORT) | tsk->exit_state;
149 const char **p = &task_state_array[0]; 152 const char **p = &task_state_array[0];
150 153
154 BUILD_BUG_ON(1 + ilog2(TASK_STATE_MAX) != ARRAY_SIZE(task_state_array));
155
151 while (state) { 156 while (state) {
152 p++; 157 p++;
153 state >>= 1; 158 state >>= 1;
@@ -322,94 +327,6 @@ static inline void task_context_switch_counts(struct seq_file *m,
322 p->nivcsw); 327 p->nivcsw);
323} 328}
324 329
325#ifdef CONFIG_MMU
326
327struct stack_stats {
328 struct vm_area_struct *vma;
329 unsigned long startpage;
330 unsigned long usage;
331};
332
333static int stack_usage_pte_range(pmd_t *pmd, unsigned long addr,
334 unsigned long end, struct mm_walk *walk)
335{
336 struct stack_stats *ss = walk->private;
337 struct vm_area_struct *vma = ss->vma;
338 pte_t *pte, ptent;
339 spinlock_t *ptl;
340 int ret = 0;
341
342 pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
343 for (; addr != end; pte++, addr += PAGE_SIZE) {
344 ptent = *pte;
345
346#ifdef CONFIG_STACK_GROWSUP
347 if (pte_present(ptent) || is_swap_pte(ptent))
348 ss->usage = addr - ss->startpage + PAGE_SIZE;
349#else
350 if (pte_present(ptent) || is_swap_pte(ptent)) {
351 ss->usage = ss->startpage - addr + PAGE_SIZE;
352 pte++;
353 ret = 1;
354 break;
355 }
356#endif
357 }
358 pte_unmap_unlock(pte - 1, ptl);
359 cond_resched();
360 return ret;
361}
362
363static inline unsigned long get_stack_usage_in_bytes(struct vm_area_struct *vma,
364 struct task_struct *task)
365{
366 struct stack_stats ss;
367 struct mm_walk stack_walk = {
368 .pmd_entry = stack_usage_pte_range,
369 .mm = vma->vm_mm,
370 .private = &ss,
371 };
372
373 if (!vma->vm_mm || is_vm_hugetlb_page(vma))
374 return 0;
375
376 ss.vma = vma;
377 ss.startpage = task->stack_start & PAGE_MASK;
378 ss.usage = 0;
379
380#ifdef CONFIG_STACK_GROWSUP
381 walk_page_range(KSTK_ESP(task) & PAGE_MASK, vma->vm_end,
382 &stack_walk);
383#else
384 walk_page_range(vma->vm_start, (KSTK_ESP(task) & PAGE_MASK) + PAGE_SIZE,
385 &stack_walk);
386#endif
387 return ss.usage;
388}
389
390static inline void task_show_stack_usage(struct seq_file *m,
391 struct task_struct *task)
392{
393 struct vm_area_struct *vma;
394 struct mm_struct *mm = get_task_mm(task);
395
396 if (mm) {
397 down_read(&mm->mmap_sem);
398 vma = find_vma(mm, task->stack_start);
399 if (vma)
400 seq_printf(m, "Stack usage:\t%lu kB\n",
401 get_stack_usage_in_bytes(vma, task) >> 10);
402
403 up_read(&mm->mmap_sem);
404 mmput(mm);
405 }
406}
407#else
408static void task_show_stack_usage(struct seq_file *m, struct task_struct *task)
409{
410}
411#endif /* CONFIG_MMU */
412
413static void task_cpus_allowed(struct seq_file *m, struct task_struct *task) 330static void task_cpus_allowed(struct seq_file *m, struct task_struct *task)
414{ 331{
415 seq_printf(m, "Cpus_allowed:\t"); 332 seq_printf(m, "Cpus_allowed:\t");
@@ -440,7 +357,6 @@ int proc_pid_status(struct seq_file *m, struct pid_namespace *ns,
440 task_show_regs(m, task); 357 task_show_regs(m, task);
441#endif 358#endif
442 task_context_switch_counts(m, task); 359 task_context_switch_counts(m, task);
443 task_show_stack_usage(m, task);
444 return 0; 360 return 0;
445} 361}
446 362
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 18d5cc62d8ed..58324c299165 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -1419,7 +1419,6 @@ static void *proc_pid_follow_link(struct dentry *dentry, struct nameidata *nd)
1419 goto out; 1419 goto out;
1420 1420
1421 error = PROC_I(inode)->op.proc_get_link(inode, &nd->path); 1421 error = PROC_I(inode)->op.proc_get_link(inode, &nd->path);
1422 nd->last_type = LAST_BIND;
1423out: 1422out:
1424 return ERR_PTR(error); 1423 return ERR_PTR(error);
1425} 1424}
@@ -2370,16 +2369,30 @@ static void *proc_self_follow_link(struct dentry *dentry, struct nameidata *nd)
2370{ 2369{
2371 struct pid_namespace *ns = dentry->d_sb->s_fs_info; 2370 struct pid_namespace *ns = dentry->d_sb->s_fs_info;
2372 pid_t tgid = task_tgid_nr_ns(current, ns); 2371 pid_t tgid = task_tgid_nr_ns(current, ns);
2373 char tmp[PROC_NUMBUF]; 2372 char *name = ERR_PTR(-ENOENT);
2374 if (!tgid) 2373 if (tgid) {
2375 return ERR_PTR(-ENOENT); 2374 name = __getname();
2376 sprintf(tmp, "%d", task_tgid_nr_ns(current, ns)); 2375 if (!name)
2377 return ERR_PTR(vfs_follow_link(nd,tmp)); 2376 name = ERR_PTR(-ENOMEM);
2377 else
2378 sprintf(name, "%d", tgid);
2379 }
2380 nd_set_link(nd, name);
2381 return NULL;
2382}
2383
2384static void proc_self_put_link(struct dentry *dentry, struct nameidata *nd,
2385 void *cookie)
2386{
2387 char *s = nd_get_link(nd);
2388 if (!IS_ERR(s))
2389 __putname(s);
2378} 2390}
2379 2391
2380static const struct inode_operations proc_self_inode_operations = { 2392static const struct inode_operations proc_self_inode_operations = {
2381 .readlink = proc_self_readlink, 2393 .readlink = proc_self_readlink,
2382 .follow_link = proc_self_follow_link, 2394 .follow_link = proc_self_follow_link,
2395 .put_link = proc_self_put_link,
2383}; 2396};
2384 2397
2385/* 2398/*
diff --git a/fs/proc/page.c b/fs/proc/page.c
index 5033ce0d254b..180cf5a0bd67 100644
--- a/fs/proc/page.c
+++ b/fs/proc/page.c
@@ -8,6 +8,7 @@
8#include <linux/proc_fs.h> 8#include <linux/proc_fs.h>
9#include <linux/seq_file.h> 9#include <linux/seq_file.h>
10#include <linux/hugetlb.h> 10#include <linux/hugetlb.h>
11#include <linux/kernel-page-flags.h>
11#include <asm/uaccess.h> 12#include <asm/uaccess.h>
12#include "internal.h" 13#include "internal.h"
13 14
@@ -71,52 +72,12 @@ static const struct file_operations proc_kpagecount_operations = {
71 * physical page flags. 72 * physical page flags.
72 */ 73 */
73 74
74/* These macros are used to decouple internal flags from exported ones */
75
76#define KPF_LOCKED 0
77#define KPF_ERROR 1
78#define KPF_REFERENCED 2
79#define KPF_UPTODATE 3
80#define KPF_DIRTY 4
81#define KPF_LRU 5
82#define KPF_ACTIVE 6
83#define KPF_SLAB 7
84#define KPF_WRITEBACK 8
85#define KPF_RECLAIM 9
86#define KPF_BUDDY 10
87
88/* 11-20: new additions in 2.6.31 */
89#define KPF_MMAP 11
90#define KPF_ANON 12
91#define KPF_SWAPCACHE 13
92#define KPF_SWAPBACKED 14
93#define KPF_COMPOUND_HEAD 15
94#define KPF_COMPOUND_TAIL 16
95#define KPF_HUGE 17
96#define KPF_UNEVICTABLE 18
97#define KPF_HWPOISON 19
98#define KPF_NOPAGE 20
99
100#define KPF_KSM 21
101
102/* kernel hacking assistances
103 * WARNING: subject to change, never rely on them!
104 */
105#define KPF_RESERVED 32
106#define KPF_MLOCKED 33
107#define KPF_MAPPEDTODISK 34
108#define KPF_PRIVATE 35
109#define KPF_PRIVATE_2 36
110#define KPF_OWNER_PRIVATE 37
111#define KPF_ARCH 38
112#define KPF_UNCACHED 39
113
114static inline u64 kpf_copy_bit(u64 kflags, int ubit, int kbit) 75static inline u64 kpf_copy_bit(u64 kflags, int ubit, int kbit)
115{ 76{
116 return ((kflags >> kbit) & 1) << ubit; 77 return ((kflags >> kbit) & 1) << ubit;
117} 78}
118 79
119static u64 get_uflags(struct page *page) 80u64 stable_page_flags(struct page *page)
120{ 81{
121 u64 k; 82 u64 k;
122 u64 u; 83 u64 u;
@@ -219,7 +180,7 @@ static ssize_t kpageflags_read(struct file *file, char __user *buf,
219 else 180 else
220 ppage = NULL; 181 ppage = NULL;
221 182
222 if (put_user(get_uflags(ppage), out)) { 183 if (put_user(stable_page_flags(ppage), out)) {
223 ret = -EFAULT; 184 ret = -EFAULT;
224 break; 185 break;
225 } 186 }
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 47c03f4336b8..f277c4a111cb 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -361,12 +361,11 @@ static int smaps_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
361 if (!pte_present(ptent)) 361 if (!pte_present(ptent))
362 continue; 362 continue;
363 363
364 mss->resident += PAGE_SIZE;
365
366 page = vm_normal_page(vma, addr, ptent); 364 page = vm_normal_page(vma, addr, ptent);
367 if (!page) 365 if (!page)
368 continue; 366 continue;
369 367
368 mss->resident += PAGE_SIZE;
370 /* Accumulate the size in pages that have been accessed. */ 369 /* Accumulate the size in pages that have been accessed. */
371 if (pte_young(ptent) || PageReferenced(page)) 370 if (pte_young(ptent) || PageReferenced(page))
372 mss->referenced += PAGE_SIZE; 371 mss->referenced += PAGE_SIZE;
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index cd6bb9a33c13..3fc62b097bed 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -323,6 +323,30 @@ int dquot_mark_dquot_dirty(struct dquot *dquot)
323} 323}
324EXPORT_SYMBOL(dquot_mark_dquot_dirty); 324EXPORT_SYMBOL(dquot_mark_dquot_dirty);
325 325
326/* Dirtify all the dquots - this can block when journalling */
327static inline int mark_all_dquot_dirty(struct dquot * const *dquot)
328{
329 int ret, err, cnt;
330
331 ret = err = 0;
332 for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
333 if (dquot[cnt])
334 /* Even in case of error we have to continue */
335 ret = mark_dquot_dirty(dquot[cnt]);
336 if (!err)
337 err = ret;
338 }
339 return err;
340}
341
342static inline void dqput_all(struct dquot **dquot)
343{
344 unsigned int cnt;
345
346 for (cnt = 0; cnt < MAXQUOTAS; cnt++)
347 dqput(dquot[cnt]);
348}
349
326/* This function needs dq_list_lock */ 350/* This function needs dq_list_lock */
327static inline int clear_dquot_dirty(struct dquot *dquot) 351static inline int clear_dquot_dirty(struct dquot *dquot)
328{ 352{
@@ -1268,8 +1292,7 @@ int dquot_initialize(struct inode *inode, int type)
1268out_err: 1292out_err:
1269 up_write(&sb_dqopt(sb)->dqptr_sem); 1293 up_write(&sb_dqopt(sb)->dqptr_sem);
1270 /* Drop unused references */ 1294 /* Drop unused references */
1271 for (cnt = 0; cnt < MAXQUOTAS; cnt++) 1295 dqput_all(got);
1272 dqput(got[cnt]);
1273 return ret; 1296 return ret;
1274} 1297}
1275EXPORT_SYMBOL(dquot_initialize); 1298EXPORT_SYMBOL(dquot_initialize);
@@ -1288,9 +1311,7 @@ int dquot_drop(struct inode *inode)
1288 inode->i_dquot[cnt] = NULL; 1311 inode->i_dquot[cnt] = NULL;
1289 } 1312 }
1290 up_write(&sb_dqopt(inode->i_sb)->dqptr_sem); 1313 up_write(&sb_dqopt(inode->i_sb)->dqptr_sem);
1291 1314 dqput_all(put);
1292 for (cnt = 0; cnt < MAXQUOTAS; cnt++)
1293 dqput(put[cnt]);
1294 return 0; 1315 return 0;
1295} 1316}
1296EXPORT_SYMBOL(dquot_drop); 1317EXPORT_SYMBOL(dquot_drop);
@@ -1319,6 +1340,70 @@ void vfs_dq_drop(struct inode *inode)
1319EXPORT_SYMBOL(vfs_dq_drop); 1340EXPORT_SYMBOL(vfs_dq_drop);
1320 1341
1321/* 1342/*
1343 * inode_reserved_space is managed internally by quota, and protected by
1344 * i_lock similar to i_blocks+i_bytes.
1345 */
1346static qsize_t *inode_reserved_space(struct inode * inode)
1347{
1348 /* Filesystem must explicitly define it's own method in order to use
1349 * quota reservation interface */
1350 BUG_ON(!inode->i_sb->dq_op->get_reserved_space);
1351 return inode->i_sb->dq_op->get_reserved_space(inode);
1352}
1353
1354static void inode_add_rsv_space(struct inode *inode, qsize_t number)
1355{
1356 spin_lock(&inode->i_lock);
1357 *inode_reserved_space(inode) += number;
1358 spin_unlock(&inode->i_lock);
1359}
1360
1361
1362static void inode_claim_rsv_space(struct inode *inode, qsize_t number)
1363{
1364 spin_lock(&inode->i_lock);
1365 *inode_reserved_space(inode) -= number;
1366 __inode_add_bytes(inode, number);
1367 spin_unlock(&inode->i_lock);
1368}
1369
1370static void inode_sub_rsv_space(struct inode *inode, qsize_t number)
1371{
1372 spin_lock(&inode->i_lock);
1373 *inode_reserved_space(inode) -= number;
1374 spin_unlock(&inode->i_lock);
1375}
1376
1377static qsize_t inode_get_rsv_space(struct inode *inode)
1378{
1379 qsize_t ret;
1380
1381 if (!inode->i_sb->dq_op->get_reserved_space)
1382 return 0;
1383 spin_lock(&inode->i_lock);
1384 ret = *inode_reserved_space(inode);
1385 spin_unlock(&inode->i_lock);
1386 return ret;
1387}
1388
1389static void inode_incr_space(struct inode *inode, qsize_t number,
1390 int reserve)
1391{
1392 if (reserve)
1393 inode_add_rsv_space(inode, number);
1394 else
1395 inode_add_bytes(inode, number);
1396}
1397
1398static void inode_decr_space(struct inode *inode, qsize_t number, int reserve)
1399{
1400 if (reserve)
1401 inode_sub_rsv_space(inode, number);
1402 else
1403 inode_sub_bytes(inode, number);
1404}
1405
1406/*
1322 * Following four functions update i_blocks+i_bytes fields and 1407 * Following four functions update i_blocks+i_bytes fields and
1323 * quota information (together with appropriate checks) 1408 * quota information (together with appropriate checks)
1324 * NOTE: We absolutely rely on the fact that caller dirties 1409 * NOTE: We absolutely rely on the fact that caller dirties
@@ -1336,6 +1421,21 @@ int __dquot_alloc_space(struct inode *inode, qsize_t number,
1336 int cnt, ret = QUOTA_OK; 1421 int cnt, ret = QUOTA_OK;
1337 char warntype[MAXQUOTAS]; 1422 char warntype[MAXQUOTAS];
1338 1423
1424 /*
1425 * First test before acquiring mutex - solves deadlocks when we
1426 * re-enter the quota code and are already holding the mutex
1427 */
1428 if (IS_NOQUOTA(inode)) {
1429 inode_incr_space(inode, number, reserve);
1430 goto out;
1431 }
1432
1433 down_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
1434 if (IS_NOQUOTA(inode)) {
1435 inode_incr_space(inode, number, reserve);
1436 goto out_unlock;
1437 }
1438
1339 for (cnt = 0; cnt < MAXQUOTAS; cnt++) 1439 for (cnt = 0; cnt < MAXQUOTAS; cnt++)
1340 warntype[cnt] = QUOTA_NL_NOWARN; 1440 warntype[cnt] = QUOTA_NL_NOWARN;
1341 1441
@@ -1346,7 +1446,8 @@ int __dquot_alloc_space(struct inode *inode, qsize_t number,
1346 if (check_bdq(inode->i_dquot[cnt], number, warn, warntype+cnt) 1446 if (check_bdq(inode->i_dquot[cnt], number, warn, warntype+cnt)
1347 == NO_QUOTA) { 1447 == NO_QUOTA) {
1348 ret = NO_QUOTA; 1448 ret = NO_QUOTA;
1349 goto out_unlock; 1449 spin_unlock(&dq_data_lock);
1450 goto out_flush_warn;
1350 } 1451 }
1351 } 1452 }
1352 for (cnt = 0; cnt < MAXQUOTAS; cnt++) { 1453 for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
@@ -1357,64 +1458,29 @@ int __dquot_alloc_space(struct inode *inode, qsize_t number,
1357 else 1458 else
1358 dquot_incr_space(inode->i_dquot[cnt], number); 1459 dquot_incr_space(inode->i_dquot[cnt], number);
1359 } 1460 }
1360 if (!reserve) 1461 inode_incr_space(inode, number, reserve);
1361 inode_add_bytes(inode, number);
1362out_unlock:
1363 spin_unlock(&dq_data_lock); 1462 spin_unlock(&dq_data_lock);
1463
1464 if (reserve)
1465 goto out_flush_warn;
1466 mark_all_dquot_dirty(inode->i_dquot);
1467out_flush_warn:
1364 flush_warnings(inode->i_dquot, warntype); 1468 flush_warnings(inode->i_dquot, warntype);
1469out_unlock:
1470 up_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
1471out:
1365 return ret; 1472 return ret;
1366} 1473}
1367 1474
1368int dquot_alloc_space(struct inode *inode, qsize_t number, int warn) 1475int dquot_alloc_space(struct inode *inode, qsize_t number, int warn)
1369{ 1476{
1370 int cnt, ret = QUOTA_OK; 1477 return __dquot_alloc_space(inode, number, warn, 0);
1371
1372 /*
1373 * First test before acquiring mutex - solves deadlocks when we
1374 * re-enter the quota code and are already holding the mutex
1375 */
1376 if (IS_NOQUOTA(inode)) {
1377 inode_add_bytes(inode, number);
1378 goto out;
1379 }
1380
1381 down_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
1382 if (IS_NOQUOTA(inode)) {
1383 inode_add_bytes(inode, number);
1384 goto out_unlock;
1385 }
1386
1387 ret = __dquot_alloc_space(inode, number, warn, 0);
1388 if (ret == NO_QUOTA)
1389 goto out_unlock;
1390
1391 /* Dirtify all the dquots - this can block when journalling */
1392 for (cnt = 0; cnt < MAXQUOTAS; cnt++)
1393 if (inode->i_dquot[cnt])
1394 mark_dquot_dirty(inode->i_dquot[cnt]);
1395out_unlock:
1396 up_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
1397out:
1398 return ret;
1399} 1478}
1400EXPORT_SYMBOL(dquot_alloc_space); 1479EXPORT_SYMBOL(dquot_alloc_space);
1401 1480
1402int dquot_reserve_space(struct inode *inode, qsize_t number, int warn) 1481int dquot_reserve_space(struct inode *inode, qsize_t number, int warn)
1403{ 1482{
1404 int ret = QUOTA_OK; 1483 return __dquot_alloc_space(inode, number, warn, 1);
1405
1406 if (IS_NOQUOTA(inode))
1407 goto out;
1408
1409 down_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
1410 if (IS_NOQUOTA(inode))
1411 goto out_unlock;
1412
1413 ret = __dquot_alloc_space(inode, number, warn, 1);
1414out_unlock:
1415 up_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
1416out:
1417 return ret;
1418} 1484}
1419EXPORT_SYMBOL(dquot_reserve_space); 1485EXPORT_SYMBOL(dquot_reserve_space);
1420 1486
@@ -1455,10 +1521,7 @@ int dquot_alloc_inode(const struct inode *inode, qsize_t number)
1455warn_put_all: 1521warn_put_all:
1456 spin_unlock(&dq_data_lock); 1522 spin_unlock(&dq_data_lock);
1457 if (ret == QUOTA_OK) 1523 if (ret == QUOTA_OK)
1458 /* Dirtify all the dquots - this can block when journalling */ 1524 mark_all_dquot_dirty(inode->i_dquot);
1459 for (cnt = 0; cnt < MAXQUOTAS; cnt++)
1460 if (inode->i_dquot[cnt])
1461 mark_dquot_dirty(inode->i_dquot[cnt]);
1462 flush_warnings(inode->i_dquot, warntype); 1525 flush_warnings(inode->i_dquot, warntype);
1463 up_read(&sb_dqopt(inode->i_sb)->dqptr_sem); 1526 up_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
1464 return ret; 1527 return ret;
@@ -1471,14 +1534,14 @@ int dquot_claim_space(struct inode *inode, qsize_t number)
1471 int ret = QUOTA_OK; 1534 int ret = QUOTA_OK;
1472 1535
1473 if (IS_NOQUOTA(inode)) { 1536 if (IS_NOQUOTA(inode)) {
1474 inode_add_bytes(inode, number); 1537 inode_claim_rsv_space(inode, number);
1475 goto out; 1538 goto out;
1476 } 1539 }
1477 1540
1478 down_read(&sb_dqopt(inode->i_sb)->dqptr_sem); 1541 down_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
1479 if (IS_NOQUOTA(inode)) { 1542 if (IS_NOQUOTA(inode)) {
1480 up_read(&sb_dqopt(inode->i_sb)->dqptr_sem); 1543 up_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
1481 inode_add_bytes(inode, number); 1544 inode_claim_rsv_space(inode, number);
1482 goto out; 1545 goto out;
1483 } 1546 }
1484 1547
@@ -1490,12 +1553,9 @@ int dquot_claim_space(struct inode *inode, qsize_t number)
1490 number); 1553 number);
1491 } 1554 }
1492 /* Update inode bytes */ 1555 /* Update inode bytes */
1493 inode_add_bytes(inode, number); 1556 inode_claim_rsv_space(inode, number);
1494 spin_unlock(&dq_data_lock); 1557 spin_unlock(&dq_data_lock);
1495 /* Dirtify all the dquots - this can block when journalling */ 1558 mark_all_dquot_dirty(inode->i_dquot);
1496 for (cnt = 0; cnt < MAXQUOTAS; cnt++)
1497 if (inode->i_dquot[cnt])
1498 mark_dquot_dirty(inode->i_dquot[cnt]);
1499 up_read(&sb_dqopt(inode->i_sb)->dqptr_sem); 1559 up_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
1500out: 1560out:
1501 return ret; 1561 return ret;
@@ -1503,38 +1563,9 @@ out:
1503EXPORT_SYMBOL(dquot_claim_space); 1563EXPORT_SYMBOL(dquot_claim_space);
1504 1564
1505/* 1565/*
1506 * Release reserved quota space
1507 */
1508void dquot_release_reserved_space(struct inode *inode, qsize_t number)
1509{
1510 int cnt;
1511
1512 if (IS_NOQUOTA(inode))
1513 goto out;
1514
1515 down_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
1516 if (IS_NOQUOTA(inode))
1517 goto out_unlock;
1518
1519 spin_lock(&dq_data_lock);
1520 /* Release reserved dquots */
1521 for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
1522 if (inode->i_dquot[cnt])
1523 dquot_free_reserved_space(inode->i_dquot[cnt], number);
1524 }
1525 spin_unlock(&dq_data_lock);
1526
1527out_unlock:
1528 up_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
1529out:
1530 return;
1531}
1532EXPORT_SYMBOL(dquot_release_reserved_space);
1533
1534/*
1535 * This operation can block, but only after everything is updated 1566 * This operation can block, but only after everything is updated
1536 */ 1567 */
1537int dquot_free_space(struct inode *inode, qsize_t number) 1568int __dquot_free_space(struct inode *inode, qsize_t number, int reserve)
1538{ 1569{
1539 unsigned int cnt; 1570 unsigned int cnt;
1540 char warntype[MAXQUOTAS]; 1571 char warntype[MAXQUOTAS];
@@ -1543,7 +1574,7 @@ int dquot_free_space(struct inode *inode, qsize_t number)
1543 * re-enter the quota code and are already holding the mutex */ 1574 * re-enter the quota code and are already holding the mutex */
1544 if (IS_NOQUOTA(inode)) { 1575 if (IS_NOQUOTA(inode)) {
1545out_sub: 1576out_sub:
1546 inode_sub_bytes(inode, number); 1577 inode_decr_space(inode, number, reserve);
1547 return QUOTA_OK; 1578 return QUOTA_OK;
1548 } 1579 }
1549 1580
@@ -1558,21 +1589,40 @@ out_sub:
1558 if (!inode->i_dquot[cnt]) 1589 if (!inode->i_dquot[cnt])
1559 continue; 1590 continue;
1560 warntype[cnt] = info_bdq_free(inode->i_dquot[cnt], number); 1591 warntype[cnt] = info_bdq_free(inode->i_dquot[cnt], number);
1561 dquot_decr_space(inode->i_dquot[cnt], number); 1592 if (reserve)
1593 dquot_free_reserved_space(inode->i_dquot[cnt], number);
1594 else
1595 dquot_decr_space(inode->i_dquot[cnt], number);
1562 } 1596 }
1563 inode_sub_bytes(inode, number); 1597 inode_decr_space(inode, number, reserve);
1564 spin_unlock(&dq_data_lock); 1598 spin_unlock(&dq_data_lock);
1565 /* Dirtify all the dquots - this can block when journalling */ 1599
1566 for (cnt = 0; cnt < MAXQUOTAS; cnt++) 1600 if (reserve)
1567 if (inode->i_dquot[cnt]) 1601 goto out_unlock;
1568 mark_dquot_dirty(inode->i_dquot[cnt]); 1602 mark_all_dquot_dirty(inode->i_dquot);
1603out_unlock:
1569 flush_warnings(inode->i_dquot, warntype); 1604 flush_warnings(inode->i_dquot, warntype);
1570 up_read(&sb_dqopt(inode->i_sb)->dqptr_sem); 1605 up_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
1571 return QUOTA_OK; 1606 return QUOTA_OK;
1572} 1607}
1608
1609int dquot_free_space(struct inode *inode, qsize_t number)
1610{
1611 return __dquot_free_space(inode, number, 0);
1612}
1573EXPORT_SYMBOL(dquot_free_space); 1613EXPORT_SYMBOL(dquot_free_space);
1574 1614
1575/* 1615/*
1616 * Release reserved quota space
1617 */
1618void dquot_release_reserved_space(struct inode *inode, qsize_t number)
1619{
1620 __dquot_free_space(inode, number, 1);
1621
1622}
1623EXPORT_SYMBOL(dquot_release_reserved_space);
1624
1625/*
1576 * This operation can block, but only after everything is updated 1626 * This operation can block, but only after everything is updated
1577 */ 1627 */
1578int dquot_free_inode(const struct inode *inode, qsize_t number) 1628int dquot_free_inode(const struct inode *inode, qsize_t number)
@@ -1599,10 +1649,7 @@ int dquot_free_inode(const struct inode *inode, qsize_t number)
1599 dquot_decr_inodes(inode->i_dquot[cnt], number); 1649 dquot_decr_inodes(inode->i_dquot[cnt], number);
1600 } 1650 }
1601 spin_unlock(&dq_data_lock); 1651 spin_unlock(&dq_data_lock);
1602 /* Dirtify all the dquots - this can block when journalling */ 1652 mark_all_dquot_dirty(inode->i_dquot);
1603 for (cnt = 0; cnt < MAXQUOTAS; cnt++)
1604 if (inode->i_dquot[cnt])
1605 mark_dquot_dirty(inode->i_dquot[cnt]);
1606 flush_warnings(inode->i_dquot, warntype); 1653 flush_warnings(inode->i_dquot, warntype);
1607 up_read(&sb_dqopt(inode->i_sb)->dqptr_sem); 1654 up_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
1608 return QUOTA_OK; 1655 return QUOTA_OK;
@@ -1610,19 +1657,6 @@ int dquot_free_inode(const struct inode *inode, qsize_t number)
1610EXPORT_SYMBOL(dquot_free_inode); 1657EXPORT_SYMBOL(dquot_free_inode);
1611 1658
1612/* 1659/*
1613 * call back function, get reserved quota space from underlying fs
1614 */
1615qsize_t dquot_get_reserved_space(struct inode *inode)
1616{
1617 qsize_t reserved_space = 0;
1618
1619 if (sb_any_quota_active(inode->i_sb) &&
1620 inode->i_sb->dq_op->get_reserved_space)
1621 reserved_space = inode->i_sb->dq_op->get_reserved_space(inode);
1622 return reserved_space;
1623}
1624
1625/*
1626 * Transfer the number of inode and blocks from one diskquota to an other. 1660 * Transfer the number of inode and blocks from one diskquota to an other.
1627 * 1661 *
1628 * This operation can block, but only after everything is updated 1662 * This operation can block, but only after everything is updated
@@ -1665,7 +1699,7 @@ int dquot_transfer(struct inode *inode, struct iattr *iattr)
1665 } 1699 }
1666 spin_lock(&dq_data_lock); 1700 spin_lock(&dq_data_lock);
1667 cur_space = inode_get_bytes(inode); 1701 cur_space = inode_get_bytes(inode);
1668 rsv_space = dquot_get_reserved_space(inode); 1702 rsv_space = inode_get_rsv_space(inode);
1669 space = cur_space + rsv_space; 1703 space = cur_space + rsv_space;
1670 /* Build the transfer_from list and check the limits */ 1704 /* Build the transfer_from list and check the limits */
1671 for (cnt = 0; cnt < MAXQUOTAS; cnt++) { 1705 for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
@@ -1709,25 +1743,18 @@ int dquot_transfer(struct inode *inode, struct iattr *iattr)
1709 spin_unlock(&dq_data_lock); 1743 spin_unlock(&dq_data_lock);
1710 up_write(&sb_dqopt(inode->i_sb)->dqptr_sem); 1744 up_write(&sb_dqopt(inode->i_sb)->dqptr_sem);
1711 1745
1712 /* Dirtify all the dquots - this can block when journalling */ 1746 mark_all_dquot_dirty(transfer_from);
1713 for (cnt = 0; cnt < MAXQUOTAS; cnt++) { 1747 mark_all_dquot_dirty(transfer_to);
1714 if (transfer_from[cnt]) 1748 /* The reference we got is transferred to the inode */
1715 mark_dquot_dirty(transfer_from[cnt]); 1749 for (cnt = 0; cnt < MAXQUOTAS; cnt++)
1716 if (transfer_to[cnt]) { 1750 transfer_to[cnt] = NULL;
1717 mark_dquot_dirty(transfer_to[cnt]);
1718 /* The reference we got is transferred to the inode */
1719 transfer_to[cnt] = NULL;
1720 }
1721 }
1722warn_put_all: 1751warn_put_all:
1723 flush_warnings(transfer_to, warntype_to); 1752 flush_warnings(transfer_to, warntype_to);
1724 flush_warnings(transfer_from, warntype_from_inodes); 1753 flush_warnings(transfer_from, warntype_from_inodes);
1725 flush_warnings(transfer_from, warntype_from_space); 1754 flush_warnings(transfer_from, warntype_from_space);
1726put_all: 1755put_all:
1727 for (cnt = 0; cnt < MAXQUOTAS; cnt++) { 1756 dqput_all(transfer_from);
1728 dqput(transfer_from[cnt]); 1757 dqput_all(transfer_to);
1729 dqput(transfer_to[cnt]);
1730 }
1731 return ret; 1758 return ret;
1732over_quota: 1759over_quota:
1733 spin_unlock(&dq_data_lock); 1760 spin_unlock(&dq_data_lock);
diff --git a/fs/quota/quota_v2.c b/fs/quota/quota_v2.c
index 3dfc23e02135..e3da02f4986f 100644
--- a/fs/quota/quota_v2.c
+++ b/fs/quota/quota_v2.c
@@ -97,8 +97,11 @@ static int v2_read_file_info(struct super_block *sb, int type)
97 unsigned int version; 97 unsigned int version;
98 98
99 if (!v2_read_header(sb, type, &dqhead)) 99 if (!v2_read_header(sb, type, &dqhead))
100 return 0; 100 return -1;
101 version = le32_to_cpu(dqhead.dqh_version); 101 version = le32_to_cpu(dqhead.dqh_version);
102 if ((info->dqi_fmt_id == QFMT_VFS_V0 && version != 0) ||
103 (info->dqi_fmt_id == QFMT_VFS_V1 && version != 1))
104 return -1;
102 105
103 size = sb->s_op->quota_read(sb, type, (char *)&dinfo, 106 size = sb->s_op->quota_read(sb, type, (char *)&dinfo,
104 sizeof(struct v2_disk_dqinfo), V2_DQINFOOFF); 107 sizeof(struct v2_disk_dqinfo), V2_DQINFOOFF);
@@ -120,8 +123,8 @@ static int v2_read_file_info(struct super_block *sb, int type)
120 info->dqi_maxilimit = 0xffffffff; 123 info->dqi_maxilimit = 0xffffffff;
121 } else { 124 } else {
122 /* used space is stored as unsigned 64-bit value */ 125 /* used space is stored as unsigned 64-bit value */
123 info->dqi_maxblimit = 0xffffffffffffffff; /* 2^64-1 */ 126 info->dqi_maxblimit = 0xffffffffffffffffULL; /* 2^64-1 */
124 info->dqi_maxilimit = 0xffffffffffffffff; 127 info->dqi_maxilimit = 0xffffffffffffffffULL;
125 } 128 }
126 info->dqi_bgrace = le32_to_cpu(dinfo.dqi_bgrace); 129 info->dqi_bgrace = le32_to_cpu(dinfo.dqi_bgrace);
127 info->dqi_igrace = le32_to_cpu(dinfo.dqi_igrace); 130 info->dqi_igrace = le32_to_cpu(dinfo.dqi_igrace);
diff --git a/fs/ramfs/file-nommu.c b/fs/ramfs/file-nommu.c
index 32fae4040ebf..1739a4aba25f 100644
--- a/fs/ramfs/file-nommu.c
+++ b/fs/ramfs/file-nommu.c
@@ -60,7 +60,7 @@ const struct inode_operations ramfs_file_inode_operations = {
60 */ 60 */
61int ramfs_nommu_expand_for_mapping(struct inode *inode, size_t newsize) 61int ramfs_nommu_expand_for_mapping(struct inode *inode, size_t newsize)
62{ 62{
63 unsigned long npages, xpages, loop, limit; 63 unsigned long npages, xpages, loop;
64 struct page *pages; 64 struct page *pages;
65 unsigned order; 65 unsigned order;
66 void *data; 66 void *data;
@@ -123,30 +123,6 @@ add_error:
123 123
124/*****************************************************************************/ 124/*****************************************************************************/
125/* 125/*
126 * check that file shrinkage doesn't leave any VMAs dangling in midair
127 */
128static int ramfs_nommu_check_mappings(struct inode *inode,
129 size_t newsize, size_t size)
130{
131 struct vm_area_struct *vma;
132 struct prio_tree_iter iter;
133
134 /* search for VMAs that fall within the dead zone */
135 vma_prio_tree_foreach(vma, &iter, &inode->i_mapping->i_mmap,
136 newsize >> PAGE_SHIFT,
137 (size + PAGE_SIZE - 1) >> PAGE_SHIFT
138 ) {
139 /* found one - only interested if it's shared out of the page
140 * cache */
141 if (vma->vm_flags & VM_SHARED)
142 return -ETXTBSY; /* not quite true, but near enough */
143 }
144
145 return 0;
146}
147
148/*****************************************************************************/
149/*
150 * 126 *
151 */ 127 */
152static int ramfs_nommu_resize(struct inode *inode, loff_t newsize, loff_t size) 128static int ramfs_nommu_resize(struct inode *inode, loff_t newsize, loff_t size)
@@ -164,7 +140,7 @@ static int ramfs_nommu_resize(struct inode *inode, loff_t newsize, loff_t size)
164 140
165 /* check that a decrease in size doesn't cut off any shared mappings */ 141 /* check that a decrease in size doesn't cut off any shared mappings */
166 if (newsize < size) { 142 if (newsize < size) {
167 ret = ramfs_nommu_check_mappings(inode, newsize, size); 143 ret = nommu_shrink_inode_mappings(inode, size, newsize);
168 if (ret < 0) 144 if (ret < 0)
169 return ret; 145 return ret;
170 } 146 }
diff --git a/fs/reiserfs/Kconfig b/fs/reiserfs/Kconfig
index ac7cd75c86f8..513f431038f9 100644
--- a/fs/reiserfs/Kconfig
+++ b/fs/reiserfs/Kconfig
@@ -1,7 +1,6 @@
1config REISERFS_FS 1config REISERFS_FS
2 tristate "Reiserfs support" 2 tristate "Reiserfs support"
3 select CRC32 3 select CRC32
4 select FS_JOURNAL_INFO
5 help 4 help
6 Stores not just filenames but the files themselves in a balanced 5 Stores not just filenames but the files themselves in a balanced
7 tree. Uses journalling. 6 tree. Uses journalling.
diff --git a/fs/reiserfs/bitmap.c b/fs/reiserfs/bitmap.c
index 685495707181..65c872761177 100644
--- a/fs/reiserfs/bitmap.c
+++ b/fs/reiserfs/bitmap.c
@@ -1277,7 +1277,10 @@ int reiserfs_init_bitmap_cache(struct super_block *sb)
1277 struct reiserfs_bitmap_info *bitmap; 1277 struct reiserfs_bitmap_info *bitmap;
1278 unsigned int bmap_nr = reiserfs_bmap_count(sb); 1278 unsigned int bmap_nr = reiserfs_bmap_count(sb);
1279 1279
1280 /* Avoid lock recursion in fault case */
1281 reiserfs_write_unlock(sb);
1280 bitmap = vmalloc(sizeof(*bitmap) * bmap_nr); 1282 bitmap = vmalloc(sizeof(*bitmap) * bmap_nr);
1283 reiserfs_write_lock(sb);
1281 if (bitmap == NULL) 1284 if (bitmap == NULL)
1282 return -ENOMEM; 1285 return -ENOMEM;
1283 1286
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index 3a28e7751b3c..2df0f5c7c60b 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -31,11 +31,12 @@ void reiserfs_delete_inode(struct inode *inode)
31 JOURNAL_PER_BALANCE_CNT * 2 + 31 JOURNAL_PER_BALANCE_CNT * 2 +
32 2 * REISERFS_QUOTA_INIT_BLOCKS(inode->i_sb); 32 2 * REISERFS_QUOTA_INIT_BLOCKS(inode->i_sb);
33 struct reiserfs_transaction_handle th; 33 struct reiserfs_transaction_handle th;
34 int depth;
34 int err; 35 int err;
35 36
36 truncate_inode_pages(&inode->i_data, 0); 37 truncate_inode_pages(&inode->i_data, 0);
37 38
38 reiserfs_write_lock(inode->i_sb); 39 depth = reiserfs_write_lock_once(inode->i_sb);
39 40
40 /* The = 0 happens when we abort creating a new inode for some reason like lack of space.. */ 41 /* The = 0 happens when we abort creating a new inode for some reason like lack of space.. */
41 if (!(inode->i_state & I_NEW) && INODE_PKEY(inode)->k_objectid != 0) { /* also handles bad_inode case */ 42 if (!(inode->i_state & I_NEW) && INODE_PKEY(inode)->k_objectid != 0) { /* also handles bad_inode case */
@@ -74,7 +75,7 @@ void reiserfs_delete_inode(struct inode *inode)
74 out: 75 out:
75 clear_inode(inode); /* note this must go after the journal_end to prevent deadlock */ 76 clear_inode(inode); /* note this must go after the journal_end to prevent deadlock */
76 inode->i_blocks = 0; 77 inode->i_blocks = 0;
77 reiserfs_write_unlock(inode->i_sb); 78 reiserfs_write_unlock_once(inode->i_sb, depth);
78} 79}
79 80
80static void _make_cpu_key(struct cpu_key *key, int version, __u32 dirid, 81static void _make_cpu_key(struct cpu_key *key, int version, __u32 dirid,
@@ -1496,9 +1497,11 @@ struct inode *reiserfs_iget(struct super_block *s, const struct cpu_key *key)
1496 1497
1497 args.objectid = key->on_disk_key.k_objectid; 1498 args.objectid = key->on_disk_key.k_objectid;
1498 args.dirid = key->on_disk_key.k_dir_id; 1499 args.dirid = key->on_disk_key.k_dir_id;
1500 reiserfs_write_unlock(s);
1499 inode = iget5_locked(s, key->on_disk_key.k_objectid, 1501 inode = iget5_locked(s, key->on_disk_key.k_objectid,
1500 reiserfs_find_actor, reiserfs_init_locked_inode, 1502 reiserfs_find_actor, reiserfs_init_locked_inode,
1501 (void *)(&args)); 1503 (void *)(&args));
1504 reiserfs_write_lock(s);
1502 if (!inode) 1505 if (!inode)
1503 return ERR_PTR(-ENOMEM); 1506 return ERR_PTR(-ENOMEM);
1504 1507
@@ -2538,6 +2541,12 @@ static int reiserfs_writepage(struct page *page, struct writeback_control *wbc)
2538 return reiserfs_write_full_page(page, wbc); 2541 return reiserfs_write_full_page(page, wbc);
2539} 2542}
2540 2543
2544static void reiserfs_truncate_failed_write(struct inode *inode)
2545{
2546 truncate_inode_pages(inode->i_mapping, inode->i_size);
2547 reiserfs_truncate_file(inode, 0);
2548}
2549
2541static int reiserfs_write_begin(struct file *file, 2550static int reiserfs_write_begin(struct file *file,
2542 struct address_space *mapping, 2551 struct address_space *mapping,
2543 loff_t pos, unsigned len, unsigned flags, 2552 loff_t pos, unsigned len, unsigned flags,
@@ -2604,6 +2613,8 @@ static int reiserfs_write_begin(struct file *file,
2604 if (ret) { 2613 if (ret) {
2605 unlock_page(page); 2614 unlock_page(page);
2606 page_cache_release(page); 2615 page_cache_release(page);
2616 /* Truncate allocated blocks */
2617 reiserfs_truncate_failed_write(inode);
2607 } 2618 }
2608 return ret; 2619 return ret;
2609} 2620}
@@ -2701,9 +2712,7 @@ static int reiserfs_write_end(struct file *file, struct address_space *mapping,
2701 ** transaction tracking stuff when the size changes. So, we have 2712 ** transaction tracking stuff when the size changes. So, we have
2702 ** to do the i_size updates here. 2713 ** to do the i_size updates here.
2703 */ 2714 */
2704 pos += copied; 2715 if (pos + copied > inode->i_size) {
2705
2706 if (pos > inode->i_size) {
2707 struct reiserfs_transaction_handle myth; 2716 struct reiserfs_transaction_handle myth;
2708 lock_depth = reiserfs_write_lock_once(inode->i_sb); 2717 lock_depth = reiserfs_write_lock_once(inode->i_sb);
2709 locked = true; 2718 locked = true;
@@ -2721,7 +2730,7 @@ static int reiserfs_write_end(struct file *file, struct address_space *mapping,
2721 goto journal_error; 2730 goto journal_error;
2722 2731
2723 reiserfs_update_inode_transaction(inode); 2732 reiserfs_update_inode_transaction(inode);
2724 inode->i_size = pos; 2733 inode->i_size = pos + copied;
2725 /* 2734 /*
2726 * this will just nest into our transaction. It's important 2735 * this will just nest into our transaction. It's important
2727 * to use mark_inode_dirty so the inode gets pushed around on the 2736 * to use mark_inode_dirty so the inode gets pushed around on the
@@ -2751,6 +2760,10 @@ static int reiserfs_write_end(struct file *file, struct address_space *mapping,
2751 reiserfs_write_unlock_once(inode->i_sb, lock_depth); 2760 reiserfs_write_unlock_once(inode->i_sb, lock_depth);
2752 unlock_page(page); 2761 unlock_page(page);
2753 page_cache_release(page); 2762 page_cache_release(page);
2763
2764 if (pos + len > inode->i_size)
2765 reiserfs_truncate_failed_write(inode);
2766
2754 return ret == 0 ? copied : ret; 2767 return ret == 0 ? copied : ret;
2755 2768
2756 journal_error: 2769 journal_error:
@@ -3051,13 +3064,14 @@ static ssize_t reiserfs_direct_IO(int rw, struct kiocb *iocb,
3051int reiserfs_setattr(struct dentry *dentry, struct iattr *attr) 3064int reiserfs_setattr(struct dentry *dentry, struct iattr *attr)
3052{ 3065{
3053 struct inode *inode = dentry->d_inode; 3066 struct inode *inode = dentry->d_inode;
3054 int error;
3055 unsigned int ia_valid; 3067 unsigned int ia_valid;
3068 int depth;
3069 int error;
3056 3070
3057 /* must be turned off for recursive notify_change calls */ 3071 /* must be turned off for recursive notify_change calls */
3058 ia_valid = attr->ia_valid &= ~(ATTR_KILL_SUID|ATTR_KILL_SGID); 3072 ia_valid = attr->ia_valid &= ~(ATTR_KILL_SUID|ATTR_KILL_SGID);
3059 3073
3060 reiserfs_write_lock(inode->i_sb); 3074 depth = reiserfs_write_lock_once(inode->i_sb);
3061 if (attr->ia_valid & ATTR_SIZE) { 3075 if (attr->ia_valid & ATTR_SIZE) {
3062 /* version 2 items will be caught by the s_maxbytes check 3076 /* version 2 items will be caught by the s_maxbytes check
3063 ** done for us in vmtruncate 3077 ** done for us in vmtruncate
@@ -3138,8 +3152,17 @@ int reiserfs_setattr(struct dentry *dentry, struct iattr *attr)
3138 journal_end(&th, inode->i_sb, jbegin_count); 3152 journal_end(&th, inode->i_sb, jbegin_count);
3139 } 3153 }
3140 } 3154 }
3141 if (!error) 3155 if (!error) {
3156 /*
3157 * Relax the lock here, as it might truncate the
3158 * inode pages and wait for inode pages locks.
3159 * To release such page lock, the owner needs the
3160 * reiserfs lock
3161 */
3162 reiserfs_write_unlock_once(inode->i_sb, depth);
3142 error = inode_setattr(inode, attr); 3163 error = inode_setattr(inode, attr);
3164 depth = reiserfs_write_lock_once(inode->i_sb);
3165 }
3143 } 3166 }
3144 3167
3145 if (!error && reiserfs_posixacl(inode->i_sb)) { 3168 if (!error && reiserfs_posixacl(inode->i_sb)) {
@@ -3148,7 +3171,8 @@ int reiserfs_setattr(struct dentry *dentry, struct iattr *attr)
3148 } 3171 }
3149 3172
3150 out: 3173 out:
3151 reiserfs_write_unlock(inode->i_sb); 3174 reiserfs_write_unlock_once(inode->i_sb, depth);
3175
3152 return error; 3176 return error;
3153} 3177}
3154 3178
diff --git a/fs/reiserfs/ioctl.c b/fs/reiserfs/ioctl.c
index ace77451ceb1..f53505de0712 100644
--- a/fs/reiserfs/ioctl.c
+++ b/fs/reiserfs/ioctl.c
@@ -104,9 +104,10 @@ setflags_out:
104 err = put_user(inode->i_generation, (int __user *)arg); 104 err = put_user(inode->i_generation, (int __user *)arg);
105 break; 105 break;
106 case REISERFS_IOC_SETVERSION: 106 case REISERFS_IOC_SETVERSION:
107 if (!is_owner_or_cap(inode)) 107 if (!is_owner_or_cap(inode)) {
108 err = -EPERM; 108 err = -EPERM;
109 break; 109 break;
110 }
110 err = mnt_want_write(filp->f_path.mnt); 111 err = mnt_want_write(filp->f_path.mnt);
111 if (err) 112 if (err)
112 break; 113 break;
diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c
index 2f8a7e7b8dab..ba98546fabbd 100644
--- a/fs/reiserfs/journal.c
+++ b/fs/reiserfs/journal.c
@@ -2009,10 +2009,11 @@ static int do_journal_release(struct reiserfs_transaction_handle *th,
2009 destroy_workqueue(commit_wq); 2009 destroy_workqueue(commit_wq);
2010 commit_wq = NULL; 2010 commit_wq = NULL;
2011 } 2011 }
2012 reiserfs_write_lock(sb);
2013 2012
2014 free_journal_ram(sb); 2013 free_journal_ram(sb);
2015 2014
2015 reiserfs_write_lock(sb);
2016
2016 return 0; 2017 return 0;
2017} 2018}
2018 2019
@@ -2758,11 +2759,18 @@ int journal_init(struct super_block *sb, const char *j_dev_name,
2758 struct reiserfs_journal *journal; 2759 struct reiserfs_journal *journal;
2759 struct reiserfs_journal_list *jl; 2760 struct reiserfs_journal_list *jl;
2760 char b[BDEVNAME_SIZE]; 2761 char b[BDEVNAME_SIZE];
2762 int ret;
2761 2763
2764 /*
2765 * Unlock here to avoid various RECLAIM-FS-ON <-> IN-RECLAIM-FS
2766 * dependency inversion warnings.
2767 */
2768 reiserfs_write_unlock(sb);
2762 journal = SB_JOURNAL(sb) = vmalloc(sizeof(struct reiserfs_journal)); 2769 journal = SB_JOURNAL(sb) = vmalloc(sizeof(struct reiserfs_journal));
2763 if (!journal) { 2770 if (!journal) {
2764 reiserfs_warning(sb, "journal-1256", 2771 reiserfs_warning(sb, "journal-1256",
2765 "unable to get memory for journal structure"); 2772 "unable to get memory for journal structure");
2773 reiserfs_write_lock(sb);
2766 return 1; 2774 return 1;
2767 } 2775 }
2768 memset(journal, 0, sizeof(struct reiserfs_journal)); 2776 memset(journal, 0, sizeof(struct reiserfs_journal));
@@ -2771,10 +2779,12 @@ int journal_init(struct super_block *sb, const char *j_dev_name,
2771 INIT_LIST_HEAD(&journal->j_working_list); 2779 INIT_LIST_HEAD(&journal->j_working_list);
2772 INIT_LIST_HEAD(&journal->j_journal_list); 2780 INIT_LIST_HEAD(&journal->j_journal_list);
2773 journal->j_persistent_trans = 0; 2781 journal->j_persistent_trans = 0;
2774 if (reiserfs_allocate_list_bitmaps(sb, 2782 ret = reiserfs_allocate_list_bitmaps(sb, journal->j_list_bitmap,
2775 journal->j_list_bitmap, 2783 reiserfs_bmap_count(sb));
2776 reiserfs_bmap_count(sb))) 2784 reiserfs_write_lock(sb);
2785 if (ret)
2777 goto free_and_return; 2786 goto free_and_return;
2787
2778 allocate_bitmap_nodes(sb); 2788 allocate_bitmap_nodes(sb);
2779 2789
2780 /* reserved for journal area support */ 2790 /* reserved for journal area support */
@@ -2903,7 +2913,9 @@ int journal_init(struct super_block *sb, const char *j_dev_name,
2903 journal->j_mount_id = 10; 2913 journal->j_mount_id = 10;
2904 journal->j_state = 0; 2914 journal->j_state = 0;
2905 atomic_set(&(journal->j_jlock), 0); 2915 atomic_set(&(journal->j_jlock), 0);
2916 reiserfs_write_unlock(sb);
2906 journal->j_cnode_free_list = allocate_cnodes(num_cnodes); 2917 journal->j_cnode_free_list = allocate_cnodes(num_cnodes);
2918 reiserfs_write_lock(sb);
2907 journal->j_cnode_free_orig = journal->j_cnode_free_list; 2919 journal->j_cnode_free_orig = journal->j_cnode_free_list;
2908 journal->j_cnode_free = journal->j_cnode_free_list ? num_cnodes : 0; 2920 journal->j_cnode_free = journal->j_cnode_free_list ? num_cnodes : 0;
2909 journal->j_cnode_used = 0; 2921 journal->j_cnode_used = 0;
diff --git a/fs/reiserfs/lock.c b/fs/reiserfs/lock.c
index ee2cfc0fd8a7..b87aa2c1afc1 100644
--- a/fs/reiserfs/lock.c
+++ b/fs/reiserfs/lock.c
@@ -86,3 +86,12 @@ void reiserfs_check_lock_depth(struct super_block *sb, char *caller)
86 reiserfs_panic(sb, "%s called without kernel lock held %d", 86 reiserfs_panic(sb, "%s called without kernel lock held %d",
87 caller); 87 caller);
88} 88}
89
90#ifdef CONFIG_REISERFS_CHECK
91void reiserfs_lock_check_recursive(struct super_block *sb)
92{
93 struct reiserfs_sb_info *sb_i = REISERFS_SB(sb);
94
95 WARN_ONCE((sb_i->lock_depth > 0), "Unwanted recursive reiserfs lock!\n");
96}
97#endif
diff --git a/fs/reiserfs/namei.c b/fs/reiserfs/namei.c
index e296ff72a6cc..9d4dcf0b07cb 100644
--- a/fs/reiserfs/namei.c
+++ b/fs/reiserfs/namei.c
@@ -921,6 +921,7 @@ static int reiserfs_unlink(struct inode *dir, struct dentry *dentry)
921 struct reiserfs_transaction_handle th; 921 struct reiserfs_transaction_handle th;
922 int jbegin_count; 922 int jbegin_count;
923 unsigned long savelink; 923 unsigned long savelink;
924 int depth;
924 925
925 inode = dentry->d_inode; 926 inode = dentry->d_inode;
926 927
@@ -932,7 +933,7 @@ static int reiserfs_unlink(struct inode *dir, struct dentry *dentry)
932 JOURNAL_PER_BALANCE_CNT * 2 + 2 + 933 JOURNAL_PER_BALANCE_CNT * 2 + 2 +
933 4 * REISERFS_QUOTA_TRANS_BLOCKS(dir->i_sb); 934 4 * REISERFS_QUOTA_TRANS_BLOCKS(dir->i_sb);
934 935
935 reiserfs_write_lock(dir->i_sb); 936 depth = reiserfs_write_lock_once(dir->i_sb);
936 retval = journal_begin(&th, dir->i_sb, jbegin_count); 937 retval = journal_begin(&th, dir->i_sb, jbegin_count);
937 if (retval) 938 if (retval)
938 goto out_unlink; 939 goto out_unlink;
@@ -993,7 +994,7 @@ static int reiserfs_unlink(struct inode *dir, struct dentry *dentry)
993 994
994 retval = journal_end(&th, dir->i_sb, jbegin_count); 995 retval = journal_end(&th, dir->i_sb, jbegin_count);
995 reiserfs_check_path(&path); 996 reiserfs_check_path(&path);
996 reiserfs_write_unlock(dir->i_sb); 997 reiserfs_write_unlock_once(dir->i_sb, depth);
997 return retval; 998 return retval;
998 999
999 end_unlink: 1000 end_unlink:
@@ -1003,7 +1004,7 @@ static int reiserfs_unlink(struct inode *dir, struct dentry *dentry)
1003 if (err) 1004 if (err)
1004 retval = err; 1005 retval = err;
1005 out_unlink: 1006 out_unlink:
1006 reiserfs_write_unlock(dir->i_sb); 1007 reiserfs_write_unlock_once(dir->i_sb, depth);
1007 return retval; 1008 return retval;
1008} 1009}
1009 1010
diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c
index 58aa8e75f7f5..81f09fab8ae4 100644
--- a/fs/reiserfs/xattr.c
+++ b/fs/reiserfs/xattr.c
@@ -48,6 +48,7 @@
48#include <net/checksum.h> 48#include <net/checksum.h>
49#include <linux/stat.h> 49#include <linux/stat.h>
50#include <linux/quotaops.h> 50#include <linux/quotaops.h>
51#include <linux/security.h>
51 52
52#define PRIVROOT_NAME ".reiserfs_priv" 53#define PRIVROOT_NAME ".reiserfs_priv"
53#define XAROOT_NAME "xattrs" 54#define XAROOT_NAME "xattrs"
@@ -82,7 +83,8 @@ static int xattr_unlink(struct inode *dir, struct dentry *dentry)
82 BUG_ON(!mutex_is_locked(&dir->i_mutex)); 83 BUG_ON(!mutex_is_locked(&dir->i_mutex));
83 vfs_dq_init(dir); 84 vfs_dq_init(dir);
84 85
85 mutex_lock_nested(&dentry->d_inode->i_mutex, I_MUTEX_CHILD); 86 reiserfs_mutex_lock_nested_safe(&dentry->d_inode->i_mutex,
87 I_MUTEX_CHILD, dir->i_sb);
86 error = dir->i_op->unlink(dir, dentry); 88 error = dir->i_op->unlink(dir, dentry);
87 mutex_unlock(&dentry->d_inode->i_mutex); 89 mutex_unlock(&dentry->d_inode->i_mutex);
88 90
@@ -97,7 +99,8 @@ static int xattr_rmdir(struct inode *dir, struct dentry *dentry)
97 BUG_ON(!mutex_is_locked(&dir->i_mutex)); 99 BUG_ON(!mutex_is_locked(&dir->i_mutex));
98 vfs_dq_init(dir); 100 vfs_dq_init(dir);
99 101
100 mutex_lock_nested(&dentry->d_inode->i_mutex, I_MUTEX_CHILD); 102 reiserfs_mutex_lock_nested_safe(&dentry->d_inode->i_mutex,
103 I_MUTEX_CHILD, dir->i_sb);
101 dentry_unhash(dentry); 104 dentry_unhash(dentry);
102 error = dir->i_op->rmdir(dir, dentry); 105 error = dir->i_op->rmdir(dir, dentry);
103 if (!error) 106 if (!error)
@@ -234,16 +237,22 @@ static int reiserfs_for_each_xattr(struct inode *inode,
234 if (IS_PRIVATE(inode) || get_inode_sd_version(inode) == STAT_DATA_V1) 237 if (IS_PRIVATE(inode) || get_inode_sd_version(inode) == STAT_DATA_V1)
235 return 0; 238 return 0;
236 239
240 reiserfs_write_unlock(inode->i_sb);
237 dir = open_xa_dir(inode, XATTR_REPLACE); 241 dir = open_xa_dir(inode, XATTR_REPLACE);
238 if (IS_ERR(dir)) { 242 if (IS_ERR(dir)) {
239 err = PTR_ERR(dir); 243 err = PTR_ERR(dir);
244 reiserfs_write_lock(inode->i_sb);
240 goto out; 245 goto out;
241 } else if (!dir->d_inode) { 246 } else if (!dir->d_inode) {
242 err = 0; 247 err = 0;
248 reiserfs_write_lock(inode->i_sb);
243 goto out_dir; 249 goto out_dir;
244 } 250 }
245 251
246 mutex_lock_nested(&dir->d_inode->i_mutex, I_MUTEX_XATTR); 252 mutex_lock_nested(&dir->d_inode->i_mutex, I_MUTEX_XATTR);
253
254 reiserfs_write_lock(inode->i_sb);
255
247 buf.xadir = dir; 256 buf.xadir = dir;
248 err = reiserfs_readdir_dentry(dir, &buf, fill_with_dentries, &pos); 257 err = reiserfs_readdir_dentry(dir, &buf, fill_with_dentries, &pos);
249 while ((err == 0 || err == -ENOSPC) && buf.count) { 258 while ((err == 0 || err == -ENOSPC) && buf.count) {
@@ -282,8 +291,9 @@ static int reiserfs_for_each_xattr(struct inode *inode,
282 err = journal_begin(&th, inode->i_sb, blocks); 291 err = journal_begin(&th, inode->i_sb, blocks);
283 if (!err) { 292 if (!err) {
284 int jerror; 293 int jerror;
285 mutex_lock_nested(&dir->d_parent->d_inode->i_mutex, 294 reiserfs_mutex_lock_nested_safe(
286 I_MUTEX_XATTR); 295 &dir->d_parent->d_inode->i_mutex,
296 I_MUTEX_XATTR, inode->i_sb);
287 err = action(dir, data); 297 err = action(dir, data);
288 jerror = journal_end(&th, inode->i_sb, blocks); 298 jerror = journal_end(&th, inode->i_sb, blocks);
289 mutex_unlock(&dir->d_parent->d_inode->i_mutex); 299 mutex_unlock(&dir->d_parent->d_inode->i_mutex);
@@ -442,7 +452,9 @@ static int lookup_and_delete_xattr(struct inode *inode, const char *name)
442 } 452 }
443 453
444 if (dentry->d_inode) { 454 if (dentry->d_inode) {
455 reiserfs_write_lock(inode->i_sb);
445 err = xattr_unlink(xadir->d_inode, dentry); 456 err = xattr_unlink(xadir->d_inode, dentry);
457 reiserfs_write_unlock(inode->i_sb);
446 update_ctime(inode); 458 update_ctime(inode);
447 } 459 }
448 460
@@ -476,15 +488,24 @@ reiserfs_xattr_set_handle(struct reiserfs_transaction_handle *th,
476 if (get_inode_sd_version(inode) == STAT_DATA_V1) 488 if (get_inode_sd_version(inode) == STAT_DATA_V1)
477 return -EOPNOTSUPP; 489 return -EOPNOTSUPP;
478 490
479 if (!buffer) 491 reiserfs_write_unlock(inode->i_sb);
480 return lookup_and_delete_xattr(inode, name); 492
493 if (!buffer) {
494 err = lookup_and_delete_xattr(inode, name);
495 reiserfs_write_lock(inode->i_sb);
496 return err;
497 }
481 498
482 dentry = xattr_lookup(inode, name, flags); 499 dentry = xattr_lookup(inode, name, flags);
483 if (IS_ERR(dentry)) 500 if (IS_ERR(dentry)) {
501 reiserfs_write_lock(inode->i_sb);
484 return PTR_ERR(dentry); 502 return PTR_ERR(dentry);
503 }
485 504
486 down_write(&REISERFS_I(inode)->i_xattr_sem); 505 down_write(&REISERFS_I(inode)->i_xattr_sem);
487 506
507 reiserfs_write_lock(inode->i_sb);
508
488 xahash = xattr_hash(buffer, buffer_size); 509 xahash = xattr_hash(buffer, buffer_size);
489 while (buffer_pos < buffer_size || buffer_pos == 0) { 510 while (buffer_pos < buffer_size || buffer_pos == 0) {
490 size_t chunk; 511 size_t chunk;
@@ -539,8 +560,12 @@ reiserfs_xattr_set_handle(struct reiserfs_transaction_handle *th,
539 .ia_size = buffer_size, 560 .ia_size = buffer_size,
540 .ia_valid = ATTR_SIZE | ATTR_CTIME, 561 .ia_valid = ATTR_SIZE | ATTR_CTIME,
541 }; 562 };
563
564 reiserfs_write_unlock(inode->i_sb);
542 mutex_lock_nested(&dentry->d_inode->i_mutex, I_MUTEX_XATTR); 565 mutex_lock_nested(&dentry->d_inode->i_mutex, I_MUTEX_XATTR);
543 down_write(&dentry->d_inode->i_alloc_sem); 566 down_write(&dentry->d_inode->i_alloc_sem);
567 reiserfs_write_lock(inode->i_sb);
568
544 err = reiserfs_setattr(dentry, &newattrs); 569 err = reiserfs_setattr(dentry, &newattrs);
545 up_write(&dentry->d_inode->i_alloc_sem); 570 up_write(&dentry->d_inode->i_alloc_sem);
546 mutex_unlock(&dentry->d_inode->i_mutex); 571 mutex_unlock(&dentry->d_inode->i_mutex);
@@ -726,15 +751,14 @@ ssize_t
726reiserfs_getxattr(struct dentry * dentry, const char *name, void *buffer, 751reiserfs_getxattr(struct dentry * dentry, const char *name, void *buffer,
727 size_t size) 752 size_t size)
728{ 753{
729 struct inode *inode = dentry->d_inode;
730 struct xattr_handler *handler; 754 struct xattr_handler *handler;
731 755
732 handler = find_xattr_handler_prefix(inode->i_sb->s_xattr, name); 756 handler = find_xattr_handler_prefix(dentry->d_sb->s_xattr, name);
733 757
734 if (!handler || get_inode_sd_version(inode) == STAT_DATA_V1) 758 if (!handler || get_inode_sd_version(dentry->d_inode) == STAT_DATA_V1)
735 return -EOPNOTSUPP; 759 return -EOPNOTSUPP;
736 760
737 return handler->get(inode, name, buffer, size); 761 return handler->get(dentry, name, buffer, size, handler->flags);
738} 762}
739 763
740/* 764/*
@@ -746,15 +770,14 @@ int
746reiserfs_setxattr(struct dentry *dentry, const char *name, const void *value, 770reiserfs_setxattr(struct dentry *dentry, const char *name, const void *value,
747 size_t size, int flags) 771 size_t size, int flags)
748{ 772{
749 struct inode *inode = dentry->d_inode;
750 struct xattr_handler *handler; 773 struct xattr_handler *handler;
751 774
752 handler = find_xattr_handler_prefix(inode->i_sb->s_xattr, name); 775 handler = find_xattr_handler_prefix(dentry->d_sb->s_xattr, name);
753 776
754 if (!handler || get_inode_sd_version(inode) == STAT_DATA_V1) 777 if (!handler || get_inode_sd_version(dentry->d_inode) == STAT_DATA_V1)
755 return -EOPNOTSUPP; 778 return -EOPNOTSUPP;
756 779
757 return handler->set(inode, name, value, size, flags); 780 return handler->set(dentry, name, value, size, flags, handler->flags);
758} 781}
759 782
760/* 783/*
@@ -764,21 +787,20 @@ reiserfs_setxattr(struct dentry *dentry, const char *name, const void *value,
764 */ 787 */
765int reiserfs_removexattr(struct dentry *dentry, const char *name) 788int reiserfs_removexattr(struct dentry *dentry, const char *name)
766{ 789{
767 struct inode *inode = dentry->d_inode;
768 struct xattr_handler *handler; 790 struct xattr_handler *handler;
769 handler = find_xattr_handler_prefix(inode->i_sb->s_xattr, name); 791 handler = find_xattr_handler_prefix(dentry->d_sb->s_xattr, name);
770 792
771 if (!handler || get_inode_sd_version(inode) == STAT_DATA_V1) 793 if (!handler || get_inode_sd_version(dentry->d_inode) == STAT_DATA_V1)
772 return -EOPNOTSUPP; 794 return -EOPNOTSUPP;
773 795
774 return handler->set(inode, name, NULL, 0, XATTR_REPLACE); 796 return handler->set(dentry, name, NULL, 0, XATTR_REPLACE, handler->flags);
775} 797}
776 798
777struct listxattr_buf { 799struct listxattr_buf {
778 size_t size; 800 size_t size;
779 size_t pos; 801 size_t pos;
780 char *buf; 802 char *buf;
781 struct inode *inode; 803 struct dentry *dentry;
782}; 804};
783 805
784static int listxattr_filler(void *buf, const char *name, int namelen, 806static int listxattr_filler(void *buf, const char *name, int namelen,
@@ -789,17 +811,19 @@ static int listxattr_filler(void *buf, const char *name, int namelen,
789 if (name[0] != '.' || 811 if (name[0] != '.' ||
790 (namelen != 1 && (name[1] != '.' || namelen != 2))) { 812 (namelen != 1 && (name[1] != '.' || namelen != 2))) {
791 struct xattr_handler *handler; 813 struct xattr_handler *handler;
792 handler = find_xattr_handler_prefix(b->inode->i_sb->s_xattr, 814 handler = find_xattr_handler_prefix(b->dentry->d_sb->s_xattr,
793 name); 815 name);
794 if (!handler) /* Unsupported xattr name */ 816 if (!handler) /* Unsupported xattr name */
795 return 0; 817 return 0;
796 if (b->buf) { 818 if (b->buf) {
797 size = handler->list(b->inode, b->buf + b->pos, 819 size = handler->list(b->dentry, b->buf + b->pos,
798 b->size, name, namelen); 820 b->size, name, namelen,
821 handler->flags);
799 if (size > b->size) 822 if (size > b->size)
800 return -ERANGE; 823 return -ERANGE;
801 } else { 824 } else {
802 size = handler->list(b->inode, NULL, 0, name, namelen); 825 size = handler->list(b->dentry, NULL, 0, name,
826 namelen, handler->flags);
803 } 827 }
804 828
805 b->pos += size; 829 b->pos += size;
@@ -820,7 +844,7 @@ ssize_t reiserfs_listxattr(struct dentry * dentry, char *buffer, size_t size)
820 int err = 0; 844 int err = 0;
821 loff_t pos = 0; 845 loff_t pos = 0;
822 struct listxattr_buf buf = { 846 struct listxattr_buf buf = {
823 .inode = dentry->d_inode, 847 .dentry = dentry,
824 .buf = buffer, 848 .buf = buffer,
825 .size = buffer ? size : 0, 849 .size = buffer ? size : 0,
826 }; 850 };
diff --git a/fs/reiserfs/xattr_acl.c b/fs/reiserfs/xattr_acl.c
index 35d6e672a279..dd20a7883f0f 100644
--- a/fs/reiserfs/xattr_acl.c
+++ b/fs/reiserfs/xattr_acl.c
@@ -15,8 +15,10 @@ static int reiserfs_set_acl(struct reiserfs_transaction_handle *th,
15 struct posix_acl *acl); 15 struct posix_acl *acl);
16 16
17static int 17static int
18xattr_set_acl(struct inode *inode, int type, const void *value, size_t size) 18posix_acl_set(struct dentry *dentry, const char *name, const void *value,
19 size_t size, int flags, int type)
19{ 20{
21 struct inode *inode = dentry->d_inode;
20 struct posix_acl *acl; 22 struct posix_acl *acl;
21 int error, error2; 23 int error, error2;
22 struct reiserfs_transaction_handle th; 24 struct reiserfs_transaction_handle th;
@@ -60,15 +62,16 @@ xattr_set_acl(struct inode *inode, int type, const void *value, size_t size)
60} 62}
61 63
62static int 64static int
63xattr_get_acl(struct inode *inode, int type, void *buffer, size_t size) 65posix_acl_get(struct dentry *dentry, const char *name, void *buffer,
66 size_t size, int type)
64{ 67{
65 struct posix_acl *acl; 68 struct posix_acl *acl;
66 int error; 69 int error;
67 70
68 if (!reiserfs_posixacl(inode->i_sb)) 71 if (!reiserfs_posixacl(dentry->d_sb))
69 return -EOPNOTSUPP; 72 return -EOPNOTSUPP;
70 73
71 acl = reiserfs_get_acl(inode, type); 74 acl = reiserfs_get_acl(dentry->d_inode, type);
72 if (IS_ERR(acl)) 75 if (IS_ERR(acl))
73 return PTR_ERR(acl); 76 return PTR_ERR(acl);
74 if (acl == NULL) 77 if (acl == NULL)
@@ -452,7 +455,9 @@ int reiserfs_acl_chmod(struct inode *inode)
452 return 0; 455 return 0;
453 } 456 }
454 457
458 reiserfs_write_unlock(inode->i_sb);
455 acl = reiserfs_get_acl(inode, ACL_TYPE_ACCESS); 459 acl = reiserfs_get_acl(inode, ACL_TYPE_ACCESS);
460 reiserfs_write_lock(inode->i_sb);
456 if (!acl) 461 if (!acl)
457 return 0; 462 return 0;
458 if (IS_ERR(acl)) 463 if (IS_ERR(acl))
@@ -482,30 +487,12 @@ int reiserfs_acl_chmod(struct inode *inode)
482 return error; 487 return error;
483} 488}
484 489
485static int 490static size_t posix_acl_access_list(struct dentry *dentry, char *list,
486posix_acl_access_get(struct inode *inode, const char *name,
487 void *buffer, size_t size)
488{
489 if (strlen(name) != sizeof(POSIX_ACL_XATTR_ACCESS) - 1)
490 return -EINVAL;
491 return xattr_get_acl(inode, ACL_TYPE_ACCESS, buffer, size);
492}
493
494static int
495posix_acl_access_set(struct inode *inode, const char *name,
496 const void *value, size_t size, int flags)
497{
498 if (strlen(name) != sizeof(POSIX_ACL_XATTR_ACCESS) - 1)
499 return -EINVAL;
500 return xattr_set_acl(inode, ACL_TYPE_ACCESS, value, size);
501}
502
503static size_t posix_acl_access_list(struct inode *inode, char *list,
504 size_t list_size, const char *name, 491 size_t list_size, const char *name,
505 size_t name_len) 492 size_t name_len, int type)
506{ 493{
507 const size_t size = sizeof(POSIX_ACL_XATTR_ACCESS); 494 const size_t size = sizeof(POSIX_ACL_XATTR_ACCESS);
508 if (!reiserfs_posixacl(inode->i_sb)) 495 if (!reiserfs_posixacl(dentry->d_sb))
509 return 0; 496 return 0;
510 if (list && size <= list_size) 497 if (list && size <= list_size)
511 memcpy(list, POSIX_ACL_XATTR_ACCESS, size); 498 memcpy(list, POSIX_ACL_XATTR_ACCESS, size);
@@ -514,35 +501,18 @@ static size_t posix_acl_access_list(struct inode *inode, char *list,
514 501
515struct xattr_handler reiserfs_posix_acl_access_handler = { 502struct xattr_handler reiserfs_posix_acl_access_handler = {
516 .prefix = POSIX_ACL_XATTR_ACCESS, 503 .prefix = POSIX_ACL_XATTR_ACCESS,
517 .get = posix_acl_access_get, 504 .flags = ACL_TYPE_ACCESS,
518 .set = posix_acl_access_set, 505 .get = posix_acl_get,
506 .set = posix_acl_set,
519 .list = posix_acl_access_list, 507 .list = posix_acl_access_list,
520}; 508};
521 509
522static int 510static size_t posix_acl_default_list(struct dentry *dentry, char *list,
523posix_acl_default_get(struct inode *inode, const char *name,
524 void *buffer, size_t size)
525{
526 if (strlen(name) != sizeof(POSIX_ACL_XATTR_DEFAULT) - 1)
527 return -EINVAL;
528 return xattr_get_acl(inode, ACL_TYPE_DEFAULT, buffer, size);
529}
530
531static int
532posix_acl_default_set(struct inode *inode, const char *name,
533 const void *value, size_t size, int flags)
534{
535 if (strlen(name) != sizeof(POSIX_ACL_XATTR_DEFAULT) - 1)
536 return -EINVAL;
537 return xattr_set_acl(inode, ACL_TYPE_DEFAULT, value, size);
538}
539
540static size_t posix_acl_default_list(struct inode *inode, char *list,
541 size_t list_size, const char *name, 511 size_t list_size, const char *name,
542 size_t name_len) 512 size_t name_len, int type)
543{ 513{
544 const size_t size = sizeof(POSIX_ACL_XATTR_DEFAULT); 514 const size_t size = sizeof(POSIX_ACL_XATTR_DEFAULT);
545 if (!reiserfs_posixacl(inode->i_sb)) 515 if (!reiserfs_posixacl(dentry->d_sb))
546 return 0; 516 return 0;
547 if (list && size <= list_size) 517 if (list && size <= list_size)
548 memcpy(list, POSIX_ACL_XATTR_DEFAULT, size); 518 memcpy(list, POSIX_ACL_XATTR_DEFAULT, size);
@@ -551,7 +521,8 @@ static size_t posix_acl_default_list(struct inode *inode, char *list,
551 521
552struct xattr_handler reiserfs_posix_acl_default_handler = { 522struct xattr_handler reiserfs_posix_acl_default_handler = {
553 .prefix = POSIX_ACL_XATTR_DEFAULT, 523 .prefix = POSIX_ACL_XATTR_DEFAULT,
554 .get = posix_acl_default_get, 524 .flags = ACL_TYPE_DEFAULT,
555 .set = posix_acl_default_set, 525 .get = posix_acl_get,
526 .set = posix_acl_set,
556 .list = posix_acl_default_list, 527 .list = posix_acl_default_list,
557}; 528};
diff --git a/fs/reiserfs/xattr_security.c b/fs/reiserfs/xattr_security.c
index a92c8792c0f6..d8b5bfcbdd30 100644
--- a/fs/reiserfs/xattr_security.c
+++ b/fs/reiserfs/xattr_security.c
@@ -8,36 +8,37 @@
8#include <asm/uaccess.h> 8#include <asm/uaccess.h>
9 9
10static int 10static int
11security_get(struct inode *inode, const char *name, void *buffer, size_t size) 11security_get(struct dentry *dentry, const char *name, void *buffer, size_t size,
12 int handler_flags)
12{ 13{
13 if (strlen(name) < sizeof(XATTR_SECURITY_PREFIX)) 14 if (strlen(name) < sizeof(XATTR_SECURITY_PREFIX))
14 return -EINVAL; 15 return -EINVAL;
15 16
16 if (IS_PRIVATE(inode)) 17 if (IS_PRIVATE(dentry->d_inode))
17 return -EPERM; 18 return -EPERM;
18 19
19 return reiserfs_xattr_get(inode, name, buffer, size); 20 return reiserfs_xattr_get(dentry->d_inode, name, buffer, size);
20} 21}
21 22
22static int 23static int
23security_set(struct inode *inode, const char *name, const void *buffer, 24security_set(struct dentry *dentry, const char *name, const void *buffer,
24 size_t size, int flags) 25 size_t size, int flags, int handler_flags)
25{ 26{
26 if (strlen(name) < sizeof(XATTR_SECURITY_PREFIX)) 27 if (strlen(name) < sizeof(XATTR_SECURITY_PREFIX))
27 return -EINVAL; 28 return -EINVAL;
28 29
29 if (IS_PRIVATE(inode)) 30 if (IS_PRIVATE(dentry->d_inode))
30 return -EPERM; 31 return -EPERM;
31 32
32 return reiserfs_xattr_set(inode, name, buffer, size, flags); 33 return reiserfs_xattr_set(dentry->d_inode, name, buffer, size, flags);
33} 34}
34 35
35static size_t security_list(struct inode *inode, char *list, size_t list_len, 36static size_t security_list(struct dentry *dentry, char *list, size_t list_len,
36 const char *name, size_t namelen) 37 const char *name, size_t namelen, int handler_flags)
37{ 38{
38 const size_t len = namelen + 1; 39 const size_t len = namelen + 1;
39 40
40 if (IS_PRIVATE(inode)) 41 if (IS_PRIVATE(dentry->d_inode))
41 return 0; 42 return 0;
42 43
43 if (list && len <= list_len) { 44 if (list && len <= list_len) {
diff --git a/fs/reiserfs/xattr_trusted.c b/fs/reiserfs/xattr_trusted.c
index a865042f75e2..5b08aaca3daf 100644
--- a/fs/reiserfs/xattr_trusted.c
+++ b/fs/reiserfs/xattr_trusted.c
@@ -8,36 +8,37 @@
8#include <asm/uaccess.h> 8#include <asm/uaccess.h>
9 9
10static int 10static int
11trusted_get(struct inode *inode, const char *name, void *buffer, size_t size) 11trusted_get(struct dentry *dentry, const char *name, void *buffer, size_t size,
12 int handler_flags)
12{ 13{
13 if (strlen(name) < sizeof(XATTR_TRUSTED_PREFIX)) 14 if (strlen(name) < sizeof(XATTR_TRUSTED_PREFIX))
14 return -EINVAL; 15 return -EINVAL;
15 16
16 if (!capable(CAP_SYS_ADMIN) || IS_PRIVATE(inode)) 17 if (!capable(CAP_SYS_ADMIN) || IS_PRIVATE(dentry->d_inode))
17 return -EPERM; 18 return -EPERM;
18 19
19 return reiserfs_xattr_get(inode, name, buffer, size); 20 return reiserfs_xattr_get(dentry->d_inode, name, buffer, size);
20} 21}
21 22
22static int 23static int
23trusted_set(struct inode *inode, const char *name, const void *buffer, 24trusted_set(struct dentry *dentry, const char *name, const void *buffer,
24 size_t size, int flags) 25 size_t size, int flags, int handler_flags)
25{ 26{
26 if (strlen(name) < sizeof(XATTR_TRUSTED_PREFIX)) 27 if (strlen(name) < sizeof(XATTR_TRUSTED_PREFIX))
27 return -EINVAL; 28 return -EINVAL;
28 29
29 if (!capable(CAP_SYS_ADMIN) || IS_PRIVATE(inode)) 30 if (!capable(CAP_SYS_ADMIN) || IS_PRIVATE(dentry->d_inode))
30 return -EPERM; 31 return -EPERM;
31 32
32 return reiserfs_xattr_set(inode, name, buffer, size, flags); 33 return reiserfs_xattr_set(dentry->d_inode, name, buffer, size, flags);
33} 34}
34 35
35static size_t trusted_list(struct inode *inode, char *list, size_t list_size, 36static size_t trusted_list(struct dentry *dentry, char *list, size_t list_size,
36 const char *name, size_t name_len) 37 const char *name, size_t name_len, int handler_flags)
37{ 38{
38 const size_t len = name_len + 1; 39 const size_t len = name_len + 1;
39 40
40 if (!capable(CAP_SYS_ADMIN) || IS_PRIVATE(inode)) 41 if (!capable(CAP_SYS_ADMIN) || IS_PRIVATE(dentry->d_inode))
41 return 0; 42 return 0;
42 43
43 if (list && len <= list_size) { 44 if (list && len <= list_size) {
diff --git a/fs/reiserfs/xattr_user.c b/fs/reiserfs/xattr_user.c
index e3238dc4f3db..75d59c49b911 100644
--- a/fs/reiserfs/xattr_user.c
+++ b/fs/reiserfs/xattr_user.c
@@ -7,34 +7,35 @@
7#include <asm/uaccess.h> 7#include <asm/uaccess.h>
8 8
9static int 9static int
10user_get(struct inode *inode, const char *name, void *buffer, size_t size) 10user_get(struct dentry *dentry, const char *name, void *buffer, size_t size,
11 int handler_flags)
11{ 12{
12 13
13 if (strlen(name) < sizeof(XATTR_USER_PREFIX)) 14 if (strlen(name) < sizeof(XATTR_USER_PREFIX))
14 return -EINVAL; 15 return -EINVAL;
15 if (!reiserfs_xattrs_user(inode->i_sb)) 16 if (!reiserfs_xattrs_user(dentry->d_sb))
16 return -EOPNOTSUPP; 17 return -EOPNOTSUPP;
17 return reiserfs_xattr_get(inode, name, buffer, size); 18 return reiserfs_xattr_get(dentry->d_inode, name, buffer, size);
18} 19}
19 20
20static int 21static int
21user_set(struct inode *inode, const char *name, const void *buffer, 22user_set(struct dentry *dentry, const char *name, const void *buffer,
22 size_t size, int flags) 23 size_t size, int flags, int handler_flags)
23{ 24{
24 if (strlen(name) < sizeof(XATTR_USER_PREFIX)) 25 if (strlen(name) < sizeof(XATTR_USER_PREFIX))
25 return -EINVAL; 26 return -EINVAL;
26 27
27 if (!reiserfs_xattrs_user(inode->i_sb)) 28 if (!reiserfs_xattrs_user(dentry->d_sb))
28 return -EOPNOTSUPP; 29 return -EOPNOTSUPP;
29 return reiserfs_xattr_set(inode, name, buffer, size, flags); 30 return reiserfs_xattr_set(dentry->d_inode, name, buffer, size, flags);
30} 31}
31 32
32static size_t user_list(struct inode *inode, char *list, size_t list_size, 33static size_t user_list(struct dentry *dentry, char *list, size_t list_size,
33 const char *name, size_t name_len) 34 const char *name, size_t name_len, int handler_flags)
34{ 35{
35 const size_t len = name_len + 1; 36 const size_t len = name_len + 1;
36 37
37 if (!reiserfs_xattrs_user(inode->i_sb)) 38 if (!reiserfs_xattrs_user(dentry->d_sb))
38 return 0; 39 return 0;
39 if (list && len <= list_size) { 40 if (list && len <= list_size) {
40 memcpy(list, name, name_len); 41 memcpy(list, name, name_len);
diff --git a/fs/romfs/super.c b/fs/romfs/super.c
index c117fa80d1e9..42d213546894 100644
--- a/fs/romfs/super.c
+++ b/fs/romfs/super.c
@@ -544,6 +544,7 @@ error:
544error_rsb_inval: 544error_rsb_inval:
545 ret = -EINVAL; 545 ret = -EINVAL;
546error_rsb: 546error_rsb:
547 kfree(rsb);
547 return ret; 548 return ret;
548} 549}
549 550
diff --git a/fs/signalfd.c b/fs/signalfd.c
index b07565c94386..1dabe4ee02fe 100644
--- a/fs/signalfd.c
+++ b/fs/signalfd.c
@@ -236,7 +236,7 @@ SYSCALL_DEFINE4(signalfd4, int, ufd, sigset_t __user *, user_mask,
236 * anon_inode_getfd() will install the fd. 236 * anon_inode_getfd() will install the fd.
237 */ 237 */
238 ufd = anon_inode_getfd("[signalfd]", &signalfd_fops, ctx, 238 ufd = anon_inode_getfd("[signalfd]", &signalfd_fops, ctx,
239 flags & (O_CLOEXEC | O_NONBLOCK)); 239 O_RDWR | (flags & (O_CLOEXEC | O_NONBLOCK)));
240 if (ufd < 0) 240 if (ufd < 0)
241 kfree(ctx); 241 kfree(ctx);
242 } else { 242 } else {
diff --git a/fs/stack.c b/fs/stack.c
index 67716f6a1a4a..4a6f7f440658 100644
--- a/fs/stack.c
+++ b/fs/stack.c
@@ -7,18 +7,63 @@
7 * This function cannot be inlined since i_size_{read,write} is rather 7 * This function cannot be inlined since i_size_{read,write} is rather
8 * heavy-weight on 32-bit systems 8 * heavy-weight on 32-bit systems
9 */ 9 */
10void fsstack_copy_inode_size(struct inode *dst, const struct inode *src) 10void fsstack_copy_inode_size(struct inode *dst, struct inode *src)
11{ 11{
12 i_size_write(dst, i_size_read((struct inode *)src)); 12 loff_t i_size;
13 dst->i_blocks = src->i_blocks; 13 blkcnt_t i_blocks;
14
15 /*
16 * i_size_read() includes its own seqlocking and protection from
17 * preemption (see include/linux/fs.h): we need nothing extra for
18 * that here, and prefer to avoid nesting locks than attempt to keep
19 * i_size and i_blocks in sync together.
20 */
21 i_size = i_size_read(src);
22
23 /*
24 * But if CONFIG_LBDAF (on 32-bit), we ought to make an effort to
25 * keep the two halves of i_blocks in sync despite SMP or PREEMPT -
26 * though stat's generic_fillattr() doesn't bother, and we won't be
27 * applying quotas (where i_blocks does become important) at the
28 * upper level.
29 *
30 * We don't actually know what locking is used at the lower level;
31 * but if it's a filesystem that supports quotas, it will be using
32 * i_lock as in inode_add_bytes(). tmpfs uses other locking, and
33 * its 32-bit is (just) able to exceed 2TB i_size with the aid of
34 * holes; but its i_blocks cannot carry into the upper long without
35 * almost 2TB swap - let's ignore that case.
36 */
37 if (sizeof(i_blocks) > sizeof(long))
38 spin_lock(&src->i_lock);
39 i_blocks = src->i_blocks;
40 if (sizeof(i_blocks) > sizeof(long))
41 spin_unlock(&src->i_lock);
42
43 /*
44 * If CONFIG_SMP or CONFIG_PREEMPT on 32-bit, it's vital for
45 * fsstack_copy_inode_size() to hold some lock around
46 * i_size_write(), otherwise i_size_read() may spin forever (see
47 * include/linux/fs.h). We don't necessarily hold i_mutex when this
48 * is called, so take i_lock for that case.
49 *
50 * And if CONFIG_LBADF (on 32-bit), continue our effort to keep the
51 * two halves of i_blocks in sync despite SMP or PREEMPT: use i_lock
52 * for that case too, and do both at once by combining the tests.
53 *
54 * There is none of this locking overhead in the 64-bit case.
55 */
56 if (sizeof(i_size) > sizeof(long) || sizeof(i_blocks) > sizeof(long))
57 spin_lock(&dst->i_lock);
58 i_size_write(dst, i_size);
59 dst->i_blocks = i_blocks;
60 if (sizeof(i_size) > sizeof(long) || sizeof(i_blocks) > sizeof(long))
61 spin_unlock(&dst->i_lock);
14} 62}
15EXPORT_SYMBOL_GPL(fsstack_copy_inode_size); 63EXPORT_SYMBOL_GPL(fsstack_copy_inode_size);
16 64
17/* copy all attributes; get_nlinks is optional way to override the i_nlink 65/* copy all attributes */
18 * copying 66void fsstack_copy_attr_all(struct inode *dest, const struct inode *src)
19 */
20void fsstack_copy_attr_all(struct inode *dest, const struct inode *src,
21 int (*get_nlinks)(struct inode *))
22{ 67{
23 dest->i_mode = src->i_mode; 68 dest->i_mode = src->i_mode;
24 dest->i_uid = src->i_uid; 69 dest->i_uid = src->i_uid;
@@ -29,14 +74,6 @@ void fsstack_copy_attr_all(struct inode *dest, const struct inode *src,
29 dest->i_ctime = src->i_ctime; 74 dest->i_ctime = src->i_ctime;
30 dest->i_blkbits = src->i_blkbits; 75 dest->i_blkbits = src->i_blkbits;
31 dest->i_flags = src->i_flags; 76 dest->i_flags = src->i_flags;
32 77 dest->i_nlink = src->i_nlink;
33 /*
34 * Update the nlinks AFTER updating the above fields, because the
35 * get_links callback may depend on them.
36 */
37 if (!get_nlinks)
38 dest->i_nlink = src->i_nlink;
39 else
40 dest->i_nlink = (*get_nlinks)(dest);
41} 78}
42EXPORT_SYMBOL_GPL(fsstack_copy_attr_all); 79EXPORT_SYMBOL_GPL(fsstack_copy_attr_all);
diff --git a/fs/stat.c b/fs/stat.c
index 075694e31d8b..c4ecd52c5737 100644
--- a/fs/stat.c
+++ b/fs/stat.c
@@ -401,9 +401,9 @@ SYSCALL_DEFINE4(fstatat64, int, dfd, char __user *, filename,
401} 401}
402#endif /* __ARCH_WANT_STAT64 */ 402#endif /* __ARCH_WANT_STAT64 */
403 403
404void inode_add_bytes(struct inode *inode, loff_t bytes) 404/* Caller is here responsible for sufficient locking (ie. inode->i_lock) */
405void __inode_add_bytes(struct inode *inode, loff_t bytes)
405{ 406{
406 spin_lock(&inode->i_lock);
407 inode->i_blocks += bytes >> 9; 407 inode->i_blocks += bytes >> 9;
408 bytes &= 511; 408 bytes &= 511;
409 inode->i_bytes += bytes; 409 inode->i_bytes += bytes;
@@ -411,6 +411,12 @@ void inode_add_bytes(struct inode *inode, loff_t bytes)
411 inode->i_blocks++; 411 inode->i_blocks++;
412 inode->i_bytes -= 512; 412 inode->i_bytes -= 512;
413 } 413 }
414}
415
416void inode_add_bytes(struct inode *inode, loff_t bytes)
417{
418 spin_lock(&inode->i_lock);
419 __inode_add_bytes(inode, bytes);
414 spin_unlock(&inode->i_lock); 420 spin_unlock(&inode->i_lock);
415} 421}
416 422
diff --git a/fs/super.c b/fs/super.c
index 19eb70b374bc..aff046b0fe78 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -901,8 +901,9 @@ int get_sb_single(struct file_system_type *fs_type,
901 return error; 901 return error;
902 } 902 }
903 s->s_flags |= MS_ACTIVE; 903 s->s_flags |= MS_ACTIVE;
904 } else {
905 do_remount_sb(s, flags, data, 0);
904 } 906 }
905 do_remount_sb(s, flags, data, 0);
906 simple_set_mnt(mnt, s); 907 simple_set_mnt(mnt, s);
907 return 0; 908 return 0;
908} 909}
diff --git a/fs/sync.c b/fs/sync.c
index 36752a683481..418727a2a239 100644
--- a/fs/sync.c
+++ b/fs/sync.c
@@ -355,6 +355,7 @@ SYSCALL_DEFINE(sync_file_range)(int fd, loff_t offset, loff_t nbytes,
355{ 355{
356 int ret; 356 int ret;
357 struct file *file; 357 struct file *file;
358 struct address_space *mapping;
358 loff_t endbyte; /* inclusive */ 359 loff_t endbyte; /* inclusive */
359 int fput_needed; 360 int fput_needed;
360 umode_t i_mode; 361 umode_t i_mode;
@@ -405,7 +406,28 @@ SYSCALL_DEFINE(sync_file_range)(int fd, loff_t offset, loff_t nbytes,
405 !S_ISLNK(i_mode)) 406 !S_ISLNK(i_mode))
406 goto out_put; 407 goto out_put;
407 408
408 ret = do_sync_mapping_range(file->f_mapping, offset, endbyte, flags); 409 mapping = file->f_mapping;
410 if (!mapping) {
411 ret = -EINVAL;
412 goto out_put;
413 }
414
415 ret = 0;
416 if (flags & SYNC_FILE_RANGE_WAIT_BEFORE) {
417 ret = filemap_fdatawait_range(mapping, offset, endbyte);
418 if (ret < 0)
419 goto out_put;
420 }
421
422 if (flags & SYNC_FILE_RANGE_WRITE) {
423 ret = filemap_fdatawrite_range(mapping, offset, endbyte);
424 if (ret < 0)
425 goto out_put;
426 }
427
428 if (flags & SYNC_FILE_RANGE_WAIT_AFTER)
429 ret = filemap_fdatawait_range(mapping, offset, endbyte);
430
409out_put: 431out_put:
410 fput_light(file, fput_needed); 432 fput_light(file, fput_needed);
411out: 433out:
@@ -437,38 +459,3 @@ asmlinkage long SyS_sync_file_range2(long fd, long flags,
437} 459}
438SYSCALL_ALIAS(sys_sync_file_range2, SyS_sync_file_range2); 460SYSCALL_ALIAS(sys_sync_file_range2, SyS_sync_file_range2);
439#endif 461#endif
440
441/*
442 * `endbyte' is inclusive
443 */
444int do_sync_mapping_range(struct address_space *mapping, loff_t offset,
445 loff_t endbyte, unsigned int flags)
446{
447 int ret;
448
449 if (!mapping) {
450 ret = -EINVAL;
451 goto out;
452 }
453
454 ret = 0;
455 if (flags & SYNC_FILE_RANGE_WAIT_BEFORE) {
456 ret = filemap_fdatawait_range(mapping, offset, endbyte);
457 if (ret < 0)
458 goto out;
459 }
460
461 if (flags & SYNC_FILE_RANGE_WRITE) {
462 ret = __filemap_fdatawrite_range(mapping, offset, endbyte,
463 WB_SYNC_ALL);
464 if (ret < 0)
465 goto out;
466 }
467
468 if (flags & SYNC_FILE_RANGE_WAIT_AFTER) {
469 ret = filemap_fdatawait_range(mapping, offset, endbyte);
470 }
471out:
472 return ret;
473}
474EXPORT_SYMBOL_GPL(do_sync_mapping_range);
diff --git a/fs/sysfs/bin.c b/fs/sysfs/bin.c
index 60c702bc10ae..a0a500af24a1 100644
--- a/fs/sysfs/bin.c
+++ b/fs/sysfs/bin.c
@@ -483,7 +483,8 @@ void unmap_bin_file(struct sysfs_dirent *attr_sd)
483 * @attr: attribute descriptor. 483 * @attr: attribute descriptor.
484 */ 484 */
485 485
486int sysfs_create_bin_file(struct kobject * kobj, struct bin_attribute * attr) 486int sysfs_create_bin_file(struct kobject *kobj,
487 const struct bin_attribute *attr)
487{ 488{
488 BUG_ON(!kobj || !kobj->sd || !attr); 489 BUG_ON(!kobj || !kobj->sd || !attr);
489 490
@@ -497,7 +498,8 @@ int sysfs_create_bin_file(struct kobject * kobj, struct bin_attribute * attr)
497 * @attr: attribute descriptor. 498 * @attr: attribute descriptor.
498 */ 499 */
499 500
500void sysfs_remove_bin_file(struct kobject * kobj, struct bin_attribute * attr) 501void sysfs_remove_bin_file(struct kobject *kobj,
502 const struct bin_attribute *attr)
501{ 503{
502 sysfs_hash_and_remove(kobj->sd, attr->attr.name); 504 sysfs_hash_and_remove(kobj->sd, attr->attr.name);
503} 505}
diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c
index f05f2303a8b8..699f371b9f12 100644
--- a/fs/sysfs/dir.c
+++ b/fs/sysfs/dir.c
@@ -106,8 +106,10 @@ static struct sysfs_dirent *sysfs_get_active(struct sysfs_dirent *sd)
106 return NULL; 106 return NULL;
107 107
108 t = atomic_cmpxchg(&sd->s_active, v, v + 1); 108 t = atomic_cmpxchg(&sd->s_active, v, v + 1);
109 if (likely(t == v)) 109 if (likely(t == v)) {
110 rwsem_acquire_read(&sd->dep_map, 0, 1, _RET_IP_);
110 return sd; 111 return sd;
112 }
111 if (t < 0) 113 if (t < 0)
112 return NULL; 114 return NULL;
113 115
@@ -130,6 +132,7 @@ static void sysfs_put_active(struct sysfs_dirent *sd)
130 if (unlikely(!sd)) 132 if (unlikely(!sd))
131 return; 133 return;
132 134
135 rwsem_release(&sd->dep_map, 1, _RET_IP_);
133 v = atomic_dec_return(&sd->s_active); 136 v = atomic_dec_return(&sd->s_active);
134 if (likely(v != SD_DEACTIVATED_BIAS)) 137 if (likely(v != SD_DEACTIVATED_BIAS))
135 return; 138 return;
@@ -194,15 +197,21 @@ static void sysfs_deactivate(struct sysfs_dirent *sd)
194 BUG_ON(sd->s_sibling || !(sd->s_flags & SYSFS_FLAG_REMOVED)); 197 BUG_ON(sd->s_sibling || !(sd->s_flags & SYSFS_FLAG_REMOVED));
195 sd->s_sibling = (void *)&wait; 198 sd->s_sibling = (void *)&wait;
196 199
200 rwsem_acquire(&sd->dep_map, 0, 0, _RET_IP_);
197 /* atomic_add_return() is a mb(), put_active() will always see 201 /* atomic_add_return() is a mb(), put_active() will always see
198 * the updated sd->s_sibling. 202 * the updated sd->s_sibling.
199 */ 203 */
200 v = atomic_add_return(SD_DEACTIVATED_BIAS, &sd->s_active); 204 v = atomic_add_return(SD_DEACTIVATED_BIAS, &sd->s_active);
201 205
202 if (v != SD_DEACTIVATED_BIAS) 206 if (v != SD_DEACTIVATED_BIAS) {
207 lock_contended(&sd->dep_map, _RET_IP_);
203 wait_for_completion(&wait); 208 wait_for_completion(&wait);
209 }
204 210
205 sd->s_sibling = NULL; 211 sd->s_sibling = NULL;
212
213 lock_acquired(&sd->dep_map, _RET_IP_);
214 rwsem_release(&sd->dep_map, 1, _RET_IP_);
206} 215}
207 216
208static int sysfs_alloc_ino(ino_t *pino) 217static int sysfs_alloc_ino(ino_t *pino)
@@ -345,6 +354,7 @@ struct sysfs_dirent *sysfs_new_dirent(const char *name, umode_t mode, int type)
345 354
346 atomic_set(&sd->s_count, 1); 355 atomic_set(&sd->s_count, 1);
347 atomic_set(&sd->s_active, 0); 356 atomic_set(&sd->s_active, 0);
357 sysfs_dirent_init_lockdep(sd);
348 358
349 sd->s_name = name; 359 sd->s_name = name;
350 sd->s_mode = mode; 360 sd->s_mode = mode;
diff --git a/fs/sysfs/inode.c b/fs/sysfs/inode.c
index 220b758523ae..6a06a1d1ea7b 100644
--- a/fs/sysfs/inode.c
+++ b/fs/sysfs/inode.c
@@ -81,24 +81,23 @@ int sysfs_sd_setattr(struct sysfs_dirent *sd, struct iattr * iattr)
81 if (!sd_attrs) 81 if (!sd_attrs)
82 return -ENOMEM; 82 return -ENOMEM;
83 sd->s_iattr = sd_attrs; 83 sd->s_iattr = sd_attrs;
84 } else { 84 }
85 /* attributes were changed at least once in past */ 85 /* attributes were changed at least once in past */
86 iattrs = &sd_attrs->ia_iattr; 86 iattrs = &sd_attrs->ia_iattr;
87 87
88 if (ia_valid & ATTR_UID) 88 if (ia_valid & ATTR_UID)
89 iattrs->ia_uid = iattr->ia_uid; 89 iattrs->ia_uid = iattr->ia_uid;
90 if (ia_valid & ATTR_GID) 90 if (ia_valid & ATTR_GID)
91 iattrs->ia_gid = iattr->ia_gid; 91 iattrs->ia_gid = iattr->ia_gid;
92 if (ia_valid & ATTR_ATIME) 92 if (ia_valid & ATTR_ATIME)
93 iattrs->ia_atime = iattr->ia_atime; 93 iattrs->ia_atime = iattr->ia_atime;
94 if (ia_valid & ATTR_MTIME) 94 if (ia_valid & ATTR_MTIME)
95 iattrs->ia_mtime = iattr->ia_mtime; 95 iattrs->ia_mtime = iattr->ia_mtime;
96 if (ia_valid & ATTR_CTIME) 96 if (ia_valid & ATTR_CTIME)
97 iattrs->ia_ctime = iattr->ia_ctime; 97 iattrs->ia_ctime = iattr->ia_ctime;
98 if (ia_valid & ATTR_MODE) { 98 if (ia_valid & ATTR_MODE) {
99 umode_t mode = iattr->ia_mode; 99 umode_t mode = iattr->ia_mode;
100 iattrs->ia_mode = sd->s_mode = mode; 100 iattrs->ia_mode = sd->s_mode = mode;
101 }
102 } 101 }
103 return 0; 102 return 0;
104} 103}
diff --git a/fs/sysfs/sysfs.h b/fs/sysfs/sysfs.h
index ca52e7b9d8f8..cdd9377a6e06 100644
--- a/fs/sysfs/sysfs.h
+++ b/fs/sysfs/sysfs.h
@@ -8,6 +8,7 @@
8 * This file is released under the GPLv2. 8 * This file is released under the GPLv2.
9 */ 9 */
10 10
11#include <linux/lockdep.h>
11#include <linux/fs.h> 12#include <linux/fs.h>
12 13
13struct sysfs_open_dirent; 14struct sysfs_open_dirent;
@@ -50,6 +51,9 @@ struct sysfs_inode_attrs {
50struct sysfs_dirent { 51struct sysfs_dirent {
51 atomic_t s_count; 52 atomic_t s_count;
52 atomic_t s_active; 53 atomic_t s_active;
54#ifdef CONFIG_DEBUG_LOCK_ALLOC
55 struct lockdep_map dep_map;
56#endif
53 struct sysfs_dirent *s_parent; 57 struct sysfs_dirent *s_parent;
54 struct sysfs_dirent *s_sibling; 58 struct sysfs_dirent *s_sibling;
55 const char *s_name; 59 const char *s_name;
@@ -84,6 +88,17 @@ static inline unsigned int sysfs_type(struct sysfs_dirent *sd)
84 return sd->s_flags & SYSFS_TYPE_MASK; 88 return sd->s_flags & SYSFS_TYPE_MASK;
85} 89}
86 90
91#ifdef CONFIG_DEBUG_LOCK_ALLOC
92#define sysfs_dirent_init_lockdep(sd) \
93do { \
94 static struct lock_class_key __key; \
95 \
96 lockdep_init_map(&sd->dep_map, "s_active", &__key, 0); \
97} while(0)
98#else
99#define sysfs_dirent_init_lockdep(sd) do {} while(0)
100#endif
101
87/* 102/*
88 * Context structure to be used while adding/removing nodes. 103 * Context structure to be used while adding/removing nodes.
89 */ 104 */
diff --git a/fs/timerfd.c b/fs/timerfd.c
index b042bd7034b1..1bfc95ad5f71 100644
--- a/fs/timerfd.c
+++ b/fs/timerfd.c
@@ -200,7 +200,7 @@ SYSCALL_DEFINE2(timerfd_create, int, clockid, int, flags)
200 hrtimer_init(&ctx->tmr, clockid, HRTIMER_MODE_ABS); 200 hrtimer_init(&ctx->tmr, clockid, HRTIMER_MODE_ABS);
201 201
202 ufd = anon_inode_getfd("[timerfd]", &timerfd_fops, ctx, 202 ufd = anon_inode_getfd("[timerfd]", &timerfd_fops, ctx,
203 flags & TFD_SHARED_FCNTL_FLAGS); 203 O_RDWR | (flags & TFD_SHARED_FCNTL_FLAGS));
204 if (ufd < 0) 204 if (ufd < 0)
205 kfree(ctx); 205 kfree(ctx);
206 206
diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c
index 39849f887e72..16a6444330ec 100644
--- a/fs/ubifs/file.c
+++ b/fs/ubifs/file.c
@@ -45,7 +45,7 @@
45 * 45 *
46 * Similarly, @i_mutex is not always locked in 'ubifs_readpage()', e.g., the 46 * Similarly, @i_mutex is not always locked in 'ubifs_readpage()', e.g., the
47 * read-ahead path does not lock it ("sys_read -> generic_file_aio_read -> 47 * read-ahead path does not lock it ("sys_read -> generic_file_aio_read ->
48 * ondemand_readahead -> readpage"). In case of readahead, @I_LOCK flag is not 48 * ondemand_readahead -> readpage"). In case of readahead, @I_SYNC flag is not
49 * set as well. However, UBIFS disables readahead. 49 * set as well. However, UBIFS disables readahead.
50 */ 50 */
51 51
diff --git a/fs/ubifs/gc.c b/fs/ubifs/gc.c
index 618c2701d3a7..e5a3d8e96bb7 100644
--- a/fs/ubifs/gc.c
+++ b/fs/ubifs/gc.c
@@ -54,6 +54,7 @@
54 */ 54 */
55 55
56#include <linux/pagemap.h> 56#include <linux/pagemap.h>
57#include <linux/list_sort.h>
57#include "ubifs.h" 58#include "ubifs.h"
58 59
59/* 60/*
@@ -108,101 +109,6 @@ static int switch_gc_head(struct ubifs_info *c)
108} 109}
109 110
110/** 111/**
111 * list_sort - sort a list.
112 * @priv: private data, passed to @cmp
113 * @head: the list to sort
114 * @cmp: the elements comparison function
115 *
116 * This function has been implemented by Mark J Roberts <mjr@znex.org>. It
117 * implements "merge sort" which has O(nlog(n)) complexity. The list is sorted
118 * in ascending order.
119 *
120 * The comparison function @cmp is supposed to return a negative value if @a is
121 * than @b, and a positive value if @a is greater than @b. If @a and @b are
122 * equivalent, then it does not matter what this function returns.
123 */
124static void list_sort(void *priv, struct list_head *head,
125 int (*cmp)(void *priv, struct list_head *a,
126 struct list_head *b))
127{
128 struct list_head *p, *q, *e, *list, *tail, *oldhead;
129 int insize, nmerges, psize, qsize, i;
130
131 if (list_empty(head))
132 return;
133
134 list = head->next;
135 list_del(head);
136 insize = 1;
137 for (;;) {
138 p = oldhead = list;
139 list = tail = NULL;
140 nmerges = 0;
141
142 while (p) {
143 nmerges++;
144 q = p;
145 psize = 0;
146 for (i = 0; i < insize; i++) {
147 psize++;
148 q = q->next == oldhead ? NULL : q->next;
149 if (!q)
150 break;
151 }
152
153 qsize = insize;
154 while (psize > 0 || (qsize > 0 && q)) {
155 if (!psize) {
156 e = q;
157 q = q->next;
158 qsize--;
159 if (q == oldhead)
160 q = NULL;
161 } else if (!qsize || !q) {
162 e = p;
163 p = p->next;
164 psize--;
165 if (p == oldhead)
166 p = NULL;
167 } else if (cmp(priv, p, q) <= 0) {
168 e = p;
169 p = p->next;
170 psize--;
171 if (p == oldhead)
172 p = NULL;
173 } else {
174 e = q;
175 q = q->next;
176 qsize--;
177 if (q == oldhead)
178 q = NULL;
179 }
180 if (tail)
181 tail->next = e;
182 else
183 list = e;
184 e->prev = tail;
185 tail = e;
186 }
187 p = q;
188 }
189
190 tail->next = list;
191 list->prev = tail;
192
193 if (nmerges <= 1)
194 break;
195
196 insize *= 2;
197 }
198
199 head->next = list;
200 head->prev = list->prev;
201 list->prev->next = head;
202 list->prev = head;
203}
204
205/**
206 * data_nodes_cmp - compare 2 data nodes. 112 * data_nodes_cmp - compare 2 data nodes.
207 * @priv: UBIFS file-system description object 113 * @priv: UBIFS file-system description object
208 * @a: first data node 114 * @a: first data node
diff --git a/fs/xattr.c b/fs/xattr.c
index 6d4f6d3449fb..46f87e828b48 100644
--- a/fs/xattr.c
+++ b/fs/xattr.c
@@ -615,12 +615,11 @@ ssize_t
615generic_getxattr(struct dentry *dentry, const char *name, void *buffer, size_t size) 615generic_getxattr(struct dentry *dentry, const char *name, void *buffer, size_t size)
616{ 616{
617 struct xattr_handler *handler; 617 struct xattr_handler *handler;
618 struct inode *inode = dentry->d_inode;
619 618
620 handler = xattr_resolve_name(inode->i_sb->s_xattr, &name); 619 handler = xattr_resolve_name(dentry->d_sb->s_xattr, &name);
621 if (!handler) 620 if (!handler)
622 return -EOPNOTSUPP; 621 return -EOPNOTSUPP;
623 return handler->get(inode, name, buffer, size); 622 return handler->get(dentry, name, buffer, size, handler->flags);
624} 623}
625 624
626/* 625/*
@@ -630,18 +629,20 @@ generic_getxattr(struct dentry *dentry, const char *name, void *buffer, size_t s
630ssize_t 629ssize_t
631generic_listxattr(struct dentry *dentry, char *buffer, size_t buffer_size) 630generic_listxattr(struct dentry *dentry, char *buffer, size_t buffer_size)
632{ 631{
633 struct inode *inode = dentry->d_inode; 632 struct xattr_handler *handler, **handlers = dentry->d_sb->s_xattr;
634 struct xattr_handler *handler, **handlers = inode->i_sb->s_xattr;
635 unsigned int size = 0; 633 unsigned int size = 0;
636 634
637 if (!buffer) { 635 if (!buffer) {
638 for_each_xattr_handler(handlers, handler) 636 for_each_xattr_handler(handlers, handler) {
639 size += handler->list(inode, NULL, 0, NULL, 0); 637 size += handler->list(dentry, NULL, 0, NULL, 0,
638 handler->flags);
639 }
640 } else { 640 } else {
641 char *buf = buffer; 641 char *buf = buffer;
642 642
643 for_each_xattr_handler(handlers, handler) { 643 for_each_xattr_handler(handlers, handler) {
644 size = handler->list(inode, buf, buffer_size, NULL, 0); 644 size = handler->list(dentry, buf, buffer_size,
645 NULL, 0, handler->flags);
645 if (size > buffer_size) 646 if (size > buffer_size)
646 return -ERANGE; 647 return -ERANGE;
647 buf += size; 648 buf += size;
@@ -659,14 +660,13 @@ int
659generic_setxattr(struct dentry *dentry, const char *name, const void *value, size_t size, int flags) 660generic_setxattr(struct dentry *dentry, const char *name, const void *value, size_t size, int flags)
660{ 661{
661 struct xattr_handler *handler; 662 struct xattr_handler *handler;
662 struct inode *inode = dentry->d_inode;
663 663
664 if (size == 0) 664 if (size == 0)
665 value = ""; /* empty EA, do not remove */ 665 value = ""; /* empty EA, do not remove */
666 handler = xattr_resolve_name(inode->i_sb->s_xattr, &name); 666 handler = xattr_resolve_name(dentry->d_sb->s_xattr, &name);
667 if (!handler) 667 if (!handler)
668 return -EOPNOTSUPP; 668 return -EOPNOTSUPP;
669 return handler->set(inode, name, value, size, flags); 669 return handler->set(dentry, name, value, size, 0, handler->flags);
670} 670}
671 671
672/* 672/*
@@ -677,12 +677,12 @@ int
677generic_removexattr(struct dentry *dentry, const char *name) 677generic_removexattr(struct dentry *dentry, const char *name)
678{ 678{
679 struct xattr_handler *handler; 679 struct xattr_handler *handler;
680 struct inode *inode = dentry->d_inode;
681 680
682 handler = xattr_resolve_name(inode->i_sb->s_xattr, &name); 681 handler = xattr_resolve_name(dentry->d_sb->s_xattr, &name);
683 if (!handler) 682 if (!handler)
684 return -EOPNOTSUPP; 683 return -EOPNOTSUPP;
685 return handler->set(inode, name, NULL, 0, XATTR_REPLACE); 684 return handler->set(dentry, name, NULL, 0,
685 XATTR_REPLACE, handler->flags);
686} 686}
687 687
688EXPORT_SYMBOL(generic_getxattr); 688EXPORT_SYMBOL(generic_getxattr);
diff --git a/fs/xfs/linux-2.6/xfs_acl.c b/fs/xfs/linux-2.6/xfs_acl.c
index 69e598b6986f..883ca5ab8af5 100644
--- a/fs/xfs/linux-2.6/xfs_acl.c
+++ b/fs/xfs/linux-2.6/xfs_acl.c
@@ -251,8 +251,9 @@ xfs_set_mode(struct inode *inode, mode_t mode)
251 if (mode != inode->i_mode) { 251 if (mode != inode->i_mode) {
252 struct iattr iattr; 252 struct iattr iattr;
253 253
254 iattr.ia_valid = ATTR_MODE; 254 iattr.ia_valid = ATTR_MODE | ATTR_CTIME;
255 iattr.ia_mode = mode; 255 iattr.ia_mode = mode;
256 iattr.ia_ctime = current_fs_time(inode->i_sb);
256 257
257 error = -xfs_setattr(XFS_I(inode), &iattr, XFS_ATTR_NOACL); 258 error = -xfs_setattr(XFS_I(inode), &iattr, XFS_ATTR_NOACL);
258 } 259 }
@@ -354,37 +355,14 @@ xfs_acl_chmod(struct inode *inode)
354 return error; 355 return error;
355} 356}
356 357
357/*
358 * System xattr handlers.
359 *
360 * Currently Posix ACLs are the only system namespace extended attribute
361 * handlers supported by XFS, so we just implement the handlers here.
362 * If we ever support other system extended attributes this will need
363 * some refactoring.
364 */
365
366static int 358static int
367xfs_decode_acl(const char *name) 359xfs_xattr_acl_get(struct dentry *dentry, const char *name,
368{ 360 void *value, size_t size, int type)
369 if (strcmp(name, "posix_acl_access") == 0)
370 return ACL_TYPE_ACCESS;
371 else if (strcmp(name, "posix_acl_default") == 0)
372 return ACL_TYPE_DEFAULT;
373 return -EINVAL;
374}
375
376static int
377xfs_xattr_system_get(struct inode *inode, const char *name,
378 void *value, size_t size)
379{ 361{
380 struct posix_acl *acl; 362 struct posix_acl *acl;
381 int type, error; 363 int error;
382
383 type = xfs_decode_acl(name);
384 if (type < 0)
385 return type;
386 364
387 acl = xfs_get_acl(inode, type); 365 acl = xfs_get_acl(dentry->d_inode, type);
388 if (IS_ERR(acl)) 366 if (IS_ERR(acl))
389 return PTR_ERR(acl); 367 return PTR_ERR(acl);
390 if (acl == NULL) 368 if (acl == NULL)
@@ -397,15 +375,13 @@ xfs_xattr_system_get(struct inode *inode, const char *name,
397} 375}
398 376
399static int 377static int
400xfs_xattr_system_set(struct inode *inode, const char *name, 378xfs_xattr_acl_set(struct dentry *dentry, const char *name,
401 const void *value, size_t size, int flags) 379 const void *value, size_t size, int flags, int type)
402{ 380{
381 struct inode *inode = dentry->d_inode;
403 struct posix_acl *acl = NULL; 382 struct posix_acl *acl = NULL;
404 int error = 0, type; 383 int error = 0;
405 384
406 type = xfs_decode_acl(name);
407 if (type < 0)
408 return type;
409 if (flags & XATTR_CREATE) 385 if (flags & XATTR_CREATE)
410 return -EINVAL; 386 return -EINVAL;
411 if (type == ACL_TYPE_DEFAULT && !S_ISDIR(inode->i_mode)) 387 if (type == ACL_TYPE_DEFAULT && !S_ISDIR(inode->i_mode))
@@ -462,8 +438,16 @@ xfs_xattr_system_set(struct inode *inode, const char *name,
462 return error; 438 return error;
463} 439}
464 440
465struct xattr_handler xfs_xattr_system_handler = { 441struct xattr_handler xfs_xattr_acl_access_handler = {
466 .prefix = XATTR_SYSTEM_PREFIX, 442 .prefix = POSIX_ACL_XATTR_ACCESS,
467 .get = xfs_xattr_system_get, 443 .flags = ACL_TYPE_ACCESS,
468 .set = xfs_xattr_system_set, 444 .get = xfs_xattr_acl_get,
445 .set = xfs_xattr_acl_set,
446};
447
448struct xattr_handler xfs_xattr_acl_default_handler = {
449 .prefix = POSIX_ACL_XATTR_DEFAULT,
450 .flags = ACL_TYPE_DEFAULT,
451 .get = xfs_xattr_acl_get,
452 .set = xfs_xattr_acl_set,
469}; 453};
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
index b4c7d4248aac..77b8be81c769 100644
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ b/fs/xfs/linux-2.6/xfs_buf.c
@@ -292,6 +292,7 @@ _xfs_buf_free_pages(
292{ 292{
293 if (bp->b_pages != bp->b_page_array) { 293 if (bp->b_pages != bp->b_page_array) {
294 kmem_free(bp->b_pages); 294 kmem_free(bp->b_pages);
295 bp->b_pages = NULL;
295 } 296 }
296} 297}
297 298
@@ -323,9 +324,8 @@ xfs_buf_free(
323 ASSERT(!PagePrivate(page)); 324 ASSERT(!PagePrivate(page));
324 page_cache_release(page); 325 page_cache_release(page);
325 } 326 }
326 _xfs_buf_free_pages(bp);
327 } 327 }
328 328 _xfs_buf_free_pages(bp);
329 xfs_buf_deallocate(bp); 329 xfs_buf_deallocate(bp);
330} 330}
331 331
@@ -1149,10 +1149,14 @@ _xfs_buf_ioapply(
1149 if (bp->b_flags & XBF_ORDERED) { 1149 if (bp->b_flags & XBF_ORDERED) {
1150 ASSERT(!(bp->b_flags & XBF_READ)); 1150 ASSERT(!(bp->b_flags & XBF_READ));
1151 rw = WRITE_BARRIER; 1151 rw = WRITE_BARRIER;
1152 } else if (bp->b_flags & _XBF_RUN_QUEUES) { 1152 } else if (bp->b_flags & XBF_LOG_BUFFER) {
1153 ASSERT(!(bp->b_flags & XBF_READ_AHEAD)); 1153 ASSERT(!(bp->b_flags & XBF_READ_AHEAD));
1154 bp->b_flags &= ~_XBF_RUN_QUEUES; 1154 bp->b_flags &= ~_XBF_RUN_QUEUES;
1155 rw = (bp->b_flags & XBF_WRITE) ? WRITE_SYNC : READ_SYNC; 1155 rw = (bp->b_flags & XBF_WRITE) ? WRITE_SYNC : READ_SYNC;
1156 } else if (bp->b_flags & _XBF_RUN_QUEUES) {
1157 ASSERT(!(bp->b_flags & XBF_READ_AHEAD));
1158 bp->b_flags &= ~_XBF_RUN_QUEUES;
1159 rw = (bp->b_flags & XBF_WRITE) ? WRITE_META : READ_META;
1156 } else { 1160 } else {
1157 rw = (bp->b_flags & XBF_WRITE) ? WRITE : 1161 rw = (bp->b_flags & XBF_WRITE) ? WRITE :
1158 (bp->b_flags & XBF_READ_AHEAD) ? READA : READ; 1162 (bp->b_flags & XBF_READ_AHEAD) ? READA : READ;
diff --git a/fs/xfs/linux-2.6/xfs_buf.h b/fs/xfs/linux-2.6/xfs_buf.h
index a509f4addc2a..a34c7b54822d 100644
--- a/fs/xfs/linux-2.6/xfs_buf.h
+++ b/fs/xfs/linux-2.6/xfs_buf.h
@@ -55,6 +55,7 @@ typedef enum {
55 XBF_FS_MANAGED = (1 << 8), /* filesystem controls freeing memory */ 55 XBF_FS_MANAGED = (1 << 8), /* filesystem controls freeing memory */
56 XBF_ORDERED = (1 << 11), /* use ordered writes */ 56 XBF_ORDERED = (1 << 11), /* use ordered writes */
57 XBF_READ_AHEAD = (1 << 12), /* asynchronous read-ahead */ 57 XBF_READ_AHEAD = (1 << 12), /* asynchronous read-ahead */
58 XBF_LOG_BUFFER = (1 << 13), /* this is a buffer used for the log */
58 59
59 /* flags used only as arguments to access routines */ 60 /* flags used only as arguments to access routines */
60 XBF_LOCK = (1 << 14), /* lock requested */ 61 XBF_LOCK = (1 << 14), /* lock requested */
diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c
index 1d5b298ba8b2..225946012d0b 100644
--- a/fs/xfs/linux-2.6/xfs_iops.c
+++ b/fs/xfs/linux-2.6/xfs_iops.c
@@ -794,7 +794,7 @@ xfs_setup_inode(
794 struct inode *inode = &ip->i_vnode; 794 struct inode *inode = &ip->i_vnode;
795 795
796 inode->i_ino = ip->i_ino; 796 inode->i_ino = ip->i_ino;
797 inode->i_state = I_NEW|I_LOCK; 797 inode->i_state = I_NEW;
798 inode_add_to_lists(ip->i_mount->m_super, inode); 798 inode_add_to_lists(ip->i_mount->m_super, inode);
799 799
800 inode->i_mode = ip->i_d.di_mode; 800 inode->i_mode = ip->i_d.di_mode;
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index 09783cc444ac..77414db10dc2 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -954,16 +954,14 @@ xfs_fs_destroy_inode(
954 ASSERT_ALWAYS(!xfs_iflags_test(ip, XFS_IRECLAIM)); 954 ASSERT_ALWAYS(!xfs_iflags_test(ip, XFS_IRECLAIM));
955 955
956 /* 956 /*
957 * If we have nothing to flush with this inode then complete the 957 * We always use background reclaim here because even if the
958 * teardown now, otherwise delay the flush operation. 958 * inode is clean, it still may be under IO and hence we have
959 * to take the flush lock. The background reclaim path handles
960 * this more efficiently than we can here, so simply let background
961 * reclaim tear down all inodes.
959 */ 962 */
960 if (!xfs_inode_clean(ip)) {
961 xfs_inode_set_reclaim_tag(ip);
962 return;
963 }
964
965out_reclaim: 963out_reclaim:
966 xfs_ireclaim(ip); 964 xfs_inode_set_reclaim_tag(ip);
967} 965}
968 966
969/* 967/*
diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c
index 6fed97a8cd3e..1f5e4bb5e970 100644
--- a/fs/xfs/linux-2.6/xfs_sync.c
+++ b/fs/xfs/linux-2.6/xfs_sync.c
@@ -65,7 +65,6 @@ xfs_inode_ag_lookup(
65 * as the tree is sparse and a gang lookup walks to find 65 * as the tree is sparse and a gang lookup walks to find
66 * the number of objects requested. 66 * the number of objects requested.
67 */ 67 */
68 read_lock(&pag->pag_ici_lock);
69 if (tag == XFS_ICI_NO_TAG) { 68 if (tag == XFS_ICI_NO_TAG) {
70 nr_found = radix_tree_gang_lookup(&pag->pag_ici_root, 69 nr_found = radix_tree_gang_lookup(&pag->pag_ici_root,
71 (void **)&ip, *first_index, 1); 70 (void **)&ip, *first_index, 1);
@@ -74,7 +73,7 @@ xfs_inode_ag_lookup(
74 (void **)&ip, *first_index, 1, tag); 73 (void **)&ip, *first_index, 1, tag);
75 } 74 }
76 if (!nr_found) 75 if (!nr_found)
77 goto unlock; 76 return NULL;
78 77
79 /* 78 /*
80 * Update the index for the next lookup. Catch overflows 79 * Update the index for the next lookup. Catch overflows
@@ -84,13 +83,8 @@ xfs_inode_ag_lookup(
84 */ 83 */
85 *first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1); 84 *first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1);
86 if (*first_index < XFS_INO_TO_AGINO(mp, ip->i_ino)) 85 if (*first_index < XFS_INO_TO_AGINO(mp, ip->i_ino))
87 goto unlock; 86 return NULL;
88
89 return ip; 87 return ip;
90
91unlock:
92 read_unlock(&pag->pag_ici_lock);
93 return NULL;
94} 88}
95 89
96STATIC int 90STATIC int
@@ -100,7 +94,8 @@ xfs_inode_ag_walk(
100 int (*execute)(struct xfs_inode *ip, 94 int (*execute)(struct xfs_inode *ip,
101 struct xfs_perag *pag, int flags), 95 struct xfs_perag *pag, int flags),
102 int flags, 96 int flags,
103 int tag) 97 int tag,
98 int exclusive)
104{ 99{
105 struct xfs_perag *pag = &mp->m_perag[ag]; 100 struct xfs_perag *pag = &mp->m_perag[ag];
106 uint32_t first_index; 101 uint32_t first_index;
@@ -114,10 +109,20 @@ restart:
114 int error = 0; 109 int error = 0;
115 xfs_inode_t *ip; 110 xfs_inode_t *ip;
116 111
112 if (exclusive)
113 write_lock(&pag->pag_ici_lock);
114 else
115 read_lock(&pag->pag_ici_lock);
117 ip = xfs_inode_ag_lookup(mp, pag, &first_index, tag); 116 ip = xfs_inode_ag_lookup(mp, pag, &first_index, tag);
118 if (!ip) 117 if (!ip) {
118 if (exclusive)
119 write_unlock(&pag->pag_ici_lock);
120 else
121 read_unlock(&pag->pag_ici_lock);
119 break; 122 break;
123 }
120 124
125 /* execute releases pag->pag_ici_lock */
121 error = execute(ip, pag, flags); 126 error = execute(ip, pag, flags);
122 if (error == EAGAIN) { 127 if (error == EAGAIN) {
123 skipped++; 128 skipped++;
@@ -125,9 +130,8 @@ restart:
125 } 130 }
126 if (error) 131 if (error)
127 last_error = error; 132 last_error = error;
128 /* 133
129 * bail out if the filesystem is corrupted. 134 /* bail out if the filesystem is corrupted. */
130 */
131 if (error == EFSCORRUPTED) 135 if (error == EFSCORRUPTED)
132 break; 136 break;
133 137
@@ -148,7 +152,8 @@ xfs_inode_ag_iterator(
148 int (*execute)(struct xfs_inode *ip, 152 int (*execute)(struct xfs_inode *ip,
149 struct xfs_perag *pag, int flags), 153 struct xfs_perag *pag, int flags),
150 int flags, 154 int flags,
151 int tag) 155 int tag,
156 int exclusive)
152{ 157{
153 int error = 0; 158 int error = 0;
154 int last_error = 0; 159 int last_error = 0;
@@ -157,7 +162,8 @@ xfs_inode_ag_iterator(
157 for (ag = 0; ag < mp->m_sb.sb_agcount; ag++) { 162 for (ag = 0; ag < mp->m_sb.sb_agcount; ag++) {
158 if (!mp->m_perag[ag].pag_ici_init) 163 if (!mp->m_perag[ag].pag_ici_init)
159 continue; 164 continue;
160 error = xfs_inode_ag_walk(mp, ag, execute, flags, tag); 165 error = xfs_inode_ag_walk(mp, ag, execute, flags, tag,
166 exclusive);
161 if (error) { 167 if (error) {
162 last_error = error; 168 last_error = error;
163 if (error == EFSCORRUPTED) 169 if (error == EFSCORRUPTED)
@@ -174,30 +180,31 @@ xfs_sync_inode_valid(
174 struct xfs_perag *pag) 180 struct xfs_perag *pag)
175{ 181{
176 struct inode *inode = VFS_I(ip); 182 struct inode *inode = VFS_I(ip);
183 int error = EFSCORRUPTED;
177 184
178 /* nothing to sync during shutdown */ 185 /* nothing to sync during shutdown */
179 if (XFS_FORCED_SHUTDOWN(ip->i_mount)) { 186 if (XFS_FORCED_SHUTDOWN(ip->i_mount))
180 read_unlock(&pag->pag_ici_lock); 187 goto out_unlock;
181 return EFSCORRUPTED;
182 }
183 188
184 /* 189 /* avoid new or reclaimable inodes. Leave for reclaim code to flush */
185 * If we can't get a reference on the inode, it must be in reclaim. 190 error = ENOENT;
186 * Leave it for the reclaim code to flush. Also avoid inodes that 191 if (xfs_iflags_test(ip, XFS_INEW | XFS_IRECLAIMABLE | XFS_IRECLAIM))
187 * haven't been fully initialised. 192 goto out_unlock;
188 */
189 if (!igrab(inode)) {
190 read_unlock(&pag->pag_ici_lock);
191 return ENOENT;
192 }
193 read_unlock(&pag->pag_ici_lock);
194 193
195 if (is_bad_inode(inode) || xfs_iflags_test(ip, XFS_INEW)) { 194 /* If we can't grab the inode, it must on it's way to reclaim. */
195 if (!igrab(inode))
196 goto out_unlock;
197
198 if (is_bad_inode(inode)) {
196 IRELE(ip); 199 IRELE(ip);
197 return ENOENT; 200 goto out_unlock;
198 } 201 }
199 202
200 return 0; 203 /* inode is valid */
204 error = 0;
205out_unlock:
206 read_unlock(&pag->pag_ici_lock);
207 return error;
201} 208}
202 209
203STATIC int 210STATIC int
@@ -282,7 +289,7 @@ xfs_sync_data(
282 ASSERT((flags & ~(SYNC_TRYLOCK|SYNC_WAIT)) == 0); 289 ASSERT((flags & ~(SYNC_TRYLOCK|SYNC_WAIT)) == 0);
283 290
284 error = xfs_inode_ag_iterator(mp, xfs_sync_inode_data, flags, 291 error = xfs_inode_ag_iterator(mp, xfs_sync_inode_data, flags,
285 XFS_ICI_NO_TAG); 292 XFS_ICI_NO_TAG, 0);
286 if (error) 293 if (error)
287 return XFS_ERROR(error); 294 return XFS_ERROR(error);
288 295
@@ -304,7 +311,7 @@ xfs_sync_attr(
304 ASSERT((flags & ~SYNC_WAIT) == 0); 311 ASSERT((flags & ~SYNC_WAIT) == 0);
305 312
306 return xfs_inode_ag_iterator(mp, xfs_sync_inode_attr, flags, 313 return xfs_inode_ag_iterator(mp, xfs_sync_inode_attr, flags,
307 XFS_ICI_NO_TAG); 314 XFS_ICI_NO_TAG, 0);
308} 315}
309 316
310STATIC int 317STATIC int
@@ -664,60 +671,6 @@ xfs_syncd_stop(
664 kthread_stop(mp->m_sync_task); 671 kthread_stop(mp->m_sync_task);
665} 672}
666 673
667STATIC int
668xfs_reclaim_inode(
669 xfs_inode_t *ip,
670 int sync_mode)
671{
672 xfs_perag_t *pag = xfs_get_perag(ip->i_mount, ip->i_ino);
673
674 /* The hash lock here protects a thread in xfs_iget_core from
675 * racing with us on linking the inode back with a vnode.
676 * Once we have the XFS_IRECLAIM flag set it will not touch
677 * us.
678 */
679 write_lock(&pag->pag_ici_lock);
680 spin_lock(&ip->i_flags_lock);
681 if (__xfs_iflags_test(ip, XFS_IRECLAIM) ||
682 !__xfs_iflags_test(ip, XFS_IRECLAIMABLE)) {
683 spin_unlock(&ip->i_flags_lock);
684 write_unlock(&pag->pag_ici_lock);
685 return -EAGAIN;
686 }
687 __xfs_iflags_set(ip, XFS_IRECLAIM);
688 spin_unlock(&ip->i_flags_lock);
689 write_unlock(&pag->pag_ici_lock);
690 xfs_put_perag(ip->i_mount, pag);
691
692 /*
693 * If the inode is still dirty, then flush it out. If the inode
694 * is not in the AIL, then it will be OK to flush it delwri as
695 * long as xfs_iflush() does not keep any references to the inode.
696 * We leave that decision up to xfs_iflush() since it has the
697 * knowledge of whether it's OK to simply do a delwri flush of
698 * the inode or whether we need to wait until the inode is
699 * pulled from the AIL.
700 * We get the flush lock regardless, though, just to make sure
701 * we don't free it while it is being flushed.
702 */
703 xfs_ilock(ip, XFS_ILOCK_EXCL);
704 xfs_iflock(ip);
705
706 /*
707 * In the case of a forced shutdown we rely on xfs_iflush() to
708 * wait for the inode to be unpinned before returning an error.
709 */
710 if (!is_bad_inode(VFS_I(ip)) && xfs_iflush(ip, sync_mode) == 0) {
711 /* synchronize with xfs_iflush_done */
712 xfs_iflock(ip);
713 xfs_ifunlock(ip);
714 }
715
716 xfs_iunlock(ip, XFS_ILOCK_EXCL);
717 xfs_ireclaim(ip);
718 return 0;
719}
720
721void 674void
722__xfs_inode_set_reclaim_tag( 675__xfs_inode_set_reclaim_tag(
723 struct xfs_perag *pag, 676 struct xfs_perag *pag,
@@ -760,19 +713,55 @@ __xfs_inode_clear_reclaim_tag(
760} 713}
761 714
762STATIC int 715STATIC int
763xfs_reclaim_inode_now( 716xfs_reclaim_inode(
764 struct xfs_inode *ip, 717 struct xfs_inode *ip,
765 struct xfs_perag *pag, 718 struct xfs_perag *pag,
766 int flags) 719 int sync_mode)
767{ 720{
768 /* ignore if already under reclaim */ 721 /*
769 if (xfs_iflags_test(ip, XFS_IRECLAIM)) { 722 * The radix tree lock here protects a thread in xfs_iget from racing
770 read_unlock(&pag->pag_ici_lock); 723 * with us starting reclaim on the inode. Once we have the
724 * XFS_IRECLAIM flag set it will not touch us.
725 */
726 spin_lock(&ip->i_flags_lock);
727 ASSERT_ALWAYS(__xfs_iflags_test(ip, XFS_IRECLAIMABLE));
728 if (__xfs_iflags_test(ip, XFS_IRECLAIM)) {
729 /* ignore as it is already under reclaim */
730 spin_unlock(&ip->i_flags_lock);
731 write_unlock(&pag->pag_ici_lock);
771 return 0; 732 return 0;
772 } 733 }
773 read_unlock(&pag->pag_ici_lock); 734 __xfs_iflags_set(ip, XFS_IRECLAIM);
735 spin_unlock(&ip->i_flags_lock);
736 write_unlock(&pag->pag_ici_lock);
774 737
775 return xfs_reclaim_inode(ip, flags); 738 /*
739 * If the inode is still dirty, then flush it out. If the inode
740 * is not in the AIL, then it will be OK to flush it delwri as
741 * long as xfs_iflush() does not keep any references to the inode.
742 * We leave that decision up to xfs_iflush() since it has the
743 * knowledge of whether it's OK to simply do a delwri flush of
744 * the inode or whether we need to wait until the inode is
745 * pulled from the AIL.
746 * We get the flush lock regardless, though, just to make sure
747 * we don't free it while it is being flushed.
748 */
749 xfs_ilock(ip, XFS_ILOCK_EXCL);
750 xfs_iflock(ip);
751
752 /*
753 * In the case of a forced shutdown we rely on xfs_iflush() to
754 * wait for the inode to be unpinned before returning an error.
755 */
756 if (!is_bad_inode(VFS_I(ip)) && xfs_iflush(ip, sync_mode) == 0) {
757 /* synchronize with xfs_iflush_done */
758 xfs_iflock(ip);
759 xfs_ifunlock(ip);
760 }
761
762 xfs_iunlock(ip, XFS_ILOCK_EXCL);
763 xfs_ireclaim(ip);
764 return 0;
776} 765}
777 766
778int 767int
@@ -780,6 +769,6 @@ xfs_reclaim_inodes(
780 xfs_mount_t *mp, 769 xfs_mount_t *mp,
781 int mode) 770 int mode)
782{ 771{
783 return xfs_inode_ag_iterator(mp, xfs_reclaim_inode_now, mode, 772 return xfs_inode_ag_iterator(mp, xfs_reclaim_inode, mode,
784 XFS_ICI_RECLAIM_TAG); 773 XFS_ICI_RECLAIM_TAG, 1);
785} 774}
diff --git a/fs/xfs/linux-2.6/xfs_sync.h b/fs/xfs/linux-2.6/xfs_sync.h
index a500b4d91835..ea932b43335d 100644
--- a/fs/xfs/linux-2.6/xfs_sync.h
+++ b/fs/xfs/linux-2.6/xfs_sync.h
@@ -54,6 +54,6 @@ void __xfs_inode_clear_reclaim_tag(struct xfs_mount *mp, struct xfs_perag *pag,
54int xfs_sync_inode_valid(struct xfs_inode *ip, struct xfs_perag *pag); 54int xfs_sync_inode_valid(struct xfs_inode *ip, struct xfs_perag *pag);
55int xfs_inode_ag_iterator(struct xfs_mount *mp, 55int xfs_inode_ag_iterator(struct xfs_mount *mp,
56 int (*execute)(struct xfs_inode *ip, struct xfs_perag *pag, int flags), 56 int (*execute)(struct xfs_inode *ip, struct xfs_perag *pag, int flags),
57 int flags, int tag); 57 int flags, int tag, int write_lock);
58 58
59#endif 59#endif
diff --git a/fs/xfs/linux-2.6/xfs_trace.h b/fs/xfs/linux-2.6/xfs_trace.h
index c40834bdee58..c22a608321a3 100644
--- a/fs/xfs/linux-2.6/xfs_trace.h
+++ b/fs/xfs/linux-2.6/xfs_trace.h
@@ -33,51 +33,55 @@ struct xfs_dquot;
33struct xlog_ticket; 33struct xlog_ticket;
34struct log; 34struct log;
35 35
36DECLARE_EVENT_CLASS(xfs_attr_list_class,
37 TP_PROTO(struct xfs_attr_list_context *ctx),
38 TP_ARGS(ctx),
39 TP_STRUCT__entry(
40 __field(dev_t, dev)
41 __field(xfs_ino_t, ino)
42 __field(u32, hashval)
43 __field(u32, blkno)
44 __field(u32, offset)
45 __field(void *, alist)
46 __field(int, bufsize)
47 __field(int, count)
48 __field(int, firstu)
49 __field(int, dupcnt)
50 __field(int, flags)
51 ),
52 TP_fast_assign(
53 __entry->dev = VFS_I(ctx->dp)->i_sb->s_dev;
54 __entry->ino = ctx->dp->i_ino;
55 __entry->hashval = ctx->cursor->hashval;
56 __entry->blkno = ctx->cursor->blkno;
57 __entry->offset = ctx->cursor->offset;
58 __entry->alist = ctx->alist;
59 __entry->bufsize = ctx->bufsize;
60 __entry->count = ctx->count;
61 __entry->firstu = ctx->firstu;
62 __entry->flags = ctx->flags;
63 ),
64 TP_printk("dev %d:%d ino 0x%llx cursor h/b/o 0x%x/0x%x/%u dupcnt %u "
65 "alist 0x%p size %u count %u firstu %u flags %d %s",
66 MAJOR(__entry->dev), MINOR(__entry->dev),
67 __entry->ino,
68 __entry->hashval,
69 __entry->blkno,
70 __entry->offset,
71 __entry->dupcnt,
72 __entry->alist,
73 __entry->bufsize,
74 __entry->count,
75 __entry->firstu,
76 __entry->flags,
77 __print_flags(__entry->flags, "|", XFS_ATTR_FLAGS)
78 )
79)
80
36#define DEFINE_ATTR_LIST_EVENT(name) \ 81#define DEFINE_ATTR_LIST_EVENT(name) \
37TRACE_EVENT(name, \ 82DEFINE_EVENT(xfs_attr_list_class, name, \
38 TP_PROTO(struct xfs_attr_list_context *ctx), \ 83 TP_PROTO(struct xfs_attr_list_context *ctx), \
39 TP_ARGS(ctx), \ 84 TP_ARGS(ctx))
40 TP_STRUCT__entry( \
41 __field(dev_t, dev) \
42 __field(xfs_ino_t, ino) \
43 __field(u32, hashval) \
44 __field(u32, blkno) \
45 __field(u32, offset) \
46 __field(void *, alist) \
47 __field(int, bufsize) \
48 __field(int, count) \
49 __field(int, firstu) \
50 __field(int, dupcnt) \
51 __field(int, flags) \
52 ), \
53 TP_fast_assign( \
54 __entry->dev = VFS_I(ctx->dp)->i_sb->s_dev; \
55 __entry->ino = ctx->dp->i_ino; \
56 __entry->hashval = ctx->cursor->hashval; \
57 __entry->blkno = ctx->cursor->blkno; \
58 __entry->offset = ctx->cursor->offset; \
59 __entry->alist = ctx->alist; \
60 __entry->bufsize = ctx->bufsize; \
61 __entry->count = ctx->count; \
62 __entry->firstu = ctx->firstu; \
63 __entry->flags = ctx->flags; \
64 ), \
65 TP_printk("dev %d:%d ino 0x%llx cursor h/b/o 0x%x/0x%x/%u dupcnt %u " \
66 "alist 0x%p size %u count %u firstu %u flags %d %s", \
67 MAJOR(__entry->dev), MINOR(__entry->dev), \
68 __entry->ino, \
69 __entry->hashval, \
70 __entry->blkno, \
71 __entry->offset, \
72 __entry->dupcnt, \
73 __entry->alist, \
74 __entry->bufsize, \
75 __entry->count, \
76 __entry->firstu, \
77 __entry->flags, \
78 __print_flags(__entry->flags, "|", XFS_ATTR_FLAGS) \
79 ) \
80)
81DEFINE_ATTR_LIST_EVENT(xfs_attr_list_sf); 85DEFINE_ATTR_LIST_EVENT(xfs_attr_list_sf);
82DEFINE_ATTR_LIST_EVENT(xfs_attr_list_sf_all); 86DEFINE_ATTR_LIST_EVENT(xfs_attr_list_sf_all);
83DEFINE_ATTR_LIST_EVENT(xfs_attr_list_leaf); 87DEFINE_ATTR_LIST_EVENT(xfs_attr_list_leaf);
@@ -178,91 +182,99 @@ TRACE_EVENT(xfs_iext_insert,
178 (char *)__entry->caller_ip) 182 (char *)__entry->caller_ip)
179); 183);
180 184
185DECLARE_EVENT_CLASS(xfs_bmap_class,
186 TP_PROTO(struct xfs_inode *ip, xfs_extnum_t idx, int state,
187 unsigned long caller_ip),
188 TP_ARGS(ip, idx, state, caller_ip),
189 TP_STRUCT__entry(
190 __field(dev_t, dev)
191 __field(xfs_ino_t, ino)
192 __field(xfs_extnum_t, idx)
193 __field(xfs_fileoff_t, startoff)
194 __field(xfs_fsblock_t, startblock)
195 __field(xfs_filblks_t, blockcount)
196 __field(xfs_exntst_t, state)
197 __field(int, bmap_state)
198 __field(unsigned long, caller_ip)
199 ),
200 TP_fast_assign(
201 struct xfs_ifork *ifp = (state & BMAP_ATTRFORK) ?
202 ip->i_afp : &ip->i_df;
203 struct xfs_bmbt_irec r;
204
205 xfs_bmbt_get_all(xfs_iext_get_ext(ifp, idx), &r);
206 __entry->dev = VFS_I(ip)->i_sb->s_dev;
207 __entry->ino = ip->i_ino;
208 __entry->idx = idx;
209 __entry->startoff = r.br_startoff;
210 __entry->startblock = r.br_startblock;
211 __entry->blockcount = r.br_blockcount;
212 __entry->state = r.br_state;
213 __entry->bmap_state = state;
214 __entry->caller_ip = caller_ip;
215 ),
216 TP_printk("dev %d:%d ino 0x%llx state %s idx %ld "
217 "offset %lld block %s count %lld flag %d caller %pf",
218 MAJOR(__entry->dev), MINOR(__entry->dev),
219 __entry->ino,
220 __print_flags(__entry->bmap_state, "|", XFS_BMAP_EXT_FLAGS),
221 (long)__entry->idx,
222 __entry->startoff,
223 xfs_fmtfsblock(__entry->startblock),
224 __entry->blockcount,
225 __entry->state,
226 (char *)__entry->caller_ip)
227)
228
181#define DEFINE_BMAP_EVENT(name) \ 229#define DEFINE_BMAP_EVENT(name) \
182TRACE_EVENT(name, \ 230DEFINE_EVENT(xfs_bmap_class, name, \
183 TP_PROTO(struct xfs_inode *ip, xfs_extnum_t idx, int state, \ 231 TP_PROTO(struct xfs_inode *ip, xfs_extnum_t idx, int state, \
184 unsigned long caller_ip), \ 232 unsigned long caller_ip), \
185 TP_ARGS(ip, idx, state, caller_ip), \ 233 TP_ARGS(ip, idx, state, caller_ip))
186 TP_STRUCT__entry( \
187 __field(dev_t, dev) \
188 __field(xfs_ino_t, ino) \
189 __field(xfs_extnum_t, idx) \
190 __field(xfs_fileoff_t, startoff) \
191 __field(xfs_fsblock_t, startblock) \
192 __field(xfs_filblks_t, blockcount) \
193 __field(xfs_exntst_t, state) \
194 __field(int, bmap_state) \
195 __field(unsigned long, caller_ip) \
196 ), \
197 TP_fast_assign( \
198 struct xfs_ifork *ifp = (state & BMAP_ATTRFORK) ? \
199 ip->i_afp : &ip->i_df; \
200 struct xfs_bmbt_irec r; \
201 \
202 xfs_bmbt_get_all(xfs_iext_get_ext(ifp, idx), &r); \
203 __entry->dev = VFS_I(ip)->i_sb->s_dev; \
204 __entry->ino = ip->i_ino; \
205 __entry->idx = idx; \
206 __entry->startoff = r.br_startoff; \
207 __entry->startblock = r.br_startblock; \
208 __entry->blockcount = r.br_blockcount; \
209 __entry->state = r.br_state; \
210 __entry->bmap_state = state; \
211 __entry->caller_ip = caller_ip; \
212 ), \
213 TP_printk("dev %d:%d ino 0x%llx state %s idx %ld " \
214 "offset %lld block %s count %lld flag %d caller %pf", \
215 MAJOR(__entry->dev), MINOR(__entry->dev), \
216 __entry->ino, \
217 __print_flags(__entry->bmap_state, "|", XFS_BMAP_EXT_FLAGS), \
218 (long)__entry->idx, \
219 __entry->startoff, \
220 xfs_fmtfsblock(__entry->startblock), \
221 __entry->blockcount, \
222 __entry->state, \
223 (char *)__entry->caller_ip) \
224)
225
226DEFINE_BMAP_EVENT(xfs_iext_remove); 234DEFINE_BMAP_EVENT(xfs_iext_remove);
227DEFINE_BMAP_EVENT(xfs_bmap_pre_update); 235DEFINE_BMAP_EVENT(xfs_bmap_pre_update);
228DEFINE_BMAP_EVENT(xfs_bmap_post_update); 236DEFINE_BMAP_EVENT(xfs_bmap_post_update);
229DEFINE_BMAP_EVENT(xfs_extlist); 237DEFINE_BMAP_EVENT(xfs_extlist);
230 238
231#define DEFINE_BUF_EVENT(tname) \ 239DECLARE_EVENT_CLASS(xfs_buf_class,
232TRACE_EVENT(tname, \ 240 TP_PROTO(struct xfs_buf *bp, unsigned long caller_ip),
233 TP_PROTO(struct xfs_buf *bp, unsigned long caller_ip), \ 241 TP_ARGS(bp, caller_ip),
234 TP_ARGS(bp, caller_ip), \ 242 TP_STRUCT__entry(
235 TP_STRUCT__entry( \ 243 __field(dev_t, dev)
236 __field(dev_t, dev) \ 244 __field(xfs_daddr_t, bno)
237 __field(xfs_daddr_t, bno) \ 245 __field(size_t, buffer_length)
238 __field(size_t, buffer_length) \ 246 __field(int, hold)
239 __field(int, hold) \ 247 __field(int, pincount)
240 __field(int, pincount) \ 248 __field(unsigned, lockval)
241 __field(unsigned, lockval) \ 249 __field(unsigned, flags)
242 __field(unsigned, flags) \ 250 __field(unsigned long, caller_ip)
243 __field(unsigned long, caller_ip) \ 251 ),
244 ), \ 252 TP_fast_assign(
245 TP_fast_assign( \ 253 __entry->dev = bp->b_target->bt_dev;
246 __entry->dev = bp->b_target->bt_dev; \ 254 __entry->bno = bp->b_bn;
247 __entry->bno = bp->b_bn; \ 255 __entry->buffer_length = bp->b_buffer_length;
248 __entry->buffer_length = bp->b_buffer_length; \ 256 __entry->hold = atomic_read(&bp->b_hold);
249 __entry->hold = atomic_read(&bp->b_hold); \ 257 __entry->pincount = atomic_read(&bp->b_pin_count);
250 __entry->pincount = atomic_read(&bp->b_pin_count); \ 258 __entry->lockval = xfs_buf_lock_value(bp);
251 __entry->lockval = xfs_buf_lock_value(bp); \ 259 __entry->flags = bp->b_flags;
252 __entry->flags = bp->b_flags; \ 260 __entry->caller_ip = caller_ip;
253 __entry->caller_ip = caller_ip; \ 261 ),
254 ), \ 262 TP_printk("dev %d:%d bno 0x%llx len 0x%zx hold %d pincount %d "
255 TP_printk("dev %d:%d bno 0x%llx len 0x%zx hold %d pincount %d " \ 263 "lock %d flags %s caller %pf",
256 "lock %d flags %s caller %pf", \ 264 MAJOR(__entry->dev), MINOR(__entry->dev),
257 MAJOR(__entry->dev), MINOR(__entry->dev), \ 265 (unsigned long long)__entry->bno,
258 (unsigned long long)__entry->bno, \ 266 __entry->buffer_length,
259 __entry->buffer_length, \ 267 __entry->hold,
260 __entry->hold, \ 268 __entry->pincount,
261 __entry->pincount, \ 269 __entry->lockval,
262 __entry->lockval, \ 270 __print_flags(__entry->flags, "|", XFS_BUF_FLAGS),
263 __print_flags(__entry->flags, "|", XFS_BUF_FLAGS), \ 271 (void *)__entry->caller_ip)
264 (void *)__entry->caller_ip) \
265) 272)
273
274#define DEFINE_BUF_EVENT(name) \
275DEFINE_EVENT(xfs_buf_class, name, \
276 TP_PROTO(struct xfs_buf *bp, unsigned long caller_ip), \
277 TP_ARGS(bp, caller_ip))
266DEFINE_BUF_EVENT(xfs_buf_init); 278DEFINE_BUF_EVENT(xfs_buf_init);
267DEFINE_BUF_EVENT(xfs_buf_free); 279DEFINE_BUF_EVENT(xfs_buf_free);
268DEFINE_BUF_EVENT(xfs_buf_hold); 280DEFINE_BUF_EVENT(xfs_buf_hold);
@@ -299,41 +311,45 @@ DEFINE_BUF_EVENT(xfs_reset_dqcounts);
299DEFINE_BUF_EVENT(xfs_inode_item_push); 311DEFINE_BUF_EVENT(xfs_inode_item_push);
300 312
301/* pass flags explicitly */ 313/* pass flags explicitly */
302#define DEFINE_BUF_FLAGS_EVENT(tname) \ 314DECLARE_EVENT_CLASS(xfs_buf_flags_class,
303TRACE_EVENT(tname, \ 315 TP_PROTO(struct xfs_buf *bp, unsigned flags, unsigned long caller_ip),
304 TP_PROTO(struct xfs_buf *bp, unsigned flags, unsigned long caller_ip), \ 316 TP_ARGS(bp, flags, caller_ip),
305 TP_ARGS(bp, flags, caller_ip), \ 317 TP_STRUCT__entry(
306 TP_STRUCT__entry( \ 318 __field(dev_t, dev)
307 __field(dev_t, dev) \ 319 __field(xfs_daddr_t, bno)
308 __field(xfs_daddr_t, bno) \ 320 __field(size_t, buffer_length)
309 __field(size_t, buffer_length) \ 321 __field(int, hold)
310 __field(int, hold) \ 322 __field(int, pincount)
311 __field(int, pincount) \ 323 __field(unsigned, lockval)
312 __field(unsigned, lockval) \ 324 __field(unsigned, flags)
313 __field(unsigned, flags) \ 325 __field(unsigned long, caller_ip)
314 __field(unsigned long, caller_ip) \ 326 ),
315 ), \ 327 TP_fast_assign(
316 TP_fast_assign( \ 328 __entry->dev = bp->b_target->bt_dev;
317 __entry->dev = bp->b_target->bt_dev; \ 329 __entry->bno = bp->b_bn;
318 __entry->bno = bp->b_bn; \ 330 __entry->buffer_length = bp->b_buffer_length;
319 __entry->buffer_length = bp->b_buffer_length; \ 331 __entry->flags = flags;
320 __entry->flags = flags; \ 332 __entry->hold = atomic_read(&bp->b_hold);
321 __entry->hold = atomic_read(&bp->b_hold); \ 333 __entry->pincount = atomic_read(&bp->b_pin_count);
322 __entry->pincount = atomic_read(&bp->b_pin_count); \ 334 __entry->lockval = xfs_buf_lock_value(bp);
323 __entry->lockval = xfs_buf_lock_value(bp); \ 335 __entry->caller_ip = caller_ip;
324 __entry->caller_ip = caller_ip; \ 336 ),
325 ), \ 337 TP_printk("dev %d:%d bno 0x%llx len 0x%zx hold %d pincount %d "
326 TP_printk("dev %d:%d bno 0x%llx len 0x%zx hold %d pincount %d " \ 338 "lock %d flags %s caller %pf",
327 "lock %d flags %s caller %pf", \ 339 MAJOR(__entry->dev), MINOR(__entry->dev),
328 MAJOR(__entry->dev), MINOR(__entry->dev), \ 340 (unsigned long long)__entry->bno,
329 (unsigned long long)__entry->bno, \ 341 __entry->buffer_length,
330 __entry->buffer_length, \ 342 __entry->hold,
331 __entry->hold, \ 343 __entry->pincount,
332 __entry->pincount, \ 344 __entry->lockval,
333 __entry->lockval, \ 345 __print_flags(__entry->flags, "|", XFS_BUF_FLAGS),
334 __print_flags(__entry->flags, "|", XFS_BUF_FLAGS), \ 346 (void *)__entry->caller_ip)
335 (void *)__entry->caller_ip) \
336) 347)
348
349#define DEFINE_BUF_FLAGS_EVENT(name) \
350DEFINE_EVENT(xfs_buf_flags_class, name, \
351 TP_PROTO(struct xfs_buf *bp, unsigned flags, unsigned long caller_ip), \
352 TP_ARGS(bp, flags, caller_ip))
337DEFINE_BUF_FLAGS_EVENT(xfs_buf_find); 353DEFINE_BUF_FLAGS_EVENT(xfs_buf_find);
338DEFINE_BUF_FLAGS_EVENT(xfs_buf_get); 354DEFINE_BUF_FLAGS_EVENT(xfs_buf_get);
339DEFINE_BUF_FLAGS_EVENT(xfs_buf_read); 355DEFINE_BUF_FLAGS_EVENT(xfs_buf_read);
@@ -376,55 +392,58 @@ TRACE_EVENT(xfs_buf_ioerror,
376 (void *)__entry->caller_ip) 392 (void *)__entry->caller_ip)
377); 393);
378 394
379#define DEFINE_BUF_ITEM_EVENT(tname) \ 395DECLARE_EVENT_CLASS(xfs_buf_item_class,
380TRACE_EVENT(tname, \ 396 TP_PROTO(struct xfs_buf_log_item *bip),
381 TP_PROTO(struct xfs_buf_log_item *bip), \ 397 TP_ARGS(bip),
382 TP_ARGS(bip), \ 398 TP_STRUCT__entry(
383 TP_STRUCT__entry( \ 399 __field(dev_t, dev)
384 __field(dev_t, dev) \ 400 __field(xfs_daddr_t, buf_bno)
385 __field(xfs_daddr_t, buf_bno) \ 401 __field(size_t, buf_len)
386 __field(size_t, buf_len) \ 402 __field(int, buf_hold)
387 __field(int, buf_hold) \ 403 __field(int, buf_pincount)
388 __field(int, buf_pincount) \ 404 __field(int, buf_lockval)
389 __field(int, buf_lockval) \ 405 __field(unsigned, buf_flags)
390 __field(unsigned, buf_flags) \ 406 __field(unsigned, bli_recur)
391 __field(unsigned, bli_recur) \ 407 __field(int, bli_refcount)
392 __field(int, bli_refcount) \ 408 __field(unsigned, bli_flags)
393 __field(unsigned, bli_flags) \ 409 __field(void *, li_desc)
394 __field(void *, li_desc) \ 410 __field(unsigned, li_flags)
395 __field(unsigned, li_flags) \ 411 ),
396 ), \ 412 TP_fast_assign(
397 TP_fast_assign( \ 413 __entry->dev = bip->bli_buf->b_target->bt_dev;
398 __entry->dev = bip->bli_buf->b_target->bt_dev; \ 414 __entry->bli_flags = bip->bli_flags;
399 __entry->bli_flags = bip->bli_flags; \ 415 __entry->bli_recur = bip->bli_recur;
400 __entry->bli_recur = bip->bli_recur; \ 416 __entry->bli_refcount = atomic_read(&bip->bli_refcount);
401 __entry->bli_refcount = atomic_read(&bip->bli_refcount); \ 417 __entry->buf_bno = bip->bli_buf->b_bn;
402 __entry->buf_bno = bip->bli_buf->b_bn; \ 418 __entry->buf_len = bip->bli_buf->b_buffer_length;
403 __entry->buf_len = bip->bli_buf->b_buffer_length; \ 419 __entry->buf_flags = bip->bli_buf->b_flags;
404 __entry->buf_flags = bip->bli_buf->b_flags; \ 420 __entry->buf_hold = atomic_read(&bip->bli_buf->b_hold);
405 __entry->buf_hold = atomic_read(&bip->bli_buf->b_hold); \ 421 __entry->buf_pincount = atomic_read(&bip->bli_buf->b_pin_count);
406 __entry->buf_pincount = \ 422 __entry->buf_lockval = xfs_buf_lock_value(bip->bli_buf);
407 atomic_read(&bip->bli_buf->b_pin_count); \ 423 __entry->li_desc = bip->bli_item.li_desc;
408 __entry->buf_lockval = xfs_buf_lock_value(bip->bli_buf); \ 424 __entry->li_flags = bip->bli_item.li_flags;
409 __entry->li_desc = bip->bli_item.li_desc; \ 425 ),
410 __entry->li_flags = bip->bli_item.li_flags; \ 426 TP_printk("dev %d:%d bno 0x%llx len 0x%zx hold %d pincount %d "
411 ), \ 427 "lock %d flags %s recur %d refcount %d bliflags %s "
412 TP_printk("dev %d:%d bno 0x%llx len 0x%zx hold %d pincount %d " \ 428 "lidesc 0x%p liflags %s",
413 "lock %d flags %s recur %d refcount %d bliflags %s " \ 429 MAJOR(__entry->dev), MINOR(__entry->dev),
414 "lidesc 0x%p liflags %s", \ 430 (unsigned long long)__entry->buf_bno,
415 MAJOR(__entry->dev), MINOR(__entry->dev), \ 431 __entry->buf_len,
416 (unsigned long long)__entry->buf_bno, \ 432 __entry->buf_hold,
417 __entry->buf_len, \ 433 __entry->buf_pincount,
418 __entry->buf_hold, \ 434 __entry->buf_lockval,
419 __entry->buf_pincount, \ 435 __print_flags(__entry->buf_flags, "|", XFS_BUF_FLAGS),
420 __entry->buf_lockval, \ 436 __entry->bli_recur,
421 __print_flags(__entry->buf_flags, "|", XFS_BUF_FLAGS), \ 437 __entry->bli_refcount,
422 __entry->bli_recur, \ 438 __print_flags(__entry->bli_flags, "|", XFS_BLI_FLAGS),
423 __entry->bli_refcount, \ 439 __entry->li_desc,
424 __print_flags(__entry->bli_flags, "|", XFS_BLI_FLAGS), \ 440 __print_flags(__entry->li_flags, "|", XFS_LI_FLAGS))
425 __entry->li_desc, \
426 __print_flags(__entry->li_flags, "|", XFS_LI_FLAGS)) \
427) 441)
442
443#define DEFINE_BUF_ITEM_EVENT(name) \
444DEFINE_EVENT(xfs_buf_item_class, name, \
445 TP_PROTO(struct xfs_buf_log_item *bip), \
446 TP_ARGS(bip))
428DEFINE_BUF_ITEM_EVENT(xfs_buf_item_size); 447DEFINE_BUF_ITEM_EVENT(xfs_buf_item_size);
429DEFINE_BUF_ITEM_EVENT(xfs_buf_item_size_stale); 448DEFINE_BUF_ITEM_EVENT(xfs_buf_item_size_stale);
430DEFINE_BUF_ITEM_EVENT(xfs_buf_item_format); 449DEFINE_BUF_ITEM_EVENT(xfs_buf_item_format);
@@ -450,78 +469,90 @@ DEFINE_BUF_ITEM_EVENT(xfs_trans_bhold);
450DEFINE_BUF_ITEM_EVENT(xfs_trans_bhold_release); 469DEFINE_BUF_ITEM_EVENT(xfs_trans_bhold_release);
451DEFINE_BUF_ITEM_EVENT(xfs_trans_binval); 470DEFINE_BUF_ITEM_EVENT(xfs_trans_binval);
452 471
472DECLARE_EVENT_CLASS(xfs_lock_class,
473 TP_PROTO(struct xfs_inode *ip, unsigned lock_flags,
474 unsigned long caller_ip),
475 TP_ARGS(ip, lock_flags, caller_ip),
476 TP_STRUCT__entry(
477 __field(dev_t, dev)
478 __field(xfs_ino_t, ino)
479 __field(int, lock_flags)
480 __field(unsigned long, caller_ip)
481 ),
482 TP_fast_assign(
483 __entry->dev = VFS_I(ip)->i_sb->s_dev;
484 __entry->ino = ip->i_ino;
485 __entry->lock_flags = lock_flags;
486 __entry->caller_ip = caller_ip;
487 ),
488 TP_printk("dev %d:%d ino 0x%llx flags %s caller %pf",
489 MAJOR(__entry->dev), MINOR(__entry->dev),
490 __entry->ino,
491 __print_flags(__entry->lock_flags, "|", XFS_LOCK_FLAGS),
492 (void *)__entry->caller_ip)
493)
494
453#define DEFINE_LOCK_EVENT(name) \ 495#define DEFINE_LOCK_EVENT(name) \
454TRACE_EVENT(name, \ 496DEFINE_EVENT(xfs_lock_class, name, \
455 TP_PROTO(struct xfs_inode *ip, unsigned lock_flags, \ 497 TP_PROTO(struct xfs_inode *ip, unsigned lock_flags, \
456 unsigned long caller_ip), \ 498 unsigned long caller_ip), \
457 TP_ARGS(ip, lock_flags, caller_ip), \ 499 TP_ARGS(ip, lock_flags, caller_ip))
458 TP_STRUCT__entry( \
459 __field(dev_t, dev) \
460 __field(xfs_ino_t, ino) \
461 __field(int, lock_flags) \
462 __field(unsigned long, caller_ip) \
463 ), \
464 TP_fast_assign( \
465 __entry->dev = VFS_I(ip)->i_sb->s_dev; \
466 __entry->ino = ip->i_ino; \
467 __entry->lock_flags = lock_flags; \
468 __entry->caller_ip = caller_ip; \
469 ), \
470 TP_printk("dev %d:%d ino 0x%llx flags %s caller %pf", \
471 MAJOR(__entry->dev), MINOR(__entry->dev), \
472 __entry->ino, \
473 __print_flags(__entry->lock_flags, "|", XFS_LOCK_FLAGS), \
474 (void *)__entry->caller_ip) \
475)
476
477DEFINE_LOCK_EVENT(xfs_ilock); 500DEFINE_LOCK_EVENT(xfs_ilock);
478DEFINE_LOCK_EVENT(xfs_ilock_nowait); 501DEFINE_LOCK_EVENT(xfs_ilock_nowait);
479DEFINE_LOCK_EVENT(xfs_ilock_demote); 502DEFINE_LOCK_EVENT(xfs_ilock_demote);
480DEFINE_LOCK_EVENT(xfs_iunlock); 503DEFINE_LOCK_EVENT(xfs_iunlock);
481 504
505DECLARE_EVENT_CLASS(xfs_iget_class,
506 TP_PROTO(struct xfs_inode *ip),
507 TP_ARGS(ip),
508 TP_STRUCT__entry(
509 __field(dev_t, dev)
510 __field(xfs_ino_t, ino)
511 ),
512 TP_fast_assign(
513 __entry->dev = VFS_I(ip)->i_sb->s_dev;
514 __entry->ino = ip->i_ino;
515 ),
516 TP_printk("dev %d:%d ino 0x%llx",
517 MAJOR(__entry->dev), MINOR(__entry->dev),
518 __entry->ino)
519)
520
482#define DEFINE_IGET_EVENT(name) \ 521#define DEFINE_IGET_EVENT(name) \
483TRACE_EVENT(name, \ 522DEFINE_EVENT(xfs_iget_class, name, \
484 TP_PROTO(struct xfs_inode *ip), \ 523 TP_PROTO(struct xfs_inode *ip), \
485 TP_ARGS(ip), \ 524 TP_ARGS(ip))
486 TP_STRUCT__entry( \
487 __field(dev_t, dev) \
488 __field(xfs_ino_t, ino) \
489 ), \
490 TP_fast_assign( \
491 __entry->dev = VFS_I(ip)->i_sb->s_dev; \
492 __entry->ino = ip->i_ino; \
493 ), \
494 TP_printk("dev %d:%d ino 0x%llx", \
495 MAJOR(__entry->dev), MINOR(__entry->dev), \
496 __entry->ino) \
497)
498DEFINE_IGET_EVENT(xfs_iget_skip); 525DEFINE_IGET_EVENT(xfs_iget_skip);
499DEFINE_IGET_EVENT(xfs_iget_reclaim); 526DEFINE_IGET_EVENT(xfs_iget_reclaim);
500DEFINE_IGET_EVENT(xfs_iget_found); 527DEFINE_IGET_EVENT(xfs_iget_found);
501DEFINE_IGET_EVENT(xfs_iget_alloc); 528DEFINE_IGET_EVENT(xfs_iget_alloc);
502 529
530DECLARE_EVENT_CLASS(xfs_inode_class,
531 TP_PROTO(struct xfs_inode *ip, unsigned long caller_ip),
532 TP_ARGS(ip, caller_ip),
533 TP_STRUCT__entry(
534 __field(dev_t, dev)
535 __field(xfs_ino_t, ino)
536 __field(int, count)
537 __field(unsigned long, caller_ip)
538 ),
539 TP_fast_assign(
540 __entry->dev = VFS_I(ip)->i_sb->s_dev;
541 __entry->ino = ip->i_ino;
542 __entry->count = atomic_read(&VFS_I(ip)->i_count);
543 __entry->caller_ip = caller_ip;
544 ),
545 TP_printk("dev %d:%d ino 0x%llx count %d caller %pf",
546 MAJOR(__entry->dev), MINOR(__entry->dev),
547 __entry->ino,
548 __entry->count,
549 (char *)__entry->caller_ip)
550)
551
503#define DEFINE_INODE_EVENT(name) \ 552#define DEFINE_INODE_EVENT(name) \
504TRACE_EVENT(name, \ 553DEFINE_EVENT(xfs_inode_class, name, \
505 TP_PROTO(struct xfs_inode *ip, unsigned long caller_ip), \ 554 TP_PROTO(struct xfs_inode *ip, unsigned long caller_ip), \
506 TP_ARGS(ip, caller_ip), \ 555 TP_ARGS(ip, caller_ip))
507 TP_STRUCT__entry( \
508 __field(dev_t, dev) \
509 __field(xfs_ino_t, ino) \
510 __field(int, count) \
511 __field(unsigned long, caller_ip) \
512 ), \
513 TP_fast_assign( \
514 __entry->dev = VFS_I(ip)->i_sb->s_dev; \
515 __entry->ino = ip->i_ino; \
516 __entry->count = atomic_read(&VFS_I(ip)->i_count); \
517 __entry->caller_ip = caller_ip; \
518 ), \
519 TP_printk("dev %d:%d ino 0x%llx count %d caller %pf", \
520 MAJOR(__entry->dev), MINOR(__entry->dev), \
521 __entry->ino, \
522 __entry->count, \
523 (char *)__entry->caller_ip) \
524)
525DEFINE_INODE_EVENT(xfs_ihold); 556DEFINE_INODE_EVENT(xfs_ihold);
526DEFINE_INODE_EVENT(xfs_irele); 557DEFINE_INODE_EVENT(xfs_irele);
527/* the old xfs_itrace_entry tracer - to be replaced by s.th. in the VFS */ 558/* the old xfs_itrace_entry tracer - to be replaced by s.th. in the VFS */
@@ -529,55 +560,59 @@ DEFINE_INODE_EVENT(xfs_inode);
529#define xfs_itrace_entry(ip) \ 560#define xfs_itrace_entry(ip) \
530 trace_xfs_inode(ip, _THIS_IP_) 561 trace_xfs_inode(ip, _THIS_IP_)
531 562
532#define DEFINE_DQUOT_EVENT(tname) \ 563DECLARE_EVENT_CLASS(xfs_dquot_class,
533TRACE_EVENT(tname, \ 564 TP_PROTO(struct xfs_dquot *dqp),
534 TP_PROTO(struct xfs_dquot *dqp), \ 565 TP_ARGS(dqp),
535 TP_ARGS(dqp), \ 566 TP_STRUCT__entry(
536 TP_STRUCT__entry( \ 567 __field(dev_t, dev)
537 __field(dev_t, dev) \ 568 __field(__be32, id)
538 __field(__be32, id) \ 569 __field(unsigned, flags)
539 __field(unsigned, flags) \ 570 __field(unsigned, nrefs)
540 __field(unsigned, nrefs) \ 571 __field(unsigned long long, res_bcount)
541 __field(unsigned long long, res_bcount) \ 572 __field(unsigned long long, bcount)
542 __field(unsigned long long, bcount) \ 573 __field(unsigned long long, icount)
543 __field(unsigned long long, icount) \ 574 __field(unsigned long long, blk_hardlimit)
544 __field(unsigned long long, blk_hardlimit) \ 575 __field(unsigned long long, blk_softlimit)
545 __field(unsigned long long, blk_softlimit) \ 576 __field(unsigned long long, ino_hardlimit)
546 __field(unsigned long long, ino_hardlimit) \ 577 __field(unsigned long long, ino_softlimit)
547 __field(unsigned long long, ino_softlimit) \
548 ), \
549 TP_fast_assign( \
550 __entry->dev = dqp->q_mount->m_super->s_dev; \
551 __entry->id = dqp->q_core.d_id; \
552 __entry->flags = dqp->dq_flags; \
553 __entry->nrefs = dqp->q_nrefs; \
554 __entry->res_bcount = dqp->q_res_bcount; \
555 __entry->bcount = be64_to_cpu(dqp->q_core.d_bcount); \
556 __entry->icount = be64_to_cpu(dqp->q_core.d_icount); \
557 __entry->blk_hardlimit = \
558 be64_to_cpu(dqp->q_core.d_blk_hardlimit); \
559 __entry->blk_softlimit = \
560 be64_to_cpu(dqp->q_core.d_blk_softlimit); \
561 __entry->ino_hardlimit = \
562 be64_to_cpu(dqp->q_core.d_ino_hardlimit); \
563 __entry->ino_softlimit = \
564 be64_to_cpu(dqp->q_core.d_ino_softlimit); \
565 ), \ 578 ), \
566 TP_printk("dev %d:%d id 0x%x flags %s nrefs %u res_bc 0x%llx " \ 579 TP_fast_assign(
567 "bcnt 0x%llx [hard 0x%llx | soft 0x%llx] " \ 580 __entry->dev = dqp->q_mount->m_super->s_dev;
568 "icnt 0x%llx [hard 0x%llx | soft 0x%llx]", \ 581 __entry->id = dqp->q_core.d_id;
569 MAJOR(__entry->dev), MINOR(__entry->dev), \ 582 __entry->flags = dqp->dq_flags;
570 be32_to_cpu(__entry->id), \ 583 __entry->nrefs = dqp->q_nrefs;
571 __print_flags(__entry->flags, "|", XFS_DQ_FLAGS), \ 584 __entry->res_bcount = dqp->q_res_bcount;
572 __entry->nrefs, \ 585 __entry->bcount = be64_to_cpu(dqp->q_core.d_bcount);
573 __entry->res_bcount, \ 586 __entry->icount = be64_to_cpu(dqp->q_core.d_icount);
574 __entry->bcount, \ 587 __entry->blk_hardlimit =
575 __entry->blk_hardlimit, \ 588 be64_to_cpu(dqp->q_core.d_blk_hardlimit);
576 __entry->blk_softlimit, \ 589 __entry->blk_softlimit =
577 __entry->icount, \ 590 be64_to_cpu(dqp->q_core.d_blk_softlimit);
578 __entry->ino_hardlimit, \ 591 __entry->ino_hardlimit =
579 __entry->ino_softlimit) \ 592 be64_to_cpu(dqp->q_core.d_ino_hardlimit);
593 __entry->ino_softlimit =
594 be64_to_cpu(dqp->q_core.d_ino_softlimit);
595 ),
596 TP_printk("dev %d:%d id 0x%x flags %s nrefs %u res_bc 0x%llx "
597 "bcnt 0x%llx [hard 0x%llx | soft 0x%llx] "
598 "icnt 0x%llx [hard 0x%llx | soft 0x%llx]",
599 MAJOR(__entry->dev), MINOR(__entry->dev),
600 be32_to_cpu(__entry->id),
601 __print_flags(__entry->flags, "|", XFS_DQ_FLAGS),
602 __entry->nrefs,
603 __entry->res_bcount,
604 __entry->bcount,
605 __entry->blk_hardlimit,
606 __entry->blk_softlimit,
607 __entry->icount,
608 __entry->ino_hardlimit,
609 __entry->ino_softlimit)
580) 610)
611
612#define DEFINE_DQUOT_EVENT(name) \
613DEFINE_EVENT(xfs_dquot_class, name, \
614 TP_PROTO(struct xfs_dquot *dqp), \
615 TP_ARGS(dqp))
581DEFINE_DQUOT_EVENT(xfs_dqadjust); 616DEFINE_DQUOT_EVENT(xfs_dqadjust);
582DEFINE_DQUOT_EVENT(xfs_dqshake_dirty); 617DEFINE_DQUOT_EVENT(xfs_dqshake_dirty);
583DEFINE_DQUOT_EVENT(xfs_dqshake_unlink); 618DEFINE_DQUOT_EVENT(xfs_dqshake_unlink);
@@ -610,72 +645,75 @@ DEFINE_DQUOT_EVENT(xfs_dqflush_done);
610DEFINE_IGET_EVENT(xfs_dquot_dqalloc); 645DEFINE_IGET_EVENT(xfs_dquot_dqalloc);
611DEFINE_IGET_EVENT(xfs_dquot_dqdetach); 646DEFINE_IGET_EVENT(xfs_dquot_dqdetach);
612 647
648DECLARE_EVENT_CLASS(xfs_loggrant_class,
649 TP_PROTO(struct log *log, struct xlog_ticket *tic),
650 TP_ARGS(log, tic),
651 TP_STRUCT__entry(
652 __field(dev_t, dev)
653 __field(unsigned, trans_type)
654 __field(char, ocnt)
655 __field(char, cnt)
656 __field(int, curr_res)
657 __field(int, unit_res)
658 __field(unsigned int, flags)
659 __field(void *, reserve_headq)
660 __field(void *, write_headq)
661 __field(int, grant_reserve_cycle)
662 __field(int, grant_reserve_bytes)
663 __field(int, grant_write_cycle)
664 __field(int, grant_write_bytes)
665 __field(int, curr_cycle)
666 __field(int, curr_block)
667 __field(xfs_lsn_t, tail_lsn)
668 ),
669 TP_fast_assign(
670 __entry->dev = log->l_mp->m_super->s_dev;
671 __entry->trans_type = tic->t_trans_type;
672 __entry->ocnt = tic->t_ocnt;
673 __entry->cnt = tic->t_cnt;
674 __entry->curr_res = tic->t_curr_res;
675 __entry->unit_res = tic->t_unit_res;
676 __entry->flags = tic->t_flags;
677 __entry->reserve_headq = log->l_reserve_headq;
678 __entry->write_headq = log->l_write_headq;
679 __entry->grant_reserve_cycle = log->l_grant_reserve_cycle;
680 __entry->grant_reserve_bytes = log->l_grant_reserve_bytes;
681 __entry->grant_write_cycle = log->l_grant_write_cycle;
682 __entry->grant_write_bytes = log->l_grant_write_bytes;
683 __entry->curr_cycle = log->l_curr_cycle;
684 __entry->curr_block = log->l_curr_block;
685 __entry->tail_lsn = log->l_tail_lsn;
686 ),
687 TP_printk("dev %d:%d type %s t_ocnt %u t_cnt %u t_curr_res %u "
688 "t_unit_res %u t_flags %s reserve_headq 0x%p "
689 "write_headq 0x%p grant_reserve_cycle %d "
690 "grant_reserve_bytes %d grant_write_cycle %d "
691 "grant_write_bytes %d curr_cycle %d curr_block %d "
692 "tail_cycle %d tail_block %d",
693 MAJOR(__entry->dev), MINOR(__entry->dev),
694 __print_symbolic(__entry->trans_type, XFS_TRANS_TYPES),
695 __entry->ocnt,
696 __entry->cnt,
697 __entry->curr_res,
698 __entry->unit_res,
699 __print_flags(__entry->flags, "|", XLOG_TIC_FLAGS),
700 __entry->reserve_headq,
701 __entry->write_headq,
702 __entry->grant_reserve_cycle,
703 __entry->grant_reserve_bytes,
704 __entry->grant_write_cycle,
705 __entry->grant_write_bytes,
706 __entry->curr_cycle,
707 __entry->curr_block,
708 CYCLE_LSN(__entry->tail_lsn),
709 BLOCK_LSN(__entry->tail_lsn)
710 )
711)
613 712
614#define DEFINE_LOGGRANT_EVENT(tname) \ 713#define DEFINE_LOGGRANT_EVENT(name) \
615TRACE_EVENT(tname, \ 714DEFINE_EVENT(xfs_loggrant_class, name, \
616 TP_PROTO(struct log *log, struct xlog_ticket *tic), \ 715 TP_PROTO(struct log *log, struct xlog_ticket *tic), \
617 TP_ARGS(log, tic), \ 716 TP_ARGS(log, tic))
618 TP_STRUCT__entry( \
619 __field(dev_t, dev) \
620 __field(unsigned, trans_type) \
621 __field(char, ocnt) \
622 __field(char, cnt) \
623 __field(int, curr_res) \
624 __field(int, unit_res) \
625 __field(unsigned int, flags) \
626 __field(void *, reserve_headq) \
627 __field(void *, write_headq) \
628 __field(int, grant_reserve_cycle) \
629 __field(int, grant_reserve_bytes) \
630 __field(int, grant_write_cycle) \
631 __field(int, grant_write_bytes) \
632 __field(int, curr_cycle) \
633 __field(int, curr_block) \
634 __field(xfs_lsn_t, tail_lsn) \
635 ), \
636 TP_fast_assign( \
637 __entry->dev = log->l_mp->m_super->s_dev; \
638 __entry->trans_type = tic->t_trans_type; \
639 __entry->ocnt = tic->t_ocnt; \
640 __entry->cnt = tic->t_cnt; \
641 __entry->curr_res = tic->t_curr_res; \
642 __entry->unit_res = tic->t_unit_res; \
643 __entry->flags = tic->t_flags; \
644 __entry->reserve_headq = log->l_reserve_headq; \
645 __entry->write_headq = log->l_write_headq; \
646 __entry->grant_reserve_cycle = log->l_grant_reserve_cycle; \
647 __entry->grant_reserve_bytes = log->l_grant_reserve_bytes; \
648 __entry->grant_write_cycle = log->l_grant_write_cycle; \
649 __entry->grant_write_bytes = log->l_grant_write_bytes; \
650 __entry->curr_cycle = log->l_curr_cycle; \
651 __entry->curr_block = log->l_curr_block; \
652 __entry->tail_lsn = log->l_tail_lsn; \
653 ), \
654 TP_printk("dev %d:%d type %s t_ocnt %u t_cnt %u t_curr_res %u " \
655 "t_unit_res %u t_flags %s reserve_headq 0x%p " \
656 "write_headq 0x%p grant_reserve_cycle %d " \
657 "grant_reserve_bytes %d grant_write_cycle %d " \
658 "grant_write_bytes %d curr_cycle %d curr_block %d " \
659 "tail_cycle %d tail_block %d", \
660 MAJOR(__entry->dev), MINOR(__entry->dev), \
661 __print_symbolic(__entry->trans_type, XFS_TRANS_TYPES), \
662 __entry->ocnt, \
663 __entry->cnt, \
664 __entry->curr_res, \
665 __entry->unit_res, \
666 __print_flags(__entry->flags, "|", XLOG_TIC_FLAGS), \
667 __entry->reserve_headq, \
668 __entry->write_headq, \
669 __entry->grant_reserve_cycle, \
670 __entry->grant_reserve_bytes, \
671 __entry->grant_write_cycle, \
672 __entry->grant_write_bytes, \
673 __entry->curr_cycle, \
674 __entry->curr_block, \
675 CYCLE_LSN(__entry->tail_lsn), \
676 BLOCK_LSN(__entry->tail_lsn) \
677 ) \
678)
679DEFINE_LOGGRANT_EVENT(xfs_log_done_nonperm); 717DEFINE_LOGGRANT_EVENT(xfs_log_done_nonperm);
680DEFINE_LOGGRANT_EVENT(xfs_log_done_perm); 718DEFINE_LOGGRANT_EVENT(xfs_log_done_perm);
681DEFINE_LOGGRANT_EVENT(xfs_log_reserve); 719DEFINE_LOGGRANT_EVENT(xfs_log_reserve);
@@ -815,7 +853,7 @@ TRACE_EVENT(name, \
815 ), \ 853 ), \
816 TP_printk("dev %d:%d ino 0x%llx size 0x%llx new_size 0x%llx " \ 854 TP_printk("dev %d:%d ino 0x%llx size 0x%llx new_size 0x%llx " \
817 "offset 0x%llx count %zd flags %s " \ 855 "offset 0x%llx count %zd flags %s " \
818 "startoff 0x%llx startblock 0x%llx blockcount 0x%llx", \ 856 "startoff 0x%llx startblock %s blockcount 0x%llx", \
819 MAJOR(__entry->dev), MINOR(__entry->dev), \ 857 MAJOR(__entry->dev), MINOR(__entry->dev), \
820 __entry->ino, \ 858 __entry->ino, \
821 __entry->size, \ 859 __entry->size, \
@@ -824,7 +862,7 @@ TRACE_EVENT(name, \
824 __entry->count, \ 862 __entry->count, \
825 __print_flags(__entry->flags, "|", BMAPI_FLAGS), \ 863 __print_flags(__entry->flags, "|", BMAPI_FLAGS), \
826 __entry->startoff, \ 864 __entry->startoff, \
827 __entry->startblock, \ 865 xfs_fmtfsblock(__entry->startblock), \
828 __entry->blockcount) \ 866 __entry->blockcount) \
829) 867)
830DEFINE_IOMAP_EVENT(xfs_iomap_enter); 868DEFINE_IOMAP_EVENT(xfs_iomap_enter);
@@ -897,28 +935,32 @@ TRACE_EVENT(xfs_itruncate_start,
897 __entry->toss_finish) 935 __entry->toss_finish)
898); 936);
899 937
938DECLARE_EVENT_CLASS(xfs_itrunc_class,
939 TP_PROTO(struct xfs_inode *ip, xfs_fsize_t new_size),
940 TP_ARGS(ip, new_size),
941 TP_STRUCT__entry(
942 __field(dev_t, dev)
943 __field(xfs_ino_t, ino)
944 __field(xfs_fsize_t, size)
945 __field(xfs_fsize_t, new_size)
946 ),
947 TP_fast_assign(
948 __entry->dev = VFS_I(ip)->i_sb->s_dev;
949 __entry->ino = ip->i_ino;
950 __entry->size = ip->i_d.di_size;
951 __entry->new_size = new_size;
952 ),
953 TP_printk("dev %d:%d ino 0x%llx size 0x%llx new_size 0x%llx",
954 MAJOR(__entry->dev), MINOR(__entry->dev),
955 __entry->ino,
956 __entry->size,
957 __entry->new_size)
958)
959
900#define DEFINE_ITRUNC_EVENT(name) \ 960#define DEFINE_ITRUNC_EVENT(name) \
901TRACE_EVENT(name, \ 961DEFINE_EVENT(xfs_itrunc_class, name, \
902 TP_PROTO(struct xfs_inode *ip, xfs_fsize_t new_size), \ 962 TP_PROTO(struct xfs_inode *ip, xfs_fsize_t new_size), \
903 TP_ARGS(ip, new_size), \ 963 TP_ARGS(ip, new_size))
904 TP_STRUCT__entry( \
905 __field(dev_t, dev) \
906 __field(xfs_ino_t, ino) \
907 __field(xfs_fsize_t, size) \
908 __field(xfs_fsize_t, new_size) \
909 ), \
910 TP_fast_assign( \
911 __entry->dev = VFS_I(ip)->i_sb->s_dev; \
912 __entry->ino = ip->i_ino; \
913 __entry->size = ip->i_d.di_size; \
914 __entry->new_size = new_size; \
915 ), \
916 TP_printk("dev %d:%d ino 0x%llx size 0x%llx new_size 0x%llx", \
917 MAJOR(__entry->dev), MINOR(__entry->dev), \
918 __entry->ino, \
919 __entry->size, \
920 __entry->new_size) \
921)
922DEFINE_ITRUNC_EVENT(xfs_itruncate_finish_start); 964DEFINE_ITRUNC_EVENT(xfs_itruncate_finish_start);
923DEFINE_ITRUNC_EVENT(xfs_itruncate_finish_end); 965DEFINE_ITRUNC_EVENT(xfs_itruncate_finish_end);
924 966
@@ -1037,28 +1079,28 @@ TRACE_EVENT(xfs_alloc_unbusy,
1037 1079
1038TRACE_EVENT(xfs_alloc_busysearch, 1080TRACE_EVENT(xfs_alloc_busysearch,
1039 TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, xfs_agblock_t agbno, 1081 TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, xfs_agblock_t agbno,
1040 xfs_extlen_t len, int found), 1082 xfs_extlen_t len, xfs_lsn_t lsn),
1041 TP_ARGS(mp, agno, agbno, len, found), 1083 TP_ARGS(mp, agno, agbno, len, lsn),
1042 TP_STRUCT__entry( 1084 TP_STRUCT__entry(
1043 __field(dev_t, dev) 1085 __field(dev_t, dev)
1044 __field(xfs_agnumber_t, agno) 1086 __field(xfs_agnumber_t, agno)
1045 __field(xfs_agblock_t, agbno) 1087 __field(xfs_agblock_t, agbno)
1046 __field(xfs_extlen_t, len) 1088 __field(xfs_extlen_t, len)
1047 __field(int, found) 1089 __field(xfs_lsn_t, lsn)
1048 ), 1090 ),
1049 TP_fast_assign( 1091 TP_fast_assign(
1050 __entry->dev = mp->m_super->s_dev; 1092 __entry->dev = mp->m_super->s_dev;
1051 __entry->agno = agno; 1093 __entry->agno = agno;
1052 __entry->agbno = agbno; 1094 __entry->agbno = agbno;
1053 __entry->len = len; 1095 __entry->len = len;
1054 __entry->found = found; 1096 __entry->lsn = lsn;
1055 ), 1097 ),
1056 TP_printk("dev %d:%d agno %u agbno %u len %u %s", 1098 TP_printk("dev %d:%d agno %u agbno %u len %u force lsn 0x%llx",
1057 MAJOR(__entry->dev), MINOR(__entry->dev), 1099 MAJOR(__entry->dev), MINOR(__entry->dev),
1058 __entry->agno, 1100 __entry->agno,
1059 __entry->agbno, 1101 __entry->agbno,
1060 __entry->len, 1102 __entry->len,
1061 __print_symbolic(__entry->found, XFS_BUSY_STATES)) 1103 __entry->lsn)
1062); 1104);
1063 1105
1064TRACE_EVENT(xfs_agf, 1106TRACE_EVENT(xfs_agf,
@@ -1152,77 +1194,80 @@ TRACE_EVENT(xfs_free_extent,
1152 1194
1153); 1195);
1154 1196
1155#define DEFINE_ALLOC_EVENT(name) \ 1197DECLARE_EVENT_CLASS(xfs_alloc_class,
1156TRACE_EVENT(name, \ 1198 TP_PROTO(struct xfs_alloc_arg *args),
1157 TP_PROTO(struct xfs_alloc_arg *args), \ 1199 TP_ARGS(args),
1158 TP_ARGS(args), \ 1200 TP_STRUCT__entry(
1159 TP_STRUCT__entry( \ 1201 __field(dev_t, dev)
1160 __field(dev_t, dev) \ 1202 __field(xfs_agnumber_t, agno)
1161 __field(xfs_agnumber_t, agno) \ 1203 __field(xfs_agblock_t, agbno)
1162 __field(xfs_agblock_t, agbno) \ 1204 __field(xfs_extlen_t, minlen)
1163 __field(xfs_extlen_t, minlen) \ 1205 __field(xfs_extlen_t, maxlen)
1164 __field(xfs_extlen_t, maxlen) \ 1206 __field(xfs_extlen_t, mod)
1165 __field(xfs_extlen_t, mod) \ 1207 __field(xfs_extlen_t, prod)
1166 __field(xfs_extlen_t, prod) \ 1208 __field(xfs_extlen_t, minleft)
1167 __field(xfs_extlen_t, minleft) \ 1209 __field(xfs_extlen_t, total)
1168 __field(xfs_extlen_t, total) \ 1210 __field(xfs_extlen_t, alignment)
1169 __field(xfs_extlen_t, alignment) \ 1211 __field(xfs_extlen_t, minalignslop)
1170 __field(xfs_extlen_t, minalignslop) \ 1212 __field(xfs_extlen_t, len)
1171 __field(xfs_extlen_t, len) \ 1213 __field(short, type)
1172 __field(short, type) \ 1214 __field(short, otype)
1173 __field(short, otype) \ 1215 __field(char, wasdel)
1174 __field(char, wasdel) \ 1216 __field(char, wasfromfl)
1175 __field(char, wasfromfl) \ 1217 __field(char, isfl)
1176 __field(char, isfl) \ 1218 __field(char, userdata)
1177 __field(char, userdata) \ 1219 __field(xfs_fsblock_t, firstblock)
1178 __field(xfs_fsblock_t, firstblock) \ 1220 ),
1179 ), \ 1221 TP_fast_assign(
1180 TP_fast_assign( \ 1222 __entry->dev = args->mp->m_super->s_dev;
1181 __entry->dev = args->mp->m_super->s_dev; \ 1223 __entry->agno = args->agno;
1182 __entry->agno = args->agno; \ 1224 __entry->agbno = args->agbno;
1183 __entry->agbno = args->agbno; \ 1225 __entry->minlen = args->minlen;
1184 __entry->minlen = args->minlen; \ 1226 __entry->maxlen = args->maxlen;
1185 __entry->maxlen = args->maxlen; \ 1227 __entry->mod = args->mod;
1186 __entry->mod = args->mod; \ 1228 __entry->prod = args->prod;
1187 __entry->prod = args->prod; \ 1229 __entry->minleft = args->minleft;
1188 __entry->minleft = args->minleft; \ 1230 __entry->total = args->total;
1189 __entry->total = args->total; \ 1231 __entry->alignment = args->alignment;
1190 __entry->alignment = args->alignment; \ 1232 __entry->minalignslop = args->minalignslop;
1191 __entry->minalignslop = args->minalignslop; \ 1233 __entry->len = args->len;
1192 __entry->len = args->len; \ 1234 __entry->type = args->type;
1193 __entry->type = args->type; \ 1235 __entry->otype = args->otype;
1194 __entry->otype = args->otype; \ 1236 __entry->wasdel = args->wasdel;
1195 __entry->wasdel = args->wasdel; \ 1237 __entry->wasfromfl = args->wasfromfl;
1196 __entry->wasfromfl = args->wasfromfl; \ 1238 __entry->isfl = args->isfl;
1197 __entry->isfl = args->isfl; \ 1239 __entry->userdata = args->userdata;
1198 __entry->userdata = args->userdata; \ 1240 __entry->firstblock = args->firstblock;
1199 __entry->firstblock = args->firstblock; \ 1241 ),
1200 ), \ 1242 TP_printk("dev %d:%d agno %u agbno %u minlen %u maxlen %u mod %u "
1201 TP_printk("dev %d:%d agno %u agbno %u minlen %u maxlen %u mod %u " \ 1243 "prod %u minleft %u total %u alignment %u minalignslop %u "
1202 "prod %u minleft %u total %u alignment %u minalignslop %u " \ 1244 "len %u type %s otype %s wasdel %d wasfromfl %d isfl %d "
1203 "len %u type %s otype %s wasdel %d wasfromfl %d isfl %d " \ 1245 "userdata %d firstblock 0x%llx",
1204 "userdata %d firstblock 0x%llx", \ 1246 MAJOR(__entry->dev), MINOR(__entry->dev),
1205 MAJOR(__entry->dev), MINOR(__entry->dev), \ 1247 __entry->agno,
1206 __entry->agno, \ 1248 __entry->agbno,
1207 __entry->agbno, \ 1249 __entry->minlen,
1208 __entry->minlen, \ 1250 __entry->maxlen,
1209 __entry->maxlen, \ 1251 __entry->mod,
1210 __entry->mod, \ 1252 __entry->prod,
1211 __entry->prod, \ 1253 __entry->minleft,
1212 __entry->minleft, \ 1254 __entry->total,
1213 __entry->total, \ 1255 __entry->alignment,
1214 __entry->alignment, \ 1256 __entry->minalignslop,
1215 __entry->minalignslop, \ 1257 __entry->len,
1216 __entry->len, \ 1258 __print_symbolic(__entry->type, XFS_ALLOC_TYPES),
1217 __print_symbolic(__entry->type, XFS_ALLOC_TYPES), \ 1259 __print_symbolic(__entry->otype, XFS_ALLOC_TYPES),
1218 __print_symbolic(__entry->otype, XFS_ALLOC_TYPES), \ 1260 __entry->wasdel,
1219 __entry->wasdel, \ 1261 __entry->wasfromfl,
1220 __entry->wasfromfl, \ 1262 __entry->isfl,
1221 __entry->isfl, \ 1263 __entry->userdata,
1222 __entry->userdata, \ 1264 __entry->firstblock)
1223 __entry->firstblock) \
1224) 1265)
1225 1266
1267#define DEFINE_ALLOC_EVENT(name) \
1268DEFINE_EVENT(xfs_alloc_class, name, \
1269 TP_PROTO(struct xfs_alloc_arg *args), \
1270 TP_ARGS(args))
1226DEFINE_ALLOC_EVENT(xfs_alloc_exact_done); 1271DEFINE_ALLOC_EVENT(xfs_alloc_exact_done);
1227DEFINE_ALLOC_EVENT(xfs_alloc_exact_error); 1272DEFINE_ALLOC_EVENT(xfs_alloc_exact_error);
1228DEFINE_ALLOC_EVENT(xfs_alloc_near_nominleft); 1273DEFINE_ALLOC_EVENT(xfs_alloc_near_nominleft);
@@ -1245,92 +1290,100 @@ DEFINE_ALLOC_EVENT(xfs_alloc_vextent_noagbp);
1245DEFINE_ALLOC_EVENT(xfs_alloc_vextent_loopfailed); 1290DEFINE_ALLOC_EVENT(xfs_alloc_vextent_loopfailed);
1246DEFINE_ALLOC_EVENT(xfs_alloc_vextent_allfailed); 1291DEFINE_ALLOC_EVENT(xfs_alloc_vextent_allfailed);
1247 1292
1248#define DEFINE_DIR2_TRACE(tname) \ 1293DECLARE_EVENT_CLASS(xfs_dir2_class,
1249TRACE_EVENT(tname, \ 1294 TP_PROTO(struct xfs_da_args *args),
1295 TP_ARGS(args),
1296 TP_STRUCT__entry(
1297 __field(dev_t, dev)
1298 __field(xfs_ino_t, ino)
1299 __dynamic_array(char, name, args->namelen)
1300 __field(int, namelen)
1301 __field(xfs_dahash_t, hashval)
1302 __field(xfs_ino_t, inumber)
1303 __field(int, op_flags)
1304 ),
1305 TP_fast_assign(
1306 __entry->dev = VFS_I(args->dp)->i_sb->s_dev;
1307 __entry->ino = args->dp->i_ino;
1308 if (args->namelen)
1309 memcpy(__get_str(name), args->name, args->namelen);
1310 __entry->namelen = args->namelen;
1311 __entry->hashval = args->hashval;
1312 __entry->inumber = args->inumber;
1313 __entry->op_flags = args->op_flags;
1314 ),
1315 TP_printk("dev %d:%d ino 0x%llx name %.*s namelen %d hashval 0x%x "
1316 "inumber 0x%llx op_flags %s",
1317 MAJOR(__entry->dev), MINOR(__entry->dev),
1318 __entry->ino,
1319 __entry->namelen,
1320 __entry->namelen ? __get_str(name) : NULL,
1321 __entry->namelen,
1322 __entry->hashval,
1323 __entry->inumber,
1324 __print_flags(__entry->op_flags, "|", XFS_DA_OP_FLAGS))
1325)
1326
1327#define DEFINE_DIR2_EVENT(name) \
1328DEFINE_EVENT(xfs_dir2_class, name, \
1250 TP_PROTO(struct xfs_da_args *args), \ 1329 TP_PROTO(struct xfs_da_args *args), \
1251 TP_ARGS(args), \ 1330 TP_ARGS(args))
1252 TP_STRUCT__entry( \ 1331DEFINE_DIR2_EVENT(xfs_dir2_sf_addname);
1253 __field(dev_t, dev) \ 1332DEFINE_DIR2_EVENT(xfs_dir2_sf_create);
1254 __field(xfs_ino_t, ino) \ 1333DEFINE_DIR2_EVENT(xfs_dir2_sf_lookup);
1255 __dynamic_array(char, name, args->namelen) \ 1334DEFINE_DIR2_EVENT(xfs_dir2_sf_replace);
1256 __field(int, namelen) \ 1335DEFINE_DIR2_EVENT(xfs_dir2_sf_removename);
1257 __field(xfs_dahash_t, hashval) \ 1336DEFINE_DIR2_EVENT(xfs_dir2_sf_toino4);
1258 __field(xfs_ino_t, inumber) \ 1337DEFINE_DIR2_EVENT(xfs_dir2_sf_toino8);
1259 __field(int, op_flags) \ 1338DEFINE_DIR2_EVENT(xfs_dir2_sf_to_block);
1260 ), \ 1339DEFINE_DIR2_EVENT(xfs_dir2_block_addname);
1261 TP_fast_assign( \ 1340DEFINE_DIR2_EVENT(xfs_dir2_block_lookup);
1262 __entry->dev = VFS_I(args->dp)->i_sb->s_dev; \ 1341DEFINE_DIR2_EVENT(xfs_dir2_block_replace);
1263 __entry->ino = args->dp->i_ino; \ 1342DEFINE_DIR2_EVENT(xfs_dir2_block_removename);
1264 if (args->namelen) \ 1343DEFINE_DIR2_EVENT(xfs_dir2_block_to_sf);
1265 memcpy(__get_str(name), args->name, args->namelen); \ 1344DEFINE_DIR2_EVENT(xfs_dir2_block_to_leaf);
1266 __entry->namelen = args->namelen; \ 1345DEFINE_DIR2_EVENT(xfs_dir2_leaf_addname);
1267 __entry->hashval = args->hashval; \ 1346DEFINE_DIR2_EVENT(xfs_dir2_leaf_lookup);
1268 __entry->inumber = args->inumber; \ 1347DEFINE_DIR2_EVENT(xfs_dir2_leaf_replace);
1269 __entry->op_flags = args->op_flags; \ 1348DEFINE_DIR2_EVENT(xfs_dir2_leaf_removename);
1270 ), \ 1349DEFINE_DIR2_EVENT(xfs_dir2_leaf_to_block);
1271 TP_printk("dev %d:%d ino 0x%llx name %.*s namelen %d hashval 0x%x " \ 1350DEFINE_DIR2_EVENT(xfs_dir2_leaf_to_node);
1272 "inumber 0x%llx op_flags %s", \ 1351DEFINE_DIR2_EVENT(xfs_dir2_node_addname);
1273 MAJOR(__entry->dev), MINOR(__entry->dev), \ 1352DEFINE_DIR2_EVENT(xfs_dir2_node_lookup);
1274 __entry->ino, \ 1353DEFINE_DIR2_EVENT(xfs_dir2_node_replace);
1275 __entry->namelen, \ 1354DEFINE_DIR2_EVENT(xfs_dir2_node_removename);
1276 __entry->namelen ? __get_str(name) : NULL, \ 1355DEFINE_DIR2_EVENT(xfs_dir2_node_to_leaf);
1277 __entry->namelen, \ 1356
1278 __entry->hashval, \ 1357DECLARE_EVENT_CLASS(xfs_dir2_space_class,
1279 __entry->inumber, \ 1358 TP_PROTO(struct xfs_da_args *args, int idx),
1280 __print_flags(__entry->op_flags, "|", XFS_DA_OP_FLAGS)) \ 1359 TP_ARGS(args, idx),
1360 TP_STRUCT__entry(
1361 __field(dev_t, dev)
1362 __field(xfs_ino_t, ino)
1363 __field(int, op_flags)
1364 __field(int, idx)
1365 ),
1366 TP_fast_assign(
1367 __entry->dev = VFS_I(args->dp)->i_sb->s_dev;
1368 __entry->ino = args->dp->i_ino;
1369 __entry->op_flags = args->op_flags;
1370 __entry->idx = idx;
1371 ),
1372 TP_printk("dev %d:%d ino 0x%llx op_flags %s index %d",
1373 MAJOR(__entry->dev), MINOR(__entry->dev),
1374 __entry->ino,
1375 __print_flags(__entry->op_flags, "|", XFS_DA_OP_FLAGS),
1376 __entry->idx)
1281) 1377)
1282DEFINE_DIR2_TRACE(xfs_dir2_sf_addname);
1283DEFINE_DIR2_TRACE(xfs_dir2_sf_create);
1284DEFINE_DIR2_TRACE(xfs_dir2_sf_lookup);
1285DEFINE_DIR2_TRACE(xfs_dir2_sf_replace);
1286DEFINE_DIR2_TRACE(xfs_dir2_sf_removename);
1287DEFINE_DIR2_TRACE(xfs_dir2_sf_toino4);
1288DEFINE_DIR2_TRACE(xfs_dir2_sf_toino8);
1289DEFINE_DIR2_TRACE(xfs_dir2_sf_to_block);
1290DEFINE_DIR2_TRACE(xfs_dir2_block_addname);
1291DEFINE_DIR2_TRACE(xfs_dir2_block_lookup);
1292DEFINE_DIR2_TRACE(xfs_dir2_block_replace);
1293DEFINE_DIR2_TRACE(xfs_dir2_block_removename);
1294DEFINE_DIR2_TRACE(xfs_dir2_block_to_sf);
1295DEFINE_DIR2_TRACE(xfs_dir2_block_to_leaf);
1296DEFINE_DIR2_TRACE(xfs_dir2_leaf_addname);
1297DEFINE_DIR2_TRACE(xfs_dir2_leaf_lookup);
1298DEFINE_DIR2_TRACE(xfs_dir2_leaf_replace);
1299DEFINE_DIR2_TRACE(xfs_dir2_leaf_removename);
1300DEFINE_DIR2_TRACE(xfs_dir2_leaf_to_block);
1301DEFINE_DIR2_TRACE(xfs_dir2_leaf_to_node);
1302DEFINE_DIR2_TRACE(xfs_dir2_node_addname);
1303DEFINE_DIR2_TRACE(xfs_dir2_node_lookup);
1304DEFINE_DIR2_TRACE(xfs_dir2_node_replace);
1305DEFINE_DIR2_TRACE(xfs_dir2_node_removename);
1306DEFINE_DIR2_TRACE(xfs_dir2_node_to_leaf);
1307 1378
1308#define DEFINE_DIR2_SPACE_TRACE(tname) \ 1379#define DEFINE_DIR2_SPACE_EVENT(name) \
1309TRACE_EVENT(tname, \ 1380DEFINE_EVENT(xfs_dir2_space_class, name, \
1310 TP_PROTO(struct xfs_da_args *args, int idx), \ 1381 TP_PROTO(struct xfs_da_args *args, int idx), \
1311 TP_ARGS(args, idx), \ 1382 TP_ARGS(args, idx))
1312 TP_STRUCT__entry( \ 1383DEFINE_DIR2_SPACE_EVENT(xfs_dir2_leafn_add);
1313 __field(dev_t, dev) \ 1384DEFINE_DIR2_SPACE_EVENT(xfs_dir2_leafn_remove);
1314 __field(xfs_ino_t, ino) \ 1385DEFINE_DIR2_SPACE_EVENT(xfs_dir2_grow_inode);
1315 __field(int, op_flags) \ 1386DEFINE_DIR2_SPACE_EVENT(xfs_dir2_shrink_inode);
1316 __field(int, idx) \
1317 ), \
1318 TP_fast_assign( \
1319 __entry->dev = VFS_I(args->dp)->i_sb->s_dev; \
1320 __entry->ino = args->dp->i_ino; \
1321 __entry->op_flags = args->op_flags; \
1322 __entry->idx = idx; \
1323 ), \
1324 TP_printk("dev %d:%d ino 0x%llx op_flags %s index %d", \
1325 MAJOR(__entry->dev), MINOR(__entry->dev), \
1326 __entry->ino, \
1327 __print_flags(__entry->op_flags, "|", XFS_DA_OP_FLAGS), \
1328 __entry->idx) \
1329)
1330DEFINE_DIR2_SPACE_TRACE(xfs_dir2_leafn_add);
1331DEFINE_DIR2_SPACE_TRACE(xfs_dir2_leafn_remove);
1332DEFINE_DIR2_SPACE_TRACE(xfs_dir2_grow_inode);
1333DEFINE_DIR2_SPACE_TRACE(xfs_dir2_shrink_inode);
1334 1387
1335TRACE_EVENT(xfs_dir2_leafn_moveents, 1388TRACE_EVENT(xfs_dir2_leafn_moveents,
1336 TP_PROTO(struct xfs_da_args *args, int src_idx, int dst_idx, int count), 1389 TP_PROTO(struct xfs_da_args *args, int src_idx, int dst_idx, int count),
diff --git a/fs/xfs/linux-2.6/xfs_xattr.c b/fs/xfs/linux-2.6/xfs_xattr.c
index 497c7fb75cc1..0b1878857fc3 100644
--- a/fs/xfs/linux-2.6/xfs_xattr.c
+++ b/fs/xfs/linux-2.6/xfs_xattr.c
@@ -30,10 +30,10 @@
30 30
31 31
32static int 32static int
33__xfs_xattr_get(struct inode *inode, const char *name, 33xfs_xattr_get(struct dentry *dentry, const char *name,
34 void *value, size_t size, int xflags) 34 void *value, size_t size, int xflags)
35{ 35{
36 struct xfs_inode *ip = XFS_I(inode); 36 struct xfs_inode *ip = XFS_I(dentry->d_inode);
37 int error, asize = size; 37 int error, asize = size;
38 38
39 if (strcmp(name, "") == 0) 39 if (strcmp(name, "") == 0)
@@ -52,10 +52,10 @@ __xfs_xattr_get(struct inode *inode, const char *name,
52} 52}
53 53
54static int 54static int
55__xfs_xattr_set(struct inode *inode, const char *name, const void *value, 55xfs_xattr_set(struct dentry *dentry, const char *name, const void *value,
56 size_t size, int flags, int xflags) 56 size_t size, int flags, int xflags)
57{ 57{
58 struct xfs_inode *ip = XFS_I(inode); 58 struct xfs_inode *ip = XFS_I(dentry->d_inode);
59 59
60 if (strcmp(name, "") == 0) 60 if (strcmp(name, "") == 0)
61 return -EINVAL; 61 return -EINVAL;
@@ -71,75 +71,34 @@ __xfs_xattr_set(struct inode *inode, const char *name, const void *value,
71 return -xfs_attr_set(ip, name, (void *)value, size, xflags); 71 return -xfs_attr_set(ip, name, (void *)value, size, xflags);
72} 72}
73 73
74static int
75xfs_xattr_user_get(struct inode *inode, const char *name,
76 void *value, size_t size)
77{
78 return __xfs_xattr_get(inode, name, value, size, 0);
79}
80
81static int
82xfs_xattr_user_set(struct inode *inode, const char *name,
83 const void *value, size_t size, int flags)
84{
85 return __xfs_xattr_set(inode, name, value, size, flags, 0);
86}
87
88static struct xattr_handler xfs_xattr_user_handler = { 74static struct xattr_handler xfs_xattr_user_handler = {
89 .prefix = XATTR_USER_PREFIX, 75 .prefix = XATTR_USER_PREFIX,
90 .get = xfs_xattr_user_get, 76 .flags = 0, /* no flags implies user namespace */
91 .set = xfs_xattr_user_set, 77 .get = xfs_xattr_get,
78 .set = xfs_xattr_set,
92}; 79};
93 80
94
95static int
96xfs_xattr_trusted_get(struct inode *inode, const char *name,
97 void *value, size_t size)
98{
99 return __xfs_xattr_get(inode, name, value, size, ATTR_ROOT);
100}
101
102static int
103xfs_xattr_trusted_set(struct inode *inode, const char *name,
104 const void *value, size_t size, int flags)
105{
106 return __xfs_xattr_set(inode, name, value, size, flags, ATTR_ROOT);
107}
108
109static struct xattr_handler xfs_xattr_trusted_handler = { 81static struct xattr_handler xfs_xattr_trusted_handler = {
110 .prefix = XATTR_TRUSTED_PREFIX, 82 .prefix = XATTR_TRUSTED_PREFIX,
111 .get = xfs_xattr_trusted_get, 83 .flags = ATTR_ROOT,
112 .set = xfs_xattr_trusted_set, 84 .get = xfs_xattr_get,
85 .set = xfs_xattr_set,
113}; 86};
114 87
115
116static int
117xfs_xattr_secure_get(struct inode *inode, const char *name,
118 void *value, size_t size)
119{
120 return __xfs_xattr_get(inode, name, value, size, ATTR_SECURE);
121}
122
123static int
124xfs_xattr_secure_set(struct inode *inode, const char *name,
125 const void *value, size_t size, int flags)
126{
127 return __xfs_xattr_set(inode, name, value, size, flags, ATTR_SECURE);
128}
129
130static struct xattr_handler xfs_xattr_security_handler = { 88static struct xattr_handler xfs_xattr_security_handler = {
131 .prefix = XATTR_SECURITY_PREFIX, 89 .prefix = XATTR_SECURITY_PREFIX,
132 .get = xfs_xattr_secure_get, 90 .flags = ATTR_SECURE,
133 .set = xfs_xattr_secure_set, 91 .get = xfs_xattr_get,
92 .set = xfs_xattr_set,
134}; 93};
135 94
136
137struct xattr_handler *xfs_xattr_handlers[] = { 95struct xattr_handler *xfs_xattr_handlers[] = {
138 &xfs_xattr_user_handler, 96 &xfs_xattr_user_handler,
139 &xfs_xattr_trusted_handler, 97 &xfs_xattr_trusted_handler,
140 &xfs_xattr_security_handler, 98 &xfs_xattr_security_handler,
141#ifdef CONFIG_XFS_POSIX_ACL 99#ifdef CONFIG_XFS_POSIX_ACL
142 &xfs_xattr_system_handler, 100 &xfs_xattr_acl_access_handler,
101 &xfs_xattr_acl_default_handler,
143#endif 102#endif
144 NULL 103 NULL
145}; 104};
diff --git a/fs/xfs/quota/xfs_qm_syscalls.c b/fs/xfs/quota/xfs_qm_syscalls.c
index 71af76fe8a23..873e07e29074 100644
--- a/fs/xfs/quota/xfs_qm_syscalls.c
+++ b/fs/xfs/quota/xfs_qm_syscalls.c
@@ -891,7 +891,7 @@ xfs_qm_dqrele_all_inodes(
891 uint flags) 891 uint flags)
892{ 892{
893 ASSERT(mp->m_quotainfo); 893 ASSERT(mp->m_quotainfo);
894 xfs_inode_ag_iterator(mp, xfs_dqrele_inode, flags, XFS_ICI_NO_TAG); 894 xfs_inode_ag_iterator(mp, xfs_dqrele_inode, flags, XFS_ICI_NO_TAG, 0);
895} 895}
896 896
897/*------------------------------------------------------------------------*/ 897/*------------------------------------------------------------------------*/
diff --git a/fs/xfs/xfs_acl.h b/fs/xfs/xfs_acl.h
index 947b150df8ed..00fd357c3e46 100644
--- a/fs/xfs/xfs_acl.h
+++ b/fs/xfs/xfs_acl.h
@@ -49,7 +49,8 @@ extern int xfs_acl_chmod(struct inode *inode);
49extern int posix_acl_access_exists(struct inode *inode); 49extern int posix_acl_access_exists(struct inode *inode);
50extern int posix_acl_default_exists(struct inode *inode); 50extern int posix_acl_default_exists(struct inode *inode);
51 51
52extern struct xattr_handler xfs_xattr_system_handler; 52extern struct xattr_handler xfs_xattr_acl_access_handler;
53extern struct xattr_handler xfs_xattr_acl_default_handler;
53#else 54#else
54# define xfs_check_acl NULL 55# define xfs_check_acl NULL
55# define xfs_get_acl(inode, type) NULL 56# define xfs_get_acl(inode, type) NULL
diff --git a/fs/xfs/xfs_alloc.c b/fs/xfs/xfs_alloc.c
index a1c65fc6d9c4..275b1f4f9430 100644
--- a/fs/xfs/xfs_alloc.c
+++ b/fs/xfs/xfs_alloc.c
@@ -2563,43 +2563,41 @@ xfs_alloc_search_busy(xfs_trans_t *tp,
2563 xfs_mount_t *mp; 2563 xfs_mount_t *mp;
2564 xfs_perag_busy_t *bsy; 2564 xfs_perag_busy_t *bsy;
2565 xfs_agblock_t uend, bend; 2565 xfs_agblock_t uend, bend;
2566 xfs_lsn_t lsn; 2566 xfs_lsn_t lsn = 0;
2567 int cnt; 2567 int cnt;
2568 2568
2569 mp = tp->t_mountp; 2569 mp = tp->t_mountp;
2570 2570
2571 spin_lock(&mp->m_perag[agno].pagb_lock); 2571 spin_lock(&mp->m_perag[agno].pagb_lock);
2572 cnt = mp->m_perag[agno].pagb_count;
2573 2572
2574 uend = bno + len - 1; 2573 uend = bno + len - 1;
2575 2574
2576 /* search pagb_list for this slot, skipping open slots */ 2575 /*
2577 for (bsy = mp->m_perag[agno].pagb_list; cnt; bsy++) { 2576 * search pagb_list for this slot, skipping open slots. We have to
2577 * search the entire array as there may be multiple overlaps and
2578 * we have to get the most recent LSN for the log force to push out
2579 * all the transactions that span the range.
2580 */
2581 for (cnt = 0; cnt < mp->m_perag[agno].pagb_count; cnt++) {
2582 bsy = &mp->m_perag[agno].pagb_list[cnt];
2583 if (!bsy->busy_tp)
2584 continue;
2578 2585
2579 /* 2586 bend = bsy->busy_start + bsy->busy_length - 1;
2580 * (start1,length1) within (start2, length2) 2587 if (bno > bend || uend < bsy->busy_start)
2581 */ 2588 continue;
2582 if (bsy->busy_tp != NULL) {
2583 bend = bsy->busy_start + bsy->busy_length - 1;
2584 if ((bno > bend) || (uend < bsy->busy_start)) {
2585 cnt--;
2586 } else {
2587 break;
2588 }
2589 }
2590 }
2591 2589
2592 trace_xfs_alloc_busysearch(mp, agno, bno, len, !!cnt); 2590 /* (start1,length1) within (start2, length2) */
2591 if (XFS_LSN_CMP(bsy->busy_tp->t_commit_lsn, lsn) > 0)
2592 lsn = bsy->busy_tp->t_commit_lsn;
2593 }
2594 spin_unlock(&mp->m_perag[agno].pagb_lock);
2595 trace_xfs_alloc_busysearch(tp->t_mountp, agno, bno, len, lsn);
2593 2596
2594 /* 2597 /*
2595 * If a block was found, force the log through the LSN of the 2598 * If a block was found, force the log through the LSN of the
2596 * transaction that freed the block 2599 * transaction that freed the block
2597 */ 2600 */
2598 if (cnt) { 2601 if (lsn)
2599 lsn = bsy->busy_tp->t_commit_lsn;
2600 spin_unlock(&mp->m_perag[agno].pagb_lock);
2601 xfs_log_force(mp, lsn, XFS_LOG_FORCE|XFS_LOG_SYNC); 2602 xfs_log_force(mp, lsn, XFS_LOG_FORCE|XFS_LOG_SYNC);
2602 } else {
2603 spin_unlock(&mp->m_perag[agno].pagb_lock);
2604 }
2605} 2603}
diff --git a/fs/xfs/xfs_bmap_btree.h b/fs/xfs/xfs_bmap_btree.h
index 5549d495947f..cf07ca7c22e7 100644
--- a/fs/xfs/xfs_bmap_btree.h
+++ b/fs/xfs/xfs_bmap_btree.h
@@ -46,20 +46,12 @@ typedef struct xfs_bmdr_block {
46#define BMBT_STARTBLOCK_BITLEN 52 46#define BMBT_STARTBLOCK_BITLEN 52
47#define BMBT_BLOCKCOUNT_BITLEN 21 47#define BMBT_BLOCKCOUNT_BITLEN 21
48 48
49 49typedef struct xfs_bmbt_rec {
50#define BMBT_USE_64 1
51
52typedef struct xfs_bmbt_rec_32
53{
54 __uint32_t l0, l1, l2, l3;
55} xfs_bmbt_rec_32_t;
56typedef struct xfs_bmbt_rec_64
57{
58 __be64 l0, l1; 50 __be64 l0, l1;
59} xfs_bmbt_rec_64_t; 51} xfs_bmbt_rec_t;
60 52
61typedef __uint64_t xfs_bmbt_rec_base_t; /* use this for casts */ 53typedef __uint64_t xfs_bmbt_rec_base_t; /* use this for casts */
62typedef xfs_bmbt_rec_64_t xfs_bmbt_rec_t, xfs_bmdr_rec_t; 54typedef xfs_bmbt_rec_t xfs_bmdr_rec_t;
63 55
64typedef struct xfs_bmbt_rec_host { 56typedef struct xfs_bmbt_rec_host {
65 __uint64_t l0, l1; 57 __uint64_t l0, l1;
diff --git a/fs/xfs/xfs_dfrag.c b/fs/xfs/xfs_dfrag.c
index d1483a4f71b8..84ca1cf16a1e 100644
--- a/fs/xfs/xfs_dfrag.c
+++ b/fs/xfs/xfs_dfrag.c
@@ -114,10 +114,82 @@ xfs_swapext(
114 return error; 114 return error;
115} 115}
116 116
117/*
118 * We need to check that the format of the data fork in the temporary inode is
119 * valid for the target inode before doing the swap. This is not a problem with
120 * attr1 because of the fixed fork offset, but attr2 has a dynamically sized
121 * data fork depending on the space the attribute fork is taking so we can get
122 * invalid formats on the target inode.
123 *
124 * E.g. target has space for 7 extents in extent format, temp inode only has
125 * space for 6. If we defragment down to 7 extents, then the tmp format is a
126 * btree, but when swapped it needs to be in extent format. Hence we can't just
127 * blindly swap data forks on attr2 filesystems.
128 *
129 * Note that we check the swap in both directions so that we don't end up with
130 * a corrupt temporary inode, either.
131 *
132 * Note that fixing the way xfs_fsr sets up the attribute fork in the source
133 * inode will prevent this situation from occurring, so all we do here is
134 * reject and log the attempt. basically we are putting the responsibility on
135 * userspace to get this right.
136 */
137static int
138xfs_swap_extents_check_format(
139 xfs_inode_t *ip, /* target inode */
140 xfs_inode_t *tip) /* tmp inode */
141{
142
143 /* Should never get a local format */
144 if (ip->i_d.di_format == XFS_DINODE_FMT_LOCAL ||
145 tip->i_d.di_format == XFS_DINODE_FMT_LOCAL)
146 return EINVAL;
147
148 /*
149 * if the target inode has less extents that then temporary inode then
150 * why did userspace call us?
151 */
152 if (ip->i_d.di_nextents < tip->i_d.di_nextents)
153 return EINVAL;
154
155 /*
156 * if the target inode is in extent form and the temp inode is in btree
157 * form then we will end up with the target inode in the wrong format
158 * as we already know there are less extents in the temp inode.
159 */
160 if (ip->i_d.di_format == XFS_DINODE_FMT_EXTENTS &&
161 tip->i_d.di_format == XFS_DINODE_FMT_BTREE)
162 return EINVAL;
163
164 /* Check temp in extent form to max in target */
165 if (tip->i_d.di_format == XFS_DINODE_FMT_EXTENTS &&
166 XFS_IFORK_NEXTENTS(tip, XFS_DATA_FORK) > ip->i_df.if_ext_max)
167 return EINVAL;
168
169 /* Check target in extent form to max in temp */
170 if (ip->i_d.di_format == XFS_DINODE_FMT_EXTENTS &&
171 XFS_IFORK_NEXTENTS(ip, XFS_DATA_FORK) > tip->i_df.if_ext_max)
172 return EINVAL;
173
174 /* Check root block of temp in btree form to max in target */
175 if (tip->i_d.di_format == XFS_DINODE_FMT_BTREE &&
176 XFS_IFORK_BOFF(ip) &&
177 tip->i_df.if_broot_bytes > XFS_IFORK_BOFF(ip))
178 return EINVAL;
179
180 /* Check root block of target in btree form to max in temp */
181 if (ip->i_d.di_format == XFS_DINODE_FMT_BTREE &&
182 XFS_IFORK_BOFF(tip) &&
183 ip->i_df.if_broot_bytes > XFS_IFORK_BOFF(tip))
184 return EINVAL;
185
186 return 0;
187}
188
117int 189int
118xfs_swap_extents( 190xfs_swap_extents(
119 xfs_inode_t *ip, 191 xfs_inode_t *ip, /* target inode */
120 xfs_inode_t *tip, 192 xfs_inode_t *tip, /* tmp inode */
121 xfs_swapext_t *sxp) 193 xfs_swapext_t *sxp)
122{ 194{
123 xfs_mount_t *mp; 195 xfs_mount_t *mp;
@@ -161,13 +233,6 @@ xfs_swap_extents(
161 goto out_unlock; 233 goto out_unlock;
162 } 234 }
163 235
164 /* Should never get a local format */
165 if (ip->i_d.di_format == XFS_DINODE_FMT_LOCAL ||
166 tip->i_d.di_format == XFS_DINODE_FMT_LOCAL) {
167 error = XFS_ERROR(EINVAL);
168 goto out_unlock;
169 }
170
171 if (VN_CACHED(VFS_I(tip)) != 0) { 236 if (VN_CACHED(VFS_I(tip)) != 0) {
172 error = xfs_flushinval_pages(tip, 0, -1, 237 error = xfs_flushinval_pages(tip, 0, -1,
173 FI_REMAPF_LOCKED); 238 FI_REMAPF_LOCKED);
@@ -189,13 +254,12 @@ xfs_swap_extents(
189 goto out_unlock; 254 goto out_unlock;
190 } 255 }
191 256
192 /* 257 /* check inode formats now that data is flushed */
193 * If the target has extended attributes, the tmp file 258 error = xfs_swap_extents_check_format(ip, tip);
194 * must also in order to ensure the correct data fork 259 if (error) {
195 * format. 260 xfs_fs_cmn_err(CE_NOTE, mp,
196 */ 261 "%s: inode 0x%llx format is incompatible for exchanging.",
197 if ( XFS_IFORK_Q(ip) != XFS_IFORK_Q(tip) ) { 262 __FILE__, ip->i_ino);
198 error = XFS_ERROR(EINVAL);
199 goto out_unlock; 263 goto out_unlock;
200 } 264 }
201 265
@@ -276,6 +340,16 @@ xfs_swap_extents(
276 *tifp = *tempifp; /* struct copy */ 340 *tifp = *tempifp; /* struct copy */
277 341
278 /* 342 /*
343 * Fix the in-memory data fork values that are dependent on the fork
344 * offset in the inode. We can't assume they remain the same as attr2
345 * has dynamic fork offsets.
346 */
347 ifp->if_ext_max = XFS_IFORK_SIZE(ip, XFS_DATA_FORK) /
348 (uint)sizeof(xfs_bmbt_rec_t);
349 tifp->if_ext_max = XFS_IFORK_SIZE(tip, XFS_DATA_FORK) /
350 (uint)sizeof(xfs_bmbt_rec_t);
351
352 /*
279 * Fix the on-disk inode values 353 * Fix the on-disk inode values
280 */ 354 */
281 tmp = (__uint64_t)ip->i_d.di_nblocks; 355 tmp = (__uint64_t)ip->i_d.di_nblocks;
diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c
index f5c904a10c11..155e798f30a1 100644
--- a/fs/xfs/xfs_iget.c
+++ b/fs/xfs/xfs_iget.c
@@ -73,7 +73,6 @@ xfs_inode_alloc(
73 ASSERT(atomic_read(&ip->i_pincount) == 0); 73 ASSERT(atomic_read(&ip->i_pincount) == 0);
74 ASSERT(!spin_is_locked(&ip->i_flags_lock)); 74 ASSERT(!spin_is_locked(&ip->i_flags_lock));
75 ASSERT(completion_done(&ip->i_flush)); 75 ASSERT(completion_done(&ip->i_flush));
76 ASSERT(!rwsem_is_locked(&ip->i_iolock.mr_lock));
77 76
78 mrlock_init(&ip->i_iolock, MRLOCK_BARRIER, "xfsio", ip->i_ino); 77 mrlock_init(&ip->i_iolock, MRLOCK_BARRIER, "xfsio", ip->i_ino);
79 78
@@ -91,7 +90,7 @@ xfs_inode_alloc(
91 ip->i_new_size = 0; 90 ip->i_new_size = 0;
92 91
93 /* prevent anyone from using this yet */ 92 /* prevent anyone from using this yet */
94 VFS_I(ip)->i_state = I_NEW|I_LOCK; 93 VFS_I(ip)->i_state = I_NEW;
95 94
96 return ip; 95 return ip;
97} 96}
@@ -217,7 +216,7 @@ xfs_iget_cache_hit(
217 trace_xfs_iget_reclaim(ip); 216 trace_xfs_iget_reclaim(ip);
218 goto out_error; 217 goto out_error;
219 } 218 }
220 inode->i_state = I_LOCK|I_NEW; 219 inode->i_state = I_NEW;
221 } else { 220 } else {
222 /* If the VFS inode is being torn down, pause and try again. */ 221 /* If the VFS inode is being torn down, pause and try again. */
223 if (!igrab(inode)) { 222 if (!igrab(inode)) {
@@ -478,17 +477,21 @@ xfs_ireclaim(
478{ 477{
479 struct xfs_mount *mp = ip->i_mount; 478 struct xfs_mount *mp = ip->i_mount;
480 struct xfs_perag *pag; 479 struct xfs_perag *pag;
480 xfs_agino_t agino = XFS_INO_TO_AGINO(mp, ip->i_ino);
481 481
482 XFS_STATS_INC(xs_ig_reclaims); 482 XFS_STATS_INC(xs_ig_reclaims);
483 483
484 /* 484 /*
485 * Remove the inode from the per-AG radix tree. It doesn't matter 485 * Remove the inode from the per-AG radix tree.
486 * if it was never added to it because radix_tree_delete can deal 486 *
487 * with that case just fine. 487 * Because radix_tree_delete won't complain even if the item was never
488 * added to the tree assert that it's been there before to catch
489 * problems with the inode life time early on.
488 */ 490 */
489 pag = xfs_get_perag(mp, ip->i_ino); 491 pag = xfs_get_perag(mp, ip->i_ino);
490 write_lock(&pag->pag_ici_lock); 492 write_lock(&pag->pag_ici_lock);
491 radix_tree_delete(&pag->pag_ici_root, XFS_INO_TO_AGINO(mp, ip->i_ino)); 493 if (!radix_tree_delete(&pag->pag_ici_root, agino))
494 ASSERT(0);
492 write_unlock(&pag->pag_ici_lock); 495 write_unlock(&pag->pag_ici_lock);
493 xfs_put_perag(mp, pag); 496 xfs_put_perag(mp, pag);
494 497
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index ce278b3ae7fc..ef77fd88c8e3 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -2841,8 +2841,8 @@ xfs_iflush(
2841 mp = ip->i_mount; 2841 mp = ip->i_mount;
2842 2842
2843 /* 2843 /*
2844 * If the inode isn't dirty, then just release the inode 2844 * If the inode isn't dirty, then just release the inode flush lock and
2845 * flush lock and do nothing. 2845 * do nothing.
2846 */ 2846 */
2847 if (xfs_inode_clean(ip)) { 2847 if (xfs_inode_clean(ip)) {
2848 xfs_ifunlock(ip); 2848 xfs_ifunlock(ip);
@@ -2868,6 +2868,19 @@ xfs_iflush(
2868 xfs_iunpin_wait(ip); 2868 xfs_iunpin_wait(ip);
2869 2869
2870 /* 2870 /*
2871 * For stale inodes we cannot rely on the backing buffer remaining
2872 * stale in cache for the remaining life of the stale inode and so
2873 * xfs_itobp() below may give us a buffer that no longer contains
2874 * inodes below. We have to check this after ensuring the inode is
2875 * unpinned so that it is safe to reclaim the stale inode after the
2876 * flush call.
2877 */
2878 if (xfs_iflags_test(ip, XFS_ISTALE)) {
2879 xfs_ifunlock(ip);
2880 return 0;
2881 }
2882
2883 /*
2871 * This may have been unpinned because the filesystem is shutting 2884 * This may have been unpinned because the filesystem is shutting
2872 * down forcibly. If that's the case we must not write this inode 2885 * down forcibly. If that's the case we must not write this inode
2873 * to disk, because the log record didn't make it to disk! 2886 * to disk, because the log record didn't make it to disk!
diff --git a/fs/xfs/xfs_inode_item.h b/fs/xfs/xfs_inode_item.h
index 65bae4c9b8bf..cc8df1ac7783 100644
--- a/fs/xfs/xfs_inode_item.h
+++ b/fs/xfs/xfs_inode_item.h
@@ -127,7 +127,7 @@ static inline int xfs_ilog_fdata(int w)
127#ifdef __KERNEL__ 127#ifdef __KERNEL__
128 128
129struct xfs_buf; 129struct xfs_buf;
130struct xfs_bmbt_rec_64; 130struct xfs_bmbt_rec;
131struct xfs_inode; 131struct xfs_inode;
132struct xfs_mount; 132struct xfs_mount;
133 133
@@ -140,9 +140,9 @@ typedef struct xfs_inode_log_item {
140 unsigned short ili_flags; /* misc flags */ 140 unsigned short ili_flags; /* misc flags */
141 unsigned short ili_logged; /* flushed logged data */ 141 unsigned short ili_logged; /* flushed logged data */
142 unsigned int ili_last_fields; /* fields when flushed */ 142 unsigned int ili_last_fields; /* fields when flushed */
143 struct xfs_bmbt_rec_64 *ili_extents_buf; /* array of logged 143 struct xfs_bmbt_rec *ili_extents_buf; /* array of logged
144 data exts */ 144 data exts */
145 struct xfs_bmbt_rec_64 *ili_aextents_buf; /* array of logged 145 struct xfs_bmbt_rec *ili_aextents_buf; /* array of logged
146 attr exts */ 146 attr exts */
147 unsigned int ili_pushbuf_flag; /* one bit used in push_ail */ 147 unsigned int ili_pushbuf_flag; /* one bit used in push_ail */
148 148
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index 4cb1792040e3..600b5b06aaeb 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -1441,6 +1441,7 @@ xlog_sync(xlog_t *log,
1441 XFS_BUF_ZEROFLAGS(bp); 1441 XFS_BUF_ZEROFLAGS(bp);
1442 XFS_BUF_BUSY(bp); 1442 XFS_BUF_BUSY(bp);
1443 XFS_BUF_ASYNC(bp); 1443 XFS_BUF_ASYNC(bp);
1444 bp->b_flags |= XBF_LOG_BUFFER;
1444 /* 1445 /*
1445 * Do an ordered write for the log block. 1446 * Do an ordered write for the log block.
1446 * Its unnecessary to flush the first split block in the log wrap case. 1447 * Its unnecessary to flush the first split block in the log wrap case.
@@ -1478,6 +1479,7 @@ xlog_sync(xlog_t *log,
1478 XFS_BUF_ZEROFLAGS(bp); 1479 XFS_BUF_ZEROFLAGS(bp);
1479 XFS_BUF_BUSY(bp); 1480 XFS_BUF_BUSY(bp);
1480 XFS_BUF_ASYNC(bp); 1481 XFS_BUF_ASYNC(bp);
1482 bp->b_flags |= XBF_LOG_BUFFER;
1481 if (log->l_mp->m_flags & XFS_MOUNT_BARRIER) 1483 if (log->l_mp->m_flags & XFS_MOUNT_BARRIER)
1482 XFS_BUF_ORDERED(bp); 1484 XFS_BUF_ORDERED(bp);
1483 dptr = XFS_BUF_PTR(bp); 1485 dptr = XFS_BUF_PTR(bp);
diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c
index 9e15a1185362..6be05f756d59 100644
--- a/fs/xfs/xfs_rtalloc.c
+++ b/fs/xfs/xfs_rtalloc.c
@@ -1517,6 +1517,8 @@ xfs_rtfree_range(
1517 */ 1517 */
1518 error = xfs_rtfind_forw(mp, tp, end, mp->m_sb.sb_rextents - 1, 1518 error = xfs_rtfind_forw(mp, tp, end, mp->m_sb.sb_rextents - 1,
1519 &postblock); 1519 &postblock);
1520 if (error)
1521 return error;
1520 /* 1522 /*
1521 * If there are blocks not being freed at the front of the 1523 * If there are blocks not being freed at the front of the
1522 * old extent, add summary data for them to be allocated. 1524 * old extent, add summary data for them to be allocated.
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c
index 6558ffd8d140..6f268756bf36 100644
--- a/fs/xfs/xfs_vnodeops.c
+++ b/fs/xfs/xfs_vnodeops.c
@@ -70,7 +70,6 @@ xfs_setattr(
70 uint commit_flags=0; 70 uint commit_flags=0;
71 uid_t uid=0, iuid=0; 71 uid_t uid=0, iuid=0;
72 gid_t gid=0, igid=0; 72 gid_t gid=0, igid=0;
73 int timeflags = 0;
74 struct xfs_dquot *udqp, *gdqp, *olddquot1, *olddquot2; 73 struct xfs_dquot *udqp, *gdqp, *olddquot1, *olddquot2;
75 int need_iolock = 1; 74 int need_iolock = 1;
76 75
@@ -135,16 +134,13 @@ xfs_setattr(
135 if (flags & XFS_ATTR_NOLOCK) 134 if (flags & XFS_ATTR_NOLOCK)
136 need_iolock = 0; 135 need_iolock = 0;
137 if (!(mask & ATTR_SIZE)) { 136 if (!(mask & ATTR_SIZE)) {
138 if ((mask != (ATTR_CTIME|ATTR_ATIME|ATTR_MTIME)) || 137 tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_NOT_SIZE);
139 (mp->m_flags & XFS_MOUNT_WSYNC)) { 138 commit_flags = 0;
140 tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_NOT_SIZE); 139 code = xfs_trans_reserve(tp, 0, XFS_ICHANGE_LOG_RES(mp),
141 commit_flags = 0; 140 0, 0, 0);
142 if ((code = xfs_trans_reserve(tp, 0, 141 if (code) {
143 XFS_ICHANGE_LOG_RES(mp), 0, 142 lock_flags = 0;
144 0, 0))) { 143 goto error_return;
145 lock_flags = 0;
146 goto error_return;
147 }
148 } 144 }
149 } else { 145 } else {
150 if (DM_EVENT_ENABLED(ip, DM_EVENT_TRUNCATE) && 146 if (DM_EVENT_ENABLED(ip, DM_EVENT_TRUNCATE) &&
@@ -295,15 +291,23 @@ xfs_setattr(
295 * or we are explicitly asked to change it. This handles 291 * or we are explicitly asked to change it. This handles
296 * the semantic difference between truncate() and ftruncate() 292 * the semantic difference between truncate() and ftruncate()
297 * as implemented in the VFS. 293 * as implemented in the VFS.
294 *
295 * The regular truncate() case without ATTR_CTIME and ATTR_MTIME
296 * is a special case where we need to update the times despite
297 * not having these flags set. For all other operations the
298 * VFS set these flags explicitly if it wants a timestamp
299 * update.
298 */ 300 */
299 if (iattr->ia_size != ip->i_size || (mask & ATTR_CTIME)) 301 if (iattr->ia_size != ip->i_size &&
300 timeflags |= XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG; 302 (!(mask & (ATTR_CTIME | ATTR_MTIME)))) {
303 iattr->ia_ctime = iattr->ia_mtime =
304 current_fs_time(inode->i_sb);
305 mask |= ATTR_CTIME | ATTR_MTIME;
306 }
301 307
302 if (iattr->ia_size > ip->i_size) { 308 if (iattr->ia_size > ip->i_size) {
303 ip->i_d.di_size = iattr->ia_size; 309 ip->i_d.di_size = iattr->ia_size;
304 ip->i_size = iattr->ia_size; 310 ip->i_size = iattr->ia_size;
305 if (!(flags & XFS_ATTR_DMI))
306 xfs_ichgtime(ip, XFS_ICHGTIME_CHG);
307 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); 311 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
308 } else if (iattr->ia_size <= ip->i_size || 312 } else if (iattr->ia_size <= ip->i_size ||
309 (iattr->ia_size == 0 && ip->i_d.di_nextents)) { 313 (iattr->ia_size == 0 && ip->i_d.di_nextents)) {
@@ -374,9 +378,6 @@ xfs_setattr(
374 ip->i_d.di_gid = gid; 378 ip->i_d.di_gid = gid;
375 inode->i_gid = gid; 379 inode->i_gid = gid;
376 } 380 }
377
378 xfs_trans_log_inode (tp, ip, XFS_ILOG_CORE);
379 timeflags |= XFS_ICHGTIME_CHG;
380 } 381 }
381 382
382 /* 383 /*
@@ -393,51 +394,37 @@ xfs_setattr(
393 394
394 inode->i_mode &= S_IFMT; 395 inode->i_mode &= S_IFMT;
395 inode->i_mode |= mode & ~S_IFMT; 396 inode->i_mode |= mode & ~S_IFMT;
396
397 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
398 timeflags |= XFS_ICHGTIME_CHG;
399 } 397 }
400 398
401 /* 399 /*
402 * Change file access or modified times. 400 * Change file access or modified times.
403 */ 401 */
404 if (mask & (ATTR_ATIME|ATTR_MTIME)) { 402 if (mask & ATTR_ATIME) {
405 if (mask & ATTR_ATIME) { 403 inode->i_atime = iattr->ia_atime;
406 inode->i_atime = iattr->ia_atime; 404 ip->i_d.di_atime.t_sec = iattr->ia_atime.tv_sec;
407 ip->i_d.di_atime.t_sec = iattr->ia_atime.tv_sec; 405 ip->i_d.di_atime.t_nsec = iattr->ia_atime.tv_nsec;
408 ip->i_d.di_atime.t_nsec = iattr->ia_atime.tv_nsec; 406 ip->i_update_core = 1;
409 ip->i_update_core = 1;
410 }
411 if (mask & ATTR_MTIME) {
412 inode->i_mtime = iattr->ia_mtime;
413 ip->i_d.di_mtime.t_sec = iattr->ia_mtime.tv_sec;
414 ip->i_d.di_mtime.t_nsec = iattr->ia_mtime.tv_nsec;
415 timeflags &= ~XFS_ICHGTIME_MOD;
416 timeflags |= XFS_ICHGTIME_CHG;
417 }
418 if (tp && (mask & (ATTR_MTIME_SET|ATTR_ATIME_SET)))
419 xfs_trans_log_inode (tp, ip, XFS_ILOG_CORE);
420 } 407 }
421 408 if (mask & ATTR_CTIME) {
422 /*
423 * Change file inode change time only if ATTR_CTIME set
424 * AND we have been called by a DMI function.
425 */
426
427 if ((flags & XFS_ATTR_DMI) && (mask & ATTR_CTIME)) {
428 inode->i_ctime = iattr->ia_ctime; 409 inode->i_ctime = iattr->ia_ctime;
429 ip->i_d.di_ctime.t_sec = iattr->ia_ctime.tv_sec; 410 ip->i_d.di_ctime.t_sec = iattr->ia_ctime.tv_sec;
430 ip->i_d.di_ctime.t_nsec = iattr->ia_ctime.tv_nsec; 411 ip->i_d.di_ctime.t_nsec = iattr->ia_ctime.tv_nsec;
431 ip->i_update_core = 1; 412 ip->i_update_core = 1;
432 timeflags &= ~XFS_ICHGTIME_CHG; 413 }
414 if (mask & ATTR_MTIME) {
415 inode->i_mtime = iattr->ia_mtime;
416 ip->i_d.di_mtime.t_sec = iattr->ia_mtime.tv_sec;
417 ip->i_d.di_mtime.t_nsec = iattr->ia_mtime.tv_nsec;
418 ip->i_update_core = 1;
433 } 419 }
434 420
435 /* 421 /*
436 * Send out timestamp changes that need to be set to the 422 * And finally, log the inode core if any attribute in it
437 * current time. Not done when called by a DMI function. 423 * has been changed.
438 */ 424 */
439 if (timeflags && !(flags & XFS_ATTR_DMI)) 425 if (mask & (ATTR_UID|ATTR_GID|ATTR_MODE|
440 xfs_ichgtime(ip, timeflags); 426 ATTR_ATIME|ATTR_CTIME|ATTR_MTIME))
427 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
441 428
442 XFS_STATS_INC(xs_ig_attrchg); 429 XFS_STATS_INC(xs_ig_attrchg);
443 430
@@ -452,12 +439,10 @@ xfs_setattr(
452 * mix so this probably isn't worth the trouble to optimize. 439 * mix so this probably isn't worth the trouble to optimize.
453 */ 440 */
454 code = 0; 441 code = 0;
455 if (tp) { 442 if (mp->m_flags & XFS_MOUNT_WSYNC)
456 if (mp->m_flags & XFS_MOUNT_WSYNC) 443 xfs_trans_set_sync(tp);
457 xfs_trans_set_sync(tp);
458 444
459 code = xfs_trans_commit(tp, commit_flags); 445 code = xfs_trans_commit(tp, commit_flags);
460 }
461 446
462 xfs_iunlock(ip, lock_flags); 447 xfs_iunlock(ip, lock_flags);
463 448