aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/Kconfig4
-rw-r--r--fs/aio.c40
-rw-r--r--fs/anon_inodes.c33
-rw-r--r--fs/autofs4/autofs_i.h38
-rw-r--r--fs/autofs4/expire.c8
-rw-r--r--fs/autofs4/inode.c2
-rw-r--r--fs/autofs4/root.c616
-rw-r--r--fs/binfmt_aout.c13
-rw-r--r--fs/binfmt_elf.c35
-rw-r--r--fs/binfmt_elf_fdpic.c37
-rw-r--r--fs/binfmt_flat.c6
-rw-r--r--fs/binfmt_som.c2
-rw-r--r--fs/btrfs/Kconfig1
-rw-r--r--fs/btrfs/acl.c68
-rw-r--r--fs/btrfs/btrfs_inode.h5
-rw-r--r--fs/btrfs/ctree.c229
-rw-r--r--fs/btrfs/ctree.h40
-rw-r--r--fs/btrfs/dir-item.c19
-rw-r--r--fs/btrfs/disk-io.c27
-rw-r--r--fs/btrfs/extent-tree.c72
-rw-r--r--fs/btrfs/file.c669
-rw-r--r--fs/btrfs/inode.c567
-rw-r--r--fs/btrfs/ioctl.c34
-rw-r--r--fs/btrfs/ordered-data.c115
-rw-r--r--fs/btrfs/ordered-data.h5
-rw-r--r--fs/btrfs/relocation.c38
-rw-r--r--fs/btrfs/super.c15
-rw-r--r--fs/btrfs/transaction.c44
-rw-r--r--fs/btrfs/transaction.h6
-rw-r--r--fs/btrfs/tree-log.c86
-rw-r--r--fs/btrfs/volumes.c2
-rw-r--r--fs/btrfs/xattr.c80
-rw-r--r--fs/btrfs/xattr.h9
-rw-r--r--fs/cachefiles/bind.c11
-rw-r--r--fs/cachefiles/rdwr.c2
-rw-r--r--fs/cifs/export.c2
-rw-r--r--fs/compat.c2
-rw-r--r--fs/dcache.c1
-rw-r--r--fs/direct-io.c165
-rw-r--r--fs/ecryptfs/dentry.c2
-rw-r--r--fs/ecryptfs/inode.c6
-rw-r--r--fs/ecryptfs/main.c9
-rw-r--r--fs/exec.c40
-rw-r--r--fs/exportfs/expfs.c2
-rw-r--r--fs/ext2/acl.c79
-rw-r--r--fs/ext2/dir.c2
-rw-r--r--fs/ext2/ext2.h1
-rw-r--r--fs/ext2/file.c21
-rw-r--r--fs/ext2/super.c22
-rw-r--r--fs/ext2/xattr.c11
-rw-r--r--fs/ext2/xattr_security.c16
-rw-r--r--fs/ext2/xattr_trusted.c16
-rw-r--r--fs/ext2/xattr_user.c25
-rw-r--r--fs/ext3/acl.c74
-rw-r--r--fs/ext3/xattr.c31
-rw-r--r--fs/ext3/xattr_security.c20
-rw-r--r--fs/ext3/xattr_trusted.c18
-rw-r--r--fs/ext3/xattr_user.c25
-rw-r--r--fs/ext4/Kconfig1
-rw-r--r--fs/ext4/acl.c74
-rw-r--r--fs/ext4/xattr.c31
-rw-r--r--fs/ext4/xattr_security.c20
-rw-r--r--fs/ext4/xattr_trusted.c20
-rw-r--r--fs/ext4/xattr_user.c25
-rw-r--r--fs/fat/fat.h3
-rw-r--r--fs/fat/fatent.c25
-rw-r--r--fs/fat/inode.c8
-rw-r--r--fs/fat/misc.c57
-rw-r--r--fs/file_table.c50
-rw-r--r--fs/fscache/object-list.c2
-rw-r--r--fs/generic_acl.c158
-rw-r--r--fs/gfs2/Kconfig1
-rw-r--r--fs/gfs2/acl.c16
-rw-r--r--fs/gfs2/inode.c5
-rw-r--r--fs/gfs2/xattr.c69
-rw-r--r--fs/gfs2/xattr.h7
-rw-r--r--fs/hpfs/super.c17
-rw-r--r--fs/hugetlbfs/inode.c17
-rw-r--r--fs/inode.c26
-rw-r--r--fs/internal.h1
-rw-r--r--fs/isofs/export.c2
-rw-r--r--fs/jbd/Kconfig1
-rw-r--r--fs/jbd2/Kconfig1
-rw-r--r--fs/jffs2/acl.c65
-rw-r--r--fs/jffs2/gc.c3
-rw-r--r--fs/jffs2/readinode.c2
-rw-r--r--fs/jffs2/security.c18
-rw-r--r--fs/jffs2/summary.c2
-rw-r--r--fs/jffs2/xattr.c6
-rw-r--r--fs/jffs2/xattr_trusted.c18
-rw-r--r--fs/jffs2/xattr_user.c18
-rw-r--r--fs/jfs/jfs_txnmgr.c2
-rw-r--r--fs/libfs.c1
-rw-r--r--fs/lockd/svc4proc.c4
-rw-r--r--fs/lockd/svcproc.c4
-rw-r--r--fs/namei.c468
-rw-r--r--fs/nfs/Kconfig2
-rw-r--r--fs/nfs/nfs4_fs.h5
-rw-r--r--fs/nfs/nfs4proc.c203
-rw-r--r--fs/nfs/nfs4state.c60
-rw-r--r--fs/nfsctl.c2
-rw-r--r--fs/nfsd/auth.c12
-rw-r--r--fs/nfsd/cache.h83
-rw-r--r--fs/nfsd/export.c65
-rw-r--r--fs/nfsd/lockd.c10
-rw-r--r--fs/nfsd/nfs2acl.c27
-rw-r--r--fs/nfsd/nfs3acl.c15
-rw-r--r--fs/nfsd/nfs3proc.c20
-rw-r--r--fs/nfsd/nfs3xdr.c15
-rw-r--r--fs/nfsd/nfs4acl.c12
-rw-r--r--fs/nfsd/nfs4callback.c19
-rw-r--r--fs/nfsd/nfs4idmap.c17
-rw-r--r--fs/nfsd/nfs4proc.c19
-rw-r--r--fs/nfsd/nfs4recover.c16
-rw-r--r--fs/nfsd/nfs4state.c84
-rw-r--r--fs/nfsd/nfs4xdr.c26
-rw-r--r--fs/nfsd/nfscache.c14
-rw-r--r--fs/nfsd/nfsctl.c51
-rw-r--r--fs/nfsd/nfsd.h338
-rw-r--r--fs/nfsd/nfsfh.c102
-rw-r--r--fs/nfsd/nfsfh.h208
-rw-r--r--fs/nfsd/nfsproc.c22
-rw-r--r--fs/nfsd/nfssvc.c22
-rw-r--r--fs/nfsd/nfsxdr.c12
-rw-r--r--fs/nfsd/state.h408
-rw-r--r--fs/nfsd/stats.c11
-rw-r--r--fs/nfsd/vfs.c139
-rw-r--r--fs/nfsd/vfs.h101
-rw-r--r--fs/nfsd/xdr.h173
-rw-r--r--fs/nfsd/xdr3.h344
-rw-r--r--fs/nfsd/xdr4.h562
-rw-r--r--fs/nilfs2/Kconfig1
-rw-r--r--fs/nilfs2/super.c3
-rw-r--r--fs/notify/inotify/inotify_user.c29
-rw-r--r--fs/ntfs/inode.c6
-rw-r--r--fs/ocfs2/acl.c87
-rw-r--r--fs/ocfs2/alloc.c4
-rw-r--r--fs/ocfs2/aops.c34
-rw-r--r--fs/ocfs2/xattr.c72
-rw-r--r--fs/open.c4
-rw-r--r--fs/pipe.c45
-rw-r--r--fs/proc/base.c4
-rw-r--r--fs/proc/generic.c21
-rw-r--r--fs/proc/inode.c31
-rw-r--r--fs/proc/internal.h10
-rw-r--r--fs/proc/page.c45
-rw-r--r--fs/qnx4/bitmap.c24
-rw-r--r--fs/qnx4/inode.c22
-rw-r--r--fs/ramfs/file-nommu.c2
-rw-r--r--fs/reiserfs/Kconfig1
-rw-r--r--fs/reiserfs/Makefile6
-rw-r--r--fs/reiserfs/inode.c18
-rw-r--r--fs/reiserfs/procfs.c65
-rw-r--r--fs/reiserfs/super.c4
-rw-r--r--fs/reiserfs/xattr.c36
-rw-r--r--fs/reiserfs/xattr_acl.c69
-rw-r--r--fs/reiserfs/xattr_security.c21
-rw-r--r--fs/reiserfs/xattr_trusted.c21
-rw-r--r--fs/reiserfs/xattr_user.c21
-rw-r--r--fs/stack.c71
-rw-r--r--fs/sync.c59
-rw-r--r--fs/ubifs/file.c2
-rw-r--r--fs/ufs/dir.c10
-rw-r--r--fs/ufs/namei.c8
-rw-r--r--fs/ufs/super.c52
-rw-r--r--fs/ufs/ufs.h4
-rw-r--r--fs/xattr.c28
-rw-r--r--fs/xfs/linux-2.6/xfs_acl.c57
-rw-r--r--fs/xfs/linux-2.6/xfs_aops.c20
-rw-r--r--fs/xfs/linux-2.6/xfs_buf.c10
-rw-r--r--fs/xfs/linux-2.6/xfs_buf.h1
-rw-r--r--fs/xfs/linux-2.6/xfs_iops.c2
-rw-r--r--fs/xfs/linux-2.6/xfs_xattr.c71
-rw-r--r--fs/xfs/xfs_acl.h3
-rw-r--r--fs/xfs/xfs_bmap_btree.h14
-rw-r--r--fs/xfs/xfs_iget.c16
-rw-r--r--fs/xfs/xfs_inode_item.h6
-rw-r--r--fs/xfs/xfs_log.c2
178 files changed, 5713 insertions, 3464 deletions
diff --git a/fs/Kconfig b/fs/Kconfig
index f8fccaaad628..64d44efad7a5 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -6,10 +6,6 @@ menu "File systems"
6 6
7if BLOCK 7if BLOCK
8 8
9config FS_JOURNAL_INFO
10 bool
11 default n
12
13source "fs/ext2/Kconfig" 9source "fs/ext2/Kconfig"
14source "fs/ext3/Kconfig" 10source "fs/ext3/Kconfig"
15source "fs/ext4/Kconfig" 11source "fs/ext4/Kconfig"
diff --git a/fs/aio.c b/fs/aio.c
index c30dfc006108..1cf12b3dd83a 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -711,10 +711,8 @@ static ssize_t aio_run_iocb(struct kiocb *iocb)
711 */ 711 */
712 ret = retry(iocb); 712 ret = retry(iocb);
713 713
714 if (ret != -EIOCBRETRY && ret != -EIOCBQUEUED) { 714 if (ret != -EIOCBRETRY && ret != -EIOCBQUEUED)
715 BUG_ON(!list_empty(&iocb->ki_wait.task_list));
716 aio_complete(iocb, ret, 0); 715 aio_complete(iocb, ret, 0);
717 }
718out: 716out:
719 spin_lock_irq(&ctx->ctx_lock); 717 spin_lock_irq(&ctx->ctx_lock);
720 718
@@ -866,13 +864,6 @@ static void try_queue_kicked_iocb(struct kiocb *iocb)
866 unsigned long flags; 864 unsigned long flags;
867 int run = 0; 865 int run = 0;
868 866
869 /* We're supposed to be the only path putting the iocb back on the run
870 * list. If we find that the iocb is *back* on a wait queue already
871 * than retry has happened before we could queue the iocb. This also
872 * means that the retry could have completed and freed our iocb, no
873 * good. */
874 BUG_ON((!list_empty(&iocb->ki_wait.task_list)));
875
876 spin_lock_irqsave(&ctx->ctx_lock, flags); 867 spin_lock_irqsave(&ctx->ctx_lock, flags);
877 /* set this inside the lock so that we can't race with aio_run_iocb() 868 /* set this inside the lock so that we can't race with aio_run_iocb()
878 * testing it and putting the iocb on the run list under the lock */ 869 * testing it and putting the iocb on the run list under the lock */
@@ -886,7 +877,7 @@ static void try_queue_kicked_iocb(struct kiocb *iocb)
886/* 877/*
887 * kick_iocb: 878 * kick_iocb:
888 * Called typically from a wait queue callback context 879 * Called typically from a wait queue callback context
889 * (aio_wake_function) to trigger a retry of the iocb. 880 * to trigger a retry of the iocb.
890 * The retry is usually executed by aio workqueue 881 * The retry is usually executed by aio workqueue
891 * threads (See aio_kick_handler). 882 * threads (See aio_kick_handler).
892 */ 883 */
@@ -1520,31 +1511,6 @@ static ssize_t aio_setup_iocb(struct kiocb *kiocb)
1520 return 0; 1511 return 0;
1521} 1512}
1522 1513
1523/*
1524 * aio_wake_function:
1525 * wait queue callback function for aio notification,
1526 * Simply triggers a retry of the operation via kick_iocb.
1527 *
1528 * This callback is specified in the wait queue entry in
1529 * a kiocb.
1530 *
1531 * Note:
1532 * This routine is executed with the wait queue lock held.
1533 * Since kick_iocb acquires iocb->ctx->ctx_lock, it nests
1534 * the ioctx lock inside the wait queue lock. This is safe
1535 * because this callback isn't used for wait queues which
1536 * are nested inside ioctx lock (i.e. ctx->wait)
1537 */
1538static int aio_wake_function(wait_queue_t *wait, unsigned mode,
1539 int sync, void *key)
1540{
1541 struct kiocb *iocb = container_of(wait, struct kiocb, ki_wait);
1542
1543 list_del_init(&wait->task_list);
1544 kick_iocb(iocb);
1545 return 1;
1546}
1547
1548static void aio_batch_add(struct address_space *mapping, 1514static void aio_batch_add(struct address_space *mapping,
1549 struct hlist_head *batch_hash) 1515 struct hlist_head *batch_hash)
1550{ 1516{
@@ -1642,8 +1608,6 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
1642 req->ki_buf = (char __user *)(unsigned long)iocb->aio_buf; 1608 req->ki_buf = (char __user *)(unsigned long)iocb->aio_buf;
1643 req->ki_left = req->ki_nbytes = iocb->aio_nbytes; 1609 req->ki_left = req->ki_nbytes = iocb->aio_nbytes;
1644 req->ki_opcode = iocb->aio_lio_opcode; 1610 req->ki_opcode = iocb->aio_lio_opcode;
1645 init_waitqueue_func_entry(&req->ki_wait, aio_wake_function);
1646 INIT_LIST_HEAD(&req->ki_wait.task_list);
1647 1611
1648 ret = aio_setup_iocb(req); 1612 ret = aio_setup_iocb(req);
1649 1613
diff --git a/fs/anon_inodes.c b/fs/anon_inodes.c
index 2ca7a7cafdbf..2c994591f4d7 100644
--- a/fs/anon_inodes.c
+++ b/fs/anon_inodes.c
@@ -35,14 +35,13 @@ static int anon_inodefs_get_sb(struct file_system_type *fs_type, int flags,
35 mnt); 35 mnt);
36} 36}
37 37
38static int anon_inodefs_delete_dentry(struct dentry *dentry) 38/*
39 * anon_inodefs_dname() is called from d_path().
40 */
41static char *anon_inodefs_dname(struct dentry *dentry, char *buffer, int buflen)
39{ 42{
40 /* 43 return dynamic_dname(dentry, buffer, buflen, "anon_inode:%s",
41 * We faked vfs to believe the dentry was hashed when we created it. 44 dentry->d_name.name);
42 * Now we restore the flag so that dput() will work correctly.
43 */
44 dentry->d_flags |= DCACHE_UNHASHED;
45 return 1;
46} 45}
47 46
48static struct file_system_type anon_inode_fs_type = { 47static struct file_system_type anon_inode_fs_type = {
@@ -51,7 +50,7 @@ static struct file_system_type anon_inode_fs_type = {
51 .kill_sb = kill_anon_super, 50 .kill_sb = kill_anon_super,
52}; 51};
53static const struct dentry_operations anon_inodefs_dentry_operations = { 52static const struct dentry_operations anon_inodefs_dentry_operations = {
54 .d_delete = anon_inodefs_delete_dentry, 53 .d_dname = anon_inodefs_dname,
55}; 54};
56 55
57/* 56/*
@@ -88,7 +87,7 @@ struct file *anon_inode_getfile(const char *name,
88 void *priv, int flags) 87 void *priv, int flags)
89{ 88{
90 struct qstr this; 89 struct qstr this;
91 struct dentry *dentry; 90 struct path path;
92 struct file *file; 91 struct file *file;
93 int error; 92 int error;
94 93
@@ -106,10 +105,11 @@ struct file *anon_inode_getfile(const char *name,
106 this.name = name; 105 this.name = name;
107 this.len = strlen(name); 106 this.len = strlen(name);
108 this.hash = 0; 107 this.hash = 0;
109 dentry = d_alloc(anon_inode_mnt->mnt_sb->s_root, &this); 108 path.dentry = d_alloc(anon_inode_mnt->mnt_sb->s_root, &this);
110 if (!dentry) 109 if (!path.dentry)
111 goto err_module; 110 goto err_module;
112 111
112 path.mnt = mntget(anon_inode_mnt);
113 /* 113 /*
114 * We know the anon_inode inode count is always greater than zero, 114 * We know the anon_inode inode count is always greater than zero,
115 * so we can avoid doing an igrab() and we can use an open-coded 115 * so we can avoid doing an igrab() and we can use an open-coded
@@ -117,14 +117,11 @@ struct file *anon_inode_getfile(const char *name,
117 */ 117 */
118 atomic_inc(&anon_inode_inode->i_count); 118 atomic_inc(&anon_inode_inode->i_count);
119 119
120 dentry->d_op = &anon_inodefs_dentry_operations; 120 path.dentry->d_op = &anon_inodefs_dentry_operations;
121 /* Do not publish this dentry inside the global dentry hash table */ 121 d_instantiate(path.dentry, anon_inode_inode);
122 dentry->d_flags &= ~DCACHE_UNHASHED;
123 d_instantiate(dentry, anon_inode_inode);
124 122
125 error = -ENFILE; 123 error = -ENFILE;
126 file = alloc_file(anon_inode_mnt, dentry, 124 file = alloc_file(&path, FMODE_READ | FMODE_WRITE, fops);
127 FMODE_READ | FMODE_WRITE, fops);
128 if (!file) 125 if (!file)
129 goto err_dput; 126 goto err_dput;
130 file->f_mapping = anon_inode_inode->i_mapping; 127 file->f_mapping = anon_inode_inode->i_mapping;
@@ -137,7 +134,7 @@ struct file *anon_inode_getfile(const char *name,
137 return file; 134 return file;
138 135
139err_dput: 136err_dput:
140 dput(dentry); 137 path_put(&path);
141err_module: 138err_module:
142 module_put(fops->owner); 139 module_put(fops->owner);
143 return ERR_PTR(error); 140 return ERR_PTR(error);
diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h
index 8f7cdde41733..0118d67221b2 100644
--- a/fs/autofs4/autofs_i.h
+++ b/fs/autofs4/autofs_i.h
@@ -60,6 +60,11 @@ do { \
60 current->pid, __func__, ##args); \ 60 current->pid, __func__, ##args); \
61} while (0) 61} while (0)
62 62
63struct rehash_entry {
64 struct task_struct *task;
65 struct list_head list;
66};
67
63/* Unified info structure. This is pointed to by both the dentry and 68/* Unified info structure. This is pointed to by both the dentry and
64 inode structures. Each file in the filesystem has an instance of this 69 inode structures. Each file in the filesystem has an instance of this
65 structure. It holds a reference to the dentry, so dentries are never 70 structure. It holds a reference to the dentry, so dentries are never
@@ -75,6 +80,9 @@ struct autofs_info {
75 struct completion expire_complete; 80 struct completion expire_complete;
76 81
77 struct list_head active; 82 struct list_head active;
83 int active_count;
84 struct list_head rehash_list;
85
78 struct list_head expiring; 86 struct list_head expiring;
79 87
80 struct autofs_sb_info *sbi; 88 struct autofs_sb_info *sbi;
@@ -95,6 +103,8 @@ struct autofs_info {
95 103
96#define AUTOFS_INF_EXPIRING (1<<0) /* dentry is in the process of expiring */ 104#define AUTOFS_INF_EXPIRING (1<<0) /* dentry is in the process of expiring */
97#define AUTOFS_INF_MOUNTPOINT (1<<1) /* mountpoint status for direct expire */ 105#define AUTOFS_INF_MOUNTPOINT (1<<1) /* mountpoint status for direct expire */
106#define AUTOFS_INF_PENDING (1<<2) /* dentry pending mount */
107#define AUTOFS_INF_REHASH (1<<3) /* dentry in transit to ->lookup() */
98 108
99struct autofs_wait_queue { 109struct autofs_wait_queue {
100 wait_queue_head_t queue; 110 wait_queue_head_t queue;
@@ -161,7 +171,7 @@ static inline int autofs4_ispending(struct dentry *dentry)
161{ 171{
162 struct autofs_info *inf = autofs4_dentry_ino(dentry); 172 struct autofs_info *inf = autofs4_dentry_ino(dentry);
163 173
164 if (dentry->d_flags & DCACHE_AUTOFS_PENDING) 174 if (inf->flags & AUTOFS_INF_PENDING)
165 return 1; 175 return 1;
166 176
167 if (inf->flags & AUTOFS_INF_EXPIRING) 177 if (inf->flags & AUTOFS_INF_EXPIRING)
@@ -264,5 +274,31 @@ out:
264 return ret; 274 return ret;
265} 275}
266 276
277static inline void autofs4_add_expiring(struct dentry *dentry)
278{
279 struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb);
280 struct autofs_info *ino = autofs4_dentry_ino(dentry);
281 if (ino) {
282 spin_lock(&sbi->lookup_lock);
283 if (list_empty(&ino->expiring))
284 list_add(&ino->expiring, &sbi->expiring_list);
285 spin_unlock(&sbi->lookup_lock);
286 }
287 return;
288}
289
290static inline void autofs4_del_expiring(struct dentry *dentry)
291{
292 struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb);
293 struct autofs_info *ino = autofs4_dentry_ino(dentry);
294 if (ino) {
295 spin_lock(&sbi->lookup_lock);
296 if (!list_empty(&ino->expiring))
297 list_del_init(&ino->expiring);
298 spin_unlock(&sbi->lookup_lock);
299 }
300 return;
301}
302
267void autofs4_dentry_release(struct dentry *); 303void autofs4_dentry_release(struct dentry *);
268extern void autofs4_kill_sb(struct super_block *); 304extern void autofs4_kill_sb(struct super_block *);
diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c
index 3da18d453488..74bc9aa6df31 100644
--- a/fs/autofs4/expire.c
+++ b/fs/autofs4/expire.c
@@ -27,7 +27,7 @@ static inline int autofs4_can_expire(struct dentry *dentry,
27 return 0; 27 return 0;
28 28
29 /* No point expiring a pending mount */ 29 /* No point expiring a pending mount */
30 if (dentry->d_flags & DCACHE_AUTOFS_PENDING) 30 if (ino->flags & AUTOFS_INF_PENDING)
31 return 0; 31 return 0;
32 32
33 if (!do_now) { 33 if (!do_now) {
@@ -279,6 +279,7 @@ struct dentry *autofs4_expire_direct(struct super_block *sb,
279 root->d_mounted--; 279 root->d_mounted--;
280 } 280 }
281 ino->flags |= AUTOFS_INF_EXPIRING; 281 ino->flags |= AUTOFS_INF_EXPIRING;
282 autofs4_add_expiring(root);
282 init_completion(&ino->expire_complete); 283 init_completion(&ino->expire_complete);
283 spin_unlock(&sbi->fs_lock); 284 spin_unlock(&sbi->fs_lock);
284 return root; 285 return root;
@@ -406,6 +407,7 @@ found:
406 expired, (int)expired->d_name.len, expired->d_name.name); 407 expired, (int)expired->d_name.len, expired->d_name.name);
407 ino = autofs4_dentry_ino(expired); 408 ino = autofs4_dentry_ino(expired);
408 ino->flags |= AUTOFS_INF_EXPIRING; 409 ino->flags |= AUTOFS_INF_EXPIRING;
410 autofs4_add_expiring(expired);
409 init_completion(&ino->expire_complete); 411 init_completion(&ino->expire_complete);
410 spin_unlock(&sbi->fs_lock); 412 spin_unlock(&sbi->fs_lock);
411 spin_lock(&dcache_lock); 413 spin_lock(&dcache_lock);
@@ -433,7 +435,7 @@ int autofs4_expire_wait(struct dentry *dentry)
433 435
434 DPRINTK("expire done status=%d", status); 436 DPRINTK("expire done status=%d", status);
435 437
436 if (d_unhashed(dentry)) 438 if (d_unhashed(dentry) && IS_DEADDIR(dentry->d_inode))
437 return -EAGAIN; 439 return -EAGAIN;
438 440
439 return status; 441 return status;
@@ -473,6 +475,7 @@ int autofs4_expire_run(struct super_block *sb,
473 spin_lock(&sbi->fs_lock); 475 spin_lock(&sbi->fs_lock);
474 ino = autofs4_dentry_ino(dentry); 476 ino = autofs4_dentry_ino(dentry);
475 ino->flags &= ~AUTOFS_INF_EXPIRING; 477 ino->flags &= ~AUTOFS_INF_EXPIRING;
478 autofs4_del_expiring(dentry);
476 complete_all(&ino->expire_complete); 479 complete_all(&ino->expire_complete);
477 spin_unlock(&sbi->fs_lock); 480 spin_unlock(&sbi->fs_lock);
478 481
@@ -503,6 +506,7 @@ int autofs4_do_expire_multi(struct super_block *sb, struct vfsmount *mnt,
503 ino->flags &= ~AUTOFS_INF_MOUNTPOINT; 506 ino->flags &= ~AUTOFS_INF_MOUNTPOINT;
504 } 507 }
505 ino->flags &= ~AUTOFS_INF_EXPIRING; 508 ino->flags &= ~AUTOFS_INF_EXPIRING;
509 autofs4_del_expiring(dentry);
506 complete_all(&ino->expire_complete); 510 complete_all(&ino->expire_complete);
507 spin_unlock(&sbi->fs_lock); 511 spin_unlock(&sbi->fs_lock);
508 dput(dentry); 512 dput(dentry);
diff --git a/fs/autofs4/inode.c b/fs/autofs4/inode.c
index 69c8142da838..d0a3de247458 100644
--- a/fs/autofs4/inode.c
+++ b/fs/autofs4/inode.c
@@ -49,6 +49,8 @@ struct autofs_info *autofs4_init_ino(struct autofs_info *ino,
49 ino->dentry = NULL; 49 ino->dentry = NULL;
50 ino->size = 0; 50 ino->size = 0;
51 INIT_LIST_HEAD(&ino->active); 51 INIT_LIST_HEAD(&ino->active);
52 INIT_LIST_HEAD(&ino->rehash_list);
53 ino->active_count = 0;
52 INIT_LIST_HEAD(&ino->expiring); 54 INIT_LIST_HEAD(&ino->expiring);
53 atomic_set(&ino->count, 0); 55 atomic_set(&ino->count, 0);
54 } 56 }
diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c
index b96a3c57359d..30cc9ddf4b70 100644
--- a/fs/autofs4/root.c
+++ b/fs/autofs4/root.c
@@ -72,6 +72,139 @@ const struct inode_operations autofs4_dir_inode_operations = {
72 .rmdir = autofs4_dir_rmdir, 72 .rmdir = autofs4_dir_rmdir,
73}; 73};
74 74
75static void autofs4_add_active(struct dentry *dentry)
76{
77 struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb);
78 struct autofs_info *ino = autofs4_dentry_ino(dentry);
79 if (ino) {
80 spin_lock(&sbi->lookup_lock);
81 if (!ino->active_count) {
82 if (list_empty(&ino->active))
83 list_add(&ino->active, &sbi->active_list);
84 }
85 ino->active_count++;
86 spin_unlock(&sbi->lookup_lock);
87 }
88 return;
89}
90
91static void autofs4_del_active(struct dentry *dentry)
92{
93 struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb);
94 struct autofs_info *ino = autofs4_dentry_ino(dentry);
95 if (ino) {
96 spin_lock(&sbi->lookup_lock);
97 ino->active_count--;
98 if (!ino->active_count) {
99 if (!list_empty(&ino->active))
100 list_del_init(&ino->active);
101 }
102 spin_unlock(&sbi->lookup_lock);
103 }
104 return;
105}
106
107static void autofs4_add_rehash_entry(struct autofs_info *ino,
108 struct rehash_entry *entry)
109{
110 entry->task = current;
111 INIT_LIST_HEAD(&entry->list);
112 list_add(&entry->list, &ino->rehash_list);
113 return;
114}
115
116static void autofs4_remove_rehash_entry(struct autofs_info *ino)
117{
118 struct list_head *head = &ino->rehash_list;
119 struct rehash_entry *entry;
120 list_for_each_entry(entry, head, list) {
121 if (entry->task == current) {
122 list_del(&entry->list);
123 kfree(entry);
124 break;
125 }
126 }
127 return;
128}
129
130static void autofs4_remove_rehash_entrys(struct autofs_info *ino)
131{
132 struct autofs_sb_info *sbi = ino->sbi;
133 struct rehash_entry *entry, *next;
134 struct list_head *head;
135
136 spin_lock(&sbi->fs_lock);
137 spin_lock(&sbi->lookup_lock);
138 if (!(ino->flags & AUTOFS_INF_REHASH)) {
139 spin_unlock(&sbi->lookup_lock);
140 spin_unlock(&sbi->fs_lock);
141 return;
142 }
143 ino->flags &= ~AUTOFS_INF_REHASH;
144 head = &ino->rehash_list;
145 list_for_each_entry_safe(entry, next, head, list) {
146 list_del(&entry->list);
147 kfree(entry);
148 }
149 spin_unlock(&sbi->lookup_lock);
150 spin_unlock(&sbi->fs_lock);
151 dput(ino->dentry);
152
153 return;
154}
155
156static void autofs4_revalidate_drop(struct dentry *dentry,
157 struct rehash_entry *entry)
158{
159 struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb);
160 struct autofs_info *ino = autofs4_dentry_ino(dentry);
161 /*
162 * Add to the active list so we can pick this up in
163 * ->lookup(). Also add an entry to a rehash list so
164 * we know when there are no dentrys in flight so we
165 * know when we can rehash the dentry.
166 */
167 spin_lock(&sbi->lookup_lock);
168 if (list_empty(&ino->active))
169 list_add(&ino->active, &sbi->active_list);
170 autofs4_add_rehash_entry(ino, entry);
171 spin_unlock(&sbi->lookup_lock);
172 if (!(ino->flags & AUTOFS_INF_REHASH)) {
173 ino->flags |= AUTOFS_INF_REHASH;
174 dget(dentry);
175 spin_lock(&dentry->d_lock);
176 __d_drop(dentry);
177 spin_unlock(&dentry->d_lock);
178 }
179 return;
180}
181
182static void autofs4_revalidate_rehash(struct dentry *dentry)
183{
184 struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb);
185 struct autofs_info *ino = autofs4_dentry_ino(dentry);
186 if (ino->flags & AUTOFS_INF_REHASH) {
187 spin_lock(&sbi->lookup_lock);
188 autofs4_remove_rehash_entry(ino);
189 if (list_empty(&ino->rehash_list)) {
190 spin_unlock(&sbi->lookup_lock);
191 ino->flags &= ~AUTOFS_INF_REHASH;
192 d_rehash(dentry);
193 dput(ino->dentry);
194 } else
195 spin_unlock(&sbi->lookup_lock);
196 }
197 return;
198}
199
200static unsigned int autofs4_need_mount(unsigned int flags)
201{
202 unsigned int res = 0;
203 if (flags & (TRIGGER_FLAGS | TRIGGER_INTENTS))
204 res = 1;
205 return res;
206}
207
75static int autofs4_dir_open(struct inode *inode, struct file *file) 208static int autofs4_dir_open(struct inode *inode, struct file *file)
76{ 209{
77 struct dentry *dentry = file->f_path.dentry; 210 struct dentry *dentry = file->f_path.dentry;
@@ -93,7 +226,7 @@ static int autofs4_dir_open(struct inode *inode, struct file *file)
93 * it. 226 * it.
94 */ 227 */
95 spin_lock(&dcache_lock); 228 spin_lock(&dcache_lock);
96 if (!d_mountpoint(dentry) && __simple_empty(dentry)) { 229 if (!d_mountpoint(dentry) && list_empty(&dentry->d_subdirs)) {
97 spin_unlock(&dcache_lock); 230 spin_unlock(&dcache_lock);
98 return -ENOENT; 231 return -ENOENT;
99 } 232 }
@@ -103,7 +236,7 @@ out:
103 return dcache_dir_open(inode, file); 236 return dcache_dir_open(inode, file);
104} 237}
105 238
106static int try_to_fill_dentry(struct dentry *dentry, int flags) 239static int try_to_fill_dentry(struct dentry *dentry)
107{ 240{
108 struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb); 241 struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb);
109 struct autofs_info *ino = autofs4_dentry_ino(dentry); 242 struct autofs_info *ino = autofs4_dentry_ino(dentry);
@@ -116,55 +249,17 @@ static int try_to_fill_dentry(struct dentry *dentry, int flags)
116 * Wait for a pending mount, triggering one if there 249 * Wait for a pending mount, triggering one if there
117 * isn't one already 250 * isn't one already
118 */ 251 */
119 if (dentry->d_inode == NULL) { 252 DPRINTK("waiting for mount name=%.*s",
120 DPRINTK("waiting for mount name=%.*s", 253 dentry->d_name.len, dentry->d_name.name);
121 dentry->d_name.len, dentry->d_name.name);
122
123 status = autofs4_wait(sbi, dentry, NFY_MOUNT);
124
125 DPRINTK("mount done status=%d", status);
126
127 /* Turn this into a real negative dentry? */
128 if (status == -ENOENT) {
129 spin_lock(&dentry->d_lock);
130 dentry->d_flags &= ~DCACHE_AUTOFS_PENDING;
131 spin_unlock(&dentry->d_lock);
132 return status;
133 } else if (status) {
134 /* Return a negative dentry, but leave it "pending" */
135 return status;
136 }
137 /* Trigger mount for path component or follow link */
138 } else if (dentry->d_flags & DCACHE_AUTOFS_PENDING ||
139 flags & (TRIGGER_FLAGS | TRIGGER_INTENTS) ||
140 current->link_count) {
141 DPRINTK("waiting for mount name=%.*s",
142 dentry->d_name.len, dentry->d_name.name);
143
144 spin_lock(&dentry->d_lock);
145 dentry->d_flags |= DCACHE_AUTOFS_PENDING;
146 spin_unlock(&dentry->d_lock);
147 status = autofs4_wait(sbi, dentry, NFY_MOUNT);
148 254
149 DPRINTK("mount done status=%d", status); 255 status = autofs4_wait(sbi, dentry, NFY_MOUNT);
150 256
151 if (status) { 257 DPRINTK("mount done status=%d", status);
152 spin_lock(&dentry->d_lock);
153 dentry->d_flags &= ~DCACHE_AUTOFS_PENDING;
154 spin_unlock(&dentry->d_lock);
155 return status;
156 }
157 }
158
159 /* Initialize expiry counter after successful mount */
160 if (ino)
161 ino->last_used = jiffies;
162 258
163 spin_lock(&dentry->d_lock); 259 /* Update expiry counter */
164 dentry->d_flags &= ~DCACHE_AUTOFS_PENDING; 260 ino->last_used = jiffies;
165 spin_unlock(&dentry->d_lock);
166 261
167 return 0; 262 return status;
168} 263}
169 264
170/* For autofs direct mounts the follow link triggers the mount */ 265/* For autofs direct mounts the follow link triggers the mount */
@@ -202,27 +297,39 @@ static void *autofs4_follow_link(struct dentry *dentry, struct nameidata *nd)
202 autofs4_expire_wait(dentry); 297 autofs4_expire_wait(dentry);
203 298
204 /* We trigger a mount for almost all flags */ 299 /* We trigger a mount for almost all flags */
205 lookup_type = nd->flags & (TRIGGER_FLAGS | TRIGGER_INTENTS); 300 lookup_type = autofs4_need_mount(nd->flags);
206 if (!(lookup_type || dentry->d_flags & DCACHE_AUTOFS_PENDING)) 301 spin_lock(&sbi->fs_lock);
302 spin_lock(&dcache_lock);
303 if (!(lookup_type || ino->flags & AUTOFS_INF_PENDING)) {
304 spin_unlock(&dcache_lock);
305 spin_unlock(&sbi->fs_lock);
207 goto follow; 306 goto follow;
307 }
208 308
209 /* 309 /*
210 * If the dentry contains directories then it is an autofs 310 * If the dentry contains directories then it is an autofs
211 * multi-mount with no root mount offset. So don't try to 311 * multi-mount with no root mount offset. So don't try to
212 * mount it again. 312 * mount it again.
213 */ 313 */
214 spin_lock(&dcache_lock); 314 if (ino->flags & AUTOFS_INF_PENDING ||
215 if (dentry->d_flags & DCACHE_AUTOFS_PENDING || 315 (!d_mountpoint(dentry) && list_empty(&dentry->d_subdirs))) {
216 (!d_mountpoint(dentry) && __simple_empty(dentry))) { 316 ino->flags |= AUTOFS_INF_PENDING;
217 spin_unlock(&dcache_lock); 317 spin_unlock(&dcache_lock);
318 spin_unlock(&sbi->fs_lock);
319
320 status = try_to_fill_dentry(dentry);
321
322 spin_lock(&sbi->fs_lock);
323 ino->flags &= ~AUTOFS_INF_PENDING;
324 spin_unlock(&sbi->fs_lock);
218 325
219 status = try_to_fill_dentry(dentry, 0);
220 if (status) 326 if (status)
221 goto out_error; 327 goto out_error;
222 328
223 goto follow; 329 goto follow;
224 } 330 }
225 spin_unlock(&dcache_lock); 331 spin_unlock(&dcache_lock);
332 spin_unlock(&sbi->fs_lock);
226follow: 333follow:
227 /* 334 /*
228 * If there is no root mount it must be an autofs 335 * If there is no root mount it must be an autofs
@@ -254,18 +361,47 @@ static int autofs4_revalidate(struct dentry *dentry, struct nameidata *nd)
254{ 361{
255 struct inode *dir = dentry->d_parent->d_inode; 362 struct inode *dir = dentry->d_parent->d_inode;
256 struct autofs_sb_info *sbi = autofs4_sbi(dir->i_sb); 363 struct autofs_sb_info *sbi = autofs4_sbi(dir->i_sb);
257 int oz_mode = autofs4_oz_mode(sbi); 364 struct autofs_info *ino = autofs4_dentry_ino(dentry);
365 struct rehash_entry *entry;
258 int flags = nd ? nd->flags : 0; 366 int flags = nd ? nd->flags : 0;
259 int status = 1; 367 unsigned int mutex_aquired;
368
369 DPRINTK("name = %.*s oz_mode = %d",
370 dentry->d_name.len, dentry->d_name.name, oz_mode);
371
372 /* Daemon never causes a mount to trigger */
373 if (autofs4_oz_mode(sbi))
374 return 1;
375
376 entry = kmalloc(sizeof(struct rehash_entry), GFP_KERNEL);
377 if (!entry)
378 return -ENOMEM;
379
380 mutex_aquired = mutex_trylock(&dir->i_mutex);
260 381
261 /* Pending dentry */
262 spin_lock(&sbi->fs_lock); 382 spin_lock(&sbi->fs_lock);
383 spin_lock(&dcache_lock);
384 /* Pending dentry */
263 if (autofs4_ispending(dentry)) { 385 if (autofs4_ispending(dentry)) {
264 /* The daemon never causes a mount to trigger */ 386 int status;
265 spin_unlock(&sbi->fs_lock);
266 387
267 if (oz_mode) 388 /*
268 return 1; 389 * We can only unhash and send this to ->lookup() if
390 * the directory mutex is held over d_revalidate() and
391 * ->lookup(). This prevents the VFS from incorrectly
392 * seeing the dentry as non-existent.
393 */
394 ino->flags |= AUTOFS_INF_PENDING;
395 if (!mutex_aquired) {
396 autofs4_revalidate_drop(dentry, entry);
397 spin_unlock(&dcache_lock);
398 spin_unlock(&sbi->fs_lock);
399 return 0;
400 }
401 spin_unlock(&dcache_lock);
402 spin_unlock(&sbi->fs_lock);
403 mutex_unlock(&dir->i_mutex);
404 kfree(entry);
269 405
270 /* 406 /*
271 * If the directory has gone away due to an expire 407 * If the directory has gone away due to an expire
@@ -279,46 +415,82 @@ static int autofs4_revalidate(struct dentry *dentry, struct nameidata *nd)
279 * A zero status is success otherwise we have a 415 * A zero status is success otherwise we have a
280 * negative error code. 416 * negative error code.
281 */ 417 */
282 status = try_to_fill_dentry(dentry, flags); 418 status = try_to_fill_dentry(dentry);
419
420 spin_lock(&sbi->fs_lock);
421 ino->flags &= ~AUTOFS_INF_PENDING;
422 spin_unlock(&sbi->fs_lock);
423
283 if (status == 0) 424 if (status == 0)
284 return 1; 425 return 1;
285 426
286 return status; 427 return status;
287 } 428 }
288 spin_unlock(&sbi->fs_lock);
289
290 /* Negative dentry.. invalidate if "old" */
291 if (dentry->d_inode == NULL)
292 return 0;
293 429
294 /* Check for a non-mountpoint directory with no contents */ 430 /* Check for a non-mountpoint directory with no contents */
295 spin_lock(&dcache_lock);
296 if (S_ISDIR(dentry->d_inode->i_mode) && 431 if (S_ISDIR(dentry->d_inode->i_mode) &&
297 !d_mountpoint(dentry) && 432 !d_mountpoint(dentry) && list_empty(&dentry->d_subdirs)) {
298 __simple_empty(dentry)) {
299 DPRINTK("dentry=%p %.*s, emptydir", 433 DPRINTK("dentry=%p %.*s, emptydir",
300 dentry, dentry->d_name.len, dentry->d_name.name); 434 dentry, dentry->d_name.len, dentry->d_name.name);
301 spin_unlock(&dcache_lock);
302 435
303 /* The daemon never causes a mount to trigger */ 436 if (autofs4_need_mount(flags) || current->link_count) {
304 if (oz_mode) 437 int status;
305 return 1;
306 438
307 /* 439 /*
308 * A zero status is success otherwise we have a 440 * We can only unhash and send this to ->lookup() if
309 * negative error code. 441 * the directory mutex is held over d_revalidate() and
310 */ 442 * ->lookup(). This prevents the VFS from incorrectly
311 status = try_to_fill_dentry(dentry, flags); 443 * seeing the dentry as non-existent.
312 if (status == 0) 444 */
313 return 1; 445 ino->flags |= AUTOFS_INF_PENDING;
446 if (!mutex_aquired) {
447 autofs4_revalidate_drop(dentry, entry);
448 spin_unlock(&dcache_lock);
449 spin_unlock(&sbi->fs_lock);
450 return 0;
451 }
452 spin_unlock(&dcache_lock);
453 spin_unlock(&sbi->fs_lock);
454 mutex_unlock(&dir->i_mutex);
455 kfree(entry);
314 456
315 return status; 457 /*
458 * A zero status is success otherwise we have a
459 * negative error code.
460 */
461 status = try_to_fill_dentry(dentry);
462
463 spin_lock(&sbi->fs_lock);
464 ino->flags &= ~AUTOFS_INF_PENDING;
465 spin_unlock(&sbi->fs_lock);
466
467 if (status == 0)
468 return 1;
469
470 return status;
471 }
316 } 472 }
317 spin_unlock(&dcache_lock); 473 spin_unlock(&dcache_lock);
474 spin_unlock(&sbi->fs_lock);
475
476 if (mutex_aquired)
477 mutex_unlock(&dir->i_mutex);
478
479 kfree(entry);
318 480
319 return 1; 481 return 1;
320} 482}
321 483
484static void autofs4_free_rehash_entrys(struct autofs_info *inf)
485{
486 struct list_head *head = &inf->rehash_list;
487 struct rehash_entry *entry, *next;
488 list_for_each_entry_safe(entry, next, head, list) {
489 list_del(&entry->list);
490 kfree(entry);
491 }
492}
493
322void autofs4_dentry_release(struct dentry *de) 494void autofs4_dentry_release(struct dentry *de)
323{ 495{
324 struct autofs_info *inf; 496 struct autofs_info *inf;
@@ -337,6 +509,8 @@ void autofs4_dentry_release(struct dentry *de)
337 list_del(&inf->active); 509 list_del(&inf->active);
338 if (!list_empty(&inf->expiring)) 510 if (!list_empty(&inf->expiring))
339 list_del(&inf->expiring); 511 list_del(&inf->expiring);
512 if (!list_empty(&inf->rehash_list))
513 autofs4_free_rehash_entrys(inf);
340 spin_unlock(&sbi->lookup_lock); 514 spin_unlock(&sbi->lookup_lock);
341 } 515 }
342 516
@@ -359,35 +533,52 @@ static const struct dentry_operations autofs4_dentry_operations = {
359 .d_release = autofs4_dentry_release, 533 .d_release = autofs4_dentry_release,
360}; 534};
361 535
362static struct dentry *autofs4_lookup_active(struct autofs_sb_info *sbi, struct dentry *parent, struct qstr *name) 536static struct dentry *autofs4_lookup_active(struct dentry *dentry)
363{ 537{
538 struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb);
539 struct dentry *parent = dentry->d_parent;
540 struct qstr *name = &dentry->d_name;
364 unsigned int len = name->len; 541 unsigned int len = name->len;
365 unsigned int hash = name->hash; 542 unsigned int hash = name->hash;
366 const unsigned char *str = name->name; 543 const unsigned char *str = name->name;
367 struct list_head *p, *head; 544 struct list_head *p, *head;
368 545
546restart:
369 spin_lock(&dcache_lock); 547 spin_lock(&dcache_lock);
370 spin_lock(&sbi->lookup_lock); 548 spin_lock(&sbi->lookup_lock);
371 head = &sbi->active_list; 549 head = &sbi->active_list;
372 list_for_each(p, head) { 550 list_for_each(p, head) {
373 struct autofs_info *ino; 551 struct autofs_info *ino;
374 struct dentry *dentry; 552 struct dentry *active;
375 struct qstr *qstr; 553 struct qstr *qstr;
376 554
377 ino = list_entry(p, struct autofs_info, active); 555 ino = list_entry(p, struct autofs_info, active);
378 dentry = ino->dentry; 556 active = ino->dentry;
379 557
380 spin_lock(&dentry->d_lock); 558 spin_lock(&active->d_lock);
381 559
382 /* Already gone? */ 560 /* Already gone? */
383 if (atomic_read(&dentry->d_count) == 0) 561 if (atomic_read(&active->d_count) == 0)
384 goto next; 562 goto next;
385 563
386 qstr = &dentry->d_name; 564 if (active->d_inode && IS_DEADDIR(active->d_inode)) {
565 if (!list_empty(&ino->rehash_list)) {
566 dget(active);
567 spin_unlock(&active->d_lock);
568 spin_unlock(&sbi->lookup_lock);
569 spin_unlock(&dcache_lock);
570 autofs4_remove_rehash_entrys(ino);
571 dput(active);
572 goto restart;
573 }
574 goto next;
575 }
576
577 qstr = &active->d_name;
387 578
388 if (dentry->d_name.hash != hash) 579 if (active->d_name.hash != hash)
389 goto next; 580 goto next;
390 if (dentry->d_parent != parent) 581 if (active->d_parent != parent)
391 goto next; 582 goto next;
392 583
393 if (qstr->len != len) 584 if (qstr->len != len)
@@ -395,15 +586,13 @@ static struct dentry *autofs4_lookup_active(struct autofs_sb_info *sbi, struct d
395 if (memcmp(qstr->name, str, len)) 586 if (memcmp(qstr->name, str, len))
396 goto next; 587 goto next;
397 588
398 if (d_unhashed(dentry)) { 589 dget(active);
399 dget(dentry); 590 spin_unlock(&active->d_lock);
400 spin_unlock(&dentry->d_lock); 591 spin_unlock(&sbi->lookup_lock);
401 spin_unlock(&sbi->lookup_lock); 592 spin_unlock(&dcache_lock);
402 spin_unlock(&dcache_lock); 593 return active;
403 return dentry;
404 }
405next: 594next:
406 spin_unlock(&dentry->d_lock); 595 spin_unlock(&active->d_lock);
407 } 596 }
408 spin_unlock(&sbi->lookup_lock); 597 spin_unlock(&sbi->lookup_lock);
409 spin_unlock(&dcache_lock); 598 spin_unlock(&dcache_lock);
@@ -411,8 +600,11 @@ next:
411 return NULL; 600 return NULL;
412} 601}
413 602
414static struct dentry *autofs4_lookup_expiring(struct autofs_sb_info *sbi, struct dentry *parent, struct qstr *name) 603static struct dentry *autofs4_lookup_expiring(struct dentry *dentry)
415{ 604{
605 struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb);
606 struct dentry *parent = dentry->d_parent;
607 struct qstr *name = &dentry->d_name;
416 unsigned int len = name->len; 608 unsigned int len = name->len;
417 unsigned int hash = name->hash; 609 unsigned int hash = name->hash;
418 const unsigned char *str = name->name; 610 const unsigned char *str = name->name;
@@ -423,23 +615,23 @@ static struct dentry *autofs4_lookup_expiring(struct autofs_sb_info *sbi, struct
423 head = &sbi->expiring_list; 615 head = &sbi->expiring_list;
424 list_for_each(p, head) { 616 list_for_each(p, head) {
425 struct autofs_info *ino; 617 struct autofs_info *ino;
426 struct dentry *dentry; 618 struct dentry *expiring;
427 struct qstr *qstr; 619 struct qstr *qstr;
428 620
429 ino = list_entry(p, struct autofs_info, expiring); 621 ino = list_entry(p, struct autofs_info, expiring);
430 dentry = ino->dentry; 622 expiring = ino->dentry;
431 623
432 spin_lock(&dentry->d_lock); 624 spin_lock(&expiring->d_lock);
433 625
434 /* Bad luck, we've already been dentry_iput */ 626 /* Bad luck, we've already been dentry_iput */
435 if (!dentry->d_inode) 627 if (!expiring->d_inode)
436 goto next; 628 goto next;
437 629
438 qstr = &dentry->d_name; 630 qstr = &expiring->d_name;
439 631
440 if (dentry->d_name.hash != hash) 632 if (expiring->d_name.hash != hash)
441 goto next; 633 goto next;
442 if (dentry->d_parent != parent) 634 if (expiring->d_parent != parent)
443 goto next; 635 goto next;
444 636
445 if (qstr->len != len) 637 if (qstr->len != len)
@@ -447,15 +639,13 @@ static struct dentry *autofs4_lookup_expiring(struct autofs_sb_info *sbi, struct
447 if (memcmp(qstr->name, str, len)) 639 if (memcmp(qstr->name, str, len))
448 goto next; 640 goto next;
449 641
450 if (d_unhashed(dentry)) { 642 dget(expiring);
451 dget(dentry); 643 spin_unlock(&expiring->d_lock);
452 spin_unlock(&dentry->d_lock); 644 spin_unlock(&sbi->lookup_lock);
453 spin_unlock(&sbi->lookup_lock); 645 spin_unlock(&dcache_lock);
454 spin_unlock(&dcache_lock); 646 return expiring;
455 return dentry;
456 }
457next: 647next:
458 spin_unlock(&dentry->d_lock); 648 spin_unlock(&expiring->d_lock);
459 } 649 }
460 spin_unlock(&sbi->lookup_lock); 650 spin_unlock(&sbi->lookup_lock);
461 spin_unlock(&dcache_lock); 651 spin_unlock(&dcache_lock);
@@ -463,13 +653,56 @@ next:
463 return NULL; 653 return NULL;
464} 654}
465 655
656static struct autofs_info *init_new_dentry(struct autofs_sb_info *sbi,
657 struct dentry *dentry, int oz_mode)
658{
659 struct autofs_info *ino;
660
661 /*
662 * Mark the dentry incomplete but don't hash it. We do this
663 * to serialize our inode creation operations (symlink and
664 * mkdir) which prevents deadlock during the callback to
665 * the daemon. Subsequent user space lookups for the same
666 * dentry are placed on the wait queue while the daemon
667 * itself is allowed passage unresticted so the create
668 * operation itself can then hash the dentry. Finally,
669 * we check for the hashed dentry and return the newly
670 * hashed dentry.
671 */
672 dentry->d_op = &autofs4_root_dentry_operations;
673
674 /*
675 * And we need to ensure that the same dentry is used for
676 * all following lookup calls until it is hashed so that
677 * the dentry flags are persistent throughout the request.
678 */
679 ino = autofs4_init_ino(NULL, sbi, 0555);
680 if (!ino)
681 return ERR_PTR(-ENOMEM);
682
683 dentry->d_fsdata = ino;
684 ino->dentry = dentry;
685
686 /*
687 * Only set the mount pending flag for new dentrys not created
688 * by the daemon.
689 */
690 if (!oz_mode)
691 ino->flags |= AUTOFS_INF_PENDING;
692
693 d_instantiate(dentry, NULL);
694
695 return ino;
696}
697
466/* Lookups in the root directory */ 698/* Lookups in the root directory */
467static struct dentry *autofs4_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd) 699static struct dentry *autofs4_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
468{ 700{
469 struct autofs_sb_info *sbi; 701 struct autofs_sb_info *sbi;
470 struct autofs_info *ino; 702 struct autofs_info *ino;
471 struct dentry *expiring, *unhashed; 703 struct dentry *expiring, *active;
472 int oz_mode; 704 int oz_mode;
705 int status = 0;
473 706
474 DPRINTK("name = %.*s", 707 DPRINTK("name = %.*s",
475 dentry->d_name.len, dentry->d_name.name); 708 dentry->d_name.len, dentry->d_name.name);
@@ -484,123 +717,100 @@ static struct dentry *autofs4_lookup(struct inode *dir, struct dentry *dentry, s
484 DPRINTK("pid = %u, pgrp = %u, catatonic = %d, oz_mode = %d", 717 DPRINTK("pid = %u, pgrp = %u, catatonic = %d, oz_mode = %d",
485 current->pid, task_pgrp_nr(current), sbi->catatonic, oz_mode); 718 current->pid, task_pgrp_nr(current), sbi->catatonic, oz_mode);
486 719
487 unhashed = autofs4_lookup_active(sbi, dentry->d_parent, &dentry->d_name); 720 spin_lock(&sbi->fs_lock);
488 if (unhashed) 721 active = autofs4_lookup_active(dentry);
489 dentry = unhashed; 722 if (active) {
490 else { 723 dentry = active;
491 /* 724 ino = autofs4_dentry_ino(dentry);
492 * Mark the dentry incomplete but don't hash it. We do this 725 /* If this came from revalidate, rehash it */
493 * to serialize our inode creation operations (symlink and 726 autofs4_revalidate_rehash(dentry);
494 * mkdir) which prevents deadlock during the callback to 727 spin_unlock(&sbi->fs_lock);
495 * the daemon. Subsequent user space lookups for the same 728 } else {
496 * dentry are placed on the wait queue while the daemon 729 spin_unlock(&sbi->fs_lock);
497 * itself is allowed passage unresticted so the create 730 ino = init_new_dentry(sbi, dentry, oz_mode);
498 * operation itself can then hash the dentry. Finally, 731 if (IS_ERR(ino))
499 * we check for the hashed dentry and return the newly 732 return (struct dentry *) ino;
500 * hashed dentry.
501 */
502 dentry->d_op = &autofs4_root_dentry_operations;
503
504 /*
505 * And we need to ensure that the same dentry is used for
506 * all following lookup calls until it is hashed so that
507 * the dentry flags are persistent throughout the request.
508 */
509 ino = autofs4_init_ino(NULL, sbi, 0555);
510 if (!ino)
511 return ERR_PTR(-ENOMEM);
512
513 dentry->d_fsdata = ino;
514 ino->dentry = dentry;
515
516 spin_lock(&sbi->lookup_lock);
517 list_add(&ino->active, &sbi->active_list);
518 spin_unlock(&sbi->lookup_lock);
519
520 d_instantiate(dentry, NULL);
521 } 733 }
522 734
735 autofs4_add_active(dentry);
736
523 if (!oz_mode) { 737 if (!oz_mode) {
738 expiring = autofs4_lookup_expiring(dentry);
524 mutex_unlock(&dir->i_mutex); 739 mutex_unlock(&dir->i_mutex);
525 expiring = autofs4_lookup_expiring(sbi,
526 dentry->d_parent,
527 &dentry->d_name);
528 if (expiring) { 740 if (expiring) {
529 /* 741 /*
530 * If we are racing with expire the request might not 742 * If we are racing with expire the request might not
531 * be quite complete but the directory has been removed 743 * be quite complete but the directory has been removed
532 * so it must have been successful, so just wait for it. 744 * so it must have been successful, so just wait for it.
533 */ 745 */
534 ino = autofs4_dentry_ino(expiring);
535 autofs4_expire_wait(expiring); 746 autofs4_expire_wait(expiring);
536 spin_lock(&sbi->lookup_lock);
537 if (!list_empty(&ino->expiring))
538 list_del_init(&ino->expiring);
539 spin_unlock(&sbi->lookup_lock);
540 dput(expiring); 747 dput(expiring);
541 } 748 }
542 749 status = try_to_fill_dentry(dentry);
543 spin_lock(&dentry->d_lock);
544 dentry->d_flags |= DCACHE_AUTOFS_PENDING;
545 spin_unlock(&dentry->d_lock);
546 if (dentry->d_op && dentry->d_op->d_revalidate)
547 (dentry->d_op->d_revalidate)(dentry, nd);
548 mutex_lock(&dir->i_mutex); 750 mutex_lock(&dir->i_mutex);
751 spin_lock(&sbi->fs_lock);
752 ino->flags &= ~AUTOFS_INF_PENDING;
753 spin_unlock(&sbi->fs_lock);
549 } 754 }
550 755
756 autofs4_del_active(dentry);
757
551 /* 758 /*
552 * If we are still pending, check if we had to handle 759 * If we had a mount fail, check if we had to handle
553 * a signal. If so we can force a restart.. 760 * a signal. If so we can force a restart..
554 */ 761 */
555 if (dentry->d_flags & DCACHE_AUTOFS_PENDING) { 762 if (status) {
556 /* See if we were interrupted */ 763 /* See if we were interrupted */
557 if (signal_pending(current)) { 764 if (signal_pending(current)) {
558 sigset_t *sigset = &current->pending.signal; 765 sigset_t *sigset = &current->pending.signal;
559 if (sigismember (sigset, SIGKILL) || 766 if (sigismember (sigset, SIGKILL) ||
560 sigismember (sigset, SIGQUIT) || 767 sigismember (sigset, SIGQUIT) ||
561 sigismember (sigset, SIGINT)) { 768 sigismember (sigset, SIGINT)) {
562 if (unhashed) 769 if (active)
563 dput(unhashed); 770 dput(active);
564 return ERR_PTR(-ERESTARTNOINTR); 771 return ERR_PTR(-ERESTARTNOINTR);
565 } 772 }
566 } 773 }
567 if (!oz_mode) { 774 }
568 spin_lock(&dentry->d_lock); 775
569 dentry->d_flags &= ~DCACHE_AUTOFS_PENDING; 776 /*
570 spin_unlock(&dentry->d_lock); 777 * User space can (and has done in the past) remove and re-create
778 * this directory during the callback. This can leave us with an
779 * unhashed dentry, but a successful mount! So we need to
780 * perform another cached lookup in case the dentry now exists.
781 */
782 if (!oz_mode && !have_submounts(dentry)) {
783 struct dentry *new;
784 new = d_lookup(dentry->d_parent, &dentry->d_name);
785 if (new) {
786 if (active)
787 dput(active);
788 return new;
789 } else {
790 if (!status)
791 status = -ENOENT;
571 } 792 }
572 } 793 }
573 794
574 /* 795 /*
575 * If this dentry is unhashed, then we shouldn't honour this 796 * If we had a mount failure, return status to user space.
576 * lookup. Returning ENOENT here doesn't do the right thing 797 * If the mount succeeded and we used a dentry from the active queue
577 * for all system calls, but it should be OK for the operations 798 * return it.
578 * we permit from an autofs.
579 */ 799 */
580 if (!oz_mode && d_unhashed(dentry)) { 800 if (status) {
801 dentry = ERR_PTR(status);
802 if (active)
803 dput(active);
804 return dentry;
805 } else {
581 /* 806 /*
582 * A user space application can (and has done in the past) 807 * Valid successful mount, return active dentry or NULL
583 * remove and re-create this directory during the callback. 808 * for a new dentry.
584 * This can leave us with an unhashed dentry, but a
585 * successful mount! So we need to perform another
586 * cached lookup in case the dentry now exists.
587 */ 809 */
588 struct dentry *parent = dentry->d_parent; 810 if (active)
589 struct dentry *new = d_lookup(parent, &dentry->d_name); 811 return active;
590 if (new != NULL)
591 dentry = new;
592 else
593 dentry = ERR_PTR(-ENOENT);
594
595 if (unhashed)
596 dput(unhashed);
597
598 return dentry;
599 } 812 }
600 813
601 if (unhashed)
602 return unhashed;
603
604 return NULL; 814 return NULL;
605} 815}
606 816
@@ -624,11 +834,6 @@ static int autofs4_dir_symlink(struct inode *dir,
624 if (!ino) 834 if (!ino)
625 return -ENOMEM; 835 return -ENOMEM;
626 836
627 spin_lock(&sbi->lookup_lock);
628 if (!list_empty(&ino->active))
629 list_del_init(&ino->active);
630 spin_unlock(&sbi->lookup_lock);
631
632 ino->size = strlen(symname); 837 ino->size = strlen(symname);
633 cp = kmalloc(ino->size + 1, GFP_KERNEL); 838 cp = kmalloc(ino->size + 1, GFP_KERNEL);
634 if (!cp) { 839 if (!cp) {
@@ -705,10 +910,6 @@ static int autofs4_dir_unlink(struct inode *dir, struct dentry *dentry)
705 dir->i_mtime = CURRENT_TIME; 910 dir->i_mtime = CURRENT_TIME;
706 911
707 spin_lock(&dcache_lock); 912 spin_lock(&dcache_lock);
708 spin_lock(&sbi->lookup_lock);
709 if (list_empty(&ino->expiring))
710 list_add(&ino->expiring, &sbi->expiring_list);
711 spin_unlock(&sbi->lookup_lock);
712 spin_lock(&dentry->d_lock); 913 spin_lock(&dentry->d_lock);
713 __d_drop(dentry); 914 __d_drop(dentry);
714 spin_unlock(&dentry->d_lock); 915 spin_unlock(&dentry->d_lock);
@@ -734,10 +935,6 @@ static int autofs4_dir_rmdir(struct inode *dir, struct dentry *dentry)
734 spin_unlock(&dcache_lock); 935 spin_unlock(&dcache_lock);
735 return -ENOTEMPTY; 936 return -ENOTEMPTY;
736 } 937 }
737 spin_lock(&sbi->lookup_lock);
738 if (list_empty(&ino->expiring))
739 list_add(&ino->expiring, &sbi->expiring_list);
740 spin_unlock(&sbi->lookup_lock);
741 spin_lock(&dentry->d_lock); 938 spin_lock(&dentry->d_lock);
742 __d_drop(dentry); 939 __d_drop(dentry);
743 spin_unlock(&dentry->d_lock); 940 spin_unlock(&dentry->d_lock);
@@ -775,11 +972,6 @@ static int autofs4_dir_mkdir(struct inode *dir, struct dentry *dentry, int mode)
775 if (!ino) 972 if (!ino)
776 return -ENOMEM; 973 return -ENOMEM;
777 974
778 spin_lock(&sbi->lookup_lock);
779 if (!list_empty(&ino->active))
780 list_del_init(&ino->active);
781 spin_unlock(&sbi->lookup_lock);
782
783 inode = autofs4_get_inode(dir->i_sb, ino); 975 inode = autofs4_get_inode(dir->i_sb, ino);
784 if (!inode) { 976 if (!inode) {
785 if (!dentry->d_fsdata) 977 if (!dentry->d_fsdata)
diff --git a/fs/binfmt_aout.c b/fs/binfmt_aout.c
index b639dcf7c778..346b69405363 100644
--- a/fs/binfmt_aout.c
+++ b/fs/binfmt_aout.c
@@ -32,7 +32,7 @@
32 32
33static int load_aout_binary(struct linux_binprm *, struct pt_regs * regs); 33static int load_aout_binary(struct linux_binprm *, struct pt_regs * regs);
34static int load_aout_library(struct file*); 34static int load_aout_library(struct file*);
35static int aout_core_dump(long signr, struct pt_regs *regs, struct file *file, unsigned long limit); 35static int aout_core_dump(struct coredump_params *cprm);
36 36
37static struct linux_binfmt aout_format = { 37static struct linux_binfmt aout_format = {
38 .module = THIS_MODULE, 38 .module = THIS_MODULE,
@@ -89,8 +89,9 @@ if (file->f_op->llseek) { \
89 * dumping of the process results in another error.. 89 * dumping of the process results in another error..
90 */ 90 */
91 91
92static int aout_core_dump(long signr, struct pt_regs *regs, struct file *file, unsigned long limit) 92static int aout_core_dump(struct coredump_params *cprm)
93{ 93{
94 struct file *file = cprm->file;
94 mm_segment_t fs; 95 mm_segment_t fs;
95 int has_dumped = 0; 96 int has_dumped = 0;
96 unsigned long dump_start, dump_size; 97 unsigned long dump_start, dump_size;
@@ -108,16 +109,16 @@ static int aout_core_dump(long signr, struct pt_regs *regs, struct file *file, u
108 current->flags |= PF_DUMPCORE; 109 current->flags |= PF_DUMPCORE;
109 strncpy(dump.u_comm, current->comm, sizeof(dump.u_comm)); 110 strncpy(dump.u_comm, current->comm, sizeof(dump.u_comm));
110 dump.u_ar0 = offsetof(struct user, regs); 111 dump.u_ar0 = offsetof(struct user, regs);
111 dump.signal = signr; 112 dump.signal = cprm->signr;
112 aout_dump_thread(regs, &dump); 113 aout_dump_thread(cprm->regs, &dump);
113 114
114/* If the size of the dump file exceeds the rlimit, then see what would happen 115/* If the size of the dump file exceeds the rlimit, then see what would happen
115 if we wrote the stack, but not the data area. */ 116 if we wrote the stack, but not the data area. */
116 if ((dump.u_dsize + dump.u_ssize+1) * PAGE_SIZE > limit) 117 if ((dump.u_dsize + dump.u_ssize+1) * PAGE_SIZE > cprm->limit)
117 dump.u_dsize = 0; 118 dump.u_dsize = 0;
118 119
119/* Make sure we have enough room to write the stack and data areas. */ 120/* Make sure we have enough room to write the stack and data areas. */
120 if ((dump.u_ssize + 1) * PAGE_SIZE > limit) 121 if ((dump.u_ssize + 1) * PAGE_SIZE > cprm->limit)
121 dump.u_ssize = 0; 122 dump.u_ssize = 0;
122 123
123/* make sure we actually have a data and stack area to dump */ 124/* make sure we actually have a data and stack area to dump */
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index d15ea1790bfb..edd90c49003c 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -44,8 +44,8 @@ static unsigned long elf_map(struct file *, unsigned long, struct elf_phdr *,
44 * If we don't support core dumping, then supply a NULL so we 44 * If we don't support core dumping, then supply a NULL so we
45 * don't even try. 45 * don't even try.
46 */ 46 */
47#if defined(USE_ELF_CORE_DUMP) && defined(CONFIG_ELF_CORE) 47#ifdef CONFIG_ELF_CORE
48static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file, unsigned long limit); 48static int elf_core_dump(struct coredump_params *cprm);
49#else 49#else
50#define elf_core_dump NULL 50#define elf_core_dump NULL
51#endif 51#endif
@@ -1101,12 +1101,7 @@ out:
1101 return error; 1101 return error;
1102} 1102}
1103 1103
1104/* 1104#ifdef CONFIG_ELF_CORE
1105 * Note that some platforms still use traditional core dumps and not
1106 * the ELF core dump. Each platform can select it as appropriate.
1107 */
1108#if defined(USE_ELF_CORE_DUMP) && defined(CONFIG_ELF_CORE)
1109
1110/* 1105/*
1111 * ELF core dumper 1106 * ELF core dumper
1112 * 1107 *
@@ -1277,8 +1272,9 @@ static int writenote(struct memelfnote *men, struct file *file,
1277} 1272}
1278#undef DUMP_WRITE 1273#undef DUMP_WRITE
1279 1274
1280#define DUMP_WRITE(addr, nr) \ 1275#define DUMP_WRITE(addr, nr) \
1281 if ((size += (nr)) > limit || !dump_write(file, (addr), (nr))) \ 1276 if ((size += (nr)) > cprm->limit || \
1277 !dump_write(cprm->file, (addr), (nr))) \
1282 goto end_coredump; 1278 goto end_coredump;
1283 1279
1284static void fill_elf_header(struct elfhdr *elf, int segs, 1280static void fill_elf_header(struct elfhdr *elf, int segs,
@@ -1906,7 +1902,7 @@ static struct vm_area_struct *next_vma(struct vm_area_struct *this_vma,
1906 * and then they are actually written out. If we run out of core limit 1902 * and then they are actually written out. If we run out of core limit
1907 * we just truncate. 1903 * we just truncate.
1908 */ 1904 */
1909static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file, unsigned long limit) 1905static int elf_core_dump(struct coredump_params *cprm)
1910{ 1906{
1911 int has_dumped = 0; 1907 int has_dumped = 0;
1912 mm_segment_t fs; 1908 mm_segment_t fs;
@@ -1952,7 +1948,7 @@ static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file, un
1952 * notes. This also sets up the file header. 1948 * notes. This also sets up the file header.
1953 */ 1949 */
1954 if (!fill_note_info(elf, segs + 1, /* including notes section */ 1950 if (!fill_note_info(elf, segs + 1, /* including notes section */
1955 &info, signr, regs)) 1951 &info, cprm->signr, cprm->regs))
1956 goto cleanup; 1952 goto cleanup;
1957 1953
1958 has_dumped = 1; 1954 has_dumped = 1;
@@ -2014,14 +2010,14 @@ static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file, un
2014#endif 2010#endif
2015 2011
2016 /* write out the notes section */ 2012 /* write out the notes section */
2017 if (!write_note_info(&info, file, &foffset)) 2013 if (!write_note_info(&info, cprm->file, &foffset))
2018 goto end_coredump; 2014 goto end_coredump;
2019 2015
2020 if (elf_coredump_extra_notes_write(file, &foffset)) 2016 if (elf_coredump_extra_notes_write(cprm->file, &foffset))
2021 goto end_coredump; 2017 goto end_coredump;
2022 2018
2023 /* Align to page */ 2019 /* Align to page */
2024 if (!dump_seek(file, dataoff - foffset)) 2020 if (!dump_seek(cprm->file, dataoff - foffset))
2025 goto end_coredump; 2021 goto end_coredump;
2026 2022
2027 for (vma = first_vma(current, gate_vma); vma != NULL; 2023 for (vma = first_vma(current, gate_vma); vma != NULL;
@@ -2038,12 +2034,13 @@ static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file, un
2038 page = get_dump_page(addr); 2034 page = get_dump_page(addr);
2039 if (page) { 2035 if (page) {
2040 void *kaddr = kmap(page); 2036 void *kaddr = kmap(page);
2041 stop = ((size += PAGE_SIZE) > limit) || 2037 stop = ((size += PAGE_SIZE) > cprm->limit) ||
2042 !dump_write(file, kaddr, PAGE_SIZE); 2038 !dump_write(cprm->file, kaddr,
2039 PAGE_SIZE);
2043 kunmap(page); 2040 kunmap(page);
2044 page_cache_release(page); 2041 page_cache_release(page);
2045 } else 2042 } else
2046 stop = !dump_seek(file, PAGE_SIZE); 2043 stop = !dump_seek(cprm->file, PAGE_SIZE);
2047 if (stop) 2044 if (stop)
2048 goto end_coredump; 2045 goto end_coredump;
2049 } 2046 }
@@ -2063,7 +2060,7 @@ out:
2063 return has_dumped; 2060 return has_dumped;
2064} 2061}
2065 2062
2066#endif /* USE_ELF_CORE_DUMP */ 2063#endif /* CONFIG_ELF_CORE */
2067 2064
2068static int __init init_elf_binfmt(void) 2065static int __init init_elf_binfmt(void)
2069{ 2066{
diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c
index 79d2b1aa389f..c25256a5c5b0 100644
--- a/fs/binfmt_elf_fdpic.c
+++ b/fs/binfmt_elf_fdpic.c
@@ -75,14 +75,14 @@ static int elf_fdpic_map_file_constdisp_on_uclinux(struct elf_fdpic_params *,
75static int elf_fdpic_map_file_by_direct_mmap(struct elf_fdpic_params *, 75static int elf_fdpic_map_file_by_direct_mmap(struct elf_fdpic_params *,
76 struct file *, struct mm_struct *); 76 struct file *, struct mm_struct *);
77 77
78#if defined(USE_ELF_CORE_DUMP) && defined(CONFIG_ELF_CORE) 78#ifdef CONFIG_ELF_CORE
79static int elf_fdpic_core_dump(long, struct pt_regs *, struct file *, unsigned long limit); 79static int elf_fdpic_core_dump(struct coredump_params *cprm);
80#endif 80#endif
81 81
82static struct linux_binfmt elf_fdpic_format = { 82static struct linux_binfmt elf_fdpic_format = {
83 .module = THIS_MODULE, 83 .module = THIS_MODULE,
84 .load_binary = load_elf_fdpic_binary, 84 .load_binary = load_elf_fdpic_binary,
85#if defined(USE_ELF_CORE_DUMP) && defined(CONFIG_ELF_CORE) 85#ifdef CONFIG_ELF_CORE
86 .core_dump = elf_fdpic_core_dump, 86 .core_dump = elf_fdpic_core_dump,
87#endif 87#endif
88 .min_coredump = ELF_EXEC_PAGESIZE, 88 .min_coredump = ELF_EXEC_PAGESIZE,
@@ -1201,7 +1201,7 @@ static int elf_fdpic_map_file_by_direct_mmap(struct elf_fdpic_params *params,
1201 * 1201 *
1202 * Modelled on fs/binfmt_elf.c core dumper 1202 * Modelled on fs/binfmt_elf.c core dumper
1203 */ 1203 */
1204#if defined(USE_ELF_CORE_DUMP) && defined(CONFIG_ELF_CORE) 1204#ifdef CONFIG_ELF_CORE
1205 1205
1206/* 1206/*
1207 * These are the only things you should do on a core-file: use only these 1207 * These are the only things you should do on a core-file: use only these
@@ -1326,8 +1326,9 @@ static int writenote(struct memelfnote *men, struct file *file)
1326#undef DUMP_WRITE 1326#undef DUMP_WRITE
1327#undef DUMP_SEEK 1327#undef DUMP_SEEK
1328 1328
1329#define DUMP_WRITE(addr, nr) \ 1329#define DUMP_WRITE(addr, nr) \
1330 if ((size += (nr)) > limit || !dump_write(file, (addr), (nr))) \ 1330 if ((size += (nr)) > cprm->limit || \
1331 !dump_write(cprm->file, (addr), (nr))) \
1331 goto end_coredump; 1332 goto end_coredump;
1332 1333
1333static inline void fill_elf_fdpic_header(struct elfhdr *elf, int segs) 1334static inline void fill_elf_fdpic_header(struct elfhdr *elf, int segs)
@@ -1582,8 +1583,7 @@ static int elf_fdpic_dump_segments(struct file *file, size_t *size,
1582 * and then they are actually written out. If we run out of core limit 1583 * and then they are actually written out. If we run out of core limit
1583 * we just truncate. 1584 * we just truncate.
1584 */ 1585 */
1585static int elf_fdpic_core_dump(long signr, struct pt_regs *regs, 1586static int elf_fdpic_core_dump(struct coredump_params *cprm)
1586 struct file *file, unsigned long limit)
1587{ 1587{
1588#define NUM_NOTES 6 1588#define NUM_NOTES 6
1589 int has_dumped = 0; 1589 int has_dumped = 0;
@@ -1642,7 +1642,7 @@ static int elf_fdpic_core_dump(long signr, struct pt_regs *regs,
1642 goto cleanup; 1642 goto cleanup;
1643#endif 1643#endif
1644 1644
1645 if (signr) { 1645 if (cprm->signr) {
1646 struct core_thread *ct; 1646 struct core_thread *ct;
1647 struct elf_thread_status *tmp; 1647 struct elf_thread_status *tmp;
1648 1648
@@ -1661,14 +1661,14 @@ static int elf_fdpic_core_dump(long signr, struct pt_regs *regs,
1661 int sz; 1661 int sz;
1662 1662
1663 tmp = list_entry(t, struct elf_thread_status, list); 1663 tmp = list_entry(t, struct elf_thread_status, list);
1664 sz = elf_dump_thread_status(signr, tmp); 1664 sz = elf_dump_thread_status(cprm->signr, tmp);
1665 thread_status_size += sz; 1665 thread_status_size += sz;
1666 } 1666 }
1667 } 1667 }
1668 1668
1669 /* now collect the dump for the current */ 1669 /* now collect the dump for the current */
1670 fill_prstatus(prstatus, current, signr); 1670 fill_prstatus(prstatus, current, cprm->signr);
1671 elf_core_copy_regs(&prstatus->pr_reg, regs); 1671 elf_core_copy_regs(&prstatus->pr_reg, cprm->regs);
1672 1672
1673 segs = current->mm->map_count; 1673 segs = current->mm->map_count;
1674#ifdef ELF_CORE_EXTRA_PHDRS 1674#ifdef ELF_CORE_EXTRA_PHDRS
@@ -1703,7 +1703,7 @@ static int elf_fdpic_core_dump(long signr, struct pt_regs *regs,
1703 1703
1704 /* Try to dump the FPU. */ 1704 /* Try to dump the FPU. */
1705 if ((prstatus->pr_fpvalid = 1705 if ((prstatus->pr_fpvalid =
1706 elf_core_copy_task_fpregs(current, regs, fpu))) 1706 elf_core_copy_task_fpregs(current, cprm->regs, fpu)))
1707 fill_note(notes + numnote++, 1707 fill_note(notes + numnote++,
1708 "CORE", NT_PRFPREG, sizeof(*fpu), fpu); 1708 "CORE", NT_PRFPREG, sizeof(*fpu), fpu);
1709#ifdef ELF_CORE_COPY_XFPREGS 1709#ifdef ELF_CORE_COPY_XFPREGS
@@ -1774,7 +1774,7 @@ static int elf_fdpic_core_dump(long signr, struct pt_regs *regs,
1774 1774
1775 /* write out the notes section */ 1775 /* write out the notes section */
1776 for (i = 0; i < numnote; i++) 1776 for (i = 0; i < numnote; i++)
1777 if (!writenote(notes + i, file)) 1777 if (!writenote(notes + i, cprm->file))
1778 goto end_coredump; 1778 goto end_coredump;
1779 1779
1780 /* write out the thread status notes section */ 1780 /* write out the thread status notes section */
@@ -1783,14 +1783,15 @@ static int elf_fdpic_core_dump(long signr, struct pt_regs *regs,
1783 list_entry(t, struct elf_thread_status, list); 1783 list_entry(t, struct elf_thread_status, list);
1784 1784
1785 for (i = 0; i < tmp->num_notes; i++) 1785 for (i = 0; i < tmp->num_notes; i++)
1786 if (!writenote(&tmp->notes[i], file)) 1786 if (!writenote(&tmp->notes[i], cprm->file))
1787 goto end_coredump; 1787 goto end_coredump;
1788 } 1788 }
1789 1789
1790 if (!dump_seek(file, dataoff)) 1790 if (!dump_seek(cprm->file, dataoff))
1791 goto end_coredump; 1791 goto end_coredump;
1792 1792
1793 if (elf_fdpic_dump_segments(file, &size, &limit, mm_flags) < 0) 1793 if (elf_fdpic_dump_segments(cprm->file, &size, &cprm->limit,
1794 mm_flags) < 0)
1794 goto end_coredump; 1795 goto end_coredump;
1795 1796
1796#ifdef ELF_CORE_WRITE_EXTRA_DATA 1797#ifdef ELF_CORE_WRITE_EXTRA_DATA
@@ -1826,4 +1827,4 @@ cleanup:
1826#undef NUM_NOTES 1827#undef NUM_NOTES
1827} 1828}
1828 1829
1829#endif /* USE_ELF_CORE_DUMP */ 1830#endif /* CONFIG_ELF_CORE */
diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c
index a2796651e756..d4a00ea1054c 100644
--- a/fs/binfmt_flat.c
+++ b/fs/binfmt_flat.c
@@ -87,7 +87,7 @@ static int load_flat_shared_library(int id, struct lib_info *p);
87#endif 87#endif
88 88
89static int load_flat_binary(struct linux_binprm *, struct pt_regs * regs); 89static int load_flat_binary(struct linux_binprm *, struct pt_regs * regs);
90static int flat_core_dump(long signr, struct pt_regs *regs, struct file *file, unsigned long limit); 90static int flat_core_dump(struct coredump_params *cprm);
91 91
92static struct linux_binfmt flat_format = { 92static struct linux_binfmt flat_format = {
93 .module = THIS_MODULE, 93 .module = THIS_MODULE,
@@ -102,10 +102,10 @@ static struct linux_binfmt flat_format = {
102 * Currently only a stub-function. 102 * Currently only a stub-function.
103 */ 103 */
104 104
105static int flat_core_dump(long signr, struct pt_regs *regs, struct file *file, unsigned long limit) 105static int flat_core_dump(struct coredump_params *cprm)
106{ 106{
107 printk("Process %s:%d received signr %d and should have core dumped\n", 107 printk("Process %s:%d received signr %d and should have core dumped\n",
108 current->comm, current->pid, (int) signr); 108 current->comm, current->pid, (int) cprm->signr);
109 return(1); 109 return(1);
110} 110}
111 111
diff --git a/fs/binfmt_som.c b/fs/binfmt_som.c
index eff74b9c9e77..2a9b5330cc5e 100644
--- a/fs/binfmt_som.c
+++ b/fs/binfmt_som.c
@@ -43,7 +43,7 @@ static int load_som_library(struct file *);
43 * don't even try. 43 * don't even try.
44 */ 44 */
45#if 0 45#if 0
46static int som_core_dump(long signr, struct pt_regs *regs, unsigned long limit); 46static int som_core_dump(struct coredump_params *cprm);
47#else 47#else
48#define som_core_dump NULL 48#define som_core_dump NULL
49#endif 49#endif
diff --git a/fs/btrfs/Kconfig b/fs/btrfs/Kconfig
index 402afe0a0bfb..7bb3c020e570 100644
--- a/fs/btrfs/Kconfig
+++ b/fs/btrfs/Kconfig
@@ -4,7 +4,6 @@ config BTRFS_FS
4 select LIBCRC32C 4 select LIBCRC32C
5 select ZLIB_INFLATE 5 select ZLIB_INFLATE
6 select ZLIB_DEFLATE 6 select ZLIB_DEFLATE
7 select FS_JOURNAL_INFO
8 help 7 help
9 Btrfs is a new filesystem with extents, writable snapshotting, 8 Btrfs is a new filesystem with extents, writable snapshotting,
10 support for multiple devices and many more features. 9 support for multiple devices and many more features.
diff --git a/fs/btrfs/acl.c b/fs/btrfs/acl.c
index 361604244271..2e9e69987a82 100644
--- a/fs/btrfs/acl.c
+++ b/fs/btrfs/acl.c
@@ -73,13 +73,13 @@ static struct posix_acl *btrfs_get_acl(struct inode *inode, int type)
73 return acl; 73 return acl;
74} 74}
75 75
76static int btrfs_xattr_get_acl(struct inode *inode, int type, 76static int btrfs_xattr_acl_get(struct dentry *dentry, const char *name,
77 void *value, size_t size) 77 void *value, size_t size, int type)
78{ 78{
79 struct posix_acl *acl; 79 struct posix_acl *acl;
80 int ret = 0; 80 int ret = 0;
81 81
82 acl = btrfs_get_acl(inode, type); 82 acl = btrfs_get_acl(dentry->d_inode, type);
83 83
84 if (IS_ERR(acl)) 84 if (IS_ERR(acl))
85 return PTR_ERR(acl); 85 return PTR_ERR(acl);
@@ -94,7 +94,8 @@ static int btrfs_xattr_get_acl(struct inode *inode, int type,
94/* 94/*
95 * Needs to be called with fs_mutex held 95 * Needs to be called with fs_mutex held
96 */ 96 */
97static int btrfs_set_acl(struct inode *inode, struct posix_acl *acl, int type) 97static int btrfs_set_acl(struct btrfs_trans_handle *trans,
98 struct inode *inode, struct posix_acl *acl, int type)
98{ 99{
99 int ret, size = 0; 100 int ret, size = 0;
100 const char *name; 101 const char *name;
@@ -140,8 +141,7 @@ static int btrfs_set_acl(struct inode *inode, struct posix_acl *acl, int type)
140 goto out; 141 goto out;
141 } 142 }
142 143
143 ret = __btrfs_setxattr(inode, name, value, size, 0); 144 ret = __btrfs_setxattr(trans, inode, name, value, size, 0);
144
145out: 145out:
146 kfree(value); 146 kfree(value);
147 147
@@ -151,10 +151,10 @@ out:
151 return ret; 151 return ret;
152} 152}
153 153
154static int btrfs_xattr_set_acl(struct inode *inode, int type, 154static int btrfs_xattr_acl_set(struct dentry *dentry, const char *name,
155 const void *value, size_t size) 155 const void *value, size_t size, int flags, int type)
156{ 156{
157 int ret = 0; 157 int ret;
158 struct posix_acl *acl = NULL; 158 struct posix_acl *acl = NULL;
159 159
160 if (value) { 160 if (value) {
@@ -167,38 +167,13 @@ static int btrfs_xattr_set_acl(struct inode *inode, int type,
167 } 167 }
168 } 168 }
169 169
170 ret = btrfs_set_acl(inode, acl, type); 170 ret = btrfs_set_acl(NULL, dentry->d_inode, acl, type);
171 171
172 posix_acl_release(acl); 172 posix_acl_release(acl);
173 173
174 return ret; 174 return ret;
175} 175}
176 176
177
178static int btrfs_xattr_acl_access_get(struct inode *inode, const char *name,
179 void *value, size_t size)
180{
181 return btrfs_xattr_get_acl(inode, ACL_TYPE_ACCESS, value, size);
182}
183
184static int btrfs_xattr_acl_access_set(struct inode *inode, const char *name,
185 const void *value, size_t size, int flags)
186{
187 return btrfs_xattr_set_acl(inode, ACL_TYPE_ACCESS, value, size);
188}
189
190static int btrfs_xattr_acl_default_get(struct inode *inode, const char *name,
191 void *value, size_t size)
192{
193 return btrfs_xattr_get_acl(inode, ACL_TYPE_DEFAULT, value, size);
194}
195
196static int btrfs_xattr_acl_default_set(struct inode *inode, const char *name,
197 const void *value, size_t size, int flags)
198{
199 return btrfs_xattr_set_acl(inode, ACL_TYPE_DEFAULT, value, size);
200}
201
202int btrfs_check_acl(struct inode *inode, int mask) 177int btrfs_check_acl(struct inode *inode, int mask)
203{ 178{
204 struct posix_acl *acl; 179 struct posix_acl *acl;
@@ -221,7 +196,8 @@ int btrfs_check_acl(struct inode *inode, int mask)
221 * stuff has been fixed to work with that. If the locking stuff changes, we 196 * stuff has been fixed to work with that. If the locking stuff changes, we
222 * need to re-evaluate the acl locking stuff. 197 * need to re-evaluate the acl locking stuff.
223 */ 198 */
224int btrfs_init_acl(struct inode *inode, struct inode *dir) 199int btrfs_init_acl(struct btrfs_trans_handle *trans,
200 struct inode *inode, struct inode *dir)
225{ 201{
226 struct posix_acl *acl = NULL; 202 struct posix_acl *acl = NULL;
227 int ret = 0; 203 int ret = 0;
@@ -246,7 +222,8 @@ int btrfs_init_acl(struct inode *inode, struct inode *dir)
246 mode_t mode; 222 mode_t mode;
247 223
248 if (S_ISDIR(inode->i_mode)) { 224 if (S_ISDIR(inode->i_mode)) {
249 ret = btrfs_set_acl(inode, acl, ACL_TYPE_DEFAULT); 225 ret = btrfs_set_acl(trans, inode, acl,
226 ACL_TYPE_DEFAULT);
250 if (ret) 227 if (ret)
251 goto failed; 228 goto failed;
252 } 229 }
@@ -261,7 +238,7 @@ int btrfs_init_acl(struct inode *inode, struct inode *dir)
261 inode->i_mode = mode; 238 inode->i_mode = mode;
262 if (ret > 0) { 239 if (ret > 0) {
263 /* we need an acl */ 240 /* we need an acl */
264 ret = btrfs_set_acl(inode, clone, 241 ret = btrfs_set_acl(trans, inode, clone,
265 ACL_TYPE_ACCESS); 242 ACL_TYPE_ACCESS);
266 } 243 }
267 } 244 }
@@ -294,7 +271,7 @@ int btrfs_acl_chmod(struct inode *inode)
294 271
295 ret = posix_acl_chmod_masq(clone, inode->i_mode); 272 ret = posix_acl_chmod_masq(clone, inode->i_mode);
296 if (!ret) 273 if (!ret)
297 ret = btrfs_set_acl(inode, clone, ACL_TYPE_ACCESS); 274 ret = btrfs_set_acl(NULL, inode, clone, ACL_TYPE_ACCESS);
298 275
299 posix_acl_release(clone); 276 posix_acl_release(clone);
300 277
@@ -303,14 +280,16 @@ int btrfs_acl_chmod(struct inode *inode)
303 280
304struct xattr_handler btrfs_xattr_acl_default_handler = { 281struct xattr_handler btrfs_xattr_acl_default_handler = {
305 .prefix = POSIX_ACL_XATTR_DEFAULT, 282 .prefix = POSIX_ACL_XATTR_DEFAULT,
306 .get = btrfs_xattr_acl_default_get, 283 .flags = ACL_TYPE_DEFAULT,
307 .set = btrfs_xattr_acl_default_set, 284 .get = btrfs_xattr_acl_get,
285 .set = btrfs_xattr_acl_set,
308}; 286};
309 287
310struct xattr_handler btrfs_xattr_acl_access_handler = { 288struct xattr_handler btrfs_xattr_acl_access_handler = {
311 .prefix = POSIX_ACL_XATTR_ACCESS, 289 .prefix = POSIX_ACL_XATTR_ACCESS,
312 .get = btrfs_xattr_acl_access_get, 290 .flags = ACL_TYPE_ACCESS,
313 .set = btrfs_xattr_acl_access_set, 291 .get = btrfs_xattr_acl_get,
292 .set = btrfs_xattr_acl_set,
314}; 293};
315 294
316#else /* CONFIG_BTRFS_FS_POSIX_ACL */ 295#else /* CONFIG_BTRFS_FS_POSIX_ACL */
@@ -320,7 +299,8 @@ int btrfs_acl_chmod(struct inode *inode)
320 return 0; 299 return 0;
321} 300}
322 301
323int btrfs_init_acl(struct inode *inode, struct inode *dir) 302int btrfs_init_acl(struct btrfs_trans_handle *trans,
303 struct inode *inode, struct inode *dir)
324{ 304{
325 return 0; 305 return 0;
326} 306}
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
index f6783a42f010..3f1f50d9d916 100644
--- a/fs/btrfs/btrfs_inode.h
+++ b/fs/btrfs/btrfs_inode.h
@@ -44,9 +44,6 @@ struct btrfs_inode {
44 */ 44 */
45 struct extent_io_tree io_failure_tree; 45 struct extent_io_tree io_failure_tree;
46 46
47 /* held while inesrting or deleting extents from files */
48 struct mutex extent_mutex;
49
50 /* held while logging the inode in tree-log.c */ 47 /* held while logging the inode in tree-log.c */
51 struct mutex log_mutex; 48 struct mutex log_mutex;
52 49
@@ -166,7 +163,7 @@ static inline struct btrfs_inode *BTRFS_I(struct inode *inode)
166 163
167static inline void btrfs_i_size_write(struct inode *inode, u64 size) 164static inline void btrfs_i_size_write(struct inode *inode, u64 size)
168{ 165{
169 inode->i_size = size; 166 i_size_write(inode, size);
170 BTRFS_I(inode)->disk_i_size = size; 167 BTRFS_I(inode)->disk_i_size = size;
171} 168}
172 169
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index ec96f3a6d536..c4bc570a396e 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -37,6 +37,11 @@ static int balance_node_right(struct btrfs_trans_handle *trans,
37 struct extent_buffer *src_buf); 37 struct extent_buffer *src_buf);
38static int del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root, 38static int del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root,
39 struct btrfs_path *path, int level, int slot); 39 struct btrfs_path *path, int level, int slot);
40static int setup_items_for_insert(struct btrfs_trans_handle *trans,
41 struct btrfs_root *root, struct btrfs_path *path,
42 struct btrfs_key *cpu_key, u32 *data_size,
43 u32 total_data, u32 total_size, int nr);
44
40 45
41struct btrfs_path *btrfs_alloc_path(void) 46struct btrfs_path *btrfs_alloc_path(void)
42{ 47{
@@ -451,9 +456,8 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
451 extent_buffer_get(cow); 456 extent_buffer_get(cow);
452 spin_unlock(&root->node_lock); 457 spin_unlock(&root->node_lock);
453 458
454 btrfs_free_extent(trans, root, buf->start, buf->len, 459 btrfs_free_tree_block(trans, root, buf->start, buf->len,
455 parent_start, root->root_key.objectid, 460 parent_start, root->root_key.objectid, level);
456 level, 0);
457 free_extent_buffer(buf); 461 free_extent_buffer(buf);
458 add_root_to_dirty_list(root); 462 add_root_to_dirty_list(root);
459 } else { 463 } else {
@@ -468,9 +472,8 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
468 btrfs_set_node_ptr_generation(parent, parent_slot, 472 btrfs_set_node_ptr_generation(parent, parent_slot,
469 trans->transid); 473 trans->transid);
470 btrfs_mark_buffer_dirty(parent); 474 btrfs_mark_buffer_dirty(parent);
471 btrfs_free_extent(trans, root, buf->start, buf->len, 475 btrfs_free_tree_block(trans, root, buf->start, buf->len,
472 parent_start, root->root_key.objectid, 476 parent_start, root->root_key.objectid, level);
473 level, 0);
474 } 477 }
475 if (unlock_orig) 478 if (unlock_orig)
476 btrfs_tree_unlock(buf); 479 btrfs_tree_unlock(buf);
@@ -1030,8 +1033,8 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
1030 btrfs_tree_unlock(mid); 1033 btrfs_tree_unlock(mid);
1031 /* once for the path */ 1034 /* once for the path */
1032 free_extent_buffer(mid); 1035 free_extent_buffer(mid);
1033 ret = btrfs_free_extent(trans, root, mid->start, mid->len, 1036 ret = btrfs_free_tree_block(trans, root, mid->start, mid->len,
1034 0, root->root_key.objectid, level, 1); 1037 0, root->root_key.objectid, level);
1035 /* once for the root ptr */ 1038 /* once for the root ptr */
1036 free_extent_buffer(mid); 1039 free_extent_buffer(mid);
1037 return ret; 1040 return ret;
@@ -1095,10 +1098,10 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
1095 1); 1098 1);
1096 if (wret) 1099 if (wret)
1097 ret = wret; 1100 ret = wret;
1098 wret = btrfs_free_extent(trans, root, bytenr, 1101 wret = btrfs_free_tree_block(trans, root,
1099 blocksize, 0, 1102 bytenr, blocksize, 0,
1100 root->root_key.objectid, 1103 root->root_key.objectid,
1101 level, 0); 1104 level);
1102 if (wret) 1105 if (wret)
1103 ret = wret; 1106 ret = wret;
1104 } else { 1107 } else {
@@ -1143,9 +1146,8 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
1143 wret = del_ptr(trans, root, path, level + 1, pslot); 1146 wret = del_ptr(trans, root, path, level + 1, pslot);
1144 if (wret) 1147 if (wret)
1145 ret = wret; 1148 ret = wret;
1146 wret = btrfs_free_extent(trans, root, bytenr, blocksize, 1149 wret = btrfs_free_tree_block(trans, root, bytenr, blocksize,
1147 0, root->root_key.objectid, 1150 0, root->root_key.objectid, level);
1148 level, 0);
1149 if (wret) 1151 if (wret)
1150 ret = wret; 1152 ret = wret;
1151 } else { 1153 } else {
@@ -2997,75 +2999,85 @@ again:
2997 return ret; 2999 return ret;
2998} 3000}
2999 3001
3000/* 3002static noinline int setup_leaf_for_split(struct btrfs_trans_handle *trans,
3001 * This function splits a single item into two items, 3003 struct btrfs_root *root,
3002 * giving 'new_key' to the new item and splitting the 3004 struct btrfs_path *path, int ins_len)
3003 * old one at split_offset (from the start of the item).
3004 *
3005 * The path may be released by this operation. After
3006 * the split, the path is pointing to the old item. The
3007 * new item is going to be in the same node as the old one.
3008 *
3009 * Note, the item being split must be smaller enough to live alone on
3010 * a tree block with room for one extra struct btrfs_item
3011 *
3012 * This allows us to split the item in place, keeping a lock on the
3013 * leaf the entire time.
3014 */
3015int btrfs_split_item(struct btrfs_trans_handle *trans,
3016 struct btrfs_root *root,
3017 struct btrfs_path *path,
3018 struct btrfs_key *new_key,
3019 unsigned long split_offset)
3020{ 3005{
3021 u32 item_size; 3006 struct btrfs_key key;
3022 struct extent_buffer *leaf; 3007 struct extent_buffer *leaf;
3023 struct btrfs_key orig_key; 3008 struct btrfs_file_extent_item *fi;
3024 struct btrfs_item *item; 3009 u64 extent_len = 0;
3025 struct btrfs_item *new_item; 3010 u32 item_size;
3026 int ret = 0; 3011 int ret;
3027 int slot;
3028 u32 nritems;
3029 u32 orig_offset;
3030 struct btrfs_disk_key disk_key;
3031 char *buf;
3032 3012
3033 leaf = path->nodes[0]; 3013 leaf = path->nodes[0];
3034 btrfs_item_key_to_cpu(leaf, &orig_key, path->slots[0]); 3014 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
3035 if (btrfs_leaf_free_space(root, leaf) >= sizeof(struct btrfs_item)) 3015
3036 goto split; 3016 BUG_ON(key.type != BTRFS_EXTENT_DATA_KEY &&
3017 key.type != BTRFS_EXTENT_CSUM_KEY);
3018
3019 if (btrfs_leaf_free_space(root, leaf) >= ins_len)
3020 return 0;
3037 3021
3038 item_size = btrfs_item_size_nr(leaf, path->slots[0]); 3022 item_size = btrfs_item_size_nr(leaf, path->slots[0]);
3023 if (key.type == BTRFS_EXTENT_DATA_KEY) {
3024 fi = btrfs_item_ptr(leaf, path->slots[0],
3025 struct btrfs_file_extent_item);
3026 extent_len = btrfs_file_extent_num_bytes(leaf, fi);
3027 }
3039 btrfs_release_path(root, path); 3028 btrfs_release_path(root, path);
3040 3029
3041 path->search_for_split = 1;
3042 path->keep_locks = 1; 3030 path->keep_locks = 1;
3043 3031 path->search_for_split = 1;
3044 ret = btrfs_search_slot(trans, root, &orig_key, path, 0, 1); 3032 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
3045 path->search_for_split = 0; 3033 path->search_for_split = 0;
3034 if (ret < 0)
3035 goto err;
3046 3036
3037 ret = -EAGAIN;
3038 leaf = path->nodes[0];
3047 /* if our item isn't there or got smaller, return now */ 3039 /* if our item isn't there or got smaller, return now */
3048 if (ret != 0 || item_size != btrfs_item_size_nr(path->nodes[0], 3040 if (ret > 0 || item_size != btrfs_item_size_nr(leaf, path->slots[0]))
3049 path->slots[0])) { 3041 goto err;
3050 path->keep_locks = 0; 3042
3051 return -EAGAIN; 3043 if (key.type == BTRFS_EXTENT_DATA_KEY) {
3044 fi = btrfs_item_ptr(leaf, path->slots[0],
3045 struct btrfs_file_extent_item);
3046 if (extent_len != btrfs_file_extent_num_bytes(leaf, fi))
3047 goto err;
3052 } 3048 }
3053 3049
3054 btrfs_set_path_blocking(path); 3050 btrfs_set_path_blocking(path);
3055 ret = split_leaf(trans, root, &orig_key, path, 3051 ret = split_leaf(trans, root, &key, path, ins_len, 1);
3056 sizeof(struct btrfs_item), 1);
3057 path->keep_locks = 0;
3058 BUG_ON(ret); 3052 BUG_ON(ret);
3059 3053
3054 path->keep_locks = 0;
3060 btrfs_unlock_up_safe(path, 1); 3055 btrfs_unlock_up_safe(path, 1);
3056 return 0;
3057err:
3058 path->keep_locks = 0;
3059 return ret;
3060}
3061
3062static noinline int split_item(struct btrfs_trans_handle *trans,
3063 struct btrfs_root *root,
3064 struct btrfs_path *path,
3065 struct btrfs_key *new_key,
3066 unsigned long split_offset)
3067{
3068 struct extent_buffer *leaf;
3069 struct btrfs_item *item;
3070 struct btrfs_item *new_item;
3071 int slot;
3072 char *buf;
3073 u32 nritems;
3074 u32 item_size;
3075 u32 orig_offset;
3076 struct btrfs_disk_key disk_key;
3077
3061 leaf = path->nodes[0]; 3078 leaf = path->nodes[0];
3062 BUG_ON(btrfs_leaf_free_space(root, leaf) < sizeof(struct btrfs_item)); 3079 BUG_ON(btrfs_leaf_free_space(root, leaf) < sizeof(struct btrfs_item));
3063 3080
3064split:
3065 /*
3066 * make sure any changes to the path from split_leaf leave it
3067 * in a blocking state
3068 */
3069 btrfs_set_path_blocking(path); 3081 btrfs_set_path_blocking(path);
3070 3082
3071 item = btrfs_item_nr(leaf, path->slots[0]); 3083 item = btrfs_item_nr(leaf, path->slots[0]);
@@ -3073,19 +3085,19 @@ split:
3073 item_size = btrfs_item_size(leaf, item); 3085 item_size = btrfs_item_size(leaf, item);
3074 3086
3075 buf = kmalloc(item_size, GFP_NOFS); 3087 buf = kmalloc(item_size, GFP_NOFS);
3088 if (!buf)
3089 return -ENOMEM;
3090
3076 read_extent_buffer(leaf, buf, btrfs_item_ptr_offset(leaf, 3091 read_extent_buffer(leaf, buf, btrfs_item_ptr_offset(leaf,
3077 path->slots[0]), item_size); 3092 path->slots[0]), item_size);
3078 slot = path->slots[0] + 1;
3079 leaf = path->nodes[0];
3080 3093
3094 slot = path->slots[0] + 1;
3081 nritems = btrfs_header_nritems(leaf); 3095 nritems = btrfs_header_nritems(leaf);
3082
3083 if (slot != nritems) { 3096 if (slot != nritems) {
3084 /* shift the items */ 3097 /* shift the items */
3085 memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot + 1), 3098 memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot + 1),
3086 btrfs_item_nr_offset(slot), 3099 btrfs_item_nr_offset(slot),
3087 (nritems - slot) * sizeof(struct btrfs_item)); 3100 (nritems - slot) * sizeof(struct btrfs_item));
3088
3089 } 3101 }
3090 3102
3091 btrfs_cpu_key_to_disk(&disk_key, new_key); 3103 btrfs_cpu_key_to_disk(&disk_key, new_key);
@@ -3113,16 +3125,81 @@ split:
3113 item_size - split_offset); 3125 item_size - split_offset);
3114 btrfs_mark_buffer_dirty(leaf); 3126 btrfs_mark_buffer_dirty(leaf);
3115 3127
3116 ret = 0; 3128 BUG_ON(btrfs_leaf_free_space(root, leaf) < 0);
3117 if (btrfs_leaf_free_space(root, leaf) < 0) {
3118 btrfs_print_leaf(root, leaf);
3119 BUG();
3120 }
3121 kfree(buf); 3129 kfree(buf);
3130 return 0;
3131}
3132
3133/*
3134 * This function splits a single item into two items,
3135 * giving 'new_key' to the new item and splitting the
3136 * old one at split_offset (from the start of the item).
3137 *
3138 * The path may be released by this operation. After
3139 * the split, the path is pointing to the old item. The
3140 * new item is going to be in the same node as the old one.
3141 *
3142 * Note, the item being split must be smaller enough to live alone on
3143 * a tree block with room for one extra struct btrfs_item
3144 *
3145 * This allows us to split the item in place, keeping a lock on the
3146 * leaf the entire time.
3147 */
3148int btrfs_split_item(struct btrfs_trans_handle *trans,
3149 struct btrfs_root *root,
3150 struct btrfs_path *path,
3151 struct btrfs_key *new_key,
3152 unsigned long split_offset)
3153{
3154 int ret;
3155 ret = setup_leaf_for_split(trans, root, path,
3156 sizeof(struct btrfs_item));
3157 if (ret)
3158 return ret;
3159
3160 ret = split_item(trans, root, path, new_key, split_offset);
3122 return ret; 3161 return ret;
3123} 3162}
3124 3163
3125/* 3164/*
3165 * This function duplicate a item, giving 'new_key' to the new item.
3166 * It guarantees both items live in the same tree leaf and the new item
3167 * is contiguous with the original item.
3168 *
3169 * This allows us to split file extent in place, keeping a lock on the
3170 * leaf the entire time.
3171 */
3172int btrfs_duplicate_item(struct btrfs_trans_handle *trans,
3173 struct btrfs_root *root,
3174 struct btrfs_path *path,
3175 struct btrfs_key *new_key)
3176{
3177 struct extent_buffer *leaf;
3178 int ret;
3179 u32 item_size;
3180
3181 leaf = path->nodes[0];
3182 item_size = btrfs_item_size_nr(leaf, path->slots[0]);
3183 ret = setup_leaf_for_split(trans, root, path,
3184 item_size + sizeof(struct btrfs_item));
3185 if (ret)
3186 return ret;
3187
3188 path->slots[0]++;
3189 ret = setup_items_for_insert(trans, root, path, new_key, &item_size,
3190 item_size, item_size +
3191 sizeof(struct btrfs_item), 1);
3192 BUG_ON(ret);
3193
3194 leaf = path->nodes[0];
3195 memcpy_extent_buffer(leaf,
3196 btrfs_item_ptr_offset(leaf, path->slots[0]),
3197 btrfs_item_ptr_offset(leaf, path->slots[0] - 1),
3198 item_size);
3199 return 0;
3200}
3201
3202/*
3126 * make the item pointed to by the path smaller. new_size indicates 3203 * make the item pointed to by the path smaller. new_size indicates
3127 * how small to make it, and from_end tells us if we just chop bytes 3204 * how small to make it, and from_end tells us if we just chop bytes
3128 * off the end of the item or if we shift the item to chop bytes off 3205 * off the end of the item or if we shift the item to chop bytes off
@@ -3714,8 +3791,8 @@ static noinline int btrfs_del_leaf(struct btrfs_trans_handle *trans,
3714 */ 3791 */
3715 btrfs_unlock_up_safe(path, 0); 3792 btrfs_unlock_up_safe(path, 0);
3716 3793
3717 ret = btrfs_free_extent(trans, root, leaf->start, leaf->len, 3794 ret = btrfs_free_tree_block(trans, root, leaf->start, leaf->len,
3718 0, root->root_key.objectid, 0, 0); 3795 0, root->root_key.objectid, 0);
3719 return ret; 3796 return ret;
3720} 3797}
3721/* 3798/*
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 444b3e9b92a4..9f806dd04c27 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -310,6 +310,9 @@ struct btrfs_header {
310#define BTRFS_MAX_INLINE_DATA_SIZE(r) (BTRFS_LEAF_DATA_SIZE(r) - \ 310#define BTRFS_MAX_INLINE_DATA_SIZE(r) (BTRFS_LEAF_DATA_SIZE(r) - \
311 sizeof(struct btrfs_item) - \ 311 sizeof(struct btrfs_item) - \
312 sizeof(struct btrfs_file_extent_item)) 312 sizeof(struct btrfs_file_extent_item))
313#define BTRFS_MAX_XATTR_SIZE(r) (BTRFS_LEAF_DATA_SIZE(r) - \
314 sizeof(struct btrfs_item) -\
315 sizeof(struct btrfs_dir_item))
313 316
314 317
315/* 318/*
@@ -859,8 +862,9 @@ struct btrfs_fs_info {
859 struct mutex ordered_operations_mutex; 862 struct mutex ordered_operations_mutex;
860 struct rw_semaphore extent_commit_sem; 863 struct rw_semaphore extent_commit_sem;
861 864
862 struct rw_semaphore subvol_sem; 865 struct rw_semaphore cleanup_work_sem;
863 866
867 struct rw_semaphore subvol_sem;
864 struct srcu_struct subvol_srcu; 868 struct srcu_struct subvol_srcu;
865 869
866 struct list_head trans_list; 870 struct list_head trans_list;
@@ -868,6 +872,9 @@ struct btrfs_fs_info {
868 struct list_head dead_roots; 872 struct list_head dead_roots;
869 struct list_head caching_block_groups; 873 struct list_head caching_block_groups;
870 874
875 spinlock_t delayed_iput_lock;
876 struct list_head delayed_iputs;
877
871 atomic_t nr_async_submits; 878 atomic_t nr_async_submits;
872 atomic_t async_submit_draining; 879 atomic_t async_submit_draining;
873 atomic_t nr_async_bios; 880 atomic_t nr_async_bios;
@@ -1034,12 +1041,12 @@ struct btrfs_root {
1034 int ref_cows; 1041 int ref_cows;
1035 int track_dirty; 1042 int track_dirty;
1036 int in_radix; 1043 int in_radix;
1044 int clean_orphans;
1037 1045
1038 u64 defrag_trans_start; 1046 u64 defrag_trans_start;
1039 struct btrfs_key defrag_progress; 1047 struct btrfs_key defrag_progress;
1040 struct btrfs_key defrag_max; 1048 struct btrfs_key defrag_max;
1041 int defrag_running; 1049 int defrag_running;
1042 int defrag_level;
1043 char *name; 1050 char *name;
1044 int in_sysfs; 1051 int in_sysfs;
1045 1052
@@ -1975,6 +1982,10 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
1975 u64 parent, u64 root_objectid, 1982 u64 parent, u64 root_objectid,
1976 struct btrfs_disk_key *key, int level, 1983 struct btrfs_disk_key *key, int level,
1977 u64 hint, u64 empty_size); 1984 u64 hint, u64 empty_size);
1985int btrfs_free_tree_block(struct btrfs_trans_handle *trans,
1986 struct btrfs_root *root,
1987 u64 bytenr, u32 blocksize,
1988 u64 parent, u64 root_objectid, int level);
1978struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans, 1989struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans,
1979 struct btrfs_root *root, 1990 struct btrfs_root *root,
1980 u64 bytenr, u32 blocksize, 1991 u64 bytenr, u32 blocksize,
@@ -2089,6 +2100,10 @@ int btrfs_split_item(struct btrfs_trans_handle *trans,
2089 struct btrfs_path *path, 2100 struct btrfs_path *path,
2090 struct btrfs_key *new_key, 2101 struct btrfs_key *new_key,
2091 unsigned long split_offset); 2102 unsigned long split_offset);
2103int btrfs_duplicate_item(struct btrfs_trans_handle *trans,
2104 struct btrfs_root *root,
2105 struct btrfs_path *path,
2106 struct btrfs_key *new_key);
2092int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root 2107int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root
2093 *root, struct btrfs_key *key, struct btrfs_path *p, int 2108 *root, struct btrfs_key *key, struct btrfs_path *p, int
2094 ins_len, int cow); 2109 ins_len, int cow);
@@ -2196,9 +2211,10 @@ int btrfs_delete_one_dir_name(struct btrfs_trans_handle *trans,
2196 struct btrfs_path *path, 2211 struct btrfs_path *path,
2197 struct btrfs_dir_item *di); 2212 struct btrfs_dir_item *di);
2198int btrfs_insert_xattr_item(struct btrfs_trans_handle *trans, 2213int btrfs_insert_xattr_item(struct btrfs_trans_handle *trans,
2199 struct btrfs_root *root, const char *name, 2214 struct btrfs_root *root,
2200 u16 name_len, const void *data, u16 data_len, 2215 struct btrfs_path *path, u64 objectid,
2201 u64 dir); 2216 const char *name, u16 name_len,
2217 const void *data, u16 data_len);
2202struct btrfs_dir_item *btrfs_lookup_xattr(struct btrfs_trans_handle *trans, 2218struct btrfs_dir_item *btrfs_lookup_xattr(struct btrfs_trans_handle *trans,
2203 struct btrfs_root *root, 2219 struct btrfs_root *root,
2204 struct btrfs_path *path, u64 dir, 2220 struct btrfs_path *path, u64 dir,
@@ -2292,7 +2308,7 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
2292 struct inode *inode, u64 new_size, 2308 struct inode *inode, u64 new_size,
2293 u32 min_type); 2309 u32 min_type);
2294 2310
2295int btrfs_start_delalloc_inodes(struct btrfs_root *root); 2311int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput);
2296int btrfs_set_extent_delalloc(struct inode *inode, u64 start, u64 end); 2312int btrfs_set_extent_delalloc(struct inode *inode, u64 start, u64 end);
2297int btrfs_writepages(struct address_space *mapping, 2313int btrfs_writepages(struct address_space *mapping,
2298 struct writeback_control *wbc); 2314 struct writeback_control *wbc);
@@ -2332,6 +2348,8 @@ int btrfs_orphan_del(struct btrfs_trans_handle *trans, struct inode *inode);
2332void btrfs_orphan_cleanup(struct btrfs_root *root); 2348void btrfs_orphan_cleanup(struct btrfs_root *root);
2333int btrfs_cont_expand(struct inode *inode, loff_t size); 2349int btrfs_cont_expand(struct inode *inode, loff_t size);
2334int btrfs_invalidate_inodes(struct btrfs_root *root); 2350int btrfs_invalidate_inodes(struct btrfs_root *root);
2351void btrfs_add_delayed_iput(struct inode *inode);
2352void btrfs_run_delayed_iputs(struct btrfs_root *root);
2335extern const struct dentry_operations btrfs_dentry_operations; 2353extern const struct dentry_operations btrfs_dentry_operations;
2336 2354
2337/* ioctl.c */ 2355/* ioctl.c */
@@ -2345,12 +2363,9 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end,
2345 int skip_pinned); 2363 int skip_pinned);
2346int btrfs_check_file(struct btrfs_root *root, struct inode *inode); 2364int btrfs_check_file(struct btrfs_root *root, struct inode *inode);
2347extern const struct file_operations btrfs_file_operations; 2365extern const struct file_operations btrfs_file_operations;
2348int btrfs_drop_extents(struct btrfs_trans_handle *trans, 2366int btrfs_drop_extents(struct btrfs_trans_handle *trans, struct inode *inode,
2349 struct btrfs_root *root, struct inode *inode, 2367 u64 start, u64 end, u64 *hint_byte, int drop_cache);
2350 u64 start, u64 end, u64 locked_end,
2351 u64 inline_limit, u64 *hint_block, int drop_cache);
2352int btrfs_mark_extent_written(struct btrfs_trans_handle *trans, 2368int btrfs_mark_extent_written(struct btrfs_trans_handle *trans,
2353 struct btrfs_root *root,
2354 struct inode *inode, u64 start, u64 end); 2369 struct inode *inode, u64 start, u64 end);
2355int btrfs_release_file(struct inode *inode, struct file *file); 2370int btrfs_release_file(struct inode *inode, struct file *file);
2356 2371
@@ -2380,7 +2395,8 @@ int btrfs_check_acl(struct inode *inode, int mask);
2380#else 2395#else
2381#define btrfs_check_acl NULL 2396#define btrfs_check_acl NULL
2382#endif 2397#endif
2383int btrfs_init_acl(struct inode *inode, struct inode *dir); 2398int btrfs_init_acl(struct btrfs_trans_handle *trans,
2399 struct inode *inode, struct inode *dir);
2384int btrfs_acl_chmod(struct inode *inode); 2400int btrfs_acl_chmod(struct inode *inode);
2385 2401
2386/* relocation.c */ 2402/* relocation.c */
diff --git a/fs/btrfs/dir-item.c b/fs/btrfs/dir-item.c
index f3a6075519cc..e9103b3baa49 100644
--- a/fs/btrfs/dir-item.c
+++ b/fs/btrfs/dir-item.c
@@ -68,12 +68,12 @@ static struct btrfs_dir_item *insert_with_overflow(struct btrfs_trans_handle
68 * into the tree 68 * into the tree
69 */ 69 */
70int btrfs_insert_xattr_item(struct btrfs_trans_handle *trans, 70int btrfs_insert_xattr_item(struct btrfs_trans_handle *trans,
71 struct btrfs_root *root, const char *name, 71 struct btrfs_root *root,
72 u16 name_len, const void *data, u16 data_len, 72 struct btrfs_path *path, u64 objectid,
73 u64 dir) 73 const char *name, u16 name_len,
74 const void *data, u16 data_len)
74{ 75{
75 int ret = 0; 76 int ret = 0;
76 struct btrfs_path *path;
77 struct btrfs_dir_item *dir_item; 77 struct btrfs_dir_item *dir_item;
78 unsigned long name_ptr, data_ptr; 78 unsigned long name_ptr, data_ptr;
79 struct btrfs_key key, location; 79 struct btrfs_key key, location;
@@ -81,15 +81,11 @@ int btrfs_insert_xattr_item(struct btrfs_trans_handle *trans,
81 struct extent_buffer *leaf; 81 struct extent_buffer *leaf;
82 u32 data_size; 82 u32 data_size;
83 83
84 key.objectid = dir; 84 BUG_ON(name_len + data_len > BTRFS_MAX_XATTR_SIZE(root));
85
86 key.objectid = objectid;
85 btrfs_set_key_type(&key, BTRFS_XATTR_ITEM_KEY); 87 btrfs_set_key_type(&key, BTRFS_XATTR_ITEM_KEY);
86 key.offset = btrfs_name_hash(name, name_len); 88 key.offset = btrfs_name_hash(name, name_len);
87 path = btrfs_alloc_path();
88 if (!path)
89 return -ENOMEM;
90 if (name_len + data_len + sizeof(struct btrfs_dir_item) >
91 BTRFS_LEAF_DATA_SIZE(root) - sizeof(struct btrfs_item))
92 return -ENOSPC;
93 89
94 data_size = sizeof(*dir_item) + name_len + data_len; 90 data_size = sizeof(*dir_item) + name_len + data_len;
95 dir_item = insert_with_overflow(trans, root, path, &key, data_size, 91 dir_item = insert_with_overflow(trans, root, path, &key, data_size,
@@ -117,7 +113,6 @@ int btrfs_insert_xattr_item(struct btrfs_trans_handle *trans,
117 write_extent_buffer(leaf, data, data_ptr, data_len); 113 write_extent_buffer(leaf, data, data_ptr, data_len);
118 btrfs_mark_buffer_dirty(path->nodes[0]); 114 btrfs_mark_buffer_dirty(path->nodes[0]);
119 115
120 btrfs_free_path(path);
121 return ret; 116 return ret;
122} 117}
123 118
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 02b6afbd7450..009e3bd18f23 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -892,6 +892,8 @@ static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize,
892 root->stripesize = stripesize; 892 root->stripesize = stripesize;
893 root->ref_cows = 0; 893 root->ref_cows = 0;
894 root->track_dirty = 0; 894 root->track_dirty = 0;
895 root->in_radix = 0;
896 root->clean_orphans = 0;
895 897
896 root->fs_info = fs_info; 898 root->fs_info = fs_info;
897 root->objectid = objectid; 899 root->objectid = objectid;
@@ -928,7 +930,6 @@ static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize,
928 root->defrag_trans_start = fs_info->generation; 930 root->defrag_trans_start = fs_info->generation;
929 init_completion(&root->kobj_unregister); 931 init_completion(&root->kobj_unregister);
930 root->defrag_running = 0; 932 root->defrag_running = 0;
931 root->defrag_level = 0;
932 root->root_key.objectid = objectid; 933 root->root_key.objectid = objectid;
933 root->anon_super.s_root = NULL; 934 root->anon_super.s_root = NULL;
934 root->anon_super.s_dev = 0; 935 root->anon_super.s_dev = 0;
@@ -980,12 +981,12 @@ int btrfs_free_log_root_tree(struct btrfs_trans_handle *trans,
980 981
981 while (1) { 982 while (1) {
982 ret = find_first_extent_bit(&log_root_tree->dirty_log_pages, 983 ret = find_first_extent_bit(&log_root_tree->dirty_log_pages,
983 0, &start, &end, EXTENT_DIRTY); 984 0, &start, &end, EXTENT_DIRTY | EXTENT_NEW);
984 if (ret) 985 if (ret)
985 break; 986 break;
986 987
987 clear_extent_dirty(&log_root_tree->dirty_log_pages, 988 clear_extent_bits(&log_root_tree->dirty_log_pages, start, end,
988 start, end, GFP_NOFS); 989 EXTENT_DIRTY | EXTENT_NEW, GFP_NOFS);
989 } 990 }
990 eb = fs_info->log_root_tree->node; 991 eb = fs_info->log_root_tree->node;
991 992
@@ -1210,8 +1211,10 @@ again:
1210 ret = radix_tree_insert(&fs_info->fs_roots_radix, 1211 ret = radix_tree_insert(&fs_info->fs_roots_radix,
1211 (unsigned long)root->root_key.objectid, 1212 (unsigned long)root->root_key.objectid,
1212 root); 1213 root);
1213 if (ret == 0) 1214 if (ret == 0) {
1214 root->in_radix = 1; 1215 root->in_radix = 1;
1216 root->clean_orphans = 1;
1217 }
1215 spin_unlock(&fs_info->fs_roots_radix_lock); 1218 spin_unlock(&fs_info->fs_roots_radix_lock);
1216 radix_tree_preload_end(); 1219 radix_tree_preload_end();
1217 if (ret) { 1220 if (ret) {
@@ -1225,10 +1228,6 @@ again:
1225 ret = btrfs_find_dead_roots(fs_info->tree_root, 1228 ret = btrfs_find_dead_roots(fs_info->tree_root,
1226 root->root_key.objectid); 1229 root->root_key.objectid);
1227 WARN_ON(ret); 1230 WARN_ON(ret);
1228
1229 if (!(fs_info->sb->s_flags & MS_RDONLY))
1230 btrfs_orphan_cleanup(root);
1231
1232 return root; 1231 return root;
1233fail: 1232fail:
1234 free_fs_root(root); 1233 free_fs_root(root);
@@ -1477,6 +1476,7 @@ static int cleaner_kthread(void *arg)
1477 1476
1478 if (!(root->fs_info->sb->s_flags & MS_RDONLY) && 1477 if (!(root->fs_info->sb->s_flags & MS_RDONLY) &&
1479 mutex_trylock(&root->fs_info->cleaner_mutex)) { 1478 mutex_trylock(&root->fs_info->cleaner_mutex)) {
1479 btrfs_run_delayed_iputs(root);
1480 btrfs_clean_old_snapshots(root); 1480 btrfs_clean_old_snapshots(root);
1481 mutex_unlock(&root->fs_info->cleaner_mutex); 1481 mutex_unlock(&root->fs_info->cleaner_mutex);
1482 } 1482 }
@@ -1606,6 +1606,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1606 INIT_RADIX_TREE(&fs_info->fs_roots_radix, GFP_ATOMIC); 1606 INIT_RADIX_TREE(&fs_info->fs_roots_radix, GFP_ATOMIC);
1607 INIT_LIST_HEAD(&fs_info->trans_list); 1607 INIT_LIST_HEAD(&fs_info->trans_list);
1608 INIT_LIST_HEAD(&fs_info->dead_roots); 1608 INIT_LIST_HEAD(&fs_info->dead_roots);
1609 INIT_LIST_HEAD(&fs_info->delayed_iputs);
1609 INIT_LIST_HEAD(&fs_info->hashers); 1610 INIT_LIST_HEAD(&fs_info->hashers);
1610 INIT_LIST_HEAD(&fs_info->delalloc_inodes); 1611 INIT_LIST_HEAD(&fs_info->delalloc_inodes);
1611 INIT_LIST_HEAD(&fs_info->ordered_operations); 1612 INIT_LIST_HEAD(&fs_info->ordered_operations);
@@ -1614,6 +1615,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1614 spin_lock_init(&fs_info->new_trans_lock); 1615 spin_lock_init(&fs_info->new_trans_lock);
1615 spin_lock_init(&fs_info->ref_cache_lock); 1616 spin_lock_init(&fs_info->ref_cache_lock);
1616 spin_lock_init(&fs_info->fs_roots_radix_lock); 1617 spin_lock_init(&fs_info->fs_roots_radix_lock);
1618 spin_lock_init(&fs_info->delayed_iput_lock);
1617 1619
1618 init_completion(&fs_info->kobj_unregister); 1620 init_completion(&fs_info->kobj_unregister);
1619 fs_info->tree_root = tree_root; 1621 fs_info->tree_root = tree_root;
@@ -1689,6 +1691,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1689 mutex_init(&fs_info->cleaner_mutex); 1691 mutex_init(&fs_info->cleaner_mutex);
1690 mutex_init(&fs_info->volume_mutex); 1692 mutex_init(&fs_info->volume_mutex);
1691 init_rwsem(&fs_info->extent_commit_sem); 1693 init_rwsem(&fs_info->extent_commit_sem);
1694 init_rwsem(&fs_info->cleanup_work_sem);
1692 init_rwsem(&fs_info->subvol_sem); 1695 init_rwsem(&fs_info->subvol_sem);
1693 1696
1694 btrfs_init_free_cluster(&fs_info->meta_alloc_cluster); 1697 btrfs_init_free_cluster(&fs_info->meta_alloc_cluster);
@@ -2386,8 +2389,14 @@ int btrfs_commit_super(struct btrfs_root *root)
2386 int ret; 2389 int ret;
2387 2390
2388 mutex_lock(&root->fs_info->cleaner_mutex); 2391 mutex_lock(&root->fs_info->cleaner_mutex);
2392 btrfs_run_delayed_iputs(root);
2389 btrfs_clean_old_snapshots(root); 2393 btrfs_clean_old_snapshots(root);
2390 mutex_unlock(&root->fs_info->cleaner_mutex); 2394 mutex_unlock(&root->fs_info->cleaner_mutex);
2395
2396 /* wait until ongoing cleanup work done */
2397 down_write(&root->fs_info->cleanup_work_sem);
2398 up_write(&root->fs_info->cleanup_work_sem);
2399
2391 trans = btrfs_start_transaction(root, 1); 2400 trans = btrfs_start_transaction(root, 1);
2392 ret = btrfs_commit_transaction(trans, root); 2401 ret = btrfs_commit_transaction(trans, root);
2393 BUG_ON(ret); 2402 BUG_ON(ret);
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 94627c4cc193..56e50137d0e6 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -195,6 +195,14 @@ static int exclude_super_stripes(struct btrfs_root *root,
195 int stripe_len; 195 int stripe_len;
196 int i, nr, ret; 196 int i, nr, ret;
197 197
198 if (cache->key.objectid < BTRFS_SUPER_INFO_OFFSET) {
199 stripe_len = BTRFS_SUPER_INFO_OFFSET - cache->key.objectid;
200 cache->bytes_super += stripe_len;
201 ret = add_excluded_extent(root, cache->key.objectid,
202 stripe_len);
203 BUG_ON(ret);
204 }
205
198 for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) { 206 for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
199 bytenr = btrfs_sb_offset(i); 207 bytenr = btrfs_sb_offset(i);
200 ret = btrfs_rmap_block(&root->fs_info->mapping_tree, 208 ret = btrfs_rmap_block(&root->fs_info->mapping_tree,
@@ -255,7 +263,7 @@ static u64 add_new_free_space(struct btrfs_block_group_cache *block_group,
255 if (ret) 263 if (ret)
256 break; 264 break;
257 265
258 if (extent_start == start) { 266 if (extent_start <= start) {
259 start = extent_end + 1; 267 start = extent_end + 1;
260 } else if (extent_start > start && extent_start < end) { 268 } else if (extent_start > start && extent_start < end) {
261 size = extent_start - start; 269 size = extent_start - start;
@@ -2880,9 +2888,9 @@ static noinline void flush_delalloc_async(struct btrfs_work *work)
2880 root = async->root; 2888 root = async->root;
2881 info = async->info; 2889 info = async->info;
2882 2890
2883 btrfs_start_delalloc_inodes(root); 2891 btrfs_start_delalloc_inodes(root, 0);
2884 wake_up(&info->flush_wait); 2892 wake_up(&info->flush_wait);
2885 btrfs_wait_ordered_extents(root, 0); 2893 btrfs_wait_ordered_extents(root, 0, 0);
2886 2894
2887 spin_lock(&info->lock); 2895 spin_lock(&info->lock);
2888 info->flushing = 0; 2896 info->flushing = 0;
@@ -2956,8 +2964,8 @@ static void flush_delalloc(struct btrfs_root *root,
2956 return; 2964 return;
2957 2965
2958flush: 2966flush:
2959 btrfs_start_delalloc_inodes(root); 2967 btrfs_start_delalloc_inodes(root, 0);
2960 btrfs_wait_ordered_extents(root, 0); 2968 btrfs_wait_ordered_extents(root, 0, 0);
2961 2969
2962 spin_lock(&info->lock); 2970 spin_lock(&info->lock);
2963 info->flushing = 0; 2971 info->flushing = 0;
@@ -3454,14 +3462,6 @@ static int update_block_group(struct btrfs_trans_handle *trans,
3454 else 3462 else
3455 old_val -= num_bytes; 3463 old_val -= num_bytes;
3456 btrfs_set_super_bytes_used(&info->super_copy, old_val); 3464 btrfs_set_super_bytes_used(&info->super_copy, old_val);
3457
3458 /* block accounting for root item */
3459 old_val = btrfs_root_used(&root->root_item);
3460 if (alloc)
3461 old_val += num_bytes;
3462 else
3463 old_val -= num_bytes;
3464 btrfs_set_root_used(&root->root_item, old_val);
3465 spin_unlock(&info->delalloc_lock); 3465 spin_unlock(&info->delalloc_lock);
3466 3466
3467 while (total) { 3467 while (total) {
@@ -4049,6 +4049,21 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans,
4049 return ret; 4049 return ret;
4050} 4050}
4051 4051
4052int btrfs_free_tree_block(struct btrfs_trans_handle *trans,
4053 struct btrfs_root *root,
4054 u64 bytenr, u32 blocksize,
4055 u64 parent, u64 root_objectid, int level)
4056{
4057 u64 used;
4058 spin_lock(&root->node_lock);
4059 used = btrfs_root_used(&root->root_item) - blocksize;
4060 btrfs_set_root_used(&root->root_item, used);
4061 spin_unlock(&root->node_lock);
4062
4063 return btrfs_free_extent(trans, root, bytenr, blocksize,
4064 parent, root_objectid, level, 0);
4065}
4066
4052static u64 stripe_align(struct btrfs_root *root, u64 val) 4067static u64 stripe_align(struct btrfs_root *root, u64 val)
4053{ 4068{
4054 u64 mask = ((u64)root->stripesize - 1); 4069 u64 mask = ((u64)root->stripesize - 1);
@@ -4578,7 +4593,6 @@ int btrfs_reserve_extent(struct btrfs_trans_handle *trans,
4578{ 4593{
4579 int ret; 4594 int ret;
4580 u64 search_start = 0; 4595 u64 search_start = 0;
4581 struct btrfs_fs_info *info = root->fs_info;
4582 4596
4583 data = btrfs_get_alloc_profile(root, data); 4597 data = btrfs_get_alloc_profile(root, data);
4584again: 4598again:
@@ -4586,17 +4600,9 @@ again:
4586 * the only place that sets empty_size is btrfs_realloc_node, which 4600 * the only place that sets empty_size is btrfs_realloc_node, which
4587 * is not called recursively on allocations 4601 * is not called recursively on allocations
4588 */ 4602 */
4589 if (empty_size || root->ref_cows) { 4603 if (empty_size || root->ref_cows)
4590 if (!(data & BTRFS_BLOCK_GROUP_METADATA)) {
4591 ret = do_chunk_alloc(trans, root->fs_info->extent_root,
4592 2 * 1024 * 1024,
4593 BTRFS_BLOCK_GROUP_METADATA |
4594 (info->metadata_alloc_profile &
4595 info->avail_metadata_alloc_bits), 0);
4596 }
4597 ret = do_chunk_alloc(trans, root->fs_info->extent_root, 4604 ret = do_chunk_alloc(trans, root->fs_info->extent_root,
4598 num_bytes + 2 * 1024 * 1024, data, 0); 4605 num_bytes + 2 * 1024 * 1024, data, 0);
4599 }
4600 4606
4601 WARN_ON(num_bytes < root->sectorsize); 4607 WARN_ON(num_bytes < root->sectorsize);
4602 ret = find_free_extent(trans, root, num_bytes, empty_size, 4608 ret = find_free_extent(trans, root, num_bytes, empty_size,
@@ -4897,6 +4903,14 @@ static int alloc_tree_block(struct btrfs_trans_handle *trans,
4897 extent_op); 4903 extent_op);
4898 BUG_ON(ret); 4904 BUG_ON(ret);
4899 } 4905 }
4906
4907 if (root_objectid == root->root_key.objectid) {
4908 u64 used;
4909 spin_lock(&root->node_lock);
4910 used = btrfs_root_used(&root->root_item) + num_bytes;
4911 btrfs_set_root_used(&root->root_item, used);
4912 spin_unlock(&root->node_lock);
4913 }
4900 return ret; 4914 return ret;
4901} 4915}
4902 4916
@@ -4919,8 +4933,16 @@ struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans,
4919 btrfs_set_buffer_uptodate(buf); 4933 btrfs_set_buffer_uptodate(buf);
4920 4934
4921 if (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID) { 4935 if (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID) {
4922 set_extent_dirty(&root->dirty_log_pages, buf->start, 4936 /*
4923 buf->start + buf->len - 1, GFP_NOFS); 4937 * we allow two log transactions at a time, use different
4938 * EXENT bit to differentiate dirty pages.
4939 */
4940 if (root->log_transid % 2 == 0)
4941 set_extent_dirty(&root->dirty_log_pages, buf->start,
4942 buf->start + buf->len - 1, GFP_NOFS);
4943 else
4944 set_extent_new(&root->dirty_log_pages, buf->start,
4945 buf->start + buf->len - 1, GFP_NOFS);
4924 } else { 4946 } else {
4925 set_extent_dirty(&trans->transaction->dirty_pages, buf->start, 4947 set_extent_dirty(&trans->transaction->dirty_pages, buf->start,
4926 buf->start + buf->len - 1, GFP_NOFS); 4948 buf->start + buf->len - 1, GFP_NOFS);
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 77f759302e12..feaa13b105d9 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -179,18 +179,14 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end,
179 } 179 }
180 flags = em->flags; 180 flags = em->flags;
181 if (skip_pinned && test_bit(EXTENT_FLAG_PINNED, &em->flags)) { 181 if (skip_pinned && test_bit(EXTENT_FLAG_PINNED, &em->flags)) {
182 if (em->start <= start && 182 if (testend && em->start + em->len >= start + len) {
183 (!testend || em->start + em->len >= start + len)) {
184 free_extent_map(em); 183 free_extent_map(em);
185 write_unlock(&em_tree->lock); 184 write_unlock(&em_tree->lock);
186 break; 185 break;
187 } 186 }
188 if (start < em->start) { 187 start = em->start + em->len;
189 len = em->start - start; 188 if (testend)
190 } else {
191 len = start + len - (em->start + em->len); 189 len = start + len - (em->start + em->len);
192 start = em->start + em->len;
193 }
194 free_extent_map(em); 190 free_extent_map(em);
195 write_unlock(&em_tree->lock); 191 write_unlock(&em_tree->lock);
196 continue; 192 continue;
@@ -265,319 +261,247 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end,
265 * If an extent intersects the range but is not entirely inside the range 261 * If an extent intersects the range but is not entirely inside the range
266 * it is either truncated or split. Anything entirely inside the range 262 * it is either truncated or split. Anything entirely inside the range
267 * is deleted from the tree. 263 * is deleted from the tree.
268 *
269 * inline_limit is used to tell this code which offsets in the file to keep
270 * if they contain inline extents.
271 */ 264 */
272noinline int btrfs_drop_extents(struct btrfs_trans_handle *trans, 265int btrfs_drop_extents(struct btrfs_trans_handle *trans, struct inode *inode,
273 struct btrfs_root *root, struct inode *inode, 266 u64 start, u64 end, u64 *hint_byte, int drop_cache)
274 u64 start, u64 end, u64 locked_end,
275 u64 inline_limit, u64 *hint_byte, int drop_cache)
276{ 267{
277 u64 extent_end = 0; 268 struct btrfs_root *root = BTRFS_I(inode)->root;
278 u64 search_start = start;
279 u64 ram_bytes = 0;
280 u64 disk_bytenr = 0;
281 u64 orig_locked_end = locked_end;
282 u8 compression;
283 u8 encryption;
284 u16 other_encoding = 0;
285 struct extent_buffer *leaf; 269 struct extent_buffer *leaf;
286 struct btrfs_file_extent_item *extent; 270 struct btrfs_file_extent_item *fi;
287 struct btrfs_path *path; 271 struct btrfs_path *path;
288 struct btrfs_key key; 272 struct btrfs_key key;
289 struct btrfs_file_extent_item old; 273 struct btrfs_key new_key;
290 int keep; 274 u64 search_start = start;
291 int slot; 275 u64 disk_bytenr = 0;
292 int bookend; 276 u64 num_bytes = 0;
293 int found_type = 0; 277 u64 extent_offset = 0;
294 int found_extent; 278 u64 extent_end = 0;
295 int found_inline; 279 int del_nr = 0;
280 int del_slot = 0;
281 int extent_type;
296 int recow; 282 int recow;
297 int ret; 283 int ret;
298 284
299 inline_limit = 0;
300 if (drop_cache) 285 if (drop_cache)
301 btrfs_drop_extent_cache(inode, start, end - 1, 0); 286 btrfs_drop_extent_cache(inode, start, end - 1, 0);
302 287
303 path = btrfs_alloc_path(); 288 path = btrfs_alloc_path();
304 if (!path) 289 if (!path)
305 return -ENOMEM; 290 return -ENOMEM;
291
306 while (1) { 292 while (1) {
307 recow = 0; 293 recow = 0;
308 btrfs_release_path(root, path);
309 ret = btrfs_lookup_file_extent(trans, root, path, inode->i_ino, 294 ret = btrfs_lookup_file_extent(trans, root, path, inode->i_ino,
310 search_start, -1); 295 search_start, -1);
311 if (ret < 0) 296 if (ret < 0)
312 goto out; 297 break;
313 if (ret > 0) { 298 if (ret > 0 && path->slots[0] > 0 && search_start == start) {
314 if (path->slots[0] == 0) { 299 leaf = path->nodes[0];
315 ret = 0; 300 btrfs_item_key_to_cpu(leaf, &key, path->slots[0] - 1);
316 goto out; 301 if (key.objectid == inode->i_ino &&
317 } 302 key.type == BTRFS_EXTENT_DATA_KEY)
318 path->slots[0]--; 303 path->slots[0]--;
319 } 304 }
305 ret = 0;
320next_slot: 306next_slot:
321 keep = 0;
322 bookend = 0;
323 found_extent = 0;
324 found_inline = 0;
325 compression = 0;
326 encryption = 0;
327 extent = NULL;
328 leaf = path->nodes[0]; 307 leaf = path->nodes[0];
329 slot = path->slots[0]; 308 if (path->slots[0] >= btrfs_header_nritems(leaf)) {
330 ret = 0; 309 BUG_ON(del_nr > 0);
331 btrfs_item_key_to_cpu(leaf, &key, slot); 310 ret = btrfs_next_leaf(root, path);
332 if (btrfs_key_type(&key) == BTRFS_EXTENT_DATA_KEY && 311 if (ret < 0)
333 key.offset >= end) { 312 break;
334 goto out; 313 if (ret > 0) {
335 } 314 ret = 0;
336 if (btrfs_key_type(&key) > BTRFS_EXTENT_DATA_KEY || 315 break;
337 key.objectid != inode->i_ino) {
338 goto out;
339 }
340 if (recow) {
341 search_start = max(key.offset, start);
342 continue;
343 }
344 if (btrfs_key_type(&key) == BTRFS_EXTENT_DATA_KEY) {
345 extent = btrfs_item_ptr(leaf, slot,
346 struct btrfs_file_extent_item);
347 found_type = btrfs_file_extent_type(leaf, extent);
348 compression = btrfs_file_extent_compression(leaf,
349 extent);
350 encryption = btrfs_file_extent_encryption(leaf,
351 extent);
352 other_encoding = btrfs_file_extent_other_encoding(leaf,
353 extent);
354 if (found_type == BTRFS_FILE_EXTENT_REG ||
355 found_type == BTRFS_FILE_EXTENT_PREALLOC) {
356 extent_end =
357 btrfs_file_extent_disk_bytenr(leaf,
358 extent);
359 if (extent_end)
360 *hint_byte = extent_end;
361
362 extent_end = key.offset +
363 btrfs_file_extent_num_bytes(leaf, extent);
364 ram_bytes = btrfs_file_extent_ram_bytes(leaf,
365 extent);
366 found_extent = 1;
367 } else if (found_type == BTRFS_FILE_EXTENT_INLINE) {
368 found_inline = 1;
369 extent_end = key.offset +
370 btrfs_file_extent_inline_len(leaf, extent);
371 } 316 }
317 leaf = path->nodes[0];
318 recow = 1;
319 }
320
321 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
322 if (key.objectid > inode->i_ino ||
323 key.type > BTRFS_EXTENT_DATA_KEY || key.offset >= end)
324 break;
325
326 fi = btrfs_item_ptr(leaf, path->slots[0],
327 struct btrfs_file_extent_item);
328 extent_type = btrfs_file_extent_type(leaf, fi);
329
330 if (extent_type == BTRFS_FILE_EXTENT_REG ||
331 extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
332 disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
333 num_bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
334 extent_offset = btrfs_file_extent_offset(leaf, fi);
335 extent_end = key.offset +
336 btrfs_file_extent_num_bytes(leaf, fi);
337 } else if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
338 extent_end = key.offset +
339 btrfs_file_extent_inline_len(leaf, fi);
372 } else { 340 } else {
341 WARN_ON(1);
373 extent_end = search_start; 342 extent_end = search_start;
374 } 343 }
375 344
376 /* we found nothing we can drop */ 345 if (extent_end <= search_start) {
377 if ((!found_extent && !found_inline) || 346 path->slots[0]++;
378 search_start >= extent_end) {
379 int nextret;
380 u32 nritems;
381 nritems = btrfs_header_nritems(leaf);
382 if (slot >= nritems - 1) {
383 nextret = btrfs_next_leaf(root, path);
384 if (nextret)
385 goto out;
386 recow = 1;
387 } else {
388 path->slots[0]++;
389 }
390 goto next_slot; 347 goto next_slot;
391 } 348 }
392 349
393 if (end <= extent_end && start >= key.offset && found_inline) 350 search_start = max(key.offset, start);
394 *hint_byte = EXTENT_MAP_INLINE; 351 if (recow) {
395 352 btrfs_release_path(root, path);
396 if (found_extent) { 353 continue;
397 read_extent_buffer(leaf, &old, (unsigned long)extent,
398 sizeof(old));
399 }
400
401 if (end < extent_end && end >= key.offset) {
402 bookend = 1;
403 if (found_inline && start <= key.offset)
404 keep = 1;
405 } 354 }
406 355
407 if (bookend && found_extent) { 356 /*
408 if (locked_end < extent_end) { 357 * | - range to drop - |
409 ret = try_lock_extent(&BTRFS_I(inode)->io_tree, 358 * | -------- extent -------- |
410 locked_end, extent_end - 1, 359 */
411 GFP_NOFS); 360 if (start > key.offset && end < extent_end) {
412 if (!ret) { 361 BUG_ON(del_nr > 0);
413 btrfs_release_path(root, path); 362 BUG_ON(extent_type == BTRFS_FILE_EXTENT_INLINE);
414 lock_extent(&BTRFS_I(inode)->io_tree, 363
415 locked_end, extent_end - 1, 364 memcpy(&new_key, &key, sizeof(new_key));
416 GFP_NOFS); 365 new_key.offset = start;
417 locked_end = extent_end; 366 ret = btrfs_duplicate_item(trans, root, path,
418 continue; 367 &new_key);
419 } 368 if (ret == -EAGAIN) {
420 locked_end = extent_end; 369 btrfs_release_path(root, path);
370 continue;
421 } 371 }
422 disk_bytenr = le64_to_cpu(old.disk_bytenr); 372 if (ret < 0)
423 if (disk_bytenr != 0) { 373 break;
374
375 leaf = path->nodes[0];
376 fi = btrfs_item_ptr(leaf, path->slots[0] - 1,
377 struct btrfs_file_extent_item);
378 btrfs_set_file_extent_num_bytes(leaf, fi,
379 start - key.offset);
380
381 fi = btrfs_item_ptr(leaf, path->slots[0],
382 struct btrfs_file_extent_item);
383
384 extent_offset += start - key.offset;
385 btrfs_set_file_extent_offset(leaf, fi, extent_offset);
386 btrfs_set_file_extent_num_bytes(leaf, fi,
387 extent_end - start);
388 btrfs_mark_buffer_dirty(leaf);
389
390 if (disk_bytenr > 0) {
424 ret = btrfs_inc_extent_ref(trans, root, 391 ret = btrfs_inc_extent_ref(trans, root,
425 disk_bytenr, 392 disk_bytenr, num_bytes, 0,
426 le64_to_cpu(old.disk_num_bytes), 0, 393 root->root_key.objectid,
427 root->root_key.objectid, 394 new_key.objectid,
428 key.objectid, key.offset - 395 start - extent_offset);
429 le64_to_cpu(old.offset));
430 BUG_ON(ret); 396 BUG_ON(ret);
397 *hint_byte = disk_bytenr;
431 } 398 }
399 key.offset = start;
432 } 400 }
401 /*
402 * | ---- range to drop ----- |
403 * | -------- extent -------- |
404 */
405 if (start <= key.offset && end < extent_end) {
406 BUG_ON(extent_type == BTRFS_FILE_EXTENT_INLINE);
433 407
434 if (found_inline) { 408 memcpy(&new_key, &key, sizeof(new_key));
435 u64 mask = root->sectorsize - 1; 409 new_key.offset = end;
436 search_start = (extent_end + mask) & ~mask; 410 btrfs_set_item_key_safe(trans, root, path, &new_key);
437 } else 411
438 search_start = extent_end; 412 extent_offset += end - key.offset;
439 413 btrfs_set_file_extent_offset(leaf, fi, extent_offset);
440 /* truncate existing extent */ 414 btrfs_set_file_extent_num_bytes(leaf, fi,
441 if (start > key.offset) { 415 extent_end - end);
442 u64 new_num; 416 btrfs_mark_buffer_dirty(leaf);
443 u64 old_num; 417 if (disk_bytenr > 0) {
444 keep = 1; 418 inode_sub_bytes(inode, end - key.offset);
445 WARN_ON(start & (root->sectorsize - 1)); 419 *hint_byte = disk_bytenr;
446 if (found_extent) {
447 new_num = start - key.offset;
448 old_num = btrfs_file_extent_num_bytes(leaf,
449 extent);
450 *hint_byte =
451 btrfs_file_extent_disk_bytenr(leaf,
452 extent);
453 if (btrfs_file_extent_disk_bytenr(leaf,
454 extent)) {
455 inode_sub_bytes(inode, old_num -
456 new_num);
457 }
458 btrfs_set_file_extent_num_bytes(leaf,
459 extent, new_num);
460 btrfs_mark_buffer_dirty(leaf);
461 } else if (key.offset < inline_limit &&
462 (end > extent_end) &&
463 (inline_limit < extent_end)) {
464 u32 new_size;
465 new_size = btrfs_file_extent_calc_inline_size(
466 inline_limit - key.offset);
467 inode_sub_bytes(inode, extent_end -
468 inline_limit);
469 btrfs_set_file_extent_ram_bytes(leaf, extent,
470 new_size);
471 if (!compression && !encryption) {
472 btrfs_truncate_item(trans, root, path,
473 new_size, 1);
474 }
475 } 420 }
421 break;
476 } 422 }
477 /* delete the entire extent */
478 if (!keep) {
479 if (found_inline)
480 inode_sub_bytes(inode, extent_end -
481 key.offset);
482 ret = btrfs_del_item(trans, root, path);
483 /* TODO update progress marker and return */
484 BUG_ON(ret);
485 extent = NULL;
486 btrfs_release_path(root, path);
487 /* the extent will be freed later */
488 }
489 if (bookend && found_inline && start <= key.offset) {
490 u32 new_size;
491 new_size = btrfs_file_extent_calc_inline_size(
492 extent_end - end);
493 inode_sub_bytes(inode, end - key.offset);
494 btrfs_set_file_extent_ram_bytes(leaf, extent,
495 new_size);
496 if (!compression && !encryption)
497 ret = btrfs_truncate_item(trans, root, path,
498 new_size, 0);
499 BUG_ON(ret);
500 }
501 /* create bookend, splitting the extent in two */
502 if (bookend && found_extent) {
503 struct btrfs_key ins;
504 ins.objectid = inode->i_ino;
505 ins.offset = end;
506 btrfs_set_key_type(&ins, BTRFS_EXTENT_DATA_KEY);
507 423
508 btrfs_release_path(root, path); 424 search_start = extent_end;
509 path->leave_spinning = 1; 425 /*
510 ret = btrfs_insert_empty_item(trans, root, path, &ins, 426 * | ---- range to drop ----- |
511 sizeof(*extent)); 427 * | -------- extent -------- |
512 BUG_ON(ret); 428 */
429 if (start > key.offset && end >= extent_end) {
430 BUG_ON(del_nr > 0);
431 BUG_ON(extent_type == BTRFS_FILE_EXTENT_INLINE);
513 432
514 leaf = path->nodes[0]; 433 btrfs_set_file_extent_num_bytes(leaf, fi,
515 extent = btrfs_item_ptr(leaf, path->slots[0], 434 start - key.offset);
516 struct btrfs_file_extent_item); 435 btrfs_mark_buffer_dirty(leaf);
517 write_extent_buffer(leaf, &old, 436 if (disk_bytenr > 0) {
518 (unsigned long)extent, sizeof(old)); 437 inode_sub_bytes(inode, extent_end - start);
519 438 *hint_byte = disk_bytenr;
520 btrfs_set_file_extent_compression(leaf, extent, 439 }
521 compression); 440 if (end == extent_end)
522 btrfs_set_file_extent_encryption(leaf, extent, 441 break;
523 encryption);
524 btrfs_set_file_extent_other_encoding(leaf, extent,
525 other_encoding);
526 btrfs_set_file_extent_offset(leaf, extent,
527 le64_to_cpu(old.offset) + end - key.offset);
528 WARN_ON(le64_to_cpu(old.num_bytes) <
529 (extent_end - end));
530 btrfs_set_file_extent_num_bytes(leaf, extent,
531 extent_end - end);
532 442
533 /* 443 path->slots[0]++;
534 * set the ram bytes to the size of the full extent 444 goto next_slot;
535 * before splitting. This is a worst case flag,
536 * but its the best we can do because we don't know
537 * how splitting affects compression
538 */
539 btrfs_set_file_extent_ram_bytes(leaf, extent,
540 ram_bytes);
541 btrfs_set_file_extent_type(leaf, extent, found_type);
542
543 btrfs_unlock_up_safe(path, 1);
544 btrfs_mark_buffer_dirty(path->nodes[0]);
545 btrfs_set_lock_blocking(path->nodes[0]);
546
547 path->leave_spinning = 0;
548 btrfs_release_path(root, path);
549 if (disk_bytenr != 0)
550 inode_add_bytes(inode, extent_end - end);
551 } 445 }
552 446
553 if (found_extent && !keep) { 447 /*
554 u64 old_disk_bytenr = le64_to_cpu(old.disk_bytenr); 448 * | ---- range to drop ----- |
449 * | ------ extent ------ |
450 */
451 if (start <= key.offset && end >= extent_end) {
452 if (del_nr == 0) {
453 del_slot = path->slots[0];
454 del_nr = 1;
455 } else {
456 BUG_ON(del_slot + del_nr != path->slots[0]);
457 del_nr++;
458 }
555 459
556 if (old_disk_bytenr != 0) { 460 if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
557 inode_sub_bytes(inode, 461 inode_sub_bytes(inode,
558 le64_to_cpu(old.num_bytes)); 462 extent_end - key.offset);
463 extent_end = ALIGN(extent_end,
464 root->sectorsize);
465 } else if (disk_bytenr > 0) {
559 ret = btrfs_free_extent(trans, root, 466 ret = btrfs_free_extent(trans, root,
560 old_disk_bytenr, 467 disk_bytenr, num_bytes, 0,
561 le64_to_cpu(old.disk_num_bytes), 468 root->root_key.objectid,
562 0, root->root_key.objectid,
563 key.objectid, key.offset - 469 key.objectid, key.offset -
564 le64_to_cpu(old.offset)); 470 extent_offset);
565 BUG_ON(ret); 471 BUG_ON(ret);
566 *hint_byte = old_disk_bytenr; 472 inode_sub_bytes(inode,
473 extent_end - key.offset);
474 *hint_byte = disk_bytenr;
567 } 475 }
568 }
569 476
570 if (search_start >= end) { 477 if (end == extent_end)
571 ret = 0; 478 break;
572 goto out; 479
480 if (path->slots[0] + 1 < btrfs_header_nritems(leaf)) {
481 path->slots[0]++;
482 goto next_slot;
483 }
484
485 ret = btrfs_del_items(trans, root, path, del_slot,
486 del_nr);
487 BUG_ON(ret);
488
489 del_nr = 0;
490 del_slot = 0;
491
492 btrfs_release_path(root, path);
493 continue;
573 } 494 }
495
496 BUG_ON(1);
574 } 497 }
575out: 498
576 btrfs_free_path(path); 499 if (del_nr > 0) {
577 if (locked_end > orig_locked_end) { 500 ret = btrfs_del_items(trans, root, path, del_slot, del_nr);
578 unlock_extent(&BTRFS_I(inode)->io_tree, orig_locked_end, 501 BUG_ON(ret);
579 locked_end - 1, GFP_NOFS);
580 } 502 }
503
504 btrfs_free_path(path);
581 return ret; 505 return ret;
582} 506}
583 507
@@ -620,23 +544,23 @@ static int extent_mergeable(struct extent_buffer *leaf, int slot,
620 * two or three. 544 * two or three.
621 */ 545 */
622int btrfs_mark_extent_written(struct btrfs_trans_handle *trans, 546int btrfs_mark_extent_written(struct btrfs_trans_handle *trans,
623 struct btrfs_root *root,
624 struct inode *inode, u64 start, u64 end) 547 struct inode *inode, u64 start, u64 end)
625{ 548{
549 struct btrfs_root *root = BTRFS_I(inode)->root;
626 struct extent_buffer *leaf; 550 struct extent_buffer *leaf;
627 struct btrfs_path *path; 551 struct btrfs_path *path;
628 struct btrfs_file_extent_item *fi; 552 struct btrfs_file_extent_item *fi;
629 struct btrfs_key key; 553 struct btrfs_key key;
554 struct btrfs_key new_key;
630 u64 bytenr; 555 u64 bytenr;
631 u64 num_bytes; 556 u64 num_bytes;
632 u64 extent_end; 557 u64 extent_end;
633 u64 orig_offset; 558 u64 orig_offset;
634 u64 other_start; 559 u64 other_start;
635 u64 other_end; 560 u64 other_end;
636 u64 split = start; 561 u64 split;
637 u64 locked_end = end; 562 int del_nr = 0;
638 int extent_type; 563 int del_slot = 0;
639 int split_end = 1;
640 int ret; 564 int ret;
641 565
642 btrfs_drop_extent_cache(inode, start, end - 1, 0); 566 btrfs_drop_extent_cache(inode, start, end - 1, 0);
@@ -644,12 +568,10 @@ int btrfs_mark_extent_written(struct btrfs_trans_handle *trans,
644 path = btrfs_alloc_path(); 568 path = btrfs_alloc_path();
645 BUG_ON(!path); 569 BUG_ON(!path);
646again: 570again:
571 split = start;
647 key.objectid = inode->i_ino; 572 key.objectid = inode->i_ino;
648 key.type = BTRFS_EXTENT_DATA_KEY; 573 key.type = BTRFS_EXTENT_DATA_KEY;
649 if (split == start) 574 key.offset = split;
650 key.offset = split;
651 else
652 key.offset = split - 1;
653 575
654 ret = btrfs_search_slot(trans, root, &key, path, -1, 1); 576 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
655 if (ret > 0 && path->slots[0] > 0) 577 if (ret > 0 && path->slots[0] > 0)
@@ -661,8 +583,8 @@ again:
661 key.type != BTRFS_EXTENT_DATA_KEY); 583 key.type != BTRFS_EXTENT_DATA_KEY);
662 fi = btrfs_item_ptr(leaf, path->slots[0], 584 fi = btrfs_item_ptr(leaf, path->slots[0],
663 struct btrfs_file_extent_item); 585 struct btrfs_file_extent_item);
664 extent_type = btrfs_file_extent_type(leaf, fi); 586 BUG_ON(btrfs_file_extent_type(leaf, fi) !=
665 BUG_ON(extent_type != BTRFS_FILE_EXTENT_PREALLOC); 587 BTRFS_FILE_EXTENT_PREALLOC);
666 extent_end = key.offset + btrfs_file_extent_num_bytes(leaf, fi); 588 extent_end = key.offset + btrfs_file_extent_num_bytes(leaf, fi);
667 BUG_ON(key.offset > start || extent_end < end); 589 BUG_ON(key.offset > start || extent_end < end);
668 590
@@ -670,150 +592,91 @@ again:
670 num_bytes = btrfs_file_extent_disk_num_bytes(leaf, fi); 592 num_bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
671 orig_offset = key.offset - btrfs_file_extent_offset(leaf, fi); 593 orig_offset = key.offset - btrfs_file_extent_offset(leaf, fi);
672 594
673 if (key.offset == start) 595 while (start > key.offset || end < extent_end) {
674 split = end; 596 if (key.offset == start)
675 597 split = end;
676 if (key.offset == start && extent_end == end) { 598
677 int del_nr = 0; 599 memcpy(&new_key, &key, sizeof(new_key));
678 int del_slot = 0; 600 new_key.offset = split;
679 other_start = end; 601 ret = btrfs_duplicate_item(trans, root, path, &new_key);
680 other_end = 0; 602 if (ret == -EAGAIN) {
681 if (extent_mergeable(leaf, path->slots[0] + 1, inode->i_ino, 603 btrfs_release_path(root, path);
682 bytenr, &other_start, &other_end)) { 604 goto again;
683 extent_end = other_end;
684 del_slot = path->slots[0] + 1;
685 del_nr++;
686 ret = btrfs_free_extent(trans, root, bytenr, num_bytes,
687 0, root->root_key.objectid,
688 inode->i_ino, orig_offset);
689 BUG_ON(ret);
690 }
691 other_start = 0;
692 other_end = start;
693 if (extent_mergeable(leaf, path->slots[0] - 1, inode->i_ino,
694 bytenr, &other_start, &other_end)) {
695 key.offset = other_start;
696 del_slot = path->slots[0];
697 del_nr++;
698 ret = btrfs_free_extent(trans, root, bytenr, num_bytes,
699 0, root->root_key.objectid,
700 inode->i_ino, orig_offset);
701 BUG_ON(ret);
702 }
703 split_end = 0;
704 if (del_nr == 0) {
705 btrfs_set_file_extent_type(leaf, fi,
706 BTRFS_FILE_EXTENT_REG);
707 goto done;
708 } 605 }
606 BUG_ON(ret < 0);
709 607
710 fi = btrfs_item_ptr(leaf, del_slot - 1, 608 leaf = path->nodes[0];
609 fi = btrfs_item_ptr(leaf, path->slots[0] - 1,
711 struct btrfs_file_extent_item); 610 struct btrfs_file_extent_item);
712 btrfs_set_file_extent_type(leaf, fi, BTRFS_FILE_EXTENT_REG);
713 btrfs_set_file_extent_num_bytes(leaf, fi, 611 btrfs_set_file_extent_num_bytes(leaf, fi,
714 extent_end - key.offset); 612 split - key.offset);
613
614 fi = btrfs_item_ptr(leaf, path->slots[0],
615 struct btrfs_file_extent_item);
616
617 btrfs_set_file_extent_offset(leaf, fi, split - orig_offset);
618 btrfs_set_file_extent_num_bytes(leaf, fi,
619 extent_end - split);
715 btrfs_mark_buffer_dirty(leaf); 620 btrfs_mark_buffer_dirty(leaf);
716 621
717 ret = btrfs_del_items(trans, root, path, del_slot, del_nr); 622 ret = btrfs_inc_extent_ref(trans, root, bytenr, num_bytes, 0,
623 root->root_key.objectid,
624 inode->i_ino, orig_offset);
718 BUG_ON(ret); 625 BUG_ON(ret);
719 goto release;
720 } else if (split == start) {
721 if (locked_end < extent_end) {
722 ret = try_lock_extent(&BTRFS_I(inode)->io_tree,
723 locked_end, extent_end - 1, GFP_NOFS);
724 if (!ret) {
725 btrfs_release_path(root, path);
726 lock_extent(&BTRFS_I(inode)->io_tree,
727 locked_end, extent_end - 1, GFP_NOFS);
728 locked_end = extent_end;
729 goto again;
730 }
731 locked_end = extent_end;
732 }
733 btrfs_set_file_extent_num_bytes(leaf, fi, split - key.offset);
734 } else {
735 BUG_ON(key.offset != start);
736 key.offset = split;
737 btrfs_set_file_extent_offset(leaf, fi, key.offset -
738 orig_offset);
739 btrfs_set_file_extent_num_bytes(leaf, fi, extent_end - split);
740 btrfs_set_item_key_safe(trans, root, path, &key);
741 extent_end = split;
742 }
743 626
744 if (extent_end == end) { 627 if (split == start) {
745 split_end = 0; 628 key.offset = start;
746 extent_type = BTRFS_FILE_EXTENT_REG; 629 } else {
747 } 630 BUG_ON(start != key.offset);
748 if (extent_end == end && split == start) {
749 other_start = end;
750 other_end = 0;
751 if (extent_mergeable(leaf, path->slots[0] + 1, inode->i_ino,
752 bytenr, &other_start, &other_end)) {
753 path->slots[0]++;
754 fi = btrfs_item_ptr(leaf, path->slots[0],
755 struct btrfs_file_extent_item);
756 key.offset = split;
757 btrfs_set_item_key_safe(trans, root, path, &key);
758 btrfs_set_file_extent_offset(leaf, fi, key.offset -
759 orig_offset);
760 btrfs_set_file_extent_num_bytes(leaf, fi,
761 other_end - split);
762 goto done;
763 }
764 }
765 if (extent_end == end && split == end) {
766 other_start = 0;
767 other_end = start;
768 if (extent_mergeable(leaf, path->slots[0] - 1 , inode->i_ino,
769 bytenr, &other_start, &other_end)) {
770 path->slots[0]--; 631 path->slots[0]--;
771 fi = btrfs_item_ptr(leaf, path->slots[0], 632 extent_end = end;
772 struct btrfs_file_extent_item);
773 btrfs_set_file_extent_num_bytes(leaf, fi, extent_end -
774 other_start);
775 goto done;
776 } 633 }
777 } 634 }
778 635
779 btrfs_mark_buffer_dirty(leaf);
780
781 ret = btrfs_inc_extent_ref(trans, root, bytenr, num_bytes, 0,
782 root->root_key.objectid,
783 inode->i_ino, orig_offset);
784 BUG_ON(ret);
785 btrfs_release_path(root, path);
786
787 key.offset = start;
788 ret = btrfs_insert_empty_item(trans, root, path, &key, sizeof(*fi));
789 BUG_ON(ret);
790
791 leaf = path->nodes[0];
792 fi = btrfs_item_ptr(leaf, path->slots[0], 636 fi = btrfs_item_ptr(leaf, path->slots[0],
793 struct btrfs_file_extent_item); 637 struct btrfs_file_extent_item);
794 btrfs_set_file_extent_generation(leaf, fi, trans->transid);
795 btrfs_set_file_extent_type(leaf, fi, extent_type);
796 btrfs_set_file_extent_disk_bytenr(leaf, fi, bytenr);
797 btrfs_set_file_extent_disk_num_bytes(leaf, fi, num_bytes);
798 btrfs_set_file_extent_offset(leaf, fi, key.offset - orig_offset);
799 btrfs_set_file_extent_num_bytes(leaf, fi, extent_end - key.offset);
800 btrfs_set_file_extent_ram_bytes(leaf, fi, num_bytes);
801 btrfs_set_file_extent_compression(leaf, fi, 0);
802 btrfs_set_file_extent_encryption(leaf, fi, 0);
803 btrfs_set_file_extent_other_encoding(leaf, fi, 0);
804done:
805 btrfs_mark_buffer_dirty(leaf);
806 638
807release: 639 other_start = end;
808 btrfs_release_path(root, path); 640 other_end = 0;
809 if (split_end && split == start) { 641 if (extent_mergeable(leaf, path->slots[0] + 1, inode->i_ino,
810 split = end; 642 bytenr, &other_start, &other_end)) {
811 goto again; 643 extent_end = other_end;
644 del_slot = path->slots[0] + 1;
645 del_nr++;
646 ret = btrfs_free_extent(trans, root, bytenr, num_bytes,
647 0, root->root_key.objectid,
648 inode->i_ino, orig_offset);
649 BUG_ON(ret);
812 } 650 }
813 if (locked_end > end) { 651 other_start = 0;
814 unlock_extent(&BTRFS_I(inode)->io_tree, end, locked_end - 1, 652 other_end = start;
815 GFP_NOFS); 653 if (extent_mergeable(leaf, path->slots[0] - 1, inode->i_ino,
654 bytenr, &other_start, &other_end)) {
655 key.offset = other_start;
656 del_slot = path->slots[0];
657 del_nr++;
658 ret = btrfs_free_extent(trans, root, bytenr, num_bytes,
659 0, root->root_key.objectid,
660 inode->i_ino, orig_offset);
661 BUG_ON(ret);
816 } 662 }
663 if (del_nr == 0) {
664 btrfs_set_file_extent_type(leaf, fi,
665 BTRFS_FILE_EXTENT_REG);
666 btrfs_mark_buffer_dirty(leaf);
667 goto out;
668 }
669
670 fi = btrfs_item_ptr(leaf, del_slot - 1,
671 struct btrfs_file_extent_item);
672 btrfs_set_file_extent_type(leaf, fi, BTRFS_FILE_EXTENT_REG);
673 btrfs_set_file_extent_num_bytes(leaf, fi,
674 extent_end - key.offset);
675 btrfs_mark_buffer_dirty(leaf);
676
677 ret = btrfs_del_items(trans, root, path, del_slot, del_nr);
678 BUG_ON(ret);
679out:
817 btrfs_free_path(path); 680 btrfs_free_path(path);
818 return 0; 681 return 0;
819} 682}
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index b3ad168a0bfc..5440bab23635 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -88,13 +88,14 @@ static noinline int cow_file_range(struct inode *inode,
88 u64 start, u64 end, int *page_started, 88 u64 start, u64 end, int *page_started,
89 unsigned long *nr_written, int unlock); 89 unsigned long *nr_written, int unlock);
90 90
91static int btrfs_init_inode_security(struct inode *inode, struct inode *dir) 91static int btrfs_init_inode_security(struct btrfs_trans_handle *trans,
92 struct inode *inode, struct inode *dir)
92{ 93{
93 int err; 94 int err;
94 95
95 err = btrfs_init_acl(inode, dir); 96 err = btrfs_init_acl(trans, inode, dir);
96 if (!err) 97 if (!err)
97 err = btrfs_xattr_security_init(inode, dir); 98 err = btrfs_xattr_security_init(trans, inode, dir);
98 return err; 99 return err;
99} 100}
100 101
@@ -188,8 +189,18 @@ static noinline int insert_inline_extent(struct btrfs_trans_handle *trans,
188 btrfs_mark_buffer_dirty(leaf); 189 btrfs_mark_buffer_dirty(leaf);
189 btrfs_free_path(path); 190 btrfs_free_path(path);
190 191
192 /*
193 * we're an inline extent, so nobody can
194 * extend the file past i_size without locking
195 * a page we already have locked.
196 *
197 * We must do any isize and inode updates
198 * before we unlock the pages. Otherwise we
199 * could end up racing with unlink.
200 */
191 BTRFS_I(inode)->disk_i_size = inode->i_size; 201 BTRFS_I(inode)->disk_i_size = inode->i_size;
192 btrfs_update_inode(trans, root, inode); 202 btrfs_update_inode(trans, root, inode);
203
193 return 0; 204 return 0;
194fail: 205fail:
195 btrfs_free_path(path); 206 btrfs_free_path(path);
@@ -230,8 +241,7 @@ static noinline int cow_file_range_inline(struct btrfs_trans_handle *trans,
230 return 1; 241 return 1;
231 } 242 }
232 243
233 ret = btrfs_drop_extents(trans, root, inode, start, 244 ret = btrfs_drop_extents(trans, inode, start, aligned_end,
234 aligned_end, aligned_end, start,
235 &hint_byte, 1); 245 &hint_byte, 1);
236 BUG_ON(ret); 246 BUG_ON(ret);
237 247
@@ -416,7 +426,6 @@ again:
416 start, end, 426 start, end,
417 total_compressed, pages); 427 total_compressed, pages);
418 } 428 }
419 btrfs_end_transaction(trans, root);
420 if (ret == 0) { 429 if (ret == 0) {
421 /* 430 /*
422 * inline extent creation worked, we don't need 431 * inline extent creation worked, we don't need
@@ -430,9 +439,11 @@ again:
430 EXTENT_CLEAR_DELALLOC | 439 EXTENT_CLEAR_DELALLOC |
431 EXTENT_CLEAR_ACCOUNTING | 440 EXTENT_CLEAR_ACCOUNTING |
432 EXTENT_SET_WRITEBACK | EXTENT_END_WRITEBACK); 441 EXTENT_SET_WRITEBACK | EXTENT_END_WRITEBACK);
433 ret = 0; 442
443 btrfs_end_transaction(trans, root);
434 goto free_pages_out; 444 goto free_pages_out;
435 } 445 }
446 btrfs_end_transaction(trans, root);
436 } 447 }
437 448
438 if (will_compress) { 449 if (will_compress) {
@@ -543,7 +554,6 @@ static noinline int submit_compressed_extents(struct inode *inode,
543 if (list_empty(&async_cow->extents)) 554 if (list_empty(&async_cow->extents))
544 return 0; 555 return 0;
545 556
546 trans = btrfs_join_transaction(root, 1);
547 557
548 while (!list_empty(&async_cow->extents)) { 558 while (!list_empty(&async_cow->extents)) {
549 async_extent = list_entry(async_cow->extents.next, 559 async_extent = list_entry(async_cow->extents.next,
@@ -590,19 +600,15 @@ retry:
590 lock_extent(io_tree, async_extent->start, 600 lock_extent(io_tree, async_extent->start,
591 async_extent->start + async_extent->ram_size - 1, 601 async_extent->start + async_extent->ram_size - 1,
592 GFP_NOFS); 602 GFP_NOFS);
593 /*
594 * here we're doing allocation and writeback of the
595 * compressed pages
596 */
597 btrfs_drop_extent_cache(inode, async_extent->start,
598 async_extent->start +
599 async_extent->ram_size - 1, 0);
600 603
604 trans = btrfs_join_transaction(root, 1);
601 ret = btrfs_reserve_extent(trans, root, 605 ret = btrfs_reserve_extent(trans, root,
602 async_extent->compressed_size, 606 async_extent->compressed_size,
603 async_extent->compressed_size, 607 async_extent->compressed_size,
604 0, alloc_hint, 608 0, alloc_hint,
605 (u64)-1, &ins, 1); 609 (u64)-1, &ins, 1);
610 btrfs_end_transaction(trans, root);
611
606 if (ret) { 612 if (ret) {
607 int i; 613 int i;
608 for (i = 0; i < async_extent->nr_pages; i++) { 614 for (i = 0; i < async_extent->nr_pages; i++) {
@@ -618,6 +624,14 @@ retry:
618 goto retry; 624 goto retry;
619 } 625 }
620 626
627 /*
628 * here we're doing allocation and writeback of the
629 * compressed pages
630 */
631 btrfs_drop_extent_cache(inode, async_extent->start,
632 async_extent->start +
633 async_extent->ram_size - 1, 0);
634
621 em = alloc_extent_map(GFP_NOFS); 635 em = alloc_extent_map(GFP_NOFS);
622 em->start = async_extent->start; 636 em->start = async_extent->start;
623 em->len = async_extent->ram_size; 637 em->len = async_extent->ram_size;
@@ -649,8 +663,6 @@ retry:
649 BTRFS_ORDERED_COMPRESSED); 663 BTRFS_ORDERED_COMPRESSED);
650 BUG_ON(ret); 664 BUG_ON(ret);
651 665
652 btrfs_end_transaction(trans, root);
653
654 /* 666 /*
655 * clear dirty, set writeback and unlock the pages. 667 * clear dirty, set writeback and unlock the pages.
656 */ 668 */
@@ -672,13 +684,11 @@ retry:
672 async_extent->nr_pages); 684 async_extent->nr_pages);
673 685
674 BUG_ON(ret); 686 BUG_ON(ret);
675 trans = btrfs_join_transaction(root, 1);
676 alloc_hint = ins.objectid + ins.offset; 687 alloc_hint = ins.objectid + ins.offset;
677 kfree(async_extent); 688 kfree(async_extent);
678 cond_resched(); 689 cond_resched();
679 } 690 }
680 691
681 btrfs_end_transaction(trans, root);
682 return 0; 692 return 0;
683} 693}
684 694
@@ -742,6 +752,7 @@ static noinline int cow_file_range(struct inode *inode,
742 EXTENT_CLEAR_DIRTY | 752 EXTENT_CLEAR_DIRTY |
743 EXTENT_SET_WRITEBACK | 753 EXTENT_SET_WRITEBACK |
744 EXTENT_END_WRITEBACK); 754 EXTENT_END_WRITEBACK);
755
745 *nr_written = *nr_written + 756 *nr_written = *nr_written +
746 (end - start + PAGE_CACHE_SIZE) / PAGE_CACHE_SIZE; 757 (end - start + PAGE_CACHE_SIZE) / PAGE_CACHE_SIZE;
747 *page_started = 1; 758 *page_started = 1;
@@ -1596,7 +1607,6 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans,
1596 struct inode *inode, u64 file_pos, 1607 struct inode *inode, u64 file_pos,
1597 u64 disk_bytenr, u64 disk_num_bytes, 1608 u64 disk_bytenr, u64 disk_num_bytes,
1598 u64 num_bytes, u64 ram_bytes, 1609 u64 num_bytes, u64 ram_bytes,
1599 u64 locked_end,
1600 u8 compression, u8 encryption, 1610 u8 compression, u8 encryption,
1601 u16 other_encoding, int extent_type) 1611 u16 other_encoding, int extent_type)
1602{ 1612{
@@ -1622,9 +1632,8 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans,
1622 * the caller is expected to unpin it and allow it to be merged 1632 * the caller is expected to unpin it and allow it to be merged
1623 * with the others. 1633 * with the others.
1624 */ 1634 */
1625 ret = btrfs_drop_extents(trans, root, inode, file_pos, 1635 ret = btrfs_drop_extents(trans, inode, file_pos, file_pos + num_bytes,
1626 file_pos + num_bytes, locked_end, 1636 &hint, 0);
1627 file_pos, &hint, 0);
1628 BUG_ON(ret); 1637 BUG_ON(ret);
1629 1638
1630 ins.objectid = inode->i_ino; 1639 ins.objectid = inode->i_ino;
@@ -1730,23 +1739,32 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
1730 } 1739 }
1731 } 1740 }
1732 1741
1733 trans = btrfs_join_transaction(root, 1);
1734
1735 if (!ordered_extent) 1742 if (!ordered_extent)
1736 ordered_extent = btrfs_lookup_ordered_extent(inode, start); 1743 ordered_extent = btrfs_lookup_ordered_extent(inode, start);
1737 BUG_ON(!ordered_extent); 1744 BUG_ON(!ordered_extent);
1738 if (test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags)) 1745 if (test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags)) {
1739 goto nocow; 1746 BUG_ON(!list_empty(&ordered_extent->list));
1747 ret = btrfs_ordered_update_i_size(inode, 0, ordered_extent);
1748 if (!ret) {
1749 trans = btrfs_join_transaction(root, 1);
1750 ret = btrfs_update_inode(trans, root, inode);
1751 BUG_ON(ret);
1752 btrfs_end_transaction(trans, root);
1753 }
1754 goto out;
1755 }
1740 1756
1741 lock_extent(io_tree, ordered_extent->file_offset, 1757 lock_extent(io_tree, ordered_extent->file_offset,
1742 ordered_extent->file_offset + ordered_extent->len - 1, 1758 ordered_extent->file_offset + ordered_extent->len - 1,
1743 GFP_NOFS); 1759 GFP_NOFS);
1744 1760
1761 trans = btrfs_join_transaction(root, 1);
1762
1745 if (test_bit(BTRFS_ORDERED_COMPRESSED, &ordered_extent->flags)) 1763 if (test_bit(BTRFS_ORDERED_COMPRESSED, &ordered_extent->flags))
1746 compressed = 1; 1764 compressed = 1;
1747 if (test_bit(BTRFS_ORDERED_PREALLOC, &ordered_extent->flags)) { 1765 if (test_bit(BTRFS_ORDERED_PREALLOC, &ordered_extent->flags)) {
1748 BUG_ON(compressed); 1766 BUG_ON(compressed);
1749 ret = btrfs_mark_extent_written(trans, root, inode, 1767 ret = btrfs_mark_extent_written(trans, inode,
1750 ordered_extent->file_offset, 1768 ordered_extent->file_offset,
1751 ordered_extent->file_offset + 1769 ordered_extent->file_offset +
1752 ordered_extent->len); 1770 ordered_extent->len);
@@ -1758,8 +1776,6 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
1758 ordered_extent->disk_len, 1776 ordered_extent->disk_len,
1759 ordered_extent->len, 1777 ordered_extent->len,
1760 ordered_extent->len, 1778 ordered_extent->len,
1761 ordered_extent->file_offset +
1762 ordered_extent->len,
1763 compressed, 0, 0, 1779 compressed, 0, 0,
1764 BTRFS_FILE_EXTENT_REG); 1780 BTRFS_FILE_EXTENT_REG);
1765 unpin_extent_cache(&BTRFS_I(inode)->extent_tree, 1781 unpin_extent_cache(&BTRFS_I(inode)->extent_tree,
@@ -1770,22 +1786,20 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
1770 unlock_extent(io_tree, ordered_extent->file_offset, 1786 unlock_extent(io_tree, ordered_extent->file_offset,
1771 ordered_extent->file_offset + ordered_extent->len - 1, 1787 ordered_extent->file_offset + ordered_extent->len - 1,
1772 GFP_NOFS); 1788 GFP_NOFS);
1773nocow:
1774 add_pending_csums(trans, inode, ordered_extent->file_offset, 1789 add_pending_csums(trans, inode, ordered_extent->file_offset,
1775 &ordered_extent->list); 1790 &ordered_extent->list);
1776 1791
1777 mutex_lock(&BTRFS_I(inode)->extent_mutex); 1792 /* this also removes the ordered extent from the tree */
1778 btrfs_ordered_update_i_size(inode, ordered_extent); 1793 btrfs_ordered_update_i_size(inode, 0, ordered_extent);
1779 btrfs_update_inode(trans, root, inode); 1794 ret = btrfs_update_inode(trans, root, inode);
1780 btrfs_remove_ordered_extent(inode, ordered_extent); 1795 BUG_ON(ret);
1781 mutex_unlock(&BTRFS_I(inode)->extent_mutex); 1796 btrfs_end_transaction(trans, root);
1782 1797out:
1783 /* once for us */ 1798 /* once for us */
1784 btrfs_put_ordered_extent(ordered_extent); 1799 btrfs_put_ordered_extent(ordered_extent);
1785 /* once for the tree */ 1800 /* once for the tree */
1786 btrfs_put_ordered_extent(ordered_extent); 1801 btrfs_put_ordered_extent(ordered_extent);
1787 1802
1788 btrfs_end_transaction(trans, root);
1789 return 0; 1803 return 0;
1790} 1804}
1791 1805
@@ -2008,6 +2022,54 @@ zeroit:
2008 return -EIO; 2022 return -EIO;
2009} 2023}
2010 2024
2025struct delayed_iput {
2026 struct list_head list;
2027 struct inode *inode;
2028};
2029
2030void btrfs_add_delayed_iput(struct inode *inode)
2031{
2032 struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info;
2033 struct delayed_iput *delayed;
2034
2035 if (atomic_add_unless(&inode->i_count, -1, 1))
2036 return;
2037
2038 delayed = kmalloc(sizeof(*delayed), GFP_NOFS | __GFP_NOFAIL);
2039 delayed->inode = inode;
2040
2041 spin_lock(&fs_info->delayed_iput_lock);
2042 list_add_tail(&delayed->list, &fs_info->delayed_iputs);
2043 spin_unlock(&fs_info->delayed_iput_lock);
2044}
2045
2046void btrfs_run_delayed_iputs(struct btrfs_root *root)
2047{
2048 LIST_HEAD(list);
2049 struct btrfs_fs_info *fs_info = root->fs_info;
2050 struct delayed_iput *delayed;
2051 int empty;
2052
2053 spin_lock(&fs_info->delayed_iput_lock);
2054 empty = list_empty(&fs_info->delayed_iputs);
2055 spin_unlock(&fs_info->delayed_iput_lock);
2056 if (empty)
2057 return;
2058
2059 down_read(&root->fs_info->cleanup_work_sem);
2060 spin_lock(&fs_info->delayed_iput_lock);
2061 list_splice_init(&fs_info->delayed_iputs, &list);
2062 spin_unlock(&fs_info->delayed_iput_lock);
2063
2064 while (!list_empty(&list)) {
2065 delayed = list_entry(list.next, struct delayed_iput, list);
2066 list_del(&delayed->list);
2067 iput(delayed->inode);
2068 kfree(delayed);
2069 }
2070 up_read(&root->fs_info->cleanup_work_sem);
2071}
2072
2011/* 2073/*
2012 * This creates an orphan entry for the given inode in case something goes 2074 * This creates an orphan entry for the given inode in case something goes
2013 * wrong in the middle of an unlink/truncate. 2075 * wrong in the middle of an unlink/truncate.
@@ -2080,16 +2142,17 @@ void btrfs_orphan_cleanup(struct btrfs_root *root)
2080 struct inode *inode; 2142 struct inode *inode;
2081 int ret = 0, nr_unlink = 0, nr_truncate = 0; 2143 int ret = 0, nr_unlink = 0, nr_truncate = 0;
2082 2144
2083 path = btrfs_alloc_path(); 2145 if (!xchg(&root->clean_orphans, 0))
2084 if (!path)
2085 return; 2146 return;
2147
2148 path = btrfs_alloc_path();
2149 BUG_ON(!path);
2086 path->reada = -1; 2150 path->reada = -1;
2087 2151
2088 key.objectid = BTRFS_ORPHAN_OBJECTID; 2152 key.objectid = BTRFS_ORPHAN_OBJECTID;
2089 btrfs_set_key_type(&key, BTRFS_ORPHAN_ITEM_KEY); 2153 btrfs_set_key_type(&key, BTRFS_ORPHAN_ITEM_KEY);
2090 key.offset = (u64)-1; 2154 key.offset = (u64)-1;
2091 2155
2092
2093 while (1) { 2156 while (1) {
2094 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 2157 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
2095 if (ret < 0) { 2158 if (ret < 0) {
@@ -2834,37 +2897,40 @@ out:
2834 * min_type is the minimum key type to truncate down to. If set to 0, this 2897 * min_type is the minimum key type to truncate down to. If set to 0, this
2835 * will kill all the items on this inode, including the INODE_ITEM_KEY. 2898 * will kill all the items on this inode, including the INODE_ITEM_KEY.
2836 */ 2899 */
2837noinline int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans, 2900int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
2838 struct btrfs_root *root, 2901 struct btrfs_root *root,
2839 struct inode *inode, 2902 struct inode *inode,
2840 u64 new_size, u32 min_type) 2903 u64 new_size, u32 min_type)
2841{ 2904{
2842 int ret;
2843 struct btrfs_path *path; 2905 struct btrfs_path *path;
2844 struct btrfs_key key;
2845 struct btrfs_key found_key;
2846 u32 found_type = (u8)-1;
2847 struct extent_buffer *leaf; 2906 struct extent_buffer *leaf;
2848 struct btrfs_file_extent_item *fi; 2907 struct btrfs_file_extent_item *fi;
2908 struct btrfs_key key;
2909 struct btrfs_key found_key;
2849 u64 extent_start = 0; 2910 u64 extent_start = 0;
2850 u64 extent_num_bytes = 0; 2911 u64 extent_num_bytes = 0;
2851 u64 extent_offset = 0; 2912 u64 extent_offset = 0;
2852 u64 item_end = 0; 2913 u64 item_end = 0;
2914 u64 mask = root->sectorsize - 1;
2915 u32 found_type = (u8)-1;
2853 int found_extent; 2916 int found_extent;
2854 int del_item; 2917 int del_item;
2855 int pending_del_nr = 0; 2918 int pending_del_nr = 0;
2856 int pending_del_slot = 0; 2919 int pending_del_slot = 0;
2857 int extent_type = -1; 2920 int extent_type = -1;
2858 int encoding; 2921 int encoding;
2859 u64 mask = root->sectorsize - 1; 2922 int ret;
2923 int err = 0;
2924
2925 BUG_ON(new_size > 0 && min_type != BTRFS_EXTENT_DATA_KEY);
2860 2926
2861 if (root->ref_cows) 2927 if (root->ref_cows)
2862 btrfs_drop_extent_cache(inode, new_size & (~mask), (u64)-1, 0); 2928 btrfs_drop_extent_cache(inode, new_size & (~mask), (u64)-1, 0);
2929
2863 path = btrfs_alloc_path(); 2930 path = btrfs_alloc_path();
2864 BUG_ON(!path); 2931 BUG_ON(!path);
2865 path->reada = -1; 2932 path->reada = -1;
2866 2933
2867 /* FIXME, add redo link to tree so we don't leak on crash */
2868 key.objectid = inode->i_ino; 2934 key.objectid = inode->i_ino;
2869 key.offset = (u64)-1; 2935 key.offset = (u64)-1;
2870 key.type = (u8)-1; 2936 key.type = (u8)-1;
@@ -2872,17 +2938,17 @@ noinline int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
2872search_again: 2938search_again:
2873 path->leave_spinning = 1; 2939 path->leave_spinning = 1;
2874 ret = btrfs_search_slot(trans, root, &key, path, -1, 1); 2940 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
2875 if (ret < 0) 2941 if (ret < 0) {
2876 goto error; 2942 err = ret;
2943 goto out;
2944 }
2877 2945
2878 if (ret > 0) { 2946 if (ret > 0) {
2879 /* there are no items in the tree for us to truncate, we're 2947 /* there are no items in the tree for us to truncate, we're
2880 * done 2948 * done
2881 */ 2949 */
2882 if (path->slots[0] == 0) { 2950 if (path->slots[0] == 0)
2883 ret = 0; 2951 goto out;
2884 goto error;
2885 }
2886 path->slots[0]--; 2952 path->slots[0]--;
2887 } 2953 }
2888 2954
@@ -2917,28 +2983,17 @@ search_again:
2917 } 2983 }
2918 item_end--; 2984 item_end--;
2919 } 2985 }
2920 if (item_end < new_size) { 2986 if (found_type > min_type) {
2921 if (found_type == BTRFS_DIR_ITEM_KEY) 2987 del_item = 1;
2922 found_type = BTRFS_INODE_ITEM_KEY; 2988 } else {
2923 else if (found_type == BTRFS_EXTENT_ITEM_KEY) 2989 if (item_end < new_size)
2924 found_type = BTRFS_EXTENT_DATA_KEY;
2925 else if (found_type == BTRFS_EXTENT_DATA_KEY)
2926 found_type = BTRFS_XATTR_ITEM_KEY;
2927 else if (found_type == BTRFS_XATTR_ITEM_KEY)
2928 found_type = BTRFS_INODE_REF_KEY;
2929 else if (found_type)
2930 found_type--;
2931 else
2932 break; 2990 break;
2933 btrfs_set_key_type(&key, found_type); 2991 if (found_key.offset >= new_size)
2934 goto next; 2992 del_item = 1;
2993 else
2994 del_item = 0;
2935 } 2995 }
2936 if (found_key.offset >= new_size)
2937 del_item = 1;
2938 else
2939 del_item = 0;
2940 found_extent = 0; 2996 found_extent = 0;
2941
2942 /* FIXME, shrink the extent if the ref count is only 1 */ 2997 /* FIXME, shrink the extent if the ref count is only 1 */
2943 if (found_type != BTRFS_EXTENT_DATA_KEY) 2998 if (found_type != BTRFS_EXTENT_DATA_KEY)
2944 goto delete; 2999 goto delete;
@@ -3025,42 +3080,36 @@ delete:
3025 inode->i_ino, extent_offset); 3080 inode->i_ino, extent_offset);
3026 BUG_ON(ret); 3081 BUG_ON(ret);
3027 } 3082 }
3028next:
3029 if (path->slots[0] == 0) {
3030 if (pending_del_nr)
3031 goto del_pending;
3032 btrfs_release_path(root, path);
3033 if (found_type == BTRFS_INODE_ITEM_KEY)
3034 break;
3035 goto search_again;
3036 }
3037 3083
3038 path->slots[0]--; 3084 if (found_type == BTRFS_INODE_ITEM_KEY)
3039 if (pending_del_nr && 3085 break;
3040 path->slots[0] + 1 != pending_del_slot) { 3086
3041 struct btrfs_key debug; 3087 if (path->slots[0] == 0 ||
3042del_pending: 3088 path->slots[0] != pending_del_slot) {
3043 btrfs_item_key_to_cpu(path->nodes[0], &debug, 3089 if (root->ref_cows) {
3044 pending_del_slot); 3090 err = -EAGAIN;
3045 ret = btrfs_del_items(trans, root, path, 3091 goto out;
3046 pending_del_slot, 3092 }
3047 pending_del_nr); 3093 if (pending_del_nr) {
3048 BUG_ON(ret); 3094 ret = btrfs_del_items(trans, root, path,
3049 pending_del_nr = 0; 3095 pending_del_slot,
3096 pending_del_nr);
3097 BUG_ON(ret);
3098 pending_del_nr = 0;
3099 }
3050 btrfs_release_path(root, path); 3100 btrfs_release_path(root, path);
3051 if (found_type == BTRFS_INODE_ITEM_KEY)
3052 break;
3053 goto search_again; 3101 goto search_again;
3102 } else {
3103 path->slots[0]--;
3054 } 3104 }
3055 } 3105 }
3056 ret = 0; 3106out:
3057error:
3058 if (pending_del_nr) { 3107 if (pending_del_nr) {
3059 ret = btrfs_del_items(trans, root, path, pending_del_slot, 3108 ret = btrfs_del_items(trans, root, path, pending_del_slot,
3060 pending_del_nr); 3109 pending_del_nr);
3061 } 3110 }
3062 btrfs_free_path(path); 3111 btrfs_free_path(path);
3063 return ret; 3112 return err;
3064} 3113}
3065 3114
3066/* 3115/*
@@ -3180,10 +3229,6 @@ int btrfs_cont_expand(struct inode *inode, loff_t size)
3180 if (size <= hole_start) 3229 if (size <= hole_start)
3181 return 0; 3230 return 0;
3182 3231
3183 err = btrfs_truncate_page(inode->i_mapping, inode->i_size);
3184 if (err)
3185 return err;
3186
3187 while (1) { 3232 while (1) {
3188 struct btrfs_ordered_extent *ordered; 3233 struct btrfs_ordered_extent *ordered;
3189 btrfs_wait_ordered_range(inode, hole_start, 3234 btrfs_wait_ordered_range(inode, hole_start,
@@ -3196,9 +3241,6 @@ int btrfs_cont_expand(struct inode *inode, loff_t size)
3196 btrfs_put_ordered_extent(ordered); 3241 btrfs_put_ordered_extent(ordered);
3197 } 3242 }
3198 3243
3199 trans = btrfs_start_transaction(root, 1);
3200 btrfs_set_trans_block_group(trans, inode);
3201
3202 cur_offset = hole_start; 3244 cur_offset = hole_start;
3203 while (1) { 3245 while (1) {
3204 em = btrfs_get_extent(inode, NULL, 0, cur_offset, 3246 em = btrfs_get_extent(inode, NULL, 0, cur_offset,
@@ -3206,40 +3248,120 @@ int btrfs_cont_expand(struct inode *inode, loff_t size)
3206 BUG_ON(IS_ERR(em) || !em); 3248 BUG_ON(IS_ERR(em) || !em);
3207 last_byte = min(extent_map_end(em), block_end); 3249 last_byte = min(extent_map_end(em), block_end);
3208 last_byte = (last_byte + mask) & ~mask; 3250 last_byte = (last_byte + mask) & ~mask;
3209 if (test_bit(EXTENT_FLAG_VACANCY, &em->flags)) { 3251 if (!test_bit(EXTENT_FLAG_PREALLOC, &em->flags)) {
3210 u64 hint_byte = 0; 3252 u64 hint_byte = 0;
3211 hole_size = last_byte - cur_offset; 3253 hole_size = last_byte - cur_offset;
3212 err = btrfs_drop_extents(trans, root, inode,
3213 cur_offset,
3214 cur_offset + hole_size,
3215 block_end,
3216 cur_offset, &hint_byte, 1);
3217 if (err)
3218 break;
3219 3254
3220 err = btrfs_reserve_metadata_space(root, 1); 3255 err = btrfs_reserve_metadata_space(root, 2);
3221 if (err) 3256 if (err)
3222 break; 3257 break;
3223 3258
3259 trans = btrfs_start_transaction(root, 1);
3260 btrfs_set_trans_block_group(trans, inode);
3261
3262 err = btrfs_drop_extents(trans, inode, cur_offset,
3263 cur_offset + hole_size,
3264 &hint_byte, 1);
3265 BUG_ON(err);
3266
3224 err = btrfs_insert_file_extent(trans, root, 3267 err = btrfs_insert_file_extent(trans, root,
3225 inode->i_ino, cur_offset, 0, 3268 inode->i_ino, cur_offset, 0,
3226 0, hole_size, 0, hole_size, 3269 0, hole_size, 0, hole_size,
3227 0, 0, 0); 3270 0, 0, 0);
3271 BUG_ON(err);
3272
3228 btrfs_drop_extent_cache(inode, hole_start, 3273 btrfs_drop_extent_cache(inode, hole_start,
3229 last_byte - 1, 0); 3274 last_byte - 1, 0);
3230 btrfs_unreserve_metadata_space(root, 1); 3275
3276 btrfs_end_transaction(trans, root);
3277 btrfs_unreserve_metadata_space(root, 2);
3231 } 3278 }
3232 free_extent_map(em); 3279 free_extent_map(em);
3233 cur_offset = last_byte; 3280 cur_offset = last_byte;
3234 if (err || cur_offset >= block_end) 3281 if (cur_offset >= block_end)
3235 break; 3282 break;
3236 } 3283 }
3237 3284
3238 btrfs_end_transaction(trans, root);
3239 unlock_extent(io_tree, hole_start, block_end - 1, GFP_NOFS); 3285 unlock_extent(io_tree, hole_start, block_end - 1, GFP_NOFS);
3240 return err; 3286 return err;
3241} 3287}
3242 3288
3289static int btrfs_setattr_size(struct inode *inode, struct iattr *attr)
3290{
3291 struct btrfs_root *root = BTRFS_I(inode)->root;
3292 struct btrfs_trans_handle *trans;
3293 unsigned long nr;
3294 int ret;
3295
3296 if (attr->ia_size == inode->i_size)
3297 return 0;
3298
3299 if (attr->ia_size > inode->i_size) {
3300 unsigned long limit;
3301 limit = current->signal->rlim[RLIMIT_FSIZE].rlim_cur;
3302 if (attr->ia_size > inode->i_sb->s_maxbytes)
3303 return -EFBIG;
3304 if (limit != RLIM_INFINITY && attr->ia_size > limit) {
3305 send_sig(SIGXFSZ, current, 0);
3306 return -EFBIG;
3307 }
3308 }
3309
3310 ret = btrfs_reserve_metadata_space(root, 1);
3311 if (ret)
3312 return ret;
3313
3314 trans = btrfs_start_transaction(root, 1);
3315 btrfs_set_trans_block_group(trans, inode);
3316
3317 ret = btrfs_orphan_add(trans, inode);
3318 BUG_ON(ret);
3319
3320 nr = trans->blocks_used;
3321 btrfs_end_transaction(trans, root);
3322 btrfs_unreserve_metadata_space(root, 1);
3323 btrfs_btree_balance_dirty(root, nr);
3324
3325 if (attr->ia_size > inode->i_size) {
3326 ret = btrfs_cont_expand(inode, attr->ia_size);
3327 if (ret) {
3328 btrfs_truncate(inode);
3329 return ret;
3330 }
3331
3332 i_size_write(inode, attr->ia_size);
3333 btrfs_ordered_update_i_size(inode, inode->i_size, NULL);
3334
3335 trans = btrfs_start_transaction(root, 1);
3336 btrfs_set_trans_block_group(trans, inode);
3337
3338 ret = btrfs_update_inode(trans, root, inode);
3339 BUG_ON(ret);
3340 if (inode->i_nlink > 0) {
3341 ret = btrfs_orphan_del(trans, inode);
3342 BUG_ON(ret);
3343 }
3344 nr = trans->blocks_used;
3345 btrfs_end_transaction(trans, root);
3346 btrfs_btree_balance_dirty(root, nr);
3347 return 0;
3348 }
3349
3350 /*
3351 * We're truncating a file that used to have good data down to
3352 * zero. Make sure it gets into the ordered flush list so that
3353 * any new writes get down to disk quickly.
3354 */
3355 if (attr->ia_size == 0)
3356 BTRFS_I(inode)->ordered_data_close = 1;
3357
3358 /* we don't support swapfiles, so vmtruncate shouldn't fail */
3359 ret = vmtruncate(inode, attr->ia_size);
3360 BUG_ON(ret);
3361
3362 return 0;
3363}
3364
3243static int btrfs_setattr(struct dentry *dentry, struct iattr *attr) 3365static int btrfs_setattr(struct dentry *dentry, struct iattr *attr)
3244{ 3366{
3245 struct inode *inode = dentry->d_inode; 3367 struct inode *inode = dentry->d_inode;
@@ -3250,23 +3372,14 @@ static int btrfs_setattr(struct dentry *dentry, struct iattr *attr)
3250 return err; 3372 return err;
3251 3373
3252 if (S_ISREG(inode->i_mode) && (attr->ia_valid & ATTR_SIZE)) { 3374 if (S_ISREG(inode->i_mode) && (attr->ia_valid & ATTR_SIZE)) {
3253 if (attr->ia_size > inode->i_size) { 3375 err = btrfs_setattr_size(inode, attr);
3254 err = btrfs_cont_expand(inode, attr->ia_size); 3376 if (err)
3255 if (err) 3377 return err;
3256 return err;
3257 } else if (inode->i_size > 0 &&
3258 attr->ia_size == 0) {
3259
3260 /* we're truncating a file that used to have good
3261 * data down to zero. Make sure it gets into
3262 * the ordered flush list so that any new writes
3263 * get down to disk quickly.
3264 */
3265 BTRFS_I(inode)->ordered_data_close = 1;
3266 }
3267 } 3378 }
3379 attr->ia_valid &= ~ATTR_SIZE;
3268 3380
3269 err = inode_setattr(inode, attr); 3381 if (attr->ia_valid)
3382 err = inode_setattr(inode, attr);
3270 3383
3271 if (!err && ((attr->ia_valid & ATTR_MODE))) 3384 if (!err && ((attr->ia_valid & ATTR_MODE)))
3272 err = btrfs_acl_chmod(inode); 3385 err = btrfs_acl_chmod(inode);
@@ -3287,36 +3400,43 @@ void btrfs_delete_inode(struct inode *inode)
3287 } 3400 }
3288 btrfs_wait_ordered_range(inode, 0, (u64)-1); 3401 btrfs_wait_ordered_range(inode, 0, (u64)-1);
3289 3402
3403 if (root->fs_info->log_root_recovering) {
3404 BUG_ON(!list_empty(&BTRFS_I(inode)->i_orphan));
3405 goto no_delete;
3406 }
3407
3290 if (inode->i_nlink > 0) { 3408 if (inode->i_nlink > 0) {
3291 BUG_ON(btrfs_root_refs(&root->root_item) != 0); 3409 BUG_ON(btrfs_root_refs(&root->root_item) != 0);
3292 goto no_delete; 3410 goto no_delete;
3293 } 3411 }
3294 3412
3295 btrfs_i_size_write(inode, 0); 3413 btrfs_i_size_write(inode, 0);
3296 trans = btrfs_join_transaction(root, 1);
3297 3414
3298 btrfs_set_trans_block_group(trans, inode); 3415 while (1) {
3299 ret = btrfs_truncate_inode_items(trans, root, inode, inode->i_size, 0); 3416 trans = btrfs_start_transaction(root, 1);
3300 if (ret) { 3417 btrfs_set_trans_block_group(trans, inode);
3301 btrfs_orphan_del(NULL, inode); 3418 ret = btrfs_truncate_inode_items(trans, root, inode, 0, 0);
3302 goto no_delete_lock;
3303 }
3304 3419
3305 btrfs_orphan_del(trans, inode); 3420 if (ret != -EAGAIN)
3421 break;
3306 3422
3307 nr = trans->blocks_used; 3423 nr = trans->blocks_used;
3308 clear_inode(inode); 3424 btrfs_end_transaction(trans, root);
3425 trans = NULL;
3426 btrfs_btree_balance_dirty(root, nr);
3427 }
3309 3428
3310 btrfs_end_transaction(trans, root); 3429 if (ret == 0) {
3311 btrfs_btree_balance_dirty(root, nr); 3430 ret = btrfs_orphan_del(trans, inode);
3312 return; 3431 BUG_ON(ret);
3432 }
3313 3433
3314no_delete_lock:
3315 nr = trans->blocks_used; 3434 nr = trans->blocks_used;
3316 btrfs_end_transaction(trans, root); 3435 btrfs_end_transaction(trans, root);
3317 btrfs_btree_balance_dirty(root, nr); 3436 btrfs_btree_balance_dirty(root, nr);
3318no_delete: 3437no_delete:
3319 clear_inode(inode); 3438 clear_inode(inode);
3439 return;
3320} 3440}
3321 3441
3322/* 3442/*
@@ -3569,7 +3689,6 @@ static noinline void init_btrfs_i(struct inode *inode)
3569 INIT_LIST_HEAD(&BTRFS_I(inode)->ordered_operations); 3689 INIT_LIST_HEAD(&BTRFS_I(inode)->ordered_operations);
3570 RB_CLEAR_NODE(&BTRFS_I(inode)->rb_node); 3690 RB_CLEAR_NODE(&BTRFS_I(inode)->rb_node);
3571 btrfs_ordered_inode_tree_init(&BTRFS_I(inode)->ordered_tree); 3691 btrfs_ordered_inode_tree_init(&BTRFS_I(inode)->ordered_tree);
3572 mutex_init(&BTRFS_I(inode)->extent_mutex);
3573 mutex_init(&BTRFS_I(inode)->log_mutex); 3692 mutex_init(&BTRFS_I(inode)->log_mutex);
3574} 3693}
3575 3694
@@ -3695,6 +3814,13 @@ struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry)
3695 } 3814 }
3696 srcu_read_unlock(&root->fs_info->subvol_srcu, index); 3815 srcu_read_unlock(&root->fs_info->subvol_srcu, index);
3697 3816
3817 if (root != sub_root) {
3818 down_read(&root->fs_info->cleanup_work_sem);
3819 if (!(inode->i_sb->s_flags & MS_RDONLY))
3820 btrfs_orphan_cleanup(sub_root);
3821 up_read(&root->fs_info->cleanup_work_sem);
3822 }
3823
3698 return inode; 3824 return inode;
3699} 3825}
3700 3826
@@ -4219,7 +4345,7 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry,
4219 if (IS_ERR(inode)) 4345 if (IS_ERR(inode))
4220 goto out_unlock; 4346 goto out_unlock;
4221 4347
4222 err = btrfs_init_inode_security(inode, dir); 4348 err = btrfs_init_inode_security(trans, inode, dir);
4223 if (err) { 4349 if (err) {
4224 drop_inode = 1; 4350 drop_inode = 1;
4225 goto out_unlock; 4351 goto out_unlock;
@@ -4290,7 +4416,7 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry,
4290 if (IS_ERR(inode)) 4416 if (IS_ERR(inode))
4291 goto out_unlock; 4417 goto out_unlock;
4292 4418
4293 err = btrfs_init_inode_security(inode, dir); 4419 err = btrfs_init_inode_security(trans, inode, dir);
4294 if (err) { 4420 if (err) {
4295 drop_inode = 1; 4421 drop_inode = 1;
4296 goto out_unlock; 4422 goto out_unlock;
@@ -4336,6 +4462,10 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
4336 if (inode->i_nlink == 0) 4462 if (inode->i_nlink == 0)
4337 return -ENOENT; 4463 return -ENOENT;
4338 4464
4465 /* do not allow sys_link's with other subvols of the same device */
4466 if (root->objectid != BTRFS_I(inode)->root->objectid)
4467 return -EPERM;
4468
4339 /* 4469 /*
4340 * 1 item for inode ref 4470 * 1 item for inode ref
4341 * 2 items for dir items 4471 * 2 items for dir items
@@ -4423,7 +4553,7 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
4423 4553
4424 drop_on_err = 1; 4554 drop_on_err = 1;
4425 4555
4426 err = btrfs_init_inode_security(inode, dir); 4556 err = btrfs_init_inode_security(trans, inode, dir);
4427 if (err) 4557 if (err)
4428 goto out_fail; 4558 goto out_fail;
4429 4559
@@ -5074,17 +5204,20 @@ static void btrfs_truncate(struct inode *inode)
5074 unsigned long nr; 5204 unsigned long nr;
5075 u64 mask = root->sectorsize - 1; 5205 u64 mask = root->sectorsize - 1;
5076 5206
5077 if (!S_ISREG(inode->i_mode)) 5207 if (!S_ISREG(inode->i_mode)) {
5078 return; 5208 WARN_ON(1);
5079 if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
5080 return; 5209 return;
5210 }
5081 5211
5082 ret = btrfs_truncate_page(inode->i_mapping, inode->i_size); 5212 ret = btrfs_truncate_page(inode->i_mapping, inode->i_size);
5083 if (ret) 5213 if (ret)
5084 return; 5214 return;
5215
5085 btrfs_wait_ordered_range(inode, inode->i_size & (~mask), (u64)-1); 5216 btrfs_wait_ordered_range(inode, inode->i_size & (~mask), (u64)-1);
5217 btrfs_ordered_update_i_size(inode, inode->i_size, NULL);
5086 5218
5087 trans = btrfs_start_transaction(root, 1); 5219 trans = btrfs_start_transaction(root, 1);
5220 btrfs_set_trans_block_group(trans, inode);
5088 5221
5089 /* 5222 /*
5090 * setattr is responsible for setting the ordered_data_close flag, 5223 * setattr is responsible for setting the ordered_data_close flag,
@@ -5106,21 +5239,32 @@ static void btrfs_truncate(struct inode *inode)
5106 if (inode->i_size == 0 && BTRFS_I(inode)->ordered_data_close) 5239 if (inode->i_size == 0 && BTRFS_I(inode)->ordered_data_close)
5107 btrfs_add_ordered_operation(trans, root, inode); 5240 btrfs_add_ordered_operation(trans, root, inode);
5108 5241
5109 btrfs_set_trans_block_group(trans, inode); 5242 while (1) {
5110 btrfs_i_size_write(inode, inode->i_size); 5243 ret = btrfs_truncate_inode_items(trans, root, inode,
5244 inode->i_size,
5245 BTRFS_EXTENT_DATA_KEY);
5246 if (ret != -EAGAIN)
5247 break;
5111 5248
5112 ret = btrfs_orphan_add(trans, inode); 5249 ret = btrfs_update_inode(trans, root, inode);
5113 if (ret) 5250 BUG_ON(ret);
5114 goto out; 5251
5115 /* FIXME, add redo link to tree so we don't leak on crash */ 5252 nr = trans->blocks_used;
5116 ret = btrfs_truncate_inode_items(trans, root, inode, inode->i_size, 5253 btrfs_end_transaction(trans, root);
5117 BTRFS_EXTENT_DATA_KEY); 5254 btrfs_btree_balance_dirty(root, nr);
5118 btrfs_update_inode(trans, root, inode); 5255
5256 trans = btrfs_start_transaction(root, 1);
5257 btrfs_set_trans_block_group(trans, inode);
5258 }
5119 5259
5120 ret = btrfs_orphan_del(trans, inode); 5260 if (ret == 0 && inode->i_nlink > 0) {
5261 ret = btrfs_orphan_del(trans, inode);
5262 BUG_ON(ret);
5263 }
5264
5265 ret = btrfs_update_inode(trans, root, inode);
5121 BUG_ON(ret); 5266 BUG_ON(ret);
5122 5267
5123out:
5124 nr = trans->blocks_used; 5268 nr = trans->blocks_used;
5125 ret = btrfs_end_transaction_throttle(trans, root); 5269 ret = btrfs_end_transaction_throttle(trans, root);
5126 BUG_ON(ret); 5270 BUG_ON(ret);
@@ -5217,9 +5361,9 @@ void btrfs_destroy_inode(struct inode *inode)
5217 5361
5218 spin_lock(&root->list_lock); 5362 spin_lock(&root->list_lock);
5219 if (!list_empty(&BTRFS_I(inode)->i_orphan)) { 5363 if (!list_empty(&BTRFS_I(inode)->i_orphan)) {
5220 printk(KERN_ERR "BTRFS: inode %lu: inode still on the orphan" 5364 printk(KERN_INFO "BTRFS: inode %lu still on the orphan list\n",
5221 " list\n", inode->i_ino); 5365 inode->i_ino);
5222 dump_stack(); 5366 list_del_init(&BTRFS_I(inode)->i_orphan);
5223 } 5367 }
5224 spin_unlock(&root->list_lock); 5368 spin_unlock(&root->list_lock);
5225 5369
@@ -5476,7 +5620,7 @@ out_fail:
5476 * some fairly slow code that needs optimization. This walks the list 5620 * some fairly slow code that needs optimization. This walks the list
5477 * of all the inodes with pending delalloc and forces them to disk. 5621 * of all the inodes with pending delalloc and forces them to disk.
5478 */ 5622 */
5479int btrfs_start_delalloc_inodes(struct btrfs_root *root) 5623int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput)
5480{ 5624{
5481 struct list_head *head = &root->fs_info->delalloc_inodes; 5625 struct list_head *head = &root->fs_info->delalloc_inodes;
5482 struct btrfs_inode *binode; 5626 struct btrfs_inode *binode;
@@ -5495,7 +5639,10 @@ int btrfs_start_delalloc_inodes(struct btrfs_root *root)
5495 spin_unlock(&root->fs_info->delalloc_lock); 5639 spin_unlock(&root->fs_info->delalloc_lock);
5496 if (inode) { 5640 if (inode) {
5497 filemap_flush(inode->i_mapping); 5641 filemap_flush(inode->i_mapping);
5498 iput(inode); 5642 if (delay_iput)
5643 btrfs_add_delayed_iput(inode);
5644 else
5645 iput(inode);
5499 } 5646 }
5500 cond_resched(); 5647 cond_resched();
5501 spin_lock(&root->fs_info->delalloc_lock); 5648 spin_lock(&root->fs_info->delalloc_lock);
@@ -5569,7 +5716,7 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
5569 if (IS_ERR(inode)) 5716 if (IS_ERR(inode))
5570 goto out_unlock; 5717 goto out_unlock;
5571 5718
5572 err = btrfs_init_inode_security(inode, dir); 5719 err = btrfs_init_inode_security(trans, inode, dir);
5573 if (err) { 5720 if (err) {
5574 drop_inode = 1; 5721 drop_inode = 1;
5575 goto out_unlock; 5722 goto out_unlock;
@@ -5641,10 +5788,10 @@ out_fail:
5641 return err; 5788 return err;
5642} 5789}
5643 5790
5644static int prealloc_file_range(struct btrfs_trans_handle *trans, 5791static int prealloc_file_range(struct inode *inode, u64 start, u64 end,
5645 struct inode *inode, u64 start, u64 end, 5792 u64 alloc_hint, int mode)
5646 u64 locked_end, u64 alloc_hint, int mode)
5647{ 5793{
5794 struct btrfs_trans_handle *trans;
5648 struct btrfs_root *root = BTRFS_I(inode)->root; 5795 struct btrfs_root *root = BTRFS_I(inode)->root;
5649 struct btrfs_key ins; 5796 struct btrfs_key ins;
5650 u64 alloc_size; 5797 u64 alloc_size;
@@ -5655,43 +5802,56 @@ static int prealloc_file_range(struct btrfs_trans_handle *trans,
5655 while (num_bytes > 0) { 5802 while (num_bytes > 0) {
5656 alloc_size = min(num_bytes, root->fs_info->max_extent); 5803 alloc_size = min(num_bytes, root->fs_info->max_extent);
5657 5804
5658 ret = btrfs_reserve_metadata_space(root, 1); 5805 trans = btrfs_start_transaction(root, 1);
5659 if (ret)
5660 goto out;
5661 5806
5662 ret = btrfs_reserve_extent(trans, root, alloc_size, 5807 ret = btrfs_reserve_extent(trans, root, alloc_size,
5663 root->sectorsize, 0, alloc_hint, 5808 root->sectorsize, 0, alloc_hint,
5664 (u64)-1, &ins, 1); 5809 (u64)-1, &ins, 1);
5665 if (ret) { 5810 if (ret) {
5666 WARN_ON(1); 5811 WARN_ON(1);
5667 goto out; 5812 goto stop_trans;
5813 }
5814
5815 ret = btrfs_reserve_metadata_space(root, 3);
5816 if (ret) {
5817 btrfs_free_reserved_extent(root, ins.objectid,
5818 ins.offset);
5819 goto stop_trans;
5668 } 5820 }
5821
5669 ret = insert_reserved_file_extent(trans, inode, 5822 ret = insert_reserved_file_extent(trans, inode,
5670 cur_offset, ins.objectid, 5823 cur_offset, ins.objectid,
5671 ins.offset, ins.offset, 5824 ins.offset, ins.offset,
5672 ins.offset, locked_end, 5825 ins.offset, 0, 0, 0,
5673 0, 0, 0,
5674 BTRFS_FILE_EXTENT_PREALLOC); 5826 BTRFS_FILE_EXTENT_PREALLOC);
5675 BUG_ON(ret); 5827 BUG_ON(ret);
5676 btrfs_drop_extent_cache(inode, cur_offset, 5828 btrfs_drop_extent_cache(inode, cur_offset,
5677 cur_offset + ins.offset -1, 0); 5829 cur_offset + ins.offset -1, 0);
5830
5678 num_bytes -= ins.offset; 5831 num_bytes -= ins.offset;
5679 cur_offset += ins.offset; 5832 cur_offset += ins.offset;
5680 alloc_hint = ins.objectid + ins.offset; 5833 alloc_hint = ins.objectid + ins.offset;
5681 btrfs_unreserve_metadata_space(root, 1); 5834
5682 }
5683out:
5684 if (cur_offset > start) {
5685 inode->i_ctime = CURRENT_TIME; 5835 inode->i_ctime = CURRENT_TIME;
5686 BTRFS_I(inode)->flags |= BTRFS_INODE_PREALLOC; 5836 BTRFS_I(inode)->flags |= BTRFS_INODE_PREALLOC;
5687 if (!(mode & FALLOC_FL_KEEP_SIZE) && 5837 if (!(mode & FALLOC_FL_KEEP_SIZE) &&
5688 cur_offset > i_size_read(inode)) 5838 cur_offset > inode->i_size) {
5689 btrfs_i_size_write(inode, cur_offset); 5839 i_size_write(inode, cur_offset);
5840 btrfs_ordered_update_i_size(inode, cur_offset, NULL);
5841 }
5842
5690 ret = btrfs_update_inode(trans, root, inode); 5843 ret = btrfs_update_inode(trans, root, inode);
5691 BUG_ON(ret); 5844 BUG_ON(ret);
5845
5846 btrfs_end_transaction(trans, root);
5847 btrfs_unreserve_metadata_space(root, 3);
5692 } 5848 }
5849 return ret;
5693 5850
5851stop_trans:
5852 btrfs_end_transaction(trans, root);
5694 return ret; 5853 return ret;
5854
5695} 5855}
5696 5856
5697static long btrfs_fallocate(struct inode *inode, int mode, 5857static long btrfs_fallocate(struct inode *inode, int mode,
@@ -5705,8 +5865,6 @@ static long btrfs_fallocate(struct inode *inode, int mode,
5705 u64 locked_end; 5865 u64 locked_end;
5706 u64 mask = BTRFS_I(inode)->root->sectorsize - 1; 5866 u64 mask = BTRFS_I(inode)->root->sectorsize - 1;
5707 struct extent_map *em; 5867 struct extent_map *em;
5708 struct btrfs_trans_handle *trans;
5709 struct btrfs_root *root;
5710 int ret; 5868 int ret;
5711 5869
5712 alloc_start = offset & ~mask; 5870 alloc_start = offset & ~mask;
@@ -5725,9 +5883,7 @@ static long btrfs_fallocate(struct inode *inode, int mode,
5725 goto out; 5883 goto out;
5726 } 5884 }
5727 5885
5728 root = BTRFS_I(inode)->root; 5886 ret = btrfs_check_data_free_space(BTRFS_I(inode)->root, inode,
5729
5730 ret = btrfs_check_data_free_space(root, inode,
5731 alloc_end - alloc_start); 5887 alloc_end - alloc_start);
5732 if (ret) 5888 if (ret)
5733 goto out; 5889 goto out;
@@ -5736,12 +5892,6 @@ static long btrfs_fallocate(struct inode *inode, int mode,
5736 while (1) { 5892 while (1) {
5737 struct btrfs_ordered_extent *ordered; 5893 struct btrfs_ordered_extent *ordered;
5738 5894
5739 trans = btrfs_start_transaction(BTRFS_I(inode)->root, 1);
5740 if (!trans) {
5741 ret = -EIO;
5742 goto out_free;
5743 }
5744
5745 /* the extent lock is ordered inside the running 5895 /* the extent lock is ordered inside the running
5746 * transaction 5896 * transaction
5747 */ 5897 */
@@ -5755,8 +5905,6 @@ static long btrfs_fallocate(struct inode *inode, int mode,
5755 btrfs_put_ordered_extent(ordered); 5905 btrfs_put_ordered_extent(ordered);
5756 unlock_extent(&BTRFS_I(inode)->io_tree, 5906 unlock_extent(&BTRFS_I(inode)->io_tree,
5757 alloc_start, locked_end, GFP_NOFS); 5907 alloc_start, locked_end, GFP_NOFS);
5758 btrfs_end_transaction(trans, BTRFS_I(inode)->root);
5759
5760 /* 5908 /*
5761 * we can't wait on the range with the transaction 5909 * we can't wait on the range with the transaction
5762 * running or with the extent lock held 5910 * running or with the extent lock held
@@ -5777,10 +5925,12 @@ static long btrfs_fallocate(struct inode *inode, int mode,
5777 BUG_ON(IS_ERR(em) || !em); 5925 BUG_ON(IS_ERR(em) || !em);
5778 last_byte = min(extent_map_end(em), alloc_end); 5926 last_byte = min(extent_map_end(em), alloc_end);
5779 last_byte = (last_byte + mask) & ~mask; 5927 last_byte = (last_byte + mask) & ~mask;
5780 if (em->block_start == EXTENT_MAP_HOLE) { 5928 if (em->block_start == EXTENT_MAP_HOLE ||
5781 ret = prealloc_file_range(trans, inode, cur_offset, 5929 (cur_offset >= inode->i_size &&
5782 last_byte, locked_end + 1, 5930 !test_bit(EXTENT_FLAG_PREALLOC, &em->flags))) {
5783 alloc_hint, mode); 5931 ret = prealloc_file_range(inode,
5932 cur_offset, last_byte,
5933 alloc_hint, mode);
5784 if (ret < 0) { 5934 if (ret < 0) {
5785 free_extent_map(em); 5935 free_extent_map(em);
5786 break; 5936 break;
@@ -5799,9 +5949,8 @@ static long btrfs_fallocate(struct inode *inode, int mode,
5799 unlock_extent(&BTRFS_I(inode)->io_tree, alloc_start, locked_end, 5949 unlock_extent(&BTRFS_I(inode)->io_tree, alloc_start, locked_end,
5800 GFP_NOFS); 5950 GFP_NOFS);
5801 5951
5802 btrfs_end_transaction(trans, BTRFS_I(inode)->root); 5952 btrfs_free_reserved_data_space(BTRFS_I(inode)->root, inode,
5803out_free: 5953 alloc_end - alloc_start);
5804 btrfs_free_reserved_data_space(root, inode, alloc_end - alloc_start);
5805out: 5954out:
5806 mutex_unlock(&inode->i_mutex); 5955 mutex_unlock(&inode->i_mutex);
5807 return ret; 5956 return ret;
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index cdbb054102b9..645a17927a8f 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -237,7 +237,6 @@ static noinline int create_subvol(struct btrfs_root *root,
237 u64 objectid; 237 u64 objectid;
238 u64 new_dirid = BTRFS_FIRST_FREE_OBJECTID; 238 u64 new_dirid = BTRFS_FIRST_FREE_OBJECTID;
239 u64 index = 0; 239 u64 index = 0;
240 unsigned long nr = 1;
241 240
242 /* 241 /*
243 * 1 - inode item 242 * 1 - inode item
@@ -290,7 +289,7 @@ static noinline int create_subvol(struct btrfs_root *root,
290 btrfs_set_root_generation(&root_item, trans->transid); 289 btrfs_set_root_generation(&root_item, trans->transid);
291 btrfs_set_root_level(&root_item, 0); 290 btrfs_set_root_level(&root_item, 0);
292 btrfs_set_root_refs(&root_item, 1); 291 btrfs_set_root_refs(&root_item, 1);
293 btrfs_set_root_used(&root_item, 0); 292 btrfs_set_root_used(&root_item, leaf->len);
294 btrfs_set_root_last_snapshot(&root_item, 0); 293 btrfs_set_root_last_snapshot(&root_item, 0);
295 294
296 memset(&root_item.drop_progress, 0, sizeof(root_item.drop_progress)); 295 memset(&root_item.drop_progress, 0, sizeof(root_item.drop_progress));
@@ -342,24 +341,21 @@ static noinline int create_subvol(struct btrfs_root *root,
342 341
343 d_instantiate(dentry, btrfs_lookup_dentry(dir, dentry)); 342 d_instantiate(dentry, btrfs_lookup_dentry(dir, dentry));
344fail: 343fail:
345 nr = trans->blocks_used;
346 err = btrfs_commit_transaction(trans, root); 344 err = btrfs_commit_transaction(trans, root);
347 if (err && !ret) 345 if (err && !ret)
348 ret = err; 346 ret = err;
349 347
350 btrfs_unreserve_metadata_space(root, 6); 348 btrfs_unreserve_metadata_space(root, 6);
351 btrfs_btree_balance_dirty(root, nr);
352 return ret; 349 return ret;
353} 350}
354 351
355static int create_snapshot(struct btrfs_root *root, struct dentry *dentry, 352static int create_snapshot(struct btrfs_root *root, struct dentry *dentry,
356 char *name, int namelen) 353 char *name, int namelen)
357{ 354{
355 struct inode *inode;
358 struct btrfs_pending_snapshot *pending_snapshot; 356 struct btrfs_pending_snapshot *pending_snapshot;
359 struct btrfs_trans_handle *trans; 357 struct btrfs_trans_handle *trans;
360 int ret = 0; 358 int ret;
361 int err;
362 unsigned long nr = 0;
363 359
364 if (!root->ref_cows) 360 if (!root->ref_cows)
365 return -EINVAL; 361 return -EINVAL;
@@ -372,20 +368,20 @@ static int create_snapshot(struct btrfs_root *root, struct dentry *dentry,
372 */ 368 */
373 ret = btrfs_reserve_metadata_space(root, 6); 369 ret = btrfs_reserve_metadata_space(root, 6);
374 if (ret) 370 if (ret)
375 goto fail_unlock; 371 goto fail;
376 372
377 pending_snapshot = kzalloc(sizeof(*pending_snapshot), GFP_NOFS); 373 pending_snapshot = kzalloc(sizeof(*pending_snapshot), GFP_NOFS);
378 if (!pending_snapshot) { 374 if (!pending_snapshot) {
379 ret = -ENOMEM; 375 ret = -ENOMEM;
380 btrfs_unreserve_metadata_space(root, 6); 376 btrfs_unreserve_metadata_space(root, 6);
381 goto fail_unlock; 377 goto fail;
382 } 378 }
383 pending_snapshot->name = kmalloc(namelen + 1, GFP_NOFS); 379 pending_snapshot->name = kmalloc(namelen + 1, GFP_NOFS);
384 if (!pending_snapshot->name) { 380 if (!pending_snapshot->name) {
385 ret = -ENOMEM; 381 ret = -ENOMEM;
386 kfree(pending_snapshot); 382 kfree(pending_snapshot);
387 btrfs_unreserve_metadata_space(root, 6); 383 btrfs_unreserve_metadata_space(root, 6);
388 goto fail_unlock; 384 goto fail;
389 } 385 }
390 memcpy(pending_snapshot->name, name, namelen); 386 memcpy(pending_snapshot->name, name, namelen);
391 pending_snapshot->name[namelen] = '\0'; 387 pending_snapshot->name[namelen] = '\0';
@@ -395,10 +391,19 @@ static int create_snapshot(struct btrfs_root *root, struct dentry *dentry,
395 pending_snapshot->root = root; 391 pending_snapshot->root = root;
396 list_add(&pending_snapshot->list, 392 list_add(&pending_snapshot->list,
397 &trans->transaction->pending_snapshots); 393 &trans->transaction->pending_snapshots);
398 err = btrfs_commit_transaction(trans, root); 394 ret = btrfs_commit_transaction(trans, root);
395 BUG_ON(ret);
396 btrfs_unreserve_metadata_space(root, 6);
399 397
400fail_unlock: 398 inode = btrfs_lookup_dentry(dentry->d_parent->d_inode, dentry);
401 btrfs_btree_balance_dirty(root, nr); 399 if (IS_ERR(inode)) {
400 ret = PTR_ERR(inode);
401 goto fail;
402 }
403 BUG_ON(!inode);
404 d_instantiate(dentry, inode);
405 ret = 0;
406fail:
402 return ret; 407 return ret;
403} 408}
404 409
@@ -1027,8 +1032,7 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
1027 BUG_ON(!trans); 1032 BUG_ON(!trans);
1028 1033
1029 /* punch hole in destination first */ 1034 /* punch hole in destination first */
1030 btrfs_drop_extents(trans, root, inode, off, off + len, 1035 btrfs_drop_extents(trans, inode, off, off + len, &hint_byte, 1);
1031 off + len, 0, &hint_byte, 1);
1032 1036
1033 /* clone data */ 1037 /* clone data */
1034 key.objectid = src->i_ino; 1038 key.objectid = src->i_ino;
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index 5799bc46a309..b10a49d4bc6a 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -291,16 +291,16 @@ int btrfs_put_ordered_extent(struct btrfs_ordered_extent *entry)
291 291
292/* 292/*
293 * remove an ordered extent from the tree. No references are dropped 293 * remove an ordered extent from the tree. No references are dropped
294 * but, anyone waiting on this extent is woken up. 294 * and you must wake_up entry->wait. You must hold the tree mutex
295 * while you call this function.
295 */ 296 */
296int btrfs_remove_ordered_extent(struct inode *inode, 297static int __btrfs_remove_ordered_extent(struct inode *inode,
297 struct btrfs_ordered_extent *entry) 298 struct btrfs_ordered_extent *entry)
298{ 299{
299 struct btrfs_ordered_inode_tree *tree; 300 struct btrfs_ordered_inode_tree *tree;
300 struct rb_node *node; 301 struct rb_node *node;
301 302
302 tree = &BTRFS_I(inode)->ordered_tree; 303 tree = &BTRFS_I(inode)->ordered_tree;
303 mutex_lock(&tree->mutex);
304 node = &entry->rb_node; 304 node = &entry->rb_node;
305 rb_erase(node, &tree->tree); 305 rb_erase(node, &tree->tree);
306 tree->last = NULL; 306 tree->last = NULL;
@@ -326,16 +326,34 @@ int btrfs_remove_ordered_extent(struct inode *inode,
326 } 326 }
327 spin_unlock(&BTRFS_I(inode)->root->fs_info->ordered_extent_lock); 327 spin_unlock(&BTRFS_I(inode)->root->fs_info->ordered_extent_lock);
328 328
329 return 0;
330}
331
332/*
333 * remove an ordered extent from the tree. No references are dropped
334 * but any waiters are woken.
335 */
336int btrfs_remove_ordered_extent(struct inode *inode,
337 struct btrfs_ordered_extent *entry)
338{
339 struct btrfs_ordered_inode_tree *tree;
340 int ret;
341
342 tree = &BTRFS_I(inode)->ordered_tree;
343 mutex_lock(&tree->mutex);
344 ret = __btrfs_remove_ordered_extent(inode, entry);
329 mutex_unlock(&tree->mutex); 345 mutex_unlock(&tree->mutex);
330 wake_up(&entry->wait); 346 wake_up(&entry->wait);
331 return 0; 347
348 return ret;
332} 349}
333 350
334/* 351/*
335 * wait for all the ordered extents in a root. This is done when balancing 352 * wait for all the ordered extents in a root. This is done when balancing
336 * space between drives. 353 * space between drives.
337 */ 354 */
338int btrfs_wait_ordered_extents(struct btrfs_root *root, int nocow_only) 355int btrfs_wait_ordered_extents(struct btrfs_root *root,
356 int nocow_only, int delay_iput)
339{ 357{
340 struct list_head splice; 358 struct list_head splice;
341 struct list_head *cur; 359 struct list_head *cur;
@@ -372,7 +390,10 @@ int btrfs_wait_ordered_extents(struct btrfs_root *root, int nocow_only)
372 if (inode) { 390 if (inode) {
373 btrfs_start_ordered_extent(inode, ordered, 1); 391 btrfs_start_ordered_extent(inode, ordered, 1);
374 btrfs_put_ordered_extent(ordered); 392 btrfs_put_ordered_extent(ordered);
375 iput(inode); 393 if (delay_iput)
394 btrfs_add_delayed_iput(inode);
395 else
396 iput(inode);
376 } else { 397 } else {
377 btrfs_put_ordered_extent(ordered); 398 btrfs_put_ordered_extent(ordered);
378 } 399 }
@@ -430,7 +451,7 @@ again:
430 btrfs_wait_ordered_range(inode, 0, (u64)-1); 451 btrfs_wait_ordered_range(inode, 0, (u64)-1);
431 else 452 else
432 filemap_flush(inode->i_mapping); 453 filemap_flush(inode->i_mapping);
433 iput(inode); 454 btrfs_add_delayed_iput(inode);
434 } 455 }
435 456
436 cond_resched(); 457 cond_resched();
@@ -589,7 +610,7 @@ out:
589 * After an extent is done, call this to conditionally update the on disk 610 * After an extent is done, call this to conditionally update the on disk
590 * i_size. i_size is updated to cover any fully written part of the file. 611 * i_size. i_size is updated to cover any fully written part of the file.
591 */ 612 */
592int btrfs_ordered_update_i_size(struct inode *inode, 613int btrfs_ordered_update_i_size(struct inode *inode, u64 offset,
593 struct btrfs_ordered_extent *ordered) 614 struct btrfs_ordered_extent *ordered)
594{ 615{
595 struct btrfs_ordered_inode_tree *tree = &BTRFS_I(inode)->ordered_tree; 616 struct btrfs_ordered_inode_tree *tree = &BTRFS_I(inode)->ordered_tree;
@@ -597,18 +618,30 @@ int btrfs_ordered_update_i_size(struct inode *inode,
597 u64 disk_i_size; 618 u64 disk_i_size;
598 u64 new_i_size; 619 u64 new_i_size;
599 u64 i_size_test; 620 u64 i_size_test;
621 u64 i_size = i_size_read(inode);
600 struct rb_node *node; 622 struct rb_node *node;
623 struct rb_node *prev = NULL;
601 struct btrfs_ordered_extent *test; 624 struct btrfs_ordered_extent *test;
625 int ret = 1;
626
627 if (ordered)
628 offset = entry_end(ordered);
602 629
603 mutex_lock(&tree->mutex); 630 mutex_lock(&tree->mutex);
604 disk_i_size = BTRFS_I(inode)->disk_i_size; 631 disk_i_size = BTRFS_I(inode)->disk_i_size;
605 632
633 /* truncate file */
634 if (disk_i_size > i_size) {
635 BTRFS_I(inode)->disk_i_size = i_size;
636 ret = 0;
637 goto out;
638 }
639
606 /* 640 /*
607 * if the disk i_size is already at the inode->i_size, or 641 * if the disk i_size is already at the inode->i_size, or
608 * this ordered extent is inside the disk i_size, we're done 642 * this ordered extent is inside the disk i_size, we're done
609 */ 643 */
610 if (disk_i_size >= inode->i_size || 644 if (disk_i_size == i_size || offset <= disk_i_size) {
611 ordered->file_offset + ordered->len <= disk_i_size) {
612 goto out; 645 goto out;
613 } 646 }
614 647
@@ -616,8 +649,7 @@ int btrfs_ordered_update_i_size(struct inode *inode,
616 * we can't update the disk_isize if there are delalloc bytes 649 * we can't update the disk_isize if there are delalloc bytes
617 * between disk_i_size and this ordered extent 650 * between disk_i_size and this ordered extent
618 */ 651 */
619 if (test_range_bit(io_tree, disk_i_size, 652 if (test_range_bit(io_tree, disk_i_size, offset - 1,
620 ordered->file_offset + ordered->len - 1,
621 EXTENT_DELALLOC, 0, NULL)) { 653 EXTENT_DELALLOC, 0, NULL)) {
622 goto out; 654 goto out;
623 } 655 }
@@ -626,20 +658,32 @@ int btrfs_ordered_update_i_size(struct inode *inode,
626 * if we find an ordered extent then we can't update disk i_size 658 * if we find an ordered extent then we can't update disk i_size
627 * yet 659 * yet
628 */ 660 */
629 node = &ordered->rb_node; 661 if (ordered) {
630 while (1) { 662 node = rb_prev(&ordered->rb_node);
631 node = rb_prev(node); 663 } else {
632 if (!node) 664 prev = tree_search(tree, offset);
633 break; 665 /*
666 * we insert file extents without involving ordered struct,
667 * so there should be no ordered struct cover this offset
668 */
669 if (prev) {
670 test = rb_entry(prev, struct btrfs_ordered_extent,
671 rb_node);
672 BUG_ON(offset_in_entry(test, offset));
673 }
674 node = prev;
675 }
676 while (node) {
634 test = rb_entry(node, struct btrfs_ordered_extent, rb_node); 677 test = rb_entry(node, struct btrfs_ordered_extent, rb_node);
635 if (test->file_offset + test->len <= disk_i_size) 678 if (test->file_offset + test->len <= disk_i_size)
636 break; 679 break;
637 if (test->file_offset >= inode->i_size) 680 if (test->file_offset >= i_size)
638 break; 681 break;
639 if (test->file_offset >= disk_i_size) 682 if (test->file_offset >= disk_i_size)
640 goto out; 683 goto out;
684 node = rb_prev(node);
641 } 685 }
642 new_i_size = min_t(u64, entry_end(ordered), i_size_read(inode)); 686 new_i_size = min_t(u64, offset, i_size);
643 687
644 /* 688 /*
645 * at this point, we know we can safely update i_size to at least 689 * at this point, we know we can safely update i_size to at least
@@ -647,7 +691,14 @@ int btrfs_ordered_update_i_size(struct inode *inode,
647 * walk forward and see if ios from higher up in the file have 691 * walk forward and see if ios from higher up in the file have
648 * finished. 692 * finished.
649 */ 693 */
650 node = rb_next(&ordered->rb_node); 694 if (ordered) {
695 node = rb_next(&ordered->rb_node);
696 } else {
697 if (prev)
698 node = rb_next(prev);
699 else
700 node = rb_first(&tree->tree);
701 }
651 i_size_test = 0; 702 i_size_test = 0;
652 if (node) { 703 if (node) {
653 /* 704 /*
@@ -655,10 +706,10 @@ int btrfs_ordered_update_i_size(struct inode *inode,
655 * between our ordered extent and the next one. 706 * between our ordered extent and the next one.
656 */ 707 */
657 test = rb_entry(node, struct btrfs_ordered_extent, rb_node); 708 test = rb_entry(node, struct btrfs_ordered_extent, rb_node);
658 if (test->file_offset > entry_end(ordered)) 709 if (test->file_offset > offset)
659 i_size_test = test->file_offset; 710 i_size_test = test->file_offset;
660 } else { 711 } else {
661 i_size_test = i_size_read(inode); 712 i_size_test = i_size;
662 } 713 }
663 714
664 /* 715 /*
@@ -667,15 +718,25 @@ int btrfs_ordered_update_i_size(struct inode *inode,
667 * are no delalloc bytes in this area, it is safe to update 718 * are no delalloc bytes in this area, it is safe to update
668 * disk_i_size to the end of the region. 719 * disk_i_size to the end of the region.
669 */ 720 */
670 if (i_size_test > entry_end(ordered) && 721 if (i_size_test > offset &&
671 !test_range_bit(io_tree, entry_end(ordered), i_size_test - 1, 722 !test_range_bit(io_tree, offset, i_size_test - 1,
672 EXTENT_DELALLOC, 0, NULL)) { 723 EXTENT_DELALLOC, 0, NULL)) {
673 new_i_size = min_t(u64, i_size_test, i_size_read(inode)); 724 new_i_size = min_t(u64, i_size_test, i_size);
674 } 725 }
675 BTRFS_I(inode)->disk_i_size = new_i_size; 726 BTRFS_I(inode)->disk_i_size = new_i_size;
727 ret = 0;
676out: 728out:
729 /*
730 * we need to remove the ordered extent with the tree lock held
731 * so that other people calling this function don't find our fully
732 * processed ordered entry and skip updating the i_size
733 */
734 if (ordered)
735 __btrfs_remove_ordered_extent(inode, ordered);
677 mutex_unlock(&tree->mutex); 736 mutex_unlock(&tree->mutex);
678 return 0; 737 if (ordered)
738 wake_up(&ordered->wait);
739 return ret;
679} 740}
680 741
681/* 742/*
diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h
index f82e87488ca8..1fe1282ef47c 100644
--- a/fs/btrfs/ordered-data.h
+++ b/fs/btrfs/ordered-data.h
@@ -150,12 +150,13 @@ void btrfs_start_ordered_extent(struct inode *inode,
150int btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len); 150int btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len);
151struct btrfs_ordered_extent * 151struct btrfs_ordered_extent *
152btrfs_lookup_first_ordered_extent(struct inode * inode, u64 file_offset); 152btrfs_lookup_first_ordered_extent(struct inode * inode, u64 file_offset);
153int btrfs_ordered_update_i_size(struct inode *inode, 153int btrfs_ordered_update_i_size(struct inode *inode, u64 offset,
154 struct btrfs_ordered_extent *ordered); 154 struct btrfs_ordered_extent *ordered);
155int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u64 disk_bytenr, u32 *sum); 155int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u64 disk_bytenr, u32 *sum);
156int btrfs_wait_ordered_extents(struct btrfs_root *root, int nocow_only);
157int btrfs_run_ordered_operations(struct btrfs_root *root, int wait); 156int btrfs_run_ordered_operations(struct btrfs_root *root, int wait);
158int btrfs_add_ordered_operation(struct btrfs_trans_handle *trans, 157int btrfs_add_ordered_operation(struct btrfs_trans_handle *trans,
159 struct btrfs_root *root, 158 struct btrfs_root *root,
160 struct inode *inode); 159 struct inode *inode);
160int btrfs_wait_ordered_extents(struct btrfs_root *root,
161 int nocow_only, int delay_iput);
161#endif 162#endif
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index cfcc93c93a7b..a9728680eca8 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -1561,6 +1561,20 @@ static int invalidate_extent_cache(struct btrfs_root *root,
1561 return 0; 1561 return 0;
1562} 1562}
1563 1563
1564static void put_inodes(struct list_head *list)
1565{
1566 struct inodevec *ivec;
1567 while (!list_empty(list)) {
1568 ivec = list_entry(list->next, struct inodevec, list);
1569 list_del(&ivec->list);
1570 while (ivec->nr > 0) {
1571 ivec->nr--;
1572 iput(ivec->inode[ivec->nr]);
1573 }
1574 kfree(ivec);
1575 }
1576}
1577
1564static int find_next_key(struct btrfs_path *path, int level, 1578static int find_next_key(struct btrfs_path *path, int level,
1565 struct btrfs_key *key) 1579 struct btrfs_key *key)
1566 1580
@@ -1723,6 +1737,11 @@ static noinline_for_stack int merge_reloc_root(struct reloc_control *rc,
1723 1737
1724 btrfs_btree_balance_dirty(root, nr); 1738 btrfs_btree_balance_dirty(root, nr);
1725 1739
1740 /*
1741 * put inodes outside transaction, otherwise we may deadlock.
1742 */
1743 put_inodes(&inode_list);
1744
1726 if (replaced && rc->stage == UPDATE_DATA_PTRS) 1745 if (replaced && rc->stage == UPDATE_DATA_PTRS)
1727 invalidate_extent_cache(root, &key, &next_key); 1746 invalidate_extent_cache(root, &key, &next_key);
1728 } 1747 }
@@ -1752,19 +1771,7 @@ out:
1752 1771
1753 btrfs_btree_balance_dirty(root, nr); 1772 btrfs_btree_balance_dirty(root, nr);
1754 1773
1755 /* 1774 put_inodes(&inode_list);
1756 * put inodes while we aren't holding the tree locks
1757 */
1758 while (!list_empty(&inode_list)) {
1759 struct inodevec *ivec;
1760 ivec = list_entry(inode_list.next, struct inodevec, list);
1761 list_del(&ivec->list);
1762 while (ivec->nr > 0) {
1763 ivec->nr--;
1764 iput(ivec->inode[ivec->nr]);
1765 }
1766 kfree(ivec);
1767 }
1768 1775
1769 if (replaced && rc->stage == UPDATE_DATA_PTRS) 1776 if (replaced && rc->stage == UPDATE_DATA_PTRS)
1770 invalidate_extent_cache(root, &key, &next_key); 1777 invalidate_extent_cache(root, &key, &next_key);
@@ -3534,8 +3541,8 @@ int btrfs_relocate_block_group(struct btrfs_root *extent_root, u64 group_start)
3534 (unsigned long long)rc->block_group->key.objectid, 3541 (unsigned long long)rc->block_group->key.objectid,
3535 (unsigned long long)rc->block_group->flags); 3542 (unsigned long long)rc->block_group->flags);
3536 3543
3537 btrfs_start_delalloc_inodes(fs_info->tree_root); 3544 btrfs_start_delalloc_inodes(fs_info->tree_root, 0);
3538 btrfs_wait_ordered_extents(fs_info->tree_root, 0); 3545 btrfs_wait_ordered_extents(fs_info->tree_root, 0, 0);
3539 3546
3540 while (1) { 3547 while (1) {
3541 rc->extents_found = 0; 3548 rc->extents_found = 0;
@@ -3755,6 +3762,7 @@ out:
3755 BTRFS_DATA_RELOC_TREE_OBJECTID); 3762 BTRFS_DATA_RELOC_TREE_OBJECTID);
3756 if (IS_ERR(fs_root)) 3763 if (IS_ERR(fs_root))
3757 err = PTR_ERR(fs_root); 3764 err = PTR_ERR(fs_root);
3765 btrfs_orphan_cleanup(fs_root);
3758 } 3766 }
3759 return err; 3767 return err;
3760} 3768}
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 752a5463bf53..3f9b45704fcd 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -128,6 +128,7 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
128 substring_t args[MAX_OPT_ARGS]; 128 substring_t args[MAX_OPT_ARGS];
129 char *p, *num; 129 char *p, *num;
130 int intarg; 130 int intarg;
131 int ret = 0;
131 132
132 if (!options) 133 if (!options)
133 return 0; 134 return 0;
@@ -262,12 +263,18 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
262 case Opt_discard: 263 case Opt_discard:
263 btrfs_set_opt(info->mount_opt, DISCARD); 264 btrfs_set_opt(info->mount_opt, DISCARD);
264 break; 265 break;
266 case Opt_err:
267 printk(KERN_INFO "btrfs: unrecognized mount option "
268 "'%s'\n", p);
269 ret = -EINVAL;
270 goto out;
265 default: 271 default:
266 break; 272 break;
267 } 273 }
268 } 274 }
275out:
269 kfree(options); 276 kfree(options);
270 return 0; 277 return ret;
271} 278}
272 279
273/* 280/*
@@ -405,8 +412,8 @@ int btrfs_sync_fs(struct super_block *sb, int wait)
405 return 0; 412 return 0;
406 } 413 }
407 414
408 btrfs_start_delalloc_inodes(root); 415 btrfs_start_delalloc_inodes(root, 0);
409 btrfs_wait_ordered_extents(root, 0); 416 btrfs_wait_ordered_extents(root, 0, 0);
410 417
411 trans = btrfs_start_transaction(root, 1); 418 trans = btrfs_start_transaction(root, 1);
412 ret = btrfs_commit_transaction(trans, root); 419 ret = btrfs_commit_transaction(trans, root);
@@ -450,6 +457,8 @@ static int btrfs_show_options(struct seq_file *seq, struct vfsmount *vfs)
450 seq_puts(seq, ",notreelog"); 457 seq_puts(seq, ",notreelog");
451 if (btrfs_test_opt(root, FLUSHONCOMMIT)) 458 if (btrfs_test_opt(root, FLUSHONCOMMIT))
452 seq_puts(seq, ",flushoncommit"); 459 seq_puts(seq, ",flushoncommit");
460 if (btrfs_test_opt(root, DISCARD))
461 seq_puts(seq, ",discard");
453 if (!(root->fs_info->sb->s_flags & MS_POSIXACL)) 462 if (!(root->fs_info->sb->s_flags & MS_POSIXACL))
454 seq_puts(seq, ",noacl"); 463 seq_puts(seq, ",noacl");
455 return 0; 464 return 0;
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index c207e8c32c9b..b2acc79f1b34 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -333,6 +333,9 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
333 memset(trans, 0, sizeof(*trans)); 333 memset(trans, 0, sizeof(*trans));
334 kmem_cache_free(btrfs_trans_handle_cachep, trans); 334 kmem_cache_free(btrfs_trans_handle_cachep, trans);
335 335
336 if (throttle)
337 btrfs_run_delayed_iputs(root);
338
336 return 0; 339 return 0;
337} 340}
338 341
@@ -354,7 +357,7 @@ int btrfs_end_transaction_throttle(struct btrfs_trans_handle *trans,
354 * those extents are sent to disk but does not wait on them 357 * those extents are sent to disk but does not wait on them
355 */ 358 */
356int btrfs_write_marked_extents(struct btrfs_root *root, 359int btrfs_write_marked_extents(struct btrfs_root *root,
357 struct extent_io_tree *dirty_pages) 360 struct extent_io_tree *dirty_pages, int mark)
358{ 361{
359 int ret; 362 int ret;
360 int err = 0; 363 int err = 0;
@@ -367,7 +370,7 @@ int btrfs_write_marked_extents(struct btrfs_root *root,
367 370
368 while (1) { 371 while (1) {
369 ret = find_first_extent_bit(dirty_pages, start, &start, &end, 372 ret = find_first_extent_bit(dirty_pages, start, &start, &end,
370 EXTENT_DIRTY); 373 mark);
371 if (ret) 374 if (ret)
372 break; 375 break;
373 while (start <= end) { 376 while (start <= end) {
@@ -413,7 +416,7 @@ int btrfs_write_marked_extents(struct btrfs_root *root,
413 * on all the pages and clear them from the dirty pages state tree 416 * on all the pages and clear them from the dirty pages state tree
414 */ 417 */
415int btrfs_wait_marked_extents(struct btrfs_root *root, 418int btrfs_wait_marked_extents(struct btrfs_root *root,
416 struct extent_io_tree *dirty_pages) 419 struct extent_io_tree *dirty_pages, int mark)
417{ 420{
418 int ret; 421 int ret;
419 int err = 0; 422 int err = 0;
@@ -425,12 +428,12 @@ int btrfs_wait_marked_extents(struct btrfs_root *root,
425 unsigned long index; 428 unsigned long index;
426 429
427 while (1) { 430 while (1) {
428 ret = find_first_extent_bit(dirty_pages, 0, &start, &end, 431 ret = find_first_extent_bit(dirty_pages, start, &start, &end,
429 EXTENT_DIRTY); 432 mark);
430 if (ret) 433 if (ret)
431 break; 434 break;
432 435
433 clear_extent_dirty(dirty_pages, start, end, GFP_NOFS); 436 clear_extent_bits(dirty_pages, start, end, mark, GFP_NOFS);
434 while (start <= end) { 437 while (start <= end) {
435 index = start >> PAGE_CACHE_SHIFT; 438 index = start >> PAGE_CACHE_SHIFT;
436 start = (u64)(index + 1) << PAGE_CACHE_SHIFT; 439 start = (u64)(index + 1) << PAGE_CACHE_SHIFT;
@@ -460,13 +463,13 @@ int btrfs_wait_marked_extents(struct btrfs_root *root,
460 * those extents are on disk for transaction or log commit 463 * those extents are on disk for transaction or log commit
461 */ 464 */
462int btrfs_write_and_wait_marked_extents(struct btrfs_root *root, 465int btrfs_write_and_wait_marked_extents(struct btrfs_root *root,
463 struct extent_io_tree *dirty_pages) 466 struct extent_io_tree *dirty_pages, int mark)
464{ 467{
465 int ret; 468 int ret;
466 int ret2; 469 int ret2;
467 470
468 ret = btrfs_write_marked_extents(root, dirty_pages); 471 ret = btrfs_write_marked_extents(root, dirty_pages, mark);
469 ret2 = btrfs_wait_marked_extents(root, dirty_pages); 472 ret2 = btrfs_wait_marked_extents(root, dirty_pages, mark);
470 return ret || ret2; 473 return ret || ret2;
471} 474}
472 475
@@ -479,7 +482,8 @@ int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans,
479 return filemap_write_and_wait(btree_inode->i_mapping); 482 return filemap_write_and_wait(btree_inode->i_mapping);
480 } 483 }
481 return btrfs_write_and_wait_marked_extents(root, 484 return btrfs_write_and_wait_marked_extents(root,
482 &trans->transaction->dirty_pages); 485 &trans->transaction->dirty_pages,
486 EXTENT_DIRTY);
483} 487}
484 488
485/* 489/*
@@ -497,13 +501,16 @@ static int update_cowonly_root(struct btrfs_trans_handle *trans,
497{ 501{
498 int ret; 502 int ret;
499 u64 old_root_bytenr; 503 u64 old_root_bytenr;
504 u64 old_root_used;
500 struct btrfs_root *tree_root = root->fs_info->tree_root; 505 struct btrfs_root *tree_root = root->fs_info->tree_root;
501 506
507 old_root_used = btrfs_root_used(&root->root_item);
502 btrfs_write_dirty_block_groups(trans, root); 508 btrfs_write_dirty_block_groups(trans, root);
503 509
504 while (1) { 510 while (1) {
505 old_root_bytenr = btrfs_root_bytenr(&root->root_item); 511 old_root_bytenr = btrfs_root_bytenr(&root->root_item);
506 if (old_root_bytenr == root->node->start) 512 if (old_root_bytenr == root->node->start &&
513 old_root_used == btrfs_root_used(&root->root_item))
507 break; 514 break;
508 515
509 btrfs_set_root_node(&root->root_item, root->node); 516 btrfs_set_root_node(&root->root_item, root->node);
@@ -512,6 +519,7 @@ static int update_cowonly_root(struct btrfs_trans_handle *trans,
512 &root->root_item); 519 &root->root_item);
513 BUG_ON(ret); 520 BUG_ON(ret);
514 521
522 old_root_used = btrfs_root_used(&root->root_item);
515 ret = btrfs_write_dirty_block_groups(trans, root); 523 ret = btrfs_write_dirty_block_groups(trans, root);
516 BUG_ON(ret); 524 BUG_ON(ret);
517 } 525 }
@@ -795,7 +803,6 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
795 memcpy(&pending->root_key, &key, sizeof(key)); 803 memcpy(&pending->root_key, &key, sizeof(key));
796fail: 804fail:
797 kfree(new_root_item); 805 kfree(new_root_item);
798 btrfs_unreserve_metadata_space(root, 6);
799 return ret; 806 return ret;
800} 807}
801 808
@@ -807,7 +814,6 @@ static noinline int finish_pending_snapshot(struct btrfs_fs_info *fs_info,
807 u64 index = 0; 814 u64 index = 0;
808 struct btrfs_trans_handle *trans; 815 struct btrfs_trans_handle *trans;
809 struct inode *parent_inode; 816 struct inode *parent_inode;
810 struct inode *inode;
811 struct btrfs_root *parent_root; 817 struct btrfs_root *parent_root;
812 818
813 parent_inode = pending->dentry->d_parent->d_inode; 819 parent_inode = pending->dentry->d_parent->d_inode;
@@ -839,8 +845,6 @@ static noinline int finish_pending_snapshot(struct btrfs_fs_info *fs_info,
839 845
840 BUG_ON(ret); 846 BUG_ON(ret);
841 847
842 inode = btrfs_lookup_dentry(parent_inode, pending->dentry);
843 d_instantiate(pending->dentry, inode);
844fail: 848fail:
845 btrfs_end_transaction(trans, fs_info->fs_root); 849 btrfs_end_transaction(trans, fs_info->fs_root);
846 return ret; 850 return ret;
@@ -994,11 +998,11 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
994 mutex_unlock(&root->fs_info->trans_mutex); 998 mutex_unlock(&root->fs_info->trans_mutex);
995 999
996 if (flush_on_commit) { 1000 if (flush_on_commit) {
997 btrfs_start_delalloc_inodes(root); 1001 btrfs_start_delalloc_inodes(root, 1);
998 ret = btrfs_wait_ordered_extents(root, 0); 1002 ret = btrfs_wait_ordered_extents(root, 0, 1);
999 BUG_ON(ret); 1003 BUG_ON(ret);
1000 } else if (snap_pending) { 1004 } else if (snap_pending) {
1001 ret = btrfs_wait_ordered_extents(root, 1); 1005 ret = btrfs_wait_ordered_extents(root, 0, 1);
1002 BUG_ON(ret); 1006 BUG_ON(ret);
1003 } 1007 }
1004 1008
@@ -1116,6 +1120,10 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
1116 current->journal_info = NULL; 1120 current->journal_info = NULL;
1117 1121
1118 kmem_cache_free(btrfs_trans_handle_cachep, trans); 1122 kmem_cache_free(btrfs_trans_handle_cachep, trans);
1123
1124 if (current != root->fs_info->transaction_kthread)
1125 btrfs_run_delayed_iputs(root);
1126
1119 return ret; 1127 return ret;
1120} 1128}
1121 1129
diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h
index d4e3e7a6938c..93c7ccb33118 100644
--- a/fs/btrfs/transaction.h
+++ b/fs/btrfs/transaction.h
@@ -107,10 +107,10 @@ void btrfs_throttle(struct btrfs_root *root);
107int btrfs_record_root_in_trans(struct btrfs_trans_handle *trans, 107int btrfs_record_root_in_trans(struct btrfs_trans_handle *trans,
108 struct btrfs_root *root); 108 struct btrfs_root *root);
109int btrfs_write_and_wait_marked_extents(struct btrfs_root *root, 109int btrfs_write_and_wait_marked_extents(struct btrfs_root *root,
110 struct extent_io_tree *dirty_pages); 110 struct extent_io_tree *dirty_pages, int mark);
111int btrfs_write_marked_extents(struct btrfs_root *root, 111int btrfs_write_marked_extents(struct btrfs_root *root,
112 struct extent_io_tree *dirty_pages); 112 struct extent_io_tree *dirty_pages, int mark);
113int btrfs_wait_marked_extents(struct btrfs_root *root, 113int btrfs_wait_marked_extents(struct btrfs_root *root,
114 struct extent_io_tree *dirty_pages); 114 struct extent_io_tree *dirty_pages, int mark);
115int btrfs_transaction_in_commit(struct btrfs_fs_info *info); 115int btrfs_transaction_in_commit(struct btrfs_fs_info *info);
116#endif 116#endif
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 741666a7676a..4a9434b622ec 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -542,8 +542,8 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans,
542 542
543 saved_nbytes = inode_get_bytes(inode); 543 saved_nbytes = inode_get_bytes(inode);
544 /* drop any overlapping extents */ 544 /* drop any overlapping extents */
545 ret = btrfs_drop_extents(trans, root, inode, 545 ret = btrfs_drop_extents(trans, inode, start, extent_end,
546 start, extent_end, extent_end, start, &alloc_hint, 1); 546 &alloc_hint, 1);
547 BUG_ON(ret); 547 BUG_ON(ret);
548 548
549 if (found_type == BTRFS_FILE_EXTENT_REG || 549 if (found_type == BTRFS_FILE_EXTENT_REG ||
@@ -930,6 +930,17 @@ out_nowrite:
930 return 0; 930 return 0;
931} 931}
932 932
933static int insert_orphan_item(struct btrfs_trans_handle *trans,
934 struct btrfs_root *root, u64 offset)
935{
936 int ret;
937 ret = btrfs_find_orphan_item(root, offset);
938 if (ret > 0)
939 ret = btrfs_insert_orphan_item(trans, root, offset);
940 return ret;
941}
942
943
933/* 944/*
934 * There are a few corners where the link count of the file can't 945 * There are a few corners where the link count of the file can't
935 * be properly maintained during replay. So, instead of adding 946 * be properly maintained during replay. So, instead of adding
@@ -997,9 +1008,13 @@ static noinline int fixup_inode_link_count(struct btrfs_trans_handle *trans,
997 } 1008 }
998 BTRFS_I(inode)->index_cnt = (u64)-1; 1009 BTRFS_I(inode)->index_cnt = (u64)-1;
999 1010
1000 if (inode->i_nlink == 0 && S_ISDIR(inode->i_mode)) { 1011 if (inode->i_nlink == 0) {
1001 ret = replay_dir_deletes(trans, root, NULL, path, 1012 if (S_ISDIR(inode->i_mode)) {
1002 inode->i_ino, 1); 1013 ret = replay_dir_deletes(trans, root, NULL, path,
1014 inode->i_ino, 1);
1015 BUG_ON(ret);
1016 }
1017 ret = insert_orphan_item(trans, root, inode->i_ino);
1003 BUG_ON(ret); 1018 BUG_ON(ret);
1004 } 1019 }
1005 btrfs_free_path(path); 1020 btrfs_free_path(path);
@@ -1587,7 +1602,6 @@ static int replay_one_buffer(struct btrfs_root *log, struct extent_buffer *eb,
1587 /* inode keys are done during the first stage */ 1602 /* inode keys are done during the first stage */
1588 if (key.type == BTRFS_INODE_ITEM_KEY && 1603 if (key.type == BTRFS_INODE_ITEM_KEY &&
1589 wc->stage == LOG_WALK_REPLAY_INODES) { 1604 wc->stage == LOG_WALK_REPLAY_INODES) {
1590 struct inode *inode;
1591 struct btrfs_inode_item *inode_item; 1605 struct btrfs_inode_item *inode_item;
1592 u32 mode; 1606 u32 mode;
1593 1607
@@ -1603,31 +1617,16 @@ static int replay_one_buffer(struct btrfs_root *log, struct extent_buffer *eb,
1603 eb, i, &key); 1617 eb, i, &key);
1604 BUG_ON(ret); 1618 BUG_ON(ret);
1605 1619
1606 /* for regular files, truncate away 1620 /* for regular files, make sure corresponding
1607 * extents past the new EOF 1621 * orhpan item exist. extents past the new EOF
1622 * will be truncated later by orphan cleanup.
1608 */ 1623 */
1609 if (S_ISREG(mode)) { 1624 if (S_ISREG(mode)) {
1610 inode = read_one_inode(root, 1625 ret = insert_orphan_item(wc->trans, root,
1611 key.objectid); 1626 key.objectid);
1612 BUG_ON(!inode);
1613
1614 ret = btrfs_truncate_inode_items(wc->trans,
1615 root, inode, inode->i_size,
1616 BTRFS_EXTENT_DATA_KEY);
1617 BUG_ON(ret); 1627 BUG_ON(ret);
1618
1619 /* if the nlink count is zero here, the iput
1620 * will free the inode. We bump it to make
1621 * sure it doesn't get freed until the link
1622 * count fixup is done
1623 */
1624 if (inode->i_nlink == 0) {
1625 btrfs_inc_nlink(inode);
1626 btrfs_update_inode(wc->trans,
1627 root, inode);
1628 }
1629 iput(inode);
1630 } 1628 }
1629
1631 ret = link_to_fixup_dir(wc->trans, root, 1630 ret = link_to_fixup_dir(wc->trans, root,
1632 path, key.objectid); 1631 path, key.objectid);
1633 BUG_ON(ret); 1632 BUG_ON(ret);
@@ -1977,10 +1976,11 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
1977{ 1976{
1978 int index1; 1977 int index1;
1979 int index2; 1978 int index2;
1979 int mark;
1980 int ret; 1980 int ret;
1981 struct btrfs_root *log = root->log_root; 1981 struct btrfs_root *log = root->log_root;
1982 struct btrfs_root *log_root_tree = root->fs_info->log_root_tree; 1982 struct btrfs_root *log_root_tree = root->fs_info->log_root_tree;
1983 u64 log_transid = 0; 1983 unsigned long log_transid = 0;
1984 1984
1985 mutex_lock(&root->log_mutex); 1985 mutex_lock(&root->log_mutex);
1986 index1 = root->log_transid % 2; 1986 index1 = root->log_transid % 2;
@@ -2014,24 +2014,29 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
2014 goto out; 2014 goto out;
2015 } 2015 }
2016 2016
2017 log_transid = root->log_transid;
2018 if (log_transid % 2 == 0)
2019 mark = EXTENT_DIRTY;
2020 else
2021 mark = EXTENT_NEW;
2022
2017 /* we start IO on all the marked extents here, but we don't actually 2023 /* we start IO on all the marked extents here, but we don't actually
2018 * wait for them until later. 2024 * wait for them until later.
2019 */ 2025 */
2020 ret = btrfs_write_marked_extents(log, &log->dirty_log_pages); 2026 ret = btrfs_write_marked_extents(log, &log->dirty_log_pages, mark);
2021 BUG_ON(ret); 2027 BUG_ON(ret);
2022 2028
2023 btrfs_set_root_node(&log->root_item, log->node); 2029 btrfs_set_root_node(&log->root_item, log->node);
2024 2030
2025 root->log_batch = 0; 2031 root->log_batch = 0;
2026 log_transid = root->log_transid;
2027 root->log_transid++; 2032 root->log_transid++;
2028 log->log_transid = root->log_transid; 2033 log->log_transid = root->log_transid;
2029 root->log_start_pid = 0; 2034 root->log_start_pid = 0;
2030 smp_mb(); 2035 smp_mb();
2031 /* 2036 /*
2032 * log tree has been flushed to disk, new modifications of 2037 * IO has been started, blocks of the log tree have WRITTEN flag set
2033 * the log will be written to new positions. so it's safe to 2038 * in their headers. new modifications of the log will be written to
2034 * allow log writers to go in. 2039 * new positions. so it's safe to allow log writers to go in.
2035 */ 2040 */
2036 mutex_unlock(&root->log_mutex); 2041 mutex_unlock(&root->log_mutex);
2037 2042
@@ -2052,7 +2057,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
2052 2057
2053 index2 = log_root_tree->log_transid % 2; 2058 index2 = log_root_tree->log_transid % 2;
2054 if (atomic_read(&log_root_tree->log_commit[index2])) { 2059 if (atomic_read(&log_root_tree->log_commit[index2])) {
2055 btrfs_wait_marked_extents(log, &log->dirty_log_pages); 2060 btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark);
2056 wait_log_commit(trans, log_root_tree, 2061 wait_log_commit(trans, log_root_tree,
2057 log_root_tree->log_transid); 2062 log_root_tree->log_transid);
2058 mutex_unlock(&log_root_tree->log_mutex); 2063 mutex_unlock(&log_root_tree->log_mutex);
@@ -2072,16 +2077,17 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
2072 * check the full commit flag again 2077 * check the full commit flag again
2073 */ 2078 */
2074 if (root->fs_info->last_trans_log_full_commit == trans->transid) { 2079 if (root->fs_info->last_trans_log_full_commit == trans->transid) {
2075 btrfs_wait_marked_extents(log, &log->dirty_log_pages); 2080 btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark);
2076 mutex_unlock(&log_root_tree->log_mutex); 2081 mutex_unlock(&log_root_tree->log_mutex);
2077 ret = -EAGAIN; 2082 ret = -EAGAIN;
2078 goto out_wake_log_root; 2083 goto out_wake_log_root;
2079 } 2084 }
2080 2085
2081 ret = btrfs_write_and_wait_marked_extents(log_root_tree, 2086 ret = btrfs_write_and_wait_marked_extents(log_root_tree,
2082 &log_root_tree->dirty_log_pages); 2087 &log_root_tree->dirty_log_pages,
2088 EXTENT_DIRTY | EXTENT_NEW);
2083 BUG_ON(ret); 2089 BUG_ON(ret);
2084 btrfs_wait_marked_extents(log, &log->dirty_log_pages); 2090 btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark);
2085 2091
2086 btrfs_set_super_log_root(&root->fs_info->super_for_commit, 2092 btrfs_set_super_log_root(&root->fs_info->super_for_commit,
2087 log_root_tree->node->start); 2093 log_root_tree->node->start);
@@ -2147,12 +2153,12 @@ int btrfs_free_log(struct btrfs_trans_handle *trans, struct btrfs_root *root)
2147 2153
2148 while (1) { 2154 while (1) {
2149 ret = find_first_extent_bit(&log->dirty_log_pages, 2155 ret = find_first_extent_bit(&log->dirty_log_pages,
2150 0, &start, &end, EXTENT_DIRTY); 2156 0, &start, &end, EXTENT_DIRTY | EXTENT_NEW);
2151 if (ret) 2157 if (ret)
2152 break; 2158 break;
2153 2159
2154 clear_extent_dirty(&log->dirty_log_pages, 2160 clear_extent_bits(&log->dirty_log_pages, start, end,
2155 start, end, GFP_NOFS); 2161 EXTENT_DIRTY | EXTENT_NEW, GFP_NOFS);
2156 } 2162 }
2157 2163
2158 if (log->log_transid > 0) { 2164 if (log->log_transid > 0) {
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 7eda483d7b5a..198cff28766d 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -2209,7 +2209,7 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
2209 max_chunk_size = 10 * calc_size; 2209 max_chunk_size = 10 * calc_size;
2210 min_stripe_size = 64 * 1024 * 1024; 2210 min_stripe_size = 64 * 1024 * 1024;
2211 } else if (type & BTRFS_BLOCK_GROUP_METADATA) { 2211 } else if (type & BTRFS_BLOCK_GROUP_METADATA) {
2212 max_chunk_size = 4 * calc_size; 2212 max_chunk_size = 256 * 1024 * 1024;
2213 min_stripe_size = 32 * 1024 * 1024; 2213 min_stripe_size = 32 * 1024 * 1024;
2214 } else if (type & BTRFS_BLOCK_GROUP_SYSTEM) { 2214 } else if (type & BTRFS_BLOCK_GROUP_SYSTEM) {
2215 calc_size = 8 * 1024 * 1024; 2215 calc_size = 8 * 1024 * 1024;
diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c
index b6dd5967c48a..193b58f7d3f3 100644
--- a/fs/btrfs/xattr.c
+++ b/fs/btrfs/xattr.c
@@ -85,22 +85,23 @@ out:
85 return ret; 85 return ret;
86} 86}
87 87
88int __btrfs_setxattr(struct inode *inode, const char *name, 88static int do_setxattr(struct btrfs_trans_handle *trans,
89 const void *value, size_t size, int flags) 89 struct inode *inode, const char *name,
90 const void *value, size_t size, int flags)
90{ 91{
91 struct btrfs_dir_item *di; 92 struct btrfs_dir_item *di;
92 struct btrfs_root *root = BTRFS_I(inode)->root; 93 struct btrfs_root *root = BTRFS_I(inode)->root;
93 struct btrfs_trans_handle *trans;
94 struct btrfs_path *path; 94 struct btrfs_path *path;
95 int ret = 0, mod = 0; 95 size_t name_len = strlen(name);
96 int ret = 0;
97
98 if (name_len + size > BTRFS_MAX_XATTR_SIZE(root))
99 return -ENOSPC;
96 100
97 path = btrfs_alloc_path(); 101 path = btrfs_alloc_path();
98 if (!path) 102 if (!path)
99 return -ENOMEM; 103 return -ENOMEM;
100 104
101 trans = btrfs_join_transaction(root, 1);
102 btrfs_set_trans_block_group(trans, inode);
103
104 /* first lets see if we already have this xattr */ 105 /* first lets see if we already have this xattr */
105 di = btrfs_lookup_xattr(trans, root, path, inode->i_ino, name, 106 di = btrfs_lookup_xattr(trans, root, path, inode->i_ino, name,
106 strlen(name), -1); 107 strlen(name), -1);
@@ -118,15 +119,12 @@ int __btrfs_setxattr(struct inode *inode, const char *name,
118 } 119 }
119 120
120 ret = btrfs_delete_one_dir_name(trans, root, path, di); 121 ret = btrfs_delete_one_dir_name(trans, root, path, di);
121 if (ret) 122 BUG_ON(ret);
122 goto out;
123 btrfs_release_path(root, path); 123 btrfs_release_path(root, path);
124 124
125 /* if we don't have a value then we are removing the xattr */ 125 /* if we don't have a value then we are removing the xattr */
126 if (!value) { 126 if (!value)
127 mod = 1;
128 goto out; 127 goto out;
129 }
130 } else { 128 } else {
131 btrfs_release_path(root, path); 129 btrfs_release_path(root, path);
132 130
@@ -138,20 +136,45 @@ int __btrfs_setxattr(struct inode *inode, const char *name,
138 } 136 }
139 137
140 /* ok we have to create a completely new xattr */ 138 /* ok we have to create a completely new xattr */
141 ret = btrfs_insert_xattr_item(trans, root, name, strlen(name), 139 ret = btrfs_insert_xattr_item(trans, root, path, inode->i_ino,
142 value, size, inode->i_ino); 140 name, name_len, value, size);
141 BUG_ON(ret);
142out:
143 btrfs_free_path(path);
144 return ret;
145}
146
147int __btrfs_setxattr(struct btrfs_trans_handle *trans,
148 struct inode *inode, const char *name,
149 const void *value, size_t size, int flags)
150{
151 struct btrfs_root *root = BTRFS_I(inode)->root;
152 int ret;
153
154 if (trans)
155 return do_setxattr(trans, inode, name, value, size, flags);
156
157 ret = btrfs_reserve_metadata_space(root, 2);
143 if (ret) 158 if (ret)
144 goto out; 159 return ret;
145 mod = 1;
146 160
147out: 161 trans = btrfs_start_transaction(root, 1);
148 if (mod) { 162 if (!trans) {
149 inode->i_ctime = CURRENT_TIME; 163 ret = -ENOMEM;
150 ret = btrfs_update_inode(trans, root, inode); 164 goto out;
151 } 165 }
166 btrfs_set_trans_block_group(trans, inode);
152 167
153 btrfs_end_transaction(trans, root); 168 ret = do_setxattr(trans, inode, name, value, size, flags);
154 btrfs_free_path(path); 169 if (ret)
170 goto out;
171
172 inode->i_ctime = CURRENT_TIME;
173 ret = btrfs_update_inode(trans, root, inode);
174 BUG_ON(ret);
175out:
176 btrfs_end_transaction_throttle(trans, root);
177 btrfs_unreserve_metadata_space(root, 2);
155 return ret; 178 return ret;
156} 179}
157 180
@@ -314,7 +337,9 @@ int btrfs_setxattr(struct dentry *dentry, const char *name, const void *value,
314 337
315 if (size == 0) 338 if (size == 0)
316 value = ""; /* empty EA, do not remove */ 339 value = ""; /* empty EA, do not remove */
317 return __btrfs_setxattr(dentry->d_inode, name, value, size, flags); 340
341 return __btrfs_setxattr(NULL, dentry->d_inode, name, value, size,
342 flags);
318} 343}
319 344
320int btrfs_removexattr(struct dentry *dentry, const char *name) 345int btrfs_removexattr(struct dentry *dentry, const char *name)
@@ -329,10 +354,13 @@ int btrfs_removexattr(struct dentry *dentry, const char *name)
329 354
330 if (!btrfs_is_valid_xattr(name)) 355 if (!btrfs_is_valid_xattr(name))
331 return -EOPNOTSUPP; 356 return -EOPNOTSUPP;
332 return __btrfs_setxattr(dentry->d_inode, name, NULL, 0, XATTR_REPLACE); 357
358 return __btrfs_setxattr(NULL, dentry->d_inode, name, NULL, 0,
359 XATTR_REPLACE);
333} 360}
334 361
335int btrfs_xattr_security_init(struct inode *inode, struct inode *dir) 362int btrfs_xattr_security_init(struct btrfs_trans_handle *trans,
363 struct inode *inode, struct inode *dir)
336{ 364{
337 int err; 365 int err;
338 size_t len; 366 size_t len;
@@ -354,7 +382,7 @@ int btrfs_xattr_security_init(struct inode *inode, struct inode *dir)
354 } else { 382 } else {
355 strcpy(name, XATTR_SECURITY_PREFIX); 383 strcpy(name, XATTR_SECURITY_PREFIX);
356 strcpy(name + XATTR_SECURITY_PREFIX_LEN, suffix); 384 strcpy(name + XATTR_SECURITY_PREFIX_LEN, suffix);
357 err = __btrfs_setxattr(inode, name, value, len, 0); 385 err = __btrfs_setxattr(trans, inode, name, value, len, 0);
358 kfree(name); 386 kfree(name);
359 } 387 }
360 388
diff --git a/fs/btrfs/xattr.h b/fs/btrfs/xattr.h
index c71e9c3cf3f7..721efa0346e0 100644
--- a/fs/btrfs/xattr.h
+++ b/fs/btrfs/xattr.h
@@ -27,15 +27,16 @@ extern struct xattr_handler *btrfs_xattr_handlers[];
27 27
28extern ssize_t __btrfs_getxattr(struct inode *inode, const char *name, 28extern ssize_t __btrfs_getxattr(struct inode *inode, const char *name,
29 void *buffer, size_t size); 29 void *buffer, size_t size);
30extern int __btrfs_setxattr(struct inode *inode, const char *name, 30extern int __btrfs_setxattr(struct btrfs_trans_handle *trans,
31 const void *value, size_t size, int flags); 31 struct inode *inode, const char *name,
32 32 const void *value, size_t size, int flags);
33extern ssize_t btrfs_getxattr(struct dentry *dentry, const char *name, 33extern ssize_t btrfs_getxattr(struct dentry *dentry, const char *name,
34 void *buffer, size_t size); 34 void *buffer, size_t size);
35extern int btrfs_setxattr(struct dentry *dentry, const char *name, 35extern int btrfs_setxattr(struct dentry *dentry, const char *name,
36 const void *value, size_t size, int flags); 36 const void *value, size_t size, int flags);
37extern int btrfs_removexattr(struct dentry *dentry, const char *name); 37extern int btrfs_removexattr(struct dentry *dentry, const char *name);
38 38
39extern int btrfs_xattr_security_init(struct inode *inode, struct inode *dir); 39extern int btrfs_xattr_security_init(struct btrfs_trans_handle *trans,
40 struct inode *inode, struct inode *dir);
40 41
41#endif /* __XATTR__ */ 42#endif /* __XATTR__ */
diff --git a/fs/cachefiles/bind.c b/fs/cachefiles/bind.c
index 3797e0077b35..2906077ac798 100644
--- a/fs/cachefiles/bind.c
+++ b/fs/cachefiles/bind.c
@@ -84,7 +84,7 @@ int cachefiles_daemon_bind(struct cachefiles_cache *cache, char *args)
84static int cachefiles_daemon_add_cache(struct cachefiles_cache *cache) 84static int cachefiles_daemon_add_cache(struct cachefiles_cache *cache)
85{ 85{
86 struct cachefiles_object *fsdef; 86 struct cachefiles_object *fsdef;
87 struct nameidata nd; 87 struct path path;
88 struct kstatfs stats; 88 struct kstatfs stats;
89 struct dentry *graveyard, *cachedir, *root; 89 struct dentry *graveyard, *cachedir, *root;
90 const struct cred *saved_cred; 90 const struct cred *saved_cred;
@@ -114,15 +114,12 @@ static int cachefiles_daemon_add_cache(struct cachefiles_cache *cache)
114 _debug("- fsdef %p", fsdef); 114 _debug("- fsdef %p", fsdef);
115 115
116 /* look up the directory at the root of the cache */ 116 /* look up the directory at the root of the cache */
117 memset(&nd, 0, sizeof(nd)); 117 ret = kern_path(cache->rootdirname, LOOKUP_DIRECTORY, &path);
118
119 ret = path_lookup(cache->rootdirname, LOOKUP_DIRECTORY, &nd);
120 if (ret < 0) 118 if (ret < 0)
121 goto error_open_root; 119 goto error_open_root;
122 120
123 cache->mnt = mntget(nd.path.mnt); 121 cache->mnt = path.mnt;
124 root = dget(nd.path.dentry); 122 root = path.dentry;
125 path_put(&nd.path);
126 123
127 /* check parameters */ 124 /* check parameters */
128 ret = -EOPNOTSUPP; 125 ret = -EOPNOTSUPP;
diff --git a/fs/cachefiles/rdwr.c b/fs/cachefiles/rdwr.c
index a6c8c6fe8df9..1d8332563863 100644
--- a/fs/cachefiles/rdwr.c
+++ b/fs/cachefiles/rdwr.c
@@ -11,7 +11,6 @@
11 11
12#include <linux/mount.h> 12#include <linux/mount.h>
13#include <linux/file.h> 13#include <linux/file.h>
14#include <linux/ima.h>
15#include "internal.h" 14#include "internal.h"
16 15
17/* 16/*
@@ -923,7 +922,6 @@ int cachefiles_write_page(struct fscache_storage *op, struct page *page)
923 if (IS_ERR(file)) { 922 if (IS_ERR(file)) {
924 ret = PTR_ERR(file); 923 ret = PTR_ERR(file);
925 } else { 924 } else {
926 ima_counts_get(file);
927 ret = -EIO; 925 ret = -EIO;
928 if (file->f_op->write) { 926 if (file->f_op->write) {
929 pos = (loff_t) page->index << PAGE_SHIFT; 927 pos = (loff_t) page->index << PAGE_SHIFT;
diff --git a/fs/cifs/export.c b/fs/cifs/export.c
index 75949d6a5f1b..6177f7cca16a 100644
--- a/fs/cifs/export.c
+++ b/fs/cifs/export.c
@@ -24,7 +24,7 @@
24 */ 24 */
25 25
26 /* 26 /*
27 * See Documentation/filesystems/Exporting 27 * See Documentation/filesystems/nfs/Exporting
28 * and examples in fs/exportfs 28 * and examples in fs/exportfs
29 * 29 *
30 * Since cifs is a network file system, an "fsid" must be included for 30 * Since cifs is a network file system, an "fsid" must be included for
diff --git a/fs/compat.c b/fs/compat.c
index 6c19040ffeef..00d90c2e66f0 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -38,8 +38,6 @@
38#include <linux/dirent.h> 38#include <linux/dirent.h>
39#include <linux/fsnotify.h> 39#include <linux/fsnotify.h>
40#include <linux/highuid.h> 40#include <linux/highuid.h>
41#include <linux/sunrpc/svc.h>
42#include <linux/nfsd/nfsd.h>
43#include <linux/nfsd/syscall.h> 41#include <linux/nfsd/syscall.h>
44#include <linux/personality.h> 42#include <linux/personality.h>
45#include <linux/rwsem.h> 43#include <linux/rwsem.h>
diff --git a/fs/dcache.c b/fs/dcache.c
index a100fa35a48f..953173a293a9 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -978,6 +978,7 @@ struct dentry *d_alloc_name(struct dentry *parent, const char *name)
978 q.hash = full_name_hash(q.name, q.len); 978 q.hash = full_name_hash(q.name, q.len);
979 return d_alloc(parent, &q); 979 return d_alloc(parent, &q);
980} 980}
981EXPORT_SYMBOL(d_alloc_name);
981 982
982/* the caller must hold dcache_lock */ 983/* the caller must hold dcache_lock */
983static void __d_instantiate(struct dentry *dentry, struct inode *inode) 984static void __d_instantiate(struct dentry *dentry, struct inode *inode)
diff --git a/fs/direct-io.c b/fs/direct-io.c
index b912270942fa..e82adc2debb7 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -53,13 +53,6 @@
53 * 53 *
54 * If blkfactor is zero then the user's request was aligned to the filesystem's 54 * If blkfactor is zero then the user's request was aligned to the filesystem's
55 * blocksize. 55 * blocksize.
56 *
57 * lock_type is DIO_LOCKING for regular files on direct-IO-naive filesystems.
58 * This determines whether we need to do the fancy locking which prevents
59 * direct-IO from being able to read uninitialised disk blocks. If its zero
60 * (blockdev) this locking is not done, and if it is DIO_OWN_LOCKING i_mutex is
61 * not held for the entire direct write (taken briefly, initially, during a
62 * direct read though, but its never held for the duration of a direct-IO).
63 */ 56 */
64 57
65struct dio { 58struct dio {
@@ -68,7 +61,7 @@ struct dio {
68 struct inode *inode; 61 struct inode *inode;
69 int rw; 62 int rw;
70 loff_t i_size; /* i_size when submitted */ 63 loff_t i_size; /* i_size when submitted */
71 int lock_type; /* doesn't change */ 64 int flags; /* doesn't change */
72 unsigned blkbits; /* doesn't change */ 65 unsigned blkbits; /* doesn't change */
73 unsigned blkfactor; /* When we're using an alignment which 66 unsigned blkfactor; /* When we're using an alignment which
74 is finer than the filesystem's soft 67 is finer than the filesystem's soft
@@ -104,6 +97,18 @@ struct dio {
104 unsigned cur_page_len; /* Nr of bytes at cur_page_offset */ 97 unsigned cur_page_len; /* Nr of bytes at cur_page_offset */
105 sector_t cur_page_block; /* Where it starts */ 98 sector_t cur_page_block; /* Where it starts */
106 99
100 /* BIO completion state */
101 spinlock_t bio_lock; /* protects BIO fields below */
102 unsigned long refcount; /* direct_io_worker() and bios */
103 struct bio *bio_list; /* singly linked via bi_private */
104 struct task_struct *waiter; /* waiting task (NULL if none) */
105
106 /* AIO related stuff */
107 struct kiocb *iocb; /* kiocb */
108 int is_async; /* is IO async ? */
109 int io_error; /* IO error in completion path */
110 ssize_t result; /* IO result */
111
107 /* 112 /*
108 * Page fetching state. These variables belong to dio_refill_pages(). 113 * Page fetching state. These variables belong to dio_refill_pages().
109 */ 114 */
@@ -115,22 +120,16 @@ struct dio {
115 * Page queue. These variables belong to dio_refill_pages() and 120 * Page queue. These variables belong to dio_refill_pages() and
116 * dio_get_page(). 121 * dio_get_page().
117 */ 122 */
118 struct page *pages[DIO_PAGES]; /* page buffer */
119 unsigned head; /* next page to process */ 123 unsigned head; /* next page to process */
120 unsigned tail; /* last valid page + 1 */ 124 unsigned tail; /* last valid page + 1 */
121 int page_errors; /* errno from get_user_pages() */ 125 int page_errors; /* errno from get_user_pages() */
122 126
123 /* BIO completion state */ 127 /*
124 spinlock_t bio_lock; /* protects BIO fields below */ 128 * pages[] (and any fields placed after it) are not zeroed out at
125 unsigned long refcount; /* direct_io_worker() and bios */ 129 * allocation time. Don't add new fields after pages[] unless you
126 struct bio *bio_list; /* singly linked via bi_private */ 130 * wish that they not be zeroed.
127 struct task_struct *waiter; /* waiting task (NULL if none) */ 131 */
128 132 struct page *pages[DIO_PAGES]; /* page buffer */
129 /* AIO related stuff */
130 struct kiocb *iocb; /* kiocb */
131 int is_async; /* is IO async ? */
132 int io_error; /* IO error in completion path */
133 ssize_t result; /* IO result */
134}; 133};
135 134
136/* 135/*
@@ -240,7 +239,8 @@ static int dio_complete(struct dio *dio, loff_t offset, int ret)
240 if (dio->end_io && dio->result) 239 if (dio->end_io && dio->result)
241 dio->end_io(dio->iocb, offset, transferred, 240 dio->end_io(dio->iocb, offset, transferred,
242 dio->map_bh.b_private); 241 dio->map_bh.b_private);
243 if (dio->lock_type == DIO_LOCKING) 242
243 if (dio->flags & DIO_LOCKING)
244 /* lockdep: non-owner release */ 244 /* lockdep: non-owner release */
245 up_read_non_owner(&dio->inode->i_alloc_sem); 245 up_read_non_owner(&dio->inode->i_alloc_sem);
246 246
@@ -515,21 +515,24 @@ static int get_more_blocks(struct dio *dio)
515 map_bh->b_state = 0; 515 map_bh->b_state = 0;
516 map_bh->b_size = fs_count << dio->inode->i_blkbits; 516 map_bh->b_size = fs_count << dio->inode->i_blkbits;
517 517
518 /*
519 * For writes inside i_size on a DIO_SKIP_HOLES filesystem we
520 * forbid block creations: only overwrites are permitted.
521 * We will return early to the caller once we see an
522 * unmapped buffer head returned, and the caller will fall
523 * back to buffered I/O.
524 *
525 * Otherwise the decision is left to the get_blocks method,
526 * which may decide to handle it or also return an unmapped
527 * buffer head.
528 */
518 create = dio->rw & WRITE; 529 create = dio->rw & WRITE;
519 if (dio->lock_type == DIO_LOCKING) { 530 if (dio->flags & DIO_SKIP_HOLES) {
520 if (dio->block_in_file < (i_size_read(dio->inode) >> 531 if (dio->block_in_file < (i_size_read(dio->inode) >>
521 dio->blkbits)) 532 dio->blkbits))
522 create = 0; 533 create = 0;
523 } else if (dio->lock_type == DIO_NO_LOCKING) {
524 create = 0;
525 } 534 }
526 535
527 /*
528 * For writes inside i_size we forbid block creations: only
529 * overwrites are permitted. We fall back to buffered writes
530 * at a higher level for inside-i_size block-instantiating
531 * writes.
532 */
533 ret = (*dio->get_block)(dio->inode, fs_startblk, 536 ret = (*dio->get_block)(dio->inode, fs_startblk,
534 map_bh, create); 537 map_bh, create);
535 } 538 }
@@ -1039,7 +1042,7 @@ direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode,
1039 * we can let i_mutex go now that its achieved its purpose 1042 * we can let i_mutex go now that its achieved its purpose
1040 * of protecting us from looking up uninitialized blocks. 1043 * of protecting us from looking up uninitialized blocks.
1041 */ 1044 */
1042 if ((rw == READ) && (dio->lock_type == DIO_LOCKING)) 1045 if (rw == READ && (dio->flags & DIO_LOCKING))
1043 mutex_unlock(&dio->inode->i_mutex); 1046 mutex_unlock(&dio->inode->i_mutex);
1044 1047
1045 /* 1048 /*
@@ -1086,30 +1089,28 @@ direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode,
1086 1089
1087/* 1090/*
1088 * This is a library function for use by filesystem drivers. 1091 * This is a library function for use by filesystem drivers.
1089 * The locking rules are governed by the dio_lock_type parameter.
1090 * 1092 *
1091 * DIO_NO_LOCKING (no locking, for raw block device access) 1093 * The locking rules are governed by the flags parameter:
1092 * For writes, i_mutex is not held on entry; it is never taken. 1094 * - if the flags value contains DIO_LOCKING we use a fancy locking
1095 * scheme for dumb filesystems.
1096 * For writes this function is called under i_mutex and returns with
1097 * i_mutex held, for reads, i_mutex is not held on entry, but it is
1098 * taken and dropped again before returning.
1099 * For reads and writes i_alloc_sem is taken in shared mode and released
1100 * on I/O completion (which may happen asynchronously after returning to
1101 * the caller).
1093 * 1102 *
1094 * DIO_LOCKING (simple locking for regular files) 1103 * - if the flags value does NOT contain DIO_LOCKING we don't use any
1095 * For writes we are called under i_mutex and return with i_mutex held, even 1104 * internal locking but rather rely on the filesystem to synchronize
1096 * though it is internally dropped. 1105 * direct I/O reads/writes versus each other and truncate.
1097 * For reads, i_mutex is not held on entry, but it is taken and dropped before 1106 * For reads and writes both i_mutex and i_alloc_sem are not held on
1098 * returning. 1107 * entry and are never taken.
1099 *
1100 * DIO_OWN_LOCKING (filesystem provides synchronisation and handling of
1101 * uninitialised data, allowing parallel direct readers and writers)
1102 * For writes we are called without i_mutex, return without it, never touch it.
1103 * For reads we are called under i_mutex and return with i_mutex held, even
1104 * though it may be internally dropped.
1105 *
1106 * Additional i_alloc_sem locking requirements described inline below.
1107 */ 1108 */
1108ssize_t 1109ssize_t
1109__blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, 1110__blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
1110 struct block_device *bdev, const struct iovec *iov, loff_t offset, 1111 struct block_device *bdev, const struct iovec *iov, loff_t offset,
1111 unsigned long nr_segs, get_block_t get_block, dio_iodone_t end_io, 1112 unsigned long nr_segs, get_block_t get_block, dio_iodone_t end_io,
1112 int dio_lock_type) 1113 int flags)
1113{ 1114{
1114 int seg; 1115 int seg;
1115 size_t size; 1116 size_t size;
@@ -1120,8 +1121,6 @@ __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
1120 ssize_t retval = -EINVAL; 1121 ssize_t retval = -EINVAL;
1121 loff_t end = offset; 1122 loff_t end = offset;
1122 struct dio *dio; 1123 struct dio *dio;
1123 int release_i_mutex = 0;
1124 int acquire_i_mutex = 0;
1125 1124
1126 if (rw & WRITE) 1125 if (rw & WRITE)
1127 rw = WRITE_ODIRECT_PLUG; 1126 rw = WRITE_ODIRECT_PLUG;
@@ -1151,48 +1150,41 @@ __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
1151 } 1150 }
1152 } 1151 }
1153 1152
1154 dio = kzalloc(sizeof(*dio), GFP_KERNEL); 1153 dio = kmalloc(sizeof(*dio), GFP_KERNEL);
1155 retval = -ENOMEM; 1154 retval = -ENOMEM;
1156 if (!dio) 1155 if (!dio)
1157 goto out; 1156 goto out;
1158
1159 /* 1157 /*
1160 * For block device access DIO_NO_LOCKING is used, 1158 * Believe it or not, zeroing out the page array caused a .5%
1161 * neither readers nor writers do any locking at all 1159 * performance regression in a database benchmark. So, we take
1162 * For regular files using DIO_LOCKING, 1160 * care to only zero out what's needed.
1163 * readers need to grab i_mutex and i_alloc_sem
1164 * writers need to grab i_alloc_sem only (i_mutex is already held)
1165 * For regular files using DIO_OWN_LOCKING,
1166 * neither readers nor writers take any locks here
1167 */ 1161 */
1168 dio->lock_type = dio_lock_type; 1162 memset(dio, 0, offsetof(struct dio, pages));
1169 if (dio_lock_type != DIO_NO_LOCKING) { 1163
1164 dio->flags = flags;
1165 if (dio->flags & DIO_LOCKING) {
1170 /* watch out for a 0 len io from a tricksy fs */ 1166 /* watch out for a 0 len io from a tricksy fs */
1171 if (rw == READ && end > offset) { 1167 if (rw == READ && end > offset) {
1172 struct address_space *mapping; 1168 struct address_space *mapping =
1169 iocb->ki_filp->f_mapping;
1173 1170
1174 mapping = iocb->ki_filp->f_mapping; 1171 /* will be released by direct_io_worker */
1175 if (dio_lock_type != DIO_OWN_LOCKING) { 1172 mutex_lock(&inode->i_mutex);
1176 mutex_lock(&inode->i_mutex);
1177 release_i_mutex = 1;
1178 }
1179 1173
1180 retval = filemap_write_and_wait_range(mapping, offset, 1174 retval = filemap_write_and_wait_range(mapping, offset,
1181 end - 1); 1175 end - 1);
1182 if (retval) { 1176 if (retval) {
1177 mutex_unlock(&inode->i_mutex);
1183 kfree(dio); 1178 kfree(dio);
1184 goto out; 1179 goto out;
1185 } 1180 }
1186
1187 if (dio_lock_type == DIO_OWN_LOCKING) {
1188 mutex_unlock(&inode->i_mutex);
1189 acquire_i_mutex = 1;
1190 }
1191 } 1181 }
1192 1182
1193 if (dio_lock_type == DIO_LOCKING) 1183 /*
1194 /* lockdep: not the owner will release it */ 1184 * Will be released at I/O completion, possibly in a
1195 down_read_non_owner(&inode->i_alloc_sem); 1185 * different thread.
1186 */
1187 down_read_non_owner(&inode->i_alloc_sem);
1196 } 1188 }
1197 1189
1198 /* 1190 /*
@@ -1210,24 +1202,19 @@ __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
1210 /* 1202 /*
1211 * In case of error extending write may have instantiated a few 1203 * In case of error extending write may have instantiated a few
1212 * blocks outside i_size. Trim these off again for DIO_LOCKING. 1204 * blocks outside i_size. Trim these off again for DIO_LOCKING.
1213 * NOTE: DIO_NO_LOCK/DIO_OWN_LOCK callers have to handle this by 1205 *
1214 * it's own meaner. 1206 * NOTE: filesystems with their own locking have to handle this
1207 * on their own.
1215 */ 1208 */
1216 if (unlikely(retval < 0 && (rw & WRITE))) { 1209 if (flags & DIO_LOCKING) {
1217 loff_t isize = i_size_read(inode); 1210 if (unlikely((rw & WRITE) && retval < 0)) {
1218 1211 loff_t isize = i_size_read(inode);
1219 if (end > isize && dio_lock_type == DIO_LOCKING) 1212 if (end > isize)
1220 vmtruncate(inode, isize); 1213 vmtruncate(inode, isize);
1214 }
1221 } 1215 }
1222 1216
1223 if (rw == READ && dio_lock_type == DIO_LOCKING)
1224 release_i_mutex = 0;
1225
1226out: 1217out:
1227 if (release_i_mutex)
1228 mutex_unlock(&inode->i_mutex);
1229 else if (acquire_i_mutex)
1230 mutex_lock(&inode->i_mutex);
1231 return retval; 1218 return retval;
1232} 1219}
1233EXPORT_SYMBOL(__blockdev_direct_IO); 1220EXPORT_SYMBOL(__blockdev_direct_IO);
diff --git a/fs/ecryptfs/dentry.c b/fs/ecryptfs/dentry.c
index 2dda5ade75bc..8f006a0d6076 100644
--- a/fs/ecryptfs/dentry.c
+++ b/fs/ecryptfs/dentry.c
@@ -62,7 +62,7 @@ static int ecryptfs_d_revalidate(struct dentry *dentry, struct nameidata *nd)
62 struct inode *lower_inode = 62 struct inode *lower_inode =
63 ecryptfs_inode_to_lower(dentry->d_inode); 63 ecryptfs_inode_to_lower(dentry->d_inode);
64 64
65 fsstack_copy_attr_all(dentry->d_inode, lower_inode, NULL); 65 fsstack_copy_attr_all(dentry->d_inode, lower_inode);
66 } 66 }
67out: 67out:
68 return rc; 68 return rc;
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c
index 056fed62d0de..429ca0b3ba08 100644
--- a/fs/ecryptfs/inode.c
+++ b/fs/ecryptfs/inode.c
@@ -626,9 +626,9 @@ ecryptfs_rename(struct inode *old_dir, struct dentry *old_dentry,
626 lower_new_dir_dentry->d_inode, lower_new_dentry); 626 lower_new_dir_dentry->d_inode, lower_new_dentry);
627 if (rc) 627 if (rc)
628 goto out_lock; 628 goto out_lock;
629 fsstack_copy_attr_all(new_dir, lower_new_dir_dentry->d_inode, NULL); 629 fsstack_copy_attr_all(new_dir, lower_new_dir_dentry->d_inode);
630 if (new_dir != old_dir) 630 if (new_dir != old_dir)
631 fsstack_copy_attr_all(old_dir, lower_old_dir_dentry->d_inode, NULL); 631 fsstack_copy_attr_all(old_dir, lower_old_dir_dentry->d_inode);
632out_lock: 632out_lock:
633 unlock_rename(lower_old_dir_dentry, lower_new_dir_dentry); 633 unlock_rename(lower_old_dir_dentry, lower_new_dir_dentry);
634 dput(lower_new_dentry->d_parent); 634 dput(lower_new_dentry->d_parent);
@@ -967,7 +967,7 @@ static int ecryptfs_setattr(struct dentry *dentry, struct iattr *ia)
967 rc = notify_change(lower_dentry, ia); 967 rc = notify_change(lower_dentry, ia);
968 mutex_unlock(&lower_dentry->d_inode->i_mutex); 968 mutex_unlock(&lower_dentry->d_inode->i_mutex);
969out: 969out:
970 fsstack_copy_attr_all(inode, lower_inode, NULL); 970 fsstack_copy_attr_all(inode, lower_inode);
971 return rc; 971 return rc;
972} 972}
973 973
diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c
index c6ac85d6c701..567bc4b9f70a 100644
--- a/fs/ecryptfs/main.c
+++ b/fs/ecryptfs/main.c
@@ -35,7 +35,6 @@
35#include <linux/key.h> 35#include <linux/key.h>
36#include <linux/parser.h> 36#include <linux/parser.h>
37#include <linux/fs_stack.h> 37#include <linux/fs_stack.h>
38#include <linux/ima.h>
39#include "ecryptfs_kernel.h" 38#include "ecryptfs_kernel.h"
40 39
41/** 40/**
@@ -119,7 +118,6 @@ int ecryptfs_init_persistent_file(struct dentry *ecryptfs_dentry)
119 const struct cred *cred = current_cred(); 118 const struct cred *cred = current_cred();
120 struct ecryptfs_inode_info *inode_info = 119 struct ecryptfs_inode_info *inode_info =
121 ecryptfs_inode_to_private(ecryptfs_dentry->d_inode); 120 ecryptfs_inode_to_private(ecryptfs_dentry->d_inode);
122 int opened_lower_file = 0;
123 int rc = 0; 121 int rc = 0;
124 122
125 mutex_lock(&inode_info->lower_file_mutex); 123 mutex_lock(&inode_info->lower_file_mutex);
@@ -136,12 +134,9 @@ int ecryptfs_init_persistent_file(struct dentry *ecryptfs_dentry)
136 "for lower_dentry [0x%p] and lower_mnt [0x%p]; " 134 "for lower_dentry [0x%p] and lower_mnt [0x%p]; "
137 "rc = [%d]\n", lower_dentry, lower_mnt, rc); 135 "rc = [%d]\n", lower_dentry, lower_mnt, rc);
138 inode_info->lower_file = NULL; 136 inode_info->lower_file = NULL;
139 } else 137 }
140 opened_lower_file = 1;
141 } 138 }
142 mutex_unlock(&inode_info->lower_file_mutex); 139 mutex_unlock(&inode_info->lower_file_mutex);
143 if (opened_lower_file)
144 ima_counts_get(inode_info->lower_file);
145 return rc; 140 return rc;
146} 141}
147 142
@@ -194,7 +189,7 @@ int ecryptfs_interpose(struct dentry *lower_dentry, struct dentry *dentry,
194 init_special_inode(inode, lower_inode->i_mode, 189 init_special_inode(inode, lower_inode->i_mode,
195 lower_inode->i_rdev); 190 lower_inode->i_rdev);
196 dentry->d_op = &ecryptfs_dops; 191 dentry->d_op = &ecryptfs_dops;
197 fsstack_copy_attr_all(inode, lower_inode, NULL); 192 fsstack_copy_attr_all(inode, lower_inode);
198 /* This size will be overwritten for real files w/ headers and 193 /* This size will be overwritten for real files w/ headers and
199 * other metadata */ 194 * other metadata */
200 fsstack_copy_inode_size(inode, lower_inode); 195 fsstack_copy_inode_size(inode, lower_inode);
diff --git a/fs/exec.c b/fs/exec.c
index 623a5cc3076a..632b02e34ec7 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -826,7 +826,9 @@ static int de_thread(struct task_struct *tsk)
826 attach_pid(tsk, PIDTYPE_PID, task_pid(leader)); 826 attach_pid(tsk, PIDTYPE_PID, task_pid(leader));
827 transfer_pid(leader, tsk, PIDTYPE_PGID); 827 transfer_pid(leader, tsk, PIDTYPE_PGID);
828 transfer_pid(leader, tsk, PIDTYPE_SID); 828 transfer_pid(leader, tsk, PIDTYPE_SID);
829
829 list_replace_rcu(&leader->tasks, &tsk->tasks); 830 list_replace_rcu(&leader->tasks, &tsk->tasks);
831 list_replace_init(&leader->sibling, &tsk->sibling);
830 832
831 tsk->group_leader = tsk; 833 tsk->group_leader = tsk;
832 leader->group_leader = tsk; 834 leader->group_leader = tsk;
@@ -1761,17 +1763,20 @@ void do_coredump(long signr, int exit_code, struct pt_regs *regs)
1761 struct mm_struct *mm = current->mm; 1763 struct mm_struct *mm = current->mm;
1762 struct linux_binfmt * binfmt; 1764 struct linux_binfmt * binfmt;
1763 struct inode * inode; 1765 struct inode * inode;
1764 struct file * file;
1765 const struct cred *old_cred; 1766 const struct cred *old_cred;
1766 struct cred *cred; 1767 struct cred *cred;
1767 int retval = 0; 1768 int retval = 0;
1768 int flag = 0; 1769 int flag = 0;
1769 int ispipe = 0; 1770 int ispipe = 0;
1770 unsigned long core_limit = current->signal->rlim[RLIMIT_CORE].rlim_cur;
1771 char **helper_argv = NULL; 1771 char **helper_argv = NULL;
1772 int helper_argc = 0; 1772 int helper_argc = 0;
1773 int dump_count = 0; 1773 int dump_count = 0;
1774 static atomic_t core_dump_count = ATOMIC_INIT(0); 1774 static atomic_t core_dump_count = ATOMIC_INIT(0);
1775 struct coredump_params cprm = {
1776 .signr = signr,
1777 .regs = regs,
1778 .limit = current->signal->rlim[RLIMIT_CORE].rlim_cur,
1779 };
1775 1780
1776 audit_core_dumps(signr); 1781 audit_core_dumps(signr);
1777 1782
@@ -1827,15 +1832,15 @@ void do_coredump(long signr, int exit_code, struct pt_regs *regs)
1827 ispipe = format_corename(corename, signr); 1832 ispipe = format_corename(corename, signr);
1828 unlock_kernel(); 1833 unlock_kernel();
1829 1834
1830 if ((!ispipe) && (core_limit < binfmt->min_coredump)) 1835 if ((!ispipe) && (cprm.limit < binfmt->min_coredump))
1831 goto fail_unlock; 1836 goto fail_unlock;
1832 1837
1833 if (ispipe) { 1838 if (ispipe) {
1834 if (core_limit == 0) { 1839 if (cprm.limit == 0) {
1835 /* 1840 /*
1836 * Normally core limits are irrelevant to pipes, since 1841 * Normally core limits are irrelevant to pipes, since
1837 * we're not writing to the file system, but we use 1842 * we're not writing to the file system, but we use
1838 * core_limit of 0 here as a special value. Any 1843 * cprm.limit of 0 here as a special value. Any
1839 * non-zero limit gets set to RLIM_INFINITY below, but 1844 * non-zero limit gets set to RLIM_INFINITY below, but
1840 * a limit of 0 skips the dump. This is a consistent 1845 * a limit of 0 skips the dump. This is a consistent
1841 * way to catch recursive crashes. We can still crash 1846 * way to catch recursive crashes. We can still crash
@@ -1868,25 +1873,25 @@ void do_coredump(long signr, int exit_code, struct pt_regs *regs)
1868 goto fail_dropcount; 1873 goto fail_dropcount;
1869 } 1874 }
1870 1875
1871 core_limit = RLIM_INFINITY; 1876 cprm.limit = RLIM_INFINITY;
1872 1877
1873 /* SIGPIPE can happen, but it's just never processed */ 1878 /* SIGPIPE can happen, but it's just never processed */
1874 if (call_usermodehelper_pipe(helper_argv[0], helper_argv, NULL, 1879 if (call_usermodehelper_pipe(helper_argv[0], helper_argv, NULL,
1875 &file)) { 1880 &cprm.file)) {
1876 printk(KERN_INFO "Core dump to %s pipe failed\n", 1881 printk(KERN_INFO "Core dump to %s pipe failed\n",
1877 corename); 1882 corename);
1878 goto fail_dropcount; 1883 goto fail_dropcount;
1879 } 1884 }
1880 } else 1885 } else
1881 file = filp_open(corename, 1886 cprm.file = filp_open(corename,
1882 O_CREAT | 2 | O_NOFOLLOW | O_LARGEFILE | flag, 1887 O_CREAT | 2 | O_NOFOLLOW | O_LARGEFILE | flag,
1883 0600); 1888 0600);
1884 if (IS_ERR(file)) 1889 if (IS_ERR(cprm.file))
1885 goto fail_dropcount; 1890 goto fail_dropcount;
1886 inode = file->f_path.dentry->d_inode; 1891 inode = cprm.file->f_path.dentry->d_inode;
1887 if (inode->i_nlink > 1) 1892 if (inode->i_nlink > 1)
1888 goto close_fail; /* multiple links - don't dump */ 1893 goto close_fail; /* multiple links - don't dump */
1889 if (!ispipe && d_unhashed(file->f_path.dentry)) 1894 if (!ispipe && d_unhashed(cprm.file->f_path.dentry))
1890 goto close_fail; 1895 goto close_fail;
1891 1896
1892 /* AK: actually i see no reason to not allow this for named pipes etc., 1897 /* AK: actually i see no reason to not allow this for named pipes etc.,
@@ -1899,21 +1904,22 @@ void do_coredump(long signr, int exit_code, struct pt_regs *regs)
1899 */ 1904 */
1900 if (inode->i_uid != current_fsuid()) 1905 if (inode->i_uid != current_fsuid())
1901 goto close_fail; 1906 goto close_fail;
1902 if (!file->f_op) 1907 if (!cprm.file->f_op)
1903 goto close_fail; 1908 goto close_fail;
1904 if (!file->f_op->write) 1909 if (!cprm.file->f_op->write)
1905 goto close_fail; 1910 goto close_fail;
1906 if (!ispipe && do_truncate(file->f_path.dentry, 0, 0, file) != 0) 1911 if (!ispipe &&
1912 do_truncate(cprm.file->f_path.dentry, 0, 0, cprm.file) != 0)
1907 goto close_fail; 1913 goto close_fail;
1908 1914
1909 retval = binfmt->core_dump(signr, regs, file, core_limit); 1915 retval = binfmt->core_dump(&cprm);
1910 1916
1911 if (retval) 1917 if (retval)
1912 current->signal->group_exit_code |= 0x80; 1918 current->signal->group_exit_code |= 0x80;
1913close_fail: 1919close_fail:
1914 if (ispipe && core_pipe_limit) 1920 if (ispipe && core_pipe_limit)
1915 wait_for_dump_helpers(file); 1921 wait_for_dump_helpers(cprm.file);
1916 filp_close(file, NULL); 1922 filp_close(cprm.file, NULL);
1917fail_dropcount: 1923fail_dropcount:
1918 if (dump_count) 1924 if (dump_count)
1919 atomic_dec(&core_dump_count); 1925 atomic_dec(&core_dump_count);
diff --git a/fs/exportfs/expfs.c b/fs/exportfs/expfs.c
index 197c7db583c7..e9e175949a63 100644
--- a/fs/exportfs/expfs.c
+++ b/fs/exportfs/expfs.c
@@ -6,7 +6,7 @@
6 * and for mapping back from file handles to dentries. 6 * and for mapping back from file handles to dentries.
7 * 7 *
8 * For details on why we do all the strange and hairy things in here 8 * For details on why we do all the strange and hairy things in here
9 * take a look at Documentation/filesystems/Exporting. 9 * take a look at Documentation/filesystems/nfs/Exporting.
10 */ 10 */
11#include <linux/exportfs.h> 11#include <linux/exportfs.h>
12#include <linux/fs.h> 12#include <linux/fs.h>
diff --git a/fs/ext2/acl.c b/fs/ext2/acl.c
index a63d44256a70..a99e54318c3d 100644
--- a/fs/ext2/acl.c
+++ b/fs/ext2/acl.c
@@ -339,12 +339,12 @@ ext2_acl_chmod(struct inode *inode)
339 * Extended attribute handlers 339 * Extended attribute handlers
340 */ 340 */
341static size_t 341static size_t
342ext2_xattr_list_acl_access(struct inode *inode, char *list, size_t list_size, 342ext2_xattr_list_acl_access(struct dentry *dentry, char *list, size_t list_size,
343 const char *name, size_t name_len) 343 const char *name, size_t name_len, int type)
344{ 344{
345 const size_t size = sizeof(POSIX_ACL_XATTR_ACCESS); 345 const size_t size = sizeof(POSIX_ACL_XATTR_ACCESS);
346 346
347 if (!test_opt(inode->i_sb, POSIX_ACL)) 347 if (!test_opt(dentry->d_sb, POSIX_ACL))
348 return 0; 348 return 0;
349 if (list && size <= list_size) 349 if (list && size <= list_size)
350 memcpy(list, POSIX_ACL_XATTR_ACCESS, size); 350 memcpy(list, POSIX_ACL_XATTR_ACCESS, size);
@@ -352,12 +352,12 @@ ext2_xattr_list_acl_access(struct inode *inode, char *list, size_t list_size,
352} 352}
353 353
354static size_t 354static size_t
355ext2_xattr_list_acl_default(struct inode *inode, char *list, size_t list_size, 355ext2_xattr_list_acl_default(struct dentry *dentry, char *list, size_t list_size,
356 const char *name, size_t name_len) 356 const char *name, size_t name_len, int type)
357{ 357{
358 const size_t size = sizeof(POSIX_ACL_XATTR_DEFAULT); 358 const size_t size = sizeof(POSIX_ACL_XATTR_DEFAULT);
359 359
360 if (!test_opt(inode->i_sb, POSIX_ACL)) 360 if (!test_opt(dentry->d_sb, POSIX_ACL))
361 return 0; 361 return 0;
362 if (list && size <= list_size) 362 if (list && size <= list_size)
363 memcpy(list, POSIX_ACL_XATTR_DEFAULT, size); 363 memcpy(list, POSIX_ACL_XATTR_DEFAULT, size);
@@ -365,15 +365,18 @@ ext2_xattr_list_acl_default(struct inode *inode, char *list, size_t list_size,
365} 365}
366 366
367static int 367static int
368ext2_xattr_get_acl(struct inode *inode, int type, void *buffer, size_t size) 368ext2_xattr_get_acl(struct dentry *dentry, const char *name, void *buffer,
369 size_t size, int type)
369{ 370{
370 struct posix_acl *acl; 371 struct posix_acl *acl;
371 int error; 372 int error;
372 373
373 if (!test_opt(inode->i_sb, POSIX_ACL)) 374 if (strcmp(name, "") != 0)
375 return -EINVAL;
376 if (!test_opt(dentry->d_sb, POSIX_ACL))
374 return -EOPNOTSUPP; 377 return -EOPNOTSUPP;
375 378
376 acl = ext2_get_acl(inode, type); 379 acl = ext2_get_acl(dentry->d_inode, type);
377 if (IS_ERR(acl)) 380 if (IS_ERR(acl))
378 return PTR_ERR(acl); 381 return PTR_ERR(acl);
379 if (acl == NULL) 382 if (acl == NULL)
@@ -385,33 +388,17 @@ ext2_xattr_get_acl(struct inode *inode, int type, void *buffer, size_t size)
385} 388}
386 389
387static int 390static int
388ext2_xattr_get_acl_access(struct inode *inode, const char *name, 391ext2_xattr_set_acl(struct dentry *dentry, const char *name, const void *value,
389 void *buffer, size_t size) 392 size_t size, int flags, int type)
390{
391 if (strcmp(name, "") != 0)
392 return -EINVAL;
393 return ext2_xattr_get_acl(inode, ACL_TYPE_ACCESS, buffer, size);
394}
395
396static int
397ext2_xattr_get_acl_default(struct inode *inode, const char *name,
398 void *buffer, size_t size)
399{
400 if (strcmp(name, "") != 0)
401 return -EINVAL;
402 return ext2_xattr_get_acl(inode, ACL_TYPE_DEFAULT, buffer, size);
403}
404
405static int
406ext2_xattr_set_acl(struct inode *inode, int type, const void *value,
407 size_t size)
408{ 393{
409 struct posix_acl *acl; 394 struct posix_acl *acl;
410 int error; 395 int error;
411 396
412 if (!test_opt(inode->i_sb, POSIX_ACL)) 397 if (strcmp(name, "") != 0)
398 return -EINVAL;
399 if (!test_opt(dentry->d_sb, POSIX_ACL))
413 return -EOPNOTSUPP; 400 return -EOPNOTSUPP;
414 if (!is_owner_or_cap(inode)) 401 if (!is_owner_or_cap(dentry->d_inode))
415 return -EPERM; 402 return -EPERM;
416 403
417 if (value) { 404 if (value) {
@@ -426,41 +413,25 @@ ext2_xattr_set_acl(struct inode *inode, int type, const void *value,
426 } else 413 } else
427 acl = NULL; 414 acl = NULL;
428 415
429 error = ext2_set_acl(inode, type, acl); 416 error = ext2_set_acl(dentry->d_inode, type, acl);
430 417
431release_and_out: 418release_and_out:
432 posix_acl_release(acl); 419 posix_acl_release(acl);
433 return error; 420 return error;
434} 421}
435 422
436static int
437ext2_xattr_set_acl_access(struct inode *inode, const char *name,
438 const void *value, size_t size, int flags)
439{
440 if (strcmp(name, "") != 0)
441 return -EINVAL;
442 return ext2_xattr_set_acl(inode, ACL_TYPE_ACCESS, value, size);
443}
444
445static int
446ext2_xattr_set_acl_default(struct inode *inode, const char *name,
447 const void *value, size_t size, int flags)
448{
449 if (strcmp(name, "") != 0)
450 return -EINVAL;
451 return ext2_xattr_set_acl(inode, ACL_TYPE_DEFAULT, value, size);
452}
453
454struct xattr_handler ext2_xattr_acl_access_handler = { 423struct xattr_handler ext2_xattr_acl_access_handler = {
455 .prefix = POSIX_ACL_XATTR_ACCESS, 424 .prefix = POSIX_ACL_XATTR_ACCESS,
425 .flags = ACL_TYPE_ACCESS,
456 .list = ext2_xattr_list_acl_access, 426 .list = ext2_xattr_list_acl_access,
457 .get = ext2_xattr_get_acl_access, 427 .get = ext2_xattr_get_acl,
458 .set = ext2_xattr_set_acl_access, 428 .set = ext2_xattr_set_acl,
459}; 429};
460 430
461struct xattr_handler ext2_xattr_acl_default_handler = { 431struct xattr_handler ext2_xattr_acl_default_handler = {
462 .prefix = POSIX_ACL_XATTR_DEFAULT, 432 .prefix = POSIX_ACL_XATTR_DEFAULT,
433 .flags = ACL_TYPE_DEFAULT,
463 .list = ext2_xattr_list_acl_default, 434 .list = ext2_xattr_list_acl_default,
464 .get = ext2_xattr_get_acl_default, 435 .get = ext2_xattr_get_acl,
465 .set = ext2_xattr_set_acl_default, 436 .set = ext2_xattr_set_acl,
466}; 437};
diff --git a/fs/ext2/dir.c b/fs/ext2/dir.c
index fc2bd05d3559..7516957273ed 100644
--- a/fs/ext2/dir.c
+++ b/fs/ext2/dir.c
@@ -721,5 +721,5 @@ const struct file_operations ext2_dir_operations = {
721#ifdef CONFIG_COMPAT 721#ifdef CONFIG_COMPAT
722 .compat_ioctl = ext2_compat_ioctl, 722 .compat_ioctl = ext2_compat_ioctl,
723#endif 723#endif
724 .fsync = simple_fsync, 724 .fsync = ext2_fsync,
725}; 725};
diff --git a/fs/ext2/ext2.h b/fs/ext2/ext2.h
index da318b0fa637..061914add3cf 100644
--- a/fs/ext2/ext2.h
+++ b/fs/ext2/ext2.h
@@ -155,6 +155,7 @@ extern void ext2_write_super (struct super_block *);
155extern const struct file_operations ext2_dir_operations; 155extern const struct file_operations ext2_dir_operations;
156 156
157/* file.c */ 157/* file.c */
158extern int ext2_fsync(struct file *file, struct dentry *dentry, int datasync);
158extern const struct inode_operations ext2_file_inode_operations; 159extern const struct inode_operations ext2_file_inode_operations;
159extern const struct file_operations ext2_file_operations; 160extern const struct file_operations ext2_file_operations;
160extern const struct file_operations ext2_xip_file_operations; 161extern const struct file_operations ext2_xip_file_operations;
diff --git a/fs/ext2/file.c b/fs/ext2/file.c
index a2f3afd1a1c1..586e3589d4c2 100644
--- a/fs/ext2/file.c
+++ b/fs/ext2/file.c
@@ -19,6 +19,7 @@
19 */ 19 */
20 20
21#include <linux/time.h> 21#include <linux/time.h>
22#include <linux/pagemap.h>
22#include "ext2.h" 23#include "ext2.h"
23#include "xattr.h" 24#include "xattr.h"
24#include "acl.h" 25#include "acl.h"
@@ -38,6 +39,22 @@ static int ext2_release_file (struct inode * inode, struct file * filp)
38 return 0; 39 return 0;
39} 40}
40 41
42int ext2_fsync(struct file *file, struct dentry *dentry, int datasync)
43{
44 int ret;
45 struct super_block *sb = dentry->d_inode->i_sb;
46 struct address_space *mapping = sb->s_bdev->bd_inode->i_mapping;
47
48 ret = simple_fsync(file, dentry, datasync);
49 if (ret == -EIO || test_and_clear_bit(AS_EIO, &mapping->flags)) {
50 /* We don't really know where the IO error happened... */
51 ext2_error(sb, __func__,
52 "detected IO error when writing metadata buffers");
53 ret = -EIO;
54 }
55 return ret;
56}
57
41/* 58/*
42 * We have mostly NULL's here: the current defaults are ok for 59 * We have mostly NULL's here: the current defaults are ok for
43 * the ext2 filesystem. 60 * the ext2 filesystem.
@@ -55,7 +72,7 @@ const struct file_operations ext2_file_operations = {
55 .mmap = generic_file_mmap, 72 .mmap = generic_file_mmap,
56 .open = generic_file_open, 73 .open = generic_file_open,
57 .release = ext2_release_file, 74 .release = ext2_release_file,
58 .fsync = simple_fsync, 75 .fsync = ext2_fsync,
59 .splice_read = generic_file_splice_read, 76 .splice_read = generic_file_splice_read,
60 .splice_write = generic_file_splice_write, 77 .splice_write = generic_file_splice_write,
61}; 78};
@@ -72,7 +89,7 @@ const struct file_operations ext2_xip_file_operations = {
72 .mmap = xip_file_mmap, 89 .mmap = xip_file_mmap,
73 .open = generic_file_open, 90 .open = generic_file_open,
74 .release = ext2_release_file, 91 .release = ext2_release_file,
75 .fsync = simple_fsync, 92 .fsync = ext2_fsync,
76}; 93};
77#endif 94#endif
78 95
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index 1388802b7803..f9cb54a585ce 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -1105,9 +1105,30 @@ failed_sbi:
1105 return ret; 1105 return ret;
1106} 1106}
1107 1107
1108static void ext2_clear_super_error(struct super_block *sb)
1109{
1110 struct buffer_head *sbh = EXT2_SB(sb)->s_sbh;
1111
1112 if (buffer_write_io_error(sbh)) {
1113 /*
1114 * Oh, dear. A previous attempt to write the
1115 * superblock failed. This could happen because the
1116 * USB device was yanked out. Or it could happen to
1117 * be a transient write error and maybe the block will
1118 * be remapped. Nothing we can do but to retry the
1119 * write and hope for the best.
1120 */
1121 printk(KERN_ERR "EXT2-fs: %s previous I/O error to "
1122 "superblock detected", sb->s_id);
1123 clear_buffer_write_io_error(sbh);
1124 set_buffer_uptodate(sbh);
1125 }
1126}
1127
1108static void ext2_commit_super (struct super_block * sb, 1128static void ext2_commit_super (struct super_block * sb,
1109 struct ext2_super_block * es) 1129 struct ext2_super_block * es)
1110{ 1130{
1131 ext2_clear_super_error(sb);
1111 es->s_wtime = cpu_to_le32(get_seconds()); 1132 es->s_wtime = cpu_to_le32(get_seconds());
1112 mark_buffer_dirty(EXT2_SB(sb)->s_sbh); 1133 mark_buffer_dirty(EXT2_SB(sb)->s_sbh);
1113 sb->s_dirt = 0; 1134 sb->s_dirt = 0;
@@ -1115,6 +1136,7 @@ static void ext2_commit_super (struct super_block * sb,
1115 1136
1116static void ext2_sync_super(struct super_block *sb, struct ext2_super_block *es) 1137static void ext2_sync_super(struct super_block *sb, struct ext2_super_block *es)
1117{ 1138{
1139 ext2_clear_super_error(sb);
1118 es->s_free_blocks_count = cpu_to_le32(ext2_count_free_blocks(sb)); 1140 es->s_free_blocks_count = cpu_to_le32(ext2_count_free_blocks(sb));
1119 es->s_free_inodes_count = cpu_to_le32(ext2_count_free_inodes(sb)); 1141 es->s_free_inodes_count = cpu_to_le32(ext2_count_free_inodes(sb));
1120 es->s_wtime = cpu_to_le32(get_seconds()); 1142 es->s_wtime = cpu_to_le32(get_seconds());
diff --git a/fs/ext2/xattr.c b/fs/ext2/xattr.c
index 7913531ec6d5..904f00642f84 100644
--- a/fs/ext2/xattr.c
+++ b/fs/ext2/xattr.c
@@ -60,6 +60,7 @@
60#include <linux/mbcache.h> 60#include <linux/mbcache.h>
61#include <linux/quotaops.h> 61#include <linux/quotaops.h>
62#include <linux/rwsem.h> 62#include <linux/rwsem.h>
63#include <linux/security.h>
63#include "ext2.h" 64#include "ext2.h"
64#include "xattr.h" 65#include "xattr.h"
65#include "acl.h" 66#include "acl.h"
@@ -249,8 +250,9 @@ cleanup:
249 * used / required on success. 250 * used / required on success.
250 */ 251 */
251static int 252static int
252ext2_xattr_list(struct inode *inode, char *buffer, size_t buffer_size) 253ext2_xattr_list(struct dentry *dentry, char *buffer, size_t buffer_size)
253{ 254{
255 struct inode *inode = dentry->d_inode;
254 struct buffer_head *bh = NULL; 256 struct buffer_head *bh = NULL;
255 struct ext2_xattr_entry *entry; 257 struct ext2_xattr_entry *entry;
256 char *end; 258 char *end;
@@ -300,9 +302,10 @@ bad_block: ext2_error(inode->i_sb, "ext2_xattr_list",
300 ext2_xattr_handler(entry->e_name_index); 302 ext2_xattr_handler(entry->e_name_index);
301 303
302 if (handler) { 304 if (handler) {
303 size_t size = handler->list(inode, buffer, rest, 305 size_t size = handler->list(dentry, buffer, rest,
304 entry->e_name, 306 entry->e_name,
305 entry->e_name_len); 307 entry->e_name_len,
308 handler->flags);
306 if (buffer) { 309 if (buffer) {
307 if (size > rest) { 310 if (size > rest) {
308 error = -ERANGE; 311 error = -ERANGE;
@@ -330,7 +333,7 @@ cleanup:
330ssize_t 333ssize_t
331ext2_listxattr(struct dentry *dentry, char *buffer, size_t size) 334ext2_listxattr(struct dentry *dentry, char *buffer, size_t size)
332{ 335{
333 return ext2_xattr_list(dentry->d_inode, buffer, size); 336 return ext2_xattr_list(dentry, buffer, size);
334} 337}
335 338
336/* 339/*
diff --git a/fs/ext2/xattr_security.c b/fs/ext2/xattr_security.c
index 70c0dbdcdcb7..c8155845ac05 100644
--- a/fs/ext2/xattr_security.c
+++ b/fs/ext2/xattr_security.c
@@ -11,8 +11,8 @@
11#include "xattr.h" 11#include "xattr.h"
12 12
13static size_t 13static size_t
14ext2_xattr_security_list(struct inode *inode, char *list, size_t list_size, 14ext2_xattr_security_list(struct dentry *dentry, char *list, size_t list_size,
15 const char *name, size_t name_len) 15 const char *name, size_t name_len, int type)
16{ 16{
17 const int prefix_len = XATTR_SECURITY_PREFIX_LEN; 17 const int prefix_len = XATTR_SECURITY_PREFIX_LEN;
18 const size_t total_len = prefix_len + name_len + 1; 18 const size_t total_len = prefix_len + name_len + 1;
@@ -26,22 +26,22 @@ ext2_xattr_security_list(struct inode *inode, char *list, size_t list_size,
26} 26}
27 27
28static int 28static int
29ext2_xattr_security_get(struct inode *inode, const char *name, 29ext2_xattr_security_get(struct dentry *dentry, const char *name,
30 void *buffer, size_t size) 30 void *buffer, size_t size, int type)
31{ 31{
32 if (strcmp(name, "") == 0) 32 if (strcmp(name, "") == 0)
33 return -EINVAL; 33 return -EINVAL;
34 return ext2_xattr_get(inode, EXT2_XATTR_INDEX_SECURITY, name, 34 return ext2_xattr_get(dentry->d_inode, EXT2_XATTR_INDEX_SECURITY, name,
35 buffer, size); 35 buffer, size);
36} 36}
37 37
38static int 38static int
39ext2_xattr_security_set(struct inode *inode, const char *name, 39ext2_xattr_security_set(struct dentry *dentry, const char *name,
40 const void *value, size_t size, int flags) 40 const void *value, size_t size, int flags, int type)
41{ 41{
42 if (strcmp(name, "") == 0) 42 if (strcmp(name, "") == 0)
43 return -EINVAL; 43 return -EINVAL;
44 return ext2_xattr_set(inode, EXT2_XATTR_INDEX_SECURITY, name, 44 return ext2_xattr_set(dentry->d_inode, EXT2_XATTR_INDEX_SECURITY, name,
45 value, size, flags); 45 value, size, flags);
46} 46}
47 47
diff --git a/fs/ext2/xattr_trusted.c b/fs/ext2/xattr_trusted.c
index e8219f8eae9f..2a26d71f4771 100644
--- a/fs/ext2/xattr_trusted.c
+++ b/fs/ext2/xattr_trusted.c
@@ -13,8 +13,8 @@
13#include "xattr.h" 13#include "xattr.h"
14 14
15static size_t 15static size_t
16ext2_xattr_trusted_list(struct inode *inode, char *list, size_t list_size, 16ext2_xattr_trusted_list(struct dentry *dentry, char *list, size_t list_size,
17 const char *name, size_t name_len) 17 const char *name, size_t name_len, int type)
18{ 18{
19 const int prefix_len = XATTR_TRUSTED_PREFIX_LEN; 19 const int prefix_len = XATTR_TRUSTED_PREFIX_LEN;
20 const size_t total_len = prefix_len + name_len + 1; 20 const size_t total_len = prefix_len + name_len + 1;
@@ -31,22 +31,22 @@ ext2_xattr_trusted_list(struct inode *inode, char *list, size_t list_size,
31} 31}
32 32
33static int 33static int
34ext2_xattr_trusted_get(struct inode *inode, const char *name, 34ext2_xattr_trusted_get(struct dentry *dentry, const char *name,
35 void *buffer, size_t size) 35 void *buffer, size_t size, int type)
36{ 36{
37 if (strcmp(name, "") == 0) 37 if (strcmp(name, "") == 0)
38 return -EINVAL; 38 return -EINVAL;
39 return ext2_xattr_get(inode, EXT2_XATTR_INDEX_TRUSTED, name, 39 return ext2_xattr_get(dentry->d_inode, EXT2_XATTR_INDEX_TRUSTED, name,
40 buffer, size); 40 buffer, size);
41} 41}
42 42
43static int 43static int
44ext2_xattr_trusted_set(struct inode *inode, const char *name, 44ext2_xattr_trusted_set(struct dentry *dentry, const char *name,
45 const void *value, size_t size, int flags) 45 const void *value, size_t size, int flags, int type)
46{ 46{
47 if (strcmp(name, "") == 0) 47 if (strcmp(name, "") == 0)
48 return -EINVAL; 48 return -EINVAL;
49 return ext2_xattr_set(inode, EXT2_XATTR_INDEX_TRUSTED, name, 49 return ext2_xattr_set(dentry->d_inode, EXT2_XATTR_INDEX_TRUSTED, name,
50 value, size, flags); 50 value, size, flags);
51} 51}
52 52
diff --git a/fs/ext2/xattr_user.c b/fs/ext2/xattr_user.c
index 92495d28c62f..3f6caf3684b4 100644
--- a/fs/ext2/xattr_user.c
+++ b/fs/ext2/xattr_user.c
@@ -12,13 +12,13 @@
12#include "xattr.h" 12#include "xattr.h"
13 13
14static size_t 14static size_t
15ext2_xattr_user_list(struct inode *inode, char *list, size_t list_size, 15ext2_xattr_user_list(struct dentry *dentry, char *list, size_t list_size,
16 const char *name, size_t name_len) 16 const char *name, size_t name_len, int type)
17{ 17{
18 const size_t prefix_len = XATTR_USER_PREFIX_LEN; 18 const size_t prefix_len = XATTR_USER_PREFIX_LEN;
19 const size_t total_len = prefix_len + name_len + 1; 19 const size_t total_len = prefix_len + name_len + 1;
20 20
21 if (!test_opt(inode->i_sb, XATTR_USER)) 21 if (!test_opt(dentry->d_sb, XATTR_USER))
22 return 0; 22 return 0;
23 23
24 if (list && total_len <= list_size) { 24 if (list && total_len <= list_size) {
@@ -30,27 +30,28 @@ ext2_xattr_user_list(struct inode *inode, char *list, size_t list_size,
30} 30}
31 31
32static int 32static int
33ext2_xattr_user_get(struct inode *inode, const char *name, 33ext2_xattr_user_get(struct dentry *dentry, const char *name,
34 void *buffer, size_t size) 34 void *buffer, size_t size, int type)
35{ 35{
36 if (strcmp(name, "") == 0) 36 if (strcmp(name, "") == 0)
37 return -EINVAL; 37 return -EINVAL;
38 if (!test_opt(inode->i_sb, XATTR_USER)) 38 if (!test_opt(dentry->d_sb, XATTR_USER))
39 return -EOPNOTSUPP; 39 return -EOPNOTSUPP;
40 return ext2_xattr_get(inode, EXT2_XATTR_INDEX_USER, name, buffer, size); 40 return ext2_xattr_get(dentry->d_inode, EXT2_XATTR_INDEX_USER,
41 name, buffer, size);
41} 42}
42 43
43static int 44static int
44ext2_xattr_user_set(struct inode *inode, const char *name, 45ext2_xattr_user_set(struct dentry *dentry, const char *name,
45 const void *value, size_t size, int flags) 46 const void *value, size_t size, int flags, int type)
46{ 47{
47 if (strcmp(name, "") == 0) 48 if (strcmp(name, "") == 0)
48 return -EINVAL; 49 return -EINVAL;
49 if (!test_opt(inode->i_sb, XATTR_USER)) 50 if (!test_opt(dentry->d_sb, XATTR_USER))
50 return -EOPNOTSUPP; 51 return -EOPNOTSUPP;
51 52
52 return ext2_xattr_set(inode, EXT2_XATTR_INDEX_USER, name, 53 return ext2_xattr_set(dentry->d_inode, EXT2_XATTR_INDEX_USER,
53 value, size, flags); 54 name, value, size, flags);
54} 55}
55 56
56struct xattr_handler ext2_xattr_user_handler = { 57struct xattr_handler ext2_xattr_user_handler = {
diff --git a/fs/ext3/acl.c b/fs/ext3/acl.c
index c9b0df376b5f..82ba34158661 100644
--- a/fs/ext3/acl.c
+++ b/fs/ext3/acl.c
@@ -366,12 +366,12 @@ out:
366 * Extended attribute handlers 366 * Extended attribute handlers
367 */ 367 */
368static size_t 368static size_t
369ext3_xattr_list_acl_access(struct inode *inode, char *list, size_t list_len, 369ext3_xattr_list_acl_access(struct dentry *dentry, char *list, size_t list_len,
370 const char *name, size_t name_len) 370 const char *name, size_t name_len, int type)
371{ 371{
372 const size_t size = sizeof(POSIX_ACL_XATTR_ACCESS); 372 const size_t size = sizeof(POSIX_ACL_XATTR_ACCESS);
373 373
374 if (!test_opt(inode->i_sb, POSIX_ACL)) 374 if (!test_opt(dentry->d_sb, POSIX_ACL))
375 return 0; 375 return 0;
376 if (list && size <= list_len) 376 if (list && size <= list_len)
377 memcpy(list, POSIX_ACL_XATTR_ACCESS, size); 377 memcpy(list, POSIX_ACL_XATTR_ACCESS, size);
@@ -379,12 +379,12 @@ ext3_xattr_list_acl_access(struct inode *inode, char *list, size_t list_len,
379} 379}
380 380
381static size_t 381static size_t
382ext3_xattr_list_acl_default(struct inode *inode, char *list, size_t list_len, 382ext3_xattr_list_acl_default(struct dentry *dentry, char *list, size_t list_len,
383 const char *name, size_t name_len) 383 const char *name, size_t name_len, int type)
384{ 384{
385 const size_t size = sizeof(POSIX_ACL_XATTR_DEFAULT); 385 const size_t size = sizeof(POSIX_ACL_XATTR_DEFAULT);
386 386
387 if (!test_opt(inode->i_sb, POSIX_ACL)) 387 if (!test_opt(dentry->d_sb, POSIX_ACL))
388 return 0; 388 return 0;
389 if (list && size <= list_len) 389 if (list && size <= list_len)
390 memcpy(list, POSIX_ACL_XATTR_DEFAULT, size); 390 memcpy(list, POSIX_ACL_XATTR_DEFAULT, size);
@@ -392,15 +392,18 @@ ext3_xattr_list_acl_default(struct inode *inode, char *list, size_t list_len,
392} 392}
393 393
394static int 394static int
395ext3_xattr_get_acl(struct inode *inode, int type, void *buffer, size_t size) 395ext3_xattr_get_acl(struct dentry *dentry, const char *name, void *buffer,
396 size_t size, int type)
396{ 397{
397 struct posix_acl *acl; 398 struct posix_acl *acl;
398 int error; 399 int error;
399 400
400 if (!test_opt(inode->i_sb, POSIX_ACL)) 401 if (strcmp(name, "") != 0)
402 return -EINVAL;
403 if (!test_opt(dentry->d_sb, POSIX_ACL))
401 return -EOPNOTSUPP; 404 return -EOPNOTSUPP;
402 405
403 acl = ext3_get_acl(inode, type); 406 acl = ext3_get_acl(dentry->d_inode, type);
404 if (IS_ERR(acl)) 407 if (IS_ERR(acl))
405 return PTR_ERR(acl); 408 return PTR_ERR(acl);
406 if (acl == NULL) 409 if (acl == NULL)
@@ -412,31 +415,16 @@ ext3_xattr_get_acl(struct inode *inode, int type, void *buffer, size_t size)
412} 415}
413 416
414static int 417static int
415ext3_xattr_get_acl_access(struct inode *inode, const char *name, 418ext3_xattr_set_acl(struct dentry *dentry, const char *name, const void *value,
416 void *buffer, size_t size) 419 size_t size, int flags, int type)
417{
418 if (strcmp(name, "") != 0)
419 return -EINVAL;
420 return ext3_xattr_get_acl(inode, ACL_TYPE_ACCESS, buffer, size);
421}
422
423static int
424ext3_xattr_get_acl_default(struct inode *inode, const char *name,
425 void *buffer, size_t size)
426{
427 if (strcmp(name, "") != 0)
428 return -EINVAL;
429 return ext3_xattr_get_acl(inode, ACL_TYPE_DEFAULT, buffer, size);
430}
431
432static int
433ext3_xattr_set_acl(struct inode *inode, int type, const void *value,
434 size_t size)
435{ 420{
421 struct inode *inode = dentry->d_inode;
436 handle_t *handle; 422 handle_t *handle;
437 struct posix_acl *acl; 423 struct posix_acl *acl;
438 int error, retries = 0; 424 int error, retries = 0;
439 425
426 if (strcmp(name, "") != 0)
427 return -EINVAL;
440 if (!test_opt(inode->i_sb, POSIX_ACL)) 428 if (!test_opt(inode->i_sb, POSIX_ACL))
441 return -EOPNOTSUPP; 429 return -EOPNOTSUPP;
442 if (!is_owner_or_cap(inode)) 430 if (!is_owner_or_cap(inode))
@@ -468,34 +456,18 @@ release_and_out:
468 return error; 456 return error;
469} 457}
470 458
471static int
472ext3_xattr_set_acl_access(struct inode *inode, const char *name,
473 const void *value, size_t size, int flags)
474{
475 if (strcmp(name, "") != 0)
476 return -EINVAL;
477 return ext3_xattr_set_acl(inode, ACL_TYPE_ACCESS, value, size);
478}
479
480static int
481ext3_xattr_set_acl_default(struct inode *inode, const char *name,
482 const void *value, size_t size, int flags)
483{
484 if (strcmp(name, "") != 0)
485 return -EINVAL;
486 return ext3_xattr_set_acl(inode, ACL_TYPE_DEFAULT, value, size);
487}
488
489struct xattr_handler ext3_xattr_acl_access_handler = { 459struct xattr_handler ext3_xattr_acl_access_handler = {
490 .prefix = POSIX_ACL_XATTR_ACCESS, 460 .prefix = POSIX_ACL_XATTR_ACCESS,
461 .flags = ACL_TYPE_ACCESS,
491 .list = ext3_xattr_list_acl_access, 462 .list = ext3_xattr_list_acl_access,
492 .get = ext3_xattr_get_acl_access, 463 .get = ext3_xattr_get_acl,
493 .set = ext3_xattr_set_acl_access, 464 .set = ext3_xattr_set_acl,
494}; 465};
495 466
496struct xattr_handler ext3_xattr_acl_default_handler = { 467struct xattr_handler ext3_xattr_acl_default_handler = {
497 .prefix = POSIX_ACL_XATTR_DEFAULT, 468 .prefix = POSIX_ACL_XATTR_DEFAULT,
469 .flags = ACL_TYPE_DEFAULT,
498 .list = ext3_xattr_list_acl_default, 470 .list = ext3_xattr_list_acl_default,
499 .get = ext3_xattr_get_acl_default, 471 .get = ext3_xattr_get_acl,
500 .set = ext3_xattr_set_acl_default, 472 .set = ext3_xattr_set_acl,
501}; 473};
diff --git a/fs/ext3/xattr.c b/fs/ext3/xattr.c
index 387d92d00b97..66895ccf76c7 100644
--- a/fs/ext3/xattr.c
+++ b/fs/ext3/xattr.c
@@ -99,7 +99,7 @@ static struct buffer_head *ext3_xattr_cache_find(struct inode *,
99 struct mb_cache_entry **); 99 struct mb_cache_entry **);
100static void ext3_xattr_rehash(struct ext3_xattr_header *, 100static void ext3_xattr_rehash(struct ext3_xattr_header *,
101 struct ext3_xattr_entry *); 101 struct ext3_xattr_entry *);
102static int ext3_xattr_list(struct inode *inode, char *buffer, 102static int ext3_xattr_list(struct dentry *dentry, char *buffer,
103 size_t buffer_size); 103 size_t buffer_size);
104 104
105static struct mb_cache *ext3_xattr_cache; 105static struct mb_cache *ext3_xattr_cache;
@@ -147,7 +147,7 @@ ext3_xattr_handler(int name_index)
147ssize_t 147ssize_t
148ext3_listxattr(struct dentry *dentry, char *buffer, size_t size) 148ext3_listxattr(struct dentry *dentry, char *buffer, size_t size)
149{ 149{
150 return ext3_xattr_list(dentry->d_inode, buffer, size); 150 return ext3_xattr_list(dentry, buffer, size);
151} 151}
152 152
153static int 153static int
@@ -332,7 +332,7 @@ ext3_xattr_get(struct inode *inode, int name_index, const char *name,
332} 332}
333 333
334static int 334static int
335ext3_xattr_list_entries(struct inode *inode, struct ext3_xattr_entry *entry, 335ext3_xattr_list_entries(struct dentry *dentry, struct ext3_xattr_entry *entry,
336 char *buffer, size_t buffer_size) 336 char *buffer, size_t buffer_size)
337{ 337{
338 size_t rest = buffer_size; 338 size_t rest = buffer_size;
@@ -342,9 +342,10 @@ ext3_xattr_list_entries(struct inode *inode, struct ext3_xattr_entry *entry,
342 ext3_xattr_handler(entry->e_name_index); 342 ext3_xattr_handler(entry->e_name_index);
343 343
344 if (handler) { 344 if (handler) {
345 size_t size = handler->list(inode, buffer, rest, 345 size_t size = handler->list(dentry, buffer, rest,
346 entry->e_name, 346 entry->e_name,
347 entry->e_name_len); 347 entry->e_name_len,
348 handler->flags);
348 if (buffer) { 349 if (buffer) {
349 if (size > rest) 350 if (size > rest)
350 return -ERANGE; 351 return -ERANGE;
@@ -357,8 +358,9 @@ ext3_xattr_list_entries(struct inode *inode, struct ext3_xattr_entry *entry,
357} 358}
358 359
359static int 360static int
360ext3_xattr_block_list(struct inode *inode, char *buffer, size_t buffer_size) 361ext3_xattr_block_list(struct dentry *dentry, char *buffer, size_t buffer_size)
361{ 362{
363 struct inode *inode = dentry->d_inode;
362 struct buffer_head *bh = NULL; 364 struct buffer_head *bh = NULL;
363 int error; 365 int error;
364 366
@@ -383,7 +385,7 @@ ext3_xattr_block_list(struct inode *inode, char *buffer, size_t buffer_size)
383 goto cleanup; 385 goto cleanup;
384 } 386 }
385 ext3_xattr_cache_insert(bh); 387 ext3_xattr_cache_insert(bh);
386 error = ext3_xattr_list_entries(inode, BFIRST(bh), buffer, buffer_size); 388 error = ext3_xattr_list_entries(dentry, BFIRST(bh), buffer, buffer_size);
387 389
388cleanup: 390cleanup:
389 brelse(bh); 391 brelse(bh);
@@ -392,8 +394,9 @@ cleanup:
392} 394}
393 395
394static int 396static int
395ext3_xattr_ibody_list(struct inode *inode, char *buffer, size_t buffer_size) 397ext3_xattr_ibody_list(struct dentry *dentry, char *buffer, size_t buffer_size)
396{ 398{
399 struct inode *inode = dentry->d_inode;
397 struct ext3_xattr_ibody_header *header; 400 struct ext3_xattr_ibody_header *header;
398 struct ext3_inode *raw_inode; 401 struct ext3_inode *raw_inode;
399 struct ext3_iloc iloc; 402 struct ext3_iloc iloc;
@@ -411,7 +414,7 @@ ext3_xattr_ibody_list(struct inode *inode, char *buffer, size_t buffer_size)
411 error = ext3_xattr_check_names(IFIRST(header), end); 414 error = ext3_xattr_check_names(IFIRST(header), end);
412 if (error) 415 if (error)
413 goto cleanup; 416 goto cleanup;
414 error = ext3_xattr_list_entries(inode, IFIRST(header), 417 error = ext3_xattr_list_entries(dentry, IFIRST(header),
415 buffer, buffer_size); 418 buffer, buffer_size);
416 419
417cleanup: 420cleanup:
@@ -430,12 +433,12 @@ cleanup:
430 * used / required on success. 433 * used / required on success.
431 */ 434 */
432static int 435static int
433ext3_xattr_list(struct inode *inode, char *buffer, size_t buffer_size) 436ext3_xattr_list(struct dentry *dentry, char *buffer, size_t buffer_size)
434{ 437{
435 int i_error, b_error; 438 int i_error, b_error;
436 439
437 down_read(&EXT3_I(inode)->xattr_sem); 440 down_read(&EXT3_I(dentry->d_inode)->xattr_sem);
438 i_error = ext3_xattr_ibody_list(inode, buffer, buffer_size); 441 i_error = ext3_xattr_ibody_list(dentry, buffer, buffer_size);
439 if (i_error < 0) { 442 if (i_error < 0) {
440 b_error = 0; 443 b_error = 0;
441 } else { 444 } else {
@@ -443,11 +446,11 @@ ext3_xattr_list(struct inode *inode, char *buffer, size_t buffer_size)
443 buffer += i_error; 446 buffer += i_error;
444 buffer_size -= i_error; 447 buffer_size -= i_error;
445 } 448 }
446 b_error = ext3_xattr_block_list(inode, buffer, buffer_size); 449 b_error = ext3_xattr_block_list(dentry, buffer, buffer_size);
447 if (b_error < 0) 450 if (b_error < 0)
448 i_error = 0; 451 i_error = 0;
449 } 452 }
450 up_read(&EXT3_I(inode)->xattr_sem); 453 up_read(&EXT3_I(dentry->d_inode)->xattr_sem);
451 return i_error + b_error; 454 return i_error + b_error;
452} 455}
453 456
diff --git a/fs/ext3/xattr_security.c b/fs/ext3/xattr_security.c
index 37b81097bdf2..474348788dd9 100644
--- a/fs/ext3/xattr_security.c
+++ b/fs/ext3/xattr_security.c
@@ -12,8 +12,8 @@
12#include "xattr.h" 12#include "xattr.h"
13 13
14static size_t 14static size_t
15ext3_xattr_security_list(struct inode *inode, char *list, size_t list_size, 15ext3_xattr_security_list(struct dentry *dentry, char *list, size_t list_size,
16 const char *name, size_t name_len) 16 const char *name, size_t name_len, int type)
17{ 17{
18 const size_t prefix_len = XATTR_SECURITY_PREFIX_LEN; 18 const size_t prefix_len = XATTR_SECURITY_PREFIX_LEN;
19 const size_t total_len = prefix_len + name_len + 1; 19 const size_t total_len = prefix_len + name_len + 1;
@@ -28,23 +28,23 @@ ext3_xattr_security_list(struct inode *inode, char *list, size_t list_size,
28} 28}
29 29
30static int 30static int
31ext3_xattr_security_get(struct inode *inode, const char *name, 31ext3_xattr_security_get(struct dentry *dentry, const char *name,
32 void *buffer, size_t size) 32 void *buffer, size_t size, int type)
33{ 33{
34 if (strcmp(name, "") == 0) 34 if (strcmp(name, "") == 0)
35 return -EINVAL; 35 return -EINVAL;
36 return ext3_xattr_get(inode, EXT3_XATTR_INDEX_SECURITY, name, 36 return ext3_xattr_get(dentry->d_inode, EXT3_XATTR_INDEX_SECURITY,
37 buffer, size); 37 name, buffer, size);
38} 38}
39 39
40static int 40static int
41ext3_xattr_security_set(struct inode *inode, const char *name, 41ext3_xattr_security_set(struct dentry *dentry, const char *name,
42 const void *value, size_t size, int flags) 42 const void *value, size_t size, int flags, int type)
43{ 43{
44 if (strcmp(name, "") == 0) 44 if (strcmp(name, "") == 0)
45 return -EINVAL; 45 return -EINVAL;
46 return ext3_xattr_set(inode, EXT3_XATTR_INDEX_SECURITY, name, 46 return ext3_xattr_set(dentry->d_inode, EXT3_XATTR_INDEX_SECURITY,
47 value, size, flags); 47 name, value, size, flags);
48} 48}
49 49
50int 50int
diff --git a/fs/ext3/xattr_trusted.c b/fs/ext3/xattr_trusted.c
index c7c41a410c4b..e5562845ed96 100644
--- a/fs/ext3/xattr_trusted.c
+++ b/fs/ext3/xattr_trusted.c
@@ -14,8 +14,8 @@
14#include "xattr.h" 14#include "xattr.h"
15 15
16static size_t 16static size_t
17ext3_xattr_trusted_list(struct inode *inode, char *list, size_t list_size, 17ext3_xattr_trusted_list(struct dentry *dentry, char *list, size_t list_size,
18 const char *name, size_t name_len) 18 const char *name, size_t name_len, int type)
19{ 19{
20 const size_t prefix_len = XATTR_TRUSTED_PREFIX_LEN; 20 const size_t prefix_len = XATTR_TRUSTED_PREFIX_LEN;
21 const size_t total_len = prefix_len + name_len + 1; 21 const size_t total_len = prefix_len + name_len + 1;
@@ -32,22 +32,22 @@ ext3_xattr_trusted_list(struct inode *inode, char *list, size_t list_size,
32} 32}
33 33
34static int 34static int
35ext3_xattr_trusted_get(struct inode *inode, const char *name, 35ext3_xattr_trusted_get(struct dentry *dentry, const char *name,
36 void *buffer, size_t size) 36 void *buffer, size_t size, int type)
37{ 37{
38 if (strcmp(name, "") == 0) 38 if (strcmp(name, "") == 0)
39 return -EINVAL; 39 return -EINVAL;
40 return ext3_xattr_get(inode, EXT3_XATTR_INDEX_TRUSTED, name, 40 return ext3_xattr_get(dentry->d_inode, EXT3_XATTR_INDEX_TRUSTED,
41 buffer, size); 41 name, buffer, size);
42} 42}
43 43
44static int 44static int
45ext3_xattr_trusted_set(struct inode *inode, const char *name, 45ext3_xattr_trusted_set(struct dentry *dentry, const char *name,
46 const void *value, size_t size, int flags) 46 const void *value, size_t size, int flags, int type)
47{ 47{
48 if (strcmp(name, "") == 0) 48 if (strcmp(name, "") == 0)
49 return -EINVAL; 49 return -EINVAL;
50 return ext3_xattr_set(inode, EXT3_XATTR_INDEX_TRUSTED, name, 50 return ext3_xattr_set(dentry->d_inode, EXT3_XATTR_INDEX_TRUSTED, name,
51 value, size, flags); 51 value, size, flags);
52} 52}
53 53
diff --git a/fs/ext3/xattr_user.c b/fs/ext3/xattr_user.c
index 430fe63b31b3..3bcfe9ee0a68 100644
--- a/fs/ext3/xattr_user.c
+++ b/fs/ext3/xattr_user.c
@@ -13,13 +13,13 @@
13#include "xattr.h" 13#include "xattr.h"
14 14
15static size_t 15static size_t
16ext3_xattr_user_list(struct inode *inode, char *list, size_t list_size, 16ext3_xattr_user_list(struct dentry *dentry, char *list, size_t list_size,
17 const char *name, size_t name_len) 17 const char *name, size_t name_len, int type)
18{ 18{
19 const size_t prefix_len = XATTR_USER_PREFIX_LEN; 19 const size_t prefix_len = XATTR_USER_PREFIX_LEN;
20 const size_t total_len = prefix_len + name_len + 1; 20 const size_t total_len = prefix_len + name_len + 1;
21 21
22 if (!test_opt(inode->i_sb, XATTR_USER)) 22 if (!test_opt(dentry->d_sb, XATTR_USER))
23 return 0; 23 return 0;
24 24
25 if (list && total_len <= list_size) { 25 if (list && total_len <= list_size) {
@@ -31,26 +31,27 @@ ext3_xattr_user_list(struct inode *inode, char *list, size_t list_size,
31} 31}
32 32
33static int 33static int
34ext3_xattr_user_get(struct inode *inode, const char *name, 34ext3_xattr_user_get(struct dentry *dentry, const char *name, void *buffer,
35 void *buffer, size_t size) 35 size_t size, int type)
36{ 36{
37 if (strcmp(name, "") == 0) 37 if (strcmp(name, "") == 0)
38 return -EINVAL; 38 return -EINVAL;
39 if (!test_opt(inode->i_sb, XATTR_USER)) 39 if (!test_opt(dentry->d_sb, XATTR_USER))
40 return -EOPNOTSUPP; 40 return -EOPNOTSUPP;
41 return ext3_xattr_get(inode, EXT3_XATTR_INDEX_USER, name, buffer, size); 41 return ext3_xattr_get(dentry->d_inode, EXT3_XATTR_INDEX_USER,
42 name, buffer, size);
42} 43}
43 44
44static int 45static int
45ext3_xattr_user_set(struct inode *inode, const char *name, 46ext3_xattr_user_set(struct dentry *dentry, const char *name,
46 const void *value, size_t size, int flags) 47 const void *value, size_t size, int flags, int type)
47{ 48{
48 if (strcmp(name, "") == 0) 49 if (strcmp(name, "") == 0)
49 return -EINVAL; 50 return -EINVAL;
50 if (!test_opt(inode->i_sb, XATTR_USER)) 51 if (!test_opt(dentry->d_sb, XATTR_USER))
51 return -EOPNOTSUPP; 52 return -EOPNOTSUPP;
52 return ext3_xattr_set(inode, EXT3_XATTR_INDEX_USER, name, 53 return ext3_xattr_set(dentry->d_inode, EXT3_XATTR_INDEX_USER,
53 value, size, flags); 54 name, value, size, flags);
54} 55}
55 56
56struct xattr_handler ext3_xattr_user_handler = { 57struct xattr_handler ext3_xattr_user_handler = {
diff --git a/fs/ext4/Kconfig b/fs/ext4/Kconfig
index e5f6774846e4..9acf7e808139 100644
--- a/fs/ext4/Kconfig
+++ b/fs/ext4/Kconfig
@@ -2,7 +2,6 @@ config EXT4_FS
2 tristate "The Extended 4 (ext4) filesystem" 2 tristate "The Extended 4 (ext4) filesystem"
3 select JBD2 3 select JBD2
4 select CRC16 4 select CRC16
5 select FS_JOURNAL_INFO
6 help 5 help
7 This is the next generation of the ext3 filesystem. 6 This is the next generation of the ext3 filesystem.
8 7
diff --git a/fs/ext4/acl.c b/fs/ext4/acl.c
index 0df88b2a69b0..8a2a29d35a6f 100644
--- a/fs/ext4/acl.c
+++ b/fs/ext4/acl.c
@@ -364,12 +364,12 @@ out:
364 * Extended attribute handlers 364 * Extended attribute handlers
365 */ 365 */
366static size_t 366static size_t
367ext4_xattr_list_acl_access(struct inode *inode, char *list, size_t list_len, 367ext4_xattr_list_acl_access(struct dentry *dentry, char *list, size_t list_len,
368 const char *name, size_t name_len) 368 const char *name, size_t name_len, int type)
369{ 369{
370 const size_t size = sizeof(POSIX_ACL_XATTR_ACCESS); 370 const size_t size = sizeof(POSIX_ACL_XATTR_ACCESS);
371 371
372 if (!test_opt(inode->i_sb, POSIX_ACL)) 372 if (!test_opt(dentry->d_sb, POSIX_ACL))
373 return 0; 373 return 0;
374 if (list && size <= list_len) 374 if (list && size <= list_len)
375 memcpy(list, POSIX_ACL_XATTR_ACCESS, size); 375 memcpy(list, POSIX_ACL_XATTR_ACCESS, size);
@@ -377,12 +377,12 @@ ext4_xattr_list_acl_access(struct inode *inode, char *list, size_t list_len,
377} 377}
378 378
379static size_t 379static size_t
380ext4_xattr_list_acl_default(struct inode *inode, char *list, size_t list_len, 380ext4_xattr_list_acl_default(struct dentry *dentry, char *list, size_t list_len,
381 const char *name, size_t name_len) 381 const char *name, size_t name_len, int type)
382{ 382{
383 const size_t size = sizeof(POSIX_ACL_XATTR_DEFAULT); 383 const size_t size = sizeof(POSIX_ACL_XATTR_DEFAULT);
384 384
385 if (!test_opt(inode->i_sb, POSIX_ACL)) 385 if (!test_opt(dentry->d_sb, POSIX_ACL))
386 return 0; 386 return 0;
387 if (list && size <= list_len) 387 if (list && size <= list_len)
388 memcpy(list, POSIX_ACL_XATTR_DEFAULT, size); 388 memcpy(list, POSIX_ACL_XATTR_DEFAULT, size);
@@ -390,15 +390,18 @@ ext4_xattr_list_acl_default(struct inode *inode, char *list, size_t list_len,
390} 390}
391 391
392static int 392static int
393ext4_xattr_get_acl(struct inode *inode, int type, void *buffer, size_t size) 393ext4_xattr_get_acl(struct dentry *dentry, const char *name, void *buffer,
394 size_t size, int type)
394{ 395{
395 struct posix_acl *acl; 396 struct posix_acl *acl;
396 int error; 397 int error;
397 398
398 if (!test_opt(inode->i_sb, POSIX_ACL)) 399 if (strcmp(name, "") != 0)
400 return -EINVAL;
401 if (!test_opt(dentry->d_sb, POSIX_ACL))
399 return -EOPNOTSUPP; 402 return -EOPNOTSUPP;
400 403
401 acl = ext4_get_acl(inode, type); 404 acl = ext4_get_acl(dentry->d_inode, type);
402 if (IS_ERR(acl)) 405 if (IS_ERR(acl))
403 return PTR_ERR(acl); 406 return PTR_ERR(acl);
404 if (acl == NULL) 407 if (acl == NULL)
@@ -410,31 +413,16 @@ ext4_xattr_get_acl(struct inode *inode, int type, void *buffer, size_t size)
410} 413}
411 414
412static int 415static int
413ext4_xattr_get_acl_access(struct inode *inode, const char *name, 416ext4_xattr_set_acl(struct dentry *dentry, const char *name, const void *value,
414 void *buffer, size_t size) 417 size_t size, int flags, int type)
415{
416 if (strcmp(name, "") != 0)
417 return -EINVAL;
418 return ext4_xattr_get_acl(inode, ACL_TYPE_ACCESS, buffer, size);
419}
420
421static int
422ext4_xattr_get_acl_default(struct inode *inode, const char *name,
423 void *buffer, size_t size)
424{
425 if (strcmp(name, "") != 0)
426 return -EINVAL;
427 return ext4_xattr_get_acl(inode, ACL_TYPE_DEFAULT, buffer, size);
428}
429
430static int
431ext4_xattr_set_acl(struct inode *inode, int type, const void *value,
432 size_t size)
433{ 418{
419 struct inode *inode = dentry->d_inode;
434 handle_t *handle; 420 handle_t *handle;
435 struct posix_acl *acl; 421 struct posix_acl *acl;
436 int error, retries = 0; 422 int error, retries = 0;
437 423
424 if (strcmp(name, "") != 0)
425 return -EINVAL;
438 if (!test_opt(inode->i_sb, POSIX_ACL)) 426 if (!test_opt(inode->i_sb, POSIX_ACL))
439 return -EOPNOTSUPP; 427 return -EOPNOTSUPP;
440 if (!is_owner_or_cap(inode)) 428 if (!is_owner_or_cap(inode))
@@ -466,34 +454,18 @@ release_and_out:
466 return error; 454 return error;
467} 455}
468 456
469static int
470ext4_xattr_set_acl_access(struct inode *inode, const char *name,
471 const void *value, size_t size, int flags)
472{
473 if (strcmp(name, "") != 0)
474 return -EINVAL;
475 return ext4_xattr_set_acl(inode, ACL_TYPE_ACCESS, value, size);
476}
477
478static int
479ext4_xattr_set_acl_default(struct inode *inode, const char *name,
480 const void *value, size_t size, int flags)
481{
482 if (strcmp(name, "") != 0)
483 return -EINVAL;
484 return ext4_xattr_set_acl(inode, ACL_TYPE_DEFAULT, value, size);
485}
486
487struct xattr_handler ext4_xattr_acl_access_handler = { 457struct xattr_handler ext4_xattr_acl_access_handler = {
488 .prefix = POSIX_ACL_XATTR_ACCESS, 458 .prefix = POSIX_ACL_XATTR_ACCESS,
459 .flags = ACL_TYPE_ACCESS,
489 .list = ext4_xattr_list_acl_access, 460 .list = ext4_xattr_list_acl_access,
490 .get = ext4_xattr_get_acl_access, 461 .get = ext4_xattr_get_acl,
491 .set = ext4_xattr_set_acl_access, 462 .set = ext4_xattr_set_acl,
492}; 463};
493 464
494struct xattr_handler ext4_xattr_acl_default_handler = { 465struct xattr_handler ext4_xattr_acl_default_handler = {
495 .prefix = POSIX_ACL_XATTR_DEFAULT, 466 .prefix = POSIX_ACL_XATTR_DEFAULT,
467 .flags = ACL_TYPE_DEFAULT,
496 .list = ext4_xattr_list_acl_default, 468 .list = ext4_xattr_list_acl_default,
497 .get = ext4_xattr_get_acl_default, 469 .get = ext4_xattr_get_acl,
498 .set = ext4_xattr_set_acl_default, 470 .set = ext4_xattr_set_acl,
499}; 471};
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index 910bf9a59cb3..83218bebbc7c 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -92,7 +92,7 @@ static struct buffer_head *ext4_xattr_cache_find(struct inode *,
92 struct mb_cache_entry **); 92 struct mb_cache_entry **);
93static void ext4_xattr_rehash(struct ext4_xattr_header *, 93static void ext4_xattr_rehash(struct ext4_xattr_header *,
94 struct ext4_xattr_entry *); 94 struct ext4_xattr_entry *);
95static int ext4_xattr_list(struct inode *inode, char *buffer, 95static int ext4_xattr_list(struct dentry *dentry, char *buffer,
96 size_t buffer_size); 96 size_t buffer_size);
97 97
98static struct mb_cache *ext4_xattr_cache; 98static struct mb_cache *ext4_xattr_cache;
@@ -140,7 +140,7 @@ ext4_xattr_handler(int name_index)
140ssize_t 140ssize_t
141ext4_listxattr(struct dentry *dentry, char *buffer, size_t size) 141ext4_listxattr(struct dentry *dentry, char *buffer, size_t size)
142{ 142{
143 return ext4_xattr_list(dentry->d_inode, buffer, size); 143 return ext4_xattr_list(dentry, buffer, size);
144} 144}
145 145
146static int 146static int
@@ -325,7 +325,7 @@ ext4_xattr_get(struct inode *inode, int name_index, const char *name,
325} 325}
326 326
327static int 327static int
328ext4_xattr_list_entries(struct inode *inode, struct ext4_xattr_entry *entry, 328ext4_xattr_list_entries(struct dentry *dentry, struct ext4_xattr_entry *entry,
329 char *buffer, size_t buffer_size) 329 char *buffer, size_t buffer_size)
330{ 330{
331 size_t rest = buffer_size; 331 size_t rest = buffer_size;
@@ -335,9 +335,10 @@ ext4_xattr_list_entries(struct inode *inode, struct ext4_xattr_entry *entry,
335 ext4_xattr_handler(entry->e_name_index); 335 ext4_xattr_handler(entry->e_name_index);
336 336
337 if (handler) { 337 if (handler) {
338 size_t size = handler->list(inode, buffer, rest, 338 size_t size = handler->list(dentry, buffer, rest,
339 entry->e_name, 339 entry->e_name,
340 entry->e_name_len); 340 entry->e_name_len,
341 handler->flags);
341 if (buffer) { 342 if (buffer) {
342 if (size > rest) 343 if (size > rest)
343 return -ERANGE; 344 return -ERANGE;
@@ -350,8 +351,9 @@ ext4_xattr_list_entries(struct inode *inode, struct ext4_xattr_entry *entry,
350} 351}
351 352
352static int 353static int
353ext4_xattr_block_list(struct inode *inode, char *buffer, size_t buffer_size) 354ext4_xattr_block_list(struct dentry *dentry, char *buffer, size_t buffer_size)
354{ 355{
356 struct inode *inode = dentry->d_inode;
355 struct buffer_head *bh = NULL; 357 struct buffer_head *bh = NULL;
356 int error; 358 int error;
357 359
@@ -376,7 +378,7 @@ ext4_xattr_block_list(struct inode *inode, char *buffer, size_t buffer_size)
376 goto cleanup; 378 goto cleanup;
377 } 379 }
378 ext4_xattr_cache_insert(bh); 380 ext4_xattr_cache_insert(bh);
379 error = ext4_xattr_list_entries(inode, BFIRST(bh), buffer, buffer_size); 381 error = ext4_xattr_list_entries(dentry, BFIRST(bh), buffer, buffer_size);
380 382
381cleanup: 383cleanup:
382 brelse(bh); 384 brelse(bh);
@@ -385,8 +387,9 @@ cleanup:
385} 387}
386 388
387static int 389static int
388ext4_xattr_ibody_list(struct inode *inode, char *buffer, size_t buffer_size) 390ext4_xattr_ibody_list(struct dentry *dentry, char *buffer, size_t buffer_size)
389{ 391{
392 struct inode *inode = dentry->d_inode;
390 struct ext4_xattr_ibody_header *header; 393 struct ext4_xattr_ibody_header *header;
391 struct ext4_inode *raw_inode; 394 struct ext4_inode *raw_inode;
392 struct ext4_iloc iloc; 395 struct ext4_iloc iloc;
@@ -404,7 +407,7 @@ ext4_xattr_ibody_list(struct inode *inode, char *buffer, size_t buffer_size)
404 error = ext4_xattr_check_names(IFIRST(header), end); 407 error = ext4_xattr_check_names(IFIRST(header), end);
405 if (error) 408 if (error)
406 goto cleanup; 409 goto cleanup;
407 error = ext4_xattr_list_entries(inode, IFIRST(header), 410 error = ext4_xattr_list_entries(dentry, IFIRST(header),
408 buffer, buffer_size); 411 buffer, buffer_size);
409 412
410cleanup: 413cleanup:
@@ -423,12 +426,12 @@ cleanup:
423 * used / required on success. 426 * used / required on success.
424 */ 427 */
425static int 428static int
426ext4_xattr_list(struct inode *inode, char *buffer, size_t buffer_size) 429ext4_xattr_list(struct dentry *dentry, char *buffer, size_t buffer_size)
427{ 430{
428 int i_error, b_error; 431 int i_error, b_error;
429 432
430 down_read(&EXT4_I(inode)->xattr_sem); 433 down_read(&EXT4_I(dentry->d_inode)->xattr_sem);
431 i_error = ext4_xattr_ibody_list(inode, buffer, buffer_size); 434 i_error = ext4_xattr_ibody_list(dentry, buffer, buffer_size);
432 if (i_error < 0) { 435 if (i_error < 0) {
433 b_error = 0; 436 b_error = 0;
434 } else { 437 } else {
@@ -436,11 +439,11 @@ ext4_xattr_list(struct inode *inode, char *buffer, size_t buffer_size)
436 buffer += i_error; 439 buffer += i_error;
437 buffer_size -= i_error; 440 buffer_size -= i_error;
438 } 441 }
439 b_error = ext4_xattr_block_list(inode, buffer, buffer_size); 442 b_error = ext4_xattr_block_list(dentry, buffer, buffer_size);
440 if (b_error < 0) 443 if (b_error < 0)
441 i_error = 0; 444 i_error = 0;
442 } 445 }
443 up_read(&EXT4_I(inode)->xattr_sem); 446 up_read(&EXT4_I(dentry->d_inode)->xattr_sem);
444 return i_error + b_error; 447 return i_error + b_error;
445} 448}
446 449
diff --git a/fs/ext4/xattr_security.c b/fs/ext4/xattr_security.c
index ca5f89fc6cae..983c253999a7 100644
--- a/fs/ext4/xattr_security.c
+++ b/fs/ext4/xattr_security.c
@@ -12,8 +12,8 @@
12#include "xattr.h" 12#include "xattr.h"
13 13
14static size_t 14static size_t
15ext4_xattr_security_list(struct inode *inode, char *list, size_t list_size, 15ext4_xattr_security_list(struct dentry *dentry, char *list, size_t list_size,
16 const char *name, size_t name_len) 16 const char *name, size_t name_len, int type)
17{ 17{
18 const size_t prefix_len = sizeof(XATTR_SECURITY_PREFIX)-1; 18 const size_t prefix_len = sizeof(XATTR_SECURITY_PREFIX)-1;
19 const size_t total_len = prefix_len + name_len + 1; 19 const size_t total_len = prefix_len + name_len + 1;
@@ -28,23 +28,23 @@ ext4_xattr_security_list(struct inode *inode, char *list, size_t list_size,
28} 28}
29 29
30static int 30static int
31ext4_xattr_security_get(struct inode *inode, const char *name, 31ext4_xattr_security_get(struct dentry *dentry, const char *name,
32 void *buffer, size_t size) 32 void *buffer, size_t size, int type)
33{ 33{
34 if (strcmp(name, "") == 0) 34 if (strcmp(name, "") == 0)
35 return -EINVAL; 35 return -EINVAL;
36 return ext4_xattr_get(inode, EXT4_XATTR_INDEX_SECURITY, name, 36 return ext4_xattr_get(dentry->d_inode, EXT4_XATTR_INDEX_SECURITY,
37 buffer, size); 37 name, buffer, size);
38} 38}
39 39
40static int 40static int
41ext4_xattr_security_set(struct inode *inode, const char *name, 41ext4_xattr_security_set(struct dentry *dentry, const char *name,
42 const void *value, size_t size, int flags) 42 const void *value, size_t size, int flags, int type)
43{ 43{
44 if (strcmp(name, "") == 0) 44 if (strcmp(name, "") == 0)
45 return -EINVAL; 45 return -EINVAL;
46 return ext4_xattr_set(inode, EXT4_XATTR_INDEX_SECURITY, name, 46 return ext4_xattr_set(dentry->d_inode, EXT4_XATTR_INDEX_SECURITY,
47 value, size, flags); 47 name, value, size, flags);
48} 48}
49 49
50int 50int
diff --git a/fs/ext4/xattr_trusted.c b/fs/ext4/xattr_trusted.c
index ac1a52cf2a37..15b50edc6587 100644
--- a/fs/ext4/xattr_trusted.c
+++ b/fs/ext4/xattr_trusted.c
@@ -14,8 +14,8 @@
14#include "xattr.h" 14#include "xattr.h"
15 15
16static size_t 16static size_t
17ext4_xattr_trusted_list(struct inode *inode, char *list, size_t list_size, 17ext4_xattr_trusted_list(struct dentry *dentry, char *list, size_t list_size,
18 const char *name, size_t name_len) 18 const char *name, size_t name_len, int type)
19{ 19{
20 const size_t prefix_len = XATTR_TRUSTED_PREFIX_LEN; 20 const size_t prefix_len = XATTR_TRUSTED_PREFIX_LEN;
21 const size_t total_len = prefix_len + name_len + 1; 21 const size_t total_len = prefix_len + name_len + 1;
@@ -32,23 +32,23 @@ ext4_xattr_trusted_list(struct inode *inode, char *list, size_t list_size,
32} 32}
33 33
34static int 34static int
35ext4_xattr_trusted_get(struct inode *inode, const char *name, 35ext4_xattr_trusted_get(struct dentry *dentry, const char *name, void *buffer,
36 void *buffer, size_t size) 36 size_t size, int type)
37{ 37{
38 if (strcmp(name, "") == 0) 38 if (strcmp(name, "") == 0)
39 return -EINVAL; 39 return -EINVAL;
40 return ext4_xattr_get(inode, EXT4_XATTR_INDEX_TRUSTED, name, 40 return ext4_xattr_get(dentry->d_inode, EXT4_XATTR_INDEX_TRUSTED,
41 buffer, size); 41 name, buffer, size);
42} 42}
43 43
44static int 44static int
45ext4_xattr_trusted_set(struct inode *inode, const char *name, 45ext4_xattr_trusted_set(struct dentry *dentry, const char *name,
46 const void *value, size_t size, int flags) 46 const void *value, size_t size, int flags, int type)
47{ 47{
48 if (strcmp(name, "") == 0) 48 if (strcmp(name, "") == 0)
49 return -EINVAL; 49 return -EINVAL;
50 return ext4_xattr_set(inode, EXT4_XATTR_INDEX_TRUSTED, name, 50 return ext4_xattr_set(dentry->d_inode, EXT4_XATTR_INDEX_TRUSTED,
51 value, size, flags); 51 name, value, size, flags);
52} 52}
53 53
54struct xattr_handler ext4_xattr_trusted_handler = { 54struct xattr_handler ext4_xattr_trusted_handler = {
diff --git a/fs/ext4/xattr_user.c b/fs/ext4/xattr_user.c
index d91aa61b42aa..c4ce05746ce1 100644
--- a/fs/ext4/xattr_user.c
+++ b/fs/ext4/xattr_user.c
@@ -13,13 +13,13 @@
13#include "xattr.h" 13#include "xattr.h"
14 14
15static size_t 15static size_t
16ext4_xattr_user_list(struct inode *inode, char *list, size_t list_size, 16ext4_xattr_user_list(struct dentry *dentry, char *list, size_t list_size,
17 const char *name, size_t name_len) 17 const char *name, size_t name_len, int type)
18{ 18{
19 const size_t prefix_len = XATTR_USER_PREFIX_LEN; 19 const size_t prefix_len = XATTR_USER_PREFIX_LEN;
20 const size_t total_len = prefix_len + name_len + 1; 20 const size_t total_len = prefix_len + name_len + 1;
21 21
22 if (!test_opt(inode->i_sb, XATTR_USER)) 22 if (!test_opt(dentry->d_sb, XATTR_USER))
23 return 0; 23 return 0;
24 24
25 if (list && total_len <= list_size) { 25 if (list && total_len <= list_size) {
@@ -31,26 +31,27 @@ ext4_xattr_user_list(struct inode *inode, char *list, size_t list_size,
31} 31}
32 32
33static int 33static int
34ext4_xattr_user_get(struct inode *inode, const char *name, 34ext4_xattr_user_get(struct dentry *dentry, const char *name,
35 void *buffer, size_t size) 35 void *buffer, size_t size, int type)
36{ 36{
37 if (strcmp(name, "") == 0) 37 if (strcmp(name, "") == 0)
38 return -EINVAL; 38 return -EINVAL;
39 if (!test_opt(inode->i_sb, XATTR_USER)) 39 if (!test_opt(dentry->d_sb, XATTR_USER))
40 return -EOPNOTSUPP; 40 return -EOPNOTSUPP;
41 return ext4_xattr_get(inode, EXT4_XATTR_INDEX_USER, name, buffer, size); 41 return ext4_xattr_get(dentry->d_inode, EXT4_XATTR_INDEX_USER,
42 name, buffer, size);
42} 43}
43 44
44static int 45static int
45ext4_xattr_user_set(struct inode *inode, const char *name, 46ext4_xattr_user_set(struct dentry *dentry, const char *name,
46 const void *value, size_t size, int flags) 47 const void *value, size_t size, int flags, int type)
47{ 48{
48 if (strcmp(name, "") == 0) 49 if (strcmp(name, "") == 0)
49 return -EINVAL; 50 return -EINVAL;
50 if (!test_opt(inode->i_sb, XATTR_USER)) 51 if (!test_opt(dentry->d_sb, XATTR_USER))
51 return -EOPNOTSUPP; 52 return -EOPNOTSUPP;
52 return ext4_xattr_set(inode, EXT4_XATTR_INDEX_USER, name, 53 return ext4_xattr_set(dentry->d_inode, EXT4_XATTR_INDEX_USER,
53 value, size, flags); 54 name, value, size, flags);
54} 55}
55 56
56struct xattr_handler ext4_xattr_user_handler = { 57struct xattr_handler ext4_xattr_user_handler = {
diff --git a/fs/fat/fat.h b/fs/fat/fat.h
index 7db0979c6b72..e6efdfa0f6db 100644
--- a/fs/fat/fat.h
+++ b/fs/fat/fat.h
@@ -44,7 +44,8 @@ struct fat_mount_options {
44 nocase:1, /* Does this need case conversion? 0=need case conversion*/ 44 nocase:1, /* Does this need case conversion? 0=need case conversion*/
45 usefree:1, /* Use free_clusters for FAT32 */ 45 usefree:1, /* Use free_clusters for FAT32 */
46 tz_utc:1, /* Filesystem timestamps are in UTC */ 46 tz_utc:1, /* Filesystem timestamps are in UTC */
47 rodir:1; /* allow ATTR_RO for directory */ 47 rodir:1, /* allow ATTR_RO for directory */
48 discard:1; /* Issue discard requests on deletions */
48}; 49};
49 50
50#define FAT_HASH_BITS 8 51#define FAT_HASH_BITS 8
diff --git a/fs/fat/fatent.c b/fs/fat/fatent.c
index a81037721a6f..81184d3b75a3 100644
--- a/fs/fat/fatent.c
+++ b/fs/fat/fatent.c
@@ -566,16 +566,21 @@ int fat_free_clusters(struct inode *inode, int cluster)
566 goto error; 566 goto error;
567 } 567 }
568 568
569 /* 569 if (sbi->options.discard) {
570 * Issue discard for the sectors we no longer care about, 570 /*
571 * batching contiguous clusters into one request 571 * Issue discard for the sectors we no longer
572 */ 572 * care about, batching contiguous clusters
573 if (cluster != fatent.entry + 1) { 573 * into one request
574 int nr_clus = fatent.entry - first_cl + 1; 574 */
575 575 if (cluster != fatent.entry + 1) {
576 sb_issue_discard(sb, fat_clus_to_blknr(sbi, first_cl), 576 int nr_clus = fatent.entry - first_cl + 1;
577 nr_clus * sbi->sec_per_clus); 577
578 first_cl = cluster; 578 sb_issue_discard(sb,
579 fat_clus_to_blknr(sbi, first_cl),
580 nr_clus * sbi->sec_per_clus);
581
582 first_cl = cluster;
583 }
579 } 584 }
580 585
581 ops->ent_put(&fatent, FAT_ENT_FREE); 586 ops->ent_put(&fatent, FAT_ENT_FREE);
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index 76b7961ab663..14da530b05ca 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -858,6 +858,8 @@ static int fat_show_options(struct seq_file *m, struct vfsmount *mnt)
858 seq_puts(m, ",errors=panic"); 858 seq_puts(m, ",errors=panic");
859 else 859 else
860 seq_puts(m, ",errors=remount-ro"); 860 seq_puts(m, ",errors=remount-ro");
861 if (opts->discard)
862 seq_puts(m, ",discard");
861 863
862 return 0; 864 return 0;
863} 865}
@@ -871,7 +873,7 @@ enum {
871 Opt_shortname_winnt, Opt_shortname_mixed, Opt_utf8_no, Opt_utf8_yes, 873 Opt_shortname_winnt, Opt_shortname_mixed, Opt_utf8_no, Opt_utf8_yes,
872 Opt_uni_xl_no, Opt_uni_xl_yes, Opt_nonumtail_no, Opt_nonumtail_yes, 874 Opt_uni_xl_no, Opt_uni_xl_yes, Opt_nonumtail_no, Opt_nonumtail_yes,
873 Opt_obsolate, Opt_flush, Opt_tz_utc, Opt_rodir, Opt_err_cont, 875 Opt_obsolate, Opt_flush, Opt_tz_utc, Opt_rodir, Opt_err_cont,
874 Opt_err_panic, Opt_err_ro, Opt_err, 876 Opt_err_panic, Opt_err_ro, Opt_discard, Opt_err,
875}; 877};
876 878
877static const match_table_t fat_tokens = { 879static const match_table_t fat_tokens = {
@@ -899,6 +901,7 @@ static const match_table_t fat_tokens = {
899 {Opt_err_cont, "errors=continue"}, 901 {Opt_err_cont, "errors=continue"},
900 {Opt_err_panic, "errors=panic"}, 902 {Opt_err_panic, "errors=panic"},
901 {Opt_err_ro, "errors=remount-ro"}, 903 {Opt_err_ro, "errors=remount-ro"},
904 {Opt_discard, "discard"},
902 {Opt_obsolate, "conv=binary"}, 905 {Opt_obsolate, "conv=binary"},
903 {Opt_obsolate, "conv=text"}, 906 {Opt_obsolate, "conv=text"},
904 {Opt_obsolate, "conv=auto"}, 907 {Opt_obsolate, "conv=auto"},
@@ -1136,6 +1139,9 @@ static int parse_options(char *options, int is_vfat, int silent, int *debug,
1136 case Opt_rodir: 1139 case Opt_rodir:
1137 opts->rodir = 1; 1140 opts->rodir = 1;
1138 break; 1141 break;
1142 case Opt_discard:
1143 opts->discard = 1;
1144 break;
1139 1145
1140 /* obsolete mount options */ 1146 /* obsolete mount options */
1141 case Opt_obsolate: 1147 case Opt_obsolate:
diff --git a/fs/fat/misc.c b/fs/fat/misc.c
index 0f55f5cb732f..d3da05f26465 100644
--- a/fs/fat/misc.c
+++ b/fs/fat/misc.c
@@ -9,6 +9,7 @@
9#include <linux/module.h> 9#include <linux/module.h>
10#include <linux/fs.h> 10#include <linux/fs.h>
11#include <linux/buffer_head.h> 11#include <linux/buffer_head.h>
12#include <linux/time.h>
12#include "fat.h" 13#include "fat.h"
13 14
14/* 15/*
@@ -157,10 +158,6 @@ extern struct timezone sys_tz;
157#define SECS_PER_MIN 60 158#define SECS_PER_MIN 60
158#define SECS_PER_HOUR (60 * 60) 159#define SECS_PER_HOUR (60 * 60)
159#define SECS_PER_DAY (SECS_PER_HOUR * 24) 160#define SECS_PER_DAY (SECS_PER_HOUR * 24)
160#define UNIX_SECS_1980 315532800L
161#if BITS_PER_LONG == 64
162#define UNIX_SECS_2108 4354819200L
163#endif
164/* days between 1.1.70 and 1.1.80 (2 leap days) */ 161/* days between 1.1.70 and 1.1.80 (2 leap days) */
165#define DAYS_DELTA (365 * 10 + 2) 162#define DAYS_DELTA (365 * 10 + 2)
166/* 120 (2100 - 1980) isn't leap year */ 163/* 120 (2100 - 1980) isn't leap year */
@@ -213,58 +210,35 @@ void fat_time_fat2unix(struct msdos_sb_info *sbi, struct timespec *ts,
213void fat_time_unix2fat(struct msdos_sb_info *sbi, struct timespec *ts, 210void fat_time_unix2fat(struct msdos_sb_info *sbi, struct timespec *ts,
214 __le16 *time, __le16 *date, u8 *time_cs) 211 __le16 *time, __le16 *date, u8 *time_cs)
215{ 212{
216 time_t second = ts->tv_sec; 213 struct tm tm;
217 time_t day, leap_day, month, year; 214 time_to_tm(ts->tv_sec, sbi->options.tz_utc ? 0 :
215 -sys_tz.tz_minuteswest * 60, &tm);
218 216
219 if (!sbi->options.tz_utc) 217 /* FAT can only support year between 1980 to 2107 */
220 second -= sys_tz.tz_minuteswest * SECS_PER_MIN; 218 if (tm.tm_year < 1980 - 1900) {
221
222 /* Jan 1 GMT 00:00:00 1980. But what about another time zone? */
223 if (second < UNIX_SECS_1980) {
224 *time = 0; 219 *time = 0;
225 *date = cpu_to_le16((0 << 9) | (1 << 5) | 1); 220 *date = cpu_to_le16((0 << 9) | (1 << 5) | 1);
226 if (time_cs) 221 if (time_cs)
227 *time_cs = 0; 222 *time_cs = 0;
228 return; 223 return;
229 } 224 }
230#if BITS_PER_LONG == 64 225 if (tm.tm_year > 2107 - 1900) {
231 if (second >= UNIX_SECS_2108) {
232 *time = cpu_to_le16((23 << 11) | (59 << 5) | 29); 226 *time = cpu_to_le16((23 << 11) | (59 << 5) | 29);
233 *date = cpu_to_le16((127 << 9) | (12 << 5) | 31); 227 *date = cpu_to_le16((127 << 9) | (12 << 5) | 31);
234 if (time_cs) 228 if (time_cs)
235 *time_cs = 199; 229 *time_cs = 199;
236 return; 230 return;
237 } 231 }
238#endif
239 232
240 day = second / SECS_PER_DAY - DAYS_DELTA; 233 /* from 1900 -> from 1980 */
241 year = day / 365; 234 tm.tm_year -= 80;
242 leap_day = (year + 3) / 4; 235 /* 0~11 -> 1~12 */
243 if (year > YEAR_2100) /* 2100 isn't leap year */ 236 tm.tm_mon++;
244 leap_day--; 237 /* 0~59 -> 0~29(2sec counts) */
245 if (year * 365 + leap_day > day) 238 tm.tm_sec >>= 1;
246 year--;
247 leap_day = (year + 3) / 4;
248 if (year > YEAR_2100) /* 2100 isn't leap year */
249 leap_day--;
250 day -= year * 365 + leap_day;
251
252 if (IS_LEAP_YEAR(year) && day == days_in_year[3]) {
253 month = 2;
254 } else {
255 if (IS_LEAP_YEAR(year) && day > days_in_year[3])
256 day--;
257 for (month = 1; month < 12; month++) {
258 if (days_in_year[month + 1] > day)
259 break;
260 }
261 }
262 day -= days_in_year[month];
263 239
264 *time = cpu_to_le16(((second / SECS_PER_HOUR) % 24) << 11 240 *time = cpu_to_le16(tm.tm_hour << 11 | tm.tm_min << 5 | tm.tm_sec);
265 | ((second / SECS_PER_MIN) % 60) << 5 241 *date = cpu_to_le16(tm.tm_year << 9 | tm.tm_mon << 5 | tm.tm_mday);
266 | (second % SECS_PER_MIN) >> 1);
267 *date = cpu_to_le16((year << 9) | (month << 5) | (day + 1));
268 if (time_cs) 242 if (time_cs)
269 *time_cs = (ts->tv_sec & 1) * 100 + ts->tv_nsec / 10000000; 243 *time_cs = (ts->tv_sec & 1) * 100 + ts->tv_nsec / 10000000;
270} 244}
@@ -285,4 +259,3 @@ int fat_sync_bhs(struct buffer_head **bhs, int nr_bhs)
285 } 259 }
286 return err; 260 return err;
287} 261}
288
diff --git a/fs/file_table.c b/fs/file_table.c
index 4bef4c01ec6f..0afacf654398 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -21,9 +21,12 @@
21#include <linux/fsnotify.h> 21#include <linux/fsnotify.h>
22#include <linux/sysctl.h> 22#include <linux/sysctl.h>
23#include <linux/percpu_counter.h> 23#include <linux/percpu_counter.h>
24#include <linux/ima.h>
24 25
25#include <asm/atomic.h> 26#include <asm/atomic.h>
26 27
28#include "internal.h"
29
27/* sysctl tunables... */ 30/* sysctl tunables... */
28struct files_stat_struct files_stat = { 31struct files_stat_struct files_stat = {
29 .max_files = NR_FILE 32 .max_files = NR_FILE
@@ -147,8 +150,6 @@ fail:
147 return NULL; 150 return NULL;
148} 151}
149 152
150EXPORT_SYMBOL(get_empty_filp);
151
152/** 153/**
153 * alloc_file - allocate and initialize a 'struct file' 154 * alloc_file - allocate and initialize a 'struct file'
154 * @mnt: the vfsmount on which the file will reside 155 * @mnt: the vfsmount on which the file will reside
@@ -164,8 +165,8 @@ EXPORT_SYMBOL(get_empty_filp);
164 * If all the callers of init_file() are eliminated, its 165 * If all the callers of init_file() are eliminated, its
165 * code should be moved into this function. 166 * code should be moved into this function.
166 */ 167 */
167struct file *alloc_file(struct vfsmount *mnt, struct dentry *dentry, 168struct file *alloc_file(struct path *path, fmode_t mode,
168 fmode_t mode, const struct file_operations *fop) 169 const struct file_operations *fop)
169{ 170{
170 struct file *file; 171 struct file *file;
171 172
@@ -173,35 +174,8 @@ struct file *alloc_file(struct vfsmount *mnt, struct dentry *dentry,
173 if (!file) 174 if (!file)
174 return NULL; 175 return NULL;
175 176
176 init_file(file, mnt, dentry, mode, fop); 177 file->f_path = *path;
177 return file; 178 file->f_mapping = path->dentry->d_inode->i_mapping;
178}
179EXPORT_SYMBOL(alloc_file);
180
181/**
182 * init_file - initialize a 'struct file'
183 * @file: the already allocated 'struct file' to initialized
184 * @mnt: the vfsmount on which the file resides
185 * @dentry: the dentry representing this file
186 * @mode: the mode the file is opened with
187 * @fop: the 'struct file_operations' for this file
188 *
189 * Use this instead of setting the members directly. Doing so
190 * avoids making mistakes like forgetting the mntget() or
191 * forgetting to take a write on the mnt.
192 *
193 * Note: This is a crappy interface. It is here to make
194 * merging with the existing users of get_empty_filp()
195 * who have complex failure logic easier. All users
196 * of this should be moving to alloc_file().
197 */
198int init_file(struct file *file, struct vfsmount *mnt, struct dentry *dentry,
199 fmode_t mode, const struct file_operations *fop)
200{
201 int error = 0;
202 file->f_path.dentry = dentry;
203 file->f_path.mnt = mntget(mnt);
204 file->f_mapping = dentry->d_inode->i_mapping;
205 file->f_mode = mode; 179 file->f_mode = mode;
206 file->f_op = fop; 180 file->f_op = fop;
207 181
@@ -211,14 +185,16 @@ int init_file(struct file *file, struct vfsmount *mnt, struct dentry *dentry,
211 * visible. We do this for consistency, and so 185 * visible. We do this for consistency, and so
212 * that we can do debugging checks at __fput() 186 * that we can do debugging checks at __fput()
213 */ 187 */
214 if ((mode & FMODE_WRITE) && !special_file(dentry->d_inode->i_mode)) { 188 if ((mode & FMODE_WRITE) && !special_file(path->dentry->d_inode->i_mode)) {
189 int error = 0;
215 file_take_write(file); 190 file_take_write(file);
216 error = mnt_clone_write(mnt); 191 error = mnt_clone_write(path->mnt);
217 WARN_ON(error); 192 WARN_ON(error);
218 } 193 }
219 return error; 194 ima_counts_get(file);
195 return file;
220} 196}
221EXPORT_SYMBOL(init_file); 197EXPORT_SYMBOL(alloc_file);
222 198
223void fput(struct file *file) 199void fput(struct file *file)
224{ 200{
diff --git a/fs/fscache/object-list.c b/fs/fscache/object-list.c
index e590242fa41a..3221a0c7944e 100644
--- a/fs/fscache/object-list.c
+++ b/fs/fscache/object-list.c
@@ -91,7 +91,7 @@ EXPORT_SYMBOL(fscache_object_destroy);
91 */ 91 */
92static struct fscache_object *fscache_objlist_lookup(loff_t *_pos) 92static struct fscache_object *fscache_objlist_lookup(loff_t *_pos)
93{ 93{
94 struct fscache_object *pobj, *obj, *minobj = NULL; 94 struct fscache_object *pobj, *obj = NULL, *minobj = NULL;
95 struct rb_node *p; 95 struct rb_node *p;
96 unsigned long pos; 96 unsigned long pos;
97 97
diff --git a/fs/generic_acl.c b/fs/generic_acl.c
index e0b53aa7bbec..55458031e501 100644
--- a/fs/generic_acl.c
+++ b/fs/generic_acl.c
@@ -1,62 +1,58 @@
1/* 1/*
2 * fs/generic_acl.c
3 *
4 * (C) 2005 Andreas Gruenbacher <agruen@suse.de> 2 * (C) 2005 Andreas Gruenbacher <agruen@suse.de>
5 * 3 *
6 * This file is released under the GPL. 4 * This file is released under the GPL.
5 *
6 * Generic ACL support for in-memory filesystems.
7 */ 7 */
8 8
9#include <linux/sched.h> 9#include <linux/sched.h>
10#include <linux/fs.h> 10#include <linux/fs.h>
11#include <linux/generic_acl.h> 11#include <linux/generic_acl.h>
12#include <linux/posix_acl.h>
13#include <linux/posix_acl_xattr.h>
12 14
13/** 15
14 * generic_acl_list - Generic xattr_handler->list() operation 16static size_t
15 * @ops: Filesystem specific getacl and setacl callbacks 17generic_acl_list(struct dentry *dentry, char *list, size_t list_size,
16 */ 18 const char *name, size_t name_len, int type)
17size_t
18generic_acl_list(struct inode *inode, struct generic_acl_operations *ops,
19 int type, char *list, size_t list_size)
20{ 19{
21 struct posix_acl *acl; 20 struct posix_acl *acl;
22 const char *name; 21 const char *xname;
23 size_t size; 22 size_t size;
24 23
25 acl = ops->getacl(inode, type); 24 acl = get_cached_acl(dentry->d_inode, type);
26 if (!acl) 25 if (!acl)
27 return 0; 26 return 0;
28 posix_acl_release(acl); 27 posix_acl_release(acl);
29 28
30 switch(type) { 29 switch (type) {
31 case ACL_TYPE_ACCESS: 30 case ACL_TYPE_ACCESS:
32 name = POSIX_ACL_XATTR_ACCESS; 31 xname = POSIX_ACL_XATTR_ACCESS;
33 break; 32 break;
34 33 case ACL_TYPE_DEFAULT:
35 case ACL_TYPE_DEFAULT: 34 xname = POSIX_ACL_XATTR_DEFAULT;
36 name = POSIX_ACL_XATTR_DEFAULT; 35 break;
37 break; 36 default:
38 37 return 0;
39 default:
40 return 0;
41 } 38 }
42 size = strlen(name) + 1; 39 size = strlen(xname) + 1;
43 if (list && size <= list_size) 40 if (list && size <= list_size)
44 memcpy(list, name, size); 41 memcpy(list, xname, size);
45 return size; 42 return size;
46} 43}
47 44
48/** 45static int
49 * generic_acl_get - Generic xattr_handler->get() operation 46generic_acl_get(struct dentry *dentry, const char *name, void *buffer,
50 * @ops: Filesystem specific getacl and setacl callbacks 47 size_t size, int type)
51 */
52int
53generic_acl_get(struct inode *inode, struct generic_acl_operations *ops,
54 int type, void *buffer, size_t size)
55{ 48{
56 struct posix_acl *acl; 49 struct posix_acl *acl;
57 int error; 50 int error;
58 51
59 acl = ops->getacl(inode, type); 52 if (strcmp(name, "") != 0)
53 return -EINVAL;
54
55 acl = get_cached_acl(dentry->d_inode, type);
60 if (!acl) 56 if (!acl)
61 return -ENODATA; 57 return -ENODATA;
62 error = posix_acl_to_xattr(acl, buffer, size); 58 error = posix_acl_to_xattr(acl, buffer, size);
@@ -65,17 +61,16 @@ generic_acl_get(struct inode *inode, struct generic_acl_operations *ops,
65 return error; 61 return error;
66} 62}
67 63
68/** 64static int
69 * generic_acl_set - Generic xattr_handler->set() operation 65generic_acl_set(struct dentry *dentry, const char *name, const void *value,
70 * @ops: Filesystem specific getacl and setacl callbacks 66 size_t size, int flags, int type)
71 */
72int
73generic_acl_set(struct inode *inode, struct generic_acl_operations *ops,
74 int type, const void *value, size_t size)
75{ 67{
68 struct inode *inode = dentry->d_inode;
76 struct posix_acl *acl = NULL; 69 struct posix_acl *acl = NULL;
77 int error; 70 int error;
78 71
72 if (strcmp(name, "") != 0)
73 return -EINVAL;
79 if (S_ISLNK(inode->i_mode)) 74 if (S_ISLNK(inode->i_mode))
80 return -EOPNOTSUPP; 75 return -EOPNOTSUPP;
81 if (!is_owner_or_cap(inode)) 76 if (!is_owner_or_cap(inode))
@@ -91,28 +86,27 @@ generic_acl_set(struct inode *inode, struct generic_acl_operations *ops,
91 error = posix_acl_valid(acl); 86 error = posix_acl_valid(acl);
92 if (error) 87 if (error)
93 goto failed; 88 goto failed;
94 switch(type) { 89 switch (type) {
95 case ACL_TYPE_ACCESS: 90 case ACL_TYPE_ACCESS:
96 mode = inode->i_mode; 91 mode = inode->i_mode;
97 error = posix_acl_equiv_mode(acl, &mode); 92 error = posix_acl_equiv_mode(acl, &mode);
98 if (error < 0) 93 if (error < 0)
99 goto failed; 94 goto failed;
100 inode->i_mode = mode; 95 inode->i_mode = mode;
101 if (error == 0) { 96 if (error == 0) {
102 posix_acl_release(acl); 97 posix_acl_release(acl);
103 acl = NULL; 98 acl = NULL;
104 } 99 }
105 break; 100 break;
106 101 case ACL_TYPE_DEFAULT:
107 case ACL_TYPE_DEFAULT: 102 if (!S_ISDIR(inode->i_mode)) {
108 if (!S_ISDIR(inode->i_mode)) { 103 error = -EINVAL;
109 error = -EINVAL; 104 goto failed;
110 goto failed; 105 }
111 } 106 break;
112 break;
113 } 107 }
114 } 108 }
115 ops->setacl(inode, type, acl); 109 set_cached_acl(inode, type, acl);
116 error = 0; 110 error = 0;
117failed: 111failed:
118 posix_acl_release(acl); 112 posix_acl_release(acl);
@@ -121,14 +115,12 @@ failed:
121 115
122/** 116/**
123 * generic_acl_init - Take care of acl inheritance at @inode create time 117 * generic_acl_init - Take care of acl inheritance at @inode create time
124 * @ops: Filesystem specific getacl and setacl callbacks
125 * 118 *
126 * Files created inside a directory with a default ACL inherit the 119 * Files created inside a directory with a default ACL inherit the
127 * directory's default ACL. 120 * directory's default ACL.
128 */ 121 */
129int 122int
130generic_acl_init(struct inode *inode, struct inode *dir, 123generic_acl_init(struct inode *inode, struct inode *dir)
131 struct generic_acl_operations *ops)
132{ 124{
133 struct posix_acl *acl = NULL; 125 struct posix_acl *acl = NULL;
134 mode_t mode = inode->i_mode; 126 mode_t mode = inode->i_mode;
@@ -136,7 +128,7 @@ generic_acl_init(struct inode *inode, struct inode *dir,
136 128
137 inode->i_mode = mode & ~current_umask(); 129 inode->i_mode = mode & ~current_umask();
138 if (!S_ISLNK(inode->i_mode)) 130 if (!S_ISLNK(inode->i_mode))
139 acl = ops->getacl(dir, ACL_TYPE_DEFAULT); 131 acl = get_cached_acl(dir, ACL_TYPE_DEFAULT);
140 if (acl) { 132 if (acl) {
141 struct posix_acl *clone; 133 struct posix_acl *clone;
142 134
@@ -145,7 +137,7 @@ generic_acl_init(struct inode *inode, struct inode *dir,
145 error = -ENOMEM; 137 error = -ENOMEM;
146 if (!clone) 138 if (!clone)
147 goto cleanup; 139 goto cleanup;
148 ops->setacl(inode, ACL_TYPE_DEFAULT, clone); 140 set_cached_acl(inode, ACL_TYPE_DEFAULT, clone);
149 posix_acl_release(clone); 141 posix_acl_release(clone);
150 } 142 }
151 clone = posix_acl_clone(acl, GFP_KERNEL); 143 clone = posix_acl_clone(acl, GFP_KERNEL);
@@ -156,7 +148,7 @@ generic_acl_init(struct inode *inode, struct inode *dir,
156 if (error >= 0) { 148 if (error >= 0) {
157 inode->i_mode = mode; 149 inode->i_mode = mode;
158 if (error > 0) 150 if (error > 0)
159 ops->setacl(inode, ACL_TYPE_ACCESS, clone); 151 set_cached_acl(inode, ACL_TYPE_ACCESS, clone);
160 } 152 }
161 posix_acl_release(clone); 153 posix_acl_release(clone);
162 } 154 }
@@ -169,20 +161,19 @@ cleanup:
169 161
170/** 162/**
171 * generic_acl_chmod - change the access acl of @inode upon chmod() 163 * generic_acl_chmod - change the access acl of @inode upon chmod()
172 * @ops: FIlesystem specific getacl and setacl callbacks
173 * 164 *
174 * A chmod also changes the permissions of the owner, group/mask, and 165 * A chmod also changes the permissions of the owner, group/mask, and
175 * other ACL entries. 166 * other ACL entries.
176 */ 167 */
177int 168int
178generic_acl_chmod(struct inode *inode, struct generic_acl_operations *ops) 169generic_acl_chmod(struct inode *inode)
179{ 170{
180 struct posix_acl *acl, *clone; 171 struct posix_acl *acl, *clone;
181 int error = 0; 172 int error = 0;
182 173
183 if (S_ISLNK(inode->i_mode)) 174 if (S_ISLNK(inode->i_mode))
184 return -EOPNOTSUPP; 175 return -EOPNOTSUPP;
185 acl = ops->getacl(inode, ACL_TYPE_ACCESS); 176 acl = get_cached_acl(inode, ACL_TYPE_ACCESS);
186 if (acl) { 177 if (acl) {
187 clone = posix_acl_clone(acl, GFP_KERNEL); 178 clone = posix_acl_clone(acl, GFP_KERNEL);
188 posix_acl_release(acl); 179 posix_acl_release(acl);
@@ -190,8 +181,37 @@ generic_acl_chmod(struct inode *inode, struct generic_acl_operations *ops)
190 return -ENOMEM; 181 return -ENOMEM;
191 error = posix_acl_chmod_masq(clone, inode->i_mode); 182 error = posix_acl_chmod_masq(clone, inode->i_mode);
192 if (!error) 183 if (!error)
193 ops->setacl(inode, ACL_TYPE_ACCESS, clone); 184 set_cached_acl(inode, ACL_TYPE_ACCESS, clone);
194 posix_acl_release(clone); 185 posix_acl_release(clone);
195 } 186 }
196 return error; 187 return error;
197} 188}
189
190int
191generic_check_acl(struct inode *inode, int mask)
192{
193 struct posix_acl *acl = get_cached_acl(inode, ACL_TYPE_ACCESS);
194
195 if (acl) {
196 int error = posix_acl_permission(inode, acl, mask);
197 posix_acl_release(acl);
198 return error;
199 }
200 return -EAGAIN;
201}
202
203struct xattr_handler generic_acl_access_handler = {
204 .prefix = POSIX_ACL_XATTR_ACCESS,
205 .flags = ACL_TYPE_ACCESS,
206 .list = generic_acl_list,
207 .get = generic_acl_get,
208 .set = generic_acl_set,
209};
210
211struct xattr_handler generic_acl_default_handler = {
212 .prefix = POSIX_ACL_XATTR_DEFAULT,
213 .flags = ACL_TYPE_DEFAULT,
214 .list = generic_acl_list,
215 .get = generic_acl_get,
216 .set = generic_acl_set,
217};
diff --git a/fs/gfs2/Kconfig b/fs/gfs2/Kconfig
index b192c661caa6..4dcddf83326f 100644
--- a/fs/gfs2/Kconfig
+++ b/fs/gfs2/Kconfig
@@ -10,7 +10,6 @@ config GFS2_FS
10 select SLOW_WORK 10 select SLOW_WORK
11 select QUOTA 11 select QUOTA
12 select QUOTACTL 12 select QUOTACTL
13 select FS_JOURNAL_INFO
14 help 13 help
15 A cluster filesystem. 14 A cluster filesystem.
16 15
diff --git a/fs/gfs2/acl.c b/fs/gfs2/acl.c
index 3eb1ea846173..87ee309d4c24 100644
--- a/fs/gfs2/acl.c
+++ b/fs/gfs2/acl.c
@@ -126,7 +126,7 @@ static int gfs2_acl_set(struct inode *inode, int type, struct posix_acl *acl)
126 error = posix_acl_to_xattr(acl, data, len); 126 error = posix_acl_to_xattr(acl, data, len);
127 if (error < 0) 127 if (error < 0)
128 goto out; 128 goto out;
129 error = gfs2_xattr_set(inode, GFS2_EATYPE_SYS, name, data, len, 0); 129 error = __gfs2_xattr_set(inode, name, data, len, 0, GFS2_EATYPE_SYS);
130 if (!error) 130 if (!error)
131 set_cached_acl(inode, type, acl); 131 set_cached_acl(inode, type, acl);
132out: 132out:
@@ -232,9 +232,10 @@ static int gfs2_acl_type(const char *name)
232 return -EINVAL; 232 return -EINVAL;
233} 233}
234 234
235static int gfs2_xattr_system_get(struct inode *inode, const char *name, 235static int gfs2_xattr_system_get(struct dentry *dentry, const char *name,
236 void *buffer, size_t size) 236 void *buffer, size_t size, int xtype)
237{ 237{
238 struct inode *inode = dentry->d_inode;
238 struct posix_acl *acl; 239 struct posix_acl *acl;
239 int type; 240 int type;
240 int error; 241 int error;
@@ -255,9 +256,11 @@ static int gfs2_xattr_system_get(struct inode *inode, const char *name,
255 return error; 256 return error;
256} 257}
257 258
258static int gfs2_xattr_system_set(struct inode *inode, const char *name, 259static int gfs2_xattr_system_set(struct dentry *dentry, const char *name,
259 const void *value, size_t size, int flags) 260 const void *value, size_t size, int flags,
261 int xtype)
260{ 262{
263 struct inode *inode = dentry->d_inode;
261 struct gfs2_sbd *sdp = GFS2_SB(inode); 264 struct gfs2_sbd *sdp = GFS2_SB(inode);
262 struct posix_acl *acl = NULL; 265 struct posix_acl *acl = NULL;
263 int error = 0, type; 266 int error = 0, type;
@@ -319,7 +322,7 @@ static int gfs2_xattr_system_set(struct inode *inode, const char *name,
319 } 322 }
320 323
321set_acl: 324set_acl:
322 error = gfs2_xattr_set(inode, GFS2_EATYPE_SYS, name, value, size, 0); 325 error = __gfs2_xattr_set(inode, name, value, size, 0, GFS2_EATYPE_SYS);
323 if (!error) { 326 if (!error) {
324 if (acl) 327 if (acl)
325 set_cached_acl(inode, type, acl); 328 set_cached_acl(inode, type, acl);
@@ -334,6 +337,7 @@ out:
334 337
335struct xattr_handler gfs2_xattr_system_handler = { 338struct xattr_handler gfs2_xattr_system_handler = {
336 .prefix = XATTR_SYSTEM_PREFIX, 339 .prefix = XATTR_SYSTEM_PREFIX,
340 .flags = GFS2_EATYPE_SYS,
337 .get = gfs2_xattr_system_get, 341 .get = gfs2_xattr_system_get,
338 .set = gfs2_xattr_system_set, 342 .set = gfs2_xattr_system_set,
339}; 343};
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index 26ba2a4c4a2d..6e220f4eee7d 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -125,7 +125,7 @@ static struct inode *gfs2_iget_skip(struct super_block *sb,
125 * directory entry when gfs2_inode_lookup() is invoked. Part of the code 125 * directory entry when gfs2_inode_lookup() is invoked. Part of the code
126 * segment inside gfs2_inode_lookup code needs to get moved around. 126 * segment inside gfs2_inode_lookup code needs to get moved around.
127 * 127 *
128 * Clean up I_LOCK and I_NEW as well. 128 * Clears I_NEW as well.
129 **/ 129 **/
130 130
131void gfs2_set_iop(struct inode *inode) 131void gfs2_set_iop(struct inode *inode)
@@ -801,7 +801,8 @@ static int gfs2_security_init(struct gfs2_inode *dip, struct gfs2_inode *ip)
801 return err; 801 return err;
802 } 802 }
803 803
804 err = gfs2_xattr_set(&ip->i_inode, GFS2_EATYPE_SECURITY, name, value, len, 0); 804 err = __gfs2_xattr_set(&ip->i_inode, name, value, len, 0,
805 GFS2_EATYPE_SECURITY);
805 kfree(value); 806 kfree(value);
806 kfree(name); 807 kfree(name);
807 808
diff --git a/fs/gfs2/xattr.c b/fs/gfs2/xattr.c
index 912f5cbc4740..8a04108e0c22 100644
--- a/fs/gfs2/xattr.c
+++ b/fs/gfs2/xattr.c
@@ -567,18 +567,17 @@ out:
567/** 567/**
568 * gfs2_xattr_get - Get a GFS2 extended attribute 568 * gfs2_xattr_get - Get a GFS2 extended attribute
569 * @inode: The inode 569 * @inode: The inode
570 * @type: The type of extended attribute
571 * @name: The name of the extended attribute 570 * @name: The name of the extended attribute
572 * @buffer: The buffer to write the result into 571 * @buffer: The buffer to write the result into
573 * @size: The size of the buffer 572 * @size: The size of the buffer
573 * @type: The type of extended attribute
574 * 574 *
575 * Returns: actual size of data on success, -errno on error 575 * Returns: actual size of data on success, -errno on error
576 */ 576 */
577 577static int gfs2_xattr_get(struct dentry *dentry, const char *name,
578int gfs2_xattr_get(struct inode *inode, int type, const char *name, 578 void *buffer, size_t size, int type)
579 void *buffer, size_t size)
580{ 579{
581 struct gfs2_inode *ip = GFS2_I(inode); 580 struct gfs2_inode *ip = GFS2_I(dentry->d_inode);
582 struct gfs2_ea_location el; 581 struct gfs2_ea_location el;
583 int error; 582 int error;
584 583
@@ -1119,7 +1118,7 @@ static int ea_remove_stuffed(struct gfs2_inode *ip, struct gfs2_ea_location *el)
1119 1118
1120/** 1119/**
1121 * gfs2_xattr_remove - Remove a GFS2 extended attribute 1120 * gfs2_xattr_remove - Remove a GFS2 extended attribute
1122 * @inode: The inode 1121 * @ip: The inode
1123 * @type: The type of the extended attribute 1122 * @type: The type of the extended attribute
1124 * @name: The name of the extended attribute 1123 * @name: The name of the extended attribute
1125 * 1124 *
@@ -1130,9 +1129,8 @@ static int ea_remove_stuffed(struct gfs2_inode *ip, struct gfs2_ea_location *el)
1130 * Returns: 0, or errno on failure 1129 * Returns: 0, or errno on failure
1131 */ 1130 */
1132 1131
1133static int gfs2_xattr_remove(struct inode *inode, int type, const char *name) 1132static int gfs2_xattr_remove(struct gfs2_inode *ip, int type, const char *name)
1134{ 1133{
1135 struct gfs2_inode *ip = GFS2_I(inode);
1136 struct gfs2_ea_location el; 1134 struct gfs2_ea_location el;
1137 int error; 1135 int error;
1138 1136
@@ -1156,24 +1154,24 @@ static int gfs2_xattr_remove(struct inode *inode, int type, const char *name)
1156} 1154}
1157 1155
1158/** 1156/**
1159 * gfs2_xattr_set - Set (or remove) a GFS2 extended attribute 1157 * __gfs2_xattr_set - Set (or remove) a GFS2 extended attribute
1160 * @inode: The inode 1158 * @ip: The inode
1161 * @type: The type of the extended attribute
1162 * @name: The name of the extended attribute 1159 * @name: The name of the extended attribute
1163 * @value: The value of the extended attribute (NULL for remove) 1160 * @value: The value of the extended attribute (NULL for remove)
1164 * @size: The size of the @value argument 1161 * @size: The size of the @value argument
1165 * @flags: Create or Replace 1162 * @flags: Create or Replace
1163 * @type: The type of the extended attribute
1166 * 1164 *
1167 * See gfs2_xattr_remove() for details of the removal of xattrs. 1165 * See gfs2_xattr_remove() for details of the removal of xattrs.
1168 * 1166 *
1169 * Returns: 0 or errno on failure 1167 * Returns: 0 or errno on failure
1170 */ 1168 */
1171 1169
1172int gfs2_xattr_set(struct inode *inode, int type, const char *name, 1170int __gfs2_xattr_set(struct inode *inode, const char *name,
1173 const void *value, size_t size, int flags) 1171 const void *value, size_t size, int flags, int type)
1174{ 1172{
1175 struct gfs2_sbd *sdp = GFS2_SB(inode);
1176 struct gfs2_inode *ip = GFS2_I(inode); 1173 struct gfs2_inode *ip = GFS2_I(inode);
1174 struct gfs2_sbd *sdp = GFS2_SB(inode);
1177 struct gfs2_ea_location el; 1175 struct gfs2_ea_location el;
1178 unsigned int namel = strlen(name); 1176 unsigned int namel = strlen(name);
1179 int error; 1177 int error;
@@ -1184,7 +1182,7 @@ int gfs2_xattr_set(struct inode *inode, int type, const char *name,
1184 return -ERANGE; 1182 return -ERANGE;
1185 1183
1186 if (value == NULL) 1184 if (value == NULL)
1187 return gfs2_xattr_remove(inode, type, name); 1185 return gfs2_xattr_remove(ip, type, name);
1188 1186
1189 if (ea_check_size(sdp, namel, size)) 1187 if (ea_check_size(sdp, namel, size))
1190 return -ERANGE; 1188 return -ERANGE;
@@ -1224,6 +1222,13 @@ int gfs2_xattr_set(struct inode *inode, int type, const char *name,
1224 return error; 1222 return error;
1225} 1223}
1226 1224
1225static int gfs2_xattr_set(struct dentry *dentry, const char *name,
1226 const void *value, size_t size, int flags, int type)
1227{
1228 return __gfs2_xattr_set(dentry->d_inode, name, value,
1229 size, flags, type);
1230}
1231
1227static int ea_acl_chmod_unstuffed(struct gfs2_inode *ip, 1232static int ea_acl_chmod_unstuffed(struct gfs2_inode *ip,
1228 struct gfs2_ea_header *ea, char *data) 1233 struct gfs2_ea_header *ea, char *data)
1229{ 1234{
@@ -1529,40 +1534,18 @@ out_alloc:
1529 return error; 1534 return error;
1530} 1535}
1531 1536
1532static int gfs2_xattr_user_get(struct inode *inode, const char *name,
1533 void *buffer, size_t size)
1534{
1535 return gfs2_xattr_get(inode, GFS2_EATYPE_USR, name, buffer, size);
1536}
1537
1538static int gfs2_xattr_user_set(struct inode *inode, const char *name,
1539 const void *value, size_t size, int flags)
1540{
1541 return gfs2_xattr_set(inode, GFS2_EATYPE_USR, name, value, size, flags);
1542}
1543
1544static int gfs2_xattr_security_get(struct inode *inode, const char *name,
1545 void *buffer, size_t size)
1546{
1547 return gfs2_xattr_get(inode, GFS2_EATYPE_SECURITY, name, buffer, size);
1548}
1549
1550static int gfs2_xattr_security_set(struct inode *inode, const char *name,
1551 const void *value, size_t size, int flags)
1552{
1553 return gfs2_xattr_set(inode, GFS2_EATYPE_SECURITY, name, value, size, flags);
1554}
1555
1556static struct xattr_handler gfs2_xattr_user_handler = { 1537static struct xattr_handler gfs2_xattr_user_handler = {
1557 .prefix = XATTR_USER_PREFIX, 1538 .prefix = XATTR_USER_PREFIX,
1558 .get = gfs2_xattr_user_get, 1539 .flags = GFS2_EATYPE_USR,
1559 .set = gfs2_xattr_user_set, 1540 .get = gfs2_xattr_get,
1541 .set = gfs2_xattr_set,
1560}; 1542};
1561 1543
1562static struct xattr_handler gfs2_xattr_security_handler = { 1544static struct xattr_handler gfs2_xattr_security_handler = {
1563 .prefix = XATTR_SECURITY_PREFIX, 1545 .prefix = XATTR_SECURITY_PREFIX,
1564 .get = gfs2_xattr_security_get, 1546 .flags = GFS2_EATYPE_SECURITY,
1565 .set = gfs2_xattr_security_set, 1547 .get = gfs2_xattr_get,
1548 .set = gfs2_xattr_set,
1566}; 1549};
1567 1550
1568struct xattr_handler *gfs2_xattr_handlers[] = { 1551struct xattr_handler *gfs2_xattr_handlers[] = {
diff --git a/fs/gfs2/xattr.h b/fs/gfs2/xattr.h
index 8d6ae5813c4d..d392f8358f2f 100644
--- a/fs/gfs2/xattr.h
+++ b/fs/gfs2/xattr.h
@@ -53,10 +53,9 @@ struct gfs2_ea_location {
53 struct gfs2_ea_header *el_prev; 53 struct gfs2_ea_header *el_prev;
54}; 54};
55 55
56extern int gfs2_xattr_get(struct inode *inode, int type, const char *name, 56extern int __gfs2_xattr_set(struct inode *inode, const char *name,
57 void *buffer, size_t size); 57 const void *value, size_t size,
58extern int gfs2_xattr_set(struct inode *inode, int type, const char *name, 58 int flags, int type);
59 const void *value, size_t size, int flags);
60extern ssize_t gfs2_listxattr(struct dentry *dentry, char *buffer, size_t size); 59extern ssize_t gfs2_listxattr(struct dentry *dentry, char *buffer, size_t size);
61extern int gfs2_ea_dealloc(struct gfs2_inode *ip); 60extern int gfs2_ea_dealloc(struct gfs2_inode *ip);
62 61
diff --git a/fs/hpfs/super.c b/fs/hpfs/super.c
index f2feaa06bf26..cadc4ce48656 100644
--- a/fs/hpfs/super.c
+++ b/fs/hpfs/super.c
@@ -14,6 +14,7 @@
14#include <linux/magic.h> 14#include <linux/magic.h>
15#include <linux/sched.h> 15#include <linux/sched.h>
16#include <linux/smp_lock.h> 16#include <linux/smp_lock.h>
17#include <linux/bitmap.h>
17 18
18/* Mark the filesystem dirty, so that chkdsk checks it when os/2 booted */ 19/* Mark the filesystem dirty, so that chkdsk checks it when os/2 booted */
19 20
@@ -115,15 +116,13 @@ static void hpfs_put_super(struct super_block *s)
115unsigned hpfs_count_one_bitmap(struct super_block *s, secno secno) 116unsigned hpfs_count_one_bitmap(struct super_block *s, secno secno)
116{ 117{
117 struct quad_buffer_head qbh; 118 struct quad_buffer_head qbh;
118 unsigned *bits; 119 unsigned long *bits;
119 unsigned i, count; 120 unsigned count;
120 if (!(bits = hpfs_map_4sectors(s, secno, &qbh, 4))) return 0; 121
121 count = 0; 122 bits = hpfs_map_4sectors(s, secno, &qbh, 4);
122 for (i = 0; i < 2048 / sizeof(unsigned); i++) { 123 if (!bits)
123 unsigned b; 124 return 0;
124 if (!bits[i]) continue; 125 count = bitmap_weight(bits, 2048 * BITS_PER_BYTE);
125 for (b = bits[i]; b; b>>=1) count += b & 1;
126 }
127 hpfs_brelse4(&qbh); 126 hpfs_brelse4(&qbh);
128 return count; 127 return count;
129} 128}
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index 87a1258953b8..a0bbd3d1b41a 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -30,7 +30,6 @@
30#include <linux/dnotify.h> 30#include <linux/dnotify.h>
31#include <linux/statfs.h> 31#include <linux/statfs.h>
32#include <linux/security.h> 32#include <linux/security.h>
33#include <linux/ima.h>
34#include <linux/magic.h> 33#include <linux/magic.h>
35 34
36#include <asm/uaccess.h> 35#include <asm/uaccess.h>
@@ -922,7 +921,8 @@ struct file *hugetlb_file_setup(const char *name, size_t size, int acctflag,
922 int error = -ENOMEM; 921 int error = -ENOMEM;
923 struct file *file; 922 struct file *file;
924 struct inode *inode; 923 struct inode *inode;
925 struct dentry *dentry, *root; 924 struct path path;
925 struct dentry *root;
926 struct qstr quick_string; 926 struct qstr quick_string;
927 927
928 *user = NULL; 928 *user = NULL;
@@ -944,10 +944,11 @@ struct file *hugetlb_file_setup(const char *name, size_t size, int acctflag,
944 quick_string.name = name; 944 quick_string.name = name;
945 quick_string.len = strlen(quick_string.name); 945 quick_string.len = strlen(quick_string.name);
946 quick_string.hash = 0; 946 quick_string.hash = 0;
947 dentry = d_alloc(root, &quick_string); 947 path.dentry = d_alloc(root, &quick_string);
948 if (!dentry) 948 if (!path.dentry)
949 goto out_shm_unlock; 949 goto out_shm_unlock;
950 950
951 path.mnt = mntget(hugetlbfs_vfsmount);
951 error = -ENOSPC; 952 error = -ENOSPC;
952 inode = hugetlbfs_get_inode(root->d_sb, current_fsuid(), 953 inode = hugetlbfs_get_inode(root->d_sb, current_fsuid(),
953 current_fsgid(), S_IFREG | S_IRWXUGO, 0); 954 current_fsgid(), S_IFREG | S_IRWXUGO, 0);
@@ -960,24 +961,22 @@ struct file *hugetlb_file_setup(const char *name, size_t size, int acctflag,
960 acctflag)) 961 acctflag))
961 goto out_inode; 962 goto out_inode;
962 963
963 d_instantiate(dentry, inode); 964 d_instantiate(path.dentry, inode);
964 inode->i_size = size; 965 inode->i_size = size;
965 inode->i_nlink = 0; 966 inode->i_nlink = 0;
966 967
967 error = -ENFILE; 968 error = -ENFILE;
968 file = alloc_file(hugetlbfs_vfsmount, dentry, 969 file = alloc_file(&path, FMODE_WRITE | FMODE_READ,
969 FMODE_WRITE | FMODE_READ,
970 &hugetlbfs_file_operations); 970 &hugetlbfs_file_operations);
971 if (!file) 971 if (!file)
972 goto out_dentry; /* inode is already attached */ 972 goto out_dentry; /* inode is already attached */
973 ima_counts_get(file);
974 973
975 return file; 974 return file;
976 975
977out_inode: 976out_inode:
978 iput(inode); 977 iput(inode);
979out_dentry: 978out_dentry:
980 dput(dentry); 979 path_put(&path);
981out_shm_unlock: 980out_shm_unlock:
982 if (*user) { 981 if (*user) {
983 user_shm_unlock(size, *user); 982 user_shm_unlock(size, *user);
diff --git a/fs/inode.c b/fs/inode.c
index 06c1f02de611..03dfeb2e3928 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -113,7 +113,7 @@ static void wake_up_inode(struct inode *inode)
113 * Prevent speculative execution through spin_unlock(&inode_lock); 113 * Prevent speculative execution through spin_unlock(&inode_lock);
114 */ 114 */
115 smp_mb(); 115 smp_mb();
116 wake_up_bit(&inode->i_state, __I_LOCK); 116 wake_up_bit(&inode->i_state, __I_NEW);
117} 117}
118 118
119/** 119/**
@@ -690,17 +690,17 @@ void unlock_new_inode(struct inode *inode)
690 } 690 }
691#endif 691#endif
692 /* 692 /*
693 * This is special! We do not need the spinlock when clearing I_LOCK, 693 * This is special! We do not need the spinlock when clearing I_NEW,
694 * because we're guaranteed that nobody else tries to do anything about 694 * because we're guaranteed that nobody else tries to do anything about
695 * the state of the inode when it is locked, as we just created it (so 695 * the state of the inode when it is locked, as we just created it (so
696 * there can be no old holders that haven't tested I_LOCK). 696 * there can be no old holders that haven't tested I_NEW).
697 * However we must emit the memory barrier so that other CPUs reliably 697 * However we must emit the memory barrier so that other CPUs reliably
698 * see the clearing of I_LOCK after the other inode initialisation has 698 * see the clearing of I_NEW after the other inode initialisation has
699 * completed. 699 * completed.
700 */ 700 */
701 smp_mb(); 701 smp_mb();
702 WARN_ON((inode->i_state & (I_LOCK|I_NEW)) != (I_LOCK|I_NEW)); 702 WARN_ON(!(inode->i_state & I_NEW));
703 inode->i_state &= ~(I_LOCK|I_NEW); 703 inode->i_state &= ~I_NEW;
704 wake_up_inode(inode); 704 wake_up_inode(inode);
705} 705}
706EXPORT_SYMBOL(unlock_new_inode); 706EXPORT_SYMBOL(unlock_new_inode);
@@ -731,7 +731,7 @@ static struct inode *get_new_inode(struct super_block *sb,
731 goto set_failed; 731 goto set_failed;
732 732
733 __inode_add_to_lists(sb, head, inode); 733 __inode_add_to_lists(sb, head, inode);
734 inode->i_state = I_LOCK|I_NEW; 734 inode->i_state = I_NEW;
735 spin_unlock(&inode_lock); 735 spin_unlock(&inode_lock);
736 736
737 /* Return the locked inode with I_NEW set, the 737 /* Return the locked inode with I_NEW set, the
@@ -778,7 +778,7 @@ static struct inode *get_new_inode_fast(struct super_block *sb,
778 if (!old) { 778 if (!old) {
779 inode->i_ino = ino; 779 inode->i_ino = ino;
780 __inode_add_to_lists(sb, head, inode); 780 __inode_add_to_lists(sb, head, inode);
781 inode->i_state = I_LOCK|I_NEW; 781 inode->i_state = I_NEW;
782 spin_unlock(&inode_lock); 782 spin_unlock(&inode_lock);
783 783
784 /* Return the locked inode with I_NEW set, the 784 /* Return the locked inode with I_NEW set, the
@@ -1083,7 +1083,7 @@ int insert_inode_locked(struct inode *inode)
1083 ino_t ino = inode->i_ino; 1083 ino_t ino = inode->i_ino;
1084 struct hlist_head *head = inode_hashtable + hash(sb, ino); 1084 struct hlist_head *head = inode_hashtable + hash(sb, ino);
1085 1085
1086 inode->i_state |= I_LOCK|I_NEW; 1086 inode->i_state |= I_NEW;
1087 while (1) { 1087 while (1) {
1088 struct hlist_node *node; 1088 struct hlist_node *node;
1089 struct inode *old = NULL; 1089 struct inode *old = NULL;
@@ -1120,7 +1120,7 @@ int insert_inode_locked4(struct inode *inode, unsigned long hashval,
1120 struct super_block *sb = inode->i_sb; 1120 struct super_block *sb = inode->i_sb;
1121 struct hlist_head *head = inode_hashtable + hash(sb, hashval); 1121 struct hlist_head *head = inode_hashtable + hash(sb, hashval);
1122 1122
1123 inode->i_state |= I_LOCK|I_NEW; 1123 inode->i_state |= I_NEW;
1124 1124
1125 while (1) { 1125 while (1) {
1126 struct hlist_node *node; 1126 struct hlist_node *node;
@@ -1510,7 +1510,7 @@ EXPORT_SYMBOL(inode_wait);
1510 * until the deletion _might_ have completed. Callers are responsible 1510 * until the deletion _might_ have completed. Callers are responsible
1511 * to recheck inode state. 1511 * to recheck inode state.
1512 * 1512 *
1513 * It doesn't matter if I_LOCK is not set initially, a call to 1513 * It doesn't matter if I_NEW is not set initially, a call to
1514 * wake_up_inode() after removing from the hash list will DTRT. 1514 * wake_up_inode() after removing from the hash list will DTRT.
1515 * 1515 *
1516 * This is called with inode_lock held. 1516 * This is called with inode_lock held.
@@ -1518,8 +1518,8 @@ EXPORT_SYMBOL(inode_wait);
1518static void __wait_on_freeing_inode(struct inode *inode) 1518static void __wait_on_freeing_inode(struct inode *inode)
1519{ 1519{
1520 wait_queue_head_t *wq; 1520 wait_queue_head_t *wq;
1521 DEFINE_WAIT_BIT(wait, &inode->i_state, __I_LOCK); 1521 DEFINE_WAIT_BIT(wait, &inode->i_state, __I_NEW);
1522 wq = bit_waitqueue(&inode->i_state, __I_LOCK); 1522 wq = bit_waitqueue(&inode->i_state, __I_NEW);
1523 prepare_to_wait(wq, &wait.wait, TASK_UNINTERRUPTIBLE); 1523 prepare_to_wait(wq, &wait.wait, TASK_UNINTERRUPTIBLE);
1524 spin_unlock(&inode_lock); 1524 spin_unlock(&inode_lock);
1525 schedule(); 1525 schedule();
diff --git a/fs/internal.h b/fs/internal.h
index 515175b8b72e..f67cd141d9a8 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -79,6 +79,7 @@ extern void chroot_fs_refs(struct path *, struct path *);
79 * file_table.c 79 * file_table.c
80 */ 80 */
81extern void mark_files_ro(struct super_block *); 81extern void mark_files_ro(struct super_block *);
82extern struct file *get_empty_filp(void);
82 83
83/* 84/*
84 * super.c 85 * super.c
diff --git a/fs/isofs/export.c b/fs/isofs/export.c
index e81a30593ba9..ed752cb38474 100644
--- a/fs/isofs/export.c
+++ b/fs/isofs/export.c
@@ -9,7 +9,7 @@
9 * 9 *
10 * The following files are helpful: 10 * The following files are helpful:
11 * 11 *
12 * Documentation/filesystems/Exporting 12 * Documentation/filesystems/nfs/Exporting
13 * fs/exportfs/expfs.c. 13 * fs/exportfs/expfs.c.
14 */ 14 */
15 15
diff --git a/fs/jbd/Kconfig b/fs/jbd/Kconfig
index a8408983abd4..4e28beeed157 100644
--- a/fs/jbd/Kconfig
+++ b/fs/jbd/Kconfig
@@ -1,6 +1,5 @@
1config JBD 1config JBD
2 tristate 2 tristate
3 select FS_JOURNAL_INFO
4 help 3 help
5 This is a generic journalling layer for block devices. It is 4 This is a generic journalling layer for block devices. It is
6 currently used by the ext3 file system, but it could also be 5 currently used by the ext3 file system, but it could also be
diff --git a/fs/jbd2/Kconfig b/fs/jbd2/Kconfig
index 0f7d1ceafdfd..f32f346f4b0a 100644
--- a/fs/jbd2/Kconfig
+++ b/fs/jbd2/Kconfig
@@ -1,7 +1,6 @@
1config JBD2 1config JBD2
2 tristate 2 tristate
3 select CRC32 3 select CRC32
4 select FS_JOURNAL_INFO
5 help 4 help
6 This is a generic journaling layer for block devices that support 5 This is a generic journaling layer for block devices that support
7 both 32-bit and 64-bit block numbers. It is currently used by 6 both 32-bit and 64-bit block numbers. It is currently used by
diff --git a/fs/jffs2/acl.c b/fs/jffs2/acl.c
index 7edb62e97419..7cdc3196476a 100644
--- a/fs/jffs2/acl.c
+++ b/fs/jffs2/acl.c
@@ -350,8 +350,8 @@ int jffs2_acl_chmod(struct inode *inode)
350 return rc; 350 return rc;
351} 351}
352 352
353static size_t jffs2_acl_access_listxattr(struct inode *inode, char *list, size_t list_size, 353static size_t jffs2_acl_access_listxattr(struct dentry *dentry, char *list,
354 const char *name, size_t name_len) 354 size_t list_size, const char *name, size_t name_len, int type)
355{ 355{
356 const int retlen = sizeof(POSIX_ACL_XATTR_ACCESS); 356 const int retlen = sizeof(POSIX_ACL_XATTR_ACCESS);
357 357
@@ -360,8 +360,8 @@ static size_t jffs2_acl_access_listxattr(struct inode *inode, char *list, size_t
360 return retlen; 360 return retlen;
361} 361}
362 362
363static size_t jffs2_acl_default_listxattr(struct inode *inode, char *list, size_t list_size, 363static size_t jffs2_acl_default_listxattr(struct dentry *dentry, char *list,
364 const char *name, size_t name_len) 364 size_t list_size, const char *name, size_t name_len, int type)
365{ 365{
366 const int retlen = sizeof(POSIX_ACL_XATTR_DEFAULT); 366 const int retlen = sizeof(POSIX_ACL_XATTR_DEFAULT);
367 367
@@ -370,12 +370,16 @@ static size_t jffs2_acl_default_listxattr(struct inode *inode, char *list, size_
370 return retlen; 370 return retlen;
371} 371}
372 372
373static int jffs2_acl_getxattr(struct inode *inode, int type, void *buffer, size_t size) 373static int jffs2_acl_getxattr(struct dentry *dentry, const char *name,
374 void *buffer, size_t size, int type)
374{ 375{
375 struct posix_acl *acl; 376 struct posix_acl *acl;
376 int rc; 377 int rc;
377 378
378 acl = jffs2_get_acl(inode, type); 379 if (name[0] != '\0')
380 return -EINVAL;
381
382 acl = jffs2_get_acl(dentry->d_inode, type);
379 if (IS_ERR(acl)) 383 if (IS_ERR(acl))
380 return PTR_ERR(acl); 384 return PTR_ERR(acl);
381 if (!acl) 385 if (!acl)
@@ -386,26 +390,15 @@ static int jffs2_acl_getxattr(struct inode *inode, int type, void *buffer, size_
386 return rc; 390 return rc;
387} 391}
388 392
389static int jffs2_acl_access_getxattr(struct inode *inode, const char *name, void *buffer, size_t size) 393static int jffs2_acl_setxattr(struct dentry *dentry, const char *name,
390{ 394 const void *value, size_t size, int flags, int type)
391 if (name[0] != '\0')
392 return -EINVAL;
393 return jffs2_acl_getxattr(inode, ACL_TYPE_ACCESS, buffer, size);
394}
395
396static int jffs2_acl_default_getxattr(struct inode *inode, const char *name, void *buffer, size_t size)
397{
398 if (name[0] != '\0')
399 return -EINVAL;
400 return jffs2_acl_getxattr(inode, ACL_TYPE_DEFAULT, buffer, size);
401}
402
403static int jffs2_acl_setxattr(struct inode *inode, int type, const void *value, size_t size)
404{ 395{
405 struct posix_acl *acl; 396 struct posix_acl *acl;
406 int rc; 397 int rc;
407 398
408 if (!is_owner_or_cap(inode)) 399 if (name[0] != '\0')
400 return -EINVAL;
401 if (!is_owner_or_cap(dentry->d_inode))
409 return -EPERM; 402 return -EPERM;
410 403
411 if (value) { 404 if (value) {
@@ -420,38 +413,24 @@ static int jffs2_acl_setxattr(struct inode *inode, int type, const void *value,
420 } else { 413 } else {
421 acl = NULL; 414 acl = NULL;
422 } 415 }
423 rc = jffs2_set_acl(inode, type, acl); 416 rc = jffs2_set_acl(dentry->d_inode, type, acl);
424 out: 417 out:
425 posix_acl_release(acl); 418 posix_acl_release(acl);
426 return rc; 419 return rc;
427} 420}
428 421
429static int jffs2_acl_access_setxattr(struct inode *inode, const char *name,
430 const void *buffer, size_t size, int flags)
431{
432 if (name[0] != '\0')
433 return -EINVAL;
434 return jffs2_acl_setxattr(inode, ACL_TYPE_ACCESS, buffer, size);
435}
436
437static int jffs2_acl_default_setxattr(struct inode *inode, const char *name,
438 const void *buffer, size_t size, int flags)
439{
440 if (name[0] != '\0')
441 return -EINVAL;
442 return jffs2_acl_setxattr(inode, ACL_TYPE_DEFAULT, buffer, size);
443}
444
445struct xattr_handler jffs2_acl_access_xattr_handler = { 422struct xattr_handler jffs2_acl_access_xattr_handler = {
446 .prefix = POSIX_ACL_XATTR_ACCESS, 423 .prefix = POSIX_ACL_XATTR_ACCESS,
424 .flags = ACL_TYPE_DEFAULT,
447 .list = jffs2_acl_access_listxattr, 425 .list = jffs2_acl_access_listxattr,
448 .get = jffs2_acl_access_getxattr, 426 .get = jffs2_acl_getxattr,
449 .set = jffs2_acl_access_setxattr, 427 .set = jffs2_acl_setxattr,
450}; 428};
451 429
452struct xattr_handler jffs2_acl_default_xattr_handler = { 430struct xattr_handler jffs2_acl_default_xattr_handler = {
453 .prefix = POSIX_ACL_XATTR_DEFAULT, 431 .prefix = POSIX_ACL_XATTR_DEFAULT,
432 .flags = ACL_TYPE_DEFAULT,
454 .list = jffs2_acl_default_listxattr, 433 .list = jffs2_acl_default_listxattr,
455 .get = jffs2_acl_default_getxattr, 434 .get = jffs2_acl_getxattr,
456 .set = jffs2_acl_default_setxattr, 435 .set = jffs2_acl_setxattr,
457}; 436};
diff --git a/fs/jffs2/gc.c b/fs/jffs2/gc.c
index 090c556ffed2..3b6f2fa12cff 100644
--- a/fs/jffs2/gc.c
+++ b/fs/jffs2/gc.c
@@ -700,7 +700,8 @@ static int jffs2_garbage_collect_metadata(struct jffs2_sb_info *c, struct jffs2_
700 struct jffs2_raw_inode ri; 700 struct jffs2_raw_inode ri;
701 struct jffs2_node_frag *last_frag; 701 struct jffs2_node_frag *last_frag;
702 union jffs2_device_node dev; 702 union jffs2_device_node dev;
703 char *mdata = NULL, mdatalen = 0; 703 char *mdata = NULL;
704 int mdatalen = 0;
704 uint32_t alloclen, ilen; 705 uint32_t alloclen, ilen;
705 int ret; 706 int ret;
706 707
diff --git a/fs/jffs2/readinode.c b/fs/jffs2/readinode.c
index 378991cfe40f..e22de8397b74 100644
--- a/fs/jffs2/readinode.c
+++ b/fs/jffs2/readinode.c
@@ -1284,7 +1284,7 @@ static int jffs2_do_read_inode_internal(struct jffs2_sb_info *c,
1284 f->target = NULL; 1284 f->target = NULL;
1285 mutex_unlock(&f->sem); 1285 mutex_unlock(&f->sem);
1286 jffs2_do_clear_inode(c, f); 1286 jffs2_do_clear_inode(c, f);
1287 return -ret; 1287 return ret;
1288 } 1288 }
1289 1289
1290 f->target[je32_to_cpu(latest_node->csize)] = '\0'; 1290 f->target[je32_to_cpu(latest_node->csize)] = '\0';
diff --git a/fs/jffs2/security.c b/fs/jffs2/security.c
index 02c39c64ecb3..eaccee058583 100644
--- a/fs/jffs2/security.c
+++ b/fs/jffs2/security.c
@@ -44,26 +44,28 @@ int jffs2_init_security(struct inode *inode, struct inode *dir)
44} 44}
45 45
46/* ---- XATTR Handler for "security.*" ----------------- */ 46/* ---- XATTR Handler for "security.*" ----------------- */
47static int jffs2_security_getxattr(struct inode *inode, const char *name, 47static int jffs2_security_getxattr(struct dentry *dentry, const char *name,
48 void *buffer, size_t size) 48 void *buffer, size_t size, int type)
49{ 49{
50 if (!strcmp(name, "")) 50 if (!strcmp(name, ""))
51 return -EINVAL; 51 return -EINVAL;
52 52
53 return do_jffs2_getxattr(inode, JFFS2_XPREFIX_SECURITY, name, buffer, size); 53 return do_jffs2_getxattr(dentry->d_inode, JFFS2_XPREFIX_SECURITY,
54 name, buffer, size);
54} 55}
55 56
56static int jffs2_security_setxattr(struct inode *inode, const char *name, const void *buffer, 57static int jffs2_security_setxattr(struct dentry *dentry, const char *name,
57 size_t size, int flags) 58 const void *buffer, size_t size, int flags, int type)
58{ 59{
59 if (!strcmp(name, "")) 60 if (!strcmp(name, ""))
60 return -EINVAL; 61 return -EINVAL;
61 62
62 return do_jffs2_setxattr(inode, JFFS2_XPREFIX_SECURITY, name, buffer, size, flags); 63 return do_jffs2_setxattr(dentry->d_inode, JFFS2_XPREFIX_SECURITY,
64 name, buffer, size, flags);
63} 65}
64 66
65static size_t jffs2_security_listxattr(struct inode *inode, char *list, size_t list_size, 67static size_t jffs2_security_listxattr(struct dentry *dentry, char *list,
66 const char *name, size_t name_len) 68 size_t list_size, const char *name, size_t name_len, int type)
67{ 69{
68 size_t retlen = XATTR_SECURITY_PREFIX_LEN + name_len + 1; 70 size_t retlen = XATTR_SECURITY_PREFIX_LEN + name_len + 1;
69 71
diff --git a/fs/jffs2/summary.c b/fs/jffs2/summary.c
index 6caf1e1ee26d..800171dca53b 100644
--- a/fs/jffs2/summary.c
+++ b/fs/jffs2/summary.c
@@ -23,7 +23,7 @@
23 23
24int jffs2_sum_init(struct jffs2_sb_info *c) 24int jffs2_sum_init(struct jffs2_sb_info *c)
25{ 25{
26 uint32_t sum_size = max_t(uint32_t, c->sector_size, MAX_SUMMARY_SIZE); 26 uint32_t sum_size = min_t(uint32_t, c->sector_size, MAX_SUMMARY_SIZE);
27 27
28 c->summary = kzalloc(sizeof(struct jffs2_summary), GFP_KERNEL); 28 c->summary = kzalloc(sizeof(struct jffs2_summary), GFP_KERNEL);
29 29
diff --git a/fs/jffs2/xattr.c b/fs/jffs2/xattr.c
index 4b107881acd5..9e75c62c85d6 100644
--- a/fs/jffs2/xattr.c
+++ b/fs/jffs2/xattr.c
@@ -990,9 +990,11 @@ ssize_t jffs2_listxattr(struct dentry *dentry, char *buffer, size_t size)
990 if (!xhandle) 990 if (!xhandle)
991 continue; 991 continue;
992 if (buffer) { 992 if (buffer) {
993 rc = xhandle->list(inode, buffer+len, size-len, xd->xname, xd->name_len); 993 rc = xhandle->list(dentry, buffer+len, size-len,
994 xd->xname, xd->name_len, xd->flags);
994 } else { 995 } else {
995 rc = xhandle->list(inode, NULL, 0, xd->xname, xd->name_len); 996 rc = xhandle->list(dentry, NULL, 0, xd->xname,
997 xd->name_len, xd->flags);
996 } 998 }
997 if (rc < 0) 999 if (rc < 0)
998 goto out; 1000 goto out;
diff --git a/fs/jffs2/xattr_trusted.c b/fs/jffs2/xattr_trusted.c
index 8ec5765ef348..3e5a5e356e05 100644
--- a/fs/jffs2/xattr_trusted.c
+++ b/fs/jffs2/xattr_trusted.c
@@ -16,24 +16,26 @@
16#include <linux/mtd/mtd.h> 16#include <linux/mtd/mtd.h>
17#include "nodelist.h" 17#include "nodelist.h"
18 18
19static int jffs2_trusted_getxattr(struct inode *inode, const char *name, 19static int jffs2_trusted_getxattr(struct dentry *dentry, const char *name,
20 void *buffer, size_t size) 20 void *buffer, size_t size, int type)
21{ 21{
22 if (!strcmp(name, "")) 22 if (!strcmp(name, ""))
23 return -EINVAL; 23 return -EINVAL;
24 return do_jffs2_getxattr(inode, JFFS2_XPREFIX_TRUSTED, name, buffer, size); 24 return do_jffs2_getxattr(dentry->d_inode, JFFS2_XPREFIX_TRUSTED,
25 name, buffer, size);
25} 26}
26 27
27static int jffs2_trusted_setxattr(struct inode *inode, const char *name, const void *buffer, 28static int jffs2_trusted_setxattr(struct dentry *dentry, const char *name,
28 size_t size, int flags) 29 const void *buffer, size_t size, int flags, int type)
29{ 30{
30 if (!strcmp(name, "")) 31 if (!strcmp(name, ""))
31 return -EINVAL; 32 return -EINVAL;
32 return do_jffs2_setxattr(inode, JFFS2_XPREFIX_TRUSTED, name, buffer, size, flags); 33 return do_jffs2_setxattr(dentry->d_inode, JFFS2_XPREFIX_TRUSTED,
34 name, buffer, size, flags);
33} 35}
34 36
35static size_t jffs2_trusted_listxattr(struct inode *inode, char *list, size_t list_size, 37static size_t jffs2_trusted_listxattr(struct dentry *dentry, char *list,
36 const char *name, size_t name_len) 38 size_t list_size, const char *name, size_t name_len, int type)
37{ 39{
38 size_t retlen = XATTR_TRUSTED_PREFIX_LEN + name_len + 1; 40 size_t retlen = XATTR_TRUSTED_PREFIX_LEN + name_len + 1;
39 41
diff --git a/fs/jffs2/xattr_user.c b/fs/jffs2/xattr_user.c
index 8bbeab90ada1..8544af67dffe 100644
--- a/fs/jffs2/xattr_user.c
+++ b/fs/jffs2/xattr_user.c
@@ -16,24 +16,26 @@
16#include <linux/mtd/mtd.h> 16#include <linux/mtd/mtd.h>
17#include "nodelist.h" 17#include "nodelist.h"
18 18
19static int jffs2_user_getxattr(struct inode *inode, const char *name, 19static int jffs2_user_getxattr(struct dentry *dentry, const char *name,
20 void *buffer, size_t size) 20 void *buffer, size_t size, int type)
21{ 21{
22 if (!strcmp(name, "")) 22 if (!strcmp(name, ""))
23 return -EINVAL; 23 return -EINVAL;
24 return do_jffs2_getxattr(inode, JFFS2_XPREFIX_USER, name, buffer, size); 24 return do_jffs2_getxattr(dentry->d_inode, JFFS2_XPREFIX_USER,
25 name, buffer, size);
25} 26}
26 27
27static int jffs2_user_setxattr(struct inode *inode, const char *name, const void *buffer, 28static int jffs2_user_setxattr(struct dentry *dentry, const char *name,
28 size_t size, int flags) 29 const void *buffer, size_t size, int flags, int type)
29{ 30{
30 if (!strcmp(name, "")) 31 if (!strcmp(name, ""))
31 return -EINVAL; 32 return -EINVAL;
32 return do_jffs2_setxattr(inode, JFFS2_XPREFIX_USER, name, buffer, size, flags); 33 return do_jffs2_setxattr(dentry->d_inode, JFFS2_XPREFIX_USER,
34 name, buffer, size, flags);
33} 35}
34 36
35static size_t jffs2_user_listxattr(struct inode *inode, char *list, size_t list_size, 37static size_t jffs2_user_listxattr(struct dentry *dentry, char *list,
36 const char *name, size_t name_len) 38 size_t list_size, const char *name, size_t name_len, int type)
37{ 39{
38 size_t retlen = XATTR_USER_PREFIX_LEN + name_len + 1; 40 size_t retlen = XATTR_USER_PREFIX_LEN + name_len + 1;
39 41
diff --git a/fs/jfs/jfs_txnmgr.c b/fs/jfs/jfs_txnmgr.c
index f26e4d03ada5..d945ea76b445 100644
--- a/fs/jfs/jfs_txnmgr.c
+++ b/fs/jfs/jfs_txnmgr.c
@@ -1292,7 +1292,7 @@ int txCommit(tid_t tid, /* transaction identifier */
1292 */ 1292 */
1293 /* 1293 /*
1294 * I believe this code is no longer needed. Splitting I_LOCK 1294 * I believe this code is no longer needed. Splitting I_LOCK
1295 * into two bits, I_LOCK and I_SYNC should prevent this 1295 * into two bits, I_NEW and I_SYNC should prevent this
1296 * deadlock as well. But since I don't have a JFS testload 1296 * deadlock as well. But since I don't have a JFS testload
1297 * to verify this, only a trivial s/I_LOCK/I_SYNC/ was done. 1297 * to verify this, only a trivial s/I_LOCK/I_SYNC/ was done.
1298 * Joern 1298 * Joern
diff --git a/fs/libfs.c b/fs/libfs.c
index 219576c52d80..6e8d17e1dc4c 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -848,7 +848,6 @@ EXPORT_SYMBOL(simple_write_end);
848EXPORT_SYMBOL(simple_dir_inode_operations); 848EXPORT_SYMBOL(simple_dir_inode_operations);
849EXPORT_SYMBOL(simple_dir_operations); 849EXPORT_SYMBOL(simple_dir_operations);
850EXPORT_SYMBOL(simple_empty); 850EXPORT_SYMBOL(simple_empty);
851EXPORT_SYMBOL(d_alloc_name);
852EXPORT_SYMBOL(simple_fill_super); 851EXPORT_SYMBOL(simple_fill_super);
853EXPORT_SYMBOL(simple_getattr); 852EXPORT_SYMBOL(simple_getattr);
854EXPORT_SYMBOL(simple_link); 853EXPORT_SYMBOL(simple_link);
diff --git a/fs/lockd/svc4proc.c b/fs/lockd/svc4proc.c
index bd173a6ca3b1..a7966eed3c17 100644
--- a/fs/lockd/svc4proc.c
+++ b/fs/lockd/svc4proc.c
@@ -11,10 +11,6 @@
11#include <linux/time.h> 11#include <linux/time.h>
12#include <linux/slab.h> 12#include <linux/slab.h>
13#include <linux/smp_lock.h> 13#include <linux/smp_lock.h>
14#include <linux/in.h>
15#include <linux/sunrpc/svc.h>
16#include <linux/sunrpc/clnt.h>
17#include <linux/nfsd/nfsd.h>
18#include <linux/lockd/lockd.h> 14#include <linux/lockd/lockd.h>
19#include <linux/lockd/share.h> 15#include <linux/lockd/share.h>
20 16
diff --git a/fs/lockd/svcproc.c b/fs/lockd/svcproc.c
index e1d28ddd2169..56c9519d900a 100644
--- a/fs/lockd/svcproc.c
+++ b/fs/lockd/svcproc.c
@@ -11,10 +11,6 @@
11#include <linux/time.h> 11#include <linux/time.h>
12#include <linux/slab.h> 12#include <linux/slab.h>
13#include <linux/smp_lock.h> 13#include <linux/smp_lock.h>
14#include <linux/in.h>
15#include <linux/sunrpc/svc.h>
16#include <linux/sunrpc/clnt.h>
17#include <linux/nfsd/nfsd.h>
18#include <linux/lockd/lockd.h> 14#include <linux/lockd/lockd.h>
19#include <linux/lockd/share.h> 15#include <linux/lockd/share.h>
20 16
diff --git a/fs/namei.c b/fs/namei.c
index 87f97ba90ad1..dad4b80257db 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -35,6 +35,8 @@
35#include <linux/fs_struct.h> 35#include <linux/fs_struct.h>
36#include <asm/uaccess.h> 36#include <asm/uaccess.h>
37 37
38#include "internal.h"
39
38#define ACC_MODE(x) ("\000\004\002\006"[(x)&O_ACCMODE]) 40#define ACC_MODE(x) ("\000\004\002\006"[(x)&O_ACCMODE])
39 41
40/* [Feb-1997 T. Schoebel-Theuer] 42/* [Feb-1997 T. Schoebel-Theuer]
@@ -108,8 +110,6 @@
108 * any extra contention... 110 * any extra contention...
109 */ 111 */
110 112
111static int __link_path_walk(const char *name, struct nameidata *nd);
112
113/* In order to reduce some races, while at the same time doing additional 113/* In order to reduce some races, while at the same time doing additional
114 * checking and hopefully speeding things up, we copy filenames to the 114 * checking and hopefully speeding things up, we copy filenames to the
115 * kernel data space before using them.. 115 * kernel data space before using them..
@@ -414,36 +414,55 @@ do_revalidate(struct dentry *dentry, struct nameidata *nd)
414} 414}
415 415
416/* 416/*
417 * Internal lookup() using the new generic dcache. 417 * force_reval_path - force revalidation of a dentry
418 * SMP-safe 418 *
419 * In some situations the path walking code will trust dentries without
420 * revalidating them. This causes problems for filesystems that depend on
421 * d_revalidate to handle file opens (e.g. NFSv4). When FS_REVAL_DOT is set
422 * (which indicates that it's possible for the dentry to go stale), force
423 * a d_revalidate call before proceeding.
424 *
425 * Returns 0 if the revalidation was successful. If the revalidation fails,
426 * either return the error returned by d_revalidate or -ESTALE if the
427 * revalidation just returned 0. If d_revalidate returns 0, we attempt to
428 * invalidate the dentry. It's up to the caller to handle putting references
429 * to the path if necessary.
419 */ 430 */
420static struct dentry * cached_lookup(struct dentry * parent, struct qstr * name, struct nameidata *nd) 431static int
432force_reval_path(struct path *path, struct nameidata *nd)
421{ 433{
422 struct dentry * dentry = __d_lookup(parent, name); 434 int status;
435 struct dentry *dentry = path->dentry;
423 436
424 /* lockess __d_lookup may fail due to concurrent d_move() 437 /*
425 * in some unrelated directory, so try with d_lookup 438 * only check on filesystems where it's possible for the dentry to
439 * become stale. It's assumed that if this flag is set then the
440 * d_revalidate op will also be defined.
426 */ 441 */
427 if (!dentry) 442 if (!(dentry->d_sb->s_type->fs_flags & FS_REVAL_DOT))
428 dentry = d_lookup(parent, name); 443 return 0;
429 444
430 if (dentry && dentry->d_op && dentry->d_op->d_revalidate) 445 status = dentry->d_op->d_revalidate(dentry, nd);
431 dentry = do_revalidate(dentry, nd); 446 if (status > 0)
447 return 0;
432 448
433 return dentry; 449 if (!status) {
450 d_invalidate(dentry);
451 status = -ESTALE;
452 }
453 return status;
434} 454}
435 455
436/* 456/*
437 * Short-cut version of permission(), for calling by 457 * Short-cut version of permission(), for calling on directories
438 * path_walk(), when dcache lock is held. Combines parts 458 * during pathname resolution. Combines parts of permission()
439 * of permission() and generic_permission(), and tests ONLY for 459 * and generic_permission(), and tests ONLY for MAY_EXEC permission.
440 * MAY_EXEC permission.
441 * 460 *
442 * If appropriate, check DAC only. If not appropriate, or 461 * If appropriate, check DAC only. If not appropriate, or
443 * short-cut DAC fails, then call permission() to do more 462 * short-cut DAC fails, then call ->permission() to do more
444 * complete permission check. 463 * complete permission check.
445 */ 464 */
446static int exec_permission_lite(struct inode *inode) 465static int exec_permission(struct inode *inode)
447{ 466{
448 int ret; 467 int ret;
449 468
@@ -465,99 +484,6 @@ ok:
465 return security_inode_permission(inode, MAY_EXEC); 484 return security_inode_permission(inode, MAY_EXEC);
466} 485}
467 486
468/*
469 * This is called when everything else fails, and we actually have
470 * to go to the low-level filesystem to find out what we should do..
471 *
472 * We get the directory semaphore, and after getting that we also
473 * make sure that nobody added the entry to the dcache in the meantime..
474 * SMP-safe
475 */
476static struct dentry * real_lookup(struct dentry * parent, struct qstr * name, struct nameidata *nd)
477{
478 struct dentry * result;
479 struct inode *dir = parent->d_inode;
480
481 mutex_lock(&dir->i_mutex);
482 /*
483 * First re-do the cached lookup just in case it was created
484 * while we waited for the directory semaphore..
485 *
486 * FIXME! This could use version numbering or similar to
487 * avoid unnecessary cache lookups.
488 *
489 * The "dcache_lock" is purely to protect the RCU list walker
490 * from concurrent renames at this point (we mustn't get false
491 * negatives from the RCU list walk here, unlike the optimistic
492 * fast walk).
493 *
494 * so doing d_lookup() (with seqlock), instead of lockfree __d_lookup
495 */
496 result = d_lookup(parent, name);
497 if (!result) {
498 struct dentry *dentry;
499
500 /* Don't create child dentry for a dead directory. */
501 result = ERR_PTR(-ENOENT);
502 if (IS_DEADDIR(dir))
503 goto out_unlock;
504
505 dentry = d_alloc(parent, name);
506 result = ERR_PTR(-ENOMEM);
507 if (dentry) {
508 result = dir->i_op->lookup(dir, dentry, nd);
509 if (result)
510 dput(dentry);
511 else
512 result = dentry;
513 }
514out_unlock:
515 mutex_unlock(&dir->i_mutex);
516 return result;
517 }
518
519 /*
520 * Uhhuh! Nasty case: the cache was re-populated while
521 * we waited on the semaphore. Need to revalidate.
522 */
523 mutex_unlock(&dir->i_mutex);
524 if (result->d_op && result->d_op->d_revalidate) {
525 result = do_revalidate(result, nd);
526 if (!result)
527 result = ERR_PTR(-ENOENT);
528 }
529 return result;
530}
531
532/*
533 * Wrapper to retry pathname resolution whenever the underlying
534 * file system returns an ESTALE.
535 *
536 * Retry the whole path once, forcing real lookup requests
537 * instead of relying on the dcache.
538 */
539static __always_inline int link_path_walk(const char *name, struct nameidata *nd)
540{
541 struct path save = nd->path;
542 int result;
543
544 /* make sure the stuff we saved doesn't go away */
545 path_get(&save);
546
547 result = __link_path_walk(name, nd);
548 if (result == -ESTALE) {
549 /* nd->path had been dropped */
550 nd->path = save;
551 path_get(&nd->path);
552 nd->flags |= LOOKUP_REVAL;
553 result = __link_path_walk(name, nd);
554 }
555
556 path_put(&save);
557
558 return result;
559}
560
561static __always_inline void set_root(struct nameidata *nd) 487static __always_inline void set_root(struct nameidata *nd)
562{ 488{
563 if (!nd->root.mnt) { 489 if (!nd->root.mnt) {
@@ -569,6 +495,8 @@ static __always_inline void set_root(struct nameidata *nd)
569 } 495 }
570} 496}
571 497
498static int link_path_walk(const char *, struct nameidata *);
499
572static __always_inline int __vfs_follow_link(struct nameidata *nd, const char *link) 500static __always_inline int __vfs_follow_link(struct nameidata *nd, const char *link)
573{ 501{
574 int res = 0; 502 int res = 0;
@@ -641,11 +569,14 @@ static __always_inline int __do_follow_link(struct path *path, struct nameidata
641 error = 0; 569 error = 0;
642 if (s) 570 if (s)
643 error = __vfs_follow_link(nd, s); 571 error = __vfs_follow_link(nd, s);
572 else if (nd->last_type == LAST_BIND) {
573 error = force_reval_path(&nd->path, nd);
574 if (error)
575 path_put(&nd->path);
576 }
644 if (dentry->d_inode->i_op->put_link) 577 if (dentry->d_inode->i_op->put_link)
645 dentry->d_inode->i_op->put_link(dentry, nd, cookie); 578 dentry->d_inode->i_op->put_link(dentry, nd, cookie);
646 } 579 }
647 path_put(path);
648
649 return error; 580 return error;
650} 581}
651 582
@@ -672,6 +603,7 @@ static inline int do_follow_link(struct path *path, struct nameidata *nd)
672 current->total_link_count++; 603 current->total_link_count++;
673 nd->depth++; 604 nd->depth++;
674 err = __do_follow_link(path, nd); 605 err = __do_follow_link(path, nd);
606 path_put(path);
675 current->link_count--; 607 current->link_count--;
676 nd->depth--; 608 nd->depth--;
677 return err; 609 return err;
@@ -797,8 +729,19 @@ static int do_lookup(struct nameidata *nd, struct qstr *name,
797 struct path *path) 729 struct path *path)
798{ 730{
799 struct vfsmount *mnt = nd->path.mnt; 731 struct vfsmount *mnt = nd->path.mnt;
800 struct dentry *dentry = __d_lookup(nd->path.dentry, name); 732 struct dentry *dentry, *parent;
733 struct inode *dir;
734 /*
735 * See if the low-level filesystem might want
736 * to use its own hash..
737 */
738 if (nd->path.dentry->d_op && nd->path.dentry->d_op->d_hash) {
739 int err = nd->path.dentry->d_op->d_hash(nd->path.dentry, name);
740 if (err < 0)
741 return err;
742 }
801 743
744 dentry = __d_lookup(nd->path.dentry, name);
802 if (!dentry) 745 if (!dentry)
803 goto need_lookup; 746 goto need_lookup;
804 if (dentry->d_op && dentry->d_op->d_revalidate) 747 if (dentry->d_op && dentry->d_op->d_revalidate)
@@ -810,7 +753,59 @@ done:
810 return 0; 753 return 0;
811 754
812need_lookup: 755need_lookup:
813 dentry = real_lookup(nd->path.dentry, name, nd); 756 parent = nd->path.dentry;
757 dir = parent->d_inode;
758
759 mutex_lock(&dir->i_mutex);
760 /*
761 * First re-do the cached lookup just in case it was created
762 * while we waited for the directory semaphore..
763 *
764 * FIXME! This could use version numbering or similar to
765 * avoid unnecessary cache lookups.
766 *
767 * The "dcache_lock" is purely to protect the RCU list walker
768 * from concurrent renames at this point (we mustn't get false
769 * negatives from the RCU list walk here, unlike the optimistic
770 * fast walk).
771 *
772 * so doing d_lookup() (with seqlock), instead of lockfree __d_lookup
773 */
774 dentry = d_lookup(parent, name);
775 if (!dentry) {
776 struct dentry *new;
777
778 /* Don't create child dentry for a dead directory. */
779 dentry = ERR_PTR(-ENOENT);
780 if (IS_DEADDIR(dir))
781 goto out_unlock;
782
783 new = d_alloc(parent, name);
784 dentry = ERR_PTR(-ENOMEM);
785 if (new) {
786 dentry = dir->i_op->lookup(dir, new, nd);
787 if (dentry)
788 dput(new);
789 else
790 dentry = new;
791 }
792out_unlock:
793 mutex_unlock(&dir->i_mutex);
794 if (IS_ERR(dentry))
795 goto fail;
796 goto done;
797 }
798
799 /*
800 * Uhhuh! Nasty case: the cache was re-populated while
801 * we waited on the semaphore. Need to revalidate.
802 */
803 mutex_unlock(&dir->i_mutex);
804 if (dentry->d_op && dentry->d_op->d_revalidate) {
805 dentry = do_revalidate(dentry, nd);
806 if (!dentry)
807 dentry = ERR_PTR(-ENOENT);
808 }
814 if (IS_ERR(dentry)) 809 if (IS_ERR(dentry))
815 goto fail; 810 goto fail;
816 goto done; 811 goto done;
@@ -835,7 +830,7 @@ fail:
835 * Returns 0 and nd will have valid dentry and mnt on success. 830 * Returns 0 and nd will have valid dentry and mnt on success.
836 * Returns error and drops reference to input namei data on failure. 831 * Returns error and drops reference to input namei data on failure.
837 */ 832 */
838static int __link_path_walk(const char *name, struct nameidata *nd) 833static int link_path_walk(const char *name, struct nameidata *nd)
839{ 834{
840 struct path next; 835 struct path next;
841 struct inode *inode; 836 struct inode *inode;
@@ -858,7 +853,7 @@ static int __link_path_walk(const char *name, struct nameidata *nd)
858 unsigned int c; 853 unsigned int c;
859 854
860 nd->flags |= LOOKUP_CONTINUE; 855 nd->flags |= LOOKUP_CONTINUE;
861 err = exec_permission_lite(inode); 856 err = exec_permission(inode);
862 if (err) 857 if (err)
863 break; 858 break;
864 859
@@ -898,16 +893,6 @@ static int __link_path_walk(const char *name, struct nameidata *nd)
898 case 1: 893 case 1:
899 continue; 894 continue;
900 } 895 }
901 /*
902 * See if the low-level filesystem might want
903 * to use its own hash..
904 */
905 if (nd->path.dentry->d_op && nd->path.dentry->d_op->d_hash) {
906 err = nd->path.dentry->d_op->d_hash(nd->path.dentry,
907 &this);
908 if (err < 0)
909 break;
910 }
911 /* This does the actual lookups.. */ 896 /* This does the actual lookups.. */
912 err = do_lookup(nd, &this, &next); 897 err = do_lookup(nd, &this, &next);
913 if (err) 898 if (err)
@@ -953,12 +938,6 @@ last_component:
953 case 1: 938 case 1:
954 goto return_reval; 939 goto return_reval;
955 } 940 }
956 if (nd->path.dentry->d_op && nd->path.dentry->d_op->d_hash) {
957 err = nd->path.dentry->d_op->d_hash(nd->path.dentry,
958 &this);
959 if (err < 0)
960 break;
961 }
962 err = do_lookup(nd, &this, &next); 941 err = do_lookup(nd, &this, &next);
963 if (err) 942 if (err)
964 break; 943 break;
@@ -1017,8 +996,27 @@ return_err:
1017 996
1018static int path_walk(const char *name, struct nameidata *nd) 997static int path_walk(const char *name, struct nameidata *nd)
1019{ 998{
999 struct path save = nd->path;
1000 int result;
1001
1020 current->total_link_count = 0; 1002 current->total_link_count = 0;
1021 return link_path_walk(name, nd); 1003
1004 /* make sure the stuff we saved doesn't go away */
1005 path_get(&save);
1006
1007 result = link_path_walk(name, nd);
1008 if (result == -ESTALE) {
1009 /* nd->path had been dropped */
1010 current->total_link_count = 0;
1011 nd->path = save;
1012 path_get(&nd->path);
1013 nd->flags |= LOOKUP_REVAL;
1014 result = link_path_walk(name, nd);
1015 }
1016
1017 path_put(&save);
1018
1019 return result;
1022} 1020}
1023 1021
1024static int path_init(int dfd, const char *name, unsigned int flags, struct nameidata *nd) 1022static int path_init(int dfd, const char *name, unsigned int flags, struct nameidata *nd)
@@ -1141,36 +1139,6 @@ int vfs_path_lookup(struct dentry *dentry, struct vfsmount *mnt,
1141 return retval; 1139 return retval;
1142} 1140}
1143 1141
1144/**
1145 * path_lookup_open - lookup a file path with open intent
1146 * @dfd: the directory to use as base, or AT_FDCWD
1147 * @name: pointer to file name
1148 * @lookup_flags: lookup intent flags
1149 * @nd: pointer to nameidata
1150 * @open_flags: open intent flags
1151 */
1152static int path_lookup_open(int dfd, const char *name,
1153 unsigned int lookup_flags, struct nameidata *nd, int open_flags)
1154{
1155 struct file *filp = get_empty_filp();
1156 int err;
1157
1158 if (filp == NULL)
1159 return -ENFILE;
1160 nd->intent.open.file = filp;
1161 nd->intent.open.flags = open_flags;
1162 nd->intent.open.create_mode = 0;
1163 err = do_path_lookup(dfd, name, lookup_flags|LOOKUP_OPEN, nd);
1164 if (IS_ERR(nd->intent.open.file)) {
1165 if (err == 0) {
1166 err = PTR_ERR(nd->intent.open.file);
1167 path_put(&nd->path);
1168 }
1169 } else if (err != 0)
1170 release_open_intent(nd);
1171 return err;
1172}
1173
1174static struct dentry *__lookup_hash(struct qstr *name, 1142static struct dentry *__lookup_hash(struct qstr *name,
1175 struct dentry *base, struct nameidata *nd) 1143 struct dentry *base, struct nameidata *nd)
1176{ 1144{
@@ -1191,7 +1159,17 @@ static struct dentry *__lookup_hash(struct qstr *name,
1191 goto out; 1159 goto out;
1192 } 1160 }
1193 1161
1194 dentry = cached_lookup(base, name, nd); 1162 dentry = __d_lookup(base, name);
1163
1164 /* lockless __d_lookup may fail due to concurrent d_move()
1165 * in some unrelated directory, so try with d_lookup
1166 */
1167 if (!dentry)
1168 dentry = d_lookup(base, name);
1169
1170 if (dentry && dentry->d_op && dentry->d_op->d_revalidate)
1171 dentry = do_revalidate(dentry, nd);
1172
1195 if (!dentry) { 1173 if (!dentry) {
1196 struct dentry *new; 1174 struct dentry *new;
1197 1175
@@ -1223,7 +1201,7 @@ static struct dentry *lookup_hash(struct nameidata *nd)
1223{ 1201{
1224 int err; 1202 int err;
1225 1203
1226 err = inode_permission(nd->path.dentry->d_inode, MAY_EXEC); 1204 err = exec_permission(nd->path.dentry->d_inode);
1227 if (err) 1205 if (err)
1228 return ERR_PTR(err); 1206 return ERR_PTR(err);
1229 return __lookup_hash(&nd->last, nd->path.dentry, nd); 1207 return __lookup_hash(&nd->last, nd->path.dentry, nd);
@@ -1273,7 +1251,7 @@ struct dentry *lookup_one_len(const char *name, struct dentry *base, int len)
1273 if (err) 1251 if (err)
1274 return ERR_PTR(err); 1252 return ERR_PTR(err);
1275 1253
1276 err = inode_permission(base->d_inode, MAY_EXEC); 1254 err = exec_permission(base->d_inode);
1277 if (err) 1255 if (err)
1278 return ERR_PTR(err); 1256 return ERR_PTR(err);
1279 return __lookup_hash(&this, base, NULL); 1257 return __lookup_hash(&this, base, NULL);
@@ -1511,69 +1489,45 @@ int may_open(struct path *path, int acc_mode, int flag)
1511 if (error) 1489 if (error)
1512 return error; 1490 return error;
1513 1491
1514 error = ima_path_check(path, acc_mode ?
1515 acc_mode & (MAY_READ | MAY_WRITE | MAY_EXEC) :
1516 ACC_MODE(flag) & (MAY_READ | MAY_WRITE),
1517 IMA_COUNT_UPDATE);
1518
1519 if (error)
1520 return error;
1521 /* 1492 /*
1522 * An append-only file must be opened in append mode for writing. 1493 * An append-only file must be opened in append mode for writing.
1523 */ 1494 */
1524 if (IS_APPEND(inode)) { 1495 if (IS_APPEND(inode)) {
1525 error = -EPERM;
1526 if ((flag & FMODE_WRITE) && !(flag & O_APPEND)) 1496 if ((flag & FMODE_WRITE) && !(flag & O_APPEND))
1527 goto err_out; 1497 return -EPERM;
1528 if (flag & O_TRUNC) 1498 if (flag & O_TRUNC)
1529 goto err_out; 1499 return -EPERM;
1530 } 1500 }
1531 1501
1532 /* O_NOATIME can only be set by the owner or superuser */ 1502 /* O_NOATIME can only be set by the owner or superuser */
1533 if (flag & O_NOATIME) 1503 if (flag & O_NOATIME && !is_owner_or_cap(inode))
1534 if (!is_owner_or_cap(inode)) { 1504 return -EPERM;
1535 error = -EPERM;
1536 goto err_out;
1537 }
1538 1505
1539 /* 1506 /*
1540 * Ensure there are no outstanding leases on the file. 1507 * Ensure there are no outstanding leases on the file.
1541 */ 1508 */
1542 error = break_lease(inode, flag); 1509 return break_lease(inode, flag);
1543 if (error) 1510}
1544 goto err_out;
1545
1546 if (flag & O_TRUNC) {
1547 error = get_write_access(inode);
1548 if (error)
1549 goto err_out;
1550
1551 /*
1552 * Refuse to truncate files with mandatory locks held on them.
1553 */
1554 error = locks_verify_locked(inode);
1555 if (!error)
1556 error = security_path_truncate(path, 0,
1557 ATTR_MTIME|ATTR_CTIME|ATTR_OPEN);
1558 if (!error) {
1559 vfs_dq_init(inode);
1560
1561 error = do_truncate(dentry, 0,
1562 ATTR_MTIME|ATTR_CTIME|ATTR_OPEN,
1563 NULL);
1564 }
1565 put_write_access(inode);
1566 if (error)
1567 goto err_out;
1568 } else
1569 if (flag & FMODE_WRITE)
1570 vfs_dq_init(inode);
1571 1511
1572 return 0; 1512static int handle_truncate(struct path *path)
1573err_out: 1513{
1574 ima_counts_put(path, acc_mode ? 1514 struct inode *inode = path->dentry->d_inode;
1575 acc_mode & (MAY_READ | MAY_WRITE | MAY_EXEC) : 1515 int error = get_write_access(inode);
1576 ACC_MODE(flag) & (MAY_READ | MAY_WRITE)); 1516 if (error)
1517 return error;
1518 /*
1519 * Refuse to truncate files with mandatory locks held on them.
1520 */
1521 error = locks_verify_locked(inode);
1522 if (!error)
1523 error = security_path_truncate(path, 0,
1524 ATTR_MTIME|ATTR_CTIME|ATTR_OPEN);
1525 if (!error) {
1526 error = do_truncate(path->dentry, 0,
1527 ATTR_MTIME|ATTR_CTIME|ATTR_OPEN,
1528 NULL);
1529 }
1530 put_write_access(inode);
1577 return error; 1531 return error;
1578} 1532}
1579 1533
@@ -1628,7 +1582,7 @@ static inline int open_to_namei_flags(int flag)
1628 return flag; 1582 return flag;
1629} 1583}
1630 1584
1631static int open_will_write_to_fs(int flag, struct inode *inode) 1585static int open_will_truncate(int flag, struct inode *inode)
1632{ 1586{
1633 /* 1587 /*
1634 * We'll never write to the fs underlying 1588 * We'll never write to the fs underlying
@@ -1650,10 +1604,10 @@ struct file *do_filp_open(int dfd, const char *pathname,
1650 struct file *filp; 1604 struct file *filp;
1651 struct nameidata nd; 1605 struct nameidata nd;
1652 int error; 1606 int error;
1653 struct path path; 1607 struct path path, save;
1654 struct dentry *dir; 1608 struct dentry *dir;
1655 int count = 0; 1609 int count = 0;
1656 int will_write; 1610 int will_truncate;
1657 int flag = open_to_namei_flags(open_flag); 1611 int flag = open_to_namei_flags(open_flag);
1658 1612
1659 /* 1613 /*
@@ -1681,8 +1635,22 @@ struct file *do_filp_open(int dfd, const char *pathname,
1681 * The simplest case - just a plain lookup. 1635 * The simplest case - just a plain lookup.
1682 */ 1636 */
1683 if (!(flag & O_CREAT)) { 1637 if (!(flag & O_CREAT)) {
1684 error = path_lookup_open(dfd, pathname, lookup_flags(flag), 1638 filp = get_empty_filp();
1685 &nd, flag); 1639
1640 if (filp == NULL)
1641 return ERR_PTR(-ENFILE);
1642 nd.intent.open.file = filp;
1643 nd.intent.open.flags = flag;
1644 nd.intent.open.create_mode = 0;
1645 error = do_path_lookup(dfd, pathname,
1646 lookup_flags(flag)|LOOKUP_OPEN, &nd);
1647 if (IS_ERR(nd.intent.open.file)) {
1648 if (error == 0) {
1649 error = PTR_ERR(nd.intent.open.file);
1650 path_put(&nd.path);
1651 }
1652 } else if (error)
1653 release_open_intent(&nd);
1686 if (error) 1654 if (error)
1687 return ERR_PTR(error); 1655 return ERR_PTR(error);
1688 goto ok; 1656 goto ok;
@@ -1758,13 +1726,17 @@ do_last:
1758 goto exit; 1726 goto exit;
1759 } 1727 }
1760 filp = nameidata_to_filp(&nd, open_flag); 1728 filp = nameidata_to_filp(&nd, open_flag);
1761 if (IS_ERR(filp))
1762 ima_counts_put(&nd.path,
1763 acc_mode & (MAY_READ | MAY_WRITE |
1764 MAY_EXEC));
1765 mnt_drop_write(nd.path.mnt); 1729 mnt_drop_write(nd.path.mnt);
1766 if (nd.root.mnt) 1730 if (nd.root.mnt)
1767 path_put(&nd.root); 1731 path_put(&nd.root);
1732 if (!IS_ERR(filp)) {
1733 error = ima_path_check(&filp->f_path, filp->f_mode &
1734 (MAY_READ | MAY_WRITE | MAY_EXEC));
1735 if (error) {
1736 fput(filp);
1737 filp = ERR_PTR(error);
1738 }
1739 }
1768 return filp; 1740 return filp;
1769 } 1741 }
1770 1742
@@ -1792,7 +1764,7 @@ do_last:
1792 1764
1793 path_to_nameidata(&path, &nd); 1765 path_to_nameidata(&path, &nd);
1794 error = -EISDIR; 1766 error = -EISDIR;
1795 if (path.dentry->d_inode && S_ISDIR(path.dentry->d_inode->i_mode)) 1767 if (S_ISDIR(path.dentry->d_inode->i_mode))
1796 goto exit; 1768 goto exit;
1797ok: 1769ok:
1798 /* 1770 /*
@@ -1805,28 +1777,45 @@ ok:
1805 * be avoided. Taking this mnt write here 1777 * be avoided. Taking this mnt write here
1806 * ensures that (2) can not occur. 1778 * ensures that (2) can not occur.
1807 */ 1779 */
1808 will_write = open_will_write_to_fs(flag, nd.path.dentry->d_inode); 1780 will_truncate = open_will_truncate(flag, nd.path.dentry->d_inode);
1809 if (will_write) { 1781 if (will_truncate) {
1810 error = mnt_want_write(nd.path.mnt); 1782 error = mnt_want_write(nd.path.mnt);
1811 if (error) 1783 if (error)
1812 goto exit; 1784 goto exit;
1813 } 1785 }
1814 error = may_open(&nd.path, acc_mode, flag); 1786 error = may_open(&nd.path, acc_mode, flag);
1815 if (error) { 1787 if (error) {
1816 if (will_write) 1788 if (will_truncate)
1817 mnt_drop_write(nd.path.mnt); 1789 mnt_drop_write(nd.path.mnt);
1818 goto exit; 1790 goto exit;
1819 } 1791 }
1820 filp = nameidata_to_filp(&nd, open_flag); 1792 filp = nameidata_to_filp(&nd, open_flag);
1821 if (IS_ERR(filp)) 1793 if (!IS_ERR(filp)) {
1822 ima_counts_put(&nd.path, 1794 error = ima_path_check(&filp->f_path, filp->f_mode &
1823 acc_mode & (MAY_READ | MAY_WRITE | MAY_EXEC)); 1795 (MAY_READ | MAY_WRITE | MAY_EXEC));
1796 if (error) {
1797 fput(filp);
1798 filp = ERR_PTR(error);
1799 }
1800 }
1801 if (!IS_ERR(filp)) {
1802 if (acc_mode & MAY_WRITE)
1803 vfs_dq_init(nd.path.dentry->d_inode);
1804
1805 if (will_truncate) {
1806 error = handle_truncate(&nd.path);
1807 if (error) {
1808 fput(filp);
1809 filp = ERR_PTR(error);
1810 }
1811 }
1812 }
1824 /* 1813 /*
1825 * It is now safe to drop the mnt write 1814 * It is now safe to drop the mnt write
1826 * because the filp has had a write taken 1815 * because the filp has had a write taken
1827 * on its behalf. 1816 * on its behalf.
1828 */ 1817 */
1829 if (will_write) 1818 if (will_truncate)
1830 mnt_drop_write(nd.path.mnt); 1819 mnt_drop_write(nd.path.mnt);
1831 if (nd.root.mnt) 1820 if (nd.root.mnt)
1832 path_put(&nd.root); 1821 path_put(&nd.root);
@@ -1863,7 +1852,18 @@ do_link:
1863 error = security_inode_follow_link(path.dentry, &nd); 1852 error = security_inode_follow_link(path.dentry, &nd);
1864 if (error) 1853 if (error)
1865 goto exit_dput; 1854 goto exit_dput;
1855 save = nd.path;
1856 path_get(&save);
1866 error = __do_follow_link(&path, &nd); 1857 error = __do_follow_link(&path, &nd);
1858 if (error == -ESTALE) {
1859 /* nd.path had been dropped */
1860 nd.path = save;
1861 path_get(&nd.path);
1862 nd.flags |= LOOKUP_REVAL;
1863 error = __do_follow_link(&path, &nd);
1864 }
1865 path_put(&save);
1866 path_put(&path);
1867 if (error) { 1867 if (error) {
1868 /* Does someone understand code flow here? Or it is only 1868 /* Does someone understand code flow here? Or it is only
1869 * me so stupid? Anathema to whoever designed this non-sense 1869 * me so stupid? Anathema to whoever designed this non-sense
diff --git a/fs/nfs/Kconfig b/fs/nfs/Kconfig
index 2a77bc25d5af..59e5673b4597 100644
--- a/fs/nfs/Kconfig
+++ b/fs/nfs/Kconfig
@@ -90,7 +90,7 @@ config ROOT_NFS
90 If you want your system to mount its root file system via NFS, 90 If you want your system to mount its root file system via NFS,
91 choose Y here. This is common practice for managing systems 91 choose Y here. This is common practice for managing systems
92 without local permanent storage. For details, read 92 without local permanent storage. For details, read
93 <file:Documentation/filesystems/nfsroot.txt>. 93 <file:Documentation/filesystems/nfs/nfsroot.txt>.
94 94
95 Most people say N here. 95 Most people say N here.
96 96
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index 7e57b04e4014..865265bdca03 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -108,6 +108,10 @@ enum {
108 NFS_OWNER_RECLAIM_NOGRACE 108 NFS_OWNER_RECLAIM_NOGRACE
109}; 109};
110 110
111#define NFS_LOCK_NEW 0
112#define NFS_LOCK_RECLAIM 1
113#define NFS_LOCK_EXPIRED 2
114
111/* 115/*
112 * struct nfs4_state maintains the client-side state for a given 116 * struct nfs4_state maintains the client-side state for a given
113 * (state_owner,inode) tuple (OPEN) or state_owner (LOCK). 117 * (state_owner,inode) tuple (OPEN) or state_owner (LOCK).
@@ -282,6 +286,7 @@ extern struct nfs_seqid *nfs_alloc_seqid(struct nfs_seqid_counter *counter);
282extern int nfs_wait_on_sequence(struct nfs_seqid *seqid, struct rpc_task *task); 286extern int nfs_wait_on_sequence(struct nfs_seqid *seqid, struct rpc_task *task);
283extern void nfs_increment_open_seqid(int status, struct nfs_seqid *seqid); 287extern void nfs_increment_open_seqid(int status, struct nfs_seqid *seqid);
284extern void nfs_increment_lock_seqid(int status, struct nfs_seqid *seqid); 288extern void nfs_increment_lock_seqid(int status, struct nfs_seqid *seqid);
289extern void nfs_release_seqid(struct nfs_seqid *seqid);
285extern void nfs_free_seqid(struct nfs_seqid *seqid); 290extern void nfs_free_seqid(struct nfs_seqid *seqid);
286 291
287extern const nfs4_stateid zero_stateid; 292extern const nfs4_stateid zero_stateid;
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 9f5f11ecfd93..198d51d17c13 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -64,6 +64,7 @@
64 64
65struct nfs4_opendata; 65struct nfs4_opendata;
66static int _nfs4_proc_open(struct nfs4_opendata *data); 66static int _nfs4_proc_open(struct nfs4_opendata *data);
67static int _nfs4_recover_proc_open(struct nfs4_opendata *data);
67static int nfs4_do_fsinfo(struct nfs_server *, struct nfs_fh *, struct nfs_fsinfo *); 68static int nfs4_do_fsinfo(struct nfs_server *, struct nfs_fh *, struct nfs_fsinfo *);
68static int nfs4_async_handle_error(struct rpc_task *, const struct nfs_server *, struct nfs4_state *); 69static int nfs4_async_handle_error(struct rpc_task *, const struct nfs_server *, struct nfs4_state *);
69static int _nfs4_proc_lookup(struct inode *dir, const struct qstr *name, struct nfs_fh *fhandle, struct nfs_fattr *fattr); 70static int _nfs4_proc_lookup(struct inode *dir, const struct qstr *name, struct nfs_fh *fhandle, struct nfs_fattr *fattr);
@@ -341,6 +342,27 @@ nfs4_free_slot(struct nfs4_slot_table *tbl, u8 free_slotid)
341 free_slotid, tbl->highest_used_slotid); 342 free_slotid, tbl->highest_used_slotid);
342} 343}
343 344
345/*
346 * Signal state manager thread if session is drained
347 */
348static void nfs41_check_drain_session_complete(struct nfs4_session *ses)
349{
350 struct rpc_task *task;
351
352 if (!test_bit(NFS4CLNT_SESSION_DRAINING, &ses->clp->cl_state)) {
353 task = rpc_wake_up_next(&ses->fc_slot_table.slot_tbl_waitq);
354 if (task)
355 rpc_task_set_priority(task, RPC_PRIORITY_PRIVILEGED);
356 return;
357 }
358
359 if (ses->fc_slot_table.highest_used_slotid != -1)
360 return;
361
362 dprintk("%s COMPLETE: Session Drained\n", __func__);
363 complete(&ses->complete);
364}
365
344static void nfs41_sequence_free_slot(const struct nfs_client *clp, 366static void nfs41_sequence_free_slot(const struct nfs_client *clp,
345 struct nfs4_sequence_res *res) 367 struct nfs4_sequence_res *res)
346{ 368{
@@ -356,15 +378,7 @@ static void nfs41_sequence_free_slot(const struct nfs_client *clp,
356 378
357 spin_lock(&tbl->slot_tbl_lock); 379 spin_lock(&tbl->slot_tbl_lock);
358 nfs4_free_slot(tbl, res->sr_slotid); 380 nfs4_free_slot(tbl, res->sr_slotid);
359 381 nfs41_check_drain_session_complete(clp->cl_session);
360 /* Signal state manager thread if session is drained */
361 if (test_bit(NFS4CLNT_SESSION_DRAINING, &clp->cl_state)) {
362 if (tbl->highest_used_slotid == -1) {
363 dprintk("%s COMPLETE: Session Drained\n", __func__);
364 complete(&clp->cl_session->complete);
365 }
366 } else
367 rpc_wake_up_next(&tbl->slot_tbl_waitq);
368 spin_unlock(&tbl->slot_tbl_lock); 382 spin_unlock(&tbl->slot_tbl_lock);
369 res->sr_slotid = NFS4_MAX_SLOT_TABLE; 383 res->sr_slotid = NFS4_MAX_SLOT_TABLE;
370} 384}
@@ -421,7 +435,7 @@ out:
421 * Note: must be called with under the slot_tbl_lock. 435 * Note: must be called with under the slot_tbl_lock.
422 */ 436 */
423static u8 437static u8
424nfs4_find_slot(struct nfs4_slot_table *tbl, struct rpc_task *task) 438nfs4_find_slot(struct nfs4_slot_table *tbl)
425{ 439{
426 int slotid; 440 int slotid;
427 u8 ret_id = NFS4_MAX_SLOT_TABLE; 441 u8 ret_id = NFS4_MAX_SLOT_TABLE;
@@ -463,7 +477,8 @@ static int nfs41_setup_sequence(struct nfs4_session *session,
463 tbl = &session->fc_slot_table; 477 tbl = &session->fc_slot_table;
464 478
465 spin_lock(&tbl->slot_tbl_lock); 479 spin_lock(&tbl->slot_tbl_lock);
466 if (test_bit(NFS4CLNT_SESSION_DRAINING, &session->clp->cl_state)) { 480 if (test_bit(NFS4CLNT_SESSION_DRAINING, &session->clp->cl_state) &&
481 !rpc_task_has_priority(task, RPC_PRIORITY_PRIVILEGED)) {
467 /* 482 /*
468 * The state manager will wait until the slot table is empty. 483 * The state manager will wait until the slot table is empty.
469 * Schedule the reset thread 484 * Schedule the reset thread
@@ -474,7 +489,15 @@ static int nfs41_setup_sequence(struct nfs4_session *session,
474 return -EAGAIN; 489 return -EAGAIN;
475 } 490 }
476 491
477 slotid = nfs4_find_slot(tbl, task); 492 if (!rpc_queue_empty(&tbl->slot_tbl_waitq) &&
493 !rpc_task_has_priority(task, RPC_PRIORITY_PRIVILEGED)) {
494 rpc_sleep_on(&tbl->slot_tbl_waitq, task, NULL);
495 spin_unlock(&tbl->slot_tbl_lock);
496 dprintk("%s enforce FIFO order\n", __func__);
497 return -EAGAIN;
498 }
499
500 slotid = nfs4_find_slot(tbl);
478 if (slotid == NFS4_MAX_SLOT_TABLE) { 501 if (slotid == NFS4_MAX_SLOT_TABLE) {
479 rpc_sleep_on(&tbl->slot_tbl_waitq, task, NULL); 502 rpc_sleep_on(&tbl->slot_tbl_waitq, task, NULL);
480 spin_unlock(&tbl->slot_tbl_lock); 503 spin_unlock(&tbl->slot_tbl_lock);
@@ -483,6 +506,7 @@ static int nfs41_setup_sequence(struct nfs4_session *session,
483 } 506 }
484 spin_unlock(&tbl->slot_tbl_lock); 507 spin_unlock(&tbl->slot_tbl_lock);
485 508
509 rpc_task_set_priority(task, RPC_PRIORITY_NORMAL);
486 slot = tbl->slots + slotid; 510 slot = tbl->slots + slotid;
487 args->sa_session = session; 511 args->sa_session = session;
488 args->sa_slotid = slotid; 512 args->sa_slotid = slotid;
@@ -545,6 +569,12 @@ static void nfs41_call_sync_prepare(struct rpc_task *task, void *calldata)
545 rpc_call_start(task); 569 rpc_call_start(task);
546} 570}
547 571
572static void nfs41_call_priv_sync_prepare(struct rpc_task *task, void *calldata)
573{
574 rpc_task_set_priority(task, RPC_PRIORITY_PRIVILEGED);
575 nfs41_call_sync_prepare(task, calldata);
576}
577
548static void nfs41_call_sync_done(struct rpc_task *task, void *calldata) 578static void nfs41_call_sync_done(struct rpc_task *task, void *calldata)
549{ 579{
550 struct nfs41_call_sync_data *data = calldata; 580 struct nfs41_call_sync_data *data = calldata;
@@ -557,12 +587,18 @@ struct rpc_call_ops nfs41_call_sync_ops = {
557 .rpc_call_done = nfs41_call_sync_done, 587 .rpc_call_done = nfs41_call_sync_done,
558}; 588};
559 589
590struct rpc_call_ops nfs41_call_priv_sync_ops = {
591 .rpc_call_prepare = nfs41_call_priv_sync_prepare,
592 .rpc_call_done = nfs41_call_sync_done,
593};
594
560static int nfs4_call_sync_sequence(struct nfs_client *clp, 595static int nfs4_call_sync_sequence(struct nfs_client *clp,
561 struct rpc_clnt *clnt, 596 struct rpc_clnt *clnt,
562 struct rpc_message *msg, 597 struct rpc_message *msg,
563 struct nfs4_sequence_args *args, 598 struct nfs4_sequence_args *args,
564 struct nfs4_sequence_res *res, 599 struct nfs4_sequence_res *res,
565 int cache_reply) 600 int cache_reply,
601 int privileged)
566{ 602{
567 int ret; 603 int ret;
568 struct rpc_task *task; 604 struct rpc_task *task;
@@ -580,6 +616,8 @@ static int nfs4_call_sync_sequence(struct nfs_client *clp,
580 }; 616 };
581 617
582 res->sr_slotid = NFS4_MAX_SLOT_TABLE; 618 res->sr_slotid = NFS4_MAX_SLOT_TABLE;
619 if (privileged)
620 task_setup.callback_ops = &nfs41_call_priv_sync_ops;
583 task = rpc_run_task(&task_setup); 621 task = rpc_run_task(&task_setup);
584 if (IS_ERR(task)) 622 if (IS_ERR(task))
585 ret = PTR_ERR(task); 623 ret = PTR_ERR(task);
@@ -597,7 +635,7 @@ int _nfs4_call_sync_session(struct nfs_server *server,
597 int cache_reply) 635 int cache_reply)
598{ 636{
599 return nfs4_call_sync_sequence(server->nfs_client, server->client, 637 return nfs4_call_sync_sequence(server->nfs_client, server->client,
600 msg, args, res, cache_reply); 638 msg, args, res, cache_reply, 0);
601} 639}
602 640
603#endif /* CONFIG_NFS_V4_1 */ 641#endif /* CONFIG_NFS_V4_1 */
@@ -1035,7 +1073,7 @@ static int nfs4_open_recover_helper(struct nfs4_opendata *opendata, fmode_t fmod
1035 memset(&opendata->o_res, 0, sizeof(opendata->o_res)); 1073 memset(&opendata->o_res, 0, sizeof(opendata->o_res));
1036 memset(&opendata->c_res, 0, sizeof(opendata->c_res)); 1074 memset(&opendata->c_res, 0, sizeof(opendata->c_res));
1037 nfs4_init_opendata_res(opendata); 1075 nfs4_init_opendata_res(opendata);
1038 ret = _nfs4_proc_open(opendata); 1076 ret = _nfs4_recover_proc_open(opendata);
1039 if (ret != 0) 1077 if (ret != 0)
1040 return ret; 1078 return ret;
1041 newstate = nfs4_opendata_to_nfs4_state(opendata); 1079 newstate = nfs4_opendata_to_nfs4_state(opendata);
@@ -1326,6 +1364,12 @@ out_no_action:
1326 1364
1327} 1365}
1328 1366
1367static void nfs4_recover_open_prepare(struct rpc_task *task, void *calldata)
1368{
1369 rpc_task_set_priority(task, RPC_PRIORITY_PRIVILEGED);
1370 nfs4_open_prepare(task, calldata);
1371}
1372
1329static void nfs4_open_done(struct rpc_task *task, void *calldata) 1373static void nfs4_open_done(struct rpc_task *task, void *calldata)
1330{ 1374{
1331 struct nfs4_opendata *data = calldata; 1375 struct nfs4_opendata *data = calldata;
@@ -1384,10 +1428,13 @@ static const struct rpc_call_ops nfs4_open_ops = {
1384 .rpc_release = nfs4_open_release, 1428 .rpc_release = nfs4_open_release,
1385}; 1429};
1386 1430
1387/* 1431static const struct rpc_call_ops nfs4_recover_open_ops = {
1388 * Note: On error, nfs4_proc_open will free the struct nfs4_opendata 1432 .rpc_call_prepare = nfs4_recover_open_prepare,
1389 */ 1433 .rpc_call_done = nfs4_open_done,
1390static int _nfs4_proc_open(struct nfs4_opendata *data) 1434 .rpc_release = nfs4_open_release,
1435};
1436
1437static int nfs4_run_open_task(struct nfs4_opendata *data, int isrecover)
1391{ 1438{
1392 struct inode *dir = data->dir->d_inode; 1439 struct inode *dir = data->dir->d_inode;
1393 struct nfs_server *server = NFS_SERVER(dir); 1440 struct nfs_server *server = NFS_SERVER(dir);
@@ -1414,21 +1461,57 @@ static int _nfs4_proc_open(struct nfs4_opendata *data)
1414 data->rpc_done = 0; 1461 data->rpc_done = 0;
1415 data->rpc_status = 0; 1462 data->rpc_status = 0;
1416 data->cancelled = 0; 1463 data->cancelled = 0;
1464 if (isrecover)
1465 task_setup_data.callback_ops = &nfs4_recover_open_ops;
1417 task = rpc_run_task(&task_setup_data); 1466 task = rpc_run_task(&task_setup_data);
1418 if (IS_ERR(task)) 1467 if (IS_ERR(task))
1419 return PTR_ERR(task); 1468 return PTR_ERR(task);
1420 status = nfs4_wait_for_completion_rpc_task(task); 1469 status = nfs4_wait_for_completion_rpc_task(task);
1421 if (status != 0) { 1470 if (status != 0) {
1422 data->cancelled = 1; 1471 data->cancelled = 1;
1423 smp_wmb(); 1472 smp_wmb();
1424 } else 1473 } else
1425 status = data->rpc_status; 1474 status = data->rpc_status;
1426 rpc_put_task(task); 1475 rpc_put_task(task);
1476
1477 return status;
1478}
1479
1480static int _nfs4_recover_proc_open(struct nfs4_opendata *data)
1481{
1482 struct inode *dir = data->dir->d_inode;
1483 struct nfs_openres *o_res = &data->o_res;
1484 int status;
1485
1486 status = nfs4_run_open_task(data, 1);
1427 if (status != 0 || !data->rpc_done) 1487 if (status != 0 || !data->rpc_done)
1428 return status; 1488 return status;
1429 1489
1430 if (o_res->fh.size == 0) 1490 nfs_refresh_inode(dir, o_res->dir_attr);
1431 _nfs4_proc_lookup(dir, o_arg->name, &o_res->fh, o_res->f_attr); 1491
1492 if (o_res->rflags & NFS4_OPEN_RESULT_CONFIRM) {
1493 status = _nfs4_proc_open_confirm(data);
1494 if (status != 0)
1495 return status;
1496 }
1497
1498 return status;
1499}
1500
1501/*
1502 * Note: On error, nfs4_proc_open will free the struct nfs4_opendata
1503 */
1504static int _nfs4_proc_open(struct nfs4_opendata *data)
1505{
1506 struct inode *dir = data->dir->d_inode;
1507 struct nfs_server *server = NFS_SERVER(dir);
1508 struct nfs_openargs *o_arg = &data->o_arg;
1509 struct nfs_openres *o_res = &data->o_res;
1510 int status;
1511
1512 status = nfs4_run_open_task(data, 0);
1513 if (status != 0 || !data->rpc_done)
1514 return status;
1432 1515
1433 if (o_arg->open_flags & O_CREAT) { 1516 if (o_arg->open_flags & O_CREAT) {
1434 update_changeattr(dir, &o_res->cinfo); 1517 update_changeattr(dir, &o_res->cinfo);
@@ -1752,11 +1835,10 @@ static void nfs4_close_done(struct rpc_task *task, void *data)
1752 if (calldata->arg.fmode == 0) 1835 if (calldata->arg.fmode == 0)
1753 break; 1836 break;
1754 default: 1837 default:
1755 if (nfs4_async_handle_error(task, server, state) == -EAGAIN) { 1838 if (nfs4_async_handle_error(task, server, state) == -EAGAIN)
1756 nfs_restart_rpc(task, server->nfs_client); 1839 rpc_restart_call_prepare(task);
1757 return;
1758 }
1759 } 1840 }
1841 nfs_release_seqid(calldata->arg.seqid);
1760 nfs_refresh_inode(calldata->inode, calldata->res.fattr); 1842 nfs_refresh_inode(calldata->inode, calldata->res.fattr);
1761} 1843}
1762 1844
@@ -1848,8 +1930,6 @@ int nfs4_do_close(struct path *path, struct nfs4_state *state, int wait)
1848 calldata->state = state; 1930 calldata->state = state;
1849 calldata->arg.fh = NFS_FH(state->inode); 1931 calldata->arg.fh = NFS_FH(state->inode);
1850 calldata->arg.stateid = &state->open_stateid; 1932 calldata->arg.stateid = &state->open_stateid;
1851 if (nfs4_has_session(server->nfs_client))
1852 memset(calldata->arg.stateid->data, 0, 4); /* clear seqid */
1853 /* Serialization for the sequence id */ 1933 /* Serialization for the sequence id */
1854 calldata->arg.seqid = nfs_alloc_seqid(&state->owner->so_seqid); 1934 calldata->arg.seqid = nfs_alloc_seqid(&state->owner->so_seqid);
1855 if (calldata->arg.seqid == NULL) 1935 if (calldata->arg.seqid == NULL)
@@ -3941,6 +4021,12 @@ static void nfs4_lock_prepare(struct rpc_task *task, void *calldata)
3941 dprintk("%s: done!, ret = %d\n", __func__, data->rpc_status); 4021 dprintk("%s: done!, ret = %d\n", __func__, data->rpc_status);
3942} 4022}
3943 4023
4024static void nfs4_recover_lock_prepare(struct rpc_task *task, void *calldata)
4025{
4026 rpc_task_set_priority(task, RPC_PRIORITY_PRIVILEGED);
4027 nfs4_lock_prepare(task, calldata);
4028}
4029
3944static void nfs4_lock_done(struct rpc_task *task, void *calldata) 4030static void nfs4_lock_done(struct rpc_task *task, void *calldata)
3945{ 4031{
3946 struct nfs4_lockdata *data = calldata; 4032 struct nfs4_lockdata *data = calldata;
@@ -3996,7 +4082,13 @@ static const struct rpc_call_ops nfs4_lock_ops = {
3996 .rpc_release = nfs4_lock_release, 4082 .rpc_release = nfs4_lock_release,
3997}; 4083};
3998 4084
3999static int _nfs4_do_setlk(struct nfs4_state *state, int cmd, struct file_lock *fl, int reclaim) 4085static const struct rpc_call_ops nfs4_recover_lock_ops = {
4086 .rpc_call_prepare = nfs4_recover_lock_prepare,
4087 .rpc_call_done = nfs4_lock_done,
4088 .rpc_release = nfs4_lock_release,
4089};
4090
4091static int _nfs4_do_setlk(struct nfs4_state *state, int cmd, struct file_lock *fl, int recovery_type)
4000{ 4092{
4001 struct nfs4_lockdata *data; 4093 struct nfs4_lockdata *data;
4002 struct rpc_task *task; 4094 struct rpc_task *task;
@@ -4020,8 +4112,11 @@ static int _nfs4_do_setlk(struct nfs4_state *state, int cmd, struct file_lock *f
4020 return -ENOMEM; 4112 return -ENOMEM;
4021 if (IS_SETLKW(cmd)) 4113 if (IS_SETLKW(cmd))
4022 data->arg.block = 1; 4114 data->arg.block = 1;
4023 if (reclaim != 0) 4115 if (recovery_type > NFS_LOCK_NEW) {
4024 data->arg.reclaim = 1; 4116 if (recovery_type == NFS_LOCK_RECLAIM)
4117 data->arg.reclaim = NFS_LOCK_RECLAIM;
4118 task_setup_data.callback_ops = &nfs4_recover_lock_ops;
4119 }
4025 msg.rpc_argp = &data->arg, 4120 msg.rpc_argp = &data->arg,
4026 msg.rpc_resp = &data->res, 4121 msg.rpc_resp = &data->res,
4027 task_setup_data.callback_data = data; 4122 task_setup_data.callback_data = data;
@@ -4048,7 +4143,7 @@ static int nfs4_lock_reclaim(struct nfs4_state *state, struct file_lock *request
4048 /* Cache the lock if possible... */ 4143 /* Cache the lock if possible... */
4049 if (test_bit(NFS_DELEGATED_STATE, &state->flags) != 0) 4144 if (test_bit(NFS_DELEGATED_STATE, &state->flags) != 0)
4050 return 0; 4145 return 0;
4051 err = _nfs4_do_setlk(state, F_SETLK, request, 1); 4146 err = _nfs4_do_setlk(state, F_SETLK, request, NFS_LOCK_RECLAIM);
4052 if (err != -NFS4ERR_DELAY) 4147 if (err != -NFS4ERR_DELAY)
4053 break; 4148 break;
4054 nfs4_handle_exception(server, err, &exception); 4149 nfs4_handle_exception(server, err, &exception);
@@ -4068,7 +4163,7 @@ static int nfs4_lock_expired(struct nfs4_state *state, struct file_lock *request
4068 do { 4163 do {
4069 if (test_bit(NFS_DELEGATED_STATE, &state->flags) != 0) 4164 if (test_bit(NFS_DELEGATED_STATE, &state->flags) != 0)
4070 return 0; 4165 return 0;
4071 err = _nfs4_do_setlk(state, F_SETLK, request, 0); 4166 err = _nfs4_do_setlk(state, F_SETLK, request, NFS_LOCK_EXPIRED);
4072 switch (err) { 4167 switch (err) {
4073 default: 4168 default:
4074 goto out; 4169 goto out;
@@ -4104,7 +4199,7 @@ static int _nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock
4104 status = do_vfs_lock(request->fl_file, request); 4199 status = do_vfs_lock(request->fl_file, request);
4105 goto out_unlock; 4200 goto out_unlock;
4106 } 4201 }
4107 status = _nfs4_do_setlk(state, cmd, request, 0); 4202 status = _nfs4_do_setlk(state, cmd, request, NFS_LOCK_NEW);
4108 if (status != 0) 4203 if (status != 0)
4109 goto out_unlock; 4204 goto out_unlock;
4110 /* Note: we always want to sleep here! */ 4205 /* Note: we always want to sleep here! */
@@ -4187,7 +4282,7 @@ int nfs4_lock_delegation_recall(struct nfs4_state *state, struct file_lock *fl)
4187 if (err != 0) 4282 if (err != 0)
4188 goto out; 4283 goto out;
4189 do { 4284 do {
4190 err = _nfs4_do_setlk(state, F_SETLK, fl, 0); 4285 err = _nfs4_do_setlk(state, F_SETLK, fl, NFS_LOCK_NEW);
4191 switch (err) { 4286 switch (err) {
4192 default: 4287 default:
4193 printk(KERN_ERR "%s: unhandled error %d.\n", 4288 printk(KERN_ERR "%s: unhandled error %d.\n",
@@ -4395,11 +4490,12 @@ static void nfs4_get_lease_time_prepare(struct rpc_task *task,
4395 (struct nfs4_get_lease_time_data *)calldata; 4490 (struct nfs4_get_lease_time_data *)calldata;
4396 4491
4397 dprintk("--> %s\n", __func__); 4492 dprintk("--> %s\n", __func__);
4493 rpc_task_set_priority(task, RPC_PRIORITY_PRIVILEGED);
4398 /* just setup sequence, do not trigger session recovery 4494 /* just setup sequence, do not trigger session recovery
4399 since we're invoked within one */ 4495 since we're invoked within one */
4400 ret = nfs41_setup_sequence(data->clp->cl_session, 4496 ret = nfs41_setup_sequence(data->clp->cl_session,
4401 &data->args->la_seq_args, 4497 &data->args->la_seq_args,
4402 &data->res->lr_seq_res, 0, task); 4498 &data->res->lr_seq_res, 0, task);
4403 4499
4404 BUG_ON(ret == -EAGAIN); 4500 BUG_ON(ret == -EAGAIN);
4405 rpc_call_start(task); 4501 rpc_call_start(task);
@@ -4619,7 +4715,7 @@ struct nfs4_session *nfs4_alloc_session(struct nfs_client *clp)
4619 tbl = &session->fc_slot_table; 4715 tbl = &session->fc_slot_table;
4620 tbl->highest_used_slotid = -1; 4716 tbl->highest_used_slotid = -1;
4621 spin_lock_init(&tbl->slot_tbl_lock); 4717 spin_lock_init(&tbl->slot_tbl_lock);
4622 rpc_init_wait_queue(&tbl->slot_tbl_waitq, "ForeChannel Slot table"); 4718 rpc_init_priority_wait_queue(&tbl->slot_tbl_waitq, "ForeChannel Slot table");
4623 4719
4624 tbl = &session->bc_slot_table; 4720 tbl = &session->bc_slot_table;
4625 tbl->highest_used_slotid = -1; 4721 tbl->highest_used_slotid = -1;
@@ -4838,14 +4934,22 @@ int nfs4_init_session(struct nfs_server *server)
4838{ 4934{
4839 struct nfs_client *clp = server->nfs_client; 4935 struct nfs_client *clp = server->nfs_client;
4840 struct nfs4_session *session; 4936 struct nfs4_session *session;
4937 unsigned int rsize, wsize;
4841 int ret; 4938 int ret;
4842 4939
4843 if (!nfs4_has_session(clp)) 4940 if (!nfs4_has_session(clp))
4844 return 0; 4941 return 0;
4845 4942
4943 rsize = server->rsize;
4944 if (rsize == 0)
4945 rsize = NFS_MAX_FILE_IO_SIZE;
4946 wsize = server->wsize;
4947 if (wsize == 0)
4948 wsize = NFS_MAX_FILE_IO_SIZE;
4949
4846 session = clp->cl_session; 4950 session = clp->cl_session;
4847 session->fc_attrs.max_rqst_sz = server->wsize + nfs41_maxwrite_overhead; 4951 session->fc_attrs.max_rqst_sz = wsize + nfs41_maxwrite_overhead;
4848 session->fc_attrs.max_resp_sz = server->rsize + nfs41_maxread_overhead; 4952 session->fc_attrs.max_resp_sz = rsize + nfs41_maxread_overhead;
4849 4953
4850 ret = nfs4_recover_expired_lease(server); 4954 ret = nfs4_recover_expired_lease(server);
4851 if (!ret) 4955 if (!ret)
@@ -4871,7 +4975,7 @@ static int nfs4_proc_sequence(struct nfs_client *clp, struct rpc_cred *cred)
4871 args.sa_cache_this = 0; 4975 args.sa_cache_this = 0;
4872 4976
4873 return nfs4_call_sync_sequence(clp, clp->cl_rpcclient, &msg, &args, 4977 return nfs4_call_sync_sequence(clp, clp->cl_rpcclient, &msg, &args,
4874 &res, 0); 4978 &res, args.sa_cache_this, 1);
4875} 4979}
4876 4980
4877void nfs41_sequence_call_done(struct rpc_task *task, void *data) 4981void nfs41_sequence_call_done(struct rpc_task *task, void *data)
@@ -4953,6 +5057,7 @@ static void nfs4_reclaim_complete_prepare(struct rpc_task *task, void *data)
4953{ 5057{
4954 struct nfs4_reclaim_complete_data *calldata = data; 5058 struct nfs4_reclaim_complete_data *calldata = data;
4955 5059
5060 rpc_task_set_priority(task, RPC_PRIORITY_PRIVILEGED);
4956 if (nfs4_setup_sequence(calldata->clp, &calldata->arg.seq_args, 5061 if (nfs4_setup_sequence(calldata->clp, &calldata->arg.seq_args,
4957 &calldata->res.seq_res, 0, task)) 5062 &calldata->res.seq_res, 0, task))
4958 return; 5063 return;
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index e76427e6346f..6d263ed79e92 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -135,16 +135,30 @@ static int nfs41_setup_state_renewal(struct nfs_client *clp)
135 return status; 135 return status;
136} 136}
137 137
138static void nfs41_end_drain_session(struct nfs_client *clp, 138static void nfs4_end_drain_session(struct nfs_client *clp)
139 struct nfs4_session *ses)
140{ 139{
141 if (test_and_clear_bit(NFS4CLNT_SESSION_DRAINING, &clp->cl_state)) 140 struct nfs4_session *ses = clp->cl_session;
142 rpc_wake_up(&ses->fc_slot_table.slot_tbl_waitq); 141 int max_slots;
142
143 if (test_and_clear_bit(NFS4CLNT_SESSION_DRAINING, &clp->cl_state)) {
144 spin_lock(&ses->fc_slot_table.slot_tbl_lock);
145 max_slots = ses->fc_slot_table.max_slots;
146 while (max_slots--) {
147 struct rpc_task *task;
148
149 task = rpc_wake_up_next(&ses->fc_slot_table.
150 slot_tbl_waitq);
151 if (!task)
152 break;
153 rpc_task_set_priority(task, RPC_PRIORITY_PRIVILEGED);
154 }
155 spin_unlock(&ses->fc_slot_table.slot_tbl_lock);
156 }
143} 157}
144 158
145static int nfs41_begin_drain_session(struct nfs_client *clp, 159static int nfs4_begin_drain_session(struct nfs_client *clp)
146 struct nfs4_session *ses)
147{ 160{
161 struct nfs4_session *ses = clp->cl_session;
148 struct nfs4_slot_table *tbl = &ses->fc_slot_table; 162 struct nfs4_slot_table *tbl = &ses->fc_slot_table;
149 163
150 spin_lock(&tbl->slot_tbl_lock); 164 spin_lock(&tbl->slot_tbl_lock);
@@ -162,16 +176,13 @@ int nfs41_init_clientid(struct nfs_client *clp, struct rpc_cred *cred)
162{ 176{
163 int status; 177 int status;
164 178
165 status = nfs41_begin_drain_session(clp, clp->cl_session); 179 nfs4_begin_drain_session(clp);
166 if (status != 0)
167 goto out;
168 status = nfs4_proc_exchange_id(clp, cred); 180 status = nfs4_proc_exchange_id(clp, cred);
169 if (status != 0) 181 if (status != 0)
170 goto out; 182 goto out;
171 status = nfs4_proc_create_session(clp); 183 status = nfs4_proc_create_session(clp);
172 if (status != 0) 184 if (status != 0)
173 goto out; 185 goto out;
174 nfs41_end_drain_session(clp, clp->cl_session);
175 nfs41_setup_state_renewal(clp); 186 nfs41_setup_state_renewal(clp);
176 nfs_mark_client_ready(clp, NFS_CS_READY); 187 nfs_mark_client_ready(clp, NFS_CS_READY);
177out: 188out:
@@ -755,16 +766,21 @@ struct nfs_seqid *nfs_alloc_seqid(struct nfs_seqid_counter *counter)
755 return new; 766 return new;
756} 767}
757 768
758void nfs_free_seqid(struct nfs_seqid *seqid) 769void nfs_release_seqid(struct nfs_seqid *seqid)
759{ 770{
760 if (!list_empty(&seqid->list)) { 771 if (!list_empty(&seqid->list)) {
761 struct rpc_sequence *sequence = seqid->sequence->sequence; 772 struct rpc_sequence *sequence = seqid->sequence->sequence;
762 773
763 spin_lock(&sequence->lock); 774 spin_lock(&sequence->lock);
764 list_del(&seqid->list); 775 list_del_init(&seqid->list);
765 spin_unlock(&sequence->lock); 776 spin_unlock(&sequence->lock);
766 rpc_wake_up(&sequence->wait); 777 rpc_wake_up(&sequence->wait);
767 } 778 }
779}
780
781void nfs_free_seqid(struct nfs_seqid *seqid)
782{
783 nfs_release_seqid(seqid);
768 kfree(seqid); 784 kfree(seqid);
769} 785}
770 786
@@ -1257,13 +1273,9 @@ void nfs41_handle_sequence_flag_errors(struct nfs_client *clp, u32 flags)
1257 1273
1258static int nfs4_reset_session(struct nfs_client *clp) 1274static int nfs4_reset_session(struct nfs_client *clp)
1259{ 1275{
1260 struct nfs4_session *ses = clp->cl_session;
1261 int status; 1276 int status;
1262 1277
1263 status = nfs41_begin_drain_session(clp, ses); 1278 nfs4_begin_drain_session(clp);
1264 if (status != 0)
1265 return status;
1266
1267 status = nfs4_proc_destroy_session(clp->cl_session); 1279 status = nfs4_proc_destroy_session(clp->cl_session);
1268 if (status && status != -NFS4ERR_BADSESSION && 1280 if (status && status != -NFS4ERR_BADSESSION &&
1269 status != -NFS4ERR_DEADSESSION) { 1281 status != -NFS4ERR_DEADSESSION) {
@@ -1279,19 +1291,17 @@ static int nfs4_reset_session(struct nfs_client *clp)
1279out: 1291out:
1280 /* 1292 /*
1281 * Let the state manager reestablish state 1293 * Let the state manager reestablish state
1282 * without waking other tasks yet.
1283 */ 1294 */
1284 if (!test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state)) { 1295 if (!test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state) &&
1285 /* Wake up the next rpc task */ 1296 status == 0)
1286 nfs41_end_drain_session(clp, ses); 1297 nfs41_setup_state_renewal(clp);
1287 if (status == 0) 1298
1288 nfs41_setup_state_renewal(clp);
1289 }
1290 return status; 1299 return status;
1291} 1300}
1292 1301
1293#else /* CONFIG_NFS_V4_1 */ 1302#else /* CONFIG_NFS_V4_1 */
1294static int nfs4_reset_session(struct nfs_client *clp) { return 0; } 1303static int nfs4_reset_session(struct nfs_client *clp) { return 0; }
1304static int nfs4_end_drain_session(struct nfs_client *clp) { return 0; }
1295#endif /* CONFIG_NFS_V4_1 */ 1305#endif /* CONFIG_NFS_V4_1 */
1296 1306
1297/* Set NFS4CLNT_LEASE_EXPIRED for all v4.0 errors and for recoverable errors 1307/* Set NFS4CLNT_LEASE_EXPIRED for all v4.0 errors and for recoverable errors
@@ -1382,6 +1392,7 @@ static void nfs4_state_manager(struct nfs_client *clp)
1382 goto out_error; 1392 goto out_error;
1383 } 1393 }
1384 1394
1395 nfs4_end_drain_session(clp);
1385 if (test_and_clear_bit(NFS4CLNT_DELEGRETURN, &clp->cl_state)) { 1396 if (test_and_clear_bit(NFS4CLNT_DELEGRETURN, &clp->cl_state)) {
1386 nfs_client_return_marked_delegations(clp); 1397 nfs_client_return_marked_delegations(clp);
1387 continue; 1398 continue;
@@ -1398,6 +1409,7 @@ static void nfs4_state_manager(struct nfs_client *clp)
1398out_error: 1409out_error:
1399 printk(KERN_WARNING "Error: state manager failed on NFSv4 server %s" 1410 printk(KERN_WARNING "Error: state manager failed on NFSv4 server %s"
1400 " with error %d\n", clp->cl_hostname, -status); 1411 " with error %d\n", clp->cl_hostname, -status);
1412 nfs4_end_drain_session(clp);
1401 nfs4_clear_state_manager_bit(clp); 1413 nfs4_clear_state_manager_bit(clp);
1402} 1414}
1403 1415
diff --git a/fs/nfsctl.c b/fs/nfsctl.c
index 8f9a20556f79..d3854d94b7cf 100644
--- a/fs/nfsctl.c
+++ b/fs/nfsctl.c
@@ -7,8 +7,6 @@
7#include <linux/types.h> 7#include <linux/types.h>
8#include <linux/file.h> 8#include <linux/file.h>
9#include <linux/fs.h> 9#include <linux/fs.h>
10#include <linux/sunrpc/svc.h>
11#include <linux/nfsd/nfsd.h>
12#include <linux/nfsd/syscall.h> 10#include <linux/nfsd/syscall.h>
13#include <linux/cred.h> 11#include <linux/cred.h>
14#include <linux/sched.h> 12#include <linux/sched.h>
diff --git a/fs/nfsd/auth.c b/fs/nfsd/auth.c
index 36fcabbf5186..79717a40daba 100644
--- a/fs/nfsd/auth.c
+++ b/fs/nfsd/auth.c
@@ -1,15 +1,7 @@
1/* 1/* Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de> */
2 * linux/fs/nfsd/auth.c
3 *
4 * Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de>
5 */
6 2
7#include <linux/types.h>
8#include <linux/sched.h> 3#include <linux/sched.h>
9#include <linux/sunrpc/svc.h> 4#include "nfsd.h"
10#include <linux/sunrpc/svcauth.h>
11#include <linux/nfsd/nfsd.h>
12#include <linux/nfsd/export.h>
13#include "auth.h" 5#include "auth.h"
14 6
15int nfsexp_flags(struct svc_rqst *rqstp, struct svc_export *exp) 7int nfsexp_flags(struct svc_rqst *rqstp, struct svc_export *exp)
diff --git a/fs/nfsd/cache.h b/fs/nfsd/cache.h
new file mode 100644
index 000000000000..d892be61016c
--- /dev/null
+++ b/fs/nfsd/cache.h
@@ -0,0 +1,83 @@
1/*
2 * Request reply cache. This was heavily inspired by the
3 * implementation in 4.3BSD/4.4BSD.
4 *
5 * Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de>
6 */
7
8#ifndef NFSCACHE_H
9#define NFSCACHE_H
10
11#include <linux/sunrpc/svc.h>
12
13/*
14 * Representation of a reply cache entry.
15 */
16struct svc_cacherep {
17 struct hlist_node c_hash;
18 struct list_head c_lru;
19
20 unsigned char c_state, /* unused, inprog, done */
21 c_type, /* status, buffer */
22 c_secure : 1; /* req came from port < 1024 */
23 struct sockaddr_in c_addr;
24 __be32 c_xid;
25 u32 c_prot;
26 u32 c_proc;
27 u32 c_vers;
28 unsigned long c_timestamp;
29 union {
30 struct kvec u_vec;
31 __be32 u_status;
32 } c_u;
33};
34
35#define c_replvec c_u.u_vec
36#define c_replstat c_u.u_status
37
38/* cache entry states */
39enum {
40 RC_UNUSED,
41 RC_INPROG,
42 RC_DONE
43};
44
45/* return values */
46enum {
47 RC_DROPIT,
48 RC_REPLY,
49 RC_DOIT,
50 RC_INTR
51};
52
53/*
54 * Cache types.
55 * We may want to add more types one day, e.g. for diropres and
56 * attrstat replies. Using cache entries with fixed length instead
57 * of buffer pointers may be more efficient.
58 */
59enum {
60 RC_NOCACHE,
61 RC_REPLSTAT,
62 RC_REPLBUFF,
63};
64
65/*
66 * If requests are retransmitted within this interval, they're dropped.
67 */
68#define RC_DELAY (HZ/5)
69
70int nfsd_reply_cache_init(void);
71void nfsd_reply_cache_shutdown(void);
72int nfsd_cache_lookup(struct svc_rqst *, int);
73void nfsd_cache_update(struct svc_rqst *, int, __be32 *);
74
75#ifdef CONFIG_NFSD_V4
76void nfsd4_set_statp(struct svc_rqst *rqstp, __be32 *statp);
77#else /* CONFIG_NFSD_V4 */
78static inline void nfsd4_set_statp(struct svc_rqst *rqstp, __be32 *statp)
79{
80}
81#endif /* CONFIG_NFSD_V4 */
82
83#endif /* NFSCACHE_H */
diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c
index c1c9e035d4a4..c487810a2366 100644
--- a/fs/nfsd/export.c
+++ b/fs/nfsd/export.c
@@ -1,7 +1,5 @@
1#define MSNFS /* HACK HACK */ 1#define MSNFS /* HACK HACK */
2/* 2/*
3 * linux/fs/nfsd/export.c
4 *
5 * NFS exporting and validation. 3 * NFS exporting and validation.
6 * 4 *
7 * We maintain a list of clients, each of which has a list of 5 * We maintain a list of clients, each of which has a list of
@@ -14,29 +12,16 @@
14 * Copyright (C) 1995, 1996 Olaf Kirch, <okir@monad.swb.de> 12 * Copyright (C) 1995, 1996 Olaf Kirch, <okir@monad.swb.de>
15 */ 13 */
16 14
17#include <linux/unistd.h>
18#include <linux/slab.h>
19#include <linux/stat.h>
20#include <linux/in.h>
21#include <linux/seq_file.h>
22#include <linux/syscalls.h>
23#include <linux/rwsem.h>
24#include <linux/dcache.h>
25#include <linux/namei.h> 15#include <linux/namei.h>
26#include <linux/mount.h>
27#include <linux/hash.h>
28#include <linux/module.h> 16#include <linux/module.h>
29#include <linux/exportfs.h> 17#include <linux/exportfs.h>
30 18
31#include <linux/sunrpc/svc.h>
32#include <linux/nfsd/nfsd.h>
33#include <linux/nfsd/nfsfh.h>
34#include <linux/nfsd/syscall.h> 19#include <linux/nfsd/syscall.h>
35#include <linux/lockd/bind.h>
36#include <linux/sunrpc/msg_prot.h>
37#include <linux/sunrpc/gss_api.h>
38#include <net/ipv6.h> 20#include <net/ipv6.h>
39 21
22#include "nfsd.h"
23#include "nfsfh.h"
24
40#define NFSDDBG_FACILITY NFSDDBG_EXPORT 25#define NFSDDBG_FACILITY NFSDDBG_EXPORT
41 26
42typedef struct auth_domain svc_client; 27typedef struct auth_domain svc_client;
@@ -369,16 +354,25 @@ static struct svc_export *svc_export_update(struct svc_export *new,
369 struct svc_export *old); 354 struct svc_export *old);
370static struct svc_export *svc_export_lookup(struct svc_export *); 355static struct svc_export *svc_export_lookup(struct svc_export *);
371 356
372static int check_export(struct inode *inode, int flags, unsigned char *uuid) 357static int check_export(struct inode *inode, int *flags, unsigned char *uuid)
373{ 358{
374 359
375 /* We currently export only dirs and regular files. 360 /*
376 * This is what umountd does. 361 * We currently export only dirs, regular files, and (for v4
362 * pseudoroot) symlinks.
377 */ 363 */
378 if (!S_ISDIR(inode->i_mode) && 364 if (!S_ISDIR(inode->i_mode) &&
365 !S_ISLNK(inode->i_mode) &&
379 !S_ISREG(inode->i_mode)) 366 !S_ISREG(inode->i_mode))
380 return -ENOTDIR; 367 return -ENOTDIR;
381 368
369 /*
370 * Mountd should never pass down a writeable V4ROOT export, but,
371 * just to make sure:
372 */
373 if (*flags & NFSEXP_V4ROOT)
374 *flags |= NFSEXP_READONLY;
375
382 /* There are two requirements on a filesystem to be exportable. 376 /* There are two requirements on a filesystem to be exportable.
383 * 1: We must be able to identify the filesystem from a number. 377 * 1: We must be able to identify the filesystem from a number.
384 * either a device number (so FS_REQUIRES_DEV needed) 378 * either a device number (so FS_REQUIRES_DEV needed)
@@ -387,7 +381,7 @@ static int check_export(struct inode *inode, int flags, unsigned char *uuid)
387 * This means that s_export_op must be set. 381 * This means that s_export_op must be set.
388 */ 382 */
389 if (!(inode->i_sb->s_type->fs_flags & FS_REQUIRES_DEV) && 383 if (!(inode->i_sb->s_type->fs_flags & FS_REQUIRES_DEV) &&
390 !(flags & NFSEXP_FSID) && 384 !(*flags & NFSEXP_FSID) &&
391 uuid == NULL) { 385 uuid == NULL) {
392 dprintk("exp_export: export of non-dev fs without fsid\n"); 386 dprintk("exp_export: export of non-dev fs without fsid\n");
393 return -EINVAL; 387 return -EINVAL;
@@ -602,7 +596,7 @@ static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen)
602 goto out4; 596 goto out4;
603 } 597 }
604 598
605 err = check_export(exp.ex_path.dentry->d_inode, exp.ex_flags, 599 err = check_export(exp.ex_path.dentry->d_inode, &exp.ex_flags,
606 exp.ex_uuid); 600 exp.ex_uuid);
607 if (err) 601 if (err)
608 goto out4; 602 goto out4;
@@ -1041,7 +1035,7 @@ exp_export(struct nfsctl_export *nxp)
1041 goto finish; 1035 goto finish;
1042 } 1036 }
1043 1037
1044 err = check_export(path.dentry->d_inode, nxp->ex_flags, NULL); 1038 err = check_export(path.dentry->d_inode, &nxp->ex_flags, NULL);
1045 if (err) goto finish; 1039 if (err) goto finish;
1046 1040
1047 err = -ENOMEM; 1041 err = -ENOMEM;
@@ -1320,6 +1314,23 @@ rqst_exp_parent(struct svc_rqst *rqstp, struct path *path)
1320 return exp; 1314 return exp;
1321} 1315}
1322 1316
1317static struct svc_export *find_fsidzero_export(struct svc_rqst *rqstp)
1318{
1319 struct svc_export *exp;
1320 u32 fsidv[2];
1321
1322 mk_fsid(FSID_NUM, fsidv, 0, 0, 0, NULL);
1323
1324 exp = rqst_exp_find(rqstp, FSID_NUM, fsidv);
1325 /*
1326 * We shouldn't have accepted an nfsv4 request at all if we
1327 * don't have a pseudoexport!:
1328 */
1329 if (IS_ERR(exp) && PTR_ERR(exp) == -ENOENT)
1330 exp = ERR_PTR(-ESERVERFAULT);
1331 return exp;
1332}
1333
1323/* 1334/*
1324 * Called when we need the filehandle for the root of the pseudofs, 1335 * Called when we need the filehandle for the root of the pseudofs,
1325 * for a given NFSv4 client. The root is defined to be the 1336 * for a given NFSv4 client. The root is defined to be the
@@ -1330,11 +1341,8 @@ exp_pseudoroot(struct svc_rqst *rqstp, struct svc_fh *fhp)
1330{ 1341{
1331 struct svc_export *exp; 1342 struct svc_export *exp;
1332 __be32 rv; 1343 __be32 rv;
1333 u32 fsidv[2];
1334 1344
1335 mk_fsid(FSID_NUM, fsidv, 0, 0, 0, NULL); 1345 exp = find_fsidzero_export(rqstp);
1336
1337 exp = rqst_exp_find(rqstp, FSID_NUM, fsidv);
1338 if (IS_ERR(exp)) 1346 if (IS_ERR(exp))
1339 return nfserrno(PTR_ERR(exp)); 1347 return nfserrno(PTR_ERR(exp));
1340 rv = fh_compose(fhp, exp, exp->ex_path.dentry, NULL); 1348 rv = fh_compose(fhp, exp, exp->ex_path.dentry, NULL);
@@ -1425,6 +1433,7 @@ static struct flags {
1425 { NFSEXP_CROSSMOUNT, {"crossmnt", ""}}, 1433 { NFSEXP_CROSSMOUNT, {"crossmnt", ""}},
1426 { NFSEXP_NOSUBTREECHECK, {"no_subtree_check", ""}}, 1434 { NFSEXP_NOSUBTREECHECK, {"no_subtree_check", ""}},
1427 { NFSEXP_NOAUTHNLM, {"insecure_locks", ""}}, 1435 { NFSEXP_NOAUTHNLM, {"insecure_locks", ""}},
1436 { NFSEXP_V4ROOT, {"v4root", ""}},
1428#ifdef MSNFS 1437#ifdef MSNFS
1429 { NFSEXP_MSNFS, {"msnfs", ""}}, 1438 { NFSEXP_MSNFS, {"msnfs", ""}},
1430#endif 1439#endif
diff --git a/fs/nfsd/lockd.c b/fs/nfsd/lockd.c
index b2786a5f9afe..0c6d81670137 100644
--- a/fs/nfsd/lockd.c
+++ b/fs/nfsd/lockd.c
@@ -1,6 +1,4 @@
1/* 1/*
2 * linux/fs/nfsd/lockd.c
3 *
4 * This file contains all the stubs needed when communicating with lockd. 2 * This file contains all the stubs needed when communicating with lockd.
5 * This level of indirection is necessary so we can run nfsd+lockd without 3 * This level of indirection is necessary so we can run nfsd+lockd without
6 * requiring the nfs client to be compiled in/loaded, and vice versa. 4 * requiring the nfs client to be compiled in/loaded, and vice versa.
@@ -8,14 +6,10 @@
8 * Copyright (C) 1996, Olaf Kirch <okir@monad.swb.de> 6 * Copyright (C) 1996, Olaf Kirch <okir@monad.swb.de>
9 */ 7 */
10 8
11#include <linux/types.h>
12#include <linux/fs.h>
13#include <linux/file.h> 9#include <linux/file.h>
14#include <linux/mount.h>
15#include <linux/sunrpc/clnt.h>
16#include <linux/sunrpc/svc.h>
17#include <linux/nfsd/nfsd.h>
18#include <linux/lockd/bind.h> 10#include <linux/lockd/bind.h>
11#include "nfsd.h"
12#include "vfs.h"
19 13
20#define NFSDDBG_FACILITY NFSDDBG_LOCKD 14#define NFSDDBG_FACILITY NFSDDBG_LOCKD
21 15
diff --git a/fs/nfsd/nfs2acl.c b/fs/nfsd/nfs2acl.c
index 4e3219e84116..f20589d2ae27 100644
--- a/fs/nfsd/nfs2acl.c
+++ b/fs/nfsd/nfs2acl.c
@@ -1,19 +1,15 @@
1/* 1/*
2 * linux/fs/nfsd/nfs2acl.c
3 *
4 * Process version 2 NFSACL requests. 2 * Process version 2 NFSACL requests.
5 * 3 *
6 * Copyright (C) 2002-2003 Andreas Gruenbacher <agruen@suse.de> 4 * Copyright (C) 2002-2003 Andreas Gruenbacher <agruen@suse.de>
7 */ 5 */
8 6
9#include <linux/sunrpc/svc.h> 7#include "nfsd.h"
10#include <linux/nfs.h> 8/* FIXME: nfsacl.h is a broken header */
11#include <linux/nfsd/nfsd.h>
12#include <linux/nfsd/cache.h>
13#include <linux/nfsd/xdr.h>
14#include <linux/nfsd/xdr3.h>
15#include <linux/posix_acl.h>
16#include <linux/nfsacl.h> 9#include <linux/nfsacl.h>
10#include "cache.h"
11#include "xdr3.h"
12#include "vfs.h"
17 13
18#define NFSDDBG_FACILITY NFSDDBG_PROC 14#define NFSDDBG_FACILITY NFSDDBG_PROC
19#define RETURN_STATUS(st) { resp->status = (st); return (st); } 15#define RETURN_STATUS(st) { resp->status = (st); return (st); }
@@ -217,6 +213,16 @@ static int nfsaclsvc_decode_accessargs(struct svc_rqst *rqstp, __be32 *p,
217 * XDR encode functions 213 * XDR encode functions
218 */ 214 */
219 215
216/*
217 * There must be an encoding function for void results so svc_process
218 * will work properly.
219 */
220int
221nfsaclsvc_encode_voidres(struct svc_rqst *rqstp, __be32 *p, void *dummy)
222{
223 return xdr_ressize_check(rqstp, p);
224}
225
220/* GETACL */ 226/* GETACL */
221static int nfsaclsvc_encode_getaclres(struct svc_rqst *rqstp, __be32 *p, 227static int nfsaclsvc_encode_getaclres(struct svc_rqst *rqstp, __be32 *p,
222 struct nfsd3_getaclres *resp) 228 struct nfsd3_getaclres *resp)
@@ -308,7 +314,6 @@ static int nfsaclsvc_release_access(struct svc_rqst *rqstp, __be32 *p,
308} 314}
309 315
310#define nfsaclsvc_decode_voidargs NULL 316#define nfsaclsvc_decode_voidargs NULL
311#define nfsaclsvc_encode_voidres NULL
312#define nfsaclsvc_release_void NULL 317#define nfsaclsvc_release_void NULL
313#define nfsd3_fhandleargs nfsd_fhandle 318#define nfsd3_fhandleargs nfsd_fhandle
314#define nfsd3_attrstatres nfsd_attrstat 319#define nfsd3_attrstatres nfsd_attrstat
@@ -346,5 +351,5 @@ struct svc_version nfsd_acl_version2 = {
346 .vs_proc = nfsd_acl_procedures2, 351 .vs_proc = nfsd_acl_procedures2,
347 .vs_dispatch = nfsd_dispatch, 352 .vs_dispatch = nfsd_dispatch,
348 .vs_xdrsize = NFS3_SVC_XDRSIZE, 353 .vs_xdrsize = NFS3_SVC_XDRSIZE,
349 .vs_hidden = 1, 354 .vs_hidden = 0,
350}; 355};
diff --git a/fs/nfsd/nfs3acl.c b/fs/nfsd/nfs3acl.c
index 9981dbb377a3..e0c4846bad92 100644
--- a/fs/nfsd/nfs3acl.c
+++ b/fs/nfsd/nfs3acl.c
@@ -1,18 +1,15 @@
1/* 1/*
2 * linux/fs/nfsd/nfs3acl.c
3 *
4 * Process version 3 NFSACL requests. 2 * Process version 3 NFSACL requests.
5 * 3 *
6 * Copyright (C) 2002-2003 Andreas Gruenbacher <agruen@suse.de> 4 * Copyright (C) 2002-2003 Andreas Gruenbacher <agruen@suse.de>
7 */ 5 */
8 6
9#include <linux/sunrpc/svc.h> 7#include "nfsd.h"
10#include <linux/nfs3.h> 8/* FIXME: nfsacl.h is a broken header */
11#include <linux/nfsd/nfsd.h>
12#include <linux/nfsd/cache.h>
13#include <linux/nfsd/xdr3.h>
14#include <linux/posix_acl.h>
15#include <linux/nfsacl.h> 9#include <linux/nfsacl.h>
10#include "cache.h"
11#include "xdr3.h"
12#include "vfs.h"
16 13
17#define RETURN_STATUS(st) { resp->status = (st); return (st); } 14#define RETURN_STATUS(st) { resp->status = (st); return (st); }
18 15
@@ -264,6 +261,6 @@ struct svc_version nfsd_acl_version3 = {
264 .vs_proc = nfsd_acl_procedures3, 261 .vs_proc = nfsd_acl_procedures3,
265 .vs_dispatch = nfsd_dispatch, 262 .vs_dispatch = nfsd_dispatch,
266 .vs_xdrsize = NFS3_SVC_XDRSIZE, 263 .vs_xdrsize = NFS3_SVC_XDRSIZE,
267 .vs_hidden = 1, 264 .vs_hidden = 0,
268}; 265};
269 266
diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c
index a713c418a922..3d68f45a37b9 100644
--- a/fs/nfsd/nfs3proc.c
+++ b/fs/nfsd/nfs3proc.c
@@ -1,30 +1,16 @@
1/* 1/*
2 * linux/fs/nfsd/nfs3proc.c
3 *
4 * Process version 3 NFS requests. 2 * Process version 3 NFS requests.
5 * 3 *
6 * Copyright (C) 1996, 1997, 1998 Olaf Kirch <okir@monad.swb.de> 4 * Copyright (C) 1996, 1997, 1998 Olaf Kirch <okir@monad.swb.de>
7 */ 5 */
8 6
9#include <linux/linkage.h>
10#include <linux/time.h>
11#include <linux/errno.h>
12#include <linux/fs.h> 7#include <linux/fs.h>
13#include <linux/ext2_fs.h> 8#include <linux/ext2_fs.h>
14#include <linux/stat.h>
15#include <linux/fcntl.h>
16#include <linux/net.h>
17#include <linux/in.h>
18#include <linux/unistd.h>
19#include <linux/slab.h>
20#include <linux/major.h>
21#include <linux/magic.h> 9#include <linux/magic.h>
22 10
23#include <linux/sunrpc/svc.h> 11#include "cache.h"
24#include <linux/nfsd/nfsd.h> 12#include "xdr3.h"
25#include <linux/nfsd/cache.h> 13#include "vfs.h"
26#include <linux/nfsd/xdr3.h>
27#include <linux/nfs3.h>
28 14
29#define NFSDDBG_FACILITY NFSDDBG_PROC 15#define NFSDDBG_FACILITY NFSDDBG_PROC
30 16
diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c
index d0a2ce1b4324..2a533a0af2a9 100644
--- a/fs/nfsd/nfs3xdr.c
+++ b/fs/nfsd/nfs3xdr.c
@@ -1,6 +1,4 @@
1/* 1/*
2 * linux/fs/nfsd/nfs3xdr.c
3 *
4 * XDR support for nfsd/protocol version 3. 2 * XDR support for nfsd/protocol version 3.
5 * 3 *
6 * Copyright (C) 1995, 1996, 1997 Olaf Kirch <okir@monad.swb.de> 4 * Copyright (C) 1995, 1996, 1997 Olaf Kirch <okir@monad.swb.de>
@@ -8,19 +6,8 @@
8 * 2003-08-09 Jamie Lokier: Use htonl() for nanoseconds, not htons()! 6 * 2003-08-09 Jamie Lokier: Use htonl() for nanoseconds, not htons()!
9 */ 7 */
10 8
11#include <linux/types.h>
12#include <linux/time.h>
13#include <linux/nfs3.h>
14#include <linux/list.h>
15#include <linux/spinlock.h>
16#include <linux/dcache.h>
17#include <linux/namei.h> 9#include <linux/namei.h>
18#include <linux/mm.h> 10#include "xdr3.h"
19#include <linux/vfs.h>
20#include <linux/sunrpc/xdr.h>
21#include <linux/sunrpc/svc.h>
22#include <linux/nfsd/nfsd.h>
23#include <linux/nfsd/xdr3.h>
24#include "auth.h" 11#include "auth.h"
25 12
26#define NFSDDBG_FACILITY NFSDDBG_XDR 13#define NFSDDBG_FACILITY NFSDDBG_XDR
diff --git a/fs/nfsd/nfs4acl.c b/fs/nfsd/nfs4acl.c
index 725d02f210e2..88150685df34 100644
--- a/fs/nfsd/nfs4acl.c
+++ b/fs/nfsd/nfs4acl.c
@@ -1,6 +1,4 @@
1/* 1/*
2 * fs/nfs4acl/acl.c
3 *
4 * Common NFSv4 ACL handling code. 2 * Common NFSv4 ACL handling code.
5 * 3 *
6 * Copyright (c) 2002, 2003 The Regents of the University of Michigan. 4 * Copyright (c) 2002, 2003 The Regents of the University of Michigan.
@@ -36,15 +34,7 @@
36 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 34 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37 */ 35 */
38 36
39#include <linux/string.h>
40#include <linux/slab.h>
41#include <linux/list.h>
42#include <linux/types.h>
43#include <linux/fs.h>
44#include <linux/module.h>
45#include <linux/nfs_fs.h> 37#include <linux/nfs_fs.h>
46#include <linux/posix_acl.h>
47#include <linux/nfs4.h>
48#include <linux/nfs4_acl.h> 38#include <linux/nfs4_acl.h>
49 39
50 40
@@ -389,7 +379,7 @@ sort_pacl(struct posix_acl *pacl)
389 sort_pacl_range(pacl, 1, i-1); 379 sort_pacl_range(pacl, 1, i-1);
390 380
391 BUG_ON(pacl->a_entries[i].e_tag != ACL_GROUP_OBJ); 381 BUG_ON(pacl->a_entries[i].e_tag != ACL_GROUP_OBJ);
392 j = i++; 382 j = ++i;
393 while (pacl->a_entries[j].e_tag == ACL_GROUP) 383 while (pacl->a_entries[j].e_tag == ACL_GROUP)
394 j++; 384 j++;
395 sort_pacl_range(pacl, i, j-1); 385 sort_pacl_range(pacl, i, j-1);
diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index 24e8d78f8dde..c6eed2a3b093 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -1,6 +1,4 @@
1/* 1/*
2 * linux/fs/nfsd/nfs4callback.c
3 *
4 * Copyright (c) 2001 The Regents of the University of Michigan. 2 * Copyright (c) 2001 The Regents of the University of Michigan.
5 * All rights reserved. 3 * All rights reserved.
6 * 4 *
@@ -33,22 +31,9 @@
33 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 31 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34 */ 32 */
35 33
36#include <linux/module.h>
37#include <linux/list.h>
38#include <linux/inet.h>
39#include <linux/errno.h>
40#include <linux/delay.h>
41#include <linux/sched.h>
42#include <linux/kthread.h>
43#include <linux/sunrpc/xdr.h>
44#include <linux/sunrpc/svc.h>
45#include <linux/sunrpc/clnt.h> 34#include <linux/sunrpc/clnt.h>
46#include <linux/sunrpc/svcsock.h> 35#include "nfsd.h"
47#include <linux/nfsd/nfsd.h> 36#include "state.h"
48#include <linux/nfsd/state.h>
49#include <linux/sunrpc/sched.h>
50#include <linux/nfs4.h>
51#include <linux/sunrpc/xprtsock.h>
52 37
53#define NFSDDBG_FACILITY NFSDDBG_PROC 38#define NFSDDBG_FACILITY NFSDDBG_PROC
54 39
diff --git a/fs/nfsd/nfs4idmap.c b/fs/nfsd/nfs4idmap.c
index ba2c199592fd..6e2983b27f3c 100644
--- a/fs/nfsd/nfs4idmap.c
+++ b/fs/nfsd/nfs4idmap.c
@@ -1,6 +1,4 @@
1/* 1/*
2 * fs/nfsd/nfs4idmap.c
3 *
4 * Mapping of UID/GIDs to name and vice versa. 2 * Mapping of UID/GIDs to name and vice versa.
5 * 3 *
6 * Copyright (c) 2002, 2003 The Regents of the University of 4 * Copyright (c) 2002, 2003 The Regents of the University of
@@ -35,22 +33,9 @@
35 */ 33 */
36 34
37#include <linux/module.h> 35#include <linux/module.h>
38#include <linux/init.h>
39
40#include <linux/mm.h>
41#include <linux/errno.h>
42#include <linux/string.h>
43#include <linux/sunrpc/clnt.h>
44#include <linux/nfs.h>
45#include <linux/nfs4.h>
46#include <linux/nfs_fs.h>
47#include <linux/nfs_page.h>
48#include <linux/sunrpc/cache.h>
49#include <linux/nfsd_idmap.h> 36#include <linux/nfsd_idmap.h>
50#include <linux/list.h>
51#include <linux/time.h>
52#include <linux/seq_file.h> 37#include <linux/seq_file.h>
53#include <linux/sunrpc/svcauth.h> 38#include <linux/sched.h>
54 39
55/* 40/*
56 * Cache entry 41 * Cache entry
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index bebc0c2e1b0a..37514c469846 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -1,6 +1,4 @@
1/* 1/*
2 * fs/nfsd/nfs4proc.c
3 *
4 * Server-side procedures for NFSv4. 2 * Server-side procedures for NFSv4.
5 * 3 *
6 * Copyright (c) 2002 The Regents of the University of Michigan. 4 * Copyright (c) 2002 The Regents of the University of Michigan.
@@ -34,20 +32,11 @@
34 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 32 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 33 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 */ 34 */
37
38#include <linux/param.h>
39#include <linux/major.h>
40#include <linux/slab.h>
41#include <linux/file.h> 35#include <linux/file.h>
42 36
43#include <linux/sunrpc/svc.h> 37#include "cache.h"
44#include <linux/nfsd/nfsd.h> 38#include "xdr4.h"
45#include <linux/nfsd/cache.h> 39#include "vfs.h"
46#include <linux/nfs4.h>
47#include <linux/nfsd/state.h>
48#include <linux/nfsd/xdr4.h>
49#include <linux/nfs4_acl.h>
50#include <linux/sunrpc/gss_api.h>
51 40
52#define NFSDDBG_FACILITY NFSDDBG_PROC 41#define NFSDDBG_FACILITY NFSDDBG_PROC
53 42
@@ -170,7 +159,7 @@ do_open_permission(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfs
170 accmode |= NFSD_MAY_READ; 159 accmode |= NFSD_MAY_READ;
171 if (open->op_share_access & NFS4_SHARE_ACCESS_WRITE) 160 if (open->op_share_access & NFS4_SHARE_ACCESS_WRITE)
172 accmode |= (NFSD_MAY_WRITE | NFSD_MAY_TRUNC); 161 accmode |= (NFSD_MAY_WRITE | NFSD_MAY_TRUNC);
173 if (open->op_share_deny & NFS4_SHARE_DENY_WRITE) 162 if (open->op_share_deny & NFS4_SHARE_DENY_READ)
174 accmode |= NFSD_MAY_WRITE; 163 accmode |= NFSD_MAY_WRITE;
175 164
176 status = fh_verify(rqstp, current_fh, S_IFREG, accmode); 165 status = fh_verify(rqstp, current_fh, S_IFREG, accmode);
diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c
index b5348405046b..5a754f7b71ed 100644
--- a/fs/nfsd/nfs4recover.c
+++ b/fs/nfsd/nfs4recover.c
@@ -1,6 +1,4 @@
1/* 1/*
2* linux/fs/nfsd/nfs4recover.c
3*
4* Copyright (c) 2004 The Regents of the University of Michigan. 2* Copyright (c) 2004 The Regents of the University of Michigan.
5* All rights reserved. 3* All rights reserved.
6* 4*
@@ -33,20 +31,14 @@
33* 31*
34*/ 32*/
35 33
36#include <linux/err.h>
37#include <linux/sunrpc/svc.h>
38#include <linux/nfsd/nfsd.h>
39#include <linux/nfs4.h>
40#include <linux/nfsd/state.h>
41#include <linux/nfsd/xdr4.h>
42#include <linux/param.h>
43#include <linux/file.h> 34#include <linux/file.h>
44#include <linux/namei.h> 35#include <linux/namei.h>
45#include <asm/uaccess.h>
46#include <linux/scatterlist.h>
47#include <linux/crypto.h> 36#include <linux/crypto.h>
48#include <linux/sched.h> 37#include <linux/sched.h>
49#include <linux/mount.h> 38
39#include "nfsd.h"
40#include "state.h"
41#include "vfs.h"
50 42
51#define NFSDDBG_FACILITY NFSDDBG_PROC 43#define NFSDDBG_FACILITY NFSDDBG_PROC
52 44
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 2153f9bdbebd..f19ed866c95f 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -1,6 +1,4 @@
1/* 1/*
2* linux/fs/nfsd/nfs4state.c
3*
4* Copyright (c) 2001 The Regents of the University of Michigan. 2* Copyright (c) 2001 The Regents of the University of Michigan.
5* All rights reserved. 3* All rights reserved.
6* 4*
@@ -34,28 +32,14 @@
34* 32*
35*/ 33*/
36 34
37#include <linux/param.h>
38#include <linux/major.h>
39#include <linux/slab.h>
40
41#include <linux/sunrpc/svc.h>
42#include <linux/nfsd/nfsd.h>
43#include <linux/nfsd/cache.h>
44#include <linux/file.h> 35#include <linux/file.h>
45#include <linux/mount.h>
46#include <linux/workqueue.h>
47#include <linux/smp_lock.h> 36#include <linux/smp_lock.h>
48#include <linux/kthread.h>
49#include <linux/nfs4.h>
50#include <linux/nfsd/state.h>
51#include <linux/nfsd/xdr4.h>
52#include <linux/namei.h> 37#include <linux/namei.h>
53#include <linux/swap.h> 38#include <linux/swap.h>
54#include <linux/mutex.h>
55#include <linux/lockd/bind.h>
56#include <linux/module.h>
57#include <linux/sunrpc/svcauth_gss.h> 39#include <linux/sunrpc/svcauth_gss.h>
58#include <linux/sunrpc/clnt.h> 40#include <linux/sunrpc/clnt.h>
41#include "xdr4.h"
42#include "vfs.h"
59 43
60#define NFSDDBG_FACILITY NFSDDBG_PROC 44#define NFSDDBG_FACILITY NFSDDBG_PROC
61 45
@@ -477,13 +461,14 @@ static int set_forechannel_drc_size(struct nfsd4_channel_attrs *fchan)
477 461
478/* 462/*
479 * fchan holds the client values on input, and the server values on output 463 * fchan holds the client values on input, and the server values on output
464 * sv_max_mesg is the maximum payload plus one page for overhead.
480 */ 465 */
481static int init_forechannel_attrs(struct svc_rqst *rqstp, 466static int init_forechannel_attrs(struct svc_rqst *rqstp,
482 struct nfsd4_channel_attrs *session_fchan, 467 struct nfsd4_channel_attrs *session_fchan,
483 struct nfsd4_channel_attrs *fchan) 468 struct nfsd4_channel_attrs *fchan)
484{ 469{
485 int status = 0; 470 int status = 0;
486 __u32 maxcount = svc_max_payload(rqstp); 471 __u32 maxcount = nfsd_serv->sv_max_mesg;
487 472
488 /* headerpadsz set to zero in encode routine */ 473 /* headerpadsz set to zero in encode routine */
489 474
@@ -523,6 +508,15 @@ free_session_slots(struct nfsd4_session *ses)
523 kfree(ses->se_slots[i]); 508 kfree(ses->se_slots[i]);
524} 509}
525 510
511/*
512 * We don't actually need to cache the rpc and session headers, so we
513 * can allocate a little less for each slot:
514 */
515static inline int slot_bytes(struct nfsd4_channel_attrs *ca)
516{
517 return ca->maxresp_cached - NFSD_MIN_HDR_SEQ_SZ;
518}
519
526static int 520static int
527alloc_init_session(struct svc_rqst *rqstp, struct nfs4_client *clp, 521alloc_init_session(struct svc_rqst *rqstp, struct nfs4_client *clp,
528 struct nfsd4_create_session *cses) 522 struct nfsd4_create_session *cses)
@@ -554,7 +548,7 @@ alloc_init_session(struct svc_rqst *rqstp, struct nfs4_client *clp,
554 memcpy(new, &tmp, sizeof(*new)); 548 memcpy(new, &tmp, sizeof(*new));
555 549
556 /* allocate each struct nfsd4_slot and data cache in one piece */ 550 /* allocate each struct nfsd4_slot and data cache in one piece */
557 cachesize = new->se_fchannel.maxresp_cached - NFSD_MIN_HDR_SEQ_SZ; 551 cachesize = slot_bytes(&new->se_fchannel);
558 for (i = 0; i < new->se_fchannel.maxreqs; i++) { 552 for (i = 0; i < new->se_fchannel.maxreqs; i++) {
559 sp = kzalloc(sizeof(*sp) + cachesize, GFP_KERNEL); 553 sp = kzalloc(sizeof(*sp) + cachesize, GFP_KERNEL);
560 if (!sp) 554 if (!sp)
@@ -628,10 +622,12 @@ void
628free_session(struct kref *kref) 622free_session(struct kref *kref)
629{ 623{
630 struct nfsd4_session *ses; 624 struct nfsd4_session *ses;
625 int mem;
631 626
632 ses = container_of(kref, struct nfsd4_session, se_ref); 627 ses = container_of(kref, struct nfsd4_session, se_ref);
633 spin_lock(&nfsd_drc_lock); 628 spin_lock(&nfsd_drc_lock);
634 nfsd_drc_mem_used -= ses->se_fchannel.maxreqs * NFSD_SLOT_CACHE_SIZE; 629 mem = ses->se_fchannel.maxreqs * slot_bytes(&ses->se_fchannel);
630 nfsd_drc_mem_used -= mem;
635 spin_unlock(&nfsd_drc_lock); 631 spin_unlock(&nfsd_drc_lock);
636 free_session_slots(ses); 632 free_session_slots(ses);
637 kfree(ses); 633 kfree(ses);
@@ -2404,11 +2400,8 @@ nfs4_open_delegation(struct svc_fh *fh, struct nfsd4_open *open, struct nfs4_sta
2404 2400
2405 memcpy(&open->op_delegate_stateid, &dp->dl_stateid, sizeof(dp->dl_stateid)); 2401 memcpy(&open->op_delegate_stateid, &dp->dl_stateid, sizeof(dp->dl_stateid));
2406 2402
2407 dprintk("NFSD: delegation stateid=(%08x/%08x/%08x/%08x)\n\n", 2403 dprintk("NFSD: delegation stateid=" STATEID_FMT "\n",
2408 dp->dl_stateid.si_boot, 2404 STATEID_VAL(&dp->dl_stateid));
2409 dp->dl_stateid.si_stateownerid,
2410 dp->dl_stateid.si_fileid,
2411 dp->dl_stateid.si_generation);
2412out: 2405out:
2413 if (open->op_claim_type == NFS4_OPEN_CLAIM_PREVIOUS 2406 if (open->op_claim_type == NFS4_OPEN_CLAIM_PREVIOUS
2414 && flag == NFS4_OPEN_DELEGATE_NONE 2407 && flag == NFS4_OPEN_DELEGATE_NONE
@@ -2498,9 +2491,8 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf
2498 2491
2499 status = nfs_ok; 2492 status = nfs_ok;
2500 2493
2501 dprintk("nfs4_process_open2: stateid=(%08x/%08x/%08x/%08x)\n", 2494 dprintk("%s: stateid=" STATEID_FMT "\n", __func__,
2502 stp->st_stateid.si_boot, stp->st_stateid.si_stateownerid, 2495 STATEID_VAL(&stp->st_stateid));
2503 stp->st_stateid.si_fileid, stp->st_stateid.si_generation);
2504out: 2496out:
2505 if (fp) 2497 if (fp)
2506 put_nfs4_file(fp); 2498 put_nfs4_file(fp);
@@ -2666,9 +2658,8 @@ STALE_STATEID(stateid_t *stateid)
2666{ 2658{
2667 if (time_after((unsigned long)boot_time, 2659 if (time_after((unsigned long)boot_time,
2668 (unsigned long)stateid->si_boot)) { 2660 (unsigned long)stateid->si_boot)) {
2669 dprintk("NFSD: stale stateid (%08x/%08x/%08x/%08x)!\n", 2661 dprintk("NFSD: stale stateid " STATEID_FMT "!\n",
2670 stateid->si_boot, stateid->si_stateownerid, 2662 STATEID_VAL(stateid));
2671 stateid->si_fileid, stateid->si_generation);
2672 return 1; 2663 return 1;
2673 } 2664 }
2674 return 0; 2665 return 0;
@@ -2680,9 +2671,8 @@ EXPIRED_STATEID(stateid_t *stateid)
2680 if (time_before((unsigned long)boot_time, 2671 if (time_before((unsigned long)boot_time,
2681 ((unsigned long)stateid->si_boot)) && 2672 ((unsigned long)stateid->si_boot)) &&
2682 time_before((unsigned long)(stateid->si_boot + lease_time), get_seconds())) { 2673 time_before((unsigned long)(stateid->si_boot + lease_time), get_seconds())) {
2683 dprintk("NFSD: expired stateid (%08x/%08x/%08x/%08x)!\n", 2674 dprintk("NFSD: expired stateid " STATEID_FMT "!\n",
2684 stateid->si_boot, stateid->si_stateownerid, 2675 STATEID_VAL(stateid));
2685 stateid->si_fileid, stateid->si_generation);
2686 return 1; 2676 return 1;
2687 } 2677 }
2688 return 0; 2678 return 0;
@@ -2696,9 +2686,8 @@ stateid_error_map(stateid_t *stateid)
2696 if (EXPIRED_STATEID(stateid)) 2686 if (EXPIRED_STATEID(stateid))
2697 return nfserr_expired; 2687 return nfserr_expired;
2698 2688
2699 dprintk("NFSD: bad stateid (%08x/%08x/%08x/%08x)!\n", 2689 dprintk("NFSD: bad stateid " STATEID_FMT "!\n",
2700 stateid->si_boot, stateid->si_stateownerid, 2690 STATEID_VAL(stateid));
2701 stateid->si_fileid, stateid->si_generation);
2702 return nfserr_bad_stateid; 2691 return nfserr_bad_stateid;
2703} 2692}
2704 2693
@@ -2884,10 +2873,8 @@ nfs4_preprocess_seqid_op(struct nfsd4_compound_state *cstate, u32 seqid,
2884 struct svc_fh *current_fh = &cstate->current_fh; 2873 struct svc_fh *current_fh = &cstate->current_fh;
2885 __be32 status; 2874 __be32 status;
2886 2875
2887 dprintk("NFSD: preprocess_seqid_op: seqid=%d " 2876 dprintk("NFSD: %s: seqid=%d stateid = " STATEID_FMT "\n", __func__,
2888 "stateid = (%08x/%08x/%08x/%08x)\n", seqid, 2877 seqid, STATEID_VAL(stateid));
2889 stateid->si_boot, stateid->si_stateownerid, stateid->si_fileid,
2890 stateid->si_generation);
2891 2878
2892 *stpp = NULL; 2879 *stpp = NULL;
2893 *sopp = NULL; 2880 *sopp = NULL;
@@ -3019,12 +3006,8 @@ nfsd4_open_confirm(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
3019 sop->so_confirmed = 1; 3006 sop->so_confirmed = 1;
3020 update_stateid(&stp->st_stateid); 3007 update_stateid(&stp->st_stateid);
3021 memcpy(&oc->oc_resp_stateid, &stp->st_stateid, sizeof(stateid_t)); 3008 memcpy(&oc->oc_resp_stateid, &stp->st_stateid, sizeof(stateid_t));
3022 dprintk("NFSD: nfsd4_open_confirm: success, seqid=%d " 3009 dprintk("NFSD: %s: success, seqid=%d stateid=" STATEID_FMT "\n",
3023 "stateid=(%08x/%08x/%08x/%08x)\n", oc->oc_seqid, 3010 __func__, oc->oc_seqid, STATEID_VAL(&stp->st_stateid));
3024 stp->st_stateid.si_boot,
3025 stp->st_stateid.si_stateownerid,
3026 stp->st_stateid.si_fileid,
3027 stp->st_stateid.si_generation);
3028 3011
3029 nfsd4_create_clid_dir(sop->so_client); 3012 nfsd4_create_clid_dir(sop->so_client);
3030out: 3013out:
@@ -3283,9 +3266,8 @@ find_delegation_stateid(struct inode *ino, stateid_t *stid)
3283 struct nfs4_file *fp; 3266 struct nfs4_file *fp;
3284 struct nfs4_delegation *dl; 3267 struct nfs4_delegation *dl;
3285 3268
3286 dprintk("NFSD:find_delegation_stateid stateid=(%08x/%08x/%08x/%08x)\n", 3269 dprintk("NFSD: %s: stateid=" STATEID_FMT "\n", __func__,
3287 stid->si_boot, stid->si_stateownerid, 3270 STATEID_VAL(stid));
3288 stid->si_fileid, stid->si_generation);
3289 3271
3290 fp = find_file(ino); 3272 fp = find_file(ino);
3291 if (!fp) 3273 if (!fp)
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 0fbd50cee1f6..a8587e90fd5a 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -40,24 +40,16 @@
40 * at the end of nfs4svc_decode_compoundargs. 40 * at the end of nfs4svc_decode_compoundargs.
41 */ 41 */
42 42
43#include <linux/param.h>
44#include <linux/smp.h>
45#include <linux/fs.h>
46#include <linux/namei.h> 43#include <linux/namei.h>
47#include <linux/vfs.h> 44#include <linux/statfs.h>
48#include <linux/utsname.h> 45#include <linux/utsname.h>
49#include <linux/sunrpc/xdr.h>
50#include <linux/sunrpc/svc.h>
51#include <linux/sunrpc/clnt.h>
52#include <linux/nfsd/nfsd.h>
53#include <linux/nfsd/state.h>
54#include <linux/nfsd/xdr4.h>
55#include <linux/nfsd_idmap.h> 46#include <linux/nfsd_idmap.h>
56#include <linux/nfs4.h>
57#include <linux/nfs4_acl.h> 47#include <linux/nfs4_acl.h>
58#include <linux/sunrpc/gss_api.h>
59#include <linux/sunrpc/svcauth_gss.h> 48#include <linux/sunrpc/svcauth_gss.h>
60 49
50#include "xdr4.h"
51#include "vfs.h"
52
61#define NFSDDBG_FACILITY NFSDDBG_XDR 53#define NFSDDBG_FACILITY NFSDDBG_XDR
62 54
63/* 55/*
@@ -2204,11 +2196,14 @@ nfsd4_encode_dirent_fattr(struct nfsd4_readdir *cd,
2204 * we will not follow the cross mount and will fill the attributes 2196 * we will not follow the cross mount and will fill the attributes
2205 * directly from the mountpoint dentry. 2197 * directly from the mountpoint dentry.
2206 */ 2198 */
2207 if (d_mountpoint(dentry) && !attributes_need_mount(cd->rd_bmval)) 2199 if (nfsd_mountpoint(dentry, exp)) {
2208 ignore_crossmnt = 1;
2209 else if (d_mountpoint(dentry)) {
2210 int err; 2200 int err;
2211 2201
2202 if (!(exp->ex_flags & NFSEXP_V4ROOT)
2203 && !attributes_need_mount(cd->rd_bmval)) {
2204 ignore_crossmnt = 1;
2205 goto out_encode;
2206 }
2212 /* 2207 /*
2213 * Why the heck aren't we just using nfsd_lookup?? 2208 * Why the heck aren't we just using nfsd_lookup??
2214 * Different "."/".." handling? Something else? 2209 * Different "."/".." handling? Something else?
@@ -2224,6 +2219,7 @@ nfsd4_encode_dirent_fattr(struct nfsd4_readdir *cd,
2224 goto out_put; 2219 goto out_put;
2225 2220
2226 } 2221 }
2222out_encode:
2227 nfserr = nfsd4_encode_fattr(NULL, exp, dentry, p, buflen, cd->rd_bmval, 2223 nfserr = nfsd4_encode_fattr(NULL, exp, dentry, p, buflen, cd->rd_bmval,
2228 cd->rd_rqstp, ignore_crossmnt); 2224 cd->rd_rqstp, ignore_crossmnt);
2229out_put: 2225out_put:
diff --git a/fs/nfsd/nfscache.c b/fs/nfsd/nfscache.c
index 4638635c5d87..da08560c4818 100644
--- a/fs/nfsd/nfscache.c
+++ b/fs/nfsd/nfscache.c
@@ -1,6 +1,4 @@
1/* 1/*
2 * linux/fs/nfsd/nfscache.c
3 *
4 * Request reply cache. This is currently a global cache, but this may 2 * Request reply cache. This is currently a global cache, but this may
5 * change in the future and be a per-client cache. 3 * change in the future and be a per-client cache.
6 * 4 *
@@ -10,16 +8,8 @@
10 * Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de> 8 * Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de>
11 */ 9 */
12 10
13#include <linux/kernel.h> 11#include "nfsd.h"
14#include <linux/time.h> 12#include "cache.h"
15#include <linux/slab.h>
16#include <linux/string.h>
17#include <linux/spinlock.h>
18#include <linux/list.h>
19
20#include <linux/sunrpc/svc.h>
21#include <linux/nfsd/nfsd.h>
22#include <linux/nfsd/cache.h>
23 13
24/* Size of reply cache. Common values are: 14/* Size of reply cache. Common values are:
25 * 4.3BSD: 128 15 * 4.3BSD: 128
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index 5c01fc148ce8..2604c3e70ea5 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -1,46 +1,20 @@
1/* 1/*
2 * linux/fs/nfsd/nfsctl.c
3 *
4 * Syscall interface to knfsd. 2 * Syscall interface to knfsd.
5 * 3 *
6 * Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de> 4 * Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de>
7 */ 5 */
8 6
9#include <linux/module.h>
10
11#include <linux/linkage.h>
12#include <linux/time.h>
13#include <linux/errno.h>
14#include <linux/fs.h>
15#include <linux/namei.h> 7#include <linux/namei.h>
16#include <linux/fcntl.h>
17#include <linux/net.h>
18#include <linux/in.h>
19#include <linux/syscalls.h>
20#include <linux/unistd.h>
21#include <linux/slab.h>
22#include <linux/proc_fs.h>
23#include <linux/seq_file.h>
24#include <linux/pagemap.h>
25#include <linux/init.h>
26#include <linux/inet.h>
27#include <linux/string.h>
28#include <linux/ctype.h> 8#include <linux/ctype.h>
29 9
30#include <linux/nfs.h>
31#include <linux/nfsd_idmap.h> 10#include <linux/nfsd_idmap.h>
32#include <linux/lockd/bind.h>
33#include <linux/sunrpc/svc.h>
34#include <linux/sunrpc/svcsock.h> 11#include <linux/sunrpc/svcsock.h>
35#include <linux/nfsd/nfsd.h>
36#include <linux/nfsd/cache.h>
37#include <linux/nfsd/xdr.h>
38#include <linux/nfsd/syscall.h> 12#include <linux/nfsd/syscall.h>
39#include <linux/lockd/lockd.h> 13#include <linux/lockd/lockd.h>
40#include <linux/sunrpc/clnt.h> 14#include <linux/sunrpc/clnt.h>
41 15
42#include <asm/uaccess.h> 16#include "nfsd.h"
43#include <net/ipv6.h> 17#include "cache.h"
44 18
45/* 19/*
46 * We have a single directory with 9 nodes in it. 20 * We have a single directory with 9 nodes in it.
@@ -55,6 +29,7 @@ enum {
55 NFSD_Getfd, 29 NFSD_Getfd,
56 NFSD_Getfs, 30 NFSD_Getfs,
57 NFSD_List, 31 NFSD_List,
32 NFSD_Export_features,
58 NFSD_Fh, 33 NFSD_Fh,
59 NFSD_FO_UnlockIP, 34 NFSD_FO_UnlockIP,
60 NFSD_FO_UnlockFS, 35 NFSD_FO_UnlockFS,
@@ -173,6 +148,24 @@ static const struct file_operations exports_operations = {
173 .owner = THIS_MODULE, 148 .owner = THIS_MODULE,
174}; 149};
175 150
151static int export_features_show(struct seq_file *m, void *v)
152{
153 seq_printf(m, "0x%x 0x%x\n", NFSEXP_ALLFLAGS, NFSEXP_SECINFO_FLAGS);
154 return 0;
155}
156
157static int export_features_open(struct inode *inode, struct file *file)
158{
159 return single_open(file, export_features_show, NULL);
160}
161
162static const struct file_operations export_features_operations = { /* seq_file ops backing the read-only "export_features" node */
163 .open = export_features_open,
164 .read = seq_read,
165 .llseek = seq_lseek,
166 .release = single_release,
167};
168
176extern int nfsd_pool_stats_open(struct inode *inode, struct file *file); 169extern int nfsd_pool_stats_open(struct inode *inode, struct file *file);
177extern int nfsd_pool_stats_release(struct inode *inode, struct file *file); 170extern int nfsd_pool_stats_release(struct inode *inode, struct file *file);
178 171
@@ -1330,6 +1323,8 @@ static int nfsd_fill_super(struct super_block * sb, void * data, int silent)
1330 [NFSD_Getfd] = {".getfd", &transaction_ops, S_IWUSR|S_IRUSR}, 1323 [NFSD_Getfd] = {".getfd", &transaction_ops, S_IWUSR|S_IRUSR},
1331 [NFSD_Getfs] = {".getfs", &transaction_ops, S_IWUSR|S_IRUSR}, 1324 [NFSD_Getfs] = {".getfs", &transaction_ops, S_IWUSR|S_IRUSR},
1332 [NFSD_List] = {"exports", &exports_operations, S_IRUGO}, 1325 [NFSD_List] = {"exports", &exports_operations, S_IRUGO},
1326 [NFSD_Export_features] = {"export_features",
1327 &export_features_operations, S_IRUGO},
1333 [NFSD_FO_UnlockIP] = {"unlock_ip", 1328 [NFSD_FO_UnlockIP] = {"unlock_ip",
1334 &transaction_ops, S_IWUSR|S_IRUSR}, 1329 &transaction_ops, S_IWUSR|S_IRUSR},
1335 [NFSD_FO_UnlockFS] = {"unlock_filesystem", 1330 [NFSD_FO_UnlockFS] = {"unlock_filesystem",
diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h
new file mode 100644
index 000000000000..e942a1aaac92
--- /dev/null
+++ b/fs/nfsd/nfsd.h
@@ -0,0 +1,338 @@
1/*
2 * Hodge-podge collection of knfsd-related stuff.
3 * I will sort this out later.
4 *
5 * Copyright (C) 1995-1997 Olaf Kirch <okir@monad.swb.de>
6 */
7
8#ifndef LINUX_NFSD_NFSD_H
9#define LINUX_NFSD_NFSD_H
10
11#include <linux/types.h>
12#include <linux/mount.h>
13
14#include <linux/nfsd/debug.h>
15#include <linux/nfsd/export.h>
16#include <linux/nfsd/stats.h>
17/*
18 * nfsd version
19 */
20#define NFSD_SUPPORTED_MINOR_VERSION 1
21
22struct readdir_cd {
23 __be32 err; /* 0, nfserr, or nfserr_eof */
24};
25
26
27extern struct svc_program nfsd_program;
28extern struct svc_version nfsd_version2, nfsd_version3,
29 nfsd_version4;
30extern u32 nfsd_supported_minorversion;
31extern struct mutex nfsd_mutex;
32extern struct svc_serv *nfsd_serv;
33extern spinlock_t nfsd_drc_lock;
34extern unsigned int nfsd_drc_max_mem;
35extern unsigned int nfsd_drc_mem_used;
36
37extern const struct seq_operations nfs_exports_op;
38
39/*
40 * Function prototypes.
41 */
42int nfsd_svc(unsigned short port, int nrservs);
43int nfsd_dispatch(struct svc_rqst *rqstp, __be32 *statp);
44
45int nfsd_nrthreads(void);
46int nfsd_nrpools(void);
47int nfsd_get_nrthreads(int n, int *);
48int nfsd_set_nrthreads(int n, int *);
49
50#if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL)
51#ifdef CONFIG_NFSD_V2_ACL
52extern struct svc_version nfsd_acl_version2;
53#else
54#define nfsd_acl_version2 NULL
55#endif
56#ifdef CONFIG_NFSD_V3_ACL
57extern struct svc_version nfsd_acl_version3;
58#else
59#define nfsd_acl_version3 NULL
60#endif
61#endif
62
63enum vers_op {NFSD_SET, NFSD_CLEAR, NFSD_TEST, NFSD_AVAIL };
64int nfsd_vers(int vers, enum vers_op change);
65int nfsd_minorversion(u32 minorversion, enum vers_op change);
66void nfsd_reset_versions(void);
67int nfsd_create_serv(void);
68
69extern int nfsd_max_blksize;
70
71static inline int nfsd_v4client(struct svc_rqst *rq)
72{
73 return rq->rq_prog == NFS_PROGRAM && rq->rq_vers == 4;
74}
75
76/*
77 * NFSv4 State
78 */
79#ifdef CONFIG_NFSD_V4
80extern unsigned int max_delegations;
81int nfs4_state_init(void);
82void nfsd4_free_slabs(void);
83int nfs4_state_start(void);
84void nfs4_state_shutdown(void);
85time_t nfs4_lease_time(void);
86void nfs4_reset_lease(time_t leasetime);
87int nfs4_reset_recoverydir(char *recdir);
88#else
89static inline int nfs4_state_init(void) { return 0; }
90static inline void nfsd4_free_slabs(void) { }
91static inline int nfs4_state_start(void) { return 0; }
92static inline void nfs4_state_shutdown(void) { }
93static inline time_t nfs4_lease_time(void) { return 0; }
94static inline void nfs4_reset_lease(time_t leasetime) { }
95static inline int nfs4_reset_recoverydir(char *recdir) { return 0; }
96#endif
97
98/*
99 * lockd binding
100 */
101void nfsd_lockd_init(void);
102void nfsd_lockd_shutdown(void);
103
104
105/*
106 * These macros provide pre-xdr'ed values for faster operation.
107 */
108#define nfs_ok cpu_to_be32(NFS_OK)
109#define nfserr_perm cpu_to_be32(NFSERR_PERM)
110#define nfserr_noent cpu_to_be32(NFSERR_NOENT)
111#define nfserr_io cpu_to_be32(NFSERR_IO)
112#define nfserr_nxio cpu_to_be32(NFSERR_NXIO)
113#define nfserr_eagain cpu_to_be32(NFSERR_EAGAIN)
114#define nfserr_acces cpu_to_be32(NFSERR_ACCES)
115#define nfserr_exist cpu_to_be32(NFSERR_EXIST)
116#define nfserr_xdev cpu_to_be32(NFSERR_XDEV)
117#define nfserr_nodev cpu_to_be32(NFSERR_NODEV)
118#define nfserr_notdir cpu_to_be32(NFSERR_NOTDIR)
119#define nfserr_isdir cpu_to_be32(NFSERR_ISDIR)
120#define nfserr_inval cpu_to_be32(NFSERR_INVAL)
121#define nfserr_fbig cpu_to_be32(NFSERR_FBIG)
122#define nfserr_nospc cpu_to_be32(NFSERR_NOSPC)
123#define nfserr_rofs cpu_to_be32(NFSERR_ROFS)
124#define nfserr_mlink cpu_to_be32(NFSERR_MLINK)
125#define nfserr_opnotsupp cpu_to_be32(NFSERR_OPNOTSUPP)
126#define nfserr_nametoolong cpu_to_be32(NFSERR_NAMETOOLONG)
127#define nfserr_notempty cpu_to_be32(NFSERR_NOTEMPTY)
128#define nfserr_dquot cpu_to_be32(NFSERR_DQUOT)
129#define nfserr_stale cpu_to_be32(NFSERR_STALE)
130#define nfserr_remote cpu_to_be32(NFSERR_REMOTE)
131#define nfserr_wflush cpu_to_be32(NFSERR_WFLUSH)
132#define nfserr_badhandle cpu_to_be32(NFSERR_BADHANDLE)
133#define nfserr_notsync cpu_to_be32(NFSERR_NOT_SYNC)
134#define nfserr_badcookie cpu_to_be32(NFSERR_BAD_COOKIE)
135#define nfserr_notsupp cpu_to_be32(NFSERR_NOTSUPP)
136#define nfserr_toosmall cpu_to_be32(NFSERR_TOOSMALL)
137#define nfserr_serverfault cpu_to_be32(NFSERR_SERVERFAULT)
138#define nfserr_badtype cpu_to_be32(NFSERR_BADTYPE)
139#define nfserr_jukebox cpu_to_be32(NFSERR_JUKEBOX)
140#define nfserr_denied cpu_to_be32(NFSERR_DENIED)
141#define nfserr_deadlock cpu_to_be32(NFSERR_DEADLOCK)
142#define nfserr_expired cpu_to_be32(NFSERR_EXPIRED)
143#define nfserr_bad_cookie cpu_to_be32(NFSERR_BAD_COOKIE)
144#define nfserr_same cpu_to_be32(NFSERR_SAME)
145#define nfserr_clid_inuse cpu_to_be32(NFSERR_CLID_INUSE)
146#define nfserr_stale_clientid cpu_to_be32(NFSERR_STALE_CLIENTID)
147#define nfserr_resource cpu_to_be32(NFSERR_RESOURCE)
148#define nfserr_moved cpu_to_be32(NFSERR_MOVED)
149#define nfserr_nofilehandle cpu_to_be32(NFSERR_NOFILEHANDLE)
150#define nfserr_minor_vers_mismatch cpu_to_be32(NFSERR_MINOR_VERS_MISMATCH)
151#define nfserr_share_denied cpu_to_be32(NFSERR_SHARE_DENIED)
152#define nfserr_stale_stateid cpu_to_be32(NFSERR_STALE_STATEID)
153#define nfserr_old_stateid cpu_to_be32(NFSERR_OLD_STATEID)
154#define nfserr_bad_stateid cpu_to_be32(NFSERR_BAD_STATEID)
155#define nfserr_bad_seqid cpu_to_be32(NFSERR_BAD_SEQID)
156#define nfserr_symlink cpu_to_be32(NFSERR_SYMLINK)
157#define nfserr_not_same cpu_to_be32(NFSERR_NOT_SAME)
158#define nfserr_restorefh cpu_to_be32(NFSERR_RESTOREFH)
159#define nfserr_attrnotsupp cpu_to_be32(NFSERR_ATTRNOTSUPP)
160#define nfserr_bad_xdr cpu_to_be32(NFSERR_BAD_XDR)
161#define nfserr_openmode cpu_to_be32(NFSERR_OPENMODE)
162#define nfserr_locks_held cpu_to_be32(NFSERR_LOCKS_HELD)
163#define nfserr_op_illegal cpu_to_be32(NFSERR_OP_ILLEGAL)
164#define nfserr_grace cpu_to_be32(NFSERR_GRACE)
165#define nfserr_no_grace cpu_to_be32(NFSERR_NO_GRACE)
166#define nfserr_reclaim_bad cpu_to_be32(NFSERR_RECLAIM_BAD)
167#define nfserr_badname cpu_to_be32(NFSERR_BADNAME)
168#define nfserr_cb_path_down cpu_to_be32(NFSERR_CB_PATH_DOWN)
169#define nfserr_locked cpu_to_be32(NFSERR_LOCKED)
170#define nfserr_wrongsec cpu_to_be32(NFSERR_WRONGSEC)
171#define nfserr_badiomode cpu_to_be32(NFS4ERR_BADIOMODE)
172#define nfserr_badlayout cpu_to_be32(NFS4ERR_BADLAYOUT)
173#define nfserr_bad_session_digest cpu_to_be32(NFS4ERR_BAD_SESSION_DIGEST)
174#define nfserr_badsession cpu_to_be32(NFS4ERR_BADSESSION)
175#define nfserr_badslot cpu_to_be32(NFS4ERR_BADSLOT)
176#define nfserr_complete_already cpu_to_be32(NFS4ERR_COMPLETE_ALREADY)
177#define nfserr_conn_not_bound_to_session cpu_to_be32(NFS4ERR_CONN_NOT_BOUND_TO_SESSION)
178#define nfserr_deleg_already_wanted cpu_to_be32(NFS4ERR_DELEG_ALREADY_WANTED)
179#define nfserr_back_chan_busy cpu_to_be32(NFS4ERR_BACK_CHAN_BUSY)
180#define nfserr_layouttrylater cpu_to_be32(NFS4ERR_LAYOUTTRYLATER)
181#define nfserr_layoutunavailable cpu_to_be32(NFS4ERR_LAYOUTUNAVAILABLE)
182#define nfserr_nomatching_layout cpu_to_be32(NFS4ERR_NOMATCHING_LAYOUT)
183#define nfserr_recallconflict cpu_to_be32(NFS4ERR_RECALLCONFLICT)
184#define nfserr_unknown_layouttype cpu_to_be32(NFS4ERR_UNKNOWN_LAYOUTTYPE)
185#define nfserr_seq_misordered cpu_to_be32(NFS4ERR_SEQ_MISORDERED)
186#define nfserr_sequence_pos cpu_to_be32(NFS4ERR_SEQUENCE_POS)
187#define nfserr_req_too_big cpu_to_be32(NFS4ERR_REQ_TOO_BIG)
188#define nfserr_rep_too_big cpu_to_be32(NFS4ERR_REP_TOO_BIG)
189#define nfserr_rep_too_big_to_cache cpu_to_be32(NFS4ERR_REP_TOO_BIG_TO_CACHE)
190#define nfserr_retry_uncached_rep cpu_to_be32(NFS4ERR_RETRY_UNCACHED_REP)
191#define nfserr_unsafe_compound cpu_to_be32(NFS4ERR_UNSAFE_COMPOUND)
192#define nfserr_too_many_ops cpu_to_be32(NFS4ERR_TOO_MANY_OPS)
193#define nfserr_op_not_in_session cpu_to_be32(NFS4ERR_OP_NOT_IN_SESSION)
194#define nfserr_hash_alg_unsupp cpu_to_be32(NFS4ERR_HASH_ALG_UNSUPP)
195#define nfserr_clientid_busy cpu_to_be32(NFS4ERR_CLIENTID_BUSY)
196#define nfserr_pnfs_io_hole cpu_to_be32(NFS4ERR_PNFS_IO_HOLE)
197#define nfserr_seq_false_retry cpu_to_be32(NFS4ERR_SEQ_FALSE_RETRY)
198#define nfserr_bad_high_slot cpu_to_be32(NFS4ERR_BAD_HIGH_SLOT)
199#define nfserr_deadsession cpu_to_be32(NFS4ERR_DEADSESSION)
200#define nfserr_encr_alg_unsupp cpu_to_be32(NFS4ERR_ENCR_ALG_UNSUPP)
201#define nfserr_pnfs_no_layout cpu_to_be32(NFS4ERR_PNFS_NO_LAYOUT)
202#define nfserr_not_only_op cpu_to_be32(NFS4ERR_NOT_ONLY_OP)
203#define nfserr_wrong_cred cpu_to_be32(NFS4ERR_WRONG_CRED)
204#define nfserr_wrong_type cpu_to_be32(NFS4ERR_WRONG_TYPE)
205#define nfserr_dirdeleg_unavail cpu_to_be32(NFS4ERR_DIRDELEG_UNAVAIL)
206#define nfserr_reject_deleg cpu_to_be32(NFS4ERR_REJECT_DELEG)
207#define nfserr_returnconflict cpu_to_be32(NFS4ERR_RETURNCONFLICT)
208#define nfserr_deleg_revoked cpu_to_be32(NFS4ERR_DELEG_REVOKED)
209
210/* error codes for internal use */
211/* if a request fails due to kmalloc failure, it gets dropped.
212 * Client should resend eventually
213 */
214#define nfserr_dropit cpu_to_be32(30000)
215/* end-of-file indicator in readdir */
216#define nfserr_eof cpu_to_be32(30001)
217/* replay detected */
218#define nfserr_replay_me cpu_to_be32(11001)
219/* nfs41 replay detected */
220#define nfserr_replay_cache cpu_to_be32(11002)
221
222/* Check for dir entries '.' and '..' (n: name, l: its length; each argument may be evaluated more than once) */
223#define isdotent(n, l) ((l) < 3 && (n)[0] == '.' && ((l) == 1 || (n)[1] == '.'))
224
225/*
226 * Time of server startup
227 */
228extern struct timeval nfssvc_boot;
229
230#ifdef CONFIG_NFSD_V4
231
232/* before processing a COMPOUND operation, we have to check that there
233 * is enough space in the buffer for XDR encode to succeed. otherwise,
234 * we might process an operation with side effects, and be unable to
235 * tell the client that the operation succeeded.
236 *
237 * COMPOUND_SLACK_SPACE - this is the minimum bytes of buffer space
238 * needed to encode an "ordinary" _successful_ operation. (GETATTR,
239 * READ, READDIR, and READLINK have their own buffer checks.) if we
240 * fall below this level, we fail the next operation with NFS4ERR_RESOURCE.
241 *
242 * COMPOUND_ERR_SLACK_SPACE - this is the minimum bytes of buffer space
243 * needed to encode an operation which has failed with NFS4ERR_RESOURCE.
244 * care is taken to ensure that we never fall below this level for any
245 * reason.
246 */
247#define COMPOUND_SLACK_SPACE 140 /* OP_GETFH */
248#define COMPOUND_ERR_SLACK_SPACE 12 /* OP_SETATTR */
249
250#define NFSD_LEASE_TIME (nfs4_lease_time())
251#define NFSD_LAUNDROMAT_MINTIMEOUT 10 /* seconds */
252
253/*
254 * The following attributes are currently not supported by the NFSv4 server:
255 * ARCHIVE (deprecated anyway)
256 * HIDDEN (unlikely to be supported any time soon)
257 * MIMETYPE (unlikely to be supported any time soon)
258 * QUOTA_* (will be supported in a forthcoming patch)
259 * SYSTEM (unlikely to be supported any time soon)
260 * TIME_BACKUP (unlikely to be supported any time soon)
261 * TIME_CREATE (unlikely to be supported any time soon)
262 */
263#define NFSD4_SUPPORTED_ATTRS_WORD0 \
264(FATTR4_WORD0_SUPPORTED_ATTRS | FATTR4_WORD0_TYPE | FATTR4_WORD0_FH_EXPIRE_TYPE \
265 | FATTR4_WORD0_CHANGE | FATTR4_WORD0_SIZE | FATTR4_WORD0_LINK_SUPPORT \
266 | FATTR4_WORD0_SYMLINK_SUPPORT | FATTR4_WORD0_NAMED_ATTR | FATTR4_WORD0_FSID \
267 | FATTR4_WORD0_UNIQUE_HANDLES | FATTR4_WORD0_LEASE_TIME | FATTR4_WORD0_RDATTR_ERROR \
268 | FATTR4_WORD0_ACLSUPPORT | FATTR4_WORD0_CANSETTIME | FATTR4_WORD0_CASE_INSENSITIVE \
269 | FATTR4_WORD0_CASE_PRESERVING | FATTR4_WORD0_CHOWN_RESTRICTED \
270 | FATTR4_WORD0_FILEHANDLE | FATTR4_WORD0_FILEID | FATTR4_WORD0_FILES_AVAIL \
271 | FATTR4_WORD0_FILES_FREE | FATTR4_WORD0_FILES_TOTAL | FATTR4_WORD0_FS_LOCATIONS | FATTR4_WORD0_HOMOGENEOUS \
272 | FATTR4_WORD0_MAXFILESIZE | FATTR4_WORD0_MAXLINK | FATTR4_WORD0_MAXNAME \
273 | FATTR4_WORD0_MAXREAD | FATTR4_WORD0_MAXWRITE | FATTR4_WORD0_ACL)
274
275#define NFSD4_SUPPORTED_ATTRS_WORD1 \
276(FATTR4_WORD1_MODE | FATTR4_WORD1_NO_TRUNC | FATTR4_WORD1_NUMLINKS \
277 | FATTR4_WORD1_OWNER | FATTR4_WORD1_OWNER_GROUP | FATTR4_WORD1_RAWDEV \
278 | FATTR4_WORD1_SPACE_AVAIL | FATTR4_WORD1_SPACE_FREE | FATTR4_WORD1_SPACE_TOTAL \
279 | FATTR4_WORD1_SPACE_USED | FATTR4_WORD1_TIME_ACCESS | FATTR4_WORD1_TIME_ACCESS_SET \
280 | FATTR4_WORD1_TIME_DELTA | FATTR4_WORD1_TIME_METADATA \
281 | FATTR4_WORD1_TIME_MODIFY | FATTR4_WORD1_TIME_MODIFY_SET | FATTR4_WORD1_MOUNTED_ON_FILEID)
282
283#define NFSD4_SUPPORTED_ATTRS_WORD2 0
284
285#define NFSD4_1_SUPPORTED_ATTRS_WORD0 \
286 NFSD4_SUPPORTED_ATTRS_WORD0
287
288#define NFSD4_1_SUPPORTED_ATTRS_WORD1 \
289 NFSD4_SUPPORTED_ATTRS_WORD1
290
291#define NFSD4_1_SUPPORTED_ATTRS_WORD2 \
292 (NFSD4_SUPPORTED_ATTRS_WORD2 | FATTR4_WORD2_SUPPATTR_EXCLCREAT)
293
294static inline u32 nfsd_suppattrs0(u32 minorversion)
295{
296 return minorversion ? NFSD4_1_SUPPORTED_ATTRS_WORD0
297 : NFSD4_SUPPORTED_ATTRS_WORD0;
298}
299
300static inline u32 nfsd_suppattrs1(u32 minorversion)
301{
302 return minorversion ? NFSD4_1_SUPPORTED_ATTRS_WORD1
303 : NFSD4_SUPPORTED_ATTRS_WORD1;
304}
305
306static inline u32 nfsd_suppattrs2(u32 minorversion)
307{
308 return minorversion ? NFSD4_1_SUPPORTED_ATTRS_WORD2
309 : NFSD4_SUPPORTED_ATTRS_WORD2;
310}
311
312/* These will return ERR_INVAL if specified in GETATTR or READDIR. */
313#define NFSD_WRITEONLY_ATTRS_WORD1 \
314(FATTR4_WORD1_TIME_ACCESS_SET | FATTR4_WORD1_TIME_MODIFY_SET)
315
316/* These are the only attrs allowed in CREATE/OPEN/SETATTR. */
317#define NFSD_WRITEABLE_ATTRS_WORD0 \
318(FATTR4_WORD0_SIZE | FATTR4_WORD0_ACL )
319#define NFSD_WRITEABLE_ATTRS_WORD1 \
320(FATTR4_WORD1_MODE | FATTR4_WORD1_OWNER | FATTR4_WORD1_OWNER_GROUP \
321 | FATTR4_WORD1_TIME_ACCESS_SET | FATTR4_WORD1_TIME_MODIFY_SET)
322#define NFSD_WRITEABLE_ATTRS_WORD2 0
323
324#define NFSD_SUPPATTR_EXCLCREAT_WORD0 \
325 NFSD_WRITEABLE_ATTRS_WORD0
326/*
327 * we currently store the exclusive create verifier in the v_{a,m}time
328 * attributes so the client can't set these at create time using EXCLUSIVE4_1
329 */
330#define NFSD_SUPPATTR_EXCLCREAT_WORD1 \
331 (NFSD_WRITEABLE_ATTRS_WORD1 & \
332 ~(FATTR4_WORD1_TIME_ACCESS_SET | FATTR4_WORD1_TIME_MODIFY_SET))
333#define NFSD_SUPPATTR_EXCLCREAT_WORD2 \
334 NFSD_WRITEABLE_ATTRS_WORD2
335
336#endif /* CONFIG_NFSD_V4 */
337
338#endif /* LINUX_NFSD_NFSD_H */
diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c
index 01965b2f3a76..1c12177b908c 100644
--- a/fs/nfsd/nfsfh.c
+++ b/fs/nfsd/nfsfh.c
@@ -1,6 +1,4 @@
1/* 1/*
2 * linux/fs/nfsd/nfsfh.c
3 *
4 * NFS server file handle treatment. 2 * NFS server file handle treatment.
5 * 3 *
6 * Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de> 4 * Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de>
@@ -9,19 +7,11 @@
9 * ... and again Southern-Winter 2001 to support export_operations 7 * ... and again Southern-Winter 2001 to support export_operations
10 */ 8 */
11 9
12#include <linux/slab.h>
13#include <linux/fs.h>
14#include <linux/unistd.h>
15#include <linux/string.h>
16#include <linux/stat.h>
17#include <linux/dcache.h>
18#include <linux/exportfs.h> 10#include <linux/exportfs.h>
19#include <linux/mount.h>
20 11
21#include <linux/sunrpc/clnt.h>
22#include <linux/sunrpc/svc.h>
23#include <linux/sunrpc/svcauth_gss.h> 12#include <linux/sunrpc/svcauth_gss.h>
24#include <linux/nfsd/nfsd.h> 13#include "nfsd.h"
14#include "vfs.h"
25#include "auth.h" 15#include "auth.h"
26 16
27#define NFSDDBG_FACILITY NFSDDBG_FH 17#define NFSDDBG_FACILITY NFSDDBG_FH
@@ -96,8 +86,10 @@ nfsd_mode_check(struct svc_rqst *rqstp, umode_t mode, int type)
96static __be32 nfsd_setuser_and_check_port(struct svc_rqst *rqstp, 86static __be32 nfsd_setuser_and_check_port(struct svc_rqst *rqstp,
97 struct svc_export *exp) 87 struct svc_export *exp)
98{ 88{
89 int flags = nfsexp_flags(rqstp, exp);
90
99 /* Check if the request originated from a secure port. */ 91 /* Check if the request originated from a secure port. */
100 if (!rqstp->rq_secure && EX_SECURE(exp)) { 92 if (!rqstp->rq_secure && !(flags & NFSEXP_INSECURE_PORT)) {
101 RPC_IFDEBUG(char buf[RPC_MAX_ADDRBUFLEN]); 93 RPC_IFDEBUG(char buf[RPC_MAX_ADDRBUFLEN]);
102 dprintk(KERN_WARNING 94 dprintk(KERN_WARNING
103 "nfsd: request from insecure port %s!\n", 95 "nfsd: request from insecure port %s!\n",
@@ -109,6 +101,36 @@ static __be32 nfsd_setuser_and_check_port(struct svc_rqst *rqstp,
109 return nfserrno(nfsd_setuser(rqstp, exp)); 101 return nfserrno(nfsd_setuser(rqstp, exp));
110} 102}
111 103
104static inline __be32 check_pseudo_root(struct svc_rqst *rqstp,
105 struct dentry *dentry, struct svc_export *exp)
106{
107 if (!(exp->ex_flags & NFSEXP_V4ROOT))
108 return nfs_ok;
109 /*
110 * v2/v3 clients have no need for the V4ROOT export--they use
111 * the mount protocl instead; also, further V4ROOT checks may be
112 * in v4-specific code, in which case v2/v3 clients could bypass
113 * them.
114 */
115 if (!nfsd_v4client(rqstp))
116 return nfserr_stale;
117 /*
118 * We're exposing only the directories and symlinks that have to be
119 * traversed on the way to real exports:
120 */
121 if (unlikely(!S_ISDIR(dentry->d_inode->i_mode) &&
122 !S_ISLNK(dentry->d_inode->i_mode)))
123 return nfserr_stale;
124 /*
125 * A pseudoroot export gives permission to access only one
126 * single directory; the kernel has to make another upcall
127 * before granting access to anything else under it:
128 */
129 if (unlikely(dentry != exp->ex_path.dentry))
130 return nfserr_stale;
131 return nfs_ok;
132}
133
112/* 134/*
113 * Use the given filehandle to look up the corresponding export and 135 * Use the given filehandle to look up the corresponding export and
114 * dentry. On success, the results are used to set fh_export and 136 * dentry. On success, the results are used to set fh_export and
@@ -232,14 +254,6 @@ static __be32 nfsd_set_fh_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp)
232 goto out; 254 goto out;
233 } 255 }
234 256
235 if (exp->ex_flags & NFSEXP_NOSUBTREECHECK) {
236 error = nfsd_setuser_and_check_port(rqstp, exp);
237 if (error) {
238 dput(dentry);
239 goto out;
240 }
241 }
242
243 if (S_ISDIR(dentry->d_inode->i_mode) && 257 if (S_ISDIR(dentry->d_inode->i_mode) &&
244 (dentry->d_flags & DCACHE_DISCONNECTED)) { 258 (dentry->d_flags & DCACHE_DISCONNECTED)) {
245 printk("nfsd: find_fh_dentry returned a DISCONNECTED directory: %s/%s\n", 259 printk("nfsd: find_fh_dentry returned a DISCONNECTED directory: %s/%s\n",
@@ -294,28 +308,32 @@ fh_verify(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, int access)
294 error = nfsd_set_fh_dentry(rqstp, fhp); 308 error = nfsd_set_fh_dentry(rqstp, fhp);
295 if (error) 309 if (error)
296 goto out; 310 goto out;
297 dentry = fhp->fh_dentry;
298 exp = fhp->fh_export;
299 } else {
300 /*
301 * just rechecking permissions
302 * (e.g. nfsproc_create calls fh_verify, then nfsd_create
303 * does as well)
304 */
305 dprintk("nfsd: fh_verify - just checking\n");
306 dentry = fhp->fh_dentry;
307 exp = fhp->fh_export;
308 /*
309 * Set user creds for this exportpoint; necessary even
310 * in the "just checking" case because this may be a
311 * filehandle that was created by fh_compose, and that
312 * is about to be used in another nfsv4 compound
313 * operation.
314 */
315 error = nfsd_setuser_and_check_port(rqstp, exp);
316 if (error)
317 goto out;
318 } 311 }
312 dentry = fhp->fh_dentry;
313 exp = fhp->fh_export;
314 /*
315 * We still have to do all these permission checks, even when
316 * fh_dentry is already set:
317 * - fh_verify may be called multiple times with different
318 * "access" arguments (e.g. nfsd_proc_create calls
319 * fh_verify(...,NFSD_MAY_EXEC) first, then later (in
320 * nfsd_create) calls fh_verify(...,NFSD_MAY_CREATE).
321 * - in the NFSv4 case, the filehandle may have been filled
322 * in by fh_compose, and given a dentry, but further
323 * compound operations performed with that filehandle
324 * still need permissions checks. In the worst case, a
325 * mountpoint crossing may have changed the export
326 * options, and we may now need to use a different uid
327 * (for example, if different id-squashing options are in
328 * effect on the new filesystem).
329 */
330 error = check_pseudo_root(rqstp, dentry, exp);
331 if (error)
332 goto out;
333
334 error = nfsd_setuser_and_check_port(rqstp, exp);
335 if (error)
336 goto out;
319 337
320 error = nfsd_mode_check(rqstp, dentry->d_inode->i_mode, type); 338 error = nfsd_mode_check(rqstp, dentry->d_inode->i_mode, type);
321 if (error) 339 if (error)
diff --git a/fs/nfsd/nfsfh.h b/fs/nfsd/nfsfh.h
new file mode 100644
index 000000000000..cdfb8c6a4206
--- /dev/null
+++ b/fs/nfsd/nfsfh.h
@@ -0,0 +1,208 @@
1/* Copyright (C) 1995, 1996, 1997 Olaf Kirch <okir@monad.swb.de> */
2
3#ifndef _LINUX_NFSD_FH_INT_H
4#define _LINUX_NFSD_FH_INT_H
5
6#include <linux/nfsd/nfsfh.h>
7
/* Filesystem-identifier encodings; see mk_fsid() for the layout of each. */
enum nfsd_fsid {
	FSID_DEV		= 0,
	FSID_NUM		= 1,
	FSID_MAJOR_MINOR	= 2,
	FSID_ENCODE_DEV		= 3,
	FSID_UUID4_INUM		= 4,
	FSID_UUID8		= 5,
	FSID_UUID16		= 6,
	FSID_UUID16_INUM	= 7,
};
18
/* Result type of fsid_source(). */
enum fsid_source {
	FSIDSOURCE_DEV	= 0,
	FSIDSOURCE_FSID	= 1,
	FSIDSOURCE_UUID	= 2,
};
extern enum fsid_source fsid_source(struct svc_fh *fhp);
25
26
27/* This might look a little large to "inline" but in all calls except
28 * one, 'vers' is constant so moste of the function disappears.
29 */
30static inline void mk_fsid(int vers, u32 *fsidv, dev_t dev, ino_t ino,
31 u32 fsid, unsigned char *uuid)
32{
33 u32 *up;
34 switch(vers) {
35 case FSID_DEV:
36 fsidv[0] = htonl((MAJOR(dev)<<16) |
37 MINOR(dev));
38 fsidv[1] = ino_t_to_u32(ino);
39 break;
40 case FSID_NUM:
41 fsidv[0] = fsid;
42 break;
43 case FSID_MAJOR_MINOR:
44 fsidv[0] = htonl(MAJOR(dev));
45 fsidv[1] = htonl(MINOR(dev));
46 fsidv[2] = ino_t_to_u32(ino);
47 break;
48
49 case FSID_ENCODE_DEV:
50 fsidv[0] = new_encode_dev(dev);
51 fsidv[1] = ino_t_to_u32(ino);
52 break;
53
54 case FSID_UUID4_INUM:
55 /* 4 byte fsid and inode number */
56 up = (u32*)uuid;
57 fsidv[0] = ino_t_to_u32(ino);
58 fsidv[1] = up[0] ^ up[1] ^ up[2] ^ up[3];
59 break;
60
61 case FSID_UUID8:
62 /* 8 byte fsid */
63 up = (u32*)uuid;
64 fsidv[0] = up[0] ^ up[2];
65 fsidv[1] = up[1] ^ up[3];
66 break;
67
68 case FSID_UUID16:
69 /* 16 byte fsid - NFSv3+ only */
70 memcpy(fsidv, uuid, 16);
71 break;
72
73 case FSID_UUID16_INUM:
74 /* 8 byte inode and 16 byte fsid */
75 *(u64*)fsidv = (u64)ino;
76 memcpy(fsidv+2, uuid, 16);
77 break;
78 default: BUG();
79 }
80}
81
82static inline int key_len(int type)
83{
84 switch(type) {
85 case FSID_DEV: return 8;
86 case FSID_NUM: return 4;
87 case FSID_MAJOR_MINOR: return 12;
88 case FSID_ENCODE_DEV: return 8;
89 case FSID_UUID4_INUM: return 8;
90 case FSID_UUID8: return 8;
91 case FSID_UUID16: return 16;
92 case FSID_UUID16_INUM: return 24;
93 default: return 0;
94 }
95}
96
97/*
98 * Shorthand for dprintk()'s
99 */
100extern char * SVCFH_fmt(struct svc_fh *fhp);
101
102/*
103 * Function prototypes
104 */
105__be32 fh_verify(struct svc_rqst *, struct svc_fh *, int, int);
106__be32 fh_compose(struct svc_fh *, struct svc_export *, struct dentry *, struct svc_fh *);
107__be32 fh_update(struct svc_fh *);
108void fh_put(struct svc_fh *);
109
110static __inline__ struct svc_fh *
111fh_copy(struct svc_fh *dst, struct svc_fh *src)
112{
113 WARN_ON(src->fh_dentry || src->fh_locked);
114
115 *dst = *src;
116 return dst;
117}
118
119static inline void
120fh_copy_shallow(struct knfsd_fh *dst, struct knfsd_fh *src)
121{
122 dst->fh_size = src->fh_size;
123 memcpy(&dst->fh_base, &src->fh_base, src->fh_size);
124}
125
126static __inline__ struct svc_fh *
127fh_init(struct svc_fh *fhp, int maxsize)
128{
129 memset(fhp, 0, sizeof(*fhp));
130 fhp->fh_maxsize = maxsize;
131 return fhp;
132}
133
#ifdef CONFIG_NFSD_V3
/*
 * Fill in the pre_op attr for the wcc data.
 *
 * The inode's mtime, ctime, size and i_version are captured into the
 * fh_pre_* fields once; later calls are no-ops while fh_pre_saved is set.
 */
static inline void
fill_pre_wcc(struct svc_fh *fhp)
{
	struct inode *ino = fhp->fh_dentry->d_inode;

	if (fhp->fh_pre_saved)
		return;

	fhp->fh_pre_mtime = ino->i_mtime;
	fhp->fh_pre_ctime = ino->i_ctime;
	fhp->fh_pre_size = ino->i_size;
	fhp->fh_pre_change = ino->i_version;
	fhp->fh_pre_saved = 1;
}

extern void fill_post_wcc(struct svc_fh *);
#else
/* Without CONFIG_NFSD_V3 both hooks expand to nothing. */
#define fill_pre_wcc(ignored)
#define fill_post_wcc(notused)
#endif /* CONFIG_NFSD_V3 */
158
159
160/*
161 * Lock a file handle/inode
162 * NOTE: both fh_lock and fh_unlock are done "by hand" in
163 * vfs.c:nfsd_rename as it needs to grab 2 i_mutex's at once
164 * so, any changes here should be reflected there.
165 */
166
167static inline void
168fh_lock_nested(struct svc_fh *fhp, unsigned int subclass)
169{
170 struct dentry *dentry = fhp->fh_dentry;
171 struct inode *inode;
172
173 BUG_ON(!dentry);
174
175 if (fhp->fh_locked) {
176 printk(KERN_WARNING "fh_lock: %s/%s already locked!\n",
177 dentry->d_parent->d_name.name, dentry->d_name.name);
178 return;
179 }
180
181 inode = dentry->d_inode;
182 mutex_lock_nested(&inode->i_mutex, subclass);
183 fill_pre_wcc(fhp);
184 fhp->fh_locked = 1;
185}
186
187static inline void
188fh_lock(struct svc_fh *fhp)
189{
190 fh_lock_nested(fhp, I_MUTEX_NORMAL);
191}
192
193/*
194 * Unlock a file handle/inode
195 */
196static inline void
197fh_unlock(struct svc_fh *fhp)
198{
199 BUG_ON(!fhp->fh_dentry);
200
201 if (fhp->fh_locked) {
202 fill_post_wcc(fhp);
203 mutex_unlock(&fhp->fh_dentry->d_inode->i_mutex);
204 fhp->fh_locked = 0;
205 }
206}
207
208#endif /* _LINUX_NFSD_FH_INT_H */
diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c
index 0eb9c820b7a6..a047ad6111ef 100644
--- a/fs/nfsd/nfsproc.c
+++ b/fs/nfsd/nfsproc.c
@@ -1,29 +1,14 @@
1/* 1/*
2 * nfsproc2.c Process version 2 NFS requests.
3 * linux/fs/nfsd/nfs2proc.c
4 *
5 * Process version 2 NFS requests. 2 * Process version 2 NFS requests.
6 * 3 *
7 * Copyright (C) 1995-1997 Olaf Kirch <okir@monad.swb.de> 4 * Copyright (C) 1995-1997 Olaf Kirch <okir@monad.swb.de>
8 */ 5 */
9 6
10#include <linux/linkage.h>
11#include <linux/time.h>
12#include <linux/errno.h>
13#include <linux/fs.h>
14#include <linux/stat.h>
15#include <linux/fcntl.h>
16#include <linux/net.h>
17#include <linux/in.h>
18#include <linux/namei.h> 7#include <linux/namei.h>
19#include <linux/unistd.h>
20#include <linux/slab.h>
21 8
22#include <linux/sunrpc/clnt.h> 9#include "cache.h"
23#include <linux/sunrpc/svc.h> 10#include "xdr.h"
24#include <linux/nfsd/nfsd.h> 11#include "vfs.h"
25#include <linux/nfsd/cache.h>
26#include <linux/nfsd/xdr.h>
27 12
28typedef struct svc_rqst svc_rqst; 13typedef struct svc_rqst svc_rqst;
29typedef struct svc_buf svc_buf; 14typedef struct svc_buf svc_buf;
@@ -758,6 +743,7 @@ nfserrno (int errno)
758 { nfserr_io, -ETXTBSY }, 743 { nfserr_io, -ETXTBSY },
759 { nfserr_notsupp, -EOPNOTSUPP }, 744 { nfserr_notsupp, -EOPNOTSUPP },
760 { nfserr_toosmall, -ETOOSMALL }, 745 { nfserr_toosmall, -ETOOSMALL },
746 { nfserr_serverfault, -ESERVERFAULT },
761 }; 747 };
762 int i; 748 int i;
763 749
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index 67ea83eedd43..171699eb07c8 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -1,6 +1,4 @@
1/* 1/*
2 * linux/fs/nfsd/nfssvc.c
3 *
4 * Central processing for nfsd. 2 * Central processing for nfsd.
5 * 3 *
6 * Authors: Olaf Kirch (okir@monad.swb.de) 4 * Authors: Olaf Kirch (okir@monad.swb.de)
@@ -8,33 +6,19 @@
8 * Copyright (C) 1995, 1996, 1997 Olaf Kirch <okir@monad.swb.de> 6 * Copyright (C) 1995, 1996, 1997 Olaf Kirch <okir@monad.swb.de>
9 */ 7 */
10 8
11#include <linux/module.h>
12#include <linux/sched.h> 9#include <linux/sched.h>
13#include <linux/time.h>
14#include <linux/errno.h>
15#include <linux/nfs.h>
16#include <linux/in.h>
17#include <linux/uio.h>
18#include <linux/unistd.h>
19#include <linux/slab.h>
20#include <linux/smp.h>
21#include <linux/freezer.h> 10#include <linux/freezer.h>
22#include <linux/fs_struct.h> 11#include <linux/fs_struct.h>
23#include <linux/kthread.h>
24#include <linux/swap.h> 12#include <linux/swap.h>
25 13
26#include <linux/sunrpc/types.h>
27#include <linux/sunrpc/stats.h> 14#include <linux/sunrpc/stats.h>
28#include <linux/sunrpc/svc.h>
29#include <linux/sunrpc/svcsock.h> 15#include <linux/sunrpc/svcsock.h>
30#include <linux/sunrpc/cache.h>
31#include <linux/nfsd/nfsd.h>
32#include <linux/nfsd/stats.h>
33#include <linux/nfsd/cache.h>
34#include <linux/nfsd/syscall.h>
35#include <linux/lockd/bind.h> 16#include <linux/lockd/bind.h>
36#include <linux/nfsacl.h> 17#include <linux/nfsacl.h>
37#include <linux/seq_file.h> 18#include <linux/seq_file.h>
19#include "nfsd.h"
20#include "cache.h"
21#include "vfs.h"
38 22
39#define NFSDDBG_FACILITY NFSDDBG_SVC 23#define NFSDDBG_FACILITY NFSDDBG_SVC
40 24
diff --git a/fs/nfsd/nfsxdr.c b/fs/nfsd/nfsxdr.c
index afd08e2c90a5..4ce005dbf3e6 100644
--- a/fs/nfsd/nfsxdr.c
+++ b/fs/nfsd/nfsxdr.c
@@ -1,20 +1,10 @@
1/* 1/*
2 * linux/fs/nfsd/nfsxdr.c
3 *
4 * XDR support for nfsd 2 * XDR support for nfsd
5 * 3 *
6 * Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de> 4 * Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de>
7 */ 5 */
8 6
9#include <linux/types.h> 7#include "xdr.h"
10#include <linux/time.h>
11#include <linux/nfs.h>
12#include <linux/vfs.h>
13#include <linux/sunrpc/xdr.h>
14#include <linux/sunrpc/svc.h>
15#include <linux/nfsd/nfsd.h>
16#include <linux/nfsd/xdr.h>
17#include <linux/mm.h>
18#include "auth.h" 8#include "auth.h"
19 9
20#define NFSDDBG_FACILITY NFSDDBG_XDR 10#define NFSDDBG_FACILITY NFSDDBG_XDR
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
new file mode 100644
index 000000000000..fefeae27f25e
--- /dev/null
+++ b/fs/nfsd/state.h
@@ -0,0 +1,408 @@
1/*
2 * Copyright (c) 2001 The Regents of the University of Michigan.
3 * All rights reserved.
4 *
5 * Kendrick Smith <kmsmith@umich.edu>
6 * Andy Adamson <andros@umich.edu>
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 *
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 * 3. Neither the name of the University nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
22 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
23 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
24 * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
28 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
29 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
30 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
31 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32 *
33 */
34
35#ifndef _NFSD4_STATE_H
36#define _NFSD4_STATE_H
37
38#include <linux/nfsd/nfsfh.h>
39#include "nfsfh.h"
40
41typedef struct {
42 u32 cl_boot;
43 u32 cl_id;
44} clientid_t;
45
46typedef struct {
47 u32 so_boot;
48 u32 so_stateownerid;
49 u32 so_fileid;
50} stateid_opaque_t;
51
52typedef struct {
53 u32 si_generation;
54 stateid_opaque_t si_opaque;
55} stateid_t;
56#define si_boot si_opaque.so_boot
57#define si_stateownerid si_opaque.so_stateownerid
58#define si_fileid si_opaque.so_fileid
59
/*
 * printk-style helpers for a stateid_t pointer: STATEID_FMT is the format
 * fragment and STATEID_VAL(s) expands to the four matching arguments
 * (boot, stateownerid, fileid, generation).
 */
#define STATEID_FMT	"(%08x/%08x/%08x/%08x)"
#define STATEID_VAL(s) \
	(s)->si_boot, \
	(s)->si_stateownerid, \
	(s)->si_fileid, \
	(s)->si_generation
66
67struct nfsd4_cb_sequence {
68 /* args/res */
69 u32 cbs_minorversion;
70 struct nfs4_client *cbs_clp;
71};
72
73struct nfs4_delegation {
74 struct list_head dl_perfile;
75 struct list_head dl_perclnt;
76 struct list_head dl_recall_lru; /* delegation recalled */
77 atomic_t dl_count; /* ref count */
78 struct nfs4_client *dl_client;
79 struct nfs4_file *dl_file;
80 struct file_lock *dl_flock;
81 struct file *dl_vfs_file;
82 u32 dl_type;
83 time_t dl_time;
84/* For recall: */
85 u32 dl_ident;
86 stateid_t dl_stateid;
87 struct knfsd_fh dl_fh;
88 int dl_retries;
89};
90
91/* client delegation callback info */
92struct nfs4_cb_conn {
93 /* SETCLIENTID info */
94 struct sockaddr_storage cb_addr;
95 size_t cb_addrlen;
96 u32 cb_prog;
97 u32 cb_minorversion;
98 u32 cb_ident; /* minorversion 0 only */
99 /* RPC client info */
100 atomic_t cb_set; /* successful CB_NULL call */
101 struct rpc_clnt * cb_client;
102};
103
/* Maximum number of slots per session. 160 is useful for long haul TCP */
#define NFSD_MAX_SLOTS_PER_SESSION		160
/* Maximum number of operations per session compound */
#define NFSD_MAX_OPS_PER_COMPOUND		16
/* Maximum per-slot cache size */
#define NFSD_SLOT_CACHE_SIZE			1024
/* Maximum number of NFSD_SLOT_CACHE_SIZE slots per session */
#define NFSD_CACHE_SIZE_SLOTS_PER_SESSION	32
/* Product of the two limits above. */
#define NFSD_MAX_MEM_PER_SESSION \
	(NFSD_CACHE_SIZE_SLOTS_PER_SESSION * NFSD_SLOT_CACHE_SIZE)
114
115struct nfsd4_slot {
116 bool sl_inuse;
117 bool sl_cachethis;
118 u16 sl_opcnt;
119 u32 sl_seqid;
120 __be32 sl_status;
121 u32 sl_datalen;
122 char sl_data[];
123};
124
125struct nfsd4_channel_attrs {
126 u32 headerpadsz;
127 u32 maxreq_sz;
128 u32 maxresp_sz;
129 u32 maxresp_cached;
130 u32 maxops;
131 u32 maxreqs;
132 u32 nr_rdma_attrs;
133 u32 rdma_attrs;
134};
135
136struct nfsd4_create_session {
137 clientid_t clientid;
138 struct nfs4_sessionid sessionid;
139 u32 seqid;
140 u32 flags;
141 struct nfsd4_channel_attrs fore_channel;
142 struct nfsd4_channel_attrs back_channel;
143 u32 callback_prog;
144 u32 uid;
145 u32 gid;
146};
147
148/* The single slot clientid cache structure */
149struct nfsd4_clid_slot {
150 u32 sl_seqid;
151 __be32 sl_status;
152 struct nfsd4_create_session sl_cr_ses;
153};
154
155struct nfsd4_session {
156 struct kref se_ref;
157 struct list_head se_hash; /* hash by sessionid */
158 struct list_head se_perclnt;
159 u32 se_flags;
160 struct nfs4_client *se_client; /* for expire_client */
161 struct nfs4_sessionid se_sessionid;
162 struct nfsd4_channel_attrs se_fchannel;
163 struct nfsd4_channel_attrs se_bchannel;
164 struct nfsd4_slot *se_slots[]; /* forward channel slots */
165};
166
167static inline void
168nfsd4_put_session(struct nfsd4_session *ses)
169{
170 extern void free_session(struct kref *kref);
171 kref_put(&ses->se_ref, free_session);
172}
173
174static inline void
175nfsd4_get_session(struct nfsd4_session *ses)
176{
177 kref_get(&ses->se_ref);
178}
179
180/* formatted contents of nfs4_sessionid */
181struct nfsd4_sessionid {
182 clientid_t clientid;
183 u32 sequence;
184 u32 reserved;
185};
186
187#define HEXDIR_LEN 33 /* hex version of 16 byte md5 of cl_name plus '\0' */
188
189/*
190 * struct nfs4_client - one per client. Clientids live here.
191 * o Each nfs4_client is hashed by clientid.
192 *
193 * o Each nfs4_clients is also hashed by name
194 * (the opaque quantity initially sent by the client to identify itself).
195 *
196 * o cl_perclient list is used to ensure no dangling stateowner references
197 * when we expire the nfs4_client
198 */
199struct nfs4_client {
200 struct list_head cl_idhash; /* hash by cl_clientid.id */
201 struct list_head cl_strhash; /* hash by cl_name */
202 struct list_head cl_openowners;
203 struct list_head cl_delegations;
204 struct list_head cl_lru; /* tail queue */
205 struct xdr_netobj cl_name; /* id generated by client */
206 char cl_recdir[HEXDIR_LEN]; /* recovery dir */
207 nfs4_verifier cl_verifier; /* generated by client */
208 time_t cl_time; /* time of last lease renewal */
209 struct sockaddr_storage cl_addr; /* client ipaddress */
210 u32 cl_flavor; /* setclientid pseudoflavor */
211 char *cl_principal; /* setclientid principal name */
212 struct svc_cred cl_cred; /* setclientid principal */
213 clientid_t cl_clientid; /* generated by server */
214 nfs4_verifier cl_confirm; /* generated by server */
215 struct nfs4_cb_conn cl_cb_conn; /* callback info */
216 atomic_t cl_count; /* ref count */
217 u32 cl_firststate; /* recovery dir creation */
218
219 /* for nfs41 */
220 struct list_head cl_sessions;
221 struct nfsd4_clid_slot cl_cs_slot; /* create_session slot */
222 u32 cl_exchange_flags;
223 struct nfs4_sessionid cl_sessionid;
224
225 /* for nfs41 callbacks */
226 /* We currently support a single back channel with a single slot */
227 unsigned long cl_cb_slot_busy;
228 u32 cl_cb_seq_nr;
229 struct svc_xprt *cl_cb_xprt; /* 4.1 callback transport */
230 struct rpc_wait_queue cl_cb_waitq; /* backchannel callers may */
231 /* wait here for slots */
232};
233
234/* struct nfs4_client_reset
235 * one per old client. Populates reset_str_hashtbl. Filled from conf_id_hashtbl
236 * upon lease reset, or from upcall to state_daemon (to read in state
237 * from non-volitile storage) upon reboot.
238 */
239struct nfs4_client_reclaim {
240 struct list_head cr_strhash; /* hash by cr_name */
241 char cr_recdir[HEXDIR_LEN]; /* recover dir */
242};
243
244static inline void
245update_stateid(stateid_t *stateid)
246{
247 stateid->si_generation++;
248}
249
250/* A reasonable value for REPLAY_ISIZE was estimated as follows:
251 * The OPEN response, typically the largest, requires
252 * 4(status) + 8(stateid) + 20(changeinfo) + 4(rflags) + 8(verifier) +
253 * 4(deleg. type) + 8(deleg. stateid) + 4(deleg. recall flag) +
254 * 20(deleg. space limit) + ~32(deleg. ace) = 112 bytes
255 */
256
257#define NFSD4_REPLAY_ISIZE 112
258
259/*
260 * Replay buffer, where the result of the last seqid-mutating operation
261 * is cached.
262 */
263struct nfs4_replay {
264 __be32 rp_status;
265 unsigned int rp_buflen;
266 char *rp_buf;
267 unsigned intrp_allocated;
268 struct knfsd_fh rp_openfh;
269 char rp_ibuf[NFSD4_REPLAY_ISIZE];
270};
271
272/*
273* nfs4_stateowner can either be an open_owner, or a lock_owner
274*
275* so_idhash: stateid_hashtbl[] for open owner, lockstateid_hashtbl[]
276* for lock_owner
277* so_strhash: ownerstr_hashtbl[] for open_owner, lock_ownerstr_hashtbl[]
278* for lock_owner
279* so_perclient: nfs4_client->cl_perclient entry - used when nfs4_client
280* struct is reaped.
281* so_perfilestate: heads the list of nfs4_stateid (either open or lock)
282* and is used to ensure no dangling nfs4_stateid references when we
283* release a stateowner.
284* so_perlockowner: (open) nfs4_stateid->st_perlockowner entry - used when
285* close is called to reap associated byte-range locks
286* so_close_lru: (open) stateowner is placed on this list instead of being
287* reaped (when so_perfilestate is empty) to hold the last close replay.
288* reaped by laundramat thread after lease period.
289*/
290struct nfs4_stateowner {
291 struct kref so_ref;
292 struct list_head so_idhash; /* hash by so_id */
293 struct list_head so_strhash; /* hash by op_name */
294 struct list_head so_perclient;
295 struct list_head so_stateids;
296 struct list_head so_perstateid; /* for lockowners only */
297 struct list_head so_close_lru; /* tail queue */
298 time_t so_time; /* time of placement on so_close_lru */
299 int so_is_open_owner; /* 1=openowner,0=lockowner */
300 u32 so_id;
301 struct nfs4_client * so_client;
302 /* after increment in ENCODE_SEQID_OP_TAIL, represents the next
303 * sequence id expected from the client: */
304 u32 so_seqid;
305 struct xdr_netobj so_owner; /* open owner name */
306 int so_confirmed; /* successful OPEN_CONFIRM? */
307 struct nfs4_replay so_replay;
308};
309
310/*
311* nfs4_file: a file opened by some number of (open) nfs4_stateowners.
312* o fi_perfile list is used to search for conflicting
313* share_acces, share_deny on the file.
314*/
315struct nfs4_file {
316 atomic_t fi_ref;
317 struct list_head fi_hash; /* hash by "struct inode *" */
318 struct list_head fi_stateids;
319 struct list_head fi_delegations;
320 struct inode *fi_inode;
321 u32 fi_id; /* used with stateowner->so_id
322 * for stateid_hashtbl hash */
323 bool fi_had_conflict;
324};
325
326/*
327* nfs4_stateid can either be an open stateid or (eventually) a lock stateid
328*
329* (open)nfs4_stateid: one per (open)nfs4_stateowner, nfs4_file
330*
331* st_hash: stateid_hashtbl[] entry or lockstateid_hashtbl entry
332* st_perfile: file_hashtbl[] entry.
333* st_perfile_state: nfs4_stateowner->so_perfilestate
334* st_perlockowner: (open stateid) list of lock nfs4_stateowners
335* st_access_bmap: used only for open stateid
336* st_deny_bmap: used only for open stateid
337* st_openstp: open stateid lock stateid was derived from
338*
339* XXX: open stateids and lock stateids have diverged sufficiently that
340* we should consider defining separate structs for the two cases.
341*/
342
343struct nfs4_stateid {
344 struct list_head st_hash;
345 struct list_head st_perfile;
346 struct list_head st_perstateowner;
347 struct list_head st_lockowners;
348 struct nfs4_stateowner * st_stateowner;
349 struct nfs4_file * st_file;
350 stateid_t st_stateid;
351 struct file * st_vfs_file;
352 unsigned long st_access_bmap;
353 unsigned long st_deny_bmap;
354 struct nfs4_stateid * st_openstp;
355};
356
/* flags for preprocess_seqid_op() */
#define HAS_SESSION	0x00000001
#define CONFIRM		0x00000002
#define OPEN_STATE	0x00000004
#define LOCK_STATE	0x00000008
#define RD_STATE	0x00000010
#define WR_STATE	0x00000020
#define CLOSE_STATE	0x00000040

/*
 * True unless @err is one of nfserr_stale_clientid, nfserr_bad_seqid,
 * nfserr_stale_stateid or nfserr_bad_stateid.
 */
#define seqid_mutating_err(err) \
	(!(((err) == nfserr_stale_clientid) || \
	   ((err) == nfserr_bad_seqid) || \
	   ((err) == nfserr_stale_stateid) || \
	   ((err) == nfserr_bad_stateid)))
371
372struct nfsd4_compound_state;
373
374extern __be32 nfs4_preprocess_stateid_op(struct nfsd4_compound_state *cstate,
375 stateid_t *stateid, int flags, struct file **filp);
376extern void nfs4_lock_state(void);
377extern void nfs4_unlock_state(void);
378extern int nfs4_in_grace(void);
379extern __be32 nfs4_check_open_reclaim(clientid_t *clid);
380extern void put_nfs4_client(struct nfs4_client *clp);
381extern void nfs4_free_stateowner(struct kref *kref);
382extern int set_callback_cred(void);
383extern void nfsd4_probe_callback(struct nfs4_client *clp);
384extern void nfsd4_cb_recall(struct nfs4_delegation *dp);
385extern void nfs4_put_delegation(struct nfs4_delegation *dp);
386extern __be32 nfs4_make_rec_clidname(char *clidname, struct xdr_netobj *clname);
387extern void nfsd4_init_recdir(char *recdir_name);
388extern int nfsd4_recdir_load(void);
389extern void nfsd4_shutdown_recdir(void);
390extern int nfs4_client_to_reclaim(const char *name);
391extern int nfs4_has_reclaimed_state(const char *name, bool use_exchange_id);
392extern void nfsd4_recdir_purge_old(void);
393extern int nfsd4_create_clid_dir(struct nfs4_client *clp);
394extern void nfsd4_remove_clid_dir(struct nfs4_client *clp);
395
396static inline void
397nfs4_put_stateowner(struct nfs4_stateowner *so)
398{
399 kref_put(&so->so_ref, nfs4_free_stateowner);
400}
401
402static inline void
403nfs4_get_stateowner(struct nfs4_stateowner *so)
404{
405 kref_get(&so->so_ref);
406}
407
408#endif /* _NFSD4_STATE_H */
diff --git a/fs/nfsd/stats.c b/fs/nfsd/stats.c
index 71944cddf680..5232d3e8fb2f 100644
--- a/fs/nfsd/stats.c
+++ b/fs/nfsd/stats.c
@@ -1,6 +1,4 @@
1/* 1/*
2 * linux/fs/nfsd/stats.c
3 *
4 * procfs-based user access to knfsd statistics 2 * procfs-based user access to knfsd statistics
5 * 3 *
6 * /proc/net/rpc/nfsd 4 * /proc/net/rpc/nfsd
@@ -23,18 +21,13 @@
23 * Copyright (C) 1995, 1996, 1997 Olaf Kirch <okir@monad.swb.de> 21 * Copyright (C) 1995, 1996, 1997 Olaf Kirch <okir@monad.swb.de>
24 */ 22 */
25 23
26#include <linux/kernel.h>
27#include <linux/time.h>
28#include <linux/proc_fs.h>
29#include <linux/seq_file.h> 24#include <linux/seq_file.h>
30#include <linux/stat.h>
31#include <linux/module.h> 25#include <linux/module.h>
32
33#include <linux/sunrpc/svc.h>
34#include <linux/sunrpc/stats.h> 26#include <linux/sunrpc/stats.h>
35#include <linux/nfsd/nfsd.h>
36#include <linux/nfsd/stats.h> 27#include <linux/nfsd/stats.h>
37 28
29#include "nfsd.h"
30
38struct nfsd_stats nfsdstats; 31struct nfsd_stats nfsdstats;
39struct svc_stat nfsd_svcstats = { 32struct svc_stat nfsd_svcstats = {
40 .program = &nfsd_program, 33 .program = &nfsd_program,
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index a293f0273263..7c2e337d05af 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -1,7 +1,5 @@
1#define MSNFS /* HACK HACK */ 1#define MSNFS /* HACK HACK */
2/* 2/*
3 * linux/fs/nfsd/vfs.c
4 *
5 * File operations used by nfsd. Some of these have been ripped from 3 * File operations used by nfsd. Some of these have been ripped from
6 * other parts of the kernel because they weren't exported, others 4 * other parts of the kernel because they weren't exported, others
7 * are partial duplicates with added or changed functionality. 5 * are partial duplicates with added or changed functionality.
@@ -16,48 +14,31 @@
16 * Zerocpy NFS support (C) 2002 Hirokazu Takahashi <taka@valinux.co.jp> 14 * Zerocpy NFS support (C) 2002 Hirokazu Takahashi <taka@valinux.co.jp>
17 */ 15 */
18 16
19#include <linux/string.h>
20#include <linux/time.h>
21#include <linux/errno.h>
22#include <linux/fs.h> 17#include <linux/fs.h>
23#include <linux/file.h> 18#include <linux/file.h>
24#include <linux/mount.h>
25#include <linux/major.h>
26#include <linux/splice.h> 19#include <linux/splice.h>
27#include <linux/proc_fs.h>
28#include <linux/stat.h>
29#include <linux/fcntl.h> 20#include <linux/fcntl.h>
30#include <linux/net.h>
31#include <linux/unistd.h>
32#include <linux/slab.h>
33#include <linux/pagemap.h>
34#include <linux/in.h>
35#include <linux/module.h>
36#include <linux/namei.h> 21#include <linux/namei.h>
37#include <linux/vfs.h>
38#include <linux/delay.h> 22#include <linux/delay.h>
39#include <linux/sunrpc/svc.h>
40#include <linux/nfsd/nfsd.h>
41#ifdef CONFIG_NFSD_V3
42#include <linux/nfs3.h>
43#include <linux/nfsd/xdr3.h>
44#endif /* CONFIG_NFSD_V3 */
45#include <linux/nfsd/nfsfh.h>
46#include <linux/quotaops.h> 23#include <linux/quotaops.h>
47#include <linux/fsnotify.h> 24#include <linux/fsnotify.h>
48#include <linux/posix_acl.h>
49#include <linux/posix_acl_xattr.h> 25#include <linux/posix_acl_xattr.h>
50#include <linux/xattr.h> 26#include <linux/xattr.h>
27#include <linux/jhash.h>
28#include <linux/ima.h>
29#include <asm/uaccess.h>
30
31#ifdef CONFIG_NFSD_V3
32#include "xdr3.h"
33#endif /* CONFIG_NFSD_V3 */
34
51#ifdef CONFIG_NFSD_V4 35#ifdef CONFIG_NFSD_V4
52#include <linux/nfs4.h>
53#include <linux/nfs4_acl.h> 36#include <linux/nfs4_acl.h>
54#include <linux/nfsd_idmap.h> 37#include <linux/nfsd_idmap.h>
55#include <linux/security.h>
56#endif /* CONFIG_NFSD_V4 */ 38#endif /* CONFIG_NFSD_V4 */
57#include <linux/jhash.h>
58#include <linux/ima.h>
59 39
60#include <asm/uaccess.h> 40#include "nfsd.h"
41#include "vfs.h"
61 42
62#define NFSDDBG_FACILITY NFSDDBG_FILEOP 43#define NFSDDBG_FACILITY NFSDDBG_FILEOP
63 44
@@ -89,12 +70,6 @@ struct raparm_hbucket {
89#define RAPARM_HASH_MASK (RAPARM_HASH_SIZE-1) 70#define RAPARM_HASH_MASK (RAPARM_HASH_SIZE-1)
90static struct raparm_hbucket raparm_hash[RAPARM_HASH_SIZE]; 71static struct raparm_hbucket raparm_hash[RAPARM_HASH_SIZE];
91 72
92static inline int
93nfsd_v4client(struct svc_rqst *rq)
94{
95 return rq->rq_prog == NFS_PROGRAM && rq->rq_vers == 4;
96}
97
98/* 73/*
99 * Called from nfsd_lookup and encode_dirent. Check if we have crossed 74 * Called from nfsd_lookup and encode_dirent. Check if we have crossed
100 * a mount point. 75 * a mount point.
@@ -116,8 +91,16 @@ nfsd_cross_mnt(struct svc_rqst *rqstp, struct dentry **dpp,
116 91
117 exp2 = rqst_exp_get_by_name(rqstp, &path); 92 exp2 = rqst_exp_get_by_name(rqstp, &path);
118 if (IS_ERR(exp2)) { 93 if (IS_ERR(exp2)) {
119 if (PTR_ERR(exp2) != -ENOENT) 94 err = PTR_ERR(exp2);
120 err = PTR_ERR(exp2); 95 /*
96 * We normally allow NFS clients to continue
97 * "underneath" a mountpoint that is not exported.
98 * The exception is V4ROOT, where no traversal is ever
99 * allowed without an explicit export of the new
100 * directory.
101 */
102 if (err == -ENOENT && !(exp->ex_flags & NFSEXP_V4ROOT))
103 err = 0;
121 path_put(&path); 104 path_put(&path);
122 goto out; 105 goto out;
123 } 106 }
@@ -141,6 +124,53 @@ out:
141 return err; 124 return err;
142} 125}
143 126
127static void follow_to_parent(struct path *path)
128{
129 struct dentry *dp;
130
131 while (path->dentry == path->mnt->mnt_root && follow_up(path))
132 ;
133 dp = dget_parent(path->dentry);
134 dput(path->dentry);
135 path->dentry = dp;
136}
137
138static int nfsd_lookup_parent(struct svc_rqst *rqstp, struct dentry *dparent, struct svc_export **exp, struct dentry **dentryp)
139{
140 struct svc_export *exp2;
141 struct path path = {.mnt = mntget((*exp)->ex_path.mnt),
142 .dentry = dget(dparent)};
143
144 follow_to_parent(&path);
145
146 exp2 = rqst_exp_parent(rqstp, &path);
147 if (PTR_ERR(exp2) == -ENOENT) {
148 *dentryp = dget(dparent);
149 } else if (IS_ERR(exp2)) {
150 path_put(&path);
151 return PTR_ERR(exp2);
152 } else {
153 *dentryp = dget(path.dentry);
154 exp_put(*exp);
155 *exp = exp2;
156 }
157 path_put(&path);
158 return 0;
159}
160
161/*
162 * For nfsd purposes, we treat V4ROOT exports as though there was an
163 * export at *every* directory.
164 */
165int nfsd_mountpoint(struct dentry *dentry, struct svc_export *exp)
166{
167 if (d_mountpoint(dentry))
168 return 1;
169 if (!(exp->ex_flags & NFSEXP_V4ROOT))
170 return 0;
171 return dentry->d_inode != NULL;
172}
173
144__be32 174__be32
145nfsd_lookup_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp, 175nfsd_lookup_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp,
146 const char *name, unsigned int len, 176 const char *name, unsigned int len,
@@ -169,35 +199,13 @@ nfsd_lookup_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp,
169 dentry = dget(dparent); 199 dentry = dget(dparent);
170 else if (dparent != exp->ex_path.dentry) 200 else if (dparent != exp->ex_path.dentry)
171 dentry = dget_parent(dparent); 201 dentry = dget_parent(dparent);
172 else if (!EX_NOHIDE(exp)) 202 else if (!EX_NOHIDE(exp) && !nfsd_v4client(rqstp))
173 dentry = dget(dparent); /* .. == . just like at / */ 203 dentry = dget(dparent); /* .. == . just like at / */
174 else { 204 else {
175 /* checking mountpoint crossing is very different when stepping up */ 205 /* checking mountpoint crossing is very different when stepping up */
176 struct svc_export *exp2 = NULL; 206 host_err = nfsd_lookup_parent(rqstp, dparent, &exp, &dentry);
177 struct dentry *dp; 207 if (host_err)
178 struct path path = {.mnt = mntget(exp->ex_path.mnt),
179 .dentry = dget(dparent)};
180
181 while (path.dentry == path.mnt->mnt_root &&
182 follow_up(&path))
183 ;
184 dp = dget_parent(path.dentry);
185 dput(path.dentry);
186 path.dentry = dp;
187
188 exp2 = rqst_exp_parent(rqstp, &path);
189 if (PTR_ERR(exp2) == -ENOENT) {
190 dentry = dget(dparent);
191 } else if (IS_ERR(exp2)) {
192 host_err = PTR_ERR(exp2);
193 path_put(&path);
194 goto out_nfserr; 208 goto out_nfserr;
195 } else {
196 dentry = dget(path.dentry);
197 exp_put(exp);
198 exp = exp2;
199 }
200 path_put(&path);
201 } 209 }
202 } else { 210 } else {
203 fh_lock(fhp); 211 fh_lock(fhp);
@@ -208,7 +216,7 @@ nfsd_lookup_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp,
208 /* 216 /*
209 * check if we have crossed a mount point ... 217 * check if we have crossed a mount point ...
210 */ 218 */
211 if (d_mountpoint(dentry)) { 219 if (nfsd_mountpoint(dentry, exp)) {
212 if ((host_err = nfsd_cross_mnt(rqstp, &dentry, &exp))) { 220 if ((host_err = nfsd_cross_mnt(rqstp, &dentry, &exp))) {
213 dput(dentry); 221 dput(dentry);
214 goto out_nfserr; 222 goto out_nfserr;
@@ -744,8 +752,6 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
744 flags, current_cred()); 752 flags, current_cred());
745 if (IS_ERR(*filp)) 753 if (IS_ERR(*filp))
746 host_err = PTR_ERR(*filp); 754 host_err = PTR_ERR(*filp);
747 else
748 ima_counts_get(*filp);
749out_nfserr: 755out_nfserr:
750 err = nfserrno(host_err); 756 err = nfserrno(host_err);
751out: 757out:
@@ -2124,8 +2130,7 @@ nfsd_permission(struct svc_rqst *rqstp, struct svc_export *exp,
2124 */ 2130 */
2125 path.mnt = exp->ex_path.mnt; 2131 path.mnt = exp->ex_path.mnt;
2126 path.dentry = dentry; 2132 path.dentry = dentry;
2127 err = ima_path_check(&path, acc & (MAY_READ | MAY_WRITE | MAY_EXEC), 2133 err = ima_path_check(&path, acc & (MAY_READ | MAY_WRITE | MAY_EXEC));
2128 IMA_COUNT_LEAVE);
2129nfsd_out: 2134nfsd_out:
2130 return err? nfserrno(err) : 0; 2135 return err? nfserrno(err) : 0;
2131} 2136}
diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h
new file mode 100644
index 000000000000..4b1de0a9ea75
--- /dev/null
+++ b/fs/nfsd/vfs.h
@@ -0,0 +1,101 @@
1/*
2 * Copyright (C) 1995-1997 Olaf Kirch <okir@monad.swb.de>
3 */
4
5#ifndef LINUX_NFSD_VFS_H
6#define LINUX_NFSD_VFS_H
7
8#include "nfsfh.h"
9
/*
 * Flags for nfsd_permission.
 *
 * Each flag is a distinct bit; the three lowest ones are annotated as being
 * equal to the VFS MAY_EXEC / MAY_WRITE / MAY_READ values.
 */
#define NFSD_MAY_NOP			0x000
#define NFSD_MAY_EXEC			0x001	/* == MAY_EXEC */
#define NFSD_MAY_WRITE			0x002	/* == MAY_WRITE */
#define NFSD_MAY_READ			0x004	/* == MAY_READ */
#define NFSD_MAY_SATTR			0x008
#define NFSD_MAY_TRUNC			0x010
#define NFSD_MAY_LOCK			0x020
#define NFSD_MAY_OWNER_OVERRIDE		0x040
#define NFSD_MAY_LOCAL_ACCESS		0x080	/* IRIX doing local access check on device special file */
#define NFSD_MAY_BYPASS_GSS_ON_ROOT	0x100

/* Composite masks built from the single-bit flags above. */
#define NFSD_MAY_CREATE		(NFSD_MAY_EXEC|NFSD_MAY_WRITE)
#define NFSD_MAY_REMOVE		(NFSD_MAY_EXEC|NFSD_MAY_WRITE|NFSD_MAY_TRUNC)
26
27/*
28 * Callback function for readdir
29 */
30typedef int (*nfsd_dirop_t)(struct inode *, struct dentry *, int, int);
31
32/* nfsd/vfs.c */
33int fh_lock_parent(struct svc_fh *, struct dentry *);
34int nfsd_racache_init(int);
35void nfsd_racache_shutdown(void);
36int nfsd_cross_mnt(struct svc_rqst *rqstp, struct dentry **dpp,
37 struct svc_export **expp);
38__be32 nfsd_lookup(struct svc_rqst *, struct svc_fh *,
39 const char *, unsigned int, struct svc_fh *);
40__be32 nfsd_lookup_dentry(struct svc_rqst *, struct svc_fh *,
41 const char *, unsigned int,
42 struct svc_export **, struct dentry **);
43__be32 nfsd_setattr(struct svc_rqst *, struct svc_fh *,
44 struct iattr *, int, time_t);
45int nfsd_mountpoint(struct dentry *, struct svc_export *);
46#ifdef CONFIG_NFSD_V4
47__be32 nfsd4_set_nfs4_acl(struct svc_rqst *, struct svc_fh *,
48 struct nfs4_acl *);
49int nfsd4_get_nfs4_acl(struct svc_rqst *, struct dentry *, struct nfs4_acl **);
50#endif /* CONFIG_NFSD_V4 */
51__be32 nfsd_create(struct svc_rqst *, struct svc_fh *,
52 char *name, int len, struct iattr *attrs,
53 int type, dev_t rdev, struct svc_fh *res);
54#ifdef CONFIG_NFSD_V3
55__be32 nfsd_access(struct svc_rqst *, struct svc_fh *, u32 *, u32 *);
56__be32 nfsd_create_v3(struct svc_rqst *, struct svc_fh *,
57 char *name, int len, struct iattr *attrs,
58 struct svc_fh *res, int createmode,
59 u32 *verifier, int *truncp, int *created);
60__be32 nfsd_commit(struct svc_rqst *, struct svc_fh *,
61 loff_t, unsigned long);
62#endif /* CONFIG_NFSD_V3 */
63__be32 nfsd_open(struct svc_rqst *, struct svc_fh *, int,
64 int, struct file **);
65void nfsd_close(struct file *);
66__be32 nfsd_read(struct svc_rqst *, struct svc_fh *, struct file *,
67 loff_t, struct kvec *, int, unsigned long *);
68__be32 nfsd_write(struct svc_rqst *, struct svc_fh *,struct file *,
69 loff_t, struct kvec *,int, unsigned long *, int *);
70__be32 nfsd_readlink(struct svc_rqst *, struct svc_fh *,
71 char *, int *);
72__be32 nfsd_symlink(struct svc_rqst *, struct svc_fh *,
73 char *name, int len, char *path, int plen,
74 struct svc_fh *res, struct iattr *);
75__be32 nfsd_link(struct svc_rqst *, struct svc_fh *,
76 char *, int, struct svc_fh *);
77__be32 nfsd_rename(struct svc_rqst *,
78 struct svc_fh *, char *, int,
79 struct svc_fh *, char *, int);
80__be32 nfsd_remove(struct svc_rqst *,
81 struct svc_fh *, char *, int);
82__be32 nfsd_unlink(struct svc_rqst *, struct svc_fh *, int type,
83 char *name, int len);
84int nfsd_truncate(struct svc_rqst *, struct svc_fh *,
85 unsigned long size);
86__be32 nfsd_readdir(struct svc_rqst *, struct svc_fh *,
87 loff_t *, struct readdir_cd *, filldir_t);
88__be32 nfsd_statfs(struct svc_rqst *, struct svc_fh *,
89 struct kstatfs *, int access);
90
91int nfsd_notify_change(struct inode *, struct iattr *);
92__be32 nfsd_permission(struct svc_rqst *, struct svc_export *,
93 struct dentry *, int);
94int nfsd_sync_dir(struct dentry *dp);
95
96#if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL)
97struct posix_acl *nfsd_get_posix_acl(struct svc_fh *, int);
98int nfsd_set_posix_acl(struct svc_fh *, int, struct posix_acl *);
99#endif
100
101#endif /* LINUX_NFSD_VFS_H */
diff --git a/fs/nfsd/xdr.h b/fs/nfsd/xdr.h
new file mode 100644
index 000000000000..53b1863dd8f6
--- /dev/null
+++ b/fs/nfsd/xdr.h
@@ -0,0 +1,173 @@
1/* XDR types for nfsd. This is mainly a typing exercise. */
2
3#ifndef LINUX_NFSD_H
4#define LINUX_NFSD_H
5
6#include <linux/vfs.h>
7#include "nfsd.h"
8#include "nfsfh.h"
9
10struct nfsd_fhandle {
11 struct svc_fh fh;
12};
13
14struct nfsd_sattrargs {
15 struct svc_fh fh;
16 struct iattr attrs;
17};
18
19struct nfsd_diropargs {
20 struct svc_fh fh;
21 char * name;
22 unsigned int len;
23};
24
25struct nfsd_readargs {
26 struct svc_fh fh;
27 __u32 offset;
28 __u32 count;
29 int vlen;
30};
31
32struct nfsd_writeargs {
33 svc_fh fh;
34 __u32 offset;
35 int len;
36 int vlen;
37};
38
39struct nfsd_createargs {
40 struct svc_fh fh;
41 char * name;
42 unsigned int len;
43 struct iattr attrs;
44};
45
46struct nfsd_renameargs {
47 struct svc_fh ffh;
48 char * fname;
49 unsigned int flen;
50 struct svc_fh tfh;
51 char * tname;
52 unsigned int tlen;
53};
54
55struct nfsd_readlinkargs {
56 struct svc_fh fh;
57 char * buffer;
58};
59
60struct nfsd_linkargs {
61 struct svc_fh ffh;
62 struct svc_fh tfh;
63 char * tname;
64 unsigned int tlen;
65};
66
67struct nfsd_symlinkargs {
68 struct svc_fh ffh;
69 char * fname;
70 unsigned int flen;
71 char * tname;
72 unsigned int tlen;
73 struct iattr attrs;
74};
75
76struct nfsd_readdirargs {
77 struct svc_fh fh;
78 __u32 cookie;
79 __u32 count;
80 __be32 * buffer;
81};
82
83struct nfsd_attrstat {
84 struct svc_fh fh;
85 struct kstat stat;
86};
87
88struct nfsd_diropres {
89 struct svc_fh fh;
90 struct kstat stat;
91};
92
93struct nfsd_readlinkres {
94 int len;
95};
96
97struct nfsd_readres {
98 struct svc_fh fh;
99 unsigned long count;
100 struct kstat stat;
101};
102
103struct nfsd_readdirres {
104 int count;
105
106 struct readdir_cd common;
107 __be32 * buffer;
108 int buflen;
109 __be32 * offset;
110};
111
112struct nfsd_statfsres {
113 struct kstatfs stats;
114};
115
116/*
117 * Storage requirements for XDR arguments and results.
118 */
119union nfsd_xdrstore {
120 struct nfsd_sattrargs sattr;
121 struct nfsd_diropargs dirop;
122 struct nfsd_readargs read;
123 struct nfsd_writeargs write;
124 struct nfsd_createargs create;
125 struct nfsd_renameargs rename;
126 struct nfsd_linkargs link;
127 struct nfsd_symlinkargs symlink;
128 struct nfsd_readdirargs readdir;
129};
130
131#define NFS2_SVC_XDRSIZE sizeof(union nfsd_xdrstore)
132
133
134int nfssvc_decode_void(struct svc_rqst *, __be32 *, void *);
135int nfssvc_decode_fhandle(struct svc_rqst *, __be32 *, struct nfsd_fhandle *);
136int nfssvc_decode_sattrargs(struct svc_rqst *, __be32 *,
137 struct nfsd_sattrargs *);
138int nfssvc_decode_diropargs(struct svc_rqst *, __be32 *,
139 struct nfsd_diropargs *);
140int nfssvc_decode_readargs(struct svc_rqst *, __be32 *,
141 struct nfsd_readargs *);
142int nfssvc_decode_writeargs(struct svc_rqst *, __be32 *,
143 struct nfsd_writeargs *);
144int nfssvc_decode_createargs(struct svc_rqst *, __be32 *,
145 struct nfsd_createargs *);
146int nfssvc_decode_renameargs(struct svc_rqst *, __be32 *,
147 struct nfsd_renameargs *);
148int nfssvc_decode_readlinkargs(struct svc_rqst *, __be32 *,
149 struct nfsd_readlinkargs *);
150int nfssvc_decode_linkargs(struct svc_rqst *, __be32 *,
151 struct nfsd_linkargs *);
152int nfssvc_decode_symlinkargs(struct svc_rqst *, __be32 *,
153 struct nfsd_symlinkargs *);
154int nfssvc_decode_readdirargs(struct svc_rqst *, __be32 *,
155 struct nfsd_readdirargs *);
156int nfssvc_encode_void(struct svc_rqst *, __be32 *, void *);
157int nfssvc_encode_attrstat(struct svc_rqst *, __be32 *, struct nfsd_attrstat *);
158int nfssvc_encode_diropres(struct svc_rqst *, __be32 *, struct nfsd_diropres *);
159int nfssvc_encode_readlinkres(struct svc_rqst *, __be32 *, struct nfsd_readlinkres *);
160int nfssvc_encode_readres(struct svc_rqst *, __be32 *, struct nfsd_readres *);
161int nfssvc_encode_statfsres(struct svc_rqst *, __be32 *, struct nfsd_statfsres *);
162int nfssvc_encode_readdirres(struct svc_rqst *, __be32 *, struct nfsd_readdirres *);
163
164int nfssvc_encode_entry(void *, const char *name,
165 int namlen, loff_t offset, u64 ino, unsigned int);
166
167int nfssvc_release_fhandle(struct svc_rqst *, __be32 *, struct nfsd_fhandle *);
168
169/* Helper functions for NFSv2 ACL code */
170__be32 *nfs2svc_encode_fattr(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *fhp);
171__be32 *nfs2svc_decode_fh(__be32 *p, struct svc_fh *fhp);
172
173#endif /* LINUX_NFSD_H */
diff --git a/fs/nfsd/xdr3.h b/fs/nfsd/xdr3.h
new file mode 100644
index 000000000000..7df980eb0562
--- /dev/null
+++ b/fs/nfsd/xdr3.h
@@ -0,0 +1,344 @@
1/*
2 * XDR types for NFSv3 in nfsd.
3 *
4 * Copyright (C) 1996-1998, Olaf Kirch <okir@monad.swb.de>
5 */
6
7#ifndef _LINUX_NFSD_XDR3_H
8#define _LINUX_NFSD_XDR3_H
9
10#include "xdr.h"
11
12struct nfsd3_sattrargs {
13 struct svc_fh fh;
14 struct iattr attrs;
15 int check_guard;
16 time_t guardtime;
17};
18
19struct nfsd3_diropargs {
20 struct svc_fh fh;
21 char * name;
22 unsigned int len;
23};
24
25struct nfsd3_accessargs {
26 struct svc_fh fh;
27 unsigned int access;
28};
29
30struct nfsd3_readargs {
31 struct svc_fh fh;
32 __u64 offset;
33 __u32 count;
34 int vlen;
35};
36
37struct nfsd3_writeargs {
38 svc_fh fh;
39 __u64 offset;
40 __u32 count;
41 int stable;
42 __u32 len;
43 int vlen;
44};
45
46struct nfsd3_createargs {
47 struct svc_fh fh;
48 char * name;
49 unsigned int len;
50 int createmode;
51 struct iattr attrs;
52 __be32 * verf;
53};
54
55struct nfsd3_mknodargs {
56 struct svc_fh fh;
57 char * name;
58 unsigned int len;
59 __u32 ftype;
60 __u32 major, minor;
61 struct iattr attrs;
62};
63
64struct nfsd3_renameargs {
65 struct svc_fh ffh;
66 char * fname;
67 unsigned int flen;
68 struct svc_fh tfh;
69 char * tname;
70 unsigned int tlen;
71};
72
73struct nfsd3_readlinkargs {
74 struct svc_fh fh;
75 char * buffer;
76};
77
78struct nfsd3_linkargs {
79 struct svc_fh ffh;
80 struct svc_fh tfh;
81 char * tname;
82 unsigned int tlen;
83};
84
85struct nfsd3_symlinkargs {
86 struct svc_fh ffh;
87 char * fname;
88 unsigned int flen;
89 char * tname;
90 unsigned int tlen;
91 struct iattr attrs;
92};
93
94struct nfsd3_readdirargs {
95 struct svc_fh fh;
96 __u64 cookie;
97 __u32 dircount;
98 __u32 count;
99 __be32 * verf;
100 __be32 * buffer;
101};
102
103struct nfsd3_commitargs {
104 struct svc_fh fh;
105 __u64 offset;
106 __u32 count;
107};
108
109struct nfsd3_getaclargs {
110 struct svc_fh fh;
111 int mask;
112};
113
114struct posix_acl;
115struct nfsd3_setaclargs {
116 struct svc_fh fh;
117 int mask;
118 struct posix_acl *acl_access;
119 struct posix_acl *acl_default;
120};
121
122struct nfsd3_attrstat {
123 __be32 status;
124 struct svc_fh fh;
125 struct kstat stat;
126};
127
128/* LOOKUP, CREATE, MKDIR, SYMLINK, MKNOD */
129struct nfsd3_diropres {
130 __be32 status;
131 struct svc_fh dirfh;
132 struct svc_fh fh;
133};
134
135struct nfsd3_accessres {
136 __be32 status;
137 struct svc_fh fh;
138 __u32 access;
139};
140
141struct nfsd3_readlinkres {
142 __be32 status;
143 struct svc_fh fh;
144 __u32 len;
145};
146
147struct nfsd3_readres {
148 __be32 status;
149 struct svc_fh fh;
150 unsigned long count;
151 int eof;
152};
153
154struct nfsd3_writeres {
155 __be32 status;
156 struct svc_fh fh;
157 unsigned long count;
158 int committed;
159};
160
161struct nfsd3_renameres {
162 __be32 status;
163 struct svc_fh ffh;
164 struct svc_fh tfh;
165};
166
167struct nfsd3_linkres {
168 __be32 status;
169 struct svc_fh tfh;
170 struct svc_fh fh;
171};
172
173struct nfsd3_readdirres {
174 __be32 status;
175 struct svc_fh fh;
176 int count;
177 __be32 verf[2];
178
179 struct readdir_cd common;
180 __be32 * buffer;
181 int buflen;
182 __be32 * offset;
183 __be32 * offset1;
184 struct svc_rqst * rqstp;
185
186};
187
188struct nfsd3_fsstatres {
189 __be32 status;
190 struct kstatfs stats;
191 __u32 invarsec;
192};
193
194struct nfsd3_fsinfores {
195 __be32 status;
196 __u32 f_rtmax;
197 __u32 f_rtpref;
198 __u32 f_rtmult;
199 __u32 f_wtmax;
200 __u32 f_wtpref;
201 __u32 f_wtmult;
202 __u32 f_dtpref;
203 __u64 f_maxfilesize;
204 __u32 f_properties;
205};
206
207struct nfsd3_pathconfres {
208 __be32 status;
209 __u32 p_link_max;
210 __u32 p_name_max;
211 __u32 p_no_trunc;
212 __u32 p_chown_restricted;
213 __u32 p_case_insensitive;
214 __u32 p_case_preserving;
215};
216
217struct nfsd3_commitres {
218 __be32 status;
219 struct svc_fh fh;
220};
221
222struct nfsd3_getaclres {
223 __be32 status;
224 struct svc_fh fh;
225 int mask;
226 struct posix_acl *acl_access;
227 struct posix_acl *acl_default;
228};
229
230/* dummy type for release */
231struct nfsd3_fhandle_pair {
232 __u32 dummy;
233 struct svc_fh fh1;
234 struct svc_fh fh2;
235};
236
237/*
238 * Storage requirements for XDR arguments and results.
239 */
240union nfsd3_xdrstore {
241 struct nfsd3_sattrargs sattrargs;
242 struct nfsd3_diropargs diropargs;
243 struct nfsd3_readargs readargs;
244 struct nfsd3_writeargs writeargs;
245 struct nfsd3_createargs createargs;
246 struct nfsd3_renameargs renameargs;
247 struct nfsd3_linkargs linkargs;
248 struct nfsd3_symlinkargs symlinkargs;
249 struct nfsd3_readdirargs readdirargs;
250 struct nfsd3_diropres diropres;
251 struct nfsd3_accessres accessres;
252 struct nfsd3_readlinkres readlinkres;
253 struct nfsd3_readres readres;
254 struct nfsd3_writeres writeres;
255 struct nfsd3_renameres renameres;
256 struct nfsd3_linkres linkres;
257 struct nfsd3_readdirres readdirres;
258 struct nfsd3_fsstatres fsstatres;
259 struct nfsd3_fsinfores fsinfores;
260 struct nfsd3_pathconfres pathconfres;
261 struct nfsd3_commitres commitres;
262 struct nfsd3_getaclres getaclres;
263};
264
265#define NFS3_SVC_XDRSIZE sizeof(union nfsd3_xdrstore)
266
267int nfs3svc_decode_fhandle(struct svc_rqst *, __be32 *, struct nfsd_fhandle *);
268int nfs3svc_decode_sattrargs(struct svc_rqst *, __be32 *,
269 struct nfsd3_sattrargs *);
270int nfs3svc_decode_diropargs(struct svc_rqst *, __be32 *,
271 struct nfsd3_diropargs *);
272int nfs3svc_decode_accessargs(struct svc_rqst *, __be32 *,
273 struct nfsd3_accessargs *);
274int nfs3svc_decode_readargs(struct svc_rqst *, __be32 *,
275 struct nfsd3_readargs *);
276int nfs3svc_decode_writeargs(struct svc_rqst *, __be32 *,
277 struct nfsd3_writeargs *);
278int nfs3svc_decode_createargs(struct svc_rqst *, __be32 *,
279 struct nfsd3_createargs *);
280int nfs3svc_decode_mkdirargs(struct svc_rqst *, __be32 *,
281 struct nfsd3_createargs *);
282int nfs3svc_decode_mknodargs(struct svc_rqst *, __be32 *,
283 struct nfsd3_mknodargs *);
284int nfs3svc_decode_renameargs(struct svc_rqst *, __be32 *,
285 struct nfsd3_renameargs *);
286int nfs3svc_decode_readlinkargs(struct svc_rqst *, __be32 *,
287 struct nfsd3_readlinkargs *);
288int nfs3svc_decode_linkargs(struct svc_rqst *, __be32 *,
289 struct nfsd3_linkargs *);
290int nfs3svc_decode_symlinkargs(struct svc_rqst *, __be32 *,
291 struct nfsd3_symlinkargs *);
292int nfs3svc_decode_readdirargs(struct svc_rqst *, __be32 *,
293 struct nfsd3_readdirargs *);
294int nfs3svc_decode_readdirplusargs(struct svc_rqst *, __be32 *,
295 struct nfsd3_readdirargs *);
296int nfs3svc_decode_commitargs(struct svc_rqst *, __be32 *,
297 struct nfsd3_commitargs *);
298int nfs3svc_encode_voidres(struct svc_rqst *, __be32 *, void *);
299int nfs3svc_encode_attrstat(struct svc_rqst *, __be32 *,
300 struct nfsd3_attrstat *);
301int nfs3svc_encode_wccstat(struct svc_rqst *, __be32 *,
302 struct nfsd3_attrstat *);
303int nfs3svc_encode_diropres(struct svc_rqst *, __be32 *,
304 struct nfsd3_diropres *);
305int nfs3svc_encode_accessres(struct svc_rqst *, __be32 *,
306 struct nfsd3_accessres *);
307int nfs3svc_encode_readlinkres(struct svc_rqst *, __be32 *,
308 struct nfsd3_readlinkres *);
309int nfs3svc_encode_readres(struct svc_rqst *, __be32 *, struct nfsd3_readres *);
310int nfs3svc_encode_writeres(struct svc_rqst *, __be32 *, struct nfsd3_writeres *);
311int nfs3svc_encode_createres(struct svc_rqst *, __be32 *,
312 struct nfsd3_diropres *);
313int nfs3svc_encode_renameres(struct svc_rqst *, __be32 *,
314 struct nfsd3_renameres *);
315int nfs3svc_encode_linkres(struct svc_rqst *, __be32 *,
316 struct nfsd3_linkres *);
317int nfs3svc_encode_readdirres(struct svc_rqst *, __be32 *,
318 struct nfsd3_readdirres *);
319int nfs3svc_encode_fsstatres(struct svc_rqst *, __be32 *,
320 struct nfsd3_fsstatres *);
321int nfs3svc_encode_fsinfores(struct svc_rqst *, __be32 *,
322 struct nfsd3_fsinfores *);
323int nfs3svc_encode_pathconfres(struct svc_rqst *, __be32 *,
324 struct nfsd3_pathconfres *);
325int nfs3svc_encode_commitres(struct svc_rqst *, __be32 *,
326 struct nfsd3_commitres *);
327
328int nfs3svc_release_fhandle(struct svc_rqst *, __be32 *,
329 struct nfsd3_attrstat *);
330int nfs3svc_release_fhandle2(struct svc_rqst *, __be32 *,
331 struct nfsd3_fhandle_pair *);
332int nfs3svc_encode_entry(void *, const char *name,
333 int namlen, loff_t offset, u64 ino,
334 unsigned int);
335int nfs3svc_encode_entry_plus(void *, const char *name,
336 int namlen, loff_t offset, u64 ino,
337 unsigned int);
338/* Helper functions for NFSv3 ACL code */
339__be32 *nfs3svc_encode_post_op_attr(struct svc_rqst *rqstp, __be32 *p,
340 struct svc_fh *fhp);
341__be32 *nfs3svc_decode_fh(__be32 *p, struct svc_fh *fhp);
342
343
344#endif /* _LINUX_NFSD_XDR3_H */
diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
new file mode 100644
index 000000000000..efa337739534
--- /dev/null
+++ b/fs/nfsd/xdr4.h
@@ -0,0 +1,562 @@
1/*
2 * Server-side types for NFSv4.
3 *
4 * Copyright (c) 2002 The Regents of the University of Michigan.
5 * All rights reserved.
6 *
7 * Kendrick Smith <kmsmith@umich.edu>
8 * Andy Adamson <andros@umich.edu>
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 *
14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution.
19 * 3. Neither the name of the University nor the names of its
20 * contributors may be used to endorse or promote products derived
21 * from this software without specific prior written permission.
22 *
23 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
24 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
25 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
26 * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
28 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
29 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
30 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
31 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
32 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
33 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34 *
35 */
36
37#ifndef _LINUX_NFSD_XDR4_H
38#define _LINUX_NFSD_XDR4_H
39
40#include "state.h"
41#include "nfsd.h"
42
/* NOTE(review): presumably the upper bound on a COMPOUND tag length - confirm against the decoder. */
#define NFSD4_MAX_TAGLEN	128
/* Round n up to the next multiple of 4. */
#define XDR_LEN(n)		(((n) + 3) & ~3)
45
46struct nfsd4_compound_state {
47 struct svc_fh current_fh;
48 struct svc_fh save_fh;
49 struct nfs4_stateowner *replay_owner;
50 /* For sessions DRC */
51 struct nfsd4_session *session;
52 struct nfsd4_slot *slot;
53 __be32 *datap;
54 size_t iovlen;
55 u32 minorversion;
56 u32 status;
57};
58
59static inline bool nfsd4_has_session(struct nfsd4_compound_state *cs)
60{
61 return cs->slot != NULL;
62}
63
64struct nfsd4_change_info {
65 u32 atomic;
66 bool change_supported;
67 u32 before_ctime_sec;
68 u32 before_ctime_nsec;
69 u64 before_change;
70 u32 after_ctime_sec;
71 u32 after_ctime_nsec;
72 u64 after_change;
73};
74
75struct nfsd4_access {
76 u32 ac_req_access; /* request */
77 u32 ac_supported; /* response */
78 u32 ac_resp_access; /* response */
79};
80
81struct nfsd4_close {
82 u32 cl_seqid; /* request */
83 stateid_t cl_stateid; /* request+response */
84 struct nfs4_stateowner * cl_stateowner; /* response */
85};
86
87struct nfsd4_commit {
88 u64 co_offset; /* request */
89 u32 co_count; /* request */
90 nfs4_verifier co_verf; /* response */
91};
92
93struct nfsd4_create {
94 u32 cr_namelen; /* request */
95 char * cr_name; /* request */
96 u32 cr_type; /* request */
97 union { /* request */
98 struct {
99 u32 namelen;
100 char *name;
101 } link; /* NF4LNK */
102 struct {
103 u32 specdata1;
104 u32 specdata2;
105 } dev; /* NF4BLK, NF4CHR */
106 } u;
107 u32 cr_bmval[3]; /* request */
108 struct iattr cr_iattr; /* request */
109 struct nfsd4_change_info cr_cinfo; /* response */
110 struct nfs4_acl *cr_acl;
111};
112#define cr_linklen u.link.namelen
113#define cr_linkname u.link.name
114#define cr_specdata1 u.dev.specdata1
115#define cr_specdata2 u.dev.specdata2
116
117struct nfsd4_delegreturn {
118 stateid_t dr_stateid;
119};
120
121struct nfsd4_getattr {
122 u32 ga_bmval[3]; /* request */
123 struct svc_fh *ga_fhp; /* response */
124};
125
126struct nfsd4_link {
127 u32 li_namelen; /* request */
128 char * li_name; /* request */
129 struct nfsd4_change_info li_cinfo; /* response */
130};
131
132struct nfsd4_lock_denied {
133 clientid_t ld_clientid;
134 struct nfs4_stateowner *ld_sop;
135 u64 ld_start;
136 u64 ld_length;
137 u32 ld_type;
138};
139
140struct nfsd4_lock {
141 /* request */
142 u32 lk_type;
143 u32 lk_reclaim; /* boolean */
144 u64 lk_offset;
145 u64 lk_length;
146 u32 lk_is_new;
147 union {
148 struct {
149 u32 open_seqid;
150 stateid_t open_stateid;
151 u32 lock_seqid;
152 clientid_t clientid;
153 struct xdr_netobj owner;
154 } new;
155 struct {
156 stateid_t lock_stateid;
157 u32 lock_seqid;
158 } old;
159 } v;
160
161 /* response */
162 union {
163 struct {
164 stateid_t stateid;
165 } ok;
166 struct nfsd4_lock_denied denied;
167 } u;
168 /* The lk_replay_owner is the open owner in the open_to_lock_owner
169 * case and the lock owner otherwise: */
170 struct nfs4_stateowner *lk_replay_owner;
171};
/*
 * Shorthand accessors for the unions embedded in struct nfsd4_lock.
 *
 * Request side: v.new (lock request that creates a lock owner) and
 * v.old (request on an existing lock stateid).
 */
#define lk_new_open_seqid	v.new.open_seqid
#define lk_new_open_stateid	v.new.open_stateid
#define lk_new_lock_seqid	v.new.lock_seqid
#define lk_new_clientid		v.new.clientid
#define lk_new_owner		v.new.owner
#define lk_old_lock_stateid	v.old.lock_stateid
#define lk_old_lock_seqid	v.old.lock_seqid

/* Response side: u.ok on success, u.denied when the lock is refused. */
#define lk_resp_stateid		u.ok.stateid
#define lk_denied		u.denied
183
184
185struct nfsd4_lockt {
186 u32 lt_type;
187 clientid_t lt_clientid;
188 struct xdr_netobj lt_owner;
189 u64 lt_offset;
190 u64 lt_length;
191 struct nfs4_stateowner * lt_stateowner;
192 struct nfsd4_lock_denied lt_denied;
193};
194
195
196struct nfsd4_locku {
197 u32 lu_type;
198 u32 lu_seqid;
199 stateid_t lu_stateid;
200 u64 lu_offset;
201 u64 lu_length;
202 struct nfs4_stateowner *lu_stateowner;
203};
204
205
206struct nfsd4_lookup {
207 u32 lo_len; /* request */
208 char * lo_name; /* request */
209};
210
211struct nfsd4_putfh {
212 u32 pf_fhlen; /* request */
213 char *pf_fhval; /* request */
214};
215
216struct nfsd4_open {
217 u32 op_claim_type; /* request */
218 struct xdr_netobj op_fname; /* request - everything but CLAIM_PREV */
219 u32 op_delegate_type; /* request - CLAIM_PREV only */
220 stateid_t op_delegate_stateid; /* request - response */
221 u32 op_create; /* request */
222 u32 op_createmode; /* request */
223 u32 op_bmval[3]; /* request */
224 struct iattr iattr; /* UNCHECKED4, GUARDED4, EXCLUSIVE4_1 */
225 nfs4_verifier verf; /* EXCLUSIVE4 */
226 clientid_t op_clientid; /* request */
227 struct xdr_netobj op_owner; /* request */
228 u32 op_seqid; /* request */
229 u32 op_share_access; /* request */
230 u32 op_share_deny; /* request */
231 stateid_t op_stateid; /* response */
232 u32 op_recall; /* recall */
233 struct nfsd4_change_info op_cinfo; /* response */
234 u32 op_rflags; /* response */
235 int op_truncate; /* used during processing */
236 struct nfs4_stateowner *op_stateowner; /* used during processing */
237 struct nfs4_acl *op_acl;
238};
239#define op_iattr iattr
240#define op_verf verf
241
242struct nfsd4_open_confirm {
243 stateid_t oc_req_stateid /* request */;
244 u32 oc_seqid /* request */;
245 stateid_t oc_resp_stateid /* response */;
246 struct nfs4_stateowner * oc_stateowner; /* response */
247};
248
249struct nfsd4_open_downgrade {
250 stateid_t od_stateid;
251 u32 od_seqid;
252 u32 od_share_access;
253 u32 od_share_deny;
254 struct nfs4_stateowner *od_stateowner;
255};
256
257
258struct nfsd4_read {
259 stateid_t rd_stateid; /* request */
260 u64 rd_offset; /* request */
261 u32 rd_length; /* request */
262 int rd_vlen;
263 struct file *rd_filp;
264
265 struct svc_rqst *rd_rqstp; /* response */
266 struct svc_fh * rd_fhp; /* response */
267};
268
269struct nfsd4_readdir {
270 u64 rd_cookie; /* request */
271 nfs4_verifier rd_verf; /* request */
272 u32 rd_dircount; /* request */
273 u32 rd_maxcount; /* request */
274 u32 rd_bmval[3]; /* request */
275 struct svc_rqst *rd_rqstp; /* response */
276 struct svc_fh * rd_fhp; /* response */
277
278 struct readdir_cd common;
279 __be32 * buffer;
280 int buflen;
281 __be32 * offset;
282};
283
284struct nfsd4_release_lockowner {
285 clientid_t rl_clientid;
286 struct xdr_netobj rl_owner;
287};
288struct nfsd4_readlink {
289 struct svc_rqst *rl_rqstp; /* request */
290 struct svc_fh * rl_fhp; /* request */
291};
292
293struct nfsd4_remove {
294 u32 rm_namelen; /* request */
295 char * rm_name; /* request */
296 struct nfsd4_change_info rm_cinfo; /* response */
297};
298
299struct nfsd4_rename {
300 u32 rn_snamelen; /* request */
301 char * rn_sname; /* request */
302 u32 rn_tnamelen; /* request */
303 char * rn_tname; /* request */
304 struct nfsd4_change_info rn_sinfo; /* response */
305 struct nfsd4_change_info rn_tinfo; /* response */
306};
307
308struct nfsd4_secinfo {
309 u32 si_namelen; /* request */
310 char *si_name; /* request */
311 struct svc_export *si_exp; /* response */
312};
313
314struct nfsd4_setattr {
315 stateid_t sa_stateid; /* request */
316 u32 sa_bmval[3]; /* request */
317 struct iattr sa_iattr; /* request */
318 struct nfs4_acl *sa_acl;
319};
320
321struct nfsd4_setclientid {
322 nfs4_verifier se_verf; /* request */
323 u32 se_namelen; /* request */
324 char * se_name; /* request */
325 u32 se_callback_prog; /* request */
326 u32 se_callback_netid_len; /* request */
327 char * se_callback_netid_val; /* request */
328 u32 se_callback_addr_len; /* request */
329 char * se_callback_addr_val; /* request */
330 u32 se_callback_ident; /* request */
331 clientid_t se_clientid; /* response */
332 nfs4_verifier se_confirm; /* response */
333};
334
335struct nfsd4_setclientid_confirm {
336 clientid_t sc_clientid;
337 nfs4_verifier sc_confirm;
338};
339
340/* also used for NVERIFY */
341struct nfsd4_verify {
342 u32 ve_bmval[3]; /* request */
343 u32 ve_attrlen; /* request */
344 char * ve_attrval; /* request */
345};
346
347struct nfsd4_write {
348 stateid_t wr_stateid; /* request */
349 u64 wr_offset; /* request */
350 u32 wr_stable_how; /* request */
351 u32 wr_buflen; /* request */
352 int wr_vlen;
353
354 u32 wr_bytes_written; /* response */
355 u32 wr_how_written; /* response */
356 nfs4_verifier wr_verifier; /* response */
357};
358
359struct nfsd4_exchange_id {
360 nfs4_verifier verifier;
361 struct xdr_netobj clname;
362 u32 flags;
363 clientid_t clientid;
364 u32 seqid;
365 int spa_how;
366};
367
368struct nfsd4_sequence {
369 struct nfs4_sessionid sessionid; /* request/response */
370 u32 seqid; /* request/response */
371 u32 slotid; /* request/response */
372 u32 maxslots; /* request/response */
373 u32 cachethis; /* request */
374#if 0
375 u32 target_maxslots; /* response */
376 u32 status_flags; /* response */
377#endif /* not yet */
378};
379
380struct nfsd4_destroy_session {
381 struct nfs4_sessionid sessionid;
382};
383
384struct nfsd4_op {
385 int opnum;
386 __be32 status;
387 union {
388 struct nfsd4_access access;
389 struct nfsd4_close close;
390 struct nfsd4_commit commit;
391 struct nfsd4_create create;
392 struct nfsd4_delegreturn delegreturn;
393 struct nfsd4_getattr getattr;
394 struct svc_fh * getfh;
395 struct nfsd4_link link;
396 struct nfsd4_lock lock;
397 struct nfsd4_lockt lockt;
398 struct nfsd4_locku locku;
399 struct nfsd4_lookup lookup;
400 struct nfsd4_verify nverify;
401 struct nfsd4_open open;
402 struct nfsd4_open_confirm open_confirm;
403 struct nfsd4_open_downgrade open_downgrade;
404 struct nfsd4_putfh putfh;
405 struct nfsd4_read read;
406 struct nfsd4_readdir readdir;
407 struct nfsd4_readlink readlink;
408 struct nfsd4_remove remove;
409 struct nfsd4_rename rename;
410 clientid_t renew;
411 struct nfsd4_secinfo secinfo;
412 struct nfsd4_setattr setattr;
413 struct nfsd4_setclientid setclientid;
414 struct nfsd4_setclientid_confirm setclientid_confirm;
415 struct nfsd4_verify verify;
416 struct nfsd4_write write;
417 struct nfsd4_release_lockowner release_lockowner;
418
419 /* NFSv4.1 */
420 struct nfsd4_exchange_id exchange_id;
421 struct nfsd4_create_session create_session;
422 struct nfsd4_destroy_session destroy_session;
423 struct nfsd4_sequence sequence;
424 } u;
425 struct nfs4_replay * replay;
426};
427
428struct nfsd4_compoundargs {
429 /* scratch variables for XDR decode */
430 __be32 * p;
431 __be32 * end;
432 struct page ** pagelist;
433 int pagelen;
434 __be32 tmp[8];
435 __be32 * tmpp;
436 struct tmpbuf {
437 struct tmpbuf *next;
438 void (*release)(const void *);
439 void *buf;
440 } *to_free;
441
442 struct svc_rqst *rqstp;
443
444 u32 taglen;
445 char * tag;
446 u32 minorversion;
447 u32 opcnt;
448 struct nfsd4_op *ops;
449 struct nfsd4_op iops[8];
450};
451
452struct nfsd4_compoundres {
453 /* scratch variables for XDR encode */
454 __be32 * p;
455 __be32 * end;
456 struct xdr_buf * xbuf;
457 struct svc_rqst * rqstp;
458
459 u32 taglen;
460 char * tag;
461 u32 opcnt;
462 __be32 * tagp; /* tag, opcount encode location */
463 struct nfsd4_compound_state cstate;
464};
465
466static inline bool nfsd4_is_solo_sequence(struct nfsd4_compoundres *resp)
467{
468 struct nfsd4_compoundargs *args = resp->rqstp->rq_argp;
469 return resp->opcnt == 1 && args->ops[0].opnum == OP_SEQUENCE;
470}
471
472static inline bool nfsd4_not_cached(struct nfsd4_compoundres *resp)
473{
474 return !resp->cstate.slot->sl_cachethis || nfsd4_is_solo_sequence(resp);
475}
476
477#define NFS4_SVC_XDRSIZE sizeof(struct nfsd4_compoundargs)
478
479static inline void
480set_change_info(struct nfsd4_change_info *cinfo, struct svc_fh *fhp)
481{
482 BUG_ON(!fhp->fh_pre_saved || !fhp->fh_post_saved);
483 cinfo->atomic = 1;
484 cinfo->change_supported = IS_I_VERSION(fhp->fh_dentry->d_inode);
485 if (cinfo->change_supported) {
486 cinfo->before_change = fhp->fh_pre_change;
487 cinfo->after_change = fhp->fh_post_change;
488 } else {
489 cinfo->before_ctime_sec = fhp->fh_pre_ctime.tv_sec;
490 cinfo->before_ctime_nsec = fhp->fh_pre_ctime.tv_nsec;
491 cinfo->after_ctime_sec = fhp->fh_post_attr.ctime.tv_sec;
492 cinfo->after_ctime_nsec = fhp->fh_post_attr.ctime.tv_nsec;
493 }
494}
495
496int nfs4svc_encode_voidres(struct svc_rqst *, __be32 *, void *);
497int nfs4svc_decode_compoundargs(struct svc_rqst *, __be32 *,
498 struct nfsd4_compoundargs *);
499int nfs4svc_encode_compoundres(struct svc_rqst *, __be32 *,
500 struct nfsd4_compoundres *);
501void nfsd4_encode_operation(struct nfsd4_compoundres *, struct nfsd4_op *);
502void nfsd4_encode_replay(struct nfsd4_compoundres *resp, struct nfsd4_op *op);
503__be32 nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp,
504 struct dentry *dentry, __be32 *buffer, int *countp,
505 u32 *bmval, struct svc_rqst *, int ignore_crossmnt);
506extern __be32 nfsd4_setclientid(struct svc_rqst *rqstp,
507 struct nfsd4_compound_state *,
508 struct nfsd4_setclientid *setclid);
509extern __be32 nfsd4_setclientid_confirm(struct svc_rqst *rqstp,
510 struct nfsd4_compound_state *,
511 struct nfsd4_setclientid_confirm *setclientid_confirm);
512extern void nfsd4_store_cache_entry(struct nfsd4_compoundres *resp);
513extern __be32 nfsd4_replay_cache_entry(struct nfsd4_compoundres *resp,
514 struct nfsd4_sequence *seq);
515extern __be32 nfsd4_exchange_id(struct svc_rqst *rqstp,
516 struct nfsd4_compound_state *,
517struct nfsd4_exchange_id *);
518 extern __be32 nfsd4_create_session(struct svc_rqst *,
519 struct nfsd4_compound_state *,
520 struct nfsd4_create_session *);
521extern __be32 nfsd4_sequence(struct svc_rqst *,
522 struct nfsd4_compound_state *,
523 struct nfsd4_sequence *);
524extern __be32 nfsd4_destroy_session(struct svc_rqst *,
525 struct nfsd4_compound_state *,
526 struct nfsd4_destroy_session *);
527extern __be32 nfsd4_process_open1(struct nfsd4_compound_state *,
528 struct nfsd4_open *open);
529extern __be32 nfsd4_process_open2(struct svc_rqst *rqstp,
530 struct svc_fh *current_fh, struct nfsd4_open *open);
531extern __be32 nfsd4_open_confirm(struct svc_rqst *rqstp,
532 struct nfsd4_compound_state *, struct nfsd4_open_confirm *oc);
533extern __be32 nfsd4_close(struct svc_rqst *rqstp,
534 struct nfsd4_compound_state *,
535 struct nfsd4_close *close);
536extern __be32 nfsd4_open_downgrade(struct svc_rqst *rqstp,
537 struct nfsd4_compound_state *,
538 struct nfsd4_open_downgrade *od);
539extern __be32 nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *,
540 struct nfsd4_lock *lock);
541extern __be32 nfsd4_lockt(struct svc_rqst *rqstp,
542 struct nfsd4_compound_state *,
543 struct nfsd4_lockt *lockt);
544extern __be32 nfsd4_locku(struct svc_rqst *rqstp,
545 struct nfsd4_compound_state *,
546 struct nfsd4_locku *locku);
547extern __be32
548nfsd4_release_lockowner(struct svc_rqst *rqstp,
549 struct nfsd4_compound_state *,
550 struct nfsd4_release_lockowner *rlockowner);
551extern void nfsd4_release_compoundargs(struct nfsd4_compoundargs *);
552extern __be32 nfsd4_delegreturn(struct svc_rqst *rqstp,
553 struct nfsd4_compound_state *, struct nfsd4_delegreturn *dr);
554extern __be32 nfsd4_renew(struct svc_rqst *rqstp,
555 struct nfsd4_compound_state *, clientid_t *clid);
556#endif
557
558/*
559 * Local variables:
560 * c-basic-offset: 8
561 * End:
562 */
diff --git a/fs/nilfs2/Kconfig b/fs/nilfs2/Kconfig
index 1225af7b2166..251da07b2a1d 100644
--- a/fs/nilfs2/Kconfig
+++ b/fs/nilfs2/Kconfig
@@ -2,7 +2,6 @@ config NILFS2_FS
2 tristate "NILFS2 file system support (EXPERIMENTAL)" 2 tristate "NILFS2 file system support (EXPERIMENTAL)"
3 depends on EXPERIMENTAL 3 depends on EXPERIMENTAL
4 select CRC32 4 select CRC32
5 select FS_JOURNAL_INFO
6 help 5 help
7 NILFS2 is a log-structured file system (LFS) supporting continuous 6 NILFS2 is a log-structured file system (LFS) supporting continuous
8 snapshotting. In addition to versioning capability of the entire 7 snapshotting. In addition to versioning capability of the entire
diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c
index 5403b3ef3a42..8173faee31e6 100644
--- a/fs/nilfs2/super.c
+++ b/fs/nilfs2/super.c
@@ -1118,8 +1118,7 @@ nilfs_get_sb(struct file_system_type *fs_type, int flags,
1118 /* Abandoning the newly allocated superblock */ 1118 /* Abandoning the newly allocated superblock */
1119 mutex_unlock(&nilfs->ns_mount_mutex); 1119 mutex_unlock(&nilfs->ns_mount_mutex);
1120 put_nilfs(nilfs); 1120 put_nilfs(nilfs);
1121 up_write(&s->s_umount); 1121 deactivate_locked_super(s);
1122 deactivate_super(s);
1123 /* 1122 /*
1124 * deactivate_super() invokes close_bdev_exclusive(). 1123 * deactivate_super() invokes close_bdev_exclusive().
1125 * We must finish all post-cleaning before this call; 1124 * We must finish all post-cleaning before this call;
diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c
index 5ef5f365a5c8..8271cf05c957 100644
--- a/fs/notify/inotify/inotify_user.c
+++ b/fs/notify/inotify/inotify_user.c
@@ -646,6 +646,7 @@ SYSCALL_DEFINE1(inotify_init1, int, flags)
646 struct fsnotify_group *group; 646 struct fsnotify_group *group;
647 struct user_struct *user; 647 struct user_struct *user;
648 struct file *filp; 648 struct file *filp;
649 struct path path;
649 int fd, ret; 650 int fd, ret;
650 651
651 /* Check the IN_* constants for consistency. */ 652 /* Check the IN_* constants for consistency. */
@@ -659,12 +660,6 @@ SYSCALL_DEFINE1(inotify_init1, int, flags)
659 if (fd < 0) 660 if (fd < 0)
660 return fd; 661 return fd;
661 662
662 filp = get_empty_filp();
663 if (!filp) {
664 ret = -ENFILE;
665 goto out_put_fd;
666 }
667
668 user = get_current_user(); 663 user = get_current_user();
669 if (unlikely(atomic_read(&user->inotify_devs) >= 664 if (unlikely(atomic_read(&user->inotify_devs) >=
670 inotify_max_user_instances)) { 665 inotify_max_user_instances)) {
@@ -679,24 +674,28 @@ SYSCALL_DEFINE1(inotify_init1, int, flags)
679 goto out_free_uid; 674 goto out_free_uid;
680 } 675 }
681 676
682 filp->f_op = &inotify_fops; 677 atomic_inc(&user->inotify_devs);
683 filp->f_path.mnt = mntget(inotify_mnt); 678
684 filp->f_path.dentry = dget(inotify_mnt->mnt_root); 679 path.mnt = inotify_mnt;
685 filp->f_mapping = filp->f_path.dentry->d_inode->i_mapping; 680 path.dentry = inotify_mnt->mnt_root;
686 filp->f_mode = FMODE_READ; 681 path_get(&path);
682 filp = alloc_file(&path, FMODE_READ, &inotify_fops);
683 if (!filp)
684 goto Enfile;
685
687 filp->f_flags = O_RDONLY | (flags & O_NONBLOCK); 686 filp->f_flags = O_RDONLY | (flags & O_NONBLOCK);
688 filp->private_data = group; 687 filp->private_data = group;
689 688
690 atomic_inc(&user->inotify_devs);
691
692 fd_install(fd, filp); 689 fd_install(fd, filp);
693 690
694 return fd; 691 return fd;
695 692
693Enfile:
694 ret = -ENFILE;
695 path_put(&path);
696 atomic_dec(&user->inotify_devs);
696out_free_uid: 697out_free_uid:
697 free_uid(user); 698 free_uid(user);
698 put_filp(filp);
699out_put_fd:
700 put_unused_fd(fd); 699 put_unused_fd(fd);
701 return ret; 700 return ret;
702} 701}
diff --git a/fs/ntfs/inode.c b/fs/ntfs/inode.c
index 9938034762cc..dc2505abb6d7 100644
--- a/fs/ntfs/inode.c
+++ b/fs/ntfs/inode.c
@@ -530,7 +530,7 @@ err_corrupt_attr:
530 * the ntfs inode. 530 * the ntfs inode.
531 * 531 *
532 * Q: What locks are held when the function is called? 532 * Q: What locks are held when the function is called?
533 * A: i_state has I_LOCK set, hence the inode is locked, also 533 * A: i_state has I_NEW set, hence the inode is locked, also
534 * i_count is set to 1, so it is not going to go away 534 * i_count is set to 1, so it is not going to go away
535 * i_flags is set to 0 and we have no business touching it. Only an ioctl() 535 * i_flags is set to 0 and we have no business touching it. Only an ioctl()
536 * is allowed to write to them. We should of course be honouring them but 536 * is allowed to write to them. We should of course be honouring them but
@@ -1207,7 +1207,7 @@ err_out:
1207 * necessary fields in @vi as well as initializing the ntfs inode. 1207 * necessary fields in @vi as well as initializing the ntfs inode.
1208 * 1208 *
1209 * Q: What locks are held when the function is called? 1209 * Q: What locks are held when the function is called?
1210 * A: i_state has I_LOCK set, hence the inode is locked, also 1210 * A: i_state has I_NEW set, hence the inode is locked, also
1211 * i_count is set to 1, so it is not going to go away 1211 * i_count is set to 1, so it is not going to go away
1212 * 1212 *
1213 * Return 0 on success and -errno on error. In the error case, the inode will 1213 * Return 0 on success and -errno on error. In the error case, the inode will
@@ -1474,7 +1474,7 @@ err_out:
1474 * normal directory inodes. 1474 * normal directory inodes.
1475 * 1475 *
1476 * Q: What locks are held when the function is called? 1476 * Q: What locks are held when the function is called?
1477 * A: i_state has I_LOCK set, hence the inode is locked, also 1477 * A: i_state has I_NEW set, hence the inode is locked, also
1478 * i_count is set to 1, so it is not going to go away 1478 * i_count is set to 1, so it is not going to go away
1479 * 1479 *
1480 * Return 0 on success and -errno on error. In the error case, the inode will 1480 * Return 0 on success and -errno on error. In the error case, the inode will
diff --git a/fs/ocfs2/acl.c b/fs/ocfs2/acl.c
index fbeaec762103..e3e47415d851 100644
--- a/fs/ocfs2/acl.c
+++ b/fs/ocfs2/acl.c
@@ -331,13 +331,14 @@ cleanup:
331 return ret; 331 return ret;
332} 332}
333 333
334static size_t ocfs2_xattr_list_acl_access(struct inode *inode, 334static size_t ocfs2_xattr_list_acl_access(struct dentry *dentry,
335 char *list, 335 char *list,
336 size_t list_len, 336 size_t list_len,
337 const char *name, 337 const char *name,
338 size_t name_len) 338 size_t name_len,
339 int type)
339{ 340{
340 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 341 struct ocfs2_super *osb = OCFS2_SB(dentry->d_sb);
341 const size_t size = sizeof(POSIX_ACL_XATTR_ACCESS); 342 const size_t size = sizeof(POSIX_ACL_XATTR_ACCESS);
342 343
343 if (!(osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL)) 344 if (!(osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL))
@@ -348,13 +349,14 @@ static size_t ocfs2_xattr_list_acl_access(struct inode *inode,
348 return size; 349 return size;
349} 350}
350 351
351static size_t ocfs2_xattr_list_acl_default(struct inode *inode, 352static size_t ocfs2_xattr_list_acl_default(struct dentry *dentry,
352 char *list, 353 char *list,
353 size_t list_len, 354 size_t list_len,
354 const char *name, 355 const char *name,
355 size_t name_len) 356 size_t name_len,
357 int type)
356{ 358{
357 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 359 struct ocfs2_super *osb = OCFS2_SB(dentry->d_sb);
358 const size_t size = sizeof(POSIX_ACL_XATTR_DEFAULT); 360 const size_t size = sizeof(POSIX_ACL_XATTR_DEFAULT);
359 361
360 if (!(osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL)) 362 if (!(osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL))
@@ -365,19 +367,19 @@ static size_t ocfs2_xattr_list_acl_default(struct inode *inode,
365 return size; 367 return size;
366} 368}
367 369
368static int ocfs2_xattr_get_acl(struct inode *inode, 370static int ocfs2_xattr_get_acl(struct dentry *dentry, const char *name,
369 int type, 371 void *buffer, size_t size, int type)
370 void *buffer,
371 size_t size)
372{ 372{
373 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 373 struct ocfs2_super *osb = OCFS2_SB(dentry->d_sb);
374 struct posix_acl *acl; 374 struct posix_acl *acl;
375 int ret; 375 int ret;
376 376
377 if (strcmp(name, "") != 0)
378 return -EINVAL;
377 if (!(osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL)) 379 if (!(osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL))
378 return -EOPNOTSUPP; 380 return -EOPNOTSUPP;
379 381
380 acl = ocfs2_get_acl(inode, type); 382 acl = ocfs2_get_acl(dentry->d_inode, type);
381 if (IS_ERR(acl)) 383 if (IS_ERR(acl))
382 return PTR_ERR(acl); 384 return PTR_ERR(acl);
383 if (acl == NULL) 385 if (acl == NULL)
@@ -388,35 +390,16 @@ static int ocfs2_xattr_get_acl(struct inode *inode,
388 return ret; 390 return ret;
389} 391}
390 392
391static int ocfs2_xattr_get_acl_access(struct inode *inode, 393static int ocfs2_xattr_set_acl(struct dentry *dentry, const char *name,
392 const char *name, 394 const void *value, size_t size, int flags, int type)
393 void *buffer,
394 size_t size)
395{
396 if (strcmp(name, "") != 0)
397 return -EINVAL;
398 return ocfs2_xattr_get_acl(inode, ACL_TYPE_ACCESS, buffer, size);
399}
400
401static int ocfs2_xattr_get_acl_default(struct inode *inode,
402 const char *name,
403 void *buffer,
404 size_t size)
405{
406 if (strcmp(name, "") != 0)
407 return -EINVAL;
408 return ocfs2_xattr_get_acl(inode, ACL_TYPE_DEFAULT, buffer, size);
409}
410
411static int ocfs2_xattr_set_acl(struct inode *inode,
412 int type,
413 const void *value,
414 size_t size)
415{ 395{
396 struct inode *inode = dentry->d_inode;
416 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 397 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
417 struct posix_acl *acl; 398 struct posix_acl *acl;
418 int ret = 0; 399 int ret = 0;
419 400
401 if (strcmp(name, "") != 0)
402 return -EINVAL;
420 if (!(osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL)) 403 if (!(osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL))
421 return -EOPNOTSUPP; 404 return -EOPNOTSUPP;
422 405
@@ -442,38 +425,18 @@ cleanup:
442 return ret; 425 return ret;
443} 426}
444 427
445static int ocfs2_xattr_set_acl_access(struct inode *inode,
446 const char *name,
447 const void *value,
448 size_t size,
449 int flags)
450{
451 if (strcmp(name, "") != 0)
452 return -EINVAL;
453 return ocfs2_xattr_set_acl(inode, ACL_TYPE_ACCESS, value, size);
454}
455
456static int ocfs2_xattr_set_acl_default(struct inode *inode,
457 const char *name,
458 const void *value,
459 size_t size,
460 int flags)
461{
462 if (strcmp(name, "") != 0)
463 return -EINVAL;
464 return ocfs2_xattr_set_acl(inode, ACL_TYPE_DEFAULT, value, size);
465}
466
467struct xattr_handler ocfs2_xattr_acl_access_handler = { 428struct xattr_handler ocfs2_xattr_acl_access_handler = {
468 .prefix = POSIX_ACL_XATTR_ACCESS, 429 .prefix = POSIX_ACL_XATTR_ACCESS,
430 .flags = ACL_TYPE_ACCESS,
469 .list = ocfs2_xattr_list_acl_access, 431 .list = ocfs2_xattr_list_acl_access,
470 .get = ocfs2_xattr_get_acl_access, 432 .get = ocfs2_xattr_get_acl,
471 .set = ocfs2_xattr_set_acl_access, 433 .set = ocfs2_xattr_set_acl,
472}; 434};
473 435
474struct xattr_handler ocfs2_xattr_acl_default_handler = { 436struct xattr_handler ocfs2_xattr_acl_default_handler = {
475 .prefix = POSIX_ACL_XATTR_DEFAULT, 437 .prefix = POSIX_ACL_XATTR_DEFAULT,
438 .flags = ACL_TYPE_DEFAULT,
476 .list = ocfs2_xattr_list_acl_default, 439 .list = ocfs2_xattr_list_acl_default,
477 .get = ocfs2_xattr_get_acl_default, 440 .get = ocfs2_xattr_get_acl,
478 .set = ocfs2_xattr_set_acl_default, 441 .set = ocfs2_xattr_set_acl,
479}; 442};
diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index 7c7198a5bc90..fb4e672579b8 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -7190,8 +7190,8 @@ int ocfs2_zero_range_for_truncate(struct inode *inode, handle_t *handle,
7190 * wait on them - the truncate_inode_pages() call later will 7190 * wait on them - the truncate_inode_pages() call later will
7191 * do that for us. 7191 * do that for us.
7192 */ 7192 */
7193 ret = do_sync_mapping_range(inode->i_mapping, range_start, 7193 ret = filemap_fdatawrite_range(inode->i_mapping, range_start,
7194 range_end - 1, SYNC_FILE_RANGE_WRITE); 7194 range_end - 1);
7195 if (ret) 7195 if (ret)
7196 mlog_errno(ret); 7196 mlog_errno(ret);
7197 7197
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index deb2b132ae5e..3dae4a13f6e4 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -547,6 +547,9 @@ bail:
547 * 547 *
548 * called like this: dio->get_blocks(dio->inode, fs_startblk, 548 * called like this: dio->get_blocks(dio->inode, fs_startblk,
549 * fs_count, map_bh, dio->rw == WRITE); 549 * fs_count, map_bh, dio->rw == WRITE);
550 *
551 * Note that we never bother to allocate blocks here, and thus ignore the
552 * create argument.
550 */ 553 */
551static int ocfs2_direct_IO_get_blocks(struct inode *inode, sector_t iblock, 554static int ocfs2_direct_IO_get_blocks(struct inode *inode, sector_t iblock,
552 struct buffer_head *bh_result, int create) 555 struct buffer_head *bh_result, int create)
@@ -563,14 +566,6 @@ static int ocfs2_direct_IO_get_blocks(struct inode *inode, sector_t iblock,
563 566
564 inode_blocks = ocfs2_blocks_for_bytes(inode->i_sb, i_size_read(inode)); 567 inode_blocks = ocfs2_blocks_for_bytes(inode->i_sb, i_size_read(inode));
565 568
566 /*
567 * Any write past EOF is not allowed because we'd be extending.
568 */
569 if (create && (iblock + max_blocks) > inode_blocks) {
570 ret = -EIO;
571 goto bail;
572 }
573
574 /* This figures out the size of the next contiguous block, and 569 /* This figures out the size of the next contiguous block, and
575 * our logical offset */ 570 * our logical offset */
576 ret = ocfs2_extent_map_get_blocks(inode, iblock, &p_blkno, 571 ret = ocfs2_extent_map_get_blocks(inode, iblock, &p_blkno,
@@ -582,15 +577,6 @@ static int ocfs2_direct_IO_get_blocks(struct inode *inode, sector_t iblock,
582 goto bail; 577 goto bail;
583 } 578 }
584 579
585 if (!ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb)) && !p_blkno && create) {
586 ocfs2_error(inode->i_sb,
587 "Inode %llu has a hole at block %llu\n",
588 (unsigned long long)OCFS2_I(inode)->ip_blkno,
589 (unsigned long long)iblock);
590 ret = -EROFS;
591 goto bail;
592 }
593
594 /* We should already CoW the refcounted extent. */ 580 /* We should already CoW the refcounted extent. */
595 BUG_ON(ext_flags & OCFS2_EXT_REFCOUNTED); 581 BUG_ON(ext_flags & OCFS2_EXT_REFCOUNTED);
596 /* 582 /*
@@ -601,20 +587,8 @@ static int ocfs2_direct_IO_get_blocks(struct inode *inode, sector_t iblock,
601 */ 587 */
602 if (p_blkno && !(ext_flags & OCFS2_EXT_UNWRITTEN)) 588 if (p_blkno && !(ext_flags & OCFS2_EXT_UNWRITTEN))
603 map_bh(bh_result, inode->i_sb, p_blkno); 589 map_bh(bh_result, inode->i_sb, p_blkno);
604 else { 590 else
605 /*
606 * ocfs2_prepare_inode_for_write() should have caught
607 * the case where we'd be filling a hole and triggered
608 * a buffered write instead.
609 */
610 if (create) {
611 ret = -EIO;
612 mlog_errno(ret);
613 goto bail;
614 }
615
616 clear_buffer_mapped(bh_result); 591 clear_buffer_mapped(bh_result);
617 }
618 592
619 /* make sure we don't map more than max_blocks blocks here as 593 /* make sure we don't map more than max_blocks blocks here as
620 that's all the kernel will handle at this point. */ 594 that's all the kernel will handle at this point. */
diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c
index fe3419068df2..43c114831c0d 100644
--- a/fs/ocfs2/xattr.c
+++ b/fs/ocfs2/xattr.c
@@ -205,8 +205,6 @@ static int ocfs2_get_xattr_tree_value_root(struct super_block *sb,
205 int offset, 205 int offset,
206 struct ocfs2_xattr_value_root **xv, 206 struct ocfs2_xattr_value_root **xv,
207 struct buffer_head **bh); 207 struct buffer_head **bh);
208static int ocfs2_xattr_security_set(struct inode *inode, const char *name,
209 const void *value, size_t size, int flags);
210 208
211static inline u16 ocfs2_xattr_buckets_per_cluster(struct ocfs2_super *osb) 209static inline u16 ocfs2_xattr_buckets_per_cluster(struct ocfs2_super *osb)
212{ 210{
@@ -6978,9 +6976,9 @@ int ocfs2_init_security_and_acl(struct inode *dir,
6978 6976
6979 ret = ocfs2_init_security_get(inode, dir, &si); 6977 ret = ocfs2_init_security_get(inode, dir, &si);
6980 if (!ret) { 6978 if (!ret) {
6981 ret = ocfs2_xattr_security_set(inode, si.name, 6979 ret = ocfs2_xattr_set(inode, OCFS2_XATTR_INDEX_SECURITY,
6982 si.value, si.value_len, 6980 si.name, si.value, si.value_len,
6983 XATTR_CREATE); 6981 XATTR_CREATE);
6984 if (ret) { 6982 if (ret) {
6985 mlog_errno(ret); 6983 mlog_errno(ret);
6986 goto leave; 6984 goto leave;
@@ -7008,9 +7006,9 @@ leave:
7008/* 7006/*
7009 * 'security' attributes support 7007 * 'security' attributes support
7010 */ 7008 */
7011static size_t ocfs2_xattr_security_list(struct inode *inode, char *list, 7009static size_t ocfs2_xattr_security_list(struct dentry *dentry, char *list,
7012 size_t list_size, const char *name, 7010 size_t list_size, const char *name,
7013 size_t name_len) 7011 size_t name_len, int type)
7014{ 7012{
7015 const size_t prefix_len = XATTR_SECURITY_PREFIX_LEN; 7013 const size_t prefix_len = XATTR_SECURITY_PREFIX_LEN;
7016 const size_t total_len = prefix_len + name_len + 1; 7014 const size_t total_len = prefix_len + name_len + 1;
@@ -7023,23 +7021,23 @@ static size_t ocfs2_xattr_security_list(struct inode *inode, char *list,
7023 return total_len; 7021 return total_len;
7024} 7022}
7025 7023
7026static int ocfs2_xattr_security_get(struct inode *inode, const char *name, 7024static int ocfs2_xattr_security_get(struct dentry *dentry, const char *name,
7027 void *buffer, size_t size) 7025 void *buffer, size_t size, int type)
7028{ 7026{
7029 if (strcmp(name, "") == 0) 7027 if (strcmp(name, "") == 0)
7030 return -EINVAL; 7028 return -EINVAL;
7031 return ocfs2_xattr_get(inode, OCFS2_XATTR_INDEX_SECURITY, name, 7029 return ocfs2_xattr_get(dentry->d_inode, OCFS2_XATTR_INDEX_SECURITY,
7032 buffer, size); 7030 name, buffer, size);
7033} 7031}
7034 7032
7035static int ocfs2_xattr_security_set(struct inode *inode, const char *name, 7033static int ocfs2_xattr_security_set(struct dentry *dentry, const char *name,
7036 const void *value, size_t size, int flags) 7034 const void *value, size_t size, int flags, int type)
7037{ 7035{
7038 if (strcmp(name, "") == 0) 7036 if (strcmp(name, "") == 0)
7039 return -EINVAL; 7037 return -EINVAL;
7040 7038
7041 return ocfs2_xattr_set(inode, OCFS2_XATTR_INDEX_SECURITY, name, value, 7039 return ocfs2_xattr_set(dentry->d_inode, OCFS2_XATTR_INDEX_SECURITY,
7042 size, flags); 7040 name, value, size, flags);
7043} 7041}
7044 7042
7045int ocfs2_init_security_get(struct inode *inode, 7043int ocfs2_init_security_get(struct inode *inode,
@@ -7076,9 +7074,9 @@ struct xattr_handler ocfs2_xattr_security_handler = {
7076/* 7074/*
7077 * 'trusted' attributes support 7075 * 'trusted' attributes support
7078 */ 7076 */
7079static size_t ocfs2_xattr_trusted_list(struct inode *inode, char *list, 7077static size_t ocfs2_xattr_trusted_list(struct dentry *dentry, char *list,
7080 size_t list_size, const char *name, 7078 size_t list_size, const char *name,
7081 size_t name_len) 7079 size_t name_len, int type)
7082{ 7080{
7083 const size_t prefix_len = XATTR_TRUSTED_PREFIX_LEN; 7081 const size_t prefix_len = XATTR_TRUSTED_PREFIX_LEN;
7084 const size_t total_len = prefix_len + name_len + 1; 7082 const size_t total_len = prefix_len + name_len + 1;
@@ -7091,23 +7089,23 @@ static size_t ocfs2_xattr_trusted_list(struct inode *inode, char *list,
7091 return total_len; 7089 return total_len;
7092} 7090}
7093 7091
7094static int ocfs2_xattr_trusted_get(struct inode *inode, const char *name, 7092static int ocfs2_xattr_trusted_get(struct dentry *dentry, const char *name,
7095 void *buffer, size_t size) 7093 void *buffer, size_t size, int type)
7096{ 7094{
7097 if (strcmp(name, "") == 0) 7095 if (strcmp(name, "") == 0)
7098 return -EINVAL; 7096 return -EINVAL;
7099 return ocfs2_xattr_get(inode, OCFS2_XATTR_INDEX_TRUSTED, name, 7097 return ocfs2_xattr_get(dentry->d_inode, OCFS2_XATTR_INDEX_TRUSTED,
7100 buffer, size); 7098 name, buffer, size);
7101} 7099}
7102 7100
7103static int ocfs2_xattr_trusted_set(struct inode *inode, const char *name, 7101static int ocfs2_xattr_trusted_set(struct dentry *dentry, const char *name,
7104 const void *value, size_t size, int flags) 7102 const void *value, size_t size, int flags, int type)
7105{ 7103{
7106 if (strcmp(name, "") == 0) 7104 if (strcmp(name, "") == 0)
7107 return -EINVAL; 7105 return -EINVAL;
7108 7106
7109 return ocfs2_xattr_set(inode, OCFS2_XATTR_INDEX_TRUSTED, name, value, 7107 return ocfs2_xattr_set(dentry->d_inode, OCFS2_XATTR_INDEX_TRUSTED,
7110 size, flags); 7108 name, value, size, flags);
7111} 7109}
7112 7110
7113struct xattr_handler ocfs2_xattr_trusted_handler = { 7111struct xattr_handler ocfs2_xattr_trusted_handler = {
@@ -7120,13 +7118,13 @@ struct xattr_handler ocfs2_xattr_trusted_handler = {
7120/* 7118/*
7121 * 'user' attributes support 7119 * 'user' attributes support
7122 */ 7120 */
7123static size_t ocfs2_xattr_user_list(struct inode *inode, char *list, 7121static size_t ocfs2_xattr_user_list(struct dentry *dentry, char *list,
7124 size_t list_size, const char *name, 7122 size_t list_size, const char *name,
7125 size_t name_len) 7123 size_t name_len, int type)
7126{ 7124{
7127 const size_t prefix_len = XATTR_USER_PREFIX_LEN; 7125 const size_t prefix_len = XATTR_USER_PREFIX_LEN;
7128 const size_t total_len = prefix_len + name_len + 1; 7126 const size_t total_len = prefix_len + name_len + 1;
7129 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 7127 struct ocfs2_super *osb = OCFS2_SB(dentry->d_sb);
7130 7128
7131 if (osb->s_mount_opt & OCFS2_MOUNT_NOUSERXATTR) 7129 if (osb->s_mount_opt & OCFS2_MOUNT_NOUSERXATTR)
7132 return 0; 7130 return 0;
@@ -7139,31 +7137,31 @@ static size_t ocfs2_xattr_user_list(struct inode *inode, char *list,
7139 return total_len; 7137 return total_len;
7140} 7138}
7141 7139
7142static int ocfs2_xattr_user_get(struct inode *inode, const char *name, 7140static int ocfs2_xattr_user_get(struct dentry *dentry, const char *name,
7143 void *buffer, size_t size) 7141 void *buffer, size_t size, int type)
7144{ 7142{
7145 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 7143 struct ocfs2_super *osb = OCFS2_SB(dentry->d_sb);
7146 7144
7147 if (strcmp(name, "") == 0) 7145 if (strcmp(name, "") == 0)
7148 return -EINVAL; 7146 return -EINVAL;
7149 if (osb->s_mount_opt & OCFS2_MOUNT_NOUSERXATTR) 7147 if (osb->s_mount_opt & OCFS2_MOUNT_NOUSERXATTR)
7150 return -EOPNOTSUPP; 7148 return -EOPNOTSUPP;
7151 return ocfs2_xattr_get(inode, OCFS2_XATTR_INDEX_USER, name, 7149 return ocfs2_xattr_get(dentry->d_inode, OCFS2_XATTR_INDEX_USER, name,
7152 buffer, size); 7150 buffer, size);
7153} 7151}
7154 7152
7155static int ocfs2_xattr_user_set(struct inode *inode, const char *name, 7153static int ocfs2_xattr_user_set(struct dentry *dentry, const char *name,
7156 const void *value, size_t size, int flags) 7154 const void *value, size_t size, int flags, int type)
7157{ 7155{
7158 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 7156 struct ocfs2_super *osb = OCFS2_SB(dentry->d_sb);
7159 7157
7160 if (strcmp(name, "") == 0) 7158 if (strcmp(name, "") == 0)
7161 return -EINVAL; 7159 return -EINVAL;
7162 if (osb->s_mount_opt & OCFS2_MOUNT_NOUSERXATTR) 7160 if (osb->s_mount_opt & OCFS2_MOUNT_NOUSERXATTR)
7163 return -EOPNOTSUPP; 7161 return -EOPNOTSUPP;
7164 7162
7165 return ocfs2_xattr_set(inode, OCFS2_XATTR_INDEX_USER, name, value, 7163 return ocfs2_xattr_set(dentry->d_inode, OCFS2_XATTR_INDEX_USER,
7166 size, flags); 7164 name, value, size, flags);
7167} 7165}
7168 7166
7169struct xattr_handler ocfs2_xattr_user_handler = { 7167struct xattr_handler ocfs2_xattr_user_handler = {
diff --git a/fs/open.c b/fs/open.c
index b4b31d277f3a..ca69241796bd 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -30,6 +30,9 @@
30#include <linux/audit.h> 30#include <linux/audit.h>
31#include <linux/falloc.h> 31#include <linux/falloc.h>
32#include <linux/fs_struct.h> 32#include <linux/fs_struct.h>
33#include <linux/ima.h>
34
35#include "internal.h"
33 36
34int vfs_statfs(struct dentry *dentry, struct kstatfs *buf) 37int vfs_statfs(struct dentry *dentry, struct kstatfs *buf)
35{ 38{
@@ -855,6 +858,7 @@ static struct file *__dentry_open(struct dentry *dentry, struct vfsmount *mnt,
855 if (error) 858 if (error)
856 goto cleanup_all; 859 goto cleanup_all;
857 } 860 }
861 ima_counts_get(f);
858 862
859 f->f_flags &= ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC); 863 f->f_flags &= ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC);
860 864
diff --git a/fs/pipe.c b/fs/pipe.c
index ae17d026aaa3..37ba29ff3158 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -906,17 +906,6 @@ void free_pipe_info(struct inode *inode)
906} 906}
907 907
908static struct vfsmount *pipe_mnt __read_mostly; 908static struct vfsmount *pipe_mnt __read_mostly;
909static int pipefs_delete_dentry(struct dentry *dentry)
910{
911 /*
912 * At creation time, we pretended this dentry was hashed
913 * (by clearing DCACHE_UNHASHED bit in d_flags)
914 * At delete time, we restore the truth : not hashed.
915 * (so that dput() can proceed correctly)
916 */
917 dentry->d_flags |= DCACHE_UNHASHED;
918 return 0;
919}
920 909
921/* 910/*
922 * pipefs_dname() is called from d_path(). 911 * pipefs_dname() is called from d_path().
@@ -928,7 +917,6 @@ static char *pipefs_dname(struct dentry *dentry, char *buffer, int buflen)
928} 917}
929 918
930static const struct dentry_operations pipefs_dentry_operations = { 919static const struct dentry_operations pipefs_dentry_operations = {
931 .d_delete = pipefs_delete_dentry,
932 .d_dname = pipefs_dname, 920 .d_dname = pipefs_dname,
933}; 921};
934 922
@@ -974,7 +962,7 @@ struct file *create_write_pipe(int flags)
974 int err; 962 int err;
975 struct inode *inode; 963 struct inode *inode;
976 struct file *f; 964 struct file *f;
977 struct dentry *dentry; 965 struct path path;
978 struct qstr name = { .name = "" }; 966 struct qstr name = { .name = "" };
979 967
980 err = -ENFILE; 968 err = -ENFILE;
@@ -983,21 +971,16 @@ struct file *create_write_pipe(int flags)
983 goto err; 971 goto err;
984 972
985 err = -ENOMEM; 973 err = -ENOMEM;
986 dentry = d_alloc(pipe_mnt->mnt_sb->s_root, &name); 974 path.dentry = d_alloc(pipe_mnt->mnt_sb->s_root, &name);
987 if (!dentry) 975 if (!path.dentry)
988 goto err_inode; 976 goto err_inode;
977 path.mnt = mntget(pipe_mnt);
989 978
990 dentry->d_op = &pipefs_dentry_operations; 979 path.dentry->d_op = &pipefs_dentry_operations;
991 /* 980 d_instantiate(path.dentry, inode);
992 * We dont want to publish this dentry into global dentry hash table.
993 * We pretend dentry is already hashed, by unsetting DCACHE_UNHASHED
994 * This permits a working /proc/$pid/fd/XXX on pipes
995 */
996 dentry->d_flags &= ~DCACHE_UNHASHED;
997 d_instantiate(dentry, inode);
998 981
999 err = -ENFILE; 982 err = -ENFILE;
1000 f = alloc_file(pipe_mnt, dentry, FMODE_WRITE, &write_pipefifo_fops); 983 f = alloc_file(&path, FMODE_WRITE, &write_pipefifo_fops);
1001 if (!f) 984 if (!f)
1002 goto err_dentry; 985 goto err_dentry;
1003 f->f_mapping = inode->i_mapping; 986 f->f_mapping = inode->i_mapping;
@@ -1009,7 +992,7 @@ struct file *create_write_pipe(int flags)
1009 992
1010 err_dentry: 993 err_dentry:
1011 free_pipe_info(inode); 994 free_pipe_info(inode);
1012 dput(dentry); 995 path_put(&path);
1013 return ERR_PTR(err); 996 return ERR_PTR(err);
1014 997
1015 err_inode: 998 err_inode:
@@ -1028,20 +1011,14 @@ void free_write_pipe(struct file *f)
1028 1011
1029struct file *create_read_pipe(struct file *wrf, int flags) 1012struct file *create_read_pipe(struct file *wrf, int flags)
1030{ 1013{
1031 struct file *f = get_empty_filp(); 1014 /* Grab pipe from the writer */
1015 struct file *f = alloc_file(&wrf->f_path, FMODE_READ,
1016 &read_pipefifo_fops);
1032 if (!f) 1017 if (!f)
1033 return ERR_PTR(-ENFILE); 1018 return ERR_PTR(-ENFILE);
1034 1019
1035 /* Grab pipe from the writer */
1036 f->f_path = wrf->f_path;
1037 path_get(&wrf->f_path); 1020 path_get(&wrf->f_path);
1038 f->f_mapping = wrf->f_path.dentry->d_inode->i_mapping;
1039
1040 f->f_pos = 0;
1041 f->f_flags = O_RDONLY | (flags & O_NONBLOCK); 1021 f->f_flags = O_RDONLY | (flags & O_NONBLOCK);
1042 f->f_op = &read_pipefifo_fops;
1043 f->f_mode = FMODE_READ;
1044 f->f_version = 0;
1045 1022
1046 return f; 1023 return f;
1047} 1024}
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 4df4a464a919..18d5cc62d8ed 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -2266,7 +2266,7 @@ static const struct inode_operations proc_attr_dir_inode_operations = {
2266 2266
2267#endif 2267#endif
2268 2268
2269#if defined(USE_ELF_CORE_DUMP) && defined(CONFIG_ELF_CORE) 2269#ifdef CONFIG_ELF_CORE
2270static ssize_t proc_coredump_filter_read(struct file *file, char __user *buf, 2270static ssize_t proc_coredump_filter_read(struct file *file, char __user *buf,
2271 size_t count, loff_t *ppos) 2271 size_t count, loff_t *ppos)
2272{ 2272{
@@ -2623,7 +2623,7 @@ static const struct pid_entry tgid_base_stuff[] = {
2623#ifdef CONFIG_FAULT_INJECTION 2623#ifdef CONFIG_FAULT_INJECTION
2624 REG("make-it-fail", S_IRUGO|S_IWUSR, proc_fault_inject_operations), 2624 REG("make-it-fail", S_IRUGO|S_IWUSR, proc_fault_inject_operations),
2625#endif 2625#endif
2626#if defined(USE_ELF_CORE_DUMP) && defined(CONFIG_ELF_CORE) 2626#ifdef CONFIG_ELF_CORE
2627 REG("coredump_filter", S_IRUGO|S_IWUSR, proc_coredump_filter_operations), 2627 REG("coredump_filter", S_IRUGO|S_IWUSR, proc_coredump_filter_operations),
2628#endif 2628#endif
2629#ifdef CONFIG_TASK_IO_ACCOUNTING 2629#ifdef CONFIG_TASK_IO_ACCOUNTING
diff --git a/fs/proc/generic.c b/fs/proc/generic.c
index fa678abc9db1..480cb1065eec 100644
--- a/fs/proc/generic.c
+++ b/fs/proc/generic.c
@@ -429,7 +429,7 @@ struct dentry *proc_lookup_de(struct proc_dir_entry *de, struct inode *dir,
429 unsigned int ino; 429 unsigned int ino;
430 430
431 ino = de->low_ino; 431 ino = de->low_ino;
432 de_get(de); 432 pde_get(de);
433 spin_unlock(&proc_subdir_lock); 433 spin_unlock(&proc_subdir_lock);
434 error = -EINVAL; 434 error = -EINVAL;
435 inode = proc_get_inode(dir->i_sb, ino, de); 435 inode = proc_get_inode(dir->i_sb, ino, de);
@@ -445,7 +445,7 @@ out_unlock:
445 return NULL; 445 return NULL;
446 } 446 }
447 if (de) 447 if (de)
448 de_put(de); 448 pde_put(de);
449 return ERR_PTR(error); 449 return ERR_PTR(error);
450} 450}
451 451
@@ -509,17 +509,17 @@ int proc_readdir_de(struct proc_dir_entry *de, struct file *filp, void *dirent,
509 struct proc_dir_entry *next; 509 struct proc_dir_entry *next;
510 510
511 /* filldir passes info to user space */ 511 /* filldir passes info to user space */
512 de_get(de); 512 pde_get(de);
513 spin_unlock(&proc_subdir_lock); 513 spin_unlock(&proc_subdir_lock);
514 if (filldir(dirent, de->name, de->namelen, filp->f_pos, 514 if (filldir(dirent, de->name, de->namelen, filp->f_pos,
515 de->low_ino, de->mode >> 12) < 0) { 515 de->low_ino, de->mode >> 12) < 0) {
516 de_put(de); 516 pde_put(de);
517 goto out; 517 goto out;
518 } 518 }
519 spin_lock(&proc_subdir_lock); 519 spin_lock(&proc_subdir_lock);
520 filp->f_pos++; 520 filp->f_pos++;
521 next = de->next; 521 next = de->next;
522 de_put(de); 522 pde_put(de);
523 de = next; 523 de = next;
524 } while (de); 524 } while (de);
525 spin_unlock(&proc_subdir_lock); 525 spin_unlock(&proc_subdir_lock);
@@ -763,7 +763,7 @@ out:
763 return NULL; 763 return NULL;
764} 764}
765 765
766void free_proc_entry(struct proc_dir_entry *de) 766static void free_proc_entry(struct proc_dir_entry *de)
767{ 767{
768 unsigned int ino = de->low_ino; 768 unsigned int ino = de->low_ino;
769 769
@@ -777,6 +777,12 @@ void free_proc_entry(struct proc_dir_entry *de)
777 kfree(de); 777 kfree(de);
778} 778}
779 779
780void pde_put(struct proc_dir_entry *pde)
781{
782 if (atomic_dec_and_test(&pde->count))
783 free_proc_entry(pde);
784}
785
780/* 786/*
781 * Remove a /proc entry and free it if it's not currently in use. 787 * Remove a /proc entry and free it if it's not currently in use.
782 */ 788 */
@@ -845,6 +851,5 @@ continue_removing:
845 WARN(de->subdir, KERN_WARNING "%s: removing non-empty directory " 851 WARN(de->subdir, KERN_WARNING "%s: removing non-empty directory "
846 "'%s/%s', leaking at least '%s'\n", __func__, 852 "'%s/%s', leaking at least '%s'\n", __func__,
847 de->parent->name, de->name, de->subdir->name); 853 de->parent->name, de->name, de->subdir->name);
848 if (atomic_dec_and_test(&de->count)) 854 pde_put(de);
849 free_proc_entry(de);
850} 855}
diff --git a/fs/proc/inode.c b/fs/proc/inode.c
index d78ade305541..445a02bcaab3 100644
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -24,29 +24,6 @@
24 24
25#include "internal.h" 25#include "internal.h"
26 26
27struct proc_dir_entry *de_get(struct proc_dir_entry *de)
28{
29 atomic_inc(&de->count);
30 return de;
31}
32
33/*
34 * Decrements the use count and checks for deferred deletion.
35 */
36void de_put(struct proc_dir_entry *de)
37{
38 if (!atomic_read(&de->count)) {
39 printk("de_put: entry %s already free!\n", de->name);
40 return;
41 }
42
43 if (atomic_dec_and_test(&de->count))
44 free_proc_entry(de);
45}
46
47/*
48 * Decrement the use count of the proc_dir_entry.
49 */
50static void proc_delete_inode(struct inode *inode) 27static void proc_delete_inode(struct inode *inode)
51{ 28{
52 struct proc_dir_entry *de; 29 struct proc_dir_entry *de;
@@ -59,7 +36,7 @@ static void proc_delete_inode(struct inode *inode)
59 /* Let go of any associated proc directory entry */ 36 /* Let go of any associated proc directory entry */
60 de = PROC_I(inode)->pde; 37 de = PROC_I(inode)->pde;
61 if (de) 38 if (de)
62 de_put(de); 39 pde_put(de);
63 if (PROC_I(inode)->sysctl) 40 if (PROC_I(inode)->sysctl)
64 sysctl_head_put(PROC_I(inode)->sysctl); 41 sysctl_head_put(PROC_I(inode)->sysctl);
65 clear_inode(inode); 42 clear_inode(inode);
@@ -480,7 +457,7 @@ struct inode *proc_get_inode(struct super_block *sb, unsigned int ino,
480 } 457 }
481 unlock_new_inode(inode); 458 unlock_new_inode(inode);
482 } else 459 } else
483 de_put(de); 460 pde_put(de);
484 return inode; 461 return inode;
485} 462}
486 463
@@ -495,7 +472,7 @@ int proc_fill_super(struct super_block *s)
495 s->s_op = &proc_sops; 472 s->s_op = &proc_sops;
496 s->s_time_gran = 1; 473 s->s_time_gran = 1;
497 474
498 de_get(&proc_root); 475 pde_get(&proc_root);
499 root_inode = proc_get_inode(s, PROC_ROOT_INO, &proc_root); 476 root_inode = proc_get_inode(s, PROC_ROOT_INO, &proc_root);
500 if (!root_inode) 477 if (!root_inode)
501 goto out_no_root; 478 goto out_no_root;
@@ -509,6 +486,6 @@ int proc_fill_super(struct super_block *s)
509out_no_root: 486out_no_root:
510 printk("proc_read_super: get root inode failed\n"); 487 printk("proc_read_super: get root inode failed\n");
511 iput(root_inode); 488 iput(root_inode);
512 de_put(&proc_root); 489 pde_put(&proc_root);
513 return -ENOMEM; 490 return -ENOMEM;
514} 491}
diff --git a/fs/proc/internal.h b/fs/proc/internal.h
index 753ca37002c8..1f24a3eddd12 100644
--- a/fs/proc/internal.h
+++ b/fs/proc/internal.h
@@ -61,8 +61,6 @@ extern const struct file_operations proc_pagemap_operations;
61extern const struct file_operations proc_net_operations; 61extern const struct file_operations proc_net_operations;
62extern const struct inode_operations proc_net_inode_operations; 62extern const struct inode_operations proc_net_inode_operations;
63 63
64void free_proc_entry(struct proc_dir_entry *de);
65
66void proc_init_inodecache(void); 64void proc_init_inodecache(void);
67 65
68static inline struct pid *proc_pid(struct inode *inode) 66static inline struct pid *proc_pid(struct inode *inode)
@@ -101,8 +99,12 @@ unsigned long task_vsize(struct mm_struct *);
101int task_statm(struct mm_struct *, int *, int *, int *, int *); 99int task_statm(struct mm_struct *, int *, int *, int *, int *);
102void task_mem(struct seq_file *, struct mm_struct *); 100void task_mem(struct seq_file *, struct mm_struct *);
103 101
104struct proc_dir_entry *de_get(struct proc_dir_entry *de); 102static inline struct proc_dir_entry *pde_get(struct proc_dir_entry *pde)
105void de_put(struct proc_dir_entry *de); 103{
104 atomic_inc(&pde->count);
105 return pde;
106}
107void pde_put(struct proc_dir_entry *pde);
106 108
107extern struct vfsmount *proc_mnt; 109extern struct vfsmount *proc_mnt;
108int proc_fill_super(struct super_block *); 110int proc_fill_super(struct super_block *);
diff --git a/fs/proc/page.c b/fs/proc/page.c
index 5033ce0d254b..180cf5a0bd67 100644
--- a/fs/proc/page.c
+++ b/fs/proc/page.c
@@ -8,6 +8,7 @@
8#include <linux/proc_fs.h> 8#include <linux/proc_fs.h>
9#include <linux/seq_file.h> 9#include <linux/seq_file.h>
10#include <linux/hugetlb.h> 10#include <linux/hugetlb.h>
11#include <linux/kernel-page-flags.h>
11#include <asm/uaccess.h> 12#include <asm/uaccess.h>
12#include "internal.h" 13#include "internal.h"
13 14
@@ -71,52 +72,12 @@ static const struct file_operations proc_kpagecount_operations = {
71 * physical page flags. 72 * physical page flags.
72 */ 73 */
73 74
74/* These macros are used to decouple internal flags from exported ones */
75
76#define KPF_LOCKED 0
77#define KPF_ERROR 1
78#define KPF_REFERENCED 2
79#define KPF_UPTODATE 3
80#define KPF_DIRTY 4
81#define KPF_LRU 5
82#define KPF_ACTIVE 6
83#define KPF_SLAB 7
84#define KPF_WRITEBACK 8
85#define KPF_RECLAIM 9
86#define KPF_BUDDY 10
87
88/* 11-20: new additions in 2.6.31 */
89#define KPF_MMAP 11
90#define KPF_ANON 12
91#define KPF_SWAPCACHE 13
92#define KPF_SWAPBACKED 14
93#define KPF_COMPOUND_HEAD 15
94#define KPF_COMPOUND_TAIL 16
95#define KPF_HUGE 17
96#define KPF_UNEVICTABLE 18
97#define KPF_HWPOISON 19
98#define KPF_NOPAGE 20
99
100#define KPF_KSM 21
101
102/* kernel hacking assistances
103 * WARNING: subject to change, never rely on them!
104 */
105#define KPF_RESERVED 32
106#define KPF_MLOCKED 33
107#define KPF_MAPPEDTODISK 34
108#define KPF_PRIVATE 35
109#define KPF_PRIVATE_2 36
110#define KPF_OWNER_PRIVATE 37
111#define KPF_ARCH 38
112#define KPF_UNCACHED 39
113
114static inline u64 kpf_copy_bit(u64 kflags, int ubit, int kbit) 75static inline u64 kpf_copy_bit(u64 kflags, int ubit, int kbit)
115{ 76{
116 return ((kflags >> kbit) & 1) << ubit; 77 return ((kflags >> kbit) & 1) << ubit;
117} 78}
118 79
119static u64 get_uflags(struct page *page) 80u64 stable_page_flags(struct page *page)
120{ 81{
121 u64 k; 82 u64 k;
122 u64 u; 83 u64 u;
@@ -219,7 +180,7 @@ static ssize_t kpageflags_read(struct file *file, char __user *buf,
219 else 180 else
220 ppage = NULL; 181 ppage = NULL;
221 182
222 if (put_user(get_uflags(ppage), out)) { 183 if (put_user(stable_page_flags(ppage), out)) {
223 ret = -EFAULT; 184 ret = -EFAULT;
224 break; 185 break;
225 } 186 }
diff --git a/fs/qnx4/bitmap.c b/fs/qnx4/bitmap.c
index 32f5d131a644..22e0d60e53ef 100644
--- a/fs/qnx4/bitmap.c
+++ b/fs/qnx4/bitmap.c
@@ -17,13 +17,6 @@
17#include <linux/bitops.h> 17#include <linux/bitops.h>
18#include "qnx4.h" 18#include "qnx4.h"
19 19
20#if 0
21int qnx4_new_block(struct super_block *sb)
22{
23 return 0;
24}
25#endif /* 0 */
26
27static void count_bits(register const char *bmPart, register int size, 20static void count_bits(register const char *bmPart, register int size,
28 int *const tf) 21 int *const tf)
29{ 22{
@@ -35,22 +28,7 @@ static void count_bits(register const char *bmPart, register int size,
35 } 28 }
36 do { 29 do {
37 b = *bmPart++; 30 b = *bmPart++;
38 if ((b & 1) == 0) 31 tot += 8 - hweight8(b);
39 tot++;
40 if ((b & 2) == 0)
41 tot++;
42 if ((b & 4) == 0)
43 tot++;
44 if ((b & 8) == 0)
45 tot++;
46 if ((b & 16) == 0)
47 tot++;
48 if ((b & 32) == 0)
49 tot++;
50 if ((b & 64) == 0)
51 tot++;
52 if ((b & 128) == 0)
53 tot++;
54 size--; 32 size--;
55 } while (size != 0); 33 } while (size != 0);
56 *tf = tot; 34 *tf = tot;
diff --git a/fs/qnx4/inode.c b/fs/qnx4/inode.c
index 449f5a66dd34..ebf3440d28ca 100644
--- a/fs/qnx4/inode.c
+++ b/fs/qnx4/inode.c
@@ -64,25 +64,7 @@ static struct buffer_head *qnx4_getblk(struct inode *inode, int nr,
64 result = sb_getblk(inode->i_sb, nr); 64 result = sb_getblk(inode->i_sb, nr);
65 return result; 65 return result;
66 } 66 }
67 if (!create) { 67 return NULL;
68 return NULL;
69 }
70#if 0
71 tmp = qnx4_new_block(inode->i_sb);
72 if (!tmp) {
73 return NULL;
74 }
75 result = sb_getblk(inode->i_sb, tmp);
76 if (tst) {
77 qnx4_free_block(inode->i_sb, tmp);
78 brelse(result);
79 goto repeat;
80 }
81 tst = tmp;
82#endif
83 inode->i_ctime = CURRENT_TIME_SEC;
84 mark_inode_dirty(inode);
85 return result;
86} 68}
87 69
88struct buffer_head *qnx4_bread(struct inode *inode, int block, int create) 70struct buffer_head *qnx4_bread(struct inode *inode, int block, int create)
@@ -113,8 +95,6 @@ static int qnx4_get_block( struct inode *inode, sector_t iblock, struct buffer_h
113 if ( phys ) { 95 if ( phys ) {
114 // logical block is before EOF 96 // logical block is before EOF
115 map_bh(bh, inode->i_sb, phys); 97 map_bh(bh, inode->i_sb, phys);
116 } else if ( create ) {
117 // to be done.
118 } 98 }
119 return 0; 99 return 0;
120} 100}
diff --git a/fs/ramfs/file-nommu.c b/fs/ramfs/file-nommu.c
index 32fae4040ebf..2efc57173fd7 100644
--- a/fs/ramfs/file-nommu.c
+++ b/fs/ramfs/file-nommu.c
@@ -60,7 +60,7 @@ const struct inode_operations ramfs_file_inode_operations = {
60 */ 60 */
61int ramfs_nommu_expand_for_mapping(struct inode *inode, size_t newsize) 61int ramfs_nommu_expand_for_mapping(struct inode *inode, size_t newsize)
62{ 62{
63 unsigned long npages, xpages, loop, limit; 63 unsigned long npages, xpages, loop;
64 struct page *pages; 64 struct page *pages;
65 unsigned order; 65 unsigned order;
66 void *data; 66 void *data;
diff --git a/fs/reiserfs/Kconfig b/fs/reiserfs/Kconfig
index ac7cd75c86f8..513f431038f9 100644
--- a/fs/reiserfs/Kconfig
+++ b/fs/reiserfs/Kconfig
@@ -1,7 +1,6 @@
1config REISERFS_FS 1config REISERFS_FS
2 tristate "Reiserfs support" 2 tristate "Reiserfs support"
3 select CRC32 3 select CRC32
4 select FS_JOURNAL_INFO
5 help 4 help
6 Stores not just filenames but the files themselves in a balanced 5 Stores not just filenames but the files themselves in a balanced
7 tree. Uses journalling. 6 tree. Uses journalling.
diff --git a/fs/reiserfs/Makefile b/fs/reiserfs/Makefile
index 6a9e30c041dd..792b3cb2cd18 100644
--- a/fs/reiserfs/Makefile
+++ b/fs/reiserfs/Makefile
@@ -7,7 +7,11 @@ obj-$(CONFIG_REISERFS_FS) += reiserfs.o
7reiserfs-objs := bitmap.o do_balan.o namei.o inode.o file.o dir.o fix_node.o \ 7reiserfs-objs := bitmap.o do_balan.o namei.o inode.o file.o dir.o fix_node.o \
8 super.o prints.o objectid.o lbalance.o ibalance.o stree.o \ 8 super.o prints.o objectid.o lbalance.o ibalance.o stree.o \
9 hashes.o tail_conversion.o journal.o resize.o \ 9 hashes.o tail_conversion.o journal.o resize.o \
10 item_ops.o ioctl.o procfs.o xattr.o lock.o 10 item_ops.o ioctl.o xattr.o lock.o
11
12ifeq ($(CONFIG_REISERFS_PROC_INFO),y)
13reiserfs-objs += procfs.o
14endif
11 15
12ifeq ($(CONFIG_REISERFS_FS_XATTR),y) 16ifeq ($(CONFIG_REISERFS_FS_XATTR),y)
13reiserfs-objs += xattr_user.o xattr_trusted.o 17reiserfs-objs += xattr_user.o xattr_trusted.o
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index 3a28e7751b3c..290ae38fca8a 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -2538,6 +2538,12 @@ static int reiserfs_writepage(struct page *page, struct writeback_control *wbc)
2538 return reiserfs_write_full_page(page, wbc); 2538 return reiserfs_write_full_page(page, wbc);
2539} 2539}
2540 2540
2541static void reiserfs_truncate_failed_write(struct inode *inode)
2542{
2543 truncate_inode_pages(inode->i_mapping, inode->i_size);
2544 reiserfs_truncate_file(inode, 0);
2545}
2546
2541static int reiserfs_write_begin(struct file *file, 2547static int reiserfs_write_begin(struct file *file,
2542 struct address_space *mapping, 2548 struct address_space *mapping,
2543 loff_t pos, unsigned len, unsigned flags, 2549 loff_t pos, unsigned len, unsigned flags,
@@ -2604,6 +2610,8 @@ static int reiserfs_write_begin(struct file *file,
2604 if (ret) { 2610 if (ret) {
2605 unlock_page(page); 2611 unlock_page(page);
2606 page_cache_release(page); 2612 page_cache_release(page);
2613 /* Truncate allocated blocks */
2614 reiserfs_truncate_failed_write(inode);
2607 } 2615 }
2608 return ret; 2616 return ret;
2609} 2617}
@@ -2701,9 +2709,7 @@ static int reiserfs_write_end(struct file *file, struct address_space *mapping,
2701 ** transaction tracking stuff when the size changes. So, we have 2709 ** transaction tracking stuff when the size changes. So, we have
2702 ** to do the i_size updates here. 2710 ** to do the i_size updates here.
2703 */ 2711 */
2704 pos += copied; 2712 if (pos + copied > inode->i_size) {
2705
2706 if (pos > inode->i_size) {
2707 struct reiserfs_transaction_handle myth; 2713 struct reiserfs_transaction_handle myth;
2708 lock_depth = reiserfs_write_lock_once(inode->i_sb); 2714 lock_depth = reiserfs_write_lock_once(inode->i_sb);
2709 locked = true; 2715 locked = true;
@@ -2721,7 +2727,7 @@ static int reiserfs_write_end(struct file *file, struct address_space *mapping,
2721 goto journal_error; 2727 goto journal_error;
2722 2728
2723 reiserfs_update_inode_transaction(inode); 2729 reiserfs_update_inode_transaction(inode);
2724 inode->i_size = pos; 2730 inode->i_size = pos + copied;
2725 /* 2731 /*
2726 * this will just nest into our transaction. It's important 2732 * this will just nest into our transaction. It's important
2727 * to use mark_inode_dirty so the inode gets pushed around on the 2733 * to use mark_inode_dirty so the inode gets pushed around on the
@@ -2751,6 +2757,10 @@ static int reiserfs_write_end(struct file *file, struct address_space *mapping,
2751 reiserfs_write_unlock_once(inode->i_sb, lock_depth); 2757 reiserfs_write_unlock_once(inode->i_sb, lock_depth);
2752 unlock_page(page); 2758 unlock_page(page);
2753 page_cache_release(page); 2759 page_cache_release(page);
2760
2761 if (pos + len > inode->i_size)
2762 reiserfs_truncate_failed_write(inode);
2763
2754 return ret == 0 ? copied : ret; 2764 return ret == 0 ? copied : ret;
2755 2765
2756 journal_error: 2766 journal_error:
diff --git a/fs/reiserfs/procfs.c b/fs/reiserfs/procfs.c
index 9229e5514a4e..7a9981196c1c 100644
--- a/fs/reiserfs/procfs.c
+++ b/fs/reiserfs/procfs.c
@@ -17,8 +17,6 @@
17#include <linux/init.h> 17#include <linux/init.h>
18#include <linux/proc_fs.h> 18#include <linux/proc_fs.h>
19 19
20#ifdef CONFIG_REISERFS_PROC_INFO
21
22/* 20/*
23 * LOCKING: 21 * LOCKING:
24 * 22 *
@@ -48,14 +46,6 @@ static int show_version(struct seq_file *m, struct super_block *sb)
48 return 0; 46 return 0;
49} 47}
50 48
51int reiserfs_global_version_in_proc(char *buffer, char **start, off_t offset,
52 int count, int *eof, void *data)
53{
54 *start = buffer;
55 *eof = 1;
56 return 0;
57}
58
59#define SF( x ) ( r -> x ) 49#define SF( x ) ( r -> x )
60#define SFP( x ) SF( s_proc_info_data.x ) 50#define SFP( x ) SF( s_proc_info_data.x )
61#define SFPL( x ) SFP( x[ level ] ) 51#define SFPL( x ) SFP( x[ level ] )
@@ -538,19 +528,6 @@ int reiserfs_proc_info_done(struct super_block *sb)
538 return 0; 528 return 0;
539} 529}
540 530
541struct proc_dir_entry *reiserfs_proc_register_global(char *name,
542 read_proc_t * func)
543{
544 return (proc_info_root) ? create_proc_read_entry(name, 0,
545 proc_info_root,
546 func, NULL) : NULL;
547}
548
549void reiserfs_proc_unregister_global(const char *name)
550{
551 remove_proc_entry(name, proc_info_root);
552}
553
554int reiserfs_proc_info_global_init(void) 531int reiserfs_proc_info_global_init(void)
555{ 532{
556 if (proc_info_root == NULL) { 533 if (proc_info_root == NULL) {
@@ -572,48 +549,6 @@ int reiserfs_proc_info_global_done(void)
572 } 549 }
573 return 0; 550 return 0;
574} 551}
575
576/* REISERFS_PROC_INFO */
577#else
578
579int reiserfs_proc_info_init(struct super_block *sb)
580{
581 return 0;
582}
583int reiserfs_proc_info_done(struct super_block *sb)
584{
585 return 0;
586}
587
588struct proc_dir_entry *reiserfs_proc_register_global(char *name,
589 read_proc_t * func)
590{
591 return NULL;
592}
593
594void reiserfs_proc_unregister_global(const char *name)
595{;
596}
597
598int reiserfs_proc_info_global_init(void)
599{
600 return 0;
601}
602int reiserfs_proc_info_global_done(void)
603{
604 return 0;
605}
606
607int reiserfs_global_version_in_proc(char *buffer, char **start,
608 off_t offset,
609 int count, int *eof, void *data)
610{
611 return 0;
612}
613
614/* REISERFS_PROC_INFO */
615#endif
616
617/* 552/*
618 * Revision 1.1.8.2 2001/07/15 17:08:42 god 553 * Revision 1.1.8.2 2001/07/15 17:08:42 god
619 * . use get_super() in procfs.c 554 * . use get_super() in procfs.c
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index 339b0baf2af6..b4a7dd03bdb9 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -2222,8 +2222,6 @@ static int __init init_reiserfs_fs(void)
2222 } 2222 }
2223 2223
2224 reiserfs_proc_info_global_init(); 2224 reiserfs_proc_info_global_init();
2225 reiserfs_proc_register_global("version",
2226 reiserfs_global_version_in_proc);
2227 2225
2228 ret = register_filesystem(&reiserfs_fs_type); 2226 ret = register_filesystem(&reiserfs_fs_type);
2229 2227
@@ -2231,7 +2229,6 @@ static int __init init_reiserfs_fs(void)
2231 return 0; 2229 return 0;
2232 } 2230 }
2233 2231
2234 reiserfs_proc_unregister_global("version");
2235 reiserfs_proc_info_global_done(); 2232 reiserfs_proc_info_global_done();
2236 destroy_inodecache(); 2233 destroy_inodecache();
2237 2234
@@ -2240,7 +2237,6 @@ static int __init init_reiserfs_fs(void)
2240 2237
2241static void __exit exit_reiserfs_fs(void) 2238static void __exit exit_reiserfs_fs(void)
2242{ 2239{
2243 reiserfs_proc_unregister_global("version");
2244 reiserfs_proc_info_global_done(); 2240 reiserfs_proc_info_global_done();
2245 unregister_filesystem(&reiserfs_fs_type); 2241 unregister_filesystem(&reiserfs_fs_type);
2246 destroy_inodecache(); 2242 destroy_inodecache();
diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c
index 58aa8e75f7f5..8c7033a8b67e 100644
--- a/fs/reiserfs/xattr.c
+++ b/fs/reiserfs/xattr.c
@@ -48,6 +48,7 @@
48#include <net/checksum.h> 48#include <net/checksum.h>
49#include <linux/stat.h> 49#include <linux/stat.h>
50#include <linux/quotaops.h> 50#include <linux/quotaops.h>
51#include <linux/security.h>
51 52
52#define PRIVROOT_NAME ".reiserfs_priv" 53#define PRIVROOT_NAME ".reiserfs_priv"
53#define XAROOT_NAME "xattrs" 54#define XAROOT_NAME "xattrs"
@@ -726,15 +727,14 @@ ssize_t
726reiserfs_getxattr(struct dentry * dentry, const char *name, void *buffer, 727reiserfs_getxattr(struct dentry * dentry, const char *name, void *buffer,
727 size_t size) 728 size_t size)
728{ 729{
729 struct inode *inode = dentry->d_inode;
730 struct xattr_handler *handler; 730 struct xattr_handler *handler;
731 731
732 handler = find_xattr_handler_prefix(inode->i_sb->s_xattr, name); 732 handler = find_xattr_handler_prefix(dentry->d_sb->s_xattr, name);
733 733
734 if (!handler || get_inode_sd_version(inode) == STAT_DATA_V1) 734 if (!handler || get_inode_sd_version(dentry->d_inode) == STAT_DATA_V1)
735 return -EOPNOTSUPP; 735 return -EOPNOTSUPP;
736 736
737 return handler->get(inode, name, buffer, size); 737 return handler->get(dentry, name, buffer, size, handler->flags);
738} 738}
739 739
740/* 740/*
@@ -746,15 +746,14 @@ int
746reiserfs_setxattr(struct dentry *dentry, const char *name, const void *value, 746reiserfs_setxattr(struct dentry *dentry, const char *name, const void *value,
747 size_t size, int flags) 747 size_t size, int flags)
748{ 748{
749 struct inode *inode = dentry->d_inode;
750 struct xattr_handler *handler; 749 struct xattr_handler *handler;
751 750
752 handler = find_xattr_handler_prefix(inode->i_sb->s_xattr, name); 751 handler = find_xattr_handler_prefix(dentry->d_sb->s_xattr, name);
753 752
754 if (!handler || get_inode_sd_version(inode) == STAT_DATA_V1) 753 if (!handler || get_inode_sd_version(dentry->d_inode) == STAT_DATA_V1)
755 return -EOPNOTSUPP; 754 return -EOPNOTSUPP;
756 755
757 return handler->set(inode, name, value, size, flags); 756 return handler->set(dentry, name, value, size, flags, handler->flags);
758} 757}
759 758
760/* 759/*
@@ -764,21 +763,20 @@ reiserfs_setxattr(struct dentry *dentry, const char *name, const void *value,
764 */ 763 */
765int reiserfs_removexattr(struct dentry *dentry, const char *name) 764int reiserfs_removexattr(struct dentry *dentry, const char *name)
766{ 765{
767 struct inode *inode = dentry->d_inode;
768 struct xattr_handler *handler; 766 struct xattr_handler *handler;
769 handler = find_xattr_handler_prefix(inode->i_sb->s_xattr, name); 767 handler = find_xattr_handler_prefix(dentry->d_sb->s_xattr, name);
770 768
771 if (!handler || get_inode_sd_version(inode) == STAT_DATA_V1) 769 if (!handler || get_inode_sd_version(dentry->d_inode) == STAT_DATA_V1)
772 return -EOPNOTSUPP; 770 return -EOPNOTSUPP;
773 771
774 return handler->set(inode, name, NULL, 0, XATTR_REPLACE); 772 return handler->set(dentry, name, NULL, 0, XATTR_REPLACE, handler->flags);
775} 773}
776 774
777struct listxattr_buf { 775struct listxattr_buf {
778 size_t size; 776 size_t size;
779 size_t pos; 777 size_t pos;
780 char *buf; 778 char *buf;
781 struct inode *inode; 779 struct dentry *dentry;
782}; 780};
783 781
784static int listxattr_filler(void *buf, const char *name, int namelen, 782static int listxattr_filler(void *buf, const char *name, int namelen,
@@ -789,17 +787,19 @@ static int listxattr_filler(void *buf, const char *name, int namelen,
789 if (name[0] != '.' || 787 if (name[0] != '.' ||
790 (namelen != 1 && (name[1] != '.' || namelen != 2))) { 788 (namelen != 1 && (name[1] != '.' || namelen != 2))) {
791 struct xattr_handler *handler; 789 struct xattr_handler *handler;
792 handler = find_xattr_handler_prefix(b->inode->i_sb->s_xattr, 790 handler = find_xattr_handler_prefix(b->dentry->d_sb->s_xattr,
793 name); 791 name);
794 if (!handler) /* Unsupported xattr name */ 792 if (!handler) /* Unsupported xattr name */
795 return 0; 793 return 0;
796 if (b->buf) { 794 if (b->buf) {
797 size = handler->list(b->inode, b->buf + b->pos, 795 size = handler->list(b->dentry, b->buf + b->pos,
798 b->size, name, namelen); 796 b->size, name, namelen,
797 handler->flags);
799 if (size > b->size) 798 if (size > b->size)
800 return -ERANGE; 799 return -ERANGE;
801 } else { 800 } else {
802 size = handler->list(b->inode, NULL, 0, name, namelen); 801 size = handler->list(b->dentry, NULL, 0, name,
802 namelen, handler->flags);
803 } 803 }
804 804
805 b->pos += size; 805 b->pos += size;
@@ -820,7 +820,7 @@ ssize_t reiserfs_listxattr(struct dentry * dentry, char *buffer, size_t size)
820 int err = 0; 820 int err = 0;
821 loff_t pos = 0; 821 loff_t pos = 0;
822 struct listxattr_buf buf = { 822 struct listxattr_buf buf = {
823 .inode = dentry->d_inode, 823 .dentry = dentry,
824 .buf = buffer, 824 .buf = buffer,
825 .size = buffer ? size : 0, 825 .size = buffer ? size : 0,
826 }; 826 };
diff --git a/fs/reiserfs/xattr_acl.c b/fs/reiserfs/xattr_acl.c
index 35d6e672a279..cc32e6ada67b 100644
--- a/fs/reiserfs/xattr_acl.c
+++ b/fs/reiserfs/xattr_acl.c
@@ -15,8 +15,10 @@ static int reiserfs_set_acl(struct reiserfs_transaction_handle *th,
15 struct posix_acl *acl); 15 struct posix_acl *acl);
16 16
17static int 17static int
18xattr_set_acl(struct inode *inode, int type, const void *value, size_t size) 18posix_acl_set(struct dentry *dentry, const char *name, const void *value,
19 size_t size, int flags, int type)
19{ 20{
21 struct inode *inode = dentry->d_inode;
20 struct posix_acl *acl; 22 struct posix_acl *acl;
21 int error, error2; 23 int error, error2;
22 struct reiserfs_transaction_handle th; 24 struct reiserfs_transaction_handle th;
@@ -60,15 +62,16 @@ xattr_set_acl(struct inode *inode, int type, const void *value, size_t size)
60} 62}
61 63
62static int 64static int
63xattr_get_acl(struct inode *inode, int type, void *buffer, size_t size) 65posix_acl_get(struct dentry *dentry, const char *name, void *buffer,
66 size_t size, int type)
64{ 67{
65 struct posix_acl *acl; 68 struct posix_acl *acl;
66 int error; 69 int error;
67 70
68 if (!reiserfs_posixacl(inode->i_sb)) 71 if (!reiserfs_posixacl(dentry->d_sb))
69 return -EOPNOTSUPP; 72 return -EOPNOTSUPP;
70 73
71 acl = reiserfs_get_acl(inode, type); 74 acl = reiserfs_get_acl(dentry->d_inode, type);
72 if (IS_ERR(acl)) 75 if (IS_ERR(acl))
73 return PTR_ERR(acl); 76 return PTR_ERR(acl);
74 if (acl == NULL) 77 if (acl == NULL)
@@ -482,30 +485,12 @@ int reiserfs_acl_chmod(struct inode *inode)
482 return error; 485 return error;
483} 486}
484 487
485static int 488static size_t posix_acl_access_list(struct dentry *dentry, char *list,
486posix_acl_access_get(struct inode *inode, const char *name,
487 void *buffer, size_t size)
488{
489 if (strlen(name) != sizeof(POSIX_ACL_XATTR_ACCESS) - 1)
490 return -EINVAL;
491 return xattr_get_acl(inode, ACL_TYPE_ACCESS, buffer, size);
492}
493
494static int
495posix_acl_access_set(struct inode *inode, const char *name,
496 const void *value, size_t size, int flags)
497{
498 if (strlen(name) != sizeof(POSIX_ACL_XATTR_ACCESS) - 1)
499 return -EINVAL;
500 return xattr_set_acl(inode, ACL_TYPE_ACCESS, value, size);
501}
502
503static size_t posix_acl_access_list(struct inode *inode, char *list,
504 size_t list_size, const char *name, 489 size_t list_size, const char *name,
505 size_t name_len) 490 size_t name_len, int type)
506{ 491{
507 const size_t size = sizeof(POSIX_ACL_XATTR_ACCESS); 492 const size_t size = sizeof(POSIX_ACL_XATTR_ACCESS);
508 if (!reiserfs_posixacl(inode->i_sb)) 493 if (!reiserfs_posixacl(dentry->d_sb))
509 return 0; 494 return 0;
510 if (list && size <= list_size) 495 if (list && size <= list_size)
511 memcpy(list, POSIX_ACL_XATTR_ACCESS, size); 496 memcpy(list, POSIX_ACL_XATTR_ACCESS, size);
@@ -514,35 +499,18 @@ static size_t posix_acl_access_list(struct inode *inode, char *list,
514 499
515struct xattr_handler reiserfs_posix_acl_access_handler = { 500struct xattr_handler reiserfs_posix_acl_access_handler = {
516 .prefix = POSIX_ACL_XATTR_ACCESS, 501 .prefix = POSIX_ACL_XATTR_ACCESS,
517 .get = posix_acl_access_get, 502 .flags = ACL_TYPE_ACCESS,
518 .set = posix_acl_access_set, 503 .get = posix_acl_get,
504 .set = posix_acl_set,
519 .list = posix_acl_access_list, 505 .list = posix_acl_access_list,
520}; 506};
521 507
522static int 508static size_t posix_acl_default_list(struct dentry *dentry, char *list,
523posix_acl_default_get(struct inode *inode, const char *name,
524 void *buffer, size_t size)
525{
526 if (strlen(name) != sizeof(POSIX_ACL_XATTR_DEFAULT) - 1)
527 return -EINVAL;
528 return xattr_get_acl(inode, ACL_TYPE_DEFAULT, buffer, size);
529}
530
531static int
532posix_acl_default_set(struct inode *inode, const char *name,
533 const void *value, size_t size, int flags)
534{
535 if (strlen(name) != sizeof(POSIX_ACL_XATTR_DEFAULT) - 1)
536 return -EINVAL;
537 return xattr_set_acl(inode, ACL_TYPE_DEFAULT, value, size);
538}
539
540static size_t posix_acl_default_list(struct inode *inode, char *list,
541 size_t list_size, const char *name, 509 size_t list_size, const char *name,
542 size_t name_len) 510 size_t name_len, int type)
543{ 511{
544 const size_t size = sizeof(POSIX_ACL_XATTR_DEFAULT); 512 const size_t size = sizeof(POSIX_ACL_XATTR_DEFAULT);
545 if (!reiserfs_posixacl(inode->i_sb)) 513 if (!reiserfs_posixacl(dentry->d_sb))
546 return 0; 514 return 0;
547 if (list && size <= list_size) 515 if (list && size <= list_size)
548 memcpy(list, POSIX_ACL_XATTR_DEFAULT, size); 516 memcpy(list, POSIX_ACL_XATTR_DEFAULT, size);
@@ -551,7 +519,8 @@ static size_t posix_acl_default_list(struct inode *inode, char *list,
551 519
552struct xattr_handler reiserfs_posix_acl_default_handler = { 520struct xattr_handler reiserfs_posix_acl_default_handler = {
553 .prefix = POSIX_ACL_XATTR_DEFAULT, 521 .prefix = POSIX_ACL_XATTR_DEFAULT,
554 .get = posix_acl_default_get, 522 .flags = ACL_TYPE_DEFAULT,
555 .set = posix_acl_default_set, 523 .get = posix_acl_get,
524 .set = posix_acl_set,
556 .list = posix_acl_default_list, 525 .list = posix_acl_default_list,
557}; 526};
diff --git a/fs/reiserfs/xattr_security.c b/fs/reiserfs/xattr_security.c
index a92c8792c0f6..d8b5bfcbdd30 100644
--- a/fs/reiserfs/xattr_security.c
+++ b/fs/reiserfs/xattr_security.c
@@ -8,36 +8,37 @@
8#include <asm/uaccess.h> 8#include <asm/uaccess.h>
9 9
10static int 10static int
11security_get(struct inode *inode, const char *name, void *buffer, size_t size) 11security_get(struct dentry *dentry, const char *name, void *buffer, size_t size,
12 int handler_flags)
12{ 13{
13 if (strlen(name) < sizeof(XATTR_SECURITY_PREFIX)) 14 if (strlen(name) < sizeof(XATTR_SECURITY_PREFIX))
14 return -EINVAL; 15 return -EINVAL;
15 16
16 if (IS_PRIVATE(inode)) 17 if (IS_PRIVATE(dentry->d_inode))
17 return -EPERM; 18 return -EPERM;
18 19
19 return reiserfs_xattr_get(inode, name, buffer, size); 20 return reiserfs_xattr_get(dentry->d_inode, name, buffer, size);
20} 21}
21 22
22static int 23static int
23security_set(struct inode *inode, const char *name, const void *buffer, 24security_set(struct dentry *dentry, const char *name, const void *buffer,
24 size_t size, int flags) 25 size_t size, int flags, int handler_flags)
25{ 26{
26 if (strlen(name) < sizeof(XATTR_SECURITY_PREFIX)) 27 if (strlen(name) < sizeof(XATTR_SECURITY_PREFIX))
27 return -EINVAL; 28 return -EINVAL;
28 29
29 if (IS_PRIVATE(inode)) 30 if (IS_PRIVATE(dentry->d_inode))
30 return -EPERM; 31 return -EPERM;
31 32
32 return reiserfs_xattr_set(inode, name, buffer, size, flags); 33 return reiserfs_xattr_set(dentry->d_inode, name, buffer, size, flags);
33} 34}
34 35
35static size_t security_list(struct inode *inode, char *list, size_t list_len, 36static size_t security_list(struct dentry *dentry, char *list, size_t list_len,
36 const char *name, size_t namelen) 37 const char *name, size_t namelen, int handler_flags)
37{ 38{
38 const size_t len = namelen + 1; 39 const size_t len = namelen + 1;
39 40
40 if (IS_PRIVATE(inode)) 41 if (IS_PRIVATE(dentry->d_inode))
41 return 0; 42 return 0;
42 43
43 if (list && len <= list_len) { 44 if (list && len <= list_len) {
diff --git a/fs/reiserfs/xattr_trusted.c b/fs/reiserfs/xattr_trusted.c
index a865042f75e2..5b08aaca3daf 100644
--- a/fs/reiserfs/xattr_trusted.c
+++ b/fs/reiserfs/xattr_trusted.c
@@ -8,36 +8,37 @@
8#include <asm/uaccess.h> 8#include <asm/uaccess.h>
9 9
10static int 10static int
11trusted_get(struct inode *inode, const char *name, void *buffer, size_t size) 11trusted_get(struct dentry *dentry, const char *name, void *buffer, size_t size,
12 int handler_flags)
12{ 13{
13 if (strlen(name) < sizeof(XATTR_TRUSTED_PREFIX)) 14 if (strlen(name) < sizeof(XATTR_TRUSTED_PREFIX))
14 return -EINVAL; 15 return -EINVAL;
15 16
16 if (!capable(CAP_SYS_ADMIN) || IS_PRIVATE(inode)) 17 if (!capable(CAP_SYS_ADMIN) || IS_PRIVATE(dentry->d_inode))
17 return -EPERM; 18 return -EPERM;
18 19
19 return reiserfs_xattr_get(inode, name, buffer, size); 20 return reiserfs_xattr_get(dentry->d_inode, name, buffer, size);
20} 21}
21 22
22static int 23static int
23trusted_set(struct inode *inode, const char *name, const void *buffer, 24trusted_set(struct dentry *dentry, const char *name, const void *buffer,
24 size_t size, int flags) 25 size_t size, int flags, int handler_flags)
25{ 26{
26 if (strlen(name) < sizeof(XATTR_TRUSTED_PREFIX)) 27 if (strlen(name) < sizeof(XATTR_TRUSTED_PREFIX))
27 return -EINVAL; 28 return -EINVAL;
28 29
29 if (!capable(CAP_SYS_ADMIN) || IS_PRIVATE(inode)) 30 if (!capable(CAP_SYS_ADMIN) || IS_PRIVATE(dentry->d_inode))
30 return -EPERM; 31 return -EPERM;
31 32
32 return reiserfs_xattr_set(inode, name, buffer, size, flags); 33 return reiserfs_xattr_set(dentry->d_inode, name, buffer, size, flags);
33} 34}
34 35
35static size_t trusted_list(struct inode *inode, char *list, size_t list_size, 36static size_t trusted_list(struct dentry *dentry, char *list, size_t list_size,
36 const char *name, size_t name_len) 37 const char *name, size_t name_len, int handler_flags)
37{ 38{
38 const size_t len = name_len + 1; 39 const size_t len = name_len + 1;
39 40
40 if (!capable(CAP_SYS_ADMIN) || IS_PRIVATE(inode)) 41 if (!capable(CAP_SYS_ADMIN) || IS_PRIVATE(dentry->d_inode))
41 return 0; 42 return 0;
42 43
43 if (list && len <= list_size) { 44 if (list && len <= list_size) {
diff --git a/fs/reiserfs/xattr_user.c b/fs/reiserfs/xattr_user.c
index e3238dc4f3db..75d59c49b911 100644
--- a/fs/reiserfs/xattr_user.c
+++ b/fs/reiserfs/xattr_user.c
@@ -7,34 +7,35 @@
7#include <asm/uaccess.h> 7#include <asm/uaccess.h>
8 8
9static int 9static int
10user_get(struct inode *inode, const char *name, void *buffer, size_t size) 10user_get(struct dentry *dentry, const char *name, void *buffer, size_t size,
11 int handler_flags)
11{ 12{
12 13
13 if (strlen(name) < sizeof(XATTR_USER_PREFIX)) 14 if (strlen(name) < sizeof(XATTR_USER_PREFIX))
14 return -EINVAL; 15 return -EINVAL;
15 if (!reiserfs_xattrs_user(inode->i_sb)) 16 if (!reiserfs_xattrs_user(dentry->d_sb))
16 return -EOPNOTSUPP; 17 return -EOPNOTSUPP;
17 return reiserfs_xattr_get(inode, name, buffer, size); 18 return reiserfs_xattr_get(dentry->d_inode, name, buffer, size);
18} 19}
19 20
20static int 21static int
21user_set(struct inode *inode, const char *name, const void *buffer, 22user_set(struct dentry *dentry, const char *name, const void *buffer,
22 size_t size, int flags) 23 size_t size, int flags, int handler_flags)
23{ 24{
24 if (strlen(name) < sizeof(XATTR_USER_PREFIX)) 25 if (strlen(name) < sizeof(XATTR_USER_PREFIX))
25 return -EINVAL; 26 return -EINVAL;
26 27
27 if (!reiserfs_xattrs_user(inode->i_sb)) 28 if (!reiserfs_xattrs_user(dentry->d_sb))
28 return -EOPNOTSUPP; 29 return -EOPNOTSUPP;
29 return reiserfs_xattr_set(inode, name, buffer, size, flags); 30 return reiserfs_xattr_set(dentry->d_inode, name, buffer, size, flags);
30} 31}
31 32
32static size_t user_list(struct inode *inode, char *list, size_t list_size, 33static size_t user_list(struct dentry *dentry, char *list, size_t list_size,
33 const char *name, size_t name_len) 34 const char *name, size_t name_len, int handler_flags)
34{ 35{
35 const size_t len = name_len + 1; 36 const size_t len = name_len + 1;
36 37
37 if (!reiserfs_xattrs_user(inode->i_sb)) 38 if (!reiserfs_xattrs_user(dentry->d_sb))
38 return 0; 39 return 0;
39 if (list && len <= list_size) { 40 if (list && len <= list_size) {
40 memcpy(list, name, name_len); 41 memcpy(list, name, name_len);
diff --git a/fs/stack.c b/fs/stack.c
index 67716f6a1a4a..4a6f7f440658 100644
--- a/fs/stack.c
+++ b/fs/stack.c
@@ -7,18 +7,63 @@
7 * This function cannot be inlined since i_size_{read,write} is rather 7 * This function cannot be inlined since i_size_{read,write} is rather
8 * heavy-weight on 32-bit systems 8 * heavy-weight on 32-bit systems
9 */ 9 */
10void fsstack_copy_inode_size(struct inode *dst, const struct inode *src) 10void fsstack_copy_inode_size(struct inode *dst, struct inode *src)
11{ 11{
12 i_size_write(dst, i_size_read((struct inode *)src)); 12 loff_t i_size;
13 dst->i_blocks = src->i_blocks; 13 blkcnt_t i_blocks;
14
15 /*
16 * i_size_read() includes its own seqlocking and protection from
17 * preemption (see include/linux/fs.h): we need nothing extra for
18 * that here, and prefer to avoid nesting locks than attempt to keep
19 * i_size and i_blocks in sync together.
20 */
21 i_size = i_size_read(src);
22
23 /*
24 * But if CONFIG_LBDAF (on 32-bit), we ought to make an effort to
25 * keep the two halves of i_blocks in sync despite SMP or PREEMPT -
26 * though stat's generic_fillattr() doesn't bother, and we won't be
27 * applying quotas (where i_blocks does become important) at the
28 * upper level.
29 *
30 * We don't actually know what locking is used at the lower level;
31 * but if it's a filesystem that supports quotas, it will be using
32 * i_lock as in inode_add_bytes(). tmpfs uses other locking, and
33 * its 32-bit is (just) able to exceed 2TB i_size with the aid of
34 * holes; but its i_blocks cannot carry into the upper long without
35 * almost 2TB swap - let's ignore that case.
36 */
37 if (sizeof(i_blocks) > sizeof(long))
38 spin_lock(&src->i_lock);
39 i_blocks = src->i_blocks;
40 if (sizeof(i_blocks) > sizeof(long))
41 spin_unlock(&src->i_lock);
42
43 /*
44 * If CONFIG_SMP or CONFIG_PREEMPT on 32-bit, it's vital for
45 * fsstack_copy_inode_size() to hold some lock around
46 * i_size_write(), otherwise i_size_read() may spin forever (see
47 * include/linux/fs.h). We don't necessarily hold i_mutex when this
48 * is called, so take i_lock for that case.
49 *
50 * And if CONFIG_LBDAF (on 32-bit), continue our effort to keep the
51 * two halves of i_blocks in sync despite SMP or PREEMPT: use i_lock
52 * for that case too, and do both at once by combining the tests.
53 *
54 * There is none of this locking overhead in the 64-bit case.
55 */
56 if (sizeof(i_size) > sizeof(long) || sizeof(i_blocks) > sizeof(long))
57 spin_lock(&dst->i_lock);
58 i_size_write(dst, i_size);
59 dst->i_blocks = i_blocks;
60 if (sizeof(i_size) > sizeof(long) || sizeof(i_blocks) > sizeof(long))
61 spin_unlock(&dst->i_lock);
14} 62}
15EXPORT_SYMBOL_GPL(fsstack_copy_inode_size); 63EXPORT_SYMBOL_GPL(fsstack_copy_inode_size);
16 64
17/* copy all attributes; get_nlinks is optional way to override the i_nlink 65/* copy all attributes */
18 * copying 66void fsstack_copy_attr_all(struct inode *dest, const struct inode *src)
19 */
20void fsstack_copy_attr_all(struct inode *dest, const struct inode *src,
21 int (*get_nlinks)(struct inode *))
22{ 67{
23 dest->i_mode = src->i_mode; 68 dest->i_mode = src->i_mode;
24 dest->i_uid = src->i_uid; 69 dest->i_uid = src->i_uid;
@@ -29,14 +74,6 @@ void fsstack_copy_attr_all(struct inode *dest, const struct inode *src,
29 dest->i_ctime = src->i_ctime; 74 dest->i_ctime = src->i_ctime;
30 dest->i_blkbits = src->i_blkbits; 75 dest->i_blkbits = src->i_blkbits;
31 dest->i_flags = src->i_flags; 76 dest->i_flags = src->i_flags;
32 77 dest->i_nlink = src->i_nlink;
33 /*
34 * Update the nlinks AFTER updating the above fields, because the
35 * get_links callback may depend on them.
36 */
37 if (!get_nlinks)
38 dest->i_nlink = src->i_nlink;
39 else
40 dest->i_nlink = (*get_nlinks)(dest);
41} 78}
42EXPORT_SYMBOL_GPL(fsstack_copy_attr_all); 79EXPORT_SYMBOL_GPL(fsstack_copy_attr_all);
diff --git a/fs/sync.c b/fs/sync.c
index 36752a683481..418727a2a239 100644
--- a/fs/sync.c
+++ b/fs/sync.c
@@ -355,6 +355,7 @@ SYSCALL_DEFINE(sync_file_range)(int fd, loff_t offset, loff_t nbytes,
355{ 355{
356 int ret; 356 int ret;
357 struct file *file; 357 struct file *file;
358 struct address_space *mapping;
358 loff_t endbyte; /* inclusive */ 359 loff_t endbyte; /* inclusive */
359 int fput_needed; 360 int fput_needed;
360 umode_t i_mode; 361 umode_t i_mode;
@@ -405,7 +406,28 @@ SYSCALL_DEFINE(sync_file_range)(int fd, loff_t offset, loff_t nbytes,
405 !S_ISLNK(i_mode)) 406 !S_ISLNK(i_mode))
406 goto out_put; 407 goto out_put;
407 408
408 ret = do_sync_mapping_range(file->f_mapping, offset, endbyte, flags); 409 mapping = file->f_mapping;
410 if (!mapping) {
411 ret = -EINVAL;
412 goto out_put;
413 }
414
415 ret = 0;
416 if (flags & SYNC_FILE_RANGE_WAIT_BEFORE) {
417 ret = filemap_fdatawait_range(mapping, offset, endbyte);
418 if (ret < 0)
419 goto out_put;
420 }
421
422 if (flags & SYNC_FILE_RANGE_WRITE) {
423 ret = filemap_fdatawrite_range(mapping, offset, endbyte);
424 if (ret < 0)
425 goto out_put;
426 }
427
428 if (flags & SYNC_FILE_RANGE_WAIT_AFTER)
429 ret = filemap_fdatawait_range(mapping, offset, endbyte);
430
409out_put: 431out_put:
410 fput_light(file, fput_needed); 432 fput_light(file, fput_needed);
411out: 433out:
@@ -437,38 +459,3 @@ asmlinkage long SyS_sync_file_range2(long fd, long flags,
437} 459}
438SYSCALL_ALIAS(sys_sync_file_range2, SyS_sync_file_range2); 460SYSCALL_ALIAS(sys_sync_file_range2, SyS_sync_file_range2);
439#endif 461#endif
440
441/*
442 * `endbyte' is inclusive
443 */
444int do_sync_mapping_range(struct address_space *mapping, loff_t offset,
445 loff_t endbyte, unsigned int flags)
446{
447 int ret;
448
449 if (!mapping) {
450 ret = -EINVAL;
451 goto out;
452 }
453
454 ret = 0;
455 if (flags & SYNC_FILE_RANGE_WAIT_BEFORE) {
456 ret = filemap_fdatawait_range(mapping, offset, endbyte);
457 if (ret < 0)
458 goto out;
459 }
460
461 if (flags & SYNC_FILE_RANGE_WRITE) {
462 ret = __filemap_fdatawrite_range(mapping, offset, endbyte,
463 WB_SYNC_ALL);
464 if (ret < 0)
465 goto out;
466 }
467
468 if (flags & SYNC_FILE_RANGE_WAIT_AFTER) {
469 ret = filemap_fdatawait_range(mapping, offset, endbyte);
470 }
471out:
472 return ret;
473}
474EXPORT_SYMBOL_GPL(do_sync_mapping_range);
diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c
index 39849f887e72..16a6444330ec 100644
--- a/fs/ubifs/file.c
+++ b/fs/ubifs/file.c
@@ -45,7 +45,7 @@
45 * 45 *
46 * Similarly, @i_mutex is not always locked in 'ubifs_readpage()', e.g., the 46 * Similarly, @i_mutex is not always locked in 'ubifs_readpage()', e.g., the
47 * read-ahead path does not lock it ("sys_read -> generic_file_aio_read -> 47 * read-ahead path does not lock it ("sys_read -> generic_file_aio_read ->
48 * ondemand_readahead -> readpage"). In case of readahead, @I_LOCK flag is not 48 * ondemand_readahead -> readpage"). In case of readahead, @I_SYNC flag is not
49 * set as well. However, UBIFS disables readahead. 49 * set as well. However, UBIFS disables readahead.
50 */ 50 */
51 51
diff --git a/fs/ufs/dir.c b/fs/ufs/dir.c
index 6f671f1ac271..22af68f8b682 100644
--- a/fs/ufs/dir.c
+++ b/fs/ufs/dir.c
@@ -70,13 +70,13 @@ static inline unsigned long ufs_dir_pages(struct inode *inode)
70 return (inode->i_size+PAGE_CACHE_SIZE-1)>>PAGE_CACHE_SHIFT; 70 return (inode->i_size+PAGE_CACHE_SIZE-1)>>PAGE_CACHE_SHIFT;
71} 71}
72 72
73ino_t ufs_inode_by_name(struct inode *dir, struct dentry *dentry) 73ino_t ufs_inode_by_name(struct inode *dir, struct qstr *qstr)
74{ 74{
75 ino_t res = 0; 75 ino_t res = 0;
76 struct ufs_dir_entry *de; 76 struct ufs_dir_entry *de;
77 struct page *page; 77 struct page *page;
78 78
79 de = ufs_find_entry(dir, dentry, &page); 79 de = ufs_find_entry(dir, qstr, &page);
80 if (de) { 80 if (de) {
81 res = fs32_to_cpu(dir->i_sb, de->d_ino); 81 res = fs32_to_cpu(dir->i_sb, de->d_ino);
82 ufs_put_page(page); 82 ufs_put_page(page);
@@ -249,12 +249,12 @@ struct ufs_dir_entry *ufs_dotdot(struct inode *dir, struct page **p)
249 * (as a parameter - res_dir). Page is returned mapped and unlocked. 249 * (as a parameter - res_dir). Page is returned mapped and unlocked.
250 * Entry is guaranteed to be valid. 250 * Entry is guaranteed to be valid.
251 */ 251 */
252struct ufs_dir_entry *ufs_find_entry(struct inode *dir, struct dentry *dentry, 252struct ufs_dir_entry *ufs_find_entry(struct inode *dir, struct qstr *qstr,
253 struct page **res_page) 253 struct page **res_page)
254{ 254{
255 struct super_block *sb = dir->i_sb; 255 struct super_block *sb = dir->i_sb;
256 const char *name = dentry->d_name.name; 256 const char *name = qstr->name;
257 int namelen = dentry->d_name.len; 257 int namelen = qstr->len;
258 unsigned reclen = UFS_DIR_REC_LEN(namelen); 258 unsigned reclen = UFS_DIR_REC_LEN(namelen);
259 unsigned long start, n; 259 unsigned long start, n;
260 unsigned long npages = ufs_dir_pages(dir); 260 unsigned long npages = ufs_dir_pages(dir);
diff --git a/fs/ufs/namei.c b/fs/ufs/namei.c
index 23119fe7ad62..4c26d9e8bc94 100644
--- a/fs/ufs/namei.c
+++ b/fs/ufs/namei.c
@@ -56,7 +56,7 @@ static struct dentry *ufs_lookup(struct inode * dir, struct dentry *dentry, stru
56 return ERR_PTR(-ENAMETOOLONG); 56 return ERR_PTR(-ENAMETOOLONG);
57 57
58 lock_kernel(); 58 lock_kernel();
59 ino = ufs_inode_by_name(dir, dentry); 59 ino = ufs_inode_by_name(dir, &dentry->d_name);
60 if (ino) { 60 if (ino) {
61 inode = ufs_iget(dir->i_sb, ino); 61 inode = ufs_iget(dir->i_sb, ino);
62 if (IS_ERR(inode)) { 62 if (IS_ERR(inode)) {
@@ -237,7 +237,7 @@ static int ufs_unlink(struct inode *dir, struct dentry *dentry)
237 struct page *page; 237 struct page *page;
238 int err = -ENOENT; 238 int err = -ENOENT;
239 239
240 de = ufs_find_entry(dir, dentry, &page); 240 de = ufs_find_entry(dir, &dentry->d_name, &page);
241 if (!de) 241 if (!de)
242 goto out; 242 goto out;
243 243
@@ -281,7 +281,7 @@ static int ufs_rename(struct inode *old_dir, struct dentry *old_dentry,
281 struct ufs_dir_entry *old_de; 281 struct ufs_dir_entry *old_de;
282 int err = -ENOENT; 282 int err = -ENOENT;
283 283
284 old_de = ufs_find_entry(old_dir, old_dentry, &old_page); 284 old_de = ufs_find_entry(old_dir, &old_dentry->d_name, &old_page);
285 if (!old_de) 285 if (!old_de)
286 goto out; 286 goto out;
287 287
@@ -301,7 +301,7 @@ static int ufs_rename(struct inode *old_dir, struct dentry *old_dentry,
301 goto out_dir; 301 goto out_dir;
302 302
303 err = -ENOENT; 303 err = -ENOENT;
304 new_de = ufs_find_entry(new_dir, new_dentry, &new_page); 304 new_de = ufs_find_entry(new_dir, &new_dentry->d_name, &new_page);
305 if (!new_de) 305 if (!new_de)
306 goto out_dir; 306 goto out_dir;
307 inode_inc_link_count(old_inode); 307 inode_inc_link_count(old_inode);
diff --git a/fs/ufs/super.c b/fs/ufs/super.c
index 5faed7954d0a..143c20bfb04b 100644
--- a/fs/ufs/super.c
+++ b/fs/ufs/super.c
@@ -66,6 +66,7 @@
66 */ 66 */
67 67
68 68
69#include <linux/exportfs.h>
69#include <linux/module.h> 70#include <linux/module.h>
70#include <linux/bitops.h> 71#include <linux/bitops.h>
71 72
@@ -96,6 +97,56 @@
96#include "swab.h" 97#include "swab.h"
97#include "util.h" 98#include "util.h"
98 99
100static struct inode *ufs_nfs_get_inode(struct super_block *sb, u64 ino, u32 generation)
101{
102 struct ufs_sb_private_info *uspi = UFS_SB(sb)->s_uspi;
103 struct inode *inode;
104
105 if (ino < UFS_ROOTINO || ino > uspi->s_ncg * uspi->s_ipg)
106 return ERR_PTR(-ESTALE);
107
108 inode = ufs_iget(sb, ino);
109 if (IS_ERR(inode))
110 return ERR_CAST(inode);
111 if (generation && inode->i_generation != generation) {
112 iput(inode);
113 return ERR_PTR(-ESTALE);
114 }
115 return inode;
116}
117
118static struct dentry *ufs_fh_to_dentry(struct super_block *sb, struct fid *fid,
119 int fh_len, int fh_type)
120{
121 return generic_fh_to_dentry(sb, fid, fh_len, fh_type, ufs_nfs_get_inode);
122}
123
124static struct dentry *ufs_fh_to_parent(struct super_block *sb, struct fid *fid,
125 int fh_len, int fh_type)
126{
127 return generic_fh_to_parent(sb, fid, fh_len, fh_type, ufs_nfs_get_inode);
128}
129
130static struct dentry *ufs_get_parent(struct dentry *child)
131{
132 struct qstr dot_dot = {
133 .name = "..",
134 .len = 2,
135 };
136 ino_t ino;
137
138 ino = ufs_inode_by_name(child->d_inode, &dot_dot);
139 if (!ino)
140 return ERR_PTR(-ENOENT);
141 return d_obtain_alias(ufs_iget(child->d_inode->i_sb, ino));
142}
143
144static const struct export_operations ufs_export_ops = {
145 .fh_to_dentry = ufs_fh_to_dentry,
146 .fh_to_parent = ufs_fh_to_parent,
147 .get_parent = ufs_get_parent,
148};
149
99#ifdef CONFIG_UFS_DEBUG 150#ifdef CONFIG_UFS_DEBUG
100/* 151/*
101 * Print contents of ufs_super_block, useful for debugging 152 * Print contents of ufs_super_block, useful for debugging
@@ -990,6 +1041,7 @@ magic_found:
990 * Read ufs_super_block into internal data structures 1041 * Read ufs_super_block into internal data structures
991 */ 1042 */
992 sb->s_op = &ufs_super_ops; 1043 sb->s_op = &ufs_super_ops;
1044 sb->s_export_op = &ufs_export_ops;
993 sb->dq_op = NULL; /***/ 1045 sb->dq_op = NULL; /***/
994 sb->s_magic = fs32_to_cpu(sb, usb3->fs_magic); 1046 sb->s_magic = fs32_to_cpu(sb, usb3->fs_magic);
995 1047
diff --git a/fs/ufs/ufs.h b/fs/ufs/ufs.h
index 644e77e13599..0b4c39bc0d9e 100644
--- a/fs/ufs/ufs.h
+++ b/fs/ufs/ufs.h
@@ -86,9 +86,9 @@ extern void ufs_put_cylinder (struct super_block *, unsigned);
86/* dir.c */ 86/* dir.c */
87extern const struct inode_operations ufs_dir_inode_operations; 87extern const struct inode_operations ufs_dir_inode_operations;
88extern int ufs_add_link (struct dentry *, struct inode *); 88extern int ufs_add_link (struct dentry *, struct inode *);
89extern ino_t ufs_inode_by_name(struct inode *, struct dentry *); 89extern ino_t ufs_inode_by_name(struct inode *, struct qstr *);
90extern int ufs_make_empty(struct inode *, struct inode *); 90extern int ufs_make_empty(struct inode *, struct inode *);
91extern struct ufs_dir_entry *ufs_find_entry(struct inode *, struct dentry *, struct page **); 91extern struct ufs_dir_entry *ufs_find_entry(struct inode *, struct qstr *, struct page **);
92extern int ufs_delete_entry(struct inode *, struct ufs_dir_entry *, struct page *); 92extern int ufs_delete_entry(struct inode *, struct ufs_dir_entry *, struct page *);
93extern int ufs_empty_dir (struct inode *); 93extern int ufs_empty_dir (struct inode *);
94extern struct ufs_dir_entry *ufs_dotdot(struct inode *, struct page **); 94extern struct ufs_dir_entry *ufs_dotdot(struct inode *, struct page **);
diff --git a/fs/xattr.c b/fs/xattr.c
index 6d4f6d3449fb..46f87e828b48 100644
--- a/fs/xattr.c
+++ b/fs/xattr.c
@@ -615,12 +615,11 @@ ssize_t
615generic_getxattr(struct dentry *dentry, const char *name, void *buffer, size_t size) 615generic_getxattr(struct dentry *dentry, const char *name, void *buffer, size_t size)
616{ 616{
617 struct xattr_handler *handler; 617 struct xattr_handler *handler;
618 struct inode *inode = dentry->d_inode;
619 618
620 handler = xattr_resolve_name(inode->i_sb->s_xattr, &name); 619 handler = xattr_resolve_name(dentry->d_sb->s_xattr, &name);
621 if (!handler) 620 if (!handler)
622 return -EOPNOTSUPP; 621 return -EOPNOTSUPP;
623 return handler->get(inode, name, buffer, size); 622 return handler->get(dentry, name, buffer, size, handler->flags);
624} 623}
625 624
626/* 625/*
@@ -630,18 +629,20 @@ generic_getxattr(struct dentry *dentry, const char *name, void *buffer, size_t s
630ssize_t 629ssize_t
631generic_listxattr(struct dentry *dentry, char *buffer, size_t buffer_size) 630generic_listxattr(struct dentry *dentry, char *buffer, size_t buffer_size)
632{ 631{
633 struct inode *inode = dentry->d_inode; 632 struct xattr_handler *handler, **handlers = dentry->d_sb->s_xattr;
634 struct xattr_handler *handler, **handlers = inode->i_sb->s_xattr;
635 unsigned int size = 0; 633 unsigned int size = 0;
636 634
637 if (!buffer) { 635 if (!buffer) {
638 for_each_xattr_handler(handlers, handler) 636 for_each_xattr_handler(handlers, handler) {
639 size += handler->list(inode, NULL, 0, NULL, 0); 637 size += handler->list(dentry, NULL, 0, NULL, 0,
638 handler->flags);
639 }
640 } else { 640 } else {
641 char *buf = buffer; 641 char *buf = buffer;
642 642
643 for_each_xattr_handler(handlers, handler) { 643 for_each_xattr_handler(handlers, handler) {
644 size = handler->list(inode, buf, buffer_size, NULL, 0); 644 size = handler->list(dentry, buf, buffer_size,
645 NULL, 0, handler->flags);
645 if (size > buffer_size) 646 if (size > buffer_size)
646 return -ERANGE; 647 return -ERANGE;
647 buf += size; 648 buf += size;
@@ -659,14 +660,13 @@ int
659generic_setxattr(struct dentry *dentry, const char *name, const void *value, size_t size, int flags) 660generic_setxattr(struct dentry *dentry, const char *name, const void *value, size_t size, int flags)
660{ 661{
661 struct xattr_handler *handler; 662 struct xattr_handler *handler;
662 struct inode *inode = dentry->d_inode;
663 663
664 if (size == 0) 664 if (size == 0)
665 value = ""; /* empty EA, do not remove */ 665 value = ""; /* empty EA, do not remove */
666 handler = xattr_resolve_name(inode->i_sb->s_xattr, &name); 666 handler = xattr_resolve_name(dentry->d_sb->s_xattr, &name);
667 if (!handler) 667 if (!handler)
668 return -EOPNOTSUPP; 668 return -EOPNOTSUPP;
669 return handler->set(inode, name, value, size, flags); 669 return handler->set(dentry, name, value, size, 0, handler->flags);
670} 670}
671 671
672/* 672/*
@@ -677,12 +677,12 @@ int
677generic_removexattr(struct dentry *dentry, const char *name) 677generic_removexattr(struct dentry *dentry, const char *name)
678{ 678{
679 struct xattr_handler *handler; 679 struct xattr_handler *handler;
680 struct inode *inode = dentry->d_inode;
681 680
682 handler = xattr_resolve_name(inode->i_sb->s_xattr, &name); 681 handler = xattr_resolve_name(dentry->d_sb->s_xattr, &name);
683 if (!handler) 682 if (!handler)
684 return -EOPNOTSUPP; 683 return -EOPNOTSUPP;
685 return handler->set(inode, name, NULL, 0, XATTR_REPLACE); 684 return handler->set(dentry, name, NULL, 0,
685 XATTR_REPLACE, handler->flags);
686} 686}
687 687
688EXPORT_SYMBOL(generic_getxattr); 688EXPORT_SYMBOL(generic_getxattr);
diff --git a/fs/xfs/linux-2.6/xfs_acl.c b/fs/xfs/linux-2.6/xfs_acl.c
index 69e598b6986f..2512125dfa7c 100644
--- a/fs/xfs/linux-2.6/xfs_acl.c
+++ b/fs/xfs/linux-2.6/xfs_acl.c
@@ -354,37 +354,14 @@ xfs_acl_chmod(struct inode *inode)
354 return error; 354 return error;
355} 355}
356 356
357/*
358 * System xattr handlers.
359 *
360 * Currently Posix ACLs are the only system namespace extended attribute
361 * handlers supported by XFS, so we just implement the handlers here.
362 * If we ever support other system extended attributes this will need
363 * some refactoring.
364 */
365
366static int 357static int
367xfs_decode_acl(const char *name) 358xfs_xattr_acl_get(struct dentry *dentry, const char *name,
368{ 359 void *value, size_t size, int type)
369 if (strcmp(name, "posix_acl_access") == 0)
370 return ACL_TYPE_ACCESS;
371 else if (strcmp(name, "posix_acl_default") == 0)
372 return ACL_TYPE_DEFAULT;
373 return -EINVAL;
374}
375
376static int
377xfs_xattr_system_get(struct inode *inode, const char *name,
378 void *value, size_t size)
379{ 360{
380 struct posix_acl *acl; 361 struct posix_acl *acl;
381 int type, error; 362 int error;
382
383 type = xfs_decode_acl(name);
384 if (type < 0)
385 return type;
386 363
387 acl = xfs_get_acl(inode, type); 364 acl = xfs_get_acl(dentry->d_inode, type);
388 if (IS_ERR(acl)) 365 if (IS_ERR(acl))
389 return PTR_ERR(acl); 366 return PTR_ERR(acl);
390 if (acl == NULL) 367 if (acl == NULL)
@@ -397,15 +374,13 @@ xfs_xattr_system_get(struct inode *inode, const char *name,
397} 374}
398 375
399static int 376static int
400xfs_xattr_system_set(struct inode *inode, const char *name, 377xfs_xattr_acl_set(struct dentry *dentry, const char *name,
401 const void *value, size_t size, int flags) 378 const void *value, size_t size, int flags, int type)
402{ 379{
380 struct inode *inode = dentry->d_inode;
403 struct posix_acl *acl = NULL; 381 struct posix_acl *acl = NULL;
404 int error = 0, type; 382 int error = 0;
405 383
406 type = xfs_decode_acl(name);
407 if (type < 0)
408 return type;
409 if (flags & XATTR_CREATE) 384 if (flags & XATTR_CREATE)
410 return -EINVAL; 385 return -EINVAL;
411 if (type == ACL_TYPE_DEFAULT && !S_ISDIR(inode->i_mode)) 386 if (type == ACL_TYPE_DEFAULT && !S_ISDIR(inode->i_mode))
@@ -462,8 +437,16 @@ xfs_xattr_system_set(struct inode *inode, const char *name,
462 return error; 437 return error;
463} 438}
464 439
465struct xattr_handler xfs_xattr_system_handler = { 440struct xattr_handler xfs_xattr_acl_access_handler = {
466 .prefix = XATTR_SYSTEM_PREFIX, 441 .prefix = POSIX_ACL_XATTR_ACCESS,
467 .get = xfs_xattr_system_get, 442 .flags = ACL_TYPE_ACCESS,
468 .set = xfs_xattr_system_set, 443 .get = xfs_xattr_acl_get,
444 .set = xfs_xattr_acl_set,
445};
446
447struct xattr_handler xfs_xattr_acl_default_handler = {
448 .prefix = POSIX_ACL_XATTR_DEFAULT,
449 .flags = ACL_TYPE_DEFAULT,
450 .get = xfs_xattr_acl_get,
451 .set = xfs_xattr_acl_set,
469}; 452};
diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c
index d798c54296eb..66abe36c1213 100644
--- a/fs/xfs/linux-2.6/xfs_aops.c
+++ b/fs/xfs/linux-2.6/xfs_aops.c
@@ -1474,19 +1474,13 @@ xfs_vm_direct_IO(
1474 1474
1475 bdev = xfs_find_bdev_for_inode(XFS_I(inode)); 1475 bdev = xfs_find_bdev_for_inode(XFS_I(inode));
1476 1476
1477 if (rw == WRITE) { 1477 iocb->private = xfs_alloc_ioend(inode, rw == WRITE ?
1478 iocb->private = xfs_alloc_ioend(inode, IOMAP_UNWRITTEN); 1478 IOMAP_UNWRITTEN : IOMAP_READ);
1479 ret = blockdev_direct_IO_own_locking(rw, iocb, inode, 1479
1480 bdev, iov, offset, nr_segs, 1480 ret = blockdev_direct_IO_no_locking(rw, iocb, inode, bdev, iov,
1481 xfs_get_blocks_direct, 1481 offset, nr_segs,
1482 xfs_end_io_direct); 1482 xfs_get_blocks_direct,
1483 } else { 1483 xfs_end_io_direct);
1484 iocb->private = xfs_alloc_ioend(inode, IOMAP_READ);
1485 ret = blockdev_direct_IO_no_locking(rw, iocb, inode,
1486 bdev, iov, offset, nr_segs,
1487 xfs_get_blocks_direct,
1488 xfs_end_io_direct);
1489 }
1490 1484
1491 if (unlikely(ret != -EIOCBQUEUED && iocb->private)) 1485 if (unlikely(ret != -EIOCBQUEUED && iocb->private))
1492 xfs_destroy_ioend(iocb->private); 1486 xfs_destroy_ioend(iocb->private);
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
index b4c7d4248aac..77b8be81c769 100644
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ b/fs/xfs/linux-2.6/xfs_buf.c
@@ -292,6 +292,7 @@ _xfs_buf_free_pages(
292{ 292{
293 if (bp->b_pages != bp->b_page_array) { 293 if (bp->b_pages != bp->b_page_array) {
294 kmem_free(bp->b_pages); 294 kmem_free(bp->b_pages);
295 bp->b_pages = NULL;
295 } 296 }
296} 297}
297 298
@@ -323,9 +324,8 @@ xfs_buf_free(
323 ASSERT(!PagePrivate(page)); 324 ASSERT(!PagePrivate(page));
324 page_cache_release(page); 325 page_cache_release(page);
325 } 326 }
326 _xfs_buf_free_pages(bp);
327 } 327 }
328 328 _xfs_buf_free_pages(bp);
329 xfs_buf_deallocate(bp); 329 xfs_buf_deallocate(bp);
330} 330}
331 331
@@ -1149,10 +1149,14 @@ _xfs_buf_ioapply(
1149 if (bp->b_flags & XBF_ORDERED) { 1149 if (bp->b_flags & XBF_ORDERED) {
1150 ASSERT(!(bp->b_flags & XBF_READ)); 1150 ASSERT(!(bp->b_flags & XBF_READ));
1151 rw = WRITE_BARRIER; 1151 rw = WRITE_BARRIER;
1152 } else if (bp->b_flags & _XBF_RUN_QUEUES) { 1152 } else if (bp->b_flags & XBF_LOG_BUFFER) {
1153 ASSERT(!(bp->b_flags & XBF_READ_AHEAD)); 1153 ASSERT(!(bp->b_flags & XBF_READ_AHEAD));
1154 bp->b_flags &= ~_XBF_RUN_QUEUES; 1154 bp->b_flags &= ~_XBF_RUN_QUEUES;
1155 rw = (bp->b_flags & XBF_WRITE) ? WRITE_SYNC : READ_SYNC; 1155 rw = (bp->b_flags & XBF_WRITE) ? WRITE_SYNC : READ_SYNC;
1156 } else if (bp->b_flags & _XBF_RUN_QUEUES) {
1157 ASSERT(!(bp->b_flags & XBF_READ_AHEAD));
1158 bp->b_flags &= ~_XBF_RUN_QUEUES;
1159 rw = (bp->b_flags & XBF_WRITE) ? WRITE_META : READ_META;
1156 } else { 1160 } else {
1157 rw = (bp->b_flags & XBF_WRITE) ? WRITE : 1161 rw = (bp->b_flags & XBF_WRITE) ? WRITE :
1158 (bp->b_flags & XBF_READ_AHEAD) ? READA : READ; 1162 (bp->b_flags & XBF_READ_AHEAD) ? READA : READ;
diff --git a/fs/xfs/linux-2.6/xfs_buf.h b/fs/xfs/linux-2.6/xfs_buf.h
index a509f4addc2a..a34c7b54822d 100644
--- a/fs/xfs/linux-2.6/xfs_buf.h
+++ b/fs/xfs/linux-2.6/xfs_buf.h
@@ -55,6 +55,7 @@ typedef enum {
55 XBF_FS_MANAGED = (1 << 8), /* filesystem controls freeing memory */ 55 XBF_FS_MANAGED = (1 << 8), /* filesystem controls freeing memory */
56 XBF_ORDERED = (1 << 11), /* use ordered writes */ 56 XBF_ORDERED = (1 << 11), /* use ordered writes */
57 XBF_READ_AHEAD = (1 << 12), /* asynchronous read-ahead */ 57 XBF_READ_AHEAD = (1 << 12), /* asynchronous read-ahead */
58 XBF_LOG_BUFFER = (1 << 13), /* this is a buffer used for the log */
58 59
59 /* flags used only as arguments to access routines */ 60 /* flags used only as arguments to access routines */
60 XBF_LOCK = (1 << 14), /* lock requested */ 61 XBF_LOCK = (1 << 14), /* lock requested */
diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c
index 1d5b298ba8b2..225946012d0b 100644
--- a/fs/xfs/linux-2.6/xfs_iops.c
+++ b/fs/xfs/linux-2.6/xfs_iops.c
@@ -794,7 +794,7 @@ xfs_setup_inode(
794 struct inode *inode = &ip->i_vnode; 794 struct inode *inode = &ip->i_vnode;
795 795
796 inode->i_ino = ip->i_ino; 796 inode->i_ino = ip->i_ino;
797 inode->i_state = I_NEW|I_LOCK; 797 inode->i_state = I_NEW;
798 inode_add_to_lists(ip->i_mount->m_super, inode); 798 inode_add_to_lists(ip->i_mount->m_super, inode);
799 799
800 inode->i_mode = ip->i_d.di_mode; 800 inode->i_mode = ip->i_d.di_mode;
diff --git a/fs/xfs/linux-2.6/xfs_xattr.c b/fs/xfs/linux-2.6/xfs_xattr.c
index 497c7fb75cc1..0b1878857fc3 100644
--- a/fs/xfs/linux-2.6/xfs_xattr.c
+++ b/fs/xfs/linux-2.6/xfs_xattr.c
@@ -30,10 +30,10 @@
30 30
31 31
32static int 32static int
33__xfs_xattr_get(struct inode *inode, const char *name, 33xfs_xattr_get(struct dentry *dentry, const char *name,
34 void *value, size_t size, int xflags) 34 void *value, size_t size, int xflags)
35{ 35{
36 struct xfs_inode *ip = XFS_I(inode); 36 struct xfs_inode *ip = XFS_I(dentry->d_inode);
37 int error, asize = size; 37 int error, asize = size;
38 38
39 if (strcmp(name, "") == 0) 39 if (strcmp(name, "") == 0)
@@ -52,10 +52,10 @@ __xfs_xattr_get(struct inode *inode, const char *name,
52} 52}
53 53
54static int 54static int
55__xfs_xattr_set(struct inode *inode, const char *name, const void *value, 55xfs_xattr_set(struct dentry *dentry, const char *name, const void *value,
56 size_t size, int flags, int xflags) 56 size_t size, int flags, int xflags)
57{ 57{
58 struct xfs_inode *ip = XFS_I(inode); 58 struct xfs_inode *ip = XFS_I(dentry->d_inode);
59 59
60 if (strcmp(name, "") == 0) 60 if (strcmp(name, "") == 0)
61 return -EINVAL; 61 return -EINVAL;
@@ -71,75 +71,34 @@ __xfs_xattr_set(struct inode *inode, const char *name, const void *value,
71 return -xfs_attr_set(ip, name, (void *)value, size, xflags); 71 return -xfs_attr_set(ip, name, (void *)value, size, xflags);
72} 72}
73 73
74static int
75xfs_xattr_user_get(struct inode *inode, const char *name,
76 void *value, size_t size)
77{
78 return __xfs_xattr_get(inode, name, value, size, 0);
79}
80
81static int
82xfs_xattr_user_set(struct inode *inode, const char *name,
83 const void *value, size_t size, int flags)
84{
85 return __xfs_xattr_set(inode, name, value, size, flags, 0);
86}
87
88static struct xattr_handler xfs_xattr_user_handler = { 74static struct xattr_handler xfs_xattr_user_handler = {
89 .prefix = XATTR_USER_PREFIX, 75 .prefix = XATTR_USER_PREFIX,
90 .get = xfs_xattr_user_get, 76 .flags = 0, /* no flags implies user namespace */
91 .set = xfs_xattr_user_set, 77 .get = xfs_xattr_get,
78 .set = xfs_xattr_set,
92}; 79};
93 80
94
95static int
96xfs_xattr_trusted_get(struct inode *inode, const char *name,
97 void *value, size_t size)
98{
99 return __xfs_xattr_get(inode, name, value, size, ATTR_ROOT);
100}
101
102static int
103xfs_xattr_trusted_set(struct inode *inode, const char *name,
104 const void *value, size_t size, int flags)
105{
106 return __xfs_xattr_set(inode, name, value, size, flags, ATTR_ROOT);
107}
108
109static struct xattr_handler xfs_xattr_trusted_handler = { 81static struct xattr_handler xfs_xattr_trusted_handler = {
110 .prefix = XATTR_TRUSTED_PREFIX, 82 .prefix = XATTR_TRUSTED_PREFIX,
111 .get = xfs_xattr_trusted_get, 83 .flags = ATTR_ROOT,
112 .set = xfs_xattr_trusted_set, 84 .get = xfs_xattr_get,
85 .set = xfs_xattr_set,
113}; 86};
114 87
115
116static int
117xfs_xattr_secure_get(struct inode *inode, const char *name,
118 void *value, size_t size)
119{
120 return __xfs_xattr_get(inode, name, value, size, ATTR_SECURE);
121}
122
123static int
124xfs_xattr_secure_set(struct inode *inode, const char *name,
125 const void *value, size_t size, int flags)
126{
127 return __xfs_xattr_set(inode, name, value, size, flags, ATTR_SECURE);
128}
129
130static struct xattr_handler xfs_xattr_security_handler = { 88static struct xattr_handler xfs_xattr_security_handler = {
131 .prefix = XATTR_SECURITY_PREFIX, 89 .prefix = XATTR_SECURITY_PREFIX,
132 .get = xfs_xattr_secure_get, 90 .flags = ATTR_SECURE,
133 .set = xfs_xattr_secure_set, 91 .get = xfs_xattr_get,
92 .set = xfs_xattr_set,
134}; 93};
135 94
136
137struct xattr_handler *xfs_xattr_handlers[] = { 95struct xattr_handler *xfs_xattr_handlers[] = {
138 &xfs_xattr_user_handler, 96 &xfs_xattr_user_handler,
139 &xfs_xattr_trusted_handler, 97 &xfs_xattr_trusted_handler,
140 &xfs_xattr_security_handler, 98 &xfs_xattr_security_handler,
141#ifdef CONFIG_XFS_POSIX_ACL 99#ifdef CONFIG_XFS_POSIX_ACL
142 &xfs_xattr_system_handler, 100 &xfs_xattr_acl_access_handler,
101 &xfs_xattr_acl_default_handler,
143#endif 102#endif
144 NULL 103 NULL
145}; 104};
diff --git a/fs/xfs/xfs_acl.h b/fs/xfs/xfs_acl.h
index 947b150df8ed..00fd357c3e46 100644
--- a/fs/xfs/xfs_acl.h
+++ b/fs/xfs/xfs_acl.h
@@ -49,7 +49,8 @@ extern int xfs_acl_chmod(struct inode *inode);
49extern int posix_acl_access_exists(struct inode *inode); 49extern int posix_acl_access_exists(struct inode *inode);
50extern int posix_acl_default_exists(struct inode *inode); 50extern int posix_acl_default_exists(struct inode *inode);
51 51
52extern struct xattr_handler xfs_xattr_system_handler; 52extern struct xattr_handler xfs_xattr_acl_access_handler;
53extern struct xattr_handler xfs_xattr_acl_default_handler;
53#else 54#else
54# define xfs_check_acl NULL 55# define xfs_check_acl NULL
55# define xfs_get_acl(inode, type) NULL 56# define xfs_get_acl(inode, type) NULL
diff --git a/fs/xfs/xfs_bmap_btree.h b/fs/xfs/xfs_bmap_btree.h
index 5549d495947f..cf07ca7c22e7 100644
--- a/fs/xfs/xfs_bmap_btree.h
+++ b/fs/xfs/xfs_bmap_btree.h
@@ -46,20 +46,12 @@ typedef struct xfs_bmdr_block {
46#define BMBT_STARTBLOCK_BITLEN 52 46#define BMBT_STARTBLOCK_BITLEN 52
47#define BMBT_BLOCKCOUNT_BITLEN 21 47#define BMBT_BLOCKCOUNT_BITLEN 21
48 48
49 49typedef struct xfs_bmbt_rec {
50#define BMBT_USE_64 1
51
52typedef struct xfs_bmbt_rec_32
53{
54 __uint32_t l0, l1, l2, l3;
55} xfs_bmbt_rec_32_t;
56typedef struct xfs_bmbt_rec_64
57{
58 __be64 l0, l1; 50 __be64 l0, l1;
59} xfs_bmbt_rec_64_t; 51} xfs_bmbt_rec_t;
60 52
61typedef __uint64_t xfs_bmbt_rec_base_t; /* use this for casts */ 53typedef __uint64_t xfs_bmbt_rec_base_t; /* use this for casts */
62typedef xfs_bmbt_rec_64_t xfs_bmbt_rec_t, xfs_bmdr_rec_t; 54typedef xfs_bmbt_rec_t xfs_bmdr_rec_t;
63 55
64typedef struct xfs_bmbt_rec_host { 56typedef struct xfs_bmbt_rec_host {
65 __uint64_t l0, l1; 57 __uint64_t l0, l1;
diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c
index f5c904a10c11..fa402a6bbbcf 100644
--- a/fs/xfs/xfs_iget.c
+++ b/fs/xfs/xfs_iget.c
@@ -91,7 +91,7 @@ xfs_inode_alloc(
91 ip->i_new_size = 0; 91 ip->i_new_size = 0;
92 92
93 /* prevent anyone from using this yet */ 93 /* prevent anyone from using this yet */
94 VFS_I(ip)->i_state = I_NEW|I_LOCK; 94 VFS_I(ip)->i_state = I_NEW;
95 95
96 return ip; 96 return ip;
97} 97}
@@ -217,7 +217,7 @@ xfs_iget_cache_hit(
217 trace_xfs_iget_reclaim(ip); 217 trace_xfs_iget_reclaim(ip);
218 goto out_error; 218 goto out_error;
219 } 219 }
220 inode->i_state = I_LOCK|I_NEW; 220 inode->i_state = I_NEW;
221 } else { 221 } else {
222 /* If the VFS inode is being torn down, pause and try again. */ 222 /* If the VFS inode is being torn down, pause and try again. */
223 if (!igrab(inode)) { 223 if (!igrab(inode)) {
@@ -478,17 +478,21 @@ xfs_ireclaim(
478{ 478{
479 struct xfs_mount *mp = ip->i_mount; 479 struct xfs_mount *mp = ip->i_mount;
480 struct xfs_perag *pag; 480 struct xfs_perag *pag;
481 xfs_agino_t agino = XFS_INO_TO_AGINO(mp, ip->i_ino);
481 482
482 XFS_STATS_INC(xs_ig_reclaims); 483 XFS_STATS_INC(xs_ig_reclaims);
483 484
484 /* 485 /*
485 * Remove the inode from the per-AG radix tree. It doesn't matter 486 * Remove the inode from the per-AG radix tree.
486 * if it was never added to it because radix_tree_delete can deal 487 *
487 * with that case just fine. 488 * Because radix_tree_delete won't complain even if the item was never
489 * added to the tree assert that it's been there before to catch
490 * problems with the inode life time early on.
488 */ 491 */
489 pag = xfs_get_perag(mp, ip->i_ino); 492 pag = xfs_get_perag(mp, ip->i_ino);
490 write_lock(&pag->pag_ici_lock); 493 write_lock(&pag->pag_ici_lock);
491 radix_tree_delete(&pag->pag_ici_root, XFS_INO_TO_AGINO(mp, ip->i_ino)); 494 if (!radix_tree_delete(&pag->pag_ici_root, agino))
495 ASSERT(0);
492 write_unlock(&pag->pag_ici_lock); 496 write_unlock(&pag->pag_ici_lock);
493 xfs_put_perag(mp, pag); 497 xfs_put_perag(mp, pag);
494 498
diff --git a/fs/xfs/xfs_inode_item.h b/fs/xfs/xfs_inode_item.h
index 65bae4c9b8bf..cc8df1ac7783 100644
--- a/fs/xfs/xfs_inode_item.h
+++ b/fs/xfs/xfs_inode_item.h
@@ -127,7 +127,7 @@ static inline int xfs_ilog_fdata(int w)
127#ifdef __KERNEL__ 127#ifdef __KERNEL__
128 128
129struct xfs_buf; 129struct xfs_buf;
130struct xfs_bmbt_rec_64; 130struct xfs_bmbt_rec;
131struct xfs_inode; 131struct xfs_inode;
132struct xfs_mount; 132struct xfs_mount;
133 133
@@ -140,9 +140,9 @@ typedef struct xfs_inode_log_item {
140 unsigned short ili_flags; /* misc flags */ 140 unsigned short ili_flags; /* misc flags */
141 unsigned short ili_logged; /* flushed logged data */ 141 unsigned short ili_logged; /* flushed logged data */
142 unsigned int ili_last_fields; /* fields when flushed */ 142 unsigned int ili_last_fields; /* fields when flushed */
143 struct xfs_bmbt_rec_64 *ili_extents_buf; /* array of logged 143 struct xfs_bmbt_rec *ili_extents_buf; /* array of logged
144 data exts */ 144 data exts */
145 struct xfs_bmbt_rec_64 *ili_aextents_buf; /* array of logged 145 struct xfs_bmbt_rec *ili_aextents_buf; /* array of logged
146 attr exts */ 146 attr exts */
147 unsigned int ili_pushbuf_flag; /* one bit used in push_ail */ 147 unsigned int ili_pushbuf_flag; /* one bit used in push_ail */
148 148
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index 4cb1792040e3..600b5b06aaeb 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -1441,6 +1441,7 @@ xlog_sync(xlog_t *log,
1441 XFS_BUF_ZEROFLAGS(bp); 1441 XFS_BUF_ZEROFLAGS(bp);
1442 XFS_BUF_BUSY(bp); 1442 XFS_BUF_BUSY(bp);
1443 XFS_BUF_ASYNC(bp); 1443 XFS_BUF_ASYNC(bp);
1444 bp->b_flags |= XBF_LOG_BUFFER;
1444 /* 1445 /*
1445 * Do an ordered write for the log block. 1446 * Do an ordered write for the log block.
1446 * Its unnecessary to flush the first split block in the log wrap case. 1447 * Its unnecessary to flush the first split block in the log wrap case.
@@ -1478,6 +1479,7 @@ xlog_sync(xlog_t *log,
1478 XFS_BUF_ZEROFLAGS(bp); 1479 XFS_BUF_ZEROFLAGS(bp);
1479 XFS_BUF_BUSY(bp); 1480 XFS_BUF_BUSY(bp);
1480 XFS_BUF_ASYNC(bp); 1481 XFS_BUF_ASYNC(bp);
1482 bp->b_flags |= XBF_LOG_BUFFER;
1481 if (log->l_mp->m_flags & XFS_MOUNT_BARRIER) 1483 if (log->l_mp->m_flags & XFS_MOUNT_BARRIER)
1482 XFS_BUF_ORDERED(bp); 1484 XFS_BUF_ORDERED(bp);
1483 dptr = XFS_BUF_PTR(bp); 1485 dptr = XFS_BUF_PTR(bp);