aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/9p/Kconfig3
-rw-r--r--fs/9p/vfs_dir.c92
-rw-r--r--fs/9p/vfs_file.c5
-rw-r--r--fs/9p/vfs_inode.c3
-rw-r--r--fs/9p/vfs_inode_dotl.c11
-rw-r--r--fs/adfs/Kconfig4
-rw-r--r--fs/affs/Kconfig4
-rw-r--r--fs/afs/Kconfig7
-rw-r--r--fs/befs/Kconfig4
-rw-r--r--fs/bfs/Kconfig4
-rw-r--r--fs/binfmt_elf.c14
-rw-r--r--fs/binfmt_elf_fdpic.c7
-rw-r--r--fs/block_dev.c1
-rw-r--r--fs/btrfs/Kconfig3
-rw-r--r--fs/btrfs/extent-tree.c6
-rw-r--r--fs/btrfs/relocation.c4
-rw-r--r--fs/btrfs/transaction.c1
-rw-r--r--fs/buffer.c2
-rw-r--r--fs/ceph/Kconfig4
-rw-r--r--fs/cifs/Kconfig8
-rw-r--r--fs/cifs/link.c2
-rw-r--r--fs/configfs/dir.c5
-rw-r--r--fs/debugfs/inode.c1
-rw-r--r--fs/dlm/dlm_internal.h3
-rw-r--r--fs/dlm/lock.c15
-rw-r--r--fs/ecryptfs/Kconfig4
-rw-r--r--fs/efs/Kconfig4
-rw-r--r--fs/ext3/super.c1
-rw-r--r--fs/ext4/inode.c2
-rw-r--r--fs/file.c2
-rw-r--r--fs/fuse/cuse.c10
-rw-r--r--fs/fuse/dev.c128
-rw-r--r--fs/fuse/dir.c259
-rw-r--r--fs/fuse/file.c238
-rw-r--r--fs/fuse/fuse_i.h74
-rw-r--r--fs/fuse/inode.c16
-rw-r--r--fs/gfs2/aops.c17
-rw-r--r--fs/gfs2/bmap.c30
-rw-r--r--fs/gfs2/dir.c30
-rw-r--r--fs/gfs2/file.c6
-rw-r--r--fs/gfs2/glock.c116
-rw-r--r--fs/gfs2/incore.h8
-rw-r--r--fs/gfs2/inode.c8
-rw-r--r--fs/gfs2/log.c76
-rw-r--r--fs/gfs2/log.h12
-rw-r--r--fs/gfs2/lops.c83
-rw-r--r--fs/gfs2/lops.h14
-rw-r--r--fs/gfs2/meta_io.c35
-rw-r--r--fs/gfs2/meta_io.h3
-rw-r--r--fs/gfs2/ops_fstype.c4
-rw-r--r--fs/gfs2/quota.c4
-rw-r--r--fs/gfs2/rgrp.c18
-rw-r--r--fs/gfs2/super.c70
-rw-r--r--fs/gfs2/super.h3
-rw-r--r--fs/gfs2/sys.c48
-rw-r--r--fs/gfs2/trans.c124
-rw-r--r--fs/gfs2/trans.h3
-rw-r--r--fs/gfs2/util.c3
-rw-r--r--fs/gfs2/xattr.c36
-rw-r--r--fs/hfs/Kconfig4
-rw-r--r--fs/hpfs/inode.c2
-rw-r--r--fs/jffs2/Kconfig10
-rw-r--r--fs/jfs/super.c2
-rw-r--r--fs/lockd/clntproc.c3
-rw-r--r--fs/logfs/Kconfig4
-rw-r--r--fs/nfs/blocklayout/blocklayout.c1
-rw-r--r--fs/nfs/callback_proc.c61
-rw-r--r--fs/nfs/delegation.c154
-rw-r--r--fs/nfs/delegation.h1
-rw-r--r--fs/nfs/getroot.c3
-rw-r--r--fs/nfs/inode.c5
-rw-r--r--fs/nfs/internal.h1
-rw-r--r--fs/nfs/nfs4_fs.h4
-rw-r--r--fs/nfs/nfs4proc.c133
-rw-r--r--fs/nfs/nfs4state.c11
-rw-r--r--fs/nfs/objlayout/objio_osd.c1
-rw-r--r--fs/nfs/pnfs.c150
-rw-r--r--fs/nfs/pnfs.h7
-rw-r--r--fs/nfs/super.c49
-rw-r--r--fs/nfs/unlink.c5
-rw-r--r--fs/nfsd/Kconfig4
-rw-r--r--fs/nilfs2/Kconfig3
-rw-r--r--fs/nilfs2/file.c2
-rw-r--r--fs/notify/inotify/inotify_user.c4
-rw-r--r--fs/ocfs2/alloc.c3
-rw-r--r--fs/ocfs2/aops.c1
-rw-r--r--fs/ocfs2/cluster/heartbeat.c6
-rw-r--r--fs/ocfs2/cluster/tcp.c8
-rw-r--r--fs/ocfs2/dlm/dlmdomain.c4
-rw-r--r--fs/ocfs2/dlmglue.c5
-rw-r--r--fs/ocfs2/extent_map.c3
-rw-r--r--fs/ocfs2/journal.c10
-rw-r--r--fs/ocfs2/localalloc.c8
-rw-r--r--fs/ocfs2/stack_o2cb.c2
-rw-r--r--fs/ocfs2/super.c6
-rw-r--r--fs/ocfs2/sysfile.c3
-rw-r--r--fs/proc/Makefile3
-rw-r--r--fs/proc/array.c4
-rw-r--r--fs/proc/proc_net.c14
-rw-r--r--fs/pstore/inode.c18
-rw-r--r--fs/pstore/platform.c35
-rw-r--r--fs/pstore/ram.c10
-rw-r--r--fs/qnx6/inode.c2
-rw-r--r--fs/select.c1
-rw-r--r--fs/sysfs/group.c42
-rw-r--r--fs/sysfs/mount.c2
-rw-r--r--fs/sysfs/symlink.c45
-rw-r--r--fs/sysfs/sysfs.h2
-rw-r--r--fs/ubifs/file.c1
-rw-r--r--fs/ufs/Kconfig2
-rw-r--r--fs/xfs/Kconfig4
-rw-r--r--fs/xfs/xfs_alloc.c2
-rw-r--r--fs/xfs/xfs_attr.c9
-rw-r--r--fs/xfs/xfs_bmap.c118
-rw-r--r--fs/xfs/xfs_buf.c2
-rw-r--r--fs/xfs/xfs_buf_item.c118
-rw-r--r--fs/xfs/xfs_buf_item.h14
-rw-r--r--fs/xfs/xfs_dquot.c12
-rw-r--r--fs/xfs/xfs_fsops.c4
-rw-r--r--fs/xfs/xfs_ialloc.c4
-rw-r--r--fs/xfs/xfs_inode.c6
-rw-r--r--fs/xfs/xfs_inode.h1
-rw-r--r--fs/xfs/xfs_inode_item.c16
-rw-r--r--fs/xfs/xfs_inode_item.h4
-rw-r--r--fs/xfs/xfs_iomap.c77
-rw-r--r--fs/xfs/xfs_log.c10
-rw-r--r--fs/xfs/xfs_mount.c12
-rw-r--r--fs/xfs/xfs_mount.h9
-rw-r--r--fs/xfs/xfs_qm.c7
-rw-r--r--fs/xfs/xfs_qm_bhv.c2
-rw-r--r--fs/xfs/xfs_qm_syscalls.c28
-rw-r--r--fs/xfs/xfs_super.c29
-rw-r--r--fs/xfs/xfs_trans.c376
-rw-r--r--fs/xfs/xfs_trans.h18
-rw-r--r--fs/xfs/xfs_trans_ail.c14
-rw-r--r--fs/xfs/xfs_trans_dquot.c10
-rw-r--r--fs/xfs/xfs_trans_inode.c41
-rw-r--r--fs/xfs/xfs_types.h1
-rw-r--r--fs/xfs/xfs_vnodeops.c12
139 files changed, 2078 insertions, 1448 deletions
diff --git a/fs/9p/Kconfig b/fs/9p/Kconfig
index 0a93dc1cb4ac..55abfd62654a 100644
--- a/fs/9p/Kconfig
+++ b/fs/9p/Kconfig
@@ -11,8 +11,7 @@ config 9P_FS
11 11
12if 9P_FS 12if 9P_FS
13config 9P_FSCACHE 13config 9P_FSCACHE
14 bool "Enable 9P client caching support (EXPERIMENTAL)" 14 bool "Enable 9P client caching support"
15 depends on EXPERIMENTAL
16 depends on 9P_FS=m && FSCACHE || 9P_FS=y && FSCACHE=y 15 depends on 9P_FS=m && FSCACHE || 9P_FS=y && FSCACHE=y
17 help 16 help
18 Choose Y here to enable persistent, read-only local 17 Choose Y here to enable persistent, read-only local
diff --git a/fs/9p/vfs_dir.c b/fs/9p/vfs_dir.c
index ff911e779651..be1e34adc3c6 100644
--- a/fs/9p/vfs_dir.c
+++ b/fs/9p/vfs_dir.c
@@ -52,10 +52,9 @@
52 */ 52 */
53 53
54struct p9_rdir { 54struct p9_rdir {
55 struct mutex mutex;
56 int head; 55 int head;
57 int tail; 56 int tail;
58 uint8_t *buf; 57 uint8_t buf[];
59}; 58};
60 59
61/** 60/**
@@ -93,33 +92,12 @@ static void p9stat_init(struct p9_wstat *stbuf)
93 * 92 *
94 */ 93 */
95 94
96static int v9fs_alloc_rdir_buf(struct file *filp, int buflen) 95static struct p9_rdir *v9fs_alloc_rdir_buf(struct file *filp, int buflen)
97{ 96{
98 struct p9_rdir *rdir; 97 struct p9_fid *fid = filp->private_data;
99 struct p9_fid *fid; 98 if (!fid->rdir)
100 int err = 0; 99 fid->rdir = kzalloc(sizeof(struct p9_rdir) + buflen, GFP_KERNEL);
101 100 return fid->rdir;
102 fid = filp->private_data;
103 if (!fid->rdir) {
104 rdir = kmalloc(sizeof(struct p9_rdir) + buflen, GFP_KERNEL);
105
106 if (rdir == NULL) {
107 err = -ENOMEM;
108 goto exit;
109 }
110 spin_lock(&filp->f_dentry->d_lock);
111 if (!fid->rdir) {
112 rdir->buf = (uint8_t *)rdir + sizeof(struct p9_rdir);
113 mutex_init(&rdir->mutex);
114 rdir->head = rdir->tail = 0;
115 fid->rdir = (void *) rdir;
116 rdir = NULL;
117 }
118 spin_unlock(&filp->f_dentry->d_lock);
119 kfree(rdir);
120 }
121exit:
122 return err;
123} 101}
124 102
125/** 103/**
@@ -145,20 +123,16 @@ static int v9fs_dir_readdir(struct file *filp, void *dirent, filldir_t filldir)
145 123
146 buflen = fid->clnt->msize - P9_IOHDRSZ; 124 buflen = fid->clnt->msize - P9_IOHDRSZ;
147 125
148 err = v9fs_alloc_rdir_buf(filp, buflen); 126 rdir = v9fs_alloc_rdir_buf(filp, buflen);
149 if (err) 127 if (!rdir)
150 goto exit; 128 return -ENOMEM;
151 rdir = (struct p9_rdir *) fid->rdir;
152 129
153 err = mutex_lock_interruptible(&rdir->mutex); 130 while (1) {
154 if (err)
155 return err;
156 while (err == 0) {
157 if (rdir->tail == rdir->head) { 131 if (rdir->tail == rdir->head) {
158 err = v9fs_file_readn(filp, rdir->buf, NULL, 132 err = v9fs_file_readn(filp, rdir->buf, NULL,
159 buflen, filp->f_pos); 133 buflen, filp->f_pos);
160 if (err <= 0) 134 if (err <= 0)
161 goto unlock_and_exit; 135 return err;
162 136
163 rdir->head = 0; 137 rdir->head = 0;
164 rdir->tail = err; 138 rdir->tail = err;
@@ -169,9 +143,8 @@ static int v9fs_dir_readdir(struct file *filp, void *dirent, filldir_t filldir)
169 rdir->tail - rdir->head, &st); 143 rdir->tail - rdir->head, &st);
170 if (err) { 144 if (err) {
171 p9_debug(P9_DEBUG_VFS, "returned %d\n", err); 145 p9_debug(P9_DEBUG_VFS, "returned %d\n", err);
172 err = -EIO;
173 p9stat_free(&st); 146 p9stat_free(&st);
174 goto unlock_and_exit; 147 return -EIO;
175 } 148 }
176 reclen = st.size+2; 149 reclen = st.size+2;
177 150
@@ -180,19 +153,13 @@ static int v9fs_dir_readdir(struct file *filp, void *dirent, filldir_t filldir)
180 153
181 p9stat_free(&st); 154 p9stat_free(&st);
182 155
183 if (over) { 156 if (over)
184 err = 0; 157 return 0;
185 goto unlock_and_exit; 158
186 }
187 rdir->head += reclen; 159 rdir->head += reclen;
188 filp->f_pos += reclen; 160 filp->f_pos += reclen;
189 } 161 }
190 } 162 }
191
192unlock_and_exit:
193 mutex_unlock(&rdir->mutex);
194exit:
195 return err;
196} 163}
197 164
198/** 165/**
@@ -218,21 +185,16 @@ static int v9fs_dir_readdir_dotl(struct file *filp, void *dirent,
218 185
219 buflen = fid->clnt->msize - P9_READDIRHDRSZ; 186 buflen = fid->clnt->msize - P9_READDIRHDRSZ;
220 187
221 err = v9fs_alloc_rdir_buf(filp, buflen); 188 rdir = v9fs_alloc_rdir_buf(filp, buflen);
222 if (err) 189 if (!rdir)
223 goto exit; 190 return -ENOMEM;
224 rdir = (struct p9_rdir *) fid->rdir;
225 191
226 err = mutex_lock_interruptible(&rdir->mutex); 192 while (1) {
227 if (err)
228 return err;
229
230 while (err == 0) {
231 if (rdir->tail == rdir->head) { 193 if (rdir->tail == rdir->head) {
232 err = p9_client_readdir(fid, rdir->buf, buflen, 194 err = p9_client_readdir(fid, rdir->buf, buflen,
233 filp->f_pos); 195 filp->f_pos);
234 if (err <= 0) 196 if (err <= 0)
235 goto unlock_and_exit; 197 return err;
236 198
237 rdir->head = 0; 199 rdir->head = 0;
238 rdir->tail = err; 200 rdir->tail = err;
@@ -245,8 +207,7 @@ static int v9fs_dir_readdir_dotl(struct file *filp, void *dirent,
245 &curdirent); 207 &curdirent);
246 if (err < 0) { 208 if (err < 0) {
247 p9_debug(P9_DEBUG_VFS, "returned %d\n", err); 209 p9_debug(P9_DEBUG_VFS, "returned %d\n", err);
248 err = -EIO; 210 return -EIO;
249 goto unlock_and_exit;
250 } 211 }
251 212
252 /* d_off in dirent structure tracks the offset into 213 /* d_off in dirent structure tracks the offset into
@@ -261,20 +222,13 @@ static int v9fs_dir_readdir_dotl(struct file *filp, void *dirent,
261 curdirent.d_type); 222 curdirent.d_type);
262 oldoffset = curdirent.d_off; 223 oldoffset = curdirent.d_off;
263 224
264 if (over) { 225 if (over)
265 err = 0; 226 return 0;
266 goto unlock_and_exit;
267 }
268 227
269 filp->f_pos = curdirent.d_off; 228 filp->f_pos = curdirent.d_off;
270 rdir->head += err; 229 rdir->head += err;
271 } 230 }
272 } 231 }
273
274unlock_and_exit:
275 mutex_unlock(&rdir->mutex);
276exit:
277 return err;
278} 232}
279 233
280 234
diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c
index c2483e97beee..c921ac92ea4c 100644
--- a/fs/9p/vfs_file.c
+++ b/fs/9p/vfs_file.c
@@ -80,10 +80,6 @@ int v9fs_file_open(struct inode *inode, struct file *file)
80 p9_client_clunk(fid); 80 p9_client_clunk(fid);
81 return err; 81 return err;
82 } 82 }
83 if (file->f_flags & O_TRUNC) {
84 i_size_write(inode, 0);
85 inode->i_blocks = 0;
86 }
87 if ((file->f_flags & O_APPEND) && 83 if ((file->f_flags & O_APPEND) &&
88 (!v9fs_proto_dotu(v9ses) && !v9fs_proto_dotl(v9ses))) 84 (!v9fs_proto_dotu(v9ses) && !v9fs_proto_dotl(v9ses)))
89 generic_file_llseek(file, 0, SEEK_END); 85 generic_file_llseek(file, 0, SEEK_END);
@@ -620,6 +616,7 @@ v9fs_vm_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
620 lock_page(page); 616 lock_page(page);
621 if (page->mapping != inode->i_mapping) 617 if (page->mapping != inode->i_mapping)
622 goto out_unlock; 618 goto out_unlock;
619 wait_for_stable_page(page);
623 620
624 return VM_FAULT_LOCKED; 621 return VM_FAULT_LOCKED;
625out_unlock: 622out_unlock:
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index 890bed538f9b..57d017ac68e4 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -192,9 +192,6 @@ int v9fs_uflags2omode(int uflags, int extended)
192 break; 192 break;
193 } 193 }
194 194
195 if (uflags & O_TRUNC)
196 ret |= P9_OTRUNC;
197
198 if (extended) { 195 if (extended) {
199 if (uflags & O_EXCL) 196 if (uflags & O_EXCL)
200 ret |= P9_OEXCL; 197 ret |= P9_OEXCL;
diff --git a/fs/9p/vfs_inode_dotl.c b/fs/9p/vfs_inode_dotl.c
index 40895546e103..8d24ad66dfb8 100644
--- a/fs/9p/vfs_inode_dotl.c
+++ b/fs/9p/vfs_inode_dotl.c
@@ -186,7 +186,6 @@ static int v9fs_mapped_dotl_flags(int flags)
186 { O_CREAT, P9_DOTL_CREATE }, 186 { O_CREAT, P9_DOTL_CREATE },
187 { O_EXCL, P9_DOTL_EXCL }, 187 { O_EXCL, P9_DOTL_EXCL },
188 { O_NOCTTY, P9_DOTL_NOCTTY }, 188 { O_NOCTTY, P9_DOTL_NOCTTY },
189 { O_TRUNC, P9_DOTL_TRUNC },
190 { O_APPEND, P9_DOTL_APPEND }, 189 { O_APPEND, P9_DOTL_APPEND },
191 { O_NONBLOCK, P9_DOTL_NONBLOCK }, 190 { O_NONBLOCK, P9_DOTL_NONBLOCK },
192 { O_DSYNC, P9_DOTL_DSYNC }, 191 { O_DSYNC, P9_DOTL_DSYNC },
@@ -268,8 +267,14 @@ v9fs_vfs_atomic_open_dotl(struct inode *dir, struct dentry *dentry,
268 } 267 }
269 268
270 /* Only creates */ 269 /* Only creates */
271 if (!(flags & O_CREAT) || dentry->d_inode) 270 if (!(flags & O_CREAT))
272 return finish_no_open(file, res); 271 return finish_no_open(file, res);
272 else if (dentry->d_inode) {
273 if ((flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))
274 return -EEXIST;
275 else
276 return finish_no_open(file, res);
277 }
273 278
274 v9ses = v9fs_inode2v9ses(dir); 279 v9ses = v9fs_inode2v9ses(dir);
275 280
diff --git a/fs/adfs/Kconfig b/fs/adfs/Kconfig
index e55182a74605..c5a7787dd5e9 100644
--- a/fs/adfs/Kconfig
+++ b/fs/adfs/Kconfig
@@ -1,6 +1,6 @@
1config ADFS_FS 1config ADFS_FS
2 tristate "ADFS file system support (EXPERIMENTAL)" 2 tristate "ADFS file system support"
3 depends on BLOCK && EXPERIMENTAL 3 depends on BLOCK
4 help 4 help
5 The Acorn Disc Filing System is the standard file system of the 5 The Acorn Disc Filing System is the standard file system of the
6 RiscOS operating system which runs on Acorn's ARM-based Risc PC 6 RiscOS operating system which runs on Acorn's ARM-based Risc PC
diff --git a/fs/affs/Kconfig b/fs/affs/Kconfig
index cfad9afb4762..a04d9e848d05 100644
--- a/fs/affs/Kconfig
+++ b/fs/affs/Kconfig
@@ -1,6 +1,6 @@
1config AFFS_FS 1config AFFS_FS
2 tristate "Amiga FFS file system support (EXPERIMENTAL)" 2 tristate "Amiga FFS file system support"
3 depends on BLOCK && EXPERIMENTAL 3 depends on BLOCK
4 help 4 help
5 The Fast File System (FFS) is the common file system used on hard 5 The Fast File System (FFS) is the common file system used on hard
6 disks by Amiga(tm) systems since AmigaOS Version 1.3 (34.20). Say Y 6 disks by Amiga(tm) systems since AmigaOS Version 1.3 (34.20). Say Y
diff --git a/fs/afs/Kconfig b/fs/afs/Kconfig
index 8f975f25b486..ebba3b18e5da 100644
--- a/fs/afs/Kconfig
+++ b/fs/afs/Kconfig
@@ -1,6 +1,6 @@
1config AFS_FS 1config AFS_FS
2 tristate "Andrew File System support (AFS) (EXPERIMENTAL)" 2 tristate "Andrew File System support (AFS)"
3 depends on INET && EXPERIMENTAL 3 depends on INET
4 select AF_RXRPC 4 select AF_RXRPC
5 select DNS_RESOLVER 5 select DNS_RESOLVER
6 help 6 help
@@ -22,8 +22,7 @@ config AFS_DEBUG
22 If unsure, say N. 22 If unsure, say N.
23 23
24config AFS_FSCACHE 24config AFS_FSCACHE
25 bool "Provide AFS client caching support (EXPERIMENTAL)" 25 bool "Provide AFS client caching support"
26 depends on EXPERIMENTAL
27 depends on AFS_FS=m && FSCACHE || AFS_FS=y && FSCACHE=y 26 depends on AFS_FS=m && FSCACHE || AFS_FS=y && FSCACHE=y
28 help 27 help
29 Say Y here if you want AFS data to be cached locally on disk through 28 Say Y here if you want AFS data to be cached locally on disk through
diff --git a/fs/befs/Kconfig b/fs/befs/Kconfig
index 7835d30f211f..edc5cc2aefad 100644
--- a/fs/befs/Kconfig
+++ b/fs/befs/Kconfig
@@ -1,6 +1,6 @@
1config BEFS_FS 1config BEFS_FS
2 tristate "BeOS file system (BeFS) support (read only) (EXPERIMENTAL)" 2 tristate "BeOS file system (BeFS) support (read only)"
3 depends on BLOCK && EXPERIMENTAL 3 depends on BLOCK
4 select NLS 4 select NLS
5 help 5 help
6 The BeOS File System (BeFS) is the native file system of Be, Inc's 6 The BeOS File System (BeFS) is the native file system of Be, Inc's
diff --git a/fs/bfs/Kconfig b/fs/bfs/Kconfig
index c2336c62024f..3728a6479c64 100644
--- a/fs/bfs/Kconfig
+++ b/fs/bfs/Kconfig
@@ -1,6 +1,6 @@
1config BFS_FS 1config BFS_FS
2 tristate "BFS file system support (EXPERIMENTAL)" 2 tristate "BFS file system support"
3 depends on BLOCK && EXPERIMENTAL 3 depends on BLOCK
4 help 4 help
5 Boot File System (BFS) is a file system used under SCO UnixWare to 5 Boot File System (BFS) is a file system used under SCO UnixWare to
6 allow the bootloader access to the kernel image and other important 6 allow the bootloader access to the kernel image and other important
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 0c42cdbabecf..ff9dbc630efa 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -33,6 +33,7 @@
33#include <linux/elf.h> 33#include <linux/elf.h>
34#include <linux/utsname.h> 34#include <linux/utsname.h>
35#include <linux/coredump.h> 35#include <linux/coredump.h>
36#include <linux/sched.h>
36#include <asm/uaccess.h> 37#include <asm/uaccess.h>
37#include <asm/param.h> 38#include <asm/param.h>
38#include <asm/page.h> 39#include <asm/page.h>
@@ -1248,7 +1249,7 @@ static int writenote(struct memelfnote *men, struct file *file,
1248#undef DUMP_WRITE 1249#undef DUMP_WRITE
1249 1250
1250static void fill_elf_header(struct elfhdr *elf, int segs, 1251static void fill_elf_header(struct elfhdr *elf, int segs,
1251 u16 machine, u32 flags, u8 osabi) 1252 u16 machine, u32 flags)
1252{ 1253{
1253 memset(elf, 0, sizeof(*elf)); 1254 memset(elf, 0, sizeof(*elf));
1254 1255
@@ -1320,8 +1321,11 @@ static void fill_prstatus(struct elf_prstatus *prstatus,
1320 cputime_to_timeval(cputime.utime, &prstatus->pr_utime); 1321 cputime_to_timeval(cputime.utime, &prstatus->pr_utime);
1321 cputime_to_timeval(cputime.stime, &prstatus->pr_stime); 1322 cputime_to_timeval(cputime.stime, &prstatus->pr_stime);
1322 } else { 1323 } else {
1323 cputime_to_timeval(p->utime, &prstatus->pr_utime); 1324 cputime_t utime, stime;
1324 cputime_to_timeval(p->stime, &prstatus->pr_stime); 1325
1326 task_cputime(p, &utime, &stime);
1327 cputime_to_timeval(utime, &prstatus->pr_utime);
1328 cputime_to_timeval(stime, &prstatus->pr_stime);
1325 } 1329 }
1326 cputime_to_timeval(p->signal->cutime, &prstatus->pr_cutime); 1330 cputime_to_timeval(p->signal->cutime, &prstatus->pr_cutime);
1327 cputime_to_timeval(p->signal->cstime, &prstatus->pr_cstime); 1331 cputime_to_timeval(p->signal->cstime, &prstatus->pr_cstime);
@@ -1630,7 +1634,7 @@ static int fill_note_info(struct elfhdr *elf, int phdrs,
1630 * Initialize the ELF file header. 1634 * Initialize the ELF file header.
1631 */ 1635 */
1632 fill_elf_header(elf, phdrs, 1636 fill_elf_header(elf, phdrs,
1633 view->e_machine, view->e_flags, view->ei_osabi); 1637 view->e_machine, view->e_flags);
1634 1638
1635 /* 1639 /*
1636 * Allocate a structure for each thread. 1640 * Allocate a structure for each thread.
@@ -1870,7 +1874,7 @@ static int fill_note_info(struct elfhdr *elf, int phdrs,
1870 elf_core_copy_regs(&info->prstatus->pr_reg, regs); 1874 elf_core_copy_regs(&info->prstatus->pr_reg, regs);
1871 1875
1872 /* Set up header */ 1876 /* Set up header */
1873 fill_elf_header(elf, phdrs, ELF_ARCH, ELF_CORE_EFLAGS, ELF_OSABI); 1877 fill_elf_header(elf, phdrs, ELF_ARCH, ELF_CORE_EFLAGS);
1874 1878
1875 /* 1879 /*
1876 * Set up the notes in similar form to SVR4 core dumps made 1880 * Set up the notes in similar form to SVR4 core dumps made
diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c
index dc84732e554f..cb240dd3b402 100644
--- a/fs/binfmt_elf_fdpic.c
+++ b/fs/binfmt_elf_fdpic.c
@@ -1375,8 +1375,11 @@ static void fill_prstatus(struct elf_prstatus *prstatus,
1375 cputime_to_timeval(cputime.utime, &prstatus->pr_utime); 1375 cputime_to_timeval(cputime.utime, &prstatus->pr_utime);
1376 cputime_to_timeval(cputime.stime, &prstatus->pr_stime); 1376 cputime_to_timeval(cputime.stime, &prstatus->pr_stime);
1377 } else { 1377 } else {
1378 cputime_to_timeval(p->utime, &prstatus->pr_utime); 1378 cputime_t utime, stime;
1379 cputime_to_timeval(p->stime, &prstatus->pr_stime); 1379
1380 task_cputime(p, &utime, &stime);
1381 cputime_to_timeval(utime, &prstatus->pr_utime);
1382 cputime_to_timeval(stime, &prstatus->pr_stime);
1380 } 1383 }
1381 cputime_to_timeval(p->signal->cutime, &prstatus->pr_cutime); 1384 cputime_to_timeval(p->signal->cutime, &prstatus->pr_cutime);
1382 cputime_to_timeval(p->signal->cstime, &prstatus->pr_cstime); 1385 cputime_to_timeval(p->signal->cstime, &prstatus->pr_cstime);
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 172f8491a2bd..78333a37f49d 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -994,6 +994,7 @@ int revalidate_disk(struct gendisk *disk)
994 994
995 mutex_lock(&bdev->bd_mutex); 995 mutex_lock(&bdev->bd_mutex);
996 check_disk_size_change(disk, bdev); 996 check_disk_size_change(disk, bdev);
997 bdev->bd_invalidated = 0;
997 mutex_unlock(&bdev->bd_mutex); 998 mutex_unlock(&bdev->bd_mutex);
998 bdput(bdev); 999 bdput(bdev);
999 return ret; 1000 return ret;
diff --git a/fs/btrfs/Kconfig b/fs/btrfs/Kconfig
index d33f01c08b60..ccd25ba7a9ac 100644
--- a/fs/btrfs/Kconfig
+++ b/fs/btrfs/Kconfig
@@ -1,6 +1,5 @@
1config BTRFS_FS 1config BTRFS_FS
2 tristate "Btrfs filesystem (EXPERIMENTAL) Unstable disk format" 2 tristate "Btrfs filesystem Unstable disk format"
3 depends on EXPERIMENTAL
4 select LIBCRC32C 3 select LIBCRC32C
5 select ZLIB_INFLATE 4 select ZLIB_INFLATE
6 select ZLIB_DEFLATE 5 select ZLIB_DEFLATE
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 5a3327b8f90d..1e59ed575cc9 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -6522,7 +6522,7 @@ reada:
6522} 6522}
6523 6523
6524/* 6524/*
6525 * hepler to process tree block while walking down the tree. 6525 * helper to process tree block while walking down the tree.
6526 * 6526 *
6527 * when wc->stage == UPDATE_BACKREF, this function updates 6527 * when wc->stage == UPDATE_BACKREF, this function updates
6528 * back refs for pointers in the block. 6528 * back refs for pointers in the block.
@@ -6597,7 +6597,7 @@ static noinline int walk_down_proc(struct btrfs_trans_handle *trans,
6597} 6597}
6598 6598
6599/* 6599/*
6600 * hepler to process tree block pointer. 6600 * helper to process tree block pointer.
6601 * 6601 *
6602 * when wc->stage == DROP_REFERENCE, this function checks 6602 * when wc->stage == DROP_REFERENCE, this function checks
6603 * reference count of the block pointed to. if the block 6603 * reference count of the block pointed to. if the block
@@ -6735,7 +6735,7 @@ skip:
6735} 6735}
6736 6736
6737/* 6737/*
6738 * hepler to process tree block while walking up the tree. 6738 * helper to process tree block while walking up the tree.
6739 * 6739 *
6740 * when wc->stage == DROP_REFERENCE, this function drops 6740 * when wc->stage == DROP_REFERENCE, this function drops
6741 * reference count on the block. 6741 * reference count on the block.
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index 300e09ac3659..17c306bf177a 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -3472,7 +3472,7 @@ out:
3472} 3472}
3473 3473
3474/* 3474/*
3475 * hepler to find all tree blocks that reference a given data extent 3475 * helper to find all tree blocks that reference a given data extent
3476 */ 3476 */
3477static noinline_for_stack 3477static noinline_for_stack
3478int add_data_references(struct reloc_control *rc, 3478int add_data_references(struct reloc_control *rc,
@@ -3566,7 +3566,7 @@ int add_data_references(struct reloc_control *rc,
3566} 3566}
3567 3567
3568/* 3568/*
3569 * hepler to find next unprocessed extent 3569 * helper to find next unprocessed extent
3570 */ 3570 */
3571static noinline_for_stack 3571static noinline_for_stack
3572int find_next_extent(struct btrfs_trans_handle *trans, 3572int find_next_extent(struct btrfs_trans_handle *trans,
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index fc03aa60b684..4c0067c4f76d 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -112,7 +112,6 @@ loop:
112 * to redo the trans_no_join checks above 112 * to redo the trans_no_join checks above
113 */ 113 */
114 kmem_cache_free(btrfs_transaction_cachep, cur_trans); 114 kmem_cache_free(btrfs_transaction_cachep, cur_trans);
115 cur_trans = fs_info->running_transaction;
116 goto loop; 115 goto loop;
117 } else if (fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) { 116 } else if (fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) {
118 spin_unlock(&fs_info->trans_lock); 117 spin_unlock(&fs_info->trans_lock);
diff --git a/fs/buffer.c b/fs/buffer.c
index 7a75c3e0fd58..2ea9cd44aeae 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -2359,7 +2359,7 @@ int __block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf,
2359 if (unlikely(ret < 0)) 2359 if (unlikely(ret < 0))
2360 goto out_unlock; 2360 goto out_unlock;
2361 set_page_dirty(page); 2361 set_page_dirty(page);
2362 wait_on_page_writeback(page); 2362 wait_for_stable_page(page);
2363 return 0; 2363 return 0;
2364out_unlock: 2364out_unlock:
2365 unlock_page(page); 2365 unlock_page(page);
diff --git a/fs/ceph/Kconfig b/fs/ceph/Kconfig
index 9eb134ea6eb2..49bc78243db9 100644
--- a/fs/ceph/Kconfig
+++ b/fs/ceph/Kconfig
@@ -1,6 +1,6 @@
1config CEPH_FS 1config CEPH_FS
2 tristate "Ceph distributed file system (EXPERIMENTAL)" 2 tristate "Ceph distributed file system"
3 depends on INET && EXPERIMENTAL 3 depends on INET
4 select CEPH_LIB 4 select CEPH_LIB
5 select LIBCRC32C 5 select LIBCRC32C
6 select CRYPTO_AES 6 select CRYPTO_AES
diff --git a/fs/cifs/Kconfig b/fs/cifs/Kconfig
index 21ff76c22a17..2906ee276408 100644
--- a/fs/cifs/Kconfig
+++ b/fs/cifs/Kconfig
@@ -155,14 +155,14 @@ config CIFS_DFS_UPCALL
155 points. If unsure, say N. 155 points. If unsure, say N.
156 156
157config CIFS_NFSD_EXPORT 157config CIFS_NFSD_EXPORT
158 bool "Allow nfsd to export CIFS file system (EXPERIMENTAL)" 158 bool "Allow nfsd to export CIFS file system"
159 depends on CIFS && EXPERIMENTAL && BROKEN 159 depends on CIFS && BROKEN
160 help 160 help
161 Allows NFS server to export a CIFS mounted share (nfsd over cifs) 161 Allows NFS server to export a CIFS mounted share (nfsd over cifs)
162 162
163config CIFS_SMB2 163config CIFS_SMB2
164 bool "SMB2 network file system support (EXPERIMENTAL)" 164 bool "SMB2 network file system support"
165 depends on CIFS && EXPERIMENTAL && INET 165 depends on CIFS && INET
166 select NLS 166 select NLS
167 select KEYS 167 select KEYS
168 select FSCACHE 168 select FSCACHE
diff --git a/fs/cifs/link.c b/fs/cifs/link.c
index 51dc2fb6e854..9f6c4c45d21e 100644
--- a/fs/cifs/link.c
+++ b/fs/cifs/link.c
@@ -76,7 +76,7 @@ symlink_hash(unsigned int link_len, const char *link_str, u8 *md5_hash)
76 } 76 }
77 rc = crypto_shash_update(&sdescmd5->shash, link_str, link_len); 77 rc = crypto_shash_update(&sdescmd5->shash, link_str, link_len);
78 if (rc) { 78 if (rc) {
79 cERROR(1, "%s: Could not update iwth link_str", __func__); 79 cERROR(1, "%s: Could not update with link_str", __func__);
80 goto symlink_hash_err; 80 goto symlink_hash_err;
81 } 81 }
82 rc = crypto_shash_final(&sdescmd5->shash, md5_hash); 82 rc = crypto_shash_final(&sdescmd5->shash, md5_hash);
diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c
index 712b10f64c70..e9dcfa3c208c 100644
--- a/fs/configfs/dir.c
+++ b/fs/configfs/dir.c
@@ -1037,10 +1037,11 @@ static int configfs_dump(struct configfs_dirent *sd, int level)
1037static int configfs_depend_prep(struct dentry *origin, 1037static int configfs_depend_prep(struct dentry *origin,
1038 struct config_item *target) 1038 struct config_item *target)
1039{ 1039{
1040 struct configfs_dirent *child_sd, *sd = origin->d_fsdata; 1040 struct configfs_dirent *child_sd, *sd;
1041 int ret = 0; 1041 int ret = 0;
1042 1042
1043 BUG_ON(!origin || !sd); 1043 BUG_ON(!origin || !origin->d_fsdata);
1044 sd = origin->d_fsdata;
1044 1045
1045 if (sd->s_element == target) /* Boo-yah */ 1046 if (sd->s_element == target) /* Boo-yah */
1046 goto out; 1047 goto out;
diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c
index a5f12b7e228d..0c4f80b447fb 100644
--- a/fs/debugfs/inode.c
+++ b/fs/debugfs/inode.c
@@ -322,7 +322,6 @@ static struct dentry *__create_file(const char *name, umode_t mode,
322 if (!parent) 322 if (!parent)
323 parent = debugfs_mount->mnt_root; 323 parent = debugfs_mount->mnt_root;
324 324
325 dentry = NULL;
326 mutex_lock(&parent->d_inode->i_mutex); 325 mutex_lock(&parent->d_inode->i_mutex);
327 dentry = lookup_one_len(name, parent, strlen(name)); 326 dentry = lookup_one_len(name, parent, strlen(name));
328 if (!IS_ERR(dentry)) { 327 if (!IS_ERR(dentry)) {
diff --git a/fs/dlm/dlm_internal.h b/fs/dlm/dlm_internal.h
index 77c0f70f8fe8..e7665c31f7b1 100644
--- a/fs/dlm/dlm_internal.h
+++ b/fs/dlm/dlm_internal.h
@@ -96,10 +96,13 @@ do { \
96} 96}
97 97
98 98
99#define DLM_RTF_SHRINK 0x00000001
100
99struct dlm_rsbtable { 101struct dlm_rsbtable {
100 struct rb_root keep; 102 struct rb_root keep;
101 struct rb_root toss; 103 struct rb_root toss;
102 spinlock_t lock; 104 spinlock_t lock;
105 uint32_t flags;
103}; 106};
104 107
105 108
diff --git a/fs/dlm/lock.c b/fs/dlm/lock.c
index a579f30f237d..f7501651762d 100644
--- a/fs/dlm/lock.c
+++ b/fs/dlm/lock.c
@@ -1132,6 +1132,7 @@ static void toss_rsb(struct kref *kref)
1132 rb_erase(&r->res_hashnode, &ls->ls_rsbtbl[r->res_bucket].keep); 1132 rb_erase(&r->res_hashnode, &ls->ls_rsbtbl[r->res_bucket].keep);
1133 rsb_insert(r, &ls->ls_rsbtbl[r->res_bucket].toss); 1133 rsb_insert(r, &ls->ls_rsbtbl[r->res_bucket].toss);
1134 r->res_toss_time = jiffies; 1134 r->res_toss_time = jiffies;
1135 ls->ls_rsbtbl[r->res_bucket].flags |= DLM_RTF_SHRINK;
1135 if (r->res_lvbptr) { 1136 if (r->res_lvbptr) {
1136 dlm_free_lvb(r->res_lvbptr); 1137 dlm_free_lvb(r->res_lvbptr);
1137 r->res_lvbptr = NULL; 1138 r->res_lvbptr = NULL;
@@ -1659,11 +1660,18 @@ static void shrink_bucket(struct dlm_ls *ls, int b)
1659 char *name; 1660 char *name;
1660 int our_nodeid = dlm_our_nodeid(); 1661 int our_nodeid = dlm_our_nodeid();
1661 int remote_count = 0; 1662 int remote_count = 0;
1663 int need_shrink = 0;
1662 int i, len, rv; 1664 int i, len, rv;
1663 1665
1664 memset(&ls->ls_remove_lens, 0, sizeof(int) * DLM_REMOVE_NAMES_MAX); 1666 memset(&ls->ls_remove_lens, 0, sizeof(int) * DLM_REMOVE_NAMES_MAX);
1665 1667
1666 spin_lock(&ls->ls_rsbtbl[b].lock); 1668 spin_lock(&ls->ls_rsbtbl[b].lock);
1669
1670 if (!(ls->ls_rsbtbl[b].flags & DLM_RTF_SHRINK)) {
1671 spin_unlock(&ls->ls_rsbtbl[b].lock);
1672 return;
1673 }
1674
1667 for (n = rb_first(&ls->ls_rsbtbl[b].toss); n; n = next) { 1675 for (n = rb_first(&ls->ls_rsbtbl[b].toss); n; n = next) {
1668 next = rb_next(n); 1676 next = rb_next(n);
1669 r = rb_entry(n, struct dlm_rsb, res_hashnode); 1677 r = rb_entry(n, struct dlm_rsb, res_hashnode);
@@ -1679,6 +1687,8 @@ static void shrink_bucket(struct dlm_ls *ls, int b)
1679 continue; 1687 continue;
1680 } 1688 }
1681 1689
1690 need_shrink = 1;
1691
1682 if (!time_after_eq(jiffies, r->res_toss_time + 1692 if (!time_after_eq(jiffies, r->res_toss_time +
1683 dlm_config.ci_toss_secs * HZ)) { 1693 dlm_config.ci_toss_secs * HZ)) {
1684 continue; 1694 continue;
@@ -1710,6 +1720,11 @@ static void shrink_bucket(struct dlm_ls *ls, int b)
1710 rb_erase(&r->res_hashnode, &ls->ls_rsbtbl[b].toss); 1720 rb_erase(&r->res_hashnode, &ls->ls_rsbtbl[b].toss);
1711 dlm_free_rsb(r); 1721 dlm_free_rsb(r);
1712 } 1722 }
1723
1724 if (need_shrink)
1725 ls->ls_rsbtbl[b].flags |= DLM_RTF_SHRINK;
1726 else
1727 ls->ls_rsbtbl[b].flags &= ~DLM_RTF_SHRINK;
1713 spin_unlock(&ls->ls_rsbtbl[b].lock); 1728 spin_unlock(&ls->ls_rsbtbl[b].lock);
1714 1729
1715 /* 1730 /*
diff --git a/fs/ecryptfs/Kconfig b/fs/ecryptfs/Kconfig
index cc16562654de..e15ef38c24fa 100644
--- a/fs/ecryptfs/Kconfig
+++ b/fs/ecryptfs/Kconfig
@@ -1,6 +1,6 @@
1config ECRYPT_FS 1config ECRYPT_FS
2 tristate "eCrypt filesystem layer support (EXPERIMENTAL)" 2 tristate "eCrypt filesystem layer support"
3 depends on EXPERIMENTAL && KEYS && CRYPTO && (ENCRYPTED_KEYS || ENCRYPTED_KEYS=n) 3 depends on KEYS && CRYPTO && (ENCRYPTED_KEYS || ENCRYPTED_KEYS=n)
4 select CRYPTO_ECB 4 select CRYPTO_ECB
5 select CRYPTO_CBC 5 select CRYPTO_CBC
6 select CRYPTO_MD5 6 select CRYPTO_MD5
diff --git a/fs/efs/Kconfig b/fs/efs/Kconfig
index 6ebfc1c207a8..d020e3c30fea 100644
--- a/fs/efs/Kconfig
+++ b/fs/efs/Kconfig
@@ -1,6 +1,6 @@
1config EFS_FS 1config EFS_FS
2 tristate "EFS file system support (read only) (EXPERIMENTAL)" 2 tristate "EFS file system support (read only)"
3 depends on BLOCK && EXPERIMENTAL 3 depends on BLOCK
4 help 4 help
5 EFS is an older file system used for non-ISO9660 CD-ROMs and hard 5 EFS is an older file system used for non-ISO9660 CD-ROMs and hard
6 disk partitions by SGI's IRIX operating system (IRIX 6.0 and newer 6 disk partitions by SGI's IRIX operating system (IRIX 6.0 and newer
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index 6e50223b3299..4ba2683c1d44 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -2065,6 +2065,7 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
2065 test_opt(sb,DATA_FLAGS) == EXT3_MOUNT_JOURNAL_DATA ? "journal": 2065 test_opt(sb,DATA_FLAGS) == EXT3_MOUNT_JOURNAL_DATA ? "journal":
2066 test_opt(sb,DATA_FLAGS) == EXT3_MOUNT_ORDERED_DATA ? "ordered": 2066 test_opt(sb,DATA_FLAGS) == EXT3_MOUNT_ORDERED_DATA ? "ordered":
2067 "writeback"); 2067 "writeback");
2068 sb->s_flags |= MS_SNAP_STABLE;
2068 2069
2069 return 0; 2070 return 0;
2070 2071
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index cbfe13bf5b2a..cd818d8bb221 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -4968,7 +4968,7 @@ int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
4968 0, len, NULL, 4968 0, len, NULL,
4969 ext4_bh_unmapped)) { 4969 ext4_bh_unmapped)) {
4970 /* Wait so that we don't change page under IO */ 4970 /* Wait so that we don't change page under IO */
4971 wait_on_page_writeback(page); 4971 wait_for_stable_page(page);
4972 ret = VM_FAULT_LOCKED; 4972 ret = VM_FAULT_LOCKED;
4973 goto out; 4973 goto out;
4974 } 4974 }
diff --git a/fs/file.c b/fs/file.c
index 2b3570b7caeb..3906d9577a18 100644
--- a/fs/file.c
+++ b/fs/file.c
@@ -516,7 +516,7 @@ struct files_struct init_files = {
516 .close_on_exec = init_files.close_on_exec_init, 516 .close_on_exec = init_files.close_on_exec_init,
517 .open_fds = init_files.open_fds_init, 517 .open_fds = init_files.open_fds_init,
518 }, 518 },
519 .file_lock = __SPIN_LOCK_UNLOCKED(init_task.file_lock), 519 .file_lock = __SPIN_LOCK_UNLOCKED(init_files.file_lock),
520}; 520};
521 521
522/* 522/*
diff --git a/fs/fuse/cuse.c b/fs/fuse/cuse.c
index e397b675b029..6f96a8def147 100644
--- a/fs/fuse/cuse.c
+++ b/fs/fuse/cuse.c
@@ -91,19 +91,22 @@ static ssize_t cuse_read(struct file *file, char __user *buf, size_t count,
91 loff_t *ppos) 91 loff_t *ppos)
92{ 92{
93 loff_t pos = 0; 93 loff_t pos = 0;
94 struct iovec iov = { .iov_base = buf, .iov_len = count };
94 95
95 return fuse_direct_io(file, buf, count, &pos, 0); 96 return fuse_direct_io(file, &iov, 1, count, &pos, 0);
96} 97}
97 98
98static ssize_t cuse_write(struct file *file, const char __user *buf, 99static ssize_t cuse_write(struct file *file, const char __user *buf,
99 size_t count, loff_t *ppos) 100 size_t count, loff_t *ppos)
100{ 101{
101 loff_t pos = 0; 102 loff_t pos = 0;
103 struct iovec iov = { .iov_base = (void __user *)buf, .iov_len = count };
104
102 /* 105 /*
103 * No locking or generic_write_checks(), the server is 106 * No locking or generic_write_checks(), the server is
104 * responsible for locking and sanity checks. 107 * responsible for locking and sanity checks.
105 */ 108 */
106 return fuse_direct_io(file, buf, count, &pos, 1); 109 return fuse_direct_io(file, &iov, 1, count, &pos, 1);
107} 110}
108 111
109static int cuse_open(struct inode *inode, struct file *file) 112static int cuse_open(struct inode *inode, struct file *file)
@@ -419,7 +422,7 @@ static int cuse_send_init(struct cuse_conn *cc)
419 422
420 BUILD_BUG_ON(CUSE_INIT_INFO_MAX > PAGE_SIZE); 423 BUILD_BUG_ON(CUSE_INIT_INFO_MAX > PAGE_SIZE);
421 424
422 req = fuse_get_req(fc); 425 req = fuse_get_req(fc, 1);
423 if (IS_ERR(req)) { 426 if (IS_ERR(req)) {
424 rc = PTR_ERR(req); 427 rc = PTR_ERR(req);
425 goto err; 428 goto err;
@@ -449,6 +452,7 @@ static int cuse_send_init(struct cuse_conn *cc)
449 req->out.argvar = 1; 452 req->out.argvar = 1;
450 req->out.argpages = 1; 453 req->out.argpages = 1;
451 req->pages[0] = page; 454 req->pages[0] = page;
455 req->page_descs[0].length = req->out.args[1].size;
452 req->num_pages = 1; 456 req->num_pages = 1;
453 req->end = cuse_process_init_reply; 457 req->end = cuse_process_init_reply;
454 fuse_request_send_background(fc, req); 458 fuse_request_send_background(fc, req);
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index e83351aa5bad..e9bdec0b16d9 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -34,34 +34,67 @@ static struct fuse_conn *fuse_get_conn(struct file *file)
34 return file->private_data; 34 return file->private_data;
35} 35}
36 36
37static void fuse_request_init(struct fuse_req *req) 37static void fuse_request_init(struct fuse_req *req, struct page **pages,
38 struct fuse_page_desc *page_descs,
39 unsigned npages)
38{ 40{
39 memset(req, 0, sizeof(*req)); 41 memset(req, 0, sizeof(*req));
42 memset(pages, 0, sizeof(*pages) * npages);
43 memset(page_descs, 0, sizeof(*page_descs) * npages);
40 INIT_LIST_HEAD(&req->list); 44 INIT_LIST_HEAD(&req->list);
41 INIT_LIST_HEAD(&req->intr_entry); 45 INIT_LIST_HEAD(&req->intr_entry);
42 init_waitqueue_head(&req->waitq); 46 init_waitqueue_head(&req->waitq);
43 atomic_set(&req->count, 1); 47 atomic_set(&req->count, 1);
48 req->pages = pages;
49 req->page_descs = page_descs;
50 req->max_pages = npages;
44} 51}
45 52
46struct fuse_req *fuse_request_alloc(void) 53static struct fuse_req *__fuse_request_alloc(unsigned npages, gfp_t flags)
47{ 54{
48 struct fuse_req *req = kmem_cache_alloc(fuse_req_cachep, GFP_KERNEL); 55 struct fuse_req *req = kmem_cache_alloc(fuse_req_cachep, flags);
49 if (req) 56 if (req) {
50 fuse_request_init(req); 57 struct page **pages;
58 struct fuse_page_desc *page_descs;
59
60 if (npages <= FUSE_REQ_INLINE_PAGES) {
61 pages = req->inline_pages;
62 page_descs = req->inline_page_descs;
63 } else {
64 pages = kmalloc(sizeof(struct page *) * npages, flags);
65 page_descs = kmalloc(sizeof(struct fuse_page_desc) *
66 npages, flags);
67 }
68
69 if (!pages || !page_descs) {
70 kfree(pages);
71 kfree(page_descs);
72 kmem_cache_free(fuse_req_cachep, req);
73 return NULL;
74 }
75
76 fuse_request_init(req, pages, page_descs, npages);
77 }
51 return req; 78 return req;
52} 79}
80
81struct fuse_req *fuse_request_alloc(unsigned npages)
82{
83 return __fuse_request_alloc(npages, GFP_KERNEL);
84}
53EXPORT_SYMBOL_GPL(fuse_request_alloc); 85EXPORT_SYMBOL_GPL(fuse_request_alloc);
54 86
55struct fuse_req *fuse_request_alloc_nofs(void) 87struct fuse_req *fuse_request_alloc_nofs(unsigned npages)
56{ 88{
57 struct fuse_req *req = kmem_cache_alloc(fuse_req_cachep, GFP_NOFS); 89 return __fuse_request_alloc(npages, GFP_NOFS);
58 if (req)
59 fuse_request_init(req);
60 return req;
61} 90}
62 91
63void fuse_request_free(struct fuse_req *req) 92void fuse_request_free(struct fuse_req *req)
64{ 93{
94 if (req->pages != req->inline_pages) {
95 kfree(req->pages);
96 kfree(req->page_descs);
97 }
65 kmem_cache_free(fuse_req_cachep, req); 98 kmem_cache_free(fuse_req_cachep, req);
66} 99}
67 100
@@ -97,7 +130,7 @@ static void fuse_req_init_context(struct fuse_req *req)
97 req->in.h.pid = current->pid; 130 req->in.h.pid = current->pid;
98} 131}
99 132
100struct fuse_req *fuse_get_req(struct fuse_conn *fc) 133struct fuse_req *fuse_get_req(struct fuse_conn *fc, unsigned npages)
101{ 134{
102 struct fuse_req *req; 135 struct fuse_req *req;
103 sigset_t oldset; 136 sigset_t oldset;
@@ -116,7 +149,7 @@ struct fuse_req *fuse_get_req(struct fuse_conn *fc)
116 if (!fc->connected) 149 if (!fc->connected)
117 goto out; 150 goto out;
118 151
119 req = fuse_request_alloc(); 152 req = fuse_request_alloc(npages);
120 err = -ENOMEM; 153 err = -ENOMEM;
121 if (!req) 154 if (!req)
122 goto out; 155 goto out;
@@ -165,7 +198,7 @@ static void put_reserved_req(struct fuse_conn *fc, struct fuse_req *req)
165 struct fuse_file *ff = file->private_data; 198 struct fuse_file *ff = file->private_data;
166 199
167 spin_lock(&fc->lock); 200 spin_lock(&fc->lock);
168 fuse_request_init(req); 201 fuse_request_init(req, req->pages, req->page_descs, req->max_pages);
169 BUG_ON(ff->reserved_req); 202 BUG_ON(ff->reserved_req);
170 ff->reserved_req = req; 203 ff->reserved_req = req;
171 wake_up_all(&fc->reserved_req_waitq); 204 wake_up_all(&fc->reserved_req_waitq);
@@ -186,13 +219,14 @@ static void put_reserved_req(struct fuse_conn *fc, struct fuse_req *req)
186 * filesystem should not have it's own file open. If deadlock is 219 * filesystem should not have it's own file open. If deadlock is
187 * intentional, it can still be broken by "aborting" the filesystem. 220 * intentional, it can still be broken by "aborting" the filesystem.
188 */ 221 */
189struct fuse_req *fuse_get_req_nofail(struct fuse_conn *fc, struct file *file) 222struct fuse_req *fuse_get_req_nofail_nopages(struct fuse_conn *fc,
223 struct file *file)
190{ 224{
191 struct fuse_req *req; 225 struct fuse_req *req;
192 226
193 atomic_inc(&fc->num_waiting); 227 atomic_inc(&fc->num_waiting);
194 wait_event(fc->blocked_waitq, !fc->blocked); 228 wait_event(fc->blocked_waitq, !fc->blocked);
195 req = fuse_request_alloc(); 229 req = fuse_request_alloc(0);
196 if (!req) 230 if (!req)
197 req = get_reserved_req(fc, file); 231 req = get_reserved_req(fc, file);
198 232
@@ -406,9 +440,8 @@ __acquires(fc->lock)
406 } 440 }
407} 441}
408 442
409void fuse_request_send(struct fuse_conn *fc, struct fuse_req *req) 443static void __fuse_request_send(struct fuse_conn *fc, struct fuse_req *req)
410{ 444{
411 req->isreply = 1;
412 spin_lock(&fc->lock); 445 spin_lock(&fc->lock);
413 if (!fc->connected) 446 if (!fc->connected)
414 req->out.h.error = -ENOTCONN; 447 req->out.h.error = -ENOTCONN;
@@ -425,6 +458,12 @@ void fuse_request_send(struct fuse_conn *fc, struct fuse_req *req)
425 } 458 }
426 spin_unlock(&fc->lock); 459 spin_unlock(&fc->lock);
427} 460}
461
462void fuse_request_send(struct fuse_conn *fc, struct fuse_req *req)
463{
464 req->isreply = 1;
465 __fuse_request_send(fc, req);
466}
428EXPORT_SYMBOL_GPL(fuse_request_send); 467EXPORT_SYMBOL_GPL(fuse_request_send);
429 468
430static void fuse_request_send_nowait_locked(struct fuse_conn *fc, 469static void fuse_request_send_nowait_locked(struct fuse_conn *fc,
@@ -491,6 +530,27 @@ void fuse_request_send_background_locked(struct fuse_conn *fc,
491 fuse_request_send_nowait_locked(fc, req); 530 fuse_request_send_nowait_locked(fc, req);
492} 531}
493 532
533void fuse_force_forget(struct file *file, u64 nodeid)
534{
535 struct inode *inode = file->f_path.dentry->d_inode;
536 struct fuse_conn *fc = get_fuse_conn(inode);
537 struct fuse_req *req;
538 struct fuse_forget_in inarg;
539
540 memset(&inarg, 0, sizeof(inarg));
541 inarg.nlookup = 1;
542 req = fuse_get_req_nofail_nopages(fc, file);
543 req->in.h.opcode = FUSE_FORGET;
544 req->in.h.nodeid = nodeid;
545 req->in.numargs = 1;
546 req->in.args[0].size = sizeof(inarg);
547 req->in.args[0].value = &inarg;
548 req->isreply = 0;
549 __fuse_request_send(fc, req);
550 /* ignore errors */
551 fuse_put_request(fc, req);
552}
553
494/* 554/*
495 * Lock the request. Up to the next unlock_request() there mustn't be 555 * Lock the request. Up to the next unlock_request() there mustn't be
496 * anything that could cause a page-fault. If the request was already 556 * anything that could cause a page-fault. If the request was already
@@ -850,11 +910,11 @@ static int fuse_copy_pages(struct fuse_copy_state *cs, unsigned nbytes,
850{ 910{
851 unsigned i; 911 unsigned i;
852 struct fuse_req *req = cs->req; 912 struct fuse_req *req = cs->req;
853 unsigned offset = req->page_offset;
854 unsigned count = min(nbytes, (unsigned) PAGE_SIZE - offset);
855 913
856 for (i = 0; i < req->num_pages && (nbytes || zeroing); i++) { 914 for (i = 0; i < req->num_pages && (nbytes || zeroing); i++) {
857 int err; 915 int err;
916 unsigned offset = req->page_descs[i].offset;
917 unsigned count = min(nbytes, req->page_descs[i].length);
858 918
859 err = fuse_copy_page(cs, &req->pages[i], offset, count, 919 err = fuse_copy_page(cs, &req->pages[i], offset, count,
860 zeroing); 920 zeroing);
@@ -862,8 +922,6 @@ static int fuse_copy_pages(struct fuse_copy_state *cs, unsigned nbytes,
862 return err; 922 return err;
863 923
864 nbytes -= count; 924 nbytes -= count;
865 count = min(nbytes, (unsigned) PAGE_SIZE);
866 offset = 0;
867 } 925 }
868 return 0; 926 return 0;
869} 927}
@@ -1536,29 +1594,34 @@ static int fuse_retrieve(struct fuse_conn *fc, struct inode *inode,
1536 unsigned int num; 1594 unsigned int num;
1537 unsigned int offset; 1595 unsigned int offset;
1538 size_t total_len = 0; 1596 size_t total_len = 0;
1597 int num_pages;
1598
1599 offset = outarg->offset & ~PAGE_CACHE_MASK;
1600 file_size = i_size_read(inode);
1601
1602 num = outarg->size;
1603 if (outarg->offset > file_size)
1604 num = 0;
1605 else if (outarg->offset + num > file_size)
1606 num = file_size - outarg->offset;
1607
1608 num_pages = (num + offset + PAGE_SIZE - 1) >> PAGE_SHIFT;
1609 num_pages = min(num_pages, FUSE_MAX_PAGES_PER_REQ);
1539 1610
1540 req = fuse_get_req(fc); 1611 req = fuse_get_req(fc, num_pages);
1541 if (IS_ERR(req)) 1612 if (IS_ERR(req))
1542 return PTR_ERR(req); 1613 return PTR_ERR(req);
1543 1614
1544 offset = outarg->offset & ~PAGE_CACHE_MASK;
1545
1546 req->in.h.opcode = FUSE_NOTIFY_REPLY; 1615 req->in.h.opcode = FUSE_NOTIFY_REPLY;
1547 req->in.h.nodeid = outarg->nodeid; 1616 req->in.h.nodeid = outarg->nodeid;
1548 req->in.numargs = 2; 1617 req->in.numargs = 2;
1549 req->in.argpages = 1; 1618 req->in.argpages = 1;
1550 req->page_offset = offset; 1619 req->page_descs[0].offset = offset;
1551 req->end = fuse_retrieve_end; 1620 req->end = fuse_retrieve_end;
1552 1621
1553 index = outarg->offset >> PAGE_CACHE_SHIFT; 1622 index = outarg->offset >> PAGE_CACHE_SHIFT;
1554 file_size = i_size_read(inode);
1555 num = outarg->size;
1556 if (outarg->offset > file_size)
1557 num = 0;
1558 else if (outarg->offset + num > file_size)
1559 num = file_size - outarg->offset;
1560 1623
1561 while (num && req->num_pages < FUSE_MAX_PAGES_PER_REQ) { 1624 while (num && req->num_pages < num_pages) {
1562 struct page *page; 1625 struct page *page;
1563 unsigned int this_num; 1626 unsigned int this_num;
1564 1627
@@ -1568,6 +1631,7 @@ static int fuse_retrieve(struct fuse_conn *fc, struct inode *inode,
1568 1631
1569 this_num = min_t(unsigned, num, PAGE_CACHE_SIZE - offset); 1632 this_num = min_t(unsigned, num, PAGE_CACHE_SIZE - offset);
1570 req->pages[req->num_pages] = page; 1633 req->pages[req->num_pages] = page;
1634 req->page_descs[req->num_pages].length = this_num;
1571 req->num_pages++; 1635 req->num_pages++;
1572 1636
1573 offset = 0; 1637 offset = 0;
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index b7c09f9eb40c..85065221a58a 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -14,6 +14,29 @@
14#include <linux/namei.h> 14#include <linux/namei.h>
15#include <linux/slab.h> 15#include <linux/slab.h>
16 16
17static bool fuse_use_readdirplus(struct inode *dir, struct file *filp)
18{
19 struct fuse_conn *fc = get_fuse_conn(dir);
20 struct fuse_inode *fi = get_fuse_inode(dir);
21
22 if (!fc->do_readdirplus)
23 return false;
24 if (!fc->readdirplus_auto)
25 return true;
26 if (test_and_clear_bit(FUSE_I_ADVISE_RDPLUS, &fi->state))
27 return true;
28 if (filp->f_pos == 0)
29 return true;
30 return false;
31}
32
33static void fuse_advise_use_readdirplus(struct inode *dir)
34{
35 struct fuse_inode *fi = get_fuse_inode(dir);
36
37 set_bit(FUSE_I_ADVISE_RDPLUS, &fi->state);
38}
39
17#if BITS_PER_LONG >= 64 40#if BITS_PER_LONG >= 64
18static inline void fuse_dentry_settime(struct dentry *entry, u64 time) 41static inline void fuse_dentry_settime(struct dentry *entry, u64 time)
19{ 42{
@@ -178,7 +201,7 @@ static int fuse_dentry_revalidate(struct dentry *entry, unsigned int flags)
178 return -ECHILD; 201 return -ECHILD;
179 202
180 fc = get_fuse_conn(inode); 203 fc = get_fuse_conn(inode);
181 req = fuse_get_req(fc); 204 req = fuse_get_req_nopages(fc);
182 if (IS_ERR(req)) 205 if (IS_ERR(req))
183 return 0; 206 return 0;
184 207
@@ -219,6 +242,7 @@ static int fuse_dentry_revalidate(struct dentry *entry, unsigned int flags)
219 attr_version); 242 attr_version);
220 fuse_change_entry_timeout(entry, &outarg); 243 fuse_change_entry_timeout(entry, &outarg);
221 } 244 }
245 fuse_advise_use_readdirplus(inode);
222 return 1; 246 return 1;
223} 247}
224 248
@@ -271,7 +295,7 @@ int fuse_lookup_name(struct super_block *sb, u64 nodeid, struct qstr *name,
271 if (name->len > FUSE_NAME_MAX) 295 if (name->len > FUSE_NAME_MAX)
272 goto out; 296 goto out;
273 297
274 req = fuse_get_req(fc); 298 req = fuse_get_req_nopages(fc);
275 err = PTR_ERR(req); 299 err = PTR_ERR(req);
276 if (IS_ERR(req)) 300 if (IS_ERR(req))
277 goto out; 301 goto out;
@@ -355,6 +379,7 @@ static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry,
355 else 379 else
356 fuse_invalidate_entry_cache(entry); 380 fuse_invalidate_entry_cache(entry);
357 381
382 fuse_advise_use_readdirplus(dir);
358 return newent; 383 return newent;
359 384
360 out_iput: 385 out_iput:
@@ -391,7 +416,7 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry,
391 if (!forget) 416 if (!forget)
392 goto out_err; 417 goto out_err;
393 418
394 req = fuse_get_req(fc); 419 req = fuse_get_req_nopages(fc);
395 err = PTR_ERR(req); 420 err = PTR_ERR(req);
396 if (IS_ERR(req)) 421 if (IS_ERR(req))
397 goto out_put_forget_req; 422 goto out_put_forget_req;
@@ -592,7 +617,7 @@ static int fuse_mknod(struct inode *dir, struct dentry *entry, umode_t mode,
592{ 617{
593 struct fuse_mknod_in inarg; 618 struct fuse_mknod_in inarg;
594 struct fuse_conn *fc = get_fuse_conn(dir); 619 struct fuse_conn *fc = get_fuse_conn(dir);
595 struct fuse_req *req = fuse_get_req(fc); 620 struct fuse_req *req = fuse_get_req_nopages(fc);
596 if (IS_ERR(req)) 621 if (IS_ERR(req))
597 return PTR_ERR(req); 622 return PTR_ERR(req);
598 623
@@ -623,7 +648,7 @@ static int fuse_mkdir(struct inode *dir, struct dentry *entry, umode_t mode)
623{ 648{
624 struct fuse_mkdir_in inarg; 649 struct fuse_mkdir_in inarg;
625 struct fuse_conn *fc = get_fuse_conn(dir); 650 struct fuse_conn *fc = get_fuse_conn(dir);
626 struct fuse_req *req = fuse_get_req(fc); 651 struct fuse_req *req = fuse_get_req_nopages(fc);
627 if (IS_ERR(req)) 652 if (IS_ERR(req))
628 return PTR_ERR(req); 653 return PTR_ERR(req);
629 654
@@ -647,7 +672,7 @@ static int fuse_symlink(struct inode *dir, struct dentry *entry,
647{ 672{
648 struct fuse_conn *fc = get_fuse_conn(dir); 673 struct fuse_conn *fc = get_fuse_conn(dir);
649 unsigned len = strlen(link) + 1; 674 unsigned len = strlen(link) + 1;
650 struct fuse_req *req = fuse_get_req(fc); 675 struct fuse_req *req = fuse_get_req_nopages(fc);
651 if (IS_ERR(req)) 676 if (IS_ERR(req))
652 return PTR_ERR(req); 677 return PTR_ERR(req);
653 678
@@ -664,7 +689,7 @@ static int fuse_unlink(struct inode *dir, struct dentry *entry)
664{ 689{
665 int err; 690 int err;
666 struct fuse_conn *fc = get_fuse_conn(dir); 691 struct fuse_conn *fc = get_fuse_conn(dir);
667 struct fuse_req *req = fuse_get_req(fc); 692 struct fuse_req *req = fuse_get_req_nopages(fc);
668 if (IS_ERR(req)) 693 if (IS_ERR(req))
669 return PTR_ERR(req); 694 return PTR_ERR(req);
670 695
@@ -682,7 +707,14 @@ static int fuse_unlink(struct inode *dir, struct dentry *entry)
682 707
683 spin_lock(&fc->lock); 708 spin_lock(&fc->lock);
684 fi->attr_version = ++fc->attr_version; 709 fi->attr_version = ++fc->attr_version;
685 drop_nlink(inode); 710 /*
711 * If i_nlink == 0 then unlink doesn't make sense, yet this can
712 * happen if userspace filesystem is careless. It would be
713 * difficult to enforce correct nlink usage so just ignore this
714 * condition here
715 */
716 if (inode->i_nlink > 0)
717 drop_nlink(inode);
686 spin_unlock(&fc->lock); 718 spin_unlock(&fc->lock);
687 fuse_invalidate_attr(inode); 719 fuse_invalidate_attr(inode);
688 fuse_invalidate_attr(dir); 720 fuse_invalidate_attr(dir);
@@ -696,7 +728,7 @@ static int fuse_rmdir(struct inode *dir, struct dentry *entry)
696{ 728{
697 int err; 729 int err;
698 struct fuse_conn *fc = get_fuse_conn(dir); 730 struct fuse_conn *fc = get_fuse_conn(dir);
699 struct fuse_req *req = fuse_get_req(fc); 731 struct fuse_req *req = fuse_get_req_nopages(fc);
700 if (IS_ERR(req)) 732 if (IS_ERR(req))
701 return PTR_ERR(req); 733 return PTR_ERR(req);
702 734
@@ -723,7 +755,7 @@ static int fuse_rename(struct inode *olddir, struct dentry *oldent,
723 int err; 755 int err;
724 struct fuse_rename_in inarg; 756 struct fuse_rename_in inarg;
725 struct fuse_conn *fc = get_fuse_conn(olddir); 757 struct fuse_conn *fc = get_fuse_conn(olddir);
726 struct fuse_req *req = fuse_get_req(fc); 758 struct fuse_req *req = fuse_get_req_nopages(fc);
727 759
728 if (IS_ERR(req)) 760 if (IS_ERR(req))
729 return PTR_ERR(req); 761 return PTR_ERR(req);
@@ -776,7 +808,7 @@ static int fuse_link(struct dentry *entry, struct inode *newdir,
776 struct fuse_link_in inarg; 808 struct fuse_link_in inarg;
777 struct inode *inode = entry->d_inode; 809 struct inode *inode = entry->d_inode;
778 struct fuse_conn *fc = get_fuse_conn(inode); 810 struct fuse_conn *fc = get_fuse_conn(inode);
779 struct fuse_req *req = fuse_get_req(fc); 811 struct fuse_req *req = fuse_get_req_nopages(fc);
780 if (IS_ERR(req)) 812 if (IS_ERR(req))
781 return PTR_ERR(req); 813 return PTR_ERR(req);
782 814
@@ -848,7 +880,7 @@ static int fuse_do_getattr(struct inode *inode, struct kstat *stat,
848 struct fuse_req *req; 880 struct fuse_req *req;
849 u64 attr_version; 881 u64 attr_version;
850 882
851 req = fuse_get_req(fc); 883 req = fuse_get_req_nopages(fc);
852 if (IS_ERR(req)) 884 if (IS_ERR(req))
853 return PTR_ERR(req); 885 return PTR_ERR(req);
854 886
@@ -985,7 +1017,7 @@ int fuse_reverse_inval_entry(struct super_block *sb, u64 parent_nodeid,
985 1017
986/* 1018/*
987 * Calling into a user-controlled filesystem gives the filesystem 1019 * Calling into a user-controlled filesystem gives the filesystem
988 * daemon ptrace-like capabilities over the requester process. This 1020 * daemon ptrace-like capabilities over the current process. This
989 * means, that the filesystem daemon is able to record the exact 1021 * means, that the filesystem daemon is able to record the exact
990 * filesystem operations performed, and can also control the behavior 1022 * filesystem operations performed, and can also control the behavior
991 * of the requester process in otherwise impossible ways. For example 1023 * of the requester process in otherwise impossible ways. For example
@@ -996,27 +1028,23 @@ int fuse_reverse_inval_entry(struct super_block *sb, u64 parent_nodeid,
996 * for which the owner of the mount has ptrace privilege. This 1028 * for which the owner of the mount has ptrace privilege. This
997 * excludes processes started by other users, suid or sgid processes. 1029 * excludes processes started by other users, suid or sgid processes.
998 */ 1030 */
999int fuse_allow_task(struct fuse_conn *fc, struct task_struct *task) 1031int fuse_allow_current_process(struct fuse_conn *fc)
1000{ 1032{
1001 const struct cred *cred; 1033 const struct cred *cred;
1002 int ret;
1003 1034
1004 if (fc->flags & FUSE_ALLOW_OTHER) 1035 if (fc->flags & FUSE_ALLOW_OTHER)
1005 return 1; 1036 return 1;
1006 1037
1007 rcu_read_lock(); 1038 cred = current_cred();
1008 ret = 0;
1009 cred = __task_cred(task);
1010 if (uid_eq(cred->euid, fc->user_id) && 1039 if (uid_eq(cred->euid, fc->user_id) &&
1011 uid_eq(cred->suid, fc->user_id) && 1040 uid_eq(cred->suid, fc->user_id) &&
1012 uid_eq(cred->uid, fc->user_id) && 1041 uid_eq(cred->uid, fc->user_id) &&
1013 gid_eq(cred->egid, fc->group_id) && 1042 gid_eq(cred->egid, fc->group_id) &&
1014 gid_eq(cred->sgid, fc->group_id) && 1043 gid_eq(cred->sgid, fc->group_id) &&
1015 gid_eq(cred->gid, fc->group_id)) 1044 gid_eq(cred->gid, fc->group_id))
1016 ret = 1; 1045 return 1;
1017 rcu_read_unlock();
1018 1046
1019 return ret; 1047 return 0;
1020} 1048}
1021 1049
1022static int fuse_access(struct inode *inode, int mask) 1050static int fuse_access(struct inode *inode, int mask)
@@ -1029,7 +1057,7 @@ static int fuse_access(struct inode *inode, int mask)
1029 if (fc->no_access) 1057 if (fc->no_access)
1030 return 0; 1058 return 0;
1031 1059
1032 req = fuse_get_req(fc); 1060 req = fuse_get_req_nopages(fc);
1033 if (IS_ERR(req)) 1061 if (IS_ERR(req))
1034 return PTR_ERR(req); 1062 return PTR_ERR(req);
1035 1063
@@ -1077,7 +1105,7 @@ static int fuse_permission(struct inode *inode, int mask)
1077 bool refreshed = false; 1105 bool refreshed = false;
1078 int err = 0; 1106 int err = 0;
1079 1107
1080 if (!fuse_allow_task(fc, current)) 1108 if (!fuse_allow_current_process(fc))
1081 return -EACCES; 1109 return -EACCES;
1082 1110
1083 /* 1111 /*
@@ -1155,19 +1183,157 @@ static int parse_dirfile(char *buf, size_t nbytes, struct file *file,
1155 return 0; 1183 return 0;
1156} 1184}
1157 1185
1158static int fuse_readdir(struct file *file, void *dstbuf, filldir_t filldir) 1186static int fuse_direntplus_link(struct file *file,
1187 struct fuse_direntplus *direntplus,
1188 u64 attr_version)
1159{ 1189{
1160 int err; 1190 int err;
1191 struct fuse_entry_out *o = &direntplus->entry_out;
1192 struct fuse_dirent *dirent = &direntplus->dirent;
1193 struct dentry *parent = file->f_path.dentry;
1194 struct qstr name = QSTR_INIT(dirent->name, dirent->namelen);
1195 struct dentry *dentry;
1196 struct dentry *alias;
1197 struct inode *dir = parent->d_inode;
1198 struct fuse_conn *fc;
1199 struct inode *inode;
1200
1201 if (!o->nodeid) {
1202 /*
1203 * Unlike in the case of fuse_lookup, zero nodeid does not mean
1204 * ENOENT. Instead, it only means the userspace filesystem did
1205 * not want to return attributes/handle for this entry.
1206 *
1207 * So do nothing.
1208 */
1209 return 0;
1210 }
1211
1212 if (name.name[0] == '.') {
1213 /*
1214 * We could potentially refresh the attributes of the directory
1215 * and its parent?
1216 */
1217 if (name.len == 1)
1218 return 0;
1219 if (name.name[1] == '.' && name.len == 2)
1220 return 0;
1221 }
1222 fc = get_fuse_conn(dir);
1223
1224 name.hash = full_name_hash(name.name, name.len);
1225 dentry = d_lookup(parent, &name);
1226 if (dentry && dentry->d_inode) {
1227 inode = dentry->d_inode;
1228 if (get_node_id(inode) == o->nodeid) {
1229 struct fuse_inode *fi;
1230 fi = get_fuse_inode(inode);
1231 spin_lock(&fc->lock);
1232 fi->nlookup++;
1233 spin_unlock(&fc->lock);
1234
1235 /*
1236 * The other branch to 'found' comes via fuse_iget()
1237 * which bumps nlookup inside
1238 */
1239 goto found;
1240 }
1241 err = d_invalidate(dentry);
1242 if (err)
1243 goto out;
1244 dput(dentry);
1245 dentry = NULL;
1246 }
1247
1248 dentry = d_alloc(parent, &name);
1249 err = -ENOMEM;
1250 if (!dentry)
1251 goto out;
1252
1253 inode = fuse_iget(dir->i_sb, o->nodeid, o->generation,
1254 &o->attr, entry_attr_timeout(o), attr_version);
1255 if (!inode)
1256 goto out;
1257
1258 alias = d_materialise_unique(dentry, inode);
1259 err = PTR_ERR(alias);
1260 if (IS_ERR(alias))
1261 goto out;
1262 if (alias) {
1263 dput(dentry);
1264 dentry = alias;
1265 }
1266
1267found:
1268 fuse_change_attributes(inode, &o->attr, entry_attr_timeout(o),
1269 attr_version);
1270
1271 fuse_change_entry_timeout(dentry, o);
1272
1273 err = 0;
1274out:
1275 if (dentry)
1276 dput(dentry);
1277 return err;
1278}
1279
1280static int parse_dirplusfile(char *buf, size_t nbytes, struct file *file,
1281 void *dstbuf, filldir_t filldir, u64 attr_version)
1282{
1283 struct fuse_direntplus *direntplus;
1284 struct fuse_dirent *dirent;
1285 size_t reclen;
1286 int over = 0;
1287 int ret;
1288
1289 while (nbytes >= FUSE_NAME_OFFSET_DIRENTPLUS) {
1290 direntplus = (struct fuse_direntplus *) buf;
1291 dirent = &direntplus->dirent;
1292 reclen = FUSE_DIRENTPLUS_SIZE(direntplus);
1293
1294 if (!dirent->namelen || dirent->namelen > FUSE_NAME_MAX)
1295 return -EIO;
1296 if (reclen > nbytes)
1297 break;
1298
1299 if (!over) {
1300 /* We fill entries into dstbuf only as much as
1301 it can hold. But we still continue iterating
1302 over remaining entries to link them. If not,
1303 we need to send a FORGET for each of those
1304 which we did not link.
1305 */
1306 over = filldir(dstbuf, dirent->name, dirent->namelen,
1307 file->f_pos, dirent->ino,
1308 dirent->type);
1309 file->f_pos = dirent->off;
1310 }
1311
1312 buf += reclen;
1313 nbytes -= reclen;
1314
1315 ret = fuse_direntplus_link(file, direntplus, attr_version);
1316 if (ret)
1317 fuse_force_forget(file, direntplus->entry_out.nodeid);
1318 }
1319
1320 return 0;
1321}
1322
1323static int fuse_readdir(struct file *file, void *dstbuf, filldir_t filldir)
1324{
1325 int plus, err;
1161 size_t nbytes; 1326 size_t nbytes;
1162 struct page *page; 1327 struct page *page;
1163 struct inode *inode = file->f_path.dentry->d_inode; 1328 struct inode *inode = file->f_path.dentry->d_inode;
1164 struct fuse_conn *fc = get_fuse_conn(inode); 1329 struct fuse_conn *fc = get_fuse_conn(inode);
1165 struct fuse_req *req; 1330 struct fuse_req *req;
1331 u64 attr_version = 0;
1166 1332
1167 if (is_bad_inode(inode)) 1333 if (is_bad_inode(inode))
1168 return -EIO; 1334 return -EIO;
1169 1335
1170 req = fuse_get_req(fc); 1336 req = fuse_get_req(fc, 1);
1171 if (IS_ERR(req)) 1337 if (IS_ERR(req))
1172 return PTR_ERR(req); 1338 return PTR_ERR(req);
1173 1339
@@ -1176,17 +1342,34 @@ static int fuse_readdir(struct file *file, void *dstbuf, filldir_t filldir)
1176 fuse_put_request(fc, req); 1342 fuse_put_request(fc, req);
1177 return -ENOMEM; 1343 return -ENOMEM;
1178 } 1344 }
1345
1346 plus = fuse_use_readdirplus(inode, file);
1179 req->out.argpages = 1; 1347 req->out.argpages = 1;
1180 req->num_pages = 1; 1348 req->num_pages = 1;
1181 req->pages[0] = page; 1349 req->pages[0] = page;
1182 fuse_read_fill(req, file, file->f_pos, PAGE_SIZE, FUSE_READDIR); 1350 req->page_descs[0].length = PAGE_SIZE;
1351 if (plus) {
1352 attr_version = fuse_get_attr_version(fc);
1353 fuse_read_fill(req, file, file->f_pos, PAGE_SIZE,
1354 FUSE_READDIRPLUS);
1355 } else {
1356 fuse_read_fill(req, file, file->f_pos, PAGE_SIZE,
1357 FUSE_READDIR);
1358 }
1183 fuse_request_send(fc, req); 1359 fuse_request_send(fc, req);
1184 nbytes = req->out.args[0].size; 1360 nbytes = req->out.args[0].size;
1185 err = req->out.h.error; 1361 err = req->out.h.error;
1186 fuse_put_request(fc, req); 1362 fuse_put_request(fc, req);
1187 if (!err) 1363 if (!err) {
1188 err = parse_dirfile(page_address(page), nbytes, file, dstbuf, 1364 if (plus) {
1189 filldir); 1365 err = parse_dirplusfile(page_address(page), nbytes,
1366 file, dstbuf, filldir,
1367 attr_version);
1368 } else {
1369 err = parse_dirfile(page_address(page), nbytes, file,
1370 dstbuf, filldir);
1371 }
1372 }
1190 1373
1191 __free_page(page); 1374 __free_page(page);
1192 fuse_invalidate_attr(inode); /* atime changed */ 1375 fuse_invalidate_attr(inode); /* atime changed */
@@ -1197,7 +1380,7 @@ static char *read_link(struct dentry *dentry)
1197{ 1380{
1198 struct inode *inode = dentry->d_inode; 1381 struct inode *inode = dentry->d_inode;
1199 struct fuse_conn *fc = get_fuse_conn(inode); 1382 struct fuse_conn *fc = get_fuse_conn(inode);
1200 struct fuse_req *req = fuse_get_req(fc); 1383 struct fuse_req *req = fuse_get_req_nopages(fc);
1201 char *link; 1384 char *link;
1202 1385
1203 if (IS_ERR(req)) 1386 if (IS_ERR(req))
@@ -1391,7 +1574,7 @@ static int fuse_do_setattr(struct dentry *entry, struct iattr *attr,
1391 loff_t oldsize; 1574 loff_t oldsize;
1392 int err; 1575 int err;
1393 1576
1394 if (!fuse_allow_task(fc, current)) 1577 if (!fuse_allow_current_process(fc))
1395 return -EACCES; 1578 return -EACCES;
1396 1579
1397 if (!(fc->flags & FUSE_DEFAULT_PERMISSIONS)) 1580 if (!(fc->flags & FUSE_DEFAULT_PERMISSIONS))
@@ -1410,7 +1593,7 @@ static int fuse_do_setattr(struct dentry *entry, struct iattr *attr,
1410 if (attr->ia_valid & ATTR_SIZE) 1593 if (attr->ia_valid & ATTR_SIZE)
1411 is_truncate = true; 1594 is_truncate = true;
1412 1595
1413 req = fuse_get_req(fc); 1596 req = fuse_get_req_nopages(fc);
1414 if (IS_ERR(req)) 1597 if (IS_ERR(req))
1415 return PTR_ERR(req); 1598 return PTR_ERR(req);
1416 1599
@@ -1500,7 +1683,7 @@ static int fuse_getattr(struct vfsmount *mnt, struct dentry *entry,
1500 struct inode *inode = entry->d_inode; 1683 struct inode *inode = entry->d_inode;
1501 struct fuse_conn *fc = get_fuse_conn(inode); 1684 struct fuse_conn *fc = get_fuse_conn(inode);
1502 1685
1503 if (!fuse_allow_task(fc, current)) 1686 if (!fuse_allow_current_process(fc))
1504 return -EACCES; 1687 return -EACCES;
1505 1688
1506 return fuse_update_attributes(inode, stat, NULL, NULL); 1689 return fuse_update_attributes(inode, stat, NULL, NULL);
@@ -1518,7 +1701,7 @@ static int fuse_setxattr(struct dentry *entry, const char *name,
1518 if (fc->no_setxattr) 1701 if (fc->no_setxattr)
1519 return -EOPNOTSUPP; 1702 return -EOPNOTSUPP;
1520 1703
1521 req = fuse_get_req(fc); 1704 req = fuse_get_req_nopages(fc);
1522 if (IS_ERR(req)) 1705 if (IS_ERR(req))
1523 return PTR_ERR(req); 1706 return PTR_ERR(req);
1524 1707
@@ -1557,7 +1740,7 @@ static ssize_t fuse_getxattr(struct dentry *entry, const char *name,
1557 if (fc->no_getxattr) 1740 if (fc->no_getxattr)
1558 return -EOPNOTSUPP; 1741 return -EOPNOTSUPP;
1559 1742
1560 req = fuse_get_req(fc); 1743 req = fuse_get_req_nopages(fc);
1561 if (IS_ERR(req)) 1744 if (IS_ERR(req))
1562 return PTR_ERR(req); 1745 return PTR_ERR(req);
1563 1746
@@ -1603,13 +1786,13 @@ static ssize_t fuse_listxattr(struct dentry *entry, char *list, size_t size)
1603 struct fuse_getxattr_out outarg; 1786 struct fuse_getxattr_out outarg;
1604 ssize_t ret; 1787 ssize_t ret;
1605 1788
1606 if (!fuse_allow_task(fc, current)) 1789 if (!fuse_allow_current_process(fc))
1607 return -EACCES; 1790 return -EACCES;
1608 1791
1609 if (fc->no_listxattr) 1792 if (fc->no_listxattr)
1610 return -EOPNOTSUPP; 1793 return -EOPNOTSUPP;
1611 1794
1612 req = fuse_get_req(fc); 1795 req = fuse_get_req_nopages(fc);
1613 if (IS_ERR(req)) 1796 if (IS_ERR(req))
1614 return PTR_ERR(req); 1797 return PTR_ERR(req);
1615 1798
@@ -1654,7 +1837,7 @@ static int fuse_removexattr(struct dentry *entry, const char *name)
1654 if (fc->no_removexattr) 1837 if (fc->no_removexattr)
1655 return -EOPNOTSUPP; 1838 return -EOPNOTSUPP;
1656 1839
1657 req = fuse_get_req(fc); 1840 req = fuse_get_req_nopages(fc);
1658 if (IS_ERR(req)) 1841 if (IS_ERR(req))
1659 return PTR_ERR(req); 1842 return PTR_ERR(req);
1660 1843
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index f3ab824fa302..c8071768b950 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -25,7 +25,7 @@ static int fuse_send_open(struct fuse_conn *fc, u64 nodeid, struct file *file,
25 struct fuse_req *req; 25 struct fuse_req *req;
26 int err; 26 int err;
27 27
28 req = fuse_get_req(fc); 28 req = fuse_get_req_nopages(fc);
29 if (IS_ERR(req)) 29 if (IS_ERR(req))
30 return PTR_ERR(req); 30 return PTR_ERR(req);
31 31
@@ -57,7 +57,7 @@ struct fuse_file *fuse_file_alloc(struct fuse_conn *fc)
57 return NULL; 57 return NULL;
58 58
59 ff->fc = fc; 59 ff->fc = fc;
60 ff->reserved_req = fuse_request_alloc(); 60 ff->reserved_req = fuse_request_alloc(0);
61 if (unlikely(!ff->reserved_req)) { 61 if (unlikely(!ff->reserved_req)) {
62 kfree(ff); 62 kfree(ff);
63 return NULL; 63 return NULL;
@@ -368,7 +368,7 @@ static int fuse_flush(struct file *file, fl_owner_t id)
368 if (fc->no_flush) 368 if (fc->no_flush)
369 return 0; 369 return 0;
370 370
371 req = fuse_get_req_nofail(fc, file); 371 req = fuse_get_req_nofail_nopages(fc, file);
372 memset(&inarg, 0, sizeof(inarg)); 372 memset(&inarg, 0, sizeof(inarg));
373 inarg.fh = ff->fh; 373 inarg.fh = ff->fh;
374 inarg.lock_owner = fuse_lock_owner_id(fc, id); 374 inarg.lock_owner = fuse_lock_owner_id(fc, id);
@@ -436,7 +436,7 @@ int fuse_fsync_common(struct file *file, loff_t start, loff_t end,
436 436
437 fuse_sync_writes(inode); 437 fuse_sync_writes(inode);
438 438
439 req = fuse_get_req(fc); 439 req = fuse_get_req_nopages(fc);
440 if (IS_ERR(req)) { 440 if (IS_ERR(req)) {
441 err = PTR_ERR(req); 441 err = PTR_ERR(req);
442 goto out; 442 goto out;
@@ -544,7 +544,7 @@ static int fuse_readpage(struct file *file, struct page *page)
544 */ 544 */
545 fuse_wait_on_page_writeback(inode, page->index); 545 fuse_wait_on_page_writeback(inode, page->index);
546 546
547 req = fuse_get_req(fc); 547 req = fuse_get_req(fc, 1);
548 err = PTR_ERR(req); 548 err = PTR_ERR(req);
549 if (IS_ERR(req)) 549 if (IS_ERR(req))
550 goto out; 550 goto out;
@@ -555,6 +555,7 @@ static int fuse_readpage(struct file *file, struct page *page)
555 req->out.argpages = 1; 555 req->out.argpages = 1;
556 req->num_pages = 1; 556 req->num_pages = 1;
557 req->pages[0] = page; 557 req->pages[0] = page;
558 req->page_descs[0].length = count;
558 num_read = fuse_send_read(req, file, pos, count, NULL); 559 num_read = fuse_send_read(req, file, pos, count, NULL);
559 err = req->out.h.error; 560 err = req->out.h.error;
560 fuse_put_request(fc, req); 561 fuse_put_request(fc, req);
@@ -641,6 +642,7 @@ struct fuse_fill_data {
641 struct fuse_req *req; 642 struct fuse_req *req;
642 struct file *file; 643 struct file *file;
643 struct inode *inode; 644 struct inode *inode;
645 unsigned nr_pages;
644}; 646};
645 647
646static int fuse_readpages_fill(void *_data, struct page *page) 648static int fuse_readpages_fill(void *_data, struct page *page)
@@ -656,16 +658,26 @@ static int fuse_readpages_fill(void *_data, struct page *page)
656 (req->num_pages == FUSE_MAX_PAGES_PER_REQ || 658 (req->num_pages == FUSE_MAX_PAGES_PER_REQ ||
657 (req->num_pages + 1) * PAGE_CACHE_SIZE > fc->max_read || 659 (req->num_pages + 1) * PAGE_CACHE_SIZE > fc->max_read ||
658 req->pages[req->num_pages - 1]->index + 1 != page->index)) { 660 req->pages[req->num_pages - 1]->index + 1 != page->index)) {
661 int nr_alloc = min_t(unsigned, data->nr_pages,
662 FUSE_MAX_PAGES_PER_REQ);
659 fuse_send_readpages(req, data->file); 663 fuse_send_readpages(req, data->file);
660 data->req = req = fuse_get_req(fc); 664 data->req = req = fuse_get_req(fc, nr_alloc);
661 if (IS_ERR(req)) { 665 if (IS_ERR(req)) {
662 unlock_page(page); 666 unlock_page(page);
663 return PTR_ERR(req); 667 return PTR_ERR(req);
664 } 668 }
665 } 669 }
670
671 if (WARN_ON(req->num_pages >= req->max_pages)) {
672 fuse_put_request(fc, req);
673 return -EIO;
674 }
675
666 page_cache_get(page); 676 page_cache_get(page);
667 req->pages[req->num_pages] = page; 677 req->pages[req->num_pages] = page;
678 req->page_descs[req->num_pages].length = PAGE_SIZE;
668 req->num_pages++; 679 req->num_pages++;
680 data->nr_pages--;
669 return 0; 681 return 0;
670} 682}
671 683
@@ -676,6 +688,7 @@ static int fuse_readpages(struct file *file, struct address_space *mapping,
676 struct fuse_conn *fc = get_fuse_conn(inode); 688 struct fuse_conn *fc = get_fuse_conn(inode);
677 struct fuse_fill_data data; 689 struct fuse_fill_data data;
678 int err; 690 int err;
691 int nr_alloc = min_t(unsigned, nr_pages, FUSE_MAX_PAGES_PER_REQ);
679 692
680 err = -EIO; 693 err = -EIO;
681 if (is_bad_inode(inode)) 694 if (is_bad_inode(inode))
@@ -683,7 +696,8 @@ static int fuse_readpages(struct file *file, struct address_space *mapping,
683 696
684 data.file = file; 697 data.file = file;
685 data.inode = inode; 698 data.inode = inode;
686 data.req = fuse_get_req(fc); 699 data.req = fuse_get_req(fc, nr_alloc);
700 data.nr_pages = nr_pages;
687 err = PTR_ERR(data.req); 701 err = PTR_ERR(data.req);
688 if (IS_ERR(data.req)) 702 if (IS_ERR(data.req))
689 goto out; 703 goto out;
@@ -786,7 +800,7 @@ static size_t fuse_send_write_pages(struct fuse_req *req, struct file *file,
786 800
787 res = fuse_send_write(req, file, pos, count, NULL); 801 res = fuse_send_write(req, file, pos, count, NULL);
788 802
789 offset = req->page_offset; 803 offset = req->page_descs[0].offset;
790 count = res; 804 count = res;
791 for (i = 0; i < req->num_pages; i++) { 805 for (i = 0; i < req->num_pages; i++) {
792 struct page *page = req->pages[i]; 806 struct page *page = req->pages[i];
@@ -817,7 +831,7 @@ static ssize_t fuse_fill_write_pages(struct fuse_req *req,
817 int err; 831 int err;
818 832
819 req->in.argpages = 1; 833 req->in.argpages = 1;
820 req->page_offset = offset; 834 req->page_descs[0].offset = offset;
821 835
822 do { 836 do {
823 size_t tmp; 837 size_t tmp;
@@ -857,6 +871,7 @@ static ssize_t fuse_fill_write_pages(struct fuse_req *req,
857 871
858 err = 0; 872 err = 0;
859 req->pages[req->num_pages] = page; 873 req->pages[req->num_pages] = page;
874 req->page_descs[req->num_pages].length = tmp;
860 req->num_pages++; 875 req->num_pages++;
861 876
862 iov_iter_advance(ii, tmp); 877 iov_iter_advance(ii, tmp);
@@ -869,11 +884,19 @@ static ssize_t fuse_fill_write_pages(struct fuse_req *req,
869 if (!fc->big_writes) 884 if (!fc->big_writes)
870 break; 885 break;
871 } while (iov_iter_count(ii) && count < fc->max_write && 886 } while (iov_iter_count(ii) && count < fc->max_write &&
872 req->num_pages < FUSE_MAX_PAGES_PER_REQ && offset == 0); 887 req->num_pages < req->max_pages && offset == 0);
873 888
874 return count > 0 ? count : err; 889 return count > 0 ? count : err;
875} 890}
876 891
892static inline unsigned fuse_wr_pages(loff_t pos, size_t len)
893{
894 return min_t(unsigned,
895 ((pos + len - 1) >> PAGE_CACHE_SHIFT) -
896 (pos >> PAGE_CACHE_SHIFT) + 1,
897 FUSE_MAX_PAGES_PER_REQ);
898}
899
877static ssize_t fuse_perform_write(struct file *file, 900static ssize_t fuse_perform_write(struct file *file,
878 struct address_space *mapping, 901 struct address_space *mapping,
879 struct iov_iter *ii, loff_t pos) 902 struct iov_iter *ii, loff_t pos)
@@ -889,8 +912,9 @@ static ssize_t fuse_perform_write(struct file *file,
889 do { 912 do {
890 struct fuse_req *req; 913 struct fuse_req *req;
891 ssize_t count; 914 ssize_t count;
915 unsigned nr_pages = fuse_wr_pages(pos, iov_iter_count(ii));
892 916
893 req = fuse_get_req(fc); 917 req = fuse_get_req(fc, nr_pages);
894 if (IS_ERR(req)) { 918 if (IS_ERR(req)) {
895 err = PTR_ERR(req); 919 err = PTR_ERR(req);
896 break; 920 break;
@@ -1023,47 +1047,110 @@ static void fuse_release_user_pages(struct fuse_req *req, int write)
1023 } 1047 }
1024} 1048}
1025 1049
1026static int fuse_get_user_pages(struct fuse_req *req, const char __user *buf, 1050static inline void fuse_page_descs_length_init(struct fuse_req *req,
1051 unsigned index, unsigned nr_pages)
1052{
1053 int i;
1054
1055 for (i = index; i < index + nr_pages; i++)
1056 req->page_descs[i].length = PAGE_SIZE -
1057 req->page_descs[i].offset;
1058}
1059
1060static inline unsigned long fuse_get_user_addr(const struct iov_iter *ii)
1061{
1062 return (unsigned long)ii->iov->iov_base + ii->iov_offset;
1063}
1064
1065static inline size_t fuse_get_frag_size(const struct iov_iter *ii,
1066 size_t max_size)
1067{
1068 return min(iov_iter_single_seg_count(ii), max_size);
1069}
1070
1071static int fuse_get_user_pages(struct fuse_req *req, struct iov_iter *ii,
1027 size_t *nbytesp, int write) 1072 size_t *nbytesp, int write)
1028{ 1073{
1029 size_t nbytes = *nbytesp; 1074 size_t nbytes = 0; /* # bytes already packed in req */
1030 unsigned long user_addr = (unsigned long) buf;
1031 unsigned offset = user_addr & ~PAGE_MASK;
1032 int npages;
1033 1075
1034 /* Special case for kernel I/O: can copy directly into the buffer */ 1076 /* Special case for kernel I/O: can copy directly into the buffer */
1035 if (segment_eq(get_fs(), KERNEL_DS)) { 1077 if (segment_eq(get_fs(), KERNEL_DS)) {
1078 unsigned long user_addr = fuse_get_user_addr(ii);
1079 size_t frag_size = fuse_get_frag_size(ii, *nbytesp);
1080
1036 if (write) 1081 if (write)
1037 req->in.args[1].value = (void *) user_addr; 1082 req->in.args[1].value = (void *) user_addr;
1038 else 1083 else
1039 req->out.args[0].value = (void *) user_addr; 1084 req->out.args[0].value = (void *) user_addr;
1040 1085
1086 iov_iter_advance(ii, frag_size);
1087 *nbytesp = frag_size;
1041 return 0; 1088 return 0;
1042 } 1089 }
1043 1090
1044 nbytes = min_t(size_t, nbytes, FUSE_MAX_PAGES_PER_REQ << PAGE_SHIFT); 1091 while (nbytes < *nbytesp && req->num_pages < req->max_pages) {
1045 npages = (nbytes + offset + PAGE_SIZE - 1) >> PAGE_SHIFT; 1092 unsigned npages;
1046 npages = clamp(npages, 1, FUSE_MAX_PAGES_PER_REQ); 1093 unsigned long user_addr = fuse_get_user_addr(ii);
1047 npages = get_user_pages_fast(user_addr, npages, !write, req->pages); 1094 unsigned offset = user_addr & ~PAGE_MASK;
1048 if (npages < 0) 1095 size_t frag_size = fuse_get_frag_size(ii, *nbytesp - nbytes);
1049 return npages; 1096 int ret;
1097
1098 unsigned n = req->max_pages - req->num_pages;
1099 frag_size = min_t(size_t, frag_size, n << PAGE_SHIFT);
1100
1101 npages = (frag_size + offset + PAGE_SIZE - 1) >> PAGE_SHIFT;
1102 npages = clamp(npages, 1U, n);
1103
1104 ret = get_user_pages_fast(user_addr, npages, !write,
1105 &req->pages[req->num_pages]);
1106 if (ret < 0)
1107 return ret;
1050 1108
1051 req->num_pages = npages; 1109 npages = ret;
1052 req->page_offset = offset; 1110 frag_size = min_t(size_t, frag_size,
1111 (npages << PAGE_SHIFT) - offset);
1112 iov_iter_advance(ii, frag_size);
1113
1114 req->page_descs[req->num_pages].offset = offset;
1115 fuse_page_descs_length_init(req, req->num_pages, npages);
1116
1117 req->num_pages += npages;
1118 req->page_descs[req->num_pages - 1].length -=
1119 (npages << PAGE_SHIFT) - offset - frag_size;
1120
1121 nbytes += frag_size;
1122 }
1053 1123
1054 if (write) 1124 if (write)
1055 req->in.argpages = 1; 1125 req->in.argpages = 1;
1056 else 1126 else
1057 req->out.argpages = 1; 1127 req->out.argpages = 1;
1058 1128
1059 nbytes = (req->num_pages << PAGE_SHIFT) - req->page_offset; 1129 *nbytesp = nbytes;
1060 *nbytesp = min(*nbytesp, nbytes);
1061 1130
1062 return 0; 1131 return 0;
1063} 1132}
1064 1133
1065ssize_t fuse_direct_io(struct file *file, const char __user *buf, 1134static inline int fuse_iter_npages(const struct iov_iter *ii_p)
1066 size_t count, loff_t *ppos, int write) 1135{
1136 struct iov_iter ii = *ii_p;
1137 int npages = 0;
1138
1139 while (iov_iter_count(&ii) && npages < FUSE_MAX_PAGES_PER_REQ) {
1140 unsigned long user_addr = fuse_get_user_addr(&ii);
1141 unsigned offset = user_addr & ~PAGE_MASK;
1142 size_t frag_size = iov_iter_single_seg_count(&ii);
1143
1144 npages += (frag_size + offset + PAGE_SIZE - 1) >> PAGE_SHIFT;
1145 iov_iter_advance(&ii, frag_size);
1146 }
1147
1148 return min(npages, FUSE_MAX_PAGES_PER_REQ);
1149}
1150
1151ssize_t fuse_direct_io(struct file *file, const struct iovec *iov,
1152 unsigned long nr_segs, size_t count, loff_t *ppos,
1153 int write)
1067{ 1154{
1068 struct fuse_file *ff = file->private_data; 1155 struct fuse_file *ff = file->private_data;
1069 struct fuse_conn *fc = ff->fc; 1156 struct fuse_conn *fc = ff->fc;
@@ -1071,8 +1158,11 @@ ssize_t fuse_direct_io(struct file *file, const char __user *buf,
1071 loff_t pos = *ppos; 1158 loff_t pos = *ppos;
1072 ssize_t res = 0; 1159 ssize_t res = 0;
1073 struct fuse_req *req; 1160 struct fuse_req *req;
1161 struct iov_iter ii;
1162
1163 iov_iter_init(&ii, iov, nr_segs, count, 0);
1074 1164
1075 req = fuse_get_req(fc); 1165 req = fuse_get_req(fc, fuse_iter_npages(&ii));
1076 if (IS_ERR(req)) 1166 if (IS_ERR(req))
1077 return PTR_ERR(req); 1167 return PTR_ERR(req);
1078 1168
@@ -1080,7 +1170,7 @@ ssize_t fuse_direct_io(struct file *file, const char __user *buf,
1080 size_t nres; 1170 size_t nres;
1081 fl_owner_t owner = current->files; 1171 fl_owner_t owner = current->files;
1082 size_t nbytes = min(count, nmax); 1172 size_t nbytes = min(count, nmax);
1083 int err = fuse_get_user_pages(req, buf, &nbytes, write); 1173 int err = fuse_get_user_pages(req, &ii, &nbytes, write);
1084 if (err) { 1174 if (err) {
1085 res = err; 1175 res = err;
1086 break; 1176 break;
@@ -1103,12 +1193,11 @@ ssize_t fuse_direct_io(struct file *file, const char __user *buf,
1103 count -= nres; 1193 count -= nres;
1104 res += nres; 1194 res += nres;
1105 pos += nres; 1195 pos += nres;
1106 buf += nres;
1107 if (nres != nbytes) 1196 if (nres != nbytes)
1108 break; 1197 break;
1109 if (count) { 1198 if (count) {
1110 fuse_put_request(fc, req); 1199 fuse_put_request(fc, req);
1111 req = fuse_get_req(fc); 1200 req = fuse_get_req(fc, fuse_iter_npages(&ii));
1112 if (IS_ERR(req)) 1201 if (IS_ERR(req))
1113 break; 1202 break;
1114 } 1203 }
@@ -1122,8 +1211,8 @@ ssize_t fuse_direct_io(struct file *file, const char __user *buf,
1122} 1211}
1123EXPORT_SYMBOL_GPL(fuse_direct_io); 1212EXPORT_SYMBOL_GPL(fuse_direct_io);
1124 1213
1125static ssize_t fuse_direct_read(struct file *file, char __user *buf, 1214static ssize_t __fuse_direct_read(struct file *file, const struct iovec *iov,
1126 size_t count, loff_t *ppos) 1215 unsigned long nr_segs, loff_t *ppos)
1127{ 1216{
1128 ssize_t res; 1217 ssize_t res;
1129 struct inode *inode = file->f_path.dentry->d_inode; 1218 struct inode *inode = file->f_path.dentry->d_inode;
@@ -1131,22 +1220,31 @@ static ssize_t fuse_direct_read(struct file *file, char __user *buf,
1131 if (is_bad_inode(inode)) 1220 if (is_bad_inode(inode))
1132 return -EIO; 1221 return -EIO;
1133 1222
1134 res = fuse_direct_io(file, buf, count, ppos, 0); 1223 res = fuse_direct_io(file, iov, nr_segs, iov_length(iov, nr_segs),
1224 ppos, 0);
1135 1225
1136 fuse_invalidate_attr(inode); 1226 fuse_invalidate_attr(inode);
1137 1227
1138 return res; 1228 return res;
1139} 1229}
1140 1230
1141static ssize_t __fuse_direct_write(struct file *file, const char __user *buf, 1231static ssize_t fuse_direct_read(struct file *file, char __user *buf,
1142 size_t count, loff_t *ppos) 1232 size_t count, loff_t *ppos)
1233{
1234 struct iovec iov = { .iov_base = buf, .iov_len = count };
1235 return __fuse_direct_read(file, &iov, 1, ppos);
1236}
1237
1238static ssize_t __fuse_direct_write(struct file *file, const struct iovec *iov,
1239 unsigned long nr_segs, loff_t *ppos)
1143{ 1240{
1144 struct inode *inode = file->f_path.dentry->d_inode; 1241 struct inode *inode = file->f_path.dentry->d_inode;
1242 size_t count = iov_length(iov, nr_segs);
1145 ssize_t res; 1243 ssize_t res;
1146 1244
1147 res = generic_write_checks(file, ppos, &count, 0); 1245 res = generic_write_checks(file, ppos, &count, 0);
1148 if (!res) { 1246 if (!res) {
1149 res = fuse_direct_io(file, buf, count, ppos, 1); 1247 res = fuse_direct_io(file, iov, nr_segs, count, ppos, 1);
1150 if (res > 0) 1248 if (res > 0)
1151 fuse_write_update_size(inode, *ppos); 1249 fuse_write_update_size(inode, *ppos);
1152 } 1250 }
@@ -1159,6 +1257,7 @@ static ssize_t __fuse_direct_write(struct file *file, const char __user *buf,
1159static ssize_t fuse_direct_write(struct file *file, const char __user *buf, 1257static ssize_t fuse_direct_write(struct file *file, const char __user *buf,
1160 size_t count, loff_t *ppos) 1258 size_t count, loff_t *ppos)
1161{ 1259{
1260 struct iovec iov = { .iov_base = (void __user *)buf, .iov_len = count };
1162 struct inode *inode = file->f_path.dentry->d_inode; 1261 struct inode *inode = file->f_path.dentry->d_inode;
1163 ssize_t res; 1262 ssize_t res;
1164 1263
@@ -1167,7 +1266,7 @@ static ssize_t fuse_direct_write(struct file *file, const char __user *buf,
1167 1266
1168 /* Don't allow parallel writes to the same file */ 1267 /* Don't allow parallel writes to the same file */
1169 mutex_lock(&inode->i_mutex); 1268 mutex_lock(&inode->i_mutex);
1170 res = __fuse_direct_write(file, buf, count, ppos); 1269 res = __fuse_direct_write(file, &iov, 1, ppos);
1171 mutex_unlock(&inode->i_mutex); 1270 mutex_unlock(&inode->i_mutex);
1172 1271
1173 return res; 1272 return res;
@@ -1272,7 +1371,7 @@ static int fuse_writepage_locked(struct page *page)
1272 1371
1273 set_page_writeback(page); 1372 set_page_writeback(page);
1274 1373
1275 req = fuse_request_alloc_nofs(); 1374 req = fuse_request_alloc_nofs(1);
1276 if (!req) 1375 if (!req)
1277 goto err; 1376 goto err;
1278 1377
@@ -1293,7 +1392,8 @@ static int fuse_writepage_locked(struct page *page)
1293 req->in.argpages = 1; 1392 req->in.argpages = 1;
1294 req->num_pages = 1; 1393 req->num_pages = 1;
1295 req->pages[0] = tmp_page; 1394 req->pages[0] = tmp_page;
1296 req->page_offset = 0; 1395 req->page_descs[0].offset = 0;
1396 req->page_descs[0].length = PAGE_SIZE;
1297 req->end = fuse_writepage_end; 1397 req->end = fuse_writepage_end;
1298 req->inode = inode; 1398 req->inode = inode;
1299 1399
@@ -1471,7 +1571,7 @@ static int fuse_getlk(struct file *file, struct file_lock *fl)
1471 struct fuse_lk_out outarg; 1571 struct fuse_lk_out outarg;
1472 int err; 1572 int err;
1473 1573
1474 req = fuse_get_req(fc); 1574 req = fuse_get_req_nopages(fc);
1475 if (IS_ERR(req)) 1575 if (IS_ERR(req))
1476 return PTR_ERR(req); 1576 return PTR_ERR(req);
1477 1577
@@ -1506,7 +1606,7 @@ static int fuse_setlk(struct file *file, struct file_lock *fl, int flock)
1506 if (fl->fl_flags & FL_CLOSE) 1606 if (fl->fl_flags & FL_CLOSE)
1507 return 0; 1607 return 0;
1508 1608
1509 req = fuse_get_req(fc); 1609 req = fuse_get_req_nopages(fc);
1510 if (IS_ERR(req)) 1610 if (IS_ERR(req))
1511 return PTR_ERR(req); 1611 return PTR_ERR(req);
1512 1612
@@ -1575,7 +1675,7 @@ static sector_t fuse_bmap(struct address_space *mapping, sector_t block)
1575 if (!inode->i_sb->s_bdev || fc->no_bmap) 1675 if (!inode->i_sb->s_bdev || fc->no_bmap)
1576 return 0; 1676 return 0;
1577 1677
1578 req = fuse_get_req(fc); 1678 req = fuse_get_req_nopages(fc);
1579 if (IS_ERR(req)) 1679 if (IS_ERR(req))
1580 return 0; 1680 return 0;
1581 1681
@@ -1873,7 +1973,7 @@ long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg,
1873 num_pages++; 1973 num_pages++;
1874 } 1974 }
1875 1975
1876 req = fuse_get_req(fc); 1976 req = fuse_get_req(fc, num_pages);
1877 if (IS_ERR(req)) { 1977 if (IS_ERR(req)) {
1878 err = PTR_ERR(req); 1978 err = PTR_ERR(req);
1879 req = NULL; 1979 req = NULL;
@@ -1881,6 +1981,7 @@ long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg,
1881 } 1981 }
1882 memcpy(req->pages, pages, sizeof(req->pages[0]) * num_pages); 1982 memcpy(req->pages, pages, sizeof(req->pages[0]) * num_pages);
1883 req->num_pages = num_pages; 1983 req->num_pages = num_pages;
1984 fuse_page_descs_length_init(req, 0, req->num_pages);
1884 1985
1885 /* okay, let's send it to the client */ 1986 /* okay, let's send it to the client */
1886 req->in.h.opcode = FUSE_IOCTL; 1987 req->in.h.opcode = FUSE_IOCTL;
@@ -1981,7 +2082,7 @@ long fuse_ioctl_common(struct file *file, unsigned int cmd,
1981 struct inode *inode = file->f_dentry->d_inode; 2082 struct inode *inode = file->f_dentry->d_inode;
1982 struct fuse_conn *fc = get_fuse_conn(inode); 2083 struct fuse_conn *fc = get_fuse_conn(inode);
1983 2084
1984 if (!fuse_allow_task(fc, current)) 2085 if (!fuse_allow_current_process(fc))
1985 return -EACCES; 2086 return -EACCES;
1986 2087
1987 if (is_bad_inode(inode)) 2088 if (is_bad_inode(inode))
@@ -2066,6 +2167,7 @@ unsigned fuse_file_poll(struct file *file, poll_table *wait)
2066 return DEFAULT_POLLMASK; 2167 return DEFAULT_POLLMASK;
2067 2168
2068 poll_wait(file, &ff->poll_wait, wait); 2169 poll_wait(file, &ff->poll_wait, wait);
2170 inarg.events = (__u32)poll_requested_events(wait);
2069 2171
2070 /* 2172 /*
2071 * Ask for notification iff there's someone waiting for it. 2173 * Ask for notification iff there's someone waiting for it.
@@ -2076,7 +2178,7 @@ unsigned fuse_file_poll(struct file *file, poll_table *wait)
2076 fuse_register_polled_file(fc, ff); 2178 fuse_register_polled_file(fc, ff);
2077 } 2179 }
2078 2180
2079 req = fuse_get_req(fc); 2181 req = fuse_get_req_nopages(fc);
2080 if (IS_ERR(req)) 2182 if (IS_ERR(req))
2081 return POLLERR; 2183 return POLLERR;
2082 2184
@@ -2126,41 +2228,6 @@ int fuse_notify_poll_wakeup(struct fuse_conn *fc,
2126 return 0; 2228 return 0;
2127} 2229}
2128 2230
2129static ssize_t fuse_loop_dio(struct file *filp, const struct iovec *iov,
2130 unsigned long nr_segs, loff_t *ppos, int rw)
2131{
2132 const struct iovec *vector = iov;
2133 ssize_t ret = 0;
2134
2135 while (nr_segs > 0) {
2136 void __user *base;
2137 size_t len;
2138 ssize_t nr;
2139
2140 base = vector->iov_base;
2141 len = vector->iov_len;
2142 vector++;
2143 nr_segs--;
2144
2145 if (rw == WRITE)
2146 nr = __fuse_direct_write(filp, base, len, ppos);
2147 else
2148 nr = fuse_direct_read(filp, base, len, ppos);
2149
2150 if (nr < 0) {
2151 if (!ret)
2152 ret = nr;
2153 break;
2154 }
2155 ret += nr;
2156 if (nr != len)
2157 break;
2158 }
2159
2160 return ret;
2161}
2162
2163
2164static ssize_t 2231static ssize_t
2165fuse_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, 2232fuse_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
2166 loff_t offset, unsigned long nr_segs) 2233 loff_t offset, unsigned long nr_segs)
@@ -2172,7 +2239,10 @@ fuse_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
2172 file = iocb->ki_filp; 2239 file = iocb->ki_filp;
2173 pos = offset; 2240 pos = offset;
2174 2241
2175 ret = fuse_loop_dio(file, iov, nr_segs, &pos, rw); 2242 if (rw == WRITE)
2243 ret = __fuse_direct_write(file, iov, nr_segs, &pos);
2244 else
2245 ret = __fuse_direct_read(file, iov, nr_segs, &pos);
2176 2246
2177 return ret; 2247 return ret;
2178} 2248}
@@ -2194,7 +2264,7 @@ static long fuse_file_fallocate(struct file *file, int mode, loff_t offset,
2194 if (fc->no_fallocate) 2264 if (fc->no_fallocate)
2195 return -EOPNOTSUPP; 2265 return -EOPNOTSUPP;
2196 2266
2197 req = fuse_get_req(fc); 2267 req = fuse_get_req_nopages(fc);
2198 if (IS_ERR(req)) 2268 if (IS_ERR(req))
2199 return PTR_ERR(req); 2269 return PTR_ERR(req);
2200 2270
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index e105a53fc72d..6aeba864f070 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -44,6 +44,9 @@
44 doing the mount will be allowed to access the filesystem */ 44 doing the mount will be allowed to access the filesystem */
45#define FUSE_ALLOW_OTHER (1 << 1) 45#define FUSE_ALLOW_OTHER (1 << 1)
46 46
47/** Number of page pointers embedded in fuse_req */
48#define FUSE_REQ_INLINE_PAGES 1
49
47/** List of active connections */ 50/** List of active connections */
48extern struct list_head fuse_conn_list; 51extern struct list_head fuse_conn_list;
49 52
@@ -103,6 +106,15 @@ struct fuse_inode {
103 106
104 /** List of writepage requests (pending or sent) */ 107 /** List of writepage requests (pending or sent) */
105 struct list_head writepages; 108 struct list_head writepages;
109
110 /** Miscellaneous bits describing inode state */
111 unsigned long state;
112};
113
114/** FUSE inode state bits */
115enum {
116 /** Advise readdirplus */
117 FUSE_I_ADVISE_RDPLUS,
106}; 118};
107 119
108struct fuse_conn; 120struct fuse_conn;
@@ -200,6 +212,12 @@ struct fuse_out {
200 struct fuse_arg args[3]; 212 struct fuse_arg args[3];
201}; 213};
202 214
215/** FUSE page descriptor */
216struct fuse_page_desc {
217 unsigned int length;
218 unsigned int offset;
219};
220
203/** The request state */ 221/** The request state */
204enum fuse_req_state { 222enum fuse_req_state {
205 FUSE_REQ_INIT = 0, 223 FUSE_REQ_INIT = 0,
@@ -291,14 +309,23 @@ struct fuse_req {
291 } misc; 309 } misc;
292 310
293 /** page vector */ 311 /** page vector */
294 struct page *pages[FUSE_MAX_PAGES_PER_REQ]; 312 struct page **pages;
313
314 /** page-descriptor vector */
315 struct fuse_page_desc *page_descs;
316
317 /** size of the 'pages' array */
318 unsigned max_pages;
319
320 /** inline page vector */
321 struct page *inline_pages[FUSE_REQ_INLINE_PAGES];
322
323 /** inline page-descriptor vector */
324 struct fuse_page_desc inline_page_descs[FUSE_REQ_INLINE_PAGES];
295 325
296 /** number of pages in vector */ 326 /** number of pages in vector */
297 unsigned num_pages; 327 unsigned num_pages;
298 328
299 /** offset of data on first page */
300 unsigned page_offset;
301
302 /** File used in the request (or NULL) */ 329 /** File used in the request (or NULL) */
303 struct fuse_file *ff; 330 struct fuse_file *ff;
304 331
@@ -487,6 +514,12 @@ struct fuse_conn {
487 /** Use enhanced/automatic page cache invalidation. */ 514 /** Use enhanced/automatic page cache invalidation. */
488 unsigned auto_inval_data:1; 515 unsigned auto_inval_data:1;
489 516
517 /** Does the filesystem support readdirplus? */
518 unsigned do_readdirplus:1;
519
520 /** Does the filesystem want adaptive readdirplus? */
521 unsigned readdirplus_auto:1;
522
490 /** The number of requests waiting for completion */ 523 /** The number of requests waiting for completion */
491 atomic_t num_waiting; 524 atomic_t num_waiting;
492 525
@@ -578,6 +611,9 @@ void fuse_queue_forget(struct fuse_conn *fc, struct fuse_forget_link *forget,
578 611
579struct fuse_forget_link *fuse_alloc_forget(void); 612struct fuse_forget_link *fuse_alloc_forget(void);
580 613
614/* Used by READDIRPLUS */
615void fuse_force_forget(struct file *file, u64 nodeid);
616
581/** 617/**
582 * Initialize READ or READDIR request 618 * Initialize READ or READDIR request
583 */ 619 */
@@ -658,9 +694,9 @@ void fuse_ctl_cleanup(void);
658/** 694/**
659 * Allocate a request 695 * Allocate a request
660 */ 696 */
661struct fuse_req *fuse_request_alloc(void); 697struct fuse_req *fuse_request_alloc(unsigned npages);
662 698
663struct fuse_req *fuse_request_alloc_nofs(void); 699struct fuse_req *fuse_request_alloc_nofs(unsigned npages);
664 700
665/** 701/**
666 * Free a request 702 * Free a request
@@ -668,14 +704,25 @@ struct fuse_req *fuse_request_alloc_nofs(void);
668void fuse_request_free(struct fuse_req *req); 704void fuse_request_free(struct fuse_req *req);
669 705
670/** 706/**
671 * Get a request, may fail with -ENOMEM 707 * Get a request, may fail with -ENOMEM,
708 * caller should specify # elements in req->pages[] explicitly
672 */ 709 */
673struct fuse_req *fuse_get_req(struct fuse_conn *fc); 710struct fuse_req *fuse_get_req(struct fuse_conn *fc, unsigned npages);
711
712/**
713 * Get a request, may fail with -ENOMEM,
714 * useful for callers who don't use req->pages[]
715 */
716static inline struct fuse_req *fuse_get_req_nopages(struct fuse_conn *fc)
717{
718 return fuse_get_req(fc, 0);
719}
674 720
675/** 721/**
676 * Gets a request for a file operation, always succeeds 722 * Gets a request for a file operation, always succeeds
677 */ 723 */
678struct fuse_req *fuse_get_req_nofail(struct fuse_conn *fc, struct file *file); 724struct fuse_req *fuse_get_req_nofail_nopages(struct fuse_conn *fc,
725 struct file *file);
679 726
680/** 727/**
681 * Decrement reference count of a request. If count goes to zero free 728 * Decrement reference count of a request. If count goes to zero free
@@ -739,9 +786,9 @@ void fuse_ctl_remove_conn(struct fuse_conn *fc);
739int fuse_valid_type(int m); 786int fuse_valid_type(int m);
740 787
741/** 788/**
742 * Is task allowed to perform filesystem operation? 789 * Is current process allowed to perform filesystem operation?
743 */ 790 */
744int fuse_allow_task(struct fuse_conn *fc, struct task_struct *task); 791int fuse_allow_current_process(struct fuse_conn *fc);
745 792
746u64 fuse_lock_owner_id(struct fuse_conn *fc, fl_owner_t id); 793u64 fuse_lock_owner_id(struct fuse_conn *fc, fl_owner_t id);
747 794
@@ -776,8 +823,9 @@ int fuse_reverse_inval_entry(struct super_block *sb, u64 parent_nodeid,
776 823
777int fuse_do_open(struct fuse_conn *fc, u64 nodeid, struct file *file, 824int fuse_do_open(struct fuse_conn *fc, u64 nodeid, struct file *file,
778 bool isdir); 825 bool isdir);
779ssize_t fuse_direct_io(struct file *file, const char __user *buf, 826ssize_t fuse_direct_io(struct file *file, const struct iovec *iov,
780 size_t count, loff_t *ppos, int write); 827 unsigned long nr_segs, size_t count, loff_t *ppos,
828 int write);
781long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg, 829long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg,
782 unsigned int flags); 830 unsigned int flags);
783long fuse_ioctl_common(struct file *file, unsigned int cmd, 831long fuse_ioctl_common(struct file *file, unsigned int cmd,
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 73ca6b72beaf..01353ed75750 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -92,6 +92,7 @@ static struct inode *fuse_alloc_inode(struct super_block *sb)
92 fi->attr_version = 0; 92 fi->attr_version = 0;
93 fi->writectr = 0; 93 fi->writectr = 0;
94 fi->orig_ino = 0; 94 fi->orig_ino = 0;
95 fi->state = 0;
95 INIT_LIST_HEAD(&fi->write_files); 96 INIT_LIST_HEAD(&fi->write_files);
96 INIT_LIST_HEAD(&fi->queued_writes); 97 INIT_LIST_HEAD(&fi->queued_writes);
97 INIT_LIST_HEAD(&fi->writepages); 98 INIT_LIST_HEAD(&fi->writepages);
@@ -408,12 +409,12 @@ static int fuse_statfs(struct dentry *dentry, struct kstatfs *buf)
408 struct fuse_statfs_out outarg; 409 struct fuse_statfs_out outarg;
409 int err; 410 int err;
410 411
411 if (!fuse_allow_task(fc, current)) { 412 if (!fuse_allow_current_process(fc)) {
412 buf->f_type = FUSE_SUPER_MAGIC; 413 buf->f_type = FUSE_SUPER_MAGIC;
413 return 0; 414 return 0;
414 } 415 }
415 416
416 req = fuse_get_req(fc); 417 req = fuse_get_req_nopages(fc);
417 if (IS_ERR(req)) 418 if (IS_ERR(req))
418 return PTR_ERR(req); 419 return PTR_ERR(req);
419 420
@@ -863,6 +864,10 @@ static void process_init_reply(struct fuse_conn *fc, struct fuse_req *req)
863 fc->dont_mask = 1; 864 fc->dont_mask = 1;
864 if (arg->flags & FUSE_AUTO_INVAL_DATA) 865 if (arg->flags & FUSE_AUTO_INVAL_DATA)
865 fc->auto_inval_data = 1; 866 fc->auto_inval_data = 1;
867 if (arg->flags & FUSE_DO_READDIRPLUS)
868 fc->do_readdirplus = 1;
869 if (arg->flags & FUSE_READDIRPLUS_AUTO)
870 fc->readdirplus_auto = 1;
866 } else { 871 } else {
867 ra_pages = fc->max_read / PAGE_CACHE_SIZE; 872 ra_pages = fc->max_read / PAGE_CACHE_SIZE;
868 fc->no_lock = 1; 873 fc->no_lock = 1;
@@ -889,7 +894,8 @@ static void fuse_send_init(struct fuse_conn *fc, struct fuse_req *req)
889 arg->flags |= FUSE_ASYNC_READ | FUSE_POSIX_LOCKS | FUSE_ATOMIC_O_TRUNC | 894 arg->flags |= FUSE_ASYNC_READ | FUSE_POSIX_LOCKS | FUSE_ATOMIC_O_TRUNC |
890 FUSE_EXPORT_SUPPORT | FUSE_BIG_WRITES | FUSE_DONT_MASK | 895 FUSE_EXPORT_SUPPORT | FUSE_BIG_WRITES | FUSE_DONT_MASK |
891 FUSE_SPLICE_WRITE | FUSE_SPLICE_MOVE | FUSE_SPLICE_READ | 896 FUSE_SPLICE_WRITE | FUSE_SPLICE_MOVE | FUSE_SPLICE_READ |
892 FUSE_FLOCK_LOCKS | FUSE_IOCTL_DIR | FUSE_AUTO_INVAL_DATA; 897 FUSE_FLOCK_LOCKS | FUSE_IOCTL_DIR | FUSE_AUTO_INVAL_DATA |
898 FUSE_DO_READDIRPLUS | FUSE_READDIRPLUS_AUTO;
893 req->in.h.opcode = FUSE_INIT; 899 req->in.h.opcode = FUSE_INIT;
894 req->in.numargs = 1; 900 req->in.numargs = 1;
895 req->in.args[0].size = sizeof(*arg); 901 req->in.args[0].size = sizeof(*arg);
@@ -1034,12 +1040,12 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent)
1034 /* only now - we want root dentry with NULL ->d_op */ 1040 /* only now - we want root dentry with NULL ->d_op */
1035 sb->s_d_op = &fuse_dentry_operations; 1041 sb->s_d_op = &fuse_dentry_operations;
1036 1042
1037 init_req = fuse_request_alloc(); 1043 init_req = fuse_request_alloc(0);
1038 if (!init_req) 1044 if (!init_req)
1039 goto err_put_root; 1045 goto err_put_root;
1040 1046
1041 if (is_bdev) { 1047 if (is_bdev) {
1042 fc->destroy_req = fuse_request_alloc(); 1048 fc->destroy_req = fuse_request_alloc(0);
1043 if (!fc->destroy_req) 1049 if (!fc->destroy_req)
1044 goto err_free_init_req; 1050 goto err_free_init_req;
1045 } 1051 }
diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c
index 30de4f2a2ea9..24f414f0ce61 100644
--- a/fs/gfs2/aops.c
+++ b/fs/gfs2/aops.c
@@ -51,7 +51,7 @@ static void gfs2_page_add_databufs(struct gfs2_inode *ip, struct page *page,
51 continue; 51 continue;
52 if (gfs2_is_jdata(ip)) 52 if (gfs2_is_jdata(ip))
53 set_buffer_uptodate(bh); 53 set_buffer_uptodate(bh);
54 gfs2_trans_add_bh(ip->i_gl, bh, 0); 54 gfs2_trans_add_data(ip->i_gl, bh);
55 } 55 }
56} 56}
57 57
@@ -230,16 +230,14 @@ out_ignore:
230} 230}
231 231
232/** 232/**
233 * gfs2_writeback_writepages - Write a bunch of dirty pages back to disk 233 * gfs2_writepages - Write a bunch of dirty pages back to disk
234 * @mapping: The mapping to write 234 * @mapping: The mapping to write
235 * @wbc: Write-back control 235 * @wbc: Write-back control
236 * 236 *
237 * For the data=writeback case we can already ignore buffer heads 237 * Used for both ordered and writeback modes.
238 * and write whole extents at once. This is a big reduction in the
239 * number of I/O requests we send and the bmap calls we make in this case.
240 */ 238 */
241static int gfs2_writeback_writepages(struct address_space *mapping, 239static int gfs2_writepages(struct address_space *mapping,
242 struct writeback_control *wbc) 240 struct writeback_control *wbc)
243{ 241{
244 return mpage_writepages(mapping, wbc, gfs2_get_block_noalloc); 242 return mpage_writepages(mapping, wbc, gfs2_get_block_noalloc);
245} 243}
@@ -852,7 +850,7 @@ static int gfs2_write_end(struct file *file, struct address_space *mapping,
852 goto failed; 850 goto failed;
853 } 851 }
854 852
855 gfs2_trans_add_bh(ip->i_gl, dibh, 1); 853 gfs2_trans_add_meta(ip->i_gl, dibh);
856 854
857 if (gfs2_is_stuffed(ip)) 855 if (gfs2_is_stuffed(ip))
858 return gfs2_stuffed_write_end(inode, dibh, pos, len, copied, page); 856 return gfs2_stuffed_write_end(inode, dibh, pos, len, copied, page);
@@ -1102,7 +1100,7 @@ cannot_release:
1102 1100
1103static const struct address_space_operations gfs2_writeback_aops = { 1101static const struct address_space_operations gfs2_writeback_aops = {
1104 .writepage = gfs2_writeback_writepage, 1102 .writepage = gfs2_writeback_writepage,
1105 .writepages = gfs2_writeback_writepages, 1103 .writepages = gfs2_writepages,
1106 .readpage = gfs2_readpage, 1104 .readpage = gfs2_readpage,
1107 .readpages = gfs2_readpages, 1105 .readpages = gfs2_readpages,
1108 .write_begin = gfs2_write_begin, 1106 .write_begin = gfs2_write_begin,
@@ -1118,6 +1116,7 @@ static const struct address_space_operations gfs2_writeback_aops = {
1118 1116
1119static const struct address_space_operations gfs2_ordered_aops = { 1117static const struct address_space_operations gfs2_ordered_aops = {
1120 .writepage = gfs2_ordered_writepage, 1118 .writepage = gfs2_ordered_writepage,
1119 .writepages = gfs2_writepages,
1121 .readpage = gfs2_readpage, 1120 .readpage = gfs2_readpage,
1122 .readpages = gfs2_readpages, 1121 .readpages = gfs2_readpages,
1123 .write_begin = gfs2_write_begin, 1122 .write_begin = gfs2_write_begin,
diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index a68e91bcef3d..df686d13a7d2 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -22,6 +22,7 @@
22#include "meta_io.h" 22#include "meta_io.h"
23#include "quota.h" 23#include "quota.h"
24#include "rgrp.h" 24#include "rgrp.h"
25#include "log.h"
25#include "super.h" 26#include "super.h"
26#include "trans.h" 27#include "trans.h"
27#include "dir.h" 28#include "dir.h"
@@ -93,7 +94,7 @@ static int gfs2_unstuffer_page(struct gfs2_inode *ip, struct buffer_head *dibh,
93 if (!gfs2_is_jdata(ip)) 94 if (!gfs2_is_jdata(ip))
94 mark_buffer_dirty(bh); 95 mark_buffer_dirty(bh);
95 if (!gfs2_is_writeback(ip)) 96 if (!gfs2_is_writeback(ip))
96 gfs2_trans_add_bh(ip->i_gl, bh, 0); 97 gfs2_trans_add_data(ip->i_gl, bh);
97 98
98 if (release) { 99 if (release) {
99 unlock_page(page); 100 unlock_page(page);
@@ -153,7 +154,7 @@ int gfs2_unstuff_dinode(struct gfs2_inode *ip, struct page *page)
153 154
154 /* Set up the pointer to the new block */ 155 /* Set up the pointer to the new block */
155 156
156 gfs2_trans_add_bh(ip->i_gl, dibh, 1); 157 gfs2_trans_add_meta(ip->i_gl, dibh);
157 di = (struct gfs2_dinode *)dibh->b_data; 158 di = (struct gfs2_dinode *)dibh->b_data;
158 gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode)); 159 gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode));
159 160
@@ -405,7 +406,7 @@ static inline __be64 *gfs2_indirect_init(struct metapath *mp,
405 BUG_ON(i < 1); 406 BUG_ON(i < 1);
406 BUG_ON(mp->mp_bh[i] != NULL); 407 BUG_ON(mp->mp_bh[i] != NULL);
407 mp->mp_bh[i] = gfs2_meta_new(gl, bn); 408 mp->mp_bh[i] = gfs2_meta_new(gl, bn);
408 gfs2_trans_add_bh(gl, mp->mp_bh[i], 1); 409 gfs2_trans_add_meta(gl, mp->mp_bh[i]);
409 gfs2_metatype_set(mp->mp_bh[i], GFS2_METATYPE_IN, GFS2_FORMAT_IN); 410 gfs2_metatype_set(mp->mp_bh[i], GFS2_METATYPE_IN, GFS2_FORMAT_IN);
410 gfs2_buffer_clear_tail(mp->mp_bh[i], sizeof(struct gfs2_meta_header)); 411 gfs2_buffer_clear_tail(mp->mp_bh[i], sizeof(struct gfs2_meta_header));
411 ptr += offset; 412 ptr += offset;
@@ -468,7 +469,7 @@ static int gfs2_bmap_alloc(struct inode *inode, const sector_t lblock,
468 BUG_ON(sheight < 1); 469 BUG_ON(sheight < 1);
469 BUG_ON(dibh == NULL); 470 BUG_ON(dibh == NULL);
470 471
471 gfs2_trans_add_bh(ip->i_gl, dibh, 1); 472 gfs2_trans_add_meta(ip->i_gl, dibh);
472 473
473 if (height == sheight) { 474 if (height == sheight) {
474 struct buffer_head *bh; 475 struct buffer_head *bh;
@@ -544,7 +545,7 @@ static int gfs2_bmap_alloc(struct inode *inode, const sector_t lblock,
544 /* Branching from existing tree */ 545 /* Branching from existing tree */
545 case ALLOC_GROW_DEPTH: 546 case ALLOC_GROW_DEPTH:
546 if (i > 1 && i < height) 547 if (i > 1 && i < height)
547 gfs2_trans_add_bh(ip->i_gl, mp->mp_bh[i-1], 1); 548 gfs2_trans_add_meta(ip->i_gl, mp->mp_bh[i-1]);
548 for (; i < height && n > 0; i++, n--) 549 for (; i < height && n > 0; i++, n--)
549 gfs2_indirect_init(mp, ip->i_gl, i, 550 gfs2_indirect_init(mp, ip->i_gl, i,
550 mp->mp_list[i-1], bn++); 551 mp->mp_list[i-1], bn++);
@@ -556,7 +557,7 @@ static int gfs2_bmap_alloc(struct inode *inode, const sector_t lblock,
556 case ALLOC_DATA: 557 case ALLOC_DATA:
557 BUG_ON(n > dblks); 558 BUG_ON(n > dblks);
558 BUG_ON(mp->mp_bh[end_of_metadata] == NULL); 559 BUG_ON(mp->mp_bh[end_of_metadata] == NULL);
559 gfs2_trans_add_bh(ip->i_gl, mp->mp_bh[end_of_metadata], 1); 560 gfs2_trans_add_meta(ip->i_gl, mp->mp_bh[end_of_metadata]);
560 dblks = n; 561 dblks = n;
561 ptr = metapointer(end_of_metadata, mp); 562 ptr = metapointer(end_of_metadata, mp);
562 dblock = bn; 563 dblock = bn;
@@ -796,8 +797,8 @@ static int do_strip(struct gfs2_inode *ip, struct buffer_head *dibh,
796 797
797 down_write(&ip->i_rw_mutex); 798 down_write(&ip->i_rw_mutex);
798 799
799 gfs2_trans_add_bh(ip->i_gl, dibh, 1); 800 gfs2_trans_add_meta(ip->i_gl, dibh);
800 gfs2_trans_add_bh(ip->i_gl, bh, 1); 801 gfs2_trans_add_meta(ip->i_gl, bh);
801 802
802 bstart = 0; 803 bstart = 0;
803 blen = 0; 804 blen = 0;
@@ -981,7 +982,7 @@ static int gfs2_block_truncate_page(struct address_space *mapping, loff_t from)
981 } 982 }
982 983
983 if (!gfs2_is_writeback(ip)) 984 if (!gfs2_is_writeback(ip))
984 gfs2_trans_add_bh(ip->i_gl, bh, 0); 985 gfs2_trans_add_data(ip->i_gl, bh);
985 986
986 zero_user(page, offset, length); 987 zero_user(page, offset, length);
987 mark_buffer_dirty(bh); 988 mark_buffer_dirty(bh);
@@ -1046,7 +1047,7 @@ static int trunc_start(struct inode *inode, u64 oldsize, u64 newsize)
1046 if (error) 1047 if (error)
1047 goto out; 1048 goto out;
1048 1049
1049 gfs2_trans_add_bh(ip->i_gl, dibh, 1); 1050 gfs2_trans_add_meta(ip->i_gl, dibh);
1050 1051
1051 if (gfs2_is_stuffed(ip)) { 1052 if (gfs2_is_stuffed(ip)) {
1052 gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode) + newsize); 1053 gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode) + newsize);
@@ -1137,11 +1138,12 @@ static int trunc_end(struct gfs2_inode *ip)
1137 ip->i_height = 0; 1138 ip->i_height = 0;
1138 ip->i_goal = ip->i_no_addr; 1139 ip->i_goal = ip->i_no_addr;
1139 gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode)); 1140 gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode));
1141 gfs2_ordered_del_inode(ip);
1140 } 1142 }
1141 ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME; 1143 ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME;
1142 ip->i_diskflags &= ~GFS2_DIF_TRUNC_IN_PROG; 1144 ip->i_diskflags &= ~GFS2_DIF_TRUNC_IN_PROG;
1143 1145
1144 gfs2_trans_add_bh(ip->i_gl, dibh, 1); 1146 gfs2_trans_add_meta(ip->i_gl, dibh);
1145 gfs2_dinode_out(ip, dibh->b_data); 1147 gfs2_dinode_out(ip, dibh->b_data);
1146 brelse(dibh); 1148 brelse(dibh);
1147 1149
@@ -1246,7 +1248,7 @@ static int do_grow(struct inode *inode, u64 size)
1246 1248
1247 i_size_write(inode, size); 1249 i_size_write(inode, size);
1248 ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME; 1250 ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME;
1249 gfs2_trans_add_bh(ip->i_gl, dibh, 1); 1251 gfs2_trans_add_meta(ip->i_gl, dibh);
1250 gfs2_dinode_out(ip, dibh->b_data); 1252 gfs2_dinode_out(ip, dibh->b_data);
1251 brelse(dibh); 1253 brelse(dibh);
1252 1254
@@ -1286,6 +1288,10 @@ int gfs2_setattr_size(struct inode *inode, u64 newsize)
1286 1288
1287 inode_dio_wait(inode); 1289 inode_dio_wait(inode);
1288 1290
1291 ret = gfs2_rs_alloc(GFS2_I(inode));
1292 if (ret)
1293 return ret;
1294
1289 oldsize = inode->i_size; 1295 oldsize = inode->i_size;
1290 if (newsize >= oldsize) 1296 if (newsize >= oldsize)
1291 return do_grow(inode, newsize); 1297 return do_grow(inode, newsize);
diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c
index 9a35670fdc38..7179478e5a28 100644
--- a/fs/gfs2/dir.c
+++ b/fs/gfs2/dir.c
@@ -93,7 +93,7 @@ int gfs2_dir_get_new_buffer(struct gfs2_inode *ip, u64 block,
93 struct buffer_head *bh; 93 struct buffer_head *bh;
94 94
95 bh = gfs2_meta_new(ip->i_gl, block); 95 bh = gfs2_meta_new(ip->i_gl, block);
96 gfs2_trans_add_bh(ip->i_gl, bh, 1); 96 gfs2_trans_add_meta(ip->i_gl, bh);
97 gfs2_metatype_set(bh, GFS2_METATYPE_JD, GFS2_FORMAT_JD); 97 gfs2_metatype_set(bh, GFS2_METATYPE_JD, GFS2_FORMAT_JD);
98 gfs2_buffer_clear_tail(bh, sizeof(struct gfs2_meta_header)); 98 gfs2_buffer_clear_tail(bh, sizeof(struct gfs2_meta_header));
99 *bhp = bh; 99 *bhp = bh;
@@ -127,7 +127,7 @@ static int gfs2_dir_write_stuffed(struct gfs2_inode *ip, const char *buf,
127 if (error) 127 if (error)
128 return error; 128 return error;
129 129
130 gfs2_trans_add_bh(ip->i_gl, dibh, 1); 130 gfs2_trans_add_meta(ip->i_gl, dibh);
131 memcpy(dibh->b_data + offset + sizeof(struct gfs2_dinode), buf, size); 131 memcpy(dibh->b_data + offset + sizeof(struct gfs2_dinode), buf, size);
132 if (ip->i_inode.i_size < offset + size) 132 if (ip->i_inode.i_size < offset + size)
133 i_size_write(&ip->i_inode, offset + size); 133 i_size_write(&ip->i_inode, offset + size);
@@ -209,7 +209,7 @@ static int gfs2_dir_write_data(struct gfs2_inode *ip, const char *buf,
209 if (error) 209 if (error)
210 goto fail; 210 goto fail;
211 211
212 gfs2_trans_add_bh(ip->i_gl, bh, 1); 212 gfs2_trans_add_meta(ip->i_gl, bh);
213 memcpy(bh->b_data + o, buf, amount); 213 memcpy(bh->b_data + o, buf, amount);
214 brelse(bh); 214 brelse(bh);
215 215
@@ -231,7 +231,7 @@ out:
231 i_size_write(&ip->i_inode, offset + copied); 231 i_size_write(&ip->i_inode, offset + copied);
232 ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME; 232 ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME;
233 233
234 gfs2_trans_add_bh(ip->i_gl, dibh, 1); 234 gfs2_trans_add_meta(ip->i_gl, dibh);
235 gfs2_dinode_out(ip, dibh->b_data); 235 gfs2_dinode_out(ip, dibh->b_data);
236 brelse(dibh); 236 brelse(dibh);
237 237
@@ -647,7 +647,7 @@ static void dirent_del(struct gfs2_inode *dip, struct buffer_head *bh,
647 return; 647 return;
648 } 648 }
649 649
650 gfs2_trans_add_bh(dip->i_gl, bh, 1); 650 gfs2_trans_add_meta(dip->i_gl, bh);
651 651
652 /* If there is no prev entry, this is the first entry in the block. 652 /* If there is no prev entry, this is the first entry in the block.
653 The de_rec_len is already as big as it needs to be. Just zero 653 The de_rec_len is already as big as it needs to be. Just zero
@@ -690,7 +690,7 @@ static struct gfs2_dirent *gfs2_init_dirent(struct inode *inode,
690 offset = GFS2_DIRENT_SIZE(be16_to_cpu(dent->de_name_len)); 690 offset = GFS2_DIRENT_SIZE(be16_to_cpu(dent->de_name_len));
691 totlen = be16_to_cpu(dent->de_rec_len); 691 totlen = be16_to_cpu(dent->de_rec_len);
692 BUG_ON(offset + name->len > totlen); 692 BUG_ON(offset + name->len > totlen);
693 gfs2_trans_add_bh(ip->i_gl, bh, 1); 693 gfs2_trans_add_meta(ip->i_gl, bh);
694 ndent = (struct gfs2_dirent *)((char *)dent + offset); 694 ndent = (struct gfs2_dirent *)((char *)dent + offset);
695 dent->de_rec_len = cpu_to_be16(offset); 695 dent->de_rec_len = cpu_to_be16(offset);
696 gfs2_qstr2dirent(name, totlen - offset, ndent); 696 gfs2_qstr2dirent(name, totlen - offset, ndent);
@@ -831,7 +831,7 @@ static struct gfs2_leaf *new_leaf(struct inode *inode, struct buffer_head **pbh,
831 return NULL; 831 return NULL;
832 832
833 gfs2_trans_add_unrevoke(GFS2_SB(inode), bn, 1); 833 gfs2_trans_add_unrevoke(GFS2_SB(inode), bn, 1);
834 gfs2_trans_add_bh(ip->i_gl, bh, 1); 834 gfs2_trans_add_meta(ip->i_gl, bh);
835 gfs2_metatype_set(bh, GFS2_METATYPE_LF, GFS2_FORMAT_LF); 835 gfs2_metatype_set(bh, GFS2_METATYPE_LF, GFS2_FORMAT_LF);
836 leaf = (struct gfs2_leaf *)bh->b_data; 836 leaf = (struct gfs2_leaf *)bh->b_data;
837 leaf->lf_depth = cpu_to_be16(depth); 837 leaf->lf_depth = cpu_to_be16(depth);
@@ -916,7 +916,7 @@ static int dir_make_exhash(struct inode *inode)
916 /* We're done with the new leaf block, now setup the new 916 /* We're done with the new leaf block, now setup the new
917 hash table. */ 917 hash table. */
918 918
919 gfs2_trans_add_bh(dip->i_gl, dibh, 1); 919 gfs2_trans_add_meta(dip->i_gl, dibh);
920 gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode)); 920 gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode));
921 921
922 lp = (__be64 *)(dibh->b_data + sizeof(struct gfs2_dinode)); 922 lp = (__be64 *)(dibh->b_data + sizeof(struct gfs2_dinode));
@@ -976,7 +976,7 @@ static int dir_split_leaf(struct inode *inode, const struct qstr *name)
976 return 1; /* can't split */ 976 return 1; /* can't split */
977 } 977 }
978 978
979 gfs2_trans_add_bh(dip->i_gl, obh, 1); 979 gfs2_trans_add_meta(dip->i_gl, obh);
980 980
981 nleaf = new_leaf(inode, &nbh, be16_to_cpu(oleaf->lf_depth) + 1); 981 nleaf = new_leaf(inode, &nbh, be16_to_cpu(oleaf->lf_depth) + 1);
982 if (!nleaf) { 982 if (!nleaf) {
@@ -1069,7 +1069,7 @@ static int dir_split_leaf(struct inode *inode, const struct qstr *name)
1069 1069
1070 error = gfs2_meta_inode_buffer(dip, &dibh); 1070 error = gfs2_meta_inode_buffer(dip, &dibh);
1071 if (!gfs2_assert_withdraw(GFS2_SB(&dip->i_inode), !error)) { 1071 if (!gfs2_assert_withdraw(GFS2_SB(&dip->i_inode), !error)) {
1072 gfs2_trans_add_bh(dip->i_gl, dibh, 1); 1072 gfs2_trans_add_meta(dip->i_gl, dibh);
1073 gfs2_add_inode_blocks(&dip->i_inode, 1); 1073 gfs2_add_inode_blocks(&dip->i_inode, 1);
1074 gfs2_dinode_out(dip, dibh->b_data); 1074 gfs2_dinode_out(dip, dibh->b_data);
1075 brelse(dibh); 1075 brelse(dibh);
@@ -1622,7 +1622,7 @@ static int dir_new_leaf(struct inode *inode, const struct qstr *name)
1622 return error; 1622 return error;
1623 } while(1); 1623 } while(1);
1624 1624
1625 gfs2_trans_add_bh(ip->i_gl, obh, 1); 1625 gfs2_trans_add_meta(ip->i_gl, obh);
1626 1626
1627 leaf = new_leaf(inode, &bh, be16_to_cpu(oleaf->lf_depth)); 1627 leaf = new_leaf(inode, &bh, be16_to_cpu(oleaf->lf_depth));
1628 if (!leaf) { 1628 if (!leaf) {
@@ -1636,7 +1636,7 @@ static int dir_new_leaf(struct inode *inode, const struct qstr *name)
1636 error = gfs2_meta_inode_buffer(ip, &bh); 1636 error = gfs2_meta_inode_buffer(ip, &bh);
1637 if (error) 1637 if (error)
1638 return error; 1638 return error;
1639 gfs2_trans_add_bh(ip->i_gl, bh, 1); 1639 gfs2_trans_add_meta(ip->i_gl, bh);
1640 gfs2_add_inode_blocks(&ip->i_inode, 1); 1640 gfs2_add_inode_blocks(&ip->i_inode, 1);
1641 gfs2_dinode_out(ip, bh->b_data); 1641 gfs2_dinode_out(ip, bh->b_data);
1642 brelse(bh); 1642 brelse(bh);
@@ -1795,7 +1795,7 @@ int gfs2_dir_mvino(struct gfs2_inode *dip, const struct qstr *filename,
1795 if (IS_ERR(dent)) 1795 if (IS_ERR(dent))
1796 return PTR_ERR(dent); 1796 return PTR_ERR(dent);
1797 1797
1798 gfs2_trans_add_bh(dip->i_gl, bh, 1); 1798 gfs2_trans_add_meta(dip->i_gl, bh);
1799 gfs2_inum_out(nip, dent); 1799 gfs2_inum_out(nip, dent);
1800 dent->de_type = cpu_to_be16(new_type); 1800 dent->de_type = cpu_to_be16(new_type);
1801 1801
@@ -1804,7 +1804,7 @@ int gfs2_dir_mvino(struct gfs2_inode *dip, const struct qstr *filename,
1804 error = gfs2_meta_inode_buffer(dip, &bh); 1804 error = gfs2_meta_inode_buffer(dip, &bh);
1805 if (error) 1805 if (error)
1806 return error; 1806 return error;
1807 gfs2_trans_add_bh(dip->i_gl, bh, 1); 1807 gfs2_trans_add_meta(dip->i_gl, bh);
1808 } 1808 }
1809 1809
1810 dip->i_inode.i_mtime = dip->i_inode.i_ctime = CURRENT_TIME; 1810 dip->i_inode.i_mtime = dip->i_inode.i_ctime = CURRENT_TIME;
@@ -1917,7 +1917,7 @@ static int leaf_dealloc(struct gfs2_inode *dip, u32 index, u32 len,
1917 if (error) 1917 if (error)
1918 goto out_end_trans; 1918 goto out_end_trans;
1919 1919
1920 gfs2_trans_add_bh(dip->i_gl, dibh, 1); 1920 gfs2_trans_add_meta(dip->i_gl, dibh);
1921 /* On the last dealloc, make this a regular file in case we crash. 1921 /* On the last dealloc, make this a regular file in case we crash.
1922 (We don't want to free these blocks a second time.) */ 1922 (We don't want to free these blocks a second time.) */
1923 if (last_dealloc) 1923 if (last_dealloc)
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index 991ab2d484dd..2687f50d98cb 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -276,7 +276,7 @@ static int do_gfs2_set_flags(struct file *filp, u32 reqflags, u32 mask)
276 error = gfs2_meta_inode_buffer(ip, &bh); 276 error = gfs2_meta_inode_buffer(ip, &bh);
277 if (error) 277 if (error)
278 goto out_trans_end; 278 goto out_trans_end;
279 gfs2_trans_add_bh(ip->i_gl, bh, 1); 279 gfs2_trans_add_meta(ip->i_gl, bh);
280 ip->i_diskflags = new_flags; 280 ip->i_diskflags = new_flags;
281 gfs2_dinode_out(ip, bh->b_data); 281 gfs2_dinode_out(ip, bh->b_data);
282 brelse(bh); 282 brelse(bh);
@@ -483,7 +483,7 @@ out:
483 gfs2_holder_uninit(&gh); 483 gfs2_holder_uninit(&gh);
484 if (ret == 0) { 484 if (ret == 0) {
485 set_page_dirty(page); 485 set_page_dirty(page);
486 wait_on_page_writeback(page); 486 wait_for_stable_page(page);
487 } 487 }
488 sb_end_pagefault(inode->i_sb); 488 sb_end_pagefault(inode->i_sb);
489 return block_page_mkwrite_return(ret); 489 return block_page_mkwrite_return(ret);
@@ -709,7 +709,7 @@ static int fallocate_chunk(struct inode *inode, loff_t offset, loff_t len,
709 if (unlikely(error)) 709 if (unlikely(error))
710 return error; 710 return error;
711 711
712 gfs2_trans_add_bh(ip->i_gl, dibh, 1); 712 gfs2_trans_add_meta(ip->i_gl, dibh);
713 713
714 if (gfs2_is_stuffed(ip)) { 714 if (gfs2_is_stuffed(ip)) {
715 error = gfs2_unstuff_dinode(ip, NULL); 715 error = gfs2_unstuff_dinode(ip, NULL);
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index 992c5c0cb504..cf3515546739 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -30,6 +30,7 @@
30#include <linux/rculist_bl.h> 30#include <linux/rculist_bl.h>
31#include <linux/bit_spinlock.h> 31#include <linux/bit_spinlock.h>
32#include <linux/percpu.h> 32#include <linux/percpu.h>
33#include <linux/list_sort.h>
33 34
34#include "gfs2.h" 35#include "gfs2.h"
35#include "incore.h" 36#include "incore.h"
@@ -1376,56 +1377,105 @@ void gfs2_glock_complete(struct gfs2_glock *gl, int ret)
1376 gfs2_glock_put(gl); 1377 gfs2_glock_put(gl);
1377} 1378}
1378 1379
1380static int glock_cmp(void *priv, struct list_head *a, struct list_head *b)
1381{
1382 struct gfs2_glock *gla, *glb;
1379 1383
1380static int gfs2_shrink_glock_memory(struct shrinker *shrink, 1384 gla = list_entry(a, struct gfs2_glock, gl_lru);
1381 struct shrink_control *sc) 1385 glb = list_entry(b, struct gfs2_glock, gl_lru);
1386
1387 if (gla->gl_name.ln_number > glb->gl_name.ln_number)
1388 return 1;
1389 if (gla->gl_name.ln_number < glb->gl_name.ln_number)
1390 return -1;
1391
1392 return 0;
1393}
1394
1395/**
1396 * gfs2_dispose_glock_lru - Demote a list of glocks
1397 * @list: The list to dispose of
1398 *
1399 * Disposing of glocks may involve disk accesses, so here we sort
1400 * the glocks by number (i.e. disk location of the inodes) so that if
1401 * there are any such accesses, they'll be sent in order (mostly).
1402 *
1403 * Must be called under the lru_lock, but may drop and retake this
1404 * lock. While the lru_lock is dropped, entries may vanish from the
1405 * list, but no new entries will appear on the list (since it is
1406 * private)
1407 */
1408
1409static void gfs2_dispose_glock_lru(struct list_head *list)
1410__releases(&lru_lock)
1411__acquires(&lru_lock)
1382{ 1412{
1383 struct gfs2_glock *gl; 1413 struct gfs2_glock *gl;
1384 int may_demote;
1385 int nr_skipped = 0;
1386 int nr = sc->nr_to_scan;
1387 gfp_t gfp_mask = sc->gfp_mask;
1388 LIST_HEAD(skipped);
1389 1414
1390 if (nr == 0) 1415 list_sort(NULL, list, glock_cmp);
1391 goto out;
1392 1416
1393 if (!(gfp_mask & __GFP_FS)) 1417 while(!list_empty(list)) {
1394 return -1; 1418 gl = list_entry(list->next, struct gfs2_glock, gl_lru);
1419 list_del_init(&gl->gl_lru);
1420 clear_bit(GLF_LRU, &gl->gl_flags);
1421 gfs2_glock_hold(gl);
1422 spin_unlock(&lru_lock);
1423 spin_lock(&gl->gl_spin);
1424 if (demote_ok(gl))
1425 handle_callback(gl, LM_ST_UNLOCKED, 0);
1426 WARN_ON(!test_and_clear_bit(GLF_LOCK, &gl->gl_flags));
1427 smp_mb__after_clear_bit();
1428 if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0)
1429 gfs2_glock_put_nolock(gl);
1430 spin_unlock(&gl->gl_spin);
1431 spin_lock(&lru_lock);
1432 }
1433}
1434
1435/**
1436 * gfs2_scan_glock_lru - Scan the LRU looking for locks to demote
1437 * @nr: The number of entries to scan
1438 *
1439 * This function selects the entries on the LRU which are able to
1440 * be demoted, and then kicks off the process by calling
1441 * gfs2_dispose_glock_lru() above.
1442 */
1443
1444static void gfs2_scan_glock_lru(int nr)
1445{
1446 struct gfs2_glock *gl;
1447 LIST_HEAD(skipped);
1448 LIST_HEAD(dispose);
1395 1449
1396 spin_lock(&lru_lock); 1450 spin_lock(&lru_lock);
1397 while(nr && !list_empty(&lru_list)) { 1451 while(nr && !list_empty(&lru_list)) {
1398 gl = list_entry(lru_list.next, struct gfs2_glock, gl_lru); 1452 gl = list_entry(lru_list.next, struct gfs2_glock, gl_lru);
1399 list_del_init(&gl->gl_lru);
1400 clear_bit(GLF_LRU, &gl->gl_flags);
1401 atomic_dec(&lru_count);
1402 1453
1403 /* Test for being demotable */ 1454 /* Test for being demotable */
1404 if (!test_and_set_bit(GLF_LOCK, &gl->gl_flags)) { 1455 if (!test_and_set_bit(GLF_LOCK, &gl->gl_flags)) {
1405 gfs2_glock_hold(gl); 1456 list_move(&gl->gl_lru, &dispose);
1406 spin_unlock(&lru_lock); 1457 atomic_dec(&lru_count);
1407 spin_lock(&gl->gl_spin); 1458 nr--;
1408 may_demote = demote_ok(gl);
1409 if (may_demote) {
1410 handle_callback(gl, LM_ST_UNLOCKED, 0);
1411 nr--;
1412 }
1413 clear_bit(GLF_LOCK, &gl->gl_flags);
1414 smp_mb__after_clear_bit();
1415 if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0)
1416 gfs2_glock_put_nolock(gl);
1417 spin_unlock(&gl->gl_spin);
1418 spin_lock(&lru_lock);
1419 continue; 1459 continue;
1420 } 1460 }
1421 nr_skipped++; 1461
1422 list_add(&gl->gl_lru, &skipped); 1462 list_move(&gl->gl_lru, &skipped);
1423 set_bit(GLF_LRU, &gl->gl_flags);
1424 } 1463 }
1425 list_splice(&skipped, &lru_list); 1464 list_splice(&skipped, &lru_list);
1426 atomic_add(nr_skipped, &lru_count); 1465 if (!list_empty(&dispose))
1466 gfs2_dispose_glock_lru(&dispose);
1427 spin_unlock(&lru_lock); 1467 spin_unlock(&lru_lock);
1428out: 1468}
1469
1470static int gfs2_shrink_glock_memory(struct shrinker *shrink,
1471 struct shrink_control *sc)
1472{
1473 if (sc->nr_to_scan) {
1474 if (!(sc->gfp_mask & __GFP_FS))
1475 return -1;
1476 gfs2_scan_glock_lru(sc->nr_to_scan);
1477 }
1478
1429 return (atomic_read(&lru_count) / 100) * sysctl_vfs_cache_pressure; 1479 return (atomic_read(&lru_count) / 100) * sysctl_vfs_cache_pressure;
1430} 1480}
1431 1481
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index c373a24fedd9..e2601ba38ef5 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -52,7 +52,6 @@ struct gfs2_log_header_host {
52 */ 52 */
53 53
54struct gfs2_log_operations { 54struct gfs2_log_operations {
55 void (*lo_add) (struct gfs2_sbd *sdp, struct gfs2_bufdata *bd);
56 void (*lo_before_commit) (struct gfs2_sbd *sdp); 55 void (*lo_before_commit) (struct gfs2_sbd *sdp);
57 void (*lo_after_commit) (struct gfs2_sbd *sdp, struct gfs2_ail *ai); 56 void (*lo_after_commit) (struct gfs2_sbd *sdp, struct gfs2_ail *ai);
58 void (*lo_before_scan) (struct gfs2_jdesc *jd, 57 void (*lo_before_scan) (struct gfs2_jdesc *jd,
@@ -341,6 +340,7 @@ enum {
341 GIF_QD_LOCKED = 1, 340 GIF_QD_LOCKED = 1,
342 GIF_ALLOC_FAILED = 2, 341 GIF_ALLOC_FAILED = 2,
343 GIF_SW_PAGED = 3, 342 GIF_SW_PAGED = 3,
343 GIF_ORDERED = 4,
344}; 344};
345 345
346struct gfs2_inode { 346struct gfs2_inode {
@@ -357,6 +357,7 @@ struct gfs2_inode {
357 struct gfs2_rgrpd *i_rgd; 357 struct gfs2_rgrpd *i_rgd;
358 u64 i_goal; /* goal block for allocations */ 358 u64 i_goal; /* goal block for allocations */
359 struct rw_semaphore i_rw_mutex; 359 struct rw_semaphore i_rw_mutex;
360 struct list_head i_ordered;
360 struct list_head i_trunc_list; 361 struct list_head i_trunc_list;
361 __be64 *i_hash_cache; 362 __be64 *i_hash_cache;
362 u32 i_entries; 363 u32 i_entries;
@@ -641,6 +642,7 @@ struct gfs2_sbd {
641 wait_queue_head_t sd_glock_wait; 642 wait_queue_head_t sd_glock_wait;
642 atomic_t sd_glock_disposal; 643 atomic_t sd_glock_disposal;
643 struct completion sd_locking_init; 644 struct completion sd_locking_init;
645 struct completion sd_wdack;
644 struct delayed_work sd_control_work; 646 struct delayed_work sd_control_work;
645 647
646 /* Inode Stuff */ 648 /* Inode Stuff */
@@ -723,6 +725,7 @@ struct gfs2_sbd {
723 struct list_head sd_log_le_revoke; 725 struct list_head sd_log_le_revoke;
724 struct list_head sd_log_le_databuf; 726 struct list_head sd_log_le_databuf;
725 struct list_head sd_log_le_ordered; 727 struct list_head sd_log_le_ordered;
728 spinlock_t sd_ordered_lock;
726 729
727 atomic_t sd_log_thresh1; 730 atomic_t sd_log_thresh1;
728 atomic_t sd_log_thresh2; 731 atomic_t sd_log_thresh2;
@@ -758,10 +761,7 @@ struct gfs2_sbd {
758 unsigned int sd_replayed_blocks; 761 unsigned int sd_replayed_blocks;
759 762
760 /* For quiescing the filesystem */ 763 /* For quiescing the filesystem */
761
762 struct gfs2_holder sd_freeze_gh; 764 struct gfs2_holder sd_freeze_gh;
763 struct mutex sd_freeze_lock;
764 unsigned int sd_freeze_count;
765 765
766 char sd_fsname[GFS2_FSNAME_LEN]; 766 char sd_fsname[GFS2_FSNAME_LEN];
767 char sd_table_name[GFS2_FSNAME_LEN]; 767 char sd_table_name[GFS2_FSNAME_LEN];
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index 2b6f5698ef18..db048a8ab6a8 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -447,7 +447,7 @@ static void init_dinode(struct gfs2_inode *dip, struct gfs2_inode *ip,
447 struct timespec tv = CURRENT_TIME; 447 struct timespec tv = CURRENT_TIME;
448 448
449 dibh = gfs2_meta_new(ip->i_gl, ip->i_no_addr); 449 dibh = gfs2_meta_new(ip->i_gl, ip->i_no_addr);
450 gfs2_trans_add_bh(ip->i_gl, dibh, 1); 450 gfs2_trans_add_meta(ip->i_gl, dibh);
451 gfs2_metatype_set(dibh, GFS2_METATYPE_DI, GFS2_FORMAT_DI); 451 gfs2_metatype_set(dibh, GFS2_METATYPE_DI, GFS2_FORMAT_DI);
452 gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode)); 452 gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode));
453 di = (struct gfs2_dinode *)dibh->b_data; 453 di = (struct gfs2_dinode *)dibh->b_data;
@@ -584,7 +584,7 @@ static int link_dinode(struct gfs2_inode *dip, const struct qstr *name,
584 if (error) 584 if (error)
585 goto fail_end_trans; 585 goto fail_end_trans;
586 set_nlink(&ip->i_inode, S_ISDIR(ip->i_inode.i_mode) ? 2 : 1); 586 set_nlink(&ip->i_inode, S_ISDIR(ip->i_inode.i_mode) ? 2 : 1);
587 gfs2_trans_add_bh(ip->i_gl, dibh, 1); 587 gfs2_trans_add_meta(ip->i_gl, dibh);
588 gfs2_dinode_out(ip, dibh->b_data); 588 gfs2_dinode_out(ip, dibh->b_data);
589 brelse(dibh); 589 brelse(dibh);
590 return 0; 590 return 0;
@@ -931,7 +931,7 @@ static int gfs2_link(struct dentry *old_dentry, struct inode *dir,
931 if (error) 931 if (error)
932 goto out_brelse; 932 goto out_brelse;
933 933
934 gfs2_trans_add_bh(ip->i_gl, dibh, 1); 934 gfs2_trans_add_meta(ip->i_gl, dibh);
935 inc_nlink(&ip->i_inode); 935 inc_nlink(&ip->i_inode);
936 ip->i_inode.i_ctime = CURRENT_TIME; 936 ip->i_inode.i_ctime = CURRENT_TIME;
937 ihold(inode); 937 ihold(inode);
@@ -1412,7 +1412,7 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry,
1412 if (error) 1412 if (error)
1413 goto out_end_trans; 1413 goto out_end_trans;
1414 ip->i_inode.i_ctime = CURRENT_TIME; 1414 ip->i_inode.i_ctime = CURRENT_TIME;
1415 gfs2_trans_add_bh(ip->i_gl, dibh, 1); 1415 gfs2_trans_add_meta(ip->i_gl, dibh);
1416 gfs2_dinode_out(ip, dibh->b_data); 1416 gfs2_dinode_out(ip, dibh->b_data);
1417 brelse(dibh); 1417 brelse(dibh);
1418 } 1418 }
diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c
index f4beeb9c81c1..9a2ca8be7647 100644
--- a/fs/gfs2/log.c
+++ b/fs/gfs2/log.c
@@ -482,70 +482,66 @@ static void log_flush_wait(struct gfs2_sbd *sdp)
482 } 482 }
483} 483}
484 484
485static int bd_cmp(void *priv, struct list_head *a, struct list_head *b) 485static int ip_cmp(void *priv, struct list_head *a, struct list_head *b)
486{ 486{
487 struct gfs2_bufdata *bda, *bdb; 487 struct gfs2_inode *ipa, *ipb;
488 488
489 bda = list_entry(a, struct gfs2_bufdata, bd_list); 489 ipa = list_entry(a, struct gfs2_inode, i_ordered);
490 bdb = list_entry(b, struct gfs2_bufdata, bd_list); 490 ipb = list_entry(b, struct gfs2_inode, i_ordered);
491 491
492 if (bda->bd_bh->b_blocknr < bdb->bd_bh->b_blocknr) 492 if (ipa->i_no_addr < ipb->i_no_addr)
493 return -1; 493 return -1;
494 if (bda->bd_bh->b_blocknr > bdb->bd_bh->b_blocknr) 494 if (ipa->i_no_addr > ipb->i_no_addr)
495 return 1; 495 return 1;
496 return 0; 496 return 0;
497} 497}
498 498
499static void gfs2_ordered_write(struct gfs2_sbd *sdp) 499static void gfs2_ordered_write(struct gfs2_sbd *sdp)
500{ 500{
501 struct gfs2_bufdata *bd; 501 struct gfs2_inode *ip;
502 struct buffer_head *bh;
503 LIST_HEAD(written); 502 LIST_HEAD(written);
504 503
505 gfs2_log_lock(sdp); 504 spin_lock(&sdp->sd_ordered_lock);
506 list_sort(NULL, &sdp->sd_log_le_ordered, &bd_cmp); 505 list_sort(NULL, &sdp->sd_log_le_ordered, &ip_cmp);
507 while (!list_empty(&sdp->sd_log_le_ordered)) { 506 while (!list_empty(&sdp->sd_log_le_ordered)) {
508 bd = list_entry(sdp->sd_log_le_ordered.next, struct gfs2_bufdata, bd_list); 507 ip = list_entry(sdp->sd_log_le_ordered.next, struct gfs2_inode, i_ordered);
509 list_move(&bd->bd_list, &written); 508 list_move(&ip->i_ordered, &written);
510 bh = bd->bd_bh; 509 if (ip->i_inode.i_mapping->nrpages == 0)
511 if (!buffer_dirty(bh))
512 continue; 510 continue;
513 get_bh(bh); 511 spin_unlock(&sdp->sd_ordered_lock);
514 gfs2_log_unlock(sdp); 512 filemap_fdatawrite(ip->i_inode.i_mapping);
515 lock_buffer(bh); 513 spin_lock(&sdp->sd_ordered_lock);
516 if (buffer_mapped(bh) && test_clear_buffer_dirty(bh)) {
517 bh->b_end_io = end_buffer_write_sync;
518 submit_bh(WRITE_SYNC, bh);
519 } else {
520 unlock_buffer(bh);
521 brelse(bh);
522 }
523 gfs2_log_lock(sdp);
524 } 514 }
525 list_splice(&written, &sdp->sd_log_le_ordered); 515 list_splice(&written, &sdp->sd_log_le_ordered);
526 gfs2_log_unlock(sdp); 516 spin_unlock(&sdp->sd_ordered_lock);
527} 517}
528 518
529static void gfs2_ordered_wait(struct gfs2_sbd *sdp) 519static void gfs2_ordered_wait(struct gfs2_sbd *sdp)
530{ 520{
531 struct gfs2_bufdata *bd; 521 struct gfs2_inode *ip;
532 struct buffer_head *bh;
533 522
534 gfs2_log_lock(sdp); 523 spin_lock(&sdp->sd_ordered_lock);
535 while (!list_empty(&sdp->sd_log_le_ordered)) { 524 while (!list_empty(&sdp->sd_log_le_ordered)) {
536 bd = list_entry(sdp->sd_log_le_ordered.prev, struct gfs2_bufdata, bd_list); 525 ip = list_entry(sdp->sd_log_le_ordered.next, struct gfs2_inode, i_ordered);
537 bh = bd->bd_bh; 526 list_del(&ip->i_ordered);
538 if (buffer_locked(bh)) { 527 WARN_ON(!test_and_clear_bit(GIF_ORDERED, &ip->i_flags));
539 get_bh(bh); 528 if (ip->i_inode.i_mapping->nrpages == 0)
540 gfs2_log_unlock(sdp);
541 wait_on_buffer(bh);
542 brelse(bh);
543 gfs2_log_lock(sdp);
544 continue; 529 continue;
545 } 530 spin_unlock(&sdp->sd_ordered_lock);
546 list_del_init(&bd->bd_list); 531 filemap_fdatawait(ip->i_inode.i_mapping);
532 spin_lock(&sdp->sd_ordered_lock);
547 } 533 }
548 gfs2_log_unlock(sdp); 534 spin_unlock(&sdp->sd_ordered_lock);
535}
536
537void gfs2_ordered_del_inode(struct gfs2_inode *ip)
538{
539 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
540
541 spin_lock(&sdp->sd_ordered_lock);
542 if (test_and_clear_bit(GIF_ORDERED, &ip->i_flags))
543 list_del(&ip->i_ordered);
544 spin_unlock(&sdp->sd_ordered_lock);
549} 545}
550 546
551/** 547/**
diff --git a/fs/gfs2/log.h b/fs/gfs2/log.h
index 3fd5215ea25f..3566f35915e0 100644
--- a/fs/gfs2/log.h
+++ b/fs/gfs2/log.h
@@ -48,6 +48,18 @@ static inline void gfs2_log_pointers_init(struct gfs2_sbd *sdp,
48 sdp->sd_log_head = sdp->sd_log_tail = value; 48 sdp->sd_log_head = sdp->sd_log_tail = value;
49} 49}
50 50
51static inline void gfs2_ordered_add_inode(struct gfs2_inode *ip)
52{
53 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
54
55 if (!test_bit(GIF_ORDERED, &ip->i_flags)) {
56 spin_lock(&sdp->sd_ordered_lock);
57 if (!test_and_set_bit(GIF_ORDERED, &ip->i_flags))
58 list_add(&ip->i_ordered, &sdp->sd_log_le_ordered);
59 spin_unlock(&sdp->sd_ordered_lock);
60 }
61}
62extern void gfs2_ordered_del_inode(struct gfs2_inode *ip);
51extern unsigned int gfs2_struct2blk(struct gfs2_sbd *sdp, unsigned int nstruct, 63extern unsigned int gfs2_struct2blk(struct gfs2_sbd *sdp, unsigned int nstruct,
52 unsigned int ssize); 64 unsigned int ssize);
53 65
diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c
index 9ceccb1595a3..a5055977a214 100644
--- a/fs/gfs2/lops.c
+++ b/fs/gfs2/lops.c
@@ -37,7 +37,7 @@
37 * 37 *
38 * The log lock must be held when calling this function 38 * The log lock must be held when calling this function
39 */ 39 */
40static void gfs2_pin(struct gfs2_sbd *sdp, struct buffer_head *bh) 40void gfs2_pin(struct gfs2_sbd *sdp, struct buffer_head *bh)
41{ 41{
42 struct gfs2_bufdata *bd; 42 struct gfs2_bufdata *bd;
43 43
@@ -388,32 +388,6 @@ static struct page *gfs2_get_log_desc(struct gfs2_sbd *sdp, u32 ld_type,
388 return page; 388 return page;
389} 389}
390 390
391static void buf_lo_add(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd)
392{
393 struct gfs2_meta_header *mh;
394 struct gfs2_trans *tr;
395
396 tr = current->journal_info;
397 tr->tr_touched = 1;
398 if (!list_empty(&bd->bd_list))
399 return;
400 set_bit(GLF_LFLUSH, &bd->bd_gl->gl_flags);
401 set_bit(GLF_DIRTY, &bd->bd_gl->gl_flags);
402 mh = (struct gfs2_meta_header *)bd->bd_bh->b_data;
403 if (unlikely(mh->mh_magic != cpu_to_be32(GFS2_MAGIC))) {
404 printk(KERN_ERR
405 "Attempting to add uninitialised block to journal (inplace block=%lld)\n",
406 (unsigned long long)bd->bd_bh->b_blocknr);
407 BUG();
408 }
409 gfs2_pin(sdp, bd->bd_bh);
410 mh->__pad0 = cpu_to_be64(0);
411 mh->mh_jid = cpu_to_be32(sdp->sd_jdesc->jd_jid);
412 sdp->sd_log_num_buf++;
413 list_add(&bd->bd_list, &sdp->sd_log_le_buf);
414 tr->tr_num_buf_new++;
415}
416
417static void gfs2_check_magic(struct buffer_head *bh) 391static void gfs2_check_magic(struct buffer_head *bh)
418{ 392{
419 void *kaddr; 393 void *kaddr;
@@ -600,20 +574,6 @@ static void buf_lo_after_scan(struct gfs2_jdesc *jd, int error, int pass)
600 jd->jd_jid, sdp->sd_replayed_blocks, sdp->sd_found_blocks); 574 jd->jd_jid, sdp->sd_replayed_blocks, sdp->sd_found_blocks);
601} 575}
602 576
603static void revoke_lo_add(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd)
604{
605 struct gfs2_glock *gl = bd->bd_gl;
606 struct gfs2_trans *tr;
607
608 tr = current->journal_info;
609 tr->tr_touched = 1;
610 tr->tr_num_revoke++;
611 sdp->sd_log_num_revoke++;
612 atomic_inc(&gl->gl_revokes);
613 set_bit(GLF_LFLUSH, &gl->gl_flags);
614 list_add(&bd->bd_list, &sdp->sd_log_le_revoke);
615}
616
617static void revoke_lo_before_commit(struct gfs2_sbd *sdp) 577static void revoke_lo_before_commit(struct gfs2_sbd *sdp)
618{ 578{
619 struct gfs2_meta_header *mh; 579 struct gfs2_meta_header *mh;
@@ -749,44 +709,6 @@ static void revoke_lo_after_scan(struct gfs2_jdesc *jd, int error, int pass)
749} 709}
750 710
751/** 711/**
752 * databuf_lo_add - Add a databuf to the transaction.
753 *
754 * This is used in two distinct cases:
755 * i) In ordered write mode
756 * We put the data buffer on a list so that we can ensure that its
757 * synced to disk at the right time
758 * ii) In journaled data mode
759 * We need to journal the data block in the same way as metadata in
760 * the functions above. The difference is that here we have a tag
761 * which is two __be64's being the block number (as per meta data)
762 * and a flag which says whether the data block needs escaping or
763 * not. This means we need a new log entry for each 251 or so data
764 * blocks, which isn't an enormous overhead but twice as much as
765 * for normal metadata blocks.
766 */
767static void databuf_lo_add(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd)
768{
769 struct gfs2_trans *tr = current->journal_info;
770 struct address_space *mapping = bd->bd_bh->b_page->mapping;
771 struct gfs2_inode *ip = GFS2_I(mapping->host);
772
773 if (tr)
774 tr->tr_touched = 1;
775 if (!list_empty(&bd->bd_list))
776 return;
777 set_bit(GLF_LFLUSH, &bd->bd_gl->gl_flags);
778 set_bit(GLF_DIRTY, &bd->bd_gl->gl_flags);
779 if (gfs2_is_jdata(ip)) {
780 gfs2_pin(sdp, bd->bd_bh);
781 tr->tr_num_databuf_new++;
782 sdp->sd_log_num_databuf++;
783 list_add_tail(&bd->bd_list, &sdp->sd_log_le_databuf);
784 } else {
785 list_add_tail(&bd->bd_list, &sdp->sd_log_le_ordered);
786 }
787}
788
789/**
790 * databuf_lo_before_commit - Scan the data buffers, writing as we go 712 * databuf_lo_before_commit - Scan the data buffers, writing as we go
791 * 713 *
792 */ 714 */
@@ -885,7 +807,6 @@ static void databuf_lo_after_commit(struct gfs2_sbd *sdp, struct gfs2_ail *ai)
885 807
886 808
887const struct gfs2_log_operations gfs2_buf_lops = { 809const struct gfs2_log_operations gfs2_buf_lops = {
888 .lo_add = buf_lo_add,
889 .lo_before_commit = buf_lo_before_commit, 810 .lo_before_commit = buf_lo_before_commit,
890 .lo_after_commit = buf_lo_after_commit, 811 .lo_after_commit = buf_lo_after_commit,
891 .lo_before_scan = buf_lo_before_scan, 812 .lo_before_scan = buf_lo_before_scan,
@@ -895,7 +816,6 @@ const struct gfs2_log_operations gfs2_buf_lops = {
895}; 816};
896 817
897const struct gfs2_log_operations gfs2_revoke_lops = { 818const struct gfs2_log_operations gfs2_revoke_lops = {
898 .lo_add = revoke_lo_add,
899 .lo_before_commit = revoke_lo_before_commit, 819 .lo_before_commit = revoke_lo_before_commit,
900 .lo_after_commit = revoke_lo_after_commit, 820 .lo_after_commit = revoke_lo_after_commit,
901 .lo_before_scan = revoke_lo_before_scan, 821 .lo_before_scan = revoke_lo_before_scan,
@@ -909,7 +829,6 @@ const struct gfs2_log_operations gfs2_rg_lops = {
909}; 829};
910 830
911const struct gfs2_log_operations gfs2_databuf_lops = { 831const struct gfs2_log_operations gfs2_databuf_lops = {
912 .lo_add = databuf_lo_add,
913 .lo_before_commit = databuf_lo_before_commit, 832 .lo_before_commit = databuf_lo_before_commit,
914 .lo_after_commit = databuf_lo_after_commit, 833 .lo_after_commit = databuf_lo_after_commit,
915 .lo_scan_elements = databuf_lo_scan_elements, 834 .lo_scan_elements = databuf_lo_scan_elements,
diff --git a/fs/gfs2/lops.h b/fs/gfs2/lops.h
index 954a330585f4..ba77b7da8325 100644
--- a/fs/gfs2/lops.h
+++ b/fs/gfs2/lops.h
@@ -29,6 +29,7 @@ extern const struct gfs2_log_operations gfs2_databuf_lops;
29extern const struct gfs2_log_operations *gfs2_log_ops[]; 29extern const struct gfs2_log_operations *gfs2_log_ops[];
30extern void gfs2_log_write_page(struct gfs2_sbd *sdp, struct page *page); 30extern void gfs2_log_write_page(struct gfs2_sbd *sdp, struct page *page);
31extern void gfs2_log_flush_bio(struct gfs2_sbd *sdp, int rw); 31extern void gfs2_log_flush_bio(struct gfs2_sbd *sdp, int rw);
32extern void gfs2_pin(struct gfs2_sbd *sdp, struct buffer_head *bh);
32 33
33static inline unsigned int buf_limit(struct gfs2_sbd *sdp) 34static inline unsigned int buf_limit(struct gfs2_sbd *sdp)
34{ 35{
@@ -46,19 +47,6 @@ static inline unsigned int databuf_limit(struct gfs2_sbd *sdp)
46 return limit; 47 return limit;
47} 48}
48 49
49static inline void lops_init_le(struct gfs2_bufdata *bd,
50 const struct gfs2_log_operations *lops)
51{
52 INIT_LIST_HEAD(&bd->bd_list);
53 bd->bd_ops = lops;
54}
55
56static inline void lops_add(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd)
57{
58 if (bd->bd_ops->lo_add)
59 bd->bd_ops->lo_add(sdp, bd);
60}
61
62static inline void lops_before_commit(struct gfs2_sbd *sdp) 50static inline void lops_before_commit(struct gfs2_sbd *sdp)
63{ 51{
64 int x; 52 int x;
diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c
index 22255d96b27e..b059bbb5059e 100644
--- a/fs/gfs2/meta_io.c
+++ b/fs/gfs2/meta_io.c
@@ -271,41 +271,6 @@ int gfs2_meta_wait(struct gfs2_sbd *sdp, struct buffer_head *bh)
271 return 0; 271 return 0;
272} 272}
273 273
274/**
275 * gfs2_attach_bufdata - attach a struct gfs2_bufdata structure to a buffer
276 * @gl: the glock the buffer belongs to
277 * @bh: The buffer to be attached to
278 * @meta: Flag to indicate whether its metadata or not
279 */
280
281void gfs2_attach_bufdata(struct gfs2_glock *gl, struct buffer_head *bh,
282 int meta)
283{
284 struct gfs2_bufdata *bd;
285
286 if (meta)
287 lock_page(bh->b_page);
288
289 if (bh->b_private) {
290 if (meta)
291 unlock_page(bh->b_page);
292 return;
293 }
294
295 bd = kmem_cache_zalloc(gfs2_bufdata_cachep, GFP_NOFS | __GFP_NOFAIL);
296 bd->bd_bh = bh;
297 bd->bd_gl = gl;
298
299 if (meta)
300 lops_init_le(bd, &gfs2_buf_lops);
301 else
302 lops_init_le(bd, &gfs2_databuf_lops);
303 bh->b_private = bd;
304
305 if (meta)
306 unlock_page(bh->b_page);
307}
308
309void gfs2_remove_from_journal(struct buffer_head *bh, struct gfs2_trans *tr, int meta) 274void gfs2_remove_from_journal(struct buffer_head *bh, struct gfs2_trans *tr, int meta)
310{ 275{
311 struct address_space *mapping = bh->b_page->mapping; 276 struct address_space *mapping = bh->b_page->mapping;
diff --git a/fs/gfs2/meta_io.h b/fs/gfs2/meta_io.h
index c30973b07a7c..0d4c843b6f8e 100644
--- a/fs/gfs2/meta_io.h
+++ b/fs/gfs2/meta_io.h
@@ -56,9 +56,6 @@ int gfs2_meta_read(struct gfs2_glock *gl, u64 blkno,
56int gfs2_meta_wait(struct gfs2_sbd *sdp, struct buffer_head *bh); 56int gfs2_meta_wait(struct gfs2_sbd *sdp, struct buffer_head *bh);
57struct buffer_head *gfs2_getbuf(struct gfs2_glock *gl, u64 blkno, int create); 57struct buffer_head *gfs2_getbuf(struct gfs2_glock *gl, u64 blkno, int create);
58 58
59void gfs2_attach_bufdata(struct gfs2_glock *gl, struct buffer_head *bh,
60 int meta);
61
62void gfs2_remove_from_journal(struct buffer_head *bh, struct gfs2_trans *tr, 59void gfs2_remove_from_journal(struct buffer_head *bh, struct gfs2_trans *tr,
63 int meta); 60 int meta);
64 61
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index 0e3554edb8f2..1b612be4b873 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -81,6 +81,7 @@ static struct gfs2_sbd *init_sbd(struct super_block *sb)
81 init_waitqueue_head(&sdp->sd_glock_wait); 81 init_waitqueue_head(&sdp->sd_glock_wait);
82 atomic_set(&sdp->sd_glock_disposal, 0); 82 atomic_set(&sdp->sd_glock_disposal, 0);
83 init_completion(&sdp->sd_locking_init); 83 init_completion(&sdp->sd_locking_init);
84 init_completion(&sdp->sd_wdack);
84 spin_lock_init(&sdp->sd_statfs_spin); 85 spin_lock_init(&sdp->sd_statfs_spin);
85 86
86 spin_lock_init(&sdp->sd_rindex_spin); 87 spin_lock_init(&sdp->sd_rindex_spin);
@@ -102,6 +103,7 @@ static struct gfs2_sbd *init_sbd(struct super_block *sb)
102 INIT_LIST_HEAD(&sdp->sd_log_le_revoke); 103 INIT_LIST_HEAD(&sdp->sd_log_le_revoke);
103 INIT_LIST_HEAD(&sdp->sd_log_le_databuf); 104 INIT_LIST_HEAD(&sdp->sd_log_le_databuf);
104 INIT_LIST_HEAD(&sdp->sd_log_le_ordered); 105 INIT_LIST_HEAD(&sdp->sd_log_le_ordered);
106 spin_lock_init(&sdp->sd_ordered_lock);
105 107
106 init_waitqueue_head(&sdp->sd_log_waitq); 108 init_waitqueue_head(&sdp->sd_log_waitq);
107 init_waitqueue_head(&sdp->sd_logd_waitq); 109 init_waitqueue_head(&sdp->sd_logd_waitq);
@@ -115,8 +117,6 @@ static struct gfs2_sbd *init_sbd(struct super_block *sb)
115 117
116 INIT_LIST_HEAD(&sdp->sd_revoke_list); 118 INIT_LIST_HEAD(&sdp->sd_revoke_list);
117 119
118 mutex_init(&sdp->sd_freeze_lock);
119
120 return sdp; 120 return sdp;
121} 121}
122 122
diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c
index ae55e248c3b7..06122d09c0d1 100644
--- a/fs/gfs2/quota.c
+++ b/fs/gfs2/quota.c
@@ -590,7 +590,7 @@ static void do_qc(struct gfs2_quota_data *qd, s64 change)
590 s64 x; 590 s64 x;
591 591
592 mutex_lock(&sdp->sd_quota_mutex); 592 mutex_lock(&sdp->sd_quota_mutex);
593 gfs2_trans_add_bh(ip->i_gl, qd->qd_bh, 1); 593 gfs2_trans_add_meta(ip->i_gl, qd->qd_bh);
594 594
595 if (!test_bit(QDF_CHANGE, &qd->qd_flags)) { 595 if (!test_bit(QDF_CHANGE, &qd->qd_flags)) {
596 qc->qc_change = 0; 596 qc->qc_change = 0;
@@ -726,7 +726,7 @@ get_a_page:
726 goto unlock_out; 726 goto unlock_out;
727 } 727 }
728 728
729 gfs2_trans_add_bh(ip->i_gl, bh, 0); 729 gfs2_trans_add_meta(ip->i_gl, bh);
730 730
731 kaddr = kmap_atomic(page); 731 kaddr = kmap_atomic(page);
732 if (offset + sizeof(struct gfs2_quota) > PAGE_CACHE_SIZE) 732 if (offset + sizeof(struct gfs2_quota) > PAGE_CACHE_SIZE)
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index b7eff078fe90..52c2aeaf45ce 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -1323,7 +1323,7 @@ int gfs2_fitrim(struct file *filp, void __user *argp)
1323 if (ret == 0) { 1323 if (ret == 0) {
1324 bh = rgd->rd_bits[0].bi_bh; 1324 bh = rgd->rd_bits[0].bi_bh;
1325 rgd->rd_flags |= GFS2_RGF_TRIMMED; 1325 rgd->rd_flags |= GFS2_RGF_TRIMMED;
1326 gfs2_trans_add_bh(rgd->rd_gl, bh, 1); 1326 gfs2_trans_add_meta(rgd->rd_gl, bh);
1327 gfs2_rgrp_out(rgd, bh->b_data); 1327 gfs2_rgrp_out(rgd, bh->b_data);
1328 gfs2_rgrp_ondisk2lvb(rgd->rd_rgl, bh->b_data); 1328 gfs2_rgrp_ondisk2lvb(rgd->rd_rgl, bh->b_data);
1329 gfs2_trans_end(sdp); 1329 gfs2_trans_end(sdp);
@@ -1968,14 +1968,14 @@ static void gfs2_alloc_extent(const struct gfs2_rbm *rbm, bool dinode,
1968 1968
1969 *n = 1; 1969 *n = 1;
1970 block = gfs2_rbm_to_block(rbm); 1970 block = gfs2_rbm_to_block(rbm);
1971 gfs2_trans_add_bh(rbm->rgd->rd_gl, rbm->bi->bi_bh, 1); 1971 gfs2_trans_add_meta(rbm->rgd->rd_gl, rbm->bi->bi_bh);
1972 gfs2_setbit(rbm, true, dinode ? GFS2_BLKST_DINODE : GFS2_BLKST_USED); 1972 gfs2_setbit(rbm, true, dinode ? GFS2_BLKST_DINODE : GFS2_BLKST_USED);
1973 block++; 1973 block++;
1974 while (*n < elen) { 1974 while (*n < elen) {
1975 ret = gfs2_rbm_from_block(&pos, block); 1975 ret = gfs2_rbm_from_block(&pos, block);
1976 if (ret || gfs2_testbit(&pos) != GFS2_BLKST_FREE) 1976 if (ret || gfs2_testbit(&pos) != GFS2_BLKST_FREE)
1977 break; 1977 break;
1978 gfs2_trans_add_bh(pos.rgd->rd_gl, pos.bi->bi_bh, 1); 1978 gfs2_trans_add_meta(pos.rgd->rd_gl, pos.bi->bi_bh);
1979 gfs2_setbit(&pos, true, GFS2_BLKST_USED); 1979 gfs2_setbit(&pos, true, GFS2_BLKST_USED);
1980 (*n)++; 1980 (*n)++;
1981 block++; 1981 block++;
@@ -2014,7 +2014,7 @@ static struct gfs2_rgrpd *rgblk_free(struct gfs2_sbd *sdp, u64 bstart,
2014 rbm.bi->bi_bh->b_data + rbm.bi->bi_offset, 2014 rbm.bi->bi_bh->b_data + rbm.bi->bi_offset,
2015 rbm.bi->bi_len); 2015 rbm.bi->bi_len);
2016 } 2016 }
2017 gfs2_trans_add_bh(rbm.rgd->rd_gl, rbm.bi->bi_bh, 1); 2017 gfs2_trans_add_meta(rbm.rgd->rd_gl, rbm.bi->bi_bh);
2018 gfs2_setbit(&rbm, false, new_state); 2018 gfs2_setbit(&rbm, false, new_state);
2019 } 2019 }
2020 2020
@@ -2157,7 +2157,7 @@ int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks,
2157 if (error == 0) { 2157 if (error == 0) {
2158 struct gfs2_dinode *di = 2158 struct gfs2_dinode *di =
2159 (struct gfs2_dinode *)dibh->b_data; 2159 (struct gfs2_dinode *)dibh->b_data;
2160 gfs2_trans_add_bh(ip->i_gl, dibh, 1); 2160 gfs2_trans_add_meta(ip->i_gl, dibh);
2161 di->di_goal_meta = di->di_goal_data = 2161 di->di_goal_meta = di->di_goal_data =
2162 cpu_to_be64(ip->i_goal); 2162 cpu_to_be64(ip->i_goal);
2163 brelse(dibh); 2163 brelse(dibh);
@@ -2176,7 +2176,7 @@ int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks,
2176 *generation = rbm.rgd->rd_igeneration++; 2176 *generation = rbm.rgd->rd_igeneration++;
2177 } 2177 }
2178 2178
2179 gfs2_trans_add_bh(rbm.rgd->rd_gl, rbm.rgd->rd_bits[0].bi_bh, 1); 2179 gfs2_trans_add_meta(rbm.rgd->rd_gl, rbm.rgd->rd_bits[0].bi_bh);
2180 gfs2_rgrp_out(rbm.rgd, rbm.rgd->rd_bits[0].bi_bh->b_data); 2180 gfs2_rgrp_out(rbm.rgd, rbm.rgd->rd_bits[0].bi_bh->b_data);
2181 gfs2_rgrp_ondisk2lvb(rbm.rgd->rd_rgl, rbm.rgd->rd_bits[0].bi_bh->b_data); 2181 gfs2_rgrp_ondisk2lvb(rbm.rgd->rd_rgl, rbm.rgd->rd_bits[0].bi_bh->b_data);
2182 2182
@@ -2223,7 +2223,7 @@ void __gfs2_free_blocks(struct gfs2_inode *ip, u64 bstart, u32 blen, int meta)
2223 trace_gfs2_block_alloc(ip, rgd, bstart, blen, GFS2_BLKST_FREE); 2223 trace_gfs2_block_alloc(ip, rgd, bstart, blen, GFS2_BLKST_FREE);
2224 rgd->rd_free += blen; 2224 rgd->rd_free += blen;
2225 rgd->rd_flags &= ~GFS2_RGF_TRIMMED; 2225 rgd->rd_flags &= ~GFS2_RGF_TRIMMED;
2226 gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1); 2226 gfs2_trans_add_meta(rgd->rd_gl, rgd->rd_bits[0].bi_bh);
2227 gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data); 2227 gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data);
2228 gfs2_rgrp_ondisk2lvb(rgd->rd_rgl, rgd->rd_bits[0].bi_bh->b_data); 2228 gfs2_rgrp_ondisk2lvb(rgd->rd_rgl, rgd->rd_bits[0].bi_bh->b_data);
2229 2229
@@ -2260,7 +2260,7 @@ void gfs2_unlink_di(struct inode *inode)
2260 if (!rgd) 2260 if (!rgd)
2261 return; 2261 return;
2262 trace_gfs2_block_alloc(ip, rgd, blkno, 1, GFS2_BLKST_UNLINKED); 2262 trace_gfs2_block_alloc(ip, rgd, blkno, 1, GFS2_BLKST_UNLINKED);
2263 gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1); 2263 gfs2_trans_add_meta(rgd->rd_gl, rgd->rd_bits[0].bi_bh);
2264 gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data); 2264 gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data);
2265 gfs2_rgrp_ondisk2lvb(rgd->rd_rgl, rgd->rd_bits[0].bi_bh->b_data); 2265 gfs2_rgrp_ondisk2lvb(rgd->rd_rgl, rgd->rd_bits[0].bi_bh->b_data);
2266 update_rgrp_lvb_unlinked(rgd, 1); 2266 update_rgrp_lvb_unlinked(rgd, 1);
@@ -2281,7 +2281,7 @@ static void gfs2_free_uninit_di(struct gfs2_rgrpd *rgd, u64 blkno)
2281 rgd->rd_dinodes--; 2281 rgd->rd_dinodes--;
2282 rgd->rd_free++; 2282 rgd->rd_free++;
2283 2283
2284 gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1); 2284 gfs2_trans_add_meta(rgd->rd_gl, rgd->rd_bits[0].bi_bh);
2285 gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data); 2285 gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data);
2286 gfs2_rgrp_ondisk2lvb(rgd->rd_rgl, rgd->rd_bits[0].bi_bh->b_data); 2286 gfs2_rgrp_ondisk2lvb(rgd->rd_rgl, rgd->rd_bits[0].bi_bh->b_data);
2287 update_rgrp_lvb_unlinked(rgd, -1); 2287 update_rgrp_lvb_unlinked(rgd, -1);
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index d6488674d916..a3b40eeaa6e2 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -500,7 +500,7 @@ void gfs2_statfs_change(struct gfs2_sbd *sdp, s64 total, s64 free,
500 if (error) 500 if (error)
501 return; 501 return;
502 502
503 gfs2_trans_add_bh(l_ip->i_gl, l_bh, 1); 503 gfs2_trans_add_meta(l_ip->i_gl, l_bh);
504 504
505 spin_lock(&sdp->sd_statfs_spin); 505 spin_lock(&sdp->sd_statfs_spin);
506 l_sc->sc_total += total; 506 l_sc->sc_total += total;
@@ -528,7 +528,7 @@ void update_statfs(struct gfs2_sbd *sdp, struct buffer_head *m_bh,
528 struct gfs2_statfs_change_host *m_sc = &sdp->sd_statfs_master; 528 struct gfs2_statfs_change_host *m_sc = &sdp->sd_statfs_master;
529 struct gfs2_statfs_change_host *l_sc = &sdp->sd_statfs_local; 529 struct gfs2_statfs_change_host *l_sc = &sdp->sd_statfs_local;
530 530
531 gfs2_trans_add_bh(l_ip->i_gl, l_bh, 1); 531 gfs2_trans_add_meta(l_ip->i_gl, l_bh);
532 532
533 spin_lock(&sdp->sd_statfs_spin); 533 spin_lock(&sdp->sd_statfs_spin);
534 m_sc->sc_total += l_sc->sc_total; 534 m_sc->sc_total += l_sc->sc_total;
@@ -539,7 +539,7 @@ void update_statfs(struct gfs2_sbd *sdp, struct buffer_head *m_bh,
539 0, sizeof(struct gfs2_statfs_change)); 539 0, sizeof(struct gfs2_statfs_change));
540 spin_unlock(&sdp->sd_statfs_spin); 540 spin_unlock(&sdp->sd_statfs_spin);
541 541
542 gfs2_trans_add_bh(m_ip->i_gl, m_bh, 1); 542 gfs2_trans_add_meta(m_ip->i_gl, m_bh);
543 gfs2_statfs_change_out(m_sc, m_bh->b_data + sizeof(struct gfs2_dinode)); 543 gfs2_statfs_change_out(m_sc, m_bh->b_data + sizeof(struct gfs2_dinode));
544} 544}
545 545
@@ -663,54 +663,6 @@ out:
663 return error; 663 return error;
664} 664}
665 665
666/**
667 * gfs2_freeze_fs - freezes the file system
668 * @sdp: the file system
669 *
670 * This function flushes data and meta data for all machines by
671 * acquiring the transaction log exclusively. All journals are
672 * ensured to be in a clean state as well.
673 *
674 * Returns: errno
675 */
676
677int gfs2_freeze_fs(struct gfs2_sbd *sdp)
678{
679 int error = 0;
680
681 mutex_lock(&sdp->sd_freeze_lock);
682
683 if (!sdp->sd_freeze_count++) {
684 error = gfs2_lock_fs_check_clean(sdp, &sdp->sd_freeze_gh);
685 if (error)
686 sdp->sd_freeze_count--;
687 }
688
689 mutex_unlock(&sdp->sd_freeze_lock);
690
691 return error;
692}
693
694/**
695 * gfs2_unfreeze_fs - unfreezes the file system
696 * @sdp: the file system
697 *
698 * This function allows the file system to proceed by unlocking
699 * the exclusively held transaction lock. Other GFS2 nodes are
700 * now free to acquire the lock shared and go on with their lives.
701 *
702 */
703
704void gfs2_unfreeze_fs(struct gfs2_sbd *sdp)
705{
706 mutex_lock(&sdp->sd_freeze_lock);
707
708 if (sdp->sd_freeze_count && !--sdp->sd_freeze_count)
709 gfs2_glock_dq_uninit(&sdp->sd_freeze_gh);
710
711 mutex_unlock(&sdp->sd_freeze_lock);
712}
713
714void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf) 666void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf)
715{ 667{
716 struct gfs2_dinode *str = buf; 668 struct gfs2_dinode *str = buf;
@@ -824,7 +776,7 @@ static void gfs2_dirty_inode(struct inode *inode, int flags)
824 776
825 ret = gfs2_meta_inode_buffer(ip, &bh); 777 ret = gfs2_meta_inode_buffer(ip, &bh);
826 if (ret == 0) { 778 if (ret == 0) {
827 gfs2_trans_add_bh(ip->i_gl, bh, 1); 779 gfs2_trans_add_meta(ip->i_gl, bh);
828 gfs2_dinode_out(ip, bh->b_data); 780 gfs2_dinode_out(ip, bh->b_data);
829 brelse(bh); 781 brelse(bh);
830 } 782 }
@@ -888,13 +840,6 @@ static void gfs2_put_super(struct super_block *sb)
888 int error; 840 int error;
889 struct gfs2_jdesc *jd; 841 struct gfs2_jdesc *jd;
890 842
891 /* Unfreeze the filesystem, if we need to */
892
893 mutex_lock(&sdp->sd_freeze_lock);
894 if (sdp->sd_freeze_count)
895 gfs2_glock_dq_uninit(&sdp->sd_freeze_gh);
896 mutex_unlock(&sdp->sd_freeze_lock);
897
898 /* No more recovery requests */ 843 /* No more recovery requests */
899 set_bit(SDF_NORECOVERY, &sdp->sd_flags); 844 set_bit(SDF_NORECOVERY, &sdp->sd_flags);
900 smp_mb(); 845 smp_mb();
@@ -985,7 +930,7 @@ static int gfs2_freeze(struct super_block *sb)
985 return -EINVAL; 930 return -EINVAL;
986 931
987 for (;;) { 932 for (;;) {
988 error = gfs2_freeze_fs(sdp); 933 error = gfs2_lock_fs_check_clean(sdp, &sdp->sd_freeze_gh);
989 if (!error) 934 if (!error)
990 break; 935 break;
991 936
@@ -1013,7 +958,9 @@ static int gfs2_freeze(struct super_block *sb)
1013 958
1014static int gfs2_unfreeze(struct super_block *sb) 959static int gfs2_unfreeze(struct super_block *sb)
1015{ 960{
1016 gfs2_unfreeze_fs(sb->s_fs_info); 961 struct gfs2_sbd *sdp = sb->s_fs_info;
962
963 gfs2_glock_dq_uninit(&sdp->sd_freeze_gh);
1017 return 0; 964 return 0;
1018} 965}
1019 966
@@ -1577,6 +1524,7 @@ out:
1577 /* Case 3 starts here */ 1524 /* Case 3 starts here */
1578 truncate_inode_pages(&inode->i_data, 0); 1525 truncate_inode_pages(&inode->i_data, 0);
1579 gfs2_rs_delete(ip); 1526 gfs2_rs_delete(ip);
1527 gfs2_ordered_del_inode(ip);
1580 clear_inode(inode); 1528 clear_inode(inode);
1581 gfs2_dir_hash_inval(ip); 1529 gfs2_dir_hash_inval(ip);
1582 ip->i_gl->gl_object = NULL; 1530 ip->i_gl->gl_object = NULL;
diff --git a/fs/gfs2/super.h b/fs/gfs2/super.h
index a0464680af0b..90e3322ffa10 100644
--- a/fs/gfs2/super.h
+++ b/fs/gfs2/super.h
@@ -46,9 +46,6 @@ extern void update_statfs(struct gfs2_sbd *sdp, struct buffer_head *m_bh,
46 struct buffer_head *l_bh); 46 struct buffer_head *l_bh);
47extern int gfs2_statfs_sync(struct super_block *sb, int type); 47extern int gfs2_statfs_sync(struct super_block *sb, int type);
48 48
49extern int gfs2_freeze_fs(struct gfs2_sbd *sdp);
50extern void gfs2_unfreeze_fs(struct gfs2_sbd *sdp);
51
52extern struct file_system_type gfs2_fs_type; 49extern struct file_system_type gfs2_fs_type;
53extern struct file_system_type gfs2meta_fs_type; 50extern struct file_system_type gfs2meta_fs_type;
54extern const struct export_operations gfs2_export_ops; 51extern const struct export_operations gfs2_export_ops;
diff --git a/fs/gfs2/sys.c b/fs/gfs2/sys.c
index 8056b7b7238e..4fb9ad80d260 100644
--- a/fs/gfs2/sys.c
+++ b/fs/gfs2/sys.c
@@ -91,19 +91,15 @@ static ssize_t uuid_show(struct gfs2_sbd *sdp, char *buf)
91 91
92static ssize_t freeze_show(struct gfs2_sbd *sdp, char *buf) 92static ssize_t freeze_show(struct gfs2_sbd *sdp, char *buf)
93{ 93{
94 unsigned int count; 94 struct super_block *sb = sdp->sd_vfs;
95 95 int frozen = (sb->s_writers.frozen == SB_UNFROZEN) ? 0 : 1;
96 mutex_lock(&sdp->sd_freeze_lock);
97 count = sdp->sd_freeze_count;
98 mutex_unlock(&sdp->sd_freeze_lock);
99 96
100 return snprintf(buf, PAGE_SIZE, "%u\n", count); 97 return snprintf(buf, PAGE_SIZE, "%u\n", frozen);
101} 98}
102 99
103static ssize_t freeze_store(struct gfs2_sbd *sdp, const char *buf, size_t len) 100static ssize_t freeze_store(struct gfs2_sbd *sdp, const char *buf, size_t len)
104{ 101{
105 ssize_t ret = len; 102 int error;
106 int error = 0;
107 int n = simple_strtol(buf, NULL, 0); 103 int n = simple_strtol(buf, NULL, 0);
108 104
109 if (!capable(CAP_SYS_ADMIN)) 105 if (!capable(CAP_SYS_ADMIN))
@@ -111,19 +107,21 @@ static ssize_t freeze_store(struct gfs2_sbd *sdp, const char *buf, size_t len)
111 107
112 switch (n) { 108 switch (n) {
113 case 0: 109 case 0:
114 gfs2_unfreeze_fs(sdp); 110 error = thaw_super(sdp->sd_vfs);
115 break; 111 break;
116 case 1: 112 case 1:
117 error = gfs2_freeze_fs(sdp); 113 error = freeze_super(sdp->sd_vfs);
118 break; 114 break;
119 default: 115 default:
120 ret = -EINVAL; 116 return -EINVAL;
121 } 117 }
122 118
123 if (error) 119 if (error) {
124 fs_warn(sdp, "freeze %d error %d", n, error); 120 fs_warn(sdp, "freeze %d error %d", n, error);
121 return error;
122 }
125 123
126 return ret; 124 return len;
127} 125}
128 126
129static ssize_t withdraw_show(struct gfs2_sbd *sdp, char *buf) 127static ssize_t withdraw_show(struct gfs2_sbd *sdp, char *buf)
@@ -332,6 +330,28 @@ static ssize_t block_store(struct gfs2_sbd *sdp, const char *buf, size_t len)
332 return ret; 330 return ret;
333} 331}
334 332
333static ssize_t wdack_show(struct gfs2_sbd *sdp, char *buf)
334{
335 int val = completion_done(&sdp->sd_wdack) ? 1 : 0;
336
337 return sprintf(buf, "%d\n", val);
338}
339
340static ssize_t wdack_store(struct gfs2_sbd *sdp, const char *buf, size_t len)
341{
342 ssize_t ret = len;
343 int val;
344
345 val = simple_strtol(buf, NULL, 0);
346
347 if ((val == 1) &&
348 !strcmp(sdp->sd_lockstruct.ls_ops->lm_proto_name, "lock_dlm"))
349 complete(&sdp->sd_wdack);
350 else
351 ret = -EINVAL;
352 return ret;
353}
354
335static ssize_t lkfirst_show(struct gfs2_sbd *sdp, char *buf) 355static ssize_t lkfirst_show(struct gfs2_sbd *sdp, char *buf)
336{ 356{
337 struct lm_lockstruct *ls = &sdp->sd_lockstruct; 357 struct lm_lockstruct *ls = &sdp->sd_lockstruct;
@@ -463,7 +483,7 @@ static struct gfs2_attr gdlm_attr_##_name = __ATTR(_name,_mode,_show,_store)
463 483
464GDLM_ATTR(proto_name, 0444, proto_name_show, NULL); 484GDLM_ATTR(proto_name, 0444, proto_name_show, NULL);
465GDLM_ATTR(block, 0644, block_show, block_store); 485GDLM_ATTR(block, 0644, block_show, block_store);
466GDLM_ATTR(withdraw, 0644, withdraw_show, withdraw_store); 486GDLM_ATTR(withdraw, 0644, wdack_show, wdack_store);
467GDLM_ATTR(jid, 0644, jid_show, jid_store); 487GDLM_ATTR(jid, 0644, jid_show, jid_store);
468GDLM_ATTR(first, 0644, lkfirst_show, lkfirst_store); 488GDLM_ATTR(first, 0644, lkfirst_show, lkfirst_store);
469GDLM_ATTR(first_done, 0444, first_done_show, NULL); 489GDLM_ATTR(first_done, 0444, first_done_show, NULL);
diff --git a/fs/gfs2/trans.c b/fs/gfs2/trans.c
index 413627072f36..88162fae27a5 100644
--- a/fs/gfs2/trans.c
+++ b/fs/gfs2/trans.c
@@ -18,6 +18,7 @@
18#include "gfs2.h" 18#include "gfs2.h"
19#include "incore.h" 19#include "incore.h"
20#include "glock.h" 20#include "glock.h"
21#include "inode.h"
21#include "log.h" 22#include "log.h"
22#include "lops.h" 23#include "lops.h"
23#include "meta_io.h" 24#include "meta_io.h"
@@ -142,44 +143,143 @@ void gfs2_trans_end(struct gfs2_sbd *sdp)
142 sb_end_intwrite(sdp->sd_vfs); 143 sb_end_intwrite(sdp->sd_vfs);
143} 144}
144 145
146static struct gfs2_bufdata *gfs2_alloc_bufdata(struct gfs2_glock *gl,
147 struct buffer_head *bh,
148 const struct gfs2_log_operations *lops)
149{
150 struct gfs2_bufdata *bd;
151
152 bd = kmem_cache_zalloc(gfs2_bufdata_cachep, GFP_NOFS | __GFP_NOFAIL);
153 bd->bd_bh = bh;
154 bd->bd_gl = gl;
155 bd->bd_ops = lops;
156 INIT_LIST_HEAD(&bd->bd_list);
157 bh->b_private = bd;
158 return bd;
159}
160
145/** 161/**
146 * gfs2_trans_add_bh - Add a to-be-modified buffer to the current transaction 162 * gfs2_trans_add_data - Add a databuf to the transaction.
147 * @gl: the glock the buffer belongs to 163 * @gl: The inode glock associated with the buffer
148 * @bh: The buffer to add 164 * @bh: The buffer to add
149 * @meta: True in the case of adding metadata
150 * 165 *
166 * This is used in two distinct cases:
167 * i) In ordered write mode
168 * We put the data buffer on a list so that we can ensure that its
169 * synced to disk at the right time
170 * ii) In journaled data mode
171 * We need to journal the data block in the same way as metadata in
172 * the functions above. The difference is that here we have a tag
173 * which is two __be64's being the block number (as per meta data)
174 * and a flag which says whether the data block needs escaping or
175 * not. This means we need a new log entry for each 251 or so data
176 * blocks, which isn't an enormous overhead but twice as much as
177 * for normal metadata blocks.
151 */ 178 */
179void gfs2_trans_add_data(struct gfs2_glock *gl, struct buffer_head *bh)
180{
181 struct gfs2_trans *tr = current->journal_info;
182 struct gfs2_sbd *sdp = gl->gl_sbd;
183 struct address_space *mapping = bh->b_page->mapping;
184 struct gfs2_inode *ip = GFS2_I(mapping->host);
185 struct gfs2_bufdata *bd;
152 186
153void gfs2_trans_add_bh(struct gfs2_glock *gl, struct buffer_head *bh, int meta) 187 if (!gfs2_is_jdata(ip)) {
188 gfs2_ordered_add_inode(ip);
189 return;
190 }
191
192 lock_buffer(bh);
193 gfs2_log_lock(sdp);
194 bd = bh->b_private;
195 if (bd == NULL) {
196 gfs2_log_unlock(sdp);
197 unlock_buffer(bh);
198 if (bh->b_private == NULL)
199 bd = gfs2_alloc_bufdata(gl, bh, &gfs2_databuf_lops);
200 lock_buffer(bh);
201 gfs2_log_lock(sdp);
202 }
203 gfs2_assert(sdp, bd->bd_gl == gl);
204 tr->tr_touched = 1;
205 if (list_empty(&bd->bd_list)) {
206 set_bit(GLF_LFLUSH, &bd->bd_gl->gl_flags);
207 set_bit(GLF_DIRTY, &bd->bd_gl->gl_flags);
208 gfs2_pin(sdp, bd->bd_bh);
209 tr->tr_num_databuf_new++;
210 sdp->sd_log_num_databuf++;
211 list_add_tail(&bd->bd_list, &sdp->sd_log_le_databuf);
212 }
213 gfs2_log_unlock(sdp);
214 unlock_buffer(bh);
215}
216
217static void meta_lo_add(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd)
154{ 218{
219 struct gfs2_meta_header *mh;
220 struct gfs2_trans *tr;
221
222 tr = current->journal_info;
223 tr->tr_touched = 1;
224 if (!list_empty(&bd->bd_list))
225 return;
226 set_bit(GLF_LFLUSH, &bd->bd_gl->gl_flags);
227 set_bit(GLF_DIRTY, &bd->bd_gl->gl_flags);
228 mh = (struct gfs2_meta_header *)bd->bd_bh->b_data;
229 if (unlikely(mh->mh_magic != cpu_to_be32(GFS2_MAGIC))) {
230 printk(KERN_ERR
231 "Attempting to add uninitialised block to journal (inplace block=%lld)\n",
232 (unsigned long long)bd->bd_bh->b_blocknr);
233 BUG();
234 }
235 gfs2_pin(sdp, bd->bd_bh);
236 mh->__pad0 = cpu_to_be64(0);
237 mh->mh_jid = cpu_to_be32(sdp->sd_jdesc->jd_jid);
238 sdp->sd_log_num_buf++;
239 list_add(&bd->bd_list, &sdp->sd_log_le_buf);
240 tr->tr_num_buf_new++;
241}
242
243void gfs2_trans_add_meta(struct gfs2_glock *gl, struct buffer_head *bh)
244{
245
155 struct gfs2_sbd *sdp = gl->gl_sbd; 246 struct gfs2_sbd *sdp = gl->gl_sbd;
156 struct gfs2_bufdata *bd; 247 struct gfs2_bufdata *bd;
157 248
158 lock_buffer(bh); 249 lock_buffer(bh);
159 gfs2_log_lock(sdp); 250 gfs2_log_lock(sdp);
160 bd = bh->b_private; 251 bd = bh->b_private;
161 if (bd) 252 if (bd == NULL) {
162 gfs2_assert(sdp, bd->bd_gl == gl);
163 else {
164 gfs2_log_unlock(sdp); 253 gfs2_log_unlock(sdp);
165 unlock_buffer(bh); 254 unlock_buffer(bh);
166 gfs2_attach_bufdata(gl, bh, meta); 255 lock_page(bh->b_page);
167 bd = bh->b_private; 256 if (bh->b_private == NULL)
257 bd = gfs2_alloc_bufdata(gl, bh, &gfs2_buf_lops);
258 unlock_page(bh->b_page);
168 lock_buffer(bh); 259 lock_buffer(bh);
169 gfs2_log_lock(sdp); 260 gfs2_log_lock(sdp);
170 } 261 }
171 lops_add(sdp, bd); 262 gfs2_assert(sdp, bd->bd_gl == gl);
263 meta_lo_add(sdp, bd);
172 gfs2_log_unlock(sdp); 264 gfs2_log_unlock(sdp);
173 unlock_buffer(bh); 265 unlock_buffer(bh);
174} 266}
175 267
176void gfs2_trans_add_revoke(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd) 268void gfs2_trans_add_revoke(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd)
177{ 269{
270 struct gfs2_glock *gl = bd->bd_gl;
271 struct gfs2_trans *tr = current->journal_info;
272
178 BUG_ON(!list_empty(&bd->bd_list)); 273 BUG_ON(!list_empty(&bd->bd_list));
179 BUG_ON(!list_empty(&bd->bd_ail_st_list)); 274 BUG_ON(!list_empty(&bd->bd_ail_st_list));
180 BUG_ON(!list_empty(&bd->bd_ail_gl_list)); 275 BUG_ON(!list_empty(&bd->bd_ail_gl_list));
181 lops_init_le(bd, &gfs2_revoke_lops); 276 bd->bd_ops = &gfs2_revoke_lops;
182 lops_add(sdp, bd); 277 tr->tr_touched = 1;
278 tr->tr_num_revoke++;
279 sdp->sd_log_num_revoke++;
280 atomic_inc(&gl->gl_revokes);
281 set_bit(GLF_LFLUSH, &gl->gl_flags);
282 list_add(&bd->bd_list, &sdp->sd_log_le_revoke);
183} 283}
184 284
185void gfs2_trans_add_unrevoke(struct gfs2_sbd *sdp, u64 blkno, unsigned int len) 285void gfs2_trans_add_unrevoke(struct gfs2_sbd *sdp, u64 blkno, unsigned int len)
diff --git a/fs/gfs2/trans.h b/fs/gfs2/trans.h
index bf2ae9aeee7a..1e6e7da25a17 100644
--- a/fs/gfs2/trans.h
+++ b/fs/gfs2/trans.h
@@ -39,7 +39,8 @@ extern int gfs2_trans_begin(struct gfs2_sbd *sdp, unsigned int blocks,
39 unsigned int revokes); 39 unsigned int revokes);
40 40
41extern void gfs2_trans_end(struct gfs2_sbd *sdp); 41extern void gfs2_trans_end(struct gfs2_sbd *sdp);
42extern void gfs2_trans_add_bh(struct gfs2_glock *gl, struct buffer_head *bh, int meta); 42extern void gfs2_trans_add_data(struct gfs2_glock *gl, struct buffer_head *bh);
43extern void gfs2_trans_add_meta(struct gfs2_glock *gl, struct buffer_head *bh);
43extern void gfs2_trans_add_revoke(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd); 44extern void gfs2_trans_add_revoke(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd);
44extern void gfs2_trans_add_unrevoke(struct gfs2_sbd *sdp, u64 blkno, unsigned int len); 45extern void gfs2_trans_add_unrevoke(struct gfs2_sbd *sdp, u64 blkno, unsigned int len);
45 46
diff --git a/fs/gfs2/util.c b/fs/gfs2/util.c
index f00d7c5744f6..6402fb69d71b 100644
--- a/fs/gfs2/util.c
+++ b/fs/gfs2/util.c
@@ -54,6 +54,9 @@ int gfs2_lm_withdraw(struct gfs2_sbd *sdp, char *fmt, ...)
54 54
55 kobject_uevent(&sdp->sd_kobj, KOBJ_OFFLINE); 55 kobject_uevent(&sdp->sd_kobj, KOBJ_OFFLINE);
56 56
57 if (!strcmp(sdp->sd_lockstruct.ls_ops->lm_proto_name, "lock_dlm"))
58 wait_for_completion(&sdp->sd_wdack);
59
57 if (lm->lm_unmount) { 60 if (lm->lm_unmount) {
58 fs_err(sdp, "telling LM to unmount\n"); 61 fs_err(sdp, "telling LM to unmount\n");
59 lm->lm_unmount(sdp); 62 lm->lm_unmount(sdp);
diff --git a/fs/gfs2/xattr.c b/fs/gfs2/xattr.c
index 76c144b3c9bb..cbb46c2baa69 100644
--- a/fs/gfs2/xattr.c
+++ b/fs/gfs2/xattr.c
@@ -270,7 +270,7 @@ static int ea_dealloc_unstuffed(struct gfs2_inode *ip, struct buffer_head *bh,
270 if (error) 270 if (error)
271 goto out_gunlock; 271 goto out_gunlock;
272 272
273 gfs2_trans_add_bh(ip->i_gl, bh, 1); 273 gfs2_trans_add_meta(ip->i_gl, bh);
274 274
275 dataptrs = GFS2_EA2DATAPTRS(ea); 275 dataptrs = GFS2_EA2DATAPTRS(ea);
276 for (x = 0; x < ea->ea_num_ptrs; x++, dataptrs++) { 276 for (x = 0; x < ea->ea_num_ptrs; x++, dataptrs++) {
@@ -309,7 +309,7 @@ static int ea_dealloc_unstuffed(struct gfs2_inode *ip, struct buffer_head *bh,
309 error = gfs2_meta_inode_buffer(ip, &dibh); 309 error = gfs2_meta_inode_buffer(ip, &dibh);
310 if (!error) { 310 if (!error) {
311 ip->i_inode.i_ctime = CURRENT_TIME; 311 ip->i_inode.i_ctime = CURRENT_TIME;
312 gfs2_trans_add_bh(ip->i_gl, dibh, 1); 312 gfs2_trans_add_meta(ip->i_gl, dibh);
313 gfs2_dinode_out(ip, dibh->b_data); 313 gfs2_dinode_out(ip, dibh->b_data);
314 brelse(dibh); 314 brelse(dibh);
315 } 315 }
@@ -509,7 +509,7 @@ static int gfs2_iter_unstuffed(struct gfs2_inode *ip, struct gfs2_ea_header *ea,
509 } 509 }
510 510
511 if (din) { 511 if (din) {
512 gfs2_trans_add_bh(ip->i_gl, bh[x], 1); 512 gfs2_trans_add_meta(ip->i_gl, bh[x]);
513 memcpy(pos, din, cp_size); 513 memcpy(pos, din, cp_size);
514 din += sdp->sd_jbsize; 514 din += sdp->sd_jbsize;
515 } 515 }
@@ -629,7 +629,7 @@ static int ea_alloc_blk(struct gfs2_inode *ip, struct buffer_head **bhp)
629 return error; 629 return error;
630 gfs2_trans_add_unrevoke(sdp, block, 1); 630 gfs2_trans_add_unrevoke(sdp, block, 1);
631 *bhp = gfs2_meta_new(ip->i_gl, block); 631 *bhp = gfs2_meta_new(ip->i_gl, block);
632 gfs2_trans_add_bh(ip->i_gl, *bhp, 1); 632 gfs2_trans_add_meta(ip->i_gl, *bhp);
633 gfs2_metatype_set(*bhp, GFS2_METATYPE_EA, GFS2_FORMAT_EA); 633 gfs2_metatype_set(*bhp, GFS2_METATYPE_EA, GFS2_FORMAT_EA);
634 gfs2_buffer_clear_tail(*bhp, sizeof(struct gfs2_meta_header)); 634 gfs2_buffer_clear_tail(*bhp, sizeof(struct gfs2_meta_header));
635 635
@@ -691,7 +691,7 @@ static int ea_write(struct gfs2_inode *ip, struct gfs2_ea_header *ea,
691 return error; 691 return error;
692 gfs2_trans_add_unrevoke(sdp, block, 1); 692 gfs2_trans_add_unrevoke(sdp, block, 1);
693 bh = gfs2_meta_new(ip->i_gl, block); 693 bh = gfs2_meta_new(ip->i_gl, block);
694 gfs2_trans_add_bh(ip->i_gl, bh, 1); 694 gfs2_trans_add_meta(ip->i_gl, bh);
695 gfs2_metatype_set(bh, GFS2_METATYPE_ED, GFS2_FORMAT_ED); 695 gfs2_metatype_set(bh, GFS2_METATYPE_ED, GFS2_FORMAT_ED);
696 696
697 gfs2_add_inode_blocks(&ip->i_inode, 1); 697 gfs2_add_inode_blocks(&ip->i_inode, 1);
@@ -751,7 +751,7 @@ static int ea_alloc_skeleton(struct gfs2_inode *ip, struct gfs2_ea_request *er,
751 error = gfs2_meta_inode_buffer(ip, &dibh); 751 error = gfs2_meta_inode_buffer(ip, &dibh);
752 if (!error) { 752 if (!error) {
753 ip->i_inode.i_ctime = CURRENT_TIME; 753 ip->i_inode.i_ctime = CURRENT_TIME;
754 gfs2_trans_add_bh(ip->i_gl, dibh, 1); 754 gfs2_trans_add_meta(ip->i_gl, dibh);
755 gfs2_dinode_out(ip, dibh->b_data); 755 gfs2_dinode_out(ip, dibh->b_data);
756 brelse(dibh); 756 brelse(dibh);
757 } 757 }
@@ -834,7 +834,7 @@ static void ea_set_remove_stuffed(struct gfs2_inode *ip,
834 struct gfs2_ea_header *prev = el->el_prev; 834 struct gfs2_ea_header *prev = el->el_prev;
835 u32 len; 835 u32 len;
836 836
837 gfs2_trans_add_bh(ip->i_gl, el->el_bh, 1); 837 gfs2_trans_add_meta(ip->i_gl, el->el_bh);
838 838
839 if (!prev || !GFS2_EA_IS_STUFFED(ea)) { 839 if (!prev || !GFS2_EA_IS_STUFFED(ea)) {
840 ea->ea_type = GFS2_EATYPE_UNUSED; 840 ea->ea_type = GFS2_EATYPE_UNUSED;
@@ -872,7 +872,7 @@ static int ea_set_simple_noalloc(struct gfs2_inode *ip, struct buffer_head *bh,
872 if (error) 872 if (error)
873 return error; 873 return error;
874 874
875 gfs2_trans_add_bh(ip->i_gl, bh, 1); 875 gfs2_trans_add_meta(ip->i_gl, bh);
876 876
877 if (es->ea_split) 877 if (es->ea_split)
878 ea = ea_split_ea(ea); 878 ea = ea_split_ea(ea);
@@ -886,7 +886,7 @@ static int ea_set_simple_noalloc(struct gfs2_inode *ip, struct buffer_head *bh,
886 if (error) 886 if (error)
887 goto out; 887 goto out;
888 ip->i_inode.i_ctime = CURRENT_TIME; 888 ip->i_inode.i_ctime = CURRENT_TIME;
889 gfs2_trans_add_bh(ip->i_gl, dibh, 1); 889 gfs2_trans_add_meta(ip->i_gl, dibh);
890 gfs2_dinode_out(ip, dibh->b_data); 890 gfs2_dinode_out(ip, dibh->b_data);
891 brelse(dibh); 891 brelse(dibh);
892out: 892out:
@@ -901,7 +901,7 @@ static int ea_set_simple_alloc(struct gfs2_inode *ip,
901 struct gfs2_ea_header *ea = es->es_ea; 901 struct gfs2_ea_header *ea = es->es_ea;
902 int error; 902 int error;
903 903
904 gfs2_trans_add_bh(ip->i_gl, es->es_bh, 1); 904 gfs2_trans_add_meta(ip->i_gl, es->es_bh);
905 905
906 if (es->ea_split) 906 if (es->ea_split)
907 ea = ea_split_ea(ea); 907 ea = ea_split_ea(ea);
@@ -997,7 +997,7 @@ static int ea_set_block(struct gfs2_inode *ip, struct gfs2_ea_request *er,
997 goto out; 997 goto out;
998 } 998 }
999 999
1000 gfs2_trans_add_bh(ip->i_gl, indbh, 1); 1000 gfs2_trans_add_meta(ip->i_gl, indbh);
1001 } else { 1001 } else {
1002 u64 blk; 1002 u64 blk;
1003 unsigned int n = 1; 1003 unsigned int n = 1;
@@ -1006,7 +1006,7 @@ static int ea_set_block(struct gfs2_inode *ip, struct gfs2_ea_request *er,
1006 return error; 1006 return error;
1007 gfs2_trans_add_unrevoke(sdp, blk, 1); 1007 gfs2_trans_add_unrevoke(sdp, blk, 1);
1008 indbh = gfs2_meta_new(ip->i_gl, blk); 1008 indbh = gfs2_meta_new(ip->i_gl, blk);
1009 gfs2_trans_add_bh(ip->i_gl, indbh, 1); 1009 gfs2_trans_add_meta(ip->i_gl, indbh);
1010 gfs2_metatype_set(indbh, GFS2_METATYPE_IN, GFS2_FORMAT_IN); 1010 gfs2_metatype_set(indbh, GFS2_METATYPE_IN, GFS2_FORMAT_IN);
1011 gfs2_buffer_clear_tail(indbh, mh_size); 1011 gfs2_buffer_clear_tail(indbh, mh_size);
1012 1012
@@ -1092,7 +1092,7 @@ static int ea_remove_stuffed(struct gfs2_inode *ip, struct gfs2_ea_location *el)
1092 if (error) 1092 if (error)
1093 return error; 1093 return error;
1094 1094
1095 gfs2_trans_add_bh(ip->i_gl, el->el_bh, 1); 1095 gfs2_trans_add_meta(ip->i_gl, el->el_bh);
1096 1096
1097 if (prev) { 1097 if (prev) {
1098 u32 len; 1098 u32 len;
@@ -1109,7 +1109,7 @@ static int ea_remove_stuffed(struct gfs2_inode *ip, struct gfs2_ea_location *el)
1109 error = gfs2_meta_inode_buffer(ip, &dibh); 1109 error = gfs2_meta_inode_buffer(ip, &dibh);
1110 if (!error) { 1110 if (!error) {
1111 ip->i_inode.i_ctime = CURRENT_TIME; 1111 ip->i_inode.i_ctime = CURRENT_TIME;
1112 gfs2_trans_add_bh(ip->i_gl, dibh, 1); 1112 gfs2_trans_add_meta(ip->i_gl, dibh);
1113 gfs2_dinode_out(ip, dibh->b_data); 1113 gfs2_dinode_out(ip, dibh->b_data);
1114 brelse(dibh); 1114 brelse(dibh);
1115 } 1115 }
@@ -1265,7 +1265,7 @@ int gfs2_xattr_acl_chmod(struct gfs2_inode *ip, struct iattr *attr, char *data)
1265 if (GFS2_EA_IS_STUFFED(el.el_ea)) { 1265 if (GFS2_EA_IS_STUFFED(el.el_ea)) {
1266 error = gfs2_trans_begin(sdp, RES_DINODE + RES_EATTR, 0); 1266 error = gfs2_trans_begin(sdp, RES_DINODE + RES_EATTR, 0);
1267 if (error == 0) { 1267 if (error == 0) {
1268 gfs2_trans_add_bh(ip->i_gl, el.el_bh, 1); 1268 gfs2_trans_add_meta(ip->i_gl, el.el_bh);
1269 memcpy(GFS2_EA2DATA(el.el_ea), data, 1269 memcpy(GFS2_EA2DATA(el.el_ea), data,
1270 GFS2_EA_DATA_LEN(el.el_ea)); 1270 GFS2_EA_DATA_LEN(el.el_ea));
1271 } 1271 }
@@ -1352,7 +1352,7 @@ static int ea_dealloc_indirect(struct gfs2_inode *ip)
1352 if (error) 1352 if (error)
1353 goto out_gunlock; 1353 goto out_gunlock;
1354 1354
1355 gfs2_trans_add_bh(ip->i_gl, indbh, 1); 1355 gfs2_trans_add_meta(ip->i_gl, indbh);
1356 1356
1357 eablk = (__be64 *)(indbh->b_data + sizeof(struct gfs2_meta_header)); 1357 eablk = (__be64 *)(indbh->b_data + sizeof(struct gfs2_meta_header));
1358 bstart = 0; 1358 bstart = 0;
@@ -1384,7 +1384,7 @@ static int ea_dealloc_indirect(struct gfs2_inode *ip)
1384 1384
1385 error = gfs2_meta_inode_buffer(ip, &dibh); 1385 error = gfs2_meta_inode_buffer(ip, &dibh);
1386 if (!error) { 1386 if (!error) {
1387 gfs2_trans_add_bh(ip->i_gl, dibh, 1); 1387 gfs2_trans_add_meta(ip->i_gl, dibh);
1388 gfs2_dinode_out(ip, dibh->b_data); 1388 gfs2_dinode_out(ip, dibh->b_data);
1389 brelse(dibh); 1389 brelse(dibh);
1390 } 1390 }
@@ -1434,7 +1434,7 @@ static int ea_dealloc_block(struct gfs2_inode *ip)
1434 1434
1435 error = gfs2_meta_inode_buffer(ip, &dibh); 1435 error = gfs2_meta_inode_buffer(ip, &dibh);
1436 if (!error) { 1436 if (!error) {
1437 gfs2_trans_add_bh(ip->i_gl, dibh, 1); 1437 gfs2_trans_add_meta(ip->i_gl, dibh);
1438 gfs2_dinode_out(ip, dibh->b_data); 1438 gfs2_dinode_out(ip, dibh->b_data);
1439 brelse(dibh); 1439 brelse(dibh);
1440 } 1440 }
diff --git a/fs/hfs/Kconfig b/fs/hfs/Kconfig
index b77c5bc20f8a..998e3a6decf3 100644
--- a/fs/hfs/Kconfig
+++ b/fs/hfs/Kconfig
@@ -1,6 +1,6 @@
1config HFS_FS 1config HFS_FS
2 tristate "Apple Macintosh file system support (EXPERIMENTAL)" 2 tristate "Apple Macintosh file system support"
3 depends on BLOCK && EXPERIMENTAL 3 depends on BLOCK
4 select NLS 4 select NLS
5 help 5 help
6 If you say Y here, you will be able to mount Macintosh-formatted 6 If you say Y here, you will be able to mount Macintosh-formatted
diff --git a/fs/hpfs/inode.c b/fs/hpfs/inode.c
index 5dc06c837105..9edeeb0ea97e 100644
--- a/fs/hpfs/inode.c
+++ b/fs/hpfs/inode.c
@@ -147,7 +147,7 @@ static void hpfs_write_inode_ea(struct inode *i, struct fnode *fnode)
147 /*if (le32_to_cpu(fnode->acl_size_l) || le16_to_cpu(fnode->acl_size_s)) { 147 /*if (le32_to_cpu(fnode->acl_size_l) || le16_to_cpu(fnode->acl_size_s)) {
148 Some unknown structures like ACL may be in fnode, 148 Some unknown structures like ACL may be in fnode,
149 we'd better not overwrite them 149 we'd better not overwrite them
150 hpfs_error(i->i_sb, "fnode %08x has some unknown HPFS386 stuctures", i->i_ino); 150 hpfs_error(i->i_sb, "fnode %08x has some unknown HPFS386 structures", i->i_ino);
151 } else*/ if (hpfs_sb(i->i_sb)->sb_eas >= 2) { 151 } else*/ if (hpfs_sb(i->i_sb)->sb_eas >= 2) {
152 __le32 ea; 152 __le32 ea;
153 if (!uid_eq(i->i_uid, hpfs_sb(i->i_sb)->sb_uid) || hpfs_inode->i_ea_uid) { 153 if (!uid_eq(i->i_uid, hpfs_sb(i->i_sb)->sb_uid) || hpfs_inode->i_ea_uid) {
diff --git a/fs/jffs2/Kconfig b/fs/jffs2/Kconfig
index 6ae169cd8faa..d8bb6c411e96 100644
--- a/fs/jffs2/Kconfig
+++ b/fs/jffs2/Kconfig
@@ -50,8 +50,8 @@ config JFFS2_FS_WBUF_VERIFY
50 write-buffer, and check for errors. 50 write-buffer, and check for errors.
51 51
52config JFFS2_SUMMARY 52config JFFS2_SUMMARY
53 bool "JFFS2 summary support (EXPERIMENTAL)" 53 bool "JFFS2 summary support"
54 depends on JFFS2_FS && EXPERIMENTAL 54 depends on JFFS2_FS
55 default n 55 default n
56 help 56 help
57 This feature makes it possible to use summary information 57 This feature makes it possible to use summary information
@@ -63,8 +63,8 @@ config JFFS2_SUMMARY
63 If unsure, say 'N'. 63 If unsure, say 'N'.
64 64
65config JFFS2_FS_XATTR 65config JFFS2_FS_XATTR
66 bool "JFFS2 XATTR support (EXPERIMENTAL)" 66 bool "JFFS2 XATTR support"
67 depends on JFFS2_FS && EXPERIMENTAL 67 depends on JFFS2_FS
68 default n 68 default n
69 help 69 help
70 Extended attributes are name:value pairs associated with inodes by 70 Extended attributes are name:value pairs associated with inodes by
@@ -173,7 +173,7 @@ config JFFS2_CMODE_PRIORITY
173 successful one. 173 successful one.
174 174
175config JFFS2_CMODE_SIZE 175config JFFS2_CMODE_SIZE
176 bool "size (EXPERIMENTAL)" 176 bool "size"
177 help 177 help
178 Tries all compressors and chooses the one which has the smallest 178 Tries all compressors and chooses the one which has the smallest
179 result. 179 result.
diff --git a/fs/jfs/super.c b/fs/jfs/super.c
index 1a543be09c79..060ba638becb 100644
--- a/fs/jfs/super.c
+++ b/fs/jfs/super.c
@@ -154,7 +154,7 @@ static int jfs_statfs(struct dentry *dentry, struct kstatfs *buf)
154 /* 154 /*
155 * If we really return the number of allocated & free inodes, some 155 * If we really return the number of allocated & free inodes, some
156 * applications will fail because they won't see enough free inodes. 156 * applications will fail because they won't see enough free inodes.
157 * We'll try to calculate some guess as to how may inodes we can 157 * We'll try to calculate some guess as to how many inodes we can
158 * really allocate 158 * really allocate
159 * 159 *
160 * buf->f_files = atomic_read(&imap->im_numinos); 160 * buf->f_files = atomic_read(&imap->im_numinos);
diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c
index 54f9e6ce0430..52e5120bb159 100644
--- a/fs/lockd/clntproc.c
+++ b/fs/lockd/clntproc.c
@@ -550,6 +550,9 @@ again:
550 status = nlmclnt_block(block, req, NLMCLNT_POLL_TIMEOUT); 550 status = nlmclnt_block(block, req, NLMCLNT_POLL_TIMEOUT);
551 if (status < 0) 551 if (status < 0)
552 break; 552 break;
553 /* Resend the blocking lock request after a server reboot */
554 if (resp->status == nlm_lck_denied_grace_period)
555 continue;
553 if (resp->status != nlm_lck_blocked) 556 if (resp->status != nlm_lck_blocked)
554 break; 557 break;
555 } 558 }
diff --git a/fs/logfs/Kconfig b/fs/logfs/Kconfig
index daf9a9b32dd3..09ed066c0221 100644
--- a/fs/logfs/Kconfig
+++ b/fs/logfs/Kconfig
@@ -1,6 +1,6 @@
1config LOGFS 1config LOGFS
2 tristate "LogFS file system (EXPERIMENTAL)" 2 tristate "LogFS file system"
3 depends on (MTD || BLOCK) && EXPERIMENTAL 3 depends on (MTD || BLOCK)
4 select ZLIB_INFLATE 4 select ZLIB_INFLATE
5 select ZLIB_DEFLATE 5 select ZLIB_DEFLATE
6 select CRC32 6 select CRC32
diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c
index 4fa788c93f46..434b93ec0970 100644
--- a/fs/nfs/blocklayout/blocklayout.c
+++ b/fs/nfs/blocklayout/blocklayout.c
@@ -1273,6 +1273,7 @@ static const struct nfs_pageio_ops bl_pg_write_ops = {
1273static struct pnfs_layoutdriver_type blocklayout_type = { 1273static struct pnfs_layoutdriver_type blocklayout_type = {
1274 .id = LAYOUT_BLOCK_VOLUME, 1274 .id = LAYOUT_BLOCK_VOLUME,
1275 .name = "LAYOUT_BLOCK_VOLUME", 1275 .name = "LAYOUT_BLOCK_VOLUME",
1276 .owner = THIS_MODULE,
1276 .read_pagelist = bl_read_pagelist, 1277 .read_pagelist = bl_read_pagelist,
1277 .write_pagelist = bl_write_pagelist, 1278 .write_pagelist = bl_write_pagelist,
1278 .alloc_layout_hdr = bl_alloc_layout_hdr, 1279 .alloc_layout_hdr = bl_alloc_layout_hdr,
diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c
index 264d1aa935f2..2960512792c2 100644
--- a/fs/nfs/callback_proc.c
+++ b/fs/nfs/callback_proc.c
@@ -183,60 +183,15 @@ static u32 initiate_file_draining(struct nfs_client *clp,
183static u32 initiate_bulk_draining(struct nfs_client *clp, 183static u32 initiate_bulk_draining(struct nfs_client *clp,
184 struct cb_layoutrecallargs *args) 184 struct cb_layoutrecallargs *args)
185{ 185{
186 struct nfs_server *server; 186 int stat;
187 struct pnfs_layout_hdr *lo;
188 struct inode *ino;
189 u32 rv = NFS4ERR_NOMATCHING_LAYOUT;
190 struct pnfs_layout_hdr *tmp;
191 LIST_HEAD(recall_list);
192 LIST_HEAD(free_me_list);
193 struct pnfs_layout_range range = {
194 .iomode = IOMODE_ANY,
195 .offset = 0,
196 .length = NFS4_MAX_UINT64,
197 };
198
199 spin_lock(&clp->cl_lock);
200 rcu_read_lock();
201 list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) {
202 if ((args->cbl_recall_type == RETURN_FSID) &&
203 memcmp(&server->fsid, &args->cbl_fsid,
204 sizeof(struct nfs_fsid)))
205 continue;
206 187
207 list_for_each_entry(lo, &server->layouts, plh_layouts) { 188 if (args->cbl_recall_type == RETURN_FSID)
208 ino = igrab(lo->plh_inode); 189 stat = pnfs_destroy_layouts_byfsid(clp, &args->cbl_fsid, true);
209 if (!ino) 190 else
210 continue; 191 stat = pnfs_destroy_layouts_byclid(clp, true);
211 spin_lock(&ino->i_lock); 192 if (stat != 0)
212 /* Is this layout in the process of being freed? */ 193 return NFS4ERR_DELAY;
213 if (NFS_I(ino)->layout != lo) { 194 return NFS4ERR_NOMATCHING_LAYOUT;
214 spin_unlock(&ino->i_lock);
215 iput(ino);
216 continue;
217 }
218 pnfs_get_layout_hdr(lo);
219 spin_unlock(&ino->i_lock);
220 list_add(&lo->plh_bulk_recall, &recall_list);
221 }
222 }
223 rcu_read_unlock();
224 spin_unlock(&clp->cl_lock);
225
226 list_for_each_entry_safe(lo, tmp,
227 &recall_list, plh_bulk_recall) {
228 ino = lo->plh_inode;
229 spin_lock(&ino->i_lock);
230 set_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags);
231 if (pnfs_mark_matching_lsegs_invalid(lo, &free_me_list, &range))
232 rv = NFS4ERR_DELAY;
233 list_del_init(&lo->plh_bulk_recall);
234 spin_unlock(&ino->i_lock);
235 pnfs_free_lseg_list(&free_me_list);
236 pnfs_put_layout_hdr(lo);
237 iput(ino);
238 }
239 return rv;
240} 195}
241 196
242static u32 do_callback_layoutrecall(struct nfs_client *clp, 197static u32 do_callback_layoutrecall(struct nfs_client *clp,
diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c
index 81c5eec3cf38..6390a4b5fee7 100644
--- a/fs/nfs/delegation.c
+++ b/fs/nfs/delegation.c
@@ -55,7 +55,8 @@ int nfs4_have_delegation(struct inode *inode, fmode_t flags)
55 flags &= FMODE_READ|FMODE_WRITE; 55 flags &= FMODE_READ|FMODE_WRITE;
56 rcu_read_lock(); 56 rcu_read_lock();
57 delegation = rcu_dereference(NFS_I(inode)->delegation); 57 delegation = rcu_dereference(NFS_I(inode)->delegation);
58 if (delegation != NULL && (delegation->type & flags) == flags) { 58 if (delegation != NULL && (delegation->type & flags) == flags &&
59 !test_bit(NFS_DELEGATION_RETURNING, &delegation->flags)) {
59 nfs_mark_delegation_referenced(delegation); 60 nfs_mark_delegation_referenced(delegation);
60 ret = 1; 61 ret = 1;
61 } 62 }
@@ -70,8 +71,10 @@ static int nfs_delegation_claim_locks(struct nfs_open_context *ctx, struct nfs4_
70 int status = 0; 71 int status = 0;
71 72
72 if (inode->i_flock == NULL) 73 if (inode->i_flock == NULL)
73 goto out; 74 return 0;
74 75
76 if (inode->i_flock == NULL)
77 goto out;
75 /* Protect inode->i_flock using the file locks lock */ 78 /* Protect inode->i_flock using the file locks lock */
76 lock_flocks(); 79 lock_flocks();
77 for (fl = inode->i_flock; fl != NULL; fl = fl->fl_next) { 80 for (fl = inode->i_flock; fl != NULL; fl = fl->fl_next) {
@@ -94,7 +97,9 @@ static int nfs_delegation_claim_opens(struct inode *inode, const nfs4_stateid *s
94{ 97{
95 struct nfs_inode *nfsi = NFS_I(inode); 98 struct nfs_inode *nfsi = NFS_I(inode);
96 struct nfs_open_context *ctx; 99 struct nfs_open_context *ctx;
100 struct nfs4_state_owner *sp;
97 struct nfs4_state *state; 101 struct nfs4_state *state;
102 unsigned int seq;
98 int err; 103 int err;
99 104
100again: 105again:
@@ -109,9 +114,16 @@ again:
109 continue; 114 continue;
110 get_nfs_open_context(ctx); 115 get_nfs_open_context(ctx);
111 spin_unlock(&inode->i_lock); 116 spin_unlock(&inode->i_lock);
117 sp = state->owner;
118 /* Block nfs4_proc_unlck */
119 mutex_lock(&sp->so_delegreturn_mutex);
120 seq = raw_seqcount_begin(&sp->so_reclaim_seqcount);
112 err = nfs4_open_delegation_recall(ctx, state, stateid); 121 err = nfs4_open_delegation_recall(ctx, state, stateid);
113 if (err >= 0) 122 if (!err)
114 err = nfs_delegation_claim_locks(ctx, state); 123 err = nfs_delegation_claim_locks(ctx, state);
124 if (!err && read_seqcount_retry(&sp->so_reclaim_seqcount, seq))
125 err = -EAGAIN;
126 mutex_unlock(&sp->so_delegreturn_mutex);
115 put_nfs_open_context(ctx); 127 put_nfs_open_context(ctx);
116 if (err != 0) 128 if (err != 0)
117 return err; 129 return err;
@@ -182,39 +194,91 @@ static struct inode *nfs_delegation_grab_inode(struct nfs_delegation *delegation
182} 194}
183 195
184static struct nfs_delegation * 196static struct nfs_delegation *
197nfs_start_delegation_return_locked(struct nfs_inode *nfsi)
198{
199 struct nfs_delegation *ret = NULL;
200 struct nfs_delegation *delegation = rcu_dereference(nfsi->delegation);
201
202 if (delegation == NULL)
203 goto out;
204 spin_lock(&delegation->lock);
205 if (!test_and_set_bit(NFS_DELEGATION_RETURNING, &delegation->flags))
206 ret = delegation;
207 spin_unlock(&delegation->lock);
208out:
209 return ret;
210}
211
212static struct nfs_delegation *
213nfs_start_delegation_return(struct nfs_inode *nfsi)
214{
215 struct nfs_delegation *delegation;
216
217 rcu_read_lock();
218 delegation = nfs_start_delegation_return_locked(nfsi);
219 rcu_read_unlock();
220 return delegation;
221}
222
223static void
224nfs_abort_delegation_return(struct nfs_delegation *delegation,
225 struct nfs_client *clp)
226{
227
228 spin_lock(&delegation->lock);
229 clear_bit(NFS_DELEGATION_RETURNING, &delegation->flags);
230 set_bit(NFS_DELEGATION_RETURN, &delegation->flags);
231 spin_unlock(&delegation->lock);
232 set_bit(NFS4CLNT_DELEGRETURN, &clp->cl_state);
233}
234
235static struct nfs_delegation *
185nfs_detach_delegation_locked(struct nfs_inode *nfsi, 236nfs_detach_delegation_locked(struct nfs_inode *nfsi,
186 struct nfs_server *server) 237 struct nfs_delegation *delegation,
238 struct nfs_client *clp)
187{ 239{
188 struct nfs_delegation *delegation = 240 struct nfs_delegation *deleg_cur =
189 rcu_dereference_protected(nfsi->delegation, 241 rcu_dereference_protected(nfsi->delegation,
190 lockdep_is_held(&server->nfs_client->cl_lock)); 242 lockdep_is_held(&clp->cl_lock));
191 243
192 if (delegation == NULL) 244 if (deleg_cur == NULL || delegation != deleg_cur)
193 goto nomatch; 245 return NULL;
194 246
195 spin_lock(&delegation->lock); 247 spin_lock(&delegation->lock);
248 set_bit(NFS_DELEGATION_RETURNING, &delegation->flags);
196 list_del_rcu(&delegation->super_list); 249 list_del_rcu(&delegation->super_list);
197 delegation->inode = NULL; 250 delegation->inode = NULL;
198 nfsi->delegation_state = 0; 251 nfsi->delegation_state = 0;
199 rcu_assign_pointer(nfsi->delegation, NULL); 252 rcu_assign_pointer(nfsi->delegation, NULL);
200 spin_unlock(&delegation->lock); 253 spin_unlock(&delegation->lock);
201 return delegation; 254 return delegation;
202nomatch:
203 return NULL;
204} 255}
205 256
206static struct nfs_delegation *nfs_detach_delegation(struct nfs_inode *nfsi, 257static struct nfs_delegation *nfs_detach_delegation(struct nfs_inode *nfsi,
207 struct nfs_server *server) 258 struct nfs_delegation *delegation,
259 struct nfs_server *server)
208{ 260{
209 struct nfs_client *clp = server->nfs_client; 261 struct nfs_client *clp = server->nfs_client;
210 struct nfs_delegation *delegation;
211 262
212 spin_lock(&clp->cl_lock); 263 spin_lock(&clp->cl_lock);
213 delegation = nfs_detach_delegation_locked(nfsi, server); 264 delegation = nfs_detach_delegation_locked(nfsi, delegation, clp);
214 spin_unlock(&clp->cl_lock); 265 spin_unlock(&clp->cl_lock);
215 return delegation; 266 return delegation;
216} 267}
217 268
269static struct nfs_delegation *
270nfs_inode_detach_delegation(struct inode *inode)
271{
272 struct nfs_inode *nfsi = NFS_I(inode);
273 struct nfs_server *server = NFS_SERVER(inode);
274 struct nfs_delegation *delegation;
275
276 delegation = nfs_start_delegation_return(nfsi);
277 if (delegation == NULL)
278 return NULL;
279 return nfs_detach_delegation(nfsi, delegation, server);
280}
281
218/** 282/**
219 * nfs_inode_set_delegation - set up a delegation on an inode 283 * nfs_inode_set_delegation - set up a delegation on an inode
220 * @inode: inode to which delegation applies 284 * @inode: inode to which delegation applies
@@ -268,7 +332,10 @@ int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct
268 delegation = NULL; 332 delegation = NULL;
269 goto out; 333 goto out;
270 } 334 }
271 freeme = nfs_detach_delegation_locked(nfsi, server); 335 freeme = nfs_detach_delegation_locked(nfsi,
336 old_delegation, clp);
337 if (freeme == NULL)
338 goto out;
272 } 339 }
273 list_add_rcu(&delegation->super_list, &server->delegations); 340 list_add_rcu(&delegation->super_list, &server->delegations);
274 nfsi->delegation_state = delegation->type; 341 nfsi->delegation_state = delegation->type;
@@ -292,19 +359,29 @@ out:
292/* 359/*
293 * Basic procedure for returning a delegation to the server 360 * Basic procedure for returning a delegation to the server
294 */ 361 */
295static int __nfs_inode_return_delegation(struct inode *inode, struct nfs_delegation *delegation, int issync) 362static int nfs_end_delegation_return(struct inode *inode, struct nfs_delegation *delegation, int issync)
296{ 363{
364 struct nfs_client *clp = NFS_SERVER(inode)->nfs_client;
297 struct nfs_inode *nfsi = NFS_I(inode); 365 struct nfs_inode *nfsi = NFS_I(inode);
298 int err; 366 int err;
299 367
300 /* 368 if (delegation == NULL)
301 * Guard against new delegated open/lock/unlock calls and against 369 return 0;
302 * state recovery 370 do {
303 */ 371 err = nfs_delegation_claim_opens(inode, &delegation->stateid);
304 down_write(&nfsi->rwsem); 372 if (!issync || err != -EAGAIN)
305 err = nfs_delegation_claim_opens(inode, &delegation->stateid); 373 break;
306 up_write(&nfsi->rwsem); 374 /*
307 if (err) 375 * Guard against state recovery
376 */
377 err = nfs4_wait_clnt_recover(clp);
378 } while (err == 0);
379
380 if (err) {
381 nfs_abort_delegation_return(delegation, clp);
382 goto out;
383 }
384 if (!nfs_detach_delegation(nfsi, delegation, NFS_SERVER(inode)))
308 goto out; 385 goto out;
309 386
310 err = nfs_do_return_delegation(inode, delegation, issync); 387 err = nfs_do_return_delegation(inode, delegation, issync);
@@ -340,13 +417,10 @@ restart:
340 inode = nfs_delegation_grab_inode(delegation); 417 inode = nfs_delegation_grab_inode(delegation);
341 if (inode == NULL) 418 if (inode == NULL)
342 continue; 419 continue;
343 delegation = nfs_detach_delegation(NFS_I(inode), 420 delegation = nfs_start_delegation_return_locked(NFS_I(inode));
344 server);
345 rcu_read_unlock(); 421 rcu_read_unlock();
346 422
347 if (delegation != NULL) 423 err = nfs_end_delegation_return(inode, delegation, 0);
348 err = __nfs_inode_return_delegation(inode,
349 delegation, 0);
350 iput(inode); 424 iput(inode);
351 if (!err) 425 if (!err)
352 goto restart; 426 goto restart;
@@ -367,15 +441,11 @@ restart:
367 */ 441 */
368void nfs_inode_return_delegation_noreclaim(struct inode *inode) 442void nfs_inode_return_delegation_noreclaim(struct inode *inode)
369{ 443{
370 struct nfs_server *server = NFS_SERVER(inode);
371 struct nfs_inode *nfsi = NFS_I(inode);
372 struct nfs_delegation *delegation; 444 struct nfs_delegation *delegation;
373 445
374 if (rcu_access_pointer(nfsi->delegation) != NULL) { 446 delegation = nfs_inode_detach_delegation(inode);
375 delegation = nfs_detach_delegation(nfsi, server); 447 if (delegation != NULL)
376 if (delegation != NULL) 448 nfs_do_return_delegation(inode, delegation, 0);
377 nfs_do_return_delegation(inode, delegation, 0);
378 }
379} 449}
380 450
381/** 451/**
@@ -390,18 +460,14 @@ void nfs_inode_return_delegation_noreclaim(struct inode *inode)
390 */ 460 */
391int nfs4_inode_return_delegation(struct inode *inode) 461int nfs4_inode_return_delegation(struct inode *inode)
392{ 462{
393 struct nfs_server *server = NFS_SERVER(inode);
394 struct nfs_inode *nfsi = NFS_I(inode); 463 struct nfs_inode *nfsi = NFS_I(inode);
395 struct nfs_delegation *delegation; 464 struct nfs_delegation *delegation;
396 int err = 0; 465 int err = 0;
397 466
398 nfs_wb_all(inode); 467 nfs_wb_all(inode);
399 if (rcu_access_pointer(nfsi->delegation) != NULL) { 468 delegation = nfs_start_delegation_return(nfsi);
400 delegation = nfs_detach_delegation(nfsi, server); 469 if (delegation != NULL)
401 if (delegation != NULL) { 470 err = nfs_end_delegation_return(inode, delegation, 1);
402 err = __nfs_inode_return_delegation(inode, delegation, 1);
403 }
404 }
405 return err; 471 return err;
406} 472}
407 473
@@ -471,7 +537,7 @@ void nfs_remove_bad_delegation(struct inode *inode)
471{ 537{
472 struct nfs_delegation *delegation; 538 struct nfs_delegation *delegation;
473 539
474 delegation = nfs_detach_delegation(NFS_I(inode), NFS_SERVER(inode)); 540 delegation = nfs_inode_detach_delegation(inode);
475 if (delegation) { 541 if (delegation) {
476 nfs_inode_find_state_and_recover(inode, &delegation->stateid); 542 nfs_inode_find_state_and_recover(inode, &delegation->stateid);
477 nfs_free_delegation(delegation); 543 nfs_free_delegation(delegation);
@@ -649,7 +715,7 @@ restart:
649 if (inode == NULL) 715 if (inode == NULL)
650 continue; 716 continue;
651 delegation = nfs_detach_delegation(NFS_I(inode), 717 delegation = nfs_detach_delegation(NFS_I(inode),
652 server); 718 delegation, server);
653 rcu_read_unlock(); 719 rcu_read_unlock();
654 720
655 if (delegation != NULL) 721 if (delegation != NULL)
diff --git a/fs/nfs/delegation.h b/fs/nfs/delegation.h
index bbc6a4dba0d8..d54d4fca6793 100644
--- a/fs/nfs/delegation.h
+++ b/fs/nfs/delegation.h
@@ -29,6 +29,7 @@ enum {
29 NFS_DELEGATION_NEED_RECLAIM = 0, 29 NFS_DELEGATION_NEED_RECLAIM = 0,
30 NFS_DELEGATION_RETURN, 30 NFS_DELEGATION_RETURN,
31 NFS_DELEGATION_REFERENCED, 31 NFS_DELEGATION_REFERENCED,
32 NFS_DELEGATION_RETURNING,
32}; 33};
33 34
34int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res); 35int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res);
diff --git a/fs/nfs/getroot.c b/fs/nfs/getroot.c
index 033803c36644..44efaa8c5f78 100644
--- a/fs/nfs/getroot.c
+++ b/fs/nfs/getroot.c
@@ -126,8 +126,7 @@ struct dentry *nfs_get_root(struct super_block *sb, struct nfs_fh *mntfh,
126 } 126 }
127 spin_unlock(&ret->d_lock); 127 spin_unlock(&ret->d_lock);
128out: 128out:
129 if (name) 129 kfree(name);
130 kfree(name);
131 nfs_free_fattr(fsinfo.fattr); 130 nfs_free_fattr(fsinfo.fattr);
132 return ret; 131 return ret;
133} 132}
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index ebeb94ce1b0b..6acc73c80d7f 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -694,10 +694,7 @@ static void __put_nfs_open_context(struct nfs_open_context *ctx, int is_sync)
694 if (ctx->cred != NULL) 694 if (ctx->cred != NULL)
695 put_rpccred(ctx->cred); 695 put_rpccred(ctx->cred);
696 dput(ctx->dentry); 696 dput(ctx->dentry);
697 if (is_sync) 697 nfs_sb_deactive(sb);
698 nfs_sb_deactive(sb);
699 else
700 nfs_sb_deactive_async(sb);
701 kfree(ctx->mdsthreshold); 698 kfree(ctx->mdsthreshold);
702 kfree(ctx); 699 kfree(ctx);
703} 700}
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index f0e6c7df1a07..541c9ebdbc5a 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -329,7 +329,6 @@ extern int __init register_nfs_fs(void);
329extern void __exit unregister_nfs_fs(void); 329extern void __exit unregister_nfs_fs(void);
330extern void nfs_sb_active(struct super_block *sb); 330extern void nfs_sb_active(struct super_block *sb);
331extern void nfs_sb_deactive(struct super_block *sb); 331extern void nfs_sb_deactive(struct super_block *sb);
332extern void nfs_sb_deactive_async(struct super_block *sb);
333 332
334/* namespace.c */ 333/* namespace.c */
335#define NFS_PATH_CANONICAL 1 334#define NFS_PATH_CANONICAL 1
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index a3f488b074a2..944c9a5c1039 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -13,6 +13,8 @@
13 13
14#define NFS4_MAX_LOOP_ON_RECOVER (10) 14#define NFS4_MAX_LOOP_ON_RECOVER (10)
15 15
16#include <linux/seqlock.h>
17
16struct idmap; 18struct idmap;
17 19
18enum nfs4_client_state { 20enum nfs4_client_state {
@@ -90,6 +92,8 @@ struct nfs4_state_owner {
90 unsigned long so_flags; 92 unsigned long so_flags;
91 struct list_head so_states; 93 struct list_head so_states;
92 struct nfs_seqid_counter so_seqid; 94 struct nfs_seqid_counter so_seqid;
95 seqcount_t so_reclaim_seqcount;
96 struct mutex so_delegreturn_mutex;
93}; 97};
94 98
95enum { 99enum {
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index cf747ef86650..eae83bf96c6d 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -896,6 +896,8 @@ static int can_open_delegated(struct nfs_delegation *delegation, fmode_t fmode)
896 return 0; 896 return 0;
897 if (test_bit(NFS_DELEGATION_NEED_RECLAIM, &delegation->flags)) 897 if (test_bit(NFS_DELEGATION_NEED_RECLAIM, &delegation->flags))
898 return 0; 898 return 0;
899 if (test_bit(NFS_DELEGATION_RETURNING, &delegation->flags))
900 return 0;
899 nfs_mark_delegation_referenced(delegation); 901 nfs_mark_delegation_referenced(delegation);
900 return 1; 902 return 1;
901} 903}
@@ -973,6 +975,7 @@ static int update_open_stateid(struct nfs4_state *state, nfs4_stateid *open_stat
973 975
974 spin_lock(&deleg_cur->lock); 976 spin_lock(&deleg_cur->lock);
975 if (nfsi->delegation != deleg_cur || 977 if (nfsi->delegation != deleg_cur ||
978 test_bit(NFS_DELEGATION_RETURNING, &deleg_cur->flags) ||
976 (deleg_cur->type & fmode) != fmode) 979 (deleg_cur->type & fmode) != fmode)
977 goto no_delegation_unlock; 980 goto no_delegation_unlock;
978 981
@@ -1352,19 +1355,18 @@ int nfs4_open_delegation_recall(struct nfs_open_context *ctx, struct nfs4_state
1352 case -NFS4ERR_BAD_HIGH_SLOT: 1355 case -NFS4ERR_BAD_HIGH_SLOT:
1353 case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION: 1356 case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION:
1354 case -NFS4ERR_DEADSESSION: 1357 case -NFS4ERR_DEADSESSION:
1358 set_bit(NFS_DELEGATED_STATE, &state->flags);
1355 nfs4_schedule_session_recovery(server->nfs_client->cl_session, err); 1359 nfs4_schedule_session_recovery(server->nfs_client->cl_session, err);
1360 err = -EAGAIN;
1356 goto out; 1361 goto out;
1357 case -NFS4ERR_STALE_CLIENTID: 1362 case -NFS4ERR_STALE_CLIENTID:
1358 case -NFS4ERR_STALE_STATEID: 1363 case -NFS4ERR_STALE_STATEID:
1364 set_bit(NFS_DELEGATED_STATE, &state->flags);
1359 case -NFS4ERR_EXPIRED: 1365 case -NFS4ERR_EXPIRED:
1360 /* Don't recall a delegation if it was lost */ 1366 /* Don't recall a delegation if it was lost */
1361 nfs4_schedule_lease_recovery(server->nfs_client); 1367 nfs4_schedule_lease_recovery(server->nfs_client);
1368 err = -EAGAIN;
1362 goto out; 1369 goto out;
1363 case -ERESTARTSYS:
1364 /*
1365 * The show must go on: exit, but mark the
1366 * stateid as needing recovery.
1367 */
1368 case -NFS4ERR_DELEG_REVOKED: 1370 case -NFS4ERR_DELEG_REVOKED:
1369 case -NFS4ERR_ADMIN_REVOKED: 1371 case -NFS4ERR_ADMIN_REVOKED:
1370 case -NFS4ERR_BAD_STATEID: 1372 case -NFS4ERR_BAD_STATEID:
@@ -1375,6 +1377,7 @@ int nfs4_open_delegation_recall(struct nfs_open_context *ctx, struct nfs4_state
1375 err = 0; 1377 err = 0;
1376 goto out; 1378 goto out;
1377 } 1379 }
1380 set_bit(NFS_DELEGATED_STATE, &state->flags);
1378 err = nfs4_handle_exception(server, err, &exception); 1381 err = nfs4_handle_exception(server, err, &exception);
1379 } while (exception.retry); 1382 } while (exception.retry);
1380out: 1383out:
@@ -1463,7 +1466,7 @@ static void nfs4_open_prepare(struct rpc_task *task, void *calldata)
1463 struct nfs4_state_owner *sp = data->owner; 1466 struct nfs4_state_owner *sp = data->owner;
1464 1467
1465 if (nfs_wait_on_sequence(data->o_arg.seqid, task) != 0) 1468 if (nfs_wait_on_sequence(data->o_arg.seqid, task) != 0)
1466 return; 1469 goto out_wait;
1467 /* 1470 /*
1468 * Check if we still need to send an OPEN call, or if we can use 1471 * Check if we still need to send an OPEN call, or if we can use
1469 * a delegation instead. 1472 * a delegation instead.
@@ -1498,6 +1501,7 @@ unlock_no_action:
1498 rcu_read_unlock(); 1501 rcu_read_unlock();
1499out_no_action: 1502out_no_action:
1500 task->tk_action = NULL; 1503 task->tk_action = NULL;
1504out_wait:
1501 nfs4_sequence_done(task, &data->o_res.seq_res); 1505 nfs4_sequence_done(task, &data->o_res.seq_res);
1502} 1506}
1503 1507
@@ -1845,6 +1849,43 @@ static inline void nfs4_exclusive_attrset(struct nfs4_opendata *opendata, struct
1845 sattr->ia_valid |= ATTR_MTIME; 1849 sattr->ia_valid |= ATTR_MTIME;
1846} 1850}
1847 1851
1852static int _nfs4_open_and_get_state(struct nfs4_opendata *opendata,
1853 fmode_t fmode,
1854 int flags,
1855 struct nfs4_state **res)
1856{
1857 struct nfs4_state_owner *sp = opendata->owner;
1858 struct nfs_server *server = sp->so_server;
1859 struct nfs4_state *state;
1860 unsigned int seq;
1861 int ret;
1862
1863 seq = raw_seqcount_begin(&sp->so_reclaim_seqcount);
1864
1865 ret = _nfs4_proc_open(opendata);
1866 if (ret != 0)
1867 goto out;
1868
1869 state = nfs4_opendata_to_nfs4_state(opendata);
1870 ret = PTR_ERR(state);
1871 if (IS_ERR(state))
1872 goto out;
1873 if (server->caps & NFS_CAP_POSIX_LOCK)
1874 set_bit(NFS_STATE_POSIX_LOCKS, &state->flags);
1875
1876 ret = nfs4_opendata_access(sp->so_cred, opendata, state, fmode, flags);
1877 if (ret != 0)
1878 goto out;
1879
1880 if (read_seqcount_retry(&sp->so_reclaim_seqcount, seq)) {
1881 nfs4_schedule_stateid_recovery(server, state);
1882 nfs4_wait_clnt_recover(server->nfs_client);
1883 }
1884 *res = state;
1885out:
1886 return ret;
1887}
1888
1848/* 1889/*
1849 * Returns a referenced nfs4_state 1890 * Returns a referenced nfs4_state
1850 */ 1891 */
@@ -1889,18 +1930,7 @@ static int _nfs4_do_open(struct inode *dir,
1889 if (dentry->d_inode != NULL) 1930 if (dentry->d_inode != NULL)
1890 opendata->state = nfs4_get_open_state(dentry->d_inode, sp); 1931 opendata->state = nfs4_get_open_state(dentry->d_inode, sp);
1891 1932
1892 status = _nfs4_proc_open(opendata); 1933 status = _nfs4_open_and_get_state(opendata, fmode, flags, &state);
1893 if (status != 0)
1894 goto err_opendata_put;
1895
1896 state = nfs4_opendata_to_nfs4_state(opendata);
1897 status = PTR_ERR(state);
1898 if (IS_ERR(state))
1899 goto err_opendata_put;
1900 if (server->caps & NFS_CAP_POSIX_LOCK)
1901 set_bit(NFS_STATE_POSIX_LOCKS, &state->flags);
1902
1903 status = nfs4_opendata_access(cred, opendata, state, fmode, flags);
1904 if (status != 0) 1934 if (status != 0)
1905 goto err_opendata_put; 1935 goto err_opendata_put;
1906 1936
@@ -2088,7 +2118,7 @@ static void nfs4_free_closedata(void *data)
2088 nfs4_put_open_state(calldata->state); 2118 nfs4_put_open_state(calldata->state);
2089 nfs_free_seqid(calldata->arg.seqid); 2119 nfs_free_seqid(calldata->arg.seqid);
2090 nfs4_put_state_owner(sp); 2120 nfs4_put_state_owner(sp);
2091 nfs_sb_deactive_async(sb); 2121 nfs_sb_deactive(sb);
2092 kfree(calldata); 2122 kfree(calldata);
2093} 2123}
2094 2124
@@ -2150,7 +2180,7 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data)
2150 2180
2151 dprintk("%s: begin!\n", __func__); 2181 dprintk("%s: begin!\n", __func__);
2152 if (nfs_wait_on_sequence(calldata->arg.seqid, task) != 0) 2182 if (nfs_wait_on_sequence(calldata->arg.seqid, task) != 0)
2153 return; 2183 goto out_wait;
2154 2184
2155 task->tk_msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_DOWNGRADE]; 2185 task->tk_msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_DOWNGRADE];
2156 calldata->arg.fmode = FMODE_READ|FMODE_WRITE; 2186 calldata->arg.fmode = FMODE_READ|FMODE_WRITE;
@@ -2172,16 +2202,14 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data)
2172 2202
2173 if (!call_close) { 2203 if (!call_close) {
2174 /* Note: exit _without_ calling nfs4_close_done */ 2204 /* Note: exit _without_ calling nfs4_close_done */
2175 task->tk_action = NULL; 2205 goto out_no_action;
2176 nfs4_sequence_done(task, &calldata->res.seq_res);
2177 goto out;
2178 } 2206 }
2179 2207
2180 if (calldata->arg.fmode == 0) { 2208 if (calldata->arg.fmode == 0) {
2181 task->tk_msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_CLOSE]; 2209 task->tk_msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_CLOSE];
2182 if (calldata->roc && 2210 if (calldata->roc &&
2183 pnfs_roc_drain(inode, &calldata->roc_barrier, task)) 2211 pnfs_roc_drain(inode, &calldata->roc_barrier, task))
2184 goto out; 2212 goto out_wait;
2185 } 2213 }
2186 2214
2187 nfs_fattr_init(calldata->res.fattr); 2215 nfs_fattr_init(calldata->res.fattr);
@@ -2191,8 +2219,12 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data)
2191 &calldata->res.seq_res, 2219 &calldata->res.seq_res,
2192 task) != 0) 2220 task) != 0)
2193 nfs_release_seqid(calldata->arg.seqid); 2221 nfs_release_seqid(calldata->arg.seqid);
2194out:
2195 dprintk("%s: done!\n", __func__); 2222 dprintk("%s: done!\n", __func__);
2223 return;
2224out_no_action:
2225 task->tk_action = NULL;
2226out_wait:
2227 nfs4_sequence_done(task, &calldata->res.seq_res);
2196} 2228}
2197 2229
2198static const struct rpc_call_ops nfs4_close_ops = { 2230static const struct rpc_call_ops nfs4_close_ops = {
@@ -4423,12 +4455,10 @@ static void nfs4_locku_prepare(struct rpc_task *task, void *data)
4423 struct nfs4_unlockdata *calldata = data; 4455 struct nfs4_unlockdata *calldata = data;
4424 4456
4425 if (nfs_wait_on_sequence(calldata->arg.seqid, task) != 0) 4457 if (nfs_wait_on_sequence(calldata->arg.seqid, task) != 0)
4426 return; 4458 goto out_wait;
4427 if (test_bit(NFS_LOCK_INITIALIZED, &calldata->lsp->ls_flags) == 0) { 4459 if (test_bit(NFS_LOCK_INITIALIZED, &calldata->lsp->ls_flags) == 0) {
4428 /* Note: exit _without_ running nfs4_locku_done */ 4460 /* Note: exit _without_ running nfs4_locku_done */
4429 task->tk_action = NULL; 4461 goto out_no_action;
4430 nfs4_sequence_done(task, &calldata->res.seq_res);
4431 return;
4432 } 4462 }
4433 calldata->timestamp = jiffies; 4463 calldata->timestamp = jiffies;
4434 if (nfs4_setup_sequence(calldata->server, 4464 if (nfs4_setup_sequence(calldata->server,
@@ -4436,6 +4466,11 @@ static void nfs4_locku_prepare(struct rpc_task *task, void *data)
4436 &calldata->res.seq_res, 4466 &calldata->res.seq_res,
4437 task) != 0) 4467 task) != 0)
4438 nfs_release_seqid(calldata->arg.seqid); 4468 nfs_release_seqid(calldata->arg.seqid);
4469 return;
4470out_no_action:
4471 task->tk_action = NULL;
4472out_wait:
4473 nfs4_sequence_done(task, &calldata->res.seq_res);
4439} 4474}
4440 4475
4441static const struct rpc_call_ops nfs4_locku_ops = { 4476static const struct rpc_call_ops nfs4_locku_ops = {
@@ -4482,7 +4517,9 @@ static struct rpc_task *nfs4_do_unlck(struct file_lock *fl,
4482 4517
4483static int nfs4_proc_unlck(struct nfs4_state *state, int cmd, struct file_lock *request) 4518static int nfs4_proc_unlck(struct nfs4_state *state, int cmd, struct file_lock *request)
4484{ 4519{
4485 struct nfs_inode *nfsi = NFS_I(state->inode); 4520 struct inode *inode = state->inode;
4521 struct nfs4_state_owner *sp = state->owner;
4522 struct nfs_inode *nfsi = NFS_I(inode);
4486 struct nfs_seqid *seqid; 4523 struct nfs_seqid *seqid;
4487 struct nfs4_lock_state *lsp; 4524 struct nfs4_lock_state *lsp;
4488 struct rpc_task *task; 4525 struct rpc_task *task;
@@ -4492,12 +4529,17 @@ static int nfs4_proc_unlck(struct nfs4_state *state, int cmd, struct file_lock *
4492 status = nfs4_set_lock_state(state, request); 4529 status = nfs4_set_lock_state(state, request);
4493 /* Unlock _before_ we do the RPC call */ 4530 /* Unlock _before_ we do the RPC call */
4494 request->fl_flags |= FL_EXISTS; 4531 request->fl_flags |= FL_EXISTS;
4532 /* Exclude nfs_delegation_claim_locks() */
4533 mutex_lock(&sp->so_delegreturn_mutex);
4534 /* Exclude nfs4_reclaim_open_stateid() - note nesting! */
4495 down_read(&nfsi->rwsem); 4535 down_read(&nfsi->rwsem);
4496 if (do_vfs_lock(request->fl_file, request) == -ENOENT) { 4536 if (do_vfs_lock(request->fl_file, request) == -ENOENT) {
4497 up_read(&nfsi->rwsem); 4537 up_read(&nfsi->rwsem);
4538 mutex_unlock(&sp->so_delegreturn_mutex);
4498 goto out; 4539 goto out;
4499 } 4540 }
4500 up_read(&nfsi->rwsem); 4541 up_read(&nfsi->rwsem);
4542 mutex_unlock(&sp->so_delegreturn_mutex);
4501 if (status != 0) 4543 if (status != 0)
4502 goto out; 4544 goto out;
4503 /* Is this a delegated lock? */ 4545 /* Is this a delegated lock? */
@@ -4576,7 +4618,7 @@ static void nfs4_lock_prepare(struct rpc_task *task, void *calldata)
4576 4618
4577 dprintk("%s: begin!\n", __func__); 4619 dprintk("%s: begin!\n", __func__);
4578 if (nfs_wait_on_sequence(data->arg.lock_seqid, task) != 0) 4620 if (nfs_wait_on_sequence(data->arg.lock_seqid, task) != 0)
4579 return; 4621 goto out_wait;
4580 /* Do we need to do an open_to_lock_owner? */ 4622 /* Do we need to do an open_to_lock_owner? */
4581 if (!(data->arg.lock_seqid->sequence->flags & NFS_SEQID_CONFIRMED)) { 4623 if (!(data->arg.lock_seqid->sequence->flags & NFS_SEQID_CONFIRMED)) {
4582 if (nfs_wait_on_sequence(data->arg.open_seqid, task) != 0) { 4624 if (nfs_wait_on_sequence(data->arg.open_seqid, task) != 0) {
@@ -4596,6 +4638,8 @@ static void nfs4_lock_prepare(struct rpc_task *task, void *calldata)
4596 nfs_release_seqid(data->arg.open_seqid); 4638 nfs_release_seqid(data->arg.open_seqid);
4597out_release_lock_seqid: 4639out_release_lock_seqid:
4598 nfs_release_seqid(data->arg.lock_seqid); 4640 nfs_release_seqid(data->arg.lock_seqid);
4641out_wait:
4642 nfs4_sequence_done(task, &data->res.seq_res);
4599 dprintk("%s: done!, ret = %d\n", __func__, data->rpc_status); 4643 dprintk("%s: done!, ret = %d\n", __func__, data->rpc_status);
4600} 4644}
4601 4645
@@ -4813,8 +4857,10 @@ static int nfs41_lock_expired(struct nfs4_state *state, struct file_lock *reques
4813 4857
4814static int _nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock *request) 4858static int _nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock *request)
4815{ 4859{
4860 struct nfs4_state_owner *sp = state->owner;
4816 struct nfs_inode *nfsi = NFS_I(state->inode); 4861 struct nfs_inode *nfsi = NFS_I(state->inode);
4817 unsigned char fl_flags = request->fl_flags; 4862 unsigned char fl_flags = request->fl_flags;
4863 unsigned int seq;
4818 int status = -ENOLCK; 4864 int status = -ENOLCK;
4819 4865
4820 if ((fl_flags & FL_POSIX) && 4866 if ((fl_flags & FL_POSIX) &&
@@ -4836,9 +4882,16 @@ static int _nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock
4836 status = do_vfs_lock(request->fl_file, request); 4882 status = do_vfs_lock(request->fl_file, request);
4837 goto out_unlock; 4883 goto out_unlock;
4838 } 4884 }
4885 seq = raw_seqcount_begin(&sp->so_reclaim_seqcount);
4886 up_read(&nfsi->rwsem);
4839 status = _nfs4_do_setlk(state, cmd, request, NFS_LOCK_NEW); 4887 status = _nfs4_do_setlk(state, cmd, request, NFS_LOCK_NEW);
4840 if (status != 0) 4888 if (status != 0)
4889 goto out;
4890 down_read(&nfsi->rwsem);
4891 if (read_seqcount_retry(&sp->so_reclaim_seqcount, seq)) {
4892 status = -NFS4ERR_DELAY;
4841 goto out_unlock; 4893 goto out_unlock;
4894 }
4842 /* Note: we always want to sleep here! */ 4895 /* Note: we always want to sleep here! */
4843 request->fl_flags = fl_flags | FL_SLEEP; 4896 request->fl_flags = fl_flags | FL_SLEEP;
4844 if (do_vfs_lock(request->fl_file, request) < 0) 4897 if (do_vfs_lock(request->fl_file, request) < 0)
@@ -4945,24 +4998,22 @@ int nfs4_lock_delegation_recall(struct nfs4_state *state, struct file_lock *fl)
4945 case 0: 4998 case 0:
4946 case -ESTALE: 4999 case -ESTALE:
4947 goto out; 5000 goto out;
4948 case -NFS4ERR_EXPIRED:
4949 nfs4_schedule_stateid_recovery(server, state);
4950 case -NFS4ERR_STALE_CLIENTID: 5001 case -NFS4ERR_STALE_CLIENTID:
4951 case -NFS4ERR_STALE_STATEID: 5002 case -NFS4ERR_STALE_STATEID:
5003 set_bit(NFS_DELEGATED_STATE, &state->flags);
5004 case -NFS4ERR_EXPIRED:
4952 nfs4_schedule_lease_recovery(server->nfs_client); 5005 nfs4_schedule_lease_recovery(server->nfs_client);
5006 err = -EAGAIN;
4953 goto out; 5007 goto out;
4954 case -NFS4ERR_BADSESSION: 5008 case -NFS4ERR_BADSESSION:
4955 case -NFS4ERR_BADSLOT: 5009 case -NFS4ERR_BADSLOT:
4956 case -NFS4ERR_BAD_HIGH_SLOT: 5010 case -NFS4ERR_BAD_HIGH_SLOT:
4957 case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION: 5011 case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION:
4958 case -NFS4ERR_DEADSESSION: 5012 case -NFS4ERR_DEADSESSION:
5013 set_bit(NFS_DELEGATED_STATE, &state->flags);
4959 nfs4_schedule_session_recovery(server->nfs_client->cl_session, err); 5014 nfs4_schedule_session_recovery(server->nfs_client->cl_session, err);
5015 err = -EAGAIN;
4960 goto out; 5016 goto out;
4961 case -ERESTARTSYS:
4962 /*
4963 * The show must go on: exit, but mark the
4964 * stateid as needing recovery.
4965 */
4966 case -NFS4ERR_DELEG_REVOKED: 5017 case -NFS4ERR_DELEG_REVOKED:
4967 case -NFS4ERR_ADMIN_REVOKED: 5018 case -NFS4ERR_ADMIN_REVOKED:
4968 case -NFS4ERR_BAD_STATEID: 5019 case -NFS4ERR_BAD_STATEID:
@@ -4975,9 +5026,8 @@ int nfs4_lock_delegation_recall(struct nfs4_state *state, struct file_lock *fl)
4975 /* kill_proc(fl->fl_pid, SIGLOST, 1); */ 5026 /* kill_proc(fl->fl_pid, SIGLOST, 1); */
4976 err = 0; 5027 err = 0;
4977 goto out; 5028 goto out;
4978 case -NFS4ERR_DELAY:
4979 break;
4980 } 5029 }
5030 set_bit(NFS_DELEGATED_STATE, &state->flags);
4981 err = nfs4_handle_exception(server, err, &exception); 5031 err = nfs4_handle_exception(server, err, &exception);
4982 } while (exception.retry); 5032 } while (exception.retry);
4983out: 5033out:
@@ -6134,7 +6184,8 @@ nfs4_proc_layoutget(struct nfs4_layoutget *lgp, gfp_t gfp_flags)
6134 status = nfs4_wait_for_completion_rpc_task(task); 6184 status = nfs4_wait_for_completion_rpc_task(task);
6135 if (status == 0) 6185 if (status == 0)
6136 status = task->tk_status; 6186 status = task->tk_status;
6137 if (status == 0) 6187 /* if layoutp->len is 0, nfs4_layoutget_prepare called rpc_exit */
6188 if (status == 0 && lgp->res.layoutp->len)
6138 lseg = pnfs_layout_process(lgp); 6189 lseg = pnfs_layout_process(lgp);
6139 rpc_put_task(task); 6190 rpc_put_task(task);
6140 dprintk("<-- %s status=%d\n", __func__, status); 6191 dprintk("<-- %s status=%d\n", __func__, status);
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index e61f68d5ef21..6ace365c6334 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -518,6 +518,8 @@ nfs4_alloc_state_owner(struct nfs_server *server,
518 nfs4_init_seqid_counter(&sp->so_seqid); 518 nfs4_init_seqid_counter(&sp->so_seqid);
519 atomic_set(&sp->so_count, 1); 519 atomic_set(&sp->so_count, 1);
520 INIT_LIST_HEAD(&sp->so_lru); 520 INIT_LIST_HEAD(&sp->so_lru);
521 seqcount_init(&sp->so_reclaim_seqcount);
522 mutex_init(&sp->so_delegreturn_mutex);
521 return sp; 523 return sp;
522} 524}
523 525
@@ -1390,8 +1392,9 @@ static int nfs4_reclaim_open_state(struct nfs4_state_owner *sp, const struct nfs
1390 * recovering after a network partition or a reboot from a 1392 * recovering after a network partition or a reboot from a
1391 * server that doesn't support a grace period. 1393 * server that doesn't support a grace period.
1392 */ 1394 */
1393restart:
1394 spin_lock(&sp->so_lock); 1395 spin_lock(&sp->so_lock);
1396 write_seqcount_begin(&sp->so_reclaim_seqcount);
1397restart:
1395 list_for_each_entry(state, &sp->so_states, open_states) { 1398 list_for_each_entry(state, &sp->so_states, open_states) {
1396 if (!test_and_clear_bit(ops->state_flag_bit, &state->flags)) 1399 if (!test_and_clear_bit(ops->state_flag_bit, &state->flags))
1397 continue; 1400 continue;
@@ -1412,6 +1415,7 @@ restart:
1412 } 1415 }
1413 spin_unlock(&state->state_lock); 1416 spin_unlock(&state->state_lock);
1414 nfs4_put_open_state(state); 1417 nfs4_put_open_state(state);
1418 spin_lock(&sp->so_lock);
1415 goto restart; 1419 goto restart;
1416 } 1420 }
1417 } 1421 }
@@ -1449,12 +1453,17 @@ restart:
1449 goto out_err; 1453 goto out_err;
1450 } 1454 }
1451 nfs4_put_open_state(state); 1455 nfs4_put_open_state(state);
1456 spin_lock(&sp->so_lock);
1452 goto restart; 1457 goto restart;
1453 } 1458 }
1459 write_seqcount_end(&sp->so_reclaim_seqcount);
1454 spin_unlock(&sp->so_lock); 1460 spin_unlock(&sp->so_lock);
1455 return 0; 1461 return 0;
1456out_err: 1462out_err:
1457 nfs4_put_open_state(state); 1463 nfs4_put_open_state(state);
1464 spin_lock(&sp->so_lock);
1465 write_seqcount_end(&sp->so_reclaim_seqcount);
1466 spin_unlock(&sp->so_lock);
1458 return status; 1467 return status;
1459} 1468}
1460 1469
diff --git a/fs/nfs/objlayout/objio_osd.c b/fs/nfs/objlayout/objio_osd.c
index c6f990656f89..88f9611a945c 100644
--- a/fs/nfs/objlayout/objio_osd.c
+++ b/fs/nfs/objlayout/objio_osd.c
@@ -647,6 +647,7 @@ static struct pnfs_layoutdriver_type objlayout_type = {
647 .flags = PNFS_LAYOUTRET_ON_SETATTR | 647 .flags = PNFS_LAYOUTRET_ON_SETATTR |
648 PNFS_LAYOUTRET_ON_ERROR, 648 PNFS_LAYOUTRET_ON_ERROR,
649 649
650 .owner = THIS_MODULE,
650 .alloc_layout_hdr = objlayout_alloc_layout_hdr, 651 .alloc_layout_hdr = objlayout_alloc_layout_hdr,
651 .free_layout_hdr = objlayout_free_layout_hdr, 652 .free_layout_hdr = objlayout_free_layout_hdr,
652 653
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index d00260b08103..6be70f622b62 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -505,37 +505,147 @@ pnfs_destroy_layout(struct nfs_inode *nfsi)
505} 505}
506EXPORT_SYMBOL_GPL(pnfs_destroy_layout); 506EXPORT_SYMBOL_GPL(pnfs_destroy_layout);
507 507
508/* 508static bool
509 * Called by the state manger to remove all layouts established under an 509pnfs_layout_add_bulk_destroy_list(struct inode *inode,
510 * expired lease. 510 struct list_head *layout_list)
511 */
512void
513pnfs_destroy_all_layouts(struct nfs_client *clp)
514{ 511{
515 struct nfs_server *server;
516 struct pnfs_layout_hdr *lo; 512 struct pnfs_layout_hdr *lo;
517 LIST_HEAD(tmp_list); 513 bool ret = false;
518 514
519 nfs4_deviceid_mark_client_invalid(clp); 515 spin_lock(&inode->i_lock);
520 nfs4_deviceid_purge_client(clp); 516 lo = NFS_I(inode)->layout;
517 if (lo != NULL && list_empty(&lo->plh_bulk_destroy)) {
518 pnfs_get_layout_hdr(lo);
519 list_add(&lo->plh_bulk_destroy, layout_list);
520 ret = true;
521 }
522 spin_unlock(&inode->i_lock);
523 return ret;
524}
525
526/* Caller must hold rcu_read_lock and clp->cl_lock */
527static int
528pnfs_layout_bulk_destroy_byserver_locked(struct nfs_client *clp,
529 struct nfs_server *server,
530 struct list_head *layout_list)
531{
532 struct pnfs_layout_hdr *lo, *next;
533 struct inode *inode;
534
535 list_for_each_entry_safe(lo, next, &server->layouts, plh_layouts) {
536 inode = igrab(lo->plh_inode);
537 if (inode == NULL)
538 continue;
539 list_del_init(&lo->plh_layouts);
540 if (pnfs_layout_add_bulk_destroy_list(inode, layout_list))
541 continue;
542 rcu_read_unlock();
543 spin_unlock(&clp->cl_lock);
544 iput(inode);
545 spin_lock(&clp->cl_lock);
546 rcu_read_lock();
547 return -EAGAIN;
548 }
549 return 0;
550}
551
552static int
553pnfs_layout_free_bulk_destroy_list(struct list_head *layout_list,
554 bool is_bulk_recall)
555{
556 struct pnfs_layout_hdr *lo;
557 struct inode *inode;
558 struct pnfs_layout_range range = {
559 .iomode = IOMODE_ANY,
560 .offset = 0,
561 .length = NFS4_MAX_UINT64,
562 };
563 LIST_HEAD(lseg_list);
564 int ret = 0;
565
566 while (!list_empty(layout_list)) {
567 lo = list_entry(layout_list->next, struct pnfs_layout_hdr,
568 plh_bulk_destroy);
569 dprintk("%s freeing layout for inode %lu\n", __func__,
570 lo->plh_inode->i_ino);
571 inode = lo->plh_inode;
572 spin_lock(&inode->i_lock);
573 list_del_init(&lo->plh_bulk_destroy);
574 lo->plh_block_lgets++; /* permanently block new LAYOUTGETs */
575 if (is_bulk_recall)
576 set_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags);
577 if (pnfs_mark_matching_lsegs_invalid(lo, &lseg_list, &range))
578 ret = -EAGAIN;
579 spin_unlock(&inode->i_lock);
580 pnfs_free_lseg_list(&lseg_list);
581 pnfs_put_layout_hdr(lo);
582 iput(inode);
583 }
584 return ret;
585}
586
587int
588pnfs_destroy_layouts_byfsid(struct nfs_client *clp,
589 struct nfs_fsid *fsid,
590 bool is_recall)
591{
592 struct nfs_server *server;
593 LIST_HEAD(layout_list);
521 594
522 spin_lock(&clp->cl_lock); 595 spin_lock(&clp->cl_lock);
523 rcu_read_lock(); 596 rcu_read_lock();
597restart:
524 list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) { 598 list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) {
525 if (!list_empty(&server->layouts)) 599 if (memcmp(&server->fsid, fsid, sizeof(*fsid)) != 0)
526 list_splice_init(&server->layouts, &tmp_list); 600 continue;
601 if (pnfs_layout_bulk_destroy_byserver_locked(clp,
602 server,
603 &layout_list) != 0)
604 goto restart;
527 } 605 }
528 rcu_read_unlock(); 606 rcu_read_unlock();
529 spin_unlock(&clp->cl_lock); 607 spin_unlock(&clp->cl_lock);
530 608
531 while (!list_empty(&tmp_list)) { 609 if (list_empty(&layout_list))
532 lo = list_entry(tmp_list.next, struct pnfs_layout_hdr, 610 return 0;
533 plh_layouts); 611 return pnfs_layout_free_bulk_destroy_list(&layout_list, is_recall);
534 dprintk("%s freeing layout for inode %lu\n", __func__, 612}
535 lo->plh_inode->i_ino); 613
536 list_del_init(&lo->plh_layouts); 614int
537 pnfs_destroy_layout(NFS_I(lo->plh_inode)); 615pnfs_destroy_layouts_byclid(struct nfs_client *clp,
616 bool is_recall)
617{
618 struct nfs_server *server;
619 LIST_HEAD(layout_list);
620
621 spin_lock(&clp->cl_lock);
622 rcu_read_lock();
623restart:
624 list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) {
625 if (pnfs_layout_bulk_destroy_byserver_locked(clp,
626 server,
627 &layout_list) != 0)
628 goto restart;
538 } 629 }
630 rcu_read_unlock();
631 spin_unlock(&clp->cl_lock);
632
633 if (list_empty(&layout_list))
634 return 0;
635 return pnfs_layout_free_bulk_destroy_list(&layout_list, is_recall);
636}
637
638/*
639 * Called by the state manger to remove all layouts established under an
640 * expired lease.
641 */
642void
643pnfs_destroy_all_layouts(struct nfs_client *clp)
644{
645 nfs4_deviceid_mark_client_invalid(clp);
646 nfs4_deviceid_purge_client(clp);
647
648 pnfs_destroy_layouts_byclid(clp, false);
539} 649}
540 650
541/* 651/*
@@ -888,7 +998,7 @@ alloc_init_layout_hdr(struct inode *ino,
888 atomic_set(&lo->plh_refcount, 1); 998 atomic_set(&lo->plh_refcount, 1);
889 INIT_LIST_HEAD(&lo->plh_layouts); 999 INIT_LIST_HEAD(&lo->plh_layouts);
890 INIT_LIST_HEAD(&lo->plh_segs); 1000 INIT_LIST_HEAD(&lo->plh_segs);
891 INIT_LIST_HEAD(&lo->plh_bulk_recall); 1001 INIT_LIST_HEAD(&lo->plh_bulk_destroy);
892 lo->plh_inode = ino; 1002 lo->plh_inode = ino;
893 lo->plh_lc_cred = get_rpccred(ctx->state->owner->so_cred); 1003 lo->plh_lc_cred = get_rpccred(ctx->state->owner->so_cred);
894 return lo; 1004 return lo;
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index dbf7bba52da0..97cb358bb882 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -132,7 +132,7 @@ struct pnfs_layoutdriver_type {
132struct pnfs_layout_hdr { 132struct pnfs_layout_hdr {
133 atomic_t plh_refcount; 133 atomic_t plh_refcount;
134 struct list_head plh_layouts; /* other client layouts */ 134 struct list_head plh_layouts; /* other client layouts */
135 struct list_head plh_bulk_recall; /* clnt list of bulk recalls */ 135 struct list_head plh_bulk_destroy;
136 struct list_head plh_segs; /* layout segments list */ 136 struct list_head plh_segs; /* layout segments list */
137 nfs4_stateid plh_stateid; 137 nfs4_stateid plh_stateid;
138 atomic_t plh_outstanding; /* number of RPCs out */ 138 atomic_t plh_outstanding; /* number of RPCs out */
@@ -196,6 +196,11 @@ struct pnfs_layout_segment *pnfs_layout_process(struct nfs4_layoutget *lgp);
196void pnfs_free_lseg_list(struct list_head *tmp_list); 196void pnfs_free_lseg_list(struct list_head *tmp_list);
197void pnfs_destroy_layout(struct nfs_inode *); 197void pnfs_destroy_layout(struct nfs_inode *);
198void pnfs_destroy_all_layouts(struct nfs_client *); 198void pnfs_destroy_all_layouts(struct nfs_client *);
199int pnfs_destroy_layouts_byfsid(struct nfs_client *clp,
200 struct nfs_fsid *fsid,
201 bool is_recall);
202int pnfs_destroy_layouts_byclid(struct nfs_client *clp,
203 bool is_recall);
199void pnfs_put_layout_hdr(struct pnfs_layout_hdr *lo); 204void pnfs_put_layout_hdr(struct pnfs_layout_hdr *lo);
200void pnfs_set_layout_stateid(struct pnfs_layout_hdr *lo, 205void pnfs_set_layout_stateid(struct pnfs_layout_hdr *lo,
201 const nfs4_stateid *new, 206 const nfs4_stateid *new,
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index b056b1628722..befbae0cce41 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -54,7 +54,6 @@
54#include <linux/parser.h> 54#include <linux/parser.h>
55#include <linux/nsproxy.h> 55#include <linux/nsproxy.h>
56#include <linux/rcupdate.h> 56#include <linux/rcupdate.h>
57#include <linux/kthread.h>
58 57
59#include <asm/uaccess.h> 58#include <asm/uaccess.h>
60 59
@@ -418,54 +417,6 @@ void nfs_sb_deactive(struct super_block *sb)
418} 417}
419EXPORT_SYMBOL_GPL(nfs_sb_deactive); 418EXPORT_SYMBOL_GPL(nfs_sb_deactive);
420 419
421static int nfs_deactivate_super_async_work(void *ptr)
422{
423 struct super_block *sb = ptr;
424
425 deactivate_super(sb);
426 module_put_and_exit(0);
427 return 0;
428}
429
430/*
431 * same effect as deactivate_super, but will do final unmount in kthread
432 * context
433 */
434static void nfs_deactivate_super_async(struct super_block *sb)
435{
436 struct task_struct *task;
437 char buf[INET6_ADDRSTRLEN + 1];
438 struct nfs_server *server = NFS_SB(sb);
439 struct nfs_client *clp = server->nfs_client;
440
441 if (!atomic_add_unless(&sb->s_active, -1, 1)) {
442 rcu_read_lock();
443 snprintf(buf, sizeof(buf),
444 rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_ADDR));
445 rcu_read_unlock();
446
447 __module_get(THIS_MODULE);
448 task = kthread_run(nfs_deactivate_super_async_work, sb,
449 "%s-deactivate-super", buf);
450 if (IS_ERR(task)) {
451 pr_err("%s: kthread_run: %ld\n",
452 __func__, PTR_ERR(task));
453 /* make synchronous call and hope for the best */
454 deactivate_super(sb);
455 module_put(THIS_MODULE);
456 }
457 }
458}
459
460void nfs_sb_deactive_async(struct super_block *sb)
461{
462 struct nfs_server *server = NFS_SB(sb);
463
464 if (atomic_dec_and_test(&server->active))
465 nfs_deactivate_super_async(sb);
466}
467EXPORT_SYMBOL_GPL(nfs_sb_deactive_async);
468
469/* 420/*
470 * Deliver file system statistics to userspace 421 * Deliver file system statistics to userspace
471 */ 422 */
diff --git a/fs/nfs/unlink.c b/fs/nfs/unlink.c
index 3f79c77153b8..d26a32f5b53b 100644
--- a/fs/nfs/unlink.c
+++ b/fs/nfs/unlink.c
@@ -95,7 +95,7 @@ static void nfs_async_unlink_release(void *calldata)
95 95
96 nfs_dec_sillycount(data->dir); 96 nfs_dec_sillycount(data->dir);
97 nfs_free_unlinkdata(data); 97 nfs_free_unlinkdata(data);
98 nfs_sb_deactive_async(sb); 98 nfs_sb_deactive(sb);
99} 99}
100 100
101static void nfs_unlink_prepare(struct rpc_task *task, void *calldata) 101static void nfs_unlink_prepare(struct rpc_task *task, void *calldata)
@@ -268,8 +268,7 @@ nfs_async_unlink(struct inode *dir, struct dentry *dentry)
268 * point dentry is definitely not a root, so we won't need 268 * point dentry is definitely not a root, so we won't need
269 * that anymore. 269 * that anymore.
270 */ 270 */
271 if (devname_garbage) 271 kfree(devname_garbage);
272 kfree(devname_garbage);
273 return 0; 272 return 0;
274out_unlock: 273out_unlock:
275 spin_unlock(&dentry->d_lock); 274 spin_unlock(&dentry->d_lock);
diff --git a/fs/nfsd/Kconfig b/fs/nfsd/Kconfig
index 8df1ea4a6ff9..430b6872806f 100644
--- a/fs/nfsd/Kconfig
+++ b/fs/nfsd/Kconfig
@@ -65,8 +65,8 @@ config NFSD_V3_ACL
65 If unsure, say N. 65 If unsure, say N.
66 66
67config NFSD_V4 67config NFSD_V4
68 bool "NFS server support for NFS version 4 (EXPERIMENTAL)" 68 bool "NFS server support for NFS version 4"
69 depends on NFSD && PROC_FS && EXPERIMENTAL 69 depends on NFSD && PROC_FS
70 select NFSD_V3 70 select NFSD_V3
71 select FS_POSIX_ACL 71 select FS_POSIX_ACL
72 select SUNRPC_GSS 72 select SUNRPC_GSS
diff --git a/fs/nilfs2/Kconfig b/fs/nilfs2/Kconfig
index 251da07b2a1d..80da8eb27393 100644
--- a/fs/nilfs2/Kconfig
+++ b/fs/nilfs2/Kconfig
@@ -1,6 +1,5 @@
1config NILFS2_FS 1config NILFS2_FS
2 tristate "NILFS2 file system support (EXPERIMENTAL)" 2 tristate "NILFS2 file system support"
3 depends on EXPERIMENTAL
4 select CRC32 3 select CRC32
5 help 4 help
6 NILFS2 is a log-structured file system (LFS) supporting continuous 5 NILFS2 is a log-structured file system (LFS) supporting continuous
diff --git a/fs/nilfs2/file.c b/fs/nilfs2/file.c
index 61946883025c..bec4af6eab13 100644
--- a/fs/nilfs2/file.c
+++ b/fs/nilfs2/file.c
@@ -126,7 +126,7 @@ static int nilfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
126 nilfs_transaction_commit(inode->i_sb); 126 nilfs_transaction_commit(inode->i_sb);
127 127
128 mapped: 128 mapped:
129 wait_on_page_writeback(page); 129 wait_for_stable_page(page);
130 out: 130 out:
131 sb_end_pagefault(inode->i_sb); 131 sb_end_pagefault(inode->i_sb);
132 return block_page_mkwrite_return(ret); 132 return block_page_mkwrite_return(ret);
diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c
index 228a2c2ad8d7..07f7a92fe88e 100644
--- a/fs/notify/inotify/inotify_user.c
+++ b/fs/notify/inotify/inotify_user.c
@@ -576,8 +576,6 @@ static int inotify_update_existing_watch(struct fsnotify_group *group,
576 576
577 /* don't allow invalid bits: we don't want flags set */ 577 /* don't allow invalid bits: we don't want flags set */
578 mask = inotify_arg_to_mask(arg); 578 mask = inotify_arg_to_mask(arg);
579 if (unlikely(!(mask & IN_ALL_EVENTS)))
580 return -EINVAL;
581 579
582 fsn_mark = fsnotify_find_inode_mark(group, inode); 580 fsn_mark = fsnotify_find_inode_mark(group, inode);
583 if (!fsn_mark) 581 if (!fsn_mark)
@@ -629,8 +627,6 @@ static int inotify_new_watch(struct fsnotify_group *group,
629 627
630 /* don't allow invalid bits: we don't want flags set */ 628 /* don't allow invalid bits: we don't want flags set */
631 mask = inotify_arg_to_mask(arg); 629 mask = inotify_arg_to_mask(arg);
632 if (unlikely(!(mask & IN_ALL_EVENTS)))
633 return -EINVAL;
634 630
635 tmp_i_mark = kmem_cache_alloc(inotify_inode_mark_cachep, GFP_KERNEL); 631 tmp_i_mark = kmem_cache_alloc(inotify_inode_mark_cachep, GFP_KERNEL);
636 if (unlikely(!tmp_i_mark)) 632 if (unlikely(!tmp_i_mark))
diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index 31b9463fba1f..b8a9d87231b1 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -6751,8 +6751,7 @@ int ocfs2_zero_range_for_truncate(struct inode *inode, handle_t *handle,
6751 mlog_errno(ret); 6751 mlog_errno(ret);
6752 6752
6753out: 6753out:
6754 if (pages) 6754 kfree(pages);
6755 kfree(pages);
6756 6755
6757 return ret; 6756 return ret;
6758} 6757}
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index 657743254eb9..9796330d8f04 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -1194,6 +1194,7 @@ static int ocfs2_grab_pages_for_write(struct address_space *mapping,
1194 goto out; 1194 goto out;
1195 } 1195 }
1196 } 1196 }
1197 wait_for_stable_page(wc->w_pages[i]);
1197 1198
1198 if (index == target_index) 1199 if (index == target_index)
1199 wc->w_target_page = wc->w_pages[i]; 1200 wc->w_target_page = wc->w_pages[i];
diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c
index f7c648d7d6bf..42252bf64b51 100644
--- a/fs/ocfs2/cluster/heartbeat.c
+++ b/fs/ocfs2/cluster/heartbeat.c
@@ -1471,8 +1471,7 @@ static void o2hb_region_release(struct config_item *item)
1471 1471
1472 mlog(ML_HEARTBEAT, "hb region release (%s)\n", reg->hr_dev_name); 1472 mlog(ML_HEARTBEAT, "hb region release (%s)\n", reg->hr_dev_name);
1473 1473
1474 if (reg->hr_tmp_block) 1474 kfree(reg->hr_tmp_block);
1475 kfree(reg->hr_tmp_block);
1476 1475
1477 if (reg->hr_slot_data) { 1476 if (reg->hr_slot_data) {
1478 for (i = 0; i < reg->hr_num_pages; i++) { 1477 for (i = 0; i < reg->hr_num_pages; i++) {
@@ -1486,8 +1485,7 @@ static void o2hb_region_release(struct config_item *item)
1486 if (reg->hr_bdev) 1485 if (reg->hr_bdev)
1487 blkdev_put(reg->hr_bdev, FMODE_READ|FMODE_WRITE); 1486 blkdev_put(reg->hr_bdev, FMODE_READ|FMODE_WRITE);
1488 1487
1489 if (reg->hr_slots) 1488 kfree(reg->hr_slots);
1490 kfree(reg->hr_slots);
1491 1489
1492 kfree(reg->hr_db_regnum); 1490 kfree(reg->hr_db_regnum);
1493 kfree(reg->hr_db_livenodes); 1491 kfree(reg->hr_db_livenodes);
diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c
index 1bfe8802cc1e..0d2bf566e39a 100644
--- a/fs/ocfs2/cluster/tcp.c
+++ b/fs/ocfs2/cluster/tcp.c
@@ -870,7 +870,7 @@ int o2net_register_handler(u32 msg_type, u32 key, u32 max_len,
870 /* we've had some trouble with handlers seemingly vanishing. */ 870 /* we've had some trouble with handlers seemingly vanishing. */
871 mlog_bug_on_msg(o2net_handler_tree_lookup(msg_type, key, &p, 871 mlog_bug_on_msg(o2net_handler_tree_lookup(msg_type, key, &p,
872 &parent) == NULL, 872 &parent) == NULL,
873 "couldn't find handler we *just* registerd " 873 "couldn't find handler we *just* registered "
874 "for type %u key %08x\n", msg_type, key); 874 "for type %u key %08x\n", msg_type, key);
875 } 875 }
876 write_unlock(&o2net_handler_lock); 876 write_unlock(&o2net_handler_lock);
@@ -1165,10 +1165,8 @@ out:
1165 o2net_debug_del_nst(&nst); /* must be before dropping sc and node */ 1165 o2net_debug_del_nst(&nst); /* must be before dropping sc and node */
1166 if (sc) 1166 if (sc)
1167 sc_put(sc); 1167 sc_put(sc);
1168 if (vec) 1168 kfree(vec);
1169 kfree(vec); 1169 kfree(msg);
1170 if (msg)
1171 kfree(msg);
1172 o2net_complete_nsw(nn, &nsw, 0, 0, 0); 1170 o2net_complete_nsw(nn, &nsw, 0, 0, 0);
1173 return ret; 1171 return ret;
1174} 1172}
diff --git a/fs/ocfs2/dlm/dlmdomain.c b/fs/ocfs2/dlm/dlmdomain.c
index 9e89d70df337..dbb17c07656a 100644
--- a/fs/ocfs2/dlm/dlmdomain.c
+++ b/fs/ocfs2/dlm/dlmdomain.c
@@ -319,9 +319,7 @@ static void dlm_free_ctxt_mem(struct dlm_ctxt *dlm)
319 if (dlm->master_hash) 319 if (dlm->master_hash)
320 dlm_free_pagevec((void **)dlm->master_hash, DLM_HASH_PAGES); 320 dlm_free_pagevec((void **)dlm->master_hash, DLM_HASH_PAGES);
321 321
322 if (dlm->name) 322 kfree(dlm->name);
323 kfree(dlm->name);
324
325 kfree(dlm); 323 kfree(dlm);
326} 324}
327 325
diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c
index 4f7795fb5fc0..88577eb5d712 100644
--- a/fs/ocfs2/dlmglue.c
+++ b/fs/ocfs2/dlmglue.c
@@ -2545,6 +2545,7 @@ int ocfs2_super_lock(struct ocfs2_super *osb,
2545 * everything is up to the caller :) */ 2545 * everything is up to the caller :) */
2546 status = ocfs2_should_refresh_lock_res(lockres); 2546 status = ocfs2_should_refresh_lock_res(lockres);
2547 if (status < 0) { 2547 if (status < 0) {
2548 ocfs2_cluster_unlock(osb, lockres, level);
2548 mlog_errno(status); 2549 mlog_errno(status);
2549 goto bail; 2550 goto bail;
2550 } 2551 }
@@ -2553,8 +2554,10 @@ int ocfs2_super_lock(struct ocfs2_super *osb,
2553 2554
2554 ocfs2_complete_lock_res_refresh(lockres, status); 2555 ocfs2_complete_lock_res_refresh(lockres, status);
2555 2556
2556 if (status < 0) 2557 if (status < 0) {
2558 ocfs2_cluster_unlock(osb, lockres, level);
2557 mlog_errno(status); 2559 mlog_errno(status);
2560 }
2558 ocfs2_track_lock_refresh(lockres); 2561 ocfs2_track_lock_refresh(lockres);
2559 } 2562 }
2560bail: 2563bail:
diff --git a/fs/ocfs2/extent_map.c b/fs/ocfs2/extent_map.c
index f487aa343442..1c39efb71bab 100644
--- a/fs/ocfs2/extent_map.c
+++ b/fs/ocfs2/extent_map.c
@@ -282,8 +282,7 @@ search:
282 spin_unlock(&oi->ip_lock); 282 spin_unlock(&oi->ip_lock);
283 283
284out: 284out:
285 if (new_emi) 285 kfree(new_emi);
286 kfree(new_emi);
287} 286}
288 287
289static int ocfs2_last_eb_is_empty(struct inode *inode, 288static int ocfs2_last_eb_is_empty(struct inode *inode,
diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c
index 2dd36af79e26..8eccfabcd12e 100644
--- a/fs/ocfs2/journal.c
+++ b/fs/ocfs2/journal.c
@@ -1234,11 +1234,8 @@ static void ocfs2_queue_recovery_completion(struct ocfs2_journal *journal,
1234 /* Though we wish to avoid it, we are in fact safe in 1234 /* Though we wish to avoid it, we are in fact safe in
1235 * skipping local alloc cleanup as fsck.ocfs2 is more 1235 * skipping local alloc cleanup as fsck.ocfs2 is more
1236 * than capable of reclaiming unused space. */ 1236 * than capable of reclaiming unused space. */
1237 if (la_dinode) 1237 kfree(la_dinode);
1238 kfree(la_dinode); 1238 kfree(tl_dinode);
1239
1240 if (tl_dinode)
1241 kfree(tl_dinode);
1242 1239
1243 if (qrec) 1240 if (qrec)
1244 ocfs2_free_quota_recovery(qrec); 1241 ocfs2_free_quota_recovery(qrec);
@@ -1408,8 +1405,7 @@ bail:
1408 1405
1409 mutex_unlock(&osb->recovery_lock); 1406 mutex_unlock(&osb->recovery_lock);
1410 1407
1411 if (rm_quota) 1408 kfree(rm_quota);
1412 kfree(rm_quota);
1413 1409
1414 /* no one is callint kthread_stop() for us so the kthread() api 1410 /* no one is callint kthread_stop() for us so the kthread() api
1415 * requires that we call do_exit(). And it isn't exported, but 1411 * requires that we call do_exit(). And it isn't exported, but
diff --git a/fs/ocfs2/localalloc.c b/fs/ocfs2/localalloc.c
index a9f78c74d687..aebeacd807c3 100644
--- a/fs/ocfs2/localalloc.c
+++ b/fs/ocfs2/localalloc.c
@@ -476,8 +476,7 @@ out:
476 if (local_alloc_inode) 476 if (local_alloc_inode)
477 iput(local_alloc_inode); 477 iput(local_alloc_inode);
478 478
479 if (alloc_copy) 479 kfree(alloc_copy);
480 kfree(alloc_copy);
481} 480}
482 481
483/* 482/*
@@ -534,7 +533,7 @@ int ocfs2_begin_local_alloc_recovery(struct ocfs2_super *osb,
534 mlog_errno(status); 533 mlog_errno(status);
535 534
536bail: 535bail:
537 if ((status < 0) && (*alloc_copy)) { 536 if (status < 0) {
538 kfree(*alloc_copy); 537 kfree(*alloc_copy);
539 *alloc_copy = NULL; 538 *alloc_copy = NULL;
540 } 539 }
@@ -1290,8 +1289,7 @@ bail:
1290 if (main_bm_inode) 1289 if (main_bm_inode)
1291 iput(main_bm_inode); 1290 iput(main_bm_inode);
1292 1291
1293 if (alloc_copy) 1292 kfree(alloc_copy);
1294 kfree(alloc_copy);
1295 1293
1296 if (ac) 1294 if (ac)
1297 ocfs2_free_alloc_context(ac); 1295 ocfs2_free_alloc_context(ac);
diff --git a/fs/ocfs2/stack_o2cb.c b/fs/ocfs2/stack_o2cb.c
index 94368017edb3..bf1f8930456f 100644
--- a/fs/ocfs2/stack_o2cb.c
+++ b/fs/ocfs2/stack_o2cb.c
@@ -376,7 +376,7 @@ static int o2cb_cluster_connect(struct ocfs2_cluster_connection *conn)
376 dlm_register_eviction_cb(dlm, &priv->op_eviction_cb); 376 dlm_register_eviction_cb(dlm, &priv->op_eviction_cb);
377 377
378out_free: 378out_free:
379 if (rc && conn->cc_private) 379 if (rc)
380 kfree(conn->cc_private); 380 kfree(conn->cc_private);
381 381
382out: 382out:
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index 0e91ec22a940..9b6910dec4ba 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -2525,8 +2525,7 @@ static int ocfs2_check_volume(struct ocfs2_super *osb)
2525 mlog_errno(status); 2525 mlog_errno(status);
2526 2526
2527finally: 2527finally:
2528 if (local_alloc) 2528 kfree(local_alloc);
2529 kfree(local_alloc);
2530 2529
2531 if (status) 2530 if (status)
2532 mlog_errno(status); 2531 mlog_errno(status);
@@ -2553,8 +2552,7 @@ static void ocfs2_delete_osb(struct ocfs2_super *osb)
2553 * we free it here. 2552 * we free it here.
2554 */ 2553 */
2555 kfree(osb->journal); 2554 kfree(osb->journal);
2556 if (osb->local_alloc_copy) 2555 kfree(osb->local_alloc_copy);
2557 kfree(osb->local_alloc_copy);
2558 kfree(osb->uuid_str); 2556 kfree(osb->uuid_str);
2559 ocfs2_put_dlm_debug(osb->osb_dlm_debug); 2557 ocfs2_put_dlm_debug(osb->osb_dlm_debug);
2560 memset(osb, 0, sizeof(struct ocfs2_super)); 2558 memset(osb, 0, sizeof(struct ocfs2_super));
diff --git a/fs/ocfs2/sysfile.c b/fs/ocfs2/sysfile.c
index 3d635f4bbb20..f053688d22a3 100644
--- a/fs/ocfs2/sysfile.c
+++ b/fs/ocfs2/sysfile.c
@@ -91,8 +91,7 @@ static struct inode **get_local_system_inode(struct ocfs2_super *osb,
91 } else 91 } else
92 osb->local_system_inodes = local_system_inodes; 92 osb->local_system_inodes = local_system_inodes;
93 spin_unlock(&osb->osb_lock); 93 spin_unlock(&osb->osb_lock);
94 if (unlikely(free)) 94 kfree(free);
95 kfree(free);
96 } 95 }
97 96
98 index = (slot * NUM_LOCAL_SYSTEM_INODES) + 97 index = (slot * NUM_LOCAL_SYSTEM_INODES) +
diff --git a/fs/proc/Makefile b/fs/proc/Makefile
index 981b05601931..712f24db9600 100644
--- a/fs/proc/Makefile
+++ b/fs/proc/Makefile
@@ -8,7 +8,8 @@ proc-y := nommu.o task_nommu.o
8proc-$(CONFIG_MMU) := mmu.o task_mmu.o 8proc-$(CONFIG_MMU) := mmu.o task_mmu.o
9 9
10proc-y += inode.o root.o base.o generic.o array.o \ 10proc-y += inode.o root.o base.o generic.o array.o \
11 proc_tty.o fd.o 11 fd.o
12proc-$(CONFIG_TTY) += proc_tty.o
12proc-y += cmdline.o 13proc-y += cmdline.o
13proc-y += consoles.o 14proc-y += consoles.o
14proc-y += cpuinfo.o 15proc-y += cpuinfo.o
diff --git a/fs/proc/array.c b/fs/proc/array.c
index 6a91e6ffbcbd..f7ed9ee46eb9 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -449,7 +449,7 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
449 do { 449 do {
450 min_flt += t->min_flt; 450 min_flt += t->min_flt;
451 maj_flt += t->maj_flt; 451 maj_flt += t->maj_flt;
452 gtime += t->gtime; 452 gtime += task_gtime(t);
453 t = next_thread(t); 453 t = next_thread(t);
454 } while (t != task); 454 } while (t != task);
455 455
@@ -472,7 +472,7 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
472 min_flt = task->min_flt; 472 min_flt = task->min_flt;
473 maj_flt = task->maj_flt; 473 maj_flt = task->maj_flt;
474 task_cputime_adjusted(task, &utime, &stime); 474 task_cputime_adjusted(task, &utime, &stime);
475 gtime = task->gtime; 475 gtime = task_gtime(task);
476 } 476 }
477 477
478 /* scale priority and nice values from timeslices to -20..20 */ 478 /* scale priority and nice values from timeslices to -20..20 */
diff --git a/fs/proc/proc_net.c b/fs/proc/proc_net.c
index fe72cd073dea..3131a03d7d37 100644
--- a/fs/proc/proc_net.c
+++ b/fs/proc/proc_net.c
@@ -177,20 +177,6 @@ const struct file_operations proc_net_operations = {
177 .readdir = proc_tgid_net_readdir, 177 .readdir = proc_tgid_net_readdir,
178}; 178};
179 179
180
181struct proc_dir_entry *proc_net_fops_create(struct net *net,
182 const char *name, umode_t mode, const struct file_operations *fops)
183{
184 return proc_create(name, mode, net->proc_net, fops);
185}
186EXPORT_SYMBOL_GPL(proc_net_fops_create);
187
188void proc_net_remove(struct net *net, const char *name)
189{
190 remove_proc_entry(name, net->proc_net);
191}
192EXPORT_SYMBOL_GPL(proc_net_remove);
193
194static __net_init int proc_net_ns_init(struct net *net) 180static __net_init int proc_net_ns_init(struct net *net)
195{ 181{
196 struct proc_dir_entry *netd, *net_statd; 182 struct proc_dir_entry *netd, *net_statd;
diff --git a/fs/pstore/inode.c b/fs/pstore/inode.c
index 67de74ca85f4..e4bcb2cf055a 100644
--- a/fs/pstore/inode.c
+++ b/fs/pstore/inode.c
@@ -418,9 +418,25 @@ static struct file_system_type pstore_fs_type = {
418 .kill_sb = pstore_kill_sb, 418 .kill_sb = pstore_kill_sb,
419}; 419};
420 420
421static struct kobject *pstore_kobj;
422
421static int __init init_pstore_fs(void) 423static int __init init_pstore_fs(void)
422{ 424{
423 return register_filesystem(&pstore_fs_type); 425 int err = 0;
426
427 /* Create a convenient mount point for people to access pstore */
428 pstore_kobj = kobject_create_and_add("pstore", fs_kobj);
429 if (!pstore_kobj) {
430 err = -ENOMEM;
431 goto out;
432 }
433
434 err = register_filesystem(&pstore_fs_type);
435 if (err < 0)
436 kobject_put(pstore_kobj);
437
438out:
439 return err;
424} 440}
425module_init(init_pstore_fs) 441module_init(init_pstore_fs)
426 442
diff --git a/fs/pstore/platform.c b/fs/pstore/platform.c
index 5ea2e77ff023..86d1038b5a12 100644
--- a/fs/pstore/platform.c
+++ b/fs/pstore/platform.c
@@ -96,6 +96,27 @@ static const char *get_reason_str(enum kmsg_dump_reason reason)
96 } 96 }
97} 97}
98 98
99bool pstore_cannot_block_path(enum kmsg_dump_reason reason)
100{
101 /*
102 * In case of NMI path, pstore shouldn't be blocked
103 * regardless of reason.
104 */
105 if (in_nmi())
106 return true;
107
108 switch (reason) {
109 /* In panic case, other cpus are stopped by smp_send_stop(). */
110 case KMSG_DUMP_PANIC:
111 /* Emergency restart shouldn't be blocked by spin lock. */
112 case KMSG_DUMP_EMERG:
113 return true;
114 default:
115 return false;
116 }
117}
118EXPORT_SYMBOL_GPL(pstore_cannot_block_path);
119
99/* 120/*
100 * callback from kmsg_dump. (s2,l2) has the most recently 121 * callback from kmsg_dump. (s2,l2) has the most recently
101 * written bytes, older bytes are in (s1,l1). Save as much 122 * written bytes, older bytes are in (s1,l1). Save as much
@@ -114,10 +135,12 @@ static void pstore_dump(struct kmsg_dumper *dumper,
114 135
115 why = get_reason_str(reason); 136 why = get_reason_str(reason);
116 137
117 if (in_nmi()) { 138 if (pstore_cannot_block_path(reason)) {
118 is_locked = spin_trylock(&psinfo->buf_lock); 139 is_locked = spin_trylock_irqsave(&psinfo->buf_lock, flags);
119 if (!is_locked) 140 if (!is_locked) {
120 pr_err("pstore dump routine blocked in NMI, may corrupt error record\n"); 141 pr_err("pstore dump routine blocked in %s path, may corrupt error record\n"
142 , in_nmi() ? "NMI" : why);
143 }
121 } else 144 } else
122 spin_lock_irqsave(&psinfo->buf_lock, flags); 145 spin_lock_irqsave(&psinfo->buf_lock, flags);
123 oopscount++; 146 oopscount++;
@@ -143,9 +166,9 @@ static void pstore_dump(struct kmsg_dumper *dumper,
143 total += hsize + len; 166 total += hsize + len;
144 part++; 167 part++;
145 } 168 }
146 if (in_nmi()) { 169 if (pstore_cannot_block_path(reason)) {
147 if (is_locked) 170 if (is_locked)
148 spin_unlock(&psinfo->buf_lock); 171 spin_unlock_irqrestore(&psinfo->buf_lock, flags);
149 } else 172 } else
150 spin_unlock_irqrestore(&psinfo->buf_lock, flags); 173 spin_unlock_irqrestore(&psinfo->buf_lock, flags);
151} 174}
diff --git a/fs/pstore/ram.c b/fs/pstore/ram.c
index 7003e5266f25..288f068740f6 100644
--- a/fs/pstore/ram.c
+++ b/fs/pstore/ram.c
@@ -167,12 +167,16 @@ static ssize_t ramoops_pstore_read(u64 *id, enum pstore_type_id *type,
167static size_t ramoops_write_kmsg_hdr(struct persistent_ram_zone *prz) 167static size_t ramoops_write_kmsg_hdr(struct persistent_ram_zone *prz)
168{ 168{
169 char *hdr; 169 char *hdr;
170 struct timeval timestamp; 170 struct timespec timestamp;
171 size_t len; 171 size_t len;
172 172
173 do_gettimeofday(&timestamp); 173 /* Report zeroed timestamp if called before timekeeping has resumed. */
174 if (__getnstimeofday(&timestamp)) {
175 timestamp.tv_sec = 0;
176 timestamp.tv_nsec = 0;
177 }
174 hdr = kasprintf(GFP_ATOMIC, RAMOOPS_KERNMSG_HDR "%lu.%lu\n", 178 hdr = kasprintf(GFP_ATOMIC, RAMOOPS_KERNMSG_HDR "%lu.%lu\n",
175 (long)timestamp.tv_sec, (long)timestamp.tv_usec); 179 (long)timestamp.tv_sec, (long)(timestamp.tv_nsec / 1000));
176 WARN_ON_ONCE(!hdr); 180 WARN_ON_ONCE(!hdr);
177 len = hdr ? strlen(hdr) : 0; 181 len = hdr ? strlen(hdr) : 0;
178 persistent_ram_write(prz, hdr, len); 182 persistent_ram_write(prz, hdr, len);
diff --git a/fs/qnx6/inode.c b/fs/qnx6/inode.c
index b6addf560483..57199a52a351 100644
--- a/fs/qnx6/inode.c
+++ b/fs/qnx6/inode.c
@@ -285,7 +285,7 @@ static struct buffer_head *qnx6_check_first_superblock(struct super_block *s,
285 if (fs32_to_cpu(sbi, sb->sb_magic) == QNX6_SUPER_MAGIC) { 285 if (fs32_to_cpu(sbi, sb->sb_magic) == QNX6_SUPER_MAGIC) {
286 /* we got a big endian fs */ 286 /* we got a big endian fs */
287 QNX6DEBUG((KERN_INFO "qnx6: fs got different" 287 QNX6DEBUG((KERN_INFO "qnx6: fs got different"
288 " endianess.\n")); 288 " endianness.\n"));
289 return bh; 289 return bh;
290 } else 290 } else
291 sbi->s_bytesex = BYTESEX_LE; 291 sbi->s_bytesex = BYTESEX_LE;
diff --git a/fs/select.c b/fs/select.c
index 2ef72d965036..8c1c96c27062 100644
--- a/fs/select.c
+++ b/fs/select.c
@@ -26,6 +26,7 @@
26#include <linux/fs.h> 26#include <linux/fs.h>
27#include <linux/rcupdate.h> 27#include <linux/rcupdate.h>
28#include <linux/hrtimer.h> 28#include <linux/hrtimer.h>
29#include <linux/sched/rt.h>
29 30
30#include <asm/uaccess.h> 31#include <asm/uaccess.h>
31 32
diff --git a/fs/sysfs/group.c b/fs/sysfs/group.c
index 2df555c66d57..aec3d5c98c94 100644
--- a/fs/sysfs/group.c
+++ b/fs/sysfs/group.c
@@ -205,6 +205,48 @@ void sysfs_unmerge_group(struct kobject *kobj,
205} 205}
206EXPORT_SYMBOL_GPL(sysfs_unmerge_group); 206EXPORT_SYMBOL_GPL(sysfs_unmerge_group);
207 207
208/**
209 * sysfs_add_link_to_group - add a symlink to an attribute group.
210 * @kobj: The kobject containing the group.
211 * @group_name: The name of the group.
212 * @target: The target kobject of the symlink to create.
213 * @link_name: The name of the symlink to create.
214 */
215int sysfs_add_link_to_group(struct kobject *kobj, const char *group_name,
216 struct kobject *target, const char *link_name)
217{
218 struct sysfs_dirent *dir_sd;
219 int error = 0;
220
221 dir_sd = sysfs_get_dirent(kobj->sd, NULL, group_name);
222 if (!dir_sd)
223 return -ENOENT;
224
225 error = sysfs_create_link_sd(dir_sd, target, link_name);
226 sysfs_put(dir_sd);
227
228 return error;
229}
230EXPORT_SYMBOL_GPL(sysfs_add_link_to_group);
231
232/**
233 * sysfs_remove_link_from_group - remove a symlink from an attribute group.
234 * @kobj: The kobject containing the group.
235 * @group_name: The name of the group.
236 * @link_name: The name of the symlink to remove.
237 */
238void sysfs_remove_link_from_group(struct kobject *kobj, const char *group_name,
239 const char *link_name)
240{
241 struct sysfs_dirent *dir_sd;
242
243 dir_sd = sysfs_get_dirent(kobj->sd, NULL, group_name);
244 if (dir_sd) {
245 sysfs_hash_and_remove(dir_sd, NULL, link_name);
246 sysfs_put(dir_sd);
247 }
248}
249EXPORT_SYMBOL_GPL(sysfs_remove_link_from_group);
208 250
209EXPORT_SYMBOL_GPL(sysfs_create_group); 251EXPORT_SYMBOL_GPL(sysfs_create_group);
210EXPORT_SYMBOL_GPL(sysfs_update_group); 252EXPORT_SYMBOL_GPL(sysfs_update_group);
diff --git a/fs/sysfs/mount.c b/fs/sysfs/mount.c
index db940a9be045..8d924b5ec733 100644
--- a/fs/sysfs/mount.c
+++ b/fs/sysfs/mount.c
@@ -10,7 +10,7 @@
10 * Please see Documentation/filesystems/sysfs.txt for more information. 10 * Please see Documentation/filesystems/sysfs.txt for more information.
11 */ 11 */
12 12
13#define DEBUG 13#define DEBUG
14 14
15#include <linux/fs.h> 15#include <linux/fs.h>
16#include <linux/mount.h> 16#include <linux/mount.h>
diff --git a/fs/sysfs/symlink.c b/fs/sysfs/symlink.c
index 3c9eb5624f5e..8c940df97a52 100644
--- a/fs/sysfs/symlink.c
+++ b/fs/sysfs/symlink.c
@@ -21,26 +21,17 @@
21 21
22#include "sysfs.h" 22#include "sysfs.h"
23 23
24static int sysfs_do_create_link(struct kobject *kobj, struct kobject *target, 24static int sysfs_do_create_link_sd(struct sysfs_dirent *parent_sd,
25 const char *name, int warn) 25 struct kobject *target,
26 const char *name, int warn)
26{ 27{
27 struct sysfs_dirent *parent_sd = NULL;
28 struct sysfs_dirent *target_sd = NULL; 28 struct sysfs_dirent *target_sd = NULL;
29 struct sysfs_dirent *sd = NULL; 29 struct sysfs_dirent *sd = NULL;
30 struct sysfs_addrm_cxt acxt; 30 struct sysfs_addrm_cxt acxt;
31 enum kobj_ns_type ns_type; 31 enum kobj_ns_type ns_type;
32 int error; 32 int error;
33 33
34 BUG_ON(!name); 34 BUG_ON(!name || !parent_sd);
35
36 if (!kobj)
37 parent_sd = &sysfs_root;
38 else
39 parent_sd = kobj->sd;
40
41 error = -EFAULT;
42 if (!parent_sd)
43 goto out_put;
44 35
45 /* target->sd can go away beneath us but is protected with 36 /* target->sd can go away beneath us but is protected with
46 * sysfs_assoc_lock. Fetch target_sd from it. 37 * sysfs_assoc_lock. Fetch target_sd from it.
@@ -96,6 +87,34 @@ static int sysfs_do_create_link(struct kobject *kobj, struct kobject *target,
96} 87}
97 88
98/** 89/**
90 * sysfs_create_link_sd - create symlink to a given object.
91 * @sd: directory we're creating the link in.
92 * @target: object we're pointing to.
93 * @name: name of the symlink.
94 */
95int sysfs_create_link_sd(struct sysfs_dirent *sd, struct kobject *target,
96 const char *name)
97{
98 return sysfs_do_create_link_sd(sd, target, name, 1);
99}
100
101static int sysfs_do_create_link(struct kobject *kobj, struct kobject *target,
102 const char *name, int warn)
103{
104 struct sysfs_dirent *parent_sd = NULL;
105
106 if (!kobj)
107 parent_sd = &sysfs_root;
108 else
109 parent_sd = kobj->sd;
110
111 if (!parent_sd)
112 return -EFAULT;
113
114 return sysfs_do_create_link_sd(parent_sd, target, name, warn);
115}
116
117/**
99 * sysfs_create_link - create symlink between two objects. 118 * sysfs_create_link - create symlink between two objects.
100 * @kobj: object whose directory we're creating the link in. 119 * @kobj: object whose directory we're creating the link in.
101 * @target: object we're pointing to. 120 * @target: object we're pointing to.
diff --git a/fs/sysfs/sysfs.h b/fs/sysfs/sysfs.h
index d73c0932bbd6..d1e4043eb0c3 100644
--- a/fs/sysfs/sysfs.h
+++ b/fs/sysfs/sysfs.h
@@ -240,3 +240,5 @@ void unmap_bin_file(struct sysfs_dirent *attr_sd);
240 * symlink.c 240 * symlink.c
241 */ 241 */
242extern const struct inode_operations sysfs_symlink_inode_operations; 242extern const struct inode_operations sysfs_symlink_inode_operations;
243int sysfs_create_link_sd(struct sysfs_dirent *sd, struct kobject *target,
244 const char *name);
diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c
index 5bc77817f382..4f6493c130e0 100644
--- a/fs/ubifs/file.c
+++ b/fs/ubifs/file.c
@@ -1522,6 +1522,7 @@ static int ubifs_vm_page_mkwrite(struct vm_area_struct *vma,
1522 ubifs_release_dirty_inode_budget(c, ui); 1522 ubifs_release_dirty_inode_budget(c, ui);
1523 } 1523 }
1524 1524
1525 wait_for_stable_page(page);
1525 unlock_page(page); 1526 unlock_page(page);
1526 return 0; 1527 return 0;
1527 1528
diff --git a/fs/ufs/Kconfig b/fs/ufs/Kconfig
index e4f10a40768a..0bf6e16f8d79 100644
--- a/fs/ufs/Kconfig
+++ b/fs/ufs/Kconfig
@@ -29,7 +29,7 @@ config UFS_FS
29 29
30config UFS_FS_WRITE 30config UFS_FS_WRITE
31 bool "UFS file system write support (DANGEROUS)" 31 bool "UFS file system write support (DANGEROUS)"
32 depends on UFS_FS && EXPERIMENTAL 32 depends on UFS_FS
33 help 33 help
34 Say Y here if you want to try writing to UFS partitions. This is 34 Say Y here if you want to try writing to UFS partitions. This is
35 experimental, so you should back up your UFS partitions beforehand. 35 experimental, so you should back up your UFS partitions beforehand.
diff --git a/fs/xfs/Kconfig b/fs/xfs/Kconfig
index 5a7ffe54f5d5..cc33aaf219f1 100644
--- a/fs/xfs/Kconfig
+++ b/fs/xfs/Kconfig
@@ -70,8 +70,8 @@ config XFS_RT
70 If unsure, say N. 70 If unsure, say N.
71 71
72config XFS_DEBUG 72config XFS_DEBUG
73 bool "XFS Debugging support (EXPERIMENTAL)" 73 bool "XFS Debugging support"
74 depends on XFS_FS && EXPERIMENTAL 74 depends on XFS_FS
75 help 75 help
76 Say Y here to get an XFS build with many debugging features, 76 Say Y here to get an XFS build with many debugging features,
77 including ASSERT checks, function wrappers around macros, 77 including ASSERT checks, function wrappers around macros,
diff --git a/fs/xfs/xfs_alloc.c b/fs/xfs/xfs_alloc.c
index 393055fe3aef..0ad23253e8b1 100644
--- a/fs/xfs/xfs_alloc.c
+++ b/fs/xfs/xfs_alloc.c
@@ -1925,8 +1925,6 @@ xfs_alloc_fix_freelist(
1925 targs.mp = mp; 1925 targs.mp = mp;
1926 targs.agbp = agbp; 1926 targs.agbp = agbp;
1927 targs.agno = args->agno; 1927 targs.agno = args->agno;
1928 targs.mod = targs.minleft = targs.wasdel = targs.userdata =
1929 targs.minalignslop = 0;
1930 targs.alignment = targs.minlen = targs.prod = targs.isfl = 1; 1928 targs.alignment = targs.minlen = targs.prod = targs.isfl = 1;
1931 targs.type = XFS_ALLOCTYPE_THIS_AG; 1929 targs.type = XFS_ALLOCTYPE_THIS_AG;
1932 targs.pag = pag; 1930 targs.pag = pag;
diff --git a/fs/xfs/xfs_attr.c b/fs/xfs/xfs_attr.c
index aaf472532b3c..888683844d98 100644
--- a/fs/xfs/xfs_attr.c
+++ b/fs/xfs/xfs_attr.c
@@ -300,9 +300,12 @@ xfs_attr_set_int(
300 if (rsvd) 300 if (rsvd)
301 args.trans->t_flags |= XFS_TRANS_RESERVE; 301 args.trans->t_flags |= XFS_TRANS_RESERVE;
302 302
303 if ((error = xfs_trans_reserve(args.trans, args.total, 303 error = xfs_trans_reserve(args.trans, args.total,
304 XFS_ATTRSET_LOG_RES(mp, args.total), 0, 304 XFS_ATTRSETM_LOG_RES(mp) +
305 XFS_TRANS_PERM_LOG_RES, XFS_ATTRSET_LOG_COUNT))) { 305 XFS_ATTRSETRT_LOG_RES(mp) * args.total,
306 0, XFS_TRANS_PERM_LOG_RES,
307 XFS_ATTRSET_LOG_COUNT);
308 if (error) {
306 xfs_trans_cancel(args.trans, 0); 309 xfs_trans_cancel(args.trans, 0);
307 return(error); 310 return(error);
308 } 311 }
diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c
index cdb2d3348583..b44af9211bd9 100644
--- a/fs/xfs/xfs_bmap.c
+++ b/fs/xfs/xfs_bmap.c
@@ -147,7 +147,10 @@ xfs_bmap_local_to_extents(
147 xfs_fsblock_t *firstblock, /* first block allocated in xaction */ 147 xfs_fsblock_t *firstblock, /* first block allocated in xaction */
148 xfs_extlen_t total, /* total blocks needed by transaction */ 148 xfs_extlen_t total, /* total blocks needed by transaction */
149 int *logflagsp, /* inode logging flags */ 149 int *logflagsp, /* inode logging flags */
150 int whichfork); /* data or attr fork */ 150 int whichfork, /* data or attr fork */
151 void (*init_fn)(struct xfs_buf *bp,
152 struct xfs_inode *ip,
153 struct xfs_ifork *ifp));
151 154
152/* 155/*
153 * Search the extents list for the inode, for the extent containing bno. 156 * Search the extents list for the inode, for the extent containing bno.
@@ -357,7 +360,42 @@ xfs_bmap_add_attrfork_extents(
357} 360}
358 361
359/* 362/*
360 * Called from xfs_bmap_add_attrfork to handle local format files. 363 * Block initialisation functions for local to extent format conversion.
364 * As these get more complex, they will be moved to the relevant files,
365 * but for now they are too simple to worry about.
366 */
367STATIC void
368xfs_bmap_local_to_extents_init_fn(
369 struct xfs_buf *bp,
370 struct xfs_inode *ip,
371 struct xfs_ifork *ifp)
372{
373 bp->b_ops = &xfs_bmbt_buf_ops;
374 memcpy(bp->b_addr, ifp->if_u1.if_data, ifp->if_bytes);
375}
376
377STATIC void
378xfs_symlink_local_to_remote(
379 struct xfs_buf *bp,
380 struct xfs_inode *ip,
381 struct xfs_ifork *ifp)
382{
383 /* remote symlink blocks are not verifiable until CRCs come along */
384 bp->b_ops = NULL;
385 memcpy(bp->b_addr, ifp->if_u1.if_data, ifp->if_bytes);
386}
387
388/*
389 * Called from xfs_bmap_add_attrfork to handle local format files. Each
390 * different data fork content type needs a different callout to do the
391 * conversion. Some are basic and only require special block initialisation
392 * callouts for the data formatting, others (directories) are so specialised they
393 * handle everything themselves.
394 *
395 * XXX (dgc): investigate whether directory conversion can use the generic
396 * formatting callout. It should be possible - it's just a very complex
397 * formatter. It would also require passing the transaction through to the init
398 * function.
361 */ 399 */
362STATIC int /* error */ 400STATIC int /* error */
363xfs_bmap_add_attrfork_local( 401xfs_bmap_add_attrfork_local(
@@ -368,25 +406,29 @@ xfs_bmap_add_attrfork_local(
368 int *flags) /* inode logging flags */ 406 int *flags) /* inode logging flags */
369{ 407{
370 xfs_da_args_t dargs; /* args for dir/attr code */ 408 xfs_da_args_t dargs; /* args for dir/attr code */
371 int error; /* error return value */
372 xfs_mount_t *mp; /* mount structure pointer */
373 409
374 if (ip->i_df.if_bytes <= XFS_IFORK_DSIZE(ip)) 410 if (ip->i_df.if_bytes <= XFS_IFORK_DSIZE(ip))
375 return 0; 411 return 0;
412
376 if (S_ISDIR(ip->i_d.di_mode)) { 413 if (S_ISDIR(ip->i_d.di_mode)) {
377 mp = ip->i_mount;
378 memset(&dargs, 0, sizeof(dargs)); 414 memset(&dargs, 0, sizeof(dargs));
379 dargs.dp = ip; 415 dargs.dp = ip;
380 dargs.firstblock = firstblock; 416 dargs.firstblock = firstblock;
381 dargs.flist = flist; 417 dargs.flist = flist;
382 dargs.total = mp->m_dirblkfsbs; 418 dargs.total = ip->i_mount->m_dirblkfsbs;
383 dargs.whichfork = XFS_DATA_FORK; 419 dargs.whichfork = XFS_DATA_FORK;
384 dargs.trans = tp; 420 dargs.trans = tp;
385 error = xfs_dir2_sf_to_block(&dargs); 421 return xfs_dir2_sf_to_block(&dargs);
386 } else 422 }
387 error = xfs_bmap_local_to_extents(tp, ip, firstblock, 1, flags, 423
388 XFS_DATA_FORK); 424 if (S_ISLNK(ip->i_d.di_mode))
389 return error; 425 return xfs_bmap_local_to_extents(tp, ip, firstblock, 1,
426 flags, XFS_DATA_FORK,
427 xfs_symlink_local_to_remote);
428
429 return xfs_bmap_local_to_extents(tp, ip, firstblock, 1, flags,
430 XFS_DATA_FORK,
431 xfs_bmap_local_to_extents_init_fn);
390} 432}
391 433
392/* 434/*
@@ -3099,8 +3141,6 @@ xfs_bmap_extents_to_btree(
3099 args.fsbno = *firstblock; 3141 args.fsbno = *firstblock;
3100 } 3142 }
3101 args.minlen = args.maxlen = args.prod = 1; 3143 args.minlen = args.maxlen = args.prod = 1;
3102 args.total = args.minleft = args.alignment = args.mod = args.isfl =
3103 args.minalignslop = 0;
3104 args.wasdel = wasdel; 3144 args.wasdel = wasdel;
3105 *logflagsp = 0; 3145 *logflagsp = 0;
3106 if ((error = xfs_alloc_vextent(&args))) { 3146 if ((error = xfs_alloc_vextent(&args))) {
@@ -3221,7 +3261,10 @@ xfs_bmap_local_to_extents(
3221 xfs_fsblock_t *firstblock, /* first block allocated in xaction */ 3261 xfs_fsblock_t *firstblock, /* first block allocated in xaction */
3222 xfs_extlen_t total, /* total blocks needed by transaction */ 3262 xfs_extlen_t total, /* total blocks needed by transaction */
3223 int *logflagsp, /* inode logging flags */ 3263 int *logflagsp, /* inode logging flags */
3224 int whichfork) /* data or attr fork */ 3264 int whichfork,
3265 void (*init_fn)(struct xfs_buf *bp,
3266 struct xfs_inode *ip,
3267 struct xfs_ifork *ifp))
3225{ 3268{
3226 int error; /* error return value */ 3269 int error; /* error return value */
3227 int flags; /* logging flags returned */ 3270 int flags; /* logging flags returned */
@@ -3241,12 +3284,12 @@ xfs_bmap_local_to_extents(
3241 xfs_buf_t *bp; /* buffer for extent block */ 3284 xfs_buf_t *bp; /* buffer for extent block */
3242 xfs_bmbt_rec_host_t *ep;/* extent record pointer */ 3285 xfs_bmbt_rec_host_t *ep;/* extent record pointer */
3243 3286
3287 ASSERT((ifp->if_flags &
3288 (XFS_IFINLINE|XFS_IFEXTENTS|XFS_IFEXTIREC)) == XFS_IFINLINE);
3244 memset(&args, 0, sizeof(args)); 3289 memset(&args, 0, sizeof(args));
3245 args.tp = tp; 3290 args.tp = tp;
3246 args.mp = ip->i_mount; 3291 args.mp = ip->i_mount;
3247 args.firstblock = *firstblock; 3292 args.firstblock = *firstblock;
3248 ASSERT((ifp->if_flags &
3249 (XFS_IFINLINE|XFS_IFEXTENTS|XFS_IFEXTIREC)) == XFS_IFINLINE);
3250 /* 3293 /*
3251 * Allocate a block. We know we need only one, since the 3294 * Allocate a block. We know we need only one, since the
3252 * file currently fits in an inode. 3295 * file currently fits in an inode.
@@ -3259,20 +3302,21 @@ xfs_bmap_local_to_extents(
3259 args.type = XFS_ALLOCTYPE_NEAR_BNO; 3302 args.type = XFS_ALLOCTYPE_NEAR_BNO;
3260 } 3303 }
3261 args.total = total; 3304 args.total = total;
3262 args.mod = args.minleft = args.alignment = args.wasdel =
3263 args.isfl = args.minalignslop = 0;
3264 args.minlen = args.maxlen = args.prod = 1; 3305 args.minlen = args.maxlen = args.prod = 1;
3265 if ((error = xfs_alloc_vextent(&args))) 3306 error = xfs_alloc_vextent(&args);
3307 if (error)
3266 goto done; 3308 goto done;
3267 /* 3309
3268 * Can't fail, the space was reserved. 3310 /* Can't fail, the space was reserved. */
3269 */
3270 ASSERT(args.fsbno != NULLFSBLOCK); 3311 ASSERT(args.fsbno != NULLFSBLOCK);
3271 ASSERT(args.len == 1); 3312 ASSERT(args.len == 1);
3272 *firstblock = args.fsbno; 3313 *firstblock = args.fsbno;
3273 bp = xfs_btree_get_bufl(args.mp, tp, args.fsbno, 0); 3314 bp = xfs_btree_get_bufl(args.mp, tp, args.fsbno, 0);
3274 bp->b_ops = &xfs_bmbt_buf_ops; 3315
3275 memcpy(bp->b_addr, ifp->if_u1.if_data, ifp->if_bytes); 3316 /* initialise the block and copy the data */
3317 init_fn(bp, ip, ifp);
3318
3319 /* account for the change in fork size and log everything */
3276 xfs_trans_log_buf(tp, bp, 0, ifp->if_bytes - 1); 3320 xfs_trans_log_buf(tp, bp, 0, ifp->if_bytes - 1);
3277 xfs_bmap_forkoff_reset(args.mp, ip, whichfork); 3321 xfs_bmap_forkoff_reset(args.mp, ip, whichfork);
3278 xfs_idata_realloc(ip, -ifp->if_bytes, whichfork); 3322 xfs_idata_realloc(ip, -ifp->if_bytes, whichfork);
@@ -4919,8 +4963,32 @@ xfs_bmapi_write(
4919 XFS_STATS_INC(xs_blk_mapw); 4963 XFS_STATS_INC(xs_blk_mapw);
4920 4964
4921 if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL) { 4965 if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL) {
4966 /*
4967 * XXX (dgc): This assumes we are only called for inodes that
4968 * contain content neutral data in local format. Anything that
4969 * contains caller-specific data in local format that needs
4970 * transformation to move to a block format needs to do the
4971 * conversion to extent format itself.
4972 *
4973 * Directory data forks and attribute forks handle this
4974 * themselves, but with the addition of metadata verifiers every
4975 * data fork in local format now contains caller specific data
4976 * and as such conversion through this function is likely to be
4977 * broken.
4978 *
4979 * The only likely user of this branch is for remote symlinks,
4980 * but we cannot overwrite the data fork contents of the symlink
4981 * (EEXIST occurs higher up the stack) and so it will never go
4982 * from local format to extent format here. Hence I don't think
4983 * this branch is ever executed intentionally and we should
4984 * consider removing it and asserting that xfs_bmapi_write()
4985 * cannot be called directly on local format forks. i.e. callers
4986 * are completely responsible for local to extent format
4987 * conversion, not xfs_bmapi_write().
4988 */
4922 error = xfs_bmap_local_to_extents(tp, ip, firstblock, total, 4989 error = xfs_bmap_local_to_extents(tp, ip, firstblock, total,
4923 &bma.logflags, whichfork); 4990 &bma.logflags, whichfork,
4991 xfs_bmap_local_to_extents_init_fn);
4924 if (error) 4992 if (error)
4925 goto error0; 4993 goto error0;
4926 } 4994 }
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index fbbb9eb92e32..4e8f0df82d02 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -951,8 +951,6 @@ xfs_buf_trylock(
951 locked = down_trylock(&bp->b_sema) == 0; 951 locked = down_trylock(&bp->b_sema) == 0;
952 if (locked) 952 if (locked)
953 XB_SET_OWNER(bp); 953 XB_SET_OWNER(bp);
954 else if (atomic_read(&bp->b_pin_count) && (bp->b_flags & XBF_STALE))
955 xfs_log_force(bp->b_target->bt_mount, 0);
956 954
957 trace_xfs_buf_trylock(bp, _RET_IP_); 955 trace_xfs_buf_trylock(bp, _RET_IP_);
958 return locked; 956 return locked;
diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c
index 3f9949fee391..cf263476d6b4 100644
--- a/fs/xfs/xfs_buf_item.c
+++ b/fs/xfs/xfs_buf_item.c
@@ -37,109 +37,6 @@ static inline struct xfs_buf_log_item *BUF_ITEM(struct xfs_log_item *lip)
37 return container_of(lip, struct xfs_buf_log_item, bli_item); 37 return container_of(lip, struct xfs_buf_log_item, bli_item);
38} 38}
39 39
40
41#ifdef XFS_TRANS_DEBUG
42/*
43 * This function uses an alternate strategy for tracking the bytes
44 * that the user requests to be logged. This can then be used
45 * in conjunction with the bli_orig array in the buf log item to
46 * catch bugs in our callers' code.
47 *
48 * We also double check the bits set in xfs_buf_item_log using a
49 * simple algorithm to check that every byte is accounted for.
50 */
51STATIC void
52xfs_buf_item_log_debug(
53 xfs_buf_log_item_t *bip,
54 uint first,
55 uint last)
56{
57 uint x;
58 uint byte;
59 uint nbytes;
60 uint chunk_num;
61 uint word_num;
62 uint bit_num;
63 uint bit_set;
64 uint *wordp;
65
66 ASSERT(bip->bli_logged != NULL);
67 byte = first;
68 nbytes = last - first + 1;
69 bfset(bip->bli_logged, first, nbytes);
70 for (x = 0; x < nbytes; x++) {
71 chunk_num = byte >> XFS_BLF_SHIFT;
72 word_num = chunk_num >> BIT_TO_WORD_SHIFT;
73 bit_num = chunk_num & (NBWORD - 1);
74 wordp = &(bip->__bli_format.blf_data_map[word_num]);
75 bit_set = *wordp & (1 << bit_num);
76 ASSERT(bit_set);
77 byte++;
78 }
79}
80
81/*
82 * This function is called when we flush something into a buffer without
83 * logging it. This happens for things like inodes which are logged
84 * separately from the buffer.
85 */
86void
87xfs_buf_item_flush_log_debug(
88 xfs_buf_t *bp,
89 uint first,
90 uint last)
91{
92 xfs_buf_log_item_t *bip = bp->b_fspriv;
93 uint nbytes;
94
95 if (bip == NULL || (bip->bli_item.li_type != XFS_LI_BUF))
96 return;
97
98 ASSERT(bip->bli_logged != NULL);
99 nbytes = last - first + 1;
100 bfset(bip->bli_logged, first, nbytes);
101}
102
103/*
104 * This function is called to verify that our callers have logged
105 * all the bytes that they changed.
106 *
107 * It does this by comparing the original copy of the buffer stored in
108 * the buf log item's bli_orig array to the current copy of the buffer
109 * and ensuring that all bytes which mismatch are set in the bli_logged
110 * array of the buf log item.
111 */
112STATIC void
113xfs_buf_item_log_check(
114 xfs_buf_log_item_t *bip)
115{
116 char *orig;
117 char *buffer;
118 int x;
119 xfs_buf_t *bp;
120
121 ASSERT(bip->bli_orig != NULL);
122 ASSERT(bip->bli_logged != NULL);
123
124 bp = bip->bli_buf;
125 ASSERT(bp->b_length > 0);
126 ASSERT(bp->b_addr != NULL);
127 orig = bip->bli_orig;
128 buffer = bp->b_addr;
129 for (x = 0; x < BBTOB(bp->b_length); x++) {
130 if (orig[x] != buffer[x] && !btst(bip->bli_logged, x)) {
131 xfs_emerg(bp->b_mount,
132 "%s: bip %x buffer %x orig %x index %d",
133 __func__, bip, bp, orig, x);
134 ASSERT(0);
135 }
136 }
137}
138#else
139#define xfs_buf_item_log_debug(x,y,z)
140#define xfs_buf_item_log_check(x)
141#endif
142
143STATIC void xfs_buf_do_callbacks(struct xfs_buf *bp); 40STATIC void xfs_buf_do_callbacks(struct xfs_buf *bp);
144 41
145/* 42/*
@@ -429,7 +326,6 @@ xfs_buf_item_format(
429 * Check to make sure everything is consistent. 326 * Check to make sure everything is consistent.
430 */ 327 */
431 trace_xfs_buf_item_format(bip); 328 trace_xfs_buf_item_format(bip);
432 xfs_buf_item_log_check(bip);
433} 329}
434 330
435/* 331/*
@@ -573,8 +469,18 @@ xfs_buf_item_push(
573 469
574 if (xfs_buf_ispinned(bp)) 470 if (xfs_buf_ispinned(bp))
575 return XFS_ITEM_PINNED; 471 return XFS_ITEM_PINNED;
576 if (!xfs_buf_trylock(bp)) 472 if (!xfs_buf_trylock(bp)) {
473 /*
474 * If we have just raced with a buffer being pinned and it has
475 * been marked stale, we could end up stalling until someone else
476 * issues a log force to unpin the stale buffer. Check for the
477 * race condition here so xfsaild recognizes the buffer is pinned
478 * and queues a log force to move it along.
479 */
480 if (xfs_buf_ispinned(bp))
481 return XFS_ITEM_PINNED;
577 return XFS_ITEM_LOCKED; 482 return XFS_ITEM_LOCKED;
483 }
578 484
579 ASSERT(!(bip->bli_flags & XFS_BLI_STALE)); 485 ASSERT(!(bip->bli_flags & XFS_BLI_STALE));
580 486
@@ -923,8 +829,6 @@ xfs_buf_item_log_segment(
923 mask = (1 << end_bit) - 1; 829 mask = (1 << end_bit) - 1;
924 *wordp |= mask; 830 *wordp |= mask;
925 } 831 }
926
927 xfs_buf_item_log_debug(bip, first, last);
928} 832}
929 833
930/* 834/*
diff --git a/fs/xfs/xfs_buf_item.h b/fs/xfs/xfs_buf_item.h
index 16def435944a..ee36c88ecfde 100644
--- a/fs/xfs/xfs_buf_item.h
+++ b/fs/xfs/xfs_buf_item.h
@@ -98,10 +98,6 @@ typedef struct xfs_buf_log_item {
98 unsigned int bli_flags; /* misc flags */ 98 unsigned int bli_flags; /* misc flags */
99 unsigned int bli_recur; /* lock recursion count */ 99 unsigned int bli_recur; /* lock recursion count */
100 atomic_t bli_refcount; /* cnt of tp refs */ 100 atomic_t bli_refcount; /* cnt of tp refs */
101#ifdef XFS_TRANS_DEBUG
102 char *bli_orig; /* original buffer copy */
103 char *bli_logged; /* bytes logged (bitmap) */
104#endif
105 int bli_format_count; /* count of headers */ 101 int bli_format_count; /* count of headers */
106 struct xfs_buf_log_format *bli_formats; /* array of in-log header ptrs */ 102 struct xfs_buf_log_format *bli_formats; /* array of in-log header ptrs */
107 struct xfs_buf_log_format __bli_format; /* embedded in-log header */ 103 struct xfs_buf_log_format __bli_format; /* embedded in-log header */
@@ -117,16 +113,6 @@ void xfs_buf_attach_iodone(struct xfs_buf *,
117void xfs_buf_iodone_callbacks(struct xfs_buf *); 113void xfs_buf_iodone_callbacks(struct xfs_buf *);
118void xfs_buf_iodone(struct xfs_buf *, struct xfs_log_item *); 114void xfs_buf_iodone(struct xfs_buf *, struct xfs_log_item *);
119 115
120#ifdef XFS_TRANS_DEBUG
121void
122xfs_buf_item_flush_log_debug(
123 struct xfs_buf *bp,
124 uint first,
125 uint last);
126#else
127#define xfs_buf_item_flush_log_debug(bp, first, last)
128#endif
129
130#endif /* __KERNEL__ */ 116#endif /* __KERNEL__ */
131 117
132#endif /* __XFS_BUF_ITEM_H__ */ 118#endif /* __XFS_BUF_ITEM_H__ */
diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c
index 9e1bf5294c91..8025eb23ad72 100644
--- a/fs/xfs/xfs_dquot.c
+++ b/fs/xfs/xfs_dquot.c
@@ -612,15 +612,9 @@ xfs_qm_dqread(
612 if (flags & XFS_QMOPT_DQALLOC) { 612 if (flags & XFS_QMOPT_DQALLOC) {
613 tp = xfs_trans_alloc(mp, XFS_TRANS_QM_DQALLOC); 613 tp = xfs_trans_alloc(mp, XFS_TRANS_QM_DQALLOC);
614 error = xfs_trans_reserve(tp, XFS_QM_DQALLOC_SPACE_RES(mp), 614 error = xfs_trans_reserve(tp, XFS_QM_DQALLOC_SPACE_RES(mp),
615 XFS_WRITE_LOG_RES(mp) + 615 XFS_QM_DQALLOC_LOG_RES(mp), 0,
616 /* 616 XFS_TRANS_PERM_LOG_RES,
617 * Round the chunklen up to the next multiple 617 XFS_WRITE_LOG_COUNT);
618 * of 128 (buf log item chunk size)).
619 */
620 BBTOB(mp->m_quotainfo->qi_dqchunklen) - 1 + 128,
621 0,
622 XFS_TRANS_PERM_LOG_RES,
623 XFS_WRITE_LOG_COUNT);
624 if (error) 618 if (error)
625 goto error1; 619 goto error1;
626 cancelflags = XFS_TRANS_RELEASE_LOG_RES; 620 cancelflags = XFS_TRANS_RELEASE_LOG_RES;
diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c
index 94eaeedc5498..2866b8c78b7a 100644
--- a/fs/xfs/xfs_fsops.c
+++ b/fs/xfs/xfs_fsops.c
@@ -709,8 +709,8 @@ xfs_fs_log_dummy(
709 int error; 709 int error;
710 710
711 tp = _xfs_trans_alloc(mp, XFS_TRANS_DUMMY1, KM_SLEEP); 711 tp = _xfs_trans_alloc(mp, XFS_TRANS_DUMMY1, KM_SLEEP);
712 error = xfs_trans_reserve(tp, 0, mp->m_sb.sb_sectsize + 128, 0, 0, 712 error = xfs_trans_reserve(tp, 0, XFS_SB_LOG_RES(mp), 0, 0,
713 XFS_DEFAULT_LOG_COUNT); 713 XFS_DEFAULT_LOG_COUNT);
714 if (error) { 714 if (error) {
715 xfs_trans_cancel(tp, 0); 715 xfs_trans_cancel(tp, 0);
716 return error; 716 return error;
diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c
index a815412eab80..515bf71ce01c 100644
--- a/fs/xfs/xfs_ialloc.c
+++ b/fs/xfs/xfs_ialloc.c
@@ -279,8 +279,6 @@ xfs_ialloc_ag_alloc(
279 (args.agbno < be32_to_cpu(agi->agi_length)))) { 279 (args.agbno < be32_to_cpu(agi->agi_length)))) {
280 args.fsbno = XFS_AGB_TO_FSB(args.mp, agno, args.agbno); 280 args.fsbno = XFS_AGB_TO_FSB(args.mp, agno, args.agbno);
281 args.type = XFS_ALLOCTYPE_THIS_BNO; 281 args.type = XFS_ALLOCTYPE_THIS_BNO;
282 args.mod = args.total = args.wasdel = args.isfl =
283 args.userdata = args.minalignslop = 0;
284 args.prod = 1; 282 args.prod = 1;
285 283
286 /* 284 /*
@@ -333,8 +331,6 @@ xfs_ialloc_ag_alloc(
333 * Allocate a fixed-size extent of inodes. 331 * Allocate a fixed-size extent of inodes.
334 */ 332 */
335 args.type = XFS_ALLOCTYPE_NEAR_BNO; 333 args.type = XFS_ALLOCTYPE_NEAR_BNO;
336 args.mod = args.total = args.wasdel = args.isfl =
337 args.userdata = args.minalignslop = 0;
338 args.prod = 1; 334 args.prod = 1;
339 /* 335 /*
340 * Allow space for the inode btree to split. 336 * Allow space for the inode btree to split.
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 66282dcb821b..4f201656d2d9 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -2379,9 +2379,6 @@ xfs_iflush_fork(
2379 char *cp; 2379 char *cp;
2380 xfs_ifork_t *ifp; 2380 xfs_ifork_t *ifp;
2381 xfs_mount_t *mp; 2381 xfs_mount_t *mp;
2382#ifdef XFS_TRANS_DEBUG
2383 int first;
2384#endif
2385 static const short brootflag[2] = 2382 static const short brootflag[2] =
2386 { XFS_ILOG_DBROOT, XFS_ILOG_ABROOT }; 2383 { XFS_ILOG_DBROOT, XFS_ILOG_ABROOT };
2387 static const short dataflag[2] = 2384 static const short dataflag[2] =
@@ -2724,9 +2721,6 @@ xfs_iflush_int(
2724 xfs_inode_log_item_t *iip; 2721 xfs_inode_log_item_t *iip;
2725 xfs_dinode_t *dip; 2722 xfs_dinode_t *dip;
2726 xfs_mount_t *mp; 2723 xfs_mount_t *mp;
2727#ifdef XFS_TRANS_DEBUG
2728 int first;
2729#endif
2730 2724
2731 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED)); 2725 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED));
2732 ASSERT(xfs_isiflocked(ip)); 2726 ASSERT(xfs_isiflocked(ip));
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index 22baf6ea4fac..237e7f6f2ab3 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -419,6 +419,7 @@ static inline void xfs_iflock(struct xfs_inode *ip)
419static inline void xfs_ifunlock(struct xfs_inode *ip) 419static inline void xfs_ifunlock(struct xfs_inode *ip)
420{ 420{
421 xfs_iflags_clear(ip, XFS_IFLOCK); 421 xfs_iflags_clear(ip, XFS_IFLOCK);
422 smp_mb();
422 wake_up_bit(&ip->i_flags, __XFS_IFLOCK_BIT); 423 wake_up_bit(&ip->i_flags, __XFS_IFLOCK_BIT);
423} 424}
424 425
diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c
index d041d47d9d86..f034bd1652f0 100644
--- a/fs/xfs/xfs_inode_item.c
+++ b/fs/xfs/xfs_inode_item.c
@@ -269,17 +269,6 @@ xfs_inode_item_format(
269 } else { 269 } else {
270 ASSERT(!(iip->ili_fields & 270 ASSERT(!(iip->ili_fields &
271 XFS_ILOG_DBROOT)); 271 XFS_ILOG_DBROOT));
272#ifdef XFS_TRANS_DEBUG
273 if (iip->ili_root_size > 0) {
274 ASSERT(iip->ili_root_size ==
275 ip->i_df.if_broot_bytes);
276 ASSERT(memcmp(iip->ili_orig_root,
277 ip->i_df.if_broot,
278 iip->ili_root_size) == 0);
279 } else {
280 ASSERT(ip->i_df.if_broot_bytes == 0);
281 }
282#endif
283 iip->ili_fields &= ~XFS_ILOG_DBROOT; 272 iip->ili_fields &= ~XFS_ILOG_DBROOT;
284 } 273 }
285 break; 274 break;
@@ -678,11 +667,6 @@ void
678xfs_inode_item_destroy( 667xfs_inode_item_destroy(
679 xfs_inode_t *ip) 668 xfs_inode_t *ip)
680{ 669{
681#ifdef XFS_TRANS_DEBUG
682 if (ip->i_itemp->ili_root_size != 0) {
683 kmem_free(ip->i_itemp->ili_orig_root);
684 }
685#endif
686 kmem_zone_free(xfs_ili_zone, ip->i_itemp); 670 kmem_zone_free(xfs_ili_zone, ip->i_itemp);
687} 671}
688 672
diff --git a/fs/xfs/xfs_inode_item.h b/fs/xfs/xfs_inode_item.h
index 376d4d0b2635..779812fb3d80 100644
--- a/fs/xfs/xfs_inode_item.h
+++ b/fs/xfs/xfs_inode_item.h
@@ -148,10 +148,6 @@ typedef struct xfs_inode_log_item {
148 data exts */ 148 data exts */
149 struct xfs_bmbt_rec *ili_aextents_buf; /* array of logged 149 struct xfs_bmbt_rec *ili_aextents_buf; /* array of logged
150 attr exts */ 150 attr exts */
151#ifdef XFS_TRANS_DEBUG
152 int ili_root_size;
153 char *ili_orig_root;
154#endif
155 xfs_inode_log_format_t ili_format; /* logged structure */ 151 xfs_inode_log_format_t ili_format; /* logged structure */
156} xfs_inode_log_item_t; 152} xfs_inode_log_item_t;
157 153
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 364818eef40e..912d83d8860a 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -311,6 +311,62 @@ xfs_iomap_eof_want_preallocate(
311} 311}
312 312
313/* 313/*
314 * Determine the initial size of the preallocation. We are beyond the current
315 * EOF here, but we need to take into account whether this is a sparse write or
316 * an extending write when determining the preallocation size. Hence we need to
317 * look up the extent that ends at the current write offset and use the result
318 * to determine the preallocation size.
319 *
320 * If the extent is a hole, then preallocation is essentially disabled.
321 * Otherwise we take the size of the preceding data extent as the basis for the
322 * preallocation size. If the size of the extent is greater than half the
323 * maximum extent length, then use the current offset as the basis. This ensures
324 * that for large files the preallocation size always extends to MAXEXTLEN
325 * rather than falling short due to things like stripe unit/width alignment of
326 * real extents.
327 */
328STATIC int
329xfs_iomap_eof_prealloc_initial_size(
330 struct xfs_mount *mp,
331 struct xfs_inode *ip,
332 xfs_off_t offset,
333 xfs_bmbt_irec_t *imap,
334 int nimaps)
335{
336 xfs_fileoff_t start_fsb;
337 int imaps = 1;
338 int error;
339
340 ASSERT(nimaps >= imaps);
341
342 /* if we are using a specific prealloc size, return now */
343 if (mp->m_flags & XFS_MOUNT_DFLT_IOSIZE)
344 return 0;
345
346 /*
347 * As we write multiple pages, the offset will always align to the
348 * start of a page and hence point to a hole at EOF. i.e. if the size is
349 * 4096 bytes, we only have one block at FSB 0, but XFS_B_TO_FSB(4096)
350 * will return FSB 1. Hence if there are blocks in the file, we want to
351 * point to the block prior to the EOF block and not the hole that maps
352 * directly at @offset.
353 */
354 start_fsb = XFS_B_TO_FSB(mp, offset);
355 if (start_fsb)
356 start_fsb--;
357 error = xfs_bmapi_read(ip, start_fsb, 1, imap, &imaps, XFS_BMAPI_ENTIRE);
358 if (error)
359 return 0;
360
361 ASSERT(imaps == 1);
362 if (imap[0].br_startblock == HOLESTARTBLOCK)
363 return 0;
364 if (imap[0].br_blockcount <= (MAXEXTLEN >> 1))
365 return imap[0].br_blockcount;
366 return XFS_B_TO_FSB(mp, offset);
367}
368
369/*
314 * If we don't have a user specified preallocation size, dynamically increase 370 * If we don't have a user specified preallocation size, dynamically increase
315 * the preallocation size as the size of the file grows. Cap the maximum size 371 * the preallocation size as the size of the file grows. Cap the maximum size
316 * at a single extent or less if the filesystem is near full. The closer the 372 * at a single extent or less if the filesystem is near full. The closer the
@@ -319,20 +375,19 @@ xfs_iomap_eof_want_preallocate(
319STATIC xfs_fsblock_t 375STATIC xfs_fsblock_t
320xfs_iomap_prealloc_size( 376xfs_iomap_prealloc_size(
321 struct xfs_mount *mp, 377 struct xfs_mount *mp,
322 struct xfs_inode *ip) 378 struct xfs_inode *ip,
379 xfs_off_t offset,
380 struct xfs_bmbt_irec *imap,
381 int nimaps)
323{ 382{
324 xfs_fsblock_t alloc_blocks = 0; 383 xfs_fsblock_t alloc_blocks = 0;
325 384
326 if (!(mp->m_flags & XFS_MOUNT_DFLT_IOSIZE)) { 385 alloc_blocks = xfs_iomap_eof_prealloc_initial_size(mp, ip, offset,
386 imap, nimaps);
387 if (alloc_blocks > 0) {
327 int shift = 0; 388 int shift = 0;
328 int64_t freesp; 389 int64_t freesp;
329 390
330 /*
331 * rounddown_pow_of_two() returns an undefined result
332 * if we pass in alloc_blocks = 0. Hence the "+ 1" to
333 * ensure we always pass in a non-zero value.
334 */
335 alloc_blocks = XFS_B_TO_FSB(mp, XFS_ISIZE(ip)) + 1;
336 alloc_blocks = XFS_FILEOFF_MIN(MAXEXTLEN, 391 alloc_blocks = XFS_FILEOFF_MIN(MAXEXTLEN,
337 rounddown_pow_of_two(alloc_blocks)); 392 rounddown_pow_of_two(alloc_blocks));
338 393
@@ -399,7 +454,6 @@ xfs_iomap_write_delay(
399 extsz = xfs_get_extsz_hint(ip); 454 extsz = xfs_get_extsz_hint(ip);
400 offset_fsb = XFS_B_TO_FSBT(mp, offset); 455 offset_fsb = XFS_B_TO_FSBT(mp, offset);
401 456
402
403 error = xfs_iomap_eof_want_preallocate(mp, ip, offset, count, 457 error = xfs_iomap_eof_want_preallocate(mp, ip, offset, count,
404 imap, XFS_WRITE_IMAPS, &prealloc); 458 imap, XFS_WRITE_IMAPS, &prealloc);
405 if (error) 459 if (error)
@@ -407,7 +461,10 @@ xfs_iomap_write_delay(
407 461
408retry: 462retry:
409 if (prealloc) { 463 if (prealloc) {
410 xfs_fsblock_t alloc_blocks = xfs_iomap_prealloc_size(mp, ip); 464 xfs_fsblock_t alloc_blocks;
465
466 alloc_blocks = xfs_iomap_prealloc_size(mp, ip, offset, imap,
467 XFS_WRITE_IMAPS);
411 468
412 aligned_offset = XFS_WRITEIO_ALIGN(mp, (offset + count - 1)); 469 aligned_offset = XFS_WRITEIO_ALIGN(mp, (offset + count - 1));
413 ioalign = XFS_B_TO_FSBT(mp, aligned_offset); 470 ioalign = XFS_B_TO_FSBT(mp, aligned_offset);
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index 46bd9d52ab51..eec226f78a40 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -120,7 +120,7 @@ xlog_verify_iclog(
120 struct xlog *log, 120 struct xlog *log,
121 struct xlog_in_core *iclog, 121 struct xlog_in_core *iclog,
122 int count, 122 int count,
123 boolean_t syncing); 123 bool syncing);
124STATIC void 124STATIC void
125xlog_verify_tail_lsn( 125xlog_verify_tail_lsn(
126 struct xlog *log, 126 struct xlog *log,
@@ -1737,7 +1737,7 @@ xlog_sync(
1737 ASSERT(XFS_BUF_ADDR(bp) <= log->l_logBBsize-1); 1737 ASSERT(XFS_BUF_ADDR(bp) <= log->l_logBBsize-1);
1738 ASSERT(XFS_BUF_ADDR(bp) + BTOBB(count) <= log->l_logBBsize); 1738 ASSERT(XFS_BUF_ADDR(bp) + BTOBB(count) <= log->l_logBBsize);
1739 1739
1740 xlog_verify_iclog(log, iclog, count, B_TRUE); 1740 xlog_verify_iclog(log, iclog, count, true);
1741 1741
1742 /* account for log which doesn't start at block #0 */ 1742 /* account for log which doesn't start at block #0 */
1743 XFS_BUF_SET_ADDR(bp, XFS_BUF_ADDR(bp) + log->l_logBBstart); 1743 XFS_BUF_SET_ADDR(bp, XFS_BUF_ADDR(bp) + log->l_logBBstart);
@@ -3611,7 +3611,7 @@ xlog_verify_iclog(
3611 struct xlog *log, 3611 struct xlog *log,
3612 struct xlog_in_core *iclog, 3612 struct xlog_in_core *iclog,
3613 int count, 3613 int count,
3614 boolean_t syncing) 3614 bool syncing)
3615{ 3615{
3616 xlog_op_header_t *ophead; 3616 xlog_op_header_t *ophead;
3617 xlog_in_core_t *icptr; 3617 xlog_in_core_t *icptr;
@@ -3659,7 +3659,7 @@ xlog_verify_iclog(
3659 /* clientid is only 1 byte */ 3659 /* clientid is only 1 byte */
3660 field_offset = (__psint_t) 3660 field_offset = (__psint_t)
3661 ((xfs_caddr_t)&(ophead->oh_clientid) - base_ptr); 3661 ((xfs_caddr_t)&(ophead->oh_clientid) - base_ptr);
3662 if (syncing == B_FALSE || (field_offset & 0x1ff)) { 3662 if (!syncing || (field_offset & 0x1ff)) {
3663 clientid = ophead->oh_clientid; 3663 clientid = ophead->oh_clientid;
3664 } else { 3664 } else {
3665 idx = BTOBBT((xfs_caddr_t)&(ophead->oh_clientid) - iclog->ic_datap); 3665 idx = BTOBBT((xfs_caddr_t)&(ophead->oh_clientid) - iclog->ic_datap);
@@ -3682,7 +3682,7 @@ xlog_verify_iclog(
3682 /* check length */ 3682 /* check length */
3683 field_offset = (__psint_t) 3683 field_offset = (__psint_t)
3684 ((xfs_caddr_t)&(ophead->oh_len) - base_ptr); 3684 ((xfs_caddr_t)&(ophead->oh_len) - base_ptr);
3685 if (syncing == B_FALSE || (field_offset & 0x1ff)) { 3685 if (!syncing || (field_offset & 0x1ff)) {
3686 op_len = be32_to_cpu(ophead->oh_len); 3686 op_len = be32_to_cpu(ophead->oh_len);
3687 } else { 3687 } else {
3688 idx = BTOBBT((__psint_t)&ophead->oh_len - 3688 idx = BTOBBT((__psint_t)&ophead->oh_len -
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index 7d6df7c00c36..3806088a8f77 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -1109,8 +1109,8 @@ xfs_mount_reset_sbqflags(
1109 return 0; 1109 return 0;
1110 1110
1111 tp = xfs_trans_alloc(mp, XFS_TRANS_QM_SBCHANGE); 1111 tp = xfs_trans_alloc(mp, XFS_TRANS_QM_SBCHANGE);
1112 error = xfs_trans_reserve(tp, 0, mp->m_sb.sb_sectsize + 128, 0, 0, 1112 error = xfs_trans_reserve(tp, 0, XFS_QM_SBCHANGE_LOG_RES(mp),
1113 XFS_DEFAULT_LOG_COUNT); 1113 0, 0, XFS_DEFAULT_LOG_COUNT);
1114 if (error) { 1114 if (error) {
1115 xfs_trans_cancel(tp, 0); 1115 xfs_trans_cancel(tp, 0);
1116 xfs_alert(mp, "%s: Superblock update failed!", __func__); 1116 xfs_alert(mp, "%s: Superblock update failed!", __func__);
@@ -1583,8 +1583,8 @@ xfs_log_sbcount(xfs_mount_t *mp)
1583 return 0; 1583 return 0;
1584 1584
1585 tp = _xfs_trans_alloc(mp, XFS_TRANS_SB_COUNT, KM_SLEEP); 1585 tp = _xfs_trans_alloc(mp, XFS_TRANS_SB_COUNT, KM_SLEEP);
1586 error = xfs_trans_reserve(tp, 0, mp->m_sb.sb_sectsize + 128, 0, 0, 1586 error = xfs_trans_reserve(tp, 0, XFS_SB_LOG_RES(mp), 0, 0,
1587 XFS_DEFAULT_LOG_COUNT); 1587 XFS_DEFAULT_LOG_COUNT);
1588 if (error) { 1588 if (error) {
1589 xfs_trans_cancel(tp, 0); 1589 xfs_trans_cancel(tp, 0);
1590 return error; 1590 return error;
@@ -1945,8 +1945,8 @@ xfs_mount_log_sb(
1945 XFS_SB_VERSIONNUM)); 1945 XFS_SB_VERSIONNUM));
1946 1946
1947 tp = xfs_trans_alloc(mp, XFS_TRANS_SB_UNIT); 1947 tp = xfs_trans_alloc(mp, XFS_TRANS_SB_UNIT);
1948 error = xfs_trans_reserve(tp, 0, mp->m_sb.sb_sectsize + 128, 0, 0, 1948 error = xfs_trans_reserve(tp, 0, XFS_SB_LOG_RES(mp), 0, 0,
1949 XFS_DEFAULT_LOG_COUNT); 1949 XFS_DEFAULT_LOG_COUNT);
1950 if (error) { 1950 if (error) {
1951 xfs_trans_cancel(tp, 0); 1951 xfs_trans_cancel(tp, 0);
1952 return error; 1952 return error;
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index bab8314507e4..bc907061d392 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -34,12 +34,19 @@ typedef struct xfs_trans_reservations {
34 uint tr_addafork; /* cvt inode to attributed trans */ 34 uint tr_addafork; /* cvt inode to attributed trans */
35 uint tr_writeid; /* write setuid/setgid file */ 35 uint tr_writeid; /* write setuid/setgid file */
36 uint tr_attrinval; /* attr fork buffer invalidation */ 36 uint tr_attrinval; /* attr fork buffer invalidation */
37 uint tr_attrset; /* set/create an attribute */ 37 uint tr_attrsetm; /* set/create an attribute at mount time */
38 uint tr_attrsetrt; /* set/create an attribute at runtime */
38 uint tr_attrrm; /* remove an attribute */ 39 uint tr_attrrm; /* remove an attribute */
39 uint tr_clearagi; /* clear bad agi unlinked ino bucket */ 40 uint tr_clearagi; /* clear bad agi unlinked ino bucket */
40 uint tr_growrtalloc; /* grow realtime allocations */ 41 uint tr_growrtalloc; /* grow realtime allocations */
41 uint tr_growrtzero; /* grow realtime zeroing */ 42 uint tr_growrtzero; /* grow realtime zeroing */
42 uint tr_growrtfree; /* grow realtime freeing */ 43 uint tr_growrtfree; /* grow realtime freeing */
44 uint tr_qm_sbchange; /* change quota flags */
45 uint tr_qm_setqlim; /* adjust quota limits */
46 uint tr_qm_dqalloc; /* allocate quota on disk */
47 uint tr_qm_quotaoff; /* turn quota off */
48 uint tr_qm_equotaoff;/* end of turn quota off */
49 uint tr_sb; /* modify superblock */
43} xfs_trans_reservations_t; 50} xfs_trans_reservations_t;
44 51
45#ifndef __KERNEL__ 52#ifndef __KERNEL__
diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c
index 60eff4763156..e5b5cf973781 100644
--- a/fs/xfs/xfs_qm.c
+++ b/fs/xfs/xfs_qm.c
@@ -1584,10 +1584,9 @@ xfs_qm_write_sb_changes(
1584 int error; 1584 int error;
1585 1585
1586 tp = xfs_trans_alloc(mp, XFS_TRANS_QM_SBCHANGE); 1586 tp = xfs_trans_alloc(mp, XFS_TRANS_QM_SBCHANGE);
1587 if ((error = xfs_trans_reserve(tp, 0, 1587 error = xfs_trans_reserve(tp, 0, XFS_QM_SBCHANGE_LOG_RES(mp),
1588 mp->m_sb.sb_sectsize + 128, 0, 1588 0, 0, XFS_DEFAULT_LOG_COUNT);
1589 0, 1589 if (error) {
1590 XFS_DEFAULT_LOG_COUNT))) {
1591 xfs_trans_cancel(tp, 0); 1590 xfs_trans_cancel(tp, 0);
1592 return error; 1591 return error;
1593 } 1592 }
diff --git a/fs/xfs/xfs_qm_bhv.c b/fs/xfs/xfs_qm_bhv.c
index 6b39115bf145..2d02eac1c9a8 100644
--- a/fs/xfs/xfs_qm_bhv.c
+++ b/fs/xfs/xfs_qm_bhv.c
@@ -146,7 +146,7 @@ xfs_qm_newmount(
146 * inode goes inactive and wants to free blocks, 146 * inode goes inactive and wants to free blocks,
147 * or via xfs_log_mount_finish. 147 * or via xfs_log_mount_finish.
148 */ 148 */
149 *needquotamount = B_TRUE; 149 *needquotamount = true;
150 *quotaflags = mp->m_qflags; 150 *quotaflags = mp->m_qflags;
151 mp->m_qflags = 0; 151 mp->m_qflags = 0;
152 } 152 }
diff --git a/fs/xfs/xfs_qm_syscalls.c b/fs/xfs/xfs_qm_syscalls.c
index 8a59f8546552..cf9a34051e07 100644
--- a/fs/xfs/xfs_qm_syscalls.c
+++ b/fs/xfs/xfs_qm_syscalls.c
@@ -408,10 +408,10 @@ xfs_qm_scall_getqstat(
408{ 408{
409 struct xfs_quotainfo *q = mp->m_quotainfo; 409 struct xfs_quotainfo *q = mp->m_quotainfo;
410 struct xfs_inode *uip, *gip; 410 struct xfs_inode *uip, *gip;
411 boolean_t tempuqip, tempgqip; 411 bool tempuqip, tempgqip;
412 412
413 uip = gip = NULL; 413 uip = gip = NULL;
414 tempuqip = tempgqip = B_FALSE; 414 tempuqip = tempgqip = false;
415 memset(out, 0, sizeof(fs_quota_stat_t)); 415 memset(out, 0, sizeof(fs_quota_stat_t));
416 416
417 out->qs_version = FS_QSTAT_VERSION; 417 out->qs_version = FS_QSTAT_VERSION;
@@ -434,12 +434,12 @@ xfs_qm_scall_getqstat(
434 if (!uip && mp->m_sb.sb_uquotino != NULLFSINO) { 434 if (!uip && mp->m_sb.sb_uquotino != NULLFSINO) {
435 if (xfs_iget(mp, NULL, mp->m_sb.sb_uquotino, 435 if (xfs_iget(mp, NULL, mp->m_sb.sb_uquotino,
436 0, 0, &uip) == 0) 436 0, 0, &uip) == 0)
437 tempuqip = B_TRUE; 437 tempuqip = true;
438 } 438 }
439 if (!gip && mp->m_sb.sb_gquotino != NULLFSINO) { 439 if (!gip && mp->m_sb.sb_gquotino != NULLFSINO) {
440 if (xfs_iget(mp, NULL, mp->m_sb.sb_gquotino, 440 if (xfs_iget(mp, NULL, mp->m_sb.sb_gquotino,
441 0, 0, &gip) == 0) 441 0, 0, &gip) == 0)
442 tempgqip = B_TRUE; 442 tempgqip = true;
443 } 443 }
444 if (uip) { 444 if (uip) {
445 out->qs_uquota.qfs_nblks = uip->i_d.di_nblocks; 445 out->qs_uquota.qfs_nblks = uip->i_d.di_nblocks;
@@ -490,8 +490,9 @@ xfs_qm_scall_setqlim(
490 return 0; 490 return 0;
491 491
492 tp = xfs_trans_alloc(mp, XFS_TRANS_QM_SETQLIM); 492 tp = xfs_trans_alloc(mp, XFS_TRANS_QM_SETQLIM);
493 if ((error = xfs_trans_reserve(tp, 0, sizeof(xfs_disk_dquot_t) + 128, 493 error = xfs_trans_reserve(tp, 0, XFS_QM_SETQLIM_LOG_RES(mp),
494 0, 0, XFS_DEFAULT_LOG_COUNT))) { 494 0, 0, XFS_DEFAULT_LOG_COUNT);
495 if (error) {
495 xfs_trans_cancel(tp, 0); 496 xfs_trans_cancel(tp, 0);
496 return (error); 497 return (error);
497 } 498 }
@@ -638,8 +639,9 @@ xfs_qm_log_quotaoff_end(
638 639
639 tp = xfs_trans_alloc(mp, XFS_TRANS_QM_QUOTAOFF_END); 640 tp = xfs_trans_alloc(mp, XFS_TRANS_QM_QUOTAOFF_END);
640 641
641 if ((error = xfs_trans_reserve(tp, 0, sizeof(xfs_qoff_logitem_t) * 2, 642 error = xfs_trans_reserve(tp, 0, XFS_QM_QUOTAOFF_END_LOG_RES(mp),
642 0, 0, XFS_DEFAULT_LOG_COUNT))) { 643 0, 0, XFS_DEFAULT_LOG_COUNT);
644 if (error) {
643 xfs_trans_cancel(tp, 0); 645 xfs_trans_cancel(tp, 0);
644 return (error); 646 return (error);
645 } 647 }
@@ -671,14 +673,10 @@ xfs_qm_log_quotaoff(
671 uint oldsbqflag=0; 673 uint oldsbqflag=0;
672 674
673 tp = xfs_trans_alloc(mp, XFS_TRANS_QM_QUOTAOFF); 675 tp = xfs_trans_alloc(mp, XFS_TRANS_QM_QUOTAOFF);
674 if ((error = xfs_trans_reserve(tp, 0, 676 error = xfs_trans_reserve(tp, 0, XFS_QM_QUOTAOFF_LOG_RES(mp),
675 sizeof(xfs_qoff_logitem_t) * 2 + 677 0, 0, XFS_DEFAULT_LOG_COUNT);
676 mp->m_sb.sb_sectsize + 128, 678 if (error)
677 0,
678 0,
679 XFS_DEFAULT_LOG_COUNT))) {
680 goto error0; 679 goto error0;
681 }
682 680
683 qoffi = xfs_trans_get_qoff_item(tp, NULL, flags & XFS_ALL_QUOTA_ACCT); 681 qoffi = xfs_trans_get_qoff_item(tp, NULL, flags & XFS_ALL_QUOTA_ACCT);
684 xfs_trans_log_quotaoff_item(tp, qoffi); 682 xfs_trans_log_quotaoff_item(tp, qoffi);
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index ab8839b26272..c407121873b4 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -139,9 +139,9 @@ static const match_table_t tokens = {
139 139
140 140
141STATIC unsigned long 141STATIC unsigned long
142suffix_strtoul(char *s, char **endp, unsigned int base) 142suffix_kstrtoint(char *s, unsigned int base, int *res)
143{ 143{
144 int last, shift_left_factor = 0; 144 int last, shift_left_factor = 0, _res;
145 char *value = s; 145 char *value = s;
146 146
147 last = strlen(value) - 1; 147 last = strlen(value) - 1;
@@ -158,7 +158,10 @@ suffix_strtoul(char *s, char **endp, unsigned int base)
158 value[last] = '\0'; 158 value[last] = '\0';
159 } 159 }
160 160
161 return simple_strtoul((const char *)s, endp, base) << shift_left_factor; 161 if (kstrtoint(s, base, &_res))
162 return -EINVAL;
163 *res = _res << shift_left_factor;
164 return 0;
162} 165}
163 166
164/* 167/*
@@ -174,7 +177,7 @@ xfs_parseargs(
174 char *options) 177 char *options)
175{ 178{
176 struct super_block *sb = mp->m_super; 179 struct super_block *sb = mp->m_super;
177 char *this_char, *value, *eov; 180 char *this_char, *value;
178 int dsunit = 0; 181 int dsunit = 0;
179 int dswidth = 0; 182 int dswidth = 0;
180 int iosize = 0; 183 int iosize = 0;
@@ -230,14 +233,16 @@ xfs_parseargs(
230 this_char); 233 this_char);
231 return EINVAL; 234 return EINVAL;
232 } 235 }
233 mp->m_logbufs = simple_strtoul(value, &eov, 10); 236 if (kstrtoint(value, 10, &mp->m_logbufs))
237 return EINVAL;
234 } else if (!strcmp(this_char, MNTOPT_LOGBSIZE)) { 238 } else if (!strcmp(this_char, MNTOPT_LOGBSIZE)) {
235 if (!value || !*value) { 239 if (!value || !*value) {
236 xfs_warn(mp, "%s option requires an argument", 240 xfs_warn(mp, "%s option requires an argument",
237 this_char); 241 this_char);
238 return EINVAL; 242 return EINVAL;
239 } 243 }
240 mp->m_logbsize = suffix_strtoul(value, &eov, 10); 244 if (suffix_kstrtoint(value, 10, &mp->m_logbsize))
245 return EINVAL;
241 } else if (!strcmp(this_char, MNTOPT_LOGDEV)) { 246 } else if (!strcmp(this_char, MNTOPT_LOGDEV)) {
242 if (!value || !*value) { 247 if (!value || !*value) {
243 xfs_warn(mp, "%s option requires an argument", 248 xfs_warn(mp, "%s option requires an argument",
@@ -266,7 +271,8 @@ xfs_parseargs(
266 this_char); 271 this_char);
267 return EINVAL; 272 return EINVAL;
268 } 273 }
269 iosize = simple_strtoul(value, &eov, 10); 274 if (kstrtoint(value, 10, &iosize))
275 return EINVAL;
270 iosizelog = ffs(iosize) - 1; 276 iosizelog = ffs(iosize) - 1;
271 } else if (!strcmp(this_char, MNTOPT_ALLOCSIZE)) { 277 } else if (!strcmp(this_char, MNTOPT_ALLOCSIZE)) {
272 if (!value || !*value) { 278 if (!value || !*value) {
@@ -274,7 +280,8 @@ xfs_parseargs(
274 this_char); 280 this_char);
275 return EINVAL; 281 return EINVAL;
276 } 282 }
277 iosize = suffix_strtoul(value, &eov, 10); 283 if (suffix_kstrtoint(value, 10, &iosize))
284 return EINVAL;
278 iosizelog = ffs(iosize) - 1; 285 iosizelog = ffs(iosize) - 1;
279 } else if (!strcmp(this_char, MNTOPT_GRPID) || 286 } else if (!strcmp(this_char, MNTOPT_GRPID) ||
280 !strcmp(this_char, MNTOPT_BSDGROUPS)) { 287 !strcmp(this_char, MNTOPT_BSDGROUPS)) {
@@ -296,14 +303,16 @@ xfs_parseargs(
296 this_char); 303 this_char);
297 return EINVAL; 304 return EINVAL;
298 } 305 }
299 dsunit = simple_strtoul(value, &eov, 10); 306 if (kstrtoint(value, 10, &dsunit))
307 return EINVAL;
300 } else if (!strcmp(this_char, MNTOPT_SWIDTH)) { 308 } else if (!strcmp(this_char, MNTOPT_SWIDTH)) {
301 if (!value || !*value) { 309 if (!value || !*value) {
302 xfs_warn(mp, "%s option requires an argument", 310 xfs_warn(mp, "%s option requires an argument",
303 this_char); 311 this_char);
304 return EINVAL; 312 return EINVAL;
305 } 313 }
306 dswidth = simple_strtoul(value, &eov, 10); 314 if (kstrtoint(value, 10, &dswidth))
315 return EINVAL;
307 } else if (!strcmp(this_char, MNTOPT_32BITINODE)) { 316 } else if (!strcmp(this_char, MNTOPT_32BITINODE)) {
308 mp->m_flags |= XFS_MOUNT_SMALL_INUMS; 317 mp->m_flags |= XFS_MOUNT_SMALL_INUMS;
309 } else if (!strcmp(this_char, MNTOPT_64BITINODE)) { 318 } else if (!strcmp(this_char, MNTOPT_64BITINODE)) {
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c
index 06ed520a767f..2fd7c1ff1d21 100644
--- a/fs/xfs/xfs_trans.c
+++ b/fs/xfs/xfs_trans.c
@@ -37,14 +37,45 @@
37#include "xfs_extent_busy.h" 37#include "xfs_extent_busy.h"
38#include "xfs_bmap.h" 38#include "xfs_bmap.h"
39#include "xfs_quota.h" 39#include "xfs_quota.h"
40#include "xfs_qm.h"
40#include "xfs_trans_priv.h" 41#include "xfs_trans_priv.h"
41#include "xfs_trans_space.h" 42#include "xfs_trans_space.h"
42#include "xfs_inode_item.h" 43#include "xfs_inode_item.h"
44#include "xfs_log_priv.h"
45#include "xfs_buf_item.h"
43#include "xfs_trace.h" 46#include "xfs_trace.h"
44 47
45kmem_zone_t *xfs_trans_zone; 48kmem_zone_t *xfs_trans_zone;
46kmem_zone_t *xfs_log_item_desc_zone; 49kmem_zone_t *xfs_log_item_desc_zone;
47 50
51/*
52 * A buffer has a format structure overhead in the log in addition
53 * to the data, so we need to take this into account when reserving
54 * space in a transaction for a buffer. Round the space required up
55 * to a multiple of 128 bytes so that we don't change the historical
56 * reservation that has been used for this overhead.
57 */
58STATIC uint
59xfs_buf_log_overhead(void)
60{
61 return round_up(sizeof(struct xlog_op_header) +
62 sizeof(struct xfs_buf_log_format), 128);
63}
64
65/*
66 * Calculate out transaction log reservation per item in bytes.
67 *
68 * The nbufs argument is used to indicate the number of items that
69 * will be changed in a transaction. size is used to tell how many
70 * bytes should be reserved per item.
71 */
72STATIC uint
73xfs_calc_buf_res(
74 uint nbufs,
75 uint size)
76{
77 return nbufs * (size + xfs_buf_log_overhead());
78}
48 79
49/* 80/*
50 * Various log reservation values. 81 * Various log reservation values.
@@ -85,18 +116,15 @@ xfs_calc_write_reservation(
85 struct xfs_mount *mp) 116 struct xfs_mount *mp)
86{ 117{
87 return XFS_DQUOT_LOGRES(mp) + 118 return XFS_DQUOT_LOGRES(mp) +
88 MAX((mp->m_sb.sb_inodesize + 119 MAX((xfs_calc_buf_res(1, mp->m_sb.sb_inodesize) +
89 XFS_FSB_TO_B(mp, XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK)) + 120 xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK),
90 2 * mp->m_sb.sb_sectsize + 121 XFS_FSB_TO_B(mp, 1)) +
91 mp->m_sb.sb_sectsize + 122 xfs_calc_buf_res(3, mp->m_sb.sb_sectsize) +
92 XFS_ALLOCFREE_LOG_RES(mp, 2) + 123 xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 2),
93 128 * (4 + XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) + 124 XFS_FSB_TO_B(mp, 1))),
94 XFS_ALLOCFREE_LOG_COUNT(mp, 2))), 125 (xfs_calc_buf_res(5, mp->m_sb.sb_sectsize) +
95 (2 * mp->m_sb.sb_sectsize + 126 xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 2),
96 2 * mp->m_sb.sb_sectsize + 127 XFS_FSB_TO_B(mp, 1))));
97 mp->m_sb.sb_sectsize +
98 XFS_ALLOCFREE_LOG_RES(mp, 2) +
99 128 * (5 + XFS_ALLOCFREE_LOG_COUNT(mp, 2))));
100} 128}
101 129
102/* 130/*
@@ -117,18 +145,17 @@ xfs_calc_itruncate_reservation(
117 struct xfs_mount *mp) 145 struct xfs_mount *mp)
118{ 146{
119 return XFS_DQUOT_LOGRES(mp) + 147 return XFS_DQUOT_LOGRES(mp) +
120 MAX((mp->m_sb.sb_inodesize + 148 MAX((xfs_calc_buf_res(1, mp->m_sb.sb_inodesize) +
121 XFS_FSB_TO_B(mp, XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) + 1) + 149 xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) + 1,
122 128 * (2 + XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK))), 150 XFS_FSB_TO_B(mp, 1))),
123 (4 * mp->m_sb.sb_sectsize + 151 (xfs_calc_buf_res(9, mp->m_sb.sb_sectsize) +
124 4 * mp->m_sb.sb_sectsize + 152 xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 4),
125 mp->m_sb.sb_sectsize + 153 XFS_FSB_TO_B(mp, 1)) +
126 XFS_ALLOCFREE_LOG_RES(mp, 4) + 154 xfs_calc_buf_res(5, 0) +
127 128 * (9 + XFS_ALLOCFREE_LOG_COUNT(mp, 4)) + 155 xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1),
128 128 * 5 + 156 XFS_FSB_TO_B(mp, 1)) +
129 XFS_ALLOCFREE_LOG_RES(mp, 1) + 157 xfs_calc_buf_res(2 + XFS_IALLOC_BLOCKS(mp) +
130 128 * (2 + XFS_IALLOC_BLOCKS(mp) + mp->m_in_maxlevels + 158 mp->m_in_maxlevels, 0)));
131 XFS_ALLOCFREE_LOG_COUNT(mp, 1))));
132} 159}
133 160
134/* 161/*
@@ -148,14 +175,12 @@ xfs_calc_rename_reservation(
148 struct xfs_mount *mp) 175 struct xfs_mount *mp)
149{ 176{
150 return XFS_DQUOT_LOGRES(mp) + 177 return XFS_DQUOT_LOGRES(mp) +
151 MAX((4 * mp->m_sb.sb_inodesize + 178 MAX((xfs_calc_buf_res(4, mp->m_sb.sb_inodesize) +
152 2 * XFS_DIROP_LOG_RES(mp) + 179 xfs_calc_buf_res(2 * XFS_DIROP_LOG_COUNT(mp),
153 128 * (4 + 2 * XFS_DIROP_LOG_COUNT(mp))), 180 XFS_FSB_TO_B(mp, 1))),
154 (3 * mp->m_sb.sb_sectsize + 181 (xfs_calc_buf_res(7, mp->m_sb.sb_sectsize) +
155 3 * mp->m_sb.sb_sectsize + 182 xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 3),
156 mp->m_sb.sb_sectsize + 183 XFS_FSB_TO_B(mp, 1))));
157 XFS_ALLOCFREE_LOG_RES(mp, 3) +
158 128 * (7 + XFS_ALLOCFREE_LOG_COUNT(mp, 3))));
159} 184}
160 185
161/* 186/*
@@ -175,15 +200,12 @@ xfs_calc_link_reservation(
175 struct xfs_mount *mp) 200 struct xfs_mount *mp)
176{ 201{
177 return XFS_DQUOT_LOGRES(mp) + 202 return XFS_DQUOT_LOGRES(mp) +
178 MAX((mp->m_sb.sb_inodesize + 203 MAX((xfs_calc_buf_res(2, mp->m_sb.sb_inodesize) +
179 mp->m_sb.sb_inodesize + 204 xfs_calc_buf_res(XFS_DIROP_LOG_COUNT(mp),
180 XFS_DIROP_LOG_RES(mp) + 205 XFS_FSB_TO_B(mp, 1))),
181 128 * (2 + XFS_DIROP_LOG_COUNT(mp))), 206 (xfs_calc_buf_res(3, mp->m_sb.sb_sectsize) +
182 (mp->m_sb.sb_sectsize + 207 xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1),
183 mp->m_sb.sb_sectsize + 208 XFS_FSB_TO_B(mp, 1))));
184 mp->m_sb.sb_sectsize +
185 XFS_ALLOCFREE_LOG_RES(mp, 1) +
186 128 * (3 + XFS_ALLOCFREE_LOG_COUNT(mp, 1))));
187} 209}
188 210
189/* 211/*
@@ -203,15 +225,12 @@ xfs_calc_remove_reservation(
203 struct xfs_mount *mp) 225 struct xfs_mount *mp)
204{ 226{
205 return XFS_DQUOT_LOGRES(mp) + 227 return XFS_DQUOT_LOGRES(mp) +
206 MAX((mp->m_sb.sb_inodesize + 228 MAX((xfs_calc_buf_res(2, mp->m_sb.sb_inodesize) +
207 mp->m_sb.sb_inodesize + 229 xfs_calc_buf_res(XFS_DIROP_LOG_COUNT(mp),
208 XFS_DIROP_LOG_RES(mp) + 230 XFS_FSB_TO_B(mp, 1))),
209 128 * (2 + XFS_DIROP_LOG_COUNT(mp))), 231 (xfs_calc_buf_res(5, mp->m_sb.sb_sectsize) +
210 (2 * mp->m_sb.sb_sectsize + 232 xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 2),
211 2 * mp->m_sb.sb_sectsize + 233 XFS_FSB_TO_B(mp, 1))));
212 mp->m_sb.sb_sectsize +
213 XFS_ALLOCFREE_LOG_RES(mp, 2) +
214 128 * (5 + XFS_ALLOCFREE_LOG_COUNT(mp, 2))));
215} 234}
216 235
217/* 236/*
@@ -233,18 +252,18 @@ xfs_calc_symlink_reservation(
233 struct xfs_mount *mp) 252 struct xfs_mount *mp)
234{ 253{
235 return XFS_DQUOT_LOGRES(mp) + 254 return XFS_DQUOT_LOGRES(mp) +
236 MAX((mp->m_sb.sb_inodesize + 255 MAX((xfs_calc_buf_res(2, mp->m_sb.sb_inodesize) +
237 mp->m_sb.sb_inodesize + 256 xfs_calc_buf_res(1, XFS_FSB_TO_B(mp, 1)) +
238 XFS_FSB_TO_B(mp, 1) + 257 xfs_calc_buf_res(XFS_DIROP_LOG_COUNT(mp),
239 XFS_DIROP_LOG_RES(mp) + 258 XFS_FSB_TO_B(mp, 1)) +
240 1024 + 259 xfs_calc_buf_res(1, 1024)),
241 128 * (4 + XFS_DIROP_LOG_COUNT(mp))), 260 (xfs_calc_buf_res(2, mp->m_sb.sb_sectsize) +
242 (2 * mp->m_sb.sb_sectsize + 261 xfs_calc_buf_res(XFS_IALLOC_BLOCKS(mp),
243 XFS_FSB_TO_B(mp, XFS_IALLOC_BLOCKS(mp)) + 262 XFS_FSB_TO_B(mp, 1)) +
244 XFS_FSB_TO_B(mp, mp->m_in_maxlevels) + 263 xfs_calc_buf_res(mp->m_in_maxlevels,
245 XFS_ALLOCFREE_LOG_RES(mp, 1) + 264 XFS_FSB_TO_B(mp, 1)) +
246 128 * (2 + XFS_IALLOC_BLOCKS(mp) + mp->m_in_maxlevels + 265 xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1),
247 XFS_ALLOCFREE_LOG_COUNT(mp, 1)))); 266 XFS_FSB_TO_B(mp, 1))));
248} 267}
249 268
250/* 269/*
@@ -267,18 +286,19 @@ xfs_calc_create_reservation(
267 struct xfs_mount *mp) 286 struct xfs_mount *mp)
268{ 287{
269 return XFS_DQUOT_LOGRES(mp) + 288 return XFS_DQUOT_LOGRES(mp) +
270 MAX((mp->m_sb.sb_inodesize + 289 MAX((xfs_calc_buf_res(2, mp->m_sb.sb_inodesize) +
271 mp->m_sb.sb_inodesize + 290 xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) +
291 (uint)XFS_FSB_TO_B(mp, 1) +
292 xfs_calc_buf_res(XFS_DIROP_LOG_COUNT(mp),
293 XFS_FSB_TO_B(mp, 1))),
294 (xfs_calc_buf_res(2, mp->m_sb.sb_sectsize) +
272 mp->m_sb.sb_sectsize + 295 mp->m_sb.sb_sectsize +
273 XFS_FSB_TO_B(mp, 1) + 296 xfs_calc_buf_res(XFS_IALLOC_BLOCKS(mp),
274 XFS_DIROP_LOG_RES(mp) + 297 XFS_FSB_TO_B(mp, 1)) +
275 128 * (3 + XFS_DIROP_LOG_COUNT(mp))), 298 xfs_calc_buf_res(mp->m_in_maxlevels,
276 (3 * mp->m_sb.sb_sectsize + 299 XFS_FSB_TO_B(mp, 1)) +
277 XFS_FSB_TO_B(mp, XFS_IALLOC_BLOCKS(mp)) + 300 xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1),
278 XFS_FSB_TO_B(mp, mp->m_in_maxlevels) + 301 XFS_FSB_TO_B(mp, 1))));
279 XFS_ALLOCFREE_LOG_RES(mp, 1) +
280 128 * (2 + XFS_IALLOC_BLOCKS(mp) + mp->m_in_maxlevels +
281 XFS_ALLOCFREE_LOG_COUNT(mp, 1))));
282} 302}
283 303
284/* 304/*
@@ -306,16 +326,16 @@ xfs_calc_ifree_reservation(
306 struct xfs_mount *mp) 326 struct xfs_mount *mp)
307{ 327{
308 return XFS_DQUOT_LOGRES(mp) + 328 return XFS_DQUOT_LOGRES(mp) +
309 mp->m_sb.sb_inodesize + 329 xfs_calc_buf_res(1, mp->m_sb.sb_inodesize) +
310 mp->m_sb.sb_sectsize + 330 xfs_calc_buf_res(2, mp->m_sb.sb_sectsize) +
311 mp->m_sb.sb_sectsize + 331 xfs_calc_buf_res(1, XFS_FSB_TO_B(mp, 1)) +
312 XFS_FSB_TO_B(mp, 1) +
313 MAX((__uint16_t)XFS_FSB_TO_B(mp, 1), 332 MAX((__uint16_t)XFS_FSB_TO_B(mp, 1),
314 XFS_INODE_CLUSTER_SIZE(mp)) + 333 XFS_INODE_CLUSTER_SIZE(mp)) +
315 128 * 5 + 334 xfs_calc_buf_res(1, 0) +
316 XFS_ALLOCFREE_LOG_RES(mp, 1) + 335 xfs_calc_buf_res(2 + XFS_IALLOC_BLOCKS(mp) +
317 128 * (2 + XFS_IALLOC_BLOCKS(mp) + mp->m_in_maxlevels + 336 mp->m_in_maxlevels, 0) +
318 XFS_ALLOCFREE_LOG_COUNT(mp, 1)); 337 xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1),
338 XFS_FSB_TO_B(mp, 1));
319} 339}
320 340
321/* 341/*
@@ -343,9 +363,9 @@ STATIC uint
343xfs_calc_growdata_reservation( 363xfs_calc_growdata_reservation(
344 struct xfs_mount *mp) 364 struct xfs_mount *mp)
345{ 365{
346 return mp->m_sb.sb_sectsize * 3 + 366 return xfs_calc_buf_res(3, mp->m_sb.sb_sectsize) +
347 XFS_ALLOCFREE_LOG_RES(mp, 1) + 367 xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1),
348 128 * (3 + XFS_ALLOCFREE_LOG_COUNT(mp, 1)); 368 XFS_FSB_TO_B(mp, 1));
349} 369}
350 370
351/* 371/*
@@ -362,12 +382,12 @@ STATIC uint
362xfs_calc_growrtalloc_reservation( 382xfs_calc_growrtalloc_reservation(
363 struct xfs_mount *mp) 383 struct xfs_mount *mp)
364{ 384{
365 return 2 * mp->m_sb.sb_sectsize + 385 return xfs_calc_buf_res(2, mp->m_sb.sb_sectsize) +
366 XFS_FSB_TO_B(mp, XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK)) + 386 xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK),
367 mp->m_sb.sb_inodesize + 387 XFS_FSB_TO_B(mp, 1)) +
368 XFS_ALLOCFREE_LOG_RES(mp, 1) + 388 xfs_calc_buf_res(1, mp->m_sb.sb_inodesize) +
369 128 * (3 + XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) + 389 xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1),
370 XFS_ALLOCFREE_LOG_COUNT(mp, 1)); 390 XFS_FSB_TO_B(mp, 1));
371} 391}
372 392
373/* 393/*
@@ -379,7 +399,7 @@ STATIC uint
379xfs_calc_growrtzero_reservation( 399xfs_calc_growrtzero_reservation(
380 struct xfs_mount *mp) 400 struct xfs_mount *mp)
381{ 401{
382 return mp->m_sb.sb_blocksize + 128; 402 return xfs_calc_buf_res(1, mp->m_sb.sb_blocksize);
383} 403}
384 404
385/* 405/*
@@ -396,11 +416,10 @@ STATIC uint
396xfs_calc_growrtfree_reservation( 416xfs_calc_growrtfree_reservation(
397 struct xfs_mount *mp) 417 struct xfs_mount *mp)
398{ 418{
399 return mp->m_sb.sb_sectsize + 419 return xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) +
400 2 * mp->m_sb.sb_inodesize + 420 xfs_calc_buf_res(2, mp->m_sb.sb_inodesize) +
401 mp->m_sb.sb_blocksize + 421 xfs_calc_buf_res(1, mp->m_sb.sb_blocksize) +
402 mp->m_rsumsize + 422 xfs_calc_buf_res(1, mp->m_rsumsize);
403 128 * 5;
404} 423}
405 424
406/* 425/*
@@ -411,7 +430,7 @@ STATIC uint
411xfs_calc_swrite_reservation( 430xfs_calc_swrite_reservation(
412 struct xfs_mount *mp) 431 struct xfs_mount *mp)
413{ 432{
414 return mp->m_sb.sb_inodesize + 128; 433 return xfs_calc_buf_res(1, mp->m_sb.sb_inodesize);
415} 434}
416 435
417/* 436/*
@@ -421,7 +440,7 @@ xfs_calc_swrite_reservation(
421STATIC uint 440STATIC uint
422xfs_calc_writeid_reservation(xfs_mount_t *mp) 441xfs_calc_writeid_reservation(xfs_mount_t *mp)
423{ 442{
424 return mp->m_sb.sb_inodesize + 128; 443 return xfs_calc_buf_res(1, mp->m_sb.sb_inodesize);
425} 444}
426 445
427/* 446/*
@@ -437,13 +456,13 @@ xfs_calc_addafork_reservation(
437 struct xfs_mount *mp) 456 struct xfs_mount *mp)
438{ 457{
439 return XFS_DQUOT_LOGRES(mp) + 458 return XFS_DQUOT_LOGRES(mp) +
440 mp->m_sb.sb_inodesize + 459 xfs_calc_buf_res(1, mp->m_sb.sb_inodesize) +
441 mp->m_sb.sb_sectsize * 2 + 460 xfs_calc_buf_res(2, mp->m_sb.sb_sectsize) +
442 mp->m_dirblksize + 461 xfs_calc_buf_res(1, mp->m_dirblksize) +
443 XFS_FSB_TO_B(mp, XFS_DAENTER_BMAP1B(mp, XFS_DATA_FORK) + 1) + 462 xfs_calc_buf_res(XFS_DAENTER_BMAP1B(mp, XFS_DATA_FORK) + 1,
444 XFS_ALLOCFREE_LOG_RES(mp, 1) + 463 XFS_FSB_TO_B(mp, 1)) +
445 128 * (4 + XFS_DAENTER_BMAP1B(mp, XFS_DATA_FORK) + 1 + 464 xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1),
446 XFS_ALLOCFREE_LOG_COUNT(mp, 1)); 465 XFS_FSB_TO_B(mp, 1));
447} 466}
448 467
449/* 468/*
@@ -461,35 +480,51 @@ STATIC uint
461xfs_calc_attrinval_reservation( 480xfs_calc_attrinval_reservation(
462 struct xfs_mount *mp) 481 struct xfs_mount *mp)
463{ 482{
464 return MAX((mp->m_sb.sb_inodesize + 483 return MAX((xfs_calc_buf_res(1, mp->m_sb.sb_inodesize) +
465 XFS_FSB_TO_B(mp, XFS_BM_MAXLEVELS(mp, XFS_ATTR_FORK)) + 484 xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_ATTR_FORK),
466 128 * (1 + XFS_BM_MAXLEVELS(mp, XFS_ATTR_FORK))), 485 XFS_FSB_TO_B(mp, 1))),
467 (4 * mp->m_sb.sb_sectsize + 486 (xfs_calc_buf_res(9, mp->m_sb.sb_sectsize) +
468 4 * mp->m_sb.sb_sectsize + 487 xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 4),
469 mp->m_sb.sb_sectsize + 488 XFS_FSB_TO_B(mp, 1))));
470 XFS_ALLOCFREE_LOG_RES(mp, 4) +
471 128 * (9 + XFS_ALLOCFREE_LOG_COUNT(mp, 4))));
472} 489}
473 490
474/* 491/*
475 * Setting an attribute. 492 * Setting an attribute at mount time.
476 * the inode getting the attribute 493 * the inode getting the attribute
477 * the superblock for allocations 494 * the superblock for allocations
478 * the agfs extents are allocated from 495 * the agfs extents are allocated from
479 * the attribute btree * max depth 496 * the attribute btree * max depth
480 * the inode allocation btree 497 * the inode allocation btree
481 * Since attribute transaction space is dependent on the size of the attribute, 498 * Since attribute transaction space is dependent on the size of the attribute,
482 * the calculation is done partially at mount time and partially at runtime. 499 * the calculation is done partially at mount time and partially at runtime(see
500 * below).
483 */ 501 */
484STATIC uint 502STATIC uint
485xfs_calc_attrset_reservation( 503xfs_calc_attrsetm_reservation(
486 struct xfs_mount *mp) 504 struct xfs_mount *mp)
487{ 505{
488 return XFS_DQUOT_LOGRES(mp) + 506 return XFS_DQUOT_LOGRES(mp) +
489 mp->m_sb.sb_inodesize + 507 xfs_calc_buf_res(1, mp->m_sb.sb_inodesize) +
490 mp->m_sb.sb_sectsize + 508 xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) +
491 XFS_FSB_TO_B(mp, XFS_DA_NODE_MAXDEPTH) + 509 xfs_calc_buf_res(XFS_DA_NODE_MAXDEPTH, XFS_FSB_TO_B(mp, 1));
492 128 * (2 + XFS_DA_NODE_MAXDEPTH); 510}
511
512/*
513 * Setting an attribute at runtime, transaction space unit per block.
514 * the superblock for allocations: sector size
515 * the inode bmap btree could join or split: max depth * block size
516 * Since the runtime attribute transaction space is dependent on the total
517 * blocks needed for the 1st bmap, here we calculate out the space unit for
518 * one block so that the caller could figure out the total space according
519 * to the attibute extent length in blocks by: ext * XFS_ATTRSETRT_LOG_RES(mp).
520 */
521STATIC uint
522xfs_calc_attrsetrt_reservation(
523 struct xfs_mount *mp)
524{
525 return xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) +
526 xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_ATTR_FORK),
527 XFS_FSB_TO_B(mp, 1));
493} 528}
494 529
495/* 530/*
@@ -508,16 +543,15 @@ xfs_calc_attrrm_reservation(
508 struct xfs_mount *mp) 543 struct xfs_mount *mp)
509{ 544{
510 return XFS_DQUOT_LOGRES(mp) + 545 return XFS_DQUOT_LOGRES(mp) +
511 MAX((mp->m_sb.sb_inodesize + 546 MAX((xfs_calc_buf_res(1, mp->m_sb.sb_inodesize) +
512 XFS_FSB_TO_B(mp, XFS_DA_NODE_MAXDEPTH) + 547 xfs_calc_buf_res(XFS_DA_NODE_MAXDEPTH,
513 XFS_FSB_TO_B(mp, XFS_BM_MAXLEVELS(mp, XFS_ATTR_FORK)) + 548 XFS_FSB_TO_B(mp, 1)) +
514 128 * (1 + XFS_DA_NODE_MAXDEPTH + 549 (uint)XFS_FSB_TO_B(mp,
515 XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK))), 550 XFS_BM_MAXLEVELS(mp, XFS_ATTR_FORK)) +
516 (2 * mp->m_sb.sb_sectsize + 551 xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK), 0)),
517 2 * mp->m_sb.sb_sectsize + 552 (xfs_calc_buf_res(5, mp->m_sb.sb_sectsize) +
518 mp->m_sb.sb_sectsize + 553 xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 2),
519 XFS_ALLOCFREE_LOG_RES(mp, 2) + 554 XFS_FSB_TO_B(mp, 1))));
520 128 * (5 + XFS_ALLOCFREE_LOG_COUNT(mp, 2))));
521} 555}
522 556
523/* 557/*
@@ -527,7 +561,78 @@ STATIC uint
527xfs_calc_clear_agi_bucket_reservation( 561xfs_calc_clear_agi_bucket_reservation(
528 struct xfs_mount *mp) 562 struct xfs_mount *mp)
529{ 563{
530 return mp->m_sb.sb_sectsize + 128; 564 return xfs_calc_buf_res(1, mp->m_sb.sb_sectsize);
565}
566
567/*
568 * Clearing the quotaflags in the superblock.
569 * the super block for changing quota flags: sector size
570 */
571STATIC uint
572xfs_calc_qm_sbchange_reservation(
573 struct xfs_mount *mp)
574{
575 return xfs_calc_buf_res(1, mp->m_sb.sb_sectsize);
576}
577
578/*
579 * Adjusting quota limits.
580 * the xfs_disk_dquot_t: sizeof(struct xfs_disk_dquot)
581 */
582STATIC uint
583xfs_calc_qm_setqlim_reservation(
584 struct xfs_mount *mp)
585{
586 return xfs_calc_buf_res(1, sizeof(struct xfs_disk_dquot));
587}
588
589/*
590 * Allocating quota on disk if needed.
591 * the write transaction log space: XFS_WRITE_LOG_RES(mp)
592 * the unit of quota allocation: one system block size
593 */
594STATIC uint
595xfs_calc_qm_dqalloc_reservation(
596 struct xfs_mount *mp)
597{
598 return XFS_WRITE_LOG_RES(mp) +
599 xfs_calc_buf_res(1,
600 XFS_FSB_TO_B(mp, XFS_DQUOT_CLUSTER_SIZE_FSB) - 1);
601}
602
603/*
604 * Turning off quotas.
605 * the xfs_qoff_logitem_t: sizeof(struct xfs_qoff_logitem) * 2
606 * the superblock for the quota flags: sector size
607 */
608STATIC uint
609xfs_calc_qm_quotaoff_reservation(
610 struct xfs_mount *mp)
611{
612 return sizeof(struct xfs_qoff_logitem) * 2 +
613 xfs_calc_buf_res(1, mp->m_sb.sb_sectsize);
614}
615
616/*
617 * End of turning off quotas.
618 * the xfs_qoff_logitem_t: sizeof(struct xfs_qoff_logitem) * 2
619 */
620STATIC uint
621xfs_calc_qm_quotaoff_end_reservation(
622 struct xfs_mount *mp)
623{
624 return sizeof(struct xfs_qoff_logitem) * 2;
625}
626
627/*
628 * Syncing the incore super block changes to disk.
629 * the super block to reflect the changes: sector size
630 */
631STATIC uint
632xfs_calc_sb_reservation(
633 struct xfs_mount *mp)
634{
635 return xfs_calc_buf_res(1, mp->m_sb.sb_sectsize);
531} 636}
532 637
533/* 638/*
@@ -555,12 +660,19 @@ xfs_trans_init(
555 resp->tr_writeid = xfs_calc_writeid_reservation(mp); 660 resp->tr_writeid = xfs_calc_writeid_reservation(mp);
556 resp->tr_addafork = xfs_calc_addafork_reservation(mp); 661 resp->tr_addafork = xfs_calc_addafork_reservation(mp);
557 resp->tr_attrinval = xfs_calc_attrinval_reservation(mp); 662 resp->tr_attrinval = xfs_calc_attrinval_reservation(mp);
558 resp->tr_attrset = xfs_calc_attrset_reservation(mp); 663 resp->tr_attrsetm = xfs_calc_attrsetm_reservation(mp);
664 resp->tr_attrsetrt = xfs_calc_attrsetrt_reservation(mp);
559 resp->tr_attrrm = xfs_calc_attrrm_reservation(mp); 665 resp->tr_attrrm = xfs_calc_attrrm_reservation(mp);
560 resp->tr_clearagi = xfs_calc_clear_agi_bucket_reservation(mp); 666 resp->tr_clearagi = xfs_calc_clear_agi_bucket_reservation(mp);
561 resp->tr_growrtalloc = xfs_calc_growrtalloc_reservation(mp); 667 resp->tr_growrtalloc = xfs_calc_growrtalloc_reservation(mp);
562 resp->tr_growrtzero = xfs_calc_growrtzero_reservation(mp); 668 resp->tr_growrtzero = xfs_calc_growrtzero_reservation(mp);
563 resp->tr_growrtfree = xfs_calc_growrtfree_reservation(mp); 669 resp->tr_growrtfree = xfs_calc_growrtfree_reservation(mp);
670 resp->tr_qm_sbchange = xfs_calc_qm_sbchange_reservation(mp);
671 resp->tr_qm_setqlim = xfs_calc_qm_setqlim_reservation(mp);
672 resp->tr_qm_dqalloc = xfs_calc_qm_dqalloc_reservation(mp);
673 resp->tr_qm_quotaoff = xfs_calc_qm_quotaoff_reservation(mp);
674 resp->tr_qm_equotaoff = xfs_calc_qm_quotaoff_end_reservation(mp);
675 resp->tr_sb = xfs_calc_sb_reservation(mp);
564} 676}
565 677
566/* 678/*
diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h
index c6c0601abd7a..cd29f6171021 100644
--- a/fs/xfs/xfs_trans.h
+++ b/fs/xfs/xfs_trans.h
@@ -252,17 +252,19 @@ struct xfs_log_item_desc {
252 * as long as SWRITE logs the entire inode core 252 * as long as SWRITE logs the entire inode core
253 */ 253 */
254#define XFS_FSYNC_TS_LOG_RES(mp) ((mp)->m_reservations.tr_swrite) 254#define XFS_FSYNC_TS_LOG_RES(mp) ((mp)->m_reservations.tr_swrite)
255#define XFS_WRITEID_LOG_RES(mp) ((mp)->m_reservations.tr_swrite) 255#define XFS_WRITEID_LOG_RES(mp) ((mp)->m_reservations.tr_swrite)
256#define XFS_ADDAFORK_LOG_RES(mp) ((mp)->m_reservations.tr_addafork) 256#define XFS_ADDAFORK_LOG_RES(mp) ((mp)->m_reservations.tr_addafork)
257#define XFS_ATTRINVAL_LOG_RES(mp) ((mp)->m_reservations.tr_attrinval) 257#define XFS_ATTRINVAL_LOG_RES(mp) ((mp)->m_reservations.tr_attrinval)
258#define XFS_ATTRSET_LOG_RES(mp, ext) \ 258#define XFS_ATTRSETM_LOG_RES(mp) ((mp)->m_reservations.tr_attrsetm)
259 ((mp)->m_reservations.tr_attrset + \ 259#define XFS_ATTRSETRT_LOG_RES(mp) ((mp)->m_reservations.tr_attrsetrt)
260 (ext * (mp)->m_sb.sb_sectsize) + \ 260#define XFS_ATTRRM_LOG_RES(mp) ((mp)->m_reservations.tr_attrrm)
261 (ext * XFS_FSB_TO_B((mp), XFS_BM_MAXLEVELS(mp, XFS_ATTR_FORK))) + \
262 (128 * (ext + (ext * XFS_BM_MAXLEVELS(mp, XFS_ATTR_FORK)))))
263#define XFS_ATTRRM_LOG_RES(mp) ((mp)->m_reservations.tr_attrrm)
264#define XFS_CLEAR_AGI_BUCKET_LOG_RES(mp) ((mp)->m_reservations.tr_clearagi) 261#define XFS_CLEAR_AGI_BUCKET_LOG_RES(mp) ((mp)->m_reservations.tr_clearagi)
265 262#define XFS_QM_SBCHANGE_LOG_RES(mp) ((mp)->m_reservations.tr_qm_sbchange)
263#define XFS_QM_SETQLIM_LOG_RES(mp) ((mp)->m_reservations.tr_qm_setqlim)
264#define XFS_QM_DQALLOC_LOG_RES(mp) ((mp)->m_reservations.tr_qm_dqalloc)
265#define XFS_QM_QUOTAOFF_LOG_RES(mp) ((mp)->m_reservations.tr_qm_quotaoff)
266#define XFS_QM_QUOTAOFF_END_LOG_RES(mp) ((mp)->m_reservations.tr_qm_equotaoff)
267#define XFS_SB_LOG_RES(mp) ((mp)->m_reservations.tr_sb)
266 268
267/* 269/*
268 * Various log count values. 270 * Various log count values.
diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c
index 6011ee661339..0eda7254305f 100644
--- a/fs/xfs/xfs_trans_ail.c
+++ b/fs/xfs/xfs_trans_ail.c
@@ -55,20 +55,6 @@ xfs_ail_check(
55 ASSERT(XFS_LSN_CMP(prev_lip->li_lsn, lip->li_lsn) >= 0); 55 ASSERT(XFS_LSN_CMP(prev_lip->li_lsn, lip->li_lsn) >= 0);
56 56
57 57
58#ifdef XFS_TRANS_DEBUG
59 /*
60 * Walk the list checking lsn ordering, and that every entry has the
61 * XFS_LI_IN_AIL flag set. This is really expensive, so only do it
62 * when specifically debugging the transaction subsystem.
63 */
64 prev_lip = list_entry(&ailp->xa_ail, xfs_log_item_t, li_ail);
65 list_for_each_entry(lip, &ailp->xa_ail, li_ail) {
66 if (&prev_lip->li_ail != &ailp->xa_ail)
67 ASSERT(XFS_LSN_CMP(prev_lip->li_lsn, lip->li_lsn) <= 0);
68 ASSERT((lip->li_flags & XFS_LI_IN_AIL) != 0);
69 prev_lip = lip;
70 }
71#endif /* XFS_TRANS_DEBUG */
72} 58}
73#else /* !DEBUG */ 59#else /* !DEBUG */
74#define xfs_ail_check(a,l) 60#define xfs_ail_check(a,l)
diff --git a/fs/xfs/xfs_trans_dquot.c b/fs/xfs/xfs_trans_dquot.c
index 0c7fa54f309e..642c2d6e1db1 100644
--- a/fs/xfs/xfs_trans_dquot.c
+++ b/fs/xfs/xfs_trans_dquot.c
@@ -516,7 +516,7 @@ xfs_trans_unreserve_and_mod_dquots(
516 int i, j; 516 int i, j;
517 xfs_dquot_t *dqp; 517 xfs_dquot_t *dqp;
518 xfs_dqtrx_t *qtrx, *qa; 518 xfs_dqtrx_t *qtrx, *qa;
519 boolean_t locked; 519 bool locked;
520 520
521 if (!tp->t_dqinfo || !(tp->t_flags & XFS_TRANS_DQ_DIRTY)) 521 if (!tp->t_dqinfo || !(tp->t_flags & XFS_TRANS_DQ_DIRTY))
522 return; 522 return;
@@ -537,17 +537,17 @@ xfs_trans_unreserve_and_mod_dquots(
537 * about the number of blocks used field, or deltas. 537 * about the number of blocks used field, or deltas.
538 * Also we don't bother to zero the fields. 538 * Also we don't bother to zero the fields.
539 */ 539 */
540 locked = B_FALSE; 540 locked = false;
541 if (qtrx->qt_blk_res) { 541 if (qtrx->qt_blk_res) {
542 xfs_dqlock(dqp); 542 xfs_dqlock(dqp);
543 locked = B_TRUE; 543 locked = true;
544 dqp->q_res_bcount -= 544 dqp->q_res_bcount -=
545 (xfs_qcnt_t)qtrx->qt_blk_res; 545 (xfs_qcnt_t)qtrx->qt_blk_res;
546 } 546 }
547 if (qtrx->qt_ino_res) { 547 if (qtrx->qt_ino_res) {
548 if (!locked) { 548 if (!locked) {
549 xfs_dqlock(dqp); 549 xfs_dqlock(dqp);
550 locked = B_TRUE; 550 locked = true;
551 } 551 }
552 dqp->q_res_icount -= 552 dqp->q_res_icount -=
553 (xfs_qcnt_t)qtrx->qt_ino_res; 553 (xfs_qcnt_t)qtrx->qt_ino_res;
@@ -556,7 +556,7 @@ xfs_trans_unreserve_and_mod_dquots(
556 if (qtrx->qt_rtblk_res) { 556 if (qtrx->qt_rtblk_res) {
557 if (!locked) { 557 if (!locked) {
558 xfs_dqlock(dqp); 558 xfs_dqlock(dqp);
559 locked = B_TRUE; 559 locked = true;
560 } 560 }
561 dqp->q_res_rtbcount -= 561 dqp->q_res_rtbcount -=
562 (xfs_qcnt_t)qtrx->qt_rtblk_res; 562 (xfs_qcnt_t)qtrx->qt_rtblk_res;
diff --git a/fs/xfs/xfs_trans_inode.c b/fs/xfs/xfs_trans_inode.c
index d2eee20d5f5b..ac6d567704db 100644
--- a/fs/xfs/xfs_trans_inode.c
+++ b/fs/xfs/xfs_trans_inode.c
@@ -33,14 +33,6 @@
33#include "xfs_inode_item.h" 33#include "xfs_inode_item.h"
34#include "xfs_trace.h" 34#include "xfs_trace.h"
35 35
36#ifdef XFS_TRANS_DEBUG
37STATIC void
38xfs_trans_inode_broot_debug(
39 xfs_inode_t *ip);
40#else
41#define xfs_trans_inode_broot_debug(ip)
42#endif
43
44/* 36/*
45 * Add a locked inode to the transaction. 37 * Add a locked inode to the transaction.
46 * 38 *
@@ -67,8 +59,6 @@ xfs_trans_ijoin(
67 * Get a log_item_desc to point at the new item. 59 * Get a log_item_desc to point at the new item.
68 */ 60 */
69 xfs_trans_add_item(tp, &iip->ili_item); 61 xfs_trans_add_item(tp, &iip->ili_item);
70
71 xfs_trans_inode_broot_debug(ip);
72} 62}
73 63
74/* 64/*
@@ -135,34 +125,3 @@ xfs_trans_log_inode(
135 flags |= ip->i_itemp->ili_last_fields; 125 flags |= ip->i_itemp->ili_last_fields;
136 ip->i_itemp->ili_fields |= flags; 126 ip->i_itemp->ili_fields |= flags;
137} 127}
138
139#ifdef XFS_TRANS_DEBUG
140/*
141 * Keep track of the state of the inode btree root to make sure we
142 * log it properly.
143 */
144STATIC void
145xfs_trans_inode_broot_debug(
146 xfs_inode_t *ip)
147{
148 xfs_inode_log_item_t *iip;
149
150 ASSERT(ip->i_itemp != NULL);
151 iip = ip->i_itemp;
152 if (iip->ili_root_size != 0) {
153 ASSERT(iip->ili_orig_root != NULL);
154 kmem_free(iip->ili_orig_root);
155 iip->ili_root_size = 0;
156 iip->ili_orig_root = NULL;
157 }
158 if (ip->i_d.di_format == XFS_DINODE_FMT_BTREE) {
159 ASSERT((ip->i_df.if_broot != NULL) &&
160 (ip->i_df.if_broot_bytes > 0));
161 iip->ili_root_size = ip->i_df.if_broot_bytes;
162 iip->ili_orig_root =
163 (char*)kmem_alloc(iip->ili_root_size, KM_SLEEP);
164 memcpy(iip->ili_orig_root, (char*)(ip->i_df.if_broot),
165 iip->ili_root_size);
166 }
167}
168#endif
diff --git a/fs/xfs/xfs_types.h b/fs/xfs/xfs_types.h
index 7a41874f4c20..61ba1cfa974c 100644
--- a/fs/xfs/xfs_types.h
+++ b/fs/xfs/xfs_types.h
@@ -32,7 +32,6 @@ typedef unsigned int __uint32_t;
32typedef signed long long int __int64_t; 32typedef signed long long int __int64_t;
33typedef unsigned long long int __uint64_t; 33typedef unsigned long long int __uint64_t;
34 34
35typedef enum { B_FALSE,B_TRUE } boolean_t;
36typedef __uint32_t prid_t; /* project ID */ 35typedef __uint32_t prid_t; /* project ID */
37typedef __uint32_t inst_t; /* an instruction */ 36typedef __uint32_t inst_t; /* an instruction */
38 37
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c
index d95f565a390e..77ad74834baa 100644
--- a/fs/xfs/xfs_vnodeops.c
+++ b/fs/xfs/xfs_vnodeops.c
@@ -725,7 +725,7 @@ xfs_create(
725 int error; 725 int error;
726 xfs_bmap_free_t free_list; 726 xfs_bmap_free_t free_list;
727 xfs_fsblock_t first_block; 727 xfs_fsblock_t first_block;
728 boolean_t unlock_dp_on_error = B_FALSE; 728 bool unlock_dp_on_error = false;
729 uint cancel_flags; 729 uint cancel_flags;
730 int committed; 730 int committed;
731 prid_t prid; 731 prid_t prid;
@@ -794,7 +794,7 @@ xfs_create(
794 } 794 }
795 795
796 xfs_ilock(dp, XFS_ILOCK_EXCL | XFS_ILOCK_PARENT); 796 xfs_ilock(dp, XFS_ILOCK_EXCL | XFS_ILOCK_PARENT);
797 unlock_dp_on_error = B_TRUE; 797 unlock_dp_on_error = true;
798 798
799 xfs_bmap_init(&free_list, &first_block); 799 xfs_bmap_init(&free_list, &first_block);
800 800
@@ -830,7 +830,7 @@ xfs_create(
830 * error path. 830 * error path.
831 */ 831 */
832 xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL); 832 xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL);
833 unlock_dp_on_error = B_FALSE; 833 unlock_dp_on_error = false;
834 834
835 error = xfs_dir_createname(tp, dp, name, ip->i_ino, 835 error = xfs_dir_createname(tp, dp, name, ip->i_ino,
836 &first_block, &free_list, resblks ? 836 &first_block, &free_list, resblks ?
@@ -1367,7 +1367,7 @@ xfs_symlink(
1367 int pathlen; 1367 int pathlen;
1368 xfs_bmap_free_t free_list; 1368 xfs_bmap_free_t free_list;
1369 xfs_fsblock_t first_block; 1369 xfs_fsblock_t first_block;
1370 boolean_t unlock_dp_on_error = B_FALSE; 1370 bool unlock_dp_on_error = false;
1371 uint cancel_flags; 1371 uint cancel_flags;
1372 int committed; 1372 int committed;
1373 xfs_fileoff_t first_fsb; 1373 xfs_fileoff_t first_fsb;
@@ -1438,7 +1438,7 @@ xfs_symlink(
1438 } 1438 }
1439 1439
1440 xfs_ilock(dp, XFS_ILOCK_EXCL | XFS_ILOCK_PARENT); 1440 xfs_ilock(dp, XFS_ILOCK_EXCL | XFS_ILOCK_PARENT);
1441 unlock_dp_on_error = B_TRUE; 1441 unlock_dp_on_error = true;
1442 1442
1443 /* 1443 /*
1444 * Check whether the directory allows new symlinks or not. 1444 * Check whether the directory allows new symlinks or not.
@@ -1484,7 +1484,7 @@ xfs_symlink(
1484 * error path. 1484 * error path.
1485 */ 1485 */
1486 xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL); 1486 xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL);
1487 unlock_dp_on_error = B_FALSE; 1487 unlock_dp_on_error = false;
1488 1488
1489 /* 1489 /*
1490 * Also attach the dquot(s) to it, if applicable. 1490 * Also attach the dquot(s) to it, if applicable.