aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/9p/Kconfig3
-rw-r--r--fs/9p/vfs_dir.c92
-rw-r--r--fs/9p/vfs_file.c5
-rw-r--r--fs/9p/vfs_inode.c3
-rw-r--r--fs/9p/vfs_inode_dotl.c11
-rw-r--r--fs/Kconfig10
-rw-r--r--fs/adfs/Kconfig4
-rw-r--r--fs/affs/Kconfig4
-rw-r--r--fs/afs/Kconfig7
-rw-r--r--fs/aio.c7
-rw-r--r--fs/befs/Kconfig4
-rw-r--r--fs/bfs/Kconfig4
-rw-r--r--fs/binfmt_elf.c14
-rw-r--r--fs/binfmt_elf_fdpic.c7
-rw-r--r--fs/block_dev.c1
-rw-r--r--fs/btrfs/Kconfig3
-rw-r--r--fs/btrfs/extent-tree.c34
-rw-r--r--fs/btrfs/extent_map.c14
-rw-r--r--fs/btrfs/extent_map.h1
-rw-r--r--fs/btrfs/file-item.c4
-rw-r--r--fs/btrfs/file.c35
-rw-r--r--fs/btrfs/free-space-cache.c20
-rw-r--r--fs/btrfs/inode.c137
-rw-r--r--fs/btrfs/ioctl.c134
-rw-r--r--fs/btrfs/ordered-data.c13
-rw-r--r--fs/btrfs/qgroup.c20
-rw-r--r--fs/btrfs/relocation.c4
-rw-r--r--fs/btrfs/scrub.c25
-rw-r--r--fs/btrfs/send.c4
-rw-r--r--fs/btrfs/super.c2
-rw-r--r--fs/btrfs/transaction.c47
-rw-r--r--fs/btrfs/tree-log.c10
-rw-r--r--fs/btrfs/volumes.c26
-rw-r--r--fs/buffer.c7
-rw-r--r--fs/ceph/Kconfig4
-rw-r--r--fs/cifs/Kconfig8
-rw-r--r--fs/cifs/cifs_dfs_ref.c2
-rw-r--r--fs/cifs/cifsfs.c1
-rw-r--r--fs/cifs/cifsglob.h2
-rw-r--r--fs/cifs/connect.c2
-rw-r--r--fs/cifs/file.c141
-rw-r--r--fs/cifs/link.c2
-rw-r--r--fs/cifs/smb1ops.c8
-rw-r--r--fs/cifs/smb2ops.c2
-rw-r--r--fs/cifs/transport.c6
-rw-r--r--fs/compat.c52
-rw-r--r--fs/configfs/dir.c5
-rw-r--r--fs/debugfs/inode.c3
-rw-r--r--fs/dlm/dlm_internal.h3
-rw-r--r--fs/dlm/lock.c15
-rw-r--r--fs/dlm/user.c8
-rw-r--r--fs/ecryptfs/Kconfig4
-rw-r--r--fs/ecryptfs/crypto.c2
-rw-r--r--fs/ecryptfs/kthread.c6
-rw-r--r--fs/ecryptfs/mmap.c12
-rw-r--r--fs/efs/Kconfig4
-rw-r--r--fs/eventpoll.c22
-rw-r--r--fs/exec.c3
-rw-r--r--fs/ext3/super.c1
-rw-r--r--fs/ext4/Kconfig2
-rw-r--r--fs/ext4/extents.c22
-rw-r--r--fs/ext4/file.c8
-rw-r--r--fs/ext4/fsync.c2
-rw-r--r--fs/ext4/inode.c101
-rw-r--r--fs/ext4/namei.c6
-rw-r--r--fs/ext4/super.c30
-rw-r--r--fs/f2fs/acl.c13
-rw-r--r--fs/f2fs/checkpoint.c3
-rw-r--r--fs/f2fs/data.c18
-rw-r--r--fs/f2fs/debug.c50
-rw-r--r--fs/f2fs/dir.c18
-rw-r--r--fs/f2fs/f2fs.h20
-rw-r--r--fs/f2fs/file.c26
-rw-r--r--fs/f2fs/gc.c102
-rw-r--r--fs/f2fs/hash.c18
-rw-r--r--fs/f2fs/inode.c4
-rw-r--r--fs/f2fs/namei.c34
-rw-r--r--fs/f2fs/node.c56
-rw-r--r--fs/f2fs/recovery.c18
-rw-r--r--fs/f2fs/segment.c48
-rw-r--r--fs/f2fs/segment.h15
-rw-r--r--fs/f2fs/super.c112
-rw-r--r--fs/f2fs/xattr.c7
-rw-r--r--fs/file.c4
-rw-r--r--fs/fuse/Kconfig16
-rw-r--r--fs/fuse/cuse.c46
-rw-r--r--fs/fuse/dev.c133
-rw-r--r--fs/fuse/dir.c259
-rw-r--r--fs/fuse/file.c243
-rw-r--r--fs/fuse/fuse_i.h74
-rw-r--r--fs/fuse/inode.c16
-rw-r--r--fs/gfs2/aops.c17
-rw-r--r--fs/gfs2/bmap.c30
-rw-r--r--fs/gfs2/dir.c30
-rw-r--r--fs/gfs2/file.c6
-rw-r--r--fs/gfs2/glock.c116
-rw-r--r--fs/gfs2/incore.h8
-rw-r--r--fs/gfs2/inode.c8
-rw-r--r--fs/gfs2/lock_dlm.c8
-rw-r--r--fs/gfs2/log.c76
-rw-r--r--fs/gfs2/log.h12
-rw-r--r--fs/gfs2/lops.c83
-rw-r--r--fs/gfs2/lops.h14
-rw-r--r--fs/gfs2/meta_io.c35
-rw-r--r--fs/gfs2/meta_io.h3
-rw-r--r--fs/gfs2/ops_fstype.c4
-rw-r--r--fs/gfs2/quota.c4
-rw-r--r--fs/gfs2/rgrp.c53
-rw-r--r--fs/gfs2/super.c70
-rw-r--r--fs/gfs2/super.h3
-rw-r--r--fs/gfs2/sys.c48
-rw-r--r--fs/gfs2/trans.c124
-rw-r--r--fs/gfs2/trans.h3
-rw-r--r--fs/gfs2/util.c3
-rw-r--r--fs/gfs2/xattr.c36
-rw-r--r--fs/hfs/Kconfig4
-rw-r--r--fs/hpfs/inode.c2
-rw-r--r--fs/jbd/journal.c3
-rw-r--r--fs/jbd2/transaction.c30
-rw-r--r--fs/jffs2/Kconfig10
-rw-r--r--fs/jfs/super.c2
-rw-r--r--fs/lockd/clntproc.c3
-rw-r--r--fs/logfs/Kconfig4
-rw-r--r--fs/nfs/blocklayout/blocklayout.c1
-rw-r--r--fs/nfs/callback_proc.c61
-rw-r--r--fs/nfs/delegation.c154
-rw-r--r--fs/nfs/delegation.h1
-rw-r--r--fs/nfs/dir.c16
-rw-r--r--fs/nfs/getroot.c3
-rw-r--r--fs/nfs/inode.c5
-rw-r--r--fs/nfs/internal.h1
-rw-r--r--fs/nfs/namespace.c20
-rw-r--r--fs/nfs/nfs4_fs.h4
-rw-r--r--fs/nfs/nfs4client.c62
-rw-r--r--fs/nfs/nfs4proc.c149
-rw-r--r--fs/nfs/nfs4state.c33
-rw-r--r--fs/nfs/objlayout/objio_osd.c1
-rw-r--r--fs/nfs/pnfs.c152
-rw-r--r--fs/nfs/pnfs.h7
-rw-r--r--fs/nfs/read.c10
-rw-r--r--fs/nfs/super.c73
-rw-r--r--fs/nfs/unlink.c5
-rw-r--r--fs/nfs/write.c10
-rw-r--r--fs/nfsd/Kconfig4
-rw-r--r--fs/nfsd/nfs4state.c6
-rw-r--r--fs/nfsd/nfsd.h6
-rw-r--r--fs/nfsd/nfssvc.c6
-rw-r--r--fs/nilfs2/Kconfig3
-rw-r--r--fs/nilfs2/file.c2
-rw-r--r--fs/nilfs2/ioctl.c5
-rw-r--r--fs/notify/inotify/inotify_user.c4
-rw-r--r--fs/ocfs2/alloc.c3
-rw-r--r--fs/ocfs2/aops.c1
-rw-r--r--fs/ocfs2/cluster/heartbeat.c6
-rw-r--r--fs/ocfs2/cluster/tcp.c8
-rw-r--r--fs/ocfs2/dlm/dlmdomain.c4
-rw-r--r--fs/ocfs2/dlmglue.c5
-rw-r--r--fs/ocfs2/extent_map.c3
-rw-r--r--fs/ocfs2/journal.c10
-rw-r--r--fs/ocfs2/localalloc.c8
-rw-r--r--fs/ocfs2/stack_o2cb.c2
-rw-r--r--fs/ocfs2/super.c6
-rw-r--r--fs/ocfs2/sysfile.c3
-rw-r--r--fs/proc/Makefile3
-rw-r--r--fs/proc/array.c4
-rw-r--r--fs/proc/meminfo.c6
-rw-r--r--fs/proc/proc_net.c14
-rw-r--r--fs/proc/task_mmu.c2
-rw-r--r--fs/pstore/inode.c18
-rw-r--r--fs/pstore/platform.c35
-rw-r--r--fs/pstore/ram.c24
-rw-r--r--fs/pstore/ram_core.c9
-rw-r--r--fs/qnx6/inode.c2
-rw-r--r--fs/select.c1
-rw-r--r--fs/seq_file.c2
-rw-r--r--fs/splice.c4
-rw-r--r--fs/sysfs/group.c42
-rw-r--r--fs/sysfs/mount.c2
-rw-r--r--fs/sysfs/symlink.c45
-rw-r--r--fs/sysfs/sysfs.h2
-rw-r--r--fs/timerfd.c85
-rw-r--r--fs/ubifs/file.c1
-rw-r--r--fs/udf/super.c3
-rw-r--r--fs/ufs/Kconfig2
-rw-r--r--fs/xfs/Kconfig4
-rw-r--r--fs/xfs/xfs_alloc.c2
-rw-r--r--fs/xfs/xfs_aops.c2
-rw-r--r--fs/xfs/xfs_attr.c9
-rw-r--r--fs/xfs/xfs_bmap.c124
-rw-r--r--fs/xfs/xfs_buf.c34
-rw-r--r--fs/xfs/xfs_buf.h6
-rw-r--r--fs/xfs/xfs_buf_item.c177
-rw-r--r--fs/xfs/xfs_buf_item.h16
-rw-r--r--fs/xfs/xfs_dfrag.c4
-rw-r--r--fs/xfs/xfs_dir2_block.c6
-rw-r--r--fs/xfs/xfs_dquot.c12
-rw-r--r--fs/xfs/xfs_fsops.c4
-rw-r--r--fs/xfs/xfs_ialloc.c4
-rw-r--r--fs/xfs/xfs_inode.c6
-rw-r--r--fs/xfs/xfs_inode.h1
-rw-r--r--fs/xfs/xfs_inode_item.c16
-rw-r--r--fs/xfs/xfs_inode_item.h4
-rw-r--r--fs/xfs/xfs_iomap.c86
-rw-r--r--fs/xfs/xfs_log.c10
-rw-r--r--fs/xfs/xfs_mount.c14
-rw-r--r--fs/xfs/xfs_mount.h9
-rw-r--r--fs/xfs/xfs_qm.c7
-rw-r--r--fs/xfs/xfs_qm_bhv.c2
-rw-r--r--fs/xfs/xfs_qm_syscalls.c32
-rw-r--r--fs/xfs/xfs_super.c29
-rw-r--r--fs/xfs/xfs_trace.h1
-rw-r--r--fs/xfs/xfs_trans.c376
-rw-r--r--fs/xfs/xfs_trans.h18
-rw-r--r--fs/xfs/xfs_trans_ail.c14
-rw-r--r--fs/xfs/xfs_trans_buf.c27
-rw-r--r--fs/xfs/xfs_trans_dquot.c10
-rw-r--r--fs/xfs/xfs_trans_inode.c41
-rw-r--r--fs/xfs/xfs_types.h1
-rw-r--r--fs/xfs/xfs_vnodeops.c12
219 files changed, 3429 insertions, 2270 deletions
diff --git a/fs/9p/Kconfig b/fs/9p/Kconfig
index 0a93dc1cb4ac..55abfd62654a 100644
--- a/fs/9p/Kconfig
+++ b/fs/9p/Kconfig
@@ -11,8 +11,7 @@ config 9P_FS
11 11
12if 9P_FS 12if 9P_FS
13config 9P_FSCACHE 13config 9P_FSCACHE
14 bool "Enable 9P client caching support (EXPERIMENTAL)" 14 bool "Enable 9P client caching support"
15 depends on EXPERIMENTAL
16 depends on 9P_FS=m && FSCACHE || 9P_FS=y && FSCACHE=y 15 depends on 9P_FS=m && FSCACHE || 9P_FS=y && FSCACHE=y
17 help 16 help
18 Choose Y here to enable persistent, read-only local 17 Choose Y here to enable persistent, read-only local
diff --git a/fs/9p/vfs_dir.c b/fs/9p/vfs_dir.c
index ff911e779651..be1e34adc3c6 100644
--- a/fs/9p/vfs_dir.c
+++ b/fs/9p/vfs_dir.c
@@ -52,10 +52,9 @@
52 */ 52 */
53 53
54struct p9_rdir { 54struct p9_rdir {
55 struct mutex mutex;
56 int head; 55 int head;
57 int tail; 56 int tail;
58 uint8_t *buf; 57 uint8_t buf[];
59}; 58};
60 59
61/** 60/**
@@ -93,33 +92,12 @@ static void p9stat_init(struct p9_wstat *stbuf)
93 * 92 *
94 */ 93 */
95 94
96static int v9fs_alloc_rdir_buf(struct file *filp, int buflen) 95static struct p9_rdir *v9fs_alloc_rdir_buf(struct file *filp, int buflen)
97{ 96{
98 struct p9_rdir *rdir; 97 struct p9_fid *fid = filp->private_data;
99 struct p9_fid *fid; 98 if (!fid->rdir)
100 int err = 0; 99 fid->rdir = kzalloc(sizeof(struct p9_rdir) + buflen, GFP_KERNEL);
101 100 return fid->rdir;
102 fid = filp->private_data;
103 if (!fid->rdir) {
104 rdir = kmalloc(sizeof(struct p9_rdir) + buflen, GFP_KERNEL);
105
106 if (rdir == NULL) {
107 err = -ENOMEM;
108 goto exit;
109 }
110 spin_lock(&filp->f_dentry->d_lock);
111 if (!fid->rdir) {
112 rdir->buf = (uint8_t *)rdir + sizeof(struct p9_rdir);
113 mutex_init(&rdir->mutex);
114 rdir->head = rdir->tail = 0;
115 fid->rdir = (void *) rdir;
116 rdir = NULL;
117 }
118 spin_unlock(&filp->f_dentry->d_lock);
119 kfree(rdir);
120 }
121exit:
122 return err;
123} 101}
124 102
125/** 103/**
@@ -145,20 +123,16 @@ static int v9fs_dir_readdir(struct file *filp, void *dirent, filldir_t filldir)
145 123
146 buflen = fid->clnt->msize - P9_IOHDRSZ; 124 buflen = fid->clnt->msize - P9_IOHDRSZ;
147 125
148 err = v9fs_alloc_rdir_buf(filp, buflen); 126 rdir = v9fs_alloc_rdir_buf(filp, buflen);
149 if (err) 127 if (!rdir)
150 goto exit; 128 return -ENOMEM;
151 rdir = (struct p9_rdir *) fid->rdir;
152 129
153 err = mutex_lock_interruptible(&rdir->mutex); 130 while (1) {
154 if (err)
155 return err;
156 while (err == 0) {
157 if (rdir->tail == rdir->head) { 131 if (rdir->tail == rdir->head) {
158 err = v9fs_file_readn(filp, rdir->buf, NULL, 132 err = v9fs_file_readn(filp, rdir->buf, NULL,
159 buflen, filp->f_pos); 133 buflen, filp->f_pos);
160 if (err <= 0) 134 if (err <= 0)
161 goto unlock_and_exit; 135 return err;
162 136
163 rdir->head = 0; 137 rdir->head = 0;
164 rdir->tail = err; 138 rdir->tail = err;
@@ -169,9 +143,8 @@ static int v9fs_dir_readdir(struct file *filp, void *dirent, filldir_t filldir)
169 rdir->tail - rdir->head, &st); 143 rdir->tail - rdir->head, &st);
170 if (err) { 144 if (err) {
171 p9_debug(P9_DEBUG_VFS, "returned %d\n", err); 145 p9_debug(P9_DEBUG_VFS, "returned %d\n", err);
172 err = -EIO;
173 p9stat_free(&st); 146 p9stat_free(&st);
174 goto unlock_and_exit; 147 return -EIO;
175 } 148 }
176 reclen = st.size+2; 149 reclen = st.size+2;
177 150
@@ -180,19 +153,13 @@ static int v9fs_dir_readdir(struct file *filp, void *dirent, filldir_t filldir)
180 153
181 p9stat_free(&st); 154 p9stat_free(&st);
182 155
183 if (over) { 156 if (over)
184 err = 0; 157 return 0;
185 goto unlock_and_exit; 158
186 }
187 rdir->head += reclen; 159 rdir->head += reclen;
188 filp->f_pos += reclen; 160 filp->f_pos += reclen;
189 } 161 }
190 } 162 }
191
192unlock_and_exit:
193 mutex_unlock(&rdir->mutex);
194exit:
195 return err;
196} 163}
197 164
198/** 165/**
@@ -218,21 +185,16 @@ static int v9fs_dir_readdir_dotl(struct file *filp, void *dirent,
218 185
219 buflen = fid->clnt->msize - P9_READDIRHDRSZ; 186 buflen = fid->clnt->msize - P9_READDIRHDRSZ;
220 187
221 err = v9fs_alloc_rdir_buf(filp, buflen); 188 rdir = v9fs_alloc_rdir_buf(filp, buflen);
222 if (err) 189 if (!rdir)
223 goto exit; 190 return -ENOMEM;
224 rdir = (struct p9_rdir *) fid->rdir;
225 191
226 err = mutex_lock_interruptible(&rdir->mutex); 192 while (1) {
227 if (err)
228 return err;
229
230 while (err == 0) {
231 if (rdir->tail == rdir->head) { 193 if (rdir->tail == rdir->head) {
232 err = p9_client_readdir(fid, rdir->buf, buflen, 194 err = p9_client_readdir(fid, rdir->buf, buflen,
233 filp->f_pos); 195 filp->f_pos);
234 if (err <= 0) 196 if (err <= 0)
235 goto unlock_and_exit; 197 return err;
236 198
237 rdir->head = 0; 199 rdir->head = 0;
238 rdir->tail = err; 200 rdir->tail = err;
@@ -245,8 +207,7 @@ static int v9fs_dir_readdir_dotl(struct file *filp, void *dirent,
245 &curdirent); 207 &curdirent);
246 if (err < 0) { 208 if (err < 0) {
247 p9_debug(P9_DEBUG_VFS, "returned %d\n", err); 209 p9_debug(P9_DEBUG_VFS, "returned %d\n", err);
248 err = -EIO; 210 return -EIO;
249 goto unlock_and_exit;
250 } 211 }
251 212
252 /* d_off in dirent structure tracks the offset into 213 /* d_off in dirent structure tracks the offset into
@@ -261,20 +222,13 @@ static int v9fs_dir_readdir_dotl(struct file *filp, void *dirent,
261 curdirent.d_type); 222 curdirent.d_type);
262 oldoffset = curdirent.d_off; 223 oldoffset = curdirent.d_off;
263 224
264 if (over) { 225 if (over)
265 err = 0; 226 return 0;
266 goto unlock_and_exit;
267 }
268 227
269 filp->f_pos = curdirent.d_off; 228 filp->f_pos = curdirent.d_off;
270 rdir->head += err; 229 rdir->head += err;
271 } 230 }
272 } 231 }
273
274unlock_and_exit:
275 mutex_unlock(&rdir->mutex);
276exit:
277 return err;
278} 232}
279 233
280 234
diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c
index c2483e97beee..c921ac92ea4c 100644
--- a/fs/9p/vfs_file.c
+++ b/fs/9p/vfs_file.c
@@ -80,10 +80,6 @@ int v9fs_file_open(struct inode *inode, struct file *file)
80 p9_client_clunk(fid); 80 p9_client_clunk(fid);
81 return err; 81 return err;
82 } 82 }
83 if (file->f_flags & O_TRUNC) {
84 i_size_write(inode, 0);
85 inode->i_blocks = 0;
86 }
87 if ((file->f_flags & O_APPEND) && 83 if ((file->f_flags & O_APPEND) &&
88 (!v9fs_proto_dotu(v9ses) && !v9fs_proto_dotl(v9ses))) 84 (!v9fs_proto_dotu(v9ses) && !v9fs_proto_dotl(v9ses)))
89 generic_file_llseek(file, 0, SEEK_END); 85 generic_file_llseek(file, 0, SEEK_END);
@@ -620,6 +616,7 @@ v9fs_vm_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
620 lock_page(page); 616 lock_page(page);
621 if (page->mapping != inode->i_mapping) 617 if (page->mapping != inode->i_mapping)
622 goto out_unlock; 618 goto out_unlock;
619 wait_for_stable_page(page);
623 620
624 return VM_FAULT_LOCKED; 621 return VM_FAULT_LOCKED;
625out_unlock: 622out_unlock:
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index 1581fe218934..b5340c829de1 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -192,9 +192,6 @@ int v9fs_uflags2omode(int uflags, int extended)
192 break; 192 break;
193 } 193 }
194 194
195 if (uflags & O_TRUNC)
196 ret |= P9_OTRUNC;
197
198 if (extended) { 195 if (extended) {
199 if (uflags & O_EXCL) 196 if (uflags & O_EXCL)
200 ret |= P9_OEXCL; 197 ret |= P9_OEXCL;
diff --git a/fs/9p/vfs_inode_dotl.c b/fs/9p/vfs_inode_dotl.c
index e27886573e7d..07f409288d1b 100644
--- a/fs/9p/vfs_inode_dotl.c
+++ b/fs/9p/vfs_inode_dotl.c
@@ -186,7 +186,6 @@ static int v9fs_mapped_dotl_flags(int flags)
186 { O_CREAT, P9_DOTL_CREATE }, 186 { O_CREAT, P9_DOTL_CREATE },
187 { O_EXCL, P9_DOTL_EXCL }, 187 { O_EXCL, P9_DOTL_EXCL },
188 { O_NOCTTY, P9_DOTL_NOCTTY }, 188 { O_NOCTTY, P9_DOTL_NOCTTY },
189 { O_TRUNC, P9_DOTL_TRUNC },
190 { O_APPEND, P9_DOTL_APPEND }, 189 { O_APPEND, P9_DOTL_APPEND },
191 { O_NONBLOCK, P9_DOTL_NONBLOCK }, 190 { O_NONBLOCK, P9_DOTL_NONBLOCK },
192 { O_DSYNC, P9_DOTL_DSYNC }, 191 { O_DSYNC, P9_DOTL_DSYNC },
@@ -268,8 +267,14 @@ v9fs_vfs_atomic_open_dotl(struct inode *dir, struct dentry *dentry,
268 } 267 }
269 268
270 /* Only creates */ 269 /* Only creates */
271 if (!(flags & O_CREAT) || dentry->d_inode) 270 if (!(flags & O_CREAT))
272 return finish_no_open(file, res); 271 return finish_no_open(file, res);
272 else if (dentry->d_inode) {
273 if ((flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))
274 return -EEXIST;
275 else
276 return finish_no_open(file, res);
277 }
273 278
274 v9ses = v9fs_inode2v9ses(dir); 279 v9ses = v9fs_inode2v9ses(dir);
275 280
diff --git a/fs/Kconfig b/fs/Kconfig
index cfe512fd1caf..780725a463b1 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -68,16 +68,6 @@ source "fs/quota/Kconfig"
68source "fs/autofs4/Kconfig" 68source "fs/autofs4/Kconfig"
69source "fs/fuse/Kconfig" 69source "fs/fuse/Kconfig"
70 70
71config CUSE
72 tristate "Character device in Userspace support"
73 depends on FUSE_FS
74 help
75 This FUSE extension allows character devices to be
76 implemented in userspace.
77
78 If you want to develop or use userspace character device
79 based on CUSE, answer Y or M.
80
81config GENERIC_ACL 71config GENERIC_ACL
82 bool 72 bool
83 select FS_POSIX_ACL 73 select FS_POSIX_ACL
diff --git a/fs/adfs/Kconfig b/fs/adfs/Kconfig
index e55182a74605..c5a7787dd5e9 100644
--- a/fs/adfs/Kconfig
+++ b/fs/adfs/Kconfig
@@ -1,6 +1,6 @@
1config ADFS_FS 1config ADFS_FS
2 tristate "ADFS file system support (EXPERIMENTAL)" 2 tristate "ADFS file system support"
3 depends on BLOCK && EXPERIMENTAL 3 depends on BLOCK
4 help 4 help
5 The Acorn Disc Filing System is the standard file system of the 5 The Acorn Disc Filing System is the standard file system of the
6 RiscOS operating system which runs on Acorn's ARM-based Risc PC 6 RiscOS operating system which runs on Acorn's ARM-based Risc PC
diff --git a/fs/affs/Kconfig b/fs/affs/Kconfig
index cfad9afb4762..a04d9e848d05 100644
--- a/fs/affs/Kconfig
+++ b/fs/affs/Kconfig
@@ -1,6 +1,6 @@
1config AFFS_FS 1config AFFS_FS
2 tristate "Amiga FFS file system support (EXPERIMENTAL)" 2 tristate "Amiga FFS file system support"
3 depends on BLOCK && EXPERIMENTAL 3 depends on BLOCK
4 help 4 help
5 The Fast File System (FFS) is the common file system used on hard 5 The Fast File System (FFS) is the common file system used on hard
6 disks by Amiga(tm) systems since AmigaOS Version 1.3 (34.20). Say Y 6 disks by Amiga(tm) systems since AmigaOS Version 1.3 (34.20). Say Y
diff --git a/fs/afs/Kconfig b/fs/afs/Kconfig
index 8f975f25b486..ebba3b18e5da 100644
--- a/fs/afs/Kconfig
+++ b/fs/afs/Kconfig
@@ -1,6 +1,6 @@
1config AFS_FS 1config AFS_FS
2 tristate "Andrew File System support (AFS) (EXPERIMENTAL)" 2 tristate "Andrew File System support (AFS)"
3 depends on INET && EXPERIMENTAL 3 depends on INET
4 select AF_RXRPC 4 select AF_RXRPC
5 select DNS_RESOLVER 5 select DNS_RESOLVER
6 help 6 help
@@ -22,8 +22,7 @@ config AFS_DEBUG
22 If unsure, say N. 22 If unsure, say N.
23 23
24config AFS_FSCACHE 24config AFS_FSCACHE
25 bool "Provide AFS client caching support (EXPERIMENTAL)" 25 bool "Provide AFS client caching support"
26 depends on EXPERIMENTAL
27 depends on AFS_FS=m && FSCACHE || AFS_FS=y && FSCACHE=y 26 depends on AFS_FS=m && FSCACHE || AFS_FS=y && FSCACHE=y
28 help 27 help
29 Say Y here if you want AFS data to be cached locally on disk through 28 Say Y here if you want AFS data to be cached locally on disk through
diff --git a/fs/aio.c b/fs/aio.c
index 71f613cf4a85..064bfbe37566 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -101,7 +101,7 @@ static int aio_setup_ring(struct kioctx *ctx)
101 struct aio_ring *ring; 101 struct aio_ring *ring;
102 struct aio_ring_info *info = &ctx->ring_info; 102 struct aio_ring_info *info = &ctx->ring_info;
103 unsigned nr_events = ctx->max_reqs; 103 unsigned nr_events = ctx->max_reqs;
104 unsigned long size; 104 unsigned long size, populate;
105 int nr_pages; 105 int nr_pages;
106 106
107 /* Compensate for the ring buffer's head/tail overlap entry */ 107 /* Compensate for the ring buffer's head/tail overlap entry */
@@ -129,7 +129,8 @@ static int aio_setup_ring(struct kioctx *ctx)
129 down_write(&ctx->mm->mmap_sem); 129 down_write(&ctx->mm->mmap_sem);
130 info->mmap_base = do_mmap_pgoff(NULL, 0, info->mmap_size, 130 info->mmap_base = do_mmap_pgoff(NULL, 0, info->mmap_size,
131 PROT_READ|PROT_WRITE, 131 PROT_READ|PROT_WRITE,
132 MAP_ANONYMOUS|MAP_PRIVATE, 0); 132 MAP_ANONYMOUS|MAP_PRIVATE, 0,
133 &populate);
133 if (IS_ERR((void *)info->mmap_base)) { 134 if (IS_ERR((void *)info->mmap_base)) {
134 up_write(&ctx->mm->mmap_sem); 135 up_write(&ctx->mm->mmap_sem);
135 info->mmap_size = 0; 136 info->mmap_size = 0;
@@ -147,6 +148,8 @@ static int aio_setup_ring(struct kioctx *ctx)
147 aio_free_ring(ctx); 148 aio_free_ring(ctx);
148 return -EAGAIN; 149 return -EAGAIN;
149 } 150 }
151 if (populate)
152 mm_populate(info->mmap_base, populate);
150 153
151 ctx->user_id = info->mmap_base; 154 ctx->user_id = info->mmap_base;
152 155
diff --git a/fs/befs/Kconfig b/fs/befs/Kconfig
index 7835d30f211f..edc5cc2aefad 100644
--- a/fs/befs/Kconfig
+++ b/fs/befs/Kconfig
@@ -1,6 +1,6 @@
1config BEFS_FS 1config BEFS_FS
2 tristate "BeOS file system (BeFS) support (read only) (EXPERIMENTAL)" 2 tristate "BeOS file system (BeFS) support (read only)"
3 depends on BLOCK && EXPERIMENTAL 3 depends on BLOCK
4 select NLS 4 select NLS
5 help 5 help
6 The BeOS File System (BeFS) is the native file system of Be, Inc's 6 The BeOS File System (BeFS) is the native file system of Be, Inc's
diff --git a/fs/bfs/Kconfig b/fs/bfs/Kconfig
index c2336c62024f..3728a6479c64 100644
--- a/fs/bfs/Kconfig
+++ b/fs/bfs/Kconfig
@@ -1,6 +1,6 @@
1config BFS_FS 1config BFS_FS
2 tristate "BFS file system support (EXPERIMENTAL)" 2 tristate "BFS file system support"
3 depends on BLOCK && EXPERIMENTAL 3 depends on BLOCK
4 help 4 help
5 Boot File System (BFS) is a file system used under SCO UnixWare to 5 Boot File System (BFS) is a file system used under SCO UnixWare to
6 allow the bootloader access to the kernel image and other important 6 allow the bootloader access to the kernel image and other important
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 0c42cdbabecf..ff9dbc630efa 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -33,6 +33,7 @@
33#include <linux/elf.h> 33#include <linux/elf.h>
34#include <linux/utsname.h> 34#include <linux/utsname.h>
35#include <linux/coredump.h> 35#include <linux/coredump.h>
36#include <linux/sched.h>
36#include <asm/uaccess.h> 37#include <asm/uaccess.h>
37#include <asm/param.h> 38#include <asm/param.h>
38#include <asm/page.h> 39#include <asm/page.h>
@@ -1248,7 +1249,7 @@ static int writenote(struct memelfnote *men, struct file *file,
1248#undef DUMP_WRITE 1249#undef DUMP_WRITE
1249 1250
1250static void fill_elf_header(struct elfhdr *elf, int segs, 1251static void fill_elf_header(struct elfhdr *elf, int segs,
1251 u16 machine, u32 flags, u8 osabi) 1252 u16 machine, u32 flags)
1252{ 1253{
1253 memset(elf, 0, sizeof(*elf)); 1254 memset(elf, 0, sizeof(*elf));
1254 1255
@@ -1320,8 +1321,11 @@ static void fill_prstatus(struct elf_prstatus *prstatus,
1320 cputime_to_timeval(cputime.utime, &prstatus->pr_utime); 1321 cputime_to_timeval(cputime.utime, &prstatus->pr_utime);
1321 cputime_to_timeval(cputime.stime, &prstatus->pr_stime); 1322 cputime_to_timeval(cputime.stime, &prstatus->pr_stime);
1322 } else { 1323 } else {
1323 cputime_to_timeval(p->utime, &prstatus->pr_utime); 1324 cputime_t utime, stime;
1324 cputime_to_timeval(p->stime, &prstatus->pr_stime); 1325
1326 task_cputime(p, &utime, &stime);
1327 cputime_to_timeval(utime, &prstatus->pr_utime);
1328 cputime_to_timeval(stime, &prstatus->pr_stime);
1325 } 1329 }
1326 cputime_to_timeval(p->signal->cutime, &prstatus->pr_cutime); 1330 cputime_to_timeval(p->signal->cutime, &prstatus->pr_cutime);
1327 cputime_to_timeval(p->signal->cstime, &prstatus->pr_cstime); 1331 cputime_to_timeval(p->signal->cstime, &prstatus->pr_cstime);
@@ -1630,7 +1634,7 @@ static int fill_note_info(struct elfhdr *elf, int phdrs,
1630 * Initialize the ELF file header. 1634 * Initialize the ELF file header.
1631 */ 1635 */
1632 fill_elf_header(elf, phdrs, 1636 fill_elf_header(elf, phdrs,
1633 view->e_machine, view->e_flags, view->ei_osabi); 1637 view->e_machine, view->e_flags);
1634 1638
1635 /* 1639 /*
1636 * Allocate a structure for each thread. 1640 * Allocate a structure for each thread.
@@ -1870,7 +1874,7 @@ static int fill_note_info(struct elfhdr *elf, int phdrs,
1870 elf_core_copy_regs(&info->prstatus->pr_reg, regs); 1874 elf_core_copy_regs(&info->prstatus->pr_reg, regs);
1871 1875
1872 /* Set up header */ 1876 /* Set up header */
1873 fill_elf_header(elf, phdrs, ELF_ARCH, ELF_CORE_EFLAGS, ELF_OSABI); 1877 fill_elf_header(elf, phdrs, ELF_ARCH, ELF_CORE_EFLAGS);
1874 1878
1875 /* 1879 /*
1876 * Set up the notes in similar form to SVR4 core dumps made 1880 * Set up the notes in similar form to SVR4 core dumps made
diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c
index dc84732e554f..cb240dd3b402 100644
--- a/fs/binfmt_elf_fdpic.c
+++ b/fs/binfmt_elf_fdpic.c
@@ -1375,8 +1375,11 @@ static void fill_prstatus(struct elf_prstatus *prstatus,
1375 cputime_to_timeval(cputime.utime, &prstatus->pr_utime); 1375 cputime_to_timeval(cputime.utime, &prstatus->pr_utime);
1376 cputime_to_timeval(cputime.stime, &prstatus->pr_stime); 1376 cputime_to_timeval(cputime.stime, &prstatus->pr_stime);
1377 } else { 1377 } else {
1378 cputime_to_timeval(p->utime, &prstatus->pr_utime); 1378 cputime_t utime, stime;
1379 cputime_to_timeval(p->stime, &prstatus->pr_stime); 1379
1380 task_cputime(p, &utime, &stime);
1381 cputime_to_timeval(utime, &prstatus->pr_utime);
1382 cputime_to_timeval(stime, &prstatus->pr_stime);
1380 } 1383 }
1381 cputime_to_timeval(p->signal->cutime, &prstatus->pr_cutime); 1384 cputime_to_timeval(p->signal->cutime, &prstatus->pr_cutime);
1382 cputime_to_timeval(p->signal->cstime, &prstatus->pr_cstime); 1385 cputime_to_timeval(p->signal->cstime, &prstatus->pr_cstime);
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 172f8491a2bd..78333a37f49d 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -994,6 +994,7 @@ int revalidate_disk(struct gendisk *disk)
994 994
995 mutex_lock(&bdev->bd_mutex); 995 mutex_lock(&bdev->bd_mutex);
996 check_disk_size_change(disk, bdev); 996 check_disk_size_change(disk, bdev);
997 bdev->bd_invalidated = 0;
997 mutex_unlock(&bdev->bd_mutex); 998 mutex_unlock(&bdev->bd_mutex);
998 bdput(bdev); 999 bdput(bdev);
999 return ret; 1000 return ret;
diff --git a/fs/btrfs/Kconfig b/fs/btrfs/Kconfig
index d33f01c08b60..ccd25ba7a9ac 100644
--- a/fs/btrfs/Kconfig
+++ b/fs/btrfs/Kconfig
@@ -1,6 +1,5 @@
1config BTRFS_FS 1config BTRFS_FS
2 tristate "Btrfs filesystem (EXPERIMENTAL) Unstable disk format" 2 tristate "Btrfs filesystem Unstable disk format"
3 depends on EXPERIMENTAL
4 select LIBCRC32C 3 select LIBCRC32C
5 select ZLIB_INFLATE 4 select ZLIB_INFLATE
6 select ZLIB_DEFLATE 5 select ZLIB_DEFLATE
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 521e9d4424f6..1e59ed575cc9 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -3997,7 +3997,7 @@ again:
3997 * We make the other tasks wait for the flush only when we can flush 3997 * We make the other tasks wait for the flush only when we can flush
3998 * all things. 3998 * all things.
3999 */ 3999 */
4000 if (ret && flush == BTRFS_RESERVE_FLUSH_ALL) { 4000 if (ret && flush != BTRFS_RESERVE_NO_FLUSH) {
4001 flushing = true; 4001 flushing = true;
4002 space_info->flush = 1; 4002 space_info->flush = 1;
4003 } 4003 }
@@ -4534,7 +4534,7 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
4534 unsigned nr_extents = 0; 4534 unsigned nr_extents = 0;
4535 int extra_reserve = 0; 4535 int extra_reserve = 0;
4536 enum btrfs_reserve_flush_enum flush = BTRFS_RESERVE_FLUSH_ALL; 4536 enum btrfs_reserve_flush_enum flush = BTRFS_RESERVE_FLUSH_ALL;
4537 int ret; 4537 int ret = 0;
4538 bool delalloc_lock = true; 4538 bool delalloc_lock = true;
4539 4539
4540 /* If we are a free space inode we need to not flush since we will be in 4540 /* If we are a free space inode we need to not flush since we will be in
@@ -4579,20 +4579,18 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
4579 csum_bytes = BTRFS_I(inode)->csum_bytes; 4579 csum_bytes = BTRFS_I(inode)->csum_bytes;
4580 spin_unlock(&BTRFS_I(inode)->lock); 4580 spin_unlock(&BTRFS_I(inode)->lock);
4581 4581
4582 if (root->fs_info->quota_enabled) { 4582 if (root->fs_info->quota_enabled)
4583 ret = btrfs_qgroup_reserve(root, num_bytes + 4583 ret = btrfs_qgroup_reserve(root, num_bytes +
4584 nr_extents * root->leafsize); 4584 nr_extents * root->leafsize);
4585 if (ret) {
4586 spin_lock(&BTRFS_I(inode)->lock);
4587 calc_csum_metadata_size(inode, num_bytes, 0);
4588 spin_unlock(&BTRFS_I(inode)->lock);
4589 if (delalloc_lock)
4590 mutex_unlock(&BTRFS_I(inode)->delalloc_mutex);
4591 return ret;
4592 }
4593 }
4594 4585
4595 ret = reserve_metadata_bytes(root, block_rsv, to_reserve, flush); 4586 /*
4587 * ret != 0 here means the qgroup reservation failed, we go straight to
4588 * the shared error handling then.
4589 */
4590 if (ret == 0)
4591 ret = reserve_metadata_bytes(root, block_rsv,
4592 to_reserve, flush);
4593
4596 if (ret) { 4594 if (ret) {
4597 u64 to_free = 0; 4595 u64 to_free = 0;
4598 unsigned dropped; 4596 unsigned dropped;
@@ -5560,7 +5558,7 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans,
5560 int empty_cluster = 2 * 1024 * 1024; 5558 int empty_cluster = 2 * 1024 * 1024;
5561 struct btrfs_space_info *space_info; 5559 struct btrfs_space_info *space_info;
5562 int loop = 0; 5560 int loop = 0;
5563 int index = 0; 5561 int index = __get_raid_index(data);
5564 int alloc_type = (data & BTRFS_BLOCK_GROUP_DATA) ? 5562 int alloc_type = (data & BTRFS_BLOCK_GROUP_DATA) ?
5565 RESERVE_ALLOC_NO_ACCOUNT : RESERVE_ALLOC; 5563 RESERVE_ALLOC_NO_ACCOUNT : RESERVE_ALLOC;
5566 bool found_uncached_bg = false; 5564 bool found_uncached_bg = false;
@@ -6524,7 +6522,7 @@ reada:
6524} 6522}
6525 6523
6526/* 6524/*
6527 * hepler to process tree block while walking down the tree. 6525 * helper to process tree block while walking down the tree.
6528 * 6526 *
6529 * when wc->stage == UPDATE_BACKREF, this function updates 6527 * when wc->stage == UPDATE_BACKREF, this function updates
6530 * back refs for pointers in the block. 6528 * back refs for pointers in the block.
@@ -6599,7 +6597,7 @@ static noinline int walk_down_proc(struct btrfs_trans_handle *trans,
6599} 6597}
6600 6598
6601/* 6599/*
6602 * hepler to process tree block pointer. 6600 * helper to process tree block pointer.
6603 * 6601 *
6604 * when wc->stage == DROP_REFERENCE, this function checks 6602 * when wc->stage == DROP_REFERENCE, this function checks
6605 * reference count of the block pointed to. if the block 6603 * reference count of the block pointed to. if the block
@@ -6737,7 +6735,7 @@ skip:
6737} 6735}
6738 6736
6739/* 6737/*
6740 * hepler to process tree block while walking up the tree. 6738 * helper to process tree block while walking up the tree.
6741 * 6739 *
6742 * when wc->stage == DROP_REFERENCE, this function drops 6740 * when wc->stage == DROP_REFERENCE, this function drops
6743 * reference count on the block. 6741 * reference count on the block.
@@ -6788,11 +6786,13 @@ static noinline int walk_up_proc(struct btrfs_trans_handle *trans,
6788 &wc->flags[level]); 6786 &wc->flags[level]);
6789 if (ret < 0) { 6787 if (ret < 0) {
6790 btrfs_tree_unlock_rw(eb, path->locks[level]); 6788 btrfs_tree_unlock_rw(eb, path->locks[level]);
6789 path->locks[level] = 0;
6791 return ret; 6790 return ret;
6792 } 6791 }
6793 BUG_ON(wc->refs[level] == 0); 6792 BUG_ON(wc->refs[level] == 0);
6794 if (wc->refs[level] == 1) { 6793 if (wc->refs[level] == 1) {
6795 btrfs_tree_unlock_rw(eb, path->locks[level]); 6794 btrfs_tree_unlock_rw(eb, path->locks[level]);
6795 path->locks[level] = 0;
6796 return 1; 6796 return 1;
6797 } 6797 }
6798 } 6798 }
diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c
index f169d6b11d7f..fdb7a8db3b57 100644
--- a/fs/btrfs/extent_map.c
+++ b/fs/btrfs/extent_map.c
@@ -171,6 +171,10 @@ static int mergable_maps(struct extent_map *prev, struct extent_map *next)
171 if (test_bit(EXTENT_FLAG_COMPRESSED, &prev->flags)) 171 if (test_bit(EXTENT_FLAG_COMPRESSED, &prev->flags))
172 return 0; 172 return 0;
173 173
174 if (test_bit(EXTENT_FLAG_LOGGING, &prev->flags) ||
175 test_bit(EXTENT_FLAG_LOGGING, &next->flags))
176 return 0;
177
174 if (extent_map_end(prev) == next->start && 178 if (extent_map_end(prev) == next->start &&
175 prev->flags == next->flags && 179 prev->flags == next->flags &&
176 prev->bdev == next->bdev && 180 prev->bdev == next->bdev &&
@@ -255,7 +259,8 @@ int unpin_extent_cache(struct extent_map_tree *tree, u64 start, u64 len,
255 if (!em) 259 if (!em)
256 goto out; 260 goto out;
257 261
258 list_move(&em->list, &tree->modified_extents); 262 if (!test_bit(EXTENT_FLAG_LOGGING, &em->flags))
263 list_move(&em->list, &tree->modified_extents);
259 em->generation = gen; 264 em->generation = gen;
260 clear_bit(EXTENT_FLAG_PINNED, &em->flags); 265 clear_bit(EXTENT_FLAG_PINNED, &em->flags);
261 em->mod_start = em->start; 266 em->mod_start = em->start;
@@ -280,6 +285,13 @@ out:
280 285
281} 286}
282 287
288void clear_em_logging(struct extent_map_tree *tree, struct extent_map *em)
289{
290 clear_bit(EXTENT_FLAG_LOGGING, &em->flags);
291 if (em->in_tree)
292 try_merge_map(tree, em);
293}
294
283/** 295/**
284 * add_extent_mapping - add new extent map to the extent tree 296 * add_extent_mapping - add new extent map to the extent tree
285 * @tree: tree to insert new map in 297 * @tree: tree to insert new map in
diff --git a/fs/btrfs/extent_map.h b/fs/btrfs/extent_map.h
index 922943ce29e8..c6598c89cff8 100644
--- a/fs/btrfs/extent_map.h
+++ b/fs/btrfs/extent_map.h
@@ -69,6 +69,7 @@ void free_extent_map(struct extent_map *em);
69int __init extent_map_init(void); 69int __init extent_map_init(void);
70void extent_map_exit(void); 70void extent_map_exit(void);
71int unpin_extent_cache(struct extent_map_tree *tree, u64 start, u64 len, u64 gen); 71int unpin_extent_cache(struct extent_map_tree *tree, u64 start, u64 len, u64 gen);
72void clear_em_logging(struct extent_map_tree *tree, struct extent_map *em);
72struct extent_map *search_extent_mapping(struct extent_map_tree *tree, 73struct extent_map *search_extent_mapping(struct extent_map_tree *tree,
73 u64 start, u64 len); 74 u64 start, u64 len);
74#endif 75#endif
diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c
index bd38cef42358..94aa53b38721 100644
--- a/fs/btrfs/file-item.c
+++ b/fs/btrfs/file-item.c
@@ -460,8 +460,8 @@ int btrfs_csum_one_bio(struct btrfs_root *root, struct inode *inode,
460 if (!contig) 460 if (!contig)
461 offset = page_offset(bvec->bv_page) + bvec->bv_offset; 461 offset = page_offset(bvec->bv_page) + bvec->bv_offset;
462 462
463 if (!contig && (offset >= ordered->file_offset + ordered->len || 463 if (offset >= ordered->file_offset + ordered->len ||
464 offset < ordered->file_offset)) { 464 offset < ordered->file_offset) {
465 unsigned long bytes_left; 465 unsigned long bytes_left;
466 sums->len = this_sum_bytes; 466 sums->len = this_sum_bytes;
467 this_sum_bytes = 0; 467 this_sum_bytes = 0;
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 77061bf43edb..aeb84469d2c4 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -293,15 +293,24 @@ static int __btrfs_run_defrag_inode(struct btrfs_fs_info *fs_info,
293 struct btrfs_key key; 293 struct btrfs_key key;
294 struct btrfs_ioctl_defrag_range_args range; 294 struct btrfs_ioctl_defrag_range_args range;
295 int num_defrag; 295 int num_defrag;
296 int index;
297 int ret;
296 298
297 /* get the inode */ 299 /* get the inode */
298 key.objectid = defrag->root; 300 key.objectid = defrag->root;
299 btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY); 301 btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
300 key.offset = (u64)-1; 302 key.offset = (u64)-1;
303
304 index = srcu_read_lock(&fs_info->subvol_srcu);
305
301 inode_root = btrfs_read_fs_root_no_name(fs_info, &key); 306 inode_root = btrfs_read_fs_root_no_name(fs_info, &key);
302 if (IS_ERR(inode_root)) { 307 if (IS_ERR(inode_root)) {
303 kmem_cache_free(btrfs_inode_defrag_cachep, defrag); 308 ret = PTR_ERR(inode_root);
304 return PTR_ERR(inode_root); 309 goto cleanup;
310 }
311 if (btrfs_root_refs(&inode_root->root_item) == 0) {
312 ret = -ENOENT;
313 goto cleanup;
305 } 314 }
306 315
307 key.objectid = defrag->ino; 316 key.objectid = defrag->ino;
@@ -309,9 +318,10 @@ static int __btrfs_run_defrag_inode(struct btrfs_fs_info *fs_info,
309 key.offset = 0; 318 key.offset = 0;
310 inode = btrfs_iget(fs_info->sb, &key, inode_root, NULL); 319 inode = btrfs_iget(fs_info->sb, &key, inode_root, NULL);
311 if (IS_ERR(inode)) { 320 if (IS_ERR(inode)) {
312 kmem_cache_free(btrfs_inode_defrag_cachep, defrag); 321 ret = PTR_ERR(inode);
313 return PTR_ERR(inode); 322 goto cleanup;
314 } 323 }
324 srcu_read_unlock(&fs_info->subvol_srcu, index);
315 325
316 /* do a chunk of defrag */ 326 /* do a chunk of defrag */
317 clear_bit(BTRFS_INODE_IN_DEFRAG, &BTRFS_I(inode)->runtime_flags); 327 clear_bit(BTRFS_INODE_IN_DEFRAG, &BTRFS_I(inode)->runtime_flags);
@@ -346,6 +356,10 @@ static int __btrfs_run_defrag_inode(struct btrfs_fs_info *fs_info,
346 356
347 iput(inode); 357 iput(inode);
348 return 0; 358 return 0;
359cleanup:
360 srcu_read_unlock(&fs_info->subvol_srcu, index);
361 kmem_cache_free(btrfs_inode_defrag_cachep, defrag);
362 return ret;
349} 363}
350 364
351/* 365/*
@@ -1594,9 +1608,10 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
1594 if (err < 0 && num_written > 0) 1608 if (err < 0 && num_written > 0)
1595 num_written = err; 1609 num_written = err;
1596 } 1610 }
1597out: 1611
1598 if (sync) 1612 if (sync)
1599 atomic_dec(&BTRFS_I(inode)->sync_writers); 1613 atomic_dec(&BTRFS_I(inode)->sync_writers);
1614out:
1600 sb_end_write(inode->i_sb); 1615 sb_end_write(inode->i_sb);
1601 current->backing_dev_info = NULL; 1616 current->backing_dev_info = NULL;
1602 return num_written ? num_written : err; 1617 return num_written ? num_written : err;
@@ -2241,6 +2256,7 @@ static int find_desired_extent(struct inode *inode, loff_t *offset, int whence)
2241 if (lockend <= lockstart) 2256 if (lockend <= lockstart)
2242 lockend = lockstart + root->sectorsize; 2257 lockend = lockstart + root->sectorsize;
2243 2258
2259 lockend--;
2244 len = lockend - lockstart + 1; 2260 len = lockend - lockstart + 1;
2245 2261
2246 len = max_t(u64, len, root->sectorsize); 2262 len = max_t(u64, len, root->sectorsize);
@@ -2307,9 +2323,12 @@ static int find_desired_extent(struct inode *inode, loff_t *offset, int whence)
2307 } 2323 }
2308 } 2324 }
2309 2325
2310 *offset = start; 2326 if (!test_bit(EXTENT_FLAG_PREALLOC,
2311 free_extent_map(em); 2327 &em->flags)) {
2312 break; 2328 *offset = start;
2329 free_extent_map(em);
2330 break;
2331 }
2313 } 2332 }
2314 } 2333 }
2315 2334
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index 59ea2e4349c9..0be7a8742a43 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -1862,11 +1862,13 @@ int btrfs_remove_free_space(struct btrfs_block_group_cache *block_group,
1862{ 1862{
1863 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 1863 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
1864 struct btrfs_free_space *info; 1864 struct btrfs_free_space *info;
1865 int ret = 0; 1865 int ret;
1866 bool re_search = false;
1866 1867
1867 spin_lock(&ctl->tree_lock); 1868 spin_lock(&ctl->tree_lock);
1868 1869
1869again: 1870again:
1871 ret = 0;
1870 if (!bytes) 1872 if (!bytes)
1871 goto out_lock; 1873 goto out_lock;
1872 1874
@@ -1879,17 +1881,17 @@ again:
1879 info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset), 1881 info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset),
1880 1, 0); 1882 1, 0);
1881 if (!info) { 1883 if (!info) {
1882 /* the tree logging code might be calling us before we 1884 /*
1883 * have fully loaded the free space rbtree for this 1885 * If we found a partial bit of our free space in a
1884 * block group. So it is possible the entry won't 1886 * bitmap but then couldn't find the other part this may
1885 * be in the rbtree yet at all. The caching code 1887 * be a problem, so WARN about it.
1886 * will make sure not to put it in the rbtree if
1887 * the logging code has pinned it.
1888 */ 1888 */
1889 WARN_ON(re_search);
1889 goto out_lock; 1890 goto out_lock;
1890 } 1891 }
1891 } 1892 }
1892 1893
1894 re_search = false;
1893 if (!info->bitmap) { 1895 if (!info->bitmap) {
1894 unlink_free_space(ctl, info); 1896 unlink_free_space(ctl, info);
1895 if (offset == info->offset) { 1897 if (offset == info->offset) {
@@ -1935,8 +1937,10 @@ again:
1935 } 1937 }
1936 1938
1937 ret = remove_from_bitmap(ctl, info, &offset, &bytes); 1939 ret = remove_from_bitmap(ctl, info, &offset, &bytes);
1938 if (ret == -EAGAIN) 1940 if (ret == -EAGAIN) {
1941 re_search = true;
1939 goto again; 1942 goto again;
1943 }
1940 BUG_ON(ret); /* logic error */ 1944 BUG_ON(ret); /* logic error */
1941out_lock: 1945out_lock:
1942 spin_unlock(&ctl->tree_lock); 1946 spin_unlock(&ctl->tree_lock);
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 16d9e8e191e6..cc93b23ca352 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -88,7 +88,7 @@ static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
88 [S_IFLNK >> S_SHIFT] = BTRFS_FT_SYMLINK, 88 [S_IFLNK >> S_SHIFT] = BTRFS_FT_SYMLINK,
89}; 89};
90 90
91static int btrfs_setsize(struct inode *inode, loff_t newsize); 91static int btrfs_setsize(struct inode *inode, struct iattr *attr);
92static int btrfs_truncate(struct inode *inode); 92static int btrfs_truncate(struct inode *inode);
93static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent); 93static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent);
94static noinline int cow_file_range(struct inode *inode, 94static noinline int cow_file_range(struct inode *inode,
@@ -2478,6 +2478,18 @@ int btrfs_orphan_cleanup(struct btrfs_root *root)
2478 continue; 2478 continue;
2479 } 2479 }
2480 nr_truncate++; 2480 nr_truncate++;
2481
2482 /* 1 for the orphan item deletion. */
2483 trans = btrfs_start_transaction(root, 1);
2484 if (IS_ERR(trans)) {
2485 ret = PTR_ERR(trans);
2486 goto out;
2487 }
2488 ret = btrfs_orphan_add(trans, inode);
2489 btrfs_end_transaction(trans, root);
2490 if (ret)
2491 goto out;
2492
2481 ret = btrfs_truncate(inode); 2493 ret = btrfs_truncate(inode);
2482 } else { 2494 } else {
2483 nr_unlink++; 2495 nr_unlink++;
@@ -3665,6 +3677,7 @@ int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size)
3665 block_end - cur_offset, 0); 3677 block_end - cur_offset, 0);
3666 if (IS_ERR(em)) { 3678 if (IS_ERR(em)) {
3667 err = PTR_ERR(em); 3679 err = PTR_ERR(em);
3680 em = NULL;
3668 break; 3681 break;
3669 } 3682 }
3670 last_byte = min(extent_map_end(em), block_end); 3683 last_byte = min(extent_map_end(em), block_end);
@@ -3748,16 +3761,27 @@ next:
3748 return err; 3761 return err;
3749} 3762}
3750 3763
3751static int btrfs_setsize(struct inode *inode, loff_t newsize) 3764static int btrfs_setsize(struct inode *inode, struct iattr *attr)
3752{ 3765{
3753 struct btrfs_root *root = BTRFS_I(inode)->root; 3766 struct btrfs_root *root = BTRFS_I(inode)->root;
3754 struct btrfs_trans_handle *trans; 3767 struct btrfs_trans_handle *trans;
3755 loff_t oldsize = i_size_read(inode); 3768 loff_t oldsize = i_size_read(inode);
3769 loff_t newsize = attr->ia_size;
3770 int mask = attr->ia_valid;
3756 int ret; 3771 int ret;
3757 3772
3758 if (newsize == oldsize) 3773 if (newsize == oldsize)
3759 return 0; 3774 return 0;
3760 3775
3776 /*
3777 * The regular truncate() case without ATTR_CTIME and ATTR_MTIME is a
3778 * special case where we need to update the times despite not having
3779 * these flags set. For all other operations the VFS set these flags
3780 * explicitly if it wants a timestamp update.
3781 */
3782 if (newsize != oldsize && (!(mask & (ATTR_CTIME | ATTR_MTIME))))
3783 inode->i_ctime = inode->i_mtime = current_fs_time(inode->i_sb);
3784
3761 if (newsize > oldsize) { 3785 if (newsize > oldsize) {
3762 truncate_pagecache(inode, oldsize, newsize); 3786 truncate_pagecache(inode, oldsize, newsize);
3763 ret = btrfs_cont_expand(inode, oldsize, newsize); 3787 ret = btrfs_cont_expand(inode, oldsize, newsize);
@@ -3783,9 +3807,34 @@ static int btrfs_setsize(struct inode *inode, loff_t newsize)
3783 set_bit(BTRFS_INODE_ORDERED_DATA_CLOSE, 3807 set_bit(BTRFS_INODE_ORDERED_DATA_CLOSE,
3784 &BTRFS_I(inode)->runtime_flags); 3808 &BTRFS_I(inode)->runtime_flags);
3785 3809
3810 /*
3811 * 1 for the orphan item we're going to add
3812 * 1 for the orphan item deletion.
3813 */
3814 trans = btrfs_start_transaction(root, 2);
3815 if (IS_ERR(trans))
3816 return PTR_ERR(trans);
3817
3818 /*
3819 * We need to do this in case we fail at _any_ point during the
3820 * actual truncate. Once we do the truncate_setsize we could
3821 * invalidate pages which forces any outstanding ordered io to
3822 * be instantly completed which will give us extents that need
3823 * to be truncated. If we fail to get an orphan inode down we
3824 * could have left over extents that were never meant to live,
3825 * so we need to guarantee from this point on that everything
3826 * will be consistent.
3827 */
3828 ret = btrfs_orphan_add(trans, inode);
3829 btrfs_end_transaction(trans, root);
3830 if (ret)
3831 return ret;
3832
3786 /* we don't support swapfiles, so vmtruncate shouldn't fail */ 3833 /* we don't support swapfiles, so vmtruncate shouldn't fail */
3787 truncate_setsize(inode, newsize); 3834 truncate_setsize(inode, newsize);
3788 ret = btrfs_truncate(inode); 3835 ret = btrfs_truncate(inode);
3836 if (ret && inode->i_nlink)
3837 btrfs_orphan_del(NULL, inode);
3789 } 3838 }
3790 3839
3791 return ret; 3840 return ret;
@@ -3805,7 +3854,7 @@ static int btrfs_setattr(struct dentry *dentry, struct iattr *attr)
3805 return err; 3854 return err;
3806 3855
3807 if (S_ISREG(inode->i_mode) && (attr->ia_valid & ATTR_SIZE)) { 3856 if (S_ISREG(inode->i_mode) && (attr->ia_valid & ATTR_SIZE)) {
3808 err = btrfs_setsize(inode, attr->ia_size); 3857 err = btrfs_setsize(inode, attr);
3809 if (err) 3858 if (err)
3810 return err; 3859 return err;
3811 } 3860 }
@@ -5572,10 +5621,13 @@ struct extent_map *btrfs_get_extent_fiemap(struct inode *inode, struct page *pag
5572 return em; 5621 return em;
5573 if (em) { 5622 if (em) {
5574 /* 5623 /*
5575 * if our em maps to a hole, there might 5624 * if our em maps to
5576 * actually be delalloc bytes behind it 5625 * - a hole or
5626 * - a pre-alloc extent,
5627 * there might actually be delalloc bytes behind it.
5577 */ 5628 */
5578 if (em->block_start != EXTENT_MAP_HOLE) 5629 if (em->block_start != EXTENT_MAP_HOLE &&
5630 !test_bit(EXTENT_FLAG_PREALLOC, &em->flags))
5579 return em; 5631 return em;
5580 else 5632 else
5581 hole_em = em; 5633 hole_em = em;
@@ -5657,6 +5709,8 @@ struct extent_map *btrfs_get_extent_fiemap(struct inode *inode, struct page *pag
5657 */ 5709 */
5658 em->block_start = hole_em->block_start; 5710 em->block_start = hole_em->block_start;
5659 em->block_len = hole_len; 5711 em->block_len = hole_len;
5712 if (test_bit(EXTENT_FLAG_PREALLOC, &hole_em->flags))
5713 set_bit(EXTENT_FLAG_PREALLOC, &em->flags);
5660 } else { 5714 } else {
5661 em->start = range_start; 5715 em->start = range_start;
5662 em->len = found; 5716 em->len = found;
@@ -6915,11 +6969,9 @@ static int btrfs_truncate(struct inode *inode)
6915 6969
6916 /* 6970 /*
6917 * 1 for the truncate slack space 6971 * 1 for the truncate slack space
6918 * 1 for the orphan item we're going to add
6919 * 1 for the orphan item deletion
6920 * 1 for updating the inode. 6972 * 1 for updating the inode.
6921 */ 6973 */
6922 trans = btrfs_start_transaction(root, 4); 6974 trans = btrfs_start_transaction(root, 2);
6923 if (IS_ERR(trans)) { 6975 if (IS_ERR(trans)) {
6924 err = PTR_ERR(trans); 6976 err = PTR_ERR(trans);
6925 goto out; 6977 goto out;
@@ -6930,12 +6982,6 @@ static int btrfs_truncate(struct inode *inode)
6930 min_size); 6982 min_size);
6931 BUG_ON(ret); 6983 BUG_ON(ret);
6932 6984
6933 ret = btrfs_orphan_add(trans, inode);
6934 if (ret) {
6935 btrfs_end_transaction(trans, root);
6936 goto out;
6937 }
6938
6939 /* 6985 /*
6940 * setattr is responsible for setting the ordered_data_close flag, 6986 * setattr is responsible for setting the ordered_data_close flag,
6941 * but that is only tested during the last file release. That 6987 * but that is only tested during the last file release. That
@@ -7004,12 +7050,6 @@ static int btrfs_truncate(struct inode *inode)
7004 ret = btrfs_orphan_del(trans, inode); 7050 ret = btrfs_orphan_del(trans, inode);
7005 if (ret) 7051 if (ret)
7006 err = ret; 7052 err = ret;
7007 } else if (ret && inode->i_nlink > 0) {
7008 /*
7009 * Failed to do the truncate, remove us from the in memory
7010 * orphan list.
7011 */
7012 ret = btrfs_orphan_del(NULL, inode);
7013 } 7053 }
7014 7054
7015 if (trans) { 7055 if (trans) {
@@ -7531,41 +7571,61 @@ void btrfs_wait_and_free_delalloc_work(struct btrfs_delalloc_work *work)
7531 */ 7571 */
7532int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput) 7572int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput)
7533{ 7573{
7534 struct list_head *head = &root->fs_info->delalloc_inodes;
7535 struct btrfs_inode *binode; 7574 struct btrfs_inode *binode;
7536 struct inode *inode; 7575 struct inode *inode;
7537 struct btrfs_delalloc_work *work, *next; 7576 struct btrfs_delalloc_work *work, *next;
7538 struct list_head works; 7577 struct list_head works;
7578 struct list_head splice;
7539 int ret = 0; 7579 int ret = 0;
7540 7580
7541 if (root->fs_info->sb->s_flags & MS_RDONLY) 7581 if (root->fs_info->sb->s_flags & MS_RDONLY)
7542 return -EROFS; 7582 return -EROFS;
7543 7583
7544 INIT_LIST_HEAD(&works); 7584 INIT_LIST_HEAD(&works);
7545 7585 INIT_LIST_HEAD(&splice);
7586again:
7546 spin_lock(&root->fs_info->delalloc_lock); 7587 spin_lock(&root->fs_info->delalloc_lock);
7547 while (!list_empty(head)) { 7588 list_splice_init(&root->fs_info->delalloc_inodes, &splice);
7548 binode = list_entry(head->next, struct btrfs_inode, 7589 while (!list_empty(&splice)) {
7590 binode = list_entry(splice.next, struct btrfs_inode,
7549 delalloc_inodes); 7591 delalloc_inodes);
7592
7593 list_del_init(&binode->delalloc_inodes);
7594
7550 inode = igrab(&binode->vfs_inode); 7595 inode = igrab(&binode->vfs_inode);
7551 if (!inode) 7596 if (!inode)
7552 list_del_init(&binode->delalloc_inodes); 7597 continue;
7598
7599 list_add_tail(&binode->delalloc_inodes,
7600 &root->fs_info->delalloc_inodes);
7553 spin_unlock(&root->fs_info->delalloc_lock); 7601 spin_unlock(&root->fs_info->delalloc_lock);
7554 if (inode) { 7602
7555 work = btrfs_alloc_delalloc_work(inode, 0, delay_iput); 7603 work = btrfs_alloc_delalloc_work(inode, 0, delay_iput);
7556 if (!work) { 7604 if (unlikely(!work)) {
7557 ret = -ENOMEM; 7605 ret = -ENOMEM;
7558 goto out; 7606 goto out;
7559 }
7560 list_add_tail(&work->list, &works);
7561 btrfs_queue_worker(&root->fs_info->flush_workers,
7562 &work->work);
7563 } 7607 }
7608 list_add_tail(&work->list, &works);
7609 btrfs_queue_worker(&root->fs_info->flush_workers,
7610 &work->work);
7611
7564 cond_resched(); 7612 cond_resched();
7565 spin_lock(&root->fs_info->delalloc_lock); 7613 spin_lock(&root->fs_info->delalloc_lock);
7566 } 7614 }
7567 spin_unlock(&root->fs_info->delalloc_lock); 7615 spin_unlock(&root->fs_info->delalloc_lock);
7568 7616
7617 list_for_each_entry_safe(work, next, &works, list) {
7618 list_del_init(&work->list);
7619 btrfs_wait_and_free_delalloc_work(work);
7620 }
7621
7622 spin_lock(&root->fs_info->delalloc_lock);
7623 if (!list_empty(&root->fs_info->delalloc_inodes)) {
7624 spin_unlock(&root->fs_info->delalloc_lock);
7625 goto again;
7626 }
7627 spin_unlock(&root->fs_info->delalloc_lock);
7628
7569 /* the filemap_flush will queue IO into the worker threads, but 7629 /* the filemap_flush will queue IO into the worker threads, but
7570 * we have to make sure the IO is actually started and that 7630 * we have to make sure the IO is actually started and that
7571 * ordered extents get created before we return 7631 * ordered extents get created before we return
@@ -7578,11 +7638,18 @@ int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput)
7578 atomic_read(&root->fs_info->async_delalloc_pages) == 0)); 7638 atomic_read(&root->fs_info->async_delalloc_pages) == 0));
7579 } 7639 }
7580 atomic_dec(&root->fs_info->async_submit_draining); 7640 atomic_dec(&root->fs_info->async_submit_draining);
7641 return 0;
7581out: 7642out:
7582 list_for_each_entry_safe(work, next, &works, list) { 7643 list_for_each_entry_safe(work, next, &works, list) {
7583 list_del_init(&work->list); 7644 list_del_init(&work->list);
7584 btrfs_wait_and_free_delalloc_work(work); 7645 btrfs_wait_and_free_delalloc_work(work);
7585 } 7646 }
7647
7648 if (!list_empty_careful(&splice)) {
7649 spin_lock(&root->fs_info->delalloc_lock);
7650 list_splice_tail(&splice, &root->fs_info->delalloc_inodes);
7651 spin_unlock(&root->fs_info->delalloc_lock);
7652 }
7586 return ret; 7653 return ret;
7587} 7654}
7588 7655
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 4b4516770f05..338f2597bf7f 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -515,7 +515,6 @@ static noinline int create_subvol(struct btrfs_root *root,
515 515
516 BUG_ON(ret); 516 BUG_ON(ret);
517 517
518 d_instantiate(dentry, btrfs_lookup_dentry(dir, dentry));
519fail: 518fail:
520 if (async_transid) { 519 if (async_transid) {
521 *async_transid = trans->transid; 520 *async_transid = trans->transid;
@@ -525,6 +524,10 @@ fail:
525 } 524 }
526 if (err && !ret) 525 if (err && !ret)
527 ret = err; 526 ret = err;
527
528 if (!ret)
529 d_instantiate(dentry, btrfs_lookup_dentry(dir, dentry));
530
528 return ret; 531 return ret;
529} 532}
530 533
@@ -1339,7 +1342,8 @@ static noinline int btrfs_ioctl_resize(struct file *file,
1339 if (atomic_xchg(&root->fs_info->mutually_exclusive_operation_running, 1342 if (atomic_xchg(&root->fs_info->mutually_exclusive_operation_running,
1340 1)) { 1343 1)) {
1341 pr_info("btrfs: dev add/delete/balance/replace/resize operation in progress\n"); 1344 pr_info("btrfs: dev add/delete/balance/replace/resize operation in progress\n");
1342 return -EINPROGRESS; 1345 mnt_drop_write_file(file);
1346 return -EINVAL;
1343 } 1347 }
1344 1348
1345 mutex_lock(&root->fs_info->volume_mutex); 1349 mutex_lock(&root->fs_info->volume_mutex);
@@ -1362,6 +1366,7 @@ static noinline int btrfs_ioctl_resize(struct file *file,
1362 printk(KERN_INFO "btrfs: resizing devid %llu\n", 1366 printk(KERN_INFO "btrfs: resizing devid %llu\n",
1363 (unsigned long long)devid); 1367 (unsigned long long)devid);
1364 } 1368 }
1369
1365 device = btrfs_find_device(root->fs_info, devid, NULL, NULL); 1370 device = btrfs_find_device(root->fs_info, devid, NULL, NULL);
1366 if (!device) { 1371 if (!device) {
1367 printk(KERN_INFO "btrfs: resizer unable to find device %llu\n", 1372 printk(KERN_INFO "btrfs: resizer unable to find device %llu\n",
@@ -1369,9 +1374,10 @@ static noinline int btrfs_ioctl_resize(struct file *file,
1369 ret = -EINVAL; 1374 ret = -EINVAL;
1370 goto out_free; 1375 goto out_free;
1371 } 1376 }
1372 if (device->fs_devices && device->fs_devices->seeding) { 1377
1378 if (!device->writeable) {
1373 printk(KERN_INFO "btrfs: resizer unable to apply on " 1379 printk(KERN_INFO "btrfs: resizer unable to apply on "
1374 "seeding device %llu\n", 1380 "readonly device %llu\n",
1375 (unsigned long long)devid); 1381 (unsigned long long)devid);
1376 ret = -EINVAL; 1382 ret = -EINVAL;
1377 goto out_free; 1383 goto out_free;
@@ -1443,8 +1449,8 @@ out_free:
1443 kfree(vol_args); 1449 kfree(vol_args);
1444out: 1450out:
1445 mutex_unlock(&root->fs_info->volume_mutex); 1451 mutex_unlock(&root->fs_info->volume_mutex);
1446 mnt_drop_write_file(file);
1447 atomic_set(&root->fs_info->mutually_exclusive_operation_running, 0); 1452 atomic_set(&root->fs_info->mutually_exclusive_operation_running, 0);
1453 mnt_drop_write_file(file);
1448 return ret; 1454 return ret;
1449} 1455}
1450 1456
@@ -2095,13 +2101,13 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file,
2095 err = inode_permission(inode, MAY_WRITE | MAY_EXEC); 2101 err = inode_permission(inode, MAY_WRITE | MAY_EXEC);
2096 if (err) 2102 if (err)
2097 goto out_dput; 2103 goto out_dput;
2098
2099 /* check if subvolume may be deleted by a non-root user */
2100 err = btrfs_may_delete(dir, dentry, 1);
2101 if (err)
2102 goto out_dput;
2103 } 2104 }
2104 2105
2106 /* check if subvolume may be deleted by a user */
2107 err = btrfs_may_delete(dir, dentry, 1);
2108 if (err)
2109 goto out_dput;
2110
2105 if (btrfs_ino(inode) != BTRFS_FIRST_FREE_OBJECTID) { 2111 if (btrfs_ino(inode) != BTRFS_FIRST_FREE_OBJECTID) {
2106 err = -EINVAL; 2112 err = -EINVAL;
2107 goto out_dput; 2113 goto out_dput;
@@ -2183,19 +2189,20 @@ static int btrfs_ioctl_defrag(struct file *file, void __user *argp)
2183 struct btrfs_ioctl_defrag_range_args *range; 2189 struct btrfs_ioctl_defrag_range_args *range;
2184 int ret; 2190 int ret;
2185 2191
2186 if (btrfs_root_readonly(root)) 2192 ret = mnt_want_write_file(file);
2187 return -EROFS; 2193 if (ret)
2194 return ret;
2188 2195
2189 if (atomic_xchg(&root->fs_info->mutually_exclusive_operation_running, 2196 if (atomic_xchg(&root->fs_info->mutually_exclusive_operation_running,
2190 1)) { 2197 1)) {
2191 pr_info("btrfs: dev add/delete/balance/replace/resize operation in progress\n"); 2198 pr_info("btrfs: dev add/delete/balance/replace/resize operation in progress\n");
2192 return -EINPROGRESS; 2199 mnt_drop_write_file(file);
2200 return -EINVAL;
2193 } 2201 }
2194 ret = mnt_want_write_file(file); 2202
2195 if (ret) { 2203 if (btrfs_root_readonly(root)) {
2196 atomic_set(&root->fs_info->mutually_exclusive_operation_running, 2204 ret = -EROFS;
2197 0); 2205 goto out;
2198 return ret;
2199 } 2206 }
2200 2207
2201 switch (inode->i_mode & S_IFMT) { 2208 switch (inode->i_mode & S_IFMT) {
@@ -2247,8 +2254,8 @@ static int btrfs_ioctl_defrag(struct file *file, void __user *argp)
2247 ret = -EINVAL; 2254 ret = -EINVAL;
2248 } 2255 }
2249out: 2256out:
2250 mnt_drop_write_file(file);
2251 atomic_set(&root->fs_info->mutually_exclusive_operation_running, 0); 2257 atomic_set(&root->fs_info->mutually_exclusive_operation_running, 0);
2258 mnt_drop_write_file(file);
2252 return ret; 2259 return ret;
2253} 2260}
2254 2261
@@ -2263,7 +2270,7 @@ static long btrfs_ioctl_add_dev(struct btrfs_root *root, void __user *arg)
2263 if (atomic_xchg(&root->fs_info->mutually_exclusive_operation_running, 2270 if (atomic_xchg(&root->fs_info->mutually_exclusive_operation_running,
2264 1)) { 2271 1)) {
2265 pr_info("btrfs: dev add/delete/balance/replace/resize operation in progress\n"); 2272 pr_info("btrfs: dev add/delete/balance/replace/resize operation in progress\n");
2266 return -EINPROGRESS; 2273 return -EINVAL;
2267 } 2274 }
2268 2275
2269 mutex_lock(&root->fs_info->volume_mutex); 2276 mutex_lock(&root->fs_info->volume_mutex);
@@ -2300,7 +2307,7 @@ static long btrfs_ioctl_rm_dev(struct file *file, void __user *arg)
2300 1)) { 2307 1)) {
2301 pr_info("btrfs: dev add/delete/balance/replace/resize operation in progress\n"); 2308 pr_info("btrfs: dev add/delete/balance/replace/resize operation in progress\n");
2302 mnt_drop_write_file(file); 2309 mnt_drop_write_file(file);
2303 return -EINPROGRESS; 2310 return -EINVAL;
2304 } 2311 }
2305 2312
2306 mutex_lock(&root->fs_info->volume_mutex); 2313 mutex_lock(&root->fs_info->volume_mutex);
@@ -2316,8 +2323,8 @@ static long btrfs_ioctl_rm_dev(struct file *file, void __user *arg)
2316 kfree(vol_args); 2323 kfree(vol_args);
2317out: 2324out:
2318 mutex_unlock(&root->fs_info->volume_mutex); 2325 mutex_unlock(&root->fs_info->volume_mutex);
2319 mnt_drop_write_file(file);
2320 atomic_set(&root->fs_info->mutually_exclusive_operation_running, 0); 2326 atomic_set(&root->fs_info->mutually_exclusive_operation_running, 0);
2327 mnt_drop_write_file(file);
2321 return ret; 2328 return ret;
2322} 2329}
2323 2330
@@ -3437,8 +3444,8 @@ static long btrfs_ioctl_balance(struct file *file, void __user *arg)
3437 struct btrfs_fs_info *fs_info = root->fs_info; 3444 struct btrfs_fs_info *fs_info = root->fs_info;
3438 struct btrfs_ioctl_balance_args *bargs; 3445 struct btrfs_ioctl_balance_args *bargs;
3439 struct btrfs_balance_control *bctl; 3446 struct btrfs_balance_control *bctl;
3447 bool need_unlock; /* for mut. excl. ops lock */
3440 int ret; 3448 int ret;
3441 int need_to_clear_lock = 0;
3442 3449
3443 if (!capable(CAP_SYS_ADMIN)) 3450 if (!capable(CAP_SYS_ADMIN))
3444 return -EPERM; 3451 return -EPERM;
@@ -3447,14 +3454,61 @@ static long btrfs_ioctl_balance(struct file *file, void __user *arg)
3447 if (ret) 3454 if (ret)
3448 return ret; 3455 return ret;
3449 3456
3450 mutex_lock(&fs_info->volume_mutex); 3457again:
3458 if (!atomic_xchg(&fs_info->mutually_exclusive_operation_running, 1)) {
3459 mutex_lock(&fs_info->volume_mutex);
3460 mutex_lock(&fs_info->balance_mutex);
3461 need_unlock = true;
3462 goto locked;
3463 }
3464
3465 /*
3466 * mut. excl. ops lock is locked. Three possibilities:
3467 * (1) some other op is running
3468 * (2) balance is running
3469 * (3) balance is paused -- special case (think resume)
3470 */
3451 mutex_lock(&fs_info->balance_mutex); 3471 mutex_lock(&fs_info->balance_mutex);
3472 if (fs_info->balance_ctl) {
3473 /* this is either (2) or (3) */
3474 if (!atomic_read(&fs_info->balance_running)) {
3475 mutex_unlock(&fs_info->balance_mutex);
3476 if (!mutex_trylock(&fs_info->volume_mutex))
3477 goto again;
3478 mutex_lock(&fs_info->balance_mutex);
3479
3480 if (fs_info->balance_ctl &&
3481 !atomic_read(&fs_info->balance_running)) {
3482 /* this is (3) */
3483 need_unlock = false;
3484 goto locked;
3485 }
3486
3487 mutex_unlock(&fs_info->balance_mutex);
3488 mutex_unlock(&fs_info->volume_mutex);
3489 goto again;
3490 } else {
3491 /* this is (2) */
3492 mutex_unlock(&fs_info->balance_mutex);
3493 ret = -EINPROGRESS;
3494 goto out;
3495 }
3496 } else {
3497 /* this is (1) */
3498 mutex_unlock(&fs_info->balance_mutex);
3499 pr_info("btrfs: dev add/delete/balance/replace/resize operation in progress\n");
3500 ret = -EINVAL;
3501 goto out;
3502 }
3503
3504locked:
3505 BUG_ON(!atomic_read(&fs_info->mutually_exclusive_operation_running));
3452 3506
3453 if (arg) { 3507 if (arg) {
3454 bargs = memdup_user(arg, sizeof(*bargs)); 3508 bargs = memdup_user(arg, sizeof(*bargs));
3455 if (IS_ERR(bargs)) { 3509 if (IS_ERR(bargs)) {
3456 ret = PTR_ERR(bargs); 3510 ret = PTR_ERR(bargs);
3457 goto out; 3511 goto out_unlock;
3458 } 3512 }
3459 3513
3460 if (bargs->flags & BTRFS_BALANCE_RESUME) { 3514 if (bargs->flags & BTRFS_BALANCE_RESUME) {
@@ -3474,13 +3528,10 @@ static long btrfs_ioctl_balance(struct file *file, void __user *arg)
3474 bargs = NULL; 3528 bargs = NULL;
3475 } 3529 }
3476 3530
3477 if (atomic_xchg(&root->fs_info->mutually_exclusive_operation_running, 3531 if (fs_info->balance_ctl) {
3478 1)) {
3479 pr_info("btrfs: dev add/delete/balance/replace/resize operation in progress\n");
3480 ret = -EINPROGRESS; 3532 ret = -EINPROGRESS;
3481 goto out_bargs; 3533 goto out_bargs;
3482 } 3534 }
3483 need_to_clear_lock = 1;
3484 3535
3485 bctl = kzalloc(sizeof(*bctl), GFP_NOFS); 3536 bctl = kzalloc(sizeof(*bctl), GFP_NOFS);
3486 if (!bctl) { 3537 if (!bctl) {
@@ -3501,11 +3552,17 @@ static long btrfs_ioctl_balance(struct file *file, void __user *arg)
3501 } 3552 }
3502 3553
3503do_balance: 3554do_balance:
3504 ret = btrfs_balance(bctl, bargs);
3505 /* 3555 /*
3506 * bctl is freed in __cancel_balance or in free_fs_info if 3556 * Ownership of bctl and mutually_exclusive_operation_running
3507 * restriper was paused all the way until unmount 3557 * goes to to btrfs_balance. bctl is freed in __cancel_balance,
3558 * or, if restriper was paused all the way until unmount, in
3559 * free_fs_info. mutually_exclusive_operation_running is
3560 * cleared in __cancel_balance.
3508 */ 3561 */
3562 need_unlock = false;
3563
3564 ret = btrfs_balance(bctl, bargs);
3565
3509 if (arg) { 3566 if (arg) {
3510 if (copy_to_user(arg, bargs, sizeof(*bargs))) 3567 if (copy_to_user(arg, bargs, sizeof(*bargs)))
3511 ret = -EFAULT; 3568 ret = -EFAULT;
@@ -3513,12 +3570,12 @@ do_balance:
3513 3570
3514out_bargs: 3571out_bargs:
3515 kfree(bargs); 3572 kfree(bargs);
3516out: 3573out_unlock:
3517 if (need_to_clear_lock)
3518 atomic_set(&root->fs_info->mutually_exclusive_operation_running,
3519 0);
3520 mutex_unlock(&fs_info->balance_mutex); 3574 mutex_unlock(&fs_info->balance_mutex);
3521 mutex_unlock(&fs_info->volume_mutex); 3575 mutex_unlock(&fs_info->volume_mutex);
3576 if (need_unlock)
3577 atomic_set(&fs_info->mutually_exclusive_operation_running, 0);
3578out:
3522 mnt_drop_write_file(file); 3579 mnt_drop_write_file(file);
3523 return ret; 3580 return ret;
3524} 3581}
@@ -3698,6 +3755,11 @@ static long btrfs_ioctl_qgroup_create(struct file *file, void __user *arg)
3698 goto drop_write; 3755 goto drop_write;
3699 } 3756 }
3700 3757
3758 if (!sa->qgroupid) {
3759 ret = -EINVAL;
3760 goto out;
3761 }
3762
3701 trans = btrfs_join_transaction(root); 3763 trans = btrfs_join_transaction(root);
3702 if (IS_ERR(trans)) { 3764 if (IS_ERR(trans)) {
3703 ret = PTR_ERR(trans); 3765 ret = PTR_ERR(trans);
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index f10731297040..e5ed56729607 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -836,9 +836,16 @@ int btrfs_ordered_update_i_size(struct inode *inode, u64 offset,
836 * if the disk i_size is already at the inode->i_size, or 836 * if the disk i_size is already at the inode->i_size, or
837 * this ordered extent is inside the disk i_size, we're done 837 * this ordered extent is inside the disk i_size, we're done
838 */ 838 */
839 if (disk_i_size == i_size || offset <= disk_i_size) { 839 if (disk_i_size == i_size)
840 goto out;
841
842 /*
843 * We still need to update disk_i_size if outstanding_isize is greater
844 * than disk_i_size.
845 */
846 if (offset <= disk_i_size &&
847 (!ordered || ordered->outstanding_isize <= disk_i_size))
840 goto out; 848 goto out;
841 }
842 849
843 /* 850 /*
844 * walk backward from this ordered extent to disk_i_size. 851 * walk backward from this ordered extent to disk_i_size.
@@ -870,7 +877,7 @@ int btrfs_ordered_update_i_size(struct inode *inode, u64 offset,
870 break; 877 break;
871 if (test->file_offset >= i_size) 878 if (test->file_offset >= i_size)
872 break; 879 break;
873 if (test->file_offset >= disk_i_size) { 880 if (entry_end(test) > disk_i_size) {
874 /* 881 /*
875 * we don't update disk_i_size now, so record this 882 * we don't update disk_i_size now, so record this
876 * undealt i_size. Or we will not know the real 883 * undealt i_size. Or we will not know the real
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index fe9d02c45f8e..a5c856234323 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -379,6 +379,13 @@ next1:
379 379
380 ret = add_relation_rb(fs_info, found_key.objectid, 380 ret = add_relation_rb(fs_info, found_key.objectid,
381 found_key.offset); 381 found_key.offset);
382 if (ret == -ENOENT) {
383 printk(KERN_WARNING
384 "btrfs: orphan qgroup relation 0x%llx->0x%llx\n",
385 (unsigned long long)found_key.objectid,
386 (unsigned long long)found_key.offset);
387 ret = 0; /* ignore the error */
388 }
382 if (ret) 389 if (ret)
383 goto out; 390 goto out;
384next2: 391next2:
@@ -956,17 +963,28 @@ int btrfs_remove_qgroup(struct btrfs_trans_handle *trans,
956 struct btrfs_fs_info *fs_info, u64 qgroupid) 963 struct btrfs_fs_info *fs_info, u64 qgroupid)
957{ 964{
958 struct btrfs_root *quota_root; 965 struct btrfs_root *quota_root;
966 struct btrfs_qgroup *qgroup;
959 int ret = 0; 967 int ret = 0;
960 968
961 quota_root = fs_info->quota_root; 969 quota_root = fs_info->quota_root;
962 if (!quota_root) 970 if (!quota_root)
963 return -EINVAL; 971 return -EINVAL;
964 972
973 /* check if there are no relations to this qgroup */
974 spin_lock(&fs_info->qgroup_lock);
975 qgroup = find_qgroup_rb(fs_info, qgroupid);
976 if (qgroup) {
977 if (!list_empty(&qgroup->groups) || !list_empty(&qgroup->members)) {
978 spin_unlock(&fs_info->qgroup_lock);
979 return -EBUSY;
980 }
981 }
982 spin_unlock(&fs_info->qgroup_lock);
983
965 ret = del_qgroup_item(trans, quota_root, qgroupid); 984 ret = del_qgroup_item(trans, quota_root, qgroupid);
966 985
967 spin_lock(&fs_info->qgroup_lock); 986 spin_lock(&fs_info->qgroup_lock);
968 del_qgroup_rb(quota_root->fs_info, qgroupid); 987 del_qgroup_rb(quota_root->fs_info, qgroupid);
969
970 spin_unlock(&fs_info->qgroup_lock); 988 spin_unlock(&fs_info->qgroup_lock);
971 989
972 return ret; 990 return ret;
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index 300e09ac3659..17c306bf177a 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -3472,7 +3472,7 @@ out:
3472} 3472}
3473 3473
3474/* 3474/*
3475 * hepler to find all tree blocks that reference a given data extent 3475 * helper to find all tree blocks that reference a given data extent
3476 */ 3476 */
3477static noinline_for_stack 3477static noinline_for_stack
3478int add_data_references(struct reloc_control *rc, 3478int add_data_references(struct reloc_control *rc,
@@ -3566,7 +3566,7 @@ int add_data_references(struct reloc_control *rc,
3566} 3566}
3567 3567
3568/* 3568/*
3569 * hepler to find next unprocessed extent 3569 * helper to find next unprocessed extent
3570 */ 3570 */
3571static noinline_for_stack 3571static noinline_for_stack
3572int find_next_extent(struct btrfs_trans_handle *trans, 3572int find_next_extent(struct btrfs_trans_handle *trans,
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index bdbb94f245c9..67783e03d121 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -580,20 +580,29 @@ static int scrub_fixup_readpage(u64 inum, u64 offset, u64 root, void *fixup_ctx)
580 int corrected = 0; 580 int corrected = 0;
581 struct btrfs_key key; 581 struct btrfs_key key;
582 struct inode *inode = NULL; 582 struct inode *inode = NULL;
583 struct btrfs_fs_info *fs_info;
583 u64 end = offset + PAGE_SIZE - 1; 584 u64 end = offset + PAGE_SIZE - 1;
584 struct btrfs_root *local_root; 585 struct btrfs_root *local_root;
586 int srcu_index;
585 587
586 key.objectid = root; 588 key.objectid = root;
587 key.type = BTRFS_ROOT_ITEM_KEY; 589 key.type = BTRFS_ROOT_ITEM_KEY;
588 key.offset = (u64)-1; 590 key.offset = (u64)-1;
589 local_root = btrfs_read_fs_root_no_name(fixup->root->fs_info, &key); 591
590 if (IS_ERR(local_root)) 592 fs_info = fixup->root->fs_info;
593 srcu_index = srcu_read_lock(&fs_info->subvol_srcu);
594
595 local_root = btrfs_read_fs_root_no_name(fs_info, &key);
596 if (IS_ERR(local_root)) {
597 srcu_read_unlock(&fs_info->subvol_srcu, srcu_index);
591 return PTR_ERR(local_root); 598 return PTR_ERR(local_root);
599 }
592 600
593 key.type = BTRFS_INODE_ITEM_KEY; 601 key.type = BTRFS_INODE_ITEM_KEY;
594 key.objectid = inum; 602 key.objectid = inum;
595 key.offset = 0; 603 key.offset = 0;
596 inode = btrfs_iget(fixup->root->fs_info->sb, &key, local_root, NULL); 604 inode = btrfs_iget(fs_info->sb, &key, local_root, NULL);
605 srcu_read_unlock(&fs_info->subvol_srcu, srcu_index);
597 if (IS_ERR(inode)) 606 if (IS_ERR(inode))
598 return PTR_ERR(inode); 607 return PTR_ERR(inode);
599 608
@@ -606,7 +615,6 @@ static int scrub_fixup_readpage(u64 inum, u64 offset, u64 root, void *fixup_ctx)
606 } 615 }
607 616
608 if (PageUptodate(page)) { 617 if (PageUptodate(page)) {
609 struct btrfs_fs_info *fs_info;
610 if (PageDirty(page)) { 618 if (PageDirty(page)) {
611 /* 619 /*
612 * we need to write the data to the defect sector. the 620 * we need to write the data to the defect sector. the
@@ -3180,18 +3188,25 @@ static int copy_nocow_pages_for_inode(u64 inum, u64 offset, u64 root, void *ctx)
3180 u64 physical_for_dev_replace; 3188 u64 physical_for_dev_replace;
3181 u64 len; 3189 u64 len;
3182 struct btrfs_fs_info *fs_info = nocow_ctx->sctx->dev_root->fs_info; 3190 struct btrfs_fs_info *fs_info = nocow_ctx->sctx->dev_root->fs_info;
3191 int srcu_index;
3183 3192
3184 key.objectid = root; 3193 key.objectid = root;
3185 key.type = BTRFS_ROOT_ITEM_KEY; 3194 key.type = BTRFS_ROOT_ITEM_KEY;
3186 key.offset = (u64)-1; 3195 key.offset = (u64)-1;
3196
3197 srcu_index = srcu_read_lock(&fs_info->subvol_srcu);
3198
3187 local_root = btrfs_read_fs_root_no_name(fs_info, &key); 3199 local_root = btrfs_read_fs_root_no_name(fs_info, &key);
3188 if (IS_ERR(local_root)) 3200 if (IS_ERR(local_root)) {
3201 srcu_read_unlock(&fs_info->subvol_srcu, srcu_index);
3189 return PTR_ERR(local_root); 3202 return PTR_ERR(local_root);
3203 }
3190 3204
3191 key.type = BTRFS_INODE_ITEM_KEY; 3205 key.type = BTRFS_INODE_ITEM_KEY;
3192 key.objectid = inum; 3206 key.objectid = inum;
3193 key.offset = 0; 3207 key.offset = 0;
3194 inode = btrfs_iget(fs_info->sb, &key, local_root, NULL); 3208 inode = btrfs_iget(fs_info->sb, &key, local_root, NULL);
3209 srcu_read_unlock(&fs_info->subvol_srcu, srcu_index);
3195 if (IS_ERR(inode)) 3210 if (IS_ERR(inode))
3196 return PTR_ERR(inode); 3211 return PTR_ERR(inode);
3197 3212
diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
index 54454542ad40..321b7fb4e441 100644
--- a/fs/btrfs/send.c
+++ b/fs/btrfs/send.c
@@ -1814,8 +1814,10 @@ static int name_cache_insert(struct send_ctx *sctx,
1814 (unsigned long)nce->ino); 1814 (unsigned long)nce->ino);
1815 if (!nce_head) { 1815 if (!nce_head) {
1816 nce_head = kmalloc(sizeof(*nce_head), GFP_NOFS); 1816 nce_head = kmalloc(sizeof(*nce_head), GFP_NOFS);
1817 if (!nce_head) 1817 if (!nce_head) {
1818 kfree(nce);
1818 return -ENOMEM; 1819 return -ENOMEM;
1820 }
1819 INIT_LIST_HEAD(nce_head); 1821 INIT_LIST_HEAD(nce_head);
1820 1822
1821 ret = radix_tree_insert(&sctx->name_cache, nce->ino, nce_head); 1823 ret = radix_tree_insert(&sctx->name_cache, nce->ino, nce_head);
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 99545df1b86c..d8982e9601d3 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -267,7 +267,7 @@ void __btrfs_abort_transaction(struct btrfs_trans_handle *trans,
267 function, line, errstr); 267 function, line, errstr);
268 return; 268 return;
269 } 269 }
270 trans->transaction->aborted = errno; 270 ACCESS_ONCE(trans->transaction->aborted) = errno;
271 __btrfs_std_error(root->fs_info, function, line, errno, NULL); 271 __btrfs_std_error(root->fs_info, function, line, errno, NULL);
272} 272}
273/* 273/*
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 87fac9a21ea5..4c0067c4f76d 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -112,7 +112,6 @@ loop:
112 * to redo the trans_no_join checks above 112 * to redo the trans_no_join checks above
113 */ 113 */
114 kmem_cache_free(btrfs_transaction_cachep, cur_trans); 114 kmem_cache_free(btrfs_transaction_cachep, cur_trans);
115 cur_trans = fs_info->running_transaction;
116 goto loop; 115 goto loop;
117 } else if (fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) { 116 } else if (fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) {
118 spin_unlock(&fs_info->trans_lock); 117 spin_unlock(&fs_info->trans_lock);
@@ -333,12 +332,14 @@ start_transaction(struct btrfs_root *root, u64 num_items, int type,
333 &root->fs_info->trans_block_rsv, 332 &root->fs_info->trans_block_rsv,
334 num_bytes, flush); 333 num_bytes, flush);
335 if (ret) 334 if (ret)
336 return ERR_PTR(ret); 335 goto reserve_fail;
337 } 336 }
338again: 337again:
339 h = kmem_cache_alloc(btrfs_trans_handle_cachep, GFP_NOFS); 338 h = kmem_cache_alloc(btrfs_trans_handle_cachep, GFP_NOFS);
340 if (!h) 339 if (!h) {
341 return ERR_PTR(-ENOMEM); 340 ret = -ENOMEM;
341 goto alloc_fail;
342 }
342 343
343 /* 344 /*
344 * If we are JOIN_NOLOCK we're already committing a transaction and 345 * If we are JOIN_NOLOCK we're already committing a transaction and
@@ -365,11 +366,7 @@ again:
365 if (ret < 0) { 366 if (ret < 0) {
366 /* We must get the transaction if we are JOIN_NOLOCK. */ 367 /* We must get the transaction if we are JOIN_NOLOCK. */
367 BUG_ON(type == TRANS_JOIN_NOLOCK); 368 BUG_ON(type == TRANS_JOIN_NOLOCK);
368 369 goto join_fail;
369 if (type < TRANS_JOIN_NOLOCK)
370 sb_end_intwrite(root->fs_info->sb);
371 kmem_cache_free(btrfs_trans_handle_cachep, h);
372 return ERR_PTR(ret);
373 } 370 }
374 371
375 cur_trans = root->fs_info->running_transaction; 372 cur_trans = root->fs_info->running_transaction;
@@ -410,6 +407,19 @@ got_it:
410 if (!current->journal_info && type != TRANS_USERSPACE) 407 if (!current->journal_info && type != TRANS_USERSPACE)
411 current->journal_info = h; 408 current->journal_info = h;
412 return h; 409 return h;
410
411join_fail:
412 if (type < TRANS_JOIN_NOLOCK)
413 sb_end_intwrite(root->fs_info->sb);
414 kmem_cache_free(btrfs_trans_handle_cachep, h);
415alloc_fail:
416 if (num_bytes)
417 btrfs_block_rsv_release(root, &root->fs_info->trans_block_rsv,
418 num_bytes);
419reserve_fail:
420 if (qgroup_reserved)
421 btrfs_qgroup_free(root, qgroup_reserved);
422 return ERR_PTR(ret);
413} 423}
414 424
415struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root, 425struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root,
@@ -1468,7 +1478,8 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
1468 goto cleanup_transaction; 1478 goto cleanup_transaction;
1469 } 1479 }
1470 1480
1471 if (cur_trans->aborted) { 1481 /* Stop the commit early if ->aborted is set */
1482 if (unlikely(ACCESS_ONCE(cur_trans->aborted))) {
1472 ret = cur_trans->aborted; 1483 ret = cur_trans->aborted;
1473 goto cleanup_transaction; 1484 goto cleanup_transaction;
1474 } 1485 }
@@ -1574,6 +1585,11 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
1574 wait_event(cur_trans->writer_wait, 1585 wait_event(cur_trans->writer_wait,
1575 atomic_read(&cur_trans->num_writers) == 1); 1586 atomic_read(&cur_trans->num_writers) == 1);
1576 1587
1588 /* ->aborted might be set after the previous check, so check it */
1589 if (unlikely(ACCESS_ONCE(cur_trans->aborted))) {
1590 ret = cur_trans->aborted;
1591 goto cleanup_transaction;
1592 }
1577 /* 1593 /*
1578 * the reloc mutex makes sure that we stop 1594 * the reloc mutex makes sure that we stop
1579 * the balancing code from coming in and moving 1595 * the balancing code from coming in and moving
@@ -1657,6 +1673,17 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
1657 goto cleanup_transaction; 1673 goto cleanup_transaction;
1658 } 1674 }
1659 1675
1676 /*
1677 * The tasks which save the space cache and inode cache may also
1678 * update ->aborted, check it.
1679 */
1680 if (unlikely(ACCESS_ONCE(cur_trans->aborted))) {
1681 ret = cur_trans->aborted;
1682 mutex_unlock(&root->fs_info->tree_log_mutex);
1683 mutex_unlock(&root->fs_info->reloc_mutex);
1684 goto cleanup_transaction;
1685 }
1686
1660 btrfs_prepare_extent_commit(trans, root); 1687 btrfs_prepare_extent_commit(trans, root);
1661 1688
1662 cur_trans = root->fs_info->running_transaction; 1689 cur_trans = root->fs_info->running_transaction;
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 83186c7e45d4..9027bb1e7466 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -3357,6 +3357,11 @@ static int log_one_extent(struct btrfs_trans_handle *trans,
3357 if (skip_csum) 3357 if (skip_csum)
3358 return 0; 3358 return 0;
3359 3359
3360 if (em->compress_type) {
3361 csum_offset = 0;
3362 csum_len = block_len;
3363 }
3364
3360 /* block start is already adjusted for the file extent offset. */ 3365 /* block start is already adjusted for the file extent offset. */
3361 ret = btrfs_lookup_csums_range(log->fs_info->csum_root, 3366 ret = btrfs_lookup_csums_range(log->fs_info->csum_root,
3362 em->block_start + csum_offset, 3367 em->block_start + csum_offset,
@@ -3410,13 +3415,13 @@ static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans,
3410 em = list_entry(extents.next, struct extent_map, list); 3415 em = list_entry(extents.next, struct extent_map, list);
3411 3416
3412 list_del_init(&em->list); 3417 list_del_init(&em->list);
3413 clear_bit(EXTENT_FLAG_LOGGING, &em->flags);
3414 3418
3415 /* 3419 /*
3416 * If we had an error we just need to delete everybody from our 3420 * If we had an error we just need to delete everybody from our
3417 * private list. 3421 * private list.
3418 */ 3422 */
3419 if (ret) { 3423 if (ret) {
3424 clear_em_logging(tree, em);
3420 free_extent_map(em); 3425 free_extent_map(em);
3421 continue; 3426 continue;
3422 } 3427 }
@@ -3424,8 +3429,9 @@ static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans,
3424 write_unlock(&tree->lock); 3429 write_unlock(&tree->lock);
3425 3430
3426 ret = log_one_extent(trans, inode, root, em, path); 3431 ret = log_one_extent(trans, inode, root, em, path);
3427 free_extent_map(em);
3428 write_lock(&tree->lock); 3432 write_lock(&tree->lock);
3433 clear_em_logging(tree, em);
3434 free_extent_map(em);
3429 } 3435 }
3430 WARN_ON(!list_empty(&extents)); 3436 WARN_ON(!list_empty(&extents));
3431 write_unlock(&tree->lock); 3437 write_unlock(&tree->lock);
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 5cce6aa74012..5cbb7f4b1672 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -1431,7 +1431,7 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path)
1431 } 1431 }
1432 } else { 1432 } else {
1433 ret = btrfs_get_bdev_and_sb(device_path, 1433 ret = btrfs_get_bdev_and_sb(device_path,
1434 FMODE_READ | FMODE_EXCL, 1434 FMODE_WRITE | FMODE_EXCL,
1435 root->fs_info->bdev_holder, 0, 1435 root->fs_info->bdev_holder, 0,
1436 &bdev, &bh); 1436 &bdev, &bh);
1437 if (ret) 1437 if (ret)
@@ -1556,7 +1556,8 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path)
1556 ret = 0; 1556 ret = 0;
1557 1557
1558 /* Notify udev that device has changed */ 1558 /* Notify udev that device has changed */
1559 btrfs_kobject_uevent(bdev, KOBJ_CHANGE); 1559 if (bdev)
1560 btrfs_kobject_uevent(bdev, KOBJ_CHANGE);
1560 1561
1561error_brelse: 1562error_brelse:
1562 brelse(bh); 1563 brelse(bh);
@@ -2614,7 +2615,14 @@ static int chunk_usage_filter(struct btrfs_fs_info *fs_info, u64 chunk_offset,
2614 cache = btrfs_lookup_block_group(fs_info, chunk_offset); 2615 cache = btrfs_lookup_block_group(fs_info, chunk_offset);
2615 chunk_used = btrfs_block_group_used(&cache->item); 2616 chunk_used = btrfs_block_group_used(&cache->item);
2616 2617
2617 user_thresh = div_factor_fine(cache->key.offset, bargs->usage); 2618 if (bargs->usage == 0)
2619 user_thresh = 0;
2620 else if (bargs->usage > 100)
2621 user_thresh = cache->key.offset;
2622 else
2623 user_thresh = div_factor_fine(cache->key.offset,
2624 bargs->usage);
2625
2618 if (chunk_used < user_thresh) 2626 if (chunk_used < user_thresh)
2619 ret = 0; 2627 ret = 0;
2620 2628
@@ -2959,6 +2967,8 @@ static void __cancel_balance(struct btrfs_fs_info *fs_info)
2959 unset_balance_control(fs_info); 2967 unset_balance_control(fs_info);
2960 ret = del_balance_item(fs_info->tree_root); 2968 ret = del_balance_item(fs_info->tree_root);
2961 BUG_ON(ret); 2969 BUG_ON(ret);
2970
2971 atomic_set(&fs_info->mutually_exclusive_operation_running, 0);
2962} 2972}
2963 2973
2964void update_ioctl_balance_args(struct btrfs_fs_info *fs_info, int lock, 2974void update_ioctl_balance_args(struct btrfs_fs_info *fs_info, int lock,
@@ -3138,8 +3148,10 @@ int btrfs_balance(struct btrfs_balance_control *bctl,
3138out: 3148out:
3139 if (bctl->flags & BTRFS_BALANCE_RESUME) 3149 if (bctl->flags & BTRFS_BALANCE_RESUME)
3140 __cancel_balance(fs_info); 3150 __cancel_balance(fs_info);
3141 else 3151 else {
3142 kfree(bctl); 3152 kfree(bctl);
3153 atomic_set(&fs_info->mutually_exclusive_operation_running, 0);
3154 }
3143 return ret; 3155 return ret;
3144} 3156}
3145 3157
@@ -3156,7 +3168,6 @@ static int balance_kthread(void *data)
3156 ret = btrfs_balance(fs_info->balance_ctl, NULL); 3168 ret = btrfs_balance(fs_info->balance_ctl, NULL);
3157 } 3169 }
3158 3170
3159 atomic_set(&fs_info->mutually_exclusive_operation_running, 0);
3160 mutex_unlock(&fs_info->balance_mutex); 3171 mutex_unlock(&fs_info->balance_mutex);
3161 mutex_unlock(&fs_info->volume_mutex); 3172 mutex_unlock(&fs_info->volume_mutex);
3162 3173
@@ -3179,7 +3190,6 @@ int btrfs_resume_balance_async(struct btrfs_fs_info *fs_info)
3179 return 0; 3190 return 0;
3180 } 3191 }
3181 3192
3182 WARN_ON(atomic_xchg(&fs_info->mutually_exclusive_operation_running, 1));
3183 tsk = kthread_run(balance_kthread, fs_info, "btrfs-balance"); 3193 tsk = kthread_run(balance_kthread, fs_info, "btrfs-balance");
3184 if (IS_ERR(tsk)) 3194 if (IS_ERR(tsk))
3185 return PTR_ERR(tsk); 3195 return PTR_ERR(tsk);
@@ -3233,6 +3243,8 @@ int btrfs_recover_balance(struct btrfs_fs_info *fs_info)
3233 btrfs_balance_sys(leaf, item, &disk_bargs); 3243 btrfs_balance_sys(leaf, item, &disk_bargs);
3234 btrfs_disk_balance_args_to_cpu(&bctl->sys, &disk_bargs); 3244 btrfs_disk_balance_args_to_cpu(&bctl->sys, &disk_bargs);
3235 3245
3246 WARN_ON(atomic_xchg(&fs_info->mutually_exclusive_operation_running, 1));
3247
3236 mutex_lock(&fs_info->volume_mutex); 3248 mutex_lock(&fs_info->volume_mutex);
3237 mutex_lock(&fs_info->balance_mutex); 3249 mutex_lock(&fs_info->balance_mutex);
3238 3250
@@ -3496,7 +3508,7 @@ struct btrfs_raid_attr btrfs_raid_array[BTRFS_NR_RAID_TYPES] = {
3496 { 1, 1, 2, 2, 2, 2 /* raid1 */ }, 3508 { 1, 1, 2, 2, 2, 2 /* raid1 */ },
3497 { 1, 2, 1, 1, 1, 2 /* dup */ }, 3509 { 1, 2, 1, 1, 1, 2 /* dup */ },
3498 { 1, 1, 0, 2, 1, 1 /* raid0 */ }, 3510 { 1, 1, 0, 2, 1, 1 /* raid0 */ },
3499 { 1, 1, 0, 1, 1, 1 /* single */ }, 3511 { 1, 1, 1, 1, 1, 1 /* single */ },
3500}; 3512};
3501 3513
3502static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans, 3514static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
diff --git a/fs/buffer.c b/fs/buffer.c
index c017a2dfb909..62169c192c21 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -2359,7 +2359,7 @@ int __block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf,
2359 if (unlikely(ret < 0)) 2359 if (unlikely(ret < 0))
2360 goto out_unlock; 2360 goto out_unlock;
2361 set_page_dirty(page); 2361 set_page_dirty(page);
2362 wait_on_page_writeback(page); 2362 wait_for_stable_page(page);
2363 return 0; 2363 return 0;
2364out_unlock: 2364out_unlock:
2365 unlock_page(page); 2365 unlock_page(page);
@@ -2935,6 +2935,7 @@ static void guard_bh_eod(int rw, struct bio *bio, struct buffer_head *bh)
2935 void *kaddr = kmap_atomic(bh->b_page); 2935 void *kaddr = kmap_atomic(bh->b_page);
2936 memset(kaddr + bh_offset(bh) + bytes, 0, bh->b_size - bytes); 2936 memset(kaddr + bh_offset(bh) + bytes, 0, bh->b_size - bytes);
2937 kunmap_atomic(kaddr); 2937 kunmap_atomic(kaddr);
2938 flush_dcache_page(bh->b_page);
2938 } 2939 }
2939} 2940}
2940 2941
@@ -3226,7 +3227,7 @@ static struct kmem_cache *bh_cachep __read_mostly;
3226 * Once the number of bh's in the machine exceeds this level, we start 3227 * Once the number of bh's in the machine exceeds this level, we start
3227 * stripping them in writeback. 3228 * stripping them in writeback.
3228 */ 3229 */
3229static int max_buffer_heads; 3230static unsigned long max_buffer_heads;
3230 3231
3231int buffer_heads_over_limit; 3232int buffer_heads_over_limit;
3232 3233
@@ -3342,7 +3343,7 @@ EXPORT_SYMBOL(bh_submit_read);
3342 3343
3343void __init buffer_init(void) 3344void __init buffer_init(void)
3344{ 3345{
3345 int nrpages; 3346 unsigned long nrpages;
3346 3347
3347 bh_cachep = kmem_cache_create("buffer_head", 3348 bh_cachep = kmem_cache_create("buffer_head",
3348 sizeof(struct buffer_head), 0, 3349 sizeof(struct buffer_head), 0,
diff --git a/fs/ceph/Kconfig b/fs/ceph/Kconfig
index 9eb134ea6eb2..49bc78243db9 100644
--- a/fs/ceph/Kconfig
+++ b/fs/ceph/Kconfig
@@ -1,6 +1,6 @@
1config CEPH_FS 1config CEPH_FS
2 tristate "Ceph distributed file system (EXPERIMENTAL)" 2 tristate "Ceph distributed file system"
3 depends on INET && EXPERIMENTAL 3 depends on INET
4 select CEPH_LIB 4 select CEPH_LIB
5 select LIBCRC32C 5 select LIBCRC32C
6 select CRYPTO_AES 6 select CRYPTO_AES
diff --git a/fs/cifs/Kconfig b/fs/cifs/Kconfig
index 21ff76c22a17..2906ee276408 100644
--- a/fs/cifs/Kconfig
+++ b/fs/cifs/Kconfig
@@ -155,14 +155,14 @@ config CIFS_DFS_UPCALL
155 points. If unsure, say N. 155 points. If unsure, say N.
156 156
157config CIFS_NFSD_EXPORT 157config CIFS_NFSD_EXPORT
158 bool "Allow nfsd to export CIFS file system (EXPERIMENTAL)" 158 bool "Allow nfsd to export CIFS file system"
159 depends on CIFS && EXPERIMENTAL && BROKEN 159 depends on CIFS && BROKEN
160 help 160 help
161 Allows NFS server to export a CIFS mounted share (nfsd over cifs) 161 Allows NFS server to export a CIFS mounted share (nfsd over cifs)
162 162
163config CIFS_SMB2 163config CIFS_SMB2
164 bool "SMB2 network file system support (EXPERIMENTAL)" 164 bool "SMB2 network file system support"
165 depends on CIFS && EXPERIMENTAL && INET 165 depends on CIFS && INET
166 select NLS 166 select NLS
167 select KEYS 167 select KEYS
168 select FSCACHE 168 select FSCACHE
diff --git a/fs/cifs/cifs_dfs_ref.c b/fs/cifs/cifs_dfs_ref.c
index ce5cbd717bfc..210fce2df308 100644
--- a/fs/cifs/cifs_dfs_ref.c
+++ b/fs/cifs/cifs_dfs_ref.c
@@ -226,6 +226,8 @@ compose_mount_options_out:
226compose_mount_options_err: 226compose_mount_options_err:
227 kfree(mountdata); 227 kfree(mountdata);
228 mountdata = ERR_PTR(rc); 228 mountdata = ERR_PTR(rc);
229 kfree(*devname);
230 *devname = NULL;
229 goto compose_mount_options_out; 231 goto compose_mount_options_out;
230} 232}
231 233
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 17590731786d..9be09b21b4e0 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -228,7 +228,6 @@ cifs_alloc_inode(struct super_block *sb)
228 cifs_set_oplock_level(cifs_inode, 0); 228 cifs_set_oplock_level(cifs_inode, 0);
229 cifs_inode->delete_pending = false; 229 cifs_inode->delete_pending = false;
230 cifs_inode->invalid_mapping = false; 230 cifs_inode->invalid_mapping = false;
231 cifs_inode->leave_pages_clean = false;
232 cifs_inode->vfs_inode.i_blkbits = 14; /* 2**14 = CIFS_MAX_MSGSIZE */ 231 cifs_inode->vfs_inode.i_blkbits = 14; /* 2**14 = CIFS_MAX_MSGSIZE */
233 cifs_inode->server_eof = 0; 232 cifs_inode->server_eof = 0;
234 cifs_inode->uniqueid = 0; 233 cifs_inode->uniqueid = 0;
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index aaef57beba0e..4f07f6fbe494 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -386,6 +386,7 @@ struct smb_version_values {
386 unsigned int cap_unix; 386 unsigned int cap_unix;
387 unsigned int cap_nt_find; 387 unsigned int cap_nt_find;
388 unsigned int cap_large_files; 388 unsigned int cap_large_files;
389 unsigned int oplock_read;
389}; 390};
390 391
391#define HEADER_SIZE(server) (server->vals->header_size) 392#define HEADER_SIZE(server) (server->vals->header_size)
@@ -1030,7 +1031,6 @@ struct cifsInodeInfo {
1030 bool clientCanCacheAll; /* read and writebehind oplock */ 1031 bool clientCanCacheAll; /* read and writebehind oplock */
1031 bool delete_pending; /* DELETE_ON_CLOSE is set */ 1032 bool delete_pending; /* DELETE_ON_CLOSE is set */
1032 bool invalid_mapping; /* pagecache is invalid */ 1033 bool invalid_mapping; /* pagecache is invalid */
1033 bool leave_pages_clean; /* protected by i_mutex, not set pages dirty */
1034 unsigned long time; /* jiffies of last update of inode */ 1034 unsigned long time; /* jiffies of last update of inode */
1035 u64 server_eof; /* current file size on server -- protected by i_lock */ 1035 u64 server_eof; /* current file size on server -- protected by i_lock */
1036 u64 uniqueid; /* server inode number */ 1036 u64 uniqueid; /* server inode number */
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 067b0e169fe4..4474a57f30ab 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -1947,7 +1947,7 @@ srcip_matches(struct sockaddr *srcaddr, struct sockaddr *rhs)
1947 } 1947 }
1948 case AF_INET6: { 1948 case AF_INET6: {
1949 struct sockaddr_in6 *saddr6 = (struct sockaddr_in6 *)srcaddr; 1949 struct sockaddr_in6 *saddr6 = (struct sockaddr_in6 *)srcaddr;
1950 struct sockaddr_in6 *vaddr6 = (struct sockaddr_in6 *)&rhs; 1950 struct sockaddr_in6 *vaddr6 = (struct sockaddr_in6 *)rhs;
1951 return ipv6_addr_equal(&saddr6->sin6_addr, &vaddr6->sin6_addr); 1951 return ipv6_addr_equal(&saddr6->sin6_addr, &vaddr6->sin6_addr);
1952 } 1952 }
1953 default: 1953 default:
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index c23fbd81fe1a..a8d8b589ee0e 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -238,6 +238,23 @@ out:
238 return rc; 238 return rc;
239} 239}
240 240
241static bool
242cifs_has_mand_locks(struct cifsInodeInfo *cinode)
243{
244 struct cifs_fid_locks *cur;
245 bool has_locks = false;
246
247 down_read(&cinode->lock_sem);
248 list_for_each_entry(cur, &cinode->llist, llist) {
249 if (!list_empty(&cur->locks)) {
250 has_locks = true;
251 break;
252 }
253 }
254 up_read(&cinode->lock_sem);
255 return has_locks;
256}
257
241struct cifsFileInfo * 258struct cifsFileInfo *
242cifs_new_fileinfo(struct cifs_fid *fid, struct file *file, 259cifs_new_fileinfo(struct cifs_fid *fid, struct file *file,
243 struct tcon_link *tlink, __u32 oplock) 260 struct tcon_link *tlink, __u32 oplock)
@@ -248,6 +265,7 @@ cifs_new_fileinfo(struct cifs_fid *fid, struct file *file,
248 struct cifsFileInfo *cfile; 265 struct cifsFileInfo *cfile;
249 struct cifs_fid_locks *fdlocks; 266 struct cifs_fid_locks *fdlocks;
250 struct cifs_tcon *tcon = tlink_tcon(tlink); 267 struct cifs_tcon *tcon = tlink_tcon(tlink);
268 struct TCP_Server_Info *server = tcon->ses->server;
251 269
252 cfile = kzalloc(sizeof(struct cifsFileInfo), GFP_KERNEL); 270 cfile = kzalloc(sizeof(struct cifsFileInfo), GFP_KERNEL);
253 if (cfile == NULL) 271 if (cfile == NULL)
@@ -276,12 +294,22 @@ cifs_new_fileinfo(struct cifs_fid *fid, struct file *file,
276 INIT_WORK(&cfile->oplock_break, cifs_oplock_break); 294 INIT_WORK(&cfile->oplock_break, cifs_oplock_break);
277 mutex_init(&cfile->fh_mutex); 295 mutex_init(&cfile->fh_mutex);
278 296
297 /*
298 * If the server returned a read oplock and we have mandatory brlocks,
299 * set oplock level to None.
300 */
301 if (oplock == server->vals->oplock_read &&
302 cifs_has_mand_locks(cinode)) {
303 cFYI(1, "Reset oplock val from read to None due to mand locks");
304 oplock = 0;
305 }
306
279 spin_lock(&cifs_file_list_lock); 307 spin_lock(&cifs_file_list_lock);
280 if (fid->pending_open->oplock != CIFS_OPLOCK_NO_CHANGE) 308 if (fid->pending_open->oplock != CIFS_OPLOCK_NO_CHANGE && oplock)
281 oplock = fid->pending_open->oplock; 309 oplock = fid->pending_open->oplock;
282 list_del(&fid->pending_open->olist); 310 list_del(&fid->pending_open->olist);
283 311
284 tlink_tcon(tlink)->ses->server->ops->set_fid(cfile, fid, oplock); 312 server->ops->set_fid(cfile, fid, oplock);
285 313
286 list_add(&cfile->tlist, &tcon->openFileList); 314 list_add(&cfile->tlist, &tcon->openFileList);
287 /* if readable file instance put first in list*/ 315 /* if readable file instance put first in list*/
@@ -1422,6 +1450,7 @@ cifs_setlk(struct file *file, struct file_lock *flock, __u32 type,
1422 struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data; 1450 struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
1423 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink); 1451 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1424 struct TCP_Server_Info *server = tcon->ses->server; 1452 struct TCP_Server_Info *server = tcon->ses->server;
1453 struct inode *inode = cfile->dentry->d_inode;
1425 1454
1426 if (posix_lck) { 1455 if (posix_lck) {
1427 int posix_lock_type; 1456 int posix_lock_type;
@@ -1459,6 +1488,21 @@ cifs_setlk(struct file *file, struct file_lock *flock, __u32 type,
1459 if (!rc) 1488 if (!rc)
1460 goto out; 1489 goto out;
1461 1490
1491 /*
1492 * Windows 7 server can delay breaking lease from read to None
1493 * if we set a byte-range lock on a file - break it explicitly
1494 * before sending the lock to the server to be sure the next
1495 * read won't conflict with non-overlapted locks due to
1496 * pagereading.
1497 */
1498 if (!CIFS_I(inode)->clientCanCacheAll &&
1499 CIFS_I(inode)->clientCanCacheRead) {
1500 cifs_invalidate_mapping(inode);
1501 cFYI(1, "Set no oplock for inode=%p due to mand locks",
1502 inode);
1503 CIFS_I(inode)->clientCanCacheRead = false;
1504 }
1505
1462 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length, 1506 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1463 type, 1, 0, wait_flag); 1507 type, 1, 0, wait_flag);
1464 if (rc) { 1508 if (rc) {
@@ -2103,15 +2147,7 @@ static int cifs_write_end(struct file *file, struct address_space *mapping,
2103 } else { 2147 } else {
2104 rc = copied; 2148 rc = copied;
2105 pos += copied; 2149 pos += copied;
2106 /* 2150 set_page_dirty(page);
2107 * When we use strict cache mode and cifs_strict_writev was run
2108 * with level II oplock (indicated by leave_pages_clean field of
2109 * CIFS_I(inode)), we can leave pages clean - cifs_strict_writev
2110 * sent the data to the server itself.
2111 */
2112 if (!CIFS_I(inode)->leave_pages_clean ||
2113 !(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO))
2114 set_page_dirty(page);
2115 } 2151 }
2116 2152
2117 if (rc > 0) { 2153 if (rc > 0) {
@@ -2462,8 +2498,8 @@ ssize_t cifs_user_writev(struct kiocb *iocb, const struct iovec *iov,
2462} 2498}
2463 2499
2464static ssize_t 2500static ssize_t
2465cifs_pagecache_writev(struct kiocb *iocb, const struct iovec *iov, 2501cifs_writev(struct kiocb *iocb, const struct iovec *iov,
2466 unsigned long nr_segs, loff_t pos, bool cache_ex) 2502 unsigned long nr_segs, loff_t pos)
2467{ 2503{
2468 struct file *file = iocb->ki_filp; 2504 struct file *file = iocb->ki_filp;
2469 struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data; 2505 struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
@@ -2485,12 +2521,8 @@ cifs_pagecache_writev(struct kiocb *iocb, const struct iovec *iov,
2485 server->vals->exclusive_lock_type, NULL, 2521 server->vals->exclusive_lock_type, NULL,
2486 CIFS_WRITE_OP)) { 2522 CIFS_WRITE_OP)) {
2487 mutex_lock(&inode->i_mutex); 2523 mutex_lock(&inode->i_mutex);
2488 if (!cache_ex)
2489 cinode->leave_pages_clean = true;
2490 rc = __generic_file_aio_write(iocb, iov, nr_segs, 2524 rc = __generic_file_aio_write(iocb, iov, nr_segs,
2491 &iocb->ki_pos); 2525 &iocb->ki_pos);
2492 if (!cache_ex)
2493 cinode->leave_pages_clean = false;
2494 mutex_unlock(&inode->i_mutex); 2526 mutex_unlock(&inode->i_mutex);
2495 } 2527 }
2496 2528
@@ -2517,60 +2549,32 @@ cifs_strict_writev(struct kiocb *iocb, const struct iovec *iov,
2517 struct cifsFileInfo *cfile = (struct cifsFileInfo *) 2549 struct cifsFileInfo *cfile = (struct cifsFileInfo *)
2518 iocb->ki_filp->private_data; 2550 iocb->ki_filp->private_data;
2519 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink); 2551 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
2520 ssize_t written, written2; 2552 ssize_t written;
2521 /*
2522 * We need to store clientCanCacheAll here to prevent race
2523 * conditions - this value can be changed during an execution
2524 * of generic_file_aio_write. For CIFS it can be changed from
2525 * true to false only, but for SMB2 it can be changed both from
2526 * true to false and vice versa. So, we can end up with a data
2527 * stored in the cache, not marked dirty and not sent to the
2528 * server if this value changes its state from false to true
2529 * after cifs_write_end.
2530 */
2531 bool cache_ex = cinode->clientCanCacheAll;
2532 bool cache_read = cinode->clientCanCacheRead;
2533 int rc;
2534 loff_t saved_pos;
2535 2553
2536 if (cache_ex) { 2554 if (cinode->clientCanCacheAll) {
2537 if (cap_unix(tcon->ses) && 2555 if (cap_unix(tcon->ses) &&
2538 ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0) && 2556 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability))
2539 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu( 2557 && ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
2540 tcon->fsUnixInfo.Capability)))
2541 return generic_file_aio_write(iocb, iov, nr_segs, pos); 2558 return generic_file_aio_write(iocb, iov, nr_segs, pos);
2542 return cifs_pagecache_writev(iocb, iov, nr_segs, pos, cache_ex); 2559 return cifs_writev(iocb, iov, nr_segs, pos);
2543 } 2560 }
2544
2545 /* 2561 /*
2546 * For files without exclusive oplock in strict cache mode we need to 2562 * For non-oplocked files in strict cache mode we need to write the data
2547 * write the data to the server exactly from the pos to pos+len-1 rather 2563 * to the server exactly from the pos to pos+len-1 rather than flush all
2548 * than flush all affected pages because it may cause a error with 2564 * affected pages because it may cause a error with mandatory locks on
2549 * mandatory locks on these pages but not on the region from pos to 2565 * these pages but not on the region from pos to ppos+len-1.
2550 * ppos+len-1.
2551 */ 2566 */
2552 written = cifs_user_writev(iocb, iov, nr_segs, pos); 2567 written = cifs_user_writev(iocb, iov, nr_segs, pos);
2553 if (!cache_read || written <= 0) 2568 if (written > 0 && cinode->clientCanCacheRead) {
2554 return written; 2569 /*
2555 2570 * Windows 7 server can delay breaking level2 oplock if a write
2556 saved_pos = iocb->ki_pos; 2571 * request comes - break it on the client to prevent reading
2557 iocb->ki_pos = pos; 2572 * an old data.
2558 /* we have a read oplock - need to store a data in the page cache */ 2573 */
2559 if (cap_unix(tcon->ses) && 2574 cifs_invalidate_mapping(inode);
2560 ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0) && 2575 cFYI(1, "Set no oplock for inode=%p after a write operation",
2561 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu( 2576 inode);
2562 tcon->fsUnixInfo.Capability))) 2577 cinode->clientCanCacheRead = false;
2563 written2 = generic_file_aio_write(iocb, iov, nr_segs, pos);
2564 else
2565 written2 = cifs_pagecache_writev(iocb, iov, nr_segs, pos,
2566 cache_ex);
2567 /* errors occured during writing - invalidate the page cache */
2568 if (written2 < 0) {
2569 rc = cifs_invalidate_mapping(inode);
2570 if (rc)
2571 written = (ssize_t)rc;
2572 else
2573 iocb->ki_pos = saved_pos;
2574 } 2578 }
2575 return written; 2579 return written;
2576} 2580}
@@ -3577,6 +3581,13 @@ void cifs_oplock_break(struct work_struct *work)
3577 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink); 3581 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
3578 int rc = 0; 3582 int rc = 0;
3579 3583
3584 if (!cinode->clientCanCacheAll && cinode->clientCanCacheRead &&
3585 cifs_has_mand_locks(cinode)) {
3586 cFYI(1, "Reset oplock to None for inode=%p due to mand locks",
3587 inode);
3588 cinode->clientCanCacheRead = false;
3589 }
3590
3580 if (inode && S_ISREG(inode->i_mode)) { 3591 if (inode && S_ISREG(inode->i_mode)) {
3581 if (cinode->clientCanCacheRead) 3592 if (cinode->clientCanCacheRead)
3582 break_lease(inode, O_RDONLY); 3593 break_lease(inode, O_RDONLY);
diff --git a/fs/cifs/link.c b/fs/cifs/link.c
index 51dc2fb6e854..9f6c4c45d21e 100644
--- a/fs/cifs/link.c
+++ b/fs/cifs/link.c
@@ -76,7 +76,7 @@ symlink_hash(unsigned int link_len, const char *link_str, u8 *md5_hash)
76 } 76 }
77 rc = crypto_shash_update(&sdescmd5->shash, link_str, link_len); 77 rc = crypto_shash_update(&sdescmd5->shash, link_str, link_len);
78 if (rc) { 78 if (rc) {
79 cERROR(1, "%s: Could not update iwth link_str", __func__); 79 cERROR(1, "%s: Could not update with link_str", __func__);
80 goto symlink_hash_err; 80 goto symlink_hash_err;
81 } 81 }
82 rc = crypto_shash_final(&sdescmd5->shash, md5_hash); 82 rc = crypto_shash_final(&sdescmd5->shash, md5_hash);
diff --git a/fs/cifs/smb1ops.c b/fs/cifs/smb1ops.c
index a5d234c8d5d9..47bc5a87f94e 100644
--- a/fs/cifs/smb1ops.c
+++ b/fs/cifs/smb1ops.c
@@ -53,6 +53,13 @@ send_nt_cancel(struct TCP_Server_Info *server, void *buf,
53 mutex_unlock(&server->srv_mutex); 53 mutex_unlock(&server->srv_mutex);
54 return rc; 54 return rc;
55 } 55 }
56
57 /*
58 * The response to this call was already factored into the sequence
59 * number when the call went out, so we must adjust it back downward
60 * after signing here.
61 */
62 --server->sequence_number;
56 rc = smb_send(server, in_buf, be32_to_cpu(in_buf->smb_buf_length)); 63 rc = smb_send(server, in_buf, be32_to_cpu(in_buf->smb_buf_length));
57 mutex_unlock(&server->srv_mutex); 64 mutex_unlock(&server->srv_mutex);
58 65
@@ -952,4 +959,5 @@ struct smb_version_values smb1_values = {
952 .cap_unix = CAP_UNIX, 959 .cap_unix = CAP_UNIX,
953 .cap_nt_find = CAP_NT_SMBS | CAP_NT_FIND, 960 .cap_nt_find = CAP_NT_SMBS | CAP_NT_FIND,
954 .cap_large_files = CAP_LARGE_FILES, 961 .cap_large_files = CAP_LARGE_FILES,
962 .oplock_read = OPLOCK_READ,
955}; 963};
diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c
index d79de7bc4435..c9c7aa7ed966 100644
--- a/fs/cifs/smb2ops.c
+++ b/fs/cifs/smb2ops.c
@@ -708,6 +708,7 @@ struct smb_version_values smb20_values = {
708 .cap_unix = 0, 708 .cap_unix = 0,
709 .cap_nt_find = SMB2_NT_FIND, 709 .cap_nt_find = SMB2_NT_FIND,
710 .cap_large_files = SMB2_LARGE_FILES, 710 .cap_large_files = SMB2_LARGE_FILES,
711 .oplock_read = SMB2_OPLOCK_LEVEL_II,
711}; 712};
712 713
713struct smb_version_values smb21_values = { 714struct smb_version_values smb21_values = {
@@ -725,6 +726,7 @@ struct smb_version_values smb21_values = {
725 .cap_unix = 0, 726 .cap_unix = 0,
726 .cap_nt_find = SMB2_NT_FIND, 727 .cap_nt_find = SMB2_NT_FIND,
727 .cap_large_files = SMB2_LARGE_FILES, 728 .cap_large_files = SMB2_LARGE_FILES,
729 .oplock_read = SMB2_OPLOCK_LEVEL_II,
728}; 730};
729 731
730struct smb_version_values smb30_values = { 732struct smb_version_values smb30_values = {
diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c
index 76d974c952fe..1a528680ec5a 100644
--- a/fs/cifs/transport.c
+++ b/fs/cifs/transport.c
@@ -144,9 +144,6 @@ smb_send_kvec(struct TCP_Server_Info *server, struct kvec *iov, size_t n_vec,
144 144
145 *sent = 0; 145 *sent = 0;
146 146
147 if (ssocket == NULL)
148 return -ENOTSOCK; /* BB eventually add reconnect code here */
149
150 smb_msg.msg_name = (struct sockaddr *) &server->dstaddr; 147 smb_msg.msg_name = (struct sockaddr *) &server->dstaddr;
151 smb_msg.msg_namelen = sizeof(struct sockaddr); 148 smb_msg.msg_namelen = sizeof(struct sockaddr);
152 smb_msg.msg_control = NULL; 149 smb_msg.msg_control = NULL;
@@ -291,6 +288,9 @@ smb_send_rqst(struct TCP_Server_Info *server, struct smb_rqst *rqst)
291 struct socket *ssocket = server->ssocket; 288 struct socket *ssocket = server->ssocket;
292 int val = 1; 289 int val = 1;
293 290
291 if (ssocket == NULL)
292 return -ENOTSOCK;
293
294 cFYI(1, "Sending smb: smb_len=%u", smb_buf_length); 294 cFYI(1, "Sending smb: smb_len=%u", smb_buf_length);
295 dump_smb(iov[0].iov_base, iov[0].iov_len); 295 dump_smb(iov[0].iov_base, iov[0].iov_len);
296 296
diff --git a/fs/compat.c b/fs/compat.c
index 015e1e1f87c6..fe40fde29111 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -1278,8 +1278,7 @@ compat_sys_vmsplice(int fd, const struct compat_iovec __user *iov32,
1278 * Exactly like fs/open.c:sys_open(), except that it doesn't set the 1278 * Exactly like fs/open.c:sys_open(), except that it doesn't set the
1279 * O_LARGEFILE flag. 1279 * O_LARGEFILE flag.
1280 */ 1280 */
1281asmlinkage long 1281COMPAT_SYSCALL_DEFINE3(open, const char __user *, filename, int, flags, umode_t, mode)
1282compat_sys_open(const char __user *filename, int flags, umode_t mode)
1283{ 1282{
1284 return do_sys_open(AT_FDCWD, filename, flags, mode); 1283 return do_sys_open(AT_FDCWD, filename, flags, mode);
1285} 1284}
@@ -1288,8 +1287,7 @@ compat_sys_open(const char __user *filename, int flags, umode_t mode)
1288 * Exactly like fs/open.c:sys_openat(), except that it doesn't set the 1287 * Exactly like fs/open.c:sys_openat(), except that it doesn't set the
1289 * O_LARGEFILE flag. 1288 * O_LARGEFILE flag.
1290 */ 1289 */
1291asmlinkage long 1290COMPAT_SYSCALL_DEFINE4(openat, int, dfd, const char __user *, filename, int, flags, umode_t, mode)
1292compat_sys_openat(unsigned int dfd, const char __user *filename, int flags, umode_t mode)
1293{ 1291{
1294 return do_sys_open(dfd, filename, flags, mode); 1292 return do_sys_open(dfd, filename, flags, mode);
1295} 1293}
@@ -1739,55 +1737,13 @@ asmlinkage long compat_sys_signalfd(int ufd,
1739} 1737}
1740#endif /* CONFIG_SIGNALFD */ 1738#endif /* CONFIG_SIGNALFD */
1741 1739
1742#ifdef CONFIG_TIMERFD
1743
1744asmlinkage long compat_sys_timerfd_settime(int ufd, int flags,
1745 const struct compat_itimerspec __user *utmr,
1746 struct compat_itimerspec __user *otmr)
1747{
1748 int error;
1749 struct itimerspec t;
1750 struct itimerspec __user *ut;
1751
1752 if (get_compat_itimerspec(&t, utmr))
1753 return -EFAULT;
1754 ut = compat_alloc_user_space(2 * sizeof(struct itimerspec));
1755 if (copy_to_user(&ut[0], &t, sizeof(t)))
1756 return -EFAULT;
1757 error = sys_timerfd_settime(ufd, flags, &ut[0], &ut[1]);
1758 if (!error && otmr)
1759 error = (copy_from_user(&t, &ut[1], sizeof(struct itimerspec)) ||
1760 put_compat_itimerspec(otmr, &t)) ? -EFAULT: 0;
1761
1762 return error;
1763}
1764
1765asmlinkage long compat_sys_timerfd_gettime(int ufd,
1766 struct compat_itimerspec __user *otmr)
1767{
1768 int error;
1769 struct itimerspec t;
1770 struct itimerspec __user *ut;
1771
1772 ut = compat_alloc_user_space(sizeof(struct itimerspec));
1773 error = sys_timerfd_gettime(ufd, ut);
1774 if (!error)
1775 error = (copy_from_user(&t, ut, sizeof(struct itimerspec)) ||
1776 put_compat_itimerspec(otmr, &t)) ? -EFAULT: 0;
1777
1778 return error;
1779}
1780
1781#endif /* CONFIG_TIMERFD */
1782
1783#ifdef CONFIG_FHANDLE 1740#ifdef CONFIG_FHANDLE
1784/* 1741/*
1785 * Exactly like fs/open.c:sys_open_by_handle_at(), except that it 1742 * Exactly like fs/open.c:sys_open_by_handle_at(), except that it
1786 * doesn't set the O_LARGEFILE flag. 1743 * doesn't set the O_LARGEFILE flag.
1787 */ 1744 */
1788asmlinkage long 1745COMPAT_SYSCALL_DEFINE3(open_by_handle_at, int, mountdirfd,
1789compat_sys_open_by_handle_at(int mountdirfd, 1746 struct file_handle __user *, handle, int, flags)
1790 struct file_handle __user *handle, int flags)
1791{ 1747{
1792 return do_handle_open(mountdirfd, handle, flags); 1748 return do_handle_open(mountdirfd, handle, flags);
1793} 1749}
diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c
index 712b10f64c70..e9dcfa3c208c 100644
--- a/fs/configfs/dir.c
+++ b/fs/configfs/dir.c
@@ -1037,10 +1037,11 @@ static int configfs_dump(struct configfs_dirent *sd, int level)
1037static int configfs_depend_prep(struct dentry *origin, 1037static int configfs_depend_prep(struct dentry *origin,
1038 struct config_item *target) 1038 struct config_item *target)
1039{ 1039{
1040 struct configfs_dirent *child_sd, *sd = origin->d_fsdata; 1040 struct configfs_dirent *child_sd, *sd;
1041 int ret = 0; 1041 int ret = 0;
1042 1042
1043 BUG_ON(!origin || !sd); 1043 BUG_ON(!origin || !origin->d_fsdata);
1044 sd = origin->d_fsdata;
1044 1045
1045 if (sd->s_element == target) /* Boo-yah */ 1046 if (sd->s_element == target) /* Boo-yah */
1046 goto out; 1047 goto out;
diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c
index 153bb1e42e63..0c4f80b447fb 100644
--- a/fs/debugfs/inode.c
+++ b/fs/debugfs/inode.c
@@ -176,7 +176,7 @@ static int debugfs_parse_options(char *data, struct debugfs_mount_opts *opts)
176 opts->uid = uid; 176 opts->uid = uid;
177 break; 177 break;
178 case Opt_gid: 178 case Opt_gid:
179 if (match_octal(&args[0], &option)) 179 if (match_int(&args[0], &option))
180 return -EINVAL; 180 return -EINVAL;
181 gid = make_kgid(current_user_ns(), option); 181 gid = make_kgid(current_user_ns(), option);
182 if (!gid_valid(gid)) 182 if (!gid_valid(gid))
@@ -322,7 +322,6 @@ static struct dentry *__create_file(const char *name, umode_t mode,
322 if (!parent) 322 if (!parent)
323 parent = debugfs_mount->mnt_root; 323 parent = debugfs_mount->mnt_root;
324 324
325 dentry = NULL;
326 mutex_lock(&parent->d_inode->i_mutex); 325 mutex_lock(&parent->d_inode->i_mutex);
327 dentry = lookup_one_len(name, parent, strlen(name)); 326 dentry = lookup_one_len(name, parent, strlen(name));
328 if (!IS_ERR(dentry)) { 327 if (!IS_ERR(dentry)) {
diff --git a/fs/dlm/dlm_internal.h b/fs/dlm/dlm_internal.h
index 77c0f70f8fe8..e7665c31f7b1 100644
--- a/fs/dlm/dlm_internal.h
+++ b/fs/dlm/dlm_internal.h
@@ -96,10 +96,13 @@ do { \
96} 96}
97 97
98 98
99#define DLM_RTF_SHRINK 0x00000001
100
99struct dlm_rsbtable { 101struct dlm_rsbtable {
100 struct rb_root keep; 102 struct rb_root keep;
101 struct rb_root toss; 103 struct rb_root toss;
102 spinlock_t lock; 104 spinlock_t lock;
105 uint32_t flags;
103}; 106};
104 107
105 108
diff --git a/fs/dlm/lock.c b/fs/dlm/lock.c
index a579f30f237d..f7501651762d 100644
--- a/fs/dlm/lock.c
+++ b/fs/dlm/lock.c
@@ -1132,6 +1132,7 @@ static void toss_rsb(struct kref *kref)
1132 rb_erase(&r->res_hashnode, &ls->ls_rsbtbl[r->res_bucket].keep); 1132 rb_erase(&r->res_hashnode, &ls->ls_rsbtbl[r->res_bucket].keep);
1133 rsb_insert(r, &ls->ls_rsbtbl[r->res_bucket].toss); 1133 rsb_insert(r, &ls->ls_rsbtbl[r->res_bucket].toss);
1134 r->res_toss_time = jiffies; 1134 r->res_toss_time = jiffies;
1135 ls->ls_rsbtbl[r->res_bucket].flags |= DLM_RTF_SHRINK;
1135 if (r->res_lvbptr) { 1136 if (r->res_lvbptr) {
1136 dlm_free_lvb(r->res_lvbptr); 1137 dlm_free_lvb(r->res_lvbptr);
1137 r->res_lvbptr = NULL; 1138 r->res_lvbptr = NULL;
@@ -1659,11 +1660,18 @@ static void shrink_bucket(struct dlm_ls *ls, int b)
1659 char *name; 1660 char *name;
1660 int our_nodeid = dlm_our_nodeid(); 1661 int our_nodeid = dlm_our_nodeid();
1661 int remote_count = 0; 1662 int remote_count = 0;
1663 int need_shrink = 0;
1662 int i, len, rv; 1664 int i, len, rv;
1663 1665
1664 memset(&ls->ls_remove_lens, 0, sizeof(int) * DLM_REMOVE_NAMES_MAX); 1666 memset(&ls->ls_remove_lens, 0, sizeof(int) * DLM_REMOVE_NAMES_MAX);
1665 1667
1666 spin_lock(&ls->ls_rsbtbl[b].lock); 1668 spin_lock(&ls->ls_rsbtbl[b].lock);
1669
1670 if (!(ls->ls_rsbtbl[b].flags & DLM_RTF_SHRINK)) {
1671 spin_unlock(&ls->ls_rsbtbl[b].lock);
1672 return;
1673 }
1674
1667 for (n = rb_first(&ls->ls_rsbtbl[b].toss); n; n = next) { 1675 for (n = rb_first(&ls->ls_rsbtbl[b].toss); n; n = next) {
1668 next = rb_next(n); 1676 next = rb_next(n);
1669 r = rb_entry(n, struct dlm_rsb, res_hashnode); 1677 r = rb_entry(n, struct dlm_rsb, res_hashnode);
@@ -1679,6 +1687,8 @@ static void shrink_bucket(struct dlm_ls *ls, int b)
1679 continue; 1687 continue;
1680 } 1688 }
1681 1689
1690 need_shrink = 1;
1691
1682 if (!time_after_eq(jiffies, r->res_toss_time + 1692 if (!time_after_eq(jiffies, r->res_toss_time +
1683 dlm_config.ci_toss_secs * HZ)) { 1693 dlm_config.ci_toss_secs * HZ)) {
1684 continue; 1694 continue;
@@ -1710,6 +1720,11 @@ static void shrink_bucket(struct dlm_ls *ls, int b)
1710 rb_erase(&r->res_hashnode, &ls->ls_rsbtbl[b].toss); 1720 rb_erase(&r->res_hashnode, &ls->ls_rsbtbl[b].toss);
1711 dlm_free_rsb(r); 1721 dlm_free_rsb(r);
1712 } 1722 }
1723
1724 if (need_shrink)
1725 ls->ls_rsbtbl[b].flags |= DLM_RTF_SHRINK;
1726 else
1727 ls->ls_rsbtbl[b].flags &= ~DLM_RTF_SHRINK;
1713 spin_unlock(&ls->ls_rsbtbl[b].lock); 1728 spin_unlock(&ls->ls_rsbtbl[b].lock);
1714 1729
1715 /* 1730 /*
diff --git a/fs/dlm/user.c b/fs/dlm/user.c
index 7ff49852b0cb..911649a47dd5 100644
--- a/fs/dlm/user.c
+++ b/fs/dlm/user.c
@@ -503,11 +503,11 @@ static ssize_t device_write(struct file *file, const char __user *buf,
503#endif 503#endif
504 return -EINVAL; 504 return -EINVAL;
505 505
506#ifdef CONFIG_COMPAT 506 /*
507 if (count > sizeof(struct dlm_write_request32) + DLM_RESNAME_MAXLEN) 507 * can't compare against COMPAT/dlm_write_request32 because
508#else 508 * we don't yet know if is64bit is zero
509 */
509 if (count > sizeof(struct dlm_write_request) + DLM_RESNAME_MAXLEN) 510 if (count > sizeof(struct dlm_write_request) + DLM_RESNAME_MAXLEN)
510#endif
511 return -EINVAL; 511 return -EINVAL;
512 512
513 kbuf = kzalloc(count + 1, GFP_NOFS); 513 kbuf = kzalloc(count + 1, GFP_NOFS);
diff --git a/fs/ecryptfs/Kconfig b/fs/ecryptfs/Kconfig
index cc16562654de..e15ef38c24fa 100644
--- a/fs/ecryptfs/Kconfig
+++ b/fs/ecryptfs/Kconfig
@@ -1,6 +1,6 @@
1config ECRYPT_FS 1config ECRYPT_FS
2 tristate "eCrypt filesystem layer support (EXPERIMENTAL)" 2 tristate "eCrypt filesystem layer support"
3 depends on EXPERIMENTAL && KEYS && CRYPTO && (ENCRYPTED_KEYS || ENCRYPTED_KEYS=n) 3 depends on KEYS && CRYPTO && (ENCRYPTED_KEYS || ENCRYPTED_KEYS=n)
4 select CRYPTO_ECB 4 select CRYPTO_ECB
5 select CRYPTO_CBC 5 select CRYPTO_CBC
6 select CRYPTO_MD5 6 select CRYPTO_MD5
diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c
index ea9931281557..a7b0c2dfb3db 100644
--- a/fs/ecryptfs/crypto.c
+++ b/fs/ecryptfs/crypto.c
@@ -1935,7 +1935,7 @@ static const unsigned char filename_rev_map[256] = {
1935 * @src: Source location for the filename to encode 1935 * @src: Source location for the filename to encode
1936 * @src_size: Size of the source in bytes 1936 * @src_size: Size of the source in bytes
1937 */ 1937 */
1938void ecryptfs_encode_for_filename(unsigned char *dst, size_t *dst_size, 1938static void ecryptfs_encode_for_filename(unsigned char *dst, size_t *dst_size,
1939 unsigned char *src, size_t src_size) 1939 unsigned char *src, size_t src_size)
1940{ 1940{
1941 size_t num_blocks; 1941 size_t num_blocks;
diff --git a/fs/ecryptfs/kthread.c b/fs/ecryptfs/kthread.c
index 809e67d05ca3..f1ea610362c6 100644
--- a/fs/ecryptfs/kthread.c
+++ b/fs/ecryptfs/kthread.c
@@ -102,12 +102,12 @@ int __init ecryptfs_init_kthread(void)
102 102
103void ecryptfs_destroy_kthread(void) 103void ecryptfs_destroy_kthread(void)
104{ 104{
105 struct ecryptfs_open_req *req; 105 struct ecryptfs_open_req *req, *tmp;
106 106
107 mutex_lock(&ecryptfs_kthread_ctl.mux); 107 mutex_lock(&ecryptfs_kthread_ctl.mux);
108 ecryptfs_kthread_ctl.flags |= ECRYPTFS_KTHREAD_ZOMBIE; 108 ecryptfs_kthread_ctl.flags |= ECRYPTFS_KTHREAD_ZOMBIE;
109 list_for_each_entry(req, &ecryptfs_kthread_ctl.req_list, 109 list_for_each_entry_safe(req, tmp, &ecryptfs_kthread_ctl.req_list,
110 kthread_ctl_list) { 110 kthread_ctl_list) {
111 list_del(&req->kthread_ctl_list); 111 list_del(&req->kthread_ctl_list);
112 *req->lower_file = ERR_PTR(-EIO); 112 *req->lower_file = ERR_PTR(-EIO);
113 complete(&req->done); 113 complete(&req->done);
diff --git a/fs/ecryptfs/mmap.c b/fs/ecryptfs/mmap.c
index bd1d57f98f74..564a1fa34b99 100644
--- a/fs/ecryptfs/mmap.c
+++ b/fs/ecryptfs/mmap.c
@@ -338,7 +338,8 @@ static int ecryptfs_write_begin(struct file *file,
338 if (prev_page_end_size 338 if (prev_page_end_size
339 >= i_size_read(page->mapping->host)) { 339 >= i_size_read(page->mapping->host)) {
340 zero_user(page, 0, PAGE_CACHE_SIZE); 340 zero_user(page, 0, PAGE_CACHE_SIZE);
341 } else { 341 SetPageUptodate(page);
342 } else if (len < PAGE_CACHE_SIZE) {
342 rc = ecryptfs_decrypt_page(page); 343 rc = ecryptfs_decrypt_page(page);
343 if (rc) { 344 if (rc) {
344 printk(KERN_ERR "%s: Error decrypting " 345 printk(KERN_ERR "%s: Error decrypting "
@@ -348,8 +349,8 @@ static int ecryptfs_write_begin(struct file *file,
348 ClearPageUptodate(page); 349 ClearPageUptodate(page);
349 goto out; 350 goto out;
350 } 351 }
352 SetPageUptodate(page);
351 } 353 }
352 SetPageUptodate(page);
353 } 354 }
354 } 355 }
355 /* If creating a page or more of holes, zero them out via truncate. 356 /* If creating a page or more of holes, zero them out via truncate.
@@ -499,6 +500,13 @@ static int ecryptfs_write_end(struct file *file,
499 } 500 }
500 goto out; 501 goto out;
501 } 502 }
503 if (!PageUptodate(page)) {
504 if (copied < PAGE_CACHE_SIZE) {
505 rc = 0;
506 goto out;
507 }
508 SetPageUptodate(page);
509 }
502 /* Fills in zeros if 'to' goes beyond inode size */ 510 /* Fills in zeros if 'to' goes beyond inode size */
503 rc = fill_zeros_to_end_of_page(page, to); 511 rc = fill_zeros_to_end_of_page(page, to);
504 if (rc) { 512 if (rc) {
diff --git a/fs/efs/Kconfig b/fs/efs/Kconfig
index 6ebfc1c207a8..d020e3c30fea 100644
--- a/fs/efs/Kconfig
+++ b/fs/efs/Kconfig
@@ -1,6 +1,6 @@
1config EFS_FS 1config EFS_FS
2 tristate "EFS file system support (read only) (EXPERIMENTAL)" 2 tristate "EFS file system support (read only)"
3 depends on BLOCK && EXPERIMENTAL 3 depends on BLOCK
4 help 4 help
5 EFS is an older file system used for non-ISO9660 CD-ROMs and hard 5 EFS is an older file system used for non-ISO9660 CD-ROMs and hard
6 disk partitions by SGI's IRIX operating system (IRIX 6.0 and newer 6 disk partitions by SGI's IRIX operating system (IRIX 6.0 and newer
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index be56b21435f8..9fec1836057a 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -1313,7 +1313,7 @@ static int ep_modify(struct eventpoll *ep, struct epitem *epi, struct epoll_even
1313 * otherwise we might miss an event that happens between the 1313 * otherwise we might miss an event that happens between the
1314 * f_op->poll() call and the new event set registering. 1314 * f_op->poll() call and the new event set registering.
1315 */ 1315 */
1316 epi->event.events = event->events; 1316 epi->event.events = event->events; /* need barrier below */
1317 pt._key = event->events; 1317 pt._key = event->events;
1318 epi->event.data = event->data; /* protected by mtx */ 1318 epi->event.data = event->data; /* protected by mtx */
1319 if (epi->event.events & EPOLLWAKEUP) { 1319 if (epi->event.events & EPOLLWAKEUP) {
@@ -1324,6 +1324,26 @@ static int ep_modify(struct eventpoll *ep, struct epitem *epi, struct epoll_even
1324 } 1324 }
1325 1325
1326 /* 1326 /*
1327 * The following barrier has two effects:
1328 *
1329 * 1) Flush epi changes above to other CPUs. This ensures
1330 * we do not miss events from ep_poll_callback if an
1331 * event occurs immediately after we call f_op->poll().
1332 * We need this because we did not take ep->lock while
1333 * changing epi above (but ep_poll_callback does take
1334 * ep->lock).
1335 *
1336 * 2) We also need to ensure we do not miss _past_ events
1337 * when calling f_op->poll(). This barrier also
1338 * pairs with the barrier in wq_has_sleeper (see
1339 * comments for wq_has_sleeper).
1340 *
1341 * This barrier will now guarantee ep_poll_callback or f_op->poll
1342 * (or both) will notice the readiness of an item.
1343 */
1344 smp_mb();
1345
1346 /*
1327 * Get current event bits. We can safely use the file* here because 1347 * Get current event bits. We can safely use the file* here because
1328 * its usage count has been increased by the caller of this function. 1348 * its usage count has been increased by the caller of this function.
1329 */ 1349 */
diff --git a/fs/exec.c b/fs/exec.c
index 18c45cac368f..20df02c1cc70 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -434,8 +434,9 @@ static int count(struct user_arg_ptr argv, int max)
434 if (IS_ERR(p)) 434 if (IS_ERR(p))
435 return -EFAULT; 435 return -EFAULT;
436 436
437 if (i++ >= max) 437 if (i >= max)
438 return -E2BIG; 438 return -E2BIG;
439 ++i;
439 440
440 if (fatal_signal_pending(current)) 441 if (fatal_signal_pending(current))
441 return -ERESTARTNOHAND; 442 return -ERESTARTNOHAND;
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index 6e50223b3299..4ba2683c1d44 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -2065,6 +2065,7 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
2065 test_opt(sb,DATA_FLAGS) == EXT3_MOUNT_JOURNAL_DATA ? "journal": 2065 test_opt(sb,DATA_FLAGS) == EXT3_MOUNT_JOURNAL_DATA ? "journal":
2066 test_opt(sb,DATA_FLAGS) == EXT3_MOUNT_ORDERED_DATA ? "ordered": 2066 test_opt(sb,DATA_FLAGS) == EXT3_MOUNT_ORDERED_DATA ? "ordered":
2067 "writeback"); 2067 "writeback");
2068 sb->s_flags |= MS_SNAP_STABLE;
2068 2069
2069 return 0; 2070 return 0;
2070 2071
diff --git a/fs/ext4/Kconfig b/fs/ext4/Kconfig
index 0a475c881852..987358740cb9 100644
--- a/fs/ext4/Kconfig
+++ b/fs/ext4/Kconfig
@@ -41,6 +41,7 @@ config EXT4_USE_FOR_EXT23
41 41
42config EXT4_FS_POSIX_ACL 42config EXT4_FS_POSIX_ACL
43 bool "Ext4 POSIX Access Control Lists" 43 bool "Ext4 POSIX Access Control Lists"
44 depends on EXT4_FS
44 select FS_POSIX_ACL 45 select FS_POSIX_ACL
45 help 46 help
46 POSIX Access Control Lists (ACLs) support permissions for users and 47 POSIX Access Control Lists (ACLs) support permissions for users and
@@ -53,6 +54,7 @@ config EXT4_FS_POSIX_ACL
53 54
54config EXT4_FS_SECURITY 55config EXT4_FS_SECURITY
55 bool "Ext4 Security Labels" 56 bool "Ext4 Security Labels"
57 depends on EXT4_FS
56 help 58 help
57 Security labels support alternative access control models 59 Security labels support alternative access control models
58 implemented by security modules like SELinux. This option 60 implemented by security modules like SELinux. This option
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 26af22832a84..5ae1674ec12f 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -2226,13 +2226,14 @@ errout:
2226 * removes index from the index block. 2226 * removes index from the index block.
2227 */ 2227 */
2228static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode, 2228static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode,
2229 struct ext4_ext_path *path) 2229 struct ext4_ext_path *path, int depth)
2230{ 2230{
2231 int err; 2231 int err;
2232 ext4_fsblk_t leaf; 2232 ext4_fsblk_t leaf;
2233 2233
2234 /* free index block */ 2234 /* free index block */
2235 path--; 2235 depth--;
2236 path = path + depth;
2236 leaf = ext4_idx_pblock(path->p_idx); 2237 leaf = ext4_idx_pblock(path->p_idx);
2237 if (unlikely(path->p_hdr->eh_entries == 0)) { 2238 if (unlikely(path->p_hdr->eh_entries == 0)) {
2238 EXT4_ERROR_INODE(inode, "path->p_hdr->eh_entries == 0"); 2239 EXT4_ERROR_INODE(inode, "path->p_hdr->eh_entries == 0");
@@ -2257,6 +2258,19 @@ static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode,
2257 2258
2258 ext4_free_blocks(handle, inode, NULL, leaf, 1, 2259 ext4_free_blocks(handle, inode, NULL, leaf, 1,
2259 EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET); 2260 EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET);
2261
2262 while (--depth >= 0) {
2263 if (path->p_idx != EXT_FIRST_INDEX(path->p_hdr))
2264 break;
2265 path--;
2266 err = ext4_ext_get_access(handle, inode, path);
2267 if (err)
2268 break;
2269 path->p_idx->ei_block = (path+1)->p_idx->ei_block;
2270 err = ext4_ext_dirty(handle, inode, path);
2271 if (err)
2272 break;
2273 }
2260 return err; 2274 return err;
2261} 2275}
2262 2276
@@ -2599,7 +2613,7 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
2599 /* if this leaf is free, then we should 2613 /* if this leaf is free, then we should
2600 * remove it from index block above */ 2614 * remove it from index block above */
2601 if (err == 0 && eh->eh_entries == 0 && path[depth].p_bh != NULL) 2615 if (err == 0 && eh->eh_entries == 0 && path[depth].p_bh != NULL)
2602 err = ext4_ext_rm_idx(handle, inode, path + depth); 2616 err = ext4_ext_rm_idx(handle, inode, path, depth);
2603 2617
2604out: 2618out:
2605 return err; 2619 return err;
@@ -2802,7 +2816,7 @@ again:
2802 /* index is empty, remove it; 2816 /* index is empty, remove it;
2803 * handle must be already prepared by the 2817 * handle must be already prepared by the
2804 * truncatei_leaf() */ 2818 * truncatei_leaf() */
2805 err = ext4_ext_rm_idx(handle, inode, path + i); 2819 err = ext4_ext_rm_idx(handle, inode, path, i);
2806 } 2820 }
2807 /* root level has p_bh == NULL, brelse() eats this */ 2821 /* root level has p_bh == NULL, brelse() eats this */
2808 brelse(path[i].p_bh); 2822 brelse(path[i].p_bh);
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index d07c27ca594a..405565a62277 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -108,14 +108,6 @@ ext4_file_dio_write(struct kiocb *iocb, const struct iovec *iov,
108 108
109 /* Unaligned direct AIO must be serialized; see comment above */ 109 /* Unaligned direct AIO must be serialized; see comment above */
110 if (unaligned_aio) { 110 if (unaligned_aio) {
111 static unsigned long unaligned_warn_time;
112
113 /* Warn about this once per day */
114 if (printk_timed_ratelimit(&unaligned_warn_time, 60*60*24*HZ))
115 ext4_msg(inode->i_sb, KERN_WARNING,
116 "Unaligned AIO/DIO on inode %ld by %s; "
117 "performance will be poor.",
118 inode->i_ino, current->comm);
119 mutex_lock(ext4_aio_mutex(inode)); 111 mutex_lock(ext4_aio_mutex(inode));
120 ext4_unwritten_wait(inode); 112 ext4_unwritten_wait(inode);
121 } 113 }
diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c
index dfbc1fe96674..3278e64e57b6 100644
--- a/fs/ext4/fsync.c
+++ b/fs/ext4/fsync.c
@@ -109,8 +109,6 @@ static int __sync_inode(struct inode *inode, int datasync)
109 * 109 *
110 * What we do is just kick off a commit and wait on it. This will snapshot the 110 * What we do is just kick off a commit and wait on it. This will snapshot the
111 * inode to disk. 111 * inode to disk.
112 *
113 * i_mutex lock is held when entering and exiting this function
114 */ 112 */
115 113
116int ext4_sync_file(struct file *file, loff_t start, loff_t end, int datasync) 114int ext4_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index cb1c1ab2720b..cd818d8bb221 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -2880,8 +2880,6 @@ static void ext4_invalidatepage_free_endio(struct page *page, unsigned long offs
2880 2880
2881static void ext4_invalidatepage(struct page *page, unsigned long offset) 2881static void ext4_invalidatepage(struct page *page, unsigned long offset)
2882{ 2882{
2883 journal_t *journal = EXT4_JOURNAL(page->mapping->host);
2884
2885 trace_ext4_invalidatepage(page, offset); 2883 trace_ext4_invalidatepage(page, offset);
2886 2884
2887 /* 2885 /*
@@ -2889,16 +2887,34 @@ static void ext4_invalidatepage(struct page *page, unsigned long offset)
2889 */ 2887 */
2890 if (ext4_should_dioread_nolock(page->mapping->host)) 2888 if (ext4_should_dioread_nolock(page->mapping->host))
2891 ext4_invalidatepage_free_endio(page, offset); 2889 ext4_invalidatepage_free_endio(page, offset);
2890
2891 /* No journalling happens on data buffers when this function is used */
2892 WARN_ON(page_has_buffers(page) && buffer_jbd(page_buffers(page)));
2893
2894 block_invalidatepage(page, offset);
2895}
2896
2897static int __ext4_journalled_invalidatepage(struct page *page,
2898 unsigned long offset)
2899{
2900 journal_t *journal = EXT4_JOURNAL(page->mapping->host);
2901
2902 trace_ext4_journalled_invalidatepage(page, offset);
2903
2892 /* 2904 /*
2893 * If it's a full truncate we just forget about the pending dirtying 2905 * If it's a full truncate we just forget about the pending dirtying
2894 */ 2906 */
2895 if (offset == 0) 2907 if (offset == 0)
2896 ClearPageChecked(page); 2908 ClearPageChecked(page);
2897 2909
2898 if (journal) 2910 return jbd2_journal_invalidatepage(journal, page, offset);
2899 jbd2_journal_invalidatepage(journal, page, offset); 2911}
2900 else 2912
2901 block_invalidatepage(page, offset); 2913/* Wrapper for aops... */
2914static void ext4_journalled_invalidatepage(struct page *page,
2915 unsigned long offset)
2916{
2917 WARN_ON(__ext4_journalled_invalidatepage(page, offset) < 0);
2902} 2918}
2903 2919
2904static int ext4_releasepage(struct page *page, gfp_t wait) 2920static int ext4_releasepage(struct page *page, gfp_t wait)
@@ -3264,7 +3280,7 @@ static const struct address_space_operations ext4_journalled_aops = {
3264 .write_end = ext4_journalled_write_end, 3280 .write_end = ext4_journalled_write_end,
3265 .set_page_dirty = ext4_journalled_set_page_dirty, 3281 .set_page_dirty = ext4_journalled_set_page_dirty,
3266 .bmap = ext4_bmap, 3282 .bmap = ext4_bmap,
3267 .invalidatepage = ext4_invalidatepage, 3283 .invalidatepage = ext4_journalled_invalidatepage,
3268 .releasepage = ext4_releasepage, 3284 .releasepage = ext4_releasepage,
3269 .direct_IO = ext4_direct_IO, 3285 .direct_IO = ext4_direct_IO,
3270 .is_partially_uptodate = block_is_partially_uptodate, 3286 .is_partially_uptodate = block_is_partially_uptodate,
@@ -4305,6 +4321,47 @@ int ext4_write_inode(struct inode *inode, struct writeback_control *wbc)
4305} 4321}
4306 4322
4307/* 4323/*
4324 * In data=journal mode ext4_journalled_invalidatepage() may fail to invalidate
4325 * buffers that are attached to a page straddling i_size and are undergoing
4326 * commit. In that case we have to wait for commit to finish and try again.
4327 */
4328static void ext4_wait_for_tail_page_commit(struct inode *inode)
4329{
4330 struct page *page;
4331 unsigned offset;
4332 journal_t *journal = EXT4_SB(inode->i_sb)->s_journal;
4333 tid_t commit_tid = 0;
4334 int ret;
4335
4336 offset = inode->i_size & (PAGE_CACHE_SIZE - 1);
4337 /*
4338 * All buffers in the last page remain valid? Then there's nothing to
4339 * do. We do the check mainly to optimize the common PAGE_CACHE_SIZE ==
4340 * blocksize case
4341 */
4342 if (offset > PAGE_CACHE_SIZE - (1 << inode->i_blkbits))
4343 return;
4344 while (1) {
4345 page = find_lock_page(inode->i_mapping,
4346 inode->i_size >> PAGE_CACHE_SHIFT);
4347 if (!page)
4348 return;
4349 ret = __ext4_journalled_invalidatepage(page, offset);
4350 unlock_page(page);
4351 page_cache_release(page);
4352 if (ret != -EBUSY)
4353 return;
4354 commit_tid = 0;
4355 read_lock(&journal->j_state_lock);
4356 if (journal->j_committing_transaction)
4357 commit_tid = journal->j_committing_transaction->t_tid;
4358 read_unlock(&journal->j_state_lock);
4359 if (commit_tid)
4360 jbd2_log_wait_commit(journal, commit_tid);
4361 }
4362}
4363
4364/*
4308 * ext4_setattr() 4365 * ext4_setattr()
4309 * 4366 *
4310 * Called from notify_change. 4367 * Called from notify_change.
@@ -4417,16 +4474,28 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
4417 } 4474 }
4418 4475
4419 if (attr->ia_valid & ATTR_SIZE) { 4476 if (attr->ia_valid & ATTR_SIZE) {
4420 if (attr->ia_size != i_size_read(inode)) { 4477 if (attr->ia_size != inode->i_size) {
4421 truncate_setsize(inode, attr->ia_size); 4478 loff_t oldsize = inode->i_size;
4422 /* Inode size will be reduced, wait for dio in flight. 4479
4423 * Temporarily disable dioread_nolock to prevent 4480 i_size_write(inode, attr->ia_size);
4424 * livelock. */ 4481 /*
4482 * Blocks are going to be removed from the inode. Wait
4483 * for dio in flight. Temporarily disable
4484 * dioread_nolock to prevent livelock.
4485 */
4425 if (orphan) { 4486 if (orphan) {
4426 ext4_inode_block_unlocked_dio(inode); 4487 if (!ext4_should_journal_data(inode)) {
4427 inode_dio_wait(inode); 4488 ext4_inode_block_unlocked_dio(inode);
4428 ext4_inode_resume_unlocked_dio(inode); 4489 inode_dio_wait(inode);
4490 ext4_inode_resume_unlocked_dio(inode);
4491 } else
4492 ext4_wait_for_tail_page_commit(inode);
4429 } 4493 }
4494 /*
4495 * Truncate pagecache after we've waited for commit
4496 * in data=journal mode to make pages freeable.
4497 */
4498 truncate_pagecache(inode, oldsize, inode->i_size);
4430 } 4499 }
4431 ext4_truncate(inode); 4500 ext4_truncate(inode);
4432 } 4501 }
@@ -4899,7 +4968,7 @@ int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
4899 0, len, NULL, 4968 0, len, NULL,
4900 ext4_bh_unmapped)) { 4969 ext4_bh_unmapped)) {
4901 /* Wait so that we don't change page under IO */ 4970 /* Wait so that we don't change page under IO */
4902 wait_on_page_writeback(page); 4971 wait_for_stable_page(page);
4903 ret = VM_FAULT_LOCKED; 4972 ret = VM_FAULT_LOCKED;
4904 goto out; 4973 goto out;
4905 } 4974 }
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index cac448282331..f9ed946a448e 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -722,7 +722,7 @@ dx_probe(const struct qstr *d_name, struct inode *dir,
722 ext4_warning(dir->i_sb, "Node failed checksum"); 722 ext4_warning(dir->i_sb, "Node failed checksum");
723 brelse(bh); 723 brelse(bh);
724 *err = ERR_BAD_DX_DIR; 724 *err = ERR_BAD_DX_DIR;
725 goto fail; 725 goto fail2;
726 } 726 }
727 set_buffer_verified(bh); 727 set_buffer_verified(bh);
728 728
@@ -2368,7 +2368,6 @@ static int ext4_init_new_dir(handle_t *handle, struct inode *dir,
2368 } 2368 }
2369 2369
2370 inode->i_size = EXT4_I(inode)->i_disksize = blocksize; 2370 inode->i_size = EXT4_I(inode)->i_disksize = blocksize;
2371 dir_block = ext4_bread(handle, inode, 0, 1, &err);
2372 if (!(dir_block = ext4_bread(handle, inode, 0, 1, &err))) { 2371 if (!(dir_block = ext4_bread(handle, inode, 0, 1, &err))) {
2373 if (!err) { 2372 if (!err) {
2374 err = -EIO; 2373 err = -EIO;
@@ -2648,7 +2647,8 @@ int ext4_orphan_del(handle_t *handle, struct inode *inode)
2648 struct ext4_iloc iloc; 2647 struct ext4_iloc iloc;
2649 int err = 0; 2648 int err = 0;
2650 2649
2651 if (!EXT4_SB(inode->i_sb)->s_journal) 2650 if ((!EXT4_SB(inode->i_sb)->s_journal) &&
2651 !(EXT4_SB(inode->i_sb)->s_mount_state & EXT4_ORPHAN_FS))
2652 return 0; 2652 return 0;
2653 2653
2654 mutex_lock(&EXT4_SB(inode->i_sb)->s_orphan_lock); 2654 mutex_lock(&EXT4_SB(inode->i_sb)->s_orphan_lock);
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 3cdb0a2fc648..3d4fb81bacd5 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -1645,9 +1645,7 @@ static int parse_options(char *options, struct super_block *sb,
1645 unsigned int *journal_ioprio, 1645 unsigned int *journal_ioprio,
1646 int is_remount) 1646 int is_remount)
1647{ 1647{
1648#ifdef CONFIG_QUOTA
1649 struct ext4_sb_info *sbi = EXT4_SB(sb); 1648 struct ext4_sb_info *sbi = EXT4_SB(sb);
1650#endif
1651 char *p; 1649 char *p;
1652 substring_t args[MAX_OPT_ARGS]; 1650 substring_t args[MAX_OPT_ARGS];
1653 int token; 1651 int token;
@@ -1696,6 +1694,16 @@ static int parse_options(char *options, struct super_block *sb,
1696 } 1694 }
1697 } 1695 }
1698#endif 1696#endif
1697 if (test_opt(sb, DIOREAD_NOLOCK)) {
1698 int blocksize =
1699 BLOCK_SIZE << le32_to_cpu(sbi->s_es->s_log_block_size);
1700
1701 if (blocksize < PAGE_CACHE_SIZE) {
1702 ext4_msg(sb, KERN_ERR, "can't mount with "
1703 "dioread_nolock if block size != PAGE_SIZE");
1704 return 0;
1705 }
1706 }
1699 return 1; 1707 return 1;
1700} 1708}
1701 1709
@@ -2212,7 +2220,9 @@ static void ext4_orphan_cleanup(struct super_block *sb,
2212 __func__, inode->i_ino, inode->i_size); 2220 __func__, inode->i_ino, inode->i_size);
2213 jbd_debug(2, "truncating inode %lu to %lld bytes\n", 2221 jbd_debug(2, "truncating inode %lu to %lld bytes\n",
2214 inode->i_ino, inode->i_size); 2222 inode->i_ino, inode->i_size);
2223 mutex_lock(&inode->i_mutex);
2215 ext4_truncate(inode); 2224 ext4_truncate(inode);
2225 mutex_unlock(&inode->i_mutex);
2216 nr_truncates++; 2226 nr_truncates++;
2217 } else { 2227 } else {
2218 ext4_msg(sb, KERN_DEBUG, 2228 ext4_msg(sb, KERN_DEBUG,
@@ -3223,6 +3233,10 @@ int ext4_calculate_overhead(struct super_block *sb)
3223 memset(buf, 0, PAGE_SIZE); 3233 memset(buf, 0, PAGE_SIZE);
3224 cond_resched(); 3234 cond_resched();
3225 } 3235 }
3236 /* Add the journal blocks as well */
3237 if (sbi->s_journal)
3238 overhead += EXT4_B2C(sbi, sbi->s_journal->j_maxlen);
3239
3226 sbi->s_overhead = overhead; 3240 sbi->s_overhead = overhead;
3227 smp_wmb(); 3241 smp_wmb();
3228 free_page((unsigned long) buf); 3242 free_page((unsigned long) buf);
@@ -3436,15 +3450,6 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
3436 clear_opt(sb, DELALLOC); 3450 clear_opt(sb, DELALLOC);
3437 } 3451 }
3438 3452
3439 blocksize = BLOCK_SIZE << le32_to_cpu(es->s_log_block_size);
3440 if (test_opt(sb, DIOREAD_NOLOCK)) {
3441 if (blocksize < PAGE_SIZE) {
3442 ext4_msg(sb, KERN_ERR, "can't mount with "
3443 "dioread_nolock if block size != PAGE_SIZE");
3444 goto failed_mount;
3445 }
3446 }
3447
3448 sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | 3453 sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
3449 (test_opt(sb, POSIX_ACL) ? MS_POSIXACL : 0); 3454 (test_opt(sb, POSIX_ACL) ? MS_POSIXACL : 0);
3450 3455
@@ -3486,6 +3491,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
3486 if (!ext4_feature_set_ok(sb, (sb->s_flags & MS_RDONLY))) 3491 if (!ext4_feature_set_ok(sb, (sb->s_flags & MS_RDONLY)))
3487 goto failed_mount; 3492 goto failed_mount;
3488 3493
3494 blocksize = BLOCK_SIZE << le32_to_cpu(es->s_log_block_size);
3489 if (blocksize < EXT4_MIN_BLOCK_SIZE || 3495 if (blocksize < EXT4_MIN_BLOCK_SIZE ||
3490 blocksize > EXT4_MAX_BLOCK_SIZE) { 3496 blocksize > EXT4_MAX_BLOCK_SIZE) {
3491 ext4_msg(sb, KERN_ERR, 3497 ext4_msg(sb, KERN_ERR,
@@ -4725,7 +4731,7 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
4725 } 4731 }
4726 4732
4727 ext4_setup_system_zone(sb); 4733 ext4_setup_system_zone(sb);
4728 if (sbi->s_journal == NULL) 4734 if (sbi->s_journal == NULL && !(old_sb_flags & MS_RDONLY))
4729 ext4_commit_super(sb, 1); 4735 ext4_commit_super(sb, 1);
4730 4736
4731#ifdef CONFIG_QUOTA 4737#ifdef CONFIG_QUOTA
diff --git a/fs/f2fs/acl.c b/fs/f2fs/acl.c
index e95b94945d5f..137af4255da6 100644
--- a/fs/f2fs/acl.c
+++ b/fs/f2fs/acl.c
@@ -191,15 +191,14 @@ struct posix_acl *f2fs_get_acl(struct inode *inode, int type)
191 retval = f2fs_getxattr(inode, name_index, "", value, retval); 191 retval = f2fs_getxattr(inode, name_index, "", value, retval);
192 } 192 }
193 193
194 if (retval < 0) { 194 if (retval > 0)
195 if (retval == -ENODATA)
196 acl = NULL;
197 else
198 acl = ERR_PTR(retval);
199 } else {
200 acl = f2fs_acl_from_disk(value, retval); 195 acl = f2fs_acl_from_disk(value, retval);
201 } 196 else if (retval == -ENODATA)
197 acl = NULL;
198 else
199 acl = ERR_PTR(retval);
202 kfree(value); 200 kfree(value);
201
203 if (!IS_ERR(acl)) 202 if (!IS_ERR(acl))
204 set_cached_acl(inode, type, acl); 203 set_cached_acl(inode, type, acl);
205 204
diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
index 6ef36c37e2be..ff3c8439af87 100644
--- a/fs/f2fs/checkpoint.c
+++ b/fs/f2fs/checkpoint.c
@@ -214,7 +214,6 @@ retry:
214 goto retry; 214 goto retry;
215 } 215 }
216 new->ino = ino; 216 new->ino = ino;
217 INIT_LIST_HEAD(&new->list);
218 217
219 /* add new_oentry into list which is sorted by inode number */ 218 /* add new_oentry into list which is sorted by inode number */
220 if (orphan) { 219 if (orphan) {
@@ -772,7 +771,7 @@ void init_orphan_info(struct f2fs_sb_info *sbi)
772 sbi->n_orphans = 0; 771 sbi->n_orphans = 0;
773} 772}
774 773
775int create_checkpoint_caches(void) 774int __init create_checkpoint_caches(void)
776{ 775{
777 orphan_entry_slab = f2fs_kmem_cache_create("f2fs_orphan_entry", 776 orphan_entry_slab = f2fs_kmem_cache_create("f2fs_orphan_entry",
778 sizeof(struct orphan_inode_entry), NULL); 777 sizeof(struct orphan_inode_entry), NULL);
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 655aeabc1dd4..7bd22a201125 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -16,6 +16,7 @@
16#include <linux/backing-dev.h> 16#include <linux/backing-dev.h>
17#include <linux/blkdev.h> 17#include <linux/blkdev.h>
18#include <linux/bio.h> 18#include <linux/bio.h>
19#include <linux/prefetch.h>
19 20
20#include "f2fs.h" 21#include "f2fs.h"
21#include "node.h" 22#include "node.h"
@@ -546,6 +547,15 @@ redirty_out:
546 547
547#define MAX_DESIRED_PAGES_WP 4096 548#define MAX_DESIRED_PAGES_WP 4096
548 549
550static int __f2fs_writepage(struct page *page, struct writeback_control *wbc,
551 void *data)
552{
553 struct address_space *mapping = data;
554 int ret = mapping->a_ops->writepage(page, wbc);
555 mapping_set_error(mapping, ret);
556 return ret;
557}
558
549static int f2fs_write_data_pages(struct address_space *mapping, 559static int f2fs_write_data_pages(struct address_space *mapping,
550 struct writeback_control *wbc) 560 struct writeback_control *wbc)
551{ 561{
@@ -562,7 +572,7 @@ static int f2fs_write_data_pages(struct address_space *mapping,
562 572
563 if (!S_ISDIR(inode->i_mode)) 573 if (!S_ISDIR(inode->i_mode))
564 mutex_lock(&sbi->writepages); 574 mutex_lock(&sbi->writepages);
565 ret = generic_writepages(mapping, wbc); 575 ret = write_cache_pages(mapping, wbc, __f2fs_writepage, mapping);
566 if (!S_ISDIR(inode->i_mode)) 576 if (!S_ISDIR(inode->i_mode))
567 mutex_unlock(&sbi->writepages); 577 mutex_unlock(&sbi->writepages);
568 f2fs_submit_bio(sbi, DATA, (wbc->sync_mode == WB_SYNC_ALL)); 578 f2fs_submit_bio(sbi, DATA, (wbc->sync_mode == WB_SYNC_ALL));
@@ -688,6 +698,11 @@ static int f2fs_set_data_page_dirty(struct page *page)
688 return 0; 698 return 0;
689} 699}
690 700
701static sector_t f2fs_bmap(struct address_space *mapping, sector_t block)
702{
703 return generic_block_bmap(mapping, block, get_data_block_ro);
704}
705
691const struct address_space_operations f2fs_dblock_aops = { 706const struct address_space_operations f2fs_dblock_aops = {
692 .readpage = f2fs_read_data_page, 707 .readpage = f2fs_read_data_page,
693 .readpages = f2fs_read_data_pages, 708 .readpages = f2fs_read_data_pages,
@@ -699,4 +714,5 @@ const struct address_space_operations f2fs_dblock_aops = {
699 .invalidatepage = f2fs_invalidate_data_page, 714 .invalidatepage = f2fs_invalidate_data_page,
700 .releasepage = f2fs_release_data_page, 715 .releasepage = f2fs_release_data_page,
701 .direct_IO = f2fs_direct_IO, 716 .direct_IO = f2fs_direct_IO,
717 .bmap = f2fs_bmap,
702}; 718};
diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c
index 0e0380a588ad..c8c37307b326 100644
--- a/fs/f2fs/debug.c
+++ b/fs/f2fs/debug.c
@@ -26,6 +26,7 @@
26 26
27static LIST_HEAD(f2fs_stat_list); 27static LIST_HEAD(f2fs_stat_list);
28static struct dentry *debugfs_root; 28static struct dentry *debugfs_root;
29static DEFINE_MUTEX(f2fs_stat_mutex);
29 30
30static void update_general_status(struct f2fs_sb_info *sbi) 31static void update_general_status(struct f2fs_sb_info *sbi)
31{ 32{
@@ -180,18 +181,14 @@ static int stat_show(struct seq_file *s, void *v)
180 int i = 0; 181 int i = 0;
181 int j; 182 int j;
182 183
184 mutex_lock(&f2fs_stat_mutex);
183 list_for_each_entry_safe(si, next, &f2fs_stat_list, stat_list) { 185 list_for_each_entry_safe(si, next, &f2fs_stat_list, stat_list) {
184 186
185 mutex_lock(&si->stat_lock);
186 if (!si->sbi) {
187 mutex_unlock(&si->stat_lock);
188 continue;
189 }
190 update_general_status(si->sbi); 187 update_general_status(si->sbi);
191 188
192 seq_printf(s, "\n=====[ partition info. #%d ]=====\n", i++); 189 seq_printf(s, "\n=====[ partition info. #%d ]=====\n", i++);
193 seq_printf(s, "[SB: 1] [CP: 2] [NAT: %d] [SIT: %d] ", 190 seq_printf(s, "[SB: 1] [CP: 2] [SIT: %d] [NAT: %d] ",
194 si->nat_area_segs, si->sit_area_segs); 191 si->sit_area_segs, si->nat_area_segs);
195 seq_printf(s, "[SSA: %d] [MAIN: %d", 192 seq_printf(s, "[SSA: %d] [MAIN: %d",
196 si->ssa_area_segs, si->main_area_segs); 193 si->ssa_area_segs, si->main_area_segs);
197 seq_printf(s, "(OverProv:%d Resv:%d)]\n\n", 194 seq_printf(s, "(OverProv:%d Resv:%d)]\n\n",
@@ -286,8 +283,8 @@ static int stat_show(struct seq_file *s, void *v)
286 seq_printf(s, "\nMemory: %u KB = static: %u + cached: %u\n", 283 seq_printf(s, "\nMemory: %u KB = static: %u + cached: %u\n",
287 (si->base_mem + si->cache_mem) >> 10, 284 (si->base_mem + si->cache_mem) >> 10,
288 si->base_mem >> 10, si->cache_mem >> 10); 285 si->base_mem >> 10, si->cache_mem >> 10);
289 mutex_unlock(&si->stat_lock);
290 } 286 }
287 mutex_unlock(&f2fs_stat_mutex);
291 return 0; 288 return 0;
292} 289}
293 290
@@ -303,7 +300,7 @@ static const struct file_operations stat_fops = {
303 .release = single_release, 300 .release = single_release,
304}; 301};
305 302
306static int init_stats(struct f2fs_sb_info *sbi) 303int f2fs_build_stats(struct f2fs_sb_info *sbi)
307{ 304{
308 struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi); 305 struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi);
309 struct f2fs_stat_info *si; 306 struct f2fs_stat_info *si;
@@ -313,9 +310,6 @@ static int init_stats(struct f2fs_sb_info *sbi)
313 return -ENOMEM; 310 return -ENOMEM;
314 311
315 si = sbi->stat_info; 312 si = sbi->stat_info;
316 mutex_init(&si->stat_lock);
317 list_add_tail(&si->stat_list, &f2fs_stat_list);
318
319 si->all_area_segs = le32_to_cpu(raw_super->segment_count); 313 si->all_area_segs = le32_to_cpu(raw_super->segment_count);
320 si->sit_area_segs = le32_to_cpu(raw_super->segment_count_sit); 314 si->sit_area_segs = le32_to_cpu(raw_super->segment_count_sit);
321 si->nat_area_segs = le32_to_cpu(raw_super->segment_count_nat); 315 si->nat_area_segs = le32_to_cpu(raw_super->segment_count_nat);
@@ -325,21 +319,11 @@ static int init_stats(struct f2fs_sb_info *sbi)
325 si->main_area_zones = si->main_area_sections / 319 si->main_area_zones = si->main_area_sections /
326 le32_to_cpu(raw_super->secs_per_zone); 320 le32_to_cpu(raw_super->secs_per_zone);
327 si->sbi = sbi; 321 si->sbi = sbi;
328 return 0;
329}
330 322
331int f2fs_build_stats(struct f2fs_sb_info *sbi) 323 mutex_lock(&f2fs_stat_mutex);
332{ 324 list_add_tail(&si->stat_list, &f2fs_stat_list);
333 int retval; 325 mutex_unlock(&f2fs_stat_mutex);
334
335 retval = init_stats(sbi);
336 if (retval)
337 return retval;
338
339 if (!debugfs_root)
340 debugfs_root = debugfs_create_dir("f2fs", NULL);
341 326
342 debugfs_create_file("status", S_IRUGO, debugfs_root, NULL, &stat_fops);
343 return 0; 327 return 0;
344} 328}
345 329
@@ -347,14 +331,22 @@ void f2fs_destroy_stats(struct f2fs_sb_info *sbi)
347{ 331{
348 struct f2fs_stat_info *si = sbi->stat_info; 332 struct f2fs_stat_info *si = sbi->stat_info;
349 333
334 mutex_lock(&f2fs_stat_mutex);
350 list_del(&si->stat_list); 335 list_del(&si->stat_list);
351 mutex_lock(&si->stat_lock); 336 mutex_unlock(&f2fs_stat_mutex);
352 si->sbi = NULL; 337
353 mutex_unlock(&si->stat_lock);
354 kfree(sbi->stat_info); 338 kfree(sbi->stat_info);
355} 339}
356 340
357void destroy_root_stats(void) 341void __init f2fs_create_root_stats(void)
342{
343 debugfs_root = debugfs_create_dir("f2fs", NULL);
344 if (debugfs_root)
345 debugfs_create_file("status", S_IRUGO, debugfs_root,
346 NULL, &stat_fops);
347}
348
349void f2fs_destroy_root_stats(void)
358{ 350{
359 debugfs_remove_recursive(debugfs_root); 351 debugfs_remove_recursive(debugfs_root);
360 debugfs_root = NULL; 352 debugfs_root = NULL;
diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c
index b4e24f32b54e..989980e16d0b 100644
--- a/fs/f2fs/dir.c
+++ b/fs/f2fs/dir.c
@@ -11,6 +11,7 @@
11#include <linux/fs.h> 11#include <linux/fs.h>
12#include <linux/f2fs_fs.h> 12#include <linux/f2fs_fs.h>
13#include "f2fs.h" 13#include "f2fs.h"
14#include "node.h"
14#include "acl.h" 15#include "acl.h"
15 16
16static unsigned long dir_blocks(struct inode *inode) 17static unsigned long dir_blocks(struct inode *inode)
@@ -74,7 +75,7 @@ static unsigned long dir_block_index(unsigned int level, unsigned int idx)
74 return bidx; 75 return bidx;
75} 76}
76 77
77static bool early_match_name(const char *name, int namelen, 78static bool early_match_name(const char *name, size_t namelen,
78 f2fs_hash_t namehash, struct f2fs_dir_entry *de) 79 f2fs_hash_t namehash, struct f2fs_dir_entry *de)
79{ 80{
80 if (le16_to_cpu(de->name_len) != namelen) 81 if (le16_to_cpu(de->name_len) != namelen)
@@ -87,7 +88,7 @@ static bool early_match_name(const char *name, int namelen,
87} 88}
88 89
89static struct f2fs_dir_entry *find_in_block(struct page *dentry_page, 90static struct f2fs_dir_entry *find_in_block(struct page *dentry_page,
90 const char *name, int namelen, int *max_slots, 91 const char *name, size_t namelen, int *max_slots,
91 f2fs_hash_t namehash, struct page **res_page) 92 f2fs_hash_t namehash, struct page **res_page)
92{ 93{
93 struct f2fs_dir_entry *de; 94 struct f2fs_dir_entry *de;
@@ -126,7 +127,7 @@ found:
126} 127}
127 128
128static struct f2fs_dir_entry *find_in_level(struct inode *dir, 129static struct f2fs_dir_entry *find_in_level(struct inode *dir,
129 unsigned int level, const char *name, int namelen, 130 unsigned int level, const char *name, size_t namelen,
130 f2fs_hash_t namehash, struct page **res_page) 131 f2fs_hash_t namehash, struct page **res_page)
131{ 132{
132 int s = GET_DENTRY_SLOTS(namelen); 133 int s = GET_DENTRY_SLOTS(namelen);
@@ -181,7 +182,7 @@ struct f2fs_dir_entry *f2fs_find_entry(struct inode *dir,
181 struct qstr *child, struct page **res_page) 182 struct qstr *child, struct page **res_page)
182{ 183{
183 const char *name = child->name; 184 const char *name = child->name;
184 int namelen = child->len; 185 size_t namelen = child->len;
185 unsigned long npages = dir_blocks(dir); 186 unsigned long npages = dir_blocks(dir);
186 struct f2fs_dir_entry *de = NULL; 187 struct f2fs_dir_entry *de = NULL;
187 f2fs_hash_t name_hash; 188 f2fs_hash_t name_hash;
@@ -308,6 +309,7 @@ static int init_inode_metadata(struct inode *inode, struct dentry *dentry)
308 ipage = get_node_page(F2FS_SB(dir->i_sb), inode->i_ino); 309 ipage = get_node_page(F2FS_SB(dir->i_sb), inode->i_ino);
309 if (IS_ERR(ipage)) 310 if (IS_ERR(ipage))
310 return PTR_ERR(ipage); 311 return PTR_ERR(ipage);
312 set_cold_node(inode, ipage);
311 init_dent_inode(dentry, ipage); 313 init_dent_inode(dentry, ipage);
312 f2fs_put_page(ipage, 1); 314 f2fs_put_page(ipage, 1);
313 } 315 }
@@ -381,7 +383,7 @@ int f2fs_add_link(struct dentry *dentry, struct inode *inode)
381 struct inode *dir = dentry->d_parent->d_inode; 383 struct inode *dir = dentry->d_parent->d_inode;
382 struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb); 384 struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb);
383 const char *name = dentry->d_name.name; 385 const char *name = dentry->d_name.name;
384 int namelen = dentry->d_name.len; 386 size_t namelen = dentry->d_name.len;
385 struct page *dentry_page = NULL; 387 struct page *dentry_page = NULL;
386 struct f2fs_dentry_block *dentry_blk = NULL; 388 struct f2fs_dentry_block *dentry_blk = NULL;
387 int slots = GET_DENTRY_SLOTS(namelen); 389 int slots = GET_DENTRY_SLOTS(namelen);
@@ -501,7 +503,7 @@ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page,
501 } 503 }
502 504
503 if (inode) { 505 if (inode) {
504 inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME; 506 inode->i_ctime = CURRENT_TIME;
505 drop_nlink(inode); 507 drop_nlink(inode);
506 if (S_ISDIR(inode->i_mode)) { 508 if (S_ISDIR(inode->i_mode)) {
507 drop_nlink(inode); 509 drop_nlink(inode);
@@ -540,13 +542,13 @@ int f2fs_make_empty(struct inode *inode, struct inode *parent)
540 542
541 de = &dentry_blk->dentry[0]; 543 de = &dentry_blk->dentry[0];
542 de->name_len = cpu_to_le16(1); 544 de->name_len = cpu_to_le16(1);
543 de->hash_code = 0; 545 de->hash_code = f2fs_dentry_hash(".", 1);
544 de->ino = cpu_to_le32(inode->i_ino); 546 de->ino = cpu_to_le32(inode->i_ino);
545 memcpy(dentry_blk->filename[0], ".", 1); 547 memcpy(dentry_blk->filename[0], ".", 1);
546 set_de_type(de, inode); 548 set_de_type(de, inode);
547 549
548 de = &dentry_blk->dentry[1]; 550 de = &dentry_blk->dentry[1];
549 de->hash_code = 0; 551 de->hash_code = f2fs_dentry_hash("..", 2);
550 de->name_len = cpu_to_le16(2); 552 de->name_len = cpu_to_le16(2);
551 de->ino = cpu_to_le32(parent->i_ino); 553 de->ino = cpu_to_le32(parent->i_ino);
552 memcpy(dentry_blk->filename[1], "..", 2); 554 memcpy(dentry_blk->filename[1], "..", 2);
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index a18d63db2fb6..c8e2d751ef9c 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -211,11 +211,11 @@ struct dnode_of_data {
211static inline void set_new_dnode(struct dnode_of_data *dn, struct inode *inode, 211static inline void set_new_dnode(struct dnode_of_data *dn, struct inode *inode,
212 struct page *ipage, struct page *npage, nid_t nid) 212 struct page *ipage, struct page *npage, nid_t nid)
213{ 213{
214 memset(dn, 0, sizeof(*dn));
214 dn->inode = inode; 215 dn->inode = inode;
215 dn->inode_page = ipage; 216 dn->inode_page = ipage;
216 dn->node_page = npage; 217 dn->node_page = npage;
217 dn->nid = nid; 218 dn->nid = nid;
218 dn->inode_page_locked = 0;
219} 219}
220 220
221/* 221/*
@@ -877,11 +877,13 @@ bool f2fs_empty_dir(struct inode *);
877 * super.c 877 * super.c
878 */ 878 */
879int f2fs_sync_fs(struct super_block *, int); 879int f2fs_sync_fs(struct super_block *, int);
880extern __printf(3, 4)
881void f2fs_msg(struct super_block *, const char *, const char *, ...);
880 882
881/* 883/*
882 * hash.c 884 * hash.c
883 */ 885 */
884f2fs_hash_t f2fs_dentry_hash(const char *, int); 886f2fs_hash_t f2fs_dentry_hash(const char *, size_t);
885 887
886/* 888/*
887 * node.c 889 * node.c
@@ -912,7 +914,7 @@ int restore_node_summary(struct f2fs_sb_info *, unsigned int,
912void flush_nat_entries(struct f2fs_sb_info *); 914void flush_nat_entries(struct f2fs_sb_info *);
913int build_node_manager(struct f2fs_sb_info *); 915int build_node_manager(struct f2fs_sb_info *);
914void destroy_node_manager(struct f2fs_sb_info *); 916void destroy_node_manager(struct f2fs_sb_info *);
915int create_node_manager_caches(void); 917int __init create_node_manager_caches(void);
916void destroy_node_manager_caches(void); 918void destroy_node_manager_caches(void);
917 919
918/* 920/*
@@ -964,7 +966,7 @@ void sync_dirty_dir_inodes(struct f2fs_sb_info *);
964void block_operations(struct f2fs_sb_info *); 966void block_operations(struct f2fs_sb_info *);
965void write_checkpoint(struct f2fs_sb_info *, bool, bool); 967void write_checkpoint(struct f2fs_sb_info *, bool, bool);
966void init_orphan_info(struct f2fs_sb_info *); 968void init_orphan_info(struct f2fs_sb_info *);
967int create_checkpoint_caches(void); 969int __init create_checkpoint_caches(void);
968void destroy_checkpoint_caches(void); 970void destroy_checkpoint_caches(void);
969 971
970/* 972/*
@@ -984,9 +986,9 @@ int do_write_data_page(struct page *);
984int start_gc_thread(struct f2fs_sb_info *); 986int start_gc_thread(struct f2fs_sb_info *);
985void stop_gc_thread(struct f2fs_sb_info *); 987void stop_gc_thread(struct f2fs_sb_info *);
986block_t start_bidx_of_node(unsigned int); 988block_t start_bidx_of_node(unsigned int);
987int f2fs_gc(struct f2fs_sb_info *, int); 989int f2fs_gc(struct f2fs_sb_info *);
988void build_gc_manager(struct f2fs_sb_info *); 990void build_gc_manager(struct f2fs_sb_info *);
989int create_gc_caches(void); 991int __init create_gc_caches(void);
990void destroy_gc_caches(void); 992void destroy_gc_caches(void);
991 993
992/* 994/*
@@ -1058,7 +1060,8 @@ struct f2fs_stat_info {
1058 1060
1059int f2fs_build_stats(struct f2fs_sb_info *); 1061int f2fs_build_stats(struct f2fs_sb_info *);
1060void f2fs_destroy_stats(struct f2fs_sb_info *); 1062void f2fs_destroy_stats(struct f2fs_sb_info *);
1061void destroy_root_stats(void); 1063void __init f2fs_create_root_stats(void);
1064void f2fs_destroy_root_stats(void);
1062#else 1065#else
1063#define stat_inc_call_count(si) 1066#define stat_inc_call_count(si)
1064#define stat_inc_seg_count(si, type) 1067#define stat_inc_seg_count(si, type)
@@ -1068,7 +1071,8 @@ void destroy_root_stats(void);
1068 1071
1069static inline int f2fs_build_stats(struct f2fs_sb_info *sbi) { return 0; } 1072static inline int f2fs_build_stats(struct f2fs_sb_info *sbi) { return 0; }
1070static inline void f2fs_destroy_stats(struct f2fs_sb_info *sbi) { } 1073static inline void f2fs_destroy_stats(struct f2fs_sb_info *sbi) { }
1071static inline void destroy_root_stats(void) { } 1074static inline void __init f2fs_create_root_stats(void) { }
1075static inline void f2fs_destroy_root_stats(void) { }
1072#endif 1076#endif
1073 1077
1074extern const struct file_operations f2fs_dir_operations; 1078extern const struct file_operations f2fs_dir_operations;
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index f9e085dfb1f0..3191b52aafb0 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -96,8 +96,9 @@ out:
96} 96}
97 97
98static const struct vm_operations_struct f2fs_file_vm_ops = { 98static const struct vm_operations_struct f2fs_file_vm_ops = {
99 .fault = filemap_fault, 99 .fault = filemap_fault,
100 .page_mkwrite = f2fs_vm_page_mkwrite, 100 .page_mkwrite = f2fs_vm_page_mkwrite,
101 .remap_pages = generic_file_remap_pages,
101}; 102};
102 103
103static int need_to_sync_dir(struct f2fs_sb_info *sbi, struct inode *inode) 104static int need_to_sync_dir(struct f2fs_sb_info *sbi, struct inode *inode)
@@ -137,6 +138,9 @@ int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
137 if (ret) 138 if (ret)
138 return ret; 139 return ret;
139 140
141 /* guarantee free sections for fsync */
142 f2fs_balance_fs(sbi);
143
140 mutex_lock(&inode->i_mutex); 144 mutex_lock(&inode->i_mutex);
141 145
142 if (datasync && !(inode->i_state & I_DIRTY_DATASYNC)) 146 if (datasync && !(inode->i_state & I_DIRTY_DATASYNC))
@@ -160,15 +164,17 @@ int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
160 if (need_to_sync_dir(sbi, inode)) 164 if (need_to_sync_dir(sbi, inode))
161 need_cp = true; 165 need_cp = true;
162 166
163 f2fs_write_inode(inode, NULL);
164
165 if (need_cp) { 167 if (need_cp) {
166 /* all the dirty node pages should be flushed for POR */ 168 /* all the dirty node pages should be flushed for POR */
167 ret = f2fs_sync_fs(inode->i_sb, 1); 169 ret = f2fs_sync_fs(inode->i_sb, 1);
168 clear_inode_flag(F2FS_I(inode), FI_NEED_CP); 170 clear_inode_flag(F2FS_I(inode), FI_NEED_CP);
169 } else { 171 } else {
170 while (sync_node_pages(sbi, inode->i_ino, &wbc) == 0) 172 /* if there is no written node page, write its inode page */
171 f2fs_write_inode(inode, NULL); 173 while (!sync_node_pages(sbi, inode->i_ino, &wbc)) {
174 ret = f2fs_write_inode(inode, NULL);
175 if (ret)
176 goto out;
177 }
172 filemap_fdatawait_range(sbi->node_inode->i_mapping, 178 filemap_fdatawait_range(sbi->node_inode->i_mapping,
173 0, LONG_MAX); 179 0, LONG_MAX);
174 } 180 }
@@ -405,6 +411,8 @@ int truncate_hole(struct inode *inode, pgoff_t pg_start, pgoff_t pg_end)
405 struct dnode_of_data dn; 411 struct dnode_of_data dn;
406 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); 412 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
407 413
414 f2fs_balance_fs(sbi);
415
408 mutex_lock_op(sbi, DATA_TRUNC); 416 mutex_lock_op(sbi, DATA_TRUNC);
409 set_new_dnode(&dn, inode, NULL, NULL, 0); 417 set_new_dnode(&dn, inode, NULL, NULL, 0);
410 err = get_dnode_of_data(&dn, index, RDONLY_NODE); 418 err = get_dnode_of_data(&dn, index, RDONLY_NODE);
@@ -532,7 +540,6 @@ static long f2fs_fallocate(struct file *file, int mode,
532 loff_t offset, loff_t len) 540 loff_t offset, loff_t len)
533{ 541{
534 struct inode *inode = file->f_path.dentry->d_inode; 542 struct inode *inode = file->f_path.dentry->d_inode;
535 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
536 long ret; 543 long ret;
537 544
538 if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE)) 545 if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE))
@@ -543,7 +550,10 @@ static long f2fs_fallocate(struct file *file, int mode,
543 else 550 else
544 ret = expand_inode_data(inode, offset, len, mode); 551 ret = expand_inode_data(inode, offset, len, mode);
545 552
546 f2fs_balance_fs(sbi); 553 if (!ret) {
554 inode->i_mtime = inode->i_ctime = CURRENT_TIME;
555 mark_inode_dirty(inode);
556 }
547 return ret; 557 return ret;
548} 558}
549 559
diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
index 644aa3808273..c386910dacc5 100644
--- a/fs/f2fs/gc.c
+++ b/fs/f2fs/gc.c
@@ -78,7 +78,7 @@ static int gc_thread_func(void *data)
78 78
79 sbi->bg_gc++; 79 sbi->bg_gc++;
80 80
81 if (f2fs_gc(sbi, 1) == GC_NONE) 81 if (f2fs_gc(sbi) == GC_NONE)
82 wait_ms = GC_THREAD_NOGC_SLEEP_TIME; 82 wait_ms = GC_THREAD_NOGC_SLEEP_TIME;
83 else if (wait_ms == GC_THREAD_NOGC_SLEEP_TIME) 83 else if (wait_ms == GC_THREAD_NOGC_SLEEP_TIME)
84 wait_ms = GC_THREAD_MAX_SLEEP_TIME; 84 wait_ms = GC_THREAD_MAX_SLEEP_TIME;
@@ -390,9 +390,7 @@ next_step:
390 } 390 }
391 391
392 err = check_valid_map(sbi, segno, off); 392 err = check_valid_map(sbi, segno, off);
393 if (err == GC_ERROR) 393 if (err == GC_NEXT)
394 return err;
395 else if (err == GC_NEXT)
396 continue; 394 continue;
397 395
398 if (initial) { 396 if (initial) {
@@ -426,32 +424,30 @@ next_step:
426} 424}
427 425
428/* 426/*
429 * Calculate start block index that this node page contains 427 * Calculate start block index indicating the given node offset.
428 * Be careful: the caller must pass only node offsets that indicate direct
429 * node blocks. Passing an offset that points to any other type of node block,
430 * such as an indirect or double indirect node block, is a bug in the
431 * caller.
430 */ 432 */
431block_t start_bidx_of_node(unsigned int node_ofs) 433block_t start_bidx_of_node(unsigned int node_ofs)
432{ 434{
433 block_t start_bidx; 435 unsigned int indirect_blks = 2 * NIDS_PER_BLOCK + 4;
434 unsigned int bidx, indirect_blks; 436 unsigned int bidx;
435 int dec;
436 437
437 indirect_blks = 2 * NIDS_PER_BLOCK + 4; 438 if (node_ofs == 0)
439 return 0;
438 440
439 start_bidx = 1; 441 if (node_ofs <= 2) {
440 if (node_ofs == 0) {
441 start_bidx = 0;
442 } else if (node_ofs <= 2) {
443 bidx = node_ofs - 1; 442 bidx = node_ofs - 1;
444 } else if (node_ofs <= indirect_blks) { 443 } else if (node_ofs <= indirect_blks) {
445 dec = (node_ofs - 4) / (NIDS_PER_BLOCK + 1); 444 int dec = (node_ofs - 4) / (NIDS_PER_BLOCK + 1);
446 bidx = node_ofs - 2 - dec; 445 bidx = node_ofs - 2 - dec;
447 } else { 446 } else {
448 dec = (node_ofs - indirect_blks - 3) / (NIDS_PER_BLOCK + 1); 447 int dec = (node_ofs - indirect_blks - 3) / (NIDS_PER_BLOCK + 1);
449 bidx = node_ofs - 5 - dec; 448 bidx = node_ofs - 5 - dec;
450 } 449 }
451 450 return bidx * ADDRS_PER_BLOCK + ADDRS_PER_INODE;
452 if (start_bidx)
453 start_bidx = bidx * ADDRS_PER_BLOCK + ADDRS_PER_INODE;
454 return start_bidx;
455} 451}
456 452
457static int check_dnode(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, 453static int check_dnode(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
@@ -556,9 +552,7 @@ next_step:
556 } 552 }
557 553
558 err = check_valid_map(sbi, segno, off); 554 err = check_valid_map(sbi, segno, off);
559 if (err == GC_ERROR) 555 if (err == GC_NEXT)
560 goto stop;
561 else if (err == GC_NEXT)
562 continue; 556 continue;
563 557
564 if (phase == 0) { 558 if (phase == 0) {
@@ -568,9 +562,7 @@ next_step:
568 562
569 /* Get an inode by ino with checking validity */ 563 /* Get an inode by ino with checking validity */
570 err = check_dnode(sbi, entry, &dni, start_addr + off, &nofs); 564 err = check_dnode(sbi, entry, &dni, start_addr + off, &nofs);
571 if (err == GC_ERROR) 565 if (err == GC_NEXT)
572 goto stop;
573 else if (err == GC_NEXT)
574 continue; 566 continue;
575 567
576 if (phase == 1) { 568 if (phase == 1) {
@@ -663,62 +655,44 @@ static int do_garbage_collect(struct f2fs_sb_info *sbi, unsigned int segno,
663 return ret; 655 return ret;
664} 656}
665 657
666int f2fs_gc(struct f2fs_sb_info *sbi, int nGC) 658int f2fs_gc(struct f2fs_sb_info *sbi)
667{ 659{
668 unsigned int segno;
669 int old_free_secs, cur_free_secs;
670 int gc_status, nfree;
671 struct list_head ilist; 660 struct list_head ilist;
661 unsigned int segno, i;
672 int gc_type = BG_GC; 662 int gc_type = BG_GC;
663 int gc_status = GC_NONE;
673 664
674 INIT_LIST_HEAD(&ilist); 665 INIT_LIST_HEAD(&ilist);
675gc_more: 666gc_more:
676 nfree = 0; 667 if (!(sbi->sb->s_flags & MS_ACTIVE))
677 gc_status = GC_NONE; 668 goto stop;
678 669
679 if (has_not_enough_free_secs(sbi)) 670 if (has_not_enough_free_secs(sbi))
680 old_free_secs = reserved_sections(sbi); 671 gc_type = FG_GC;
681 else
682 old_free_secs = free_sections(sbi);
683
684 while (sbi->sb->s_flags & MS_ACTIVE) {
685 int i;
686 if (has_not_enough_free_secs(sbi))
687 gc_type = FG_GC;
688 672
689 cur_free_secs = free_sections(sbi) + nfree; 673 if (!__get_victim(sbi, &segno, gc_type, NO_CHECK_TYPE))
674 goto stop;
690 675
691 /* We got free space successfully. */ 676 for (i = 0; i < sbi->segs_per_sec; i++) {
692 if (nGC < cur_free_secs - old_free_secs) 677 /*
693 break; 678 * do_garbage_collect will give us three gc_status:
694 679 * GC_ERROR, GC_DONE, and GC_BLOCKED.
695 if (!__get_victim(sbi, &segno, gc_type, NO_CHECK_TYPE)) 680 * If GC is finished uncleanly, we have to return
681 * the victim to dirty segment list.
682 */
683 gc_status = do_garbage_collect(sbi, segno + i, &ilist, gc_type);
684 if (gc_status != GC_DONE)
696 break; 685 break;
697
698 for (i = 0; i < sbi->segs_per_sec; i++) {
699 /*
700 * do_garbage_collect will give us three gc_status:
701 * GC_ERROR, GC_DONE, and GC_BLOCKED.
702 * If GC is finished uncleanly, we have to return
703 * the victim to dirty segment list.
704 */
705 gc_status = do_garbage_collect(sbi, segno + i,
706 &ilist, gc_type);
707 if (gc_status != GC_DONE)
708 goto stop;
709 nfree++;
710 }
711 } 686 }
712stop: 687 if (has_not_enough_free_secs(sbi)) {
713 if (has_not_enough_free_secs(sbi) || gc_status == GC_BLOCKED) {
714 write_checkpoint(sbi, (gc_status == GC_BLOCKED), false); 688 write_checkpoint(sbi, (gc_status == GC_BLOCKED), false);
715 if (nfree) 689 if (has_not_enough_free_secs(sbi))
716 goto gc_more; 690 goto gc_more;
717 } 691 }
692stop:
718 mutex_unlock(&sbi->gc_mutex); 693 mutex_unlock(&sbi->gc_mutex);
719 694
720 put_gc_inode(&ilist); 695 put_gc_inode(&ilist);
721 BUG_ON(!list_empty(&ilist));
722 return gc_status; 696 return gc_status;
723} 697}
724 698
@@ -727,7 +701,7 @@ void build_gc_manager(struct f2fs_sb_info *sbi)
727 DIRTY_I(sbi)->v_ops = &default_v_ops; 701 DIRTY_I(sbi)->v_ops = &default_v_ops;
728} 702}
729 703
730int create_gc_caches(void) 704int __init create_gc_caches(void)
731{ 705{
732 winode_slab = f2fs_kmem_cache_create("f2fs_gc_inodes", 706 winode_slab = f2fs_kmem_cache_create("f2fs_gc_inodes",
733 sizeof(struct inode_entry), NULL); 707 sizeof(struct inode_entry), NULL);
diff --git a/fs/f2fs/hash.c b/fs/f2fs/hash.c
index a60f04200f8b..6eb8d269b53b 100644
--- a/fs/f2fs/hash.c
+++ b/fs/f2fs/hash.c
@@ -42,7 +42,7 @@ static void TEA_transform(unsigned int buf[4], unsigned int const in[])
42 buf[1] += b1; 42 buf[1] += b1;
43} 43}
44 44
45static void str2hashbuf(const char *msg, int len, unsigned int *buf, int num) 45static void str2hashbuf(const char *msg, size_t len, unsigned int *buf, int num)
46{ 46{
47 unsigned pad, val; 47 unsigned pad, val;
48 int i; 48 int i;
@@ -69,13 +69,17 @@ static void str2hashbuf(const char *msg, int len, unsigned int *buf, int num)
69 *buf++ = pad; 69 *buf++ = pad;
70} 70}
71 71
72f2fs_hash_t f2fs_dentry_hash(const char *name, int len) 72f2fs_hash_t f2fs_dentry_hash(const char *name, size_t len)
73{ 73{
74 __u32 hash, minor_hash; 74 __u32 hash;
75 f2fs_hash_t f2fs_hash; 75 f2fs_hash_t f2fs_hash;
76 const char *p; 76 const char *p;
77 __u32 in[8], buf[4]; 77 __u32 in[8], buf[4];
78 78
79 if ((len <= 2) && (name[0] == '.') &&
80 (name[1] == '.' || name[1] == '\0'))
81 return 0;
82
79 /* Initialize the default seed for the hash checksum functions */ 83 /* Initialize the default seed for the hash checksum functions */
80 buf[0] = 0x67452301; 84 buf[0] = 0x67452301;
81 buf[1] = 0xefcdab89; 85 buf[1] = 0xefcdab89;
@@ -83,15 +87,15 @@ f2fs_hash_t f2fs_dentry_hash(const char *name, int len)
83 buf[3] = 0x10325476; 87 buf[3] = 0x10325476;
84 88
85 p = name; 89 p = name;
86 while (len > 0) { 90 while (1) {
87 str2hashbuf(p, len, in, 4); 91 str2hashbuf(p, len, in, 4);
88 TEA_transform(buf, in); 92 TEA_transform(buf, in);
89 len -= 16;
90 p += 16; 93 p += 16;
94 if (len <= 16)
95 break;
96 len -= 16;
91 } 97 }
92 hash = buf[0]; 98 hash = buf[0];
93 minor_hash = buf[1];
94
95 f2fs_hash = cpu_to_le32(hash & ~F2FS_HASH_COL_BIT); 99 f2fs_hash = cpu_to_le32(hash & ~F2FS_HASH_COL_BIT);
96 return f2fs_hash; 100 return f2fs_hash;
97} 101}
diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c
index df5fb381ebf1..794241777322 100644
--- a/fs/f2fs/inode.c
+++ b/fs/f2fs/inode.c
@@ -203,6 +203,7 @@ void update_inode(struct inode *inode, struct page *node_page)
203 ri->i_flags = cpu_to_le32(F2FS_I(inode)->i_flags); 203 ri->i_flags = cpu_to_le32(F2FS_I(inode)->i_flags);
204 ri->i_pino = cpu_to_le32(F2FS_I(inode)->i_pino); 204 ri->i_pino = cpu_to_le32(F2FS_I(inode)->i_pino);
205 ri->i_generation = cpu_to_le32(inode->i_generation); 205 ri->i_generation = cpu_to_le32(inode->i_generation);
206 set_cold_node(inode, node_page);
206 set_page_dirty(node_page); 207 set_page_dirty(node_page);
207} 208}
208 209
@@ -216,6 +217,9 @@ int f2fs_write_inode(struct inode *inode, struct writeback_control *wbc)
216 inode->i_ino == F2FS_META_INO(sbi)) 217 inode->i_ino == F2FS_META_INO(sbi))
217 return 0; 218 return 0;
218 219
220 if (wbc)
221 f2fs_balance_fs(sbi);
222
219 node_page = get_node_page(sbi, inode->i_ino); 223 node_page = get_node_page(sbi, inode->i_ino);
220 if (IS_ERR(node_page)) 224 if (IS_ERR(node_page))
221 return PTR_ERR(node_page); 225 return PTR_ERR(node_page);
diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c
index 89b7675dc377..1a49b881bac0 100644
--- a/fs/f2fs/namei.c
+++ b/fs/f2fs/namei.c
@@ -77,8 +77,8 @@ fail:
77 77
78static int is_multimedia_file(const unsigned char *s, const char *sub) 78static int is_multimedia_file(const unsigned char *s, const char *sub)
79{ 79{
80 int slen = strlen(s); 80 size_t slen = strlen(s);
81 int sublen = strlen(sub); 81 size_t sublen = strlen(sub);
82 int ret; 82 int ret;
83 83
84 if (sublen > slen) 84 if (sublen > slen)
@@ -123,6 +123,8 @@ static int f2fs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
123 nid_t ino = 0; 123 nid_t ino = 0;
124 int err; 124 int err;
125 125
126 f2fs_balance_fs(sbi);
127
126 inode = f2fs_new_inode(dir, mode); 128 inode = f2fs_new_inode(dir, mode);
127 if (IS_ERR(inode)) 129 if (IS_ERR(inode))
128 return PTR_ERR(inode); 130 return PTR_ERR(inode);
@@ -144,8 +146,6 @@ static int f2fs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
144 if (!sbi->por_doing) 146 if (!sbi->por_doing)
145 d_instantiate(dentry, inode); 147 d_instantiate(dentry, inode);
146 unlock_new_inode(inode); 148 unlock_new_inode(inode);
147
148 f2fs_balance_fs(sbi);
149 return 0; 149 return 0;
150out: 150out:
151 clear_nlink(inode); 151 clear_nlink(inode);
@@ -163,6 +163,8 @@ static int f2fs_link(struct dentry *old_dentry, struct inode *dir,
163 struct f2fs_sb_info *sbi = F2FS_SB(sb); 163 struct f2fs_sb_info *sbi = F2FS_SB(sb);
164 int err; 164 int err;
165 165
166 f2fs_balance_fs(sbi);
167
166 inode->i_ctime = CURRENT_TIME; 168 inode->i_ctime = CURRENT_TIME;
167 atomic_inc(&inode->i_count); 169 atomic_inc(&inode->i_count);
168 170
@@ -172,8 +174,6 @@ static int f2fs_link(struct dentry *old_dentry, struct inode *dir,
172 goto out; 174 goto out;
173 175
174 d_instantiate(dentry, inode); 176 d_instantiate(dentry, inode);
175
176 f2fs_balance_fs(sbi);
177 return 0; 177 return 0;
178out: 178out:
179 clear_inode_flag(F2FS_I(inode), FI_INC_LINK); 179 clear_inode_flag(F2FS_I(inode), FI_INC_LINK);
@@ -223,6 +223,8 @@ static int f2fs_unlink(struct inode *dir, struct dentry *dentry)
223 struct page *page; 223 struct page *page;
224 int err = -ENOENT; 224 int err = -ENOENT;
225 225
226 f2fs_balance_fs(sbi);
227
226 de = f2fs_find_entry(dir, &dentry->d_name, &page); 228 de = f2fs_find_entry(dir, &dentry->d_name, &page);
227 if (!de) 229 if (!de)
228 goto fail; 230 goto fail;
@@ -238,7 +240,6 @@ static int f2fs_unlink(struct inode *dir, struct dentry *dentry)
238 240
239 /* In order to evict this inode, we set it dirty */ 241 /* In order to evict this inode, we set it dirty */
240 mark_inode_dirty(inode); 242 mark_inode_dirty(inode);
241 f2fs_balance_fs(sbi);
242fail: 243fail:
243 return err; 244 return err;
244} 245}
@@ -249,9 +250,11 @@ static int f2fs_symlink(struct inode *dir, struct dentry *dentry,
249 struct super_block *sb = dir->i_sb; 250 struct super_block *sb = dir->i_sb;
250 struct f2fs_sb_info *sbi = F2FS_SB(sb); 251 struct f2fs_sb_info *sbi = F2FS_SB(sb);
251 struct inode *inode; 252 struct inode *inode;
252 unsigned symlen = strlen(symname) + 1; 253 size_t symlen = strlen(symname) + 1;
253 int err; 254 int err;
254 255
256 f2fs_balance_fs(sbi);
257
255 inode = f2fs_new_inode(dir, S_IFLNK | S_IRWXUGO); 258 inode = f2fs_new_inode(dir, S_IFLNK | S_IRWXUGO);
256 if (IS_ERR(inode)) 259 if (IS_ERR(inode))
257 return PTR_ERR(inode); 260 return PTR_ERR(inode);
@@ -268,9 +271,6 @@ static int f2fs_symlink(struct inode *dir, struct dentry *dentry,
268 271
269 d_instantiate(dentry, inode); 272 d_instantiate(dentry, inode);
270 unlock_new_inode(inode); 273 unlock_new_inode(inode);
271
272 f2fs_balance_fs(sbi);
273
274 return err; 274 return err;
275out: 275out:
276 clear_nlink(inode); 276 clear_nlink(inode);
@@ -286,6 +286,8 @@ static int f2fs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
286 struct inode *inode; 286 struct inode *inode;
287 int err; 287 int err;
288 288
289 f2fs_balance_fs(sbi);
290
289 inode = f2fs_new_inode(dir, S_IFDIR | mode); 291 inode = f2fs_new_inode(dir, S_IFDIR | mode);
290 if (IS_ERR(inode)) 292 if (IS_ERR(inode))
291 return PTR_ERR(inode); 293 return PTR_ERR(inode);
@@ -305,7 +307,6 @@ static int f2fs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
305 d_instantiate(dentry, inode); 307 d_instantiate(dentry, inode);
306 unlock_new_inode(inode); 308 unlock_new_inode(inode);
307 309
308 f2fs_balance_fs(sbi);
309 return 0; 310 return 0;
310 311
311out_fail: 312out_fail:
@@ -336,6 +337,8 @@ static int f2fs_mknod(struct inode *dir, struct dentry *dentry,
336 if (!new_valid_dev(rdev)) 337 if (!new_valid_dev(rdev))
337 return -EINVAL; 338 return -EINVAL;
338 339
340 f2fs_balance_fs(sbi);
341
339 inode = f2fs_new_inode(dir, mode); 342 inode = f2fs_new_inode(dir, mode);
340 if (IS_ERR(inode)) 343 if (IS_ERR(inode))
341 return PTR_ERR(inode); 344 return PTR_ERR(inode);
@@ -350,9 +353,6 @@ static int f2fs_mknod(struct inode *dir, struct dentry *dentry,
350 alloc_nid_done(sbi, inode->i_ino); 353 alloc_nid_done(sbi, inode->i_ino);
351 d_instantiate(dentry, inode); 354 d_instantiate(dentry, inode);
352 unlock_new_inode(inode); 355 unlock_new_inode(inode);
353
354 f2fs_balance_fs(sbi);
355
356 return 0; 356 return 0;
357out: 357out:
358 clear_nlink(inode); 358 clear_nlink(inode);
@@ -376,6 +376,8 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry,
376 struct f2fs_dir_entry *new_entry; 376 struct f2fs_dir_entry *new_entry;
377 int err = -ENOENT; 377 int err = -ENOENT;
378 378
379 f2fs_balance_fs(sbi);
380
379 old_entry = f2fs_find_entry(old_dir, &old_dentry->d_name, &old_page); 381 old_entry = f2fs_find_entry(old_dir, &old_dentry->d_name, &old_page);
380 if (!old_entry) 382 if (!old_entry)
381 goto out; 383 goto out;
@@ -441,8 +443,6 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry,
441 } 443 }
442 444
443 mutex_unlock_op(sbi, RENAME); 445 mutex_unlock_op(sbi, RENAME);
444
445 f2fs_balance_fs(sbi);
446 return 0; 446 return 0;
447 447
448out_dir: 448out_dir:
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index 19870361497e..9bda63c9c166 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -484,12 +484,14 @@ static void truncate_node(struct dnode_of_data *dn)
484 struct node_info ni; 484 struct node_info ni;
485 485
486 get_node_info(sbi, dn->nid, &ni); 486 get_node_info(sbi, dn->nid, &ni);
487 if (dn->inode->i_blocks == 0) {
488 BUG_ON(ni.blk_addr != NULL_ADDR);
489 goto invalidate;
490 }
487 BUG_ON(ni.blk_addr == NULL_ADDR); 491 BUG_ON(ni.blk_addr == NULL_ADDR);
488 492
489 if (ni.blk_addr != NULL_ADDR)
490 invalidate_blocks(sbi, ni.blk_addr);
491
492 /* Deallocate node address */ 493 /* Deallocate node address */
494 invalidate_blocks(sbi, ni.blk_addr);
493 dec_valid_node_count(sbi, dn->inode, 1); 495 dec_valid_node_count(sbi, dn->inode, 1);
494 set_node_addr(sbi, &ni, NULL_ADDR); 496 set_node_addr(sbi, &ni, NULL_ADDR);
495 497
@@ -499,7 +501,7 @@ static void truncate_node(struct dnode_of_data *dn)
499 } else { 501 } else {
500 sync_inode_page(dn); 502 sync_inode_page(dn);
501 } 503 }
502 504invalidate:
503 clear_node_page_dirty(dn->node_page); 505 clear_node_page_dirty(dn->node_page);
504 F2FS_SET_SB_DIRT(sbi); 506 F2FS_SET_SB_DIRT(sbi);
505 507
@@ -768,20 +770,12 @@ int remove_inode_page(struct inode *inode)
768 dn.inode_page_locked = 1; 770 dn.inode_page_locked = 1;
769 truncate_node(&dn); 771 truncate_node(&dn);
770 } 772 }
771 if (inode->i_blocks == 1) {
772 /* inernally call f2fs_put_page() */
773 set_new_dnode(&dn, inode, page, page, ino);
774 truncate_node(&dn);
775 } else if (inode->i_blocks == 0) {
776 struct node_info ni;
777 get_node_info(sbi, inode->i_ino, &ni);
778 773
779 /* called after f2fs_new_inode() is failed */ 774 /* 0 is possible, after f2fs_new_inode() is failed */
780 BUG_ON(ni.blk_addr != NULL_ADDR); 775 BUG_ON(inode->i_blocks != 0 && inode->i_blocks != 1);
781 f2fs_put_page(page, 1); 776 set_new_dnode(&dn, inode, page, page, ino);
782 } else { 777 truncate_node(&dn);
783 BUG(); 778
784 }
785 mutex_unlock_op(sbi, NODE_TRUNC); 779 mutex_unlock_op(sbi, NODE_TRUNC);
786 return 0; 780 return 0;
787} 781}
@@ -834,17 +828,18 @@ struct page *new_node_page(struct dnode_of_data *dn, unsigned int ofs)
834 goto fail; 828 goto fail;
835 } 829 }
836 set_node_addr(sbi, &new_ni, NEW_ADDR); 830 set_node_addr(sbi, &new_ni, NEW_ADDR);
831 set_cold_node(dn->inode, page);
837 832
838 dn->node_page = page; 833 dn->node_page = page;
839 sync_inode_page(dn); 834 sync_inode_page(dn);
840 set_page_dirty(page); 835 set_page_dirty(page);
841 set_cold_node(dn->inode, page);
842 if (ofs == 0) 836 if (ofs == 0)
843 inc_valid_inode_count(sbi); 837 inc_valid_inode_count(sbi);
844 838
845 return page; 839 return page;
846 840
847fail: 841fail:
842 clear_node_page_dirty(page);
848 f2fs_put_page(page, 1); 843 f2fs_put_page(page, 1);
849 return ERR_PTR(err); 844 return ERR_PTR(err);
850} 845}
@@ -1093,7 +1088,6 @@ static int f2fs_write_node_page(struct page *page,
1093{ 1088{
1094 struct f2fs_sb_info *sbi = F2FS_SB(page->mapping->host->i_sb); 1089 struct f2fs_sb_info *sbi = F2FS_SB(page->mapping->host->i_sb);
1095 nid_t nid; 1090 nid_t nid;
1096 unsigned int nofs;
1097 block_t new_addr; 1091 block_t new_addr;
1098 struct node_info ni; 1092 struct node_info ni;
1099 1093
@@ -1110,7 +1104,6 @@ static int f2fs_write_node_page(struct page *page,
1110 1104
1111 /* get old block addr of this node page */ 1105 /* get old block addr of this node page */
1112 nid = nid_of_node(page); 1106 nid = nid_of_node(page);
1113 nofs = ofs_of_node(page);
1114 BUG_ON(page->index != nid); 1107 BUG_ON(page->index != nid);
1115 1108
1116 get_node_info(sbi, nid, &ni); 1109 get_node_info(sbi, nid, &ni);
@@ -1131,6 +1124,12 @@ static int f2fs_write_node_page(struct page *page,
1131 return 0; 1124 return 0;
1132} 1125}
1133 1126
1127/*
1128 * It is very important to gather dirty pages and write at once, so that we can
1129 * submit a big bio without interfering other data writes.
1130 * By default, 512 pages (2MB), a segment size, is quite reasonable.
1131 */
1132#define COLLECT_DIRTY_NODES 512
1134static int f2fs_write_node_pages(struct address_space *mapping, 1133static int f2fs_write_node_pages(struct address_space *mapping,
1135 struct writeback_control *wbc) 1134 struct writeback_control *wbc)
1136{ 1135{
@@ -1138,17 +1137,16 @@ static int f2fs_write_node_pages(struct address_space *mapping,
1138 struct block_device *bdev = sbi->sb->s_bdev; 1137 struct block_device *bdev = sbi->sb->s_bdev;
1139 long nr_to_write = wbc->nr_to_write; 1138 long nr_to_write = wbc->nr_to_write;
1140 1139
1141 if (wbc->for_kupdate) 1140 /* First check balancing cached NAT entries */
1142 return 0;
1143
1144 if (get_pages(sbi, F2FS_DIRTY_NODES) == 0)
1145 return 0;
1146
1147 if (try_to_free_nats(sbi, NAT_ENTRY_PER_BLOCK)) { 1141 if (try_to_free_nats(sbi, NAT_ENTRY_PER_BLOCK)) {
1148 write_checkpoint(sbi, false, false); 1142 write_checkpoint(sbi, false, false);
1149 return 0; 1143 return 0;
1150 } 1144 }
1151 1145
1146 /* collect a number of dirty node pages and write together */
1147 if (get_pages(sbi, F2FS_DIRTY_NODES) < COLLECT_DIRTY_NODES)
1148 return 0;
1149
1152 /* if mounting is failed, skip writing node pages */ 1150 /* if mounting is failed, skip writing node pages */
1153 wbc->nr_to_write = bio_get_nr_vecs(bdev); 1151 wbc->nr_to_write = bio_get_nr_vecs(bdev);
1154 sync_node_pages(sbi, 0, wbc); 1152 sync_node_pages(sbi, 0, wbc);
@@ -1571,7 +1569,7 @@ void flush_nat_entries(struct f2fs_sb_info *sbi)
1571 nid_t nid; 1569 nid_t nid;
1572 struct f2fs_nat_entry raw_ne; 1570 struct f2fs_nat_entry raw_ne;
1573 int offset = -1; 1571 int offset = -1;
1574 block_t old_blkaddr, new_blkaddr; 1572 block_t new_blkaddr;
1575 1573
1576 ne = list_entry(cur, struct nat_entry, list); 1574 ne = list_entry(cur, struct nat_entry, list);
1577 nid = nat_get_nid(ne); 1575 nid = nat_get_nid(ne);
@@ -1585,7 +1583,6 @@ void flush_nat_entries(struct f2fs_sb_info *sbi)
1585 offset = lookup_journal_in_cursum(sum, NAT_JOURNAL, nid, 1); 1583 offset = lookup_journal_in_cursum(sum, NAT_JOURNAL, nid, 1);
1586 if (offset >= 0) { 1584 if (offset >= 0) {
1587 raw_ne = nat_in_journal(sum, offset); 1585 raw_ne = nat_in_journal(sum, offset);
1588 old_blkaddr = le32_to_cpu(raw_ne.block_addr);
1589 goto flush_now; 1586 goto flush_now;
1590 } 1587 }
1591to_nat_page: 1588to_nat_page:
@@ -1607,7 +1604,6 @@ to_nat_page:
1607 1604
1608 BUG_ON(!nat_blk); 1605 BUG_ON(!nat_blk);
1609 raw_ne = nat_blk->entries[nid - start_nid]; 1606 raw_ne = nat_blk->entries[nid - start_nid];
1610 old_blkaddr = le32_to_cpu(raw_ne.block_addr);
1611flush_now: 1607flush_now:
1612 new_blkaddr = nat_get_blkaddr(ne); 1608 new_blkaddr = nat_get_blkaddr(ne);
1613 1609
@@ -1741,7 +1737,7 @@ void destroy_node_manager(struct f2fs_sb_info *sbi)
1741 kfree(nm_i); 1737 kfree(nm_i);
1742} 1738}
1743 1739
1744int create_node_manager_caches(void) 1740int __init create_node_manager_caches(void)
1745{ 1741{
1746 nat_entry_slab = f2fs_kmem_cache_create("nat_entry", 1742 nat_entry_slab = f2fs_kmem_cache_create("nat_entry",
1747 sizeof(struct nat_entry), NULL); 1743 sizeof(struct nat_entry), NULL);
diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c
index b07e9b6ef376..f42e4060b399 100644
--- a/fs/f2fs/recovery.c
+++ b/fs/f2fs/recovery.c
@@ -67,7 +67,7 @@ static int recover_dentry(struct page *ipage, struct inode *inode)
67 kunmap(page); 67 kunmap(page);
68 f2fs_put_page(page, 0); 68 f2fs_put_page(page, 0);
69 } else { 69 } else {
70 f2fs_add_link(&dent, inode); 70 err = f2fs_add_link(&dent, inode);
71 } 71 }
72 iput(dir); 72 iput(dir);
73out: 73out:
@@ -144,14 +144,14 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head)
144 goto out; 144 goto out;
145 } 145 }
146 146
147 INIT_LIST_HEAD(&entry->list);
148 list_add_tail(&entry->list, head);
149
150 entry->inode = f2fs_iget(sbi->sb, ino_of_node(page)); 147 entry->inode = f2fs_iget(sbi->sb, ino_of_node(page));
151 if (IS_ERR(entry->inode)) { 148 if (IS_ERR(entry->inode)) {
152 err = PTR_ERR(entry->inode); 149 err = PTR_ERR(entry->inode);
150 kmem_cache_free(fsync_entry_slab, entry);
153 goto out; 151 goto out;
154 } 152 }
153
154 list_add_tail(&entry->list, head);
155 entry->blkaddr = blkaddr; 155 entry->blkaddr = blkaddr;
156 } 156 }
157 if (IS_INODE(page)) { 157 if (IS_INODE(page)) {
@@ -173,10 +173,9 @@ out:
173static void destroy_fsync_dnodes(struct f2fs_sb_info *sbi, 173static void destroy_fsync_dnodes(struct f2fs_sb_info *sbi,
174 struct list_head *head) 174 struct list_head *head)
175{ 175{
176 struct list_head *this; 176 struct fsync_inode_entry *entry, *tmp;
177 struct fsync_inode_entry *entry; 177
178 list_for_each(this, head) { 178 list_for_each_entry_safe(entry, tmp, head, list) {
179 entry = list_entry(this, struct fsync_inode_entry, list);
180 iput(entry->inode); 179 iput(entry->inode);
181 list_del(&entry->list); 180 list_del(&entry->list);
182 kmem_cache_free(fsync_entry_slab, entry); 181 kmem_cache_free(fsync_entry_slab, entry);
@@ -228,6 +227,9 @@ static void check_index_in_prev_nodes(struct f2fs_sb_info *sbi,
228 227
229 /* Deallocate previous index in the node page */ 228 /* Deallocate previous index in the node page */
230 inode = f2fs_iget_nowait(sbi->sb, ino); 229 inode = f2fs_iget_nowait(sbi->sb, ino);
230 if (IS_ERR(inode))
231 return;
232
231 truncate_hole(inode, bidx, bidx + 1); 233 truncate_hole(inode, bidx, bidx + 1);
232 iput(inode); 234 iput(inode);
233} 235}
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index 1b26e4ea1016..4b0099066582 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -12,57 +12,26 @@
12#include <linux/f2fs_fs.h> 12#include <linux/f2fs_fs.h>
13#include <linux/bio.h> 13#include <linux/bio.h>
14#include <linux/blkdev.h> 14#include <linux/blkdev.h>
15#include <linux/prefetch.h>
15#include <linux/vmalloc.h> 16#include <linux/vmalloc.h>
16 17
17#include "f2fs.h" 18#include "f2fs.h"
18#include "segment.h" 19#include "segment.h"
19#include "node.h" 20#include "node.h"
20 21
21static int need_to_flush(struct f2fs_sb_info *sbi)
22{
23 unsigned int pages_per_sec = (1 << sbi->log_blocks_per_seg) *
24 sbi->segs_per_sec;
25 int node_secs = ((get_pages(sbi, F2FS_DIRTY_NODES) + pages_per_sec - 1)
26 >> sbi->log_blocks_per_seg) / sbi->segs_per_sec;
27 int dent_secs = ((get_pages(sbi, F2FS_DIRTY_DENTS) + pages_per_sec - 1)
28 >> sbi->log_blocks_per_seg) / sbi->segs_per_sec;
29
30 if (sbi->por_doing)
31 return 0;
32
33 if (free_sections(sbi) <= (node_secs + 2 * dent_secs +
34 reserved_sections(sbi)))
35 return 1;
36 return 0;
37}
38
39/* 22/*
40 * This function balances dirty node and dentry pages. 23 * This function balances dirty node and dentry pages.
41 * In addition, it controls garbage collection. 24 * In addition, it controls garbage collection.
42 */ 25 */
43void f2fs_balance_fs(struct f2fs_sb_info *sbi) 26void f2fs_balance_fs(struct f2fs_sb_info *sbi)
44{ 27{
45 struct writeback_control wbc = {
46 .sync_mode = WB_SYNC_ALL,
47 .nr_to_write = LONG_MAX,
48 .for_reclaim = 0,
49 };
50
51 if (sbi->por_doing)
52 return;
53
54 /* 28 /*
55 * We should do checkpoint when there are so many dirty node pages 29 * We should do GC or end up with checkpoint, if there are so many dirty
56 * with enough free segments. After then, we should do GC. 30 * dir/node pages without enough free segments.
57 */ 31 */
58 if (need_to_flush(sbi)) {
59 sync_dirty_dir_inodes(sbi);
60 sync_node_pages(sbi, 0, &wbc);
61 }
62
63 if (has_not_enough_free_secs(sbi)) { 32 if (has_not_enough_free_secs(sbi)) {
64 mutex_lock(&sbi->gc_mutex); 33 mutex_lock(&sbi->gc_mutex);
65 f2fs_gc(sbi, 1); 34 f2fs_gc(sbi);
66 } 35 }
67} 36}
68 37
@@ -631,7 +600,6 @@ static void f2fs_end_io_write(struct bio *bio, int err)
631 if (page->mapping) 600 if (page->mapping)
632 set_bit(AS_EIO, &page->mapping->flags); 601 set_bit(AS_EIO, &page->mapping->flags);
633 set_ckpt_flags(p->sbi->ckpt, CP_ERROR_FLAG); 602 set_ckpt_flags(p->sbi->ckpt, CP_ERROR_FLAG);
634 set_page_dirty(page);
635 } 603 }
636 end_page_writeback(page); 604 end_page_writeback(page);
637 dec_page_count(p->sbi, F2FS_WRITEBACK); 605 dec_page_count(p->sbi, F2FS_WRITEBACK);
@@ -791,11 +759,10 @@ static int __get_segment_type(struct page *page, enum page_type p_type)
791 return __get_segment_type_2(page, p_type); 759 return __get_segment_type_2(page, p_type);
792 case 4: 760 case 4:
793 return __get_segment_type_4(page, p_type); 761 return __get_segment_type_4(page, p_type);
794 case 6:
795 return __get_segment_type_6(page, p_type);
796 default:
797 BUG();
798 } 762 }
763 /* NR_CURSEG_TYPE(6) logs by default */
764 BUG_ON(sbi->active_logs != NR_CURSEG_TYPE);
765 return __get_segment_type_6(page, p_type);
799} 766}
800 767
801static void do_write_page(struct f2fs_sb_info *sbi, struct page *page, 768static void do_write_page(struct f2fs_sb_info *sbi, struct page *page,
@@ -1608,7 +1575,6 @@ static int build_dirty_segmap(struct f2fs_sb_info *sbi)
1608 1575
1609 for (i = 0; i < NR_DIRTY_TYPE; i++) { 1576 for (i = 0; i < NR_DIRTY_TYPE; i++) {
1610 dirty_i->dirty_segmap[i] = kzalloc(bitmap_size, GFP_KERNEL); 1577 dirty_i->dirty_segmap[i] = kzalloc(bitmap_size, GFP_KERNEL);
1611 dirty_i->nr_dirty[i] = 0;
1612 if (!dirty_i->dirty_segmap[i]) 1578 if (!dirty_i->dirty_segmap[i])
1613 return -ENOMEM; 1579 return -ENOMEM;
1614 } 1580 }
diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h
index 0948405af6f5..66a288a52fd3 100644
--- a/fs/f2fs/segment.h
+++ b/fs/f2fs/segment.h
@@ -459,7 +459,20 @@ static inline int get_ssr_segment(struct f2fs_sb_info *sbi, int type)
459 459
460static inline bool has_not_enough_free_secs(struct f2fs_sb_info *sbi) 460static inline bool has_not_enough_free_secs(struct f2fs_sb_info *sbi)
461{ 461{
462 return free_sections(sbi) <= reserved_sections(sbi); 462 unsigned int pages_per_sec = (1 << sbi->log_blocks_per_seg) *
463 sbi->segs_per_sec;
464 int node_secs = ((get_pages(sbi, F2FS_DIRTY_NODES) + pages_per_sec - 1)
465 >> sbi->log_blocks_per_seg) / sbi->segs_per_sec;
466 int dent_secs = ((get_pages(sbi, F2FS_DIRTY_DENTS) + pages_per_sec - 1)
467 >> sbi->log_blocks_per_seg) / sbi->segs_per_sec;
468
469 if (sbi->por_doing)
470 return false;
471
472 if (free_sections(sbi) <= (node_secs + 2 * dent_secs +
473 reserved_sections(sbi)))
474 return true;
475 return false;
463} 476}
464 477
465static inline int utilization(struct f2fs_sb_info *sbi) 478static inline int utilization(struct f2fs_sb_info *sbi)
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index 13867322cf5a..37fad04c8669 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -53,6 +53,18 @@ static match_table_t f2fs_tokens = {
53 {Opt_err, NULL}, 53 {Opt_err, NULL},
54}; 54};
55 55
56void f2fs_msg(struct super_block *sb, const char *level, const char *fmt, ...)
57{
58 struct va_format vaf;
59 va_list args;
60
61 va_start(args, fmt);
62 vaf.fmt = fmt;
63 vaf.va = &args;
64 printk("%sF2FS-fs (%s): %pV\n", level, sb->s_id, &vaf);
65 va_end(args);
66}
67
56static void init_once(void *foo) 68static void init_once(void *foo)
57{ 69{
58 struct f2fs_inode_info *fi = (struct f2fs_inode_info *) foo; 70 struct f2fs_inode_info *fi = (struct f2fs_inode_info *) foo;
@@ -119,15 +131,16 @@ static void f2fs_put_super(struct super_block *sb)
119int f2fs_sync_fs(struct super_block *sb, int sync) 131int f2fs_sync_fs(struct super_block *sb, int sync)
120{ 132{
121 struct f2fs_sb_info *sbi = F2FS_SB(sb); 133 struct f2fs_sb_info *sbi = F2FS_SB(sb);
122 int ret = 0;
123 134
124 if (!sbi->s_dirty && !get_pages(sbi, F2FS_DIRTY_NODES)) 135 if (!sbi->s_dirty && !get_pages(sbi, F2FS_DIRTY_NODES))
125 return 0; 136 return 0;
126 137
127 if (sync) 138 if (sync)
128 write_checkpoint(sbi, false, false); 139 write_checkpoint(sbi, false, false);
140 else
141 f2fs_balance_fs(sbi);
129 142
130 return ret; 143 return 0;
131} 144}
132 145
133static int f2fs_statfs(struct dentry *dentry, struct kstatfs *buf) 146static int f2fs_statfs(struct dentry *dentry, struct kstatfs *buf)
@@ -148,8 +161,8 @@ static int f2fs_statfs(struct dentry *dentry, struct kstatfs *buf)
148 buf->f_bfree = buf->f_blocks - valid_user_blocks(sbi) - ovp_count; 161 buf->f_bfree = buf->f_blocks - valid_user_blocks(sbi) - ovp_count;
149 buf->f_bavail = user_block_count - valid_user_blocks(sbi); 162 buf->f_bavail = user_block_count - valid_user_blocks(sbi);
150 163
151 buf->f_files = valid_inode_count(sbi); 164 buf->f_files = sbi->total_node_count;
152 buf->f_ffree = sbi->total_node_count - valid_node_count(sbi); 165 buf->f_ffree = sbi->total_node_count - valid_inode_count(sbi);
153 166
154 buf->f_namelen = F2FS_MAX_NAME_LEN; 167 buf->f_namelen = F2FS_MAX_NAME_LEN;
155 buf->f_fsid.val[0] = (u32)id; 168 buf->f_fsid.val[0] = (u32)id;
@@ -248,7 +261,8 @@ static const struct export_operations f2fs_export_ops = {
248 .get_parent = f2fs_get_parent, 261 .get_parent = f2fs_get_parent,
249}; 262};
250 263
251static int parse_options(struct f2fs_sb_info *sbi, char *options) 264static int parse_options(struct super_block *sb, struct f2fs_sb_info *sbi,
265 char *options)
252{ 266{
253 substring_t args[MAX_OPT_ARGS]; 267 substring_t args[MAX_OPT_ARGS];
254 char *p; 268 char *p;
@@ -287,7 +301,8 @@ static int parse_options(struct f2fs_sb_info *sbi, char *options)
287 break; 301 break;
288#else 302#else
289 case Opt_nouser_xattr: 303 case Opt_nouser_xattr:
290 pr_info("nouser_xattr options not supported\n"); 304 f2fs_msg(sb, KERN_INFO,
305 "nouser_xattr options not supported");
291 break; 306 break;
292#endif 307#endif
293#ifdef CONFIG_F2FS_FS_POSIX_ACL 308#ifdef CONFIG_F2FS_FS_POSIX_ACL
@@ -296,13 +311,13 @@ static int parse_options(struct f2fs_sb_info *sbi, char *options)
296 break; 311 break;
297#else 312#else
298 case Opt_noacl: 313 case Opt_noacl:
299 pr_info("noacl options not supported\n"); 314 f2fs_msg(sb, KERN_INFO, "noacl options not supported");
300 break; 315 break;
301#endif 316#endif
302 case Opt_active_logs: 317 case Opt_active_logs:
303 if (args->from && match_int(args, &arg)) 318 if (args->from && match_int(args, &arg))
304 return -EINVAL; 319 return -EINVAL;
305 if (arg != 2 && arg != 4 && arg != 6) 320 if (arg != 2 && arg != 4 && arg != NR_CURSEG_TYPE)
306 return -EINVAL; 321 return -EINVAL;
307 sbi->active_logs = arg; 322 sbi->active_logs = arg;
308 break; 323 break;
@@ -310,8 +325,9 @@ static int parse_options(struct f2fs_sb_info *sbi, char *options)
310 set_opt(sbi, DISABLE_EXT_IDENTIFY); 325 set_opt(sbi, DISABLE_EXT_IDENTIFY);
311 break; 326 break;
312 default: 327 default:
313 pr_err("Unrecognized mount option \"%s\" or missing value\n", 328 f2fs_msg(sb, KERN_ERR,
314 p); 329 "Unrecognized mount option \"%s\" or missing value",
330 p);
315 return -EINVAL; 331 return -EINVAL;
316 } 332 }
317 } 333 }
@@ -338,23 +354,36 @@ static loff_t max_file_size(unsigned bits)
338 return result; 354 return result;
339} 355}
340 356
341static int sanity_check_raw_super(struct f2fs_super_block *raw_super) 357static int sanity_check_raw_super(struct super_block *sb,
358 struct f2fs_super_block *raw_super)
342{ 359{
343 unsigned int blocksize; 360 unsigned int blocksize;
344 361
345 if (F2FS_SUPER_MAGIC != le32_to_cpu(raw_super->magic)) 362 if (F2FS_SUPER_MAGIC != le32_to_cpu(raw_super->magic)) {
363 f2fs_msg(sb, KERN_INFO,
364 "Magic Mismatch, valid(0x%x) - read(0x%x)",
365 F2FS_SUPER_MAGIC, le32_to_cpu(raw_super->magic));
346 return 1; 366 return 1;
367 }
347 368
348 /* Currently, support only 4KB block size */ 369 /* Currently, support only 4KB block size */
349 blocksize = 1 << le32_to_cpu(raw_super->log_blocksize); 370 blocksize = 1 << le32_to_cpu(raw_super->log_blocksize);
350 if (blocksize != PAGE_CACHE_SIZE) 371 if (blocksize != PAGE_CACHE_SIZE) {
372 f2fs_msg(sb, KERN_INFO,
373 "Invalid blocksize (%u), supports only 4KB\n",
374 blocksize);
351 return 1; 375 return 1;
376 }
352 if (le32_to_cpu(raw_super->log_sectorsize) != 377 if (le32_to_cpu(raw_super->log_sectorsize) !=
353 F2FS_LOG_SECTOR_SIZE) 378 F2FS_LOG_SECTOR_SIZE) {
379 f2fs_msg(sb, KERN_INFO, "Invalid log sectorsize");
354 return 1; 380 return 1;
381 }
355 if (le32_to_cpu(raw_super->log_sectors_per_block) != 382 if (le32_to_cpu(raw_super->log_sectors_per_block) !=
356 F2FS_LOG_SECTORS_PER_BLOCK) 383 F2FS_LOG_SECTORS_PER_BLOCK) {
384 f2fs_msg(sb, KERN_INFO, "Invalid log sectors per block");
357 return 1; 385 return 1;
386 }
358 return 0; 387 return 0;
359} 388}
360 389
@@ -414,14 +443,17 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
414 if (!sbi) 443 if (!sbi)
415 return -ENOMEM; 444 return -ENOMEM;
416 445
417 /* set a temporary block size */ 446 /* set a block size */
418 if (!sb_set_blocksize(sb, F2FS_BLKSIZE)) 447 if (!sb_set_blocksize(sb, F2FS_BLKSIZE)) {
448 f2fs_msg(sb, KERN_ERR, "unable to set blocksize");
419 goto free_sbi; 449 goto free_sbi;
450 }
420 451
421 /* read f2fs raw super block */ 452 /* read f2fs raw super block */
422 raw_super_buf = sb_bread(sb, 0); 453 raw_super_buf = sb_bread(sb, 0);
423 if (!raw_super_buf) { 454 if (!raw_super_buf) {
424 err = -EIO; 455 err = -EIO;
456 f2fs_msg(sb, KERN_ERR, "unable to read superblock");
425 goto free_sbi; 457 goto free_sbi;
426 } 458 }
427 raw_super = (struct f2fs_super_block *) 459 raw_super = (struct f2fs_super_block *)
@@ -439,12 +471,14 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
439 set_opt(sbi, POSIX_ACL); 471 set_opt(sbi, POSIX_ACL);
440#endif 472#endif
441 /* parse mount options */ 473 /* parse mount options */
442 if (parse_options(sbi, (char *)data)) 474 if (parse_options(sb, sbi, (char *)data))
443 goto free_sb_buf; 475 goto free_sb_buf;
444 476
445 /* sanity checking of raw super */ 477 /* sanity checking of raw super */
446 if (sanity_check_raw_super(raw_super)) 478 if (sanity_check_raw_super(sb, raw_super)) {
479 f2fs_msg(sb, KERN_ERR, "Can't find a valid F2FS filesystem");
447 goto free_sb_buf; 480 goto free_sb_buf;
481 }
448 482
449 sb->s_maxbytes = max_file_size(le32_to_cpu(raw_super->log_blocksize)); 483 sb->s_maxbytes = max_file_size(le32_to_cpu(raw_super->log_blocksize));
450 sb->s_max_links = F2FS_LINK_MAX; 484 sb->s_max_links = F2FS_LINK_MAX;
@@ -478,18 +512,23 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
478 /* get an inode for meta space */ 512 /* get an inode for meta space */
479 sbi->meta_inode = f2fs_iget(sb, F2FS_META_INO(sbi)); 513 sbi->meta_inode = f2fs_iget(sb, F2FS_META_INO(sbi));
480 if (IS_ERR(sbi->meta_inode)) { 514 if (IS_ERR(sbi->meta_inode)) {
515 f2fs_msg(sb, KERN_ERR, "Failed to read F2FS meta data inode");
481 err = PTR_ERR(sbi->meta_inode); 516 err = PTR_ERR(sbi->meta_inode);
482 goto free_sb_buf; 517 goto free_sb_buf;
483 } 518 }
484 519
485 err = get_valid_checkpoint(sbi); 520 err = get_valid_checkpoint(sbi);
486 if (err) 521 if (err) {
522 f2fs_msg(sb, KERN_ERR, "Failed to get valid F2FS checkpoint");
487 goto free_meta_inode; 523 goto free_meta_inode;
524 }
488 525
489 /* sanity checking of checkpoint */ 526 /* sanity checking of checkpoint */
490 err = -EINVAL; 527 err = -EINVAL;
491 if (sanity_check_ckpt(raw_super, sbi->ckpt)) 528 if (sanity_check_ckpt(raw_super, sbi->ckpt)) {
529 f2fs_msg(sb, KERN_ERR, "Invalid F2FS checkpoint");
492 goto free_cp; 530 goto free_cp;
531 }
493 532
494 sbi->total_valid_node_count = 533 sbi->total_valid_node_count =
495 le32_to_cpu(sbi->ckpt->valid_node_count); 534 le32_to_cpu(sbi->ckpt->valid_node_count);
@@ -503,38 +542,41 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
503 INIT_LIST_HEAD(&sbi->dir_inode_list); 542 INIT_LIST_HEAD(&sbi->dir_inode_list);
504 spin_lock_init(&sbi->dir_inode_lock); 543 spin_lock_init(&sbi->dir_inode_lock);
505 544
506 /* init super block */
507 if (!sb_set_blocksize(sb, sbi->blocksize))
508 goto free_cp;
509
510 init_orphan_info(sbi); 545 init_orphan_info(sbi);
511 546
512 /* setup f2fs internal modules */ 547 /* setup f2fs internal modules */
513 err = build_segment_manager(sbi); 548 err = build_segment_manager(sbi);
514 if (err) 549 if (err) {
550 f2fs_msg(sb, KERN_ERR,
551 "Failed to initialize F2FS segment manager");
515 goto free_sm; 552 goto free_sm;
553 }
516 err = build_node_manager(sbi); 554 err = build_node_manager(sbi);
517 if (err) 555 if (err) {
556 f2fs_msg(sb, KERN_ERR,
557 "Failed to initialize F2FS node manager");
518 goto free_nm; 558 goto free_nm;
559 }
519 560
520 build_gc_manager(sbi); 561 build_gc_manager(sbi);
521 562
522 /* get an inode for node space */ 563 /* get an inode for node space */
523 sbi->node_inode = f2fs_iget(sb, F2FS_NODE_INO(sbi)); 564 sbi->node_inode = f2fs_iget(sb, F2FS_NODE_INO(sbi));
524 if (IS_ERR(sbi->node_inode)) { 565 if (IS_ERR(sbi->node_inode)) {
566 f2fs_msg(sb, KERN_ERR, "Failed to read node inode");
525 err = PTR_ERR(sbi->node_inode); 567 err = PTR_ERR(sbi->node_inode);
526 goto free_nm; 568 goto free_nm;
527 } 569 }
528 570
529 /* if there are nt orphan nodes free them */ 571 /* if there are nt orphan nodes free them */
530 err = -EINVAL; 572 err = -EINVAL;
531 if (!is_set_ckpt_flags(F2FS_CKPT(sbi), CP_UMOUNT_FLAG) && 573 if (recover_orphan_inodes(sbi))
532 recover_orphan_inodes(sbi))
533 goto free_node_inode; 574 goto free_node_inode;
534 575
535 /* read root inode and dentry */ 576 /* read root inode and dentry */
536 root = f2fs_iget(sb, F2FS_ROOT_INO(sbi)); 577 root = f2fs_iget(sb, F2FS_ROOT_INO(sbi));
537 if (IS_ERR(root)) { 578 if (IS_ERR(root)) {
579 f2fs_msg(sb, KERN_ERR, "Failed to read root inode");
538 err = PTR_ERR(root); 580 err = PTR_ERR(root);
539 goto free_node_inode; 581 goto free_node_inode;
540 } 582 }
@@ -548,8 +590,7 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
548 } 590 }
549 591
550 /* recover fsynced data */ 592 /* recover fsynced data */
551 if (!is_set_ckpt_flags(F2FS_CKPT(sbi), CP_UMOUNT_FLAG) && 593 if (!test_opt(sbi, DISABLE_ROLL_FORWARD))
552 !test_opt(sbi, DISABLE_ROLL_FORWARD))
553 recover_fsync_data(sbi); 594 recover_fsync_data(sbi);
554 595
555 /* After POR, we can run background GC thread */ 596 /* After POR, we can run background GC thread */
@@ -599,7 +640,7 @@ static struct file_system_type f2fs_fs_type = {
599 .fs_flags = FS_REQUIRES_DEV, 640 .fs_flags = FS_REQUIRES_DEV,
600}; 641};
601 642
602static int init_inodecache(void) 643static int __init init_inodecache(void)
603{ 644{
604 f2fs_inode_cachep = f2fs_kmem_cache_create("f2fs_inode_cache", 645 f2fs_inode_cachep = f2fs_kmem_cache_create("f2fs_inode_cache",
605 sizeof(struct f2fs_inode_info), NULL); 646 sizeof(struct f2fs_inode_info), NULL);
@@ -634,14 +675,17 @@ static int __init init_f2fs_fs(void)
634 err = create_checkpoint_caches(); 675 err = create_checkpoint_caches();
635 if (err) 676 if (err)
636 goto fail; 677 goto fail;
637 return register_filesystem(&f2fs_fs_type); 678 err = register_filesystem(&f2fs_fs_type);
679 if (err)
680 goto fail;
681 f2fs_create_root_stats();
638fail: 682fail:
639 return err; 683 return err;
640} 684}
641 685
642static void __exit exit_f2fs_fs(void) 686static void __exit exit_f2fs_fs(void)
643{ 687{
644 destroy_root_stats(); 688 f2fs_destroy_root_stats();
645 unregister_filesystem(&f2fs_fs_type); 689 unregister_filesystem(&f2fs_fs_type);
646 destroy_checkpoint_caches(); 690 destroy_checkpoint_caches();
647 destroy_gc_caches(); 691 destroy_gc_caches();
diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c
index 7d52e8dc0c59..8038c0496504 100644
--- a/fs/f2fs/xattr.c
+++ b/fs/f2fs/xattr.c
@@ -208,7 +208,7 @@ int f2fs_getxattr(struct inode *inode, int name_index, const char *name,
208 struct page *page; 208 struct page *page;
209 void *base_addr; 209 void *base_addr;
210 int error = 0, found = 0; 210 int error = 0, found = 0;
211 int value_len, name_len; 211 size_t value_len, name_len;
212 212
213 if (name == NULL) 213 if (name == NULL)
214 return -EINVAL; 214 return -EINVAL;
@@ -304,7 +304,8 @@ int f2fs_setxattr(struct inode *inode, int name_index, const char *name,
304 struct f2fs_xattr_entry *here, *last; 304 struct f2fs_xattr_entry *here, *last;
305 struct page *page; 305 struct page *page;
306 void *base_addr; 306 void *base_addr;
307 int error, found, free, name_len, newsize; 307 int error, found, free, newsize;
308 size_t name_len;
308 char *pval; 309 char *pval;
309 310
310 if (name == NULL) 311 if (name == NULL)
@@ -317,6 +318,8 @@ int f2fs_setxattr(struct inode *inode, int name_index, const char *name,
317 if (name_len > 255 || value_len > MAX_VALUE_LEN) 318 if (name_len > 255 || value_len > MAX_VALUE_LEN)
318 return -ERANGE; 319 return -ERANGE;
319 320
321 f2fs_balance_fs(sbi);
322
320 mutex_lock_op(sbi, NODE_NEW); 323 mutex_lock_op(sbi, NODE_NEW);
321 if (!fi->i_xattr_nid) { 324 if (!fi->i_xattr_nid) {
322 /* Allocate new attribute block */ 325 /* Allocate new attribute block */
diff --git a/fs/file.c b/fs/file.c
index 15cb8618e95d..3906d9577a18 100644
--- a/fs/file.c
+++ b/fs/file.c
@@ -490,7 +490,7 @@ void exit_files(struct task_struct *tsk)
490 } 490 }
491} 491}
492 492
493static void __devinit fdtable_defer_list_init(int cpu) 493static void fdtable_defer_list_init(int cpu)
494{ 494{
495 struct fdtable_defer *fddef = &per_cpu(fdtable_defer_list, cpu); 495 struct fdtable_defer *fddef = &per_cpu(fdtable_defer_list, cpu);
496 spin_lock_init(&fddef->lock); 496 spin_lock_init(&fddef->lock);
@@ -516,7 +516,7 @@ struct files_struct init_files = {
516 .close_on_exec = init_files.close_on_exec_init, 516 .close_on_exec = init_files.close_on_exec_init,
517 .open_fds = init_files.open_fds_init, 517 .open_fds = init_files.open_fds_init,
518 }, 518 },
519 .file_lock = __SPIN_LOCK_UNLOCKED(init_task.file_lock), 519 .file_lock = __SPIN_LOCK_UNLOCKED(init_files.file_lock),
520}; 520};
521 521
522/* 522/*
diff --git a/fs/fuse/Kconfig b/fs/fuse/Kconfig
index 0cf160a94eda..1b2f6c2c3aaf 100644
--- a/fs/fuse/Kconfig
+++ b/fs/fuse/Kconfig
@@ -4,12 +4,24 @@ config FUSE_FS
4 With FUSE it is possible to implement a fully functional filesystem 4 With FUSE it is possible to implement a fully functional filesystem
5 in a userspace program. 5 in a userspace program.
6 6
7 There's also companion library: libfuse. This library along with 7 There's also a companion library: libfuse2. This library is available
8 utilities is available from the FUSE homepage: 8 from the FUSE homepage:
9 <http://fuse.sourceforge.net/> 9 <http://fuse.sourceforge.net/>
10 although chances are your distribution already has that library
11 installed if you've installed the "fuse" package itself.
10 12
11 See <file:Documentation/filesystems/fuse.txt> for more information. 13 See <file:Documentation/filesystems/fuse.txt> for more information.
12 See <file:Documentation/Changes> for needed library/utility version. 14 See <file:Documentation/Changes> for needed library/utility version.
13 15
14 If you want to develop a userspace FS, or if you want to use 16 If you want to develop a userspace FS, or if you want to use
15 a filesystem based on FUSE, answer Y or M. 17 a filesystem based on FUSE, answer Y or M.
18
19config CUSE
20 tristate "Character device in Userspace support"
21 depends on FUSE_FS
22 help
23 This FUSE extension allows character devices to be
24 implemented in userspace.
25
26 If you want to develop or use a userspace character device
27 based on CUSE, answer Y or M.
diff --git a/fs/fuse/cuse.c b/fs/fuse/cuse.c
index ee8d55042298..6f96a8def147 100644
--- a/fs/fuse/cuse.c
+++ b/fs/fuse/cuse.c
@@ -45,7 +45,6 @@
45#include <linux/miscdevice.h> 45#include <linux/miscdevice.h>
46#include <linux/mutex.h> 46#include <linux/mutex.h>
47#include <linux/slab.h> 47#include <linux/slab.h>
48#include <linux/spinlock.h>
49#include <linux/stat.h> 48#include <linux/stat.h>
50#include <linux/module.h> 49#include <linux/module.h>
51 50
@@ -63,7 +62,7 @@ struct cuse_conn {
63 bool unrestricted_ioctl; 62 bool unrestricted_ioctl;
64}; 63};
65 64
66static DEFINE_SPINLOCK(cuse_lock); /* protects cuse_conntbl */ 65static DEFINE_MUTEX(cuse_lock); /* protects registration */
67static struct list_head cuse_conntbl[CUSE_CONNTBL_LEN]; 66static struct list_head cuse_conntbl[CUSE_CONNTBL_LEN];
68static struct class *cuse_class; 67static struct class *cuse_class;
69 68
@@ -92,19 +91,22 @@ static ssize_t cuse_read(struct file *file, char __user *buf, size_t count,
92 loff_t *ppos) 91 loff_t *ppos)
93{ 92{
94 loff_t pos = 0; 93 loff_t pos = 0;
94 struct iovec iov = { .iov_base = buf, .iov_len = count };
95 95
96 return fuse_direct_io(file, buf, count, &pos, 0); 96 return fuse_direct_io(file, &iov, 1, count, &pos, 0);
97} 97}
98 98
99static ssize_t cuse_write(struct file *file, const char __user *buf, 99static ssize_t cuse_write(struct file *file, const char __user *buf,
100 size_t count, loff_t *ppos) 100 size_t count, loff_t *ppos)
101{ 101{
102 loff_t pos = 0; 102 loff_t pos = 0;
103 struct iovec iov = { .iov_base = (void __user *)buf, .iov_len = count };
104
103 /* 105 /*
104 * No locking or generic_write_checks(), the server is 106 * No locking or generic_write_checks(), the server is
105 * responsible for locking and sanity checks. 107 * responsible for locking and sanity checks.
106 */ 108 */
107 return fuse_direct_io(file, buf, count, &pos, 1); 109 return fuse_direct_io(file, &iov, 1, count, &pos, 1);
108} 110}
109 111
110static int cuse_open(struct inode *inode, struct file *file) 112static int cuse_open(struct inode *inode, struct file *file)
@@ -114,14 +116,14 @@ static int cuse_open(struct inode *inode, struct file *file)
114 int rc; 116 int rc;
115 117
116 /* look up and get the connection */ 118 /* look up and get the connection */
117 spin_lock(&cuse_lock); 119 mutex_lock(&cuse_lock);
118 list_for_each_entry(pos, cuse_conntbl_head(devt), list) 120 list_for_each_entry(pos, cuse_conntbl_head(devt), list)
119 if (pos->dev->devt == devt) { 121 if (pos->dev->devt == devt) {
120 fuse_conn_get(&pos->fc); 122 fuse_conn_get(&pos->fc);
121 cc = pos; 123 cc = pos;
122 break; 124 break;
123 } 125 }
124 spin_unlock(&cuse_lock); 126 mutex_unlock(&cuse_lock);
125 127
126 /* dead? */ 128 /* dead? */
127 if (!cc) 129 if (!cc)
@@ -267,7 +269,7 @@ static int cuse_parse_one(char **pp, char *end, char **keyp, char **valp)
267static int cuse_parse_devinfo(char *p, size_t len, struct cuse_devinfo *devinfo) 269static int cuse_parse_devinfo(char *p, size_t len, struct cuse_devinfo *devinfo)
268{ 270{
269 char *end = p + len; 271 char *end = p + len;
270 char *key, *val; 272 char *uninitialized_var(key), *uninitialized_var(val);
271 int rc; 273 int rc;
272 274
273 while (true) { 275 while (true) {
@@ -305,14 +307,14 @@ static void cuse_gendev_release(struct device *dev)
305 */ 307 */
306static void cuse_process_init_reply(struct fuse_conn *fc, struct fuse_req *req) 308static void cuse_process_init_reply(struct fuse_conn *fc, struct fuse_req *req)
307{ 309{
308 struct cuse_conn *cc = fc_to_cc(fc); 310 struct cuse_conn *cc = fc_to_cc(fc), *pos;
309 struct cuse_init_out *arg = req->out.args[0].value; 311 struct cuse_init_out *arg = req->out.args[0].value;
310 struct page *page = req->pages[0]; 312 struct page *page = req->pages[0];
311 struct cuse_devinfo devinfo = { }; 313 struct cuse_devinfo devinfo = { };
312 struct device *dev; 314 struct device *dev;
313 struct cdev *cdev; 315 struct cdev *cdev;
314 dev_t devt; 316 dev_t devt;
315 int rc; 317 int rc, i;
316 318
317 if (req->out.h.error || 319 if (req->out.h.error ||
318 arg->major != FUSE_KERNEL_VERSION || arg->minor < 11) { 320 arg->major != FUSE_KERNEL_VERSION || arg->minor < 11) {
@@ -356,15 +358,24 @@ static void cuse_process_init_reply(struct fuse_conn *fc, struct fuse_req *req)
356 dev_set_drvdata(dev, cc); 358 dev_set_drvdata(dev, cc);
357 dev_set_name(dev, "%s", devinfo.name); 359 dev_set_name(dev, "%s", devinfo.name);
358 360
361 mutex_lock(&cuse_lock);
362
363 /* make sure the device-name is unique */
364 for (i = 0; i < CUSE_CONNTBL_LEN; ++i) {
365 list_for_each_entry(pos, &cuse_conntbl[i], list)
366 if (!strcmp(dev_name(pos->dev), dev_name(dev)))
367 goto err_unlock;
368 }
369
359 rc = device_add(dev); 370 rc = device_add(dev);
360 if (rc) 371 if (rc)
361 goto err_device; 372 goto err_unlock;
362 373
363 /* register cdev */ 374 /* register cdev */
364 rc = -ENOMEM; 375 rc = -ENOMEM;
365 cdev = cdev_alloc(); 376 cdev = cdev_alloc();
366 if (!cdev) 377 if (!cdev)
367 goto err_device; 378 goto err_unlock;
368 379
369 cdev->owner = THIS_MODULE; 380 cdev->owner = THIS_MODULE;
370 cdev->ops = &cuse_frontend_fops; 381 cdev->ops = &cuse_frontend_fops;
@@ -377,9 +388,8 @@ static void cuse_process_init_reply(struct fuse_conn *fc, struct fuse_req *req)
377 cc->cdev = cdev; 388 cc->cdev = cdev;
378 389
379 /* make the device available */ 390 /* make the device available */
380 spin_lock(&cuse_lock);
381 list_add(&cc->list, cuse_conntbl_head(devt)); 391 list_add(&cc->list, cuse_conntbl_head(devt));
382 spin_unlock(&cuse_lock); 392 mutex_unlock(&cuse_lock);
383 393
384 /* announce device availability */ 394 /* announce device availability */
385 dev_set_uevent_suppress(dev, 0); 395 dev_set_uevent_suppress(dev, 0);
@@ -391,7 +401,8 @@ out:
391 401
392err_cdev: 402err_cdev:
393 cdev_del(cdev); 403 cdev_del(cdev);
394err_device: 404err_unlock:
405 mutex_unlock(&cuse_lock);
395 put_device(dev); 406 put_device(dev);
396err_region: 407err_region:
397 unregister_chrdev_region(devt, 1); 408 unregister_chrdev_region(devt, 1);
@@ -411,7 +422,7 @@ static int cuse_send_init(struct cuse_conn *cc)
411 422
412 BUILD_BUG_ON(CUSE_INIT_INFO_MAX > PAGE_SIZE); 423 BUILD_BUG_ON(CUSE_INIT_INFO_MAX > PAGE_SIZE);
413 424
414 req = fuse_get_req(fc); 425 req = fuse_get_req(fc, 1);
415 if (IS_ERR(req)) { 426 if (IS_ERR(req)) {
416 rc = PTR_ERR(req); 427 rc = PTR_ERR(req);
417 goto err; 428 goto err;
@@ -441,6 +452,7 @@ static int cuse_send_init(struct cuse_conn *cc)
441 req->out.argvar = 1; 452 req->out.argvar = 1;
442 req->out.argpages = 1; 453 req->out.argpages = 1;
443 req->pages[0] = page; 454 req->pages[0] = page;
455 req->page_descs[0].length = req->out.args[1].size;
444 req->num_pages = 1; 456 req->num_pages = 1;
445 req->end = cuse_process_init_reply; 457 req->end = cuse_process_init_reply;
446 fuse_request_send_background(fc, req); 458 fuse_request_send_background(fc, req);
@@ -520,9 +532,9 @@ static int cuse_channel_release(struct inode *inode, struct file *file)
520 int rc; 532 int rc;
521 533
522 /* remove from the conntbl, no more access from this point on */ 534 /* remove from the conntbl, no more access from this point on */
523 spin_lock(&cuse_lock); 535 mutex_lock(&cuse_lock);
524 list_del_init(&cc->list); 536 list_del_init(&cc->list);
525 spin_unlock(&cuse_lock); 537 mutex_unlock(&cuse_lock);
526 538
527 /* remove device */ 539 /* remove device */
528 if (cc->dev) 540 if (cc->dev)
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index c16335315e5d..e9bdec0b16d9 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -34,34 +34,67 @@ static struct fuse_conn *fuse_get_conn(struct file *file)
34 return file->private_data; 34 return file->private_data;
35} 35}
36 36
37static void fuse_request_init(struct fuse_req *req) 37static void fuse_request_init(struct fuse_req *req, struct page **pages,
38 struct fuse_page_desc *page_descs,
39 unsigned npages)
38{ 40{
39 memset(req, 0, sizeof(*req)); 41 memset(req, 0, sizeof(*req));
42 memset(pages, 0, sizeof(*pages) * npages);
43 memset(page_descs, 0, sizeof(*page_descs) * npages);
40 INIT_LIST_HEAD(&req->list); 44 INIT_LIST_HEAD(&req->list);
41 INIT_LIST_HEAD(&req->intr_entry); 45 INIT_LIST_HEAD(&req->intr_entry);
42 init_waitqueue_head(&req->waitq); 46 init_waitqueue_head(&req->waitq);
43 atomic_set(&req->count, 1); 47 atomic_set(&req->count, 1);
48 req->pages = pages;
49 req->page_descs = page_descs;
50 req->max_pages = npages;
44} 51}
45 52
46struct fuse_req *fuse_request_alloc(void) 53static struct fuse_req *__fuse_request_alloc(unsigned npages, gfp_t flags)
47{ 54{
48 struct fuse_req *req = kmem_cache_alloc(fuse_req_cachep, GFP_KERNEL); 55 struct fuse_req *req = kmem_cache_alloc(fuse_req_cachep, flags);
49 if (req) 56 if (req) {
50 fuse_request_init(req); 57 struct page **pages;
58 struct fuse_page_desc *page_descs;
59
60 if (npages <= FUSE_REQ_INLINE_PAGES) {
61 pages = req->inline_pages;
62 page_descs = req->inline_page_descs;
63 } else {
64 pages = kmalloc(sizeof(struct page *) * npages, flags);
65 page_descs = kmalloc(sizeof(struct fuse_page_desc) *
66 npages, flags);
67 }
68
69 if (!pages || !page_descs) {
70 kfree(pages);
71 kfree(page_descs);
72 kmem_cache_free(fuse_req_cachep, req);
73 return NULL;
74 }
75
76 fuse_request_init(req, pages, page_descs, npages);
77 }
51 return req; 78 return req;
52} 79}
80
81struct fuse_req *fuse_request_alloc(unsigned npages)
82{
83 return __fuse_request_alloc(npages, GFP_KERNEL);
84}
53EXPORT_SYMBOL_GPL(fuse_request_alloc); 85EXPORT_SYMBOL_GPL(fuse_request_alloc);
54 86
55struct fuse_req *fuse_request_alloc_nofs(void) 87struct fuse_req *fuse_request_alloc_nofs(unsigned npages)
56{ 88{
57 struct fuse_req *req = kmem_cache_alloc(fuse_req_cachep, GFP_NOFS); 89 return __fuse_request_alloc(npages, GFP_NOFS);
58 if (req)
59 fuse_request_init(req);
60 return req;
61} 90}
62 91
63void fuse_request_free(struct fuse_req *req) 92void fuse_request_free(struct fuse_req *req)
64{ 93{
94 if (req->pages != req->inline_pages) {
95 kfree(req->pages);
96 kfree(req->page_descs);
97 }
65 kmem_cache_free(fuse_req_cachep, req); 98 kmem_cache_free(fuse_req_cachep, req);
66} 99}
67 100
@@ -97,7 +130,7 @@ static void fuse_req_init_context(struct fuse_req *req)
97 req->in.h.pid = current->pid; 130 req->in.h.pid = current->pid;
98} 131}
99 132
100struct fuse_req *fuse_get_req(struct fuse_conn *fc) 133struct fuse_req *fuse_get_req(struct fuse_conn *fc, unsigned npages)
101{ 134{
102 struct fuse_req *req; 135 struct fuse_req *req;
103 sigset_t oldset; 136 sigset_t oldset;
@@ -116,7 +149,7 @@ struct fuse_req *fuse_get_req(struct fuse_conn *fc)
116 if (!fc->connected) 149 if (!fc->connected)
117 goto out; 150 goto out;
118 151
119 req = fuse_request_alloc(); 152 req = fuse_request_alloc(npages);
120 err = -ENOMEM; 153 err = -ENOMEM;
121 if (!req) 154 if (!req)
122 goto out; 155 goto out;
@@ -165,7 +198,7 @@ static void put_reserved_req(struct fuse_conn *fc, struct fuse_req *req)
165 struct fuse_file *ff = file->private_data; 198 struct fuse_file *ff = file->private_data;
166 199
167 spin_lock(&fc->lock); 200 spin_lock(&fc->lock);
168 fuse_request_init(req); 201 fuse_request_init(req, req->pages, req->page_descs, req->max_pages);
169 BUG_ON(ff->reserved_req); 202 BUG_ON(ff->reserved_req);
170 ff->reserved_req = req; 203 ff->reserved_req = req;
171 wake_up_all(&fc->reserved_req_waitq); 204 wake_up_all(&fc->reserved_req_waitq);
@@ -186,13 +219,14 @@ static void put_reserved_req(struct fuse_conn *fc, struct fuse_req *req)
186 * filesystem should not have it's own file open. If deadlock is 219 * filesystem should not have it's own file open. If deadlock is
187 * intentional, it can still be broken by "aborting" the filesystem. 220 * intentional, it can still be broken by "aborting" the filesystem.
188 */ 221 */
189struct fuse_req *fuse_get_req_nofail(struct fuse_conn *fc, struct file *file) 222struct fuse_req *fuse_get_req_nofail_nopages(struct fuse_conn *fc,
223 struct file *file)
190{ 224{
191 struct fuse_req *req; 225 struct fuse_req *req;
192 226
193 atomic_inc(&fc->num_waiting); 227 atomic_inc(&fc->num_waiting);
194 wait_event(fc->blocked_waitq, !fc->blocked); 228 wait_event(fc->blocked_waitq, !fc->blocked);
195 req = fuse_request_alloc(); 229 req = fuse_request_alloc(0);
196 if (!req) 230 if (!req)
197 req = get_reserved_req(fc, file); 231 req = get_reserved_req(fc, file);
198 232
@@ -406,9 +440,8 @@ __acquires(fc->lock)
406 } 440 }
407} 441}
408 442
409void fuse_request_send(struct fuse_conn *fc, struct fuse_req *req) 443static void __fuse_request_send(struct fuse_conn *fc, struct fuse_req *req)
410{ 444{
411 req->isreply = 1;
412 spin_lock(&fc->lock); 445 spin_lock(&fc->lock);
413 if (!fc->connected) 446 if (!fc->connected)
414 req->out.h.error = -ENOTCONN; 447 req->out.h.error = -ENOTCONN;
@@ -425,6 +458,12 @@ void fuse_request_send(struct fuse_conn *fc, struct fuse_req *req)
425 } 458 }
426 spin_unlock(&fc->lock); 459 spin_unlock(&fc->lock);
427} 460}
461
462void fuse_request_send(struct fuse_conn *fc, struct fuse_req *req)
463{
464 req->isreply = 1;
465 __fuse_request_send(fc, req);
466}
428EXPORT_SYMBOL_GPL(fuse_request_send); 467EXPORT_SYMBOL_GPL(fuse_request_send);
429 468
430static void fuse_request_send_nowait_locked(struct fuse_conn *fc, 469static void fuse_request_send_nowait_locked(struct fuse_conn *fc,
@@ -491,6 +530,27 @@ void fuse_request_send_background_locked(struct fuse_conn *fc,
491 fuse_request_send_nowait_locked(fc, req); 530 fuse_request_send_nowait_locked(fc, req);
492} 531}
493 532
533void fuse_force_forget(struct file *file, u64 nodeid)
534{
535 struct inode *inode = file->f_path.dentry->d_inode;
536 struct fuse_conn *fc = get_fuse_conn(inode);
537 struct fuse_req *req;
538 struct fuse_forget_in inarg;
539
540 memset(&inarg, 0, sizeof(inarg));
541 inarg.nlookup = 1;
542 req = fuse_get_req_nofail_nopages(fc, file);
543 req->in.h.opcode = FUSE_FORGET;
544 req->in.h.nodeid = nodeid;
545 req->in.numargs = 1;
546 req->in.args[0].size = sizeof(inarg);
547 req->in.args[0].value = &inarg;
548 req->isreply = 0;
549 __fuse_request_send(fc, req);
550 /* ignore errors */
551 fuse_put_request(fc, req);
552}
553
494/* 554/*
495 * Lock the request. Up to the next unlock_request() there mustn't be 555 * Lock the request. Up to the next unlock_request() there mustn't be
496 * anything that could cause a page-fault. If the request was already 556 * anything that could cause a page-fault. If the request was already
@@ -692,8 +752,6 @@ static int fuse_try_move_page(struct fuse_copy_state *cs, struct page **pagep)
692 struct page *oldpage = *pagep; 752 struct page *oldpage = *pagep;
693 struct page *newpage; 753 struct page *newpage;
694 struct pipe_buffer *buf = cs->pipebufs; 754 struct pipe_buffer *buf = cs->pipebufs;
695 struct address_space *mapping;
696 pgoff_t index;
697 755
698 unlock_request(cs->fc, cs->req); 756 unlock_request(cs->fc, cs->req);
699 fuse_copy_finish(cs); 757 fuse_copy_finish(cs);
@@ -724,9 +782,6 @@ static int fuse_try_move_page(struct fuse_copy_state *cs, struct page **pagep)
724 if (fuse_check_page(newpage) != 0) 782 if (fuse_check_page(newpage) != 0)
725 goto out_fallback_unlock; 783 goto out_fallback_unlock;
726 784
727 mapping = oldpage->mapping;
728 index = oldpage->index;
729
730 /* 785 /*
731 * This is a new and locked page, it shouldn't be mapped or 786 * This is a new and locked page, it shouldn't be mapped or
732 * have any special flags on it 787 * have any special flags on it
@@ -855,11 +910,11 @@ static int fuse_copy_pages(struct fuse_copy_state *cs, unsigned nbytes,
855{ 910{
856 unsigned i; 911 unsigned i;
857 struct fuse_req *req = cs->req; 912 struct fuse_req *req = cs->req;
858 unsigned offset = req->page_offset;
859 unsigned count = min(nbytes, (unsigned) PAGE_SIZE - offset);
860 913
861 for (i = 0; i < req->num_pages && (nbytes || zeroing); i++) { 914 for (i = 0; i < req->num_pages && (nbytes || zeroing); i++) {
862 int err; 915 int err;
916 unsigned offset = req->page_descs[i].offset;
917 unsigned count = min(nbytes, req->page_descs[i].length);
863 918
864 err = fuse_copy_page(cs, &req->pages[i], offset, count, 919 err = fuse_copy_page(cs, &req->pages[i], offset, count,
865 zeroing); 920 zeroing);
@@ -867,8 +922,6 @@ static int fuse_copy_pages(struct fuse_copy_state *cs, unsigned nbytes,
867 return err; 922 return err;
868 923
869 nbytes -= count; 924 nbytes -= count;
870 count = min(nbytes, (unsigned) PAGE_SIZE);
871 offset = 0;
872 } 925 }
873 return 0; 926 return 0;
874} 927}
@@ -1541,29 +1594,34 @@ static int fuse_retrieve(struct fuse_conn *fc, struct inode *inode,
1541 unsigned int num; 1594 unsigned int num;
1542 unsigned int offset; 1595 unsigned int offset;
1543 size_t total_len = 0; 1596 size_t total_len = 0;
1597 int num_pages;
1598
1599 offset = outarg->offset & ~PAGE_CACHE_MASK;
1600 file_size = i_size_read(inode);
1601
1602 num = outarg->size;
1603 if (outarg->offset > file_size)
1604 num = 0;
1605 else if (outarg->offset + num > file_size)
1606 num = file_size - outarg->offset;
1544 1607
1545 req = fuse_get_req(fc); 1608 num_pages = (num + offset + PAGE_SIZE - 1) >> PAGE_SHIFT;
1609 num_pages = min(num_pages, FUSE_MAX_PAGES_PER_REQ);
1610
1611 req = fuse_get_req(fc, num_pages);
1546 if (IS_ERR(req)) 1612 if (IS_ERR(req))
1547 return PTR_ERR(req); 1613 return PTR_ERR(req);
1548 1614
1549 offset = outarg->offset & ~PAGE_CACHE_MASK;
1550
1551 req->in.h.opcode = FUSE_NOTIFY_REPLY; 1615 req->in.h.opcode = FUSE_NOTIFY_REPLY;
1552 req->in.h.nodeid = outarg->nodeid; 1616 req->in.h.nodeid = outarg->nodeid;
1553 req->in.numargs = 2; 1617 req->in.numargs = 2;
1554 req->in.argpages = 1; 1618 req->in.argpages = 1;
1555 req->page_offset = offset; 1619 req->page_descs[0].offset = offset;
1556 req->end = fuse_retrieve_end; 1620 req->end = fuse_retrieve_end;
1557 1621
1558 index = outarg->offset >> PAGE_CACHE_SHIFT; 1622 index = outarg->offset >> PAGE_CACHE_SHIFT;
1559 file_size = i_size_read(inode);
1560 num = outarg->size;
1561 if (outarg->offset > file_size)
1562 num = 0;
1563 else if (outarg->offset + num > file_size)
1564 num = file_size - outarg->offset;
1565 1623
1566 while (num && req->num_pages < FUSE_MAX_PAGES_PER_REQ) { 1624 while (num && req->num_pages < num_pages) {
1567 struct page *page; 1625 struct page *page;
1568 unsigned int this_num; 1626 unsigned int this_num;
1569 1627
@@ -1573,6 +1631,7 @@ static int fuse_retrieve(struct fuse_conn *fc, struct inode *inode,
1573 1631
1574 this_num = min_t(unsigned, num, PAGE_CACHE_SIZE - offset); 1632 this_num = min_t(unsigned, num, PAGE_CACHE_SIZE - offset);
1575 req->pages[req->num_pages] = page; 1633 req->pages[req->num_pages] = page;
1634 req->page_descs[req->num_pages].length = this_num;
1576 req->num_pages++; 1635 req->num_pages++;
1577 1636
1578 offset = 0; 1637 offset = 0;
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index b7c09f9eb40c..85065221a58a 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -14,6 +14,29 @@
14#include <linux/namei.h> 14#include <linux/namei.h>
15#include <linux/slab.h> 15#include <linux/slab.h>
16 16
17static bool fuse_use_readdirplus(struct inode *dir, struct file *filp)
18{
19 struct fuse_conn *fc = get_fuse_conn(dir);
20 struct fuse_inode *fi = get_fuse_inode(dir);
21
22 if (!fc->do_readdirplus)
23 return false;
24 if (!fc->readdirplus_auto)
25 return true;
26 if (test_and_clear_bit(FUSE_I_ADVISE_RDPLUS, &fi->state))
27 return true;
28 if (filp->f_pos == 0)
29 return true;
30 return false;
31}
32
33static void fuse_advise_use_readdirplus(struct inode *dir)
34{
35 struct fuse_inode *fi = get_fuse_inode(dir);
36
37 set_bit(FUSE_I_ADVISE_RDPLUS, &fi->state);
38}
39
17#if BITS_PER_LONG >= 64 40#if BITS_PER_LONG >= 64
18static inline void fuse_dentry_settime(struct dentry *entry, u64 time) 41static inline void fuse_dentry_settime(struct dentry *entry, u64 time)
19{ 42{
@@ -178,7 +201,7 @@ static int fuse_dentry_revalidate(struct dentry *entry, unsigned int flags)
178 return -ECHILD; 201 return -ECHILD;
179 202
180 fc = get_fuse_conn(inode); 203 fc = get_fuse_conn(inode);
181 req = fuse_get_req(fc); 204 req = fuse_get_req_nopages(fc);
182 if (IS_ERR(req)) 205 if (IS_ERR(req))
183 return 0; 206 return 0;
184 207
@@ -219,6 +242,7 @@ static int fuse_dentry_revalidate(struct dentry *entry, unsigned int flags)
219 attr_version); 242 attr_version);
220 fuse_change_entry_timeout(entry, &outarg); 243 fuse_change_entry_timeout(entry, &outarg);
221 } 244 }
245 fuse_advise_use_readdirplus(inode);
222 return 1; 246 return 1;
223} 247}
224 248
@@ -271,7 +295,7 @@ int fuse_lookup_name(struct super_block *sb, u64 nodeid, struct qstr *name,
271 if (name->len > FUSE_NAME_MAX) 295 if (name->len > FUSE_NAME_MAX)
272 goto out; 296 goto out;
273 297
274 req = fuse_get_req(fc); 298 req = fuse_get_req_nopages(fc);
275 err = PTR_ERR(req); 299 err = PTR_ERR(req);
276 if (IS_ERR(req)) 300 if (IS_ERR(req))
277 goto out; 301 goto out;
@@ -355,6 +379,7 @@ static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry,
355 else 379 else
356 fuse_invalidate_entry_cache(entry); 380 fuse_invalidate_entry_cache(entry);
357 381
382 fuse_advise_use_readdirplus(dir);
358 return newent; 383 return newent;
359 384
360 out_iput: 385 out_iput:
@@ -391,7 +416,7 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry,
391 if (!forget) 416 if (!forget)
392 goto out_err; 417 goto out_err;
393 418
394 req = fuse_get_req(fc); 419 req = fuse_get_req_nopages(fc);
395 err = PTR_ERR(req); 420 err = PTR_ERR(req);
396 if (IS_ERR(req)) 421 if (IS_ERR(req))
397 goto out_put_forget_req; 422 goto out_put_forget_req;
@@ -592,7 +617,7 @@ static int fuse_mknod(struct inode *dir, struct dentry *entry, umode_t mode,
592{ 617{
593 struct fuse_mknod_in inarg; 618 struct fuse_mknod_in inarg;
594 struct fuse_conn *fc = get_fuse_conn(dir); 619 struct fuse_conn *fc = get_fuse_conn(dir);
595 struct fuse_req *req = fuse_get_req(fc); 620 struct fuse_req *req = fuse_get_req_nopages(fc);
596 if (IS_ERR(req)) 621 if (IS_ERR(req))
597 return PTR_ERR(req); 622 return PTR_ERR(req);
598 623
@@ -623,7 +648,7 @@ static int fuse_mkdir(struct inode *dir, struct dentry *entry, umode_t mode)
623{ 648{
624 struct fuse_mkdir_in inarg; 649 struct fuse_mkdir_in inarg;
625 struct fuse_conn *fc = get_fuse_conn(dir); 650 struct fuse_conn *fc = get_fuse_conn(dir);
626 struct fuse_req *req = fuse_get_req(fc); 651 struct fuse_req *req = fuse_get_req_nopages(fc);
627 if (IS_ERR(req)) 652 if (IS_ERR(req))
628 return PTR_ERR(req); 653 return PTR_ERR(req);
629 654
@@ -647,7 +672,7 @@ static int fuse_symlink(struct inode *dir, struct dentry *entry,
647{ 672{
648 struct fuse_conn *fc = get_fuse_conn(dir); 673 struct fuse_conn *fc = get_fuse_conn(dir);
649 unsigned len = strlen(link) + 1; 674 unsigned len = strlen(link) + 1;
650 struct fuse_req *req = fuse_get_req(fc); 675 struct fuse_req *req = fuse_get_req_nopages(fc);
651 if (IS_ERR(req)) 676 if (IS_ERR(req))
652 return PTR_ERR(req); 677 return PTR_ERR(req);
653 678
@@ -664,7 +689,7 @@ static int fuse_unlink(struct inode *dir, struct dentry *entry)
664{ 689{
665 int err; 690 int err;
666 struct fuse_conn *fc = get_fuse_conn(dir); 691 struct fuse_conn *fc = get_fuse_conn(dir);
667 struct fuse_req *req = fuse_get_req(fc); 692 struct fuse_req *req = fuse_get_req_nopages(fc);
668 if (IS_ERR(req)) 693 if (IS_ERR(req))
669 return PTR_ERR(req); 694 return PTR_ERR(req);
670 695
@@ -682,7 +707,14 @@ static int fuse_unlink(struct inode *dir, struct dentry *entry)
682 707
683 spin_lock(&fc->lock); 708 spin_lock(&fc->lock);
684 fi->attr_version = ++fc->attr_version; 709 fi->attr_version = ++fc->attr_version;
685 drop_nlink(inode); 710 /*
711 * If i_nlink == 0 then unlink doesn't make sense, yet this can
712 * happen if userspace filesystem is careless. It would be
713 * difficult to enforce correct nlink usage so just ignore this
714 * condition here
715 */
716 if (inode->i_nlink > 0)
717 drop_nlink(inode);
686 spin_unlock(&fc->lock); 718 spin_unlock(&fc->lock);
687 fuse_invalidate_attr(inode); 719 fuse_invalidate_attr(inode);
688 fuse_invalidate_attr(dir); 720 fuse_invalidate_attr(dir);
@@ -696,7 +728,7 @@ static int fuse_rmdir(struct inode *dir, struct dentry *entry)
696{ 728{
697 int err; 729 int err;
698 struct fuse_conn *fc = get_fuse_conn(dir); 730 struct fuse_conn *fc = get_fuse_conn(dir);
699 struct fuse_req *req = fuse_get_req(fc); 731 struct fuse_req *req = fuse_get_req_nopages(fc);
700 if (IS_ERR(req)) 732 if (IS_ERR(req))
701 return PTR_ERR(req); 733 return PTR_ERR(req);
702 734
@@ -723,7 +755,7 @@ static int fuse_rename(struct inode *olddir, struct dentry *oldent,
723 int err; 755 int err;
724 struct fuse_rename_in inarg; 756 struct fuse_rename_in inarg;
725 struct fuse_conn *fc = get_fuse_conn(olddir); 757 struct fuse_conn *fc = get_fuse_conn(olddir);
726 struct fuse_req *req = fuse_get_req(fc); 758 struct fuse_req *req = fuse_get_req_nopages(fc);
727 759
728 if (IS_ERR(req)) 760 if (IS_ERR(req))
729 return PTR_ERR(req); 761 return PTR_ERR(req);
@@ -776,7 +808,7 @@ static int fuse_link(struct dentry *entry, struct inode *newdir,
776 struct fuse_link_in inarg; 808 struct fuse_link_in inarg;
777 struct inode *inode = entry->d_inode; 809 struct inode *inode = entry->d_inode;
778 struct fuse_conn *fc = get_fuse_conn(inode); 810 struct fuse_conn *fc = get_fuse_conn(inode);
779 struct fuse_req *req = fuse_get_req(fc); 811 struct fuse_req *req = fuse_get_req_nopages(fc);
780 if (IS_ERR(req)) 812 if (IS_ERR(req))
781 return PTR_ERR(req); 813 return PTR_ERR(req);
782 814
@@ -848,7 +880,7 @@ static int fuse_do_getattr(struct inode *inode, struct kstat *stat,
848 struct fuse_req *req; 880 struct fuse_req *req;
849 u64 attr_version; 881 u64 attr_version;
850 882
851 req = fuse_get_req(fc); 883 req = fuse_get_req_nopages(fc);
852 if (IS_ERR(req)) 884 if (IS_ERR(req))
853 return PTR_ERR(req); 885 return PTR_ERR(req);
854 886
@@ -985,7 +1017,7 @@ int fuse_reverse_inval_entry(struct super_block *sb, u64 parent_nodeid,
985 1017
986/* 1018/*
987 * Calling into a user-controlled filesystem gives the filesystem 1019 * Calling into a user-controlled filesystem gives the filesystem
988 * daemon ptrace-like capabilities over the requester process. This 1020 * daemon ptrace-like capabilities over the current process. This
989 * means, that the filesystem daemon is able to record the exact 1021 * means, that the filesystem daemon is able to record the exact
990 * filesystem operations performed, and can also control the behavior 1022 * filesystem operations performed, and can also control the behavior
991 * of the requester process in otherwise impossible ways. For example 1023 * of the requester process in otherwise impossible ways. For example
@@ -996,27 +1028,23 @@ int fuse_reverse_inval_entry(struct super_block *sb, u64 parent_nodeid,
996 * for which the owner of the mount has ptrace privilege. This 1028 * for which the owner of the mount has ptrace privilege. This
997 * excludes processes started by other users, suid or sgid processes. 1029 * excludes processes started by other users, suid or sgid processes.
998 */ 1030 */
999int fuse_allow_task(struct fuse_conn *fc, struct task_struct *task) 1031int fuse_allow_current_process(struct fuse_conn *fc)
1000{ 1032{
1001 const struct cred *cred; 1033 const struct cred *cred;
1002 int ret;
1003 1034
1004 if (fc->flags & FUSE_ALLOW_OTHER) 1035 if (fc->flags & FUSE_ALLOW_OTHER)
1005 return 1; 1036 return 1;
1006 1037
1007 rcu_read_lock(); 1038 cred = current_cred();
1008 ret = 0;
1009 cred = __task_cred(task);
1010 if (uid_eq(cred->euid, fc->user_id) && 1039 if (uid_eq(cred->euid, fc->user_id) &&
1011 uid_eq(cred->suid, fc->user_id) && 1040 uid_eq(cred->suid, fc->user_id) &&
1012 uid_eq(cred->uid, fc->user_id) && 1041 uid_eq(cred->uid, fc->user_id) &&
1013 gid_eq(cred->egid, fc->group_id) && 1042 gid_eq(cred->egid, fc->group_id) &&
1014 gid_eq(cred->sgid, fc->group_id) && 1043 gid_eq(cred->sgid, fc->group_id) &&
1015 gid_eq(cred->gid, fc->group_id)) 1044 gid_eq(cred->gid, fc->group_id))
1016 ret = 1; 1045 return 1;
1017 rcu_read_unlock();
1018 1046
1019 return ret; 1047 return 0;
1020} 1048}
1021 1049
1022static int fuse_access(struct inode *inode, int mask) 1050static int fuse_access(struct inode *inode, int mask)
@@ -1029,7 +1057,7 @@ static int fuse_access(struct inode *inode, int mask)
1029 if (fc->no_access) 1057 if (fc->no_access)
1030 return 0; 1058 return 0;
1031 1059
1032 req = fuse_get_req(fc); 1060 req = fuse_get_req_nopages(fc);
1033 if (IS_ERR(req)) 1061 if (IS_ERR(req))
1034 return PTR_ERR(req); 1062 return PTR_ERR(req);
1035 1063
@@ -1077,7 +1105,7 @@ static int fuse_permission(struct inode *inode, int mask)
1077 bool refreshed = false; 1105 bool refreshed = false;
1078 int err = 0; 1106 int err = 0;
1079 1107
1080 if (!fuse_allow_task(fc, current)) 1108 if (!fuse_allow_current_process(fc))
1081 return -EACCES; 1109 return -EACCES;
1082 1110
1083 /* 1111 /*
@@ -1155,19 +1183,157 @@ static int parse_dirfile(char *buf, size_t nbytes, struct file *file,
1155 return 0; 1183 return 0;
1156} 1184}
1157 1185
1158static int fuse_readdir(struct file *file, void *dstbuf, filldir_t filldir) 1186static int fuse_direntplus_link(struct file *file,
1187 struct fuse_direntplus *direntplus,
1188 u64 attr_version)
1159{ 1189{
1160 int err; 1190 int err;
1191 struct fuse_entry_out *o = &direntplus->entry_out;
1192 struct fuse_dirent *dirent = &direntplus->dirent;
1193 struct dentry *parent = file->f_path.dentry;
1194 struct qstr name = QSTR_INIT(dirent->name, dirent->namelen);
1195 struct dentry *dentry;
1196 struct dentry *alias;
1197 struct inode *dir = parent->d_inode;
1198 struct fuse_conn *fc;
1199 struct inode *inode;
1200
1201 if (!o->nodeid) {
1202 /*
1203 * Unlike in the case of fuse_lookup, zero nodeid does not mean
1204 * ENOENT. Instead, it only means the userspace filesystem did
1205 * not want to return attributes/handle for this entry.
1206 *
1207 * So do nothing.
1208 */
1209 return 0;
1210 }
1211
1212 if (name.name[0] == '.') {
1213 /*
1214 * We could potentially refresh the attributes of the directory
1215 * and its parent?
1216 */
1217 if (name.len == 1)
1218 return 0;
1219 if (name.name[1] == '.' && name.len == 2)
1220 return 0;
1221 }
1222 fc = get_fuse_conn(dir);
1223
1224 name.hash = full_name_hash(name.name, name.len);
1225 dentry = d_lookup(parent, &name);
1226 if (dentry && dentry->d_inode) {
1227 inode = dentry->d_inode;
1228 if (get_node_id(inode) == o->nodeid) {
1229 struct fuse_inode *fi;
1230 fi = get_fuse_inode(inode);
1231 spin_lock(&fc->lock);
1232 fi->nlookup++;
1233 spin_unlock(&fc->lock);
1234
1235 /*
1236 * The other branch to 'found' comes via fuse_iget()
1237 * which bumps nlookup inside
1238 */
1239 goto found;
1240 }
1241 err = d_invalidate(dentry);
1242 if (err)
1243 goto out;
1244 dput(dentry);
1245 dentry = NULL;
1246 }
1247
1248 dentry = d_alloc(parent, &name);
1249 err = -ENOMEM;
1250 if (!dentry)
1251 goto out;
1252
1253 inode = fuse_iget(dir->i_sb, o->nodeid, o->generation,
1254 &o->attr, entry_attr_timeout(o), attr_version);
1255 if (!inode)
1256 goto out;
1257
1258 alias = d_materialise_unique(dentry, inode);
1259 err = PTR_ERR(alias);
1260 if (IS_ERR(alias))
1261 goto out;
1262 if (alias) {
1263 dput(dentry);
1264 dentry = alias;
1265 }
1266
1267found:
1268 fuse_change_attributes(inode, &o->attr, entry_attr_timeout(o),
1269 attr_version);
1270
1271 fuse_change_entry_timeout(dentry, o);
1272
1273 err = 0;
1274out:
1275 if (dentry)
1276 dput(dentry);
1277 return err;
1278}
1279
1280static int parse_dirplusfile(char *buf, size_t nbytes, struct file *file,
1281 void *dstbuf, filldir_t filldir, u64 attr_version)
1282{
1283 struct fuse_direntplus *direntplus;
1284 struct fuse_dirent *dirent;
1285 size_t reclen;
1286 int over = 0;
1287 int ret;
1288
1289 while (nbytes >= FUSE_NAME_OFFSET_DIRENTPLUS) {
1290 direntplus = (struct fuse_direntplus *) buf;
1291 dirent = &direntplus->dirent;
1292 reclen = FUSE_DIRENTPLUS_SIZE(direntplus);
1293
1294 if (!dirent->namelen || dirent->namelen > FUSE_NAME_MAX)
1295 return -EIO;
1296 if (reclen > nbytes)
1297 break;
1298
1299 if (!over) {
1300 /* We fill entries into dstbuf only as much as
1301 it can hold. But we still continue iterating
1302 over remaining entries to link them. If not,
1303 we need to send a FORGET for each of those
1304 which we did not link.
1305 */
1306 over = filldir(dstbuf, dirent->name, dirent->namelen,
1307 file->f_pos, dirent->ino,
1308 dirent->type);
1309 file->f_pos = dirent->off;
1310 }
1311
1312 buf += reclen;
1313 nbytes -= reclen;
1314
1315 ret = fuse_direntplus_link(file, direntplus, attr_version);
1316 if (ret)
1317 fuse_force_forget(file, direntplus->entry_out.nodeid);
1318 }
1319
1320 return 0;
1321}
1322
1323static int fuse_readdir(struct file *file, void *dstbuf, filldir_t filldir)
1324{
1325 int plus, err;
1161 size_t nbytes; 1326 size_t nbytes;
1162 struct page *page; 1327 struct page *page;
1163 struct inode *inode = file->f_path.dentry->d_inode; 1328 struct inode *inode = file->f_path.dentry->d_inode;
1164 struct fuse_conn *fc = get_fuse_conn(inode); 1329 struct fuse_conn *fc = get_fuse_conn(inode);
1165 struct fuse_req *req; 1330 struct fuse_req *req;
1331 u64 attr_version = 0;
1166 1332
1167 if (is_bad_inode(inode)) 1333 if (is_bad_inode(inode))
1168 return -EIO; 1334 return -EIO;
1169 1335
1170 req = fuse_get_req(fc); 1336 req = fuse_get_req(fc, 1);
1171 if (IS_ERR(req)) 1337 if (IS_ERR(req))
1172 return PTR_ERR(req); 1338 return PTR_ERR(req);
1173 1339
@@ -1176,17 +1342,34 @@ static int fuse_readdir(struct file *file, void *dstbuf, filldir_t filldir)
1176 fuse_put_request(fc, req); 1342 fuse_put_request(fc, req);
1177 return -ENOMEM; 1343 return -ENOMEM;
1178 } 1344 }
1345
1346 plus = fuse_use_readdirplus(inode, file);
1179 req->out.argpages = 1; 1347 req->out.argpages = 1;
1180 req->num_pages = 1; 1348 req->num_pages = 1;
1181 req->pages[0] = page; 1349 req->pages[0] = page;
1182 fuse_read_fill(req, file, file->f_pos, PAGE_SIZE, FUSE_READDIR); 1350 req->page_descs[0].length = PAGE_SIZE;
1351 if (plus) {
1352 attr_version = fuse_get_attr_version(fc);
1353 fuse_read_fill(req, file, file->f_pos, PAGE_SIZE,
1354 FUSE_READDIRPLUS);
1355 } else {
1356 fuse_read_fill(req, file, file->f_pos, PAGE_SIZE,
1357 FUSE_READDIR);
1358 }
1183 fuse_request_send(fc, req); 1359 fuse_request_send(fc, req);
1184 nbytes = req->out.args[0].size; 1360 nbytes = req->out.args[0].size;
1185 err = req->out.h.error; 1361 err = req->out.h.error;
1186 fuse_put_request(fc, req); 1362 fuse_put_request(fc, req);
1187 if (!err) 1363 if (!err) {
1188 err = parse_dirfile(page_address(page), nbytes, file, dstbuf, 1364 if (plus) {
1189 filldir); 1365 err = parse_dirplusfile(page_address(page), nbytes,
1366 file, dstbuf, filldir,
1367 attr_version);
1368 } else {
1369 err = parse_dirfile(page_address(page), nbytes, file,
1370 dstbuf, filldir);
1371 }
1372 }
1190 1373
1191 __free_page(page); 1374 __free_page(page);
1192 fuse_invalidate_attr(inode); /* atime changed */ 1375 fuse_invalidate_attr(inode); /* atime changed */
@@ -1197,7 +1380,7 @@ static char *read_link(struct dentry *dentry)
1197{ 1380{
1198 struct inode *inode = dentry->d_inode; 1381 struct inode *inode = dentry->d_inode;
1199 struct fuse_conn *fc = get_fuse_conn(inode); 1382 struct fuse_conn *fc = get_fuse_conn(inode);
1200 struct fuse_req *req = fuse_get_req(fc); 1383 struct fuse_req *req = fuse_get_req_nopages(fc);
1201 char *link; 1384 char *link;
1202 1385
1203 if (IS_ERR(req)) 1386 if (IS_ERR(req))
@@ -1391,7 +1574,7 @@ static int fuse_do_setattr(struct dentry *entry, struct iattr *attr,
1391 loff_t oldsize; 1574 loff_t oldsize;
1392 int err; 1575 int err;
1393 1576
1394 if (!fuse_allow_task(fc, current)) 1577 if (!fuse_allow_current_process(fc))
1395 return -EACCES; 1578 return -EACCES;
1396 1579
1397 if (!(fc->flags & FUSE_DEFAULT_PERMISSIONS)) 1580 if (!(fc->flags & FUSE_DEFAULT_PERMISSIONS))
@@ -1410,7 +1593,7 @@ static int fuse_do_setattr(struct dentry *entry, struct iattr *attr,
1410 if (attr->ia_valid & ATTR_SIZE) 1593 if (attr->ia_valid & ATTR_SIZE)
1411 is_truncate = true; 1594 is_truncate = true;
1412 1595
1413 req = fuse_get_req(fc); 1596 req = fuse_get_req_nopages(fc);
1414 if (IS_ERR(req)) 1597 if (IS_ERR(req))
1415 return PTR_ERR(req); 1598 return PTR_ERR(req);
1416 1599
@@ -1500,7 +1683,7 @@ static int fuse_getattr(struct vfsmount *mnt, struct dentry *entry,
1500 struct inode *inode = entry->d_inode; 1683 struct inode *inode = entry->d_inode;
1501 struct fuse_conn *fc = get_fuse_conn(inode); 1684 struct fuse_conn *fc = get_fuse_conn(inode);
1502 1685
1503 if (!fuse_allow_task(fc, current)) 1686 if (!fuse_allow_current_process(fc))
1504 return -EACCES; 1687 return -EACCES;
1505 1688
1506 return fuse_update_attributes(inode, stat, NULL, NULL); 1689 return fuse_update_attributes(inode, stat, NULL, NULL);
@@ -1518,7 +1701,7 @@ static int fuse_setxattr(struct dentry *entry, const char *name,
1518 if (fc->no_setxattr) 1701 if (fc->no_setxattr)
1519 return -EOPNOTSUPP; 1702 return -EOPNOTSUPP;
1520 1703
1521 req = fuse_get_req(fc); 1704 req = fuse_get_req_nopages(fc);
1522 if (IS_ERR(req)) 1705 if (IS_ERR(req))
1523 return PTR_ERR(req); 1706 return PTR_ERR(req);
1524 1707
@@ -1557,7 +1740,7 @@ static ssize_t fuse_getxattr(struct dentry *entry, const char *name,
1557 if (fc->no_getxattr) 1740 if (fc->no_getxattr)
1558 return -EOPNOTSUPP; 1741 return -EOPNOTSUPP;
1559 1742
1560 req = fuse_get_req(fc); 1743 req = fuse_get_req_nopages(fc);
1561 if (IS_ERR(req)) 1744 if (IS_ERR(req))
1562 return PTR_ERR(req); 1745 return PTR_ERR(req);
1563 1746
@@ -1603,13 +1786,13 @@ static ssize_t fuse_listxattr(struct dentry *entry, char *list, size_t size)
1603 struct fuse_getxattr_out outarg; 1786 struct fuse_getxattr_out outarg;
1604 ssize_t ret; 1787 ssize_t ret;
1605 1788
1606 if (!fuse_allow_task(fc, current)) 1789 if (!fuse_allow_current_process(fc))
1607 return -EACCES; 1790 return -EACCES;
1608 1791
1609 if (fc->no_listxattr) 1792 if (fc->no_listxattr)
1610 return -EOPNOTSUPP; 1793 return -EOPNOTSUPP;
1611 1794
1612 req = fuse_get_req(fc); 1795 req = fuse_get_req_nopages(fc);
1613 if (IS_ERR(req)) 1796 if (IS_ERR(req))
1614 return PTR_ERR(req); 1797 return PTR_ERR(req);
1615 1798
@@ -1654,7 +1837,7 @@ static int fuse_removexattr(struct dentry *entry, const char *name)
1654 if (fc->no_removexattr) 1837 if (fc->no_removexattr)
1655 return -EOPNOTSUPP; 1838 return -EOPNOTSUPP;
1656 1839
1657 req = fuse_get_req(fc); 1840 req = fuse_get_req_nopages(fc);
1658 if (IS_ERR(req)) 1841 if (IS_ERR(req))
1659 return PTR_ERR(req); 1842 return PTR_ERR(req);
1660 1843
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index e21d4d8f87e3..c8071768b950 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -25,7 +25,7 @@ static int fuse_send_open(struct fuse_conn *fc, u64 nodeid, struct file *file,
25 struct fuse_req *req; 25 struct fuse_req *req;
26 int err; 26 int err;
27 27
28 req = fuse_get_req(fc); 28 req = fuse_get_req_nopages(fc);
29 if (IS_ERR(req)) 29 if (IS_ERR(req))
30 return PTR_ERR(req); 30 return PTR_ERR(req);
31 31
@@ -57,7 +57,7 @@ struct fuse_file *fuse_file_alloc(struct fuse_conn *fc)
57 return NULL; 57 return NULL;
58 58
59 ff->fc = fc; 59 ff->fc = fc;
60 ff->reserved_req = fuse_request_alloc(); 60 ff->reserved_req = fuse_request_alloc(0);
61 if (unlikely(!ff->reserved_req)) { 61 if (unlikely(!ff->reserved_req)) {
62 kfree(ff); 62 kfree(ff);
63 return NULL; 63 return NULL;
@@ -368,7 +368,7 @@ static int fuse_flush(struct file *file, fl_owner_t id)
368 if (fc->no_flush) 368 if (fc->no_flush)
369 return 0; 369 return 0;
370 370
371 req = fuse_get_req_nofail(fc, file); 371 req = fuse_get_req_nofail_nopages(fc, file);
372 memset(&inarg, 0, sizeof(inarg)); 372 memset(&inarg, 0, sizeof(inarg));
373 inarg.fh = ff->fh; 373 inarg.fh = ff->fh;
374 inarg.lock_owner = fuse_lock_owner_id(fc, id); 374 inarg.lock_owner = fuse_lock_owner_id(fc, id);
@@ -436,7 +436,7 @@ int fuse_fsync_common(struct file *file, loff_t start, loff_t end,
436 436
437 fuse_sync_writes(inode); 437 fuse_sync_writes(inode);
438 438
439 req = fuse_get_req(fc); 439 req = fuse_get_req_nopages(fc);
440 if (IS_ERR(req)) { 440 if (IS_ERR(req)) {
441 err = PTR_ERR(req); 441 err = PTR_ERR(req);
442 goto out; 442 goto out;
@@ -544,7 +544,7 @@ static int fuse_readpage(struct file *file, struct page *page)
544 */ 544 */
545 fuse_wait_on_page_writeback(inode, page->index); 545 fuse_wait_on_page_writeback(inode, page->index);
546 546
547 req = fuse_get_req(fc); 547 req = fuse_get_req(fc, 1);
548 err = PTR_ERR(req); 548 err = PTR_ERR(req);
549 if (IS_ERR(req)) 549 if (IS_ERR(req))
550 goto out; 550 goto out;
@@ -555,6 +555,7 @@ static int fuse_readpage(struct file *file, struct page *page)
555 req->out.argpages = 1; 555 req->out.argpages = 1;
556 req->num_pages = 1; 556 req->num_pages = 1;
557 req->pages[0] = page; 557 req->pages[0] = page;
558 req->page_descs[0].length = count;
558 num_read = fuse_send_read(req, file, pos, count, NULL); 559 num_read = fuse_send_read(req, file, pos, count, NULL);
559 err = req->out.h.error; 560 err = req->out.h.error;
560 fuse_put_request(fc, req); 561 fuse_put_request(fc, req);
@@ -641,6 +642,7 @@ struct fuse_fill_data {
641 struct fuse_req *req; 642 struct fuse_req *req;
642 struct file *file; 643 struct file *file;
643 struct inode *inode; 644 struct inode *inode;
645 unsigned nr_pages;
644}; 646};
645 647
646static int fuse_readpages_fill(void *_data, struct page *page) 648static int fuse_readpages_fill(void *_data, struct page *page)
@@ -656,16 +658,26 @@ static int fuse_readpages_fill(void *_data, struct page *page)
656 (req->num_pages == FUSE_MAX_PAGES_PER_REQ || 658 (req->num_pages == FUSE_MAX_PAGES_PER_REQ ||
657 (req->num_pages + 1) * PAGE_CACHE_SIZE > fc->max_read || 659 (req->num_pages + 1) * PAGE_CACHE_SIZE > fc->max_read ||
658 req->pages[req->num_pages - 1]->index + 1 != page->index)) { 660 req->pages[req->num_pages - 1]->index + 1 != page->index)) {
661 int nr_alloc = min_t(unsigned, data->nr_pages,
662 FUSE_MAX_PAGES_PER_REQ);
659 fuse_send_readpages(req, data->file); 663 fuse_send_readpages(req, data->file);
660 data->req = req = fuse_get_req(fc); 664 data->req = req = fuse_get_req(fc, nr_alloc);
661 if (IS_ERR(req)) { 665 if (IS_ERR(req)) {
662 unlock_page(page); 666 unlock_page(page);
663 return PTR_ERR(req); 667 return PTR_ERR(req);
664 } 668 }
665 } 669 }
670
671 if (WARN_ON(req->num_pages >= req->max_pages)) {
672 fuse_put_request(fc, req);
673 return -EIO;
674 }
675
666 page_cache_get(page); 676 page_cache_get(page);
667 req->pages[req->num_pages] = page; 677 req->pages[req->num_pages] = page;
678 req->page_descs[req->num_pages].length = PAGE_SIZE;
668 req->num_pages++; 679 req->num_pages++;
680 data->nr_pages--;
669 return 0; 681 return 0;
670} 682}
671 683
@@ -676,6 +688,7 @@ static int fuse_readpages(struct file *file, struct address_space *mapping,
676 struct fuse_conn *fc = get_fuse_conn(inode); 688 struct fuse_conn *fc = get_fuse_conn(inode);
677 struct fuse_fill_data data; 689 struct fuse_fill_data data;
678 int err; 690 int err;
691 int nr_alloc = min_t(unsigned, nr_pages, FUSE_MAX_PAGES_PER_REQ);
679 692
680 err = -EIO; 693 err = -EIO;
681 if (is_bad_inode(inode)) 694 if (is_bad_inode(inode))
@@ -683,7 +696,8 @@ static int fuse_readpages(struct file *file, struct address_space *mapping,
683 696
684 data.file = file; 697 data.file = file;
685 data.inode = inode; 698 data.inode = inode;
686 data.req = fuse_get_req(fc); 699 data.req = fuse_get_req(fc, nr_alloc);
700 data.nr_pages = nr_pages;
687 err = PTR_ERR(data.req); 701 err = PTR_ERR(data.req);
688 if (IS_ERR(data.req)) 702 if (IS_ERR(data.req))
689 goto out; 703 goto out;
@@ -786,7 +800,7 @@ static size_t fuse_send_write_pages(struct fuse_req *req, struct file *file,
786 800
787 res = fuse_send_write(req, file, pos, count, NULL); 801 res = fuse_send_write(req, file, pos, count, NULL);
788 802
789 offset = req->page_offset; 803 offset = req->page_descs[0].offset;
790 count = res; 804 count = res;
791 for (i = 0; i < req->num_pages; i++) { 805 for (i = 0; i < req->num_pages; i++) {
792 struct page *page = req->pages[i]; 806 struct page *page = req->pages[i];
@@ -817,7 +831,7 @@ static ssize_t fuse_fill_write_pages(struct fuse_req *req,
817 int err; 831 int err;
818 832
819 req->in.argpages = 1; 833 req->in.argpages = 1;
820 req->page_offset = offset; 834 req->page_descs[0].offset = offset;
821 835
822 do { 836 do {
823 size_t tmp; 837 size_t tmp;
@@ -857,6 +871,7 @@ static ssize_t fuse_fill_write_pages(struct fuse_req *req,
857 871
858 err = 0; 872 err = 0;
859 req->pages[req->num_pages] = page; 873 req->pages[req->num_pages] = page;
874 req->page_descs[req->num_pages].length = tmp;
860 req->num_pages++; 875 req->num_pages++;
861 876
862 iov_iter_advance(ii, tmp); 877 iov_iter_advance(ii, tmp);
@@ -869,11 +884,19 @@ static ssize_t fuse_fill_write_pages(struct fuse_req *req,
869 if (!fc->big_writes) 884 if (!fc->big_writes)
870 break; 885 break;
871 } while (iov_iter_count(ii) && count < fc->max_write && 886 } while (iov_iter_count(ii) && count < fc->max_write &&
872 req->num_pages < FUSE_MAX_PAGES_PER_REQ && offset == 0); 887 req->num_pages < req->max_pages && offset == 0);
873 888
874 return count > 0 ? count : err; 889 return count > 0 ? count : err;
875} 890}
876 891
892static inline unsigned fuse_wr_pages(loff_t pos, size_t len)
893{
894 return min_t(unsigned,
895 ((pos + len - 1) >> PAGE_CACHE_SHIFT) -
896 (pos >> PAGE_CACHE_SHIFT) + 1,
897 FUSE_MAX_PAGES_PER_REQ);
898}
899
877static ssize_t fuse_perform_write(struct file *file, 900static ssize_t fuse_perform_write(struct file *file,
878 struct address_space *mapping, 901 struct address_space *mapping,
879 struct iov_iter *ii, loff_t pos) 902 struct iov_iter *ii, loff_t pos)
@@ -889,8 +912,9 @@ static ssize_t fuse_perform_write(struct file *file,
889 do { 912 do {
890 struct fuse_req *req; 913 struct fuse_req *req;
891 ssize_t count; 914 ssize_t count;
915 unsigned nr_pages = fuse_wr_pages(pos, iov_iter_count(ii));
892 916
893 req = fuse_get_req(fc); 917 req = fuse_get_req(fc, nr_pages);
894 if (IS_ERR(req)) { 918 if (IS_ERR(req)) {
895 err = PTR_ERR(req); 919 err = PTR_ERR(req);
896 break; 920 break;
@@ -1023,47 +1047,110 @@ static void fuse_release_user_pages(struct fuse_req *req, int write)
1023 } 1047 }
1024} 1048}
1025 1049
1026static int fuse_get_user_pages(struct fuse_req *req, const char __user *buf, 1050static inline void fuse_page_descs_length_init(struct fuse_req *req,
1051 unsigned index, unsigned nr_pages)
1052{
1053 int i;
1054
1055 for (i = index; i < index + nr_pages; i++)
1056 req->page_descs[i].length = PAGE_SIZE -
1057 req->page_descs[i].offset;
1058}
1059
1060static inline unsigned long fuse_get_user_addr(const struct iov_iter *ii)
1061{
1062 return (unsigned long)ii->iov->iov_base + ii->iov_offset;
1063}
1064
1065static inline size_t fuse_get_frag_size(const struct iov_iter *ii,
1066 size_t max_size)
1067{
1068 return min(iov_iter_single_seg_count(ii), max_size);
1069}
1070
1071static int fuse_get_user_pages(struct fuse_req *req, struct iov_iter *ii,
1027 size_t *nbytesp, int write) 1072 size_t *nbytesp, int write)
1028{ 1073{
1029 size_t nbytes = *nbytesp; 1074 size_t nbytes = 0; /* # bytes already packed in req */
1030 unsigned long user_addr = (unsigned long) buf;
1031 unsigned offset = user_addr & ~PAGE_MASK;
1032 int npages;
1033 1075
1034 /* Special case for kernel I/O: can copy directly into the buffer */ 1076 /* Special case for kernel I/O: can copy directly into the buffer */
1035 if (segment_eq(get_fs(), KERNEL_DS)) { 1077 if (segment_eq(get_fs(), KERNEL_DS)) {
1078 unsigned long user_addr = fuse_get_user_addr(ii);
1079 size_t frag_size = fuse_get_frag_size(ii, *nbytesp);
1080
1036 if (write) 1081 if (write)
1037 req->in.args[1].value = (void *) user_addr; 1082 req->in.args[1].value = (void *) user_addr;
1038 else 1083 else
1039 req->out.args[0].value = (void *) user_addr; 1084 req->out.args[0].value = (void *) user_addr;
1040 1085
1086 iov_iter_advance(ii, frag_size);
1087 *nbytesp = frag_size;
1041 return 0; 1088 return 0;
1042 } 1089 }
1043 1090
1044 nbytes = min_t(size_t, nbytes, FUSE_MAX_PAGES_PER_REQ << PAGE_SHIFT); 1091 while (nbytes < *nbytesp && req->num_pages < req->max_pages) {
1045 npages = (nbytes + offset + PAGE_SIZE - 1) >> PAGE_SHIFT; 1092 unsigned npages;
1046 npages = clamp(npages, 1, FUSE_MAX_PAGES_PER_REQ); 1093 unsigned long user_addr = fuse_get_user_addr(ii);
1047 npages = get_user_pages_fast(user_addr, npages, !write, req->pages); 1094 unsigned offset = user_addr & ~PAGE_MASK;
1048 if (npages < 0) 1095 size_t frag_size = fuse_get_frag_size(ii, *nbytesp - nbytes);
1049 return npages; 1096 int ret;
1097
1098 unsigned n = req->max_pages - req->num_pages;
1099 frag_size = min_t(size_t, frag_size, n << PAGE_SHIFT);
1100
1101 npages = (frag_size + offset + PAGE_SIZE - 1) >> PAGE_SHIFT;
1102 npages = clamp(npages, 1U, n);
1103
1104 ret = get_user_pages_fast(user_addr, npages, !write,
1105 &req->pages[req->num_pages]);
1106 if (ret < 0)
1107 return ret;
1050 1108
1051 req->num_pages = npages; 1109 npages = ret;
1052 req->page_offset = offset; 1110 frag_size = min_t(size_t, frag_size,
1111 (npages << PAGE_SHIFT) - offset);
1112 iov_iter_advance(ii, frag_size);
1113
1114 req->page_descs[req->num_pages].offset = offset;
1115 fuse_page_descs_length_init(req, req->num_pages, npages);
1116
1117 req->num_pages += npages;
1118 req->page_descs[req->num_pages - 1].length -=
1119 (npages << PAGE_SHIFT) - offset - frag_size;
1120
1121 nbytes += frag_size;
1122 }
1053 1123
1054 if (write) 1124 if (write)
1055 req->in.argpages = 1; 1125 req->in.argpages = 1;
1056 else 1126 else
1057 req->out.argpages = 1; 1127 req->out.argpages = 1;
1058 1128
1059 nbytes = (req->num_pages << PAGE_SHIFT) - req->page_offset; 1129 *nbytesp = nbytes;
1060 *nbytesp = min(*nbytesp, nbytes);
1061 1130
1062 return 0; 1131 return 0;
1063} 1132}
1064 1133
1065ssize_t fuse_direct_io(struct file *file, const char __user *buf, 1134static inline int fuse_iter_npages(const struct iov_iter *ii_p)
1066 size_t count, loff_t *ppos, int write) 1135{
1136 struct iov_iter ii = *ii_p;
1137 int npages = 0;
1138
1139 while (iov_iter_count(&ii) && npages < FUSE_MAX_PAGES_PER_REQ) {
1140 unsigned long user_addr = fuse_get_user_addr(&ii);
1141 unsigned offset = user_addr & ~PAGE_MASK;
1142 size_t frag_size = iov_iter_single_seg_count(&ii);
1143
1144 npages += (frag_size + offset + PAGE_SIZE - 1) >> PAGE_SHIFT;
1145 iov_iter_advance(&ii, frag_size);
1146 }
1147
1148 return min(npages, FUSE_MAX_PAGES_PER_REQ);
1149}
1150
1151ssize_t fuse_direct_io(struct file *file, const struct iovec *iov,
1152 unsigned long nr_segs, size_t count, loff_t *ppos,
1153 int write)
1067{ 1154{
1068 struct fuse_file *ff = file->private_data; 1155 struct fuse_file *ff = file->private_data;
1069 struct fuse_conn *fc = ff->fc; 1156 struct fuse_conn *fc = ff->fc;
@@ -1071,8 +1158,11 @@ ssize_t fuse_direct_io(struct file *file, const char __user *buf,
1071 loff_t pos = *ppos; 1158 loff_t pos = *ppos;
1072 ssize_t res = 0; 1159 ssize_t res = 0;
1073 struct fuse_req *req; 1160 struct fuse_req *req;
1161 struct iov_iter ii;
1162
1163 iov_iter_init(&ii, iov, nr_segs, count, 0);
1074 1164
1075 req = fuse_get_req(fc); 1165 req = fuse_get_req(fc, fuse_iter_npages(&ii));
1076 if (IS_ERR(req)) 1166 if (IS_ERR(req))
1077 return PTR_ERR(req); 1167 return PTR_ERR(req);
1078 1168
@@ -1080,7 +1170,7 @@ ssize_t fuse_direct_io(struct file *file, const char __user *buf,
1080 size_t nres; 1170 size_t nres;
1081 fl_owner_t owner = current->files; 1171 fl_owner_t owner = current->files;
1082 size_t nbytes = min(count, nmax); 1172 size_t nbytes = min(count, nmax);
1083 int err = fuse_get_user_pages(req, buf, &nbytes, write); 1173 int err = fuse_get_user_pages(req, &ii, &nbytes, write);
1084 if (err) { 1174 if (err) {
1085 res = err; 1175 res = err;
1086 break; 1176 break;
@@ -1103,12 +1193,11 @@ ssize_t fuse_direct_io(struct file *file, const char __user *buf,
1103 count -= nres; 1193 count -= nres;
1104 res += nres; 1194 res += nres;
1105 pos += nres; 1195 pos += nres;
1106 buf += nres;
1107 if (nres != nbytes) 1196 if (nres != nbytes)
1108 break; 1197 break;
1109 if (count) { 1198 if (count) {
1110 fuse_put_request(fc, req); 1199 fuse_put_request(fc, req);
1111 req = fuse_get_req(fc); 1200 req = fuse_get_req(fc, fuse_iter_npages(&ii));
1112 if (IS_ERR(req)) 1201 if (IS_ERR(req))
1113 break; 1202 break;
1114 } 1203 }
@@ -1122,8 +1211,8 @@ ssize_t fuse_direct_io(struct file *file, const char __user *buf,
1122} 1211}
1123EXPORT_SYMBOL_GPL(fuse_direct_io); 1212EXPORT_SYMBOL_GPL(fuse_direct_io);
1124 1213
1125static ssize_t fuse_direct_read(struct file *file, char __user *buf, 1214static ssize_t __fuse_direct_read(struct file *file, const struct iovec *iov,
1126 size_t count, loff_t *ppos) 1215 unsigned long nr_segs, loff_t *ppos)
1127{ 1216{
1128 ssize_t res; 1217 ssize_t res;
1129 struct inode *inode = file->f_path.dentry->d_inode; 1218 struct inode *inode = file->f_path.dentry->d_inode;
@@ -1131,22 +1220,31 @@ static ssize_t fuse_direct_read(struct file *file, char __user *buf,
1131 if (is_bad_inode(inode)) 1220 if (is_bad_inode(inode))
1132 return -EIO; 1221 return -EIO;
1133 1222
1134 res = fuse_direct_io(file, buf, count, ppos, 0); 1223 res = fuse_direct_io(file, iov, nr_segs, iov_length(iov, nr_segs),
1224 ppos, 0);
1135 1225
1136 fuse_invalidate_attr(inode); 1226 fuse_invalidate_attr(inode);
1137 1227
1138 return res; 1228 return res;
1139} 1229}
1140 1230
1141static ssize_t __fuse_direct_write(struct file *file, const char __user *buf, 1231static ssize_t fuse_direct_read(struct file *file, char __user *buf,
1142 size_t count, loff_t *ppos) 1232 size_t count, loff_t *ppos)
1233{
1234 struct iovec iov = { .iov_base = buf, .iov_len = count };
1235 return __fuse_direct_read(file, &iov, 1, ppos);
1236}
1237
1238static ssize_t __fuse_direct_write(struct file *file, const struct iovec *iov,
1239 unsigned long nr_segs, loff_t *ppos)
1143{ 1240{
1144 struct inode *inode = file->f_path.dentry->d_inode; 1241 struct inode *inode = file->f_path.dentry->d_inode;
1242 size_t count = iov_length(iov, nr_segs);
1145 ssize_t res; 1243 ssize_t res;
1146 1244
1147 res = generic_write_checks(file, ppos, &count, 0); 1245 res = generic_write_checks(file, ppos, &count, 0);
1148 if (!res) { 1246 if (!res) {
1149 res = fuse_direct_io(file, buf, count, ppos, 1); 1247 res = fuse_direct_io(file, iov, nr_segs, count, ppos, 1);
1150 if (res > 0) 1248 if (res > 0)
1151 fuse_write_update_size(inode, *ppos); 1249 fuse_write_update_size(inode, *ppos);
1152 } 1250 }
@@ -1159,6 +1257,7 @@ static ssize_t __fuse_direct_write(struct file *file, const char __user *buf,
1159static ssize_t fuse_direct_write(struct file *file, const char __user *buf, 1257static ssize_t fuse_direct_write(struct file *file, const char __user *buf,
1160 size_t count, loff_t *ppos) 1258 size_t count, loff_t *ppos)
1161{ 1259{
1260 struct iovec iov = { .iov_base = (void __user *)buf, .iov_len = count };
1162 struct inode *inode = file->f_path.dentry->d_inode; 1261 struct inode *inode = file->f_path.dentry->d_inode;
1163 ssize_t res; 1262 ssize_t res;
1164 1263
@@ -1167,7 +1266,7 @@ static ssize_t fuse_direct_write(struct file *file, const char __user *buf,
1167 1266
1168 /* Don't allow parallel writes to the same file */ 1267 /* Don't allow parallel writes to the same file */
1169 mutex_lock(&inode->i_mutex); 1268 mutex_lock(&inode->i_mutex);
1170 res = __fuse_direct_write(file, buf, count, ppos); 1269 res = __fuse_direct_write(file, &iov, 1, ppos);
1171 mutex_unlock(&inode->i_mutex); 1270 mutex_unlock(&inode->i_mutex);
1172 1271
1173 return res; 1272 return res;
@@ -1272,7 +1371,7 @@ static int fuse_writepage_locked(struct page *page)
1272 1371
1273 set_page_writeback(page); 1372 set_page_writeback(page);
1274 1373
1275 req = fuse_request_alloc_nofs(); 1374 req = fuse_request_alloc_nofs(1);
1276 if (!req) 1375 if (!req)
1277 goto err; 1376 goto err;
1278 1377
@@ -1293,7 +1392,8 @@ static int fuse_writepage_locked(struct page *page)
1293 req->in.argpages = 1; 1392 req->in.argpages = 1;
1294 req->num_pages = 1; 1393 req->num_pages = 1;
1295 req->pages[0] = tmp_page; 1394 req->pages[0] = tmp_page;
1296 req->page_offset = 0; 1395 req->page_descs[0].offset = 0;
1396 req->page_descs[0].length = PAGE_SIZE;
1297 req->end = fuse_writepage_end; 1397 req->end = fuse_writepage_end;
1298 req->inode = inode; 1398 req->inode = inode;
1299 1399
@@ -1471,7 +1571,7 @@ static int fuse_getlk(struct file *file, struct file_lock *fl)
1471 struct fuse_lk_out outarg; 1571 struct fuse_lk_out outarg;
1472 int err; 1572 int err;
1473 1573
1474 req = fuse_get_req(fc); 1574 req = fuse_get_req_nopages(fc);
1475 if (IS_ERR(req)) 1575 if (IS_ERR(req))
1476 return PTR_ERR(req); 1576 return PTR_ERR(req);
1477 1577
@@ -1506,7 +1606,7 @@ static int fuse_setlk(struct file *file, struct file_lock *fl, int flock)
1506 if (fl->fl_flags & FL_CLOSE) 1606 if (fl->fl_flags & FL_CLOSE)
1507 return 0; 1607 return 0;
1508 1608
1509 req = fuse_get_req(fc); 1609 req = fuse_get_req_nopages(fc);
1510 if (IS_ERR(req)) 1610 if (IS_ERR(req))
1511 return PTR_ERR(req); 1611 return PTR_ERR(req);
1512 1612
@@ -1575,7 +1675,7 @@ static sector_t fuse_bmap(struct address_space *mapping, sector_t block)
1575 if (!inode->i_sb->s_bdev || fc->no_bmap) 1675 if (!inode->i_sb->s_bdev || fc->no_bmap)
1576 return 0; 1676 return 0;
1577 1677
1578 req = fuse_get_req(fc); 1678 req = fuse_get_req_nopages(fc);
1579 if (IS_ERR(req)) 1679 if (IS_ERR(req))
1580 return 0; 1680 return 0;
1581 1681
@@ -1873,7 +1973,7 @@ long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg,
1873 num_pages++; 1973 num_pages++;
1874 } 1974 }
1875 1975
1876 req = fuse_get_req(fc); 1976 req = fuse_get_req(fc, num_pages);
1877 if (IS_ERR(req)) { 1977 if (IS_ERR(req)) {
1878 err = PTR_ERR(req); 1978 err = PTR_ERR(req);
1879 req = NULL; 1979 req = NULL;
@@ -1881,6 +1981,7 @@ long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg,
1881 } 1981 }
1882 memcpy(req->pages, pages, sizeof(req->pages[0]) * num_pages); 1982 memcpy(req->pages, pages, sizeof(req->pages[0]) * num_pages);
1883 req->num_pages = num_pages; 1983 req->num_pages = num_pages;
1984 fuse_page_descs_length_init(req, 0, req->num_pages);
1884 1985
1885 /* okay, let's send it to the client */ 1986 /* okay, let's send it to the client */
1886 req->in.h.opcode = FUSE_IOCTL; 1987 req->in.h.opcode = FUSE_IOCTL;
@@ -1981,7 +2082,7 @@ long fuse_ioctl_common(struct file *file, unsigned int cmd,
1981 struct inode *inode = file->f_dentry->d_inode; 2082 struct inode *inode = file->f_dentry->d_inode;
1982 struct fuse_conn *fc = get_fuse_conn(inode); 2083 struct fuse_conn *fc = get_fuse_conn(inode);
1983 2084
1984 if (!fuse_allow_task(fc, current)) 2085 if (!fuse_allow_current_process(fc))
1985 return -EACCES; 2086 return -EACCES;
1986 2087
1987 if (is_bad_inode(inode)) 2088 if (is_bad_inode(inode))
@@ -2066,6 +2167,7 @@ unsigned fuse_file_poll(struct file *file, poll_table *wait)
2066 return DEFAULT_POLLMASK; 2167 return DEFAULT_POLLMASK;
2067 2168
2068 poll_wait(file, &ff->poll_wait, wait); 2169 poll_wait(file, &ff->poll_wait, wait);
2170 inarg.events = (__u32)poll_requested_events(wait);
2069 2171
2070 /* 2172 /*
2071 * Ask for notification iff there's someone waiting for it. 2173 * Ask for notification iff there's someone waiting for it.
@@ -2076,7 +2178,7 @@ unsigned fuse_file_poll(struct file *file, poll_table *wait)
2076 fuse_register_polled_file(fc, ff); 2178 fuse_register_polled_file(fc, ff);
2077 } 2179 }
2078 2180
2079 req = fuse_get_req(fc); 2181 req = fuse_get_req_nopages(fc);
2080 if (IS_ERR(req)) 2182 if (IS_ERR(req))
2081 return POLLERR; 2183 return POLLERR;
2082 2184
@@ -2126,41 +2228,6 @@ int fuse_notify_poll_wakeup(struct fuse_conn *fc,
2126 return 0; 2228 return 0;
2127} 2229}
2128 2230
2129static ssize_t fuse_loop_dio(struct file *filp, const struct iovec *iov,
2130 unsigned long nr_segs, loff_t *ppos, int rw)
2131{
2132 const struct iovec *vector = iov;
2133 ssize_t ret = 0;
2134
2135 while (nr_segs > 0) {
2136 void __user *base;
2137 size_t len;
2138 ssize_t nr;
2139
2140 base = vector->iov_base;
2141 len = vector->iov_len;
2142 vector++;
2143 nr_segs--;
2144
2145 if (rw == WRITE)
2146 nr = __fuse_direct_write(filp, base, len, ppos);
2147 else
2148 nr = fuse_direct_read(filp, base, len, ppos);
2149
2150 if (nr < 0) {
2151 if (!ret)
2152 ret = nr;
2153 break;
2154 }
2155 ret += nr;
2156 if (nr != len)
2157 break;
2158 }
2159
2160 return ret;
2161}
2162
2163
2164static ssize_t 2231static ssize_t
2165fuse_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, 2232fuse_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
2166 loff_t offset, unsigned long nr_segs) 2233 loff_t offset, unsigned long nr_segs)
@@ -2172,13 +2239,16 @@ fuse_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
2172 file = iocb->ki_filp; 2239 file = iocb->ki_filp;
2173 pos = offset; 2240 pos = offset;
2174 2241
2175 ret = fuse_loop_dio(file, iov, nr_segs, &pos, rw); 2242 if (rw == WRITE)
2243 ret = __fuse_direct_write(file, iov, nr_segs, &pos);
2244 else
2245 ret = __fuse_direct_read(file, iov, nr_segs, &pos);
2176 2246
2177 return ret; 2247 return ret;
2178} 2248}
2179 2249
2180long fuse_file_fallocate(struct file *file, int mode, loff_t offset, 2250static long fuse_file_fallocate(struct file *file, int mode, loff_t offset,
2181 loff_t length) 2251 loff_t length)
2182{ 2252{
2183 struct fuse_file *ff = file->private_data; 2253 struct fuse_file *ff = file->private_data;
2184 struct fuse_conn *fc = ff->fc; 2254 struct fuse_conn *fc = ff->fc;
@@ -2194,7 +2264,7 @@ long fuse_file_fallocate(struct file *file, int mode, loff_t offset,
2194 if (fc->no_fallocate) 2264 if (fc->no_fallocate)
2195 return -EOPNOTSUPP; 2265 return -EOPNOTSUPP;
2196 2266
2197 req = fuse_get_req(fc); 2267 req = fuse_get_req_nopages(fc);
2198 if (IS_ERR(req)) 2268 if (IS_ERR(req))
2199 return PTR_ERR(req); 2269 return PTR_ERR(req);
2200 2270
@@ -2213,7 +2283,6 @@ long fuse_file_fallocate(struct file *file, int mode, loff_t offset,
2213 2283
2214 return err; 2284 return err;
2215} 2285}
2216EXPORT_SYMBOL_GPL(fuse_file_fallocate);
2217 2286
2218static const struct file_operations fuse_file_operations = { 2287static const struct file_operations fuse_file_operations = {
2219 .llseek = fuse_file_llseek, 2288 .llseek = fuse_file_llseek,
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index e105a53fc72d..6aeba864f070 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -44,6 +44,9 @@
44 doing the mount will be allowed to access the filesystem */ 44 doing the mount will be allowed to access the filesystem */
45#define FUSE_ALLOW_OTHER (1 << 1) 45#define FUSE_ALLOW_OTHER (1 << 1)
46 46
47/** Number of page pointers embedded in fuse_req */
48#define FUSE_REQ_INLINE_PAGES 1
49
47/** List of active connections */ 50/** List of active connections */
48extern struct list_head fuse_conn_list; 51extern struct list_head fuse_conn_list;
49 52
@@ -103,6 +106,15 @@ struct fuse_inode {
103 106
104 /** List of writepage requestst (pending or sent) */ 107 /** List of writepage requestst (pending or sent) */
105 struct list_head writepages; 108 struct list_head writepages;
109
110 /** Miscellaneous bits describing inode state */
111 unsigned long state;
112};
113
114/** FUSE inode state bits */
115enum {
116 /** Advise readdirplus */
117 FUSE_I_ADVISE_RDPLUS,
106}; 118};
107 119
108struct fuse_conn; 120struct fuse_conn;
@@ -200,6 +212,12 @@ struct fuse_out {
200 struct fuse_arg args[3]; 212 struct fuse_arg args[3];
201}; 213};
202 214
215/** FUSE page descriptor */
216struct fuse_page_desc {
217 unsigned int length;
218 unsigned int offset;
219};
220
203/** The request state */ 221/** The request state */
204enum fuse_req_state { 222enum fuse_req_state {
205 FUSE_REQ_INIT = 0, 223 FUSE_REQ_INIT = 0,
@@ -291,14 +309,23 @@ struct fuse_req {
291 } misc; 309 } misc;
292 310
293 /** page vector */ 311 /** page vector */
294 struct page *pages[FUSE_MAX_PAGES_PER_REQ]; 312 struct page **pages;
313
314 /** page-descriptor vector */
315 struct fuse_page_desc *page_descs;
316
317 /** size of the 'pages' array */
318 unsigned max_pages;
319
320 /** inline page vector */
321 struct page *inline_pages[FUSE_REQ_INLINE_PAGES];
322
323 /** inline page-descriptor vector */
324 struct fuse_page_desc inline_page_descs[FUSE_REQ_INLINE_PAGES];
295 325
296 /** number of pages in vector */ 326 /** number of pages in vector */
297 unsigned num_pages; 327 unsigned num_pages;
298 328
299 /** offset of data on first page */
300 unsigned page_offset;
301
302 /** File used in the request (or NULL) */ 329 /** File used in the request (or NULL) */
303 struct fuse_file *ff; 330 struct fuse_file *ff;
304 331
@@ -487,6 +514,12 @@ struct fuse_conn {
487 /** Use enhanced/automatic page cache invalidation. */ 514 /** Use enhanced/automatic page cache invalidation. */
488 unsigned auto_inval_data:1; 515 unsigned auto_inval_data:1;
489 516
517 /** Does the filesystem support readdirplus? */
518 unsigned do_readdirplus:1;
519
520 /** Does the filesystem want adaptive readdirplus? */
521 unsigned readdirplus_auto:1;
522
490 /** The number of requests waiting for completion */ 523 /** The number of requests waiting for completion */
491 atomic_t num_waiting; 524 atomic_t num_waiting;
492 525
@@ -578,6 +611,9 @@ void fuse_queue_forget(struct fuse_conn *fc, struct fuse_forget_link *forget,
578 611
579struct fuse_forget_link *fuse_alloc_forget(void); 612struct fuse_forget_link *fuse_alloc_forget(void);
580 613
614/* Used by READDIRPLUS */
615void fuse_force_forget(struct file *file, u64 nodeid);
616
581/** 617/**
582 * Initialize READ or READDIR request 618 * Initialize READ or READDIR request
583 */ 619 */
@@ -658,9 +694,9 @@ void fuse_ctl_cleanup(void);
658/** 694/**
659 * Allocate a request 695 * Allocate a request
660 */ 696 */
661struct fuse_req *fuse_request_alloc(void); 697struct fuse_req *fuse_request_alloc(unsigned npages);
662 698
663struct fuse_req *fuse_request_alloc_nofs(void); 699struct fuse_req *fuse_request_alloc_nofs(unsigned npages);
664 700
665/** 701/**
666 * Free a request 702 * Free a request
@@ -668,14 +704,25 @@ struct fuse_req *fuse_request_alloc_nofs(void);
668void fuse_request_free(struct fuse_req *req); 704void fuse_request_free(struct fuse_req *req);
669 705
670/** 706/**
671 * Get a request, may fail with -ENOMEM 707 * Get a request, may fail with -ENOMEM,
708 * caller should specify # elements in req->pages[] explicitly
672 */ 709 */
673struct fuse_req *fuse_get_req(struct fuse_conn *fc); 710struct fuse_req *fuse_get_req(struct fuse_conn *fc, unsigned npages);
711
712/**
713 * Get a request, may fail with -ENOMEM,
714 * useful for callers who doesn't use req->pages[]
715 */
716static inline struct fuse_req *fuse_get_req_nopages(struct fuse_conn *fc)
717{
718 return fuse_get_req(fc, 0);
719}
674 720
675/** 721/**
676 * Gets a requests for a file operation, always succeeds 722 * Gets a requests for a file operation, always succeeds
677 */ 723 */
678struct fuse_req *fuse_get_req_nofail(struct fuse_conn *fc, struct file *file); 724struct fuse_req *fuse_get_req_nofail_nopages(struct fuse_conn *fc,
725 struct file *file);
679 726
680/** 727/**
681 * Decrement reference count of a request. If count goes to zero free 728 * Decrement reference count of a request. If count goes to zero free
@@ -739,9 +786,9 @@ void fuse_ctl_remove_conn(struct fuse_conn *fc);
739int fuse_valid_type(int m); 786int fuse_valid_type(int m);
740 787
741/** 788/**
742 * Is task allowed to perform filesystem operation? 789 * Is current process allowed to perform filesystem operation?
743 */ 790 */
744int fuse_allow_task(struct fuse_conn *fc, struct task_struct *task); 791int fuse_allow_current_process(struct fuse_conn *fc);
745 792
746u64 fuse_lock_owner_id(struct fuse_conn *fc, fl_owner_t id); 793u64 fuse_lock_owner_id(struct fuse_conn *fc, fl_owner_t id);
747 794
@@ -776,8 +823,9 @@ int fuse_reverse_inval_entry(struct super_block *sb, u64 parent_nodeid,
776 823
777int fuse_do_open(struct fuse_conn *fc, u64 nodeid, struct file *file, 824int fuse_do_open(struct fuse_conn *fc, u64 nodeid, struct file *file,
778 bool isdir); 825 bool isdir);
779ssize_t fuse_direct_io(struct file *file, const char __user *buf, 826ssize_t fuse_direct_io(struct file *file, const struct iovec *iov,
780 size_t count, loff_t *ppos, int write); 827 unsigned long nr_segs, size_t count, loff_t *ppos,
828 int write);
781long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg, 829long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg,
782 unsigned int flags); 830 unsigned int flags);
783long fuse_ioctl_common(struct file *file, unsigned int cmd, 831long fuse_ioctl_common(struct file *file, unsigned int cmd,
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 73ca6b72beaf..01353ed75750 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -92,6 +92,7 @@ static struct inode *fuse_alloc_inode(struct super_block *sb)
92 fi->attr_version = 0; 92 fi->attr_version = 0;
93 fi->writectr = 0; 93 fi->writectr = 0;
94 fi->orig_ino = 0; 94 fi->orig_ino = 0;
95 fi->state = 0;
95 INIT_LIST_HEAD(&fi->write_files); 96 INIT_LIST_HEAD(&fi->write_files);
96 INIT_LIST_HEAD(&fi->queued_writes); 97 INIT_LIST_HEAD(&fi->queued_writes);
97 INIT_LIST_HEAD(&fi->writepages); 98 INIT_LIST_HEAD(&fi->writepages);
@@ -408,12 +409,12 @@ static int fuse_statfs(struct dentry *dentry, struct kstatfs *buf)
408 struct fuse_statfs_out outarg; 409 struct fuse_statfs_out outarg;
409 int err; 410 int err;
410 411
411 if (!fuse_allow_task(fc, current)) { 412 if (!fuse_allow_current_process(fc)) {
412 buf->f_type = FUSE_SUPER_MAGIC; 413 buf->f_type = FUSE_SUPER_MAGIC;
413 return 0; 414 return 0;
414 } 415 }
415 416
416 req = fuse_get_req(fc); 417 req = fuse_get_req_nopages(fc);
417 if (IS_ERR(req)) 418 if (IS_ERR(req))
418 return PTR_ERR(req); 419 return PTR_ERR(req);
419 420
@@ -863,6 +864,10 @@ static void process_init_reply(struct fuse_conn *fc, struct fuse_req *req)
863 fc->dont_mask = 1; 864 fc->dont_mask = 1;
864 if (arg->flags & FUSE_AUTO_INVAL_DATA) 865 if (arg->flags & FUSE_AUTO_INVAL_DATA)
865 fc->auto_inval_data = 1; 866 fc->auto_inval_data = 1;
867 if (arg->flags & FUSE_DO_READDIRPLUS)
868 fc->do_readdirplus = 1;
869 if (arg->flags & FUSE_READDIRPLUS_AUTO)
870 fc->readdirplus_auto = 1;
866 } else { 871 } else {
867 ra_pages = fc->max_read / PAGE_CACHE_SIZE; 872 ra_pages = fc->max_read / PAGE_CACHE_SIZE;
868 fc->no_lock = 1; 873 fc->no_lock = 1;
@@ -889,7 +894,8 @@ static void fuse_send_init(struct fuse_conn *fc, struct fuse_req *req)
889 arg->flags |= FUSE_ASYNC_READ | FUSE_POSIX_LOCKS | FUSE_ATOMIC_O_TRUNC | 894 arg->flags |= FUSE_ASYNC_READ | FUSE_POSIX_LOCKS | FUSE_ATOMIC_O_TRUNC |
890 FUSE_EXPORT_SUPPORT | FUSE_BIG_WRITES | FUSE_DONT_MASK | 895 FUSE_EXPORT_SUPPORT | FUSE_BIG_WRITES | FUSE_DONT_MASK |
891 FUSE_SPLICE_WRITE | FUSE_SPLICE_MOVE | FUSE_SPLICE_READ | 896 FUSE_SPLICE_WRITE | FUSE_SPLICE_MOVE | FUSE_SPLICE_READ |
892 FUSE_FLOCK_LOCKS | FUSE_IOCTL_DIR | FUSE_AUTO_INVAL_DATA; 897 FUSE_FLOCK_LOCKS | FUSE_IOCTL_DIR | FUSE_AUTO_INVAL_DATA |
898 FUSE_DO_READDIRPLUS | FUSE_READDIRPLUS_AUTO;
893 req->in.h.opcode = FUSE_INIT; 899 req->in.h.opcode = FUSE_INIT;
894 req->in.numargs = 1; 900 req->in.numargs = 1;
895 req->in.args[0].size = sizeof(*arg); 901 req->in.args[0].size = sizeof(*arg);
@@ -1034,12 +1040,12 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent)
1034 /* only now - we want root dentry with NULL ->d_op */ 1040 /* only now - we want root dentry with NULL ->d_op */
1035 sb->s_d_op = &fuse_dentry_operations; 1041 sb->s_d_op = &fuse_dentry_operations;
1036 1042
1037 init_req = fuse_request_alloc(); 1043 init_req = fuse_request_alloc(0);
1038 if (!init_req) 1044 if (!init_req)
1039 goto err_put_root; 1045 goto err_put_root;
1040 1046
1041 if (is_bdev) { 1047 if (is_bdev) {
1042 fc->destroy_req = fuse_request_alloc(); 1048 fc->destroy_req = fuse_request_alloc(0);
1043 if (!fc->destroy_req) 1049 if (!fc->destroy_req)
1044 goto err_free_init_req; 1050 goto err_free_init_req;
1045 } 1051 }
diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c
index 30de4f2a2ea9..24f414f0ce61 100644
--- a/fs/gfs2/aops.c
+++ b/fs/gfs2/aops.c
@@ -51,7 +51,7 @@ static void gfs2_page_add_databufs(struct gfs2_inode *ip, struct page *page,
51 continue; 51 continue;
52 if (gfs2_is_jdata(ip)) 52 if (gfs2_is_jdata(ip))
53 set_buffer_uptodate(bh); 53 set_buffer_uptodate(bh);
54 gfs2_trans_add_bh(ip->i_gl, bh, 0); 54 gfs2_trans_add_data(ip->i_gl, bh);
55 } 55 }
56} 56}
57 57
@@ -230,16 +230,14 @@ out_ignore:
230} 230}
231 231
232/** 232/**
233 * gfs2_writeback_writepages - Write a bunch of dirty pages back to disk 233 * gfs2_writepages - Write a bunch of dirty pages back to disk
234 * @mapping: The mapping to write 234 * @mapping: The mapping to write
235 * @wbc: Write-back control 235 * @wbc: Write-back control
236 * 236 *
237 * For the data=writeback case we can already ignore buffer heads 237 * Used for both ordered and writeback modes.
238 * and write whole extents at once. This is a big reduction in the
239 * number of I/O requests we send and the bmap calls we make in this case.
240 */ 238 */
241static int gfs2_writeback_writepages(struct address_space *mapping, 239static int gfs2_writepages(struct address_space *mapping,
242 struct writeback_control *wbc) 240 struct writeback_control *wbc)
243{ 241{
244 return mpage_writepages(mapping, wbc, gfs2_get_block_noalloc); 242 return mpage_writepages(mapping, wbc, gfs2_get_block_noalloc);
245} 243}
@@ -852,7 +850,7 @@ static int gfs2_write_end(struct file *file, struct address_space *mapping,
852 goto failed; 850 goto failed;
853 } 851 }
854 852
855 gfs2_trans_add_bh(ip->i_gl, dibh, 1); 853 gfs2_trans_add_meta(ip->i_gl, dibh);
856 854
857 if (gfs2_is_stuffed(ip)) 855 if (gfs2_is_stuffed(ip))
858 return gfs2_stuffed_write_end(inode, dibh, pos, len, copied, page); 856 return gfs2_stuffed_write_end(inode, dibh, pos, len, copied, page);
@@ -1102,7 +1100,7 @@ cannot_release:
1102 1100
1103static const struct address_space_operations gfs2_writeback_aops = { 1101static const struct address_space_operations gfs2_writeback_aops = {
1104 .writepage = gfs2_writeback_writepage, 1102 .writepage = gfs2_writeback_writepage,
1105 .writepages = gfs2_writeback_writepages, 1103 .writepages = gfs2_writepages,
1106 .readpage = gfs2_readpage, 1104 .readpage = gfs2_readpage,
1107 .readpages = gfs2_readpages, 1105 .readpages = gfs2_readpages,
1108 .write_begin = gfs2_write_begin, 1106 .write_begin = gfs2_write_begin,
@@ -1118,6 +1116,7 @@ static const struct address_space_operations gfs2_writeback_aops = {
1118 1116
1119static const struct address_space_operations gfs2_ordered_aops = { 1117static const struct address_space_operations gfs2_ordered_aops = {
1120 .writepage = gfs2_ordered_writepage, 1118 .writepage = gfs2_ordered_writepage,
1119 .writepages = gfs2_writepages,
1121 .readpage = gfs2_readpage, 1120 .readpage = gfs2_readpage,
1122 .readpages = gfs2_readpages, 1121 .readpages = gfs2_readpages,
1123 .write_begin = gfs2_write_begin, 1122 .write_begin = gfs2_write_begin,
diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index 10c54e3c2e72..5e83657f046e 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -22,6 +22,7 @@
22#include "meta_io.h" 22#include "meta_io.h"
23#include "quota.h" 23#include "quota.h"
24#include "rgrp.h" 24#include "rgrp.h"
25#include "log.h"
25#include "super.h" 26#include "super.h"
26#include "trans.h" 27#include "trans.h"
27#include "dir.h" 28#include "dir.h"
@@ -93,7 +94,7 @@ static int gfs2_unstuffer_page(struct gfs2_inode *ip, struct buffer_head *dibh,
93 if (!gfs2_is_jdata(ip)) 94 if (!gfs2_is_jdata(ip))
94 mark_buffer_dirty(bh); 95 mark_buffer_dirty(bh);
95 if (!gfs2_is_writeback(ip)) 96 if (!gfs2_is_writeback(ip))
96 gfs2_trans_add_bh(ip->i_gl, bh, 0); 97 gfs2_trans_add_data(ip->i_gl, bh);
97 98
98 if (release) { 99 if (release) {
99 unlock_page(page); 100 unlock_page(page);
@@ -153,7 +154,7 @@ int gfs2_unstuff_dinode(struct gfs2_inode *ip, struct page *page)
153 154
154 /* Set up the pointer to the new block */ 155 /* Set up the pointer to the new block */
155 156
156 gfs2_trans_add_bh(ip->i_gl, dibh, 1); 157 gfs2_trans_add_meta(ip->i_gl, dibh);
157 di = (struct gfs2_dinode *)dibh->b_data; 158 di = (struct gfs2_dinode *)dibh->b_data;
158 gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode)); 159 gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode));
159 160
@@ -405,7 +406,7 @@ static inline __be64 *gfs2_indirect_init(struct metapath *mp,
405 BUG_ON(i < 1); 406 BUG_ON(i < 1);
406 BUG_ON(mp->mp_bh[i] != NULL); 407 BUG_ON(mp->mp_bh[i] != NULL);
407 mp->mp_bh[i] = gfs2_meta_new(gl, bn); 408 mp->mp_bh[i] = gfs2_meta_new(gl, bn);
408 gfs2_trans_add_bh(gl, mp->mp_bh[i], 1); 409 gfs2_trans_add_meta(gl, mp->mp_bh[i]);
409 gfs2_metatype_set(mp->mp_bh[i], GFS2_METATYPE_IN, GFS2_FORMAT_IN); 410 gfs2_metatype_set(mp->mp_bh[i], GFS2_METATYPE_IN, GFS2_FORMAT_IN);
410 gfs2_buffer_clear_tail(mp->mp_bh[i], sizeof(struct gfs2_meta_header)); 411 gfs2_buffer_clear_tail(mp->mp_bh[i], sizeof(struct gfs2_meta_header));
411 ptr += offset; 412 ptr += offset;
@@ -468,7 +469,7 @@ static int gfs2_bmap_alloc(struct inode *inode, const sector_t lblock,
468 BUG_ON(sheight < 1); 469 BUG_ON(sheight < 1);
469 BUG_ON(dibh == NULL); 470 BUG_ON(dibh == NULL);
470 471
471 gfs2_trans_add_bh(ip->i_gl, dibh, 1); 472 gfs2_trans_add_meta(ip->i_gl, dibh);
472 473
473 if (height == sheight) { 474 if (height == sheight) {
474 struct buffer_head *bh; 475 struct buffer_head *bh;
@@ -544,7 +545,7 @@ static int gfs2_bmap_alloc(struct inode *inode, const sector_t lblock,
544 /* Branching from existing tree */ 545 /* Branching from existing tree */
545 case ALLOC_GROW_DEPTH: 546 case ALLOC_GROW_DEPTH:
546 if (i > 1 && i < height) 547 if (i > 1 && i < height)
547 gfs2_trans_add_bh(ip->i_gl, mp->mp_bh[i-1], 1); 548 gfs2_trans_add_meta(ip->i_gl, mp->mp_bh[i-1]);
548 for (; i < height && n > 0; i++, n--) 549 for (; i < height && n > 0; i++, n--)
549 gfs2_indirect_init(mp, ip->i_gl, i, 550 gfs2_indirect_init(mp, ip->i_gl, i,
550 mp->mp_list[i-1], bn++); 551 mp->mp_list[i-1], bn++);
@@ -556,7 +557,7 @@ static int gfs2_bmap_alloc(struct inode *inode, const sector_t lblock,
556 case ALLOC_DATA: 557 case ALLOC_DATA:
557 BUG_ON(n > dblks); 558 BUG_ON(n > dblks);
558 BUG_ON(mp->mp_bh[end_of_metadata] == NULL); 559 BUG_ON(mp->mp_bh[end_of_metadata] == NULL);
559 gfs2_trans_add_bh(ip->i_gl, mp->mp_bh[end_of_metadata], 1); 560 gfs2_trans_add_meta(ip->i_gl, mp->mp_bh[end_of_metadata]);
560 dblks = n; 561 dblks = n;
561 ptr = metapointer(end_of_metadata, mp); 562 ptr = metapointer(end_of_metadata, mp);
562 dblock = bn; 563 dblock = bn;
@@ -796,8 +797,8 @@ static int do_strip(struct gfs2_inode *ip, struct buffer_head *dibh,
796 797
797 down_write(&ip->i_rw_mutex); 798 down_write(&ip->i_rw_mutex);
798 799
799 gfs2_trans_add_bh(ip->i_gl, dibh, 1); 800 gfs2_trans_add_meta(ip->i_gl, dibh);
800 gfs2_trans_add_bh(ip->i_gl, bh, 1); 801 gfs2_trans_add_meta(ip->i_gl, bh);
801 802
802 bstart = 0; 803 bstart = 0;
803 blen = 0; 804 blen = 0;
@@ -981,7 +982,7 @@ static int gfs2_block_truncate_page(struct address_space *mapping, loff_t from)
981 } 982 }
982 983
983 if (!gfs2_is_writeback(ip)) 984 if (!gfs2_is_writeback(ip))
984 gfs2_trans_add_bh(ip->i_gl, bh, 0); 985 gfs2_trans_add_data(ip->i_gl, bh);
985 986
986 zero_user(page, offset, length); 987 zero_user(page, offset, length);
987 mark_buffer_dirty(bh); 988 mark_buffer_dirty(bh);
@@ -1046,7 +1047,7 @@ static int trunc_start(struct inode *inode, u64 oldsize, u64 newsize)
1046 if (error) 1047 if (error)
1047 goto out; 1048 goto out;
1048 1049
1049 gfs2_trans_add_bh(ip->i_gl, dibh, 1); 1050 gfs2_trans_add_meta(ip->i_gl, dibh);
1050 1051
1051 if (gfs2_is_stuffed(ip)) { 1052 if (gfs2_is_stuffed(ip)) {
1052 gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode) + newsize); 1053 gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode) + newsize);
@@ -1137,11 +1138,12 @@ static int trunc_end(struct gfs2_inode *ip)
1137 ip->i_height = 0; 1138 ip->i_height = 0;
1138 ip->i_goal = ip->i_no_addr; 1139 ip->i_goal = ip->i_no_addr;
1139 gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode)); 1140 gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode));
1141 gfs2_ordered_del_inode(ip);
1140 } 1142 }
1141 ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME; 1143 ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME;
1142 ip->i_diskflags &= ~GFS2_DIF_TRUNC_IN_PROG; 1144 ip->i_diskflags &= ~GFS2_DIF_TRUNC_IN_PROG;
1143 1145
1144 gfs2_trans_add_bh(ip->i_gl, dibh, 1); 1146 gfs2_trans_add_meta(ip->i_gl, dibh);
1145 gfs2_dinode_out(ip, dibh->b_data); 1147 gfs2_dinode_out(ip, dibh->b_data);
1146 brelse(dibh); 1148 brelse(dibh);
1147 1149
@@ -1246,7 +1248,7 @@ static int do_grow(struct inode *inode, u64 size)
1246 1248
1247 i_size_write(inode, size); 1249 i_size_write(inode, size);
1248 ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME; 1250 ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME;
1249 gfs2_trans_add_bh(ip->i_gl, dibh, 1); 1251 gfs2_trans_add_meta(ip->i_gl, dibh);
1250 gfs2_dinode_out(ip, dibh->b_data); 1252 gfs2_dinode_out(ip, dibh->b_data);
1251 brelse(dibh); 1253 brelse(dibh);
1252 1254
@@ -1286,6 +1288,10 @@ int gfs2_setattr_size(struct inode *inode, u64 newsize)
1286 1288
1287 inode_dio_wait(inode); 1289 inode_dio_wait(inode);
1288 1290
1291 ret = gfs2_rs_alloc(GFS2_I(inode));
1292 if (ret)
1293 return ret;
1294
1289 oldsize = inode->i_size; 1295 oldsize = inode->i_size;
1290 if (newsize >= oldsize) 1296 if (newsize >= oldsize)
1291 return do_grow(inode, newsize); 1297 return do_grow(inode, newsize);
diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c
index 6d3e3e2cabf8..c3e82bd23179 100644
--- a/fs/gfs2/dir.c
+++ b/fs/gfs2/dir.c
@@ -93,7 +93,7 @@ int gfs2_dir_get_new_buffer(struct gfs2_inode *ip, u64 block,
93 struct buffer_head *bh; 93 struct buffer_head *bh;
94 94
95 bh = gfs2_meta_new(ip->i_gl, block); 95 bh = gfs2_meta_new(ip->i_gl, block);
96 gfs2_trans_add_bh(ip->i_gl, bh, 1); 96 gfs2_trans_add_meta(ip->i_gl, bh);
97 gfs2_metatype_set(bh, GFS2_METATYPE_JD, GFS2_FORMAT_JD); 97 gfs2_metatype_set(bh, GFS2_METATYPE_JD, GFS2_FORMAT_JD);
98 gfs2_buffer_clear_tail(bh, sizeof(struct gfs2_meta_header)); 98 gfs2_buffer_clear_tail(bh, sizeof(struct gfs2_meta_header));
99 *bhp = bh; 99 *bhp = bh;
@@ -127,7 +127,7 @@ static int gfs2_dir_write_stuffed(struct gfs2_inode *ip, const char *buf,
127 if (error) 127 if (error)
128 return error; 128 return error;
129 129
130 gfs2_trans_add_bh(ip->i_gl, dibh, 1); 130 gfs2_trans_add_meta(ip->i_gl, dibh);
131 memcpy(dibh->b_data + offset + sizeof(struct gfs2_dinode), buf, size); 131 memcpy(dibh->b_data + offset + sizeof(struct gfs2_dinode), buf, size);
132 if (ip->i_inode.i_size < offset + size) 132 if (ip->i_inode.i_size < offset + size)
133 i_size_write(&ip->i_inode, offset + size); 133 i_size_write(&ip->i_inode, offset + size);
@@ -209,7 +209,7 @@ static int gfs2_dir_write_data(struct gfs2_inode *ip, const char *buf,
209 if (error) 209 if (error)
210 goto fail; 210 goto fail;
211 211
212 gfs2_trans_add_bh(ip->i_gl, bh, 1); 212 gfs2_trans_add_meta(ip->i_gl, bh);
213 memcpy(bh->b_data + o, buf, amount); 213 memcpy(bh->b_data + o, buf, amount);
214 brelse(bh); 214 brelse(bh);
215 215
@@ -231,7 +231,7 @@ out:
231 i_size_write(&ip->i_inode, offset + copied); 231 i_size_write(&ip->i_inode, offset + copied);
232 ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME; 232 ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME;
233 233
234 gfs2_trans_add_bh(ip->i_gl, dibh, 1); 234 gfs2_trans_add_meta(ip->i_gl, dibh);
235 gfs2_dinode_out(ip, dibh->b_data); 235 gfs2_dinode_out(ip, dibh->b_data);
236 brelse(dibh); 236 brelse(dibh);
237 237
@@ -647,7 +647,7 @@ static void dirent_del(struct gfs2_inode *dip, struct buffer_head *bh,
647 return; 647 return;
648 } 648 }
649 649
650 gfs2_trans_add_bh(dip->i_gl, bh, 1); 650 gfs2_trans_add_meta(dip->i_gl, bh);
651 651
652 /* If there is no prev entry, this is the first entry in the block. 652 /* If there is no prev entry, this is the first entry in the block.
653 The de_rec_len is already as big as it needs to be. Just zero 653 The de_rec_len is already as big as it needs to be. Just zero
@@ -690,7 +690,7 @@ static struct gfs2_dirent *gfs2_init_dirent(struct inode *inode,
690 offset = GFS2_DIRENT_SIZE(be16_to_cpu(dent->de_name_len)); 690 offset = GFS2_DIRENT_SIZE(be16_to_cpu(dent->de_name_len));
691 totlen = be16_to_cpu(dent->de_rec_len); 691 totlen = be16_to_cpu(dent->de_rec_len);
692 BUG_ON(offset + name->len > totlen); 692 BUG_ON(offset + name->len > totlen);
693 gfs2_trans_add_bh(ip->i_gl, bh, 1); 693 gfs2_trans_add_meta(ip->i_gl, bh);
694 ndent = (struct gfs2_dirent *)((char *)dent + offset); 694 ndent = (struct gfs2_dirent *)((char *)dent + offset);
695 dent->de_rec_len = cpu_to_be16(offset); 695 dent->de_rec_len = cpu_to_be16(offset);
696 gfs2_qstr2dirent(name, totlen - offset, ndent); 696 gfs2_qstr2dirent(name, totlen - offset, ndent);
@@ -831,7 +831,7 @@ static struct gfs2_leaf *new_leaf(struct inode *inode, struct buffer_head **pbh,
831 return NULL; 831 return NULL;
832 832
833 gfs2_trans_add_unrevoke(GFS2_SB(inode), bn, 1); 833 gfs2_trans_add_unrevoke(GFS2_SB(inode), bn, 1);
834 gfs2_trans_add_bh(ip->i_gl, bh, 1); 834 gfs2_trans_add_meta(ip->i_gl, bh);
835 gfs2_metatype_set(bh, GFS2_METATYPE_LF, GFS2_FORMAT_LF); 835 gfs2_metatype_set(bh, GFS2_METATYPE_LF, GFS2_FORMAT_LF);
836 leaf = (struct gfs2_leaf *)bh->b_data; 836 leaf = (struct gfs2_leaf *)bh->b_data;
837 leaf->lf_depth = cpu_to_be16(depth); 837 leaf->lf_depth = cpu_to_be16(depth);
@@ -916,7 +916,7 @@ static int dir_make_exhash(struct inode *inode)
916 /* We're done with the new leaf block, now setup the new 916 /* We're done with the new leaf block, now setup the new
917 hash table. */ 917 hash table. */
918 918
919 gfs2_trans_add_bh(dip->i_gl, dibh, 1); 919 gfs2_trans_add_meta(dip->i_gl, dibh);
920 gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode)); 920 gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode));
921 921
922 lp = (__be64 *)(dibh->b_data + sizeof(struct gfs2_dinode)); 922 lp = (__be64 *)(dibh->b_data + sizeof(struct gfs2_dinode));
@@ -976,7 +976,7 @@ static int dir_split_leaf(struct inode *inode, const struct qstr *name)
976 return 1; /* can't split */ 976 return 1; /* can't split */
977 } 977 }
978 978
979 gfs2_trans_add_bh(dip->i_gl, obh, 1); 979 gfs2_trans_add_meta(dip->i_gl, obh);
980 980
981 nleaf = new_leaf(inode, &nbh, be16_to_cpu(oleaf->lf_depth) + 1); 981 nleaf = new_leaf(inode, &nbh, be16_to_cpu(oleaf->lf_depth) + 1);
982 if (!nleaf) { 982 if (!nleaf) {
@@ -1069,7 +1069,7 @@ static int dir_split_leaf(struct inode *inode, const struct qstr *name)
1069 1069
1070 error = gfs2_meta_inode_buffer(dip, &dibh); 1070 error = gfs2_meta_inode_buffer(dip, &dibh);
1071 if (!gfs2_assert_withdraw(GFS2_SB(&dip->i_inode), !error)) { 1071 if (!gfs2_assert_withdraw(GFS2_SB(&dip->i_inode), !error)) {
1072 gfs2_trans_add_bh(dip->i_gl, dibh, 1); 1072 gfs2_trans_add_meta(dip->i_gl, dibh);
1073 gfs2_add_inode_blocks(&dip->i_inode, 1); 1073 gfs2_add_inode_blocks(&dip->i_inode, 1);
1074 gfs2_dinode_out(dip, dibh->b_data); 1074 gfs2_dinode_out(dip, dibh->b_data);
1075 brelse(dibh); 1075 brelse(dibh);
@@ -1622,7 +1622,7 @@ static int dir_new_leaf(struct inode *inode, const struct qstr *name)
1622 return error; 1622 return error;
1623 } while(1); 1623 } while(1);
1624 1624
1625 gfs2_trans_add_bh(ip->i_gl, obh, 1); 1625 gfs2_trans_add_meta(ip->i_gl, obh);
1626 1626
1627 leaf = new_leaf(inode, &bh, be16_to_cpu(oleaf->lf_depth)); 1627 leaf = new_leaf(inode, &bh, be16_to_cpu(oleaf->lf_depth));
1628 if (!leaf) { 1628 if (!leaf) {
@@ -1636,7 +1636,7 @@ static int dir_new_leaf(struct inode *inode, const struct qstr *name)
1636 error = gfs2_meta_inode_buffer(ip, &bh); 1636 error = gfs2_meta_inode_buffer(ip, &bh);
1637 if (error) 1637 if (error)
1638 return error; 1638 return error;
1639 gfs2_trans_add_bh(ip->i_gl, bh, 1); 1639 gfs2_trans_add_meta(ip->i_gl, bh);
1640 gfs2_add_inode_blocks(&ip->i_inode, 1); 1640 gfs2_add_inode_blocks(&ip->i_inode, 1);
1641 gfs2_dinode_out(ip, bh->b_data); 1641 gfs2_dinode_out(ip, bh->b_data);
1642 brelse(bh); 1642 brelse(bh);
@@ -1795,7 +1795,7 @@ int gfs2_dir_mvino(struct gfs2_inode *dip, const struct qstr *filename,
1795 if (IS_ERR(dent)) 1795 if (IS_ERR(dent))
1796 return PTR_ERR(dent); 1796 return PTR_ERR(dent);
1797 1797
1798 gfs2_trans_add_bh(dip->i_gl, bh, 1); 1798 gfs2_trans_add_meta(dip->i_gl, bh);
1799 gfs2_inum_out(nip, dent); 1799 gfs2_inum_out(nip, dent);
1800 dent->de_type = cpu_to_be16(new_type); 1800 dent->de_type = cpu_to_be16(new_type);
1801 1801
@@ -1804,7 +1804,7 @@ int gfs2_dir_mvino(struct gfs2_inode *dip, const struct qstr *filename,
1804 error = gfs2_meta_inode_buffer(dip, &bh); 1804 error = gfs2_meta_inode_buffer(dip, &bh);
1805 if (error) 1805 if (error)
1806 return error; 1806 return error;
1807 gfs2_trans_add_bh(dip->i_gl, bh, 1); 1807 gfs2_trans_add_meta(dip->i_gl, bh);
1808 } 1808 }
1809 1809
1810 dip->i_inode.i_mtime = dip->i_inode.i_ctime = CURRENT_TIME; 1810 dip->i_inode.i_mtime = dip->i_inode.i_ctime = CURRENT_TIME;
@@ -1917,7 +1917,7 @@ static int leaf_dealloc(struct gfs2_inode *dip, u32 index, u32 len,
1917 if (error) 1917 if (error)
1918 goto out_end_trans; 1918 goto out_end_trans;
1919 1919
1920 gfs2_trans_add_bh(dip->i_gl, dibh, 1); 1920 gfs2_trans_add_meta(dip->i_gl, dibh);
1921 /* On the last dealloc, make this a regular file in case we crash. 1921 /* On the last dealloc, make this a regular file in case we crash.
1922 (We don't want to free these blocks a second time.) */ 1922 (We don't want to free these blocks a second time.) */
1923 if (last_dealloc) 1923 if (last_dealloc)
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index 991ab2d484dd..2687f50d98cb 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -276,7 +276,7 @@ static int do_gfs2_set_flags(struct file *filp, u32 reqflags, u32 mask)
276 error = gfs2_meta_inode_buffer(ip, &bh); 276 error = gfs2_meta_inode_buffer(ip, &bh);
277 if (error) 277 if (error)
278 goto out_trans_end; 278 goto out_trans_end;
279 gfs2_trans_add_bh(ip->i_gl, bh, 1); 279 gfs2_trans_add_meta(ip->i_gl, bh);
280 ip->i_diskflags = new_flags; 280 ip->i_diskflags = new_flags;
281 gfs2_dinode_out(ip, bh->b_data); 281 gfs2_dinode_out(ip, bh->b_data);
282 brelse(bh); 282 brelse(bh);
@@ -483,7 +483,7 @@ out:
483 gfs2_holder_uninit(&gh); 483 gfs2_holder_uninit(&gh);
484 if (ret == 0) { 484 if (ret == 0) {
485 set_page_dirty(page); 485 set_page_dirty(page);
486 wait_on_page_writeback(page); 486 wait_for_stable_page(page);
487 } 487 }
488 sb_end_pagefault(inode->i_sb); 488 sb_end_pagefault(inode->i_sb);
489 return block_page_mkwrite_return(ret); 489 return block_page_mkwrite_return(ret);
@@ -709,7 +709,7 @@ static int fallocate_chunk(struct inode *inode, loff_t offset, loff_t len,
709 if (unlikely(error)) 709 if (unlikely(error))
710 return error; 710 return error;
711 711
712 gfs2_trans_add_bh(ip->i_gl, dibh, 1); 712 gfs2_trans_add_meta(ip->i_gl, dibh);
713 713
714 if (gfs2_is_stuffed(ip)) { 714 if (gfs2_is_stuffed(ip)) {
715 error = gfs2_unstuff_dinode(ip, NULL); 715 error = gfs2_unstuff_dinode(ip, NULL);
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index 992c5c0cb504..cf3515546739 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -30,6 +30,7 @@
30#include <linux/rculist_bl.h> 30#include <linux/rculist_bl.h>
31#include <linux/bit_spinlock.h> 31#include <linux/bit_spinlock.h>
32#include <linux/percpu.h> 32#include <linux/percpu.h>
33#include <linux/list_sort.h>
33 34
34#include "gfs2.h" 35#include "gfs2.h"
35#include "incore.h" 36#include "incore.h"
@@ -1376,56 +1377,105 @@ void gfs2_glock_complete(struct gfs2_glock *gl, int ret)
1376 gfs2_glock_put(gl); 1377 gfs2_glock_put(gl);
1377} 1378}
1378 1379
1380static int glock_cmp(void *priv, struct list_head *a, struct list_head *b)
1381{
1382 struct gfs2_glock *gla, *glb;
1379 1383
1380static int gfs2_shrink_glock_memory(struct shrinker *shrink, 1384 gla = list_entry(a, struct gfs2_glock, gl_lru);
1381 struct shrink_control *sc) 1385 glb = list_entry(b, struct gfs2_glock, gl_lru);
1386
1387 if (gla->gl_name.ln_number > glb->gl_name.ln_number)
1388 return 1;
1389 if (gla->gl_name.ln_number < glb->gl_name.ln_number)
1390 return -1;
1391
1392 return 0;
1393}
1394
1395/**
1396 * gfs2_dispose_glock_lru - Demote a list of glocks
1397 * @list: The list to dispose of
1398 *
1399 * Disposing of glocks may involve disk accesses, so that here we sort
1400 * the glocks by number (i.e. disk location of the inodes) so that if
1401 * there are any such accesses, they'll be sent in order (mostly).
1402 *
1403 * Must be called under the lru_lock, but may drop and retake this
1404 * lock. While the lru_lock is dropped, entries may vanish from the
1405 * list, but no new entries will appear on the list (since it is
1406 * private)
1407 */
1408
1409static void gfs2_dispose_glock_lru(struct list_head *list)
1410__releases(&lru_lock)
1411__acquires(&lru_lock)
1382{ 1412{
1383 struct gfs2_glock *gl; 1413 struct gfs2_glock *gl;
1384 int may_demote;
1385 int nr_skipped = 0;
1386 int nr = sc->nr_to_scan;
1387 gfp_t gfp_mask = sc->gfp_mask;
1388 LIST_HEAD(skipped);
1389 1414
1390 if (nr == 0) 1415 list_sort(NULL, list, glock_cmp);
1391 goto out;
1392 1416
1393 if (!(gfp_mask & __GFP_FS)) 1417 while(!list_empty(list)) {
1394 return -1; 1418 gl = list_entry(list->next, struct gfs2_glock, gl_lru);
1419 list_del_init(&gl->gl_lru);
1420 clear_bit(GLF_LRU, &gl->gl_flags);
1421 gfs2_glock_hold(gl);
1422 spin_unlock(&lru_lock);
1423 spin_lock(&gl->gl_spin);
1424 if (demote_ok(gl))
1425 handle_callback(gl, LM_ST_UNLOCKED, 0);
1426 WARN_ON(!test_and_clear_bit(GLF_LOCK, &gl->gl_flags));
1427 smp_mb__after_clear_bit();
1428 if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0)
1429 gfs2_glock_put_nolock(gl);
1430 spin_unlock(&gl->gl_spin);
1431 spin_lock(&lru_lock);
1432 }
1433}
1434
1435/**
1436 * gfs2_scan_glock_lru - Scan the LRU looking for locks to demote
1437 * @nr: The number of entries to scan
1438 *
1439 * This function selects the entries on the LRU which are able to
1440 * be demoted, and then kicks off the process by calling
1441 * gfs2_dispose_glock_lru() above.
1442 */
1443
1444static void gfs2_scan_glock_lru(int nr)
1445{
1446 struct gfs2_glock *gl;
1447 LIST_HEAD(skipped);
1448 LIST_HEAD(dispose);
1395 1449
1396 spin_lock(&lru_lock); 1450 spin_lock(&lru_lock);
1397 while(nr && !list_empty(&lru_list)) { 1451 while(nr && !list_empty(&lru_list)) {
1398 gl = list_entry(lru_list.next, struct gfs2_glock, gl_lru); 1452 gl = list_entry(lru_list.next, struct gfs2_glock, gl_lru);
1399 list_del_init(&gl->gl_lru);
1400 clear_bit(GLF_LRU, &gl->gl_flags);
1401 atomic_dec(&lru_count);
1402 1453
1403 /* Test for being demotable */ 1454 /* Test for being demotable */
1404 if (!test_and_set_bit(GLF_LOCK, &gl->gl_flags)) { 1455 if (!test_and_set_bit(GLF_LOCK, &gl->gl_flags)) {
1405 gfs2_glock_hold(gl); 1456 list_move(&gl->gl_lru, &dispose);
1406 spin_unlock(&lru_lock); 1457 atomic_dec(&lru_count);
1407 spin_lock(&gl->gl_spin); 1458 nr--;
1408 may_demote = demote_ok(gl);
1409 if (may_demote) {
1410 handle_callback(gl, LM_ST_UNLOCKED, 0);
1411 nr--;
1412 }
1413 clear_bit(GLF_LOCK, &gl->gl_flags);
1414 smp_mb__after_clear_bit();
1415 if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0)
1416 gfs2_glock_put_nolock(gl);
1417 spin_unlock(&gl->gl_spin);
1418 spin_lock(&lru_lock);
1419 continue; 1459 continue;
1420 } 1460 }
1421 nr_skipped++; 1461
1422 list_add(&gl->gl_lru, &skipped); 1462 list_move(&gl->gl_lru, &skipped);
1423 set_bit(GLF_LRU, &gl->gl_flags);
1424 } 1463 }
1425 list_splice(&skipped, &lru_list); 1464 list_splice(&skipped, &lru_list);
1426 atomic_add(nr_skipped, &lru_count); 1465 if (!list_empty(&dispose))
1466 gfs2_dispose_glock_lru(&dispose);
1427 spin_unlock(&lru_lock); 1467 spin_unlock(&lru_lock);
1428out: 1468}
1469
1470static int gfs2_shrink_glock_memory(struct shrinker *shrink,
1471 struct shrink_control *sc)
1472{
1473 if (sc->nr_to_scan) {
1474 if (!(sc->gfp_mask & __GFP_FS))
1475 return -1;
1476 gfs2_scan_glock_lru(sc->nr_to_scan);
1477 }
1478
1429 return (atomic_read(&lru_count) / 100) * sysctl_vfs_cache_pressure; 1479 return (atomic_read(&lru_count) / 100) * sysctl_vfs_cache_pressure;
1430} 1480}
1431 1481
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index 5b298bdab90c..156e42ec84ea 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -52,7 +52,6 @@ struct gfs2_log_header_host {
52 */ 52 */
53 53
54struct gfs2_log_operations { 54struct gfs2_log_operations {
55 void (*lo_add) (struct gfs2_sbd *sdp, struct gfs2_bufdata *bd);
56 void (*lo_before_commit) (struct gfs2_sbd *sdp); 55 void (*lo_before_commit) (struct gfs2_sbd *sdp);
57 void (*lo_after_commit) (struct gfs2_sbd *sdp, struct gfs2_ail *ai); 56 void (*lo_after_commit) (struct gfs2_sbd *sdp, struct gfs2_ail *ai);
58 void (*lo_before_scan) (struct gfs2_jdesc *jd, 57 void (*lo_before_scan) (struct gfs2_jdesc *jd,
@@ -341,6 +340,7 @@ enum {
341 GIF_QD_LOCKED = 1, 340 GIF_QD_LOCKED = 1,
342 GIF_ALLOC_FAILED = 2, 341 GIF_ALLOC_FAILED = 2,
343 GIF_SW_PAGED = 3, 342 GIF_SW_PAGED = 3,
343 GIF_ORDERED = 4,
344}; 344};
345 345
346struct gfs2_inode { 346struct gfs2_inode {
@@ -357,6 +357,7 @@ struct gfs2_inode {
357 struct gfs2_rgrpd *i_rgd; 357 struct gfs2_rgrpd *i_rgd;
358 u64 i_goal; /* goal block for allocations */ 358 u64 i_goal; /* goal block for allocations */
359 struct rw_semaphore i_rw_mutex; 359 struct rw_semaphore i_rw_mutex;
360 struct list_head i_ordered;
360 struct list_head i_trunc_list; 361 struct list_head i_trunc_list;
361 __be64 *i_hash_cache; 362 __be64 *i_hash_cache;
362 u32 i_entries; 363 u32 i_entries;
@@ -640,6 +641,7 @@ struct gfs2_sbd {
640 wait_queue_head_t sd_glock_wait; 641 wait_queue_head_t sd_glock_wait;
641 atomic_t sd_glock_disposal; 642 atomic_t sd_glock_disposal;
642 struct completion sd_locking_init; 643 struct completion sd_locking_init;
644 struct completion sd_wdack;
643 struct delayed_work sd_control_work; 645 struct delayed_work sd_control_work;
644 646
645 /* Inode Stuff */ 647 /* Inode Stuff */
@@ -722,6 +724,7 @@ struct gfs2_sbd {
722 struct list_head sd_log_le_revoke; 724 struct list_head sd_log_le_revoke;
723 struct list_head sd_log_le_databuf; 725 struct list_head sd_log_le_databuf;
724 struct list_head sd_log_le_ordered; 726 struct list_head sd_log_le_ordered;
727 spinlock_t sd_ordered_lock;
725 728
726 atomic_t sd_log_thresh1; 729 atomic_t sd_log_thresh1;
727 atomic_t sd_log_thresh2; 730 atomic_t sd_log_thresh2;
@@ -757,10 +760,7 @@ struct gfs2_sbd {
757 unsigned int sd_replayed_blocks; 760 unsigned int sd_replayed_blocks;
758 761
759 /* For quiescing the filesystem */ 762 /* For quiescing the filesystem */
760
761 struct gfs2_holder sd_freeze_gh; 763 struct gfs2_holder sd_freeze_gh;
762 struct mutex sd_freeze_lock;
763 unsigned int sd_freeze_count;
764 764
765 char sd_fsname[GFS2_FSNAME_LEN]; 765 char sd_fsname[GFS2_FSNAME_LEN];
766 char sd_table_name[GFS2_FSNAME_LEN]; 766 char sd_table_name[GFS2_FSNAME_LEN];
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index b862d114e155..cc00bd1d1f87 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -448,7 +448,7 @@ static void init_dinode(struct gfs2_inode *dip, struct gfs2_inode *ip,
448 struct timespec tv = CURRENT_TIME; 448 struct timespec tv = CURRENT_TIME;
449 449
450 dibh = gfs2_meta_new(ip->i_gl, ip->i_no_addr); 450 dibh = gfs2_meta_new(ip->i_gl, ip->i_no_addr);
451 gfs2_trans_add_bh(ip->i_gl, dibh, 1); 451 gfs2_trans_add_meta(ip->i_gl, dibh);
452 gfs2_metatype_set(dibh, GFS2_METATYPE_DI, GFS2_FORMAT_DI); 452 gfs2_metatype_set(dibh, GFS2_METATYPE_DI, GFS2_FORMAT_DI);
453 gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode)); 453 gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode));
454 di = (struct gfs2_dinode *)dibh->b_data; 454 di = (struct gfs2_dinode *)dibh->b_data;
@@ -585,7 +585,7 @@ static int link_dinode(struct gfs2_inode *dip, const struct qstr *name,
585 if (error) 585 if (error)
586 goto fail_end_trans; 586 goto fail_end_trans;
587 set_nlink(&ip->i_inode, S_ISDIR(ip->i_inode.i_mode) ? 2 : 1); 587 set_nlink(&ip->i_inode, S_ISDIR(ip->i_inode.i_mode) ? 2 : 1);
588 gfs2_trans_add_bh(ip->i_gl, dibh, 1); 588 gfs2_trans_add_meta(ip->i_gl, dibh);
589 gfs2_dinode_out(ip, dibh->b_data); 589 gfs2_dinode_out(ip, dibh->b_data);
590 brelse(dibh); 590 brelse(dibh);
591 return 0; 591 return 0;
@@ -932,7 +932,7 @@ static int gfs2_link(struct dentry *old_dentry, struct inode *dir,
932 if (error) 932 if (error)
933 goto out_brelse; 933 goto out_brelse;
934 934
935 gfs2_trans_add_bh(ip->i_gl, dibh, 1); 935 gfs2_trans_add_meta(ip->i_gl, dibh);
936 inc_nlink(&ip->i_inode); 936 inc_nlink(&ip->i_inode);
937 ip->i_inode.i_ctime = CURRENT_TIME; 937 ip->i_inode.i_ctime = CURRENT_TIME;
938 ihold(inode); 938 ihold(inode);
@@ -1413,7 +1413,7 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry,
1413 if (error) 1413 if (error)
1414 goto out_end_trans; 1414 goto out_end_trans;
1415 ip->i_inode.i_ctime = CURRENT_TIME; 1415 ip->i_inode.i_ctime = CURRENT_TIME;
1416 gfs2_trans_add_bh(ip->i_gl, dibh, 1); 1416 gfs2_trans_add_meta(ip->i_gl, dibh);
1417 gfs2_dinode_out(ip, dibh->b_data); 1417 gfs2_dinode_out(ip, dibh->b_data);
1418 brelse(dibh); 1418 brelse(dibh);
1419 } 1419 }
diff --git a/fs/gfs2/lock_dlm.c b/fs/gfs2/lock_dlm.c
index 8dad6b093716..9802de0f85e6 100644
--- a/fs/gfs2/lock_dlm.c
+++ b/fs/gfs2/lock_dlm.c
@@ -241,6 +241,7 @@ static u32 make_flags(struct gfs2_glock *gl, const unsigned int gfs_flags,
241 241
242static void gfs2_reverse_hex(char *c, u64 value) 242static void gfs2_reverse_hex(char *c, u64 value)
243{ 243{
244 *c = '0';
244 while (value) { 245 while (value) {
245 *c-- = hex_asc[value & 0x0f]; 246 *c-- = hex_asc[value & 0x0f];
246 value >>= 4; 247 value >>= 4;
@@ -280,6 +281,7 @@ static void gdlm_put_lock(struct gfs2_glock *gl)
280{ 281{
281 struct gfs2_sbd *sdp = gl->gl_sbd; 282 struct gfs2_sbd *sdp = gl->gl_sbd;
282 struct lm_lockstruct *ls = &sdp->sd_lockstruct; 283 struct lm_lockstruct *ls = &sdp->sd_lockstruct;
284 int lvb_needs_unlock = 0;
283 int error; 285 int error;
284 286
285 if (gl->gl_lksb.sb_lkid == 0) { 287 if (gl->gl_lksb.sb_lkid == 0) {
@@ -293,8 +295,12 @@ static void gdlm_put_lock(struct gfs2_glock *gl)
293 gfs2_update_request_times(gl); 295 gfs2_update_request_times(gl);
294 296
295 /* don't want to skip dlm_unlock writing the lvb when lock is ex */ 297 /* don't want to skip dlm_unlock writing the lvb when lock is ex */
298
299 if (gl->gl_lksb.sb_lvbptr && (gl->gl_state == LM_ST_EXCLUSIVE))
300 lvb_needs_unlock = 1;
301
296 if (test_bit(SDF_SKIP_DLM_UNLOCK, &sdp->sd_flags) && 302 if (test_bit(SDF_SKIP_DLM_UNLOCK, &sdp->sd_flags) &&
297 gl->gl_lksb.sb_lvbptr && (gl->gl_state != LM_ST_EXCLUSIVE)) { 303 !lvb_needs_unlock) {
298 gfs2_glock_free(gl); 304 gfs2_glock_free(gl);
299 return; 305 return;
300 } 306 }
diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c
index f4beeb9c81c1..9a2ca8be7647 100644
--- a/fs/gfs2/log.c
+++ b/fs/gfs2/log.c
@@ -482,70 +482,66 @@ static void log_flush_wait(struct gfs2_sbd *sdp)
482 } 482 }
483} 483}
484 484
485static int bd_cmp(void *priv, struct list_head *a, struct list_head *b) 485static int ip_cmp(void *priv, struct list_head *a, struct list_head *b)
486{ 486{
487 struct gfs2_bufdata *bda, *bdb; 487 struct gfs2_inode *ipa, *ipb;
488 488
489 bda = list_entry(a, struct gfs2_bufdata, bd_list); 489 ipa = list_entry(a, struct gfs2_inode, i_ordered);
490 bdb = list_entry(b, struct gfs2_bufdata, bd_list); 490 ipb = list_entry(b, struct gfs2_inode, i_ordered);
491 491
492 if (bda->bd_bh->b_blocknr < bdb->bd_bh->b_blocknr) 492 if (ipa->i_no_addr < ipb->i_no_addr)
493 return -1; 493 return -1;
494 if (bda->bd_bh->b_blocknr > bdb->bd_bh->b_blocknr) 494 if (ipa->i_no_addr > ipb->i_no_addr)
495 return 1; 495 return 1;
496 return 0; 496 return 0;
497} 497}
498 498
499static void gfs2_ordered_write(struct gfs2_sbd *sdp) 499static void gfs2_ordered_write(struct gfs2_sbd *sdp)
500{ 500{
501 struct gfs2_bufdata *bd; 501 struct gfs2_inode *ip;
502 struct buffer_head *bh;
503 LIST_HEAD(written); 502 LIST_HEAD(written);
504 503
505 gfs2_log_lock(sdp); 504 spin_lock(&sdp->sd_ordered_lock);
506 list_sort(NULL, &sdp->sd_log_le_ordered, &bd_cmp); 505 list_sort(NULL, &sdp->sd_log_le_ordered, &ip_cmp);
507 while (!list_empty(&sdp->sd_log_le_ordered)) { 506 while (!list_empty(&sdp->sd_log_le_ordered)) {
508 bd = list_entry(sdp->sd_log_le_ordered.next, struct gfs2_bufdata, bd_list); 507 ip = list_entry(sdp->sd_log_le_ordered.next, struct gfs2_inode, i_ordered);
509 list_move(&bd->bd_list, &written); 508 list_move(&ip->i_ordered, &written);
510 bh = bd->bd_bh; 509 if (ip->i_inode.i_mapping->nrpages == 0)
511 if (!buffer_dirty(bh))
512 continue; 510 continue;
513 get_bh(bh); 511 spin_unlock(&sdp->sd_ordered_lock);
514 gfs2_log_unlock(sdp); 512 filemap_fdatawrite(ip->i_inode.i_mapping);
515 lock_buffer(bh); 513 spin_lock(&sdp->sd_ordered_lock);
516 if (buffer_mapped(bh) && test_clear_buffer_dirty(bh)) {
517 bh->b_end_io = end_buffer_write_sync;
518 submit_bh(WRITE_SYNC, bh);
519 } else {
520 unlock_buffer(bh);
521 brelse(bh);
522 }
523 gfs2_log_lock(sdp);
524 } 514 }
525 list_splice(&written, &sdp->sd_log_le_ordered); 515 list_splice(&written, &sdp->sd_log_le_ordered);
526 gfs2_log_unlock(sdp); 516 spin_unlock(&sdp->sd_ordered_lock);
527} 517}
528 518
529static void gfs2_ordered_wait(struct gfs2_sbd *sdp) 519static void gfs2_ordered_wait(struct gfs2_sbd *sdp)
530{ 520{
531 struct gfs2_bufdata *bd; 521 struct gfs2_inode *ip;
532 struct buffer_head *bh;
533 522
534 gfs2_log_lock(sdp); 523 spin_lock(&sdp->sd_ordered_lock);
535 while (!list_empty(&sdp->sd_log_le_ordered)) { 524 while (!list_empty(&sdp->sd_log_le_ordered)) {
536 bd = list_entry(sdp->sd_log_le_ordered.prev, struct gfs2_bufdata, bd_list); 525 ip = list_entry(sdp->sd_log_le_ordered.next, struct gfs2_inode, i_ordered);
537 bh = bd->bd_bh; 526 list_del(&ip->i_ordered);
538 if (buffer_locked(bh)) { 527 WARN_ON(!test_and_clear_bit(GIF_ORDERED, &ip->i_flags));
539 get_bh(bh); 528 if (ip->i_inode.i_mapping->nrpages == 0)
540 gfs2_log_unlock(sdp);
541 wait_on_buffer(bh);
542 brelse(bh);
543 gfs2_log_lock(sdp);
544 continue; 529 continue;
545 } 530 spin_unlock(&sdp->sd_ordered_lock);
546 list_del_init(&bd->bd_list); 531 filemap_fdatawait(ip->i_inode.i_mapping);
532 spin_lock(&sdp->sd_ordered_lock);
547 } 533 }
548 gfs2_log_unlock(sdp); 534 spin_unlock(&sdp->sd_ordered_lock);
535}
536
537void gfs2_ordered_del_inode(struct gfs2_inode *ip)
538{
539 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
540
541 spin_lock(&sdp->sd_ordered_lock);
542 if (test_and_clear_bit(GIF_ORDERED, &ip->i_flags))
543 list_del(&ip->i_ordered);
544 spin_unlock(&sdp->sd_ordered_lock);
549} 545}
550 546
551/** 547/**
diff --git a/fs/gfs2/log.h b/fs/gfs2/log.h
index 3fd5215ea25f..3566f35915e0 100644
--- a/fs/gfs2/log.h
+++ b/fs/gfs2/log.h
@@ -48,6 +48,18 @@ static inline void gfs2_log_pointers_init(struct gfs2_sbd *sdp,
48 sdp->sd_log_head = sdp->sd_log_tail = value; 48 sdp->sd_log_head = sdp->sd_log_tail = value;
49} 49}
50 50
51static inline void gfs2_ordered_add_inode(struct gfs2_inode *ip)
52{
53 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
54
55 if (!test_bit(GIF_ORDERED, &ip->i_flags)) {
56 spin_lock(&sdp->sd_ordered_lock);
57 if (!test_and_set_bit(GIF_ORDERED, &ip->i_flags))
58 list_add(&ip->i_ordered, &sdp->sd_log_le_ordered);
59 spin_unlock(&sdp->sd_ordered_lock);
60 }
61}
62extern void gfs2_ordered_del_inode(struct gfs2_inode *ip);
51extern unsigned int gfs2_struct2blk(struct gfs2_sbd *sdp, unsigned int nstruct, 63extern unsigned int gfs2_struct2blk(struct gfs2_sbd *sdp, unsigned int nstruct,
52 unsigned int ssize); 64 unsigned int ssize);
53 65
diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c
index 9ceccb1595a3..a5055977a214 100644
--- a/fs/gfs2/lops.c
+++ b/fs/gfs2/lops.c
@@ -37,7 +37,7 @@
37 * 37 *
38 * The log lock must be held when calling this function 38 * The log lock must be held when calling this function
39 */ 39 */
40static void gfs2_pin(struct gfs2_sbd *sdp, struct buffer_head *bh) 40void gfs2_pin(struct gfs2_sbd *sdp, struct buffer_head *bh)
41{ 41{
42 struct gfs2_bufdata *bd; 42 struct gfs2_bufdata *bd;
43 43
@@ -388,32 +388,6 @@ static struct page *gfs2_get_log_desc(struct gfs2_sbd *sdp, u32 ld_type,
388 return page; 388 return page;
389} 389}
390 390
391static void buf_lo_add(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd)
392{
393 struct gfs2_meta_header *mh;
394 struct gfs2_trans *tr;
395
396 tr = current->journal_info;
397 tr->tr_touched = 1;
398 if (!list_empty(&bd->bd_list))
399 return;
400 set_bit(GLF_LFLUSH, &bd->bd_gl->gl_flags);
401 set_bit(GLF_DIRTY, &bd->bd_gl->gl_flags);
402 mh = (struct gfs2_meta_header *)bd->bd_bh->b_data;
403 if (unlikely(mh->mh_magic != cpu_to_be32(GFS2_MAGIC))) {
404 printk(KERN_ERR
405 "Attempting to add uninitialised block to journal (inplace block=%lld)\n",
406 (unsigned long long)bd->bd_bh->b_blocknr);
407 BUG();
408 }
409 gfs2_pin(sdp, bd->bd_bh);
410 mh->__pad0 = cpu_to_be64(0);
411 mh->mh_jid = cpu_to_be32(sdp->sd_jdesc->jd_jid);
412 sdp->sd_log_num_buf++;
413 list_add(&bd->bd_list, &sdp->sd_log_le_buf);
414 tr->tr_num_buf_new++;
415}
416
417static void gfs2_check_magic(struct buffer_head *bh) 391static void gfs2_check_magic(struct buffer_head *bh)
418{ 392{
419 void *kaddr; 393 void *kaddr;
@@ -600,20 +574,6 @@ static void buf_lo_after_scan(struct gfs2_jdesc *jd, int error, int pass)
600 jd->jd_jid, sdp->sd_replayed_blocks, sdp->sd_found_blocks); 574 jd->jd_jid, sdp->sd_replayed_blocks, sdp->sd_found_blocks);
601} 575}
602 576
603static void revoke_lo_add(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd)
604{
605 struct gfs2_glock *gl = bd->bd_gl;
606 struct gfs2_trans *tr;
607
608 tr = current->journal_info;
609 tr->tr_touched = 1;
610 tr->tr_num_revoke++;
611 sdp->sd_log_num_revoke++;
612 atomic_inc(&gl->gl_revokes);
613 set_bit(GLF_LFLUSH, &gl->gl_flags);
614 list_add(&bd->bd_list, &sdp->sd_log_le_revoke);
615}
616
617static void revoke_lo_before_commit(struct gfs2_sbd *sdp) 577static void revoke_lo_before_commit(struct gfs2_sbd *sdp)
618{ 578{
619 struct gfs2_meta_header *mh; 579 struct gfs2_meta_header *mh;
@@ -749,44 +709,6 @@ static void revoke_lo_after_scan(struct gfs2_jdesc *jd, int error, int pass)
749} 709}
750 710
751/** 711/**
752 * databuf_lo_add - Add a databuf to the transaction.
753 *
754 * This is used in two distinct cases:
755 * i) In ordered write mode
756 * We put the data buffer on a list so that we can ensure that its
757 * synced to disk at the right time
758 * ii) In journaled data mode
759 * We need to journal the data block in the same way as metadata in
760 * the functions above. The difference is that here we have a tag
761 * which is two __be64's being the block number (as per meta data)
762 * and a flag which says whether the data block needs escaping or
763 * not. This means we need a new log entry for each 251 or so data
764 * blocks, which isn't an enormous overhead but twice as much as
765 * for normal metadata blocks.
766 */
767static void databuf_lo_add(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd)
768{
769 struct gfs2_trans *tr = current->journal_info;
770 struct address_space *mapping = bd->bd_bh->b_page->mapping;
771 struct gfs2_inode *ip = GFS2_I(mapping->host);
772
773 if (tr)
774 tr->tr_touched = 1;
775 if (!list_empty(&bd->bd_list))
776 return;
777 set_bit(GLF_LFLUSH, &bd->bd_gl->gl_flags);
778 set_bit(GLF_DIRTY, &bd->bd_gl->gl_flags);
779 if (gfs2_is_jdata(ip)) {
780 gfs2_pin(sdp, bd->bd_bh);
781 tr->tr_num_databuf_new++;
782 sdp->sd_log_num_databuf++;
783 list_add_tail(&bd->bd_list, &sdp->sd_log_le_databuf);
784 } else {
785 list_add_tail(&bd->bd_list, &sdp->sd_log_le_ordered);
786 }
787}
788
789/**
790 * databuf_lo_before_commit - Scan the data buffers, writing as we go 712 * databuf_lo_before_commit - Scan the data buffers, writing as we go
791 * 713 *
792 */ 714 */
@@ -885,7 +807,6 @@ static void databuf_lo_after_commit(struct gfs2_sbd *sdp, struct gfs2_ail *ai)
885 807
886 808
887const struct gfs2_log_operations gfs2_buf_lops = { 809const struct gfs2_log_operations gfs2_buf_lops = {
888 .lo_add = buf_lo_add,
889 .lo_before_commit = buf_lo_before_commit, 810 .lo_before_commit = buf_lo_before_commit,
890 .lo_after_commit = buf_lo_after_commit, 811 .lo_after_commit = buf_lo_after_commit,
891 .lo_before_scan = buf_lo_before_scan, 812 .lo_before_scan = buf_lo_before_scan,
@@ -895,7 +816,6 @@ const struct gfs2_log_operations gfs2_buf_lops = {
895}; 816};
896 817
897const struct gfs2_log_operations gfs2_revoke_lops = { 818const struct gfs2_log_operations gfs2_revoke_lops = {
898 .lo_add = revoke_lo_add,
899 .lo_before_commit = revoke_lo_before_commit, 819 .lo_before_commit = revoke_lo_before_commit,
900 .lo_after_commit = revoke_lo_after_commit, 820 .lo_after_commit = revoke_lo_after_commit,
901 .lo_before_scan = revoke_lo_before_scan, 821 .lo_before_scan = revoke_lo_before_scan,
@@ -909,7 +829,6 @@ const struct gfs2_log_operations gfs2_rg_lops = {
909}; 829};
910 830
911const struct gfs2_log_operations gfs2_databuf_lops = { 831const struct gfs2_log_operations gfs2_databuf_lops = {
912 .lo_add = databuf_lo_add,
913 .lo_before_commit = databuf_lo_before_commit, 832 .lo_before_commit = databuf_lo_before_commit,
914 .lo_after_commit = databuf_lo_after_commit, 833 .lo_after_commit = databuf_lo_after_commit,
915 .lo_scan_elements = databuf_lo_scan_elements, 834 .lo_scan_elements = databuf_lo_scan_elements,
diff --git a/fs/gfs2/lops.h b/fs/gfs2/lops.h
index 954a330585f4..ba77b7da8325 100644
--- a/fs/gfs2/lops.h
+++ b/fs/gfs2/lops.h
@@ -29,6 +29,7 @@ extern const struct gfs2_log_operations gfs2_databuf_lops;
29extern const struct gfs2_log_operations *gfs2_log_ops[]; 29extern const struct gfs2_log_operations *gfs2_log_ops[];
30extern void gfs2_log_write_page(struct gfs2_sbd *sdp, struct page *page); 30extern void gfs2_log_write_page(struct gfs2_sbd *sdp, struct page *page);
31extern void gfs2_log_flush_bio(struct gfs2_sbd *sdp, int rw); 31extern void gfs2_log_flush_bio(struct gfs2_sbd *sdp, int rw);
32extern void gfs2_pin(struct gfs2_sbd *sdp, struct buffer_head *bh);
32 33
33static inline unsigned int buf_limit(struct gfs2_sbd *sdp) 34static inline unsigned int buf_limit(struct gfs2_sbd *sdp)
34{ 35{
@@ -46,19 +47,6 @@ static inline unsigned int databuf_limit(struct gfs2_sbd *sdp)
46 return limit; 47 return limit;
47} 48}
48 49
49static inline void lops_init_le(struct gfs2_bufdata *bd,
50 const struct gfs2_log_operations *lops)
51{
52 INIT_LIST_HEAD(&bd->bd_list);
53 bd->bd_ops = lops;
54}
55
56static inline void lops_add(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd)
57{
58 if (bd->bd_ops->lo_add)
59 bd->bd_ops->lo_add(sdp, bd);
60}
61
62static inline void lops_before_commit(struct gfs2_sbd *sdp) 50static inline void lops_before_commit(struct gfs2_sbd *sdp)
63{ 51{
64 int x; 52 int x;
diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c
index 22255d96b27e..b059bbb5059e 100644
--- a/fs/gfs2/meta_io.c
+++ b/fs/gfs2/meta_io.c
@@ -271,41 +271,6 @@ int gfs2_meta_wait(struct gfs2_sbd *sdp, struct buffer_head *bh)
271 return 0; 271 return 0;
272} 272}
273 273
274/**
275 * gfs2_attach_bufdata - attach a struct gfs2_bufdata structure to a buffer
276 * @gl: the glock the buffer belongs to
277 * @bh: The buffer to be attached to
278 * @meta: Flag to indicate whether its metadata or not
279 */
280
281void gfs2_attach_bufdata(struct gfs2_glock *gl, struct buffer_head *bh,
282 int meta)
283{
284 struct gfs2_bufdata *bd;
285
286 if (meta)
287 lock_page(bh->b_page);
288
289 if (bh->b_private) {
290 if (meta)
291 unlock_page(bh->b_page);
292 return;
293 }
294
295 bd = kmem_cache_zalloc(gfs2_bufdata_cachep, GFP_NOFS | __GFP_NOFAIL);
296 bd->bd_bh = bh;
297 bd->bd_gl = gl;
298
299 if (meta)
300 lops_init_le(bd, &gfs2_buf_lops);
301 else
302 lops_init_le(bd, &gfs2_databuf_lops);
303 bh->b_private = bd;
304
305 if (meta)
306 unlock_page(bh->b_page);
307}
308
309void gfs2_remove_from_journal(struct buffer_head *bh, struct gfs2_trans *tr, int meta) 274void gfs2_remove_from_journal(struct buffer_head *bh, struct gfs2_trans *tr, int meta)
310{ 275{
311 struct address_space *mapping = bh->b_page->mapping; 276 struct address_space *mapping = bh->b_page->mapping;
diff --git a/fs/gfs2/meta_io.h b/fs/gfs2/meta_io.h
index c30973b07a7c..0d4c843b6f8e 100644
--- a/fs/gfs2/meta_io.h
+++ b/fs/gfs2/meta_io.h
@@ -56,9 +56,6 @@ int gfs2_meta_read(struct gfs2_glock *gl, u64 blkno,
56int gfs2_meta_wait(struct gfs2_sbd *sdp, struct buffer_head *bh); 56int gfs2_meta_wait(struct gfs2_sbd *sdp, struct buffer_head *bh);
57struct buffer_head *gfs2_getbuf(struct gfs2_glock *gl, u64 blkno, int create); 57struct buffer_head *gfs2_getbuf(struct gfs2_glock *gl, u64 blkno, int create);
58 58
59void gfs2_attach_bufdata(struct gfs2_glock *gl, struct buffer_head *bh,
60 int meta);
61
62void gfs2_remove_from_journal(struct buffer_head *bh, struct gfs2_trans *tr, 59void gfs2_remove_from_journal(struct buffer_head *bh, struct gfs2_trans *tr,
63 int meta); 60 int meta);
64 61
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index 0e3554edb8f2..1b612be4b873 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -81,6 +81,7 @@ static struct gfs2_sbd *init_sbd(struct super_block *sb)
81 init_waitqueue_head(&sdp->sd_glock_wait); 81 init_waitqueue_head(&sdp->sd_glock_wait);
82 atomic_set(&sdp->sd_glock_disposal, 0); 82 atomic_set(&sdp->sd_glock_disposal, 0);
83 init_completion(&sdp->sd_locking_init); 83 init_completion(&sdp->sd_locking_init);
84 init_completion(&sdp->sd_wdack);
84 spin_lock_init(&sdp->sd_statfs_spin); 85 spin_lock_init(&sdp->sd_statfs_spin);
85 86
86 spin_lock_init(&sdp->sd_rindex_spin); 87 spin_lock_init(&sdp->sd_rindex_spin);
@@ -102,6 +103,7 @@ static struct gfs2_sbd *init_sbd(struct super_block *sb)
102 INIT_LIST_HEAD(&sdp->sd_log_le_revoke); 103 INIT_LIST_HEAD(&sdp->sd_log_le_revoke);
103 INIT_LIST_HEAD(&sdp->sd_log_le_databuf); 104 INIT_LIST_HEAD(&sdp->sd_log_le_databuf);
104 INIT_LIST_HEAD(&sdp->sd_log_le_ordered); 105 INIT_LIST_HEAD(&sdp->sd_log_le_ordered);
106 spin_lock_init(&sdp->sd_ordered_lock);
105 107
106 init_waitqueue_head(&sdp->sd_log_waitq); 108 init_waitqueue_head(&sdp->sd_log_waitq);
107 init_waitqueue_head(&sdp->sd_logd_waitq); 109 init_waitqueue_head(&sdp->sd_logd_waitq);
@@ -115,8 +117,6 @@ static struct gfs2_sbd *init_sbd(struct super_block *sb)
115 117
116 INIT_LIST_HEAD(&sdp->sd_revoke_list); 118 INIT_LIST_HEAD(&sdp->sd_revoke_list);
117 119
118 mutex_init(&sdp->sd_freeze_lock);
119
120 return sdp; 120 return sdp;
121} 121}
122 122
diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c
index afd2e5d38e5a..c7c840e916f8 100644
--- a/fs/gfs2/quota.c
+++ b/fs/gfs2/quota.c
@@ -585,7 +585,7 @@ static void do_qc(struct gfs2_quota_data *qd, s64 change)
585 s64 x; 585 s64 x;
586 586
587 mutex_lock(&sdp->sd_quota_mutex); 587 mutex_lock(&sdp->sd_quota_mutex);
588 gfs2_trans_add_bh(ip->i_gl, qd->qd_bh, 1); 588 gfs2_trans_add_meta(ip->i_gl, qd->qd_bh);
589 589
590 if (!test_bit(QDF_CHANGE, &qd->qd_flags)) { 590 if (!test_bit(QDF_CHANGE, &qd->qd_flags)) {
591 qc->qc_change = 0; 591 qc->qc_change = 0;
@@ -721,7 +721,7 @@ get_a_page:
721 goto unlock_out; 721 goto unlock_out;
722 } 722 }
723 723
724 gfs2_trans_add_bh(ip->i_gl, bh, 0); 724 gfs2_trans_add_meta(ip->i_gl, bh);
725 725
726 kaddr = kmap_atomic(page); 726 kaddr = kmap_atomic(page);
727 if (offset + sizeof(struct gfs2_quota) > PAGE_CACHE_SIZE) 727 if (offset + sizeof(struct gfs2_quota) > PAGE_CACHE_SIZE)
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index 37ee061d899e..52c2aeaf45ce 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -350,10 +350,14 @@ static u32 gfs2_free_extlen(const struct gfs2_rbm *rrbm, u32 len)
350 BUG_ON(len < chunk_size); 350 BUG_ON(len < chunk_size);
351 len -= chunk_size; 351 len -= chunk_size;
352 block = gfs2_rbm_to_block(&rbm); 352 block = gfs2_rbm_to_block(&rbm);
353 gfs2_rbm_from_block(&rbm, block + chunk_size); 353 if (gfs2_rbm_from_block(&rbm, block + chunk_size)) {
354 n_unaligned = 3; 354 n_unaligned = 0;
355 if (ptr)
356 break; 355 break;
356 }
357 if (ptr) {
358 n_unaligned = 3;
359 break;
360 }
357 n_unaligned = len & 3; 361 n_unaligned = len & 3;
358 } 362 }
359 363
@@ -557,22 +561,20 @@ void gfs2_free_clones(struct gfs2_rgrpd *rgd)
557 */ 561 */
558int gfs2_rs_alloc(struct gfs2_inode *ip) 562int gfs2_rs_alloc(struct gfs2_inode *ip)
559{ 563{
560 struct gfs2_blkreserv *res; 564 int error = 0;
561 565
566 down_write(&ip->i_rw_mutex);
562 if (ip->i_res) 567 if (ip->i_res)
563 return 0; 568 goto out;
564
565 res = kmem_cache_zalloc(gfs2_rsrv_cachep, GFP_NOFS);
566 if (!res)
567 return -ENOMEM;
568 569
569 RB_CLEAR_NODE(&res->rs_node); 570 ip->i_res = kmem_cache_zalloc(gfs2_rsrv_cachep, GFP_NOFS);
571 if (!ip->i_res) {
572 error = -ENOMEM;
573 goto out;
574 }
570 575
571 down_write(&ip->i_rw_mutex); 576 RB_CLEAR_NODE(&ip->i_res->rs_node);
572 if (ip->i_res) 577out:
573 kmem_cache_free(gfs2_rsrv_cachep, res);
574 else
575 ip->i_res = res;
576 up_write(&ip->i_rw_mutex); 578 up_write(&ip->i_rw_mutex);
577 return 0; 579 return 0;
578} 580}
@@ -1321,7 +1323,7 @@ int gfs2_fitrim(struct file *filp, void __user *argp)
1321 if (ret == 0) { 1323 if (ret == 0) {
1322 bh = rgd->rd_bits[0].bi_bh; 1324 bh = rgd->rd_bits[0].bi_bh;
1323 rgd->rd_flags |= GFS2_RGF_TRIMMED; 1325 rgd->rd_flags |= GFS2_RGF_TRIMMED;
1324 gfs2_trans_add_bh(rgd->rd_gl, bh, 1); 1326 gfs2_trans_add_meta(rgd->rd_gl, bh);
1325 gfs2_rgrp_out(rgd, bh->b_data); 1327 gfs2_rgrp_out(rgd, bh->b_data);
1326 gfs2_rgrp_ondisk2lvb(rgd->rd_rgl, bh->b_data); 1328 gfs2_rgrp_ondisk2lvb(rgd->rd_rgl, bh->b_data);
1327 gfs2_trans_end(sdp); 1329 gfs2_trans_end(sdp);
@@ -1424,6 +1426,9 @@ static void rg_mblk_search(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip,
1424 rs->rs_free = extlen; 1426 rs->rs_free = extlen;
1425 rs->rs_inum = ip->i_no_addr; 1427 rs->rs_inum = ip->i_no_addr;
1426 rs_insert(ip); 1428 rs_insert(ip);
1429 } else {
1430 if (goal == rgd->rd_last_alloc + rgd->rd_data0)
1431 rgd->rd_last_alloc = 0;
1427 } 1432 }
1428} 1433}
1429 1434
@@ -1963,14 +1968,14 @@ static void gfs2_alloc_extent(const struct gfs2_rbm *rbm, bool dinode,
1963 1968
1964 *n = 1; 1969 *n = 1;
1965 block = gfs2_rbm_to_block(rbm); 1970 block = gfs2_rbm_to_block(rbm);
1966 gfs2_trans_add_bh(rbm->rgd->rd_gl, rbm->bi->bi_bh, 1); 1971 gfs2_trans_add_meta(rbm->rgd->rd_gl, rbm->bi->bi_bh);
1967 gfs2_setbit(rbm, true, dinode ? GFS2_BLKST_DINODE : GFS2_BLKST_USED); 1972 gfs2_setbit(rbm, true, dinode ? GFS2_BLKST_DINODE : GFS2_BLKST_USED);
1968 block++; 1973 block++;
1969 while (*n < elen) { 1974 while (*n < elen) {
1970 ret = gfs2_rbm_from_block(&pos, block); 1975 ret = gfs2_rbm_from_block(&pos, block);
1971 if (ret || gfs2_testbit(&pos) != GFS2_BLKST_FREE) 1976 if (ret || gfs2_testbit(&pos) != GFS2_BLKST_FREE)
1972 break; 1977 break;
1973 gfs2_trans_add_bh(pos.rgd->rd_gl, pos.bi->bi_bh, 1); 1978 gfs2_trans_add_meta(pos.rgd->rd_gl, pos.bi->bi_bh);
1974 gfs2_setbit(&pos, true, GFS2_BLKST_USED); 1979 gfs2_setbit(&pos, true, GFS2_BLKST_USED);
1975 (*n)++; 1980 (*n)++;
1976 block++; 1981 block++;
@@ -2009,7 +2014,7 @@ static struct gfs2_rgrpd *rgblk_free(struct gfs2_sbd *sdp, u64 bstart,
2009 rbm.bi->bi_bh->b_data + rbm.bi->bi_offset, 2014 rbm.bi->bi_bh->b_data + rbm.bi->bi_offset,
2010 rbm.bi->bi_len); 2015 rbm.bi->bi_len);
2011 } 2016 }
2012 gfs2_trans_add_bh(rbm.rgd->rd_gl, rbm.bi->bi_bh, 1); 2017 gfs2_trans_add_meta(rbm.rgd->rd_gl, rbm.bi->bi_bh);
2013 gfs2_setbit(&rbm, false, new_state); 2018 gfs2_setbit(&rbm, false, new_state);
2014 } 2019 }
2015 2020
@@ -2152,7 +2157,7 @@ int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks,
2152 if (error == 0) { 2157 if (error == 0) {
2153 struct gfs2_dinode *di = 2158 struct gfs2_dinode *di =
2154 (struct gfs2_dinode *)dibh->b_data; 2159 (struct gfs2_dinode *)dibh->b_data;
2155 gfs2_trans_add_bh(ip->i_gl, dibh, 1); 2160 gfs2_trans_add_meta(ip->i_gl, dibh);
2156 di->di_goal_meta = di->di_goal_data = 2161 di->di_goal_meta = di->di_goal_data =
2157 cpu_to_be64(ip->i_goal); 2162 cpu_to_be64(ip->i_goal);
2158 brelse(dibh); 2163 brelse(dibh);
@@ -2171,7 +2176,7 @@ int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks,
2171 *generation = rbm.rgd->rd_igeneration++; 2176 *generation = rbm.rgd->rd_igeneration++;
2172 } 2177 }
2173 2178
2174 gfs2_trans_add_bh(rbm.rgd->rd_gl, rbm.rgd->rd_bits[0].bi_bh, 1); 2179 gfs2_trans_add_meta(rbm.rgd->rd_gl, rbm.rgd->rd_bits[0].bi_bh);
2175 gfs2_rgrp_out(rbm.rgd, rbm.rgd->rd_bits[0].bi_bh->b_data); 2180 gfs2_rgrp_out(rbm.rgd, rbm.rgd->rd_bits[0].bi_bh->b_data);
2176 gfs2_rgrp_ondisk2lvb(rbm.rgd->rd_rgl, rbm.rgd->rd_bits[0].bi_bh->b_data); 2181 gfs2_rgrp_ondisk2lvb(rbm.rgd->rd_rgl, rbm.rgd->rd_bits[0].bi_bh->b_data);
2177 2182
@@ -2218,7 +2223,7 @@ void __gfs2_free_blocks(struct gfs2_inode *ip, u64 bstart, u32 blen, int meta)
2218 trace_gfs2_block_alloc(ip, rgd, bstart, blen, GFS2_BLKST_FREE); 2223 trace_gfs2_block_alloc(ip, rgd, bstart, blen, GFS2_BLKST_FREE);
2219 rgd->rd_free += blen; 2224 rgd->rd_free += blen;
2220 rgd->rd_flags &= ~GFS2_RGF_TRIMMED; 2225 rgd->rd_flags &= ~GFS2_RGF_TRIMMED;
2221 gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1); 2226 gfs2_trans_add_meta(rgd->rd_gl, rgd->rd_bits[0].bi_bh);
2222 gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data); 2227 gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data);
2223 gfs2_rgrp_ondisk2lvb(rgd->rd_rgl, rgd->rd_bits[0].bi_bh->b_data); 2228 gfs2_rgrp_ondisk2lvb(rgd->rd_rgl, rgd->rd_bits[0].bi_bh->b_data);
2224 2229
@@ -2255,7 +2260,7 @@ void gfs2_unlink_di(struct inode *inode)
2255 if (!rgd) 2260 if (!rgd)
2256 return; 2261 return;
2257 trace_gfs2_block_alloc(ip, rgd, blkno, 1, GFS2_BLKST_UNLINKED); 2262 trace_gfs2_block_alloc(ip, rgd, blkno, 1, GFS2_BLKST_UNLINKED);
2258 gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1); 2263 gfs2_trans_add_meta(rgd->rd_gl, rgd->rd_bits[0].bi_bh);
2259 gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data); 2264 gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data);
2260 gfs2_rgrp_ondisk2lvb(rgd->rd_rgl, rgd->rd_bits[0].bi_bh->b_data); 2265 gfs2_rgrp_ondisk2lvb(rgd->rd_rgl, rgd->rd_bits[0].bi_bh->b_data);
2261 update_rgrp_lvb_unlinked(rgd, 1); 2266 update_rgrp_lvb_unlinked(rgd, 1);
@@ -2276,7 +2281,7 @@ static void gfs2_free_uninit_di(struct gfs2_rgrpd *rgd, u64 blkno)
2276 rgd->rd_dinodes--; 2281 rgd->rd_dinodes--;
2277 rgd->rd_free++; 2282 rgd->rd_free++;
2278 2283
2279 gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1); 2284 gfs2_trans_add_meta(rgd->rd_gl, rgd->rd_bits[0].bi_bh);
2280 gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data); 2285 gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data);
2281 gfs2_rgrp_ondisk2lvb(rgd->rd_rgl, rgd->rd_bits[0].bi_bh->b_data); 2286 gfs2_rgrp_ondisk2lvb(rgd->rd_rgl, rgd->rd_bits[0].bi_bh->b_data);
2282 update_rgrp_lvb_unlinked(rgd, -1); 2287 update_rgrp_lvb_unlinked(rgd, -1);
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index a03425376500..cab77b8ba84f 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -500,7 +500,7 @@ void gfs2_statfs_change(struct gfs2_sbd *sdp, s64 total, s64 free,
500 if (error) 500 if (error)
501 return; 501 return;
502 502
503 gfs2_trans_add_bh(l_ip->i_gl, l_bh, 1); 503 gfs2_trans_add_meta(l_ip->i_gl, l_bh);
504 504
505 spin_lock(&sdp->sd_statfs_spin); 505 spin_lock(&sdp->sd_statfs_spin);
506 l_sc->sc_total += total; 506 l_sc->sc_total += total;
@@ -528,7 +528,7 @@ void update_statfs(struct gfs2_sbd *sdp, struct buffer_head *m_bh,
528 struct gfs2_statfs_change_host *m_sc = &sdp->sd_statfs_master; 528 struct gfs2_statfs_change_host *m_sc = &sdp->sd_statfs_master;
529 struct gfs2_statfs_change_host *l_sc = &sdp->sd_statfs_local; 529 struct gfs2_statfs_change_host *l_sc = &sdp->sd_statfs_local;
530 530
531 gfs2_trans_add_bh(l_ip->i_gl, l_bh, 1); 531 gfs2_trans_add_meta(l_ip->i_gl, l_bh);
532 532
533 spin_lock(&sdp->sd_statfs_spin); 533 spin_lock(&sdp->sd_statfs_spin);
534 m_sc->sc_total += l_sc->sc_total; 534 m_sc->sc_total += l_sc->sc_total;
@@ -539,7 +539,7 @@ void update_statfs(struct gfs2_sbd *sdp, struct buffer_head *m_bh,
539 0, sizeof(struct gfs2_statfs_change)); 539 0, sizeof(struct gfs2_statfs_change));
540 spin_unlock(&sdp->sd_statfs_spin); 540 spin_unlock(&sdp->sd_statfs_spin);
541 541
542 gfs2_trans_add_bh(m_ip->i_gl, m_bh, 1); 542 gfs2_trans_add_meta(m_ip->i_gl, m_bh);
543 gfs2_statfs_change_out(m_sc, m_bh->b_data + sizeof(struct gfs2_dinode)); 543 gfs2_statfs_change_out(m_sc, m_bh->b_data + sizeof(struct gfs2_dinode));
544} 544}
545 545
@@ -663,54 +663,6 @@ out:
663 return error; 663 return error;
664} 664}
665 665
666/**
667 * gfs2_freeze_fs - freezes the file system
668 * @sdp: the file system
669 *
670 * This function flushes data and meta data for all machines by
671 * acquiring the transaction log exclusively. All journals are
672 * ensured to be in a clean state as well.
673 *
674 * Returns: errno
675 */
676
677int gfs2_freeze_fs(struct gfs2_sbd *sdp)
678{
679 int error = 0;
680
681 mutex_lock(&sdp->sd_freeze_lock);
682
683 if (!sdp->sd_freeze_count++) {
684 error = gfs2_lock_fs_check_clean(sdp, &sdp->sd_freeze_gh);
685 if (error)
686 sdp->sd_freeze_count--;
687 }
688
689 mutex_unlock(&sdp->sd_freeze_lock);
690
691 return error;
692}
693
694/**
695 * gfs2_unfreeze_fs - unfreezes the file system
696 * @sdp: the file system
697 *
698 * This function allows the file system to proceed by unlocking
699 * the exclusively held transaction lock. Other GFS2 nodes are
700 * now free to acquire the lock shared and go on with their lives.
701 *
702 */
703
704void gfs2_unfreeze_fs(struct gfs2_sbd *sdp)
705{
706 mutex_lock(&sdp->sd_freeze_lock);
707
708 if (sdp->sd_freeze_count && !--sdp->sd_freeze_count)
709 gfs2_glock_dq_uninit(&sdp->sd_freeze_gh);
710
711 mutex_unlock(&sdp->sd_freeze_lock);
712}
713
714void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf) 666void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf)
715{ 667{
716 struct gfs2_dinode *str = buf; 668 struct gfs2_dinode *str = buf;
@@ -824,7 +776,7 @@ static void gfs2_dirty_inode(struct inode *inode, int flags)
824 776
825 ret = gfs2_meta_inode_buffer(ip, &bh); 777 ret = gfs2_meta_inode_buffer(ip, &bh);
826 if (ret == 0) { 778 if (ret == 0) {
827 gfs2_trans_add_bh(ip->i_gl, bh, 1); 779 gfs2_trans_add_meta(ip->i_gl, bh);
828 gfs2_dinode_out(ip, bh->b_data); 780 gfs2_dinode_out(ip, bh->b_data);
829 brelse(bh); 781 brelse(bh);
830 } 782 }
@@ -888,13 +840,6 @@ static void gfs2_put_super(struct super_block *sb)
888 int error; 840 int error;
889 struct gfs2_jdesc *jd; 841 struct gfs2_jdesc *jd;
890 842
891 /* Unfreeze the filesystem, if we need to */
892
893 mutex_lock(&sdp->sd_freeze_lock);
894 if (sdp->sd_freeze_count)
895 gfs2_glock_dq_uninit(&sdp->sd_freeze_gh);
896 mutex_unlock(&sdp->sd_freeze_lock);
897
898 /* No more recovery requests */ 843 /* No more recovery requests */
899 set_bit(SDF_NORECOVERY, &sdp->sd_flags); 844 set_bit(SDF_NORECOVERY, &sdp->sd_flags);
900 smp_mb(); 845 smp_mb();
@@ -985,7 +930,7 @@ static int gfs2_freeze(struct super_block *sb)
985 return -EINVAL; 930 return -EINVAL;
986 931
987 for (;;) { 932 for (;;) {
988 error = gfs2_freeze_fs(sdp); 933 error = gfs2_lock_fs_check_clean(sdp, &sdp->sd_freeze_gh);
989 if (!error) 934 if (!error)
990 break; 935 break;
991 936
@@ -1013,7 +958,9 @@ static int gfs2_freeze(struct super_block *sb)
1013 958
1014static int gfs2_unfreeze(struct super_block *sb) 959static int gfs2_unfreeze(struct super_block *sb)
1015{ 960{
1016 gfs2_unfreeze_fs(sb->s_fs_info); 961 struct gfs2_sbd *sdp = sb->s_fs_info;
962
963 gfs2_glock_dq_uninit(&sdp->sd_freeze_gh);
1017 return 0; 964 return 0;
1018} 965}
1019 966
@@ -1577,6 +1524,7 @@ out:
1577 /* Case 3 starts here */ 1524 /* Case 3 starts here */
1578 truncate_inode_pages(&inode->i_data, 0); 1525 truncate_inode_pages(&inode->i_data, 0);
1579 gfs2_rs_delete(ip); 1526 gfs2_rs_delete(ip);
1527 gfs2_ordered_del_inode(ip);
1580 clear_inode(inode); 1528 clear_inode(inode);
1581 gfs2_dir_hash_inval(ip); 1529 gfs2_dir_hash_inval(ip);
1582 ip->i_gl->gl_object = NULL; 1530 ip->i_gl->gl_object = NULL;
diff --git a/fs/gfs2/super.h b/fs/gfs2/super.h
index a0464680af0b..90e3322ffa10 100644
--- a/fs/gfs2/super.h
+++ b/fs/gfs2/super.h
@@ -46,9 +46,6 @@ extern void update_statfs(struct gfs2_sbd *sdp, struct buffer_head *m_bh,
46 struct buffer_head *l_bh); 46 struct buffer_head *l_bh);
47extern int gfs2_statfs_sync(struct super_block *sb, int type); 47extern int gfs2_statfs_sync(struct super_block *sb, int type);
48 48
49extern int gfs2_freeze_fs(struct gfs2_sbd *sdp);
50extern void gfs2_unfreeze_fs(struct gfs2_sbd *sdp);
51
52extern struct file_system_type gfs2_fs_type; 49extern struct file_system_type gfs2_fs_type;
53extern struct file_system_type gfs2meta_fs_type; 50extern struct file_system_type gfs2meta_fs_type;
54extern const struct export_operations gfs2_export_ops; 51extern const struct export_operations gfs2_export_ops;
diff --git a/fs/gfs2/sys.c b/fs/gfs2/sys.c
index e6d8d482422f..597a612834dc 100644
--- a/fs/gfs2/sys.c
+++ b/fs/gfs2/sys.c
@@ -91,19 +91,15 @@ static ssize_t uuid_show(struct gfs2_sbd *sdp, char *buf)
91 91
92static ssize_t freeze_show(struct gfs2_sbd *sdp, char *buf) 92static ssize_t freeze_show(struct gfs2_sbd *sdp, char *buf)
93{ 93{
94 unsigned int count; 94 struct super_block *sb = sdp->sd_vfs;
95 95 int frozen = (sb->s_writers.frozen == SB_UNFROZEN) ? 0 : 1;
96 mutex_lock(&sdp->sd_freeze_lock);
97 count = sdp->sd_freeze_count;
98 mutex_unlock(&sdp->sd_freeze_lock);
99 96
100 return snprintf(buf, PAGE_SIZE, "%u\n", count); 97 return snprintf(buf, PAGE_SIZE, "%u\n", frozen);
101} 98}
102 99
103static ssize_t freeze_store(struct gfs2_sbd *sdp, const char *buf, size_t len) 100static ssize_t freeze_store(struct gfs2_sbd *sdp, const char *buf, size_t len)
104{ 101{
105 ssize_t ret = len; 102 int error;
106 int error = 0;
107 int n = simple_strtol(buf, NULL, 0); 103 int n = simple_strtol(buf, NULL, 0);
108 104
109 if (!capable(CAP_SYS_ADMIN)) 105 if (!capable(CAP_SYS_ADMIN))
@@ -111,19 +107,21 @@ static ssize_t freeze_store(struct gfs2_sbd *sdp, const char *buf, size_t len)
111 107
112 switch (n) { 108 switch (n) {
113 case 0: 109 case 0:
114 gfs2_unfreeze_fs(sdp); 110 error = thaw_super(sdp->sd_vfs);
115 break; 111 break;
116 case 1: 112 case 1:
117 error = gfs2_freeze_fs(sdp); 113 error = freeze_super(sdp->sd_vfs);
118 break; 114 break;
119 default: 115 default:
120 ret = -EINVAL; 116 return -EINVAL;
121 } 117 }
122 118
123 if (error) 119 if (error) {
124 fs_warn(sdp, "freeze %d error %d", n, error); 120 fs_warn(sdp, "freeze %d error %d", n, error);
121 return error;
122 }
125 123
126 return ret; 124 return len;
127} 125}
128 126
129static ssize_t withdraw_show(struct gfs2_sbd *sdp, char *buf) 127static ssize_t withdraw_show(struct gfs2_sbd *sdp, char *buf)
@@ -342,6 +340,28 @@ static ssize_t block_store(struct gfs2_sbd *sdp, const char *buf, size_t len)
342 return ret; 340 return ret;
343} 341}
344 342
343static ssize_t wdack_show(struct gfs2_sbd *sdp, char *buf)
344{
345 int val = completion_done(&sdp->sd_wdack) ? 1 : 0;
346
347 return sprintf(buf, "%d\n", val);
348}
349
350static ssize_t wdack_store(struct gfs2_sbd *sdp, const char *buf, size_t len)
351{
352 ssize_t ret = len;
353 int val;
354
355 val = simple_strtol(buf, NULL, 0);
356
357 if ((val == 1) &&
358 !strcmp(sdp->sd_lockstruct.ls_ops->lm_proto_name, "lock_dlm"))
359 complete(&sdp->sd_wdack);
360 else
361 ret = -EINVAL;
362 return ret;
363}
364
345static ssize_t lkfirst_show(struct gfs2_sbd *sdp, char *buf) 365static ssize_t lkfirst_show(struct gfs2_sbd *sdp, char *buf)
346{ 366{
347 struct lm_lockstruct *ls = &sdp->sd_lockstruct; 367 struct lm_lockstruct *ls = &sdp->sd_lockstruct;
@@ -473,7 +493,7 @@ static struct gfs2_attr gdlm_attr_##_name = __ATTR(_name,_mode,_show,_store)
473 493
474GDLM_ATTR(proto_name, 0444, proto_name_show, NULL); 494GDLM_ATTR(proto_name, 0444, proto_name_show, NULL);
475GDLM_ATTR(block, 0644, block_show, block_store); 495GDLM_ATTR(block, 0644, block_show, block_store);
476GDLM_ATTR(withdraw, 0644, withdraw_show, withdraw_store); 496GDLM_ATTR(withdraw, 0644, wdack_show, wdack_store);
477GDLM_ATTR(jid, 0644, jid_show, jid_store); 497GDLM_ATTR(jid, 0644, jid_show, jid_store);
478GDLM_ATTR(first, 0644, lkfirst_show, lkfirst_store); 498GDLM_ATTR(first, 0644, lkfirst_show, lkfirst_store);
479GDLM_ATTR(first_done, 0444, first_done_show, NULL); 499GDLM_ATTR(first_done, 0444, first_done_show, NULL);
diff --git a/fs/gfs2/trans.c b/fs/gfs2/trans.c
index 413627072f36..88162fae27a5 100644
--- a/fs/gfs2/trans.c
+++ b/fs/gfs2/trans.c
@@ -18,6 +18,7 @@
18#include "gfs2.h" 18#include "gfs2.h"
19#include "incore.h" 19#include "incore.h"
20#include "glock.h" 20#include "glock.h"
21#include "inode.h"
21#include "log.h" 22#include "log.h"
22#include "lops.h" 23#include "lops.h"
23#include "meta_io.h" 24#include "meta_io.h"
@@ -142,44 +143,143 @@ void gfs2_trans_end(struct gfs2_sbd *sdp)
142 sb_end_intwrite(sdp->sd_vfs); 143 sb_end_intwrite(sdp->sd_vfs);
143} 144}
144 145
146static struct gfs2_bufdata *gfs2_alloc_bufdata(struct gfs2_glock *gl,
147 struct buffer_head *bh,
148 const struct gfs2_log_operations *lops)
149{
150 struct gfs2_bufdata *bd;
151
152 bd = kmem_cache_zalloc(gfs2_bufdata_cachep, GFP_NOFS | __GFP_NOFAIL);
153 bd->bd_bh = bh;
154 bd->bd_gl = gl;
155 bd->bd_ops = lops;
156 INIT_LIST_HEAD(&bd->bd_list);
157 bh->b_private = bd;
158 return bd;
159}
160
145/** 161/**
146 * gfs2_trans_add_bh - Add a to-be-modified buffer to the current transaction 162 * gfs2_trans_add_data - Add a databuf to the transaction.
147 * @gl: the glock the buffer belongs to 163 * @gl: The inode glock associated with the buffer
148 * @bh: The buffer to add 164 * @bh: The buffer to add
149 * @meta: True in the case of adding metadata
150 * 165 *
166 * This is used in two distinct cases:
167 * i) In ordered write mode
168 * We put the data buffer on a list so that we can ensure that its
169 * synced to disk at the right time
170 * ii) In journaled data mode
171 * We need to journal the data block in the same way as metadata in
172 * the functions above. The difference is that here we have a tag
173 * which is two __be64's being the block number (as per meta data)
174 * and a flag which says whether the data block needs escaping or
175 * not. This means we need a new log entry for each 251 or so data
176 * blocks, which isn't an enormous overhead but twice as much as
177 * for normal metadata blocks.
151 */ 178 */
179void gfs2_trans_add_data(struct gfs2_glock *gl, struct buffer_head *bh)
180{
181 struct gfs2_trans *tr = current->journal_info;
182 struct gfs2_sbd *sdp = gl->gl_sbd;
183 struct address_space *mapping = bh->b_page->mapping;
184 struct gfs2_inode *ip = GFS2_I(mapping->host);
185 struct gfs2_bufdata *bd;
152 186
153void gfs2_trans_add_bh(struct gfs2_glock *gl, struct buffer_head *bh, int meta) 187 if (!gfs2_is_jdata(ip)) {
188 gfs2_ordered_add_inode(ip);
189 return;
190 }
191
192 lock_buffer(bh);
193 gfs2_log_lock(sdp);
194 bd = bh->b_private;
195 if (bd == NULL) {
196 gfs2_log_unlock(sdp);
197 unlock_buffer(bh);
198 if (bh->b_private == NULL)
199 bd = gfs2_alloc_bufdata(gl, bh, &gfs2_databuf_lops);
200 lock_buffer(bh);
201 gfs2_log_lock(sdp);
202 }
203 gfs2_assert(sdp, bd->bd_gl == gl);
204 tr->tr_touched = 1;
205 if (list_empty(&bd->bd_list)) {
206 set_bit(GLF_LFLUSH, &bd->bd_gl->gl_flags);
207 set_bit(GLF_DIRTY, &bd->bd_gl->gl_flags);
208 gfs2_pin(sdp, bd->bd_bh);
209 tr->tr_num_databuf_new++;
210 sdp->sd_log_num_databuf++;
211 list_add_tail(&bd->bd_list, &sdp->sd_log_le_databuf);
212 }
213 gfs2_log_unlock(sdp);
214 unlock_buffer(bh);
215}
216
217static void meta_lo_add(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd)
154{ 218{
219 struct gfs2_meta_header *mh;
220 struct gfs2_trans *tr;
221
222 tr = current->journal_info;
223 tr->tr_touched = 1;
224 if (!list_empty(&bd->bd_list))
225 return;
226 set_bit(GLF_LFLUSH, &bd->bd_gl->gl_flags);
227 set_bit(GLF_DIRTY, &bd->bd_gl->gl_flags);
228 mh = (struct gfs2_meta_header *)bd->bd_bh->b_data;
229 if (unlikely(mh->mh_magic != cpu_to_be32(GFS2_MAGIC))) {
230 printk(KERN_ERR
231 "Attempting to add uninitialised block to journal (inplace block=%lld)\n",
232 (unsigned long long)bd->bd_bh->b_blocknr);
233 BUG();
234 }
235 gfs2_pin(sdp, bd->bd_bh);
236 mh->__pad0 = cpu_to_be64(0);
237 mh->mh_jid = cpu_to_be32(sdp->sd_jdesc->jd_jid);
238 sdp->sd_log_num_buf++;
239 list_add(&bd->bd_list, &sdp->sd_log_le_buf);
240 tr->tr_num_buf_new++;
241}
242
243void gfs2_trans_add_meta(struct gfs2_glock *gl, struct buffer_head *bh)
244{
245
155 struct gfs2_sbd *sdp = gl->gl_sbd; 246 struct gfs2_sbd *sdp = gl->gl_sbd;
156 struct gfs2_bufdata *bd; 247 struct gfs2_bufdata *bd;
157 248
158 lock_buffer(bh); 249 lock_buffer(bh);
159 gfs2_log_lock(sdp); 250 gfs2_log_lock(sdp);
160 bd = bh->b_private; 251 bd = bh->b_private;
161 if (bd) 252 if (bd == NULL) {
162 gfs2_assert(sdp, bd->bd_gl == gl);
163 else {
164 gfs2_log_unlock(sdp); 253 gfs2_log_unlock(sdp);
165 unlock_buffer(bh); 254 unlock_buffer(bh);
166 gfs2_attach_bufdata(gl, bh, meta); 255 lock_page(bh->b_page);
167 bd = bh->b_private; 256 if (bh->b_private == NULL)
257 bd = gfs2_alloc_bufdata(gl, bh, &gfs2_buf_lops);
258 unlock_page(bh->b_page);
168 lock_buffer(bh); 259 lock_buffer(bh);
169 gfs2_log_lock(sdp); 260 gfs2_log_lock(sdp);
170 } 261 }
171 lops_add(sdp, bd); 262 gfs2_assert(sdp, bd->bd_gl == gl);
263 meta_lo_add(sdp, bd);
172 gfs2_log_unlock(sdp); 264 gfs2_log_unlock(sdp);
173 unlock_buffer(bh); 265 unlock_buffer(bh);
174} 266}
175 267
176void gfs2_trans_add_revoke(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd) 268void gfs2_trans_add_revoke(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd)
177{ 269{
270 struct gfs2_glock *gl = bd->bd_gl;
271 struct gfs2_trans *tr = current->journal_info;
272
178 BUG_ON(!list_empty(&bd->bd_list)); 273 BUG_ON(!list_empty(&bd->bd_list));
179 BUG_ON(!list_empty(&bd->bd_ail_st_list)); 274 BUG_ON(!list_empty(&bd->bd_ail_st_list));
180 BUG_ON(!list_empty(&bd->bd_ail_gl_list)); 275 BUG_ON(!list_empty(&bd->bd_ail_gl_list));
181 lops_init_le(bd, &gfs2_revoke_lops); 276 bd->bd_ops = &gfs2_revoke_lops;
182 lops_add(sdp, bd); 277 tr->tr_touched = 1;
278 tr->tr_num_revoke++;
279 sdp->sd_log_num_revoke++;
280 atomic_inc(&gl->gl_revokes);
281 set_bit(GLF_LFLUSH, &gl->gl_flags);
282 list_add(&bd->bd_list, &sdp->sd_log_le_revoke);
183} 283}
184 284
185void gfs2_trans_add_unrevoke(struct gfs2_sbd *sdp, u64 blkno, unsigned int len) 285void gfs2_trans_add_unrevoke(struct gfs2_sbd *sdp, u64 blkno, unsigned int len)
diff --git a/fs/gfs2/trans.h b/fs/gfs2/trans.h
index bf2ae9aeee7a..1e6e7da25a17 100644
--- a/fs/gfs2/trans.h
+++ b/fs/gfs2/trans.h
@@ -39,7 +39,8 @@ extern int gfs2_trans_begin(struct gfs2_sbd *sdp, unsigned int blocks,
39 unsigned int revokes); 39 unsigned int revokes);
40 40
41extern void gfs2_trans_end(struct gfs2_sbd *sdp); 41extern void gfs2_trans_end(struct gfs2_sbd *sdp);
42extern void gfs2_trans_add_bh(struct gfs2_glock *gl, struct buffer_head *bh, int meta); 42extern void gfs2_trans_add_data(struct gfs2_glock *gl, struct buffer_head *bh);
43extern void gfs2_trans_add_meta(struct gfs2_glock *gl, struct buffer_head *bh);
43extern void gfs2_trans_add_revoke(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd); 44extern void gfs2_trans_add_revoke(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd);
44extern void gfs2_trans_add_unrevoke(struct gfs2_sbd *sdp, u64 blkno, unsigned int len); 45extern void gfs2_trans_add_unrevoke(struct gfs2_sbd *sdp, u64 blkno, unsigned int len);
45 46
diff --git a/fs/gfs2/util.c b/fs/gfs2/util.c
index f00d7c5744f6..6402fb69d71b 100644
--- a/fs/gfs2/util.c
+++ b/fs/gfs2/util.c
@@ -54,6 +54,9 @@ int gfs2_lm_withdraw(struct gfs2_sbd *sdp, char *fmt, ...)
54 54
55 kobject_uevent(&sdp->sd_kobj, KOBJ_OFFLINE); 55 kobject_uevent(&sdp->sd_kobj, KOBJ_OFFLINE);
56 56
57 if (!strcmp(sdp->sd_lockstruct.ls_ops->lm_proto_name, "lock_dlm"))
58 wait_for_completion(&sdp->sd_wdack);
59
57 if (lm->lm_unmount) { 60 if (lm->lm_unmount) {
58 fs_err(sdp, "telling LM to unmount\n"); 61 fs_err(sdp, "telling LM to unmount\n");
59 lm->lm_unmount(sdp); 62 lm->lm_unmount(sdp);
diff --git a/fs/gfs2/xattr.c b/fs/gfs2/xattr.c
index 80c25ae79048..ecd37f30ab91 100644
--- a/fs/gfs2/xattr.c
+++ b/fs/gfs2/xattr.c
@@ -270,7 +270,7 @@ static int ea_dealloc_unstuffed(struct gfs2_inode *ip, struct buffer_head *bh,
270 if (error) 270 if (error)
271 goto out_gunlock; 271 goto out_gunlock;
272 272
273 gfs2_trans_add_bh(ip->i_gl, bh, 1); 273 gfs2_trans_add_meta(ip->i_gl, bh);
274 274
275 dataptrs = GFS2_EA2DATAPTRS(ea); 275 dataptrs = GFS2_EA2DATAPTRS(ea);
276 for (x = 0; x < ea->ea_num_ptrs; x++, dataptrs++) { 276 for (x = 0; x < ea->ea_num_ptrs; x++, dataptrs++) {
@@ -309,7 +309,7 @@ static int ea_dealloc_unstuffed(struct gfs2_inode *ip, struct buffer_head *bh,
309 error = gfs2_meta_inode_buffer(ip, &dibh); 309 error = gfs2_meta_inode_buffer(ip, &dibh);
310 if (!error) { 310 if (!error) {
311 ip->i_inode.i_ctime = CURRENT_TIME; 311 ip->i_inode.i_ctime = CURRENT_TIME;
312 gfs2_trans_add_bh(ip->i_gl, dibh, 1); 312 gfs2_trans_add_meta(ip->i_gl, dibh);
313 gfs2_dinode_out(ip, dibh->b_data); 313 gfs2_dinode_out(ip, dibh->b_data);
314 brelse(dibh); 314 brelse(dibh);
315 } 315 }
@@ -509,7 +509,7 @@ static int gfs2_iter_unstuffed(struct gfs2_inode *ip, struct gfs2_ea_header *ea,
509 } 509 }
510 510
511 if (din) { 511 if (din) {
512 gfs2_trans_add_bh(ip->i_gl, bh[x], 1); 512 gfs2_trans_add_meta(ip->i_gl, bh[x]);
513 memcpy(pos, din, cp_size); 513 memcpy(pos, din, cp_size);
514 din += sdp->sd_jbsize; 514 din += sdp->sd_jbsize;
515 } 515 }
@@ -629,7 +629,7 @@ static int ea_alloc_blk(struct gfs2_inode *ip, struct buffer_head **bhp)
629 return error; 629 return error;
630 gfs2_trans_add_unrevoke(sdp, block, 1); 630 gfs2_trans_add_unrevoke(sdp, block, 1);
631 *bhp = gfs2_meta_new(ip->i_gl, block); 631 *bhp = gfs2_meta_new(ip->i_gl, block);
632 gfs2_trans_add_bh(ip->i_gl, *bhp, 1); 632 gfs2_trans_add_meta(ip->i_gl, *bhp);
633 gfs2_metatype_set(*bhp, GFS2_METATYPE_EA, GFS2_FORMAT_EA); 633 gfs2_metatype_set(*bhp, GFS2_METATYPE_EA, GFS2_FORMAT_EA);
634 gfs2_buffer_clear_tail(*bhp, sizeof(struct gfs2_meta_header)); 634 gfs2_buffer_clear_tail(*bhp, sizeof(struct gfs2_meta_header));
635 635
@@ -691,7 +691,7 @@ static int ea_write(struct gfs2_inode *ip, struct gfs2_ea_header *ea,
691 return error; 691 return error;
692 gfs2_trans_add_unrevoke(sdp, block, 1); 692 gfs2_trans_add_unrevoke(sdp, block, 1);
693 bh = gfs2_meta_new(ip->i_gl, block); 693 bh = gfs2_meta_new(ip->i_gl, block);
694 gfs2_trans_add_bh(ip->i_gl, bh, 1); 694 gfs2_trans_add_meta(ip->i_gl, bh);
695 gfs2_metatype_set(bh, GFS2_METATYPE_ED, GFS2_FORMAT_ED); 695 gfs2_metatype_set(bh, GFS2_METATYPE_ED, GFS2_FORMAT_ED);
696 696
697 gfs2_add_inode_blocks(&ip->i_inode, 1); 697 gfs2_add_inode_blocks(&ip->i_inode, 1);
@@ -751,7 +751,7 @@ static int ea_alloc_skeleton(struct gfs2_inode *ip, struct gfs2_ea_request *er,
751 error = gfs2_meta_inode_buffer(ip, &dibh); 751 error = gfs2_meta_inode_buffer(ip, &dibh);
752 if (!error) { 752 if (!error) {
753 ip->i_inode.i_ctime = CURRENT_TIME; 753 ip->i_inode.i_ctime = CURRENT_TIME;
754 gfs2_trans_add_bh(ip->i_gl, dibh, 1); 754 gfs2_trans_add_meta(ip->i_gl, dibh);
755 gfs2_dinode_out(ip, dibh->b_data); 755 gfs2_dinode_out(ip, dibh->b_data);
756 brelse(dibh); 756 brelse(dibh);
757 } 757 }
@@ -834,7 +834,7 @@ static void ea_set_remove_stuffed(struct gfs2_inode *ip,
834 struct gfs2_ea_header *prev = el->el_prev; 834 struct gfs2_ea_header *prev = el->el_prev;
835 u32 len; 835 u32 len;
836 836
837 gfs2_trans_add_bh(ip->i_gl, el->el_bh, 1); 837 gfs2_trans_add_meta(ip->i_gl, el->el_bh);
838 838
839 if (!prev || !GFS2_EA_IS_STUFFED(ea)) { 839 if (!prev || !GFS2_EA_IS_STUFFED(ea)) {
840 ea->ea_type = GFS2_EATYPE_UNUSED; 840 ea->ea_type = GFS2_EATYPE_UNUSED;
@@ -872,7 +872,7 @@ static int ea_set_simple_noalloc(struct gfs2_inode *ip, struct buffer_head *bh,
872 if (error) 872 if (error)
873 return error; 873 return error;
874 874
875 gfs2_trans_add_bh(ip->i_gl, bh, 1); 875 gfs2_trans_add_meta(ip->i_gl, bh);
876 876
877 if (es->ea_split) 877 if (es->ea_split)
878 ea = ea_split_ea(ea); 878 ea = ea_split_ea(ea);
@@ -886,7 +886,7 @@ static int ea_set_simple_noalloc(struct gfs2_inode *ip, struct buffer_head *bh,
886 if (error) 886 if (error)
887 goto out; 887 goto out;
888 ip->i_inode.i_ctime = CURRENT_TIME; 888 ip->i_inode.i_ctime = CURRENT_TIME;
889 gfs2_trans_add_bh(ip->i_gl, dibh, 1); 889 gfs2_trans_add_meta(ip->i_gl, dibh);
890 gfs2_dinode_out(ip, dibh->b_data); 890 gfs2_dinode_out(ip, dibh->b_data);
891 brelse(dibh); 891 brelse(dibh);
892out: 892out:
@@ -901,7 +901,7 @@ static int ea_set_simple_alloc(struct gfs2_inode *ip,
901 struct gfs2_ea_header *ea = es->es_ea; 901 struct gfs2_ea_header *ea = es->es_ea;
902 int error; 902 int error;
903 903
904 gfs2_trans_add_bh(ip->i_gl, es->es_bh, 1); 904 gfs2_trans_add_meta(ip->i_gl, es->es_bh);
905 905
906 if (es->ea_split) 906 if (es->ea_split)
907 ea = ea_split_ea(ea); 907 ea = ea_split_ea(ea);
@@ -997,7 +997,7 @@ static int ea_set_block(struct gfs2_inode *ip, struct gfs2_ea_request *er,
997 goto out; 997 goto out;
998 } 998 }
999 999
1000 gfs2_trans_add_bh(ip->i_gl, indbh, 1); 1000 gfs2_trans_add_meta(ip->i_gl, indbh);
1001 } else { 1001 } else {
1002 u64 blk; 1002 u64 blk;
1003 unsigned int n = 1; 1003 unsigned int n = 1;
@@ -1006,7 +1006,7 @@ static int ea_set_block(struct gfs2_inode *ip, struct gfs2_ea_request *er,
1006 return error; 1006 return error;
1007 gfs2_trans_add_unrevoke(sdp, blk, 1); 1007 gfs2_trans_add_unrevoke(sdp, blk, 1);
1008 indbh = gfs2_meta_new(ip->i_gl, blk); 1008 indbh = gfs2_meta_new(ip->i_gl, blk);
1009 gfs2_trans_add_bh(ip->i_gl, indbh, 1); 1009 gfs2_trans_add_meta(ip->i_gl, indbh);
1010 gfs2_metatype_set(indbh, GFS2_METATYPE_IN, GFS2_FORMAT_IN); 1010 gfs2_metatype_set(indbh, GFS2_METATYPE_IN, GFS2_FORMAT_IN);
1011 gfs2_buffer_clear_tail(indbh, mh_size); 1011 gfs2_buffer_clear_tail(indbh, mh_size);
1012 1012
@@ -1092,7 +1092,7 @@ static int ea_remove_stuffed(struct gfs2_inode *ip, struct gfs2_ea_location *el)
1092 if (error) 1092 if (error)
1093 return error; 1093 return error;
1094 1094
1095 gfs2_trans_add_bh(ip->i_gl, el->el_bh, 1); 1095 gfs2_trans_add_meta(ip->i_gl, el->el_bh);
1096 1096
1097 if (prev) { 1097 if (prev) {
1098 u32 len; 1098 u32 len;
@@ -1109,7 +1109,7 @@ static int ea_remove_stuffed(struct gfs2_inode *ip, struct gfs2_ea_location *el)
1109 error = gfs2_meta_inode_buffer(ip, &dibh); 1109 error = gfs2_meta_inode_buffer(ip, &dibh);
1110 if (!error) { 1110 if (!error) {
1111 ip->i_inode.i_ctime = CURRENT_TIME; 1111 ip->i_inode.i_ctime = CURRENT_TIME;
1112 gfs2_trans_add_bh(ip->i_gl, dibh, 1); 1112 gfs2_trans_add_meta(ip->i_gl, dibh);
1113 gfs2_dinode_out(ip, dibh->b_data); 1113 gfs2_dinode_out(ip, dibh->b_data);
1114 brelse(dibh); 1114 brelse(dibh);
1115 } 1115 }
@@ -1265,7 +1265,7 @@ int gfs2_xattr_acl_chmod(struct gfs2_inode *ip, struct iattr *attr, char *data)
1265 if (GFS2_EA_IS_STUFFED(el.el_ea)) { 1265 if (GFS2_EA_IS_STUFFED(el.el_ea)) {
1266 error = gfs2_trans_begin(sdp, RES_DINODE + RES_EATTR, 0); 1266 error = gfs2_trans_begin(sdp, RES_DINODE + RES_EATTR, 0);
1267 if (error == 0) { 1267 if (error == 0) {
1268 gfs2_trans_add_bh(ip->i_gl, el.el_bh, 1); 1268 gfs2_trans_add_meta(ip->i_gl, el.el_bh);
1269 memcpy(GFS2_EA2DATA(el.el_ea), data, 1269 memcpy(GFS2_EA2DATA(el.el_ea), data,
1270 GFS2_EA_DATA_LEN(el.el_ea)); 1270 GFS2_EA_DATA_LEN(el.el_ea));
1271 } 1271 }
@@ -1352,7 +1352,7 @@ static int ea_dealloc_indirect(struct gfs2_inode *ip)
1352 if (error) 1352 if (error)
1353 goto out_gunlock; 1353 goto out_gunlock;
1354 1354
1355 gfs2_trans_add_bh(ip->i_gl, indbh, 1); 1355 gfs2_trans_add_meta(ip->i_gl, indbh);
1356 1356
1357 eablk = (__be64 *)(indbh->b_data + sizeof(struct gfs2_meta_header)); 1357 eablk = (__be64 *)(indbh->b_data + sizeof(struct gfs2_meta_header));
1358 bstart = 0; 1358 bstart = 0;
@@ -1384,7 +1384,7 @@ static int ea_dealloc_indirect(struct gfs2_inode *ip)
1384 1384
1385 error = gfs2_meta_inode_buffer(ip, &dibh); 1385 error = gfs2_meta_inode_buffer(ip, &dibh);
1386 if (!error) { 1386 if (!error) {
1387 gfs2_trans_add_bh(ip->i_gl, dibh, 1); 1387 gfs2_trans_add_meta(ip->i_gl, dibh);
1388 gfs2_dinode_out(ip, dibh->b_data); 1388 gfs2_dinode_out(ip, dibh->b_data);
1389 brelse(dibh); 1389 brelse(dibh);
1390 } 1390 }
@@ -1434,7 +1434,7 @@ static int ea_dealloc_block(struct gfs2_inode *ip)
1434 1434
1435 error = gfs2_meta_inode_buffer(ip, &dibh); 1435 error = gfs2_meta_inode_buffer(ip, &dibh);
1436 if (!error) { 1436 if (!error) {
1437 gfs2_trans_add_bh(ip->i_gl, dibh, 1); 1437 gfs2_trans_add_meta(ip->i_gl, dibh);
1438 gfs2_dinode_out(ip, dibh->b_data); 1438 gfs2_dinode_out(ip, dibh->b_data);
1439 brelse(dibh); 1439 brelse(dibh);
1440 } 1440 }
diff --git a/fs/hfs/Kconfig b/fs/hfs/Kconfig
index b77c5bc20f8a..998e3a6decf3 100644
--- a/fs/hfs/Kconfig
+++ b/fs/hfs/Kconfig
@@ -1,6 +1,6 @@
1config HFS_FS 1config HFS_FS
2 tristate "Apple Macintosh file system support (EXPERIMENTAL)" 2 tristate "Apple Macintosh file system support"
3 depends on BLOCK && EXPERIMENTAL 3 depends on BLOCK
4 select NLS 4 select NLS
5 help 5 help
6 If you say Y here, you will be able to mount Macintosh-formatted 6 If you say Y here, you will be able to mount Macintosh-formatted
diff --git a/fs/hpfs/inode.c b/fs/hpfs/inode.c
index 5dc06c837105..9edeeb0ea97e 100644
--- a/fs/hpfs/inode.c
+++ b/fs/hpfs/inode.c
@@ -147,7 +147,7 @@ static void hpfs_write_inode_ea(struct inode *i, struct fnode *fnode)
147 /*if (le32_to_cpu(fnode->acl_size_l) || le16_to_cpu(fnode->acl_size_s)) { 147 /*if (le32_to_cpu(fnode->acl_size_l) || le16_to_cpu(fnode->acl_size_s)) {
148 Some unknown structures like ACL may be in fnode, 148 Some unknown structures like ACL may be in fnode,
149 we'd better not overwrite them 149 we'd better not overwrite them
150 hpfs_error(i->i_sb, "fnode %08x has some unknown HPFS386 stuctures", i->i_ino); 150 hpfs_error(i->i_sb, "fnode %08x has some unknown HPFS386 structures", i->i_ino);
151 } else*/ if (hpfs_sb(i->i_sb)->sb_eas >= 2) { 151 } else*/ if (hpfs_sb(i->i_sb)->sb_eas >= 2) {
152 __le32 ea; 152 __le32 ea;
153 if (!uid_eq(i->i_uid, hpfs_sb(i->i_sb)->sb_uid) || hpfs_inode->i_ea_uid) { 153 if (!uid_eq(i->i_uid, hpfs_sb(i->i_sb)->sb_uid) || hpfs_inode->i_ea_uid) {
diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c
index a2862339323b..81cc7eaff863 100644
--- a/fs/jbd/journal.c
+++ b/fs/jbd/journal.c
@@ -446,7 +446,8 @@ int __log_start_commit(journal_t *journal, tid_t target)
446 * currently running transaction (if it exists). Otherwise, 446 * currently running transaction (if it exists). Otherwise,
447 * the target tid must be an old one. 447 * the target tid must be an old one.
448 */ 448 */
449 if (journal->j_running_transaction && 449 if (journal->j_commit_request != target &&
450 journal->j_running_transaction &&
450 journal->j_running_transaction->t_tid == target) { 451 journal->j_running_transaction->t_tid == target) {
451 /* 452 /*
452 * We want a new commit: OK, mark the request and wakeup the 453 * We want a new commit: OK, mark the request and wakeup the
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index 42f6615af0ac..df9f29760efa 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -209,7 +209,8 @@ repeat:
209 if (!new_transaction) 209 if (!new_transaction)
210 goto alloc_transaction; 210 goto alloc_transaction;
211 write_lock(&journal->j_state_lock); 211 write_lock(&journal->j_state_lock);
212 if (!journal->j_running_transaction) { 212 if (!journal->j_running_transaction &&
213 !journal->j_barrier_count) {
213 jbd2_get_transaction(journal, new_transaction); 214 jbd2_get_transaction(journal, new_transaction);
214 new_transaction = NULL; 215 new_transaction = NULL;
215 } 216 }
@@ -1839,7 +1840,6 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh,
1839 1840
1840 BUFFER_TRACE(bh, "entry"); 1841 BUFFER_TRACE(bh, "entry");
1841 1842
1842retry:
1843 /* 1843 /*
1844 * It is safe to proceed here without the j_list_lock because the 1844 * It is safe to proceed here without the j_list_lock because the
1845 * buffers cannot be stolen by try_to_free_buffers as long as we are 1845 * buffers cannot be stolen by try_to_free_buffers as long as we are
@@ -1934,14 +1934,11 @@ retry:
1934 * for commit and try again. 1934 * for commit and try again.
1935 */ 1935 */
1936 if (partial_page) { 1936 if (partial_page) {
1937 tid_t tid = journal->j_committing_transaction->t_tid;
1938
1939 jbd2_journal_put_journal_head(jh); 1937 jbd2_journal_put_journal_head(jh);
1940 spin_unlock(&journal->j_list_lock); 1938 spin_unlock(&journal->j_list_lock);
1941 jbd_unlock_bh_state(bh); 1939 jbd_unlock_bh_state(bh);
1942 write_unlock(&journal->j_state_lock); 1940 write_unlock(&journal->j_state_lock);
1943 jbd2_log_wait_commit(journal, tid); 1941 return -EBUSY;
1944 goto retry;
1945 } 1942 }
1946 /* 1943 /*
1947 * OK, buffer won't be reachable after truncate. We just set 1944 * OK, buffer won't be reachable after truncate. We just set
@@ -2002,21 +1999,23 @@ zap_buffer_unlocked:
2002 * @page: page to flush 1999 * @page: page to flush
2003 * @offset: length of page to invalidate. 2000 * @offset: length of page to invalidate.
2004 * 2001 *
2005 * Reap page buffers containing data after offset in page. 2002 * Reap page buffers containing data after offset in page. Can return -EBUSY
2006 * 2003 * if buffers are part of the committing transaction and the page is straddling
2004 * i_size. Caller then has to wait for current commit and try again.
2007 */ 2005 */
2008void jbd2_journal_invalidatepage(journal_t *journal, 2006int jbd2_journal_invalidatepage(journal_t *journal,
2009 struct page *page, 2007 struct page *page,
2010 unsigned long offset) 2008 unsigned long offset)
2011{ 2009{
2012 struct buffer_head *head, *bh, *next; 2010 struct buffer_head *head, *bh, *next;
2013 unsigned int curr_off = 0; 2011 unsigned int curr_off = 0;
2014 int may_free = 1; 2012 int may_free = 1;
2013 int ret = 0;
2015 2014
2016 if (!PageLocked(page)) 2015 if (!PageLocked(page))
2017 BUG(); 2016 BUG();
2018 if (!page_has_buffers(page)) 2017 if (!page_has_buffers(page))
2019 return; 2018 return 0;
2020 2019
2021 /* We will potentially be playing with lists other than just the 2020 /* We will potentially be playing with lists other than just the
2022 * data lists (especially for journaled data mode), so be 2021 * data lists (especially for journaled data mode), so be
@@ -2030,9 +2029,11 @@ void jbd2_journal_invalidatepage(journal_t *journal,
2030 if (offset <= curr_off) { 2029 if (offset <= curr_off) {
2031 /* This block is wholly outside the truncation point */ 2030 /* This block is wholly outside the truncation point */
2032 lock_buffer(bh); 2031 lock_buffer(bh);
2033 may_free &= journal_unmap_buffer(journal, bh, 2032 ret = journal_unmap_buffer(journal, bh, offset > 0);
2034 offset > 0);
2035 unlock_buffer(bh); 2033 unlock_buffer(bh);
2034 if (ret < 0)
2035 return ret;
2036 may_free &= ret;
2036 } 2037 }
2037 curr_off = next_off; 2038 curr_off = next_off;
2038 bh = next; 2039 bh = next;
@@ -2043,6 +2044,7 @@ void jbd2_journal_invalidatepage(journal_t *journal,
2043 if (may_free && try_to_free_buffers(page)) 2044 if (may_free && try_to_free_buffers(page))
2044 J_ASSERT(!page_has_buffers(page)); 2045 J_ASSERT(!page_has_buffers(page));
2045 } 2046 }
2047 return 0;
2046} 2048}
2047 2049
2048/* 2050/*
diff --git a/fs/jffs2/Kconfig b/fs/jffs2/Kconfig
index 6ae169cd8faa..d8bb6c411e96 100644
--- a/fs/jffs2/Kconfig
+++ b/fs/jffs2/Kconfig
@@ -50,8 +50,8 @@ config JFFS2_FS_WBUF_VERIFY
50 write-buffer, and check for errors. 50 write-buffer, and check for errors.
51 51
52config JFFS2_SUMMARY 52config JFFS2_SUMMARY
53 bool "JFFS2 summary support (EXPERIMENTAL)" 53 bool "JFFS2 summary support"
54 depends on JFFS2_FS && EXPERIMENTAL 54 depends on JFFS2_FS
55 default n 55 default n
56 help 56 help
57 This feature makes it possible to use summary information 57 This feature makes it possible to use summary information
@@ -63,8 +63,8 @@ config JFFS2_SUMMARY
63 If unsure, say 'N'. 63 If unsure, say 'N'.
64 64
65config JFFS2_FS_XATTR 65config JFFS2_FS_XATTR
66 bool "JFFS2 XATTR support (EXPERIMENTAL)" 66 bool "JFFS2 XATTR support"
67 depends on JFFS2_FS && EXPERIMENTAL 67 depends on JFFS2_FS
68 default n 68 default n
69 help 69 help
70 Extended attributes are name:value pairs associated with inodes by 70 Extended attributes are name:value pairs associated with inodes by
@@ -173,7 +173,7 @@ config JFFS2_CMODE_PRIORITY
173 successful one. 173 successful one.
174 174
175config JFFS2_CMODE_SIZE 175config JFFS2_CMODE_SIZE
176 bool "size (EXPERIMENTAL)" 176 bool "size"
177 help 177 help
178 Tries all compressors and chooses the one which has the smallest 178 Tries all compressors and chooses the one which has the smallest
179 result. 179 result.
diff --git a/fs/jfs/super.c b/fs/jfs/super.c
index 1a543be09c79..060ba638becb 100644
--- a/fs/jfs/super.c
+++ b/fs/jfs/super.c
@@ -154,7 +154,7 @@ static int jfs_statfs(struct dentry *dentry, struct kstatfs *buf)
154 /* 154 /*
155 * If we really return the number of allocated & free inodes, some 155 * If we really return the number of allocated & free inodes, some
156 * applications will fail because they won't see enough free inodes. 156 * applications will fail because they won't see enough free inodes.
157 * We'll try to calculate some guess as to how may inodes we can 157 * We'll try to calculate some guess as to how many inodes we can
158 * really allocate 158 * really allocate
159 * 159 *
160 * buf->f_files = atomic_read(&imap->im_numinos); 160 * buf->f_files = atomic_read(&imap->im_numinos);
diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c
index 54f9e6ce0430..52e5120bb159 100644
--- a/fs/lockd/clntproc.c
+++ b/fs/lockd/clntproc.c
@@ -550,6 +550,9 @@ again:
550 status = nlmclnt_block(block, req, NLMCLNT_POLL_TIMEOUT); 550 status = nlmclnt_block(block, req, NLMCLNT_POLL_TIMEOUT);
551 if (status < 0) 551 if (status < 0)
552 break; 552 break;
553 /* Resend the blocking lock request after a server reboot */
554 if (resp->status == nlm_lck_denied_grace_period)
555 continue;
553 if (resp->status != nlm_lck_blocked) 556 if (resp->status != nlm_lck_blocked)
554 break; 557 break;
555 } 558 }
diff --git a/fs/logfs/Kconfig b/fs/logfs/Kconfig
index daf9a9b32dd3..09ed066c0221 100644
--- a/fs/logfs/Kconfig
+++ b/fs/logfs/Kconfig
@@ -1,6 +1,6 @@
1config LOGFS 1config LOGFS
2 tristate "LogFS file system (EXPERIMENTAL)" 2 tristate "LogFS file system"
3 depends on (MTD || BLOCK) && EXPERIMENTAL 3 depends on (MTD || BLOCK)
4 select ZLIB_INFLATE 4 select ZLIB_INFLATE
5 select ZLIB_DEFLATE 5 select ZLIB_DEFLATE
6 select CRC32 6 select CRC32
diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c
index 4fa788c93f46..434b93ec0970 100644
--- a/fs/nfs/blocklayout/blocklayout.c
+++ b/fs/nfs/blocklayout/blocklayout.c
@@ -1273,6 +1273,7 @@ static const struct nfs_pageio_ops bl_pg_write_ops = {
1273static struct pnfs_layoutdriver_type blocklayout_type = { 1273static struct pnfs_layoutdriver_type blocklayout_type = {
1274 .id = LAYOUT_BLOCK_VOLUME, 1274 .id = LAYOUT_BLOCK_VOLUME,
1275 .name = "LAYOUT_BLOCK_VOLUME", 1275 .name = "LAYOUT_BLOCK_VOLUME",
1276 .owner = THIS_MODULE,
1276 .read_pagelist = bl_read_pagelist, 1277 .read_pagelist = bl_read_pagelist,
1277 .write_pagelist = bl_write_pagelist, 1278 .write_pagelist = bl_write_pagelist,
1278 .alloc_layout_hdr = bl_alloc_layout_hdr, 1279 .alloc_layout_hdr = bl_alloc_layout_hdr,
diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c
index c89b26bc9759..2960512792c2 100644
--- a/fs/nfs/callback_proc.c
+++ b/fs/nfs/callback_proc.c
@@ -183,60 +183,15 @@ static u32 initiate_file_draining(struct nfs_client *clp,
183static u32 initiate_bulk_draining(struct nfs_client *clp, 183static u32 initiate_bulk_draining(struct nfs_client *clp,
184 struct cb_layoutrecallargs *args) 184 struct cb_layoutrecallargs *args)
185{ 185{
186 struct nfs_server *server; 186 int stat;
187 struct pnfs_layout_hdr *lo;
188 struct inode *ino;
189 u32 rv = NFS4ERR_NOMATCHING_LAYOUT;
190 struct pnfs_layout_hdr *tmp;
191 LIST_HEAD(recall_list);
192 LIST_HEAD(free_me_list);
193 struct pnfs_layout_range range = {
194 .iomode = IOMODE_ANY,
195 .offset = 0,
196 .length = NFS4_MAX_UINT64,
197 };
198
199 spin_lock(&clp->cl_lock);
200 rcu_read_lock();
201 list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) {
202 if ((args->cbl_recall_type == RETURN_FSID) &&
203 memcmp(&server->fsid, &args->cbl_fsid,
204 sizeof(struct nfs_fsid)))
205 continue;
206
207 list_for_each_entry(lo, &server->layouts, plh_layouts) {
208 ino = igrab(lo->plh_inode);
209 if (ino)
210 continue;
211 spin_lock(&ino->i_lock);
212 /* Is this layout in the process of being freed? */
213 if (NFS_I(ino)->layout != lo) {
214 spin_unlock(&ino->i_lock);
215 iput(ino);
216 continue;
217 }
218 pnfs_get_layout_hdr(lo);
219 spin_unlock(&ino->i_lock);
220 list_add(&lo->plh_bulk_recall, &recall_list);
221 }
222 }
223 rcu_read_unlock();
224 spin_unlock(&clp->cl_lock);
225 187
226 list_for_each_entry_safe(lo, tmp, 188 if (args->cbl_recall_type == RETURN_FSID)
227 &recall_list, plh_bulk_recall) { 189 stat = pnfs_destroy_layouts_byfsid(clp, &args->cbl_fsid, true);
228 ino = lo->plh_inode; 190 else
229 spin_lock(&ino->i_lock); 191 stat = pnfs_destroy_layouts_byclid(clp, true);
230 set_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags); 192 if (stat != 0)
231 if (pnfs_mark_matching_lsegs_invalid(lo, &free_me_list, &range)) 193 return NFS4ERR_DELAY;
232 rv = NFS4ERR_DELAY; 194 return NFS4ERR_NOMATCHING_LAYOUT;
233 list_del_init(&lo->plh_bulk_recall);
234 spin_unlock(&ino->i_lock);
235 pnfs_free_lseg_list(&free_me_list);
236 pnfs_put_layout_hdr(lo);
237 iput(ino);
238 }
239 return rv;
240} 195}
241 196
242static u32 do_callback_layoutrecall(struct nfs_client *clp, 197static u32 do_callback_layoutrecall(struct nfs_client *clp,
diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c
index 81c5eec3cf38..6390a4b5fee7 100644
--- a/fs/nfs/delegation.c
+++ b/fs/nfs/delegation.c
@@ -55,7 +55,8 @@ int nfs4_have_delegation(struct inode *inode, fmode_t flags)
55 flags &= FMODE_READ|FMODE_WRITE; 55 flags &= FMODE_READ|FMODE_WRITE;
56 rcu_read_lock(); 56 rcu_read_lock();
57 delegation = rcu_dereference(NFS_I(inode)->delegation); 57 delegation = rcu_dereference(NFS_I(inode)->delegation);
58 if (delegation != NULL && (delegation->type & flags) == flags) { 58 if (delegation != NULL && (delegation->type & flags) == flags &&
59 !test_bit(NFS_DELEGATION_RETURNING, &delegation->flags)) {
59 nfs_mark_delegation_referenced(delegation); 60 nfs_mark_delegation_referenced(delegation);
60 ret = 1; 61 ret = 1;
61 } 62 }
@@ -70,8 +71,10 @@ static int nfs_delegation_claim_locks(struct nfs_open_context *ctx, struct nfs4_
70 int status = 0; 71 int status = 0;
71 72
72 if (inode->i_flock == NULL) 73 if (inode->i_flock == NULL)
73 goto out; 74 return 0;
74 75
76 if (inode->i_flock == NULL)
77 goto out;
75 /* Protect inode->i_flock using the file locks lock */ 78 /* Protect inode->i_flock using the file locks lock */
76 lock_flocks(); 79 lock_flocks();
77 for (fl = inode->i_flock; fl != NULL; fl = fl->fl_next) { 80 for (fl = inode->i_flock; fl != NULL; fl = fl->fl_next) {
@@ -94,7 +97,9 @@ static int nfs_delegation_claim_opens(struct inode *inode, const nfs4_stateid *s
94{ 97{
95 struct nfs_inode *nfsi = NFS_I(inode); 98 struct nfs_inode *nfsi = NFS_I(inode);
96 struct nfs_open_context *ctx; 99 struct nfs_open_context *ctx;
100 struct nfs4_state_owner *sp;
97 struct nfs4_state *state; 101 struct nfs4_state *state;
102 unsigned int seq;
98 int err; 103 int err;
99 104
100again: 105again:
@@ -109,9 +114,16 @@ again:
109 continue; 114 continue;
110 get_nfs_open_context(ctx); 115 get_nfs_open_context(ctx);
111 spin_unlock(&inode->i_lock); 116 spin_unlock(&inode->i_lock);
117 sp = state->owner;
118 /* Block nfs4_proc_unlck */
119 mutex_lock(&sp->so_delegreturn_mutex);
120 seq = raw_seqcount_begin(&sp->so_reclaim_seqcount);
112 err = nfs4_open_delegation_recall(ctx, state, stateid); 121 err = nfs4_open_delegation_recall(ctx, state, stateid);
113 if (err >= 0) 122 if (!err)
114 err = nfs_delegation_claim_locks(ctx, state); 123 err = nfs_delegation_claim_locks(ctx, state);
124 if (!err && read_seqcount_retry(&sp->so_reclaim_seqcount, seq))
125 err = -EAGAIN;
126 mutex_unlock(&sp->so_delegreturn_mutex);
115 put_nfs_open_context(ctx); 127 put_nfs_open_context(ctx);
116 if (err != 0) 128 if (err != 0)
117 return err; 129 return err;
@@ -182,39 +194,91 @@ static struct inode *nfs_delegation_grab_inode(struct nfs_delegation *delegation
182} 194}
183 195
184static struct nfs_delegation * 196static struct nfs_delegation *
197nfs_start_delegation_return_locked(struct nfs_inode *nfsi)
198{
199 struct nfs_delegation *ret = NULL;
200 struct nfs_delegation *delegation = rcu_dereference(nfsi->delegation);
201
202 if (delegation == NULL)
203 goto out;
204 spin_lock(&delegation->lock);
205 if (!test_and_set_bit(NFS_DELEGATION_RETURNING, &delegation->flags))
206 ret = delegation;
207 spin_unlock(&delegation->lock);
208out:
209 return ret;
210}
211
212static struct nfs_delegation *
213nfs_start_delegation_return(struct nfs_inode *nfsi)
214{
215 struct nfs_delegation *delegation;
216
217 rcu_read_lock();
218 delegation = nfs_start_delegation_return_locked(nfsi);
219 rcu_read_unlock();
220 return delegation;
221}
222
223static void
224nfs_abort_delegation_return(struct nfs_delegation *delegation,
225 struct nfs_client *clp)
226{
227
228 spin_lock(&delegation->lock);
229 clear_bit(NFS_DELEGATION_RETURNING, &delegation->flags);
230 set_bit(NFS_DELEGATION_RETURN, &delegation->flags);
231 spin_unlock(&delegation->lock);
232 set_bit(NFS4CLNT_DELEGRETURN, &clp->cl_state);
233}
234
235static struct nfs_delegation *
185nfs_detach_delegation_locked(struct nfs_inode *nfsi, 236nfs_detach_delegation_locked(struct nfs_inode *nfsi,
186 struct nfs_server *server) 237 struct nfs_delegation *delegation,
238 struct nfs_client *clp)
187{ 239{
188 struct nfs_delegation *delegation = 240 struct nfs_delegation *deleg_cur =
189 rcu_dereference_protected(nfsi->delegation, 241 rcu_dereference_protected(nfsi->delegation,
190 lockdep_is_held(&server->nfs_client->cl_lock)); 242 lockdep_is_held(&clp->cl_lock));
191 243
192 if (delegation == NULL) 244 if (deleg_cur == NULL || delegation != deleg_cur)
193 goto nomatch; 245 return NULL;
194 246
195 spin_lock(&delegation->lock); 247 spin_lock(&delegation->lock);
248 set_bit(NFS_DELEGATION_RETURNING, &delegation->flags);
196 list_del_rcu(&delegation->super_list); 249 list_del_rcu(&delegation->super_list);
197 delegation->inode = NULL; 250 delegation->inode = NULL;
198 nfsi->delegation_state = 0; 251 nfsi->delegation_state = 0;
199 rcu_assign_pointer(nfsi->delegation, NULL); 252 rcu_assign_pointer(nfsi->delegation, NULL);
200 spin_unlock(&delegation->lock); 253 spin_unlock(&delegation->lock);
201 return delegation; 254 return delegation;
202nomatch:
203 return NULL;
204} 255}
205 256
206static struct nfs_delegation *nfs_detach_delegation(struct nfs_inode *nfsi, 257static struct nfs_delegation *nfs_detach_delegation(struct nfs_inode *nfsi,
207 struct nfs_server *server) 258 struct nfs_delegation *delegation,
259 struct nfs_server *server)
208{ 260{
209 struct nfs_client *clp = server->nfs_client; 261 struct nfs_client *clp = server->nfs_client;
210 struct nfs_delegation *delegation;
211 262
212 spin_lock(&clp->cl_lock); 263 spin_lock(&clp->cl_lock);
213 delegation = nfs_detach_delegation_locked(nfsi, server); 264 delegation = nfs_detach_delegation_locked(nfsi, delegation, clp);
214 spin_unlock(&clp->cl_lock); 265 spin_unlock(&clp->cl_lock);
215 return delegation; 266 return delegation;
216} 267}
217 268
269static struct nfs_delegation *
270nfs_inode_detach_delegation(struct inode *inode)
271{
272 struct nfs_inode *nfsi = NFS_I(inode);
273 struct nfs_server *server = NFS_SERVER(inode);
274 struct nfs_delegation *delegation;
275
276 delegation = nfs_start_delegation_return(nfsi);
277 if (delegation == NULL)
278 return NULL;
279 return nfs_detach_delegation(nfsi, delegation, server);
280}
281
218/** 282/**
219 * nfs_inode_set_delegation - set up a delegation on an inode 283 * nfs_inode_set_delegation - set up a delegation on an inode
220 * @inode: inode to which delegation applies 284 * @inode: inode to which delegation applies
@@ -268,7 +332,10 @@ int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct
268 delegation = NULL; 332 delegation = NULL;
269 goto out; 333 goto out;
270 } 334 }
271 freeme = nfs_detach_delegation_locked(nfsi, server); 335 freeme = nfs_detach_delegation_locked(nfsi,
336 old_delegation, clp);
337 if (freeme == NULL)
338 goto out;
272 } 339 }
273 list_add_rcu(&delegation->super_list, &server->delegations); 340 list_add_rcu(&delegation->super_list, &server->delegations);
274 nfsi->delegation_state = delegation->type; 341 nfsi->delegation_state = delegation->type;
@@ -292,19 +359,29 @@ out:
292/* 359/*
293 * Basic procedure for returning a delegation to the server 360 * Basic procedure for returning a delegation to the server
294 */ 361 */
295static int __nfs_inode_return_delegation(struct inode *inode, struct nfs_delegation *delegation, int issync) 362static int nfs_end_delegation_return(struct inode *inode, struct nfs_delegation *delegation, int issync)
296{ 363{
364 struct nfs_client *clp = NFS_SERVER(inode)->nfs_client;
297 struct nfs_inode *nfsi = NFS_I(inode); 365 struct nfs_inode *nfsi = NFS_I(inode);
298 int err; 366 int err;
299 367
300 /* 368 if (delegation == NULL)
301 * Guard against new delegated open/lock/unlock calls and against 369 return 0;
302 * state recovery 370 do {
303 */ 371 err = nfs_delegation_claim_opens(inode, &delegation->stateid);
304 down_write(&nfsi->rwsem); 372 if (!issync || err != -EAGAIN)
305 err = nfs_delegation_claim_opens(inode, &delegation->stateid); 373 break;
306 up_write(&nfsi->rwsem); 374 /*
307 if (err) 375 * Guard against state recovery
376 */
377 err = nfs4_wait_clnt_recover(clp);
378 } while (err == 0);
379
380 if (err) {
381 nfs_abort_delegation_return(delegation, clp);
382 goto out;
383 }
384 if (!nfs_detach_delegation(nfsi, delegation, NFS_SERVER(inode)))
308 goto out; 385 goto out;
309 386
310 err = nfs_do_return_delegation(inode, delegation, issync); 387 err = nfs_do_return_delegation(inode, delegation, issync);
@@ -340,13 +417,10 @@ restart:
340 inode = nfs_delegation_grab_inode(delegation); 417 inode = nfs_delegation_grab_inode(delegation);
341 if (inode == NULL) 418 if (inode == NULL)
342 continue; 419 continue;
343 delegation = nfs_detach_delegation(NFS_I(inode), 420 delegation = nfs_start_delegation_return_locked(NFS_I(inode));
344 server);
345 rcu_read_unlock(); 421 rcu_read_unlock();
346 422
347 if (delegation != NULL) 423 err = nfs_end_delegation_return(inode, delegation, 0);
348 err = __nfs_inode_return_delegation(inode,
349 delegation, 0);
350 iput(inode); 424 iput(inode);
351 if (!err) 425 if (!err)
352 goto restart; 426 goto restart;
@@ -367,15 +441,11 @@ restart:
367 */ 441 */
368void nfs_inode_return_delegation_noreclaim(struct inode *inode) 442void nfs_inode_return_delegation_noreclaim(struct inode *inode)
369{ 443{
370 struct nfs_server *server = NFS_SERVER(inode);
371 struct nfs_inode *nfsi = NFS_I(inode);
372 struct nfs_delegation *delegation; 444 struct nfs_delegation *delegation;
373 445
374 if (rcu_access_pointer(nfsi->delegation) != NULL) { 446 delegation = nfs_inode_detach_delegation(inode);
375 delegation = nfs_detach_delegation(nfsi, server); 447 if (delegation != NULL)
376 if (delegation != NULL) 448 nfs_do_return_delegation(inode, delegation, 0);
377 nfs_do_return_delegation(inode, delegation, 0);
378 }
379} 449}
380 450
381/** 451/**
@@ -390,18 +460,14 @@ void nfs_inode_return_delegation_noreclaim(struct inode *inode)
390 */ 460 */
391int nfs4_inode_return_delegation(struct inode *inode) 461int nfs4_inode_return_delegation(struct inode *inode)
392{ 462{
393 struct nfs_server *server = NFS_SERVER(inode);
394 struct nfs_inode *nfsi = NFS_I(inode); 463 struct nfs_inode *nfsi = NFS_I(inode);
395 struct nfs_delegation *delegation; 464 struct nfs_delegation *delegation;
396 int err = 0; 465 int err = 0;
397 466
398 nfs_wb_all(inode); 467 nfs_wb_all(inode);
399 if (rcu_access_pointer(nfsi->delegation) != NULL) { 468 delegation = nfs_start_delegation_return(nfsi);
400 delegation = nfs_detach_delegation(nfsi, server); 469 if (delegation != NULL)
401 if (delegation != NULL) { 470 err = nfs_end_delegation_return(inode, delegation, 1);
402 err = __nfs_inode_return_delegation(inode, delegation, 1);
403 }
404 }
405 return err; 471 return err;
406} 472}
407 473
@@ -471,7 +537,7 @@ void nfs_remove_bad_delegation(struct inode *inode)
471{ 537{
472 struct nfs_delegation *delegation; 538 struct nfs_delegation *delegation;
473 539
474 delegation = nfs_detach_delegation(NFS_I(inode), NFS_SERVER(inode)); 540 delegation = nfs_inode_detach_delegation(inode);
475 if (delegation) { 541 if (delegation) {
476 nfs_inode_find_state_and_recover(inode, &delegation->stateid); 542 nfs_inode_find_state_and_recover(inode, &delegation->stateid);
477 nfs_free_delegation(delegation); 543 nfs_free_delegation(delegation);
@@ -649,7 +715,7 @@ restart:
649 if (inode == NULL) 715 if (inode == NULL)
650 continue; 716 continue;
651 delegation = nfs_detach_delegation(NFS_I(inode), 717 delegation = nfs_detach_delegation(NFS_I(inode),
652 server); 718 delegation, server);
653 rcu_read_unlock(); 719 rcu_read_unlock();
654 720
655 if (delegation != NULL) 721 if (delegation != NULL)
diff --git a/fs/nfs/delegation.h b/fs/nfs/delegation.h
index bbc6a4dba0d8..d54d4fca6793 100644
--- a/fs/nfs/delegation.h
+++ b/fs/nfs/delegation.h
@@ -29,6 +29,7 @@ enum {
29 NFS_DELEGATION_NEED_RECLAIM = 0, 29 NFS_DELEGATION_NEED_RECLAIM = 0,
30 NFS_DELEGATION_RETURN, 30 NFS_DELEGATION_RETURN,
31 NFS_DELEGATION_REFERENCED, 31 NFS_DELEGATION_REFERENCED,
32 NFS_DELEGATION_RETURNING,
32}; 33};
33 34
34int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res); 35int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res);
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 32e6c53520e2..1b2d7eb93796 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -2153,12 +2153,16 @@ static int nfs_open_permission_mask(int openflags)
2153{ 2153{
2154 int mask = 0; 2154 int mask = 0;
2155 2155
2156 if ((openflags & O_ACCMODE) != O_WRONLY) 2156 if (openflags & __FMODE_EXEC) {
2157 mask |= MAY_READ; 2157 /* ONLY check exec rights */
2158 if ((openflags & O_ACCMODE) != O_RDONLY) 2158 mask = MAY_EXEC;
2159 mask |= MAY_WRITE; 2159 } else {
2160 if (openflags & __FMODE_EXEC) 2160 if ((openflags & O_ACCMODE) != O_WRONLY)
2161 mask |= MAY_EXEC; 2161 mask |= MAY_READ;
2162 if ((openflags & O_ACCMODE) != O_RDONLY)
2163 mask |= MAY_WRITE;
2164 }
2165
2162 return mask; 2166 return mask;
2163} 2167}
2164 2168
diff --git a/fs/nfs/getroot.c b/fs/nfs/getroot.c
index 033803c36644..44efaa8c5f78 100644
--- a/fs/nfs/getroot.c
+++ b/fs/nfs/getroot.c
@@ -126,8 +126,7 @@ struct dentry *nfs_get_root(struct super_block *sb, struct nfs_fh *mntfh,
126 } 126 }
127 spin_unlock(&ret->d_lock); 127 spin_unlock(&ret->d_lock);
128out: 128out:
129 if (name) 129 kfree(name);
130 kfree(name);
131 nfs_free_fattr(fsinfo.fattr); 130 nfs_free_fattr(fsinfo.fattr);
132 return ret; 131 return ret;
133} 132}
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index ce2b0394c4d6..468ba8bf0f56 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -694,10 +694,7 @@ static void __put_nfs_open_context(struct nfs_open_context *ctx, int is_sync)
694 if (ctx->cred != NULL) 694 if (ctx->cred != NULL)
695 put_rpccred(ctx->cred); 695 put_rpccred(ctx->cred);
696 dput(ctx->dentry); 696 dput(ctx->dentry);
697 if (is_sync) 697 nfs_sb_deactive(sb);
698 nfs_sb_deactive(sb);
699 else
700 nfs_sb_deactive_async(sb);
701 kfree(ctx->mdsthreshold); 698 kfree(ctx->mdsthreshold);
702 kfree(ctx); 699 kfree(ctx);
703} 700}
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index f0e6c7df1a07..541c9ebdbc5a 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -329,7 +329,6 @@ extern int __init register_nfs_fs(void);
329extern void __exit unregister_nfs_fs(void); 329extern void __exit unregister_nfs_fs(void);
330extern void nfs_sb_active(struct super_block *sb); 330extern void nfs_sb_active(struct super_block *sb);
331extern void nfs_sb_deactive(struct super_block *sb); 331extern void nfs_sb_deactive(struct super_block *sb);
332extern void nfs_sb_deactive_async(struct super_block *sb);
333 332
334/* namespace.c */ 333/* namespace.c */
335#define NFS_PATH_CANONICAL 1 334#define NFS_PATH_CANONICAL 1
diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c
index dd057bc6b65b..fc8dc20fdeb9 100644
--- a/fs/nfs/namespace.c
+++ b/fs/nfs/namespace.c
@@ -177,11 +177,31 @@ out_nofree:
177 return mnt; 177 return mnt;
178} 178}
179 179
180static int
181nfs_namespace_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
182{
183 if (NFS_FH(dentry->d_inode)->size != 0)
184 return nfs_getattr(mnt, dentry, stat);
185 generic_fillattr(dentry->d_inode, stat);
186 return 0;
187}
188
189static int
190nfs_namespace_setattr(struct dentry *dentry, struct iattr *attr)
191{
192 if (NFS_FH(dentry->d_inode)->size != 0)
193 return nfs_setattr(dentry, attr);
194 return -EACCES;
195}
196
180const struct inode_operations nfs_mountpoint_inode_operations = { 197const struct inode_operations nfs_mountpoint_inode_operations = {
181 .getattr = nfs_getattr, 198 .getattr = nfs_getattr,
199 .setattr = nfs_setattr,
182}; 200};
183 201
184const struct inode_operations nfs_referral_inode_operations = { 202const struct inode_operations nfs_referral_inode_operations = {
203 .getattr = nfs_namespace_getattr,
204 .setattr = nfs_namespace_setattr,
185}; 205};
186 206
187static void nfs_expire_automounts(struct work_struct *work) 207static void nfs_expire_automounts(struct work_struct *work)
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index a3f488b074a2..944c9a5c1039 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -13,6 +13,8 @@
13 13
14#define NFS4_MAX_LOOP_ON_RECOVER (10) 14#define NFS4_MAX_LOOP_ON_RECOVER (10)
15 15
16#include <linux/seqlock.h>
17
16struct idmap; 18struct idmap;
17 19
18enum nfs4_client_state { 20enum nfs4_client_state {
@@ -90,6 +92,8 @@ struct nfs4_state_owner {
90 unsigned long so_flags; 92 unsigned long so_flags;
91 struct list_head so_states; 93 struct list_head so_states;
92 struct nfs_seqid_counter so_seqid; 94 struct nfs_seqid_counter so_seqid;
95 seqcount_t so_reclaim_seqcount;
96 struct mutex so_delegreturn_mutex;
93}; 97};
94 98
95enum { 99enum {
diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c
index acc347268124..2e9779b58b7a 100644
--- a/fs/nfs/nfs4client.c
+++ b/fs/nfs/nfs4client.c
@@ -236,11 +236,10 @@ struct nfs_client *nfs4_init_client(struct nfs_client *clp,
236 error = nfs4_discover_server_trunking(clp, &old); 236 error = nfs4_discover_server_trunking(clp, &old);
237 if (error < 0) 237 if (error < 0)
238 goto error; 238 goto error;
239 nfs_put_client(clp);
239 if (clp != old) { 240 if (clp != old) {
240 clp->cl_preserve_clid = true; 241 clp->cl_preserve_clid = true;
241 nfs_put_client(clp);
242 clp = old; 242 clp = old;
243 atomic_inc(&clp->cl_count);
244 } 243 }
245 244
246 return clp; 245 return clp;
@@ -306,7 +305,7 @@ int nfs40_walk_client_list(struct nfs_client *new,
306 .clientid = new->cl_clientid, 305 .clientid = new->cl_clientid,
307 .confirm = new->cl_confirm, 306 .confirm = new->cl_confirm,
308 }; 307 };
309 int status; 308 int status = -NFS4ERR_STALE_CLIENTID;
310 309
311 spin_lock(&nn->nfs_client_lock); 310 spin_lock(&nn->nfs_client_lock);
312 list_for_each_entry_safe(pos, n, &nn->nfs_client_list, cl_share_link) { 311 list_for_each_entry_safe(pos, n, &nn->nfs_client_list, cl_share_link) {
@@ -332,40 +331,33 @@ int nfs40_walk_client_list(struct nfs_client *new,
332 331
333 if (prev) 332 if (prev)
334 nfs_put_client(prev); 333 nfs_put_client(prev);
334 prev = pos;
335 335
336 status = nfs4_proc_setclientid_confirm(pos, &clid, cred); 336 status = nfs4_proc_setclientid_confirm(pos, &clid, cred);
337 if (status == 0) { 337 switch (status) {
338 case -NFS4ERR_STALE_CLIENTID:
339 break;
340 case 0:
338 nfs4_swap_callback_idents(pos, new); 341 nfs4_swap_callback_idents(pos, new);
339 342
340 nfs_put_client(pos); 343 prev = NULL;
341 *result = pos; 344 *result = pos;
342 dprintk("NFS: <-- %s using nfs_client = %p ({%d})\n", 345 dprintk("NFS: <-- %s using nfs_client = %p ({%d})\n",
343 __func__, pos, atomic_read(&pos->cl_count)); 346 __func__, pos, atomic_read(&pos->cl_count));
344 return 0; 347 default:
345 } 348 goto out;
346 if (status != -NFS4ERR_STALE_CLIENTID) {
347 nfs_put_client(pos);
348 dprintk("NFS: <-- %s status = %d, no result\n",
349 __func__, status);
350 return status;
351 } 349 }
352 350
353 spin_lock(&nn->nfs_client_lock); 351 spin_lock(&nn->nfs_client_lock);
354 prev = pos;
355 } 352 }
353 spin_unlock(&nn->nfs_client_lock);
356 354
357 /* 355 /* No match found. The server lost our clientid */
358 * No matching nfs_client found. This should be impossible, 356out:
359 * because the new nfs_client has already been added to
360 * nfs_client_list by nfs_get_client().
361 *
362 * Don't BUG(), since the caller is holding a mutex.
363 */
364 if (prev) 357 if (prev)
365 nfs_put_client(prev); 358 nfs_put_client(prev);
366 spin_unlock(&nn->nfs_client_lock); 359 dprintk("NFS: <-- %s status = %d\n", __func__, status);
367 pr_err("NFS: %s Error: no matching nfs_client found\n", __func__); 360 return status;
368 return -NFS4ERR_STALE_CLIENTID;
369} 361}
370 362
371#ifdef CONFIG_NFS_V4_1 363#ifdef CONFIG_NFS_V4_1
@@ -432,7 +424,7 @@ int nfs41_walk_client_list(struct nfs_client *new,
432{ 424{
433 struct nfs_net *nn = net_generic(new->cl_net, nfs_net_id); 425 struct nfs_net *nn = net_generic(new->cl_net, nfs_net_id);
434 struct nfs_client *pos, *n, *prev = NULL; 426 struct nfs_client *pos, *n, *prev = NULL;
435 int error; 427 int status = -NFS4ERR_STALE_CLIENTID;
436 428
437 spin_lock(&nn->nfs_client_lock); 429 spin_lock(&nn->nfs_client_lock);
438 list_for_each_entry_safe(pos, n, &nn->nfs_client_list, cl_share_link) { 430 list_for_each_entry_safe(pos, n, &nn->nfs_client_list, cl_share_link) {
@@ -448,14 +440,17 @@ int nfs41_walk_client_list(struct nfs_client *new,
448 nfs_put_client(prev); 440 nfs_put_client(prev);
449 prev = pos; 441 prev = pos;
450 442
451 error = nfs_wait_client_init_complete(pos); 443 nfs4_schedule_lease_recovery(pos);
452 if (error < 0) { 444 status = nfs_wait_client_init_complete(pos);
445 if (status < 0) {
453 nfs_put_client(pos); 446 nfs_put_client(pos);
454 spin_lock(&nn->nfs_client_lock); 447 spin_lock(&nn->nfs_client_lock);
455 continue; 448 continue;
456 } 449 }
457 450 status = pos->cl_cons_state;
458 spin_lock(&nn->nfs_client_lock); 451 spin_lock(&nn->nfs_client_lock);
452 if (status < 0)
453 continue;
459 } 454 }
460 455
461 if (pos->rpc_ops != new->rpc_ops) 456 if (pos->rpc_ops != new->rpc_ops)
@@ -473,6 +468,7 @@ int nfs41_walk_client_list(struct nfs_client *new,
473 if (!nfs4_match_serverowners(pos, new)) 468 if (!nfs4_match_serverowners(pos, new))
474 continue; 469 continue;
475 470
471 atomic_inc(&pos->cl_count);
476 spin_unlock(&nn->nfs_client_lock); 472 spin_unlock(&nn->nfs_client_lock);
477 dprintk("NFS: <-- %s using nfs_client = %p ({%d})\n", 473 dprintk("NFS: <-- %s using nfs_client = %p ({%d})\n",
478 __func__, pos, atomic_read(&pos->cl_count)); 474 __func__, pos, atomic_read(&pos->cl_count));
@@ -481,16 +477,10 @@ int nfs41_walk_client_list(struct nfs_client *new,
481 return 0; 477 return 0;
482 } 478 }
483 479
484 /* 480 /* No matching nfs_client found. */
485 * No matching nfs_client found. This should be impossible,
486 * because the new nfs_client has already been added to
487 * nfs_client_list by nfs_get_client().
488 *
489 * Don't BUG(), since the caller is holding a mutex.
490 */
491 spin_unlock(&nn->nfs_client_lock); 481 spin_unlock(&nn->nfs_client_lock);
492 pr_err("NFS: %s Error: no matching nfs_client found\n", __func__); 482 dprintk("NFS: <-- %s status = %d\n", __func__, status);
493 return -NFS4ERR_STALE_CLIENTID; 483 return status;
494} 484}
495#endif /* CONFIG_NFS_V4_1 */ 485#endif /* CONFIG_NFS_V4_1 */
496 486
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 5d864fb36578..eae83bf96c6d 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -896,6 +896,8 @@ static int can_open_delegated(struct nfs_delegation *delegation, fmode_t fmode)
896 return 0; 896 return 0;
897 if (test_bit(NFS_DELEGATION_NEED_RECLAIM, &delegation->flags)) 897 if (test_bit(NFS_DELEGATION_NEED_RECLAIM, &delegation->flags))
898 return 0; 898 return 0;
899 if (test_bit(NFS_DELEGATION_RETURNING, &delegation->flags))
900 return 0;
899 nfs_mark_delegation_referenced(delegation); 901 nfs_mark_delegation_referenced(delegation);
900 return 1; 902 return 1;
901} 903}
@@ -973,6 +975,7 @@ static int update_open_stateid(struct nfs4_state *state, nfs4_stateid *open_stat
973 975
974 spin_lock(&deleg_cur->lock); 976 spin_lock(&deleg_cur->lock);
975 if (nfsi->delegation != deleg_cur || 977 if (nfsi->delegation != deleg_cur ||
978 test_bit(NFS_DELEGATION_RETURNING, &deleg_cur->flags) ||
976 (deleg_cur->type & fmode) != fmode) 979 (deleg_cur->type & fmode) != fmode)
977 goto no_delegation_unlock; 980 goto no_delegation_unlock;
978 981
@@ -1352,19 +1355,18 @@ int nfs4_open_delegation_recall(struct nfs_open_context *ctx, struct nfs4_state
1352 case -NFS4ERR_BAD_HIGH_SLOT: 1355 case -NFS4ERR_BAD_HIGH_SLOT:
1353 case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION: 1356 case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION:
1354 case -NFS4ERR_DEADSESSION: 1357 case -NFS4ERR_DEADSESSION:
1358 set_bit(NFS_DELEGATED_STATE, &state->flags);
1355 nfs4_schedule_session_recovery(server->nfs_client->cl_session, err); 1359 nfs4_schedule_session_recovery(server->nfs_client->cl_session, err);
1360 err = -EAGAIN;
1356 goto out; 1361 goto out;
1357 case -NFS4ERR_STALE_CLIENTID: 1362 case -NFS4ERR_STALE_CLIENTID:
1358 case -NFS4ERR_STALE_STATEID: 1363 case -NFS4ERR_STALE_STATEID:
1364 set_bit(NFS_DELEGATED_STATE, &state->flags);
1359 case -NFS4ERR_EXPIRED: 1365 case -NFS4ERR_EXPIRED:
1360 /* Don't recall a delegation if it was lost */ 1366 /* Don't recall a delegation if it was lost */
1361 nfs4_schedule_lease_recovery(server->nfs_client); 1367 nfs4_schedule_lease_recovery(server->nfs_client);
1368 err = -EAGAIN;
1362 goto out; 1369 goto out;
1363 case -ERESTARTSYS:
1364 /*
1365 * The show must go on: exit, but mark the
1366 * stateid as needing recovery.
1367 */
1368 case -NFS4ERR_DELEG_REVOKED: 1370 case -NFS4ERR_DELEG_REVOKED:
1369 case -NFS4ERR_ADMIN_REVOKED: 1371 case -NFS4ERR_ADMIN_REVOKED:
1370 case -NFS4ERR_BAD_STATEID: 1372 case -NFS4ERR_BAD_STATEID:
@@ -1375,6 +1377,7 @@ int nfs4_open_delegation_recall(struct nfs_open_context *ctx, struct nfs4_state
1375 err = 0; 1377 err = 0;
1376 goto out; 1378 goto out;
1377 } 1379 }
1380 set_bit(NFS_DELEGATED_STATE, &state->flags);
1378 err = nfs4_handle_exception(server, err, &exception); 1381 err = nfs4_handle_exception(server, err, &exception);
1379 } while (exception.retry); 1382 } while (exception.retry);
1380out: 1383out:
@@ -1463,7 +1466,7 @@ static void nfs4_open_prepare(struct rpc_task *task, void *calldata)
1463 struct nfs4_state_owner *sp = data->owner; 1466 struct nfs4_state_owner *sp = data->owner;
1464 1467
1465 if (nfs_wait_on_sequence(data->o_arg.seqid, task) != 0) 1468 if (nfs_wait_on_sequence(data->o_arg.seqid, task) != 0)
1466 return; 1469 goto out_wait;
1467 /* 1470 /*
1468 * Check if we still need to send an OPEN call, or if we can use 1471 * Check if we still need to send an OPEN call, or if we can use
1469 * a delegation instead. 1472 * a delegation instead.
@@ -1498,6 +1501,7 @@ unlock_no_action:
1498 rcu_read_unlock(); 1501 rcu_read_unlock();
1499out_no_action: 1502out_no_action:
1500 task->tk_action = NULL; 1503 task->tk_action = NULL;
1504out_wait:
1501 nfs4_sequence_done(task, &data->o_res.seq_res); 1505 nfs4_sequence_done(task, &data->o_res.seq_res);
1502} 1506}
1503 1507
@@ -1626,7 +1630,8 @@ static int _nfs4_recover_proc_open(struct nfs4_opendata *data)
1626 1630
1627static int nfs4_opendata_access(struct rpc_cred *cred, 1631static int nfs4_opendata_access(struct rpc_cred *cred,
1628 struct nfs4_opendata *opendata, 1632 struct nfs4_opendata *opendata,
1629 struct nfs4_state *state, fmode_t fmode) 1633 struct nfs4_state *state, fmode_t fmode,
1634 int openflags)
1630{ 1635{
1631 struct nfs_access_entry cache; 1636 struct nfs_access_entry cache;
1632 u32 mask; 1637 u32 mask;
@@ -1638,11 +1643,14 @@ static int nfs4_opendata_access(struct rpc_cred *cred,
1638 1643
1639 mask = 0; 1644 mask = 0;
1640 /* don't check MAY_WRITE - a newly created file may not have 1645 /* don't check MAY_WRITE - a newly created file may not have
1641 * write mode bits, but POSIX allows the creating process to write */ 1646 * write mode bits, but POSIX allows the creating process to write.
1642 if (fmode & FMODE_READ) 1647 * use openflags to check for exec, because fmode won't
1643 mask |= MAY_READ; 1648 * always have FMODE_EXEC set when file open for exec. */
1644 if (fmode & FMODE_EXEC) 1649 if (openflags & __FMODE_EXEC) {
1645 mask |= MAY_EXEC; 1650 /* ONLY check for exec rights */
1651 mask = MAY_EXEC;
1652 } else if (fmode & FMODE_READ)
1653 mask = MAY_READ;
1646 1654
1647 cache.cred = cred; 1655 cache.cred = cred;
1648 cache.jiffies = jiffies; 1656 cache.jiffies = jiffies;
@@ -1841,6 +1849,43 @@ static inline void nfs4_exclusive_attrset(struct nfs4_opendata *opendata, struct
1841 sattr->ia_valid |= ATTR_MTIME; 1849 sattr->ia_valid |= ATTR_MTIME;
1842} 1850}
1843 1851
1852static int _nfs4_open_and_get_state(struct nfs4_opendata *opendata,
1853 fmode_t fmode,
1854 int flags,
1855 struct nfs4_state **res)
1856{
1857 struct nfs4_state_owner *sp = opendata->owner;
1858 struct nfs_server *server = sp->so_server;
1859 struct nfs4_state *state;
1860 unsigned int seq;
1861 int ret;
1862
1863 seq = raw_seqcount_begin(&sp->so_reclaim_seqcount);
1864
1865 ret = _nfs4_proc_open(opendata);
1866 if (ret != 0)
1867 goto out;
1868
1869 state = nfs4_opendata_to_nfs4_state(opendata);
1870 ret = PTR_ERR(state);
1871 if (IS_ERR(state))
1872 goto out;
1873 if (server->caps & NFS_CAP_POSIX_LOCK)
1874 set_bit(NFS_STATE_POSIX_LOCKS, &state->flags);
1875
1876 ret = nfs4_opendata_access(sp->so_cred, opendata, state, fmode, flags);
1877 if (ret != 0)
1878 goto out;
1879
1880 if (read_seqcount_retry(&sp->so_reclaim_seqcount, seq)) {
1881 nfs4_schedule_stateid_recovery(server, state);
1882 nfs4_wait_clnt_recover(server->nfs_client);
1883 }
1884 *res = state;
1885out:
1886 return ret;
1887}
1888
1844/* 1889/*
1845 * Returns a referenced nfs4_state 1890 * Returns a referenced nfs4_state
1846 */ 1891 */
@@ -1885,18 +1930,7 @@ static int _nfs4_do_open(struct inode *dir,
1885 if (dentry->d_inode != NULL) 1930 if (dentry->d_inode != NULL)
1886 opendata->state = nfs4_get_open_state(dentry->d_inode, sp); 1931 opendata->state = nfs4_get_open_state(dentry->d_inode, sp);
1887 1932
1888 status = _nfs4_proc_open(opendata); 1933 status = _nfs4_open_and_get_state(opendata, fmode, flags, &state);
1889 if (status != 0)
1890 goto err_opendata_put;
1891
1892 state = nfs4_opendata_to_nfs4_state(opendata);
1893 status = PTR_ERR(state);
1894 if (IS_ERR(state))
1895 goto err_opendata_put;
1896 if (server->caps & NFS_CAP_POSIX_LOCK)
1897 set_bit(NFS_STATE_POSIX_LOCKS, &state->flags);
1898
1899 status = nfs4_opendata_access(cred, opendata, state, fmode);
1900 if (status != 0) 1934 if (status != 0)
1901 goto err_opendata_put; 1935 goto err_opendata_put;
1902 1936
@@ -2084,7 +2118,7 @@ static void nfs4_free_closedata(void *data)
2084 nfs4_put_open_state(calldata->state); 2118 nfs4_put_open_state(calldata->state);
2085 nfs_free_seqid(calldata->arg.seqid); 2119 nfs_free_seqid(calldata->arg.seqid);
2086 nfs4_put_state_owner(sp); 2120 nfs4_put_state_owner(sp);
2087 nfs_sb_deactive_async(sb); 2121 nfs_sb_deactive(sb);
2088 kfree(calldata); 2122 kfree(calldata);
2089} 2123}
2090 2124
@@ -2146,7 +2180,7 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data)
2146 2180
2147 dprintk("%s: begin!\n", __func__); 2181 dprintk("%s: begin!\n", __func__);
2148 if (nfs_wait_on_sequence(calldata->arg.seqid, task) != 0) 2182 if (nfs_wait_on_sequence(calldata->arg.seqid, task) != 0)
2149 return; 2183 goto out_wait;
2150 2184
2151 task->tk_msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_DOWNGRADE]; 2185 task->tk_msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_DOWNGRADE];
2152 calldata->arg.fmode = FMODE_READ|FMODE_WRITE; 2186 calldata->arg.fmode = FMODE_READ|FMODE_WRITE;
@@ -2168,16 +2202,14 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data)
2168 2202
2169 if (!call_close) { 2203 if (!call_close) {
2170 /* Note: exit _without_ calling nfs4_close_done */ 2204 /* Note: exit _without_ calling nfs4_close_done */
2171 task->tk_action = NULL; 2205 goto out_no_action;
2172 nfs4_sequence_done(task, &calldata->res.seq_res);
2173 goto out;
2174 } 2206 }
2175 2207
2176 if (calldata->arg.fmode == 0) { 2208 if (calldata->arg.fmode == 0) {
2177 task->tk_msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_CLOSE]; 2209 task->tk_msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_CLOSE];
2178 if (calldata->roc && 2210 if (calldata->roc &&
2179 pnfs_roc_drain(inode, &calldata->roc_barrier, task)) 2211 pnfs_roc_drain(inode, &calldata->roc_barrier, task))
2180 goto out; 2212 goto out_wait;
2181 } 2213 }
2182 2214
2183 nfs_fattr_init(calldata->res.fattr); 2215 nfs_fattr_init(calldata->res.fattr);
@@ -2187,8 +2219,12 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data)
2187 &calldata->res.seq_res, 2219 &calldata->res.seq_res,
2188 task) != 0) 2220 task) != 0)
2189 nfs_release_seqid(calldata->arg.seqid); 2221 nfs_release_seqid(calldata->arg.seqid);
2190out:
2191 dprintk("%s: done!\n", __func__); 2222 dprintk("%s: done!\n", __func__);
2223 return;
2224out_no_action:
2225 task->tk_action = NULL;
2226out_wait:
2227 nfs4_sequence_done(task, &calldata->res.seq_res);
2192} 2228}
2193 2229
2194static const struct rpc_call_ops nfs4_close_ops = { 2230static const struct rpc_call_ops nfs4_close_ops = {
@@ -4419,12 +4455,10 @@ static void nfs4_locku_prepare(struct rpc_task *task, void *data)
4419 struct nfs4_unlockdata *calldata = data; 4455 struct nfs4_unlockdata *calldata = data;
4420 4456
4421 if (nfs_wait_on_sequence(calldata->arg.seqid, task) != 0) 4457 if (nfs_wait_on_sequence(calldata->arg.seqid, task) != 0)
4422 return; 4458 goto out_wait;
4423 if (test_bit(NFS_LOCK_INITIALIZED, &calldata->lsp->ls_flags) == 0) { 4459 if (test_bit(NFS_LOCK_INITIALIZED, &calldata->lsp->ls_flags) == 0) {
4424 /* Note: exit _without_ running nfs4_locku_done */ 4460 /* Note: exit _without_ running nfs4_locku_done */
4425 task->tk_action = NULL; 4461 goto out_no_action;
4426 nfs4_sequence_done(task, &calldata->res.seq_res);
4427 return;
4428 } 4462 }
4429 calldata->timestamp = jiffies; 4463 calldata->timestamp = jiffies;
4430 if (nfs4_setup_sequence(calldata->server, 4464 if (nfs4_setup_sequence(calldata->server,
@@ -4432,6 +4466,11 @@ static void nfs4_locku_prepare(struct rpc_task *task, void *data)
4432 &calldata->res.seq_res, 4466 &calldata->res.seq_res,
4433 task) != 0) 4467 task) != 0)
4434 nfs_release_seqid(calldata->arg.seqid); 4468 nfs_release_seqid(calldata->arg.seqid);
4469 return;
4470out_no_action:
4471 task->tk_action = NULL;
4472out_wait:
4473 nfs4_sequence_done(task, &calldata->res.seq_res);
4435} 4474}
4436 4475
4437static const struct rpc_call_ops nfs4_locku_ops = { 4476static const struct rpc_call_ops nfs4_locku_ops = {
@@ -4478,7 +4517,9 @@ static struct rpc_task *nfs4_do_unlck(struct file_lock *fl,
4478 4517
4479static int nfs4_proc_unlck(struct nfs4_state *state, int cmd, struct file_lock *request) 4518static int nfs4_proc_unlck(struct nfs4_state *state, int cmd, struct file_lock *request)
4480{ 4519{
4481 struct nfs_inode *nfsi = NFS_I(state->inode); 4520 struct inode *inode = state->inode;
4521 struct nfs4_state_owner *sp = state->owner;
4522 struct nfs_inode *nfsi = NFS_I(inode);
4482 struct nfs_seqid *seqid; 4523 struct nfs_seqid *seqid;
4483 struct nfs4_lock_state *lsp; 4524 struct nfs4_lock_state *lsp;
4484 struct rpc_task *task; 4525 struct rpc_task *task;
@@ -4488,12 +4529,17 @@ static int nfs4_proc_unlck(struct nfs4_state *state, int cmd, struct file_lock *
4488 status = nfs4_set_lock_state(state, request); 4529 status = nfs4_set_lock_state(state, request);
4489 /* Unlock _before_ we do the RPC call */ 4530 /* Unlock _before_ we do the RPC call */
4490 request->fl_flags |= FL_EXISTS; 4531 request->fl_flags |= FL_EXISTS;
4532 /* Exclude nfs_delegation_claim_locks() */
4533 mutex_lock(&sp->so_delegreturn_mutex);
4534 /* Exclude nfs4_reclaim_open_stateid() - note nesting! */
4491 down_read(&nfsi->rwsem); 4535 down_read(&nfsi->rwsem);
4492 if (do_vfs_lock(request->fl_file, request) == -ENOENT) { 4536 if (do_vfs_lock(request->fl_file, request) == -ENOENT) {
4493 up_read(&nfsi->rwsem); 4537 up_read(&nfsi->rwsem);
4538 mutex_unlock(&sp->so_delegreturn_mutex);
4494 goto out; 4539 goto out;
4495 } 4540 }
4496 up_read(&nfsi->rwsem); 4541 up_read(&nfsi->rwsem);
4542 mutex_unlock(&sp->so_delegreturn_mutex);
4497 if (status != 0) 4543 if (status != 0)
4498 goto out; 4544 goto out;
4499 /* Is this a delegated lock? */ 4545 /* Is this a delegated lock? */
@@ -4572,7 +4618,7 @@ static void nfs4_lock_prepare(struct rpc_task *task, void *calldata)
4572 4618
4573 dprintk("%s: begin!\n", __func__); 4619 dprintk("%s: begin!\n", __func__);
4574 if (nfs_wait_on_sequence(data->arg.lock_seqid, task) != 0) 4620 if (nfs_wait_on_sequence(data->arg.lock_seqid, task) != 0)
4575 return; 4621 goto out_wait;
4576 /* Do we need to do an open_to_lock_owner? */ 4622 /* Do we need to do an open_to_lock_owner? */
4577 if (!(data->arg.lock_seqid->sequence->flags & NFS_SEQID_CONFIRMED)) { 4623 if (!(data->arg.lock_seqid->sequence->flags & NFS_SEQID_CONFIRMED)) {
4578 if (nfs_wait_on_sequence(data->arg.open_seqid, task) != 0) { 4624 if (nfs_wait_on_sequence(data->arg.open_seqid, task) != 0) {
@@ -4592,6 +4638,8 @@ static void nfs4_lock_prepare(struct rpc_task *task, void *calldata)
4592 nfs_release_seqid(data->arg.open_seqid); 4638 nfs_release_seqid(data->arg.open_seqid);
4593out_release_lock_seqid: 4639out_release_lock_seqid:
4594 nfs_release_seqid(data->arg.lock_seqid); 4640 nfs_release_seqid(data->arg.lock_seqid);
4641out_wait:
4642 nfs4_sequence_done(task, &data->res.seq_res);
4595 dprintk("%s: done!, ret = %d\n", __func__, data->rpc_status); 4643 dprintk("%s: done!, ret = %d\n", __func__, data->rpc_status);
4596} 4644}
4597 4645
@@ -4809,8 +4857,10 @@ static int nfs41_lock_expired(struct nfs4_state *state, struct file_lock *reques
4809 4857
4810static int _nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock *request) 4858static int _nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock *request)
4811{ 4859{
4860 struct nfs4_state_owner *sp = state->owner;
4812 struct nfs_inode *nfsi = NFS_I(state->inode); 4861 struct nfs_inode *nfsi = NFS_I(state->inode);
4813 unsigned char fl_flags = request->fl_flags; 4862 unsigned char fl_flags = request->fl_flags;
4863 unsigned int seq;
4814 int status = -ENOLCK; 4864 int status = -ENOLCK;
4815 4865
4816 if ((fl_flags & FL_POSIX) && 4866 if ((fl_flags & FL_POSIX) &&
@@ -4832,9 +4882,16 @@ static int _nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock
4832 status = do_vfs_lock(request->fl_file, request); 4882 status = do_vfs_lock(request->fl_file, request);
4833 goto out_unlock; 4883 goto out_unlock;
4834 } 4884 }
4885 seq = raw_seqcount_begin(&sp->so_reclaim_seqcount);
4886 up_read(&nfsi->rwsem);
4835 status = _nfs4_do_setlk(state, cmd, request, NFS_LOCK_NEW); 4887 status = _nfs4_do_setlk(state, cmd, request, NFS_LOCK_NEW);
4836 if (status != 0) 4888 if (status != 0)
4889 goto out;
4890 down_read(&nfsi->rwsem);
4891 if (read_seqcount_retry(&sp->so_reclaim_seqcount, seq)) {
4892 status = -NFS4ERR_DELAY;
4837 goto out_unlock; 4893 goto out_unlock;
4894 }
4838 /* Note: we always want to sleep here! */ 4895 /* Note: we always want to sleep here! */
4839 request->fl_flags = fl_flags | FL_SLEEP; 4896 request->fl_flags = fl_flags | FL_SLEEP;
4840 if (do_vfs_lock(request->fl_file, request) < 0) 4897 if (do_vfs_lock(request->fl_file, request) < 0)
@@ -4941,24 +4998,22 @@ int nfs4_lock_delegation_recall(struct nfs4_state *state, struct file_lock *fl)
4941 case 0: 4998 case 0:
4942 case -ESTALE: 4999 case -ESTALE:
4943 goto out; 5000 goto out;
4944 case -NFS4ERR_EXPIRED:
4945 nfs4_schedule_stateid_recovery(server, state);
4946 case -NFS4ERR_STALE_CLIENTID: 5001 case -NFS4ERR_STALE_CLIENTID:
4947 case -NFS4ERR_STALE_STATEID: 5002 case -NFS4ERR_STALE_STATEID:
5003 set_bit(NFS_DELEGATED_STATE, &state->flags);
5004 case -NFS4ERR_EXPIRED:
4948 nfs4_schedule_lease_recovery(server->nfs_client); 5005 nfs4_schedule_lease_recovery(server->nfs_client);
5006 err = -EAGAIN;
4949 goto out; 5007 goto out;
4950 case -NFS4ERR_BADSESSION: 5008 case -NFS4ERR_BADSESSION:
4951 case -NFS4ERR_BADSLOT: 5009 case -NFS4ERR_BADSLOT:
4952 case -NFS4ERR_BAD_HIGH_SLOT: 5010 case -NFS4ERR_BAD_HIGH_SLOT:
4953 case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION: 5011 case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION:
4954 case -NFS4ERR_DEADSESSION: 5012 case -NFS4ERR_DEADSESSION:
5013 set_bit(NFS_DELEGATED_STATE, &state->flags);
4955 nfs4_schedule_session_recovery(server->nfs_client->cl_session, err); 5014 nfs4_schedule_session_recovery(server->nfs_client->cl_session, err);
5015 err = -EAGAIN;
4956 goto out; 5016 goto out;
4957 case -ERESTARTSYS:
4958 /*
4959 * The show must go on: exit, but mark the
4960 * stateid as needing recovery.
4961 */
4962 case -NFS4ERR_DELEG_REVOKED: 5017 case -NFS4ERR_DELEG_REVOKED:
4963 case -NFS4ERR_ADMIN_REVOKED: 5018 case -NFS4ERR_ADMIN_REVOKED:
4964 case -NFS4ERR_BAD_STATEID: 5019 case -NFS4ERR_BAD_STATEID:
@@ -4971,9 +5026,8 @@ int nfs4_lock_delegation_recall(struct nfs4_state *state, struct file_lock *fl)
4971 /* kill_proc(fl->fl_pid, SIGLOST, 1); */ 5026 /* kill_proc(fl->fl_pid, SIGLOST, 1); */
4972 err = 0; 5027 err = 0;
4973 goto out; 5028 goto out;
4974 case -NFS4ERR_DELAY:
4975 break;
4976 } 5029 }
5030 set_bit(NFS_DELEGATED_STATE, &state->flags);
4977 err = nfs4_handle_exception(server, err, &exception); 5031 err = nfs4_handle_exception(server, err, &exception);
4978 } while (exception.retry); 5032 } while (exception.retry);
4979out: 5033out:
@@ -6130,7 +6184,8 @@ nfs4_proc_layoutget(struct nfs4_layoutget *lgp, gfp_t gfp_flags)
6130 status = nfs4_wait_for_completion_rpc_task(task); 6184 status = nfs4_wait_for_completion_rpc_task(task);
6131 if (status == 0) 6185 if (status == 0)
6132 status = task->tk_status; 6186 status = task->tk_status;
6133 if (status == 0) 6187 /* if layoutp->len is 0, nfs4_layoutget_prepare called rpc_exit */
6188 if (status == 0 && lgp->res.layoutp->len)
6134 lseg = pnfs_layout_process(lgp); 6189 lseg = pnfs_layout_process(lgp);
6135 rpc_put_task(task); 6190 rpc_put_task(task);
6136 dprintk("<-- %s status=%d\n", __func__, status); 6191 dprintk("<-- %s status=%d\n", __func__, status);
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index 9448c579d41a..6ace365c6334 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -136,16 +136,11 @@ int nfs40_discover_server_trunking(struct nfs_client *clp,
136 clp->cl_confirm = clid.confirm; 136 clp->cl_confirm = clid.confirm;
137 137
138 status = nfs40_walk_client_list(clp, result, cred); 138 status = nfs40_walk_client_list(clp, result, cred);
139 switch (status) { 139 if (status == 0) {
140 case -NFS4ERR_STALE_CLIENTID:
141 set_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state);
142 case 0:
143 /* Sustain the lease, even if it's empty. If the clientid4 140 /* Sustain the lease, even if it's empty. If the clientid4
144 * goes stale it's of no use for trunking discovery. */ 141 * goes stale it's of no use for trunking discovery. */
145 nfs4_schedule_state_renewal(*result); 142 nfs4_schedule_state_renewal(*result);
146 break;
147 } 143 }
148
149out: 144out:
150 return status; 145 return status;
151} 146}
@@ -523,6 +518,8 @@ nfs4_alloc_state_owner(struct nfs_server *server,
523 nfs4_init_seqid_counter(&sp->so_seqid); 518 nfs4_init_seqid_counter(&sp->so_seqid);
524 atomic_set(&sp->so_count, 1); 519 atomic_set(&sp->so_count, 1);
525 INIT_LIST_HEAD(&sp->so_lru); 520 INIT_LIST_HEAD(&sp->so_lru);
521 seqcount_init(&sp->so_reclaim_seqcount);
522 mutex_init(&sp->so_delegreturn_mutex);
526 return sp; 523 return sp;
527} 524}
528 525
@@ -1395,8 +1392,9 @@ static int nfs4_reclaim_open_state(struct nfs4_state_owner *sp, const struct nfs
1395 * recovering after a network partition or a reboot from a 1392 * recovering after a network partition or a reboot from a
1396 * server that doesn't support a grace period. 1393 * server that doesn't support a grace period.
1397 */ 1394 */
1398restart:
1399 spin_lock(&sp->so_lock); 1395 spin_lock(&sp->so_lock);
1396 write_seqcount_begin(&sp->so_reclaim_seqcount);
1397restart:
1400 list_for_each_entry(state, &sp->so_states, open_states) { 1398 list_for_each_entry(state, &sp->so_states, open_states) {
1401 if (!test_and_clear_bit(ops->state_flag_bit, &state->flags)) 1399 if (!test_and_clear_bit(ops->state_flag_bit, &state->flags))
1402 continue; 1400 continue;
@@ -1417,6 +1415,7 @@ restart:
1417 } 1415 }
1418 spin_unlock(&state->state_lock); 1416 spin_unlock(&state->state_lock);
1419 nfs4_put_open_state(state); 1417 nfs4_put_open_state(state);
1418 spin_lock(&sp->so_lock);
1420 goto restart; 1419 goto restart;
1421 } 1420 }
1422 } 1421 }
@@ -1454,12 +1453,17 @@ restart:
1454 goto out_err; 1453 goto out_err;
1455 } 1454 }
1456 nfs4_put_open_state(state); 1455 nfs4_put_open_state(state);
1456 spin_lock(&sp->so_lock);
1457 goto restart; 1457 goto restart;
1458 } 1458 }
1459 write_seqcount_end(&sp->so_reclaim_seqcount);
1459 spin_unlock(&sp->so_lock); 1460 spin_unlock(&sp->so_lock);
1460 return 0; 1461 return 0;
1461out_err: 1462out_err:
1462 nfs4_put_open_state(state); 1463 nfs4_put_open_state(state);
1464 spin_lock(&sp->so_lock);
1465 write_seqcount_end(&sp->so_reclaim_seqcount);
1466 spin_unlock(&sp->so_lock);
1463 return status; 1467 return status;
1464} 1468}
1465 1469
@@ -1863,6 +1867,7 @@ again:
1863 case -ETIMEDOUT: 1867 case -ETIMEDOUT:
1864 case -EAGAIN: 1868 case -EAGAIN:
1865 ssleep(1); 1869 ssleep(1);
1870 case -NFS4ERR_STALE_CLIENTID:
1866 dprintk("NFS: %s after status %d, retrying\n", 1871 dprintk("NFS: %s after status %d, retrying\n",
1867 __func__, status); 1872 __func__, status);
1868 goto again; 1873 goto again;
@@ -2022,8 +2027,18 @@ static int nfs4_reset_session(struct nfs_client *clp)
2022 nfs4_begin_drain_session(clp); 2027 nfs4_begin_drain_session(clp);
2023 cred = nfs4_get_exchange_id_cred(clp); 2028 cred = nfs4_get_exchange_id_cred(clp);
2024 status = nfs4_proc_destroy_session(clp->cl_session, cred); 2029 status = nfs4_proc_destroy_session(clp->cl_session, cred);
2025 if (status && status != -NFS4ERR_BADSESSION && 2030 switch (status) {
2026 status != -NFS4ERR_DEADSESSION) { 2031 case 0:
2032 case -NFS4ERR_BADSESSION:
2033 case -NFS4ERR_DEADSESSION:
2034 break;
2035 case -NFS4ERR_BACK_CHAN_BUSY:
2036 case -NFS4ERR_DELAY:
2037 set_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state);
2038 status = 0;
2039 ssleep(1);
2040 goto out;
2041 default:
2027 status = nfs4_recovery_handle_error(clp, status); 2042 status = nfs4_recovery_handle_error(clp, status);
2028 goto out; 2043 goto out;
2029 } 2044 }
diff --git a/fs/nfs/objlayout/objio_osd.c b/fs/nfs/objlayout/objio_osd.c
index c6f990656f89..88f9611a945c 100644
--- a/fs/nfs/objlayout/objio_osd.c
+++ b/fs/nfs/objlayout/objio_osd.c
@@ -647,6 +647,7 @@ static struct pnfs_layoutdriver_type objlayout_type = {
647 .flags = PNFS_LAYOUTRET_ON_SETATTR | 647 .flags = PNFS_LAYOUTRET_ON_SETATTR |
648 PNFS_LAYOUTRET_ON_ERROR, 648 PNFS_LAYOUTRET_ON_ERROR,
649 649
650 .owner = THIS_MODULE,
650 .alloc_layout_hdr = objlayout_alloc_layout_hdr, 651 .alloc_layout_hdr = objlayout_alloc_layout_hdr,
651 .free_layout_hdr = objlayout_free_layout_hdr, 652 .free_layout_hdr = objlayout_free_layout_hdr,
652 653
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index e7165d915362..6be70f622b62 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -254,7 +254,7 @@ static void
254pnfs_layout_set_fail_bit(struct pnfs_layout_hdr *lo, int fail_bit) 254pnfs_layout_set_fail_bit(struct pnfs_layout_hdr *lo, int fail_bit)
255{ 255{
256 lo->plh_retry_timestamp = jiffies; 256 lo->plh_retry_timestamp = jiffies;
257 if (test_and_set_bit(fail_bit, &lo->plh_flags)) 257 if (!test_and_set_bit(fail_bit, &lo->plh_flags))
258 atomic_inc(&lo->plh_refcount); 258 atomic_inc(&lo->plh_refcount);
259} 259}
260 260
@@ -505,37 +505,147 @@ pnfs_destroy_layout(struct nfs_inode *nfsi)
505} 505}
506EXPORT_SYMBOL_GPL(pnfs_destroy_layout); 506EXPORT_SYMBOL_GPL(pnfs_destroy_layout);
507 507
508/* 508static bool
509 * Called by the state manger to remove all layouts established under an 509pnfs_layout_add_bulk_destroy_list(struct inode *inode,
510 * expired lease. 510 struct list_head *layout_list)
511 */
512void
513pnfs_destroy_all_layouts(struct nfs_client *clp)
514{ 511{
515 struct nfs_server *server;
516 struct pnfs_layout_hdr *lo; 512 struct pnfs_layout_hdr *lo;
517 LIST_HEAD(tmp_list); 513 bool ret = false;
518 514
519 nfs4_deviceid_mark_client_invalid(clp); 515 spin_lock(&inode->i_lock);
520 nfs4_deviceid_purge_client(clp); 516 lo = NFS_I(inode)->layout;
517 if (lo != NULL && list_empty(&lo->plh_bulk_destroy)) {
518 pnfs_get_layout_hdr(lo);
519 list_add(&lo->plh_bulk_destroy, layout_list);
520 ret = true;
521 }
522 spin_unlock(&inode->i_lock);
523 return ret;
524}
525
526/* Caller must hold rcu_read_lock and clp->cl_lock */
527static int
528pnfs_layout_bulk_destroy_byserver_locked(struct nfs_client *clp,
529 struct nfs_server *server,
530 struct list_head *layout_list)
531{
532 struct pnfs_layout_hdr *lo, *next;
533 struct inode *inode;
534
535 list_for_each_entry_safe(lo, next, &server->layouts, plh_layouts) {
536 inode = igrab(lo->plh_inode);
537 if (inode == NULL)
538 continue;
539 list_del_init(&lo->plh_layouts);
540 if (pnfs_layout_add_bulk_destroy_list(inode, layout_list))
541 continue;
542 rcu_read_unlock();
543 spin_unlock(&clp->cl_lock);
544 iput(inode);
545 spin_lock(&clp->cl_lock);
546 rcu_read_lock();
547 return -EAGAIN;
548 }
549 return 0;
550}
551
552static int
553pnfs_layout_free_bulk_destroy_list(struct list_head *layout_list,
554 bool is_bulk_recall)
555{
556 struct pnfs_layout_hdr *lo;
557 struct inode *inode;
558 struct pnfs_layout_range range = {
559 .iomode = IOMODE_ANY,
560 .offset = 0,
561 .length = NFS4_MAX_UINT64,
562 };
563 LIST_HEAD(lseg_list);
564 int ret = 0;
565
566 while (!list_empty(layout_list)) {
567 lo = list_entry(layout_list->next, struct pnfs_layout_hdr,
568 plh_bulk_destroy);
569 dprintk("%s freeing layout for inode %lu\n", __func__,
570 lo->plh_inode->i_ino);
571 inode = lo->plh_inode;
572 spin_lock(&inode->i_lock);
573 list_del_init(&lo->plh_bulk_destroy);
574 lo->plh_block_lgets++; /* permanently block new LAYOUTGETs */
575 if (is_bulk_recall)
576 set_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags);
577 if (pnfs_mark_matching_lsegs_invalid(lo, &lseg_list, &range))
578 ret = -EAGAIN;
579 spin_unlock(&inode->i_lock);
580 pnfs_free_lseg_list(&lseg_list);
581 pnfs_put_layout_hdr(lo);
582 iput(inode);
583 }
584 return ret;
585}
586
587int
588pnfs_destroy_layouts_byfsid(struct nfs_client *clp,
589 struct nfs_fsid *fsid,
590 bool is_recall)
591{
592 struct nfs_server *server;
593 LIST_HEAD(layout_list);
521 594
522 spin_lock(&clp->cl_lock); 595 spin_lock(&clp->cl_lock);
523 rcu_read_lock(); 596 rcu_read_lock();
597restart:
524 list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) { 598 list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) {
525 if (!list_empty(&server->layouts)) 599 if (memcmp(&server->fsid, fsid, sizeof(*fsid)) != 0)
526 list_splice_init(&server->layouts, &tmp_list); 600 continue;
601 if (pnfs_layout_bulk_destroy_byserver_locked(clp,
602 server,
603 &layout_list) != 0)
604 goto restart;
527 } 605 }
528 rcu_read_unlock(); 606 rcu_read_unlock();
529 spin_unlock(&clp->cl_lock); 607 spin_unlock(&clp->cl_lock);
530 608
531 while (!list_empty(&tmp_list)) { 609 if (list_empty(&layout_list))
532 lo = list_entry(tmp_list.next, struct pnfs_layout_hdr, 610 return 0;
533 plh_layouts); 611 return pnfs_layout_free_bulk_destroy_list(&layout_list, is_recall);
534 dprintk("%s freeing layout for inode %lu\n", __func__, 612}
535 lo->plh_inode->i_ino); 613
536 list_del_init(&lo->plh_layouts); 614int
537 pnfs_destroy_layout(NFS_I(lo->plh_inode)); 615pnfs_destroy_layouts_byclid(struct nfs_client *clp,
616 bool is_recall)
617{
618 struct nfs_server *server;
619 LIST_HEAD(layout_list);
620
621 spin_lock(&clp->cl_lock);
622 rcu_read_lock();
623restart:
624 list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) {
625 if (pnfs_layout_bulk_destroy_byserver_locked(clp,
626 server,
627 &layout_list) != 0)
628 goto restart;
538 } 629 }
630 rcu_read_unlock();
631 spin_unlock(&clp->cl_lock);
632
633 if (list_empty(&layout_list))
634 return 0;
635 return pnfs_layout_free_bulk_destroy_list(&layout_list, is_recall);
636}
637
638/*
639 * Called by the state manger to remove all layouts established under an
640 * expired lease.
641 */
642void
643pnfs_destroy_all_layouts(struct nfs_client *clp)
644{
645 nfs4_deviceid_mark_client_invalid(clp);
646 nfs4_deviceid_purge_client(clp);
647
648 pnfs_destroy_layouts_byclid(clp, false);
539} 649}
540 650
541/* 651/*
@@ -888,7 +998,7 @@ alloc_init_layout_hdr(struct inode *ino,
888 atomic_set(&lo->plh_refcount, 1); 998 atomic_set(&lo->plh_refcount, 1);
889 INIT_LIST_HEAD(&lo->plh_layouts); 999 INIT_LIST_HEAD(&lo->plh_layouts);
890 INIT_LIST_HEAD(&lo->plh_segs); 1000 INIT_LIST_HEAD(&lo->plh_segs);
891 INIT_LIST_HEAD(&lo->plh_bulk_recall); 1001 INIT_LIST_HEAD(&lo->plh_bulk_destroy);
892 lo->plh_inode = ino; 1002 lo->plh_inode = ino;
893 lo->plh_lc_cred = get_rpccred(ctx->state->owner->so_cred); 1003 lo->plh_lc_cred = get_rpccred(ctx->state->owner->so_cred);
894 return lo; 1004 return lo;
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index dbf7bba52da0..97cb358bb882 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -132,7 +132,7 @@ struct pnfs_layoutdriver_type {
132struct pnfs_layout_hdr { 132struct pnfs_layout_hdr {
133 atomic_t plh_refcount; 133 atomic_t plh_refcount;
134 struct list_head plh_layouts; /* other client layouts */ 134 struct list_head plh_layouts; /* other client layouts */
135 struct list_head plh_bulk_recall; /* clnt list of bulk recalls */ 135 struct list_head plh_bulk_destroy;
136 struct list_head plh_segs; /* layout segments list */ 136 struct list_head plh_segs; /* layout segments list */
137 nfs4_stateid plh_stateid; 137 nfs4_stateid plh_stateid;
138 atomic_t plh_outstanding; /* number of RPCs out */ 138 atomic_t plh_outstanding; /* number of RPCs out */
@@ -196,6 +196,11 @@ struct pnfs_layout_segment *pnfs_layout_process(struct nfs4_layoutget *lgp);
196void pnfs_free_lseg_list(struct list_head *tmp_list); 196void pnfs_free_lseg_list(struct list_head *tmp_list);
197void pnfs_destroy_layout(struct nfs_inode *); 197void pnfs_destroy_layout(struct nfs_inode *);
198void pnfs_destroy_all_layouts(struct nfs_client *); 198void pnfs_destroy_all_layouts(struct nfs_client *);
199int pnfs_destroy_layouts_byfsid(struct nfs_client *clp,
200 struct nfs_fsid *fsid,
201 bool is_recall);
202int pnfs_destroy_layouts_byclid(struct nfs_client *clp,
203 bool is_recall);
199void pnfs_put_layout_hdr(struct pnfs_layout_hdr *lo); 204void pnfs_put_layout_hdr(struct pnfs_layout_hdr *lo);
200void pnfs_set_layout_stateid(struct pnfs_layout_hdr *lo, 205void pnfs_set_layout_stateid(struct pnfs_layout_hdr *lo,
201 const nfs4_stateid *new, 206 const nfs4_stateid *new,
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index b6bdb18e892c..a5e5d9899d56 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -91,12 +91,16 @@ void nfs_readdata_release(struct nfs_read_data *rdata)
91 put_nfs_open_context(rdata->args.context); 91 put_nfs_open_context(rdata->args.context);
92 if (rdata->pages.pagevec != rdata->pages.page_array) 92 if (rdata->pages.pagevec != rdata->pages.page_array)
93 kfree(rdata->pages.pagevec); 93 kfree(rdata->pages.pagevec);
94 if (rdata != &read_header->rpc_data) 94 if (rdata == &read_header->rpc_data) {
95 kfree(rdata);
96 else
97 rdata->header = NULL; 95 rdata->header = NULL;
96 rdata = NULL;
97 }
98 if (atomic_dec_and_test(&hdr->refcnt)) 98 if (atomic_dec_and_test(&hdr->refcnt))
99 hdr->completion_ops->completion(hdr); 99 hdr->completion_ops->completion(hdr);
100 /* Note: we only free the rpc_task after callbacks are done.
101 * See the comment in rpc_free_task() for why
102 */
103 kfree(rdata);
100} 104}
101EXPORT_SYMBOL_GPL(nfs_readdata_release); 105EXPORT_SYMBOL_GPL(nfs_readdata_release);
102 106
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index c25cadf8f8c4..befbae0cce41 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -54,7 +54,6 @@
54#include <linux/parser.h> 54#include <linux/parser.h>
55#include <linux/nsproxy.h> 55#include <linux/nsproxy.h>
56#include <linux/rcupdate.h> 56#include <linux/rcupdate.h>
57#include <linux/kthread.h>
58 57
59#include <asm/uaccess.h> 58#include <asm/uaccess.h>
60 59
@@ -418,54 +417,6 @@ void nfs_sb_deactive(struct super_block *sb)
418} 417}
419EXPORT_SYMBOL_GPL(nfs_sb_deactive); 418EXPORT_SYMBOL_GPL(nfs_sb_deactive);
420 419
421static int nfs_deactivate_super_async_work(void *ptr)
422{
423 struct super_block *sb = ptr;
424
425 deactivate_super(sb);
426 module_put_and_exit(0);
427 return 0;
428}
429
430/*
431 * same effect as deactivate_super, but will do final unmount in kthread
432 * context
433 */
434static void nfs_deactivate_super_async(struct super_block *sb)
435{
436 struct task_struct *task;
437 char buf[INET6_ADDRSTRLEN + 1];
438 struct nfs_server *server = NFS_SB(sb);
439 struct nfs_client *clp = server->nfs_client;
440
441 if (!atomic_add_unless(&sb->s_active, -1, 1)) {
442 rcu_read_lock();
443 snprintf(buf, sizeof(buf),
444 rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_ADDR));
445 rcu_read_unlock();
446
447 __module_get(THIS_MODULE);
448 task = kthread_run(nfs_deactivate_super_async_work, sb,
449 "%s-deactivate-super", buf);
450 if (IS_ERR(task)) {
451 pr_err("%s: kthread_run: %ld\n",
452 __func__, PTR_ERR(task));
453 /* make synchronous call and hope for the best */
454 deactivate_super(sb);
455 module_put(THIS_MODULE);
456 }
457 }
458}
459
460void nfs_sb_deactive_async(struct super_block *sb)
461{
462 struct nfs_server *server = NFS_SB(sb);
463
464 if (atomic_dec_and_test(&server->active))
465 nfs_deactivate_super_async(sb);
466}
467EXPORT_SYMBOL_GPL(nfs_sb_deactive_async);
468
469/* 420/*
470 * Deliver file system statistics to userspace 421 * Deliver file system statistics to userspace
471 */ 422 */
@@ -1152,7 +1103,7 @@ static int nfs_get_option_str(substring_t args[], char **option)
1152{ 1103{
1153 kfree(*option); 1104 kfree(*option);
1154 *option = match_strdup(args); 1105 *option = match_strdup(args);
1155 return !option; 1106 return !*option;
1156} 1107}
1157 1108
1158static int nfs_get_option_ul(substring_t args[], unsigned long *option) 1109static int nfs_get_option_ul(substring_t args[], unsigned long *option)
@@ -2589,27 +2540,23 @@ nfs_xdev_mount(struct file_system_type *fs_type, int flags,
2589 struct nfs_server *server; 2540 struct nfs_server *server;
2590 struct dentry *mntroot = ERR_PTR(-ENOMEM); 2541 struct dentry *mntroot = ERR_PTR(-ENOMEM);
2591 struct nfs_subversion *nfs_mod = NFS_SB(data->sb)->nfs_client->cl_nfs_mod; 2542 struct nfs_subversion *nfs_mod = NFS_SB(data->sb)->nfs_client->cl_nfs_mod;
2592 int error;
2593 2543
2594 dprintk("--> nfs_xdev_mount_common()\n"); 2544 dprintk("--> nfs_xdev_mount()\n");
2595 2545
2596 mount_info.mntfh = mount_info.cloned->fh; 2546 mount_info.mntfh = mount_info.cloned->fh;
2597 2547
2598 /* create a new volume representation */ 2548 /* create a new volume representation */
2599 server = nfs_mod->rpc_ops->clone_server(NFS_SB(data->sb), data->fh, data->fattr, data->authflavor); 2549 server = nfs_mod->rpc_ops->clone_server(NFS_SB(data->sb), data->fh, data->fattr, data->authflavor);
2600 if (IS_ERR(server)) {
2601 error = PTR_ERR(server);
2602 goto out_err;
2603 }
2604 2550
2605 mntroot = nfs_fs_mount_common(server, flags, dev_name, &mount_info, nfs_mod); 2551 if (IS_ERR(server))
2606 dprintk("<-- nfs_xdev_mount_common() = 0\n"); 2552 mntroot = ERR_CAST(server);
2607out: 2553 else
2608 return mntroot; 2554 mntroot = nfs_fs_mount_common(server, flags,
2555 dev_name, &mount_info, nfs_mod);
2609 2556
2610out_err: 2557 dprintk("<-- nfs_xdev_mount() = %ld\n",
2611 dprintk("<-- nfs_xdev_mount_common() = %d [error]\n", error); 2558 IS_ERR(mntroot) ? PTR_ERR(mntroot) : 0L);
2612 goto out; 2559 return mntroot;
2613} 2560}
2614 2561
2615#if IS_ENABLED(CONFIG_NFS_V4) 2562#if IS_ENABLED(CONFIG_NFS_V4)
diff --git a/fs/nfs/unlink.c b/fs/nfs/unlink.c
index 3f79c77153b8..d26a32f5b53b 100644
--- a/fs/nfs/unlink.c
+++ b/fs/nfs/unlink.c
@@ -95,7 +95,7 @@ static void nfs_async_unlink_release(void *calldata)
95 95
96 nfs_dec_sillycount(data->dir); 96 nfs_dec_sillycount(data->dir);
97 nfs_free_unlinkdata(data); 97 nfs_free_unlinkdata(data);
98 nfs_sb_deactive_async(sb); 98 nfs_sb_deactive(sb);
99} 99}
100 100
101static void nfs_unlink_prepare(struct rpc_task *task, void *calldata) 101static void nfs_unlink_prepare(struct rpc_task *task, void *calldata)
@@ -268,8 +268,7 @@ nfs_async_unlink(struct inode *dir, struct dentry *dentry)
268 * point dentry is definitely not a root, so we won't need 268 * point dentry is definitely not a root, so we won't need
269 * that anymore. 269 * that anymore.
270 */ 270 */
271 if (devname_garbage) 271 kfree(devname_garbage);
272 kfree(devname_garbage);
273 return 0; 272 return 0;
274out_unlock: 273out_unlock:
275 spin_unlock(&dentry->d_lock); 274 spin_unlock(&dentry->d_lock);
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index b673be31590e..c483cc50b82e 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -126,12 +126,16 @@ void nfs_writedata_release(struct nfs_write_data *wdata)
126 put_nfs_open_context(wdata->args.context); 126 put_nfs_open_context(wdata->args.context);
127 if (wdata->pages.pagevec != wdata->pages.page_array) 127 if (wdata->pages.pagevec != wdata->pages.page_array)
128 kfree(wdata->pages.pagevec); 128 kfree(wdata->pages.pagevec);
129 if (wdata != &write_header->rpc_data) 129 if (wdata == &write_header->rpc_data) {
130 kfree(wdata);
131 else
132 wdata->header = NULL; 130 wdata->header = NULL;
131 wdata = NULL;
132 }
133 if (atomic_dec_and_test(&hdr->refcnt)) 133 if (atomic_dec_and_test(&hdr->refcnt))
134 hdr->completion_ops->completion(hdr); 134 hdr->completion_ops->completion(hdr);
135 /* Note: we only free the rpc_task after callbacks are done.
136 * See the comment in rpc_free_task() for why
137 */
138 kfree(wdata);
135} 139}
136EXPORT_SYMBOL_GPL(nfs_writedata_release); 140EXPORT_SYMBOL_GPL(nfs_writedata_release);
137 141
diff --git a/fs/nfsd/Kconfig b/fs/nfsd/Kconfig
index 8df1ea4a6ff9..430b6872806f 100644
--- a/fs/nfsd/Kconfig
+++ b/fs/nfsd/Kconfig
@@ -65,8 +65,8 @@ config NFSD_V3_ACL
65 If unsure, say N. 65 If unsure, say N.
66 66
67config NFSD_V4 67config NFSD_V4
68 bool "NFS server support for NFS version 4 (EXPERIMENTAL)" 68 bool "NFS server support for NFS version 4"
69 depends on NFSD && PROC_FS && EXPERIMENTAL 69 depends on NFSD && PROC_FS
70 select NFSD_V3 70 select NFSD_V3
71 select FS_POSIX_ACL 71 select FS_POSIX_ACL
72 select SUNRPC_GSS 72 select SUNRPC_GSS
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 0af6d3c114ed..9e7103b6e0ad 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -151,7 +151,7 @@ get_nfs4_file(struct nfs4_file *fi)
151} 151}
152 152
153static int num_delegations; 153static int num_delegations;
154unsigned int max_delegations; 154unsigned long max_delegations;
155 155
156/* 156/*
157 * Open owner state (share locks) 157 * Open owner state (share locks)
@@ -700,8 +700,8 @@ static int nfsd4_get_drc_mem(int slotsize, u32 num)
700 num = min_t(u32, num, NFSD_MAX_SLOTS_PER_SESSION); 700 num = min_t(u32, num, NFSD_MAX_SLOTS_PER_SESSION);
701 701
702 spin_lock(&nfsd_drc_lock); 702 spin_lock(&nfsd_drc_lock);
703 avail = min_t(int, NFSD_MAX_MEM_PER_SESSION, 703 avail = min((unsigned long)NFSD_MAX_MEM_PER_SESSION,
704 nfsd_drc_max_mem - nfsd_drc_mem_used); 704 nfsd_drc_max_mem - nfsd_drc_mem_used);
705 num = min_t(int, num, avail / slotsize); 705 num = min_t(int, num, avail / slotsize);
706 nfsd_drc_mem_used += num * slotsize; 706 nfsd_drc_mem_used += num * slotsize;
707 spin_unlock(&nfsd_drc_lock); 707 spin_unlock(&nfsd_drc_lock);
diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h
index de23db255c69..07a473fd49bc 100644
--- a/fs/nfsd/nfsd.h
+++ b/fs/nfsd/nfsd.h
@@ -56,8 +56,8 @@ extern struct svc_version nfsd_version2, nfsd_version3,
56extern u32 nfsd_supported_minorversion; 56extern u32 nfsd_supported_minorversion;
57extern struct mutex nfsd_mutex; 57extern struct mutex nfsd_mutex;
58extern spinlock_t nfsd_drc_lock; 58extern spinlock_t nfsd_drc_lock;
59extern unsigned int nfsd_drc_max_mem; 59extern unsigned long nfsd_drc_max_mem;
60extern unsigned int nfsd_drc_mem_used; 60extern unsigned long nfsd_drc_mem_used;
61 61
62extern const struct seq_operations nfs_exports_op; 62extern const struct seq_operations nfs_exports_op;
63 63
@@ -106,7 +106,7 @@ static inline int nfsd_v4client(struct svc_rqst *rq)
106 * NFSv4 State 106 * NFSv4 State
107 */ 107 */
108#ifdef CONFIG_NFSD_V4 108#ifdef CONFIG_NFSD_V4
109extern unsigned int max_delegations; 109extern unsigned long max_delegations;
110void nfs4_state_init(void); 110void nfs4_state_init(void);
111int nfsd4_init_slabs(void); 111int nfsd4_init_slabs(void);
112void nfsd4_free_slabs(void); 112void nfsd4_free_slabs(void);
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index cee62ab9d4a3..be7af509930c 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -59,8 +59,8 @@ DEFINE_MUTEX(nfsd_mutex);
59 * nfsd_drc_pages_used tracks the current version 4.1 DRC memory usage. 59 * nfsd_drc_pages_used tracks the current version 4.1 DRC memory usage.
60 */ 60 */
61spinlock_t nfsd_drc_lock; 61spinlock_t nfsd_drc_lock;
62unsigned int nfsd_drc_max_mem; 62unsigned long nfsd_drc_max_mem;
63unsigned int nfsd_drc_mem_used; 63unsigned long nfsd_drc_mem_used;
64 64
65#if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL) 65#if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL)
66static struct svc_stat nfsd_acl_svcstats; 66static struct svc_stat nfsd_acl_svcstats;
@@ -342,7 +342,7 @@ static void set_max_drc(void)
342 >> NFSD_DRC_SIZE_SHIFT) * PAGE_SIZE; 342 >> NFSD_DRC_SIZE_SHIFT) * PAGE_SIZE;
343 nfsd_drc_mem_used = 0; 343 nfsd_drc_mem_used = 0;
344 spin_lock_init(&nfsd_drc_lock); 344 spin_lock_init(&nfsd_drc_lock);
345 dprintk("%s nfsd_drc_max_mem %u \n", __func__, nfsd_drc_max_mem); 345 dprintk("%s nfsd_drc_max_mem %lu \n", __func__, nfsd_drc_max_mem);
346} 346}
347 347
348static int nfsd_get_default_max_blksize(void) 348static int nfsd_get_default_max_blksize(void)
diff --git a/fs/nilfs2/Kconfig b/fs/nilfs2/Kconfig
index 251da07b2a1d..80da8eb27393 100644
--- a/fs/nilfs2/Kconfig
+++ b/fs/nilfs2/Kconfig
@@ -1,6 +1,5 @@
1config NILFS2_FS 1config NILFS2_FS
2 tristate "NILFS2 file system support (EXPERIMENTAL)" 2 tristate "NILFS2 file system support"
3 depends on EXPERIMENTAL
4 select CRC32 3 select CRC32
5 help 4 help
6 NILFS2 is a log-structured file system (LFS) supporting continuous 5 NILFS2 is a log-structured file system (LFS) supporting continuous
diff --git a/fs/nilfs2/file.c b/fs/nilfs2/file.c
index 61946883025c..bec4af6eab13 100644
--- a/fs/nilfs2/file.c
+++ b/fs/nilfs2/file.c
@@ -126,7 +126,7 @@ static int nilfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
126 nilfs_transaction_commit(inode->i_sb); 126 nilfs_transaction_commit(inode->i_sb);
127 127
128 mapped: 128 mapped:
129 wait_on_page_writeback(page); 129 wait_for_stable_page(page);
130 out: 130 out:
131 sb_end_pagefault(inode->i_sb); 131 sb_end_pagefault(inode->i_sb);
132 return block_page_mkwrite_return(ret); 132 return block_page_mkwrite_return(ret);
diff --git a/fs/nilfs2/ioctl.c b/fs/nilfs2/ioctl.c
index fdb180769485..f3859354e41a 100644
--- a/fs/nilfs2/ioctl.c
+++ b/fs/nilfs2/ioctl.c
@@ -664,8 +664,11 @@ static int nilfs_ioctl_clean_segments(struct inode *inode, struct file *filp,
664 if (ret < 0) 664 if (ret < 0)
665 printk(KERN_ERR "NILFS: GC failed during preparation: " 665 printk(KERN_ERR "NILFS: GC failed during preparation: "
666 "cannot read source blocks: err=%d\n", ret); 666 "cannot read source blocks: err=%d\n", ret);
667 else 667 else {
668 if (nilfs_sb_need_update(nilfs))
669 set_nilfs_discontinued(nilfs);
668 ret = nilfs_clean_segments(inode->i_sb, argv, kbufs); 670 ret = nilfs_clean_segments(inode->i_sb, argv, kbufs);
671 }
669 672
670 nilfs_remove_all_gcinodes(nilfs); 673 nilfs_remove_all_gcinodes(nilfs);
671 clear_nilfs_gc_running(nilfs); 674 clear_nilfs_gc_running(nilfs);
diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c
index 228a2c2ad8d7..07f7a92fe88e 100644
--- a/fs/notify/inotify/inotify_user.c
+++ b/fs/notify/inotify/inotify_user.c
@@ -576,8 +576,6 @@ static int inotify_update_existing_watch(struct fsnotify_group *group,
576 576
577 /* don't allow invalid bits: we don't want flags set */ 577 /* don't allow invalid bits: we don't want flags set */
578 mask = inotify_arg_to_mask(arg); 578 mask = inotify_arg_to_mask(arg);
579 if (unlikely(!(mask & IN_ALL_EVENTS)))
580 return -EINVAL;
581 579
582 fsn_mark = fsnotify_find_inode_mark(group, inode); 580 fsn_mark = fsnotify_find_inode_mark(group, inode);
583 if (!fsn_mark) 581 if (!fsn_mark)
@@ -629,8 +627,6 @@ static int inotify_new_watch(struct fsnotify_group *group,
629 627
630 /* don't allow invalid bits: we don't want flags set */ 628 /* don't allow invalid bits: we don't want flags set */
631 mask = inotify_arg_to_mask(arg); 629 mask = inotify_arg_to_mask(arg);
632 if (unlikely(!(mask & IN_ALL_EVENTS)))
633 return -EINVAL;
634 630
635 tmp_i_mark = kmem_cache_alloc(inotify_inode_mark_cachep, GFP_KERNEL); 631 tmp_i_mark = kmem_cache_alloc(inotify_inode_mark_cachep, GFP_KERNEL);
636 if (unlikely(!tmp_i_mark)) 632 if (unlikely(!tmp_i_mark))
diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index 31b9463fba1f..b8a9d87231b1 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -6751,8 +6751,7 @@ int ocfs2_zero_range_for_truncate(struct inode *inode, handle_t *handle,
6751 mlog_errno(ret); 6751 mlog_errno(ret);
6752 6752
6753out: 6753out:
6754 if (pages) 6754 kfree(pages);
6755 kfree(pages);
6756 6755
6757 return ret; 6756 return ret;
6758} 6757}
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index 657743254eb9..9796330d8f04 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -1194,6 +1194,7 @@ static int ocfs2_grab_pages_for_write(struct address_space *mapping,
1194 goto out; 1194 goto out;
1195 } 1195 }
1196 } 1196 }
1197 wait_for_stable_page(wc->w_pages[i]);
1197 1198
1198 if (index == target_index) 1199 if (index == target_index)
1199 wc->w_target_page = wc->w_pages[i]; 1200 wc->w_target_page = wc->w_pages[i];
diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c
index f7c648d7d6bf..42252bf64b51 100644
--- a/fs/ocfs2/cluster/heartbeat.c
+++ b/fs/ocfs2/cluster/heartbeat.c
@@ -1471,8 +1471,7 @@ static void o2hb_region_release(struct config_item *item)
1471 1471
1472 mlog(ML_HEARTBEAT, "hb region release (%s)\n", reg->hr_dev_name); 1472 mlog(ML_HEARTBEAT, "hb region release (%s)\n", reg->hr_dev_name);
1473 1473
1474 if (reg->hr_tmp_block) 1474 kfree(reg->hr_tmp_block);
1475 kfree(reg->hr_tmp_block);
1476 1475
1477 if (reg->hr_slot_data) { 1476 if (reg->hr_slot_data) {
1478 for (i = 0; i < reg->hr_num_pages; i++) { 1477 for (i = 0; i < reg->hr_num_pages; i++) {
@@ -1486,8 +1485,7 @@ static void o2hb_region_release(struct config_item *item)
1486 if (reg->hr_bdev) 1485 if (reg->hr_bdev)
1487 blkdev_put(reg->hr_bdev, FMODE_READ|FMODE_WRITE); 1486 blkdev_put(reg->hr_bdev, FMODE_READ|FMODE_WRITE);
1488 1487
1489 if (reg->hr_slots) 1488 kfree(reg->hr_slots);
1490 kfree(reg->hr_slots);
1491 1489
1492 kfree(reg->hr_db_regnum); 1490 kfree(reg->hr_db_regnum);
1493 kfree(reg->hr_db_livenodes); 1491 kfree(reg->hr_db_livenodes);
diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c
index 1bfe8802cc1e..0d2bf566e39a 100644
--- a/fs/ocfs2/cluster/tcp.c
+++ b/fs/ocfs2/cluster/tcp.c
@@ -870,7 +870,7 @@ int o2net_register_handler(u32 msg_type, u32 key, u32 max_len,
870 /* we've had some trouble with handlers seemingly vanishing. */ 870 /* we've had some trouble with handlers seemingly vanishing. */
871 mlog_bug_on_msg(o2net_handler_tree_lookup(msg_type, key, &p, 871 mlog_bug_on_msg(o2net_handler_tree_lookup(msg_type, key, &p,
872 &parent) == NULL, 872 &parent) == NULL,
873 "couldn't find handler we *just* registerd " 873 "couldn't find handler we *just* registered "
874 "for type %u key %08x\n", msg_type, key); 874 "for type %u key %08x\n", msg_type, key);
875 } 875 }
876 write_unlock(&o2net_handler_lock); 876 write_unlock(&o2net_handler_lock);
@@ -1165,10 +1165,8 @@ out:
1165 o2net_debug_del_nst(&nst); /* must be before dropping sc and node */ 1165 o2net_debug_del_nst(&nst); /* must be before dropping sc and node */
1166 if (sc) 1166 if (sc)
1167 sc_put(sc); 1167 sc_put(sc);
1168 if (vec) 1168 kfree(vec);
1169 kfree(vec); 1169 kfree(msg);
1170 if (msg)
1171 kfree(msg);
1172 o2net_complete_nsw(nn, &nsw, 0, 0, 0); 1170 o2net_complete_nsw(nn, &nsw, 0, 0, 0);
1173 return ret; 1171 return ret;
1174} 1172}
diff --git a/fs/ocfs2/dlm/dlmdomain.c b/fs/ocfs2/dlm/dlmdomain.c
index 9e89d70df337..dbb17c07656a 100644
--- a/fs/ocfs2/dlm/dlmdomain.c
+++ b/fs/ocfs2/dlm/dlmdomain.c
@@ -319,9 +319,7 @@ static void dlm_free_ctxt_mem(struct dlm_ctxt *dlm)
319 if (dlm->master_hash) 319 if (dlm->master_hash)
320 dlm_free_pagevec((void **)dlm->master_hash, DLM_HASH_PAGES); 320 dlm_free_pagevec((void **)dlm->master_hash, DLM_HASH_PAGES);
321 321
322 if (dlm->name) 322 kfree(dlm->name);
323 kfree(dlm->name);
324
325 kfree(dlm); 323 kfree(dlm);
326} 324}
327 325
diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c
index f99af1cb849c..12ae194ac943 100644
--- a/fs/ocfs2/dlmglue.c
+++ b/fs/ocfs2/dlmglue.c
@@ -2545,6 +2545,7 @@ int ocfs2_super_lock(struct ocfs2_super *osb,
2545 * everything is up to the caller :) */ 2545 * everything is up to the caller :) */
2546 status = ocfs2_should_refresh_lock_res(lockres); 2546 status = ocfs2_should_refresh_lock_res(lockres);
2547 if (status < 0) { 2547 if (status < 0) {
2548 ocfs2_cluster_unlock(osb, lockres, level);
2548 mlog_errno(status); 2549 mlog_errno(status);
2549 goto bail; 2550 goto bail;
2550 } 2551 }
@@ -2553,8 +2554,10 @@ int ocfs2_super_lock(struct ocfs2_super *osb,
2553 2554
2554 ocfs2_complete_lock_res_refresh(lockres, status); 2555 ocfs2_complete_lock_res_refresh(lockres, status);
2555 2556
2556 if (status < 0) 2557 if (status < 0) {
2558 ocfs2_cluster_unlock(osb, lockres, level);
2557 mlog_errno(status); 2559 mlog_errno(status);
2560 }
2558 ocfs2_track_lock_refresh(lockres); 2561 ocfs2_track_lock_refresh(lockres);
2559 } 2562 }
2560bail: 2563bail:
diff --git a/fs/ocfs2/extent_map.c b/fs/ocfs2/extent_map.c
index f487aa343442..1c39efb71bab 100644
--- a/fs/ocfs2/extent_map.c
+++ b/fs/ocfs2/extent_map.c
@@ -282,8 +282,7 @@ search:
282 spin_unlock(&oi->ip_lock); 282 spin_unlock(&oi->ip_lock);
283 283
284out: 284out:
285 if (new_emi) 285 kfree(new_emi);
286 kfree(new_emi);
287} 286}
288 287
289static int ocfs2_last_eb_is_empty(struct inode *inode, 288static int ocfs2_last_eb_is_empty(struct inode *inode,
diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c
index 2dd36af79e26..8eccfabcd12e 100644
--- a/fs/ocfs2/journal.c
+++ b/fs/ocfs2/journal.c
@@ -1234,11 +1234,8 @@ static void ocfs2_queue_recovery_completion(struct ocfs2_journal *journal,
1234 /* Though we wish to avoid it, we are in fact safe in 1234 /* Though we wish to avoid it, we are in fact safe in
1235 * skipping local alloc cleanup as fsck.ocfs2 is more 1235 * skipping local alloc cleanup as fsck.ocfs2 is more
1236 * than capable of reclaiming unused space. */ 1236 * than capable of reclaiming unused space. */
1237 if (la_dinode) 1237 kfree(la_dinode);
1238 kfree(la_dinode); 1238 kfree(tl_dinode);
1239
1240 if (tl_dinode)
1241 kfree(tl_dinode);
1242 1239
1243 if (qrec) 1240 if (qrec)
1244 ocfs2_free_quota_recovery(qrec); 1241 ocfs2_free_quota_recovery(qrec);
@@ -1408,8 +1405,7 @@ bail:
1408 1405
1409 mutex_unlock(&osb->recovery_lock); 1406 mutex_unlock(&osb->recovery_lock);
1410 1407
1411 if (rm_quota) 1408 kfree(rm_quota);
1412 kfree(rm_quota);
1413 1409
1414 /* no one is callint kthread_stop() for us so the kthread() api 1410 /* no one is callint kthread_stop() for us so the kthread() api
1415 * requires that we call do_exit(). And it isn't exported, but 1411 * requires that we call do_exit(). And it isn't exported, but
diff --git a/fs/ocfs2/localalloc.c b/fs/ocfs2/localalloc.c
index a9f78c74d687..aebeacd807c3 100644
--- a/fs/ocfs2/localalloc.c
+++ b/fs/ocfs2/localalloc.c
@@ -476,8 +476,7 @@ out:
476 if (local_alloc_inode) 476 if (local_alloc_inode)
477 iput(local_alloc_inode); 477 iput(local_alloc_inode);
478 478
479 if (alloc_copy) 479 kfree(alloc_copy);
480 kfree(alloc_copy);
481} 480}
482 481
483/* 482/*
@@ -534,7 +533,7 @@ int ocfs2_begin_local_alloc_recovery(struct ocfs2_super *osb,
534 mlog_errno(status); 533 mlog_errno(status);
535 534
536bail: 535bail:
537 if ((status < 0) && (*alloc_copy)) { 536 if (status < 0) {
538 kfree(*alloc_copy); 537 kfree(*alloc_copy);
539 *alloc_copy = NULL; 538 *alloc_copy = NULL;
540 } 539 }
@@ -1290,8 +1289,7 @@ bail:
1290 if (main_bm_inode) 1289 if (main_bm_inode)
1291 iput(main_bm_inode); 1290 iput(main_bm_inode);
1292 1291
1293 if (alloc_copy) 1292 kfree(alloc_copy);
1294 kfree(alloc_copy);
1295 1293
1296 if (ac) 1294 if (ac)
1297 ocfs2_free_alloc_context(ac); 1295 ocfs2_free_alloc_context(ac);
diff --git a/fs/ocfs2/stack_o2cb.c b/fs/ocfs2/stack_o2cb.c
index 94368017edb3..bf1f8930456f 100644
--- a/fs/ocfs2/stack_o2cb.c
+++ b/fs/ocfs2/stack_o2cb.c
@@ -376,7 +376,7 @@ static int o2cb_cluster_connect(struct ocfs2_cluster_connection *conn)
376 dlm_register_eviction_cb(dlm, &priv->op_eviction_cb); 376 dlm_register_eviction_cb(dlm, &priv->op_eviction_cb);
377 377
378out_free: 378out_free:
379 if (rc && conn->cc_private) 379 if (rc)
380 kfree(conn->cc_private); 380 kfree(conn->cc_private);
381 381
382out: 382out:
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index 0e91ec22a940..9b6910dec4ba 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -2525,8 +2525,7 @@ static int ocfs2_check_volume(struct ocfs2_super *osb)
2525 mlog_errno(status); 2525 mlog_errno(status);
2526 2526
2527finally: 2527finally:
2528 if (local_alloc) 2528 kfree(local_alloc);
2529 kfree(local_alloc);
2530 2529
2531 if (status) 2530 if (status)
2532 mlog_errno(status); 2531 mlog_errno(status);
@@ -2553,8 +2552,7 @@ static void ocfs2_delete_osb(struct ocfs2_super *osb)
2553 * we free it here. 2552 * we free it here.
2554 */ 2553 */
2555 kfree(osb->journal); 2554 kfree(osb->journal);
2556 if (osb->local_alloc_copy) 2555 kfree(osb->local_alloc_copy);
2557 kfree(osb->local_alloc_copy);
2558 kfree(osb->uuid_str); 2556 kfree(osb->uuid_str);
2559 ocfs2_put_dlm_debug(osb->osb_dlm_debug); 2557 ocfs2_put_dlm_debug(osb->osb_dlm_debug);
2560 memset(osb, 0, sizeof(struct ocfs2_super)); 2558 memset(osb, 0, sizeof(struct ocfs2_super));
diff --git a/fs/ocfs2/sysfile.c b/fs/ocfs2/sysfile.c
index 3d635f4bbb20..f053688d22a3 100644
--- a/fs/ocfs2/sysfile.c
+++ b/fs/ocfs2/sysfile.c
@@ -91,8 +91,7 @@ static struct inode **get_local_system_inode(struct ocfs2_super *osb,
91 } else 91 } else
92 osb->local_system_inodes = local_system_inodes; 92 osb->local_system_inodes = local_system_inodes;
93 spin_unlock(&osb->osb_lock); 93 spin_unlock(&osb->osb_lock);
94 if (unlikely(free)) 94 kfree(free);
95 kfree(free);
96 } 95 }
97 96
98 index = (slot * NUM_LOCAL_SYSTEM_INODES) + 97 index = (slot * NUM_LOCAL_SYSTEM_INODES) +
diff --git a/fs/proc/Makefile b/fs/proc/Makefile
index 981b05601931..712f24db9600 100644
--- a/fs/proc/Makefile
+++ b/fs/proc/Makefile
@@ -8,7 +8,8 @@ proc-y := nommu.o task_nommu.o
8proc-$(CONFIG_MMU) := mmu.o task_mmu.o 8proc-$(CONFIG_MMU) := mmu.o task_mmu.o
9 9
10proc-y += inode.o root.o base.o generic.o array.o \ 10proc-y += inode.o root.o base.o generic.o array.o \
11 proc_tty.o fd.o 11 fd.o
12proc-$(CONFIG_TTY) += proc_tty.o
12proc-y += cmdline.o 13proc-y += cmdline.o
13proc-y += consoles.o 14proc-y += consoles.o
14proc-y += cpuinfo.o 15proc-y += cpuinfo.o
diff --git a/fs/proc/array.c b/fs/proc/array.c
index 6a91e6ffbcbd..f7ed9ee46eb9 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -449,7 +449,7 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
449 do { 449 do {
450 min_flt += t->min_flt; 450 min_flt += t->min_flt;
451 maj_flt += t->maj_flt; 451 maj_flt += t->maj_flt;
452 gtime += t->gtime; 452 gtime += task_gtime(t);
453 t = next_thread(t); 453 t = next_thread(t);
454 } while (t != task); 454 } while (t != task);
455 455
@@ -472,7 +472,7 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
472 min_flt = task->min_flt; 472 min_flt = task->min_flt;
473 maj_flt = task->maj_flt; 473 maj_flt = task->maj_flt;
474 task_cputime_adjusted(task, &utime, &stime); 474 task_cputime_adjusted(task, &utime, &stime);
475 gtime = task->gtime; 475 gtime = task_gtime(task);
476 } 476 }
477 477
478 /* scale priority and nice values from timeslices to -20..20 */ 478 /* scale priority and nice values from timeslices to -20..20 */
diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c
index 80e4645f7990..1efaaa19c4f3 100644
--- a/fs/proc/meminfo.c
+++ b/fs/proc/meminfo.c
@@ -40,7 +40,7 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
40 * sysctl_overcommit_ratio / 100) + total_swap_pages; 40 * sysctl_overcommit_ratio / 100) + total_swap_pages;
41 41
42 cached = global_page_state(NR_FILE_PAGES) - 42 cached = global_page_state(NR_FILE_PAGES) -
43 total_swapcache_pages - i.bufferram; 43 total_swapcache_pages() - i.bufferram;
44 if (cached < 0) 44 if (cached < 0)
45 cached = 0; 45 cached = 0;
46 46
@@ -109,7 +109,7 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
109 K(i.freeram), 109 K(i.freeram),
110 K(i.bufferram), 110 K(i.bufferram),
111 K(cached), 111 K(cached),
112 K(total_swapcache_pages), 112 K(total_swapcache_pages()),
113 K(pages[LRU_ACTIVE_ANON] + pages[LRU_ACTIVE_FILE]), 113 K(pages[LRU_ACTIVE_ANON] + pages[LRU_ACTIVE_FILE]),
114 K(pages[LRU_INACTIVE_ANON] + pages[LRU_INACTIVE_FILE]), 114 K(pages[LRU_INACTIVE_ANON] + pages[LRU_INACTIVE_FILE]),
115 K(pages[LRU_ACTIVE_ANON]), 115 K(pages[LRU_ACTIVE_ANON]),
@@ -158,7 +158,7 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
158 vmi.used >> 10, 158 vmi.used >> 10,
159 vmi.largest_chunk >> 10 159 vmi.largest_chunk >> 10
160#ifdef CONFIG_MEMORY_FAILURE 160#ifdef CONFIG_MEMORY_FAILURE
161 ,atomic_long_read(&mce_bad_pages) << (PAGE_SHIFT - 10) 161 ,atomic_long_read(&num_poisoned_pages) << (PAGE_SHIFT - 10)
162#endif 162#endif
163#ifdef CONFIG_TRANSPARENT_HUGEPAGE 163#ifdef CONFIG_TRANSPARENT_HUGEPAGE
164 ,K(global_page_state(NR_ANON_TRANSPARENT_HUGEPAGES) * 164 ,K(global_page_state(NR_ANON_TRANSPARENT_HUGEPAGES) *
diff --git a/fs/proc/proc_net.c b/fs/proc/proc_net.c
index fe72cd073dea..3131a03d7d37 100644
--- a/fs/proc/proc_net.c
+++ b/fs/proc/proc_net.c
@@ -177,20 +177,6 @@ const struct file_operations proc_net_operations = {
177 .readdir = proc_tgid_net_readdir, 177 .readdir = proc_tgid_net_readdir,
178}; 178};
179 179
180
181struct proc_dir_entry *proc_net_fops_create(struct net *net,
182 const char *name, umode_t mode, const struct file_operations *fops)
183{
184 return proc_create(name, mode, net->proc_net, fops);
185}
186EXPORT_SYMBOL_GPL(proc_net_fops_create);
187
188void proc_net_remove(struct net *net, const char *name)
189{
190 remove_proc_entry(name, net->proc_net);
191}
192EXPORT_SYMBOL_GPL(proc_net_remove);
193
194static __net_init int proc_net_ns_init(struct net *net) 180static __net_init int proc_net_ns_init(struct net *net)
195{ 181{
196 struct proc_dir_entry *netd, *net_statd; 182 struct proc_dir_entry *netd, *net_statd;
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 448455b7fd91..ca5ce7f9f800 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -1278,7 +1278,7 @@ static int show_numa_map(struct seq_file *m, void *v, int is_pid)
1278 walk.mm = mm; 1278 walk.mm = mm;
1279 1279
1280 pol = get_vma_policy(task, vma, vma->vm_start); 1280 pol = get_vma_policy(task, vma, vma->vm_start);
1281 mpol_to_str(buffer, sizeof(buffer), pol, 0); 1281 mpol_to_str(buffer, sizeof(buffer), pol);
1282 mpol_cond_put(pol); 1282 mpol_cond_put(pol);
1283 1283
1284 seq_printf(m, "%08lx %s", vma->vm_start, buffer); 1284 seq_printf(m, "%08lx %s", vma->vm_start, buffer);
diff --git a/fs/pstore/inode.c b/fs/pstore/inode.c
index 67de74ca85f4..e4bcb2cf055a 100644
--- a/fs/pstore/inode.c
+++ b/fs/pstore/inode.c
@@ -418,9 +418,25 @@ static struct file_system_type pstore_fs_type = {
418 .kill_sb = pstore_kill_sb, 418 .kill_sb = pstore_kill_sb,
419}; 419};
420 420
421static struct kobject *pstore_kobj;
422
421static int __init init_pstore_fs(void) 423static int __init init_pstore_fs(void)
422{ 424{
423 return register_filesystem(&pstore_fs_type); 425 int err = 0;
426
427 /* Create a convenient mount point for people to access pstore */
428 pstore_kobj = kobject_create_and_add("pstore", fs_kobj);
429 if (!pstore_kobj) {
430 err = -ENOMEM;
431 goto out;
432 }
433
434 err = register_filesystem(&pstore_fs_type);
435 if (err < 0)
436 kobject_put(pstore_kobj);
437
438out:
439 return err;
424} 440}
425module_init(init_pstore_fs) 441module_init(init_pstore_fs)
426 442
diff --git a/fs/pstore/platform.c b/fs/pstore/platform.c
index 5ea2e77ff023..86d1038b5a12 100644
--- a/fs/pstore/platform.c
+++ b/fs/pstore/platform.c
@@ -96,6 +96,27 @@ static const char *get_reason_str(enum kmsg_dump_reason reason)
96 } 96 }
97} 97}
98 98
99bool pstore_cannot_block_path(enum kmsg_dump_reason reason)
100{
101 /*
102 * In case of NMI path, pstore shouldn't be blocked
103 * regardless of reason.
104 */
105 if (in_nmi())
106 return true;
107
108 switch (reason) {
109 /* In panic case, other cpus are stopped by smp_send_stop(). */
110 case KMSG_DUMP_PANIC:
111 /* Emergency restart shouldn't be blocked by spin lock. */
112 case KMSG_DUMP_EMERG:
113 return true;
114 default:
115 return false;
116 }
117}
118EXPORT_SYMBOL_GPL(pstore_cannot_block_path);
119
99/* 120/*
100 * callback from kmsg_dump. (s2,l2) has the most recently 121 * callback from kmsg_dump. (s2,l2) has the most recently
101 * written bytes, older bytes are in (s1,l1). Save as much 122 * written bytes, older bytes are in (s1,l1). Save as much
@@ -114,10 +135,12 @@ static void pstore_dump(struct kmsg_dumper *dumper,
114 135
115 why = get_reason_str(reason); 136 why = get_reason_str(reason);
116 137
117 if (in_nmi()) { 138 if (pstore_cannot_block_path(reason)) {
118 is_locked = spin_trylock(&psinfo->buf_lock); 139 is_locked = spin_trylock_irqsave(&psinfo->buf_lock, flags);
119 if (!is_locked) 140 if (!is_locked) {
120 pr_err("pstore dump routine blocked in NMI, may corrupt error record\n"); 141 pr_err("pstore dump routine blocked in %s path, may corrupt error record\n"
142 , in_nmi() ? "NMI" : why);
143 }
121 } else 144 } else
122 spin_lock_irqsave(&psinfo->buf_lock, flags); 145 spin_lock_irqsave(&psinfo->buf_lock, flags);
123 oopscount++; 146 oopscount++;
@@ -143,9 +166,9 @@ static void pstore_dump(struct kmsg_dumper *dumper,
143 total += hsize + len; 166 total += hsize + len;
144 part++; 167 part++;
145 } 168 }
146 if (in_nmi()) { 169 if (pstore_cannot_block_path(reason)) {
147 if (is_locked) 170 if (is_locked)
148 spin_unlock(&psinfo->buf_lock); 171 spin_unlock_irqrestore(&psinfo->buf_lock, flags);
149 } else 172 } else
150 spin_unlock_irqrestore(&psinfo->buf_lock, flags); 173 spin_unlock_irqrestore(&psinfo->buf_lock, flags);
151} 174}
diff --git a/fs/pstore/ram.c b/fs/pstore/ram.c
index f883e7e74305..288f068740f6 100644
--- a/fs/pstore/ram.c
+++ b/fs/pstore/ram.c
@@ -167,12 +167,16 @@ static ssize_t ramoops_pstore_read(u64 *id, enum pstore_type_id *type,
167static size_t ramoops_write_kmsg_hdr(struct persistent_ram_zone *prz) 167static size_t ramoops_write_kmsg_hdr(struct persistent_ram_zone *prz)
168{ 168{
169 char *hdr; 169 char *hdr;
170 struct timeval timestamp; 170 struct timespec timestamp;
171 size_t len; 171 size_t len;
172 172
173 do_gettimeofday(&timestamp); 173 /* Report zeroed timestamp if called before timekeeping has resumed. */
174 if (__getnstimeofday(&timestamp)) {
175 timestamp.tv_sec = 0;
176 timestamp.tv_nsec = 0;
177 }
174 hdr = kasprintf(GFP_ATOMIC, RAMOOPS_KERNMSG_HDR "%lu.%lu\n", 178 hdr = kasprintf(GFP_ATOMIC, RAMOOPS_KERNMSG_HDR "%lu.%lu\n",
175 (long)timestamp.tv_sec, (long)timestamp.tv_usec); 179 (long)timestamp.tv_sec, (long)(timestamp.tv_nsec / 1000));
176 WARN_ON_ONCE(!hdr); 180 WARN_ON_ONCE(!hdr);
177 len = hdr ? strlen(hdr) : 0; 181 len = hdr ? strlen(hdr) : 0;
178 persistent_ram_write(prz, hdr, len); 182 persistent_ram_write(prz, hdr, len);
@@ -291,9 +295,8 @@ static void ramoops_free_przs(struct ramoops_context *cxt)
291 kfree(cxt->przs); 295 kfree(cxt->przs);
292} 296}
293 297
294static int __devinit ramoops_init_przs(struct device *dev, 298static int ramoops_init_przs(struct device *dev, struct ramoops_context *cxt,
295 struct ramoops_context *cxt, 299 phys_addr_t *paddr, size_t dump_mem_sz)
296 phys_addr_t *paddr, size_t dump_mem_sz)
297{ 300{
298 int err = -ENOMEM; 301 int err = -ENOMEM;
299 int i; 302 int i;
@@ -336,10 +339,9 @@ fail_prz:
336 return err; 339 return err;
337} 340}
338 341
339static int __devinit ramoops_init_prz(struct device *dev, 342static int ramoops_init_prz(struct device *dev, struct ramoops_context *cxt,
340 struct ramoops_context *cxt, 343 struct persistent_ram_zone **prz,
341 struct persistent_ram_zone **prz, 344 phys_addr_t *paddr, size_t sz, u32 sig)
342 phys_addr_t *paddr, size_t sz, u32 sig)
343{ 345{
344 if (!sz) 346 if (!sz)
345 return 0; 347 return 0;
@@ -367,7 +369,7 @@ static int __devinit ramoops_init_prz(struct device *dev,
367 return 0; 369 return 0;
368} 370}
369 371
370static int __devinit ramoops_probe(struct platform_device *pdev) 372static int ramoops_probe(struct platform_device *pdev)
371{ 373{
372 struct device *dev = &pdev->dev; 374 struct device *dev = &pdev->dev;
373 struct ramoops_platform_data *pdata = pdev->dev.platform_data; 375 struct ramoops_platform_data *pdata = pdev->dev.platform_data;
diff --git a/fs/pstore/ram_core.c b/fs/pstore/ram_core.c
index eecd2a8a84dd..0306303be372 100644
--- a/fs/pstore/ram_core.c
+++ b/fs/pstore/ram_core.c
@@ -390,8 +390,8 @@ static int persistent_ram_buffer_map(phys_addr_t start, phys_addr_t size,
390 return 0; 390 return 0;
391} 391}
392 392
393static int __devinit persistent_ram_post_init(struct persistent_ram_zone *prz, 393static int persistent_ram_post_init(struct persistent_ram_zone *prz, u32 sig,
394 u32 sig, int ecc_size) 394 int ecc_size)
395{ 395{
396 int ret; 396 int ret;
397 397
@@ -443,9 +443,8 @@ void persistent_ram_free(struct persistent_ram_zone *prz)
443 kfree(prz); 443 kfree(prz);
444} 444}
445 445
446struct persistent_ram_zone * __devinit persistent_ram_new(phys_addr_t start, 446struct persistent_ram_zone *persistent_ram_new(phys_addr_t start, size_t size,
447 size_t size, u32 sig, 447 u32 sig, int ecc_size)
448 int ecc_size)
449{ 448{
450 struct persistent_ram_zone *prz; 449 struct persistent_ram_zone *prz;
451 int ret = -ENOMEM; 450 int ret = -ENOMEM;
diff --git a/fs/qnx6/inode.c b/fs/qnx6/inode.c
index b6addf560483..57199a52a351 100644
--- a/fs/qnx6/inode.c
+++ b/fs/qnx6/inode.c
@@ -285,7 +285,7 @@ static struct buffer_head *qnx6_check_first_superblock(struct super_block *s,
285 if (fs32_to_cpu(sbi, sb->sb_magic) == QNX6_SUPER_MAGIC) { 285 if (fs32_to_cpu(sbi, sb->sb_magic) == QNX6_SUPER_MAGIC) {
286 /* we got a big endian fs */ 286 /* we got a big endian fs */
287 QNX6DEBUG((KERN_INFO "qnx6: fs got different" 287 QNX6DEBUG((KERN_INFO "qnx6: fs got different"
288 " endianess.\n")); 288 " endianness.\n"));
289 return bh; 289 return bh;
290 } else 290 } else
291 sbi->s_bytesex = BYTESEX_LE; 291 sbi->s_bytesex = BYTESEX_LE;
diff --git a/fs/select.c b/fs/select.c
index 2ef72d965036..8c1c96c27062 100644
--- a/fs/select.c
+++ b/fs/select.c
@@ -26,6 +26,7 @@
26#include <linux/fs.h> 26#include <linux/fs.h>
27#include <linux/rcupdate.h> 27#include <linux/rcupdate.h>
28#include <linux/hrtimer.h> 28#include <linux/hrtimer.h>
29#include <linux/sched/rt.h>
29 30
30#include <asm/uaccess.h> 31#include <asm/uaccess.h>
31 32
diff --git a/fs/seq_file.c b/fs/seq_file.c
index 9d863fb501f9..f2bc3dfd0b88 100644
--- a/fs/seq_file.c
+++ b/fs/seq_file.c
@@ -296,7 +296,7 @@ EXPORT_SYMBOL(seq_read);
296 * seq_lseek - ->llseek() method for sequential files. 296 * seq_lseek - ->llseek() method for sequential files.
297 * @file: the file in question 297 * @file: the file in question
298 * @offset: new position 298 * @offset: new position
299 * @origin: 0 for absolute, 1 for relative position 299 * @whence: 0 for absolute, 1 for relative position
300 * 300 *
301 * Ready-made ->f_op->llseek() 301 * Ready-made ->f_op->llseek()
302 */ 302 */
diff --git a/fs/splice.c b/fs/splice.c
index 8890604e3fcd..6909d89d0da5 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -696,8 +696,10 @@ static int pipe_to_sendpage(struct pipe_inode_info *pipe,
696 return -EINVAL; 696 return -EINVAL;
697 697
698 more = (sd->flags & SPLICE_F_MORE) ? MSG_MORE : 0; 698 more = (sd->flags & SPLICE_F_MORE) ? MSG_MORE : 0;
699 if (sd->len < sd->total_len) 699
700 if (sd->len < sd->total_len && pipe->nrbufs > 1)
700 more |= MSG_SENDPAGE_NOTLAST; 701 more |= MSG_SENDPAGE_NOTLAST;
702
701 return file->f_op->sendpage(file, buf->page, buf->offset, 703 return file->f_op->sendpage(file, buf->page, buf->offset,
702 sd->len, &pos, more); 704 sd->len, &pos, more);
703} 705}
diff --git a/fs/sysfs/group.c b/fs/sysfs/group.c
index 2df555c66d57..aec3d5c98c94 100644
--- a/fs/sysfs/group.c
+++ b/fs/sysfs/group.c
@@ -205,6 +205,48 @@ void sysfs_unmerge_group(struct kobject *kobj,
205} 205}
206EXPORT_SYMBOL_GPL(sysfs_unmerge_group); 206EXPORT_SYMBOL_GPL(sysfs_unmerge_group);
207 207
208/**
209 * sysfs_add_link_to_group - add a symlink to an attribute group.
210 * @kobj: The kobject containing the group.
211 * @group_name: The name of the group.
212 * @target: The target kobject of the symlink to create.
213 * @link_name: The name of the symlink to create.
214 */
215int sysfs_add_link_to_group(struct kobject *kobj, const char *group_name,
216 struct kobject *target, const char *link_name)
217{
218 struct sysfs_dirent *dir_sd;
219 int error = 0;
220
221 dir_sd = sysfs_get_dirent(kobj->sd, NULL, group_name);
222 if (!dir_sd)
223 return -ENOENT;
224
225 error = sysfs_create_link_sd(dir_sd, target, link_name);
226 sysfs_put(dir_sd);
227
228 return error;
229}
230EXPORT_SYMBOL_GPL(sysfs_add_link_to_group);
231
232/**
233 * sysfs_remove_link_from_group - remove a symlink from an attribute group.
234 * @kobj: The kobject containing the group.
235 * @group_name: The name of the group.
236 * @link_name: The name of the symlink to remove.
237 */
238void sysfs_remove_link_from_group(struct kobject *kobj, const char *group_name,
239 const char *link_name)
240{
241 struct sysfs_dirent *dir_sd;
242
243 dir_sd = sysfs_get_dirent(kobj->sd, NULL, group_name);
244 if (dir_sd) {
245 sysfs_hash_and_remove(dir_sd, NULL, link_name);
246 sysfs_put(dir_sd);
247 }
248}
249EXPORT_SYMBOL_GPL(sysfs_remove_link_from_group);
208 250
209EXPORT_SYMBOL_GPL(sysfs_create_group); 251EXPORT_SYMBOL_GPL(sysfs_create_group);
210EXPORT_SYMBOL_GPL(sysfs_update_group); 252EXPORT_SYMBOL_GPL(sysfs_update_group);
diff --git a/fs/sysfs/mount.c b/fs/sysfs/mount.c
index db940a9be045..8d924b5ec733 100644
--- a/fs/sysfs/mount.c
+++ b/fs/sysfs/mount.c
@@ -10,7 +10,7 @@
10 * Please see Documentation/filesystems/sysfs.txt for more information. 10 * Please see Documentation/filesystems/sysfs.txt for more information.
11 */ 11 */
12 12
13#define DEBUG 13#define DEBUG
14 14
15#include <linux/fs.h> 15#include <linux/fs.h>
16#include <linux/mount.h> 16#include <linux/mount.h>
diff --git a/fs/sysfs/symlink.c b/fs/sysfs/symlink.c
index 3c9eb5624f5e..8c940df97a52 100644
--- a/fs/sysfs/symlink.c
+++ b/fs/sysfs/symlink.c
@@ -21,26 +21,17 @@
21 21
22#include "sysfs.h" 22#include "sysfs.h"
23 23
24static int sysfs_do_create_link(struct kobject *kobj, struct kobject *target, 24static int sysfs_do_create_link_sd(struct sysfs_dirent *parent_sd,
25 const char *name, int warn) 25 struct kobject *target,
26 const char *name, int warn)
26{ 27{
27 struct sysfs_dirent *parent_sd = NULL;
28 struct sysfs_dirent *target_sd = NULL; 28 struct sysfs_dirent *target_sd = NULL;
29 struct sysfs_dirent *sd = NULL; 29 struct sysfs_dirent *sd = NULL;
30 struct sysfs_addrm_cxt acxt; 30 struct sysfs_addrm_cxt acxt;
31 enum kobj_ns_type ns_type; 31 enum kobj_ns_type ns_type;
32 int error; 32 int error;
33 33
34 BUG_ON(!name); 34 BUG_ON(!name || !parent_sd);
35
36 if (!kobj)
37 parent_sd = &sysfs_root;
38 else
39 parent_sd = kobj->sd;
40
41 error = -EFAULT;
42 if (!parent_sd)
43 goto out_put;
44 35
45 /* target->sd can go away beneath us but is protected with 36 /* target->sd can go away beneath us but is protected with
46 * sysfs_assoc_lock. Fetch target_sd from it. 37 * sysfs_assoc_lock. Fetch target_sd from it.
@@ -96,6 +87,34 @@ static int sysfs_do_create_link(struct kobject *kobj, struct kobject *target,
96} 87}
97 88
98/** 89/**
90 * sysfs_create_link_sd - create symlink to a given object.
91 * @sd: directory we're creating the link in.
92 * @target: object we're pointing to.
93 * @name: name of the symlink.
94 */
95int sysfs_create_link_sd(struct sysfs_dirent *sd, struct kobject *target,
96 const char *name)
97{
98 return sysfs_do_create_link_sd(sd, target, name, 1);
99}
100
101static int sysfs_do_create_link(struct kobject *kobj, struct kobject *target,
102 const char *name, int warn)
103{
104 struct sysfs_dirent *parent_sd = NULL;
105
106 if (!kobj)
107 parent_sd = &sysfs_root;
108 else
109 parent_sd = kobj->sd;
110
111 if (!parent_sd)
112 return -EFAULT;
113
114 return sysfs_do_create_link_sd(parent_sd, target, name, warn);
115}
116
117/**
99 * sysfs_create_link - create symlink between two objects. 118 * sysfs_create_link - create symlink between two objects.
100 * @kobj: object whose directory we're creating the link in. 119 * @kobj: object whose directory we're creating the link in.
101 * @target: object we're pointing to. 120 * @target: object we're pointing to.
diff --git a/fs/sysfs/sysfs.h b/fs/sysfs/sysfs.h
index d73c0932bbd6..d1e4043eb0c3 100644
--- a/fs/sysfs/sysfs.h
+++ b/fs/sysfs/sysfs.h
@@ -240,3 +240,5 @@ void unmap_bin_file(struct sysfs_dirent *attr_sd);
240 * symlink.c 240 * symlink.c
241 */ 241 */
242extern const struct inode_operations sysfs_symlink_inode_operations; 242extern const struct inode_operations sysfs_symlink_inode_operations;
243int sysfs_create_link_sd(struct sysfs_dirent *sd, struct kobject *target,
244 const char *name);
diff --git a/fs/timerfd.c b/fs/timerfd.c
index d03822bbf190..0e606b12a59d 100644
--- a/fs/timerfd.c
+++ b/fs/timerfd.c
@@ -22,6 +22,7 @@
22#include <linux/anon_inodes.h> 22#include <linux/anon_inodes.h>
23#include <linux/timerfd.h> 23#include <linux/timerfd.h>
24#include <linux/syscalls.h> 24#include <linux/syscalls.h>
25#include <linux/compat.h>
25#include <linux/rcupdate.h> 26#include <linux/rcupdate.h>
26 27
27struct timerfd_ctx { 28struct timerfd_ctx {
@@ -278,21 +279,17 @@ SYSCALL_DEFINE2(timerfd_create, int, clockid, int, flags)
278 return ufd; 279 return ufd;
279} 280}
280 281
281SYSCALL_DEFINE4(timerfd_settime, int, ufd, int, flags, 282static int do_timerfd_settime(int ufd, int flags,
282 const struct itimerspec __user *, utmr, 283 const struct itimerspec *new,
283 struct itimerspec __user *, otmr) 284 struct itimerspec *old)
284{ 285{
285 struct fd f; 286 struct fd f;
286 struct timerfd_ctx *ctx; 287 struct timerfd_ctx *ctx;
287 struct itimerspec ktmr, kotmr;
288 int ret; 288 int ret;
289 289
290 if (copy_from_user(&ktmr, utmr, sizeof(ktmr)))
291 return -EFAULT;
292
293 if ((flags & ~TFD_SETTIME_FLAGS) || 290 if ((flags & ~TFD_SETTIME_FLAGS) ||
294 !timespec_valid(&ktmr.it_value) || 291 !timespec_valid(&new->it_value) ||
295 !timespec_valid(&ktmr.it_interval)) 292 !timespec_valid(&new->it_interval))
296 return -EINVAL; 293 return -EINVAL;
297 294
298 ret = timerfd_fget(ufd, &f); 295 ret = timerfd_fget(ufd, &f);
@@ -323,27 +320,23 @@ SYSCALL_DEFINE4(timerfd_settime, int, ufd, int, flags,
323 if (ctx->expired && ctx->tintv.tv64) 320 if (ctx->expired && ctx->tintv.tv64)
324 hrtimer_forward_now(&ctx->tmr, ctx->tintv); 321 hrtimer_forward_now(&ctx->tmr, ctx->tintv);
325 322
326 kotmr.it_value = ktime_to_timespec(timerfd_get_remaining(ctx)); 323 old->it_value = ktime_to_timespec(timerfd_get_remaining(ctx));
327 kotmr.it_interval = ktime_to_timespec(ctx->tintv); 324 old->it_interval = ktime_to_timespec(ctx->tintv);
328 325
329 /* 326 /*
330 * Re-program the timer to the new value ... 327 * Re-program the timer to the new value ...
331 */ 328 */
332 ret = timerfd_setup(ctx, flags, &ktmr); 329 ret = timerfd_setup(ctx, flags, new);
333 330
334 spin_unlock_irq(&ctx->wqh.lock); 331 spin_unlock_irq(&ctx->wqh.lock);
335 fdput(f); 332 fdput(f);
336 if (otmr && copy_to_user(otmr, &kotmr, sizeof(kotmr)))
337 return -EFAULT;
338
339 return ret; 333 return ret;
340} 334}
341 335
342SYSCALL_DEFINE2(timerfd_gettime, int, ufd, struct itimerspec __user *, otmr) 336static int do_timerfd_gettime(int ufd, struct itimerspec *t)
343{ 337{
344 struct fd f; 338 struct fd f;
345 struct timerfd_ctx *ctx; 339 struct timerfd_ctx *ctx;
346 struct itimerspec kotmr;
347 int ret = timerfd_fget(ufd, &f); 340 int ret = timerfd_fget(ufd, &f);
348 if (ret) 341 if (ret)
349 return ret; 342 return ret;
@@ -356,11 +349,65 @@ SYSCALL_DEFINE2(timerfd_gettime, int, ufd, struct itimerspec __user *, otmr)
356 hrtimer_forward_now(&ctx->tmr, ctx->tintv) - 1; 349 hrtimer_forward_now(&ctx->tmr, ctx->tintv) - 1;
357 hrtimer_restart(&ctx->tmr); 350 hrtimer_restart(&ctx->tmr);
358 } 351 }
359 kotmr.it_value = ktime_to_timespec(timerfd_get_remaining(ctx)); 352 t->it_value = ktime_to_timespec(timerfd_get_remaining(ctx));
360 kotmr.it_interval = ktime_to_timespec(ctx->tintv); 353 t->it_interval = ktime_to_timespec(ctx->tintv);
361 spin_unlock_irq(&ctx->wqh.lock); 354 spin_unlock_irq(&ctx->wqh.lock);
362 fdput(f); 355 fdput(f);
356 return 0;
357}
358
359SYSCALL_DEFINE4(timerfd_settime, int, ufd, int, flags,
360 const struct itimerspec __user *, utmr,
361 struct itimerspec __user *, otmr)
362{
363 struct itimerspec new, old;
364 int ret;
365
366 if (copy_from_user(&new, utmr, sizeof(new)))
367 return -EFAULT;
368 ret = do_timerfd_settime(ufd, flags, &new, &old);
369 if (ret)
370 return ret;
371 if (otmr && copy_to_user(otmr, &old, sizeof(old)))
372 return -EFAULT;
373
374 return ret;
375}
363 376
377SYSCALL_DEFINE2(timerfd_gettime, int, ufd, struct itimerspec __user *, otmr)
378{
379 struct itimerspec kotmr;
380 int ret = do_timerfd_gettime(ufd, &kotmr);
381 if (ret)
382 return ret;
364 return copy_to_user(otmr, &kotmr, sizeof(kotmr)) ? -EFAULT: 0; 383 return copy_to_user(otmr, &kotmr, sizeof(kotmr)) ? -EFAULT: 0;
365} 384}
366 385
386#ifdef COMPAT
387COMPAT_SYSCALL_DEFINE4(timerfd_settime, int, ufd, int, flags,
388 const struct itimerspec __user *, utmr,
389 struct itimerspec __user *, otmr)
390{
391 struct itimerspec new, old;
392 int ret;
393
394 if (get_compat_itimerspec(&new, utmr))
395 return -EFAULT;
396 ret = do_timerfd_settime(ufd, flags, &new, &old);
397 if (ret)
398 return ret;
399 if (otmr && put_compat_itimerspec(otmr, &old))
400 return -EFAULT;
401 return ret;
402}
403
404COMPAT_SYSCALL_DEFINE2(timerfd_gettime, int, ufd,
405 struct itimerspec __user *, otmr)
406{
407 struct itimerspec kotmr;
408 int ret = do_timerfd_gettime(ufd, &kotmr);
409 if (ret)
410 return ret;
411 return put_compat_itimerspec(otmr, &t) ? -EFAULT: 0;
412}
413#endif
diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c
index 5bc77817f382..4f6493c130e0 100644
--- a/fs/ubifs/file.c
+++ b/fs/ubifs/file.c
@@ -1522,6 +1522,7 @@ static int ubifs_vm_page_mkwrite(struct vm_area_struct *vma,
1522 ubifs_release_dirty_inode_budget(c, ui); 1522 ubifs_release_dirty_inode_budget(c, ui);
1523 } 1523 }
1524 1524
1525 wait_for_stable_page(page);
1525 unlock_page(page); 1526 unlock_page(page);
1526 return 0; 1527 return 0;
1527 1528
diff --git a/fs/udf/super.c b/fs/udf/super.c
index d44fb568abe1..e9be396a558d 100644
--- a/fs/udf/super.c
+++ b/fs/udf/super.c
@@ -307,7 +307,8 @@ static void udf_sb_free_partitions(struct super_block *sb)
307{ 307{
308 struct udf_sb_info *sbi = UDF_SB(sb); 308 struct udf_sb_info *sbi = UDF_SB(sb);
309 int i; 309 int i;
310 310 if (sbi->s_partmaps == NULL)
311 return;
311 for (i = 0; i < sbi->s_partitions; i++) 312 for (i = 0; i < sbi->s_partitions; i++)
312 udf_free_partition(&sbi->s_partmaps[i]); 313 udf_free_partition(&sbi->s_partmaps[i]);
313 kfree(sbi->s_partmaps); 314 kfree(sbi->s_partmaps);
diff --git a/fs/ufs/Kconfig b/fs/ufs/Kconfig
index e4f10a40768a..0bf6e16f8d79 100644
--- a/fs/ufs/Kconfig
+++ b/fs/ufs/Kconfig
@@ -29,7 +29,7 @@ config UFS_FS
29 29
30config UFS_FS_WRITE 30config UFS_FS_WRITE
31 bool "UFS file system write support (DANGEROUS)" 31 bool "UFS file system write support (DANGEROUS)"
32 depends on UFS_FS && EXPERIMENTAL 32 depends on UFS_FS
33 help 33 help
34 Say Y here if you want to try writing to UFS partitions. This is 34 Say Y here if you want to try writing to UFS partitions. This is
35 experimental, so you should back up your UFS partitions beforehand. 35 experimental, so you should back up your UFS partitions beforehand.
diff --git a/fs/xfs/Kconfig b/fs/xfs/Kconfig
index 5a7ffe54f5d5..cc33aaf219f1 100644
--- a/fs/xfs/Kconfig
+++ b/fs/xfs/Kconfig
@@ -70,8 +70,8 @@ config XFS_RT
70 If unsure, say N. 70 If unsure, say N.
71 71
72config XFS_DEBUG 72config XFS_DEBUG
73 bool "XFS Debugging support (EXPERIMENTAL)" 73 bool "XFS Debugging support"
74 depends on XFS_FS && EXPERIMENTAL 74 depends on XFS_FS
75 help 75 help
76 Say Y here to get an XFS build with many debugging features, 76 Say Y here to get an XFS build with many debugging features,
77 including ASSERT checks, function wrappers around macros, 77 including ASSERT checks, function wrappers around macros,
diff --git a/fs/xfs/xfs_alloc.c b/fs/xfs/xfs_alloc.c
index 393055fe3aef..0ad23253e8b1 100644
--- a/fs/xfs/xfs_alloc.c
+++ b/fs/xfs/xfs_alloc.c
@@ -1925,8 +1925,6 @@ xfs_alloc_fix_freelist(
1925 targs.mp = mp; 1925 targs.mp = mp;
1926 targs.agbp = agbp; 1926 targs.agbp = agbp;
1927 targs.agno = args->agno; 1927 targs.agno = args->agno;
1928 targs.mod = targs.minleft = targs.wasdel = targs.userdata =
1929 targs.minalignslop = 0;
1930 targs.alignment = targs.minlen = targs.prod = targs.isfl = 1; 1928 targs.alignment = targs.minlen = targs.prod = targs.isfl = 1;
1931 targs.type = XFS_ALLOCTYPE_THIS_AG; 1929 targs.type = XFS_ALLOCTYPE_THIS_AG;
1932 targs.pag = pag; 1930 targs.pag = pag;
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 4111a40ebe1a..5f707e537171 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -86,11 +86,11 @@ xfs_destroy_ioend(
86 } 86 }
87 87
88 if (ioend->io_iocb) { 88 if (ioend->io_iocb) {
89 inode_dio_done(ioend->io_inode);
89 if (ioend->io_isasync) { 90 if (ioend->io_isasync) {
90 aio_complete(ioend->io_iocb, ioend->io_error ? 91 aio_complete(ioend->io_iocb, ioend->io_error ?
91 ioend->io_error : ioend->io_result, 0); 92 ioend->io_error : ioend->io_result, 0);
92 } 93 }
93 inode_dio_done(ioend->io_inode);
94 } 94 }
95 95
96 mempool_free(ioend, xfs_ioend_pool); 96 mempool_free(ioend, xfs_ioend_pool);
diff --git a/fs/xfs/xfs_attr.c b/fs/xfs/xfs_attr.c
index aaf472532b3c..888683844d98 100644
--- a/fs/xfs/xfs_attr.c
+++ b/fs/xfs/xfs_attr.c
@@ -300,9 +300,12 @@ xfs_attr_set_int(
300 if (rsvd) 300 if (rsvd)
301 args.trans->t_flags |= XFS_TRANS_RESERVE; 301 args.trans->t_flags |= XFS_TRANS_RESERVE;
302 302
303 if ((error = xfs_trans_reserve(args.trans, args.total, 303 error = xfs_trans_reserve(args.trans, args.total,
304 XFS_ATTRSET_LOG_RES(mp, args.total), 0, 304 XFS_ATTRSETM_LOG_RES(mp) +
305 XFS_TRANS_PERM_LOG_RES, XFS_ATTRSET_LOG_COUNT))) { 305 XFS_ATTRSETRT_LOG_RES(mp) * args.total,
306 0, XFS_TRANS_PERM_LOG_RES,
307 XFS_ATTRSET_LOG_COUNT);
308 if (error) {
306 xfs_trans_cancel(args.trans, 0); 309 xfs_trans_cancel(args.trans, 0);
307 return(error); 310 return(error);
308 } 311 }
diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c
index 0e92d12765d2..b44af9211bd9 100644
--- a/fs/xfs/xfs_bmap.c
+++ b/fs/xfs/xfs_bmap.c
@@ -147,7 +147,10 @@ xfs_bmap_local_to_extents(
147 xfs_fsblock_t *firstblock, /* first block allocated in xaction */ 147 xfs_fsblock_t *firstblock, /* first block allocated in xaction */
148 xfs_extlen_t total, /* total blocks needed by transaction */ 148 xfs_extlen_t total, /* total blocks needed by transaction */
149 int *logflagsp, /* inode logging flags */ 149 int *logflagsp, /* inode logging flags */
150 int whichfork); /* data or attr fork */ 150 int whichfork, /* data or attr fork */
151 void (*init_fn)(struct xfs_buf *bp,
152 struct xfs_inode *ip,
153 struct xfs_ifork *ifp));
151 154
152/* 155/*
153 * Search the extents list for the inode, for the extent containing bno. 156 * Search the extents list for the inode, for the extent containing bno.
@@ -357,7 +360,42 @@ xfs_bmap_add_attrfork_extents(
357} 360}
358 361
359/* 362/*
360 * Called from xfs_bmap_add_attrfork to handle local format files. 363 * Block initialisation functions for local to extent format conversion.
364 * As these get more complex, they will be moved to the relevant files,
365 * but for now they are too simple to worry about.
366 */
367STATIC void
368xfs_bmap_local_to_extents_init_fn(
369 struct xfs_buf *bp,
370 struct xfs_inode *ip,
371 struct xfs_ifork *ifp)
372{
373 bp->b_ops = &xfs_bmbt_buf_ops;
374 memcpy(bp->b_addr, ifp->if_u1.if_data, ifp->if_bytes);
375}
376
377STATIC void
378xfs_symlink_local_to_remote(
379 struct xfs_buf *bp,
380 struct xfs_inode *ip,
381 struct xfs_ifork *ifp)
382{
383 /* remote symlink blocks are not verifiable until CRCs come along */
384 bp->b_ops = NULL;
385 memcpy(bp->b_addr, ifp->if_u1.if_data, ifp->if_bytes);
386}
387
388/*
389 * Called from xfs_bmap_add_attrfork to handle local format files. Each
390 * different data fork content type needs a different callout to do the
391 * conversion. Some are basic and only require special block initialisation
392 * callouts for the data formatting, others (directories) are so specialised they
393 * handle everything themselves.
394 *
395 * XXX (dgc): investigate whether directory conversion can use the generic
396 * formatting callout. It should be possible - it's just a very complex
397 * formatter. It would also require passing the transaction through to the init
398 * function.
361 */ 399 */
362STATIC int /* error */ 400STATIC int /* error */
363xfs_bmap_add_attrfork_local( 401xfs_bmap_add_attrfork_local(
@@ -368,25 +406,29 @@ xfs_bmap_add_attrfork_local(
368 int *flags) /* inode logging flags */ 406 int *flags) /* inode logging flags */
369{ 407{
370 xfs_da_args_t dargs; /* args for dir/attr code */ 408 xfs_da_args_t dargs; /* args for dir/attr code */
371 int error; /* error return value */
372 xfs_mount_t *mp; /* mount structure pointer */
373 409
374 if (ip->i_df.if_bytes <= XFS_IFORK_DSIZE(ip)) 410 if (ip->i_df.if_bytes <= XFS_IFORK_DSIZE(ip))
375 return 0; 411 return 0;
412
376 if (S_ISDIR(ip->i_d.di_mode)) { 413 if (S_ISDIR(ip->i_d.di_mode)) {
377 mp = ip->i_mount;
378 memset(&dargs, 0, sizeof(dargs)); 414 memset(&dargs, 0, sizeof(dargs));
379 dargs.dp = ip; 415 dargs.dp = ip;
380 dargs.firstblock = firstblock; 416 dargs.firstblock = firstblock;
381 dargs.flist = flist; 417 dargs.flist = flist;
382 dargs.total = mp->m_dirblkfsbs; 418 dargs.total = ip->i_mount->m_dirblkfsbs;
383 dargs.whichfork = XFS_DATA_FORK; 419 dargs.whichfork = XFS_DATA_FORK;
384 dargs.trans = tp; 420 dargs.trans = tp;
385 error = xfs_dir2_sf_to_block(&dargs); 421 return xfs_dir2_sf_to_block(&dargs);
386 } else 422 }
387 error = xfs_bmap_local_to_extents(tp, ip, firstblock, 1, flags, 423
388 XFS_DATA_FORK); 424 if (S_ISLNK(ip->i_d.di_mode))
389 return error; 425 return xfs_bmap_local_to_extents(tp, ip, firstblock, 1,
426 flags, XFS_DATA_FORK,
427 xfs_symlink_local_to_remote);
428
429 return xfs_bmap_local_to_extents(tp, ip, firstblock, 1, flags,
430 XFS_DATA_FORK,
431 xfs_bmap_local_to_extents_init_fn);
390} 432}
391 433
392/* 434/*
@@ -3099,8 +3141,6 @@ xfs_bmap_extents_to_btree(
3099 args.fsbno = *firstblock; 3141 args.fsbno = *firstblock;
3100 } 3142 }
3101 args.minlen = args.maxlen = args.prod = 1; 3143 args.minlen = args.maxlen = args.prod = 1;
3102 args.total = args.minleft = args.alignment = args.mod = args.isfl =
3103 args.minalignslop = 0;
3104 args.wasdel = wasdel; 3144 args.wasdel = wasdel;
3105 *logflagsp = 0; 3145 *logflagsp = 0;
3106 if ((error = xfs_alloc_vextent(&args))) { 3146 if ((error = xfs_alloc_vextent(&args))) {
@@ -3221,7 +3261,10 @@ xfs_bmap_local_to_extents(
3221 xfs_fsblock_t *firstblock, /* first block allocated in xaction */ 3261 xfs_fsblock_t *firstblock, /* first block allocated in xaction */
3222 xfs_extlen_t total, /* total blocks needed by transaction */ 3262 xfs_extlen_t total, /* total blocks needed by transaction */
3223 int *logflagsp, /* inode logging flags */ 3263 int *logflagsp, /* inode logging flags */
3224 int whichfork) /* data or attr fork */ 3264 int whichfork,
3265 void (*init_fn)(struct xfs_buf *bp,
3266 struct xfs_inode *ip,
3267 struct xfs_ifork *ifp))
3225{ 3268{
3226 int error; /* error return value */ 3269 int error; /* error return value */
3227 int flags; /* logging flags returned */ 3270 int flags; /* logging flags returned */
@@ -3241,12 +3284,12 @@ xfs_bmap_local_to_extents(
3241 xfs_buf_t *bp; /* buffer for extent block */ 3284 xfs_buf_t *bp; /* buffer for extent block */
3242 xfs_bmbt_rec_host_t *ep;/* extent record pointer */ 3285 xfs_bmbt_rec_host_t *ep;/* extent record pointer */
3243 3286
3287 ASSERT((ifp->if_flags &
3288 (XFS_IFINLINE|XFS_IFEXTENTS|XFS_IFEXTIREC)) == XFS_IFINLINE);
3244 memset(&args, 0, sizeof(args)); 3289 memset(&args, 0, sizeof(args));
3245 args.tp = tp; 3290 args.tp = tp;
3246 args.mp = ip->i_mount; 3291 args.mp = ip->i_mount;
3247 args.firstblock = *firstblock; 3292 args.firstblock = *firstblock;
3248 ASSERT((ifp->if_flags &
3249 (XFS_IFINLINE|XFS_IFEXTENTS|XFS_IFEXTIREC)) == XFS_IFINLINE);
3250 /* 3293 /*
3251 * Allocate a block. We know we need only one, since the 3294 * Allocate a block. We know we need only one, since the
3252 * file currently fits in an inode. 3295 * file currently fits in an inode.
@@ -3259,20 +3302,21 @@ xfs_bmap_local_to_extents(
3259 args.type = XFS_ALLOCTYPE_NEAR_BNO; 3302 args.type = XFS_ALLOCTYPE_NEAR_BNO;
3260 } 3303 }
3261 args.total = total; 3304 args.total = total;
3262 args.mod = args.minleft = args.alignment = args.wasdel =
3263 args.isfl = args.minalignslop = 0;
3264 args.minlen = args.maxlen = args.prod = 1; 3305 args.minlen = args.maxlen = args.prod = 1;
3265 if ((error = xfs_alloc_vextent(&args))) 3306 error = xfs_alloc_vextent(&args);
3307 if (error)
3266 goto done; 3308 goto done;
3267 /* 3309
3268 * Can't fail, the space was reserved. 3310 /* Can't fail, the space was reserved. */
3269 */
3270 ASSERT(args.fsbno != NULLFSBLOCK); 3311 ASSERT(args.fsbno != NULLFSBLOCK);
3271 ASSERT(args.len == 1); 3312 ASSERT(args.len == 1);
3272 *firstblock = args.fsbno; 3313 *firstblock = args.fsbno;
3273 bp = xfs_btree_get_bufl(args.mp, tp, args.fsbno, 0); 3314 bp = xfs_btree_get_bufl(args.mp, tp, args.fsbno, 0);
3274 bp->b_ops = &xfs_bmbt_buf_ops; 3315
3275 memcpy(bp->b_addr, ifp->if_u1.if_data, ifp->if_bytes); 3316 /* initialise the block and copy the data */
3317 init_fn(bp, ip, ifp);
3318
3319 /* account for the change in fork size and log everything */
3276 xfs_trans_log_buf(tp, bp, 0, ifp->if_bytes - 1); 3320 xfs_trans_log_buf(tp, bp, 0, ifp->if_bytes - 1);
3277 xfs_bmap_forkoff_reset(args.mp, ip, whichfork); 3321 xfs_bmap_forkoff_reset(args.mp, ip, whichfork);
3278 xfs_idata_realloc(ip, -ifp->if_bytes, whichfork); 3322 xfs_idata_realloc(ip, -ifp->if_bytes, whichfork);
@@ -4680,9 +4724,6 @@ __xfs_bmapi_allocate(
4680 return error; 4724 return error;
4681 } 4725 }
4682 4726
4683 if (bma->flags & XFS_BMAPI_STACK_SWITCH)
4684 bma->stack_switch = 1;
4685
4686 error = xfs_bmap_alloc(bma); 4727 error = xfs_bmap_alloc(bma);
4687 if (error) 4728 if (error)
4688 return error; 4729 return error;
@@ -4922,8 +4963,32 @@ xfs_bmapi_write(
4922 XFS_STATS_INC(xs_blk_mapw); 4963 XFS_STATS_INC(xs_blk_mapw);
4923 4964
4924 if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL) { 4965 if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL) {
4966 /*
4967 * XXX (dgc): This assumes we are only called for inodes that
4968 * contain content neutral data in local format. Anything that
4969 * contains caller-specific data in local format that needs
4970 * transformation to move to a block format needs to do the
4971 * conversion to extent format itself.
4972 *
4973 * Directory data forks and attribute forks handle this
4974 * themselves, but with the addition of metadata verifiers every
4975 * data fork in local format now contains caller specific data
4976 * and as such conversion through this function is likely to be
4977 * broken.
4978 *
4979 * The only likely user of this branch is for remote symlinks,
4980 * but we cannot overwrite the data fork contents of the symlink
4981 * (EEXIST occurs higher up the stack) and so it will never go
4982 * from local format to extent format here. Hence I don't think
4983 * this branch is ever executed intentionally and we should
4984 * consider removing it and asserting that xfs_bmapi_write()
4985 * cannot be called directly on local format forks. i.e. callers
4986 * are completely responsible for local to extent format
4987 * conversion, not xfs_bmapi_write().
4988 */
4925 error = xfs_bmap_local_to_extents(tp, ip, firstblock, total, 4989 error = xfs_bmap_local_to_extents(tp, ip, firstblock, total,
4926 &bma.logflags, whichfork); 4990 &bma.logflags, whichfork,
4991 xfs_bmap_local_to_extents_init_fn);
4927 if (error) 4992 if (error)
4928 goto error0; 4993 goto error0;
4929 } 4994 }
@@ -4956,6 +5021,9 @@ xfs_bmapi_write(
4956 bma.flist = flist; 5021 bma.flist = flist;
4957 bma.firstblock = firstblock; 5022 bma.firstblock = firstblock;
4958 5023
5024 if (flags & XFS_BMAPI_STACK_SWITCH)
5025 bma.stack_switch = 1;
5026
4959 while (bno < end && n < *nmap) { 5027 while (bno < end && n < *nmap) {
4960 inhole = eof || bma.got.br_startoff > bno; 5028 inhole = eof || bma.got.br_startoff > bno;
4961 wasdelay = !inhole && isnullstartblock(bma.got.br_startblock); 5029 wasdelay = !inhole && isnullstartblock(bma.got.br_startblock);
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index 26673a0b20e7..4e8f0df82d02 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -175,7 +175,7 @@ xfs_buf_get_maps(
175 bp->b_map_count = map_count; 175 bp->b_map_count = map_count;
176 176
177 if (map_count == 1) { 177 if (map_count == 1) {
178 bp->b_maps = &bp->b_map; 178 bp->b_maps = &bp->__b_map;
179 return 0; 179 return 0;
180 } 180 }
181 181
@@ -193,7 +193,7 @@ static void
193xfs_buf_free_maps( 193xfs_buf_free_maps(
194 struct xfs_buf *bp) 194 struct xfs_buf *bp)
195{ 195{
196 if (bp->b_maps != &bp->b_map) { 196 if (bp->b_maps != &bp->__b_map) {
197 kmem_free(bp->b_maps); 197 kmem_free(bp->b_maps);
198 bp->b_maps = NULL; 198 bp->b_maps = NULL;
199 } 199 }
@@ -377,8 +377,8 @@ xfs_buf_allocate_memory(
377 } 377 }
378 378
379use_alloc_page: 379use_alloc_page:
380 start = BBTOB(bp->b_map.bm_bn) >> PAGE_SHIFT; 380 start = BBTOB(bp->b_maps[0].bm_bn) >> PAGE_SHIFT;
381 end = (BBTOB(bp->b_map.bm_bn + bp->b_length) + PAGE_SIZE - 1) 381 end = (BBTOB(bp->b_maps[0].bm_bn + bp->b_length) + PAGE_SIZE - 1)
382 >> PAGE_SHIFT; 382 >> PAGE_SHIFT;
383 page_count = end - start; 383 page_count = end - start;
384 error = _xfs_buf_get_pages(bp, page_count, flags); 384 error = _xfs_buf_get_pages(bp, page_count, flags);
@@ -487,6 +487,7 @@ _xfs_buf_find(
487 struct rb_node *parent; 487 struct rb_node *parent;
488 xfs_buf_t *bp; 488 xfs_buf_t *bp;
489 xfs_daddr_t blkno = map[0].bm_bn; 489 xfs_daddr_t blkno = map[0].bm_bn;
490 xfs_daddr_t eofs;
490 int numblks = 0; 491 int numblks = 0;
491 int i; 492 int i;
492 493
@@ -498,6 +499,23 @@ _xfs_buf_find(
498 ASSERT(!(numbytes < (1 << btp->bt_sshift))); 499 ASSERT(!(numbytes < (1 << btp->bt_sshift)));
499 ASSERT(!(BBTOB(blkno) & (xfs_off_t)btp->bt_smask)); 500 ASSERT(!(BBTOB(blkno) & (xfs_off_t)btp->bt_smask));
500 501
502 /*
503 * Corrupted block numbers can get through to here, unfortunately, so we
504 * have to check that the buffer falls within the filesystem bounds.
505 */
506 eofs = XFS_FSB_TO_BB(btp->bt_mount, btp->bt_mount->m_sb.sb_dblocks);
507 if (blkno >= eofs) {
508 /*
509 * XXX (dgc): we should really be returning EFSCORRUPTED here,
510 * but none of the higher level infrastructure supports
511 * returning a specific error on buffer lookup failures.
512 */
513 xfs_alert(btp->bt_mount,
514 "%s: Block out of range: block 0x%llx, EOFS 0x%llx ",
515 __func__, blkno, eofs);
516 return NULL;
517 }
518
501 /* get tree root */ 519 /* get tree root */
502 pag = xfs_perag_get(btp->bt_mount, 520 pag = xfs_perag_get(btp->bt_mount,
503 xfs_daddr_to_agno(btp->bt_mount, blkno)); 521 xfs_daddr_to_agno(btp->bt_mount, blkno));
@@ -640,7 +658,7 @@ _xfs_buf_read(
640 xfs_buf_flags_t flags) 658 xfs_buf_flags_t flags)
641{ 659{
642 ASSERT(!(flags & XBF_WRITE)); 660 ASSERT(!(flags & XBF_WRITE));
643 ASSERT(bp->b_map.bm_bn != XFS_BUF_DADDR_NULL); 661 ASSERT(bp->b_maps[0].bm_bn != XFS_BUF_DADDR_NULL);
644 662
645 bp->b_flags &= ~(XBF_WRITE | XBF_ASYNC | XBF_READ_AHEAD); 663 bp->b_flags &= ~(XBF_WRITE | XBF_ASYNC | XBF_READ_AHEAD);
646 bp->b_flags |= flags & (XBF_READ | XBF_ASYNC | XBF_READ_AHEAD); 664 bp->b_flags |= flags & (XBF_READ | XBF_ASYNC | XBF_READ_AHEAD);
@@ -933,8 +951,6 @@ xfs_buf_trylock(
933 locked = down_trylock(&bp->b_sema) == 0; 951 locked = down_trylock(&bp->b_sema) == 0;
934 if (locked) 952 if (locked)
935 XB_SET_OWNER(bp); 953 XB_SET_OWNER(bp);
936 else if (atomic_read(&bp->b_pin_count) && (bp->b_flags & XBF_STALE))
937 xfs_log_force(bp->b_target->bt_mount, 0);
938 954
939 trace_xfs_buf_trylock(bp, _RET_IP_); 955 trace_xfs_buf_trylock(bp, _RET_IP_);
940 return locked; 956 return locked;
@@ -1487,6 +1503,8 @@ restart:
1487 while (!list_empty(&btp->bt_lru)) { 1503 while (!list_empty(&btp->bt_lru)) {
1488 bp = list_first_entry(&btp->bt_lru, struct xfs_buf, b_lru); 1504 bp = list_first_entry(&btp->bt_lru, struct xfs_buf, b_lru);
1489 if (atomic_read(&bp->b_hold) > 1) { 1505 if (atomic_read(&bp->b_hold) > 1) {
1506 trace_xfs_buf_wait_buftarg(bp, _RET_IP_);
1507 list_move_tail(&bp->b_lru, &btp->bt_lru);
1490 spin_unlock(&btp->bt_lru_lock); 1508 spin_unlock(&btp->bt_lru_lock);
1491 delay(100); 1509 delay(100);
1492 goto restart; 1510 goto restart;
@@ -1709,7 +1727,7 @@ xfs_buf_cmp(
1709 struct xfs_buf *bp = container_of(b, struct xfs_buf, b_list); 1727 struct xfs_buf *bp = container_of(b, struct xfs_buf, b_list);
1710 xfs_daddr_t diff; 1728 xfs_daddr_t diff;
1711 1729
1712 diff = ap->b_map.bm_bn - bp->b_map.bm_bn; 1730 diff = ap->b_maps[0].bm_bn - bp->b_maps[0].bm_bn;
1713 if (diff < 0) 1731 if (diff < 0)
1714 return -1; 1732 return -1;
1715 if (diff > 0) 1733 if (diff > 0)
diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h
index 23f5642480bb..433a12ed7b17 100644
--- a/fs/xfs/xfs_buf.h
+++ b/fs/xfs/xfs_buf.h
@@ -151,7 +151,7 @@ typedef struct xfs_buf {
151 struct page **b_pages; /* array of page pointers */ 151 struct page **b_pages; /* array of page pointers */
152 struct page *b_page_array[XB_PAGES]; /* inline pages */ 152 struct page *b_page_array[XB_PAGES]; /* inline pages */
153 struct xfs_buf_map *b_maps; /* compound buffer map */ 153 struct xfs_buf_map *b_maps; /* compound buffer map */
154 struct xfs_buf_map b_map; /* inline compound buffer map */ 154 struct xfs_buf_map __b_map; /* inline compound buffer map */
155 int b_map_count; 155 int b_map_count;
156 int b_io_length; /* IO size in BBs */ 156 int b_io_length; /* IO size in BBs */
157 atomic_t b_pin_count; /* pin count */ 157 atomic_t b_pin_count; /* pin count */
@@ -330,8 +330,8 @@ void xfs_buf_stale(struct xfs_buf *bp);
330 * In future, uncached buffers will pass the block number directly to the io 330 * In future, uncached buffers will pass the block number directly to the io
331 * request function and hence these macros will go away at that point. 331 * request function and hence these macros will go away at that point.
332 */ 332 */
333#define XFS_BUF_ADDR(bp) ((bp)->b_map.bm_bn) 333#define XFS_BUF_ADDR(bp) ((bp)->b_maps[0].bm_bn)
334#define XFS_BUF_SET_ADDR(bp, bno) ((bp)->b_map.bm_bn = (xfs_daddr_t)(bno)) 334#define XFS_BUF_SET_ADDR(bp, bno) ((bp)->b_maps[0].bm_bn = (xfs_daddr_t)(bno))
335 335
336static inline void xfs_buf_set_ref(struct xfs_buf *bp, int lru_ref) 336static inline void xfs_buf_set_ref(struct xfs_buf *bp, int lru_ref)
337{ 337{
diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c
index becf4a97efc6..cf263476d6b4 100644
--- a/fs/xfs/xfs_buf_item.c
+++ b/fs/xfs/xfs_buf_item.c
@@ -37,109 +37,6 @@ static inline struct xfs_buf_log_item *BUF_ITEM(struct xfs_log_item *lip)
37 return container_of(lip, struct xfs_buf_log_item, bli_item); 37 return container_of(lip, struct xfs_buf_log_item, bli_item);
38} 38}
39 39
40
41#ifdef XFS_TRANS_DEBUG
42/*
43 * This function uses an alternate strategy for tracking the bytes
44 * that the user requests to be logged. This can then be used
45 * in conjunction with the bli_orig array in the buf log item to
46 * catch bugs in our callers' code.
47 *
48 * We also double check the bits set in xfs_buf_item_log using a
49 * simple algorithm to check that every byte is accounted for.
50 */
51STATIC void
52xfs_buf_item_log_debug(
53 xfs_buf_log_item_t *bip,
54 uint first,
55 uint last)
56{
57 uint x;
58 uint byte;
59 uint nbytes;
60 uint chunk_num;
61 uint word_num;
62 uint bit_num;
63 uint bit_set;
64 uint *wordp;
65
66 ASSERT(bip->bli_logged != NULL);
67 byte = first;
68 nbytes = last - first + 1;
69 bfset(bip->bli_logged, first, nbytes);
70 for (x = 0; x < nbytes; x++) {
71 chunk_num = byte >> XFS_BLF_SHIFT;
72 word_num = chunk_num >> BIT_TO_WORD_SHIFT;
73 bit_num = chunk_num & (NBWORD - 1);
74 wordp = &(bip->bli_format.blf_data_map[word_num]);
75 bit_set = *wordp & (1 << bit_num);
76 ASSERT(bit_set);
77 byte++;
78 }
79}
80
81/*
82 * This function is called when we flush something into a buffer without
83 * logging it. This happens for things like inodes which are logged
84 * separately from the buffer.
85 */
86void
87xfs_buf_item_flush_log_debug(
88 xfs_buf_t *bp,
89 uint first,
90 uint last)
91{
92 xfs_buf_log_item_t *bip = bp->b_fspriv;
93 uint nbytes;
94
95 if (bip == NULL || (bip->bli_item.li_type != XFS_LI_BUF))
96 return;
97
98 ASSERT(bip->bli_logged != NULL);
99 nbytes = last - first + 1;
100 bfset(bip->bli_logged, first, nbytes);
101}
102
103/*
104 * This function is called to verify that our callers have logged
105 * all the bytes that they changed.
106 *
107 * It does this by comparing the original copy of the buffer stored in
108 * the buf log item's bli_orig array to the current copy of the buffer
109 * and ensuring that all bytes which mismatch are set in the bli_logged
110 * array of the buf log item.
111 */
112STATIC void
113xfs_buf_item_log_check(
114 xfs_buf_log_item_t *bip)
115{
116 char *orig;
117 char *buffer;
118 int x;
119 xfs_buf_t *bp;
120
121 ASSERT(bip->bli_orig != NULL);
122 ASSERT(bip->bli_logged != NULL);
123
124 bp = bip->bli_buf;
125 ASSERT(bp->b_length > 0);
126 ASSERT(bp->b_addr != NULL);
127 orig = bip->bli_orig;
128 buffer = bp->b_addr;
129 for (x = 0; x < BBTOB(bp->b_length); x++) {
130 if (orig[x] != buffer[x] && !btst(bip->bli_logged, x)) {
131 xfs_emerg(bp->b_mount,
132 "%s: bip %x buffer %x orig %x index %d",
133 __func__, bip, bp, orig, x);
134 ASSERT(0);
135 }
136 }
137}
138#else
139#define xfs_buf_item_log_debug(x,y,z)
140#define xfs_buf_item_log_check(x)
141#endif
142
143STATIC void xfs_buf_do_callbacks(struct xfs_buf *bp); 40STATIC void xfs_buf_do_callbacks(struct xfs_buf *bp);
144 41
145/* 42/*
@@ -237,7 +134,7 @@ xfs_buf_item_size(
237 * cancel flag in it. 134 * cancel flag in it.
238 */ 135 */
239 trace_xfs_buf_item_size_stale(bip); 136 trace_xfs_buf_item_size_stale(bip);
240 ASSERT(bip->bli_format.blf_flags & XFS_BLF_CANCEL); 137 ASSERT(bip->__bli_format.blf_flags & XFS_BLF_CANCEL);
241 return bip->bli_format_count; 138 return bip->bli_format_count;
242 } 139 }
243 140
@@ -278,7 +175,7 @@ xfs_buf_item_format_segment(
278 uint buffer_offset; 175 uint buffer_offset;
279 176
280 /* copy the flags across from the base format item */ 177 /* copy the flags across from the base format item */
281 blfp->blf_flags = bip->bli_format.blf_flags; 178 blfp->blf_flags = bip->__bli_format.blf_flags;
282 179
283 /* 180 /*
284 * Base size is the actual size of the ondisk structure - it reflects 181 * Base size is the actual size of the ondisk structure - it reflects
@@ -287,6 +184,17 @@ xfs_buf_item_format_segment(
287 */ 184 */
288 base_size = offsetof(struct xfs_buf_log_format, blf_data_map) + 185 base_size = offsetof(struct xfs_buf_log_format, blf_data_map) +
289 (blfp->blf_map_size * sizeof(blfp->blf_data_map[0])); 186 (blfp->blf_map_size * sizeof(blfp->blf_data_map[0]));
187
188 nvecs = 0;
189 first_bit = xfs_next_bit(blfp->blf_data_map, blfp->blf_map_size, 0);
190 if (!(bip->bli_flags & XFS_BLI_STALE) && first_bit == -1) {
191 /*
192 * If the map is not dirty in the transaction, mark
193 * the size as zero and do not advance the vector pointer.
194 */
195 goto out;
196 }
197
290 vecp->i_addr = blfp; 198 vecp->i_addr = blfp;
291 vecp->i_len = base_size; 199 vecp->i_len = base_size;
292 vecp->i_type = XLOG_REG_TYPE_BFORMAT; 200 vecp->i_type = XLOG_REG_TYPE_BFORMAT;
@@ -301,15 +209,13 @@ xfs_buf_item_format_segment(
301 */ 209 */
302 trace_xfs_buf_item_format_stale(bip); 210 trace_xfs_buf_item_format_stale(bip);
303 ASSERT(blfp->blf_flags & XFS_BLF_CANCEL); 211 ASSERT(blfp->blf_flags & XFS_BLF_CANCEL);
304 blfp->blf_size = nvecs; 212 goto out;
305 return vecp;
306 } 213 }
307 214
308 /* 215 /*
309 * Fill in an iovec for each set of contiguous chunks. 216 * Fill in an iovec for each set of contiguous chunks.
310 */ 217 */
311 first_bit = xfs_next_bit(blfp->blf_data_map, blfp->blf_map_size, 0); 218
312 ASSERT(first_bit != -1);
313 last_bit = first_bit; 219 last_bit = first_bit;
314 nbits = 1; 220 nbits = 1;
315 for (;;) { 221 for (;;) {
@@ -371,7 +277,8 @@ xfs_buf_item_format_segment(
371 nbits++; 277 nbits++;
372 } 278 }
373 } 279 }
374 bip->bli_format.blf_size = nvecs; 280out:
281 blfp->blf_size = nvecs;
375 return vecp; 282 return vecp;
376} 283}
377 284
@@ -405,7 +312,7 @@ xfs_buf_item_format(
405 if (bip->bli_flags & XFS_BLI_INODE_BUF) { 312 if (bip->bli_flags & XFS_BLI_INODE_BUF) {
406 if (!((bip->bli_flags & XFS_BLI_INODE_ALLOC_BUF) && 313 if (!((bip->bli_flags & XFS_BLI_INODE_ALLOC_BUF) &&
407 xfs_log_item_in_current_chkpt(lip))) 314 xfs_log_item_in_current_chkpt(lip)))
408 bip->bli_format.blf_flags |= XFS_BLF_INODE_BUF; 315 bip->__bli_format.blf_flags |= XFS_BLF_INODE_BUF;
409 bip->bli_flags &= ~XFS_BLI_INODE_BUF; 316 bip->bli_flags &= ~XFS_BLI_INODE_BUF;
410 } 317 }
411 318
@@ -419,7 +326,6 @@ xfs_buf_item_format(
419 * Check to make sure everything is consistent. 326 * Check to make sure everything is consistent.
420 */ 327 */
421 trace_xfs_buf_item_format(bip); 328 trace_xfs_buf_item_format(bip);
422 xfs_buf_item_log_check(bip);
423} 329}
424 330
425/* 331/*
@@ -485,7 +391,7 @@ xfs_buf_item_unpin(
485 ASSERT(bip->bli_flags & XFS_BLI_STALE); 391 ASSERT(bip->bli_flags & XFS_BLI_STALE);
486 ASSERT(xfs_buf_islocked(bp)); 392 ASSERT(xfs_buf_islocked(bp));
487 ASSERT(XFS_BUF_ISSTALE(bp)); 393 ASSERT(XFS_BUF_ISSTALE(bp));
488 ASSERT(bip->bli_format.blf_flags & XFS_BLF_CANCEL); 394 ASSERT(bip->__bli_format.blf_flags & XFS_BLF_CANCEL);
489 395
490 trace_xfs_buf_item_unpin_stale(bip); 396 trace_xfs_buf_item_unpin_stale(bip);
491 397
@@ -563,8 +469,18 @@ xfs_buf_item_push(
563 469
564 if (xfs_buf_ispinned(bp)) 470 if (xfs_buf_ispinned(bp))
565 return XFS_ITEM_PINNED; 471 return XFS_ITEM_PINNED;
566 if (!xfs_buf_trylock(bp)) 472 if (!xfs_buf_trylock(bp)) {
473 /*
474 * If we have just raced with a buffer being pinned and it has
475 * been marked stale, we could end up stalling until someone else
476 * issues a log force to unpin the stale buffer. Check for the
477 * race condition here so xfsaild recognizes the buffer is pinned
478 * and queues a log force to move it along.
479 */
480 if (xfs_buf_ispinned(bp))
481 return XFS_ITEM_PINNED;
567 return XFS_ITEM_LOCKED; 482 return XFS_ITEM_LOCKED;
483 }
568 484
569 ASSERT(!(bip->bli_flags & XFS_BLI_STALE)); 485 ASSERT(!(bip->bli_flags & XFS_BLI_STALE));
570 486
@@ -601,7 +517,7 @@ xfs_buf_item_unlock(
601{ 517{
602 struct xfs_buf_log_item *bip = BUF_ITEM(lip); 518 struct xfs_buf_log_item *bip = BUF_ITEM(lip);
603 struct xfs_buf *bp = bip->bli_buf; 519 struct xfs_buf *bp = bip->bli_buf;
604 int aborted; 520 int aborted, clean, i;
605 uint hold; 521 uint hold;
606 522
607 /* Clear the buffer's association with this transaction. */ 523 /* Clear the buffer's association with this transaction. */
@@ -631,7 +547,7 @@ xfs_buf_item_unlock(
631 */ 547 */
632 if (bip->bli_flags & XFS_BLI_STALE) { 548 if (bip->bli_flags & XFS_BLI_STALE) {
633 trace_xfs_buf_item_unlock_stale(bip); 549 trace_xfs_buf_item_unlock_stale(bip);
634 ASSERT(bip->bli_format.blf_flags & XFS_BLF_CANCEL); 550 ASSERT(bip->__bli_format.blf_flags & XFS_BLF_CANCEL);
635 if (!aborted) { 551 if (!aborted) {
636 atomic_dec(&bip->bli_refcount); 552 atomic_dec(&bip->bli_refcount);
637 return; 553 return;
@@ -642,12 +558,27 @@ xfs_buf_item_unlock(
642 558
643 /* 559 /*
644 * If the buf item isn't tracking any data, free it, otherwise drop the 560 * If the buf item isn't tracking any data, free it, otherwise drop the
645 * reference we hold to it. 561 * reference we hold to it. If we are aborting the transaction, this may
562 * be the only reference to the buf item, so we free it anyway
563 * regardless of whether it is dirty or not. A dirty abort implies a
564 * shutdown, anyway.
646 */ 565 */
647 if (xfs_bitmap_empty(bip->bli_format.blf_data_map, 566 clean = 1;
648 bip->bli_format.blf_map_size)) 567 for (i = 0; i < bip->bli_format_count; i++) {
568 if (!xfs_bitmap_empty(bip->bli_formats[i].blf_data_map,
569 bip->bli_formats[i].blf_map_size)) {
570 clean = 0;
571 break;
572 }
573 }
574 if (clean)
649 xfs_buf_item_relse(bp); 575 xfs_buf_item_relse(bp);
650 else 576 else if (aborted) {
577 if (atomic_dec_and_test(&bip->bli_refcount)) {
578 ASSERT(XFS_FORCED_SHUTDOWN(lip->li_mountp));
579 xfs_buf_item_relse(bp);
580 }
581 } else
651 atomic_dec(&bip->bli_refcount); 582 atomic_dec(&bip->bli_refcount);
652 583
653 if (!hold) 584 if (!hold)
@@ -716,7 +647,7 @@ xfs_buf_item_get_format(
716 bip->bli_format_count = count; 647 bip->bli_format_count = count;
717 648
718 if (count == 1) { 649 if (count == 1) {
719 bip->bli_formats = &bip->bli_format; 650 bip->bli_formats = &bip->__bli_format;
720 return 0; 651 return 0;
721 } 652 }
722 653
@@ -731,7 +662,7 @@ STATIC void
731xfs_buf_item_free_format( 662xfs_buf_item_free_format(
732 struct xfs_buf_log_item *bip) 663 struct xfs_buf_log_item *bip)
733{ 664{
734 if (bip->bli_formats != &bip->bli_format) { 665 if (bip->bli_formats != &bip->__bli_format) {
735 kmem_free(bip->bli_formats); 666 kmem_free(bip->bli_formats);
736 bip->bli_formats = NULL; 667 bip->bli_formats = NULL;
737 } 668 }
@@ -898,8 +829,6 @@ xfs_buf_item_log_segment(
898 mask = (1 << end_bit) - 1; 829 mask = (1 << end_bit) - 1;
899 *wordp |= mask; 830 *wordp |= mask;
900 } 831 }
901
902 xfs_buf_item_log_debug(bip, first, last);
903} 832}
904 833
905/* 834/*
diff --git a/fs/xfs/xfs_buf_item.h b/fs/xfs/xfs_buf_item.h
index 6850f49f4af3..ee36c88ecfde 100644
--- a/fs/xfs/xfs_buf_item.h
+++ b/fs/xfs/xfs_buf_item.h
@@ -98,13 +98,9 @@ typedef struct xfs_buf_log_item {
98 unsigned int bli_flags; /* misc flags */ 98 unsigned int bli_flags; /* misc flags */
99 unsigned int bli_recur; /* lock recursion count */ 99 unsigned int bli_recur; /* lock recursion count */
100 atomic_t bli_refcount; /* cnt of tp refs */ 100 atomic_t bli_refcount; /* cnt of tp refs */
101#ifdef XFS_TRANS_DEBUG
102 char *bli_orig; /* original buffer copy */
103 char *bli_logged; /* bytes logged (bitmap) */
104#endif
105 int bli_format_count; /* count of headers */ 101 int bli_format_count; /* count of headers */
106 struct xfs_buf_log_format *bli_formats; /* array of in-log header ptrs */ 102 struct xfs_buf_log_format *bli_formats; /* array of in-log header ptrs */
107 struct xfs_buf_log_format bli_format; /* embedded in-log header */ 103 struct xfs_buf_log_format __bli_format; /* embedded in-log header */
108} xfs_buf_log_item_t; 104} xfs_buf_log_item_t;
109 105
110void xfs_buf_item_init(struct xfs_buf *, struct xfs_mount *); 106void xfs_buf_item_init(struct xfs_buf *, struct xfs_mount *);
@@ -117,16 +113,6 @@ void xfs_buf_attach_iodone(struct xfs_buf *,
117void xfs_buf_iodone_callbacks(struct xfs_buf *); 113void xfs_buf_iodone_callbacks(struct xfs_buf *);
118void xfs_buf_iodone(struct xfs_buf *, struct xfs_log_item *); 114void xfs_buf_iodone(struct xfs_buf *, struct xfs_log_item *);
119 115
120#ifdef XFS_TRANS_DEBUG
121void
122xfs_buf_item_flush_log_debug(
123 struct xfs_buf *bp,
124 uint first,
125 uint last);
126#else
127#define xfs_buf_item_flush_log_debug(bp, first, last)
128#endif
129
130#endif /* __KERNEL__ */ 116#endif /* __KERNEL__ */
131 117
132#endif /* __XFS_BUF_ITEM_H__ */ 118#endif /* __XFS_BUF_ITEM_H__ */
diff --git a/fs/xfs/xfs_dfrag.c b/fs/xfs/xfs_dfrag.c
index d0e9c74d3d96..a8bd26b82ecb 100644
--- a/fs/xfs/xfs_dfrag.c
+++ b/fs/xfs/xfs_dfrag.c
@@ -246,10 +246,10 @@ xfs_swap_extents(
246 goto out_unlock; 246 goto out_unlock;
247 } 247 }
248 248
249 error = -filemap_write_and_wait(VFS_I(ip)->i_mapping); 249 error = -filemap_write_and_wait(VFS_I(tip)->i_mapping);
250 if (error) 250 if (error)
251 goto out_unlock; 251 goto out_unlock;
252 truncate_pagecache_range(VFS_I(ip), 0, -1); 252 truncate_pagecache_range(VFS_I(tip), 0, -1);
253 253
254 /* Verify O_DIRECT for ftmp */ 254 /* Verify O_DIRECT for ftmp */
255 if (VN_CACHED(VFS_I(tip)) != 0) { 255 if (VN_CACHED(VFS_I(tip)) != 0) {
diff --git a/fs/xfs/xfs_dir2_block.c b/fs/xfs/xfs_dir2_block.c
index 7536faaa61e7..12afe07a91d7 100644
--- a/fs/xfs/xfs_dir2_block.c
+++ b/fs/xfs/xfs_dir2_block.c
@@ -355,10 +355,12 @@ xfs_dir2_block_addname(
355 /* 355 /*
356 * If need to compact the leaf entries, do it now. 356 * If need to compact the leaf entries, do it now.
357 */ 357 */
358 if (compact) 358 if (compact) {
359 xfs_dir2_block_compact(tp, bp, hdr, btp, blp, &needlog, 359 xfs_dir2_block_compact(tp, bp, hdr, btp, blp, &needlog,
360 &lfloghigh, &lfloglow); 360 &lfloghigh, &lfloglow);
361 else if (btp->stale) { 361 /* recalculate blp post-compaction */
362 blp = xfs_dir2_block_leaf_p(btp);
363 } else if (btp->stale) {
362 /* 364 /*
363 * Set leaf logging boundaries to impossible state. 365 * Set leaf logging boundaries to impossible state.
364 * For the no-stale case they're set explicitly. 366 * For the no-stale case they're set explicitly.
diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c
index 9e1bf5294c91..8025eb23ad72 100644
--- a/fs/xfs/xfs_dquot.c
+++ b/fs/xfs/xfs_dquot.c
@@ -612,15 +612,9 @@ xfs_qm_dqread(
612 if (flags & XFS_QMOPT_DQALLOC) { 612 if (flags & XFS_QMOPT_DQALLOC) {
613 tp = xfs_trans_alloc(mp, XFS_TRANS_QM_DQALLOC); 613 tp = xfs_trans_alloc(mp, XFS_TRANS_QM_DQALLOC);
614 error = xfs_trans_reserve(tp, XFS_QM_DQALLOC_SPACE_RES(mp), 614 error = xfs_trans_reserve(tp, XFS_QM_DQALLOC_SPACE_RES(mp),
615 XFS_WRITE_LOG_RES(mp) + 615 XFS_QM_DQALLOC_LOG_RES(mp), 0,
616 /* 616 XFS_TRANS_PERM_LOG_RES,
617 * Round the chunklen up to the next multiple 617 XFS_WRITE_LOG_COUNT);
618 * of 128 (buf log item chunk size)).
619 */
620 BBTOB(mp->m_quotainfo->qi_dqchunklen) - 1 + 128,
621 0,
622 XFS_TRANS_PERM_LOG_RES,
623 XFS_WRITE_LOG_COUNT);
624 if (error) 618 if (error)
625 goto error1; 619 goto error1;
626 cancelflags = XFS_TRANS_RELEASE_LOG_RES; 620 cancelflags = XFS_TRANS_RELEASE_LOG_RES;
diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c
index 94eaeedc5498..2866b8c78b7a 100644
--- a/fs/xfs/xfs_fsops.c
+++ b/fs/xfs/xfs_fsops.c
@@ -709,8 +709,8 @@ xfs_fs_log_dummy(
709 int error; 709 int error;
710 710
711 tp = _xfs_trans_alloc(mp, XFS_TRANS_DUMMY1, KM_SLEEP); 711 tp = _xfs_trans_alloc(mp, XFS_TRANS_DUMMY1, KM_SLEEP);
712 error = xfs_trans_reserve(tp, 0, mp->m_sb.sb_sectsize + 128, 0, 0, 712 error = xfs_trans_reserve(tp, 0, XFS_SB_LOG_RES(mp), 0, 0,
713 XFS_DEFAULT_LOG_COUNT); 713 XFS_DEFAULT_LOG_COUNT);
714 if (error) { 714 if (error) {
715 xfs_trans_cancel(tp, 0); 715 xfs_trans_cancel(tp, 0);
716 return error; 716 return error;
diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c
index a815412eab80..515bf71ce01c 100644
--- a/fs/xfs/xfs_ialloc.c
+++ b/fs/xfs/xfs_ialloc.c
@@ -279,8 +279,6 @@ xfs_ialloc_ag_alloc(
279 (args.agbno < be32_to_cpu(agi->agi_length)))) { 279 (args.agbno < be32_to_cpu(agi->agi_length)))) {
280 args.fsbno = XFS_AGB_TO_FSB(args.mp, agno, args.agbno); 280 args.fsbno = XFS_AGB_TO_FSB(args.mp, agno, args.agbno);
281 args.type = XFS_ALLOCTYPE_THIS_BNO; 281 args.type = XFS_ALLOCTYPE_THIS_BNO;
282 args.mod = args.total = args.wasdel = args.isfl =
283 args.userdata = args.minalignslop = 0;
284 args.prod = 1; 282 args.prod = 1;
285 283
286 /* 284 /*
@@ -333,8 +331,6 @@ xfs_ialloc_ag_alloc(
333 * Allocate a fixed-size extent of inodes. 331 * Allocate a fixed-size extent of inodes.
334 */ 332 */
335 args.type = XFS_ALLOCTYPE_NEAR_BNO; 333 args.type = XFS_ALLOCTYPE_NEAR_BNO;
336 args.mod = args.total = args.wasdel = args.isfl =
337 args.userdata = args.minalignslop = 0;
338 args.prod = 1; 334 args.prod = 1;
339 /* 335 /*
340 * Allow space for the inode btree to split. 336 * Allow space for the inode btree to split.
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 66282dcb821b..4f201656d2d9 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -2379,9 +2379,6 @@ xfs_iflush_fork(
2379 char *cp; 2379 char *cp;
2380 xfs_ifork_t *ifp; 2380 xfs_ifork_t *ifp;
2381 xfs_mount_t *mp; 2381 xfs_mount_t *mp;
2382#ifdef XFS_TRANS_DEBUG
2383 int first;
2384#endif
2385 static const short brootflag[2] = 2382 static const short brootflag[2] =
2386 { XFS_ILOG_DBROOT, XFS_ILOG_ABROOT }; 2383 { XFS_ILOG_DBROOT, XFS_ILOG_ABROOT };
2387 static const short dataflag[2] = 2384 static const short dataflag[2] =
@@ -2724,9 +2721,6 @@ xfs_iflush_int(
2724 xfs_inode_log_item_t *iip; 2721 xfs_inode_log_item_t *iip;
2725 xfs_dinode_t *dip; 2722 xfs_dinode_t *dip;
2726 xfs_mount_t *mp; 2723 xfs_mount_t *mp;
2727#ifdef XFS_TRANS_DEBUG
2728 int first;
2729#endif
2730 2724
2731 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED)); 2725 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED));
2732 ASSERT(xfs_isiflocked(ip)); 2726 ASSERT(xfs_isiflocked(ip));
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index 22baf6ea4fac..237e7f6f2ab3 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -419,6 +419,7 @@ static inline void xfs_iflock(struct xfs_inode *ip)
419static inline void xfs_ifunlock(struct xfs_inode *ip) 419static inline void xfs_ifunlock(struct xfs_inode *ip)
420{ 420{
421 xfs_iflags_clear(ip, XFS_IFLOCK); 421 xfs_iflags_clear(ip, XFS_IFLOCK);
422 smp_mb();
422 wake_up_bit(&ip->i_flags, __XFS_IFLOCK_BIT); 423 wake_up_bit(&ip->i_flags, __XFS_IFLOCK_BIT);
423} 424}
424 425
diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c
index d041d47d9d86..f034bd1652f0 100644
--- a/fs/xfs/xfs_inode_item.c
+++ b/fs/xfs/xfs_inode_item.c
@@ -269,17 +269,6 @@ xfs_inode_item_format(
269 } else { 269 } else {
270 ASSERT(!(iip->ili_fields & 270 ASSERT(!(iip->ili_fields &
271 XFS_ILOG_DBROOT)); 271 XFS_ILOG_DBROOT));
272#ifdef XFS_TRANS_DEBUG
273 if (iip->ili_root_size > 0) {
274 ASSERT(iip->ili_root_size ==
275 ip->i_df.if_broot_bytes);
276 ASSERT(memcmp(iip->ili_orig_root,
277 ip->i_df.if_broot,
278 iip->ili_root_size) == 0);
279 } else {
280 ASSERT(ip->i_df.if_broot_bytes == 0);
281 }
282#endif
283 iip->ili_fields &= ~XFS_ILOG_DBROOT; 272 iip->ili_fields &= ~XFS_ILOG_DBROOT;
284 } 273 }
285 break; 274 break;
@@ -678,11 +667,6 @@ void
678xfs_inode_item_destroy( 667xfs_inode_item_destroy(
679 xfs_inode_t *ip) 668 xfs_inode_t *ip)
680{ 669{
681#ifdef XFS_TRANS_DEBUG
682 if (ip->i_itemp->ili_root_size != 0) {
683 kmem_free(ip->i_itemp->ili_orig_root);
684 }
685#endif
686 kmem_zone_free(xfs_ili_zone, ip->i_itemp); 670 kmem_zone_free(xfs_ili_zone, ip->i_itemp);
687} 671}
688 672
diff --git a/fs/xfs/xfs_inode_item.h b/fs/xfs/xfs_inode_item.h
index 376d4d0b2635..779812fb3d80 100644
--- a/fs/xfs/xfs_inode_item.h
+++ b/fs/xfs/xfs_inode_item.h
@@ -148,10 +148,6 @@ typedef struct xfs_inode_log_item {
148 data exts */ 148 data exts */
149 struct xfs_bmbt_rec *ili_aextents_buf; /* array of logged 149 struct xfs_bmbt_rec *ili_aextents_buf; /* array of logged
150 attr exts */ 150 attr exts */
151#ifdef XFS_TRANS_DEBUG
152 int ili_root_size;
153 char *ili_orig_root;
154#endif
155 xfs_inode_log_format_t ili_format; /* logged structure */ 151 xfs_inode_log_format_t ili_format; /* logged structure */
156} xfs_inode_log_item_t; 152} xfs_inode_log_item_t;
157 153
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index add06b4e9a63..912d83d8860a 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -311,6 +311,62 @@ xfs_iomap_eof_want_preallocate(
311} 311}
312 312
313/* 313/*
314 * Determine the initial size of the preallocation. We are beyond the current
315 * EOF here, but we need to take into account whether this is a sparse write or
316 * an extending write when determining the preallocation size. Hence we need to
317 * look up the extent that ends at the current write offset and use the result
318 * to determine the preallocation size.
319 *
320 * If the extent is a hole, then preallocation is essentially disabled.
321 * Otherwise we take the size of the preceding data extent as the basis for the
322 * preallocation size. If the size of the extent is greater than half the
323 * maximum extent length, then use the current offset as the basis. This ensures
324 * that for large files the preallocation size always extends to MAXEXTLEN
325 * rather than falling short due to things like stripe unit/width alignment of
326 * real extents.
327 */
328STATIC int
329xfs_iomap_eof_prealloc_initial_size(
330 struct xfs_mount *mp,
331 struct xfs_inode *ip,
332 xfs_off_t offset,
333 xfs_bmbt_irec_t *imap,
334 int nimaps)
335{
336 xfs_fileoff_t start_fsb;
337 int imaps = 1;
338 int error;
339
340 ASSERT(nimaps >= imaps);
341
342 /* if we are using a specific prealloc size, return now */
343 if (mp->m_flags & XFS_MOUNT_DFLT_IOSIZE)
344 return 0;
345
346 /*
347 * As we write multiple pages, the offset will always align to the
348 * start of a page and hence point to a hole at EOF. i.e. if the size is
349 * 4096 bytes, we only have one block at FSB 0, but XFS_B_TO_FSB(4096)
350 * will return FSB 1. Hence if there are blocks in the file, we want to
351 * point to the block prior to the EOF block and not the hole that maps
352 * directly at @offset.
353 */
354 start_fsb = XFS_B_TO_FSB(mp, offset);
355 if (start_fsb)
356 start_fsb--;
357 error = xfs_bmapi_read(ip, start_fsb, 1, imap, &imaps, XFS_BMAPI_ENTIRE);
358 if (error)
359 return 0;
360
361 ASSERT(imaps == 1);
362 if (imap[0].br_startblock == HOLESTARTBLOCK)
363 return 0;
364 if (imap[0].br_blockcount <= (MAXEXTLEN >> 1))
365 return imap[0].br_blockcount;
366 return XFS_B_TO_FSB(mp, offset);
367}
368
369/*
314 * If we don't have a user specified preallocation size, dynamically increase 370 * If we don't have a user specified preallocation size, dynamically increase
315 * the preallocation size as the size of the file grows. Cap the maximum size 371 * the preallocation size as the size of the file grows. Cap the maximum size
316 * at a single extent or less if the filesystem is near full. The closer the 372 * at a single extent or less if the filesystem is near full. The closer the
@@ -319,20 +375,19 @@ xfs_iomap_eof_want_preallocate(
319STATIC xfs_fsblock_t 375STATIC xfs_fsblock_t
320xfs_iomap_prealloc_size( 376xfs_iomap_prealloc_size(
321 struct xfs_mount *mp, 377 struct xfs_mount *mp,
322 struct xfs_inode *ip) 378 struct xfs_inode *ip,
379 xfs_off_t offset,
380 struct xfs_bmbt_irec *imap,
381 int nimaps)
323{ 382{
324 xfs_fsblock_t alloc_blocks = 0; 383 xfs_fsblock_t alloc_blocks = 0;
325 384
326 if (!(mp->m_flags & XFS_MOUNT_DFLT_IOSIZE)) { 385 alloc_blocks = xfs_iomap_eof_prealloc_initial_size(mp, ip, offset,
386 imap, nimaps);
387 if (alloc_blocks > 0) {
327 int shift = 0; 388 int shift = 0;
328 int64_t freesp; 389 int64_t freesp;
329 390
330 /*
331 * rounddown_pow_of_two() returns an undefined result
332 * if we pass in alloc_blocks = 0. Hence the "+ 1" to
333 * ensure we always pass in a non-zero value.
334 */
335 alloc_blocks = XFS_B_TO_FSB(mp, XFS_ISIZE(ip)) + 1;
336 alloc_blocks = XFS_FILEOFF_MIN(MAXEXTLEN, 391 alloc_blocks = XFS_FILEOFF_MIN(MAXEXTLEN,
337 rounddown_pow_of_two(alloc_blocks)); 392 rounddown_pow_of_two(alloc_blocks));
338 393
@@ -351,6 +406,15 @@ xfs_iomap_prealloc_size(
351 } 406 }
352 if (shift) 407 if (shift)
353 alloc_blocks >>= shift; 408 alloc_blocks >>= shift;
409
410 /*
411 * If we are still trying to allocate more space than is
412 * available, squash the prealloc hard. This can happen if we
413 * have a large file on a small filesystem and the above
414 * lowspace thresholds are smaller than MAXEXTLEN.
415 */
416 while (alloc_blocks >= freesp)
417 alloc_blocks >>= 4;
354 } 418 }
355 419
356 if (alloc_blocks < mp->m_writeio_blocks) 420 if (alloc_blocks < mp->m_writeio_blocks)
@@ -390,7 +454,6 @@ xfs_iomap_write_delay(
390 extsz = xfs_get_extsz_hint(ip); 454 extsz = xfs_get_extsz_hint(ip);
391 offset_fsb = XFS_B_TO_FSBT(mp, offset); 455 offset_fsb = XFS_B_TO_FSBT(mp, offset);
392 456
393
394 error = xfs_iomap_eof_want_preallocate(mp, ip, offset, count, 457 error = xfs_iomap_eof_want_preallocate(mp, ip, offset, count,
395 imap, XFS_WRITE_IMAPS, &prealloc); 458 imap, XFS_WRITE_IMAPS, &prealloc);
396 if (error) 459 if (error)
@@ -398,7 +461,10 @@ xfs_iomap_write_delay(
398 461
399retry: 462retry:
400 if (prealloc) { 463 if (prealloc) {
401 xfs_fsblock_t alloc_blocks = xfs_iomap_prealloc_size(mp, ip); 464 xfs_fsblock_t alloc_blocks;
465
466 alloc_blocks = xfs_iomap_prealloc_size(mp, ip, offset, imap,
467 XFS_WRITE_IMAPS);
402 468
403 aligned_offset = XFS_WRITEIO_ALIGN(mp, (offset + count - 1)); 469 aligned_offset = XFS_WRITEIO_ALIGN(mp, (offset + count - 1));
404 ioalign = XFS_B_TO_FSBT(mp, aligned_offset); 470 ioalign = XFS_B_TO_FSBT(mp, aligned_offset);
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index 46bd9d52ab51..eec226f78a40 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -120,7 +120,7 @@ xlog_verify_iclog(
120 struct xlog *log, 120 struct xlog *log,
121 struct xlog_in_core *iclog, 121 struct xlog_in_core *iclog,
122 int count, 122 int count,
123 boolean_t syncing); 123 bool syncing);
124STATIC void 124STATIC void
125xlog_verify_tail_lsn( 125xlog_verify_tail_lsn(
126 struct xlog *log, 126 struct xlog *log,
@@ -1737,7 +1737,7 @@ xlog_sync(
1737 ASSERT(XFS_BUF_ADDR(bp) <= log->l_logBBsize-1); 1737 ASSERT(XFS_BUF_ADDR(bp) <= log->l_logBBsize-1);
1738 ASSERT(XFS_BUF_ADDR(bp) + BTOBB(count) <= log->l_logBBsize); 1738 ASSERT(XFS_BUF_ADDR(bp) + BTOBB(count) <= log->l_logBBsize);
1739 1739
1740 xlog_verify_iclog(log, iclog, count, B_TRUE); 1740 xlog_verify_iclog(log, iclog, count, true);
1741 1741
1742 /* account for log which doesn't start at block #0 */ 1742 /* account for log which doesn't start at block #0 */
1743 XFS_BUF_SET_ADDR(bp, XFS_BUF_ADDR(bp) + log->l_logBBstart); 1743 XFS_BUF_SET_ADDR(bp, XFS_BUF_ADDR(bp) + log->l_logBBstart);
@@ -3611,7 +3611,7 @@ xlog_verify_iclog(
3611 struct xlog *log, 3611 struct xlog *log,
3612 struct xlog_in_core *iclog, 3612 struct xlog_in_core *iclog,
3613 int count, 3613 int count,
3614 boolean_t syncing) 3614 bool syncing)
3615{ 3615{
3616 xlog_op_header_t *ophead; 3616 xlog_op_header_t *ophead;
3617 xlog_in_core_t *icptr; 3617 xlog_in_core_t *icptr;
@@ -3659,7 +3659,7 @@ xlog_verify_iclog(
3659 /* clientid is only 1 byte */ 3659 /* clientid is only 1 byte */
3660 field_offset = (__psint_t) 3660 field_offset = (__psint_t)
3661 ((xfs_caddr_t)&(ophead->oh_clientid) - base_ptr); 3661 ((xfs_caddr_t)&(ophead->oh_clientid) - base_ptr);
3662 if (syncing == B_FALSE || (field_offset & 0x1ff)) { 3662 if (!syncing || (field_offset & 0x1ff)) {
3663 clientid = ophead->oh_clientid; 3663 clientid = ophead->oh_clientid;
3664 } else { 3664 } else {
3665 idx = BTOBBT((xfs_caddr_t)&(ophead->oh_clientid) - iclog->ic_datap); 3665 idx = BTOBBT((xfs_caddr_t)&(ophead->oh_clientid) - iclog->ic_datap);
@@ -3682,7 +3682,7 @@ xlog_verify_iclog(
3682 /* check length */ 3682 /* check length */
3683 field_offset = (__psint_t) 3683 field_offset = (__psint_t)
3684 ((xfs_caddr_t)&(ophead->oh_len) - base_ptr); 3684 ((xfs_caddr_t)&(ophead->oh_len) - base_ptr);
3685 if (syncing == B_FALSE || (field_offset & 0x1ff)) { 3685 if (!syncing || (field_offset & 0x1ff)) {
3686 op_len = be32_to_cpu(ophead->oh_len); 3686 op_len = be32_to_cpu(ophead->oh_len);
3687 } else { 3687 } else {
3688 idx = BTOBBT((__psint_t)&ophead->oh_len - 3688 idx = BTOBBT((__psint_t)&ophead->oh_len -
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index da508463ff10..3806088a8f77 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -658,7 +658,7 @@ xfs_sb_quiet_read_verify(
658 return; 658 return;
659 } 659 }
660 /* quietly fail */ 660 /* quietly fail */
661 xfs_buf_ioerror(bp, EFSCORRUPTED); 661 xfs_buf_ioerror(bp, EWRONGFS);
662} 662}
663 663
664static void 664static void
@@ -1109,8 +1109,8 @@ xfs_mount_reset_sbqflags(
1109 return 0; 1109 return 0;
1110 1110
1111 tp = xfs_trans_alloc(mp, XFS_TRANS_QM_SBCHANGE); 1111 tp = xfs_trans_alloc(mp, XFS_TRANS_QM_SBCHANGE);
1112 error = xfs_trans_reserve(tp, 0, mp->m_sb.sb_sectsize + 128, 0, 0, 1112 error = xfs_trans_reserve(tp, 0, XFS_QM_SBCHANGE_LOG_RES(mp),
1113 XFS_DEFAULT_LOG_COUNT); 1113 0, 0, XFS_DEFAULT_LOG_COUNT);
1114 if (error) { 1114 if (error) {
1115 xfs_trans_cancel(tp, 0); 1115 xfs_trans_cancel(tp, 0);
1116 xfs_alert(mp, "%s: Superblock update failed!", __func__); 1116 xfs_alert(mp, "%s: Superblock update failed!", __func__);
@@ -1583,8 +1583,8 @@ xfs_log_sbcount(xfs_mount_t *mp)
1583 return 0; 1583 return 0;
1584 1584
1585 tp = _xfs_trans_alloc(mp, XFS_TRANS_SB_COUNT, KM_SLEEP); 1585 tp = _xfs_trans_alloc(mp, XFS_TRANS_SB_COUNT, KM_SLEEP);
1586 error = xfs_trans_reserve(tp, 0, mp->m_sb.sb_sectsize + 128, 0, 0, 1586 error = xfs_trans_reserve(tp, 0, XFS_SB_LOG_RES(mp), 0, 0,
1587 XFS_DEFAULT_LOG_COUNT); 1587 XFS_DEFAULT_LOG_COUNT);
1588 if (error) { 1588 if (error) {
1589 xfs_trans_cancel(tp, 0); 1589 xfs_trans_cancel(tp, 0);
1590 return error; 1590 return error;
@@ -1945,8 +1945,8 @@ xfs_mount_log_sb(
1945 XFS_SB_VERSIONNUM)); 1945 XFS_SB_VERSIONNUM));
1946 1946
1947 tp = xfs_trans_alloc(mp, XFS_TRANS_SB_UNIT); 1947 tp = xfs_trans_alloc(mp, XFS_TRANS_SB_UNIT);
1948 error = xfs_trans_reserve(tp, 0, mp->m_sb.sb_sectsize + 128, 0, 0, 1948 error = xfs_trans_reserve(tp, 0, XFS_SB_LOG_RES(mp), 0, 0,
1949 XFS_DEFAULT_LOG_COUNT); 1949 XFS_DEFAULT_LOG_COUNT);
1950 if (error) { 1950 if (error) {
1951 xfs_trans_cancel(tp, 0); 1951 xfs_trans_cancel(tp, 0);
1952 return error; 1952 return error;
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index bab8314507e4..bc907061d392 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -34,12 +34,19 @@ typedef struct xfs_trans_reservations {
34 uint tr_addafork; /* cvt inode to attributed trans */ 34 uint tr_addafork; /* cvt inode to attributed trans */
35 uint tr_writeid; /* write setuid/setgid file */ 35 uint tr_writeid; /* write setuid/setgid file */
36 uint tr_attrinval; /* attr fork buffer invalidation */ 36 uint tr_attrinval; /* attr fork buffer invalidation */
37 uint tr_attrset; /* set/create an attribute */ 37 uint tr_attrsetm; /* set/create an attribute at mount time */
38 uint tr_attrsetrt; /* set/create an attribute at runtime */
38 uint tr_attrrm; /* remove an attribute */ 39 uint tr_attrrm; /* remove an attribute */
39 uint tr_clearagi; /* clear bad agi unlinked ino bucket */ 40 uint tr_clearagi; /* clear bad agi unlinked ino bucket */
40 uint tr_growrtalloc; /* grow realtime allocations */ 41 uint tr_growrtalloc; /* grow realtime allocations */
41 uint tr_growrtzero; /* grow realtime zeroing */ 42 uint tr_growrtzero; /* grow realtime zeroing */
42 uint tr_growrtfree; /* grow realtime freeing */ 43 uint tr_growrtfree; /* grow realtime freeing */
44 uint tr_qm_sbchange; /* change quota flags */
45 uint tr_qm_setqlim; /* adjust quota limits */
46 uint tr_qm_dqalloc; /* allocate quota on disk */
47 uint tr_qm_quotaoff; /* turn quota off */
48 uint tr_qm_equotaoff;/* end of turn quota off */
49 uint tr_sb; /* modify superblock */
43} xfs_trans_reservations_t; 50} xfs_trans_reservations_t;
44 51
45#ifndef __KERNEL__ 52#ifndef __KERNEL__
diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c
index 60eff4763156..e5b5cf973781 100644
--- a/fs/xfs/xfs_qm.c
+++ b/fs/xfs/xfs_qm.c
@@ -1584,10 +1584,9 @@ xfs_qm_write_sb_changes(
1584 int error; 1584 int error;
1585 1585
1586 tp = xfs_trans_alloc(mp, XFS_TRANS_QM_SBCHANGE); 1586 tp = xfs_trans_alloc(mp, XFS_TRANS_QM_SBCHANGE);
1587 if ((error = xfs_trans_reserve(tp, 0, 1587 error = xfs_trans_reserve(tp, 0, XFS_QM_SBCHANGE_LOG_RES(mp),
1588 mp->m_sb.sb_sectsize + 128, 0, 1588 0, 0, XFS_DEFAULT_LOG_COUNT);
1589 0, 1589 if (error) {
1590 XFS_DEFAULT_LOG_COUNT))) {
1591 xfs_trans_cancel(tp, 0); 1590 xfs_trans_cancel(tp, 0);
1592 return error; 1591 return error;
1593 } 1592 }
diff --git a/fs/xfs/xfs_qm_bhv.c b/fs/xfs/xfs_qm_bhv.c
index 6b39115bf145..2d02eac1c9a8 100644
--- a/fs/xfs/xfs_qm_bhv.c
+++ b/fs/xfs/xfs_qm_bhv.c
@@ -146,7 +146,7 @@ xfs_qm_newmount(
146 * inode goes inactive and wants to free blocks, 146 * inode goes inactive and wants to free blocks,
147 * or via xfs_log_mount_finish. 147 * or via xfs_log_mount_finish.
148 */ 148 */
149 *needquotamount = B_TRUE; 149 *needquotamount = true;
150 *quotaflags = mp->m_qflags; 150 *quotaflags = mp->m_qflags;
151 mp->m_qflags = 0; 151 mp->m_qflags = 0;
152 } 152 }
diff --git a/fs/xfs/xfs_qm_syscalls.c b/fs/xfs/xfs_qm_syscalls.c
index 5f53e75409b8..cf9a34051e07 100644
--- a/fs/xfs/xfs_qm_syscalls.c
+++ b/fs/xfs/xfs_qm_syscalls.c
@@ -408,10 +408,10 @@ xfs_qm_scall_getqstat(
408{ 408{
409 struct xfs_quotainfo *q = mp->m_quotainfo; 409 struct xfs_quotainfo *q = mp->m_quotainfo;
410 struct xfs_inode *uip, *gip; 410 struct xfs_inode *uip, *gip;
411 boolean_t tempuqip, tempgqip; 411 bool tempuqip, tempgqip;
412 412
413 uip = gip = NULL; 413 uip = gip = NULL;
414 tempuqip = tempgqip = B_FALSE; 414 tempuqip = tempgqip = false;
415 memset(out, 0, sizeof(fs_quota_stat_t)); 415 memset(out, 0, sizeof(fs_quota_stat_t));
416 416
417 out->qs_version = FS_QSTAT_VERSION; 417 out->qs_version = FS_QSTAT_VERSION;
@@ -434,12 +434,12 @@ xfs_qm_scall_getqstat(
434 if (!uip && mp->m_sb.sb_uquotino != NULLFSINO) { 434 if (!uip && mp->m_sb.sb_uquotino != NULLFSINO) {
435 if (xfs_iget(mp, NULL, mp->m_sb.sb_uquotino, 435 if (xfs_iget(mp, NULL, mp->m_sb.sb_uquotino,
436 0, 0, &uip) == 0) 436 0, 0, &uip) == 0)
437 tempuqip = B_TRUE; 437 tempuqip = true;
438 } 438 }
439 if (!gip && mp->m_sb.sb_gquotino != NULLFSINO) { 439 if (!gip && mp->m_sb.sb_gquotino != NULLFSINO) {
440 if (xfs_iget(mp, NULL, mp->m_sb.sb_gquotino, 440 if (xfs_iget(mp, NULL, mp->m_sb.sb_gquotino,
441 0, 0, &gip) == 0) 441 0, 0, &gip) == 0)
442 tempgqip = B_TRUE; 442 tempgqip = true;
443 } 443 }
444 if (uip) { 444 if (uip) {
445 out->qs_uquota.qfs_nblks = uip->i_d.di_nblocks; 445 out->qs_uquota.qfs_nblks = uip->i_d.di_nblocks;
@@ -490,8 +490,9 @@ xfs_qm_scall_setqlim(
490 return 0; 490 return 0;
491 491
492 tp = xfs_trans_alloc(mp, XFS_TRANS_QM_SETQLIM); 492 tp = xfs_trans_alloc(mp, XFS_TRANS_QM_SETQLIM);
493 if ((error = xfs_trans_reserve(tp, 0, sizeof(xfs_disk_dquot_t) + 128, 493 error = xfs_trans_reserve(tp, 0, XFS_QM_SETQLIM_LOG_RES(mp),
494 0, 0, XFS_DEFAULT_LOG_COUNT))) { 494 0, 0, XFS_DEFAULT_LOG_COUNT);
495 if (error) {
495 xfs_trans_cancel(tp, 0); 496 xfs_trans_cancel(tp, 0);
496 return (error); 497 return (error);
497 } 498 }
@@ -638,8 +639,9 @@ xfs_qm_log_quotaoff_end(
638 639
639 tp = xfs_trans_alloc(mp, XFS_TRANS_QM_QUOTAOFF_END); 640 tp = xfs_trans_alloc(mp, XFS_TRANS_QM_QUOTAOFF_END);
640 641
641 if ((error = xfs_trans_reserve(tp, 0, sizeof(xfs_qoff_logitem_t) * 2, 642 error = xfs_trans_reserve(tp, 0, XFS_QM_QUOTAOFF_END_LOG_RES(mp),
642 0, 0, XFS_DEFAULT_LOG_COUNT))) { 643 0, 0, XFS_DEFAULT_LOG_COUNT);
644 if (error) {
643 xfs_trans_cancel(tp, 0); 645 xfs_trans_cancel(tp, 0);
644 return (error); 646 return (error);
645 } 647 }
@@ -671,14 +673,10 @@ xfs_qm_log_quotaoff(
671 uint oldsbqflag=0; 673 uint oldsbqflag=0;
672 674
673 tp = xfs_trans_alloc(mp, XFS_TRANS_QM_QUOTAOFF); 675 tp = xfs_trans_alloc(mp, XFS_TRANS_QM_QUOTAOFF);
674 if ((error = xfs_trans_reserve(tp, 0, 676 error = xfs_trans_reserve(tp, 0, XFS_QM_QUOTAOFF_LOG_RES(mp),
675 sizeof(xfs_qoff_logitem_t) * 2 + 677 0, 0, XFS_DEFAULT_LOG_COUNT);
676 mp->m_sb.sb_sectsize + 128, 678 if (error)
677 0,
678 0,
679 XFS_DEFAULT_LOG_COUNT))) {
680 goto error0; 679 goto error0;
681 }
682 680
683 qoffi = xfs_trans_get_qoff_item(tp, NULL, flags & XFS_ALL_QUOTA_ACCT); 681 qoffi = xfs_trans_get_qoff_item(tp, NULL, flags & XFS_ALL_QUOTA_ACCT);
684 xfs_trans_log_quotaoff_item(tp, qoffi); 682 xfs_trans_log_quotaoff_item(tp, qoffi);
@@ -784,11 +782,11 @@ xfs_qm_scall_getquota(
784 (XFS_IS_OQUOTA_ENFORCED(mp) && 782 (XFS_IS_OQUOTA_ENFORCED(mp) &&
785 (dst->d_flags & (FS_PROJ_QUOTA | FS_GROUP_QUOTA)))) && 783 (dst->d_flags & (FS_PROJ_QUOTA | FS_GROUP_QUOTA)))) &&
786 dst->d_id != 0) { 784 dst->d_id != 0) {
787 if (((int) dst->d_bcount > (int) dst->d_blk_softlimit) && 785 if ((dst->d_bcount > dst->d_blk_softlimit) &&
788 (dst->d_blk_softlimit > 0)) { 786 (dst->d_blk_softlimit > 0)) {
789 ASSERT(dst->d_btimer != 0); 787 ASSERT(dst->d_btimer != 0);
790 } 788 }
791 if (((int) dst->d_icount > (int) dst->d_ino_softlimit) && 789 if ((dst->d_icount > dst->d_ino_softlimit) &&
792 (dst->d_ino_softlimit > 0)) { 790 (dst->d_ino_softlimit > 0)) {
793 ASSERT(dst->d_itimer != 0); 791 ASSERT(dst->d_itimer != 0);
794 } 792 }
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index ab8839b26272..c407121873b4 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -139,9 +139,9 @@ static const match_table_t tokens = {
139 139
140 140
141STATIC unsigned long 141STATIC unsigned long
142suffix_strtoul(char *s, char **endp, unsigned int base) 142suffix_kstrtoint(char *s, unsigned int base, int *res)
143{ 143{
144 int last, shift_left_factor = 0; 144 int last, shift_left_factor = 0, _res;
145 char *value = s; 145 char *value = s;
146 146
147 last = strlen(value) - 1; 147 last = strlen(value) - 1;
@@ -158,7 +158,10 @@ suffix_strtoul(char *s, char **endp, unsigned int base)
158 value[last] = '\0'; 158 value[last] = '\0';
159 } 159 }
160 160
161 return simple_strtoul((const char *)s, endp, base) << shift_left_factor; 161 if (kstrtoint(s, base, &_res))
162 return -EINVAL;
163 *res = _res << shift_left_factor;
164 return 0;
162} 165}
163 166
164/* 167/*
@@ -174,7 +177,7 @@ xfs_parseargs(
174 char *options) 177 char *options)
175{ 178{
176 struct super_block *sb = mp->m_super; 179 struct super_block *sb = mp->m_super;
177 char *this_char, *value, *eov; 180 char *this_char, *value;
178 int dsunit = 0; 181 int dsunit = 0;
179 int dswidth = 0; 182 int dswidth = 0;
180 int iosize = 0; 183 int iosize = 0;
@@ -230,14 +233,16 @@ xfs_parseargs(
230 this_char); 233 this_char);
231 return EINVAL; 234 return EINVAL;
232 } 235 }
233 mp->m_logbufs = simple_strtoul(value, &eov, 10); 236 if (kstrtoint(value, 10, &mp->m_logbufs))
237 return EINVAL;
234 } else if (!strcmp(this_char, MNTOPT_LOGBSIZE)) { 238 } else if (!strcmp(this_char, MNTOPT_LOGBSIZE)) {
235 if (!value || !*value) { 239 if (!value || !*value) {
236 xfs_warn(mp, "%s option requires an argument", 240 xfs_warn(mp, "%s option requires an argument",
237 this_char); 241 this_char);
238 return EINVAL; 242 return EINVAL;
239 } 243 }
240 mp->m_logbsize = suffix_strtoul(value, &eov, 10); 244 if (suffix_kstrtoint(value, 10, &mp->m_logbsize))
245 return EINVAL;
241 } else if (!strcmp(this_char, MNTOPT_LOGDEV)) { 246 } else if (!strcmp(this_char, MNTOPT_LOGDEV)) {
242 if (!value || !*value) { 247 if (!value || !*value) {
243 xfs_warn(mp, "%s option requires an argument", 248 xfs_warn(mp, "%s option requires an argument",
@@ -266,7 +271,8 @@ xfs_parseargs(
266 this_char); 271 this_char);
267 return EINVAL; 272 return EINVAL;
268 } 273 }
269 iosize = simple_strtoul(value, &eov, 10); 274 if (kstrtoint(value, 10, &iosize))
275 return EINVAL;
270 iosizelog = ffs(iosize) - 1; 276 iosizelog = ffs(iosize) - 1;
271 } else if (!strcmp(this_char, MNTOPT_ALLOCSIZE)) { 277 } else if (!strcmp(this_char, MNTOPT_ALLOCSIZE)) {
272 if (!value || !*value) { 278 if (!value || !*value) {
@@ -274,7 +280,8 @@ xfs_parseargs(
274 this_char); 280 this_char);
275 return EINVAL; 281 return EINVAL;
276 } 282 }
277 iosize = suffix_strtoul(value, &eov, 10); 283 if (suffix_kstrtoint(value, 10, &iosize))
284 return EINVAL;
278 iosizelog = ffs(iosize) - 1; 285 iosizelog = ffs(iosize) - 1;
279 } else if (!strcmp(this_char, MNTOPT_GRPID) || 286 } else if (!strcmp(this_char, MNTOPT_GRPID) ||
280 !strcmp(this_char, MNTOPT_BSDGROUPS)) { 287 !strcmp(this_char, MNTOPT_BSDGROUPS)) {
@@ -296,14 +303,16 @@ xfs_parseargs(
296 this_char); 303 this_char);
297 return EINVAL; 304 return EINVAL;
298 } 305 }
299 dsunit = simple_strtoul(value, &eov, 10); 306 if (kstrtoint(value, 10, &dsunit))
307 return EINVAL;
300 } else if (!strcmp(this_char, MNTOPT_SWIDTH)) { 308 } else if (!strcmp(this_char, MNTOPT_SWIDTH)) {
301 if (!value || !*value) { 309 if (!value || !*value) {
302 xfs_warn(mp, "%s option requires an argument", 310 xfs_warn(mp, "%s option requires an argument",
303 this_char); 311 this_char);
304 return EINVAL; 312 return EINVAL;
305 } 313 }
306 dswidth = simple_strtoul(value, &eov, 10); 314 if (kstrtoint(value, 10, &dswidth))
315 return EINVAL;
307 } else if (!strcmp(this_char, MNTOPT_32BITINODE)) { 316 } else if (!strcmp(this_char, MNTOPT_32BITINODE)) {
308 mp->m_flags |= XFS_MOUNT_SMALL_INUMS; 317 mp->m_flags |= XFS_MOUNT_SMALL_INUMS;
309 } else if (!strcmp(this_char, MNTOPT_64BITINODE)) { 318 } else if (!strcmp(this_char, MNTOPT_64BITINODE)) {
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index 2e137d4a85ae..16a812977eab 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -341,6 +341,7 @@ DEFINE_BUF_EVENT(xfs_buf_item_relse);
341DEFINE_BUF_EVENT(xfs_buf_item_iodone); 341DEFINE_BUF_EVENT(xfs_buf_item_iodone);
342DEFINE_BUF_EVENT(xfs_buf_item_iodone_async); 342DEFINE_BUF_EVENT(xfs_buf_item_iodone_async);
343DEFINE_BUF_EVENT(xfs_buf_error_relse); 343DEFINE_BUF_EVENT(xfs_buf_error_relse);
344DEFINE_BUF_EVENT(xfs_buf_wait_buftarg);
344DEFINE_BUF_EVENT(xfs_trans_read_buf_io); 345DEFINE_BUF_EVENT(xfs_trans_read_buf_io);
345DEFINE_BUF_EVENT(xfs_trans_read_buf_shut); 346DEFINE_BUF_EVENT(xfs_trans_read_buf_shut);
346 347
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c
index 06ed520a767f..2fd7c1ff1d21 100644
--- a/fs/xfs/xfs_trans.c
+++ b/fs/xfs/xfs_trans.c
@@ -37,14 +37,45 @@
37#include "xfs_extent_busy.h" 37#include "xfs_extent_busy.h"
38#include "xfs_bmap.h" 38#include "xfs_bmap.h"
39#include "xfs_quota.h" 39#include "xfs_quota.h"
40#include "xfs_qm.h"
40#include "xfs_trans_priv.h" 41#include "xfs_trans_priv.h"
41#include "xfs_trans_space.h" 42#include "xfs_trans_space.h"
42#include "xfs_inode_item.h" 43#include "xfs_inode_item.h"
44#include "xfs_log_priv.h"
45#include "xfs_buf_item.h"
43#include "xfs_trace.h" 46#include "xfs_trace.h"
44 47
45kmem_zone_t *xfs_trans_zone; 48kmem_zone_t *xfs_trans_zone;
46kmem_zone_t *xfs_log_item_desc_zone; 49kmem_zone_t *xfs_log_item_desc_zone;
47 50
51/*
52 * A buffer has a format structure overhead in the log in addition
53 * to the data, so we need to take this into account when reserving
54 * space in a transaction for a buffer. Round the space required up
55 * to a multiple of 128 bytes so that we don't change the historical
56 * reservation that has been used for this overhead.
57 */
58STATIC uint
59xfs_buf_log_overhead(void)
60{
61 return round_up(sizeof(struct xlog_op_header) +
62 sizeof(struct xfs_buf_log_format), 128);
63}
64
65/*
66 * Calculate out transaction log reservation per item in bytes.
67 *
68 * The nbufs argument is used to indicate the number of items that
69 * will be changed in a transaction. size is used to tell how many
70 * bytes should be reserved per item.
71 */
72STATIC uint
73xfs_calc_buf_res(
74 uint nbufs,
75 uint size)
76{
77 return nbufs * (size + xfs_buf_log_overhead());
78}
48 79
49/* 80/*
50 * Various log reservation values. 81 * Various log reservation values.
@@ -85,18 +116,15 @@ xfs_calc_write_reservation(
85 struct xfs_mount *mp) 116 struct xfs_mount *mp)
86{ 117{
87 return XFS_DQUOT_LOGRES(mp) + 118 return XFS_DQUOT_LOGRES(mp) +
88 MAX((mp->m_sb.sb_inodesize + 119 MAX((xfs_calc_buf_res(1, mp->m_sb.sb_inodesize) +
89 XFS_FSB_TO_B(mp, XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK)) + 120 xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK),
90 2 * mp->m_sb.sb_sectsize + 121 XFS_FSB_TO_B(mp, 1)) +
91 mp->m_sb.sb_sectsize + 122 xfs_calc_buf_res(3, mp->m_sb.sb_sectsize) +
92 XFS_ALLOCFREE_LOG_RES(mp, 2) + 123 xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 2),
93 128 * (4 + XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) + 124 XFS_FSB_TO_B(mp, 1))),
94 XFS_ALLOCFREE_LOG_COUNT(mp, 2))), 125 (xfs_calc_buf_res(5, mp->m_sb.sb_sectsize) +
95 (2 * mp->m_sb.sb_sectsize + 126 xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 2),
96 2 * mp->m_sb.sb_sectsize + 127 XFS_FSB_TO_B(mp, 1))));
97 mp->m_sb.sb_sectsize +
98 XFS_ALLOCFREE_LOG_RES(mp, 2) +
99 128 * (5 + XFS_ALLOCFREE_LOG_COUNT(mp, 2))));
100} 128}
101 129
102/* 130/*
@@ -117,18 +145,17 @@ xfs_calc_itruncate_reservation(
117 struct xfs_mount *mp) 145 struct xfs_mount *mp)
118{ 146{
119 return XFS_DQUOT_LOGRES(mp) + 147 return XFS_DQUOT_LOGRES(mp) +
120 MAX((mp->m_sb.sb_inodesize + 148 MAX((xfs_calc_buf_res(1, mp->m_sb.sb_inodesize) +
121 XFS_FSB_TO_B(mp, XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) + 1) + 149 xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) + 1,
122 128 * (2 + XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK))), 150 XFS_FSB_TO_B(mp, 1))),
123 (4 * mp->m_sb.sb_sectsize + 151 (xfs_calc_buf_res(9, mp->m_sb.sb_sectsize) +
124 4 * mp->m_sb.sb_sectsize + 152 xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 4),
125 mp->m_sb.sb_sectsize + 153 XFS_FSB_TO_B(mp, 1)) +
126 XFS_ALLOCFREE_LOG_RES(mp, 4) + 154 xfs_calc_buf_res(5, 0) +
127 128 * (9 + XFS_ALLOCFREE_LOG_COUNT(mp, 4)) + 155 xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1),
128 128 * 5 + 156 XFS_FSB_TO_B(mp, 1)) +
129 XFS_ALLOCFREE_LOG_RES(mp, 1) + 157 xfs_calc_buf_res(2 + XFS_IALLOC_BLOCKS(mp) +
130 128 * (2 + XFS_IALLOC_BLOCKS(mp) + mp->m_in_maxlevels + 158 mp->m_in_maxlevels, 0)));
131 XFS_ALLOCFREE_LOG_COUNT(mp, 1))));
132} 159}
133 160
134/* 161/*
@@ -148,14 +175,12 @@ xfs_calc_rename_reservation(
148 struct xfs_mount *mp) 175 struct xfs_mount *mp)
149{ 176{
150 return XFS_DQUOT_LOGRES(mp) + 177 return XFS_DQUOT_LOGRES(mp) +
151 MAX((4 * mp->m_sb.sb_inodesize + 178 MAX((xfs_calc_buf_res(4, mp->m_sb.sb_inodesize) +
152 2 * XFS_DIROP_LOG_RES(mp) + 179 xfs_calc_buf_res(2 * XFS_DIROP_LOG_COUNT(mp),
153 128 * (4 + 2 * XFS_DIROP_LOG_COUNT(mp))), 180 XFS_FSB_TO_B(mp, 1))),
154 (3 * mp->m_sb.sb_sectsize + 181 (xfs_calc_buf_res(7, mp->m_sb.sb_sectsize) +
155 3 * mp->m_sb.sb_sectsize + 182 xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 3),
156 mp->m_sb.sb_sectsize + 183 XFS_FSB_TO_B(mp, 1))));
157 XFS_ALLOCFREE_LOG_RES(mp, 3) +
158 128 * (7 + XFS_ALLOCFREE_LOG_COUNT(mp, 3))));
159} 184}
160 185
161/* 186/*
@@ -175,15 +200,12 @@ xfs_calc_link_reservation(
175 struct xfs_mount *mp) 200 struct xfs_mount *mp)
176{ 201{
177 return XFS_DQUOT_LOGRES(mp) + 202 return XFS_DQUOT_LOGRES(mp) +
178 MAX((mp->m_sb.sb_inodesize + 203 MAX((xfs_calc_buf_res(2, mp->m_sb.sb_inodesize) +
179 mp->m_sb.sb_inodesize + 204 xfs_calc_buf_res(XFS_DIROP_LOG_COUNT(mp),
180 XFS_DIROP_LOG_RES(mp) + 205 XFS_FSB_TO_B(mp, 1))),
181 128 * (2 + XFS_DIROP_LOG_COUNT(mp))), 206 (xfs_calc_buf_res(3, mp->m_sb.sb_sectsize) +
182 (mp->m_sb.sb_sectsize + 207 xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1),
183 mp->m_sb.sb_sectsize + 208 XFS_FSB_TO_B(mp, 1))));
184 mp->m_sb.sb_sectsize +
185 XFS_ALLOCFREE_LOG_RES(mp, 1) +
186 128 * (3 + XFS_ALLOCFREE_LOG_COUNT(mp, 1))));
187} 209}
188 210
189/* 211/*
@@ -203,15 +225,12 @@ xfs_calc_remove_reservation(
203 struct xfs_mount *mp) 225 struct xfs_mount *mp)
204{ 226{
205 return XFS_DQUOT_LOGRES(mp) + 227 return XFS_DQUOT_LOGRES(mp) +
206 MAX((mp->m_sb.sb_inodesize + 228 MAX((xfs_calc_buf_res(2, mp->m_sb.sb_inodesize) +
207 mp->m_sb.sb_inodesize + 229 xfs_calc_buf_res(XFS_DIROP_LOG_COUNT(mp),
208 XFS_DIROP_LOG_RES(mp) + 230 XFS_FSB_TO_B(mp, 1))),
209 128 * (2 + XFS_DIROP_LOG_COUNT(mp))), 231 (xfs_calc_buf_res(5, mp->m_sb.sb_sectsize) +
210 (2 * mp->m_sb.sb_sectsize + 232 xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 2),
211 2 * mp->m_sb.sb_sectsize + 233 XFS_FSB_TO_B(mp, 1))));
212 mp->m_sb.sb_sectsize +
213 XFS_ALLOCFREE_LOG_RES(mp, 2) +
214 128 * (5 + XFS_ALLOCFREE_LOG_COUNT(mp, 2))));
215} 234}
216 235
217/* 236/*
@@ -233,18 +252,18 @@ xfs_calc_symlink_reservation(
233 struct xfs_mount *mp) 252 struct xfs_mount *mp)
234{ 253{
235 return XFS_DQUOT_LOGRES(mp) + 254 return XFS_DQUOT_LOGRES(mp) +
236 MAX((mp->m_sb.sb_inodesize + 255 MAX((xfs_calc_buf_res(2, mp->m_sb.sb_inodesize) +
237 mp->m_sb.sb_inodesize + 256 xfs_calc_buf_res(1, XFS_FSB_TO_B(mp, 1)) +
238 XFS_FSB_TO_B(mp, 1) + 257 xfs_calc_buf_res(XFS_DIROP_LOG_COUNT(mp),
239 XFS_DIROP_LOG_RES(mp) + 258 XFS_FSB_TO_B(mp, 1)) +
240 1024 + 259 xfs_calc_buf_res(1, 1024)),
241 128 * (4 + XFS_DIROP_LOG_COUNT(mp))), 260 (xfs_calc_buf_res(2, mp->m_sb.sb_sectsize) +
242 (2 * mp->m_sb.sb_sectsize + 261 xfs_calc_buf_res(XFS_IALLOC_BLOCKS(mp),
243 XFS_FSB_TO_B(mp, XFS_IALLOC_BLOCKS(mp)) + 262 XFS_FSB_TO_B(mp, 1)) +
244 XFS_FSB_TO_B(mp, mp->m_in_maxlevels) + 263 xfs_calc_buf_res(mp->m_in_maxlevels,
245 XFS_ALLOCFREE_LOG_RES(mp, 1) + 264 XFS_FSB_TO_B(mp, 1)) +
246 128 * (2 + XFS_IALLOC_BLOCKS(mp) + mp->m_in_maxlevels + 265 xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1),
247 XFS_ALLOCFREE_LOG_COUNT(mp, 1)))); 266 XFS_FSB_TO_B(mp, 1))));
248} 267}
249 268
250/* 269/*
@@ -267,18 +286,19 @@ xfs_calc_create_reservation(
267 struct xfs_mount *mp) 286 struct xfs_mount *mp)
268{ 287{
269 return XFS_DQUOT_LOGRES(mp) + 288 return XFS_DQUOT_LOGRES(mp) +
270 MAX((mp->m_sb.sb_inodesize + 289 MAX((xfs_calc_buf_res(2, mp->m_sb.sb_inodesize) +
271 mp->m_sb.sb_inodesize + 290 xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) +
291 (uint)XFS_FSB_TO_B(mp, 1) +
292 xfs_calc_buf_res(XFS_DIROP_LOG_COUNT(mp),
293 XFS_FSB_TO_B(mp, 1))),
294 (xfs_calc_buf_res(2, mp->m_sb.sb_sectsize) +
272 mp->m_sb.sb_sectsize + 295 mp->m_sb.sb_sectsize +
273 XFS_FSB_TO_B(mp, 1) + 296 xfs_calc_buf_res(XFS_IALLOC_BLOCKS(mp),
274 XFS_DIROP_LOG_RES(mp) + 297 XFS_FSB_TO_B(mp, 1)) +
275 128 * (3 + XFS_DIROP_LOG_COUNT(mp))), 298 xfs_calc_buf_res(mp->m_in_maxlevels,
276 (3 * mp->m_sb.sb_sectsize + 299 XFS_FSB_TO_B(mp, 1)) +
277 XFS_FSB_TO_B(mp, XFS_IALLOC_BLOCKS(mp)) + 300 xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1),
278 XFS_FSB_TO_B(mp, mp->m_in_maxlevels) + 301 XFS_FSB_TO_B(mp, 1))));
279 XFS_ALLOCFREE_LOG_RES(mp, 1) +
280 128 * (2 + XFS_IALLOC_BLOCKS(mp) + mp->m_in_maxlevels +
281 XFS_ALLOCFREE_LOG_COUNT(mp, 1))));
282} 302}
283 303
284/* 304/*
@@ -306,16 +326,16 @@ xfs_calc_ifree_reservation(
306 struct xfs_mount *mp) 326 struct xfs_mount *mp)
307{ 327{
308 return XFS_DQUOT_LOGRES(mp) + 328 return XFS_DQUOT_LOGRES(mp) +
309 mp->m_sb.sb_inodesize + 329 xfs_calc_buf_res(1, mp->m_sb.sb_inodesize) +
310 mp->m_sb.sb_sectsize + 330 xfs_calc_buf_res(2, mp->m_sb.sb_sectsize) +
311 mp->m_sb.sb_sectsize + 331 xfs_calc_buf_res(1, XFS_FSB_TO_B(mp, 1)) +
312 XFS_FSB_TO_B(mp, 1) +
313 MAX((__uint16_t)XFS_FSB_TO_B(mp, 1), 332 MAX((__uint16_t)XFS_FSB_TO_B(mp, 1),
314 XFS_INODE_CLUSTER_SIZE(mp)) + 333 XFS_INODE_CLUSTER_SIZE(mp)) +
315 128 * 5 + 334 xfs_calc_buf_res(1, 0) +
316 XFS_ALLOCFREE_LOG_RES(mp, 1) + 335 xfs_calc_buf_res(2 + XFS_IALLOC_BLOCKS(mp) +
317 128 * (2 + XFS_IALLOC_BLOCKS(mp) + mp->m_in_maxlevels + 336 mp->m_in_maxlevels, 0) +
318 XFS_ALLOCFREE_LOG_COUNT(mp, 1)); 337 xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1),
338 XFS_FSB_TO_B(mp, 1));
319} 339}
320 340
321/* 341/*
@@ -343,9 +363,9 @@ STATIC uint
343xfs_calc_growdata_reservation( 363xfs_calc_growdata_reservation(
344 struct xfs_mount *mp) 364 struct xfs_mount *mp)
345{ 365{
346 return mp->m_sb.sb_sectsize * 3 + 366 return xfs_calc_buf_res(3, mp->m_sb.sb_sectsize) +
347 XFS_ALLOCFREE_LOG_RES(mp, 1) + 367 xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1),
348 128 * (3 + XFS_ALLOCFREE_LOG_COUNT(mp, 1)); 368 XFS_FSB_TO_B(mp, 1));
349} 369}
350 370
351/* 371/*
@@ -362,12 +382,12 @@ STATIC uint
362xfs_calc_growrtalloc_reservation( 382xfs_calc_growrtalloc_reservation(
363 struct xfs_mount *mp) 383 struct xfs_mount *mp)
364{ 384{
365 return 2 * mp->m_sb.sb_sectsize + 385 return xfs_calc_buf_res(2, mp->m_sb.sb_sectsize) +
366 XFS_FSB_TO_B(mp, XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK)) + 386 xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK),
367 mp->m_sb.sb_inodesize + 387 XFS_FSB_TO_B(mp, 1)) +
368 XFS_ALLOCFREE_LOG_RES(mp, 1) + 388 xfs_calc_buf_res(1, mp->m_sb.sb_inodesize) +
369 128 * (3 + XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) + 389 xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1),
370 XFS_ALLOCFREE_LOG_COUNT(mp, 1)); 390 XFS_FSB_TO_B(mp, 1));
371} 391}
372 392
373/* 393/*
@@ -379,7 +399,7 @@ STATIC uint
379xfs_calc_growrtzero_reservation( 399xfs_calc_growrtzero_reservation(
380 struct xfs_mount *mp) 400 struct xfs_mount *mp)
381{ 401{
382 return mp->m_sb.sb_blocksize + 128; 402 return xfs_calc_buf_res(1, mp->m_sb.sb_blocksize);
383} 403}
384 404
385/* 405/*
@@ -396,11 +416,10 @@ STATIC uint
396xfs_calc_growrtfree_reservation( 416xfs_calc_growrtfree_reservation(
397 struct xfs_mount *mp) 417 struct xfs_mount *mp)
398{ 418{
399 return mp->m_sb.sb_sectsize + 419 return xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) +
400 2 * mp->m_sb.sb_inodesize + 420 xfs_calc_buf_res(2, mp->m_sb.sb_inodesize) +
401 mp->m_sb.sb_blocksize + 421 xfs_calc_buf_res(1, mp->m_sb.sb_blocksize) +
402 mp->m_rsumsize + 422 xfs_calc_buf_res(1, mp->m_rsumsize);
403 128 * 5;
404} 423}
405 424
406/* 425/*
@@ -411,7 +430,7 @@ STATIC uint
411xfs_calc_swrite_reservation( 430xfs_calc_swrite_reservation(
412 struct xfs_mount *mp) 431 struct xfs_mount *mp)
413{ 432{
414 return mp->m_sb.sb_inodesize + 128; 433 return xfs_calc_buf_res(1, mp->m_sb.sb_inodesize);
415} 434}
416 435
417/* 436/*
@@ -421,7 +440,7 @@ xfs_calc_swrite_reservation(
421STATIC uint 440STATIC uint
422xfs_calc_writeid_reservation(xfs_mount_t *mp) 441xfs_calc_writeid_reservation(xfs_mount_t *mp)
423{ 442{
424 return mp->m_sb.sb_inodesize + 128; 443 return xfs_calc_buf_res(1, mp->m_sb.sb_inodesize);
425} 444}
426 445
427/* 446/*
@@ -437,13 +456,13 @@ xfs_calc_addafork_reservation(
437 struct xfs_mount *mp) 456 struct xfs_mount *mp)
438{ 457{
439 return XFS_DQUOT_LOGRES(mp) + 458 return XFS_DQUOT_LOGRES(mp) +
440 mp->m_sb.sb_inodesize + 459 xfs_calc_buf_res(1, mp->m_sb.sb_inodesize) +
441 mp->m_sb.sb_sectsize * 2 + 460 xfs_calc_buf_res(2, mp->m_sb.sb_sectsize) +
442 mp->m_dirblksize + 461 xfs_calc_buf_res(1, mp->m_dirblksize) +
443 XFS_FSB_TO_B(mp, XFS_DAENTER_BMAP1B(mp, XFS_DATA_FORK) + 1) + 462 xfs_calc_buf_res(XFS_DAENTER_BMAP1B(mp, XFS_DATA_FORK) + 1,
444 XFS_ALLOCFREE_LOG_RES(mp, 1) + 463 XFS_FSB_TO_B(mp, 1)) +
445 128 * (4 + XFS_DAENTER_BMAP1B(mp, XFS_DATA_FORK) + 1 + 464 xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1),
446 XFS_ALLOCFREE_LOG_COUNT(mp, 1)); 465 XFS_FSB_TO_B(mp, 1));
447} 466}
448 467
449/* 468/*
@@ -461,35 +480,51 @@ STATIC uint
461xfs_calc_attrinval_reservation( 480xfs_calc_attrinval_reservation(
462 struct xfs_mount *mp) 481 struct xfs_mount *mp)
463{ 482{
464 return MAX((mp->m_sb.sb_inodesize + 483 return MAX((xfs_calc_buf_res(1, mp->m_sb.sb_inodesize) +
465 XFS_FSB_TO_B(mp, XFS_BM_MAXLEVELS(mp, XFS_ATTR_FORK)) + 484 xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_ATTR_FORK),
466 128 * (1 + XFS_BM_MAXLEVELS(mp, XFS_ATTR_FORK))), 485 XFS_FSB_TO_B(mp, 1))),
467 (4 * mp->m_sb.sb_sectsize + 486 (xfs_calc_buf_res(9, mp->m_sb.sb_sectsize) +
468 4 * mp->m_sb.sb_sectsize + 487 xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 4),
469 mp->m_sb.sb_sectsize + 488 XFS_FSB_TO_B(mp, 1))));
470 XFS_ALLOCFREE_LOG_RES(mp, 4) +
471 128 * (9 + XFS_ALLOCFREE_LOG_COUNT(mp, 4))));
472} 489}
473 490
474/* 491/*
475 * Setting an attribute. 492 * Setting an attribute at mount time.
476 * the inode getting the attribute 493 * the inode getting the attribute
477 * the superblock for allocations 494 * the superblock for allocations
478 * the agfs extents are allocated from 495 * the agfs extents are allocated from
479 * the attribute btree * max depth 496 * the attribute btree * max depth
480 * the inode allocation btree 497 * the inode allocation btree
481 * Since attribute transaction space is dependent on the size of the attribute, 498 * Since attribute transaction space is dependent on the size of the attribute,
482 * the calculation is done partially at mount time and partially at runtime. 499 * the calculation is done partially at mount time and partially at runtime(see
500 * below).
483 */ 501 */
484STATIC uint 502STATIC uint
485xfs_calc_attrset_reservation( 503xfs_calc_attrsetm_reservation(
486 struct xfs_mount *mp) 504 struct xfs_mount *mp)
487{ 505{
488 return XFS_DQUOT_LOGRES(mp) + 506 return XFS_DQUOT_LOGRES(mp) +
489 mp->m_sb.sb_inodesize + 507 xfs_calc_buf_res(1, mp->m_sb.sb_inodesize) +
490 mp->m_sb.sb_sectsize + 508 xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) +
491 XFS_FSB_TO_B(mp, XFS_DA_NODE_MAXDEPTH) + 509 xfs_calc_buf_res(XFS_DA_NODE_MAXDEPTH, XFS_FSB_TO_B(mp, 1));
492 128 * (2 + XFS_DA_NODE_MAXDEPTH); 510}
511
512/*
513 * Setting an attribute at runtime, transaction space unit per block.
514 * the superblock for allocations: sector size
515 * the inode bmap btree could join or split: max depth * block size
516 * Since the runtime attribute transaction space is dependent on the total
517 * blocks needed for the 1st bmap, here we calculate out the space unit for
518 * one block so that the caller could figure out the total space according
519 * to the attibute extent length in blocks by: ext * XFS_ATTRSETRT_LOG_RES(mp).
520 */
521STATIC uint
522xfs_calc_attrsetrt_reservation(
523 struct xfs_mount *mp)
524{
525 return xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) +
526 xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_ATTR_FORK),
527 XFS_FSB_TO_B(mp, 1));
493} 528}
494 529
495/* 530/*
@@ -508,16 +543,15 @@ xfs_calc_attrrm_reservation(
508 struct xfs_mount *mp) 543 struct xfs_mount *mp)
509{ 544{
510 return XFS_DQUOT_LOGRES(mp) + 545 return XFS_DQUOT_LOGRES(mp) +
511 MAX((mp->m_sb.sb_inodesize + 546 MAX((xfs_calc_buf_res(1, mp->m_sb.sb_inodesize) +
512 XFS_FSB_TO_B(mp, XFS_DA_NODE_MAXDEPTH) + 547 xfs_calc_buf_res(XFS_DA_NODE_MAXDEPTH,
513 XFS_FSB_TO_B(mp, XFS_BM_MAXLEVELS(mp, XFS_ATTR_FORK)) + 548 XFS_FSB_TO_B(mp, 1)) +
514 128 * (1 + XFS_DA_NODE_MAXDEPTH + 549 (uint)XFS_FSB_TO_B(mp,
515 XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK))), 550 XFS_BM_MAXLEVELS(mp, XFS_ATTR_FORK)) +
516 (2 * mp->m_sb.sb_sectsize + 551 xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK), 0)),
517 2 * mp->m_sb.sb_sectsize + 552 (xfs_calc_buf_res(5, mp->m_sb.sb_sectsize) +
518 mp->m_sb.sb_sectsize + 553 xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 2),
519 XFS_ALLOCFREE_LOG_RES(mp, 2) + 554 XFS_FSB_TO_B(mp, 1))));
520 128 * (5 + XFS_ALLOCFREE_LOG_COUNT(mp, 2))));
521} 555}
522 556
523/* 557/*
@@ -527,7 +561,78 @@ STATIC uint
527xfs_calc_clear_agi_bucket_reservation( 561xfs_calc_clear_agi_bucket_reservation(
528 struct xfs_mount *mp) 562 struct xfs_mount *mp)
529{ 563{
530 return mp->m_sb.sb_sectsize + 128; 564 return xfs_calc_buf_res(1, mp->m_sb.sb_sectsize);
565}
566
567/*
568 * Clearing the quotaflags in the superblock.
569 * the super block for changing quota flags: sector size
570 */
571STATIC uint
572xfs_calc_qm_sbchange_reservation(
573 struct xfs_mount *mp)
574{
575 return xfs_calc_buf_res(1, mp->m_sb.sb_sectsize);
576}
577
578/*
579 * Adjusting quota limits.
580 * the xfs_disk_dquot_t: sizeof(struct xfs_disk_dquot)
581 */
582STATIC uint
583xfs_calc_qm_setqlim_reservation(
584 struct xfs_mount *mp)
585{
586 return xfs_calc_buf_res(1, sizeof(struct xfs_disk_dquot));
587}
588
589/*
590 * Allocating quota on disk if needed.
591 * the write transaction log space: XFS_WRITE_LOG_RES(mp)
592 * the unit of quota allocation: one system block size
593 */
594STATIC uint
595xfs_calc_qm_dqalloc_reservation(
596 struct xfs_mount *mp)
597{
598 return XFS_WRITE_LOG_RES(mp) +
599 xfs_calc_buf_res(1,
600 XFS_FSB_TO_B(mp, XFS_DQUOT_CLUSTER_SIZE_FSB) - 1);
601}
602
603/*
604 * Turning off quotas.
605 * the xfs_qoff_logitem_t: sizeof(struct xfs_qoff_logitem) * 2
606 * the superblock for the quota flags: sector size
607 */
608STATIC uint
609xfs_calc_qm_quotaoff_reservation(
610 struct xfs_mount *mp)
611{
612 return sizeof(struct xfs_qoff_logitem) * 2 +
613 xfs_calc_buf_res(1, mp->m_sb.sb_sectsize);
614}
615
616/*
617 * End of turning off quotas.
618 * the xfs_qoff_logitem_t: sizeof(struct xfs_qoff_logitem) * 2
619 */
620STATIC uint
621xfs_calc_qm_quotaoff_end_reservation(
622 struct xfs_mount *mp)
623{
624 return sizeof(struct xfs_qoff_logitem) * 2;
625}
626
627/*
628 * Syncing the incore super block changes to disk.
629 * the super block to reflect the changes: sector size
630 */
631STATIC uint
632xfs_calc_sb_reservation(
633 struct xfs_mount *mp)
634{
635 return xfs_calc_buf_res(1, mp->m_sb.sb_sectsize);
531} 636}
532 637
533/* 638/*
@@ -555,12 +660,19 @@ xfs_trans_init(
555 resp->tr_writeid = xfs_calc_writeid_reservation(mp); 660 resp->tr_writeid = xfs_calc_writeid_reservation(mp);
556 resp->tr_addafork = xfs_calc_addafork_reservation(mp); 661 resp->tr_addafork = xfs_calc_addafork_reservation(mp);
557 resp->tr_attrinval = xfs_calc_attrinval_reservation(mp); 662 resp->tr_attrinval = xfs_calc_attrinval_reservation(mp);
558 resp->tr_attrset = xfs_calc_attrset_reservation(mp); 663 resp->tr_attrsetm = xfs_calc_attrsetm_reservation(mp);
664 resp->tr_attrsetrt = xfs_calc_attrsetrt_reservation(mp);
559 resp->tr_attrrm = xfs_calc_attrrm_reservation(mp); 665 resp->tr_attrrm = xfs_calc_attrrm_reservation(mp);
560 resp->tr_clearagi = xfs_calc_clear_agi_bucket_reservation(mp); 666 resp->tr_clearagi = xfs_calc_clear_agi_bucket_reservation(mp);
561 resp->tr_growrtalloc = xfs_calc_growrtalloc_reservation(mp); 667 resp->tr_growrtalloc = xfs_calc_growrtalloc_reservation(mp);
562 resp->tr_growrtzero = xfs_calc_growrtzero_reservation(mp); 668 resp->tr_growrtzero = xfs_calc_growrtzero_reservation(mp);
563 resp->tr_growrtfree = xfs_calc_growrtfree_reservation(mp); 669 resp->tr_growrtfree = xfs_calc_growrtfree_reservation(mp);
670 resp->tr_qm_sbchange = xfs_calc_qm_sbchange_reservation(mp);
671 resp->tr_qm_setqlim = xfs_calc_qm_setqlim_reservation(mp);
672 resp->tr_qm_dqalloc = xfs_calc_qm_dqalloc_reservation(mp);
673 resp->tr_qm_quotaoff = xfs_calc_qm_quotaoff_reservation(mp);
674 resp->tr_qm_equotaoff = xfs_calc_qm_quotaoff_end_reservation(mp);
675 resp->tr_sb = xfs_calc_sb_reservation(mp);
564} 676}
565 677
566/* 678/*
diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h
index c6c0601abd7a..cd29f6171021 100644
--- a/fs/xfs/xfs_trans.h
+++ b/fs/xfs/xfs_trans.h
@@ -252,17 +252,19 @@ struct xfs_log_item_desc {
252 * as long as SWRITE logs the entire inode core 252 * as long as SWRITE logs the entire inode core
253 */ 253 */
254#define XFS_FSYNC_TS_LOG_RES(mp) ((mp)->m_reservations.tr_swrite) 254#define XFS_FSYNC_TS_LOG_RES(mp) ((mp)->m_reservations.tr_swrite)
255#define XFS_WRITEID_LOG_RES(mp) ((mp)->m_reservations.tr_swrite) 255#define XFS_WRITEID_LOG_RES(mp) ((mp)->m_reservations.tr_swrite)
256#define XFS_ADDAFORK_LOG_RES(mp) ((mp)->m_reservations.tr_addafork) 256#define XFS_ADDAFORK_LOG_RES(mp) ((mp)->m_reservations.tr_addafork)
257#define XFS_ATTRINVAL_LOG_RES(mp) ((mp)->m_reservations.tr_attrinval) 257#define XFS_ATTRINVAL_LOG_RES(mp) ((mp)->m_reservations.tr_attrinval)
258#define XFS_ATTRSET_LOG_RES(mp, ext) \ 258#define XFS_ATTRSETM_LOG_RES(mp) ((mp)->m_reservations.tr_attrsetm)
259 ((mp)->m_reservations.tr_attrset + \ 259#define XFS_ATTRSETRT_LOG_RES(mp) ((mp)->m_reservations.tr_attrsetrt)
260 (ext * (mp)->m_sb.sb_sectsize) + \ 260#define XFS_ATTRRM_LOG_RES(mp) ((mp)->m_reservations.tr_attrrm)
261 (ext * XFS_FSB_TO_B((mp), XFS_BM_MAXLEVELS(mp, XFS_ATTR_FORK))) + \
262 (128 * (ext + (ext * XFS_BM_MAXLEVELS(mp, XFS_ATTR_FORK)))))
263#define XFS_ATTRRM_LOG_RES(mp) ((mp)->m_reservations.tr_attrrm)
264#define XFS_CLEAR_AGI_BUCKET_LOG_RES(mp) ((mp)->m_reservations.tr_clearagi) 261#define XFS_CLEAR_AGI_BUCKET_LOG_RES(mp) ((mp)->m_reservations.tr_clearagi)
265 262#define XFS_QM_SBCHANGE_LOG_RES(mp) ((mp)->m_reservations.tr_qm_sbchange)
263#define XFS_QM_SETQLIM_LOG_RES(mp) ((mp)->m_reservations.tr_qm_setqlim)
264#define XFS_QM_DQALLOC_LOG_RES(mp) ((mp)->m_reservations.tr_qm_dqalloc)
265#define XFS_QM_QUOTAOFF_LOG_RES(mp) ((mp)->m_reservations.tr_qm_quotaoff)
266#define XFS_QM_QUOTAOFF_END_LOG_RES(mp) ((mp)->m_reservations.tr_qm_equotaoff)
267#define XFS_SB_LOG_RES(mp) ((mp)->m_reservations.tr_sb)
266 268
267/* 269/*
268 * Various log count values. 270 * Various log count values.
diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c
index 6011ee661339..0eda7254305f 100644
--- a/fs/xfs/xfs_trans_ail.c
+++ b/fs/xfs/xfs_trans_ail.c
@@ -55,20 +55,6 @@ xfs_ail_check(
55 ASSERT(XFS_LSN_CMP(prev_lip->li_lsn, lip->li_lsn) >= 0); 55 ASSERT(XFS_LSN_CMP(prev_lip->li_lsn, lip->li_lsn) >= 0);
56 56
57 57
58#ifdef XFS_TRANS_DEBUG
59 /*
60 * Walk the list checking lsn ordering, and that every entry has the
61 * XFS_LI_IN_AIL flag set. This is really expensive, so only do it
62 * when specifically debugging the transaction subsystem.
63 */
64 prev_lip = list_entry(&ailp->xa_ail, xfs_log_item_t, li_ail);
65 list_for_each_entry(lip, &ailp->xa_ail, li_ail) {
66 if (&prev_lip->li_ail != &ailp->xa_ail)
67 ASSERT(XFS_LSN_CMP(prev_lip->li_lsn, lip->li_lsn) <= 0);
68 ASSERT((lip->li_flags & XFS_LI_IN_AIL) != 0);
69 prev_lip = lip;
70 }
71#endif /* XFS_TRANS_DEBUG */
72} 58}
73#else /* !DEBUG */ 59#else /* !DEBUG */
74#define xfs_ail_check(a,l) 60#define xfs_ail_check(a,l)
diff --git a/fs/xfs/xfs_trans_buf.c b/fs/xfs/xfs_trans_buf.c
index 4fc17d479d42..3edf5dbee001 100644
--- a/fs/xfs/xfs_trans_buf.c
+++ b/fs/xfs/xfs_trans_buf.c
@@ -93,7 +93,7 @@ _xfs_trans_bjoin(
93 xfs_buf_item_init(bp, tp->t_mountp); 93 xfs_buf_item_init(bp, tp->t_mountp);
94 bip = bp->b_fspriv; 94 bip = bp->b_fspriv;
95 ASSERT(!(bip->bli_flags & XFS_BLI_STALE)); 95 ASSERT(!(bip->bli_flags & XFS_BLI_STALE));
96 ASSERT(!(bip->bli_format.blf_flags & XFS_BLF_CANCEL)); 96 ASSERT(!(bip->__bli_format.blf_flags & XFS_BLF_CANCEL));
97 ASSERT(!(bip->bli_flags & XFS_BLI_LOGGED)); 97 ASSERT(!(bip->bli_flags & XFS_BLI_LOGGED));
98 if (reset_recur) 98 if (reset_recur)
99 bip->bli_recur = 0; 99 bip->bli_recur = 0;
@@ -432,7 +432,7 @@ xfs_trans_brelse(xfs_trans_t *tp,
432 bip = bp->b_fspriv; 432 bip = bp->b_fspriv;
433 ASSERT(bip->bli_item.li_type == XFS_LI_BUF); 433 ASSERT(bip->bli_item.li_type == XFS_LI_BUF);
434 ASSERT(!(bip->bli_flags & XFS_BLI_STALE)); 434 ASSERT(!(bip->bli_flags & XFS_BLI_STALE));
435 ASSERT(!(bip->bli_format.blf_flags & XFS_BLF_CANCEL)); 435 ASSERT(!(bip->__bli_format.blf_flags & XFS_BLF_CANCEL));
436 ASSERT(atomic_read(&bip->bli_refcount) > 0); 436 ASSERT(atomic_read(&bip->bli_refcount) > 0);
437 437
438 trace_xfs_trans_brelse(bip); 438 trace_xfs_trans_brelse(bip);
@@ -519,7 +519,7 @@ xfs_trans_bhold(xfs_trans_t *tp,
519 ASSERT(bp->b_transp == tp); 519 ASSERT(bp->b_transp == tp);
520 ASSERT(bip != NULL); 520 ASSERT(bip != NULL);
521 ASSERT(!(bip->bli_flags & XFS_BLI_STALE)); 521 ASSERT(!(bip->bli_flags & XFS_BLI_STALE));
522 ASSERT(!(bip->bli_format.blf_flags & XFS_BLF_CANCEL)); 522 ASSERT(!(bip->__bli_format.blf_flags & XFS_BLF_CANCEL));
523 ASSERT(atomic_read(&bip->bli_refcount) > 0); 523 ASSERT(atomic_read(&bip->bli_refcount) > 0);
524 524
525 bip->bli_flags |= XFS_BLI_HOLD; 525 bip->bli_flags |= XFS_BLI_HOLD;
@@ -539,7 +539,7 @@ xfs_trans_bhold_release(xfs_trans_t *tp,
539 ASSERT(bp->b_transp == tp); 539 ASSERT(bp->b_transp == tp);
540 ASSERT(bip != NULL); 540 ASSERT(bip != NULL);
541 ASSERT(!(bip->bli_flags & XFS_BLI_STALE)); 541 ASSERT(!(bip->bli_flags & XFS_BLI_STALE));
542 ASSERT(!(bip->bli_format.blf_flags & XFS_BLF_CANCEL)); 542 ASSERT(!(bip->__bli_format.blf_flags & XFS_BLF_CANCEL));
543 ASSERT(atomic_read(&bip->bli_refcount) > 0); 543 ASSERT(atomic_read(&bip->bli_refcount) > 0);
544 ASSERT(bip->bli_flags & XFS_BLI_HOLD); 544 ASSERT(bip->bli_flags & XFS_BLI_HOLD);
545 545
@@ -598,7 +598,7 @@ xfs_trans_log_buf(xfs_trans_t *tp,
598 bip->bli_flags &= ~XFS_BLI_STALE; 598 bip->bli_flags &= ~XFS_BLI_STALE;
599 ASSERT(XFS_BUF_ISSTALE(bp)); 599 ASSERT(XFS_BUF_ISSTALE(bp));
600 XFS_BUF_UNSTALE(bp); 600 XFS_BUF_UNSTALE(bp);
601 bip->bli_format.blf_flags &= ~XFS_BLF_CANCEL; 601 bip->__bli_format.blf_flags &= ~XFS_BLF_CANCEL;
602 } 602 }
603 603
604 tp->t_flags |= XFS_TRANS_DIRTY; 604 tp->t_flags |= XFS_TRANS_DIRTY;
@@ -643,6 +643,7 @@ xfs_trans_binval(
643 xfs_buf_t *bp) 643 xfs_buf_t *bp)
644{ 644{
645 xfs_buf_log_item_t *bip = bp->b_fspriv; 645 xfs_buf_log_item_t *bip = bp->b_fspriv;
646 int i;
646 647
647 ASSERT(bp->b_transp == tp); 648 ASSERT(bp->b_transp == tp);
648 ASSERT(bip != NULL); 649 ASSERT(bip != NULL);
@@ -657,8 +658,8 @@ xfs_trans_binval(
657 */ 658 */
658 ASSERT(XFS_BUF_ISSTALE(bp)); 659 ASSERT(XFS_BUF_ISSTALE(bp));
659 ASSERT(!(bip->bli_flags & (XFS_BLI_LOGGED | XFS_BLI_DIRTY))); 660 ASSERT(!(bip->bli_flags & (XFS_BLI_LOGGED | XFS_BLI_DIRTY)));
660 ASSERT(!(bip->bli_format.blf_flags & XFS_BLF_INODE_BUF)); 661 ASSERT(!(bip->__bli_format.blf_flags & XFS_BLF_INODE_BUF));
661 ASSERT(bip->bli_format.blf_flags & XFS_BLF_CANCEL); 662 ASSERT(bip->__bli_format.blf_flags & XFS_BLF_CANCEL);
662 ASSERT(bip->bli_item.li_desc->lid_flags & XFS_LID_DIRTY); 663 ASSERT(bip->bli_item.li_desc->lid_flags & XFS_LID_DIRTY);
663 ASSERT(tp->t_flags & XFS_TRANS_DIRTY); 664 ASSERT(tp->t_flags & XFS_TRANS_DIRTY);
664 return; 665 return;
@@ -668,10 +669,12 @@ xfs_trans_binval(
668 669
669 bip->bli_flags |= XFS_BLI_STALE; 670 bip->bli_flags |= XFS_BLI_STALE;
670 bip->bli_flags &= ~(XFS_BLI_INODE_BUF | XFS_BLI_LOGGED | XFS_BLI_DIRTY); 671 bip->bli_flags &= ~(XFS_BLI_INODE_BUF | XFS_BLI_LOGGED | XFS_BLI_DIRTY);
671 bip->bli_format.blf_flags &= ~XFS_BLF_INODE_BUF; 672 bip->__bli_format.blf_flags &= ~XFS_BLF_INODE_BUF;
672 bip->bli_format.blf_flags |= XFS_BLF_CANCEL; 673 bip->__bli_format.blf_flags |= XFS_BLF_CANCEL;
673 memset((char *)(bip->bli_format.blf_data_map), 0, 674 for (i = 0; i < bip->bli_format_count; i++) {
674 (bip->bli_format.blf_map_size * sizeof(uint))); 675 memset(bip->bli_formats[i].blf_data_map, 0,
676 (bip->bli_formats[i].blf_map_size * sizeof(uint)));
677 }
675 bip->bli_item.li_desc->lid_flags |= XFS_LID_DIRTY; 678 bip->bli_item.li_desc->lid_flags |= XFS_LID_DIRTY;
676 tp->t_flags |= XFS_TRANS_DIRTY; 679 tp->t_flags |= XFS_TRANS_DIRTY;
677} 680}
@@ -775,5 +778,5 @@ xfs_trans_dquot_buf(
775 type == XFS_BLF_GDQUOT_BUF); 778 type == XFS_BLF_GDQUOT_BUF);
776 ASSERT(atomic_read(&bip->bli_refcount) > 0); 779 ASSERT(atomic_read(&bip->bli_refcount) > 0);
777 780
778 bip->bli_format.blf_flags |= type; 781 bip->__bli_format.blf_flags |= type;
779} 782}
diff --git a/fs/xfs/xfs_trans_dquot.c b/fs/xfs/xfs_trans_dquot.c
index 0c7fa54f309e..642c2d6e1db1 100644
--- a/fs/xfs/xfs_trans_dquot.c
+++ b/fs/xfs/xfs_trans_dquot.c
@@ -516,7 +516,7 @@ xfs_trans_unreserve_and_mod_dquots(
516 int i, j; 516 int i, j;
517 xfs_dquot_t *dqp; 517 xfs_dquot_t *dqp;
518 xfs_dqtrx_t *qtrx, *qa; 518 xfs_dqtrx_t *qtrx, *qa;
519 boolean_t locked; 519 bool locked;
520 520
521 if (!tp->t_dqinfo || !(tp->t_flags & XFS_TRANS_DQ_DIRTY)) 521 if (!tp->t_dqinfo || !(tp->t_flags & XFS_TRANS_DQ_DIRTY))
522 return; 522 return;
@@ -537,17 +537,17 @@ xfs_trans_unreserve_and_mod_dquots(
537 * about the number of blocks used field, or deltas. 537 * about the number of blocks used field, or deltas.
538 * Also we don't bother to zero the fields. 538 * Also we don't bother to zero the fields.
539 */ 539 */
540 locked = B_FALSE; 540 locked = false;
541 if (qtrx->qt_blk_res) { 541 if (qtrx->qt_blk_res) {
542 xfs_dqlock(dqp); 542 xfs_dqlock(dqp);
543 locked = B_TRUE; 543 locked = true;
544 dqp->q_res_bcount -= 544 dqp->q_res_bcount -=
545 (xfs_qcnt_t)qtrx->qt_blk_res; 545 (xfs_qcnt_t)qtrx->qt_blk_res;
546 } 546 }
547 if (qtrx->qt_ino_res) { 547 if (qtrx->qt_ino_res) {
548 if (!locked) { 548 if (!locked) {
549 xfs_dqlock(dqp); 549 xfs_dqlock(dqp);
550 locked = B_TRUE; 550 locked = true;
551 } 551 }
552 dqp->q_res_icount -= 552 dqp->q_res_icount -=
553 (xfs_qcnt_t)qtrx->qt_ino_res; 553 (xfs_qcnt_t)qtrx->qt_ino_res;
@@ -556,7 +556,7 @@ xfs_trans_unreserve_and_mod_dquots(
556 if (qtrx->qt_rtblk_res) { 556 if (qtrx->qt_rtblk_res) {
557 if (!locked) { 557 if (!locked) {
558 xfs_dqlock(dqp); 558 xfs_dqlock(dqp);
559 locked = B_TRUE; 559 locked = true;
560 } 560 }
561 dqp->q_res_rtbcount -= 561 dqp->q_res_rtbcount -=
562 (xfs_qcnt_t)qtrx->qt_rtblk_res; 562 (xfs_qcnt_t)qtrx->qt_rtblk_res;
diff --git a/fs/xfs/xfs_trans_inode.c b/fs/xfs/xfs_trans_inode.c
index d2eee20d5f5b..ac6d567704db 100644
--- a/fs/xfs/xfs_trans_inode.c
+++ b/fs/xfs/xfs_trans_inode.c
@@ -33,14 +33,6 @@
33#include "xfs_inode_item.h" 33#include "xfs_inode_item.h"
34#include "xfs_trace.h" 34#include "xfs_trace.h"
35 35
36#ifdef XFS_TRANS_DEBUG
37STATIC void
38xfs_trans_inode_broot_debug(
39 xfs_inode_t *ip);
40#else
41#define xfs_trans_inode_broot_debug(ip)
42#endif
43
44/* 36/*
45 * Add a locked inode to the transaction. 37 * Add a locked inode to the transaction.
46 * 38 *
@@ -67,8 +59,6 @@ xfs_trans_ijoin(
67 * Get a log_item_desc to point at the new item. 59 * Get a log_item_desc to point at the new item.
68 */ 60 */
69 xfs_trans_add_item(tp, &iip->ili_item); 61 xfs_trans_add_item(tp, &iip->ili_item);
70
71 xfs_trans_inode_broot_debug(ip);
72} 62}
73 63
74/* 64/*
@@ -135,34 +125,3 @@ xfs_trans_log_inode(
135 flags |= ip->i_itemp->ili_last_fields; 125 flags |= ip->i_itemp->ili_last_fields;
136 ip->i_itemp->ili_fields |= flags; 126 ip->i_itemp->ili_fields |= flags;
137} 127}
138
139#ifdef XFS_TRANS_DEBUG
140/*
141 * Keep track of the state of the inode btree root to make sure we
142 * log it properly.
143 */
144STATIC void
145xfs_trans_inode_broot_debug(
146 xfs_inode_t *ip)
147{
148 xfs_inode_log_item_t *iip;
149
150 ASSERT(ip->i_itemp != NULL);
151 iip = ip->i_itemp;
152 if (iip->ili_root_size != 0) {
153 ASSERT(iip->ili_orig_root != NULL);
154 kmem_free(iip->ili_orig_root);
155 iip->ili_root_size = 0;
156 iip->ili_orig_root = NULL;
157 }
158 if (ip->i_d.di_format == XFS_DINODE_FMT_BTREE) {
159 ASSERT((ip->i_df.if_broot != NULL) &&
160 (ip->i_df.if_broot_bytes > 0));
161 iip->ili_root_size = ip->i_df.if_broot_bytes;
162 iip->ili_orig_root =
163 (char*)kmem_alloc(iip->ili_root_size, KM_SLEEP);
164 memcpy(iip->ili_orig_root, (char*)(ip->i_df.if_broot),
165 iip->ili_root_size);
166 }
167}
168#endif
diff --git a/fs/xfs/xfs_types.h b/fs/xfs/xfs_types.h
index 7a41874f4c20..61ba1cfa974c 100644
--- a/fs/xfs/xfs_types.h
+++ b/fs/xfs/xfs_types.h
@@ -32,7 +32,6 @@ typedef unsigned int __uint32_t;
32typedef signed long long int __int64_t; 32typedef signed long long int __int64_t;
33typedef unsigned long long int __uint64_t; 33typedef unsigned long long int __uint64_t;
34 34
35typedef enum { B_FALSE,B_TRUE } boolean_t;
36typedef __uint32_t prid_t; /* project ID */ 35typedef __uint32_t prid_t; /* project ID */
37typedef __uint32_t inst_t; /* an instruction */ 36typedef __uint32_t inst_t; /* an instruction */
38 37
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c
index d95f565a390e..77ad74834baa 100644
--- a/fs/xfs/xfs_vnodeops.c
+++ b/fs/xfs/xfs_vnodeops.c
@@ -725,7 +725,7 @@ xfs_create(
725 int error; 725 int error;
726 xfs_bmap_free_t free_list; 726 xfs_bmap_free_t free_list;
727 xfs_fsblock_t first_block; 727 xfs_fsblock_t first_block;
728 boolean_t unlock_dp_on_error = B_FALSE; 728 bool unlock_dp_on_error = false;
729 uint cancel_flags; 729 uint cancel_flags;
730 int committed; 730 int committed;
731 prid_t prid; 731 prid_t prid;
@@ -794,7 +794,7 @@ xfs_create(
794 } 794 }
795 795
796 xfs_ilock(dp, XFS_ILOCK_EXCL | XFS_ILOCK_PARENT); 796 xfs_ilock(dp, XFS_ILOCK_EXCL | XFS_ILOCK_PARENT);
797 unlock_dp_on_error = B_TRUE; 797 unlock_dp_on_error = true;
798 798
799 xfs_bmap_init(&free_list, &first_block); 799 xfs_bmap_init(&free_list, &first_block);
800 800
@@ -830,7 +830,7 @@ xfs_create(
830 * error path. 830 * error path.
831 */ 831 */
832 xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL); 832 xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL);
833 unlock_dp_on_error = B_FALSE; 833 unlock_dp_on_error = false;
834 834
835 error = xfs_dir_createname(tp, dp, name, ip->i_ino, 835 error = xfs_dir_createname(tp, dp, name, ip->i_ino,
836 &first_block, &free_list, resblks ? 836 &first_block, &free_list, resblks ?
@@ -1367,7 +1367,7 @@ xfs_symlink(
1367 int pathlen; 1367 int pathlen;
1368 xfs_bmap_free_t free_list; 1368 xfs_bmap_free_t free_list;
1369 xfs_fsblock_t first_block; 1369 xfs_fsblock_t first_block;
1370 boolean_t unlock_dp_on_error = B_FALSE; 1370 bool unlock_dp_on_error = false;
1371 uint cancel_flags; 1371 uint cancel_flags;
1372 int committed; 1372 int committed;
1373 xfs_fileoff_t first_fsb; 1373 xfs_fileoff_t first_fsb;
@@ -1438,7 +1438,7 @@ xfs_symlink(
1438 } 1438 }
1439 1439
1440 xfs_ilock(dp, XFS_ILOCK_EXCL | XFS_ILOCK_PARENT); 1440 xfs_ilock(dp, XFS_ILOCK_EXCL | XFS_ILOCK_PARENT);
1441 unlock_dp_on_error = B_TRUE; 1441 unlock_dp_on_error = true;
1442 1442
1443 /* 1443 /*
1444 * Check whether the directory allows new symlinks or not. 1444 * Check whether the directory allows new symlinks or not.
@@ -1484,7 +1484,7 @@ xfs_symlink(
1484 * error path. 1484 * error path.
1485 */ 1485 */
1486 xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL); 1486 xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL);
1487 unlock_dp_on_error = B_FALSE; 1487 unlock_dp_on_error = false;
1488 1488
1489 /* 1489 /*
1490 * Also attach the dquot(s) to it, if applicable. 1490 * Also attach the dquot(s) to it, if applicable.