author		Ingo Molnar <mingo@elte.hu>	2011-04-22 04:19:26 -0400
committer	Ingo Molnar <mingo@elte.hu>	2011-04-22 04:19:30 -0400
commit		eff430de53be6f3328c3eebe93755f1ecf499e37 (patch)
tree		c8e5ae958fe3e6656b4e96c83bbda17e649321a2 /fs
parent		9cbdb702092a2d82f909312f4ec3eeded77bb82e (diff)
parent		91e8549bde9e5cc88c5a2e8c8114389279e240b5 (diff)

Merge branch 'linus' into perf/core

Merge reason: Pick up upstream fixes.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'fs')
-rw-r--r--	fs/9p/fid.c	15
-rw-r--r--	fs/9p/v9fs.h	1
-rw-r--r--	fs/9p/vfs_dentry.c	4
-rw-r--r--	fs/9p/vfs_inode_dotl.c	2
-rw-r--r--	fs/9p/vfs_super.c	80
-rw-r--r--	fs/binfmt_elf.c	6
-rw-r--r--	fs/btrfs/acl.c	9
-rw-r--r--	fs/btrfs/ctree.h	9
-rw-r--r--	fs/btrfs/disk-io.c	2
-rw-r--r--	fs/btrfs/extent-tree.c	125
-rw-r--r--	fs/btrfs/extent_io.c	82
-rw-r--r--	fs/btrfs/extent_io.h	2
-rw-r--r--	fs/btrfs/file.c	21
-rw-r--r--	fs/btrfs/free-space-cache.c	119
-rw-r--r--	fs/btrfs/inode.c	165
-rw-r--r--	fs/btrfs/ioctl.c	2
-rw-r--r--	fs/btrfs/super.c	42
-rw-r--r--	fs/btrfs/transaction.c	48
-rw-r--r--	fs/btrfs/transaction.h	4
-rw-r--r--	fs/btrfs/xattr.c	33
-rw-r--r--	fs/cifs/README	16
-rw-r--r--	fs/cifs/cache.c	2
-rw-r--r--	fs/cifs/cifs_debug.c	43
-rw-r--r--	fs/cifs/cifs_spnego.c	4
-rw-r--r--	fs/cifs/cifs_unicode.c	35
-rw-r--r--	fs/cifs/cifs_unicode.h	2
-rw-r--r--	fs/cifs/cifsencrypt.c	21
-rw-r--r--	fs/cifs/cifsfs.c	6
-rw-r--r--	fs/cifs/cifsglob.h	13
-rw-r--r--	fs/cifs/cifssmb.c	14
-rw-r--r--	fs/cifs/connect.c	68
-rw-r--r--	fs/cifs/file.c	70
-rw-r--r--	fs/cifs/link.c	4
-rw-r--r--	fs/cifs/misc.c	3
-rw-r--r--	fs/cifs/sess.c	23
-rw-r--r--	fs/dcache.c	2
-rw-r--r--	fs/ext3/inode.c	2
-rw-r--r--	fs/ext4/ext4_jbd2.h	4
-rw-r--r--	fs/ext4/fsync.c	17
-rw-r--r--	fs/ext4/inode.c	35
-rw-r--r--	fs/ext4/super.c	74
-rw-r--r--	fs/fhandle.c	1
-rw-r--r--	fs/filesystems.c	3
-rw-r--r--	fs/gfs2/aops.c	2
-rw-r--r--	fs/gfs2/dir.c	2
-rw-r--r--	fs/gfs2/file.c	58
-rw-r--r--	fs/gfs2/glops.c	4
-rw-r--r--	fs/gfs2/inode.c	56
-rw-r--r--	fs/gfs2/inode.h	3
-rw-r--r--	fs/gfs2/ops_fstype.c	2
-rw-r--r--	fs/gfs2/rgrp.c	4
-rw-r--r--	fs/gfs2/super.c	14
-rw-r--r--	fs/jbd2/commit.c	4
-rw-r--r--	fs/jbd2/journal.c	3
-rw-r--r--	fs/namei.c	1
-rw-r--r--	fs/namespace.c	16
-rw-r--r--	fs/nfs/namespace.c	58
-rw-r--r--	fs/nfs/nfs4proc.c	4
-rw-r--r--	fs/nfs/write.c	6
-rw-r--r--	fs/nfsd/lockd.c	1
-rw-r--r--	fs/nfsd/nfs4state.c	8
-rw-r--r--	fs/nfsd/vfs.c	9
-rw-r--r--	fs/partitions/ldm.c	16
-rw-r--r--	fs/proc/base.c	9
-rw-r--r--	fs/quota/dquot.c	13
-rw-r--r--	fs/ramfs/file-nommu.c	1
-rw-r--r--	fs/ubifs/debug.h	152
-rw-r--r--	fs/ubifs/file.c	3
-rw-r--r--	fs/xattr.c	2
-rw-r--r--	fs/xfs/linux-2.6/xfs_buf.c	24
-rw-r--r--	fs/xfs/linux-2.6/xfs_message.c	31
-rw-r--r--	fs/xfs/linux-2.6/xfs_message.h	24
-rw-r--r--	fs/xfs/linux-2.6/xfs_super.c	129
-rw-r--r--	fs/xfs/linux-2.6/xfs_sync.c	228
-rw-r--r--	fs/xfs/linux-2.6/xfs_sync.h	2
-rw-r--r--	fs/xfs/quota/xfs_qm.c	7
-rw-r--r--	fs/xfs/quota/xfs_qm.h	5
-rw-r--r--	fs/xfs/quota/xfs_qm_syscalls.c	2
-rw-r--r--	fs/xfs/xfs_alloc.c	30
-rw-r--r--	fs/xfs/xfs_inode_item.c	67
-rw-r--r--	fs/xfs/xfs_itable.c	2
-rw-r--r--	fs/xfs/xfs_log.c	38
-rw-r--r--	fs/xfs/xfs_log_priv.h	1
-rw-r--r--	fs/xfs/xfs_mount.h	9
-rw-r--r--	fs/xfs/xfs_trans_ail.c	421
-rw-r--r--	fs/xfs/xfs_trans_priv.h	22
86 files changed, 1558 insertions(+), 1168 deletions(-)
diff --git a/fs/9p/fid.c b/fs/9p/fid.c
index 0ee594569dcc..85b67ffa2a43 100644
--- a/fs/9p/fid.c
+++ b/fs/9p/fid.c
@@ -286,11 +286,9 @@ static struct p9_fid *v9fs_fid_clone_with_uid(struct dentry *dentry, uid_t uid)
 
 struct p9_fid *v9fs_writeback_fid(struct dentry *dentry)
 {
-	int err, flags;
+	int err;
 	struct p9_fid *fid;
-	struct v9fs_session_info *v9ses;
 
-	v9ses = v9fs_dentry2v9ses(dentry);
 	fid = v9fs_fid_clone_with_uid(dentry, 0);
 	if (IS_ERR(fid))
 		goto error_out;
@@ -299,17 +297,8 @@ struct p9_fid *v9fs_writeback_fid(struct dentry *dentry)
 	 * dirty pages. We always request for the open fid in read-write
 	 * mode so that a partial page write which result in page
 	 * read can work.
-	 *
-	 * we don't have a tsyncfs operation for older version
-	 * of protocol. So make sure the write back fid is
-	 * opened in O_SYNC mode.
 	 */
-	if (!v9fs_proto_dotl(v9ses))
-		flags = O_RDWR | O_SYNC;
-	else
-		flags = O_RDWR;
-
-	err = p9_client_open(fid, flags);
+	err = p9_client_open(fid, O_RDWR);
 	if (err < 0) {
 		p9_client_clunk(fid);
 		fid = ERR_PTR(err);
diff --git a/fs/9p/v9fs.h b/fs/9p/v9fs.h
index 9665c2b840e6..e5ebedfc5ed8 100644
--- a/fs/9p/v9fs.h
+++ b/fs/9p/v9fs.h
@@ -116,7 +116,6 @@ struct v9fs_session_info {
 	struct list_head slist; /* list of sessions registered with v9fs */
 	struct backing_dev_info bdi;
 	struct rw_semaphore rename_sem;
-	struct p9_fid *root_fid; /* Used for file system sync */
 };
 
 /* cache_validity flags */
diff --git a/fs/9p/vfs_dentry.c b/fs/9p/vfs_dentry.c
index b6a3b9f7fe4d..e022890c6f40 100644
--- a/fs/9p/vfs_dentry.c
+++ b/fs/9p/vfs_dentry.c
@@ -126,7 +126,9 @@ static int v9fs_lookup_revalidate(struct dentry *dentry, struct nameidata *nd)
 			retval = v9fs_refresh_inode_dotl(fid, inode);
 		else
 			retval = v9fs_refresh_inode(fid, inode);
-		if (retval <= 0)
+		if (retval == -ENOENT)
+			return 0;
+		if (retval < 0)
 			return retval;
 	}
 out_valid:
diff --git a/fs/9p/vfs_inode_dotl.c b/fs/9p/vfs_inode_dotl.c
index ffbb113d5f33..82a7c38ddad0 100644
--- a/fs/9p/vfs_inode_dotl.c
+++ b/fs/9p/vfs_inode_dotl.c
@@ -811,7 +811,7 @@ v9fs_vfs_follow_link_dotl(struct dentry *dentry, struct nameidata *nd)
 	fid = v9fs_fid_lookup(dentry);
 	if (IS_ERR(fid)) {
 		__putname(link);
-		link = ERR_PTR(PTR_ERR(fid));
+		link = ERR_CAST(fid);
 		goto ndset;
 	}
 	retval = p9_client_readlink(fid, &target);
diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c
index f3eed3383e4f..feef6cdc1fd2 100644
--- a/fs/9p/vfs_super.c
+++ b/fs/9p/vfs_super.c
@@ -154,6 +154,7 @@ static struct dentry *v9fs_mount(struct file_system_type *fs_type, int flags,
 		retval = PTR_ERR(inode);
 		goto release_sb;
 	}
+
 	root = d_alloc_root(inode);
 	if (!root) {
 		iput(inode);
@@ -185,21 +186,10 @@ static struct dentry *v9fs_mount(struct file_system_type *fs_type, int flags,
 		p9stat_free(st);
 		kfree(st);
 	}
-	v9fs_fid_add(root, fid);
 	retval = v9fs_get_acl(inode, fid);
 	if (retval)
 		goto release_sb;
-	/*
-	 * Add the root fid to session info. This is used
-	 * for file system sync. We want a cloned fid here
-	 * so that we can do a sync_filesystem after a
-	 * shrink_dcache_for_umount
-	 */
-	v9ses->root_fid = v9fs_fid_clone(root);
-	if (IS_ERR(v9ses->root_fid)) {
-		retval = PTR_ERR(v9ses->root_fid);
-		goto release_sb;
-	}
+	v9fs_fid_add(root, fid);
 
 	P9_DPRINTK(P9_DEBUG_VFS, " simple set mount, return 0\n");
 	return dget(sb->s_root);
@@ -210,11 +200,15 @@ close_session:
 	v9fs_session_close(v9ses);
 	kfree(v9ses);
 	return ERR_PTR(retval);
+
 release_sb:
 	/*
-	 * we will do the session_close and root dentry
-	 * release in the below call.
+	 * we will do the session_close and root dentry release
+	 * in the below call. But we need to clunk fid, because we haven't
+	 * attached the fid to dentry so it won't get clunked
+	 * automatically.
 	 */
+	p9_client_clunk(fid);
 	deactivate_locked_super(sb);
 	return ERR_PTR(retval);
 }
@@ -232,7 +226,7 @@ static void v9fs_kill_super(struct super_block *s)
 	P9_DPRINTK(P9_DEBUG_VFS, " %p\n", s);
 
 	kill_anon_super(s);
-	p9_client_clunk(v9ses->root_fid);
+
 	v9fs_session_cancel(v9ses);
 	v9fs_session_close(v9ses);
 	kfree(v9ses);
@@ -285,14 +279,6 @@ done:
 	return res;
 }
 
-static int v9fs_sync_fs(struct super_block *sb, int wait)
-{
-	struct v9fs_session_info *v9ses = sb->s_fs_info;
-
-	P9_DPRINTK(P9_DEBUG_VFS, "v9fs_sync_fs: super_block %p\n", sb);
-	return p9_client_sync_fs(v9ses->root_fid);
-}
-
 static int v9fs_drop_inode(struct inode *inode)
 {
 	struct v9fs_session_info *v9ses;
@@ -307,6 +293,51 @@ static int v9fs_drop_inode(struct inode *inode)
 	return 1;
 }
 
+static int v9fs_write_inode(struct inode *inode,
+			    struct writeback_control *wbc)
+{
+	int ret;
+	struct p9_wstat wstat;
+	struct v9fs_inode *v9inode;
+	/*
+	 * send an fsync request to server irrespective of
+	 * wbc->sync_mode.
+	 */
+	P9_DPRINTK(P9_DEBUG_VFS, "%s: inode %p\n", __func__, inode);
+	v9inode = V9FS_I(inode);
+	if (!v9inode->writeback_fid)
+		return 0;
+	v9fs_blank_wstat(&wstat);
+
+	ret = p9_client_wstat(v9inode->writeback_fid, &wstat);
+	if (ret < 0) {
+		__mark_inode_dirty(inode, I_DIRTY_DATASYNC);
+		return ret;
+	}
+	return 0;
+}
+
+static int v9fs_write_inode_dotl(struct inode *inode,
+				 struct writeback_control *wbc)
+{
+	int ret;
+	struct v9fs_inode *v9inode;
+	/*
+	 * send an fsync request to server irrespective of
+	 * wbc->sync_mode.
+	 */
+	P9_DPRINTK(P9_DEBUG_VFS, "%s: inode %p\n", __func__, inode);
+	v9inode = V9FS_I(inode);
+	if (!v9inode->writeback_fid)
+		return 0;
+	ret = p9_client_fsync(v9inode->writeback_fid, 0);
+	if (ret < 0) {
+		__mark_inode_dirty(inode, I_DIRTY_DATASYNC);
+		return ret;
+	}
+	return 0;
+}
+
 static const struct super_operations v9fs_super_ops = {
 	.alloc_inode = v9fs_alloc_inode,
 	.destroy_inode = v9fs_destroy_inode,
@@ -314,17 +345,18 @@ static const struct super_operations v9fs_super_ops = {
 	.evict_inode = v9fs_evict_inode,
 	.show_options = generic_show_options,
 	.umount_begin = v9fs_umount_begin,
+	.write_inode = v9fs_write_inode,
 };
 
 static const struct super_operations v9fs_super_ops_dotl = {
 	.alloc_inode = v9fs_alloc_inode,
 	.destroy_inode = v9fs_destroy_inode,
-	.sync_fs = v9fs_sync_fs,
 	.statfs = v9fs_statfs,
 	.drop_inode = v9fs_drop_inode,
 	.evict_inode = v9fs_evict_inode,
 	.show_options = generic_show_options,
 	.umount_begin = v9fs_umount_begin,
+	.write_inode = v9fs_write_inode_dotl,
 };
 
 struct file_system_type v9fs_fs_type = {
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index f34078d702d3..303983fabfd6 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -941,9 +941,13 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
 	current->mm->start_stack = bprm->p;
 
 #ifdef arch_randomize_brk
-	if ((current->flags & PF_RANDOMIZE) && (randomize_va_space > 1))
+	if ((current->flags & PF_RANDOMIZE) && (randomize_va_space > 1)) {
 		current->mm->brk = current->mm->start_brk =
 			arch_randomize_brk(current->mm);
+#ifdef CONFIG_COMPAT_BRK
+		current->brk_randomized = 1;
+#endif
+	}
 #endif
 
 	if (current->personality & MMAP_PAGE_ZERO) {
diff --git a/fs/btrfs/acl.c b/fs/btrfs/acl.c
index de34bfad9ec3..5d505aaa72fb 100644
--- a/fs/btrfs/acl.c
+++ b/fs/btrfs/acl.c
@@ -178,16 +178,17 @@ static int btrfs_xattr_acl_set(struct dentry *dentry, const char *name,
 
 	if (value) {
 		acl = posix_acl_from_xattr(value, size);
-		if (acl == NULL) {
-			value = NULL;
-			size = 0;
+		if (acl) {
+			ret = posix_acl_valid(acl);
+			if (ret)
+				goto out;
 		} else if (IS_ERR(acl)) {
 			return PTR_ERR(acl);
 		}
 	}
 
 	ret = btrfs_set_acl(NULL, dentry->d_inode, acl, type);
-
+out:
 	posix_acl_release(acl);
 
 	return ret;
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 3458b5725540..2e61fe1b6b8c 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -740,8 +740,10 @@ struct btrfs_space_info {
 	 */
 	unsigned long reservation_progress;
 
-	int full;		/* indicates that we cannot allocate any more
+	int full:1;		/* indicates that we cannot allocate any more
 				   chunks for this space */
+	int chunk_alloc:1;	/* set if we are allocating a chunk */
+
 	int force_alloc;	/* set if we need to force a chunk alloc for
 				   this space */
 
@@ -2576,6 +2578,11 @@ int btrfs_drop_extents(struct btrfs_trans_handle *trans, struct inode *inode,
 int btrfs_mark_extent_written(struct btrfs_trans_handle *trans,
 			      struct inode *inode, u64 start, u64 end);
 int btrfs_release_file(struct inode *inode, struct file *file);
+void btrfs_drop_pages(struct page **pages, size_t num_pages);
+int btrfs_dirty_pages(struct btrfs_root *root, struct inode *inode,
+		      struct page **pages, size_t num_pages,
+		      loff_t pos, size_t write_bytes,
+		      struct extent_state **cached);
 
 /* tree-defrag.c */
 int btrfs_defrag_leaves(struct btrfs_trans_handle *trans,
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 8f1d44ba332f..68c84c8c24bd 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -3057,7 +3057,7 @@ static int btrfs_cleanup_transaction(struct btrfs_root *root)
 		btrfs_destroy_pinned_extent(root,
 					    root->fs_info->pinned_extents);
 
-		t->use_count = 0;
+		atomic_set(&t->use_count, 0);
 		list_del_init(&t->list);
 		memset(t, 0, sizeof(*t));
 		kmem_cache_free(btrfs_transaction_cachep, t);
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index f619c3cb13b7..31f33ba56fe8 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -33,6 +33,25 @@
 #include "locking.h"
 #include "free-space-cache.h"
 
+/* control flags for do_chunk_alloc's force field
+ * CHUNK_ALLOC_NO_FORCE means to only allocate a chunk
+ * if we really need one.
+ *
+ * CHUNK_ALLOC_FORCE means it must try to allocate one
+ *
+ * CHUNK_ALLOC_LIMITED means to only try and allocate one
+ * if we have very few chunks already allocated.  This is
+ * used as part of the clustering code to help make sure
+ * we have a good pool of storage to cluster in, without
+ * filling the FS with empty chunks
+ *
+ */
+enum {
+	CHUNK_ALLOC_NO_FORCE = 0,
+	CHUNK_ALLOC_FORCE = 1,
+	CHUNK_ALLOC_LIMITED = 2,
+};
+
 static int update_block_group(struct btrfs_trans_handle *trans,
 			      struct btrfs_root *root,
 			      u64 bytenr, u64 num_bytes, int alloc);
@@ -3019,7 +3038,8 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags,
 	found->bytes_readonly = 0;
 	found->bytes_may_use = 0;
 	found->full = 0;
-	found->force_alloc = 0;
+	found->force_alloc = CHUNK_ALLOC_NO_FORCE;
+	found->chunk_alloc = 0;
 	*space_info = found;
 	list_add_rcu(&found->list, &info->space_info);
 	atomic_set(&found->caching_threads, 0);
@@ -3150,7 +3170,7 @@ again:
 	if (!data_sinfo->full && alloc_chunk) {
 		u64 alloc_target;
 
-		data_sinfo->force_alloc = 1;
+		data_sinfo->force_alloc = CHUNK_ALLOC_FORCE;
 		spin_unlock(&data_sinfo->lock);
 alloc:
 		alloc_target = btrfs_get_alloc_profile(root, 1);
@@ -3160,7 +3180,8 @@ alloc:
 
 		ret = do_chunk_alloc(trans, root->fs_info->extent_root,
 				     bytes + 2 * 1024 * 1024,
-				     alloc_target, 0);
+				     alloc_target,
+				     CHUNK_ALLOC_NO_FORCE);
 		btrfs_end_transaction(trans, root);
 		if (ret < 0) {
 			if (ret != -ENOSPC)
@@ -3239,31 +3260,56 @@ static void force_metadata_allocation(struct btrfs_fs_info *info)
 	rcu_read_lock();
 	list_for_each_entry_rcu(found, head, list) {
 		if (found->flags & BTRFS_BLOCK_GROUP_METADATA)
-			found->force_alloc = 1;
+			found->force_alloc = CHUNK_ALLOC_FORCE;
 	}
 	rcu_read_unlock();
 }
 
 static int should_alloc_chunk(struct btrfs_root *root,
-			      struct btrfs_space_info *sinfo, u64 alloc_bytes)
+			      struct btrfs_space_info *sinfo, u64 alloc_bytes,
+			      int force)
 {
 	u64 num_bytes = sinfo->total_bytes - sinfo->bytes_readonly;
+	u64 num_allocated = sinfo->bytes_used + sinfo->bytes_reserved;
 	u64 thresh;
 
-	if (sinfo->bytes_used + sinfo->bytes_reserved +
-	    alloc_bytes + 256 * 1024 * 1024 < num_bytes)
+	if (force == CHUNK_ALLOC_FORCE)
+		return 1;
+
+	/*
+	 * in limited mode, we want to have some free space up to
+	 * about 1% of the FS size.
+	 */
+	if (force == CHUNK_ALLOC_LIMITED) {
+		thresh = btrfs_super_total_bytes(&root->fs_info->super_copy);
+		thresh = max_t(u64, 64 * 1024 * 1024,
+			       div_factor_fine(thresh, 1));
+
+		if (num_bytes - num_allocated < thresh)
+			return 1;
+	}
+
+	/*
+	 * we have two similar checks here, one based on percentage
+	 * and once based on a hard number of 256MB.  The idea
+	 * is that if we have a good amount of free
+	 * room, don't allocate a chunk.  A good mount is
+	 * less than 80% utilized of the chunks we have allocated,
+	 * or more than 256MB free
+	 */
+	if (num_allocated + alloc_bytes + 256 * 1024 * 1024 < num_bytes)
 		return 0;
 
-	if (sinfo->bytes_used + sinfo->bytes_reserved +
-	    alloc_bytes < div_factor(num_bytes, 8))
+	if (num_allocated + alloc_bytes < div_factor(num_bytes, 8))
 		return 0;
 
 	thresh = btrfs_super_total_bytes(&root->fs_info->super_copy);
+
+	/* 256MB or 5% of the FS */
 	thresh = max_t(u64, 256 * 1024 * 1024, div_factor_fine(thresh, 5));
 
 	if (num_bytes > thresh && sinfo->bytes_used < div_factor(num_bytes, 3))
 		return 0;
-
 	return 1;
 }
 
@@ -3273,10 +3319,9 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans,
 {
 	struct btrfs_space_info *space_info;
 	struct btrfs_fs_info *fs_info = extent_root->fs_info;
+	int wait_for_alloc = 0;
 	int ret = 0;
 
-	mutex_lock(&fs_info->chunk_mutex);
-
 	flags = btrfs_reduce_alloc_profile(extent_root, flags);
 
 	space_info = __find_space_info(extent_root->fs_info, flags);
@@ -3287,21 +3332,40 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans,
 	}
 	BUG_ON(!space_info);
 
+again:
 	spin_lock(&space_info->lock);
 	if (space_info->force_alloc)
-		force = 1;
+		force = space_info->force_alloc;
 	if (space_info->full) {
 		spin_unlock(&space_info->lock);
-		goto out;
+		return 0;
 	}
 
-	if (!force && !should_alloc_chunk(extent_root, space_info,
-					  alloc_bytes)) {
+	if (!should_alloc_chunk(extent_root, space_info, alloc_bytes, force)) {
 		spin_unlock(&space_info->lock);
-		goto out;
+		return 0;
+	} else if (space_info->chunk_alloc) {
+		wait_for_alloc = 1;
+	} else {
+		space_info->chunk_alloc = 1;
 	}
+
 	spin_unlock(&space_info->lock);
 
+	mutex_lock(&fs_info->chunk_mutex);
+
+	/*
+	 * The chunk_mutex is held throughout the entirety of a chunk
+	 * allocation, so once we've acquired the chunk_mutex we know that the
+	 * other guy is done and we need to recheck and see if we should
+	 * allocate.
+	 */
+	if (wait_for_alloc) {
+		mutex_unlock(&fs_info->chunk_mutex);
+		wait_for_alloc = 0;
+		goto again;
+	}
+
 	/*
 	 * If we have mixed data/metadata chunks we want to make sure we keep
 	 * allocating mixed chunks instead of individual chunks.
@@ -3327,9 +3391,10 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans,
 		space_info->full = 1;
 	else
 		ret = 1;
-	space_info->force_alloc = 0;
+
+	space_info->force_alloc = CHUNK_ALLOC_NO_FORCE;
+	space_info->chunk_alloc = 0;
 	spin_unlock(&space_info->lock);
-out:
 	mutex_unlock(&extent_root->fs_info->chunk_mutex);
 	return ret;
 }
@@ -5303,11 +5368,13 @@ loop:
 
 		if (allowed_chunk_alloc) {
 			ret = do_chunk_alloc(trans, root, num_bytes +
-					     2 * 1024 * 1024, data, 1);
+					     2 * 1024 * 1024, data,
+					     CHUNK_ALLOC_LIMITED);
 			allowed_chunk_alloc = 0;
 			done_chunk_alloc = 1;
-		} else if (!done_chunk_alloc) {
-			space_info->force_alloc = 1;
+		} else if (!done_chunk_alloc &&
+			   space_info->force_alloc == CHUNK_ALLOC_NO_FORCE) {
+			space_info->force_alloc = CHUNK_ALLOC_LIMITED;
 		}
 
 		if (loop < LOOP_NO_EMPTY_SIZE) {
@@ -5393,7 +5460,8 @@ again:
 	 */
 	if (empty_size || root->ref_cows)
 		ret = do_chunk_alloc(trans, root->fs_info->extent_root,
-				     num_bytes + 2 * 1024 * 1024, data, 0);
+				     num_bytes + 2 * 1024 * 1024, data,
+				     CHUNK_ALLOC_NO_FORCE);
 
 	WARN_ON(num_bytes < root->sectorsize);
 	ret = find_free_extent(trans, root, num_bytes, empty_size,
@@ -5405,7 +5473,7 @@ again:
 		num_bytes = num_bytes & ~(root->sectorsize - 1);
 		num_bytes = max(num_bytes, min_alloc_size);
 		do_chunk_alloc(trans, root->fs_info->extent_root,
-			       num_bytes, data, 1);
+			       num_bytes, data, CHUNK_ALLOC_FORCE);
 		goto again;
 	}
 	if (ret == -ENOSPC && btrfs_test_opt(root, ENOSPC_DEBUG)) {
@@ -8109,13 +8177,15 @@ int btrfs_set_block_group_ro(struct btrfs_root *root,
 
 	alloc_flags = update_block_group_flags(root, cache->flags);
 	if (alloc_flags != cache->flags)
-		do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags, 1);
+		do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags,
+			       CHUNK_ALLOC_FORCE);
 
 	ret = set_block_group_ro(cache);
 	if (!ret)
 		goto out;
 	alloc_flags = get_alloc_profile(root, cache->space_info->flags);
-	ret = do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags, 1);
+	ret = do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags,
+			     CHUNK_ALLOC_FORCE);
 	if (ret < 0)
 		goto out;
 	ret = set_block_group_ro(cache);
@@ -8128,7 +8198,8 @@ int btrfs_force_chunk_alloc(struct btrfs_trans_handle *trans,
 			    struct btrfs_root *root, u64 type)
 {
 	u64 alloc_flags = get_alloc_profile(root, type);
-	return do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags, 1);
+	return do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags,
+			      CHUNK_ALLOC_FORCE);
 }
 
 /*
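
For reference, the allocation heuristic that the CHUNK_ALLOC_* rework above implements can be restated outside the kernel. Below is a hedged userspace C sketch: the CHUNK_ALLOC_* values and the 64MB/256MB/80% thresholds come from the hunks above, while div_factor()/div_factor_fine() are re-derived here from their usual btrfs meaning (scale by tenths and hundredths). It is an illustration of the decision logic, not btrfs code.

/* Userspace sketch of the should_alloc_chunk() heuristic above.
 * div_factor helpers are re-derived assumptions (x*n/10 and x*n/100). */
#include <stdint.h>
#include <stdio.h>

enum { CHUNK_ALLOC_NO_FORCE = 0, CHUNK_ALLOC_FORCE = 1, CHUNK_ALLOC_LIMITED = 2 };

static uint64_t div_factor(uint64_t num, int factor)      { return num * factor / 10; }
static uint64_t div_factor_fine(uint64_t num, int factor) { return num * factor / 100; }

static int should_alloc_chunk(uint64_t fs_total, uint64_t num_bytes,
			      uint64_t num_allocated, uint64_t bytes_used,
			      uint64_t alloc_bytes, int force)
{
	uint64_t thresh;

	if (force == CHUNK_ALLOC_FORCE)
		return 1;

	/* limited mode: keep roughly 1% (at least 64MB) of the FS unallocated */
	if (force == CHUNK_ALLOC_LIMITED) {
		thresh = div_factor_fine(fs_total, 1);
		if (thresh < (64ULL << 20))
			thresh = 64ULL << 20;
		if (num_bytes - num_allocated < thresh)
			return 1;
	}

	/* don't allocate while >256MB would still be free afterwards... */
	if (num_allocated + alloc_bytes + (256ULL << 20) < num_bytes)
		return 0;
	/* ...or while the existing chunks are under 80% utilized */
	if (num_allocated + alloc_bytes < div_factor(num_bytes, 8))
		return 0;

	thresh = div_factor_fine(fs_total, 5);	/* 5% of the FS... */
	if (thresh < (256ULL << 20))
		thresh = 256ULL << 20;		/* ...but at least 256MB */
	if (num_bytes > thresh && bytes_used < div_factor(num_bytes, 3))
		return 0;
	return 1;
}

int main(void)
{
	/* 10GB space info, 7GB allocated/6GB used, asking for a 1GB chunk:
	 * more than 256MB stays free, so no new chunk (prints 0) */
	printf("%d\n", should_alloc_chunk(10ULL << 30, 10ULL << 30,
					  7ULL << 30, 6ULL << 30,
					  1ULL << 30, CHUNK_ALLOC_NO_FORCE));
	return 0;
}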
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 20ddb28602a8..315138605088 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -690,6 +690,15 @@ static void cache_state(struct extent_state *state,
 	}
 }
 
+static void uncache_state(struct extent_state **cached_ptr)
+{
+	if (cached_ptr && (*cached_ptr)) {
+		struct extent_state *state = *cached_ptr;
+		*cached_ptr = NULL;
+		free_extent_state(state);
+	}
+}
+
 /*
  * set some bits on a range in the tree.  This may require allocations or
  * sleeping, so the gfp mask is used to indicate what is allowed.
@@ -940,10 +949,10 @@ static int clear_extent_new(struct extent_io_tree *tree, u64 start, u64 end,
 }
 
 int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end,
-			gfp_t mask)
+			struct extent_state **cached_state, gfp_t mask)
 {
-	return set_extent_bit(tree, start, end, EXTENT_UPTODATE, 0, NULL,
-			      NULL, mask);
+	return set_extent_bit(tree, start, end, EXTENT_UPTODATE, 0,
+			      NULL, cached_state, mask);
 }
 
 static int clear_extent_uptodate(struct extent_io_tree *tree, u64 start,
@@ -1012,8 +1021,7 @@ int unlock_extent_cached(struct extent_io_tree *tree, u64 start, u64 end,
 			 mask);
 }
 
-int unlock_extent(struct extent_io_tree *tree, u64 start, u64 end,
-		  gfp_t mask)
+int unlock_extent(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask)
 {
 	return clear_extent_bit(tree, start, end, EXTENT_LOCKED, 1, 0, NULL,
 				mask);
@@ -1735,6 +1743,9 @@ static void end_bio_extent_readpage(struct bio *bio, int err)
 
 	do {
 		struct page *page = bvec->bv_page;
+		struct extent_state *cached = NULL;
+		struct extent_state *state;
+
 		tree = &BTRFS_I(page->mapping->host)->io_tree;
 
 		start = ((u64)page->index << PAGE_CACHE_SHIFT) +
@@ -1749,9 +1760,20 @@ static void end_bio_extent_readpage(struct bio *bio, int err)
 		if (++bvec <= bvec_end)
 			prefetchw(&bvec->bv_page->flags);
 
+		spin_lock(&tree->lock);
+		state = find_first_extent_bit_state(tree, start, EXTENT_LOCKED);
+		if (state && state->start == start) {
+			/*
+			 * take a reference on the state, unlock will drop
+			 * the ref
+			 */
+			cache_state(state, &cached);
+		}
+		spin_unlock(&tree->lock);
+
 		if (uptodate && tree->ops && tree->ops->readpage_end_io_hook) {
 			ret = tree->ops->readpage_end_io_hook(page, start, end,
-							      NULL);
+							      state);
 			if (ret)
 				uptodate = 0;
 		}
@@ -1764,15 +1786,16 @@ static void end_bio_extent_readpage(struct bio *bio, int err)
 					test_bit(BIO_UPTODATE, &bio->bi_flags);
 				if (err)
 					uptodate = 0;
+				uncache_state(&cached);
 				continue;
 			}
 		}
 
 		if (uptodate) {
-			set_extent_uptodate(tree, start, end,
+			set_extent_uptodate(tree, start, end, &cached,
 					    GFP_ATOMIC);
 		}
-		unlock_extent(tree, start, end, GFP_ATOMIC);
+		unlock_extent_cached(tree, start, end, &cached, GFP_ATOMIC);
 
 		if (whole_page) {
 			if (uptodate) {
@@ -1811,6 +1834,7 @@ static void end_bio_extent_preparewrite(struct bio *bio, int err)
 
 	do {
 		struct page *page = bvec->bv_page;
+		struct extent_state *cached = NULL;
 		tree = &BTRFS_I(page->mapping->host)->io_tree;
 
 		start = ((u64)page->index << PAGE_CACHE_SHIFT) +
@@ -1821,13 +1845,14 @@ static void end_bio_extent_preparewrite(struct bio *bio, int err)
 			prefetchw(&bvec->bv_page->flags);
 
 		if (uptodate) {
-			set_extent_uptodate(tree, start, end, GFP_ATOMIC);
+			set_extent_uptodate(tree, start, end, &cached,
+					    GFP_ATOMIC);
 		} else {
 			ClearPageUptodate(page);
 			SetPageError(page);
 		}
 
-		unlock_extent(tree, start, end, GFP_ATOMIC);
+		unlock_extent_cached(tree, start, end, &cached, GFP_ATOMIC);
 
 	} while (bvec >= bio->bi_io_vec);
 
@@ -2016,14 +2041,17 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
 	while (cur <= end) {
 		if (cur >= last_byte) {
 			char *userpage;
+			struct extent_state *cached = NULL;
+
 			iosize = PAGE_CACHE_SIZE - page_offset;
 			userpage = kmap_atomic(page, KM_USER0);
 			memset(userpage + page_offset, 0, iosize);
 			flush_dcache_page(page);
 			kunmap_atomic(userpage, KM_USER0);
 			set_extent_uptodate(tree, cur, cur + iosize - 1,
-					    GFP_NOFS);
-			unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS);
+					    &cached, GFP_NOFS);
+			unlock_extent_cached(tree, cur, cur + iosize - 1,
+					     &cached, GFP_NOFS);
 			break;
 		}
 		em = get_extent(inode, page, page_offset, cur,
@@ -2063,14 +2091,17 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
 		/* we've found a hole, just zero and go on */
 		if (block_start == EXTENT_MAP_HOLE) {
 			char *userpage;
+			struct extent_state *cached = NULL;
+
 			userpage = kmap_atomic(page, KM_USER0);
 			memset(userpage + page_offset, 0, iosize);
 			flush_dcache_page(page);
 			kunmap_atomic(userpage, KM_USER0);
 
 			set_extent_uptodate(tree, cur, cur + iosize - 1,
-					    GFP_NOFS);
-			unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS);
+					    &cached, GFP_NOFS);
+			unlock_extent_cached(tree, cur, cur + iosize - 1,
+					     &cached, GFP_NOFS);
 			cur = cur + iosize;
 			page_offset += iosize;
 			continue;
@@ -2789,9 +2820,12 @@ int extent_prepare_write(struct extent_io_tree *tree,
 			iocount++;
 			block_start = block_start + iosize;
 		} else {
-			set_extent_uptodate(tree, block_start, cur_end,
+			struct extent_state *cached = NULL;
+
+			set_extent_uptodate(tree, block_start, cur_end, &cached,
 					    GFP_NOFS);
-			unlock_extent(tree, block_start, cur_end, GFP_NOFS);
+			unlock_extent_cached(tree, block_start, cur_end,
+					     &cached, GFP_NOFS);
 			block_start = cur_end + 1;
 		}
 		page_offset = block_start & (PAGE_CACHE_SIZE - 1);
@@ -3457,7 +3491,7 @@ int set_extent_buffer_uptodate(struct extent_io_tree *tree,
 	num_pages = num_extent_pages(eb->start, eb->len);
 
 	set_extent_uptodate(tree, eb->start, eb->start + eb->len - 1,
-			    GFP_NOFS);
+			    NULL, GFP_NOFS);
 	for (i = 0; i < num_pages; i++) {
 		page = extent_buffer_page(eb, i);
 		if ((i == 0 && (eb->start & (PAGE_CACHE_SIZE - 1))) ||
@@ -3885,6 +3919,12 @@ static void move_pages(struct page *dst_page, struct page *src_page,
 	kunmap_atomic(dst_kaddr, KM_USER0);
 }
 
+static inline bool areas_overlap(unsigned long src, unsigned long dst, unsigned long len)
+{
+	unsigned long distance = (src > dst) ? src - dst : dst - src;
+	return distance < len;
+}
+
 static void copy_pages(struct page *dst_page, struct page *src_page,
 		       unsigned long dst_off, unsigned long src_off,
 		       unsigned long len)
@@ -3892,10 +3932,12 @@ static void copy_pages(struct page *dst_page, struct page *src_page,
 	char *dst_kaddr = kmap_atomic(dst_page, KM_USER0);
 	char *src_kaddr;
 
-	if (dst_page != src_page)
+	if (dst_page != src_page) {
 		src_kaddr = kmap_atomic(src_page, KM_USER1);
-	else
+	} else {
 		src_kaddr = dst_kaddr;
+		BUG_ON(areas_overlap(src_off, dst_off, len));
+	}
 
 	memcpy(dst_kaddr + dst_off, src_kaddr + src_off, len);
 	kunmap_atomic(dst_kaddr, KM_USER0);
@@ -3970,7 +4012,7 @@ void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
 		       "len %lu len %lu\n", dst_offset, len, dst->len);
 		BUG_ON(1);
 	}
-	if (dst_offset < src_offset) {
+	if (!areas_overlap(src_offset, dst_offset, len)) {
 		memcpy_extent_buffer(dst, dst_offset, src_offset, len);
 		return;
 	}
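
The new areas_overlap() helper above replaces the old dst_offset < src_offset ordering test in memmove_extent_buffer(), so overlap is now detected in either direction before copy_pages() falls through to memcpy(). A minimal standalone check of that logic (plain userspace C, not btrfs code):

/* Standalone check of the areas_overlap() test added above: two ranges
 * of length len collide exactly when their starts are closer than len. */
#include <assert.h>
#include <stdbool.h>

static bool areas_overlap(unsigned long src, unsigned long dst, unsigned long len)
{
	unsigned long distance = (src > dst) ? src - dst : dst - src;
	return distance < len;
}

int main(void)
{
	assert(!areas_overlap(0, 100, 100));	/* adjacent ranges: memcpy is safe */
	assert(areas_overlap(0, 99, 100));	/* one byte of overlap */
	assert(areas_overlap(50, 0, 100));	/* overlap works in either direction */
	return 0;
}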
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index f62c5442835d..af2d7179c372 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -208,7 +208,7 @@ int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
 		   int bits, int exclusive_bits, u64 *failed_start,
 		   struct extent_state **cached_state, gfp_t mask);
 int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end,
-			gfp_t mask);
+			struct extent_state **cached_state, gfp_t mask);
 int set_extent_new(struct extent_io_tree *tree, u64 start, u64 end,
 		   gfp_t mask);
 int set_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end,
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index e621ea54a3fd..75899a01dded 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -104,7 +104,7 @@ static noinline int btrfs_copy_from_user(loff_t pos, int num_pages,
 /*
  * unlocks pages after btrfs_file_write is done with them
  */
-static noinline void btrfs_drop_pages(struct page **pages, size_t num_pages)
+void btrfs_drop_pages(struct page **pages, size_t num_pages)
 {
 	size_t i;
 	for (i = 0; i < num_pages; i++) {
@@ -127,16 +127,13 @@ static noinline void btrfs_drop_pages(struct page **pages, size_t num_pages)
  * this also makes the decision about creating an inline extent vs
  * doing real data extents, marking pages dirty and delalloc as required.
  */
-static noinline int dirty_and_release_pages(struct btrfs_root *root,
-					    struct file *file,
-					    struct page **pages,
-					    size_t num_pages,
-					    loff_t pos,
-					    size_t write_bytes)
+int btrfs_dirty_pages(struct btrfs_root *root, struct inode *inode,
+		      struct page **pages, size_t num_pages,
+		      loff_t pos, size_t write_bytes,
+		      struct extent_state **cached)
 {
 	int err = 0;
 	int i;
-	struct inode *inode = fdentry(file)->d_inode;
 	u64 num_bytes;
 	u64 start_pos;
 	u64 end_of_last_block;
@@ -149,7 +146,7 @@ static noinline int dirty_and_release_pages(struct btrfs_root *root,
 
 	end_of_last_block = start_pos + num_bytes - 1;
 	err = btrfs_set_extent_delalloc(inode, start_pos, end_of_last_block,
-					NULL);
+					cached);
 	if (err)
 		return err;
 
@@ -992,9 +989,9 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file,
 		}
 
 		if (copied > 0) {
-			ret = dirty_and_release_pages(root, file, pages,
-						      dirty_pages, pos,
-						      copied);
+			ret = btrfs_dirty_pages(root, inode, pages,
+						dirty_pages, pos, copied,
+						NULL);
 			if (ret) {
 				btrfs_delalloc_release_space(inode,
 					dirty_pages << PAGE_CACHE_SHIFT);
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index f561c953205b..11d2e9cea09e 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -508,6 +508,7 @@ int btrfs_write_out_cache(struct btrfs_root *root,
 	struct inode *inode;
 	struct rb_node *node;
 	struct list_head *pos, *n;
+	struct page **pages;
 	struct page *page;
 	struct extent_state *cached_state = NULL;
 	struct btrfs_free_cluster *cluster = NULL;
@@ -517,13 +518,13 @@ int btrfs_write_out_cache(struct btrfs_root *root,
 	u64 start, end, len;
 	u64 bytes = 0;
 	u32 *crc, *checksums;
-	pgoff_t index = 0, last_index = 0;
 	unsigned long first_page_offset;
-	int num_checksums;
+	int index = 0, num_pages = 0;
 	int entries = 0;
 	int bitmaps = 0;
 	int ret = 0;
 	bool next_page = false;
+	bool out_of_space = false;
 
 	root = root->fs_info->tree_root;
 
@@ -551,24 +552,31 @@ int btrfs_write_out_cache(struct btrfs_root *root,
 		return 0;
 	}
 
-	last_index = (i_size_read(inode) - 1) >> PAGE_CACHE_SHIFT;
+	num_pages = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >>
+		PAGE_CACHE_SHIFT;
 	filemap_write_and_wait(inode->i_mapping);
 	btrfs_wait_ordered_range(inode, inode->i_size &
 				 ~(root->sectorsize - 1), (u64)-1);
 
 	/* We need a checksum per page. */
-	num_checksums = i_size_read(inode) / PAGE_CACHE_SIZE;
-	crc = checksums = kzalloc(sizeof(u32) * num_checksums, GFP_NOFS);
+	crc = checksums = kzalloc(sizeof(u32) * num_pages, GFP_NOFS);
 	if (!crc) {
 		iput(inode);
 		return 0;
 	}
 
+	pages = kzalloc(sizeof(struct page *) * num_pages, GFP_NOFS);
+	if (!pages) {
+		kfree(crc);
+		iput(inode);
+		return 0;
+	}
+
 	/* Since the first page has all of our checksums and our generation we
 	 * need to calculate the offset into the page that we can start writing
 	 * our entries.
 	 */
-	first_page_offset = (sizeof(u32) * num_checksums) + sizeof(u64);
+	first_page_offset = (sizeof(u32) * num_pages) + sizeof(u64);
 
 	/* Get the cluster for this block_group if it exists */
 	if (!list_empty(&block_group->cluster_list))
@@ -590,20 +598,18 @@ int btrfs_write_out_cache(struct btrfs_root *root,
 	 * after find_get_page at this point.  Just putting this here so people
 	 * know and don't freak out.
 	 */
-	while (index <= last_index) {
+	while (index < num_pages) {
 		page = grab_cache_page(inode->i_mapping, index);
 		if (!page) {
-			pgoff_t i = 0;
+			int i;
 
-			while (i < index) {
-				page = find_get_page(inode->i_mapping, i);
-				unlock_page(page);
-				page_cache_release(page);
-				page_cache_release(page);
-				i++;
+			for (i = 0; i < num_pages; i++) {
+				unlock_page(pages[i]);
+				page_cache_release(pages[i]);
 			}
 			goto out_free;
 		}
+		pages[index] = page;
 		index++;
 	}
 
@@ -631,7 +637,12 @@ int btrfs_write_out_cache(struct btrfs_root *root,
 			offset = start_offset;
 		}
 
-		page = find_get_page(inode->i_mapping, index);
+		if (index >= num_pages) {
+			out_of_space = true;
+			break;
+		}
+
+		page = pages[index];
 
 		addr = kmap(page);
 		entry = addr + start_offset;
@@ -708,23 +719,6 @@ int btrfs_write_out_cache(struct btrfs_root *root,
 
 		bytes += PAGE_CACHE_SIZE;
 
-		ClearPageChecked(page);
-		set_page_extent_mapped(page);
-		SetPageUptodate(page);
-		set_page_dirty(page);
-
-		/*
-		 * We need to release our reference we got for grab_cache_page,
-		 * except for the first page which will hold our checksums, we
-		 * do that below.
-		 */
-		if (index != 0) {
-			unlock_page(page);
-			page_cache_release(page);
-		}
-
-		page_cache_release(page);
-
 		index++;
 	} while (node || next_page);
 
@@ -734,7 +728,11 @@ int btrfs_write_out_cache(struct btrfs_root *root,
 		struct btrfs_free_space *entry =
 			list_entry(pos, struct btrfs_free_space, list);
 
-		page = find_get_page(inode->i_mapping, index);
+		if (index >= num_pages) {
+			out_of_space = true;
+			break;
+		}
+		page = pages[index];
 
 		addr = kmap(page);
 		memcpy(addr, entry->bitmap, PAGE_CACHE_SIZE);
@@ -745,64 +743,58 @@ int btrfs_write_out_cache(struct btrfs_root *root,
 		crc++;
 		bytes += PAGE_CACHE_SIZE;
 
-		ClearPageChecked(page);
-		set_page_extent_mapped(page);
-		SetPageUptodate(page);
-		set_page_dirty(page);
-		unlock_page(page);
-		page_cache_release(page);
-		page_cache_release(page);
 		list_del_init(&entry->list);
 		index++;
 	}
 
+	if (out_of_space) {
+		btrfs_drop_pages(pages, num_pages);
+		unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0,
+				     i_size_read(inode) - 1, &cached_state,
+				     GFP_NOFS);
+		ret = 0;
+		goto out_free;
+	}
+
 	/* Zero out the rest of the pages just to make sure */
-	while (index <= last_index) {
+	while (index < num_pages) {
 		void *addr;
 
-		page = find_get_page(inode->i_mapping, index);
-
+		page = pages[index];
 		addr = kmap(page);
 		memset(addr, 0, PAGE_CACHE_SIZE);
 		kunmap(page);
-		ClearPageChecked(page);
-		set_page_extent_mapped(page);
-		SetPageUptodate(page);
-		set_page_dirty(page);
-		unlock_page(page);
-		page_cache_release(page);
-		page_cache_release(page);
 		bytes += PAGE_CACHE_SIZE;
 		index++;
 	}
 
-	btrfs_set_extent_delalloc(inode, 0, bytes - 1, &cached_state);
-
 	/* Write the checksums and trans id to the first page */
 	{
 		void *addr;
 		u64 *gen;
 
-		page = find_get_page(inode->i_mapping, 0);
+		page = pages[0];
 
 		addr = kmap(page);
-		memcpy(addr, checksums, sizeof(u32) * num_checksums);
-		gen = addr + (sizeof(u32) * num_checksums);
+		memcpy(addr, checksums, sizeof(u32) * num_pages);
+		gen = addr + (sizeof(u32) * num_pages);
 		*gen = trans->transid;
 		kunmap(page);
-		ClearPageChecked(page);
-		set_page_extent_mapped(page);
-		SetPageUptodate(page);
-		set_page_dirty(page);
-		unlock_page(page);
-		page_cache_release(page);
-		page_cache_release(page);
 	}
-	BTRFS_I(inode)->generation = trans->transid;
 
+	ret = btrfs_dirty_pages(root, inode, pages, num_pages, 0,
+				bytes, &cached_state);
+	btrfs_drop_pages(pages, num_pages);
 	unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0,
 			     i_size_read(inode) - 1, &cached_state, GFP_NOFS);
 
+	if (ret) {
+		ret = 0;
+		goto out_free;
+	}
+
+	BTRFS_I(inode)->generation = trans->transid;
+
 	filemap_write_and_wait(inode->i_mapping);
 
 	key.objectid = BTRFS_FREE_SPACE_OBJECTID;
@@ -853,6 +845,7 @@ out_free:
 		BTRFS_I(inode)->generation = 0;
 	}
 	kfree(checksums);
+	kfree(pages);
 	btrfs_update_inode(trans, root, inode);
 	iput(inode);
 	return ret;
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 5cc64ab9c485..fcd66b6a8086 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -1770,9 +1770,12 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
 	add_pending_csums(trans, inode, ordered_extent->file_offset,
 			  &ordered_extent->list);
 
-	btrfs_ordered_update_i_size(inode, 0, ordered_extent);
-	ret = btrfs_update_inode(trans, root, inode);
-	BUG_ON(ret);
+	ret = btrfs_ordered_update_i_size(inode, 0, ordered_extent);
+	if (!ret) {
+		ret = btrfs_update_inode(trans, root, inode);
+		BUG_ON(ret);
+	}
+	ret = 0;
 out:
 	if (nolock) {
 		if (trans)
@@ -2590,6 +2593,13 @@ static void fill_inode_item(struct btrfs_trans_handle *trans,
 			    struct btrfs_inode_item *item,
 			    struct inode *inode)
 {
+	if (!leaf->map_token)
+		map_private_extent_buffer(leaf, (unsigned long)item,
+					  sizeof(struct btrfs_inode_item),
+					  &leaf->map_token, &leaf->kaddr,
+					  &leaf->map_start, &leaf->map_len,
+					  KM_USER1);
+
 	btrfs_set_inode_uid(leaf, item, inode->i_uid);
 	btrfs_set_inode_gid(leaf, item, inode->i_gid);
 	btrfs_set_inode_size(leaf, item, BTRFS_I(inode)->disk_i_size);
@@ -2618,6 +2628,11 @@ static void fill_inode_item(struct btrfs_trans_handle *trans,
 	btrfs_set_inode_rdev(leaf, item, inode->i_rdev);
 	btrfs_set_inode_flags(leaf, item, BTRFS_I(inode)->flags);
 	btrfs_set_inode_block_group(leaf, item, BTRFS_I(inode)->block_group);
+
+	if (leaf->map_token) {
+		unmap_extent_buffer(leaf, leaf->map_token, KM_USER1);
+		leaf->map_token = NULL;
+	}
 }
 
 /*
@@ -4207,10 +4222,8 @@ static int btrfs_real_readdir(struct file *filp, void *dirent,
 	struct btrfs_key found_key;
 	struct btrfs_path *path;
 	int ret;
-	u32 nritems;
 	struct extent_buffer *leaf;
 	int slot;
-	int advance;
 	unsigned char d_type;
 	int over = 0;
 	u32 di_cur;
@@ -4253,27 +4266,19 @@ static int btrfs_real_readdir(struct file *filp, void *dirent,
 	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
 	if (ret < 0)
 		goto err;
-	advance = 0;
 
 	while (1) {
 		leaf = path->nodes[0];
-		nritems = btrfs_header_nritems(leaf);
 		slot = path->slots[0];
-		if (advance || slot >= nritems) {
-			if (slot >= nritems - 1) {
-				ret = btrfs_next_leaf(root, path);
-				if (ret)
-					break;
-				leaf = path->nodes[0];
-				nritems = btrfs_header_nritems(leaf);
-				slot = path->slots[0];
-			} else {
-				slot++;
-				path->slots[0]++;
-			}
+		if (slot >= btrfs_header_nritems(leaf)) {
+			ret = btrfs_next_leaf(root, path);
+			if (ret < 0)
+				goto err;
+			else if (ret > 0)
+				break;
+			continue;
 		}
 
-		advance = 1;
 		item = btrfs_item_nr(leaf, slot);
 		btrfs_item_key_to_cpu(leaf, &found_key, slot);
 
@@ -4282,7 +4287,7 @@ static int btrfs_real_readdir(struct file *filp, void *dirent,
 		if (btrfs_key_type(&found_key) != key_type)
 			break;
 		if (found_key.offset < filp->f_pos)
-			continue;
+			goto next;
 
 		filp->f_pos = found_key.offset;
 
@@ -4335,6 +4340,8 @@ skip:
 			di_cur += di_len;
 			di = (struct btrfs_dir_item *)((char *)di + di_len);
 		}
+next:
+		path->slots[0]++;
 	}
 
 	/* Reached end of directory/root. Bump pos past the last item. */
@@ -4527,14 +4534,17 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
 	BUG_ON(!path);
 
 	inode = new_inode(root->fs_info->sb);
-	if (!inode)
+	if (!inode) {
+		btrfs_free_path(path);
 		return ERR_PTR(-ENOMEM);
+	}
 
 	if (dir) {
 		trace_btrfs_inode_request(dir);
 
 		ret = btrfs_set_inode_index(dir, index);
 		if (ret) {
+			btrfs_free_path(path);
 			iput(inode);
 			return ERR_PTR(ret);
 		}
@@ -4834,9 +4844,6 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
4834 if (inode->i_nlink == ~0U) 4844 if (inode->i_nlink == ~0U)
4835 return -EMLINK; 4845 return -EMLINK;
4836 4846
4837 btrfs_inc_nlink(inode);
4838 inode->i_ctime = CURRENT_TIME;
4839
4840 err = btrfs_set_inode_index(dir, &index); 4847 err = btrfs_set_inode_index(dir, &index);
4841 if (err) 4848 if (err)
4842 goto fail; 4849 goto fail;
@@ -4852,6 +4859,9 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
4852 goto fail; 4859 goto fail;
4853 } 4860 }
4854 4861
4862 btrfs_inc_nlink(inode);
4863 inode->i_ctime = CURRENT_TIME;
4864
4855 btrfs_set_trans_block_group(trans, dir); 4865 btrfs_set_trans_block_group(trans, dir);
4856 ihold(inode); 4866 ihold(inode);
4857 4867
@@ -5221,7 +5231,7 @@ again:
5221 btrfs_mark_buffer_dirty(leaf); 5231 btrfs_mark_buffer_dirty(leaf);
5222 } 5232 }
5223 set_extent_uptodate(io_tree, em->start, 5233 set_extent_uptodate(io_tree, em->start,
5224 extent_map_end(em) - 1, GFP_NOFS); 5234 extent_map_end(em) - 1, NULL, GFP_NOFS);
5225 goto insert; 5235 goto insert;
5226 } else { 5236 } else {
5227 printk(KERN_ERR "btrfs unknown found_type %d\n", found_type); 5237 printk(KERN_ERR "btrfs unknown found_type %d\n", found_type);
@@ -5428,17 +5438,30 @@ out:
5428} 5438}
5429 5439
5430static struct extent_map *btrfs_new_extent_direct(struct inode *inode, 5440static struct extent_map *btrfs_new_extent_direct(struct inode *inode,
5441 struct extent_map *em,
5431 u64 start, u64 len) 5442 u64 start, u64 len)
5432{ 5443{
5433 struct btrfs_root *root = BTRFS_I(inode)->root; 5444 struct btrfs_root *root = BTRFS_I(inode)->root;
5434 struct btrfs_trans_handle *trans; 5445 struct btrfs_trans_handle *trans;
5435 struct extent_map *em;
5436 struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; 5446 struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
5437 struct btrfs_key ins; 5447 struct btrfs_key ins;
5438 u64 alloc_hint; 5448 u64 alloc_hint;
5439 int ret; 5449 int ret;
5450 bool insert = false;
5440 5451
5441 btrfs_drop_extent_cache(inode, start, start + len - 1, 0); 5452 /*
5453	 * Ok: if the extent map we looked up is a hole and covers the exact
5454	 * range we want, there is no reason to allocate a new one. However,
5455	 * if it is not right, we need to free this one and drop the cache
5456	 * for our range.
5457 */
5458 if (em->block_start != EXTENT_MAP_HOLE || em->start != start ||
5459 em->len != len) {
5460 free_extent_map(em);
5461 em = NULL;
5462 insert = true;
5463 btrfs_drop_extent_cache(inode, start, start + len - 1, 0);
5464 }
5442 5465
5443 trans = btrfs_join_transaction(root, 0); 5466 trans = btrfs_join_transaction(root, 0);
5444 if (IS_ERR(trans)) 5467 if (IS_ERR(trans))
@@ -5454,10 +5477,12 @@ static struct extent_map *btrfs_new_extent_direct(struct inode *inode,
5454 goto out; 5477 goto out;
5455 } 5478 }
5456 5479
5457 em = alloc_extent_map(GFP_NOFS);
5458 if (!em) { 5480 if (!em) {
5459 em = ERR_PTR(-ENOMEM); 5481 em = alloc_extent_map(GFP_NOFS);
5460 goto out; 5482 if (!em) {
5483 em = ERR_PTR(-ENOMEM);
5484 goto out;
5485 }
5461 } 5486 }
5462 5487
5463 em->start = start; 5488 em->start = start;
@@ -5467,9 +5492,15 @@ static struct extent_map *btrfs_new_extent_direct(struct inode *inode,
5467 em->block_start = ins.objectid; 5492 em->block_start = ins.objectid;
5468 em->block_len = ins.offset; 5493 em->block_len = ins.offset;
5469 em->bdev = root->fs_info->fs_devices->latest_bdev; 5494 em->bdev = root->fs_info->fs_devices->latest_bdev;
5495
5496 /*
5497 * We need to do this because if we're using the original em we searched
5498 * for, we could have EXTENT_FLAG_VACANCY set, and we don't want that.
5499 */
5500 em->flags = 0;
5470 set_bit(EXTENT_FLAG_PINNED, &em->flags); 5501 set_bit(EXTENT_FLAG_PINNED, &em->flags);
5471 5502
5472 while (1) { 5503 while (insert) {
5473 write_lock(&em_tree->lock); 5504 write_lock(&em_tree->lock);
5474 ret = add_extent_mapping(em_tree, em); 5505 ret = add_extent_mapping(em_tree, em);
5475 write_unlock(&em_tree->lock); 5506 write_unlock(&em_tree->lock);
@@ -5687,8 +5718,7 @@ must_cow:
5687 * it above 5718 * it above
5688 */ 5719 */
5689 len = bh_result->b_size; 5720 len = bh_result->b_size;
5690 free_extent_map(em); 5721 em = btrfs_new_extent_direct(inode, em, start, len);
5691 em = btrfs_new_extent_direct(inode, start, len);
5692 if (IS_ERR(em)) 5722 if (IS_ERR(em))
5693 return PTR_ERR(em); 5723 return PTR_ERR(em);
5694 len = min(len, em->len - (start - em->start)); 5724 len = min(len, em->len - (start - em->start));
@@ -5851,8 +5881,10 @@ again:
5851 } 5881 }
5852 5882
5853 add_pending_csums(trans, inode, ordered->file_offset, &ordered->list); 5883 add_pending_csums(trans, inode, ordered->file_offset, &ordered->list);
5854 btrfs_ordered_update_i_size(inode, 0, ordered); 5884 ret = btrfs_ordered_update_i_size(inode, 0, ordered);
5855 btrfs_update_inode(trans, root, inode); 5885 if (!ret)
5886 btrfs_update_inode(trans, root, inode);
5887 ret = 0;
5856out_unlock: 5888out_unlock:
5857 unlock_extent_cached(&BTRFS_I(inode)->io_tree, ordered->file_offset, 5889 unlock_extent_cached(&BTRFS_I(inode)->io_tree, ordered->file_offset,
5858 ordered->file_offset + ordered->len - 1, 5890 ordered->file_offset + ordered->len - 1,
@@ -5938,7 +5970,7 @@ static struct bio *btrfs_dio_bio_alloc(struct block_device *bdev,
5938 5970
5939static inline int __btrfs_submit_dio_bio(struct bio *bio, struct inode *inode, 5971static inline int __btrfs_submit_dio_bio(struct bio *bio, struct inode *inode,
5940 int rw, u64 file_offset, int skip_sum, 5972 int rw, u64 file_offset, int skip_sum,
5941 u32 *csums) 5973 u32 *csums, int async_submit)
5942{ 5974{
5943 int write = rw & REQ_WRITE; 5975 int write = rw & REQ_WRITE;
5944 struct btrfs_root *root = BTRFS_I(inode)->root; 5976 struct btrfs_root *root = BTRFS_I(inode)->root;
@@ -5949,13 +5981,24 @@ static inline int __btrfs_submit_dio_bio(struct bio *bio, struct inode *inode,
5949 if (ret) 5981 if (ret)
5950 goto err; 5982 goto err;
5951 5983
5952 if (write && !skip_sum) { 5984 if (skip_sum)
5985 goto map;
5986
5987 if (write && async_submit) {
5953 ret = btrfs_wq_submit_bio(root->fs_info, 5988 ret = btrfs_wq_submit_bio(root->fs_info,
5954 inode, rw, bio, 0, 0, 5989 inode, rw, bio, 0, 0,
5955 file_offset, 5990 file_offset,
5956 __btrfs_submit_bio_start_direct_io, 5991 __btrfs_submit_bio_start_direct_io,
5957 __btrfs_submit_bio_done); 5992 __btrfs_submit_bio_done);
5958 goto err; 5993 goto err;
5994 } else if (write) {
5995 /*
5996 * If we aren't doing async submit, calculate the csum of the
5997 * bio now.
5998 */
5999 ret = btrfs_csum_one_bio(root, inode, bio, file_offset, 1);
6000 if (ret)
6001 goto err;
5959 } else if (!skip_sum) { 6002 } else if (!skip_sum) {
5960 ret = btrfs_lookup_bio_sums_dio(root, inode, bio, 6003 ret = btrfs_lookup_bio_sums_dio(root, inode, bio,
5961 file_offset, csums); 6004 file_offset, csums);
@@ -5963,7 +6006,8 @@ static inline int __btrfs_submit_dio_bio(struct bio *bio, struct inode *inode,
5963 goto err; 6006 goto err;
5964 } 6007 }
5965 6008
5966 ret = btrfs_map_bio(root, rw, bio, 0, 1); 6009map:
6010 ret = btrfs_map_bio(root, rw, bio, 0, async_submit);
5967err: 6011err:
5968 bio_put(bio); 6012 bio_put(bio);
5969 return ret; 6013 return ret;
@@ -5985,15 +6029,9 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip,
5985 int nr_pages = 0; 6029 int nr_pages = 0;
5986 u32 *csums = dip->csums; 6030 u32 *csums = dip->csums;
5987 int ret = 0; 6031 int ret = 0;
6032 int async_submit = 0;
5988 int write = rw & REQ_WRITE; 6033 int write = rw & REQ_WRITE;
5989 6034
5990 bio = btrfs_dio_bio_alloc(orig_bio->bi_bdev, start_sector, GFP_NOFS);
5991 if (!bio)
5992 return -ENOMEM;
5993 bio->bi_private = dip;
5994 bio->bi_end_io = btrfs_end_dio_bio;
5995 atomic_inc(&dip->pending_bios);
5996
5997 map_length = orig_bio->bi_size; 6035 map_length = orig_bio->bi_size;
5998 ret = btrfs_map_block(map_tree, READ, start_sector << 9, 6036 ret = btrfs_map_block(map_tree, READ, start_sector << 9,
5999 &map_length, NULL, 0); 6037 &map_length, NULL, 0);
@@ -6002,6 +6040,19 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip,
6002 return -EIO; 6040 return -EIO;
6003 } 6041 }
6004 6042
6043 if (map_length >= orig_bio->bi_size) {
6044 bio = orig_bio;
6045 goto submit;
6046 }
6047
6048 async_submit = 1;
6049 bio = btrfs_dio_bio_alloc(orig_bio->bi_bdev, start_sector, GFP_NOFS);
6050 if (!bio)
6051 return -ENOMEM;
6052 bio->bi_private = dip;
6053 bio->bi_end_io = btrfs_end_dio_bio;
6054 atomic_inc(&dip->pending_bios);
6055
6005 while (bvec <= (orig_bio->bi_io_vec + orig_bio->bi_vcnt - 1)) { 6056 while (bvec <= (orig_bio->bi_io_vec + orig_bio->bi_vcnt - 1)) {
6006 if (unlikely(map_length < submit_len + bvec->bv_len || 6057 if (unlikely(map_length < submit_len + bvec->bv_len ||
6007 bio_add_page(bio, bvec->bv_page, bvec->bv_len, 6058 bio_add_page(bio, bvec->bv_page, bvec->bv_len,
@@ -6015,7 +6066,7 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip,
6015 atomic_inc(&dip->pending_bios); 6066 atomic_inc(&dip->pending_bios);
6016 ret = __btrfs_submit_dio_bio(bio, inode, rw, 6067 ret = __btrfs_submit_dio_bio(bio, inode, rw,
6017 file_offset, skip_sum, 6068 file_offset, skip_sum,
6018 csums); 6069 csums, async_submit);
6019 if (ret) { 6070 if (ret) {
6020 bio_put(bio); 6071 bio_put(bio);
6021 atomic_dec(&dip->pending_bios); 6072 atomic_dec(&dip->pending_bios);
@@ -6052,8 +6103,9 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip,
6052 } 6103 }
6053 } 6104 }
6054 6105
6106submit:
6055 ret = __btrfs_submit_dio_bio(bio, inode, rw, file_offset, skip_sum, 6107 ret = __btrfs_submit_dio_bio(bio, inode, rw, file_offset, skip_sum,
6056 csums); 6108 csums, async_submit);
6057 if (!ret) 6109 if (!ret)
6058 return 0; 6110 return 0;
6059 6111
@@ -6148,6 +6200,7 @@ static ssize_t check_direct_IO(struct btrfs_root *root, int rw, struct kiocb *io
6148 unsigned long nr_segs) 6200 unsigned long nr_segs)
6149{ 6201{
6150 int seg; 6202 int seg;
6203 int i;
6151 size_t size; 6204 size_t size;
6152 unsigned long addr; 6205 unsigned long addr;
6153 unsigned blocksize_mask = root->sectorsize - 1; 6206 unsigned blocksize_mask = root->sectorsize - 1;
@@ -6162,8 +6215,22 @@ static ssize_t check_direct_IO(struct btrfs_root *root, int rw, struct kiocb *io
6162 addr = (unsigned long)iov[seg].iov_base; 6215 addr = (unsigned long)iov[seg].iov_base;
6163 size = iov[seg].iov_len; 6216 size = iov[seg].iov_len;
6164 end += size; 6217 end += size;
6165 if ((addr & blocksize_mask) || (size & blocksize_mask)) 6218 if ((addr & blocksize_mask) || (size & blocksize_mask))
6166 goto out; 6219 goto out;
6220
6221 /* If this is a write we don't need to check anymore */
6222 if (rw & WRITE)
6223 continue;
6224
6225 /*
6226 * Check to make sure we don't have duplicate iov_base's in this
6227	 * iovec; if so, return EINVAL, otherwise we'll get csum errors
6228 * when reading back.
6229 */
6230 for (i = seg + 1; i < nr_segs; i++) {
6231 if (iov[seg].iov_base == iov[i].iov_base)
6232 goto out;
6233 }
6167 } 6234 }
6168 retval = 0; 6235 retval = 0;
6169out: 6236out:
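
The duplicate-iov check added to check_direct_IO() above only applies to reads: if two segments of an O_DIRECT read alias the same buffer, the pages backing it are filled more than once and, per the comment in the hunk, the read-back runs into csum errors, so such iovecs are now rejected up front. A hypothetical userspace demonstration (fd is assumed to be a file on btrfs opened with O_DIRECT):

	#include <sys/uio.h>

	char buf[4096] __attribute__((aligned(4096)));
	struct iovec iov[2] = {
		{ .iov_base = buf, .iov_len = 4096 },
		{ .iov_base = buf, .iov_len = 4096 },	/* aliases segment 0 */
	};
	ssize_t n = readv(fd, iov, 2);	/* now fails with EINVAL */
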
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index cfc264fefdb0..ffb48d6c5433 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -2287,7 +2287,7 @@ long btrfs_ioctl_space_info(struct btrfs_root *root, void __user *arg)
2287 struct btrfs_ioctl_space_info space; 2287 struct btrfs_ioctl_space_info space;
2288 struct btrfs_ioctl_space_info *dest; 2288 struct btrfs_ioctl_space_info *dest;
2289 struct btrfs_ioctl_space_info *dest_orig; 2289 struct btrfs_ioctl_space_info *dest_orig;
2290 struct btrfs_ioctl_space_info *user_dest; 2290 struct btrfs_ioctl_space_info __user *user_dest;
2291 struct btrfs_space_info *info; 2291 struct btrfs_space_info *info;
2292 u64 types[] = {BTRFS_BLOCK_GROUP_DATA, 2292 u64 types[] = {BTRFS_BLOCK_GROUP_DATA,
2293 BTRFS_BLOCK_GROUP_SYSTEM, 2293 BTRFS_BLOCK_GROUP_SYSTEM,
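
The one-word ioctl.c change is a sparse annotation fix: user_dest points into the caller's address space, so it must carry __user and be touched only through the copy helpers. A minimal sketch of the convention (the variable names are illustrative):

	struct btrfs_ioctl_space_info space = {};		/* kernel copy */
	struct btrfs_ioctl_space_info __user *user_dest = arg;	/* userspace ptr */

	if (copy_to_user(user_dest, &space, sizeof(space)))
		return -EFAULT;	/* a direct *user_dest = space would be a bug */
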
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 58e7de9cc90c..0ac712efcdf2 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -159,7 +159,7 @@ enum {
159 Opt_compress_type, Opt_compress_force, Opt_compress_force_type, 159 Opt_compress_type, Opt_compress_force, Opt_compress_force_type,
160 Opt_notreelog, Opt_ratio, Opt_flushoncommit, Opt_discard, 160 Opt_notreelog, Opt_ratio, Opt_flushoncommit, Opt_discard,
161 Opt_space_cache, Opt_clear_cache, Opt_user_subvol_rm_allowed, 161 Opt_space_cache, Opt_clear_cache, Opt_user_subvol_rm_allowed,
162 Opt_enospc_debug, Opt_err, 162 Opt_enospc_debug, Opt_subvolrootid, Opt_err,
163}; 163};
164 164
165static match_table_t tokens = { 165static match_table_t tokens = {
@@ -189,6 +189,7 @@ static match_table_t tokens = {
189 {Opt_clear_cache, "clear_cache"}, 189 {Opt_clear_cache, "clear_cache"},
190 {Opt_user_subvol_rm_allowed, "user_subvol_rm_allowed"}, 190 {Opt_user_subvol_rm_allowed, "user_subvol_rm_allowed"},
191 {Opt_enospc_debug, "enospc_debug"}, 191 {Opt_enospc_debug, "enospc_debug"},
192 {Opt_subvolrootid, "subvolrootid=%d"},
192 {Opt_err, NULL}, 193 {Opt_err, NULL},
193}; 194};
194 195
@@ -232,6 +233,7 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
232 break; 233 break;
233 case Opt_subvol: 234 case Opt_subvol:
234 case Opt_subvolid: 235 case Opt_subvolid:
236 case Opt_subvolrootid:
235 case Opt_device: 237 case Opt_device:
236 /* 238 /*
237 * These are parsed by btrfs_parse_early_options 239 * These are parsed by btrfs_parse_early_options
@@ -388,7 +390,7 @@ out:
388 */ 390 */
389static int btrfs_parse_early_options(const char *options, fmode_t flags, 391static int btrfs_parse_early_options(const char *options, fmode_t flags,
390 void *holder, char **subvol_name, u64 *subvol_objectid, 392 void *holder, char **subvol_name, u64 *subvol_objectid,
391 struct btrfs_fs_devices **fs_devices) 393 u64 *subvol_rootid, struct btrfs_fs_devices **fs_devices)
392{ 394{
393 substring_t args[MAX_OPT_ARGS]; 395 substring_t args[MAX_OPT_ARGS];
394 char *opts, *orig, *p; 396 char *opts, *orig, *p;
@@ -429,6 +431,18 @@ static int btrfs_parse_early_options(const char *options, fmode_t flags,
429 *subvol_objectid = intarg; 431 *subvol_objectid = intarg;
430 } 432 }
431 break; 433 break;
434 case Opt_subvolrootid:
435 intarg = 0;
436 error = match_int(&args[0], &intarg);
437 if (!error) {
438 /* we want the original fs_tree */
439 if (!intarg)
440 *subvol_rootid =
441 BTRFS_FS_TREE_OBJECTID;
442 else
443 *subvol_rootid = intarg;
444 }
445 break;
432 case Opt_device: 446 case Opt_device:
433 error = btrfs_scan_one_device(match_strdup(&args[0]), 447 error = btrfs_scan_one_device(match_strdup(&args[0]),
434 flags, holder, fs_devices); 448 flags, holder, fs_devices);
@@ -736,6 +750,7 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags,
736 fmode_t mode = FMODE_READ; 750 fmode_t mode = FMODE_READ;
737 char *subvol_name = NULL; 751 char *subvol_name = NULL;
738 u64 subvol_objectid = 0; 752 u64 subvol_objectid = 0;
753 u64 subvol_rootid = 0;
739 int error = 0; 754 int error = 0;
740 755
741 if (!(flags & MS_RDONLY)) 756 if (!(flags & MS_RDONLY))
@@ -743,7 +758,7 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags,
743 758
744 error = btrfs_parse_early_options(data, mode, fs_type, 759 error = btrfs_parse_early_options(data, mode, fs_type,
745 &subvol_name, &subvol_objectid, 760 &subvol_name, &subvol_objectid,
746 &fs_devices); 761 &subvol_rootid, &fs_devices);
747 if (error) 762 if (error)
748 return ERR_PTR(error); 763 return ERR_PTR(error);
749 764
@@ -807,15 +822,17 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags,
807 s->s_flags |= MS_ACTIVE; 822 s->s_flags |= MS_ACTIVE;
808 } 823 }
809 824
810 root = get_default_root(s, subvol_objectid);
811 if (IS_ERR(root)) {
812 error = PTR_ERR(root);
813 deactivate_locked_super(s);
814 goto error_free_subvol_name;
815 }
816 /* if they gave us a subvolume name bind mount into that */ 825 /* if they gave us a subvolume name bind mount into that */
817 if (strcmp(subvol_name, ".")) { 826 if (strcmp(subvol_name, ".")) {
818 struct dentry *new_root; 827 struct dentry *new_root;
828
829 root = get_default_root(s, subvol_rootid);
830 if (IS_ERR(root)) {
831 error = PTR_ERR(root);
832 deactivate_locked_super(s);
833 goto error_free_subvol_name;
834 }
835
819 mutex_lock(&root->d_inode->i_mutex); 836 mutex_lock(&root->d_inode->i_mutex);
820 new_root = lookup_one_len(subvol_name, root, 837 new_root = lookup_one_len(subvol_name, root,
821 strlen(subvol_name)); 838 strlen(subvol_name));
@@ -836,6 +853,13 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags,
836 } 853 }
837 dput(root); 854 dput(root);
838 root = new_root; 855 root = new_root;
856 } else {
857 root = get_default_root(s, subvol_objectid);
858 if (IS_ERR(root)) {
859 error = PTR_ERR(root);
860 deactivate_locked_super(s);
861 goto error_free_subvol_name;
862 }
839 } 863 }
840 864
841 kfree(subvol_name); 865 kfree(subvol_name);
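
The subvolrootid plumbing above follows the standard mount-option parser: the token is declared with a %d pattern and pulled out with match_int(), then carried into btrfs_mount() so that a named subvolume is looked up relative to the given tree rather than the default one (the else branch keeps the old subvol_objectid behaviour). A generic sketch of the parser mechanism, using a hypothetical option rather than the real btrfs table:

	enum { Opt_myopt, Opt_err };

	static match_table_t tokens = {
		{ Opt_myopt, "myopt=%d" },
		{ Opt_err, NULL },
	};

	substring_t args[MAX_OPT_ARGS];
	int intarg;

	switch (match_token(p, tokens, args)) {
	case Opt_myopt:
		if (!match_int(&args[0], &intarg))
			apply_myopt(intarg);	/* placeholder */
		break;
	default:
		break;
	}

In use, something like mount -o subvolrootid=257,subvol=snap would resolve "snap" under tree 257 (the value 257 is only an example).
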
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 5b158da7e0bb..c571734d5e5a 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -32,10 +32,8 @@
32 32
33static noinline void put_transaction(struct btrfs_transaction *transaction) 33static noinline void put_transaction(struct btrfs_transaction *transaction)
34{ 34{
35 WARN_ON(transaction->use_count == 0); 35 WARN_ON(atomic_read(&transaction->use_count) == 0);
36 transaction->use_count--; 36 if (atomic_dec_and_test(&transaction->use_count)) {
37 if (transaction->use_count == 0) {
38 list_del_init(&transaction->list);
39 memset(transaction, 0, sizeof(*transaction)); 37 memset(transaction, 0, sizeof(*transaction));
40 kmem_cache_free(btrfs_transaction_cachep, transaction); 38 kmem_cache_free(btrfs_transaction_cachep, transaction);
41 } 39 }
@@ -60,14 +58,14 @@ static noinline int join_transaction(struct btrfs_root *root)
60 if (!cur_trans) 58 if (!cur_trans)
61 return -ENOMEM; 59 return -ENOMEM;
62 root->fs_info->generation++; 60 root->fs_info->generation++;
63 cur_trans->num_writers = 1; 61 atomic_set(&cur_trans->num_writers, 1);
64 cur_trans->num_joined = 0; 62 cur_trans->num_joined = 0;
65 cur_trans->transid = root->fs_info->generation; 63 cur_trans->transid = root->fs_info->generation;
66 init_waitqueue_head(&cur_trans->writer_wait); 64 init_waitqueue_head(&cur_trans->writer_wait);
67 init_waitqueue_head(&cur_trans->commit_wait); 65 init_waitqueue_head(&cur_trans->commit_wait);
68 cur_trans->in_commit = 0; 66 cur_trans->in_commit = 0;
69 cur_trans->blocked = 0; 67 cur_trans->blocked = 0;
70 cur_trans->use_count = 1; 68 atomic_set(&cur_trans->use_count, 1);
71 cur_trans->commit_done = 0; 69 cur_trans->commit_done = 0;
72 cur_trans->start_time = get_seconds(); 70 cur_trans->start_time = get_seconds();
73 71
@@ -88,7 +86,7 @@ static noinline int join_transaction(struct btrfs_root *root)
88 root->fs_info->running_transaction = cur_trans; 86 root->fs_info->running_transaction = cur_trans;
89 spin_unlock(&root->fs_info->new_trans_lock); 87 spin_unlock(&root->fs_info->new_trans_lock);
90 } else { 88 } else {
91 cur_trans->num_writers++; 89 atomic_inc(&cur_trans->num_writers);
92 cur_trans->num_joined++; 90 cur_trans->num_joined++;
93 } 91 }
94 92
@@ -145,7 +143,7 @@ static void wait_current_trans(struct btrfs_root *root)
145 cur_trans = root->fs_info->running_transaction; 143 cur_trans = root->fs_info->running_transaction;
146 if (cur_trans && cur_trans->blocked) { 144 if (cur_trans && cur_trans->blocked) {
147 DEFINE_WAIT(wait); 145 DEFINE_WAIT(wait);
148 cur_trans->use_count++; 146 atomic_inc(&cur_trans->use_count);
149 while (1) { 147 while (1) {
150 prepare_to_wait(&root->fs_info->transaction_wait, &wait, 148 prepare_to_wait(&root->fs_info->transaction_wait, &wait,
151 TASK_UNINTERRUPTIBLE); 149 TASK_UNINTERRUPTIBLE);
@@ -181,6 +179,7 @@ static struct btrfs_trans_handle *start_transaction(struct btrfs_root *root,
181{ 179{
182 struct btrfs_trans_handle *h; 180 struct btrfs_trans_handle *h;
183 struct btrfs_transaction *cur_trans; 181 struct btrfs_transaction *cur_trans;
182 int retries = 0;
184 int ret; 183 int ret;
185 184
186 if (root->fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) 185 if (root->fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR)
@@ -204,7 +203,7 @@ again:
204 } 203 }
205 204
206 cur_trans = root->fs_info->running_transaction; 205 cur_trans = root->fs_info->running_transaction;
207 cur_trans->use_count++; 206 atomic_inc(&cur_trans->use_count);
208 if (type != TRANS_JOIN_NOLOCK) 207 if (type != TRANS_JOIN_NOLOCK)
209 mutex_unlock(&root->fs_info->trans_mutex); 208 mutex_unlock(&root->fs_info->trans_mutex);
210 209
@@ -224,10 +223,18 @@ again:
224 223
225 if (num_items > 0) { 224 if (num_items > 0) {
226 ret = btrfs_trans_reserve_metadata(h, root, num_items); 225 ret = btrfs_trans_reserve_metadata(h, root, num_items);
227 if (ret == -EAGAIN) { 226 if (ret == -EAGAIN && !retries) {
227 retries++;
228 btrfs_commit_transaction(h, root); 228 btrfs_commit_transaction(h, root);
229 goto again; 229 goto again;
230 } else if (ret == -EAGAIN) {
231 /*
232 * We have already retried and got EAGAIN, so really we
233 * don't have space, so set ret to -ENOSPC.
234 */
235 ret = -ENOSPC;
230 } 236 }
237
231 if (ret < 0) { 238 if (ret < 0) {
232 btrfs_end_transaction(h, root); 239 btrfs_end_transaction(h, root);
233 return ERR_PTR(ret); 240 return ERR_PTR(ret);
@@ -327,7 +334,7 @@ int btrfs_wait_for_commit(struct btrfs_root *root, u64 transid)
327 goto out_unlock; /* nothing committing|committed */ 334 goto out_unlock; /* nothing committing|committed */
328 } 335 }
329 336
330 cur_trans->use_count++; 337 atomic_inc(&cur_trans->use_count);
331 mutex_unlock(&root->fs_info->trans_mutex); 338 mutex_unlock(&root->fs_info->trans_mutex);
332 339
333 wait_for_commit(root, cur_trans); 340 wait_for_commit(root, cur_trans);
@@ -457,18 +464,14 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
457 wake_up_process(info->transaction_kthread); 464 wake_up_process(info->transaction_kthread);
458 } 465 }
459 466
460 if (lock)
461 mutex_lock(&info->trans_mutex);
462 WARN_ON(cur_trans != info->running_transaction); 467 WARN_ON(cur_trans != info->running_transaction);
463 WARN_ON(cur_trans->num_writers < 1); 468 WARN_ON(atomic_read(&cur_trans->num_writers) < 1);
464 cur_trans->num_writers--; 469 atomic_dec(&cur_trans->num_writers);
465 470
466 smp_mb(); 471 smp_mb();
467 if (waitqueue_active(&cur_trans->writer_wait)) 472 if (waitqueue_active(&cur_trans->writer_wait))
468 wake_up(&cur_trans->writer_wait); 473 wake_up(&cur_trans->writer_wait);
469 put_transaction(cur_trans); 474 put_transaction(cur_trans);
470 if (lock)
471 mutex_unlock(&info->trans_mutex);
472 475
473 if (current->journal_info == trans) 476 if (current->journal_info == trans)
474 current->journal_info = NULL; 477 current->journal_info = NULL;
@@ -1178,7 +1181,7 @@ int btrfs_commit_transaction_async(struct btrfs_trans_handle *trans,
1178 /* take transaction reference */ 1181 /* take transaction reference */
1179 mutex_lock(&root->fs_info->trans_mutex); 1182 mutex_lock(&root->fs_info->trans_mutex);
1180 cur_trans = trans->transaction; 1183 cur_trans = trans->transaction;
1181 cur_trans->use_count++; 1184 atomic_inc(&cur_trans->use_count);
1182 mutex_unlock(&root->fs_info->trans_mutex); 1185 mutex_unlock(&root->fs_info->trans_mutex);
1183 1186
1184 btrfs_end_transaction(trans, root); 1187 btrfs_end_transaction(trans, root);
@@ -1237,7 +1240,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
1237 1240
1238 mutex_lock(&root->fs_info->trans_mutex); 1241 mutex_lock(&root->fs_info->trans_mutex);
1239 if (cur_trans->in_commit) { 1242 if (cur_trans->in_commit) {
1240 cur_trans->use_count++; 1243 atomic_inc(&cur_trans->use_count);
1241 mutex_unlock(&root->fs_info->trans_mutex); 1244 mutex_unlock(&root->fs_info->trans_mutex);
1242 btrfs_end_transaction(trans, root); 1245 btrfs_end_transaction(trans, root);
1243 1246
@@ -1259,7 +1262,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
1259 prev_trans = list_entry(cur_trans->list.prev, 1262 prev_trans = list_entry(cur_trans->list.prev,
1260 struct btrfs_transaction, list); 1263 struct btrfs_transaction, list);
1261 if (!prev_trans->commit_done) { 1264 if (!prev_trans->commit_done) {
1262 prev_trans->use_count++; 1265 atomic_inc(&prev_trans->use_count);
1263 mutex_unlock(&root->fs_info->trans_mutex); 1266 mutex_unlock(&root->fs_info->trans_mutex);
1264 1267
1265 wait_for_commit(root, prev_trans); 1268 wait_for_commit(root, prev_trans);
@@ -1300,14 +1303,14 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
1300 TASK_UNINTERRUPTIBLE); 1303 TASK_UNINTERRUPTIBLE);
1301 1304
1302 smp_mb(); 1305 smp_mb();
1303 if (cur_trans->num_writers > 1) 1306 if (atomic_read(&cur_trans->num_writers) > 1)
1304 schedule_timeout(MAX_SCHEDULE_TIMEOUT); 1307 schedule_timeout(MAX_SCHEDULE_TIMEOUT);
1305 else if (should_grow) 1308 else if (should_grow)
1306 schedule_timeout(1); 1309 schedule_timeout(1);
1307 1310
1308 mutex_lock(&root->fs_info->trans_mutex); 1311 mutex_lock(&root->fs_info->trans_mutex);
1309 finish_wait(&cur_trans->writer_wait, &wait); 1312 finish_wait(&cur_trans->writer_wait, &wait);
1310 } while (cur_trans->num_writers > 1 || 1313 } while (atomic_read(&cur_trans->num_writers) > 1 ||
1311 (should_grow && cur_trans->num_joined != joined)); 1314 (should_grow && cur_trans->num_joined != joined));
1312 1315
1313 ret = create_pending_snapshots(trans, root->fs_info); 1316 ret = create_pending_snapshots(trans, root->fs_info);
@@ -1394,6 +1397,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
1394 1397
1395 wake_up(&cur_trans->commit_wait); 1398 wake_up(&cur_trans->commit_wait);
1396 1399
1400 list_del_init(&cur_trans->list);
1397 put_transaction(cur_trans); 1401 put_transaction(cur_trans);
1398 put_transaction(cur_trans); 1402 put_transaction(cur_trans);
1399 1403
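
The transaction.c conversion swaps counters that used to be guarded by trans_mutex for atomic_t, so references can be taken and dropped without holding the mutex; atomic_dec_and_test() makes the drop-and-free step race-free. The generic shape of the pattern (my_object is illustrative, not a btrfs type):

	struct my_object {
		atomic_t use_count;
		/* ... */
	};

	static void put_object(struct my_object *obj)
	{
		WARN_ON(atomic_read(&obj->use_count) == 0);
		if (atomic_dec_and_test(&obj->use_count))
			kfree(obj);	/* last reference frees */
	}

	/* taking a reference needs no lock at all: */
	atomic_inc(&obj->use_count);

Note the knock-on change in the hunks above: list_del_init() moves out of put_transaction() and into btrfs_commit_transaction(), since once the count is atomic, put_transaction() may run without trans_mutex and can no longer safely unlink the transaction from the list.
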
diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h
index 229a594cacd5..e441acc6c584 100644
--- a/fs/btrfs/transaction.h
+++ b/fs/btrfs/transaction.h
@@ -27,11 +27,11 @@ struct btrfs_transaction {
27 * total writers in this transaction, it must be zero before the 27 * total writers in this transaction, it must be zero before the
28 * transaction can end 28 * transaction can end
29 */ 29 */
30 unsigned long num_writers; 30 atomic_t num_writers;
31 31
32 unsigned long num_joined; 32 unsigned long num_joined;
33 int in_commit; 33 int in_commit;
34 int use_count; 34 atomic_t use_count;
35 int commit_done; 35 int commit_done;
36 int blocked; 36 int blocked;
37 struct list_head list; 37 struct list_head list;
diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c
index a5303b871b13..cfd660550ded 100644
--- a/fs/btrfs/xattr.c
+++ b/fs/btrfs/xattr.c
@@ -180,11 +180,10 @@ ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size)
180 struct btrfs_path *path; 180 struct btrfs_path *path;
181 struct extent_buffer *leaf; 181 struct extent_buffer *leaf;
182 struct btrfs_dir_item *di; 182 struct btrfs_dir_item *di;
183 int ret = 0, slot, advance; 183 int ret = 0, slot;
184 size_t total_size = 0, size_left = size; 184 size_t total_size = 0, size_left = size;
185 unsigned long name_ptr; 185 unsigned long name_ptr;
186 size_t name_len; 186 size_t name_len;
187 u32 nritems;
188 187
189 /* 188 /*
190 * ok we want all objects associated with this id. 189 * ok we want all objects associated with this id.
@@ -204,34 +203,24 @@ ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size)
204 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 203 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
205 if (ret < 0) 204 if (ret < 0)
206 goto err; 205 goto err;
207 advance = 0; 206
208 while (1) { 207 while (1) {
209 leaf = path->nodes[0]; 208 leaf = path->nodes[0];
210 nritems = btrfs_header_nritems(leaf);
211 slot = path->slots[0]; 209 slot = path->slots[0];
212 210
213 /* this is where we start walking through the path */ 211 /* this is where we start walking through the path */
214 if (advance || slot >= nritems) { 212 if (slot >= btrfs_header_nritems(leaf)) {
215 /* 213 /*
216 * if we've reached the last slot in this leaf we need 214 * if we've reached the last slot in this leaf we need
217 * to go to the next leaf and reset everything 215 * to go to the next leaf and reset everything
218 */ 216 */
219 if (slot >= nritems-1) { 217 ret = btrfs_next_leaf(root, path);
220 ret = btrfs_next_leaf(root, path); 218 if (ret < 0)
221 if (ret) 219 goto err;
222 break; 220 else if (ret > 0)
223 leaf = path->nodes[0]; 221 break;
224 nritems = btrfs_header_nritems(leaf); 222 continue;
225 slot = path->slots[0];
226 } else {
227 /*
228 * just walking through the slots on this leaf
229 */
230 slot++;
231 path->slots[0]++;
232 }
233 } 223 }
234 advance = 1;
235 224
236 btrfs_item_key_to_cpu(leaf, &found_key, slot); 225 btrfs_item_key_to_cpu(leaf, &found_key, slot);
237 226
@@ -250,7 +239,7 @@ ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size)
250 239
251 /* we are just looking for how big our buffer needs to be */ 240 /* we are just looking for how big our buffer needs to be */
252 if (!size) 241 if (!size)
253 continue; 242 goto next;
254 243
255 if (!buffer || (name_len + 1) > size_left) { 244 if (!buffer || (name_len + 1) > size_left) {
256 ret = -ERANGE; 245 ret = -ERANGE;
@@ -263,6 +252,8 @@ ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size)
263 252
264 size_left -= name_len + 1; 253 size_left -= name_len + 1;
265 buffer += name_len + 1; 254 buffer += name_len + 1;
255next:
256 path->slots[0]++;
266 } 257 }
267 ret = total_size; 258 ret = total_size;
268 259
diff --git a/fs/cifs/README b/fs/cifs/README
index fe1683590828..74ab165fc646 100644
--- a/fs/cifs/README
+++ b/fs/cifs/README
@@ -685,22 +685,6 @@ LinuxExtensionsEnabled If set to one then the client will attempt to
685 support and want to map the uid and gid fields 685 support and want to map the uid and gid fields
686 to values supplied at mount (rather than the 686 to values supplied at mount (rather than the
687 actual values, then set this to zero. (default 1) 687 actual values, then set this to zero. (default 1)
688Experimental When set to 1 used to enable certain experimental
689 features (currently enables multipage writes
690 when signing is enabled, the multipage write
691 performance enhancement was disabled when
692 signing turned on in case buffer was modified
693 just before it was sent, also this flag will
694 be used to use the new experimental directory change
695 notification code). When set to 2 enables
696 an additional experimental feature, "raw ntlmssp"
697 session establishment support (which allows
698 specifying "sec=ntlmssp" on mount). The Linux cifs
699 module will use ntlmv2 authentication encapsulated
700 in "raw ntlmssp" (not using SPNEGO) when
701 "sec=ntlmssp" is specified on mount.
702 This support also requires building cifs with
703 the CONFIG_CIFS_EXPERIMENTAL configuration flag.
704 688
705These experimental features and tracing can be enabled by changing flags in 689These experimental features and tracing can be enabled by changing flags in
706/proc/fs/cifs (after the cifs module has been installed or built into the 690/proc/fs/cifs (after the cifs module has been installed or built into the
diff --git a/fs/cifs/cache.c b/fs/cifs/cache.c
index e654dfd092c3..53d57a3fe427 100644
--- a/fs/cifs/cache.c
+++ b/fs/cifs/cache.c
@@ -50,7 +50,7 @@ void cifs_fscache_unregister(void)
50 */ 50 */
51struct cifs_server_key { 51struct cifs_server_key {
52 uint16_t family; /* address family */ 52 uint16_t family; /* address family */
53 uint16_t port; /* IP port */ 53 __be16 port; /* IP port */
54 union { 54 union {
55 struct in_addr ipv4_addr; 55 struct in_addr ipv4_addr;
56 struct in6_addr ipv6_addr; 56 struct in6_addr ipv6_addr;
diff --git a/fs/cifs/cifs_debug.c b/fs/cifs/cifs_debug.c
index 65829d32128c..30d01bc90855 100644
--- a/fs/cifs/cifs_debug.c
+++ b/fs/cifs/cifs_debug.c
@@ -423,7 +423,6 @@ static const struct file_operations cifs_lookup_cache_proc_fops;
423static const struct file_operations traceSMB_proc_fops; 423static const struct file_operations traceSMB_proc_fops;
424static const struct file_operations cifs_multiuser_mount_proc_fops; 424static const struct file_operations cifs_multiuser_mount_proc_fops;
425static const struct file_operations cifs_security_flags_proc_fops; 425static const struct file_operations cifs_security_flags_proc_fops;
426static const struct file_operations cifs_experimental_proc_fops;
427static const struct file_operations cifs_linux_ext_proc_fops; 426static const struct file_operations cifs_linux_ext_proc_fops;
428 427
429void 428void
@@ -441,8 +440,6 @@ cifs_proc_init(void)
441 proc_create("cifsFYI", 0, proc_fs_cifs, &cifsFYI_proc_fops); 440 proc_create("cifsFYI", 0, proc_fs_cifs, &cifsFYI_proc_fops);
442 proc_create("traceSMB", 0, proc_fs_cifs, &traceSMB_proc_fops); 441 proc_create("traceSMB", 0, proc_fs_cifs, &traceSMB_proc_fops);
443 proc_create("OplockEnabled", 0, proc_fs_cifs, &cifs_oplock_proc_fops); 442 proc_create("OplockEnabled", 0, proc_fs_cifs, &cifs_oplock_proc_fops);
444 proc_create("Experimental", 0, proc_fs_cifs,
445 &cifs_experimental_proc_fops);
446 proc_create("LinuxExtensionsEnabled", 0, proc_fs_cifs, 443 proc_create("LinuxExtensionsEnabled", 0, proc_fs_cifs,
447 &cifs_linux_ext_proc_fops); 444 &cifs_linux_ext_proc_fops);
448 proc_create("MultiuserMount", 0, proc_fs_cifs, 445 proc_create("MultiuserMount", 0, proc_fs_cifs,
@@ -469,7 +466,6 @@ cifs_proc_clean(void)
469 remove_proc_entry("OplockEnabled", proc_fs_cifs); 466 remove_proc_entry("OplockEnabled", proc_fs_cifs);
470 remove_proc_entry("SecurityFlags", proc_fs_cifs); 467 remove_proc_entry("SecurityFlags", proc_fs_cifs);
471 remove_proc_entry("LinuxExtensionsEnabled", proc_fs_cifs); 468 remove_proc_entry("LinuxExtensionsEnabled", proc_fs_cifs);
472 remove_proc_entry("Experimental", proc_fs_cifs);
473 remove_proc_entry("LookupCacheEnabled", proc_fs_cifs); 469 remove_proc_entry("LookupCacheEnabled", proc_fs_cifs);
474 remove_proc_entry("fs/cifs", NULL); 470 remove_proc_entry("fs/cifs", NULL);
475} 471}
@@ -550,45 +546,6 @@ static const struct file_operations cifs_oplock_proc_fops = {
550 .write = cifs_oplock_proc_write, 546 .write = cifs_oplock_proc_write,
551}; 547};
552 548
553static int cifs_experimental_proc_show(struct seq_file *m, void *v)
554{
555 seq_printf(m, "%d\n", experimEnabled);
556 return 0;
557}
558
559static int cifs_experimental_proc_open(struct inode *inode, struct file *file)
560{
561 return single_open(file, cifs_experimental_proc_show, NULL);
562}
563
564static ssize_t cifs_experimental_proc_write(struct file *file,
565 const char __user *buffer, size_t count, loff_t *ppos)
566{
567 char c;
568 int rc;
569
570 rc = get_user(c, buffer);
571 if (rc)
572 return rc;
573 if (c == '0' || c == 'n' || c == 'N')
574 experimEnabled = 0;
575 else if (c == '1' || c == 'y' || c == 'Y')
576 experimEnabled = 1;
577 else if (c == '2')
578 experimEnabled = 2;
579
580 return count;
581}
582
583static const struct file_operations cifs_experimental_proc_fops = {
584 .owner = THIS_MODULE,
585 .open = cifs_experimental_proc_open,
586 .read = seq_read,
587 .llseek = seq_lseek,
588 .release = single_release,
589 .write = cifs_experimental_proc_write,
590};
591
592static int cifs_linux_ext_proc_show(struct seq_file *m, void *v) 549static int cifs_linux_ext_proc_show(struct seq_file *m, void *v)
593{ 550{
594 seq_printf(m, "%d\n", linuxExtEnabled); 551 seq_printf(m, "%d\n", linuxExtEnabled);
diff --git a/fs/cifs/cifs_spnego.c b/fs/cifs/cifs_spnego.c
index 4dfba8283165..33d221394aca 100644
--- a/fs/cifs/cifs_spnego.c
+++ b/fs/cifs/cifs_spnego.c
@@ -113,7 +113,7 @@ cifs_get_spnego_key(struct cifsSesInfo *sesInfo)
113 MAX_MECH_STR_LEN + 113 MAX_MECH_STR_LEN +
114 UID_KEY_LEN + (sizeof(uid_t) * 2) + 114 UID_KEY_LEN + (sizeof(uid_t) * 2) +
115 CREDUID_KEY_LEN + (sizeof(uid_t) * 2) + 115 CREDUID_KEY_LEN + (sizeof(uid_t) * 2) +
116 USER_KEY_LEN + strlen(sesInfo->userName) + 116 USER_KEY_LEN + strlen(sesInfo->user_name) +
117 PID_KEY_LEN + (sizeof(pid_t) * 2) + 1; 117 PID_KEY_LEN + (sizeof(pid_t) * 2) + 1;
118 118
119 spnego_key = ERR_PTR(-ENOMEM); 119 spnego_key = ERR_PTR(-ENOMEM);
@@ -153,7 +153,7 @@ cifs_get_spnego_key(struct cifsSesInfo *sesInfo)
153 sprintf(dp, ";creduid=0x%x", sesInfo->cred_uid); 153 sprintf(dp, ";creduid=0x%x", sesInfo->cred_uid);
154 154
155 dp = description + strlen(description); 155 dp = description + strlen(description);
156 sprintf(dp, ";user=%s", sesInfo->userName); 156 sprintf(dp, ";user=%s", sesInfo->user_name);
157 157
158 dp = description + strlen(description); 158 dp = description + strlen(description);
159 sprintf(dp, ";pid=0x%x", current->pid); 159 sprintf(dp, ";pid=0x%x", current->pid);
diff --git a/fs/cifs/cifs_unicode.c b/fs/cifs/cifs_unicode.c
index fc0fd4fde306..23d43cde4306 100644
--- a/fs/cifs/cifs_unicode.c
+++ b/fs/cifs/cifs_unicode.c
@@ -90,7 +90,7 @@ cifs_mapchar(char *target, const __u16 src_char, const struct nls_table *cp,
90 case UNI_COLON: 90 case UNI_COLON:
91 *target = ':'; 91 *target = ':';
92 break; 92 break;
93 case UNI_ASTERIK: 93 case UNI_ASTERISK:
94 *target = '*'; 94 *target = '*';
95 break; 95 break;
96 case UNI_QUESTION: 96 case UNI_QUESTION:
@@ -264,40 +264,40 @@ cifs_strndup_from_ucs(const char *src, const int maxlen, const bool is_unicode,
264 * names are little endian 16 bit Unicode on the wire 264 * names are little endian 16 bit Unicode on the wire
265 */ 265 */
266int 266int
267cifsConvertToUCS(__le16 *target, const char *source, int maxlen, 267cifsConvertToUCS(__le16 *target, const char *source, int srclen,
268 const struct nls_table *cp, int mapChars) 268 const struct nls_table *cp, int mapChars)
269{ 269{
270 int i, j, charlen; 270 int i, j, charlen;
271 int len_remaining = maxlen;
272 char src_char; 271 char src_char;
273 __u16 temp; 272 __le16 dst_char;
273 wchar_t tmp;
274 274
275 if (!mapChars) 275 if (!mapChars)
276 return cifs_strtoUCS(target, source, PATH_MAX, cp); 276 return cifs_strtoUCS(target, source, PATH_MAX, cp);
277 277
278 for (i = 0, j = 0; i < maxlen; j++) { 278 for (i = 0, j = 0; i < srclen; j++) {
279 src_char = source[i]; 279 src_char = source[i];
280 switch (src_char) { 280 switch (src_char) {
281 case 0: 281 case 0:
282 put_unaligned_le16(0, &target[j]); 282 put_unaligned(0, &target[j]);
283 goto ctoUCS_out; 283 goto ctoUCS_out;
284 case ':': 284 case ':':
285 temp = UNI_COLON; 285 dst_char = cpu_to_le16(UNI_COLON);
286 break; 286 break;
287 case '*': 287 case '*':
288 temp = UNI_ASTERIK; 288 dst_char = cpu_to_le16(UNI_ASTERISK);
289 break; 289 break;
290 case '?': 290 case '?':
291 temp = UNI_QUESTION; 291 dst_char = cpu_to_le16(UNI_QUESTION);
292 break; 292 break;
293 case '<': 293 case '<':
294 temp = UNI_LESSTHAN; 294 dst_char = cpu_to_le16(UNI_LESSTHAN);
295 break; 295 break;
296 case '>': 296 case '>':
297 temp = UNI_GRTRTHAN; 297 dst_char = cpu_to_le16(UNI_GRTRTHAN);
298 break; 298 break;
299 case '|': 299 case '|':
300 temp = UNI_PIPE; 300 dst_char = cpu_to_le16(UNI_PIPE);
301 break; 301 break;
302 /* 302 /*
303 * FIXME: We can not handle remapping backslash (UNI_SLASH) 303 * FIXME: We can not handle remapping backslash (UNI_SLASH)
@@ -305,17 +305,17 @@ cifsConvertToUCS(__le16 *target, const char *source, int maxlen,
305 * as they use backslash as separator. 305 * as they use backslash as separator.
306 */ 306 */
307 default: 307 default:
308 charlen = cp->char2uni(source+i, len_remaining, 308 charlen = cp->char2uni(source + i, srclen - i, &tmp);
309 &temp); 309 dst_char = cpu_to_le16(tmp);
310
310 /* 311 /*
311 * if no match, use question mark, which at least in 312 * if no match, use question mark, which at least in
312 * some cases serves as wild card 313 * some cases serves as wild card
313 */ 314 */
314 if (charlen < 1) { 315 if (charlen < 1) {
315 temp = 0x003f; 316 dst_char = cpu_to_le16(0x003f);
316 charlen = 1; 317 charlen = 1;
317 } 318 }
318 len_remaining -= charlen;
319 /* 319 /*
320 * character may take more than one byte in the source 320 * character may take more than one byte in the source
321 * string, but will take exactly two bytes in the 321 * string, but will take exactly two bytes in the
@@ -324,9 +324,8 @@ cifsConvertToUCS(__le16 *target, const char *source, int maxlen,
324 i += charlen; 324 i += charlen;
325 continue; 325 continue;
326 } 326 }
327 put_unaligned_le16(temp, &target[j]); 327 put_unaligned(dst_char, &target[j]);
328 i++; /* move to next char in source string */ 328 i++; /* move to next char in source string */
329 len_remaining--;
330 } 329 }
331 330
332ctoUCS_out: 331ctoUCS_out:
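
Two things happen in the cifsConvertToUCS rewrite: the misspelled UNI_ASTERIK constant becomes UNI_ASTERISK, and the endianness handling is made explicit, with each output character built once as a cpu_to_le16() value and stored through put_unaligned(). The remapped characters are the NTFS-reserved ones, shifted into the 0xF000 private-use range; a hypothetical helper showing just that arithmetic (not a function in the driver):

	static __le16 remap_reserved(char c)
	{
		/* ':' '*' '?' '<' '>' '|' travel as c + 0xF000 */
		return cpu_to_le16((__u16)(c + 0xF000));
	}

The loop bound also changes from maxlen to srclen, and the running len_remaining bookkeeping disappears: char2uni() is simply given srclen - i, the bytes actually left in the source string.
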
diff --git a/fs/cifs/cifs_unicode.h b/fs/cifs/cifs_unicode.h
index 7fe6b52df507..644dd882a560 100644
--- a/fs/cifs/cifs_unicode.h
+++ b/fs/cifs/cifs_unicode.h
@@ -44,7 +44,7 @@
44 * reserved symbols (along with \ and /), otherwise illegal to store 44 * reserved symbols (along with \ and /), otherwise illegal to store
45 * in filenames in NTFS 45 * in filenames in NTFS
46 */ 46 */
47#define UNI_ASTERIK (__u16) ('*' + 0xF000) 47#define UNI_ASTERISK (__u16) ('*' + 0xF000)
48#define UNI_QUESTION (__u16) ('?' + 0xF000) 48#define UNI_QUESTION (__u16) ('?' + 0xF000)
49#define UNI_COLON (__u16) (':' + 0xF000) 49#define UNI_COLON (__u16) (':' + 0xF000)
50#define UNI_GRTRTHAN (__u16) ('>' + 0xF000) 50#define UNI_GRTRTHAN (__u16) ('>' + 0xF000)
diff --git a/fs/cifs/cifsencrypt.c b/fs/cifs/cifsencrypt.c
index a51585f9852b..d1a016be73ba 100644
--- a/fs/cifs/cifsencrypt.c
+++ b/fs/cifs/cifsencrypt.c
@@ -30,12 +30,13 @@
30#include <linux/ctype.h> 30#include <linux/ctype.h>
31#include <linux/random.h> 31#include <linux/random.h>
32 32
33/* Calculate and return the CIFS signature based on the mac key and SMB PDU */ 33/*
34/* the 16 byte signature must be allocated by the caller */ 34 * Calculate and return the CIFS signature based on the mac key and SMB PDU.
35/* Note we only use the 1st eight bytes */ 35 * The 16 byte signature must be allocated by the caller. Note we only use the
36/* Note that the smb header signature field on input contains the 36 * 1st eight bytes and that the smb header signature field on input contains
37 sequence number before this function is called */ 37 * the sequence number before this function is called. Also, this function
38 38 * should be called with the server->srv_mutex held.
39 */
39static int cifs_calculate_signature(const struct smb_hdr *cifs_pdu, 40static int cifs_calculate_signature(const struct smb_hdr *cifs_pdu,
40 struct TCP_Server_Info *server, char *signature) 41 struct TCP_Server_Info *server, char *signature)
41{ 42{
@@ -209,8 +210,10 @@ int cifs_verify_signature(struct smb_hdr *cifs_pdu,
209 cpu_to_le32(expected_sequence_number); 210 cpu_to_le32(expected_sequence_number);
210 cifs_pdu->Signature.Sequence.Reserved = 0; 211 cifs_pdu->Signature.Sequence.Reserved = 0;
211 212
213 mutex_lock(&server->srv_mutex);
212 rc = cifs_calculate_signature(cifs_pdu, server, 214 rc = cifs_calculate_signature(cifs_pdu, server,
213 what_we_think_sig_should_be); 215 what_we_think_sig_should_be);
216 mutex_unlock(&server->srv_mutex);
214 217
215 if (rc) 218 if (rc)
216 return rc; 219 return rc;
@@ -469,15 +472,15 @@ static int calc_ntlmv2_hash(struct cifsSesInfo *ses, char *ntlmv2_hash,
469 return rc; 472 return rc;
470 } 473 }
471 474
472 /* convert ses->userName to unicode and uppercase */ 475 /* convert ses->user_name to unicode and uppercase */
473 len = strlen(ses->userName); 476 len = strlen(ses->user_name);
474 user = kmalloc(2 + (len * 2), GFP_KERNEL); 477 user = kmalloc(2 + (len * 2), GFP_KERNEL);
475 if (user == NULL) { 478 if (user == NULL) {
476 cERROR(1, "calc_ntlmv2_hash: user mem alloc failure\n"); 479 cERROR(1, "calc_ntlmv2_hash: user mem alloc failure\n");
477 rc = -ENOMEM; 480 rc = -ENOMEM;
478 goto calc_exit_2; 481 goto calc_exit_2;
479 } 482 }
480 len = cifs_strtoUCS((__le16 *)user, ses->userName, len, nls_cp); 483 len = cifs_strtoUCS((__le16 *)user, ses->user_name, len, nls_cp);
481 UniStrupr(user); 484 UniStrupr(user);
482 485
483 crypto_shash_update(&ses->server->secmech.sdeschmacmd5->shash, 486 crypto_shash_update(&ses->server->secmech.sdeschmacmd5->shash,
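
The verify-path fix above takes srv_mutex around cifs_calculate_signature(), matching the requirement the reworked comment spells out. Signing is stateful: each signed PDU is tied to the server's running sequence number, so two signature computations racing on one TCP_Server_Info could fall out of step with the server. The shape of the call is simply:

	mutex_lock(&server->srv_mutex);
	rc = cifs_calculate_signature(cifs_pdu, server,
				      what_we_think_sig_should_be);
	mutex_unlock(&server->srv_mutex);
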
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index f2970136d17d..5c412b33cd7c 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -53,7 +53,6 @@ int cifsFYI = 0;
53int cifsERROR = 1; 53int cifsERROR = 1;
54int traceSMB = 0; 54int traceSMB = 0;
55unsigned int oplockEnabled = 1; 55unsigned int oplockEnabled = 1;
56unsigned int experimEnabled = 0;
57unsigned int linuxExtEnabled = 1; 56unsigned int linuxExtEnabled = 1;
58unsigned int lookupCacheEnabled = 1; 57unsigned int lookupCacheEnabled = 1;
59unsigned int multiuser_mount = 0; 58unsigned int multiuser_mount = 0;
@@ -127,6 +126,7 @@ cifs_read_super(struct super_block *sb, void *data,
127 kfree(cifs_sb); 126 kfree(cifs_sb);
128 return rc; 127 return rc;
129 } 128 }
129 cifs_sb->bdi.ra_pages = default_backing_dev_info.ra_pages;
130 130
131#ifdef CONFIG_CIFS_DFS_UPCALL 131#ifdef CONFIG_CIFS_DFS_UPCALL
132 /* copy mount params to sb for use in submounts */ 132 /* copy mount params to sb for use in submounts */
@@ -409,8 +409,8 @@ cifs_show_options(struct seq_file *s, struct vfsmount *m)
409 409
410 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER) 410 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER)
411 seq_printf(s, ",multiuser"); 411 seq_printf(s, ",multiuser");
412 else if (tcon->ses->userName) 412 else if (tcon->ses->user_name)
413 seq_printf(s, ",username=%s", tcon->ses->userName); 413 seq_printf(s, ",username=%s", tcon->ses->user_name);
414 414
415 if (tcon->ses->domainName) 415 if (tcon->ses->domainName)
416 seq_printf(s, ",domain=%s", tcon->ses->domainName); 416 seq_printf(s, ",domain=%s", tcon->ses->domainName);
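
The userName changes threaded through cifsfs.c, cifsglob.h, and connect.c replace a fixed char userName[MAX_USERNAME_SIZE + 1] array with a kstrdup()'d char *user_name, which is why MAX_USERNAME_SIZE can grow to 256 as a parse-time limit only. A sketch of the lifecycle (the allocation comes from the connect.c hunk further down; the kfree() in session teardown is assumed, as it is not shown in these hunks):

	/* setup: */
	ses->user_name = kstrdup(volume_info->username, GFP_KERNEL);
	if (!ses->user_name)
		goto get_ses_fail;

	/* teardown (assumed location, e.g. sesInfoFree()): */
	kfree(ses->user_name);
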
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index 17afb0fbcaed..a5d1106fcbde 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -37,10 +37,9 @@
37 37
38#define MAX_TREE_SIZE (2 + MAX_SERVER_SIZE + 1 + MAX_SHARE_SIZE + 1) 38#define MAX_TREE_SIZE (2 + MAX_SERVER_SIZE + 1 + MAX_SHARE_SIZE + 1)
39#define MAX_SERVER_SIZE 15 39#define MAX_SERVER_SIZE 15
40#define MAX_SHARE_SIZE 64 /* used to be 20, this should still be enough */ 40#define MAX_SHARE_SIZE 80
41#define MAX_USERNAME_SIZE 32 /* 32 is to allow for 15 char names + null 41#define MAX_USERNAME_SIZE 256 /* reasonable maximum for current servers */
42 termination then *2 for unicode versions */ 42#define MAX_PASSWORD_SIZE 512 /* max for windows seems to be 256 wide chars */
43#define MAX_PASSWORD_SIZE 512 /* max for windows seems to be 256 wide chars */
44 43
45#define CIFS_MIN_RCV_POOL 4 44#define CIFS_MIN_RCV_POOL 4
46 45
@@ -92,7 +91,8 @@ enum statusEnum {
92 CifsNew = 0, 91 CifsNew = 0,
93 CifsGood, 92 CifsGood,
94 CifsExiting, 93 CifsExiting,
95 CifsNeedReconnect 94 CifsNeedReconnect,
95 CifsNeedNegotiate
96}; 96};
97 97
98enum securityEnum { 98enum securityEnum {
@@ -274,7 +274,7 @@ struct cifsSesInfo {
274 int capabilities; 274 int capabilities;
275 char serverName[SERVER_NAME_LEN_WITH_NULL * 2]; /* BB make bigger for 275 char serverName[SERVER_NAME_LEN_WITH_NULL * 2]; /* BB make bigger for
276 TCP names - will ipv6 and sctp addresses fit? */ 276 TCP names - will ipv6 and sctp addresses fit? */
277 char userName[MAX_USERNAME_SIZE + 1]; 277 char *user_name;
278 char *domainName; 278 char *domainName;
279 char *password; 279 char *password;
280 struct session_key auth_key; 280 struct session_key auth_key;
@@ -817,7 +817,6 @@ GLOBAL_EXTERN unsigned int multiuser_mount; /* if enabled allows new sessions
817 have the uid/password or Kerberos credential 817 have the uid/password or Kerberos credential
818 or equivalent for current user */ 818 or equivalent for current user */
819GLOBAL_EXTERN unsigned int oplockEnabled; 819GLOBAL_EXTERN unsigned int oplockEnabled;
820GLOBAL_EXTERN unsigned int experimEnabled;
821GLOBAL_EXTERN unsigned int lookupCacheEnabled; 820GLOBAL_EXTERN unsigned int lookupCacheEnabled;
822GLOBAL_EXTERN unsigned int global_secflags; /* if on, session setup sent 821GLOBAL_EXTERN unsigned int global_secflags; /* if on, session setup sent
823 with more secure ntlmssp2 challenge/resp */ 822 with more secure ntlmssp2 challenge/resp */
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index 2644a5d6cc67..df959bae6728 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -142,9 +142,9 @@ cifs_reconnect_tcon(struct cifsTconInfo *tcon, int smb_command)
142 */ 142 */
143 while (server->tcpStatus == CifsNeedReconnect) { 143 while (server->tcpStatus == CifsNeedReconnect) {
144 wait_event_interruptible_timeout(server->response_q, 144 wait_event_interruptible_timeout(server->response_q,
145 (server->tcpStatus == CifsGood), 10 * HZ); 145 (server->tcpStatus != CifsNeedReconnect), 10 * HZ);
146 146
147 /* is TCP session is reestablished now ?*/ 147 /* are we still trying to reconnect? */
148 if (server->tcpStatus != CifsNeedReconnect) 148 if (server->tcpStatus != CifsNeedReconnect)
149 break; 149 break;
150 150
@@ -729,7 +729,7 @@ CIFSSMBEcho(struct TCP_Server_Info *server)
729 return rc; 729 return rc;
730 730
731 /* set up echo request */ 731 /* set up echo request */
732 smb->hdr.Tid = cpu_to_le16(0xffff); 732 smb->hdr.Tid = 0xffff;
733 smb->hdr.WordCount = 1; 733 smb->hdr.WordCount = 1;
734 put_unaligned_le16(1, &smb->EchoCount); 734 put_unaligned_le16(1, &smb->EchoCount);
735 put_bcc_le(1, &smb->hdr); 735 put_bcc_le(1, &smb->hdr);
@@ -1884,10 +1884,10 @@ CIFSSMBPosixLock(const int xid, struct cifsTconInfo *tcon,
1884 __constant_cpu_to_le16(CIFS_WRLCK)) 1884 __constant_cpu_to_le16(CIFS_WRLCK))
1885 pLockData->fl_type = F_WRLCK; 1885 pLockData->fl_type = F_WRLCK;
1886 1886
1887 pLockData->fl_start = parm_data->start; 1887 pLockData->fl_start = le64_to_cpu(parm_data->start);
1888 pLockData->fl_end = parm_data->start + 1888 pLockData->fl_end = pLockData->fl_start +
1889 parm_data->length - 1; 1889 le64_to_cpu(parm_data->length) - 1;
1890 pLockData->fl_pid = parm_data->pid; 1890 pLockData->fl_pid = le32_to_cpu(parm_data->pid);
1891 } 1891 }
1892 } 1892 }
1893 1893
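
The POSIX lock fix above is an endianness repair: start, length, and pid arrive little-endian on the wire and must each be converted exactly once at the boundary (the echo fix nearby drops a conversion on 0xffff, which is byte-order invariant anyway). The convention, with an illustrative wire layout:

	struct wire_posix_lock {	/* hypothetical layout */
		__le64 start;
		__le64 length;
		__le32 pid;
	};

	fl->fl_start = le64_to_cpu(parm->start);
	fl->fl_end   = fl->fl_start + le64_to_cpu(parm->length) - 1;
	fl->fl_pid   = le32_to_cpu(parm->pid);
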
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 6e2b2addfc78..db9d55b507d0 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -199,8 +199,7 @@ cifs_reconnect(struct TCP_Server_Info *server)
199 } 199 }
200 spin_unlock(&GlobalMid_Lock); 200 spin_unlock(&GlobalMid_Lock);
201 201
202 while ((server->tcpStatus != CifsExiting) && 202 while (server->tcpStatus == CifsNeedReconnect) {
203 (server->tcpStatus != CifsGood)) {
204 try_to_freeze(); 203 try_to_freeze();
205 204
206 /* we should try only the port we connected to before */ 205 /* we should try only the port we connected to before */
@@ -212,7 +211,7 @@ cifs_reconnect(struct TCP_Server_Info *server)
212 atomic_inc(&tcpSesReconnectCount); 211 atomic_inc(&tcpSesReconnectCount);
213 spin_lock(&GlobalMid_Lock); 212 spin_lock(&GlobalMid_Lock);
214 if (server->tcpStatus != CifsExiting) 213 if (server->tcpStatus != CifsExiting)
215 server->tcpStatus = CifsGood; 214 server->tcpStatus = CifsNeedNegotiate;
216 spin_unlock(&GlobalMid_Lock); 215 spin_unlock(&GlobalMid_Lock);
217 } 216 }
218 } 217 }
@@ -248,24 +247,24 @@ static int check2ndT2(struct smb_hdr *pSMB, unsigned int maxBufSize)
248 total_data_size = get_unaligned_le16(&pSMBt->t2_rsp.TotalDataCount); 247 total_data_size = get_unaligned_le16(&pSMBt->t2_rsp.TotalDataCount);
249 data_in_this_rsp = get_unaligned_le16(&pSMBt->t2_rsp.DataCount); 248 data_in_this_rsp = get_unaligned_le16(&pSMBt->t2_rsp.DataCount);
250 249
251 remaining = total_data_size - data_in_this_rsp; 250 if (total_data_size == data_in_this_rsp)
252
253 if (remaining == 0)
254 return 0; 251 return 0;
255 else if (remaining < 0) { 252 else if (total_data_size < data_in_this_rsp) {
256 cFYI(1, "total data %d smaller than data in frame %d", 253 cFYI(1, "total data %d smaller than data in frame %d",
257 total_data_size, data_in_this_rsp); 254 total_data_size, data_in_this_rsp);
258 return -EINVAL; 255 return -EINVAL;
259 } else {
260 cFYI(1, "missing %d bytes from transact2, check next response",
261 remaining);
262 if (total_data_size > maxBufSize) {
263 cERROR(1, "TotalDataSize %d is over maximum buffer %d",
264 total_data_size, maxBufSize);
265 return -EINVAL;
266 }
267 return remaining;
268 } 256 }
257
258 remaining = total_data_size - data_in_this_rsp;
259
260 cFYI(1, "missing %d bytes from transact2, check next response",
261 remaining);
262 if (total_data_size > maxBufSize) {
263 cERROR(1, "TotalDataSize %d is over maximum buffer %d",
264 total_data_size, maxBufSize);
265 return -EINVAL;
266 }
267 return remaining;
269} 268}
270 269
271static int coalesce_t2(struct smb_hdr *psecond, struct smb_hdr *pTargetSMB) 270static int coalesce_t2(struct smb_hdr *psecond, struct smb_hdr *pTargetSMB)
@@ -421,7 +420,7 @@ cifs_demultiplex_thread(struct TCP_Server_Info *server)
421 pdu_length = 4; /* enough to get RFC1001 header */ 420 pdu_length = 4; /* enough to get RFC1001 header */
422 421
423incomplete_rcv: 422incomplete_rcv:
424 if (echo_retries > 0 && 423 if (echo_retries > 0 && server->tcpStatus == CifsGood &&
425 time_after(jiffies, server->lstrp + 424 time_after(jiffies, server->lstrp +
426 (echo_retries * SMB_ECHO_INTERVAL))) { 425 (echo_retries * SMB_ECHO_INTERVAL))) {
427 cERROR(1, "Server %s has not responded in %d seconds. " 426 cERROR(1, "Server %s has not responded in %d seconds. "
@@ -881,7 +880,8 @@ cifs_parse_mount_options(char *options, const char *devname,
881 /* null user, ie anonymous, authentication */ 880 /* null user, ie anonymous, authentication */
882 vol->nullauth = 1; 881 vol->nullauth = 1;
883 } 882 }
884 if (strnlen(value, 200) < 200) { 883 if (strnlen(value, MAX_USERNAME_SIZE) <
884 MAX_USERNAME_SIZE) {
885 vol->username = value; 885 vol->username = value;
886 } else { 886 } else {
887 printk(KERN_WARNING "CIFS: username too long\n"); 887 printk(KERN_WARNING "CIFS: username too long\n");
@@ -1472,7 +1472,7 @@ srcip_matches(struct sockaddr *srcaddr, struct sockaddr *rhs)
1472static bool 1472static bool
1473match_port(struct TCP_Server_Info *server, struct sockaddr *addr) 1473match_port(struct TCP_Server_Info *server, struct sockaddr *addr)
1474{ 1474{
1475 unsigned short int port, *sport; 1475 __be16 port, *sport;
1476 1476
1477 switch (addr->sa_family) { 1477 switch (addr->sa_family) {
1478 case AF_INET: 1478 case AF_INET:
@@ -1765,6 +1765,7 @@ cifs_get_tcp_session(struct smb_vol *volume_info)
1765 module_put(THIS_MODULE); 1765 module_put(THIS_MODULE);
1766 goto out_err_crypto_release; 1766 goto out_err_crypto_release;
1767 } 1767 }
1768 tcp_ses->tcpStatus = CifsNeedNegotiate;
1768 1769
1769 /* thread spawned, put it on the list */ 1770 /* thread spawned, put it on the list */
1770 spin_lock(&cifs_tcp_ses_lock); 1771 spin_lock(&cifs_tcp_ses_lock);
@@ -1808,7 +1809,9 @@ cifs_find_smb_ses(struct TCP_Server_Info *server, struct smb_vol *vol)
1808 break; 1809 break;
1809 default: 1810 default:
1810 /* anything else takes username/password */ 1811 /* anything else takes username/password */
1811 if (strncmp(ses->userName, vol->username, 1812 if (ses->user_name == NULL)
1813 continue;
1814 if (strncmp(ses->user_name, vol->username,
1812 MAX_USERNAME_SIZE)) 1815 MAX_USERNAME_SIZE))
1813 continue; 1816 continue;
1814 if (strlen(vol->username) != 0 && 1817 if (strlen(vol->username) != 0 &&
@@ -1851,6 +1854,8 @@ cifs_put_smb_ses(struct cifsSesInfo *ses)
1851 cifs_put_tcp_session(server); 1854 cifs_put_tcp_session(server);
1852} 1855}
1853 1856
1857static bool warned_on_ntlm; /* globals init to false automatically */
1858
1854static struct cifsSesInfo * 1859static struct cifsSesInfo *
1855cifs_get_smb_ses(struct TCP_Server_Info *server, struct smb_vol *volume_info) 1860cifs_get_smb_ses(struct TCP_Server_Info *server, struct smb_vol *volume_info)
1856{ 1861{
@@ -1906,9 +1911,11 @@ cifs_get_smb_ses(struct TCP_Server_Info *server, struct smb_vol *volume_info)
1906 else 1911 else
1907 sprintf(ses->serverName, "%pI4", &addr->sin_addr); 1912 sprintf(ses->serverName, "%pI4", &addr->sin_addr);
1908 1913
1909 if (volume_info->username) 1914 if (volume_info->username) {
1910 strncpy(ses->userName, volume_info->username, 1915 ses->user_name = kstrdup(volume_info->username, GFP_KERNEL);
1911 MAX_USERNAME_SIZE); 1916 if (!ses->user_name)
1917 goto get_ses_fail;
1918 }
1912 1919
1913 /* volume_info->password freed at unmount */ 1920 /* volume_info->password freed at unmount */
1914 if (volume_info->password) { 1921 if (volume_info->password) {
@@ -1923,6 +1930,15 @@ cifs_get_smb_ses(struct TCP_Server_Info *server, struct smb_vol *volume_info)
1923 } 1930 }
1924 ses->cred_uid = volume_info->cred_uid; 1931 ses->cred_uid = volume_info->cred_uid;
1925 ses->linux_uid = volume_info->linux_uid; 1932 ses->linux_uid = volume_info->linux_uid;
1933
1934 /* ntlmv2 is much stronger than ntlm security, and has been broadly
 1935 supported for many years; time to update the default security mechanism */
1936 if ((volume_info->secFlg == 0) && warned_on_ntlm == false) {
1937 warned_on_ntlm = true;
1938 cERROR(1, "default security mechanism requested. The default "
1939 "security mechanism will be upgraded from ntlm to "
1940 "ntlmv2 in kernel release 2.6.41");
1941 }
1926 ses->overrideSecFlg = volume_info->secFlg; 1942 ses->overrideSecFlg = volume_info->secFlg;
1927 1943
1928 mutex_lock(&ses->session_mutex); 1944 mutex_lock(&ses->session_mutex);
@@ -2276,7 +2292,7 @@ static int
2276generic_ip_connect(struct TCP_Server_Info *server) 2292generic_ip_connect(struct TCP_Server_Info *server)
2277{ 2293{
2278 int rc = 0; 2294 int rc = 0;
2279 unsigned short int sport; 2295 __be16 sport;
2280 int slen, sfamily; 2296 int slen, sfamily;
2281 struct socket *socket = server->ssocket; 2297 struct socket *socket = server->ssocket;
2282 struct sockaddr *saddr; 2298 struct sockaddr *saddr;
@@ -2361,7 +2377,7 @@ generic_ip_connect(struct TCP_Server_Info *server)
2361static int 2377static int
2362ip_connect(struct TCP_Server_Info *server) 2378ip_connect(struct TCP_Server_Info *server)
2363{ 2379{
2364 unsigned short int *sport; 2380 __be16 *sport;
2365 struct sockaddr_in6 *addr6 = (struct sockaddr_in6 *)&server->dstaddr; 2381 struct sockaddr_in6 *addr6 = (struct sockaddr_in6 *)&server->dstaddr;
2366 struct sockaddr_in *addr = (struct sockaddr_in *)&server->dstaddr; 2382 struct sockaddr_in *addr = (struct sockaddr_in *)&server->dstaddr;
2367 2383
@@ -2826,7 +2842,7 @@ try_mount_again:
2826 2842
2827remote_path_check: 2843remote_path_check:
2828 /* check if a whole path (including prepath) is not remote */ 2844 /* check if a whole path (including prepath) is not remote */
2829 if (!rc && cifs_sb->prepathlen && tcon) { 2845 if (!rc && tcon) {
2830 /* build_path_to_root works only when we have a valid tcon */ 2846 /* build_path_to_root works only when we have a valid tcon */
2831 full_path = cifs_build_path_to_root(cifs_sb, tcon); 2847 full_path = cifs_build_path_to_root(cifs_sb, tcon);
2832 if (full_path == NULL) { 2848 if (full_path == NULL) {
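The username handling above replaces a fixed 200-byte copy with kstrdup(), paired with the kfree() added to sesInfoFree() further down. A minimal sketch of that own-the-string pattern, with hypothetical type and function names:

#include <linux/slab.h>
#include <linux/string.h>
#include <linux/errno.h>

struct session { char *user_name; };

static int session_set_user(struct session *s, const char *name)
{
        s->user_name = kstrdup(name, GFP_KERNEL);
        if (!s->user_name)
                return -ENOMEM; /* caller unwinds through its fail path */
        return 0;
}

static void session_free(struct session *s)
{
        kfree(s->user_name);    /* kfree(NULL) is a harmless no-op */
}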
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index c27d236738fc..faf59529e847 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -575,8 +575,10 @@ reopen_error_exit:
575 575
576int cifs_close(struct inode *inode, struct file *file) 576int cifs_close(struct inode *inode, struct file *file)
577{ 577{
578 cifsFileInfo_put(file->private_data); 578 if (file->private_data != NULL) {
579 file->private_data = NULL; 579 cifsFileInfo_put(file->private_data);
580 file->private_data = NULL;
581 }
580 582
581 /* return code from the ->release op is always ignored */ 583 /* return code from the ->release op is always ignored */
582 return 0; 584 return 0;
@@ -970,6 +972,9 @@ static ssize_t cifs_write(struct cifsFileInfo *open_file,
970 total_written += bytes_written) { 972 total_written += bytes_written) {
971 rc = -EAGAIN; 973 rc = -EAGAIN;
972 while (rc == -EAGAIN) { 974 while (rc == -EAGAIN) {
975 struct kvec iov[2];
976 unsigned int len;
977
973 if (open_file->invalidHandle) { 978 if (open_file->invalidHandle) {
974 /* we could deadlock if we called 979 /* we could deadlock if we called
975 filemap_fdatawait from here so tell 980 filemap_fdatawait from here so tell
@@ -979,31 +984,14 @@ static ssize_t cifs_write(struct cifsFileInfo *open_file,
979 if (rc != 0) 984 if (rc != 0)
980 break; 985 break;
981 } 986 }
982 if (experimEnabled || (pTcon->ses->server && 987
983 ((pTcon->ses->server->secMode & 988 len = min((size_t)cifs_sb->wsize,
984 (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED)) 989 write_size - total_written);
985 == 0))) { 990 /* iov[0] is reserved for smb header */
986 struct kvec iov[2]; 991 iov[1].iov_base = (char *)write_data + total_written;
987 unsigned int len; 992 iov[1].iov_len = len;
988 993 rc = CIFSSMBWrite2(xid, pTcon, open_file->netfid, len,
989 len = min((size_t)cifs_sb->wsize, 994 *poffset, &bytes_written, iov, 1, 0);
990 write_size - total_written);
991 /* iov[0] is reserved for smb header */
992 iov[1].iov_base = (char *)write_data +
993 total_written;
994 iov[1].iov_len = len;
995 rc = CIFSSMBWrite2(xid, pTcon,
996 open_file->netfid, len,
997 *poffset, &bytes_written,
998 iov, 1, 0);
999 } else
1000 rc = CIFSSMBWrite(xid, pTcon,
1001 open_file->netfid,
1002 min_t(const int, cifs_sb->wsize,
1003 write_size - total_written),
1004 *poffset, &bytes_written,
1005 write_data + total_written,
1006 NULL, 0);
1007 } 995 }
1008 if (rc || (bytes_written == 0)) { 996 if (rc || (bytes_written == 0)) {
1009 if (total_written) 997 if (total_written)
@@ -1240,12 +1228,6 @@ static int cifs_writepages(struct address_space *mapping,
1240 } 1228 }
1241 1229
1242 tcon = tlink_tcon(open_file->tlink); 1230 tcon = tlink_tcon(open_file->tlink);
1243 if (!experimEnabled && tcon->ses->server->secMode &
1244 (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED)) {
1245 cifsFileInfo_put(open_file);
1246 kfree(iov);
1247 return generic_writepages(mapping, wbc);
1248 }
1249 cifsFileInfo_put(open_file); 1231 cifsFileInfo_put(open_file);
1250 1232
1251 xid = GetXid(); 1233 xid = GetXid();
@@ -1980,6 +1962,24 @@ static ssize_t cifs_read(struct file *file, char *read_data, size_t read_size,
1980 return total_read; 1962 return total_read;
1981} 1963}
1982 1964
1965/*
1966 * If the page is mmap'ed into a process' page tables, then we need to make
1967 * sure that it doesn't change while being written back.
1968 */
1969static int
1970cifs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
1971{
1972 struct page *page = vmf->page;
1973
1974 lock_page(page);
1975 return VM_FAULT_LOCKED;
1976}
1977
1978static struct vm_operations_struct cifs_file_vm_ops = {
1979 .fault = filemap_fault,
1980 .page_mkwrite = cifs_page_mkwrite,
1981};
1982
1983int cifs_file_strict_mmap(struct file *file, struct vm_area_struct *vma) 1983int cifs_file_strict_mmap(struct file *file, struct vm_area_struct *vma)
1984{ 1984{
1985 int rc, xid; 1985 int rc, xid;
@@ -1991,6 +1991,8 @@ int cifs_file_strict_mmap(struct file *file, struct vm_area_struct *vma)
1991 cifs_invalidate_mapping(inode); 1991 cifs_invalidate_mapping(inode);
1992 1992
1993 rc = generic_file_mmap(file, vma); 1993 rc = generic_file_mmap(file, vma);
1994 if (rc == 0)
1995 vma->vm_ops = &cifs_file_vm_ops;
1994 FreeXid(xid); 1996 FreeXid(xid);
1995 return rc; 1997 return rc;
1996} 1998}
@@ -2007,6 +2009,8 @@ int cifs_file_mmap(struct file *file, struct vm_area_struct *vma)
2007 return rc; 2009 return rc;
2008 } 2010 }
2009 rc = generic_file_mmap(file, vma); 2011 rc = generic_file_mmap(file, vma);
2012 if (rc == 0)
2013 vma->vm_ops = &cifs_file_vm_ops;
2010 FreeXid(xid); 2014 FreeXid(xid);
2011 return rc; 2015 return rc;
2012} 2016}
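Both mmap paths above install the new vm_ops only after generic_file_mmap() succeeds, so a failed mmap never leaves a vma pointing at CIFS methods. The shape of that wiring, sketched with hypothetical names:

#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/pagemap.h>

static int example_page_mkwrite(struct vm_area_struct *vma,
                                struct vm_fault *vmf)
{
        lock_page(vmf->page);   /* pin the page while it is written back */
        return VM_FAULT_LOCKED;
}

static struct vm_operations_struct example_vm_ops = {
        .fault          = filemap_fault,
        .page_mkwrite   = example_page_mkwrite,
};

int example_file_mmap(struct file *file, struct vm_area_struct *vma)
{
        int rc = generic_file_mmap(file, vma);

        if (rc == 0)
                vma->vm_ops = &example_vm_ops;  /* only on success */
        return rc;
}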
diff --git a/fs/cifs/link.c b/fs/cifs/link.c
index e8804d373404..ce417a9764a3 100644
--- a/fs/cifs/link.c
+++ b/fs/cifs/link.c
@@ -239,7 +239,7 @@ CIFSQueryMFSymLink(const int xid, struct cifsTconInfo *tcon,
239 if (rc != 0) 239 if (rc != 0)
240 return rc; 240 return rc;
241 241
242 if (file_info.EndOfFile != CIFS_MF_SYMLINK_FILE_SIZE) { 242 if (file_info.EndOfFile != cpu_to_le64(CIFS_MF_SYMLINK_FILE_SIZE)) {
243 CIFSSMBClose(xid, tcon, netfid); 243 CIFSSMBClose(xid, tcon, netfid);
244 /* it's not a symlink */ 244 /* it's not a symlink */
245 return -EINVAL; 245 return -EINVAL;
@@ -316,7 +316,7 @@ CIFSCheckMFSymlink(struct cifs_fattr *fattr,
316 if (rc != 0) 316 if (rc != 0)
317 goto out; 317 goto out;
318 318
319 if (file_info.EndOfFile != CIFS_MF_SYMLINK_FILE_SIZE) { 319 if (file_info.EndOfFile != cpu_to_le64(CIFS_MF_SYMLINK_FILE_SIZE)) {
320 CIFSSMBClose(xid, pTcon, netfid); 320 CIFSSMBClose(xid, pTcon, netfid);
321 /* it's not a symlink */ 321 /* it's not a symlink */
322 goto out; 322 goto out;
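Both hunks above fix the same class of bug: an on-disk __le64 field was compared against a host-order constant, which only happens to work on little-endian machines. The rule, sketched with an illustrative struct:

#include <linux/types.h>
#include <asm/byteorder.h>

struct on_disk_info { __le64 end_of_file; };    /* illustrative */

static bool size_matches(const struct on_disk_info *fi, u64 expected)
{
        /* convert the constant side, never the stored field */
        return fi->end_of_file == cpu_to_le64(expected);
}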
diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c
index 2a930a752a78..0c684ae4c071 100644
--- a/fs/cifs/misc.c
+++ b/fs/cifs/misc.c
@@ -100,6 +100,7 @@ sesInfoFree(struct cifsSesInfo *buf_to_free)
100 memset(buf_to_free->password, 0, strlen(buf_to_free->password)); 100 memset(buf_to_free->password, 0, strlen(buf_to_free->password));
101 kfree(buf_to_free->password); 101 kfree(buf_to_free->password);
102 } 102 }
103 kfree(buf_to_free->user_name);
103 kfree(buf_to_free->domainName); 104 kfree(buf_to_free->domainName);
104 kfree(buf_to_free); 105 kfree(buf_to_free);
105} 106}
@@ -520,7 +521,7 @@ is_valid_oplock_break(struct smb_hdr *buf, struct TCP_Server_Info *srv)
520 (struct smb_com_transaction_change_notify_rsp *)buf; 521 (struct smb_com_transaction_change_notify_rsp *)buf;
521 struct file_notify_information *pnotify; 522 struct file_notify_information *pnotify;
522 __u32 data_offset = 0; 523 __u32 data_offset = 0;
523 if (pSMBr->ByteCount > sizeof(struct file_notify_information)) { 524 if (get_bcc_le(buf) > sizeof(struct file_notify_information)) {
524 data_offset = le32_to_cpu(pSMBr->DataOffset); 525 data_offset = le32_to_cpu(pSMBr->DataOffset);
525 526
526 pnotify = (struct file_notify_information *) 527 pnotify = (struct file_notify_information *)
diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c
index 16765703131b..f6728eb6f4b9 100644
--- a/fs/cifs/sess.c
+++ b/fs/cifs/sess.c
@@ -219,12 +219,12 @@ static void unicode_ssetup_strings(char **pbcc_area, struct cifsSesInfo *ses,
219 bcc_ptr++; 219 bcc_ptr++;
220 } */ 220 } */
221 /* copy user */ 221 /* copy user */
222 if (ses->userName == NULL) { 222 if (ses->user_name == NULL) {
223 /* null user mount */ 223 /* null user mount */
224 *bcc_ptr = 0; 224 *bcc_ptr = 0;
225 *(bcc_ptr+1) = 0; 225 *(bcc_ptr+1) = 0;
226 } else { 226 } else {
227 bytes_ret = cifs_strtoUCS((__le16 *) bcc_ptr, ses->userName, 227 bytes_ret = cifs_strtoUCS((__le16 *) bcc_ptr, ses->user_name,
228 MAX_USERNAME_SIZE, nls_cp); 228 MAX_USERNAME_SIZE, nls_cp);
229 } 229 }
230 bcc_ptr += 2 * bytes_ret; 230 bcc_ptr += 2 * bytes_ret;
@@ -244,12 +244,11 @@ static void ascii_ssetup_strings(char **pbcc_area, struct cifsSesInfo *ses,
244 /* copy user */ 244 /* copy user */
245 /* BB what about null user mounts - check that we do this BB */ 245 /* BB what about null user mounts - check that we do this BB */
246 /* copy user */ 246 /* copy user */
247 if (ses->userName == NULL) { 247 if (ses->user_name != NULL)
248 /* BB what about null user mounts - check that we do this BB */ 248 strncpy(bcc_ptr, ses->user_name, MAX_USERNAME_SIZE);
249 } else { 249 /* else null user mount */
250 strncpy(bcc_ptr, ses->userName, MAX_USERNAME_SIZE); 250
251 } 251 bcc_ptr += strnlen(ses->user_name, MAX_USERNAME_SIZE);
252 bcc_ptr += strnlen(ses->userName, MAX_USERNAME_SIZE);
253 *bcc_ptr = 0; 252 *bcc_ptr = 0;
254 bcc_ptr++; /* account for null termination */ 253 bcc_ptr++; /* account for null termination */
255 254
@@ -405,8 +404,8 @@ static int decode_ntlmssp_challenge(char *bcc_ptr, int blob_len,
405 /* BB spec says that if AvId field of MsvAvTimestamp is populated then 404 /* BB spec says that if AvId field of MsvAvTimestamp is populated then
406 we must set the MIC field of the AUTHENTICATE_MESSAGE */ 405 we must set the MIC field of the AUTHENTICATE_MESSAGE */
407 ses->ntlmssp->server_flags = le32_to_cpu(pblob->NegotiateFlags); 406 ses->ntlmssp->server_flags = le32_to_cpu(pblob->NegotiateFlags);
408 tioffset = cpu_to_le16(pblob->TargetInfoArray.BufferOffset); 407 tioffset = le32_to_cpu(pblob->TargetInfoArray.BufferOffset);
409 tilen = cpu_to_le16(pblob->TargetInfoArray.Length); 408 tilen = le16_to_cpu(pblob->TargetInfoArray.Length);
410 if (tilen) { 409 if (tilen) {
411 ses->auth_key.response = kmalloc(tilen, GFP_KERNEL); 410 ses->auth_key.response = kmalloc(tilen, GFP_KERNEL);
412 if (!ses->auth_key.response) { 411 if (!ses->auth_key.response) {
@@ -523,14 +522,14 @@ static int build_ntlmssp_auth_blob(unsigned char *pbuffer,
523 tmp += len; 522 tmp += len;
524 } 523 }
525 524
526 if (ses->userName == NULL) { 525 if (ses->user_name == NULL) {
527 sec_blob->UserName.BufferOffset = cpu_to_le32(tmp - pbuffer); 526 sec_blob->UserName.BufferOffset = cpu_to_le32(tmp - pbuffer);
528 sec_blob->UserName.Length = 0; 527 sec_blob->UserName.Length = 0;
529 sec_blob->UserName.MaximumLength = 0; 528 sec_blob->UserName.MaximumLength = 0;
530 tmp += 2; 529 tmp += 2;
531 } else { 530 } else {
532 int len; 531 int len;
533 len = cifs_strtoUCS((__le16 *)tmp, ses->userName, 532 len = cifs_strtoUCS((__le16 *)tmp, ses->user_name,
534 MAX_USERNAME_SIZE, nls_cp); 533 MAX_USERNAME_SIZE, nls_cp);
535 len *= 2; /* unicode is 2 bytes each */ 534 len *= 2; /* unicode is 2 bytes each */
536 sec_blob->UserName.BufferOffset = cpu_to_le32(tmp - pbuffer); 535 sec_blob->UserName.BufferOffset = cpu_to_le32(tmp - pbuffer);
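The decode_ntlmssp_challenge() fix above corrects both the direction and the width of the converters: reads from a wire-format blob use leXX_to_cpu(), and XX must match the field (the old code ran a 32-bit offset through cpu_to_le16()). A sketch:

#include <linux/types.h>
#include <asm/byteorder.h>

struct wire_hdr { __le32 offset; __le16 length; };      /* illustrative */

static void read_wire_hdr(const struct wire_hdr *w, u32 *off, u16 *len)
{
        *off = le32_to_cpu(w->offset);  /* 32-bit field, 32-bit helper */
        *len = le16_to_cpu(w->length);  /* 16-bit field, 16-bit helper */
}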
diff --git a/fs/dcache.c b/fs/dcache.c
index ad25c4cec7d5..129a35730994 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -2131,7 +2131,7 @@ EXPORT_SYMBOL(d_rehash);
2131 */ 2131 */
2132void dentry_update_name_case(struct dentry *dentry, struct qstr *name) 2132void dentry_update_name_case(struct dentry *dentry, struct qstr *name)
2133{ 2133{
2134 BUG_ON(!mutex_is_locked(&dentry->d_inode->i_mutex)); 2134 BUG_ON(!mutex_is_locked(&dentry->d_parent->d_inode->i_mutex));
2135 BUG_ON(dentry->d_name.len != name->len); /* d_lookup gives this */ 2135 BUG_ON(dentry->d_name.len != name->len); /* d_lookup gives this */
2136 2136
2137 spin_lock(&dentry->d_lock); 2137 spin_lock(&dentry->d_lock);
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index b5c2f3c97d71..68b2e43d7c35 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -3291,7 +3291,7 @@ static int ext3_writepage_trans_blocks(struct inode *inode)
3291 if (ext3_should_journal_data(inode)) 3291 if (ext3_should_journal_data(inode))
3292 ret = 3 * (bpp + indirects) + 2; 3292 ret = 3 * (bpp + indirects) + 2;
3293 else 3293 else
3294 ret = 2 * (bpp + indirects) + 2; 3294 ret = 2 * (bpp + indirects) + indirects + 2;
3295 3295
3296#ifdef CONFIG_QUOTA 3296#ifdef CONFIG_QUOTA
3297 /* We know that structure was already allocated during dquot_initialize so 3297 /* We know that structure was already allocated during dquot_initialize so
diff --git a/fs/ext4/ext4_jbd2.h b/fs/ext4/ext4_jbd2.h
index e25e99bf7ee1..d0f53538a57f 100644
--- a/fs/ext4/ext4_jbd2.h
+++ b/fs/ext4/ext4_jbd2.h
@@ -86,8 +86,8 @@
86 86
87#ifdef CONFIG_QUOTA 87#ifdef CONFIG_QUOTA
88/* Amount of blocks needed for quota update - we know that the structure was 88/* Amount of blocks needed for quota update - we know that the structure was
89 * allocated so we need to update only inode+data */ 89 * allocated so we need to update only data block */
90#define EXT4_QUOTA_TRANS_BLOCKS(sb) (test_opt(sb, QUOTA) ? 2 : 0) 90#define EXT4_QUOTA_TRANS_BLOCKS(sb) (test_opt(sb, QUOTA) ? 1 : 0)
91/* Amount of blocks needed for quota insert/delete - we do some block writes 91/* Amount of blocks needed for quota insert/delete - we do some block writes
92 * but inode, sb and group updates are done only once */ 92 * but inode, sb and group updates are done only once */
93#define EXT4_QUOTA_INIT_BLOCKS(sb) (test_opt(sb, QUOTA) ? (DQUOT_INIT_ALLOC*\ 93#define EXT4_QUOTA_INIT_BLOCKS(sb) (test_opt(sb, QUOTA) ? (DQUOT_INIT_ALLOC*\
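The change above halves EXT4_QUOTA_TRANS_BLOCKS because a routine quota update only dirties the quota data block; the quota inode itself is settled once at initialization. Back-of-envelope credit math, written as a hypothetical helper rather than the real macro:

#include <linux/types.h>

/* One journal credit per block a handle may dirty: fs metadata plus
 * one quota data block per enabled quota type. */
static inline int example_trans_credits(int fs_blocks, bool quota)
{
        return fs_blocks + (quota ? 2 : 0);     /* usr + grp data blocks */
}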
diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c
index 4673bc05274f..e9473cbe80df 100644
--- a/fs/ext4/fsync.c
+++ b/fs/ext4/fsync.c
@@ -125,9 +125,11 @@ extern int ext4_flush_completed_IO(struct inode *inode)
125 * the parent directory's parent as well, and so on recursively, if 125 * the parent directory's parent as well, and so on recursively, if
126 * they are also freshly created. 126 * they are also freshly created.
127 */ 127 */
128static void ext4_sync_parent(struct inode *inode) 128static int ext4_sync_parent(struct inode *inode)
129{ 129{
130 struct writeback_control wbc;
130 struct dentry *dentry = NULL; 131 struct dentry *dentry = NULL;
132 int ret = 0;
131 133
132 while (inode && ext4_test_inode_state(inode, EXT4_STATE_NEWENTRY)) { 134 while (inode && ext4_test_inode_state(inode, EXT4_STATE_NEWENTRY)) {
133 ext4_clear_inode_state(inode, EXT4_STATE_NEWENTRY); 135 ext4_clear_inode_state(inode, EXT4_STATE_NEWENTRY);
@@ -136,8 +138,17 @@ static void ext4_sync_parent(struct inode *inode)
136 if (!dentry || !dentry->d_parent || !dentry->d_parent->d_inode) 138 if (!dentry || !dentry->d_parent || !dentry->d_parent->d_inode)
137 break; 139 break;
138 inode = dentry->d_parent->d_inode; 140 inode = dentry->d_parent->d_inode;
139 sync_mapping_buffers(inode->i_mapping); 141 ret = sync_mapping_buffers(inode->i_mapping);
142 if (ret)
143 break;
144 memset(&wbc, 0, sizeof(wbc));
145 wbc.sync_mode = WB_SYNC_ALL;
146 wbc.nr_to_write = 0; /* only write out the inode */
147 ret = sync_inode(inode, &wbc);
148 if (ret)
149 break;
140 } 150 }
151 return ret;
141} 152}
142 153
143/* 154/*
@@ -176,7 +187,7 @@ int ext4_sync_file(struct file *file, int datasync)
176 if (!journal) { 187 if (!journal) {
177 ret = generic_file_fsync(file, datasync); 188 ret = generic_file_fsync(file, datasync);
178 if (!ret && !list_empty(&inode->i_dentry)) 189 if (!ret && !list_empty(&inode->i_dentry))
179 ext4_sync_parent(inode); 190 ret = ext4_sync_parent(inode);
180 goto out; 191 goto out;
181 } 192 }
182 193
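ext4_sync_parent() now pushes the parent inode itself, not just its buffers. The idiom: a zeroed writeback_control with WB_SYNC_ALL and nr_to_write == 0 writes and waits on the inode without touching its data pages. A minimal sketch:

#include <linux/fs.h>
#include <linux/writeback.h>

static int example_sync_inode_only(struct inode *inode)
{
        struct writeback_control wbc = {
                .sync_mode   = WB_SYNC_ALL,     /* wait for completion */
                .nr_to_write = 0,               /* inode only, no pages */
        };

        return sync_inode(inode, &wbc);
}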
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index ad8e303c0d29..f2fa5e8a582c 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -2502,6 +2502,7 @@ static int ext4_da_get_block_prep(struct inode *inode, sector_t iblock,
2502 * for partial write. 2502 * for partial write.
2503 */ 2503 */
2504 set_buffer_new(bh); 2504 set_buffer_new(bh);
2505 set_buffer_mapped(bh);
2505 } 2506 }
2506 return 0; 2507 return 0;
2507} 2508}
@@ -4429,8 +4430,8 @@ void ext4_truncate(struct inode *inode)
4429 Indirect chain[4]; 4430 Indirect chain[4];
4430 Indirect *partial; 4431 Indirect *partial;
4431 __le32 nr = 0; 4432 __le32 nr = 0;
4432 int n; 4433 int n = 0;
4433 ext4_lblk_t last_block; 4434 ext4_lblk_t last_block, max_block;
4434 unsigned blocksize = inode->i_sb->s_blocksize; 4435 unsigned blocksize = inode->i_sb->s_blocksize;
4435 4436
4436 trace_ext4_truncate_enter(inode); 4437 trace_ext4_truncate_enter(inode);
@@ -4455,14 +4456,18 @@ void ext4_truncate(struct inode *inode)
4455 4456
4456 last_block = (inode->i_size + blocksize-1) 4457 last_block = (inode->i_size + blocksize-1)
4457 >> EXT4_BLOCK_SIZE_BITS(inode->i_sb); 4458 >> EXT4_BLOCK_SIZE_BITS(inode->i_sb);
4459 max_block = (EXT4_SB(inode->i_sb)->s_bitmap_maxbytes + blocksize-1)
4460 >> EXT4_BLOCK_SIZE_BITS(inode->i_sb);
4458 4461
4459 if (inode->i_size & (blocksize - 1)) 4462 if (inode->i_size & (blocksize - 1))
4460 if (ext4_block_truncate_page(handle, mapping, inode->i_size)) 4463 if (ext4_block_truncate_page(handle, mapping, inode->i_size))
4461 goto out_stop; 4464 goto out_stop;
4462 4465
4463 n = ext4_block_to_path(inode, last_block, offsets, NULL); 4466 if (last_block != max_block) {
4464 if (n == 0) 4467 n = ext4_block_to_path(inode, last_block, offsets, NULL);
4465 goto out_stop; /* error */ 4468 if (n == 0)
4469 goto out_stop; /* error */
4470 }
4466 4471
4467 /* 4472 /*
4468 * OK. This truncate is going to happen. We add the inode to the 4473 * OK. This truncate is going to happen. We add the inode to the
@@ -4493,7 +4498,13 @@ void ext4_truncate(struct inode *inode)
4493 */ 4498 */
4494 ei->i_disksize = inode->i_size; 4499 ei->i_disksize = inode->i_size;
4495 4500
4496 if (n == 1) { /* direct blocks */ 4501 if (last_block == max_block) {
4502 /*
4503 * It is unnecessary to free any data blocks if last_block is
4504 * equal to the indirect block limit.
4505 */
4506 goto out_unlock;
4507 } else if (n == 1) { /* direct blocks */
4497 ext4_free_data(handle, inode, NULL, i_data+offsets[0], 4508 ext4_free_data(handle, inode, NULL, i_data+offsets[0],
4498 i_data + EXT4_NDIR_BLOCKS); 4509 i_data + EXT4_NDIR_BLOCKS);
4499 goto do_indirects; 4510 goto do_indirects;
@@ -4553,6 +4564,7 @@ do_indirects:
4553 ; 4564 ;
4554 } 4565 }
4555 4566
4567out_unlock:
4556 up_write(&ei->i_data_sem); 4568 up_write(&ei->i_data_sem);
4557 inode->i_mtime = inode->i_ctime = ext4_current_time(inode); 4569 inode->i_mtime = inode->i_ctime = ext4_current_time(inode);
4558 ext4_mark_inode_dirty(handle, inode); 4570 ext4_mark_inode_dirty(handle, inode);
@@ -5398,13 +5410,12 @@ static int ext4_indirect_trans_blocks(struct inode *inode, int nrblocks,
5398 /* if nrblocks are contiguous */ 5410 /* if nrblocks are contiguous */
5399 if (chunk) { 5411 if (chunk) {
5400 /* 5412 /*
5401 * With N contiguous data blocks, it need at most 5413 * With N contiguous data blocks, we need at most
5402 * N/EXT4_ADDR_PER_BLOCK(inode->i_sb) indirect blocks 5414 * N/EXT4_ADDR_PER_BLOCK(inode->i_sb) + 1 indirect blocks,
5403 * 2 dindirect blocks 5415 * 2 dindirect blocks, and 1 tindirect block
5404 * 1 tindirect block
5405 */ 5416 */
5406 indirects = nrblocks / EXT4_ADDR_PER_BLOCK(inode->i_sb); 5417 return DIV_ROUND_UP(nrblocks,
5407 return indirects + 3; 5418 EXT4_ADDR_PER_BLOCK(inode->i_sb)) + 4;
5408 } 5419 }
5409 /* 5420 /*
 5410 * if nrblocks are not contiguous, worse case, each block touch 5421 * if nrblocks are not contiguous, worse case, each block touch
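As a worked example of the new bound (not from the commit): with 4K blocks there are 1024 block addresses per indirect block, so 3000 contiguous blocks cost DIV_ROUND_UP(3000, 1024) + 4 = 7 metadata blocks, that is, the rounded-up indirects plus one boundary indirect, two dindirects, and one tindirect. In helper form:

#include <linux/kernel.h>       /* DIV_ROUND_UP */

static int example_indirect_cost(int nrblocks, int addrs_per_block)
{
        /* worst case: indirects covering nrblocks, +1 boundary indirect,
         * +2 dindirect, +1 tindirect */
        return DIV_ROUND_UP(nrblocks, addrs_per_block) + 4;
}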
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 056474b7b8e0..8553dfb310af 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -242,27 +242,44 @@ static void ext4_put_nojournal(handle_t *handle)
242 * journal_end calls result in the superblock being marked dirty, so 242 * journal_end calls result in the superblock being marked dirty, so
243 * that sync() will call the filesystem's write_super callback if 243 * that sync() will call the filesystem's write_super callback if
244 * appropriate. 244 * appropriate.
245 *
246 * To avoid j_barrier hold in userspace when a user calls freeze(),
247 * ext4 prevents a new handle from being started by s_frozen, which
248 * is in an upper layer.
245 */ 249 */
246handle_t *ext4_journal_start_sb(struct super_block *sb, int nblocks) 250handle_t *ext4_journal_start_sb(struct super_block *sb, int nblocks)
247{ 251{
248 journal_t *journal; 252 journal_t *journal;
253 handle_t *handle;
249 254
250 if (sb->s_flags & MS_RDONLY) 255 if (sb->s_flags & MS_RDONLY)
251 return ERR_PTR(-EROFS); 256 return ERR_PTR(-EROFS);
252 257
253 vfs_check_frozen(sb, SB_FREEZE_TRANS);
254 /* Special case here: if the journal has aborted behind our
255 * backs (eg. EIO in the commit thread), then we still need to
256 * take the FS itself readonly cleanly. */
257 journal = EXT4_SB(sb)->s_journal; 258 journal = EXT4_SB(sb)->s_journal;
258 if (journal) { 259 handle = ext4_journal_current_handle();
259 if (is_journal_aborted(journal)) { 260
260 ext4_abort(sb, "Detected aborted journal"); 261 /*
261 return ERR_PTR(-EROFS); 262 * If a handle has been started, it should be allowed to
262 } 263 * finish, otherwise deadlock could happen between freeze
 263 return jbd2_journal_start(journal, nblocks); 264 * and others (e.g. truncate) due to the restart of the
 265 * journal handle if the filesystem is frozen and active
266 * handles are not stopped.
267 */
268 if (!handle)
269 vfs_check_frozen(sb, SB_FREEZE_TRANS);
270
271 if (!journal)
272 return ext4_get_nojournal();
273 /*
274 * Special case here: if the journal has aborted behind our
275 * backs (eg. EIO in the commit thread), then we still need to
276 * take the FS itself readonly cleanly.
277 */
278 if (is_journal_aborted(journal)) {
279 ext4_abort(sb, "Detected aborted journal");
280 return ERR_PTR(-EROFS);
264 } 281 }
265 return ext4_get_nojournal(); 282 return jbd2_journal_start(journal, nblocks);
266} 283}
267 284
268/* 285/*
@@ -2975,6 +2992,12 @@ static int ext4_register_li_request(struct super_block *sb,
2975 mutex_unlock(&ext4_li_info->li_list_mtx); 2992 mutex_unlock(&ext4_li_info->li_list_mtx);
2976 2993
2977 sbi->s_li_request = elr; 2994 sbi->s_li_request = elr;
2995 /*
2996 * set elr to NULL here since it has been inserted to
2997 * the request_list and the removal and free of it is
2998 * handled by ext4_clear_request_list from now on.
2999 */
3000 elr = NULL;
2978 3001
2979 if (!(ext4_li_info->li_state & EXT4_LAZYINIT_RUNNING)) { 3002 if (!(ext4_li_info->li_state & EXT4_LAZYINIT_RUNNING)) {
2980 ret = ext4_run_lazyinit_thread(); 3003 ret = ext4_run_lazyinit_thread();
@@ -3385,6 +3408,10 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
3385 get_random_bytes(&sbi->s_next_generation, sizeof(u32)); 3408 get_random_bytes(&sbi->s_next_generation, sizeof(u32));
3386 spin_lock_init(&sbi->s_next_gen_lock); 3409 spin_lock_init(&sbi->s_next_gen_lock);
3387 3410
3411 init_timer(&sbi->s_err_report);
3412 sbi->s_err_report.function = print_daily_error_info;
3413 sbi->s_err_report.data = (unsigned long) sb;
3414
3388 err = percpu_counter_init(&sbi->s_freeblocks_counter, 3415 err = percpu_counter_init(&sbi->s_freeblocks_counter,
3389 ext4_count_free_blocks(sb)); 3416 ext4_count_free_blocks(sb));
3390 if (!err) { 3417 if (!err) {
@@ -3646,9 +3673,6 @@ no_journal:
3646 "Opts: %s%s%s", descr, sbi->s_es->s_mount_opts, 3673 "Opts: %s%s%s", descr, sbi->s_es->s_mount_opts,
3647 *sbi->s_es->s_mount_opts ? "; " : "", orig_data); 3674 *sbi->s_es->s_mount_opts ? "; " : "", orig_data);
3648 3675
3649 init_timer(&sbi->s_err_report);
3650 sbi->s_err_report.function = print_daily_error_info;
3651 sbi->s_err_report.data = (unsigned long) sb;
3652 if (es->s_error_count) 3676 if (es->s_error_count)
3653 mod_timer(&sbi->s_err_report, jiffies + 300*HZ); /* 5 minutes */ 3677 mod_timer(&sbi->s_err_report, jiffies + 300*HZ); /* 5 minutes */
3654 3678
@@ -3672,6 +3696,7 @@ failed_mount_wq:
3672 sbi->s_journal = NULL; 3696 sbi->s_journal = NULL;
3673 } 3697 }
3674failed_mount3: 3698failed_mount3:
3699 del_timer(&sbi->s_err_report);
3675 if (sbi->s_flex_groups) { 3700 if (sbi->s_flex_groups) {
3676 if (is_vmalloc_addr(sbi->s_flex_groups)) 3701 if (is_vmalloc_addr(sbi->s_flex_groups))
3677 vfree(sbi->s_flex_groups); 3702 vfree(sbi->s_flex_groups);
@@ -4138,6 +4163,11 @@ static int ext4_sync_fs(struct super_block *sb, int wait)
4138/* 4163/*
4139 * LVM calls this function before a (read-only) snapshot is created. This 4164 * LVM calls this function before a (read-only) snapshot is created. This
4140 * gives us a chance to flush the journal completely and mark the fs clean. 4165 * gives us a chance to flush the journal completely and mark the fs clean.
4166 *
 4167 * Note that this function alone cannot bring a filesystem into a clean
 4168 * state, because ext4 prevents a new handle from being started
 4169 * by @sb->s_frozen, which stays in an upper layer. It thus needs help from
4170 * the upper layer.
4141 */ 4171 */
4142static int ext4_freeze(struct super_block *sb) 4172static int ext4_freeze(struct super_block *sb)
4143{ 4173{
@@ -4614,11 +4644,24 @@ static int ext4_quota_on(struct super_block *sb, int type, int format_id,
4614 4644
4615static int ext4_quota_off(struct super_block *sb, int type) 4645static int ext4_quota_off(struct super_block *sb, int type)
4616{ 4646{
4647 struct inode *inode = sb_dqopt(sb)->files[type];
4648 handle_t *handle;
4649
4617 /* Force all delayed allocation blocks to be allocated. 4650 /* Force all delayed allocation blocks to be allocated.
4618 * Caller already holds s_umount sem */ 4651 * Caller already holds s_umount sem */
4619 if (test_opt(sb, DELALLOC)) 4652 if (test_opt(sb, DELALLOC))
4620 sync_filesystem(sb); 4653 sync_filesystem(sb);
4621 4654
4655 /* Update modification times of quota files when userspace can
4656 * start looking at them */
4657 handle = ext4_journal_start(inode, 1);
4658 if (IS_ERR(handle))
4659 goto out;
4660 inode->i_mtime = inode->i_ctime = CURRENT_TIME;
4661 ext4_mark_inode_dirty(handle, inode);
4662 ext4_journal_stop(handle);
4663
4664out:
4622 return dquot_quota_off(sb, type); 4665 return dquot_quota_off(sb, type);
4623} 4666}
4624 4667
@@ -4714,9 +4757,8 @@ out:
4714 if (inode->i_size < off + len) { 4757 if (inode->i_size < off + len) {
4715 i_size_write(inode, off + len); 4758 i_size_write(inode, off + len);
4716 EXT4_I(inode)->i_disksize = inode->i_size; 4759 EXT4_I(inode)->i_disksize = inode->i_size;
4760 ext4_mark_inode_dirty(handle, inode);
4717 } 4761 }
4718 inode->i_mtime = inode->i_ctime = CURRENT_TIME;
4719 ext4_mark_inode_dirty(handle, inode);
4720 mutex_unlock(&inode->i_mutex); 4762 mutex_unlock(&inode->i_mutex);
4721 return len; 4763 return len;
4722} 4764}
diff --git a/fs/fhandle.c b/fs/fhandle.c
index bf93ad2bee07..6b088641f5bf 100644
--- a/fs/fhandle.c
+++ b/fs/fhandle.c
@@ -7,6 +7,7 @@
7#include <linux/exportfs.h> 7#include <linux/exportfs.h>
8#include <linux/fs_struct.h> 8#include <linux/fs_struct.h>
9#include <linux/fsnotify.h> 9#include <linux/fsnotify.h>
10#include <linux/personality.h>
10#include <asm/uaccess.h> 11#include <asm/uaccess.h>
11#include "internal.h" 12#include "internal.h"
12 13
diff --git a/fs/filesystems.c b/fs/filesystems.c
index 751d6b255a12..0845f84f2a5f 100644
--- a/fs/filesystems.c
+++ b/fs/filesystems.c
@@ -110,14 +110,13 @@ int unregister_filesystem(struct file_system_type * fs)
110 *tmp = fs->next; 110 *tmp = fs->next;
111 fs->next = NULL; 111 fs->next = NULL;
112 write_unlock(&file_systems_lock); 112 write_unlock(&file_systems_lock);
113 synchronize_rcu();
113 return 0; 114 return 0;
114 } 115 }
115 tmp = &(*tmp)->next; 116 tmp = &(*tmp)->next;
116 } 117 }
117 write_unlock(&file_systems_lock); 118 write_unlock(&file_systems_lock);
118 119
119 synchronize_rcu();
120
121 return -EINVAL; 120 return -EINVAL;
122} 121}
123 122
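The unregister_filesystem() fix above moves synchronize_rcu() inside the success path: a grace period is only needed when an entry was actually unlinked, and the failure path can return immediately. The pattern, sketched with a hypothetical list:

#include <linux/rcupdate.h>
#include <linux/spinlock.h>
#include <linux/errno.h>

struct node { struct node *next; };
static DEFINE_RWLOCK(example_lock);

static int example_unregister(struct node **head, struct node *victim)
{
        struct node **p;

        write_lock(&example_lock);
        for (p = head; *p; p = &(*p)->next) {
                if (*p == victim) {
                        *p = victim->next;
                        write_unlock(&example_lock);
                        synchronize_rcu();      /* readers may still use it */
                        return 0;
                }
        }
        write_unlock(&example_lock);
        return -EINVAL;         /* nothing removed, no grace period */
}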
diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c
index c71995b111bf..0f5c4f9d5d62 100644
--- a/fs/gfs2/aops.c
+++ b/fs/gfs2/aops.c
@@ -884,8 +884,8 @@ static int gfs2_write_end(struct file *file, struct address_space *mapping,
884 } 884 }
885 885
886 brelse(dibh); 886 brelse(dibh);
887 gfs2_trans_end(sdp);
888failed: 887failed:
888 gfs2_trans_end(sdp);
889 if (al) { 889 if (al) {
890 gfs2_inplace_release(ip); 890 gfs2_inplace_release(ip);
891 gfs2_quota_unlock(ip); 891 gfs2_quota_unlock(ip);
diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c
index 5c356d09c321..f789c5732b7c 100644
--- a/fs/gfs2/dir.c
+++ b/fs/gfs2/dir.c
@@ -1506,7 +1506,7 @@ struct inode *gfs2_dir_search(struct inode *dir, const struct qstr *name)
1506 inode = gfs2_inode_lookup(dir->i_sb, 1506 inode = gfs2_inode_lookup(dir->i_sb,
1507 be16_to_cpu(dent->de_type), 1507 be16_to_cpu(dent->de_type),
1508 be64_to_cpu(dent->de_inum.no_addr), 1508 be64_to_cpu(dent->de_inum.no_addr),
1509 be64_to_cpu(dent->de_inum.no_formal_ino)); 1509 be64_to_cpu(dent->de_inum.no_formal_ino), 0);
1510 brelse(bh); 1510 brelse(bh);
1511 return inode; 1511 return inode;
1512 } 1512 }
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index b2682e073eee..e48310885c48 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -617,18 +617,51 @@ static ssize_t gfs2_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
617 return generic_file_aio_write(iocb, iov, nr_segs, pos); 617 return generic_file_aio_write(iocb, iov, nr_segs, pos);
618} 618}
619 619
620static void empty_write_end(struct page *page, unsigned from, 620static int empty_write_end(struct page *page, unsigned from,
621 unsigned to) 621 unsigned to, int mode)
622{ 622{
623 struct gfs2_inode *ip = GFS2_I(page->mapping->host); 623 struct inode *inode = page->mapping->host;
624 struct gfs2_inode *ip = GFS2_I(inode);
625 struct buffer_head *bh;
626 unsigned offset, blksize = 1 << inode->i_blkbits;
627 pgoff_t end_index = i_size_read(inode) >> PAGE_CACHE_SHIFT;
624 628
625 zero_user(page, from, to-from); 629 zero_user(page, from, to-from);
626 mark_page_accessed(page); 630 mark_page_accessed(page);
627 631
628 if (!gfs2_is_writeback(ip)) 632 if (page->index < end_index || !(mode & FALLOC_FL_KEEP_SIZE)) {
629 gfs2_page_add_databufs(ip, page, from, to); 633 if (!gfs2_is_writeback(ip))
634 gfs2_page_add_databufs(ip, page, from, to);
635
636 block_commit_write(page, from, to);
637 return 0;
638 }
639
640 offset = 0;
641 bh = page_buffers(page);
642 while (offset < to) {
643 if (offset >= from) {
644 set_buffer_uptodate(bh);
645 mark_buffer_dirty(bh);
646 clear_buffer_new(bh);
647 write_dirty_buffer(bh, WRITE);
648 }
649 offset += blksize;
650 bh = bh->b_this_page;
651 }
630 652
631 block_commit_write(page, from, to); 653 offset = 0;
654 bh = page_buffers(page);
655 while (offset < to) {
656 if (offset >= from) {
657 wait_on_buffer(bh);
658 if (!buffer_uptodate(bh))
659 return -EIO;
660 }
661 offset += blksize;
662 bh = bh->b_this_page;
663 }
664 return 0;
632} 665}
633 666
634static int needs_empty_write(sector_t block, struct inode *inode) 667static int needs_empty_write(sector_t block, struct inode *inode)
@@ -643,7 +676,8 @@ static int needs_empty_write(sector_t block, struct inode *inode)
643 return !buffer_mapped(&bh_map); 676 return !buffer_mapped(&bh_map);
644} 677}
645 678
646static int write_empty_blocks(struct page *page, unsigned from, unsigned to) 679static int write_empty_blocks(struct page *page, unsigned from, unsigned to,
680 int mode)
647{ 681{
648 struct inode *inode = page->mapping->host; 682 struct inode *inode = page->mapping->host;
649 unsigned start, end, next, blksize; 683 unsigned start, end, next, blksize;
@@ -668,7 +702,9 @@ static int write_empty_blocks(struct page *page, unsigned from, unsigned to)
668 gfs2_block_map); 702 gfs2_block_map);
669 if (unlikely(ret)) 703 if (unlikely(ret))
670 return ret; 704 return ret;
671 empty_write_end(page, start, end); 705 ret = empty_write_end(page, start, end, mode);
706 if (unlikely(ret))
707 return ret;
672 end = 0; 708 end = 0;
673 } 709 }
674 start = next; 710 start = next;
@@ -682,7 +718,9 @@ static int write_empty_blocks(struct page *page, unsigned from, unsigned to)
682 ret = __block_write_begin(page, start, end - start, gfs2_block_map); 718 ret = __block_write_begin(page, start, end - start, gfs2_block_map);
683 if (unlikely(ret)) 719 if (unlikely(ret))
684 return ret; 720 return ret;
685 empty_write_end(page, start, end); 721 ret = empty_write_end(page, start, end, mode);
722 if (unlikely(ret))
723 return ret;
686 } 724 }
687 725
688 return 0; 726 return 0;
@@ -731,7 +769,7 @@ static int fallocate_chunk(struct inode *inode, loff_t offset, loff_t len,
731 769
732 if (curr == end) 770 if (curr == end)
733 to = end_offset; 771 to = end_offset;
734 error = write_empty_blocks(page, from, to); 772 error = write_empty_blocks(page, from, to, mode);
735 if (!error && offset + to > inode->i_size && 773 if (!error && offset + to > inode->i_size &&
736 !(mode & FALLOC_FL_KEEP_SIZE)) { 774 !(mode & FALLOC_FL_KEEP_SIZE)) {
737 i_size_write(inode, offset + to); 775 i_size_write(inode, offset + to);
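empty_write_end() above splits buffer writeout into two walks over the page: submit every buffer in range first, then wait and check each one, so the writes proceed in parallel rather than one submit-wait at a time. A condensed sketch of the two-pass pattern (not the gfs2 code verbatim):

#include <linux/buffer_head.h>
#include <linux/errno.h>

static int example_flush_buffers(struct page *page, unsigned from,
                                 unsigned to, unsigned blksize)
{
        struct buffer_head *bh;
        unsigned off;

        /* pass 1: start I/O on every buffer in range */
        for (off = 0, bh = page_buffers(page); off < to;
             off += blksize, bh = bh->b_this_page)
                if (off >= from)
                        write_dirty_buffer(bh, WRITE);

        /* pass 2: wait and verify, only after all I/O is in flight */
        for (off = 0, bh = page_buffers(page); off < to;
             off += blksize, bh = bh->b_this_page) {
                if (off < from)
                        continue;
                wait_on_buffer(bh);
                if (!buffer_uptodate(bh))
                        return -EIO;
        }
        return 0;
}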
diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c
index 3754e3cbf02b..25eeb2bcee47 100644
--- a/fs/gfs2/glops.c
+++ b/fs/gfs2/glops.c
@@ -385,6 +385,10 @@ static int trans_go_demote_ok(const struct gfs2_glock *gl)
385static void iopen_go_callback(struct gfs2_glock *gl) 385static void iopen_go_callback(struct gfs2_glock *gl)
386{ 386{
387 struct gfs2_inode *ip = (struct gfs2_inode *)gl->gl_object; 387 struct gfs2_inode *ip = (struct gfs2_inode *)gl->gl_object;
388 struct gfs2_sbd *sdp = gl->gl_sbd;
389
390 if (sdp->sd_vfs->s_flags & MS_RDONLY)
391 return;
388 392
389 if (gl->gl_demote_state == LM_ST_UNLOCKED && 393 if (gl->gl_demote_state == LM_ST_UNLOCKED &&
390 gl->gl_state == LM_ST_SHARED && ip) { 394 gl->gl_state == LM_ST_SHARED && ip) {
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index 97d54a28776a..9134dcb89479 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -40,37 +40,61 @@ struct gfs2_inum_range_host {
40 u64 ir_length; 40 u64 ir_length;
41}; 41};
42 42
43struct gfs2_skip_data {
44 u64 no_addr;
45 int skipped;
46 int non_block;
47};
48
43static int iget_test(struct inode *inode, void *opaque) 49static int iget_test(struct inode *inode, void *opaque)
44{ 50{
45 struct gfs2_inode *ip = GFS2_I(inode); 51 struct gfs2_inode *ip = GFS2_I(inode);
46 u64 *no_addr = opaque; 52 struct gfs2_skip_data *data = opaque;
47 53
48 if (ip->i_no_addr == *no_addr) 54 if (ip->i_no_addr == data->no_addr) {
55 if (data->non_block &&
56 inode->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE)) {
57 data->skipped = 1;
58 return 0;
59 }
49 return 1; 60 return 1;
50 61 }
51 return 0; 62 return 0;
52} 63}
53 64
54static int iget_set(struct inode *inode, void *opaque) 65static int iget_set(struct inode *inode, void *opaque)
55{ 66{
56 struct gfs2_inode *ip = GFS2_I(inode); 67 struct gfs2_inode *ip = GFS2_I(inode);
57 u64 *no_addr = opaque; 68 struct gfs2_skip_data *data = opaque;
58 69
59 inode->i_ino = (unsigned long)*no_addr; 70 if (data->skipped)
60 ip->i_no_addr = *no_addr; 71 return -ENOENT;
72 inode->i_ino = (unsigned long)(data->no_addr);
73 ip->i_no_addr = data->no_addr;
61 return 0; 74 return 0;
62} 75}
63 76
64struct inode *gfs2_ilookup(struct super_block *sb, u64 no_addr) 77struct inode *gfs2_ilookup(struct super_block *sb, u64 no_addr)
65{ 78{
66 unsigned long hash = (unsigned long)no_addr; 79 unsigned long hash = (unsigned long)no_addr;
67 return ilookup5(sb, hash, iget_test, &no_addr); 80 struct gfs2_skip_data data;
81
82 data.no_addr = no_addr;
83 data.skipped = 0;
84 data.non_block = 0;
85 return ilookup5(sb, hash, iget_test, &data);
68} 86}
69 87
70static struct inode *gfs2_iget(struct super_block *sb, u64 no_addr) 88static struct inode *gfs2_iget(struct super_block *sb, u64 no_addr,
89 int non_block)
71{ 90{
91 struct gfs2_skip_data data;
72 unsigned long hash = (unsigned long)no_addr; 92 unsigned long hash = (unsigned long)no_addr;
73 return iget5_locked(sb, hash, iget_test, iget_set, &no_addr); 93
94 data.no_addr = no_addr;
95 data.skipped = 0;
96 data.non_block = non_block;
97 return iget5_locked(sb, hash, iget_test, iget_set, &data);
74} 98}
75 99
76/** 100/**
@@ -111,19 +135,20 @@ static void gfs2_set_iop(struct inode *inode)
111 * @sb: The super block 135 * @sb: The super block
112 * @no_addr: The inode number 136 * @no_addr: The inode number
113 * @type: The type of the inode 137 * @type: The type of the inode
138 * @non_block: Can we block on inodes that are being freed?
114 * 139 *
115 * Returns: A VFS inode, or an error 140 * Returns: A VFS inode, or an error
116 */ 141 */
117 142
118struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned int type, 143struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned int type,
119 u64 no_addr, u64 no_formal_ino) 144 u64 no_addr, u64 no_formal_ino, int non_block)
120{ 145{
121 struct inode *inode; 146 struct inode *inode;
122 struct gfs2_inode *ip; 147 struct gfs2_inode *ip;
123 struct gfs2_glock *io_gl = NULL; 148 struct gfs2_glock *io_gl = NULL;
124 int error; 149 int error;
125 150
126 inode = gfs2_iget(sb, no_addr); 151 inode = gfs2_iget(sb, no_addr, non_block);
127 ip = GFS2_I(inode); 152 ip = GFS2_I(inode);
128 153
129 if (!inode) 154 if (!inode)
@@ -185,11 +210,12 @@ struct inode *gfs2_lookup_by_inum(struct gfs2_sbd *sdp, u64 no_addr,
185{ 210{
186 struct super_block *sb = sdp->sd_vfs; 211 struct super_block *sb = sdp->sd_vfs;
187 struct gfs2_holder i_gh; 212 struct gfs2_holder i_gh;
188 struct inode *inode; 213 struct inode *inode = NULL;
189 int error; 214 int error;
190 215
216 /* Must not read in block until block type is verified */
191 error = gfs2_glock_nq_num(sdp, no_addr, &gfs2_inode_glops, 217 error = gfs2_glock_nq_num(sdp, no_addr, &gfs2_inode_glops,
192 LM_ST_SHARED, LM_FLAG_ANY, &i_gh); 218 LM_ST_EXCLUSIVE, GL_SKIP, &i_gh);
193 if (error) 219 if (error)
194 return ERR_PTR(error); 220 return ERR_PTR(error);
195 221
@@ -197,7 +223,7 @@ struct inode *gfs2_lookup_by_inum(struct gfs2_sbd *sdp, u64 no_addr,
197 if (error) 223 if (error)
198 goto fail; 224 goto fail;
199 225
200 inode = gfs2_inode_lookup(sb, DT_UNKNOWN, no_addr, 0); 226 inode = gfs2_inode_lookup(sb, DT_UNKNOWN, no_addr, 0, 1);
201 if (IS_ERR(inode)) 227 if (IS_ERR(inode))
202 goto fail; 228 goto fail;
203 229
@@ -843,7 +869,7 @@ struct inode *gfs2_createi(struct gfs2_holder *ghs, const struct qstr *name,
843 goto fail_gunlock2; 869 goto fail_gunlock2;
844 870
845 inode = gfs2_inode_lookup(dir->i_sb, IF2DT(mode), inum.no_addr, 871 inode = gfs2_inode_lookup(dir->i_sb, IF2DT(mode), inum.no_addr,
846 inum.no_formal_ino); 872 inum.no_formal_ino, 0);
847 if (IS_ERR(inode)) 873 if (IS_ERR(inode))
848 goto fail_gunlock2; 874 goto fail_gunlock2;
849 875
diff --git a/fs/gfs2/inode.h b/fs/gfs2/inode.h
index 3e00a66e7cbd..099ca305e518 100644
--- a/fs/gfs2/inode.h
+++ b/fs/gfs2/inode.h
@@ -97,7 +97,8 @@ err:
97} 97}
98 98
99extern struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned type, 99extern struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned type,
100 u64 no_addr, u64 no_formal_ino); 100 u64 no_addr, u64 no_formal_ino,
101 int non_block);
101extern struct inode *gfs2_lookup_by_inum(struct gfs2_sbd *sdp, u64 no_addr, 102extern struct inode *gfs2_lookup_by_inum(struct gfs2_sbd *sdp, u64 no_addr,
102 u64 *no_formal_ino, 103 u64 *no_formal_ino,
103 unsigned int blktype); 104 unsigned int blktype);
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index 42ef24355afb..d3c69eb91c74 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -430,7 +430,7 @@ static int gfs2_lookup_root(struct super_block *sb, struct dentry **dptr,
430 struct dentry *dentry; 430 struct dentry *dentry;
431 struct inode *inode; 431 struct inode *inode;
432 432
433 inode = gfs2_inode_lookup(sb, DT_DIR, no_addr, 0); 433 inode = gfs2_inode_lookup(sb, DT_DIR, no_addr, 0, 0);
434 if (IS_ERR(inode)) { 434 if (IS_ERR(inode)) {
435 fs_err(sdp, "can't read in %s inode: %ld\n", name, PTR_ERR(inode)); 435 fs_err(sdp, "can't read in %s inode: %ld\n", name, PTR_ERR(inode));
436 return PTR_ERR(inode); 436 return PTR_ERR(inode);
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index cf930cd9664a..6fcae8469f6d 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -945,7 +945,7 @@ static void try_rgrp_unlink(struct gfs2_rgrpd *rgd, u64 *last_unlinked, u64 skip
945 /* rgblk_search can return a block < goal, so we need to 945 /* rgblk_search can return a block < goal, so we need to
946 keep it marching forward. */ 946 keep it marching forward. */
947 no_addr = block + rgd->rd_data0; 947 no_addr = block + rgd->rd_data0;
948 goal++; 948 goal = max(block + 1, goal + 1);
949 if (*last_unlinked != NO_BLOCK && no_addr <= *last_unlinked) 949 if (*last_unlinked != NO_BLOCK && no_addr <= *last_unlinked)
950 continue; 950 continue;
951 if (no_addr == skip) 951 if (no_addr == skip)
@@ -971,7 +971,7 @@ static void try_rgrp_unlink(struct gfs2_rgrpd *rgd, u64 *last_unlinked, u64 skip
971 found++; 971 found++;
972 972
973 /* Limit reclaim to sensible number of tasks */ 973 /* Limit reclaim to sensible number of tasks */
974 if (found > 2*NR_CPUS) 974 if (found > NR_CPUS)
975 return; 975 return;
976 } 976 }
977 977
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index a4e23d68a398..b9f28e66dad1 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -1318,15 +1318,17 @@ static int gfs2_show_options(struct seq_file *s, struct vfsmount *mnt)
1318 1318
1319static void gfs2_evict_inode(struct inode *inode) 1319static void gfs2_evict_inode(struct inode *inode)
1320{ 1320{
1321 struct gfs2_sbd *sdp = inode->i_sb->s_fs_info; 1321 struct super_block *sb = inode->i_sb;
1322 struct gfs2_sbd *sdp = sb->s_fs_info;
1322 struct gfs2_inode *ip = GFS2_I(inode); 1323 struct gfs2_inode *ip = GFS2_I(inode);
1323 struct gfs2_holder gh; 1324 struct gfs2_holder gh;
1324 int error; 1325 int error;
1325 1326
1326 if (inode->i_nlink) 1327 if (inode->i_nlink || (sb->s_flags & MS_RDONLY))
1327 goto out; 1328 goto out;
1328 1329
1329 error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh); 1330 /* Must not read inode block until block type has been verified */
1331 error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, GL_SKIP, &gh);
1330 if (unlikely(error)) { 1332 if (unlikely(error)) {
1331 gfs2_glock_dq_uninit(&ip->i_iopen_gh); 1333 gfs2_glock_dq_uninit(&ip->i_iopen_gh);
1332 goto out; 1334 goto out;
@@ -1336,6 +1338,12 @@ static void gfs2_evict_inode(struct inode *inode)
1336 if (error) 1338 if (error)
1337 goto out_truncate; 1339 goto out_truncate;
1338 1340
1341 if (test_bit(GIF_INVALID, &ip->i_flags)) {
1342 error = gfs2_inode_refresh(ip);
1343 if (error)
1344 goto out_truncate;
1345 }
1346
1339 ip->i_iopen_gh.gh_flags |= GL_NOCACHE; 1347 ip->i_iopen_gh.gh_flags |= GL_NOCACHE;
1340 gfs2_glock_dq_wait(&ip->i_iopen_gh); 1348 gfs2_glock_dq_wait(&ip->i_iopen_gh);
1341 gfs2_holder_reinit(LM_ST_EXCLUSIVE, LM_FLAG_TRY_1CB | GL_NOCACHE, &ip->i_iopen_gh); 1349 gfs2_holder_reinit(LM_ST_EXCLUSIVE, LM_FLAG_TRY_1CB | GL_NOCACHE, &ip->i_iopen_gh);
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
index 20af62f4304b..6e28000a4b21 100644
--- a/fs/jbd2/commit.c
+++ b/fs/jbd2/commit.c
@@ -105,6 +105,8 @@ static int journal_submit_commit_record(journal_t *journal,
105 int ret; 105 int ret;
106 struct timespec now = current_kernel_time(); 106 struct timespec now = current_kernel_time();
107 107
108 *cbh = NULL;
109
108 if (is_journal_aborted(journal)) 110 if (is_journal_aborted(journal))
109 return 0; 111 return 0;
110 112
@@ -806,7 +808,7 @@ wait_for_iobuf:
806 if (err) 808 if (err)
807 __jbd2_journal_abort_hard(journal); 809 __jbd2_journal_abort_hard(journal);
808 } 810 }
809 if (!err && !is_journal_aborted(journal)) 811 if (cbh)
810 err = journal_wait_on_commit_record(journal, cbh); 812 err = journal_wait_on_commit_record(journal, cbh);
811 if (JBD2_HAS_INCOMPAT_FEATURE(journal, 813 if (JBD2_HAS_INCOMPAT_FEATURE(journal,
812 JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT) && 814 JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT) &&
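The jbd2 fix above is out-parameter hygiene: journal_submit_commit_record() now NULLs *cbh up front, so the caller simply tests the pointer instead of re-deriving whether the early-return path was taken. Sketched:

#include <linux/buffer_head.h>
#include <linux/types.h>

static int example_submit_record(struct buffer_head **cbh, bool aborted)
{
        *cbh = NULL;            /* valid to test on every return path */

        if (aborted)
                return 0;       /* caller sees *cbh == NULL, skips wait */

        /* ... allocate a buffer, submit it, store it in *cbh ... */
        return 0;
}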
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index aba8ebaec25c..e0ec3db1c395 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -2413,10 +2413,12 @@ const char *jbd2_dev_to_name(dev_t device)
2413 new_dev = kmalloc(sizeof(struct devname_cache), GFP_KERNEL); 2413 new_dev = kmalloc(sizeof(struct devname_cache), GFP_KERNEL);
2414 if (!new_dev) 2414 if (!new_dev)
2415 return "NODEV-ALLOCFAILURE"; /* Something non-NULL */ 2415 return "NODEV-ALLOCFAILURE"; /* Something non-NULL */
2416 bd = bdget(device);
2416 spin_lock(&devname_cache_lock); 2417 spin_lock(&devname_cache_lock);
2417 if (devcache[i]) { 2418 if (devcache[i]) {
2418 if (devcache[i]->device == device) { 2419 if (devcache[i]->device == device) {
2419 kfree(new_dev); 2420 kfree(new_dev);
2421 bdput(bd);
2420 ret = devcache[i]->devname; 2422 ret = devcache[i]->devname;
2421 spin_unlock(&devname_cache_lock); 2423 spin_unlock(&devname_cache_lock);
2422 return ret; 2424 return ret;
@@ -2425,7 +2427,6 @@ const char *jbd2_dev_to_name(dev_t device)
2425 } 2427 }
2426 devcache[i] = new_dev; 2428 devcache[i] = new_dev;
2427 devcache[i]->device = device; 2429 devcache[i]->device = device;
2428 bd = bdget(device);
2429 if (bd) { 2430 if (bd) {
2430 bdevname(bd, devcache[i]->devname); 2431 bdevname(bd, devcache[i]->devname);
2431 bdput(bd); 2432 bdput(bd);
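The jbd2_dev_to_name() change exists because bdget() can sleep, and the old code called it under devname_cache_lock, a spinlock. The general rule, sketched with kmalloc() as the sleeping call:

#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/errno.h>

static DEFINE_SPINLOCK(example_lock);
static void *example_slot;

static int example_install(size_t len)
{
        void *buf = kmalloc(len, GFP_KERNEL);   /* may sleep: unlocked */

        if (!buf)
                return -ENOMEM;

        spin_lock(&example_lock);
        if (example_slot) {             /* raced; entry already present */
                spin_unlock(&example_lock);
                kfree(buf);             /* kfree() never sleeps */
                return 0;
        }
        example_slot = buf;
        spin_unlock(&example_lock);
        return 0;
}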
diff --git a/fs/namei.c b/fs/namei.c
index e6cd6113872c..54fc993e3027 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -697,6 +697,7 @@ static __always_inline void set_root_rcu(struct nameidata *nd)
697 do { 697 do {
698 seq = read_seqcount_begin(&fs->seq); 698 seq = read_seqcount_begin(&fs->seq);
699 nd->root = fs->root; 699 nd->root = fs->root;
700 nd->seq = __read_seqcount_begin(&nd->root.dentry->d_seq);
700 } while (read_seqcount_retry(&fs->seq, seq)); 701 } while (read_seqcount_retry(&fs->seq, seq));
701 } 702 }
702} 703}
diff --git a/fs/namespace.c b/fs/namespace.c
index 7dba2ed03429..d99bcf59e4c2 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -1030,18 +1030,6 @@ const struct seq_operations mounts_op = {
1030 .show = show_vfsmnt 1030 .show = show_vfsmnt
1031}; 1031};
1032 1032
1033static int uuid_is_nil(u8 *uuid)
1034{
1035 int i;
1036 u8 *cp = (u8 *)uuid;
1037
1038 for (i = 0; i < 16; i++) {
1039 if (*cp++)
1040 return 0;
1041 }
1042 return 1;
1043}
1044
1045static int show_mountinfo(struct seq_file *m, void *v) 1033static int show_mountinfo(struct seq_file *m, void *v)
1046{ 1034{
1047 struct proc_mounts *p = m->private; 1035 struct proc_mounts *p = m->private;
@@ -1085,10 +1073,6 @@ static int show_mountinfo(struct seq_file *m, void *v)
1085 if (IS_MNT_UNBINDABLE(mnt)) 1073 if (IS_MNT_UNBINDABLE(mnt))
1086 seq_puts(m, " unbindable"); 1074 seq_puts(m, " unbindable");
1087 1075
1088 if (!uuid_is_nil(mnt->mnt_sb->s_uuid))
1089 /* print the uuid */
1090 seq_printf(m, " uuid:%pU", mnt->mnt_sb->s_uuid);
1091
1092 /* Filesystem specific data */ 1076 /* Filesystem specific data */
1093 seq_puts(m, " - "); 1077 seq_puts(m, " - ");
1094 show_type(m, sb); 1078 show_type(m, sb);
diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c
index 9166fcb66da2..89fc160fd5b0 100644
--- a/fs/nfs/namespace.c
+++ b/fs/nfs/namespace.c
@@ -148,67 +148,64 @@ static rpc_authflavor_t nfs_find_best_sec(struct nfs4_secinfo_flavors *flavors,
148 return pseudoflavor; 148 return pseudoflavor;
149} 149}
150 150
151static rpc_authflavor_t nfs_negotiate_security(const struct dentry *parent, const struct dentry *dentry) 151static int nfs_negotiate_security(const struct dentry *parent,
152 const struct dentry *dentry,
153 rpc_authflavor_t *flavor)
152{ 154{
153 int status = 0;
154 struct page *page; 155 struct page *page;
155 struct nfs4_secinfo_flavors *flavors; 156 struct nfs4_secinfo_flavors *flavors;
156 int (*secinfo)(struct inode *, const struct qstr *, struct nfs4_secinfo_flavors *); 157 int (*secinfo)(struct inode *, const struct qstr *, struct nfs4_secinfo_flavors *);
157 rpc_authflavor_t flavor = RPC_AUTH_UNIX; 158 int ret = -EPERM;
158 159
159 secinfo = NFS_PROTO(parent->d_inode)->secinfo; 160 secinfo = NFS_PROTO(parent->d_inode)->secinfo;
160 if (secinfo != NULL) { 161 if (secinfo != NULL) {
161 page = alloc_page(GFP_KERNEL); 162 page = alloc_page(GFP_KERNEL);
162 if (!page) { 163 if (!page) {
163 status = -ENOMEM; 164 ret = -ENOMEM;
164 goto out; 165 goto out;
165 } 166 }
166 flavors = page_address(page); 167 flavors = page_address(page);
167 status = secinfo(parent->d_inode, &dentry->d_name, flavors); 168 ret = secinfo(parent->d_inode, &dentry->d_name, flavors);
168 flavor = nfs_find_best_sec(flavors, dentry->d_inode); 169 *flavor = nfs_find_best_sec(flavors, dentry->d_inode);
169 put_page(page); 170 put_page(page);
170 } 171 }
171 172
172 return flavor;
173
174out: 173out:
175 status = -ENOMEM; 174 return ret;
176 return status;
177} 175}
178 176
179static rpc_authflavor_t nfs_lookup_with_sec(struct nfs_server *server, struct dentry *parent, 177static int nfs_lookup_with_sec(struct nfs_server *server, struct dentry *parent,
180 struct dentry *dentry, struct path *path, 178 struct dentry *dentry, struct path *path,
181 struct nfs_fh *fh, struct nfs_fattr *fattr) 179 struct nfs_fh *fh, struct nfs_fattr *fattr,
180 rpc_authflavor_t *flavor)
182{ 181{
183 rpc_authflavor_t flavor;
184 struct rpc_clnt *clone; 182 struct rpc_clnt *clone;
185 struct rpc_auth *auth; 183 struct rpc_auth *auth;
186 int err; 184 int err;
187 185
188 flavor = nfs_negotiate_security(parent, path->dentry); 186 err = nfs_negotiate_security(parent, path->dentry, flavor);
189 if (flavor < 0) 187 if (err < 0)
190 goto out; 188 goto out;
191 clone = rpc_clone_client(server->client); 189 clone = rpc_clone_client(server->client);
192 auth = rpcauth_create(flavor, clone); 190 auth = rpcauth_create(*flavor, clone);
193 if (!auth) { 191 if (!auth) {
194 flavor = -EIO; 192 err = -EIO;
195 goto out_shutdown; 193 goto out_shutdown;
196 } 194 }
197 err = server->nfs_client->rpc_ops->lookup(clone, parent->d_inode, 195 err = server->nfs_client->rpc_ops->lookup(clone, parent->d_inode,
198 &path->dentry->d_name, 196 &path->dentry->d_name,
199 fh, fattr); 197 fh, fattr);
200 if (err < 0)
201 flavor = err;
202out_shutdown: 198out_shutdown:
203 rpc_shutdown_client(clone); 199 rpc_shutdown_client(clone);
204out: 200out:
205 return flavor; 201 return err;
206} 202}
207#else /* CONFIG_NFS_V4 */ 203#else /* CONFIG_NFS_V4 */
208static inline rpc_authflavor_t nfs_lookup_with_sec(struct nfs_server *server, 204static inline int nfs_lookup_with_sec(struct nfs_server *server,
209 struct dentry *parent, struct dentry *dentry, 205 struct dentry *parent, struct dentry *dentry,
210 struct path *path, struct nfs_fh *fh, 206 struct path *path, struct nfs_fh *fh,
211 struct nfs_fattr *fattr) 207 struct nfs_fattr *fattr,
208 rpc_authflavor_t *flavor)
212{ 209{
213 return -EPERM; 210 return -EPERM;
214} 211}
@@ -234,7 +231,7 @@ struct vfsmount *nfs_d_automount(struct path *path)
234 struct nfs_fh *fh = NULL; 231 struct nfs_fh *fh = NULL;
235 struct nfs_fattr *fattr = NULL; 232 struct nfs_fattr *fattr = NULL;
236 int err; 233 int err;
237 rpc_authflavor_t flavor = 1; 234 rpc_authflavor_t flavor = RPC_AUTH_UNIX;
238 235
239 dprintk("--> nfs_d_automount()\n"); 236 dprintk("--> nfs_d_automount()\n");
240 237
@@ -255,13 +252,8 @@ struct vfsmount *nfs_d_automount(struct path *path)
255 err = server->nfs_client->rpc_ops->lookup(server->client, parent->d_inode, 252 err = server->nfs_client->rpc_ops->lookup(server->client, parent->d_inode,
256 &path->dentry->d_name, 253 &path->dentry->d_name,
257 fh, fattr); 254 fh, fattr);
258 if (err == -EPERM) { 255 if (err == -EPERM && NFS_PROTO(parent->d_inode)->secinfo != NULL)
259 flavor = nfs_lookup_with_sec(server, parent, path->dentry, path, fh, fattr); 256 err = nfs_lookup_with_sec(server, parent, path->dentry, path, fh, fattr, &flavor);
260 if (flavor < 0)
261 err = flavor;
262 else
263 err = 0;
264 }
265 dput(parent); 257 dput(parent);
266 if (err != 0) { 258 if (err != 0) {
267 mnt = ERR_PTR(err); 259 mnt = ERR_PTR(err);
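The rewrite above fixes a sign bug: rpc_authflavor_t is unsigned, so "flavor < 0" could never fire and errors were silently treated as flavors. Returning int and passing the flavor back through a pointer keeps errno and payload in separate channels. The shape of it, with hypothetical names:

#include <linux/types.h>
#include <linux/errno.h>

typedef unsigned int example_flavor_t;  /* mirrors rpc_authflavor_t */

static int example_negotiate(bool ok, example_flavor_t *out)
{
        if (!ok)
                return -EPERM;  /* errors travel in the int return */

        *out = 1;               /* payload travels through *out */
        return 0;
}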
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index dfd1e6d7e6c3..9bf41eab3e46 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -2204,8 +2204,6 @@ static int nfs4_lookup_root_sec(struct nfs_server *server, struct nfs_fh *fhandl
2204 goto out; 2204 goto out;
2205 } 2205 }
2206 ret = nfs4_lookup_root(server, fhandle, info); 2206 ret = nfs4_lookup_root(server, fhandle, info);
2207 if (ret < 0)
2208 ret = -EAGAIN;
2209out: 2207out:
2210 return ret; 2208 return ret;
2211} 2209}
@@ -2226,7 +2224,7 @@ static int nfs4_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle,
2226 2224
2227 for (i = 0; i < len; i++) { 2225 for (i = 0; i < len; i++) {
2228 status = nfs4_lookup_root_sec(server, fhandle, info, flav_array[i]); 2226 status = nfs4_lookup_root_sec(server, fhandle, info, flav_array[i]);
2229 if (status == 0) 2227 if (status != -EPERM)
2230 break; 2228 break;
2231 } 2229 }
2232 if (status == 0) 2230 if (status == 0)
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index af0c6279a4a7..e4cbc11a74ab 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -542,11 +542,15 @@ nfs_scan_commit(struct inode *inode, struct list_head *dst, pgoff_t idx_start, u
542 if (!nfs_need_commit(nfsi)) 542 if (!nfs_need_commit(nfsi))
543 return 0; 543 return 0;
544 544
545 spin_lock(&inode->i_lock);
545 ret = nfs_scan_list(nfsi, dst, idx_start, npages, NFS_PAGE_TAG_COMMIT); 546 ret = nfs_scan_list(nfsi, dst, idx_start, npages, NFS_PAGE_TAG_COMMIT);
546 if (ret > 0) 547 if (ret > 0)
547 nfsi->ncommit -= ret; 548 nfsi->ncommit -= ret;
549 spin_unlock(&inode->i_lock);
550
548 if (nfs_need_commit(NFS_I(inode))) 551 if (nfs_need_commit(NFS_I(inode)))
549 __mark_inode_dirty(inode, I_DIRTY_DATASYNC); 552 __mark_inode_dirty(inode, I_DIRTY_DATASYNC);
553
550 return ret; 554 return ret;
551} 555}
552#else 556#else
@@ -1483,9 +1487,7 @@ int nfs_commit_inode(struct inode *inode, int how)
1483 res = nfs_commit_set_lock(NFS_I(inode), may_wait); 1487 res = nfs_commit_set_lock(NFS_I(inode), may_wait);
1484 if (res <= 0) 1488 if (res <= 0)
1485 goto out_mark_dirty; 1489 goto out_mark_dirty;
1486 spin_lock(&inode->i_lock);
1487 res = nfs_scan_commit(inode, &head, 0, 0); 1490 res = nfs_scan_commit(inode, &head, 0, 0);
1488 spin_unlock(&inode->i_lock);
1489 if (res) { 1491 if (res) {
1490 int error; 1492 int error;
1491 1493
diff --git a/fs/nfsd/lockd.c b/fs/nfsd/lockd.c
index 0c6d81670137..7c831a2731fa 100644
--- a/fs/nfsd/lockd.c
+++ b/fs/nfsd/lockd.c
@@ -38,7 +38,6 @@ nlm_fopen(struct svc_rqst *rqstp, struct nfs_fh *f, struct file **filp)
 	exp_readlock();
 	nfserr = nfsd_open(rqstp, &fh, S_IFREG, NFSD_MAY_LOCK, filp);
 	fh_put(&fh);
-	rqstp->rq_client = NULL;
 	exp_readunlock();
 	/* We return nlm error codes as nlm doesn't know
 	 * about nfsd, but nfsd does know about nlm..
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 4b36ec3eb8ea..4cf04e11c66c 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -258,6 +258,7 @@ static void nfs4_put_deleg_lease(struct nfs4_file *fp)
 	if (atomic_dec_and_test(&fp->fi_delegees)) {
 		vfs_setlease(fp->fi_deleg_file, F_UNLCK, &fp->fi_lease);
 		fp->fi_lease = NULL;
+		fput(fp->fi_deleg_file);
 		fp->fi_deleg_file = NULL;
 	}
 }
@@ -397,9 +398,12 @@ static void unhash_generic_stateid(struct nfs4_stateid *stp)
 
 static void free_generic_stateid(struct nfs4_stateid *stp)
 {
-	int oflag = nfs4_access_bmap_to_omode(stp);
+	int oflag;
 
-	nfs4_file_put_access(stp->st_file, oflag);
+	if (stp->st_access_bmap) {
+		oflag = nfs4_access_bmap_to_omode(stp);
+		nfs4_file_put_access(stp->st_file, oflag);
+	}
 	put_nfs4_file(stp->st_file);
 	kmem_cache_free(stateid_slab, stp);
 }
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 2e1cebde90df..129f3c9f62d5 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -1363,7 +1363,7 @@ nfsd_create_v3(struct svc_rqst *rqstp, struct svc_fh *fhp,
 		goto out;
 	if (!(iap->ia_valid & ATTR_MODE))
 		iap->ia_mode = 0;
-	err = fh_verify(rqstp, fhp, S_IFDIR, NFSD_MAY_CREATE);
+	err = fh_verify(rqstp, fhp, S_IFDIR, NFSD_MAY_EXEC);
 	if (err)
 		goto out;
 
@@ -1385,6 +1385,13 @@ nfsd_create_v3(struct svc_rqst *rqstp, struct svc_fh *fhp,
 	if (IS_ERR(dchild))
 		goto out_nfserr;
 
+	/* If file doesn't exist, check for permissions to create one */
+	if (!dchild->d_inode) {
+		err = fh_verify(rqstp, fhp, S_IFDIR, NFSD_MAY_CREATE);
+		if (err)
+			goto out;
+	}
+
 	err = fh_compose(resfhp, fhp->fh_export, dchild, fhp);
 	if (err)
 		goto out;
diff --git a/fs/partitions/ldm.c b/fs/partitions/ldm.c
index b10e3540d5b7..ce4f62440425 100644
--- a/fs/partitions/ldm.c
+++ b/fs/partitions/ldm.c
@@ -1299,6 +1299,11 @@ static bool ldm_frag_add (const u8 *data, int size, struct list_head *frags)
 
 	BUG_ON (!data || !frags);
 
+	if (size < 2 * VBLK_SIZE_HEAD) {
+		ldm_error("Value of size is to small.");
+		return false;
+	}
+
 	group = get_unaligned_be32(data + 0x08);
 	rec = get_unaligned_be16(data + 0x0C);
 	num = get_unaligned_be16(data + 0x0E);
@@ -1306,6 +1311,10 @@ static bool ldm_frag_add (const u8 *data, int size, struct list_head *frags)
 		ldm_error ("A VBLK claims to have %d parts.", num);
 		return false;
 	}
+	if (rec >= num) {
+		ldm_error("REC value (%d) exceeds NUM value (%d)", rec, num);
+		return false;
+	}
 
 	list_for_each (item, frags) {
 		f = list_entry (item, struct frag, list);
@@ -1334,10 +1343,9 @@ found:
 
 	f->map |= (1 << rec);
 
-	if (num > 0) {
-		data += VBLK_SIZE_HEAD;
-		size -= VBLK_SIZE_HEAD;
-	}
+	data += VBLK_SIZE_HEAD;
+	size -= VBLK_SIZE_HEAD;
+
 	memcpy (f->data+rec*(size-VBLK_SIZE_HEAD)+VBLK_SIZE_HEAD, data, size);
 
 	return true;
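
The two guards added to ldm_frag_add() above reject undersized and out-of-range
VBLK fragments before rec and size feed into the buffer arithmetic. A minimal
standalone sketch of the same validate-before-copy pattern follows; the names,
offsets, and bounds are hypothetical stand-ins, not the kernel's:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define FRAG_HEAD_SIZE	0x10	/* hypothetical per-fragment header size */

/* Copy an untrusted on-disk fragment into buf, range-checking every
 * field that is later used as an index or length before any memcpy(). */
static bool frag_copy(const uint8_t *data, size_t size,
		      uint8_t *buf, size_t bufsize)
{
	uint16_t rec, num;

	if (size < 2 * FRAG_HEAD_SIZE) {
		fprintf(stderr, "fragment too small\n");
		return false;
	}
	rec = (uint16_t)((data[0x0C] << 8) | data[0x0D]);
	num = (uint16_t)((data[0x0E] << 8) | data[0x0F]);
	if (num == 0 || rec >= num) {
		fprintf(stderr, "rec %u out of range (num %u)\n", rec, num);
		return false;
	}
	size -= FRAG_HEAD_SIZE;
	if ((size_t)rec * size + size > bufsize) {
		fprintf(stderr, "fragment would overflow buffer\n");
		return false;
	}
	memcpy(buf + (size_t)rec * size, data + FRAG_HEAD_SIZE, size);
	return true;
}

The design point mirrors the patch: every value read from the media is checked
against the destination buffer before it is trusted.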
diff --git a/fs/proc/base.c b/fs/proc/base.c
index dd6628d3ba42..dfa532730e55 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -3124,11 +3124,16 @@ static int proc_pid_fill_cache(struct file *filp, void *dirent, filldir_t filldi
 /* for the /proc/ directory itself, after non-process stuff has been done */
 int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir)
 {
-	unsigned int nr = filp->f_pos - FIRST_PROCESS_ENTRY;
-	struct task_struct *reaper = get_proc_task(filp->f_path.dentry->d_inode);
+	unsigned int nr;
+	struct task_struct *reaper;
 	struct tgid_iter iter;
 	struct pid_namespace *ns;
 
+	if (filp->f_pos >= PID_MAX_LIMIT + TGID_OFFSET)
+		goto out_no_task;
+	nr = filp->f_pos - FIRST_PROCESS_ENTRY;
+
+	reaper = get_proc_task(filp->f_path.dentry->d_inode);
 	if (!reaper)
 		goto out_no_task;
 
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index a925bf205497..d3c032f5fa0a 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -442,7 +442,7 @@ EXPORT_SYMBOL(dquot_acquire);
  */
 int dquot_commit(struct dquot *dquot)
 {
-	int ret = 0, ret2 = 0;
+	int ret = 0;
 	struct quota_info *dqopt = sb_dqopt(dquot->dq_sb);
 
 	mutex_lock(&dqopt->dqio_mutex);
@@ -454,15 +454,10 @@ int dquot_commit(struct dquot *dquot)
 	spin_unlock(&dq_list_lock);
 	/* Inactive dquot can be only if there was error during read/init
 	 * => we have better not writing it */
-	if (test_bit(DQ_ACTIVE_B, &dquot->dq_flags)) {
+	if (test_bit(DQ_ACTIVE_B, &dquot->dq_flags))
 		ret = dqopt->ops[dquot->dq_type]->commit_dqblk(dquot);
-		if (info_dirty(&dqopt->info[dquot->dq_type])) {
-			ret2 = dqopt->ops[dquot->dq_type]->write_file_info(
-					dquot->dq_sb, dquot->dq_type);
-		}
-		if (ret >= 0)
-			ret = ret2;
-	}
+	else
+		ret = -EIO;
 out_sem:
 	mutex_unlock(&dqopt->dqio_mutex);
 	return ret;
diff --git a/fs/ramfs/file-nommu.c b/fs/ramfs/file-nommu.c
index 9eead2c796b7..fbb0b478a346 100644
--- a/fs/ramfs/file-nommu.c
+++ b/fs/ramfs/file-nommu.c
@@ -112,6 +112,7 @@ int ramfs_nommu_expand_for_mapping(struct inode *inode, size_t newsize)
 		SetPageDirty(page);
 
 		unlock_page(page);
+		put_page(page);
 	}
 
 	return 0;
diff --git a/fs/ubifs/debug.h b/fs/ubifs/debug.h
index 919f0de29d8f..e6493cac193d 100644
--- a/fs/ubifs/debug.h
+++ b/fs/ubifs/debug.h
@@ -23,6 +23,12 @@
 #ifndef __UBIFS_DEBUG_H__
 #define __UBIFS_DEBUG_H__
 
+/* Checking helper functions */
+typedef int (*dbg_leaf_callback)(struct ubifs_info *c,
+				 struct ubifs_zbranch *zbr, void *priv);
+typedef int (*dbg_znode_callback)(struct ubifs_info *c,
+				  struct ubifs_znode *znode, void *priv);
+
 #ifdef CONFIG_UBIFS_FS_DEBUG
 
 /**
@@ -270,11 +276,6 @@ void dbg_dump_tnc(struct ubifs_info *c);
 void dbg_dump_index(struct ubifs_info *c);
 void dbg_dump_lpt_lebs(const struct ubifs_info *c);
 
-/* Checking helper functions */
-typedef int (*dbg_leaf_callback)(struct ubifs_info *c,
-				 struct ubifs_zbranch *zbr, void *priv);
-typedef int (*dbg_znode_callback)(struct ubifs_info *c,
-				  struct ubifs_znode *znode, void *priv);
 int dbg_walk_index(struct ubifs_info *c, dbg_leaf_callback leaf_cb,
 		   dbg_znode_callback znode_cb, void *priv);
 
@@ -295,7 +296,6 @@ int dbg_check_idx_size(struct ubifs_info *c, long long idx_size);
 int dbg_check_filesystem(struct ubifs_info *c);
 void dbg_check_heap(struct ubifs_info *c, struct ubifs_lpt_heap *heap, int cat,
 		    int add_pos);
-int dbg_check_lprops(struct ubifs_info *c);
 int dbg_check_lpt_nodes(struct ubifs_info *c, struct ubifs_cnode *cnode,
 			int row, int col);
 int dbg_check_inode_size(struct ubifs_info *c, const struct inode *inode,
@@ -401,58 +401,94 @@ void dbg_debugfs_exit_fs(struct ubifs_info *c);
 #define DBGKEY(key)  ((char *)(key))
 #define DBGKEY1(key) ((char *)(key))
 
-#define ubifs_debugging_init(c)                0
-#define ubifs_debugging_exit(c)                ({})
-
-#define dbg_ntype(type)                        ""
-#define dbg_cstate(cmt_state)                  ""
-#define dbg_jhead(jhead)                       ""
-#define dbg_get_key_dump(c, key)               ({})
-#define dbg_dump_inode(c, inode)               ({})
-#define dbg_dump_node(c, node)                 ({})
-#define dbg_dump_lpt_node(c, node, lnum, offs) ({})
-#define dbg_dump_budget_req(req)               ({})
-#define dbg_dump_lstats(lst)                   ({})
-#define dbg_dump_budg(c)                       ({})
-#define dbg_dump_lprop(c, lp)                  ({})
-#define dbg_dump_lprops(c)                     ({})
-#define dbg_dump_lpt_info(c)                   ({})
-#define dbg_dump_leb(c, lnum)                  ({})
-#define dbg_dump_znode(c, znode)               ({})
-#define dbg_dump_heap(c, heap, cat)            ({})
-#define dbg_dump_pnode(c, pnode, parent, iip)  ({})
-#define dbg_dump_tnc(c)                        ({})
-#define dbg_dump_index(c)                      ({})
-#define dbg_dump_lpt_lebs(c)                   ({})
-
-#define dbg_walk_index(c, leaf_cb, znode_cb, priv) 0
-#define dbg_old_index_check_init(c, zroot)         0
-#define dbg_save_space_info(c)                     ({})
-#define dbg_check_space_info(c)                    0
-#define dbg_check_old_index(c, zroot)              0
-#define dbg_check_cats(c)                          0
-#define dbg_check_ltab(c)                          0
-#define dbg_chk_lpt_free_spc(c)                    0
-#define dbg_chk_lpt_sz(c, action, len)             0
-#define dbg_check_synced_i_size(inode)             0
-#define dbg_check_dir_size(c, dir)                 0
-#define dbg_check_tnc(c, x)                        0
-#define dbg_check_idx_size(c, idx_size)            0
-#define dbg_check_filesystem(c)                    0
-#define dbg_check_heap(c, heap, cat, add_pos)      ({})
-#define dbg_check_lprops(c)                        0
-#define dbg_check_lpt_nodes(c, cnode, row, col)    0
-#define dbg_check_inode_size(c, inode, size)       0
-#define dbg_check_data_nodes_order(c, head)        0
-#define dbg_check_nondata_nodes_order(c, head)     0
-#define dbg_force_in_the_gaps_enabled              0
-#define dbg_force_in_the_gaps()                    0
-#define dbg_failure_mode                           0
-
-#define dbg_debugfs_init()                         0
-#define dbg_debugfs_exit()
-#define dbg_debugfs_init_fs(c)                     0
-#define dbg_debugfs_exit_fs(c)                     0
+static inline int ubifs_debugging_init(struct ubifs_info *c)      { return 0; }
+static inline void ubifs_debugging_exit(struct ubifs_info *c)     { return; }
+static inline const char *dbg_ntype(int type)                     { return ""; }
+static inline const char *dbg_cstate(int cmt_state)               { return ""; }
+static inline const char *dbg_jhead(int jhead)                    { return ""; }
+static inline const char *
+dbg_get_key_dump(const struct ubifs_info *c,
+		 const union ubifs_key *key)                      { return ""; }
+static inline void dbg_dump_inode(const struct ubifs_info *c,
+				  const struct inode *inode)      { return; }
+static inline void dbg_dump_node(const struct ubifs_info *c,
+				 const void *node)                { return; }
+static inline void dbg_dump_lpt_node(const struct ubifs_info *c,
+				     void *node, int lnum,
+				     int offs)                    { return; }
+static inline void
+dbg_dump_budget_req(const struct ubifs_budget_req *req)           { return; }
+static inline void
+dbg_dump_lstats(const struct ubifs_lp_stats *lst)                 { return; }
+static inline void dbg_dump_budg(struct ubifs_info *c)            { return; }
+static inline void dbg_dump_lprop(const struct ubifs_info *c,
+				  const struct ubifs_lprops *lp)  { return; }
+static inline void dbg_dump_lprops(struct ubifs_info *c)          { return; }
+static inline void dbg_dump_lpt_info(struct ubifs_info *c)        { return; }
+static inline void dbg_dump_leb(const struct ubifs_info *c,
+				int lnum)                         { return; }
+static inline void
+dbg_dump_znode(const struct ubifs_info *c,
+	       const struct ubifs_znode *znode)                   { return; }
+static inline void dbg_dump_heap(struct ubifs_info *c,
+				 struct ubifs_lpt_heap *heap,
+				 int cat)                         { return; }
+static inline void dbg_dump_pnode(struct ubifs_info *c,
+				  struct ubifs_pnode *pnode,
+				  struct ubifs_nnode *parent,
+				  int iip)                        { return; }
+static inline void dbg_dump_tnc(struct ubifs_info *c)             { return; }
+static inline void dbg_dump_index(struct ubifs_info *c)           { return; }
+static inline void dbg_dump_lpt_lebs(const struct ubifs_info *c)  { return; }
+
+static inline int dbg_walk_index(struct ubifs_info *c,
+				 dbg_leaf_callback leaf_cb,
+				 dbg_znode_callback znode_cb,
+				 void *priv)                      { return 0; }
+static inline void dbg_save_space_info(struct ubifs_info *c)      { return; }
+static inline int dbg_check_space_info(struct ubifs_info *c)      { return 0; }
+static inline int dbg_check_lprops(struct ubifs_info *c)          { return 0; }
+static inline int
+dbg_old_index_check_init(struct ubifs_info *c,
			 struct ubifs_zbranch *zroot)             { return 0; }
+static inline int
+dbg_check_old_index(struct ubifs_info *c,
+		    struct ubifs_zbranch *zroot)                  { return 0; }
+static inline int dbg_check_cats(struct ubifs_info *c)            { return 0; }
+static inline int dbg_check_ltab(struct ubifs_info *c)            { return 0; }
+static inline int dbg_chk_lpt_free_spc(struct ubifs_info *c)      { return 0; }
+static inline int dbg_chk_lpt_sz(struct ubifs_info *c,
				 int action, int len)             { return 0; }
+static inline int dbg_check_synced_i_size(struct inode *inode)    { return 0; }
+static inline int dbg_check_dir_size(struct ubifs_info *c,
				     const struct inode *dir)     { return 0; }
+static inline int dbg_check_tnc(struct ubifs_info *c, int extra)  { return 0; }
+static inline int dbg_check_idx_size(struct ubifs_info *c,
				     long long idx_size)          { return 0; }
+static inline int dbg_check_filesystem(struct ubifs_info *c)      { return 0; }
+static inline void dbg_check_heap(struct ubifs_info *c,
				  struct ubifs_lpt_heap *heap,
				  int cat, int add_pos)           { return; }
+static inline int dbg_check_lpt_nodes(struct ubifs_info *c,
	struct ubifs_cnode *cnode, int row, int col)              { return 0; }
+static inline int dbg_check_inode_size(struct ubifs_info *c,
				       const struct inode *inode,
				       loff_t size)               { return 0; }
+static inline int
+dbg_check_data_nodes_order(struct ubifs_info *c,
			   struct list_head *head)                { return 0; }
+static inline int
+dbg_check_nondata_nodes_order(struct ubifs_info *c,
			      struct list_head *head)             { return 0; }
+
+static inline int dbg_force_in_the_gaps(void)                     { return 0; }
+#define dbg_force_in_the_gaps_enabled 0
+#define dbg_failure_mode              0
+
+static inline int dbg_debugfs_init(void)                          { return 0; }
+static inline void dbg_debugfs_exit(void)                         { return; }
+static inline int dbg_debugfs_init_fs(struct ubifs_info *c)       { return 0; }
+static inline int dbg_debugfs_exit_fs(struct ubifs_info *c)       { return 0; }
 
 #endif /* !CONFIG_UBIFS_FS_DEBUG */
 #endif /* !__UBIFS_DEBUG_H__ */
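
The rewrite above trades the old no-op macros for empty static inline
functions, so builds without CONFIG_UBIFS_FS_DEBUG still type-check every
argument and debug-only call sites cannot silently bit-rot. A small
self-contained illustration of the pattern, using made-up names rather than
the UBIFS API:

#include <stdio.h>

struct ctx { int id; };

#ifdef WITH_DEBUG
void dbg_dump(const struct ctx *c);	/* real implementation elsewhere */
#else
/* Unlike "#define dbg_dump(c) ({})", the empty inline still forces the
 * caller to pass a valid 'const struct ctx *', so the compiler can catch
 * wrong arguments at debug-only call sites in non-debug builds too. */
static inline void dbg_dump(const struct ctx *c) { (void)c; }
#endif

int main(void)
{
	struct ctx c = { 42 };

	dbg_dump(&c);	/* checked by the compiler in both configurations */
	printf("ctx %d\n", c.id);
	return 0;
}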
diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c
index 28be1e6a65e8..b286db79c686 100644
--- a/fs/ubifs/file.c
+++ b/fs/ubifs/file.c
@@ -1312,6 +1312,9 @@ int ubifs_fsync(struct file *file, int datasync)
 
 	dbg_gen("syncing inode %lu", inode->i_ino);
 
+	if (inode->i_sb->s_flags & MS_RDONLY)
+		return 0;
+
 	/*
 	 * VFS has already synchronized dirty pages for this inode. Synchronize
 	 * the inode unless this is a 'datasync()' call.
diff --git a/fs/xattr.c b/fs/xattr.c
index a19acdb81cd1..f1ef94974dea 100644
--- a/fs/xattr.c
+++ b/fs/xattr.c
@@ -666,7 +666,7 @@ generic_setxattr(struct dentry *dentry, const char *name, const void *value, siz
 	handler = xattr_resolve_name(dentry->d_sb->s_xattr, &name);
 	if (!handler)
 		return -EOPNOTSUPP;
-	return handler->set(dentry, name, value, size, 0, handler->flags);
+	return handler->set(dentry, name, value, size, flags, handler->flags);
 }
 
 /*
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
index 5ea402023ebd..9ef9ed2cfe2e 100644
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ b/fs/xfs/linux-2.6/xfs_buf.c
@@ -293,7 +293,6 @@ xfs_buf_allocate_memory(
 	size_t			nbytes, offset;
 	gfp_t			gfp_mask = xb_to_gfp(flags);
 	unsigned short		page_count, i;
-	pgoff_t			first;
 	xfs_off_t		end;
 	int			error;
 
@@ -333,7 +332,6 @@ use_alloc_page:
 		return error;
 
 	offset = bp->b_offset;
-	first = bp->b_file_offset >> PAGE_SHIFT;
 	bp->b_flags |= _XBF_PAGES;
 
 	for (i = 0; i < bp->b_page_count; i++) {
@@ -657,8 +655,6 @@ xfs_buf_readahead(
 	xfs_off_t		ioff,
 	size_t			isize)
 {
-	struct backing_dev_info *bdi;
-
 	if (bdi_read_congested(target->bt_bdi))
 		return;
 
@@ -919,8 +915,6 @@ xfs_buf_lock(
 
 	if (atomic_read(&bp->b_pin_count) && (bp->b_flags & XBF_STALE))
 		xfs_log_force(bp->b_target->bt_mount, 0);
-	if (atomic_read(&bp->b_io_remaining))
-		blk_flush_plug(current);
 	down(&bp->b_sema);
 	XB_SET_OWNER(bp);
 
@@ -1309,8 +1303,6 @@ xfs_buf_iowait(
 {
 	trace_xfs_buf_iowait(bp, _RET_IP_);
 
-	if (atomic_read(&bp->b_io_remaining))
-		blk_flush_plug(current);
 	wait_for_completion(&bp->b_iowait);
 
 	trace_xfs_buf_iowait_done(bp, _RET_IP_);
@@ -1747,8 +1739,8 @@ xfsbufd(
 	do {
 		long	age = xfs_buf_age_centisecs * msecs_to_jiffies(10);
 		long	tout = xfs_buf_timer_centisecs * msecs_to_jiffies(10);
-		int	count = 0;
 		struct list_head tmp;
+		struct blk_plug plug;
 
 		if (unlikely(freezing(current))) {
 			set_bit(XBT_FORCE_SLEEP, &target->bt_flags);
@@ -1764,16 +1756,15 @@ xfsbufd(
 
 		xfs_buf_delwri_split(target, &tmp, age);
 		list_sort(NULL, &tmp, xfs_buf_cmp);
+
+		blk_start_plug(&plug);
 		while (!list_empty(&tmp)) {
 			struct xfs_buf *bp;
 			bp = list_first_entry(&tmp, struct xfs_buf, b_list);
 			list_del_init(&bp->b_list);
 			xfs_bdstrat_cb(bp);
-			count++;
 		}
-		if (count)
-			blk_flush_plug(current);
-
+		blk_finish_plug(&plug);
 	} while (!kthread_should_stop());
 
 	return 0;
@@ -1793,6 +1784,7 @@ xfs_flush_buftarg(
 	int		pincount = 0;
 	LIST_HEAD(tmp_list);
 	LIST_HEAD(wait_list);
+	struct blk_plug plug;
 
 	xfs_buf_runall_queues(xfsconvertd_workqueue);
 	xfs_buf_runall_queues(xfsdatad_workqueue);
@@ -1807,6 +1799,8 @@ xfs_flush_buftarg(
 	 * we do that after issuing all the IO.
 	 */
 	list_sort(NULL, &tmp_list, xfs_buf_cmp);
+
+	blk_start_plug(&plug);
 	while (!list_empty(&tmp_list)) {
 		bp = list_first_entry(&tmp_list, struct xfs_buf, b_list);
 		ASSERT(target == bp->b_target);
@@ -1817,10 +1811,10 @@ xfs_flush_buftarg(
 		}
 		xfs_bdstrat_cb(bp);
 	}
+	blk_finish_plug(&plug);
 
 	if (wait) {
-		/* Expedite and wait for IO to complete. */
-		blk_flush_plug(current);
+		/* Wait for IO to complete. */
 		while (!list_empty(&wait_list)) {
 			bp = list_first_entry(&wait_list, struct xfs_buf, b_list);
 
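
The xfs_buf.c hunks above drop the explicit blk_flush_plug(current) calls and
instead wrap each submission loop in an on-stack plug, letting the block layer
batch and merge the requests until blk_finish_plug(). A reduced kernel-style
sketch of that shape; the demo_* types and the issue hook are placeholders,
not xfs_buf code:

#include <linux/blkdev.h>
#include <linux/list.h>

struct demo_buf {
	struct list_head b_list;
};

/* Stand-in for xfs_bdstrat_cb(): issue the I/O for one buffer. */
static void demo_issue(struct demo_buf *bp)
{
	/* submit_bio() or equivalent would go here */
}

static void demo_submit_batch(struct list_head *batch)
{
	struct blk_plug plug;
	struct demo_buf *bp, *n;

	blk_start_plug(&plug);
	list_for_each_entry_safe(bp, n, batch, b_list) {
		list_del_init(&bp->b_list);
		demo_issue(bp);
	}
	blk_finish_plug(&plug);	/* one flush for the whole batch */
}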
diff --git a/fs/xfs/linux-2.6/xfs_message.c b/fs/xfs/linux-2.6/xfs_message.c
index 508e06fd7d1e..9f76cceb678d 100644
--- a/fs/xfs/linux-2.6/xfs_message.c
+++ b/fs/xfs/linux-2.6/xfs_message.c
@@ -28,53 +28,49 @@
 /*
  * XFS logging functions
  */
-static int
+static void
 __xfs_printk(
 	const char		*level,
 	const struct xfs_mount	*mp,
 	struct va_format	*vaf)
 {
-	if (mp && mp->m_fsname)
-		return printk("%sXFS (%s): %pV\n", level, mp->m_fsname, vaf);
-	return printk("%sXFS: %pV\n", level, vaf);
+	if (mp && mp->m_fsname) {
+		printk("%sXFS (%s): %pV\n", level, mp->m_fsname, vaf);
+		return;
+	}
+	printk("%sXFS: %pV\n", level, vaf);
 }
 
-int xfs_printk(
+void xfs_printk(
 	const char		*level,
 	const struct xfs_mount	*mp,
 	const char		*fmt, ...)
 {
 	struct va_format	vaf;
 	va_list			args;
-	int			r;
 
 	va_start(args, fmt);
 
 	vaf.fmt = fmt;
 	vaf.va = &args;
 
-	r = __xfs_printk(level, mp, &vaf);
+	__xfs_printk(level, mp, &vaf);
 	va_end(args);
-
-	return r;
 }
 
 #define define_xfs_printk_level(func, kern_level)		\
-int func(const struct xfs_mount *mp, const char *fmt, ...)	\
+void func(const struct xfs_mount *mp, const char *fmt, ...)	\
 {								\
 	struct va_format	vaf;				\
 	va_list			args;				\
-	int			r;				\
 								\
 	va_start(args, fmt);					\
 								\
 	vaf.fmt = fmt;						\
 	vaf.va = &args;						\
 								\
-	r = __xfs_printk(kern_level, mp, &vaf);			\
+	__xfs_printk(kern_level, mp, &vaf);			\
 	va_end(args);						\
-								\
-	return r;						\
 }								\
 
 define_xfs_printk_level(xfs_emerg, KERN_EMERG);
@@ -88,7 +84,7 @@ define_xfs_printk_level(xfs_info, KERN_INFO);
 define_xfs_printk_level(xfs_debug, KERN_DEBUG);
 #endif
 
-int
+void
 xfs_alert_tag(
 	const struct xfs_mount	*mp,
 	int			panic_tag,
@@ -97,7 +93,6 @@ xfs_alert_tag(
 	struct va_format	vaf;
 	va_list			args;
 	int			do_panic = 0;
-	int			r;
 
 	if (xfs_panic_mask && (xfs_panic_mask & panic_tag)) {
 		xfs_printk(KERN_ALERT, mp,
@@ -110,12 +105,10 @@ xfs_alert_tag(
 	vaf.fmt = fmt;
 	vaf.va = &args;
 
-	r = __xfs_printk(KERN_ALERT, mp, &vaf);
+	__xfs_printk(KERN_ALERT, mp, &vaf);
 	va_end(args);
 
 	BUG_ON(do_panic);
-
-	return r;
 }
 
 void
diff --git a/fs/xfs/linux-2.6/xfs_message.h b/fs/xfs/linux-2.6/xfs_message.h
index e77ffa16745b..f1b3fc1b6c4e 100644
--- a/fs/xfs/linux-2.6/xfs_message.h
+++ b/fs/xfs/linux-2.6/xfs_message.h
@@ -3,32 +3,34 @@
 
 struct xfs_mount;
 
-extern int xfs_printk(const char *level, const struct xfs_mount *mp,
+extern void xfs_printk(const char *level, const struct xfs_mount *mp,
 			const char *fmt, ...)
 	__attribute__ ((format (printf, 3, 4)));
-extern int xfs_emerg(const struct xfs_mount *mp, const char *fmt, ...)
+extern void xfs_emerg(const struct xfs_mount *mp, const char *fmt, ...)
 	__attribute__ ((format (printf, 2, 3)));
-extern int xfs_alert(const struct xfs_mount *mp, const char *fmt, ...)
+extern void xfs_alert(const struct xfs_mount *mp, const char *fmt, ...)
 	__attribute__ ((format (printf, 2, 3)));
-extern int xfs_alert_tag(const struct xfs_mount *mp, int tag,
+extern void xfs_alert_tag(const struct xfs_mount *mp, int tag,
 			const char *fmt, ...)
 	__attribute__ ((format (printf, 3, 4)));
-extern int xfs_crit(const struct xfs_mount *mp, const char *fmt, ...)
+extern void xfs_crit(const struct xfs_mount *mp, const char *fmt, ...)
 	__attribute__ ((format (printf, 2, 3)));
-extern int xfs_err(const struct xfs_mount *mp, const char *fmt, ...)
+extern void xfs_err(const struct xfs_mount *mp, const char *fmt, ...)
 	__attribute__ ((format (printf, 2, 3)));
-extern int xfs_warn(const struct xfs_mount *mp, const char *fmt, ...)
+extern void xfs_warn(const struct xfs_mount *mp, const char *fmt, ...)
 	__attribute__ ((format (printf, 2, 3)));
-extern int xfs_notice(const struct xfs_mount *mp, const char *fmt, ...)
+extern void xfs_notice(const struct xfs_mount *mp, const char *fmt, ...)
 	__attribute__ ((format (printf, 2, 3)));
-extern int xfs_info(const struct xfs_mount *mp, const char *fmt, ...)
+extern void xfs_info(const struct xfs_mount *mp, const char *fmt, ...)
 	__attribute__ ((format (printf, 2, 3)));
 
 #ifdef DEBUG
-extern int xfs_debug(const struct xfs_mount *mp, const char *fmt, ...)
+extern void xfs_debug(const struct xfs_mount *mp, const char *fmt, ...)
 	__attribute__ ((format (printf, 2, 3)));
 #else
-#define xfs_debug(mp, fmt, ...)	(0)
+static inline void xfs_debug(const struct xfs_mount *mp, const char *fmt, ...)
+{
+}
 #endif
 
 extern void assfail(char *expr, char *f, int l);
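
Note that the prototypes keep their format(printf, N, M) attributes even
though the return type is now void; the attribute, not the return value, is
what lets the compiler check the variadic arguments at every call site. A
hedged userspace sketch of the same shape, with invented names rather than
the XFS helpers:

#include <stdarg.h>
#include <stdio.h>

/* Argument 2 is the format string and the variadic list starts at
 * argument 3, exactly like the xfs_printk() declaration above. */
static void log_msg(const char *level, const char *fmt, ...)
	__attribute__ ((format (printf, 2, 3)));

static void log_msg(const char *level, const char *fmt, ...)
{
	va_list args;

	va_start(args, fmt);
	fprintf(stderr, "%s: ", level);
	vfprintf(stderr, fmt, args);
	fputc('\n', stderr);
	va_end(args);
}

int main(void)
{
	log_msg("warn", "found %d orphans", 3);
	/* log_msg("warn", "found %d orphans", "x") would warn at compile time */
	return 0;
}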
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index 1ba5c451da36..b38e58d02299 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -816,75 +816,6 @@ xfs_setup_devices(
 	return 0;
 }
 
-/*
- * XFS AIL push thread support
- */
-void
-xfsaild_wakeup(
-	struct xfs_ail		*ailp,
-	xfs_lsn_t		threshold_lsn)
-{
-	/* only ever move the target forwards */
-	if (XFS_LSN_CMP(threshold_lsn, ailp->xa_target) > 0) {
-		ailp->xa_target = threshold_lsn;
-		wake_up_process(ailp->xa_task);
-	}
-}
-
-STATIC int
-xfsaild(
-	void	*data)
-{
-	struct xfs_ail	*ailp = data;
-	xfs_lsn_t	last_pushed_lsn = 0;
-	long		tout = 0; /* milliseconds */
-
-	while (!kthread_should_stop()) {
-		/*
-		 * for short sleeps indicating congestion, don't allow us to
-		 * get woken early. Otherwise all we do is bang on the AIL lock
-		 * without making progress.
-		 */
-		if (tout && tout <= 20)
-			__set_current_state(TASK_KILLABLE);
-		else
-			__set_current_state(TASK_INTERRUPTIBLE);
-		schedule_timeout(tout ?
-				 msecs_to_jiffies(tout) : MAX_SCHEDULE_TIMEOUT);
-
-		/* swsusp */
-		try_to_freeze();
-
-		ASSERT(ailp->xa_mount->m_log);
-		if (XFS_FORCED_SHUTDOWN(ailp->xa_mount))
-			continue;
-
-		tout = xfsaild_push(ailp, &last_pushed_lsn);
-	}
-
-	return 0;
-}	/* xfsaild */
-
-int
-xfsaild_start(
-	struct xfs_ail	*ailp)
-{
-	ailp->xa_target = 0;
-	ailp->xa_task = kthread_run(xfsaild, ailp, "xfsaild/%s",
-				    ailp->xa_mount->m_fsname);
-	if (IS_ERR(ailp->xa_task))
-		return -PTR_ERR(ailp->xa_task);
-	return 0;
-}
-
-void
-xfsaild_stop(
-	struct xfs_ail	*ailp)
-{
-	kthread_stop(ailp->xa_task);
-}
-
-
 /* Catch misguided souls that try to use this interface on XFS */
 STATIC struct inode *
 xfs_fs_alloc_inode(
@@ -1191,22 +1122,12 @@ xfs_fs_sync_fs(
 		return -error;
 
 	if (laptop_mode) {
-		int	prev_sync_seq = mp->m_sync_seq;
-
 		/*
 		 * The disk must be active because we're syncing.
 		 * We schedule xfssyncd now (now that the disk is
 		 * active) instead of later (when it might not be).
 		 */
-		wake_up_process(mp->m_sync_task);
-		/*
-		 * We have to wait for the sync iteration to complete.
-		 * If we don't, the disk activity caused by the sync
-		 * will come after the sync is completed, and that
-		 * triggers another sync from laptop mode.
-		 */
-		wait_event(mp->m_wait_single_sync_task,
-				mp->m_sync_seq != prev_sync_seq);
+		flush_delayed_work_sync(&mp->m_sync_work);
 	}
 
 	return 0;
@@ -1490,9 +1411,6 @@ xfs_fs_fill_super(
 	spin_lock_init(&mp->m_sb_lock);
 	mutex_init(&mp->m_growlock);
 	atomic_set(&mp->m_active_trans, 0);
-	INIT_LIST_HEAD(&mp->m_sync_list);
-	spin_lock_init(&mp->m_sync_lock);
-	init_waitqueue_head(&mp->m_wait_single_sync_task);
 
 	mp->m_super = sb;
 	sb->s_fs_info = mp;
@@ -1799,6 +1717,38 @@ xfs_destroy_zones(void)
 }
 
 STATIC int __init
+xfs_init_workqueues(void)
+{
+	/*
+	 * max_active is set to 8 to give enough concurency to allow
+	 * multiple work operations on each CPU to run. This allows multiple
+	 * filesystems to be running sync work concurrently, and scales with
+	 * the number of CPUs in the system.
+	 */
+	xfs_syncd_wq = alloc_workqueue("xfssyncd", WQ_CPU_INTENSIVE, 8);
+	if (!xfs_syncd_wq)
+		goto out;
+
+	xfs_ail_wq = alloc_workqueue("xfsail", WQ_CPU_INTENSIVE, 8);
+	if (!xfs_ail_wq)
+		goto out_destroy_syncd;
+
+	return 0;
+
+out_destroy_syncd:
+	destroy_workqueue(xfs_syncd_wq);
+out:
+	return -ENOMEM;
+}
+
+STATIC void
+xfs_destroy_workqueues(void)
+{
+	destroy_workqueue(xfs_ail_wq);
+	destroy_workqueue(xfs_syncd_wq);
+}
+
+STATIC int __init
 init_xfs_fs(void)
 {
 	int			error;
@@ -1813,10 +1763,14 @@ init_xfs_fs(void)
 	if (error)
 		goto out;
 
-	error = xfs_mru_cache_init();
+	error = xfs_init_workqueues();
 	if (error)
 		goto out_destroy_zones;
 
+	error = xfs_mru_cache_init();
+	if (error)
+		goto out_destroy_wq;
+
 	error = xfs_filestream_init();
 	if (error)
 		goto out_mru_cache_uninit;
@@ -1833,6 +1787,10 @@ init_xfs_fs(void)
 	if (error)
 		goto out_cleanup_procfs;
 
+	error = xfs_init_workqueues();
+	if (error)
+		goto out_sysctl_unregister;
+
 	vfs_initquota();
 
 	error = register_filesystem(&xfs_fs_type);
@@ -1850,6 +1808,8 @@ init_xfs_fs(void)
 	xfs_filestream_uninit();
  out_mru_cache_uninit:
 	xfs_mru_cache_uninit();
+ out_destroy_wq:
+	xfs_destroy_workqueues();
 out_destroy_zones:
 	xfs_destroy_zones();
 out:
@@ -1866,6 +1826,7 @@ exit_xfs_fs(void)
 	xfs_buf_terminate();
 	xfs_filestream_uninit();
 	xfs_mru_cache_uninit();
+	xfs_destroy_workqueues();
 	xfs_destroy_zones();
 }
 
diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c
index 9cf35a688f53..e4f9c1b0836c 100644
--- a/fs/xfs/linux-2.6/xfs_sync.c
+++ b/fs/xfs/linux-2.6/xfs_sync.c
@@ -22,6 +22,7 @@
 #include "xfs_log.h"
 #include "xfs_inum.h"
 #include "xfs_trans.h"
+#include "xfs_trans_priv.h"
 #include "xfs_sb.h"
 #include "xfs_ag.h"
 #include "xfs_mount.h"
@@ -39,6 +40,8 @@
 #include <linux/kthread.h>
 #include <linux/freezer.h>
 
+struct workqueue_struct *xfs_syncd_wq;	/* sync workqueue */
+
 /*
  * The inode lookup is done in batches to keep the amount of lock traffic and
  * radix tree lookups to a minimum. The batch size is a trade off between
@@ -431,62 +434,12 @@ xfs_quiesce_attr(
 	xfs_unmountfs_writesb(mp);
 }
 
-/*
- * Enqueue a work item to be picked up by the vfs xfssyncd thread.
- * Doing this has two advantages:
- * - It saves on stack space, which is tight in certain situations
- * - It can be used (with care) as a mechanism to avoid deadlocks.
- * Flushing while allocating in a full filesystem requires both.
- */
-STATIC void
-xfs_syncd_queue_work(
-	struct xfs_mount *mp,
-	void		*data,
-	void		(*syncer)(struct xfs_mount *, void *),
-	struct completion *completion)
-{
-	struct xfs_sync_work *work;
-
-	work = kmem_alloc(sizeof(struct xfs_sync_work), KM_SLEEP);
-	INIT_LIST_HEAD(&work->w_list);
-	work->w_syncer = syncer;
-	work->w_data = data;
-	work->w_mount = mp;
-	work->w_completion = completion;
-	spin_lock(&mp->m_sync_lock);
-	list_add_tail(&work->w_list, &mp->m_sync_list);
-	spin_unlock(&mp->m_sync_lock);
-	wake_up_process(mp->m_sync_task);
-}
-
-/*
- * Flush delayed allocate data, attempting to free up reserved space
- * from existing allocations. At this point a new allocation attempt
- * has failed with ENOSPC and we are in the process of scratching our
- * heads, looking about for more room...
- */
-STATIC void
-xfs_flush_inodes_work(
-	struct xfs_mount *mp,
-	void		*arg)
-{
-	struct inode	*inode = arg;
-	xfs_sync_data(mp, SYNC_TRYLOCK);
-	xfs_sync_data(mp, SYNC_TRYLOCK | SYNC_WAIT);
-	iput(inode);
-}
-
-void
-xfs_flush_inodes(
-	xfs_inode_t	*ip)
+static void
+xfs_syncd_queue_sync(
	struct xfs_mount        *mp)
 {
-	struct inode	*inode = VFS_I(ip);
-	DECLARE_COMPLETION_ONSTACK(completion);
-
-	igrab(inode);
-	xfs_syncd_queue_work(ip->i_mount, inode, xfs_flush_inodes_work, &completion);
-	wait_for_completion(&completion);
-	xfs_log_force(ip->i_mount, XFS_LOG_SYNC);
+	queue_delayed_work(xfs_syncd_wq, &mp->m_sync_work,
+				msecs_to_jiffies(xfs_syncd_centisecs * 10));
 }
 
 /*
@@ -496,9 +449,10 @@ xfs_flush_inodes(
  */
 STATIC void
 xfs_sync_worker(
-	struct xfs_mount *mp,
-	void		*unused)
+	struct work_struct *work)
 {
+	struct xfs_mount *mp = container_of(to_delayed_work(work),
+					struct xfs_mount, m_sync_work);
 	int		error;
 
 	if (!(mp->m_flags & XFS_MOUNT_RDONLY)) {
@@ -508,73 +462,106 @@ xfs_sync_worker(
 			error = xfs_fs_log_dummy(mp);
 		else
 			xfs_log_force(mp, 0);
-		xfs_reclaim_inodes(mp, 0);
 		error = xfs_qm_sync(mp, SYNC_TRYLOCK);
+
+		/* start pushing all the metadata that is currently dirty */
+		xfs_ail_push_all(mp->m_ail);
 	}
-	mp->m_sync_seq++;
-	wake_up(&mp->m_wait_single_sync_task);
+
+	/* queue us up again */
+	xfs_syncd_queue_sync(mp);
 }
 
-STATIC int
-xfssyncd(
-	void			*arg)
+/*
+ * Queue a new inode reclaim pass if there are reclaimable inodes and there
+ * isn't a reclaim pass already in progress. By default it runs every 5s based
+ * on the xfs syncd work default of 30s. Perhaps this should have it's own
+ * tunable, but that can be done if this method proves to be ineffective or too
+ * aggressive.
+ */
+static void
+xfs_syncd_queue_reclaim(
+	struct xfs_mount        *mp)
 {
-	struct xfs_mount	*mp = arg;
-	long			timeleft;
-	xfs_sync_work_t		*work, *n;
-	LIST_HEAD		(tmp);
-
-	set_freezable();
-	timeleft = xfs_syncd_centisecs * msecs_to_jiffies(10);
-	for (;;) {
-		if (list_empty(&mp->m_sync_list))
-			timeleft = schedule_timeout_interruptible(timeleft);
-		/* swsusp */
-		try_to_freeze();
-		if (kthread_should_stop() && list_empty(&mp->m_sync_list))
-			break;
 
-		spin_lock(&mp->m_sync_lock);
-		/*
-		 * We can get woken by laptop mode, to do a sync -
-		 * that's the (only!) case where the list would be
-		 * empty with time remaining.
-		 */
-		if (!timeleft || list_empty(&mp->m_sync_list)) {
-			if (!timeleft)
-				timeleft = xfs_syncd_centisecs *
-							msecs_to_jiffies(10);
-			INIT_LIST_HEAD(&mp->m_sync_work.w_list);
-			list_add_tail(&mp->m_sync_work.w_list,
-					&mp->m_sync_list);
-		}
-		list_splice_init(&mp->m_sync_list, &tmp);
-		spin_unlock(&mp->m_sync_lock);
+	/*
+	 * We can have inodes enter reclaim after we've shut down the syncd
	 * workqueue during unmount, so don't allow reclaim work to be queued
	 * during unmount.
	 */
+	if (!(mp->m_super->s_flags & MS_ACTIVE))
+		return;
 
-		list_for_each_entry_safe(work, n, &tmp, w_list) {
-			(*work->w_syncer)(mp, work->w_data);
-			list_del(&work->w_list);
-			if (work == &mp->m_sync_work)
-				continue;
-			if (work->w_completion)
-				complete(work->w_completion);
-			kmem_free(work);
-		}
+	rcu_read_lock();
+	if (radix_tree_tagged(&mp->m_perag_tree, XFS_ICI_RECLAIM_TAG)) {
+		queue_delayed_work(xfs_syncd_wq, &mp->m_reclaim_work,
+			msecs_to_jiffies(xfs_syncd_centisecs / 6 * 10));
 	}
+	rcu_read_unlock();
+}
 
-	return 0;
+/*
+ * This is a fast pass over the inode cache to try to get reclaim moving on as
+ * many inodes as possible in a short period of time. It kicks itself every few
+ * seconds, as well as being kicked by the inode cache shrinker when memory
+ * goes low. It scans as quickly as possible avoiding locked inodes or those
+ * already being flushed, and once done schedules a future pass.
+ */
+STATIC void
+xfs_reclaim_worker(
+	struct work_struct *work)
+{
+	struct xfs_mount *mp = container_of(to_delayed_work(work),
+					struct xfs_mount, m_reclaim_work);
+
+	xfs_reclaim_inodes(mp, SYNC_TRYLOCK);
+	xfs_syncd_queue_reclaim(mp);
+}
+
+/*
+ * Flush delayed allocate data, attempting to free up reserved space
+ * from existing allocations. At this point a new allocation attempt
+ * has failed with ENOSPC and we are in the process of scratching our
+ * heads, looking about for more room.
+ *
+ * Queue a new data flush if there isn't one already in progress and
+ * wait for completion of the flush. This means that we only ever have one
+ * inode flush in progress no matter how many ENOSPC events are occurring and
+ * so will prevent the system from bogging down due to every concurrent
+ * ENOSPC event scanning all the active inodes in the system for writeback.
+ */
+void
+xfs_flush_inodes(
+	struct xfs_inode	*ip)
+{
+	struct xfs_mount	*mp = ip->i_mount;
+
+	queue_work(xfs_syncd_wq, &mp->m_flush_work);
+	flush_work_sync(&mp->m_flush_work);
+}
+
+STATIC void
+xfs_flush_worker(
+	struct work_struct *work)
+{
+	struct xfs_mount *mp = container_of(work,
+					struct xfs_mount, m_flush_work);
+
+	xfs_sync_data(mp, SYNC_TRYLOCK);
+	xfs_sync_data(mp, SYNC_TRYLOCK | SYNC_WAIT);
 }
 
 int
 xfs_syncd_init(
 	struct xfs_mount	*mp)
 {
-	mp->m_sync_work.w_syncer = xfs_sync_worker;
-	mp->m_sync_work.w_mount = mp;
-	mp->m_sync_work.w_completion = NULL;
-	mp->m_sync_task = kthread_run(xfssyncd, mp, "xfssyncd/%s", mp->m_fsname);
-	if (IS_ERR(mp->m_sync_task))
-		return -PTR_ERR(mp->m_sync_task);
+	INIT_WORK(&mp->m_flush_work, xfs_flush_worker);
+	INIT_DELAYED_WORK(&mp->m_sync_work, xfs_sync_worker);
+	INIT_DELAYED_WORK(&mp->m_reclaim_work, xfs_reclaim_worker);
+
+	xfs_syncd_queue_sync(mp);
+	xfs_syncd_queue_reclaim(mp);
+
 	return 0;
 }
 
@@ -582,7 +569,9 @@ void
 xfs_syncd_stop(
 	struct xfs_mount	*mp)
 {
-	kthread_stop(mp->m_sync_task);
+	cancel_delayed_work_sync(&mp->m_sync_work);
+	cancel_delayed_work_sync(&mp->m_reclaim_work);
+	cancel_work_sync(&mp->m_flush_work);
 }
 
 void
@@ -601,6 +590,10 @@ __xfs_inode_set_reclaim_tag(
 				XFS_INO_TO_AGNO(ip->i_mount, ip->i_ino),
 				XFS_ICI_RECLAIM_TAG);
 		spin_unlock(&ip->i_mount->m_perag_lock);
+
+		/* schedule periodic background inode reclaim */
+		xfs_syncd_queue_reclaim(ip->i_mount);
+
 		trace_xfs_perag_set_reclaim(ip->i_mount, pag->pag_agno,
 							-1, _RET_IP_);
 	}
@@ -1017,7 +1010,13 @@ xfs_reclaim_inodes(
 }
 
 /*
- * Shrinker infrastructure.
+ * Inode cache shrinker.
+ *
+ * When called we make sure that there is a background (fast) inode reclaim in
+ * progress, while we will throttle the speed of reclaim via doiing synchronous
+ * reclaim of inodes. That means if we come across dirty inodes, we wait for
+ * them to be cleaned, which we hope will not be very long due to the
+ * background walker having already kicked the IO off on those dirty inodes.
  */
 static int
 xfs_reclaim_inode_shrink(
@@ -1032,10 +1031,15 @@ xfs_reclaim_inode_shrink(
 
 	mp = container_of(shrink, struct xfs_mount, m_inode_shrink);
 	if (nr_to_scan) {
+		/* kick background reclaimer and push the AIL */
+		xfs_syncd_queue_reclaim(mp);
+		xfs_ail_push_all(mp->m_ail);
+
 		if (!(gfp_mask & __GFP_FS))
 			return -1;
 
-		xfs_reclaim_inodes_ag(mp, SYNC_TRYLOCK, &nr_to_scan);
+		xfs_reclaim_inodes_ag(mp, SYNC_TRYLOCK | SYNC_WAIT,
+					&nr_to_scan);
 		/* terminate if we don't exhaust the scan */
 		if (nr_to_scan > 0)
 			return -1;
diff --git a/fs/xfs/linux-2.6/xfs_sync.h b/fs/xfs/linux-2.6/xfs_sync.h
index 32ba6628290c..e3a6ad27415f 100644
--- a/fs/xfs/linux-2.6/xfs_sync.h
+++ b/fs/xfs/linux-2.6/xfs_sync.h
@@ -32,6 +32,8 @@ typedef struct xfs_sync_work {
 #define SYNC_WAIT		0x0001	/* wait for i/o to complete */
 #define SYNC_TRYLOCK		0x0002  /* only try to lock inodes */
 
+extern struct workqueue_struct *xfs_syncd_wq;	/* sync workqueue */
+
 int xfs_syncd_init(struct xfs_mount *mp);
 void xfs_syncd_stop(struct xfs_mount *mp);
 
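
xfs_sync_worker() and xfs_reclaim_worker() above share one pattern: a delayed
work item performs a bounded pass and re-queues itself, and teardown is a
single cancel_delayed_work_sync(), which both waits for a running pass and
blocks the self-re-arm. A minimal module-shaped sketch of that pattern; the
demo_* names and the 30s period are illustrative assumptions:

#include <linux/module.h>
#include <linux/workqueue.h>
#include <linux/jiffies.h>

static struct workqueue_struct *demo_wq;
static struct delayed_work demo_work;

static void demo_worker(struct work_struct *work)
{
	pr_info("periodic pass\n");
	/* re-queue ourselves, as xfs_syncd_queue_sync() does */
	queue_delayed_work(demo_wq, &demo_work, msecs_to_jiffies(30000));
}

static int __init demo_init(void)
{
	demo_wq = alloc_workqueue("demo_syncd", WQ_CPU_INTENSIVE, 8);
	if (!demo_wq)
		return -ENOMEM;
	INIT_DELAYED_WORK(&demo_work, demo_worker);
	queue_delayed_work(demo_wq, &demo_work, msecs_to_jiffies(30000));
	return 0;
}

static void __exit demo_exit(void)
{
	/* waits for a running pass and prevents further re-arming */
	cancel_delayed_work_sync(&demo_work);
	destroy_workqueue(demo_wq);
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");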
diff --git a/fs/xfs/quota/xfs_qm.c b/fs/xfs/quota/xfs_qm.c
index 254ee062bd7d..69228aa8605a 100644
--- a/fs/xfs/quota/xfs_qm.c
+++ b/fs/xfs/quota/xfs_qm.c
@@ -461,12 +461,10 @@ xfs_qm_dqflush_all(
 	struct xfs_quotainfo	*q = mp->m_quotainfo;
 	int			recl;
 	struct xfs_dquot	*dqp;
-	int			niters;
 	int			error;
 
 	if (!q)
 		return 0;
-	niters = 0;
 again:
 	mutex_lock(&q->qi_dqlist_lock);
 	list_for_each_entry(dqp, &q->qi_dqlist, q_mplist) {
@@ -1314,14 +1312,9 @@ xfs_qm_dqiter_bufs(
 {
 	xfs_buf_t	*bp;
 	int		error;
-	int		notcommitted;
-	int		incr;
 	int		type;
 
 	ASSERT(blkcnt > 0);
-	notcommitted = 0;
-	incr = (blkcnt > XFS_QM_MAX_DQCLUSTER_LOGSZ) ?
-		XFS_QM_MAX_DQCLUSTER_LOGSZ : blkcnt;
 	type = flags & XFS_QMOPT_UQUOTA ? XFS_DQ_USER :
 		(flags & XFS_QMOPT_PQUOTA ? XFS_DQ_PROJ : XFS_DQ_GROUP);
 	error = 0;
diff --git a/fs/xfs/quota/xfs_qm.h b/fs/xfs/quota/xfs_qm.h
index c9446f1c726d..567b29b9f1b3 100644
--- a/fs/xfs/quota/xfs_qm.h
+++ b/fs/xfs/quota/xfs_qm.h
@@ -65,11 +65,6 @@ extern kmem_zone_t	*qm_dqtrxzone;
  * block in the dquot/xqm code.
  */
 #define XFS_DQUOT_CLUSTER_SIZE_FSB	(xfs_filblks_t)1
-/*
- * When doing a quotacheck, we log dquot clusters of this many FSBs at most
- * in a single transaction. We don't want to ask for too huge a log reservation.
- */
-#define XFS_QM_MAX_DQCLUSTER_LOGSZ	3
 
 typedef xfs_dqhash_t	xfs_dqlist_t;
 
diff --git a/fs/xfs/quota/xfs_qm_syscalls.c b/fs/xfs/quota/xfs_qm_syscalls.c
index 0d62a07b7fd8..2dadb15d5ca9 100644
--- a/fs/xfs/quota/xfs_qm_syscalls.c
+++ b/fs/xfs/quota/xfs_qm_syscalls.c
@@ -313,14 +313,12 @@ xfs_qm_scall_quotaon(
 {
 	int		error;
 	uint		qf;
-	uint		accflags;
 	__int64_t	sbflags;
 
 	flags &= (XFS_ALL_QUOTA_ACCT | XFS_ALL_QUOTA_ENFD);
 	/*
 	 * Switching on quota accounting must be done at mount time.
 	 */
-	accflags = flags & XFS_ALL_QUOTA_ACCT;
 	flags &= ~(XFS_ALL_QUOTA_ACCT);
 
 	sbflags = 0;
diff --git a/fs/xfs/xfs_alloc.c b/fs/xfs/xfs_alloc.c
index 4bc3c649aee4..27d64d752eab 100644
--- a/fs/xfs/xfs_alloc.c
+++ b/fs/xfs/xfs_alloc.c
@@ -2395,17 +2395,33 @@ xfs_free_extent(
 	memset(&args, 0, sizeof(xfs_alloc_arg_t));
 	args.tp = tp;
 	args.mp = tp->t_mountp;
+
+	/*
+	 * validate that the block number is legal - the enables us to detect
+	 * and handle a silent filesystem corruption rather than crashing.
+	 */
 	args.agno = XFS_FSB_TO_AGNO(args.mp, bno);
-	ASSERT(args.agno < args.mp->m_sb.sb_agcount);
+	if (args.agno >= args.mp->m_sb.sb_agcount)
+		return EFSCORRUPTED;
+
 	args.agbno = XFS_FSB_TO_AGBNO(args.mp, bno);
+	if (args.agbno >= args.mp->m_sb.sb_agblocks)
+		return EFSCORRUPTED;
+
 	args.pag = xfs_perag_get(args.mp, args.agno);
-	if ((error = xfs_alloc_fix_freelist(&args, XFS_ALLOC_FLAG_FREEING)))
+	ASSERT(args.pag);
+
+	error = xfs_alloc_fix_freelist(&args, XFS_ALLOC_FLAG_FREEING);
+	if (error)
 		goto error0;
-#ifdef DEBUG
-	ASSERT(args.agbp != NULL);
-	ASSERT((args.agbno + len) <=
-		be32_to_cpu(XFS_BUF_TO_AGF(args.agbp)->agf_length));
-#endif
+
+	/* validate the extent size is legal now we have the agf locked */
+	if (args.agbno + len >
+	    be32_to_cpu(XFS_BUF_TO_AGF(args.agbp)->agf_length)) {
+		error = EFSCORRUPTED;
+		goto error0;
+	}
+
 	error = xfs_free_ag_extent(tp, args.agbp, args.agno, args.agbno, len, 0);
 error0:
 	xfs_perag_put(args.pag);
diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c
index 46cc40131d4a..576fdfe81d60 100644
--- a/fs/xfs/xfs_inode_item.c
+++ b/fs/xfs/xfs_inode_item.c
@@ -198,6 +198,41 @@ xfs_inode_item_size(
 }
 
 /*
+ * xfs_inode_item_format_extents - convert in-core extents to on-disk form
+ *
+ * For either the data or attr fork in extent format, we need to endian convert
+ * the in-core extent as we place them into the on-disk inode. In this case, we
+ * need to do this conversion before we write the extents into the log. Because
+ * we don't have the disk inode to write into here, we allocate a buffer and
+ * format the extents into it via xfs_iextents_copy(). We free the buffer in
+ * the unlock routine after the copy for the log has been made.
+ *
+ * In the case of the data fork, the in-core and on-disk fork sizes can be
+ * different due to delayed allocation extents. We only log on-disk extents
+ * here, so always use the physical fork size to determine the size of the
+ * buffer we need to allocate.
+ */
+STATIC void
+xfs_inode_item_format_extents(
+	struct xfs_inode	*ip,
+	struct xfs_log_iovec	*vecp,
+	int			whichfork,
+	int			type)
+{
+	xfs_bmbt_rec_t		*ext_buffer;
+
+	ext_buffer = kmem_alloc(XFS_IFORK_SIZE(ip, whichfork), KM_SLEEP);
+	if (whichfork == XFS_DATA_FORK)
+		ip->i_itemp->ili_extents_buf = ext_buffer;
+	else
+		ip->i_itemp->ili_aextents_buf = ext_buffer;
+
+	vecp->i_addr = ext_buffer;
+	vecp->i_len = xfs_iextents_copy(ip, ext_buffer, whichfork);
+	vecp->i_type = type;
+}
+
+/*
  * This is called to fill in the vector of log iovecs for the
  * given inode log item. It fills the first item with an inode
  * log format structure, the second with the on-disk inode structure,
@@ -213,7 +248,6 @@ xfs_inode_item_format(
 	struct xfs_inode	*ip = iip->ili_inode;
 	uint			nvecs;
 	size_t			data_bytes;
-	xfs_bmbt_rec_t		*ext_buffer;
 	xfs_mount_t		*mp;
 
 	vecp->i_addr = &iip->ili_format;
@@ -320,22 +354,8 @@ xfs_inode_item_format(
 		} else
 #endif
 		{
-			/*
-			 * There are delayed allocation extents
-			 * in the inode, or we need to convert
-			 * the extents to on disk format.
-			 * Use xfs_iextents_copy()
-			 * to copy only the real extents into
-			 * a separate buffer. We'll free the
-			 * buffer in the unlock routine.
-			 */
-			ext_buffer = kmem_alloc(ip->i_df.if_bytes,
-				KM_SLEEP);
-			iip->ili_extents_buf = ext_buffer;
-			vecp->i_addr = ext_buffer;
-			vecp->i_len = xfs_iextents_copy(ip, ext_buffer,
-				XFS_DATA_FORK);
-			vecp->i_type = XLOG_REG_TYPE_IEXT;
+			xfs_inode_item_format_extents(ip, vecp,
+				XFS_DATA_FORK, XLOG_REG_TYPE_IEXT);
 		}
 		ASSERT(vecp->i_len <= ip->i_df.if_bytes);
 		iip->ili_format.ilf_dsize = vecp->i_len;
@@ -445,19 +465,12 @@ xfs_inode_item_format(
 			 */
 			vecp->i_addr = ip->i_afp->if_u1.if_extents;
 			vecp->i_len = ip->i_afp->if_bytes;
+			vecp->i_type = XLOG_REG_TYPE_IATTR_EXT;
 #else
 			ASSERT(iip->ili_aextents_buf == NULL);
-			/*
-			 * Need to endian flip before logging
-			 */
-			ext_buffer = kmem_alloc(ip->i_afp->if_bytes,
-				KM_SLEEP);
-			iip->ili_aextents_buf = ext_buffer;
-			vecp->i_addr = ext_buffer;
-			vecp->i_len = xfs_iextents_copy(ip, ext_buffer,
-				XFS_ATTR_FORK);
+			xfs_inode_item_format_extents(ip, vecp,
+				XFS_ATTR_FORK, XLOG_REG_TYPE_IATTR_EXT);
 #endif
-			vecp->i_type = XLOG_REG_TYPE_IATTR_EXT;
 			iip->ili_format.ilf_asize = vecp->i_len;
 			vecp++;
 			nvecs++;
diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c
index dc1882adaf54..751e94fe1f77 100644
--- a/fs/xfs/xfs_itable.c
+++ b/fs/xfs/xfs_itable.c
@@ -204,7 +204,6 @@ xfs_bulkstat(
 	xfs_agi_t		*agi;	/* agi header data */
 	xfs_agino_t		agino;	/* inode # in allocation group */
 	xfs_agnumber_t		agno;	/* allocation group number */
-	xfs_daddr_t		bno;	/* inode cluster start daddr */
 	int			chunkidx; /* current index into inode chunk */
 	int			clustidx; /* current index into inode cluster */
 	xfs_btree_cur_t		*cur;	/* btree cursor for ialloc btree */
@@ -463,7 +462,6 @@ xfs_bulkstat(
 					mp->m_sb.sb_inopblog);
 			}
 			ino = XFS_AGINO_TO_INO(mp, agno, agino);
-			bno = XFS_AGB_TO_DADDR(mp, agno, agbno);
 			/*
 			 * Skip if this inode is free.
 			 */
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index 25efa9b8a602..b612ce4520ae 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -761,7 +761,7 @@ xfs_log_need_covered(xfs_mount_t *mp)
761 break; 761 break;
762 case XLOG_STATE_COVER_NEED: 762 case XLOG_STATE_COVER_NEED:
763 case XLOG_STATE_COVER_NEED2: 763 case XLOG_STATE_COVER_NEED2:
764 if (!xfs_trans_ail_tail(log->l_ailp) && 764 if (!xfs_ail_min_lsn(log->l_ailp) &&
765 xlog_iclogs_empty(log)) { 765 xlog_iclogs_empty(log)) {
766 if (log->l_covered_state == XLOG_STATE_COVER_NEED) 766 if (log->l_covered_state == XLOG_STATE_COVER_NEED)
767 log->l_covered_state = XLOG_STATE_COVER_DONE; 767 log->l_covered_state = XLOG_STATE_COVER_DONE;
@@ -801,7 +801,7 @@ xlog_assign_tail_lsn(
801 xfs_lsn_t tail_lsn; 801 xfs_lsn_t tail_lsn;
802 struct log *log = mp->m_log; 802 struct log *log = mp->m_log;
803 803
804 tail_lsn = xfs_trans_ail_tail(mp->m_ail); 804 tail_lsn = xfs_ail_min_lsn(mp->m_ail);
805 if (!tail_lsn) 805 if (!tail_lsn)
806 tail_lsn = atomic64_read(&log->l_last_sync_lsn); 806 tail_lsn = atomic64_read(&log->l_last_sync_lsn);
807 807
@@ -1239,7 +1239,7 @@ xlog_grant_push_ail(
1239 * the filesystem is shutting down. 1239 * the filesystem is shutting down.
1240 */ 1240 */
1241 if (!XLOG_FORCED_SHUTDOWN(log)) 1241 if (!XLOG_FORCED_SHUTDOWN(log))
1242 xfs_trans_ail_push(log->l_ailp, threshold_lsn); 1242 xfs_ail_push(log->l_ailp, threshold_lsn);
1243} 1243}
1244 1244
1245/* 1245/*
@@ -3407,6 +3407,17 @@ xlog_verify_dest_ptr(
3407 xfs_emerg(log->l_mp, "%s: invalid ptr", __func__); 3407 xfs_emerg(log->l_mp, "%s: invalid ptr", __func__);
3408} 3408}
3409 3409
3410/*
3411 * Check to make sure the grant write head didn't just overlap the tail. If
3412 * the cycles are the same, we can't be overlapping. Otherwise, make sure that
3413 * the cycles differ by exactly one and check the byte count.
3414 *
3415 * This check is run unlocked, so it can give false positives. Rather than
3416 * assert on failures, use a warn-once flag and a panic tag to allow the admin
3417 * to determine if they want to panic the machine when such an error occurs.
3418 * For debug kernels this will have the same effect as using an assert but,
3419 * unlike an assert, it can be turned off at runtime.
3420 */
3410STATIC void 3421STATIC void
3411xlog_verify_grant_tail( 3422xlog_verify_grant_tail(
3412 struct log *log) 3423 struct log *log)
@@ -3414,17 +3425,22 @@ xlog_verify_grant_tail(
3414 int tail_cycle, tail_blocks; 3425 int tail_cycle, tail_blocks;
3415 int cycle, space; 3426 int cycle, space;
3416 3427
3417 /*
3418 * Check to make sure the grant write head didn't just over lap the
3419 * tail. If the cycles are the same, we can't be overlapping.
3420 * Otherwise, make sure that the cycles differ by exactly one and
3421 * check the byte count.
3422 */
3423 xlog_crack_grant_head(&log->l_grant_write_head, &cycle, &space); 3428 xlog_crack_grant_head(&log->l_grant_write_head, &cycle, &space);
3424 xlog_crack_atomic_lsn(&log->l_tail_lsn, &tail_cycle, &tail_blocks); 3429 xlog_crack_atomic_lsn(&log->l_tail_lsn, &tail_cycle, &tail_blocks);
3425 if (tail_cycle != cycle) { 3430 if (tail_cycle != cycle) {
3426 ASSERT(cycle - 1 == tail_cycle); 3431 if (cycle - 1 != tail_cycle &&
3427 ASSERT(space <= BBTOB(tail_blocks)); 3432 !(log->l_flags & XLOG_TAIL_WARN)) {
3433 xfs_alert_tag(log->l_mp, XFS_PTAG_LOGRES,
3434 "%s: cycle - 1 != tail_cycle", __func__);
3435 log->l_flags |= XLOG_TAIL_WARN;
3436 }
3437
3438 if (space > BBTOB(tail_blocks) &&
3439 !(log->l_flags & XLOG_TAIL_WARN)) {
3440 xfs_alert_tag(log->l_mp, XFS_PTAG_LOGRES,
3441 "%s: space > BBTOB(tail_blocks)", __func__);
3442 log->l_flags |= XLOG_TAIL_WARN;
3443 }
3428 } 3444 }
3429} 3445}
3430 3446
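
The rewritten check trades ASSERT() for xfs_alert_tag() precisely so the failure mode is configurable. A simplified sketch of how a tagged alert escalates to a BUG under admin control, modelled on the xfs_message.c helpers this merge also touches (approximate, not quoted from the patch):

/*
 * Emit an alert and, if the admin has set the matching bit in the
 * xfs_panic_mask sysctl, turn it into a BUG.
 */
void
xfs_alert_tag(
	const struct xfs_mount	*mp,
	int			panic_tag,
	const char		*fmt, ...)
{
	int	do_panic = 0;

	if (xfs_panic_mask && (xfs_panic_mask & panic_tag)) {
		xfs_alert(mp, "Transforming an alert into a BUG.");
		do_panic = 1;
	}

	/* ... format @fmt and print it at KERN_ALERT here ... */

	BUG_ON(do_panic);		/* escalation is off by default */
}

Setting the XFS_PTAG_LOGRES bit in /proc/sys/fs/xfs/panic_mask therefore makes the warn-once message above fatal, approximating the old ASSERT() behaviour while remaining switchable at runtime.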
diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h
index ffae692c9832..5864850e9e34 100644
--- a/fs/xfs/xfs_log_priv.h
+++ b/fs/xfs/xfs_log_priv.h
@@ -144,6 +144,7 @@ static inline uint xlog_get_client_id(__be32 i)
144#define XLOG_RECOVERY_NEEDED 0x4 /* log was recovered */ 144#define XLOG_RECOVERY_NEEDED 0x4 /* log was recovered */
145#define XLOG_IO_ERROR 0x8 /* log hit an I/O error, and being 145#define XLOG_IO_ERROR 0x8 /* log hit an I/O error, and being
146 shutdown */ 146 shutdown */
147#define XLOG_TAIL_WARN 0x10 /* log tail verify warning issued */
147 148
148#ifdef __KERNEL__ 149#ifdef __KERNEL__
149/* 150/*
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index a62e8971539d..19af0ab0d0c6 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -203,12 +203,9 @@ typedef struct xfs_mount {
203 struct mutex m_icsb_mutex; /* balancer sync lock */ 203 struct mutex m_icsb_mutex; /* balancer sync lock */
204#endif 204#endif
205 struct xfs_mru_cache *m_filestream; /* per-mount filestream data */ 205 struct xfs_mru_cache *m_filestream; /* per-mount filestream data */
206 struct task_struct *m_sync_task; /* generalised sync thread */ 206 struct delayed_work m_sync_work; /* background sync work */
207 xfs_sync_work_t m_sync_work; /* work item for VFS_SYNC */ 207 struct delayed_work m_reclaim_work; /* background inode reclaim */
208 struct list_head m_sync_list; /* sync thread work item list */ 208 struct work_struct m_flush_work; /* background inode flush */
209 spinlock_t m_sync_lock; /* work item list lock */
210 int m_sync_seq; /* sync thread generation no. */
211 wait_queue_head_t m_wait_single_sync_task;
212 __int64_t m_update_flags; /* sb flags we need to update 209 __int64_t m_update_flags; /* sb flags we need to update
213 on the next remount,rw */ 210 on the next remount,rw */
214 struct shrinker m_inode_shrink; /* inode reclaim shrinker */ 211 struct shrinker m_inode_shrink; /* inode reclaim shrinker */
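
The xfssyncd thread machinery in the mount structure is replaced by three work items on a shared workqueue. A minimal sketch of the resulting lifecycle; the worker function names are assumptions here, since the xfs_sync.c hunks that define them are not quoted in this section:

static void
xfs_syncd_init_sketch(
	struct xfs_mount	*mp)
{
	INIT_WORK(&mp->m_flush_work, xfs_flush_worker);
	INIT_DELAYED_WORK(&mp->m_sync_work, xfs_sync_worker);
	INIT_DELAYED_WORK(&mp->m_reclaim_work, xfs_reclaim_worker);

	/* first background sync fires xfs_syncd_centisecs from now */
	queue_delayed_work(xfs_syncd_wq, &mp->m_sync_work,
			   msecs_to_jiffies(xfs_syncd_centisecs * 10));
}

static void
xfs_syncd_stop_sketch(
	struct xfs_mount	*mp)
{
	/* wait out any queued or running instance before teardown */
	cancel_delayed_work_sync(&mp->m_sync_work);
	cancel_delayed_work_sync(&mp->m_reclaim_work);
	cancel_work_sync(&mp->m_flush_work);
}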
diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c
index 12aff9584e29..acdb92f14d51 100644
--- a/fs/xfs/xfs_trans_ail.c
+++ b/fs/xfs/xfs_trans_ail.c
@@ -28,74 +28,138 @@
28#include "xfs_trans_priv.h" 28#include "xfs_trans_priv.h"
29#include "xfs_error.h" 29#include "xfs_error.h"
30 30
31STATIC void xfs_ail_splice(struct xfs_ail *, struct list_head *, xfs_lsn_t); 31struct workqueue_struct *xfs_ail_wq; /* AIL workqueue */
32STATIC void xfs_ail_delete(struct xfs_ail *, xfs_log_item_t *);
33STATIC xfs_log_item_t * xfs_ail_min(struct xfs_ail *);
34STATIC xfs_log_item_t * xfs_ail_next(struct xfs_ail *, xfs_log_item_t *);
35 32
36#ifdef DEBUG 33#ifdef DEBUG
37STATIC void xfs_ail_check(struct xfs_ail *, xfs_log_item_t *); 34/*
38#else 35 * Check that the list is sorted as it should be.
36 */
37STATIC void
38xfs_ail_check(
39 struct xfs_ail *ailp,
40 xfs_log_item_t *lip)
41{
42 xfs_log_item_t *prev_lip;
43
44 if (list_empty(&ailp->xa_ail))
45 return;
46
47 /*
48 * Check the next and previous entries are valid.
49 */
50 ASSERT((lip->li_flags & XFS_LI_IN_AIL) != 0);
51 prev_lip = list_entry(lip->li_ail.prev, xfs_log_item_t, li_ail);
52 if (&prev_lip->li_ail != &ailp->xa_ail)
53 ASSERT(XFS_LSN_CMP(prev_lip->li_lsn, lip->li_lsn) <= 0);
54
55 prev_lip = list_entry(lip->li_ail.next, xfs_log_item_t, li_ail);
56 if (&prev_lip->li_ail != &ailp->xa_ail)
57 ASSERT(XFS_LSN_CMP(prev_lip->li_lsn, lip->li_lsn) >= 0);
58
59
60#ifdef XFS_TRANS_DEBUG
61 /*
62 * Walk the list checking lsn ordering, and that every entry has the
63 * XFS_LI_IN_AIL flag set. This is really expensive, so only do it
64 * when specifically debugging the transaction subsystem.
65 */
66 prev_lip = list_entry(&ailp->xa_ail, xfs_log_item_t, li_ail);
67 list_for_each_entry(lip, &ailp->xa_ail, li_ail) {
68 if (&prev_lip->li_ail != &ailp->xa_ail)
69 ASSERT(XFS_LSN_CMP(prev_lip->li_lsn, lip->li_lsn) <= 0);
70 ASSERT((lip->li_flags & XFS_LI_IN_AIL) != 0);
71 prev_lip = lip;
72 }
73#endif /* XFS_TRANS_DEBUG */
74}
75#else /* !DEBUG */
39#define xfs_ail_check(a,l) 76#define xfs_ail_check(a,l)
40#endif /* DEBUG */ 77#endif /* DEBUG */
41 78
79/*
80 * Return a pointer to the first item in the AIL. If the AIL is empty, then
81 * return NULL.
82 */
83static xfs_log_item_t *
84xfs_ail_min(
85 struct xfs_ail *ailp)
86{
87 if (list_empty(&ailp->xa_ail))
88 return NULL;
89
90 return list_first_entry(&ailp->xa_ail, xfs_log_item_t, li_ail);
91}
92
93/*
94 * Return a pointer to the last item in the AIL. If the AIL is empty, then
95 * return NULL.
96 */
97static xfs_log_item_t *
98xfs_ail_max(
99 struct xfs_ail *ailp)
100{
101 if (list_empty(&ailp->xa_ail))
102 return NULL;
103
104 return list_entry(ailp->xa_ail.prev, xfs_log_item_t, li_ail);
105}
106
107/*
108 * Return a pointer to the item which follows the given item in the AIL. If
109 * the given item is the last item in the list, then return NULL.
110 */
111static xfs_log_item_t *
112xfs_ail_next(
113 struct xfs_ail *ailp,
114 xfs_log_item_t *lip)
115{
116 if (lip->li_ail.next == &ailp->xa_ail)
117 return NULL;
118
119 return list_first_entry(&lip->li_ail, xfs_log_item_t, li_ail);
120}
42 121
43/* 122/*
44 * This is called by the log manager code to determine the LSN 123 * This is called by the log manager code to determine the LSN of the tail of
45 * of the tail of the log. This is exactly the LSN of the first 124 * the log. This is exactly the LSN of the first item in the AIL. If the AIL
46 * item in the AIL. If the AIL is empty, then this function 125 * is empty, then this function returns 0.
47 * returns 0.
48 * 126 *
49 * We need the AIL lock in order to get a coherent read of the 127 * We need the AIL lock in order to get a coherent read of the lsn of the last
50 * lsn of the last item in the AIL. 128 * item in the AIL.
51 */ 129 */
52xfs_lsn_t 130xfs_lsn_t
53xfs_trans_ail_tail( 131xfs_ail_min_lsn(
54 struct xfs_ail *ailp) 132 struct xfs_ail *ailp)
55{ 133{
56 xfs_lsn_t lsn; 134 xfs_lsn_t lsn = 0;
57 xfs_log_item_t *lip; 135 xfs_log_item_t *lip;
58 136
59 spin_lock(&ailp->xa_lock); 137 spin_lock(&ailp->xa_lock);
60 lip = xfs_ail_min(ailp); 138 lip = xfs_ail_min(ailp);
61 if (lip == NULL) { 139 if (lip)
62 lsn = (xfs_lsn_t)0;
63 } else {
64 lsn = lip->li_lsn; 140 lsn = lip->li_lsn;
65 }
66 spin_unlock(&ailp->xa_lock); 141 spin_unlock(&ailp->xa_lock);
67 142
68 return lsn; 143 return lsn;
69} 144}
70 145
71/* 146/*
72 * xfs_trans_push_ail 147 * Return the maximum lsn held in the AIL, or zero if the AIL is empty.
73 *
74 * This routine is called to move the tail of the AIL forward. It does this by
75 * trying to flush items in the AIL whose lsns are below the given
76 * threshold_lsn.
77 *
78 * the push is run asynchronously in a separate thread, so we return the tail
79 * of the log right now instead of the tail after the push. This means we will
80 * either continue right away, or we will sleep waiting on the async thread to
81 * do its work.
82 *
83 * We do this unlocked - we only need to know whether there is anything in the
84 * AIL at the time we are called. We don't need to access the contents of
85 * any of the objects, so the lock is not needed.
86 */ 148 */
87void 149static xfs_lsn_t
88xfs_trans_ail_push( 150xfs_ail_max_lsn(
89 struct xfs_ail *ailp, 151 struct xfs_ail *ailp)
90 xfs_lsn_t threshold_lsn)
91{ 152{
92 xfs_log_item_t *lip; 153 xfs_lsn_t lsn = 0;
154 xfs_log_item_t *lip;
93 155
94 lip = xfs_ail_min(ailp); 156 spin_lock(&ailp->xa_lock);
95 if (lip && !XFS_FORCED_SHUTDOWN(ailp->xa_mount)) { 157 lip = xfs_ail_max(ailp);
96 if (XFS_LSN_CMP(threshold_lsn, ailp->xa_target) > 0) 158 if (lip)
97 xfsaild_wakeup(ailp, threshold_lsn); 159 lsn = lip->li_lsn;
98 } 160 spin_unlock(&ailp->xa_lock);
161
162 return lsn;
99} 163}
100 164
101/* 165/*
@@ -236,16 +300,57 @@ out:
236} 300}
237 301
238/* 302/*
239 * xfsaild_push does the work of pushing on the AIL. Returning a timeout of 303 * Splice the log item list into the AIL at the given LSN.
240 * zero indicates that the caller should sleep until woken.
241 */ 304 */
242long 305static void
243xfsaild_push( 306xfs_ail_splice(
244 struct xfs_ail *ailp, 307 struct xfs_ail *ailp,
245 xfs_lsn_t *last_lsn) 308 struct list_head *list,
309 xfs_lsn_t lsn)
246{ 310{
247 long tout = 0; 311 xfs_log_item_t *next_lip;
248 xfs_lsn_t last_pushed_lsn = *last_lsn; 312
313 /* If the list is empty, just insert the item. */
314 if (list_empty(&ailp->xa_ail)) {
315 list_splice(list, &ailp->xa_ail);
316 return;
317 }
318
319 list_for_each_entry_reverse(next_lip, &ailp->xa_ail, li_ail) {
320 if (XFS_LSN_CMP(next_lip->li_lsn, lsn) <= 0)
321 break;
322 }
323
324 ASSERT(&next_lip->li_ail == &ailp->xa_ail ||
325 XFS_LSN_CMP(next_lip->li_lsn, lsn) <= 0);
326
327 list_splice_init(list, &next_lip->li_ail);
328}
329
330/*
331 * Delete the given item from the AIL.
332 */
333static void
334xfs_ail_delete(
335 struct xfs_ail *ailp,
336 xfs_log_item_t *lip)
337{
338 xfs_ail_check(ailp, lip);
339 list_del(&lip->li_ail);
340 xfs_trans_ail_cursor_clear(ailp, lip);
341}
342
343/*
344 * xfs_ail_worker does the work of pushing on the AIL. It will requeue itself
345 * to run at a later time if there is more work to do to complete the push.
346 */
347STATIC void
348xfs_ail_worker(
349 struct work_struct *work)
350{
351 struct xfs_ail *ailp = container_of(to_delayed_work(work),
352 struct xfs_ail, xa_work);
353 long tout;
249 xfs_lsn_t target = ailp->xa_target; 354 xfs_lsn_t target = ailp->xa_target;
250 xfs_lsn_t lsn; 355 xfs_lsn_t lsn;
251 xfs_log_item_t *lip; 356 xfs_log_item_t *lip;
@@ -256,15 +361,15 @@ xfsaild_push(
256 361
257 spin_lock(&ailp->xa_lock); 362 spin_lock(&ailp->xa_lock);
258 xfs_trans_ail_cursor_init(ailp, cur); 363 xfs_trans_ail_cursor_init(ailp, cur);
259 lip = xfs_trans_ail_cursor_first(ailp, cur, *last_lsn); 364 lip = xfs_trans_ail_cursor_first(ailp, cur, ailp->xa_last_pushed_lsn);
260 if (!lip || XFS_FORCED_SHUTDOWN(mp)) { 365 if (!lip || XFS_FORCED_SHUTDOWN(mp)) {
261 /* 366 /*
262 * AIL is empty or our push has reached the end. 367 * AIL is empty or our push has reached the end.
263 */ 368 */
264 xfs_trans_ail_cursor_done(ailp, cur); 369 xfs_trans_ail_cursor_done(ailp, cur);
265 spin_unlock(&ailp->xa_lock); 370 spin_unlock(&ailp->xa_lock);
266 *last_lsn = 0; 371 ailp->xa_last_pushed_lsn = 0;
267 return tout; 372 return;
268 } 373 }
269 374
270 XFS_STATS_INC(xs_push_ail); 375 XFS_STATS_INC(xs_push_ail);
@@ -301,13 +406,13 @@ xfsaild_push(
301 case XFS_ITEM_SUCCESS: 406 case XFS_ITEM_SUCCESS:
302 XFS_STATS_INC(xs_push_ail_success); 407 XFS_STATS_INC(xs_push_ail_success);
303 IOP_PUSH(lip); 408 IOP_PUSH(lip);
304 last_pushed_lsn = lsn; 409 ailp->xa_last_pushed_lsn = lsn;
305 break; 410 break;
306 411
307 case XFS_ITEM_PUSHBUF: 412 case XFS_ITEM_PUSHBUF:
308 XFS_STATS_INC(xs_push_ail_pushbuf); 413 XFS_STATS_INC(xs_push_ail_pushbuf);
309 IOP_PUSHBUF(lip); 414 IOP_PUSHBUF(lip);
310 last_pushed_lsn = lsn; 415 ailp->xa_last_pushed_lsn = lsn;
311 push_xfsbufd = 1; 416 push_xfsbufd = 1;
312 break; 417 break;
313 418
@@ -319,7 +424,7 @@ xfsaild_push(
319 424
320 case XFS_ITEM_LOCKED: 425 case XFS_ITEM_LOCKED:
321 XFS_STATS_INC(xs_push_ail_locked); 426 XFS_STATS_INC(xs_push_ail_locked);
322 last_pushed_lsn = lsn; 427 ailp->xa_last_pushed_lsn = lsn;
323 stuck++; 428 stuck++;
324 break; 429 break;
325 430
@@ -374,9 +479,23 @@ xfsaild_push(
374 wake_up_process(mp->m_ddev_targp->bt_task); 479 wake_up_process(mp->m_ddev_targp->bt_task);
375 } 480 }
376 481
482 /* assume we have more work to do in a short while */
483 tout = 10;
377 if (!count) { 484 if (!count) {
378 /* We're past our target or empty, so idle */ 485 /* We're past our target or empty, so idle */
379 last_pushed_lsn = 0; 486 ailp->xa_last_pushed_lsn = 0;
487
488 /*
489 * Check for an updated push target before clearing the
490 * XFS_AIL_PUSHING_BIT. If the target changed, we've got more
491 * work to do. Wait a bit longer before starting that work.
492 */
493 smp_rmb();
494 if (ailp->xa_target == target) {
495 clear_bit(XFS_AIL_PUSHING_BIT, &ailp->xa_flags);
496 return;
497 }
498 tout = 50;
380 } else if (XFS_LSN_CMP(lsn, target) >= 0) { 499 } else if (XFS_LSN_CMP(lsn, target) >= 0) {
381 /* 500 /*
382 * We reached the target so wait a bit longer for I/O to 501 * We reached the target so wait a bit longer for I/O to
@@ -384,7 +503,7 @@ xfsaild_push(
384 * start the next scan from the start of the AIL. 503 * start the next scan from the start of the AIL.
385 */ 504 */
386 tout = 50; 505 tout = 50;
387 last_pushed_lsn = 0; 506 ailp->xa_last_pushed_lsn = 0;
388 } else if ((stuck * 100) / count > 90) { 507 } else if ((stuck * 100) / count > 90) {
389 /* 508 /*
390 * Either there is a lot of contention on the AIL or we 509 * Either there is a lot of contention on the AIL or we
@@ -396,14 +515,61 @@ xfsaild_push(
396 * continuing from where we were. 515 * continuing from where we were.
397 */ 516 */
398 tout = 20; 517 tout = 20;
399 } else {
400 /* more to do, but wait a short while before continuing */
401 tout = 10;
402 } 518 }
403 *last_lsn = last_pushed_lsn; 519
404 return tout; 520 /* There is more to do, requeue us. */
521 queue_delayed_work(xfs_syncd_wq, &ailp->xa_work,
522 msecs_to_jiffies(tout));
523}
524
525/*
526 * This routine is called to move the tail of the AIL forward. It does this by
527 * trying to flush items in the AIL whose lsns are below the given
528 * threshold_lsn.
529 *
530 * The push is run asynchronously in a workqueue, which means the caller needs
531 * to handle waiting on the async flush for space to become available.
532 * We don't want to interrupt any push that is in progress, hence we only queue
533 * work if we set the pushing bit appropriately.
534 *
535 * We do this unlocked - we only need to know whether there is anything in the
536 * AIL at the time we are called. We don't need to access the contents of
537 * any of the objects, so the lock is not needed.
538 */
539void
540xfs_ail_push(
541 struct xfs_ail *ailp,
542 xfs_lsn_t threshold_lsn)
543{
544 xfs_log_item_t *lip;
545
546 lip = xfs_ail_min(ailp);
547 if (!lip || XFS_FORCED_SHUTDOWN(ailp->xa_mount) ||
548 XFS_LSN_CMP(threshold_lsn, ailp->xa_target) <= 0)
549 return;
550
551 /*
552 * Ensure that the new target is noticed in push code before it clears
553 * the XFS_AIL_PUSHING_BIT.
554 */
555 smp_wmb();
556 ailp->xa_target = threshold_lsn;
557 if (!test_and_set_bit(XFS_AIL_PUSHING_BIT, &ailp->xa_flags))
558 queue_delayed_work(xfs_syncd_wq, &ailp->xa_work, 0);
405} 559}
406 560
561/*
562 * Push out all items in the AIL immediately
563 */
564void
565xfs_ail_push_all(
566 struct xfs_ail *ailp)
567{
568 xfs_lsn_t threshold_lsn = xfs_ail_max_lsn(ailp);
569
570 if (threshold_lsn)
571 xfs_ail_push(ailp, threshold_lsn);
572}
407 573
408/* 574/*
409 * This is to be called when an item is unlocked that may have 575 * This is to be called when an item is unlocked that may have
@@ -615,7 +781,6 @@ xfs_trans_ail_init(
615 xfs_mount_t *mp) 781 xfs_mount_t *mp)
616{ 782{
617 struct xfs_ail *ailp; 783 struct xfs_ail *ailp;
618 int error;
619 784
620 ailp = kmem_zalloc(sizeof(struct xfs_ail), KM_MAYFAIL); 785 ailp = kmem_zalloc(sizeof(struct xfs_ail), KM_MAYFAIL);
621 if (!ailp) 786 if (!ailp)
@@ -624,15 +789,9 @@ xfs_trans_ail_init(
624 ailp->xa_mount = mp; 789 ailp->xa_mount = mp;
625 INIT_LIST_HEAD(&ailp->xa_ail); 790 INIT_LIST_HEAD(&ailp->xa_ail);
626 spin_lock_init(&ailp->xa_lock); 791 spin_lock_init(&ailp->xa_lock);
627 error = xfsaild_start(ailp); 792 INIT_DELAYED_WORK(&ailp->xa_work, xfs_ail_worker);
628 if (error)
629 goto out_free_ailp;
630 mp->m_ail = ailp; 793 mp->m_ail = ailp;
631 return 0; 794 return 0;
632
633out_free_ailp:
634 kmem_free(ailp);
635 return error;
636} 795}
637 796
638void 797void
@@ -641,124 +800,6 @@ xfs_trans_ail_destroy(
641{ 800{
642 struct xfs_ail *ailp = mp->m_ail; 801 struct xfs_ail *ailp = mp->m_ail;
643 802
644 xfsaild_stop(ailp); 803 cancel_delayed_work_sync(&ailp->xa_work);
645 kmem_free(ailp); 804 kmem_free(ailp);
646} 805}
647
648/*
649 * splice the log item list into the AIL at the given LSN.
650 */
651STATIC void
652xfs_ail_splice(
653 struct xfs_ail *ailp,
654 struct list_head *list,
655 xfs_lsn_t lsn)
656{
657 xfs_log_item_t *next_lip;
658
659 /*
660 * If the list is empty, just insert the item.
661 */
662 if (list_empty(&ailp->xa_ail)) {
663 list_splice(list, &ailp->xa_ail);
664 return;
665 }
666
667 list_for_each_entry_reverse(next_lip, &ailp->xa_ail, li_ail) {
668 if (XFS_LSN_CMP(next_lip->li_lsn, lsn) <= 0)
669 break;
670 }
671
672 ASSERT((&next_lip->li_ail == &ailp->xa_ail) ||
673 (XFS_LSN_CMP(next_lip->li_lsn, lsn) <= 0));
674
675 list_splice_init(list, &next_lip->li_ail);
676 return;
677}
678
679/*
680 * Delete the given item from the AIL. Return a pointer to the item.
681 */
682STATIC void
683xfs_ail_delete(
684 struct xfs_ail *ailp,
685 xfs_log_item_t *lip)
686{
687 xfs_ail_check(ailp, lip);
688 list_del(&lip->li_ail);
689 xfs_trans_ail_cursor_clear(ailp, lip);
690}
691
692/*
693 * Return a pointer to the first item in the AIL.
694 * If the AIL is empty, then return NULL.
695 */
696STATIC xfs_log_item_t *
697xfs_ail_min(
698 struct xfs_ail *ailp)
699{
700 if (list_empty(&ailp->xa_ail))
701 return NULL;
702
703 return list_first_entry(&ailp->xa_ail, xfs_log_item_t, li_ail);
704}
705
706/*
707 * Return a pointer to the item which follows
708 * the given item in the AIL. If the given item
709 * is the last item in the list, then return NULL.
710 */
711STATIC xfs_log_item_t *
712xfs_ail_next(
713 struct xfs_ail *ailp,
714 xfs_log_item_t *lip)
715{
716 if (lip->li_ail.next == &ailp->xa_ail)
717 return NULL;
718
719 return list_first_entry(&lip->li_ail, xfs_log_item_t, li_ail);
720}
721
722#ifdef DEBUG
723/*
724 * Check that the list is sorted as it should be.
725 */
726STATIC void
727xfs_ail_check(
728 struct xfs_ail *ailp,
729 xfs_log_item_t *lip)
730{
731 xfs_log_item_t *prev_lip;
732
733 if (list_empty(&ailp->xa_ail))
734 return;
735
736 /*
737 * Check the next and previous entries are valid.
738 */
739 ASSERT((lip->li_flags & XFS_LI_IN_AIL) != 0);
740 prev_lip = list_entry(lip->li_ail.prev, xfs_log_item_t, li_ail);
741 if (&prev_lip->li_ail != &ailp->xa_ail)
742 ASSERT(XFS_LSN_CMP(prev_lip->li_lsn, lip->li_lsn) <= 0);
743
744 prev_lip = list_entry(lip->li_ail.next, xfs_log_item_t, li_ail);
745 if (&prev_lip->li_ail != &ailp->xa_ail)
746 ASSERT(XFS_LSN_CMP(prev_lip->li_lsn, lip->li_lsn) >= 0);
747
748
749#ifdef XFS_TRANS_DEBUG
750 /*
751 * Walk the list checking lsn ordering, and that every entry has the
752 * XFS_LI_IN_AIL flag set. This is really expensive, so only do it
753 * when specifically debugging the transaction subsystem.
754 */
755 prev_lip = list_entry(&ailp->xa_ail, xfs_log_item_t, li_ail);
756 list_for_each_entry(lip, &ailp->xa_ail, li_ail) {
757 if (&prev_lip->li_ail != &ailp->xa_ail)
758 ASSERT(XFS_LSN_CMP(prev_lip->li_lsn, lip->li_lsn) <= 0);
759 ASSERT((lip->li_flags & XFS_LI_IN_AIL) != 0);
760 prev_lip = lip;
761 }
762#endif /* XFS_TRANS_DEBUG */
763}
764#endif /* DEBUG */
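
xfs_ail_splice() depends on the AIL staying sorted by ascending LSN: walking the list backwards finds the last entry with an LSN at or below the insertion LSN, which is usually near the tail since bulk inserts tend to carry the newest LSNs. For example, splicing items at LSN 120 into a list holding LSNs [100, 110, 130] walks back past 130, stops at 110, and yields [100, 110, 120..., 130]. A standalone sketch of the same technique on plain kernel lists (the item type is illustrative):

#include <linux/list.h>
#include <linux/types.h>

struct item {
	struct list_head	list;
	u64			lsn;	/* ascending sort key */
};

/*
 * Splice @new_items (all carrying @lsn) into the sorted list @head:
 * walk backwards to the last entry with lsn <= @lsn and insert the
 * sublist right after it.
 */
static void
splice_sorted(
	struct list_head	*head,
	struct list_head	*new_items,
	u64			lsn)
{
	struct item		*cur;

	if (list_empty(head)) {
		list_splice(new_items, head);
		return;
	}

	list_for_each_entry_reverse(cur, head, list) {
		if (cur->lsn <= lsn)
			break;
	}

	/*
	 * If no entry qualified, the loop terminated with &cur->list ==
	 * head, so this splices at the front; otherwise it splices
	 * immediately after the entry we broke on.
	 */
	list_splice_init(new_items, &cur->list);
}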
diff --git a/fs/xfs/xfs_trans_priv.h b/fs/xfs/xfs_trans_priv.h
index 35162c238fa3..6b164e9e9a1f 100644
--- a/fs/xfs/xfs_trans_priv.h
+++ b/fs/xfs/xfs_trans_priv.h
@@ -65,16 +65,22 @@ struct xfs_ail_cursor {
65struct xfs_ail { 65struct xfs_ail {
66 struct xfs_mount *xa_mount; 66 struct xfs_mount *xa_mount;
67 struct list_head xa_ail; 67 struct list_head xa_ail;
68 uint xa_gen;
69 struct task_struct *xa_task;
70 xfs_lsn_t xa_target; 68 xfs_lsn_t xa_target;
71 struct xfs_ail_cursor xa_cursors; 69 struct xfs_ail_cursor xa_cursors;
72 spinlock_t xa_lock; 70 spinlock_t xa_lock;
71 struct delayed_work xa_work;
72 xfs_lsn_t xa_last_pushed_lsn;
73 unsigned long xa_flags;
73}; 74};
74 75
76#define XFS_AIL_PUSHING_BIT 0
77
75/* 78/*
76 * From xfs_trans_ail.c 79 * From xfs_trans_ail.c
77 */ 80 */
81
82extern struct workqueue_struct *xfs_ail_wq; /* AIL workqueue */
83
78void xfs_trans_ail_update_bulk(struct xfs_ail *ailp, 84void xfs_trans_ail_update_bulk(struct xfs_ail *ailp,
79 struct xfs_log_item **log_items, int nr_items, 85 struct xfs_log_item **log_items, int nr_items,
80 xfs_lsn_t lsn) __releases(ailp->xa_lock); 86 xfs_lsn_t lsn) __releases(ailp->xa_lock);
@@ -98,12 +104,13 @@ xfs_trans_ail_delete(
98 xfs_trans_ail_delete_bulk(ailp, &lip, 1); 104 xfs_trans_ail_delete_bulk(ailp, &lip, 1);
99} 105}
100 106
101void xfs_trans_ail_push(struct xfs_ail *, xfs_lsn_t); 107void xfs_ail_push(struct xfs_ail *, xfs_lsn_t);
108void xfs_ail_push_all(struct xfs_ail *);
109xfs_lsn_t xfs_ail_min_lsn(struct xfs_ail *ailp);
110
102void xfs_trans_unlocked_item(struct xfs_ail *, 111void xfs_trans_unlocked_item(struct xfs_ail *,
103 xfs_log_item_t *); 112 xfs_log_item_t *);
104 113
105xfs_lsn_t xfs_trans_ail_tail(struct xfs_ail *ailp);
106
107struct xfs_log_item *xfs_trans_ail_cursor_first(struct xfs_ail *ailp, 114struct xfs_log_item *xfs_trans_ail_cursor_first(struct xfs_ail *ailp,
108 struct xfs_ail_cursor *cur, 115 struct xfs_ail_cursor *cur,
109 xfs_lsn_t lsn); 116 xfs_lsn_t lsn);
@@ -112,11 +119,6 @@ struct xfs_log_item *xfs_trans_ail_cursor_next(struct xfs_ail *ailp,
112void xfs_trans_ail_cursor_done(struct xfs_ail *ailp, 119void xfs_trans_ail_cursor_done(struct xfs_ail *ailp,
113 struct xfs_ail_cursor *cur); 120 struct xfs_ail_cursor *cur);
114 121
115long xfsaild_push(struct xfs_ail *, xfs_lsn_t *);
116void xfsaild_wakeup(struct xfs_ail *, xfs_lsn_t);
117int xfsaild_start(struct xfs_ail *);
118void xfsaild_stop(struct xfs_ail *);
119
120#if BITS_PER_LONG != 64 122#if BITS_PER_LONG != 64
121static inline void 123static inline void
122xfs_trans_ail_copy_lsn( 124xfs_trans_ail_copy_lsn(
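
XFS_AIL_PUSHING_BIT is what keeps xfs_ail_push() and xfs_ail_worker() from losing a target update between "worker decides to idle" and "pusher declines to queue": the pusher publishes the target with smp_wmb() and only queues work when it wins test_and_set_bit(), while the worker re-checks the target after smp_rmb() before clearing the bit. A condensed sketch of the handoff, reusing names from the hunks above (xfs_ail_worker_idle is a hypothetical wrapper for exposition; in the patch this logic is inline in xfs_ail_worker()):

/* Producer: publish the new target and queue the worker if idle. */
void
xfs_ail_push(
	struct xfs_ail	*ailp,
	xfs_lsn_t	threshold_lsn)
{
	smp_wmb();			/* pairs with smp_rmb() below */
	ailp->xa_target = threshold_lsn;
	if (!test_and_set_bit(XFS_AIL_PUSHING_BIT, &ailp->xa_flags))
		queue_delayed_work(xfs_syncd_wq, &ailp->xa_work, 0);
}

/* Consumer: called when the worker has nothing left to push. */
static void
xfs_ail_worker_idle(
	struct xfs_ail	*ailp,
	xfs_lsn_t	target)
{
	smp_rmb();
	if (ailp->xa_target == target) {
		/* truly idle; the next xfs_ail_push() requeues us */
		clear_bit(XFS_AIL_PUSHING_BIT, &ailp->xa_flags);
		return;
	}
	/* the target moved meanwhile: requeue to pick up the work */
	queue_delayed_work(xfs_syncd_wq, &ailp->xa_work,
			   msecs_to_jiffies(50));
}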