aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
authorJens Axboe <jaxboe@fusionio.com>2011-07-01 10:17:13 -0400
committerJens Axboe <jaxboe@fusionio.com>2011-07-01 10:17:13 -0400
commit04bf7869ca0fd12009aee301cac2264a36df4d98 (patch)
tree66cb81ebf8b76560a31433c2c493dc430c914af9 /fs
parentd2f31a5fd60d168b00fc4f7617b68a1287b21e90 (diff)
parent7b28afe01ab6ffb5f152f47831b44933facd2328 (diff)
Merge branch 'for-linus' into for-3.1/core
Conflicts: block/blk-throttle.c block/cfq-iosched.c Signed-off-by: Jens Axboe <jaxboe@fusionio.com>
Diffstat (limited to 'fs')
-rw-r--r--fs/afs/dir.c8
-rw-r--r--fs/afs/fsclient.c3
-rw-r--r--fs/afs/inode.c10
-rw-r--r--fs/afs/super.c74
-rw-r--r--fs/afs/write.c21
-rw-r--r--fs/bad_inode.c3
-rw-r--r--fs/block_dev.c18
-rw-r--r--fs/btrfs/btrfs_inode.h3
-rw-r--r--fs/btrfs/ctree.c32
-rw-r--r--fs/btrfs/ctree.h38
-rw-r--r--fs/btrfs/delayed-inode.c40
-rw-r--r--fs/btrfs/delayed-inode.h5
-rw-r--r--fs/btrfs/disk-io.c53
-rw-r--r--fs/btrfs/extent-tree.c162
-rw-r--r--fs/btrfs/extent_io.c2
-rw-r--r--fs/btrfs/extent_io.h2
-rw-r--r--fs/btrfs/file.c10
-rw-r--r--fs/btrfs/free-space-cache.c231
-rw-r--r--fs/btrfs/inode-map.c34
-rw-r--r--fs/btrfs/inode.c266
-rw-r--r--fs/btrfs/ioctl.c51
-rw-r--r--fs/btrfs/relocation.c48
-rw-r--r--fs/btrfs/scrub.c192
-rw-r--r--fs/btrfs/super.c10
-rw-r--r--fs/btrfs/sysfs.c146
-rw-r--r--fs/btrfs/transaction.c377
-rw-r--r--fs/btrfs/transaction.h29
-rw-r--r--fs/btrfs/tree-log.c2
-rw-r--r--fs/btrfs/volumes.c10
-rw-r--r--fs/btrfs/xattr.c2
-rw-r--r--fs/buffer.c4
-rw-r--r--fs/ceph/addr.c2
-rw-r--r--fs/ceph/caps.c10
-rw-r--r--fs/ceph/dir.c11
-rw-r--r--fs/ceph/export.c4
-rw-r--r--fs/ceph/file.c35
-rw-r--r--fs/ceph/inode.c18
-rw-r--r--fs/ceph/ioctl.c6
-rw-r--r--fs/ceph/locks.c29
-rw-r--r--fs/ceph/snap.c2
-rw-r--r--fs/ceph/xattr.c6
-rw-r--r--fs/cifs/Kconfig5
-rw-r--r--fs/cifs/cache.c6
-rw-r--r--fs/cifs/cifs_fs_sb.h1
-rw-r--r--fs/cifs/cifsencrypt.c2
-rw-r--r--fs/cifs/cifsfs.c192
-rw-r--r--fs/cifs/cifsfs.h2
-rw-r--r--fs/cifs/cifsproto.h8
-rw-r--r--fs/cifs/connect.c117
-rw-r--r--fs/cifs/fscache.c51
-rw-r--r--fs/cifs/smbencrypt.c6
-rw-r--r--fs/coda/pioctl.c2
-rw-r--r--fs/dcookies.c3
-rw-r--r--fs/exec.c7
-rw-r--r--fs/ext4/ext4_extents.h9
-rw-r--r--fs/ext4/extents.c42
-rw-r--r--fs/ext4/inode.c2
-rw-r--r--fs/ext4/mballoc.c8
-rw-r--r--fs/ext4/move_extent.c10
-rw-r--r--fs/ext4/super.c15
-rw-r--r--fs/fat/file.c2
-rw-r--r--fs/fuse/inode.c2
-rw-r--r--fs/gfs2/glock.c9
-rw-r--r--fs/isofs/inode.c3
-rw-r--r--fs/jbd2/checkpoint.c28
-rw-r--r--fs/jbd2/commit.c33
-rw-r--r--fs/jbd2/journal.c91
-rw-r--r--fs/jbd2/transaction.c69
-rw-r--r--fs/jfs/file.c6
-rw-r--r--fs/jfs/jfs_imap.c12
-rw-r--r--fs/jfs/jfs_incore.h3
-rw-r--r--fs/jfs/jfs_logmgr.c2
-rw-r--r--fs/jfs/resize.c2
-rw-r--r--fs/lockd/clntproc.c8
-rw-r--r--fs/logfs/dir.c8
-rw-r--r--fs/namei.c39
-rw-r--r--fs/nfs/inode.c6
-rw-r--r--fs/nfs/internal.h11
-rw-r--r--fs/nfs/nfs4filelayout.c21
-rw-r--r--fs/nfs/nfs4proc.c45
-rw-r--r--fs/nfs/nfs4xdr.c26
-rw-r--r--fs/nfs/objlayout/objio_osd.c4
-rw-r--r--fs/nfs/objlayout/objlayout.c2
-rw-r--r--fs/nfs/pagelist.c3
-rw-r--r--fs/nfs/pnfs.c44
-rw-r--r--fs/nfs/pnfs.h1
-rw-r--r--fs/nfs/pnfs_dev.c17
-rw-r--r--fs/nfsd/Kconfig1
-rw-r--r--fs/nfsd/nfsctl.c19
-rw-r--r--fs/nfsd/vfs.c19
-rw-r--r--fs/nilfs2/btree.c39
-rw-r--r--fs/nilfs2/inode.c7
-rw-r--r--fs/nilfs2/segment.c2
-rw-r--r--fs/ocfs2/super.c2
-rw-r--r--fs/omfs/file.c1
-rw-r--r--fs/partitions/check.c10
-rw-r--r--fs/proc/base.c6
-rw-r--r--fs/proc/namespaces.c9
-rw-r--r--fs/proc/proc_sysctl.c3
-rw-r--r--fs/proc/root.c11
-rw-r--r--fs/reiserfs/xattr.c2
-rw-r--r--fs/super.c2
-rw-r--r--fs/sysfs/mount.c37
-rw-r--r--fs/sysfs/sysfs.h2
-rw-r--r--fs/timerfd.c5
-rw-r--r--fs/ubifs/io.c2
-rw-r--r--fs/ubifs/journal.c1
-rw-r--r--fs/ubifs/orphan.c2
-rw-r--r--fs/ubifs/recovery.c164
-rw-r--r--fs/ubifs/replay.c3
-rw-r--r--fs/ubifs/shrinker.c6
-rw-r--r--fs/ubifs/super.c178
-rw-r--r--fs/ubifs/tnc.c9
-rw-r--r--fs/ubifs/ubifs.h4
-rw-r--r--fs/xfs/linux-2.6/xfs_file.c50
-rw-r--r--fs/xfs/linux-2.6/xfs_iops.c2
-rw-r--r--fs/xfs/linux-2.6/xfs_super.c75
-rw-r--r--fs/xfs/xfs_log.c11
118 files changed, 2016 insertions, 1662 deletions
diff --git a/fs/afs/dir.c b/fs/afs/dir.c
index 20c106f24927..1b0b19550015 100644
--- a/fs/afs/dir.c
+++ b/fs/afs/dir.c
@@ -584,11 +584,11 @@ static struct dentry *afs_lookup(struct inode *dir, struct dentry *dentry,
584 584
585success: 585success:
586 d_add(dentry, inode); 586 d_add(dentry, inode);
587 _leave(" = 0 { vn=%u u=%u } -> { ino=%lu v=%llu }", 587 _leave(" = 0 { vn=%u u=%u } -> { ino=%lu v=%u }",
588 fid.vnode, 588 fid.vnode,
589 fid.unique, 589 fid.unique,
590 dentry->d_inode->i_ino, 590 dentry->d_inode->i_ino,
591 (unsigned long long)dentry->d_inode->i_version); 591 dentry->d_inode->i_generation);
592 592
593 return NULL; 593 return NULL;
594} 594}
@@ -671,10 +671,10 @@ static int afs_d_revalidate(struct dentry *dentry, struct nameidata *nd)
671 * been deleted and replaced, and the original vnode ID has 671 * been deleted and replaced, and the original vnode ID has
672 * been reused */ 672 * been reused */
673 if (fid.unique != vnode->fid.unique) { 673 if (fid.unique != vnode->fid.unique) {
674 _debug("%s: file deleted (uq %u -> %u I:%llu)", 674 _debug("%s: file deleted (uq %u -> %u I:%u)",
675 dentry->d_name.name, fid.unique, 675 dentry->d_name.name, fid.unique,
676 vnode->fid.unique, 676 vnode->fid.unique,
677 (unsigned long long)dentry->d_inode->i_version); 677 dentry->d_inode->i_generation);
678 spin_lock(&vnode->lock); 678 spin_lock(&vnode->lock);
679 set_bit(AFS_VNODE_DELETED, &vnode->flags); 679 set_bit(AFS_VNODE_DELETED, &vnode->flags);
680 spin_unlock(&vnode->lock); 680 spin_unlock(&vnode->lock);
diff --git a/fs/afs/fsclient.c b/fs/afs/fsclient.c
index 4bd0218473a9..346e3289abd7 100644
--- a/fs/afs/fsclient.c
+++ b/fs/afs/fsclient.c
@@ -89,7 +89,7 @@ static void xdr_decode_AFSFetchStatus(const __be32 **_bp,
89 i_size_write(&vnode->vfs_inode, size); 89 i_size_write(&vnode->vfs_inode, size);
90 vnode->vfs_inode.i_uid = status->owner; 90 vnode->vfs_inode.i_uid = status->owner;
91 vnode->vfs_inode.i_gid = status->group; 91 vnode->vfs_inode.i_gid = status->group;
92 vnode->vfs_inode.i_version = vnode->fid.unique; 92 vnode->vfs_inode.i_generation = vnode->fid.unique;
93 vnode->vfs_inode.i_nlink = status->nlink; 93 vnode->vfs_inode.i_nlink = status->nlink;
94 94
95 mode = vnode->vfs_inode.i_mode; 95 mode = vnode->vfs_inode.i_mode;
@@ -102,6 +102,7 @@ static void xdr_decode_AFSFetchStatus(const __be32 **_bp,
102 vnode->vfs_inode.i_ctime.tv_sec = status->mtime_server; 102 vnode->vfs_inode.i_ctime.tv_sec = status->mtime_server;
103 vnode->vfs_inode.i_mtime = vnode->vfs_inode.i_ctime; 103 vnode->vfs_inode.i_mtime = vnode->vfs_inode.i_ctime;
104 vnode->vfs_inode.i_atime = vnode->vfs_inode.i_ctime; 104 vnode->vfs_inode.i_atime = vnode->vfs_inode.i_ctime;
105 vnode->vfs_inode.i_version = data_version;
105 } 106 }
106 107
107 expected_version = status->data_version; 108 expected_version = status->data_version;
diff --git a/fs/afs/inode.c b/fs/afs/inode.c
index db66c5201474..0fdab6e03d87 100644
--- a/fs/afs/inode.c
+++ b/fs/afs/inode.c
@@ -75,7 +75,8 @@ static int afs_inode_map_status(struct afs_vnode *vnode, struct key *key)
75 inode->i_ctime.tv_nsec = 0; 75 inode->i_ctime.tv_nsec = 0;
76 inode->i_atime = inode->i_mtime = inode->i_ctime; 76 inode->i_atime = inode->i_mtime = inode->i_ctime;
77 inode->i_blocks = 0; 77 inode->i_blocks = 0;
78 inode->i_version = vnode->fid.unique; 78 inode->i_generation = vnode->fid.unique;
79 inode->i_version = vnode->status.data_version;
79 inode->i_mapping->a_ops = &afs_fs_aops; 80 inode->i_mapping->a_ops = &afs_fs_aops;
80 81
81 /* check to see whether a symbolic link is really a mountpoint */ 82 /* check to see whether a symbolic link is really a mountpoint */
@@ -100,7 +101,7 @@ static int afs_iget5_test(struct inode *inode, void *opaque)
100 struct afs_iget_data *data = opaque; 101 struct afs_iget_data *data = opaque;
101 102
102 return inode->i_ino == data->fid.vnode && 103 return inode->i_ino == data->fid.vnode &&
103 inode->i_version == data->fid.unique; 104 inode->i_generation == data->fid.unique;
104} 105}
105 106
106/* 107/*
@@ -122,7 +123,7 @@ static int afs_iget5_set(struct inode *inode, void *opaque)
122 struct afs_vnode *vnode = AFS_FS_I(inode); 123 struct afs_vnode *vnode = AFS_FS_I(inode);
123 124
124 inode->i_ino = data->fid.vnode; 125 inode->i_ino = data->fid.vnode;
125 inode->i_version = data->fid.unique; 126 inode->i_generation = data->fid.unique;
126 vnode->fid = data->fid; 127 vnode->fid = data->fid;
127 vnode->volume = data->volume; 128 vnode->volume = data->volume;
128 129
@@ -380,8 +381,7 @@ int afs_getattr(struct vfsmount *mnt, struct dentry *dentry,
380 381
381 inode = dentry->d_inode; 382 inode = dentry->d_inode;
382 383
383 _enter("{ ino=%lu v=%llu }", inode->i_ino, 384 _enter("{ ino=%lu v=%u }", inode->i_ino, inode->i_generation);
384 (unsigned long long)inode->i_version);
385 385
386 generic_fillattr(inode, stat); 386 generic_fillattr(inode, stat);
387 return 0; 387 return 0;
diff --git a/fs/afs/super.c b/fs/afs/super.c
index fb240e8766d6..356dcf0929e8 100644
--- a/fs/afs/super.c
+++ b/fs/afs/super.c
@@ -31,8 +31,8 @@
31static void afs_i_init_once(void *foo); 31static void afs_i_init_once(void *foo);
32static struct dentry *afs_mount(struct file_system_type *fs_type, 32static struct dentry *afs_mount(struct file_system_type *fs_type,
33 int flags, const char *dev_name, void *data); 33 int flags, const char *dev_name, void *data);
34static void afs_kill_super(struct super_block *sb);
34static struct inode *afs_alloc_inode(struct super_block *sb); 35static struct inode *afs_alloc_inode(struct super_block *sb);
35static void afs_put_super(struct super_block *sb);
36static void afs_destroy_inode(struct inode *inode); 36static void afs_destroy_inode(struct inode *inode);
37static int afs_statfs(struct dentry *dentry, struct kstatfs *buf); 37static int afs_statfs(struct dentry *dentry, struct kstatfs *buf);
38 38
@@ -40,7 +40,7 @@ struct file_system_type afs_fs_type = {
40 .owner = THIS_MODULE, 40 .owner = THIS_MODULE,
41 .name = "afs", 41 .name = "afs",
42 .mount = afs_mount, 42 .mount = afs_mount,
43 .kill_sb = kill_anon_super, 43 .kill_sb = afs_kill_super,
44 .fs_flags = 0, 44 .fs_flags = 0,
45}; 45};
46 46
@@ -50,7 +50,6 @@ static const struct super_operations afs_super_ops = {
50 .drop_inode = afs_drop_inode, 50 .drop_inode = afs_drop_inode,
51 .destroy_inode = afs_destroy_inode, 51 .destroy_inode = afs_destroy_inode,
52 .evict_inode = afs_evict_inode, 52 .evict_inode = afs_evict_inode,
53 .put_super = afs_put_super,
54 .show_options = generic_show_options, 53 .show_options = generic_show_options,
55}; 54};
56 55
@@ -282,19 +281,25 @@ static int afs_parse_device_name(struct afs_mount_params *params,
282 */ 281 */
283static int afs_test_super(struct super_block *sb, void *data) 282static int afs_test_super(struct super_block *sb, void *data)
284{ 283{
285 struct afs_mount_params *params = data; 284 struct afs_super_info *as1 = data;
286 struct afs_super_info *as = sb->s_fs_info; 285 struct afs_super_info *as = sb->s_fs_info;
287 286
288 return as->volume == params->volume; 287 return as->volume == as1->volume;
288}
289
290static int afs_set_super(struct super_block *sb, void *data)
291{
292 sb->s_fs_info = data;
293 return set_anon_super(sb, NULL);
289} 294}
290 295
291/* 296/*
292 * fill in the superblock 297 * fill in the superblock
293 */ 298 */
294static int afs_fill_super(struct super_block *sb, void *data) 299static int afs_fill_super(struct super_block *sb,
300 struct afs_mount_params *params)
295{ 301{
296 struct afs_mount_params *params = data; 302 struct afs_super_info *as = sb->s_fs_info;
297 struct afs_super_info *as = NULL;
298 struct afs_fid fid; 303 struct afs_fid fid;
299 struct dentry *root = NULL; 304 struct dentry *root = NULL;
300 struct inode *inode = NULL; 305 struct inode *inode = NULL;
@@ -302,23 +307,13 @@ static int afs_fill_super(struct super_block *sb, void *data)
302 307
303 _enter(""); 308 _enter("");
304 309
305 /* allocate a superblock info record */
306 as = kzalloc(sizeof(struct afs_super_info), GFP_KERNEL);
307 if (!as) {
308 _leave(" = -ENOMEM");
309 return -ENOMEM;
310 }
311
312 afs_get_volume(params->volume);
313 as->volume = params->volume;
314
315 /* fill in the superblock */ 310 /* fill in the superblock */
316 sb->s_blocksize = PAGE_CACHE_SIZE; 311 sb->s_blocksize = PAGE_CACHE_SIZE;
317 sb->s_blocksize_bits = PAGE_CACHE_SHIFT; 312 sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
318 sb->s_magic = AFS_FS_MAGIC; 313 sb->s_magic = AFS_FS_MAGIC;
319 sb->s_op = &afs_super_ops; 314 sb->s_op = &afs_super_ops;
320 sb->s_fs_info = as;
321 sb->s_bdi = &as->volume->bdi; 315 sb->s_bdi = &as->volume->bdi;
316 strlcpy(sb->s_id, as->volume->vlocation->vldb.name, sizeof(sb->s_id));
322 317
323 /* allocate the root inode and dentry */ 318 /* allocate the root inode and dentry */
324 fid.vid = as->volume->vid; 319 fid.vid = as->volume->vid;
@@ -326,7 +321,7 @@ static int afs_fill_super(struct super_block *sb, void *data)
326 fid.unique = 1; 321 fid.unique = 1;
327 inode = afs_iget(sb, params->key, &fid, NULL, NULL); 322 inode = afs_iget(sb, params->key, &fid, NULL, NULL);
328 if (IS_ERR(inode)) 323 if (IS_ERR(inode))
329 goto error_inode; 324 return PTR_ERR(inode);
330 325
331 if (params->autocell) 326 if (params->autocell)
332 set_bit(AFS_VNODE_AUTOCELL, &AFS_FS_I(inode)->flags); 327 set_bit(AFS_VNODE_AUTOCELL, &AFS_FS_I(inode)->flags);
@@ -342,16 +337,8 @@ static int afs_fill_super(struct super_block *sb, void *data)
342 _leave(" = 0"); 337 _leave(" = 0");
343 return 0; 338 return 0;
344 339
345error_inode:
346 ret = PTR_ERR(inode);
347 inode = NULL;
348error: 340error:
349 iput(inode); 341 iput(inode);
350 afs_put_volume(as->volume);
351 kfree(as);
352
353 sb->s_fs_info = NULL;
354
355 _leave(" = %d", ret); 342 _leave(" = %d", ret);
356 return ret; 343 return ret;
357} 344}
@@ -367,6 +354,7 @@ static struct dentry *afs_mount(struct file_system_type *fs_type,
367 struct afs_volume *vol; 354 struct afs_volume *vol;
368 struct key *key; 355 struct key *key;
369 char *new_opts = kstrdup(options, GFP_KERNEL); 356 char *new_opts = kstrdup(options, GFP_KERNEL);
357 struct afs_super_info *as;
370 int ret; 358 int ret;
371 359
372 _enter(",,%s,%p", dev_name, options); 360 _enter(",,%s,%p", dev_name, options);
@@ -399,12 +387,22 @@ static struct dentry *afs_mount(struct file_system_type *fs_type,
399 ret = PTR_ERR(vol); 387 ret = PTR_ERR(vol);
400 goto error; 388 goto error;
401 } 389 }
402 params.volume = vol; 390
391 /* allocate a superblock info record */
392 as = kzalloc(sizeof(struct afs_super_info), GFP_KERNEL);
393 if (!as) {
394 ret = -ENOMEM;
395 afs_put_volume(vol);
396 goto error;
397 }
398 as->volume = vol;
403 399
404 /* allocate a deviceless superblock */ 400 /* allocate a deviceless superblock */
405 sb = sget(fs_type, afs_test_super, set_anon_super, &params); 401 sb = sget(fs_type, afs_test_super, afs_set_super, as);
406 if (IS_ERR(sb)) { 402 if (IS_ERR(sb)) {
407 ret = PTR_ERR(sb); 403 ret = PTR_ERR(sb);
404 afs_put_volume(vol);
405 kfree(as);
408 goto error; 406 goto error;
409 } 407 }
410 408
@@ -422,16 +420,16 @@ static struct dentry *afs_mount(struct file_system_type *fs_type,
422 } else { 420 } else {
423 _debug("reuse"); 421 _debug("reuse");
424 ASSERTCMP(sb->s_flags, &, MS_ACTIVE); 422 ASSERTCMP(sb->s_flags, &, MS_ACTIVE);
423 afs_put_volume(vol);
424 kfree(as);
425 } 425 }
426 426
427 afs_put_volume(params.volume);
428 afs_put_cell(params.cell); 427 afs_put_cell(params.cell);
429 kfree(new_opts); 428 kfree(new_opts);
430 _leave(" = 0 [%p]", sb); 429 _leave(" = 0 [%p]", sb);
431 return dget(sb->s_root); 430 return dget(sb->s_root);
432 431
433error: 432error:
434 afs_put_volume(params.volume);
435 afs_put_cell(params.cell); 433 afs_put_cell(params.cell);
436 key_put(params.key); 434 key_put(params.key);
437 kfree(new_opts); 435 kfree(new_opts);
@@ -439,18 +437,12 @@ error:
439 return ERR_PTR(ret); 437 return ERR_PTR(ret);
440} 438}
441 439
442/* 440static void afs_kill_super(struct super_block *sb)
443 * finish the unmounting process on the superblock
444 */
445static void afs_put_super(struct super_block *sb)
446{ 441{
447 struct afs_super_info *as = sb->s_fs_info; 442 struct afs_super_info *as = sb->s_fs_info;
448 443 kill_anon_super(sb);
449 _enter("");
450
451 afs_put_volume(as->volume); 444 afs_put_volume(as->volume);
452 445 kfree(as);
453 _leave("");
454} 446}
455 447
456/* 448/*
diff --git a/fs/afs/write.c b/fs/afs/write.c
index 789b3afb3423..b806285ff853 100644
--- a/fs/afs/write.c
+++ b/fs/afs/write.c
@@ -84,23 +84,21 @@ void afs_put_writeback(struct afs_writeback *wb)
84 * partly or wholly fill a page that's under preparation for writing 84 * partly or wholly fill a page that's under preparation for writing
85 */ 85 */
86static int afs_fill_page(struct afs_vnode *vnode, struct key *key, 86static int afs_fill_page(struct afs_vnode *vnode, struct key *key,
87 loff_t pos, unsigned len, struct page *page) 87 loff_t pos, struct page *page)
88{ 88{
89 loff_t i_size; 89 loff_t i_size;
90 unsigned eof;
91 int ret; 90 int ret;
91 int len;
92 92
93 _enter(",,%llu,%u", (unsigned long long)pos, len); 93 _enter(",,%llu", (unsigned long long)pos);
94
95 ASSERTCMP(len, <=, PAGE_CACHE_SIZE);
96 94
97 i_size = i_size_read(&vnode->vfs_inode); 95 i_size = i_size_read(&vnode->vfs_inode);
98 if (pos + len > i_size) 96 if (pos + PAGE_CACHE_SIZE > i_size)
99 eof = i_size; 97 len = i_size - pos;
100 else 98 else
101 eof = PAGE_CACHE_SIZE; 99 len = PAGE_CACHE_SIZE;
102 100
103 ret = afs_vnode_fetch_data(vnode, key, 0, eof, page); 101 ret = afs_vnode_fetch_data(vnode, key, pos, len, page);
104 if (ret < 0) { 102 if (ret < 0) {
105 if (ret == -ENOENT) { 103 if (ret == -ENOENT) {
106 _debug("got NOENT from server" 104 _debug("got NOENT from server"
@@ -153,9 +151,8 @@ int afs_write_begin(struct file *file, struct address_space *mapping,
153 *pagep = page; 151 *pagep = page;
154 /* page won't leak in error case: it eventually gets cleaned off LRU */ 152 /* page won't leak in error case: it eventually gets cleaned off LRU */
155 153
156 if (!PageUptodate(page)) { 154 if (!PageUptodate(page) && len != PAGE_CACHE_SIZE) {
157 _debug("not up to date"); 155 ret = afs_fill_page(vnode, key, index << PAGE_CACHE_SHIFT, page);
158 ret = afs_fill_page(vnode, key, pos, len, page);
159 if (ret < 0) { 156 if (ret < 0) {
160 kfree(candidate); 157 kfree(candidate);
161 _leave(" = %d [prep]", ret); 158 _leave(" = %d [prep]", ret);
diff --git a/fs/bad_inode.c b/fs/bad_inode.c
index 9ad2369d9e35..bfcb18feb1df 100644
--- a/fs/bad_inode.c
+++ b/fs/bad_inode.c
@@ -231,9 +231,6 @@ static int bad_inode_readlink(struct dentry *dentry, char __user *buffer,
231 231
232static int bad_inode_permission(struct inode *inode, int mask, unsigned int flags) 232static int bad_inode_permission(struct inode *inode, int mask, unsigned int flags)
233{ 233{
234 if (flags & IPERM_FLAG_RCU)
235 return -ECHILD;
236
237 return -EIO; 234 return -EIO;
238} 235}
239 236
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 1f2b19978333..610e8e0b04b8 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -762,7 +762,19 @@ static struct block_device *bd_start_claiming(struct block_device *bdev,
762 if (!disk) 762 if (!disk)
763 return ERR_PTR(-ENXIO); 763 return ERR_PTR(-ENXIO);
764 764
765 whole = bdget_disk(disk, 0); 765 /*
766 * Normally, @bdev should equal what's returned from bdget_disk()
767 * if partno is 0; however, some drivers (floppy) use multiple
768 * bdev's for the same physical device and @bdev may be one of the
769 * aliases. Keep @bdev if partno is 0. This means claimer
770 * tracking is broken for those devices but it has always been that
771 * way.
772 */
773 if (partno)
774 whole = bdget_disk(disk, 0);
775 else
776 whole = bdgrab(bdev);
777
766 module_put(disk->fops->owner); 778 module_put(disk->fops->owner);
767 put_disk(disk); 779 put_disk(disk);
768 if (!whole) 780 if (!whole)
@@ -1272,8 +1284,8 @@ int blkdev_get(struct block_device *bdev, fmode_t mode, void *holder)
1272 * individual writeable reference is too fragile given the 1284 * individual writeable reference is too fragile given the
1273 * way @mode is used in blkdev_get/put(). 1285 * way @mode is used in blkdev_get/put().
1274 */ 1286 */
1275 if ((disk->flags & GENHD_FL_BLOCK_EVENTS_ON_EXCL_WRITE) && 1287 if (!res && (mode & FMODE_WRITE) && !bdev->bd_write_holder &&
1276 !res && (mode & FMODE_WRITE) && !bdev->bd_write_holder) { 1288 (disk->flags & GENHD_FL_BLOCK_EVENTS_ON_EXCL_WRITE)) {
1277 bdev->bd_write_holder = true; 1289 bdev->bd_write_holder = true;
1278 disk_block_events(disk); 1290 disk_block_events(disk);
1279 } 1291 }
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
index 93b1aa932014..52d7eca8c7bf 100644
--- a/fs/btrfs/btrfs_inode.h
+++ b/fs/btrfs/btrfs_inode.h
@@ -121,9 +121,6 @@ struct btrfs_inode {
121 */ 121 */
122 u64 index_cnt; 122 u64 index_cnt;
123 123
124 /* the start of block group preferred for allocations. */
125 u64 block_group;
126
127 /* the fsync log has some corner cases that mean we have to check 124 /* the fsync log has some corner cases that mean we have to check
128 * directories to see if any unlinks have been done before 125 * directories to see if any unlinks have been done before
129 * the directory was logged. See tree-log.c for all the 126 * the directory was logged. See tree-log.c for all the
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index b0e18d986e0a..2e667868e0d2 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -43,8 +43,6 @@ struct btrfs_path *btrfs_alloc_path(void)
43{ 43{
44 struct btrfs_path *path; 44 struct btrfs_path *path;
45 path = kmem_cache_zalloc(btrfs_path_cachep, GFP_NOFS); 45 path = kmem_cache_zalloc(btrfs_path_cachep, GFP_NOFS);
46 if (path)
47 path->reada = 1;
48 return path; 46 return path;
49} 47}
50 48
@@ -1224,11 +1222,13 @@ static void reada_for_search(struct btrfs_root *root,
1224 u64 search; 1222 u64 search;
1225 u64 target; 1223 u64 target;
1226 u64 nread = 0; 1224 u64 nread = 0;
1225 u64 gen;
1227 int direction = path->reada; 1226 int direction = path->reada;
1228 struct extent_buffer *eb; 1227 struct extent_buffer *eb;
1229 u32 nr; 1228 u32 nr;
1230 u32 blocksize; 1229 u32 blocksize;
1231 u32 nscan = 0; 1230 u32 nscan = 0;
1231 bool map = true;
1232 1232
1233 if (level != 1) 1233 if (level != 1)
1234 return; 1234 return;
@@ -1250,7 +1250,19 @@ static void reada_for_search(struct btrfs_root *root,
1250 1250
1251 nritems = btrfs_header_nritems(node); 1251 nritems = btrfs_header_nritems(node);
1252 nr = slot; 1252 nr = slot;
1253 if (node->map_token || path->skip_locking)
1254 map = false;
1255
1253 while (1) { 1256 while (1) {
1257 if (map && !node->map_token) {
1258 unsigned long offset = btrfs_node_key_ptr_offset(nr);
1259 map_private_extent_buffer(node, offset,
1260 sizeof(struct btrfs_key_ptr),
1261 &node->map_token,
1262 &node->kaddr,
1263 &node->map_start,
1264 &node->map_len, KM_USER1);
1265 }
1254 if (direction < 0) { 1266 if (direction < 0) {
1255 if (nr == 0) 1267 if (nr == 0)
1256 break; 1268 break;
@@ -1268,14 +1280,23 @@ static void reada_for_search(struct btrfs_root *root,
1268 search = btrfs_node_blockptr(node, nr); 1280 search = btrfs_node_blockptr(node, nr);
1269 if ((search <= target && target - search <= 65536) || 1281 if ((search <= target && target - search <= 65536) ||
1270 (search > target && search - target <= 65536)) { 1282 (search > target && search - target <= 65536)) {
1271 readahead_tree_block(root, search, blocksize, 1283 gen = btrfs_node_ptr_generation(node, nr);
1272 btrfs_node_ptr_generation(node, nr)); 1284 if (map && node->map_token) {
1285 unmap_extent_buffer(node, node->map_token,
1286 KM_USER1);
1287 node->map_token = NULL;
1288 }
1289 readahead_tree_block(root, search, blocksize, gen);
1273 nread += blocksize; 1290 nread += blocksize;
1274 } 1291 }
1275 nscan++; 1292 nscan++;
1276 if ((nread > 65536 || nscan > 32)) 1293 if ((nread > 65536 || nscan > 32))
1277 break; 1294 break;
1278 } 1295 }
1296 if (map && node->map_token) {
1297 unmap_extent_buffer(node, node->map_token, KM_USER1);
1298 node->map_token = NULL;
1299 }
1279} 1300}
1280 1301
1281/* 1302/*
@@ -1648,9 +1669,6 @@ again:
1648 } 1669 }
1649cow_done: 1670cow_done:
1650 BUG_ON(!cow && ins_len); 1671 BUG_ON(!cow && ins_len);
1651 if (level != btrfs_header_level(b))
1652 WARN_ON(1);
1653 level = btrfs_header_level(b);
1654 1672
1655 p->nodes[level] = b; 1673 p->nodes[level] = b;
1656 if (!p->skip_locking) 1674 if (!p->skip_locking)
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 6c093fa98f61..f30ac05dbda7 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -19,7 +19,6 @@
19#ifndef __BTRFS_CTREE__ 19#ifndef __BTRFS_CTREE__
20#define __BTRFS_CTREE__ 20#define __BTRFS_CTREE__
21 21
22#include <linux/version.h>
23#include <linux/mm.h> 22#include <linux/mm.h>
24#include <linux/highmem.h> 23#include <linux/highmem.h>
25#include <linux/fs.h> 24#include <linux/fs.h>
@@ -930,7 +929,6 @@ struct btrfs_fs_info {
930 * is required instead of the faster short fsync log commits 929 * is required instead of the faster short fsync log commits
931 */ 930 */
932 u64 last_trans_log_full_commit; 931 u64 last_trans_log_full_commit;
933 u64 open_ioctl_trans;
934 unsigned long mount_opt:20; 932 unsigned long mount_opt:20;
935 unsigned long compress_type:4; 933 unsigned long compress_type:4;
936 u64 max_inline; 934 u64 max_inline;
@@ -947,7 +945,6 @@ struct btrfs_fs_info {
947 struct super_block *sb; 945 struct super_block *sb;
948 struct inode *btree_inode; 946 struct inode *btree_inode;
949 struct backing_dev_info bdi; 947 struct backing_dev_info bdi;
950 struct mutex trans_mutex;
951 struct mutex tree_log_mutex; 948 struct mutex tree_log_mutex;
952 struct mutex transaction_kthread_mutex; 949 struct mutex transaction_kthread_mutex;
953 struct mutex cleaner_mutex; 950 struct mutex cleaner_mutex;
@@ -968,6 +965,13 @@ struct btrfs_fs_info {
968 struct rw_semaphore subvol_sem; 965 struct rw_semaphore subvol_sem;
969 struct srcu_struct subvol_srcu; 966 struct srcu_struct subvol_srcu;
970 967
968 spinlock_t trans_lock;
969 /*
970 * the reloc mutex goes with the trans lock, it is taken
971 * during commit to protect us from the relocation code
972 */
973 struct mutex reloc_mutex;
974
971 struct list_head trans_list; 975 struct list_head trans_list;
972 struct list_head hashers; 976 struct list_head hashers;
973 struct list_head dead_roots; 977 struct list_head dead_roots;
@@ -980,6 +984,7 @@ struct btrfs_fs_info {
980 atomic_t async_submit_draining; 984 atomic_t async_submit_draining;
981 atomic_t nr_async_bios; 985 atomic_t nr_async_bios;
982 atomic_t async_delalloc_pages; 986 atomic_t async_delalloc_pages;
987 atomic_t open_ioctl_trans;
983 988
984 /* 989 /*
985 * this is used by the balancing code to wait for all the pending 990 * this is used by the balancing code to wait for all the pending
@@ -1044,6 +1049,7 @@ struct btrfs_fs_info {
1044 int closing; 1049 int closing;
1045 int log_root_recovering; 1050 int log_root_recovering;
1046 int enospc_unlink; 1051 int enospc_unlink;
1052 int trans_no_join;
1047 1053
1048 u64 total_pinned; 1054 u64 total_pinned;
1049 1055
@@ -1065,7 +1071,6 @@ struct btrfs_fs_info {
1065 struct reloc_control *reloc_ctl; 1071 struct reloc_control *reloc_ctl;
1066 1072
1067 spinlock_t delalloc_lock; 1073 spinlock_t delalloc_lock;
1068 spinlock_t new_trans_lock;
1069 u64 delalloc_bytes; 1074 u64 delalloc_bytes;
1070 1075
1071 /* data_alloc_cluster is only used in ssd mode */ 1076 /* data_alloc_cluster is only used in ssd mode */
@@ -1172,6 +1177,14 @@ struct btrfs_root {
1172 u32 type; 1177 u32 type;
1173 1178
1174 u64 highest_objectid; 1179 u64 highest_objectid;
1180
1181 /* btrfs_record_root_in_trans is a multi-step process,
1182 * and it can race with the balancing code. But the
1183 * race is very small, and only the first time the root
1184 * is added to each transaction. So in_trans_setup
1185 * is used to tell us when more checks are required
1186 */
1187 unsigned long in_trans_setup;
1175 int ref_cows; 1188 int ref_cows;
1176 int track_dirty; 1189 int track_dirty;
1177 int in_radix; 1190 int in_radix;
@@ -1181,7 +1194,6 @@ struct btrfs_root {
1181 struct btrfs_key defrag_max; 1194 struct btrfs_key defrag_max;
1182 int defrag_running; 1195 int defrag_running;
1183 char *name; 1196 char *name;
1184 int in_sysfs;
1185 1197
1186 /* the dirty list is only used by non-reference counted roots */ 1198 /* the dirty list is only used by non-reference counted roots */
1187 struct list_head dirty_list; 1199 struct list_head dirty_list;
@@ -1340,6 +1352,7 @@ struct btrfs_ioctl_defrag_range_args {
1340#define BTRFS_MOUNT_USER_SUBVOL_RM_ALLOWED (1 << 14) 1352#define BTRFS_MOUNT_USER_SUBVOL_RM_ALLOWED (1 << 14)
1341#define BTRFS_MOUNT_ENOSPC_DEBUG (1 << 15) 1353#define BTRFS_MOUNT_ENOSPC_DEBUG (1 << 15)
1342#define BTRFS_MOUNT_AUTO_DEFRAG (1 << 16) 1354#define BTRFS_MOUNT_AUTO_DEFRAG (1 << 16)
1355#define BTRFS_MOUNT_INODE_MAP_CACHE (1 << 17)
1343 1356
1344#define btrfs_clear_opt(o, opt) ((o) &= ~BTRFS_MOUNT_##opt) 1357#define btrfs_clear_opt(o, opt) ((o) &= ~BTRFS_MOUNT_##opt)
1345#define btrfs_set_opt(o, opt) ((o) |= BTRFS_MOUNT_##opt) 1358#define btrfs_set_opt(o, opt) ((o) |= BTRFS_MOUNT_##opt)
@@ -2238,6 +2251,9 @@ int btrfs_block_rsv_migrate(struct btrfs_block_rsv *src_rsv,
2238void btrfs_block_rsv_release(struct btrfs_root *root, 2251void btrfs_block_rsv_release(struct btrfs_root *root,
2239 struct btrfs_block_rsv *block_rsv, 2252 struct btrfs_block_rsv *block_rsv,
2240 u64 num_bytes); 2253 u64 num_bytes);
2254int btrfs_truncate_reserve_metadata(struct btrfs_trans_handle *trans,
2255 struct btrfs_root *root,
2256 struct btrfs_block_rsv *rsv);
2241int btrfs_set_block_group_ro(struct btrfs_root *root, 2257int btrfs_set_block_group_ro(struct btrfs_root *root,
2242 struct btrfs_block_group_cache *cache); 2258 struct btrfs_block_group_cache *cache);
2243int btrfs_set_block_group_rw(struct btrfs_root *root, 2259int btrfs_set_block_group_rw(struct btrfs_root *root,
@@ -2350,6 +2366,15 @@ int btrfs_drop_subtree(struct btrfs_trans_handle *trans,
2350 struct btrfs_root *root, 2366 struct btrfs_root *root,
2351 struct extent_buffer *node, 2367 struct extent_buffer *node,
2352 struct extent_buffer *parent); 2368 struct extent_buffer *parent);
2369static inline int btrfs_fs_closing(struct btrfs_fs_info *fs_info)
2370{
2371 /*
2372 * Get synced with close_ctree()
2373 */
2374 smp_mb();
2375 return fs_info->closing;
2376}
2377
2353/* root-item.c */ 2378/* root-item.c */
2354int btrfs_find_root_ref(struct btrfs_root *tree_root, 2379int btrfs_find_root_ref(struct btrfs_root *tree_root,
2355 struct btrfs_path *path, 2380 struct btrfs_path *path,
@@ -2512,8 +2537,7 @@ int btrfs_set_extent_delalloc(struct inode *inode, u64 start, u64 end,
2512int btrfs_writepages(struct address_space *mapping, 2537int btrfs_writepages(struct address_space *mapping,
2513 struct writeback_control *wbc); 2538 struct writeback_control *wbc);
2514int btrfs_create_subvol_root(struct btrfs_trans_handle *trans, 2539int btrfs_create_subvol_root(struct btrfs_trans_handle *trans,
2515 struct btrfs_root *new_root, 2540 struct btrfs_root *new_root, u64 new_dirid);
2516 u64 new_dirid, u64 alloc_hint);
2517int btrfs_merge_bio_hook(struct page *page, unsigned long offset, 2541int btrfs_merge_bio_hook(struct page *page, unsigned long offset,
2518 size_t size, struct bio *bio, unsigned long bio_flags); 2542 size_t size, struct bio *bio, unsigned long bio_flags);
2519 2543
diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c
index 01e29503a54b..f1cbd028f7b3 100644
--- a/fs/btrfs/delayed-inode.c
+++ b/fs/btrfs/delayed-inode.c
@@ -297,7 +297,6 @@ struct btrfs_delayed_item *btrfs_alloc_delayed_item(u32 data_len)
297 item->data_len = data_len; 297 item->data_len = data_len;
298 item->ins_or_del = 0; 298 item->ins_or_del = 0;
299 item->bytes_reserved = 0; 299 item->bytes_reserved = 0;
300 item->block_rsv = NULL;
301 item->delayed_node = NULL; 300 item->delayed_node = NULL;
302 atomic_set(&item->refs, 1); 301 atomic_set(&item->refs, 1);
303 } 302 }
@@ -593,10 +592,8 @@ static int btrfs_delayed_item_reserve_metadata(struct btrfs_trans_handle *trans,
593 592
594 num_bytes = btrfs_calc_trans_metadata_size(root, 1); 593 num_bytes = btrfs_calc_trans_metadata_size(root, 1);
595 ret = btrfs_block_rsv_migrate(src_rsv, dst_rsv, num_bytes); 594 ret = btrfs_block_rsv_migrate(src_rsv, dst_rsv, num_bytes);
596 if (!ret) { 595 if (!ret)
597 item->bytes_reserved = num_bytes; 596 item->bytes_reserved = num_bytes;
598 item->block_rsv = dst_rsv;
599 }
600 597
601 return ret; 598 return ret;
602} 599}
@@ -604,10 +601,13 @@ static int btrfs_delayed_item_reserve_metadata(struct btrfs_trans_handle *trans,
604static void btrfs_delayed_item_release_metadata(struct btrfs_root *root, 601static void btrfs_delayed_item_release_metadata(struct btrfs_root *root,
605 struct btrfs_delayed_item *item) 602 struct btrfs_delayed_item *item)
606{ 603{
604 struct btrfs_block_rsv *rsv;
605
607 if (!item->bytes_reserved) 606 if (!item->bytes_reserved)
608 return; 607 return;
609 608
610 btrfs_block_rsv_release(root, item->block_rsv, 609 rsv = &root->fs_info->global_block_rsv;
610 btrfs_block_rsv_release(root, rsv,
611 item->bytes_reserved); 611 item->bytes_reserved);
612} 612}
613 613
@@ -678,6 +678,7 @@ static int btrfs_batch_insert_items(struct btrfs_trans_handle *trans,
678 INIT_LIST_HEAD(&head); 678 INIT_LIST_HEAD(&head);
679 679
680 next = item; 680 next = item;
681 nitems = 0;
681 682
682 /* 683 /*
683 * count the number of the continuous items that we can insert in batch 684 * count the number of the continuous items that we can insert in batch
@@ -1013,6 +1014,7 @@ int btrfs_run_delayed_items(struct btrfs_trans_handle *trans,
1013 struct btrfs_delayed_root *delayed_root; 1014 struct btrfs_delayed_root *delayed_root;
1014 struct btrfs_delayed_node *curr_node, *prev_node; 1015 struct btrfs_delayed_node *curr_node, *prev_node;
1015 struct btrfs_path *path; 1016 struct btrfs_path *path;
1017 struct btrfs_block_rsv *block_rsv;
1016 int ret = 0; 1018 int ret = 0;
1017 1019
1018 path = btrfs_alloc_path(); 1020 path = btrfs_alloc_path();
@@ -1020,6 +1022,9 @@ int btrfs_run_delayed_items(struct btrfs_trans_handle *trans,
1020 return -ENOMEM; 1022 return -ENOMEM;
1021 path->leave_spinning = 1; 1023 path->leave_spinning = 1;
1022 1024
1025 block_rsv = trans->block_rsv;
1026 trans->block_rsv = &root->fs_info->global_block_rsv;
1027
1023 delayed_root = btrfs_get_delayed_root(root); 1028 delayed_root = btrfs_get_delayed_root(root);
1024 1029
1025 curr_node = btrfs_first_delayed_node(delayed_root); 1030 curr_node = btrfs_first_delayed_node(delayed_root);
@@ -1044,6 +1049,7 @@ int btrfs_run_delayed_items(struct btrfs_trans_handle *trans,
1044 } 1049 }
1045 1050
1046 btrfs_free_path(path); 1051 btrfs_free_path(path);
1052 trans->block_rsv = block_rsv;
1047 return ret; 1053 return ret;
1048} 1054}
1049 1055
@@ -1051,6 +1057,7 @@ static int __btrfs_commit_inode_delayed_items(struct btrfs_trans_handle *trans,
1051 struct btrfs_delayed_node *node) 1057 struct btrfs_delayed_node *node)
1052{ 1058{
1053 struct btrfs_path *path; 1059 struct btrfs_path *path;
1060 struct btrfs_block_rsv *block_rsv;
1054 int ret; 1061 int ret;
1055 1062
1056 path = btrfs_alloc_path(); 1063 path = btrfs_alloc_path();
@@ -1058,6 +1065,9 @@ static int __btrfs_commit_inode_delayed_items(struct btrfs_trans_handle *trans,
1058 return -ENOMEM; 1065 return -ENOMEM;
1059 path->leave_spinning = 1; 1066 path->leave_spinning = 1;
1060 1067
1068 block_rsv = trans->block_rsv;
1069 trans->block_rsv = &node->root->fs_info->global_block_rsv;
1070
1061 ret = btrfs_insert_delayed_items(trans, path, node->root, node); 1071 ret = btrfs_insert_delayed_items(trans, path, node->root, node);
1062 if (!ret) 1072 if (!ret)
1063 ret = btrfs_delete_delayed_items(trans, path, node->root, node); 1073 ret = btrfs_delete_delayed_items(trans, path, node->root, node);
@@ -1065,6 +1075,7 @@ static int __btrfs_commit_inode_delayed_items(struct btrfs_trans_handle *trans,
1065 ret = btrfs_update_delayed_inode(trans, node->root, path, node); 1075 ret = btrfs_update_delayed_inode(trans, node->root, path, node);
1066 btrfs_free_path(path); 1076 btrfs_free_path(path);
1067 1077
1078 trans->block_rsv = block_rsv;
1068 return ret; 1079 return ret;
1069} 1080}
1070 1081
@@ -1115,6 +1126,7 @@ static void btrfs_async_run_delayed_node_done(struct btrfs_work *work)
1115 struct btrfs_path *path; 1126 struct btrfs_path *path;
1116 struct btrfs_delayed_node *delayed_node = NULL; 1127 struct btrfs_delayed_node *delayed_node = NULL;
1117 struct btrfs_root *root; 1128 struct btrfs_root *root;
1129 struct btrfs_block_rsv *block_rsv;
1118 unsigned long nr = 0; 1130 unsigned long nr = 0;
1119 int need_requeue = 0; 1131 int need_requeue = 0;
1120 int ret; 1132 int ret;
@@ -1129,10 +1141,13 @@ static void btrfs_async_run_delayed_node_done(struct btrfs_work *work)
1129 delayed_node = async_node->delayed_node; 1141 delayed_node = async_node->delayed_node;
1130 root = delayed_node->root; 1142 root = delayed_node->root;
1131 1143
1132 trans = btrfs_join_transaction(root, 0); 1144 trans = btrfs_join_transaction(root);
1133 if (IS_ERR(trans)) 1145 if (IS_ERR(trans))
1134 goto free_path; 1146 goto free_path;
1135 1147
1148 block_rsv = trans->block_rsv;
1149 trans->block_rsv = &root->fs_info->global_block_rsv;
1150
1136 ret = btrfs_insert_delayed_items(trans, path, root, delayed_node); 1151 ret = btrfs_insert_delayed_items(trans, path, root, delayed_node);
1137 if (!ret) 1152 if (!ret)
1138 ret = btrfs_delete_delayed_items(trans, path, root, 1153 ret = btrfs_delete_delayed_items(trans, path, root,
@@ -1175,6 +1190,7 @@ static void btrfs_async_run_delayed_node_done(struct btrfs_work *work)
1175 1190
1176 nr = trans->blocks_used; 1191 nr = trans->blocks_used;
1177 1192
1193 trans->block_rsv = block_rsv;
1178 btrfs_end_transaction_dmeta(trans, root); 1194 btrfs_end_transaction_dmeta(trans, root);
1179 __btrfs_btree_balance_dirty(root, nr); 1195 __btrfs_btree_balance_dirty(root, nr);
1180free_path: 1196free_path:
@@ -1221,6 +1237,13 @@ again:
1221 return 0; 1237 return 0;
1222} 1238}
1223 1239
1240void btrfs_assert_delayed_root_empty(struct btrfs_root *root)
1241{
1242 struct btrfs_delayed_root *delayed_root;
1243 delayed_root = btrfs_get_delayed_root(root);
1244 WARN_ON(btrfs_first_delayed_node(delayed_root));
1245}
1246
1224void btrfs_balance_delayed_items(struct btrfs_root *root) 1247void btrfs_balance_delayed_items(struct btrfs_root *root)
1225{ 1248{
1226 struct btrfs_delayed_root *delayed_root; 1249 struct btrfs_delayed_root *delayed_root;
@@ -1572,8 +1595,7 @@ static void fill_stack_inode_item(struct btrfs_trans_handle *trans,
1572 btrfs_set_stack_inode_transid(inode_item, trans->transid); 1595 btrfs_set_stack_inode_transid(inode_item, trans->transid);
1573 btrfs_set_stack_inode_rdev(inode_item, inode->i_rdev); 1596 btrfs_set_stack_inode_rdev(inode_item, inode->i_rdev);
1574 btrfs_set_stack_inode_flags(inode_item, BTRFS_I(inode)->flags); 1597 btrfs_set_stack_inode_flags(inode_item, BTRFS_I(inode)->flags);
1575 btrfs_set_stack_inode_block_group(inode_item, 1598 btrfs_set_stack_inode_block_group(inode_item, 0);
1576 BTRFS_I(inode)->block_group);
1577 1599
1578 btrfs_set_stack_timespec_sec(btrfs_inode_atime(inode_item), 1600 btrfs_set_stack_timespec_sec(btrfs_inode_atime(inode_item),
1579 inode->i_atime.tv_sec); 1601 inode->i_atime.tv_sec);
@@ -1595,7 +1617,7 @@ int btrfs_delayed_update_inode(struct btrfs_trans_handle *trans,
1595 struct btrfs_root *root, struct inode *inode) 1617 struct btrfs_root *root, struct inode *inode)
1596{ 1618{
1597 struct btrfs_delayed_node *delayed_node; 1619 struct btrfs_delayed_node *delayed_node;
1598 int ret; 1620 int ret = 0;
1599 1621
1600 delayed_node = btrfs_get_or_create_delayed_node(inode); 1622 delayed_node = btrfs_get_or_create_delayed_node(inode);
1601 if (IS_ERR(delayed_node)) 1623 if (IS_ERR(delayed_node))
diff --git a/fs/btrfs/delayed-inode.h b/fs/btrfs/delayed-inode.h
index eb7d240aa648..d1a6a2915c66 100644
--- a/fs/btrfs/delayed-inode.h
+++ b/fs/btrfs/delayed-inode.h
@@ -75,7 +75,6 @@ struct btrfs_delayed_item {
75 struct list_head tree_list; /* used for batch insert/delete items */ 75 struct list_head tree_list; /* used for batch insert/delete items */
76 struct list_head readdir_list; /* used for readdir items */ 76 struct list_head readdir_list; /* used for readdir items */
77 u64 bytes_reserved; 77 u64 bytes_reserved;
78 struct btrfs_block_rsv *block_rsv;
79 struct btrfs_delayed_node *delayed_node; 78 struct btrfs_delayed_node *delayed_node;
80 atomic_t refs; 79 atomic_t refs;
81 int ins_or_del; 80 int ins_or_del;
@@ -138,4 +137,8 @@ int btrfs_readdir_delayed_dir_index(struct file *filp, void *dirent,
138/* for init */ 137/* for init */
139int __init btrfs_delayed_inode_init(void); 138int __init btrfs_delayed_inode_init(void);
140void btrfs_delayed_inode_exit(void); 139void btrfs_delayed_inode_exit(void);
140
141/* for debugging */
142void btrfs_assert_delayed_root_empty(struct btrfs_root *root);
143
141#endif 144#endif
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 98b6a71decba..1ac8db5dc0a3 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -1044,7 +1044,6 @@ static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize,
1044 root->last_trans = 0; 1044 root->last_trans = 0;
1045 root->highest_objectid = 0; 1045 root->highest_objectid = 0;
1046 root->name = NULL; 1046 root->name = NULL;
1047 root->in_sysfs = 0;
1048 root->inode_tree = RB_ROOT; 1047 root->inode_tree = RB_ROOT;
1049 INIT_RADIX_TREE(&root->delayed_nodes_tree, GFP_ATOMIC); 1048 INIT_RADIX_TREE(&root->delayed_nodes_tree, GFP_ATOMIC);
1050 root->block_rsv = NULL; 1049 root->block_rsv = NULL;
@@ -1300,19 +1299,21 @@ again:
1300 return root; 1299 return root;
1301 1300
1302 root->free_ino_ctl = kzalloc(sizeof(*root->free_ino_ctl), GFP_NOFS); 1301 root->free_ino_ctl = kzalloc(sizeof(*root->free_ino_ctl), GFP_NOFS);
1303 if (!root->free_ino_ctl)
1304 goto fail;
1305 root->free_ino_pinned = kzalloc(sizeof(*root->free_ino_pinned), 1302 root->free_ino_pinned = kzalloc(sizeof(*root->free_ino_pinned),
1306 GFP_NOFS); 1303 GFP_NOFS);
1307 if (!root->free_ino_pinned) 1304 if (!root->free_ino_pinned || !root->free_ino_ctl) {
1305 ret = -ENOMEM;
1308 goto fail; 1306 goto fail;
1307 }
1309 1308
1310 btrfs_init_free_ino_ctl(root); 1309 btrfs_init_free_ino_ctl(root);
1311 mutex_init(&root->fs_commit_mutex); 1310 mutex_init(&root->fs_commit_mutex);
1312 spin_lock_init(&root->cache_lock); 1311 spin_lock_init(&root->cache_lock);
1313 init_waitqueue_head(&root->cache_wait); 1312 init_waitqueue_head(&root->cache_wait);
1314 1313
1315 set_anon_super(&root->anon_super, NULL); 1314 ret = set_anon_super(&root->anon_super, NULL);
1315 if (ret)
1316 goto fail;
1316 1317
1317 if (btrfs_root_refs(&root->root_item) == 0) { 1318 if (btrfs_root_refs(&root->root_item) == 0) {
1318 ret = -ENOENT; 1319 ret = -ENOENT;
@@ -1505,24 +1506,24 @@ static int transaction_kthread(void *arg)
1505 vfs_check_frozen(root->fs_info->sb, SB_FREEZE_WRITE); 1506 vfs_check_frozen(root->fs_info->sb, SB_FREEZE_WRITE);
1506 mutex_lock(&root->fs_info->transaction_kthread_mutex); 1507 mutex_lock(&root->fs_info->transaction_kthread_mutex);
1507 1508
1508 spin_lock(&root->fs_info->new_trans_lock); 1509 spin_lock(&root->fs_info->trans_lock);
1509 cur = root->fs_info->running_transaction; 1510 cur = root->fs_info->running_transaction;
1510 if (!cur) { 1511 if (!cur) {
1511 spin_unlock(&root->fs_info->new_trans_lock); 1512 spin_unlock(&root->fs_info->trans_lock);
1512 goto sleep; 1513 goto sleep;
1513 } 1514 }
1514 1515
1515 now = get_seconds(); 1516 now = get_seconds();
1516 if (!cur->blocked && 1517 if (!cur->blocked &&
1517 (now < cur->start_time || now - cur->start_time < 30)) { 1518 (now < cur->start_time || now - cur->start_time < 30)) {
1518 spin_unlock(&root->fs_info->new_trans_lock); 1519 spin_unlock(&root->fs_info->trans_lock);
1519 delay = HZ * 5; 1520 delay = HZ * 5;
1520 goto sleep; 1521 goto sleep;
1521 } 1522 }
1522 transid = cur->transid; 1523 transid = cur->transid;
1523 spin_unlock(&root->fs_info->new_trans_lock); 1524 spin_unlock(&root->fs_info->trans_lock);
1524 1525
1525 trans = btrfs_join_transaction(root, 1); 1526 trans = btrfs_join_transaction(root);
1526 BUG_ON(IS_ERR(trans)); 1527 BUG_ON(IS_ERR(trans));
1527 if (transid == trans->transid) { 1528 if (transid == trans->transid) {
1528 ret = btrfs_commit_transaction(trans, root); 1529 ret = btrfs_commit_transaction(trans, root);
@@ -1613,11 +1614,12 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1613 INIT_LIST_HEAD(&fs_info->ordered_operations); 1614 INIT_LIST_HEAD(&fs_info->ordered_operations);
1614 INIT_LIST_HEAD(&fs_info->caching_block_groups); 1615 INIT_LIST_HEAD(&fs_info->caching_block_groups);
1615 spin_lock_init(&fs_info->delalloc_lock); 1616 spin_lock_init(&fs_info->delalloc_lock);
1616 spin_lock_init(&fs_info->new_trans_lock); 1617 spin_lock_init(&fs_info->trans_lock);
1617 spin_lock_init(&fs_info->ref_cache_lock); 1618 spin_lock_init(&fs_info->ref_cache_lock);
1618 spin_lock_init(&fs_info->fs_roots_radix_lock); 1619 spin_lock_init(&fs_info->fs_roots_radix_lock);
1619 spin_lock_init(&fs_info->delayed_iput_lock); 1620 spin_lock_init(&fs_info->delayed_iput_lock);
1620 spin_lock_init(&fs_info->defrag_inodes_lock); 1621 spin_lock_init(&fs_info->defrag_inodes_lock);
1622 mutex_init(&fs_info->reloc_mutex);
1621 1623
1622 init_completion(&fs_info->kobj_unregister); 1624 init_completion(&fs_info->kobj_unregister);
1623 fs_info->tree_root = tree_root; 1625 fs_info->tree_root = tree_root;
@@ -1645,6 +1647,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1645 fs_info->max_inline = 8192 * 1024; 1647 fs_info->max_inline = 8192 * 1024;
1646 fs_info->metadata_ratio = 0; 1648 fs_info->metadata_ratio = 0;
1647 fs_info->defrag_inodes = RB_ROOT; 1649 fs_info->defrag_inodes = RB_ROOT;
1650 fs_info->trans_no_join = 0;
1648 1651
1649 fs_info->thread_pool_size = min_t(unsigned long, 1652 fs_info->thread_pool_size = min_t(unsigned long,
1650 num_online_cpus() + 2, 8); 1653 num_online_cpus() + 2, 8);
@@ -1667,8 +1670,6 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1667 init_waitqueue_head(&fs_info->scrub_pause_wait); 1670 init_waitqueue_head(&fs_info->scrub_pause_wait);
1668 init_rwsem(&fs_info->scrub_super_lock); 1671 init_rwsem(&fs_info->scrub_super_lock);
1669 fs_info->scrub_workers_refcnt = 0; 1672 fs_info->scrub_workers_refcnt = 0;
1670 btrfs_init_workers(&fs_info->scrub_workers, "scrub",
1671 fs_info->thread_pool_size, &fs_info->generic_worker);
1672 1673
1673 sb->s_blocksize = 4096; 1674 sb->s_blocksize = 4096;
1674 sb->s_blocksize_bits = blksize_bits(4096); 1675 sb->s_blocksize_bits = blksize_bits(4096);
@@ -1709,7 +1710,6 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1709 fs_info->do_barriers = 1; 1710 fs_info->do_barriers = 1;
1710 1711
1711 1712
1712 mutex_init(&fs_info->trans_mutex);
1713 mutex_init(&fs_info->ordered_operations_mutex); 1713 mutex_init(&fs_info->ordered_operations_mutex);
1714 mutex_init(&fs_info->tree_log_mutex); 1714 mutex_init(&fs_info->tree_log_mutex);
1715 mutex_init(&fs_info->chunk_mutex); 1715 mutex_init(&fs_info->chunk_mutex);
@@ -2479,13 +2479,13 @@ int btrfs_commit_super(struct btrfs_root *root)
2479 down_write(&root->fs_info->cleanup_work_sem); 2479 down_write(&root->fs_info->cleanup_work_sem);
2480 up_write(&root->fs_info->cleanup_work_sem); 2480 up_write(&root->fs_info->cleanup_work_sem);
2481 2481
2482 trans = btrfs_join_transaction(root, 1); 2482 trans = btrfs_join_transaction(root);
2483 if (IS_ERR(trans)) 2483 if (IS_ERR(trans))
2484 return PTR_ERR(trans); 2484 return PTR_ERR(trans);
2485 ret = btrfs_commit_transaction(trans, root); 2485 ret = btrfs_commit_transaction(trans, root);
2486 BUG_ON(ret); 2486 BUG_ON(ret);
2487 /* run commit again to drop the original snapshot */ 2487 /* run commit again to drop the original snapshot */
2488 trans = btrfs_join_transaction(root, 1); 2488 trans = btrfs_join_transaction(root);
2489 if (IS_ERR(trans)) 2489 if (IS_ERR(trans))
2490 return PTR_ERR(trans); 2490 return PTR_ERR(trans);
2491 btrfs_commit_transaction(trans, root); 2491 btrfs_commit_transaction(trans, root);
@@ -2911,9 +2911,8 @@ static int btrfs_destroy_delalloc_inodes(struct btrfs_root *root)
2911 2911
2912 INIT_LIST_HEAD(&splice); 2912 INIT_LIST_HEAD(&splice);
2913 2913
2914 list_splice_init(&root->fs_info->delalloc_inodes, &splice);
2915
2916 spin_lock(&root->fs_info->delalloc_lock); 2914 spin_lock(&root->fs_info->delalloc_lock);
2915 list_splice_init(&root->fs_info->delalloc_inodes, &splice);
2917 2916
2918 while (!list_empty(&splice)) { 2917 while (!list_empty(&splice)) {
2919 btrfs_inode = list_entry(splice.next, struct btrfs_inode, 2918 btrfs_inode = list_entry(splice.next, struct btrfs_inode,
@@ -3024,10 +3023,13 @@ static int btrfs_cleanup_transaction(struct btrfs_root *root)
3024 3023
3025 WARN_ON(1); 3024 WARN_ON(1);
3026 3025
3027 mutex_lock(&root->fs_info->trans_mutex);
3028 mutex_lock(&root->fs_info->transaction_kthread_mutex); 3026 mutex_lock(&root->fs_info->transaction_kthread_mutex);
3029 3027
3028 spin_lock(&root->fs_info->trans_lock);
3030 list_splice_init(&root->fs_info->trans_list, &list); 3029 list_splice_init(&root->fs_info->trans_list, &list);
3030 root->fs_info->trans_no_join = 1;
3031 spin_unlock(&root->fs_info->trans_lock);
3032
3031 while (!list_empty(&list)) { 3033 while (!list_empty(&list)) {
3032 t = list_entry(list.next, struct btrfs_transaction, list); 3034 t = list_entry(list.next, struct btrfs_transaction, list);
3033 if (!t) 3035 if (!t)
@@ -3052,23 +3054,18 @@ static int btrfs_cleanup_transaction(struct btrfs_root *root)
3052 t->blocked = 0; 3054 t->blocked = 0;
3053 if (waitqueue_active(&root->fs_info->transaction_wait)) 3055 if (waitqueue_active(&root->fs_info->transaction_wait))
3054 wake_up(&root->fs_info->transaction_wait); 3056 wake_up(&root->fs_info->transaction_wait);
3055 mutex_unlock(&root->fs_info->trans_mutex);
3056 3057
3057 mutex_lock(&root->fs_info->trans_mutex);
3058 t->commit_done = 1; 3058 t->commit_done = 1;
3059 if (waitqueue_active(&t->commit_wait)) 3059 if (waitqueue_active(&t->commit_wait))
3060 wake_up(&t->commit_wait); 3060 wake_up(&t->commit_wait);
3061 mutex_unlock(&root->fs_info->trans_mutex);
3062
3063 mutex_lock(&root->fs_info->trans_mutex);
3064 3061
3065 btrfs_destroy_pending_snapshots(t); 3062 btrfs_destroy_pending_snapshots(t);
3066 3063
3067 btrfs_destroy_delalloc_inodes(root); 3064 btrfs_destroy_delalloc_inodes(root);
3068 3065
3069 spin_lock(&root->fs_info->new_trans_lock); 3066 spin_lock(&root->fs_info->trans_lock);
3070 root->fs_info->running_transaction = NULL; 3067 root->fs_info->running_transaction = NULL;
3071 spin_unlock(&root->fs_info->new_trans_lock); 3068 spin_unlock(&root->fs_info->trans_lock);
3072 3069
3073 btrfs_destroy_marked_extents(root, &t->dirty_pages, 3070 btrfs_destroy_marked_extents(root, &t->dirty_pages,
3074 EXTENT_DIRTY); 3071 EXTENT_DIRTY);
@@ -3082,8 +3079,10 @@ static int btrfs_cleanup_transaction(struct btrfs_root *root)
3082 kmem_cache_free(btrfs_transaction_cachep, t); 3079 kmem_cache_free(btrfs_transaction_cachep, t);
3083 } 3080 }
3084 3081
3082 spin_lock(&root->fs_info->trans_lock);
3083 root->fs_info->trans_no_join = 0;
3084 spin_unlock(&root->fs_info->trans_lock);
3085 mutex_unlock(&root->fs_info->transaction_kthread_mutex); 3085 mutex_unlock(&root->fs_info->transaction_kthread_mutex);
3086 mutex_unlock(&root->fs_info->trans_mutex);
3087 3086
3088 return 0; 3087 return 0;
3089} 3088}
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 169bd62ce776..1f61bf5b4960 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -348,7 +348,7 @@ static int caching_kthread(void *data)
348 */ 348 */
349 path->skip_locking = 1; 349 path->skip_locking = 1;
350 path->search_commit_root = 1; 350 path->search_commit_root = 1;
351 path->reada = 2; 351 path->reada = 1;
352 352
353 key.objectid = last; 353 key.objectid = last;
354 key.offset = 0; 354 key.offset = 0;
@@ -366,8 +366,7 @@ again:
366 nritems = btrfs_header_nritems(leaf); 366 nritems = btrfs_header_nritems(leaf);
367 367
368 while (1) { 368 while (1) {
369 smp_mb(); 369 if (btrfs_fs_closing(fs_info) > 1) {
370 if (fs_info->closing > 1) {
371 last = (u64)-1; 370 last = (u64)-1;
372 break; 371 break;
373 } 372 }
@@ -379,15 +378,18 @@ again:
379 if (ret) 378 if (ret)
380 break; 379 break;
381 380
382 caching_ctl->progress = last; 381 if (need_resched() ||
383 btrfs_release_path(path); 382 btrfs_next_leaf(extent_root, path)) {
384 up_read(&fs_info->extent_commit_sem); 383 caching_ctl->progress = last;
385 mutex_unlock(&caching_ctl->mutex); 384 btrfs_release_path(path);
386 if (btrfs_transaction_in_commit(fs_info)) 385 up_read(&fs_info->extent_commit_sem);
387 schedule_timeout(1); 386 mutex_unlock(&caching_ctl->mutex);
388 else
389 cond_resched(); 387 cond_resched();
390 goto again; 388 goto again;
389 }
390 leaf = path->nodes[0];
391 nritems = btrfs_header_nritems(leaf);
392 continue;
391 } 393 }
392 394
393 if (key.objectid < block_group->key.objectid) { 395 if (key.objectid < block_group->key.objectid) {
@@ -3065,7 +3067,7 @@ again:
3065 spin_unlock(&data_sinfo->lock); 3067 spin_unlock(&data_sinfo->lock);
3066alloc: 3068alloc:
3067 alloc_target = btrfs_get_alloc_profile(root, 1); 3069 alloc_target = btrfs_get_alloc_profile(root, 1);
3068 trans = btrfs_join_transaction(root, 1); 3070 trans = btrfs_join_transaction(root);
3069 if (IS_ERR(trans)) 3071 if (IS_ERR(trans))
3070 return PTR_ERR(trans); 3072 return PTR_ERR(trans);
3071 3073
@@ -3087,13 +3089,21 @@ alloc:
3087 } 3089 }
3088 goto again; 3090 goto again;
3089 } 3091 }
3092
3093 /*
3094 * If we have less pinned bytes than we want to allocate then
3095 * don't bother committing the transaction, it won't help us.
3096 */
3097 if (data_sinfo->bytes_pinned < bytes)
3098 committed = 1;
3090 spin_unlock(&data_sinfo->lock); 3099 spin_unlock(&data_sinfo->lock);
3091 3100
3092 /* commit the current transaction and try again */ 3101 /* commit the current transaction and try again */
3093commit_trans: 3102commit_trans:
3094 if (!committed && !root->fs_info->open_ioctl_trans) { 3103 if (!committed &&
3104 !atomic_read(&root->fs_info->open_ioctl_trans)) {
3095 committed = 1; 3105 committed = 1;
3096 trans = btrfs_join_transaction(root, 1); 3106 trans = btrfs_join_transaction(root);
3097 if (IS_ERR(trans)) 3107 if (IS_ERR(trans))
3098 return PTR_ERR(trans); 3108 return PTR_ERR(trans);
3099 ret = btrfs_commit_transaction(trans, root); 3109 ret = btrfs_commit_transaction(trans, root);
@@ -3304,10 +3314,6 @@ static int shrink_delalloc(struct btrfs_trans_handle *trans,
3304 if (reserved == 0) 3314 if (reserved == 0)
3305 return 0; 3315 return 0;
3306 3316
3307 /* nothing to shrink - nothing to reclaim */
3308 if (root->fs_info->delalloc_bytes == 0)
3309 return 0;
3310
3311 max_reclaim = min(reserved, to_reclaim); 3317 max_reclaim = min(reserved, to_reclaim);
3312 3318
3313 while (loops < 1024) { 3319 while (loops < 1024) {
@@ -3472,7 +3478,7 @@ again:
3472 goto out; 3478 goto out;
3473 3479
3474 ret = -ENOSPC; 3480 ret = -ENOSPC;
3475 trans = btrfs_join_transaction(root, 1); 3481 trans = btrfs_join_transaction(root);
3476 if (IS_ERR(trans)) 3482 if (IS_ERR(trans))
3477 goto out; 3483 goto out;
3478 ret = btrfs_commit_transaction(trans, root); 3484 ret = btrfs_commit_transaction(trans, root);
@@ -3699,7 +3705,7 @@ int btrfs_block_rsv_check(struct btrfs_trans_handle *trans,
3699 if (trans) 3705 if (trans)
3700 return -EAGAIN; 3706 return -EAGAIN;
3701 3707
3702 trans = btrfs_join_transaction(root, 1); 3708 trans = btrfs_join_transaction(root);
3703 BUG_ON(IS_ERR(trans)); 3709 BUG_ON(IS_ERR(trans));
3704 ret = btrfs_commit_transaction(trans, root); 3710 ret = btrfs_commit_transaction(trans, root);
3705 return 0; 3711 return 0;
@@ -3837,6 +3843,37 @@ static void release_global_block_rsv(struct btrfs_fs_info *fs_info)
3837 WARN_ON(fs_info->chunk_block_rsv.reserved > 0); 3843 WARN_ON(fs_info->chunk_block_rsv.reserved > 0);
3838} 3844}
3839 3845
3846int btrfs_truncate_reserve_metadata(struct btrfs_trans_handle *trans,
3847 struct btrfs_root *root,
3848 struct btrfs_block_rsv *rsv)
3849{
3850 struct btrfs_block_rsv *trans_rsv = &root->fs_info->trans_block_rsv;
3851 u64 num_bytes;
3852 int ret;
3853
3854 /*
3855 * Truncate should be freeing data, but give us 2 items just in case it
3856 * needs to use some space. We may want to be smarter about this in the
3857 * future.
3858 */
3859 num_bytes = btrfs_calc_trans_metadata_size(root, 2);
3860
3861 /* We already have enough bytes, just return */
3862 if (rsv->reserved >= num_bytes)
3863 return 0;
3864
3865 num_bytes -= rsv->reserved;
3866
3867 /*
3868 * You should have reserved enough space before hand to do this, so this
3869 * should not fail.
3870 */
3871 ret = block_rsv_migrate_bytes(trans_rsv, rsv, num_bytes);
3872 BUG_ON(ret);
3873
3874 return 0;
3875}
3876
3840int btrfs_trans_reserve_metadata(struct btrfs_trans_handle *trans, 3877int btrfs_trans_reserve_metadata(struct btrfs_trans_handle *trans,
3841 struct btrfs_root *root, 3878 struct btrfs_root *root,
3842 int num_items) 3879 int num_items)
@@ -3877,23 +3914,18 @@ int btrfs_orphan_reserve_metadata(struct btrfs_trans_handle *trans,
3877 struct btrfs_block_rsv *dst_rsv = root->orphan_block_rsv; 3914 struct btrfs_block_rsv *dst_rsv = root->orphan_block_rsv;
3878 3915
3879 /* 3916 /*
3880 * one for deleting orphan item, one for updating inode and 3917 * We need to hold space in order to delete our orphan item once we've
3881 * two for calling btrfs_truncate_inode_items. 3918 * added it, so this takes the reservation so we can release it later
3882 * 3919 * when we are truly done with the orphan item.
3883 * btrfs_truncate_inode_items is a delete operation, it frees
3884 * more space than it uses in most cases. So two units of
3885 * metadata space should be enough for calling it many times.
3886 * If all of the metadata space is used, we can commit
3887 * transaction and use space it freed.
3888 */ 3920 */
3889 u64 num_bytes = btrfs_calc_trans_metadata_size(root, 4); 3921 u64 num_bytes = btrfs_calc_trans_metadata_size(root, 1);
3890 return block_rsv_migrate_bytes(src_rsv, dst_rsv, num_bytes); 3922 return block_rsv_migrate_bytes(src_rsv, dst_rsv, num_bytes);
3891} 3923}
3892 3924
3893void btrfs_orphan_release_metadata(struct inode *inode) 3925void btrfs_orphan_release_metadata(struct inode *inode)
3894{ 3926{
3895 struct btrfs_root *root = BTRFS_I(inode)->root; 3927 struct btrfs_root *root = BTRFS_I(inode)->root;
3896 u64 num_bytes = btrfs_calc_trans_metadata_size(root, 4); 3928 u64 num_bytes = btrfs_calc_trans_metadata_size(root, 1);
3897 btrfs_block_rsv_release(root, root->orphan_block_rsv, num_bytes); 3929 btrfs_block_rsv_release(root, root->orphan_block_rsv, num_bytes);
3898} 3930}
3899 3931
@@ -4987,6 +5019,15 @@ have_block_group:
4987 if (unlikely(block_group->ro)) 5019 if (unlikely(block_group->ro))
4988 goto loop; 5020 goto loop;
4989 5021
5022 spin_lock(&block_group->free_space_ctl->tree_lock);
5023 if (cached &&
5024 block_group->free_space_ctl->free_space <
5025 num_bytes + empty_size) {
5026 spin_unlock(&block_group->free_space_ctl->tree_lock);
5027 goto loop;
5028 }
5029 spin_unlock(&block_group->free_space_ctl->tree_lock);
5030
4990 /* 5031 /*
4991 * Ok we want to try and use the cluster allocator, so lets look 5032 * Ok we want to try and use the cluster allocator, so lets look
4992 * there, unless we are on LOOP_NO_EMPTY_SIZE, since we will 5033 * there, unless we are on LOOP_NO_EMPTY_SIZE, since we will
@@ -5150,6 +5191,7 @@ checks:
5150 btrfs_add_free_space(block_group, offset, 5191 btrfs_add_free_space(block_group, offset,
5151 search_start - offset); 5192 search_start - offset);
5152 BUG_ON(offset > search_start); 5193 BUG_ON(offset > search_start);
5194 btrfs_put_block_group(block_group);
5153 break; 5195 break;
5154loop: 5196loop:
5155 failed_cluster_refill = false; 5197 failed_cluster_refill = false;
@@ -5172,9 +5214,7 @@ loop:
5172 * LOOP_NO_EMPTY_SIZE, set empty_size and empty_cluster to 0 and try 5214 * LOOP_NO_EMPTY_SIZE, set empty_size and empty_cluster to 0 and try
5173 * again 5215 * again
5174 */ 5216 */
5175 if (!ins->objectid && loop < LOOP_NO_EMPTY_SIZE && 5217 if (!ins->objectid && loop < LOOP_NO_EMPTY_SIZE) {
5176 (found_uncached_bg || empty_size || empty_cluster ||
5177 allowed_chunk_alloc)) {
5178 index = 0; 5218 index = 0;
5179 if (loop == LOOP_FIND_IDEAL && found_uncached_bg) { 5219 if (loop == LOOP_FIND_IDEAL && found_uncached_bg) {
5180 found_uncached_bg = false; 5220 found_uncached_bg = false;
@@ -5214,42 +5254,39 @@ loop:
5214 goto search; 5254 goto search;
5215 } 5255 }
5216 5256
5217 if (loop < LOOP_CACHING_WAIT) { 5257 loop++;
5218 loop++;
5219 goto search;
5220 }
5221 5258
5222 if (loop == LOOP_ALLOC_CHUNK) { 5259 if (loop == LOOP_ALLOC_CHUNK) {
5223 empty_size = 0; 5260 if (allowed_chunk_alloc) {
5224 empty_cluster = 0; 5261 ret = do_chunk_alloc(trans, root, num_bytes +
5225 } 5262 2 * 1024 * 1024, data,
5263 CHUNK_ALLOC_LIMITED);
5264 allowed_chunk_alloc = 0;
5265 if (ret == 1)
5266 done_chunk_alloc = 1;
5267 } else if (!done_chunk_alloc &&
5268 space_info->force_alloc ==
5269 CHUNK_ALLOC_NO_FORCE) {
5270 space_info->force_alloc = CHUNK_ALLOC_LIMITED;
5271 }
5226 5272
5227 if (allowed_chunk_alloc) { 5273 /*
5228 ret = do_chunk_alloc(trans, root, num_bytes + 5274 * We didn't allocate a chunk, go ahead and drop the
5229 2 * 1024 * 1024, data, 5275 * empty size and loop again.
5230 CHUNK_ALLOC_LIMITED); 5276 */
5231 allowed_chunk_alloc = 0; 5277 if (!done_chunk_alloc)
5232 done_chunk_alloc = 1; 5278 loop = LOOP_NO_EMPTY_SIZE;
5233 } else if (!done_chunk_alloc &&
5234 space_info->force_alloc == CHUNK_ALLOC_NO_FORCE) {
5235 space_info->force_alloc = CHUNK_ALLOC_LIMITED;
5236 } 5279 }
5237 5280
5238 if (loop < LOOP_NO_EMPTY_SIZE) { 5281 if (loop == LOOP_NO_EMPTY_SIZE) {
5239 loop++; 5282 empty_size = 0;
5240 goto search; 5283 empty_cluster = 0;
5241 } 5284 }
5242 ret = -ENOSPC; 5285
5286 goto search;
5243 } else if (!ins->objectid) { 5287 } else if (!ins->objectid) {
5244 ret = -ENOSPC; 5288 ret = -ENOSPC;
5245 } 5289 } else if (ins->objectid) {
5246
5247 /* we found what we needed */
5248 if (ins->objectid) {
5249 if (!(data & BTRFS_BLOCK_GROUP_DATA))
5250 trans->block_group = block_group->key.objectid;
5251
5252 btrfs_put_block_group(block_group);
5253 ret = 0; 5290 ret = 0;
5254 } 5291 }
5255 5292
@@ -6526,7 +6563,7 @@ int btrfs_set_block_group_ro(struct btrfs_root *root,
6526 6563
6527 BUG_ON(cache->ro); 6564 BUG_ON(cache->ro);
6528 6565
6529 trans = btrfs_join_transaction(root, 1); 6566 trans = btrfs_join_transaction(root);
6530 BUG_ON(IS_ERR(trans)); 6567 BUG_ON(IS_ERR(trans));
6531 6568
6532 alloc_flags = update_block_group_flags(root, cache->flags); 6569 alloc_flags = update_block_group_flags(root, cache->flags);
@@ -6882,6 +6919,7 @@ int btrfs_read_block_groups(struct btrfs_root *root)
6882 path = btrfs_alloc_path(); 6919 path = btrfs_alloc_path();
6883 if (!path) 6920 if (!path)
6884 return -ENOMEM; 6921 return -ENOMEM;
6922 path->reada = 1;
6885 6923
6886 cache_gen = btrfs_super_cache_generation(&root->fs_info->super_copy); 6924 cache_gen = btrfs_super_cache_generation(&root->fs_info->super_copy);
6887 if (cache_gen != 0 && 6925 if (cache_gen != 0 &&
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index c5d9fbb92bc3..7055d11c1efd 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -1476,7 +1476,7 @@ u64 count_range_bits(struct extent_io_tree *tree,
1476 if (total_bytes >= max_bytes) 1476 if (total_bytes >= max_bytes)
1477 break; 1477 break;
1478 if (!found) { 1478 if (!found) {
1479 *start = state->start; 1479 *start = max(cur_start, state->start);
1480 found = 1; 1480 found = 1;
1481 } 1481 }
1482 last = state->end; 1482 last = state->end;
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index 4e8445a4757c..a11a92ee2d30 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -126,9 +126,9 @@ struct extent_buffer {
126 unsigned long map_len; 126 unsigned long map_len;
127 struct page *first_page; 127 struct page *first_page;
128 unsigned long bflags; 128 unsigned long bflags;
129 atomic_t refs;
130 struct list_head leak_list; 129 struct list_head leak_list;
131 struct rcu_head rcu_head; 130 struct rcu_head rcu_head;
131 atomic_t refs;
132 132
133 /* the spinlock is used to protect most operations */ 133 /* the spinlock is used to protect most operations */
134 spinlock_t lock; 134 spinlock_t lock;
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index c6a22d783c35..fa4ef18b66b1 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -129,7 +129,7 @@ int btrfs_add_inode_defrag(struct btrfs_trans_handle *trans,
129 if (!btrfs_test_opt(root, AUTO_DEFRAG)) 129 if (!btrfs_test_opt(root, AUTO_DEFRAG))
130 return 0; 130 return 0;
131 131
132 if (root->fs_info->closing) 132 if (btrfs_fs_closing(root->fs_info))
133 return 0; 133 return 0;
134 134
135 if (BTRFS_I(inode)->in_defrag) 135 if (BTRFS_I(inode)->in_defrag)
@@ -144,7 +144,7 @@ int btrfs_add_inode_defrag(struct btrfs_trans_handle *trans,
144 if (!defrag) 144 if (!defrag)
145 return -ENOMEM; 145 return -ENOMEM;
146 146
147 defrag->ino = inode->i_ino; 147 defrag->ino = btrfs_ino(inode);
148 defrag->transid = transid; 148 defrag->transid = transid;
149 defrag->root = root->root_key.objectid; 149 defrag->root = root->root_key.objectid;
150 150
@@ -229,7 +229,7 @@ int btrfs_run_defrag_inodes(struct btrfs_fs_info *fs_info)
229 first_ino = defrag->ino + 1; 229 first_ino = defrag->ino + 1;
230 rb_erase(&defrag->rb_node, &fs_info->defrag_inodes); 230 rb_erase(&defrag->rb_node, &fs_info->defrag_inodes);
231 231
232 if (fs_info->closing) 232 if (btrfs_fs_closing(fs_info))
233 goto next_free; 233 goto next_free;
234 234
235 spin_unlock(&fs_info->defrag_inodes_lock); 235 spin_unlock(&fs_info->defrag_inodes_lock);
@@ -1480,14 +1480,12 @@ int btrfs_sync_file(struct file *file, int datasync)
1480 * the current transaction, we can bail out now without any 1480 * the current transaction, we can bail out now without any
1481 * syncing 1481 * syncing
1482 */ 1482 */
1483 mutex_lock(&root->fs_info->trans_mutex); 1483 smp_mb();
1484 if (BTRFS_I(inode)->last_trans <= 1484 if (BTRFS_I(inode)->last_trans <=
1485 root->fs_info->last_trans_committed) { 1485 root->fs_info->last_trans_committed) {
1486 BTRFS_I(inode)->last_trans = 0; 1486 BTRFS_I(inode)->last_trans = 0;
1487 mutex_unlock(&root->fs_info->trans_mutex);
1488 goto out; 1487 goto out;
1489 } 1488 }
1490 mutex_unlock(&root->fs_info->trans_mutex);
1491 1489
1492 /* 1490 /*
1493 * ok we haven't committed the transaction yet, lets do a commit 1491 * ok we haven't committed the transaction yet, lets do a commit
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index 70d45795d758..9f985a429877 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -98,7 +98,7 @@ struct inode *lookup_free_space_inode(struct btrfs_root *root,
98 return inode; 98 return inode;
99 99
100 spin_lock(&block_group->lock); 100 spin_lock(&block_group->lock);
101 if (!root->fs_info->closing) { 101 if (!btrfs_fs_closing(root->fs_info)) {
102 block_group->inode = igrab(inode); 102 block_group->inode = igrab(inode);
103 block_group->iref = 1; 103 block_group->iref = 1;
104 } 104 }
@@ -250,7 +250,7 @@ int __load_free_space_cache(struct btrfs_root *root, struct inode *inode,
250 pgoff_t index = 0; 250 pgoff_t index = 0;
251 unsigned long first_page_offset; 251 unsigned long first_page_offset;
252 int num_checksums; 252 int num_checksums;
253 int ret = 0, ret2; 253 int ret = 0;
254 254
255 INIT_LIST_HEAD(&bitmaps); 255 INIT_LIST_HEAD(&bitmaps);
256 256
@@ -402,7 +402,14 @@ int __load_free_space_cache(struct btrfs_root *root, struct inode *inode,
402 spin_lock(&ctl->tree_lock); 402 spin_lock(&ctl->tree_lock);
403 ret = link_free_space(ctl, e); 403 ret = link_free_space(ctl, e);
404 spin_unlock(&ctl->tree_lock); 404 spin_unlock(&ctl->tree_lock);
405 BUG_ON(ret); 405 if (ret) {
406 printk(KERN_ERR "Duplicate entries in "
407 "free space cache, dumping\n");
408 kunmap(page);
409 unlock_page(page);
410 page_cache_release(page);
411 goto free_cache;
412 }
406 } else { 413 } else {
407 e->bitmap = kzalloc(PAGE_CACHE_SIZE, GFP_NOFS); 414 e->bitmap = kzalloc(PAGE_CACHE_SIZE, GFP_NOFS);
408 if (!e->bitmap) { 415 if (!e->bitmap) {
@@ -414,10 +421,18 @@ int __load_free_space_cache(struct btrfs_root *root, struct inode *inode,
414 goto free_cache; 421 goto free_cache;
415 } 422 }
416 spin_lock(&ctl->tree_lock); 423 spin_lock(&ctl->tree_lock);
417 ret2 = link_free_space(ctl, e); 424 ret = link_free_space(ctl, e);
418 ctl->total_bitmaps++; 425 ctl->total_bitmaps++;
419 ctl->op->recalc_thresholds(ctl); 426 ctl->op->recalc_thresholds(ctl);
420 spin_unlock(&ctl->tree_lock); 427 spin_unlock(&ctl->tree_lock);
428 if (ret) {
429 printk(KERN_ERR "Duplicate entries in "
430 "free space cache, dumping\n");
431 kunmap(page);
432 unlock_page(page);
433 page_cache_release(page);
434 goto free_cache;
435 }
421 list_add_tail(&e->list, &bitmaps); 436 list_add_tail(&e->list, &bitmaps);
422 } 437 }
423 438
@@ -478,8 +493,7 @@ int load_free_space_cache(struct btrfs_fs_info *fs_info,
478 * If we're unmounting then just return, since this does a search on the 493 * If we're unmounting then just return, since this does a search on the
479 * normal root and not the commit root and we could deadlock. 494 * normal root and not the commit root and we could deadlock.
480 */ 495 */
481 smp_mb(); 496 if (btrfs_fs_closing(fs_info))
482 if (fs_info->closing)
483 return 0; 497 return 0;
484 498
485 /* 499 /*
@@ -575,10 +589,25 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
575 589
576 num_pages = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> 590 num_pages = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >>
577 PAGE_CACHE_SHIFT; 591 PAGE_CACHE_SHIFT;
592
593 /* Since the first page has all of our checksums and our generation we
594 * need to calculate the offset into the page that we can start writing
595 * our entries.
596 */
597 first_page_offset = (sizeof(u32) * num_pages) + sizeof(u64);
598
578 filemap_write_and_wait(inode->i_mapping); 599 filemap_write_and_wait(inode->i_mapping);
579 btrfs_wait_ordered_range(inode, inode->i_size & 600 btrfs_wait_ordered_range(inode, inode->i_size &
580 ~(root->sectorsize - 1), (u64)-1); 601 ~(root->sectorsize - 1), (u64)-1);
581 602
603 /* make sure we don't overflow that first page */
604 if (first_page_offset + sizeof(struct btrfs_free_space_entry) >= PAGE_CACHE_SIZE) {
605 /* this is really the same as running out of space, where we also return 0 */
606 printk(KERN_CRIT "Btrfs: free space cache was too big for the crc page\n");
607 ret = 0;
608 goto out_update;
609 }
610
582 /* We need a checksum per page. */ 611 /* We need a checksum per page. */
583 crc = checksums = kzalloc(sizeof(u32) * num_pages, GFP_NOFS); 612 crc = checksums = kzalloc(sizeof(u32) * num_pages, GFP_NOFS);
584 if (!crc) 613 if (!crc)
@@ -590,12 +619,6 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
590 return -1; 619 return -1;
591 } 620 }
592 621
593 /* Since the first page has all of our checksums and our generation we
594 * need to calculate the offset into the page that we can start writing
595 * our entries.
596 */
597 first_page_offset = (sizeof(u32) * num_pages) + sizeof(u64);
598
599 /* Get the cluster for this block_group if it exists */ 622 /* Get the cluster for this block_group if it exists */
600 if (block_group && !list_empty(&block_group->cluster_list)) 623 if (block_group && !list_empty(&block_group->cluster_list))
601 cluster = list_entry(block_group->cluster_list.next, 624 cluster = list_entry(block_group->cluster_list.next,
@@ -857,12 +880,14 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
857 ret = 1; 880 ret = 1;
858 881
859out_free: 882out_free:
883 kfree(checksums);
884 kfree(pages);
885
886out_update:
860 if (ret != 1) { 887 if (ret != 1) {
861 invalidate_inode_pages2_range(inode->i_mapping, 0, index); 888 invalidate_inode_pages2_range(inode->i_mapping, 0, index);
862 BTRFS_I(inode)->generation = 0; 889 BTRFS_I(inode)->generation = 0;
863 } 890 }
864 kfree(checksums);
865 kfree(pages);
866 btrfs_update_inode(trans, root, inode); 891 btrfs_update_inode(trans, root, inode);
867 return ret; 892 return ret;
868} 893}
@@ -963,10 +988,16 @@ static int tree_insert_offset(struct rb_root *root, u64 offset,
963 * logically. 988 * logically.
964 */ 989 */
965 if (bitmap) { 990 if (bitmap) {
966 WARN_ON(info->bitmap); 991 if (info->bitmap) {
992 WARN_ON_ONCE(1);
993 return -EEXIST;
994 }
967 p = &(*p)->rb_right; 995 p = &(*p)->rb_right;
968 } else { 996 } else {
969 WARN_ON(!info->bitmap); 997 if (!info->bitmap) {
998 WARN_ON_ONCE(1);
999 return -EEXIST;
1000 }
970 p = &(*p)->rb_left; 1001 p = &(*p)->rb_left;
971 } 1002 }
972 } 1003 }
@@ -1386,6 +1417,23 @@ again:
1386 return 0; 1417 return 0;
1387} 1418}
1388 1419
1420static u64 add_bytes_to_bitmap(struct btrfs_free_space_ctl *ctl,
1421 struct btrfs_free_space *info, u64 offset,
1422 u64 bytes)
1423{
1424 u64 bytes_to_set = 0;
1425 u64 end;
1426
1427 end = info->offset + (u64)(BITS_PER_BITMAP * ctl->unit);
1428
1429 bytes_to_set = min(end - offset, bytes);
1430
1431 bitmap_set_bits(ctl, info, offset, bytes_to_set);
1432
1433 return bytes_to_set;
1434
1435}
1436
1389static bool use_bitmap(struct btrfs_free_space_ctl *ctl, 1437static bool use_bitmap(struct btrfs_free_space_ctl *ctl,
1390 struct btrfs_free_space *info) 1438 struct btrfs_free_space *info)
1391{ 1439{
@@ -1422,12 +1470,18 @@ static bool use_bitmap(struct btrfs_free_space_ctl *ctl,
1422 return true; 1470 return true;
1423} 1471}
1424 1472
1473static struct btrfs_free_space_op free_space_op = {
1474 .recalc_thresholds = recalculate_thresholds,
1475 .use_bitmap = use_bitmap,
1476};
1477
1425static int insert_into_bitmap(struct btrfs_free_space_ctl *ctl, 1478static int insert_into_bitmap(struct btrfs_free_space_ctl *ctl,
1426 struct btrfs_free_space *info) 1479 struct btrfs_free_space *info)
1427{ 1480{
1428 struct btrfs_free_space *bitmap_info; 1481 struct btrfs_free_space *bitmap_info;
1482 struct btrfs_block_group_cache *block_group = NULL;
1429 int added = 0; 1483 int added = 0;
1430 u64 bytes, offset, end; 1484 u64 bytes, offset, bytes_added;
1431 int ret; 1485 int ret;
1432 1486
1433 bytes = info->bytes; 1487 bytes = info->bytes;
@@ -1436,7 +1490,49 @@ static int insert_into_bitmap(struct btrfs_free_space_ctl *ctl,
1436 if (!ctl->op->use_bitmap(ctl, info)) 1490 if (!ctl->op->use_bitmap(ctl, info))
1437 return 0; 1491 return 0;
1438 1492
1493 if (ctl->op == &free_space_op)
1494 block_group = ctl->private;
1439again: 1495again:
1496 /*
1497 * Since we link bitmaps right into the cluster we need to see if we
1498 * have a cluster here, and if so and it has our bitmap we need to add
1499 * the free space to that bitmap.
1500 */
1501 if (block_group && !list_empty(&block_group->cluster_list)) {
1502 struct btrfs_free_cluster *cluster;
1503 struct rb_node *node;
1504 struct btrfs_free_space *entry;
1505
1506 cluster = list_entry(block_group->cluster_list.next,
1507 struct btrfs_free_cluster,
1508 block_group_list);
1509 spin_lock(&cluster->lock);
1510 node = rb_first(&cluster->root);
1511 if (!node) {
1512 spin_unlock(&cluster->lock);
1513 goto no_cluster_bitmap;
1514 }
1515
1516 entry = rb_entry(node, struct btrfs_free_space, offset_index);
1517 if (!entry->bitmap) {
1518 spin_unlock(&cluster->lock);
1519 goto no_cluster_bitmap;
1520 }
1521
1522 if (entry->offset == offset_to_bitmap(ctl, offset)) {
1523 bytes_added = add_bytes_to_bitmap(ctl, entry,
1524 offset, bytes);
1525 bytes -= bytes_added;
1526 offset += bytes_added;
1527 }
1528 spin_unlock(&cluster->lock);
1529 if (!bytes) {
1530 ret = 1;
1531 goto out;
1532 }
1533 }
1534
1535no_cluster_bitmap:
1440 bitmap_info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset), 1536 bitmap_info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset),
1441 1, 0); 1537 1, 0);
1442 if (!bitmap_info) { 1538 if (!bitmap_info) {
@@ -1444,19 +1540,10 @@ again:
1444 goto new_bitmap; 1540 goto new_bitmap;
1445 } 1541 }
1446 1542
1447 end = bitmap_info->offset + (u64)(BITS_PER_BITMAP * ctl->unit); 1543 bytes_added = add_bytes_to_bitmap(ctl, bitmap_info, offset, bytes);
1448 1544 bytes -= bytes_added;
1449 if (offset >= bitmap_info->offset && offset + bytes > end) { 1545 offset += bytes_added;
1450 bitmap_set_bits(ctl, bitmap_info, offset, end - offset); 1546 added = 0;
1451 bytes -= end - offset;
1452 offset = end;
1453 added = 0;
1454 } else if (offset >= bitmap_info->offset && offset + bytes <= end) {
1455 bitmap_set_bits(ctl, bitmap_info, offset, bytes);
1456 bytes = 0;
1457 } else {
1458 BUG();
1459 }
1460 1547
1461 if (!bytes) { 1548 if (!bytes) {
1462 ret = 1; 1549 ret = 1;
@@ -1735,11 +1822,6 @@ void btrfs_dump_free_space(struct btrfs_block_group_cache *block_group,
1735 "\n", count); 1822 "\n", count);
1736} 1823}
1737 1824
1738static struct btrfs_free_space_op free_space_op = {
1739 .recalc_thresholds = recalculate_thresholds,
1740 .use_bitmap = use_bitmap,
1741};
1742
1743void btrfs_init_free_space_ctl(struct btrfs_block_group_cache *block_group) 1825void btrfs_init_free_space_ctl(struct btrfs_block_group_cache *block_group)
1744{ 1826{
1745 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 1827 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
@@ -2111,9 +2193,11 @@ again:
2111/* 2193/*
2112 * This searches the block group for just extents to fill the cluster with. 2194 * This searches the block group for just extents to fill the cluster with.
2113 */ 2195 */
2114static int setup_cluster_no_bitmap(struct btrfs_block_group_cache *block_group, 2196static noinline int
2115 struct btrfs_free_cluster *cluster, 2197setup_cluster_no_bitmap(struct btrfs_block_group_cache *block_group,
2116 u64 offset, u64 bytes, u64 min_bytes) 2198 struct btrfs_free_cluster *cluster,
2199 struct list_head *bitmaps, u64 offset, u64 bytes,
2200 u64 min_bytes)
2117{ 2201{
2118 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 2202 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
2119 struct btrfs_free_space *first = NULL; 2203 struct btrfs_free_space *first = NULL;
@@ -2135,6 +2219,8 @@ static int setup_cluster_no_bitmap(struct btrfs_block_group_cache *block_group,
2135 * extent entry. 2219 * extent entry.
2136 */ 2220 */
2137 while (entry->bitmap) { 2221 while (entry->bitmap) {
2222 if (list_empty(&entry->list))
2223 list_add_tail(&entry->list, bitmaps);
2138 node = rb_next(&entry->offset_index); 2224 node = rb_next(&entry->offset_index);
2139 if (!node) 2225 if (!node)
2140 return -ENOSPC; 2226 return -ENOSPC;
@@ -2154,8 +2240,12 @@ static int setup_cluster_no_bitmap(struct btrfs_block_group_cache *block_group,
2154 return -ENOSPC; 2240 return -ENOSPC;
2155 entry = rb_entry(node, struct btrfs_free_space, offset_index); 2241 entry = rb_entry(node, struct btrfs_free_space, offset_index);
2156 2242
2157 if (entry->bitmap) 2243 if (entry->bitmap) {
2244 if (list_empty(&entry->list))
2245 list_add_tail(&entry->list, bitmaps);
2158 continue; 2246 continue;
2247 }
2248
2159 /* 2249 /*
2160 * we haven't filled the empty size and the window is 2250 * we haven't filled the empty size and the window is
2161 * very large. reset and try again 2251 * very large. reset and try again
@@ -2207,9 +2297,11 @@ static int setup_cluster_no_bitmap(struct btrfs_block_group_cache *block_group,
2207 * This specifically looks for bitmaps that may work in the cluster, we assume 2297 * This specifically looks for bitmaps that may work in the cluster, we assume
2208 * that we have already failed to find extents that will work. 2298 * that we have already failed to find extents that will work.
2209 */ 2299 */
2210static int setup_cluster_bitmap(struct btrfs_block_group_cache *block_group, 2300static noinline int
2211 struct btrfs_free_cluster *cluster, 2301setup_cluster_bitmap(struct btrfs_block_group_cache *block_group,
2212 u64 offset, u64 bytes, u64 min_bytes) 2302 struct btrfs_free_cluster *cluster,
2303 struct list_head *bitmaps, u64 offset, u64 bytes,
2304 u64 min_bytes)
2213{ 2305{
2214 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 2306 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
2215 struct btrfs_free_space *entry; 2307 struct btrfs_free_space *entry;
@@ -2219,10 +2311,39 @@ static int setup_cluster_bitmap(struct btrfs_block_group_cache *block_group,
2219 if (ctl->total_bitmaps == 0) 2311 if (ctl->total_bitmaps == 0)
2220 return -ENOSPC; 2312 return -ENOSPC;
2221 2313
2314 /*
2315 * First check our cached list of bitmaps and see if there is an entry
2316 * here that will work.
2317 */
2318 list_for_each_entry(entry, bitmaps, list) {
2319 if (entry->bytes < min_bytes)
2320 continue;
2321 ret = btrfs_bitmap_cluster(block_group, entry, cluster, offset,
2322 bytes, min_bytes);
2323 if (!ret)
2324 return 0;
2325 }
2326
2327 /*
2328 * If we do have entries on our list and we are here then we didn't find
2329 * anything, so go ahead and get the next entry after the last entry in
2330 * this list and start the search from there.
2331 */
2332 if (!list_empty(bitmaps)) {
2333 entry = list_entry(bitmaps->prev, struct btrfs_free_space,
2334 list);
2335 node = rb_next(&entry->offset_index);
2336 if (!node)
2337 return -ENOSPC;
2338 entry = rb_entry(node, struct btrfs_free_space, offset_index);
2339 goto search;
2340 }
2341
2222 entry = tree_search_offset(ctl, offset_to_bitmap(ctl, offset), 0, 1); 2342 entry = tree_search_offset(ctl, offset_to_bitmap(ctl, offset), 0, 1);
2223 if (!entry) 2343 if (!entry)
2224 return -ENOSPC; 2344 return -ENOSPC;
2225 2345
2346search:
2226 node = &entry->offset_index; 2347 node = &entry->offset_index;
2227 do { 2348 do {
2228 entry = rb_entry(node, struct btrfs_free_space, offset_index); 2349 entry = rb_entry(node, struct btrfs_free_space, offset_index);
@@ -2253,6 +2374,8 @@ int btrfs_find_space_cluster(struct btrfs_trans_handle *trans,
2253 u64 offset, u64 bytes, u64 empty_size) 2374 u64 offset, u64 bytes, u64 empty_size)
2254{ 2375{
2255 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 2376 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
2377 struct list_head bitmaps;
2378 struct btrfs_free_space *entry, *tmp;
2256 u64 min_bytes; 2379 u64 min_bytes;
2257 int ret; 2380 int ret;
2258 2381
@@ -2291,11 +2414,16 @@ int btrfs_find_space_cluster(struct btrfs_trans_handle *trans,
2291 goto out; 2414 goto out;
2292 } 2415 }
2293 2416
2294 ret = setup_cluster_no_bitmap(block_group, cluster, offset, bytes, 2417 INIT_LIST_HEAD(&bitmaps);
2295 min_bytes); 2418 ret = setup_cluster_no_bitmap(block_group, cluster, &bitmaps, offset,
2419 bytes, min_bytes);
2296 if (ret) 2420 if (ret)
2297 ret = setup_cluster_bitmap(block_group, cluster, offset, 2421 ret = setup_cluster_bitmap(block_group, cluster, &bitmaps,
2298 bytes, min_bytes); 2422 offset, bytes, min_bytes);
2423
2424 /* Clear our temporary list */
2425 list_for_each_entry_safe(entry, tmp, &bitmaps, list)
2426 list_del_init(&entry->list);
2299 2427
2300 if (!ret) { 2428 if (!ret) {
2301 atomic_inc(&block_group->count); 2429 atomic_inc(&block_group->count);
@@ -2481,7 +2609,7 @@ struct inode *lookup_free_ino_inode(struct btrfs_root *root,
2481 return inode; 2609 return inode;
2482 2610
2483 spin_lock(&root->cache_lock); 2611 spin_lock(&root->cache_lock);
2484 if (!root->fs_info->closing) 2612 if (!btrfs_fs_closing(root->fs_info))
2485 root->cache_inode = igrab(inode); 2613 root->cache_inode = igrab(inode);
2486 spin_unlock(&root->cache_lock); 2614 spin_unlock(&root->cache_lock);
2487 2615
@@ -2504,12 +2632,14 @@ int load_free_ino_cache(struct btrfs_fs_info *fs_info, struct btrfs_root *root)
2504 int ret = 0; 2632 int ret = 0;
2505 u64 root_gen = btrfs_root_generation(&root->root_item); 2633 u64 root_gen = btrfs_root_generation(&root->root_item);
2506 2634
2635 if (!btrfs_test_opt(root, INODE_MAP_CACHE))
2636 return 0;
2637
2507 /* 2638 /*
2508 * If we're unmounting then just return, since this does a search on the 2639 * If we're unmounting then just return, since this does a search on the
2509 * normal root and not the commit root and we could deadlock. 2640 * normal root and not the commit root and we could deadlock.
2510 */ 2641 */
2511 smp_mb(); 2642 if (btrfs_fs_closing(fs_info))
2512 if (fs_info->closing)
2513 return 0; 2643 return 0;
2514 2644
2515 path = btrfs_alloc_path(); 2645 path = btrfs_alloc_path();
@@ -2543,6 +2673,9 @@ int btrfs_write_out_ino_cache(struct btrfs_root *root,
2543 struct inode *inode; 2673 struct inode *inode;
2544 int ret; 2674 int ret;
2545 2675
2676 if (!btrfs_test_opt(root, INODE_MAP_CACHE))
2677 return 0;
2678
2546 inode = lookup_free_ino_inode(root, path); 2679 inode = lookup_free_ino_inode(root, path);
2547 if (IS_ERR(inode)) 2680 if (IS_ERR(inode))
2548 return 0; 2681 return 0;
diff --git a/fs/btrfs/inode-map.c b/fs/btrfs/inode-map.c
index 3262cd17a12f..b4087e0fa871 100644
--- a/fs/btrfs/inode-map.c
+++ b/fs/btrfs/inode-map.c
@@ -38,6 +38,9 @@ static int caching_kthread(void *data)
38 int slot; 38 int slot;
39 int ret; 39 int ret;
40 40
41 if (!btrfs_test_opt(root, INODE_MAP_CACHE))
42 return 0;
43
41 path = btrfs_alloc_path(); 44 path = btrfs_alloc_path();
42 if (!path) 45 if (!path)
43 return -ENOMEM; 46 return -ENOMEM;
@@ -59,8 +62,7 @@ again:
59 goto out; 62 goto out;
60 63
61 while (1) { 64 while (1) {
62 smp_mb(); 65 if (btrfs_fs_closing(fs_info))
63 if (fs_info->closing)
64 goto out; 66 goto out;
65 67
66 leaf = path->nodes[0]; 68 leaf = path->nodes[0];
@@ -141,6 +143,9 @@ static void start_caching(struct btrfs_root *root)
141 int ret; 143 int ret;
142 u64 objectid; 144 u64 objectid;
143 145
146 if (!btrfs_test_opt(root, INODE_MAP_CACHE))
147 return;
148
144 spin_lock(&root->cache_lock); 149 spin_lock(&root->cache_lock);
145 if (root->cached != BTRFS_CACHE_NO) { 150 if (root->cached != BTRFS_CACHE_NO) {
146 spin_unlock(&root->cache_lock); 151 spin_unlock(&root->cache_lock);
@@ -178,6 +183,9 @@ static void start_caching(struct btrfs_root *root)
178 183
179int btrfs_find_free_ino(struct btrfs_root *root, u64 *objectid) 184int btrfs_find_free_ino(struct btrfs_root *root, u64 *objectid)
180{ 185{
186 if (!btrfs_test_opt(root, INODE_MAP_CACHE))
187 return btrfs_find_free_objectid(root, objectid);
188
181again: 189again:
182 *objectid = btrfs_find_ino_for_alloc(root); 190 *objectid = btrfs_find_ino_for_alloc(root);
183 191
@@ -201,6 +209,10 @@ void btrfs_return_ino(struct btrfs_root *root, u64 objectid)
201{ 209{
202 struct btrfs_free_space_ctl *ctl = root->free_ino_ctl; 210 struct btrfs_free_space_ctl *ctl = root->free_ino_ctl;
203 struct btrfs_free_space_ctl *pinned = root->free_ino_pinned; 211 struct btrfs_free_space_ctl *pinned = root->free_ino_pinned;
212
213 if (!btrfs_test_opt(root, INODE_MAP_CACHE))
214 return;
215
204again: 216again:
205 if (root->cached == BTRFS_CACHE_FINISHED) { 217 if (root->cached == BTRFS_CACHE_FINISHED) {
206 __btrfs_add_free_space(ctl, objectid, 1); 218 __btrfs_add_free_space(ctl, objectid, 1);
@@ -250,6 +262,9 @@ void btrfs_unpin_free_ino(struct btrfs_root *root)
250 struct rb_node *n; 262 struct rb_node *n;
251 u64 count; 263 u64 count;
252 264
265 if (!btrfs_test_opt(root, INODE_MAP_CACHE))
266 return;
267
253 while (1) { 268 while (1) {
254 n = rb_first(rbroot); 269 n = rb_first(rbroot);
255 if (!n) 270 if (!n)
@@ -388,9 +403,24 @@ int btrfs_save_ino_cache(struct btrfs_root *root,
388 int prealloc; 403 int prealloc;
389 bool retry = false; 404 bool retry = false;
390 405
406 /* only fs tree and subvol/snap needs ino cache */
407 if (root->root_key.objectid != BTRFS_FS_TREE_OBJECTID &&
408 (root->root_key.objectid < BTRFS_FIRST_FREE_OBJECTID ||
409 root->root_key.objectid > BTRFS_LAST_FREE_OBJECTID))
410 return 0;
411
412 /* Don't save inode cache if we are deleting this root */
413 if (btrfs_root_refs(&root->root_item) == 0 &&
414 root != root->fs_info->tree_root)
415 return 0;
416
417 if (!btrfs_test_opt(root, INODE_MAP_CACHE))
418 return 0;
419
391 path = btrfs_alloc_path(); 420 path = btrfs_alloc_path();
392 if (!path) 421 if (!path)
393 return -ENOMEM; 422 return -ENOMEM;
423
394again: 424again:
395 inode = lookup_free_ino_inode(root, path); 425 inode = lookup_free_ino_inode(root, path);
396 if (IS_ERR(inode) && PTR_ERR(inode) != -ENOENT) { 426 if (IS_ERR(inode) && PTR_ERR(inode) != -ENOENT) {
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 39a9d5750efd..0a9b10c5b0a7 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -138,7 +138,6 @@ static noinline int insert_inline_extent(struct btrfs_trans_handle *trans,
138 return -ENOMEM; 138 return -ENOMEM;
139 139
140 path->leave_spinning = 1; 140 path->leave_spinning = 1;
141 btrfs_set_trans_block_group(trans, inode);
142 141
143 key.objectid = btrfs_ino(inode); 142 key.objectid = btrfs_ino(inode);
144 key.offset = start; 143 key.offset = start;
@@ -426,9 +425,8 @@ again:
426 } 425 }
427 } 426 }
428 if (start == 0) { 427 if (start == 0) {
429 trans = btrfs_join_transaction(root, 1); 428 trans = btrfs_join_transaction(root);
430 BUG_ON(IS_ERR(trans)); 429 BUG_ON(IS_ERR(trans));
431 btrfs_set_trans_block_group(trans, inode);
432 trans->block_rsv = &root->fs_info->delalloc_block_rsv; 430 trans->block_rsv = &root->fs_info->delalloc_block_rsv;
433 431
434 /* lets try to make an inline extent */ 432 /* lets try to make an inline extent */
@@ -623,8 +621,9 @@ retry:
623 async_extent->start + async_extent->ram_size - 1, 621 async_extent->start + async_extent->ram_size - 1,
624 GFP_NOFS); 622 GFP_NOFS);
625 623
626 trans = btrfs_join_transaction(root, 1); 624 trans = btrfs_join_transaction(root);
627 BUG_ON(IS_ERR(trans)); 625 BUG_ON(IS_ERR(trans));
626 trans->block_rsv = &root->fs_info->delalloc_block_rsv;
628 ret = btrfs_reserve_extent(trans, root, 627 ret = btrfs_reserve_extent(trans, root,
629 async_extent->compressed_size, 628 async_extent->compressed_size,
630 async_extent->compressed_size, 629 async_extent->compressed_size,
@@ -793,9 +792,8 @@ static noinline int cow_file_range(struct inode *inode,
793 int ret = 0; 792 int ret = 0;
794 793
795 BUG_ON(is_free_space_inode(root, inode)); 794 BUG_ON(is_free_space_inode(root, inode));
796 trans = btrfs_join_transaction(root, 1); 795 trans = btrfs_join_transaction(root);
797 BUG_ON(IS_ERR(trans)); 796 BUG_ON(IS_ERR(trans));
798 btrfs_set_trans_block_group(trans, inode);
799 trans->block_rsv = &root->fs_info->delalloc_block_rsv; 797 trans->block_rsv = &root->fs_info->delalloc_block_rsv;
800 798
801 num_bytes = (end - start + blocksize) & ~(blocksize - 1); 799 num_bytes = (end - start + blocksize) & ~(blocksize - 1);
@@ -1077,10 +1075,12 @@ static noinline int run_delalloc_nocow(struct inode *inode,
1077 nolock = is_free_space_inode(root, inode); 1075 nolock = is_free_space_inode(root, inode);
1078 1076
1079 if (nolock) 1077 if (nolock)
1080 trans = btrfs_join_transaction_nolock(root, 1); 1078 trans = btrfs_join_transaction_nolock(root);
1081 else 1079 else
1082 trans = btrfs_join_transaction(root, 1); 1080 trans = btrfs_join_transaction(root);
1081
1083 BUG_ON(IS_ERR(trans)); 1082 BUG_ON(IS_ERR(trans));
1083 trans->block_rsv = &root->fs_info->delalloc_block_rsv;
1084 1084
1085 cow_start = (u64)-1; 1085 cow_start = (u64)-1;
1086 cur_offset = start; 1086 cur_offset = start;
@@ -1519,8 +1519,6 @@ static noinline int add_pending_csums(struct btrfs_trans_handle *trans,
1519{ 1519{
1520 struct btrfs_ordered_sum *sum; 1520 struct btrfs_ordered_sum *sum;
1521 1521
1522 btrfs_set_trans_block_group(trans, inode);
1523
1524 list_for_each_entry(sum, list, list) { 1522 list_for_each_entry(sum, list, list) {
1525 btrfs_csum_file_blocks(trans, 1523 btrfs_csum_file_blocks(trans,
1526 BTRFS_I(inode)->root->fs_info->csum_root, sum); 1524 BTRFS_I(inode)->root->fs_info->csum_root, sum);
@@ -1735,11 +1733,10 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
1735 ret = btrfs_ordered_update_i_size(inode, 0, ordered_extent); 1733 ret = btrfs_ordered_update_i_size(inode, 0, ordered_extent);
1736 if (!ret) { 1734 if (!ret) {
1737 if (nolock) 1735 if (nolock)
1738 trans = btrfs_join_transaction_nolock(root, 1); 1736 trans = btrfs_join_transaction_nolock(root);
1739 else 1737 else
1740 trans = btrfs_join_transaction(root, 1); 1738 trans = btrfs_join_transaction(root);
1741 BUG_ON(IS_ERR(trans)); 1739 BUG_ON(IS_ERR(trans));
1742 btrfs_set_trans_block_group(trans, inode);
1743 trans->block_rsv = &root->fs_info->delalloc_block_rsv; 1740 trans->block_rsv = &root->fs_info->delalloc_block_rsv;
1744 ret = btrfs_update_inode(trans, root, inode); 1741 ret = btrfs_update_inode(trans, root, inode);
1745 BUG_ON(ret); 1742 BUG_ON(ret);
@@ -1752,11 +1749,10 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
1752 0, &cached_state, GFP_NOFS); 1749 0, &cached_state, GFP_NOFS);
1753 1750
1754 if (nolock) 1751 if (nolock)
1755 trans = btrfs_join_transaction_nolock(root, 1); 1752 trans = btrfs_join_transaction_nolock(root);
1756 else 1753 else
1757 trans = btrfs_join_transaction(root, 1); 1754 trans = btrfs_join_transaction(root);
1758 BUG_ON(IS_ERR(trans)); 1755 BUG_ON(IS_ERR(trans));
1759 btrfs_set_trans_block_group(trans, inode);
1760 trans->block_rsv = &root->fs_info->delalloc_block_rsv; 1756 trans->block_rsv = &root->fs_info->delalloc_block_rsv;
1761 1757
1762 if (test_bit(BTRFS_ORDERED_COMPRESSED, &ordered_extent->flags)) 1758 if (test_bit(BTRFS_ORDERED_COMPRESSED, &ordered_extent->flags))
@@ -1990,7 +1986,7 @@ static int btrfs_readpage_end_io_hook(struct page *page, u64 start, u64 end,
1990 } 1986 }
1991 1987
1992 if (BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM) 1988 if (BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)
1993 return 0; 1989 goto good;
1994 1990
1995 if (root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID && 1991 if (root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID &&
1996 test_range_bit(io_tree, start, end, EXTENT_NODATASUM, 1, NULL)) { 1992 test_range_bit(io_tree, start, end, EXTENT_NODATASUM, 1, NULL)) {
@@ -2431,7 +2427,7 @@ int btrfs_orphan_cleanup(struct btrfs_root *root)
2431 (u64)-1); 2427 (u64)-1);
2432 2428
2433 if (root->orphan_block_rsv || root->orphan_item_inserted) { 2429 if (root->orphan_block_rsv || root->orphan_item_inserted) {
2434 trans = btrfs_join_transaction(root, 1); 2430 trans = btrfs_join_transaction(root);
2435 if (!IS_ERR(trans)) 2431 if (!IS_ERR(trans))
2436 btrfs_end_transaction(trans, root); 2432 btrfs_end_transaction(trans, root);
2437 } 2433 }
@@ -2511,12 +2507,12 @@ static void btrfs_read_locked_inode(struct inode *inode)
2511 struct btrfs_root *root = BTRFS_I(inode)->root; 2507 struct btrfs_root *root = BTRFS_I(inode)->root;
2512 struct btrfs_key location; 2508 struct btrfs_key location;
2513 int maybe_acls; 2509 int maybe_acls;
2514 u64 alloc_group_block;
2515 u32 rdev; 2510 u32 rdev;
2516 int ret; 2511 int ret;
2517 2512
2518 path = btrfs_alloc_path(); 2513 path = btrfs_alloc_path();
2519 BUG_ON(!path); 2514 BUG_ON(!path);
2515 path->leave_spinning = 1;
2520 memcpy(&location, &BTRFS_I(inode)->location, sizeof(location)); 2516 memcpy(&location, &BTRFS_I(inode)->location, sizeof(location));
2521 2517
2522 ret = btrfs_lookup_inode(NULL, root, path, &location, 0); 2518 ret = btrfs_lookup_inode(NULL, root, path, &location, 0);
@@ -2526,6 +2522,12 @@ static void btrfs_read_locked_inode(struct inode *inode)
2526 leaf = path->nodes[0]; 2522 leaf = path->nodes[0];
2527 inode_item = btrfs_item_ptr(leaf, path->slots[0], 2523 inode_item = btrfs_item_ptr(leaf, path->slots[0],
2528 struct btrfs_inode_item); 2524 struct btrfs_inode_item);
2525 if (!leaf->map_token)
2526 map_private_extent_buffer(leaf, (unsigned long)inode_item,
2527 sizeof(struct btrfs_inode_item),
2528 &leaf->map_token, &leaf->kaddr,
2529 &leaf->map_start, &leaf->map_len,
2530 KM_USER1);
2529 2531
2530 inode->i_mode = btrfs_inode_mode(leaf, inode_item); 2532 inode->i_mode = btrfs_inode_mode(leaf, inode_item);
2531 inode->i_nlink = btrfs_inode_nlink(leaf, inode_item); 2533 inode->i_nlink = btrfs_inode_nlink(leaf, inode_item);
@@ -2555,8 +2557,6 @@ static void btrfs_read_locked_inode(struct inode *inode)
2555 BTRFS_I(inode)->index_cnt = (u64)-1; 2557 BTRFS_I(inode)->index_cnt = (u64)-1;
2556 BTRFS_I(inode)->flags = btrfs_inode_flags(leaf, inode_item); 2558 BTRFS_I(inode)->flags = btrfs_inode_flags(leaf, inode_item);
2557 2559
2558 alloc_group_block = btrfs_inode_block_group(leaf, inode_item);
2559
2560 /* 2560 /*
2561 * try to precache a NULL acl entry for files that don't have 2561 * try to precache a NULL acl entry for files that don't have
2562 * any xattrs or acls 2562 * any xattrs or acls
@@ -2566,8 +2566,11 @@ static void btrfs_read_locked_inode(struct inode *inode)
2566 if (!maybe_acls) 2566 if (!maybe_acls)
2567 cache_no_acl(inode); 2567 cache_no_acl(inode);
2568 2568
2569 BTRFS_I(inode)->block_group = btrfs_find_block_group(root, 0, 2569 if (leaf->map_token) {
2570 alloc_group_block, 0); 2570 unmap_extent_buffer(leaf, leaf->map_token, KM_USER1);
2571 leaf->map_token = NULL;
2572 }
2573
2571 btrfs_free_path(path); 2574 btrfs_free_path(path);
2572 inode_item = NULL; 2575 inode_item = NULL;
2573 2576
@@ -2647,7 +2650,7 @@ static void fill_inode_item(struct btrfs_trans_handle *trans,
2647 btrfs_set_inode_transid(leaf, item, trans->transid); 2650 btrfs_set_inode_transid(leaf, item, trans->transid);
2648 btrfs_set_inode_rdev(leaf, item, inode->i_rdev); 2651 btrfs_set_inode_rdev(leaf, item, inode->i_rdev);
2649 btrfs_set_inode_flags(leaf, item, BTRFS_I(inode)->flags); 2652 btrfs_set_inode_flags(leaf, item, BTRFS_I(inode)->flags);
2650 btrfs_set_inode_block_group(leaf, item, BTRFS_I(inode)->block_group); 2653 btrfs_set_inode_block_group(leaf, item, 0);
2651 2654
2652 if (leaf->map_token) { 2655 if (leaf->map_token) {
2653 unmap_extent_buffer(leaf, leaf->map_token, KM_USER1); 2656 unmap_extent_buffer(leaf, leaf->map_token, KM_USER1);
@@ -3004,8 +3007,6 @@ static int btrfs_unlink(struct inode *dir, struct dentry *dentry)
3004 if (IS_ERR(trans)) 3007 if (IS_ERR(trans))
3005 return PTR_ERR(trans); 3008 return PTR_ERR(trans);
3006 3009
3007 btrfs_set_trans_block_group(trans, dir);
3008
3009 btrfs_record_unlink_dir(trans, dir, dentry->d_inode, 0); 3010 btrfs_record_unlink_dir(trans, dir, dentry->d_inode, 0);
3010 3011
3011 ret = btrfs_unlink_inode(trans, root, dir, dentry->d_inode, 3012 ret = btrfs_unlink_inode(trans, root, dir, dentry->d_inode,
@@ -3075,6 +3076,7 @@ int btrfs_unlink_subvol(struct btrfs_trans_handle *trans,
3075 ret = btrfs_update_inode(trans, root, dir); 3076 ret = btrfs_update_inode(trans, root, dir);
3076 BUG_ON(ret); 3077 BUG_ON(ret);
3077 3078
3079 btrfs_free_path(path);
3078 return 0; 3080 return 0;
3079} 3081}
3080 3082
@@ -3094,8 +3096,6 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry)
3094 if (IS_ERR(trans)) 3096 if (IS_ERR(trans))
3095 return PTR_ERR(trans); 3097 return PTR_ERR(trans);
3096 3098
3097 btrfs_set_trans_block_group(trans, dir);
3098
3099 if (unlikely(btrfs_ino(inode) == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)) { 3099 if (unlikely(btrfs_ino(inode) == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)) {
3100 err = btrfs_unlink_subvol(trans, root, dir, 3100 err = btrfs_unlink_subvol(trans, root, dir,
3101 BTRFS_I(inode)->location.objectid, 3101 BTRFS_I(inode)->location.objectid,
@@ -3514,7 +3514,6 @@ int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size)
3514 err = PTR_ERR(trans); 3514 err = PTR_ERR(trans);
3515 break; 3515 break;
3516 } 3516 }
3517 btrfs_set_trans_block_group(trans, inode);
3518 3517
3519 err = btrfs_drop_extents(trans, inode, cur_offset, 3518 err = btrfs_drop_extents(trans, inode, cur_offset,
3520 cur_offset + hole_size, 3519 cur_offset + hole_size,
@@ -3648,9 +3647,8 @@ void btrfs_evict_inode(struct inode *inode)
3648 btrfs_i_size_write(inode, 0); 3647 btrfs_i_size_write(inode, 0);
3649 3648
3650 while (1) { 3649 while (1) {
3651 trans = btrfs_start_transaction(root, 0); 3650 trans = btrfs_join_transaction(root);
3652 BUG_ON(IS_ERR(trans)); 3651 BUG_ON(IS_ERR(trans));
3653 btrfs_set_trans_block_group(trans, inode);
3654 trans->block_rsv = root->orphan_block_rsv; 3652 trans->block_rsv = root->orphan_block_rsv;
3655 3653
3656 ret = btrfs_block_rsv_check(trans, root, 3654 ret = btrfs_block_rsv_check(trans, root,
@@ -4133,7 +4131,8 @@ static int btrfs_real_readdir(struct file *filp, void *dirent,
4133 path = btrfs_alloc_path(); 4131 path = btrfs_alloc_path();
4134 if (!path) 4132 if (!path)
4135 return -ENOMEM; 4133 return -ENOMEM;
4136 path->reada = 2; 4134
4135 path->reada = 1;
4137 4136
4138 if (key_type == BTRFS_DIR_INDEX_KEY) { 4137 if (key_type == BTRFS_DIR_INDEX_KEY) {
4139 INIT_LIST_HEAD(&ins_list); 4138 INIT_LIST_HEAD(&ins_list);
@@ -4268,18 +4267,16 @@ int btrfs_write_inode(struct inode *inode, struct writeback_control *wbc)
4268 if (BTRFS_I(inode)->dummy_inode) 4267 if (BTRFS_I(inode)->dummy_inode)
4269 return 0; 4268 return 0;
4270 4269
4271 smp_mb(); 4270 if (btrfs_fs_closing(root->fs_info) && is_free_space_inode(root, inode))
4272 if (root->fs_info->closing && is_free_space_inode(root, inode))
4273 nolock = true; 4271 nolock = true;
4274 4272
4275 if (wbc->sync_mode == WB_SYNC_ALL) { 4273 if (wbc->sync_mode == WB_SYNC_ALL) {
4276 if (nolock) 4274 if (nolock)
4277 trans = btrfs_join_transaction_nolock(root, 1); 4275 trans = btrfs_join_transaction_nolock(root);
4278 else 4276 else
4279 trans = btrfs_join_transaction(root, 1); 4277 trans = btrfs_join_transaction(root);
4280 if (IS_ERR(trans)) 4278 if (IS_ERR(trans))
4281 return PTR_ERR(trans); 4279 return PTR_ERR(trans);
4282 btrfs_set_trans_block_group(trans, inode);
4283 if (nolock) 4280 if (nolock)
4284 ret = btrfs_end_transaction_nolock(trans, root); 4281 ret = btrfs_end_transaction_nolock(trans, root);
4285 else 4282 else
@@ -4303,9 +4300,8 @@ void btrfs_dirty_inode(struct inode *inode, int flags)
4303 if (BTRFS_I(inode)->dummy_inode) 4300 if (BTRFS_I(inode)->dummy_inode)
4304 return; 4301 return;
4305 4302
4306 trans = btrfs_join_transaction(root, 1); 4303 trans = btrfs_join_transaction(root);
4307 BUG_ON(IS_ERR(trans)); 4304 BUG_ON(IS_ERR(trans));
4308 btrfs_set_trans_block_group(trans, inode);
4309 4305
4310 ret = btrfs_update_inode(trans, root, inode); 4306 ret = btrfs_update_inode(trans, root, inode);
4311 if (ret && ret == -ENOSPC) { 4307 if (ret && ret == -ENOSPC) {
@@ -4319,7 +4315,6 @@ void btrfs_dirty_inode(struct inode *inode, int flags)
4319 PTR_ERR(trans)); 4315 PTR_ERR(trans));
4320 return; 4316 return;
4321 } 4317 }
4322 btrfs_set_trans_block_group(trans, inode);
4323 4318
4324 ret = btrfs_update_inode(trans, root, inode); 4319 ret = btrfs_update_inode(trans, root, inode);
4325 if (ret) { 4320 if (ret) {
@@ -4418,8 +4413,8 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
4418 struct btrfs_root *root, 4413 struct btrfs_root *root,
4419 struct inode *dir, 4414 struct inode *dir,
4420 const char *name, int name_len, 4415 const char *name, int name_len,
4421 u64 ref_objectid, u64 objectid, 4416 u64 ref_objectid, u64 objectid, int mode,
4422 u64 alloc_hint, int mode, u64 *index) 4417 u64 *index)
4423{ 4418{
4424 struct inode *inode; 4419 struct inode *inode;
4425 struct btrfs_inode_item *inode_item; 4420 struct btrfs_inode_item *inode_item;
@@ -4472,8 +4467,6 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
4472 owner = 0; 4467 owner = 0;
4473 else 4468 else
4474 owner = 1; 4469 owner = 1;
4475 BTRFS_I(inode)->block_group =
4476 btrfs_find_block_group(root, 0, alloc_hint, owner);
4477 4470
4478 key[0].objectid = objectid; 4471 key[0].objectid = objectid;
4479 btrfs_set_key_type(&key[0], BTRFS_INODE_ITEM_KEY); 4472 btrfs_set_key_type(&key[0], BTRFS_INODE_ITEM_KEY);
@@ -4629,15 +4622,13 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry,
4629 if (IS_ERR(trans)) 4622 if (IS_ERR(trans))
4630 return PTR_ERR(trans); 4623 return PTR_ERR(trans);
4631 4624
4632 btrfs_set_trans_block_group(trans, dir);
4633
4634 err = btrfs_find_free_ino(root, &objectid); 4625 err = btrfs_find_free_ino(root, &objectid);
4635 if (err) 4626 if (err)
4636 goto out_unlock; 4627 goto out_unlock;
4637 4628
4638 inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name, 4629 inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name,
4639 dentry->d_name.len, btrfs_ino(dir), objectid, 4630 dentry->d_name.len, btrfs_ino(dir), objectid,
4640 BTRFS_I(dir)->block_group, mode, &index); 4631 mode, &index);
4641 if (IS_ERR(inode)) { 4632 if (IS_ERR(inode)) {
4642 err = PTR_ERR(inode); 4633 err = PTR_ERR(inode);
4643 goto out_unlock; 4634 goto out_unlock;
@@ -4649,7 +4640,6 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry,
4649 goto out_unlock; 4640 goto out_unlock;
4650 } 4641 }
4651 4642
4652 btrfs_set_trans_block_group(trans, inode);
4653 err = btrfs_add_nondir(trans, dir, dentry, inode, 0, index); 4643 err = btrfs_add_nondir(trans, dir, dentry, inode, 0, index);
4654 if (err) 4644 if (err)
4655 drop_inode = 1; 4645 drop_inode = 1;
@@ -4658,8 +4648,6 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry,
4658 init_special_inode(inode, inode->i_mode, rdev); 4648 init_special_inode(inode, inode->i_mode, rdev);
4659 btrfs_update_inode(trans, root, inode); 4649 btrfs_update_inode(trans, root, inode);
4660 } 4650 }
4661 btrfs_update_inode_block_group(trans, inode);
4662 btrfs_update_inode_block_group(trans, dir);
4663out_unlock: 4651out_unlock:
4664 nr = trans->blocks_used; 4652 nr = trans->blocks_used;
4665 btrfs_end_transaction_throttle(trans, root); 4653 btrfs_end_transaction_throttle(trans, root);
@@ -4692,15 +4680,13 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry,
4692 if (IS_ERR(trans)) 4680 if (IS_ERR(trans))
4693 return PTR_ERR(trans); 4681 return PTR_ERR(trans);
4694 4682
4695 btrfs_set_trans_block_group(trans, dir);
4696
4697 err = btrfs_find_free_ino(root, &objectid); 4683 err = btrfs_find_free_ino(root, &objectid);
4698 if (err) 4684 if (err)
4699 goto out_unlock; 4685 goto out_unlock;
4700 4686
4701 inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name, 4687 inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name,
4702 dentry->d_name.len, btrfs_ino(dir), objectid, 4688 dentry->d_name.len, btrfs_ino(dir), objectid,
4703 BTRFS_I(dir)->block_group, mode, &index); 4689 mode, &index);
4704 if (IS_ERR(inode)) { 4690 if (IS_ERR(inode)) {
4705 err = PTR_ERR(inode); 4691 err = PTR_ERR(inode);
4706 goto out_unlock; 4692 goto out_unlock;
@@ -4712,7 +4698,6 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry,
4712 goto out_unlock; 4698 goto out_unlock;
4713 } 4699 }
4714 4700
4715 btrfs_set_trans_block_group(trans, inode);
4716 err = btrfs_add_nondir(trans, dir, dentry, inode, 0, index); 4701 err = btrfs_add_nondir(trans, dir, dentry, inode, 0, index);
4717 if (err) 4702 if (err)
4718 drop_inode = 1; 4703 drop_inode = 1;
@@ -4723,8 +4708,6 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry,
4723 inode->i_op = &btrfs_file_inode_operations; 4708 inode->i_op = &btrfs_file_inode_operations;
4724 BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops; 4709 BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops;
4725 } 4710 }
4726 btrfs_update_inode_block_group(trans, inode);
4727 btrfs_update_inode_block_group(trans, dir);
4728out_unlock: 4711out_unlock:
4729 nr = trans->blocks_used; 4712 nr = trans->blocks_used;
4730 btrfs_end_transaction_throttle(trans, root); 4713 btrfs_end_transaction_throttle(trans, root);
@@ -4771,8 +4754,6 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
4771 4754
4772 btrfs_inc_nlink(inode); 4755 btrfs_inc_nlink(inode);
4773 inode->i_ctime = CURRENT_TIME; 4756 inode->i_ctime = CURRENT_TIME;
4774
4775 btrfs_set_trans_block_group(trans, dir);
4776 ihold(inode); 4757 ihold(inode);
4777 4758
4778 err = btrfs_add_nondir(trans, dir, dentry, inode, 1, index); 4759 err = btrfs_add_nondir(trans, dir, dentry, inode, 1, index);
@@ -4781,7 +4762,6 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
4781 drop_inode = 1; 4762 drop_inode = 1;
4782 } else { 4763 } else {
4783 struct dentry *parent = dget_parent(dentry); 4764 struct dentry *parent = dget_parent(dentry);
4784 btrfs_update_inode_block_group(trans, dir);
4785 err = btrfs_update_inode(trans, root, inode); 4765 err = btrfs_update_inode(trans, root, inode);
4786 BUG_ON(err); 4766 BUG_ON(err);
4787 btrfs_log_new_name(trans, inode, NULL, parent); 4767 btrfs_log_new_name(trans, inode, NULL, parent);
@@ -4818,7 +4798,6 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
4818 trans = btrfs_start_transaction(root, 5); 4798 trans = btrfs_start_transaction(root, 5);
4819 if (IS_ERR(trans)) 4799 if (IS_ERR(trans))
4820 return PTR_ERR(trans); 4800 return PTR_ERR(trans);
4821 btrfs_set_trans_block_group(trans, dir);
4822 4801
4823 err = btrfs_find_free_ino(root, &objectid); 4802 err = btrfs_find_free_ino(root, &objectid);
4824 if (err) 4803 if (err)
@@ -4826,8 +4805,7 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
4826 4805
4827 inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name, 4806 inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name,
4828 dentry->d_name.len, btrfs_ino(dir), objectid, 4807 dentry->d_name.len, btrfs_ino(dir), objectid,
4829 BTRFS_I(dir)->block_group, S_IFDIR | mode, 4808 S_IFDIR | mode, &index);
4830 &index);
4831 if (IS_ERR(inode)) { 4809 if (IS_ERR(inode)) {
4832 err = PTR_ERR(inode); 4810 err = PTR_ERR(inode);
4833 goto out_fail; 4811 goto out_fail;
@@ -4841,7 +4819,6 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
4841 4819
4842 inode->i_op = &btrfs_dir_inode_operations; 4820 inode->i_op = &btrfs_dir_inode_operations;
4843 inode->i_fop = &btrfs_dir_file_operations; 4821 inode->i_fop = &btrfs_dir_file_operations;
4844 btrfs_set_trans_block_group(trans, inode);
4845 4822
4846 btrfs_i_size_write(inode, 0); 4823 btrfs_i_size_write(inode, 0);
4847 err = btrfs_update_inode(trans, root, inode); 4824 err = btrfs_update_inode(trans, root, inode);
@@ -4855,8 +4832,6 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
4855 4832
4856 d_instantiate(dentry, inode); 4833 d_instantiate(dentry, inode);
4857 drop_on_err = 0; 4834 drop_on_err = 0;
4858 btrfs_update_inode_block_group(trans, inode);
4859 btrfs_update_inode_block_group(trans, dir);
4860 4835
4861out_fail: 4836out_fail:
4862 nr = trans->blocks_used; 4837 nr = trans->blocks_used;
@@ -4989,7 +4964,15 @@ again:
4989 4964
4990 if (!path) { 4965 if (!path) {
4991 path = btrfs_alloc_path(); 4966 path = btrfs_alloc_path();
4992 BUG_ON(!path); 4967 if (!path) {
4968 err = -ENOMEM;
4969 goto out;
4970 }
4971 /*
4972 * Chances are we'll be called again, so go ahead and do
4973 * readahead
4974 */
4975 path->reada = 1;
4993 } 4976 }
4994 4977
4995 ret = btrfs_lookup_file_extent(trans, root, path, 4978 ret = btrfs_lookup_file_extent(trans, root, path,
@@ -5130,8 +5113,10 @@ again:
5130 kunmap(page); 5113 kunmap(page);
5131 free_extent_map(em); 5114 free_extent_map(em);
5132 em = NULL; 5115 em = NULL;
5116
5133 btrfs_release_path(path); 5117 btrfs_release_path(path);
5134 trans = btrfs_join_transaction(root, 1); 5118 trans = btrfs_join_transaction(root);
5119
5135 if (IS_ERR(trans)) 5120 if (IS_ERR(trans))
5136 return ERR_CAST(trans); 5121 return ERR_CAST(trans);
5137 goto again; 5122 goto again;
@@ -5375,7 +5360,7 @@ static struct extent_map *btrfs_new_extent_direct(struct inode *inode,
5375 btrfs_drop_extent_cache(inode, start, start + len - 1, 0); 5360 btrfs_drop_extent_cache(inode, start, start + len - 1, 0);
5376 } 5361 }
5377 5362
5378 trans = btrfs_join_transaction(root, 0); 5363 trans = btrfs_join_transaction(root);
5379 if (IS_ERR(trans)) 5364 if (IS_ERR(trans))
5380 return ERR_CAST(trans); 5365 return ERR_CAST(trans);
5381 5366
@@ -5611,7 +5596,7 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
5611 * to make sure the current transaction stays open 5596 * to make sure the current transaction stays open
5612 * while we look for nocow cross refs 5597 * while we look for nocow cross refs
5613 */ 5598 */
5614 trans = btrfs_join_transaction(root, 0); 5599 trans = btrfs_join_transaction(root);
5615 if (IS_ERR(trans)) 5600 if (IS_ERR(trans))
5616 goto must_cow; 5601 goto must_cow;
5617 5602
@@ -5750,7 +5735,7 @@ again:
5750 5735
5751 BUG_ON(!ordered); 5736 BUG_ON(!ordered);
5752 5737
5753 trans = btrfs_join_transaction(root, 1); 5738 trans = btrfs_join_transaction(root);
5754 if (IS_ERR(trans)) { 5739 if (IS_ERR(trans)) {
5755 err = -ENOMEM; 5740 err = -ENOMEM;
5756 goto out; 5741 goto out;
@@ -6500,6 +6485,7 @@ out:
6500static int btrfs_truncate(struct inode *inode) 6485static int btrfs_truncate(struct inode *inode)
6501{ 6486{
6502 struct btrfs_root *root = BTRFS_I(inode)->root; 6487 struct btrfs_root *root = BTRFS_I(inode)->root;
6488 struct btrfs_block_rsv *rsv;
6503 int ret; 6489 int ret;
6504 int err = 0; 6490 int err = 0;
6505 struct btrfs_trans_handle *trans; 6491 struct btrfs_trans_handle *trans;
@@ -6513,28 +6499,80 @@ static int btrfs_truncate(struct inode *inode)
6513 btrfs_wait_ordered_range(inode, inode->i_size & (~mask), (u64)-1); 6499 btrfs_wait_ordered_range(inode, inode->i_size & (~mask), (u64)-1);
6514 btrfs_ordered_update_i_size(inode, inode->i_size, NULL); 6500 btrfs_ordered_update_i_size(inode, inode->i_size, NULL);
6515 6501
6516 trans = btrfs_start_transaction(root, 5); 6502 /*
6517 if (IS_ERR(trans)) 6503 * Yes ladies and gentelment, this is indeed ugly. The fact is we have
6518 return PTR_ERR(trans); 6504 * 3 things going on here
6505 *
6506 * 1) We need to reserve space for our orphan item and the space to
6507 * delete our orphan item. Lord knows we don't want to have a dangling
6508 * orphan item because we didn't reserve space to remove it.
6509 *
6510 * 2) We need to reserve space to update our inode.
6511 *
6512 * 3) We need to have something to cache all the space that is going to
6513 * be free'd up by the truncate operation, but also have some slack
6514 * space reserved in case it uses space during the truncate (thank you
6515 * very much snapshotting).
6516 *
6517 * And we need these to all be seperate. The fact is we can use alot of
6518 * space doing the truncate, and we have no earthly idea how much space
6519 * we will use, so we need the truncate reservation to be seperate so it
6520 * doesn't end up using space reserved for updating the inode or
6521 * removing the orphan item. We also need to be able to stop the
6522 * transaction and start a new one, which means we need to be able to
6523 * update the inode several times, and we have no idea of knowing how
6524 * many times that will be, so we can't just reserve 1 item for the
6525 * entirety of the opration, so that has to be done seperately as well.
6526 * Then there is the orphan item, which does indeed need to be held on
6527 * to for the whole operation, and we need nobody to touch this reserved
6528 * space except the orphan code.
6529 *
6530 * So that leaves us with
6531 *
6532 * 1) root->orphan_block_rsv - for the orphan deletion.
6533 * 2) rsv - for the truncate reservation, which we will steal from the
6534 * transaction reservation.
6535 * 3) fs_info->trans_block_rsv - this will have 1 items worth left for
6536 * updating the inode.
6537 */
6538 rsv = btrfs_alloc_block_rsv(root);
6539 if (!rsv)
6540 return -ENOMEM;
6541 btrfs_add_durable_block_rsv(root->fs_info, rsv);
6519 6542
6520 btrfs_set_trans_block_group(trans, inode); 6543 trans = btrfs_start_transaction(root, 4);
6544 if (IS_ERR(trans)) {
6545 err = PTR_ERR(trans);
6546 goto out;
6547 }
6548
6549 /*
6550 * Reserve space for the truncate process. Truncate should be adding
6551 * space, but if there are snapshots it may end up using space.
6552 */
6553 ret = btrfs_truncate_reserve_metadata(trans, root, rsv);
6554 BUG_ON(ret);
6521 6555
6522 ret = btrfs_orphan_add(trans, inode); 6556 ret = btrfs_orphan_add(trans, inode);
6523 if (ret) { 6557 if (ret) {
6524 btrfs_end_transaction(trans, root); 6558 btrfs_end_transaction(trans, root);
6525 return ret; 6559 goto out;
6526 } 6560 }
6527 6561
6528 nr = trans->blocks_used; 6562 nr = trans->blocks_used;
6529 btrfs_end_transaction(trans, root); 6563 btrfs_end_transaction(trans, root);
6530 btrfs_btree_balance_dirty(root, nr); 6564 btrfs_btree_balance_dirty(root, nr);
6531 6565
6532 /* Now start a transaction for the truncate */ 6566 /*
6533 trans = btrfs_start_transaction(root, 0); 6567 * Ok so we've already migrated our bytes over for the truncate, so here
6534 if (IS_ERR(trans)) 6568 * just reserve the one slot we need for updating the inode.
6535 return PTR_ERR(trans); 6569 */
6536 btrfs_set_trans_block_group(trans, inode); 6570 trans = btrfs_start_transaction(root, 1);
6537 trans->block_rsv = root->orphan_block_rsv; 6571 if (IS_ERR(trans)) {
6572 err = PTR_ERR(trans);
6573 goto out;
6574 }
6575 trans->block_rsv = rsv;
6538 6576
6539 /* 6577 /*
6540 * setattr is responsible for setting the ordered_data_close flag, 6578 * setattr is responsible for setting the ordered_data_close flag,
@@ -6558,24 +6596,17 @@ static int btrfs_truncate(struct inode *inode)
6558 6596
6559 while (1) { 6597 while (1) {
6560 if (!trans) { 6598 if (!trans) {
6561 trans = btrfs_start_transaction(root, 0); 6599 trans = btrfs_start_transaction(root, 3);
6562 if (IS_ERR(trans)) 6600 if (IS_ERR(trans)) {
6563 return PTR_ERR(trans); 6601 err = PTR_ERR(trans);
6564 btrfs_set_trans_block_group(trans, inode); 6602 goto out;
6565 trans->block_rsv = root->orphan_block_rsv; 6603 }
6566 }
6567 6604
6568 ret = btrfs_block_rsv_check(trans, root, 6605 ret = btrfs_truncate_reserve_metadata(trans, root,
6569 root->orphan_block_rsv, 0, 5); 6606 rsv);
6570 if (ret == -EAGAIN) { 6607 BUG_ON(ret);
6571 ret = btrfs_commit_transaction(trans, root); 6608
6572 if (ret) 6609 trans->block_rsv = rsv;
6573 return ret;
6574 trans = NULL;
6575 continue;
6576 } else if (ret) {
6577 err = ret;
6578 break;
6579 } 6610 }
6580 6611
6581 ret = btrfs_truncate_inode_items(trans, root, inode, 6612 ret = btrfs_truncate_inode_items(trans, root, inode,
@@ -6586,6 +6617,7 @@ static int btrfs_truncate(struct inode *inode)
6586 break; 6617 break;
6587 } 6618 }
6588 6619
6620 trans->block_rsv = &root->fs_info->trans_block_rsv;
6589 ret = btrfs_update_inode(trans, root, inode); 6621 ret = btrfs_update_inode(trans, root, inode);
6590 if (ret) { 6622 if (ret) {
6591 err = ret; 6623 err = ret;
@@ -6599,6 +6631,7 @@ static int btrfs_truncate(struct inode *inode)
6599 } 6631 }
6600 6632
6601 if (ret == 0 && inode->i_nlink > 0) { 6633 if (ret == 0 && inode->i_nlink > 0) {
6634 trans->block_rsv = root->orphan_block_rsv;
6602 ret = btrfs_orphan_del(trans, inode); 6635 ret = btrfs_orphan_del(trans, inode);
6603 if (ret) 6636 if (ret)
6604 err = ret; 6637 err = ret;
@@ -6610,15 +6643,20 @@ static int btrfs_truncate(struct inode *inode)
6610 ret = btrfs_orphan_del(NULL, inode); 6643 ret = btrfs_orphan_del(NULL, inode);
6611 } 6644 }
6612 6645
6646 trans->block_rsv = &root->fs_info->trans_block_rsv;
6613 ret = btrfs_update_inode(trans, root, inode); 6647 ret = btrfs_update_inode(trans, root, inode);
6614 if (ret && !err) 6648 if (ret && !err)
6615 err = ret; 6649 err = ret;
6616 6650
6617 nr = trans->blocks_used; 6651 nr = trans->blocks_used;
6618 ret = btrfs_end_transaction_throttle(trans, root); 6652 ret = btrfs_end_transaction_throttle(trans, root);
6653 btrfs_btree_balance_dirty(root, nr);
6654
6655out:
6656 btrfs_free_block_rsv(root, rsv);
6657
6619 if (ret && !err) 6658 if (ret && !err)
6620 err = ret; 6659 err = ret;
6621 btrfs_btree_balance_dirty(root, nr);
6622 6660
6623 return err; 6661 return err;
6624} 6662}
@@ -6627,15 +6665,14 @@ static int btrfs_truncate(struct inode *inode)
6627 * create a new subvolume directory/inode (helper for the ioctl). 6665 * create a new subvolume directory/inode (helper for the ioctl).
6628 */ 6666 */
6629int btrfs_create_subvol_root(struct btrfs_trans_handle *trans, 6667int btrfs_create_subvol_root(struct btrfs_trans_handle *trans,
6630 struct btrfs_root *new_root, 6668 struct btrfs_root *new_root, u64 new_dirid)
6631 u64 new_dirid, u64 alloc_hint)
6632{ 6669{
6633 struct inode *inode; 6670 struct inode *inode;
6634 int err; 6671 int err;
6635 u64 index = 0; 6672 u64 index = 0;
6636 6673
6637 inode = btrfs_new_inode(trans, new_root, NULL, "..", 2, new_dirid, 6674 inode = btrfs_new_inode(trans, new_root, NULL, "..", 2, new_dirid,
6638 new_dirid, alloc_hint, S_IFDIR | 0700, &index); 6675 new_dirid, S_IFDIR | 0700, &index);
6639 if (IS_ERR(inode)) 6676 if (IS_ERR(inode))
6640 return PTR_ERR(inode); 6677 return PTR_ERR(inode);
6641 inode->i_op = &btrfs_dir_inode_operations; 6678 inode->i_op = &btrfs_dir_inode_operations;
@@ -6748,21 +6785,6 @@ void btrfs_destroy_inode(struct inode *inode)
6748 spin_unlock(&root->fs_info->ordered_extent_lock); 6785 spin_unlock(&root->fs_info->ordered_extent_lock);
6749 } 6786 }
6750 6787
6751 if (root == root->fs_info->tree_root) {
6752 struct btrfs_block_group_cache *block_group;
6753
6754 block_group = btrfs_lookup_block_group(root->fs_info,
6755 BTRFS_I(inode)->block_group);
6756 if (block_group && block_group->inode == inode) {
6757 spin_lock(&block_group->lock);
6758 block_group->inode = NULL;
6759 spin_unlock(&block_group->lock);
6760 btrfs_put_block_group(block_group);
6761 } else if (block_group) {
6762 btrfs_put_block_group(block_group);
6763 }
6764 }
6765
6766 spin_lock(&root->orphan_lock); 6788 spin_lock(&root->orphan_lock);
6767 if (!list_empty(&BTRFS_I(inode)->i_orphan)) { 6789 if (!list_empty(&BTRFS_I(inode)->i_orphan)) {
6768 printk(KERN_INFO "BTRFS: inode %llu still on the orphan list\n", 6790 printk(KERN_INFO "BTRFS: inode %llu still on the orphan list\n",
@@ -6948,8 +6970,6 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
6948 goto out_notrans; 6970 goto out_notrans;
6949 } 6971 }
6950 6972
6951 btrfs_set_trans_block_group(trans, new_dir);
6952
6953 if (dest != root) 6973 if (dest != root)
6954 btrfs_record_root_in_trans(trans, dest); 6974 btrfs_record_root_in_trans(trans, dest);
6955 6975
@@ -7131,16 +7151,13 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
7131 if (IS_ERR(trans)) 7151 if (IS_ERR(trans))
7132 return PTR_ERR(trans); 7152 return PTR_ERR(trans);
7133 7153
7134 btrfs_set_trans_block_group(trans, dir);
7135
7136 err = btrfs_find_free_ino(root, &objectid); 7154 err = btrfs_find_free_ino(root, &objectid);
7137 if (err) 7155 if (err)
7138 goto out_unlock; 7156 goto out_unlock;
7139 7157
7140 inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name, 7158 inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name,
7141 dentry->d_name.len, btrfs_ino(dir), objectid, 7159 dentry->d_name.len, btrfs_ino(dir), objectid,
7142 BTRFS_I(dir)->block_group, S_IFLNK|S_IRWXUGO, 7160 S_IFLNK|S_IRWXUGO, &index);
7143 &index);
7144 if (IS_ERR(inode)) { 7161 if (IS_ERR(inode)) {
7145 err = PTR_ERR(inode); 7162 err = PTR_ERR(inode);
7146 goto out_unlock; 7163 goto out_unlock;
@@ -7152,7 +7169,6 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
7152 goto out_unlock; 7169 goto out_unlock;
7153 } 7170 }
7154 7171
7155 btrfs_set_trans_block_group(trans, inode);
7156 err = btrfs_add_nondir(trans, dir, dentry, inode, 0, index); 7172 err = btrfs_add_nondir(trans, dir, dentry, inode, 0, index);
7157 if (err) 7173 if (err)
7158 drop_inode = 1; 7174 drop_inode = 1;
@@ -7163,8 +7179,6 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
7163 inode->i_op = &btrfs_file_inode_operations; 7179 inode->i_op = &btrfs_file_inode_operations;
7164 BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops; 7180 BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops;
7165 } 7181 }
7166 btrfs_update_inode_block_group(trans, inode);
7167 btrfs_update_inode_block_group(trans, dir);
7168 if (drop_inode) 7182 if (drop_inode)
7169 goto out_unlock; 7183 goto out_unlock;
7170 7184
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 85e818ce00c5..a3c4751e07db 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -243,7 +243,7 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg)
243 ip->flags &= ~(BTRFS_INODE_COMPRESS | BTRFS_INODE_NOCOMPRESS); 243 ip->flags &= ~(BTRFS_INODE_COMPRESS | BTRFS_INODE_NOCOMPRESS);
244 } 244 }
245 245
246 trans = btrfs_join_transaction(root, 1); 246 trans = btrfs_join_transaction(root);
247 BUG_ON(IS_ERR(trans)); 247 BUG_ON(IS_ERR(trans));
248 248
249 ret = btrfs_update_inode(trans, root, inode); 249 ret = btrfs_update_inode(trans, root, inode);
@@ -414,8 +414,7 @@ static noinline int create_subvol(struct btrfs_root *root,
414 414
415 btrfs_record_root_in_trans(trans, new_root); 415 btrfs_record_root_in_trans(trans, new_root);
416 416
417 ret = btrfs_create_subvol_root(trans, new_root, new_dirid, 417 ret = btrfs_create_subvol_root(trans, new_root, new_dirid);
418 BTRFS_I(dir)->block_group);
419 /* 418 /*
420 * insert the directory item 419 * insert the directory item
421 */ 420 */
@@ -483,8 +482,10 @@ static int create_snapshot(struct btrfs_root *root, struct dentry *dentry,
483 ret = btrfs_snap_reserve_metadata(trans, pending_snapshot); 482 ret = btrfs_snap_reserve_metadata(trans, pending_snapshot);
484 BUG_ON(ret); 483 BUG_ON(ret);
485 484
485 spin_lock(&root->fs_info->trans_lock);
486 list_add(&pending_snapshot->list, 486 list_add(&pending_snapshot->list,
487 &trans->transaction->pending_snapshots); 487 &trans->transaction->pending_snapshots);
488 spin_unlock(&root->fs_info->trans_lock);
488 if (async_transid) { 489 if (async_transid) {
489 *async_transid = trans->transid; 490 *async_transid = trans->transid;
490 ret = btrfs_commit_transaction_async(trans, 491 ret = btrfs_commit_transaction_async(trans,
@@ -707,16 +708,17 @@ static int find_new_extents(struct btrfs_root *root,
707 struct btrfs_file_extent_item *extent; 708 struct btrfs_file_extent_item *extent;
708 int type; 709 int type;
709 int ret; 710 int ret;
711 u64 ino = btrfs_ino(inode);
710 712
711 path = btrfs_alloc_path(); 713 path = btrfs_alloc_path();
712 if (!path) 714 if (!path)
713 return -ENOMEM; 715 return -ENOMEM;
714 716
715 min_key.objectid = inode->i_ino; 717 min_key.objectid = ino;
716 min_key.type = BTRFS_EXTENT_DATA_KEY; 718 min_key.type = BTRFS_EXTENT_DATA_KEY;
717 min_key.offset = *off; 719 min_key.offset = *off;
718 720
719 max_key.objectid = inode->i_ino; 721 max_key.objectid = ino;
720 max_key.type = (u8)-1; 722 max_key.type = (u8)-1;
721 max_key.offset = (u64)-1; 723 max_key.offset = (u64)-1;
722 724
@@ -727,7 +729,7 @@ static int find_new_extents(struct btrfs_root *root,
727 path, 0, newer_than); 729 path, 0, newer_than);
728 if (ret != 0) 730 if (ret != 0)
729 goto none; 731 goto none;
730 if (min_key.objectid != inode->i_ino) 732 if (min_key.objectid != ino)
731 goto none; 733 goto none;
732 if (min_key.type != BTRFS_EXTENT_DATA_KEY) 734 if (min_key.type != BTRFS_EXTENT_DATA_KEY)
733 goto none; 735 goto none;
@@ -2054,29 +2056,34 @@ static long btrfs_ioctl_rm_dev(struct btrfs_root *root, void __user *arg)
2054 2056
2055static long btrfs_ioctl_fs_info(struct btrfs_root *root, void __user *arg) 2057static long btrfs_ioctl_fs_info(struct btrfs_root *root, void __user *arg)
2056{ 2058{
2057 struct btrfs_ioctl_fs_info_args fi_args; 2059 struct btrfs_ioctl_fs_info_args *fi_args;
2058 struct btrfs_device *device; 2060 struct btrfs_device *device;
2059 struct btrfs_device *next; 2061 struct btrfs_device *next;
2060 struct btrfs_fs_devices *fs_devices = root->fs_info->fs_devices; 2062 struct btrfs_fs_devices *fs_devices = root->fs_info->fs_devices;
2063 int ret = 0;
2061 2064
2062 if (!capable(CAP_SYS_ADMIN)) 2065 if (!capable(CAP_SYS_ADMIN))
2063 return -EPERM; 2066 return -EPERM;
2064 2067
2065 fi_args.num_devices = fs_devices->num_devices; 2068 fi_args = kzalloc(sizeof(*fi_args), GFP_KERNEL);
2066 fi_args.max_id = 0; 2069 if (!fi_args)
2067 memcpy(&fi_args.fsid, root->fs_info->fsid, sizeof(fi_args.fsid)); 2070 return -ENOMEM;
2071
2072 fi_args->num_devices = fs_devices->num_devices;
2073 memcpy(&fi_args->fsid, root->fs_info->fsid, sizeof(fi_args->fsid));
2068 2074
2069 mutex_lock(&fs_devices->device_list_mutex); 2075 mutex_lock(&fs_devices->device_list_mutex);
2070 list_for_each_entry_safe(device, next, &fs_devices->devices, dev_list) { 2076 list_for_each_entry_safe(device, next, &fs_devices->devices, dev_list) {
2071 if (device->devid > fi_args.max_id) 2077 if (device->devid > fi_args->max_id)
2072 fi_args.max_id = device->devid; 2078 fi_args->max_id = device->devid;
2073 } 2079 }
2074 mutex_unlock(&fs_devices->device_list_mutex); 2080 mutex_unlock(&fs_devices->device_list_mutex);
2075 2081
2076 if (copy_to_user(arg, &fi_args, sizeof(fi_args))) 2082 if (copy_to_user(arg, fi_args, sizeof(*fi_args)))
2077 return -EFAULT; 2083 ret = -EFAULT;
2078 2084
2079 return 0; 2085 kfree(fi_args);
2086 return ret;
2080} 2087}
2081 2088
2082static long btrfs_ioctl_dev_info(struct btrfs_root *root, void __user *arg) 2089static long btrfs_ioctl_dev_info(struct btrfs_root *root, void __user *arg)
@@ -2489,12 +2496,10 @@ static long btrfs_ioctl_trans_start(struct file *file)
2489 if (ret) 2496 if (ret)
2490 goto out; 2497 goto out;
2491 2498
2492 mutex_lock(&root->fs_info->trans_mutex); 2499 atomic_inc(&root->fs_info->open_ioctl_trans);
2493 root->fs_info->open_ioctl_trans++;
2494 mutex_unlock(&root->fs_info->trans_mutex);
2495 2500
2496 ret = -ENOMEM; 2501 ret = -ENOMEM;
2497 trans = btrfs_start_ioctl_transaction(root, 0); 2502 trans = btrfs_start_ioctl_transaction(root);
2498 if (IS_ERR(trans)) 2503 if (IS_ERR(trans))
2499 goto out_drop; 2504 goto out_drop;
2500 2505
@@ -2502,9 +2507,7 @@ static long btrfs_ioctl_trans_start(struct file *file)
2502 return 0; 2507 return 0;
2503 2508
2504out_drop: 2509out_drop:
2505 mutex_lock(&root->fs_info->trans_mutex); 2510 atomic_dec(&root->fs_info->open_ioctl_trans);
2506 root->fs_info->open_ioctl_trans--;
2507 mutex_unlock(&root->fs_info->trans_mutex);
2508 mnt_drop_write(file->f_path.mnt); 2511 mnt_drop_write(file->f_path.mnt);
2509out: 2512out:
2510 return ret; 2513 return ret;
@@ -2738,9 +2741,7 @@ long btrfs_ioctl_trans_end(struct file *file)
2738 2741
2739 btrfs_end_transaction(trans, root); 2742 btrfs_end_transaction(trans, root);
2740 2743
2741 mutex_lock(&root->fs_info->trans_mutex); 2744 atomic_dec(&root->fs_info->open_ioctl_trans);
2742 root->fs_info->open_ioctl_trans--;
2743 mutex_unlock(&root->fs_info->trans_mutex);
2744 2745
2745 mnt_drop_write(file->f_path.mnt); 2746 mnt_drop_write(file->f_path.mnt);
2746 return 0; 2747 return 0;
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index ca38eca70af0..5e0a3dc79a45 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -677,6 +677,8 @@ struct backref_node *build_backref_tree(struct reloc_control *rc,
677 err = -ENOMEM; 677 err = -ENOMEM;
678 goto out; 678 goto out;
679 } 679 }
680 path1->reada = 1;
681 path2->reada = 2;
680 682
681 node = alloc_backref_node(cache); 683 node = alloc_backref_node(cache);
682 if (!node) { 684 if (!node) {
@@ -1366,7 +1368,7 @@ int btrfs_update_reloc_root(struct btrfs_trans_handle *trans,
1366 int ret; 1368 int ret;
1367 1369
1368 if (!root->reloc_root) 1370 if (!root->reloc_root)
1369 return 0; 1371 goto out;
1370 1372
1371 reloc_root = root->reloc_root; 1373 reloc_root = root->reloc_root;
1372 root_item = &reloc_root->root_item; 1374 root_item = &reloc_root->root_item;
@@ -1388,6 +1390,8 @@ int btrfs_update_reloc_root(struct btrfs_trans_handle *trans,
1388 ret = btrfs_update_root(trans, root->fs_info->tree_root, 1390 ret = btrfs_update_root(trans, root->fs_info->tree_root,
1389 &reloc_root->root_key, root_item); 1391 &reloc_root->root_key, root_item);
1390 BUG_ON(ret); 1392 BUG_ON(ret);
1393
1394out:
1391 return 0; 1395 return 0;
1392} 1396}
1393 1397
@@ -1999,6 +2003,7 @@ static noinline_for_stack int merge_reloc_root(struct reloc_control *rc,
1999 path = btrfs_alloc_path(); 2003 path = btrfs_alloc_path();
2000 if (!path) 2004 if (!path)
2001 return -ENOMEM; 2005 return -ENOMEM;
2006 path->reada = 1;
2002 2007
2003 reloc_root = root->reloc_root; 2008 reloc_root = root->reloc_root;
2004 root_item = &reloc_root->root_item; 2009 root_item = &reloc_root->root_item;
@@ -2139,10 +2144,11 @@ int prepare_to_merge(struct reloc_control *rc, int err)
2139 u64 num_bytes = 0; 2144 u64 num_bytes = 0;
2140 int ret; 2145 int ret;
2141 2146
2142 mutex_lock(&root->fs_info->trans_mutex); 2147 mutex_lock(&root->fs_info->reloc_mutex);
2143 rc->merging_rsv_size += root->nodesize * (BTRFS_MAX_LEVEL - 1) * 2; 2148 rc->merging_rsv_size += root->nodesize * (BTRFS_MAX_LEVEL - 1) * 2;
2144 rc->merging_rsv_size += rc->nodes_relocated * 2; 2149 rc->merging_rsv_size += rc->nodes_relocated * 2;
2145 mutex_unlock(&root->fs_info->trans_mutex); 2150 mutex_unlock(&root->fs_info->reloc_mutex);
2151
2146again: 2152again:
2147 if (!err) { 2153 if (!err) {
2148 num_bytes = rc->merging_rsv_size; 2154 num_bytes = rc->merging_rsv_size;
@@ -2152,7 +2158,7 @@ again:
2152 err = ret; 2158 err = ret;
2153 } 2159 }
2154 2160
2155 trans = btrfs_join_transaction(rc->extent_root, 1); 2161 trans = btrfs_join_transaction(rc->extent_root);
2156 if (IS_ERR(trans)) { 2162 if (IS_ERR(trans)) {
2157 if (!err) 2163 if (!err)
2158 btrfs_block_rsv_release(rc->extent_root, 2164 btrfs_block_rsv_release(rc->extent_root,
@@ -2211,9 +2217,16 @@ int merge_reloc_roots(struct reloc_control *rc)
2211 int ret; 2217 int ret;
2212again: 2218again:
2213 root = rc->extent_root; 2219 root = rc->extent_root;
2214 mutex_lock(&root->fs_info->trans_mutex); 2220
2221 /*
2222 * this serializes us with btrfs_record_root_in_transaction,
2223 * we have to make sure nobody is in the middle of
2224 * adding their roots to the list while we are
2225 * doing this splice
2226 */
2227 mutex_lock(&root->fs_info->reloc_mutex);
2215 list_splice_init(&rc->reloc_roots, &reloc_roots); 2228 list_splice_init(&rc->reloc_roots, &reloc_roots);
2216 mutex_unlock(&root->fs_info->trans_mutex); 2229 mutex_unlock(&root->fs_info->reloc_mutex);
2217 2230
2218 while (!list_empty(&reloc_roots)) { 2231 while (!list_empty(&reloc_roots)) {
2219 found = 1; 2232 found = 1;
@@ -3236,7 +3249,7 @@ truncate:
3236 goto out; 3249 goto out;
3237 } 3250 }
3238 3251
3239 trans = btrfs_join_transaction(root, 0); 3252 trans = btrfs_join_transaction(root);
3240 if (IS_ERR(trans)) { 3253 if (IS_ERR(trans)) {
3241 btrfs_free_path(path); 3254 btrfs_free_path(path);
3242 ret = PTR_ERR(trans); 3255 ret = PTR_ERR(trans);
@@ -3300,6 +3313,7 @@ static int find_data_references(struct reloc_control *rc,
3300 path = btrfs_alloc_path(); 3313 path = btrfs_alloc_path();
3301 if (!path) 3314 if (!path)
3302 return -ENOMEM; 3315 return -ENOMEM;
3316 path->reada = 1;
3303 3317
3304 root = read_fs_root(rc->extent_root->fs_info, ref_root); 3318 root = read_fs_root(rc->extent_root->fs_info, ref_root);
3305 if (IS_ERR(root)) { 3319 if (IS_ERR(root)) {
@@ -3586,17 +3600,19 @@ next:
3586static void set_reloc_control(struct reloc_control *rc) 3600static void set_reloc_control(struct reloc_control *rc)
3587{ 3601{
3588 struct btrfs_fs_info *fs_info = rc->extent_root->fs_info; 3602 struct btrfs_fs_info *fs_info = rc->extent_root->fs_info;
3589 mutex_lock(&fs_info->trans_mutex); 3603
3604 mutex_lock(&fs_info->reloc_mutex);
3590 fs_info->reloc_ctl = rc; 3605 fs_info->reloc_ctl = rc;
3591 mutex_unlock(&fs_info->trans_mutex); 3606 mutex_unlock(&fs_info->reloc_mutex);
3592} 3607}
3593 3608
3594static void unset_reloc_control(struct reloc_control *rc) 3609static void unset_reloc_control(struct reloc_control *rc)
3595{ 3610{
3596 struct btrfs_fs_info *fs_info = rc->extent_root->fs_info; 3611 struct btrfs_fs_info *fs_info = rc->extent_root->fs_info;
3597 mutex_lock(&fs_info->trans_mutex); 3612
3613 mutex_lock(&fs_info->reloc_mutex);
3598 fs_info->reloc_ctl = NULL; 3614 fs_info->reloc_ctl = NULL;
3599 mutex_unlock(&fs_info->trans_mutex); 3615 mutex_unlock(&fs_info->reloc_mutex);
3600} 3616}
3601 3617
3602static int check_extent_flags(u64 flags) 3618static int check_extent_flags(u64 flags)
@@ -3645,7 +3661,7 @@ int prepare_to_relocate(struct reloc_control *rc)
3645 rc->create_reloc_tree = 1; 3661 rc->create_reloc_tree = 1;
3646 set_reloc_control(rc); 3662 set_reloc_control(rc);
3647 3663
3648 trans = btrfs_join_transaction(rc->extent_root, 1); 3664 trans = btrfs_join_transaction(rc->extent_root);
3649 BUG_ON(IS_ERR(trans)); 3665 BUG_ON(IS_ERR(trans));
3650 btrfs_commit_transaction(trans, rc->extent_root); 3666 btrfs_commit_transaction(trans, rc->extent_root);
3651 return 0; 3667 return 0;
@@ -3668,6 +3684,7 @@ static noinline_for_stack int relocate_block_group(struct reloc_control *rc)
3668 path = btrfs_alloc_path(); 3684 path = btrfs_alloc_path();
3669 if (!path) 3685 if (!path)
3670 return -ENOMEM; 3686 return -ENOMEM;
3687 path->reada = 1;
3671 3688
3672 ret = prepare_to_relocate(rc); 3689 ret = prepare_to_relocate(rc);
3673 if (ret) { 3690 if (ret) {
@@ -3834,7 +3851,7 @@ restart:
3834 btrfs_block_rsv_release(rc->extent_root, rc->block_rsv, (u64)-1); 3851 btrfs_block_rsv_release(rc->extent_root, rc->block_rsv, (u64)-1);
3835 3852
3836 /* get rid of pinned extents */ 3853 /* get rid of pinned extents */
3837 trans = btrfs_join_transaction(rc->extent_root, 1); 3854 trans = btrfs_join_transaction(rc->extent_root);
3838 if (IS_ERR(trans)) 3855 if (IS_ERR(trans))
3839 err = PTR_ERR(trans); 3856 err = PTR_ERR(trans);
3840 else 3857 else
@@ -4093,6 +4110,7 @@ int btrfs_recover_relocation(struct btrfs_root *root)
4093 path = btrfs_alloc_path(); 4110 path = btrfs_alloc_path();
4094 if (!path) 4111 if (!path)
4095 return -ENOMEM; 4112 return -ENOMEM;
4113 path->reada = -1;
4096 4114
4097 key.objectid = BTRFS_TREE_RELOC_OBJECTID; 4115 key.objectid = BTRFS_TREE_RELOC_OBJECTID;
4098 key.type = BTRFS_ROOT_ITEM_KEY; 4116 key.type = BTRFS_ROOT_ITEM_KEY;
@@ -4159,7 +4177,7 @@ int btrfs_recover_relocation(struct btrfs_root *root)
4159 4177
4160 set_reloc_control(rc); 4178 set_reloc_control(rc);
4161 4179
4162 trans = btrfs_join_transaction(rc->extent_root, 1); 4180 trans = btrfs_join_transaction(rc->extent_root);
4163 if (IS_ERR(trans)) { 4181 if (IS_ERR(trans)) {
4164 unset_reloc_control(rc); 4182 unset_reloc_control(rc);
4165 err = PTR_ERR(trans); 4183 err = PTR_ERR(trans);
@@ -4193,7 +4211,7 @@ int btrfs_recover_relocation(struct btrfs_root *root)
4193 4211
4194 unset_reloc_control(rc); 4212 unset_reloc_control(rc);
4195 4213
4196 trans = btrfs_join_transaction(rc->extent_root, 1); 4214 trans = btrfs_join_transaction(rc->extent_root);
4197 if (IS_ERR(trans)) 4215 if (IS_ERR(trans))
4198 err = PTR_ERR(trans); 4216 err = PTR_ERR(trans);
4199 else 4217 else
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index 6dfed0c27ac3..a8d03d5efb5d 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -16,13 +16,7 @@
16 * Boston, MA 021110-1307, USA. 16 * Boston, MA 021110-1307, USA.
17 */ 17 */
18 18
19#include <linux/sched.h>
20#include <linux/pagemap.h>
21#include <linux/writeback.h>
22#include <linux/blkdev.h> 19#include <linux/blkdev.h>
23#include <linux/rbtree.h>
24#include <linux/slab.h>
25#include <linux/workqueue.h>
26#include "ctree.h" 20#include "ctree.h"
27#include "volumes.h" 21#include "volumes.h"
28#include "disk-io.h" 22#include "disk-io.h"
@@ -117,33 +111,37 @@ static void scrub_free_csums(struct scrub_dev *sdev)
117 } 111 }
118} 112}
119 113
114static void scrub_free_bio(struct bio *bio)
115{
116 int i;
117 struct page *last_page = NULL;
118
119 if (!bio)
120 return;
121
122 for (i = 0; i < bio->bi_vcnt; ++i) {
123 if (bio->bi_io_vec[i].bv_page == last_page)
124 continue;
125 last_page = bio->bi_io_vec[i].bv_page;
126 __free_page(last_page);
127 }
128 bio_put(bio);
129}
130
120static noinline_for_stack void scrub_free_dev(struct scrub_dev *sdev) 131static noinline_for_stack void scrub_free_dev(struct scrub_dev *sdev)
121{ 132{
122 int i; 133 int i;
123 int j;
124 struct page *last_page;
125 134
126 if (!sdev) 135 if (!sdev)
127 return; 136 return;
128 137
129 for (i = 0; i < SCRUB_BIOS_PER_DEV; ++i) { 138 for (i = 0; i < SCRUB_BIOS_PER_DEV; ++i) {
130 struct scrub_bio *sbio = sdev->bios[i]; 139 struct scrub_bio *sbio = sdev->bios[i];
131 struct bio *bio;
132 140
133 if (!sbio) 141 if (!sbio)
134 break; 142 break;
135 143
136 bio = sbio->bio; 144 scrub_free_bio(sbio->bio);
137 if (bio) {
138 last_page = NULL;
139 for (j = 0; j < bio->bi_vcnt; ++j) {
140 if (bio->bi_io_vec[j].bv_page == last_page)
141 continue;
142 last_page = bio->bi_io_vec[j].bv_page;
143 __free_page(last_page);
144 }
145 bio_put(bio);
146 }
147 kfree(sbio); 145 kfree(sbio);
148 } 146 }
149 147
@@ -156,8 +154,6 @@ struct scrub_dev *scrub_setup_dev(struct btrfs_device *dev)
156{ 154{
157 struct scrub_dev *sdev; 155 struct scrub_dev *sdev;
158 int i; 156 int i;
159 int j;
160 int ret;
161 struct btrfs_fs_info *fs_info = dev->dev_root->fs_info; 157 struct btrfs_fs_info *fs_info = dev->dev_root->fs_info;
162 158
163 sdev = kzalloc(sizeof(*sdev), GFP_NOFS); 159 sdev = kzalloc(sizeof(*sdev), GFP_NOFS);
@@ -165,7 +161,6 @@ struct scrub_dev *scrub_setup_dev(struct btrfs_device *dev)
165 goto nomem; 161 goto nomem;
166 sdev->dev = dev; 162 sdev->dev = dev;
167 for (i = 0; i < SCRUB_BIOS_PER_DEV; ++i) { 163 for (i = 0; i < SCRUB_BIOS_PER_DEV; ++i) {
168 struct bio *bio;
169 struct scrub_bio *sbio; 164 struct scrub_bio *sbio;
170 165
171 sbio = kzalloc(sizeof(*sbio), GFP_NOFS); 166 sbio = kzalloc(sizeof(*sbio), GFP_NOFS);
@@ -173,32 +168,10 @@ struct scrub_dev *scrub_setup_dev(struct btrfs_device *dev)
173 goto nomem; 168 goto nomem;
174 sdev->bios[i] = sbio; 169 sdev->bios[i] = sbio;
175 170
176 bio = bio_kmalloc(GFP_NOFS, SCRUB_PAGES_PER_BIO);
177 if (!bio)
178 goto nomem;
179
180 sbio->index = i; 171 sbio->index = i;
181 sbio->sdev = sdev; 172 sbio->sdev = sdev;
182 sbio->bio = bio;
183 sbio->count = 0; 173 sbio->count = 0;
184 sbio->work.func = scrub_checksum; 174 sbio->work.func = scrub_checksum;
185 bio->bi_private = sdev->bios[i];
186 bio->bi_end_io = scrub_bio_end_io;
187 bio->bi_sector = 0;
188 bio->bi_bdev = dev->bdev;
189 bio->bi_size = 0;
190
191 for (j = 0; j < SCRUB_PAGES_PER_BIO; ++j) {
192 struct page *page;
193 page = alloc_page(GFP_NOFS);
194 if (!page)
195 goto nomem;
196
197 ret = bio_add_page(bio, page, PAGE_SIZE, 0);
198 if (!ret)
199 goto nomem;
200 }
201 WARN_ON(bio->bi_vcnt != SCRUB_PAGES_PER_BIO);
202 175
203 if (i != SCRUB_BIOS_PER_DEV-1) 176 if (i != SCRUB_BIOS_PER_DEV-1)
204 sdev->bios[i]->next_free = i + 1; 177 sdev->bios[i]->next_free = i + 1;
@@ -369,9 +342,6 @@ static int scrub_fixup_io(int rw, struct block_device *bdev, sector_t sector,
369 int ret; 342 int ret;
370 DECLARE_COMPLETION_ONSTACK(complete); 343 DECLARE_COMPLETION_ONSTACK(complete);
371 344
372 /* we are going to wait on this IO */
373 rw |= REQ_SYNC;
374
375 bio = bio_alloc(GFP_NOFS, 1); 345 bio = bio_alloc(GFP_NOFS, 1);
376 bio->bi_bdev = bdev; 346 bio->bi_bdev = bdev;
377 bio->bi_sector = sector; 347 bio->bi_sector = sector;
@@ -380,6 +350,7 @@ static int scrub_fixup_io(int rw, struct block_device *bdev, sector_t sector,
380 bio->bi_private = &complete; 350 bio->bi_private = &complete;
381 submit_bio(rw, bio); 351 submit_bio(rw, bio);
382 352
353 /* this will also unplug the queue */
383 wait_for_completion(&complete); 354 wait_for_completion(&complete);
384 355
385 ret = !test_bit(BIO_UPTODATE, &bio->bi_flags); 356 ret = !test_bit(BIO_UPTODATE, &bio->bi_flags);
@@ -394,6 +365,7 @@ static void scrub_bio_end_io(struct bio *bio, int err)
394 struct btrfs_fs_info *fs_info = sdev->dev->dev_root->fs_info; 365 struct btrfs_fs_info *fs_info = sdev->dev->dev_root->fs_info;
395 366
396 sbio->err = err; 367 sbio->err = err;
368 sbio->bio = bio;
397 369
398 btrfs_queue_worker(&fs_info->scrub_workers, &sbio->work); 370 btrfs_queue_worker(&fs_info->scrub_workers, &sbio->work);
399} 371}
@@ -453,6 +425,8 @@ static void scrub_checksum(struct btrfs_work *work)
453 } 425 }
454 426
455out: 427out:
428 scrub_free_bio(sbio->bio);
429 sbio->bio = NULL;
456 spin_lock(&sdev->list_lock); 430 spin_lock(&sdev->list_lock);
457 sbio->next_free = sdev->first_free; 431 sbio->next_free = sdev->first_free;
458 sdev->first_free = sbio->index; 432 sdev->first_free = sbio->index;
@@ -583,25 +557,50 @@ static int scrub_checksum_super(struct scrub_bio *sbio, void *buffer)
583static int scrub_submit(struct scrub_dev *sdev) 557static int scrub_submit(struct scrub_dev *sdev)
584{ 558{
585 struct scrub_bio *sbio; 559 struct scrub_bio *sbio;
560 struct bio *bio;
561 int i;
586 562
587 if (sdev->curr == -1) 563 if (sdev->curr == -1)
588 return 0; 564 return 0;
589 565
590 sbio = sdev->bios[sdev->curr]; 566 sbio = sdev->bios[sdev->curr];
591 567
592 sbio->bio->bi_sector = sbio->physical >> 9; 568 bio = bio_alloc(GFP_NOFS, sbio->count);
593 sbio->bio->bi_size = sbio->count * PAGE_SIZE; 569 if (!bio)
594 sbio->bio->bi_next = NULL; 570 goto nomem;
595 sbio->bio->bi_flags |= 1 << BIO_UPTODATE; 571
596 sbio->bio->bi_comp_cpu = -1; 572 bio->bi_private = sbio;
597 sbio->bio->bi_bdev = sdev->dev->bdev; 573 bio->bi_end_io = scrub_bio_end_io;
574 bio->bi_bdev = sdev->dev->bdev;
575 bio->bi_sector = sbio->physical >> 9;
576
577 for (i = 0; i < sbio->count; ++i) {
578 struct page *page;
579 int ret;
580
581 page = alloc_page(GFP_NOFS);
582 if (!page)
583 goto nomem;
584
585 ret = bio_add_page(bio, page, PAGE_SIZE, 0);
586 if (!ret) {
587 __free_page(page);
588 goto nomem;
589 }
590 }
591
598 sbio->err = 0; 592 sbio->err = 0;
599 sdev->curr = -1; 593 sdev->curr = -1;
600 atomic_inc(&sdev->in_flight); 594 atomic_inc(&sdev->in_flight);
601 595
602 submit_bio(0, sbio->bio); 596 submit_bio(READ, bio);
603 597
604 return 0; 598 return 0;
599
600nomem:
601 scrub_free_bio(bio);
602
603 return -ENOMEM;
605} 604}
606 605
607static int scrub_page(struct scrub_dev *sdev, u64 logical, u64 len, 606static int scrub_page(struct scrub_dev *sdev, u64 logical, u64 len,
@@ -633,7 +632,11 @@ again:
633 sbio->logical = logical; 632 sbio->logical = logical;
634 } else if (sbio->physical + sbio->count * PAGE_SIZE != physical || 633 } else if (sbio->physical + sbio->count * PAGE_SIZE != physical ||
635 sbio->logical + sbio->count * PAGE_SIZE != logical) { 634 sbio->logical + sbio->count * PAGE_SIZE != logical) {
636 scrub_submit(sdev); 635 int ret;
636
637 ret = scrub_submit(sdev);
638 if (ret)
639 return ret;
637 goto again; 640 goto again;
638 } 641 }
639 sbio->spag[sbio->count].flags = flags; 642 sbio->spag[sbio->count].flags = flags;
@@ -645,8 +648,13 @@ again:
645 memcpy(sbio->spag[sbio->count].csum, csum, sdev->csum_size); 648 memcpy(sbio->spag[sbio->count].csum, csum, sdev->csum_size);
646 } 649 }
647 ++sbio->count; 650 ++sbio->count;
648 if (sbio->count == SCRUB_PAGES_PER_BIO || force) 651 if (sbio->count == SCRUB_PAGES_PER_BIO || force) {
649 scrub_submit(sdev); 652 int ret;
653
654 ret = scrub_submit(sdev);
655 if (ret)
656 return ret;
657 }
650 658
651 return 0; 659 return 0;
652} 660}
@@ -727,6 +735,7 @@ static noinline_for_stack int scrub_stripe(struct scrub_dev *sdev,
727 struct btrfs_root *root = fs_info->extent_root; 735 struct btrfs_root *root = fs_info->extent_root;
728 struct btrfs_root *csum_root = fs_info->csum_root; 736 struct btrfs_root *csum_root = fs_info->csum_root;
729 struct btrfs_extent_item *extent; 737 struct btrfs_extent_item *extent;
738 struct blk_plug plug;
730 u64 flags; 739 u64 flags;
731 int ret; 740 int ret;
732 int slot; 741 int slot;
@@ -789,18 +798,12 @@ static noinline_for_stack int scrub_stripe(struct scrub_dev *sdev,
789 798
790 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 799 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
791 if (ret < 0) 800 if (ret < 0)
792 goto out; 801 goto out_noplug;
793
794 l = path->nodes[0];
795 slot = path->slots[0];
796 btrfs_item_key_to_cpu(l, &key, slot);
797 if (key.objectid != logical) {
798 ret = btrfs_previous_item(root, path, 0,
799 BTRFS_EXTENT_ITEM_KEY);
800 if (ret < 0)
801 goto out;
802 }
803 802
803 /*
804 * we might miss half an extent here, but that doesn't matter,
805 * as it's only the prefetch
806 */
804 while (1) { 807 while (1) {
805 l = path->nodes[0]; 808 l = path->nodes[0];
806 slot = path->slots[0]; 809 slot = path->slots[0];
@@ -809,7 +812,7 @@ static noinline_for_stack int scrub_stripe(struct scrub_dev *sdev,
809 if (ret == 0) 812 if (ret == 0)
810 continue; 813 continue;
811 if (ret < 0) 814 if (ret < 0)
812 goto out; 815 goto out_noplug;
813 816
814 break; 817 break;
815 } 818 }
@@ -831,6 +834,7 @@ static noinline_for_stack int scrub_stripe(struct scrub_dev *sdev,
831 * the scrub. This might currently (crc32) end up to be about 1MB 834 * the scrub. This might currently (crc32) end up to be about 1MB
832 */ 835 */
833 start_stripe = 0; 836 start_stripe = 0;
837 blk_start_plug(&plug);
834again: 838again:
835 logical = base + offset + start_stripe * increment; 839 logical = base + offset + start_stripe * increment;
836 for (i = start_stripe; i < nstripes; ++i) { 840 for (i = start_stripe; i < nstripes; ++i) {
@@ -890,15 +894,20 @@ again:
890 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 894 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
891 if (ret < 0) 895 if (ret < 0)
892 goto out; 896 goto out;
893 897 if (ret > 0) {
894 l = path->nodes[0];
895 slot = path->slots[0];
896 btrfs_item_key_to_cpu(l, &key, slot);
897 if (key.objectid != logical) {
898 ret = btrfs_previous_item(root, path, 0, 898 ret = btrfs_previous_item(root, path, 0,
899 BTRFS_EXTENT_ITEM_KEY); 899 BTRFS_EXTENT_ITEM_KEY);
900 if (ret < 0) 900 if (ret < 0)
901 goto out; 901 goto out;
902 if (ret > 0) {
903 /* there's no smaller item, so stick with the
904 * larger one */
905 btrfs_release_path(path);
906 ret = btrfs_search_slot(NULL, root, &key,
907 path, 0, 0);
908 if (ret < 0)
909 goto out;
910 }
902 } 911 }
903 912
904 while (1) { 913 while (1) {
@@ -972,6 +981,8 @@ next:
972 scrub_submit(sdev); 981 scrub_submit(sdev);
973 982
974out: 983out:
984 blk_finish_plug(&plug);
985out_noplug:
975 btrfs_free_path(path); 986 btrfs_free_path(path);
976 return ret < 0 ? ret : 0; 987 return ret < 0 ? ret : 0;
977} 988}
@@ -1047,8 +1058,15 @@ int scrub_enumerate_chunks(struct scrub_dev *sdev, u64 start, u64 end)
1047 while (1) { 1058 while (1) {
1048 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 1059 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
1049 if (ret < 0) 1060 if (ret < 0)
1050 goto out; 1061 break;
1051 ret = 0; 1062 if (ret > 0) {
1063 if (path->slots[0] >=
1064 btrfs_header_nritems(path->nodes[0])) {
1065 ret = btrfs_next_leaf(root, path);
1066 if (ret)
1067 break;
1068 }
1069 }
1052 1070
1053 l = path->nodes[0]; 1071 l = path->nodes[0];
1054 slot = path->slots[0]; 1072 slot = path->slots[0];
@@ -1058,7 +1076,7 @@ int scrub_enumerate_chunks(struct scrub_dev *sdev, u64 start, u64 end)
1058 if (found_key.objectid != sdev->dev->devid) 1076 if (found_key.objectid != sdev->dev->devid)
1059 break; 1077 break;
1060 1078
1061 if (btrfs_key_type(&key) != BTRFS_DEV_EXTENT_KEY) 1079 if (btrfs_key_type(&found_key) != BTRFS_DEV_EXTENT_KEY)
1062 break; 1080 break;
1063 1081
1064 if (found_key.offset >= end) 1082 if (found_key.offset >= end)
@@ -1087,7 +1105,7 @@ int scrub_enumerate_chunks(struct scrub_dev *sdev, u64 start, u64 end)
1087 cache = btrfs_lookup_block_group(fs_info, chunk_offset); 1105 cache = btrfs_lookup_block_group(fs_info, chunk_offset);
1088 if (!cache) { 1106 if (!cache) {
1089 ret = -ENOENT; 1107 ret = -ENOENT;
1090 goto out; 1108 break;
1091 } 1109 }
1092 ret = scrub_chunk(sdev, chunk_tree, chunk_objectid, 1110 ret = scrub_chunk(sdev, chunk_tree, chunk_objectid,
1093 chunk_offset, length); 1111 chunk_offset, length);
@@ -1099,9 +1117,13 @@ int scrub_enumerate_chunks(struct scrub_dev *sdev, u64 start, u64 end)
1099 btrfs_release_path(path); 1117 btrfs_release_path(path);
1100 } 1118 }
1101 1119
1102out:
1103 btrfs_free_path(path); 1120 btrfs_free_path(path);
1104 return ret; 1121
1122 /*
1123 * ret can still be 1 from search_slot or next_leaf,
1124 * that's not an error
1125 */
1126 return ret < 0 ? ret : 0;
1105} 1127}
1106 1128
1107static noinline_for_stack int scrub_supers(struct scrub_dev *sdev) 1129static noinline_for_stack int scrub_supers(struct scrub_dev *sdev)
@@ -1138,8 +1160,12 @@ static noinline_for_stack int scrub_workers_get(struct btrfs_root *root)
1138 struct btrfs_fs_info *fs_info = root->fs_info; 1160 struct btrfs_fs_info *fs_info = root->fs_info;
1139 1161
1140 mutex_lock(&fs_info->scrub_lock); 1162 mutex_lock(&fs_info->scrub_lock);
1141 if (fs_info->scrub_workers_refcnt == 0) 1163 if (fs_info->scrub_workers_refcnt == 0) {
1164 btrfs_init_workers(&fs_info->scrub_workers, "scrub",
1165 fs_info->thread_pool_size, &fs_info->generic_worker);
1166 fs_info->scrub_workers.idle_thresh = 4;
1142 btrfs_start_workers(&fs_info->scrub_workers, 1); 1167 btrfs_start_workers(&fs_info->scrub_workers, 1);
1168 }
1143 ++fs_info->scrub_workers_refcnt; 1169 ++fs_info->scrub_workers_refcnt;
1144 mutex_unlock(&fs_info->scrub_lock); 1170 mutex_unlock(&fs_info->scrub_lock);
1145 1171
@@ -1166,7 +1192,7 @@ int btrfs_scrub_dev(struct btrfs_root *root, u64 devid, u64 start, u64 end,
1166 int ret; 1192 int ret;
1167 struct btrfs_device *dev; 1193 struct btrfs_device *dev;
1168 1194
1169 if (root->fs_info->closing) 1195 if (btrfs_fs_closing(root->fs_info))
1170 return -EINVAL; 1196 return -EINVAL;
1171 1197
1172 /* 1198 /*
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 9b2e7e5bc3ef..0bb4ebbb71b7 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -161,7 +161,8 @@ enum {
161 Opt_compress_type, Opt_compress_force, Opt_compress_force_type, 161 Opt_compress_type, Opt_compress_force, Opt_compress_force_type,
162 Opt_notreelog, Opt_ratio, Opt_flushoncommit, Opt_discard, 162 Opt_notreelog, Opt_ratio, Opt_flushoncommit, Opt_discard,
163 Opt_space_cache, Opt_clear_cache, Opt_user_subvol_rm_allowed, 163 Opt_space_cache, Opt_clear_cache, Opt_user_subvol_rm_allowed,
164 Opt_enospc_debug, Opt_subvolrootid, Opt_defrag, Opt_err, 164 Opt_enospc_debug, Opt_subvolrootid, Opt_defrag,
165 Opt_inode_cache, Opt_err,
165}; 166};
166 167
167static match_table_t tokens = { 168static match_table_t tokens = {
@@ -193,6 +194,7 @@ static match_table_t tokens = {
193 {Opt_enospc_debug, "enospc_debug"}, 194 {Opt_enospc_debug, "enospc_debug"},
194 {Opt_subvolrootid, "subvolrootid=%d"}, 195 {Opt_subvolrootid, "subvolrootid=%d"},
195 {Opt_defrag, "autodefrag"}, 196 {Opt_defrag, "autodefrag"},
197 {Opt_inode_cache, "inode_cache"},
196 {Opt_err, NULL}, 198 {Opt_err, NULL},
197}; 199};
198 200
@@ -361,6 +363,10 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
361 printk(KERN_INFO "btrfs: enabling disk space caching\n"); 363 printk(KERN_INFO "btrfs: enabling disk space caching\n");
362 btrfs_set_opt(info->mount_opt, SPACE_CACHE); 364 btrfs_set_opt(info->mount_opt, SPACE_CACHE);
363 break; 365 break;
366 case Opt_inode_cache:
367 printk(KERN_INFO "btrfs: enabling inode map caching\n");
368 btrfs_set_opt(info->mount_opt, INODE_MAP_CACHE);
369 break;
364 case Opt_clear_cache: 370 case Opt_clear_cache:
365 printk(KERN_INFO "btrfs: force clearing of disk cache\n"); 371 printk(KERN_INFO "btrfs: force clearing of disk cache\n");
366 btrfs_set_opt(info->mount_opt, CLEAR_CACHE); 372 btrfs_set_opt(info->mount_opt, CLEAR_CACHE);
@@ -819,7 +825,7 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags,
819 } else { 825 } else {
820 char b[BDEVNAME_SIZE]; 826 char b[BDEVNAME_SIZE];
821 827
822 s->s_flags = flags; 828 s->s_flags = flags | MS_NOSEC;
823 strlcpy(s->s_id, bdevname(bdev, b), sizeof(s->s_id)); 829 strlcpy(s->s_id, bdevname(bdev, b), sizeof(s->s_id));
824 error = btrfs_fill_super(s, fs_devices, data, 830 error = btrfs_fill_super(s, fs_devices, data,
825 flags & MS_SILENT ? 1 : 0); 831 flags & MS_SILENT ? 1 : 0);
diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c
index c3c223ae6691..daac9ae6d731 100644
--- a/fs/btrfs/sysfs.c
+++ b/fs/btrfs/sysfs.c
@@ -28,152 +28,6 @@
28#include "disk-io.h" 28#include "disk-io.h"
29#include "transaction.h" 29#include "transaction.h"
30 30
31static ssize_t root_blocks_used_show(struct btrfs_root *root, char *buf)
32{
33 return snprintf(buf, PAGE_SIZE, "%llu\n",
34 (unsigned long long)btrfs_root_used(&root->root_item));
35}
36
37static ssize_t root_block_limit_show(struct btrfs_root *root, char *buf)
38{
39 return snprintf(buf, PAGE_SIZE, "%llu\n",
40 (unsigned long long)btrfs_root_limit(&root->root_item));
41}
42
43static ssize_t super_blocks_used_show(struct btrfs_fs_info *fs, char *buf)
44{
45
46 return snprintf(buf, PAGE_SIZE, "%llu\n",
47 (unsigned long long)btrfs_super_bytes_used(&fs->super_copy));
48}
49
50static ssize_t super_total_blocks_show(struct btrfs_fs_info *fs, char *buf)
51{
52 return snprintf(buf, PAGE_SIZE, "%llu\n",
53 (unsigned long long)btrfs_super_total_bytes(&fs->super_copy));
54}
55
56static ssize_t super_blocksize_show(struct btrfs_fs_info *fs, char *buf)
57{
58 return snprintf(buf, PAGE_SIZE, "%llu\n",
59 (unsigned long long)btrfs_super_sectorsize(&fs->super_copy));
60}
61
62/* this is for root attrs (subvols/snapshots) */
63struct btrfs_root_attr {
64 struct attribute attr;
65 ssize_t (*show)(struct btrfs_root *, char *);
66 ssize_t (*store)(struct btrfs_root *, const char *, size_t);
67};
68
69#define ROOT_ATTR(name, mode, show, store) \
70static struct btrfs_root_attr btrfs_root_attr_##name = __ATTR(name, mode, \
71 show, store)
72
73ROOT_ATTR(blocks_used, 0444, root_blocks_used_show, NULL);
74ROOT_ATTR(block_limit, 0644, root_block_limit_show, NULL);
75
76static struct attribute *btrfs_root_attrs[] = {
77 &btrfs_root_attr_blocks_used.attr,
78 &btrfs_root_attr_block_limit.attr,
79 NULL,
80};
81
82/* this is for super attrs (actual full fs) */
83struct btrfs_super_attr {
84 struct attribute attr;
85 ssize_t (*show)(struct btrfs_fs_info *, char *);
86 ssize_t (*store)(struct btrfs_fs_info *, const char *, size_t);
87};
88
89#define SUPER_ATTR(name, mode, show, store) \
90static struct btrfs_super_attr btrfs_super_attr_##name = __ATTR(name, mode, \
91 show, store)
92
93SUPER_ATTR(blocks_used, 0444, super_blocks_used_show, NULL);
94SUPER_ATTR(total_blocks, 0444, super_total_blocks_show, NULL);
95SUPER_ATTR(blocksize, 0444, super_blocksize_show, NULL);
96
97static struct attribute *btrfs_super_attrs[] = {
98 &btrfs_super_attr_blocks_used.attr,
99 &btrfs_super_attr_total_blocks.attr,
100 &btrfs_super_attr_blocksize.attr,
101 NULL,
102};
103
104static ssize_t btrfs_super_attr_show(struct kobject *kobj,
105 struct attribute *attr, char *buf)
106{
107 struct btrfs_fs_info *fs = container_of(kobj, struct btrfs_fs_info,
108 super_kobj);
109 struct btrfs_super_attr *a = container_of(attr,
110 struct btrfs_super_attr,
111 attr);
112
113 return a->show ? a->show(fs, buf) : 0;
114}
115
116static ssize_t btrfs_super_attr_store(struct kobject *kobj,
117 struct attribute *attr,
118 const char *buf, size_t len)
119{
120 struct btrfs_fs_info *fs = container_of(kobj, struct btrfs_fs_info,
121 super_kobj);
122 struct btrfs_super_attr *a = container_of(attr,
123 struct btrfs_super_attr,
124 attr);
125
126 return a->store ? a->store(fs, buf, len) : 0;
127}
128
129static ssize_t btrfs_root_attr_show(struct kobject *kobj,
130 struct attribute *attr, char *buf)
131{
132 struct btrfs_root *root = container_of(kobj, struct btrfs_root,
133 root_kobj);
134 struct btrfs_root_attr *a = container_of(attr,
135 struct btrfs_root_attr,
136 attr);
137
138 return a->show ? a->show(root, buf) : 0;
139}
140
141static ssize_t btrfs_root_attr_store(struct kobject *kobj,
142 struct attribute *attr,
143 const char *buf, size_t len)
144{
145 struct btrfs_root *root = container_of(kobj, struct btrfs_root,
146 root_kobj);
147 struct btrfs_root_attr *a = container_of(attr,
148 struct btrfs_root_attr,
149 attr);
150 return a->store ? a->store(root, buf, len) : 0;
151}
152
153static void btrfs_super_release(struct kobject *kobj)
154{
155 struct btrfs_fs_info *fs = container_of(kobj, struct btrfs_fs_info,
156 super_kobj);
157 complete(&fs->kobj_unregister);
158}
159
160static void btrfs_root_release(struct kobject *kobj)
161{
162 struct btrfs_root *root = container_of(kobj, struct btrfs_root,
163 root_kobj);
164 complete(&root->kobj_unregister);
165}
166
167static const struct sysfs_ops btrfs_super_attr_ops = {
168 .show = btrfs_super_attr_show,
169 .store = btrfs_super_attr_store,
170};
171
172static const struct sysfs_ops btrfs_root_attr_ops = {
173 .show = btrfs_root_attr_show,
174 .store = btrfs_root_attr_store,
175};
176
177/* /sys/fs/btrfs/ entry */ 31/* /sys/fs/btrfs/ entry */
178static struct kset *btrfs_kset; 32static struct kset *btrfs_kset;
179 33
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index dc80f7156923..51dcec86757f 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -35,6 +35,7 @@ static noinline void put_transaction(struct btrfs_transaction *transaction)
35{ 35{
36 WARN_ON(atomic_read(&transaction->use_count) == 0); 36 WARN_ON(atomic_read(&transaction->use_count) == 0);
37 if (atomic_dec_and_test(&transaction->use_count)) { 37 if (atomic_dec_and_test(&transaction->use_count)) {
38 BUG_ON(!list_empty(&transaction->list));
38 memset(transaction, 0, sizeof(*transaction)); 39 memset(transaction, 0, sizeof(*transaction));
39 kmem_cache_free(btrfs_transaction_cachep, transaction); 40 kmem_cache_free(btrfs_transaction_cachep, transaction);
40 } 41 }
@@ -49,46 +50,72 @@ static noinline void switch_commit_root(struct btrfs_root *root)
49/* 50/*
50 * either allocate a new transaction or hop into the existing one 51 * either allocate a new transaction or hop into the existing one
51 */ 52 */
52static noinline int join_transaction(struct btrfs_root *root) 53static noinline int join_transaction(struct btrfs_root *root, int nofail)
53{ 54{
54 struct btrfs_transaction *cur_trans; 55 struct btrfs_transaction *cur_trans;
56
57 spin_lock(&root->fs_info->trans_lock);
58 if (root->fs_info->trans_no_join) {
59 if (!nofail) {
60 spin_unlock(&root->fs_info->trans_lock);
61 return -EBUSY;
62 }
63 }
64
55 cur_trans = root->fs_info->running_transaction; 65 cur_trans = root->fs_info->running_transaction;
56 if (!cur_trans) { 66 if (cur_trans) {
57 cur_trans = kmem_cache_alloc(btrfs_transaction_cachep, 67 atomic_inc(&cur_trans->use_count);
58 GFP_NOFS); 68 atomic_inc(&cur_trans->num_writers);
59 if (!cur_trans) 69 cur_trans->num_joined++;
60 return -ENOMEM; 70 spin_unlock(&root->fs_info->trans_lock);
61 root->fs_info->generation++; 71 return 0;
62 atomic_set(&cur_trans->num_writers, 1); 72 }
63 cur_trans->num_joined = 0; 73 spin_unlock(&root->fs_info->trans_lock);
64 cur_trans->transid = root->fs_info->generation; 74
65 init_waitqueue_head(&cur_trans->writer_wait); 75 cur_trans = kmem_cache_alloc(btrfs_transaction_cachep, GFP_NOFS);
66 init_waitqueue_head(&cur_trans->commit_wait); 76 if (!cur_trans)
67 cur_trans->in_commit = 0; 77 return -ENOMEM;
68 cur_trans->blocked = 0; 78 spin_lock(&root->fs_info->trans_lock);
69 atomic_set(&cur_trans->use_count, 1); 79 if (root->fs_info->running_transaction) {
70 cur_trans->commit_done = 0; 80 kmem_cache_free(btrfs_transaction_cachep, cur_trans);
71 cur_trans->start_time = get_seconds(); 81 cur_trans = root->fs_info->running_transaction;
72 82 atomic_inc(&cur_trans->use_count);
73 cur_trans->delayed_refs.root = RB_ROOT;
74 cur_trans->delayed_refs.num_entries = 0;
75 cur_trans->delayed_refs.num_heads_ready = 0;
76 cur_trans->delayed_refs.num_heads = 0;
77 cur_trans->delayed_refs.flushing = 0;
78 cur_trans->delayed_refs.run_delayed_start = 0;
79 spin_lock_init(&cur_trans->delayed_refs.lock);
80
81 INIT_LIST_HEAD(&cur_trans->pending_snapshots);
82 list_add_tail(&cur_trans->list, &root->fs_info->trans_list);
83 extent_io_tree_init(&cur_trans->dirty_pages,
84 root->fs_info->btree_inode->i_mapping);
85 spin_lock(&root->fs_info->new_trans_lock);
86 root->fs_info->running_transaction = cur_trans;
87 spin_unlock(&root->fs_info->new_trans_lock);
88 } else {
89 atomic_inc(&cur_trans->num_writers); 83 atomic_inc(&cur_trans->num_writers);
90 cur_trans->num_joined++; 84 cur_trans->num_joined++;
85 spin_unlock(&root->fs_info->trans_lock);
86 return 0;
91 } 87 }
88 atomic_set(&cur_trans->num_writers, 1);
89 cur_trans->num_joined = 0;
90 init_waitqueue_head(&cur_trans->writer_wait);
91 init_waitqueue_head(&cur_trans->commit_wait);
92 cur_trans->in_commit = 0;
93 cur_trans->blocked = 0;
94 /*
95 * One for this trans handle, one so it will live on until we
96 * commit the transaction.
97 */
98 atomic_set(&cur_trans->use_count, 2);
99 cur_trans->commit_done = 0;
100 cur_trans->start_time = get_seconds();
101
102 cur_trans->delayed_refs.root = RB_ROOT;
103 cur_trans->delayed_refs.num_entries = 0;
104 cur_trans->delayed_refs.num_heads_ready = 0;
105 cur_trans->delayed_refs.num_heads = 0;
106 cur_trans->delayed_refs.flushing = 0;
107 cur_trans->delayed_refs.run_delayed_start = 0;
108 spin_lock_init(&cur_trans->commit_lock);
109 spin_lock_init(&cur_trans->delayed_refs.lock);
110
111 INIT_LIST_HEAD(&cur_trans->pending_snapshots);
112 list_add_tail(&cur_trans->list, &root->fs_info->trans_list);
113 extent_io_tree_init(&cur_trans->dirty_pages,
114 root->fs_info->btree_inode->i_mapping);
115 root->fs_info->generation++;
116 cur_trans->transid = root->fs_info->generation;
117 root->fs_info->running_transaction = cur_trans;
118 spin_unlock(&root->fs_info->trans_lock);
92 119
93 return 0; 120 return 0;
94} 121}
@@ -99,36 +126,82 @@ static noinline int join_transaction(struct btrfs_root *root)
99 * to make sure the old root from before we joined the transaction is deleted 126 * to make sure the old root from before we joined the transaction is deleted
100 * when the transaction commits 127 * when the transaction commits
101 */ 128 */
102static noinline int record_root_in_trans(struct btrfs_trans_handle *trans, 129static int record_root_in_trans(struct btrfs_trans_handle *trans,
103 struct btrfs_root *root) 130 struct btrfs_root *root)
104{ 131{
105 if (root->ref_cows && root->last_trans < trans->transid) { 132 if (root->ref_cows && root->last_trans < trans->transid) {
106 WARN_ON(root == root->fs_info->extent_root); 133 WARN_ON(root == root->fs_info->extent_root);
107 WARN_ON(root->commit_root != root->node); 134 WARN_ON(root->commit_root != root->node);
108 135
136 /*
137 * see below for in_trans_setup usage rules
138 * we have the reloc mutex held now, so there
139 * is only one writer in this function
140 */
141 root->in_trans_setup = 1;
142
143 /* make sure readers find in_trans_setup before
144 * they find our root->last_trans update
145 */
146 smp_wmb();
147
148 spin_lock(&root->fs_info->fs_roots_radix_lock);
149 if (root->last_trans == trans->transid) {
150 spin_unlock(&root->fs_info->fs_roots_radix_lock);
151 return 0;
152 }
109 radix_tree_tag_set(&root->fs_info->fs_roots_radix, 153 radix_tree_tag_set(&root->fs_info->fs_roots_radix,
110 (unsigned long)root->root_key.objectid, 154 (unsigned long)root->root_key.objectid,
111 BTRFS_ROOT_TRANS_TAG); 155 BTRFS_ROOT_TRANS_TAG);
156 spin_unlock(&root->fs_info->fs_roots_radix_lock);
112 root->last_trans = trans->transid; 157 root->last_trans = trans->transid;
158
159 /* this is pretty tricky. We don't want to
160 * take the relocation lock in btrfs_record_root_in_trans
161 * unless we're really doing the first setup for this root in
162 * this transaction.
163 *
164 * Normally we'd use root->last_trans as a flag to decide
165 * if we want to take the expensive mutex.
166 *
167 * But, we have to set root->last_trans before we
168 * init the relocation root, otherwise, we trip over warnings
169 * in ctree.c. The solution used here is to flag ourselves
170 * with root->in_trans_setup. When this is 1, we're still
171 * fixing up the reloc trees and everyone must wait.
172 *
173 * When this is zero, they can trust root->last_trans and fly
174 * through btrfs_record_root_in_trans without having to take the
175 * lock. smp_wmb() makes sure that all the writes above are
176 * done before we pop in the zero below
177 */
113 btrfs_init_reloc_root(trans, root); 178 btrfs_init_reloc_root(trans, root);
179 smp_wmb();
180 root->in_trans_setup = 0;
114 } 181 }
115 return 0; 182 return 0;
116} 183}
117 184
185
118int btrfs_record_root_in_trans(struct btrfs_trans_handle *trans, 186int btrfs_record_root_in_trans(struct btrfs_trans_handle *trans,
119 struct btrfs_root *root) 187 struct btrfs_root *root)
120{ 188{
121 if (!root->ref_cows) 189 if (!root->ref_cows)
122 return 0; 190 return 0;
123 191
124 mutex_lock(&root->fs_info->trans_mutex); 192 /*
125 if (root->last_trans == trans->transid) { 193 * see record_root_in_trans for comments about in_trans_setup usage
126 mutex_unlock(&root->fs_info->trans_mutex); 194 * and barriers
195 */
196 smp_rmb();
197 if (root->last_trans == trans->transid &&
198 !root->in_trans_setup)
127 return 0; 199 return 0;
128 }
129 200
201 mutex_lock(&root->fs_info->reloc_mutex);
130 record_root_in_trans(trans, root); 202 record_root_in_trans(trans, root);
131 mutex_unlock(&root->fs_info->trans_mutex); 203 mutex_unlock(&root->fs_info->reloc_mutex);
204
132 return 0; 205 return 0;
133} 206}
134 207
@@ -140,21 +213,23 @@ static void wait_current_trans(struct btrfs_root *root)
140{ 213{
141 struct btrfs_transaction *cur_trans; 214 struct btrfs_transaction *cur_trans;
142 215
216 spin_lock(&root->fs_info->trans_lock);
143 cur_trans = root->fs_info->running_transaction; 217 cur_trans = root->fs_info->running_transaction;
144 if (cur_trans && cur_trans->blocked) { 218 if (cur_trans && cur_trans->blocked) {
145 DEFINE_WAIT(wait); 219 DEFINE_WAIT(wait);
146 atomic_inc(&cur_trans->use_count); 220 atomic_inc(&cur_trans->use_count);
221 spin_unlock(&root->fs_info->trans_lock);
147 while (1) { 222 while (1) {
148 prepare_to_wait(&root->fs_info->transaction_wait, &wait, 223 prepare_to_wait(&root->fs_info->transaction_wait, &wait,
149 TASK_UNINTERRUPTIBLE); 224 TASK_UNINTERRUPTIBLE);
150 if (!cur_trans->blocked) 225 if (!cur_trans->blocked)
151 break; 226 break;
152 mutex_unlock(&root->fs_info->trans_mutex);
153 schedule(); 227 schedule();
154 mutex_lock(&root->fs_info->trans_mutex);
155 } 228 }
156 finish_wait(&root->fs_info->transaction_wait, &wait); 229 finish_wait(&root->fs_info->transaction_wait, &wait);
157 put_transaction(cur_trans); 230 put_transaction(cur_trans);
231 } else {
232 spin_unlock(&root->fs_info->trans_lock);
158 } 233 }
159} 234}
160 235
@@ -167,10 +242,16 @@ enum btrfs_trans_type {
167 242
168static int may_wait_transaction(struct btrfs_root *root, int type) 243static int may_wait_transaction(struct btrfs_root *root, int type)
169{ 244{
170 if (!root->fs_info->log_root_recovering && 245 if (root->fs_info->log_root_recovering)
171 ((type == TRANS_START && !root->fs_info->open_ioctl_trans) || 246 return 0;
172 type == TRANS_USERSPACE)) 247
248 if (type == TRANS_USERSPACE)
173 return 1; 249 return 1;
250
251 if (type == TRANS_START &&
252 !atomic_read(&root->fs_info->open_ioctl_trans))
253 return 1;
254
174 return 0; 255 return 0;
175} 256}
176 257
@@ -184,36 +265,44 @@ static struct btrfs_trans_handle *start_transaction(struct btrfs_root *root,
184 265
185 if (root->fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) 266 if (root->fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR)
186 return ERR_PTR(-EROFS); 267 return ERR_PTR(-EROFS);
268
269 if (current->journal_info) {
270 WARN_ON(type != TRANS_JOIN && type != TRANS_JOIN_NOLOCK);
271 h = current->journal_info;
272 h->use_count++;
273 h->orig_rsv = h->block_rsv;
274 h->block_rsv = NULL;
275 goto got_it;
276 }
187again: 277again:
188 h = kmem_cache_alloc(btrfs_trans_handle_cachep, GFP_NOFS); 278 h = kmem_cache_alloc(btrfs_trans_handle_cachep, GFP_NOFS);
189 if (!h) 279 if (!h)
190 return ERR_PTR(-ENOMEM); 280 return ERR_PTR(-ENOMEM);
191 281
192 if (type != TRANS_JOIN_NOLOCK)
193 mutex_lock(&root->fs_info->trans_mutex);
194 if (may_wait_transaction(root, type)) 282 if (may_wait_transaction(root, type))
195 wait_current_trans(root); 283 wait_current_trans(root);
196 284
197 ret = join_transaction(root); 285 do {
286 ret = join_transaction(root, type == TRANS_JOIN_NOLOCK);
287 if (ret == -EBUSY)
288 wait_current_trans(root);
289 } while (ret == -EBUSY);
290
198 if (ret < 0) { 291 if (ret < 0) {
199 kmem_cache_free(btrfs_trans_handle_cachep, h); 292 kmem_cache_free(btrfs_trans_handle_cachep, h);
200 if (type != TRANS_JOIN_NOLOCK)
201 mutex_unlock(&root->fs_info->trans_mutex);
202 return ERR_PTR(ret); 293 return ERR_PTR(ret);
203 } 294 }
204 295
205 cur_trans = root->fs_info->running_transaction; 296 cur_trans = root->fs_info->running_transaction;
206 atomic_inc(&cur_trans->use_count);
207 if (type != TRANS_JOIN_NOLOCK)
208 mutex_unlock(&root->fs_info->trans_mutex);
209 297
210 h->transid = cur_trans->transid; 298 h->transid = cur_trans->transid;
211 h->transaction = cur_trans; 299 h->transaction = cur_trans;
212 h->blocks_used = 0; 300 h->blocks_used = 0;
213 h->block_group = 0;
214 h->bytes_reserved = 0; 301 h->bytes_reserved = 0;
215 h->delayed_ref_updates = 0; 302 h->delayed_ref_updates = 0;
303 h->use_count = 1;
216 h->block_rsv = NULL; 304 h->block_rsv = NULL;
305 h->orig_rsv = NULL;
217 306
218 smp_mb(); 307 smp_mb();
219 if (cur_trans->blocked && may_wait_transaction(root, type)) { 308 if (cur_trans->blocked && may_wait_transaction(root, type)) {
@@ -241,11 +330,8 @@ again:
241 } 330 }
242 } 331 }
243 332
244 if (type != TRANS_JOIN_NOLOCK) 333got_it:
245 mutex_lock(&root->fs_info->trans_mutex); 334 btrfs_record_root_in_trans(h, root);
246 record_root_in_trans(h, root);
247 if (type != TRANS_JOIN_NOLOCK)
248 mutex_unlock(&root->fs_info->trans_mutex);
249 335
250 if (!current->journal_info && type != TRANS_USERSPACE) 336 if (!current->journal_info && type != TRANS_USERSPACE)
251 current->journal_info = h; 337 current->journal_info = h;
@@ -257,22 +343,19 @@ struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root,
257{ 343{
258 return start_transaction(root, num_items, TRANS_START); 344 return start_transaction(root, num_items, TRANS_START);
259} 345}
260struct btrfs_trans_handle *btrfs_join_transaction(struct btrfs_root *root, 346struct btrfs_trans_handle *btrfs_join_transaction(struct btrfs_root *root)
261 int num_blocks)
262{ 347{
263 return start_transaction(root, 0, TRANS_JOIN); 348 return start_transaction(root, 0, TRANS_JOIN);
264} 349}
265 350
266struct btrfs_trans_handle *btrfs_join_transaction_nolock(struct btrfs_root *root, 351struct btrfs_trans_handle *btrfs_join_transaction_nolock(struct btrfs_root *root)
267 int num_blocks)
268{ 352{
269 return start_transaction(root, 0, TRANS_JOIN_NOLOCK); 353 return start_transaction(root, 0, TRANS_JOIN_NOLOCK);
270} 354}
271 355
272struct btrfs_trans_handle *btrfs_start_ioctl_transaction(struct btrfs_root *r, 356struct btrfs_trans_handle *btrfs_start_ioctl_transaction(struct btrfs_root *root)
273 int num_blocks)
274{ 357{
275 return start_transaction(r, 0, TRANS_USERSPACE); 358 return start_transaction(root, 0, TRANS_USERSPACE);
276} 359}
277 360
278/* wait for a transaction commit to be fully complete */ 361/* wait for a transaction commit to be fully complete */
@@ -280,17 +363,13 @@ static noinline int wait_for_commit(struct btrfs_root *root,
280 struct btrfs_transaction *commit) 363 struct btrfs_transaction *commit)
281{ 364{
282 DEFINE_WAIT(wait); 365 DEFINE_WAIT(wait);
283 mutex_lock(&root->fs_info->trans_mutex);
284 while (!commit->commit_done) { 366 while (!commit->commit_done) {
285 prepare_to_wait(&commit->commit_wait, &wait, 367 prepare_to_wait(&commit->commit_wait, &wait,
286 TASK_UNINTERRUPTIBLE); 368 TASK_UNINTERRUPTIBLE);
287 if (commit->commit_done) 369 if (commit->commit_done)
288 break; 370 break;
289 mutex_unlock(&root->fs_info->trans_mutex);
290 schedule(); 371 schedule();
291 mutex_lock(&root->fs_info->trans_mutex);
292 } 372 }
293 mutex_unlock(&root->fs_info->trans_mutex);
294 finish_wait(&commit->commit_wait, &wait); 373 finish_wait(&commit->commit_wait, &wait);
295 return 0; 374 return 0;
296} 375}
@@ -300,59 +379,56 @@ int btrfs_wait_for_commit(struct btrfs_root *root, u64 transid)
300 struct btrfs_transaction *cur_trans = NULL, *t; 379 struct btrfs_transaction *cur_trans = NULL, *t;
301 int ret; 380 int ret;
302 381
303 mutex_lock(&root->fs_info->trans_mutex);
304
305 ret = 0; 382 ret = 0;
306 if (transid) { 383 if (transid) {
307 if (transid <= root->fs_info->last_trans_committed) 384 if (transid <= root->fs_info->last_trans_committed)
308 goto out_unlock; 385 goto out;
309 386
310 /* find specified transaction */ 387 /* find specified transaction */
388 spin_lock(&root->fs_info->trans_lock);
311 list_for_each_entry(t, &root->fs_info->trans_list, list) { 389 list_for_each_entry(t, &root->fs_info->trans_list, list) {
312 if (t->transid == transid) { 390 if (t->transid == transid) {
313 cur_trans = t; 391 cur_trans = t;
392 atomic_inc(&cur_trans->use_count);
314 break; 393 break;
315 } 394 }
316 if (t->transid > transid) 395 if (t->transid > transid)
317 break; 396 break;
318 } 397 }
398 spin_unlock(&root->fs_info->trans_lock);
319 ret = -EINVAL; 399 ret = -EINVAL;
320 if (!cur_trans) 400 if (!cur_trans)
321 goto out_unlock; /* bad transid */ 401 goto out; /* bad transid */
322 } else { 402 } else {
323 /* find newest transaction that is committing | committed */ 403 /* find newest transaction that is committing | committed */
404 spin_lock(&root->fs_info->trans_lock);
324 list_for_each_entry_reverse(t, &root->fs_info->trans_list, 405 list_for_each_entry_reverse(t, &root->fs_info->trans_list,
325 list) { 406 list) {
326 if (t->in_commit) { 407 if (t->in_commit) {
327 if (t->commit_done) 408 if (t->commit_done)
328 goto out_unlock; 409 break;
329 cur_trans = t; 410 cur_trans = t;
411 atomic_inc(&cur_trans->use_count);
330 break; 412 break;
331 } 413 }
332 } 414 }
415 spin_unlock(&root->fs_info->trans_lock);
333 if (!cur_trans) 416 if (!cur_trans)
334 goto out_unlock; /* nothing committing|committed */ 417 goto out; /* nothing committing|committed */
335 } 418 }
336 419
337 atomic_inc(&cur_trans->use_count);
338 mutex_unlock(&root->fs_info->trans_mutex);
339
340 wait_for_commit(root, cur_trans); 420 wait_for_commit(root, cur_trans);
341 421
342 mutex_lock(&root->fs_info->trans_mutex);
343 put_transaction(cur_trans); 422 put_transaction(cur_trans);
344 ret = 0; 423 ret = 0;
345out_unlock: 424out:
346 mutex_unlock(&root->fs_info->trans_mutex);
347 return ret; 425 return ret;
348} 426}
349 427
350void btrfs_throttle(struct btrfs_root *root) 428void btrfs_throttle(struct btrfs_root *root)
351{ 429{
352 mutex_lock(&root->fs_info->trans_mutex); 430 if (!atomic_read(&root->fs_info->open_ioctl_trans))
353 if (!root->fs_info->open_ioctl_trans)
354 wait_current_trans(root); 431 wait_current_trans(root);
355 mutex_unlock(&root->fs_info->trans_mutex);
356} 432}
357 433
358static int should_end_transaction(struct btrfs_trans_handle *trans, 434static int should_end_transaction(struct btrfs_trans_handle *trans,
@@ -370,6 +446,7 @@ int btrfs_should_end_transaction(struct btrfs_trans_handle *trans,
370 struct btrfs_transaction *cur_trans = trans->transaction; 446 struct btrfs_transaction *cur_trans = trans->transaction;
371 int updates; 447 int updates;
372 448
449 smp_mb();
373 if (cur_trans->blocked || cur_trans->delayed_refs.flushing) 450 if (cur_trans->blocked || cur_trans->delayed_refs.flushing)
374 return 1; 451 return 1;
375 452
@@ -388,6 +465,11 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
388 struct btrfs_fs_info *info = root->fs_info; 465 struct btrfs_fs_info *info = root->fs_info;
389 int count = 0; 466 int count = 0;
390 467
468 if (--trans->use_count) {
469 trans->block_rsv = trans->orig_rsv;
470 return 0;
471 }
472
391 while (count < 4) { 473 while (count < 4) {
392 unsigned long cur = trans->delayed_ref_updates; 474 unsigned long cur = trans->delayed_ref_updates;
393 trans->delayed_ref_updates = 0; 475 trans->delayed_ref_updates = 0;
@@ -410,9 +492,11 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
410 492
411 btrfs_trans_release_metadata(trans, root); 493 btrfs_trans_release_metadata(trans, root);
412 494
413 if (lock && !root->fs_info->open_ioctl_trans && 495 if (lock && !atomic_read(&root->fs_info->open_ioctl_trans) &&
414 should_end_transaction(trans, root)) 496 should_end_transaction(trans, root)) {
415 trans->transaction->blocked = 1; 497 trans->transaction->blocked = 1;
498 smp_wmb();
499 }
416 500
417 if (lock && cur_trans->blocked && !cur_trans->in_commit) { 501 if (lock && cur_trans->blocked && !cur_trans->in_commit) {
418 if (throttle) 502 if (throttle)
@@ -703,9 +787,9 @@ static noinline int commit_cowonly_roots(struct btrfs_trans_handle *trans,
703 */ 787 */
704int btrfs_add_dead_root(struct btrfs_root *root) 788int btrfs_add_dead_root(struct btrfs_root *root)
705{ 789{
706 mutex_lock(&root->fs_info->trans_mutex); 790 spin_lock(&root->fs_info->trans_lock);
707 list_add(&root->root_list, &root->fs_info->dead_roots); 791 list_add(&root->root_list, &root->fs_info->dead_roots);
708 mutex_unlock(&root->fs_info->trans_mutex); 792 spin_unlock(&root->fs_info->trans_lock);
709 return 0; 793 return 0;
710} 794}
711 795
@@ -721,6 +805,7 @@ static noinline int commit_fs_roots(struct btrfs_trans_handle *trans,
721 int ret; 805 int ret;
722 int err = 0; 806 int err = 0;
723 807
808 spin_lock(&fs_info->fs_roots_radix_lock);
724 while (1) { 809 while (1) {
725 ret = radix_tree_gang_lookup_tag(&fs_info->fs_roots_radix, 810 ret = radix_tree_gang_lookup_tag(&fs_info->fs_roots_radix,
726 (void **)gang, 0, 811 (void **)gang, 0,
@@ -733,6 +818,7 @@ static noinline int commit_fs_roots(struct btrfs_trans_handle *trans,
733 radix_tree_tag_clear(&fs_info->fs_roots_radix, 818 radix_tree_tag_clear(&fs_info->fs_roots_radix,
734 (unsigned long)root->root_key.objectid, 819 (unsigned long)root->root_key.objectid,
735 BTRFS_ROOT_TRANS_TAG); 820 BTRFS_ROOT_TRANS_TAG);
821 spin_unlock(&fs_info->fs_roots_radix_lock);
736 822
737 btrfs_free_log(trans, root); 823 btrfs_free_log(trans, root);
738 btrfs_update_reloc_root(trans, root); 824 btrfs_update_reloc_root(trans, root);
@@ -753,10 +839,12 @@ static noinline int commit_fs_roots(struct btrfs_trans_handle *trans,
753 err = btrfs_update_root(trans, fs_info->tree_root, 839 err = btrfs_update_root(trans, fs_info->tree_root,
754 &root->root_key, 840 &root->root_key,
755 &root->root_item); 841 &root->root_item);
842 spin_lock(&fs_info->fs_roots_radix_lock);
756 if (err) 843 if (err)
757 break; 844 break;
758 } 845 }
759 } 846 }
847 spin_unlock(&fs_info->fs_roots_radix_lock);
760 return err; 848 return err;
761} 849}
762 850
@@ -786,7 +874,7 @@ int btrfs_defrag_root(struct btrfs_root *root, int cacheonly)
786 btrfs_btree_balance_dirty(info->tree_root, nr); 874 btrfs_btree_balance_dirty(info->tree_root, nr);
787 cond_resched(); 875 cond_resched();
788 876
789 if (root->fs_info->closing || ret != -EAGAIN) 877 if (btrfs_fs_closing(root->fs_info) || ret != -EAGAIN)
790 break; 878 break;
791 } 879 }
792 root->defrag_running = 0; 880 root->defrag_running = 0;
@@ -869,6 +957,15 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
869 ret = btrfs_update_inode(trans, parent_root, parent_inode); 957 ret = btrfs_update_inode(trans, parent_root, parent_inode);
870 BUG_ON(ret); 958 BUG_ON(ret);
871 959
960 /*
961 * pull in the delayed directory update
962 * and the delayed inode item
963 * otherwise we corrupt the FS during
964 * snapshot
965 */
966 ret = btrfs_run_delayed_items(trans, root);
967 BUG_ON(ret);
968
872 record_root_in_trans(trans, root); 969 record_root_in_trans(trans, root);
873 btrfs_set_root_last_snapshot(&root->root_item, trans->transid); 970 btrfs_set_root_last_snapshot(&root->root_item, trans->transid);
874 memcpy(new_root_item, &root->root_item, sizeof(*new_root_item)); 971 memcpy(new_root_item, &root->root_item, sizeof(*new_root_item));
@@ -930,14 +1027,6 @@ static noinline int create_pending_snapshots(struct btrfs_trans_handle *trans,
930 int ret; 1027 int ret;
931 1028
932 list_for_each_entry(pending, head, list) { 1029 list_for_each_entry(pending, head, list) {
933 /*
934 * We must deal with the delayed items before creating
935 * snapshots, or we will create a snapthot with inconsistent
936 * information.
937 */
938 ret = btrfs_run_delayed_items(trans, fs_info->fs_root);
939 BUG_ON(ret);
940
941 ret = create_pending_snapshot(trans, fs_info, pending); 1030 ret = create_pending_snapshot(trans, fs_info, pending);
942 BUG_ON(ret); 1031 BUG_ON(ret);
943 } 1032 }
@@ -967,20 +1056,20 @@ static void update_super_roots(struct btrfs_root *root)
967int btrfs_transaction_in_commit(struct btrfs_fs_info *info) 1056int btrfs_transaction_in_commit(struct btrfs_fs_info *info)
968{ 1057{
969 int ret = 0; 1058 int ret = 0;
970 spin_lock(&info->new_trans_lock); 1059 spin_lock(&info->trans_lock);
971 if (info->running_transaction) 1060 if (info->running_transaction)
972 ret = info->running_transaction->in_commit; 1061 ret = info->running_transaction->in_commit;
973 spin_unlock(&info->new_trans_lock); 1062 spin_unlock(&info->trans_lock);
974 return ret; 1063 return ret;
975} 1064}
976 1065
977int btrfs_transaction_blocked(struct btrfs_fs_info *info) 1066int btrfs_transaction_blocked(struct btrfs_fs_info *info)
978{ 1067{
979 int ret = 0; 1068 int ret = 0;
980 spin_lock(&info->new_trans_lock); 1069 spin_lock(&info->trans_lock);
981 if (info->running_transaction) 1070 if (info->running_transaction)
982 ret = info->running_transaction->blocked; 1071 ret = info->running_transaction->blocked;
983 spin_unlock(&info->new_trans_lock); 1072 spin_unlock(&info->trans_lock);
984 return ret; 1073 return ret;
985} 1074}
986 1075
@@ -1004,9 +1093,7 @@ static void wait_current_trans_commit_start(struct btrfs_root *root,
1004 &wait); 1093 &wait);
1005 break; 1094 break;
1006 } 1095 }
1007 mutex_unlock(&root->fs_info->trans_mutex);
1008 schedule(); 1096 schedule();
1009 mutex_lock(&root->fs_info->trans_mutex);
1010 finish_wait(&root->fs_info->transaction_blocked_wait, &wait); 1097 finish_wait(&root->fs_info->transaction_blocked_wait, &wait);
1011 } 1098 }
1012} 1099}
@@ -1032,9 +1119,7 @@ static void wait_current_trans_commit_start_and_unblock(struct btrfs_root *root,
1032 &wait); 1119 &wait);
1033 break; 1120 break;
1034 } 1121 }
1035 mutex_unlock(&root->fs_info->trans_mutex);
1036 schedule(); 1122 schedule();
1037 mutex_lock(&root->fs_info->trans_mutex);
1038 finish_wait(&root->fs_info->transaction_wait, 1123 finish_wait(&root->fs_info->transaction_wait,
1039 &wait); 1124 &wait);
1040 } 1125 }
@@ -1072,7 +1157,7 @@ int btrfs_commit_transaction_async(struct btrfs_trans_handle *trans,
1072 1157
1073 INIT_DELAYED_WORK(&ac->work, do_async_commit); 1158 INIT_DELAYED_WORK(&ac->work, do_async_commit);
1074 ac->root = root; 1159 ac->root = root;
1075 ac->newtrans = btrfs_join_transaction(root, 0); 1160 ac->newtrans = btrfs_join_transaction(root);
1076 if (IS_ERR(ac->newtrans)) { 1161 if (IS_ERR(ac->newtrans)) {
1077 int err = PTR_ERR(ac->newtrans); 1162 int err = PTR_ERR(ac->newtrans);
1078 kfree(ac); 1163 kfree(ac);
@@ -1080,23 +1165,22 @@ int btrfs_commit_transaction_async(struct btrfs_trans_handle *trans,
1080 } 1165 }
1081 1166
1082 /* take transaction reference */ 1167 /* take transaction reference */
1083 mutex_lock(&root->fs_info->trans_mutex);
1084 cur_trans = trans->transaction; 1168 cur_trans = trans->transaction;
1085 atomic_inc(&cur_trans->use_count); 1169 atomic_inc(&cur_trans->use_count);
1086 mutex_unlock(&root->fs_info->trans_mutex);
1087 1170
1088 btrfs_end_transaction(trans, root); 1171 btrfs_end_transaction(trans, root);
1089 schedule_delayed_work(&ac->work, 0); 1172 schedule_delayed_work(&ac->work, 0);
1090 1173
1091 /* wait for transaction to start and unblock */ 1174 /* wait for transaction to start and unblock */
1092 mutex_lock(&root->fs_info->trans_mutex);
1093 if (wait_for_unblock) 1175 if (wait_for_unblock)
1094 wait_current_trans_commit_start_and_unblock(root, cur_trans); 1176 wait_current_trans_commit_start_and_unblock(root, cur_trans);
1095 else 1177 else
1096 wait_current_trans_commit_start(root, cur_trans); 1178 wait_current_trans_commit_start(root, cur_trans);
1097 put_transaction(cur_trans);
1098 mutex_unlock(&root->fs_info->trans_mutex);
1099 1179
1180 if (current->journal_info == trans)
1181 current->journal_info = NULL;
1182
1183 put_transaction(cur_trans);
1100 return 0; 1184 return 0;
1101} 1185}
1102 1186
@@ -1139,38 +1223,41 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
1139 ret = btrfs_run_delayed_refs(trans, root, 0); 1223 ret = btrfs_run_delayed_refs(trans, root, 0);
1140 BUG_ON(ret); 1224 BUG_ON(ret);
1141 1225
1142 mutex_lock(&root->fs_info->trans_mutex); 1226 spin_lock(&cur_trans->commit_lock);
1143 if (cur_trans->in_commit) { 1227 if (cur_trans->in_commit) {
1228 spin_unlock(&cur_trans->commit_lock);
1144 atomic_inc(&cur_trans->use_count); 1229 atomic_inc(&cur_trans->use_count);
1145 mutex_unlock(&root->fs_info->trans_mutex);
1146 btrfs_end_transaction(trans, root); 1230 btrfs_end_transaction(trans, root);
1147 1231
1148 ret = wait_for_commit(root, cur_trans); 1232 ret = wait_for_commit(root, cur_trans);
1149 BUG_ON(ret); 1233 BUG_ON(ret);
1150 1234
1151 mutex_lock(&root->fs_info->trans_mutex);
1152 put_transaction(cur_trans); 1235 put_transaction(cur_trans);
1153 mutex_unlock(&root->fs_info->trans_mutex);
1154 1236
1155 return 0; 1237 return 0;
1156 } 1238 }
1157 1239
1158 trans->transaction->in_commit = 1; 1240 trans->transaction->in_commit = 1;
1159 trans->transaction->blocked = 1; 1241 trans->transaction->blocked = 1;
1242 spin_unlock(&cur_trans->commit_lock);
1160 wake_up(&root->fs_info->transaction_blocked_wait); 1243 wake_up(&root->fs_info->transaction_blocked_wait);
1161 1244
1245 spin_lock(&root->fs_info->trans_lock);
1162 if (cur_trans->list.prev != &root->fs_info->trans_list) { 1246 if (cur_trans->list.prev != &root->fs_info->trans_list) {
1163 prev_trans = list_entry(cur_trans->list.prev, 1247 prev_trans = list_entry(cur_trans->list.prev,
1164 struct btrfs_transaction, list); 1248 struct btrfs_transaction, list);
1165 if (!prev_trans->commit_done) { 1249 if (!prev_trans->commit_done) {
1166 atomic_inc(&prev_trans->use_count); 1250 atomic_inc(&prev_trans->use_count);
1167 mutex_unlock(&root->fs_info->trans_mutex); 1251 spin_unlock(&root->fs_info->trans_lock);
1168 1252
1169 wait_for_commit(root, prev_trans); 1253 wait_for_commit(root, prev_trans);
1170 1254
1171 mutex_lock(&root->fs_info->trans_mutex);
1172 put_transaction(prev_trans); 1255 put_transaction(prev_trans);
1256 } else {
1257 spin_unlock(&root->fs_info->trans_lock);
1173 } 1258 }
1259 } else {
1260 spin_unlock(&root->fs_info->trans_lock);
1174 } 1261 }
1175 1262
1176 if (now < cur_trans->start_time || now - cur_trans->start_time < 1) 1263 if (now < cur_trans->start_time || now - cur_trans->start_time < 1)
@@ -1178,12 +1265,12 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
1178 1265
1179 do { 1266 do {
1180 int snap_pending = 0; 1267 int snap_pending = 0;
1268
1181 joined = cur_trans->num_joined; 1269 joined = cur_trans->num_joined;
1182 if (!list_empty(&trans->transaction->pending_snapshots)) 1270 if (!list_empty(&trans->transaction->pending_snapshots))
1183 snap_pending = 1; 1271 snap_pending = 1;
1184 1272
1185 WARN_ON(cur_trans != trans->transaction); 1273 WARN_ON(cur_trans != trans->transaction);
1186 mutex_unlock(&root->fs_info->trans_mutex);
1187 1274
1188 if (flush_on_commit || snap_pending) { 1275 if (flush_on_commit || snap_pending) {
1189 btrfs_start_delalloc_inodes(root, 1); 1276 btrfs_start_delalloc_inodes(root, 1);
@@ -1206,26 +1293,48 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
1206 prepare_to_wait(&cur_trans->writer_wait, &wait, 1293 prepare_to_wait(&cur_trans->writer_wait, &wait,
1207 TASK_UNINTERRUPTIBLE); 1294 TASK_UNINTERRUPTIBLE);
1208 1295
1209 smp_mb();
1210 if (atomic_read(&cur_trans->num_writers) > 1) 1296 if (atomic_read(&cur_trans->num_writers) > 1)
1211 schedule_timeout(MAX_SCHEDULE_TIMEOUT); 1297 schedule_timeout(MAX_SCHEDULE_TIMEOUT);
1212 else if (should_grow) 1298 else if (should_grow)
1213 schedule_timeout(1); 1299 schedule_timeout(1);
1214 1300
1215 mutex_lock(&root->fs_info->trans_mutex);
1216 finish_wait(&cur_trans->writer_wait, &wait); 1301 finish_wait(&cur_trans->writer_wait, &wait);
1217 } while (atomic_read(&cur_trans->num_writers) > 1 || 1302 } while (atomic_read(&cur_trans->num_writers) > 1 ||
1218 (should_grow && cur_trans->num_joined != joined)); 1303 (should_grow && cur_trans->num_joined != joined));
1219 1304
1220 ret = create_pending_snapshots(trans, root->fs_info); 1305 /*
1221 BUG_ON(ret); 1306 * Ok now we need to make sure to block out any other joins while we
1307 * commit the transaction. We could have started a join before setting
1308 * no_join so make sure to wait for num_writers to == 1 again.
1309 */
1310 spin_lock(&root->fs_info->trans_lock);
1311 root->fs_info->trans_no_join = 1;
1312 spin_unlock(&root->fs_info->trans_lock);
1313 wait_event(cur_trans->writer_wait,
1314 atomic_read(&cur_trans->num_writers) == 1);
1315
1316 /*
1317 * the reloc mutex makes sure that we stop
1318 * the balancing code from coming in and moving
1319 * extents around in the middle of the commit
1320 */
1321 mutex_lock(&root->fs_info->reloc_mutex);
1222 1322
1223 ret = btrfs_run_delayed_items(trans, root); 1323 ret = btrfs_run_delayed_items(trans, root);
1224 BUG_ON(ret); 1324 BUG_ON(ret);
1225 1325
1326 ret = create_pending_snapshots(trans, root->fs_info);
1327 BUG_ON(ret);
1328
1226 ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1); 1329 ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1);
1227 BUG_ON(ret); 1330 BUG_ON(ret);
1228 1331
1332 /*
1333 * make sure none of the code above managed to slip in a
1334 * delayed item
1335 */
1336 btrfs_assert_delayed_root_empty(root);
1337
1229 WARN_ON(cur_trans != trans->transaction); 1338 WARN_ON(cur_trans != trans->transaction);
1230 1339
1231 btrfs_scrub_pause(root); 1340 btrfs_scrub_pause(root);
@@ -1258,9 +1367,6 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
1258 btrfs_prepare_extent_commit(trans, root); 1367 btrfs_prepare_extent_commit(trans, root);
1259 1368
1260 cur_trans = root->fs_info->running_transaction; 1369 cur_trans = root->fs_info->running_transaction;
1261 spin_lock(&root->fs_info->new_trans_lock);
1262 root->fs_info->running_transaction = NULL;
1263 spin_unlock(&root->fs_info->new_trans_lock);
1264 1370
1265 btrfs_set_root_node(&root->fs_info->tree_root->root_item, 1371 btrfs_set_root_node(&root->fs_info->tree_root->root_item,
1266 root->fs_info->tree_root->node); 1372 root->fs_info->tree_root->node);
@@ -1281,10 +1387,14 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
1281 sizeof(root->fs_info->super_copy)); 1387 sizeof(root->fs_info->super_copy));
1282 1388
1283 trans->transaction->blocked = 0; 1389 trans->transaction->blocked = 0;
1390 spin_lock(&root->fs_info->trans_lock);
1391 root->fs_info->running_transaction = NULL;
1392 root->fs_info->trans_no_join = 0;
1393 spin_unlock(&root->fs_info->trans_lock);
1394 mutex_unlock(&root->fs_info->reloc_mutex);
1284 1395
1285 wake_up(&root->fs_info->transaction_wait); 1396 wake_up(&root->fs_info->transaction_wait);
1286 1397
1287 mutex_unlock(&root->fs_info->trans_mutex);
1288 ret = btrfs_write_and_wait_transaction(trans, root); 1398 ret = btrfs_write_and_wait_transaction(trans, root);
1289 BUG_ON(ret); 1399 BUG_ON(ret);
1290 write_ctree_super(trans, root, 0); 1400 write_ctree_super(trans, root, 0);
@@ -1297,22 +1407,21 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
1297 1407
1298 btrfs_finish_extent_commit(trans, root); 1408 btrfs_finish_extent_commit(trans, root);
1299 1409
1300 mutex_lock(&root->fs_info->trans_mutex);
1301
1302 cur_trans->commit_done = 1; 1410 cur_trans->commit_done = 1;
1303 1411
1304 root->fs_info->last_trans_committed = cur_trans->transid; 1412 root->fs_info->last_trans_committed = cur_trans->transid;
1305 1413
1306 wake_up(&cur_trans->commit_wait); 1414 wake_up(&cur_trans->commit_wait);
1307 1415
1416 spin_lock(&root->fs_info->trans_lock);
1308 list_del_init(&cur_trans->list); 1417 list_del_init(&cur_trans->list);
1418 spin_unlock(&root->fs_info->trans_lock);
1419
1309 put_transaction(cur_trans); 1420 put_transaction(cur_trans);
1310 put_transaction(cur_trans); 1421 put_transaction(cur_trans);
1311 1422
1312 trace_btrfs_transaction_commit(root); 1423 trace_btrfs_transaction_commit(root);
1313 1424
1314 mutex_unlock(&root->fs_info->trans_mutex);
1315
1316 btrfs_scrub_continue(root); 1425 btrfs_scrub_continue(root);
1317 1426
1318 if (current->journal_info == trans) 1427 if (current->journal_info == trans)
@@ -1334,9 +1443,9 @@ int btrfs_clean_old_snapshots(struct btrfs_root *root)
1334 LIST_HEAD(list); 1443 LIST_HEAD(list);
1335 struct btrfs_fs_info *fs_info = root->fs_info; 1444 struct btrfs_fs_info *fs_info = root->fs_info;
1336 1445
1337 mutex_lock(&fs_info->trans_mutex); 1446 spin_lock(&fs_info->trans_lock);
1338 list_splice_init(&fs_info->dead_roots, &list); 1447 list_splice_init(&fs_info->dead_roots, &list);
1339 mutex_unlock(&fs_info->trans_mutex); 1448 spin_unlock(&fs_info->trans_lock);
1340 1449
1341 while (!list_empty(&list)) { 1450 while (!list_empty(&list)) {
1342 root = list_entry(list.next, struct btrfs_root, root_list); 1451 root = list_entry(list.next, struct btrfs_root, root_list);
diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h
index 804c88639e5d..02564e6230ac 100644
--- a/fs/btrfs/transaction.h
+++ b/fs/btrfs/transaction.h
@@ -28,10 +28,12 @@ struct btrfs_transaction {
28 * transaction can end 28 * transaction can end
29 */ 29 */
30 atomic_t num_writers; 30 atomic_t num_writers;
31 atomic_t use_count;
31 32
32 unsigned long num_joined; 33 unsigned long num_joined;
34
35 spinlock_t commit_lock;
33 int in_commit; 36 int in_commit;
34 atomic_t use_count;
35 int commit_done; 37 int commit_done;
36 int blocked; 38 int blocked;
37 struct list_head list; 39 struct list_head list;
@@ -45,13 +47,14 @@ struct btrfs_transaction {
45 47
46struct btrfs_trans_handle { 48struct btrfs_trans_handle {
47 u64 transid; 49 u64 transid;
48 u64 block_group;
49 u64 bytes_reserved; 50 u64 bytes_reserved;
51 unsigned long use_count;
50 unsigned long blocks_reserved; 52 unsigned long blocks_reserved;
51 unsigned long blocks_used; 53 unsigned long blocks_used;
52 unsigned long delayed_ref_updates; 54 unsigned long delayed_ref_updates;
53 struct btrfs_transaction *transaction; 55 struct btrfs_transaction *transaction;
54 struct btrfs_block_rsv *block_rsv; 56 struct btrfs_block_rsv *block_rsv;
57 struct btrfs_block_rsv *orig_rsv;
55}; 58};
56 59
57struct btrfs_pending_snapshot { 60struct btrfs_pending_snapshot {
@@ -66,19 +69,6 @@ struct btrfs_pending_snapshot {
66 struct list_head list; 69 struct list_head list;
67}; 70};
68 71
69static inline void btrfs_set_trans_block_group(struct btrfs_trans_handle *trans,
70 struct inode *inode)
71{
72 trans->block_group = BTRFS_I(inode)->block_group;
73}
74
75static inline void btrfs_update_inode_block_group(
76 struct btrfs_trans_handle *trans,
77 struct inode *inode)
78{
79 BTRFS_I(inode)->block_group = trans->block_group;
80}
81
82static inline void btrfs_set_inode_last_trans(struct btrfs_trans_handle *trans, 72static inline void btrfs_set_inode_last_trans(struct btrfs_trans_handle *trans,
83 struct inode *inode) 73 struct inode *inode)
84{ 74{
@@ -92,12 +82,9 @@ int btrfs_end_transaction_nolock(struct btrfs_trans_handle *trans,
92 struct btrfs_root *root); 82 struct btrfs_root *root);
93struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root, 83struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root,
94 int num_items); 84 int num_items);
95struct btrfs_trans_handle *btrfs_join_transaction(struct btrfs_root *root, 85struct btrfs_trans_handle *btrfs_join_transaction(struct btrfs_root *root);
96 int num_blocks); 86struct btrfs_trans_handle *btrfs_join_transaction_nolock(struct btrfs_root *root);
97struct btrfs_trans_handle *btrfs_join_transaction_nolock(struct btrfs_root *root, 87struct btrfs_trans_handle *btrfs_start_ioctl_transaction(struct btrfs_root *root);
98 int num_blocks);
99struct btrfs_trans_handle *btrfs_start_ioctl_transaction(struct btrfs_root *r,
100 int num_blocks);
101int btrfs_wait_for_commit(struct btrfs_root *root, u64 transid); 88int btrfs_wait_for_commit(struct btrfs_root *root, u64 transid);
102int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans, 89int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans,
103 struct btrfs_root *root); 90 struct btrfs_root *root);
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 592396c6dc47..4ce8a9f41d1e 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -3177,7 +3177,7 @@ again:
3177 tmp_key.offset = (u64)-1; 3177 tmp_key.offset = (u64)-1;
3178 3178
3179 wc.replay_dest = btrfs_read_fs_root_no_name(fs_info, &tmp_key); 3179 wc.replay_dest = btrfs_read_fs_root_no_name(fs_info, &tmp_key);
3180 BUG_ON(!wc.replay_dest); 3180 BUG_ON(IS_ERR_OR_NULL(wc.replay_dest));
3181 3181
3182 wc.replay_dest->log_root = log; 3182 wc.replay_dest->log_root = log;
3183 btrfs_record_root_in_trans(trans, wc.replay_dest); 3183 btrfs_record_root_in_trans(trans, wc.replay_dest);
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index c48214ef5c09..1efa56e18f9b 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -504,7 +504,7 @@ static int __btrfs_close_devices(struct btrfs_fs_devices *fs_devices)
504 BUG_ON(!new_device); 504 BUG_ON(!new_device);
505 memcpy(new_device, device, sizeof(*new_device)); 505 memcpy(new_device, device, sizeof(*new_device));
506 new_device->name = kstrdup(device->name, GFP_NOFS); 506 new_device->name = kstrdup(device->name, GFP_NOFS);
507 BUG_ON(!new_device->name); 507 BUG_ON(device->name && !new_device->name);
508 new_device->bdev = NULL; 508 new_device->bdev = NULL;
509 new_device->writeable = 0; 509 new_device->writeable = 0;
510 new_device->in_fs_metadata = 0; 510 new_device->in_fs_metadata = 0;
@@ -689,12 +689,8 @@ int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder,
689 transid = btrfs_super_generation(disk_super); 689 transid = btrfs_super_generation(disk_super);
690 if (disk_super->label[0]) 690 if (disk_super->label[0])
691 printk(KERN_INFO "device label %s ", disk_super->label); 691 printk(KERN_INFO "device label %s ", disk_super->label);
692 else { 692 else
693 /* FIXME, make a readl uuid parser */ 693 printk(KERN_INFO "device fsid %pU ", disk_super->fsid);
694 printk(KERN_INFO "device fsid %llx-%llx ",
695 *(unsigned long long *)disk_super->fsid,
696 *(unsigned long long *)(disk_super->fsid + 8));
697 }
698 printk(KERN_CONT "devid %llu transid %llu %s\n", 694 printk(KERN_CONT "devid %llu transid %llu %s\n",
699 (unsigned long long)devid, (unsigned long long)transid, path); 695 (unsigned long long)devid, (unsigned long long)transid, path);
700 ret = device_list_add(path, disk_super, devid, fs_devices_ret); 696 ret = device_list_add(path, disk_super, devid, fs_devices_ret);
diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c
index f3107e4b4d56..5366fe452ab0 100644
--- a/fs/btrfs/xattr.c
+++ b/fs/btrfs/xattr.c
@@ -158,8 +158,6 @@ int __btrfs_setxattr(struct btrfs_trans_handle *trans,
158 if (IS_ERR(trans)) 158 if (IS_ERR(trans))
159 return PTR_ERR(trans); 159 return PTR_ERR(trans);
160 160
161 btrfs_set_trans_block_group(trans, inode);
162
163 ret = do_setxattr(trans, inode, name, value, size, flags); 161 ret = do_setxattr(trans, inode, name, value, size, flags);
164 if (ret) 162 if (ret)
165 goto out; 163 goto out;
diff --git a/fs/buffer.c b/fs/buffer.c
index 49c9aada0374..1a80b048ade8 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -1902,10 +1902,8 @@ int __block_write_begin(struct page *page, loff_t pos, unsigned len,
1902 if (!buffer_uptodate(*wait_bh)) 1902 if (!buffer_uptodate(*wait_bh))
1903 err = -EIO; 1903 err = -EIO;
1904 } 1904 }
1905 if (unlikely(err)) { 1905 if (unlikely(err))
1906 page_zero_new_buffers(page, from, to); 1906 page_zero_new_buffers(page, from, to);
1907 ClearPageUptodate(page);
1908 }
1909 return err; 1907 return err;
1910} 1908}
1911EXPORT_SYMBOL(__block_write_begin); 1909EXPORT_SYMBOL(__block_write_begin);
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index 33da49dc3cc6..5a3953db8118 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -453,7 +453,7 @@ static int ceph_writepage(struct page *page, struct writeback_control *wbc)
453 int err; 453 int err;
454 struct inode *inode = page->mapping->host; 454 struct inode *inode = page->mapping->host;
455 BUG_ON(!inode); 455 BUG_ON(!inode);
456 igrab(inode); 456 ihold(inode);
457 err = writepage_nounlock(page, wbc); 457 err = writepage_nounlock(page, wbc);
458 unlock_page(page); 458 unlock_page(page);
459 iput(inode); 459 iput(inode);
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index 1f72b00447c4..f605753c8fe9 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -2940,14 +2940,12 @@ void ceph_flush_dirty_caps(struct ceph_mds_client *mdsc)
2940 while (!list_empty(&mdsc->cap_dirty)) { 2940 while (!list_empty(&mdsc->cap_dirty)) {
2941 ci = list_first_entry(&mdsc->cap_dirty, struct ceph_inode_info, 2941 ci = list_first_entry(&mdsc->cap_dirty, struct ceph_inode_info,
2942 i_dirty_item); 2942 i_dirty_item);
2943 inode = igrab(&ci->vfs_inode); 2943 inode = &ci->vfs_inode;
2944 ihold(inode);
2944 dout("flush_dirty_caps %p\n", inode); 2945 dout("flush_dirty_caps %p\n", inode);
2945 spin_unlock(&mdsc->cap_dirty_lock); 2946 spin_unlock(&mdsc->cap_dirty_lock);
2946 if (inode) { 2947 ceph_check_caps(ci, CHECK_CAPS_NODELAY|CHECK_CAPS_FLUSH, NULL);
2947 ceph_check_caps(ci, CHECK_CAPS_NODELAY|CHECK_CAPS_FLUSH, 2948 iput(inode);
2948 NULL);
2949 iput(inode);
2950 }
2951 spin_lock(&mdsc->cap_dirty_lock); 2949 spin_lock(&mdsc->cap_dirty_lock);
2952 } 2950 }
2953 spin_unlock(&mdsc->cap_dirty_lock); 2951 spin_unlock(&mdsc->cap_dirty_lock);
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index 33729e822bb9..ef8f08c343e8 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -308,7 +308,8 @@ more:
308 req = ceph_mdsc_create_request(mdsc, op, USE_AUTH_MDS); 308 req = ceph_mdsc_create_request(mdsc, op, USE_AUTH_MDS);
309 if (IS_ERR(req)) 309 if (IS_ERR(req))
310 return PTR_ERR(req); 310 return PTR_ERR(req);
311 req->r_inode = igrab(inode); 311 req->r_inode = inode;
312 ihold(inode);
312 req->r_dentry = dget(filp->f_dentry); 313 req->r_dentry = dget(filp->f_dentry);
313 /* hints to request -> mds selection code */ 314 /* hints to request -> mds selection code */
314 req->r_direct_mode = USE_AUTH_MDS; 315 req->r_direct_mode = USE_AUTH_MDS;
@@ -787,10 +788,12 @@ static int ceph_link(struct dentry *old_dentry, struct inode *dir,
787 req->r_dentry_drop = CEPH_CAP_FILE_SHARED; 788 req->r_dentry_drop = CEPH_CAP_FILE_SHARED;
788 req->r_dentry_unless = CEPH_CAP_FILE_EXCL; 789 req->r_dentry_unless = CEPH_CAP_FILE_EXCL;
789 err = ceph_mdsc_do_request(mdsc, dir, req); 790 err = ceph_mdsc_do_request(mdsc, dir, req);
790 if (err) 791 if (err) {
791 d_drop(dentry); 792 d_drop(dentry);
792 else if (!req->r_reply_info.head->is_dentry) 793 } else if (!req->r_reply_info.head->is_dentry) {
793 d_instantiate(dentry, igrab(old_dentry->d_inode)); 794 ihold(old_dentry->d_inode);
795 d_instantiate(dentry, old_dentry->d_inode);
796 }
794 ceph_mdsc_put_request(req); 797 ceph_mdsc_put_request(req);
795 return err; 798 return err;
796} 799}
diff --git a/fs/ceph/export.c b/fs/ceph/export.c
index a610d3d67488..f67b687550de 100644
--- a/fs/ceph/export.c
+++ b/fs/ceph/export.c
@@ -109,7 +109,7 @@ static struct dentry *__fh_to_dentry(struct super_block *sb,
109 err = ceph_mdsc_do_request(mdsc, NULL, req); 109 err = ceph_mdsc_do_request(mdsc, NULL, req);
110 inode = req->r_target_inode; 110 inode = req->r_target_inode;
111 if (inode) 111 if (inode)
112 igrab(inode); 112 ihold(inode);
113 ceph_mdsc_put_request(req); 113 ceph_mdsc_put_request(req);
114 if (!inode) 114 if (!inode)
115 return ERR_PTR(-ESTALE); 115 return ERR_PTR(-ESTALE);
@@ -167,7 +167,7 @@ static struct dentry *__cfh_to_dentry(struct super_block *sb,
167 err = ceph_mdsc_do_request(mdsc, NULL, req); 167 err = ceph_mdsc_do_request(mdsc, NULL, req);
168 inode = req->r_target_inode; 168 inode = req->r_target_inode;
169 if (inode) 169 if (inode)
170 igrab(inode); 170 ihold(inode);
171 ceph_mdsc_put_request(req); 171 ceph_mdsc_put_request(req);
172 if (!inode) 172 if (!inode)
173 return ERR_PTR(err ? err : -ESTALE); 173 return ERR_PTR(err ? err : -ESTALE);
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index 203252d88d9f..9542f07d0b93 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -191,7 +191,8 @@ int ceph_open(struct inode *inode, struct file *file)
191 err = PTR_ERR(req); 191 err = PTR_ERR(req);
192 goto out; 192 goto out;
193 } 193 }
194 req->r_inode = igrab(inode); 194 req->r_inode = inode;
195 ihold(inode);
195 req->r_num_caps = 1; 196 req->r_num_caps = 1;
196 err = ceph_mdsc_do_request(mdsc, parent_inode, req); 197 err = ceph_mdsc_do_request(mdsc, parent_inode, req);
197 if (!err) 198 if (!err)
@@ -282,7 +283,7 @@ int ceph_release(struct inode *inode, struct file *file)
282static int striped_read(struct inode *inode, 283static int striped_read(struct inode *inode,
283 u64 off, u64 len, 284 u64 off, u64 len,
284 struct page **pages, int num_pages, 285 struct page **pages, int num_pages,
285 int *checkeof, bool align_to_pages, 286 int *checkeof, bool o_direct,
286 unsigned long buf_align) 287 unsigned long buf_align)
287{ 288{
288 struct ceph_fs_client *fsc = ceph_inode_to_client(inode); 289 struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
@@ -307,7 +308,7 @@ static int striped_read(struct inode *inode,
307 io_align = off & ~PAGE_MASK; 308 io_align = off & ~PAGE_MASK;
308 309
309more: 310more:
310 if (align_to_pages) 311 if (o_direct)
311 page_align = (pos - io_align + buf_align) & ~PAGE_MASK; 312 page_align = (pos - io_align + buf_align) & ~PAGE_MASK;
312 else 313 else
313 page_align = pos & ~PAGE_MASK; 314 page_align = pos & ~PAGE_MASK;
@@ -317,10 +318,10 @@ more:
317 ci->i_truncate_seq, 318 ci->i_truncate_seq,
318 ci->i_truncate_size, 319 ci->i_truncate_size,
319 page_pos, pages_left, page_align); 320 page_pos, pages_left, page_align);
320 hit_stripe = this_len < left;
321 was_short = ret >= 0 && ret < this_len;
322 if (ret == -ENOENT) 321 if (ret == -ENOENT)
323 ret = 0; 322 ret = 0;
323 hit_stripe = this_len < left;
324 was_short = ret >= 0 && ret < this_len;
324 dout("striped_read %llu~%u (read %u) got %d%s%s\n", pos, left, read, 325 dout("striped_read %llu~%u (read %u) got %d%s%s\n", pos, left, read,
325 ret, hit_stripe ? " HITSTRIPE" : "", was_short ? " SHORT" : ""); 326 ret, hit_stripe ? " HITSTRIPE" : "", was_short ? " SHORT" : "");
326 327
@@ -345,20 +346,22 @@ more:
345 } 346 }
346 347
347 if (was_short) { 348 if (was_short) {
348 /* was original extent fully inside i_size? */ 349 /* did we bounce off eof? */
349 if (pos + left <= inode->i_size) { 350 if (pos + left > inode->i_size)
350 dout("zero tail\n"); 351 *checkeof = 1;
351 ceph_zero_page_vector_range(page_off + read, len - read, 352
353 /* zero trailing bytes (inside i_size) */
354 if (left > 0 && pos < inode->i_size) {
355 if (pos + left > inode->i_size)
356 left = inode->i_size - pos;
357
358 dout("zero tail %d\n", left);
359 ceph_zero_page_vector_range(page_off + read, left,
352 pages); 360 pages);
353 read = len; 361 read += left;
354 goto out;
355 } 362 }
356
357 /* check i_size */
358 *checkeof = 1;
359 } 363 }
360 364
361out:
362 if (ret >= 0) 365 if (ret >= 0)
363 ret = read; 366 ret = read;
364 dout("striped_read returns %d\n", ret); 367 dout("striped_read returns %d\n", ret);
@@ -658,7 +661,7 @@ out:
658 661
659 /* hit EOF or hole? */ 662 /* hit EOF or hole? */
660 if (statret == 0 && *ppos < inode->i_size) { 663 if (statret == 0 && *ppos < inode->i_size) {
661 dout("aio_read sync_read hit hole, reading more\n"); 664 dout("aio_read sync_read hit hole, ppos %lld < size %lld, reading more\n", *ppos, inode->i_size);
662 read += ret; 665 read += ret;
663 base += ret; 666 base += ret;
664 len -= ret; 667 len -= ret;
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index 70b6a4839c38..d8858e96ab18 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -1101,10 +1101,10 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req,
1101 goto done; 1101 goto done;
1102 } 1102 }
1103 req->r_dentry = dn; /* may have spliced */ 1103 req->r_dentry = dn; /* may have spliced */
1104 igrab(in); 1104 ihold(in);
1105 } else if (ceph_ino(in) == vino.ino && 1105 } else if (ceph_ino(in) == vino.ino &&
1106 ceph_snap(in) == vino.snap) { 1106 ceph_snap(in) == vino.snap) {
1107 igrab(in); 1107 ihold(in);
1108 } else { 1108 } else {
1109 dout(" %p links to %p %llx.%llx, not %llx.%llx\n", 1109 dout(" %p links to %p %llx.%llx, not %llx.%llx\n",
1110 dn, in, ceph_ino(in), ceph_snap(in), 1110 dn, in, ceph_ino(in), ceph_snap(in),
@@ -1144,7 +1144,7 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req,
1144 goto done; 1144 goto done;
1145 } 1145 }
1146 req->r_dentry = dn; /* may have spliced */ 1146 req->r_dentry = dn; /* may have spliced */
1147 igrab(in); 1147 ihold(in);
1148 rinfo->head->is_dentry = 1; /* fool notrace handlers */ 1148 rinfo->head->is_dentry = 1; /* fool notrace handlers */
1149 } 1149 }
1150 1150
@@ -1328,7 +1328,7 @@ void ceph_queue_writeback(struct inode *inode)
1328 if (queue_work(ceph_inode_to_client(inode)->wb_wq, 1328 if (queue_work(ceph_inode_to_client(inode)->wb_wq,
1329 &ceph_inode(inode)->i_wb_work)) { 1329 &ceph_inode(inode)->i_wb_work)) {
1330 dout("ceph_queue_writeback %p\n", inode); 1330 dout("ceph_queue_writeback %p\n", inode);
1331 igrab(inode); 1331 ihold(inode);
1332 } else { 1332 } else {
1333 dout("ceph_queue_writeback %p failed\n", inode); 1333 dout("ceph_queue_writeback %p failed\n", inode);
1334 } 1334 }
@@ -1353,7 +1353,7 @@ void ceph_queue_invalidate(struct inode *inode)
1353 if (queue_work(ceph_inode_to_client(inode)->pg_inv_wq, 1353 if (queue_work(ceph_inode_to_client(inode)->pg_inv_wq,
1354 &ceph_inode(inode)->i_pg_inv_work)) { 1354 &ceph_inode(inode)->i_pg_inv_work)) {
1355 dout("ceph_queue_invalidate %p\n", inode); 1355 dout("ceph_queue_invalidate %p\n", inode);
1356 igrab(inode); 1356 ihold(inode);
1357 } else { 1357 } else {
1358 dout("ceph_queue_invalidate %p failed\n", inode); 1358 dout("ceph_queue_invalidate %p failed\n", inode);
1359 } 1359 }
@@ -1477,7 +1477,7 @@ void ceph_queue_vmtruncate(struct inode *inode)
1477 if (queue_work(ceph_sb_to_client(inode->i_sb)->trunc_wq, 1477 if (queue_work(ceph_sb_to_client(inode->i_sb)->trunc_wq,
1478 &ci->i_vmtruncate_work)) { 1478 &ci->i_vmtruncate_work)) {
1479 dout("ceph_queue_vmtruncate %p\n", inode); 1479 dout("ceph_queue_vmtruncate %p\n", inode);
1480 igrab(inode); 1480 ihold(inode);
1481 } else { 1481 } else {
1482 dout("ceph_queue_vmtruncate %p failed, pending=%d\n", 1482 dout("ceph_queue_vmtruncate %p failed, pending=%d\n",
1483 inode, ci->i_truncate_pending); 1483 inode, ci->i_truncate_pending);
@@ -1738,7 +1738,8 @@ int ceph_setattr(struct dentry *dentry, struct iattr *attr)
1738 __mark_inode_dirty(inode, inode_dirty_flags); 1738 __mark_inode_dirty(inode, inode_dirty_flags);
1739 1739
1740 if (mask) { 1740 if (mask) {
1741 req->r_inode = igrab(inode); 1741 req->r_inode = inode;
1742 ihold(inode);
1742 req->r_inode_drop = release; 1743 req->r_inode_drop = release;
1743 req->r_args.setattr.mask = cpu_to_le32(mask); 1744 req->r_args.setattr.mask = cpu_to_le32(mask);
1744 req->r_num_caps = 1; 1745 req->r_num_caps = 1;
@@ -1779,7 +1780,8 @@ int ceph_do_getattr(struct inode *inode, int mask)
1779 req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_GETATTR, USE_ANY_MDS); 1780 req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_GETATTR, USE_ANY_MDS);
1780 if (IS_ERR(req)) 1781 if (IS_ERR(req))
1781 return PTR_ERR(req); 1782 return PTR_ERR(req);
1782 req->r_inode = igrab(inode); 1783 req->r_inode = inode;
1784 ihold(inode);
1783 req->r_num_caps = 1; 1785 req->r_num_caps = 1;
1784 req->r_args.getattr.mask = cpu_to_le32(mask); 1786 req->r_args.getattr.mask = cpu_to_le32(mask);
1785 err = ceph_mdsc_do_request(mdsc, NULL, req); 1787 err = ceph_mdsc_do_request(mdsc, NULL, req);
diff --git a/fs/ceph/ioctl.c b/fs/ceph/ioctl.c
index 8888c9ba68db..ef0b5f48e13a 100644
--- a/fs/ceph/ioctl.c
+++ b/fs/ceph/ioctl.c
@@ -73,7 +73,8 @@ static long ceph_ioctl_set_layout(struct file *file, void __user *arg)
73 USE_AUTH_MDS); 73 USE_AUTH_MDS);
74 if (IS_ERR(req)) 74 if (IS_ERR(req))
75 return PTR_ERR(req); 75 return PTR_ERR(req);
76 req->r_inode = igrab(inode); 76 req->r_inode = inode;
77 ihold(inode);
77 req->r_inode_drop = CEPH_CAP_FILE_SHARED | CEPH_CAP_FILE_EXCL; 78 req->r_inode_drop = CEPH_CAP_FILE_SHARED | CEPH_CAP_FILE_EXCL;
78 79
79 req->r_args.setlayout.layout.fl_stripe_unit = 80 req->r_args.setlayout.layout.fl_stripe_unit =
@@ -135,7 +136,8 @@ static long ceph_ioctl_set_layout_policy (struct file *file, void __user *arg)
135 136
136 if (IS_ERR(req)) 137 if (IS_ERR(req))
137 return PTR_ERR(req); 138 return PTR_ERR(req);
138 req->r_inode = igrab(inode); 139 req->r_inode = inode;
140 ihold(inode);
139 141
140 req->r_args.setlayout.layout.fl_stripe_unit = 142 req->r_args.setlayout.layout.fl_stripe_unit =
141 cpu_to_le32(l.stripe_unit); 143 cpu_to_le32(l.stripe_unit);
diff --git a/fs/ceph/locks.c b/fs/ceph/locks.c
index 476b329867d4..80576d05d687 100644
--- a/fs/ceph/locks.c
+++ b/fs/ceph/locks.c
@@ -23,7 +23,8 @@ static int ceph_lock_message(u8 lock_type, u16 operation, struct file *file,
23 req = ceph_mdsc_create_request(mdsc, operation, USE_AUTH_MDS); 23 req = ceph_mdsc_create_request(mdsc, operation, USE_AUTH_MDS);
24 if (IS_ERR(req)) 24 if (IS_ERR(req))
25 return PTR_ERR(req); 25 return PTR_ERR(req);
26 req->r_inode = igrab(inode); 26 req->r_inode = inode;
27 ihold(inode);
27 28
28 /* mds requires start and length rather than start and end */ 29 /* mds requires start and length rather than start and end */
29 if (LLONG_MAX == fl->fl_end) 30 if (LLONG_MAX == fl->fl_end)
@@ -32,11 +33,10 @@ static int ceph_lock_message(u8 lock_type, u16 operation, struct file *file,
32 length = fl->fl_end - fl->fl_start + 1; 33 length = fl->fl_end - fl->fl_start + 1;
33 34
34 dout("ceph_lock_message: rule: %d, op: %d, pid: %llu, start: %llu, " 35 dout("ceph_lock_message: rule: %d, op: %d, pid: %llu, start: %llu, "
35 "length: %llu, wait: %d, type`: %d", (int)lock_type, 36 "length: %llu, wait: %d, type: %d", (int)lock_type,
36 (int)operation, (u64)fl->fl_pid, fl->fl_start, 37 (int)operation, (u64)fl->fl_pid, fl->fl_start,
37 length, wait, fl->fl_type); 38 length, wait, fl->fl_type);
38 39
39
40 req->r_args.filelock_change.rule = lock_type; 40 req->r_args.filelock_change.rule = lock_type;
41 req->r_args.filelock_change.type = cmd; 41 req->r_args.filelock_change.type = cmd;
42 req->r_args.filelock_change.pid = cpu_to_le64((u64)fl->fl_pid); 42 req->r_args.filelock_change.pid = cpu_to_le64((u64)fl->fl_pid);
@@ -70,7 +70,7 @@ static int ceph_lock_message(u8 lock_type, u16 operation, struct file *file,
70 } 70 }
71 ceph_mdsc_put_request(req); 71 ceph_mdsc_put_request(req);
72 dout("ceph_lock_message: rule: %d, op: %d, pid: %llu, start: %llu, " 72 dout("ceph_lock_message: rule: %d, op: %d, pid: %llu, start: %llu, "
73 "length: %llu, wait: %d, type`: %d, err code %d", (int)lock_type, 73 "length: %llu, wait: %d, type: %d, err code %d", (int)lock_type,
74 (int)operation, (u64)fl->fl_pid, fl->fl_start, 74 (int)operation, (u64)fl->fl_pid, fl->fl_start,
75 length, wait, fl->fl_type, err); 75 length, wait, fl->fl_type, err);
76 return err; 76 return err;
@@ -109,16 +109,20 @@ int ceph_lock(struct file *file, int cmd, struct file_lock *fl)
109 dout("mds locked, locking locally"); 109 dout("mds locked, locking locally");
110 err = posix_lock_file(file, fl, NULL); 110 err = posix_lock_file(file, fl, NULL);
111 if (err && (CEPH_MDS_OP_SETFILELOCK == op)) { 111 if (err && (CEPH_MDS_OP_SETFILELOCK == op)) {
112 /* undo! This should only happen if the kernel detects 112 /* undo! This should only happen if
113 * local deadlock. */ 113 * the kernel detects local
114 * deadlock. */
114 ceph_lock_message(CEPH_LOCK_FCNTL, op, file, 115 ceph_lock_message(CEPH_LOCK_FCNTL, op, file,
115 CEPH_LOCK_UNLOCK, 0, fl); 116 CEPH_LOCK_UNLOCK, 0, fl);
116 dout("got %d on posix_lock_file, undid lock", err); 117 dout("got %d on posix_lock_file, undid lock",
118 err);
117 } 119 }
118 } 120 }
119 121
120 } else { 122 } else if (err == -ERESTARTSYS) {
121 dout("mds returned error code %d", err); 123 dout("undoing lock\n");
124 ceph_lock_message(CEPH_LOCK_FCNTL, op, file,
125 CEPH_LOCK_UNLOCK, 0, fl);
122 } 126 }
123 return err; 127 return err;
124} 128}
@@ -155,8 +159,11 @@ int ceph_flock(struct file *file, int cmd, struct file_lock *fl)
155 file, CEPH_LOCK_UNLOCK, 0, fl); 159 file, CEPH_LOCK_UNLOCK, 0, fl);
156 dout("got %d on flock_lock_file_wait, undid lock", err); 160 dout("got %d on flock_lock_file_wait, undid lock", err);
157 } 161 }
158 } else { 162 } else if (err == -ERESTARTSYS) {
159 dout("mds error code %d", err); 163 dout("undoing lock\n");
164 ceph_lock_message(CEPH_LOCK_FLOCK,
165 CEPH_MDS_OP_SETFILELOCK,
166 file, CEPH_LOCK_UNLOCK, 0, fl);
160 } 167 }
161 return err; 168 return err;
162} 169}
diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c
index 24067d68a554..54b14de2e729 100644
--- a/fs/ceph/snap.c
+++ b/fs/ceph/snap.c
@@ -722,7 +722,7 @@ static void flush_snaps(struct ceph_mds_client *mdsc)
722 ci = list_first_entry(&mdsc->snap_flush_list, 722 ci = list_first_entry(&mdsc->snap_flush_list,
723 struct ceph_inode_info, i_snap_flush_item); 723 struct ceph_inode_info, i_snap_flush_item);
724 inode = &ci->vfs_inode; 724 inode = &ci->vfs_inode;
725 igrab(inode); 725 ihold(inode);
726 spin_unlock(&mdsc->snap_flush_lock); 726 spin_unlock(&mdsc->snap_flush_lock);
727 spin_lock(&inode->i_lock); 727 spin_lock(&inode->i_lock);
728 __ceph_flush_snaps(ci, &session, 0); 728 __ceph_flush_snaps(ci, &session, 0);
diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c
index f2b628696180..f42d730f1b66 100644
--- a/fs/ceph/xattr.c
+++ b/fs/ceph/xattr.c
@@ -665,7 +665,8 @@ static int ceph_sync_setxattr(struct dentry *dentry, const char *name,
665 err = PTR_ERR(req); 665 err = PTR_ERR(req);
666 goto out; 666 goto out;
667 } 667 }
668 req->r_inode = igrab(inode); 668 req->r_inode = inode;
669 ihold(inode);
669 req->r_inode_drop = CEPH_CAP_XATTR_SHARED; 670 req->r_inode_drop = CEPH_CAP_XATTR_SHARED;
670 req->r_num_caps = 1; 671 req->r_num_caps = 1;
671 req->r_args.setxattr.flags = cpu_to_le32(flags); 672 req->r_args.setxattr.flags = cpu_to_le32(flags);
@@ -795,7 +796,8 @@ static int ceph_send_removexattr(struct dentry *dentry, const char *name)
795 USE_AUTH_MDS); 796 USE_AUTH_MDS);
796 if (IS_ERR(req)) 797 if (IS_ERR(req))
797 return PTR_ERR(req); 798 return PTR_ERR(req);
798 req->r_inode = igrab(inode); 799 req->r_inode = inode;
800 ihold(inode);
799 req->r_inode_drop = CEPH_CAP_XATTR_SHARED; 801 req->r_inode_drop = CEPH_CAP_XATTR_SHARED;
800 req->r_num_caps = 1; 802 req->r_num_caps = 1;
801 req->r_path2 = kstrdup(name, GFP_NOFS); 803 req->r_path2 = kstrdup(name, GFP_NOFS);
diff --git a/fs/cifs/Kconfig b/fs/cifs/Kconfig
index 1cd4c3a1862d..f66cc1625150 100644
--- a/fs/cifs/Kconfig
+++ b/fs/cifs/Kconfig
@@ -7,6 +7,7 @@ config CIFS
7 select CRYPTO_MD5 7 select CRYPTO_MD5
8 select CRYPTO_HMAC 8 select CRYPTO_HMAC
9 select CRYPTO_ARC4 9 select CRYPTO_ARC4
10 select CRYPTO_ECB
10 select CRYPTO_DES 11 select CRYPTO_DES
11 help 12 help
12 This is the client VFS module for the Common Internet File System 13 This is the client VFS module for the Common Internet File System
@@ -148,13 +149,13 @@ config CIFS_FSCACHE
148 149
149config CIFS_ACL 150config CIFS_ACL
150 bool "Provide CIFS ACL support (EXPERIMENTAL)" 151 bool "Provide CIFS ACL support (EXPERIMENTAL)"
151 depends on EXPERIMENTAL && CIFS_XATTR 152 depends on EXPERIMENTAL && CIFS_XATTR && KEYS
152 help 153 help
153 Allows to fetch CIFS/NTFS ACL from the server. The DACL blob 154 Allows to fetch CIFS/NTFS ACL from the server. The DACL blob
154 is handed over to the application/caller. 155 is handed over to the application/caller.
155 156
156config CIFS_NFSD_EXPORT 157config CIFS_NFSD_EXPORT
157 bool "Allow nfsd to export CIFS file system (EXPERIMENTAL)" 158 bool "Allow nfsd to export CIFS file system (EXPERIMENTAL)"
158 depends on CIFS && EXPERIMENTAL 159 depends on CIFS && EXPERIMENTAL && BROKEN
159 help 160 help
160 Allows NFS server to export a CIFS mounted share (nfsd over cifs) 161 Allows NFS server to export a CIFS mounted share (nfsd over cifs)
diff --git a/fs/cifs/cache.c b/fs/cifs/cache.c
index dd8584d35a14..545509c3313b 100644
--- a/fs/cifs/cache.c
+++ b/fs/cifs/cache.c
@@ -92,7 +92,7 @@ static uint16_t cifs_server_get_key(const void *cookie_netfs_data,
92 break; 92 break;
93 93
94 default: 94 default:
95 cERROR(1, "CIFS: Unknown network family '%d'", sa->sa_family); 95 cERROR(1, "Unknown network family '%d'", sa->sa_family);
96 key_len = 0; 96 key_len = 0;
97 break; 97 break;
98 } 98 }
@@ -152,7 +152,7 @@ static uint16_t cifs_super_get_key(const void *cookie_netfs_data, void *buffer,
152 152
153 sharename = extract_sharename(tcon->treeName); 153 sharename = extract_sharename(tcon->treeName);
154 if (IS_ERR(sharename)) { 154 if (IS_ERR(sharename)) {
155 cFYI(1, "CIFS: couldn't extract sharename\n"); 155 cFYI(1, "%s: couldn't extract sharename\n", __func__);
156 sharename = NULL; 156 sharename = NULL;
157 return 0; 157 return 0;
158 } 158 }
@@ -302,7 +302,7 @@ static void cifs_fscache_inode_now_uncached(void *cookie_netfs_data)
302 pagevec_init(&pvec, 0); 302 pagevec_init(&pvec, 0);
303 first = 0; 303 first = 0;
304 304
305 cFYI(1, "cifs inode 0x%p now uncached", cifsi); 305 cFYI(1, "%s: cifs inode 0x%p now uncached", __func__, cifsi);
306 306
307 for (;;) { 307 for (;;) {
308 nr_pages = pagevec_lookup(&pvec, 308 nr_pages = pagevec_lookup(&pvec,
diff --git a/fs/cifs/cifs_fs_sb.h b/fs/cifs/cifs_fs_sb.h
index ffb1459dc6ec..7260e11e21f8 100644
--- a/fs/cifs/cifs_fs_sb.h
+++ b/fs/cifs/cifs_fs_sb.h
@@ -42,6 +42,7 @@
42#define CIFS_MOUNT_MULTIUSER 0x20000 /* multiuser mount */ 42#define CIFS_MOUNT_MULTIUSER 0x20000 /* multiuser mount */
43#define CIFS_MOUNT_STRICT_IO 0x40000 /* strict cache mode */ 43#define CIFS_MOUNT_STRICT_IO 0x40000 /* strict cache mode */
44#define CIFS_MOUNT_RWPIDFORWARD 0x80000 /* use pid forwarding for rw */ 44#define CIFS_MOUNT_RWPIDFORWARD 0x80000 /* use pid forwarding for rw */
45#define CIFS_MOUNT_POSIXACL 0x100000 /* mirror of MS_POSIXACL in mnt_cifs_flags */
45 46
46struct cifs_sb_info { 47struct cifs_sb_info {
47 struct rb_root tlink_tree; 48 struct rb_root tlink_tree;
diff --git a/fs/cifs/cifsencrypt.c b/fs/cifs/cifsencrypt.c
index dfbd9f1f373d..5a0ee7f2af06 100644
--- a/fs/cifs/cifsencrypt.c
+++ b/fs/cifs/cifsencrypt.c
@@ -184,7 +184,7 @@ int cifs_verify_signature(struct smb_hdr *cifs_pdu,
184 if (cifs_pdu == NULL || server == NULL) 184 if (cifs_pdu == NULL || server == NULL)
185 return -EINVAL; 185 return -EINVAL;
186 186
187 if (cifs_pdu->Command == SMB_COM_NEGOTIATE) 187 if (!server->session_estab)
188 return 0; 188 return 0;
189 189
190 if (cifs_pdu->Command == SMB_COM_LOCKING_ANDX) { 190 if (cifs_pdu->Command == SMB_COM_LOCKING_ANDX) {
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 989442dcfb45..35f9154615fa 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -104,8 +104,7 @@ cifs_sb_deactive(struct super_block *sb)
104} 104}
105 105
106static int 106static int
107cifs_read_super(struct super_block *sb, struct smb_vol *volume_info, 107cifs_read_super(struct super_block *sb)
108 const char *devname, int silent)
109{ 108{
110 struct inode *inode; 109 struct inode *inode;
111 struct cifs_sb_info *cifs_sb; 110 struct cifs_sb_info *cifs_sb;
@@ -113,22 +112,16 @@ cifs_read_super(struct super_block *sb, struct smb_vol *volume_info,
113 112
114 cifs_sb = CIFS_SB(sb); 113 cifs_sb = CIFS_SB(sb);
115 114
116 spin_lock_init(&cifs_sb->tlink_tree_lock); 115 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_POSIXACL)
117 cifs_sb->tlink_tree = RB_ROOT; 116 sb->s_flags |= MS_POSIXACL;
118 117
119 rc = bdi_setup_and_register(&cifs_sb->bdi, "cifs", BDI_CAP_MAP_COPY); 118 if (cifs_sb_master_tcon(cifs_sb)->ses->capabilities & CAP_LARGE_FILES)
120 if (rc) 119 sb->s_maxbytes = MAX_LFS_FILESIZE;
121 return rc; 120 else
122 121 sb->s_maxbytes = MAX_NON_LFS;
123 cifs_sb->bdi.ra_pages = default_backing_dev_info.ra_pages;
124 122
125 rc = cifs_mount(sb, cifs_sb, volume_info, devname); 123 /* BB FIXME fix time_gran to be larger for LANMAN sessions */
126 124 sb->s_time_gran = 100;
127 if (rc) {
128 if (!silent)
129 cERROR(1, "cifs_mount failed w/return code = %d", rc);
130 goto out_mount_failed;
131 }
132 125
133 sb->s_magic = CIFS_MAGIC_NUMBER; 126 sb->s_magic = CIFS_MAGIC_NUMBER;
134 sb->s_op = &cifs_super_ops; 127 sb->s_op = &cifs_super_ops;
@@ -170,37 +163,14 @@ out_no_root:
170 if (inode) 163 if (inode)
171 iput(inode); 164 iput(inode);
172 165
173 cifs_umount(sb, cifs_sb);
174
175out_mount_failed:
176 bdi_destroy(&cifs_sb->bdi);
177 return rc; 166 return rc;
178} 167}
179 168
180static void 169static void cifs_kill_sb(struct super_block *sb)
181cifs_put_super(struct super_block *sb)
182{ 170{
183 int rc = 0; 171 struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
184 struct cifs_sb_info *cifs_sb; 172 kill_anon_super(sb);
185 173 cifs_umount(cifs_sb);
186 cFYI(1, "In cifs_put_super");
187 cifs_sb = CIFS_SB(sb);
188 if (cifs_sb == NULL) {
189 cFYI(1, "Empty cifs superblock info passed to unmount");
190 return;
191 }
192
193 rc = cifs_umount(sb, cifs_sb);
194 if (rc)
195 cERROR(1, "cifs_umount failed with return code %d", rc);
196 if (cifs_sb->mountdata) {
197 kfree(cifs_sb->mountdata);
198 cifs_sb->mountdata = NULL;
199 }
200
201 unload_nls(cifs_sb->local_nls);
202 bdi_destroy(&cifs_sb->bdi);
203 kfree(cifs_sb);
204} 174}
205 175
206static int 176static int
@@ -257,9 +227,6 @@ static int cifs_permission(struct inode *inode, int mask, unsigned int flags)
257{ 227{
258 struct cifs_sb_info *cifs_sb; 228 struct cifs_sb_info *cifs_sb;
259 229
260 if (flags & IPERM_FLAG_RCU)
261 return -ECHILD;
262
263 cifs_sb = CIFS_SB(inode->i_sb); 230 cifs_sb = CIFS_SB(inode->i_sb);
264 231
265 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_PERM) { 232 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_PERM) {
@@ -352,6 +319,37 @@ cifs_show_address(struct seq_file *s, struct TCP_Server_Info *server)
352 } 319 }
353} 320}
354 321
322static void
323cifs_show_security(struct seq_file *s, struct TCP_Server_Info *server)
324{
325 seq_printf(s, ",sec=");
326
327 switch (server->secType) {
328 case LANMAN:
329 seq_printf(s, "lanman");
330 break;
331 case NTLMv2:
332 seq_printf(s, "ntlmv2");
333 break;
334 case NTLM:
335 seq_printf(s, "ntlm");
336 break;
337 case Kerberos:
338 seq_printf(s, "krb5");
339 break;
340 case RawNTLMSSP:
341 seq_printf(s, "ntlmssp");
342 break;
343 default:
344 /* shouldn't ever happen */
345 seq_printf(s, "unknown");
346 break;
347 }
348
349 if (server->sec_mode & (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED))
350 seq_printf(s, "i");
351}
352
355/* 353/*
356 * cifs_show_options() is for displaying mount options in /proc/mounts. 354 * cifs_show_options() is for displaying mount options in /proc/mounts.
357 * Not all settable options are displayed but most of the important 355 * Not all settable options are displayed but most of the important
@@ -365,6 +363,8 @@ cifs_show_options(struct seq_file *s, struct vfsmount *m)
365 struct sockaddr *srcaddr; 363 struct sockaddr *srcaddr;
366 srcaddr = (struct sockaddr *)&tcon->ses->server->srcaddr; 364 srcaddr = (struct sockaddr *)&tcon->ses->server->srcaddr;
367 365
366 cifs_show_security(s, tcon->ses->server);
367
368 seq_printf(s, ",unc=%s", tcon->treeName); 368 seq_printf(s, ",unc=%s", tcon->treeName);
369 369
370 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER) 370 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER)
@@ -518,7 +518,6 @@ static int cifs_drop_inode(struct inode *inode)
518} 518}
519 519
520static const struct super_operations cifs_super_ops = { 520static const struct super_operations cifs_super_ops = {
521 .put_super = cifs_put_super,
522 .statfs = cifs_statfs, 521 .statfs = cifs_statfs,
523 .alloc_inode = cifs_alloc_inode, 522 .alloc_inode = cifs_alloc_inode,
524 .destroy_inode = cifs_destroy_inode, 523 .destroy_inode = cifs_destroy_inode,
@@ -555,7 +554,7 @@ cifs_get_root(struct smb_vol *vol, struct super_block *sb)
555 full_path = cifs_build_path_to_root(vol, cifs_sb, 554 full_path = cifs_build_path_to_root(vol, cifs_sb,
556 cifs_sb_master_tcon(cifs_sb)); 555 cifs_sb_master_tcon(cifs_sb));
557 if (full_path == NULL) 556 if (full_path == NULL)
558 return NULL; 557 return ERR_PTR(-ENOMEM);
559 558
560 cFYI(1, "Get root dentry for %s", full_path); 559 cFYI(1, "Get root dentry for %s", full_path);
561 560
@@ -584,7 +583,7 @@ cifs_get_root(struct smb_vol *vol, struct super_block *sb)
584 dchild = d_alloc(dparent, &name); 583 dchild = d_alloc(dparent, &name);
585 if (dchild == NULL) { 584 if (dchild == NULL) {
586 dput(dparent); 585 dput(dparent);
587 dparent = NULL; 586 dparent = ERR_PTR(-ENOMEM);
588 goto out; 587 goto out;
589 } 588 }
590 } 589 }
@@ -602,7 +601,7 @@ cifs_get_root(struct smb_vol *vol, struct super_block *sb)
602 if (rc) { 601 if (rc) {
603 dput(dchild); 602 dput(dchild);
604 dput(dparent); 603 dput(dparent);
605 dparent = NULL; 604 dparent = ERR_PTR(rc);
606 goto out; 605 goto out;
607 } 606 }
608 alias = d_materialise_unique(dchild, inode); 607 alias = d_materialise_unique(dchild, inode);
@@ -610,7 +609,7 @@ cifs_get_root(struct smb_vol *vol, struct super_block *sb)
610 dput(dchild); 609 dput(dchild);
611 if (IS_ERR(alias)) { 610 if (IS_ERR(alias)) {
612 dput(dparent); 611 dput(dparent);
613 dparent = NULL; 612 dparent = ERR_PTR(-EINVAL); /* XXX */
614 goto out; 613 goto out;
615 } 614 }
616 dchild = alias; 615 dchild = alias;
@@ -630,6 +629,13 @@ out:
630 return dparent; 629 return dparent;
631} 630}
632 631
632static int cifs_set_super(struct super_block *sb, void *data)
633{
634 struct cifs_mnt_data *mnt_data = data;
635 sb->s_fs_info = mnt_data->cifs_sb;
636 return set_anon_super(sb, NULL);
637}
638
633static struct dentry * 639static struct dentry *
634cifs_do_mount(struct file_system_type *fs_type, 640cifs_do_mount(struct file_system_type *fs_type,
635 int flags, const char *dev_name, void *data) 641 int flags, const char *dev_name, void *data)
@@ -650,75 +656,73 @@ cifs_do_mount(struct file_system_type *fs_type,
650 cifs_sb = kzalloc(sizeof(struct cifs_sb_info), GFP_KERNEL); 656 cifs_sb = kzalloc(sizeof(struct cifs_sb_info), GFP_KERNEL);
651 if (cifs_sb == NULL) { 657 if (cifs_sb == NULL) {
652 root = ERR_PTR(-ENOMEM); 658 root = ERR_PTR(-ENOMEM);
653 goto out; 659 goto out_nls;
660 }
661
662 cifs_sb->mountdata = kstrndup(data, PAGE_SIZE, GFP_KERNEL);
663 if (cifs_sb->mountdata == NULL) {
664 root = ERR_PTR(-ENOMEM);
665 goto out_cifs_sb;
654 } 666 }
655 667
656 cifs_setup_cifs_sb(volume_info, cifs_sb); 668 cifs_setup_cifs_sb(volume_info, cifs_sb);
657 669
670 rc = cifs_mount(cifs_sb, volume_info);
671 if (rc) {
672 if (!(flags & MS_SILENT))
673 cERROR(1, "cifs_mount failed w/return code = %d", rc);
674 root = ERR_PTR(rc);
675 goto out_mountdata;
676 }
677
658 mnt_data.vol = volume_info; 678 mnt_data.vol = volume_info;
659 mnt_data.cifs_sb = cifs_sb; 679 mnt_data.cifs_sb = cifs_sb;
660 mnt_data.flags = flags; 680 mnt_data.flags = flags;
661 681
662 sb = sget(fs_type, cifs_match_super, set_anon_super, &mnt_data); 682 sb = sget(fs_type, cifs_match_super, cifs_set_super, &mnt_data);
663 if (IS_ERR(sb)) { 683 if (IS_ERR(sb)) {
664 root = ERR_CAST(sb); 684 root = ERR_CAST(sb);
665 goto out_cifs_sb; 685 cifs_umount(cifs_sb);
686 goto out;
666 } 687 }
667 688
668 if (sb->s_fs_info) { 689 if (sb->s_root) {
669 cFYI(1, "Use existing superblock"); 690 cFYI(1, "Use existing superblock");
670 goto out_shared; 691 cifs_umount(cifs_sb);
671 } 692 } else {
672 693 sb->s_flags = flags;
673 /* 694 /* BB should we make this contingent on mount parm? */
674 * Copy mount params for use in submounts. Better to do 695 sb->s_flags |= MS_NODIRATIME | MS_NOATIME;
675 * the copy here and deal with the error before cleanup gets 696
676 * complicated post-mount. 697 rc = cifs_read_super(sb);
677 */ 698 if (rc) {
678 cifs_sb->mountdata = kstrndup(data, PAGE_SIZE, GFP_KERNEL); 699 root = ERR_PTR(rc);
679 if (cifs_sb->mountdata == NULL) { 700 goto out_super;
680 root = ERR_PTR(-ENOMEM); 701 }
681 goto out_super;
682 }
683
684 sb->s_flags = flags;
685 /* BB should we make this contingent on mount parm? */
686 sb->s_flags |= MS_NODIRATIME | MS_NOATIME;
687 sb->s_fs_info = cifs_sb;
688 702
689 rc = cifs_read_super(sb, volume_info, dev_name, 703 sb->s_flags |= MS_ACTIVE;
690 flags & MS_SILENT ? 1 : 0);
691 if (rc) {
692 root = ERR_PTR(rc);
693 goto out_super;
694 } 704 }
695 705
696 sb->s_flags |= MS_ACTIVE;
697
698 root = cifs_get_root(volume_info, sb); 706 root = cifs_get_root(volume_info, sb);
699 if (root == NULL) 707 if (IS_ERR(root))
700 goto out_super; 708 goto out_super;
701 709
702 cFYI(1, "dentry root is: %p", root); 710 cFYI(1, "dentry root is: %p", root);
703 goto out; 711 goto out;
704 712
705out_shared:
706 root = cifs_get_root(volume_info, sb);
707 if (root)
708 cFYI(1, "dentry root is: %p", root);
709 goto out;
710
711out_super: 713out_super:
712 kfree(cifs_sb->mountdata);
713 deactivate_locked_super(sb); 714 deactivate_locked_super(sb);
714
715out_cifs_sb:
716 unload_nls(cifs_sb->local_nls);
717 kfree(cifs_sb);
718
719out: 715out:
720 cifs_cleanup_volume_info(&volume_info); 716 cifs_cleanup_volume_info(&volume_info);
721 return root; 717 return root;
718
719out_mountdata:
720 kfree(cifs_sb->mountdata);
721out_cifs_sb:
722 kfree(cifs_sb);
723out_nls:
724 unload_nls(volume_info->local_nls);
725 goto out;
722} 726}
723 727
724static ssize_t cifs_file_aio_write(struct kiocb *iocb, const struct iovec *iov, 728static ssize_t cifs_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
@@ -807,7 +811,7 @@ struct file_system_type cifs_fs_type = {
807 .owner = THIS_MODULE, 811 .owner = THIS_MODULE,
808 .name = "cifs", 812 .name = "cifs",
809 .mount = cifs_do_mount, 813 .mount = cifs_do_mount,
810 .kill_sb = kill_anon_super, 814 .kill_sb = cifs_kill_sb,
811 /* .fs_flags */ 815 /* .fs_flags */
812}; 816};
813const struct inode_operations cifs_dir_inode_ops = { 817const struct inode_operations cifs_dir_inode_ops = {
diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h
index 64313f778ebf..0900e1658c96 100644
--- a/fs/cifs/cifsfs.h
+++ b/fs/cifs/cifsfs.h
@@ -129,5 +129,5 @@ extern long cifs_ioctl(struct file *filep, unsigned int cmd, unsigned long arg);
129extern const struct export_operations cifs_export_ops; 129extern const struct export_operations cifs_export_ops;
130#endif /* CIFS_NFSD_EXPORT */ 130#endif /* CIFS_NFSD_EXPORT */
131 131
132#define CIFS_VERSION "1.72" 132#define CIFS_VERSION "1.73"
133#endif /* _CIFSFS_H */ 133#endif /* _CIFSFS_H */
diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h
index 953f84413c77..257f312ede42 100644
--- a/fs/cifs/cifsproto.h
+++ b/fs/cifs/cifsproto.h
@@ -157,9 +157,8 @@ extern int cifs_match_super(struct super_block *, void *);
157extern void cifs_cleanup_volume_info(struct smb_vol **pvolume_info); 157extern void cifs_cleanup_volume_info(struct smb_vol **pvolume_info);
158extern int cifs_setup_volume_info(struct smb_vol **pvolume_info, 158extern int cifs_setup_volume_info(struct smb_vol **pvolume_info,
159 char *mount_data, const char *devname); 159 char *mount_data, const char *devname);
160extern int cifs_mount(struct super_block *, struct cifs_sb_info *, 160extern int cifs_mount(struct cifs_sb_info *, struct smb_vol *);
161 struct smb_vol *, const char *); 161extern void cifs_umount(struct cifs_sb_info *);
162extern int cifs_umount(struct super_block *, struct cifs_sb_info *);
163extern void cifs_dfs_release_automount_timer(void); 162extern void cifs_dfs_release_automount_timer(void);
164void cifs_proc_init(void); 163void cifs_proc_init(void);
165void cifs_proc_clean(void); 164void cifs_proc_clean(void);
@@ -218,7 +217,8 @@ extern int get_dfs_path(int xid, struct cifs_ses *pSesInfo,
218 struct dfs_info3_param **preferrals, 217 struct dfs_info3_param **preferrals,
219 int remap); 218 int remap);
220extern void reset_cifs_unix_caps(int xid, struct cifs_tcon *tcon, 219extern void reset_cifs_unix_caps(int xid, struct cifs_tcon *tcon,
221 struct super_block *sb, struct smb_vol *vol); 220 struct cifs_sb_info *cifs_sb,
221 struct smb_vol *vol);
222extern int CIFSSMBQFSInfo(const int xid, struct cifs_tcon *tcon, 222extern int CIFSSMBQFSInfo(const int xid, struct cifs_tcon *tcon,
223 struct kstatfs *FSData); 223 struct kstatfs *FSData);
224extern int SMBOldQFSInfo(const int xid, struct cifs_tcon *tcon, 224extern int SMBOldQFSInfo(const int xid, struct cifs_tcon *tcon,
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 6d88b82537c3..7f540df52527 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -152,7 +152,7 @@ cifs_reconnect(struct TCP_Server_Info *server)
152 mid_entry->callback(mid_entry); 152 mid_entry->callback(mid_entry);
153 } 153 }
154 154
155 while (server->tcpStatus == CifsNeedReconnect) { 155 do {
156 try_to_freeze(); 156 try_to_freeze();
157 157
158 /* we should try only the port we connected to before */ 158 /* we should try only the port we connected to before */
@@ -167,7 +167,7 @@ cifs_reconnect(struct TCP_Server_Info *server)
167 server->tcpStatus = CifsNeedNegotiate; 167 server->tcpStatus = CifsNeedNegotiate;
168 spin_unlock(&GlobalMid_Lock); 168 spin_unlock(&GlobalMid_Lock);
169 } 169 }
170 } 170 } while (server->tcpStatus == CifsNeedReconnect);
171 171
172 return rc; 172 return rc;
173} 173}
@@ -784,7 +784,7 @@ cifs_parse_mount_options(const char *mountdata, const char *devname,
784 struct smb_vol *vol) 784 struct smb_vol *vol)
785{ 785{
786 char *value, *data, *end; 786 char *value, *data, *end;
787 char *mountdata_copy, *options; 787 char *mountdata_copy = NULL, *options;
788 unsigned int temp_len, i, j; 788 unsigned int temp_len, i, j;
789 char separator[2]; 789 char separator[2];
790 short int override_uid = -1; 790 short int override_uid = -1;
@@ -1391,7 +1391,7 @@ cifs_parse_mount_options(const char *mountdata, const char *devname,
1391 "/proc/fs/cifs/LookupCacheEnabled to 0\n"); 1391 "/proc/fs/cifs/LookupCacheEnabled to 0\n");
1392 } else if (strnicmp(data, "fsc", 3) == 0) { 1392 } else if (strnicmp(data, "fsc", 3) == 0) {
1393#ifndef CONFIG_CIFS_FSCACHE 1393#ifndef CONFIG_CIFS_FSCACHE
1394 cERROR(1, "FS-Cache support needs CONFIG_CIFS_FSCACHE" 1394 cERROR(1, "FS-Cache support needs CONFIG_CIFS_FSCACHE "
1395 "kernel config option set"); 1395 "kernel config option set");
1396 goto cifs_parse_mount_err; 1396 goto cifs_parse_mount_err;
1397#endif 1397#endif
@@ -1976,7 +1976,7 @@ cifs_get_smb_ses(struct TCP_Server_Info *server, struct smb_vol *volume_info)
1976 warned_on_ntlm = true; 1976 warned_on_ntlm = true;
1977 cERROR(1, "default security mechanism requested. The default " 1977 cERROR(1, "default security mechanism requested. The default "
1978 "security mechanism will be upgraded from ntlm to " 1978 "security mechanism will be upgraded from ntlm to "
1979 "ntlmv2 in kernel release 2.6.41"); 1979 "ntlmv2 in kernel release 3.1");
1980 } 1980 }
1981 ses->overrideSecFlg = volume_info->secFlg; 1981 ses->overrideSecFlg = volume_info->secFlg;
1982 1982
@@ -2149,7 +2149,10 @@ cifs_put_tlink(struct tcon_link *tlink)
2149} 2149}
2150 2150
2151static inline struct tcon_link * 2151static inline struct tcon_link *
2152cifs_sb_master_tlink(struct cifs_sb_info *cifs_sb); 2152cifs_sb_master_tlink(struct cifs_sb_info *cifs_sb)
2153{
2154 return cifs_sb->master_tlink;
2155}
2153 2156
2154static int 2157static int
2155compare_mount_options(struct super_block *sb, struct cifs_mnt_data *mnt_data) 2158compare_mount_options(struct super_block *sb, struct cifs_mnt_data *mnt_data)
@@ -2543,7 +2546,7 @@ ip_connect(struct TCP_Server_Info *server)
2543} 2546}
2544 2547
2545void reset_cifs_unix_caps(int xid, struct cifs_tcon *tcon, 2548void reset_cifs_unix_caps(int xid, struct cifs_tcon *tcon,
2546 struct super_block *sb, struct smb_vol *vol_info) 2549 struct cifs_sb_info *cifs_sb, struct smb_vol *vol_info)
2547{ 2550{
2548 /* if we are reconnecting then should we check to see if 2551 /* if we are reconnecting then should we check to see if
2549 * any requested capabilities changed locally e.g. via 2552 * any requested capabilities changed locally e.g. via
@@ -2597,22 +2600,23 @@ void reset_cifs_unix_caps(int xid, struct cifs_tcon *tcon,
2597 cap &= ~CIFS_UNIX_POSIX_ACL_CAP; 2600 cap &= ~CIFS_UNIX_POSIX_ACL_CAP;
2598 else if (CIFS_UNIX_POSIX_ACL_CAP & cap) { 2601 else if (CIFS_UNIX_POSIX_ACL_CAP & cap) {
2599 cFYI(1, "negotiated posix acl support"); 2602 cFYI(1, "negotiated posix acl support");
2600 if (sb) 2603 if (cifs_sb)
2601 sb->s_flags |= MS_POSIXACL; 2604 cifs_sb->mnt_cifs_flags |=
2605 CIFS_MOUNT_POSIXACL;
2602 } 2606 }
2603 2607
2604 if (vol_info && vol_info->posix_paths == 0) 2608 if (vol_info && vol_info->posix_paths == 0)
2605 cap &= ~CIFS_UNIX_POSIX_PATHNAMES_CAP; 2609 cap &= ~CIFS_UNIX_POSIX_PATHNAMES_CAP;
2606 else if (cap & CIFS_UNIX_POSIX_PATHNAMES_CAP) { 2610 else if (cap & CIFS_UNIX_POSIX_PATHNAMES_CAP) {
2607 cFYI(1, "negotiate posix pathnames"); 2611 cFYI(1, "negotiate posix pathnames");
2608 if (sb) 2612 if (cifs_sb)
2609 CIFS_SB(sb)->mnt_cifs_flags |= 2613 cifs_sb->mnt_cifs_flags |=
2610 CIFS_MOUNT_POSIX_PATHS; 2614 CIFS_MOUNT_POSIX_PATHS;
2611 } 2615 }
2612 2616
2613 if (sb && (CIFS_SB(sb)->rsize > 127 * 1024)) { 2617 if (cifs_sb && (cifs_sb->rsize > 127 * 1024)) {
2614 if ((cap & CIFS_UNIX_LARGE_READ_CAP) == 0) { 2618 if ((cap & CIFS_UNIX_LARGE_READ_CAP) == 0) {
2615 CIFS_SB(sb)->rsize = 127 * 1024; 2619 cifs_sb->rsize = 127 * 1024;
2616 cFYI(DBG2, "larger reads not supported by srv"); 2620 cFYI(DBG2, "larger reads not supported by srv");
2617 } 2621 }
2618 } 2622 }
@@ -2659,6 +2663,9 @@ void cifs_setup_cifs_sb(struct smb_vol *pvolume_info,
2659{ 2663{
2660 INIT_DELAYED_WORK(&cifs_sb->prune_tlinks, cifs_prune_tlinks); 2664 INIT_DELAYED_WORK(&cifs_sb->prune_tlinks, cifs_prune_tlinks);
2661 2665
2666 spin_lock_init(&cifs_sb->tlink_tree_lock);
2667 cifs_sb->tlink_tree = RB_ROOT;
2668
2662 if (pvolume_info->rsize > CIFSMaxBufSize) { 2669 if (pvolume_info->rsize > CIFSMaxBufSize) {
2663 cERROR(1, "rsize %d too large, using MaxBufSize", 2670 cERROR(1, "rsize %d too large, using MaxBufSize",
2664 pvolume_info->rsize); 2671 pvolume_info->rsize);
@@ -2747,21 +2754,21 @@ void cifs_setup_cifs_sb(struct smb_vol *pvolume_info,
2747 2754
2748/* 2755/*
2749 * When the server supports very large writes via POSIX extensions, we can 2756 * When the server supports very large writes via POSIX extensions, we can
2750 * allow up to 2^24 - PAGE_CACHE_SIZE. 2757 * allow up to 2^24-1, minus the size of a WRITE_AND_X header, not including
2758 * the RFC1001 length.
2751 * 2759 *
2752 * Note that this might make for "interesting" allocation problems during 2760 * Note that this might make for "interesting" allocation problems during
2753 * writeback however (as we have to allocate an array of pointers for the 2761 * writeback however as we have to allocate an array of pointers for the
2754 * pages). A 16M write means ~32kb page array with PAGE_CACHE_SIZE == 4096. 2762 * pages. A 16M write means ~32kb page array with PAGE_CACHE_SIZE == 4096.
2755 */ 2763 */
2756#define CIFS_MAX_WSIZE ((1<<24) - PAGE_CACHE_SIZE) 2764#define CIFS_MAX_WSIZE ((1<<24) - 1 - sizeof(WRITE_REQ) + 4)
2757 2765
2758/* 2766/*
2759 * When the server doesn't allow large posix writes, default to a wsize of 2767 * When the server doesn't allow large posix writes, only allow a wsize of
2760 * 128k - PAGE_CACHE_SIZE -- one page less than the largest frame size 2768 * 128k minus the size of the WRITE_AND_X header. That allows for a write up
2761 * described in RFC1001. This allows space for the header without going over 2769 * to the maximum size described by RFC1002.
2762 * that by default.
2763 */ 2770 */
2764#define CIFS_MAX_RFC1001_WSIZE (128 * 1024 - PAGE_CACHE_SIZE) 2771#define CIFS_MAX_RFC1002_WSIZE (128 * 1024 - sizeof(WRITE_REQ) + 4)
2765 2772
2766/* 2773/*
2767 * The default wsize is 1M. find_get_pages seems to return a maximum of 256 2774 * The default wsize is 1M. find_get_pages seems to return a maximum of 256
@@ -2780,11 +2787,18 @@ cifs_negotiate_wsize(struct cifs_tcon *tcon, struct smb_vol *pvolume_info)
2780 2787
2781 /* can server support 24-bit write sizes? (via UNIX extensions) */ 2788 /* can server support 24-bit write sizes? (via UNIX extensions) */
2782 if (!tcon->unix_ext || !(unix_cap & CIFS_UNIX_LARGE_WRITE_CAP)) 2789 if (!tcon->unix_ext || !(unix_cap & CIFS_UNIX_LARGE_WRITE_CAP))
2783 wsize = min_t(unsigned int, wsize, CIFS_MAX_RFC1001_WSIZE); 2790 wsize = min_t(unsigned int, wsize, CIFS_MAX_RFC1002_WSIZE);
2784 2791
2785 /* no CAP_LARGE_WRITE_X? Limit it to 16 bits */ 2792 /*
2786 if (!(server->capabilities & CAP_LARGE_WRITE_X)) 2793 * no CAP_LARGE_WRITE_X or is signing enabled without CAP_UNIX set?
2787 wsize = min_t(unsigned int, wsize, USHRT_MAX); 2794 * Limit it to max buffer offered by the server, minus the size of the
2795 * WRITEX header, not including the 4 byte RFC1001 length.
2796 */
2797 if (!(server->capabilities & CAP_LARGE_WRITE_X) ||
2798 (!(server->capabilities & CAP_UNIX) &&
2799 (server->sec_mode & (SECMODE_SIGN_ENABLED|SECMODE_SIGN_REQUIRED))))
2800 wsize = min_t(unsigned int, wsize,
2801 server->maxBuf - sizeof(WRITE_REQ) + 4);
2788 2802
2789 /* hard limit of CIFS_MAX_WSIZE */ 2803 /* hard limit of CIFS_MAX_WSIZE */
2790 wsize = min_t(unsigned int, wsize, CIFS_MAX_WSIZE); 2804 wsize = min_t(unsigned int, wsize, CIFS_MAX_WSIZE);
@@ -2934,7 +2948,11 @@ int cifs_setup_volume_info(struct smb_vol **pvolume_info, char *mount_data,
2934 2948
2935 if (volume_info->nullauth) { 2949 if (volume_info->nullauth) {
2936 cFYI(1, "null user"); 2950 cFYI(1, "null user");
2937 volume_info->username = ""; 2951 volume_info->username = kzalloc(1, GFP_KERNEL);
2952 if (volume_info->username == NULL) {
2953 rc = -ENOMEM;
2954 goto out;
2955 }
2938 } else if (volume_info->username) { 2956 } else if (volume_info->username) {
2939 /* BB fixme parse for domain name here */ 2957 /* BB fixme parse for domain name here */
2940 cFYI(1, "Username: %s", volume_info->username); 2958 cFYI(1, "Username: %s", volume_info->username);
@@ -2968,8 +2986,7 @@ out:
2968} 2986}
2969 2987
2970int 2988int
2971cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb, 2989cifs_mount(struct cifs_sb_info *cifs_sb, struct smb_vol *volume_info)
2972 struct smb_vol *volume_info, const char *devname)
2973{ 2990{
2974 int rc = 0; 2991 int rc = 0;
2975 int xid; 2992 int xid;
@@ -2980,6 +2997,13 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb,
2980 struct tcon_link *tlink; 2997 struct tcon_link *tlink;
2981#ifdef CONFIG_CIFS_DFS_UPCALL 2998#ifdef CONFIG_CIFS_DFS_UPCALL
2982 int referral_walks_count = 0; 2999 int referral_walks_count = 0;
3000
3001 rc = bdi_setup_and_register(&cifs_sb->bdi, "cifs", BDI_CAP_MAP_COPY);
3002 if (rc)
3003 return rc;
3004
3005 cifs_sb->bdi.ra_pages = default_backing_dev_info.ra_pages;
3006
2983try_mount_again: 3007try_mount_again:
2984 /* cleanup activities if we're chasing a referral */ 3008 /* cleanup activities if we're chasing a referral */
2985 if (referral_walks_count) { 3009 if (referral_walks_count) {
@@ -3004,6 +3028,7 @@ try_mount_again:
3004 srvTcp = cifs_get_tcp_session(volume_info); 3028 srvTcp = cifs_get_tcp_session(volume_info);
3005 if (IS_ERR(srvTcp)) { 3029 if (IS_ERR(srvTcp)) {
3006 rc = PTR_ERR(srvTcp); 3030 rc = PTR_ERR(srvTcp);
3031 bdi_destroy(&cifs_sb->bdi);
3007 goto out; 3032 goto out;
3008 } 3033 }
3009 3034
@@ -3015,14 +3040,6 @@ try_mount_again:
3015 goto mount_fail_check; 3040 goto mount_fail_check;
3016 } 3041 }
3017 3042
3018 if (pSesInfo->capabilities & CAP_LARGE_FILES)
3019 sb->s_maxbytes = MAX_LFS_FILESIZE;
3020 else
3021 sb->s_maxbytes = MAX_NON_LFS;
3022
3023 /* BB FIXME fix time_gran to be larger for LANMAN sessions */
3024 sb->s_time_gran = 100;
3025
3026 /* search for existing tcon to this server share */ 3043 /* search for existing tcon to this server share */
3027 tcon = cifs_get_tcon(pSesInfo, volume_info); 3044 tcon = cifs_get_tcon(pSesInfo, volume_info);
3028 if (IS_ERR(tcon)) { 3045 if (IS_ERR(tcon)) {
@@ -3035,7 +3052,7 @@ try_mount_again:
3035 if (tcon->ses->capabilities & CAP_UNIX) { 3052 if (tcon->ses->capabilities & CAP_UNIX) {
3036 /* reset of caps checks mount to see if unix extensions 3053 /* reset of caps checks mount to see if unix extensions
3037 disabled for just this mount */ 3054 disabled for just this mount */
3038 reset_cifs_unix_caps(xid, tcon, sb, volume_info); 3055 reset_cifs_unix_caps(xid, tcon, cifs_sb, volume_info);
3039 if ((tcon->ses->server->tcpStatus == CifsNeedReconnect) && 3056 if ((tcon->ses->server->tcpStatus == CifsNeedReconnect) &&
3040 (le64_to_cpu(tcon->fsUnixInfo.Capability) & 3057 (le64_to_cpu(tcon->fsUnixInfo.Capability) &
3041 CIFS_UNIX_TRANSPORT_ENCRYPTION_MANDATORY_CAP)) { 3058 CIFS_UNIX_TRANSPORT_ENCRYPTION_MANDATORY_CAP)) {
@@ -3158,6 +3175,7 @@ mount_fail_check:
3158 cifs_put_smb_ses(pSesInfo); 3175 cifs_put_smb_ses(pSesInfo);
3159 else 3176 else
3160 cifs_put_tcp_session(srvTcp); 3177 cifs_put_tcp_session(srvTcp);
3178 bdi_destroy(&cifs_sb->bdi);
3161 goto out; 3179 goto out;
3162 } 3180 }
3163 3181
@@ -3171,6 +3189,10 @@ out:
3171 return rc; 3189 return rc;
3172} 3190}
3173 3191
3192/*
3193 * Issue a TREE_CONNECT request. Note that for IPC$ shares, that the tcon
3194 * pointer may be NULL.
3195 */
3174int 3196int
3175CIFSTCon(unsigned int xid, struct cifs_ses *ses, 3197CIFSTCon(unsigned int xid, struct cifs_ses *ses,
3176 const char *tree, struct cifs_tcon *tcon, 3198 const char *tree, struct cifs_tcon *tcon,
@@ -3205,7 +3227,7 @@ CIFSTCon(unsigned int xid, struct cifs_ses *ses,
3205 pSMB->AndXCommand = 0xFF; 3227 pSMB->AndXCommand = 0xFF;
3206 pSMB->Flags = cpu_to_le16(TCON_EXTENDED_SECINFO); 3228 pSMB->Flags = cpu_to_le16(TCON_EXTENDED_SECINFO);
3207 bcc_ptr = &pSMB->Password[0]; 3229 bcc_ptr = &pSMB->Password[0];
3208 if ((ses->server->sec_mode) & SECMODE_USER) { 3230 if (!tcon || (ses->server->sec_mode & SECMODE_USER)) {
3209 pSMB->PasswordLength = cpu_to_le16(1); /* minimum */ 3231 pSMB->PasswordLength = cpu_to_le16(1); /* minimum */
3210 *bcc_ptr = 0; /* password is null byte */ 3232 *bcc_ptr = 0; /* password is null byte */
3211 bcc_ptr++; /* skip password */ 3233 bcc_ptr++; /* skip password */
@@ -3328,8 +3350,8 @@ CIFSTCon(unsigned int xid, struct cifs_ses *ses,
3328 return rc; 3350 return rc;
3329} 3351}
3330 3352
3331int 3353void
3332cifs_umount(struct super_block *sb, struct cifs_sb_info *cifs_sb) 3354cifs_umount(struct cifs_sb_info *cifs_sb)
3333{ 3355{
3334 struct rb_root *root = &cifs_sb->tlink_tree; 3356 struct rb_root *root = &cifs_sb->tlink_tree;
3335 struct rb_node *node; 3357 struct rb_node *node;
@@ -3350,7 +3372,10 @@ cifs_umount(struct super_block *sb, struct cifs_sb_info *cifs_sb)
3350 } 3372 }
3351 spin_unlock(&cifs_sb->tlink_tree_lock); 3373 spin_unlock(&cifs_sb->tlink_tree_lock);
3352 3374
3353 return 0; 3375 bdi_destroy(&cifs_sb->bdi);
3376 kfree(cifs_sb->mountdata);
3377 unload_nls(cifs_sb->local_nls);
3378 kfree(cifs_sb);
3354} 3379}
3355 3380
3356int cifs_negotiate_protocol(unsigned int xid, struct cifs_ses *ses) 3381int cifs_negotiate_protocol(unsigned int xid, struct cifs_ses *ses)
@@ -3371,7 +3396,7 @@ int cifs_negotiate_protocol(unsigned int xid, struct cifs_ses *ses)
3371 } 3396 }
3372 if (rc == 0) { 3397 if (rc == 0) {
3373 spin_lock(&GlobalMid_Lock); 3398 spin_lock(&GlobalMid_Lock);
3374 if (server->tcpStatus != CifsExiting) 3399 if (server->tcpStatus == CifsNeedNegotiate)
3375 server->tcpStatus = CifsGood; 3400 server->tcpStatus = CifsGood;
3376 else 3401 else
3377 rc = -EHOSTDOWN; 3402 rc = -EHOSTDOWN;
@@ -3484,12 +3509,6 @@ out:
3484 return tcon; 3509 return tcon;
3485} 3510}
3486 3511
3487static inline struct tcon_link *
3488cifs_sb_master_tlink(struct cifs_sb_info *cifs_sb)
3489{
3490 return cifs_sb->master_tlink;
3491}
3492
3493struct cifs_tcon * 3512struct cifs_tcon *
3494cifs_sb_master_tcon(struct cifs_sb_info *cifs_sb) 3513cifs_sb_master_tcon(struct cifs_sb_info *cifs_sb)
3495{ 3514{
diff --git a/fs/cifs/fscache.c b/fs/cifs/fscache.c
index d368a47ba5eb..816696621ec9 100644
--- a/fs/cifs/fscache.c
+++ b/fs/cifs/fscache.c
@@ -28,14 +28,14 @@ void cifs_fscache_get_client_cookie(struct TCP_Server_Info *server)
28 server->fscache = 28 server->fscache =
29 fscache_acquire_cookie(cifs_fscache_netfs.primary_index, 29 fscache_acquire_cookie(cifs_fscache_netfs.primary_index,
30 &cifs_fscache_server_index_def, server); 30 &cifs_fscache_server_index_def, server);
31 cFYI(1, "CIFS: get client cookie (0x%p/0x%p)", server, 31 cFYI(1, "%s: (0x%p/0x%p)", __func__, server,
32 server->fscache); 32 server->fscache);
33} 33}
34 34
35void cifs_fscache_release_client_cookie(struct TCP_Server_Info *server) 35void cifs_fscache_release_client_cookie(struct TCP_Server_Info *server)
36{ 36{
37 cFYI(1, "CIFS: release client cookie (0x%p/0x%p)", server, 37 cFYI(1, "%s: (0x%p/0x%p)", __func__, server,
38 server->fscache); 38 server->fscache);
39 fscache_relinquish_cookie(server->fscache, 0); 39 fscache_relinquish_cookie(server->fscache, 0);
40 server->fscache = NULL; 40 server->fscache = NULL;
41} 41}
@@ -47,13 +47,13 @@ void cifs_fscache_get_super_cookie(struct cifs_tcon *tcon)
47 tcon->fscache = 47 tcon->fscache =
48 fscache_acquire_cookie(server->fscache, 48 fscache_acquire_cookie(server->fscache,
49 &cifs_fscache_super_index_def, tcon); 49 &cifs_fscache_super_index_def, tcon);
50 cFYI(1, "CIFS: get superblock cookie (0x%p/0x%p)", 50 cFYI(1, "%s: (0x%p/0x%p)", __func__, server->fscache,
51 server->fscache, tcon->fscache); 51 tcon->fscache);
52} 52}
53 53
54void cifs_fscache_release_super_cookie(struct cifs_tcon *tcon) 54void cifs_fscache_release_super_cookie(struct cifs_tcon *tcon)
55{ 55{
56 cFYI(1, "CIFS: releasing superblock cookie (0x%p)", tcon->fscache); 56 cFYI(1, "%s: (0x%p)", __func__, tcon->fscache);
57 fscache_relinquish_cookie(tcon->fscache, 0); 57 fscache_relinquish_cookie(tcon->fscache, 0);
58 tcon->fscache = NULL; 58 tcon->fscache = NULL;
59} 59}
@@ -70,8 +70,8 @@ static void cifs_fscache_enable_inode_cookie(struct inode *inode)
70 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_FSCACHE) { 70 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_FSCACHE) {
71 cifsi->fscache = fscache_acquire_cookie(tcon->fscache, 71 cifsi->fscache = fscache_acquire_cookie(tcon->fscache,
72 &cifs_fscache_inode_object_def, cifsi); 72 &cifs_fscache_inode_object_def, cifsi);
73 cFYI(1, "CIFS: got FH cookie (0x%p/0x%p)", tcon->fscache, 73 cFYI(1, "%s: got FH cookie (0x%p/0x%p)", __func__,
74 cifsi->fscache); 74 tcon->fscache, cifsi->fscache);
75 } 75 }
76} 76}
77 77
@@ -80,8 +80,7 @@ void cifs_fscache_release_inode_cookie(struct inode *inode)
80 struct cifsInodeInfo *cifsi = CIFS_I(inode); 80 struct cifsInodeInfo *cifsi = CIFS_I(inode);
81 81
82 if (cifsi->fscache) { 82 if (cifsi->fscache) {
83 cFYI(1, "CIFS releasing inode cookie (0x%p)", 83 cFYI(1, "%s: (0x%p)", __func__, cifsi->fscache);
84 cifsi->fscache);
85 fscache_relinquish_cookie(cifsi->fscache, 0); 84 fscache_relinquish_cookie(cifsi->fscache, 0);
86 cifsi->fscache = NULL; 85 cifsi->fscache = NULL;
87 } 86 }
@@ -92,8 +91,7 @@ static void cifs_fscache_disable_inode_cookie(struct inode *inode)
92 struct cifsInodeInfo *cifsi = CIFS_I(inode); 91 struct cifsInodeInfo *cifsi = CIFS_I(inode);
93 92
94 if (cifsi->fscache) { 93 if (cifsi->fscache) {
95 cFYI(1, "CIFS disabling inode cookie (0x%p)", 94 cFYI(1, "%s: (0x%p)", __func__, cifsi->fscache);
96 cifsi->fscache);
97 fscache_relinquish_cookie(cifsi->fscache, 1); 95 fscache_relinquish_cookie(cifsi->fscache, 1);
98 cifsi->fscache = NULL; 96 cifsi->fscache = NULL;
99 } 97 }
@@ -121,8 +119,8 @@ void cifs_fscache_reset_inode_cookie(struct inode *inode)
121 cifs_sb_master_tcon(cifs_sb)->fscache, 119 cifs_sb_master_tcon(cifs_sb)->fscache,
122 &cifs_fscache_inode_object_def, 120 &cifs_fscache_inode_object_def,
123 cifsi); 121 cifsi);
124 cFYI(1, "CIFS: new cookie 0x%p oldcookie 0x%p", 122 cFYI(1, "%s: new cookie 0x%p oldcookie 0x%p",
125 cifsi->fscache, old); 123 __func__, cifsi->fscache, old);
126 } 124 }
127} 125}
128 126
@@ -132,8 +130,8 @@ int cifs_fscache_release_page(struct page *page, gfp_t gfp)
132 struct inode *inode = page->mapping->host; 130 struct inode *inode = page->mapping->host;
133 struct cifsInodeInfo *cifsi = CIFS_I(inode); 131 struct cifsInodeInfo *cifsi = CIFS_I(inode);
134 132
135 cFYI(1, "CIFS: fscache release page (0x%p/0x%p)", 133 cFYI(1, "%s: (0x%p/0x%p)", __func__, page,
136 page, cifsi->fscache); 134 cifsi->fscache);
137 if (!fscache_maybe_release_page(cifsi->fscache, page, gfp)) 135 if (!fscache_maybe_release_page(cifsi->fscache, page, gfp))
138 return 0; 136 return 0;
139 } 137 }
@@ -144,8 +142,7 @@ int cifs_fscache_release_page(struct page *page, gfp_t gfp)
144static void cifs_readpage_from_fscache_complete(struct page *page, void *ctx, 142static void cifs_readpage_from_fscache_complete(struct page *page, void *ctx,
145 int error) 143 int error)
146{ 144{
147 cFYI(1, "CFS: readpage_from_fscache_complete (0x%p/%d)", 145 cFYI(1, "%s: (0x%p/%d)", __func__, page, error);
148 page, error);
149 if (!error) 146 if (!error)
150 SetPageUptodate(page); 147 SetPageUptodate(page);
151 unlock_page(page); 148 unlock_page(page);
@@ -158,7 +155,7 @@ int __cifs_readpage_from_fscache(struct inode *inode, struct page *page)
158{ 155{
159 int ret; 156 int ret;
160 157
161 cFYI(1, "CIFS: readpage_from_fscache(fsc:%p, p:%p, i:0x%p", 158 cFYI(1, "%s: (fsc:%p, p:%p, i:0x%p", __func__,
162 CIFS_I(inode)->fscache, page, inode); 159 CIFS_I(inode)->fscache, page, inode);
163 ret = fscache_read_or_alloc_page(CIFS_I(inode)->fscache, page, 160 ret = fscache_read_or_alloc_page(CIFS_I(inode)->fscache, page,
164 cifs_readpage_from_fscache_complete, 161 cifs_readpage_from_fscache_complete,
@@ -167,11 +164,11 @@ int __cifs_readpage_from_fscache(struct inode *inode, struct page *page)
167 switch (ret) { 164 switch (ret) {
168 165
169 case 0: /* page found in fscache, read submitted */ 166 case 0: /* page found in fscache, read submitted */
170 cFYI(1, "CIFS: readpage_from_fscache: submitted"); 167 cFYI(1, "%s: submitted", __func__);
171 return ret; 168 return ret;
172 case -ENOBUFS: /* page won't be cached */ 169 case -ENOBUFS: /* page won't be cached */
173 case -ENODATA: /* page not in cache */ 170 case -ENODATA: /* page not in cache */
174 cFYI(1, "CIFS: readpage_from_fscache %d", ret); 171 cFYI(1, "%s: %d", __func__, ret);
175 return 1; 172 return 1;
176 173
177 default: 174 default:
@@ -190,7 +187,7 @@ int __cifs_readpages_from_fscache(struct inode *inode,
190{ 187{
191 int ret; 188 int ret;
192 189
193 cFYI(1, "CIFS: __cifs_readpages_from_fscache (0x%p/%u/0x%p)", 190 cFYI(1, "%s: (0x%p/%u/0x%p)", __func__,
194 CIFS_I(inode)->fscache, *nr_pages, inode); 191 CIFS_I(inode)->fscache, *nr_pages, inode);
195 ret = fscache_read_or_alloc_pages(CIFS_I(inode)->fscache, mapping, 192 ret = fscache_read_or_alloc_pages(CIFS_I(inode)->fscache, mapping,
196 pages, nr_pages, 193 pages, nr_pages,
@@ -199,12 +196,12 @@ int __cifs_readpages_from_fscache(struct inode *inode,
199 mapping_gfp_mask(mapping)); 196 mapping_gfp_mask(mapping));
200 switch (ret) { 197 switch (ret) {
201 case 0: /* read submitted to the cache for all pages */ 198 case 0: /* read submitted to the cache for all pages */
202 cFYI(1, "CIFS: readpages_from_fscache: submitted"); 199 cFYI(1, "%s: submitted", __func__);
203 return ret; 200 return ret;
204 201
205 case -ENOBUFS: /* some pages are not cached and can't be */ 202 case -ENOBUFS: /* some pages are not cached and can't be */
206 case -ENODATA: /* some pages are not cached */ 203 case -ENODATA: /* some pages are not cached */
207 cFYI(1, "CIFS: readpages_from_fscache: no page"); 204 cFYI(1, "%s: no page", __func__);
208 return 1; 205 return 1;
209 206
210 default: 207 default:
@@ -218,7 +215,7 @@ void __cifs_readpage_to_fscache(struct inode *inode, struct page *page)
218{ 215{
219 int ret; 216 int ret;
220 217
221 cFYI(1, "CIFS: readpage_to_fscache(fsc: %p, p: %p, i: %p", 218 cFYI(1, "%s: (fsc: %p, p: %p, i: %p)", __func__,
222 CIFS_I(inode)->fscache, page, inode); 219 CIFS_I(inode)->fscache, page, inode);
223 ret = fscache_write_page(CIFS_I(inode)->fscache, page, GFP_KERNEL); 220 ret = fscache_write_page(CIFS_I(inode)->fscache, page, GFP_KERNEL);
224 if (ret != 0) 221 if (ret != 0)
@@ -230,7 +227,7 @@ void __cifs_fscache_invalidate_page(struct page *page, struct inode *inode)
230 struct cifsInodeInfo *cifsi = CIFS_I(inode); 227 struct cifsInodeInfo *cifsi = CIFS_I(inode);
231 struct fscache_cookie *cookie = cifsi->fscache; 228 struct fscache_cookie *cookie = cifsi->fscache;
232 229
233 cFYI(1, "CIFS: fscache invalidatepage (0x%p/0x%p)", page, cookie); 230 cFYI(1, "%s: (0x%p/0x%p)", __func__, page, cookie);
234 fscache_wait_on_page_write(cookie, page); 231 fscache_wait_on_page_write(cookie, page);
235 fscache_uncache_page(cookie, page); 232 fscache_uncache_page(cookie, page);
236} 233}
diff --git a/fs/cifs/smbencrypt.c b/fs/cifs/smbencrypt.c
index 1525d5e662b6..1c5b770c3141 100644
--- a/fs/cifs/smbencrypt.c
+++ b/fs/cifs/smbencrypt.c
@@ -90,12 +90,10 @@ smbhash(unsigned char *out, const unsigned char *in, unsigned char *key)
90 sg_init_one(&sgout, out, 8); 90 sg_init_one(&sgout, out, 8);
91 91
92 rc = crypto_blkcipher_encrypt(&desc, &sgout, &sgin, 8); 92 rc = crypto_blkcipher_encrypt(&desc, &sgout, &sgin, 8);
93 if (rc) { 93 if (rc)
94 cERROR(1, "could not encrypt crypt key rc: %d\n", rc); 94 cERROR(1, "could not encrypt crypt key rc: %d\n", rc);
95 crypto_free_blkcipher(tfm_des);
96 goto smbhash_err;
97 }
98 95
96 crypto_free_blkcipher(tfm_des);
99smbhash_err: 97smbhash_err:
100 return rc; 98 return rc;
101} 99}
diff --git a/fs/coda/pioctl.c b/fs/coda/pioctl.c
index 6cbb3afb36dc..cb140ef293e4 100644
--- a/fs/coda/pioctl.c
+++ b/fs/coda/pioctl.c
@@ -43,8 +43,6 @@ const struct file_operations coda_ioctl_operations = {
43/* the coda pioctl inode ops */ 43/* the coda pioctl inode ops */
44static int coda_ioctl_permission(struct inode *inode, int mask, unsigned int flags) 44static int coda_ioctl_permission(struct inode *inode, int mask, unsigned int flags)
45{ 45{
46 if (flags & IPERM_FLAG_RCU)
47 return -ECHILD;
48 return (mask & MAY_EXEC) ? -EACCES : 0; 46 return (mask & MAY_EXEC) ? -EACCES : 0;
49} 47}
50 48
diff --git a/fs/dcookies.c b/fs/dcookies.c
index a21cabdbd87b..dda0dc702d1b 100644
--- a/fs/dcookies.c
+++ b/fs/dcookies.c
@@ -178,6 +178,8 @@ SYSCALL_DEFINE(lookup_dcookie)(u64 cookie64, char __user * buf, size_t len)
178 /* FIXME: (deleted) ? */ 178 /* FIXME: (deleted) ? */
179 path = d_path(&dcs->path, kbuf, PAGE_SIZE); 179 path = d_path(&dcs->path, kbuf, PAGE_SIZE);
180 180
181 mutex_unlock(&dcookie_mutex);
182
181 if (IS_ERR(path)) { 183 if (IS_ERR(path)) {
182 err = PTR_ERR(path); 184 err = PTR_ERR(path);
183 goto out_free; 185 goto out_free;
@@ -194,6 +196,7 @@ SYSCALL_DEFINE(lookup_dcookie)(u64 cookie64, char __user * buf, size_t len)
194 196
195out_free: 197out_free:
196 kfree(kbuf); 198 kfree(kbuf);
199 return err;
197out: 200out:
198 mutex_unlock(&dcookie_mutex); 201 mutex_unlock(&dcookie_mutex);
199 return err; 202 return err;
diff --git a/fs/exec.c b/fs/exec.c
index ea5f748906a8..6075a1e727ae 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1093,6 +1093,7 @@ int flush_old_exec(struct linux_binprm * bprm)
1093 1093
1094 bprm->mm = NULL; /* We're using it now */ 1094 bprm->mm = NULL; /* We're using it now */
1095 1095
1096 set_fs(USER_DS);
1096 current->flags &= ~(PF_RANDOMIZE | PF_KTHREAD); 1097 current->flags &= ~(PF_RANDOMIZE | PF_KTHREAD);
1097 flush_thread(); 1098 flush_thread();
1098 current->personality &= ~bprm->per_clear; 1099 current->personality &= ~bprm->per_clear;
@@ -1357,10 +1358,6 @@ int search_binary_handler(struct linux_binprm *bprm,struct pt_regs *regs)
1357 if (retval) 1358 if (retval)
1358 return retval; 1359 return retval;
1359 1360
1360 /* kernel module loader fixup */
1361 /* so we don't try to load run modprobe in kernel space. */
1362 set_fs(USER_DS);
1363
1364 retval = audit_bprm(bprm); 1361 retval = audit_bprm(bprm);
1365 if (retval) 1362 if (retval)
1366 return retval; 1363 return retval;
@@ -1999,7 +1996,7 @@ static void wait_for_dump_helpers(struct file *file)
1999 * is a special value that we use to trap recursive 1996 * is a special value that we use to trap recursive
2000 * core dumps 1997 * core dumps
2001 */ 1998 */
2002static int umh_pipe_setup(struct subprocess_info *info) 1999static int umh_pipe_setup(struct subprocess_info *info, struct cred *new)
2003{ 2000{
2004 struct file *rp, *wp; 2001 struct file *rp, *wp;
2005 struct fdtable *fdt; 2002 struct fdtable *fdt;
diff --git a/fs/ext4/ext4_extents.h b/fs/ext4/ext4_extents.h
index 2e29abb30f76..095c36f3b612 100644
--- a/fs/ext4/ext4_extents.h
+++ b/fs/ext4/ext4_extents.h
@@ -125,7 +125,7 @@ struct ext4_ext_path {
125 * positive retcode - signal for ext4_ext_walk_space(), see below 125 * positive retcode - signal for ext4_ext_walk_space(), see below
126 * callback must return valid extent (passed or newly created) 126 * callback must return valid extent (passed or newly created)
127 */ 127 */
128typedef int (*ext_prepare_callback)(struct inode *, struct ext4_ext_path *, 128typedef int (*ext_prepare_callback)(struct inode *, ext4_lblk_t,
129 struct ext4_ext_cache *, 129 struct ext4_ext_cache *,
130 struct ext4_extent *, void *); 130 struct ext4_extent *, void *);
131 131
@@ -133,8 +133,11 @@ typedef int (*ext_prepare_callback)(struct inode *, struct ext4_ext_path *,
133#define EXT_BREAK 1 133#define EXT_BREAK 1
134#define EXT_REPEAT 2 134#define EXT_REPEAT 2
135 135
136/* Maximum logical block in a file; ext4_extent's ee_block is __le32 */ 136/*
137#define EXT_MAX_BLOCK 0xffffffff 137 * Maximum number of logical blocks in a file; ext4_extent's ee_block is
138 * __le32.
139 */
140#define EXT_MAX_BLOCKS 0xffffffff
138 141
139/* 142/*
140 * EXT_INIT_MAX_LEN is the maximum number of blocks we can have in an 143 * EXT_INIT_MAX_LEN is the maximum number of blocks we can have in an
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 5199bac7fc62..f815cc81e7a2 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -1408,7 +1408,7 @@ got_index:
1408 1408
1409/* 1409/*
1410 * ext4_ext_next_allocated_block: 1410 * ext4_ext_next_allocated_block:
1411 * returns allocated block in subsequent extent or EXT_MAX_BLOCK. 1411 * returns allocated block in subsequent extent or EXT_MAX_BLOCKS.
1412 * NOTE: it considers block number from index entry as 1412 * NOTE: it considers block number from index entry as
1413 * allocated block. Thus, index entries have to be consistent 1413 * allocated block. Thus, index entries have to be consistent
1414 * with leaves. 1414 * with leaves.
@@ -1422,7 +1422,7 @@ ext4_ext_next_allocated_block(struct ext4_ext_path *path)
1422 depth = path->p_depth; 1422 depth = path->p_depth;
1423 1423
1424 if (depth == 0 && path->p_ext == NULL) 1424 if (depth == 0 && path->p_ext == NULL)
1425 return EXT_MAX_BLOCK; 1425 return EXT_MAX_BLOCKS;
1426 1426
1427 while (depth >= 0) { 1427 while (depth >= 0) {
1428 if (depth == path->p_depth) { 1428 if (depth == path->p_depth) {
@@ -1439,12 +1439,12 @@ ext4_ext_next_allocated_block(struct ext4_ext_path *path)
1439 depth--; 1439 depth--;
1440 } 1440 }
1441 1441
1442 return EXT_MAX_BLOCK; 1442 return EXT_MAX_BLOCKS;
1443} 1443}
1444 1444
1445/* 1445/*
1446 * ext4_ext_next_leaf_block: 1446 * ext4_ext_next_leaf_block:
1447 * returns first allocated block from next leaf or EXT_MAX_BLOCK 1447 * returns first allocated block from next leaf or EXT_MAX_BLOCKS
1448 */ 1448 */
1449static ext4_lblk_t ext4_ext_next_leaf_block(struct inode *inode, 1449static ext4_lblk_t ext4_ext_next_leaf_block(struct inode *inode,
1450 struct ext4_ext_path *path) 1450 struct ext4_ext_path *path)
@@ -1456,7 +1456,7 @@ static ext4_lblk_t ext4_ext_next_leaf_block(struct inode *inode,
1456 1456
1457 /* zero-tree has no leaf blocks at all */ 1457 /* zero-tree has no leaf blocks at all */
1458 if (depth == 0) 1458 if (depth == 0)
1459 return EXT_MAX_BLOCK; 1459 return EXT_MAX_BLOCKS;
1460 1460
1461 /* go to index block */ 1461 /* go to index block */
1462 depth--; 1462 depth--;
@@ -1469,7 +1469,7 @@ static ext4_lblk_t ext4_ext_next_leaf_block(struct inode *inode,
1469 depth--; 1469 depth--;
1470 } 1470 }
1471 1471
1472 return EXT_MAX_BLOCK; 1472 return EXT_MAX_BLOCKS;
1473} 1473}
1474 1474
1475/* 1475/*
@@ -1677,13 +1677,13 @@ static unsigned int ext4_ext_check_overlap(struct inode *inode,
1677 */ 1677 */
1678 if (b2 < b1) { 1678 if (b2 < b1) {
1679 b2 = ext4_ext_next_allocated_block(path); 1679 b2 = ext4_ext_next_allocated_block(path);
1680 if (b2 == EXT_MAX_BLOCK) 1680 if (b2 == EXT_MAX_BLOCKS)
1681 goto out; 1681 goto out;
1682 } 1682 }
1683 1683
1684 /* check for wrap through zero on extent logical start block*/ 1684 /* check for wrap through zero on extent logical start block*/
1685 if (b1 + len1 < b1) { 1685 if (b1 + len1 < b1) {
1686 len1 = EXT_MAX_BLOCK - b1; 1686 len1 = EXT_MAX_BLOCKS - b1;
1687 newext->ee_len = cpu_to_le16(len1); 1687 newext->ee_len = cpu_to_le16(len1);
1688 ret = 1; 1688 ret = 1;
1689 } 1689 }
@@ -1767,7 +1767,7 @@ repeat:
1767 fex = EXT_LAST_EXTENT(eh); 1767 fex = EXT_LAST_EXTENT(eh);
1768 next = ext4_ext_next_leaf_block(inode, path); 1768 next = ext4_ext_next_leaf_block(inode, path);
1769 if (le32_to_cpu(newext->ee_block) > le32_to_cpu(fex->ee_block) 1769 if (le32_to_cpu(newext->ee_block) > le32_to_cpu(fex->ee_block)
1770 && next != EXT_MAX_BLOCK) { 1770 && next != EXT_MAX_BLOCKS) {
1771 ext_debug("next leaf block - %d\n", next); 1771 ext_debug("next leaf block - %d\n", next);
1772 BUG_ON(npath != NULL); 1772 BUG_ON(npath != NULL);
1773 npath = ext4_ext_find_extent(inode, next, NULL); 1773 npath = ext4_ext_find_extent(inode, next, NULL);
@@ -1887,7 +1887,7 @@ static int ext4_ext_walk_space(struct inode *inode, ext4_lblk_t block,
1887 BUG_ON(func == NULL); 1887 BUG_ON(func == NULL);
1888 BUG_ON(inode == NULL); 1888 BUG_ON(inode == NULL);
1889 1889
1890 while (block < last && block != EXT_MAX_BLOCK) { 1890 while (block < last && block != EXT_MAX_BLOCKS) {
1891 num = last - block; 1891 num = last - block;
1892 /* find extent for this block */ 1892 /* find extent for this block */
1893 down_read(&EXT4_I(inode)->i_data_sem); 1893 down_read(&EXT4_I(inode)->i_data_sem);
@@ -1958,7 +1958,7 @@ static int ext4_ext_walk_space(struct inode *inode, ext4_lblk_t block,
1958 err = -EIO; 1958 err = -EIO;
1959 break; 1959 break;
1960 } 1960 }
1961 err = func(inode, path, &cbex, ex, cbdata); 1961 err = func(inode, next, &cbex, ex, cbdata);
1962 ext4_ext_drop_refs(path); 1962 ext4_ext_drop_refs(path);
1963 1963
1964 if (err < 0) 1964 if (err < 0)
@@ -2020,7 +2020,7 @@ ext4_ext_put_gap_in_cache(struct inode *inode, struct ext4_ext_path *path,
2020 if (ex == NULL) { 2020 if (ex == NULL) {
2021 /* there is no extent yet, so gap is [0;-] */ 2021 /* there is no extent yet, so gap is [0;-] */
2022 lblock = 0; 2022 lblock = 0;
2023 len = EXT_MAX_BLOCK; 2023 len = EXT_MAX_BLOCKS;
2024 ext_debug("cache gap(whole file):"); 2024 ext_debug("cache gap(whole file):");
2025 } else if (block < le32_to_cpu(ex->ee_block)) { 2025 } else if (block < le32_to_cpu(ex->ee_block)) {
2026 lblock = block; 2026 lblock = block;
@@ -2350,7 +2350,7 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
2350 * never happen because at least one of the end points 2350 * never happen because at least one of the end points
2351 * needs to be on the edge of the extent. 2351 * needs to be on the edge of the extent.
2352 */ 2352 */
2353 if (end == EXT_MAX_BLOCK) { 2353 if (end == EXT_MAX_BLOCKS - 1) {
2354 ext_debug(" bad truncate %u:%u\n", 2354 ext_debug(" bad truncate %u:%u\n",
2355 start, end); 2355 start, end);
2356 block = 0; 2356 block = 0;
@@ -2398,7 +2398,7 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
2398 * If this is a truncate, this condition 2398 * If this is a truncate, this condition
2399 * should never happen 2399 * should never happen
2400 */ 2400 */
2401 if (end == EXT_MAX_BLOCK) { 2401 if (end == EXT_MAX_BLOCKS - 1) {
2402 ext_debug(" bad truncate %u:%u\n", 2402 ext_debug(" bad truncate %u:%u\n",
2403 start, end); 2403 start, end);
2404 err = -EIO; 2404 err = -EIO;
@@ -2478,7 +2478,7 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
2478 * we need to remove it from the leaf 2478 * we need to remove it from the leaf
2479 */ 2479 */
2480 if (num == 0) { 2480 if (num == 0) {
2481 if (end != EXT_MAX_BLOCK) { 2481 if (end != EXT_MAX_BLOCKS - 1) {
2482 /* 2482 /*
2483 * For hole punching, we need to scoot all the 2483 * For hole punching, we need to scoot all the
2484 * extents up when an extent is removed so that 2484 * extents up when an extent is removed so that
@@ -3699,7 +3699,7 @@ void ext4_ext_truncate(struct inode *inode)
3699 3699
3700 last_block = (inode->i_size + sb->s_blocksize - 1) 3700 last_block = (inode->i_size + sb->s_blocksize - 1)
3701 >> EXT4_BLOCK_SIZE_BITS(sb); 3701 >> EXT4_BLOCK_SIZE_BITS(sb);
3702 err = ext4_ext_remove_space(inode, last_block, EXT_MAX_BLOCK); 3702 err = ext4_ext_remove_space(inode, last_block, EXT_MAX_BLOCKS - 1);
3703 3703
3704 /* In a multi-transaction truncate, we only make the final 3704 /* In a multi-transaction truncate, we only make the final
3705 * transaction synchronous. 3705 * transaction synchronous.
@@ -3914,14 +3914,13 @@ int ext4_convert_unwritten_extents(struct inode *inode, loff_t offset,
3914/* 3914/*
3915 * Callback function called for each extent to gather FIEMAP information. 3915 * Callback function called for each extent to gather FIEMAP information.
3916 */ 3916 */
3917static int ext4_ext_fiemap_cb(struct inode *inode, struct ext4_ext_path *path, 3917static int ext4_ext_fiemap_cb(struct inode *inode, ext4_lblk_t next,
3918 struct ext4_ext_cache *newex, struct ext4_extent *ex, 3918 struct ext4_ext_cache *newex, struct ext4_extent *ex,
3919 void *data) 3919 void *data)
3920{ 3920{
3921 __u64 logical; 3921 __u64 logical;
3922 __u64 physical; 3922 __u64 physical;
3923 __u64 length; 3923 __u64 length;
3924 loff_t size;
3925 __u32 flags = 0; 3924 __u32 flags = 0;
3926 int ret = 0; 3925 int ret = 0;
3927 struct fiemap_extent_info *fieinfo = data; 3926 struct fiemap_extent_info *fieinfo = data;
@@ -4103,8 +4102,7 @@ found_delayed_extent:
4103 if (ex && ext4_ext_is_uninitialized(ex)) 4102 if (ex && ext4_ext_is_uninitialized(ex))
4104 flags |= FIEMAP_EXTENT_UNWRITTEN; 4103 flags |= FIEMAP_EXTENT_UNWRITTEN;
4105 4104
4106 size = i_size_read(inode); 4105 if (next == EXT_MAX_BLOCKS)
4107 if (logical + length >= size)
4108 flags |= FIEMAP_EXTENT_LAST; 4106 flags |= FIEMAP_EXTENT_LAST;
4109 4107
4110 ret = fiemap_fill_next_extent(fieinfo, logical, physical, 4108 ret = fiemap_fill_next_extent(fieinfo, logical, physical,
@@ -4347,8 +4345,8 @@ int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
4347 4345
4348 start_blk = start >> inode->i_sb->s_blocksize_bits; 4346 start_blk = start >> inode->i_sb->s_blocksize_bits;
4349 last_blk = (start + len - 1) >> inode->i_sb->s_blocksize_bits; 4347 last_blk = (start + len - 1) >> inode->i_sb->s_blocksize_bits;
4350 if (last_blk >= EXT_MAX_BLOCK) 4348 if (last_blk >= EXT_MAX_BLOCKS)
4351 last_blk = EXT_MAX_BLOCK-1; 4349 last_blk = EXT_MAX_BLOCKS-1;
4352 len_blks = ((ext4_lblk_t) last_blk) - start_blk + 1; 4350 len_blks = ((ext4_lblk_t) last_blk) - start_blk + 1;
4353 4351
4354 /* 4352 /*
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index a5763e3505ba..e3126c051006 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -2634,7 +2634,7 @@ static int ext4_writepage(struct page *page,
2634 struct buffer_head *page_bufs = NULL; 2634 struct buffer_head *page_bufs = NULL;
2635 struct inode *inode = page->mapping->host; 2635 struct inode *inode = page->mapping->host;
2636 2636
2637 trace_ext4_writepage(inode, page); 2637 trace_ext4_writepage(page);
2638 size = i_size_read(inode); 2638 size = i_size_read(inode);
2639 if (page->index == size >> PAGE_CACHE_SHIFT) 2639 if (page->index == size >> PAGE_CACHE_SHIFT)
2640 len = size & ~PAGE_CACHE_MASK; 2640 len = size & ~PAGE_CACHE_MASK;
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 859f2ae8864e..6ed859d56850 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -3578,8 +3578,8 @@ ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh,
3578 free += next - bit; 3578 free += next - bit;
3579 3579
3580 trace_ext4_mballoc_discard(sb, NULL, group, bit, next - bit); 3580 trace_ext4_mballoc_discard(sb, NULL, group, bit, next - bit);
3581 trace_ext4_mb_release_inode_pa(sb, pa->pa_inode, pa, 3581 trace_ext4_mb_release_inode_pa(pa, grp_blk_start + bit,
3582 grp_blk_start + bit, next - bit); 3582 next - bit);
3583 mb_free_blocks(pa->pa_inode, e4b, bit, next - bit); 3583 mb_free_blocks(pa->pa_inode, e4b, bit, next - bit);
3584 bit = next + 1; 3584 bit = next + 1;
3585 } 3585 }
@@ -3608,7 +3608,7 @@ ext4_mb_release_group_pa(struct ext4_buddy *e4b,
3608 ext4_group_t group; 3608 ext4_group_t group;
3609 ext4_grpblk_t bit; 3609 ext4_grpblk_t bit;
3610 3610
3611 trace_ext4_mb_release_group_pa(sb, pa); 3611 trace_ext4_mb_release_group_pa(pa);
3612 BUG_ON(pa->pa_deleted == 0); 3612 BUG_ON(pa->pa_deleted == 0);
3613 ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, &bit); 3613 ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, &bit);
3614 BUG_ON(group != e4b->bd_group && pa->pa_len != 0); 3614 BUG_ON(group != e4b->bd_group && pa->pa_len != 0);
@@ -4448,7 +4448,7 @@ ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b,
4448 * @inode: inode 4448 * @inode: inode
4449 * @block: start physical block to free 4449 * @block: start physical block to free
4450 * @count: number of blocks to count 4450 * @count: number of blocks to count
4451 * @metadata: Are these metadata blocks 4451 * @flags: flags used by ext4_free_blocks
4452 */ 4452 */
4453void ext4_free_blocks(handle_t *handle, struct inode *inode, 4453void ext4_free_blocks(handle_t *handle, struct inode *inode,
4454 struct buffer_head *bh, ext4_fsblk_t block, 4454 struct buffer_head *bh, ext4_fsblk_t block,
diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c
index 2b8304bf3c50..f57455a1b1b2 100644
--- a/fs/ext4/move_extent.c
+++ b/fs/ext4/move_extent.c
@@ -1002,12 +1002,12 @@ mext_check_arguments(struct inode *orig_inode,
1002 return -EINVAL; 1002 return -EINVAL;
1003 } 1003 }
1004 1004
1005 if ((orig_start > EXT_MAX_BLOCK) || 1005 if ((orig_start >= EXT_MAX_BLOCKS) ||
1006 (donor_start > EXT_MAX_BLOCK) || 1006 (donor_start >= EXT_MAX_BLOCKS) ||
1007 (*len > EXT_MAX_BLOCK) || 1007 (*len > EXT_MAX_BLOCKS) ||
1008 (orig_start + *len > EXT_MAX_BLOCK)) { 1008 (orig_start + *len >= EXT_MAX_BLOCKS)) {
1009 ext4_debug("ext4 move extent: Can't handle over [%u] blocks " 1009 ext4_debug("ext4 move extent: Can't handle over [%u] blocks "
1010 "[ino:orig %lu, donor %lu]\n", EXT_MAX_BLOCK, 1010 "[ino:orig %lu, donor %lu]\n", EXT_MAX_BLOCKS,
1011 orig_inode->i_ino, donor_inode->i_ino); 1011 orig_inode->i_ino, donor_inode->i_ino);
1012 return -EINVAL; 1012 return -EINVAL;
1013 } 1013 }
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index cc5c157aa11d..9ea71aa864b3 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -2243,6 +2243,12 @@ static void ext4_orphan_cleanup(struct super_block *sb,
2243 * in the vfs. ext4 inode has 48 bits of i_block in fsblock units, 2243 * in the vfs. ext4 inode has 48 bits of i_block in fsblock units,
2244 * so that won't be a limiting factor. 2244 * so that won't be a limiting factor.
2245 * 2245 *
2246 * However there is other limiting factor. We do store extents in the form
2247 * of starting block and length, hence the resulting length of the extent
2248 * covering maximum file size must fit into on-disk format containers as
2249 * well. Given that length is always by 1 unit bigger than max unit (because
2250 * we count 0 as well) we have to lower the s_maxbytes by one fs block.
2251 *
2246 * Note, this does *not* consider any metadata overhead for vfs i_blocks. 2252 * Note, this does *not* consider any metadata overhead for vfs i_blocks.
2247 */ 2253 */
2248static loff_t ext4_max_size(int blkbits, int has_huge_files) 2254static loff_t ext4_max_size(int blkbits, int has_huge_files)
@@ -2264,10 +2270,13 @@ static loff_t ext4_max_size(int blkbits, int has_huge_files)
2264 upper_limit <<= blkbits; 2270 upper_limit <<= blkbits;
2265 } 2271 }
2266 2272
2267 /* 32-bit extent-start container, ee_block */ 2273 /*
2268 res = 1LL << 32; 2274 * 32-bit extent-start container, ee_block. We lower the maxbytes
2275 * by one fs block, so ee_len can cover the extent of maximum file
2276 * size
2277 */
2278 res = (1LL << 32) - 1;
2269 res <<= blkbits; 2279 res <<= blkbits;
2270 res -= 1;
2271 2280
2272 /* Sanity check against vm- & vfs- imposed limits */ 2281 /* Sanity check against vm- & vfs- imposed limits */
2273 if (res > upper_limit) 2282 if (res > upper_limit)
diff --git a/fs/fat/file.c b/fs/fat/file.c
index 7257752b6d5d..7018e1d8902d 100644
--- a/fs/fat/file.c
+++ b/fs/fat/file.c
@@ -102,7 +102,7 @@ static int fat_ioctl_set_attributes(struct file *file, u32 __user *user_attr)
102 if (attr & ATTR_SYS) 102 if (attr & ATTR_SYS)
103 inode->i_flags |= S_IMMUTABLE; 103 inode->i_flags |= S_IMMUTABLE;
104 else 104 else
105 inode->i_flags &= S_IMMUTABLE; 105 inode->i_flags &= ~S_IMMUTABLE;
106 } 106 }
107 107
108 fat_save_attrs(inode, attr); 108 fat_save_attrs(inode, attr);
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index cc6ec4b2f0ff..38f84cd48b67 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -921,6 +921,8 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent)
921 if (sb->s_flags & MS_MANDLOCK) 921 if (sb->s_flags & MS_MANDLOCK)
922 goto err; 922 goto err;
923 923
924 sb->s_flags &= ~MS_NOSEC;
925
924 if (!parse_fuse_opt((char *) data, &d, is_bdev)) 926 if (!parse_fuse_opt((char *) data, &d, is_bdev))
925 goto err; 927 goto err;
926 928
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index 2792a790e50b..1c1336e7b3b2 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -663,14 +663,19 @@ static void glock_work_func(struct work_struct *work)
663 drop_ref = 1; 663 drop_ref = 1;
664 } 664 }
665 spin_lock(&gl->gl_spin); 665 spin_lock(&gl->gl_spin);
666 if (test_and_clear_bit(GLF_PENDING_DEMOTE, &gl->gl_flags) && 666 if (test_bit(GLF_PENDING_DEMOTE, &gl->gl_flags) &&
667 gl->gl_state != LM_ST_UNLOCKED && 667 gl->gl_state != LM_ST_UNLOCKED &&
668 gl->gl_demote_state != LM_ST_EXCLUSIVE) { 668 gl->gl_demote_state != LM_ST_EXCLUSIVE) {
669 unsigned long holdtime, now = jiffies; 669 unsigned long holdtime, now = jiffies;
670
670 holdtime = gl->gl_tchange + gl->gl_ops->go_min_hold_time; 671 holdtime = gl->gl_tchange + gl->gl_ops->go_min_hold_time;
671 if (time_before(now, holdtime)) 672 if (time_before(now, holdtime))
672 delay = holdtime - now; 673 delay = holdtime - now;
673 set_bit(delay ? GLF_PENDING_DEMOTE : GLF_DEMOTE, &gl->gl_flags); 674
675 if (!delay) {
676 clear_bit(GLF_PENDING_DEMOTE, &gl->gl_flags);
677 set_bit(GLF_DEMOTE, &gl->gl_flags);
678 }
674 } 679 }
675 run_queue(gl, 0); 680 run_queue(gl, 0);
676 spin_unlock(&gl->gl_spin); 681 spin_unlock(&gl->gl_spin);
diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c
index 3db5ba4568fc..b3cc8586984e 100644
--- a/fs/isofs/inode.c
+++ b/fs/isofs/inode.c
@@ -974,7 +974,7 @@ out_no_inode:
974out_no_read: 974out_no_read:
975 printk(KERN_WARNING "%s: bread failed, dev=%s, iso_blknum=%d, block=%d\n", 975 printk(KERN_WARNING "%s: bread failed, dev=%s, iso_blknum=%d, block=%d\n",
976 __func__, s->s_id, iso_blknum, block); 976 __func__, s->s_id, iso_blknum, block);
977 goto out_freesbi; 977 goto out_freebh;
978out_bad_zone_size: 978out_bad_zone_size:
979 printk(KERN_WARNING "ISOFS: Bad logical zone size %ld\n", 979 printk(KERN_WARNING "ISOFS: Bad logical zone size %ld\n",
980 sbi->s_log_zone_size); 980 sbi->s_log_zone_size);
@@ -989,6 +989,7 @@ out_unknown_format:
989 989
990out_freebh: 990out_freebh:
991 brelse(bh); 991 brelse(bh);
992 brelse(pri_bh);
992out_freesbi: 993out_freesbi:
993 kfree(opt.iocharset); 994 kfree(opt.iocharset);
994 kfree(sbi); 995 kfree(sbi);
diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c
index 6a79fd0a1a32..2c62c5aae82f 100644
--- a/fs/jbd2/checkpoint.c
+++ b/fs/jbd2/checkpoint.c
@@ -97,10 +97,14 @@ static int __try_to_free_cp_buf(struct journal_head *jh)
97 97
98 if (jh->b_jlist == BJ_None && !buffer_locked(bh) && 98 if (jh->b_jlist == BJ_None && !buffer_locked(bh) &&
99 !buffer_dirty(bh) && !buffer_write_io_error(bh)) { 99 !buffer_dirty(bh) && !buffer_write_io_error(bh)) {
100 /*
101 * Get our reference so that bh cannot be freed before
102 * we unlock it
103 */
104 get_bh(bh);
100 JBUFFER_TRACE(jh, "remove from checkpoint list"); 105 JBUFFER_TRACE(jh, "remove from checkpoint list");
101 ret = __jbd2_journal_remove_checkpoint(jh) + 1; 106 ret = __jbd2_journal_remove_checkpoint(jh) + 1;
102 jbd_unlock_bh_state(bh); 107 jbd_unlock_bh_state(bh);
103 jbd2_journal_remove_journal_head(bh);
104 BUFFER_TRACE(bh, "release"); 108 BUFFER_TRACE(bh, "release");
105 __brelse(bh); 109 __brelse(bh);
106 } else { 110 } else {
@@ -223,8 +227,8 @@ restart:
223 spin_lock(&journal->j_list_lock); 227 spin_lock(&journal->j_list_lock);
224 goto restart; 228 goto restart;
225 } 229 }
230 get_bh(bh);
226 if (buffer_locked(bh)) { 231 if (buffer_locked(bh)) {
227 atomic_inc(&bh->b_count);
228 spin_unlock(&journal->j_list_lock); 232 spin_unlock(&journal->j_list_lock);
229 jbd_unlock_bh_state(bh); 233 jbd_unlock_bh_state(bh);
230 wait_on_buffer(bh); 234 wait_on_buffer(bh);
@@ -243,7 +247,6 @@ restart:
243 */ 247 */
244 released = __jbd2_journal_remove_checkpoint(jh); 248 released = __jbd2_journal_remove_checkpoint(jh);
245 jbd_unlock_bh_state(bh); 249 jbd_unlock_bh_state(bh);
246 jbd2_journal_remove_journal_head(bh);
247 __brelse(bh); 250 __brelse(bh);
248 } 251 }
249 252
@@ -284,7 +287,7 @@ static int __process_buffer(journal_t *journal, struct journal_head *jh,
284 int ret = 0; 287 int ret = 0;
285 288
286 if (buffer_locked(bh)) { 289 if (buffer_locked(bh)) {
287 atomic_inc(&bh->b_count); 290 get_bh(bh);
288 spin_unlock(&journal->j_list_lock); 291 spin_unlock(&journal->j_list_lock);
289 jbd_unlock_bh_state(bh); 292 jbd_unlock_bh_state(bh);
290 wait_on_buffer(bh); 293 wait_on_buffer(bh);
@@ -316,12 +319,12 @@ static int __process_buffer(journal_t *journal, struct journal_head *jh,
316 ret = 1; 319 ret = 1;
317 if (unlikely(buffer_write_io_error(bh))) 320 if (unlikely(buffer_write_io_error(bh)))
318 ret = -EIO; 321 ret = -EIO;
322 get_bh(bh);
319 J_ASSERT_JH(jh, !buffer_jbddirty(bh)); 323 J_ASSERT_JH(jh, !buffer_jbddirty(bh));
320 BUFFER_TRACE(bh, "remove from checkpoint"); 324 BUFFER_TRACE(bh, "remove from checkpoint");
321 __jbd2_journal_remove_checkpoint(jh); 325 __jbd2_journal_remove_checkpoint(jh);
322 spin_unlock(&journal->j_list_lock); 326 spin_unlock(&journal->j_list_lock);
323 jbd_unlock_bh_state(bh); 327 jbd_unlock_bh_state(bh);
324 jbd2_journal_remove_journal_head(bh);
325 __brelse(bh); 328 __brelse(bh);
326 } else { 329 } else {
327 /* 330 /*
@@ -554,7 +557,8 @@ int jbd2_cleanup_journal_tail(journal_t *journal)
554/* 557/*
555 * journal_clean_one_cp_list 558 * journal_clean_one_cp_list
556 * 559 *
557 * Find all the written-back checkpoint buffers in the given list and release them. 560 * Find all the written-back checkpoint buffers in the given list and
561 * release them.
558 * 562 *
559 * Called with the journal locked. 563 * Called with the journal locked.
560 * Called with j_list_lock held. 564 * Called with j_list_lock held.
@@ -663,8 +667,8 @@ out:
663 * checkpoint lists. 667 * checkpoint lists.
664 * 668 *
665 * The function returns 1 if it frees the transaction, 0 otherwise. 669 * The function returns 1 if it frees the transaction, 0 otherwise.
670 * The function can free jh and bh.
666 * 671 *
667 * This function is called with the journal locked.
668 * This function is called with j_list_lock held. 672 * This function is called with j_list_lock held.
669 * This function is called with jbd_lock_bh_state(jh2bh(jh)) 673 * This function is called with jbd_lock_bh_state(jh2bh(jh))
670 */ 674 */
@@ -684,13 +688,14 @@ int __jbd2_journal_remove_checkpoint(struct journal_head *jh)
684 } 688 }
685 journal = transaction->t_journal; 689 journal = transaction->t_journal;
686 690
691 JBUFFER_TRACE(jh, "removing from transaction");
687 __buffer_unlink(jh); 692 __buffer_unlink(jh);
688 jh->b_cp_transaction = NULL; 693 jh->b_cp_transaction = NULL;
694 jbd2_journal_put_journal_head(jh);
689 695
690 if (transaction->t_checkpoint_list != NULL || 696 if (transaction->t_checkpoint_list != NULL ||
691 transaction->t_checkpoint_io_list != NULL) 697 transaction->t_checkpoint_io_list != NULL)
692 goto out; 698 goto out;
693 JBUFFER_TRACE(jh, "transaction has no more buffers");
694 699
695 /* 700 /*
696 * There is one special case to worry about: if we have just pulled the 701 * There is one special case to worry about: if we have just pulled the
@@ -701,10 +706,8 @@ int __jbd2_journal_remove_checkpoint(struct journal_head *jh)
701 * The locking here around t_state is a bit sleazy. 706 * The locking here around t_state is a bit sleazy.
702 * See the comment at the end of jbd2_journal_commit_transaction(). 707 * See the comment at the end of jbd2_journal_commit_transaction().
703 */ 708 */
704 if (transaction->t_state != T_FINISHED) { 709 if (transaction->t_state != T_FINISHED)
705 JBUFFER_TRACE(jh, "belongs to running/committing transaction");
706 goto out; 710 goto out;
707 }
708 711
709 /* OK, that was the last buffer for the transaction: we can now 712 /* OK, that was the last buffer for the transaction: we can now
710 safely remove this transaction from the log */ 713 safely remove this transaction from the log */
@@ -723,7 +726,6 @@ int __jbd2_journal_remove_checkpoint(struct journal_head *jh)
723 wake_up(&journal->j_wait_logspace); 726 wake_up(&journal->j_wait_logspace);
724 ret = 1; 727 ret = 1;
725out: 728out:
726 JBUFFER_TRACE(jh, "exit");
727 return ret; 729 return ret;
728} 730}
729 731
@@ -742,6 +744,8 @@ void __jbd2_journal_insert_checkpoint(struct journal_head *jh,
742 J_ASSERT_JH(jh, buffer_dirty(jh2bh(jh)) || buffer_jbddirty(jh2bh(jh))); 744 J_ASSERT_JH(jh, buffer_dirty(jh2bh(jh)) || buffer_jbddirty(jh2bh(jh)));
743 J_ASSERT_JH(jh, jh->b_cp_transaction == NULL); 745 J_ASSERT_JH(jh, jh->b_cp_transaction == NULL);
744 746
747 /* Get reference for checkpointing transaction */
748 jbd2_journal_grab_journal_head(jh2bh(jh));
745 jh->b_cp_transaction = transaction; 749 jh->b_cp_transaction = transaction;
746 750
747 if (!transaction->t_checkpoint_list) { 751 if (!transaction->t_checkpoint_list) {
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
index 7f21cf3aaf92..eef6979821a4 100644
--- a/fs/jbd2/commit.c
+++ b/fs/jbd2/commit.c
@@ -848,10 +848,16 @@ restart_loop:
848 while (commit_transaction->t_forget) { 848 while (commit_transaction->t_forget) {
849 transaction_t *cp_transaction; 849 transaction_t *cp_transaction;
850 struct buffer_head *bh; 850 struct buffer_head *bh;
851 int try_to_free = 0;
851 852
852 jh = commit_transaction->t_forget; 853 jh = commit_transaction->t_forget;
853 spin_unlock(&journal->j_list_lock); 854 spin_unlock(&journal->j_list_lock);
854 bh = jh2bh(jh); 855 bh = jh2bh(jh);
856 /*
857 * Get a reference so that bh cannot be freed before we are
858 * done with it.
859 */
860 get_bh(bh);
855 jbd_lock_bh_state(bh); 861 jbd_lock_bh_state(bh);
856 J_ASSERT_JH(jh, jh->b_transaction == commit_transaction); 862 J_ASSERT_JH(jh, jh->b_transaction == commit_transaction);
857 863
@@ -914,28 +920,27 @@ restart_loop:
914 __jbd2_journal_insert_checkpoint(jh, commit_transaction); 920 __jbd2_journal_insert_checkpoint(jh, commit_transaction);
915 if (is_journal_aborted(journal)) 921 if (is_journal_aborted(journal))
916 clear_buffer_jbddirty(bh); 922 clear_buffer_jbddirty(bh);
917 JBUFFER_TRACE(jh, "refile for checkpoint writeback");
918 __jbd2_journal_refile_buffer(jh);
919 jbd_unlock_bh_state(bh);
920 } else { 923 } else {
921 J_ASSERT_BH(bh, !buffer_dirty(bh)); 924 J_ASSERT_BH(bh, !buffer_dirty(bh));
922 /* The buffer on BJ_Forget list and not jbddirty means 925 /*
926 * The buffer on BJ_Forget list and not jbddirty means
923 * it has been freed by this transaction and hence it 927 * it has been freed by this transaction and hence it
924 * could not have been reallocated until this 928 * could not have been reallocated until this
925 * transaction has committed. *BUT* it could be 929 * transaction has committed. *BUT* it could be
926 * reallocated once we have written all the data to 930 * reallocated once we have written all the data to
927 * disk and before we process the buffer on BJ_Forget 931 * disk and before we process the buffer on BJ_Forget
928 * list. */ 932 * list.
929 JBUFFER_TRACE(jh, "refile or unfile freed buffer"); 933 */
930 __jbd2_journal_refile_buffer(jh); 934 if (!jh->b_next_transaction)
931 if (!jh->b_transaction) { 935 try_to_free = 1;
932 jbd_unlock_bh_state(bh);
933 /* needs a brelse */
934 jbd2_journal_remove_journal_head(bh);
935 release_buffer_page(bh);
936 } else
937 jbd_unlock_bh_state(bh);
938 } 936 }
937 JBUFFER_TRACE(jh, "refile or unfile buffer");
938 __jbd2_journal_refile_buffer(jh);
939 jbd_unlock_bh_state(bh);
940 if (try_to_free)
941 release_buffer_page(bh); /* Drops bh reference */
942 else
943 __brelse(bh);
939 cond_resched_lock(&journal->j_list_lock); 944 cond_resched_lock(&journal->j_list_lock);
940 } 945 }
941 spin_unlock(&journal->j_list_lock); 946 spin_unlock(&journal->j_list_lock);
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index 9a7826990304..0dfa5b598e68 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -2078,10 +2078,9 @@ static void journal_free_journal_head(struct journal_head *jh)
2078 * When a buffer has its BH_JBD bit set it is immune from being released by 2078 * When a buffer has its BH_JBD bit set it is immune from being released by
2079 * core kernel code, mainly via ->b_count. 2079 * core kernel code, mainly via ->b_count.
2080 * 2080 *
2081 * A journal_head may be detached from its buffer_head when the journal_head's 2081 * A journal_head is detached from its buffer_head when the journal_head's
2082 * b_transaction, b_cp_transaction and b_next_transaction pointers are NULL. 2082 * b_jcount reaches zero. Running transaction (b_transaction) and checkpoint
2083 * Various places in JBD call jbd2_journal_remove_journal_head() to indicate that the 2083 * transaction (b_cp_transaction) hold their references to b_jcount.
2084 * journal_head can be dropped if needed.
2085 * 2084 *
2086 * Various places in the kernel want to attach a journal_head to a buffer_head 2085 * Various places in the kernel want to attach a journal_head to a buffer_head
2087 * _before_ attaching the journal_head to a transaction. To protect the 2086 * _before_ attaching the journal_head to a transaction. To protect the
@@ -2094,17 +2093,16 @@ static void journal_free_journal_head(struct journal_head *jh)
2094 * (Attach a journal_head if needed. Increments b_jcount) 2093 * (Attach a journal_head if needed. Increments b_jcount)
2095 * struct journal_head *jh = jbd2_journal_add_journal_head(bh); 2094 * struct journal_head *jh = jbd2_journal_add_journal_head(bh);
2096 * ... 2095 * ...
2096 * (Get another reference for transaction)
2097 * jbd2_journal_grab_journal_head(bh);
2097 * jh->b_transaction = xxx; 2098 * jh->b_transaction = xxx;
2099 * (Put original reference)
2098 * jbd2_journal_put_journal_head(jh); 2100 * jbd2_journal_put_journal_head(jh);
2099 *
2100 * Now, the journal_head's b_jcount is zero, but it is safe from being released
2101 * because it has a non-zero b_transaction.
2102 */ 2101 */
2103 2102
2104/* 2103/*
2105 * Give a buffer_head a journal_head. 2104 * Give a buffer_head a journal_head.
2106 * 2105 *
2107 * Doesn't need the journal lock.
2108 * May sleep. 2106 * May sleep.
2109 */ 2107 */
2110struct journal_head *jbd2_journal_add_journal_head(struct buffer_head *bh) 2108struct journal_head *jbd2_journal_add_journal_head(struct buffer_head *bh)
@@ -2168,61 +2166,29 @@ static void __journal_remove_journal_head(struct buffer_head *bh)
2168 struct journal_head *jh = bh2jh(bh); 2166 struct journal_head *jh = bh2jh(bh);
2169 2167
2170 J_ASSERT_JH(jh, jh->b_jcount >= 0); 2168 J_ASSERT_JH(jh, jh->b_jcount >= 0);
2171 2169 J_ASSERT_JH(jh, jh->b_transaction == NULL);
2172 get_bh(bh); 2170 J_ASSERT_JH(jh, jh->b_next_transaction == NULL);
2173 if (jh->b_jcount == 0) { 2171 J_ASSERT_JH(jh, jh->b_cp_transaction == NULL);
2174 if (jh->b_transaction == NULL && 2172 J_ASSERT_JH(jh, jh->b_jlist == BJ_None);
2175 jh->b_next_transaction == NULL && 2173 J_ASSERT_BH(bh, buffer_jbd(bh));
2176 jh->b_cp_transaction == NULL) { 2174 J_ASSERT_BH(bh, jh2bh(jh) == bh);
2177 J_ASSERT_JH(jh, jh->b_jlist == BJ_None); 2175 BUFFER_TRACE(bh, "remove journal_head");
2178 J_ASSERT_BH(bh, buffer_jbd(bh)); 2176 if (jh->b_frozen_data) {
2179 J_ASSERT_BH(bh, jh2bh(jh) == bh); 2177 printk(KERN_WARNING "%s: freeing b_frozen_data\n", __func__);
2180 BUFFER_TRACE(bh, "remove journal_head"); 2178 jbd2_free(jh->b_frozen_data, bh->b_size);
2181 if (jh->b_frozen_data) {
2182 printk(KERN_WARNING "%s: freeing "
2183 "b_frozen_data\n",
2184 __func__);
2185 jbd2_free(jh->b_frozen_data, bh->b_size);
2186 }
2187 if (jh->b_committed_data) {
2188 printk(KERN_WARNING "%s: freeing "
2189 "b_committed_data\n",
2190 __func__);
2191 jbd2_free(jh->b_committed_data, bh->b_size);
2192 }
2193 bh->b_private = NULL;
2194 jh->b_bh = NULL; /* debug, really */
2195 clear_buffer_jbd(bh);
2196 __brelse(bh);
2197 journal_free_journal_head(jh);
2198 } else {
2199 BUFFER_TRACE(bh, "journal_head was locked");
2200 }
2201 } 2179 }
2180 if (jh->b_committed_data) {
2181 printk(KERN_WARNING "%s: freeing b_committed_data\n", __func__);
2182 jbd2_free(jh->b_committed_data, bh->b_size);
2183 }
2184 bh->b_private = NULL;
2185 jh->b_bh = NULL; /* debug, really */
2186 clear_buffer_jbd(bh);
2187 journal_free_journal_head(jh);
2202} 2188}
2203 2189
2204/* 2190/*
2205 * jbd2_journal_remove_journal_head(): if the buffer isn't attached to a transaction 2191 * Drop a reference on the passed journal_head. If it fell to zero then
2206 * and has a zero b_jcount then remove and release its journal_head. If we did
2207 * see that the buffer is not used by any transaction we also "logically"
2208 * decrement ->b_count.
2209 *
2210 * We in fact take an additional increment on ->b_count as a convenience,
2211 * because the caller usually wants to do additional things with the bh
2212 * after calling here.
2213 * The caller of jbd2_journal_remove_journal_head() *must* run __brelse(bh) at some
2214 * time. Once the caller has run __brelse(), the buffer is eligible for
2215 * reaping by try_to_free_buffers().
2216 */
2217void jbd2_journal_remove_journal_head(struct buffer_head *bh)
2218{
2219 jbd_lock_bh_journal_head(bh);
2220 __journal_remove_journal_head(bh);
2221 jbd_unlock_bh_journal_head(bh);
2222}
2223
2224/*
2225 * Drop a reference on the passed journal_head. If it fell to zero then try to
2226 * release the journal_head from the buffer_head. 2192 * release the journal_head from the buffer_head.
2227 */ 2193 */
2228void jbd2_journal_put_journal_head(struct journal_head *jh) 2194void jbd2_journal_put_journal_head(struct journal_head *jh)
@@ -2232,11 +2198,12 @@ void jbd2_journal_put_journal_head(struct journal_head *jh)
2232 jbd_lock_bh_journal_head(bh); 2198 jbd_lock_bh_journal_head(bh);
2233 J_ASSERT_JH(jh, jh->b_jcount > 0); 2199 J_ASSERT_JH(jh, jh->b_jcount > 0);
2234 --jh->b_jcount; 2200 --jh->b_jcount;
2235 if (!jh->b_jcount && !jh->b_transaction) { 2201 if (!jh->b_jcount) {
2236 __journal_remove_journal_head(bh); 2202 __journal_remove_journal_head(bh);
2203 jbd_unlock_bh_journal_head(bh);
2237 __brelse(bh); 2204 __brelse(bh);
2238 } 2205 } else
2239 jbd_unlock_bh_journal_head(bh); 2206 jbd_unlock_bh_journal_head(bh);
2240} 2207}
2241 2208
2242/* 2209/*
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index 3eec82d32fd4..2d7109414cdd 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -30,6 +30,7 @@
30#include <linux/module.h> 30#include <linux/module.h>
31 31
32static void __jbd2_journal_temp_unlink_buffer(struct journal_head *jh); 32static void __jbd2_journal_temp_unlink_buffer(struct journal_head *jh);
33static void __jbd2_journal_unfile_buffer(struct journal_head *jh);
33 34
34/* 35/*
35 * jbd2_get_transaction: obtain a new transaction_t object. 36 * jbd2_get_transaction: obtain a new transaction_t object.
@@ -764,7 +765,6 @@ repeat:
764 if (!jh->b_transaction) { 765 if (!jh->b_transaction) {
765 JBUFFER_TRACE(jh, "no transaction"); 766 JBUFFER_TRACE(jh, "no transaction");
766 J_ASSERT_JH(jh, !jh->b_next_transaction); 767 J_ASSERT_JH(jh, !jh->b_next_transaction);
767 jh->b_transaction = transaction;
768 JBUFFER_TRACE(jh, "file as BJ_Reserved"); 768 JBUFFER_TRACE(jh, "file as BJ_Reserved");
769 spin_lock(&journal->j_list_lock); 769 spin_lock(&journal->j_list_lock);
770 __jbd2_journal_file_buffer(jh, transaction, BJ_Reserved); 770 __jbd2_journal_file_buffer(jh, transaction, BJ_Reserved);
@@ -814,7 +814,6 @@ out:
814 * int jbd2_journal_get_write_access() - notify intent to modify a buffer for metadata (not data) update. 814 * int jbd2_journal_get_write_access() - notify intent to modify a buffer for metadata (not data) update.
815 * @handle: transaction to add buffer modifications to 815 * @handle: transaction to add buffer modifications to
816 * @bh: bh to be used for metadata writes 816 * @bh: bh to be used for metadata writes
817 * @credits: variable that will receive credits for the buffer
818 * 817 *
819 * Returns an error code or 0 on success. 818 * Returns an error code or 0 on success.
820 * 819 *
@@ -896,8 +895,6 @@ int jbd2_journal_get_create_access(handle_t *handle, struct buffer_head *bh)
896 * committed and so it's safe to clear the dirty bit. 895 * committed and so it's safe to clear the dirty bit.
897 */ 896 */
898 clear_buffer_dirty(jh2bh(jh)); 897 clear_buffer_dirty(jh2bh(jh));
899 jh->b_transaction = transaction;
900
901 /* first access by this transaction */ 898 /* first access by this transaction */
902 jh->b_modified = 0; 899 jh->b_modified = 0;
903 900
@@ -932,7 +929,6 @@ out:
932 * non-rewindable consequences 929 * non-rewindable consequences
933 * @handle: transaction 930 * @handle: transaction
934 * @bh: buffer to undo 931 * @bh: buffer to undo
935 * @credits: store the number of taken credits here (if not NULL)
936 * 932 *
937 * Sometimes there is a need to distinguish between metadata which has 933 * Sometimes there is a need to distinguish between metadata which has
938 * been committed to disk and that which has not. The ext3fs code uses 934 * been committed to disk and that which has not. The ext3fs code uses
@@ -1232,8 +1228,6 @@ int jbd2_journal_forget (handle_t *handle, struct buffer_head *bh)
1232 __jbd2_journal_file_buffer(jh, transaction, BJ_Forget); 1228 __jbd2_journal_file_buffer(jh, transaction, BJ_Forget);
1233 } else { 1229 } else {
1234 __jbd2_journal_unfile_buffer(jh); 1230 __jbd2_journal_unfile_buffer(jh);
1235 jbd2_journal_remove_journal_head(bh);
1236 __brelse(bh);
1237 if (!buffer_jbd(bh)) { 1231 if (!buffer_jbd(bh)) {
1238 spin_unlock(&journal->j_list_lock); 1232 spin_unlock(&journal->j_list_lock);
1239 jbd_unlock_bh_state(bh); 1233 jbd_unlock_bh_state(bh);
@@ -1556,19 +1550,32 @@ void __jbd2_journal_temp_unlink_buffer(struct journal_head *jh)
1556 mark_buffer_dirty(bh); /* Expose it to the VM */ 1550 mark_buffer_dirty(bh); /* Expose it to the VM */
1557} 1551}
1558 1552
1559void __jbd2_journal_unfile_buffer(struct journal_head *jh) 1553/*
1554 * Remove buffer from all transactions.
1555 *
1556 * Called with bh_state lock and j_list_lock
1557 *
1558 * jh and bh may be already freed when this function returns.
1559 */
1560static void __jbd2_journal_unfile_buffer(struct journal_head *jh)
1560{ 1561{
1561 __jbd2_journal_temp_unlink_buffer(jh); 1562 __jbd2_journal_temp_unlink_buffer(jh);
1562 jh->b_transaction = NULL; 1563 jh->b_transaction = NULL;
1564 jbd2_journal_put_journal_head(jh);
1563} 1565}
1564 1566
1565void jbd2_journal_unfile_buffer(journal_t *journal, struct journal_head *jh) 1567void jbd2_journal_unfile_buffer(journal_t *journal, struct journal_head *jh)
1566{ 1568{
1567 jbd_lock_bh_state(jh2bh(jh)); 1569 struct buffer_head *bh = jh2bh(jh);
1570
1571 /* Get reference so that buffer cannot be freed before we unlock it */
1572 get_bh(bh);
1573 jbd_lock_bh_state(bh);
1568 spin_lock(&journal->j_list_lock); 1574 spin_lock(&journal->j_list_lock);
1569 __jbd2_journal_unfile_buffer(jh); 1575 __jbd2_journal_unfile_buffer(jh);
1570 spin_unlock(&journal->j_list_lock); 1576 spin_unlock(&journal->j_list_lock);
1571 jbd_unlock_bh_state(jh2bh(jh)); 1577 jbd_unlock_bh_state(bh);
1578 __brelse(bh);
1572} 1579}
1573 1580
1574/* 1581/*
@@ -1595,8 +1602,6 @@ __journal_try_to_free_buffer(journal_t *journal, struct buffer_head *bh)
1595 if (jh->b_jlist == BJ_None) { 1602 if (jh->b_jlist == BJ_None) {
1596 JBUFFER_TRACE(jh, "remove from checkpoint list"); 1603 JBUFFER_TRACE(jh, "remove from checkpoint list");
1597 __jbd2_journal_remove_checkpoint(jh); 1604 __jbd2_journal_remove_checkpoint(jh);
1598 jbd2_journal_remove_journal_head(bh);
1599 __brelse(bh);
1600 } 1605 }
1601 } 1606 }
1602 spin_unlock(&journal->j_list_lock); 1607 spin_unlock(&journal->j_list_lock);
@@ -1659,7 +1664,6 @@ int jbd2_journal_try_to_free_buffers(journal_t *journal,
1659 /* 1664 /*
1660 * We take our own ref against the journal_head here to avoid 1665 * We take our own ref against the journal_head here to avoid
1661 * having to add tons of locking around each instance of 1666 * having to add tons of locking around each instance of
1662 * jbd2_journal_remove_journal_head() and
1663 * jbd2_journal_put_journal_head(). 1667 * jbd2_journal_put_journal_head().
1664 */ 1668 */
1665 jh = jbd2_journal_grab_journal_head(bh); 1669 jh = jbd2_journal_grab_journal_head(bh);
@@ -1697,10 +1701,9 @@ static int __dispose_buffer(struct journal_head *jh, transaction_t *transaction)
1697 int may_free = 1; 1701 int may_free = 1;
1698 struct buffer_head *bh = jh2bh(jh); 1702 struct buffer_head *bh = jh2bh(jh);
1699 1703
1700 __jbd2_journal_unfile_buffer(jh);
1701
1702 if (jh->b_cp_transaction) { 1704 if (jh->b_cp_transaction) {
1703 JBUFFER_TRACE(jh, "on running+cp transaction"); 1705 JBUFFER_TRACE(jh, "on running+cp transaction");
1706 __jbd2_journal_temp_unlink_buffer(jh);
1704 /* 1707 /*
1705 * We don't want to write the buffer anymore, clear the 1708 * We don't want to write the buffer anymore, clear the
1706 * bit so that we don't confuse checks in 1709 * bit so that we don't confuse checks in
@@ -1711,8 +1714,7 @@ static int __dispose_buffer(struct journal_head *jh, transaction_t *transaction)
1711 may_free = 0; 1714 may_free = 0;
1712 } else { 1715 } else {
1713 JBUFFER_TRACE(jh, "on running transaction"); 1716 JBUFFER_TRACE(jh, "on running transaction");
1714 jbd2_journal_remove_journal_head(bh); 1717 __jbd2_journal_unfile_buffer(jh);
1715 __brelse(bh);
1716 } 1718 }
1717 return may_free; 1719 return may_free;
1718} 1720}
@@ -1990,6 +1992,8 @@ void __jbd2_journal_file_buffer(struct journal_head *jh,
1990 1992
1991 if (jh->b_transaction) 1993 if (jh->b_transaction)
1992 __jbd2_journal_temp_unlink_buffer(jh); 1994 __jbd2_journal_temp_unlink_buffer(jh);
1995 else
1996 jbd2_journal_grab_journal_head(bh);
1993 jh->b_transaction = transaction; 1997 jh->b_transaction = transaction;
1994 1998
1995 switch (jlist) { 1999 switch (jlist) {
@@ -2041,9 +2045,10 @@ void jbd2_journal_file_buffer(struct journal_head *jh,
2041 * already started to be used by a subsequent transaction, refile the 2045 * already started to be used by a subsequent transaction, refile the
2042 * buffer on that transaction's metadata list. 2046 * buffer on that transaction's metadata list.
2043 * 2047 *
2044 * Called under journal->j_list_lock 2048 * Called under j_list_lock
2045 *
2046 * Called under jbd_lock_bh_state(jh2bh(jh)) 2049 * Called under jbd_lock_bh_state(jh2bh(jh))
2050 *
2051 * jh and bh may be already free when this function returns
2047 */ 2052 */
2048void __jbd2_journal_refile_buffer(struct journal_head *jh) 2053void __jbd2_journal_refile_buffer(struct journal_head *jh)
2049{ 2054{
@@ -2067,6 +2072,11 @@ void __jbd2_journal_refile_buffer(struct journal_head *jh)
2067 2072
2068 was_dirty = test_clear_buffer_jbddirty(bh); 2073 was_dirty = test_clear_buffer_jbddirty(bh);
2069 __jbd2_journal_temp_unlink_buffer(jh); 2074 __jbd2_journal_temp_unlink_buffer(jh);
2075 /*
2076 * We set b_transaction here because b_next_transaction will inherit
2077 * our jh reference and thus __jbd2_journal_file_buffer() must not
2078 * take a new one.
2079 */
2070 jh->b_transaction = jh->b_next_transaction; 2080 jh->b_transaction = jh->b_next_transaction;
2071 jh->b_next_transaction = NULL; 2081 jh->b_next_transaction = NULL;
2072 if (buffer_freed(bh)) 2082 if (buffer_freed(bh))
@@ -2083,30 +2093,21 @@ void __jbd2_journal_refile_buffer(struct journal_head *jh)
2083} 2093}
2084 2094
2085/* 2095/*
2086 * For the unlocked version of this call, also make sure that any 2096 * __jbd2_journal_refile_buffer() with necessary locking added. We take our
2087 * hanging journal_head is cleaned up if necessary. 2097 * bh reference so that we can safely unlock bh.
2088 * 2098 *
2089 * __jbd2_journal_refile_buffer is usually called as part of a single locked 2099 * The jh and bh may be freed by this call.
2090 * operation on a buffer_head, in which the caller is probably going to
2091 * be hooking the journal_head onto other lists. In that case it is up
2092 * to the caller to remove the journal_head if necessary. For the
2093 * unlocked jbd2_journal_refile_buffer call, the caller isn't going to be
2094 * doing anything else to the buffer so we need to do the cleanup
2095 * ourselves to avoid a jh leak.
2096 *
2097 * *** The journal_head may be freed by this call! ***
2098 */ 2100 */
2099void jbd2_journal_refile_buffer(journal_t *journal, struct journal_head *jh) 2101void jbd2_journal_refile_buffer(journal_t *journal, struct journal_head *jh)
2100{ 2102{
2101 struct buffer_head *bh = jh2bh(jh); 2103 struct buffer_head *bh = jh2bh(jh);
2102 2104
2105 /* Get reference so that buffer cannot be freed before we unlock it */
2106 get_bh(bh);
2103 jbd_lock_bh_state(bh); 2107 jbd_lock_bh_state(bh);
2104 spin_lock(&journal->j_list_lock); 2108 spin_lock(&journal->j_list_lock);
2105
2106 __jbd2_journal_refile_buffer(jh); 2109 __jbd2_journal_refile_buffer(jh);
2107 jbd_unlock_bh_state(bh); 2110 jbd_unlock_bh_state(bh);
2108 jbd2_journal_remove_journal_head(bh);
2109
2110 spin_unlock(&journal->j_list_lock); 2111 spin_unlock(&journal->j_list_lock);
2111 __brelse(bh); 2112 __brelse(bh);
2112} 2113}
diff --git a/fs/jfs/file.c b/fs/jfs/file.c
index c5ce6c1d1ff4..2f3f531f3606 100644
--- a/fs/jfs/file.c
+++ b/fs/jfs/file.c
@@ -66,9 +66,9 @@ static int jfs_open(struct inode *inode, struct file *file)
66 struct jfs_inode_info *ji = JFS_IP(inode); 66 struct jfs_inode_info *ji = JFS_IP(inode);
67 spin_lock_irq(&ji->ag_lock); 67 spin_lock_irq(&ji->ag_lock);
68 if (ji->active_ag == -1) { 68 if (ji->active_ag == -1) {
69 ji->active_ag = ji->agno; 69 struct jfs_sb_info *jfs_sb = JFS_SBI(inode->i_sb);
70 atomic_inc( 70 ji->active_ag = BLKTOAG(addressPXD(&ji->ixpxd), jfs_sb);
71 &JFS_SBI(inode->i_sb)->bmap->db_active[ji->agno]); 71 atomic_inc( &jfs_sb->bmap->db_active[ji->active_ag]);
72 } 72 }
73 spin_unlock_irq(&ji->ag_lock); 73 spin_unlock_irq(&ji->ag_lock);
74 } 74 }
diff --git a/fs/jfs/jfs_imap.c b/fs/jfs/jfs_imap.c
index ed53a4740168..b78b2f978f04 100644
--- a/fs/jfs/jfs_imap.c
+++ b/fs/jfs/jfs_imap.c
@@ -397,7 +397,7 @@ int diRead(struct inode *ip)
397 release_metapage(mp); 397 release_metapage(mp);
398 398
399 /* set the ag for the inode */ 399 /* set the ag for the inode */
400 JFS_IP(ip)->agno = BLKTOAG(agstart, sbi); 400 JFS_IP(ip)->agstart = agstart;
401 JFS_IP(ip)->active_ag = -1; 401 JFS_IP(ip)->active_ag = -1;
402 402
403 return (rc); 403 return (rc);
@@ -901,7 +901,7 @@ int diFree(struct inode *ip)
901 901
902 /* get the allocation group for this ino. 902 /* get the allocation group for this ino.
903 */ 903 */
904 agno = JFS_IP(ip)->agno; 904 agno = BLKTOAG(JFS_IP(ip)->agstart, JFS_SBI(ip->i_sb));
905 905
906 /* Lock the AG specific inode map information 906 /* Lock the AG specific inode map information
907 */ 907 */
@@ -1315,12 +1315,11 @@ int diFree(struct inode *ip)
1315static inline void 1315static inline void
1316diInitInode(struct inode *ip, int iagno, int ino, int extno, struct iag * iagp) 1316diInitInode(struct inode *ip, int iagno, int ino, int extno, struct iag * iagp)
1317{ 1317{
1318 struct jfs_sb_info *sbi = JFS_SBI(ip->i_sb);
1319 struct jfs_inode_info *jfs_ip = JFS_IP(ip); 1318 struct jfs_inode_info *jfs_ip = JFS_IP(ip);
1320 1319
1321 ip->i_ino = (iagno << L2INOSPERIAG) + ino; 1320 ip->i_ino = (iagno << L2INOSPERIAG) + ino;
1322 jfs_ip->ixpxd = iagp->inoext[extno]; 1321 jfs_ip->ixpxd = iagp->inoext[extno];
1323 jfs_ip->agno = BLKTOAG(le64_to_cpu(iagp->agstart), sbi); 1322 jfs_ip->agstart = le64_to_cpu(iagp->agstart);
1324 jfs_ip->active_ag = -1; 1323 jfs_ip->active_ag = -1;
1325} 1324}
1326 1325
@@ -1379,7 +1378,7 @@ int diAlloc(struct inode *pip, bool dir, struct inode *ip)
1379 */ 1378 */
1380 1379
1381 /* get the ag number of this iag */ 1380 /* get the ag number of this iag */
1382 agno = JFS_IP(pip)->agno; 1381 agno = BLKTOAG(JFS_IP(pip)->agstart, JFS_SBI(pip->i_sb));
1383 1382
1384 if (atomic_read(&JFS_SBI(pip->i_sb)->bmap->db_active[agno])) { 1383 if (atomic_read(&JFS_SBI(pip->i_sb)->bmap->db_active[agno])) {
1385 /* 1384 /*
@@ -2921,10 +2920,9 @@ int diExtendFS(struct inode *ipimap, struct inode *ipbmap)
2921 continue; 2920 continue;
2922 } 2921 }
2923 2922
2924 /* agstart that computes to the same ag is treated as same; */
2925 agstart = le64_to_cpu(iagp->agstart); 2923 agstart = le64_to_cpu(iagp->agstart);
2926 /* iagp->agstart = agstart & ~(mp->db_agsize - 1); */
2927 n = agstart >> mp->db_agl2size; 2924 n = agstart >> mp->db_agl2size;
2925 iagp->agstart = cpu_to_le64((s64)n << mp->db_agl2size);
2928 2926
2929 /* compute backed inodes */ 2927 /* compute backed inodes */
2930 numinos = (EXTSPERIAG - le32_to_cpu(iagp->nfreeexts)) 2928 numinos = (EXTSPERIAG - le32_to_cpu(iagp->nfreeexts))
diff --git a/fs/jfs/jfs_incore.h b/fs/jfs/jfs_incore.h
index 1439f119ec83..584a4a1a6e81 100644
--- a/fs/jfs/jfs_incore.h
+++ b/fs/jfs/jfs_incore.h
@@ -50,8 +50,9 @@ struct jfs_inode_info {
50 short btindex; /* btpage entry index*/ 50 short btindex; /* btpage entry index*/
51 struct inode *ipimap; /* inode map */ 51 struct inode *ipimap; /* inode map */
52 unsigned long cflag; /* commit flags */ 52 unsigned long cflag; /* commit flags */
53 u64 agstart; /* agstart of the containing IAG */
53 u16 bxflag; /* xflag of pseudo buffer? */ 54 u16 bxflag; /* xflag of pseudo buffer? */
54 unchar agno; /* ag number */ 55 unchar pad;
55 signed char active_ag; /* ag currently allocating from */ 56 signed char active_ag; /* ag currently allocating from */
56 lid_t blid; /* lid of pseudo buffer? */ 57 lid_t blid; /* lid of pseudo buffer? */
57 lid_t atlhead; /* anonymous tlock list head */ 58 lid_t atlhead; /* anonymous tlock list head */
diff --git a/fs/jfs/jfs_logmgr.c b/fs/jfs/jfs_logmgr.c
index 278e3fb40b71..583636f745e5 100644
--- a/fs/jfs/jfs_logmgr.c
+++ b/fs/jfs/jfs_logmgr.c
@@ -1123,7 +1123,7 @@ int lmLogOpen(struct super_block *sb)
1123 bdev = blkdev_get_by_dev(sbi->logdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL, 1123 bdev = blkdev_get_by_dev(sbi->logdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL,
1124 log); 1124 log);
1125 if (IS_ERR(bdev)) { 1125 if (IS_ERR(bdev)) {
1126 rc = -PTR_ERR(bdev); 1126 rc = PTR_ERR(bdev);
1127 goto free; 1127 goto free;
1128 } 1128 }
1129 1129
diff --git a/fs/jfs/resize.c b/fs/jfs/resize.c
index 8ea5efb5a34e..8d0c1c7c0820 100644
--- a/fs/jfs/resize.c
+++ b/fs/jfs/resize.c
@@ -80,7 +80,7 @@ int jfs_extendfs(struct super_block *sb, s64 newLVSize, int newLogSize)
80 int log_formatted = 0; 80 int log_formatted = 0;
81 struct inode *iplist[1]; 81 struct inode *iplist[1];
82 struct jfs_superblock *j_sb, *j_sb2; 82 struct jfs_superblock *j_sb, *j_sb2;
83 uint old_agsize; 83 s64 old_agsize;
84 int agsizechanged = 0; 84 int agsizechanged = 0;
85 struct buffer_head *bh, *bh2; 85 struct buffer_head *bh, *bh2;
86 86
diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c
index adb45ec9038c..e374050a911c 100644
--- a/fs/lockd/clntproc.c
+++ b/fs/lockd/clntproc.c
@@ -708,7 +708,13 @@ static void nlmclnt_unlock_callback(struct rpc_task *task, void *data)
708 708
709 if (task->tk_status < 0) { 709 if (task->tk_status < 0) {
710 dprintk("lockd: unlock failed (err = %d)\n", -task->tk_status); 710 dprintk("lockd: unlock failed (err = %d)\n", -task->tk_status);
711 goto retry_rebind; 711 switch (task->tk_status) {
712 case -EACCES:
713 case -EIO:
714 goto die;
715 default:
716 goto retry_rebind;
717 }
712 } 718 }
713 if (status == NLM_LCK_DENIED_GRACE_PERIOD) { 719 if (status == NLM_LCK_DENIED_GRACE_PERIOD) {
714 rpc_delay(task, NLMCLNT_GRACE_WAIT); 720 rpc_delay(task, NLMCLNT_GRACE_WAIT);
diff --git a/fs/logfs/dir.c b/fs/logfs/dir.c
index 9ed89d1663f8..1afae26cf236 100644
--- a/fs/logfs/dir.c
+++ b/fs/logfs/dir.c
@@ -555,13 +555,6 @@ static int logfs_symlink(struct inode *dir, struct dentry *dentry,
555 return __logfs_create(dir, dentry, inode, target, destlen); 555 return __logfs_create(dir, dentry, inode, target, destlen);
556} 556}
557 557
558static int logfs_permission(struct inode *inode, int mask, unsigned int flags)
559{
560 if (flags & IPERM_FLAG_RCU)
561 return -ECHILD;
562 return generic_permission(inode, mask, flags, NULL);
563}
564
565static int logfs_link(struct dentry *old_dentry, struct inode *dir, 558static int logfs_link(struct dentry *old_dentry, struct inode *dir,
566 struct dentry *dentry) 559 struct dentry *dentry)
567{ 560{
@@ -820,7 +813,6 @@ const struct inode_operations logfs_dir_iops = {
820 .mknod = logfs_mknod, 813 .mknod = logfs_mknod,
821 .rename = logfs_rename, 814 .rename = logfs_rename,
822 .rmdir = logfs_rmdir, 815 .rmdir = logfs_rmdir,
823 .permission = logfs_permission,
824 .symlink = logfs_symlink, 816 .symlink = logfs_symlink,
825 .unlink = logfs_unlink, 817 .unlink = logfs_unlink,
826}; 818};
diff --git a/fs/namei.c b/fs/namei.c
index e2e4e8d032ee..0223c41fb114 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -238,7 +238,8 @@ int generic_permission(struct inode *inode, int mask, unsigned int flags,
238 238
239 /* 239 /*
240 * Read/write DACs are always overridable. 240 * Read/write DACs are always overridable.
241 * Executable DACs are overridable if at least one exec bit is set. 241 * Executable DACs are overridable for all directories and
242 * for non-directories that have least one exec bit set.
242 */ 243 */
243 if (!(mask & MAY_EXEC) || execute_ok(inode)) 244 if (!(mask & MAY_EXEC) || execute_ok(inode))
244 if (ns_capable(inode_userns(inode), CAP_DAC_OVERRIDE)) 245 if (ns_capable(inode_userns(inode), CAP_DAC_OVERRIDE))
@@ -812,6 +813,11 @@ static int follow_automount(struct path *path, unsigned flags,
812 if (!mnt) /* mount collision */ 813 if (!mnt) /* mount collision */
813 return 0; 814 return 0;
814 815
816 if (!*need_mntput) {
817 /* lock_mount() may release path->mnt on error */
818 mntget(path->mnt);
819 *need_mntput = true;
820 }
815 err = finish_automount(mnt, path); 821 err = finish_automount(mnt, path);
816 822
817 switch (err) { 823 switch (err) {
@@ -819,12 +825,9 @@ static int follow_automount(struct path *path, unsigned flags,
819 /* Someone else made a mount here whilst we were busy */ 825 /* Someone else made a mount here whilst we were busy */
820 return 0; 826 return 0;
821 case 0: 827 case 0:
822 dput(path->dentry); 828 path_put(path);
823 if (*need_mntput)
824 mntput(path->mnt);
825 path->mnt = mnt; 829 path->mnt = mnt;
826 path->dentry = dget(mnt->mnt_root); 830 path->dentry = dget(mnt->mnt_root);
827 *need_mntput = true;
828 return 0; 831 return 0;
829 default: 832 default:
830 return err; 833 return err;
@@ -844,9 +847,10 @@ static int follow_automount(struct path *path, unsigned flags,
844 */ 847 */
845static int follow_managed(struct path *path, unsigned flags) 848static int follow_managed(struct path *path, unsigned flags)
846{ 849{
850 struct vfsmount *mnt = path->mnt; /* held by caller, must be left alone */
847 unsigned managed; 851 unsigned managed;
848 bool need_mntput = false; 852 bool need_mntput = false;
849 int ret; 853 int ret = 0;
850 854
851 /* Given that we're not holding a lock here, we retain the value in a 855 /* Given that we're not holding a lock here, we retain the value in a
852 * local variable for each dentry as we look at it so that we don't see 856 * local variable for each dentry as we look at it so that we don't see
@@ -861,7 +865,7 @@ static int follow_managed(struct path *path, unsigned flags)
861 BUG_ON(!path->dentry->d_op->d_manage); 865 BUG_ON(!path->dentry->d_op->d_manage);
862 ret = path->dentry->d_op->d_manage(path->dentry, false); 866 ret = path->dentry->d_op->d_manage(path->dentry, false);
863 if (ret < 0) 867 if (ret < 0)
864 return ret == -EISDIR ? 0 : ret; 868 break;
865 } 869 }
866 870
867 /* Transit to a mounted filesystem. */ 871 /* Transit to a mounted filesystem. */
@@ -887,14 +891,19 @@ static int follow_managed(struct path *path, unsigned flags)
887 if (managed & DCACHE_NEED_AUTOMOUNT) { 891 if (managed & DCACHE_NEED_AUTOMOUNT) {
888 ret = follow_automount(path, flags, &need_mntput); 892 ret = follow_automount(path, flags, &need_mntput);
889 if (ret < 0) 893 if (ret < 0)
890 return ret == -EISDIR ? 0 : ret; 894 break;
891 continue; 895 continue;
892 } 896 }
893 897
894 /* We didn't change the current path point */ 898 /* We didn't change the current path point */
895 break; 899 break;
896 } 900 }
897 return 0; 901
902 if (need_mntput && path->mnt == mnt)
903 mntput(path->mnt);
904 if (ret == -EISDIR)
905 ret = 0;
906 return ret;
898} 907}
899 908
900int follow_down_one(struct path *path) 909int follow_down_one(struct path *path)
@@ -1003,9 +1012,6 @@ failed:
1003 * Follow down to the covering mount currently visible to userspace. At each 1012 * Follow down to the covering mount currently visible to userspace. At each
1004 * point, the filesystem owning that dentry may be queried as to whether the 1013 * point, the filesystem owning that dentry may be queried as to whether the
1005 * caller is permitted to proceed or not. 1014 * caller is permitted to proceed or not.
1006 *
1007 * Care must be taken as namespace_sem may be held (indicated by mounting_here
1008 * being true).
1009 */ 1015 */
1010int follow_down(struct path *path) 1016int follow_down(struct path *path)
1011{ 1017{
@@ -2624,6 +2630,10 @@ static long do_rmdir(int dfd, const char __user *pathname)
2624 error = PTR_ERR(dentry); 2630 error = PTR_ERR(dentry);
2625 if (IS_ERR(dentry)) 2631 if (IS_ERR(dentry))
2626 goto exit2; 2632 goto exit2;
2633 if (!dentry->d_inode) {
2634 error = -ENOENT;
2635 goto exit3;
2636 }
2627 error = mnt_want_write(nd.path.mnt); 2637 error = mnt_want_write(nd.path.mnt);
2628 if (error) 2638 if (error)
2629 goto exit3; 2639 goto exit3;
@@ -2712,8 +2722,9 @@ static long do_unlinkat(int dfd, const char __user *pathname)
2712 if (nd.last.name[nd.last.len]) 2722 if (nd.last.name[nd.last.len])
2713 goto slashes; 2723 goto slashes;
2714 inode = dentry->d_inode; 2724 inode = dentry->d_inode;
2715 if (inode) 2725 if (!inode)
2716 ihold(inode); 2726 goto slashes;
2727 ihold(inode);
2717 error = mnt_want_write(nd.path.mnt); 2728 error = mnt_want_write(nd.path.mnt);
2718 if (error) 2729 if (error)
2719 goto exit2; 2730 goto exit2;
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 144f2a3c7185..6f4850deb272 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -256,7 +256,8 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr)
256 256
257 nfs_attr_check_mountpoint(sb, fattr); 257 nfs_attr_check_mountpoint(sb, fattr);
258 258
259 if ((fattr->valid & NFS_ATTR_FATTR_FILEID) == 0 && (fattr->valid & NFS_ATTR_FATTR_MOUNTPOINT) == 0) 259 if (((fattr->valid & NFS_ATTR_FATTR_FILEID) == 0) &&
260 !nfs_attr_use_mounted_on_fileid(fattr))
260 goto out_no_inode; 261 goto out_no_inode;
261 if ((fattr->valid & NFS_ATTR_FATTR_TYPE) == 0) 262 if ((fattr->valid & NFS_ATTR_FATTR_TYPE) == 0)
262 goto out_no_inode; 263 goto out_no_inode;
@@ -1294,7 +1295,8 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
1294 if (new_isize != cur_isize) { 1295 if (new_isize != cur_isize) {
1295 /* Do we perhaps have any outstanding writes, or has 1296 /* Do we perhaps have any outstanding writes, or has
1296 * the file grown beyond our last write? */ 1297 * the file grown beyond our last write? */
1297 if (nfsi->npages == 0 || new_isize > cur_isize) { 1298 if ((nfsi->npages == 0 && !test_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags)) ||
1299 new_isize > cur_isize) {
1298 i_size_write(inode, new_isize); 1300 i_size_write(inode, new_isize);
1299 invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA; 1301 invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA;
1300 } 1302 }
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index b9056cbe68d6..2a55347a2daa 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -45,6 +45,17 @@ static inline void nfs_attr_check_mountpoint(struct super_block *parent, struct
45 fattr->valid |= NFS_ATTR_FATTR_MOUNTPOINT; 45 fattr->valid |= NFS_ATTR_FATTR_MOUNTPOINT;
46} 46}
47 47
48static inline int nfs_attr_use_mounted_on_fileid(struct nfs_fattr *fattr)
49{
50 if (((fattr->valid & NFS_ATTR_FATTR_MOUNTED_ON_FILEID) == 0) ||
51 (((fattr->valid & NFS_ATTR_FATTR_MOUNTPOINT) == 0) &&
52 ((fattr->valid & NFS_ATTR_FATTR_V4_REFERRAL) == 0)))
53 return 0;
54
55 fattr->fileid = fattr->mounted_on_fileid;
56 return 1;
57}
58
48struct nfs_clone_mount { 59struct nfs_clone_mount {
49 const struct super_block *sb; 60 const struct super_block *sb;
50 const struct dentry *dentry; 61 const struct dentry *dentry;
diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c
index 426908809c97..0bafcc91c27f 100644
--- a/fs/nfs/nfs4filelayout.c
+++ b/fs/nfs/nfs4filelayout.c
@@ -30,6 +30,7 @@
30 */ 30 */
31 31
32#include <linux/nfs_fs.h> 32#include <linux/nfs_fs.h>
33#include <linux/nfs_page.h>
33 34
34#include "internal.h" 35#include "internal.h"
35#include "nfs4filelayout.h" 36#include "nfs4filelayout.h"
@@ -552,13 +553,18 @@ filelayout_decode_layout(struct pnfs_layout_hdr *flo,
552 __func__, nfl_util, fl->num_fh, fl->first_stripe_index, 553 __func__, nfl_util, fl->num_fh, fl->first_stripe_index,
553 fl->pattern_offset); 554 fl->pattern_offset);
554 555
555 if (!fl->num_fh) 556 /* Note that a zero value for num_fh is legal for STRIPE_SPARSE.
557 * Futher checking is done in filelayout_check_layout */
558 if (fl->num_fh < 0 || fl->num_fh >
559 max(NFS4_PNFS_MAX_STRIPE_CNT, NFS4_PNFS_MAX_MULTI_CNT))
556 goto out_err; 560 goto out_err;
557 561
558 fl->fh_array = kzalloc(fl->num_fh * sizeof(struct nfs_fh *), 562 if (fl->num_fh > 0) {
559 gfp_flags); 563 fl->fh_array = kzalloc(fl->num_fh * sizeof(struct nfs_fh *),
560 if (!fl->fh_array) 564 gfp_flags);
561 goto out_err; 565 if (!fl->fh_array)
566 goto out_err;
567 }
562 568
563 for (i = 0; i < fl->num_fh; i++) { 569 for (i = 0; i < fl->num_fh; i++) {
564 /* Do we want to use a mempool here? */ 570 /* Do we want to use a mempool here? */
@@ -661,8 +667,9 @@ filelayout_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev,
661 u64 p_stripe, r_stripe; 667 u64 p_stripe, r_stripe;
662 u32 stripe_unit; 668 u32 stripe_unit;
663 669
664 if (!pnfs_generic_pg_test(pgio, prev, req)) 670 if (!pnfs_generic_pg_test(pgio, prev, req) ||
665 return 0; 671 !nfs_generic_pg_test(pgio, prev, req))
672 return false;
666 673
667 if (!pgio->pg_lseg) 674 if (!pgio->pg_lseg)
668 return 1; 675 return 1;
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index d2c4b59c896d..5879b23e0c99 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -2265,12 +2265,14 @@ static int nfs4_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle,
2265 return nfs4_map_errors(status); 2265 return nfs4_map_errors(status);
2266} 2266}
2267 2267
2268static void nfs_fixup_referral_attributes(struct nfs_fattr *fattr);
2268/* 2269/*
2269 * Get locations and (maybe) other attributes of a referral. 2270 * Get locations and (maybe) other attributes of a referral.
2270 * Note that we'll actually follow the referral later when 2271 * Note that we'll actually follow the referral later when
2271 * we detect fsid mismatch in inode revalidation 2272 * we detect fsid mismatch in inode revalidation
2272 */ 2273 */
2273static int nfs4_get_referral(struct inode *dir, const struct qstr *name, struct nfs_fattr *fattr, struct nfs_fh *fhandle) 2274static int nfs4_get_referral(struct inode *dir, const struct qstr *name,
2275 struct nfs_fattr *fattr, struct nfs_fh *fhandle)
2274{ 2276{
2275 int status = -ENOMEM; 2277 int status = -ENOMEM;
2276 struct page *page = NULL; 2278 struct page *page = NULL;
@@ -2288,15 +2290,16 @@ static int nfs4_get_referral(struct inode *dir, const struct qstr *name, struct
2288 goto out; 2290 goto out;
2289 /* Make sure server returned a different fsid for the referral */ 2291 /* Make sure server returned a different fsid for the referral */
2290 if (nfs_fsid_equal(&NFS_SERVER(dir)->fsid, &locations->fattr.fsid)) { 2292 if (nfs_fsid_equal(&NFS_SERVER(dir)->fsid, &locations->fattr.fsid)) {
2291 dprintk("%s: server did not return a different fsid for a referral at %s\n", __func__, name->name); 2293 dprintk("%s: server did not return a different fsid for"
2294 " a referral at %s\n", __func__, name->name);
2292 status = -EIO; 2295 status = -EIO;
2293 goto out; 2296 goto out;
2294 } 2297 }
2298 /* Fixup attributes for the nfs_lookup() call to nfs_fhget() */
2299 nfs_fixup_referral_attributes(&locations->fattr);
2295 2300
2301 /* replace the lookup nfs_fattr with the locations nfs_fattr */
2296 memcpy(fattr, &locations->fattr, sizeof(struct nfs_fattr)); 2302 memcpy(fattr, &locations->fattr, sizeof(struct nfs_fattr));
2297 fattr->valid |= NFS_ATTR_FATTR_V4_REFERRAL;
2298 if (!fattr->mode)
2299 fattr->mode = S_IFDIR;
2300 memset(fhandle, 0, sizeof(struct nfs_fh)); 2303 memset(fhandle, 0, sizeof(struct nfs_fh));
2301out: 2304out:
2302 if (page) 2305 if (page)
@@ -4667,11 +4670,15 @@ static size_t nfs4_xattr_list_nfs4_acl(struct dentry *dentry, char *list,
4667 return len; 4670 return len;
4668} 4671}
4669 4672
4673/*
4674 * nfs_fhget will use either the mounted_on_fileid or the fileid
4675 */
4670static void nfs_fixup_referral_attributes(struct nfs_fattr *fattr) 4676static void nfs_fixup_referral_attributes(struct nfs_fattr *fattr)
4671{ 4677{
4672 if (!((fattr->valid & NFS_ATTR_FATTR_FILEID) && 4678 if (!(((fattr->valid & NFS_ATTR_FATTR_MOUNTED_ON_FILEID) ||
4673 (fattr->valid & NFS_ATTR_FATTR_FSID) && 4679 (fattr->valid & NFS_ATTR_FATTR_FILEID)) &&
4674 (fattr->valid & NFS_ATTR_FATTR_V4_REFERRAL))) 4680 (fattr->valid & NFS_ATTR_FATTR_FSID) &&
4681 (fattr->valid & NFS_ATTR_FATTR_V4_REFERRAL)))
4675 return; 4682 return;
4676 4683
4677 fattr->valid |= NFS_ATTR_FATTR_TYPE | NFS_ATTR_FATTR_MODE | 4684 fattr->valid |= NFS_ATTR_FATTR_TYPE | NFS_ATTR_FATTR_MODE |
@@ -4686,7 +4693,6 @@ int nfs4_proc_fs_locations(struct inode *dir, const struct qstr *name,
4686 struct nfs_server *server = NFS_SERVER(dir); 4693 struct nfs_server *server = NFS_SERVER(dir);
4687 u32 bitmask[2] = { 4694 u32 bitmask[2] = {
4688 [0] = FATTR4_WORD0_FSID | FATTR4_WORD0_FS_LOCATIONS, 4695 [0] = FATTR4_WORD0_FSID | FATTR4_WORD0_FS_LOCATIONS,
4689 [1] = FATTR4_WORD1_MOUNTED_ON_FILEID,
4690 }; 4696 };
4691 struct nfs4_fs_locations_arg args = { 4697 struct nfs4_fs_locations_arg args = {
4692 .dir_fh = NFS_FH(dir), 4698 .dir_fh = NFS_FH(dir),
@@ -4705,11 +4711,18 @@ int nfs4_proc_fs_locations(struct inode *dir, const struct qstr *name,
4705 int status; 4711 int status;
4706 4712
4707 dprintk("%s: start\n", __func__); 4713 dprintk("%s: start\n", __func__);
4714
4715 /* Ask for the fileid of the absent filesystem if mounted_on_fileid
4716 * is not supported */
4717 if (NFS_SERVER(dir)->attr_bitmask[1] & FATTR4_WORD1_MOUNTED_ON_FILEID)
4718 bitmask[1] |= FATTR4_WORD1_MOUNTED_ON_FILEID;
4719 else
4720 bitmask[0] |= FATTR4_WORD0_FILEID;
4721
4708 nfs_fattr_init(&fs_locations->fattr); 4722 nfs_fattr_init(&fs_locations->fattr);
4709 fs_locations->server = server; 4723 fs_locations->server = server;
4710 fs_locations->nlocations = 0; 4724 fs_locations->nlocations = 0;
4711 status = nfs4_call_sync(server->client, server, &msg, &args.seq_args, &res.seq_res, 0); 4725 status = nfs4_call_sync(server->client, server, &msg, &args.seq_args, &res.seq_res, 0);
4712 nfs_fixup_referral_attributes(&fs_locations->fattr);
4713 dprintk("%s: returned status = %d\n", __func__, status); 4726 dprintk("%s: returned status = %d\n", __func__, status);
4714 return status; 4727 return status;
4715} 4728}
@@ -5098,7 +5111,6 @@ static void nfs4_init_channel_attrs(struct nfs41_create_session_args *args)
5098 if (mxresp_sz == 0) 5111 if (mxresp_sz == 0)
5099 mxresp_sz = NFS_MAX_FILE_IO_SIZE; 5112 mxresp_sz = NFS_MAX_FILE_IO_SIZE;
5100 /* Fore channel attributes */ 5113 /* Fore channel attributes */
5101 args->fc_attrs.headerpadsz = 0;
5102 args->fc_attrs.max_rqst_sz = mxrqst_sz; 5114 args->fc_attrs.max_rqst_sz = mxrqst_sz;
5103 args->fc_attrs.max_resp_sz = mxresp_sz; 5115 args->fc_attrs.max_resp_sz = mxresp_sz;
5104 args->fc_attrs.max_ops = NFS4_MAX_OPS; 5116 args->fc_attrs.max_ops = NFS4_MAX_OPS;
@@ -5111,7 +5123,6 @@ static void nfs4_init_channel_attrs(struct nfs41_create_session_args *args)
5111 args->fc_attrs.max_ops, args->fc_attrs.max_reqs); 5123 args->fc_attrs.max_ops, args->fc_attrs.max_reqs);
5112 5124
5113 /* Back channel attributes */ 5125 /* Back channel attributes */
5114 args->bc_attrs.headerpadsz = 0;
5115 args->bc_attrs.max_rqst_sz = PAGE_SIZE; 5126 args->bc_attrs.max_rqst_sz = PAGE_SIZE;
5116 args->bc_attrs.max_resp_sz = PAGE_SIZE; 5127 args->bc_attrs.max_resp_sz = PAGE_SIZE;
5117 args->bc_attrs.max_resp_sz_cached = 0; 5128 args->bc_attrs.max_resp_sz_cached = 0;
@@ -5131,8 +5142,6 @@ static int nfs4_verify_fore_channel_attrs(struct nfs41_create_session_args *args
5131 struct nfs4_channel_attrs *sent = &args->fc_attrs; 5142 struct nfs4_channel_attrs *sent = &args->fc_attrs;
5132 struct nfs4_channel_attrs *rcvd = &session->fc_attrs; 5143 struct nfs4_channel_attrs *rcvd = &session->fc_attrs;
5133 5144
5134 if (rcvd->headerpadsz > sent->headerpadsz)
5135 return -EINVAL;
5136 if (rcvd->max_resp_sz > sent->max_resp_sz) 5145 if (rcvd->max_resp_sz > sent->max_resp_sz)
5137 return -EINVAL; 5146 return -EINVAL;
5138 /* 5147 /*
@@ -5697,6 +5706,7 @@ static void nfs4_layoutreturn_done(struct rpc_task *task, void *calldata)
5697{ 5706{
5698 struct nfs4_layoutreturn *lrp = calldata; 5707 struct nfs4_layoutreturn *lrp = calldata;
5699 struct nfs_server *server; 5708 struct nfs_server *server;
5709 struct pnfs_layout_hdr *lo = NFS_I(lrp->args.inode)->layout;
5700 5710
5701 dprintk("--> %s\n", __func__); 5711 dprintk("--> %s\n", __func__);
5702 5712
@@ -5708,16 +5718,15 @@ static void nfs4_layoutreturn_done(struct rpc_task *task, void *calldata)
5708 nfs_restart_rpc(task, lrp->clp); 5718 nfs_restart_rpc(task, lrp->clp);
5709 return; 5719 return;
5710 } 5720 }
5721 spin_lock(&lo->plh_inode->i_lock);
5711 if (task->tk_status == 0) { 5722 if (task->tk_status == 0) {
5712 struct pnfs_layout_hdr *lo = NFS_I(lrp->args.inode)->layout;
5713
5714 if (lrp->res.lrs_present) { 5723 if (lrp->res.lrs_present) {
5715 spin_lock(&lo->plh_inode->i_lock);
5716 pnfs_set_layout_stateid(lo, &lrp->res.stateid, true); 5724 pnfs_set_layout_stateid(lo, &lrp->res.stateid, true);
5717 spin_unlock(&lo->plh_inode->i_lock);
5718 } else 5725 } else
5719 BUG_ON(!list_empty(&lo->plh_segs)); 5726 BUG_ON(!list_empty(&lo->plh_segs));
5720 } 5727 }
5728 lo->plh_block_lgets--;
5729 spin_unlock(&lo->plh_inode->i_lock);
5721 dprintk("<-- %s\n", __func__); 5730 dprintk("<-- %s\n", __func__);
5722} 5731}
5723 5732
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index d869a5e5464b..6870bc61ceec 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -255,7 +255,7 @@ static int nfs4_stat_to_errno(int);
255#define decode_fs_locations_maxsz \ 255#define decode_fs_locations_maxsz \
256 (0) 256 (0)
257#define encode_secinfo_maxsz (op_encode_hdr_maxsz + nfs4_name_maxsz) 257#define encode_secinfo_maxsz (op_encode_hdr_maxsz + nfs4_name_maxsz)
258#define decode_secinfo_maxsz (op_decode_hdr_maxsz + 4 + (NFS_MAX_SECFLAVORS * (16 + GSS_OID_MAX_LEN))) 258#define decode_secinfo_maxsz (op_decode_hdr_maxsz + 1 + ((NFS_MAX_SECFLAVORS * (16 + GSS_OID_MAX_LEN)) / 4))
259 259
260#if defined(CONFIG_NFS_V4_1) 260#if defined(CONFIG_NFS_V4_1)
261#define NFS4_MAX_MACHINE_NAME_LEN (64) 261#define NFS4_MAX_MACHINE_NAME_LEN (64)
@@ -1725,7 +1725,7 @@ static void encode_create_session(struct xdr_stream *xdr,
1725 *p++ = cpu_to_be32(args->flags); /*flags */ 1725 *p++ = cpu_to_be32(args->flags); /*flags */
1726 1726
1727 /* Fore Channel */ 1727 /* Fore Channel */
1728 *p++ = cpu_to_be32(args->fc_attrs.headerpadsz); /* header padding size */ 1728 *p++ = cpu_to_be32(0); /* header padding size */
1729 *p++ = cpu_to_be32(args->fc_attrs.max_rqst_sz); /* max req size */ 1729 *p++ = cpu_to_be32(args->fc_attrs.max_rqst_sz); /* max req size */
1730 *p++ = cpu_to_be32(args->fc_attrs.max_resp_sz); /* max resp size */ 1730 *p++ = cpu_to_be32(args->fc_attrs.max_resp_sz); /* max resp size */
1731 *p++ = cpu_to_be32(max_resp_sz_cached); /* Max resp sz cached */ 1731 *p++ = cpu_to_be32(max_resp_sz_cached); /* Max resp sz cached */
@@ -1734,7 +1734,7 @@ static void encode_create_session(struct xdr_stream *xdr,
1734 *p++ = cpu_to_be32(0); /* rdmachannel_attrs */ 1734 *p++ = cpu_to_be32(0); /* rdmachannel_attrs */
1735 1735
1736 /* Back Channel */ 1736 /* Back Channel */
1737 *p++ = cpu_to_be32(args->fc_attrs.headerpadsz); /* header padding size */ 1737 *p++ = cpu_to_be32(0); /* header padding size */
1738 *p++ = cpu_to_be32(args->bc_attrs.max_rqst_sz); /* max req size */ 1738 *p++ = cpu_to_be32(args->bc_attrs.max_rqst_sz); /* max req size */
1739 *p++ = cpu_to_be32(args->bc_attrs.max_resp_sz); /* max resp size */ 1739 *p++ = cpu_to_be32(args->bc_attrs.max_resp_sz); /* max resp size */
1740 *p++ = cpu_to_be32(args->bc_attrs.max_resp_sz_cached); /* Max resp sz cached */ 1740 *p++ = cpu_to_be32(args->bc_attrs.max_resp_sz_cached); /* Max resp sz cached */
@@ -3098,7 +3098,7 @@ out_overflow:
3098 return -EIO; 3098 return -EIO;
3099} 3099}
3100 3100
3101static int decode_attr_error(struct xdr_stream *xdr, uint32_t *bitmap) 3101static int decode_attr_error(struct xdr_stream *xdr, uint32_t *bitmap, int32_t *res)
3102{ 3102{
3103 __be32 *p; 3103 __be32 *p;
3104 3104
@@ -3109,7 +3109,7 @@ static int decode_attr_error(struct xdr_stream *xdr, uint32_t *bitmap)
3109 if (unlikely(!p)) 3109 if (unlikely(!p))
3110 goto out_overflow; 3110 goto out_overflow;
3111 bitmap[0] &= ~FATTR4_WORD0_RDATTR_ERROR; 3111 bitmap[0] &= ~FATTR4_WORD0_RDATTR_ERROR;
3112 return -be32_to_cpup(p); 3112 *res = -be32_to_cpup(p);
3113 } 3113 }
3114 return 0; 3114 return 0;
3115out_overflow: 3115out_overflow:
@@ -4070,6 +4070,7 @@ static int decode_getfattr_attrs(struct xdr_stream *xdr, uint32_t *bitmap,
4070 int status; 4070 int status;
4071 umode_t fmode = 0; 4071 umode_t fmode = 0;
4072 uint32_t type; 4072 uint32_t type;
4073 int32_t err;
4073 4074
4074 status = decode_attr_type(xdr, bitmap, &type); 4075 status = decode_attr_type(xdr, bitmap, &type);
4075 if (status < 0) 4076 if (status < 0)
@@ -4095,13 +4096,12 @@ static int decode_getfattr_attrs(struct xdr_stream *xdr, uint32_t *bitmap,
4095 goto xdr_error; 4096 goto xdr_error;
4096 fattr->valid |= status; 4097 fattr->valid |= status;
4097 4098
4098 status = decode_attr_error(xdr, bitmap); 4099 err = 0;
4099 if (status == -NFS4ERR_WRONGSEC) { 4100 status = decode_attr_error(xdr, bitmap, &err);
4100 nfs_fixup_secinfo_attributes(fattr, fh);
4101 status = 0;
4102 }
4103 if (status < 0) 4101 if (status < 0)
4104 goto xdr_error; 4102 goto xdr_error;
4103 if (err == -NFS4ERR_WRONGSEC)
4104 nfs_fixup_secinfo_attributes(fattr, fh);
4105 4105
4106 status = decode_attr_filehandle(xdr, bitmap, fh); 4106 status = decode_attr_filehandle(xdr, bitmap, fh);
4107 if (status < 0) 4107 if (status < 0)
@@ -4997,12 +4997,14 @@ static int decode_chan_attrs(struct xdr_stream *xdr,
4997 struct nfs4_channel_attrs *attrs) 4997 struct nfs4_channel_attrs *attrs)
4998{ 4998{
4999 __be32 *p; 4999 __be32 *p;
5000 u32 nr_attrs; 5000 u32 nr_attrs, val;
5001 5001
5002 p = xdr_inline_decode(xdr, 28); 5002 p = xdr_inline_decode(xdr, 28);
5003 if (unlikely(!p)) 5003 if (unlikely(!p))
5004 goto out_overflow; 5004 goto out_overflow;
5005 attrs->headerpadsz = be32_to_cpup(p++); 5005 val = be32_to_cpup(p++); /* headerpadsz */
5006 if (val)
5007 return -EINVAL; /* no support for header padding yet */
5006 attrs->max_rqst_sz = be32_to_cpup(p++); 5008 attrs->max_rqst_sz = be32_to_cpup(p++);
5007 attrs->max_resp_sz = be32_to_cpup(p++); 5009 attrs->max_resp_sz = be32_to_cpup(p++);
5008 attrs->max_resp_sz_cached = be32_to_cpup(p++); 5010 attrs->max_resp_sz_cached = be32_to_cpup(p++);
diff --git a/fs/nfs/objlayout/objio_osd.c b/fs/nfs/objlayout/objio_osd.c
index 9cf208df1f25..8ff2ea3f10ef 100644
--- a/fs/nfs/objlayout/objio_osd.c
+++ b/fs/nfs/objlayout/objio_osd.c
@@ -108,7 +108,6 @@ _dev_list_add(const struct nfs_server *nfss,
108 de = n; 108 de = n;
109 } 109 }
110 110
111 atomic_inc(&de->id_node.ref);
112 return de; 111 return de;
113} 112}
114 113
@@ -1001,6 +1000,9 @@ static bool objio_pg_test(struct nfs_pageio_descriptor *pgio,
1001 if (!pnfs_generic_pg_test(pgio, prev, req)) 1000 if (!pnfs_generic_pg_test(pgio, prev, req))
1002 return false; 1001 return false;
1003 1002
1003 if (pgio->pg_lseg == NULL)
1004 return true;
1005
1004 return pgio->pg_count + req->wb_bytes <= 1006 return pgio->pg_count + req->wb_bytes <=
1005 OBJIO_LSEG(pgio->pg_lseg)->max_io_size; 1007 OBJIO_LSEG(pgio->pg_lseg)->max_io_size;
1006} 1008}
diff --git a/fs/nfs/objlayout/objlayout.c b/fs/nfs/objlayout/objlayout.c
index dc3956c0de80..1d06f8e2adea 100644
--- a/fs/nfs/objlayout/objlayout.c
+++ b/fs/nfs/objlayout/objlayout.c
@@ -291,7 +291,7 @@ objlayout_read_done(struct objlayout_io_state *state, ssize_t status, bool sync)
291 struct nfs_read_data *rdata; 291 struct nfs_read_data *rdata;
292 292
293 state->status = status; 293 state->status = status;
294 dprintk("%s: Begin status=%ld eof=%d\n", __func__, status, eof); 294 dprintk("%s: Begin status=%zd eof=%d\n", __func__, status, eof);
295 rdata = state->rpcdata; 295 rdata = state->rpcdata;
296 rdata->task.tk_status = status; 296 rdata->task.tk_status = status;
297 if (status >= 0) { 297 if (status >= 0) {
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index 7913961aff22..009855716286 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -204,7 +204,7 @@ nfs_wait_on_request(struct nfs_page *req)
204 TASK_UNINTERRUPTIBLE); 204 TASK_UNINTERRUPTIBLE);
205} 205}
206 206
207static bool nfs_generic_pg_test(struct nfs_pageio_descriptor *desc, struct nfs_page *prev, struct nfs_page *req) 207bool nfs_generic_pg_test(struct nfs_pageio_descriptor *desc, struct nfs_page *prev, struct nfs_page *req)
208{ 208{
209 /* 209 /*
210 * FIXME: ideally we should be able to coalesce all requests 210 * FIXME: ideally we should be able to coalesce all requests
@@ -218,6 +218,7 @@ static bool nfs_generic_pg_test(struct nfs_pageio_descriptor *desc, struct nfs_p
218 218
219 return desc->pg_count + req->wb_bytes <= desc->pg_bsize; 219 return desc->pg_count + req->wb_bytes <= desc->pg_bsize;
220} 220}
221EXPORT_SYMBOL_GPL(nfs_generic_pg_test);
221 222
222/** 223/**
223 * nfs_pageio_init - initialise a page io descriptor 224 * nfs_pageio_init - initialise a page io descriptor
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 8c1309d852a6..29c0ca7fc347 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -634,14 +634,16 @@ _pnfs_return_layout(struct inode *ino)
634 634
635 spin_lock(&ino->i_lock); 635 spin_lock(&ino->i_lock);
636 lo = nfsi->layout; 636 lo = nfsi->layout;
637 if (!lo || !mark_matching_lsegs_invalid(lo, &tmp_list, NULL)) { 637 if (!lo) {
638 spin_unlock(&ino->i_lock); 638 spin_unlock(&ino->i_lock);
639 dprintk("%s: no layout segments to return\n", __func__); 639 dprintk("%s: no layout to return\n", __func__);
640 goto out; 640 return status;
641 } 641 }
642 stateid = nfsi->layout->plh_stateid; 642 stateid = nfsi->layout->plh_stateid;
643 /* Reference matched in nfs4_layoutreturn_release */ 643 /* Reference matched in nfs4_layoutreturn_release */
644 get_layout_hdr(lo); 644 get_layout_hdr(lo);
645 mark_matching_lsegs_invalid(lo, &tmp_list, NULL);
646 lo->plh_block_lgets++;
645 spin_unlock(&ino->i_lock); 647 spin_unlock(&ino->i_lock);
646 pnfs_free_lseg_list(&tmp_list); 648 pnfs_free_lseg_list(&tmp_list);
647 649
@@ -650,6 +652,9 @@ _pnfs_return_layout(struct inode *ino)
650 lrp = kzalloc(sizeof(*lrp), GFP_KERNEL); 652 lrp = kzalloc(sizeof(*lrp), GFP_KERNEL);
651 if (unlikely(lrp == NULL)) { 653 if (unlikely(lrp == NULL)) {
652 status = -ENOMEM; 654 status = -ENOMEM;
655 set_bit(NFS_LAYOUT_RW_FAILED, &lo->plh_flags);
656 set_bit(NFS_LAYOUT_RO_FAILED, &lo->plh_flags);
657 put_layout_hdr(lo);
653 goto out; 658 goto out;
654 } 659 }
655 660
@@ -887,7 +892,7 @@ pnfs_find_lseg(struct pnfs_layout_hdr *lo,
887 ret = get_lseg(lseg); 892 ret = get_lseg(lseg);
888 break; 893 break;
889 } 894 }
890 if (cmp_layout(range, &lseg->pls_range) > 0) 895 if (lseg->pls_range.offset > range->offset)
891 break; 896 break;
892 } 897 }
893 898
@@ -1059,23 +1064,36 @@ pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev,
1059 gfp_flags = GFP_NOFS; 1064 gfp_flags = GFP_NOFS;
1060 } 1065 }
1061 1066
1062 if (pgio->pg_count == prev->wb_bytes) { 1067 if (pgio->pg_lseg == NULL) {
1068 if (pgio->pg_count != prev->wb_bytes)
1069 return true;
1063 /* This is first coelesce call for a series of nfs_pages */ 1070 /* This is first coelesce call for a series of nfs_pages */
1064 pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, 1071 pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
1065 prev->wb_context, 1072 prev->wb_context,
1066 req_offset(req), 1073 req_offset(prev),
1067 pgio->pg_count, 1074 pgio->pg_count,
1068 access_type, 1075 access_type,
1069 gfp_flags); 1076 gfp_flags);
1070 return true; 1077 if (pgio->pg_lseg == NULL)
1078 return true;
1071 } 1079 }
1072 1080
1073 if (pgio->pg_lseg && 1081 /*
1074 req_offset(req) > end_offset(pgio->pg_lseg->pls_range.offset, 1082 * Test if a nfs_page is fully contained in the pnfs_layout_range.
1075 pgio->pg_lseg->pls_range.length)) 1083 * Note that this test makes several assumptions:
1076 return false; 1084 * - that the previous nfs_page in the struct nfs_pageio_descriptor
1077 1085 * is known to lie within the range.
1078 return true; 1086 * - that the nfs_page being tested is known to be contiguous with the
1087 * previous nfs_page.
1088 * - Layout ranges are page aligned, so we only have to test the
1089 * start offset of the request.
1090 *
1091 * Please also note that 'end_offset' is actually the offset of the
1092 * first byte that lies outside the pnfs_layout_range. FIXME?
1093 *
1094 */
1095 return req_offset(req) < end_offset(pgio->pg_lseg->pls_range.offset,
1096 pgio->pg_lseg->pls_range.length);
1079} 1097}
1080EXPORT_SYMBOL_GPL(pnfs_generic_pg_test); 1098EXPORT_SYMBOL_GPL(pnfs_generic_pg_test);
1081 1099
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index 48d0a8e4d062..96bf4e6f45be 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -186,6 +186,7 @@ int pnfs_ld_read_done(struct nfs_read_data *);
186/* pnfs_dev.c */ 186/* pnfs_dev.c */
187struct nfs4_deviceid_node { 187struct nfs4_deviceid_node {
188 struct hlist_node node; 188 struct hlist_node node;
189 struct hlist_node tmpnode;
189 const struct pnfs_layoutdriver_type *ld; 190 const struct pnfs_layoutdriver_type *ld;
190 const struct nfs_client *nfs_client; 191 const struct nfs_client *nfs_client;
191 struct nfs4_deviceid deviceid; 192 struct nfs4_deviceid deviceid;
diff --git a/fs/nfs/pnfs_dev.c b/fs/nfs/pnfs_dev.c
index c65e133ce9c0..f0f8e1e22f6c 100644
--- a/fs/nfs/pnfs_dev.c
+++ b/fs/nfs/pnfs_dev.c
@@ -174,6 +174,7 @@ nfs4_init_deviceid_node(struct nfs4_deviceid_node *d,
174 const struct nfs4_deviceid *id) 174 const struct nfs4_deviceid *id)
175{ 175{
176 INIT_HLIST_NODE(&d->node); 176 INIT_HLIST_NODE(&d->node);
177 INIT_HLIST_NODE(&d->tmpnode);
177 d->ld = ld; 178 d->ld = ld;
178 d->nfs_client = nfs_client; 179 d->nfs_client = nfs_client;
179 d->deviceid = *id; 180 d->deviceid = *id;
@@ -208,6 +209,7 @@ nfs4_insert_deviceid_node(struct nfs4_deviceid_node *new)
208 209
209 hlist_add_head_rcu(&new->node, &nfs4_deviceid_cache[hash]); 210 hlist_add_head_rcu(&new->node, &nfs4_deviceid_cache[hash]);
210 spin_unlock(&nfs4_deviceid_lock); 211 spin_unlock(&nfs4_deviceid_lock);
212 atomic_inc(&new->ref);
211 213
212 return new; 214 return new;
213} 215}
@@ -238,24 +240,29 @@ static void
238_deviceid_purge_client(const struct nfs_client *clp, long hash) 240_deviceid_purge_client(const struct nfs_client *clp, long hash)
239{ 241{
240 struct nfs4_deviceid_node *d; 242 struct nfs4_deviceid_node *d;
241 struct hlist_node *n, *next; 243 struct hlist_node *n;
242 HLIST_HEAD(tmp); 244 HLIST_HEAD(tmp);
243 245
246 spin_lock(&nfs4_deviceid_lock);
244 rcu_read_lock(); 247 rcu_read_lock();
245 hlist_for_each_entry_rcu(d, n, &nfs4_deviceid_cache[hash], node) 248 hlist_for_each_entry_rcu(d, n, &nfs4_deviceid_cache[hash], node)
246 if (d->nfs_client == clp && atomic_read(&d->ref)) { 249 if (d->nfs_client == clp && atomic_read(&d->ref)) {
247 hlist_del_init_rcu(&d->node); 250 hlist_del_init_rcu(&d->node);
248 hlist_add_head(&d->node, &tmp); 251 hlist_add_head(&d->tmpnode, &tmp);
249 } 252 }
250 rcu_read_unlock(); 253 rcu_read_unlock();
254 spin_unlock(&nfs4_deviceid_lock);
251 255
252 if (hlist_empty(&tmp)) 256 if (hlist_empty(&tmp))
253 return; 257 return;
254 258
255 synchronize_rcu(); 259 synchronize_rcu();
256 hlist_for_each_entry_safe(d, n, next, &tmp, node) 260 while (!hlist_empty(&tmp)) {
261 d = hlist_entry(tmp.first, struct nfs4_deviceid_node, tmpnode);
262 hlist_del(&d->tmpnode);
257 if (atomic_dec_and_test(&d->ref)) 263 if (atomic_dec_and_test(&d->ref))
258 d->ld->free_deviceid_node(d); 264 d->ld->free_deviceid_node(d);
265 }
259} 266}
260 267
261void 268void
@@ -263,8 +270,8 @@ nfs4_deviceid_purge_client(const struct nfs_client *clp)
263{ 270{
264 long h; 271 long h;
265 272
266 spin_lock(&nfs4_deviceid_lock); 273 if (!(clp->cl_exchange_flags & EXCHGID4_FLAG_USE_PNFS_MDS))
274 return;
267 for (h = 0; h < NFS4_DEVICE_ID_HASH_SIZE; h++) 275 for (h = 0; h < NFS4_DEVICE_ID_HASH_SIZE; h++)
268 _deviceid_purge_client(clp, h); 276 _deviceid_purge_client(clp, h);
269 spin_unlock(&nfs4_deviceid_lock);
270} 277}
diff --git a/fs/nfsd/Kconfig b/fs/nfsd/Kconfig
index 18b3e8975fe0..fbb2a5ef5817 100644
--- a/fs/nfsd/Kconfig
+++ b/fs/nfsd/Kconfig
@@ -82,6 +82,7 @@ config NFSD_V4
82 select NFSD_V3 82 select NFSD_V3
83 select FS_POSIX_ACL 83 select FS_POSIX_ACL
84 select SUNRPC_GSS 84 select SUNRPC_GSS
85 select CRYPTO
85 help 86 help
86 This option enables support in your system's NFS server for 87 This option enables support in your system's NFS server for
87 version 4 of the NFS protocol (RFC 3530). 88 version 4 of the NFS protocol (RFC 3530).
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index 1f5eae40f34e..2b1449dd2f49 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -13,6 +13,7 @@
13#include <linux/lockd/lockd.h> 13#include <linux/lockd/lockd.h>
14#include <linux/sunrpc/clnt.h> 14#include <linux/sunrpc/clnt.h>
15#include <linux/sunrpc/gss_api.h> 15#include <linux/sunrpc/gss_api.h>
16#include <linux/sunrpc/gss_krb5_enctypes.h>
16 17
17#include "idmap.h" 18#include "idmap.h"
18#include "nfsd.h" 19#include "nfsd.h"
@@ -189,18 +190,10 @@ static struct file_operations export_features_operations = {
189 .release = single_release, 190 .release = single_release,
190}; 191};
191 192
192#ifdef CONFIG_SUNRPC_GSS 193#if defined(CONFIG_SUNRPC_GSS) || defined(CONFIG_SUNRPC_GSS_MODULE)
193static int supported_enctypes_show(struct seq_file *m, void *v) 194static int supported_enctypes_show(struct seq_file *m, void *v)
194{ 195{
195 struct gss_api_mech *k5mech; 196 seq_printf(m, KRB5_SUPPORTED_ENCTYPES);
196
197 k5mech = gss_mech_get_by_name("krb5");
198 if (k5mech == NULL)
199 goto out;
200 if (k5mech->gm_upcall_enctypes != NULL)
201 seq_printf(m, k5mech->gm_upcall_enctypes);
202 gss_mech_put(k5mech);
203out:
204 return 0; 197 return 0;
205} 198}
206 199
@@ -215,7 +208,7 @@ static struct file_operations supported_enctypes_ops = {
215 .llseek = seq_lseek, 208 .llseek = seq_lseek,
216 .release = single_release, 209 .release = single_release,
217}; 210};
218#endif /* CONFIG_SUNRPC_GSS */ 211#endif /* CONFIG_SUNRPC_GSS or CONFIG_SUNRPC_GSS_MODULE */
219 212
220extern int nfsd_pool_stats_open(struct inode *inode, struct file *file); 213extern int nfsd_pool_stats_open(struct inode *inode, struct file *file);
221extern int nfsd_pool_stats_release(struct inode *inode, struct file *file); 214extern int nfsd_pool_stats_release(struct inode *inode, struct file *file);
@@ -1427,9 +1420,9 @@ static int nfsd_fill_super(struct super_block * sb, void * data, int silent)
1427 [NFSD_Versions] = {"versions", &transaction_ops, S_IWUSR|S_IRUSR}, 1420 [NFSD_Versions] = {"versions", &transaction_ops, S_IWUSR|S_IRUSR},
1428 [NFSD_Ports] = {"portlist", &transaction_ops, S_IWUSR|S_IRUGO}, 1421 [NFSD_Ports] = {"portlist", &transaction_ops, S_IWUSR|S_IRUGO},
1429 [NFSD_MaxBlkSize] = {"max_block_size", &transaction_ops, S_IWUSR|S_IRUGO}, 1422 [NFSD_MaxBlkSize] = {"max_block_size", &transaction_ops, S_IWUSR|S_IRUGO},
1430#ifdef CONFIG_SUNRPC_GSS 1423#if defined(CONFIG_SUNRPC_GSS) || defined(CONFIG_SUNRPC_GSS_MODULE)
1431 [NFSD_SupportedEnctypes] = {"supported_krb5_enctypes", &supported_enctypes_ops, S_IRUGO}, 1424 [NFSD_SupportedEnctypes] = {"supported_krb5_enctypes", &supported_enctypes_ops, S_IRUGO},
1432#endif /* CONFIG_SUNRPC_GSS */ 1425#endif /* CONFIG_SUNRPC_GSS or CONFIG_SUNRPC_GSS_MODULE */
1433#ifdef CONFIG_NFSD_V4 1426#ifdef CONFIG_NFSD_V4
1434 [NFSD_Leasetime] = {"nfsv4leasetime", &transaction_ops, S_IWUSR|S_IRUSR}, 1427 [NFSD_Leasetime] = {"nfsv4leasetime", &transaction_ops, S_IWUSR|S_IRUSR},
1435 [NFSD_Gracetime] = {"nfsv4gracetime", &transaction_ops, S_IWUSR|S_IRUSR}, 1428 [NFSD_Gracetime] = {"nfsv4gracetime", &transaction_ops, S_IWUSR|S_IRUSR},
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index d5718273bb32..fd0acca5370a 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -696,7 +696,15 @@ nfsd_access(struct svc_rqst *rqstp, struct svc_fh *fhp, u32 *access, u32 *suppor
696} 696}
697#endif /* CONFIG_NFSD_V3 */ 697#endif /* CONFIG_NFSD_V3 */
698 698
699static int nfsd_open_break_lease(struct inode *inode, int access)
700{
701 unsigned int mode;
699 702
703 if (access & NFSD_MAY_NOT_BREAK_LEASE)
704 return 0;
705 mode = (access & NFSD_MAY_WRITE) ? O_WRONLY : O_RDONLY;
706 return break_lease(inode, mode | O_NONBLOCK);
707}
700 708
701/* 709/*
702 * Open an existing file or directory. 710 * Open an existing file or directory.
@@ -744,12 +752,7 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
744 if (!inode->i_fop) 752 if (!inode->i_fop)
745 goto out; 753 goto out;
746 754
747 /* 755 host_err = nfsd_open_break_lease(inode, access);
748 * Check to see if there are any leases on this file.
749 * This may block while leases are broken.
750 */
751 if (!(access & NFSD_MAY_NOT_BREAK_LEASE))
752 host_err = break_lease(inode, O_NONBLOCK | ((access & NFSD_MAY_WRITE) ? O_WRONLY : 0));
753 if (host_err) /* NOMEM or WOULDBLOCK */ 756 if (host_err) /* NOMEM or WOULDBLOCK */
754 goto out_nfserr; 757 goto out_nfserr;
755 758
@@ -1660,8 +1663,10 @@ nfsd_link(struct svc_rqst *rqstp, struct svc_fh *ffhp,
1660 if (!dold->d_inode) 1663 if (!dold->d_inode)
1661 goto out_drop_write; 1664 goto out_drop_write;
1662 host_err = nfsd_break_lease(dold->d_inode); 1665 host_err = nfsd_break_lease(dold->d_inode);
1663 if (host_err) 1666 if (host_err) {
1667 err = nfserrno(host_err);
1664 goto out_drop_write; 1668 goto out_drop_write;
1669 }
1665 host_err = vfs_link(dold, dirp, dnew); 1670 host_err = vfs_link(dold, dirp, dnew);
1666 if (!host_err) { 1671 if (!host_err) {
1667 err = nfserrno(commit_metadata(ffhp)); 1672 err = nfserrno(commit_metadata(ffhp));
diff --git a/fs/nilfs2/btree.c b/fs/nilfs2/btree.c
index 7eafe468a29c..b2e3ff347620 100644
--- a/fs/nilfs2/btree.c
+++ b/fs/nilfs2/btree.c
@@ -1346,6 +1346,11 @@ static void nilfs_btree_shrink(struct nilfs_bmap *btree,
1346 path[level].bp_bh = NULL; 1346 path[level].bp_bh = NULL;
1347} 1347}
1348 1348
1349static void nilfs_btree_nop(struct nilfs_bmap *btree,
1350 struct nilfs_btree_path *path,
1351 int level, __u64 *keyp, __u64 *ptrp)
1352{
1353}
1349 1354
1350static int nilfs_btree_prepare_delete(struct nilfs_bmap *btree, 1355static int nilfs_btree_prepare_delete(struct nilfs_bmap *btree,
1351 struct nilfs_btree_path *path, 1356 struct nilfs_btree_path *path,
@@ -1356,20 +1361,19 @@ static int nilfs_btree_prepare_delete(struct nilfs_bmap *btree,
1356 struct buffer_head *bh; 1361 struct buffer_head *bh;
1357 struct nilfs_btree_node *node, *parent, *sib; 1362 struct nilfs_btree_node *node, *parent, *sib;
1358 __u64 sibptr; 1363 __u64 sibptr;
1359 int pindex, level, ncmin, ncmax, ncblk, ret; 1364 int pindex, dindex, level, ncmin, ncmax, ncblk, ret;
1360 1365
1361 ret = 0; 1366 ret = 0;
1362 stats->bs_nblocks = 0; 1367 stats->bs_nblocks = 0;
1363 ncmin = NILFS_BTREE_NODE_NCHILDREN_MIN(nilfs_btree_node_size(btree)); 1368 ncmin = NILFS_BTREE_NODE_NCHILDREN_MIN(nilfs_btree_node_size(btree));
1364 ncblk = nilfs_btree_nchildren_per_block(btree); 1369 ncblk = nilfs_btree_nchildren_per_block(btree);
1365 1370
1366 for (level = NILFS_BTREE_LEVEL_NODE_MIN; 1371 for (level = NILFS_BTREE_LEVEL_NODE_MIN, dindex = path[level].bp_index;
1367 level < nilfs_btree_height(btree) - 1; 1372 level < nilfs_btree_height(btree) - 1;
1368 level++) { 1373 level++) {
1369 node = nilfs_btree_get_nonroot_node(path, level); 1374 node = nilfs_btree_get_nonroot_node(path, level);
1370 path[level].bp_oldreq.bpr_ptr = 1375 path[level].bp_oldreq.bpr_ptr =
1371 nilfs_btree_node_get_ptr(node, path[level].bp_index, 1376 nilfs_btree_node_get_ptr(node, dindex, ncblk);
1372 ncblk);
1373 ret = nilfs_bmap_prepare_end_ptr(btree, 1377 ret = nilfs_bmap_prepare_end_ptr(btree,
1374 &path[level].bp_oldreq, dat); 1378 &path[level].bp_oldreq, dat);
1375 if (ret < 0) 1379 if (ret < 0)
@@ -1383,6 +1387,7 @@ static int nilfs_btree_prepare_delete(struct nilfs_bmap *btree,
1383 1387
1384 parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax); 1388 parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax);
1385 pindex = path[level + 1].bp_index; 1389 pindex = path[level + 1].bp_index;
1390 dindex = pindex;
1386 1391
1387 if (pindex > 0) { 1392 if (pindex > 0) {
1388 /* left sibling */ 1393 /* left sibling */
@@ -1421,6 +1426,14 @@ static int nilfs_btree_prepare_delete(struct nilfs_bmap *btree,
1421 path[level].bp_sib_bh = bh; 1426 path[level].bp_sib_bh = bh;
1422 path[level].bp_op = nilfs_btree_concat_right; 1427 path[level].bp_op = nilfs_btree_concat_right;
1423 stats->bs_nblocks++; 1428 stats->bs_nblocks++;
1429 /*
1430 * When merging right sibling node
1431 * into the current node, pointer to
1432 * the right sibling node must be
1433 * terminated instead. The adjustment
1434 * below is required for that.
1435 */
1436 dindex = pindex + 1;
1424 /* continue; */ 1437 /* continue; */
1425 } 1438 }
1426 } else { 1439 } else {
@@ -1431,29 +1444,31 @@ static int nilfs_btree_prepare_delete(struct nilfs_bmap *btree,
1431 NILFS_BTREE_ROOT_NCHILDREN_MAX) { 1444 NILFS_BTREE_ROOT_NCHILDREN_MAX) {
1432 path[level].bp_op = nilfs_btree_shrink; 1445 path[level].bp_op = nilfs_btree_shrink;
1433 stats->bs_nblocks += 2; 1446 stats->bs_nblocks += 2;
1447 level++;
1448 path[level].bp_op = nilfs_btree_nop;
1449 goto shrink_root_child;
1434 } else { 1450 } else {
1435 path[level].bp_op = nilfs_btree_do_delete; 1451 path[level].bp_op = nilfs_btree_do_delete;
1436 stats->bs_nblocks++; 1452 stats->bs_nblocks++;
1453 goto out;
1437 } 1454 }
1438
1439 goto out;
1440
1441 } 1455 }
1442 } 1456 }
1443 1457
1458 /* child of the root node is deleted */
1459 path[level].bp_op = nilfs_btree_do_delete;
1460 stats->bs_nblocks++;
1461
1462shrink_root_child:
1444 node = nilfs_btree_get_root(btree); 1463 node = nilfs_btree_get_root(btree);
1445 path[level].bp_oldreq.bpr_ptr = 1464 path[level].bp_oldreq.bpr_ptr =
1446 nilfs_btree_node_get_ptr(node, path[level].bp_index, 1465 nilfs_btree_node_get_ptr(node, dindex,
1447 NILFS_BTREE_ROOT_NCHILDREN_MAX); 1466 NILFS_BTREE_ROOT_NCHILDREN_MAX);
1448 1467
1449 ret = nilfs_bmap_prepare_end_ptr(btree, &path[level].bp_oldreq, dat); 1468 ret = nilfs_bmap_prepare_end_ptr(btree, &path[level].bp_oldreq, dat);
1450 if (ret < 0) 1469 if (ret < 0)
1451 goto err_out_child_node; 1470 goto err_out_child_node;
1452 1471
1453 /* child of the root node is deleted */
1454 path[level].bp_op = nilfs_btree_do_delete;
1455 stats->bs_nblocks++;
1456
1457 /* success */ 1472 /* success */
1458 out: 1473 out:
1459 *levelp = level; 1474 *levelp = level;
diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c
index b954878ad6ce..b9b45fc2903e 100644
--- a/fs/nilfs2/inode.c
+++ b/fs/nilfs2/inode.c
@@ -801,12 +801,7 @@ out_err:
801 801
802int nilfs_permission(struct inode *inode, int mask, unsigned int flags) 802int nilfs_permission(struct inode *inode, int mask, unsigned int flags)
803{ 803{
804 struct nilfs_root *root; 804 struct nilfs_root *root = NILFS_I(inode)->i_root;
805
806 if (flags & IPERM_FLAG_RCU)
807 return -ECHILD;
808
809 root = NILFS_I(inode)->i_root;
810 if ((mask & MAY_WRITE) && root && 805 if ((mask & MAY_WRITE) && root &&
811 root->cno != NILFS_CPTREE_CURRENT_CNO) 806 root->cno != NILFS_CPTREE_CURRENT_CNO)
812 return -EROFS; /* snapshot is not writable */ 807 return -EROFS; /* snapshot is not writable */
diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c
index 141646e88fb5..bb24ab6c282f 100644
--- a/fs/nilfs2/segment.c
+++ b/fs/nilfs2/segment.c
@@ -2573,7 +2573,7 @@ static struct nilfs_sc_info *nilfs_segctor_new(struct super_block *sb,
2573 sci->sc_watermark = NILFS_SC_DEFAULT_WATERMARK; 2573 sci->sc_watermark = NILFS_SC_DEFAULT_WATERMARK;
2574 2574
2575 if (nilfs->ns_interval) 2575 if (nilfs->ns_interval)
2576 sci->sc_interval = nilfs->ns_interval; 2576 sci->sc_interval = HZ * nilfs->ns_interval;
2577 if (nilfs->ns_watermark) 2577 if (nilfs->ns_watermark)
2578 sci->sc_watermark = nilfs->ns_watermark; 2578 sci->sc_watermark = nilfs->ns_watermark;
2579 return sci; 2579 return sci;
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index cdbaf5e97308..56f61027236b 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -1072,7 +1072,7 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent)
1072 1072
1073 sb->s_magic = OCFS2_SUPER_MAGIC; 1073 sb->s_magic = OCFS2_SUPER_MAGIC;
1074 1074
1075 sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | 1075 sb->s_flags = (sb->s_flags & ~(MS_POSIXACL | MS_NOSEC)) |
1076 ((osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL) ? MS_POSIXACL : 0); 1076 ((osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL) ? MS_POSIXACL : 0);
1077 1077
1078 /* Hard readonly mode only if: bdev_read_only, MS_RDONLY, 1078 /* Hard readonly mode only if: bdev_read_only, MS_RDONLY,
diff --git a/fs/omfs/file.c b/fs/omfs/file.c
index d738a7e493dd..2c6d95257a4d 100644
--- a/fs/omfs/file.c
+++ b/fs/omfs/file.c
@@ -4,7 +4,6 @@
4 * Released under GPL v2. 4 * Released under GPL v2.
5 */ 5 */
6 6
7#include <linux/version.h>
8#include <linux/module.h> 7#include <linux/module.h>
9#include <linux/fs.h> 8#include <linux/fs.h>
10#include <linux/buffer_head.h> 9#include <linux/buffer_head.h>
diff --git a/fs/partitions/check.c b/fs/partitions/check.c
index 0ead43549431..e3c63d1c5e13 100644
--- a/fs/partitions/check.c
+++ b/fs/partitions/check.c
@@ -255,13 +255,7 @@ static ssize_t part_discard_alignment_show(struct device *dev,
255 struct device_attribute *attr, char *buf) 255 struct device_attribute *attr, char *buf)
256{ 256{
257 struct hd_struct *p = dev_to_part(dev); 257 struct hd_struct *p = dev_to_part(dev);
258 struct gendisk *disk = dev_to_disk(dev); 258 return sprintf(buf, "%u\n", p->discard_alignment);
259 unsigned int alignment = 0;
260
261 if (disk->queue)
262 alignment = queue_limit_discard_alignment(&disk->queue->limits,
263 p->start_sect);
264 return sprintf(buf, "%u\n", alignment);
265} 259}
266 260
267ssize_t part_stat_show(struct device *dev, 261ssize_t part_stat_show(struct device *dev,
@@ -455,6 +449,8 @@ struct hd_struct *add_partition(struct gendisk *disk, int partno,
455 p->start_sect = start; 449 p->start_sect = start;
456 p->alignment_offset = 450 p->alignment_offset =
457 queue_limit_alignment_offset(&disk->queue->limits, start); 451 queue_limit_alignment_offset(&disk->queue->limits, start);
452 p->discard_alignment =
453 queue_limit_discard_alignment(&disk->queue->limits, start);
458 p->nr_sects = len; 454 p->nr_sects = len;
459 p->partno = partno; 455 p->partno = partno;
460 p->policy = get_disk_ro(disk); 456 p->policy = get_disk_ro(disk);
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 14def991d9dd..8a84210ca080 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -2169,11 +2169,7 @@ static const struct file_operations proc_fd_operations = {
2169 */ 2169 */
2170static int proc_fd_permission(struct inode *inode, int mask, unsigned int flags) 2170static int proc_fd_permission(struct inode *inode, int mask, unsigned int flags)
2171{ 2171{
2172 int rv; 2172 int rv = generic_permission(inode, mask, flags, NULL);
2173
2174 if (flags & IPERM_FLAG_RCU)
2175 return -ECHILD;
2176 rv = generic_permission(inode, mask, flags, NULL);
2177 if (rv == 0) 2173 if (rv == 0)
2178 return 0; 2174 return 0;
2179 if (task_pid(current) == proc_pid(inode)) 2175 if (task_pid(current) == proc_pid(inode))
diff --git a/fs/proc/namespaces.c b/fs/proc/namespaces.c
index 781dec5bd682..be177f702acb 100644
--- a/fs/proc/namespaces.c
+++ b/fs/proc/namespaces.c
@@ -38,18 +38,21 @@ static struct dentry *proc_ns_instantiate(struct inode *dir,
38 struct inode *inode; 38 struct inode *inode;
39 struct proc_inode *ei; 39 struct proc_inode *ei;
40 struct dentry *error = ERR_PTR(-ENOENT); 40 struct dentry *error = ERR_PTR(-ENOENT);
41 void *ns;
41 42
42 inode = proc_pid_make_inode(dir->i_sb, task); 43 inode = proc_pid_make_inode(dir->i_sb, task);
43 if (!inode) 44 if (!inode)
44 goto out; 45 goto out;
45 46
47 ns = ns_ops->get(task);
48 if (!ns)
49 goto out_iput;
50
46 ei = PROC_I(inode); 51 ei = PROC_I(inode);
47 inode->i_mode = S_IFREG|S_IRUSR; 52 inode->i_mode = S_IFREG|S_IRUSR;
48 inode->i_fop = &ns_file_operations; 53 inode->i_fop = &ns_file_operations;
49 ei->ns_ops = ns_ops; 54 ei->ns_ops = ns_ops;
50 ei->ns = ns_ops->get(task); 55 ei->ns = ns;
51 if (!ei->ns)
52 goto out_iput;
53 56
54 dentry->d_op = &pid_dentry_operations; 57 dentry->d_op = &pid_dentry_operations;
55 d_add(dentry, inode); 58 d_add(dentry, inode);
diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c
index f50133c11c24..d167de365a8d 100644
--- a/fs/proc/proc_sysctl.c
+++ b/fs/proc/proc_sysctl.c
@@ -304,9 +304,6 @@ static int proc_sys_permission(struct inode *inode, int mask,unsigned int flags)
304 struct ctl_table *table; 304 struct ctl_table *table;
305 int error; 305 int error;
306 306
307 if (flags & IPERM_FLAG_RCU)
308 return -ECHILD;
309
310 /* Executable files are not allowed under /proc/sys/ */ 307 /* Executable files are not allowed under /proc/sys/ */
311 if ((mask & MAY_EXEC) && S_ISREG(inode->i_mode)) 308 if ((mask & MAY_EXEC) && S_ISREG(inode->i_mode))
312 return -EACCES; 309 return -EACCES;
diff --git a/fs/proc/root.c b/fs/proc/root.c
index a9000e9cfee5..d6c3b416529b 100644
--- a/fs/proc/root.c
+++ b/fs/proc/root.c
@@ -28,11 +28,12 @@ static int proc_test_super(struct super_block *sb, void *data)
28 28
29static int proc_set_super(struct super_block *sb, void *data) 29static int proc_set_super(struct super_block *sb, void *data)
30{ 30{
31 struct pid_namespace *ns; 31 int err = set_anon_super(sb, NULL);
32 32 if (!err) {
33 ns = (struct pid_namespace *)data; 33 struct pid_namespace *ns = (struct pid_namespace *)data;
34 sb->s_fs_info = get_pid_ns(ns); 34 sb->s_fs_info = get_pid_ns(ns);
35 return set_anon_super(sb, NULL); 35 }
36 return err;
36} 37}
37 38
38static struct dentry *proc_mount(struct file_system_type *fs_type, 39static struct dentry *proc_mount(struct file_system_type *fs_type,
diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c
index e8a62f41b458..d78089690965 100644
--- a/fs/reiserfs/xattr.c
+++ b/fs/reiserfs/xattr.c
@@ -954,8 +954,6 @@ static int xattr_mount_check(struct super_block *s)
954 954
955int reiserfs_permission(struct inode *inode, int mask, unsigned int flags) 955int reiserfs_permission(struct inode *inode, int mask, unsigned int flags)
956{ 956{
957 if (flags & IPERM_FLAG_RCU)
958 return -ECHILD;
959 /* 957 /*
960 * We don't do permission checks on the internal objects. 958 * We don't do permission checks on the internal objects.
961 * Permissions are determined by the "owning" object. 959 * Permissions are determined by the "owning" object.
diff --git a/fs/super.c b/fs/super.c
index c75593953c52..ab3d672db0de 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -822,7 +822,7 @@ struct dentry *mount_bdev(struct file_system_type *fs_type,
822 } else { 822 } else {
823 char b[BDEVNAME_SIZE]; 823 char b[BDEVNAME_SIZE];
824 824
825 s->s_flags = flags; 825 s->s_flags = flags | MS_NOSEC;
826 s->s_mode = mode; 826 s->s_mode = mode;
827 strlcpy(s->s_id, bdevname(bdev, b), sizeof(s->s_id)); 827 strlcpy(s->s_id, bdevname(bdev, b), sizeof(s->s_id));
828 sb_set_blocksize(s, block_size(bdev)); 828 sb_set_blocksize(s, block_size(bdev));
diff --git a/fs/sysfs/mount.c b/fs/sysfs/mount.c
index 266895783b47..e34f0d99ea4e 100644
--- a/fs/sysfs/mount.c
+++ b/fs/sysfs/mount.c
@@ -95,6 +95,14 @@ static int sysfs_set_super(struct super_block *sb, void *data)
95 return error; 95 return error;
96} 96}
97 97
98static void free_sysfs_super_info(struct sysfs_super_info *info)
99{
100 int type;
101 for (type = KOBJ_NS_TYPE_NONE; type < KOBJ_NS_TYPES; type++)
102 kobj_ns_drop(type, info->ns[type]);
103 kfree(info);
104}
105
98static struct dentry *sysfs_mount(struct file_system_type *fs_type, 106static struct dentry *sysfs_mount(struct file_system_type *fs_type,
99 int flags, const char *dev_name, void *data) 107 int flags, const char *dev_name, void *data)
100{ 108{
@@ -108,11 +116,11 @@ static struct dentry *sysfs_mount(struct file_system_type *fs_type,
108 return ERR_PTR(-ENOMEM); 116 return ERR_PTR(-ENOMEM);
109 117
110 for (type = KOBJ_NS_TYPE_NONE; type < KOBJ_NS_TYPES; type++) 118 for (type = KOBJ_NS_TYPE_NONE; type < KOBJ_NS_TYPES; type++)
111 info->ns[type] = kobj_ns_current(type); 119 info->ns[type] = kobj_ns_grab_current(type);
112 120
113 sb = sget(fs_type, sysfs_test_super, sysfs_set_super, info); 121 sb = sget(fs_type, sysfs_test_super, sysfs_set_super, info);
114 if (IS_ERR(sb) || sb->s_fs_info != info) 122 if (IS_ERR(sb) || sb->s_fs_info != info)
115 kfree(info); 123 free_sysfs_super_info(info);
116 if (IS_ERR(sb)) 124 if (IS_ERR(sb))
117 return ERR_CAST(sb); 125 return ERR_CAST(sb);
118 if (!sb->s_root) { 126 if (!sb->s_root) {
@@ -131,12 +139,11 @@ static struct dentry *sysfs_mount(struct file_system_type *fs_type,
131static void sysfs_kill_sb(struct super_block *sb) 139static void sysfs_kill_sb(struct super_block *sb)
132{ 140{
133 struct sysfs_super_info *info = sysfs_info(sb); 141 struct sysfs_super_info *info = sysfs_info(sb);
134
135 /* Remove the superblock from fs_supers/s_instances 142 /* Remove the superblock from fs_supers/s_instances
136 * so we can't find it, before freeing sysfs_super_info. 143 * so we can't find it, before freeing sysfs_super_info.
137 */ 144 */
138 kill_anon_super(sb); 145 kill_anon_super(sb);
139 kfree(info); 146 free_sysfs_super_info(info);
140} 147}
141 148
142static struct file_system_type sysfs_fs_type = { 149static struct file_system_type sysfs_fs_type = {
@@ -145,28 +152,6 @@ static struct file_system_type sysfs_fs_type = {
145 .kill_sb = sysfs_kill_sb, 152 .kill_sb = sysfs_kill_sb,
146}; 153};
147 154
148void sysfs_exit_ns(enum kobj_ns_type type, const void *ns)
149{
150 struct super_block *sb;
151
152 mutex_lock(&sysfs_mutex);
153 spin_lock(&sb_lock);
154 list_for_each_entry(sb, &sysfs_fs_type.fs_supers, s_instances) {
155 struct sysfs_super_info *info = sysfs_info(sb);
156 /*
157 * If we see a superblock on the fs_supers/s_instances
158 * list the unmount has not completed and sb->s_fs_info
159 * points to a valid struct sysfs_super_info.
160 */
161 /* Ignore superblocks with the wrong ns */
162 if (info->ns[type] != ns)
163 continue;
164 info->ns[type] = NULL;
165 }
166 spin_unlock(&sb_lock);
167 mutex_unlock(&sysfs_mutex);
168}
169
170int __init sysfs_init(void) 155int __init sysfs_init(void)
171{ 156{
172 int err = -ENOMEM; 157 int err = -ENOMEM;
diff --git a/fs/sysfs/sysfs.h b/fs/sysfs/sysfs.h
index 3d28af31d863..2ed2404f3113 100644
--- a/fs/sysfs/sysfs.h
+++ b/fs/sysfs/sysfs.h
@@ -136,7 +136,7 @@ struct sysfs_addrm_cxt {
136 * instance). 136 * instance).
137 */ 137 */
138struct sysfs_super_info { 138struct sysfs_super_info {
139 const void *ns[KOBJ_NS_TYPES]; 139 void *ns[KOBJ_NS_TYPES];
140}; 140};
141#define sysfs_info(SB) ((struct sysfs_super_info *)(SB->s_fs_info)) 141#define sysfs_info(SB) ((struct sysfs_super_info *)(SB->s_fs_info))
142extern struct sysfs_dirent sysfs_root; 142extern struct sysfs_dirent sysfs_root;
diff --git a/fs/timerfd.c b/fs/timerfd.c
index f67acbdda5e8..dffeb3795af1 100644
--- a/fs/timerfd.c
+++ b/fs/timerfd.c
@@ -61,7 +61,9 @@ static enum hrtimer_restart timerfd_tmrproc(struct hrtimer *htmr)
61 61
62/* 62/*
63 * Called when the clock was set to cancel the timers in the cancel 63 * Called when the clock was set to cancel the timers in the cancel
64 * list. 64 * list. This will wake up processes waiting on these timers. The
65 * wake-up requires ctx->ticks to be non zero, therefore we increment
66 * it before calling wake_up_locked().
65 */ 67 */
66void timerfd_clock_was_set(void) 68void timerfd_clock_was_set(void)
67{ 69{
@@ -76,6 +78,7 @@ void timerfd_clock_was_set(void)
76 spin_lock_irqsave(&ctx->wqh.lock, flags); 78 spin_lock_irqsave(&ctx->wqh.lock, flags);
77 if (ctx->moffs.tv64 != moffs.tv64) { 79 if (ctx->moffs.tv64 != moffs.tv64) {
78 ctx->moffs.tv64 = KTIME_MAX; 80 ctx->moffs.tv64 = KTIME_MAX;
81 ctx->ticks++;
79 wake_up_locked(&ctx->wqh); 82 wake_up_locked(&ctx->wqh);
80 } 83 }
81 spin_unlock_irqrestore(&ctx->wqh.lock, flags); 84 spin_unlock_irqrestore(&ctx->wqh.lock, flags);
diff --git a/fs/ubifs/io.c b/fs/ubifs/io.c
index 166951e0dcd3..3be645e012c9 100644
--- a/fs/ubifs/io.c
+++ b/fs/ubifs/io.c
@@ -581,6 +581,7 @@ int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len)
581 ubifs_assert(wbuf->size % c->min_io_size == 0); 581 ubifs_assert(wbuf->size % c->min_io_size == 0);
582 ubifs_assert(mutex_is_locked(&wbuf->io_mutex)); 582 ubifs_assert(mutex_is_locked(&wbuf->io_mutex));
583 ubifs_assert(!c->ro_media && !c->ro_mount); 583 ubifs_assert(!c->ro_media && !c->ro_mount);
584 ubifs_assert(!c->space_fixup);
584 if (c->leb_size - wbuf->offs >= c->max_write_size) 585 if (c->leb_size - wbuf->offs >= c->max_write_size)
585 ubifs_assert(!((wbuf->offs + wbuf->size) % c->max_write_size)); 586 ubifs_assert(!((wbuf->offs + wbuf->size) % c->max_write_size));
586 587
@@ -759,6 +760,7 @@ int ubifs_write_node(struct ubifs_info *c, void *buf, int len, int lnum,
759 ubifs_assert(lnum >= 0 && lnum < c->leb_cnt && offs >= 0); 760 ubifs_assert(lnum >= 0 && lnum < c->leb_cnt && offs >= 0);
760 ubifs_assert(offs % c->min_io_size == 0 && offs < c->leb_size); 761 ubifs_assert(offs % c->min_io_size == 0 && offs < c->leb_size);
761 ubifs_assert(!c->ro_media && !c->ro_mount); 762 ubifs_assert(!c->ro_media && !c->ro_mount);
763 ubifs_assert(!c->space_fixup);
762 764
763 if (c->ro_error) 765 if (c->ro_error)
764 return -EROFS; 766 return -EROFS;
diff --git a/fs/ubifs/journal.c b/fs/ubifs/journal.c
index 34b1679e6e3a..cef0460f4c54 100644
--- a/fs/ubifs/journal.c
+++ b/fs/ubifs/journal.c
@@ -669,6 +669,7 @@ out_free:
669 669
670out_release: 670out_release:
671 release_head(c, BASEHD); 671 release_head(c, BASEHD);
672 kfree(dent);
672out_ro: 673out_ro:
673 ubifs_ro_mode(c, err); 674 ubifs_ro_mode(c, err);
674 if (last_reference) 675 if (last_reference)
diff --git a/fs/ubifs/orphan.c b/fs/ubifs/orphan.c
index bd644bf587a8..a5422fffbd69 100644
--- a/fs/ubifs/orphan.c
+++ b/fs/ubifs/orphan.c
@@ -674,7 +674,7 @@ static int kill_orphans(struct ubifs_info *c)
674 if (IS_ERR(sleb)) { 674 if (IS_ERR(sleb)) {
675 if (PTR_ERR(sleb) == -EUCLEAN) 675 if (PTR_ERR(sleb) == -EUCLEAN)
676 sleb = ubifs_recover_leb(c, lnum, 0, 676 sleb = ubifs_recover_leb(c, lnum, 0,
677 c->sbuf, 0); 677 c->sbuf, -1);
678 if (IS_ERR(sleb)) { 678 if (IS_ERR(sleb)) {
679 err = PTR_ERR(sleb); 679 err = PTR_ERR(sleb);
680 break; 680 break;
diff --git a/fs/ubifs/recovery.c b/fs/ubifs/recovery.c
index 731d9e2e7b50..783d8e0beb76 100644
--- a/fs/ubifs/recovery.c
+++ b/fs/ubifs/recovery.c
@@ -564,19 +564,15 @@ static int fix_unclean_leb(struct ubifs_info *c, struct ubifs_scan_leb *sleb,
564} 564}
565 565
566/** 566/**
567 * drop_last_node - drop the last node or group of nodes. 567 * drop_last_group - drop the last group of nodes.
568 * @sleb: scanned LEB information 568 * @sleb: scanned LEB information
569 * @offs: offset of dropped nodes is returned here 569 * @offs: offset of dropped nodes is returned here
570 * @grouped: non-zero if whole group of nodes have to be dropped
571 * 570 *
572 * This is a helper function for 'ubifs_recover_leb()' which drops the last 571 * This is a helper function for 'ubifs_recover_leb()' which drops the last
573 * node of the scanned LEB or the last group of nodes if @grouped is not zero. 572 * group of nodes of the scanned LEB.
574 * This function returns %1 if a node was dropped and %0 otherwise.
575 */ 573 */
576static int drop_last_node(struct ubifs_scan_leb *sleb, int *offs, int grouped) 574static void drop_last_group(struct ubifs_scan_leb *sleb, int *offs)
577{ 575{
578 int dropped = 0;
579
580 while (!list_empty(&sleb->nodes)) { 576 while (!list_empty(&sleb->nodes)) {
581 struct ubifs_scan_node *snod; 577 struct ubifs_scan_node *snod;
582 struct ubifs_ch *ch; 578 struct ubifs_ch *ch;
@@ -585,17 +581,40 @@ static int drop_last_node(struct ubifs_scan_leb *sleb, int *offs, int grouped)
585 list); 581 list);
586 ch = snod->node; 582 ch = snod->node;
587 if (ch->group_type != UBIFS_IN_NODE_GROUP) 583 if (ch->group_type != UBIFS_IN_NODE_GROUP)
588 return dropped; 584 break;
589 dbg_rcvry("dropping node at %d:%d", sleb->lnum, snod->offs); 585
586 dbg_rcvry("dropping grouped node at %d:%d",
587 sleb->lnum, snod->offs);
588 *offs = snod->offs;
589 list_del(&snod->list);
590 kfree(snod);
591 sleb->nodes_cnt -= 1;
592 }
593}
594
595/**
596 * drop_last_node - drop the last node.
597 * @sleb: scanned LEB information
598 * @offs: offset of dropped nodes is returned here
599 * @grouped: non-zero if whole group of nodes have to be dropped
600 *
601 * This is a helper function for 'ubifs_recover_leb()' which drops the last
602 * node of the scanned LEB.
603 */
604static void drop_last_node(struct ubifs_scan_leb *sleb, int *offs)
605{
606 struct ubifs_scan_node *snod;
607
608 if (!list_empty(&sleb->nodes)) {
609 snod = list_entry(sleb->nodes.prev, struct ubifs_scan_node,
610 list);
611
612 dbg_rcvry("dropping last node at %d:%d", sleb->lnum, snod->offs);
590 *offs = snod->offs; 613 *offs = snod->offs;
591 list_del(&snod->list); 614 list_del(&snod->list);
592 kfree(snod); 615 kfree(snod);
593 sleb->nodes_cnt -= 1; 616 sleb->nodes_cnt -= 1;
594 dropped = 1;
595 if (!grouped)
596 break;
597 } 617 }
598 return dropped;
599} 618}
600 619
601/** 620/**
@@ -604,7 +623,8 @@ static int drop_last_node(struct ubifs_scan_leb *sleb, int *offs, int grouped)
604 * @lnum: LEB number 623 * @lnum: LEB number
605 * @offs: offset 624 * @offs: offset
606 * @sbuf: LEB-sized buffer to use 625 * @sbuf: LEB-sized buffer to use
607 * @grouped: nodes may be grouped for recovery 626 * @jhead: journal head number this LEB belongs to (%-1 if the LEB does not
627 * belong to any journal head)
608 * 628 *
609 * This function does a scan of a LEB, but caters for errors that might have 629 * This function does a scan of a LEB, but caters for errors that might have
610 * been caused by the unclean unmount from which we are attempting to recover. 630 * been caused by the unclean unmount from which we are attempting to recover.
@@ -612,13 +632,14 @@ static int drop_last_node(struct ubifs_scan_leb *sleb, int *offs, int grouped)
612 * found, and a negative error code in case of failure. 632 * found, and a negative error code in case of failure.
613 */ 633 */
614struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum, 634struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum,
615 int offs, void *sbuf, int grouped) 635 int offs, void *sbuf, int jhead)
616{ 636{
617 int ret = 0, err, len = c->leb_size - offs, start = offs, min_io_unit; 637 int ret = 0, err, len = c->leb_size - offs, start = offs, min_io_unit;
638 int grouped = jhead == -1 ? 0 : c->jheads[jhead].grouped;
618 struct ubifs_scan_leb *sleb; 639 struct ubifs_scan_leb *sleb;
619 void *buf = sbuf + offs; 640 void *buf = sbuf + offs;
620 641
621 dbg_rcvry("%d:%d", lnum, offs); 642 dbg_rcvry("%d:%d, jhead %d, grouped %d", lnum, offs, jhead, grouped);
622 643
623 sleb = ubifs_start_scan(c, lnum, offs, sbuf); 644 sleb = ubifs_start_scan(c, lnum, offs, sbuf);
624 if (IS_ERR(sleb)) 645 if (IS_ERR(sleb))
@@ -635,7 +656,7 @@ struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum,
635 * Scan quietly until there is an error from which we cannot 656 * Scan quietly until there is an error from which we cannot
636 * recover 657 * recover
637 */ 658 */
638 ret = ubifs_scan_a_node(c, buf, len, lnum, offs, 0); 659 ret = ubifs_scan_a_node(c, buf, len, lnum, offs, 1);
639 if (ret == SCANNED_A_NODE) { 660 if (ret == SCANNED_A_NODE) {
640 /* A valid node, and not a padding node */ 661 /* A valid node, and not a padding node */
641 struct ubifs_ch *ch = buf; 662 struct ubifs_ch *ch = buf;
@@ -695,59 +716,62 @@ struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum,
695 * If nodes are grouped, always drop the incomplete group at 716 * If nodes are grouped, always drop the incomplete group at
696 * the end. 717 * the end.
697 */ 718 */
698 drop_last_node(sleb, &offs, 1); 719 drop_last_group(sleb, &offs);
699 720
700 /* 721 if (jhead == GCHD) {
701 * While we are in the middle of the same min. I/O unit keep dropping 722 /*
702 * nodes. So basically, what we want is to make sure that the last min. 723 * If this LEB belongs to the GC head then while we are in the
703 * I/O unit where we saw the corruption is dropped completely with all 724 * middle of the same min. I/O unit keep dropping nodes. So
704 * the uncorrupted node which may possibly sit there. 725 * basically, what we want is to make sure that the last min.
705 * 726 * I/O unit where we saw the corruption is dropped completely
706 * In other words, let's name the min. I/O unit where the corruption 727 * with all the uncorrupted nodes which may possibly sit there.
707 * starts B, and the previous min. I/O unit A. The below code tries to 728 *
708 * deal with a situation when half of B contains valid nodes or the end 729 * In other words, let's name the min. I/O unit where the
709 * of a valid node, and the second half of B contains corrupted data or 730 * corruption starts B, and the previous min. I/O unit A. The
710 * garbage. This means that UBIFS had been writing to B just before the 731 * below code tries to deal with a situation when half of B
711 * power cut happened. I do not know how realistic is this scenario 732 * contains valid nodes or the end of a valid node, and the
712 * that half of the min. I/O unit had been written successfully and the 733 * second half of B contains corrupted data or garbage. This
713 * other half not, but this is possible in our 'failure mode emulation' 734 * means that UBIFS had been writing to B just before the power
714 * infrastructure at least. 735 * cut happened. I do not know how realistic is this scenario
715 * 736 * that half of the min. I/O unit had been written successfully
716 * So what is the problem, why we need to drop those nodes? Whey can't 737 * and the other half not, but this is possible in our 'failure
717 * we just clean-up the second half of B by putting a padding node 738 * mode emulation' infrastructure at least.
718 * there? We can, and this works fine with one exception which was 739 *
719 * reproduced with power cut emulation testing and happens extremely 740 * So what is the problem, why we need to drop those nodes? Why
720 * rarely. The description follows, but it is worth noting that that is 741 * can't we just clean-up the second half of B by putting a
721 * only about the GC head, so we could do this trick only if the bud 742 * padding node there? We can, and this works fine with one
722 * belongs to the GC head, but it does not seem to be worth an 743 * exception which was reproduced with power cut emulation
723 * additional "if" statement. 744 * testing and happens extremely rarely.
724 * 745 *
725 * So, imagine the file-system is full, we run GC which is moving valid 746 * Imagine the file-system is full, we run GC which starts
726 * nodes from LEB X to LEB Y (obviously, LEB Y is the current GC head 747 * moving valid nodes from LEB X to LEB Y (obviously, LEB Y is
727 * LEB). The @c->gc_lnum is -1, which means that GC will retain LEB X 748 * the current GC head LEB). The @c->gc_lnum is -1, which means
728 * and will try to continue. Imagine that LEB X is currently the 749 * that GC will retain LEB X and will try to continue. Imagine
729 * dirtiest LEB, and the amount of used space in LEB Y is exactly the 750 * that LEB X is currently the dirtiest LEB, and the amount of
730 * same as amount of free space in LEB X. 751 * used space in LEB Y is exactly the same as amount of free
731 * 752 * space in LEB X.
732 * And a power cut happens when nodes are moved from LEB X to LEB Y. We 753 *
733 * are here trying to recover LEB Y which is the GC head LEB. We find 754 * And a power cut happens when nodes are moved from LEB X to
734 * the min. I/O unit B as described above. Then we clean-up LEB Y by 755 * LEB Y. We are here trying to recover LEB Y which is the GC
735 * padding min. I/O unit. And later 'ubifs_rcvry_gc_commit()' function 756 * head LEB. We find the min. I/O unit B as described above.
736 * fails, because it cannot find a dirty LEB which could be GC'd into 757 * Then we clean-up LEB Y by padding min. I/O unit. And later
737 * LEB Y! Even LEB X does not match because the amount of valid nodes 758 * 'ubifs_rcvry_gc_commit()' function fails, because it cannot
738 * there does not fit the free space in LEB Y any more! And this is 759 * find a dirty LEB which could be GC'd into LEB Y! Even LEB X
739 * because of the padding node which we added to LEB Y. The 760 * does not match because the amount of valid nodes there does
740 * user-visible effect of this which I once observed and analysed is 761 * not fit the free space in LEB Y any more! And this is
741 * that we cannot mount the file-system with -ENOSPC error. 762 * because of the padding node which we added to LEB Y. The
742 * 763 * user-visible effect of this which I once observed and
743 * So obviously, to make sure that situation does not happen we should 764 * analysed is that we cannot mount the file-system with
744 * free min. I/O unit B in LEB Y completely and the last used min. I/O 765 * -ENOSPC error.
745 * unit in LEB Y should be A. This is basically what the below code 766 *
746 * tries to do. 767 * So obviously, to make sure that situation does not happen we
747 */ 768 * should free min. I/O unit B in LEB Y completely and the last
748 while (min_io_unit == round_down(offs, c->min_io_size) && 769 * used min. I/O unit in LEB Y should be A. This is basically
749 min_io_unit != offs && 770 * what the below code tries to do.
750 drop_last_node(sleb, &offs, grouped)); 771 */
772 while (offs > min_io_unit)
773 drop_last_node(sleb, &offs);
774 }
751 775
752 buf = sbuf + offs; 776 buf = sbuf + offs;
753 len = c->leb_size - offs; 777 len = c->leb_size - offs;
@@ -881,7 +905,7 @@ struct ubifs_scan_leb *ubifs_recover_log_leb(struct ubifs_info *c, int lnum,
881 } 905 }
882 ubifs_scan_destroy(sleb); 906 ubifs_scan_destroy(sleb);
883 } 907 }
884 return ubifs_recover_leb(c, lnum, offs, sbuf, 0); 908 return ubifs_recover_leb(c, lnum, offs, sbuf, -1);
885} 909}
886 910
887/** 911/**
diff --git a/fs/ubifs/replay.c b/fs/ubifs/replay.c
index 6617280d1679..5e97161ce4d3 100644
--- a/fs/ubifs/replay.c
+++ b/fs/ubifs/replay.c
@@ -557,8 +557,7 @@ static int replay_bud(struct ubifs_info *c, struct bud_entry *b)
557 * these LEBs could possibly be written to at the power cut 557 * these LEBs could possibly be written to at the power cut
558 * time. 558 * time.
559 */ 559 */
560 sleb = ubifs_recover_leb(c, lnum, offs, c->sbuf, 560 sleb = ubifs_recover_leb(c, lnum, offs, c->sbuf, b->bud->jhead);
561 b->bud->jhead != GCHD);
562 else 561 else
563 sleb = ubifs_scan(c, lnum, offs, c->sbuf, 0); 562 sleb = ubifs_scan(c, lnum, offs, c->sbuf, 0);
564 if (IS_ERR(sleb)) 563 if (IS_ERR(sleb))
diff --git a/fs/ubifs/shrinker.c b/fs/ubifs/shrinker.c
index ca953a945029..9e1d05666fed 100644
--- a/fs/ubifs/shrinker.c
+++ b/fs/ubifs/shrinker.c
@@ -284,7 +284,11 @@ int ubifs_shrinker(struct shrinker *shrink, struct shrink_control *sc)
284 long clean_zn_cnt = atomic_long_read(&ubifs_clean_zn_cnt); 284 long clean_zn_cnt = atomic_long_read(&ubifs_clean_zn_cnt);
285 285
286 if (nr == 0) 286 if (nr == 0)
287 return clean_zn_cnt; 287 /*
288 * Due to the way UBIFS updates the clean znode counter it may
289 * temporarily be negative.
290 */
291 return clean_zn_cnt >= 0 ? clean_zn_cnt : 1;
288 292
289 if (!clean_zn_cnt) { 293 if (!clean_zn_cnt) {
290 /* 294 /*
diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c
index 1ab0d22e4c94..529be0582029 100644
--- a/fs/ubifs/super.c
+++ b/fs/ubifs/super.c
@@ -811,15 +811,18 @@ static int alloc_wbufs(struct ubifs_info *c)
811 811
812 c->jheads[i].wbuf.sync_callback = &bud_wbuf_callback; 812 c->jheads[i].wbuf.sync_callback = &bud_wbuf_callback;
813 c->jheads[i].wbuf.jhead = i; 813 c->jheads[i].wbuf.jhead = i;
814 c->jheads[i].grouped = 1;
814 } 815 }
815 816
816 c->jheads[BASEHD].wbuf.dtype = UBI_SHORTTERM; 817 c->jheads[BASEHD].wbuf.dtype = UBI_SHORTTERM;
817 /* 818 /*
818 * Garbage Collector head likely contains long-term data and 819 * Garbage Collector head likely contains long-term data and
819 * does not need to be synchronized by timer. 820 * does not need to be synchronized by timer. Also GC head nodes are
821 * not grouped.
820 */ 822 */
821 c->jheads[GCHD].wbuf.dtype = UBI_LONGTERM; 823 c->jheads[GCHD].wbuf.dtype = UBI_LONGTERM;
822 c->jheads[GCHD].wbuf.no_timer = 1; 824 c->jheads[GCHD].wbuf.no_timer = 1;
825 c->jheads[GCHD].grouped = 0;
823 826
824 return 0; 827 return 0;
825} 828}
@@ -1284,12 +1287,25 @@ static int mount_ubifs(struct ubifs_info *c)
1284 if ((c->mst_node->flags & cpu_to_le32(UBIFS_MST_DIRTY)) != 0) { 1287 if ((c->mst_node->flags & cpu_to_le32(UBIFS_MST_DIRTY)) != 0) {
1285 ubifs_msg("recovery needed"); 1288 ubifs_msg("recovery needed");
1286 c->need_recovery = 1; 1289 c->need_recovery = 1;
1287 if (!c->ro_mount) { 1290 }
1288 err = ubifs_recover_inl_heads(c, c->sbuf); 1291
1289 if (err) 1292 if (c->need_recovery && !c->ro_mount) {
1290 goto out_master; 1293 err = ubifs_recover_inl_heads(c, c->sbuf);
1291 } 1294 if (err)
1292 } else if (!c->ro_mount) { 1295 goto out_master;
1296 }
1297
1298 err = ubifs_lpt_init(c, 1, !c->ro_mount);
1299 if (err)
1300 goto out_master;
1301
1302 if (!c->ro_mount && c->space_fixup) {
1303 err = ubifs_fixup_free_space(c);
1304 if (err)
1305 goto out_master;
1306 }
1307
1308 if (!c->ro_mount) {
1293 /* 1309 /*
1294 * Set the "dirty" flag so that if we reboot uncleanly we 1310 * Set the "dirty" flag so that if we reboot uncleanly we
1295 * will notice this immediately on the next mount. 1311 * will notice this immediately on the next mount.
@@ -1297,13 +1313,9 @@ static int mount_ubifs(struct ubifs_info *c)
1297 c->mst_node->flags |= cpu_to_le32(UBIFS_MST_DIRTY); 1313 c->mst_node->flags |= cpu_to_le32(UBIFS_MST_DIRTY);
1298 err = ubifs_write_master(c); 1314 err = ubifs_write_master(c);
1299 if (err) 1315 if (err)
1300 goto out_master; 1316 goto out_lpt;
1301 } 1317 }
1302 1318
1303 err = ubifs_lpt_init(c, 1, !c->ro_mount);
1304 if (err)
1305 goto out_lpt;
1306
1307 err = dbg_check_idx_size(c, c->bi.old_idx_sz); 1319 err = dbg_check_idx_size(c, c->bi.old_idx_sz);
1308 if (err) 1320 if (err)
1309 goto out_lpt; 1321 goto out_lpt;
@@ -1396,12 +1408,6 @@ static int mount_ubifs(struct ubifs_info *c)
1396 } else 1408 } else
1397 ubifs_assert(c->lst.taken_empty_lebs > 0); 1409 ubifs_assert(c->lst.taken_empty_lebs > 0);
1398 1410
1399 if (!c->ro_mount && c->space_fixup) {
1400 err = ubifs_fixup_free_space(c);
1401 if (err)
1402 goto out_infos;
1403 }
1404
1405 err = dbg_check_filesystem(c); 1411 err = dbg_check_filesystem(c);
1406 if (err) 1412 if (err)
1407 goto out_infos; 1413 goto out_infos;
@@ -1842,7 +1848,6 @@ static void ubifs_put_super(struct super_block *sb)
1842 bdi_destroy(&c->bdi); 1848 bdi_destroy(&c->bdi);
1843 ubi_close_volume(c->ubi); 1849 ubi_close_volume(c->ubi);
1844 mutex_unlock(&c->umount_mutex); 1850 mutex_unlock(&c->umount_mutex);
1845 kfree(c);
1846} 1851}
1847 1852
1848static int ubifs_remount_fs(struct super_block *sb, int *flags, char *data) 1853static int ubifs_remount_fs(struct super_block *sb, int *flags, char *data)
@@ -1965,61 +1970,65 @@ static struct ubi_volume_desc *open_ubi(const char *name, int mode)
1965 return ERR_PTR(-EINVAL); 1970 return ERR_PTR(-EINVAL);
1966} 1971}
1967 1972
1968static int ubifs_fill_super(struct super_block *sb, void *data, int silent) 1973static struct ubifs_info *alloc_ubifs_info(struct ubi_volume_desc *ubi)
1969{ 1974{
1970 struct ubi_volume_desc *ubi = sb->s_fs_info;
1971 struct ubifs_info *c; 1975 struct ubifs_info *c;
1972 struct inode *root;
1973 int err;
1974 1976
1975 c = kzalloc(sizeof(struct ubifs_info), GFP_KERNEL); 1977 c = kzalloc(sizeof(struct ubifs_info), GFP_KERNEL);
1976 if (!c) 1978 if (c) {
1977 return -ENOMEM; 1979 spin_lock_init(&c->cnt_lock);
1980 spin_lock_init(&c->cs_lock);
1981 spin_lock_init(&c->buds_lock);
1982 spin_lock_init(&c->space_lock);
1983 spin_lock_init(&c->orphan_lock);
1984 init_rwsem(&c->commit_sem);
1985 mutex_init(&c->lp_mutex);
1986 mutex_init(&c->tnc_mutex);
1987 mutex_init(&c->log_mutex);
1988 mutex_init(&c->mst_mutex);
1989 mutex_init(&c->umount_mutex);
1990 mutex_init(&c->bu_mutex);
1991 mutex_init(&c->write_reserve_mutex);
1992 init_waitqueue_head(&c->cmt_wq);
1993 c->buds = RB_ROOT;
1994 c->old_idx = RB_ROOT;
1995 c->size_tree = RB_ROOT;
1996 c->orph_tree = RB_ROOT;
1997 INIT_LIST_HEAD(&c->infos_list);
1998 INIT_LIST_HEAD(&c->idx_gc);
1999 INIT_LIST_HEAD(&c->replay_list);
2000 INIT_LIST_HEAD(&c->replay_buds);
2001 INIT_LIST_HEAD(&c->uncat_list);
2002 INIT_LIST_HEAD(&c->empty_list);
2003 INIT_LIST_HEAD(&c->freeable_list);
2004 INIT_LIST_HEAD(&c->frdi_idx_list);
2005 INIT_LIST_HEAD(&c->unclean_leb_list);
2006 INIT_LIST_HEAD(&c->old_buds);
2007 INIT_LIST_HEAD(&c->orph_list);
2008 INIT_LIST_HEAD(&c->orph_new);
2009 c->no_chk_data_crc = 1;
2010
2011 c->highest_inum = UBIFS_FIRST_INO;
2012 c->lhead_lnum = c->ltail_lnum = UBIFS_LOG_LNUM;
2013
2014 ubi_get_volume_info(ubi, &c->vi);
2015 ubi_get_device_info(c->vi.ubi_num, &c->di);
2016 }
2017 return c;
2018}
1978 2019
1979 spin_lock_init(&c->cnt_lock); 2020static int ubifs_fill_super(struct super_block *sb, void *data, int silent)
1980 spin_lock_init(&c->cs_lock); 2021{
1981 spin_lock_init(&c->buds_lock); 2022 struct ubifs_info *c = sb->s_fs_info;
1982 spin_lock_init(&c->space_lock); 2023 struct inode *root;
1983 spin_lock_init(&c->orphan_lock); 2024 int err;
1984 init_rwsem(&c->commit_sem);
1985 mutex_init(&c->lp_mutex);
1986 mutex_init(&c->tnc_mutex);
1987 mutex_init(&c->log_mutex);
1988 mutex_init(&c->mst_mutex);
1989 mutex_init(&c->umount_mutex);
1990 mutex_init(&c->bu_mutex);
1991 mutex_init(&c->write_reserve_mutex);
1992 init_waitqueue_head(&c->cmt_wq);
1993 c->buds = RB_ROOT;
1994 c->old_idx = RB_ROOT;
1995 c->size_tree = RB_ROOT;
1996 c->orph_tree = RB_ROOT;
1997 INIT_LIST_HEAD(&c->infos_list);
1998 INIT_LIST_HEAD(&c->idx_gc);
1999 INIT_LIST_HEAD(&c->replay_list);
2000 INIT_LIST_HEAD(&c->replay_buds);
2001 INIT_LIST_HEAD(&c->uncat_list);
2002 INIT_LIST_HEAD(&c->empty_list);
2003 INIT_LIST_HEAD(&c->freeable_list);
2004 INIT_LIST_HEAD(&c->frdi_idx_list);
2005 INIT_LIST_HEAD(&c->unclean_leb_list);
2006 INIT_LIST_HEAD(&c->old_buds);
2007 INIT_LIST_HEAD(&c->orph_list);
2008 INIT_LIST_HEAD(&c->orph_new);
2009 c->no_chk_data_crc = 1;
2010 2025
2011 c->vfs_sb = sb; 2026 c->vfs_sb = sb;
2012 c->highest_inum = UBIFS_FIRST_INO;
2013 c->lhead_lnum = c->ltail_lnum = UBIFS_LOG_LNUM;
2014
2015 ubi_get_volume_info(ubi, &c->vi);
2016 ubi_get_device_info(c->vi.ubi_num, &c->di);
2017
2018 /* Re-open the UBI device in read-write mode */ 2027 /* Re-open the UBI device in read-write mode */
2019 c->ubi = ubi_open_volume(c->vi.ubi_num, c->vi.vol_id, UBI_READWRITE); 2028 c->ubi = ubi_open_volume(c->vi.ubi_num, c->vi.vol_id, UBI_READWRITE);
2020 if (IS_ERR(c->ubi)) { 2029 if (IS_ERR(c->ubi)) {
2021 err = PTR_ERR(c->ubi); 2030 err = PTR_ERR(c->ubi);
2022 goto out_free; 2031 goto out;
2023 } 2032 }
2024 2033
2025 /* 2034 /*
@@ -2085,24 +2094,29 @@ out_bdi:
2085 bdi_destroy(&c->bdi); 2094 bdi_destroy(&c->bdi);
2086out_close: 2095out_close:
2087 ubi_close_volume(c->ubi); 2096 ubi_close_volume(c->ubi);
2088out_free: 2097out:
2089 kfree(c);
2090 return err; 2098 return err;
2091} 2099}
2092 2100
2093static int sb_test(struct super_block *sb, void *data) 2101static int sb_test(struct super_block *sb, void *data)
2094{ 2102{
2095 dev_t *dev = data; 2103 struct ubifs_info *c1 = data;
2096 struct ubifs_info *c = sb->s_fs_info; 2104 struct ubifs_info *c = sb->s_fs_info;
2097 2105
2098 return c->vi.cdev == *dev; 2106 return c->vi.cdev == c1->vi.cdev;
2107}
2108
2109static int sb_set(struct super_block *sb, void *data)
2110{
2111 sb->s_fs_info = data;
2112 return set_anon_super(sb, NULL);
2099} 2113}
2100 2114
2101static struct dentry *ubifs_mount(struct file_system_type *fs_type, int flags, 2115static struct dentry *ubifs_mount(struct file_system_type *fs_type, int flags,
2102 const char *name, void *data) 2116 const char *name, void *data)
2103{ 2117{
2104 struct ubi_volume_desc *ubi; 2118 struct ubi_volume_desc *ubi;
2105 struct ubi_volume_info vi; 2119 struct ubifs_info *c;
2106 struct super_block *sb; 2120 struct super_block *sb;
2107 int err; 2121 int err;
2108 2122
@@ -2119,19 +2133,25 @@ static struct dentry *ubifs_mount(struct file_system_type *fs_type, int flags,
2119 name, (int)PTR_ERR(ubi)); 2133 name, (int)PTR_ERR(ubi));
2120 return ERR_CAST(ubi); 2134 return ERR_CAST(ubi);
2121 } 2135 }
2122 ubi_get_volume_info(ubi, &vi);
2123 2136
2124 dbg_gen("opened ubi%d_%d", vi.ubi_num, vi.vol_id); 2137 c = alloc_ubifs_info(ubi);
2138 if (!c) {
2139 err = -ENOMEM;
2140 goto out_close;
2141 }
2142
2143 dbg_gen("opened ubi%d_%d", c->vi.ubi_num, c->vi.vol_id);
2125 2144
2126 sb = sget(fs_type, &sb_test, &set_anon_super, &vi.cdev); 2145 sb = sget(fs_type, sb_test, sb_set, c);
2127 if (IS_ERR(sb)) { 2146 if (IS_ERR(sb)) {
2128 err = PTR_ERR(sb); 2147 err = PTR_ERR(sb);
2148 kfree(c);
2129 goto out_close; 2149 goto out_close;
2130 } 2150 }
2131 2151
2132 if (sb->s_root) { 2152 if (sb->s_root) {
2133 struct ubifs_info *c1 = sb->s_fs_info; 2153 struct ubifs_info *c1 = sb->s_fs_info;
2134 2154 kfree(c);
2135 /* A new mount point for already mounted UBIFS */ 2155 /* A new mount point for already mounted UBIFS */
2136 dbg_gen("this ubi volume is already mounted"); 2156 dbg_gen("this ubi volume is already mounted");
2137 if (!!(flags & MS_RDONLY) != c1->ro_mount) { 2157 if (!!(flags & MS_RDONLY) != c1->ro_mount) {
@@ -2140,11 +2160,6 @@ static struct dentry *ubifs_mount(struct file_system_type *fs_type, int flags,
2140 } 2160 }
2141 } else { 2161 } else {
2142 sb->s_flags = flags; 2162 sb->s_flags = flags;
2143 /*
2144 * Pass 'ubi' to 'fill_super()' in sb->s_fs_info where it is
2145 * replaced by 'c'.
2146 */
2147 sb->s_fs_info = ubi;
2148 err = ubifs_fill_super(sb, data, flags & MS_SILENT ? 1 : 0); 2163 err = ubifs_fill_super(sb, data, flags & MS_SILENT ? 1 : 0);
2149 if (err) 2164 if (err)
2150 goto out_deact; 2165 goto out_deact;
@@ -2164,11 +2179,18 @@ out_close:
2164 return ERR_PTR(err); 2179 return ERR_PTR(err);
2165} 2180}
2166 2181
2182static void kill_ubifs_super(struct super_block *s)
2183{
2184 struct ubifs_info *c = s->s_fs_info;
2185 kill_anon_super(s);
2186 kfree(c);
2187}
2188
2167static struct file_system_type ubifs_fs_type = { 2189static struct file_system_type ubifs_fs_type = {
2168 .name = "ubifs", 2190 .name = "ubifs",
2169 .owner = THIS_MODULE, 2191 .owner = THIS_MODULE,
2170 .mount = ubifs_mount, 2192 .mount = ubifs_mount,
2171 .kill_sb = kill_anon_super, 2193 .kill_sb = kill_ubifs_super,
2172}; 2194};
2173 2195
2174/* 2196/*
diff --git a/fs/ubifs/tnc.c b/fs/ubifs/tnc.c
index 8119b1fd8d94..91b4213dde84 100644
--- a/fs/ubifs/tnc.c
+++ b/fs/ubifs/tnc.c
@@ -2876,12 +2876,13 @@ static void tnc_destroy_cnext(struct ubifs_info *c)
2876 */ 2876 */
2877void ubifs_tnc_close(struct ubifs_info *c) 2877void ubifs_tnc_close(struct ubifs_info *c)
2878{ 2878{
2879 long clean_freed;
2880
2881 tnc_destroy_cnext(c); 2879 tnc_destroy_cnext(c);
2882 if (c->zroot.znode) { 2880 if (c->zroot.znode) {
2883 clean_freed = ubifs_destroy_tnc_subtree(c->zroot.znode); 2881 long n;
2884 atomic_long_sub(clean_freed, &ubifs_clean_zn_cnt); 2882
2883 ubifs_destroy_tnc_subtree(c->zroot.znode);
2884 n = atomic_long_read(&c->clean_zn_cnt);
2885 atomic_long_sub(n, &ubifs_clean_zn_cnt);
2885 } 2886 }
2886 kfree(c->gap_lebs); 2887 kfree(c->gap_lebs);
2887 kfree(c->ilebs); 2888 kfree(c->ilebs);
diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h
index a70d7b4ffb25..f79983d6f860 100644
--- a/fs/ubifs/ubifs.h
+++ b/fs/ubifs/ubifs.h
@@ -722,12 +722,14 @@ struct ubifs_bud {
722 * struct ubifs_jhead - journal head. 722 * struct ubifs_jhead - journal head.
723 * @wbuf: head's write-buffer 723 * @wbuf: head's write-buffer
724 * @buds_list: list of bud LEBs belonging to this journal head 724 * @buds_list: list of bud LEBs belonging to this journal head
725 * @grouped: non-zero if UBIFS groups nodes when writing to this journal head
725 * 726 *
726 * Note, the @buds list is protected by the @c->buds_lock. 727 * Note, the @buds list is protected by the @c->buds_lock.
727 */ 728 */
728struct ubifs_jhead { 729struct ubifs_jhead {
729 struct ubifs_wbuf wbuf; 730 struct ubifs_wbuf wbuf;
730 struct list_head buds_list; 731 struct list_head buds_list;
732 unsigned int grouped:1;
731}; 733};
732 734
733/** 735/**
@@ -1742,7 +1744,7 @@ struct inode *ubifs_iget(struct super_block *sb, unsigned long inum);
1742int ubifs_recover_master_node(struct ubifs_info *c); 1744int ubifs_recover_master_node(struct ubifs_info *c);
1743int ubifs_write_rcvrd_mst_node(struct ubifs_info *c); 1745int ubifs_write_rcvrd_mst_node(struct ubifs_info *c);
1744struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum, 1746struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum,
1745 int offs, void *sbuf, int grouped); 1747 int offs, void *sbuf, int jhead);
1746struct ubifs_scan_leb *ubifs_recover_log_leb(struct ubifs_info *c, int lnum, 1748struct ubifs_scan_leb *ubifs_recover_log_leb(struct ubifs_info *c, int lnum,
1747 int offs, void *sbuf); 1749 int offs, void *sbuf);
1748int ubifs_recover_inl_heads(const struct ubifs_info *c, void *sbuf); 1750int ubifs_recover_inl_heads(const struct ubifs_info *c, void *sbuf);
diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c
index f4213ba1ff85..7f782af286bf 100644
--- a/fs/xfs/linux-2.6/xfs_file.c
+++ b/fs/xfs/linux-2.6/xfs_file.c
@@ -131,19 +131,34 @@ xfs_file_fsync(
131{ 131{
132 struct inode *inode = file->f_mapping->host; 132 struct inode *inode = file->f_mapping->host;
133 struct xfs_inode *ip = XFS_I(inode); 133 struct xfs_inode *ip = XFS_I(inode);
134 struct xfs_mount *mp = ip->i_mount;
134 struct xfs_trans *tp; 135 struct xfs_trans *tp;
135 int error = 0; 136 int error = 0;
136 int log_flushed = 0; 137 int log_flushed = 0;
137 138
138 trace_xfs_file_fsync(ip); 139 trace_xfs_file_fsync(ip);
139 140
140 if (XFS_FORCED_SHUTDOWN(ip->i_mount)) 141 if (XFS_FORCED_SHUTDOWN(mp))
141 return -XFS_ERROR(EIO); 142 return -XFS_ERROR(EIO);
142 143
143 xfs_iflags_clear(ip, XFS_ITRUNCATED); 144 xfs_iflags_clear(ip, XFS_ITRUNCATED);
144 145
145 xfs_ioend_wait(ip); 146 xfs_ioend_wait(ip);
146 147
148 if (mp->m_flags & XFS_MOUNT_BARRIER) {
149 /*
150 * If we have an RT and/or log subvolume we need to make sure
151 * to flush the write cache the device used for file data
152 * first. This is to ensure newly written file data make
153 * it to disk before logging the new inode size in case of
154 * an extending write.
155 */
156 if (XFS_IS_REALTIME_INODE(ip))
157 xfs_blkdev_issue_flush(mp->m_rtdev_targp);
158 else if (mp->m_logdev_targp != mp->m_ddev_targp)
159 xfs_blkdev_issue_flush(mp->m_ddev_targp);
160 }
161
147 /* 162 /*
148 * We always need to make sure that the required inode state is safe on 163 * We always need to make sure that the required inode state is safe on
149 * disk. The inode might be clean but we still might need to force the 164 * disk. The inode might be clean but we still might need to force the
@@ -175,9 +190,9 @@ xfs_file_fsync(
175 * updates. The sync transaction will also force the log. 190 * updates. The sync transaction will also force the log.
176 */ 191 */
177 xfs_iunlock(ip, XFS_ILOCK_SHARED); 192 xfs_iunlock(ip, XFS_ILOCK_SHARED);
178 tp = xfs_trans_alloc(ip->i_mount, XFS_TRANS_FSYNC_TS); 193 tp = xfs_trans_alloc(mp, XFS_TRANS_FSYNC_TS);
179 error = xfs_trans_reserve(tp, 0, 194 error = xfs_trans_reserve(tp, 0,
180 XFS_FSYNC_TS_LOG_RES(ip->i_mount), 0, 0, 0); 195 XFS_FSYNC_TS_LOG_RES(mp), 0, 0, 0);
181 if (error) { 196 if (error) {
182 xfs_trans_cancel(tp, 0); 197 xfs_trans_cancel(tp, 0);
183 return -error; 198 return -error;
@@ -209,28 +224,25 @@ xfs_file_fsync(
209 * force the log. 224 * force the log.
210 */ 225 */
211 if (xfs_ipincount(ip)) { 226 if (xfs_ipincount(ip)) {
212 error = _xfs_log_force_lsn(ip->i_mount, 227 error = _xfs_log_force_lsn(mp,
213 ip->i_itemp->ili_last_lsn, 228 ip->i_itemp->ili_last_lsn,
214 XFS_LOG_SYNC, &log_flushed); 229 XFS_LOG_SYNC, &log_flushed);
215 } 230 }
216 xfs_iunlock(ip, XFS_ILOCK_SHARED); 231 xfs_iunlock(ip, XFS_ILOCK_SHARED);
217 } 232 }
218 233
219 if (ip->i_mount->m_flags & XFS_MOUNT_BARRIER) { 234 /*
220 /* 235 * If we only have a single device, and the log force about was
221 * If the log write didn't issue an ordered tag we need 236 * a no-op we might have to flush the data device cache here.
222 * to flush the disk cache for the data device now. 237 * This can only happen for fdatasync/O_DSYNC if we were overwriting
223 */ 238 * an already allocated file and thus do not have any metadata to
224 if (!log_flushed) 239 * commit.
225 xfs_blkdev_issue_flush(ip->i_mount->m_ddev_targp); 240 */
226 241 if ((mp->m_flags & XFS_MOUNT_BARRIER) &&
227 /* 242 mp->m_logdev_targp == mp->m_ddev_targp &&
228 * If this inode is on the RT dev we need to flush that 243 !XFS_IS_REALTIME_INODE(ip) &&
229 * cache as well. 244 !log_flushed)
230 */ 245 xfs_blkdev_issue_flush(mp->m_ddev_targp);
231 if (XFS_IS_REALTIME_INODE(ip))
232 xfs_blkdev_issue_flush(ip->i_mount->m_rtdev_targp);
233 }
234 246
235 return -error; 247 return -error;
236} 248}
diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c
index dd21784525a8..d44d92cd12b1 100644
--- a/fs/xfs/linux-2.6/xfs_iops.c
+++ b/fs/xfs/linux-2.6/xfs_iops.c
@@ -182,7 +182,7 @@ xfs_vn_mknod(
182 if (IS_POSIXACL(dir)) { 182 if (IS_POSIXACL(dir)) {
183 default_acl = xfs_get_acl(dir, ACL_TYPE_DEFAULT); 183 default_acl = xfs_get_acl(dir, ACL_TYPE_DEFAULT);
184 if (IS_ERR(default_acl)) 184 if (IS_ERR(default_acl))
185 return -PTR_ERR(default_acl); 185 return PTR_ERR(default_acl);
186 186
187 if (!default_acl) 187 if (!default_acl)
188 mode &= ~current_umask(); 188 mode &= ~current_umask();
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index 1e3a7ce804dc..a1a881e68a9a 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -627,68 +627,6 @@ xfs_blkdev_put(
627 blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL); 627 blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
628} 628}
629 629
630/*
631 * Try to write out the superblock using barriers.
632 */
633STATIC int
634xfs_barrier_test(
635 xfs_mount_t *mp)
636{
637 xfs_buf_t *sbp = xfs_getsb(mp, 0);
638 int error;
639
640 XFS_BUF_UNDONE(sbp);
641 XFS_BUF_UNREAD(sbp);
642 XFS_BUF_UNDELAYWRITE(sbp);
643 XFS_BUF_WRITE(sbp);
644 XFS_BUF_UNASYNC(sbp);
645 XFS_BUF_ORDERED(sbp);
646
647 xfsbdstrat(mp, sbp);
648 error = xfs_buf_iowait(sbp);
649
650 /*
651 * Clear all the flags we set and possible error state in the
652 * buffer. We only did the write to try out whether barriers
653 * worked and shouldn't leave any traces in the superblock
654 * buffer.
655 */
656 XFS_BUF_DONE(sbp);
657 XFS_BUF_ERROR(sbp, 0);
658 XFS_BUF_UNORDERED(sbp);
659
660 xfs_buf_relse(sbp);
661 return error;
662}
663
664STATIC void
665xfs_mountfs_check_barriers(xfs_mount_t *mp)
666{
667 int error;
668
669 if (mp->m_logdev_targp != mp->m_ddev_targp) {
670 xfs_notice(mp,
671 "Disabling barriers, not supported with external log device");
672 mp->m_flags &= ~XFS_MOUNT_BARRIER;
673 return;
674 }
675
676 if (xfs_readonly_buftarg(mp->m_ddev_targp)) {
677 xfs_notice(mp,
678 "Disabling barriers, underlying device is readonly");
679 mp->m_flags &= ~XFS_MOUNT_BARRIER;
680 return;
681 }
682
683 error = xfs_barrier_test(mp);
684 if (error) {
685 xfs_notice(mp,
686 "Disabling barriers, trial barrier write failed");
687 mp->m_flags &= ~XFS_MOUNT_BARRIER;
688 return;
689 }
690}
691
692void 630void
693xfs_blkdev_issue_flush( 631xfs_blkdev_issue_flush(
694 xfs_buftarg_t *buftarg) 632 xfs_buftarg_t *buftarg)
@@ -1240,14 +1178,6 @@ xfs_fs_remount(
1240 switch (token) { 1178 switch (token) {
1241 case Opt_barrier: 1179 case Opt_barrier:
1242 mp->m_flags |= XFS_MOUNT_BARRIER; 1180 mp->m_flags |= XFS_MOUNT_BARRIER;
1243
1244 /*
1245 * Test if barriers are actually working if we can,
1246 * else delay this check until the filesystem is
1247 * marked writeable.
1248 */
1249 if (!(mp->m_flags & XFS_MOUNT_RDONLY))
1250 xfs_mountfs_check_barriers(mp);
1251 break; 1181 break;
1252 case Opt_nobarrier: 1182 case Opt_nobarrier:
1253 mp->m_flags &= ~XFS_MOUNT_BARRIER; 1183 mp->m_flags &= ~XFS_MOUNT_BARRIER;
@@ -1282,8 +1212,6 @@ xfs_fs_remount(
1282 /* ro -> rw */ 1212 /* ro -> rw */
1283 if ((mp->m_flags & XFS_MOUNT_RDONLY) && !(*flags & MS_RDONLY)) { 1213 if ((mp->m_flags & XFS_MOUNT_RDONLY) && !(*flags & MS_RDONLY)) {
1284 mp->m_flags &= ~XFS_MOUNT_RDONLY; 1214 mp->m_flags &= ~XFS_MOUNT_RDONLY;
1285 if (mp->m_flags & XFS_MOUNT_BARRIER)
1286 xfs_mountfs_check_barriers(mp);
1287 1215
1288 /* 1216 /*
1289 * If this is the first remount to writeable state we 1217 * If this is the first remount to writeable state we
@@ -1465,9 +1393,6 @@ xfs_fs_fill_super(
1465 if (error) 1393 if (error)
1466 goto out_free_sb; 1394 goto out_free_sb;
1467 1395
1468 if (mp->m_flags & XFS_MOUNT_BARRIER)
1469 xfs_mountfs_check_barriers(mp);
1470
1471 error = xfs_filestream_mount(mp); 1396 error = xfs_filestream_mount(mp);
1472 if (error) 1397 if (error)
1473 goto out_free_sb; 1398 goto out_free_sb;
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index 211930246f20..41d5b8f2bf92 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -1372,8 +1372,17 @@ xlog_sync(xlog_t *log,
1372 XFS_BUF_ASYNC(bp); 1372 XFS_BUF_ASYNC(bp);
1373 bp->b_flags |= XBF_LOG_BUFFER; 1373 bp->b_flags |= XBF_LOG_BUFFER;
1374 1374
1375 if (log->l_mp->m_flags & XFS_MOUNT_BARRIER) 1375 if (log->l_mp->m_flags & XFS_MOUNT_BARRIER) {
1376 /*
1377 * If we have an external log device, flush the data device
1378 * before flushing the log to make sure all meta data
1379 * written back from the AIL actually made it to disk
1380 * before writing out the new log tail LSN in the log buffer.
1381 */
1382 if (log->l_mp->m_logdev_targp != log->l_mp->m_ddev_targp)
1383 xfs_blkdev_issue_flush(log->l_mp->m_ddev_targp);
1376 XFS_BUF_ORDERED(bp); 1384 XFS_BUF_ORDERED(bp);
1385 }
1377 1386
1378 ASSERT(XFS_BUF_ADDR(bp) <= log->l_logBBsize-1); 1387 ASSERT(XFS_BUF_ADDR(bp) <= log->l_logBBsize-1);
1379 ASSERT(XFS_BUF_ADDR(bp) + BTOBB(count) <= log->l_logBBsize); 1388 ASSERT(XFS_BUF_ADDR(bp) + BTOBB(count) <= log->l_logBBsize);