about | summary | refs | log | tree | commit | diff | stats
path: root/fs
diff options
context:
space:
mode:
author: Greg Kroah-Hartman <gregkh@suse.de> 2011-11-26 22:46:19 -0500
committer: Greg Kroah-Hartman <gregkh@suse.de> 2011-11-26 22:46:48 -0500
commit: 47b649590dbbea182f854d6470ee1cd59b7b7684 (patch)
tree: a2de3610f9c5e963234cffb35b9d559aab62e164 /fs
parent: 3af5154a869bc278a829bb03e65a709480e821b0 (diff)
parent: caca6a03d365883564885f2c1da3e88dcf65d139 (diff)
Merge 3.2-rc3 into usb-linus
This pulls in the latest USB bugfixes and helps a few of the drivers merge nicer in the future due to changes in both branches.

Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
Diffstat (limited to 'fs')
-rw-r--r-- fs/bio.c 7
-rw-r--r-- fs/btrfs/backref.c 2
-rw-r--r-- fs/btrfs/btrfs_inode.h 4
-rw-r--r-- fs/btrfs/ctree.c 17
-rw-r--r-- fs/btrfs/ctree.h 5
-rw-r--r-- fs/btrfs/delayed-inode.c 58
-rw-r--r-- fs/btrfs/disk-io.c 189
-rw-r--r-- fs/btrfs/extent-tree.c 169
-rw-r--r-- fs/btrfs/extent_io.c 9
-rw-r--r-- fs/btrfs/extent_io.h 2
-rw-r--r-- fs/btrfs/free-space-cache.c 80
-rw-r--r-- fs/btrfs/inode-map.c 28
-rw-r--r-- fs/btrfs/inode.c 90
-rw-r--r-- fs/btrfs/ioctl.c 15
-rw-r--r-- fs/btrfs/relocation.c 2
-rw-r--r-- fs/btrfs/scrub.c 66
-rw-r--r-- fs/btrfs/super.c 87
-rw-r--r-- fs/btrfs/transaction.c 12
-rw-r--r-- fs/btrfs/volumes.c 5
-rw-r--r-- fs/btrfs/volumes.h 6
-rw-r--r-- fs/ceph/dir.c 2
-rw-r--r-- fs/ceph/inode.c 9
-rw-r--r-- fs/ceph/super.c 6
-rw-r--r-- fs/dcache.c 11
-rw-r--r-- fs/ecryptfs/crypto.c 26
-rw-r--r-- fs/ecryptfs/ecryptfs_kernel.h 5
-rw-r--r-- fs/ecryptfs/file.c 23
-rw-r--r-- fs/ecryptfs/inode.c 52
-rw-r--r-- fs/ext4/balloc.c 2
-rw-r--r-- fs/ext4/inode.c 1
-rw-r--r-- fs/ext4/super.c 6
-rw-r--r-- fs/hfs/trans.c 2
-rw-r--r-- fs/minix/bitmap.c 55
-rw-r--r-- fs/minix/inode.c 25
-rw-r--r-- fs/minix/minix.h 11
-rw-r--r-- fs/namespace.c 32
-rw-r--r-- fs/nfs/dir.c 2
-rw-r--r-- fs/nfs/file.c 91
-rw-r--r-- fs/nfs/inode.c 2
-rw-r--r-- fs/nfs/internal.h 2
-rw-r--r-- fs/nfs/nfs3proc.c 1
-rw-r--r-- fs/nfs/nfs4proc.c 4
-rw-r--r-- fs/nfs/pnfs.c 26
-rw-r--r-- fs/nfs/proc.c 1
-rw-r--r-- fs/nfs/read.c 14
-rw-r--r-- fs/nfs/super.c 37
-rw-r--r-- fs/proc/base.c 146
-rw-r--r-- fs/xfs/xfs_aops.c 2
-rw-r--r-- fs/xfs/xfs_buf_item.c 2
-rw-r--r-- fs/xfs/xfs_dquot_item.c 6
-rw-r--r-- fs/xfs/xfs_extfree_item.c 4
-rw-r--r-- fs/xfs/xfs_inode_item.c 2
-rw-r--r-- fs/xfs/xfs_log.c 2
-rw-r--r-- fs/xfs/xfs_log.h 2
-rw-r--r-- fs/xfs/xfs_qm.c 3
-rw-r--r-- fs/xfs/xfs_trans.h 6
-rw-r--r-- fs/xfs/xfs_vnodeops.c 14
57 files changed, 903 insertions, 587 deletions
diff --git a/fs/bio.c b/fs/bio.c
index 41c93c722244..b1fe82cf88cf 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -337,7 +337,7 @@ static void bio_fs_destructor(struct bio *bio)
337 * RETURNS: 337 * RETURNS:
338 * Pointer to new bio on success, NULL on failure. 338 * Pointer to new bio on success, NULL on failure.
339 */ 339 */
340struct bio *bio_alloc(gfp_t gfp_mask, int nr_iovecs) 340struct bio *bio_alloc(gfp_t gfp_mask, unsigned int nr_iovecs)
341{ 341{
342 struct bio *bio = bio_alloc_bioset(gfp_mask, nr_iovecs, fs_bio_set); 342 struct bio *bio = bio_alloc_bioset(gfp_mask, nr_iovecs, fs_bio_set);
343 343
@@ -365,7 +365,7 @@ static void bio_kmalloc_destructor(struct bio *bio)
365 * %__GFP_WAIT, the allocation is guaranteed to succeed. 365 * %__GFP_WAIT, the allocation is guaranteed to succeed.
366 * 366 *
367 **/ 367 **/
368struct bio *bio_kmalloc(gfp_t gfp_mask, int nr_iovecs) 368struct bio *bio_kmalloc(gfp_t gfp_mask, unsigned int nr_iovecs)
369{ 369{
370 struct bio *bio; 370 struct bio *bio;
371 371
@@ -696,7 +696,8 @@ static void bio_free_map_data(struct bio_map_data *bmd)
696 kfree(bmd); 696 kfree(bmd);
697} 697}
698 698
699static struct bio_map_data *bio_alloc_map_data(int nr_segs, int iov_count, 699static struct bio_map_data *bio_alloc_map_data(int nr_segs,
700 unsigned int iov_count,
700 gfp_t gfp_mask) 701 gfp_t gfp_mask)
701{ 702{
702 struct bio_map_data *bmd; 703 struct bio_map_data *bmd;
diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c
index 8855aad3929c..22c64fff1bd5 100644
--- a/fs/btrfs/backref.c
+++ b/fs/btrfs/backref.c
@@ -683,7 +683,7 @@ static int inode_to_path(u64 inum, struct btrfs_inode_ref *iref,
683 return PTR_ERR(fspath); 683 return PTR_ERR(fspath);
684 684
685 if (fspath > fspath_min) { 685 if (fspath > fspath_min) {
686 ipath->fspath->val[i] = (u64)fspath; 686 ipath->fspath->val[i] = (u64)(unsigned long)fspath;
687 ++ipath->fspath->elem_cnt; 687 ++ipath->fspath->elem_cnt;
688 ipath->fspath->bytes_left = fspath - fspath_min; 688 ipath->fspath->bytes_left = fspath - fspath_min;
689 } else { 689 } else {
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
index 5a5d325a3935..634608d2a6d0 100644
--- a/fs/btrfs/btrfs_inode.h
+++ b/fs/btrfs/btrfs_inode.h
@@ -147,14 +147,12 @@ struct btrfs_inode {
147 * the btrfs file release call will add this inode to the 147 * the btrfs file release call will add this inode to the
148 * ordered operations list so that we make sure to flush out any 148 * ordered operations list so that we make sure to flush out any
149 * new data the application may have written before commit. 149 * new data the application may have written before commit.
150 *
151 * yes, its silly to have a single bitflag, but we might grow more
152 * of these.
153 */ 150 */
154 unsigned ordered_data_close:1; 151 unsigned ordered_data_close:1;
155 unsigned orphan_meta_reserved:1; 152 unsigned orphan_meta_reserved:1;
156 unsigned dummy_inode:1; 153 unsigned dummy_inode:1;
157 unsigned in_defrag:1; 154 unsigned in_defrag:1;
155 unsigned delalloc_meta_reserved:1;
158 156
159 /* 157 /*
160 * always compress this one file 158 * always compress this one file
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index 0fe615e4ea38..dede441bdeee 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -514,10 +514,25 @@ static inline int should_cow_block(struct btrfs_trans_handle *trans,
514 struct btrfs_root *root, 514 struct btrfs_root *root,
515 struct extent_buffer *buf) 515 struct extent_buffer *buf)
516{ 516{
517 /* ensure we can see the force_cow */
518 smp_rmb();
519
520 /*
521 * We do not need to cow a block if
522 * 1) this block is not created or changed in this transaction;
523 * 2) this block does not belong to TREE_RELOC tree;
524 * 3) the root is not forced COW.
525 *
526 * What is forced COW:
527 * when we create snapshot during commiting the transaction,
528 * after we've finished coping src root, we must COW the shared
529 * block to ensure the metadata consistency.
530 */
517 if (btrfs_header_generation(buf) == trans->transid && 531 if (btrfs_header_generation(buf) == trans->transid &&
518 !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN) && 532 !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN) &&
519 !(root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID && 533 !(root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID &&
520 btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) 534 btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC)) &&
535 !root->force_cow)
521 return 0; 536 return 0;
522 return 1; 537 return 1;
523} 538}
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index b9ba59ff9292..04a5dfcee5a1 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -848,7 +848,8 @@ struct btrfs_free_cluster {
848enum btrfs_caching_type { 848enum btrfs_caching_type {
849 BTRFS_CACHE_NO = 0, 849 BTRFS_CACHE_NO = 0,
850 BTRFS_CACHE_STARTED = 1, 850 BTRFS_CACHE_STARTED = 1,
851 BTRFS_CACHE_FINISHED = 2, 851 BTRFS_CACHE_FAST = 2,
852 BTRFS_CACHE_FINISHED = 3,
852}; 853};
853 854
854enum btrfs_disk_cache_state { 855enum btrfs_disk_cache_state {
@@ -1271,6 +1272,8 @@ struct btrfs_root {
1271 * for stat. It may be used for more later 1272 * for stat. It may be used for more later
1272 */ 1273 */
1273 dev_t anon_dev; 1274 dev_t anon_dev;
1275
1276 int force_cow;
1274}; 1277};
1275 1278
1276struct btrfs_ioctl_defrag_range_args { 1279struct btrfs_ioctl_defrag_range_args {
diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c
index 3a1b939c9ae2..5b163572e0ca 100644
--- a/fs/btrfs/delayed-inode.c
+++ b/fs/btrfs/delayed-inode.c
@@ -617,12 +617,14 @@ static void btrfs_delayed_item_release_metadata(struct btrfs_root *root,
617static int btrfs_delayed_inode_reserve_metadata( 617static int btrfs_delayed_inode_reserve_metadata(
618 struct btrfs_trans_handle *trans, 618 struct btrfs_trans_handle *trans,
619 struct btrfs_root *root, 619 struct btrfs_root *root,
620 struct inode *inode,
620 struct btrfs_delayed_node *node) 621 struct btrfs_delayed_node *node)
621{ 622{
622 struct btrfs_block_rsv *src_rsv; 623 struct btrfs_block_rsv *src_rsv;
623 struct btrfs_block_rsv *dst_rsv; 624 struct btrfs_block_rsv *dst_rsv;
624 u64 num_bytes; 625 u64 num_bytes;
625 int ret; 626 int ret;
627 int release = false;
626 628
627 src_rsv = trans->block_rsv; 629 src_rsv = trans->block_rsv;
628 dst_rsv = &root->fs_info->delayed_block_rsv; 630 dst_rsv = &root->fs_info->delayed_block_rsv;
@@ -652,12 +654,65 @@ static int btrfs_delayed_inode_reserve_metadata(
652 if (!ret) 654 if (!ret)
653 node->bytes_reserved = num_bytes; 655 node->bytes_reserved = num_bytes;
654 return ret; 656 return ret;
657 } else if (src_rsv == &root->fs_info->delalloc_block_rsv) {
658 spin_lock(&BTRFS_I(inode)->lock);
659 if (BTRFS_I(inode)->delalloc_meta_reserved) {
660 BTRFS_I(inode)->delalloc_meta_reserved = 0;
661 spin_unlock(&BTRFS_I(inode)->lock);
662 release = true;
663 goto migrate;
664 }
665 spin_unlock(&BTRFS_I(inode)->lock);
666
667 /* Ok we didn't have space pre-reserved. This shouldn't happen
668 * too often but it can happen if we do delalloc to an existing
669 * inode which gets dirtied because of the time update, and then
670 * isn't touched again until after the transaction commits and
671 * then we try to write out the data. First try to be nice and
672 * reserve something strictly for us. If not be a pain and try
673 * to steal from the delalloc block rsv.
674 */
675 ret = btrfs_block_rsv_add_noflush(root, dst_rsv, num_bytes);
676 if (!ret)
677 goto out;
678
679 ret = btrfs_block_rsv_migrate(src_rsv, dst_rsv, num_bytes);
680 if (!ret)
681 goto out;
682
683 /*
684 * Ok this is a problem, let's just steal from the global rsv
685 * since this really shouldn't happen that often.
686 */
687 WARN_ON(1);
688 ret = btrfs_block_rsv_migrate(&root->fs_info->global_block_rsv,
689 dst_rsv, num_bytes);
690 goto out;
655 } 691 }
656 692
693migrate:
657 ret = btrfs_block_rsv_migrate(src_rsv, dst_rsv, num_bytes); 694 ret = btrfs_block_rsv_migrate(src_rsv, dst_rsv, num_bytes);
695
696out:
697 /*
698 * Migrate only takes a reservation, it doesn't touch the size of the
699 * block_rsv. This is to simplify people who don't normally have things
700 * migrated from their block rsv. If they go to release their
701 * reservation, that will decrease the size as well, so if migrate
702 * reduced size we'd end up with a negative size. But for the
703 * delalloc_meta_reserved stuff we will only know to drop 1 reservation,
704 * but we could in fact do this reserve/migrate dance several times
705 * between the time we did the original reservation and we'd clean it
706 * up. So to take care of this, release the space for the meta
707 * reservation here. I think it may be time for a documentation page on
708 * how block rsvs. work.
709 */
658 if (!ret) 710 if (!ret)
659 node->bytes_reserved = num_bytes; 711 node->bytes_reserved = num_bytes;
660 712
713 if (release)
714 btrfs_block_rsv_release(root, src_rsv, num_bytes);
715
661 return ret; 716 return ret;
662} 717}
663 718
@@ -1708,7 +1763,8 @@ int btrfs_delayed_update_inode(struct btrfs_trans_handle *trans,
1708 goto release_node; 1763 goto release_node;
1709 } 1764 }
1710 1765
1711 ret = btrfs_delayed_inode_reserve_metadata(trans, root, delayed_node); 1766 ret = btrfs_delayed_inode_reserve_metadata(trans, root, inode,
1767 delayed_node);
1712 if (ret) 1768 if (ret)
1713 goto release_node; 1769 goto release_node;
1714 1770
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 102c176fc29c..632f8f3cc9db 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -620,7 +620,7 @@ out:
620 620
621static int btree_io_failed_hook(struct bio *failed_bio, 621static int btree_io_failed_hook(struct bio *failed_bio,
622 struct page *page, u64 start, u64 end, 622 struct page *page, u64 start, u64 end,
623 u64 mirror_num, struct extent_state *state) 623 int mirror_num, struct extent_state *state)
624{ 624{
625 struct extent_io_tree *tree; 625 struct extent_io_tree *tree;
626 unsigned long len; 626 unsigned long len;
@@ -1890,31 +1890,32 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1890 u64 features; 1890 u64 features;
1891 struct btrfs_key location; 1891 struct btrfs_key location;
1892 struct buffer_head *bh; 1892 struct buffer_head *bh;
1893 struct btrfs_root *extent_root = kzalloc(sizeof(struct btrfs_root), 1893 struct btrfs_super_block *disk_super;
1894 GFP_NOFS);
1895 struct btrfs_root *csum_root = kzalloc(sizeof(struct btrfs_root),
1896 GFP_NOFS);
1897 struct btrfs_root *tree_root = btrfs_sb(sb); 1894 struct btrfs_root *tree_root = btrfs_sb(sb);
1898 struct btrfs_fs_info *fs_info = NULL; 1895 struct btrfs_fs_info *fs_info = tree_root->fs_info;
1899 struct btrfs_root *chunk_root = kzalloc(sizeof(struct btrfs_root), 1896 struct btrfs_root *extent_root;
1900 GFP_NOFS); 1897 struct btrfs_root *csum_root;
1901 struct btrfs_root *dev_root = kzalloc(sizeof(struct btrfs_root), 1898 struct btrfs_root *chunk_root;
1902 GFP_NOFS); 1899 struct btrfs_root *dev_root;
1903 struct btrfs_root *log_tree_root; 1900 struct btrfs_root *log_tree_root;
1904
1905 int ret; 1901 int ret;
1906 int err = -EINVAL; 1902 int err = -EINVAL;
1907 int num_backups_tried = 0; 1903 int num_backups_tried = 0;
1908 int backup_index = 0; 1904 int backup_index = 0;
1909 1905
1910 struct btrfs_super_block *disk_super; 1906 extent_root = fs_info->extent_root =
1907 kzalloc(sizeof(struct btrfs_root), GFP_NOFS);
1908 csum_root = fs_info->csum_root =
1909 kzalloc(sizeof(struct btrfs_root), GFP_NOFS);
1910 chunk_root = fs_info->chunk_root =
1911 kzalloc(sizeof(struct btrfs_root), GFP_NOFS);
1912 dev_root = fs_info->dev_root =
1913 kzalloc(sizeof(struct btrfs_root), GFP_NOFS);
1911 1914
1912 if (!extent_root || !tree_root || !tree_root->fs_info || 1915 if (!extent_root || !csum_root || !chunk_root || !dev_root) {
1913 !chunk_root || !dev_root || !csum_root) {
1914 err = -ENOMEM; 1916 err = -ENOMEM;
1915 goto fail; 1917 goto fail;
1916 } 1918 }
1917 fs_info = tree_root->fs_info;
1918 1919
1919 ret = init_srcu_struct(&fs_info->subvol_srcu); 1920 ret = init_srcu_struct(&fs_info->subvol_srcu);
1920 if (ret) { 1921 if (ret) {
@@ -1954,12 +1955,6 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1954 mutex_init(&fs_info->reloc_mutex); 1955 mutex_init(&fs_info->reloc_mutex);
1955 1956
1956 init_completion(&fs_info->kobj_unregister); 1957 init_completion(&fs_info->kobj_unregister);
1957 fs_info->tree_root = tree_root;
1958 fs_info->extent_root = extent_root;
1959 fs_info->csum_root = csum_root;
1960 fs_info->chunk_root = chunk_root;
1961 fs_info->dev_root = dev_root;
1962 fs_info->fs_devices = fs_devices;
1963 INIT_LIST_HEAD(&fs_info->dirty_cowonly_roots); 1958 INIT_LIST_HEAD(&fs_info->dirty_cowonly_roots);
1964 INIT_LIST_HEAD(&fs_info->space_info); 1959 INIT_LIST_HEAD(&fs_info->space_info);
1965 btrfs_mapping_init(&fs_info->mapping_tree); 1960 btrfs_mapping_init(&fs_info->mapping_tree);
@@ -2465,21 +2460,20 @@ fail_sb_buffer:
2465 btrfs_stop_workers(&fs_info->caching_workers); 2460 btrfs_stop_workers(&fs_info->caching_workers);
2466fail_alloc: 2461fail_alloc:
2467fail_iput: 2462fail_iput:
2463 btrfs_mapping_tree_free(&fs_info->mapping_tree);
2464
2468 invalidate_inode_pages2(fs_info->btree_inode->i_mapping); 2465 invalidate_inode_pages2(fs_info->btree_inode->i_mapping);
2469 iput(fs_info->btree_inode); 2466 iput(fs_info->btree_inode);
2470
2471 btrfs_close_devices(fs_info->fs_devices);
2472 btrfs_mapping_tree_free(&fs_info->mapping_tree);
2473fail_bdi: 2467fail_bdi:
2474 bdi_destroy(&fs_info->bdi); 2468 bdi_destroy(&fs_info->bdi);
2475fail_srcu: 2469fail_srcu:
2476 cleanup_srcu_struct(&fs_info->subvol_srcu); 2470 cleanup_srcu_struct(&fs_info->subvol_srcu);
2477fail: 2471fail:
2472 btrfs_close_devices(fs_info->fs_devices);
2478 free_fs_info(fs_info); 2473 free_fs_info(fs_info);
2479 return ERR_PTR(err); 2474 return ERR_PTR(err);
2480 2475
2481recovery_tree_root: 2476recovery_tree_root:
2482
2483 if (!btrfs_test_opt(tree_root, RECOVERY)) 2477 if (!btrfs_test_opt(tree_root, RECOVERY))
2484 goto fail_tree_roots; 2478 goto fail_tree_roots;
2485 2479
@@ -2579,22 +2573,10 @@ static int write_dev_supers(struct btrfs_device *device,
2579 int errors = 0; 2573 int errors = 0;
2580 u32 crc; 2574 u32 crc;
2581 u64 bytenr; 2575 u64 bytenr;
2582 int last_barrier = 0;
2583 2576
2584 if (max_mirrors == 0) 2577 if (max_mirrors == 0)
2585 max_mirrors = BTRFS_SUPER_MIRROR_MAX; 2578 max_mirrors = BTRFS_SUPER_MIRROR_MAX;
2586 2579
2587 /* make sure only the last submit_bh does a barrier */
2588 if (do_barriers) {
2589 for (i = 0; i < max_mirrors; i++) {
2590 bytenr = btrfs_sb_offset(i);
2591 if (bytenr + BTRFS_SUPER_INFO_SIZE >=
2592 device->total_bytes)
2593 break;
2594 last_barrier = i;
2595 }
2596 }
2597
2598 for (i = 0; i < max_mirrors; i++) { 2580 for (i = 0; i < max_mirrors; i++) {
2599 bytenr = btrfs_sb_offset(i); 2581 bytenr = btrfs_sb_offset(i);
2600 if (bytenr + BTRFS_SUPER_INFO_SIZE >= device->total_bytes) 2582 if (bytenr + BTRFS_SUPER_INFO_SIZE >= device->total_bytes)
@@ -2640,17 +2622,136 @@ static int write_dev_supers(struct btrfs_device *device,
2640 bh->b_end_io = btrfs_end_buffer_write_sync; 2622 bh->b_end_io = btrfs_end_buffer_write_sync;
2641 } 2623 }
2642 2624
2643 if (i == last_barrier && do_barriers) 2625 /*
2644 ret = submit_bh(WRITE_FLUSH_FUA, bh); 2626 * we fua the first super. The others we allow
2645 else 2627 * to go down lazy.
2646 ret = submit_bh(WRITE_SYNC, bh); 2628 */
2647 2629 ret = submit_bh(WRITE_FUA, bh);
2648 if (ret) 2630 if (ret)
2649 errors++; 2631 errors++;
2650 } 2632 }
2651 return errors < i ? 0 : -1; 2633 return errors < i ? 0 : -1;
2652} 2634}
2653 2635
2636/*
2637 * endio for the write_dev_flush, this will wake anyone waiting
2638 * for the barrier when it is done
2639 */
2640static void btrfs_end_empty_barrier(struct bio *bio, int err)
2641{
2642 if (err) {
2643 if (err == -EOPNOTSUPP)
2644 set_bit(BIO_EOPNOTSUPP, &bio->bi_flags);
2645 clear_bit(BIO_UPTODATE, &bio->bi_flags);
2646 }
2647 if (bio->bi_private)
2648 complete(bio->bi_private);
2649 bio_put(bio);
2650}
2651
2652/*
2653 * trigger flushes for one the devices. If you pass wait == 0, the flushes are
2654 * sent down. With wait == 1, it waits for the previous flush.
2655 *
2656 * any device where the flush fails with eopnotsupp are flagged as not-barrier
2657 * capable
2658 */
2659static int write_dev_flush(struct btrfs_device *device, int wait)
2660{
2661 struct bio *bio;
2662 int ret = 0;
2663
2664 if (device->nobarriers)
2665 return 0;
2666
2667 if (wait) {
2668 bio = device->flush_bio;
2669 if (!bio)
2670 return 0;
2671
2672 wait_for_completion(&device->flush_wait);
2673
2674 if (bio_flagged(bio, BIO_EOPNOTSUPP)) {
2675 printk("btrfs: disabling barriers on dev %s\n",
2676 device->name);
2677 device->nobarriers = 1;
2678 }
2679 if (!bio_flagged(bio, BIO_UPTODATE)) {
2680 ret = -EIO;
2681 }
2682
2683 /* drop the reference from the wait == 0 run */
2684 bio_put(bio);
2685 device->flush_bio = NULL;
2686
2687 return ret;
2688 }
2689
2690 /*
2691 * one reference for us, and we leave it for the
2692 * caller
2693 */
2694 device->flush_bio = NULL;;
2695 bio = bio_alloc(GFP_NOFS, 0);
2696 if (!bio)
2697 return -ENOMEM;
2698
2699 bio->bi_end_io = btrfs_end_empty_barrier;
2700 bio->bi_bdev = device->bdev;
2701 init_completion(&device->flush_wait);
2702 bio->bi_private = &device->flush_wait;
2703 device->flush_bio = bio;
2704
2705 bio_get(bio);
2706 submit_bio(WRITE_FLUSH, bio);
2707
2708 return 0;
2709}
2710
2711/*
2712 * send an empty flush down to each device in parallel,
2713 * then wait for them
2714 */
2715static int barrier_all_devices(struct btrfs_fs_info *info)
2716{
2717 struct list_head *head;
2718 struct btrfs_device *dev;
2719 int errors = 0;
2720 int ret;
2721
2722 /* send down all the barriers */
2723 head = &info->fs_devices->devices;
2724 list_for_each_entry_rcu(dev, head, dev_list) {
2725 if (!dev->bdev) {
2726 errors++;
2727 continue;
2728 }
2729 if (!dev->in_fs_metadata || !dev->writeable)
2730 continue;
2731
2732 ret = write_dev_flush(dev, 0);
2733 if (ret)
2734 errors++;
2735 }
2736
2737 /* wait for all the barriers */
2738 list_for_each_entry_rcu(dev, head, dev_list) {
2739 if (!dev->bdev) {
2740 errors++;
2741 continue;
2742 }
2743 if (!dev->in_fs_metadata || !dev->writeable)
2744 continue;
2745
2746 ret = write_dev_flush(dev, 1);
2747 if (ret)
2748 errors++;
2749 }
2750 if (errors)
2751 return -EIO;
2752 return 0;
2753}
2754
2654int write_all_supers(struct btrfs_root *root, int max_mirrors) 2755int write_all_supers(struct btrfs_root *root, int max_mirrors)
2655{ 2756{
2656 struct list_head *head; 2757 struct list_head *head;
@@ -2672,6 +2773,10 @@ int write_all_supers(struct btrfs_root *root, int max_mirrors)
2672 2773
2673 mutex_lock(&root->fs_info->fs_devices->device_list_mutex); 2774 mutex_lock(&root->fs_info->fs_devices->device_list_mutex);
2674 head = &root->fs_info->fs_devices->devices; 2775 head = &root->fs_info->fs_devices->devices;
2776
2777 if (do_barriers)
2778 barrier_all_devices(root->fs_info);
2779
2675 list_for_each_entry_rcu(dev, head, dev_list) { 2780 list_for_each_entry_rcu(dev, head, dev_list) {
2676 if (!dev->bdev) { 2781 if (!dev->bdev) {
2677 total_errors++; 2782 total_errors++;
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 9879bd474632..930ae8949737 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -467,13 +467,59 @@ static int cache_block_group(struct btrfs_block_group_cache *cache,
467 struct btrfs_root *root, 467 struct btrfs_root *root,
468 int load_cache_only) 468 int load_cache_only)
469{ 469{
470 DEFINE_WAIT(wait);
470 struct btrfs_fs_info *fs_info = cache->fs_info; 471 struct btrfs_fs_info *fs_info = cache->fs_info;
471 struct btrfs_caching_control *caching_ctl; 472 struct btrfs_caching_control *caching_ctl;
472 int ret = 0; 473 int ret = 0;
473 474
474 smp_mb(); 475 caching_ctl = kzalloc(sizeof(*caching_ctl), GFP_NOFS);
475 if (cache->cached != BTRFS_CACHE_NO) 476 BUG_ON(!caching_ctl);
477
478 INIT_LIST_HEAD(&caching_ctl->list);
479 mutex_init(&caching_ctl->mutex);
480 init_waitqueue_head(&caching_ctl->wait);
481 caching_ctl->block_group = cache;
482 caching_ctl->progress = cache->key.objectid;
483 atomic_set(&caching_ctl->count, 1);
484 caching_ctl->work.func = caching_thread;
485
486 spin_lock(&cache->lock);
487 /*
488 * This should be a rare occasion, but this could happen I think in the
489 * case where one thread starts to load the space cache info, and then
490 * some other thread starts a transaction commit which tries to do an
491 * allocation while the other thread is still loading the space cache
492 * info. The previous loop should have kept us from choosing this block
493 * group, but if we've moved to the state where we will wait on caching
494 * block groups we need to first check if we're doing a fast load here,
495 * so we can wait for it to finish, otherwise we could end up allocating
496 * from a block group who's cache gets evicted for one reason or
497 * another.
498 */
499 while (cache->cached == BTRFS_CACHE_FAST) {
500 struct btrfs_caching_control *ctl;
501
502 ctl = cache->caching_ctl;
503 atomic_inc(&ctl->count);
504 prepare_to_wait(&ctl->wait, &wait, TASK_UNINTERRUPTIBLE);
505 spin_unlock(&cache->lock);
506
507 schedule();
508
509 finish_wait(&ctl->wait, &wait);
510 put_caching_control(ctl);
511 spin_lock(&cache->lock);
512 }
513
514 if (cache->cached != BTRFS_CACHE_NO) {
515 spin_unlock(&cache->lock);
516 kfree(caching_ctl);
476 return 0; 517 return 0;
518 }
519 WARN_ON(cache->caching_ctl);
520 cache->caching_ctl = caching_ctl;
521 cache->cached = BTRFS_CACHE_FAST;
522 spin_unlock(&cache->lock);
477 523
478 /* 524 /*
479 * We can't do the read from on-disk cache during a commit since we need 525 * We can't do the read from on-disk cache during a commit since we need
@@ -484,56 +530,51 @@ static int cache_block_group(struct btrfs_block_group_cache *cache,
484 if (trans && (!trans->transaction->in_commit) && 530 if (trans && (!trans->transaction->in_commit) &&
485 (root && root != root->fs_info->tree_root) && 531 (root && root != root->fs_info->tree_root) &&
486 btrfs_test_opt(root, SPACE_CACHE)) { 532 btrfs_test_opt(root, SPACE_CACHE)) {
487 spin_lock(&cache->lock);
488 if (cache->cached != BTRFS_CACHE_NO) {
489 spin_unlock(&cache->lock);
490 return 0;
491 }
492 cache->cached = BTRFS_CACHE_STARTED;
493 spin_unlock(&cache->lock);
494
495 ret = load_free_space_cache(fs_info, cache); 533 ret = load_free_space_cache(fs_info, cache);
496 534
497 spin_lock(&cache->lock); 535 spin_lock(&cache->lock);
498 if (ret == 1) { 536 if (ret == 1) {
537 cache->caching_ctl = NULL;
499 cache->cached = BTRFS_CACHE_FINISHED; 538 cache->cached = BTRFS_CACHE_FINISHED;
500 cache->last_byte_to_unpin = (u64)-1; 539 cache->last_byte_to_unpin = (u64)-1;
501 } else { 540 } else {
502 cache->cached = BTRFS_CACHE_NO; 541 if (load_cache_only) {
542 cache->caching_ctl = NULL;
543 cache->cached = BTRFS_CACHE_NO;
544 } else {
545 cache->cached = BTRFS_CACHE_STARTED;
546 }
503 } 547 }
504 spin_unlock(&cache->lock); 548 spin_unlock(&cache->lock);
549 wake_up(&caching_ctl->wait);
505 if (ret == 1) { 550 if (ret == 1) {
551 put_caching_control(caching_ctl);
506 free_excluded_extents(fs_info->extent_root, cache); 552 free_excluded_extents(fs_info->extent_root, cache);
507 return 0; 553 return 0;
508 } 554 }
555 } else {
556 /*
557 * We are not going to do the fast caching, set cached to the
558 * appropriate value and wakeup any waiters.
559 */
560 spin_lock(&cache->lock);
561 if (load_cache_only) {
562 cache->caching_ctl = NULL;
563 cache->cached = BTRFS_CACHE_NO;
564 } else {
565 cache->cached = BTRFS_CACHE_STARTED;
566 }
567 spin_unlock(&cache->lock);
568 wake_up(&caching_ctl->wait);
509 } 569 }
510 570
511 if (load_cache_only) 571 if (load_cache_only) {
512 return 0; 572 put_caching_control(caching_ctl);
513
514 caching_ctl = kzalloc(sizeof(*caching_ctl), GFP_NOFS);
515 BUG_ON(!caching_ctl);
516
517 INIT_LIST_HEAD(&caching_ctl->list);
518 mutex_init(&caching_ctl->mutex);
519 init_waitqueue_head(&caching_ctl->wait);
520 caching_ctl->block_group = cache;
521 caching_ctl->progress = cache->key.objectid;
522 /* one for caching kthread, one for caching block group list */
523 atomic_set(&caching_ctl->count, 2);
524 caching_ctl->work.func = caching_thread;
525
526 spin_lock(&cache->lock);
527 if (cache->cached != BTRFS_CACHE_NO) {
528 spin_unlock(&cache->lock);
529 kfree(caching_ctl);
530 return 0; 573 return 0;
531 } 574 }
532 cache->caching_ctl = caching_ctl;
533 cache->cached = BTRFS_CACHE_STARTED;
534 spin_unlock(&cache->lock);
535 575
536 down_write(&fs_info->extent_commit_sem); 576 down_write(&fs_info->extent_commit_sem);
577 atomic_inc(&caching_ctl->count);
537 list_add_tail(&caching_ctl->list, &fs_info->caching_block_groups); 578 list_add_tail(&caching_ctl->list, &fs_info->caching_block_groups);
538 up_write(&fs_info->extent_commit_sem); 579 up_write(&fs_info->extent_commit_sem);
539 580
@@ -3797,16 +3838,16 @@ void btrfs_free_block_rsv(struct btrfs_root *root,
3797 kfree(rsv); 3838 kfree(rsv);
3798} 3839}
3799 3840
3800int btrfs_block_rsv_add(struct btrfs_root *root, 3841static inline int __block_rsv_add(struct btrfs_root *root,
3801 struct btrfs_block_rsv *block_rsv, 3842 struct btrfs_block_rsv *block_rsv,
3802 u64 num_bytes) 3843 u64 num_bytes, int flush)
3803{ 3844{
3804 int ret; 3845 int ret;
3805 3846
3806 if (num_bytes == 0) 3847 if (num_bytes == 0)
3807 return 0; 3848 return 0;
3808 3849
3809 ret = reserve_metadata_bytes(root, block_rsv, num_bytes, 1); 3850 ret = reserve_metadata_bytes(root, block_rsv, num_bytes, flush);
3810 if (!ret) { 3851 if (!ret) {
3811 block_rsv_add_bytes(block_rsv, num_bytes, 1); 3852 block_rsv_add_bytes(block_rsv, num_bytes, 1);
3812 return 0; 3853 return 0;
@@ -3815,22 +3856,18 @@ int btrfs_block_rsv_add(struct btrfs_root *root,
3815 return ret; 3856 return ret;
3816} 3857}
3817 3858
3859int btrfs_block_rsv_add(struct btrfs_root *root,
3860 struct btrfs_block_rsv *block_rsv,
3861 u64 num_bytes)
3862{
3863 return __block_rsv_add(root, block_rsv, num_bytes, 1);
3864}
3865
3818int btrfs_block_rsv_add_noflush(struct btrfs_root *root, 3866int btrfs_block_rsv_add_noflush(struct btrfs_root *root,
3819 struct btrfs_block_rsv *block_rsv, 3867 struct btrfs_block_rsv *block_rsv,
3820 u64 num_bytes) 3868 u64 num_bytes)
3821{ 3869{
3822 int ret; 3870 return __block_rsv_add(root, block_rsv, num_bytes, 0);
3823
3824 if (num_bytes == 0)
3825 return 0;
3826
3827 ret = reserve_metadata_bytes(root, block_rsv, num_bytes, 0);
3828 if (!ret) {
3829 block_rsv_add_bytes(block_rsv, num_bytes, 1);
3830 return 0;
3831 }
3832
3833 return ret;
3834} 3871}
3835 3872
3836int btrfs_block_rsv_check(struct btrfs_root *root, 3873int btrfs_block_rsv_check(struct btrfs_root *root,
@@ -4064,23 +4101,30 @@ int btrfs_snap_reserve_metadata(struct btrfs_trans_handle *trans,
4064 */ 4101 */
4065static unsigned drop_outstanding_extent(struct inode *inode) 4102static unsigned drop_outstanding_extent(struct inode *inode)
4066{ 4103{
4104 unsigned drop_inode_space = 0;
4067 unsigned dropped_extents = 0; 4105 unsigned dropped_extents = 0;
4068 4106
4069 BUG_ON(!BTRFS_I(inode)->outstanding_extents); 4107 BUG_ON(!BTRFS_I(inode)->outstanding_extents);
4070 BTRFS_I(inode)->outstanding_extents--; 4108 BTRFS_I(inode)->outstanding_extents--;
4071 4109
4110 if (BTRFS_I(inode)->outstanding_extents == 0 &&
4111 BTRFS_I(inode)->delalloc_meta_reserved) {
4112 drop_inode_space = 1;
4113 BTRFS_I(inode)->delalloc_meta_reserved = 0;
4114 }
4115
4072 /* 4116 /*
4073 * If we have more or the same amount of outsanding extents than we have 4117 * If we have more or the same amount of outsanding extents than we have
4074 * reserved then we need to leave the reserved extents count alone. 4118 * reserved then we need to leave the reserved extents count alone.
4075 */ 4119 */
4076 if (BTRFS_I(inode)->outstanding_extents >= 4120 if (BTRFS_I(inode)->outstanding_extents >=
4077 BTRFS_I(inode)->reserved_extents) 4121 BTRFS_I(inode)->reserved_extents)
4078 return 0; 4122 return drop_inode_space;
4079 4123
4080 dropped_extents = BTRFS_I(inode)->reserved_extents - 4124 dropped_extents = BTRFS_I(inode)->reserved_extents -
4081 BTRFS_I(inode)->outstanding_extents; 4125 BTRFS_I(inode)->outstanding_extents;
4082 BTRFS_I(inode)->reserved_extents -= dropped_extents; 4126 BTRFS_I(inode)->reserved_extents -= dropped_extents;
4083 return dropped_extents; 4127 return dropped_extents + drop_inode_space;
4084} 4128}
4085 4129
4086/** 4130/**
@@ -4166,9 +4210,18 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
4166 nr_extents = BTRFS_I(inode)->outstanding_extents - 4210 nr_extents = BTRFS_I(inode)->outstanding_extents -
4167 BTRFS_I(inode)->reserved_extents; 4211 BTRFS_I(inode)->reserved_extents;
4168 BTRFS_I(inode)->reserved_extents += nr_extents; 4212 BTRFS_I(inode)->reserved_extents += nr_extents;
4213 }
4169 4214
4170 to_reserve = btrfs_calc_trans_metadata_size(root, nr_extents); 4215 /*
4216 * Add an item to reserve for updating the inode when we complete the
4217 * delalloc io.
4218 */
4219 if (!BTRFS_I(inode)->delalloc_meta_reserved) {
4220 nr_extents++;
4221 BTRFS_I(inode)->delalloc_meta_reserved = 1;
4171 } 4222 }
4223
4224 to_reserve = btrfs_calc_trans_metadata_size(root, nr_extents);
4172 to_reserve += calc_csum_metadata_size(inode, num_bytes, 1); 4225 to_reserve += calc_csum_metadata_size(inode, num_bytes, 1);
4173 spin_unlock(&BTRFS_I(inode)->lock); 4226 spin_unlock(&BTRFS_I(inode)->lock);
4174 4227
@@ -5166,13 +5219,15 @@ search:
5166 } 5219 }
5167 5220
5168have_block_group: 5221have_block_group:
5169 if (unlikely(block_group->cached == BTRFS_CACHE_NO)) { 5222 cached = block_group_cache_done(block_group);
5223 if (unlikely(!cached)) {
5170 u64 free_percent; 5224 u64 free_percent;
5171 5225
5226 found_uncached_bg = true;
5172 ret = cache_block_group(block_group, trans, 5227 ret = cache_block_group(block_group, trans,
5173 orig_root, 1); 5228 orig_root, 1);
5174 if (block_group->cached == BTRFS_CACHE_FINISHED) 5229 if (block_group->cached == BTRFS_CACHE_FINISHED)
5175 goto have_block_group; 5230 goto alloc;
5176 5231
5177 free_percent = btrfs_block_group_used(&block_group->item); 5232 free_percent = btrfs_block_group_used(&block_group->item);
5178 free_percent *= 100; 5233 free_percent *= 100;
@@ -5194,7 +5249,6 @@ have_block_group:
5194 orig_root, 0); 5249 orig_root, 0);
5195 BUG_ON(ret); 5250 BUG_ON(ret);
5196 } 5251 }
5197 found_uncached_bg = true;
5198 5252
5199 /* 5253 /*
5200 * If loop is set for cached only, try the next block 5254 * If loop is set for cached only, try the next block
@@ -5204,10 +5258,7 @@ have_block_group:
5204 goto loop; 5258 goto loop;
5205 } 5259 }
5206 5260
5207 cached = block_group_cache_done(block_group); 5261alloc:
5208 if (unlikely(!cached))
5209 found_uncached_bg = true;
5210
5211 if (unlikely(block_group->ro)) 5262 if (unlikely(block_group->ro))
5212 goto loop; 5263 goto loop;
5213 5264
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 1f87c4d0e7a0..9472d3de5e52 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -2285,8 +2285,8 @@ static void end_bio_extent_readpage(struct bio *bio, int err)
2285 clean_io_failure(start, page); 2285 clean_io_failure(start, page);
2286 } 2286 }
2287 if (!uptodate) { 2287 if (!uptodate) {
2288 u64 failed_mirror; 2288 int failed_mirror;
2289 failed_mirror = (u64)bio->bi_bdev; 2289 failed_mirror = (int)(unsigned long)bio->bi_bdev;
2290 if (tree->ops && tree->ops->readpage_io_failed_hook) 2290 if (tree->ops && tree->ops->readpage_io_failed_hook)
2291 ret = tree->ops->readpage_io_failed_hook( 2291 ret = tree->ops->readpage_io_failed_hook(
2292 bio, page, start, end, 2292 bio, page, start, end,
@@ -3366,6 +3366,9 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
3366 return -ENOMEM; 3366 return -ENOMEM;
3367 path->leave_spinning = 1; 3367 path->leave_spinning = 1;
3368 3368
3369 start = ALIGN(start, BTRFS_I(inode)->root->sectorsize);
3370 len = ALIGN(len, BTRFS_I(inode)->root->sectorsize);
3371
3369 /* 3372 /*
3370 * lookup the last file extent. We're not using i_size here 3373 * lookup the last file extent. We're not using i_size here
3371 * because there might be preallocation past i_size 3374 * because there might be preallocation past i_size
@@ -3413,7 +3416,7 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
3413 lock_extent_bits(&BTRFS_I(inode)->io_tree, start, start + len, 0, 3416 lock_extent_bits(&BTRFS_I(inode)->io_tree, start, start + len, 0,
3414 &cached_state, GFP_NOFS); 3417 &cached_state, GFP_NOFS);
3415 3418
3416 em = get_extent_skip_holes(inode, off, last_for_get_extent, 3419 em = get_extent_skip_holes(inode, start, last_for_get_extent,
3417 get_extent); 3420 get_extent);
3418 if (!em) 3421 if (!em)
3419 goto out; 3422 goto out;
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index feb9be0e23bc..7604c3001322 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -70,7 +70,7 @@ struct extent_io_ops {
70 unsigned long bio_flags); 70 unsigned long bio_flags);
71 int (*readpage_io_hook)(struct page *page, u64 start, u64 end); 71 int (*readpage_io_hook)(struct page *page, u64 start, u64 end);
72 int (*readpage_io_failed_hook)(struct bio *bio, struct page *page, 72 int (*readpage_io_failed_hook)(struct bio *bio, struct page *page,
73 u64 start, u64 end, u64 failed_mirror, 73 u64 start, u64 end, int failed_mirror,
74 struct extent_state *state); 74 struct extent_state *state);
75 int (*writepage_io_failed_hook)(struct bio *bio, struct page *page, 75 int (*writepage_io_failed_hook)(struct bio *bio, struct page *page,
76 u64 start, u64 end, 76 u64 start, u64 end,
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index 7a15fcfb3e1f..6e5b7e463698 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -351,6 +351,11 @@ static int io_ctl_prepare_pages(struct io_ctl *io_ctl, struct inode *inode,
351 } 351 }
352 } 352 }
353 353
354 for (i = 0; i < io_ctl->num_pages; i++) {
355 clear_page_dirty_for_io(io_ctl->pages[i]);
356 set_page_extent_mapped(io_ctl->pages[i]);
357 }
358
354 return 0; 359 return 0;
355} 360}
356 361
@@ -537,6 +542,13 @@ static int io_ctl_read_entry(struct io_ctl *io_ctl,
537 struct btrfs_free_space *entry, u8 *type) 542 struct btrfs_free_space *entry, u8 *type)
538{ 543{
539 struct btrfs_free_space_entry *e; 544 struct btrfs_free_space_entry *e;
545 int ret;
546
547 if (!io_ctl->cur) {
548 ret = io_ctl_check_crc(io_ctl, io_ctl->index);
549 if (ret)
550 return ret;
551 }
540 552
541 e = io_ctl->cur; 553 e = io_ctl->cur;
542 entry->offset = le64_to_cpu(e->offset); 554 entry->offset = le64_to_cpu(e->offset);
@@ -550,10 +562,7 @@ static int io_ctl_read_entry(struct io_ctl *io_ctl,
550 562
551 io_ctl_unmap_page(io_ctl); 563 io_ctl_unmap_page(io_ctl);
552 564
553 if (io_ctl->index >= io_ctl->num_pages) 565 return 0;
554 return 0;
555
556 return io_ctl_check_crc(io_ctl, io_ctl->index);
557} 566}
558 567
559static int io_ctl_read_bitmap(struct io_ctl *io_ctl, 568static int io_ctl_read_bitmap(struct io_ctl *io_ctl,
@@ -561,9 +570,6 @@ static int io_ctl_read_bitmap(struct io_ctl *io_ctl,
561{ 570{
562 int ret; 571 int ret;
563 572
564 if (io_ctl->cur && io_ctl->cur != io_ctl->orig)
565 io_ctl_unmap_page(io_ctl);
566
567 ret = io_ctl_check_crc(io_ctl, io_ctl->index); 573 ret = io_ctl_check_crc(io_ctl, io_ctl->index);
568 if (ret) 574 if (ret)
569 return ret; 575 return ret;
@@ -699,6 +705,8 @@ int __load_free_space_cache(struct btrfs_root *root, struct inode *inode,
699 num_entries--; 705 num_entries--;
700 } 706 }
701 707
708 io_ctl_unmap_page(&io_ctl);
709
702 /* 710 /*
703 * We add the bitmaps at the end of the entries in order that 711 * We add the bitmaps at the end of the entries in order that
704 * the bitmap entries are added to the cache. 712 * the bitmap entries are added to the cache.
@@ -1841,7 +1849,13 @@ again:
1841 info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset), 1849 info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset),
1842 1, 0); 1850 1, 0);
1843 if (!info) { 1851 if (!info) {
1844 WARN_ON(1); 1852 /* the tree logging code might be calling us before we
1853 * have fully loaded the free space rbtree for this
1854 * block group. So it is possible the entry won't
1855 * be in the rbtree yet at all. The caching code
1856 * will make sure not to put it in the rbtree if
1857 * the logging code has pinned it.
1858 */
1845 goto out_lock; 1859 goto out_lock;
1846 } 1860 }
1847 } 1861 }
@@ -2448,16 +2462,23 @@ setup_cluster_bitmap(struct btrfs_block_group_cache *block_group,
2448{ 2462{
2449 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 2463 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
2450 struct btrfs_free_space *entry; 2464 struct btrfs_free_space *entry;
2451 struct rb_node *node;
2452 int ret = -ENOSPC; 2465 int ret = -ENOSPC;
2466 u64 bitmap_offset = offset_to_bitmap(ctl, offset);
2453 2467
2454 if (ctl->total_bitmaps == 0) 2468 if (ctl->total_bitmaps == 0)
2455 return -ENOSPC; 2469 return -ENOSPC;
2456 2470
2457 /* 2471 /*
2458 * First check our cached list of bitmaps and see if there is an entry 2472 * The bitmap that covers offset won't be in the list unless offset
2459 * here that will work. 2473 * is just its start offset.
2460 */ 2474 */
2475 entry = list_first_entry(bitmaps, struct btrfs_free_space, list);
2476 if (entry->offset != bitmap_offset) {
2477 entry = tree_search_offset(ctl, bitmap_offset, 1, 0);
2478 if (entry && list_empty(&entry->list))
2479 list_add(&entry->list, bitmaps);
2480 }
2481
2461 list_for_each_entry(entry, bitmaps, list) { 2482 list_for_each_entry(entry, bitmaps, list) {
2462 if (entry->bytes < min_bytes) 2483 if (entry->bytes < min_bytes)
2463 continue; 2484 continue;
@@ -2468,38 +2489,10 @@ setup_cluster_bitmap(struct btrfs_block_group_cache *block_group,
2468 } 2489 }
2469 2490
2470 /* 2491 /*
2471 * If we do have entries on our list and we are here then we didn't find 2492 * The bitmaps list has all the bitmaps that record free space
2472 * anything, so go ahead and get the next entry after the last entry in 2493 * starting after offset, so no more search is required.
2473 * this list and start the search from there.
2474 */ 2494 */
2475 if (!list_empty(bitmaps)) { 2495 return -ENOSPC;
2476 entry = list_entry(bitmaps->prev, struct btrfs_free_space,
2477 list);
2478 node = rb_next(&entry->offset_index);
2479 if (!node)
2480 return -ENOSPC;
2481 entry = rb_entry(node, struct btrfs_free_space, offset_index);
2482 goto search;
2483 }
2484
2485 entry = tree_search_offset(ctl, offset_to_bitmap(ctl, offset), 0, 1);
2486 if (!entry)
2487 return -ENOSPC;
2488
2489search:
2490 node = &entry->offset_index;
2491 do {
2492 entry = rb_entry(node, struct btrfs_free_space, offset_index);
2493 node = rb_next(&entry->offset_index);
2494 if (!entry->bitmap)
2495 continue;
2496 if (entry->bytes < min_bytes)
2497 continue;
2498 ret = btrfs_bitmap_cluster(block_group, entry, cluster, offset,
2499 bytes, min_bytes);
2500 } while (ret && node);
2501
2502 return ret;
2503} 2496}
2504 2497
2505/* 2498/*
@@ -2517,8 +2510,8 @@ int btrfs_find_space_cluster(struct btrfs_trans_handle *trans,
2517 u64 offset, u64 bytes, u64 empty_size) 2510 u64 offset, u64 bytes, u64 empty_size)
2518{ 2511{
2519 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 2512 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
2520 struct list_head bitmaps;
2521 struct btrfs_free_space *entry, *tmp; 2513 struct btrfs_free_space *entry, *tmp;
2514 LIST_HEAD(bitmaps);
2522 u64 min_bytes; 2515 u64 min_bytes;
2523 int ret; 2516 int ret;
2524 2517
@@ -2557,7 +2550,6 @@ int btrfs_find_space_cluster(struct btrfs_trans_handle *trans,
2557 goto out; 2550 goto out;
2558 } 2551 }
2559 2552
2560 INIT_LIST_HEAD(&bitmaps);
2561 ret = setup_cluster_no_bitmap(block_group, cluster, &bitmaps, offset, 2553 ret = setup_cluster_no_bitmap(block_group, cluster, &bitmaps, offset,
2562 bytes, min_bytes); 2554 bytes, min_bytes);
2563 if (ret) 2555 if (ret)
diff --git a/fs/btrfs/inode-map.c b/fs/btrfs/inode-map.c
index 53dcbdf446cd..f8962a957d65 100644
--- a/fs/btrfs/inode-map.c
+++ b/fs/btrfs/inode-map.c
@@ -398,6 +398,8 @@ int btrfs_save_ino_cache(struct btrfs_root *root,
398 struct btrfs_free_space_ctl *ctl = root->free_ino_ctl; 398 struct btrfs_free_space_ctl *ctl = root->free_ino_ctl;
399 struct btrfs_path *path; 399 struct btrfs_path *path;
400 struct inode *inode; 400 struct inode *inode;
401 struct btrfs_block_rsv *rsv;
402 u64 num_bytes;
401 u64 alloc_hint = 0; 403 u64 alloc_hint = 0;
402 int ret; 404 int ret;
403 int prealloc; 405 int prealloc;
@@ -421,11 +423,26 @@ int btrfs_save_ino_cache(struct btrfs_root *root,
421 if (!path) 423 if (!path)
422 return -ENOMEM; 424 return -ENOMEM;
423 425
426 rsv = trans->block_rsv;
427 trans->block_rsv = &root->fs_info->trans_block_rsv;
428
429 num_bytes = trans->bytes_reserved;
430 /*
431 * 1 item for inode item insertion if need
432 * 3 items for inode item update (in the worst case)
433 * 1 item for free space object
434 * 3 items for pre-allocation
435 */
436 trans->bytes_reserved = btrfs_calc_trans_metadata_size(root, 8);
437 ret = btrfs_block_rsv_add_noflush(root, trans->block_rsv,
438 trans->bytes_reserved);
439 if (ret)
440 goto out;
424again: 441again:
425 inode = lookup_free_ino_inode(root, path); 442 inode = lookup_free_ino_inode(root, path);
426 if (IS_ERR(inode) && PTR_ERR(inode) != -ENOENT) { 443 if (IS_ERR(inode) && PTR_ERR(inode) != -ENOENT) {
427 ret = PTR_ERR(inode); 444 ret = PTR_ERR(inode);
428 goto out; 445 goto out_release;
429 } 446 }
430 447
431 if (IS_ERR(inode)) { 448 if (IS_ERR(inode)) {
@@ -434,7 +451,7 @@ again:
434 451
435 ret = create_free_ino_inode(root, trans, path); 452 ret = create_free_ino_inode(root, trans, path);
436 if (ret) 453 if (ret)
437 goto out; 454 goto out_release;
438 goto again; 455 goto again;
439 } 456 }
440 457
@@ -477,11 +494,14 @@ again:
477 } 494 }
478 btrfs_free_reserved_data_space(inode, prealloc); 495 btrfs_free_reserved_data_space(inode, prealloc);
479 496
497 ret = btrfs_write_out_ino_cache(root, trans, path);
480out_put: 498out_put:
481 iput(inode); 499 iput(inode);
500out_release:
501 btrfs_block_rsv_release(root, trans->block_rsv, trans->bytes_reserved);
482out: 502out:
483 if (ret == 0) 503 trans->block_rsv = rsv;
484 ret = btrfs_write_out_ino_cache(root, trans, path); 504 trans->bytes_reserved = num_bytes;
485 505
486 btrfs_free_path(path); 506 btrfs_free_path(path);
487 return ret; 507 return ret;
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 966ddcc4c63d..526dd51a1966 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -93,6 +93,8 @@ static noinline int cow_file_range(struct inode *inode,
93 struct page *locked_page, 93 struct page *locked_page,
94 u64 start, u64 end, int *page_started, 94 u64 start, u64 end, int *page_started,
95 unsigned long *nr_written, int unlock); 95 unsigned long *nr_written, int unlock);
96static noinline int btrfs_update_inode_fallback(struct btrfs_trans_handle *trans,
97 struct btrfs_root *root, struct inode *inode);
96 98
97static int btrfs_init_inode_security(struct btrfs_trans_handle *trans, 99static int btrfs_init_inode_security(struct btrfs_trans_handle *trans,
98 struct inode *inode, struct inode *dir, 100 struct inode *inode, struct inode *dir,
@@ -1741,7 +1743,7 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
1741 trans = btrfs_join_transaction(root); 1743 trans = btrfs_join_transaction(root);
1742 BUG_ON(IS_ERR(trans)); 1744 BUG_ON(IS_ERR(trans));
1743 trans->block_rsv = &root->fs_info->delalloc_block_rsv; 1745 trans->block_rsv = &root->fs_info->delalloc_block_rsv;
1744 ret = btrfs_update_inode(trans, root, inode); 1746 ret = btrfs_update_inode_fallback(trans, root, inode);
1745 BUG_ON(ret); 1747 BUG_ON(ret);
1746 } 1748 }
1747 goto out; 1749 goto out;
@@ -1791,7 +1793,7 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
1791 1793
1792 ret = btrfs_ordered_update_i_size(inode, 0, ordered_extent); 1794 ret = btrfs_ordered_update_i_size(inode, 0, ordered_extent);
1793 if (!ret || !test_bit(BTRFS_ORDERED_PREALLOC, &ordered_extent->flags)) { 1795 if (!ret || !test_bit(BTRFS_ORDERED_PREALLOC, &ordered_extent->flags)) {
1794 ret = btrfs_update_inode(trans, root, inode); 1796 ret = btrfs_update_inode_fallback(trans, root, inode);
1795 BUG_ON(ret); 1797 BUG_ON(ret);
1796 } 1798 }
1797 ret = 0; 1799 ret = 0;
@@ -2199,6 +2201,9 @@ int btrfs_orphan_cleanup(struct btrfs_root *root)
2199 if (ret) 2201 if (ret)
2200 goto out; 2202 goto out;
2201 } 2203 }
2204 /* release the path since we're done with it */
2205 btrfs_release_path(path);
2206
2202 root->orphan_cleanup_state = ORPHAN_CLEANUP_DONE; 2207 root->orphan_cleanup_state = ORPHAN_CLEANUP_DONE;
2203 2208
2204 if (root->orphan_block_rsv) 2209 if (root->orphan_block_rsv)
@@ -2426,7 +2431,7 @@ static void fill_inode_item(struct btrfs_trans_handle *trans,
2426/* 2431/*
2427 * copy everything in the in-memory inode into the btree. 2432 * copy everything in the in-memory inode into the btree.
2428 */ 2433 */
2429noinline int btrfs_update_inode(struct btrfs_trans_handle *trans, 2434static noinline int btrfs_update_inode_item(struct btrfs_trans_handle *trans,
2430 struct btrfs_root *root, struct inode *inode) 2435 struct btrfs_root *root, struct inode *inode)
2431{ 2436{
2432 struct btrfs_inode_item *inode_item; 2437 struct btrfs_inode_item *inode_item;
@@ -2434,21 +2439,6 @@ noinline int btrfs_update_inode(struct btrfs_trans_handle *trans,
2434 struct extent_buffer *leaf; 2439 struct extent_buffer *leaf;
2435 int ret; 2440 int ret;
2436 2441
2437 /*
2438 * If the inode is a free space inode, we can deadlock during commit
2439 * if we put it into the delayed code.
2440 *
2441 * The data relocation inode should also be directly updated
2442 * without delay
2443 */
2444 if (!btrfs_is_free_space_inode(root, inode)
2445 && root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID) {
2446 ret = btrfs_delayed_update_inode(trans, root, inode);
2447 if (!ret)
2448 btrfs_set_inode_last_trans(trans, inode);
2449 return ret;
2450 }
2451
2452 path = btrfs_alloc_path(); 2442 path = btrfs_alloc_path();
2453 if (!path) 2443 if (!path)
2454 return -ENOMEM; 2444 return -ENOMEM;
@@ -2477,6 +2467,43 @@ failed:
2477} 2467}
2478 2468
2479/* 2469/*
2470 * copy everything in the in-memory inode into the btree.
2471 */
2472noinline int btrfs_update_inode(struct btrfs_trans_handle *trans,
2473 struct btrfs_root *root, struct inode *inode)
2474{
2475 int ret;
2476
2477 /*
2478 * If the inode is a free space inode, we can deadlock during commit
2479 * if we put it into the delayed code.
2480 *
2481 * The data relocation inode should also be directly updated
2482 * without delay
2483 */
2484 if (!btrfs_is_free_space_inode(root, inode)
2485 && root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID) {
2486 ret = btrfs_delayed_update_inode(trans, root, inode);
2487 if (!ret)
2488 btrfs_set_inode_last_trans(trans, inode);
2489 return ret;
2490 }
2491
2492 return btrfs_update_inode_item(trans, root, inode);
2493}
2494
2495static noinline int btrfs_update_inode_fallback(struct btrfs_trans_handle *trans,
2496 struct btrfs_root *root, struct inode *inode)
2497{
2498 int ret;
2499
2500 ret = btrfs_update_inode(trans, root, inode);
2501 if (ret == -ENOSPC)
2502 return btrfs_update_inode_item(trans, root, inode);
2503 return ret;
2504}
2505
2506/*
2480 * unlink helper that gets used here in inode.c and in the tree logging 2507 * unlink helper that gets used here in inode.c and in the tree logging
2481 * recovery code. It remove a link in a directory with a given name, and 2508 * recovery code. It remove a link in a directory with a given name, and
2482 * also drops the back refs in the inode to the directory 2509 * also drops the back refs in the inode to the directory
@@ -5632,7 +5659,7 @@ again:
5632 if (test_bit(BTRFS_ORDERED_NOCOW, &ordered->flags)) { 5659 if (test_bit(BTRFS_ORDERED_NOCOW, &ordered->flags)) {
5633 ret = btrfs_ordered_update_i_size(inode, 0, ordered); 5660 ret = btrfs_ordered_update_i_size(inode, 0, ordered);
5634 if (!ret) 5661 if (!ret)
5635 err = btrfs_update_inode(trans, root, inode); 5662 err = btrfs_update_inode_fallback(trans, root, inode);
5636 goto out; 5663 goto out;
5637 } 5664 }
5638 5665
@@ -5670,7 +5697,7 @@ again:
5670 add_pending_csums(trans, inode, ordered->file_offset, &ordered->list); 5697 add_pending_csums(trans, inode, ordered->file_offset, &ordered->list);
5671 ret = btrfs_ordered_update_i_size(inode, 0, ordered); 5698 ret = btrfs_ordered_update_i_size(inode, 0, ordered);
5672 if (!ret || !test_bit(BTRFS_ORDERED_PREALLOC, &ordered->flags)) 5699 if (!ret || !test_bit(BTRFS_ORDERED_PREALLOC, &ordered->flags))
5673 btrfs_update_inode(trans, root, inode); 5700 btrfs_update_inode_fallback(trans, root, inode);
5674 ret = 0; 5701 ret = 0;
5675out_unlock: 5702out_unlock:
5676 unlock_extent_cached(&BTRFS_I(inode)->io_tree, ordered->file_offset, 5703 unlock_extent_cached(&BTRFS_I(inode)->io_tree, ordered->file_offset,
@@ -6529,14 +6556,16 @@ end_trans:
6529 ret = btrfs_orphan_del(NULL, inode); 6556 ret = btrfs_orphan_del(NULL, inode);
6530 } 6557 }
6531 6558
6532 trans->block_rsv = &root->fs_info->trans_block_rsv; 6559 if (trans) {
6533 ret = btrfs_update_inode(trans, root, inode); 6560 trans->block_rsv = &root->fs_info->trans_block_rsv;
6534 if (ret && !err) 6561 ret = btrfs_update_inode(trans, root, inode);
6535 err = ret; 6562 if (ret && !err)
6563 err = ret;
6536 6564
6537 nr = trans->blocks_used; 6565 nr = trans->blocks_used;
6538 ret = btrfs_end_transaction_throttle(trans, root); 6566 ret = btrfs_end_transaction_throttle(trans, root);
6539 btrfs_btree_balance_dirty(root, nr); 6567 btrfs_btree_balance_dirty(root, nr);
6568 }
6540 6569
6541out: 6570out:
6542 btrfs_free_block_rsv(root, rsv); 6571 btrfs_free_block_rsv(root, rsv);
@@ -6605,6 +6634,7 @@ struct inode *btrfs_alloc_inode(struct super_block *sb)
6605 ei->orphan_meta_reserved = 0; 6634 ei->orphan_meta_reserved = 0;
6606 ei->dummy_inode = 0; 6635 ei->dummy_inode = 0;
6607 ei->in_defrag = 0; 6636 ei->in_defrag = 0;
6637 ei->delalloc_meta_reserved = 0;
6608 ei->force_compress = BTRFS_COMPRESS_NONE; 6638 ei->force_compress = BTRFS_COMPRESS_NONE;
6609 6639
6610 ei->delayed_node = NULL; 6640 ei->delayed_node = NULL;
@@ -6764,11 +6794,13 @@ static int btrfs_getattr(struct vfsmount *mnt,
6764 struct dentry *dentry, struct kstat *stat) 6794 struct dentry *dentry, struct kstat *stat)
6765{ 6795{
6766 struct inode *inode = dentry->d_inode; 6796 struct inode *inode = dentry->d_inode;
6797 u32 blocksize = inode->i_sb->s_blocksize;
6798
6767 generic_fillattr(inode, stat); 6799 generic_fillattr(inode, stat);
6768 stat->dev = BTRFS_I(inode)->root->anon_dev; 6800 stat->dev = BTRFS_I(inode)->root->anon_dev;
6769 stat->blksize = PAGE_CACHE_SIZE; 6801 stat->blksize = PAGE_CACHE_SIZE;
6770 stat->blocks = (inode_get_bytes(inode) + 6802 stat->blocks = (ALIGN(inode_get_bytes(inode), blocksize) +
6771 BTRFS_I(inode)->delalloc_bytes) >> 9; 6803 ALIGN(BTRFS_I(inode)->delalloc_bytes, blocksize)) >> 9;
6772 return 0; 6804 return 0;
6773} 6805}
6774 6806
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 4a34c472f126..a90e749ed6d2 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -1216,12 +1216,12 @@ static noinline int btrfs_ioctl_resize(struct btrfs_root *root,
1216 *devstr = '\0'; 1216 *devstr = '\0';
1217 devstr = vol_args->name; 1217 devstr = vol_args->name;
1218 devid = simple_strtoull(devstr, &end, 10); 1218 devid = simple_strtoull(devstr, &end, 10);
1219 printk(KERN_INFO "resizing devid %llu\n", 1219 printk(KERN_INFO "btrfs: resizing devid %llu\n",
1220 (unsigned long long)devid); 1220 (unsigned long long)devid);
1221 } 1221 }
1222 device = btrfs_find_device(root, devid, NULL, NULL); 1222 device = btrfs_find_device(root, devid, NULL, NULL);
1223 if (!device) { 1223 if (!device) {
1224 printk(KERN_INFO "resizer unable to find device %llu\n", 1224 printk(KERN_INFO "btrfs: resizer unable to find device %llu\n",
1225 (unsigned long long)devid); 1225 (unsigned long long)devid);
1226 ret = -EINVAL; 1226 ret = -EINVAL;
1227 goto out_unlock; 1227 goto out_unlock;
@@ -1267,7 +1267,7 @@ static noinline int btrfs_ioctl_resize(struct btrfs_root *root,
1267 do_div(new_size, root->sectorsize); 1267 do_div(new_size, root->sectorsize);
1268 new_size *= root->sectorsize; 1268 new_size *= root->sectorsize;
1269 1269
1270 printk(KERN_INFO "new size for %s is %llu\n", 1270 printk(KERN_INFO "btrfs: new size for %s is %llu\n",
1271 device->name, (unsigned long long)new_size); 1271 device->name, (unsigned long long)new_size);
1272 1272
1273 if (new_size > old_size) { 1273 if (new_size > old_size) {
@@ -2930,11 +2930,13 @@ static long btrfs_ioctl_ino_to_path(struct btrfs_root *root, void __user *arg)
2930 goto out; 2930 goto out;
2931 2931
2932 for (i = 0; i < ipath->fspath->elem_cnt; ++i) { 2932 for (i = 0; i < ipath->fspath->elem_cnt; ++i) {
2933 rel_ptr = ipath->fspath->val[i] - (u64)ipath->fspath->val; 2933 rel_ptr = ipath->fspath->val[i] -
2934 (u64)(unsigned long)ipath->fspath->val;
2934 ipath->fspath->val[i] = rel_ptr; 2935 ipath->fspath->val[i] = rel_ptr;
2935 } 2936 }
2936 2937
2937 ret = copy_to_user((void *)ipa->fspath, (void *)ipath->fspath, size); 2938 ret = copy_to_user((void *)(unsigned long)ipa->fspath,
2939 (void *)(unsigned long)ipath->fspath, size);
2938 if (ret) { 2940 if (ret) {
2939 ret = -EFAULT; 2941 ret = -EFAULT;
2940 goto out; 2942 goto out;
@@ -3017,7 +3019,8 @@ static long btrfs_ioctl_logical_to_ino(struct btrfs_root *root,
3017 if (ret < 0) 3019 if (ret < 0)
3018 goto out; 3020 goto out;
3019 3021
3020 ret = copy_to_user((void *)loi->inodes, (void *)inodes, size); 3022 ret = copy_to_user((void *)(unsigned long)loi->inodes,
3023 (void *)(unsigned long)inodes, size);
3021 if (ret) 3024 if (ret)
3022 ret = -EFAULT; 3025 ret = -EFAULT;
3023 3026
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index 24d654ce7a06..dff29d5e151a 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -1174,6 +1174,8 @@ static int clone_backref_node(struct btrfs_trans_handle *trans,
1174 list_add_tail(&new_edge->list[UPPER], 1174 list_add_tail(&new_edge->list[UPPER],
1175 &new_node->lower); 1175 &new_node->lower);
1176 } 1176 }
1177 } else {
1178 list_add_tail(&new_node->lower, &cache->leaves);
1177 } 1179 }
1178 1180
1179 rb_node = tree_insert(&cache->rb_root, new_node->bytenr, 1181 rb_node = tree_insert(&cache->rb_root, new_node->bytenr,
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index ed11d3866afd..fab420db5121 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -272,7 +272,7 @@ static int scrub_print_warning_inode(u64 inum, u64 offset, u64 root, void *ctx)
272 swarn->logical, swarn->dev->name, 272 swarn->logical, swarn->dev->name,
273 (unsigned long long)swarn->sector, root, inum, offset, 273 (unsigned long long)swarn->sector, root, inum, offset,
274 min(isize - offset, (u64)PAGE_SIZE), nlink, 274 min(isize - offset, (u64)PAGE_SIZE), nlink,
275 (char *)ipath->fspath->val[i]); 275 (char *)(unsigned long)ipath->fspath->val[i]);
276 276
277 free_ipath(ipath); 277 free_ipath(ipath);
278 return 0; 278 return 0;
@@ -944,50 +944,18 @@ static int scrub_checksum_super(struct scrub_bio *sbio, void *buffer)
944static int scrub_submit(struct scrub_dev *sdev) 944static int scrub_submit(struct scrub_dev *sdev)
945{ 945{
946 struct scrub_bio *sbio; 946 struct scrub_bio *sbio;
947 struct bio *bio;
948 int i;
949 947
950 if (sdev->curr == -1) 948 if (sdev->curr == -1)
951 return 0; 949 return 0;
952 950
953 sbio = sdev->bios[sdev->curr]; 951 sbio = sdev->bios[sdev->curr];
954
955 bio = bio_alloc(GFP_NOFS, sbio->count);
956 if (!bio)
957 goto nomem;
958
959 bio->bi_private = sbio;
960 bio->bi_end_io = scrub_bio_end_io;
961 bio->bi_bdev = sdev->dev->bdev;
962 bio->bi_sector = sbio->physical >> 9;
963
964 for (i = 0; i < sbio->count; ++i) {
965 struct page *page;
966 int ret;
967
968 page = alloc_page(GFP_NOFS);
969 if (!page)
970 goto nomem;
971
972 ret = bio_add_page(bio, page, PAGE_SIZE, 0);
973 if (!ret) {
974 __free_page(page);
975 goto nomem;
976 }
977 }
978
979 sbio->err = 0; 952 sbio->err = 0;
980 sdev->curr = -1; 953 sdev->curr = -1;
981 atomic_inc(&sdev->in_flight); 954 atomic_inc(&sdev->in_flight);
982 955
983 submit_bio(READ, bio); 956 submit_bio(READ, sbio->bio);
984 957
985 return 0; 958 return 0;
986
987nomem:
988 scrub_free_bio(bio);
989
990 return -ENOMEM;
991} 959}
992 960
993static int scrub_page(struct scrub_dev *sdev, u64 logical, u64 len, 961static int scrub_page(struct scrub_dev *sdev, u64 logical, u64 len,
@@ -995,6 +963,8 @@ static int scrub_page(struct scrub_dev *sdev, u64 logical, u64 len,
995 u8 *csum, int force) 963 u8 *csum, int force)
996{ 964{
997 struct scrub_bio *sbio; 965 struct scrub_bio *sbio;
966 struct page *page;
967 int ret;
998 968
999again: 969again:
1000 /* 970 /*
@@ -1015,12 +985,22 @@ again:
1015 } 985 }
1016 sbio = sdev->bios[sdev->curr]; 986 sbio = sdev->bios[sdev->curr];
1017 if (sbio->count == 0) { 987 if (sbio->count == 0) {
988 struct bio *bio;
989
1018 sbio->physical = physical; 990 sbio->physical = physical;
1019 sbio->logical = logical; 991 sbio->logical = logical;
992 bio = bio_alloc(GFP_NOFS, SCRUB_PAGES_PER_BIO);
993 if (!bio)
994 return -ENOMEM;
995
996 bio->bi_private = sbio;
997 bio->bi_end_io = scrub_bio_end_io;
998 bio->bi_bdev = sdev->dev->bdev;
999 bio->bi_sector = sbio->physical >> 9;
1000 sbio->err = 0;
1001 sbio->bio = bio;
1020 } else if (sbio->physical + sbio->count * PAGE_SIZE != physical || 1002 } else if (sbio->physical + sbio->count * PAGE_SIZE != physical ||
1021 sbio->logical + sbio->count * PAGE_SIZE != logical) { 1003 sbio->logical + sbio->count * PAGE_SIZE != logical) {
1022 int ret;
1023
1024 ret = scrub_submit(sdev); 1004 ret = scrub_submit(sdev);
1025 if (ret) 1005 if (ret)
1026 return ret; 1006 return ret;
@@ -1030,6 +1010,20 @@ again:
1030 sbio->spag[sbio->count].generation = gen; 1010 sbio->spag[sbio->count].generation = gen;
1031 sbio->spag[sbio->count].have_csum = 0; 1011 sbio->spag[sbio->count].have_csum = 0;
1032 sbio->spag[sbio->count].mirror_num = mirror_num; 1012 sbio->spag[sbio->count].mirror_num = mirror_num;
1013
1014 page = alloc_page(GFP_NOFS);
1015 if (!page)
1016 return -ENOMEM;
1017
1018 ret = bio_add_page(sbio->bio, page, PAGE_SIZE, 0);
1019 if (!ret) {
1020 __free_page(page);
1021 ret = scrub_submit(sdev);
1022 if (ret)
1023 return ret;
1024 goto again;
1025 }
1026
1033 if (csum) { 1027 if (csum) {
1034 sbio->spag[sbio->count].have_csum = 1; 1028 sbio->spag[sbio->count].have_csum = 1;
1035 memcpy(sbio->spag[sbio->count].csum, csum, sdev->csum_size); 1029 memcpy(sbio->spag[sbio->count].csum, csum, sdev->csum_size);
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 57080dffdfc6..17ee7fc5e64e 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -197,7 +197,7 @@ static match_table_t tokens = {
197 {Opt_subvolrootid, "subvolrootid=%d"}, 197 {Opt_subvolrootid, "subvolrootid=%d"},
198 {Opt_defrag, "autodefrag"}, 198 {Opt_defrag, "autodefrag"},
199 {Opt_inode_cache, "inode_cache"}, 199 {Opt_inode_cache, "inode_cache"},
200 {Opt_no_space_cache, "no_space_cache"}, 200 {Opt_no_space_cache, "nospace_cache"},
201 {Opt_recovery, "recovery"}, 201 {Opt_recovery, "recovery"},
202 {Opt_err, NULL}, 202 {Opt_err, NULL},
203}; 203};
@@ -448,6 +448,7 @@ static int btrfs_parse_early_options(const char *options, fmode_t flags,
448 token = match_token(p, tokens, args); 448 token = match_token(p, tokens, args);
449 switch (token) { 449 switch (token) {
450 case Opt_subvol: 450 case Opt_subvol:
451 kfree(*subvol_name);
451 *subvol_name = match_strdup(&args[0]); 452 *subvol_name = match_strdup(&args[0]);
452 break; 453 break;
453 case Opt_subvolid: 454 case Opt_subvolid:
@@ -710,7 +711,7 @@ static int btrfs_show_options(struct seq_file *seq, struct vfsmount *vfs)
710 if (btrfs_test_opt(root, SPACE_CACHE)) 711 if (btrfs_test_opt(root, SPACE_CACHE))
711 seq_puts(seq, ",space_cache"); 712 seq_puts(seq, ",space_cache");
712 else 713 else
713 seq_puts(seq, ",no_space_cache"); 714 seq_puts(seq, ",nospace_cache");
714 if (btrfs_test_opt(root, CLEAR_CACHE)) 715 if (btrfs_test_opt(root, CLEAR_CACHE))
715 seq_puts(seq, ",clear_cache"); 716 seq_puts(seq, ",clear_cache");
716 if (btrfs_test_opt(root, USER_SUBVOL_RM_ALLOWED)) 717 if (btrfs_test_opt(root, USER_SUBVOL_RM_ALLOWED))
@@ -824,13 +825,9 @@ static char *setup_root_args(char *args)
824static struct dentry *mount_subvol(const char *subvol_name, int flags, 825static struct dentry *mount_subvol(const char *subvol_name, int flags,
825 const char *device_name, char *data) 826 const char *device_name, char *data)
826{ 827{
827 struct super_block *s;
828 struct dentry *root; 828 struct dentry *root;
829 struct vfsmount *mnt; 829 struct vfsmount *mnt;
830 struct mnt_namespace *ns_private;
831 char *newargs; 830 char *newargs;
832 struct path path;
833 int error;
834 831
835 newargs = setup_root_args(data); 832 newargs = setup_root_args(data);
836 if (!newargs) 833 if (!newargs)
@@ -841,39 +838,17 @@ static struct dentry *mount_subvol(const char *subvol_name, int flags,
841 if (IS_ERR(mnt)) 838 if (IS_ERR(mnt))
842 return ERR_CAST(mnt); 839 return ERR_CAST(mnt);
843 840
844 ns_private = create_mnt_ns(mnt); 841 root = mount_subtree(mnt, subvol_name);
845 if (IS_ERR(ns_private)) {
846 mntput(mnt);
847 return ERR_CAST(ns_private);
848 }
849
850 /*
851 * This will trigger the automount of the subvol so we can just
852 * drop the mnt we have here and return the dentry that we
853 * found.
854 */
855 error = vfs_path_lookup(mnt->mnt_root, mnt, subvol_name,
856 LOOKUP_FOLLOW, &path);
857 put_mnt_ns(ns_private);
858 if (error)
859 return ERR_PTR(error);
860 842
861 if (!is_subvolume_inode(path.dentry->d_inode)) { 843 if (!IS_ERR(root) && !is_subvolume_inode(root->d_inode)) {
862 path_put(&path); 844 struct super_block *s = root->d_sb;
863 mntput(mnt); 845 dput(root);
864 error = -EINVAL; 846 root = ERR_PTR(-EINVAL);
847 deactivate_locked_super(s);
865 printk(KERN_ERR "btrfs: '%s' is not a valid subvolume\n", 848 printk(KERN_ERR "btrfs: '%s' is not a valid subvolume\n",
866 subvol_name); 849 subvol_name);
867 return ERR_PTR(-EINVAL);
868 } 850 }
869 851
870 /* Get a ref to the sb and the dentry we found and return it */
871 s = path.mnt->mnt_sb;
872 atomic_inc(&s->s_active);
873 root = dget(path.dentry);
874 path_put(&path);
875 down_write(&s->s_umount);
876
877 return root; 852 return root;
878} 853}
879 854
@@ -890,7 +865,6 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags,
890 struct super_block *s; 865 struct super_block *s;
891 struct dentry *root; 866 struct dentry *root;
892 struct btrfs_fs_devices *fs_devices = NULL; 867 struct btrfs_fs_devices *fs_devices = NULL;
893 struct btrfs_root *tree_root = NULL;
894 struct btrfs_fs_info *fs_info = NULL; 868 struct btrfs_fs_info *fs_info = NULL;
895 fmode_t mode = FMODE_READ; 869 fmode_t mode = FMODE_READ;
896 char *subvol_name = NULL; 870 char *subvol_name = NULL;
@@ -904,8 +878,10 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags,
904 error = btrfs_parse_early_options(data, mode, fs_type, 878 error = btrfs_parse_early_options(data, mode, fs_type,
905 &subvol_name, &subvol_objectid, 879 &subvol_name, &subvol_objectid,
906 &subvol_rootid, &fs_devices); 880 &subvol_rootid, &fs_devices);
907 if (error) 881 if (error) {
882 kfree(subvol_name);
908 return ERR_PTR(error); 883 return ERR_PTR(error);
884 }
909 885
910 if (subvol_name) { 886 if (subvol_name) {
911 root = mount_subvol(subvol_name, flags, device_name, data); 887 root = mount_subvol(subvol_name, flags, device_name, data);
@@ -917,15 +893,6 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags,
917 if (error) 893 if (error)
918 return ERR_PTR(error); 894 return ERR_PTR(error);
919 895
920 error = btrfs_open_devices(fs_devices, mode, fs_type);
921 if (error)
922 return ERR_PTR(error);
923
924 if (!(flags & MS_RDONLY) && fs_devices->rw_devices == 0) {
925 error = -EACCES;
926 goto error_close_devices;
927 }
928
929 /* 896 /*
930 * Setup a dummy root and fs_info for test/set super. This is because 897 * Setup a dummy root and fs_info for test/set super. This is because
931 * we don't actually fill this stuff out until open_ctree, but we need 898 * we don't actually fill this stuff out until open_ctree, but we need
@@ -933,24 +900,36 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags,
933 * then open_ctree will properly initialize everything later. 900 * then open_ctree will properly initialize everything later.
934 */ 901 */
935 fs_info = kzalloc(sizeof(struct btrfs_fs_info), GFP_NOFS); 902 fs_info = kzalloc(sizeof(struct btrfs_fs_info), GFP_NOFS);
936 tree_root = kzalloc(sizeof(struct btrfs_root), GFP_NOFS); 903 if (!fs_info)
937 if (!fs_info || !tree_root) { 904 return ERR_PTR(-ENOMEM);
905
906 fs_info->tree_root = kzalloc(sizeof(struct btrfs_root), GFP_NOFS);
907 if (!fs_info->tree_root) {
938 error = -ENOMEM; 908 error = -ENOMEM;
939 goto error_close_devices; 909 goto error_fs_info;
940 } 910 }
941 fs_info->tree_root = tree_root; 911 fs_info->tree_root->fs_info = fs_info;
942 fs_info->fs_devices = fs_devices; 912 fs_info->fs_devices = fs_devices;
943 tree_root->fs_info = fs_info;
944 913
945 fs_info->super_copy = kzalloc(BTRFS_SUPER_INFO_SIZE, GFP_NOFS); 914 fs_info->super_copy = kzalloc(BTRFS_SUPER_INFO_SIZE, GFP_NOFS);
946 fs_info->super_for_commit = kzalloc(BTRFS_SUPER_INFO_SIZE, GFP_NOFS); 915 fs_info->super_for_commit = kzalloc(BTRFS_SUPER_INFO_SIZE, GFP_NOFS);
947 if (!fs_info->super_copy || !fs_info->super_for_commit) { 916 if (!fs_info->super_copy || !fs_info->super_for_commit) {
948 error = -ENOMEM; 917 error = -ENOMEM;
918 goto error_fs_info;
919 }
920
921 error = btrfs_open_devices(fs_devices, mode, fs_type);
922 if (error)
923 goto error_fs_info;
924
925 if (!(flags & MS_RDONLY) && fs_devices->rw_devices == 0) {
926 error = -EACCES;
949 goto error_close_devices; 927 goto error_close_devices;
950 } 928 }
951 929
952 bdev = fs_devices->latest_bdev; 930 bdev = fs_devices->latest_bdev;
953 s = sget(fs_type, btrfs_test_super, btrfs_set_super, tree_root); 931 s = sget(fs_type, btrfs_test_super, btrfs_set_super,
932 fs_info->tree_root);
954 if (IS_ERR(s)) { 933 if (IS_ERR(s)) {
955 error = PTR_ERR(s); 934 error = PTR_ERR(s);
956 goto error_close_devices; 935 goto error_close_devices;
@@ -959,12 +938,12 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags,
959 if (s->s_root) { 938 if (s->s_root) {
960 if ((flags ^ s->s_flags) & MS_RDONLY) { 939 if ((flags ^ s->s_flags) & MS_RDONLY) {
961 deactivate_locked_super(s); 940 deactivate_locked_super(s);
962 return ERR_PTR(-EBUSY); 941 error = -EBUSY;
942 goto error_close_devices;
963 } 943 }
964 944
965 btrfs_close_devices(fs_devices); 945 btrfs_close_devices(fs_devices);
966 free_fs_info(fs_info); 946 free_fs_info(fs_info);
967 kfree(tree_root);
968 } else { 947 } else {
969 char b[BDEVNAME_SIZE]; 948 char b[BDEVNAME_SIZE];
970 949
@@ -991,8 +970,8 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags,
991 970
992error_close_devices: 971error_close_devices:
993 btrfs_close_devices(fs_devices); 972 btrfs_close_devices(fs_devices);
973error_fs_info:
994 free_fs_info(fs_info); 974 free_fs_info(fs_info);
995 kfree(tree_root);
996 return ERR_PTR(error); 975 return ERR_PTR(error);
997} 976}
998 977
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 960835eaf4da..81376d94cd3c 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -785,6 +785,10 @@ static noinline int commit_fs_roots(struct btrfs_trans_handle *trans,
785 785
786 btrfs_save_ino_cache(root, trans); 786 btrfs_save_ino_cache(root, trans);
787 787
788 /* see comments in should_cow_block() */
789 root->force_cow = 0;
790 smp_wmb();
791
788 if (root->commit_root != root->node) { 792 if (root->commit_root != root->node) {
789 mutex_lock(&root->fs_commit_mutex); 793 mutex_lock(&root->fs_commit_mutex);
790 switch_commit_root(root); 794 switch_commit_root(root);
@@ -882,8 +886,8 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
882 btrfs_reloc_pre_snapshot(trans, pending, &to_reserve); 886 btrfs_reloc_pre_snapshot(trans, pending, &to_reserve);
883 887
884 if (to_reserve > 0) { 888 if (to_reserve > 0) {
885 ret = btrfs_block_rsv_add(root, &pending->block_rsv, 889 ret = btrfs_block_rsv_add_noflush(root, &pending->block_rsv,
886 to_reserve); 890 to_reserve);
887 if (ret) { 891 if (ret) {
888 pending->error = ret; 892 pending->error = ret;
889 goto fail; 893 goto fail;
@@ -947,6 +951,10 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
947 btrfs_tree_unlock(old); 951 btrfs_tree_unlock(old);
948 free_extent_buffer(old); 952 free_extent_buffer(old);
949 953
954 /* see comments in should_cow_block() */
955 root->force_cow = 1;
956 smp_wmb();
957
950 btrfs_set_root_node(new_root_item, tmp); 958 btrfs_set_root_node(new_root_item, tmp);
951 /* record when the snapshot was created in key.offset */ 959 /* record when the snapshot was created in key.offset */
952 key.offset = trans->transid; 960 key.offset = trans->transid;
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index f8e2943101a1..c37433d3cd82 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -999,7 +999,7 @@ static int btrfs_free_dev_extent(struct btrfs_trans_handle *trans,
999 key.objectid = device->devid; 999 key.objectid = device->devid;
1000 key.offset = start; 1000 key.offset = start;
1001 key.type = BTRFS_DEV_EXTENT_KEY; 1001 key.type = BTRFS_DEV_EXTENT_KEY;
1002 1002again:
1003 ret = btrfs_search_slot(trans, root, &key, path, -1, 1); 1003 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
1004 if (ret > 0) { 1004 if (ret > 0) {
1005 ret = btrfs_previous_item(root, path, key.objectid, 1005 ret = btrfs_previous_item(root, path, key.objectid,
@@ -1012,6 +1012,9 @@ static int btrfs_free_dev_extent(struct btrfs_trans_handle *trans,
1012 struct btrfs_dev_extent); 1012 struct btrfs_dev_extent);
1013 BUG_ON(found_key.offset > start || found_key.offset + 1013 BUG_ON(found_key.offset > start || found_key.offset +
1014 btrfs_dev_extent_length(leaf, extent) < start); 1014 btrfs_dev_extent_length(leaf, extent) < start);
1015 key = found_key;
1016 btrfs_release_path(path);
1017 goto again;
1015 } else if (ret == 0) { 1018 } else if (ret == 0) {
1016 leaf = path->nodes[0]; 1019 leaf = path->nodes[0];
1017 extent = btrfs_item_ptr(leaf, path->slots[0], 1020 extent = btrfs_item_ptr(leaf, path->slots[0],
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
index ab5b1c49f352..78f2d4d4f37f 100644
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -100,6 +100,12 @@ struct btrfs_device {
100 struct reada_zone *reada_curr_zone; 100 struct reada_zone *reada_curr_zone;
101 struct radix_tree_root reada_zones; 101 struct radix_tree_root reada_zones;
102 struct radix_tree_root reada_extents; 102 struct radix_tree_root reada_extents;
103
104 /* for sending down flush barriers */
105 struct bio *flush_bio;
106 struct completion flush_wait;
107 int nobarriers;
108
103}; 109};
104 110
105struct btrfs_fs_devices { 111struct btrfs_fs_devices {
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index 2abd0dfad7f8..bca3948e9dbf 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -1143,7 +1143,7 @@ static void ceph_d_prune(struct dentry *dentry)
1143{ 1143{
1144 struct ceph_dentry_info *di; 1144 struct ceph_dentry_info *di;
1145 1145
1146 dout("d_release %p\n", dentry); 1146 dout("ceph_d_prune %p\n", dentry);
1147 1147
1148 /* do we have a valid parent? */ 1148 /* do we have a valid parent? */
1149 if (!dentry->d_parent || IS_ROOT(dentry)) 1149 if (!dentry->d_parent || IS_ROOT(dentry))
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index e392bfce84a3..116f36502f17 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -1328,12 +1328,13 @@ int ceph_inode_set_size(struct inode *inode, loff_t size)
1328 */ 1328 */
1329void ceph_queue_writeback(struct inode *inode) 1329void ceph_queue_writeback(struct inode *inode)
1330{ 1330{
1331 ihold(inode);
1331 if (queue_work(ceph_inode_to_client(inode)->wb_wq, 1332 if (queue_work(ceph_inode_to_client(inode)->wb_wq,
1332 &ceph_inode(inode)->i_wb_work)) { 1333 &ceph_inode(inode)->i_wb_work)) {
1333 dout("ceph_queue_writeback %p\n", inode); 1334 dout("ceph_queue_writeback %p\n", inode);
1334 ihold(inode);
1335 } else { 1335 } else {
1336 dout("ceph_queue_writeback %p failed\n", inode); 1336 dout("ceph_queue_writeback %p failed\n", inode);
1337 iput(inode);
1337 } 1338 }
1338} 1339}
1339 1340
@@ -1353,12 +1354,13 @@ static void ceph_writeback_work(struct work_struct *work)
1353 */ 1354 */
1354void ceph_queue_invalidate(struct inode *inode) 1355void ceph_queue_invalidate(struct inode *inode)
1355{ 1356{
1357 ihold(inode);
1356 if (queue_work(ceph_inode_to_client(inode)->pg_inv_wq, 1358 if (queue_work(ceph_inode_to_client(inode)->pg_inv_wq,
1357 &ceph_inode(inode)->i_pg_inv_work)) { 1359 &ceph_inode(inode)->i_pg_inv_work)) {
1358 dout("ceph_queue_invalidate %p\n", inode); 1360 dout("ceph_queue_invalidate %p\n", inode);
1359 ihold(inode);
1360 } else { 1361 } else {
1361 dout("ceph_queue_invalidate %p failed\n", inode); 1362 dout("ceph_queue_invalidate %p failed\n", inode);
1363 iput(inode);
1362 } 1364 }
1363} 1365}
1364 1366
@@ -1434,13 +1436,14 @@ void ceph_queue_vmtruncate(struct inode *inode)
1434{ 1436{
1435 struct ceph_inode_info *ci = ceph_inode(inode); 1437 struct ceph_inode_info *ci = ceph_inode(inode);
1436 1438
1439 ihold(inode);
1437 if (queue_work(ceph_sb_to_client(inode->i_sb)->trunc_wq, 1440 if (queue_work(ceph_sb_to_client(inode->i_sb)->trunc_wq,
1438 &ci->i_vmtruncate_work)) { 1441 &ci->i_vmtruncate_work)) {
1439 dout("ceph_queue_vmtruncate %p\n", inode); 1442 dout("ceph_queue_vmtruncate %p\n", inode);
1440 ihold(inode);
1441 } else { 1443 } else {
1442 dout("ceph_queue_vmtruncate %p failed, pending=%d\n", 1444 dout("ceph_queue_vmtruncate %p failed, pending=%d\n",
1443 inode, ci->i_truncate_pending); 1445 inode, ci->i_truncate_pending);
1446 iput(inode);
1444 } 1447 }
1445} 1448}
1446 1449
diff --git a/fs/ceph/super.c b/fs/ceph/super.c
index a90846fac759..8dc73a594a90 100644
--- a/fs/ceph/super.c
+++ b/fs/ceph/super.c
@@ -638,10 +638,12 @@ static struct dentry *open_root_dentry(struct ceph_fs_client *fsc,
638 if (err == 0) { 638 if (err == 0) {
639 dout("open_root_inode success\n"); 639 dout("open_root_inode success\n");
640 if (ceph_ino(req->r_target_inode) == CEPH_INO_ROOT && 640 if (ceph_ino(req->r_target_inode) == CEPH_INO_ROOT &&
641 fsc->sb->s_root == NULL) 641 fsc->sb->s_root == NULL) {
642 root = d_alloc_root(req->r_target_inode); 642 root = d_alloc_root(req->r_target_inode);
643 else 643 ceph_init_dentry(root);
644 } else {
644 root = d_obtain_alias(req->r_target_inode); 645 root = d_obtain_alias(req->r_target_inode);
646 }
645 req->r_target_inode = NULL; 647 req->r_target_inode = NULL;
646 dout("open_root_inode success, root dentry is %p\n", root); 648 dout("open_root_inode success, root dentry is %p\n", root);
647 } else { 649 } else {
diff --git a/fs/dcache.c b/fs/dcache.c
index a901c6901bce..10ba92def3f6 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -36,6 +36,7 @@
36#include <linux/bit_spinlock.h> 36#include <linux/bit_spinlock.h>
37#include <linux/rculist_bl.h> 37#include <linux/rculist_bl.h>
38#include <linux/prefetch.h> 38#include <linux/prefetch.h>
39#include <linux/ratelimit.h>
39#include "internal.h" 40#include "internal.h"
40 41
41/* 42/*
@@ -2383,8 +2384,16 @@ struct dentry *d_materialise_unique(struct dentry *dentry, struct inode *inode)
2383 actual = __d_unalias(inode, dentry, alias); 2384 actual = __d_unalias(inode, dentry, alias);
2384 } 2385 }
2385 write_sequnlock(&rename_lock); 2386 write_sequnlock(&rename_lock);
2386 if (IS_ERR(actual)) 2387 if (IS_ERR(actual)) {
2388 if (PTR_ERR(actual) == -ELOOP)
2389 pr_warn_ratelimited(
2390 "VFS: Lookup of '%s' in %s %s"
2391 " would have caused loop\n",
2392 dentry->d_name.name,
2393 inode->i_sb->s_type->name,
2394 inode->i_sb->s_id);
2387 dput(alias); 2395 dput(alias);
2396 }
2388 goto out_nolock; 2397 goto out_nolock;
2389 } 2398 }
2390 } 2399 }
diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c
index 58609bde3b9f..2a834255c75d 100644
--- a/fs/ecryptfs/crypto.c
+++ b/fs/ecryptfs/crypto.c
@@ -967,7 +967,7 @@ static void ecryptfs_set_default_crypt_stat_vals(
967 967
968/** 968/**
969 * ecryptfs_new_file_context 969 * ecryptfs_new_file_context
970 * @ecryptfs_dentry: The eCryptfs dentry 970 * @ecryptfs_inode: The eCryptfs inode
971 * 971 *
972 * If the crypto context for the file has not yet been established, 972 * If the crypto context for the file has not yet been established,
973 * this is where we do that. Establishing a new crypto context 973 * this is where we do that. Establishing a new crypto context
@@ -984,13 +984,13 @@ static void ecryptfs_set_default_crypt_stat_vals(
984 * 984 *
985 * Returns zero on success; non-zero otherwise 985 * Returns zero on success; non-zero otherwise
986 */ 986 */
987int ecryptfs_new_file_context(struct dentry *ecryptfs_dentry) 987int ecryptfs_new_file_context(struct inode *ecryptfs_inode)
988{ 988{
989 struct ecryptfs_crypt_stat *crypt_stat = 989 struct ecryptfs_crypt_stat *crypt_stat =
990 &ecryptfs_inode_to_private(ecryptfs_dentry->d_inode)->crypt_stat; 990 &ecryptfs_inode_to_private(ecryptfs_inode)->crypt_stat;
991 struct ecryptfs_mount_crypt_stat *mount_crypt_stat = 991 struct ecryptfs_mount_crypt_stat *mount_crypt_stat =
992 &ecryptfs_superblock_to_private( 992 &ecryptfs_superblock_to_private(
993 ecryptfs_dentry->d_sb)->mount_crypt_stat; 993 ecryptfs_inode->i_sb)->mount_crypt_stat;
994 int cipher_name_len; 994 int cipher_name_len;
995 int rc = 0; 995 int rc = 0;
996 996
@@ -1299,12 +1299,12 @@ static int ecryptfs_write_headers_virt(char *page_virt, size_t max,
1299} 1299}
1300 1300
1301static int 1301static int
1302ecryptfs_write_metadata_to_contents(struct dentry *ecryptfs_dentry, 1302ecryptfs_write_metadata_to_contents(struct inode *ecryptfs_inode,
1303 char *virt, size_t virt_len) 1303 char *virt, size_t virt_len)
1304{ 1304{
1305 int rc; 1305 int rc;
1306 1306
1307 rc = ecryptfs_write_lower(ecryptfs_dentry->d_inode, virt, 1307 rc = ecryptfs_write_lower(ecryptfs_inode, virt,
1308 0, virt_len); 1308 0, virt_len);
1309 if (rc < 0) 1309 if (rc < 0)
1310 printk(KERN_ERR "%s: Error attempting to write header " 1310 printk(KERN_ERR "%s: Error attempting to write header "
@@ -1338,7 +1338,8 @@ static unsigned long ecryptfs_get_zeroed_pages(gfp_t gfp_mask,
1338 1338
1339/** 1339/**
1340 * ecryptfs_write_metadata 1340 * ecryptfs_write_metadata
1341 * @ecryptfs_dentry: The eCryptfs dentry 1341 * @ecryptfs_dentry: The eCryptfs dentry, which should be negative
1342 * @ecryptfs_inode: The newly created eCryptfs inode
1342 * 1343 *
1343 * Write the file headers out. This will likely involve a userspace 1344 * Write the file headers out. This will likely involve a userspace
1344 * callout, in which the session key is encrypted with one or more 1345 * callout, in which the session key is encrypted with one or more
@@ -1348,10 +1349,11 @@ static unsigned long ecryptfs_get_zeroed_pages(gfp_t gfp_mask,
1348 * 1349 *
1349 * Returns zero on success; non-zero on error 1350 * Returns zero on success; non-zero on error
1350 */ 1351 */
1351int ecryptfs_write_metadata(struct dentry *ecryptfs_dentry) 1352int ecryptfs_write_metadata(struct dentry *ecryptfs_dentry,
1353 struct inode *ecryptfs_inode)
1352{ 1354{
1353 struct ecryptfs_crypt_stat *crypt_stat = 1355 struct ecryptfs_crypt_stat *crypt_stat =
1354 &ecryptfs_inode_to_private(ecryptfs_dentry->d_inode)->crypt_stat; 1356 &ecryptfs_inode_to_private(ecryptfs_inode)->crypt_stat;
1355 unsigned int order; 1357 unsigned int order;
1356 char *virt; 1358 char *virt;
1357 size_t virt_len; 1359 size_t virt_len;
@@ -1391,7 +1393,7 @@ int ecryptfs_write_metadata(struct dentry *ecryptfs_dentry)
1391 rc = ecryptfs_write_metadata_to_xattr(ecryptfs_dentry, virt, 1393 rc = ecryptfs_write_metadata_to_xattr(ecryptfs_dentry, virt,
1392 size); 1394 size);
1393 else 1395 else
1394 rc = ecryptfs_write_metadata_to_contents(ecryptfs_dentry, virt, 1396 rc = ecryptfs_write_metadata_to_contents(ecryptfs_inode, virt,
1395 virt_len); 1397 virt_len);
1396 if (rc) { 1398 if (rc) {
1397 printk(KERN_ERR "%s: Error writing metadata out to lower file; " 1399 printk(KERN_ERR "%s: Error writing metadata out to lower file; "
@@ -1943,7 +1945,7 @@ static unsigned char *portable_filename_chars = ("-.0123456789ABCD"
1943 1945
1944/* We could either offset on every reverse map or just pad some 0x00's 1946/* We could either offset on every reverse map or just pad some 0x00's
1945 * at the front here */ 1947 * at the front here */
1946static const unsigned char filename_rev_map[] = { 1948static const unsigned char filename_rev_map[256] = {
1947 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 7 */ 1949 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 7 */
1948 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 15 */ 1950 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 15 */
1949 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 23 */ 1951 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 23 */
@@ -1959,7 +1961,7 @@ static const unsigned char filename_rev_map[] = {
1959 0x00, 0x26, 0x27, 0x28, 0x29, 0x2A, 0x2B, 0x2C, /* 103 */ 1961 0x00, 0x26, 0x27, 0x28, 0x29, 0x2A, 0x2B, 0x2C, /* 103 */
1960 0x2D, 0x2E, 0x2F, 0x30, 0x31, 0x32, 0x33, 0x34, /* 111 */ 1962 0x2D, 0x2E, 0x2F, 0x30, 0x31, 0x32, 0x33, 0x34, /* 111 */
1961 0x35, 0x36, 0x37, 0x38, 0x39, 0x3A, 0x3B, 0x3C, /* 119 */ 1963 0x35, 0x36, 0x37, 0x38, 0x39, 0x3A, 0x3B, 0x3C, /* 119 */
1962 0x3D, 0x3E, 0x3F 1964 0x3D, 0x3E, 0x3F /* 123 - 255 initialized to 0x00 */
1963}; 1965};
1964 1966
1965/** 1967/**
diff --git a/fs/ecryptfs/ecryptfs_kernel.h b/fs/ecryptfs/ecryptfs_kernel.h
index 54481a3b2c79..a9f29b12fbf2 100644
--- a/fs/ecryptfs/ecryptfs_kernel.h
+++ b/fs/ecryptfs/ecryptfs_kernel.h
@@ -584,9 +584,10 @@ int ecryptfs_init_crypt_ctx(struct ecryptfs_crypt_stat *crypt_stat);
584int ecryptfs_write_inode_size_to_metadata(struct inode *ecryptfs_inode); 584int ecryptfs_write_inode_size_to_metadata(struct inode *ecryptfs_inode);
585int ecryptfs_encrypt_page(struct page *page); 585int ecryptfs_encrypt_page(struct page *page);
586int ecryptfs_decrypt_page(struct page *page); 586int ecryptfs_decrypt_page(struct page *page);
587int ecryptfs_write_metadata(struct dentry *ecryptfs_dentry); 587int ecryptfs_write_metadata(struct dentry *ecryptfs_dentry,
588 struct inode *ecryptfs_inode);
588int ecryptfs_read_metadata(struct dentry *ecryptfs_dentry); 589int ecryptfs_read_metadata(struct dentry *ecryptfs_dentry);
589int ecryptfs_new_file_context(struct dentry *ecryptfs_dentry); 590int ecryptfs_new_file_context(struct inode *ecryptfs_inode);
590void ecryptfs_write_crypt_stat_flags(char *page_virt, 591void ecryptfs_write_crypt_stat_flags(char *page_virt,
591 struct ecryptfs_crypt_stat *crypt_stat, 592 struct ecryptfs_crypt_stat *crypt_stat,
592 size_t *written); 593 size_t *written);
diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c
index c6ac98cf9baa..d3f95f941c47 100644
--- a/fs/ecryptfs/file.c
+++ b/fs/ecryptfs/file.c
@@ -139,6 +139,27 @@ out:
139 return rc; 139 return rc;
140} 140}
141 141
142static void ecryptfs_vma_close(struct vm_area_struct *vma)
143{
144 filemap_write_and_wait(vma->vm_file->f_mapping);
145}
146
147static const struct vm_operations_struct ecryptfs_file_vm_ops = {
148 .close = ecryptfs_vma_close,
149 .fault = filemap_fault,
150};
151
152static int ecryptfs_file_mmap(struct file *file, struct vm_area_struct *vma)
153{
154 int rc;
155
156 rc = generic_file_mmap(file, vma);
157 if (!rc)
158 vma->vm_ops = &ecryptfs_file_vm_ops;
159
160 return rc;
161}
162
142struct kmem_cache *ecryptfs_file_info_cache; 163struct kmem_cache *ecryptfs_file_info_cache;
143 164
144/** 165/**
@@ -349,7 +370,7 @@ const struct file_operations ecryptfs_main_fops = {
349#ifdef CONFIG_COMPAT 370#ifdef CONFIG_COMPAT
350 .compat_ioctl = ecryptfs_compat_ioctl, 371 .compat_ioctl = ecryptfs_compat_ioctl,
351#endif 372#endif
352 .mmap = generic_file_mmap, 373 .mmap = ecryptfs_file_mmap,
353 .open = ecryptfs_open, 374 .open = ecryptfs_open,
354 .flush = ecryptfs_flush, 375 .flush = ecryptfs_flush,
355 .release = ecryptfs_release, 376 .release = ecryptfs_release,
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c
index a36d327f1521..32f90a3ae63e 100644
--- a/fs/ecryptfs/inode.c
+++ b/fs/ecryptfs/inode.c
@@ -172,22 +172,23 @@ ecryptfs_create_underlying_file(struct inode *lower_dir_inode,
172 * it. It will also update the eCryptfs directory inode to mimic the 172 * it. It will also update the eCryptfs directory inode to mimic the
173 * stat of the lower directory inode. 173 * stat of the lower directory inode.
174 * 174 *
175 * Returns zero on success; non-zero on error condition 175 * Returns the new eCryptfs inode on success; an ERR_PTR on error condition
176 */ 176 */
177static int 177static struct inode *
178ecryptfs_do_create(struct inode *directory_inode, 178ecryptfs_do_create(struct inode *directory_inode,
179 struct dentry *ecryptfs_dentry, int mode) 179 struct dentry *ecryptfs_dentry, int mode)
180{ 180{
181 int rc; 181 int rc;
182 struct dentry *lower_dentry; 182 struct dentry *lower_dentry;
183 struct dentry *lower_dir_dentry; 183 struct dentry *lower_dir_dentry;
184 struct inode *inode;
184 185
185 lower_dentry = ecryptfs_dentry_to_lower(ecryptfs_dentry); 186 lower_dentry = ecryptfs_dentry_to_lower(ecryptfs_dentry);
186 lower_dir_dentry = lock_parent(lower_dentry); 187 lower_dir_dentry = lock_parent(lower_dentry);
187 if (IS_ERR(lower_dir_dentry)) { 188 if (IS_ERR(lower_dir_dentry)) {
188 ecryptfs_printk(KERN_ERR, "Error locking directory of " 189 ecryptfs_printk(KERN_ERR, "Error locking directory of "
189 "dentry\n"); 190 "dentry\n");
190 rc = PTR_ERR(lower_dir_dentry); 191 inode = ERR_CAST(lower_dir_dentry);
191 goto out; 192 goto out;
192 } 193 }
193 rc = ecryptfs_create_underlying_file(lower_dir_dentry->d_inode, 194 rc = ecryptfs_create_underlying_file(lower_dir_dentry->d_inode,
@@ -195,20 +196,19 @@ ecryptfs_do_create(struct inode *directory_inode,
195 if (rc) { 196 if (rc) {
196 printk(KERN_ERR "%s: Failure to create dentry in lower fs; " 197 printk(KERN_ERR "%s: Failure to create dentry in lower fs; "
197 "rc = [%d]\n", __func__, rc); 198 "rc = [%d]\n", __func__, rc);
199 inode = ERR_PTR(rc);
198 goto out_lock; 200 goto out_lock;
199 } 201 }
200 rc = ecryptfs_interpose(lower_dentry, ecryptfs_dentry, 202 inode = __ecryptfs_get_inode(lower_dentry->d_inode,
201 directory_inode->i_sb); 203 directory_inode->i_sb);
202 if (rc) { 204 if (IS_ERR(inode))
203 ecryptfs_printk(KERN_ERR, "Failure in ecryptfs_interpose\n");
204 goto out_lock; 205 goto out_lock;
205 }
206 fsstack_copy_attr_times(directory_inode, lower_dir_dentry->d_inode); 206 fsstack_copy_attr_times(directory_inode, lower_dir_dentry->d_inode);
207 fsstack_copy_inode_size(directory_inode, lower_dir_dentry->d_inode); 207 fsstack_copy_inode_size(directory_inode, lower_dir_dentry->d_inode);
208out_lock: 208out_lock:
209 unlock_dir(lower_dir_dentry); 209 unlock_dir(lower_dir_dentry);
210out: 210out:
211 return rc; 211 return inode;
212} 212}
213 213
214/** 214/**
@@ -219,26 +219,26 @@ out:
219 * 219 *
220 * Returns zero on success 220 * Returns zero on success
221 */ 221 */
222static int ecryptfs_initialize_file(struct dentry *ecryptfs_dentry) 222static int ecryptfs_initialize_file(struct dentry *ecryptfs_dentry,
223 struct inode *ecryptfs_inode)
223{ 224{
224 struct ecryptfs_crypt_stat *crypt_stat = 225 struct ecryptfs_crypt_stat *crypt_stat =
225 &ecryptfs_inode_to_private(ecryptfs_dentry->d_inode)->crypt_stat; 226 &ecryptfs_inode_to_private(ecryptfs_inode)->crypt_stat;
226 int rc = 0; 227 int rc = 0;
227 228
228 if (S_ISDIR(ecryptfs_dentry->d_inode->i_mode)) { 229 if (S_ISDIR(ecryptfs_inode->i_mode)) {
229 ecryptfs_printk(KERN_DEBUG, "This is a directory\n"); 230 ecryptfs_printk(KERN_DEBUG, "This is a directory\n");
230 crypt_stat->flags &= ~(ECRYPTFS_ENCRYPTED); 231 crypt_stat->flags &= ~(ECRYPTFS_ENCRYPTED);
231 goto out; 232 goto out;
232 } 233 }
233 ecryptfs_printk(KERN_DEBUG, "Initializing crypto context\n"); 234 ecryptfs_printk(KERN_DEBUG, "Initializing crypto context\n");
234 rc = ecryptfs_new_file_context(ecryptfs_dentry); 235 rc = ecryptfs_new_file_context(ecryptfs_inode);
235 if (rc) { 236 if (rc) {
236 ecryptfs_printk(KERN_ERR, "Error creating new file " 237 ecryptfs_printk(KERN_ERR, "Error creating new file "
237 "context; rc = [%d]\n", rc); 238 "context; rc = [%d]\n", rc);
238 goto out; 239 goto out;
239 } 240 }
240 rc = ecryptfs_get_lower_file(ecryptfs_dentry, 241 rc = ecryptfs_get_lower_file(ecryptfs_dentry, ecryptfs_inode);
241 ecryptfs_dentry->d_inode);
242 if (rc) { 242 if (rc) {
243 printk(KERN_ERR "%s: Error attempting to initialize " 243 printk(KERN_ERR "%s: Error attempting to initialize "
244 "the lower file for the dentry with name " 244 "the lower file for the dentry with name "
@@ -246,10 +246,10 @@ static int ecryptfs_initialize_file(struct dentry *ecryptfs_dentry)
246 ecryptfs_dentry->d_name.name, rc); 246 ecryptfs_dentry->d_name.name, rc);
247 goto out; 247 goto out;
248 } 248 }
249 rc = ecryptfs_write_metadata(ecryptfs_dentry); 249 rc = ecryptfs_write_metadata(ecryptfs_dentry, ecryptfs_inode);
250 if (rc) 250 if (rc)
251 printk(KERN_ERR "Error writing headers; rc = [%d]\n", rc); 251 printk(KERN_ERR "Error writing headers; rc = [%d]\n", rc);
252 ecryptfs_put_lower_file(ecryptfs_dentry->d_inode); 252 ecryptfs_put_lower_file(ecryptfs_inode);
253out: 253out:
254 return rc; 254 return rc;
255} 255}
@@ -269,18 +269,28 @@ static int
269ecryptfs_create(struct inode *directory_inode, struct dentry *ecryptfs_dentry, 269ecryptfs_create(struct inode *directory_inode, struct dentry *ecryptfs_dentry,
270 int mode, struct nameidata *nd) 270 int mode, struct nameidata *nd)
271{ 271{
272 struct inode *ecryptfs_inode;
272 int rc; 273 int rc;
273 274
274 /* ecryptfs_do_create() calls ecryptfs_interpose() */ 275 ecryptfs_inode = ecryptfs_do_create(directory_inode, ecryptfs_dentry,
275 rc = ecryptfs_do_create(directory_inode, ecryptfs_dentry, mode); 276 mode);
276 if (unlikely(rc)) { 277 if (unlikely(IS_ERR(ecryptfs_inode))) {
277 ecryptfs_printk(KERN_WARNING, "Failed to create file in" 278 ecryptfs_printk(KERN_WARNING, "Failed to create file in"
278 "lower filesystem\n"); 279 "lower filesystem\n");
280 rc = PTR_ERR(ecryptfs_inode);
279 goto out; 281 goto out;
280 } 282 }
281 /* At this point, a file exists on "disk"; we need to make sure 283 /* At this point, a file exists on "disk"; we need to make sure
282 * that this on disk file is prepared to be an ecryptfs file */ 284 * that this on disk file is prepared to be an ecryptfs file */
283 rc = ecryptfs_initialize_file(ecryptfs_dentry); 285 rc = ecryptfs_initialize_file(ecryptfs_dentry, ecryptfs_inode);
286 if (rc) {
287 drop_nlink(ecryptfs_inode);
288 unlock_new_inode(ecryptfs_inode);
289 iput(ecryptfs_inode);
290 goto out;
291 }
292 d_instantiate(ecryptfs_dentry, ecryptfs_inode);
293 unlock_new_inode(ecryptfs_inode);
284out: 294out:
285 return rc; 295 return rc;
286} 296}
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index f6dba4505f1c..12ccacda44e0 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -565,7 +565,7 @@ ext4_fsblk_t ext4_count_free_clusters(struct super_block *sb)
565 brelse(bitmap_bh); 565 brelse(bitmap_bh);
566 printk(KERN_DEBUG "ext4_count_free_clusters: stored = %llu" 566 printk(KERN_DEBUG "ext4_count_free_clusters: stored = %llu"
567 ", computed = %llu, %llu\n", 567 ", computed = %llu, %llu\n",
568 EXT4_B2C(sbi, ext4_free_blocks_count(es)), 568 EXT4_B2C(EXT4_SB(sb), ext4_free_blocks_count(es)),
569 desc_count, bitmap_count); 569 desc_count, bitmap_count);
570 return bitmap_count; 570 return bitmap_count;
571#else 571#else
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 240f6e2dc7ee..fffec40d5996 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -2270,6 +2270,7 @@ retry:
2270 ext4_msg(inode->i_sb, KERN_CRIT, "%s: jbd2_start: " 2270 ext4_msg(inode->i_sb, KERN_CRIT, "%s: jbd2_start: "
2271 "%ld pages, ino %lu; err %d", __func__, 2271 "%ld pages, ino %lu; err %d", __func__,
2272 wbc->nr_to_write, inode->i_ino, ret); 2272 wbc->nr_to_write, inode->i_ino, ret);
2273 blk_finish_plug(&plug);
2273 goto out_writepages; 2274 goto out_writepages;
2274 } 2275 }
2275 2276
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 9953d80145ad..3858767ec672 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -1683,7 +1683,9 @@ static int parse_options(char *options, struct super_block *sb,
1683 data_opt = EXT4_MOUNT_WRITEBACK_DATA; 1683 data_opt = EXT4_MOUNT_WRITEBACK_DATA;
1684 datacheck: 1684 datacheck:
1685 if (is_remount) { 1685 if (is_remount) {
1686 if (test_opt(sb, DATA_FLAGS) != data_opt) { 1686 if (!sbi->s_journal)
1687 ext4_msg(sb, KERN_WARNING, "Remounting file system with no journal so ignoring journalled data option");
1688 else if (test_opt(sb, DATA_FLAGS) != data_opt) {
1687 ext4_msg(sb, KERN_ERR, 1689 ext4_msg(sb, KERN_ERR,
1688 "Cannot change data mode on remount"); 1690 "Cannot change data mode on remount");
1689 return 0; 1691 return 0;
@@ -3099,8 +3101,6 @@ static void ext4_destroy_lazyinit_thread(void)
3099} 3101}
3100 3102
3101static int ext4_fill_super(struct super_block *sb, void *data, int silent) 3103static int ext4_fill_super(struct super_block *sb, void *data, int silent)
3102 __releases(kernel_lock)
3103 __acquires(kernel_lock)
3104{ 3104{
3105 char *orig_data = kstrdup(data, GFP_KERNEL); 3105 char *orig_data = kstrdup(data, GFP_KERNEL);
3106 struct buffer_head *bh; 3106 struct buffer_head *bh;
diff --git a/fs/hfs/trans.c b/fs/hfs/trans.c
index e673a88b8ae7..b1ce4c7ad3fb 100644
--- a/fs/hfs/trans.c
+++ b/fs/hfs/trans.c
@@ -40,6 +40,8 @@ int hfs_mac2asc(struct super_block *sb, char *out, const struct hfs_name *in)
40 40
41 src = in->name; 41 src = in->name;
42 srclen = in->len; 42 srclen = in->len;
43 if (srclen > HFS_NAMELEN)
44 srclen = HFS_NAMELEN;
43 dst = out; 45 dst = out;
44 dstlen = HFS_MAX_NAMELEN; 46 dstlen = HFS_MAX_NAMELEN;
45 if (nls_io) { 47 if (nls_io) {
diff --git a/fs/minix/bitmap.c b/fs/minix/bitmap.c
index 3f32bcb0d9bd..ef175cb8cfd8 100644
--- a/fs/minix/bitmap.c
+++ b/fs/minix/bitmap.c
@@ -16,38 +16,26 @@
16#include <linux/bitops.h> 16#include <linux/bitops.h>
17#include <linux/sched.h> 17#include <linux/sched.h>
18 18
19static const int nibblemap[] = { 4,3,3,2,3,2,2,1,3,2,2,1,2,1,1,0 };
20
21static DEFINE_SPINLOCK(bitmap_lock); 19static DEFINE_SPINLOCK(bitmap_lock);
22 20
23static unsigned long count_free(struct buffer_head *map[], unsigned numblocks, __u32 numbits) 21/*
22 * bitmap consists of blocks filled with 16bit words
23 * bit set == busy, bit clear == free
24 * endianness is a mess, but for counting zero bits it really doesn't matter...
25 */
26static __u32 count_free(struct buffer_head *map[], unsigned blocksize, __u32 numbits)
24{ 27{
25 unsigned i, j, sum = 0; 28 __u32 sum = 0;
26 struct buffer_head *bh; 29 unsigned blocks = DIV_ROUND_UP(numbits, blocksize * 8);
27
28 for (i=0; i<numblocks-1; i++) {
29 if (!(bh=map[i]))
30 return(0);
31 for (j=0; j<bh->b_size; j++)
32 sum += nibblemap[bh->b_data[j] & 0xf]
33 + nibblemap[(bh->b_data[j]>>4) & 0xf];
34 }
35 30
36 if (numblocks==0 || !(bh=map[numblocks-1])) 31 while (blocks--) {
37 return(0); 32 unsigned words = blocksize / 2;
38 i = ((numbits - (numblocks-1) * bh->b_size * 8) / 16) * 2; 33 __u16 *p = (__u16 *)(*map++)->b_data;
39 for (j=0; j<i; j++) { 34 while (words--)
40 sum += nibblemap[bh->b_data[j] & 0xf] 35 sum += 16 - hweight16(*p++);
41 + nibblemap[(bh->b_data[j]>>4) & 0xf];
42 } 36 }
43 37
44 i = numbits%16; 38 return sum;
45 if (i!=0) {
46 i = *(__u16 *)(&bh->b_data[j]) | ~((1<<i) - 1);
47 sum += nibblemap[i & 0xf] + nibblemap[(i>>4) & 0xf];
48 sum += nibblemap[(i>>8) & 0xf] + nibblemap[(i>>12) & 0xf];
49 }
50 return(sum);
51} 39}
52 40
53void minix_free_block(struct inode *inode, unsigned long block) 41void minix_free_block(struct inode *inode, unsigned long block)
@@ -105,10 +93,12 @@ int minix_new_block(struct inode * inode)
105 return 0; 93 return 0;
106} 94}
107 95
108unsigned long minix_count_free_blocks(struct minix_sb_info *sbi) 96unsigned long minix_count_free_blocks(struct super_block *sb)
109{ 97{
110 return (count_free(sbi->s_zmap, sbi->s_zmap_blocks, 98 struct minix_sb_info *sbi = minix_sb(sb);
111 sbi->s_nzones - sbi->s_firstdatazone + 1) 99 u32 bits = sbi->s_nzones - (sbi->s_firstdatazone + 1);
100
101 return (count_free(sbi->s_zmap, sb->s_blocksize, bits)
112 << sbi->s_log_zone_size); 102 << sbi->s_log_zone_size);
113} 103}
114 104
@@ -273,7 +263,10 @@ struct inode *minix_new_inode(const struct inode *dir, int mode, int *error)
273 return inode; 263 return inode;
274} 264}
275 265
276unsigned long minix_count_free_inodes(struct minix_sb_info *sbi) 266unsigned long minix_count_free_inodes(struct super_block *sb)
277{ 267{
278 return count_free(sbi->s_imap, sbi->s_imap_blocks, sbi->s_ninodes + 1); 268 struct minix_sb_info *sbi = minix_sb(sb);
269 u32 bits = sbi->s_ninodes + 1;
270
271 return count_free(sbi->s_imap, sb->s_blocksize, bits);
279} 272}
diff --git a/fs/minix/inode.c b/fs/minix/inode.c
index 64cdcd662ffc..1d9e33966db0 100644
--- a/fs/minix/inode.c
+++ b/fs/minix/inode.c
@@ -279,6 +279,27 @@ static int minix_fill_super(struct super_block *s, void *data, int silent)
279 else if (sbi->s_mount_state & MINIX_ERROR_FS) 279 else if (sbi->s_mount_state & MINIX_ERROR_FS)
280 printk("MINIX-fs: mounting file system with errors, " 280 printk("MINIX-fs: mounting file system with errors, "
281 "running fsck is recommended\n"); 281 "running fsck is recommended\n");
282
283 /* Apparently minix can create filesystems that allocate more blocks for
284 * the bitmaps than needed. We simply ignore that, but verify it didn't
285 * create one with not enough blocks and bail out if so.
286 */
287 block = minix_blocks_needed(sbi->s_ninodes, s->s_blocksize);
288 if (sbi->s_imap_blocks < block) {
289 printk("MINIX-fs: file system does not have enough "
290 "imap blocks allocated. Refusing to mount\n");
291 goto out_iput;
292 }
293
294 block = minix_blocks_needed(
295 (sbi->s_nzones - (sbi->s_firstdatazone + 1)),
296 s->s_blocksize);
297 if (sbi->s_zmap_blocks < block) {
298 printk("MINIX-fs: file system does not have enough "
299 "zmap blocks allocated. Refusing to mount.\n");
300 goto out_iput;
301 }
302
282 return 0; 303 return 0;
283 304
284out_iput: 305out_iput:
@@ -339,10 +360,10 @@ static int minix_statfs(struct dentry *dentry, struct kstatfs *buf)
339 buf->f_type = sb->s_magic; 360 buf->f_type = sb->s_magic;
340 buf->f_bsize = sb->s_blocksize; 361 buf->f_bsize = sb->s_blocksize;
341 buf->f_blocks = (sbi->s_nzones - sbi->s_firstdatazone) << sbi->s_log_zone_size; 362 buf->f_blocks = (sbi->s_nzones - sbi->s_firstdatazone) << sbi->s_log_zone_size;
342 buf->f_bfree = minix_count_free_blocks(sbi); 363 buf->f_bfree = minix_count_free_blocks(sb);
343 buf->f_bavail = buf->f_bfree; 364 buf->f_bavail = buf->f_bfree;
344 buf->f_files = sbi->s_ninodes; 365 buf->f_files = sbi->s_ninodes;
345 buf->f_ffree = minix_count_free_inodes(sbi); 366 buf->f_ffree = minix_count_free_inodes(sb);
346 buf->f_namelen = sbi->s_namelen; 367 buf->f_namelen = sbi->s_namelen;
347 buf->f_fsid.val[0] = (u32)id; 368 buf->f_fsid.val[0] = (u32)id;
348 buf->f_fsid.val[1] = (u32)(id >> 32); 369 buf->f_fsid.val[1] = (u32)(id >> 32);
diff --git a/fs/minix/minix.h b/fs/minix/minix.h
index 341e2122879a..26bbd55e82ea 100644
--- a/fs/minix/minix.h
+++ b/fs/minix/minix.h
@@ -48,10 +48,10 @@ extern struct minix_inode * minix_V1_raw_inode(struct super_block *, ino_t, stru
48extern struct minix2_inode * minix_V2_raw_inode(struct super_block *, ino_t, struct buffer_head **); 48extern struct minix2_inode * minix_V2_raw_inode(struct super_block *, ino_t, struct buffer_head **);
49extern struct inode * minix_new_inode(const struct inode *, int, int *); 49extern struct inode * minix_new_inode(const struct inode *, int, int *);
50extern void minix_free_inode(struct inode * inode); 50extern void minix_free_inode(struct inode * inode);
51extern unsigned long minix_count_free_inodes(struct minix_sb_info *sbi); 51extern unsigned long minix_count_free_inodes(struct super_block *sb);
52extern int minix_new_block(struct inode * inode); 52extern int minix_new_block(struct inode * inode);
53extern void minix_free_block(struct inode *inode, unsigned long block); 53extern void minix_free_block(struct inode *inode, unsigned long block);
54extern unsigned long minix_count_free_blocks(struct minix_sb_info *sbi); 54extern unsigned long minix_count_free_blocks(struct super_block *sb);
55extern int minix_getattr(struct vfsmount *, struct dentry *, struct kstat *); 55extern int minix_getattr(struct vfsmount *, struct dentry *, struct kstat *);
56extern int minix_prepare_chunk(struct page *page, loff_t pos, unsigned len); 56extern int minix_prepare_chunk(struct page *page, loff_t pos, unsigned len);
57 57
@@ -88,6 +88,11 @@ static inline struct minix_inode_info *minix_i(struct inode *inode)
88 return list_entry(inode, struct minix_inode_info, vfs_inode); 88 return list_entry(inode, struct minix_inode_info, vfs_inode);
89} 89}
90 90
91static inline unsigned minix_blocks_needed(unsigned bits, unsigned blocksize)
92{
93 return DIV_ROUND_UP(bits, blocksize * 8);
94}
95
91#if defined(CONFIG_MINIX_FS_NATIVE_ENDIAN) && \ 96#if defined(CONFIG_MINIX_FS_NATIVE_ENDIAN) && \
92 defined(CONFIG_MINIX_FS_BIG_ENDIAN_16BIT_INDEXED) 97 defined(CONFIG_MINIX_FS_BIG_ENDIAN_16BIT_INDEXED)
93 98
@@ -125,7 +130,7 @@ static inline int minix_find_first_zero_bit(const void *vaddr, unsigned size)
125 if (!size) 130 if (!size)
126 return 0; 131 return 0;
127 132
128 size = (size >> 4) + ((size & 15) > 0); 133 size >>= 4;
129 while (*p++ == 0xffff) { 134 while (*p++ == 0xffff) {
130 if (--size == 0) 135 if (--size == 0)
131 return (p - addr) << 4; 136 return (p - addr) << 4;
diff --git a/fs/namespace.c b/fs/namespace.c
index e5e1c7d1839b..6d3a1963879b 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -2483,11 +2483,43 @@ struct mnt_namespace *create_mnt_ns(struct vfsmount *mnt)
2483 __mnt_make_longterm(mnt); 2483 __mnt_make_longterm(mnt);
2484 new_ns->root = mnt; 2484 new_ns->root = mnt;
2485 list_add(&new_ns->list, &new_ns->root->mnt_list); 2485 list_add(&new_ns->list, &new_ns->root->mnt_list);
2486 } else {
2487 mntput(mnt);
2486 } 2488 }
2487 return new_ns; 2489 return new_ns;
2488} 2490}
2489EXPORT_SYMBOL(create_mnt_ns); 2491EXPORT_SYMBOL(create_mnt_ns);
2490 2492
2493struct dentry *mount_subtree(struct vfsmount *mnt, const char *name)
2494{
2495 struct mnt_namespace *ns;
2496 struct super_block *s;
2497 struct path path;
2498 int err;
2499
2500 ns = create_mnt_ns(mnt);
2501 if (IS_ERR(ns))
2502 return ERR_CAST(ns);
2503
2504 err = vfs_path_lookup(mnt->mnt_root, mnt,
2505 name, LOOKUP_FOLLOW|LOOKUP_AUTOMOUNT, &path);
2506
2507 put_mnt_ns(ns);
2508
2509 if (err)
2510 return ERR_PTR(err);
2511
2512 /* trade a vfsmount reference for active sb one */
2513 s = path.mnt->mnt_sb;
2514 atomic_inc(&s->s_active);
2515 mntput(path.mnt);
2516 /* lock the sucker */
2517 down_write(&s->s_umount);
2518 /* ... and return the root of (sub)tree on it */
2519 return path.dentry;
2520}
2521EXPORT_SYMBOL(mount_subtree);
2522
2491SYSCALL_DEFINE5(mount, char __user *, dev_name, char __user *, dir_name, 2523SYSCALL_DEFINE5(mount, char __user *, dev_name, char __user *, dir_name,
2492 char __user *, type, unsigned long, flags, void __user *, data) 2524 char __user *, type, unsigned long, flags, void __user *, data)
2493{ 2525{
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index b238d95ac48c..ac2899098147 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -1468,12 +1468,12 @@ static struct dentry *nfs_atomic_lookup(struct inode *dir, struct dentry *dentry
1468 res = NULL; 1468 res = NULL;
1469 goto out; 1469 goto out;
1470 /* This turned out not to be a regular file */ 1470 /* This turned out not to be a regular file */
1471 case -EISDIR:
1471 case -ENOTDIR: 1472 case -ENOTDIR:
1472 goto no_open; 1473 goto no_open;
1473 case -ELOOP: 1474 case -ELOOP:
1474 if (!(nd->intent.open.flags & O_NOFOLLOW)) 1475 if (!(nd->intent.open.flags & O_NOFOLLOW))
1475 goto no_open; 1476 goto no_open;
1476 /* case -EISDIR: */
1477 /* case -EINVAL: */ 1477 /* case -EINVAL: */
1478 default: 1478 default:
1479 res = ERR_CAST(inode); 1479 res = ERR_CAST(inode);
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 0a1f8312b4dc..eca56d4b39c0 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -40,48 +40,8 @@
40 40
41#define NFSDBG_FACILITY NFSDBG_FILE 41#define NFSDBG_FACILITY NFSDBG_FILE
42 42
43static int nfs_file_open(struct inode *, struct file *);
44static int nfs_file_release(struct inode *, struct file *);
45static loff_t nfs_file_llseek(struct file *file, loff_t offset, int origin);
46static int nfs_file_mmap(struct file *, struct vm_area_struct *);
47static ssize_t nfs_file_splice_read(struct file *filp, loff_t *ppos,
48 struct pipe_inode_info *pipe,
49 size_t count, unsigned int flags);
50static ssize_t nfs_file_read(struct kiocb *, const struct iovec *iov,
51 unsigned long nr_segs, loff_t pos);
52static ssize_t nfs_file_splice_write(struct pipe_inode_info *pipe,
53 struct file *filp, loff_t *ppos,
54 size_t count, unsigned int flags);
55static ssize_t nfs_file_write(struct kiocb *, const struct iovec *iov,
56 unsigned long nr_segs, loff_t pos);
57static int nfs_file_flush(struct file *, fl_owner_t id);
58static int nfs_file_fsync(struct file *, loff_t, loff_t, int datasync);
59static int nfs_check_flags(int flags);
60static int nfs_lock(struct file *filp, int cmd, struct file_lock *fl);
61static int nfs_flock(struct file *filp, int cmd, struct file_lock *fl);
62static int nfs_setlease(struct file *file, long arg, struct file_lock **fl);
63
64static const struct vm_operations_struct nfs_file_vm_ops; 43static const struct vm_operations_struct nfs_file_vm_ops;
65 44
66const struct file_operations nfs_file_operations = {
67 .llseek = nfs_file_llseek,
68 .read = do_sync_read,
69 .write = do_sync_write,
70 .aio_read = nfs_file_read,
71 .aio_write = nfs_file_write,
72 .mmap = nfs_file_mmap,
73 .open = nfs_file_open,
74 .flush = nfs_file_flush,
75 .release = nfs_file_release,
76 .fsync = nfs_file_fsync,
77 .lock = nfs_lock,
78 .flock = nfs_flock,
79 .splice_read = nfs_file_splice_read,
80 .splice_write = nfs_file_splice_write,
81 .check_flags = nfs_check_flags,
82 .setlease = nfs_setlease,
83};
84
85const struct inode_operations nfs_file_inode_operations = { 45const struct inode_operations nfs_file_inode_operations = {
86 .permission = nfs_permission, 46 .permission = nfs_permission,
87 .getattr = nfs_getattr, 47 .getattr = nfs_getattr,
@@ -886,3 +846,54 @@ static int nfs_setlease(struct file *file, long arg, struct file_lock **fl)
886 file->f_path.dentry->d_name.name, arg); 846 file->f_path.dentry->d_name.name, arg);
887 return -EINVAL; 847 return -EINVAL;
888} 848}
849
850const struct file_operations nfs_file_operations = {
851 .llseek = nfs_file_llseek,
852 .read = do_sync_read,
853 .write = do_sync_write,
854 .aio_read = nfs_file_read,
855 .aio_write = nfs_file_write,
856 .mmap = nfs_file_mmap,
857 .open = nfs_file_open,
858 .flush = nfs_file_flush,
859 .release = nfs_file_release,
860 .fsync = nfs_file_fsync,
861 .lock = nfs_lock,
862 .flock = nfs_flock,
863 .splice_read = nfs_file_splice_read,
864 .splice_write = nfs_file_splice_write,
865 .check_flags = nfs_check_flags,
866 .setlease = nfs_setlease,
867};
868
869#ifdef CONFIG_NFS_V4
870static int
871nfs4_file_open(struct inode *inode, struct file *filp)
872{
873 /*
874 * NFSv4 opens are handled in d_lookup and d_revalidate. If we get to
875 * this point, then something is very wrong
876 */
877 dprintk("NFS: %s called! inode=%p filp=%p\n", __func__, inode, filp);
878 return -ENOTDIR;
879}
880
881const struct file_operations nfs4_file_operations = {
882 .llseek = nfs_file_llseek,
883 .read = do_sync_read,
884 .write = do_sync_write,
885 .aio_read = nfs_file_read,
886 .aio_write = nfs_file_write,
887 .mmap = nfs_file_mmap,
888 .open = nfs4_file_open,
889 .flush = nfs_file_flush,
890 .release = nfs_file_release,
891 .fsync = nfs_file_fsync,
892 .lock = nfs_lock,
893 .flock = nfs_flock,
894 .splice_read = nfs_file_splice_read,
895 .splice_write = nfs_file_splice_write,
896 .check_flags = nfs_check_flags,
897 .setlease = nfs_setlease,
898};
899#endif /* CONFIG_NFS_V4 */
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index c07a55aec838..50a15fa8cf98 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -291,7 +291,7 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr)
291 */ 291 */
292 inode->i_op = NFS_SB(sb)->nfs_client->rpc_ops->file_inode_ops; 292 inode->i_op = NFS_SB(sb)->nfs_client->rpc_ops->file_inode_ops;
293 if (S_ISREG(inode->i_mode)) { 293 if (S_ISREG(inode->i_mode)) {
294 inode->i_fop = &nfs_file_operations; 294 inode->i_fop = NFS_SB(sb)->nfs_client->rpc_ops->file_ops;
295 inode->i_data.a_ops = &nfs_file_aops; 295 inode->i_data.a_ops = &nfs_file_aops;
296 inode->i_data.backing_dev_info = &NFS_SB(sb)->backing_dev_info; 296 inode->i_data.backing_dev_info = &NFS_SB(sb)->backing_dev_info;
297 } else if (S_ISDIR(inode->i_mode)) { 297 } else if (S_ISDIR(inode->i_mode)) {
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index c1a1bd8ddf1c..3f4d95751d52 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -299,6 +299,8 @@ extern void nfs_read_prepare(struct rpc_task *task, void *calldata);
299extern int nfs_generic_pagein(struct nfs_pageio_descriptor *desc, 299extern int nfs_generic_pagein(struct nfs_pageio_descriptor *desc,
300 struct list_head *head); 300 struct list_head *head);
301 301
302extern void nfs_pageio_init_read_mds(struct nfs_pageio_descriptor *pgio,
303 struct inode *inode);
302extern void nfs_pageio_reset_read_mds(struct nfs_pageio_descriptor *pgio); 304extern void nfs_pageio_reset_read_mds(struct nfs_pageio_descriptor *pgio);
303extern void nfs_readdata_release(struct nfs_read_data *rdata); 305extern void nfs_readdata_release(struct nfs_read_data *rdata);
304 306
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c
index 85f1690ca08c..d4bc9ed91748 100644
--- a/fs/nfs/nfs3proc.c
+++ b/fs/nfs/nfs3proc.c
@@ -853,6 +853,7 @@ const struct nfs_rpc_ops nfs_v3_clientops = {
853 .dentry_ops = &nfs_dentry_operations, 853 .dentry_ops = &nfs_dentry_operations,
854 .dir_inode_ops = &nfs3_dir_inode_operations, 854 .dir_inode_ops = &nfs3_dir_inode_operations,
855 .file_inode_ops = &nfs3_file_inode_operations, 855 .file_inode_ops = &nfs3_file_inode_operations,
856 .file_ops = &nfs_file_operations,
856 .getroot = nfs3_proc_get_root, 857 .getroot = nfs3_proc_get_root,
857 .getattr = nfs3_proc_getattr, 858 .getattr = nfs3_proc_getattr,
858 .setattr = nfs3_proc_setattr, 859 .setattr = nfs3_proc_setattr,
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index b60fddf606f7..be2bbac13817 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -2464,8 +2464,7 @@ static int nfs4_proc_lookup(struct rpc_clnt *clnt, struct inode *dir, struct qst
2464 case -NFS4ERR_BADNAME: 2464 case -NFS4ERR_BADNAME:
2465 return -ENOENT; 2465 return -ENOENT;
2466 case -NFS4ERR_MOVED: 2466 case -NFS4ERR_MOVED:
2467 err = nfs4_get_referral(dir, name, fattr, fhandle); 2467 return nfs4_get_referral(dir, name, fattr, fhandle);
2468 break;
2469 case -NFS4ERR_WRONGSEC: 2468 case -NFS4ERR_WRONGSEC:
2470 nfs_fixup_secinfo_attributes(fattr, fhandle); 2469 nfs_fixup_secinfo_attributes(fattr, fhandle);
2471 } 2470 }
@@ -6253,6 +6252,7 @@ const struct nfs_rpc_ops nfs_v4_clientops = {
6253 .dentry_ops = &nfs4_dentry_operations, 6252 .dentry_ops = &nfs4_dentry_operations,
6254 .dir_inode_ops = &nfs4_dir_inode_operations, 6253 .dir_inode_ops = &nfs4_dir_inode_operations,
6255 .file_inode_ops = &nfs4_file_inode_operations, 6254 .file_inode_ops = &nfs4_file_inode_operations,
6255 .file_ops = &nfs4_file_operations,
6256 .getroot = nfs4_proc_get_root, 6256 .getroot = nfs4_proc_get_root,
6257 .getattr = nfs4_proc_getattr, 6257 .getattr = nfs4_proc_getattr,
6258 .setattr = nfs4_proc_setattr, 6258 .setattr = nfs4_proc_setattr,
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index baf73536bc04..8e672a2b2d69 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -1260,6 +1260,25 @@ pnfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc)
1260} 1260}
1261EXPORT_SYMBOL_GPL(pnfs_generic_pg_writepages); 1261EXPORT_SYMBOL_GPL(pnfs_generic_pg_writepages);
1262 1262
1263static void pnfs_ld_handle_read_error(struct nfs_read_data *data)
1264{
1265 struct nfs_pageio_descriptor pgio;
1266
1267 put_lseg(data->lseg);
1268 data->lseg = NULL;
1269 dprintk("pnfs write error = %d\n", data->pnfs_error);
1270
1271 nfs_pageio_init_read_mds(&pgio, data->inode);
1272
1273 while (!list_empty(&data->pages)) {
1274 struct nfs_page *req = nfs_list_entry(data->pages.next);
1275
1276 nfs_list_remove_request(req);
1277 nfs_pageio_add_request(&pgio, req);
1278 }
1279 nfs_pageio_complete(&pgio);
1280}
1281
1263/* 1282/*
1264 * Called by non rpc-based layout drivers 1283 * Called by non rpc-based layout drivers
1265 */ 1284 */
@@ -1268,11 +1287,8 @@ void pnfs_ld_read_done(struct nfs_read_data *data)
1268 if (likely(!data->pnfs_error)) { 1287 if (likely(!data->pnfs_error)) {
1269 __nfs4_read_done_cb(data); 1288 __nfs4_read_done_cb(data);
1270 data->mds_ops->rpc_call_done(&data->task, data); 1289 data->mds_ops->rpc_call_done(&data->task, data);
1271 } else { 1290 } else
1272 put_lseg(data->lseg); 1291 pnfs_ld_handle_read_error(data);
1273 data->lseg = NULL;
1274 dprintk("pnfs write error = %d\n", data->pnfs_error);
1275 }
1276 data->mds_ops->rpc_release(data); 1292 data->mds_ops->rpc_release(data);
1277} 1293}
1278EXPORT_SYMBOL_GPL(pnfs_ld_read_done); 1294EXPORT_SYMBOL_GPL(pnfs_ld_read_done);
diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c
index ac40b8535d7e..f48125da198a 100644
--- a/fs/nfs/proc.c
+++ b/fs/nfs/proc.c
@@ -710,6 +710,7 @@ const struct nfs_rpc_ops nfs_v2_clientops = {
710 .dentry_ops = &nfs_dentry_operations, 710 .dentry_ops = &nfs_dentry_operations,
711 .dir_inode_ops = &nfs_dir_inode_operations, 711 .dir_inode_ops = &nfs_dir_inode_operations,
712 .file_inode_ops = &nfs_file_inode_operations, 712 .file_inode_ops = &nfs_file_inode_operations,
713 .file_ops = &nfs_file_operations,
713 .getroot = nfs_proc_get_root, 714 .getroot = nfs_proc_get_root,
714 .getattr = nfs_proc_getattr, 715 .getattr = nfs_proc_getattr,
715 .setattr = nfs_proc_setattr, 716 .setattr = nfs_proc_setattr,
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index 8b48ec63f722..cfa175c223dc 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -109,7 +109,7 @@ static void nfs_readpage_truncate_uninitialised_page(struct nfs_read_data *data)
109 } 109 }
110} 110}
111 111
112static void nfs_pageio_init_read_mds(struct nfs_pageio_descriptor *pgio, 112void nfs_pageio_init_read_mds(struct nfs_pageio_descriptor *pgio,
113 struct inode *inode) 113 struct inode *inode)
114{ 114{
115 nfs_pageio_init(pgio, inode, &nfs_pageio_read_ops, 115 nfs_pageio_init(pgio, inode, &nfs_pageio_read_ops,
@@ -534,23 +534,13 @@ static void nfs_readpage_result_full(struct rpc_task *task, void *calldata)
534static void nfs_readpage_release_full(void *calldata) 534static void nfs_readpage_release_full(void *calldata)
535{ 535{
536 struct nfs_read_data *data = calldata; 536 struct nfs_read_data *data = calldata;
537 struct nfs_pageio_descriptor pgio;
538 537
539 if (data->pnfs_error) {
540 nfs_pageio_init_read_mds(&pgio, data->inode);
541 pgio.pg_recoalesce = 1;
542 }
543 while (!list_empty(&data->pages)) { 538 while (!list_empty(&data->pages)) {
544 struct nfs_page *req = nfs_list_entry(data->pages.next); 539 struct nfs_page *req = nfs_list_entry(data->pages.next);
545 540
546 nfs_list_remove_request(req); 541 nfs_list_remove_request(req);
547 if (!data->pnfs_error) 542 nfs_readpage_release(req);
548 nfs_readpage_release(req);
549 else
550 nfs_pageio_add_request(&pgio, req);
551 } 543 }
552 if (data->pnfs_error)
553 nfs_pageio_complete(&pgio);
554 nfs_readdata_release(calldata); 544 nfs_readdata_release(calldata);
555} 545}
556 546
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index 480b3b6bf71e..134777406ee3 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -2787,43 +2787,18 @@ static void nfs_referral_loop_unprotect(void)
2787static struct dentry *nfs_follow_remote_path(struct vfsmount *root_mnt, 2787static struct dentry *nfs_follow_remote_path(struct vfsmount *root_mnt,
2788 const char *export_path) 2788 const char *export_path)
2789{ 2789{
2790 struct mnt_namespace *ns_private;
2791 struct super_block *s;
2792 struct dentry *dentry; 2790 struct dentry *dentry;
2793 struct path path; 2791 int ret = nfs_referral_loop_protect();
2794 int ret;
2795
2796 ns_private = create_mnt_ns(root_mnt);
2797 ret = PTR_ERR(ns_private);
2798 if (IS_ERR(ns_private))
2799 goto out_mntput;
2800
2801 ret = nfs_referral_loop_protect();
2802 if (ret != 0)
2803 goto out_put_mnt_ns;
2804 2792
2805 ret = vfs_path_lookup(root_mnt->mnt_root, root_mnt, 2793 if (ret) {
2806 export_path, LOOKUP_FOLLOW|LOOKUP_AUTOMOUNT, &path); 2794 mntput(root_mnt);
2795 return ERR_PTR(ret);
2796 }
2807 2797
2798 dentry = mount_subtree(root_mnt, export_path);
2808 nfs_referral_loop_unprotect(); 2799 nfs_referral_loop_unprotect();
2809 put_mnt_ns(ns_private);
2810
2811 if (ret != 0)
2812 goto out_err;
2813
2814 s = path.mnt->mnt_sb;
2815 atomic_inc(&s->s_active);
2816 dentry = dget(path.dentry);
2817 2800
2818 path_put(&path);
2819 down_write(&s->s_umount);
2820 return dentry; 2801 return dentry;
2821out_put_mnt_ns:
2822 put_mnt_ns(ns_private);
2823out_mntput:
2824 mntput(root_mnt);
2825out_err:
2826 return ERR_PTR(ret);
2827} 2802}
2828 2803
2829static struct dentry *nfs4_try_mount(int flags, const char *dev_name, 2804static struct dentry *nfs4_try_mount(int flags, const char *dev_name,
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 2db1bd3173b2..851ba3dcdc29 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -1652,46 +1652,12 @@ out:
1652 return error; 1652 return error;
1653} 1653}
1654 1654
1655static int proc_pid_fd_link_getattr(struct vfsmount *mnt, struct dentry *dentry,
1656 struct kstat *stat)
1657{
1658 struct inode *inode = dentry->d_inode;
1659 struct task_struct *task = get_proc_task(inode);
1660 int rc;
1661
1662 if (task == NULL)
1663 return -ESRCH;
1664
1665 rc = -EACCES;
1666 if (lock_trace(task))
1667 goto out_task;
1668
1669 generic_fillattr(inode, stat);
1670 unlock_trace(task);
1671 rc = 0;
1672out_task:
1673 put_task_struct(task);
1674 return rc;
1675}
1676
1677static const struct inode_operations proc_pid_link_inode_operations = { 1655static const struct inode_operations proc_pid_link_inode_operations = {
1678 .readlink = proc_pid_readlink, 1656 .readlink = proc_pid_readlink,
1679 .follow_link = proc_pid_follow_link, 1657 .follow_link = proc_pid_follow_link,
1680 .setattr = proc_setattr, 1658 .setattr = proc_setattr,
1681}; 1659};
1682 1660
1683static const struct inode_operations proc_fdinfo_link_inode_operations = {
1684 .setattr = proc_setattr,
1685 .getattr = proc_pid_fd_link_getattr,
1686};
1687
1688static const struct inode_operations proc_fd_link_inode_operations = {
1689 .readlink = proc_pid_readlink,
1690 .follow_link = proc_pid_follow_link,
1691 .setattr = proc_setattr,
1692 .getattr = proc_pid_fd_link_getattr,
1693};
1694
1695 1661
1696/* building an inode */ 1662/* building an inode */
1697 1663
@@ -1923,61 +1889,49 @@ out:
1923 1889
1924static int proc_fd_info(struct inode *inode, struct path *path, char *info) 1890static int proc_fd_info(struct inode *inode, struct path *path, char *info)
1925{ 1891{
1926 struct task_struct *task; 1892 struct task_struct *task = get_proc_task(inode);
1927 struct files_struct *files; 1893 struct files_struct *files = NULL;
1928 struct file *file; 1894 struct file *file;
1929 int fd = proc_fd(inode); 1895 int fd = proc_fd(inode);
1930 int rc;
1931
1932 task = get_proc_task(inode);
1933 if (!task)
1934 return -ENOENT;
1935
1936 rc = -EACCES;
1937 if (lock_trace(task))
1938 goto out_task;
1939
1940 rc = -ENOENT;
1941 files = get_files_struct(task);
1942 if (files == NULL)
1943 goto out_unlock;
1944 1896
1945 /* 1897 if (task) {
1946 * We are not taking a ref to the file structure, so we must 1898 files = get_files_struct(task);
1947 * hold ->file_lock. 1899 put_task_struct(task);
1948 */ 1900 }
1949 spin_lock(&files->file_lock); 1901 if (files) {
1950 file = fcheck_files(files, fd); 1902 /*
1951 if (file) { 1903 * We are not taking a ref to the file structure, so we must
1952 unsigned int f_flags; 1904 * hold ->file_lock.
1953 struct fdtable *fdt; 1905 */
1954 1906 spin_lock(&files->file_lock);
1955 fdt = files_fdtable(files); 1907 file = fcheck_files(files, fd);
1956 f_flags = file->f_flags & ~O_CLOEXEC; 1908 if (file) {
1957 if (FD_ISSET(fd, fdt->close_on_exec)) 1909 unsigned int f_flags;
1958 f_flags |= O_CLOEXEC; 1910 struct fdtable *fdt;
1959 1911
1960 if (path) { 1912 fdt = files_fdtable(files);
1961 *path = file->f_path; 1913 f_flags = file->f_flags & ~O_CLOEXEC;
1962 path_get(&file->f_path); 1914 if (FD_ISSET(fd, fdt->close_on_exec))
1915 f_flags |= O_CLOEXEC;
1916
1917 if (path) {
1918 *path = file->f_path;
1919 path_get(&file->f_path);
1920 }
1921 if (info)
1922 snprintf(info, PROC_FDINFO_MAX,
1923 "pos:\t%lli\n"
1924 "flags:\t0%o\n",
1925 (long long) file->f_pos,
1926 f_flags);
1927 spin_unlock(&files->file_lock);
1928 put_files_struct(files);
1929 return 0;
1963 } 1930 }
1964 if (info) 1931 spin_unlock(&files->file_lock);
1965 snprintf(info, PROC_FDINFO_MAX, 1932 put_files_struct(files);
1966 "pos:\t%lli\n" 1933 }
1967 "flags:\t0%o\n", 1934 return -ENOENT;
1968 (long long) file->f_pos,
1969 f_flags);
1970 rc = 0;
1971 } else
1972 rc = -ENOENT;
1973 spin_unlock(&files->file_lock);
1974 put_files_struct(files);
1975
1976out_unlock:
1977 unlock_trace(task);
1978out_task:
1979 put_task_struct(task);
1980 return rc;
1981} 1935}
1982 1936
1983static int proc_fd_link(struct inode *inode, struct path *path) 1937static int proc_fd_link(struct inode *inode, struct path *path)
@@ -2072,7 +2026,7 @@ static struct dentry *proc_fd_instantiate(struct inode *dir,
2072 spin_unlock(&files->file_lock); 2026 spin_unlock(&files->file_lock);
2073 put_files_struct(files); 2027 put_files_struct(files);
2074 2028
2075 inode->i_op = &proc_fd_link_inode_operations; 2029 inode->i_op = &proc_pid_link_inode_operations;
2076 inode->i_size = 64; 2030 inode->i_size = 64;
2077 ei->op.proc_get_link = proc_fd_link; 2031 ei->op.proc_get_link = proc_fd_link;
2078 d_set_d_op(dentry, &tid_fd_dentry_operations); 2032 d_set_d_op(dentry, &tid_fd_dentry_operations);
@@ -2104,12 +2058,7 @@ static struct dentry *proc_lookupfd_common(struct inode *dir,
2104 if (fd == ~0U) 2058 if (fd == ~0U)
2105 goto out; 2059 goto out;
2106 2060
2107 result = ERR_PTR(-EACCES);
2108 if (lock_trace(task))
2109 goto out;
2110
2111 result = instantiate(dir, dentry, task, &fd); 2061 result = instantiate(dir, dentry, task, &fd);
2112 unlock_trace(task);
2113out: 2062out:
2114 put_task_struct(task); 2063 put_task_struct(task);
2115out_no_task: 2064out_no_task:
@@ -2129,28 +2078,23 @@ static int proc_readfd_common(struct file * filp, void * dirent,
2129 retval = -ENOENT; 2078 retval = -ENOENT;
2130 if (!p) 2079 if (!p)
2131 goto out_no_task; 2080 goto out_no_task;
2132
2133 retval = -EACCES;
2134 if (lock_trace(p))
2135 goto out;
2136
2137 retval = 0; 2081 retval = 0;
2138 2082
2139 fd = filp->f_pos; 2083 fd = filp->f_pos;
2140 switch (fd) { 2084 switch (fd) {
2141 case 0: 2085 case 0:
2142 if (filldir(dirent, ".", 1, 0, inode->i_ino, DT_DIR) < 0) 2086 if (filldir(dirent, ".", 1, 0, inode->i_ino, DT_DIR) < 0)
2143 goto out_unlock; 2087 goto out;
2144 filp->f_pos++; 2088 filp->f_pos++;
2145 case 1: 2089 case 1:
2146 ino = parent_ino(dentry); 2090 ino = parent_ino(dentry);
2147 if (filldir(dirent, "..", 2, 1, ino, DT_DIR) < 0) 2091 if (filldir(dirent, "..", 2, 1, ino, DT_DIR) < 0)
2148 goto out_unlock; 2092 goto out;
2149 filp->f_pos++; 2093 filp->f_pos++;
2150 default: 2094 default:
2151 files = get_files_struct(p); 2095 files = get_files_struct(p);
2152 if (!files) 2096 if (!files)
2153 goto out_unlock; 2097 goto out;
2154 rcu_read_lock(); 2098 rcu_read_lock();
2155 for (fd = filp->f_pos-2; 2099 for (fd = filp->f_pos-2;
2156 fd < files_fdtable(files)->max_fds; 2100 fd < files_fdtable(files)->max_fds;
@@ -2174,9 +2118,6 @@ static int proc_readfd_common(struct file * filp, void * dirent,
2174 rcu_read_unlock(); 2118 rcu_read_unlock();
2175 put_files_struct(files); 2119 put_files_struct(files);
2176 } 2120 }
2177
2178out_unlock:
2179 unlock_trace(p);
2180out: 2121out:
2181 put_task_struct(p); 2122 put_task_struct(p);
2182out_no_task: 2123out_no_task:
@@ -2254,7 +2195,6 @@ static struct dentry *proc_fdinfo_instantiate(struct inode *dir,
2254 ei->fd = fd; 2195 ei->fd = fd;
2255 inode->i_mode = S_IFREG | S_IRUSR; 2196 inode->i_mode = S_IFREG | S_IRUSR;
2256 inode->i_fop = &proc_fdinfo_file_operations; 2197 inode->i_fop = &proc_fdinfo_file_operations;
2257 inode->i_op = &proc_fdinfo_link_inode_operations;
2258 d_set_d_op(dentry, &tid_fd_dentry_operations); 2198 d_set_d_op(dentry, &tid_fd_dentry_operations);
2259 d_add(dentry, inode); 2199 d_add(dentry, inode);
2260 /* Close the race of the process dying before we return the dentry */ 2200 /* Close the race of the process dying before we return the dentry */
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 33b13310ee0c..574d4ee9b625 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -189,7 +189,7 @@ xfs_end_io(
189 int error = 0; 189 int error = 0;
190 190
191 if (XFS_FORCED_SHUTDOWN(ip->i_mount)) { 191 if (XFS_FORCED_SHUTDOWN(ip->i_mount)) {
192 error = -EIO; 192 ioend->io_error = -EIO;
193 goto done; 193 goto done;
194 } 194 }
195 if (ioend->io_error) 195 if (ioend->io_error)
diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c
index 1a3513881bce..eac97ef81e2a 100644
--- a/fs/xfs/xfs_buf_item.c
+++ b/fs/xfs/xfs_buf_item.c
@@ -656,7 +656,7 @@ xfs_buf_item_committing(
656/* 656/*
657 * This is the ops vector shared by all buf log items. 657 * This is the ops vector shared by all buf log items.
658 */ 658 */
659static struct xfs_item_ops xfs_buf_item_ops = { 659static const struct xfs_item_ops xfs_buf_item_ops = {
660 .iop_size = xfs_buf_item_size, 660 .iop_size = xfs_buf_item_size,
661 .iop_format = xfs_buf_item_format, 661 .iop_format = xfs_buf_item_format,
662 .iop_pin = xfs_buf_item_pin, 662 .iop_pin = xfs_buf_item_pin,
diff --git a/fs/xfs/xfs_dquot_item.c b/fs/xfs/xfs_dquot_item.c
index bb3f71d236d2..0dee0b71029d 100644
--- a/fs/xfs/xfs_dquot_item.c
+++ b/fs/xfs/xfs_dquot_item.c
@@ -295,7 +295,7 @@ xfs_qm_dquot_logitem_committing(
295/* 295/*
296 * This is the ops vector for dquots 296 * This is the ops vector for dquots
297 */ 297 */
298static struct xfs_item_ops xfs_dquot_item_ops = { 298static const struct xfs_item_ops xfs_dquot_item_ops = {
299 .iop_size = xfs_qm_dquot_logitem_size, 299 .iop_size = xfs_qm_dquot_logitem_size,
300 .iop_format = xfs_qm_dquot_logitem_format, 300 .iop_format = xfs_qm_dquot_logitem_format,
301 .iop_pin = xfs_qm_dquot_logitem_pin, 301 .iop_pin = xfs_qm_dquot_logitem_pin,
@@ -483,7 +483,7 @@ xfs_qm_qoff_logitem_committing(
483{ 483{
484} 484}
485 485
486static struct xfs_item_ops xfs_qm_qoffend_logitem_ops = { 486static const struct xfs_item_ops xfs_qm_qoffend_logitem_ops = {
487 .iop_size = xfs_qm_qoff_logitem_size, 487 .iop_size = xfs_qm_qoff_logitem_size,
488 .iop_format = xfs_qm_qoff_logitem_format, 488 .iop_format = xfs_qm_qoff_logitem_format,
489 .iop_pin = xfs_qm_qoff_logitem_pin, 489 .iop_pin = xfs_qm_qoff_logitem_pin,
@@ -498,7 +498,7 @@ static struct xfs_item_ops xfs_qm_qoffend_logitem_ops = {
498/* 498/*
499 * This is the ops vector shared by all quotaoff-start log items. 499 * This is the ops vector shared by all quotaoff-start log items.
500 */ 500 */
501static struct xfs_item_ops xfs_qm_qoff_logitem_ops = { 501static const struct xfs_item_ops xfs_qm_qoff_logitem_ops = {
502 .iop_size = xfs_qm_qoff_logitem_size, 502 .iop_size = xfs_qm_qoff_logitem_size,
503 .iop_format = xfs_qm_qoff_logitem_format, 503 .iop_format = xfs_qm_qoff_logitem_format,
504 .iop_pin = xfs_qm_qoff_logitem_pin, 504 .iop_pin = xfs_qm_qoff_logitem_pin,
diff --git a/fs/xfs/xfs_extfree_item.c b/fs/xfs/xfs_extfree_item.c
index d22e62623437..35c2aff38b20 100644
--- a/fs/xfs/xfs_extfree_item.c
+++ b/fs/xfs/xfs_extfree_item.c
@@ -217,7 +217,7 @@ xfs_efi_item_committing(
217/* 217/*
218 * This is the ops vector shared by all efi log items. 218 * This is the ops vector shared by all efi log items.
219 */ 219 */
220static struct xfs_item_ops xfs_efi_item_ops = { 220static const struct xfs_item_ops xfs_efi_item_ops = {
221 .iop_size = xfs_efi_item_size, 221 .iop_size = xfs_efi_item_size,
222 .iop_format = xfs_efi_item_format, 222 .iop_format = xfs_efi_item_format,
223 .iop_pin = xfs_efi_item_pin, 223 .iop_pin = xfs_efi_item_pin,
@@ -477,7 +477,7 @@ xfs_efd_item_committing(
477/* 477/*
478 * This is the ops vector shared by all efd log items. 478 * This is the ops vector shared by all efd log items.
479 */ 479 */
480static struct xfs_item_ops xfs_efd_item_ops = { 480static const struct xfs_item_ops xfs_efd_item_ops = {
481 .iop_size = xfs_efd_item_size, 481 .iop_size = xfs_efd_item_size,
482 .iop_format = xfs_efd_item_format, 482 .iop_format = xfs_efd_item_format,
483 .iop_pin = xfs_efd_item_pin, 483 .iop_pin = xfs_efd_item_pin,
diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c
index b7cf21ba240f..abaafdbb3e65 100644
--- a/fs/xfs/xfs_inode_item.c
+++ b/fs/xfs/xfs_inode_item.c
@@ -795,7 +795,7 @@ xfs_inode_item_committing(
795/* 795/*
796 * This is the ops vector shared by all buf log items. 796 * This is the ops vector shared by all buf log items.
797 */ 797 */
798static struct xfs_item_ops xfs_inode_item_ops = { 798static const struct xfs_item_ops xfs_inode_item_ops = {
799 .iop_size = xfs_inode_item_size, 799 .iop_size = xfs_inode_item_size,
800 .iop_format = xfs_inode_item_format, 800 .iop_format = xfs_inode_item_format,
801 .iop_pin = xfs_inode_item_pin, 801 .iop_pin = xfs_inode_item_pin,
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index 2758a6277c52..a14cd89fe465 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -626,7 +626,7 @@ xfs_log_item_init(
626 struct xfs_mount *mp, 626 struct xfs_mount *mp,
627 struct xfs_log_item *item, 627 struct xfs_log_item *item,
628 int type, 628 int type,
629 struct xfs_item_ops *ops) 629 const struct xfs_item_ops *ops)
630{ 630{
631 item->li_mountp = mp; 631 item->li_mountp = mp;
632 item->li_ailp = mp->m_ail; 632 item->li_ailp = mp->m_ail;
diff --git a/fs/xfs/xfs_log.h b/fs/xfs/xfs_log.h
index 78c9039994af..3f7bf451c034 100644
--- a/fs/xfs/xfs_log.h
+++ b/fs/xfs/xfs_log.h
@@ -137,7 +137,7 @@ struct xfs_trans;
137void xfs_log_item_init(struct xfs_mount *mp, 137void xfs_log_item_init(struct xfs_mount *mp,
138 struct xfs_log_item *item, 138 struct xfs_log_item *item,
139 int type, 139 int type,
140 struct xfs_item_ops *ops); 140 const struct xfs_item_ops *ops);
141 141
142xfs_lsn_t xfs_log_done(struct xfs_mount *mp, 142xfs_lsn_t xfs_log_done(struct xfs_mount *mp,
143 struct xlog_ticket *ticket, 143 struct xlog_ticket *ticket,
diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c
index 5cff443f6cdb..0bbb1a41998b 100644
--- a/fs/xfs/xfs_qm.c
+++ b/fs/xfs/xfs_qm.c
@@ -674,7 +674,8 @@ xfs_qm_dqattach_one(
674 * disk and we didn't ask it to allocate; 674 * disk and we didn't ask it to allocate;
675 * ESRCH if quotas got turned off suddenly. 675 * ESRCH if quotas got turned off suddenly.
676 */ 676 */
677 error = xfs_qm_dqget(ip->i_mount, ip, id, type, XFS_QMOPT_DOWARN, &dqp); 677 error = xfs_qm_dqget(ip->i_mount, ip, id, type,
678 doalloc | XFS_QMOPT_DOWARN, &dqp);
678 if (error) 679 if (error)
679 return error; 680 return error;
680 681
diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h
index 603f3eb52041..3ae713c0abd9 100644
--- a/fs/xfs/xfs_trans.h
+++ b/fs/xfs/xfs_trans.h
@@ -326,7 +326,7 @@ typedef struct xfs_log_item {
326 struct xfs_log_item *); 326 struct xfs_log_item *);
327 /* buffer item iodone */ 327 /* buffer item iodone */
328 /* callback func */ 328 /* callback func */
329 struct xfs_item_ops *li_ops; /* function list */ 329 const struct xfs_item_ops *li_ops; /* function list */
330 330
331 /* delayed logging */ 331 /* delayed logging */
332 struct list_head li_cil; /* CIL pointers */ 332 struct list_head li_cil; /* CIL pointers */
@@ -341,7 +341,7 @@ typedef struct xfs_log_item {
341 { XFS_LI_IN_AIL, "IN_AIL" }, \ 341 { XFS_LI_IN_AIL, "IN_AIL" }, \
342 { XFS_LI_ABORTED, "ABORTED" } 342 { XFS_LI_ABORTED, "ABORTED" }
343 343
344typedef struct xfs_item_ops { 344struct xfs_item_ops {
345 uint (*iop_size)(xfs_log_item_t *); 345 uint (*iop_size)(xfs_log_item_t *);
346 void (*iop_format)(xfs_log_item_t *, struct xfs_log_iovec *); 346 void (*iop_format)(xfs_log_item_t *, struct xfs_log_iovec *);
347 void (*iop_pin)(xfs_log_item_t *); 347 void (*iop_pin)(xfs_log_item_t *);
@@ -352,7 +352,7 @@ typedef struct xfs_item_ops {
352 void (*iop_push)(xfs_log_item_t *); 352 void (*iop_push)(xfs_log_item_t *);
353 bool (*iop_pushbuf)(xfs_log_item_t *); 353 bool (*iop_pushbuf)(xfs_log_item_t *);
354 void (*iop_committing)(xfs_log_item_t *, xfs_lsn_t); 354 void (*iop_committing)(xfs_log_item_t *, xfs_lsn_t);
355} xfs_item_ops_t; 355};
356 356
357#define IOP_SIZE(ip) (*(ip)->li_ops->iop_size)(ip) 357#define IOP_SIZE(ip) (*(ip)->li_ops->iop_size)(ip)
358#define IOP_FORMAT(ip,vp) (*(ip)->li_ops->iop_format)(ip, vp) 358#define IOP_FORMAT(ip,vp) (*(ip)->li_ops->iop_format)(ip, vp)
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c
index 4ecf2a549060..ce9268a2f56b 100644
--- a/fs/xfs/xfs_vnodeops.c
+++ b/fs/xfs/xfs_vnodeops.c
@@ -112,7 +112,7 @@ xfs_readlink(
112 char *link) 112 char *link)
113{ 113{
114 xfs_mount_t *mp = ip->i_mount; 114 xfs_mount_t *mp = ip->i_mount;
115 int pathlen; 115 xfs_fsize_t pathlen;
116 int error = 0; 116 int error = 0;
117 117
118 trace_xfs_readlink(ip); 118 trace_xfs_readlink(ip);
@@ -122,13 +122,19 @@ xfs_readlink(
122 122
123 xfs_ilock(ip, XFS_ILOCK_SHARED); 123 xfs_ilock(ip, XFS_ILOCK_SHARED);
124 124
125 ASSERT(S_ISLNK(ip->i_d.di_mode));
126 ASSERT(ip->i_d.di_size <= MAXPATHLEN);
127
128 pathlen = ip->i_d.di_size; 125 pathlen = ip->i_d.di_size;
129 if (!pathlen) 126 if (!pathlen)
130 goto out; 127 goto out;
131 128
129 if (pathlen < 0 || pathlen > MAXPATHLEN) {
130 xfs_alert(mp, "%s: inode (%llu) bad symlink length (%lld)",
131 __func__, (unsigned long long) ip->i_ino,
132 (long long) pathlen);
133 ASSERT(0);
134 return XFS_ERROR(EFSCORRUPTED);
135 }
136
137
132 if (ip->i_df.if_flags & XFS_IFINLINE) { 138 if (ip->i_df.if_flags & XFS_IFINLINE) {
133 memcpy(link, ip->i_df.if_u1.if_data, pathlen); 139 memcpy(link, ip->i_df.if_u1.if_data, pathlen);
134 link[pathlen] = '\0'; 140 link[pathlen] = '\0';