aboutsummaryrefslogtreecommitdiffstats
path: root/fs/btrfs/disk-io.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/btrfs/disk-io.c')
-rw-r--r--fs/btrfs/disk-io.c230
1 files changed, 136 insertions, 94 deletions
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 22e98e04c2ea..7cda51995c1e 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -46,6 +46,10 @@
46#include "check-integrity.h" 46#include "check-integrity.h"
47#include "rcu-string.h" 47#include "rcu-string.h"
48 48
49#ifdef CONFIG_X86
50#include <asm/cpufeature.h>
51#endif
52
49static struct extent_io_ops btree_extent_io_ops; 53static struct extent_io_ops btree_extent_io_ops;
50static void end_workqueue_fn(struct btrfs_work *work); 54static void end_workqueue_fn(struct btrfs_work *work);
51static void free_fs_root(struct btrfs_root *root); 55static void free_fs_root(struct btrfs_root *root);
@@ -217,26 +221,16 @@ static struct extent_map *btree_get_extent(struct inode *inode,
217 write_lock(&em_tree->lock); 221 write_lock(&em_tree->lock);
218 ret = add_extent_mapping(em_tree, em); 222 ret = add_extent_mapping(em_tree, em);
219 if (ret == -EEXIST) { 223 if (ret == -EEXIST) {
220 u64 failed_start = em->start;
221 u64 failed_len = em->len;
222
223 free_extent_map(em); 224 free_extent_map(em);
224 em = lookup_extent_mapping(em_tree, start, len); 225 em = lookup_extent_mapping(em_tree, start, len);
225 if (em) { 226 if (!em)
226 ret = 0; 227 em = ERR_PTR(-EIO);
227 } else {
228 em = lookup_extent_mapping(em_tree, failed_start,
229 failed_len);
230 ret = -EIO;
231 }
232 } else if (ret) { 228 } else if (ret) {
233 free_extent_map(em); 229 free_extent_map(em);
234 em = NULL; 230 em = ERR_PTR(ret);
235 } 231 }
236 write_unlock(&em_tree->lock); 232 write_unlock(&em_tree->lock);
237 233
238 if (ret)
239 em = ERR_PTR(ret);
240out: 234out:
241 return em; 235 return em;
242} 236}
@@ -439,10 +433,6 @@ static int csum_dirty_buffer(struct btrfs_root *root, struct page *page)
439 WARN_ON(1); 433 WARN_ON(1);
440 return 0; 434 return 0;
441 } 435 }
442 if (eb->pages[0] != page) {
443 WARN_ON(1);
444 return 0;
445 }
446 if (!PageUptodate(page)) { 436 if (!PageUptodate(page)) {
447 WARN_ON(1); 437 WARN_ON(1);
448 return 0; 438 return 0;
@@ -869,10 +859,22 @@ static int __btree_submit_bio_done(struct inode *inode, int rw, struct bio *bio,
869 return btrfs_map_bio(BTRFS_I(inode)->root, rw, bio, mirror_num, 1); 859 return btrfs_map_bio(BTRFS_I(inode)->root, rw, bio, mirror_num, 1);
870} 860}
871 861
/*
 * Tell the caller whether a btree write should use the asynchronous path.
 *
 * Returns 0 when bio_flags has EXTENT_BIO_TREE_LOG set, or, on CONFIG_X86
 * builds, when cpu_has_xmm4_2 is true; returns 1 in every other case.
 * The inode argument is not read by this function.
 *
 * In btree_submit_bio_hook() a return of 0 makes a write go through
 * btree_csum_one_bio() and btrfs_map_bio() directly.
 */
862static int check_async_write(struct inode *inode, unsigned long bio_flags)
863{
864 if (bio_flags & EXTENT_BIO_TREE_LOG)
865 return 0;
866#ifdef CONFIG_X86
/*
 * NOTE(review): presumably this treats checksumming as cheap enough to do
 * inline when the CPU has SSE4.2 -- confirm the intent.
 */
867 if (cpu_has_xmm4_2)
868 return 0;
869#endif
870 return 1;
871}
872
872static int btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, 873static int btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
873 int mirror_num, unsigned long bio_flags, 874 int mirror_num, unsigned long bio_flags,
874 u64 bio_offset) 875 u64 bio_offset)
875{ 876{
877 int async = check_async_write(inode, bio_flags);
876 int ret; 878 int ret;
877 879
878 if (!(rw & REQ_WRITE)) { 880 if (!(rw & REQ_WRITE)) {
@@ -887,6 +889,12 @@ static int btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
887 return ret; 889 return ret;
888 return btrfs_map_bio(BTRFS_I(inode)->root, rw, bio, 890 return btrfs_map_bio(BTRFS_I(inode)->root, rw, bio,
889 mirror_num, 0); 891 mirror_num, 0);
892 } else if (!async) {
893 ret = btree_csum_one_bio(bio);
894 if (ret)
895 return ret;
896 return btrfs_map_bio(BTRFS_I(inode)->root, rw, bio,
897 mirror_num, 0);
890 } 898 }
891 899
892 /* 900 /*
@@ -1168,8 +1176,8 @@ static void __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize,
1168 atomic_set(&root->log_commit[0], 0); 1176 atomic_set(&root->log_commit[0], 0);
1169 atomic_set(&root->log_commit[1], 0); 1177 atomic_set(&root->log_commit[1], 0);
1170 atomic_set(&root->log_writers, 0); 1178 atomic_set(&root->log_writers, 0);
1179 atomic_set(&root->log_batch, 0);
1171 atomic_set(&root->orphan_inodes, 0); 1180 atomic_set(&root->orphan_inodes, 0);
1172 root->log_batch = 0;
1173 root->log_transid = 0; 1181 root->log_transid = 0;
1174 root->last_log_commit = 0; 1182 root->last_log_commit = 0;
1175 extent_io_tree_init(&root->dirty_log_pages, 1183 extent_io_tree_init(&root->dirty_log_pages,
@@ -1667,9 +1675,10 @@ static int transaction_kthread(void *arg)
1667 spin_unlock(&root->fs_info->trans_lock); 1675 spin_unlock(&root->fs_info->trans_lock);
1668 1676
1669 /* If the file system is aborted, this will always fail. */ 1677 /* If the file system is aborted, this will always fail. */
1670 trans = btrfs_join_transaction(root); 1678 trans = btrfs_attach_transaction(root);
1671 if (IS_ERR(trans)) { 1679 if (IS_ERR(trans)) {
1672 cannot_commit = true; 1680 if (PTR_ERR(trans) != -ENOENT)
1681 cannot_commit = true;
1673 goto sleep; 1682 goto sleep;
1674 } 1683 }
1675 if (transid == trans->transid) { 1684 if (transid == trans->transid) {
@@ -1994,13 +2003,11 @@ int open_ctree(struct super_block *sb,
1994 INIT_LIST_HEAD(&fs_info->trans_list); 2003 INIT_LIST_HEAD(&fs_info->trans_list);
1995 INIT_LIST_HEAD(&fs_info->dead_roots); 2004 INIT_LIST_HEAD(&fs_info->dead_roots);
1996 INIT_LIST_HEAD(&fs_info->delayed_iputs); 2005 INIT_LIST_HEAD(&fs_info->delayed_iputs);
1997 INIT_LIST_HEAD(&fs_info->hashers);
1998 INIT_LIST_HEAD(&fs_info->delalloc_inodes); 2006 INIT_LIST_HEAD(&fs_info->delalloc_inodes);
1999 INIT_LIST_HEAD(&fs_info->ordered_operations); 2007 INIT_LIST_HEAD(&fs_info->ordered_operations);
2000 INIT_LIST_HEAD(&fs_info->caching_block_groups); 2008 INIT_LIST_HEAD(&fs_info->caching_block_groups);
2001 spin_lock_init(&fs_info->delalloc_lock); 2009 spin_lock_init(&fs_info->delalloc_lock);
2002 spin_lock_init(&fs_info->trans_lock); 2010 spin_lock_init(&fs_info->trans_lock);
2003 spin_lock_init(&fs_info->ref_cache_lock);
2004 spin_lock_init(&fs_info->fs_roots_radix_lock); 2011 spin_lock_init(&fs_info->fs_roots_radix_lock);
2005 spin_lock_init(&fs_info->delayed_iput_lock); 2012 spin_lock_init(&fs_info->delayed_iput_lock);
2006 spin_lock_init(&fs_info->defrag_inodes_lock); 2013 spin_lock_init(&fs_info->defrag_inodes_lock);
@@ -2014,12 +2021,15 @@ int open_ctree(struct super_block *sb,
2014 INIT_LIST_HEAD(&fs_info->space_info); 2021 INIT_LIST_HEAD(&fs_info->space_info);
2015 INIT_LIST_HEAD(&fs_info->tree_mod_seq_list); 2022 INIT_LIST_HEAD(&fs_info->tree_mod_seq_list);
2016 btrfs_mapping_init(&fs_info->mapping_tree); 2023 btrfs_mapping_init(&fs_info->mapping_tree);
2017 btrfs_init_block_rsv(&fs_info->global_block_rsv); 2024 btrfs_init_block_rsv(&fs_info->global_block_rsv,
2018 btrfs_init_block_rsv(&fs_info->delalloc_block_rsv); 2025 BTRFS_BLOCK_RSV_GLOBAL);
2019 btrfs_init_block_rsv(&fs_info->trans_block_rsv); 2026 btrfs_init_block_rsv(&fs_info->delalloc_block_rsv,
2020 btrfs_init_block_rsv(&fs_info->chunk_block_rsv); 2027 BTRFS_BLOCK_RSV_DELALLOC);
2021 btrfs_init_block_rsv(&fs_info->empty_block_rsv); 2028 btrfs_init_block_rsv(&fs_info->trans_block_rsv, BTRFS_BLOCK_RSV_TRANS);
2022 btrfs_init_block_rsv(&fs_info->delayed_block_rsv); 2029 btrfs_init_block_rsv(&fs_info->chunk_block_rsv, BTRFS_BLOCK_RSV_CHUNK);
2030 btrfs_init_block_rsv(&fs_info->empty_block_rsv, BTRFS_BLOCK_RSV_EMPTY);
2031 btrfs_init_block_rsv(&fs_info->delayed_block_rsv,
2032 BTRFS_BLOCK_RSV_DELOPS);
2023 atomic_set(&fs_info->nr_async_submits, 0); 2033 atomic_set(&fs_info->nr_async_submits, 0);
2024 atomic_set(&fs_info->async_delalloc_pages, 0); 2034 atomic_set(&fs_info->async_delalloc_pages, 0);
2025 atomic_set(&fs_info->async_submit_draining, 0); 2035 atomic_set(&fs_info->async_submit_draining, 0);
@@ -2491,6 +2501,8 @@ retry_root_backup:
2491 printk(KERN_ERR "Failed to read block groups: %d\n", ret); 2501 printk(KERN_ERR "Failed to read block groups: %d\n", ret);
2492 goto fail_block_groups; 2502 goto fail_block_groups;
2493 } 2503 }
2504 fs_info->num_tolerated_disk_barrier_failures =
2505 btrfs_calc_num_tolerated_disk_barrier_failures(fs_info);
2494 2506
2495 fs_info->cleaner_kthread = kthread_run(cleaner_kthread, tree_root, 2507 fs_info->cleaner_kthread = kthread_run(cleaner_kthread, tree_root,
2496 "btrfs-cleaner"); 2508 "btrfs-cleaner");
@@ -2874,12 +2886,10 @@ static int write_dev_flush(struct btrfs_device *device, int wait)
2874 printk_in_rcu("btrfs: disabling barriers on dev %s\n", 2886 printk_in_rcu("btrfs: disabling barriers on dev %s\n",
2875 rcu_str_deref(device->name)); 2887 rcu_str_deref(device->name));
2876 device->nobarriers = 1; 2888 device->nobarriers = 1;
2877 } 2889 } else if (!bio_flagged(bio, BIO_UPTODATE)) {
2878 if (!bio_flagged(bio, BIO_UPTODATE)) {
2879 ret = -EIO; 2890 ret = -EIO;
2880 if (!bio_flagged(bio, BIO_EOPNOTSUPP)) 2891 btrfs_dev_stat_inc_and_print(device,
2881 btrfs_dev_stat_inc_and_print(device, 2892 BTRFS_DEV_STAT_FLUSH_ERRS);
2882 BTRFS_DEV_STAT_FLUSH_ERRS);
2883 } 2893 }
2884 2894
2885 /* drop the reference from the wait == 0 run */ 2895 /* drop the reference from the wait == 0 run */
@@ -2918,14 +2928,15 @@ static int barrier_all_devices(struct btrfs_fs_info *info)
2918{ 2928{
2919 struct list_head *head; 2929 struct list_head *head;
2920 struct btrfs_device *dev; 2930 struct btrfs_device *dev;
2921 int errors = 0; 2931 int errors_send = 0;
2932 int errors_wait = 0;
2922 int ret; 2933 int ret;
2923 2934
2924 /* send down all the barriers */ 2935 /* send down all the barriers */
2925 head = &info->fs_devices->devices; 2936 head = &info->fs_devices->devices;
2926 list_for_each_entry_rcu(dev, head, dev_list) { 2937 list_for_each_entry_rcu(dev, head, dev_list) {
2927 if (!dev->bdev) { 2938 if (!dev->bdev) {
2928 errors++; 2939 errors_send++;
2929 continue; 2940 continue;
2930 } 2941 }
2931 if (!dev->in_fs_metadata || !dev->writeable) 2942 if (!dev->in_fs_metadata || !dev->writeable)
@@ -2933,13 +2944,13 @@ static int barrier_all_devices(struct btrfs_fs_info *info)
2933 2944
2934 ret = write_dev_flush(dev, 0); 2945 ret = write_dev_flush(dev, 0);
2935 if (ret) 2946 if (ret)
2936 errors++; 2947 errors_send++;
2937 } 2948 }
2938 2949
2939 /* wait for all the barriers */ 2950 /* wait for all the barriers */
2940 list_for_each_entry_rcu(dev, head, dev_list) { 2951 list_for_each_entry_rcu(dev, head, dev_list) {
2941 if (!dev->bdev) { 2952 if (!dev->bdev) {
2942 errors++; 2953 errors_wait++;
2943 continue; 2954 continue;
2944 } 2955 }
2945 if (!dev->in_fs_metadata || !dev->writeable) 2956 if (!dev->in_fs_metadata || !dev->writeable)
@@ -2947,13 +2958,87 @@ static int barrier_all_devices(struct btrfs_fs_info *info)
2947 2958
2948 ret = write_dev_flush(dev, 1); 2959 ret = write_dev_flush(dev, 1);
2949 if (ret) 2960 if (ret)
2950 errors++; 2961 errors_wait++;
2951 } 2962 }
2952 if (errors) 2963 if (errors_send > info->num_tolerated_disk_barrier_failures ||
2964 errors_wait > info->num_tolerated_disk_barrier_failures)
2953 return -EIO; 2965 return -EIO;
2954 return 0; 2966 return 0;
2955} 2967}
2956 2968
/*
 * Compute the limit that barrier_all_devices() compares its send and wait
 * error counts against (open_ctree() stores the result in
 * fs_info->num_tolerated_disk_barrier_failures).
 *
 * The result starts at fs_info->fs_devices->num_devices and is only ever
 * lowered: to 0 when a populated block group uses DUP, RAID0 or has no
 * profile bit set, and to 1 when one uses RAID1 or RAID10.
 *
 * Returns the resulting count as an int.
 */
2969int btrfs_calc_num_tolerated_disk_barrier_failures(
2970 struct btrfs_fs_info *fs_info)
2971{
2972 struct btrfs_ioctl_space_info space;
2973 struct btrfs_space_info *sinfo;
/* Block group type combinations whose space_info entries are inspected. */
2974 u64 types[] = {BTRFS_BLOCK_GROUP_DATA,
2975 BTRFS_BLOCK_GROUP_SYSTEM,
2976 BTRFS_BLOCK_GROUP_METADATA,
2977 BTRFS_BLOCK_GROUP_DATA | BTRFS_BLOCK_GROUP_METADATA};
2978 int num_types = 4;
2979 int i;
2980 int c;
2981 int num_tolerated_disk_barrier_failures =
2982 (int)fs_info->fs_devices->num_devices;
2983
2984 for (i = 0; i < num_types; i++) {
2985 struct btrfs_space_info *tmp;
2986
/* Find the space_info whose flags equal types[i] exactly. */
2987 sinfo = NULL;
2988 rcu_read_lock();
2989 list_for_each_entry_rcu(tmp, &fs_info->space_info, list) {
2990 if (tmp->flags == types[i]) {
2991 sinfo = tmp;
2992 break;
2993 }
2994 }
2995 rcu_read_unlock();
/*
 * NOTE(review): sinfo is used below after rcu_read_unlock(); presumably
 * space_info entries stay allocated for the lifetime of the mount --
 * confirm.
 */
2996
2997 if (!sinfo)
2998 continue;
2999
3000 down_read(&sinfo->groups_sem);
3001 for (c = 0; c < BTRFS_NR_RAID_TYPES; c++) {
3002 if (!list_empty(&sinfo->block_groups[c])) {
3003 u64 flags;
3004
3005 btrfs_get_block_group_info(
3006 &sinfo->block_groups[c], &space);
/* Skip lists that report no total or no used bytes (next c). */
3007 if (space.total_bytes == 0 ||
3008 space.used_bytes == 0)
3009 continue;
3010 flags = space.flags;
3011 /*
3012 * return
3013 * 0: if dup, single or RAID0 is configured for
3014 * any of metadata, system or data, else
3015 * 1: if RAID5 is configured, or if RAID1 or
3016 * RAID10 is configured and only two mirrors
3017 * are used, else
3018 * 2: if RAID6 is configured, else
3019 * num_mirrors - 1: if RAID1 or RAID10 is
3020 * configured and more than
3021 * 2 mirrors are used.
3022 */
/*
 * NOTE(review): the branches below only test DUP, RAID0, an empty
 * profile mask, RAID1 and RAID10. The RAID5, RAID6 and "more than
 * 2 mirrors" cases described above have no corresponding code here.
 */
3023 if (num_tolerated_disk_barrier_failures > 0 &&
3024 ((flags & (BTRFS_BLOCK_GROUP_DUP |
3025 BTRFS_BLOCK_GROUP_RAID0)) ||
3026 ((flags & BTRFS_BLOCK_GROUP_PROFILE_MASK)
3027 == 0)))
3028 num_tolerated_disk_barrier_failures = 0;
3029 else if (num_tolerated_disk_barrier_failures > 1
3030 &&
3031 (flags & (BTRFS_BLOCK_GROUP_RAID1 |
3032 BTRFS_BLOCK_GROUP_RAID10)))
3033 num_tolerated_disk_barrier_failures = 1;
3034 }
3035 }
3036 up_read(&sinfo->groups_sem);
3037 }
3038
3039 return num_tolerated_disk_barrier_failures;
3040}
3041
2957int write_all_supers(struct btrfs_root *root, int max_mirrors) 3042int write_all_supers(struct btrfs_root *root, int max_mirrors)
2958{ 3043{
2959 struct list_head *head; 3044 struct list_head *head;
@@ -2976,8 +3061,16 @@ int write_all_supers(struct btrfs_root *root, int max_mirrors)
2976 mutex_lock(&root->fs_info->fs_devices->device_list_mutex); 3061 mutex_lock(&root->fs_info->fs_devices->device_list_mutex);
2977 head = &root->fs_info->fs_devices->devices; 3062 head = &root->fs_info->fs_devices->devices;
2978 3063
2979 if (do_barriers) 3064 if (do_barriers) {
2980 barrier_all_devices(root->fs_info); 3065 ret = barrier_all_devices(root->fs_info);
3066 if (ret) {
3067 mutex_unlock(
3068 &root->fs_info->fs_devices->device_list_mutex);
3069 btrfs_error(root->fs_info, ret,
3070 "errors while submitting device barriers.");
3071 return ret;
3072 }
3073 }
2981 3074
2982 list_for_each_entry_rcu(dev, head, dev_list) { 3075 list_for_each_entry_rcu(dev, head, dev_list) {
2983 if (!dev->bdev) { 3076 if (!dev->bdev) {
@@ -3211,10 +3304,6 @@ int close_ctree(struct btrfs_root *root)
3211 printk(KERN_INFO "btrfs: at unmount delalloc count %llu\n", 3304 printk(KERN_INFO "btrfs: at unmount delalloc count %llu\n",
3212 (unsigned long long)fs_info->delalloc_bytes); 3305 (unsigned long long)fs_info->delalloc_bytes);
3213 } 3306 }
3214 if (fs_info->total_ref_cache_size) {
3215 printk(KERN_INFO "btrfs: at umount reference cache size %llu\n",
3216 (unsigned long long)fs_info->total_ref_cache_size);
3217 }
3218 3307
3219 free_extent_buffer(fs_info->extent_root->node); 3308 free_extent_buffer(fs_info->extent_root->node);
3220 free_extent_buffer(fs_info->extent_root->commit_root); 3309 free_extent_buffer(fs_info->extent_root->commit_root);
@@ -3360,52 +3449,6 @@ int btrfs_read_buffer(struct extent_buffer *buf, u64 parent_transid)
3360 return btree_read_extent_buffer_pages(root, buf, 0, parent_transid); 3449 return btree_read_extent_buffer_pages(root, buf, 0, parent_transid);
3361} 3450}
3362 3451
/*
 * Page-lock hook for btree pages (installed as
 * .write_cache_pages_lock_hook in btree_extent_io_ops).
 *
 * page:     page to lock; its mapping host inode supplies the btrfs root.
 * data:     opaque argument handed to flush_fn.
 * flush_fn: called before falling back to a blocking lock, both for the
 *           extent buffer tree lock and for the page lock.
 *
 * When the page is the first page of an attached extent buffer, the buffer
 * is write-locked, flagged BTRFS_HEADER_FLAG_WRITTEN and, if its
 * EXTENT_BUFFER_DIRTY bit was set, the bit is cleared and eb->len is
 * subtracted from fs_info->dirty_metadata_bytes. The page is then locked.
 *
 * Always returns 0.
 */
3363int btree_lock_page_hook(struct page *page, void *data,
3364 void (*flush_fn)(void *))
3365{
3366 struct inode *inode = page->mapping->host;
3367 struct btrfs_root *root = BTRFS_I(inode)->root;
3368 struct extent_buffer *eb;
3369
3370 /*
3371 * We culled this eb but the page is still hanging out on the mapping,
3372 * carry on.
3373 */
3374 if (!PagePrivate(page))
3375 goto out;
3376
3377 eb = (struct extent_buffer *)page->private;
3378 if (!eb) {
3379 WARN_ON(1);
3380 goto out;
3381 }
/* Only the extent buffer's first page does the eb bookkeeping below. */
3382 if (page != eb->pages[0])
3383 goto out;
3384
/* Try the tree write lock first; flush via flush_fn before blocking on it. */
3385 if (!btrfs_try_tree_write_lock(eb)) {
3386 flush_fn(data);
3387 btrfs_tree_lock(eb);
3388 }
3389 btrfs_set_header_flag(eb, BTRFS_HEADER_FLAG_WRITTEN);
3390
/*
 * If the buffer was dirty, drop its length from the dirty metadata
 * counter under delalloc_lock; warn instead of letting it underflow.
 */
3391 if (test_and_clear_bit(EXTENT_BUFFER_DIRTY, &eb->bflags)) {
3392 spin_lock(&root->fs_info->delalloc_lock);
3393 if (root->fs_info->dirty_metadata_bytes >= eb->len)
3394 root->fs_info->dirty_metadata_bytes -= eb->len;
3395 else
3396 WARN_ON(1);
3397 spin_unlock(&root->fs_info->delalloc_lock);
3398 }
3399
3400 btrfs_tree_unlock(eb);
3401out:
/* Same pattern for the page lock: trylock, else flush and block. */
3402 if (!trylock_page(page)) {
3403 flush_fn(data);
3404 lock_page(page);
3405 }
3406 return 0;
3407}
3408
3409static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info, 3452static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info,
3410 int read_only) 3453 int read_only)
3411{ 3454{
@@ -3608,7 +3651,7 @@ static int btrfs_destroy_marked_extents(struct btrfs_root *root,
3608 3651
3609 while (1) { 3652 while (1) {
3610 ret = find_first_extent_bit(dirty_pages, start, &start, &end, 3653 ret = find_first_extent_bit(dirty_pages, start, &start, &end,
3611 mark); 3654 mark, NULL);
3612 if (ret) 3655 if (ret)
3613 break; 3656 break;
3614 3657
@@ -3663,7 +3706,7 @@ static int btrfs_destroy_pinned_extent(struct btrfs_root *root,
3663again: 3706again:
3664 while (1) { 3707 while (1) {
3665 ret = find_first_extent_bit(unpin, 0, &start, &end, 3708 ret = find_first_extent_bit(unpin, 0, &start, &end,
3666 EXTENT_DIRTY); 3709 EXTENT_DIRTY, NULL);
3667 if (ret) 3710 if (ret)
3668 break; 3711 break;
3669 3712
@@ -3800,7 +3843,6 @@ int btrfs_cleanup_transaction(struct btrfs_root *root)
3800} 3843}
3801 3844
3802static struct extent_io_ops btree_extent_io_ops = { 3845static struct extent_io_ops btree_extent_io_ops = {
3803 .write_cache_pages_lock_hook = btree_lock_page_hook,
3804 .readpage_end_io_hook = btree_readpage_end_io_hook, 3846 .readpage_end_io_hook = btree_readpage_end_io_hook,
3805 .readpage_io_failed_hook = btree_io_failed_hook, 3847 .readpage_io_failed_hook = btree_io_failed_hook,
3806 .submit_bio_hook = btree_submit_bio_hook, 3848 .submit_bio_hook = btree_submit_bio_hook,