diff options
Diffstat (limited to 'fs/btrfs')
| -rw-r--r-- | fs/btrfs/acl.c | 6 | ||||
| -rw-r--r-- | fs/btrfs/btrfs_inode.h | 8 | ||||
| -rw-r--r-- | fs/btrfs/ctree.h | 27 | ||||
| -rw-r--r-- | fs/btrfs/disk-io.c | 10 | ||||
| -rw-r--r-- | fs/btrfs/extent-tree.c | 391 | ||||
| -rw-r--r-- | fs/btrfs/extent_io.c | 92 | ||||
| -rw-r--r-- | fs/btrfs/extent_io.h | 13 | ||||
| -rw-r--r-- | fs/btrfs/file.c | 35 | ||||
| -rw-r--r-- | fs/btrfs/inode.c | 239 | ||||
| -rw-r--r-- | fs/btrfs/ioctl.c | 62 | ||||
| -rw-r--r-- | fs/btrfs/ordered-data.c | 93 | ||||
| -rw-r--r-- | fs/btrfs/ordered-data.h | 4 | ||||
| -rw-r--r-- | fs/btrfs/super.c | 2 | ||||
| -rw-r--r-- | fs/btrfs/transaction.c | 10 | ||||
| -rw-r--r-- | fs/btrfs/volumes.c | 4 | ||||
| -rw-r--r-- | fs/btrfs/xattr.c | 2 |
16 files changed, 749 insertions, 249 deletions
diff --git a/fs/btrfs/acl.c b/fs/btrfs/acl.c index f128427b995b..69b355ae7f49 100644 --- a/fs/btrfs/acl.c +++ b/fs/btrfs/acl.c | |||
| @@ -27,7 +27,7 @@ | |||
| 27 | #include "btrfs_inode.h" | 27 | #include "btrfs_inode.h" |
| 28 | #include "xattr.h" | 28 | #include "xattr.h" |
| 29 | 29 | ||
| 30 | #ifdef CONFIG_FS_POSIX_ACL | 30 | #ifdef CONFIG_BTRFS_POSIX_ACL |
| 31 | 31 | ||
| 32 | static struct posix_acl *btrfs_get_acl(struct inode *inode, int type) | 32 | static struct posix_acl *btrfs_get_acl(struct inode *inode, int type) |
| 33 | { | 33 | { |
| @@ -313,7 +313,7 @@ struct xattr_handler btrfs_xattr_acl_access_handler = { | |||
| 313 | .set = btrfs_xattr_acl_access_set, | 313 | .set = btrfs_xattr_acl_access_set, |
| 314 | }; | 314 | }; |
| 315 | 315 | ||
| 316 | #else /* CONFIG_FS_POSIX_ACL */ | 316 | #else /* CONFIG_BTRFS_POSIX_ACL */ |
| 317 | 317 | ||
| 318 | int btrfs_acl_chmod(struct inode *inode) | 318 | int btrfs_acl_chmod(struct inode *inode) |
| 319 | { | 319 | { |
| @@ -325,4 +325,4 @@ int btrfs_init_acl(struct inode *inode, struct inode *dir) | |||
| 325 | return 0; | 325 | return 0; |
| 326 | } | 326 | } |
| 327 | 327 | ||
| 328 | #endif /* CONFIG_FS_POSIX_ACL */ | 328 | #endif /* CONFIG_BTRFS_POSIX_ACL */ |
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h index 82ee56bba299..a54d354cefcb 100644 --- a/fs/btrfs/btrfs_inode.h +++ b/fs/btrfs/btrfs_inode.h | |||
| @@ -128,6 +128,14 @@ struct btrfs_inode { | |||
| 128 | u64 last_unlink_trans; | 128 | u64 last_unlink_trans; |
| 129 | 129 | ||
| 130 | /* | 130 | /* |
| 131 | * These two counters are for delalloc metadata reservations. We keep | ||
| 132 | * track of how many extents we've accounted for vs how many extents we | ||
| 133 | * have. | ||
| 134 | */ | ||
| 135 | int delalloc_reserved_extents; | ||
| 136 | int delalloc_extents; | ||
| 137 | |||
| 138 | /* | ||
| 131 | * ordered_data_close is set by truncate when a file that used | 139 | * ordered_data_close is set by truncate when a file that used |
| 132 | * to have good data has been truncated to zero. When it is set | 140 | * to have good data has been truncated to zero. When it is set |
| 133 | * the btrfs file release call will add this inode to the | 141 | * the btrfs file release call will add this inode to the |
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 80599b4e42bd..dd8ced9814c4 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h | |||
| @@ -675,18 +675,19 @@ struct btrfs_space_info { | |||
| 675 | current allocations */ | 675 | current allocations */ |
| 676 | u64 bytes_readonly; /* total bytes that are read only */ | 676 | u64 bytes_readonly; /* total bytes that are read only */ |
| 677 | u64 bytes_super; /* total bytes reserved for the super blocks */ | 677 | u64 bytes_super; /* total bytes reserved for the super blocks */ |
| 678 | 678 | u64 bytes_root; /* the number of bytes needed to commit a | |
| 679 | /* delalloc accounting */ | 679 | transaction */ |
| 680 | u64 bytes_delalloc; /* number of bytes reserved for allocation, | ||
| 681 | this space is not necessarily reserved yet | ||
| 682 | by the allocator */ | ||
| 683 | u64 bytes_may_use; /* number of bytes that may be used for | 680 | u64 bytes_may_use; /* number of bytes that may be used for |
| 684 | delalloc */ | 681 | delalloc/allocations */ |
| 682 | u64 bytes_delalloc; /* number of bytes currently reserved for | ||
| 683 | delayed allocation */ | ||
| 685 | 684 | ||
| 686 | int full; /* indicates that we cannot allocate any more | 685 | int full; /* indicates that we cannot allocate any more |
| 687 | chunks for this space */ | 686 | chunks for this space */ |
| 688 | int force_alloc; /* set if we need to force a chunk alloc for | 687 | int force_alloc; /* set if we need to force a chunk alloc for |
| 689 | this space */ | 688 | this space */ |
| 689 | int force_delalloc; /* make people start doing filemap_flush until | ||
| 690 | we're under a threshold */ | ||
| 690 | 691 | ||
| 691 | struct list_head list; | 692 | struct list_head list; |
| 692 | 693 | ||
| @@ -695,6 +696,9 @@ struct btrfs_space_info { | |||
| 695 | spinlock_t lock; | 696 | spinlock_t lock; |
| 696 | struct rw_semaphore groups_sem; | 697 | struct rw_semaphore groups_sem; |
| 697 | atomic_t caching_threads; | 698 | atomic_t caching_threads; |
| 699 | |||
| 700 | int allocating_chunk; | ||
| 701 | wait_queue_head_t wait; | ||
| 698 | }; | 702 | }; |
| 699 | 703 | ||
| 700 | /* | 704 | /* |
| @@ -2022,7 +2026,12 @@ u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags); | |||
| 2022 | void btrfs_set_inode_space_info(struct btrfs_root *root, struct inode *ionde); | 2026 | void btrfs_set_inode_space_info(struct btrfs_root *root, struct inode *ionde); |
| 2023 | void btrfs_clear_space_info_full(struct btrfs_fs_info *info); | 2027 | void btrfs_clear_space_info_full(struct btrfs_fs_info *info); |
| 2024 | 2028 | ||
| 2025 | int btrfs_check_metadata_free_space(struct btrfs_root *root); | 2029 | int btrfs_reserve_metadata_space(struct btrfs_root *root, int num_items); |
| 2030 | int btrfs_unreserve_metadata_space(struct btrfs_root *root, int num_items); | ||
| 2031 | int btrfs_unreserve_metadata_for_delalloc(struct btrfs_root *root, | ||
| 2032 | struct inode *inode, int num_items); | ||
| 2033 | int btrfs_reserve_metadata_for_delalloc(struct btrfs_root *root, | ||
| 2034 | struct inode *inode, int num_items); | ||
| 2026 | int btrfs_check_data_free_space(struct btrfs_root *root, struct inode *inode, | 2035 | int btrfs_check_data_free_space(struct btrfs_root *root, struct inode *inode, |
| 2027 | u64 bytes); | 2036 | u64 bytes); |
| 2028 | void btrfs_free_reserved_data_space(struct btrfs_root *root, | 2037 | void btrfs_free_reserved_data_space(struct btrfs_root *root, |
| @@ -2326,7 +2335,7 @@ int btrfs_sync_file(struct file *file, struct dentry *dentry, int datasync); | |||
| 2326 | int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end, | 2335 | int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end, |
| 2327 | int skip_pinned); | 2336 | int skip_pinned); |
| 2328 | int btrfs_check_file(struct btrfs_root *root, struct inode *inode); | 2337 | int btrfs_check_file(struct btrfs_root *root, struct inode *inode); |
| 2329 | extern struct file_operations btrfs_file_operations; | 2338 | extern const struct file_operations btrfs_file_operations; |
| 2330 | int btrfs_drop_extents(struct btrfs_trans_handle *trans, | 2339 | int btrfs_drop_extents(struct btrfs_trans_handle *trans, |
| 2331 | struct btrfs_root *root, struct inode *inode, | 2340 | struct btrfs_root *root, struct inode *inode, |
| 2332 | u64 start, u64 end, u64 locked_end, | 2341 | u64 start, u64 end, u64 locked_end, |
| @@ -2357,7 +2366,7 @@ int btrfs_parse_options(struct btrfs_root *root, char *options); | |||
| 2357 | int btrfs_sync_fs(struct super_block *sb, int wait); | 2366 | int btrfs_sync_fs(struct super_block *sb, int wait); |
| 2358 | 2367 | ||
| 2359 | /* acl.c */ | 2368 | /* acl.c */ |
| 2360 | #ifdef CONFIG_FS_POSIX_ACL | 2369 | #ifdef CONFIG_BTRFS_POSIX_ACL |
| 2361 | int btrfs_check_acl(struct inode *inode, int mask); | 2370 | int btrfs_check_acl(struct inode *inode, int mask); |
| 2362 | #else | 2371 | #else |
| 2363 | #define btrfs_check_acl NULL | 2372 | #define btrfs_check_acl NULL |
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 644e796fd643..af0435f79fa6 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c | |||
| @@ -822,14 +822,14 @@ struct extent_buffer *btrfs_find_create_tree_block(struct btrfs_root *root, | |||
| 822 | 822 | ||
| 823 | int btrfs_write_tree_block(struct extent_buffer *buf) | 823 | int btrfs_write_tree_block(struct extent_buffer *buf) |
| 824 | { | 824 | { |
| 825 | return btrfs_fdatawrite_range(buf->first_page->mapping, buf->start, | 825 | return filemap_fdatawrite_range(buf->first_page->mapping, buf->start, |
| 826 | buf->start + buf->len - 1, WB_SYNC_ALL); | 826 | buf->start + buf->len - 1); |
| 827 | } | 827 | } |
| 828 | 828 | ||
| 829 | int btrfs_wait_tree_block_writeback(struct extent_buffer *buf) | 829 | int btrfs_wait_tree_block_writeback(struct extent_buffer *buf) |
| 830 | { | 830 | { |
| 831 | return btrfs_wait_on_page_writeback_range(buf->first_page->mapping, | 831 | return filemap_fdatawait_range(buf->first_page->mapping, |
| 832 | buf->start, buf->start + buf->len - 1); | 832 | buf->start, buf->start + buf->len - 1); |
| 833 | } | 833 | } |
| 834 | 834 | ||
| 835 | struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr, | 835 | struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr, |
| @@ -1630,7 +1630,7 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
| 1630 | fs_info->sb = sb; | 1630 | fs_info->sb = sb; |
| 1631 | fs_info->max_extent = (u64)-1; | 1631 | fs_info->max_extent = (u64)-1; |
| 1632 | fs_info->max_inline = 8192 * 1024; | 1632 | fs_info->max_inline = 8192 * 1024; |
| 1633 | fs_info->metadata_ratio = 8; | 1633 | fs_info->metadata_ratio = 0; |
| 1634 | 1634 | ||
| 1635 | fs_info->thread_pool_size = min_t(unsigned long, | 1635 | fs_info->thread_pool_size = min_t(unsigned long, |
| 1636 | num_online_cpus() + 2, 8); | 1636 | num_online_cpus() + 2, 8); |
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 993f93ff7ba6..359a754c782c 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c | |||
| @@ -68,6 +68,8 @@ static int pin_down_bytes(struct btrfs_trans_handle *trans, | |||
| 68 | struct extent_buffer **must_clean); | 68 | struct extent_buffer **must_clean); |
| 69 | static int find_next_key(struct btrfs_path *path, int level, | 69 | static int find_next_key(struct btrfs_path *path, int level, |
| 70 | struct btrfs_key *key); | 70 | struct btrfs_key *key); |
| 71 | static void dump_space_info(struct btrfs_space_info *info, u64 bytes, | ||
| 72 | int dump_block_groups); | ||
| 71 | 73 | ||
| 72 | static noinline int | 74 | static noinline int |
| 73 | block_group_cache_done(struct btrfs_block_group_cache *cache) | 75 | block_group_cache_done(struct btrfs_block_group_cache *cache) |
| @@ -2765,67 +2767,346 @@ void btrfs_set_inode_space_info(struct btrfs_root *root, struct inode *inode) | |||
| 2765 | alloc_target); | 2767 | alloc_target); |
| 2766 | } | 2768 | } |
| 2767 | 2769 | ||
| 2770 | static u64 calculate_bytes_needed(struct btrfs_root *root, int num_items) | ||
| 2771 | { | ||
| 2772 | u64 num_bytes; | ||
| 2773 | int level; | ||
| 2774 | |||
| 2775 | level = BTRFS_MAX_LEVEL - 2; | ||
| 2776 | /* | ||
| 2777 | * NOTE: these calculations are absolutely the worst possible case. | ||
| 2778 | * This assumes that _every_ item we insert will require a new leaf, and | ||
| 2779 | * that the tree has grown to its maximum level size. | ||
| 2780 | */ | ||
| 2781 | |||
| 2782 | /* | ||
| 2783 | * for every item we insert we could insert both an extent item and an | ||
| 2784 | * extent ref item. Then for every item we insert, we will need to cow | ||
| 2785 | * both the original leaf, plus the leaf to the left and right of it. | ||
| 2786 | * | ||
| 2787 | * Unless we are talking about the extent root, then we just want the | ||
| 2788 | * number of items * 2, since we just need the extent item plus its ref. | ||
| 2789 | */ | ||
| 2790 | if (root == root->fs_info->extent_root) | ||
| 2791 | num_bytes = num_items * 2; | ||
| 2792 | else | ||
| 2793 | num_bytes = (num_items + (2 * num_items)) * 3; | ||
| 2794 | |||
| 2795 | /* | ||
| 2796 | * num_bytes is total number of leaves we could need times the leaf | ||
| 2797 | * size, and then for every leaf we could end up cow'ing 2 nodes per | ||
| 2798 | * level, down to the leaf level. | ||
| 2799 | */ | ||
| 2800 | num_bytes = (num_bytes * root->leafsize) + | ||
| 2801 | (num_bytes * (level * 2)) * root->nodesize; | ||
| 2802 | |||
| 2803 | return num_bytes; | ||
| 2804 | } | ||
| 2805 | |||
| 2768 | /* | 2806 | /* |
| 2769 | * for now this just makes sure we have at least 5% of our metadata space free | 2807 | * Unreserve metadata space for delalloc. If we have less reserved credits than |
| 2770 | * for use. | 2808 | * we have extents, this function does nothing. |
| 2771 | */ | 2809 | */ |
| 2772 | int btrfs_check_metadata_free_space(struct btrfs_root *root) | 2810 | int btrfs_unreserve_metadata_for_delalloc(struct btrfs_root *root, |
| 2811 | struct inode *inode, int num_items) | ||
| 2773 | { | 2812 | { |
| 2774 | struct btrfs_fs_info *info = root->fs_info; | 2813 | struct btrfs_fs_info *info = root->fs_info; |
| 2775 | struct btrfs_space_info *meta_sinfo; | 2814 | struct btrfs_space_info *meta_sinfo; |
| 2776 | u64 alloc_target, thresh; | 2815 | u64 num_bytes; |
| 2777 | int committed = 0, ret; | 2816 | u64 alloc_target; |
| 2817 | bool bug = false; | ||
| 2778 | 2818 | ||
| 2779 | /* get the space info for where the metadata will live */ | 2819 | /* get the space info for where the metadata will live */ |
| 2780 | alloc_target = btrfs_get_alloc_profile(root, 0); | 2820 | alloc_target = btrfs_get_alloc_profile(root, 0); |
| 2781 | meta_sinfo = __find_space_info(info, alloc_target); | 2821 | meta_sinfo = __find_space_info(info, alloc_target); |
| 2782 | if (!meta_sinfo) | ||
| 2783 | goto alloc; | ||
| 2784 | 2822 | ||
| 2785 | again: | 2823 | num_bytes = calculate_bytes_needed(root->fs_info->extent_root, |
| 2824 | num_items); | ||
| 2825 | |||
| 2786 | spin_lock(&meta_sinfo->lock); | 2826 | spin_lock(&meta_sinfo->lock); |
| 2787 | if (!meta_sinfo->full) | 2827 | if (BTRFS_I(inode)->delalloc_reserved_extents <= |
| 2788 | thresh = meta_sinfo->total_bytes * 80; | 2828 | BTRFS_I(inode)->delalloc_extents) { |
| 2789 | else | 2829 | spin_unlock(&meta_sinfo->lock); |
| 2790 | thresh = meta_sinfo->total_bytes * 95; | 2830 | return 0; |
| 2831 | } | ||
| 2832 | |||
| 2833 | BTRFS_I(inode)->delalloc_reserved_extents--; | ||
| 2834 | BUG_ON(BTRFS_I(inode)->delalloc_reserved_extents < 0); | ||
| 2835 | |||
| 2836 | if (meta_sinfo->bytes_delalloc < num_bytes) { | ||
| 2837 | bug = true; | ||
| 2838 | meta_sinfo->bytes_delalloc = 0; | ||
| 2839 | } else { | ||
| 2840 | meta_sinfo->bytes_delalloc -= num_bytes; | ||
| 2841 | } | ||
| 2842 | spin_unlock(&meta_sinfo->lock); | ||
| 2791 | 2843 | ||
| 2844 | BUG_ON(bug); | ||
| 2845 | |||
| 2846 | return 0; | ||
| 2847 | } | ||
| 2848 | |||
| 2849 | static void check_force_delalloc(struct btrfs_space_info *meta_sinfo) | ||
| 2850 | { | ||
| 2851 | u64 thresh; | ||
| 2852 | |||
| 2853 | thresh = meta_sinfo->bytes_used + meta_sinfo->bytes_reserved + | ||
| 2854 | meta_sinfo->bytes_pinned + meta_sinfo->bytes_readonly + | ||
| 2855 | meta_sinfo->bytes_super + meta_sinfo->bytes_root + | ||
| 2856 | meta_sinfo->bytes_may_use; | ||
| 2857 | |||
| 2858 | thresh = meta_sinfo->total_bytes - thresh; | ||
| 2859 | thresh *= 80; | ||
| 2792 | do_div(thresh, 100); | 2860 | do_div(thresh, 100); |
| 2861 | if (thresh <= meta_sinfo->bytes_delalloc) | ||
| 2862 | meta_sinfo->force_delalloc = 1; | ||
| 2863 | else | ||
| 2864 | meta_sinfo->force_delalloc = 0; | ||
| 2865 | } | ||
| 2793 | 2866 | ||
| 2794 | if (meta_sinfo->bytes_used + meta_sinfo->bytes_reserved + | 2867 | static int maybe_allocate_chunk(struct btrfs_root *root, |
| 2795 | meta_sinfo->bytes_pinned + meta_sinfo->bytes_readonly + | 2868 | struct btrfs_space_info *info) |
| 2796 | meta_sinfo->bytes_super > thresh) { | 2869 | { |
| 2797 | struct btrfs_trans_handle *trans; | 2870 | struct btrfs_super_block *disk_super = &root->fs_info->super_copy; |
| 2798 | if (!meta_sinfo->full) { | 2871 | struct btrfs_trans_handle *trans; |
| 2799 | meta_sinfo->force_alloc = 1; | 2872 | bool wait = false; |
| 2873 | int ret = 0; | ||
| 2874 | u64 min_metadata; | ||
| 2875 | u64 free_space; | ||
| 2876 | |||
| 2877 | free_space = btrfs_super_total_bytes(disk_super); | ||
| 2878 | /* | ||
| 2879 | * we allow the metadata to grow to a max of either 5gb or 5% of the | ||
| 2880 | * space in the volume. | ||
| 2881 | */ | ||
| 2882 | min_metadata = min((u64)5 * 1024 * 1024 * 1024, | ||
| 2883 | div64_u64(free_space * 5, 100)); | ||
| 2884 | if (info->total_bytes >= min_metadata) { | ||
| 2885 | spin_unlock(&info->lock); | ||
| 2886 | return 0; | ||
| 2887 | } | ||
| 2888 | |||
| 2889 | if (info->full) { | ||
| 2890 | spin_unlock(&info->lock); | ||
| 2891 | return 0; | ||
| 2892 | } | ||
| 2893 | |||
| 2894 | if (!info->allocating_chunk) { | ||
| 2895 | info->force_alloc = 1; | ||
| 2896 | info->allocating_chunk = 1; | ||
| 2897 | init_waitqueue_head(&info->wait); | ||
| 2898 | } else { | ||
| 2899 | wait = true; | ||
| 2900 | } | ||
| 2901 | |||
| 2902 | spin_unlock(&info->lock); | ||
| 2903 | |||
| 2904 | if (wait) { | ||
| 2905 | wait_event(info->wait, | ||
| 2906 | !info->allocating_chunk); | ||
| 2907 | return 1; | ||
| 2908 | } | ||
| 2909 | |||
| 2910 | trans = btrfs_start_transaction(root, 1); | ||
| 2911 | if (!trans) { | ||
| 2912 | ret = -ENOMEM; | ||
| 2913 | goto out; | ||
| 2914 | } | ||
| 2915 | |||
| 2916 | ret = do_chunk_alloc(trans, root->fs_info->extent_root, | ||
| 2917 | 4096 + 2 * 1024 * 1024, | ||
| 2918 | info->flags, 0); | ||
| 2919 | btrfs_end_transaction(trans, root); | ||
| 2920 | if (ret) | ||
| 2921 | goto out; | ||
| 2922 | out: | ||
| 2923 | spin_lock(&info->lock); | ||
| 2924 | info->allocating_chunk = 0; | ||
| 2925 | spin_unlock(&info->lock); | ||
| 2926 | wake_up(&info->wait); | ||
| 2927 | |||
| 2928 | if (ret) | ||
| 2929 | return 0; | ||
| 2930 | return 1; | ||
| 2931 | } | ||
| 2932 | |||
| 2933 | /* | ||
| 2934 | * Reserve metadata space for delalloc. | ||
| 2935 | */ | ||
| 2936 | int btrfs_reserve_metadata_for_delalloc(struct btrfs_root *root, | ||
| 2937 | struct inode *inode, int num_items) | ||
| 2938 | { | ||
| 2939 | struct btrfs_fs_info *info = root->fs_info; | ||
| 2940 | struct btrfs_space_info *meta_sinfo; | ||
| 2941 | u64 num_bytes; | ||
| 2942 | u64 used; | ||
| 2943 | u64 alloc_target; | ||
| 2944 | int flushed = 0; | ||
| 2945 | int force_delalloc; | ||
| 2946 | |||
| 2947 | /* get the space info for where the metadata will live */ | ||
| 2948 | alloc_target = btrfs_get_alloc_profile(root, 0); | ||
| 2949 | meta_sinfo = __find_space_info(info, alloc_target); | ||
| 2950 | |||
| 2951 | num_bytes = calculate_bytes_needed(root->fs_info->extent_root, | ||
| 2952 | num_items); | ||
| 2953 | again: | ||
| 2954 | spin_lock(&meta_sinfo->lock); | ||
| 2955 | |||
| 2956 | force_delalloc = meta_sinfo->force_delalloc; | ||
| 2957 | |||
| 2958 | if (unlikely(!meta_sinfo->bytes_root)) | ||
| 2959 | meta_sinfo->bytes_root = calculate_bytes_needed(root, 6); | ||
| 2960 | |||
| 2961 | if (!flushed) | ||
| 2962 | meta_sinfo->bytes_delalloc += num_bytes; | ||
| 2963 | |||
| 2964 | used = meta_sinfo->bytes_used + meta_sinfo->bytes_reserved + | ||
| 2965 | meta_sinfo->bytes_pinned + meta_sinfo->bytes_readonly + | ||
| 2966 | meta_sinfo->bytes_super + meta_sinfo->bytes_root + | ||
| 2967 | meta_sinfo->bytes_may_use + meta_sinfo->bytes_delalloc; | ||
| 2968 | |||
| 2969 | if (used > meta_sinfo->total_bytes) { | ||
| 2970 | flushed++; | ||
| 2971 | |||
| 2972 | if (flushed == 1) { | ||
| 2973 | if (maybe_allocate_chunk(root, meta_sinfo)) | ||
| 2974 | goto again; | ||
| 2975 | flushed++; | ||
| 2976 | } else { | ||
| 2800 | spin_unlock(&meta_sinfo->lock); | 2977 | spin_unlock(&meta_sinfo->lock); |
| 2801 | alloc: | 2978 | } |
| 2802 | trans = btrfs_start_transaction(root, 1); | ||
| 2803 | if (!trans) | ||
| 2804 | return -ENOMEM; | ||
| 2805 | 2979 | ||
| 2806 | ret = do_chunk_alloc(trans, root->fs_info->extent_root, | 2980 | if (flushed == 2) { |
| 2807 | 2 * 1024 * 1024, alloc_target, 0); | 2981 | filemap_flush(inode->i_mapping); |
| 2808 | btrfs_end_transaction(trans, root); | 2982 | goto again; |
| 2809 | if (!meta_sinfo) { | 2983 | } else if (flushed == 3) { |
| 2810 | meta_sinfo = __find_space_info(info, | 2984 | btrfs_start_delalloc_inodes(root); |
| 2811 | alloc_target); | 2985 | btrfs_wait_ordered_extents(root, 0); |
| 2812 | } | ||
| 2813 | goto again; | 2986 | goto again; |
| 2814 | } | 2987 | } |
| 2988 | spin_lock(&meta_sinfo->lock); | ||
| 2989 | meta_sinfo->bytes_delalloc -= num_bytes; | ||
| 2815 | spin_unlock(&meta_sinfo->lock); | 2990 | spin_unlock(&meta_sinfo->lock); |
| 2991 | printk(KERN_ERR "enospc, has %d, reserved %d\n", | ||
| 2992 | BTRFS_I(inode)->delalloc_extents, | ||
| 2993 | BTRFS_I(inode)->delalloc_reserved_extents); | ||
| 2994 | dump_space_info(meta_sinfo, 0, 0); | ||
| 2995 | return -ENOSPC; | ||
| 2996 | } | ||
| 2816 | 2997 | ||
| 2817 | if (!committed) { | 2998 | BTRFS_I(inode)->delalloc_reserved_extents++; |
| 2818 | committed = 1; | 2999 | check_force_delalloc(meta_sinfo); |
| 2819 | trans = btrfs_join_transaction(root, 1); | 3000 | spin_unlock(&meta_sinfo->lock); |
| 2820 | if (!trans) | 3001 | |
| 2821 | return -ENOMEM; | 3002 | if (!flushed && force_delalloc) |
| 2822 | ret = btrfs_commit_transaction(trans, root); | 3003 | filemap_flush(inode->i_mapping); |
| 2823 | if (ret) | 3004 | |
| 2824 | return ret; | 3005 | return 0; |
| 3006 | } | ||
| 3007 | |||
| 3008 | /* | ||
| 3009 | * unreserve num_items number of items worth of metadata space. This needs to | ||
| 3010 | * be paired with btrfs_reserve_metadata_space. | ||
| 3011 | * | ||
| 3012 | * NOTE: if you have the option, run this _AFTER_ you do a | ||
| 3013 | * btrfs_end_transaction, since btrfs_end_transaction will run delayed ref | ||
| 3014 | * operations which will result in more used metadata, so we want to make sure we | ||
| 3015 | * can do that without issue. | ||
| 3016 | */ | ||
| 3017 | int btrfs_unreserve_metadata_space(struct btrfs_root *root, int num_items) | ||
| 3018 | { | ||
| 3019 | struct btrfs_fs_info *info = root->fs_info; | ||
| 3020 | struct btrfs_space_info *meta_sinfo; | ||
| 3021 | u64 num_bytes; | ||
| 3022 | u64 alloc_target; | ||
| 3023 | bool bug = false; | ||
| 3024 | |||
| 3025 | /* get the space info for where the metadata will live */ | ||
| 3026 | alloc_target = btrfs_get_alloc_profile(root, 0); | ||
| 3027 | meta_sinfo = __find_space_info(info, alloc_target); | ||
| 3028 | |||
| 3029 | num_bytes = calculate_bytes_needed(root, num_items); | ||
| 3030 | |||
| 3031 | spin_lock(&meta_sinfo->lock); | ||
| 3032 | if (meta_sinfo->bytes_may_use < num_bytes) { | ||
| 3033 | bug = true; | ||
| 3034 | meta_sinfo->bytes_may_use = 0; | ||
| 3035 | } else { | ||
| 3036 | meta_sinfo->bytes_may_use -= num_bytes; | ||
| 3037 | } | ||
| 3038 | spin_unlock(&meta_sinfo->lock); | ||
| 3039 | |||
| 3040 | BUG_ON(bug); | ||
| 3041 | |||
| 3042 | return 0; | ||
| 3043 | } | ||
| 3044 | |||
| 3045 | /* | ||
| 3046 | * Reserve some metadata space for use. We'll calculate the worst case number | ||
| 3047 | * of bytes that would be needed to modify num_items number of items. If we | ||
| 3048 | * have space, fantastic, if not, you get -ENOSPC. Please call | ||
| 3049 | * btrfs_unreserve_metadata_space when you are done for the _SAME_ number of | ||
| 3050 | * items you reserved, since whatever metadata you needed should have already | ||
| 3051 | * been allocated. | ||
| 3052 | * | ||
| 3053 | * This will commit the transaction to make more space if we don't have enough | ||
| 3054 | * metadata space. The only time we don't do this is if we're reserving space | ||
| 3055 | * inside of a transaction, then we will just return -ENOSPC and it is the | ||
| 3056 | * caller's responsibility to handle it properly. | ||
| 3057 | */ | ||
| 3058 | int btrfs_reserve_metadata_space(struct btrfs_root *root, int num_items) | ||
| 3059 | { | ||
| 3060 | struct btrfs_fs_info *info = root->fs_info; | ||
| 3061 | struct btrfs_space_info *meta_sinfo; | ||
| 3062 | u64 num_bytes; | ||
| 3063 | u64 used; | ||
| 3064 | u64 alloc_target; | ||
| 3065 | int retries = 0; | ||
| 3066 | |||
| 3067 | /* get the space info for where the metadata will live */ | ||
| 3068 | alloc_target = btrfs_get_alloc_profile(root, 0); | ||
| 3069 | meta_sinfo = __find_space_info(info, alloc_target); | ||
| 3070 | |||
| 3071 | num_bytes = calculate_bytes_needed(root, num_items); | ||
| 3072 | again: | ||
| 3073 | spin_lock(&meta_sinfo->lock); | ||
| 3074 | |||
| 3075 | if (unlikely(!meta_sinfo->bytes_root)) | ||
| 3076 | meta_sinfo->bytes_root = calculate_bytes_needed(root, 6); | ||
| 3077 | |||
| 3078 | if (!retries) | ||
| 3079 | meta_sinfo->bytes_may_use += num_bytes; | ||
| 3080 | |||
| 3081 | used = meta_sinfo->bytes_used + meta_sinfo->bytes_reserved + | ||
| 3082 | meta_sinfo->bytes_pinned + meta_sinfo->bytes_readonly + | ||
| 3083 | meta_sinfo->bytes_super + meta_sinfo->bytes_root + | ||
| 3084 | meta_sinfo->bytes_may_use + meta_sinfo->bytes_delalloc; | ||
| 3085 | |||
| 3086 | if (used > meta_sinfo->total_bytes) { | ||
| 3087 | retries++; | ||
| 3088 | if (retries == 1) { | ||
| 3089 | if (maybe_allocate_chunk(root, meta_sinfo)) | ||
| 3090 | goto again; | ||
| 3091 | retries++; | ||
| 3092 | } else { | ||
| 3093 | spin_unlock(&meta_sinfo->lock); | ||
| 3094 | } | ||
| 3095 | |||
| 3096 | if (retries == 2) { | ||
| 3097 | btrfs_start_delalloc_inodes(root); | ||
| 3098 | btrfs_wait_ordered_extents(root, 0); | ||
| 2825 | goto again; | 3099 | goto again; |
| 2826 | } | 3100 | } |
| 3101 | spin_lock(&meta_sinfo->lock); | ||
| 3102 | meta_sinfo->bytes_may_use -= num_bytes; | ||
| 3103 | spin_unlock(&meta_sinfo->lock); | ||
| 3104 | |||
| 3105 | dump_space_info(meta_sinfo, 0, 0); | ||
| 2827 | return -ENOSPC; | 3106 | return -ENOSPC; |
| 2828 | } | 3107 | } |
| 3108 | |||
| 3109 | check_force_delalloc(meta_sinfo); | ||
| 2829 | spin_unlock(&meta_sinfo->lock); | 3110 | spin_unlock(&meta_sinfo->lock); |
| 2830 | 3111 | ||
| 2831 | return 0; | 3112 | return 0; |
| @@ -2888,7 +3169,7 @@ alloc: | |||
| 2888 | spin_unlock(&data_sinfo->lock); | 3169 | spin_unlock(&data_sinfo->lock); |
| 2889 | 3170 | ||
| 2890 | /* commit the current transaction and try again */ | 3171 | /* commit the current transaction and try again */ |
| 2891 | if (!committed) { | 3172 | if (!committed && !root->fs_info->open_ioctl_trans) { |
| 2892 | committed = 1; | 3173 | committed = 1; |
| 2893 | trans = btrfs_join_transaction(root, 1); | 3174 | trans = btrfs_join_transaction(root, 1); |
| 2894 | if (!trans) | 3175 | if (!trans) |
| @@ -2916,7 +3197,7 @@ alloc: | |||
| 2916 | BTRFS_I(inode)->reserved_bytes += bytes; | 3197 | BTRFS_I(inode)->reserved_bytes += bytes; |
| 2917 | spin_unlock(&data_sinfo->lock); | 3198 | spin_unlock(&data_sinfo->lock); |
| 2918 | 3199 | ||
| 2919 | return btrfs_check_metadata_free_space(root); | 3200 | return 0; |
| 2920 | } | 3201 | } |
| 2921 | 3202 | ||
| 2922 | /* | 3203 | /* |
| @@ -3015,17 +3296,15 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans, | |||
| 3015 | BUG_ON(!space_info); | 3296 | BUG_ON(!space_info); |
| 3016 | 3297 | ||
| 3017 | spin_lock(&space_info->lock); | 3298 | spin_lock(&space_info->lock); |
| 3018 | if (space_info->force_alloc) { | 3299 | if (space_info->force_alloc) |
| 3019 | force = 1; | 3300 | force = 1; |
| 3020 | space_info->force_alloc = 0; | ||
| 3021 | } | ||
| 3022 | if (space_info->full) { | 3301 | if (space_info->full) { |
| 3023 | spin_unlock(&space_info->lock); | 3302 | spin_unlock(&space_info->lock); |
| 3024 | goto out; | 3303 | goto out; |
| 3025 | } | 3304 | } |
| 3026 | 3305 | ||
| 3027 | thresh = space_info->total_bytes - space_info->bytes_readonly; | 3306 | thresh = space_info->total_bytes - space_info->bytes_readonly; |
| 3028 | thresh = div_factor(thresh, 6); | 3307 | thresh = div_factor(thresh, 8); |
| 3029 | if (!force && | 3308 | if (!force && |
| 3030 | (space_info->bytes_used + space_info->bytes_pinned + | 3309 | (space_info->bytes_used + space_info->bytes_pinned + |
| 3031 | space_info->bytes_reserved + alloc_bytes) < thresh) { | 3310 | space_info->bytes_reserved + alloc_bytes) < thresh) { |
| @@ -3039,7 +3318,7 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans, | |||
| 3039 | * we keep a reasonable number of metadata chunks allocated in the | 3318 | * we keep a reasonable number of metadata chunks allocated in the |
| 3040 | * FS as well. | 3319 | * FS as well. |
| 3041 | */ | 3320 | */ |
| 3042 | if (flags & BTRFS_BLOCK_GROUP_DATA) { | 3321 | if (flags & BTRFS_BLOCK_GROUP_DATA && fs_info->metadata_ratio) { |
| 3043 | fs_info->data_chunk_allocations++; | 3322 | fs_info->data_chunk_allocations++; |
| 3044 | if (!(fs_info->data_chunk_allocations % | 3323 | if (!(fs_info->data_chunk_allocations % |
| 3045 | fs_info->metadata_ratio)) | 3324 | fs_info->metadata_ratio)) |
| @@ -3047,8 +3326,11 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans, | |||
| 3047 | } | 3326 | } |
| 3048 | 3327 | ||
| 3049 | ret = btrfs_alloc_chunk(trans, extent_root, flags); | 3328 | ret = btrfs_alloc_chunk(trans, extent_root, flags); |
| 3329 | spin_lock(&space_info->lock); | ||
| 3050 | if (ret) | 3330 | if (ret) |
| 3051 | space_info->full = 1; | 3331 | space_info->full = 1; |
| 3332 | space_info->force_alloc = 0; | ||
| 3333 | spin_unlock(&space_info->lock); | ||
| 3052 | out: | 3334 | out: |
| 3053 | mutex_unlock(&extent_root->fs_info->chunk_mutex); | 3335 | mutex_unlock(&extent_root->fs_info->chunk_mutex); |
| 3054 | return ret; | 3336 | return ret; |
| @@ -4063,21 +4345,32 @@ loop: | |||
| 4063 | return ret; | 4345 | return ret; |
| 4064 | } | 4346 | } |
| 4065 | 4347 | ||
| 4066 | static void dump_space_info(struct btrfs_space_info *info, u64 bytes) | 4348 | static void dump_space_info(struct btrfs_space_info *info, u64 bytes, |
| 4349 | int dump_block_groups) | ||
| 4067 | { | 4350 | { |
| 4068 | struct btrfs_block_group_cache *cache; | 4351 | struct btrfs_block_group_cache *cache; |
| 4069 | 4352 | ||
| 4353 | spin_lock(&info->lock); | ||
| 4070 | printk(KERN_INFO "space_info has %llu free, is %sfull\n", | 4354 | printk(KERN_INFO "space_info has %llu free, is %sfull\n", |
| 4071 | (unsigned long long)(info->total_bytes - info->bytes_used - | 4355 | (unsigned long long)(info->total_bytes - info->bytes_used - |
| 4072 | info->bytes_pinned - info->bytes_reserved), | 4356 | info->bytes_pinned - info->bytes_reserved - |
| 4357 | info->bytes_super), | ||
| 4073 | (info->full) ? "" : "not "); | 4358 | (info->full) ? "" : "not "); |
| 4074 | printk(KERN_INFO "space_info total=%llu, pinned=%llu, delalloc=%llu," | 4359 | printk(KERN_INFO "space_info total=%llu, pinned=%llu, delalloc=%llu," |
| 4075 | " may_use=%llu, used=%llu\n", | 4360 | " may_use=%llu, used=%llu, root=%llu, super=%llu, reserved=%llu" |
| 4361 | "\n", | ||
| 4076 | (unsigned long long)info->total_bytes, | 4362 | (unsigned long long)info->total_bytes, |
| 4077 | (unsigned long long)info->bytes_pinned, | 4363 | (unsigned long long)info->bytes_pinned, |
| 4078 | (unsigned long long)info->bytes_delalloc, | 4364 | (unsigned long long)info->bytes_delalloc, |
| 4079 | (unsigned long long)info->bytes_may_use, | 4365 | (unsigned long long)info->bytes_may_use, |
| 4080 | (unsigned long long)info->bytes_used); | 4366 | (unsigned long long)info->bytes_used, |
| 4367 | (unsigned long long)info->bytes_root, | ||
| 4368 | (unsigned long long)info->bytes_super, | ||
| 4369 | (unsigned long long)info->bytes_reserved); | ||
| 4370 | spin_unlock(&info->lock); | ||
| 4371 | |||
| 4372 | if (!dump_block_groups) | ||
| 4373 | return; | ||
| 4081 | 4374 | ||
| 4082 | down_read(&info->groups_sem); | 4375 | down_read(&info->groups_sem); |
| 4083 | list_for_each_entry(cache, &info->block_groups, list) { | 4376 | list_for_each_entry(cache, &info->block_groups, list) { |
| @@ -4145,7 +4438,7 @@ again: | |||
| 4145 | printk(KERN_ERR "btrfs allocation failed flags %llu, " | 4438 | printk(KERN_ERR "btrfs allocation failed flags %llu, " |
| 4146 | "wanted %llu\n", (unsigned long long)data, | 4439 | "wanted %llu\n", (unsigned long long)data, |
| 4147 | (unsigned long long)num_bytes); | 4440 | (unsigned long long)num_bytes); |
| 4148 | dump_space_info(sinfo, num_bytes); | 4441 | dump_space_info(sinfo, num_bytes, 1); |
| 4149 | } | 4442 | } |
| 4150 | 4443 | ||
| 4151 | return ret; | 4444 | return ret; |
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 0cb88f8146ea..de1793ba004a 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c | |||
| @@ -280,6 +280,14 @@ static struct extent_buffer *buffer_search(struct extent_io_tree *tree, | |||
| 280 | return NULL; | 280 | return NULL; |
| 281 | } | 281 | } |
| 282 | 282 | ||
| 283 | static void merge_cb(struct extent_io_tree *tree, struct extent_state *new, | ||
| 284 | struct extent_state *other) | ||
| 285 | { | ||
| 286 | if (tree->ops && tree->ops->merge_extent_hook) | ||
| 287 | tree->ops->merge_extent_hook(tree->mapping->host, new, | ||
| 288 | other); | ||
| 289 | } | ||
| 290 | |||
| 283 | /* | 291 | /* |
| 284 | * utility function to look for merge candidates inside a given range. | 292 | * utility function to look for merge candidates inside a given range. |
| 285 | * Any extents with matching state are merged together into a single | 293 | * Any extents with matching state are merged together into a single |
| @@ -303,6 +311,7 @@ static int merge_state(struct extent_io_tree *tree, | |||
| 303 | other = rb_entry(other_node, struct extent_state, rb_node); | 311 | other = rb_entry(other_node, struct extent_state, rb_node); |
| 304 | if (other->end == state->start - 1 && | 312 | if (other->end == state->start - 1 && |
| 305 | other->state == state->state) { | 313 | other->state == state->state) { |
| 314 | merge_cb(tree, state, other); | ||
| 306 | state->start = other->start; | 315 | state->start = other->start; |
| 307 | other->tree = NULL; | 316 | other->tree = NULL; |
| 308 | rb_erase(&other->rb_node, &tree->state); | 317 | rb_erase(&other->rb_node, &tree->state); |
| @@ -314,33 +323,37 @@ static int merge_state(struct extent_io_tree *tree, | |||
| 314 | other = rb_entry(other_node, struct extent_state, rb_node); | 323 | other = rb_entry(other_node, struct extent_state, rb_node); |
| 315 | if (other->start == state->end + 1 && | 324 | if (other->start == state->end + 1 && |
| 316 | other->state == state->state) { | 325 | other->state == state->state) { |
| 326 | merge_cb(tree, state, other); | ||
| 317 | other->start = state->start; | 327 | other->start = state->start; |
| 318 | state->tree = NULL; | 328 | state->tree = NULL; |
| 319 | rb_erase(&state->rb_node, &tree->state); | 329 | rb_erase(&state->rb_node, &tree->state); |
| 320 | free_extent_state(state); | 330 | free_extent_state(state); |
| 331 | state = NULL; | ||
| 321 | } | 332 | } |
| 322 | } | 333 | } |
| 334 | |||
| 323 | return 0; | 335 | return 0; |
| 324 | } | 336 | } |
| 325 | 337 | ||
| 326 | static void set_state_cb(struct extent_io_tree *tree, | 338 | static int set_state_cb(struct extent_io_tree *tree, |
| 327 | struct extent_state *state, | 339 | struct extent_state *state, |
| 328 | unsigned long bits) | 340 | unsigned long bits) |
| 329 | { | 341 | { |
| 330 | if (tree->ops && tree->ops->set_bit_hook) { | 342 | if (tree->ops && tree->ops->set_bit_hook) { |
| 331 | tree->ops->set_bit_hook(tree->mapping->host, state->start, | 343 | return tree->ops->set_bit_hook(tree->mapping->host, |
| 332 | state->end, state->state, bits); | 344 | state->start, state->end, |
| 345 | state->state, bits); | ||
| 333 | } | 346 | } |
| 347 | |||
| 348 | return 0; | ||
| 334 | } | 349 | } |
| 335 | 350 | ||
| 336 | static void clear_state_cb(struct extent_io_tree *tree, | 351 | static void clear_state_cb(struct extent_io_tree *tree, |
| 337 | struct extent_state *state, | 352 | struct extent_state *state, |
| 338 | unsigned long bits) | 353 | unsigned long bits) |
| 339 | { | 354 | { |
| 340 | if (tree->ops && tree->ops->clear_bit_hook) { | 355 | if (tree->ops && tree->ops->clear_bit_hook) |
| 341 | tree->ops->clear_bit_hook(tree->mapping->host, state->start, | 356 | tree->ops->clear_bit_hook(tree->mapping->host, state, bits); |
| 342 | state->end, state->state, bits); | ||
| 343 | } | ||
| 344 | } | 357 | } |
| 345 | 358 | ||
| 346 | /* | 359 | /* |
| @@ -358,6 +371,7 @@ static int insert_state(struct extent_io_tree *tree, | |||
| 358 | int bits) | 371 | int bits) |
| 359 | { | 372 | { |
| 360 | struct rb_node *node; | 373 | struct rb_node *node; |
| 374 | int ret; | ||
| 361 | 375 | ||
| 362 | if (end < start) { | 376 | if (end < start) { |
| 363 | printk(KERN_ERR "btrfs end < start %llu %llu\n", | 377 | printk(KERN_ERR "btrfs end < start %llu %llu\n", |
| @@ -365,11 +379,14 @@ static int insert_state(struct extent_io_tree *tree, | |||
| 365 | (unsigned long long)start); | 379 | (unsigned long long)start); |
| 366 | WARN_ON(1); | 380 | WARN_ON(1); |
| 367 | } | 381 | } |
| 368 | if (bits & EXTENT_DIRTY) | ||
| 369 | tree->dirty_bytes += end - start + 1; | ||
| 370 | state->start = start; | 382 | state->start = start; |
| 371 | state->end = end; | 383 | state->end = end; |
| 372 | set_state_cb(tree, state, bits); | 384 | ret = set_state_cb(tree, state, bits); |
| 385 | if (ret) | ||
| 386 | return ret; | ||
| 387 | |||
| 388 | if (bits & EXTENT_DIRTY) | ||
| 389 | tree->dirty_bytes += end - start + 1; | ||
| 373 | state->state |= bits; | 390 | state->state |= bits; |
| 374 | node = tree_insert(&tree->state, end, &state->rb_node); | 391 | node = tree_insert(&tree->state, end, &state->rb_node); |
| 375 | if (node) { | 392 | if (node) { |
| @@ -387,6 +404,15 @@ static int insert_state(struct extent_io_tree *tree, | |||
| 387 | return 0; | 404 | return 0; |
| 388 | } | 405 | } |
| 389 | 406 | ||
| 407 | static int split_cb(struct extent_io_tree *tree, struct extent_state *orig, | ||
| 408 | u64 split) | ||
| 409 | { | ||
| 410 | if (tree->ops && tree->ops->split_extent_hook) | ||
| 411 | return tree->ops->split_extent_hook(tree->mapping->host, | ||
| 412 | orig, split); | ||
| 413 | return 0; | ||
| 414 | } | ||
| 415 | |||
| 390 | /* | 416 | /* |
| 391 | * split a given extent state struct in two, inserting the preallocated | 417 | * split a given extent state struct in two, inserting the preallocated |
| 392 | * struct 'prealloc' as the newly created second half. 'split' indicates an | 418 | * struct 'prealloc' as the newly created second half. 'split' indicates an |
| @@ -405,6 +431,9 @@ static int split_state(struct extent_io_tree *tree, struct extent_state *orig, | |||
| 405 | struct extent_state *prealloc, u64 split) | 431 | struct extent_state *prealloc, u64 split) |
| 406 | { | 432 | { |
| 407 | struct rb_node *node; | 433 | struct rb_node *node; |
| 434 | |||
| 435 | split_cb(tree, orig, split); | ||
| 436 | |||
| 408 | prealloc->start = orig->start; | 437 | prealloc->start = orig->start; |
| 409 | prealloc->end = split - 1; | 438 | prealloc->end = split - 1; |
| 410 | prealloc->state = orig->state; | 439 | prealloc->state = orig->state; |
| @@ -542,8 +571,8 @@ hit_next: | |||
| 542 | if (err) | 571 | if (err) |
| 543 | goto out; | 572 | goto out; |
| 544 | if (state->end <= end) { | 573 | if (state->end <= end) { |
| 545 | set |= clear_state_bit(tree, state, bits, | 574 | set |= clear_state_bit(tree, state, bits, wake, |
| 546 | wake, delete); | 575 | delete); |
| 547 | if (last_end == (u64)-1) | 576 | if (last_end == (u64)-1) |
| 548 | goto out; | 577 | goto out; |
| 549 | start = last_end + 1; | 578 | start = last_end + 1; |
| @@ -561,12 +590,11 @@ hit_next: | |||
| 561 | prealloc = alloc_extent_state(GFP_ATOMIC); | 590 | prealloc = alloc_extent_state(GFP_ATOMIC); |
| 562 | err = split_state(tree, state, prealloc, end + 1); | 591 | err = split_state(tree, state, prealloc, end + 1); |
| 563 | BUG_ON(err == -EEXIST); | 592 | BUG_ON(err == -EEXIST); |
| 564 | |||
| 565 | if (wake) | 593 | if (wake) |
| 566 | wake_up(&state->wq); | 594 | wake_up(&state->wq); |
| 567 | 595 | ||
| 568 | set |= clear_state_bit(tree, prealloc, bits, | 596 | set |= clear_state_bit(tree, prealloc, bits, wake, delete); |
| 569 | wake, delete); | 597 | |
| 570 | prealloc = NULL; | 598 | prealloc = NULL; |
| 571 | goto out; | 599 | goto out; |
| 572 | } | 600 | } |
| @@ -667,16 +695,23 @@ out: | |||
| 667 | return 0; | 695 | return 0; |
| 668 | } | 696 | } |
| 669 | 697 | ||
| 670 | static void set_state_bits(struct extent_io_tree *tree, | 698 | static int set_state_bits(struct extent_io_tree *tree, |
| 671 | struct extent_state *state, | 699 | struct extent_state *state, |
| 672 | int bits) | 700 | int bits) |
| 673 | { | 701 | { |
| 702 | int ret; | ||
| 703 | |||
| 704 | ret = set_state_cb(tree, state, bits); | ||
| 705 | if (ret) | ||
| 706 | return ret; | ||
| 707 | |||
| 674 | if ((bits & EXTENT_DIRTY) && !(state->state & EXTENT_DIRTY)) { | 708 | if ((bits & EXTENT_DIRTY) && !(state->state & EXTENT_DIRTY)) { |
| 675 | u64 range = state->end - state->start + 1; | 709 | u64 range = state->end - state->start + 1; |
| 676 | tree->dirty_bytes += range; | 710 | tree->dirty_bytes += range; |
| 677 | } | 711 | } |
| 678 | set_state_cb(tree, state, bits); | ||
| 679 | state->state |= bits; | 712 | state->state |= bits; |
| 713 | |||
| 714 | return 0; | ||
| 680 | } | 715 | } |
| 681 | 716 | ||
| 682 | static void cache_state(struct extent_state *state, | 717 | static void cache_state(struct extent_state *state, |
| @@ -758,7 +793,10 @@ hit_next: | |||
| 758 | goto out; | 793 | goto out; |
| 759 | } | 794 | } |
| 760 | 795 | ||
| 761 | set_state_bits(tree, state, bits); | 796 | err = set_state_bits(tree, state, bits); |
| 797 | if (err) | ||
| 798 | goto out; | ||
| 799 | |||
| 762 | cache_state(state, cached_state); | 800 | cache_state(state, cached_state); |
| 763 | merge_state(tree, state); | 801 | merge_state(tree, state); |
| 764 | if (last_end == (u64)-1) | 802 | if (last_end == (u64)-1) |
| @@ -805,7 +843,9 @@ hit_next: | |||
| 805 | if (err) | 843 | if (err) |
| 806 | goto out; | 844 | goto out; |
| 807 | if (state->end <= end) { | 845 | if (state->end <= end) { |
| 808 | set_state_bits(tree, state, bits); | 846 | err = set_state_bits(tree, state, bits); |
| 847 | if (err) | ||
| 848 | goto out; | ||
| 809 | cache_state(state, cached_state); | 849 | cache_state(state, cached_state); |
| 810 | merge_state(tree, state); | 850 | merge_state(tree, state); |
| 811 | if (last_end == (u64)-1) | 851 | if (last_end == (u64)-1) |
| @@ -829,11 +869,13 @@ hit_next: | |||
| 829 | this_end = last_start - 1; | 869 | this_end = last_start - 1; |
| 830 | err = insert_state(tree, prealloc, start, this_end, | 870 | err = insert_state(tree, prealloc, start, this_end, |
| 831 | bits); | 871 | bits); |
| 832 | cache_state(prealloc, cached_state); | ||
| 833 | prealloc = NULL; | ||
| 834 | BUG_ON(err == -EEXIST); | 872 | BUG_ON(err == -EEXIST); |
| 835 | if (err) | 873 | if (err) { |
| 874 | prealloc = NULL; | ||
| 836 | goto out; | 875 | goto out; |
| 876 | } | ||
| 877 | cache_state(prealloc, cached_state); | ||
| 878 | prealloc = NULL; | ||
| 837 | start = this_end + 1; | 879 | start = this_end + 1; |
| 838 | goto search_again; | 880 | goto search_again; |
| 839 | } | 881 | } |
| @@ -852,7 +894,11 @@ hit_next: | |||
| 852 | err = split_state(tree, state, prealloc, end + 1); | 894 | err = split_state(tree, state, prealloc, end + 1); |
| 853 | BUG_ON(err == -EEXIST); | 895 | BUG_ON(err == -EEXIST); |
| 854 | 896 | ||
| 855 | set_state_bits(tree, prealloc, bits); | 897 | err = set_state_bits(tree, prealloc, bits); |
| 898 | if (err) { | ||
| 899 | prealloc = NULL; | ||
| 900 | goto out; | ||
| 901 | } | ||
| 856 | cache_state(prealloc, cached_state); | 902 | cache_state(prealloc, cached_state); |
| 857 | merge_state(tree, prealloc); | 903 | merge_state(tree, prealloc); |
| 858 | prealloc = NULL; | 904 | prealloc = NULL; |
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index 14ed16fd862d..4794ec891fed 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h | |||
| @@ -60,8 +60,13 @@ struct extent_io_ops { | |||
| 60 | struct extent_state *state, int uptodate); | 60 | struct extent_state *state, int uptodate); |
| 61 | int (*set_bit_hook)(struct inode *inode, u64 start, u64 end, | 61 | int (*set_bit_hook)(struct inode *inode, u64 start, u64 end, |
| 62 | unsigned long old, unsigned long bits); | 62 | unsigned long old, unsigned long bits); |
| 63 | int (*clear_bit_hook)(struct inode *inode, u64 start, u64 end, | 63 | int (*clear_bit_hook)(struct inode *inode, struct extent_state *state, |
| 64 | unsigned long old, unsigned long bits); | 64 | unsigned long bits); |
| 65 | int (*merge_extent_hook)(struct inode *inode, | ||
| 66 | struct extent_state *new, | ||
| 67 | struct extent_state *other); | ||
| 68 | int (*split_extent_hook)(struct inode *inode, | ||
| 69 | struct extent_state *orig, u64 split); | ||
| 65 | int (*write_cache_pages_lock_hook)(struct page *page); | 70 | int (*write_cache_pages_lock_hook)(struct page *page); |
| 66 | }; | 71 | }; |
| 67 | 72 | ||
| @@ -79,10 +84,14 @@ struct extent_state { | |||
| 79 | u64 start; | 84 | u64 start; |
| 80 | u64 end; /* inclusive */ | 85 | u64 end; /* inclusive */ |
| 81 | struct rb_node rb_node; | 86 | struct rb_node rb_node; |
| 87 | |||
| 88 | /* ADD NEW ELEMENTS AFTER THIS */ | ||
| 82 | struct extent_io_tree *tree; | 89 | struct extent_io_tree *tree; |
| 83 | wait_queue_head_t wq; | 90 | wait_queue_head_t wq; |
| 84 | atomic_t refs; | 91 | atomic_t refs; |
| 85 | unsigned long state; | 92 | unsigned long state; |
| 93 | u64 split_start; | ||
| 94 | u64 split_end; | ||
| 86 | 95 | ||
| 87 | /* for use by the FS */ | 96 | /* for use by the FS */ |
| 88 | u64 private; | 97 | u64 private; |
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index a3492a3ad96b..f19e1259a971 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c | |||
| @@ -123,7 +123,10 @@ static noinline int dirty_and_release_pages(struct btrfs_trans_handle *trans, | |||
| 123 | root->sectorsize - 1) & ~((u64)root->sectorsize - 1); | 123 | root->sectorsize - 1) & ~((u64)root->sectorsize - 1); |
| 124 | 124 | ||
| 125 | end_of_last_block = start_pos + num_bytes - 1; | 125 | end_of_last_block = start_pos + num_bytes - 1; |
| 126 | btrfs_set_extent_delalloc(inode, start_pos, end_of_last_block); | 126 | err = btrfs_set_extent_delalloc(inode, start_pos, end_of_last_block); |
| 127 | if (err) | ||
| 128 | return err; | ||
| 129 | |||
| 127 | for (i = 0; i < num_pages; i++) { | 130 | for (i = 0; i < num_pages; i++) { |
| 128 | struct page *p = pages[i]; | 131 | struct page *p = pages[i]; |
| 129 | SetPageUptodate(p); | 132 | SetPageUptodate(p); |
| @@ -917,21 +920,35 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, | |||
| 917 | start_pos = pos; | 920 | start_pos = pos; |
| 918 | 921 | ||
| 919 | vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE); | 922 | vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE); |
| 923 | |||
| 924 | /* do the reserve before the mutex lock in case we have to do some | ||
| 925 | * flushing. We wouldn't deadlock, but this is more polite. | ||
| 926 | */ | ||
| 927 | err = btrfs_reserve_metadata_for_delalloc(root, inode, 1); | ||
| 928 | if (err) | ||
| 929 | goto out_nolock; | ||
| 930 | |||
| 931 | mutex_lock(&inode->i_mutex); | ||
| 932 | |||
| 920 | current->backing_dev_info = inode->i_mapping->backing_dev_info; | 933 | current->backing_dev_info = inode->i_mapping->backing_dev_info; |
| 921 | err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode)); | 934 | err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode)); |
| 922 | if (err) | 935 | if (err) |
| 923 | goto out_nolock; | 936 | goto out; |
| 937 | |||
| 924 | if (count == 0) | 938 | if (count == 0) |
| 925 | goto out_nolock; | 939 | goto out; |
| 926 | 940 | ||
| 927 | err = file_remove_suid(file); | 941 | err = file_remove_suid(file); |
| 928 | if (err) | 942 | if (err) |
| 929 | goto out_nolock; | 943 | goto out; |
| 944 | |||
| 930 | file_update_time(file); | 945 | file_update_time(file); |
| 931 | 946 | ||
| 932 | pages = kmalloc(nrptrs * sizeof(struct page *), GFP_KERNEL); | 947 | pages = kmalloc(nrptrs * sizeof(struct page *), GFP_KERNEL); |
| 933 | 948 | ||
| 934 | mutex_lock(&inode->i_mutex); | 949 | /* generic_write_checks can change our pos */ |
| 950 | start_pos = pos; | ||
| 951 | |||
| 935 | BTRFS_I(inode)->sequence++; | 952 | BTRFS_I(inode)->sequence++; |
| 936 | first_index = pos >> PAGE_CACHE_SHIFT; | 953 | first_index = pos >> PAGE_CACHE_SHIFT; |
| 937 | last_index = (pos + count) >> PAGE_CACHE_SHIFT; | 954 | last_index = (pos + count) >> PAGE_CACHE_SHIFT; |
| @@ -1005,9 +1022,8 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, | |||
| 1005 | } | 1022 | } |
| 1006 | 1023 | ||
| 1007 | if (will_write) { | 1024 | if (will_write) { |
| 1008 | btrfs_fdatawrite_range(inode->i_mapping, pos, | 1025 | filemap_fdatawrite_range(inode->i_mapping, pos, |
| 1009 | pos + write_bytes - 1, | 1026 | pos + write_bytes - 1); |
| 1010 | WB_SYNC_ALL); | ||
| 1011 | } else { | 1027 | } else { |
| 1012 | balance_dirty_pages_ratelimited_nr(inode->i_mapping, | 1028 | balance_dirty_pages_ratelimited_nr(inode->i_mapping, |
| 1013 | num_pages); | 1029 | num_pages); |
| @@ -1028,6 +1044,7 @@ out: | |||
| 1028 | mutex_unlock(&inode->i_mutex); | 1044 | mutex_unlock(&inode->i_mutex); |
| 1029 | if (ret) | 1045 | if (ret) |
| 1030 | err = ret; | 1046 | err = ret; |
| 1047 | btrfs_unreserve_metadata_for_delalloc(root, inode, 1); | ||
| 1031 | 1048 | ||
| 1032 | out_nolock: | 1049 | out_nolock: |
| 1033 | kfree(pages); | 1050 | kfree(pages); |
| @@ -1196,7 +1213,7 @@ static int btrfs_file_mmap(struct file *filp, struct vm_area_struct *vma) | |||
| 1196 | return 0; | 1213 | return 0; |
| 1197 | } | 1214 | } |
| 1198 | 1215 | ||
| 1199 | struct file_operations btrfs_file_operations = { | 1216 | const struct file_operations btrfs_file_operations = { |
| 1200 | .llseek = generic_file_llseek, | 1217 | .llseek = generic_file_llseek, |
| 1201 | .read = do_sync_read, | 1218 | .read = do_sync_read, |
| 1202 | .aio_read = generic_file_aio_read, | 1219 | .aio_read = generic_file_aio_read, |
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index e9b76bcd1c12..112e5aa85892 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c | |||
| @@ -62,7 +62,7 @@ static const struct inode_operations btrfs_special_inode_operations; | |||
| 62 | static const struct inode_operations btrfs_file_inode_operations; | 62 | static const struct inode_operations btrfs_file_inode_operations; |
| 63 | static const struct address_space_operations btrfs_aops; | 63 | static const struct address_space_operations btrfs_aops; |
| 64 | static const struct address_space_operations btrfs_symlink_aops; | 64 | static const struct address_space_operations btrfs_symlink_aops; |
| 65 | static struct file_operations btrfs_dir_file_operations; | 65 | static const struct file_operations btrfs_dir_file_operations; |
| 66 | static struct extent_io_ops btrfs_extent_io_ops; | 66 | static struct extent_io_ops btrfs_extent_io_ops; |
| 67 | 67 | ||
| 68 | static struct kmem_cache *btrfs_inode_cachep; | 68 | static struct kmem_cache *btrfs_inode_cachep; |
| @@ -1159,6 +1159,83 @@ static int run_delalloc_range(struct inode *inode, struct page *locked_page, | |||
| 1159 | return ret; | 1159 | return ret; |
| 1160 | } | 1160 | } |
| 1161 | 1161 | ||
| 1162 | static int btrfs_split_extent_hook(struct inode *inode, | ||
| 1163 | struct extent_state *orig, u64 split) | ||
| 1164 | { | ||
| 1165 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
| 1166 | u64 size; | ||
| 1167 | |||
| 1168 | if (!(orig->state & EXTENT_DELALLOC)) | ||
| 1169 | return 0; | ||
| 1170 | |||
| 1171 | size = orig->end - orig->start + 1; | ||
| 1172 | if (size > root->fs_info->max_extent) { | ||
| 1173 | u64 num_extents; | ||
| 1174 | u64 new_size; | ||
| 1175 | |||
| 1176 | new_size = orig->end - split + 1; | ||
| 1177 | num_extents = div64_u64(size + root->fs_info->max_extent - 1, | ||
| 1178 | root->fs_info->max_extent); | ||
| 1179 | |||
| 1180 | /* | ||
| 1181 | * if we break a large extent up then leave delalloc_extents be, | ||
| 1182 | * since we've already accounted for the large extent. | ||
| 1183 | */ | ||
| 1184 | if (div64_u64(new_size + root->fs_info->max_extent - 1, | ||
| 1185 | root->fs_info->max_extent) < num_extents) | ||
| 1186 | return 0; | ||
| 1187 | } | ||
| 1188 | |||
| 1189 | BTRFS_I(inode)->delalloc_extents++; | ||
| 1190 | |||
| 1191 | return 0; | ||
| 1192 | } | ||
| 1193 | |||
| 1194 | /* | ||
| 1195 | * extent_io.c merge_extent_hook, used to track merged delayed allocation | ||
| 1196 | * extents so we can keep track of new extents that are just merged onto old | ||
| 1197 | * extents, such as when we are doing sequential writes, so we can properly | ||
| 1198 | * account for the metadata space we'll need. | ||
| 1199 | */ | ||
| 1200 | static int btrfs_merge_extent_hook(struct inode *inode, | ||
| 1201 | struct extent_state *new, | ||
| 1202 | struct extent_state *other) | ||
| 1203 | { | ||
| 1204 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
| 1205 | u64 new_size, old_size; | ||
| 1206 | u64 num_extents; | ||
| 1207 | |||
| 1208 | /* not delalloc, ignore it */ | ||
| 1209 | if (!(other->state & EXTENT_DELALLOC)) | ||
| 1210 | return 0; | ||
| 1211 | |||
| 1212 | old_size = other->end - other->start + 1; | ||
| 1213 | if (new->start < other->start) | ||
| 1214 | new_size = other->end - new->start + 1; | ||
| 1215 | else | ||
| 1216 | new_size = new->end - other->start + 1; | ||
| 1217 | |||
| 1218 | /* we're not bigger than the max, unreserve the space and go */ | ||
| 1219 | if (new_size <= root->fs_info->max_extent) { | ||
| 1220 | BTRFS_I(inode)->delalloc_extents--; | ||
| 1221 | return 0; | ||
| 1222 | } | ||
| 1223 | |||
| 1224 | /* | ||
| 1225 | * If we grew by another max_extent, just return, we want to keep that | ||
| 1226 | * reserved amount. | ||
| 1227 | */ | ||
| 1228 | num_extents = div64_u64(old_size + root->fs_info->max_extent - 1, | ||
| 1229 | root->fs_info->max_extent); | ||
| 1230 | if (div64_u64(new_size + root->fs_info->max_extent - 1, | ||
| 1231 | root->fs_info->max_extent) > num_extents) | ||
| 1232 | return 0; | ||
| 1233 | |||
| 1234 | BTRFS_I(inode)->delalloc_extents--; | ||
| 1235 | |||
| 1236 | return 0; | ||
| 1237 | } | ||
| 1238 | |||
| 1162 | /* | 1239 | /* |
| 1163 | * extent_io.c set_bit_hook, used to track delayed allocation | 1240 | * extent_io.c set_bit_hook, used to track delayed allocation |
| 1164 | * bytes in this file, and to maintain the list of inodes that | 1241 | * bytes in this file, and to maintain the list of inodes that |
| @@ -1167,6 +1244,7 @@ static int run_delalloc_range(struct inode *inode, struct page *locked_page, | |||
| 1167 | static int btrfs_set_bit_hook(struct inode *inode, u64 start, u64 end, | 1244 | static int btrfs_set_bit_hook(struct inode *inode, u64 start, u64 end, |
| 1168 | unsigned long old, unsigned long bits) | 1245 | unsigned long old, unsigned long bits) |
| 1169 | { | 1246 | { |
| 1247 | |||
| 1170 | /* | 1248 | /* |
| 1171 | * set_bit and clear bit hooks normally require _irqsave/restore | 1249 | * set_bit and clear bit hooks normally require _irqsave/restore |
| 1172 | * but in this case, we are only testeing for the DELALLOC | 1250 | * but in this case, we are only testeing for the DELALLOC |
| @@ -1174,6 +1252,8 @@ static int btrfs_set_bit_hook(struct inode *inode, u64 start, u64 end, | |||
| 1174 | */ | 1252 | */ |
| 1175 | if (!(old & EXTENT_DELALLOC) && (bits & EXTENT_DELALLOC)) { | 1253 | if (!(old & EXTENT_DELALLOC) && (bits & EXTENT_DELALLOC)) { |
| 1176 | struct btrfs_root *root = BTRFS_I(inode)->root; | 1254 | struct btrfs_root *root = BTRFS_I(inode)->root; |
| 1255 | |||
| 1256 | BTRFS_I(inode)->delalloc_extents++; | ||
| 1177 | btrfs_delalloc_reserve_space(root, inode, end - start + 1); | 1257 | btrfs_delalloc_reserve_space(root, inode, end - start + 1); |
| 1178 | spin_lock(&root->fs_info->delalloc_lock); | 1258 | spin_lock(&root->fs_info->delalloc_lock); |
| 1179 | BTRFS_I(inode)->delalloc_bytes += end - start + 1; | 1259 | BTRFS_I(inode)->delalloc_bytes += end - start + 1; |
| @@ -1190,22 +1270,27 @@ static int btrfs_set_bit_hook(struct inode *inode, u64 start, u64 end, | |||
| 1190 | /* | 1270 | /* |
| 1191 | * extent_io.c clear_bit_hook, see set_bit_hook for why | 1271 | * extent_io.c clear_bit_hook, see set_bit_hook for why |
| 1192 | */ | 1272 | */ |
| 1193 | static int btrfs_clear_bit_hook(struct inode *inode, u64 start, u64 end, | 1273 | static int btrfs_clear_bit_hook(struct inode *inode, |
| 1194 | unsigned long old, unsigned long bits) | 1274 | struct extent_state *state, unsigned long bits) |
| 1195 | { | 1275 | { |
| 1196 | /* | 1276 | /* |
| 1197 | * set_bit and clear bit hooks normally require _irqsave/restore | 1277 | * set_bit and clear bit hooks normally require _irqsave/restore |
| 1198 | * but in this case, we are only testeing for the DELALLOC | 1278 | * but in this case, we are only testeing for the DELALLOC |
| 1199 | * bit, which is only set or cleared with irqs on | 1279 | * bit, which is only set or cleared with irqs on |
| 1200 | */ | 1280 | */ |
| 1201 | if ((old & EXTENT_DELALLOC) && (bits & EXTENT_DELALLOC)) { | 1281 | if ((state->state & EXTENT_DELALLOC) && (bits & EXTENT_DELALLOC)) { |
| 1202 | struct btrfs_root *root = BTRFS_I(inode)->root; | 1282 | struct btrfs_root *root = BTRFS_I(inode)->root; |
| 1203 | 1283 | ||
| 1284 | BTRFS_I(inode)->delalloc_extents--; | ||
| 1285 | btrfs_unreserve_metadata_for_delalloc(root, inode, 1); | ||
| 1286 | |||
| 1204 | spin_lock(&root->fs_info->delalloc_lock); | 1287 | spin_lock(&root->fs_info->delalloc_lock); |
| 1205 | if (end - start + 1 > root->fs_info->delalloc_bytes) { | 1288 | if (state->end - state->start + 1 > |
| 1289 | root->fs_info->delalloc_bytes) { | ||
| 1206 | printk(KERN_INFO "btrfs warning: delalloc account " | 1290 | printk(KERN_INFO "btrfs warning: delalloc account " |
| 1207 | "%llu %llu\n", | 1291 | "%llu %llu\n", |
| 1208 | (unsigned long long)end - start + 1, | 1292 | (unsigned long long) |
| 1293 | state->end - state->start + 1, | ||
| 1209 | (unsigned long long) | 1294 | (unsigned long long) |
| 1210 | root->fs_info->delalloc_bytes); | 1295 | root->fs_info->delalloc_bytes); |
| 1211 | btrfs_delalloc_free_space(root, inode, (u64)-1); | 1296 | btrfs_delalloc_free_space(root, inode, (u64)-1); |
| @@ -1213,9 +1298,12 @@ static int btrfs_clear_bit_hook(struct inode *inode, u64 start, u64 end, | |||
| 1213 | BTRFS_I(inode)->delalloc_bytes = 0; | 1298 | BTRFS_I(inode)->delalloc_bytes = 0; |
| 1214 | } else { | 1299 | } else { |
| 1215 | btrfs_delalloc_free_space(root, inode, | 1300 | btrfs_delalloc_free_space(root, inode, |
| 1216 | end - start + 1); | 1301 | state->end - |
| 1217 | root->fs_info->delalloc_bytes -= end - start + 1; | 1302 | state->start + 1); |
| 1218 | BTRFS_I(inode)->delalloc_bytes -= end - start + 1; | 1303 | root->fs_info->delalloc_bytes -= state->end - |
| 1304 | state->start + 1; | ||
| 1305 | BTRFS_I(inode)->delalloc_bytes -= state->end - | ||
| 1306 | state->start + 1; | ||
| 1219 | } | 1307 | } |
| 1220 | if (BTRFS_I(inode)->delalloc_bytes == 0 && | 1308 | if (BTRFS_I(inode)->delalloc_bytes == 0 && |
| 1221 | !list_empty(&BTRFS_I(inode)->delalloc_inodes)) { | 1309 | !list_empty(&BTRFS_I(inode)->delalloc_inodes)) { |
| @@ -2950,7 +3038,12 @@ again: | |||
| 2950 | goto again; | 3038 | goto again; |
| 2951 | } | 3039 | } |
| 2952 | 3040 | ||
| 2953 | btrfs_set_extent_delalloc(inode, page_start, page_end); | 3041 | ret = btrfs_set_extent_delalloc(inode, page_start, page_end); |
| 3042 | if (ret) { | ||
| 3043 | unlock_extent(io_tree, page_start, page_end, GFP_NOFS); | ||
| 3044 | goto out_unlock; | ||
| 3045 | } | ||
| 3046 | |||
| 2954 | ret = 0; | 3047 | ret = 0; |
| 2955 | if (offset != PAGE_CACHE_SIZE) { | 3048 | if (offset != PAGE_CACHE_SIZE) { |
| 2956 | kaddr = kmap(page); | 3049 | kaddr = kmap(page); |
| @@ -2981,15 +3074,11 @@ int btrfs_cont_expand(struct inode *inode, loff_t size) | |||
| 2981 | u64 last_byte; | 3074 | u64 last_byte; |
| 2982 | u64 cur_offset; | 3075 | u64 cur_offset; |
| 2983 | u64 hole_size; | 3076 | u64 hole_size; |
| 2984 | int err; | 3077 | int err = 0; |
| 2985 | 3078 | ||
| 2986 | if (size <= hole_start) | 3079 | if (size <= hole_start) |
| 2987 | return 0; | 3080 | return 0; |
| 2988 | 3081 | ||
| 2989 | err = btrfs_check_metadata_free_space(root); | ||
| 2990 | if (err) | ||
| 2991 | return err; | ||
| 2992 | |||
| 2993 | btrfs_truncate_page(inode->i_mapping, inode->i_size); | 3082 | btrfs_truncate_page(inode->i_mapping, inode->i_size); |
| 2994 | 3083 | ||
| 2995 | while (1) { | 3084 | while (1) { |
| @@ -3024,12 +3113,18 @@ int btrfs_cont_expand(struct inode *inode, loff_t size) | |||
| 3024 | cur_offset, &hint_byte, 1); | 3113 | cur_offset, &hint_byte, 1); |
| 3025 | if (err) | 3114 | if (err) |
| 3026 | break; | 3115 | break; |
| 3116 | |||
| 3117 | err = btrfs_reserve_metadata_space(root, 1); | ||
| 3118 | if (err) | ||
| 3119 | break; | ||
| 3120 | |||
| 3027 | err = btrfs_insert_file_extent(trans, root, | 3121 | err = btrfs_insert_file_extent(trans, root, |
| 3028 | inode->i_ino, cur_offset, 0, | 3122 | inode->i_ino, cur_offset, 0, |
| 3029 | 0, hole_size, 0, hole_size, | 3123 | 0, hole_size, 0, hole_size, |
| 3030 | 0, 0, 0); | 3124 | 0, 0, 0); |
| 3031 | btrfs_drop_extent_cache(inode, hole_start, | 3125 | btrfs_drop_extent_cache(inode, hole_start, |
| 3032 | last_byte - 1, 0); | 3126 | last_byte - 1, 0); |
| 3127 | btrfs_unreserve_metadata_space(root, 1); | ||
| 3033 | } | 3128 | } |
| 3034 | free_extent_map(em); | 3129 | free_extent_map(em); |
| 3035 | cur_offset = last_byte; | 3130 | cur_offset = last_byte; |
| @@ -3990,11 +4085,18 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry, | |||
| 3990 | if (!new_valid_dev(rdev)) | 4085 | if (!new_valid_dev(rdev)) |
| 3991 | return -EINVAL; | 4086 | return -EINVAL; |
| 3992 | 4087 | ||
| 3993 | err = btrfs_check_metadata_free_space(root); | 4088 | /* |
| 4089 | * 2 for inode item and ref | ||
| 4090 | * 2 for dir items | ||
| 4091 | * 1 for xattr if selinux is on | ||
| 4092 | */ | ||
| 4093 | err = btrfs_reserve_metadata_space(root, 5); | ||
| 3994 | if (err) | 4094 | if (err) |
| 3995 | goto fail; | 4095 | return err; |
| 3996 | 4096 | ||
| 3997 | trans = btrfs_start_transaction(root, 1); | 4097 | trans = btrfs_start_transaction(root, 1); |
| 4098 | if (!trans) | ||
| 4099 | goto fail; | ||
| 3998 | btrfs_set_trans_block_group(trans, dir); | 4100 | btrfs_set_trans_block_group(trans, dir); |
| 3999 | 4101 | ||
| 4000 | err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid); | 4102 | err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid); |
| @@ -4032,6 +4134,7 @@ out_unlock: | |||
| 4032 | nr = trans->blocks_used; | 4134 | nr = trans->blocks_used; |
| 4033 | btrfs_end_transaction_throttle(trans, root); | 4135 | btrfs_end_transaction_throttle(trans, root); |
| 4034 | fail: | 4136 | fail: |
| 4137 | btrfs_unreserve_metadata_space(root, 5); | ||
| 4035 | if (drop_inode) { | 4138 | if (drop_inode) { |
| 4036 | inode_dec_link_count(inode); | 4139 | inode_dec_link_count(inode); |
| 4037 | iput(inode); | 4140 | iput(inode); |
| @@ -4052,10 +4155,18 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry, | |||
| 4052 | u64 objectid; | 4155 | u64 objectid; |
| 4053 | u64 index = 0; | 4156 | u64 index = 0; |
| 4054 | 4157 | ||
| 4055 | err = btrfs_check_metadata_free_space(root); | 4158 | /* |
| 4159 | * 2 for inode item and ref | ||
| 4160 | * 2 for dir items | ||
| 4161 | * 1 for xattr if selinux is on | ||
| 4162 | */ | ||
| 4163 | err = btrfs_reserve_metadata_space(root, 5); | ||
| 4056 | if (err) | 4164 | if (err) |
| 4057 | goto fail; | 4165 | return err; |
| 4166 | |||
| 4058 | trans = btrfs_start_transaction(root, 1); | 4167 | trans = btrfs_start_transaction(root, 1); |
| 4168 | if (!trans) | ||
| 4169 | goto fail; | ||
| 4059 | btrfs_set_trans_block_group(trans, dir); | 4170 | btrfs_set_trans_block_group(trans, dir); |
| 4060 | 4171 | ||
| 4061 | err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid); | 4172 | err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid); |
| @@ -4096,6 +4207,7 @@ out_unlock: | |||
| 4096 | nr = trans->blocks_used; | 4207 | nr = trans->blocks_used; |
| 4097 | btrfs_end_transaction_throttle(trans, root); | 4208 | btrfs_end_transaction_throttle(trans, root); |
| 4098 | fail: | 4209 | fail: |
| 4210 | btrfs_unreserve_metadata_space(root, 5); | ||
| 4099 | if (drop_inode) { | 4211 | if (drop_inode) { |
| 4100 | inode_dec_link_count(inode); | 4212 | inode_dec_link_count(inode); |
| 4101 | iput(inode); | 4213 | iput(inode); |
| @@ -4118,10 +4230,16 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir, | |||
| 4118 | if (inode->i_nlink == 0) | 4230 | if (inode->i_nlink == 0) |
| 4119 | return -ENOENT; | 4231 | return -ENOENT; |
| 4120 | 4232 | ||
| 4121 | btrfs_inc_nlink(inode); | 4233 | /* |
| 4122 | err = btrfs_check_metadata_free_space(root); | 4234 | * 1 item for inode ref |
| 4235 | * 2 items for dir items | ||
| 4236 | */ | ||
| 4237 | err = btrfs_reserve_metadata_space(root, 3); | ||
| 4123 | if (err) | 4238 | if (err) |
| 4124 | goto fail; | 4239 | return err; |
| 4240 | |||
| 4241 | btrfs_inc_nlink(inode); | ||
| 4242 | |||
| 4125 | err = btrfs_set_inode_index(dir, &index); | 4243 | err = btrfs_set_inode_index(dir, &index); |
| 4126 | if (err) | 4244 | if (err) |
| 4127 | goto fail; | 4245 | goto fail; |
| @@ -4145,6 +4263,7 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir, | |||
| 4145 | nr = trans->blocks_used; | 4263 | nr = trans->blocks_used; |
| 4146 | btrfs_end_transaction_throttle(trans, root); | 4264 | btrfs_end_transaction_throttle(trans, root); |
| 4147 | fail: | 4265 | fail: |
| 4266 | btrfs_unreserve_metadata_space(root, 3); | ||
| 4148 | if (drop_inode) { | 4267 | if (drop_inode) { |
| 4149 | inode_dec_link_count(inode); | 4268 | inode_dec_link_count(inode); |
| 4150 | iput(inode); | 4269 | iput(inode); |
| @@ -4164,17 +4283,21 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) | |||
| 4164 | u64 index = 0; | 4283 | u64 index = 0; |
| 4165 | unsigned long nr = 1; | 4284 | unsigned long nr = 1; |
| 4166 | 4285 | ||
| 4167 | err = btrfs_check_metadata_free_space(root); | 4286 | /* |
| 4287 | * 2 items for inode and ref | ||
| 4288 | * 2 items for dir items | ||
| 4289 | * 1 for xattr if selinux is on | ||
| 4290 | */ | ||
| 4291 | err = btrfs_reserve_metadata_space(root, 5); | ||
| 4168 | if (err) | 4292 | if (err) |
| 4169 | goto out_unlock; | 4293 | return err; |
| 4170 | 4294 | ||
| 4171 | trans = btrfs_start_transaction(root, 1); | 4295 | trans = btrfs_start_transaction(root, 1); |
| 4172 | btrfs_set_trans_block_group(trans, dir); | 4296 | if (!trans) { |
| 4173 | 4297 | err = -ENOMEM; | |
| 4174 | if (IS_ERR(trans)) { | ||
| 4175 | err = PTR_ERR(trans); | ||
| 4176 | goto out_unlock; | 4298 | goto out_unlock; |
| 4177 | } | 4299 | } |
| 4300 | btrfs_set_trans_block_group(trans, dir); | ||
| 4178 | 4301 | ||
| 4179 | err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid); | 4302 | err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid); |
| 4180 | if (err) { | 4303 | if (err) { |
| @@ -4223,6 +4346,7 @@ out_fail: | |||
| 4223 | btrfs_end_transaction_throttle(trans, root); | 4346 | btrfs_end_transaction_throttle(trans, root); |
| 4224 | 4347 | ||
| 4225 | out_unlock: | 4348 | out_unlock: |
| 4349 | btrfs_unreserve_metadata_space(root, 5); | ||
| 4226 | if (drop_on_err) | 4350 | if (drop_on_err) |
| 4227 | iput(inode); | 4351 | iput(inode); |
| 4228 | btrfs_btree_balance_dirty(root, nr); | 4352 | btrfs_btree_balance_dirty(root, nr); |
| @@ -4747,6 +4871,13 @@ int btrfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) | |||
| 4747 | goto out; | 4871 | goto out; |
| 4748 | } | 4872 | } |
| 4749 | 4873 | ||
| 4874 | ret = btrfs_reserve_metadata_for_delalloc(root, inode, 1); | ||
| 4875 | if (ret) { | ||
| 4876 | btrfs_free_reserved_data_space(root, inode, PAGE_CACHE_SIZE); | ||
| 4877 | ret = VM_FAULT_SIGBUS; | ||
| 4878 | goto out; | ||
| 4879 | } | ||
| 4880 | |||
| 4750 | ret = VM_FAULT_NOPAGE; /* make the VM retry the fault */ | 4881 | ret = VM_FAULT_NOPAGE; /* make the VM retry the fault */ |
| 4751 | again: | 4882 | again: |
| 4752 | lock_page(page); | 4883 | lock_page(page); |
| @@ -4778,7 +4909,23 @@ again: | |||
| 4778 | goto again; | 4909 | goto again; |
| 4779 | } | 4910 | } |
| 4780 | 4911 | ||
| 4781 | btrfs_set_extent_delalloc(inode, page_start, page_end); | 4912 | /* |
| 4913 | * XXX - page_mkwrite gets called every time the page is dirtied, even | ||
| 4914 | * if it was already dirty, so for space accounting reasons we need to | ||
| 4915 | * clear any delalloc bits for the range we are fixing to save. There | ||
| 4916 | * is probably a better way to do this, but for now keep consistent with | ||
| 4917 | * prepare_pages in the normal write path. | ||
| 4918 | */ | ||
| 4919 | clear_extent_bits(&BTRFS_I(inode)->io_tree, page_start, page_end, | ||
| 4920 | EXTENT_DIRTY | EXTENT_DELALLOC, GFP_NOFS); | ||
| 4921 | |||
| 4922 | ret = btrfs_set_extent_delalloc(inode, page_start, page_end); | ||
| 4923 | if (ret) { | ||
| 4924 | unlock_extent(io_tree, page_start, page_end, GFP_NOFS); | ||
| 4925 | ret = VM_FAULT_SIGBUS; | ||
| 4926 | btrfs_free_reserved_data_space(root, inode, PAGE_CACHE_SIZE); | ||
| 4927 | goto out_unlock; | ||
| 4928 | } | ||
| 4782 | ret = 0; | 4929 | ret = 0; |
| 4783 | 4930 | ||
| 4784 | /* page is wholly or partially inside EOF */ | 4931 | /* page is wholly or partially inside EOF */ |
| @@ -4801,6 +4948,7 @@ again: | |||
| 4801 | unlock_extent(io_tree, page_start, page_end, GFP_NOFS); | 4948 | unlock_extent(io_tree, page_start, page_end, GFP_NOFS); |
| 4802 | 4949 | ||
| 4803 | out_unlock: | 4950 | out_unlock: |
| 4951 | btrfs_unreserve_metadata_for_delalloc(root, inode, 1); | ||
| 4804 | if (!ret) | 4952 | if (!ret) |
| 4805 | return VM_FAULT_LOCKED; | 4953 | return VM_FAULT_LOCKED; |
| 4806 | unlock_page(page); | 4954 | unlock_page(page); |
| @@ -4917,6 +5065,8 @@ struct inode *btrfs_alloc_inode(struct super_block *sb) | |||
| 4917 | return NULL; | 5065 | return NULL; |
| 4918 | ei->last_trans = 0; | 5066 | ei->last_trans = 0; |
| 4919 | ei->logged_trans = 0; | 5067 | ei->logged_trans = 0; |
| 5068 | ei->delalloc_extents = 0; | ||
| 5069 | ei->delalloc_reserved_extents = 0; | ||
| 4920 | btrfs_ordered_inode_tree_init(&ei->ordered_tree); | 5070 | btrfs_ordered_inode_tree_init(&ei->ordered_tree); |
| 4921 | INIT_LIST_HEAD(&ei->i_orphan); | 5071 | INIT_LIST_HEAD(&ei->i_orphan); |
| 4922 | INIT_LIST_HEAD(&ei->ordered_operations); | 5072 | INIT_LIST_HEAD(&ei->ordered_operations); |
| @@ -5070,7 +5220,12 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
| 5070 | new_inode->i_size > BTRFS_EMPTY_DIR_SIZE) | 5220 | new_inode->i_size > BTRFS_EMPTY_DIR_SIZE) |
| 5071 | return -ENOTEMPTY; | 5221 | return -ENOTEMPTY; |
| 5072 | 5222 | ||
| 5073 | ret = btrfs_check_metadata_free_space(root); | 5223 | /* |
| 5224 | * 2 items for dir items | ||
| 5225 | * 1 item for orphan entry | ||
| 5226 | * 1 item for ref | ||
| 5227 | */ | ||
| 5228 | ret = btrfs_reserve_metadata_space(root, 4); | ||
| 5074 | if (ret) | 5229 | if (ret) |
| 5075 | return ret; | 5230 | return ret; |
| 5076 | 5231 | ||
| @@ -5185,6 +5340,8 @@ out_fail: | |||
| 5185 | 5340 | ||
| 5186 | if (old_inode->i_ino == BTRFS_FIRST_FREE_OBJECTID) | 5341 | if (old_inode->i_ino == BTRFS_FIRST_FREE_OBJECTID) |
| 5187 | up_read(&root->fs_info->subvol_sem); | 5342 | up_read(&root->fs_info->subvol_sem); |
| 5343 | |||
| 5344 | btrfs_unreserve_metadata_space(root, 4); | ||
| 5188 | return ret; | 5345 | return ret; |
| 5189 | } | 5346 | } |
| 5190 | 5347 | ||
| @@ -5256,11 +5413,18 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry, | |||
| 5256 | if (name_len > BTRFS_MAX_INLINE_DATA_SIZE(root)) | 5413 | if (name_len > BTRFS_MAX_INLINE_DATA_SIZE(root)) |
| 5257 | return -ENAMETOOLONG; | 5414 | return -ENAMETOOLONG; |
| 5258 | 5415 | ||
| 5259 | err = btrfs_check_metadata_free_space(root); | 5416 | /* |
| 5417 | * 2 items for inode item and ref | ||
| 5418 | * 2 items for dir items | ||
| 5419 | * 1 item for xattr if selinux is on | ||
| 5420 | */ | ||
| 5421 | err = btrfs_reserve_metadata_space(root, 5); | ||
| 5260 | if (err) | 5422 | if (err) |
| 5261 | goto out_fail; | 5423 | return err; |
| 5262 | 5424 | ||
| 5263 | trans = btrfs_start_transaction(root, 1); | 5425 | trans = btrfs_start_transaction(root, 1); |
| 5426 | if (!trans) | ||
| 5427 | goto out_fail; | ||
| 5264 | btrfs_set_trans_block_group(trans, dir); | 5428 | btrfs_set_trans_block_group(trans, dir); |
| 5265 | 5429 | ||
| 5266 | err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid); | 5430 | err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid); |
| @@ -5341,6 +5505,7 @@ out_unlock: | |||
| 5341 | nr = trans->blocks_used; | 5505 | nr = trans->blocks_used; |
| 5342 | btrfs_end_transaction_throttle(trans, root); | 5506 | btrfs_end_transaction_throttle(trans, root); |
| 5343 | out_fail: | 5507 | out_fail: |
| 5508 | btrfs_unreserve_metadata_space(root, 5); | ||
| 5344 | if (drop_inode) { | 5509 | if (drop_inode) { |
| 5345 | inode_dec_link_count(inode); | 5510 | inode_dec_link_count(inode); |
| 5346 | iput(inode); | 5511 | iput(inode); |
| @@ -5362,6 +5527,11 @@ static int prealloc_file_range(struct btrfs_trans_handle *trans, | |||
| 5362 | 5527 | ||
| 5363 | while (num_bytes > 0) { | 5528 | while (num_bytes > 0) { |
| 5364 | alloc_size = min(num_bytes, root->fs_info->max_extent); | 5529 | alloc_size = min(num_bytes, root->fs_info->max_extent); |
| 5530 | |||
| 5531 | ret = btrfs_reserve_metadata_space(root, 1); | ||
| 5532 | if (ret) | ||
| 5533 | goto out; | ||
| 5534 | |||
| 5365 | ret = btrfs_reserve_extent(trans, root, alloc_size, | 5535 | ret = btrfs_reserve_extent(trans, root, alloc_size, |
| 5366 | root->sectorsize, 0, alloc_hint, | 5536 | root->sectorsize, 0, alloc_hint, |
| 5367 | (u64)-1, &ins, 1); | 5537 | (u64)-1, &ins, 1); |
| @@ -5381,6 +5551,7 @@ static int prealloc_file_range(struct btrfs_trans_handle *trans, | |||
| 5381 | num_bytes -= ins.offset; | 5551 | num_bytes -= ins.offset; |
| 5382 | cur_offset += ins.offset; | 5552 | cur_offset += ins.offset; |
| 5383 | alloc_hint = ins.objectid + ins.offset; | 5553 | alloc_hint = ins.objectid + ins.offset; |
| 5554 | btrfs_unreserve_metadata_space(root, 1); | ||
| 5384 | } | 5555 | } |
| 5385 | out: | 5556 | out: |
| 5386 | if (cur_offset > start) { | 5557 | if (cur_offset > start) { |
| @@ -5544,7 +5715,7 @@ static const struct inode_operations btrfs_dir_ro_inode_operations = { | |||
| 5544 | .permission = btrfs_permission, | 5715 | .permission = btrfs_permission, |
| 5545 | }; | 5716 | }; |
| 5546 | 5717 | ||
| 5547 | static struct file_operations btrfs_dir_file_operations = { | 5718 | static const struct file_operations btrfs_dir_file_operations = { |
| 5548 | .llseek = generic_file_llseek, | 5719 | .llseek = generic_file_llseek, |
| 5549 | .read = generic_read_dir, | 5720 | .read = generic_read_dir, |
| 5550 | .readdir = btrfs_real_readdir, | 5721 | .readdir = btrfs_real_readdir, |
| @@ -5566,6 +5737,8 @@ static struct extent_io_ops btrfs_extent_io_ops = { | |||
| 5566 | .readpage_io_failed_hook = btrfs_io_failed_hook, | 5737 | .readpage_io_failed_hook = btrfs_io_failed_hook, |
| 5567 | .set_bit_hook = btrfs_set_bit_hook, | 5738 | .set_bit_hook = btrfs_set_bit_hook, |
| 5568 | .clear_bit_hook = btrfs_clear_bit_hook, | 5739 | .clear_bit_hook = btrfs_clear_bit_hook, |
| 5740 | .merge_extent_hook = btrfs_merge_extent_hook, | ||
| 5741 | .split_extent_hook = btrfs_split_extent_hook, | ||
| 5569 | }; | 5742 | }; |
| 5570 | 5743 | ||
| 5571 | /* | 5744 | /* |
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index a8577a7f26ab..9a780c8d0ac8 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c | |||
| @@ -239,7 +239,13 @@ static noinline int create_subvol(struct btrfs_root *root, | |||
| 239 | u64 index = 0; | 239 | u64 index = 0; |
| 240 | unsigned long nr = 1; | 240 | unsigned long nr = 1; |
| 241 | 241 | ||
| 242 | ret = btrfs_check_metadata_free_space(root); | 242 | /* |
| 243 | * 1 - inode item | ||
| 244 | * 2 - refs | ||
| 245 | * 1 - root item | ||
| 246 | * 2 - dir items | ||
| 247 | */ | ||
| 248 | ret = btrfs_reserve_metadata_space(root, 6); | ||
| 243 | if (ret) | 249 | if (ret) |
| 244 | return ret; | 250 | return ret; |
| 245 | 251 | ||
| @@ -340,6 +346,9 @@ fail: | |||
| 340 | err = btrfs_commit_transaction(trans, root); | 346 | err = btrfs_commit_transaction(trans, root); |
| 341 | if (err && !ret) | 347 | if (err && !ret) |
| 342 | ret = err; | 348 | ret = err; |
| 349 | |||
| 350 | btrfs_unreserve_metadata_space(root, 6); | ||
| 351 | btrfs_btree_balance_dirty(root, nr); | ||
| 343 | return ret; | 352 | return ret; |
| 344 | } | 353 | } |
| 345 | 354 | ||
| @@ -355,19 +364,27 @@ static int create_snapshot(struct btrfs_root *root, struct dentry *dentry, | |||
| 355 | if (!root->ref_cows) | 364 | if (!root->ref_cows) |
| 356 | return -EINVAL; | 365 | return -EINVAL; |
| 357 | 366 | ||
| 358 | ret = btrfs_check_metadata_free_space(root); | 367 | /* |
| 368 | * 1 - inode item | ||
| 369 | * 2 - refs | ||
| 370 | * 1 - root item | ||
| 371 | * 2 - dir items | ||
| 372 | */ | ||
| 373 | ret = btrfs_reserve_metadata_space(root, 6); | ||
| 359 | if (ret) | 374 | if (ret) |
| 360 | goto fail_unlock; | 375 | goto fail_unlock; |
| 361 | 376 | ||
| 362 | pending_snapshot = kzalloc(sizeof(*pending_snapshot), GFP_NOFS); | 377 | pending_snapshot = kzalloc(sizeof(*pending_snapshot), GFP_NOFS); |
| 363 | if (!pending_snapshot) { | 378 | if (!pending_snapshot) { |
| 364 | ret = -ENOMEM; | 379 | ret = -ENOMEM; |
| 380 | btrfs_unreserve_metadata_space(root, 6); | ||
| 365 | goto fail_unlock; | 381 | goto fail_unlock; |
| 366 | } | 382 | } |
| 367 | pending_snapshot->name = kmalloc(namelen + 1, GFP_NOFS); | 383 | pending_snapshot->name = kmalloc(namelen + 1, GFP_NOFS); |
| 368 | if (!pending_snapshot->name) { | 384 | if (!pending_snapshot->name) { |
| 369 | ret = -ENOMEM; | 385 | ret = -ENOMEM; |
| 370 | kfree(pending_snapshot); | 386 | kfree(pending_snapshot); |
| 387 | btrfs_unreserve_metadata_space(root, 6); | ||
| 371 | goto fail_unlock; | 388 | goto fail_unlock; |
| 372 | } | 389 | } |
| 373 | memcpy(pending_snapshot->name, name, namelen); | 390 | memcpy(pending_snapshot->name, name, namelen); |
| @@ -1215,15 +1232,15 @@ static long btrfs_ioctl_trans_start(struct file *file) | |||
| 1215 | struct inode *inode = fdentry(file)->d_inode; | 1232 | struct inode *inode = fdentry(file)->d_inode; |
| 1216 | struct btrfs_root *root = BTRFS_I(inode)->root; | 1233 | struct btrfs_root *root = BTRFS_I(inode)->root; |
| 1217 | struct btrfs_trans_handle *trans; | 1234 | struct btrfs_trans_handle *trans; |
| 1218 | int ret = 0; | 1235 | int ret; |
| 1219 | 1236 | ||
| 1237 | ret = -EPERM; | ||
| 1220 | if (!capable(CAP_SYS_ADMIN)) | 1238 | if (!capable(CAP_SYS_ADMIN)) |
| 1221 | return -EPERM; | 1239 | goto out; |
| 1222 | 1240 | ||
| 1223 | if (file->private_data) { | 1241 | ret = -EINPROGRESS; |
| 1224 | ret = -EINPROGRESS; | 1242 | if (file->private_data) |
| 1225 | goto out; | 1243 | goto out; |
| 1226 | } | ||
| 1227 | 1244 | ||
| 1228 | ret = mnt_want_write(file->f_path.mnt); | 1245 | ret = mnt_want_write(file->f_path.mnt); |
| 1229 | if (ret) | 1246 | if (ret) |
| @@ -1233,12 +1250,19 @@ static long btrfs_ioctl_trans_start(struct file *file) | |||
| 1233 | root->fs_info->open_ioctl_trans++; | 1250 | root->fs_info->open_ioctl_trans++; |
| 1234 | mutex_unlock(&root->fs_info->trans_mutex); | 1251 | mutex_unlock(&root->fs_info->trans_mutex); |
| 1235 | 1252 | ||
| 1253 | ret = -ENOMEM; | ||
| 1236 | trans = btrfs_start_ioctl_transaction(root, 0); | 1254 | trans = btrfs_start_ioctl_transaction(root, 0); |
| 1237 | if (trans) | 1255 | if (!trans) |
| 1238 | file->private_data = trans; | 1256 | goto out_drop; |
| 1239 | else | 1257 | |
| 1240 | ret = -ENOMEM; | 1258 | file->private_data = trans; |
| 1241 | /*printk(KERN_INFO "btrfs_ioctl_trans_start on %p\n", file);*/ | 1259 | return 0; |
| 1260 | |||
| 1261 | out_drop: | ||
| 1262 | mutex_lock(&root->fs_info->trans_mutex); | ||
| 1263 | root->fs_info->open_ioctl_trans--; | ||
| 1264 | mutex_unlock(&root->fs_info->trans_mutex); | ||
| 1265 | mnt_drop_write(file->f_path.mnt); | ||
| 1242 | out: | 1266 | out: |
| 1243 | return ret; | 1267 | return ret; |
| 1244 | } | 1268 | } |
| @@ -1254,24 +1278,20 @@ long btrfs_ioctl_trans_end(struct file *file) | |||
| 1254 | struct inode *inode = fdentry(file)->d_inode; | 1278 | struct inode *inode = fdentry(file)->d_inode; |
| 1255 | struct btrfs_root *root = BTRFS_I(inode)->root; | 1279 | struct btrfs_root *root = BTRFS_I(inode)->root; |
| 1256 | struct btrfs_trans_handle *trans; | 1280 | struct btrfs_trans_handle *trans; |
| 1257 | int ret = 0; | ||
| 1258 | 1281 | ||
| 1259 | trans = file->private_data; | 1282 | trans = file->private_data; |
| 1260 | if (!trans) { | 1283 | if (!trans) |
| 1261 | ret = -EINVAL; | 1284 | return -EINVAL; |
| 1262 | goto out; | ||
| 1263 | } | ||
| 1264 | btrfs_end_transaction(trans, root); | ||
| 1265 | file->private_data = NULL; | 1285 | file->private_data = NULL; |
| 1266 | 1286 | ||
| 1287 | btrfs_end_transaction(trans, root); | ||
| 1288 | |||
| 1267 | mutex_lock(&root->fs_info->trans_mutex); | 1289 | mutex_lock(&root->fs_info->trans_mutex); |
| 1268 | root->fs_info->open_ioctl_trans--; | 1290 | root->fs_info->open_ioctl_trans--; |
| 1269 | mutex_unlock(&root->fs_info->trans_mutex); | 1291 | mutex_unlock(&root->fs_info->trans_mutex); |
| 1270 | 1292 | ||
| 1271 | mnt_drop_write(file->f_path.mnt); | 1293 | mnt_drop_write(file->f_path.mnt); |
| 1272 | 1294 | return 0; | |
| 1273 | out: | ||
| 1274 | return ret; | ||
| 1275 | } | 1295 | } |
| 1276 | 1296 | ||
| 1277 | long btrfs_ioctl(struct file *file, unsigned int | 1297 | long btrfs_ioctl(struct file *file, unsigned int |
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index b5d6d24726b0..897fba835f89 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c | |||
| @@ -458,7 +458,7 @@ void btrfs_start_ordered_extent(struct inode *inode, | |||
| 458 | * start IO on any dirty ones so the wait doesn't stall waiting | 458 | * start IO on any dirty ones so the wait doesn't stall waiting |
| 459 | * for pdflush to find them | 459 | * for pdflush to find them |
| 460 | */ | 460 | */ |
| 461 | btrfs_fdatawrite_range(inode->i_mapping, start, end, WB_SYNC_ALL); | 461 | filemap_fdatawrite_range(inode->i_mapping, start, end); |
| 462 | if (wait) { | 462 | if (wait) { |
| 463 | wait_event(entry->wait, test_bit(BTRFS_ORDERED_COMPLETE, | 463 | wait_event(entry->wait, test_bit(BTRFS_ORDERED_COMPLETE, |
| 464 | &entry->flags)); | 464 | &entry->flags)); |
| @@ -488,17 +488,15 @@ again: | |||
| 488 | /* start IO across the range first to instantiate any delalloc | 488 | /* start IO across the range first to instantiate any delalloc |
| 489 | * extents | 489 | * extents |
| 490 | */ | 490 | */ |
| 491 | btrfs_fdatawrite_range(inode->i_mapping, start, orig_end, WB_SYNC_ALL); | 491 | filemap_fdatawrite_range(inode->i_mapping, start, orig_end); |
| 492 | 492 | ||
| 493 | /* The compression code will leave pages locked but return from | 493 | /* The compression code will leave pages locked but return from |
| 494 | * writepage without setting the page writeback. Starting again | 494 | * writepage without setting the page writeback. Starting again |
| 495 | * with WB_SYNC_ALL will end up waiting for the IO to actually start. | 495 | * with WB_SYNC_ALL will end up waiting for the IO to actually start. |
| 496 | */ | 496 | */ |
| 497 | btrfs_fdatawrite_range(inode->i_mapping, start, orig_end, WB_SYNC_ALL); | 497 | filemap_fdatawrite_range(inode->i_mapping, start, orig_end); |
| 498 | 498 | ||
| 499 | btrfs_wait_on_page_writeback_range(inode->i_mapping, | 499 | filemap_fdatawait_range(inode->i_mapping, start, orig_end); |
| 500 | start >> PAGE_CACHE_SHIFT, | ||
| 501 | orig_end >> PAGE_CACHE_SHIFT); | ||
| 502 | 500 | ||
| 503 | end = orig_end; | 501 | end = orig_end; |
| 504 | found = 0; | 502 | found = 0; |
| @@ -716,89 +714,6 @@ out: | |||
| 716 | } | 714 | } |
| 717 | 715 | ||
| 718 | 716 | ||
| 719 | /** | ||
| 720 | * taken from mm/filemap.c because it isn't exported | ||
| 721 | * | ||
| 722 | * __filemap_fdatawrite_range - start writeback on mapping dirty pages in range | ||
| 723 | * @mapping: address space structure to write | ||
| 724 | * @start: offset in bytes where the range starts | ||
| 725 | * @end: offset in bytes where the range ends (inclusive) | ||
| 726 | * @sync_mode: enable synchronous operation | ||
| 727 | * | ||
| 728 | * Start writeback against all of a mapping's dirty pages that lie | ||
| 729 | * within the byte offsets <start, end> inclusive. | ||
| 730 | * | ||
| 731 | * If sync_mode is WB_SYNC_ALL then this is a "data integrity" operation, as | ||
| 732 | * opposed to a regular memory cleansing writeback. The difference between | ||
| 733 | * these two operations is that if a dirty page/buffer is encountered, it must | ||
| 734 | * be waited upon, and not just skipped over. | ||
| 735 | */ | ||
| 736 | int btrfs_fdatawrite_range(struct address_space *mapping, loff_t start, | ||
| 737 | loff_t end, int sync_mode) | ||
| 738 | { | ||
| 739 | struct writeback_control wbc = { | ||
| 740 | .sync_mode = sync_mode, | ||
| 741 | .nr_to_write = mapping->nrpages * 2, | ||
| 742 | .range_start = start, | ||
| 743 | .range_end = end, | ||
| 744 | }; | ||
| 745 | return btrfs_writepages(mapping, &wbc); | ||
| 746 | } | ||
| 747 | |||
| 748 | /** | ||
| 749 | * taken from mm/filemap.c because it isn't exported | ||
| 750 | * | ||
| 751 | * wait_on_page_writeback_range - wait for writeback to complete | ||
| 752 | * @mapping: target address_space | ||
| 753 | * @start: beginning page index | ||
| 754 | * @end: ending page index | ||
| 755 | * | ||
| 756 | * Wait for writeback to complete against pages indexed by start->end | ||
| 757 | * inclusive | ||
| 758 | */ | ||
| 759 | int btrfs_wait_on_page_writeback_range(struct address_space *mapping, | ||
| 760 | pgoff_t start, pgoff_t end) | ||
| 761 | { | ||
| 762 | struct pagevec pvec; | ||
| 763 | int nr_pages; | ||
| 764 | int ret = 0; | ||
| 765 | pgoff_t index; | ||
| 766 | |||
| 767 | if (end < start) | ||
| 768 | return 0; | ||
| 769 | |||
| 770 | pagevec_init(&pvec, 0); | ||
| 771 | index = start; | ||
| 772 | while ((index <= end) && | ||
| 773 | (nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, | ||
| 774 | PAGECACHE_TAG_WRITEBACK, | ||
| 775 | min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1)) != 0) { | ||
| 776 | unsigned i; | ||
| 777 | |||
| 778 | for (i = 0; i < nr_pages; i++) { | ||
| 779 | struct page *page = pvec.pages[i]; | ||
| 780 | |||
| 781 | /* until radix tree lookup accepts end_index */ | ||
| 782 | if (page->index > end) | ||
| 783 | continue; | ||
| 784 | |||
| 785 | wait_on_page_writeback(page); | ||
| 786 | if (PageError(page)) | ||
| 787 | ret = -EIO; | ||
| 788 | } | ||
| 789 | pagevec_release(&pvec); | ||
| 790 | cond_resched(); | ||
| 791 | } | ||
| 792 | |||
| 793 | /* Check for outstanding write errors */ | ||
| 794 | if (test_and_clear_bit(AS_ENOSPC, &mapping->flags)) | ||
| 795 | ret = -ENOSPC; | ||
| 796 | if (test_and_clear_bit(AS_EIO, &mapping->flags)) | ||
| 797 | ret = -EIO; | ||
| 798 | |||
| 799 | return ret; | ||
| 800 | } | ||
| 801 | |||
| 802 | /* | 717 | /* |
| 803 | * add a given inode to the list of inodes that must be fully on | 718 | * add a given inode to the list of inodes that must be fully on |
| 804 | * disk before a transaction commit finishes. | 719 | * disk before a transaction commit finishes. |
diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h index 993a7ea45c70..f82e87488ca8 100644 --- a/fs/btrfs/ordered-data.h +++ b/fs/btrfs/ordered-data.h | |||
| @@ -153,10 +153,6 @@ btrfs_lookup_first_ordered_extent(struct inode * inode, u64 file_offset); | |||
| 153 | int btrfs_ordered_update_i_size(struct inode *inode, | 153 | int btrfs_ordered_update_i_size(struct inode *inode, |
| 154 | struct btrfs_ordered_extent *ordered); | 154 | struct btrfs_ordered_extent *ordered); |
| 155 | int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u64 disk_bytenr, u32 *sum); | 155 | int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u64 disk_bytenr, u32 *sum); |
| 156 | int btrfs_wait_on_page_writeback_range(struct address_space *mapping, | ||
| 157 | pgoff_t start, pgoff_t end); | ||
| 158 | int btrfs_fdatawrite_range(struct address_space *mapping, loff_t start, | ||
| 159 | loff_t end, int sync_mode); | ||
| 160 | int btrfs_wait_ordered_extents(struct btrfs_root *root, int nocow_only); | 156 | int btrfs_wait_ordered_extents(struct btrfs_root *root, int nocow_only); |
| 161 | int btrfs_run_ordered_operations(struct btrfs_root *root, int wait); | 157 | int btrfs_run_ordered_operations(struct btrfs_root *root, int wait); |
| 162 | int btrfs_add_ordered_operation(struct btrfs_trans_handle *trans, | 158 | int btrfs_add_ordered_operation(struct btrfs_trans_handle *trans, |
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 67035385444c..9de9b2236419 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c | |||
| @@ -344,7 +344,9 @@ static int btrfs_fill_super(struct super_block *sb, | |||
| 344 | sb->s_export_op = &btrfs_export_ops; | 344 | sb->s_export_op = &btrfs_export_ops; |
| 345 | sb->s_xattr = btrfs_xattr_handlers; | 345 | sb->s_xattr = btrfs_xattr_handlers; |
| 346 | sb->s_time_gran = 1; | 346 | sb->s_time_gran = 1; |
| 347 | #ifdef CONFIG_BTRFS_POSIX_ACL | ||
| 347 | sb->s_flags |= MS_POSIXACL; | 348 | sb->s_flags |= MS_POSIXACL; |
| 349 | #endif | ||
| 348 | 350 | ||
| 349 | tree_root = open_ctree(sb, fs_devices, (char *)data); | 351 | tree_root = open_ctree(sb, fs_devices, (char *)data); |
| 350 | 352 | ||
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 88f866f85e7a..0b8f36d4400a 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c | |||
| @@ -186,6 +186,9 @@ static struct btrfs_trans_handle *start_transaction(struct btrfs_root *root, | |||
| 186 | h->alloc_exclude_start = 0; | 186 | h->alloc_exclude_start = 0; |
| 187 | h->delayed_ref_updates = 0; | 187 | h->delayed_ref_updates = 0; |
| 188 | 188 | ||
| 189 | if (!current->journal_info) | ||
| 190 | current->journal_info = h; | ||
| 191 | |||
| 189 | root->fs_info->running_transaction->use_count++; | 192 | root->fs_info->running_transaction->use_count++; |
| 190 | record_root_in_trans(h, root); | 193 | record_root_in_trans(h, root); |
| 191 | mutex_unlock(&root->fs_info->trans_mutex); | 194 | mutex_unlock(&root->fs_info->trans_mutex); |
| @@ -317,6 +320,9 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans, | |||
| 317 | wake_up(&cur_trans->writer_wait); | 320 | wake_up(&cur_trans->writer_wait); |
| 318 | put_transaction(cur_trans); | 321 | put_transaction(cur_trans); |
| 319 | mutex_unlock(&info->trans_mutex); | 322 | mutex_unlock(&info->trans_mutex); |
| 323 | |||
| 324 | if (current->journal_info == trans) | ||
| 325 | current->journal_info = NULL; | ||
| 320 | memset(trans, 0, sizeof(*trans)); | 326 | memset(trans, 0, sizeof(*trans)); |
| 321 | kmem_cache_free(btrfs_trans_handle_cachep, trans); | 327 | kmem_cache_free(btrfs_trans_handle_cachep, trans); |
| 322 | 328 | ||
| @@ -743,6 +749,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, | |||
| 743 | memcpy(&pending->root_key, &key, sizeof(key)); | 749 | memcpy(&pending->root_key, &key, sizeof(key)); |
| 744 | fail: | 750 | fail: |
| 745 | kfree(new_root_item); | 751 | kfree(new_root_item); |
| 752 | btrfs_unreserve_metadata_space(root, 6); | ||
| 746 | return ret; | 753 | return ret; |
| 747 | } | 754 | } |
| 748 | 755 | ||
| @@ -1059,6 +1066,9 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
| 1059 | 1066 | ||
| 1060 | mutex_unlock(&root->fs_info->trans_mutex); | 1067 | mutex_unlock(&root->fs_info->trans_mutex); |
| 1061 | 1068 | ||
| 1069 | if (current->journal_info == trans) | ||
| 1070 | current->journal_info = NULL; | ||
| 1071 | |||
| 1062 | kmem_cache_free(btrfs_trans_handle_cachep, trans); | 1072 | kmem_cache_free(btrfs_trans_handle_cachep, trans); |
| 1063 | return ret; | 1073 | return ret; |
| 1064 | } | 1074 | } |
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 23e7d36ff325..7eda483d7b5a 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c | |||
| @@ -446,8 +446,10 @@ static struct btrfs_fs_devices *clone_fs_devices(struct btrfs_fs_devices *orig) | |||
| 446 | goto error; | 446 | goto error; |
| 447 | 447 | ||
| 448 | device->name = kstrdup(orig_dev->name, GFP_NOFS); | 448 | device->name = kstrdup(orig_dev->name, GFP_NOFS); |
| 449 | if (!device->name) | 449 | if (!device->name) { |
| 450 | kfree(device); | ||
| 450 | goto error; | 451 | goto error; |
| 452 | } | ||
| 451 | 453 | ||
| 452 | device->devid = orig_dev->devid; | 454 | device->devid = orig_dev->devid; |
| 453 | device->work.func = pending_bios_fn; | 455 | device->work.func = pending_bios_fn; |
diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c index a9d3bf4d2689..b0fc93f95fd0 100644 --- a/fs/btrfs/xattr.c +++ b/fs/btrfs/xattr.c | |||
| @@ -260,7 +260,7 @@ err: | |||
| 260 | * attributes are handled directly. | 260 | * attributes are handled directly. |
| 261 | */ | 261 | */ |
| 262 | struct xattr_handler *btrfs_xattr_handlers[] = { | 262 | struct xattr_handler *btrfs_xattr_handlers[] = { |
| 263 | #ifdef CONFIG_FS_POSIX_ACL | 263 | #ifdef CONFIG_BTRFS_POSIX_ACL |
| 264 | &btrfs_xattr_acl_access_handler, | 264 | &btrfs_xattr_acl_access_handler, |
| 265 | &btrfs_xattr_acl_default_handler, | 265 | &btrfs_xattr_acl_default_handler, |
| 266 | #endif | 266 | #endif |
