Diffstat (limited to 'fs/btrfs/extent_io.c')
-rw-r--r--  fs/btrfs/extent_io.c  192
1 file changed, 147 insertions(+), 45 deletions(-)
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 2e993cf1766e..fd3f172e94e6 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -1433,12 +1433,13 @@ int extent_clear_unlock_delalloc(struct inode *inode,
  */
 u64 count_range_bits(struct extent_io_tree *tree,
		     u64 *start, u64 search_end, u64 max_bytes,
-		     unsigned long bits)
+		     unsigned long bits, int contig)
 {
	struct rb_node *node;
	struct extent_state *state;
	u64 cur_start = *start;
	u64 total_bytes = 0;
+	u64 last = 0;
	int found = 0;
 
	if (search_end <= cur_start) {
@@ -1463,7 +1464,9 @@ u64 count_range_bits(struct extent_io_tree *tree,
		state = rb_entry(node, struct extent_state, rb_node);
		if (state->start > search_end)
			break;
-		if (state->end >= cur_start && (state->state & bits)) {
+		if (contig && found && state->start > last + 1)
+			break;
+		if (state->end >= cur_start && (state->state & bits) == bits) {
			total_bytes += min(search_end, state->end) + 1 -
				       max(cur_start, state->start);
			if (total_bytes >= max_bytes)
@@ -1472,6 +1475,9 @@ u64 count_range_bits(struct extent_io_tree *tree,
				*start = state->start;
				found = 1;
			}
+			last = state->end;
+		} else if (contig && found) {
+			break;
		}
		node = rb_next(node);
		if (!node)
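
Taken together, the three hunks above change count_range_bits in two ways: a caller passing the new contig flag gets the length of the first contiguous run of matching states rather than a grand total, and the bit test is tightened from "any requested bit set" to "all requested bits set". A standalone model of the new semantics over a sorted array (hypothetical userspace sketch, not the kernel's rb-tree walk):

#include <stdint.h>
#include <stdio.h>

struct state { uint64_t start, end; unsigned long bits; };

/* Count bytes whose state carries all of 'bits'; with contig set,
 * stop at the first gap (or non-matching state) after the first hit. */
static uint64_t count_range_bits_model(const struct state *s, int n,
				       uint64_t start, uint64_t search_end,
				       unsigned long bits, int contig)
{
	uint64_t total = 0, last = 0;
	int found = 0;

	for (int i = 0; i < n; i++) {
		if (s[i].start > search_end)
			break;
		if (contig && found && s[i].start > last + 1)
			break;	/* gap after the first hit: run is over */
		if (s[i].end >= start && (s[i].bits & bits) == bits) {
			uint64_t lo = s[i].start > start ? s[i].start : start;
			uint64_t hi = s[i].end < search_end ? s[i].end : search_end;
			total += hi + 1 - lo;
			found = 1;
			last = s[i].end;
		} else if (contig && found) {
			break;	/* non-matching state ends the run */
		}
	}
	return total;
}

int main(void)
{
	/* delalloc at [0,4095] and [8192,12287], hole in between */
	struct state s[] = { { 0, 4095, 1 }, { 8192, 12287, 1 } };

	printf("any:    %llu\n", (unsigned long long)
	       count_range_bits_model(s, 2, 0, ~0ULL, 1, 0));	/* 8192 */
	printf("contig: %llu\n", (unsigned long long)
	       count_range_bits_model(s, 2, 0, ~0ULL, 1, 1));	/* 4096 */
	return 0;
}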
@@ -1865,7 +1871,7 @@ static int submit_one_bio(int rw, struct bio *bio, int mirror_num,
	bio_get(bio);
 
	if (tree->ops && tree->ops->submit_bio_hook)
-		tree->ops->submit_bio_hook(page->mapping->host, rw, bio,
+		ret = tree->ops->submit_bio_hook(page->mapping->host, rw, bio,
					   mirror_num, bio_flags, start);
	else
		submit_bio(rw, bio);
@@ -1920,6 +1926,8 @@ static int submit_extent_page(int rw, struct extent_io_tree *tree,
	nr = bio_get_nr_vecs(bdev);
 
	bio = btrfs_bio_alloc(bdev, sector, nr, GFP_NOFS | __GFP_HIGH);
+	if (!bio)
+		return -ENOMEM;
 
	bio_add_page(bio, page, page_size, offset);
	bio->bi_end_io = end_io_func;
@@ -1944,6 +1952,7 @@ void set_page_extent_mapped(struct page *page)
 
 static void set_page_extent_head(struct page *page, unsigned long len)
 {
+	WARN_ON(!PagePrivate(page));
	set_page_private(page, EXTENT_PAGE_PRIVATE_FIRST_PAGE | len << 2);
 }
 
@@ -2126,7 +2135,7 @@ int extent_read_full_page(struct extent_io_tree *tree, struct page *page,
	ret = __extent_read_full_page(tree, page, get_extent, &bio, 0,
				      &bio_flags);
	if (bio)
-		submit_one_bio(READ, bio, 0, bio_flags);
+		ret = submit_one_bio(READ, bio, 0, bio_flags);
	return ret;
 }
 
@@ -2819,9 +2828,17 @@ int try_release_extent_state(struct extent_map_tree *map,
		 * at this point we can safely clear everything except the
		 * locked bit and the nodatasum bit
		 */
-		clear_extent_bit(tree, start, end,
+		ret = clear_extent_bit(tree, start, end,
				 ~(EXTENT_LOCKED | EXTENT_NODATASUM),
				 0, 0, NULL, mask);
+
+		/* if clear_extent_bit failed for enomem reasons,
+		 * we can't allow the release to continue.
+		 */
+		if (ret < 0)
+			ret = 0;
+		else
+			ret = 1;
	}
	return ret;
 }
@@ -2901,6 +2918,46 @@ out:
	return sector;
 }
 
+/*
+ * helper function for fiemap, which doesn't want to see any holes.
+ * This maps until we find something past 'last'
+ */
+static struct extent_map *get_extent_skip_holes(struct inode *inode,
+						u64 offset,
+						u64 last,
+						get_extent_t *get_extent)
+{
+	u64 sectorsize = BTRFS_I(inode)->root->sectorsize;
+	struct extent_map *em;
+	u64 len;
+
+	if (offset >= last)
+		return NULL;
+
+	while(1) {
+		len = last - offset;
+		if (len == 0)
+			break;
+		len = (len + sectorsize - 1) & ~(sectorsize - 1);
+		em = get_extent(inode, NULL, 0, offset, len, 0);
+		if (!em || IS_ERR(em))
+			return em;
+
+		/* if this isn't a hole return it */
+		if (!test_bit(EXTENT_FLAG_VACANCY, &em->flags) &&
+		    em->block_start != EXTENT_MAP_HOLE) {
+			return em;
+		}
+
+		/* this is a hole, advance to the next extent */
+		offset = extent_map_end(em);
+		free_extent_map(em);
+		if (offset >= last)
+			break;
+	}
+	return NULL;
+}
+
 int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
		__u64 start, __u64 len, get_extent_t *get_extent)
 {
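
The `(len + sectorsize - 1) & ~(sectorsize - 1)` expression in the new helper rounds the lookup length up to the next sector boundary; it relies on sectorsize being a power of two. A quick standalone check of the arithmetic:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t sectorsize = 4096;	/* must be a power of two */
	uint64_t lens[] = { 1, 4095, 4096, 4097, 12288 };

	for (int i = 0; i < 5; i++) {
		/* adding sectorsize-1 carries into the next sector unless
		 * len is already aligned; the mask clears the low bits */
		uint64_t rounded = (lens[i] + sectorsize - 1) & ~(sectorsize - 1);
		printf("%llu -> %llu\n",
		       (unsigned long long)lens[i],
		       (unsigned long long)rounded);
	}
	/* prints: 1 -> 4096, 4095 -> 4096, 4096 -> 4096,
	 *         4097 -> 8192, 12288 -> 12288 */
	return 0;
}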
@@ -2910,16 +2967,19 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
	u32 flags = 0;
	u32 found_type;
	u64 last;
+	u64 last_for_get_extent = 0;
	u64 disko = 0;
+	u64 isize = i_size_read(inode);
	struct btrfs_key found_key;
	struct extent_map *em = NULL;
	struct extent_state *cached_state = NULL;
	struct btrfs_path *path;
	struct btrfs_file_extent_item *item;
	int end = 0;
-	u64 em_start = 0, em_len = 0;
+	u64 em_start = 0;
+	u64 em_len = 0;
+	u64 em_end = 0;
	unsigned long emflags;
-	int hole = 0;
 
	if (len == 0)
		return -EINVAL;
@@ -2929,6 +2989,10 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
		return -ENOMEM;
	path->leave_spinning = 1;
 
+	/*
+	 * lookup the last file extent. We're not using i_size here
+	 * because there might be preallocation past i_size
+	 */
	ret = btrfs_lookup_file_extent(NULL, BTRFS_I(inode)->root,
				       path, inode->i_ino, -1, 0);
	if (ret < 0) {
@@ -2942,18 +3006,38 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
	btrfs_item_key_to_cpu(path->nodes[0], &found_key, path->slots[0]);
	found_type = btrfs_key_type(&found_key);
 
-	/* No extents, just return */
+	/* No extents, but there might be delalloc bits */
	if (found_key.objectid != inode->i_ino ||
	    found_type != BTRFS_EXTENT_DATA_KEY) {
-		btrfs_free_path(path);
-		return 0;
+		/* have to trust i_size as the end */
+		last = (u64)-1;
+		last_for_get_extent = isize;
+	} else {
+		/*
+		 * remember the start of the last extent. There are a
+		 * bunch of different factors that go into the length of the
+		 * extent, so it's much less complex to remember where it started
+		 */
+		last = found_key.offset;
+		last_for_get_extent = last + 1;
	}
-	last = found_key.offset;
	btrfs_free_path(path);
 
+	/*
+	 * we might have some extents allocated but more delalloc past those
+	 * extents. so, we trust isize unless the start of the last extent is
+	 * beyond isize
+	 */
+	if (last < isize) {
+		last = (u64)-1;
+		last_for_get_extent = isize;
+	}
+
	lock_extent_bits(&BTRFS_I(inode)->io_tree, start, start + len, 0,
			 &cached_state, GFP_NOFS);
-	em = get_extent(inode, NULL, 0, off, max - off, 0);
+
+	em = get_extent_skip_holes(inode, off, last_for_get_extent,
+				   get_extent);
	if (!em)
		goto out;
	if (IS_ERR(em)) {
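
The `last`/`last_for_get_extent` pair set up above encodes three cases: no extent items at all, a last extent that starts before i_size (trailing delalloc is possible), and preallocation past i_size. A compact restatement of that selection logic (userspace sketch mirroring the hunk, not kernel code):

#include <stdint.h>
#include <stdio.h>

/* last == UINT64_MAX means "no known last extent; trust i_size
 * and keep scanning, since delalloc may extend that far". */
static void pick_range(int have_extent_item, uint64_t last_extent_start,
		       uint64_t isize, uint64_t *last,
		       uint64_t *last_for_get_extent)
{
	if (!have_extent_item) {
		/* no extent items: delalloc may still exist up to i_size */
		*last = UINT64_MAX;
		*last_for_get_extent = isize;
	} else {
		*last = last_extent_start;
		*last_for_get_extent = *last + 1;
	}

	/* last extent starts before i_size: delalloc may follow it */
	if (*last < isize) {
		*last = UINT64_MAX;
		*last_for_get_extent = isize;
	}
}

int main(void)
{
	uint64_t last, lfge;

	pick_range(0, 0, 8192, &last, &lfge);	  /* no extents */
	printf("%llx %llu\n", (unsigned long long)last, (unsigned long long)lfge);
	pick_range(1, 4096, 8192, &last, &lfge);  /* possible tail delalloc */
	printf("%llx %llu\n", (unsigned long long)last, (unsigned long long)lfge);
	pick_range(1, 16384, 8192, &last, &lfge); /* prealloc past i_size */
	printf("%llx %llu\n", (unsigned long long)last, (unsigned long long)lfge);
	return 0;
}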
@@ -2962,19 +3046,14 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
	}
 
	while (!end) {
-		hole = 0;
-		off = em->start + em->len;
+		off = extent_map_end(em);
		if (off >= max)
			end = 1;
 
-		if (em->block_start == EXTENT_MAP_HOLE) {
-			hole = 1;
-			goto next;
-		}
-
		em_start = em->start;
		em_len = em->len;
-
+		em_end = extent_map_end(em);
+		emflags = em->flags;
		disko = 0;
		flags = 0;
 
@@ -2993,37 +3072,29 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
		if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags))
			flags |= FIEMAP_EXTENT_ENCODED;
 
-next:
-		emflags = em->flags;
		free_extent_map(em);
		em = NULL;
-		if (!end) {
-			em = get_extent(inode, NULL, 0, off, max - off, 0);
-			if (!em)
-				goto out;
-			if (IS_ERR(em)) {
-				ret = PTR_ERR(em);
-				goto out;
-			}
-			emflags = em->flags;
-		}
-
-		if (test_bit(EXTENT_FLAG_VACANCY, &emflags)) {
+		if ((em_start >= last) || em_len == (u64)-1 ||
+		   (last == (u64)-1 && isize <= em_end)) {
			flags |= FIEMAP_EXTENT_LAST;
			end = 1;
		}
 
-		if (em_start == last) {
+		/* now scan forward to see if this is really the last extent. */
+		em = get_extent_skip_holes(inode, off, last_for_get_extent,
+					   get_extent);
+		if (IS_ERR(em)) {
+			ret = PTR_ERR(em);
+			goto out;
+		}
+		if (!em) {
			flags |= FIEMAP_EXTENT_LAST;
			end = 1;
		}
-
-		if (!hole) {
-			ret = fiemap_fill_next_extent(fieinfo, em_start, disko,
-						      em_len, flags);
-			if (ret)
-				goto out_free;
-		}
+		ret = fiemap_fill_next_extent(fieinfo, em_start, disko,
+					      em_len, flags);
+		if (ret)
+			goto out_free;
	}
 out_free:
	free_extent_map(em);
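
The user-visible effect of the fiemap rework is that FIEMAP_EXTENT_LAST now lands on the true final extent even when delalloc or preallocation extends past the on-disk extents. The path can be exercised from userspace with a plain FS_IOC_FIEMAP call; a minimal caller (untested sketch, error handling trimmed):

#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/ioctl.h>
#include <linux/fs.h>
#include <linux/fiemap.h>

int main(int argc, char **argv)
{
	struct fiemap *fm;
	unsigned int n = 32;	/* extents to fetch in one call */
	int fd;

	if (argc < 2 || (fd = open(argv[1], O_RDONLY)) < 0)
		return 1;

	fm = calloc(1, sizeof(*fm) + n * sizeof(struct fiemap_extent));
	fm->fm_start = 0;
	fm->fm_length = FIEMAP_MAX_OFFSET;	/* map the whole file */
	fm->fm_flags = 0;	/* no FIEMAP_FLAG_SYNC: delalloc stays visible */
	fm->fm_extent_count = n;

	if (ioctl(fd, FS_IOC_FIEMAP, fm) < 0)
		return 1;

	for (unsigned int i = 0; i < fm->fm_mapped_extents; i++) {
		struct fiemap_extent *fe = &fm->fm_extents[i];
		printf("logical %llu len %llu flags 0x%x%s\n",
		       (unsigned long long)fe->fe_logical,
		       (unsigned long long)fe->fe_length,
		       fe->fe_flags,
		       (fe->fe_flags & FIEMAP_EXTENT_LAST) ? " (last)" : "");
	}
	return 0;
}

Run against a file with dirty, unwritten data and the final reported extent should carry FIEMAP_EXTENT_LAST; delalloc ranges show up flagged FIEMAP_EXTENT_DELALLOC.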
@@ -3192,7 +3263,13 @@ struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree,
		}
		if (!PageUptodate(p))
			uptodate = 0;
-		unlock_page(p);
+
+		/*
+		 * see below about how we avoid a nasty race with release page
+		 * and why we unlock later
+		 */
+		if (i != 0)
+			unlock_page(p);
	}
	if (uptodate)
		set_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
@@ -3216,9 +3293,26 @@ struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree,
	atomic_inc(&eb->refs);
	spin_unlock(&tree->buffer_lock);
	radix_tree_preload_end();
+
+	/*
+	 * there is a race where release page may have
+	 * tried to find this extent buffer in the radix
+	 * but failed. It will tell the VM it is safe to
+	 * reclaim the page, and it will clear the page private bit.
+	 * We must make sure to set the page private bit properly
+	 * after the extent buffer is in the radix tree so
+	 * it doesn't get lost
+	 */
+	set_page_extent_mapped(eb->first_page);
+	set_page_extent_head(eb->first_page, eb->len);
+	if (!page0)
+		unlock_page(eb->first_page);
	return eb;
 
 free_eb:
+	if (eb->first_page && !page0)
+		unlock_page(eb->first_page);
+
	if (!atomic_dec_and_test(&eb->refs))
		return exists;
	btrfs_release_extent_buffer(eb);
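
The race the new comment describes, condensed into an interleaving sketch (assumed flow, reconstructed from the comment and the unlock changes, not a verbatim trace):

/*
 * Before this patch:
 *
 *   CPU A: alloc_extent_buffer         CPU B: releasepage
 *   --------------------------         ------------------
 *   set page private bits
 *                                      look up eb in radix tree -> miss
 *                                      clear page private, tell the VM
 *                                      the page is safe to reclaim
 *   insert eb into radix tree
 *     -> eb now references a page the VM may free
 *
 * After: first_page stays locked, and the private bits are set only
 * once the eb is visible in the radix tree, so releasepage either
 * finds the eb or finds no private data to tear down.
 */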
@@ -3269,10 +3363,11 @@ int clear_extent_buffer_dirty(struct extent_io_tree *tree,
			continue;
 
		lock_page(page);
+		WARN_ON(!PagePrivate(page));
+
+		set_page_extent_mapped(page);
		if (i == 0)
			set_page_extent_head(page, eb->len);
-		else
-			set_page_private(page, EXTENT_PAGE_PRIVATE);
 
		clear_page_dirty_for_io(page);
		spin_lock_irq(&page->mapping->tree_lock);
@@ -3462,6 +3557,13 @@ int read_extent_buffer_pages(struct extent_io_tree *tree,
 
	for (i = start_i; i < num_pages; i++) {
		page = extent_buffer_page(eb, i);
+
+		WARN_ON(!PagePrivate(page));
+
+		set_page_extent_mapped(page);
+		if (i == 0)
+			set_page_extent_head(page, eb->len);
+
		if (inc_all_pages)
			page_cache_get(page);
		if (!PageUptodate(page)) {