aboutsummaryrefslogtreecommitdiffstats
path: root/fs/btrfs
diff options
context:
space:
mode:
Diffstat (limited to 'fs/btrfs')
-rw-r--r--fs/btrfs/ctree.h12
-rw-r--r--fs/btrfs/disk-io.c8
-rw-r--r--fs/btrfs/extent-tree.c46
-rw-r--r--fs/btrfs/extent_io.c213
-rw-r--r--fs/btrfs/extent_io.h2
-rw-r--r--fs/btrfs/extent_map.c4
-rw-r--r--fs/btrfs/file.c115
-rw-r--r--fs/btrfs/inode.c135
-rw-r--r--fs/btrfs/ioctl.c17
-rw-r--r--fs/btrfs/lzo.c21
-rw-r--r--fs/btrfs/relocation.c14
-rw-r--r--fs/btrfs/super.c7
-rw-r--r--fs/btrfs/volumes.c15
13 files changed, 482 insertions, 127 deletions
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 2c98b3af6052..7f78cc78fdd0 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -729,6 +729,15 @@ struct btrfs_space_info {
729 u64 disk_total; /* total bytes on disk, takes mirrors into 729 u64 disk_total; /* total bytes on disk, takes mirrors into
730 account */ 730 account */
731 731
732 /*
733 * we bump reservation progress every time we decrement
734 * bytes_reserved. This way people waiting for reservations
735 * know something good has happened and they can check
736 * for progress. The number here isn't to be trusted, it
737 * just shows reclaim activity
738 */
739 unsigned long reservation_progress;
740
732 int full; /* indicates that we cannot allocate any more 741 int full; /* indicates that we cannot allocate any more
733 chunks for this space */ 742 chunks for this space */
734 int force_alloc; /* set if we need to force a chunk alloc for 743 int force_alloc; /* set if we need to force a chunk alloc for
@@ -1254,6 +1263,7 @@ struct btrfs_root {
1254#define BTRFS_MOUNT_SPACE_CACHE (1 << 12) 1263#define BTRFS_MOUNT_SPACE_CACHE (1 << 12)
1255#define BTRFS_MOUNT_CLEAR_CACHE (1 << 13) 1264#define BTRFS_MOUNT_CLEAR_CACHE (1 << 13)
1256#define BTRFS_MOUNT_USER_SUBVOL_RM_ALLOWED (1 << 14) 1265#define BTRFS_MOUNT_USER_SUBVOL_RM_ALLOWED (1 << 14)
1266#define BTRFS_MOUNT_ENOSPC_DEBUG (1 << 15)
1257 1267
1258#define btrfs_clear_opt(o, opt) ((o) &= ~BTRFS_MOUNT_##opt) 1268#define btrfs_clear_opt(o, opt) ((o) &= ~BTRFS_MOUNT_##opt)
1259#define btrfs_set_opt(o, opt) ((o) |= BTRFS_MOUNT_##opt) 1269#define btrfs_set_opt(o, opt) ((o) |= BTRFS_MOUNT_##opt)
@@ -2218,6 +2228,8 @@ int btrfs_error_unpin_extent_range(struct btrfs_root *root,
2218 u64 start, u64 end); 2228 u64 start, u64 end);
2219int btrfs_error_discard_extent(struct btrfs_root *root, u64 bytenr, 2229int btrfs_error_discard_extent(struct btrfs_root *root, u64 bytenr,
2220 u64 num_bytes); 2230 u64 num_bytes);
2231int btrfs_force_chunk_alloc(struct btrfs_trans_handle *trans,
2232 struct btrfs_root *root, u64 type);
2221 2233
2222/* ctree.c */ 2234/* ctree.c */
2223int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key, 2235int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key,
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index fdce8799b98d..e1aa8d607bc7 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -359,10 +359,14 @@ static int csum_dirty_buffer(struct btrfs_root *root, struct page *page)
359 359
360 tree = &BTRFS_I(page->mapping->host)->io_tree; 360 tree = &BTRFS_I(page->mapping->host)->io_tree;
361 361
362 if (page->private == EXTENT_PAGE_PRIVATE) 362 if (page->private == EXTENT_PAGE_PRIVATE) {
363 WARN_ON(1);
363 goto out; 364 goto out;
364 if (!page->private) 365 }
366 if (!page->private) {
367 WARN_ON(1);
365 goto out; 368 goto out;
369 }
366 len = page->private >> 2; 370 len = page->private >> 2;
367 WARN_ON(len == 0); 371 WARN_ON(len == 0);
368 372
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 4e7e012ad667..7b3089b5c2df 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -3342,15 +3342,16 @@ static int shrink_delalloc(struct btrfs_trans_handle *trans,
3342 u64 max_reclaim; 3342 u64 max_reclaim;
3343 u64 reclaimed = 0; 3343 u64 reclaimed = 0;
3344 long time_left; 3344 long time_left;
3345 int pause = 1;
3346 int nr_pages = (2 * 1024 * 1024) >> PAGE_CACHE_SHIFT; 3345 int nr_pages = (2 * 1024 * 1024) >> PAGE_CACHE_SHIFT;
3347 int loops = 0; 3346 int loops = 0;
3347 unsigned long progress;
3348 3348
3349 block_rsv = &root->fs_info->delalloc_block_rsv; 3349 block_rsv = &root->fs_info->delalloc_block_rsv;
3350 space_info = block_rsv->space_info; 3350 space_info = block_rsv->space_info;
3351 3351
3352 smp_mb(); 3352 smp_mb();
3353 reserved = space_info->bytes_reserved; 3353 reserved = space_info->bytes_reserved;
3354 progress = space_info->reservation_progress;
3354 3355
3355 if (reserved == 0) 3356 if (reserved == 0)
3356 return 0; 3357 return 0;
@@ -3365,31 +3366,36 @@ static int shrink_delalloc(struct btrfs_trans_handle *trans,
3365 writeback_inodes_sb_nr_if_idle(root->fs_info->sb, nr_pages); 3366 writeback_inodes_sb_nr_if_idle(root->fs_info->sb, nr_pages);
3366 3367
3367 spin_lock(&space_info->lock); 3368 spin_lock(&space_info->lock);
3368 if (reserved > space_info->bytes_reserved) { 3369 if (reserved > space_info->bytes_reserved)
3369 loops = 0;
3370 reclaimed += reserved - space_info->bytes_reserved; 3370 reclaimed += reserved - space_info->bytes_reserved;
3371 } else {
3372 loops++;
3373 }
3374 reserved = space_info->bytes_reserved; 3371 reserved = space_info->bytes_reserved;
3375 spin_unlock(&space_info->lock); 3372 spin_unlock(&space_info->lock);
3376 3373
3374 loops++;
3375
3377 if (reserved == 0 || reclaimed >= max_reclaim) 3376 if (reserved == 0 || reclaimed >= max_reclaim)
3378 break; 3377 break;
3379 3378
3380 if (trans && trans->transaction->blocked) 3379 if (trans && trans->transaction->blocked)
3381 return -EAGAIN; 3380 return -EAGAIN;
3382 3381
3383 __set_current_state(TASK_INTERRUPTIBLE); 3382 time_left = schedule_timeout_interruptible(1);
3384 time_left = schedule_timeout(pause);
3385 3383
3386 /* We were interrupted, exit */ 3384 /* We were interrupted, exit */
3387 if (time_left) 3385 if (time_left)
3388 break; 3386 break;
3389 3387
3390 pause <<= 1; 3388 /* we've kicked the IO a few times, if anything has been freed,
3391 if (pause > HZ / 10) 3389 * exit. There is no sense in looping here for a long time
3392 pause = HZ / 10; 3390 * when we really need to commit the transaction, or there are
3391 * just too many writers without enough free space
3392 */
3393
3394 if (loops > 3) {
3395 smp_mb();
3396 if (progress != space_info->reservation_progress)
3397 break;
3398 }
3393 3399
3394 } 3400 }
3395 return reclaimed >= to_reclaim; 3401 return reclaimed >= to_reclaim;
@@ -3612,6 +3618,7 @@ void block_rsv_release_bytes(struct btrfs_block_rsv *block_rsv,
3612 if (num_bytes) { 3618 if (num_bytes) {
3613 spin_lock(&space_info->lock); 3619 spin_lock(&space_info->lock);
3614 space_info->bytes_reserved -= num_bytes; 3620 space_info->bytes_reserved -= num_bytes;
3621 space_info->reservation_progress++;
3615 spin_unlock(&space_info->lock); 3622 spin_unlock(&space_info->lock);
3616 } 3623 }
3617 } 3624 }
@@ -3844,6 +3851,7 @@ static void update_global_block_rsv(struct btrfs_fs_info *fs_info)
3844 if (block_rsv->reserved >= block_rsv->size) { 3851 if (block_rsv->reserved >= block_rsv->size) {
3845 num_bytes = block_rsv->reserved - block_rsv->size; 3852 num_bytes = block_rsv->reserved - block_rsv->size;
3846 sinfo->bytes_reserved -= num_bytes; 3853 sinfo->bytes_reserved -= num_bytes;
3854 sinfo->reservation_progress++;
3847 block_rsv->reserved = block_rsv->size; 3855 block_rsv->reserved = block_rsv->size;
3848 block_rsv->full = 1; 3856 block_rsv->full = 1;
3849 } 3857 }
@@ -4005,7 +4013,6 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
4005 to_reserve = 0; 4013 to_reserve = 0;
4006 } 4014 }
4007 spin_unlock(&BTRFS_I(inode)->accounting_lock); 4015 spin_unlock(&BTRFS_I(inode)->accounting_lock);
4008
4009 to_reserve += calc_csum_metadata_size(inode, num_bytes); 4016 to_reserve += calc_csum_metadata_size(inode, num_bytes);
4010 ret = reserve_metadata_bytes(NULL, root, block_rsv, to_reserve, 1); 4017 ret = reserve_metadata_bytes(NULL, root, block_rsv, to_reserve, 1);
4011 if (ret) 4018 if (ret)
@@ -4133,6 +4140,7 @@ static int update_block_group(struct btrfs_trans_handle *trans,
4133 btrfs_set_block_group_used(&cache->item, old_val); 4140 btrfs_set_block_group_used(&cache->item, old_val);
4134 cache->reserved -= num_bytes; 4141 cache->reserved -= num_bytes;
4135 cache->space_info->bytes_reserved -= num_bytes; 4142 cache->space_info->bytes_reserved -= num_bytes;
4143 cache->space_info->reservation_progress++;
4136 cache->space_info->bytes_used += num_bytes; 4144 cache->space_info->bytes_used += num_bytes;
4137 cache->space_info->disk_used += num_bytes * factor; 4145 cache->space_info->disk_used += num_bytes * factor;
4138 spin_unlock(&cache->lock); 4146 spin_unlock(&cache->lock);
@@ -4184,6 +4192,7 @@ static int pin_down_extent(struct btrfs_root *root,
4184 if (reserved) { 4192 if (reserved) {
4185 cache->reserved -= num_bytes; 4193 cache->reserved -= num_bytes;
4186 cache->space_info->bytes_reserved -= num_bytes; 4194 cache->space_info->bytes_reserved -= num_bytes;
4195 cache->space_info->reservation_progress++;
4187 } 4196 }
4188 spin_unlock(&cache->lock); 4197 spin_unlock(&cache->lock);
4189 spin_unlock(&cache->space_info->lock); 4198 spin_unlock(&cache->space_info->lock);
@@ -4234,6 +4243,7 @@ static int update_reserved_bytes(struct btrfs_block_group_cache *cache,
4234 space_info->bytes_readonly += num_bytes; 4243 space_info->bytes_readonly += num_bytes;
4235 cache->reserved -= num_bytes; 4244 cache->reserved -= num_bytes;
4236 space_info->bytes_reserved -= num_bytes; 4245 space_info->bytes_reserved -= num_bytes;
4246 space_info->reservation_progress++;
4237 } 4247 }
4238 spin_unlock(&cache->lock); 4248 spin_unlock(&cache->lock);
4239 spin_unlock(&space_info->lock); 4249 spin_unlock(&space_info->lock);
@@ -4712,6 +4722,7 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans,
4712 if (ret) { 4722 if (ret) {
4713 spin_lock(&cache->space_info->lock); 4723 spin_lock(&cache->space_info->lock);
4714 cache->space_info->bytes_reserved -= buf->len; 4724 cache->space_info->bytes_reserved -= buf->len;
4725 cache->space_info->reservation_progress++;
4715 spin_unlock(&cache->space_info->lock); 4726 spin_unlock(&cache->space_info->lock);
4716 } 4727 }
4717 goto out; 4728 goto out;
@@ -5376,7 +5387,7 @@ again:
5376 num_bytes, data, 1); 5387 num_bytes, data, 1);
5377 goto again; 5388 goto again;
5378 } 5389 }
5379 if (ret == -ENOSPC) { 5390 if (ret == -ENOSPC && btrfs_test_opt(root, ENOSPC_DEBUG)) {
5380 struct btrfs_space_info *sinfo; 5391 struct btrfs_space_info *sinfo;
5381 5392
5382 sinfo = __find_space_info(root->fs_info, data); 5393 sinfo = __find_space_info(root->fs_info, data);
@@ -6583,7 +6594,7 @@ static noinline int relocate_data_extent(struct inode *reloc_inode,
6583 u64 end = start + extent_key->offset - 1; 6594 u64 end = start + extent_key->offset - 1;
6584 6595
6585 em = alloc_extent_map(GFP_NOFS); 6596 em = alloc_extent_map(GFP_NOFS);
6586 BUG_ON(!em || IS_ERR(em)); 6597 BUG_ON(!em);
6587 6598
6588 em->start = start; 6599 em->start = start;
6589 em->len = extent_key->offset; 6600 em->len = extent_key->offset;
@@ -8065,6 +8076,13 @@ out:
8065 return ret; 8076 return ret;
8066} 8077}
8067 8078
8079int btrfs_force_chunk_alloc(struct btrfs_trans_handle *trans,
8080 struct btrfs_root *root, u64 type)
8081{
8082 u64 alloc_flags = get_alloc_profile(root, type);
8083 return do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags, 1);
8084}
8085
8068/* 8086/*
8069 * helper to account the unused space of all the readonly block group in the 8087 * helper to account the unused space of all the readonly block group in the
8070 * list. takes mirrors into account. 8088 * list. takes mirrors into account.
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 5e76a474cb7e..714adc4ac4c2 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -1433,12 +1433,13 @@ int extent_clear_unlock_delalloc(struct inode *inode,
1433 */ 1433 */
1434u64 count_range_bits(struct extent_io_tree *tree, 1434u64 count_range_bits(struct extent_io_tree *tree,
1435 u64 *start, u64 search_end, u64 max_bytes, 1435 u64 *start, u64 search_end, u64 max_bytes,
1436 unsigned long bits) 1436 unsigned long bits, int contig)
1437{ 1437{
1438 struct rb_node *node; 1438 struct rb_node *node;
1439 struct extent_state *state; 1439 struct extent_state *state;
1440 u64 cur_start = *start; 1440 u64 cur_start = *start;
1441 u64 total_bytes = 0; 1441 u64 total_bytes = 0;
1442 u64 last = 0;
1442 int found = 0; 1443 int found = 0;
1443 1444
1444 if (search_end <= cur_start) { 1445 if (search_end <= cur_start) {
@@ -1463,7 +1464,9 @@ u64 count_range_bits(struct extent_io_tree *tree,
1463 state = rb_entry(node, struct extent_state, rb_node); 1464 state = rb_entry(node, struct extent_state, rb_node);
1464 if (state->start > search_end) 1465 if (state->start > search_end)
1465 break; 1466 break;
1466 if (state->end >= cur_start && (state->state & bits)) { 1467 if (contig && found && state->start > last + 1)
1468 break;
1469 if (state->end >= cur_start && (state->state & bits) == bits) {
1467 total_bytes += min(search_end, state->end) + 1 - 1470 total_bytes += min(search_end, state->end) + 1 -
1468 max(cur_start, state->start); 1471 max(cur_start, state->start);
1469 if (total_bytes >= max_bytes) 1472 if (total_bytes >= max_bytes)
@@ -1472,6 +1475,9 @@ u64 count_range_bits(struct extent_io_tree *tree,
1472 *start = state->start; 1475 *start = state->start;
1473 found = 1; 1476 found = 1;
1474 } 1477 }
1478 last = state->end;
1479 } else if (contig && found) {
1480 break;
1475 } 1481 }
1476 node = rb_next(node); 1482 node = rb_next(node);
1477 if (!node) 1483 if (!node)
@@ -1946,6 +1952,7 @@ void set_page_extent_mapped(struct page *page)
1946 1952
1947static void set_page_extent_head(struct page *page, unsigned long len) 1953static void set_page_extent_head(struct page *page, unsigned long len)
1948{ 1954{
1955 WARN_ON(!PagePrivate(page));
1949 set_page_private(page, EXTENT_PAGE_PRIVATE_FIRST_PAGE | len << 2); 1956 set_page_private(page, EXTENT_PAGE_PRIVATE_FIRST_PAGE | len << 2);
1950} 1957}
1951 1958
@@ -2821,9 +2828,17 @@ int try_release_extent_state(struct extent_map_tree *map,
2821 * at this point we can safely clear everything except the 2828 * at this point we can safely clear everything except the
2822 * locked bit and the nodatasum bit 2829 * locked bit and the nodatasum bit
2823 */ 2830 */
2824 clear_extent_bit(tree, start, end, 2831 ret = clear_extent_bit(tree, start, end,
2825 ~(EXTENT_LOCKED | EXTENT_NODATASUM), 2832 ~(EXTENT_LOCKED | EXTENT_NODATASUM),
2826 0, 0, NULL, mask); 2833 0, 0, NULL, mask);
2834
2835 /* if clear_extent_bit failed for ENOMEM reasons,
2836 * we can't allow the release to continue.
2837 */
2838 if (ret < 0)
2839 ret = 0;
2840 else
2841 ret = 1;
2827 } 2842 }
2828 return ret; 2843 return ret;
2829} 2844}
@@ -2903,6 +2918,46 @@ out:
2903 return sector; 2918 return sector;
2904} 2919}
2905 2920
2921/*
2922 * helper function for fiemap, which doesn't want to see any holes.
2923 * This maps until we find something past 'last'
2924 */
2925static struct extent_map *get_extent_skip_holes(struct inode *inode,
2926 u64 offset,
2927 u64 last,
2928 get_extent_t *get_extent)
2929{
2930 u64 sectorsize = BTRFS_I(inode)->root->sectorsize;
2931 struct extent_map *em;
2932 u64 len;
2933
2934 if (offset >= last)
2935 return NULL;
2936
2937 while(1) {
2938 len = last - offset;
2939 if (len == 0)
2940 break;
2941 len = (len + sectorsize - 1) & ~(sectorsize - 1);
2942 em = get_extent(inode, NULL, 0, offset, len, 0);
2943 if (!em || IS_ERR(em))
2944 return em;
2945
2946 /* if this isn't a hole return it */
2947 if (!test_bit(EXTENT_FLAG_VACANCY, &em->flags) &&
2948 em->block_start != EXTENT_MAP_HOLE) {
2949 return em;
2950 }
2951
2952 /* this is a hole, advance to the next extent */
2953 offset = extent_map_end(em);
2954 free_extent_map(em);
2955 if (offset >= last)
2956 break;
2957 }
2958 return NULL;
2959}
2960
2906int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, 2961int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
2907 __u64 start, __u64 len, get_extent_t *get_extent) 2962 __u64 start, __u64 len, get_extent_t *get_extent)
2908{ 2963{
@@ -2912,16 +2967,19 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
2912 u32 flags = 0; 2967 u32 flags = 0;
2913 u32 found_type; 2968 u32 found_type;
2914 u64 last; 2969 u64 last;
2970 u64 last_for_get_extent = 0;
2915 u64 disko = 0; 2971 u64 disko = 0;
2972 u64 isize = i_size_read(inode);
2916 struct btrfs_key found_key; 2973 struct btrfs_key found_key;
2917 struct extent_map *em = NULL; 2974 struct extent_map *em = NULL;
2918 struct extent_state *cached_state = NULL; 2975 struct extent_state *cached_state = NULL;
2919 struct btrfs_path *path; 2976 struct btrfs_path *path;
2920 struct btrfs_file_extent_item *item; 2977 struct btrfs_file_extent_item *item;
2921 int end = 0; 2978 int end = 0;
2922 u64 em_start = 0, em_len = 0; 2979 u64 em_start = 0;
2980 u64 em_len = 0;
2981 u64 em_end = 0;
2923 unsigned long emflags; 2982 unsigned long emflags;
2924 int hole = 0;
2925 2983
2926 if (len == 0) 2984 if (len == 0)
2927 return -EINVAL; 2985 return -EINVAL;
@@ -2931,6 +2989,10 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
2931 return -ENOMEM; 2989 return -ENOMEM;
2932 path->leave_spinning = 1; 2990 path->leave_spinning = 1;
2933 2991
2992 /*
2993 * lookup the last file extent. We're not using i_size here
2994 * because there might be preallocation past i_size
2995 */
2934 ret = btrfs_lookup_file_extent(NULL, BTRFS_I(inode)->root, 2996 ret = btrfs_lookup_file_extent(NULL, BTRFS_I(inode)->root,
2935 path, inode->i_ino, -1, 0); 2997 path, inode->i_ino, -1, 0);
2936 if (ret < 0) { 2998 if (ret < 0) {
@@ -2944,18 +3006,38 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
2944 btrfs_item_key_to_cpu(path->nodes[0], &found_key, path->slots[0]); 3006 btrfs_item_key_to_cpu(path->nodes[0], &found_key, path->slots[0]);
2945 found_type = btrfs_key_type(&found_key); 3007 found_type = btrfs_key_type(&found_key);
2946 3008
2947 /* No extents, just return */ 3009 /* No extents, but there might be delalloc bits */
2948 if (found_key.objectid != inode->i_ino || 3010 if (found_key.objectid != inode->i_ino ||
2949 found_type != BTRFS_EXTENT_DATA_KEY) { 3011 found_type != BTRFS_EXTENT_DATA_KEY) {
2950 btrfs_free_path(path); 3012 /* have to trust i_size as the end */
2951 return 0; 3013 last = (u64)-1;
3014 last_for_get_extent = isize;
3015 } else {
3016 /*
3017 * remember the start of the last extent. There are a
3018 * bunch of different factors that go into the length of the
3019 * extent, so it's much less complex to remember where it started
3020 */
3021 last = found_key.offset;
3022 last_for_get_extent = last + 1;
2952 } 3023 }
2953 last = found_key.offset;
2954 btrfs_free_path(path); 3024 btrfs_free_path(path);
2955 3025
3026 /*
3027 * we might have some extents allocated but more delalloc past those
3028 * extents. so, we trust isize unless the start of the last extent is
3029 * beyond isize
3030 */
3031 if (last < isize) {
3032 last = (u64)-1;
3033 last_for_get_extent = isize;
3034 }
3035
2956 lock_extent_bits(&BTRFS_I(inode)->io_tree, start, start + len, 0, 3036 lock_extent_bits(&BTRFS_I(inode)->io_tree, start, start + len, 0,
2957 &cached_state, GFP_NOFS); 3037 &cached_state, GFP_NOFS);
2958 em = get_extent(inode, NULL, 0, off, max - off, 0); 3038
3039 em = get_extent_skip_holes(inode, off, last_for_get_extent,
3040 get_extent);
2959 if (!em) 3041 if (!em)
2960 goto out; 3042 goto out;
2961 if (IS_ERR(em)) { 3043 if (IS_ERR(em)) {
@@ -2964,22 +3046,38 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
2964 } 3046 }
2965 3047
2966 while (!end) { 3048 while (!end) {
2967 hole = 0; 3049 u64 offset_in_extent;
2968 off = em->start + em->len;
2969 if (off >= max)
2970 end = 1;
2971 3050
2972 if (em->block_start == EXTENT_MAP_HOLE) { 3051 /* break if the extent we found is outside the range */
2973 hole = 1; 3052 if (em->start >= max || extent_map_end(em) < off)
2974 goto next; 3053 break;
2975 }
2976 3054
2977 em_start = em->start; 3055 /*
2978 em_len = em->len; 3056 * get_extent may return an extent that starts before our
3057 * requested range. We have to make sure the ranges
3058 * we return to fiemap always move forward and don't
3059 * overlap, so adjust the offsets here
3060 */
3061 em_start = max(em->start, off);
2979 3062
3063 /*
3064 * record the offset from the start of the extent
3065 * for adjusting the disk offset below
3066 */
3067 offset_in_extent = em_start - em->start;
3068 em_end = extent_map_end(em);
3069 em_len = em_end - em_start;
3070 emflags = em->flags;
2980 disko = 0; 3071 disko = 0;
2981 flags = 0; 3072 flags = 0;
2982 3073
3074 /*
3075 * bump off for our next call to get_extent
3076 */
3077 off = extent_map_end(em);
3078 if (off >= max)
3079 end = 1;
3080
2983 if (em->block_start == EXTENT_MAP_LAST_BYTE) { 3081 if (em->block_start == EXTENT_MAP_LAST_BYTE) {
2984 end = 1; 3082 end = 1;
2985 flags |= FIEMAP_EXTENT_LAST; 3083 flags |= FIEMAP_EXTENT_LAST;
@@ -2990,42 +3088,34 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
2990 flags |= (FIEMAP_EXTENT_DELALLOC | 3088 flags |= (FIEMAP_EXTENT_DELALLOC |
2991 FIEMAP_EXTENT_UNKNOWN); 3089 FIEMAP_EXTENT_UNKNOWN);
2992 } else { 3090 } else {
2993 disko = em->block_start; 3091 disko = em->block_start + offset_in_extent;
2994 } 3092 }
2995 if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) 3093 if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags))
2996 flags |= FIEMAP_EXTENT_ENCODED; 3094 flags |= FIEMAP_EXTENT_ENCODED;
2997 3095
2998next:
2999 emflags = em->flags;
3000 free_extent_map(em); 3096 free_extent_map(em);
3001 em = NULL; 3097 em = NULL;
3002 if (!end) { 3098 if ((em_start >= last) || em_len == (u64)-1 ||
3003 em = get_extent(inode, NULL, 0, off, max - off, 0); 3099 (last == (u64)-1 && isize <= em_end)) {
3004 if (!em)
3005 goto out;
3006 if (IS_ERR(em)) {
3007 ret = PTR_ERR(em);
3008 goto out;
3009 }
3010 emflags = em->flags;
3011 }
3012
3013 if (test_bit(EXTENT_FLAG_VACANCY, &emflags)) {
3014 flags |= FIEMAP_EXTENT_LAST; 3100 flags |= FIEMAP_EXTENT_LAST;
3015 end = 1; 3101 end = 1;
3016 } 3102 }
3017 3103
3018 if (em_start == last) { 3104 /* now scan forward to see if this is really the last extent. */
3105 em = get_extent_skip_holes(inode, off, last_for_get_extent,
3106 get_extent);
3107 if (IS_ERR(em)) {
3108 ret = PTR_ERR(em);
3109 goto out;
3110 }
3111 if (!em) {
3019 flags |= FIEMAP_EXTENT_LAST; 3112 flags |= FIEMAP_EXTENT_LAST;
3020 end = 1; 3113 end = 1;
3021 } 3114 }
3022 3115 ret = fiemap_fill_next_extent(fieinfo, em_start, disko,
3023 if (!hole) { 3116 em_len, flags);
3024 ret = fiemap_fill_next_extent(fieinfo, em_start, disko, 3117 if (ret)
3025 em_len, flags); 3118 goto out_free;
3026 if (ret)
3027 goto out_free;
3028 }
3029 } 3119 }
3030out_free: 3120out_free:
3031 free_extent_map(em); 3121 free_extent_map(em);
@@ -3194,7 +3284,13 @@ struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree,
3194 } 3284 }
3195 if (!PageUptodate(p)) 3285 if (!PageUptodate(p))
3196 uptodate = 0; 3286 uptodate = 0;
3197 unlock_page(p); 3287
3288 /*
3289 * see below about how we avoid a nasty race with release page
3290 * and why we unlock later
3291 */
3292 if (i != 0)
3293 unlock_page(p);
3198 } 3294 }
3199 if (uptodate) 3295 if (uptodate)
3200 set_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags); 3296 set_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
@@ -3218,9 +3314,26 @@ struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree,
3218 atomic_inc(&eb->refs); 3314 atomic_inc(&eb->refs);
3219 spin_unlock(&tree->buffer_lock); 3315 spin_unlock(&tree->buffer_lock);
3220 radix_tree_preload_end(); 3316 radix_tree_preload_end();
3317
3318 /*
3319 * there is a race where release page may have
3320 * tried to find this extent buffer in the radix
3321 * but failed. It will tell the VM it is safe to
3322 * reclaim the, and it will clear the page private bit.
3323 * We must make sure to set the page private bit properly
3324 * after the extent buffer is in the radix tree so
3325 * it doesn't get lost
3326 */
3327 set_page_extent_mapped(eb->first_page);
3328 set_page_extent_head(eb->first_page, eb->len);
3329 if (!page0)
3330 unlock_page(eb->first_page);
3221 return eb; 3331 return eb;
3222 3332
3223free_eb: 3333free_eb:
3334 if (eb->first_page && !page0)
3335 unlock_page(eb->first_page);
3336
3224 if (!atomic_dec_and_test(&eb->refs)) 3337 if (!atomic_dec_and_test(&eb->refs))
3225 return exists; 3338 return exists;
3226 btrfs_release_extent_buffer(eb); 3339 btrfs_release_extent_buffer(eb);
@@ -3271,10 +3384,11 @@ int clear_extent_buffer_dirty(struct extent_io_tree *tree,
3271 continue; 3384 continue;
3272 3385
3273 lock_page(page); 3386 lock_page(page);
3387 WARN_ON(!PagePrivate(page));
3388
3389 set_page_extent_mapped(page);
3274 if (i == 0) 3390 if (i == 0)
3275 set_page_extent_head(page, eb->len); 3391 set_page_extent_head(page, eb->len);
3276 else
3277 set_page_private(page, EXTENT_PAGE_PRIVATE);
3278 3392
3279 clear_page_dirty_for_io(page); 3393 clear_page_dirty_for_io(page);
3280 spin_lock_irq(&page->mapping->tree_lock); 3394 spin_lock_irq(&page->mapping->tree_lock);
@@ -3464,6 +3578,13 @@ int read_extent_buffer_pages(struct extent_io_tree *tree,
3464 3578
3465 for (i = start_i; i < num_pages; i++) { 3579 for (i = start_i; i < num_pages; i++) {
3466 page = extent_buffer_page(eb, i); 3580 page = extent_buffer_page(eb, i);
3581
3582 WARN_ON(!PagePrivate(page));
3583
3584 set_page_extent_mapped(page);
3585 if (i == 0)
3586 set_page_extent_head(page, eb->len);
3587
3467 if (inc_all_pages) 3588 if (inc_all_pages)
3468 page_cache_get(page); 3589 page_cache_get(page);
3469 if (!PageUptodate(page)) { 3590 if (!PageUptodate(page)) {
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index 7083cfafd061..9318dfefd59c 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -191,7 +191,7 @@ void extent_io_exit(void);
191 191
192u64 count_range_bits(struct extent_io_tree *tree, 192u64 count_range_bits(struct extent_io_tree *tree,
193 u64 *start, u64 search_end, 193 u64 *start, u64 search_end,
194 u64 max_bytes, unsigned long bits); 194 u64 max_bytes, unsigned long bits, int contig);
195 195
196void free_extent_state(struct extent_state *state); 196void free_extent_state(struct extent_state *state);
197int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end, 197int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end,
diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c
index b0e1fce12530..2b6c12e983b3 100644
--- a/fs/btrfs/extent_map.c
+++ b/fs/btrfs/extent_map.c
@@ -51,8 +51,8 @@ struct extent_map *alloc_extent_map(gfp_t mask)
51{ 51{
52 struct extent_map *em; 52 struct extent_map *em;
53 em = kmem_cache_alloc(extent_map_cache, mask); 53 em = kmem_cache_alloc(extent_map_cache, mask);
54 if (!em || IS_ERR(em)) 54 if (!em)
55 return em; 55 return NULL;
56 em->in_tree = 0; 56 em->in_tree = 0;
57 em->flags = 0; 57 em->flags = 0;
58 em->compress_type = BTRFS_COMPRESS_NONE; 58 em->compress_type = BTRFS_COMPRESS_NONE;
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index c1d3a818731a..f447b783bb84 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -70,6 +70,19 @@ static noinline int btrfs_copy_from_user(loff_t pos, int num_pages,
70 70
71 /* Flush processor's dcache for this page */ 71 /* Flush processor's dcache for this page */
72 flush_dcache_page(page); 72 flush_dcache_page(page);
73
74 /*
75 * if we get a partial write, we can end up with
76 * partially up to date pages. These add
77 * a lot of complexity, so make sure they don't
78 * happen by forcing this copy to be retried.
79 *
80 * The rest of the btrfs_file_write code will fall
81 * back to page at a time copies after we return 0.
82 */
83 if (!PageUptodate(page) && copied < count)
84 copied = 0;
85
73 iov_iter_advance(i, copied); 86 iov_iter_advance(i, copied);
74 write_bytes -= copied; 87 write_bytes -= copied;
75 total_copied += copied; 88 total_copied += copied;
@@ -186,6 +199,7 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end,
186 split = alloc_extent_map(GFP_NOFS); 199 split = alloc_extent_map(GFP_NOFS);
187 if (!split2) 200 if (!split2)
188 split2 = alloc_extent_map(GFP_NOFS); 201 split2 = alloc_extent_map(GFP_NOFS);
202 BUG_ON(!split || !split2);
189 203
190 write_lock(&em_tree->lock); 204 write_lock(&em_tree->lock);
191 em = lookup_extent_mapping(em_tree, start, len); 205 em = lookup_extent_mapping(em_tree, start, len);
@@ -762,6 +776,27 @@ out:
762} 776}
763 777
764/* 778/*
779 * on error we return an unlocked page and the error value
780 * on success we return a locked page and 0
781 */
782static int prepare_uptodate_page(struct page *page, u64 pos)
783{
784 int ret = 0;
785
786 if ((pos & (PAGE_CACHE_SIZE - 1)) && !PageUptodate(page)) {
787 ret = btrfs_readpage(NULL, page);
788 if (ret)
789 return ret;
790 lock_page(page);
791 if (!PageUptodate(page)) {
792 unlock_page(page);
793 return -EIO;
794 }
795 }
796 return 0;
797}
798
799/*
765 * this gets pages into the page cache and locks them down, it also properly 800 * this gets pages into the page cache and locks them down, it also properly
766 * waits for data=ordered extents to finish before allowing the pages to be 801 * waits for data=ordered extents to finish before allowing the pages to be
767 * modified. 802 * modified.
@@ -776,6 +811,7 @@ static noinline int prepare_pages(struct btrfs_root *root, struct file *file,
776 unsigned long index = pos >> PAGE_CACHE_SHIFT; 811 unsigned long index = pos >> PAGE_CACHE_SHIFT;
777 struct inode *inode = fdentry(file)->d_inode; 812 struct inode *inode = fdentry(file)->d_inode;
778 int err = 0; 813 int err = 0;
814 int faili = 0;
779 u64 start_pos; 815 u64 start_pos;
780 u64 last_pos; 816 u64 last_pos;
781 817
@@ -793,15 +829,24 @@ again:
793 for (i = 0; i < num_pages; i++) { 829 for (i = 0; i < num_pages; i++) {
794 pages[i] = grab_cache_page(inode->i_mapping, index + i); 830 pages[i] = grab_cache_page(inode->i_mapping, index + i);
795 if (!pages[i]) { 831 if (!pages[i]) {
796 int c; 832 faili = i - 1;
797 for (c = i - 1; c >= 0; c--) { 833 err = -ENOMEM;
798 unlock_page(pages[c]); 834 goto fail;
799 page_cache_release(pages[c]); 835 }
800 } 836
801 return -ENOMEM; 837 if (i == 0)
838 err = prepare_uptodate_page(pages[i], pos);
839 if (i == num_pages - 1)
840 err = prepare_uptodate_page(pages[i],
841 pos + write_bytes);
842 if (err) {
843 page_cache_release(pages[i]);
844 faili = i - 1;
845 goto fail;
802 } 846 }
803 wait_on_page_writeback(pages[i]); 847 wait_on_page_writeback(pages[i]);
804 } 848 }
849 err = 0;
805 if (start_pos < inode->i_size) { 850 if (start_pos < inode->i_size) {
806 struct btrfs_ordered_extent *ordered; 851 struct btrfs_ordered_extent *ordered;
807 lock_extent_bits(&BTRFS_I(inode)->io_tree, 852 lock_extent_bits(&BTRFS_I(inode)->io_tree,
@@ -841,6 +886,14 @@ again:
841 WARN_ON(!PageLocked(pages[i])); 886 WARN_ON(!PageLocked(pages[i]));
842 } 887 }
843 return 0; 888 return 0;
889fail:
890 while (faili >= 0) {
891 unlock_page(pages[faili]);
892 page_cache_release(pages[faili]);
893 faili--;
894 }
895 return err;
896
844} 897}
845 898
846static ssize_t btrfs_file_aio_write(struct kiocb *iocb, 899static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
@@ -850,7 +903,6 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
850 struct file *file = iocb->ki_filp; 903 struct file *file = iocb->ki_filp;
851 struct inode *inode = fdentry(file)->d_inode; 904 struct inode *inode = fdentry(file)->d_inode;
852 struct btrfs_root *root = BTRFS_I(inode)->root; 905 struct btrfs_root *root = BTRFS_I(inode)->root;
853 struct page *pinned[2];
854 struct page **pages = NULL; 906 struct page **pages = NULL;
855 struct iov_iter i; 907 struct iov_iter i;
856 loff_t *ppos = &iocb->ki_pos; 908 loff_t *ppos = &iocb->ki_pos;
@@ -871,9 +923,6 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
871 will_write = ((file->f_flags & O_DSYNC) || IS_SYNC(inode) || 923 will_write = ((file->f_flags & O_DSYNC) || IS_SYNC(inode) ||
872 (file->f_flags & O_DIRECT)); 924 (file->f_flags & O_DIRECT));
873 925
874 pinned[0] = NULL;
875 pinned[1] = NULL;
876
877 start_pos = pos; 926 start_pos = pos;
878 927
879 vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE); 928 vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE);
@@ -961,32 +1010,6 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
961 first_index = pos >> PAGE_CACHE_SHIFT; 1010 first_index = pos >> PAGE_CACHE_SHIFT;
962 last_index = (pos + iov_iter_count(&i)) >> PAGE_CACHE_SHIFT; 1011 last_index = (pos + iov_iter_count(&i)) >> PAGE_CACHE_SHIFT;
963 1012
964 /*
965 * there are lots of better ways to do this, but this code
966 * makes sure the first and last page in the file range are
967 * up to date and ready for cow
968 */
969 if ((pos & (PAGE_CACHE_SIZE - 1))) {
970 pinned[0] = grab_cache_page(inode->i_mapping, first_index);
971 if (!PageUptodate(pinned[0])) {
972 ret = btrfs_readpage(NULL, pinned[0]);
973 BUG_ON(ret);
974 wait_on_page_locked(pinned[0]);
975 } else {
976 unlock_page(pinned[0]);
977 }
978 }
979 if ((pos + iov_iter_count(&i)) & (PAGE_CACHE_SIZE - 1)) {
980 pinned[1] = grab_cache_page(inode->i_mapping, last_index);
981 if (!PageUptodate(pinned[1])) {
982 ret = btrfs_readpage(NULL, pinned[1]);
983 BUG_ON(ret);
984 wait_on_page_locked(pinned[1]);
985 } else {
986 unlock_page(pinned[1]);
987 }
988 }
989
990 while (iov_iter_count(&i) > 0) { 1013 while (iov_iter_count(&i) > 0) {
991 size_t offset = pos & (PAGE_CACHE_SIZE - 1); 1014 size_t offset = pos & (PAGE_CACHE_SIZE - 1);
992 size_t write_bytes = min(iov_iter_count(&i), 1015 size_t write_bytes = min(iov_iter_count(&i),
@@ -1023,8 +1046,20 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
1023 1046
1024 copied = btrfs_copy_from_user(pos, num_pages, 1047 copied = btrfs_copy_from_user(pos, num_pages,
1025 write_bytes, pages, &i); 1048 write_bytes, pages, &i);
1026 dirty_pages = (copied + offset + PAGE_CACHE_SIZE - 1) >> 1049
1027 PAGE_CACHE_SHIFT; 1050 /*
1051 * if we have trouble faulting in the pages, fall
1052 * back to one page at a time
1053 */
1054 if (copied < write_bytes)
1055 nrptrs = 1;
1056
1057 if (copied == 0)
1058 dirty_pages = 0;
1059 else
1060 dirty_pages = (copied + offset +
1061 PAGE_CACHE_SIZE - 1) >>
1062 PAGE_CACHE_SHIFT;
1028 1063
1029 if (num_pages > dirty_pages) { 1064 if (num_pages > dirty_pages) {
1030 if (copied > 0) 1065 if (copied > 0)
@@ -1068,10 +1103,6 @@ out:
1068 err = ret; 1103 err = ret;
1069 1104
1070 kfree(pages); 1105 kfree(pages);
1071 if (pinned[0])
1072 page_cache_release(pinned[0]);
1073 if (pinned[1])
1074 page_cache_release(pinned[1]);
1075 *ppos = pos; 1106 *ppos = pos;
1076 1107
1077 /* 1108 /*
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index bcc461a9695f..9007bbd01dbf 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -644,6 +644,7 @@ retry:
644 async_extent->ram_size - 1, 0); 644 async_extent->ram_size - 1, 0);
645 645
646 em = alloc_extent_map(GFP_NOFS); 646 em = alloc_extent_map(GFP_NOFS);
647 BUG_ON(!em);
647 em->start = async_extent->start; 648 em->start = async_extent->start;
648 em->len = async_extent->ram_size; 649 em->len = async_extent->ram_size;
649 em->orig_start = em->start; 650 em->orig_start = em->start;
@@ -820,6 +821,7 @@ static noinline int cow_file_range(struct inode *inode,
820 BUG_ON(ret); 821 BUG_ON(ret);
821 822
822 em = alloc_extent_map(GFP_NOFS); 823 em = alloc_extent_map(GFP_NOFS);
824 BUG_ON(!em);
823 em->start = start; 825 em->start = start;
824 em->orig_start = em->start; 826 em->orig_start = em->start;
825 ram_size = ins.offset; 827 ram_size = ins.offset;
@@ -1169,6 +1171,7 @@ out_check:
1169 struct extent_map_tree *em_tree; 1171 struct extent_map_tree *em_tree;
1170 em_tree = &BTRFS_I(inode)->extent_tree; 1172 em_tree = &BTRFS_I(inode)->extent_tree;
1171 em = alloc_extent_map(GFP_NOFS); 1173 em = alloc_extent_map(GFP_NOFS);
1174 BUG_ON(!em);
1172 em->start = cur_offset; 1175 em->start = cur_offset;
1173 em->orig_start = em->start; 1176 em->orig_start = em->start;
1174 em->len = num_bytes; 1177 em->len = num_bytes;
@@ -1910,7 +1913,7 @@ static int btrfs_clean_io_failures(struct inode *inode, u64 start)
1910 1913
1911 private = 0; 1914 private = 0;
1912 if (count_range_bits(&BTRFS_I(inode)->io_failure_tree, &private, 1915 if (count_range_bits(&BTRFS_I(inode)->io_failure_tree, &private,
1913 (u64)-1, 1, EXTENT_DIRTY)) { 1916 (u64)-1, 1, EXTENT_DIRTY, 0)) {
1914 ret = get_state_private(&BTRFS_I(inode)->io_failure_tree, 1917 ret = get_state_private(&BTRFS_I(inode)->io_failure_tree,
1915 start, &private_failure); 1918 start, &private_failure);
1916 if (ret == 0) { 1919 if (ret == 0) {
@@ -4818,10 +4821,11 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
4818 goto fail; 4821 goto fail;
4819 4822
4820 /* 4823 /*
4821 * 1 item for inode ref 4824 * 2 items for inode and inode ref
4822 * 2 items for dir items 4825 * 2 items for dir items
4826 * 1 item for parent inode
4823 */ 4827 */
4824 trans = btrfs_start_transaction(root, 3); 4828 trans = btrfs_start_transaction(root, 5);
4825 if (IS_ERR(trans)) { 4829 if (IS_ERR(trans)) {
4826 err = PTR_ERR(trans); 4830 err = PTR_ERR(trans);
4827 goto fail; 4831 goto fail;
@@ -5277,6 +5281,128 @@ out:
5277 return em; 5281 return em;
5278} 5282}
5279 5283
5284struct extent_map *btrfs_get_extent_fiemap(struct inode *inode, struct page *page,
5285 size_t pg_offset, u64 start, u64 len,
5286 int create)
5287{
5288 struct extent_map *em;
5289 struct extent_map *hole_em = NULL;
5290 u64 range_start = start;
5291 u64 end;
5292 u64 found;
5293 u64 found_end;
5294 int err = 0;
5295
5296 em = btrfs_get_extent(inode, page, pg_offset, start, len, create);
5297 if (IS_ERR(em))
5298 return em;
5299 if (em) {
5300 /*
5301 * if our em maps to a hole, there might
5302 * actually be delalloc bytes behind it
5303 */
5304 if (em->block_start != EXTENT_MAP_HOLE)
5305 return em;
5306 else
5307 hole_em = em;
5308 }
5309
5310 /* check to see if we've wrapped (len == -1 or similar) */
5311 end = start + len;
5312 if (end < start)
5313 end = (u64)-1;
5314 else
5315 end -= 1;
5316
5317 em = NULL;
5318
5319 /* ok, we didn't find anything, lets look for delalloc */
5320 found = count_range_bits(&BTRFS_I(inode)->io_tree, &range_start,
5321 end, len, EXTENT_DELALLOC, 1);
5322 found_end = range_start + found;
5323 if (found_end < range_start)
5324 found_end = (u64)-1;
5325
5326 /*
5327 * we didn't find anything useful, return
5328 * the original results from get_extent()
5329 */
5330 if (range_start > end || found_end <= start) {
5331 em = hole_em;
5332 hole_em = NULL;
5333 goto out;
5334 }
5335
5336 /* adjust the range_start to make sure it doesn't
5337 * go backwards from the start they passed in
5338 */
5339 range_start = max(start,range_start);
5340 found = found_end - range_start;
5341
5342 if (found > 0) {
5343 u64 hole_start = start;
5344 u64 hole_len = len;
5345
5346 em = alloc_extent_map(GFP_NOFS);
5347 if (!em) {
5348 err = -ENOMEM;
5349 goto out;
5350 }
5351 /*
5352 * when btrfs_get_extent can't find anything it
5353 * returns one huge hole
5354 *
5355 * make sure what it found really fits our range, and
5356 * adjust to make sure it is based on the start from
5357 * the caller
5358 */
5359 if (hole_em) {
5360 u64 calc_end = extent_map_end(hole_em);
5361
5362 if (calc_end <= start || (hole_em->start > end)) {
5363 free_extent_map(hole_em);
5364 hole_em = NULL;
5365 } else {
5366 hole_start = max(hole_em->start, start);
5367 hole_len = calc_end - hole_start;
5368 }
5369 }
5370 em->bdev = NULL;
5371 if (hole_em && range_start > hole_start) {
5372 /* our hole starts before our delalloc, so we
5373 * have to return just the parts of the hole
5374 * that go until the delalloc starts
5375 */
5376 em->len = min(hole_len,
5377 range_start - hole_start);
5378 em->start = hole_start;
5379 em->orig_start = hole_start;
5380 /*
5381 * don't adjust block start at all,
5382 * it is fixed at EXTENT_MAP_HOLE
5383 */
5384 em->block_start = hole_em->block_start;
5385 em->block_len = hole_len;
5386 } else {
5387 em->start = range_start;
5388 em->len = found;
5389 em->orig_start = range_start;
5390 em->block_start = EXTENT_MAP_DELALLOC;
5391 em->block_len = found;
5392 }
5393 } else if (hole_em) {
5394 return hole_em;
5395 }
5396out:
5397
5398 free_extent_map(hole_em);
5399 if (err) {
5400 free_extent_map(em);
5401 return ERR_PTR(err);
5402 }
5403 return em;
5404}
5405
5280static struct extent_map *btrfs_new_extent_direct(struct inode *inode, 5406static struct extent_map *btrfs_new_extent_direct(struct inode *inode,
5281 u64 start, u64 len) 5407 u64 start, u64 len)
5282{ 5408{
@@ -5931,6 +6057,7 @@ static void btrfs_submit_direct(int rw, struct bio *bio, struct inode *inode,
5931 if (!skip_sum) { 6057 if (!skip_sum) {
5932 dip->csums = kmalloc(sizeof(u32) * bio->bi_vcnt, GFP_NOFS); 6058 dip->csums = kmalloc(sizeof(u32) * bio->bi_vcnt, GFP_NOFS);
5933 if (!dip->csums) { 6059 if (!dip->csums) {
6060 kfree(dip);
5934 ret = -ENOMEM; 6061 ret = -ENOMEM;
5935 goto free_ordered; 6062 goto free_ordered;
5936 } 6063 }
@@ -6099,7 +6226,7 @@ out:
6099static int btrfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, 6226static int btrfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
6100 __u64 start, __u64 len) 6227 __u64 start, __u64 len)
6101{ 6228{
6102 return extent_fiemap(inode, fieinfo, start, len, btrfs_get_extent); 6229 return extent_fiemap(inode, fieinfo, start, len, btrfs_get_extent_fiemap);
6103} 6230}
6104 6231
6105int btrfs_readpage(struct file *file, struct page *page) 6232int btrfs_readpage(struct file *file, struct page *page)
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 02d224e8c83f..5fdb2abc4fa7 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -1071,12 +1071,15 @@ static noinline int btrfs_ioctl_subvol_setflags(struct file *file,
1071 if (copy_from_user(&flags, arg, sizeof(flags))) 1071 if (copy_from_user(&flags, arg, sizeof(flags)))
1072 return -EFAULT; 1072 return -EFAULT;
1073 1073
1074 if (flags & ~BTRFS_SUBVOL_CREATE_ASYNC) 1074 if (flags & BTRFS_SUBVOL_CREATE_ASYNC)
1075 return -EINVAL; 1075 return -EINVAL;
1076 1076
1077 if (flags & ~BTRFS_SUBVOL_RDONLY) 1077 if (flags & ~BTRFS_SUBVOL_RDONLY)
1078 return -EOPNOTSUPP; 1078 return -EOPNOTSUPP;
1079 1079
1080 if (!is_owner_or_cap(inode))
1081 return -EACCES;
1082
1080 down_write(&root->fs_info->subvol_sem); 1083 down_write(&root->fs_info->subvol_sem);
1081 1084
1082 /* nothing to do */ 1085 /* nothing to do */
@@ -1097,7 +1100,7 @@ static noinline int btrfs_ioctl_subvol_setflags(struct file *file,
1097 goto out_reset; 1100 goto out_reset;
1098 } 1101 }
1099 1102
1100 ret = btrfs_update_root(trans, root, 1103 ret = btrfs_update_root(trans, root->fs_info->tree_root,
1101 &root->root_key, &root->root_item); 1104 &root->root_key, &root->root_item);
1102 1105
1103 btrfs_commit_transaction(trans, root); 1106 btrfs_commit_transaction(trans, root);
@@ -2208,7 +2211,7 @@ long btrfs_ioctl_space_info(struct btrfs_root *root, void __user *arg)
2208 int num_types = 4; 2211 int num_types = 4;
2209 int alloc_size; 2212 int alloc_size;
2210 int ret = 0; 2213 int ret = 0;
2211 int slot_count = 0; 2214 u64 slot_count = 0;
2212 int i, c; 2215 int i, c;
2213 2216
2214 if (copy_from_user(&space_args, 2217 if (copy_from_user(&space_args,
@@ -2247,7 +2250,7 @@ long btrfs_ioctl_space_info(struct btrfs_root *root, void __user *arg)
2247 goto out; 2250 goto out;
2248 } 2251 }
2249 2252
2250 slot_count = min_t(int, space_args.space_slots, slot_count); 2253 slot_count = min_t(u64, space_args.space_slots, slot_count);
2251 2254
2252 alloc_size = sizeof(*dest) * slot_count; 2255 alloc_size = sizeof(*dest) * slot_count;
2253 2256
@@ -2267,6 +2270,9 @@ long btrfs_ioctl_space_info(struct btrfs_root *root, void __user *arg)
2267 for (i = 0; i < num_types; i++) { 2270 for (i = 0; i < num_types; i++) {
2268 struct btrfs_space_info *tmp; 2271 struct btrfs_space_info *tmp;
2269 2272
2273 if (!slot_count)
2274 break;
2275
2270 info = NULL; 2276 info = NULL;
2271 rcu_read_lock(); 2277 rcu_read_lock();
2272 list_for_each_entry_rcu(tmp, &root->fs_info->space_info, 2278 list_for_each_entry_rcu(tmp, &root->fs_info->space_info,
@@ -2288,7 +2294,10 @@ long btrfs_ioctl_space_info(struct btrfs_root *root, void __user *arg)
2288 memcpy(dest, &space, sizeof(space)); 2294 memcpy(dest, &space, sizeof(space));
2289 dest++; 2295 dest++;
2290 space_args.total_spaces++; 2296 space_args.total_spaces++;
2297 slot_count--;
2291 } 2298 }
2299 if (!slot_count)
2300 break;
2292 } 2301 }
2293 up_read(&info->groups_sem); 2302 up_read(&info->groups_sem);
2294 } 2303 }
diff --git a/fs/btrfs/lzo.c b/fs/btrfs/lzo.c
index cc9b450399df..a178f5ebea78 100644
--- a/fs/btrfs/lzo.c
+++ b/fs/btrfs/lzo.c
@@ -280,6 +280,7 @@ static int lzo_decompress_biovec(struct list_head *ws,
280 unsigned long tot_out; 280 unsigned long tot_out;
281 unsigned long tot_len; 281 unsigned long tot_len;
282 char *buf; 282 char *buf;
283 bool may_late_unmap, need_unmap;
283 284
284 data_in = kmap(pages_in[0]); 285 data_in = kmap(pages_in[0]);
285 tot_len = read_compress_length(data_in); 286 tot_len = read_compress_length(data_in);
@@ -300,11 +301,13 @@ static int lzo_decompress_biovec(struct list_head *ws,
300 301
301 tot_in += in_len; 302 tot_in += in_len;
302 working_bytes = in_len; 303 working_bytes = in_len;
304 may_late_unmap = need_unmap = false;
303 305
304 /* fast path: avoid using the working buffer */ 306 /* fast path: avoid using the working buffer */
305 if (in_page_bytes_left >= in_len) { 307 if (in_page_bytes_left >= in_len) {
306 buf = data_in + in_offset; 308 buf = data_in + in_offset;
307 bytes = in_len; 309 bytes = in_len;
310 may_late_unmap = true;
308 goto cont; 311 goto cont;
309 } 312 }
310 313
@@ -329,14 +332,17 @@ cont:
329 if (working_bytes == 0 && tot_in >= tot_len) 332 if (working_bytes == 0 && tot_in >= tot_len)
330 break; 333 break;
331 334
332 kunmap(pages_in[page_in_index]); 335 if (page_in_index + 1 >= total_pages_in) {
333 page_in_index++;
334 if (page_in_index >= total_pages_in) {
335 ret = -1; 336 ret = -1;
336 data_in = NULL;
337 goto done; 337 goto done;
338 } 338 }
339 data_in = kmap(pages_in[page_in_index]); 339
340 if (may_late_unmap)
341 need_unmap = true;
342 else
343 kunmap(pages_in[page_in_index]);
344
345 data_in = kmap(pages_in[++page_in_index]);
340 346
341 in_page_bytes_left = PAGE_CACHE_SIZE; 347 in_page_bytes_left = PAGE_CACHE_SIZE;
342 in_offset = 0; 348 in_offset = 0;
@@ -346,6 +352,8 @@ cont:
346 out_len = lzo1x_worst_compress(PAGE_CACHE_SIZE); 352 out_len = lzo1x_worst_compress(PAGE_CACHE_SIZE);
347 ret = lzo1x_decompress_safe(buf, in_len, workspace->buf, 353 ret = lzo1x_decompress_safe(buf, in_len, workspace->buf,
348 &out_len); 354 &out_len);
355 if (need_unmap)
356 kunmap(pages_in[page_in_index - 1]);
349 if (ret != LZO_E_OK) { 357 if (ret != LZO_E_OK) {
350 printk(KERN_WARNING "btrfs decompress failed\n"); 358 printk(KERN_WARNING "btrfs decompress failed\n");
351 ret = -1; 359 ret = -1;
@@ -363,8 +371,7 @@ cont:
363 break; 371 break;
364 } 372 }
365done: 373done:
366 if (data_in) 374 kunmap(pages_in[page_in_index]);
367 kunmap(pages_in[page_in_index]);
368 return ret; 375 return ret;
369} 376}
370 377
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index 1f5556acb530..31ade5802ae8 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -1157,6 +1157,7 @@ static int clone_backref_node(struct btrfs_trans_handle *trans,
1157 new_node->bytenr = dest->node->start; 1157 new_node->bytenr = dest->node->start;
1158 new_node->level = node->level; 1158 new_node->level = node->level;
1159 new_node->lowest = node->lowest; 1159 new_node->lowest = node->lowest;
1160 new_node->checked = 1;
1160 new_node->root = dest; 1161 new_node->root = dest;
1161 1162
1162 if (!node->lowest) { 1163 if (!node->lowest) {
@@ -3653,6 +3654,7 @@ static noinline_for_stack int relocate_block_group(struct reloc_control *rc)
3653 u32 item_size; 3654 u32 item_size;
3654 int ret; 3655 int ret;
3655 int err = 0; 3656 int err = 0;
3657 int progress = 0;
3656 3658
3657 path = btrfs_alloc_path(); 3659 path = btrfs_alloc_path();
3658 if (!path) 3660 if (!path)
@@ -3665,9 +3667,10 @@ static noinline_for_stack int relocate_block_group(struct reloc_control *rc)
3665 } 3667 }
3666 3668
3667 while (1) { 3669 while (1) {
3670 progress++;
3668 trans = btrfs_start_transaction(rc->extent_root, 0); 3671 trans = btrfs_start_transaction(rc->extent_root, 0);
3669 BUG_ON(IS_ERR(trans)); 3672 BUG_ON(IS_ERR(trans));
3670 3673restart:
3671 if (update_backref_cache(trans, &rc->backref_cache)) { 3674 if (update_backref_cache(trans, &rc->backref_cache)) {
3672 btrfs_end_transaction(trans, rc->extent_root); 3675 btrfs_end_transaction(trans, rc->extent_root);
3673 continue; 3676 continue;
@@ -3780,6 +3783,15 @@ static noinline_for_stack int relocate_block_group(struct reloc_control *rc)
3780 } 3783 }
3781 } 3784 }
3782 } 3785 }
3786 if (trans && progress && err == -ENOSPC) {
3787 ret = btrfs_force_chunk_alloc(trans, rc->extent_root,
3788 rc->block_group->flags);
3789 if (ret == 0) {
3790 err = 0;
3791 progress = 0;
3792 goto restart;
3793 }
3794 }
3783 3795
3784 btrfs_release_path(rc->extent_root, path); 3796 btrfs_release_path(rc->extent_root, path);
3785 clear_extent_bits(&rc->processed_blocks, 0, (u64)-1, EXTENT_DIRTY, 3797 clear_extent_bits(&rc->processed_blocks, 0, (u64)-1, EXTENT_DIRTY,
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index a004008f7d28..d39a9895d932 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -155,7 +155,8 @@ enum {
155 Opt_nossd, Opt_ssd_spread, Opt_thread_pool, Opt_noacl, Opt_compress, 155 Opt_nossd, Opt_ssd_spread, Opt_thread_pool, Opt_noacl, Opt_compress,
156 Opt_compress_type, Opt_compress_force, Opt_compress_force_type, 156 Opt_compress_type, Opt_compress_force, Opt_compress_force_type,
157 Opt_notreelog, Opt_ratio, Opt_flushoncommit, Opt_discard, 157 Opt_notreelog, Opt_ratio, Opt_flushoncommit, Opt_discard,
158 Opt_space_cache, Opt_clear_cache, Opt_user_subvol_rm_allowed, Opt_err, 158 Opt_space_cache, Opt_clear_cache, Opt_user_subvol_rm_allowed,
159 Opt_enospc_debug, Opt_err,
159}; 160};
160 161
161static match_table_t tokens = { 162static match_table_t tokens = {
@@ -184,6 +185,7 @@ static match_table_t tokens = {
184 {Opt_space_cache, "space_cache"}, 185 {Opt_space_cache, "space_cache"},
185 {Opt_clear_cache, "clear_cache"}, 186 {Opt_clear_cache, "clear_cache"},
186 {Opt_user_subvol_rm_allowed, "user_subvol_rm_allowed"}, 187 {Opt_user_subvol_rm_allowed, "user_subvol_rm_allowed"},
188 {Opt_enospc_debug, "enospc_debug"},
187 {Opt_err, NULL}, 189 {Opt_err, NULL},
188}; 190};
189 191
@@ -358,6 +360,9 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
358 case Opt_user_subvol_rm_allowed: 360 case Opt_user_subvol_rm_allowed:
359 btrfs_set_opt(info->mount_opt, USER_SUBVOL_RM_ALLOWED); 361 btrfs_set_opt(info->mount_opt, USER_SUBVOL_RM_ALLOWED);
360 break; 362 break;
363 case Opt_enospc_debug:
364 btrfs_set_opt(info->mount_opt, ENOSPC_DEBUG);
365 break;
361 case Opt_err: 366 case Opt_err:
362 printk(KERN_INFO "btrfs: unrecognized mount option " 367 printk(KERN_INFO "btrfs: unrecognized mount option "
363 "'%s'\n", p); 368 "'%s'\n", p);
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 2636a051e4b2..dd13eb81ee40 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -1338,11 +1338,11 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path)
1338 1338
1339 ret = btrfs_shrink_device(device, 0); 1339 ret = btrfs_shrink_device(device, 0);
1340 if (ret) 1340 if (ret)
1341 goto error_brelse; 1341 goto error_undo;
1342 1342
1343 ret = btrfs_rm_dev_item(root->fs_info->chunk_root, device); 1343 ret = btrfs_rm_dev_item(root->fs_info->chunk_root, device);
1344 if (ret) 1344 if (ret)
1345 goto error_brelse; 1345 goto error_undo;
1346 1346
1347 device->in_fs_metadata = 0; 1347 device->in_fs_metadata = 0;
1348 1348
@@ -1416,6 +1416,13 @@ out:
1416 mutex_unlock(&root->fs_info->volume_mutex); 1416 mutex_unlock(&root->fs_info->volume_mutex);
1417 mutex_unlock(&uuid_mutex); 1417 mutex_unlock(&uuid_mutex);
1418 return ret; 1418 return ret;
1419error_undo:
1420 if (device->writeable) {
1421 list_add(&device->dev_alloc_list,
1422 &root->fs_info->fs_devices->alloc_list);
1423 root->fs_info->fs_devices->rw_devices++;
1424 }
1425 goto error_brelse;
1419} 1426}
1420 1427
1421/* 1428/*
@@ -1605,12 +1612,14 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
1605 1612
1606 ret = find_next_devid(root, &device->devid); 1613 ret = find_next_devid(root, &device->devid);
1607 if (ret) { 1614 if (ret) {
1615 kfree(device->name);
1608 kfree(device); 1616 kfree(device);
1609 goto error; 1617 goto error;
1610 } 1618 }
1611 1619
1612 trans = btrfs_start_transaction(root, 0); 1620 trans = btrfs_start_transaction(root, 0);
1613 if (IS_ERR(trans)) { 1621 if (IS_ERR(trans)) {
1622 kfree(device->name);
1614 kfree(device); 1623 kfree(device);
1615 ret = PTR_ERR(trans); 1624 ret = PTR_ERR(trans);
1616 goto error; 1625 goto error;
@@ -1631,7 +1640,7 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
1631 device->dev_root = root->fs_info->dev_root; 1640 device->dev_root = root->fs_info->dev_root;
1632 device->bdev = bdev; 1641 device->bdev = bdev;
1633 device->in_fs_metadata = 1; 1642 device->in_fs_metadata = 1;
1634 device->mode = 0; 1643 device->mode = FMODE_EXCL;
1635 set_blocksize(device->bdev, 4096); 1644 set_blocksize(device->bdev, 4096);
1636 1645
1637 if (seeding_dev) { 1646 if (seeding_dev) {