Diffstat (limited to 'fs/btrfs/extent_io.c')
-rw-r--r--  fs/btrfs/extent_io.c  134
1 file changed, 100 insertions(+), 34 deletions(-)
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 4c878476bb91..8036d3a84853 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -45,6 +45,7 @@ struct extent_page_data {
 	struct bio *bio;
 	struct extent_io_tree *tree;
 	get_extent_t *get_extent;
+	unsigned long bio_flags;
 
 	/* tells writepage not to lock the state bits for this range
 	 * it still does the unlocking
@@ -64,13 +65,13 @@ tree_fs_info(struct extent_io_tree *tree)
 
 int __init extent_io_init(void)
 {
-	extent_state_cache = kmem_cache_create("extent_state",
+	extent_state_cache = kmem_cache_create("btrfs_extent_state",
 			sizeof(struct extent_state), 0,
 			SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL);
 	if (!extent_state_cache)
 		return -ENOMEM;
 
-	extent_buffer_cache = kmem_cache_create("extent_buffers",
+	extent_buffer_cache = kmem_cache_create("btrfs_extent_buffer",
 			sizeof(struct extent_buffer), 0,
 			SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL);
 	if (!extent_buffer_cache)
@@ -107,6 +108,12 @@ void extent_io_exit(void)
 		list_del(&eb->leak_list);
 		kmem_cache_free(extent_buffer_cache, eb);
 	}
+
+	/*
+	 * Make sure all delayed rcu free are flushed before we
+	 * destroy caches.
+	 */
+	rcu_barrier();
 	if (extent_state_cache)
 		kmem_cache_destroy(extent_state_cache);
 	if (extent_buffer_cache)
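Note on the rcu_barrier() above: extent buffers are released through call_rcu() (see release_extent_buffer() at the bottom of this diff), so callbacks that call kmem_cache_free() can still be queued when the module unloads. The barrier makes extent_io_exit() wait for them before kmem_cache_destroy(). A minimal sketch of the general pattern, with illustrative names rather than the btrfs ones:

/* Sketch only: objects freed via call_rcu() out of a dedicated slab cache. */
struct obj {
	struct rcu_head rcu;
	/* ... payload ... */
};

static struct kmem_cache *obj_cache;

static void obj_free_rcu(struct rcu_head *head)
{
	kmem_cache_free(obj_cache, container_of(head, struct obj, rcu));
}

static void obj_release(struct obj *o)
{
	call_rcu(&o->rcu, obj_free_rcu);	/* deferred free; readers may still hold it */
}

static void obj_exit(void)
{
	rcu_barrier();			/* flush every queued obj_free_rcu() first */
	kmem_cache_destroy(obj_cache);
}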
@@ -936,6 +943,7 @@ int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, int bits,
  * @end: the end offset in bytes (inclusive)
  * @bits: the bits to set in this range
  * @clear_bits: the bits to clear in this range
+ * @cached_state: state that we're going to cache
  * @mask: the allocation mask
  *
  * This will go through and set bits for the given range. If any states exist
@@ -945,7 +953,8 @@ int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, int bits,
  * boundary bits like LOCK.
  */
 int convert_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
-		       int bits, int clear_bits, gfp_t mask)
+		       int bits, int clear_bits,
+		       struct extent_state **cached_state, gfp_t mask)
 {
 	struct extent_state *state;
 	struct extent_state *prealloc = NULL;
@@ -962,6 +971,15 @@ again:
 	}
 
 	spin_lock(&tree->lock);
+	if (cached_state && *cached_state) {
+		state = *cached_state;
+		if (state->start <= start && state->end > start &&
+		    state->tree) {
+			node = &state->rb_node;
+			goto hit_next;
+		}
+	}
+
 	/*
 	 * this search will find all the extents that end after
 	 * our range starts.
@@ -992,6 +1010,7 @@ hit_next:
 	 */
 	if (state->start == start && state->end <= end) {
 		set_state_bits(tree, state, &bits);
+		cache_state(state, cached_state);
 		state = clear_state_bit(tree, state, &clear_bits, 0);
 		if (last_end == (u64)-1)
 			goto out;
@@ -1032,6 +1051,7 @@ hit_next:
 			goto out;
 		if (state->end <= end) {
 			set_state_bits(tree, state, &bits);
+			cache_state(state, cached_state);
 			state = clear_state_bit(tree, state, &clear_bits, 0);
 			if (last_end == (u64)-1)
 				goto out;
@@ -1070,6 +1090,7 @@ hit_next:
 				   &bits);
 		if (err)
 			extent_io_tree_panic(tree, err);
+		cache_state(prealloc, cached_state);
 		prealloc = NULL;
 		start = this_end + 1;
 		goto search_again;
@@ -1092,6 +1113,7 @@ hit_next:
 			extent_io_tree_panic(tree, err);
 
 		set_state_bits(tree, prealloc, &bits);
+		cache_state(prealloc, cached_state);
 		clear_state_bit(tree, prealloc, &clear_bits, 0);
 		prealloc = NULL;
 		goto out;
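Note: convert_extent_bit() now takes an optional cached_state, as set_extent_bit() and clear_extent_bit() already do; when the caller hands back the state returned by the previous call and it still covers the new start, the rb-tree search is skipped through the hit_next fast path above. A hedged usage sketch follows; the chunked loop and the particular bits are illustrative, not taken from this patch:

/*
 * Illustrative caller only: flip EXTENT_DIRTY to EXTENT_NEW over a range,
 * one chunk at a time, letting consecutive calls reuse the cached state
 * instead of searching the tree from the root each time.
 */
static void convert_range_in_chunks(struct extent_io_tree *tree,
				    u64 start, u64 end, u64 chunk)
{
	struct extent_state *cached = NULL;
	u64 cur = start;

	while (cur < end) {
		u64 last = min(cur + chunk - 1, end);

		convert_extent_bit(tree, cur, last, EXTENT_NEW, EXTENT_DIRTY,
				   &cached, GFP_NOFS);
		cur = last + 1;
	}
	free_extent_state(cached);	/* drop the ref taken by cache_state() */
}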
@@ -1144,6 +1166,14 @@ int set_extent_delalloc(struct extent_io_tree *tree, u64 start, u64 end,
 			      NULL, cached_state, mask);
 }
 
+int set_extent_defrag(struct extent_io_tree *tree, u64 start, u64 end,
+		      struct extent_state **cached_state, gfp_t mask)
+{
+	return set_extent_bit(tree, start, end,
+			      EXTENT_DELALLOC | EXTENT_UPTODATE | EXTENT_DEFRAG,
+			      NULL, cached_state, mask);
+}
+
 int clear_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end,
 		       gfp_t mask)
 {
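Note: set_extent_defrag() mirrors set_extent_delalloc() directly above it, adding EXTENT_DEFRAG so later code can tell defrag-dirtied ranges from ordinary delalloc. A hedged caller sketch, with the range variables assumed and the locking helpers being the existing extent_io ones:

	/* Illustrative only: mark a locked range as dirtied by defrag. */
	struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
	struct extent_state *cached_state = NULL;

	lock_extent_bits(io_tree, start, end, 0, &cached_state);
	set_extent_defrag(io_tree, start, end, &cached_state, GFP_NOFS);
	unlock_extent_cached(io_tree, start, end, &cached_state, GFP_NOFS);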
@@ -1288,18 +1318,42 @@ out:
  * If nothing was found, 1 is returned. If found something, return 0.
  */
 int find_first_extent_bit(struct extent_io_tree *tree, u64 start,
-			  u64 *start_ret, u64 *end_ret, int bits)
+			  u64 *start_ret, u64 *end_ret, int bits,
+			  struct extent_state **cached_state)
 {
 	struct extent_state *state;
+	struct rb_node *n;
 	int ret = 1;
 
 	spin_lock(&tree->lock);
+	if (cached_state && *cached_state) {
+		state = *cached_state;
+		if (state->end == start - 1 && state->tree) {
+			n = rb_next(&state->rb_node);
+			while (n) {
+				state = rb_entry(n, struct extent_state,
+						 rb_node);
+				if (state->state & bits)
+					goto got_it;
+				n = rb_next(n);
+			}
+			free_extent_state(*cached_state);
+			*cached_state = NULL;
+			goto out;
+		}
+		free_extent_state(*cached_state);
+		*cached_state = NULL;
+	}
+
 	state = find_first_extent_bit_state(tree, start, bits);
+got_it:
 	if (state) {
+		cache_state(state, cached_state);
 		*start_ret = state->start;
 		*end_ret = state->end;
 		ret = 0;
 	}
+out:
 	spin_unlock(&tree->lock);
 	return ret;
 }
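Note: find_first_extent_bit() can now resume from a cached state. If the caller passes back the state it was given last time and the new start sits immediately after it (state->end == start - 1), the lookup walks rb_next() from that node instead of searching from the root; a cache that no longer applies is dropped with free_extent_state() and the slow path runs. A hedged sketch of a scanning caller, with the loop and the bit chosen purely for illustration:

	/* Illustrative only: walk every EXTENT_DIRTY range in a tree. */
	struct extent_state *cached = NULL;
	u64 start = 0, found_start, found_end;

	while (!find_first_extent_bit(tree, start, &found_start, &found_end,
				      EXTENT_DIRTY, &cached)) {
		/* ... operate on [found_start, found_end] ... */
		start = found_end + 1;
	}
	free_extent_state(cached);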
@@ -2062,7 +2116,7 @@ static int bio_readpage_error(struct bio *failed_bio, struct page *page,
 	}
 	read_unlock(&em_tree->lock);
 
-	if (!em || IS_ERR(em)) {
+	if (!em) {
 		kfree(failrec);
 		return -EIO;
 	}
@@ -2298,8 +2352,8 @@ static void end_bio_extent_readpage(struct bio *bio, int err)
 		struct extent_state *cached = NULL;
 		struct extent_state *state;
 
-		pr_debug("end_bio_extent_readpage: bi_vcnt=%d, idx=%d, err=%d, "
-			 "mirror=%ld\n", bio->bi_vcnt, bio->bi_idx, err,
+		pr_debug("end_bio_extent_readpage: bi_sector=%llu, err=%d, "
+			 "mirror=%ld\n", (u64)bio->bi_sector, err,
 			 (long int)bio->bi_bdev);
 		tree = &BTRFS_I(page->mapping->host)->io_tree;
 
@@ -2703,12 +2757,15 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
 					 end_bio_extent_readpage, mirror_num,
 					 *bio_flags,
 					 this_bio_flag);
-			BUG_ON(ret == -ENOMEM);
-			nr++;
-			*bio_flags = this_bio_flag;
+			if (!ret) {
+				nr++;
+				*bio_flags = this_bio_flag;
+			}
 		}
-		if (ret)
+		if (ret) {
 			SetPageError(page);
+			unlock_extent(tree, cur, cur + iosize - 1);
+		}
 		cur = cur + iosize;
 		pg_offset += iosize;
 	}
@@ -3155,12 +3212,16 @@ static int write_one_eb(struct extent_buffer *eb,
 	struct block_device *bdev = fs_info->fs_devices->latest_bdev;
 	u64 offset = eb->start;
 	unsigned long i, num_pages;
+	unsigned long bio_flags = 0;
 	int rw = (epd->sync_io ? WRITE_SYNC : WRITE);
 	int ret = 0;
 
 	clear_bit(EXTENT_BUFFER_IOERR, &eb->bflags);
 	num_pages = num_extent_pages(eb->start, eb->len);
 	atomic_set(&eb->io_pages, num_pages);
+	if (btrfs_header_owner(eb) == BTRFS_TREE_LOG_OBJECTID)
+		bio_flags = EXTENT_BIO_TREE_LOG;
+
 	for (i = 0; i < num_pages; i++) {
 		struct page *p = extent_buffer_page(eb, i);
 
@@ -3169,7 +3230,8 @@ static int write_one_eb(struct extent_buffer *eb,
 		ret = submit_extent_page(rw, eb->tree, p, offset >> 9,
 					 PAGE_CACHE_SIZE, 0, bdev, &epd->bio,
 					 -1, end_bio_extent_buffer_writepage,
-					 0, 0, 0);
+					 0, epd->bio_flags, bio_flags);
+		epd->bio_flags = bio_flags;
 		if (ret) {
 			set_bit(EXTENT_BUFFER_IOERR, &eb->bflags);
 			SetPageError(p);
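Note: write_one_eb() now tags bios for log-tree blocks with EXTENT_BIO_TREE_LOG and records the value in epd->bio_flags. submit_extent_page() starts a new bio whenever the flags of the page being added differ from the flags the current bio was opened with, and flush_epd_write_bio() (changed below) must submit with epd->bio_flags rather than 0 so the tag survives the final flush. A minimal sketch of that merge rule, not the real submit_extent_page() signature:

/* Sketch: a page may only join the bio in flight if its flags match. */
static bool can_merge_into_bio(unsigned long prev_bio_flags,
			       unsigned long bio_flags,
			       bool physically_contiguous)
{
	return prev_bio_flags == bio_flags && physically_contiguous;
}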
@@ -3204,6 +3266,7 @@ int btree_write_cache_pages(struct address_space *mapping,
 		.tree = tree,
 		.extent_locked = 0,
 		.sync_io = wbc->sync_mode == WB_SYNC_ALL,
+		.bio_flags = 0,
 	};
 	int ret = 0;
 	int done = 0;
@@ -3248,19 +3311,34 @@ retry:
 			break;
 		}
 
+		spin_lock(&mapping->private_lock);
+		if (!PagePrivate(page)) {
+			spin_unlock(&mapping->private_lock);
+			continue;
+		}
+
 		eb = (struct extent_buffer *)page->private;
+
+		/*
+		 * Shouldn't happen and normally this would be a BUG_ON
+		 * but no sense in crashing the users box for something
+		 * we can survive anyway.
+		 */
 		if (!eb) {
+			spin_unlock(&mapping->private_lock);
 			WARN_ON(1);
 			continue;
 		}
 
-		if (eb == prev_eb)
+		if (eb == prev_eb) {
+			spin_unlock(&mapping->private_lock);
 			continue;
+		}
 
-		if (!atomic_inc_not_zero(&eb->refs)) {
-			WARN_ON(1);
+		ret = atomic_inc_not_zero(&eb->refs);
+		spin_unlock(&mapping->private_lock);
+		if (!ret)
 			continue;
-		}
 
 		prev_eb = eb;
 		ret = lock_extent_buffer_for_io(eb, fs_info, &epd);
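Note: the hunk above closes a race between btree writeback and extent buffer freeing: page->private is only dereferenced under mapping->private_lock, and the reference is taken with atomic_inc_not_zero() before that lock is dropped, so an eb whose refcount already hit zero is skipped rather than resurrected. A sketch of the same pattern wrapped in a helper; the helper itself is hypothetical:

/* Sketch only: safely look up the extent_buffer attached to a page. */
static struct extent_buffer *eb_try_get(struct address_space *mapping,
					struct page *page)
{
	struct extent_buffer *eb = NULL;

	spin_lock(&mapping->private_lock);
	if (PagePrivate(page)) {
		eb = (struct extent_buffer *)page->private;
		if (!atomic_inc_not_zero(&eb->refs))
			eb = NULL;	/* already being freed, skip it */
	}
	spin_unlock(&mapping->private_lock);
	return eb;
}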
@@ -3451,7 +3529,7 @@ static void flush_epd_write_bio(struct extent_page_data *epd)
 		if (epd->sync_io)
 			rw = WRITE_SYNC;
 
-		ret = submit_one_bio(rw, epd->bio, 0, 0);
+		ret = submit_one_bio(rw, epd->bio, 0, epd->bio_flags);
 		BUG_ON(ret < 0); /* -ENOMEM */
 		epd->bio = NULL;
 	}
@@ -3474,6 +3552,7 @@ int extent_write_full_page(struct extent_io_tree *tree, struct page *page,
 		.get_extent = get_extent,
 		.extent_locked = 0,
 		.sync_io = wbc->sync_mode == WB_SYNC_ALL,
+		.bio_flags = 0,
 	};
 
 	ret = __extent_writepage(page, wbc, &epd);
@@ -3498,6 +3577,7 @@ int extent_write_locked_range(struct extent_io_tree *tree, struct inode *inode,
 		.get_extent = get_extent,
 		.extent_locked = 1,
 		.sync_io = mode == WB_SYNC_ALL,
+		.bio_flags = 0,
 	};
 	struct writeback_control wbc_writepages = {
 		.sync_mode = mode,
@@ -3537,6 +3617,7 @@ int extent_writepages(struct extent_io_tree *tree,
 		.get_extent = get_extent,
 		.extent_locked = 0,
 		.sync_io = wbc->sync_mode == WB_SYNC_ALL,
+		.bio_flags = 0,
 	};
 
 	ret = extent_write_cache_pages(tree, mapping, wbc,
@@ -3914,18 +3995,6 @@ out:
 	return ret;
 }
 
-inline struct page *extent_buffer_page(struct extent_buffer *eb,
-				       unsigned long i)
-{
-	return eb->pages[i];
-}
-
-inline unsigned long num_extent_pages(u64 start, u64 len)
-{
-	return ((start + len + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT) -
-		(start >> PAGE_CACHE_SHIFT);
-}
-
 static void __free_extent_buffer(struct extent_buffer *eb)
 {
 #if LEAK_DEBUG
@@ -4041,7 +4110,7 @@ struct extent_buffer *alloc_dummy_extent_buffer(u64 start, unsigned long len)
 
 	return eb;
 err:
-	for (i--; i > 0; i--)
+	for (i--; i >= 0; i--)
 		__free_page(eb->pages[i]);
 	__free_extent_buffer(eb);
 	return NULL;
@@ -4186,10 +4255,8 @@ struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree,
 
 	for (i = 0; i < num_pages; i++, index++) {
 		p = find_or_create_page(mapping, index, GFP_NOFS);
-		if (!p) {
-			WARN_ON(1);
+		if (!p)
 			goto free_eb;
-		}
 
 		spin_lock(&mapping->private_lock);
 		if (PagePrivate(p)) {
@@ -4332,7 +4399,6 @@ static int release_extent_buffer(struct extent_buffer *eb, gfp_t mask)
 
 		/* Should be safe to release our pages at this point */
 		btrfs_release_extent_buffer_page(eb, 0);
-
 		call_rcu(&eb->rcu_head, btrfs_release_extent_buffer_rcu);
 		return 1;
 	}