Diffstat (limited to 'fs/btrfs/extent_io.c')
-rw-r--r--   fs/btrfs/extent_io.c   453
1 file changed, 302 insertions(+), 151 deletions(-)
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 3955e475ceec..a389820d158b 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -1693,6 +1693,7 @@ again:
		 * shortening the size of the delalloc range we're searching
		 */
		free_extent_state(cached_state);
+		cached_state = NULL;
		if (!loops) {
			max_bytes = PAGE_CACHE_SIZE;
			loops = 1;
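Note on the hunk above: clearing cached_state right after free_extent_state() keeps the retry path (goto again) from handing a stale pointer back into the range lookup. The standalone C sketch below only models that free-then-NULL-then-retry pattern; struct state and the helper names are illustrative, not btrfs code.

#include <stdio.h>
#include <stdlib.h>

/* stand-in for a cached extent_state reference */
struct state { int refs; };

static void free_state(struct state *s)
{
	if (s)
		free(s);
}

int main(void)
{
	struct state *cached = malloc(sizeof(*cached));
	int loops = 0;

again:
	/* lookup failed: drop the cached reference before retrying */
	free_state(cached);
	cached = NULL;		/* without this, the retry would reuse freed memory */
	if (!loops) {
		loops = 1;
		goto again;	/* second pass sees cached == NULL and is safe */
	}
	printf("done, cached = %p\n", (void *)cached);
	return 0;
}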
@@ -2353,7 +2354,7 @@ int end_extent_writepage(struct page *page, int err, u64 start, u64 end)
 {
	int uptodate = (err == 0);
	struct extent_io_tree *tree;
-	int ret;
+	int ret = 0;
 
	tree = &BTRFS_I(page->mapping->host)->io_tree;
 
@@ -2367,6 +2368,8 @@ int end_extent_writepage(struct page *page, int err, u64 start, u64 end)
	if (!uptodate) {
		ClearPageUptodate(page);
		SetPageError(page);
+		ret = ret < 0 ? ret : -EIO;
+		mapping_set_error(page->mapping, ret);
	}
	return 0;
 }
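Note on the hunk above: end_extent_writepage() now latches a write failure on the page's address_space via mapping_set_error(), so a later fsync()/filemap_fdatawait() can report it instead of the error being dropped. Below is a small user-space model of that "record the first error, report it at sync time" idea; struct mapping, mapping_record_error() and toy_fsync() are stand-ins, not kernel API.

#include <stdio.h>

/* toy stand-in for address_space error latching */
struct mapping { int error; };

static void mapping_record_error(struct mapping *m, int err)
{
	if (err && !m->error)	/* remember that a write failed, roughly what mapping_set_error() does */
		m->error = err;
}

static int end_write(struct mapping *m, int err)
{
	int ret = 0;

	if (err) {
		ret = ret < 0 ? ret : -5;	/* -EIO */
		mapping_record_error(m, ret);
	}
	return 0;
}

static int toy_fsync(struct mapping *m)
{
	int err = m->error;	/* what the wait-for-writeback path would pick up */

	m->error = 0;
	return err;
}

int main(void)
{
	struct mapping m = { 0 };

	end_write(&m, -5);	/* one page failed */
	end_write(&m, 0);	/* later pages succeed */
	printf("fsync -> %d\n", toy_fsync(&m));	/* still reports -5 (-EIO) */
	return 0;
}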
@@ -3098,143 +3101,130 @@ static noinline void update_nr_written(struct page *page,
 }
 
 /*
- * the writepage semantics are similar to regular writepage. extent
- * records are inserted to lock ranges in the tree, and as dirty areas
- * are found, they are marked writeback. Then the lock bits are removed
- * and the end_io handler clears the writeback ranges
+ * helper for __extent_writepage, doing all of the delayed allocation setup.
+ *
+ * This returns 1 if our fill_delalloc function did all the work required
+ * to write the page (copy into inline extent). In this case the IO has
+ * been started and the page is already unlocked.
+ *
+ * This returns 0 if all went well (page still locked)
+ * This returns < 0 if there were errors (page still locked)
  */
-static int __extent_writepage(struct page *page, struct writeback_control *wbc,
-			      void *data)
+static noinline_for_stack int writepage_delalloc(struct inode *inode,
+			      struct page *page, struct writeback_control *wbc,
+			      struct extent_page_data *epd,
+			      u64 delalloc_start,
+			      unsigned long *nr_written)
+{
+	struct extent_io_tree *tree = epd->tree;
+	u64 page_end = delalloc_start + PAGE_CACHE_SIZE - 1;
+	u64 nr_delalloc;
+	u64 delalloc_to_write = 0;
+	u64 delalloc_end = 0;
+	int ret;
+	int page_started = 0;
+
+	if (epd->extent_locked || !tree->ops || !tree->ops->fill_delalloc)
+		return 0;
+
+	while (delalloc_end < page_end) {
+		nr_delalloc = find_lock_delalloc_range(inode, tree,
+					       page,
+					       &delalloc_start,
+					       &delalloc_end,
+					       128 * 1024 * 1024);
+		if (nr_delalloc == 0) {
+			delalloc_start = delalloc_end + 1;
+			continue;
+		}
+		ret = tree->ops->fill_delalloc(inode, page,
+					       delalloc_start,
+					       delalloc_end,
+					       &page_started,
+					       nr_written);
+		/* File system has been set read-only */
+		if (ret) {
+			SetPageError(page);
+			/* fill_delalloc should be return < 0 for error
+			 * but just in case, we use > 0 here meaning the
+			 * IO is started, so we don't want to return > 0
+			 * unless things are going well.
+			 */
+			ret = ret < 0 ? ret : -EIO;
+			goto done;
+		}
+		/*
+		 * delalloc_end is already one less than the total
+		 * length, so we don't subtract one from
+		 * PAGE_CACHE_SIZE
+		 */
+		delalloc_to_write += (delalloc_end - delalloc_start +
+				      PAGE_CACHE_SIZE) >>
+				      PAGE_CACHE_SHIFT;
+		delalloc_start = delalloc_end + 1;
+	}
+	if (wbc->nr_to_write < delalloc_to_write) {
+		int thresh = 8192;
+
+		if (delalloc_to_write < thresh * 2)
+			thresh = delalloc_to_write;
+		wbc->nr_to_write = min_t(u64, delalloc_to_write,
+					 thresh);
+	}
+
+	/* did the fill delalloc function already unlock and start
+	 * the IO?
+	 */
+	if (page_started) {
+		/*
+		 * we've unlocked the page, so we can't update
+		 * the mapping's writeback index, just update
+		 * nr_to_write.
+		 */
+		wbc->nr_to_write -= *nr_written;
+		return 1;
+	}
+
+	ret = 0;
+
+done:
+	return ret;
+}
+
+/*
+ * helper for __extent_writepage. This calls the writepage start hooks,
+ * and does the loop to map the page into extents and bios.
+ *
+ * We return 1 if the IO is started and the page is unlocked,
+ * 0 if all went well (page still locked)
+ * < 0 if there were errors (page still locked)
+ */
+static noinline_for_stack int __extent_writepage_io(struct inode *inode,
+			      struct page *page,
+			      struct writeback_control *wbc,
+			      struct extent_page_data *epd,
+			      loff_t i_size,
+			      unsigned long nr_written,
+			      int write_flags, int *nr_ret)
 {
-	struct inode *inode = page->mapping->host;
-	struct extent_page_data *epd = data;
	struct extent_io_tree *tree = epd->tree;
	u64 start = page_offset(page);
-	u64 delalloc_start;
	u64 page_end = start + PAGE_CACHE_SIZE - 1;
	u64 end;
	u64 cur = start;
	u64 extent_offset;
-	u64 last_byte = i_size_read(inode);
	u64 block_start;
	u64 iosize;
	sector_t sector;
	struct extent_state *cached_state = NULL;
	struct extent_map *em;
	struct block_device *bdev;
-	int ret;
-	int nr = 0;
	size_t pg_offset = 0;
	size_t blocksize;
-	loff_t i_size = i_size_read(inode);
-	unsigned long end_index = i_size >> PAGE_CACHE_SHIFT;
-	u64 nr_delalloc;
-	u64 delalloc_end;
-	int page_started;
-	int compressed;
-	int write_flags;
-	unsigned long nr_written = 0;
-	bool fill_delalloc = true;
-
-	if (wbc->sync_mode == WB_SYNC_ALL)
-		write_flags = WRITE_SYNC;
-	else
-		write_flags = WRITE;
-
-	trace___extent_writepage(page, inode, wbc);
-
-	WARN_ON(!PageLocked(page));
-
-	ClearPageError(page);
-
-	pg_offset = i_size & (PAGE_CACHE_SIZE - 1);
-	if (page->index > end_index ||
-	    (page->index == end_index && !pg_offset)) {
-		page->mapping->a_ops->invalidatepage(page, 0, PAGE_CACHE_SIZE);
-		unlock_page(page);
-		return 0;
-	}
-
-	if (page->index == end_index) {
-		char *userpage;
-
-		userpage = kmap_atomic(page);
-		memset(userpage + pg_offset, 0,
-		       PAGE_CACHE_SIZE - pg_offset);
-		kunmap_atomic(userpage);
-		flush_dcache_page(page);
-	}
-	pg_offset = 0;
-
-	set_page_extent_mapped(page);
-
-	if (!tree->ops || !tree->ops->fill_delalloc)
-		fill_delalloc = false;
-
-	delalloc_start = start;
-	delalloc_end = 0;
-	page_started = 0;
-	if (!epd->extent_locked && fill_delalloc) {
-		u64 delalloc_to_write = 0;
-		/*
-		 * make sure the wbc mapping index is at least updated
-		 * to this page.
-		 */
-		update_nr_written(page, wbc, 0);
-
-		while (delalloc_end < page_end) {
-			nr_delalloc = find_lock_delalloc_range(inode, tree,
-						       page,
-						       &delalloc_start,
-						       &delalloc_end,
-						       128 * 1024 * 1024);
-			if (nr_delalloc == 0) {
-				delalloc_start = delalloc_end + 1;
-				continue;
-			}
-			ret = tree->ops->fill_delalloc(inode, page,
-						       delalloc_start,
-						       delalloc_end,
-						       &page_started,
-						       &nr_written);
-			/* File system has been set read-only */
-			if (ret) {
-				SetPageError(page);
-				goto done;
-			}
-			/*
-			 * delalloc_end is already one less than the total
-			 * length, so we don't subtract one from
-			 * PAGE_CACHE_SIZE
-			 */
-			delalloc_to_write += (delalloc_end - delalloc_start +
-					      PAGE_CACHE_SIZE) >>
-					      PAGE_CACHE_SHIFT;
-			delalloc_start = delalloc_end + 1;
-		}
-		if (wbc->nr_to_write < delalloc_to_write) {
-			int thresh = 8192;
-
-			if (delalloc_to_write < thresh * 2)
-				thresh = delalloc_to_write;
-			wbc->nr_to_write = min_t(u64, delalloc_to_write,
-						 thresh);
-		}
+	int ret = 0;
+	int nr = 0;
+	bool compressed;
 
-		/* did the fill delalloc function already unlock and start
-		 * the IO?
-		 */
-		if (page_started) {
-			ret = 0;
-			/*
-			 * we've unlocked the page, so we can't update
-			 * the mapping's writeback index, just update
-			 * nr_to_write.
-			 */
-			wbc->nr_to_write -= nr_written;
-			goto done_unlocked;
-		}
-	}
	if (tree->ops && tree->ops->writepage_start_hook) {
		ret = tree->ops->writepage_start_hook(page, start,
						      page_end);
@@ -3244,9 +3234,10 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
				wbc->pages_skipped++;
			else
				redirty_page_for_writepage(wbc, page);
+
			update_nr_written(page, wbc, nr_written);
			unlock_page(page);
-			ret = 0;
+			ret = 1;
			goto done_unlocked;
		}
	}
@@ -3258,7 +3249,7 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
	update_nr_written(page, wbc, nr_written + 1);
 
	end = page_end;
-	if (last_byte <= start) {
+	if (i_size <= start) {
		if (tree->ops && tree->ops->writepage_end_io_hook)
			tree->ops->writepage_end_io_hook(page, start,
							 page_end, NULL, 1);
@@ -3268,7 +3259,8 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
	blocksize = inode->i_sb->s_blocksize;
 
	while (cur <= end) {
-		if (cur >= last_byte) {
+		u64 em_end;
+		if (cur >= i_size) {
			if (tree->ops && tree->ops->writepage_end_io_hook)
				tree->ops->writepage_end_io_hook(page, cur,
							 page_end, NULL, 1);
@@ -3278,13 +3270,15 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
				 end - cur + 1, 1);
		if (IS_ERR_OR_NULL(em)) {
			SetPageError(page);
+			ret = PTR_ERR_OR_ZERO(em);
			break;
		}
 
		extent_offset = cur - em->start;
-		BUG_ON(extent_map_end(em) <= cur);
+		em_end = extent_map_end(em);
+		BUG_ON(em_end <= cur);
		BUG_ON(end < cur);
-		iosize = min(extent_map_end(em) - cur, end - cur + 1);
+		iosize = min(em_end - cur, end - cur + 1);
		iosize = ALIGN(iosize, blocksize);
		sector = (em->block_start + extent_offset) >> 9;
		bdev = em->bdev;
@@ -3320,13 +3314,6 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
			pg_offset += iosize;
			continue;
		}
-		/* leave this out until we have a page_mkwrite call */
-		if (0 && !test_range_bit(tree, cur, cur + iosize - 1,
-				   EXTENT_DIRTY, 0, NULL)) {
-			cur = cur + iosize;
-			pg_offset += iosize;
-			continue;
-		}
 
		if (tree->ops && tree->ops->writepage_io_hook) {
			ret = tree->ops->writepage_io_hook(page, cur,
@@ -3337,7 +3324,7 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
		if (ret) {
			SetPageError(page);
		} else {
-			unsigned long max_nr = end_index + 1;
+			unsigned long max_nr = (i_size >> PAGE_CACHE_SHIFT) + 1;
 
			set_range_writeback(tree, cur, cur + iosize - 1);
			if (!PageWriteback(page)) {
@@ -3359,17 +3346,94 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
		nr++;
	}
 done:
+	*nr_ret = nr;
+
+done_unlocked:
+
+	/* drop our reference on any cached states */
+	free_extent_state(cached_state);
+	return ret;
+}
+
+/*
+ * the writepage semantics are similar to regular writepage. extent
+ * records are inserted to lock ranges in the tree, and as dirty areas
+ * are found, they are marked writeback. Then the lock bits are removed
+ * and the end_io handler clears the writeback ranges
+ */
+static int __extent_writepage(struct page *page, struct writeback_control *wbc,
+			      void *data)
+{
+	struct inode *inode = page->mapping->host;
+	struct extent_page_data *epd = data;
+	u64 start = page_offset(page);
+	u64 page_end = start + PAGE_CACHE_SIZE - 1;
+	int ret;
+	int nr = 0;
+	size_t pg_offset = 0;
+	loff_t i_size = i_size_read(inode);
+	unsigned long end_index = i_size >> PAGE_CACHE_SHIFT;
+	int write_flags;
+	unsigned long nr_written = 0;
+
+	if (wbc->sync_mode == WB_SYNC_ALL)
+		write_flags = WRITE_SYNC;
+	else
+		write_flags = WRITE;
+
+	trace___extent_writepage(page, inode, wbc);
+
+	WARN_ON(!PageLocked(page));
+
+	ClearPageError(page);
+
+	pg_offset = i_size & (PAGE_CACHE_SIZE - 1);
+	if (page->index > end_index ||
+	    (page->index == end_index && !pg_offset)) {
+		page->mapping->a_ops->invalidatepage(page, 0, PAGE_CACHE_SIZE);
+		unlock_page(page);
+		return 0;
+	}
+
+	if (page->index == end_index) {
+		char *userpage;
+
+		userpage = kmap_atomic(page);
+		memset(userpage + pg_offset, 0,
+		       PAGE_CACHE_SIZE - pg_offset);
+		kunmap_atomic(userpage);
+		flush_dcache_page(page);
+	}
+
+	pg_offset = 0;
+
+	set_page_extent_mapped(page);
+
+	ret = writepage_delalloc(inode, page, wbc, epd, start, &nr_written);
+	if (ret == 1)
+		goto done_unlocked;
+	if (ret)
+		goto done;
+
+	ret = __extent_writepage_io(inode, page, wbc, epd,
+				    i_size, nr_written, write_flags, &nr);
+	if (ret == 1)
+		goto done_unlocked;
+
+done:
	if (nr == 0) {
		/* make sure the mapping tag for page dirty gets cleared */
		set_page_writeback(page);
		end_page_writeback(page);
	}
+	if (PageError(page)) {
+		ret = ret < 0 ? ret : -EIO;
+		end_extent_writepage(page, ret, start, page_end);
+	}
	unlock_page(page);
+	return ret;
 
 done_unlocked:
-
-	/* drop our reference on any cached states */
-	free_extent_state(cached_state);
	return 0;
 }
 
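Note on the preceding hunks (the writepage split): __extent_writepage() is now a thin wrapper that calls writepage_delalloc() and then __extent_writepage_io(), both of which use the same convention: 1 means the helper already started the IO and unlocked the page, 0 means success with the page still locked, and a negative value is an error with the page still locked. The standalone C sketch below only models that return-value convention; the *_model() functions are placeholders, not the btrfs implementations.

#include <stdio.h>

/*
 * Toy model of the return convention used by the split writepage path:
 *   1  -> the helper already started the IO and unlocked the page
 *   0  -> all went well, page still locked, keep going
 *  <0  -> error, page still locked, fall through to the done: cleanup
 */
static int writepage_delalloc_model(int inline_extent)
{
	return inline_extent ? 1 : 0;
}

static int writepage_io_model(int fail)
{
	return fail ? -5 /* -EIO */ : 0;
}

static int extent_writepage_model(int inline_extent, int fail)
{
	int ret;

	ret = writepage_delalloc_model(inline_extent);
	if (ret == 1)
		return 0;	/* done_unlocked: nothing left to do here */
	if (ret < 0)
		goto done;

	ret = writepage_io_model(fail);
	if (ret == 1)
		return 0;
done:
	/* error path: the real code would SetPageError()/end_extent_writepage() here */
	return ret;
}

int main(void)
{
	printf("inline: %d\n", extent_writepage_model(1, 0));
	printf("normal: %d\n", extent_writepage_model(0, 0));
	printf("failed: %d\n", extent_writepage_model(0, 1));
	return 0;
}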
@@ -3385,9 +3449,10 @@ void wait_on_extent_buffer_writeback(struct extent_buffer *eb)
		    TASK_UNINTERRUPTIBLE);
 }
 
-static int lock_extent_buffer_for_io(struct extent_buffer *eb,
-				     struct btrfs_fs_info *fs_info,
-				     struct extent_page_data *epd)
+static noinline_for_stack int
+lock_extent_buffer_for_io(struct extent_buffer *eb,
+			  struct btrfs_fs_info *fs_info,
+			  struct extent_page_data *epd)
 {
	unsigned long i, num_pages;
	int flush = 0;
@@ -3458,7 +3523,7 @@ static int lock_extent_buffer_for_io(struct extent_buffer *eb,
 static void end_extent_buffer_writeback(struct extent_buffer *eb)
 {
	clear_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags);
-	smp_mb__after_clear_bit();
+	smp_mb__after_atomic();
	wake_up_bit(&eb->bflags, EXTENT_BUFFER_WRITEBACK);
 }
 
@@ -3492,7 +3557,7 @@ static void end_bio_extent_buffer_writepage(struct bio *bio, int err)
	bio_put(bio);
 }
 
-static int write_one_eb(struct extent_buffer *eb,
+static noinline_for_stack int write_one_eb(struct extent_buffer *eb,
			struct btrfs_fs_info *fs_info,
			struct writeback_control *wbc,
			struct extent_page_data *epd)
@@ -3690,6 +3755,7 @@ static int extent_write_cache_pages(struct extent_io_tree *tree,
	struct inode *inode = mapping->host;
	int ret = 0;
	int done = 0;
+	int err = 0;
	int nr_to_write_done = 0;
	struct pagevec pvec;
	int nr_pages;
@@ -3776,8 +3842,8 @@ retry:
				unlock_page(page);
				ret = 0;
			}
-			if (ret)
-				done = 1;
+			if (!err && ret < 0)
+				err = ret;
 
			/*
			 * the filesystem may choose to bump up nr_to_write.
@@ -3789,7 +3855,7 @@ retry:
		pagevec_release(&pvec);
		cond_resched();
	}
-	if (!scanned && !done) {
+	if (!scanned && !done && !err) {
		/*
		 * We hit the last page and there is more work to be done: wrap
		 * back to the start of the file
@@ -3799,7 +3865,7 @@ retry:
		goto retry;
	}
	btrfs_add_delayed_iput(inode);
-	return ret;
+	return err;
 }
 
 static void flush_epd_write_bio(struct extent_page_data *epd)
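Note on the last four hunks: extent_write_cache_pages() used to stop on the first writepage error (done = 1) and still return 0; it now remembers the first negative ret in err, keeps writing the remaining pages, skips the wrap-around rescan when an error was seen, and returns err to the caller. Below is a small user-space model of that "latch the first error, finish the batch" pattern; write_page_model() is a placeholder.

#include <stdio.h>

/* toy model of "remember the first error, keep processing, return it at the end" */
static int write_page_model(int idx)
{
	return idx == 2 ? -5 /* -EIO */ : 0;	/* pretend page 2 fails */
}

int main(void)
{
	int err = 0;
	int ret;
	int i;

	for (i = 0; i < 5; i++) {
		ret = write_page_model(i);
		if (!err && ret < 0)	/* latch only the first failure */
			err = ret;
	}
	/* the wrap-around retry would be skipped when err != 0 */
	printf("returned %d after writing all pages\n", err);
	return 0;
}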
@@ -4510,7 +4576,8 @@ static void check_buffer_tree_ref(struct extent_buffer *eb)
	spin_unlock(&eb->refs_lock);
 }
 
-static void mark_extent_buffer_accessed(struct extent_buffer *eb)
+static void mark_extent_buffer_accessed(struct extent_buffer *eb,
+					struct page *accessed)
 {
	unsigned long num_pages, i;
 
@@ -4519,7 +4586,8 @@ static void mark_extent_buffer_accessed(struct extent_buffer *eb)
	num_pages = num_extent_pages(eb->start, eb->len);
	for (i = 0; i < num_pages; i++) {
		struct page *p = extent_buffer_page(eb, i);
-		mark_page_accessed(p);
+		if (p != accessed)
+			mark_page_accessed(p);
	}
 }
 
@@ -4533,7 +4601,7 @@ struct extent_buffer *find_extent_buffer(struct btrfs_fs_info *fs_info,
			       start >> PAGE_CACHE_SHIFT);
	if (eb && atomic_inc_not_zero(&eb->refs)) {
		rcu_read_unlock();
-		mark_extent_buffer_accessed(eb);
+		mark_extent_buffer_accessed(eb, NULL);
		return eb;
	}
	rcu_read_unlock();
@@ -4541,6 +4609,53 @@ struct extent_buffer *find_extent_buffer(struct btrfs_fs_info *fs_info,
	return NULL;
 }
 
+#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
+struct extent_buffer *alloc_test_extent_buffer(struct btrfs_fs_info *fs_info,
+					       u64 start, unsigned long len)
+{
+	struct extent_buffer *eb, *exists = NULL;
+	int ret;
+
+	eb = find_extent_buffer(fs_info, start);
+	if (eb)
+		return eb;
+	eb = alloc_dummy_extent_buffer(start, len);
+	if (!eb)
+		return NULL;
+	eb->fs_info = fs_info;
+again:
+	ret = radix_tree_preload(GFP_NOFS & ~__GFP_HIGHMEM);
+	if (ret)
+		goto free_eb;
+	spin_lock(&fs_info->buffer_lock);
+	ret = radix_tree_insert(&fs_info->buffer_radix,
+				start >> PAGE_CACHE_SHIFT, eb);
+	spin_unlock(&fs_info->buffer_lock);
+	radix_tree_preload_end();
+	if (ret == -EEXIST) {
+		exists = find_extent_buffer(fs_info, start);
+		if (exists)
+			goto free_eb;
+		else
+			goto again;
+	}
+	check_buffer_tree_ref(eb);
+	set_bit(EXTENT_BUFFER_IN_TREE, &eb->bflags);
+
+	/*
+	 * We will free dummy extent buffer's if they come into
+	 * free_extent_buffer with a ref count of 2, but if we are using this we
+	 * want the buffers to stay in memory until we're done with them, so
+	 * bump the ref count again.
+	 */
+	atomic_inc(&eb->refs);
+	return eb;
+free_eb:
+	btrfs_release_extent_buffer(eb);
+	return exists;
+}
+#endif
+
 struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info,
					  u64 start, unsigned long len)
 {
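Note on the hunk above: alloc_test_extent_buffer() follows the same insert path as alloc_extent_buffer(): preload the radix tree, take buffer_lock, try to insert, and on -EEXIST either reuse the buffer that won the race or, if that buffer could not be pinned because it was already being freed, loop back and try the insert again. The sketch below models only that retry loop with fake helpers; it is not the kernel code.

#include <errno.h>
#include <stdio.h>

/* fake insert: the first attempt loses a race, the second succeeds */
static int fake_insert(int attempt)
{
	return attempt == 0 ? -EEXIST : 0;
}

/* fake lookup: the racing buffer vanished before we could grab a ref */
static void *fake_lookup(void)
{
	return NULL;
}

int main(void)
{
	int attempt = 0;
	int ret;

again:
	ret = fake_insert(attempt++);
	if (ret == -EEXIST) {
		if (fake_lookup()) {
			printf("reusing the existing buffer\n");
			return 0;
		}
		goto again;	/* it was being freed, try to insert again */
	}
	printf("inserted our own buffer on attempt %d\n", attempt);
	return 0;
}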
@@ -4581,7 +4696,7 @@ struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info,
			spin_unlock(&mapping->private_lock);
			unlock_page(p);
			page_cache_release(p);
-			mark_extent_buffer_accessed(exists);
+			mark_extent_buffer_accessed(exists, p);
			goto free_eb;
		}
 
@@ -4596,7 +4711,6 @@ struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info,
		attach_extent_buffer_page(eb, p);
		spin_unlock(&mapping->private_lock);
		WARN_ON(PageDirty(p));
-		mark_page_accessed(p);
		eb->pages[i] = p;
		if (!PageUptodate(p))
			uptodate = 0;
@@ -4954,6 +5068,43 @@ void read_extent_buffer(struct extent_buffer *eb, void *dstv,
	}
 }
 
+int read_extent_buffer_to_user(struct extent_buffer *eb, void __user *dstv,
+			       unsigned long start,
+			       unsigned long len)
+{
+	size_t cur;
+	size_t offset;
+	struct page *page;
+	char *kaddr;
+	char __user *dst = (char __user *)dstv;
+	size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1);
+	unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT;
+	int ret = 0;
+
+	WARN_ON(start > eb->len);
+	WARN_ON(start + len > eb->start + eb->len);
+
+	offset = (start_offset + start) & (PAGE_CACHE_SIZE - 1);
+
+	while (len > 0) {
+		page = extent_buffer_page(eb, i);
+
+		cur = min(len, (PAGE_CACHE_SIZE - offset));
+		kaddr = page_address(page);
+		if (copy_to_user(dst, kaddr + offset, cur)) {
+			ret = -EFAULT;
+			break;
+		}
+
+		dst += cur;
+		len -= cur;
+		offset = 0;
+		i++;
+	}
+
+	return ret;
+}
+
 int map_private_extent_buffer(struct extent_buffer *eb, unsigned long start,
			      unsigned long min_len, char **map,
			      unsigned long *map_start,
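Note on the hunk above: read_extent_buffer_to_user() walks the extent buffer one page at a time, clamping each chunk to the end of the current page, and converts any short copy_to_user() into -EFAULT. The user-space model below reproduces just the page-walk arithmetic (start_offset within the first page, per-page clamping, offset reset after the first page); copy_from_pages() is illustrative, with memcpy() standing in for copy_to_user().

#include <stdio.h>
#include <string.h>

#define PAGE_SIZE 4096UL

/*
 * Copy `len` bytes out of a buffer made of fixed-size pages, starting at
 * `start` bytes past `start_offset` inside the first page, clamping every
 * chunk to the end of the current page.
 */
static void copy_from_pages(char *pages[], size_t start_offset,
			    size_t start, size_t len, char *dst)
{
	size_t i = (start_offset + start) / PAGE_SIZE;
	size_t offset = (start_offset + start) % PAGE_SIZE;

	while (len > 0) {
		size_t cur = len < PAGE_SIZE - offset ? len : PAGE_SIZE - offset;

		memcpy(dst, pages[i] + offset, cur);	/* copy_to_user() in the kernel */
		dst += cur;
		len -= cur;
		offset = 0;	/* every page after the first starts at 0 */
		i++;
	}
}

int main(void)
{
	static char p0[PAGE_SIZE], p1[PAGE_SIZE];
	char *pages[] = { p0, p1 };
	char out[32] = { 0 };

	memset(p0, 'A', PAGE_SIZE);
	memset(p1, 'B', PAGE_SIZE);
	/* read 8 bytes straddling the page boundary */
	copy_from_pages(pages, 0, PAGE_SIZE - 4, 8, out);
	printf("%.8s\n", out);	/* prints AAAABBBB */
	return 0;
}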