Diffstat (limited to 'fs/btrfs/extent_io.c')
-rw-r--r--   fs/btrfs/extent_io.c   453
1 file changed, 302 insertions(+), 151 deletions(-)
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 3955e475ceec..a389820d158b 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -1693,6 +1693,7 @@ again:
		 * shortening the size of the delalloc range we're searching
		 */
		free_extent_state(cached_state);
+		cached_state = NULL;
		if (!loops) {
			max_bytes = PAGE_CACHE_SIZE;
			loops = 1;
@@ -2353,7 +2354,7 @@ int end_extent_writepage(struct page *page, int err, u64 start, u64 end)
 {
	int uptodate = (err == 0);
	struct extent_io_tree *tree;
-	int ret;
+	int ret = 0;

	tree = &BTRFS_I(page->mapping->host)->io_tree;

@@ -2367,6 +2368,8 @@ int end_extent_writepage(struct page *page, int err, u64 start, u64 end)
	if (!uptodate) {
		ClearPageUptodate(page);
		SetPageError(page);
+		ret = ret < 0 ? ret : -EIO;
+		mapping_set_error(page->mapping, ret);
	}
	return 0;
 }
@@ -3098,143 +3101,130 @@ static noinline void update_nr_written(struct page *page,
 }

 /*
- * the writepage semantics are similar to regular writepage. extent
- * records are inserted to lock ranges in the tree, and as dirty areas
- * are found, they are marked writeback. Then the lock bits are removed
- * and the end_io handler clears the writeback ranges
+ * helper for __extent_writepage, doing all of the delayed allocation setup.
+ *
+ * This returns 1 if our fill_delalloc function did all the work required
+ * to write the page (copy into inline extent). In this case the IO has
+ * been started and the page is already unlocked.
+ *
+ * This returns 0 if all went well (page still locked)
+ * This returns < 0 if there were errors (page still locked)
  */
-static int __extent_writepage(struct page *page, struct writeback_control *wbc,
-			      void *data)
+static noinline_for_stack int writepage_delalloc(struct inode *inode,
+		struct page *page, struct writeback_control *wbc,
+		struct extent_page_data *epd,
+		u64 delalloc_start,
+		unsigned long *nr_written)
+{
+	struct extent_io_tree *tree = epd->tree;
+	u64 page_end = delalloc_start + PAGE_CACHE_SIZE - 1;
+	u64 nr_delalloc;
+	u64 delalloc_to_write = 0;
+	u64 delalloc_end = 0;
+	int ret;
+	int page_started = 0;
+
+	if (epd->extent_locked || !tree->ops || !tree->ops->fill_delalloc)
+		return 0;
+
+	while (delalloc_end < page_end) {
+		nr_delalloc = find_lock_delalloc_range(inode, tree,
+					       page,
+					       &delalloc_start,
+					       &delalloc_end,
+					       128 * 1024 * 1024);
+		if (nr_delalloc == 0) {
+			delalloc_start = delalloc_end + 1;
+			continue;
+		}
+		ret = tree->ops->fill_delalloc(inode, page,
+					       delalloc_start,
+					       delalloc_end,
+					       &page_started,
+					       nr_written);
+		/* File system has been set read-only */
+		if (ret) {
+			SetPageError(page);
+			/* fill_delalloc should be return < 0 for error
+			 * but just in case, we use > 0 here meaning the
+			 * IO is started, so we don't want to return > 0
+			 * unless things are going well.
+			 */
+			ret = ret < 0 ? ret : -EIO;
+			goto done;
+		}
+		/*
+		 * delalloc_end is already one less than the total
+		 * length, so we don't subtract one from
+		 * PAGE_CACHE_SIZE
+		 */
+		delalloc_to_write += (delalloc_end - delalloc_start +
+				      PAGE_CACHE_SIZE) >>
+				      PAGE_CACHE_SHIFT;
+		delalloc_start = delalloc_end + 1;
+	}
+	if (wbc->nr_to_write < delalloc_to_write) {
+		int thresh = 8192;
+
+		if (delalloc_to_write < thresh * 2)
+			thresh = delalloc_to_write;
+		wbc->nr_to_write = min_t(u64, delalloc_to_write,
+					 thresh);
+	}
+
+	/* did the fill delalloc function already unlock and start
+	 * the IO?
+	 */
+	if (page_started) {
+		/*
+		 * we've unlocked the page, so we can't update
+		 * the mapping's writeback index, just update
+		 * nr_to_write.
+		 */
+		wbc->nr_to_write -= *nr_written;
+		return 1;
+	}
+
+	ret = 0;
+
+done:
+	return ret;
+}
+
+/*
+ * helper for __extent_writepage. This calls the writepage start hooks,
+ * and does the loop to map the page into extents and bios.
+ *
+ * We return 1 if the IO is started and the page is unlocked,
+ * 0 if all went well (page still locked)
+ * < 0 if there were errors (page still locked)
+ */
+static noinline_for_stack int __extent_writepage_io(struct inode *inode,
+		struct page *page,
+		struct writeback_control *wbc,
+		struct extent_page_data *epd,
+		loff_t i_size,
+		unsigned long nr_written,
+		int write_flags, int *nr_ret)
 {
-	struct inode *inode = page->mapping->host;
-	struct extent_page_data *epd = data;
	struct extent_io_tree *tree = epd->tree;
	u64 start = page_offset(page);
-	u64 delalloc_start;
	u64 page_end = start + PAGE_CACHE_SIZE - 1;
	u64 end;
	u64 cur = start;
	u64 extent_offset;
-	u64 last_byte = i_size_read(inode);
	u64 block_start;
	u64 iosize;
	sector_t sector;
	struct extent_state *cached_state = NULL;
	struct extent_map *em;
	struct block_device *bdev;
-	int ret;
-	int nr = 0;
	size_t pg_offset = 0;
	size_t blocksize;
-	loff_t i_size = i_size_read(inode);
-	unsigned long end_index = i_size >> PAGE_CACHE_SHIFT;
-	u64 nr_delalloc;
-	u64 delalloc_end;
-	int page_started;
-	int compressed;
-	int write_flags;
-	unsigned long nr_written = 0;
-	bool fill_delalloc = true;
-
-	if (wbc->sync_mode == WB_SYNC_ALL)
-		write_flags = WRITE_SYNC;
-	else
-		write_flags = WRITE;
-
-	trace___extent_writepage(page, inode, wbc);
-
-	WARN_ON(!PageLocked(page));
-
-	ClearPageError(page);
-
-	pg_offset = i_size & (PAGE_CACHE_SIZE - 1);
-	if (page->index > end_index ||
-	    (page->index == end_index && !pg_offset)) {
-		page->mapping->a_ops->invalidatepage(page, 0, PAGE_CACHE_SIZE);
-		unlock_page(page);
-		return 0;
-	}
-
-	if (page->index == end_index) {
-		char *userpage;
-
-		userpage = kmap_atomic(page);
-		memset(userpage + pg_offset, 0,
-		       PAGE_CACHE_SIZE - pg_offset);
-		kunmap_atomic(userpage);
-		flush_dcache_page(page);
-	}
-	pg_offset = 0;
-
-	set_page_extent_mapped(page);
-
-	if (!tree->ops || !tree->ops->fill_delalloc)
-		fill_delalloc = false;
-
-	delalloc_start = start;
-	delalloc_end = 0;
-	page_started = 0;
-	if (!epd->extent_locked && fill_delalloc) {
-		u64 delalloc_to_write = 0;
-		/*
-		 * make sure the wbc mapping index is at least updated
-		 * to this page.
-		 */
-		update_nr_written(page, wbc, 0);
-
-		while (delalloc_end < page_end) {
-			nr_delalloc = find_lock_delalloc_range(inode, tree,
-						       page,
-						       &delalloc_start,
-						       &delalloc_end,
-						       128 * 1024 * 1024);
-			if (nr_delalloc == 0) {
-				delalloc_start = delalloc_end + 1;
-				continue;
-			}
-			ret = tree->ops->fill_delalloc(inode, page,
-						       delalloc_start,
-						       delalloc_end,
-						       &page_started,
-						       &nr_written);
-			/* File system has been set read-only */
-			if (ret) {
-				SetPageError(page);
-				goto done;
-			}
-			/*
-			 * delalloc_end is already one less than the total
-			 * length, so we don't subtract one from
-			 * PAGE_CACHE_SIZE
-			 */
-			delalloc_to_write += (delalloc_end - delalloc_start +
-					      PAGE_CACHE_SIZE) >>
-					      PAGE_CACHE_SHIFT;
-			delalloc_start = delalloc_end + 1;
-		}
-		if (wbc->nr_to_write < delalloc_to_write) {
-			int thresh = 8192;
-
-			if (delalloc_to_write < thresh * 2)
-				thresh = delalloc_to_write;
-			wbc->nr_to_write = min_t(u64, delalloc_to_write,
-						 thresh);
-		}
+	int ret = 0;
+	int nr = 0;
+	bool compressed;

-		/* did the fill delalloc function already unlock and start
-		 * the IO?
-		 */
-		if (page_started) {
-			ret = 0;
-			/*
-			 * we've unlocked the page, so we can't update
-			 * the mapping's writeback index, just update
-			 * nr_to_write.
-			 */
-			wbc->nr_to_write -= nr_written;
-			goto done_unlocked;
-		}
-	}
	if (tree->ops && tree->ops->writepage_start_hook) {
		ret = tree->ops->writepage_start_hook(page, start,
						      page_end);
@@ -3244,9 +3234,10 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
				wbc->pages_skipped++;
			else
				redirty_page_for_writepage(wbc, page);
+
			update_nr_written(page, wbc, nr_written);
			unlock_page(page);
-			ret = 0;
+			ret = 1;
			goto done_unlocked;
		}
	}
@@ -3258,7 +3249,7 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
	update_nr_written(page, wbc, nr_written + 1);

	end = page_end;
-	if (last_byte <= start) {
+	if (i_size <= start) {
		if (tree->ops && tree->ops->writepage_end_io_hook)
			tree->ops->writepage_end_io_hook(page, start,
							 page_end, NULL, 1);
@@ -3268,7 +3259,8 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
	blocksize = inode->i_sb->s_blocksize;

	while (cur <= end) {
-		if (cur >= last_byte) {
+		u64 em_end;
+		if (cur >= i_size) {
			if (tree->ops && tree->ops->writepage_end_io_hook)
				tree->ops->writepage_end_io_hook(page, cur,
							 page_end, NULL, 1);
@@ -3278,13 +3270,15 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
					 end - cur + 1, 1);
		if (IS_ERR_OR_NULL(em)) {
			SetPageError(page);
+			ret = PTR_ERR_OR_ZERO(em);
			break;
		}

		extent_offset = cur - em->start;
-		BUG_ON(extent_map_end(em) <= cur);
+		em_end = extent_map_end(em);
+		BUG_ON(em_end <= cur);
		BUG_ON(end < cur);
-		iosize = min(extent_map_end(em) - cur, end - cur + 1);
+		iosize = min(em_end - cur, end - cur + 1);
		iosize = ALIGN(iosize, blocksize);
		sector = (em->block_start + extent_offset) >> 9;
		bdev = em->bdev;
@@ -3320,13 +3314,6 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
			pg_offset += iosize;
			continue;
		}
-		/* leave this out until we have a page_mkwrite call */
-		if (0 && !test_range_bit(tree, cur, cur + iosize - 1,
-				   EXTENT_DIRTY, 0, NULL)) {
-			cur = cur + iosize;
-			pg_offset += iosize;
-			continue;
-		}

		if (tree->ops && tree->ops->writepage_io_hook) {
			ret = tree->ops->writepage_io_hook(page, cur,
@@ -3337,7 +3324,7 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
		if (ret) {
			SetPageError(page);
		} else {
-			unsigned long max_nr = end_index + 1;
+			unsigned long max_nr = (i_size >> PAGE_CACHE_SHIFT) + 1;

			set_range_writeback(tree, cur, cur + iosize - 1);
			if (!PageWriteback(page)) {
@@ -3359,17 +3346,94 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
		nr++;
	}
 done:
+	*nr_ret = nr;
+
+done_unlocked:
+
+	/* drop our reference on any cached states */
+	free_extent_state(cached_state);
+	return ret;
+}
+
+/*
+ * the writepage semantics are similar to regular writepage. extent
+ * records are inserted to lock ranges in the tree, and as dirty areas
+ * are found, they are marked writeback. Then the lock bits are removed
+ * and the end_io handler clears the writeback ranges
+ */
+static int __extent_writepage(struct page *page, struct writeback_control *wbc,
+			      void *data)
+{
+	struct inode *inode = page->mapping->host;
+	struct extent_page_data *epd = data;
+	u64 start = page_offset(page);
+	u64 page_end = start + PAGE_CACHE_SIZE - 1;
+	int ret;
+	int nr = 0;
+	size_t pg_offset = 0;
+	loff_t i_size = i_size_read(inode);
+	unsigned long end_index = i_size >> PAGE_CACHE_SHIFT;
+	int write_flags;
+	unsigned long nr_written = 0;
+
+	if (wbc->sync_mode == WB_SYNC_ALL)
+		write_flags = WRITE_SYNC;
+	else
+		write_flags = WRITE;
+
+	trace___extent_writepage(page, inode, wbc);
+
+	WARN_ON(!PageLocked(page));
+
+	ClearPageError(page);
+
+	pg_offset = i_size & (PAGE_CACHE_SIZE - 1);
+	if (page->index > end_index ||
+	    (page->index == end_index && !pg_offset)) {
+		page->mapping->a_ops->invalidatepage(page, 0, PAGE_CACHE_SIZE);
+		unlock_page(page);
+		return 0;
+	}
+
+	if (page->index == end_index) {
+		char *userpage;
+
+		userpage = kmap_atomic(page);
+		memset(userpage + pg_offset, 0,
+		       PAGE_CACHE_SIZE - pg_offset);
+		kunmap_atomic(userpage);
+		flush_dcache_page(page);
+	}
+
+	pg_offset = 0;
+
+	set_page_extent_mapped(page);
+
+	ret = writepage_delalloc(inode, page, wbc, epd, start, &nr_written);
+	if (ret == 1)
+		goto done_unlocked;
+	if (ret)
+		goto done;
+
+	ret = __extent_writepage_io(inode, page, wbc, epd,
+				    i_size, nr_written, write_flags, &nr);
+	if (ret == 1)
+		goto done_unlocked;
+
+done:
	if (nr == 0) {
		/* make sure the mapping tag for page dirty gets cleared */
		set_page_writeback(page);
		end_page_writeback(page);
	}
+	if (PageError(page)) {
+		ret = ret < 0 ? ret : -EIO;
+		end_extent_writepage(page, ret, start, page_end);
+	}
	unlock_page(page);
+	return ret;

 done_unlocked:
-
-	/* drop our reference on any cached states */
-	free_extent_state(cached_state);
	return 0;
 }

@@ -3385,9 +3449,10 @@ void wait_on_extent_buffer_writeback(struct extent_buffer *eb)
		    TASK_UNINTERRUPTIBLE);
 }

-static int lock_extent_buffer_for_io(struct extent_buffer *eb,
-		      struct btrfs_fs_info *fs_info,
-		      struct extent_page_data *epd)
+static noinline_for_stack int
+lock_extent_buffer_for_io(struct extent_buffer *eb,
+			  struct btrfs_fs_info *fs_info,
+			  struct extent_page_data *epd)
 {
	unsigned long i, num_pages;
	int flush = 0;
@@ -3458,7 +3523,7 @@ static int lock_extent_buffer_for_io(struct extent_buffer *eb,
 static void end_extent_buffer_writeback(struct extent_buffer *eb)
 {
	clear_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags);
-	smp_mb__after_clear_bit();
+	smp_mb__after_atomic();
	wake_up_bit(&eb->bflags, EXTENT_BUFFER_WRITEBACK);
 }

@@ -3492,7 +3557,7 @@ static void end_bio_extent_buffer_writepage(struct bio *bio, int err)
	bio_put(bio);
 }

-static int write_one_eb(struct extent_buffer *eb,
+static noinline_for_stack int write_one_eb(struct extent_buffer *eb,
			struct btrfs_fs_info *fs_info,
			struct writeback_control *wbc,
			struct extent_page_data *epd)
@@ -3690,6 +3755,7 @@ static int extent_write_cache_pages(struct extent_io_tree *tree,
	struct inode *inode = mapping->host;
	int ret = 0;
	int done = 0;
+	int err = 0;
	int nr_to_write_done = 0;
	struct pagevec pvec;
	int nr_pages;
@@ -3776,8 +3842,8 @@ retry:
				unlock_page(page);
				ret = 0;
			}
-			if (ret)
-				done = 1;
+			if (!err && ret < 0)
+				err = ret;

			/*
			 * the filesystem may choose to bump up nr_to_write.
@@ -3789,7 +3855,7 @@ retry:
		pagevec_release(&pvec);
		cond_resched();
	}
-	if (!scanned && !done) {
+	if (!scanned && !done && !err) {
		/*
		 * We hit the last page and there is more work to be done: wrap
		 * back to the start of the file
@@ -3799,7 +3865,7 @@ retry:
		goto retry;
	}
	btrfs_add_delayed_iput(inode);
-	return ret;
+	return err;
 }

 static void flush_epd_write_bio(struct extent_page_data *epd)
@@ -4510,7 +4576,8 @@ static void check_buffer_tree_ref(struct extent_buffer *eb)
	spin_unlock(&eb->refs_lock);
 }

-static void mark_extent_buffer_accessed(struct extent_buffer *eb)
+static void mark_extent_buffer_accessed(struct extent_buffer *eb,
+					struct page *accessed)
 {
	unsigned long num_pages, i;

@@ -4519,7 +4586,8 @@ static void mark_extent_buffer_accessed(struct extent_buffer *eb)
	num_pages = num_extent_pages(eb->start, eb->len);
	for (i = 0; i < num_pages; i++) {
		struct page *p = extent_buffer_page(eb, i);
-		mark_page_accessed(p);
+		if (p != accessed)
+			mark_page_accessed(p);
	}
 }

@@ -4533,7 +4601,7 @@ struct extent_buffer *find_extent_buffer(struct btrfs_fs_info *fs_info,
			       start >> PAGE_CACHE_SHIFT);
	if (eb && atomic_inc_not_zero(&eb->refs)) {
		rcu_read_unlock();
-		mark_extent_buffer_accessed(eb);
+		mark_extent_buffer_accessed(eb, NULL);
		return eb;
	}
	rcu_read_unlock();
@@ -4541,6 +4609,53 @@ struct extent_buffer *find_extent_buffer(struct btrfs_fs_info *fs_info,
	return NULL;
 }

+#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
+struct extent_buffer *alloc_test_extent_buffer(struct btrfs_fs_info *fs_info,
+					       u64 start, unsigned long len)
+{
+	struct extent_buffer *eb, *exists = NULL;
+	int ret;
+
+	eb = find_extent_buffer(fs_info, start);
+	if (eb)
+		return eb;
+	eb = alloc_dummy_extent_buffer(start, len);
+	if (!eb)
+		return NULL;
+	eb->fs_info = fs_info;
+again:
+	ret = radix_tree_preload(GFP_NOFS & ~__GFP_HIGHMEM);
+	if (ret)
+		goto free_eb;
+	spin_lock(&fs_info->buffer_lock);
+	ret = radix_tree_insert(&fs_info->buffer_radix,
+				start >> PAGE_CACHE_SHIFT, eb);
+	spin_unlock(&fs_info->buffer_lock);
+	radix_tree_preload_end();
+	if (ret == -EEXIST) {
+		exists = find_extent_buffer(fs_info, start);
+		if (exists)
+			goto free_eb;
+		else
+			goto again;
+	}
+	check_buffer_tree_ref(eb);
+	set_bit(EXTENT_BUFFER_IN_TREE, &eb->bflags);
+
+	/*
+	 * We will free dummy extent buffer's if they come into
+	 * free_extent_buffer with a ref count of 2, but if we are using this we
+	 * want the buffers to stay in memory until we're done with them, so
+	 * bump the ref count again.
+	 */
+	atomic_inc(&eb->refs);
+	return eb;
+free_eb:
+	btrfs_release_extent_buffer(eb);
+	return exists;
+}
+#endif
+
 struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info,
					  u64 start, unsigned long len)
 {
@@ -4581,7 +4696,7 @@ struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info,
				spin_unlock(&mapping->private_lock);
				unlock_page(p);
				page_cache_release(p);
-				mark_extent_buffer_accessed(exists);
+				mark_extent_buffer_accessed(exists, p);
				goto free_eb;
			}

@@ -4596,7 +4711,6 @@ struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info,
		attach_extent_buffer_page(eb, p);
		spin_unlock(&mapping->private_lock);
		WARN_ON(PageDirty(p));
-		mark_page_accessed(p);
		eb->pages[i] = p;
		if (!PageUptodate(p))
			uptodate = 0;
@@ -4954,6 +5068,43 @@ void read_extent_buffer(struct extent_buffer *eb, void *dstv,
	}
 }

+int read_extent_buffer_to_user(struct extent_buffer *eb, void __user *dstv,
+			       unsigned long start,
+			       unsigned long len)
+{
+	size_t cur;
+	size_t offset;
+	struct page *page;
+	char *kaddr;
+	char __user *dst = (char __user *)dstv;
+	size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1);
+	unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT;
+	int ret = 0;
+
+	WARN_ON(start > eb->len);
+	WARN_ON(start + len > eb->start + eb->len);
+
+	offset = (start_offset + start) & (PAGE_CACHE_SIZE - 1);
+
+	while (len > 0) {
+		page = extent_buffer_page(eb, i);
+
+		cur = min(len, (PAGE_CACHE_SIZE - offset));
+		kaddr = page_address(page);
+		if (copy_to_user(dst, kaddr + offset, cur)) {
+			ret = -EFAULT;
+			break;
+		}
+
+		dst += cur;
+		len -= cur;
+		offset = 0;
+		i++;
+	}
+
+	return ret;
+}
+
 int map_private_extent_buffer(struct extent_buffer *eb, unsigned long start,
			      unsigned long min_len, char **map,
			      unsigned long *map_start,
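
For context on the new read_extent_buffer_to_user() helper added in the last hunk: it walks the extent buffer's pages with page_address() and copy_to_user(), so a caller that has located a leaf can hand item data to user space without an intermediate kernel bounce buffer. The sketch below is illustrative only and not part of this diff; the wrapper name copy_leaf_item_to_user() and its parameters are hypothetical, while read_extent_buffer_to_user(), btrfs_item_ptr_offset() and btrfs_item_size_nr() are real helpers from this era of the btrfs code.

/*
 * Illustrative sketch (not from this commit): copy one leaf item to a
 * user-supplied buffer using the helper added above.
 */
static int copy_leaf_item_to_user(struct extent_buffer *leaf, int slot,
				  void __user *ubuf, unsigned long ubuf_len)
{
	unsigned long item_off = btrfs_item_ptr_offset(leaf, slot);
	u32 item_len = btrfs_item_size_nr(leaf, slot);

	if (item_len > ubuf_len)
		return -EOVERFLOW;

	/*
	 * Copies page by page with copy_to_user() and returns -EFAULT if
	 * the destination is not writable.  Because the copy can fault,
	 * callers must not hold locks that the fault path might need.
	 */
	return read_extent_buffer_to_user(leaf, ubuf, item_off, item_len);
}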