path: root/fs/btrfs
author		Chris Mason <clm@fb.com>	2014-05-21 16:35:51 -0400
committer	Chris Mason <clm@fb.com>	2014-06-09 20:20:58 -0400
commit		40f765805f082ed679c55bf6ab60212e55fb6fc1 (patch)
tree		32896ba4b98e34067bc26834c8cc26bb13106f9c /fs/btrfs
parent		fc4adbff823f76577ece26dcb88bf6f8392dbd43 (diff)
Btrfs: split up __extent_writepage to lower stack usage
__extent_writepage has two unrelated parts.  First it does the delayed
allocation dance and second it does the mapping and IO for the page
we're actually writing.

This splits it up into those two parts so the stack from one doesn't
impact the stack from the other.

Signed-off-by: Chris Mason <clm@fb.com>
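The pattern is small enough to show in isolation: mark each half noinline_for_stack so the compiler cannot fold the helpers back into the caller, and keep only a few words of glue state in the caller, so the two large frames exist one at a time instead of added together.  The sketch below is a hypothetical miniature of that idea, not btrfs code; the helper names, buffer sizes, and return-code convention are invented, and only the noinline_for_stack annotation (the kernel defines it in <linux/compiler.h>; a userspace stub is provided here so the sketch compiles on its own) is the real mechanism the patch relies on.

/* Hypothetical sketch of the split -- not btrfs code. */
#include <string.h>

#ifndef noinline_for_stack
#define noinline_for_stack __attribute__((noinline))	/* userspace stand-in */
#endif

/* phase one: stand-in for the delayed allocation dance.
 * Returns 0 to continue, 1 if it did all the work itself, < 0 on error.
 */
static noinline_for_stack int prepare_phase(char *buf, unsigned long *nr_written)
{
	char scratch[512];	/* large locals live only while this frame exists */

	memset(scratch, 0, sizeof(scratch));
	buf[0] = scratch[0];
	(*nr_written)++;
	return 0;
}

/* phase two: stand-in for the mapping/IO loop, with its own large frame
 * that is only allocated after prepare_phase() has already returned.
 */
static noinline_for_stack int io_phase(char *buf, unsigned long nr_written)
{
	char scratch[512];

	memset(scratch, (int)(nr_written & 0xff), sizeof(scratch));
	buf[1] = scratch[0];
	return 0;
}

/* the caller keeps only a few words of state, so the worst-case stack
 * depth is max(prepare_phase frame, io_phase frame) rather than their sum.
 */
static int whole_operation(char *buf)
{
	unsigned long nr_written = 0;
	int ret;

	ret = prepare_phase(buf, &nr_written);
	if (ret)
		return ret < 0 ? ret : 0;	/* error, or phase one finished everything */
	return io_phase(buf, nr_written);
}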
Diffstat (limited to 'fs/btrfs')
-rw-r--r--	fs/btrfs/extent_io.c	330
-rw-r--r--	fs/btrfs/inode.c	2
2 files changed, 194 insertions, 138 deletions
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 51299c261d56..0b5fa91d9a88 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -3101,143 +3101,130 @@ static noinline void update_nr_written(struct page *page,
 }
 
 /*
- * the writepage semantics are similar to regular writepage.  extent
- * records are inserted to lock ranges in the tree, and as dirty areas
- * are found, they are marked writeback.  Then the lock bits are removed
- * and the end_io handler clears the writeback ranges
+ * helper for __extent_writepage, doing all of the delayed allocation setup.
+ *
+ * This returns 1 if our fill_delalloc function did all the work required
+ * to write the page (copy into inline extent).  In this case the IO has
+ * been started and the page is already unlocked.
+ *
+ * This returns 0 if all went well (page still locked)
+ * This returns < 0 if there were errors (page still locked)
  */
-static int __extent_writepage(struct page *page, struct writeback_control *wbc,
-			      void *data)
+static noinline_for_stack int writepage_delalloc(struct inode *inode,
+			      struct page *page, struct writeback_control *wbc,
+			      struct extent_page_data *epd,
+			      u64 delalloc_start,
+			      unsigned long *nr_written)
+{
+	struct extent_io_tree *tree = epd->tree;
+	u64 page_end = delalloc_start + PAGE_CACHE_SIZE - 1;
+	u64 nr_delalloc;
+	u64 delalloc_to_write = 0;
+	u64 delalloc_end = 0;
+	int ret;
+	int page_started = 0;
+
+	if (epd->extent_locked || !tree->ops || !tree->ops->fill_delalloc)
+		return 0;
+
+	while (delalloc_end < page_end) {
+		nr_delalloc = find_lock_delalloc_range(inode, tree,
+					       page,
+					       &delalloc_start,
+					       &delalloc_end,
+					       128 * 1024 * 1024);
+		if (nr_delalloc == 0) {
+			delalloc_start = delalloc_end + 1;
+			continue;
+		}
+		ret = tree->ops->fill_delalloc(inode, page,
+					       delalloc_start,
+					       delalloc_end,
+					       &page_started,
+					       nr_written);
+		/* File system has been set read-only */
+		if (ret) {
+			SetPageError(page);
+			/* fill_delalloc should be return < 0 for error
+			 * but just in case, we use > 0 here meaning the
+			 * IO is started, so we don't want to return > 0
+			 * unless things are going well.
+			 */
+			ret = ret < 0 ? ret : -EIO;
+			goto done;
+		}
+		/*
+		 * delalloc_end is already one less than the total
+		 * length, so we don't subtract one from
+		 * PAGE_CACHE_SIZE
+		 */
+		delalloc_to_write += (delalloc_end - delalloc_start +
+				      PAGE_CACHE_SIZE) >>
+				      PAGE_CACHE_SHIFT;
+		delalloc_start = delalloc_end + 1;
+	}
+	if (wbc->nr_to_write < delalloc_to_write) {
+		int thresh = 8192;
+
+		if (delalloc_to_write < thresh * 2)
+			thresh = delalloc_to_write;
+		wbc->nr_to_write = min_t(u64, delalloc_to_write,
+					 thresh);
+	}
+
+	/* did the fill delalloc function already unlock and start
+	 * the IO?
+	 */
+	if (page_started) {
+		/*
+		 * we've unlocked the page, so we can't update
+		 * the mapping's writeback index, just update
+		 * nr_to_write.
+		 */
+		wbc->nr_to_write -= *nr_written;
+		return 1;
+	}
+
+	ret = 0;
+
+done:
+	return ret;
+}
+
+/*
+ * helper for __extent_writepage.  This calls the writepage start hooks,
+ * and does the loop to map the page into extents and bios.
+ *
+ * We return 1 if the IO is started and the page is unlocked,
+ * 0 if all went well (page still locked)
+ * < 0 if there were errors (page still locked)
+ */
+static noinline_for_stack int __extent_writepage_io(struct inode *inode,
+			      struct page *page,
+			      struct writeback_control *wbc,
+			      struct extent_page_data *epd,
+			      loff_t i_size,
+			      unsigned long nr_written,
+			      int write_flags, int *nr_ret)
 {
-	struct inode *inode = page->mapping->host;
-	struct extent_page_data *epd = data;
 	struct extent_io_tree *tree = epd->tree;
 	u64 start = page_offset(page);
-	u64 delalloc_start;
 	u64 page_end = start + PAGE_CACHE_SIZE - 1;
 	u64 end;
 	u64 cur = start;
 	u64 extent_offset;
-	u64 last_byte = i_size_read(inode);
 	u64 block_start;
 	u64 iosize;
 	sector_t sector;
 	struct extent_state *cached_state = NULL;
 	struct extent_map *em;
 	struct block_device *bdev;
-	int ret;
-	int nr = 0;
 	size_t pg_offset = 0;
 	size_t blocksize;
-	loff_t i_size = i_size_read(inode);
-	unsigned long end_index = i_size >> PAGE_CACHE_SHIFT;
-	u64 nr_delalloc;
-	u64 delalloc_end;
-	int page_started;
-	int compressed;
-	int write_flags;
-	unsigned long nr_written = 0;
-	bool fill_delalloc = true;
-
-	if (wbc->sync_mode == WB_SYNC_ALL)
-		write_flags = WRITE_SYNC;
-	else
-		write_flags = WRITE;
-
-	trace___extent_writepage(page, inode, wbc);
-
-	WARN_ON(!PageLocked(page));
-
-	ClearPageError(page);
-
-	pg_offset = i_size & (PAGE_CACHE_SIZE - 1);
-	if (page->index > end_index ||
-	    (page->index == end_index && !pg_offset)) {
-		page->mapping->a_ops->invalidatepage(page, 0, PAGE_CACHE_SIZE);
-		unlock_page(page);
-		return 0;
-	}
-
-	if (page->index == end_index) {
-		char *userpage;
-
-		userpage = kmap_atomic(page);
-		memset(userpage + pg_offset, 0,
-		       PAGE_CACHE_SIZE - pg_offset);
-		kunmap_atomic(userpage);
-		flush_dcache_page(page);
-	}
-	pg_offset = 0;
-
-	set_page_extent_mapped(page);
-
-	if (!tree->ops || !tree->ops->fill_delalloc)
-		fill_delalloc = false;
-
-	delalloc_start = start;
-	delalloc_end = 0;
-	page_started = 0;
-	if (!epd->extent_locked && fill_delalloc) {
-		u64 delalloc_to_write = 0;
-		/*
-		 * make sure the wbc mapping index is at least updated
-		 * to this page.
-		 */
-		update_nr_written(page, wbc, 0);
-
-		while (delalloc_end < page_end) {
-			nr_delalloc = find_lock_delalloc_range(inode, tree,
-						       page,
-						       &delalloc_start,
-						       &delalloc_end,
-						       128 * 1024 * 1024);
-			if (nr_delalloc == 0) {
-				delalloc_start = delalloc_end + 1;
-				continue;
-			}
-			ret = tree->ops->fill_delalloc(inode, page,
-						       delalloc_start,
-						       delalloc_end,
-						       &page_started,
-						       &nr_written);
-			/* File system has been set read-only */
-			if (ret) {
-				SetPageError(page);
-				goto done;
-			}
-			/*
-			 * delalloc_end is already one less than the total
-			 * length, so we don't subtract one from
-			 * PAGE_CACHE_SIZE
-			 */
-			delalloc_to_write += (delalloc_end - delalloc_start +
-					      PAGE_CACHE_SIZE) >>
-					      PAGE_CACHE_SHIFT;
-			delalloc_start = delalloc_end + 1;
-		}
-		if (wbc->nr_to_write < delalloc_to_write) {
-			int thresh = 8192;
-
-			if (delalloc_to_write < thresh * 2)
-				thresh = delalloc_to_write;
-			wbc->nr_to_write = min_t(u64, delalloc_to_write,
-						 thresh);
-		}
+	int ret = 0;
+	int nr = 0;
+	bool compressed;
 
-		/* did the fill delalloc function already unlock and start
-		 * the IO?
-		 */
-		if (page_started) {
-			ret = 0;
-			/*
-			 * we've unlocked the page, so we can't update
-			 * the mapping's writeback index, just update
-			 * nr_to_write.
-			 */
-			wbc->nr_to_write -= nr_written;
-			goto done_unlocked;
-		}
-	}
 	if (tree->ops && tree->ops->writepage_start_hook) {
 		ret = tree->ops->writepage_start_hook(page, start,
 						      page_end);
@@ -3247,9 +3234,10 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
 				wbc->pages_skipped++;
 			else
 				redirty_page_for_writepage(wbc, page);
+
 			update_nr_written(page, wbc, nr_written);
 			unlock_page(page);
-			ret = 0;
+			ret = 1;
 			goto done_unlocked;
 		}
 	}
@@ -3261,7 +3249,7 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
 	update_nr_written(page, wbc, nr_written + 1);
 
 	end = page_end;
-	if (last_byte <= start) {
+	if (i_size <= start) {
 		if (tree->ops && tree->ops->writepage_end_io_hook)
 			tree->ops->writepage_end_io_hook(page, start,
 							 page_end, NULL, 1);
@@ -3271,7 +3259,8 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
 	blocksize = inode->i_sb->s_blocksize;
 
 	while (cur <= end) {
-		if (cur >= last_byte) {
+		u64 em_end;
+		if (cur >= i_size) {
 			if (tree->ops && tree->ops->writepage_end_io_hook)
 				tree->ops->writepage_end_io_hook(page, cur,
 							 page_end, NULL, 1);
@@ -3286,9 +3275,10 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
 		}
 
 		extent_offset = cur - em->start;
-		BUG_ON(extent_map_end(em) <= cur);
+		em_end = extent_map_end(em);
+		BUG_ON(em_end <= cur);
 		BUG_ON(end < cur);
-		iosize = min(extent_map_end(em) - cur, end - cur + 1);
+		iosize = min(em_end - cur, end - cur + 1);
 		iosize = ALIGN(iosize, blocksize);
 		sector = (em->block_start + extent_offset) >> 9;
 		bdev = em->bdev;
@@ -3324,13 +3314,6 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
 			pg_offset += iosize;
 			continue;
 		}
-		/* leave this out until we have a page_mkwrite call */
-		if (0 && !test_range_bit(tree, cur, cur + iosize - 1,
-				   EXTENT_DIRTY, 0, NULL)) {
-			cur = cur + iosize;
-			pg_offset += iosize;
-			continue;
-		}
 
 		if (tree->ops && tree->ops->writepage_io_hook) {
 			ret = tree->ops->writepage_io_hook(page, cur,
@@ -3341,7 +3324,7 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
 		if (ret) {
 			SetPageError(page);
 		} else {
-			unsigned long max_nr = end_index + 1;
+			unsigned long max_nr = (i_size >> PAGE_CACHE_SHIFT) + 1;
 
 			set_range_writeback(tree, cur, cur + iosize - 1);
 			if (!PageWriteback(page)) {
@@ -3363,6 +3346,81 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
 		nr++;
 	}
 done:
+	*nr_ret = nr;
+
+done_unlocked:
+
+	/* drop our reference on any cached states */
+	free_extent_state(cached_state);
+	return ret;
+}
+
+/*
+ * the writepage semantics are similar to regular writepage.  extent
+ * records are inserted to lock ranges in the tree, and as dirty areas
+ * are found, they are marked writeback.  Then the lock bits are removed
+ * and the end_io handler clears the writeback ranges
+ */
+static int __extent_writepage(struct page *page, struct writeback_control *wbc,
+			      void *data)
+{
+	struct inode *inode = page->mapping->host;
+	struct extent_page_data *epd = data;
+	u64 start = page_offset(page);
+	u64 page_end = start + PAGE_CACHE_SIZE - 1;
+	int ret;
+	int nr = 0;
+	size_t pg_offset = 0;
+	loff_t i_size = i_size_read(inode);
+	unsigned long end_index = i_size >> PAGE_CACHE_SHIFT;
+	int write_flags;
+	unsigned long nr_written = 0;
+
+	if (wbc->sync_mode == WB_SYNC_ALL)
+		write_flags = WRITE_SYNC;
+	else
+		write_flags = WRITE;
+
+	trace___extent_writepage(page, inode, wbc);
+
+	WARN_ON(!PageLocked(page));
+
+	ClearPageError(page);
+
+	pg_offset = i_size & (PAGE_CACHE_SIZE - 1);
+	if (page->index > end_index ||
+	    (page->index == end_index && !pg_offset)) {
+		page->mapping->a_ops->invalidatepage(page, 0, PAGE_CACHE_SIZE);
+		unlock_page(page);
+		return 0;
+	}
+
+	if (page->index == end_index) {
+		char *userpage;
+
+		userpage = kmap_atomic(page);
+		memset(userpage + pg_offset, 0,
+		       PAGE_CACHE_SIZE - pg_offset);
+		kunmap_atomic(userpage);
+		flush_dcache_page(page);
+	}
+
+	pg_offset = 0;
+
+	set_page_extent_mapped(page);
+
+	ret = writepage_delalloc(inode, page, wbc, epd, start, &nr_written);
+	if (ret == 1)
+		goto done_unlocked;
+	if (ret)
+		goto done;
+
+	ret = __extent_writepage_io(inode, page, wbc, epd,
+				    i_size, nr_written, write_flags, &nr);
+	if (ret == 1)
+		goto done_unlocked;
+
+done:
 	if (nr == 0) {
 		/* make sure the mapping tag for page dirty gets cleared */
 		set_page_writeback(page);
@@ -3373,12 +3431,10 @@ done:
 		end_extent_writepage(page, ret, start, page_end);
 	}
 	unlock_page(page);
+	return ret;
 
 done_unlocked:
-
-	/* drop our reference on any cached states */
-	free_extent_state(cached_state);
-	return ret;
+	return 0;
 }
 
 static int eb_wait(void *word)
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index a462da1a3e6a..992aae6c00b0 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -125,7 +125,7 @@ static int btrfs_init_inode_security(struct btrfs_trans_handle *trans,
  * the btree.  The caller should have done a btrfs_drop_extents so that
  * no overlapping inline items exist in the btree
  */
-static noinline int insert_inline_extent(struct btrfs_trans_handle *trans,
+static int insert_inline_extent(struct btrfs_trans_handle *trans,
 				struct btrfs_path *path, int extent_inserted,
 				struct btrfs_root *root, struct inode *inode,
 				u64 start, size_t size, size_t compressed_size,