-rw-r--r--  fs/btrfs/extent_io.c  330
-rw-r--r--  fs/btrfs/inode.c        2
2 files changed, 194 insertions, 138 deletions
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 51299c261d56..0b5fa91d9a88 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -3101,143 +3101,130 @@ static noinline void update_nr_written(struct page *page,
 }
 
 /*
- * the writepage semantics are similar to regular writepage.  extent
- * records are inserted to lock ranges in the tree, and as dirty areas
- * are found, they are marked writeback.  Then the lock bits are removed
- * and the end_io handler clears the writeback ranges
+ * helper for __extent_writepage, doing all of the delayed allocation setup.
+ *
+ * This returns 1 if our fill_delalloc function did all the work required
+ * to write the page (copy into inline extent).  In this case the IO has
+ * been started and the page is already unlocked.
+ *
+ * This returns 0 if all went well (page still locked)
+ * This returns < 0 if there were errors (page still locked)
  */
-static int __extent_writepage(struct page *page, struct writeback_control *wbc,
-			      void *data)
+static noinline_for_stack int writepage_delalloc(struct inode *inode,
+		struct page *page, struct writeback_control *wbc,
+		struct extent_page_data *epd,
+		u64 delalloc_start,
+		unsigned long *nr_written)
+{
+	struct extent_io_tree *tree = epd->tree;
+	u64 page_end = delalloc_start + PAGE_CACHE_SIZE - 1;
+	u64 nr_delalloc;
+	u64 delalloc_to_write = 0;
+	u64 delalloc_end = 0;
+	int ret;
+	int page_started = 0;
+
+	if (epd->extent_locked || !tree->ops || !tree->ops->fill_delalloc)
+		return 0;
+
+	while (delalloc_end < page_end) {
+		nr_delalloc = find_lock_delalloc_range(inode, tree,
+					       page,
+					       &delalloc_start,
+					       &delalloc_end,
+					       128 * 1024 * 1024);
+		if (nr_delalloc == 0) {
+			delalloc_start = delalloc_end + 1;
+			continue;
+		}
+		ret = tree->ops->fill_delalloc(inode, page,
+					       delalloc_start,
+					       delalloc_end,
+					       &page_started,
+					       nr_written);
+		/* File system has been set read-only */
+		if (ret) {
+			SetPageError(page);
+			/* fill_delalloc should be return < 0 for error
+			 * but just in case, we use > 0 here meaning the
+			 * IO is started, so we don't want to return > 0
+			 * unless things are going well.
+			 */
+			ret = ret < 0 ? ret : -EIO;
+			goto done;
+		}
+		/*
+		 * delalloc_end is already one less than the total
+		 * length, so we don't subtract one from
+		 * PAGE_CACHE_SIZE
+		 */
+		delalloc_to_write += (delalloc_end - delalloc_start +
+				      PAGE_CACHE_SIZE) >>
+				      PAGE_CACHE_SHIFT;
+		delalloc_start = delalloc_end + 1;
+	}
+	if (wbc->nr_to_write < delalloc_to_write) {
+		int thresh = 8192;
+
+		if (delalloc_to_write < thresh * 2)
+			thresh = delalloc_to_write;
+		wbc->nr_to_write = min_t(u64, delalloc_to_write,
+					 thresh);
+	}
+
+	/* did the fill delalloc function already unlock and start
+	 * the IO?
+	 */
+	if (page_started) {
+		/*
+		 * we've unlocked the page, so we can't update
+		 * the mapping's writeback index, just update
+		 * nr_to_write.
+		 */
+		wbc->nr_to_write -= *nr_written;
+		return 1;
+	}
+
+	ret = 0;
+
+done:
+	return ret;
+}
+
+/*
+ * helper for __extent_writepage.  This calls the writepage start hooks,
+ * and does the loop to map the page into extents and bios.
+ *
+ * We return 1 if the IO is started and the page is unlocked,
+ * 0 if all went well (page still locked)
+ * < 0 if there were errors (page still locked)
+ */
+static noinline_for_stack int __extent_writepage_io(struct inode *inode,
+				 struct page *page,
+				 struct writeback_control *wbc,
+				 struct extent_page_data *epd,
+				 loff_t i_size,
+				 unsigned long nr_written,
+				 int write_flags, int *nr_ret)
 {
-	struct inode *inode = page->mapping->host;
-	struct extent_page_data *epd = data;
 	struct extent_io_tree *tree = epd->tree;
 	u64 start = page_offset(page);
-	u64 delalloc_start;
 	u64 page_end = start + PAGE_CACHE_SIZE - 1;
 	u64 end;
 	u64 cur = start;
 	u64 extent_offset;
-	u64 last_byte = i_size_read(inode);
 	u64 block_start;
 	u64 iosize;
 	sector_t sector;
 	struct extent_state *cached_state = NULL;
 	struct extent_map *em;
 	struct block_device *bdev;
-	int ret;
-	int nr = 0;
 	size_t pg_offset = 0;
 	size_t blocksize;
-	loff_t i_size = i_size_read(inode);
-	unsigned long end_index = i_size >> PAGE_CACHE_SHIFT;
-	u64 nr_delalloc;
-	u64 delalloc_end;
-	int page_started;
-	int compressed;
-	int write_flags;
-	unsigned long nr_written = 0;
-	bool fill_delalloc = true;
-
-	if (wbc->sync_mode == WB_SYNC_ALL)
-		write_flags = WRITE_SYNC;
-	else
-		write_flags = WRITE;
-
-	trace___extent_writepage(page, inode, wbc);
-
-	WARN_ON(!PageLocked(page));
-
-	ClearPageError(page);
-
-	pg_offset = i_size & (PAGE_CACHE_SIZE - 1);
-	if (page->index > end_index ||
-	   (page->index == end_index && !pg_offset)) {
-		page->mapping->a_ops->invalidatepage(page, 0, PAGE_CACHE_SIZE);
-		unlock_page(page);
-		return 0;
-	}
-
-	if (page->index == end_index) {
-		char *userpage;
-
-		userpage = kmap_atomic(page);
-		memset(userpage + pg_offset, 0,
-		       PAGE_CACHE_SIZE - pg_offset);
-		kunmap_atomic(userpage);
-		flush_dcache_page(page);
-	}
-	pg_offset = 0;
-
-	set_page_extent_mapped(page);
-
-	if (!tree->ops || !tree->ops->fill_delalloc)
-		fill_delalloc = false;
-
-	delalloc_start = start;
-	delalloc_end = 0;
-	page_started = 0;
-	if (!epd->extent_locked && fill_delalloc) {
-		u64 delalloc_to_write = 0;
-		/*
-		 * make sure the wbc mapping index is at least updated
-		 * to this page.
-		 */
-		update_nr_written(page, wbc, 0);
-
-		while (delalloc_end < page_end) {
-			nr_delalloc = find_lock_delalloc_range(inode, tree,
-						       page,
-						       &delalloc_start,
-						       &delalloc_end,
-						       128 * 1024 * 1024);
-			if (nr_delalloc == 0) {
-				delalloc_start = delalloc_end + 1;
-				continue;
-			}
-			ret = tree->ops->fill_delalloc(inode, page,
-						       delalloc_start,
-						       delalloc_end,
-						       &page_started,
-						       &nr_written);
-			/* File system has been set read-only */
-			if (ret) {
-				SetPageError(page);
-				goto done;
-			}
-			/*
-			 * delalloc_end is already one less than the total
-			 * length, so we don't subtract one from
-			 * PAGE_CACHE_SIZE
-			 */
-			delalloc_to_write += (delalloc_end - delalloc_start +
-					      PAGE_CACHE_SIZE) >>
-					      PAGE_CACHE_SHIFT;
-			delalloc_start = delalloc_end + 1;
-		}
-		if (wbc->nr_to_write < delalloc_to_write) {
-			int thresh = 8192;
-
-			if (delalloc_to_write < thresh * 2)
-				thresh = delalloc_to_write;
-			wbc->nr_to_write = min_t(u64, delalloc_to_write,
-						 thresh);
-		}
+	int ret = 0;
+	int nr = 0;
+	bool compressed;
 
-		/* did the fill delalloc function already unlock and start
-		 * the IO?
-		 */
-		if (page_started) {
-			ret = 0;
-			/*
-			 * we've unlocked the page, so we can't update
-			 * the mapping's writeback index, just update
-			 * nr_to_write.
-			 */
-			wbc->nr_to_write -= nr_written;
-			goto done_unlocked;
-		}
-	}
 	if (tree->ops && tree->ops->writepage_start_hook) {
 		ret = tree->ops->writepage_start_hook(page, start,
 						      page_end);
@@ -3247,9 +3234,10 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
 				wbc->pages_skipped++;
 			else
 				redirty_page_for_writepage(wbc, page);
+
 			update_nr_written(page, wbc, nr_written);
 			unlock_page(page);
-			ret = 0;
+			ret = 1;
 			goto done_unlocked;
 		}
 	}
@@ -3261,7 +3249,7 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
 	update_nr_written(page, wbc, nr_written + 1);
 
 	end = page_end;
-	if (last_byte <= start) {
+	if (i_size <= start) {
 		if (tree->ops && tree->ops->writepage_end_io_hook)
 			tree->ops->writepage_end_io_hook(page, start,
 							 page_end, NULL, 1);
@@ -3271,7 +3259,8 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
 	blocksize = inode->i_sb->s_blocksize;
 
 	while (cur <= end) {
-		if (cur >= last_byte) {
+		u64 em_end;
+		if (cur >= i_size) {
 			if (tree->ops && tree->ops->writepage_end_io_hook)
 				tree->ops->writepage_end_io_hook(page, cur,
 							 page_end, NULL, 1);
@@ -3286,9 +3275,10 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
 		}
 
 		extent_offset = cur - em->start;
-		BUG_ON(extent_map_end(em) <= cur);
+		em_end = extent_map_end(em);
+		BUG_ON(em_end <= cur);
 		BUG_ON(end < cur);
-		iosize = min(extent_map_end(em) - cur, end - cur + 1);
+		iosize = min(em_end - cur, end - cur + 1);
 		iosize = ALIGN(iosize, blocksize);
 		sector = (em->block_start + extent_offset) >> 9;
 		bdev = em->bdev;
@@ -3324,13 +3314,6 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
 			pg_offset += iosize;
 			continue;
 		}
-		/* leave this out until we have a page_mkwrite call */
-		if (0 && !test_range_bit(tree, cur, cur + iosize - 1,
-				   EXTENT_DIRTY, 0, NULL)) {
-			cur = cur + iosize;
-			pg_offset += iosize;
-			continue;
-		}
 
 		if (tree->ops && tree->ops->writepage_io_hook) {
 			ret = tree->ops->writepage_io_hook(page, cur,
@@ -3341,7 +3324,7 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
 		if (ret) {
 			SetPageError(page);
 		} else {
-			unsigned long max_nr = end_index + 1;
+			unsigned long max_nr = (i_size >> PAGE_CACHE_SHIFT) + 1;
 
 			set_range_writeback(tree, cur, cur + iosize - 1);
 			if (!PageWriteback(page)) {
@@ -3363,6 +3346,81 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
 		nr++;
 	}
 done:
+	*nr_ret = nr;
+
+done_unlocked:
+
+	/* drop our reference on any cached states */
+	free_extent_state(cached_state);
+	return ret;
+}
+
+/*
+ * the writepage semantics are similar to regular writepage.  extent
+ * records are inserted to lock ranges in the tree, and as dirty areas
+ * are found, they are marked writeback.  Then the lock bits are removed
+ * and the end_io handler clears the writeback ranges
+ */
+static int __extent_writepage(struct page *page, struct writeback_control *wbc,
+			      void *data)
+{
+	struct inode *inode = page->mapping->host;
+	struct extent_page_data *epd = data;
+	u64 start = page_offset(page);
+	u64 page_end = start + PAGE_CACHE_SIZE - 1;
+	int ret;
+	int nr = 0;
+	size_t pg_offset = 0;
+	loff_t i_size = i_size_read(inode);
+	unsigned long end_index = i_size >> PAGE_CACHE_SHIFT;
+	int write_flags;
+	unsigned long nr_written = 0;
+
+	if (wbc->sync_mode == WB_SYNC_ALL)
+		write_flags = WRITE_SYNC;
+	else
+		write_flags = WRITE;
+
+	trace___extent_writepage(page, inode, wbc);
+
+	WARN_ON(!PageLocked(page));
+
+	ClearPageError(page);
+
+	pg_offset = i_size & (PAGE_CACHE_SIZE - 1);
+	if (page->index > end_index ||
+	   (page->index == end_index && !pg_offset)) {
+		page->mapping->a_ops->invalidatepage(page, 0, PAGE_CACHE_SIZE);
+		unlock_page(page);
+		return 0;
+	}
+
+	if (page->index == end_index) {
+		char *userpage;
+
+		userpage = kmap_atomic(page);
+		memset(userpage + pg_offset, 0,
+		       PAGE_CACHE_SIZE - pg_offset);
+		kunmap_atomic(userpage);
+		flush_dcache_page(page);
+	}
+
+	pg_offset = 0;
+
+	set_page_extent_mapped(page);
+
+	ret = writepage_delalloc(inode, page, wbc, epd, start, &nr_written);
+	if (ret == 1)
+		goto done_unlocked;
+	if (ret)
+		goto done;
+
+	ret = __extent_writepage_io(inode, page, wbc, epd,
+				    i_size, nr_written, write_flags, &nr);
+	if (ret == 1)
+		goto done_unlocked;
+
+done:
 	if (nr == 0) {
 		/* make sure the mapping tag for page dirty gets cleared */
 		set_page_writeback(page);
@@ -3373,12 +3431,10 @@ done:
 		end_extent_writepage(page, ret, start, page_end);
 	}
 	unlock_page(page);
+	return ret;
 
 done_unlocked:
-
-	/* drop our reference on any cached states */
-	free_extent_state(cached_state);
-	return ret;
+	return 0;
 }
 
 static int eb_wait(void *word)
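
The split relies on a single three-way return convention, spelled out in the
new helper comments above: 1 means the helper already started the IO and
unlocked the page, 0 means success with the page still locked, and < 0 means
an error with the page still locked.  __extent_writepage then just dispatches
on that value.  The sketch below is illustrative only -- freestanding
userspace C rather than kernel code, with made-up names (step_delalloc,
step_io, run_writepage) standing in for the real helpers:

	#include <stdio.h>

	/* 1: IO started, "page" unlocked; 0: ok, still locked; <0: error */
	static int step_delalloc(void) { return 0; }
	static int step_io(void)       { return 1; }

	static int run_writepage(void)
	{
		int ret;

		ret = step_delalloc();
		if (ret == 1)
			goto done_unlocked;	/* helper finished everything */
		if (ret < 0)
			goto done;		/* error, page still "locked" */

		ret = step_io();
		if (ret == 1)
			goto done_unlocked;

	done:
		/* success or error with the page still locked: unlock here */
		printf("unlocking, ret = %d\n", ret);
		return ret;

	done_unlocked:
		return 0;
	}

	int main(void)
	{
		return run_writepage();
	}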
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index a462da1a3e6a..992aae6c00b0 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -125,7 +125,7 @@ static int btrfs_init_inode_security(struct btrfs_trans_handle *trans,
  * the btree.  The caller should have done a btrfs_drop_extents so that
  * no overlapping inline items exist in the btree
  */
-static noinline int insert_inline_extent(struct btrfs_trans_handle *trans,
+static int insert_inline_extent(struct btrfs_trans_handle *trans,
 			struct btrfs_path *path, int extent_inserted,
 			struct btrfs_root *root, struct inode *inode,
 			u64 start, size_t size, size_t compressed_size,
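
One arithmetic detail worth calling out: the delalloc_to_write accounting in
writepage_delalloc rounds a byte range up to whole pages with
(delalloc_end - delalloc_start + PAGE_CACHE_SIZE) >> PAGE_CACHE_SHIFT.
Because delalloc_end is inclusive (already one less than the total length),
adding a full PAGE_CACHE_SIZE replaces the usual "+ size - 1" round-up.  A
minimal sketch, assuming a 4K page size (the kernel gets the real values from
PAGE_CACHE_SIZE and PAGE_CACHE_SHIFT):

	#include <stdio.h>

	#define PAGE_BYTES 4096UL	/* stand-in for PAGE_CACHE_SIZE */
	#define PAGE_SHIFT_BITS 12	/* stand-in for PAGE_CACHE_SHIFT */

	/* end is inclusive, so no "- 1" is needed in the round-up */
	static unsigned long delalloc_pages(unsigned long start,
					    unsigned long end)
	{
		return (end - start + PAGE_BYTES) >> PAGE_SHIFT_BITS;
	}

	int main(void)
	{
		/* a single dirty byte still costs one page */
		printf("%lu\n", delalloc_pages(0, 0));		/* prints 1 */
		/* the inclusive range [0, 8191] spans two pages */
		printf("%lu\n", delalloc_pages(0, 8191));	/* prints 2 */
		return 0;
	}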