diff options
-rw-r--r-- | fs/ext4/inode.c | 52 |
1 files changed, 50 insertions, 2 deletions
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 7875a2dd54b9..b1a6a7373f07 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c | |||
@@ -2458,6 +2458,33 @@ out_writepages: | |||
2458 | return ret; | 2458 | return ret; |
2459 | } | 2459 | } |
2460 | 2460 | ||
2461 | #define FALL_BACK_TO_NONDELALLOC 1 | ||
2462 | static int ext4_nonda_switch(struct super_block *sb) | ||
2463 | { | ||
2464 | s64 free_blocks, dirty_blocks; | ||
2465 | struct ext4_sb_info *sbi = EXT4_SB(sb); | ||
2466 | |||
2467 | /* | ||
2468 | * Switch to non-delalloc mode if we are running low | ||
2469 | * on free blocks. The free block accounting via percpu | ||
2470 | * counters can get slightly wrong with FBC_BATCH getting | ||
2471 | * accumulated on each CPU without updating global counters. | ||
2472 | * Delalloc needs accurate free block accounting, so switch | ||
2473 | * to non-delalloc when we are near the error range. | ||
2474 | */ | ||
2475 | free_blocks = percpu_counter_read_positive(&sbi->s_freeblocks_counter); | ||
2476 | dirty_blocks = percpu_counter_read_positive(&sbi->s_dirtyblocks_counter); | ||
2477 | if (2 * free_blocks < 3 * dirty_blocks || | ||
2478 | free_blocks < (dirty_blocks + EXT4_FREEBLOCKS_WATERMARK)) { | ||
2479 | /* | ||
2480 | * free block count is less than 150% of dirty blocks | ||
2481 | * or free blocks is less than dirty blocks plus the watermark | ||
2482 | */ | ||
2483 | return 1; | ||
2484 | } | ||
2485 | return 0; | ||
2486 | } | ||
2487 | |||
2461 | static int ext4_da_write_begin(struct file *file, struct address_space *mapping, | 2488 | static int ext4_da_write_begin(struct file *file, struct address_space *mapping, |
2462 | loff_t pos, unsigned len, unsigned flags, | 2489 | loff_t pos, unsigned len, unsigned flags, |
2463 | struct page **pagep, void **fsdata) | 2490 | struct page **pagep, void **fsdata) |
@@ -2472,6 +2499,13 @@ static int ext4_da_write_begin(struct file *file, struct address_space *mapping, | |||
2472 | index = pos >> PAGE_CACHE_SHIFT; | 2499 | index = pos >> PAGE_CACHE_SHIFT; |
2473 | from = pos & (PAGE_CACHE_SIZE - 1); | 2500 | from = pos & (PAGE_CACHE_SIZE - 1); |
2474 | to = from + len; | 2501 | to = from + len; |
2502 | |||
2503 | if (ext4_nonda_switch(inode->i_sb)) { | ||
2504 | *fsdata = (void *)FALL_BACK_TO_NONDELALLOC; | ||
2505 | return ext4_write_begin(file, mapping, pos, | ||
2506 | len, flags, pagep, fsdata); | ||
2507 | } | ||
2508 | *fsdata = (void *)0; | ||
2475 | retry: | 2509 | retry: |
2476 | /* | 2510 | /* |
2477 | * With delayed allocation, we don't log the i_disksize update | 2511 | * With delayed allocation, we don't log the i_disksize update |
@@ -2540,6 +2574,19 @@ static int ext4_da_write_end(struct file *file, | |||
2540 | handle_t *handle = ext4_journal_current_handle(); | 2574 | handle_t *handle = ext4_journal_current_handle(); |
2541 | loff_t new_i_size; | 2575 | loff_t new_i_size; |
2542 | unsigned long start, end; | 2576 | unsigned long start, end; |
2577 | int write_mode = (int)(unsigned long)fsdata; | ||
2578 | |||
2579 | if (write_mode == FALL_BACK_TO_NONDELALLOC) { | ||
2580 | if (ext4_should_order_data(inode)) { | ||
2581 | return ext4_ordered_write_end(file, mapping, pos, | ||
2582 | len, copied, page, fsdata); | ||
2583 | } else if (ext4_should_writeback_data(inode)) { | ||
2584 | return ext4_writeback_write_end(file, mapping, pos, | ||
2585 | len, copied, page, fsdata); | ||
2586 | } else { | ||
2587 | BUG(); | ||
2588 | } | ||
2589 | } | ||
2543 | 2590 | ||
2544 | start = pos & (PAGE_CACHE_SIZE - 1); | 2591 | start = pos & (PAGE_CACHE_SIZE - 1); |
2545 | end = start + copied - 1; | 2592 | end = start + copied - 1; |
@@ -4877,6 +4924,7 @@ int ext4_page_mkwrite(struct vm_area_struct *vma, struct page *page) | |||
4877 | loff_t size; | 4924 | loff_t size; |
4878 | unsigned long len; | 4925 | unsigned long len; |
4879 | int ret = -EINVAL; | 4926 | int ret = -EINVAL; |
4927 | void *fsdata; | ||
4880 | struct file *file = vma->vm_file; | 4928 | struct file *file = vma->vm_file; |
4881 | struct inode *inode = file->f_path.dentry->d_inode; | 4929 | struct inode *inode = file->f_path.dentry->d_inode; |
4882 | struct address_space *mapping = inode->i_mapping; | 4930 | struct address_space *mapping = inode->i_mapping; |
@@ -4915,11 +4963,11 @@ int ext4_page_mkwrite(struct vm_area_struct *vma, struct page *page) | |||
4915 | * on the same page though | 4963 | * on the same page though |
4916 | */ | 4964 | */ |
4917 | ret = mapping->a_ops->write_begin(file, mapping, page_offset(page), | 4965 | ret = mapping->a_ops->write_begin(file, mapping, page_offset(page), |
4918 | len, AOP_FLAG_UNINTERRUPTIBLE, &page, NULL); | 4966 | len, AOP_FLAG_UNINTERRUPTIBLE, &page, &fsdata); |
4919 | if (ret < 0) | 4967 | if (ret < 0) |
4920 | goto out_unlock; | 4968 | goto out_unlock; |
4921 | ret = mapping->a_ops->write_end(file, mapping, page_offset(page), | 4969 | ret = mapping->a_ops->write_end(file, mapping, page_offset(page), |
4922 | len, len, page, NULL); | 4970 | len, len, page, fsdata); |
4923 | if (ret < 0) | 4971 | if (ret < 0) |
4924 | goto out_unlock; | 4972 | goto out_unlock; |
4925 | ret = 0; | 4973 | ret = 0; |