aboutsummaryrefslogtreecommitdiffstats
path: root/fs/ext4/inode.c
diff options
context:
space:
mode:
authorAneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>2008-10-08 23:13:30 -0400
committerTheodore Ts'o <tytso@mit.edu>2008-10-08 23:13:30 -0400
commit79f0be8d2e6ebde27dfb3beff18eb689d5c4e36c (patch)
tree7ddadff1a3f0f4f6814104aed42f20c52a7f5f24 /fs/ext4/inode.c
parent6bc6e63fcd7dac9e633ea29f1fddd9580ab28f3f (diff)
ext4: Switch to non delalloc mode when we are low on free blocks count.
The delayed allocation code allocates blocks during writepages(), which cannot handle block allocation failures. To deal with this, we switch away from delayed allocation mode when we are running low on free blocks. This also allows us to avoid needing to reserve a large number of meta-data blocks in case all of the requested blocks are discontiguous. Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com> Signed-off-by: Mingming Cao <cmm@us.ibm.com> Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
Diffstat (limited to 'fs/ext4/inode.c')
-rw-r--r--fs/ext4/inode.c52
1 files changed, 50 insertions, 2 deletions
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 7875a2dd54b9..b1a6a7373f07 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -2458,6 +2458,33 @@ out_writepages:
2458 return ret; 2458 return ret;
2459} 2459}
2460 2460
2461#define FALL_BACK_TO_NONDELALLOC 1 /* stored in *fsdata by ext4_da_write_begin when it falls back to ext4_write_begin */
2462static int ext4_nonda_switch(struct super_block *sb)
2463{
2464 s64 free_blocks, dirty_blocks;
2465 struct ext4_sb_info *sbi = EXT4_SB(sb);
2466
2467 /*
2468 * Switch to non-delalloc mode if we are running low on
2469 * free blocks. The free block accounting via percpu
2470 * counters can get slightly wrong, with FBC_BATCH getting
2471 * accumulated on each CPU without updating the global counters.
2472 * Delalloc needs accurate free block accounting, so switch
2473 * to non-delalloc when we are near the error range.
2474 */
2475 free_blocks = percpu_counter_read_positive(&sbi->s_freeblocks_counter);
2476 dirty_blocks = percpu_counter_read_positive(&sbi->s_dirtyblocks_counter);
2477 if (2 * free_blocks < 3 * dirty_blocks ||
2478 free_blocks < (dirty_blocks + EXT4_FREEBLOCKS_WATERMARK)) {
2479 /*
2480 * Free block count is less than 150% of dirty blocks, or is less
2481 * than dirty blocks plus the watermark: return 1 (caller falls back).
2482 */
2483 return 1;
2484 }
2485 return 0;
2486}
2487
2461static int ext4_da_write_begin(struct file *file, struct address_space *mapping, 2488static int ext4_da_write_begin(struct file *file, struct address_space *mapping,
2462 loff_t pos, unsigned len, unsigned flags, 2489 loff_t pos, unsigned len, unsigned flags,
2463 struct page **pagep, void **fsdata) 2490 struct page **pagep, void **fsdata)
@@ -2472,6 +2499,13 @@ static int ext4_da_write_begin(struct file *file, struct address_space *mapping,
2472 index = pos >> PAGE_CACHE_SHIFT; 2499 index = pos >> PAGE_CACHE_SHIFT;
2473 from = pos & (PAGE_CACHE_SIZE - 1); 2500 from = pos & (PAGE_CACHE_SIZE - 1);
2474 to = from + len; 2501 to = from + len;
2502
2503 if (ext4_nonda_switch(inode->i_sb)) {
2504 *fsdata = (void *)FALL_BACK_TO_NONDELALLOC;
2505 return ext4_write_begin(file, mapping, pos,
2506 len, flags, pagep, fsdata);
2507 }
2508 *fsdata = (void *)0;
2475retry: 2509retry:
2476 /* 2510 /*
2477 * With delayed allocation, we don't log the i_disksize update 2511 * With delayed allocation, we don't log the i_disksize update
@@ -2540,6 +2574,19 @@ static int ext4_da_write_end(struct file *file,
2540 handle_t *handle = ext4_journal_current_handle(); 2574 handle_t *handle = ext4_journal_current_handle();
2541 loff_t new_i_size; 2575 loff_t new_i_size;
2542 unsigned long start, end; 2576 unsigned long start, end;
2577 int write_mode = (int)(unsigned long)fsdata;
2578
2579 if (write_mode == FALL_BACK_TO_NONDELALLOC) {
2580 if (ext4_should_order_data(inode)) {
2581 return ext4_ordered_write_end(file, mapping, pos,
2582 len, copied, page, fsdata);
2583 } else if (ext4_should_writeback_data(inode)) {
2584 return ext4_writeback_write_end(file, mapping, pos,
2585 len, copied, page, fsdata);
2586 } else {
2587 BUG();
2588 }
2589 }
2543 2590
2544 start = pos & (PAGE_CACHE_SIZE - 1); 2591 start = pos & (PAGE_CACHE_SIZE - 1);
2545 end = start + copied - 1; 2592 end = start + copied - 1;
@@ -4877,6 +4924,7 @@ int ext4_page_mkwrite(struct vm_area_struct *vma, struct page *page)
4877 loff_t size; 4924 loff_t size;
4878 unsigned long len; 4925 unsigned long len;
4879 int ret = -EINVAL; 4926 int ret = -EINVAL;
4927 void *fsdata;
4880 struct file *file = vma->vm_file; 4928 struct file *file = vma->vm_file;
4881 struct inode *inode = file->f_path.dentry->d_inode; 4929 struct inode *inode = file->f_path.dentry->d_inode;
4882 struct address_space *mapping = inode->i_mapping; 4930 struct address_space *mapping = inode->i_mapping;
@@ -4915,11 +4963,11 @@ int ext4_page_mkwrite(struct vm_area_struct *vma, struct page *page)
4915 * on the same page though 4963 * on the same page though
4916 */ 4964 */
4917 ret = mapping->a_ops->write_begin(file, mapping, page_offset(page), 4965 ret = mapping->a_ops->write_begin(file, mapping, page_offset(page),
4918 len, AOP_FLAG_UNINTERRUPTIBLE, &page, NULL); 4966 len, AOP_FLAG_UNINTERRUPTIBLE, &page, &fsdata);
4919 if (ret < 0) 4967 if (ret < 0)
4920 goto out_unlock; 4968 goto out_unlock;
4921 ret = mapping->a_ops->write_end(file, mapping, page_offset(page), 4969 ret = mapping->a_ops->write_end(file, mapping, page_offset(page),
4922 len, len, page, NULL); 4970 len, len, page, fsdata);
4923 if (ret < 0) 4971 if (ret < 0)
4924 goto out_unlock; 4972 goto out_unlock;
4925 ret = 0; 4973 ret = 0;