aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
authorMingming Cao <cmm@us.ibm.com>2008-07-11 19:27:31 -0400
committerTheodore Ts'o <tytso@mit.edu>2008-07-11 19:27:31 -0400
commit632eaeab1feb5d78c1e2bfb1d2dfc0ebb8ac187f (patch)
treef20c438da1e05dccc4c70083c635a20da139e116 /fs
parentf0e6c98593eb8a77edb7dd0edb22bb9f9368c567 (diff)
ext4: fix delalloc i_disksize early update issue
ext4_da_write_end() used walk_page_buffers() with a callback function of ext4_bh_unmapped_or_delay() to check if it extended the file size without allocating any blocks (since in this case i_disksize needs to be updated). However, this didn't work properly because the buffer head has not been marked dirty yet --- this is done later in block_commit_write() --- which caused ext4_bh_unmapped_or_delay() to always return false. In addition, walk_page_buffers() checks all of the buffer heads covering the page, and the only buffer_head that should be checked is the one covering the end of the write. Otherwise, given a 1k blocksize filesystem and a 4k page size, the buffer head covering the first 1k stripe of the file could be unmapped (because it was a sparse file), and the second or third buffer_head covering that page could be mapped, and using walk_page_buffers() would fail in this case since it would stop at the first unmapped buffer_head and return true. The core problem is that walk_page_buffers() was intended to do work in a callback function, and a non-zero return value indicated a failure, which terminated the walk of the buffer heads covering the page. It was not intended to be used with a boolean function, such as ext4_bh_unmapped_or_delay(). Add an additional fix from Aneesh to protect the i_disksize update from racing with truncate. Signed-off-by: Mingming Cao <cmm@us.ibm.com> Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com> Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
Diffstat (limited to 'fs')
-rw-r--r--fs/ext4/inode.c63
1 files changed, 47 insertions, 16 deletions
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index ce47847bb37d..0fbe678d40bb 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -2298,6 +2298,29 @@ out:
2298 return ret; 2298 return ret;
2299} 2299}
2300 2300
2301/*
2302 * Check if we should update i_disksize
2303 * when writing to the end of the file without requiring block allocation
2304 */
2305static int ext4_da_should_update_i_disksize(struct page *page,
2306 unsigned long offset)
2307{
2308 struct buffer_head *bh;
2309 struct inode *inode = page->mapping->host;
2310 unsigned int idx;
2311 int i;
2312
2313 bh = page_buffers(page);
2314 idx = offset >> inode->i_blkbits;
2315
2316 for (i=0; i < idx; i++)
2317 bh = bh->b_this_page;
2318
2319 if (!buffer_mapped(bh) || (buffer_delay(bh)))
2320 return 0;
2321 return 1;
2322}
2323
2301static int ext4_da_write_end(struct file *file, 2324static int ext4_da_write_end(struct file *file,
2302 struct address_space *mapping, 2325 struct address_space *mapping,
2303 loff_t pos, unsigned len, unsigned copied, 2326 loff_t pos, unsigned len, unsigned copied,
@@ -2307,6 +2330,10 @@ static int ext4_da_write_end(struct file *file,
2307 int ret = 0, ret2; 2330 int ret = 0, ret2;
2308 handle_t *handle = ext4_journal_current_handle(); 2331 handle_t *handle = ext4_journal_current_handle();
2309 loff_t new_i_size; 2332 loff_t new_i_size;
2333 unsigned long start, end;
2334
2335 start = pos & (PAGE_CACHE_SIZE - 1);
2336 end = start + copied -1;
2310 2337
2311 /* 2338 /*
2312 * generic_write_end() will run mark_inode_dirty() if i_size 2339 * generic_write_end() will run mark_inode_dirty() if i_size
@@ -2315,18 +2342,23 @@ static int ext4_da_write_end(struct file *file,
2315 */ 2342 */
2316 2343
2317 new_i_size = pos + copied; 2344 new_i_size = pos + copied;
2318 if (new_i_size > EXT4_I(inode)->i_disksize) 2345 if (new_i_size > EXT4_I(inode)->i_disksize) {
2319 if (!walk_page_buffers(NULL, page_buffers(page), 2346 if (ext4_da_should_update_i_disksize(page, end)) {
2320 0, len, NULL, ext4_bh_unmapped_or_delay)){ 2347 down_write(&EXT4_I(inode)->i_data_sem);
2321 /* 2348 if (new_i_size > EXT4_I(inode)->i_disksize) {
2322 * Updating i_disksize when extending file without 2349 /*
2323 * needing block allocation 2350 * Updating i_disksize when extending file
2324 */ 2351 * without needing block allocation
2325 if (ext4_should_order_data(inode)) 2352 */
2326 ret = ext4_jbd2_file_inode(handle, inode); 2353 if (ext4_should_order_data(inode))
2354 ret = ext4_jbd2_file_inode(handle,
2355 inode);
2327 2356
2328 EXT4_I(inode)->i_disksize = new_i_size; 2357 EXT4_I(inode)->i_disksize = new_i_size;
2358 }
2359 up_write(&EXT4_I(inode)->i_data_sem);
2329 } 2360 }
2361 }
2330 ret2 = generic_write_end(file, mapping, pos, len, copied, 2362 ret2 = generic_write_end(file, mapping, pos, len, copied,
2331 page, fsdata); 2363 page, fsdata);
2332 copied = ret2; 2364 copied = ret2;
@@ -3394,6 +3426,11 @@ void ext4_truncate(struct inode *inode)
3394 goto out_stop; 3426 goto out_stop;
3395 3427
3396 /* 3428 /*
3429 * From here we block out all ext4_get_block() callers who want to
3430 * modify the block allocation tree.
3431 */
3432 down_write(&ei->i_data_sem);
3433 /*
3397 * The orphan list entry will now protect us from any crash which 3434 * The orphan list entry will now protect us from any crash which
3398 * occurs before the truncate completes, so it is now safe to propagate 3435 * occurs before the truncate completes, so it is now safe to propagate
3399 * the new, shorter inode size (held for now in i_size) into the 3436 * the new, shorter inode size (held for now in i_size) into the
@@ -3402,12 +3439,6 @@ void ext4_truncate(struct inode *inode)
3402 */ 3439 */
3403 ei->i_disksize = inode->i_size; 3440 ei->i_disksize = inode->i_size;
3404 3441
3405 /*
3406 * From here we block out all ext4_get_block() callers who want to
3407 * modify the block allocation tree.
3408 */
3409 down_write(&ei->i_data_sem);
3410
3411 if (n == 1) { /* direct blocks */ 3442 if (n == 1) { /* direct blocks */
3412 ext4_free_data(handle, inode, NULL, i_data+offsets[0], 3443 ext4_free_data(handle, inode, NULL, i_data+offsets[0],
3413 i_data + EXT4_NDIR_BLOCKS); 3444 i_data + EXT4_NDIR_BLOCKS);