diff options
author | Mingming Cao <cmm@us.ibm.com> | 2008-07-11 19:27:31 -0400 |
---|---|---|
committer | Theodore Ts'o <tytso@mit.edu> | 2008-07-11 19:27:31 -0400 |
commit | 632eaeab1feb5d78c1e2bfb1d2dfc0ebb8ac187f (patch) | |
tree | f20c438da1e05dccc4c70083c635a20da139e116 /fs/ext4/inode.c | |
parent | f0e6c98593eb8a77edb7dd0edb22bb9f9368c567 (diff) |
ext4: fix delalloc i_disksize early update issue
ext4_da_write_end() used walk_page_buffers() with a callback function of
ext4_bh_unmapped_or_delay() to check if it extended the file size
without allocating any blocks (since in this case i_disksize needs to be
updated). However, this didn't work properly because the buffer head
has not been marked dirty yet --- this is done later in
block_commit_write() --- which caused ext4_bh_unmapped_or_delay() to
always return false.
In addition, walk_page_buffers() checks all of the buffer heads covering
the page, and the only buffer_head that should be checked is the one
covering the end of the write. Otherwise, given a 1k blocksize
filesystem and a 4k page size, the buffer head covering the first 1k
stripe of the file could be unmapped (because it was a sparse file), and
the second or third buffer_head covering that page could be mapped, and
using walk_page_buffers() would fail in this case since it would stop at
the first unmapped buffer_head and return true.
The core problem is that walk_page_buffers() was intended to do work in
a callback function, and a non-zero return value indicated a failure,
which terminated the walk of the buffer heads covering the page. It was
not intended to be used with a boolean function, such as
ext4_bh_unmapped_or_delay().
Add an additional fix from Aneesh to protect the i_disksize update against a race with truncate.
Signed-off-by: Mingming Cao <cmm@us.ibm.com>
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
Diffstat (limited to 'fs/ext4/inode.c')
-rw-r--r-- | fs/ext4/inode.c | 63 |
1 files changed, 47 insertions, 16 deletions
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index ce47847bb37..0fbe678d40b 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c | |||
@@ -2298,6 +2298,29 @@ out: | |||
2298 | return ret; | 2298 | return ret; |
2299 | } | 2299 | } |
2300 | 2300 | ||
2301 | /* | ||
2302 | * Check if we should update i_disksize | ||
2303 | * when write to the end of file but not require block allocation | ||
2304 | */ | ||
2305 | static int ext4_da_should_update_i_disksize(struct page *page, | ||
2306 | unsigned long offset) | ||
2307 | { | ||
2308 | struct buffer_head *bh; | ||
2309 | struct inode *inode = page->mapping->host; | ||
2310 | unsigned int idx; | ||
2311 | int i; | ||
2312 | |||
2313 | bh = page_buffers(page); | ||
2314 | idx = offset >> inode->i_blkbits; | ||
2315 | |||
2316 | for (i=0; i < idx; i++) | ||
2317 | bh = bh->b_this_page; | ||
2318 | |||
2319 | if (!buffer_mapped(bh) || (buffer_delay(bh))) | ||
2320 | return 0; | ||
2321 | return 1; | ||
2322 | } | ||
2323 | |||
2301 | static int ext4_da_write_end(struct file *file, | 2324 | static int ext4_da_write_end(struct file *file, |
2302 | struct address_space *mapping, | 2325 | struct address_space *mapping, |
2303 | loff_t pos, unsigned len, unsigned copied, | 2326 | loff_t pos, unsigned len, unsigned copied, |
@@ -2307,6 +2330,10 @@ static int ext4_da_write_end(struct file *file, | |||
2307 | int ret = 0, ret2; | 2330 | int ret = 0, ret2; |
2308 | handle_t *handle = ext4_journal_current_handle(); | 2331 | handle_t *handle = ext4_journal_current_handle(); |
2309 | loff_t new_i_size; | 2332 | loff_t new_i_size; |
2333 | unsigned long start, end; | ||
2334 | |||
2335 | start = pos & (PAGE_CACHE_SIZE - 1); | ||
2336 | end = start + copied -1; | ||
2310 | 2337 | ||
2311 | /* | 2338 | /* |
2312 | * generic_write_end() will run mark_inode_dirty() if i_size | 2339 | * generic_write_end() will run mark_inode_dirty() if i_size |
@@ -2315,18 +2342,23 @@ static int ext4_da_write_end(struct file *file, | |||
2315 | */ | 2342 | */ |
2316 | 2343 | ||
2317 | new_i_size = pos + copied; | 2344 | new_i_size = pos + copied; |
2318 | if (new_i_size > EXT4_I(inode)->i_disksize) | 2345 | if (new_i_size > EXT4_I(inode)->i_disksize) { |
2319 | if (!walk_page_buffers(NULL, page_buffers(page), | 2346 | if (ext4_da_should_update_i_disksize(page, end)) { |
2320 | 0, len, NULL, ext4_bh_unmapped_or_delay)){ | 2347 | down_write(&EXT4_I(inode)->i_data_sem); |
2321 | /* | 2348 | if (new_i_size > EXT4_I(inode)->i_disksize) { |
2322 | * Updating i_disksize when extending file without | 2349 | /* |
2323 | * needing block allocation | 2350 | * Updating i_disksize when extending file |
2324 | */ | 2351 | * without needing block allocation |
2325 | if (ext4_should_order_data(inode)) | 2352 | */ |
2326 | ret = ext4_jbd2_file_inode(handle, inode); | 2353 | if (ext4_should_order_data(inode)) |
2354 | ret = ext4_jbd2_file_inode(handle, | ||
2355 | inode); | ||
2327 | 2356 | ||
2328 | EXT4_I(inode)->i_disksize = new_i_size; | 2357 | EXT4_I(inode)->i_disksize = new_i_size; |
2358 | } | ||
2359 | up_write(&EXT4_I(inode)->i_data_sem); | ||
2329 | } | 2360 | } |
2361 | } | ||
2330 | ret2 = generic_write_end(file, mapping, pos, len, copied, | 2362 | ret2 = generic_write_end(file, mapping, pos, len, copied, |
2331 | page, fsdata); | 2363 | page, fsdata); |
2332 | copied = ret2; | 2364 | copied = ret2; |
@@ -3394,6 +3426,11 @@ void ext4_truncate(struct inode *inode) | |||
3394 | goto out_stop; | 3426 | goto out_stop; |
3395 | 3427 | ||
3396 | /* | 3428 | /* |
3429 | * From here we block out all ext4_get_block() callers who want to | ||
3430 | * modify the block allocation tree. | ||
3431 | */ | ||
3432 | down_write(&ei->i_data_sem); | ||
3433 | /* | ||
3397 | * The orphan list entry will now protect us from any crash which | 3434 | * The orphan list entry will now protect us from any crash which |
3398 | * occurs before the truncate completes, so it is now safe to propagate | 3435 | * occurs before the truncate completes, so it is now safe to propagate |
3399 | * the new, shorter inode size (held for now in i_size) into the | 3436 | * the new, shorter inode size (held for now in i_size) into the |
@@ -3402,12 +3439,6 @@ void ext4_truncate(struct inode *inode) | |||
3402 | */ | 3439 | */ |
3403 | ei->i_disksize = inode->i_size; | 3440 | ei->i_disksize = inode->i_size; |
3404 | 3441 | ||
3405 | /* | ||
3406 | * From here we block out all ext4_get_block() callers who want to | ||
3407 | * modify the block allocation tree. | ||
3408 | */ | ||
3409 | down_write(&ei->i_data_sem); | ||
3410 | |||
3411 | if (n == 1) { /* direct blocks */ | 3442 | if (n == 1) { /* direct blocks */ |
3412 | ext4_free_data(handle, inode, NULL, i_data+offsets[0], | 3443 | ext4_free_data(handle, inode, NULL, i_data+offsets[0], |
3413 | i_data + EXT4_NDIR_BLOCKS); | 3444 | i_data + EXT4_NDIR_BLOCKS); |