-rw-r--r--   fs/ext4/inode.c | 169
 1 file changed, 124 insertions(+), 45 deletions(-)

diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 24518b57733e..ce47847bb37d 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -2003,11 +2003,15 @@ static int ext4_da_get_block_write(struct inode *inode, sector_t iblock,
 	handle_t *handle = NULL;
 
 	handle = ext4_journal_current_handle();
-	BUG_ON(handle == NULL);
-	BUG_ON(create == 0);
-
-	ret = ext4_get_blocks_wrap(handle, inode, iblock, max_blocks,
+	if (!handle) {
+		ret = ext4_get_blocks_wrap(handle, inode, iblock, max_blocks,
+				   bh_result, 0, 0, 0);
+		BUG_ON(!ret);
+	} else {
+		ret = ext4_get_blocks_wrap(handle, inode, iblock, max_blocks,
 				   bh_result, create, 0, EXT4_DELALLOC_RSVED);
+	}
+
 	if (ret > 0) {
 		bh_result->b_size = (ret << inode->i_blkbits);
 
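The hunk above makes the no-handle case a read-only lookup: with no journal handle the block must already be mapped, so ext4_get_blocks_wrap() is called with create = 0 and the result is asserted non-zero, while a caller holding a handle may still allocate. Below is a minimal userspace sketch of that decision, not kernel code; the demo_* names and the simplified "blocks below nblocks_mapped are mapped" model are invented for illustration only.

#include <assert.h>
#include <stdio.h>

struct demo_inode { int nblocks_mapped; };

/* stand-in for ext4_get_blocks_wrap(): returns number of blocks found/allocated */
static int demo_get_blocks(struct demo_inode *inode, int iblock,
			   int max_blocks, int create)
{
	if (iblock < inode->nblocks_mapped)
		return max_blocks;		/* already mapped on disk */
	return create ? max_blocks : 0;		/* allocate only if allowed */
}

static int demo_da_get_block_write(struct demo_inode *inode, int iblock,
				   int max_blocks, int have_handle, int create)
{
	int ret;

	if (!have_handle) {
		/* no transaction handle: lookup only, block must already exist */
		ret = demo_get_blocks(inode, iblock, max_blocks, 0);
		assert(ret != 0);
	} else {
		/* handle available: allocation (create != 0) is permitted */
		ret = demo_get_blocks(inode, iblock, max_blocks, create);
	}
	return ret;
}

int main(void)
{
	struct demo_inode inode = { .nblocks_mapped = 8 };

	printf("%d\n", demo_da_get_block_write(&inode, 2, 4, 0, 1)); /* 4: mapped, no handle */
	printf("%d\n", demo_da_get_block_write(&inode, 9, 4, 1, 1)); /* 4: allocated under handle */
	return 0;
}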
@@ -2040,15 +2044,37 @@ static int ext4_da_get_block_write(struct inode *inode, sector_t iblock,
 
 static int ext4_bh_unmapped_or_delay(handle_t *handle, struct buffer_head *bh)
 {
-	return !buffer_mapped(bh) || buffer_delay(bh);
+	/*
+	 * unmapped buffer is possible for holes.
+	 * delay buffer is possible with delayed allocation
+	 */
+	return ((!buffer_mapped(bh) || buffer_delay(bh)) && buffer_dirty(bh));
+}
+
+static int ext4_normal_get_block_write(struct inode *inode, sector_t iblock,
+				struct buffer_head *bh_result, int create)
+{
+	int ret = 0;
+	unsigned max_blocks = bh_result->b_size >> inode->i_blkbits;
+
+	/*
+	 * we don't want to do block allocation in writepage
+	 * so call get_block_wrap with create = 0
+	 */
+	ret = ext4_get_blocks_wrap(NULL, inode, iblock, max_blocks,
+				   bh_result, 0, 0, 0);
+	if (ret > 0) {
+		bh_result->b_size = (ret << inode->i_blkbits);
+		ret = 0;
+	}
+	return ret;
 }
 
 /*
- * get called vi ext4_da_writepages after taking page lock
- * We may end up doing block allocation here in case
- * mpage_da_map_blocks failed to allocate blocks.
- *
- * We also get called via journal_submit_inode_data_buffers
+ * get called vi ext4_da_writepages after taking page lock (have journal handle)
+ * get called via journal_submit_inode_data_buffers (no journal handle)
+ * get called via shrink_page_list via pdflush (no journal handle)
+ * or grab_page_cache when doing write_begin (have journal handle)
  */
 static int ext4_da_writepage(struct page *page,
 				struct writeback_control *wbc)
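The reworked ext4_bh_unmapped_or_delay() above now also requires the buffer to be dirty before it counts against writeback. A small standalone model of that truth table follows; it is plain C with made-up DEMO_BH_* flag bits, not the kernel's buffer_head API, and is only meant to show the "(unmapped or delayed) and dirty" combination the patch introduces.

#include <stdio.h>

#define DEMO_BH_MAPPED	0x1	/* block number is known */
#define DEMO_BH_DELAY	0x2	/* delayed allocation pending */
#define DEMO_BH_DIRTY	0x4	/* buffer has data to write */

/* mirrors the reworked predicate: needs-a-block AND actually dirty */
static int unmapped_or_delay(unsigned int flags)
{
	return (!(flags & DEMO_BH_MAPPED) || (flags & DEMO_BH_DELAY)) &&
		(flags & DEMO_BH_DIRTY);
}

int main(void)
{
	printf("%d\n", unmapped_or_delay(DEMO_BH_DIRTY));		    /* 1: dirty hole */
	printf("%d\n", unmapped_or_delay(DEMO_BH_MAPPED | DEMO_BH_DIRTY)); /* 0: mapped and dirty */
	printf("%d\n", unmapped_or_delay(DEMO_BH_DELAY));		    /* 0: delayed but clean */
	return 0;
}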
@@ -2056,37 +2082,61 @@ static int ext4_da_writepage(struct page *page,
 	int ret = 0;
 	loff_t size;
 	unsigned long len;
-	handle_t *handle = NULL;
 	struct buffer_head *page_bufs;
 	struct inode *inode = page->mapping->host;
 
-	handle = ext4_journal_current_handle();
-	if (!handle) {
-		/*
-		 * This can happen when we aren't called via
-		 * ext4_da_writepages() but directly (shrink_page_list).
-		 * We cannot easily start a transaction here so we just skip
-		 * writing the page in case we would have to do so.
-		 * We reach here also via journal_submit_inode_data_buffers
-		 */
-		size = i_size_read(inode);
+	size = i_size_read(inode);
+	if (page->index == size >> PAGE_CACHE_SHIFT)
+		len = size & ~PAGE_CACHE_MASK;
+	else
+		len = PAGE_CACHE_SIZE;
 
+	if (page_has_buffers(page)) {
 		page_bufs = page_buffers(page);
-		if (page->index == size >> PAGE_CACHE_SHIFT)
-			len = size & ~PAGE_CACHE_MASK;
-		else
-			len = PAGE_CACHE_SIZE;
-
-		if (walk_page_buffers(NULL, page_bufs, 0,
-				len, NULL, ext4_bh_unmapped_or_delay)) {
+		if (walk_page_buffers(NULL, page_bufs, 0, len, NULL,
+					ext4_bh_unmapped_or_delay)) {
 			/*
-			 * We can't do block allocation under
-			 * page lock without a handle . So redirty
-			 * the page and return
+			 * We don't want to do block allocation
+			 * So redirty the page and return
 			 * We may reach here when we do a journal commit
 			 * via journal_submit_inode_data_buffers.
 			 * If we don't have mapping block we just ignore
-			 * them
+			 * them. We can also reach here via shrink_page_list
+			 */
+			redirty_page_for_writepage(wbc, page);
+			unlock_page(page);
+			return 0;
+		}
+	} else {
+		/*
+		 * The test for page_has_buffers() is subtle:
+		 * We know the page is dirty but it lost buffers. That means
+		 * that at some moment in time after write_begin()/write_end()
+		 * has been called all buffers have been clean and thus they
+		 * must have been written at least once. So they are all
+		 * mapped and we can happily proceed with mapping them
+		 * and writing the page.
+		 *
+		 * Try to initialize the buffer_heads and check whether
+		 * all are mapped and non delay. We don't want to
+		 * do block allocation here.
+		 */
+		ret = block_prepare_write(page, 0, PAGE_CACHE_SIZE,
+					ext4_normal_get_block_write);
+		if (!ret) {
+			page_bufs = page_buffers(page);
+			/* check whether all are mapped and non delay */
+			if (walk_page_buffers(NULL, page_bufs, 0, len, NULL,
+						ext4_bh_unmapped_or_delay)) {
+				redirty_page_for_writepage(wbc, page);
+				unlock_page(page);
+				return 0;
+			}
+		} else {
+			/*
+			 * We can't do block allocation here
+			 * so just redity the page and unlock
+			 * and return
 			 */
 			redirty_page_for_writepage(wbc, page);
 			unlock_page(page);
@@ -2095,9 +2145,11 @@ static int ext4_da_writepage(struct page *page,
 	}
 
 	if (test_opt(inode->i_sb, NOBH) && ext4_should_writeback_data(inode))
-		ret = nobh_writepage(page, ext4_da_get_block_write, wbc);
+		ret = nobh_writepage(page, ext4_normal_get_block_write, wbc);
 	else
-		ret = block_write_full_page(page, ext4_da_get_block_write, wbc);
+		ret = block_write_full_page(page,
+					ext4_normal_get_block_write,
+					wbc);
 
 	return ret;
 }
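ext4_da_writepage() above first works out how many bytes of the page lie below i_size before walking the buffers: only the page containing EOF is partial, every earlier page is a full PAGE_CACHE_SIZE. A hedged userspace illustration of that arithmetic follows; the DEMO_PAGE_* macros and writeback_len() are invented names, and 4 KiB pages are assumed.

#include <stdio.h>

#define DEMO_PAGE_SHIFT	12
#define DEMO_PAGE_SIZE	(1UL << DEMO_PAGE_SHIFT)	/* 4096 */
#define DEMO_PAGE_MASK	(~(DEMO_PAGE_SIZE - 1))

/* bytes of this page that lie below i_size (mirrors the len computation above) */
static unsigned long writeback_len(unsigned long page_index, unsigned long i_size)
{
	if (page_index == i_size >> DEMO_PAGE_SHIFT)
		return i_size & ~DEMO_PAGE_MASK;	/* partial tail page */
	return DEMO_PAGE_SIZE;				/* full page */
}

int main(void)
{
	/* a 10000-byte file: pages 0 and 1 are full, page 2 holds the last 1808 bytes */
	printf("%lu %lu %lu\n", writeback_len(0, 10000),
	       writeback_len(1, 10000), writeback_len(2, 10000));
	return 0;
}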
@@ -2438,12 +2490,14 @@ static int __ext4_normal_writepage(struct page *page,
 	struct inode *inode = page->mapping->host;
 
 	if (test_opt(inode->i_sb, NOBH))
-		return nobh_writepage(page, ext4_get_block, wbc);
+		return nobh_writepage(page,
+					ext4_normal_get_block_write, wbc);
 	else
-		return block_write_full_page(page, ext4_get_block, wbc);
+		return block_write_full_page(page,
+					ext4_normal_get_block_write,
+					wbc);
 }
 
-
 static int ext4_normal_writepage(struct page *page,
 				struct writeback_control *wbc)
 {
@@ -2452,13 +2506,24 @@ static int ext4_normal_writepage(struct page *page,
 	loff_t len;
 
 	J_ASSERT(PageLocked(page));
-	J_ASSERT(page_has_buffers(page));
 	if (page->index == size >> PAGE_CACHE_SHIFT)
 		len = size & ~PAGE_CACHE_MASK;
 	else
 		len = PAGE_CACHE_SIZE;
-	BUG_ON(walk_page_buffers(NULL, page_buffers(page), 0, len, NULL,
-				ext4_bh_unmapped_or_delay));
+
+	if (page_has_buffers(page)) {
+		/* if page has buffers it should all be mapped
+		 * and allocated. If there are not buffers attached
+		 * to the page we know the page is dirty but it lost
+		 * buffers. That means that at some moment in time
+		 * after write_begin() / write_end() has been called
+		 * all buffers have been clean and thus they must have been
+		 * written at least once. So they are all mapped and we can
+		 * happily proceed with mapping them and writing the page.
+		 */
+		BUG_ON(walk_page_buffers(NULL, page_buffers(page), 0, len, NULL,
+					ext4_bh_unmapped_or_delay));
+	}
 
 	if (!ext4_journal_current_handle())
 		return __ext4_normal_writepage(page, wbc);
@@ -2478,7 +2543,8 @@ static int __ext4_journalled_writepage(struct page *page,
 	int ret = 0;
 	int err;
 
-	ret = block_prepare_write(page, 0, PAGE_CACHE_SIZE, ext4_get_block);
+	ret = block_prepare_write(page, 0, PAGE_CACHE_SIZE,
+					ext4_normal_get_block_write);
 	if (ret != 0)
 		goto out_unlock;
 
@@ -2525,13 +2591,24 @@ static int ext4_journalled_writepage(struct page *page,
 	loff_t len;
 
 	J_ASSERT(PageLocked(page));
-	J_ASSERT(page_has_buffers(page));
 	if (page->index == size >> PAGE_CACHE_SHIFT)
 		len = size & ~PAGE_CACHE_MASK;
 	else
 		len = PAGE_CACHE_SIZE;
-	BUG_ON(walk_page_buffers(NULL, page_buffers(page), 0, len, NULL,
-				ext4_bh_unmapped_or_delay));
+
+	if (page_has_buffers(page)) {
+		/* if page has buffers it should all be mapped
+		 * and allocated. If there are not buffers attached
+		 * to the page we know the page is dirty but it lost
+		 * buffers. That means that at some moment in time
+		 * after write_begin() / write_end() has been called
+		 * all buffers have been clean and thus they must have been
+		 * written at least once. So they are all mapped and we can
+		 * happily proceed with mapping them and writing the page.
+		 */
+		BUG_ON(walk_page_buffers(NULL, page_buffers(page), 0, len, NULL,
+					ext4_bh_unmapped_or_delay));
+	}
 
 	if (ext4_journal_current_handle())
 		goto no_write;
@@ -2549,7 +2626,9 @@ static int ext4_journalled_writepage(struct page *page,
 	 * really know unless we go poke around in the buffer_heads.
 	 * But block_write_full_page will do the right thing.
 	 */
-	return block_write_full_page(page, ext4_get_block, wbc);
+	return block_write_full_page(page,
+					ext4_normal_get_block_write,
+					wbc);
 	}
 no_write:
 	redirty_page_for_writepage(wbc, page);