aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
author: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com> 2008-07-11 19:27:31 -0400
committer: Theodore Ts'o <tytso@mit.edu> 2008-07-11 19:27:31 -0400
commit: f0e6c98593eb8a77edb7dd0edb22bb9f9368c567 (patch)
tree: 07c588e757d533988d230f43d9c794dbc4a58977 /fs
parent: cd1aac32923a9c8adcc0ae85e33c1ca0c5855838 (diff)
ext4: Handle page without buffers in ext4_*_writepage()
It can happen that buffers are removed from the page before it gets marked dirty and is then passed to writepage(). In writepage() we just initialize the buffers and check whether they are mapped and non-delay. If they are mapped and non-delay, we write the page. Otherwise we mark the page dirty again. With this change we don't do block allocation at all in ext4_*_writepage(). writepage() can get called under many conditions, and with a locking order of journal_start -> lock_page we should not try to allocate blocks in writepage(), which gets called after taking the page lock. writepage() can get called via shrink_page_list even with a journal handle that was created for doing an inode update. For example, when doing ext4_da_write_begin we create a journal handle with 1 credit, expecting an i_disksize update for the inode. But ext4_da_write_begin can cause shrink_page_list via _grab_page_cache. So having a valid handle via ext4_journal_current_handle is not a guarantee that we can use the handle for block allocation in writepage, since we shouldn't be using credits that had been reserved for other updates. Doing so could result in us running out of credits when we update inodes. Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com> Signed-off-by: Mingming Cao <cmm@us.ibm.com> Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
Diffstat (limited to 'fs')
-rw-r--r-- fs/ext4/inode.c 169
1 files changed, 124 insertions, 45 deletions
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 24518b57733e..ce47847bb37d 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -2003,11 +2003,15 @@ static int ext4_da_get_block_write(struct inode *inode, sector_t iblock,
2003 handle_t *handle = NULL; 2003 handle_t *handle = NULL;
2004 2004
2005 handle = ext4_journal_current_handle(); 2005 handle = ext4_journal_current_handle();
2006 BUG_ON(handle == NULL); 2006 if (!handle) {
2007 BUG_ON(create == 0); 2007 ret = ext4_get_blocks_wrap(handle, inode, iblock, max_blocks,
2008 2008 bh_result, 0, 0, 0);
2009 ret = ext4_get_blocks_wrap(handle, inode, iblock, max_blocks, 2009 BUG_ON(!ret);
2010 } else {
2011 ret = ext4_get_blocks_wrap(handle, inode, iblock, max_blocks,
2010 bh_result, create, 0, EXT4_DELALLOC_RSVED); 2012 bh_result, create, 0, EXT4_DELALLOC_RSVED);
2013 }
2014
2011 if (ret > 0) { 2015 if (ret > 0) {
2012 bh_result->b_size = (ret << inode->i_blkbits); 2016 bh_result->b_size = (ret << inode->i_blkbits);
2013 2017
@@ -2040,15 +2044,37 @@ static int ext4_da_get_block_write(struct inode *inode, sector_t iblock,
2040 2044
2041static int ext4_bh_unmapped_or_delay(handle_t *handle, struct buffer_head *bh) 2045static int ext4_bh_unmapped_or_delay(handle_t *handle, struct buffer_head *bh)
2042{ 2046{
2043 return !buffer_mapped(bh) || buffer_delay(bh); 2047 /*
2048 * unmapped buffer is possible for holes.
2049 * delay buffer is possible with delayed allocation
2050 */
2051 return ((!buffer_mapped(bh) || buffer_delay(bh)) && buffer_dirty(bh));
2052}
2053
2054static int ext4_normal_get_block_write(struct inode *inode, sector_t iblock,
2055 struct buffer_head *bh_result, int create)
2056{
2057 int ret = 0;
2058 unsigned max_blocks = bh_result->b_size >> inode->i_blkbits;
2059
2060 /*
2061 * we don't want to do block allocation in writepage
2062 * so call get_block_wrap with create = 0
2063 */
2064 ret = ext4_get_blocks_wrap(NULL, inode, iblock, max_blocks,
2065 bh_result, 0, 0, 0);
2066 if (ret > 0) {
2067 bh_result->b_size = (ret << inode->i_blkbits);
2068 ret = 0;
2069 }
2070 return ret;
2044} 2071}
2045 2072
2046/* 2073/*
2047 * get called vi ext4_da_writepages after taking page lock 2074 * get called vi ext4_da_writepages after taking page lock (have journal handle)
2048 * We may end up doing block allocation here in case 2075 * get called via journal_submit_inode_data_buffers (no journal handle)
2049 * mpage_da_map_blocks failed to allocate blocks. 2076 * get called via shrink_page_list via pdflush (no journal handle)
2050 * 2077 * or grab_page_cache when doing write_begin (have journal handle)
2051 * We also get called via journal_submit_inode_data_buffers
2052 */ 2078 */
2053static int ext4_da_writepage(struct page *page, 2079static int ext4_da_writepage(struct page *page,
2054 struct writeback_control *wbc) 2080 struct writeback_control *wbc)
@@ -2056,37 +2082,61 @@ static int ext4_da_writepage(struct page *page,
2056 int ret = 0; 2082 int ret = 0;
2057 loff_t size; 2083 loff_t size;
2058 unsigned long len; 2084 unsigned long len;
2059 handle_t *handle = NULL;
2060 struct buffer_head *page_bufs; 2085 struct buffer_head *page_bufs;
2061 struct inode *inode = page->mapping->host; 2086 struct inode *inode = page->mapping->host;
2062 2087
2063 handle = ext4_journal_current_handle(); 2088 size = i_size_read(inode);
2064 if (!handle) { 2089 if (page->index == size >> PAGE_CACHE_SHIFT)
2065 /* 2090 len = size & ~PAGE_CACHE_MASK;
2066 * This can happen when we aren't called via 2091 else
2067 * ext4_da_writepages() but directly (shrink_page_list). 2092 len = PAGE_CACHE_SIZE;
2068 * We cannot easily start a transaction here so we just skip
2069 * writing the page in case we would have to do so.
2070 * We reach here also via journal_submit_inode_data_buffers
2071 */
2072 size = i_size_read(inode);
2073 2093
2094 if (page_has_buffers(page)) {
2074 page_bufs = page_buffers(page); 2095 page_bufs = page_buffers(page);
2075 if (page->index == size >> PAGE_CACHE_SHIFT) 2096 if (walk_page_buffers(NULL, page_bufs, 0, len, NULL,
2076 len = size & ~PAGE_CACHE_MASK; 2097 ext4_bh_unmapped_or_delay)) {
2077 else
2078 len = PAGE_CACHE_SIZE;
2079
2080 if (walk_page_buffers(NULL, page_bufs, 0,
2081 len, NULL, ext4_bh_unmapped_or_delay)) {
2082 /* 2098 /*
2083 * We can't do block allocation under 2099 * We don't want to do block allocation
2084 * page lock without a handle . So redirty 2100 * So redirty the page and return
2085 * the page and return
2086 * We may reach here when we do a journal commit 2101 * We may reach here when we do a journal commit
2087 * via journal_submit_inode_data_buffers. 2102 * via journal_submit_inode_data_buffers.
2088 * If we don't have mapping block we just ignore 2103 * If we don't have mapping block we just ignore
2089 * them 2104 * them. We can also reach here via shrink_page_list
2105 */
2106 redirty_page_for_writepage(wbc, page);
2107 unlock_page(page);
2108 return 0;
2109 }
2110 } else {
2111 /*
2112 * The test for page_has_buffers() is subtle:
2113 * We know the page is dirty but it lost buffers. That means
2114 * that at some moment in time after write_begin()/write_end()
2115 * has been called all buffers have been clean and thus they
2116 * must have been written at least once. So they are all
2117 * mapped and we can happily proceed with mapping them
2118 * and writing the page.
2119 *
2120 * Try to initialize the buffer_heads and check whether
2121 * all are mapped and non delay. We don't want to
2122 * do block allocation here.
2123 */
2124 ret = block_prepare_write(page, 0, PAGE_CACHE_SIZE,
2125 ext4_normal_get_block_write);
2126 if (!ret) {
2127 page_bufs = page_buffers(page);
2128 /* check whether all are mapped and non delay */
2129 if (walk_page_buffers(NULL, page_bufs, 0, len, NULL,
2130 ext4_bh_unmapped_or_delay)) {
2131 redirty_page_for_writepage(wbc, page);
2132 unlock_page(page);
2133 return 0;
2134 }
2135 } else {
2136 /*
2137 * We can't do block allocation here
2138 * so just redity the page and unlock
2139 * and return
2090 */ 2140 */
2091 redirty_page_for_writepage(wbc, page); 2141 redirty_page_for_writepage(wbc, page);
2092 unlock_page(page); 2142 unlock_page(page);
@@ -2095,9 +2145,11 @@ static int ext4_da_writepage(struct page *page,
2095 } 2145 }
2096 2146
2097 if (test_opt(inode->i_sb, NOBH) && ext4_should_writeback_data(inode)) 2147 if (test_opt(inode->i_sb, NOBH) && ext4_should_writeback_data(inode))
2098 ret = nobh_writepage(page, ext4_da_get_block_write, wbc); 2148 ret = nobh_writepage(page, ext4_normal_get_block_write, wbc);
2099 else 2149 else
2100 ret = block_write_full_page(page, ext4_da_get_block_write, wbc); 2150 ret = block_write_full_page(page,
2151 ext4_normal_get_block_write,
2152 wbc);
2101 2153
2102 return ret; 2154 return ret;
2103} 2155}
@@ -2438,12 +2490,14 @@ static int __ext4_normal_writepage(struct page *page,
2438 struct inode *inode = page->mapping->host; 2490 struct inode *inode = page->mapping->host;
2439 2491
2440 if (test_opt(inode->i_sb, NOBH)) 2492 if (test_opt(inode->i_sb, NOBH))
2441 return nobh_writepage(page, ext4_get_block, wbc); 2493 return nobh_writepage(page,
2494 ext4_normal_get_block_write, wbc);
2442 else 2495 else
2443 return block_write_full_page(page, ext4_get_block, wbc); 2496 return block_write_full_page(page,
2497 ext4_normal_get_block_write,
2498 wbc);
2444} 2499}
2445 2500
2446
2447static int ext4_normal_writepage(struct page *page, 2501static int ext4_normal_writepage(struct page *page,
2448 struct writeback_control *wbc) 2502 struct writeback_control *wbc)
2449{ 2503{
@@ -2452,13 +2506,24 @@ static int ext4_normal_writepage(struct page *page,
2452 loff_t len; 2506 loff_t len;
2453 2507
2454 J_ASSERT(PageLocked(page)); 2508 J_ASSERT(PageLocked(page));
2455 J_ASSERT(page_has_buffers(page));
2456 if (page->index == size >> PAGE_CACHE_SHIFT) 2509 if (page->index == size >> PAGE_CACHE_SHIFT)
2457 len = size & ~PAGE_CACHE_MASK; 2510 len = size & ~PAGE_CACHE_MASK;
2458 else 2511 else
2459 len = PAGE_CACHE_SIZE; 2512 len = PAGE_CACHE_SIZE;
2460 BUG_ON(walk_page_buffers(NULL, page_buffers(page), 0, len, NULL, 2513
2461 ext4_bh_unmapped_or_delay)); 2514 if (page_has_buffers(page)) {
2515 /* if page has buffers it should all be mapped
2516 * and allocated. If there are not buffers attached
2517 * to the page we know the page is dirty but it lost
2518 * buffers. That means that at some moment in time
2519 * after write_begin() / write_end() has been called
2520 * all buffers have been clean and thus they must have been
2521 * written at least once. So they are all mapped and we can
2522 * happily proceed with mapping them and writing the page.
2523 */
2524 BUG_ON(walk_page_buffers(NULL, page_buffers(page), 0, len, NULL,
2525 ext4_bh_unmapped_or_delay));
2526 }
2462 2527
2463 if (!ext4_journal_current_handle()) 2528 if (!ext4_journal_current_handle())
2464 return __ext4_normal_writepage(page, wbc); 2529 return __ext4_normal_writepage(page, wbc);
@@ -2478,7 +2543,8 @@ static int __ext4_journalled_writepage(struct page *page,
2478 int ret = 0; 2543 int ret = 0;
2479 int err; 2544 int err;
2480 2545
2481 ret = block_prepare_write(page, 0, PAGE_CACHE_SIZE, ext4_get_block); 2546 ret = block_prepare_write(page, 0, PAGE_CACHE_SIZE,
2547 ext4_normal_get_block_write);
2482 if (ret != 0) 2548 if (ret != 0)
2483 goto out_unlock; 2549 goto out_unlock;
2484 2550
@@ -2525,13 +2591,24 @@ static int ext4_journalled_writepage(struct page *page,
2525 loff_t len; 2591 loff_t len;
2526 2592
2527 J_ASSERT(PageLocked(page)); 2593 J_ASSERT(PageLocked(page));
2528 J_ASSERT(page_has_buffers(page));
2529 if (page->index == size >> PAGE_CACHE_SHIFT) 2594 if (page->index == size >> PAGE_CACHE_SHIFT)
2530 len = size & ~PAGE_CACHE_MASK; 2595 len = size & ~PAGE_CACHE_MASK;
2531 else 2596 else
2532 len = PAGE_CACHE_SIZE; 2597 len = PAGE_CACHE_SIZE;
2533 BUG_ON(walk_page_buffers(NULL, page_buffers(page), 0, len, NULL, 2598
2534 ext4_bh_unmapped_or_delay)); 2599 if (page_has_buffers(page)) {
2600 /* if page has buffers it should all be mapped
2601 * and allocated. If there are not buffers attached
2602 * to the page we know the page is dirty but it lost
2603 * buffers. That means that at some moment in time
2604 * after write_begin() / write_end() has been called
2605 * all buffers have been clean and thus they must have been
2606 * written at least once. So they are all mapped and we can
2607 * happily proceed with mapping them and writing the page.
2608 */
2609 BUG_ON(walk_page_buffers(NULL, page_buffers(page), 0, len, NULL,
2610 ext4_bh_unmapped_or_delay));
2611 }
2535 2612
2536 if (ext4_journal_current_handle()) 2613 if (ext4_journal_current_handle())
2537 goto no_write; 2614 goto no_write;
@@ -2549,7 +2626,9 @@ static int ext4_journalled_writepage(struct page *page,
2549 * really know unless we go poke around in the buffer_heads. 2626 * really know unless we go poke around in the buffer_heads.
2550 * But block_write_full_page will do the right thing. 2627 * But block_write_full_page will do the right thing.
2551 */ 2628 */
2552 return block_write_full_page(page, ext4_get_block, wbc); 2629 return block_write_full_page(page,
2630 ext4_normal_get_block_write,
2631 wbc);
2553 } 2632 }
2554no_write: 2633no_write:
2555 redirty_page_for_writepage(wbc, page); 2634 redirty_page_for_writepage(wbc, page);