diff options
Diffstat (limited to 'fs/ext4/inode.c')
-rw-r--r-- | fs/ext4/inode.c | 599 |
1 files changed, 209 insertions, 390 deletions
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 4b8debeb3965..bdbe69902207 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c | |||
@@ -53,6 +53,7 @@ | |||
53 | static inline int ext4_begin_ordered_truncate(struct inode *inode, | 53 | static inline int ext4_begin_ordered_truncate(struct inode *inode, |
54 | loff_t new_size) | 54 | loff_t new_size) |
55 | { | 55 | { |
56 | trace_ext4_begin_ordered_truncate(inode, new_size); | ||
56 | return jbd2_journal_begin_ordered_truncate( | 57 | return jbd2_journal_begin_ordered_truncate( |
57 | EXT4_SB(inode->i_sb)->s_journal, | 58 | EXT4_SB(inode->i_sb)->s_journal, |
58 | &EXT4_I(inode)->jinode, | 59 | &EXT4_I(inode)->jinode, |
@@ -60,6 +61,12 @@ static inline int ext4_begin_ordered_truncate(struct inode *inode, | |||
60 | } | 61 | } |
61 | 62 | ||
62 | static void ext4_invalidatepage(struct page *page, unsigned long offset); | 63 | static void ext4_invalidatepage(struct page *page, unsigned long offset); |
64 | static int noalloc_get_block_write(struct inode *inode, sector_t iblock, | ||
65 | struct buffer_head *bh_result, int create); | ||
66 | static int ext4_set_bh_endio(struct buffer_head *bh, struct inode *inode); | ||
67 | static void ext4_end_io_buffer_write(struct buffer_head *bh, int uptodate); | ||
68 | static int __ext4_journalled_writepage(struct page *page, unsigned int len); | ||
69 | static int ext4_bh_delay_or_unwritten(handle_t *handle, struct buffer_head *bh); | ||
63 | 70 | ||
64 | /* | 71 | /* |
65 | * Test whether an inode is a fast symlink. | 72 | * Test whether an inode is a fast symlink. |
@@ -172,6 +179,7 @@ void ext4_evict_inode(struct inode *inode) | |||
172 | handle_t *handle; | 179 | handle_t *handle; |
173 | int err; | 180 | int err; |
174 | 181 | ||
182 | trace_ext4_evict_inode(inode); | ||
175 | if (inode->i_nlink) { | 183 | if (inode->i_nlink) { |
176 | truncate_inode_pages(&inode->i_data, 0); | 184 | truncate_inode_pages(&inode->i_data, 0); |
177 | goto no_delete; | 185 | goto no_delete; |
@@ -755,6 +763,11 @@ static int ext4_alloc_branch(handle_t *handle, struct inode *inode, | |||
755 | * parent to disk. | 763 | * parent to disk. |
756 | */ | 764 | */ |
757 | bh = sb_getblk(inode->i_sb, new_blocks[n-1]); | 765 | bh = sb_getblk(inode->i_sb, new_blocks[n-1]); |
766 | if (unlikely(!bh)) { | ||
767 | err = -EIO; | ||
768 | goto failed; | ||
769 | } | ||
770 | |||
758 | branch[n].bh = bh; | 771 | branch[n].bh = bh; |
759 | lock_buffer(bh); | 772 | lock_buffer(bh); |
760 | BUFFER_TRACE(bh, "call get_create_access"); | 773 | BUFFER_TRACE(bh, "call get_create_access"); |
@@ -1207,8 +1220,10 @@ static pgoff_t ext4_num_dirty_pages(struct inode *inode, pgoff_t idx, | |||
1207 | break; | 1220 | break; |
1208 | idx++; | 1221 | idx++; |
1209 | num++; | 1222 | num++; |
1210 | if (num >= max_pages) | 1223 | if (num >= max_pages) { |
1224 | done = 1; | ||
1211 | break; | 1225 | break; |
1226 | } | ||
1212 | } | 1227 | } |
1213 | pagevec_release(&pvec); | 1228 | pagevec_release(&pvec); |
1214 | } | 1229 | } |
@@ -1538,10 +1553,10 @@ static int do_journal_get_write_access(handle_t *handle, | |||
1538 | if (!buffer_mapped(bh) || buffer_freed(bh)) | 1553 | if (!buffer_mapped(bh) || buffer_freed(bh)) |
1539 | return 0; | 1554 | return 0; |
1540 | /* | 1555 | /* |
1541 | * __block_prepare_write() could have dirtied some buffers. Clean | 1556 | * __block_write_begin() could have dirtied some buffers. Clean |
1542 | * the dirty bit as jbd2_journal_get_write_access() could complain | 1557 | * the dirty bit as jbd2_journal_get_write_access() could complain |
1543 | * otherwise about fs integrity issues. Setting of the dirty bit | 1558 | * otherwise about fs integrity issues. Setting of the dirty bit |
1544 | * by __block_prepare_write() isn't a real problem here as we clear | 1559 | * by __block_write_begin() isn't a real problem here as we clear |
1545 | * the bit before releasing a page lock and thus writeback cannot | 1560 | * the bit before releasing a page lock and thus writeback cannot |
1546 | * ever write the buffer. | 1561 | * ever write the buffer. |
1547 | */ | 1562 | */ |
@@ -1995,16 +2010,23 @@ static void ext4_da_page_release_reservation(struct page *page, | |||
1995 | * | 2010 | * |
1996 | * As pages are already locked by write_cache_pages(), we can't use it | 2011 | * As pages are already locked by write_cache_pages(), we can't use it |
1997 | */ | 2012 | */ |
1998 | static int mpage_da_submit_io(struct mpage_da_data *mpd) | 2013 | static int mpage_da_submit_io(struct mpage_da_data *mpd, |
2014 | struct ext4_map_blocks *map) | ||
1999 | { | 2015 | { |
2000 | long pages_skipped; | ||
2001 | struct pagevec pvec; | 2016 | struct pagevec pvec; |
2002 | unsigned long index, end; | 2017 | unsigned long index, end; |
2003 | int ret = 0, err, nr_pages, i; | 2018 | int ret = 0, err, nr_pages, i; |
2004 | struct inode *inode = mpd->inode; | 2019 | struct inode *inode = mpd->inode; |
2005 | struct address_space *mapping = inode->i_mapping; | 2020 | struct address_space *mapping = inode->i_mapping; |
2021 | loff_t size = i_size_read(inode); | ||
2022 | unsigned int len, block_start; | ||
2023 | struct buffer_head *bh, *page_bufs = NULL; | ||
2024 | int journal_data = ext4_should_journal_data(inode); | ||
2025 | sector_t pblock = 0, cur_logical = 0; | ||
2026 | struct ext4_io_submit io_submit; | ||
2006 | 2027 | ||
2007 | BUG_ON(mpd->next_page <= mpd->first_page); | 2028 | BUG_ON(mpd->next_page <= mpd->first_page); |
2029 | memset(&io_submit, 0, sizeof(io_submit)); | ||
2008 | /* | 2030 | /* |
2009 | * We need to start from the first_page to the next_page - 1 | 2031 | * We need to start from the first_page to the next_page - 1 |
2010 | * to make sure we also write the mapped dirty buffer_heads. | 2032 | * to make sure we also write the mapped dirty buffer_heads. |
@@ -2020,122 +2042,108 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd) | |||
2020 | if (nr_pages == 0) | 2042 | if (nr_pages == 0) |
2021 | break; | 2043 | break; |
2022 | for (i = 0; i < nr_pages; i++) { | 2044 | for (i = 0; i < nr_pages; i++) { |
2045 | int commit_write = 0, redirty_page = 0; | ||
2023 | struct page *page = pvec.pages[i]; | 2046 | struct page *page = pvec.pages[i]; |
2024 | 2047 | ||
2025 | index = page->index; | 2048 | index = page->index; |
2026 | if (index > end) | 2049 | if (index > end) |
2027 | break; | 2050 | break; |
2051 | |||
2052 | if (index == size >> PAGE_CACHE_SHIFT) | ||
2053 | len = size & ~PAGE_CACHE_MASK; | ||
2054 | else | ||
2055 | len = PAGE_CACHE_SIZE; | ||
2056 | if (map) { | ||
2057 | cur_logical = index << (PAGE_CACHE_SHIFT - | ||
2058 | inode->i_blkbits); | ||
2059 | pblock = map->m_pblk + (cur_logical - | ||
2060 | map->m_lblk); | ||
2061 | } | ||
2028 | index++; | 2062 | index++; |
2029 | 2063 | ||
2030 | BUG_ON(!PageLocked(page)); | 2064 | BUG_ON(!PageLocked(page)); |
2031 | BUG_ON(PageWriteback(page)); | 2065 | BUG_ON(PageWriteback(page)); |
2032 | 2066 | ||
2033 | pages_skipped = mpd->wbc->pages_skipped; | ||
2034 | err = mapping->a_ops->writepage(page, mpd->wbc); | ||
2035 | if (!err && (pages_skipped == mpd->wbc->pages_skipped)) | ||
2036 | /* | ||
2037 | * have successfully written the page | ||
2038 | * without skipping the same | ||
2039 | */ | ||
2040 | mpd->pages_written++; | ||
2041 | /* | 2067 | /* |
2042 | * In error case, we have to continue because | 2068 | * If the page does not have buffers (for |
2043 | * remaining pages are still locked | 2069 | * whatever reason), try to create them using |
2044 | * XXX: unlock and re-dirty them? | 2070 | * __block_write_begin. If this fails, |
2071 | * redirty the page and move on. | ||
2045 | */ | 2072 | */ |
2046 | if (ret == 0) | 2073 | if (!page_has_buffers(page)) { |
2047 | ret = err; | 2074 | if (__block_write_begin(page, 0, len, |
2048 | } | 2075 | noalloc_get_block_write)) { |
2049 | pagevec_release(&pvec); | 2076 | redirty_page: |
2050 | } | 2077 | redirty_page_for_writepage(mpd->wbc, |
2051 | return ret; | 2078 | page); |
2052 | } | 2079 | unlock_page(page); |
2053 | 2080 | continue; | |
2054 | /* | 2081 | } |
2055 | * mpage_put_bnr_to_bhs - walk blocks and assign them actual numbers | 2082 | commit_write = 1; |
2056 | * | 2083 | } |
2057 | * the function goes through all passed space and put actual disk | ||
2058 | * block numbers into buffer heads, dropping BH_Delay and BH_Unwritten | ||
2059 | */ | ||
2060 | static void mpage_put_bnr_to_bhs(struct mpage_da_data *mpd, | ||
2061 | struct ext4_map_blocks *map) | ||
2062 | { | ||
2063 | struct inode *inode = mpd->inode; | ||
2064 | struct address_space *mapping = inode->i_mapping; | ||
2065 | int blocks = map->m_len; | ||
2066 | sector_t pblock = map->m_pblk, cur_logical; | ||
2067 | struct buffer_head *head, *bh; | ||
2068 | pgoff_t index, end; | ||
2069 | struct pagevec pvec; | ||
2070 | int nr_pages, i; | ||
2071 | |||
2072 | index = map->m_lblk >> (PAGE_CACHE_SHIFT - inode->i_blkbits); | ||
2073 | end = (map->m_lblk + blocks - 1) >> (PAGE_CACHE_SHIFT - inode->i_blkbits); | ||
2074 | cur_logical = index << (PAGE_CACHE_SHIFT - inode->i_blkbits); | ||
2075 | |||
2076 | pagevec_init(&pvec, 0); | ||
2077 | |||
2078 | while (index <= end) { | ||
2079 | /* XXX: optimize tail */ | ||
2080 | nr_pages = pagevec_lookup(&pvec, mapping, index, PAGEVEC_SIZE); | ||
2081 | if (nr_pages == 0) | ||
2082 | break; | ||
2083 | for (i = 0; i < nr_pages; i++) { | ||
2084 | struct page *page = pvec.pages[i]; | ||
2085 | |||
2086 | index = page->index; | ||
2087 | if (index > end) | ||
2088 | break; | ||
2089 | index++; | ||
2090 | |||
2091 | BUG_ON(!PageLocked(page)); | ||
2092 | BUG_ON(PageWriteback(page)); | ||
2093 | BUG_ON(!page_has_buffers(page)); | ||
2094 | |||
2095 | bh = page_buffers(page); | ||
2096 | head = bh; | ||
2097 | |||
2098 | /* skip blocks out of the range */ | ||
2099 | do { | ||
2100 | if (cur_logical >= map->m_lblk) | ||
2101 | break; | ||
2102 | cur_logical++; | ||
2103 | } while ((bh = bh->b_this_page) != head); | ||
2104 | 2084 | ||
2085 | bh = page_bufs = page_buffers(page); | ||
2086 | block_start = 0; | ||
2105 | do { | 2087 | do { |
2106 | if (cur_logical >= map->m_lblk + blocks) | 2088 | if (!bh) |
2107 | break; | 2089 | goto redirty_page; |
2108 | 2090 | if (map && (cur_logical >= map->m_lblk) && | |
2109 | if (buffer_delay(bh) || buffer_unwritten(bh)) { | 2091 | (cur_logical <= (map->m_lblk + |
2110 | 2092 | (map->m_len - 1)))) { | |
2111 | BUG_ON(bh->b_bdev != inode->i_sb->s_bdev); | ||
2112 | |||
2113 | if (buffer_delay(bh)) { | 2093 | if (buffer_delay(bh)) { |
2114 | clear_buffer_delay(bh); | 2094 | clear_buffer_delay(bh); |
2115 | bh->b_blocknr = pblock; | 2095 | bh->b_blocknr = pblock; |
2116 | } else { | ||
2117 | /* | ||
2118 | * unwritten already should have | ||
2119 | * blocknr assigned. Verify that | ||
2120 | */ | ||
2121 | clear_buffer_unwritten(bh); | ||
2122 | BUG_ON(bh->b_blocknr != pblock); | ||
2123 | } | 2096 | } |
2097 | if (buffer_unwritten(bh) || | ||
2098 | buffer_mapped(bh)) | ||
2099 | BUG_ON(bh->b_blocknr != pblock); | ||
2100 | if (map->m_flags & EXT4_MAP_UNINIT) | ||
2101 | set_buffer_uninit(bh); | ||
2102 | clear_buffer_unwritten(bh); | ||
2103 | } | ||
2124 | 2104 | ||
2125 | } else if (buffer_mapped(bh)) | 2105 | /* redirty page if block allocation undone */ |
2126 | BUG_ON(bh->b_blocknr != pblock); | 2106 | if (buffer_delay(bh) || buffer_unwritten(bh)) |
2127 | 2107 | redirty_page = 1; | |
2128 | if (map->m_flags & EXT4_MAP_UNINIT) | 2108 | bh = bh->b_this_page; |
2129 | set_buffer_uninit(bh); | 2109 | block_start += bh->b_size; |
2130 | cur_logical++; | 2110 | cur_logical++; |
2131 | pblock++; | 2111 | pblock++; |
2132 | } while ((bh = bh->b_this_page) != head); | 2112 | } while (bh != page_bufs); |
2113 | |||
2114 | if (redirty_page) | ||
2115 | goto redirty_page; | ||
2116 | |||
2117 | if (commit_write) | ||
2118 | /* mark the buffer_heads as dirty & uptodate */ | ||
2119 | block_commit_write(page, 0, len); | ||
2120 | |||
2121 | /* | ||
2122 | * Delalloc doesn't support data journalling, | ||
2123 | * but eventually maybe we'll lift this | ||
2124 | * restriction. | ||
2125 | */ | ||
2126 | if (unlikely(journal_data && PageChecked(page))) | ||
2127 | err = __ext4_journalled_writepage(page, len); | ||
2128 | else | ||
2129 | err = ext4_bio_write_page(&io_submit, page, | ||
2130 | len, mpd->wbc); | ||
2131 | |||
2132 | if (!err) | ||
2133 | mpd->pages_written++; | ||
2134 | /* | ||
2135 | * In error case, we have to continue because | ||
2136 | * remaining pages are still locked | ||
2137 | */ | ||
2138 | if (ret == 0) | ||
2139 | ret = err; | ||
2133 | } | 2140 | } |
2134 | pagevec_release(&pvec); | 2141 | pagevec_release(&pvec); |
2135 | } | 2142 | } |
2143 | ext4_io_submit(&io_submit); | ||
2144 | return ret; | ||
2136 | } | 2145 | } |
2137 | 2146 | ||
2138 | |||
2139 | static void ext4_da_block_invalidatepages(struct mpage_da_data *mpd, | 2147 | static void ext4_da_block_invalidatepages(struct mpage_da_data *mpd, |
2140 | sector_t logical, long blk_cnt) | 2148 | sector_t logical, long blk_cnt) |
2141 | { | 2149 | { |
@@ -2187,35 +2195,32 @@ static void ext4_print_free_blocks(struct inode *inode) | |||
2187 | } | 2195 | } |
2188 | 2196 | ||
2189 | /* | 2197 | /* |
2190 | * mpage_da_map_blocks - go through given space | 2198 | * mpage_da_map_and_submit - go through given space, map them |
2199 | * if necessary, and then submit them for I/O | ||
2191 | * | 2200 | * |
2192 | * @mpd - bh describing space | 2201 | * @mpd - bh describing space |
2193 | * | 2202 | * |
2194 | * The function skips space we know is already mapped to disk blocks. | 2203 | * The function skips space we know is already mapped to disk blocks. |
2195 | * | 2204 | * |
2196 | */ | 2205 | */ |
2197 | static int mpage_da_map_blocks(struct mpage_da_data *mpd) | 2206 | static void mpage_da_map_and_submit(struct mpage_da_data *mpd) |
2198 | { | 2207 | { |
2199 | int err, blks, get_blocks_flags; | 2208 | int err, blks, get_blocks_flags; |
2200 | struct ext4_map_blocks map; | 2209 | struct ext4_map_blocks map, *mapp = NULL; |
2201 | sector_t next = mpd->b_blocknr; | 2210 | sector_t next = mpd->b_blocknr; |
2202 | unsigned max_blocks = mpd->b_size >> mpd->inode->i_blkbits; | 2211 | unsigned max_blocks = mpd->b_size >> mpd->inode->i_blkbits; |
2203 | loff_t disksize = EXT4_I(mpd->inode)->i_disksize; | 2212 | loff_t disksize = EXT4_I(mpd->inode)->i_disksize; |
2204 | handle_t *handle = NULL; | 2213 | handle_t *handle = NULL; |
2205 | 2214 | ||
2206 | /* | 2215 | /* |
2207 | * We consider only non-mapped and non-allocated blocks | 2216 | * If the blocks are mapped already, or we couldn't accumulate |
2217 | * any blocks, then proceed immediately to the submission stage. | ||
2208 | */ | 2218 | */ |
2209 | if ((mpd->b_state & (1 << BH_Mapped)) && | 2219 | if ((mpd->b_size == 0) || |
2210 | !(mpd->b_state & (1 << BH_Delay)) && | 2220 | ((mpd->b_state & (1 << BH_Mapped)) && |
2211 | !(mpd->b_state & (1 << BH_Unwritten))) | 2221 | !(mpd->b_state & (1 << BH_Delay)) && |
2212 | return 0; | 2222 | !(mpd->b_state & (1 << BH_Unwritten)))) |
2213 | 2223 | goto submit_io; | |
2214 | /* | ||
2215 | * If we didn't accumulate anything to write simply return | ||
2216 | */ | ||
2217 | if (!mpd->b_size) | ||
2218 | return 0; | ||
2219 | 2224 | ||
2220 | handle = ext4_journal_current_handle(); | 2225 | handle = ext4_journal_current_handle(); |
2221 | BUG_ON(!handle); | 2226 | BUG_ON(!handle); |
@@ -2252,17 +2257,18 @@ static int mpage_da_map_blocks(struct mpage_da_data *mpd) | |||
2252 | 2257 | ||
2253 | err = blks; | 2258 | err = blks; |
2254 | /* | 2259 | /* |
2255 | * If get block returns with error we simply | 2260 | * If get block returns EAGAIN or ENOSPC and there |
2256 | * return. Later writepage will redirty the page and | 2261 | * appears to be free blocks we will call |
2257 | * writepages will find the dirty page again | 2262 | * ext4_writepage() for all of the pages which will |
2263 | * just redirty the pages. | ||
2258 | */ | 2264 | */ |
2259 | if (err == -EAGAIN) | 2265 | if (err == -EAGAIN) |
2260 | return 0; | 2266 | goto submit_io; |
2261 | 2267 | ||
2262 | if (err == -ENOSPC && | 2268 | if (err == -ENOSPC && |
2263 | ext4_count_free_blocks(sb)) { | 2269 | ext4_count_free_blocks(sb)) { |
2264 | mpd->retval = err; | 2270 | mpd->retval = err; |
2265 | return 0; | 2271 | goto submit_io; |
2266 | } | 2272 | } |
2267 | 2273 | ||
2268 | /* | 2274 | /* |
@@ -2287,10 +2293,11 @@ static int mpage_da_map_blocks(struct mpage_da_data *mpd) | |||
2287 | /* invalidate all the pages */ | 2293 | /* invalidate all the pages */ |
2288 | ext4_da_block_invalidatepages(mpd, next, | 2294 | ext4_da_block_invalidatepages(mpd, next, |
2289 | mpd->b_size >> mpd->inode->i_blkbits); | 2295 | mpd->b_size >> mpd->inode->i_blkbits); |
2290 | return err; | 2296 | return; |
2291 | } | 2297 | } |
2292 | BUG_ON(blks == 0); | 2298 | BUG_ON(blks == 0); |
2293 | 2299 | ||
2300 | mapp = &map; | ||
2294 | if (map.m_flags & EXT4_MAP_NEW) { | 2301 | if (map.m_flags & EXT4_MAP_NEW) { |
2295 | struct block_device *bdev = mpd->inode->i_sb->s_bdev; | 2302 | struct block_device *bdev = mpd->inode->i_sb->s_bdev; |
2296 | int i; | 2303 | int i; |
@@ -2299,18 +2306,11 @@ static int mpage_da_map_blocks(struct mpage_da_data *mpd) | |||
2299 | unmap_underlying_metadata(bdev, map.m_pblk + i); | 2306 | unmap_underlying_metadata(bdev, map.m_pblk + i); |
2300 | } | 2307 | } |
2301 | 2308 | ||
2302 | /* | ||
2303 | * If blocks are delayed marked, we need to | ||
2304 | * put actual blocknr and drop delayed bit | ||
2305 | */ | ||
2306 | if ((mpd->b_state & (1 << BH_Delay)) || | ||
2307 | (mpd->b_state & (1 << BH_Unwritten))) | ||
2308 | mpage_put_bnr_to_bhs(mpd, &map); | ||
2309 | |||
2310 | if (ext4_should_order_data(mpd->inode)) { | 2309 | if (ext4_should_order_data(mpd->inode)) { |
2311 | err = ext4_jbd2_file_inode(handle, mpd->inode); | 2310 | err = ext4_jbd2_file_inode(handle, mpd->inode); |
2312 | if (err) | 2311 | if (err) |
2313 | return err; | 2312 | /* This only happens if the journal is aborted */ |
2313 | return; | ||
2314 | } | 2314 | } |
2315 | 2315 | ||
2316 | /* | 2316 | /* |
@@ -2321,10 +2321,16 @@ static int mpage_da_map_blocks(struct mpage_da_data *mpd) | |||
2321 | disksize = i_size_read(mpd->inode); | 2321 | disksize = i_size_read(mpd->inode); |
2322 | if (disksize > EXT4_I(mpd->inode)->i_disksize) { | 2322 | if (disksize > EXT4_I(mpd->inode)->i_disksize) { |
2323 | ext4_update_i_disksize(mpd->inode, disksize); | 2323 | ext4_update_i_disksize(mpd->inode, disksize); |
2324 | return ext4_mark_inode_dirty(handle, mpd->inode); | 2324 | err = ext4_mark_inode_dirty(handle, mpd->inode); |
2325 | if (err) | ||
2326 | ext4_error(mpd->inode->i_sb, | ||
2327 | "Failed to mark inode %lu dirty", | ||
2328 | mpd->inode->i_ino); | ||
2325 | } | 2329 | } |
2326 | 2330 | ||
2327 | return 0; | 2331 | submit_io: |
2332 | mpage_da_submit_io(mpd, mapp); | ||
2333 | mpd->io_done = 1; | ||
2328 | } | 2334 | } |
2329 | 2335 | ||
2330 | #define BH_FLAGS ((1 << BH_Uptodate) | (1 << BH_Mapped) | \ | 2336 | #define BH_FLAGS ((1 << BH_Uptodate) | (1 << BH_Mapped) | \ |
@@ -2401,9 +2407,7 @@ flush_it: | |||
2401 | * We couldn't merge the block to our extent, so we | 2407 | * We couldn't merge the block to our extent, so we |
2402 | * need to flush current extent and start new one | 2408 | * need to flush current extent and start new one |
2403 | */ | 2409 | */ |
2404 | if (mpage_da_map_blocks(mpd) == 0) | 2410 | mpage_da_map_and_submit(mpd); |
2405 | mpage_da_submit_io(mpd); | ||
2406 | mpd->io_done = 1; | ||
2407 | return; | 2411 | return; |
2408 | } | 2412 | } |
2409 | 2413 | ||
@@ -2422,9 +2426,9 @@ static int ext4_bh_delay_or_unwritten(handle_t *handle, struct buffer_head *bh) | |||
2422 | * The function finds extents of pages and scan them for all blocks. | 2426 | * The function finds extents of pages and scan them for all blocks. |
2423 | */ | 2427 | */ |
2424 | static int __mpage_da_writepage(struct page *page, | 2428 | static int __mpage_da_writepage(struct page *page, |
2425 | struct writeback_control *wbc, void *data) | 2429 | struct writeback_control *wbc, |
2430 | struct mpage_da_data *mpd) | ||
2426 | { | 2431 | { |
2427 | struct mpage_da_data *mpd = data; | ||
2428 | struct inode *inode = mpd->inode; | 2432 | struct inode *inode = mpd->inode; |
2429 | struct buffer_head *bh, *head; | 2433 | struct buffer_head *bh, *head; |
2430 | sector_t logical; | 2434 | sector_t logical; |
@@ -2435,15 +2439,13 @@ static int __mpage_da_writepage(struct page *page, | |||
2435 | if (mpd->next_page != page->index) { | 2439 | if (mpd->next_page != page->index) { |
2436 | /* | 2440 | /* |
2437 | * Nope, we can't. So, we map non-allocated blocks | 2441 | * Nope, we can't. So, we map non-allocated blocks |
2438 | * and start IO on them using writepage() | 2442 | * and start IO on them |
2439 | */ | 2443 | */ |
2440 | if (mpd->next_page != mpd->first_page) { | 2444 | if (mpd->next_page != mpd->first_page) { |
2441 | if (mpage_da_map_blocks(mpd) == 0) | 2445 | mpage_da_map_and_submit(mpd); |
2442 | mpage_da_submit_io(mpd); | ||
2443 | /* | 2446 | /* |
2444 | * skip rest of the page in the page_vec | 2447 | * skip rest of the page in the page_vec |
2445 | */ | 2448 | */ |
2446 | mpd->io_done = 1; | ||
2447 | redirty_page_for_writepage(wbc, page); | 2449 | redirty_page_for_writepage(wbc, page); |
2448 | unlock_page(page); | 2450 | unlock_page(page); |
2449 | return MPAGE_DA_EXTENT_TAIL; | 2451 | return MPAGE_DA_EXTENT_TAIL; |
@@ -2550,8 +2552,7 @@ static int ext4_da_get_block_prep(struct inode *inode, sector_t iblock, | |||
2550 | if (buffer_delay(bh)) | 2552 | if (buffer_delay(bh)) |
2551 | return 0; /* Not sure this could or should happen */ | 2553 | return 0; /* Not sure this could or should happen */ |
2552 | /* | 2554 | /* |
2553 | * XXX: __block_prepare_write() unmaps passed block, | 2555 | * XXX: __block_write_begin() unmaps passed block, is it OK? |
2554 | * is it OK? | ||
2555 | */ | 2556 | */ |
2556 | ret = ext4_da_reserve_space(inode, iblock); | 2557 | ret = ext4_da_reserve_space(inode, iblock); |
2557 | if (ret) | 2558 | if (ret) |
@@ -2583,7 +2584,7 @@ static int ext4_da_get_block_prep(struct inode *inode, sector_t iblock, | |||
2583 | /* | 2584 | /* |
2584 | * This function is used as a standard get_block_t calback function | 2585 | * This function is used as a standard get_block_t calback function |
2585 | * when there is no desire to allocate any blocks. It is used as a | 2586 | * when there is no desire to allocate any blocks. It is used as a |
2586 | * callback function for block_prepare_write() and block_write_full_page(). | 2587 | * callback function for block_write_begin() and block_write_full_page(). |
2587 | * These functions should only try to map a single block at a time. | 2588 | * These functions should only try to map a single block at a time. |
2588 | * | 2589 | * |
2589 | * Since this function doesn't do block allocations even if the caller | 2590 | * Since this function doesn't do block allocations even if the caller |
@@ -2623,6 +2624,7 @@ static int __ext4_journalled_writepage(struct page *page, | |||
2623 | int ret = 0; | 2624 | int ret = 0; |
2624 | int err; | 2625 | int err; |
2625 | 2626 | ||
2627 | ClearPageChecked(page); | ||
2626 | page_bufs = page_buffers(page); | 2628 | page_bufs = page_buffers(page); |
2627 | BUG_ON(!page_bufs); | 2629 | BUG_ON(!page_bufs); |
2628 | walk_page_buffers(handle, page_bufs, 0, len, NULL, bget_one); | 2630 | walk_page_buffers(handle, page_bufs, 0, len, NULL, bget_one); |
@@ -2700,7 +2702,7 @@ static void ext4_end_io_buffer_write(struct buffer_head *bh, int uptodate); | |||
2700 | static int ext4_writepage(struct page *page, | 2702 | static int ext4_writepage(struct page *page, |
2701 | struct writeback_control *wbc) | 2703 | struct writeback_control *wbc) |
2702 | { | 2704 | { |
2703 | int ret = 0; | 2705 | int ret = 0, commit_write = 0; |
2704 | loff_t size; | 2706 | loff_t size; |
2705 | unsigned int len; | 2707 | unsigned int len; |
2706 | struct buffer_head *page_bufs = NULL; | 2708 | struct buffer_head *page_bufs = NULL; |
@@ -2713,71 +2715,44 @@ static int ext4_writepage(struct page *page, | |||
2713 | else | 2715 | else |
2714 | len = PAGE_CACHE_SIZE; | 2716 | len = PAGE_CACHE_SIZE; |
2715 | 2717 | ||
2716 | if (page_has_buffers(page)) { | 2718 | /* |
2717 | page_bufs = page_buffers(page); | 2719 | * If the page does not have buffers (for whatever reason), |
2718 | if (walk_page_buffers(NULL, page_bufs, 0, len, NULL, | 2720 | * try to create them using __block_write_begin. If this |
2719 | ext4_bh_delay_or_unwritten)) { | 2721 | * fails, redirty the page and move on. |
2720 | /* | 2722 | */ |
2721 | * We don't want to do block allocation | 2723 | if (!page_has_buffers(page)) { |
2722 | * So redirty the page and return | 2724 | if (__block_write_begin(page, 0, len, |
2723 | * We may reach here when we do a journal commit | 2725 | noalloc_get_block_write)) { |
2724 | * via journal_submit_inode_data_buffers. | 2726 | redirty_page: |
2725 | * If we don't have mapping block we just ignore | ||
2726 | * them. We can also reach here via shrink_page_list | ||
2727 | */ | ||
2728 | redirty_page_for_writepage(wbc, page); | 2727 | redirty_page_for_writepage(wbc, page); |
2729 | unlock_page(page); | 2728 | unlock_page(page); |
2730 | return 0; | 2729 | return 0; |
2731 | } | 2730 | } |
2732 | } else { | 2731 | commit_write = 1; |
2732 | } | ||
2733 | page_bufs = page_buffers(page); | ||
2734 | if (walk_page_buffers(NULL, page_bufs, 0, len, NULL, | ||
2735 | ext4_bh_delay_or_unwritten)) { | ||
2733 | /* | 2736 | /* |
2734 | * The test for page_has_buffers() is subtle: | 2737 | * We don't want to do block allocation, so redirty |
2735 | * We know the page is dirty but it lost buffers. That means | 2738 | * the page and return. We may reach here when we do |
2736 | * that at some moment in time after write_begin()/write_end() | 2739 | * a journal commit via journal_submit_inode_data_buffers. |
2737 | * has been called all buffers have been clean and thus they | 2740 | * We can also reach here via shrink_page_list |
2738 | * must have been written at least once. So they are all | ||
2739 | * mapped and we can happily proceed with mapping them | ||
2740 | * and writing the page. | ||
2741 | * | ||
2742 | * Try to initialize the buffer_heads and check whether | ||
2743 | * all are mapped and non delay. We don't want to | ||
2744 | * do block allocation here. | ||
2745 | */ | 2741 | */ |
2746 | ret = block_prepare_write(page, 0, len, | 2742 | goto redirty_page; |
2747 | noalloc_get_block_write); | 2743 | } |
2748 | if (!ret) { | 2744 | if (commit_write) |
2749 | page_bufs = page_buffers(page); | ||
2750 | /* check whether all are mapped and non delay */ | ||
2751 | if (walk_page_buffers(NULL, page_bufs, 0, len, NULL, | ||
2752 | ext4_bh_delay_or_unwritten)) { | ||
2753 | redirty_page_for_writepage(wbc, page); | ||
2754 | unlock_page(page); | ||
2755 | return 0; | ||
2756 | } | ||
2757 | } else { | ||
2758 | /* | ||
2759 | * We can't do block allocation here | ||
2760 | * so just redity the page and unlock | ||
2761 | * and return | ||
2762 | */ | ||
2763 | redirty_page_for_writepage(wbc, page); | ||
2764 | unlock_page(page); | ||
2765 | return 0; | ||
2766 | } | ||
2767 | /* now mark the buffer_heads as dirty and uptodate */ | 2745 | /* now mark the buffer_heads as dirty and uptodate */ |
2768 | block_commit_write(page, 0, len); | 2746 | block_commit_write(page, 0, len); |
2769 | } | ||
2770 | 2747 | ||
2771 | if (PageChecked(page) && ext4_should_journal_data(inode)) { | 2748 | if (PageChecked(page) && ext4_should_journal_data(inode)) |
2772 | /* | 2749 | /* |
2773 | * It's mmapped pagecache. Add buffers and journal it. There | 2750 | * It's mmapped pagecache. Add buffers and journal it. There |
2774 | * doesn't seem much point in redirtying the page here. | 2751 | * doesn't seem much point in redirtying the page here. |
2775 | */ | 2752 | */ |
2776 | ClearPageChecked(page); | ||
2777 | return __ext4_journalled_writepage(page, len); | 2753 | return __ext4_journalled_writepage(page, len); |
2778 | } | ||
2779 | 2754 | ||
2780 | if (page_bufs && buffer_uninit(page_bufs)) { | 2755 | if (buffer_uninit(page_bufs)) { |
2781 | ext4_set_bh_endio(page_bufs, inode); | 2756 | ext4_set_bh_endio(page_bufs, inode); |
2782 | ret = block_write_full_page_endio(page, noalloc_get_block_write, | 2757 | ret = block_write_full_page_endio(page, noalloc_get_block_write, |
2783 | wbc, ext4_end_io_buffer_write); | 2758 | wbc, ext4_end_io_buffer_write); |
@@ -2824,25 +2799,32 @@ static int ext4_da_writepages_trans_blocks(struct inode *inode) | |||
2824 | */ | 2799 | */ |
2825 | static int write_cache_pages_da(struct address_space *mapping, | 2800 | static int write_cache_pages_da(struct address_space *mapping, |
2826 | struct writeback_control *wbc, | 2801 | struct writeback_control *wbc, |
2827 | struct mpage_da_data *mpd) | 2802 | struct mpage_da_data *mpd, |
2803 | pgoff_t *done_index) | ||
2828 | { | 2804 | { |
2829 | int ret = 0; | 2805 | int ret = 0; |
2830 | int done = 0; | 2806 | int done = 0; |
2831 | struct pagevec pvec; | 2807 | struct pagevec pvec; |
2832 | int nr_pages; | 2808 | unsigned nr_pages; |
2833 | pgoff_t index; | 2809 | pgoff_t index; |
2834 | pgoff_t end; /* Inclusive */ | 2810 | pgoff_t end; /* Inclusive */ |
2835 | long nr_to_write = wbc->nr_to_write; | 2811 | long nr_to_write = wbc->nr_to_write; |
2812 | int tag; | ||
2836 | 2813 | ||
2837 | pagevec_init(&pvec, 0); | 2814 | pagevec_init(&pvec, 0); |
2838 | index = wbc->range_start >> PAGE_CACHE_SHIFT; | 2815 | index = wbc->range_start >> PAGE_CACHE_SHIFT; |
2839 | end = wbc->range_end >> PAGE_CACHE_SHIFT; | 2816 | end = wbc->range_end >> PAGE_CACHE_SHIFT; |
2840 | 2817 | ||
2818 | if (wbc->sync_mode == WB_SYNC_ALL) | ||
2819 | tag = PAGECACHE_TAG_TOWRITE; | ||
2820 | else | ||
2821 | tag = PAGECACHE_TAG_DIRTY; | ||
2822 | |||
2823 | *done_index = index; | ||
2841 | while (!done && (index <= end)) { | 2824 | while (!done && (index <= end)) { |
2842 | int i; | 2825 | int i; |
2843 | 2826 | ||
2844 | nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, | 2827 | nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, tag, |
2845 | PAGECACHE_TAG_DIRTY, | ||
2846 | min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1); | 2828 | min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1); |
2847 | if (nr_pages == 0) | 2829 | if (nr_pages == 0) |
2848 | break; | 2830 | break; |
@@ -2862,6 +2844,8 @@ static int write_cache_pages_da(struct address_space *mapping, | |||
2862 | break; | 2844 | break; |
2863 | } | 2845 | } |
2864 | 2846 | ||
2847 | *done_index = page->index + 1; | ||
2848 | |||
2865 | lock_page(page); | 2849 | lock_page(page); |
2866 | 2850 | ||
2867 | /* | 2851 | /* |
@@ -2947,6 +2931,8 @@ static int ext4_da_writepages(struct address_space *mapping, | |||
2947 | long desired_nr_to_write, nr_to_writebump = 0; | 2931 | long desired_nr_to_write, nr_to_writebump = 0; |
2948 | loff_t range_start = wbc->range_start; | 2932 | loff_t range_start = wbc->range_start; |
2949 | struct ext4_sb_info *sbi = EXT4_SB(mapping->host->i_sb); | 2933 | struct ext4_sb_info *sbi = EXT4_SB(mapping->host->i_sb); |
2934 | pgoff_t done_index = 0; | ||
2935 | pgoff_t end; | ||
2950 | 2936 | ||
2951 | trace_ext4_da_writepages(inode, wbc); | 2937 | trace_ext4_da_writepages(inode, wbc); |
2952 | 2938 | ||
@@ -2982,8 +2968,11 @@ static int ext4_da_writepages(struct address_space *mapping, | |||
2982 | wbc->range_start = index << PAGE_CACHE_SHIFT; | 2968 | wbc->range_start = index << PAGE_CACHE_SHIFT; |
2983 | wbc->range_end = LLONG_MAX; | 2969 | wbc->range_end = LLONG_MAX; |
2984 | wbc->range_cyclic = 0; | 2970 | wbc->range_cyclic = 0; |
2985 | } else | 2971 | end = -1; |
2972 | } else { | ||
2986 | index = wbc->range_start >> PAGE_CACHE_SHIFT; | 2973 | index = wbc->range_start >> PAGE_CACHE_SHIFT; |
2974 | end = wbc->range_end >> PAGE_CACHE_SHIFT; | ||
2975 | } | ||
2987 | 2976 | ||
2988 | /* | 2977 | /* |
2989 | * This works around two forms of stupidity. The first is in | 2978 | * This works around two forms of stupidity. The first is in |
@@ -3002,9 +2991,12 @@ static int ext4_da_writepages(struct address_space *mapping, | |||
3002 | * sbi->max_writeback_mb_bump whichever is smaller. | 2991 | * sbi->max_writeback_mb_bump whichever is smaller. |
3003 | */ | 2992 | */ |
3004 | max_pages = sbi->s_max_writeback_mb_bump << (20 - PAGE_CACHE_SHIFT); | 2993 | max_pages = sbi->s_max_writeback_mb_bump << (20 - PAGE_CACHE_SHIFT); |
3005 | if (!range_cyclic && range_whole) | 2994 | if (!range_cyclic && range_whole) { |
3006 | desired_nr_to_write = wbc->nr_to_write * 8; | 2995 | if (wbc->nr_to_write == LONG_MAX) |
3007 | else | 2996 | desired_nr_to_write = wbc->nr_to_write; |
2997 | else | ||
2998 | desired_nr_to_write = wbc->nr_to_write * 8; | ||
2999 | } else | ||
3008 | desired_nr_to_write = ext4_num_dirty_pages(inode, index, | 3000 | desired_nr_to_write = ext4_num_dirty_pages(inode, index, |
3009 | max_pages); | 3001 | max_pages); |
3010 | if (desired_nr_to_write > max_pages) | 3002 | if (desired_nr_to_write > max_pages) |
@@ -3021,6 +3013,9 @@ static int ext4_da_writepages(struct address_space *mapping, | |||
3021 | pages_skipped = wbc->pages_skipped; | 3013 | pages_skipped = wbc->pages_skipped; |
3022 | 3014 | ||
3023 | retry: | 3015 | retry: |
3016 | if (wbc->sync_mode == WB_SYNC_ALL) | ||
3017 | tag_pages_for_writeback(mapping, index, end); | ||
3018 | |||
3024 | while (!ret && wbc->nr_to_write > 0) { | 3019 | while (!ret && wbc->nr_to_write > 0) { |
3025 | 3020 | ||
3026 | /* | 3021 | /* |
@@ -3059,16 +3054,14 @@ retry: | |||
3059 | mpd.io_done = 0; | 3054 | mpd.io_done = 0; |
3060 | mpd.pages_written = 0; | 3055 | mpd.pages_written = 0; |
3061 | mpd.retval = 0; | 3056 | mpd.retval = 0; |
3062 | ret = write_cache_pages_da(mapping, wbc, &mpd); | 3057 | ret = write_cache_pages_da(mapping, wbc, &mpd, &done_index); |
3063 | /* | 3058 | /* |
3064 | * If we have a contiguous extent of pages and we | 3059 | * If we have a contiguous extent of pages and we |
3065 | * haven't done the I/O yet, map the blocks and submit | 3060 | * haven't done the I/O yet, map the blocks and submit |
3066 | * them for I/O. | 3061 | * them for I/O. |
3067 | */ | 3062 | */ |
3068 | if (!mpd.io_done && mpd.next_page != mpd.first_page) { | 3063 | if (!mpd.io_done && mpd.next_page != mpd.first_page) { |
3069 | if (mpage_da_map_blocks(&mpd) == 0) | 3064 | mpage_da_map_and_submit(&mpd); |
3070 | mpage_da_submit_io(&mpd); | ||
3071 | mpd.io_done = 1; | ||
3072 | ret = MPAGE_DA_EXTENT_TAIL; | 3065 | ret = MPAGE_DA_EXTENT_TAIL; |
3073 | } | 3066 | } |
3074 | trace_ext4_da_write_pages(inode, &mpd); | 3067 | trace_ext4_da_write_pages(inode, &mpd); |
@@ -3115,14 +3108,13 @@ retry: | |||
3115 | __func__, wbc->nr_to_write, ret); | 3108 | __func__, wbc->nr_to_write, ret); |
3116 | 3109 | ||
3117 | /* Update index */ | 3110 | /* Update index */ |
3118 | index += pages_written; | ||
3119 | wbc->range_cyclic = range_cyclic; | 3111 | wbc->range_cyclic = range_cyclic; |
3120 | if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0)) | 3112 | if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0)) |
3121 | /* | 3113 | /* |
3122 | * set the writeback_index so that range_cyclic | 3114 | * set the writeback_index so that range_cyclic |
3123 | * mode will write it back later | 3115 | * mode will write it back later |
3124 | */ | 3116 | */ |
3125 | mapping->writeback_index = index; | 3117 | mapping->writeback_index = done_index; |
3126 | 3118 | ||
3127 | out_writepages: | 3119 | out_writepages: |
3128 | wbc->nr_to_write -= nr_to_writebump; | 3120 | wbc->nr_to_write -= nr_to_writebump; |
@@ -3457,15 +3449,6 @@ ext4_readpages(struct file *file, struct address_space *mapping, | |||
3457 | return mpage_readpages(mapping, pages, nr_pages, ext4_get_block); | 3449 | return mpage_readpages(mapping, pages, nr_pages, ext4_get_block); |
3458 | } | 3450 | } |
3459 | 3451 | ||
3460 | static void ext4_free_io_end(ext4_io_end_t *io) | ||
3461 | { | ||
3462 | BUG_ON(!io); | ||
3463 | if (io->page) | ||
3464 | put_page(io->page); | ||
3465 | iput(io->inode); | ||
3466 | kfree(io); | ||
3467 | } | ||
3468 | |||
3469 | static void ext4_invalidatepage_free_endio(struct page *page, unsigned long offset) | 3452 | static void ext4_invalidatepage_free_endio(struct page *page, unsigned long offset) |
3470 | { | 3453 | { |
3471 | struct buffer_head *head, *bh; | 3454 | struct buffer_head *head, *bh; |
@@ -3642,173 +3625,6 @@ static int ext4_get_block_write(struct inode *inode, sector_t iblock, | |||
3642 | EXT4_GET_BLOCKS_IO_CREATE_EXT); | 3625 | EXT4_GET_BLOCKS_IO_CREATE_EXT); |
3643 | } | 3626 | } |
3644 | 3627 | ||
3645 | static void dump_completed_IO(struct inode * inode) | ||
3646 | { | ||
3647 | #ifdef EXT4_DEBUG | ||
3648 | struct list_head *cur, *before, *after; | ||
3649 | ext4_io_end_t *io, *io0, *io1; | ||
3650 | unsigned long flags; | ||
3651 | |||
3652 | if (list_empty(&EXT4_I(inode)->i_completed_io_list)){ | ||
3653 | ext4_debug("inode %lu completed_io list is empty\n", inode->i_ino); | ||
3654 | return; | ||
3655 | } | ||
3656 | |||
3657 | ext4_debug("Dump inode %lu completed_io list \n", inode->i_ino); | ||
3658 | spin_lock_irqsave(&EXT4_I(inode)->i_completed_io_lock, flags); | ||
3659 | list_for_each_entry(io, &EXT4_I(inode)->i_completed_io_list, list){ | ||
3660 | cur = &io->list; | ||
3661 | before = cur->prev; | ||
3662 | io0 = container_of(before, ext4_io_end_t, list); | ||
3663 | after = cur->next; | ||
3664 | io1 = container_of(after, ext4_io_end_t, list); | ||
3665 | |||
3666 | ext4_debug("io 0x%p from inode %lu,prev 0x%p,next 0x%p\n", | ||
3667 | io, inode->i_ino, io0, io1); | ||
3668 | } | ||
3669 | spin_unlock_irqrestore(&EXT4_I(inode)->i_completed_io_lock, flags); | ||
3670 | #endif | ||
3671 | } | ||
3672 | |||
3673 | /* | ||
3674 | * check a range of space and convert unwritten extents to written. | ||
3675 | */ | ||
3676 | static int ext4_end_io_nolock(ext4_io_end_t *io) | ||
3677 | { | ||
3678 | struct inode *inode = io->inode; | ||
3679 | loff_t offset = io->offset; | ||
3680 | ssize_t size = io->size; | ||
3681 | int ret = 0; | ||
3682 | |||
3683 | ext4_debug("ext4_end_io_nolock: io 0x%p from inode %lu,list->next 0x%p," | ||
3684 | "list->prev 0x%p\n", | ||
3685 | io, inode->i_ino, io->list.next, io->list.prev); | ||
3686 | |||
3687 | if (list_empty(&io->list)) | ||
3688 | return ret; | ||
3689 | |||
3690 | if (io->flag != EXT4_IO_UNWRITTEN) | ||
3691 | return ret; | ||
3692 | |||
3693 | ret = ext4_convert_unwritten_extents(inode, offset, size); | ||
3694 | if (ret < 0) { | ||
3695 | printk(KERN_EMERG "%s: failed to convert unwritten" | ||
3696 | "extents to written extents, error is %d" | ||
3697 | " io is still on inode %lu aio dio list\n", | ||
3698 | __func__, ret, inode->i_ino); | ||
3699 | return ret; | ||
3700 | } | ||
3701 | |||
3702 | if (io->iocb) | ||
3703 | aio_complete(io->iocb, io->result, 0); | ||
3704 | /* clear the DIO AIO unwritten flag */ | ||
3705 | io->flag = 0; | ||
3706 | return ret; | ||
3707 | } | ||
3708 | |||
3709 | /* | ||
3710 | * work on completed aio dio IO, to convert unwritten extents to extents | ||
3711 | */ | ||
3712 | static void ext4_end_io_work(struct work_struct *work) | ||
3713 | { | ||
3714 | ext4_io_end_t *io = container_of(work, ext4_io_end_t, work); | ||
3715 | struct inode *inode = io->inode; | ||
3716 | struct ext4_inode_info *ei = EXT4_I(inode); | ||
3717 | unsigned long flags; | ||
3718 | int ret; | ||
3719 | |||
3720 | mutex_lock(&inode->i_mutex); | ||
3721 | ret = ext4_end_io_nolock(io); | ||
3722 | if (ret < 0) { | ||
3723 | mutex_unlock(&inode->i_mutex); | ||
3724 | return; | ||
3725 | } | ||
3726 | |||
3727 | spin_lock_irqsave(&ei->i_completed_io_lock, flags); | ||
3728 | if (!list_empty(&io->list)) | ||
3729 | list_del_init(&io->list); | ||
3730 | spin_unlock_irqrestore(&ei->i_completed_io_lock, flags); | ||
3731 | mutex_unlock(&inode->i_mutex); | ||
3732 | ext4_free_io_end(io); | ||
3733 | } | ||
3734 | |||
3735 | /* | ||
3736 | * This function is called from ext4_sync_file(). | ||
3737 | * | ||
3738 | * When IO is completed, the work to convert unwritten extents to | ||
3739 | * written is queued on workqueue but may not get immediately | ||
3740 | * scheduled. When fsync is called, we need to ensure the | ||
3741 | * conversion is complete before fsync returns. | ||
3742 | * The inode keeps track of a list of pending/completed IO that | ||
3743 | * might needs to do the conversion. This function walks through | ||
3744 | * the list and convert the related unwritten extents for completed IO | ||
3745 | * to written. | ||
3746 | * The function return the number of pending IOs on success. | ||
3747 | */ | ||
3748 | int flush_completed_IO(struct inode *inode) | ||
3749 | { | ||
3750 | ext4_io_end_t *io; | ||
3751 | struct ext4_inode_info *ei = EXT4_I(inode); | ||
3752 | unsigned long flags; | ||
3753 | int ret = 0; | ||
3754 | int ret2 = 0; | ||
3755 | |||
3756 | if (list_empty(&ei->i_completed_io_list)) | ||
3757 | return ret; | ||
3758 | |||
3759 | dump_completed_IO(inode); | ||
3760 | spin_lock_irqsave(&ei->i_completed_io_lock, flags); | ||
3761 | while (!list_empty(&ei->i_completed_io_list)){ | ||
3762 | io = list_entry(ei->i_completed_io_list.next, | ||
3763 | ext4_io_end_t, list); | ||
3764 | /* | ||
3765 | * Calling ext4_end_io_nolock() to convert completed | ||
3766 | * IO to written. | ||
3767 | * | ||
3768 | * When ext4_sync_file() is called, run_queue() may already | ||
3769 | * about to flush the work corresponding to this io structure. | ||
3770 | * It will be upset if it founds the io structure related | ||
3771 | * to the work-to-be schedule is freed. | ||
3772 | * | ||
3773 | * Thus we need to keep the io structure still valid here after | ||
3774 | * convertion finished. The io structure has a flag to | ||
3775 | * avoid double converting from both fsync and background work | ||
3776 | * queue work. | ||
3777 | */ | ||
3778 | spin_unlock_irqrestore(&ei->i_completed_io_lock, flags); | ||
3779 | ret = ext4_end_io_nolock(io); | ||
3780 | spin_lock_irqsave(&ei->i_completed_io_lock, flags); | ||
3781 | if (ret < 0) | ||
3782 | ret2 = ret; | ||
3783 | else | ||
3784 | list_del_init(&io->list); | ||
3785 | } | ||
3786 | spin_unlock_irqrestore(&ei->i_completed_io_lock, flags); | ||
3787 | return (ret2 < 0) ? ret2 : 0; | ||
3788 | } | ||
3789 | |||
3790 | static ext4_io_end_t *ext4_init_io_end (struct inode *inode, gfp_t flags) | ||
3791 | { | ||
3792 | ext4_io_end_t *io = NULL; | ||
3793 | |||
3794 | io = kmalloc(sizeof(*io), flags); | ||
3795 | |||
3796 | if (io) { | ||
3797 | igrab(inode); | ||
3798 | io->inode = inode; | ||
3799 | io->flag = 0; | ||
3800 | io->offset = 0; | ||
3801 | io->size = 0; | ||
3802 | io->page = NULL; | ||
3803 | io->iocb = NULL; | ||
3804 | io->result = 0; | ||
3805 | INIT_WORK(&io->work, ext4_end_io_work); | ||
3806 | INIT_LIST_HEAD(&io->list); | ||
3807 | } | ||
3808 | |||
3809 | return io; | ||
3810 | } | ||
3811 | |||
3812 | static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset, | 3628 | static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset, |
3813 | ssize_t size, void *private, int ret, | 3629 | ssize_t size, void *private, int ret, |
3814 | bool is_async) | 3630 | bool is_async) |
@@ -3828,7 +3644,7 @@ static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset, | |||
3828 | size); | 3644 | size); |
3829 | 3645 | ||
3830 | /* if not aio dio with unwritten extents, just free io and return */ | 3646 | /* if not aio dio with unwritten extents, just free io and return */ |
3831 | if (io_end->flag != EXT4_IO_UNWRITTEN){ | 3647 | if (!(io_end->flag & EXT4_IO_END_UNWRITTEN)) { |
3832 | ext4_free_io_end(io_end); | 3648 | ext4_free_io_end(io_end); |
3833 | iocb->private = NULL; | 3649 | iocb->private = NULL; |
3834 | out: | 3650 | out: |
@@ -3845,14 +3661,14 @@ out: | |||
3845 | } | 3661 | } |
3846 | wq = EXT4_SB(io_end->inode->i_sb)->dio_unwritten_wq; | 3662 | wq = EXT4_SB(io_end->inode->i_sb)->dio_unwritten_wq; |
3847 | 3663 | ||
3848 | /* queue the work to convert unwritten extents to written */ | ||
3849 | queue_work(wq, &io_end->work); | ||
3850 | |||
3851 | /* Add the io_end to per-inode completed aio dio list*/ | 3664 | /* Add the io_end to per-inode completed aio dio list*/ |
3852 | ei = EXT4_I(io_end->inode); | 3665 | ei = EXT4_I(io_end->inode); |
3853 | spin_lock_irqsave(&ei->i_completed_io_lock, flags); | 3666 | spin_lock_irqsave(&ei->i_completed_io_lock, flags); |
3854 | list_add_tail(&io_end->list, &ei->i_completed_io_list); | 3667 | list_add_tail(&io_end->list, &ei->i_completed_io_list); |
3855 | spin_unlock_irqrestore(&ei->i_completed_io_lock, flags); | 3668 | spin_unlock_irqrestore(&ei->i_completed_io_lock, flags); |
3669 | |||
3670 | /* queue the work to convert unwritten extents to written */ | ||
3671 | queue_work(wq, &io_end->work); | ||
3856 | iocb->private = NULL; | 3672 | iocb->private = NULL; |
3857 | } | 3673 | } |
3858 | 3674 | ||
@@ -3873,7 +3689,7 @@ static void ext4_end_io_buffer_write(struct buffer_head *bh, int uptodate) | |||
3873 | goto out; | 3689 | goto out; |
3874 | } | 3690 | } |
3875 | 3691 | ||
3876 | io_end->flag = EXT4_IO_UNWRITTEN; | 3692 | io_end->flag = EXT4_IO_END_UNWRITTEN; |
3877 | inode = io_end->inode; | 3693 | inode = io_end->inode; |
3878 | 3694 | ||
3879 | /* Add the io_end to per-inode completed io list*/ | 3695 | /* Add the io_end to per-inode completed io list*/ |
@@ -5464,6 +5280,7 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr) | |||
5464 | { | 5280 | { |
5465 | struct inode *inode = dentry->d_inode; | 5281 | struct inode *inode = dentry->d_inode; |
5466 | int error, rc = 0; | 5282 | int error, rc = 0; |
5283 | int orphan = 0; | ||
5467 | const unsigned int ia_valid = attr->ia_valid; | 5284 | const unsigned int ia_valid = attr->ia_valid; |
5468 | 5285 | ||
5469 | error = inode_change_ok(inode, attr); | 5286 | error = inode_change_ok(inode, attr); |
@@ -5519,8 +5336,10 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr) | |||
5519 | error = PTR_ERR(handle); | 5336 | error = PTR_ERR(handle); |
5520 | goto err_out; | 5337 | goto err_out; |
5521 | } | 5338 | } |
5522 | 5339 | if (ext4_handle_valid(handle)) { | |
5523 | error = ext4_orphan_add(handle, inode); | 5340 | error = ext4_orphan_add(handle, inode); |
5341 | orphan = 1; | ||
5342 | } | ||
5524 | EXT4_I(inode)->i_disksize = attr->ia_size; | 5343 | EXT4_I(inode)->i_disksize = attr->ia_size; |
5525 | rc = ext4_mark_inode_dirty(handle, inode); | 5344 | rc = ext4_mark_inode_dirty(handle, inode); |
5526 | if (!error) | 5345 | if (!error) |
@@ -5538,6 +5357,7 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr) | |||
5538 | goto err_out; | 5357 | goto err_out; |
5539 | } | 5358 | } |
5540 | ext4_orphan_del(handle, inode); | 5359 | ext4_orphan_del(handle, inode); |
5360 | orphan = 0; | ||
5541 | ext4_journal_stop(handle); | 5361 | ext4_journal_stop(handle); |
5542 | goto err_out; | 5362 | goto err_out; |
5543 | } | 5363 | } |
@@ -5560,7 +5380,7 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr) | |||
5560 | * If the call to ext4_truncate failed to get a transaction handle at | 5380 | * If the call to ext4_truncate failed to get a transaction handle at |
5561 | * all, we need to clean up the in-core orphan list manually. | 5381 | * all, we need to clean up the in-core orphan list manually. |
5562 | */ | 5382 | */ |
5563 | if (inode->i_nlink) | 5383 | if (orphan && inode->i_nlink) |
5564 | ext4_orphan_del(NULL, inode); | 5384 | ext4_orphan_del(NULL, inode); |
5565 | 5385 | ||
5566 | if (!rc && (ia_valid & ATTR_MODE)) | 5386 | if (!rc && (ia_valid & ATTR_MODE)) |
@@ -5592,9 +5412,7 @@ int ext4_getattr(struct vfsmount *mnt, struct dentry *dentry, | |||
5592 | * will return the blocks that include the delayed allocation | 5412 | * will return the blocks that include the delayed allocation |
5593 | * blocks for this file. | 5413 | * blocks for this file. |
5594 | */ | 5414 | */ |
5595 | spin_lock(&EXT4_I(inode)->i_block_reservation_lock); | ||
5596 | delalloc_blocks = EXT4_I(inode)->i_reserved_data_blocks; | 5415 | delalloc_blocks = EXT4_I(inode)->i_reserved_data_blocks; |
5597 | spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); | ||
5598 | 5416 | ||
5599 | stat->blocks += (delalloc_blocks << inode->i_sb->s_blocksize_bits)>>9; | 5417 | stat->blocks += (delalloc_blocks << inode->i_sb->s_blocksize_bits)>>9; |
5600 | return 0; | 5418 | return 0; |
@@ -5643,7 +5461,7 @@ static int ext4_index_trans_blocks(struct inode *inode, int nrblocks, int chunk) | |||
5643 | * | 5461 | * |
5644 | * Also account for superblock, inode, quota and xattr blocks | 5462 | * Also account for superblock, inode, quota and xattr blocks |
5645 | */ | 5463 | */ |
5646 | int ext4_meta_trans_blocks(struct inode *inode, int nrblocks, int chunk) | 5464 | static int ext4_meta_trans_blocks(struct inode *inode, int nrblocks, int chunk) |
5647 | { | 5465 | { |
5648 | ext4_group_t groups, ngroups = ext4_get_groups_count(inode->i_sb); | 5466 | ext4_group_t groups, ngroups = ext4_get_groups_count(inode->i_sb); |
5649 | int gdpblocks; | 5467 | int gdpblocks; |
@@ -5831,6 +5649,7 @@ int ext4_mark_inode_dirty(handle_t *handle, struct inode *inode) | |||
5831 | int err, ret; | 5649 | int err, ret; |
5832 | 5650 | ||
5833 | might_sleep(); | 5651 | might_sleep(); |
5652 | trace_ext4_mark_inode_dirty(inode, _RET_IP_); | ||
5834 | err = ext4_reserve_inode_write(handle, inode, &iloc); | 5653 | err = ext4_reserve_inode_write(handle, inode, &iloc); |
5835 | if (ext4_handle_valid(handle) && | 5654 | if (ext4_handle_valid(handle) && |
5836 | EXT4_I(inode)->i_extra_isize < sbi->s_want_extra_isize && | 5655 | EXT4_I(inode)->i_extra_isize < sbi->s_want_extra_isize && |