author		Theodore Ts'o <tytso@mit.edu>	2010-10-27 23:44:47 -0400
committer	Theodore Ts'o <tytso@mit.edu>	2010-10-27 23:44:47 -0400
commit		a107e5a3a473a2ea62bd5af24e11b84adf1486ff (patch)
tree		d36c2cb38d8be88d4d75cdebc354aa140aa0e470 /fs/ext4/inode.c
parent		e3e1288e86a07cdeb0aee5860a2dff111c6eff79 (diff)
parent		a269029d0e2192046be4c07ed78a45022469ee4c (diff)
Merge branch 'next' into upstream-merge
Conflicts:
fs/ext4/inode.c
fs/ext4/mballoc.c
include/trace/events/ext4.h
Diffstat (limited to 'fs/ext4/inode.c')
-rw-r--r--	fs/ext4/inode.c	587
1 file changed, 204 insertions(+), 383 deletions(-)
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 49635ef236f8..2d6c6c8c036d 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -60,6 +60,12 @@ static inline int ext4_begin_ordered_truncate(struct inode *inode,
 }
 
 static void ext4_invalidatepage(struct page *page, unsigned long offset);
+static int noalloc_get_block_write(struct inode *inode, sector_t iblock,
+				   struct buffer_head *bh_result, int create);
+static int ext4_set_bh_endio(struct buffer_head *bh, struct inode *inode);
+static void ext4_end_io_buffer_write(struct buffer_head *bh, int uptodate);
+static int __ext4_journalled_writepage(struct page *page, unsigned int len);
+static int ext4_bh_delay_or_unwritten(handle_t *handle, struct buffer_head *bh);
 
 /*
  * Test whether an inode is a fast symlink.
@@ -755,6 +761,11 @@ static int ext4_alloc_branch(handle_t *handle, struct inode *inode,
		 * parent to disk.
		 */
		bh = sb_getblk(inode->i_sb, new_blocks[n-1]);
+		if (unlikely(!bh)) {
+			err = -EIO;
+			goto failed;
+		}
+
		branch[n].bh = bh;
		lock_buffer(bh);
		BUFFER_TRACE(bh, "call get_create_access");
@@ -1207,8 +1218,10 @@ static pgoff_t ext4_num_dirty_pages(struct inode *inode, pgoff_t idx,
				break;
			idx++;
			num++;
-			if (num >= max_pages)
+			if (num >= max_pages) {
+				done = 1;
				break;
+			}
		}
		pagevec_release(&pvec);
	}
@@ -1995,16 +2008,23 @@ static void ext4_da_page_release_reservation(struct page *page,
  *
  * As pages are already locked by write_cache_pages(), we can't use it
  */
-static int mpage_da_submit_io(struct mpage_da_data *mpd)
+static int mpage_da_submit_io(struct mpage_da_data *mpd,
+			      struct ext4_map_blocks *map)
 {
-	long pages_skipped;
	struct pagevec pvec;
	unsigned long index, end;
	int ret = 0, err, nr_pages, i;
	struct inode *inode = mpd->inode;
	struct address_space *mapping = inode->i_mapping;
+	loff_t size = i_size_read(inode);
+	unsigned int len, block_start;
+	struct buffer_head *bh, *page_bufs = NULL;
+	int journal_data = ext4_should_journal_data(inode);
+	sector_t pblock = 0, cur_logical = 0;
+	struct ext4_io_submit io_submit;
 
	BUG_ON(mpd->next_page <= mpd->first_page);
+	memset(&io_submit, 0, sizeof(io_submit));
	/*
	 * We need to start from the first_page to the next_page - 1
	 * to make sure we also write the mapped dirty buffer_heads.
@@ -2020,122 +2040,108 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd)
		if (nr_pages == 0)
			break;
		for (i = 0; i < nr_pages; i++) {
+			int commit_write = 0, redirty_page = 0;
			struct page *page = pvec.pages[i];
 
			index = page->index;
			if (index > end)
				break;
+
+			if (index == size >> PAGE_CACHE_SHIFT)
+				len = size & ~PAGE_CACHE_MASK;
+			else
+				len = PAGE_CACHE_SIZE;
+			if (map) {
+				cur_logical = index << (PAGE_CACHE_SHIFT -
+							inode->i_blkbits);
+				pblock = map->m_pblk + (cur_logical -
+							map->m_lblk);
+			}
			index++;
 
			BUG_ON(!PageLocked(page));
			BUG_ON(PageWriteback(page));
 
-			pages_skipped = mpd->wbc->pages_skipped;
-			err = mapping->a_ops->writepage(page, mpd->wbc);
-			if (!err && (pages_skipped == mpd->wbc->pages_skipped))
-				/*
-				 * have successfully written the page
-				 * without skipping the same
-				 */
-				mpd->pages_written++;
			/*
-			 * In error case, we have to continue because
-			 * remaining pages are still locked
-			 * XXX: unlock and re-dirty them?
+			 * If the page does not have buffers (for
+			 * whatever reason), try to create them using
+			 * __block_write_begin. If this fails,
+			 * redirty the page and move on.
			 */
-			if (ret == 0)
-				ret = err;
-		}
-		pagevec_release(&pvec);
-	}
-	return ret;
-}
-
-/*
- * mpage_put_bnr_to_bhs - walk blocks and assign them actual numbers
- *
- * the function goes through all passed space and put actual disk
- * block numbers into buffer heads, dropping BH_Delay and BH_Unwritten
- */
-static void mpage_put_bnr_to_bhs(struct mpage_da_data *mpd,
-				 struct ext4_map_blocks *map)
-{
-	struct inode *inode = mpd->inode;
-	struct address_space *mapping = inode->i_mapping;
-	int blocks = map->m_len;
-	sector_t pblock = map->m_pblk, cur_logical;
-	struct buffer_head *head, *bh;
-	pgoff_t index, end;
-	struct pagevec pvec;
-	int nr_pages, i;
-
-	index = map->m_lblk >> (PAGE_CACHE_SHIFT - inode->i_blkbits);
-	end = (map->m_lblk + blocks - 1) >> (PAGE_CACHE_SHIFT - inode->i_blkbits);
-	cur_logical = index << (PAGE_CACHE_SHIFT - inode->i_blkbits);
-
-	pagevec_init(&pvec, 0);
-
-	while (index <= end) {
-		/* XXX: optimize tail */
-		nr_pages = pagevec_lookup(&pvec, mapping, index, PAGEVEC_SIZE);
-		if (nr_pages == 0)
-			break;
-		for (i = 0; i < nr_pages; i++) {
-			struct page *page = pvec.pages[i];
-
-			index = page->index;
-			if (index > end)
-				break;
-			index++;
-
-			BUG_ON(!PageLocked(page));
-			BUG_ON(PageWriteback(page));
-			BUG_ON(!page_has_buffers(page));
-
-			bh = page_buffers(page);
-			head = bh;
-
-			/* skip blocks out of the range */
-			do {
-				if (cur_logical >= map->m_lblk)
-					break;
-				cur_logical++;
-			} while ((bh = bh->b_this_page) != head);
+			if (!page_has_buffers(page)) {
+				if (__block_write_begin(page, 0, len,
+						noalloc_get_block_write)) {
+				redirty_page:
+					redirty_page_for_writepage(mpd->wbc,
+								   page);
+					unlock_page(page);
+					continue;
+				}
+				commit_write = 1;
+			}
 
+			bh = page_bufs = page_buffers(page);
+			block_start = 0;
			do {
-				if (cur_logical >= map->m_lblk + blocks)
-					break;
-
-				if (buffer_delay(bh) || buffer_unwritten(bh)) {
-
-					BUG_ON(bh->b_bdev != inode->i_sb->s_bdev);
-
+				if (!bh)
+					goto redirty_page;
+				if (map && (cur_logical >= map->m_lblk) &&
+				    (cur_logical <= (map->m_lblk +
+						     (map->m_len - 1)))) {
					if (buffer_delay(bh)) {
						clear_buffer_delay(bh);
						bh->b_blocknr = pblock;
-					} else {
-						/*
-						 * unwritten already should have
-						 * blocknr assigned. Verify that
-						 */
-						clear_buffer_unwritten(bh);
-						BUG_ON(bh->b_blocknr != pblock);
					}
+					if (buffer_unwritten(bh) ||
+					    buffer_mapped(bh))
+						BUG_ON(bh->b_blocknr != pblock);
+					if (map->m_flags & EXT4_MAP_UNINIT)
+						set_buffer_uninit(bh);
+					clear_buffer_unwritten(bh);
+				}
 
-				} else if (buffer_mapped(bh))
-					BUG_ON(bh->b_blocknr != pblock);
-
-				if (map->m_flags & EXT4_MAP_UNINIT)
-					set_buffer_uninit(bh);
+				/* redirty page if block allocation undone */
+				if (buffer_delay(bh) || buffer_unwritten(bh))
+					redirty_page = 1;
+				bh = bh->b_this_page;
+				block_start += bh->b_size;
				cur_logical++;
				pblock++;
-			} while ((bh = bh->b_this_page) != head);
+			} while (bh != page_bufs);
+
+			if (redirty_page)
+				goto redirty_page;
+
+			if (commit_write)
+				/* mark the buffer_heads as dirty & uptodate */
+				block_commit_write(page, 0, len);
+
+			/*
+			 * Delalloc doesn't support data journalling,
+			 * but eventually maybe we'll lift this
+			 * restriction.
+			 */
+			if (unlikely(journal_data && PageChecked(page)))
+				err = __ext4_journalled_writepage(page, len);
+			else
+				err = ext4_bio_write_page(&io_submit, page,
+							  len, mpd->wbc);
+
+			if (!err)
+				mpd->pages_written++;
+			/*
+			 * In error case, we have to continue because
+			 * remaining pages are still locked
+			 */
+			if (ret == 0)
+				ret = err;
		}
		pagevec_release(&pvec);
	}
+	ext4_io_submit(&io_submit);
+	return ret;
 }
 
-
 static void ext4_da_block_invalidatepages(struct mpage_da_data *mpd,
		sector_t logical, long blk_cnt)
 {
@@ -2187,35 +2193,32 @@ static void ext4_print_free_blocks(struct inode *inode)
 }
 
 /*
- * mpage_da_map_blocks - go through given space
+ * mpage_da_map_and_submit - go through given space, map them
+ * if necessary, and then submit them for I/O
  *
  * @mpd - bh describing space
  *
  * The function skips space we know is already mapped to disk blocks.
  *
  */
-static int mpage_da_map_blocks(struct mpage_da_data *mpd)
+static void mpage_da_map_and_submit(struct mpage_da_data *mpd)
 {
	int err, blks, get_blocks_flags;
-	struct ext4_map_blocks map;
+	struct ext4_map_blocks map, *mapp = NULL;
	sector_t next = mpd->b_blocknr;
	unsigned max_blocks = mpd->b_size >> mpd->inode->i_blkbits;
	loff_t disksize = EXT4_I(mpd->inode)->i_disksize;
	handle_t *handle = NULL;
 
	/*
-	 * We consider only non-mapped and non-allocated blocks
-	 */
-	if ((mpd->b_state & (1 << BH_Mapped)) &&
-		!(mpd->b_state & (1 << BH_Delay)) &&
-		!(mpd->b_state & (1 << BH_Unwritten)))
-		return 0;
-
-	/*
-	 * If we didn't accumulate anything to write simply return
+	 * If the blocks are mapped already, or we couldn't accumulate
+	 * any blocks, then proceed immediately to the submission stage.
	 */
-	if (!mpd->b_size)
-		return 0;
+	if ((mpd->b_size == 0) ||
+	    ((mpd->b_state & (1 << BH_Mapped)) &&
+	     !(mpd->b_state & (1 << BH_Delay)) &&
+	     !(mpd->b_state & (1 << BH_Unwritten))))
+		goto submit_io;
 
	handle = ext4_journal_current_handle();
	BUG_ON(!handle);
@@ -2252,17 +2255,18 @@ static int mpage_da_map_blocks(struct mpage_da_data *mpd)
 
	err = blks;
	/*
-	 * If get block returns with error we simply
-	 * return. Later writepage will redirty the page and
-	 * writepages will find the dirty page again
+	 * If get block returns EAGAIN or ENOSPC and there
+	 * appears to be free blocks we will call
+	 * ext4_writepage() for all of the pages which will
+	 * just redirty the pages.
	 */
	if (err == -EAGAIN)
-		return 0;
+		goto submit_io;
 
	if (err == -ENOSPC &&
	    ext4_count_free_blocks(sb)) {
		mpd->retval = err;
-		return 0;
+		goto submit_io;
	}
 
	/*
@@ -2287,10 +2291,11 @@ static int mpage_da_map_blocks(struct mpage_da_data *mpd)
		/* invalidate all the pages */
		ext4_da_block_invalidatepages(mpd, next,
				mpd->b_size >> mpd->inode->i_blkbits);
-		return err;
+		return;
	}
	BUG_ON(blks == 0);
 
+	mapp = &map;
	if (map.m_flags & EXT4_MAP_NEW) {
		struct block_device *bdev = mpd->inode->i_sb->s_bdev;
		int i;
@@ -2299,18 +2304,11 @@ static int mpage_da_map_blocks(struct mpage_da_data *mpd)
			unmap_underlying_metadata(bdev, map.m_pblk + i);
	}
 
-	/*
-	 * If blocks are delayed marked, we need to
-	 * put actual blocknr and drop delayed bit
-	 */
-	if ((mpd->b_state & (1 << BH_Delay)) ||
-	    (mpd->b_state & (1 << BH_Unwritten)))
-		mpage_put_bnr_to_bhs(mpd, &map);
-
	if (ext4_should_order_data(mpd->inode)) {
		err = ext4_jbd2_file_inode(handle, mpd->inode);
		if (err)
-			return err;
+			/* This only happens if the journal is aborted */
+			return;
	}
 
	/*
@@ -2321,10 +2319,16 @@ static int mpage_da_map_blocks(struct mpage_da_data *mpd)
	disksize = i_size_read(mpd->inode);
	if (disksize > EXT4_I(mpd->inode)->i_disksize) {
		ext4_update_i_disksize(mpd->inode, disksize);
-		return ext4_mark_inode_dirty(handle, mpd->inode);
+		err = ext4_mark_inode_dirty(handle, mpd->inode);
+		if (err)
+			ext4_error(mpd->inode->i_sb,
+				   "Failed to mark inode %lu dirty",
+				   mpd->inode->i_ino);
	}
 
-	return 0;
+submit_io:
+	mpage_da_submit_io(mpd, mapp);
+	mpd->io_done = 1;
 }
 
 #define BH_FLAGS ((1 << BH_Uptodate) | (1 << BH_Mapped) | \
@@ -2401,9 +2405,7 @@ flush_it:
	 * We couldn't merge the block to our extent, so we
	 * need to flush current extent and start new one
	 */
-	if (mpage_da_map_blocks(mpd) == 0)
-		mpage_da_submit_io(mpd);
-	mpd->io_done = 1;
+	mpage_da_map_and_submit(mpd);
	return;
 }
 
@@ -2422,9 +2424,9 @@ static int ext4_bh_delay_or_unwritten(handle_t *handle, struct buffer_head *bh)
  * The function finds extents of pages and scan them for all blocks.
  */
 static int __mpage_da_writepage(struct page *page,
-				struct writeback_control *wbc, void *data)
+				struct writeback_control *wbc,
+				struct mpage_da_data *mpd)
 {
-	struct mpage_da_data *mpd = data;
	struct inode *inode = mpd->inode;
	struct buffer_head *bh, *head;
	sector_t logical;
@@ -2435,15 +2437,13 @@ static int __mpage_da_writepage(struct page *page,
	if (mpd->next_page != page->index) {
		/*
		 * Nope, we can't. So, we map non-allocated blocks
-		 * and start IO on them using writepage()
+		 * and start IO on them
		 */
		if (mpd->next_page != mpd->first_page) {
-			if (mpage_da_map_blocks(mpd) == 0)
-				mpage_da_submit_io(mpd);
+			mpage_da_map_and_submit(mpd);
			/*
			 * skip rest of the page in the page_vec
			 */
-			mpd->io_done = 1;
			redirty_page_for_writepage(wbc, page);
			unlock_page(page);
			return MPAGE_DA_EXTENT_TAIL;
@@ -2622,6 +2622,7 @@ static int __ext4_journalled_writepage(struct page *page,
	int ret = 0;
	int err;
 
+	ClearPageChecked(page);
	page_bufs = page_buffers(page);
	BUG_ON(!page_bufs);
	walk_page_buffers(handle, page_bufs, 0, len, NULL, bget_one);
@@ -2699,7 +2700,7 @@ static void ext4_end_io_buffer_write(struct buffer_head *bh, int uptodate);
 static int ext4_writepage(struct page *page,
			  struct writeback_control *wbc)
 {
-	int ret = 0;
+	int ret = 0, commit_write = 0;
	loff_t size;
	unsigned int len;
	struct buffer_head *page_bufs = NULL;
@@ -2712,71 +2713,46 @@ static int ext4_writepage(struct page *page,
	else
		len = PAGE_CACHE_SIZE;
 
-	if (page_has_buffers(page)) {
-		page_bufs = page_buffers(page);
-		if (walk_page_buffers(NULL, page_bufs, 0, len, NULL,
-					ext4_bh_delay_or_unwritten)) {
-			/*
-			 * We don't want to do block allocation
-			 * So redirty the page and return
-			 * We may reach here when we do a journal commit
-			 * via journal_submit_inode_data_buffers.
-			 * If we don't have mapping block we just ignore
-			 * them. We can also reach here via shrink_page_list
-			 */
+	/*
+	 * If the page does not have buffers (for whatever reason),
+	 * try to create them using __block_write_begin. If this
+	 * fails, redirty the page and move on.
+	 */
+	if (!page_buffers(page)) {
+		if (__block_write_begin(page, 0, len,
+					noalloc_get_block_write)) {
+		redirty_page:
			redirty_page_for_writepage(wbc, page);
			unlock_page(page);
			return 0;
		}
-	} else {
+		commit_write = 1;
+	}
+	page_bufs = page_buffers(page);
+	if (walk_page_buffers(NULL, page_bufs, 0, len, NULL,
+			      ext4_bh_delay_or_unwritten)) {
		/*
-		 * The test for page_has_buffers() is subtle:
-		 * We know the page is dirty but it lost buffers. That means
-		 * that at some moment in time after write_begin()/write_end()
-		 * has been called all buffers have been clean and thus they
-		 * must have been written at least once. So they are all
-		 * mapped and we can happily proceed with mapping them
-		 * and writing the page.
-		 *
-		 * Try to initialize the buffer_heads and check whether
-		 * all are mapped and non delay. We don't want to
-		 * do block allocation here.
+		 * We don't want to do block allocation So redirty the
+		 * page and return We may reach here when we do a
+		 * journal commit via
+		 * journal_submit_inode_data_buffers. If we don't
+		 * have mapping block we just ignore them. We can also
+		 * reach here via shrink_page_list
		 */
-		ret = __block_write_begin(page, 0, len,
-					  noalloc_get_block_write);
-		if (!ret) {
-			page_bufs = page_buffers(page);
-			/* check whether all are mapped and non delay */
-			if (walk_page_buffers(NULL, page_bufs, 0, len, NULL,
-						ext4_bh_delay_or_unwritten)) {
-				redirty_page_for_writepage(wbc, page);
-				unlock_page(page);
-				return 0;
-			}
-		} else {
-			/*
-			 * We can't do block allocation here
-			 * so just redity the page and unlock
-			 * and return
-			 */
-			redirty_page_for_writepage(wbc, page);
-			unlock_page(page);
-			return 0;
-		}
+		goto redirty_page;
+	}
+	if (commit_write)
		/* now mark the buffer_heads as dirty and uptodate */
		block_commit_write(page, 0, len);
-	}
 
-	if (PageChecked(page) && ext4_should_journal_data(inode)) {
+	if (PageChecked(page) && ext4_should_journal_data(inode))
		/*
		 * It's mmapped pagecache. Add buffers and journal it. There
		 * doesn't seem much point in redirtying the page here.
		 */
-		ClearPageChecked(page);
		return __ext4_journalled_writepage(page, len);
-	}
 
-	if (page_bufs && buffer_uninit(page_bufs)) {
+	if (buffer_uninit(page_bufs)) {
		ext4_set_bh_endio(page_bufs, inode);
		ret = block_write_full_page_endio(page, noalloc_get_block_write,
						  wbc, ext4_end_io_buffer_write);
@@ -2823,25 +2799,32 @@ static int ext4_da_writepages_trans_blocks(struct inode *inode)
  */
 static int write_cache_pages_da(struct address_space *mapping,
				struct writeback_control *wbc,
-				struct mpage_da_data *mpd)
+				struct mpage_da_data *mpd,
+				pgoff_t *done_index)
 {
	int ret = 0;
	int done = 0;
	struct pagevec pvec;
-	int nr_pages;
+	unsigned nr_pages;
	pgoff_t index;
	pgoff_t end;		/* Inclusive */
	long nr_to_write = wbc->nr_to_write;
+	int tag;
 
	pagevec_init(&pvec, 0);
	index = wbc->range_start >> PAGE_CACHE_SHIFT;
	end = wbc->range_end >> PAGE_CACHE_SHIFT;
 
+	if (wbc->sync_mode == WB_SYNC_ALL)
+		tag = PAGECACHE_TAG_TOWRITE;
+	else
+		tag = PAGECACHE_TAG_DIRTY;
+
+	*done_index = index;
	while (!done && (index <= end)) {
		int i;
 
-		nr_pages = pagevec_lookup_tag(&pvec, mapping, &index,
-			      PAGECACHE_TAG_DIRTY,
+		nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, tag,
			      min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1);
		if (nr_pages == 0)
			break;
@@ -2861,6 +2844,8 @@ static int write_cache_pages_da(struct address_space *mapping,
				break;
			}
 
+			*done_index = page->index + 1;
+
			lock_page(page);
 
			/*
@@ -2946,6 +2931,8 @@ static int ext4_da_writepages(struct address_space *mapping,
	long desired_nr_to_write, nr_to_writebump = 0;
	loff_t range_start = wbc->range_start;
	struct ext4_sb_info *sbi = EXT4_SB(mapping->host->i_sb);
+	pgoff_t done_index = 0;
+	pgoff_t end;
 
	trace_ext4_da_writepages(inode, wbc);
 
@@ -2981,8 +2968,11 @@ static int ext4_da_writepages(struct address_space *mapping,
		wbc->range_start = index << PAGE_CACHE_SHIFT;
		wbc->range_end  = LLONG_MAX;
		wbc->range_cyclic = 0;
-	} else
+		end = -1;
+	} else {
		index = wbc->range_start >> PAGE_CACHE_SHIFT;
+		end = wbc->range_end >> PAGE_CACHE_SHIFT;
+	}
 
	/*
	 * This works around two forms of stupidity. The first is in
@@ -3001,9 +2991,12 @@ static int ext4_da_writepages(struct address_space *mapping,
	 * sbi->max_writeback_mb_bump whichever is smaller.
	 */
	max_pages = sbi->s_max_writeback_mb_bump << (20 - PAGE_CACHE_SHIFT);
-	if (!range_cyclic && range_whole)
-		desired_nr_to_write = wbc->nr_to_write * 8;
-	else
+	if (!range_cyclic && range_whole) {
+		if (wbc->nr_to_write == LONG_MAX)
+			desired_nr_to_write = wbc->nr_to_write;
+		else
+			desired_nr_to_write = wbc->nr_to_write * 8;
+	} else
		desired_nr_to_write = ext4_num_dirty_pages(inode, index,
							   max_pages);
	if (desired_nr_to_write > max_pages)
@@ -3020,6 +3013,9 @@ static int ext4_da_writepages(struct address_space *mapping,
	pages_skipped = wbc->pages_skipped;
 
 retry:
+	if (wbc->sync_mode == WB_SYNC_ALL)
+		tag_pages_for_writeback(mapping, index, end);
+
	while (!ret && wbc->nr_to_write > 0) {
 
		/*
@@ -3058,16 +3054,14 @@ retry:
		mpd.io_done = 0;
		mpd.pages_written = 0;
		mpd.retval = 0;
-		ret = write_cache_pages_da(mapping, wbc, &mpd);
+		ret = write_cache_pages_da(mapping, wbc, &mpd, &done_index);
		/*
		 * If we have a contiguous extent of pages and we
		 * haven't done the I/O yet, map the blocks and submit
		 * them for I/O.
		 */
		if (!mpd.io_done && mpd.next_page != mpd.first_page) {
-			if (mpage_da_map_blocks(&mpd) == 0)
-				mpage_da_submit_io(&mpd);
-			mpd.io_done = 1;
+			mpage_da_map_and_submit(&mpd);
			ret = MPAGE_DA_EXTENT_TAIL;
		}
		trace_ext4_da_write_pages(inode, &mpd);
@@ -3114,14 +3108,13 @@ retry:
		 __func__, wbc->nr_to_write, ret);
 
	/* Update index */
-	index += pages_written;
	wbc->range_cyclic = range_cyclic;
	if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
		/*
		 * set the writeback_index so that range_cyclic
		 * mode will write it back later
		 */
-		mapping->writeback_index = index;
+		mapping->writeback_index = done_index;
 
 out_writepages:
	wbc->nr_to_write -= nr_to_writebump;
@@ -3456,15 +3449,6 @@ ext4_readpages(struct file *file, struct address_space *mapping,
	return mpage_readpages(mapping, pages, nr_pages, ext4_get_block);
 }
 
-static void ext4_free_io_end(ext4_io_end_t *io)
-{
-	BUG_ON(!io);
-	if (io->page)
-		put_page(io->page);
-	iput(io->inode);
-	kfree(io);
-}
-
 static void ext4_invalidatepage_free_endio(struct page *page, unsigned long offset)
 {
	struct buffer_head *head, *bh;
@@ -3641,173 +3625,6 @@ static int ext4_get_block_write(struct inode *inode, sector_t iblock,
			       EXT4_GET_BLOCKS_IO_CREATE_EXT);
 }
 
-static void dump_completed_IO(struct inode * inode)
-{
-#ifdef EXT4_DEBUG
-	struct list_head *cur, *before, *after;
-	ext4_io_end_t *io, *io0, *io1;
-	unsigned long flags;
-
-	if (list_empty(&EXT4_I(inode)->i_completed_io_list)){
-		ext4_debug("inode %lu completed_io list is empty\n", inode->i_ino);
-		return;
-	}
-
-	ext4_debug("Dump inode %lu completed_io list \n", inode->i_ino);
-	spin_lock_irqsave(&EXT4_I(inode)->i_completed_io_lock, flags);
-	list_for_each_entry(io, &EXT4_I(inode)->i_completed_io_list, list){
-		cur = &io->list;
-		before = cur->prev;
-		io0 = container_of(before, ext4_io_end_t, list);
-		after = cur->next;
-		io1 = container_of(after, ext4_io_end_t, list);
-
-		ext4_debug("io 0x%p from inode %lu,prev 0x%p,next 0x%p\n",
-			    io, inode->i_ino, io0, io1);
-	}
-	spin_unlock_irqrestore(&EXT4_I(inode)->i_completed_io_lock, flags);
-#endif
-}
-
-/*
- * check a range of space and convert unwritten extents to written.
- */
-static int ext4_end_io_nolock(ext4_io_end_t *io)
-{
-	struct inode *inode = io->inode;
-	loff_t offset = io->offset;
-	ssize_t size = io->size;
-	int ret = 0;
-
-	ext4_debug("ext4_end_io_nolock: io 0x%p from inode %lu,list->next 0x%p,"
-		   "list->prev 0x%p\n",
-		   io, inode->i_ino, io->list.next, io->list.prev);
-
-	if (list_empty(&io->list))
-		return ret;
-
-	if (io->flag != EXT4_IO_UNWRITTEN)
-		return ret;
-
-	ret = ext4_convert_unwritten_extents(inode, offset, size);
-	if (ret < 0) {
-		printk(KERN_EMERG "%s: failed to convert unwritten"
-			"extents to written extents, error is %d"
-			" io is still on inode %lu aio dio list\n",
-		       __func__, ret, inode->i_ino);
-		return ret;
-	}
-
-	if (io->iocb)
-		aio_complete(io->iocb, io->result, 0);
-	/* clear the DIO AIO unwritten flag */
-	io->flag = 0;
-	return ret;
-}
-
-/*
- * work on completed aio dio IO, to convert unwritten extents to extents
- */
-static void ext4_end_io_work(struct work_struct *work)
-{
-	ext4_io_end_t *io = container_of(work, ext4_io_end_t, work);
-	struct inode *inode = io->inode;
-	struct ext4_inode_info *ei = EXT4_I(inode);
-	unsigned long flags;
-	int ret;
-
-	mutex_lock(&inode->i_mutex);
-	ret = ext4_end_io_nolock(io);
-	if (ret < 0) {
-		mutex_unlock(&inode->i_mutex);
-		return;
-	}
-
-	spin_lock_irqsave(&ei->i_completed_io_lock, flags);
-	if (!list_empty(&io->list))
-		list_del_init(&io->list);
-	spin_unlock_irqrestore(&ei->i_completed_io_lock, flags);
-	mutex_unlock(&inode->i_mutex);
-	ext4_free_io_end(io);
-}
-
-/*
- * This function is called from ext4_sync_file().
- *
- * When IO is completed, the work to convert unwritten extents to
- * written is queued on workqueue but may not get immediately
- * scheduled. When fsync is called, we need to ensure the
- * conversion is complete before fsync returns.
- * The inode keeps track of a list of pending/completed IO that
- * might needs to do the conversion. This function walks through
- * the list and convert the related unwritten extents for completed IO
- * to written.
- * The function return the number of pending IOs on success.
- */
-int flush_completed_IO(struct inode *inode)
-{
-	ext4_io_end_t *io;
-	struct ext4_inode_info *ei = EXT4_I(inode);
-	unsigned long flags;
-	int ret = 0;
-	int ret2 = 0;
-
-	if (list_empty(&ei->i_completed_io_list))
-		return ret;
-
-	dump_completed_IO(inode);
-	spin_lock_irqsave(&ei->i_completed_io_lock, flags);
-	while (!list_empty(&ei->i_completed_io_list)){
-		io = list_entry(ei->i_completed_io_list.next,
-				ext4_io_end_t, list);
-		/*
-		 * Calling ext4_end_io_nolock() to convert completed
-		 * IO to written.
-		 *
-		 * When ext4_sync_file() is called, run_queue() may already
-		 * about to flush the work corresponding to this io structure.
-		 * It will be upset if it founds the io structure related
-		 * to the work-to-be schedule is freed.
-		 *
-		 * Thus we need to keep the io structure still valid here after
-		 * convertion finished. The io structure has a flag to
-		 * avoid double converting from both fsync and background work
-		 * queue work.
-		 */
-		spin_unlock_irqrestore(&ei->i_completed_io_lock, flags);
-		ret = ext4_end_io_nolock(io);
-		spin_lock_irqsave(&ei->i_completed_io_lock, flags);
-		if (ret < 0)
-			ret2 = ret;
-		else
-			list_del_init(&io->list);
-	}
-	spin_unlock_irqrestore(&ei->i_completed_io_lock, flags);
-	return (ret2 < 0) ? ret2 : 0;
-}
-
-static ext4_io_end_t *ext4_init_io_end (struct inode *inode, gfp_t flags)
-{
-	ext4_io_end_t *io = NULL;
-
-	io = kmalloc(sizeof(*io), flags);
-
-	if (io) {
-		igrab(inode);
-		io->inode = inode;
-		io->flag = 0;
-		io->offset = 0;
-		io->size = 0;
-		io->page = NULL;
-		io->iocb = NULL;
-		io->result = 0;
-		INIT_WORK(&io->work, ext4_end_io_work);
-		INIT_LIST_HEAD(&io->list);
-	}
-
-	return io;
-}
-
 static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset,
			    ssize_t size, void *private, int ret,
			    bool is_async)
@@ -3827,7 +3644,7 @@ static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset,
		  size);
 
	/* if not aio dio with unwritten extents, just free io and return */
-	if (io_end->flag != EXT4_IO_UNWRITTEN){
+	if (!(io_end->flag & EXT4_IO_END_UNWRITTEN)) {
		ext4_free_io_end(io_end);
		iocb->private = NULL;
 out:
@@ -3844,14 +3661,14 @@ out:
	}
	wq = EXT4_SB(io_end->inode->i_sb)->dio_unwritten_wq;
 
-	/* queue the work to convert unwritten extents to written */
-	queue_work(wq, &io_end->work);
-
	/* Add the io_end to per-inode completed aio dio list*/
	ei = EXT4_I(io_end->inode);
	spin_lock_irqsave(&ei->i_completed_io_lock, flags);
	list_add_tail(&io_end->list, &ei->i_completed_io_list);
	spin_unlock_irqrestore(&ei->i_completed_io_lock, flags);
+
+	/* queue the work to convert unwritten extents to written */
+	queue_work(wq, &io_end->work);
	iocb->private = NULL;
 }
 
@@ -3872,7 +3689,7 @@ static void ext4_end_io_buffer_write(struct buffer_head *bh, int uptodate)
		goto out;
	}
 
-	io_end->flag = EXT4_IO_UNWRITTEN;
+	io_end->flag = EXT4_IO_END_UNWRITTEN;
	inode = io_end->inode;
 
	/* Add the io_end to per-inode completed io list*/
@@ -5463,6 +5280,7 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
 {
	struct inode *inode = dentry->d_inode;
	int error, rc = 0;
+	int orphan = 0;
	const unsigned int ia_valid = attr->ia_valid;
 
	error = inode_change_ok(inode, attr);
@@ -5518,8 +5336,10 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
			error = PTR_ERR(handle);
			goto err_out;
		}
-
-		error = ext4_orphan_add(handle, inode);
+		if (ext4_handle_valid(handle)) {
+			error = ext4_orphan_add(handle, inode);
+			orphan = 1;
+		}
		EXT4_I(inode)->i_disksize = attr->ia_size;
		rc = ext4_mark_inode_dirty(handle, inode);
		if (!error)
@@ -5537,6 +5357,7 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
				goto err_out;
			}
			ext4_orphan_del(handle, inode);
+			orphan = 0;
			ext4_journal_stop(handle);
			goto err_out;
		}
@@ -5559,7 +5380,7 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
	 * If the call to ext4_truncate failed to get a transaction handle at
	 * all, we need to clean up the in-core orphan list manually.
	 */
-	if (inode->i_nlink)
+	if (orphan && inode->i_nlink)
		ext4_orphan_del(NULL, inode);
 
	if (!rc && (ia_valid & ATTR_MODE))
@@ -5642,7 +5463,7 @@ static int ext4_index_trans_blocks(struct inode *inode, int nrblocks, int chunk)
  *
  * Also account for superblock, inode, quota and xattr blocks
  */
-int ext4_meta_trans_blocks(struct inode *inode, int nrblocks, int chunk)
+static int ext4_meta_trans_blocks(struct inode *inode, int nrblocks, int chunk)
 {
	ext4_group_t groups, ngroups = ext4_get_groups_count(inode->i_sb);
	int gdpblocks;