Diffstat (limited to 'fs/ext4/inode.c')
 -rw-r--r--  fs/ext4/inode.c | 587
 1 file changed, 204 insertions(+), 383 deletions(-)

diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 49635ef236f8..2d6c6c8c036d 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -60,6 +60,12 @@ static inline int ext4_begin_ordered_truncate(struct inode *inode,
 }
 
 static void ext4_invalidatepage(struct page *page, unsigned long offset);
+static int noalloc_get_block_write(struct inode *inode, sector_t iblock,
+                   struct buffer_head *bh_result, int create);
+static int ext4_set_bh_endio(struct buffer_head *bh, struct inode *inode);
+static void ext4_end_io_buffer_write(struct buffer_head *bh, int uptodate);
+static int __ext4_journalled_writepage(struct page *page, unsigned int len);
+static int ext4_bh_delay_or_unwritten(handle_t *handle, struct buffer_head *bh);
 
 /*
  * Test whether an inode is a fast symlink.
@@ -755,6 +761,11 @@ static int ext4_alloc_branch(handle_t *handle, struct inode *inode,
          * parent to disk.
          */
         bh = sb_getblk(inode->i_sb, new_blocks[n-1]);
+        if (unlikely(!bh)) {
+            err = -EIO;
+            goto failed;
+        }
+
         branch[n].bh = bh;
         lock_buffer(bh);
         BUFFER_TRACE(bh, "call get_create_access");
@@ -1207,8 +1218,10 @@ static pgoff_t ext4_num_dirty_pages(struct inode *inode, pgoff_t idx,
                 break;
             idx++;
             num++;
-            if (num >= max_pages)
+            if (num >= max_pages) {
+                done = 1;
                 break;
+            }
         }
         pagevec_release(&pvec);
     }
@@ -1995,16 +2008,23 @@ static void ext4_da_page_release_reservation(struct page *page,
  *
  * As pages are already locked by write_cache_pages(), we can't use it
  */
-static int mpage_da_submit_io(struct mpage_da_data *mpd)
+static int mpage_da_submit_io(struct mpage_da_data *mpd,
+                              struct ext4_map_blocks *map)
 {
-    long pages_skipped;
     struct pagevec pvec;
     unsigned long index, end;
     int ret = 0, err, nr_pages, i;
     struct inode *inode = mpd->inode;
     struct address_space *mapping = inode->i_mapping;
+    loff_t size = i_size_read(inode);
+    unsigned int len, block_start;
+    struct buffer_head *bh, *page_bufs = NULL;
+    int journal_data = ext4_should_journal_data(inode);
+    sector_t pblock = 0, cur_logical = 0;
+    struct ext4_io_submit io_submit;
 
     BUG_ON(mpd->next_page <= mpd->first_page);
+    memset(&io_submit, 0, sizeof(io_submit));
     /*
      * We need to start from the first_page to the next_page - 1
      * to make sure we also write the mapped dirty buffer_heads.
@@ -2020,122 +2040,108 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd)
         if (nr_pages == 0)
             break;
         for (i = 0; i < nr_pages; i++) {
+            int commit_write = 0, redirty_page = 0;
             struct page *page = pvec.pages[i];
 
             index = page->index;
             if (index > end)
                 break;
+
+            if (index == size >> PAGE_CACHE_SHIFT)
+                len = size & ~PAGE_CACHE_MASK;
+            else
+                len = PAGE_CACHE_SIZE;
+            if (map) {
+                cur_logical = index << (PAGE_CACHE_SHIFT -
+                            inode->i_blkbits);
+                pblock = map->m_pblk + (cur_logical -
+                            map->m_lblk);
+            }
             index++;
 
             BUG_ON(!PageLocked(page));
             BUG_ON(PageWriteback(page));
 
-            pages_skipped = mpd->wbc->pages_skipped;
-            err = mapping->a_ops->writepage(page, mpd->wbc);
-            if (!err && (pages_skipped == mpd->wbc->pages_skipped))
-                /*
-                 * have successfully written the page
-                 * without skipping the same
-                 */
-                mpd->pages_written++;
             /*
-             * In error case, we have to continue because
-             * remaining pages are still locked
-             * XXX: unlock and re-dirty them?
+             * If the page does not have buffers (for
+             * whatever reason), try to create them using
+             * __block_write_begin.  If this fails,
+             * redirty the page and move on.
              */
-            if (ret == 0)
-                ret = err;
-        }
-        pagevec_release(&pvec);
-    }
-    return ret;
-}
-
-/*
- * mpage_put_bnr_to_bhs - walk blocks and assign them actual numbers
- *
- * the function goes through all passed space and put actual disk
- * block numbers into buffer heads, dropping BH_Delay and BH_Unwritten
- */
-static void mpage_put_bnr_to_bhs(struct mpage_da_data *mpd,
-                 struct ext4_map_blocks *map)
-{
-    struct inode *inode = mpd->inode;
-    struct address_space *mapping = inode->i_mapping;
-    int blocks = map->m_len;
-    sector_t pblock = map->m_pblk, cur_logical;
-    struct buffer_head *head, *bh;
-    pgoff_t index, end;
-    struct pagevec pvec;
-    int nr_pages, i;
-
-    index = map->m_lblk >> (PAGE_CACHE_SHIFT - inode->i_blkbits);
-    end = (map->m_lblk + blocks - 1) >> (PAGE_CACHE_SHIFT - inode->i_blkbits);
-    cur_logical = index << (PAGE_CACHE_SHIFT - inode->i_blkbits);
-
-    pagevec_init(&pvec, 0);
-
-    while (index <= end) {
-        /* XXX: optimize tail */
-        nr_pages = pagevec_lookup(&pvec, mapping, index, PAGEVEC_SIZE);
-        if (nr_pages == 0)
-            break;
-        for (i = 0; i < nr_pages; i++) {
-            struct page *page = pvec.pages[i];
-
-            index = page->index;
-            if (index > end)
-                break;
-            index++;
-
-            BUG_ON(!PageLocked(page));
-            BUG_ON(PageWriteback(page));
-            BUG_ON(!page_has_buffers(page));
-
-            bh = page_buffers(page);
-            head = bh;
-
-            /* skip blocks out of the range */
-            do {
-                if (cur_logical >= map->m_lblk)
-                    break;
-                cur_logical++;
-            } while ((bh = bh->b_this_page) != head);
+            if (!page_has_buffers(page)) {
+                if (__block_write_begin(page, 0, len,
+                        noalloc_get_block_write)) {
+                redirty_page:
+                    redirty_page_for_writepage(mpd->wbc,
+                                   page);
+                    unlock_page(page);
+                    continue;
+                }
+                commit_write = 1;
+            }
 
+            bh = page_bufs = page_buffers(page);
+            block_start = 0;
             do {
-                if (cur_logical >= map->m_lblk + blocks)
-                    break;
-
-                if (buffer_delay(bh) || buffer_unwritten(bh)) {
-
-                    BUG_ON(bh->b_bdev != inode->i_sb->s_bdev);
-
+                if (!bh)
+                    goto redirty_page;
+                if (map && (cur_logical >= map->m_lblk) &&
+                    (cur_logical <= (map->m_lblk +
+                             (map->m_len - 1)))) {
                     if (buffer_delay(bh)) {
                         clear_buffer_delay(bh);
                         bh->b_blocknr = pblock;
-                    } else {
-                        /*
-                         * unwritten already should have
-                         * blocknr assigned. Verify that
-                         */
-                        clear_buffer_unwritten(bh);
-                        BUG_ON(bh->b_blocknr != pblock);
                     }
+                    if (buffer_unwritten(bh) ||
+                        buffer_mapped(bh))
+                        BUG_ON(bh->b_blocknr != pblock);
+                    if (map->m_flags & EXT4_MAP_UNINIT)
+                        set_buffer_uninit(bh);
+                    clear_buffer_unwritten(bh);
+                }
 
-                } else if (buffer_mapped(bh))
-                    BUG_ON(bh->b_blocknr != pblock);
-
-                if (map->m_flags & EXT4_MAP_UNINIT)
-                    set_buffer_uninit(bh);
+                /* redirty page if block allocation undone */
+                if (buffer_delay(bh) || buffer_unwritten(bh))
+                    redirty_page = 1;
+                bh = bh->b_this_page;
+                block_start += bh->b_size;
                 cur_logical++;
                 pblock++;
-            } while ((bh = bh->b_this_page) != head);
+            } while (bh != page_bufs);
+
+            if (redirty_page)
+                goto redirty_page;
+
+            if (commit_write)
+                /* mark the buffer_heads as dirty & uptodate */
+                block_commit_write(page, 0, len);
+
+            /*
+             * Delalloc doesn't support data journalling,
+             * but eventually maybe we'll lift this
+             * restriction.
+             */
+            if (unlikely(journal_data && PageChecked(page)))
+                err = __ext4_journalled_writepage(page, len);
+            else
+                err = ext4_bio_write_page(&io_submit, page,
+                              len, mpd->wbc);
+
+            if (!err)
+                mpd->pages_written++;
+            /*
+             * In error case, we have to continue because
+             * remaining pages are still locked
+             */
+            if (ret == 0)
+                ret = err;
         }
         pagevec_release(&pvec);
     }
+    ext4_io_submit(&io_submit);
+    return ret;
 }
 
-
 static void ext4_da_block_invalidatepages(struct mpage_da_data *mpd,
         sector_t logical, long blk_cnt)
 {
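The hunk above folds mpage_put_bnr_to_bhs() into mpage_da_submit_io(), so a single pass over each page now both installs the just-allocated block numbers and hands the page to the bio-based writeback path (ext4_bio_write_page). Both the old and the new code are built on the same walk of a page's circular buffer_head list; the sketch below isolates that pattern (illustrative only, not part of the patch, and it assumes a locked page that already has buffers attached):

    /* page_buffers() returns the head of the ring; b_this_page wraps back. */
    struct buffer_head *head, *bh;

    bh = head = page_buffers(page);
    do {
        if (buffer_delay(bh)) {
            /* delayed-allocated buffer: install the real block number */
            clear_buffer_delay(bh);
            bh->b_blocknr = pblock;
        }
        pblock++;
        bh = bh->b_this_page;    /* circular list; wraps back to head */
    } while (bh != head);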
@@ -2187,35 +2193,32 @@ static void ext4_print_free_blocks(struct inode *inode)
 }
 
 /*
- * mpage_da_map_blocks - go through given space
+ * mpage_da_map_and_submit - go through given space, map them
+ * if necessary, and then submit them for I/O
  *
  * @mpd - bh describing space
  *
  * The function skips space we know is already mapped to disk blocks.
  *
  */
-static int mpage_da_map_blocks(struct mpage_da_data *mpd)
+static void mpage_da_map_and_submit(struct mpage_da_data *mpd)
 {
     int err, blks, get_blocks_flags;
-    struct ext4_map_blocks map;
+    struct ext4_map_blocks map, *mapp = NULL;
     sector_t next = mpd->b_blocknr;
     unsigned max_blocks = mpd->b_size >> mpd->inode->i_blkbits;
     loff_t disksize = EXT4_I(mpd->inode)->i_disksize;
     handle_t *handle = NULL;
 
     /*
-     * We consider only non-mapped and non-allocated blocks
-     */
-    if ((mpd->b_state & (1 << BH_Mapped)) &&
-        !(mpd->b_state & (1 << BH_Delay)) &&
-        !(mpd->b_state & (1 << BH_Unwritten)))
-        return 0;
-
-    /*
-     * If we didn't accumulate anything to write simply return
+     * If the blocks are mapped already, or we couldn't accumulate
+     * any blocks, then proceed immediately to the submission stage.
      */
-    if (!mpd->b_size)
-        return 0;
+    if ((mpd->b_size == 0) ||
+        ((mpd->b_state & (1 << BH_Mapped)) &&
+         !(mpd->b_state & (1 << BH_Delay)) &&
+         !(mpd->b_state & (1 << BH_Unwritten))))
+        goto submit_io;
 
     handle = ext4_journal_current_handle();
     BUG_ON(!handle);
@@ -2252,17 +2255,18 @@ static int mpage_da_map_blocks(struct mpage_da_data *mpd)
 
     err = blks;
     /*
-     * If get block returns with error we simply
-     * return. Later writepage will redirty the page and
-     * writepages will find the dirty page again
+     * If get block returns EAGAIN or ENOSPC and there
+     * appears to be free blocks we will call
+     * ext4_writepage() for all of the pages which will
+     * just redirty the pages.
      */
     if (err == -EAGAIN)
-        return 0;
+        goto submit_io;
 
     if (err == -ENOSPC &&
         ext4_count_free_blocks(sb)) {
         mpd->retval = err;
-        return 0;
+        goto submit_io;
     }
 
     /*
@@ -2287,10 +2291,11 @@ static int mpage_da_map_blocks(struct mpage_da_data *mpd)
         /* invalidate all the pages */
         ext4_da_block_invalidatepages(mpd, next,
                 mpd->b_size >> mpd->inode->i_blkbits);
-        return err;
+        return;
     }
     BUG_ON(blks == 0);
 
+    mapp = &map;
     if (map.m_flags & EXT4_MAP_NEW) {
         struct block_device *bdev = mpd->inode->i_sb->s_bdev;
         int i;
@@ -2299,18 +2304,11 @@ static int mpage_da_map_blocks(struct mpage_da_data *mpd)
             unmap_underlying_metadata(bdev, map.m_pblk + i);
     }
 
-    /*
-     * If blocks are delayed marked, we need to
-     * put actual blocknr and drop delayed bit
-     */
-    if ((mpd->b_state & (1 << BH_Delay)) ||
-        (mpd->b_state & (1 << BH_Unwritten)))
-        mpage_put_bnr_to_bhs(mpd, &map);
-
     if (ext4_should_order_data(mpd->inode)) {
         err = ext4_jbd2_file_inode(handle, mpd->inode);
         if (err)
-            return err;
+            /* This only happens if the journal is aborted */
+            return;
     }
 
     /*
@@ -2321,10 +2319,16 @@ static int mpage_da_map_blocks(struct mpage_da_data *mpd)
     disksize = i_size_read(mpd->inode);
     if (disksize > EXT4_I(mpd->inode)->i_disksize) {
         ext4_update_i_disksize(mpd->inode, disksize);
-        return ext4_mark_inode_dirty(handle, mpd->inode);
+        err = ext4_mark_inode_dirty(handle, mpd->inode);
+        if (err)
+            ext4_error(mpd->inode->i_sb,
+                   "Failed to mark inode %lu dirty",
+                   mpd->inode->i_ino);
     }
 
-    return 0;
+submit_io:
+    mpage_da_submit_io(mpd, mapp);
+    mpd->io_done = 1;
 }
 
 #define BH_FLAGS ((1 << BH_Uptodate) | (1 << BH_Mapped) | \
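Taken together, the hunks above turn mpage_da_map_blocks() (map, then let callers submit) into mpage_da_map_and_submit(): every early return becomes a goto to a single submission point, so the accumulated pages are always pushed out exactly once and io_done is set in one place. A condensed sketch of the resulting control flow, where extent_already_mapped() and map_the_extent() are hypothetical stand-ins for the BH_Mapped/BH_Delay/BH_Unwritten state test and the ext4_map_blocks() call:

    static void mpage_da_map_and_submit(struct mpage_da_data *mpd)
    {
        struct ext4_map_blocks map, *mapp = NULL;

        /* Nothing accumulated, or already mapped: skip straight to I/O. */
        if (mpd->b_size == 0 || extent_already_mapped(mpd))
            goto submit_io;

        /* Map the extent; on success the submit path fixes up the bh's. */
        if (map_the_extent(mpd, &map) == 0)
            mapp = &map;

    submit_io:
        mpage_da_submit_io(mpd, mapp);
        mpd->io_done = 1;
    }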
@@ -2401,9 +2405,7 @@ flush_it:
      * We couldn't merge the block to our extent, so we
      * need to flush current extent and start new one
      */
-    if (mpage_da_map_blocks(mpd) == 0)
-        mpage_da_submit_io(mpd);
-    mpd->io_done = 1;
+    mpage_da_map_and_submit(mpd);
     return;
 }
 
@@ -2422,9 +2424,9 @@ static int ext4_bh_delay_or_unwritten(handle_t *handle, struct buffer_head *bh)
  * The function finds extents of pages and scan them for all blocks.
  */
 static int __mpage_da_writepage(struct page *page,
-                struct writeback_control *wbc, void *data)
+                struct writeback_control *wbc,
+                struct mpage_da_data *mpd)
 {
-    struct mpage_da_data *mpd = data;
     struct inode *inode = mpd->inode;
     struct buffer_head *bh, *head;
     sector_t logical;
@@ -2435,15 +2437,13 @@ static int __mpage_da_writepage(struct page *page,
     if (mpd->next_page != page->index) {
         /*
          * Nope, we can't. So, we map non-allocated blocks
-         * and start IO on them using writepage()
+         * and start IO on them
          */
         if (mpd->next_page != mpd->first_page) {
-            if (mpage_da_map_blocks(mpd) == 0)
-                mpage_da_submit_io(mpd);
+            mpage_da_map_and_submit(mpd);
             /*
              * skip rest of the page in the page_vec
              */
-            mpd->io_done = 1;
             redirty_page_for_writepage(wbc, page);
             unlock_page(page);
             return MPAGE_DA_EXTENT_TAIL;
@@ -2622,6 +2622,7 @@ static int __ext4_journalled_writepage(struct page *page,
     int ret = 0;
     int err;
 
+    ClearPageChecked(page);
     page_bufs = page_buffers(page);
     BUG_ON(!page_bufs);
     walk_page_buffers(handle, page_bufs, 0, len, NULL, bget_one);
@@ -2699,7 +2700,7 @@ static void ext4_end_io_buffer_write(struct buffer_head *bh, int uptodate);
 static int ext4_writepage(struct page *page,
               struct writeback_control *wbc)
 {
-    int ret = 0;
+    int ret = 0, commit_write = 0;
     loff_t size;
     unsigned int len;
     struct buffer_head *page_bufs = NULL;
@@ -2712,71 +2713,46 @@ static int ext4_writepage(struct page *page,
     else
         len = PAGE_CACHE_SIZE;
 
-    if (page_has_buffers(page)) {
-        page_bufs = page_buffers(page);
-        if (walk_page_buffers(NULL, page_bufs, 0, len, NULL,
-                    ext4_bh_delay_or_unwritten)) {
-            /*
-             * We don't want to do block allocation
-             * So redirty the page and return
-             * We may reach here when we do a journal commit
-             * via journal_submit_inode_data_buffers.
-             * If we don't have mapping block we just ignore
-             * them. We can also reach here via shrink_page_list
-             */
+    /*
+     * If the page does not have buffers (for whatever reason),
+     * try to create them using __block_write_begin.  If this
+     * fails, redirty the page and move on.
+     */
+    if (!page_buffers(page)) {
+        if (__block_write_begin(page, 0, len,
+                    noalloc_get_block_write)) {
+        redirty_page:
             redirty_page_for_writepage(wbc, page);
             unlock_page(page);
             return 0;
         }
-    } else {
+        commit_write = 1;
+    }
+    page_bufs = page_buffers(page);
+    if (walk_page_buffers(NULL, page_bufs, 0, len, NULL,
+                  ext4_bh_delay_or_unwritten)) {
         /*
-         * The test for page_has_buffers() is subtle:
-         * We know the page is dirty but it lost buffers. That means
-         * that at some moment in time after write_begin()/write_end()
-         * has been called all buffers have been clean and thus they
-         * must have been written at least once. So they are all
-         * mapped and we can happily proceed with mapping them
-         * and writing the page.
-         *
-         * Try to initialize the buffer_heads and check whether
-         * all are mapped and non delay. We don't want to
-         * do block allocation here.
+         * We don't want to do block allocation So redirty the
+         * page and return We may reach here when we do a
+         * journal commit via
+         * journal_submit_inode_data_buffers.  If we don't
+         * have mapping block we just ignore them. We can also
+         * reach here via shrink_page_list
          */
-        ret = __block_write_begin(page, 0, len,
-                      noalloc_get_block_write);
-        if (!ret) {
-            page_bufs = page_buffers(page);
-            /* check whether all are mapped and non delay */
-            if (walk_page_buffers(NULL, page_bufs, 0, len, NULL,
-                        ext4_bh_delay_or_unwritten)) {
-                redirty_page_for_writepage(wbc, page);
-                unlock_page(page);
-                return 0;
-            }
-        } else {
-            /*
-             * We can't do block allocation here
-             * so just redity the page and unlock
-             * and return
-             */
-            redirty_page_for_writepage(wbc, page);
-            unlock_page(page);
-            return 0;
-        }
+        goto redirty_page;
+    }
+    if (commit_write)
         /* now mark the buffer_heads as dirty and uptodate */
         block_commit_write(page, 0, len);
-    }
 
-    if (PageChecked(page) && ext4_should_journal_data(inode)) {
+    if (PageChecked(page) && ext4_should_journal_data(inode))
         /*
          * It's mmapped pagecache. Add buffers and journal it. There
          * doesn't seem much point in redirtying the page here.
          */
-        ClearPageChecked(page);
         return __ext4_journalled_writepage(page, len);
-    }
 
-    if (page_bufs && buffer_uninit(page_bufs)) {
+    if (buffer_uninit(page_bufs)) {
         ext4_set_bh_endio(page_bufs, inode);
         ret = block_write_full_page_endio(page, noalloc_get_block_write,
                           wbc, ext4_end_io_buffer_write);
@@ -2823,25 +2799,32 @@ static int ext4_da_writepages_trans_blocks(struct inode *inode)
  */
 static int write_cache_pages_da(struct address_space *mapping,
                 struct writeback_control *wbc,
-                struct mpage_da_data *mpd)
+                struct mpage_da_data *mpd,
+                pgoff_t *done_index)
 {
     int ret = 0;
     int done = 0;
     struct pagevec pvec;
-    int nr_pages;
+    unsigned nr_pages;
     pgoff_t index;
     pgoff_t end;        /* Inclusive */
     long nr_to_write = wbc->nr_to_write;
+    int tag;
 
     pagevec_init(&pvec, 0);
     index = wbc->range_start >> PAGE_CACHE_SHIFT;
     end = wbc->range_end >> PAGE_CACHE_SHIFT;
 
+    if (wbc->sync_mode == WB_SYNC_ALL)
+        tag = PAGECACHE_TAG_TOWRITE;
+    else
+        tag = PAGECACHE_TAG_DIRTY;
+
+    *done_index = index;
     while (!done && (index <= end)) {
         int i;
 
-        nr_pages = pagevec_lookup_tag(&pvec, mapping, &index,
-                  PAGECACHE_TAG_DIRTY,
+        nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, tag,
                   min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1);
         if (nr_pages == 0)
             break;
@@ -2861,6 +2844,8 @@ static int write_cache_pages_da(struct address_space *mapping,
                 break;
             }
 
+            *done_index = page->index + 1;
+
             lock_page(page);
 
             /*
@@ -2946,6 +2931,8 @@ static int ext4_da_writepages(struct address_space *mapping,
     long desired_nr_to_write, nr_to_writebump = 0;
     loff_t range_start = wbc->range_start;
     struct ext4_sb_info *sbi = EXT4_SB(mapping->host->i_sb);
+    pgoff_t done_index = 0;
+    pgoff_t end;
 
     trace_ext4_da_writepages(inode, wbc);
 
@@ -2981,8 +2968,11 @@ static int ext4_da_writepages(struct address_space *mapping,
         wbc->range_start = index << PAGE_CACHE_SHIFT;
         wbc->range_end = LLONG_MAX;
         wbc->range_cyclic = 0;
-    } else
+        end = -1;
+    } else {
         index = wbc->range_start >> PAGE_CACHE_SHIFT;
+        end = wbc->range_end >> PAGE_CACHE_SHIFT;
+    }
 
     /*
      * This works around two forms of stupidity. The first is in
@@ -3001,9 +2991,12 @@ static int ext4_da_writepages(struct address_space *mapping,
      * sbi->max_writeback_mb_bump whichever is smaller.
      */
     max_pages = sbi->s_max_writeback_mb_bump << (20 - PAGE_CACHE_SHIFT);
-    if (!range_cyclic && range_whole)
-        desired_nr_to_write = wbc->nr_to_write * 8;
-    else
+    if (!range_cyclic && range_whole) {
+        if (wbc->nr_to_write == LONG_MAX)
+            desired_nr_to_write = wbc->nr_to_write;
+        else
+            desired_nr_to_write = wbc->nr_to_write * 8;
+    } else
         desired_nr_to_write = ext4_num_dirty_pages(inode, index,
                                max_pages);
     if (desired_nr_to_write > max_pages)
@@ -3020,6 +3013,9 @@ static int ext4_da_writepages(struct address_space *mapping,
     pages_skipped = wbc->pages_skipped;
 
 retry:
+    if (wbc->sync_mode == WB_SYNC_ALL)
+        tag_pages_for_writeback(mapping, index, end);
+
     while (!ret && wbc->nr_to_write > 0) {
 
         /*
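The WB_SYNC_ALL branches above implement writeback livelock avoidance via page tagging: before the sweep starts, every currently-dirty page in the range is retagged from PAGECACHE_TAG_DIRTY to PAGECACHE_TAG_TOWRITE, and the lookup loop then walks only the TOWRITE tag, so pages dirtied while the sweep is running cannot keep it going forever. A minimal sketch of the two halves, pulled straight from the hunks above (illustrative only):

    /* Before the sweep: snapshot what is dirty right now. */
    if (wbc->sync_mode == WB_SYNC_ALL)
        tag_pages_for_writeback(mapping, index, end);

    /* During the sweep: look pages up by the snapshot tag, not TAG_DIRTY. */
    tag = (wbc->sync_mode == WB_SYNC_ALL) ? PAGECACHE_TAG_TOWRITE
                                          : PAGECACHE_TAG_DIRTY;
    nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, tag,
                                  PAGEVEC_SIZE);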
@@ -3058,16 +3054,14 @@ retry:
         mpd.io_done = 0;
         mpd.pages_written = 0;
         mpd.retval = 0;
-        ret = write_cache_pages_da(mapping, wbc, &mpd);
+        ret = write_cache_pages_da(mapping, wbc, &mpd, &done_index);
         /*
          * If we have a contiguous extent of pages and we
          * haven't done the I/O yet, map the blocks and submit
          * them for I/O.
          */
         if (!mpd.io_done && mpd.next_page != mpd.first_page) {
-            if (mpage_da_map_blocks(&mpd) == 0)
-                mpage_da_submit_io(&mpd);
-            mpd.io_done = 1;
+            mpage_da_map_and_submit(&mpd);
             ret = MPAGE_DA_EXTENT_TAIL;
         }
         trace_ext4_da_write_pages(inode, &mpd);
@@ -3114,14 +3108,13 @@ retry:
            __func__, wbc->nr_to_write, ret);
 
     /* Update index */
-    index += pages_written;
     wbc->range_cyclic = range_cyclic;
     if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
         /*
          * set the writeback_index so that range_cyclic
          * mode will write it back later
          */
-        mapping->writeback_index = index;
+        mapping->writeback_index = done_index;
 
 out_writepages:
     wbc->nr_to_write -= nr_to_writebump;
@@ -3456,15 +3449,6 @@ ext4_readpages(struct file *file, struct address_space *mapping,
     return mpage_readpages(mapping, pages, nr_pages, ext4_get_block);
 }
 
-static void ext4_free_io_end(ext4_io_end_t *io)
-{
-    BUG_ON(!io);
-    if (io->page)
-        put_page(io->page);
-    iput(io->inode);
-    kfree(io);
-}
-
 static void ext4_invalidatepage_free_endio(struct page *page, unsigned long offset)
 {
     struct buffer_head *head, *bh;
@@ -3641,173 +3625,6 @@ static int ext4_get_block_write(struct inode *inode, sector_t iblock,
                EXT4_GET_BLOCKS_IO_CREATE_EXT);
 }
 
-static void dump_completed_IO(struct inode * inode)
-{
-#ifdef EXT4_DEBUG
-    struct list_head *cur, *before, *after;
-    ext4_io_end_t *io, *io0, *io1;
-    unsigned long flags;
-
-    if (list_empty(&EXT4_I(inode)->i_completed_io_list)){
-        ext4_debug("inode %lu completed_io list is empty\n", inode->i_ino);
-        return;
-    }
-
-    ext4_debug("Dump inode %lu completed_io list \n", inode->i_ino);
-    spin_lock_irqsave(&EXT4_I(inode)->i_completed_io_lock, flags);
-    list_for_each_entry(io, &EXT4_I(inode)->i_completed_io_list, list){
-        cur = &io->list;
-        before = cur->prev;
-        io0 = container_of(before, ext4_io_end_t, list);
-        after = cur->next;
-        io1 = container_of(after, ext4_io_end_t, list);
-
-        ext4_debug("io 0x%p from inode %lu,prev 0x%p,next 0x%p\n",
-               io, inode->i_ino, io0, io1);
-    }
-    spin_unlock_irqrestore(&EXT4_I(inode)->i_completed_io_lock, flags);
-#endif
-}
-
-/*
- * check a range of space and convert unwritten extents to written.
- */
-static int ext4_end_io_nolock(ext4_io_end_t *io)
-{
-    struct inode *inode = io->inode;
-    loff_t offset = io->offset;
-    ssize_t size = io->size;
-    int ret = 0;
-
-    ext4_debug("ext4_end_io_nolock: io 0x%p from inode %lu,list->next 0x%p,"
-           "list->prev 0x%p\n",
-           io, inode->i_ino, io->list.next, io->list.prev);
-
-    if (list_empty(&io->list))
-        return ret;
-
-    if (io->flag != EXT4_IO_UNWRITTEN)
-        return ret;
-
-    ret = ext4_convert_unwritten_extents(inode, offset, size);
-    if (ret < 0) {
-        printk(KERN_EMERG "%s: failed to convert unwritten"
-            "extents to written extents, error is %d"
-            " io is still on inode %lu aio dio list\n",
-            __func__, ret, inode->i_ino);
-        return ret;
-    }
-
-    if (io->iocb)
-        aio_complete(io->iocb, io->result, 0);
-    /* clear the DIO AIO unwritten flag */
-    io->flag = 0;
-    return ret;
-}
-
-/*
- * work on completed aio dio IO, to convert unwritten extents to extents
- */
-static void ext4_end_io_work(struct work_struct *work)
-{
-    ext4_io_end_t *io = container_of(work, ext4_io_end_t, work);
-    struct inode *inode = io->inode;
-    struct ext4_inode_info *ei = EXT4_I(inode);
-    unsigned long flags;
-    int ret;
-
-    mutex_lock(&inode->i_mutex);
-    ret = ext4_end_io_nolock(io);
-    if (ret < 0) {
-        mutex_unlock(&inode->i_mutex);
-        return;
-    }
-
-    spin_lock_irqsave(&ei->i_completed_io_lock, flags);
-    if (!list_empty(&io->list))
-        list_del_init(&io->list);
-    spin_unlock_irqrestore(&ei->i_completed_io_lock, flags);
-    mutex_unlock(&inode->i_mutex);
-    ext4_free_io_end(io);
-}
-
-/*
- * This function is called from ext4_sync_file().
- *
- * When IO is completed, the work to convert unwritten extents to
- * written is queued on workqueue but may not get immediately
- * scheduled. When fsync is called, we need to ensure the
- * conversion is complete before fsync returns.
- * The inode keeps track of a list of pending/completed IO that
- * might needs to do the conversion. This function walks through
- * the list and convert the related unwritten extents for completed IO
- * to written.
- * The function return the number of pending IOs on success.
- */
-int flush_completed_IO(struct inode *inode)
-{
-    ext4_io_end_t *io;
-    struct ext4_inode_info *ei = EXT4_I(inode);
-    unsigned long flags;
-    int ret = 0;
-    int ret2 = 0;
-
-    if (list_empty(&ei->i_completed_io_list))
-        return ret;
-
-    dump_completed_IO(inode);
-    spin_lock_irqsave(&ei->i_completed_io_lock, flags);
-    while (!list_empty(&ei->i_completed_io_list)){
-        io = list_entry(ei->i_completed_io_list.next,
-                ext4_io_end_t, list);
-        /*
-         * Calling ext4_end_io_nolock() to convert completed
-         * IO to written.
-         *
-         * When ext4_sync_file() is called, run_queue() may already
-         * about to flush the work corresponding to this io structure.
-         * It will be upset if it founds the io structure related
-         * to the work-to-be schedule is freed.
-         *
-         * Thus we need to keep the io structure still valid here after
-         * convertion finished. The io structure has a flag to
-         * avoid double converting from both fsync and background work
-         * queue work.
-         */
-        spin_unlock_irqrestore(&ei->i_completed_io_lock, flags);
-        ret = ext4_end_io_nolock(io);
-        spin_lock_irqsave(&ei->i_completed_io_lock, flags);
-        if (ret < 0)
-            ret2 = ret;
-        else
-            list_del_init(&io->list);
-    }
-    spin_unlock_irqrestore(&ei->i_completed_io_lock, flags);
-    return (ret2 < 0) ? ret2 : 0;
-}
-
-static ext4_io_end_t *ext4_init_io_end (struct inode *inode, gfp_t flags)
-{
-    ext4_io_end_t *io = NULL;
-
-    io = kmalloc(sizeof(*io), flags);
-
-    if (io) {
-        igrab(inode);
-        io->inode = inode;
-        io->flag = 0;
-        io->offset = 0;
-        io->size = 0;
-        io->page = NULL;
-        io->iocb = NULL;
-        io->result = 0;
-        INIT_WORK(&io->work, ext4_end_io_work);
-        INIT_LIST_HEAD(&io->list);
-    }
-
-    return io;
-}
-
 static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset,
                 ssize_t size, void *private, int ret,
                 bool is_async)
@@ -3827,7 +3644,7 @@ static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset,
           size);
 
     /* if not aio dio with unwritten extents, just free io and return */
-    if (io_end->flag != EXT4_IO_UNWRITTEN){
+    if (!(io_end->flag & EXT4_IO_END_UNWRITTEN)) {
         ext4_free_io_end(io_end);
         iocb->private = NULL;
 out:
@@ -3844,14 +3661,14 @@ out:
     }
     wq = EXT4_SB(io_end->inode->i_sb)->dio_unwritten_wq;
 
-    /* queue the work to convert unwritten extents to written */
-    queue_work(wq, &io_end->work);
-
     /* Add the io_end to per-inode completed aio dio list*/
     ei = EXT4_I(io_end->inode);
     spin_lock_irqsave(&ei->i_completed_io_lock, flags);
     list_add_tail(&io_end->list, &ei->i_completed_io_list);
     spin_unlock_irqrestore(&ei->i_completed_io_lock, flags);
+
+    /* queue the work to convert unwritten extents to written */
+    queue_work(wq, &io_end->work);
     iocb->private = NULL;
 }
 
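The reordering in the last hunk is a publish-before-queue fix: the io_end must already be on the inode's i_completed_io_list, under the lock, by the time queue_work() makes it visible to the worker, since both the worker and a concurrent fsync doing the extent conversion find the structure through that list. Schematically (illustrative only):

    /* Publish first, under the list lock... */
    spin_lock_irqsave(&ei->i_completed_io_lock, flags);
    list_add_tail(&io_end->list, &ei->i_completed_io_list);
    spin_unlock_irqrestore(&ei->i_completed_io_lock, flags);

    /* ...then queue; the worker may run as soon as this returns. */
    queue_work(wq, &io_end->work);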
@@ -3872,7 +3689,7 @@ static void ext4_end_io_buffer_write(struct buffer_head *bh, int uptodate)
         goto out;
     }
 
-    io_end->flag = EXT4_IO_UNWRITTEN;
+    io_end->flag = EXT4_IO_END_UNWRITTEN;
     inode = io_end->inode;
 
     /* Add the io_end to per-inode completed io list*/
@@ -5463,6 +5280,7 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
 {
     struct inode *inode = dentry->d_inode;
     int error, rc = 0;
+    int orphan = 0;
     const unsigned int ia_valid = attr->ia_valid;
 
     error = inode_change_ok(inode, attr);
@@ -5518,8 +5336,10 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
             error = PTR_ERR(handle);
             goto err_out;
         }
-
-        error = ext4_orphan_add(handle, inode);
+        if (ext4_handle_valid(handle)) {
+            error = ext4_orphan_add(handle, inode);
+            orphan = 1;
+        }
         EXT4_I(inode)->i_disksize = attr->ia_size;
         rc = ext4_mark_inode_dirty(handle, inode);
         if (!error)
@@ -5537,6 +5357,7 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
                 goto err_out;
             }
             ext4_orphan_del(handle, inode);
+            orphan = 0;
             ext4_journal_stop(handle);
             goto err_out;
         }
@@ -5559,7 +5380,7 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
      * If the call to ext4_truncate failed to get a transaction handle at
      * all, we need to clean up the in-core orphan list manually.
      */
-    if (inode->i_nlink)
+    if (orphan && inode->i_nlink)
         ext4_orphan_del(NULL, inode);
 
     if (!rc && (ia_valid & ATTR_MODE))
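The orphan flag introduced above records whether this call actually put the inode on the orphan list: ext4_orphan_add() is only meaningful when the handle is backed by a real journal (ext4_handle_valid()), so the cleanup path must not call ext4_orphan_del() otherwise. A condensed view of the pattern from the hunks above (illustrative):

    int orphan = 0;

    if (ext4_handle_valid(handle)) {  /* no-journal mode has no orphan list */
        error = ext4_orphan_add(handle, inode);
        orphan = 1;
    }
    /* ... truncate work ... */
    if (orphan && inode->i_nlink)     /* undo only what this call really did */
        ext4_orphan_del(NULL, inode);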
@@ -5642,7 +5463,7 @@ static int ext4_index_trans_blocks(struct inode *inode, int nrblocks, int chunk)
  *
  * Also account for superblock, inode, quota and xattr blocks
  */
-int ext4_meta_trans_blocks(struct inode *inode, int nrblocks, int chunk)
+static int ext4_meta_trans_blocks(struct inode *inode, int nrblocks, int chunk)
 {
     ext4_group_t groups, ngroups = ext4_get_groups_count(inode->i_sb);
     int gdpblocks;