Diffstat (limited to 'fs/ext4/inode.c')

 fs/ext4/inode.c | 577 ++++++++++++++++++++++++++++++++++----------------------
 1 file changed, 295 insertions(+), 282 deletions(-)
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 9f7f9e49914f..50d0e9c64584 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -173,7 +173,7 @@ int ext4_truncate_restart_trans(handle_t *handle, struct inode *inode,
 	BUG_ON(EXT4_JOURNAL(inode) == NULL);
 	jbd_debug(2, "restarting handle %p\n", handle);
 	up_write(&EXT4_I(inode)->i_data_sem);
-	ret = ext4_journal_restart(handle, blocks_for_truncate(inode));
+	ret = ext4_journal_restart(handle, nblocks);
 	down_write(&EXT4_I(inode)->i_data_sem);
 	ext4_discard_preallocations(inode);
 
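The nblocks parameter added here lets each caller pass its own credit estimate to the restart instead of hard-coding blocks_for_truncate(). A minimal sketch of the resulting call pattern, assuming the same locking rules as the function above (illustrative only, not a new function from this patch):

	/*
	 * Sketch: restart a handle with a caller-chosen credit count.
	 * i_data_sem is dropped around the restart because
	 * ext4_journal_restart() may block waiting for the running
	 * transaction to commit.
	 */
	static int restart_with_credits(handle_t *handle, struct inode *inode,
					int nblocks)
	{
		int ret;

		up_write(&EXT4_I(inode)->i_data_sem);
		ret = ext4_journal_restart(handle, nblocks);
		down_write(&EXT4_I(inode)->i_data_sem);
		ext4_discard_preallocations(inode);

		return ret;
	}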
@@ -639,8 +639,8 @@ static int ext4_alloc_blocks(handle_t *handle, struct inode *inode,
 	while (target > 0) {
 		count = target;
 		/* allocating blocks for indirect blocks and direct blocks */
-		current_block = ext4_new_meta_blocks(handle, inode,
-							goal, &count, err);
+		current_block = ext4_new_meta_blocks(handle, inode, goal,
+						     0, &count, err);
 		if (*err)
 			goto failed_out;
 
@@ -720,7 +720,7 @@ allocated:
 	return ret;
 failed_out:
 	for (i = 0; i < index; i++)
-		ext4_free_blocks(handle, inode, 0, new_blocks[i], 1, 0);
+		ext4_free_blocks(handle, inode, NULL, new_blocks[i], 1, 0);
 	return ret;
 }
 
@@ -823,20 +823,20 @@ static int ext4_alloc_branch(handle_t *handle, struct inode *inode,
 	return err;
 failed:
 	/* Allocation failed, free what we already allocated */
-	ext4_free_blocks(handle, inode, 0, new_blocks[0], 1, 0);
+	ext4_free_blocks(handle, inode, NULL, new_blocks[0], 1, 0);
 	for (i = 1; i <= n ; i++) {
 		/*
 		 * branch[i].bh is newly allocated, so there is no
 		 * need to revoke the block, which is why we don't
 		 * need to set EXT4_FREE_BLOCKS_METADATA.
 		 */
-		ext4_free_blocks(handle, inode, 0, new_blocks[i], 1,
+		ext4_free_blocks(handle, inode, NULL, new_blocks[i], 1,
 				 EXT4_FREE_BLOCKS_FORGET);
 	}
 	for (i = n+1; i < indirect_blks; i++)
-		ext4_free_blocks(handle, inode, 0, new_blocks[i], 1, 0);
+		ext4_free_blocks(handle, inode, NULL, new_blocks[i], 1, 0);
 
-	ext4_free_blocks(handle, inode, 0, new_blocks[i], num, 0);
+	ext4_free_blocks(handle, inode, NULL, new_blocks[i], num, 0);
 
 	return err;
 }
@@ -924,7 +924,7 @@ err_out:
 		ext4_free_blocks(handle, inode, where[i].bh, 0, 1,
 				 EXT4_FREE_BLOCKS_FORGET);
 	}
-	ext4_free_blocks(handle, inode, 0, le32_to_cpu(where[num].key),
+	ext4_free_blocks(handle, inode, NULL, le32_to_cpu(where[num].key),
 			 blks, 0);
 
 	return err;
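The 0-to-NULL conversions above are cosmetic: the third argument of ext4_free_blocks() is a struct buffer_head pointer, and passing a literal 0 works but draws sparse warnings. Assuming the prototype below (our reading of fs/ext4/ext4.h from this era, stated as an assumption), nothing changes at runtime:

	/* assumed prototype, for illustration only */
	void ext4_free_blocks(handle_t *handle, struct inode *inode,
			      struct buffer_head *bh, ext4_fsblk_t block,
			      unsigned long count, int flags);

	/* before: integer 0 silently converted to a null pointer */
	ext4_free_blocks(handle, inode, 0, new_blocks[i], 1, 0);

	/* after: the null pointer is explicit */
	ext4_free_blocks(handle, inode, NULL, new_blocks[i], 1, 0);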
@@ -973,6 +973,7 @@ static int ext4_ind_map_blocks(handle_t *handle, struct inode *inode,
 	int count = 0;
 	ext4_fsblk_t first_block = 0;
 
+	trace_ext4_ind_map_blocks_enter(inode, map->m_lblk, map->m_len, flags);
 	J_ASSERT(!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)));
 	J_ASSERT(handle != NULL || (flags & EXT4_GET_BLOCKS_CREATE) == 0);
 	depth = ext4_block_to_path(inode, map->m_lblk, offsets,
@@ -1058,6 +1059,8 @@ cleanup:
 		partial--;
 	}
 out:
+	trace_ext4_ind_map_blocks_exit(inode, map->m_lblk,
+				       map->m_pblk, map->m_len, err);
 	return err;
 }
 
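These trace_ext4_ind_map_blocks_enter/_exit calls come from TRACE_EVENT() declarations in include/trace/events/ext4.h. A rough sketch of what such a declaration looks like; the field layout below is illustrative, not the exact upstream definition:

	#include <linux/tracepoint.h>

	TRACE_EVENT(ext4_ind_map_blocks_enter,
		TP_PROTO(struct inode *inode, ext4_lblk_t lblk,
			 unsigned int len, unsigned int flags),

		TP_ARGS(inode, lblk, len, flags),

		TP_STRUCT__entry(
			__field(dev_t,		dev)
			__field(ino_t,		ino)
			__field(ext4_lblk_t,	lblk)
			__field(unsigned int,	len)
			__field(unsigned int,	flags)
		),

		TP_fast_assign(
			__entry->dev	= inode->i_sb->s_dev;
			__entry->ino	= inode->i_ino;
			__entry->lblk	= lblk;
			__entry->len	= len;
			__entry->flags	= flags;
		),

		TP_printk("dev %d,%d ino %lu lblk %u len %u flags %u",
			  MAJOR(__entry->dev), MINOR(__entry->dev),
			  (unsigned long) __entry->ino,
			  __entry->lblk, __entry->len, __entry->flags)
	);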
@@ -1927,7 +1930,7 @@ repeat:
 	 * We do still charge estimated metadata to the sb though;
 	 * we cannot afford to run out of free blocks.
 	 */
-	if (ext4_claim_free_blocks(sbi, md_needed + 1)) {
+	if (ext4_claim_free_blocks(sbi, md_needed + 1, 0)) {
 		dquot_release_reservation_block(inode, 1);
 		if (ext4_should_retry_alloc(inode->i_sb, &retries)) {
 			yield();
@@ -2060,7 +2063,7 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd,
 		if (nr_pages == 0)
 			break;
 		for (i = 0; i < nr_pages; i++) {
-			int commit_write = 0, redirty_page = 0;
+			int commit_write = 0, skip_page = 0;
 			struct page *page = pvec.pages[i];
 
 			index = page->index;
@@ -2086,14 +2089,12 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd,
 			 * If the page does not have buffers (for
 			 * whatever reason), try to create them using
 			 * __block_write_begin.  If this fails,
-			 * redirty the page and move on.
+			 * skip the page and move on.
 			 */
 			if (!page_has_buffers(page)) {
 				if (__block_write_begin(page, 0, len,
 						noalloc_get_block_write)) {
-redirty_page:
-					redirty_page_for_writepage(mpd->wbc,
-								   page);
+skip_page:
 					unlock_page(page);
 					continue;
 				}
@@ -2104,7 +2105,7 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd,
 			block_start = 0;
 			do {
 				if (!bh)
-					goto redirty_page;
+					goto skip_page;
 				if (map && (cur_logical >= map->m_lblk) &&
 				    (cur_logical <= (map->m_lblk +
 						     (map->m_len - 1)))) {
@@ -2120,22 +2121,23 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd,
 					clear_buffer_unwritten(bh);
 				}
 
-				/* redirty page if block allocation undone */
+				/* skip page if block allocation undone */
 				if (buffer_delay(bh) || buffer_unwritten(bh))
-					redirty_page = 1;
+					skip_page = 1;
 				bh = bh->b_this_page;
 				block_start += bh->b_size;
 				cur_logical++;
 				pblock++;
 			} while (bh != page_bufs);
 
-			if (redirty_page)
-				goto redirty_page;
+			if (skip_page)
+				goto skip_page;
 
 			if (commit_write)
 				/* mark the buffer_heads as dirty & uptodate */
 				block_commit_write(page, 0, len);
 
+			clear_page_dirty_for_io(page);
 			/*
 			 * Delalloc doesn't support data journalling,
 			 * but eventually maybe we'll lift this
@@ -2165,8 +2167,7 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd,
 	return ret;
 }
 
-static void ext4_da_block_invalidatepages(struct mpage_da_data *mpd,
-					sector_t logical, long blk_cnt)
+static void ext4_da_block_invalidatepages(struct mpage_da_data *mpd)
 {
 	int nr_pages, i;
 	pgoff_t index, end;
@@ -2174,9 +2175,8 @@ static void ext4_da_block_invalidatepages(struct mpage_da_data *mpd,
 	struct inode *inode = mpd->inode;
 	struct address_space *mapping = inode->i_mapping;
 
-	index = logical >> (PAGE_CACHE_SHIFT - inode->i_blkbits);
-	end   = (logical + blk_cnt - 1) >>
-				(PAGE_CACHE_SHIFT - inode->i_blkbits);
+	index = mpd->first_page;
+	end   = mpd->next_page - 1;
 	while (index <= end) {
 		nr_pages = pagevec_lookup(&pvec, mapping, index, PAGEVEC_SIZE);
 		if (nr_pages == 0)
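The old signature converted a logical block range into a page range on every call; the new one reads the page range straight out of the mpage_da_data the caller already holds. The dropped conversion, for reference, with a worked example rather than code from the patch:

	/* illustrative only: logical-block range -> page-index range */
	pgoff_t index = logical >> (PAGE_CACHE_SHIFT - inode->i_blkbits);
	pgoff_t end   = (logical + blk_cnt - 1) >>
			(PAGE_CACHE_SHIFT - inode->i_blkbits);

	/*
	 * e.g. PAGE_CACHE_SHIFT = 12 (4K pages), i_blkbits = 10 (1K blocks):
	 * the shift is 2, so logical 8 with blk_cnt 8 gives index 2, end 3
	 * (four blocks per page, two pages covered).
	 */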
@@ -2279,9 +2279,8 @@ static void mpage_da_map_and_submit(struct mpage_da_data *mpd)
 		err = blks;
 		/*
 		 * If get block returns EAGAIN or ENOSPC and there
-		 * appears to be free blocks we will call
-		 * ext4_writepage() for all of the pages which will
-		 * just redirty the pages.
+		 * appears to be free blocks we will just let
+		 * mpage_da_submit_io() unlock all of the pages.
 		 */
 		if (err == -EAGAIN)
 			goto submit_io;
@@ -2312,8 +2311,10 @@ static void mpage_da_map_and_submit(struct mpage_da_data *mpd)
 			ext4_print_free_blocks(mpd->inode);
 		}
 		/* invalidate all the pages */
-		ext4_da_block_invalidatepages(mpd, next,
-				mpd->b_size >> mpd->inode->i_blkbits);
+		ext4_da_block_invalidatepages(mpd);
+
+		/* Mark this page range as having been completed */
+		mpd->io_done = 1;
 		return;
 	}
 	BUG_ON(blks == 0);
@@ -2438,102 +2439,6 @@ static int ext4_bh_delay_or_unwritten(handle_t *handle, struct buffer_head *bh)
 }
 
 /*
- * __mpage_da_writepage - finds extent of pages and blocks
- *
- * @page: page to consider
- * @wbc: not used, we just follow rules
- * @data: context
- *
- * The function finds extents of pages and scan them for all blocks.
- */
-static int __mpage_da_writepage(struct page *page,
-				struct writeback_control *wbc,
-				struct mpage_da_data *mpd)
-{
-	struct inode *inode = mpd->inode;
-	struct buffer_head *bh, *head;
-	sector_t logical;
-
-	/*
-	 * Can we merge this page to current extent?
-	 */
-	if (mpd->next_page != page->index) {
-		/*
-		 * Nope, we can't. So, we map non-allocated blocks
-		 * and start IO on them
-		 */
-		if (mpd->next_page != mpd->first_page) {
-			mpage_da_map_and_submit(mpd);
-			/*
-			 * skip rest of the page in the page_vec
-			 */
-			redirty_page_for_writepage(wbc, page);
-			unlock_page(page);
-			return MPAGE_DA_EXTENT_TAIL;
-		}
-
-		/*
-		 * Start next extent of pages ...
-		 */
-		mpd->first_page = page->index;
-
-		/*
-		 * ... and blocks
-		 */
-		mpd->b_size = 0;
-		mpd->b_state = 0;
-		mpd->b_blocknr = 0;
-	}
-
-	mpd->next_page = page->index + 1;
-	logical = (sector_t) page->index <<
-		  (PAGE_CACHE_SHIFT - inode->i_blkbits);
-
-	if (!page_has_buffers(page)) {
-		mpage_add_bh_to_extent(mpd, logical, PAGE_CACHE_SIZE,
-				       (1 << BH_Dirty) | (1 << BH_Uptodate));
-		if (mpd->io_done)
-			return MPAGE_DA_EXTENT_TAIL;
-	} else {
-		/*
-		 * Page with regular buffer heads, just add all dirty ones
-		 */
-		head = page_buffers(page);
-		bh = head;
-		do {
-			BUG_ON(buffer_locked(bh));
-			/*
-			 * We need to try to allocate
-			 * unmapped blocks in the same page.
-			 * Otherwise we won't make progress
-			 * with the page in ext4_writepage
-			 */
-			if (ext4_bh_delay_or_unwritten(NULL, bh)) {
-				mpage_add_bh_to_extent(mpd, logical,
-						       bh->b_size,
-						       bh->b_state);
-				if (mpd->io_done)
-					return MPAGE_DA_EXTENT_TAIL;
-			} else if (buffer_dirty(bh) && (buffer_mapped(bh))) {
-				/*
-				 * mapped dirty buffer. We need to update
-				 * the b_state because we look at
-				 * b_state in mpage_da_map_blocks.  We don't
-				 * update b_size because if we find an
-				 * unmapped buffer_head later we need to
-				 * use the b_state flag of that buffer_head.
-				 */
-				if (mpd->b_size == 0)
-					mpd->b_state = bh->b_state & BH_FLAGS;
-			}
-			logical++;
-		} while ((bh = bh->b_this_page) != head);
-	}
-
-	return 0;
-}
-
-/*
  * This is a special get_blocks_t callback which is used by
  * ext4_da_write_begin().  It will either return mapped block or
  * reserve space for a single block.
@@ -2684,7 +2589,7 @@ static void ext4_end_io_buffer_write(struct buffer_head *bh, int uptodate);
  * because we should have holes filled from ext4_page_mkwrite(). We even don't
  * need to file the inode to the transaction's list in ordered mode because if
  * we are writing back data added by write(), the inode is already there and if
- * we are writing back data modified via mmap(), noone guarantees in which
+ * we are writing back data modified via mmap(), no one guarantees in which
  * transaction the data will hit the disk. In case we are journaling data, we
  * cannot start transaction directly because transaction start ranks above page
  * lock so we have to do some magic.
@@ -2786,7 +2691,7 @@ static int ext4_writepage(struct page *page,
 
 /*
  * This is called via ext4_da_writepages() to
- * calulate the total number of credits to reserve to fit
+ * calculate the total number of credits to reserve to fit
  * a single extent allocation into a single transaction,
  * ext4_da_writpeages() will loop calling this before
  * the block allocation.
@@ -2811,27 +2716,27 @@ static int ext4_da_writepages_trans_blocks(struct inode *inode)
 
 /*
  * write_cache_pages_da - walk the list of dirty pages of the given
- * address space and call the callback function (which usually writes
- * the pages).
- *
- * This is a forked version of write_cache_pages().  Differences:
- *	Range cyclic is ignored.
- *	no_nrwrite_index_update is always presumed true
+ * address space and accumulate pages that need writing, and call
+ * mpage_da_map_and_submit to map a single contiguous memory region
+ * and then write them.
  */
 static int write_cache_pages_da(struct address_space *mapping,
 				struct writeback_control *wbc,
 				struct mpage_da_data *mpd,
 				pgoff_t *done_index)
 {
-	int ret = 0;
-	int done = 0;
+	struct buffer_head *bh, *head;
+	struct inode *inode = mapping->host;
 	struct pagevec pvec;
-	unsigned nr_pages;
-	pgoff_t index;
-	pgoff_t end;		/* Inclusive */
+	unsigned int nr_pages;
+	sector_t logical;
+	pgoff_t index, end;
 	long nr_to_write = wbc->nr_to_write;
-	int tag;
+	int i, tag, ret = 0;
 
+	memset(mpd, 0, sizeof(struct mpage_da_data));
+	mpd->wbc = wbc;
+	mpd->inode = inode;
 	pagevec_init(&pvec, 0);
 	index = wbc->range_start >> PAGE_CACHE_SHIFT;
 	end = wbc->range_end >> PAGE_CACHE_SHIFT;
@@ -2842,13 +2747,11 @@ static int write_cache_pages_da(struct address_space *mapping,
 		tag = PAGECACHE_TAG_DIRTY;
 
 	*done_index = index;
-	while (!done && (index <= end)) {
-		int i;
-
+	while (index <= end) {
 		nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, tag,
 			      min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1);
 		if (nr_pages == 0)
-			break;
+			return 0;
 
 		for (i = 0; i < nr_pages; i++) {
 			struct page *page = pvec.pages[i];
@@ -2860,60 +2763,98 @@ static int write_cache_pages_da(struct address_space *mapping,
			 * mapping. However, page->index will not change
			 * because we have a reference on the page.
			 */
-			if (page->index > end) {
-				done = 1;
-				break;
-			}
+			if (page->index > end)
+				goto out;
 
 			*done_index = page->index + 1;
 
+			/*
+			 * If we can't merge this page, and we have
+			 * accumulated an contiguous region, write it
+			 */
+			if ((mpd->next_page != page->index) &&
+			    (mpd->next_page != mpd->first_page)) {
+				mpage_da_map_and_submit(mpd);
+				goto ret_extent_tail;
+			}
+
 			lock_page(page);
 
 			/*
-			 * Page truncated or invalidated. We can freely skip it
-			 * then, even for data integrity operations: the page
-			 * has disappeared concurrently, so there could be no
-			 * real expectation of this data interity operation
-			 * even if there is now a new, dirty page at the same
-			 * pagecache address.
+			 * If the page is no longer dirty, or its
+			 * mapping no longer corresponds to inode we
+			 * are writing (which means it has been
+			 * truncated or invalidated), or the page is
+			 * already under writeback and we are not
+			 * doing a data integrity writeback, skip the page
 			 */
-			if (unlikely(page->mapping != mapping)) {
-continue_unlock:
+			if (!PageDirty(page) ||
+			    (PageWriteback(page) &&
+			     (wbc->sync_mode == WB_SYNC_NONE)) ||
+			    unlikely(page->mapping != mapping)) {
 				unlock_page(page);
 				continue;
 			}
 
-			if (!PageDirty(page)) {
-				/* someone wrote it for us */
-				goto continue_unlock;
-			}
-
-			if (PageWriteback(page)) {
-				if (wbc->sync_mode != WB_SYNC_NONE)
-					wait_on_page_writeback(page);
-				else
-					goto continue_unlock;
-			}
-
+			wait_on_page_writeback(page);
 			BUG_ON(PageWriteback(page));
-			if (!clear_page_dirty_for_io(page))
-				goto continue_unlock;
 
-			ret = __mpage_da_writepage(page, wbc, mpd);
-			if (unlikely(ret)) {
-				if (ret == AOP_WRITEPAGE_ACTIVATE) {
-					unlock_page(page);
-					ret = 0;
-				} else {
-					done = 1;
-					break;
-				}
+			if (mpd->next_page != page->index)
+				mpd->first_page = page->index;
+			mpd->next_page = page->index + 1;
+			logical = (sector_t) page->index <<
+				  (PAGE_CACHE_SHIFT - inode->i_blkbits);
+
+			if (!page_has_buffers(page)) {
+				mpage_add_bh_to_extent(mpd, logical,
+						       PAGE_CACHE_SIZE,
+						       (1 << BH_Dirty) | (1 << BH_Uptodate));
+				if (mpd->io_done)
+					goto ret_extent_tail;
+			} else {
+				/*
+				 * Page with regular buffer heads,
+				 * just add all dirty ones
+				 */
+				head = page_buffers(page);
+				bh = head;
+				do {
+					BUG_ON(buffer_locked(bh));
+					/*
+					 * We need to try to allocate
+					 * unmapped blocks in the same page.
+					 * Otherwise we won't make progress
+					 * with the page in ext4_writepage
+					 */
+					if (ext4_bh_delay_or_unwritten(NULL, bh)) {
+						mpage_add_bh_to_extent(mpd, logical,
+								       bh->b_size,
+								       bh->b_state);
+						if (mpd->io_done)
+							goto ret_extent_tail;
+					} else if (buffer_dirty(bh) && (buffer_mapped(bh))) {
+						/*
+						 * mapped dirty buffer. We need
+						 * to update the b_state
+						 * because we look at b_state
+						 * in mpage_da_map_blocks.  We
+						 * don't update b_size because
+						 * if we find an unmapped
+						 * buffer_head later we need to
+						 * use the b_state flag of that
+						 * buffer_head.
+						 */
+						if (mpd->b_size == 0)
+							mpd->b_state = bh->b_state & BH_FLAGS;
+					}
+					logical++;
+				} while ((bh = bh->b_this_page) != head);
 			}
 
 			if (nr_to_write > 0) {
 				nr_to_write--;
 				if (nr_to_write == 0 &&
-				    wbc->sync_mode == WB_SYNC_NONE) {
+				    wbc->sync_mode == WB_SYNC_NONE)
 					/*
 					 * We stop writing back only if we are
 					 * not doing integrity sync. In case of
@@ -2924,14 +2865,18 @@ continue_unlock:
 					 * pages, but have not synced all of the
 					 * old dirty pages.
 					 */
-					done = 1;
-					break;
-				}
+					goto out;
 			}
 		}
 		pagevec_release(&pvec);
 		cond_resched();
 	}
+	return 0;
+ret_extent_tail:
+	ret = MPAGE_DA_EXTENT_TAIL;
+out:
+	pagevec_release(&pvec);
+	cond_resched();
 	return ret;
 }
 
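With __mpage_da_writepage() folded into the scan loop, the control flow reduces to: extend a run of contiguous dirty pages, and flush the run whenever the next page does not merge. A runnable toy model of that accumulate-then-submit shape (a hypothetical demo over page indexes, not kernel code):

	#include <stdio.h>

	static void submit_run(long first, long next)
	{
		/* stands in for mpage_da_map_and_submit() */
		printf("submit pages [%ld, %ld]\n", first, next - 1);
	}

	int main(void)
	{
		long dirty[] = { 3, 4, 5, 9, 10, 17 };	/* indexes the scan found */
		long first = 0, next = 0;	/* mpd->first_page / mpd->next_page */

		for (unsigned i = 0; i < sizeof(dirty) / sizeof(dirty[0]); i++) {
			long idx = dirty[i];

			if (next != idx) {		/* cannot merge with current run */
				if (next != first)	/* a run exists: flush it */
					submit_run(first, next);
				first = idx;		/* start a new run */
			}
			next = idx + 1;			/* extend the run */
		}
		if (next != first)
			submit_run(first, next);	/* flush the tail */
		return 0;	/* prints runs [3,5], [9,10], [17,17] */
	}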
@@ -2945,7 +2890,6 @@ static int ext4_da_writepages(struct address_space *mapping,
 	struct mpage_da_data mpd;
 	struct inode *inode = mapping->host;
 	int pages_written = 0;
-	long pages_skipped;
 	unsigned int max_pages;
 	int range_cyclic, cycled = 1, io_done = 0;
 	int needed_blocks, ret = 0;
@@ -3028,11 +2972,6 @@ static int ext4_da_writepages(struct address_space *mapping,
 		wbc->nr_to_write = desired_nr_to_write;
 	}
 
-	mpd.wbc = wbc;
-	mpd.inode = mapping->host;
-
-	pages_skipped = wbc->pages_skipped;
-
 retry:
 	if (wbc->sync_mode == WB_SYNC_ALL)
 		tag_pages_for_writeback(mapping, index, end);
@@ -3059,22 +2998,10 @@ retry:
 		}
 
 		/*
-		 * Now call __mpage_da_writepage to find the next
+		 * Now call write_cache_pages_da() to find the next
 		 * contiguous region of logical blocks that need
-		 * blocks to be allocated by ext4.  We don't actually
-		 * submit the blocks for I/O here, even though
-		 * write_cache_pages thinks it will, and will set the
-		 * pages as clean for write before calling
-		 * __mpage_da_writepage().
+		 * blocks to be allocated by ext4 and submit them.
 		 */
-		mpd.b_size = 0;
-		mpd.b_state = 0;
-		mpd.b_blocknr = 0;
-		mpd.first_page = 0;
-		mpd.next_page = 0;
-		mpd.io_done = 0;
-		mpd.pages_written = 0;
-		mpd.retval = 0;
 		ret = write_cache_pages_da(mapping, wbc, &mpd, &done_index);
 		/*
 		 * If we have a contiguous extent of pages and we
@@ -3096,7 +3023,6 @@ retry:
 			 * and try again
 			 */
 			jbd2_journal_force_commit_nested(sbi->s_journal);
-			wbc->pages_skipped = pages_skipped;
 			ret = 0;
 		} else if (ret == MPAGE_DA_EXTENT_TAIL) {
 			/*
@@ -3104,7 +3030,6 @@ retry:
 			 * rest of the pages
 			 */
 			pages_written += mpd.pages_written;
-			wbc->pages_skipped = pages_skipped;
 			ret = 0;
 			io_done = 1;
 		} else if (wbc->nr_to_write)
@@ -3122,11 +3047,6 @@ retry:
 		wbc->range_end = mapping->writeback_index - 1;
 		goto retry;
 	}
-	if (pages_skipped != wbc->pages_skipped)
-		ext4_msg(inode->i_sb, KERN_CRIT,
-			 "This should not happen leaving %s "
-			 "with nr_to_write = %ld ret = %d",
-			 __func__, wbc->nr_to_write, ret);
 
 	/* Update index */
 	wbc->range_cyclic = range_cyclic;
@@ -3383,7 +3303,7 @@ int ext4_alloc_da_blocks(struct inode *inode)
	 * the pages by calling redirty_page_for_writepage() but that
	 * would be ugly in the extreme.  So instead we would need to
	 * replicate parts of the code in the above functions,
-	 * simplifying them becuase we wouldn't actually intend to
+	 * simplifying them because we wouldn't actually intend to
	 * write out the pages, but rather only collect contiguous
	 * logical block extents, call the multi-block allocator, and
	 * then update the buffer heads with the block allocations.
@@ -3460,6 +3380,7 @@ static sector_t ext4_bmap(struct address_space *mapping, sector_t block)
 
 static int ext4_readpage(struct file *file, struct page *page)
 {
+	trace_ext4_readpage(page);
 	return mpage_readpage(page, ext4_get_block);
 }
 
@@ -3494,6 +3415,8 @@ static void ext4_invalidatepage(struct page *page, unsigned long offset)
 {
 	journal_t *journal = EXT4_JOURNAL(page->mapping->host);
 
+	trace_ext4_invalidatepage(page, offset);
+
 	/*
 	 * free any io_end structure allocated for buffers to be discarded
 	 */
@@ -3515,6 +3438,8 @@ static int ext4_releasepage(struct page *page, gfp_t wait)
 {
 	journal_t *journal = EXT4_JOURNAL(page->mapping->host);
 
+	trace_ext4_releasepage(page);
+
 	WARN_ON(PageChecked(page));
 	if (!page_has_buffers(page))
 		return 0;
@@ -3586,7 +3511,7 @@ retry:
 			loff_t end = offset + iov_length(iov, nr_segs);
 
 			if (end > isize)
-				vmtruncate(inode, isize);
+				ext4_truncate_failed_write(inode);
 		}
 	}
 	if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
@@ -3768,7 +3693,7 @@
  *
  * The unwrritten extents will be converted to written when DIO is completed.
  * For async direct IO, since the IO may still pending when return, we
- * set up an end_io call back function, which will do the convertion
+ * set up an end_io call back function, which will do the conversion
  * when async direct IO completed.
  *
  * If the O_DIRECT write will extend the file then add this inode to the
@@ -3791,7 +3716,7 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb,
	 * We could direct write to holes and fallocate.
	 *
	 * Allocated blocks to fill the hole are marked as uninitialized
-	 * to prevent paralel buffered read to expose the stale data
+	 * to prevent parallel buffered read to expose the stale data
	 * before DIO complete the data IO.
	 *
	 * As to previously fallocated extents, ext4 get_block
@@ -3852,7 +3777,7 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb,
 		int err;
 		/*
 		 * for non AIO case, since the IO is already
-		 * completed, we could do the convertion right here
+		 * completed, we could do the conversion right here
 		 */
 		err = ext4_convert_unwritten_extents(inode,
 						     offset, ret);
@@ -3873,11 +3798,16 @@ static ssize_t ext4_direct_IO(int rw, struct kiocb *iocb,
 {
 	struct file *file = iocb->ki_filp;
 	struct inode *inode = file->f_mapping->host;
+	ssize_t ret;
 
+	trace_ext4_direct_IO_enter(inode, offset, iov_length(iov, nr_segs), rw);
 	if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
-		return ext4_ext_direct_IO(rw, iocb, iov, offset, nr_segs);
-
-	return ext4_ind_direct_IO(rw, iocb, iov, offset, nr_segs);
+		ret = ext4_ext_direct_IO(rw, iocb, iov, offset, nr_segs);
+	else
+		ret = ext4_ind_direct_IO(rw, iocb, iov, offset, nr_segs);
+	trace_ext4_direct_IO_exit(inode, offset,
+				  iov_length(iov, nr_segs), rw, ret);
+	return ret;
 }
 
 /*
@@ -3903,7 +3833,6 @@ static const struct address_space_operations ext4_ordered_aops = {
 	.readpage		= ext4_readpage,
 	.readpages		= ext4_readpages,
 	.writepage		= ext4_writepage,
-	.sync_page		= block_sync_page,
 	.write_begin		= ext4_write_begin,
 	.write_end		= ext4_ordered_write_end,
 	.bmap			= ext4_bmap,
@@ -3919,7 +3848,6 @@ static const struct address_space_operations ext4_writeback_aops = {
 	.readpage		= ext4_readpage,
 	.readpages		= ext4_readpages,
 	.writepage		= ext4_writepage,
-	.sync_page		= block_sync_page,
 	.write_begin		= ext4_write_begin,
 	.write_end		= ext4_writeback_write_end,
 	.bmap			= ext4_bmap,
@@ -3935,7 +3863,6 @@ static const struct address_space_operations ext4_journalled_aops = {
 	.readpage		= ext4_readpage,
 	.readpages		= ext4_readpages,
 	.writepage		= ext4_writepage,
-	.sync_page		= block_sync_page,
 	.write_begin		= ext4_write_begin,
 	.write_end		= ext4_journalled_write_end,
 	.set_page_dirty		= ext4_journalled_set_page_dirty,
@@ -3951,7 +3878,6 @@ static const struct address_space_operations ext4_da_aops = {
 	.readpages		= ext4_readpages,
 	.writepage		= ext4_writepage,
 	.writepages		= ext4_da_writepages,
-	.sync_page		= block_sync_page,
 	.write_begin		= ext4_da_write_begin,
 	.write_end		= ext4_da_write_end,
 	.bmap			= ext4_bmap,
@@ -3988,9 +3914,30 @@ void ext4_set_aops(struct inode *inode)
 int ext4_block_truncate_page(handle_t *handle,
 		struct address_space *mapping, loff_t from)
 {
+	unsigned offset = from & (PAGE_CACHE_SIZE-1);
+	unsigned length;
+	unsigned blocksize;
+	struct inode *inode = mapping->host;
+
+	blocksize = inode->i_sb->s_blocksize;
+	length = blocksize - (offset & (blocksize - 1));
+
+	return ext4_block_zero_page_range(handle, mapping, from, length);
+}
+
+/*
+ * ext4_block_zero_page_range() zeros out a mapping of length 'length'
+ * starting from file offset 'from'.  The range to be zero'd must
+ * be contained with in one block.  If the specified range exceeds
+ * the end of the block it will be shortened to end of the block
+ * that cooresponds to 'from'
+ */
+int ext4_block_zero_page_range(handle_t *handle,
+		struct address_space *mapping, loff_t from, loff_t length)
+{
 	ext4_fsblk_t index = from >> PAGE_CACHE_SHIFT;
 	unsigned offset = from & (PAGE_CACHE_SIZE-1);
-	unsigned blocksize, length, pos;
+	unsigned blocksize, max, pos;
 	ext4_lblk_t iblock;
 	struct inode *inode = mapping->host;
 	struct buffer_head *bh;
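Splitting the zeroing helper out lets other paths zero a sub-block range directly instead of always zeroing to the end of the block. A hedged sketch of how a hole-punching caller might use it to clear the partial blocks at each end of a hole; the function names come from this patch, the surrounding logic is assumed (and a real caller would also need to handle a hole contained within a single block):

	/* Illustrative sketch: zero the partial block at each end of a hole. */
	static int zero_partial_ends(handle_t *handle, struct address_space *mapping,
				     loff_t offset, loff_t length)
	{
		unsigned blocksize = mapping->host->i_sb->s_blocksize;
		loff_t end = offset + length;
		int err = 0;

		if (offset & (blocksize - 1))		/* hole starts mid-block */
			err = ext4_block_zero_page_range(handle, mapping, offset,
					blocksize - (offset & (blocksize - 1)));
		if (!err && (end & (blocksize - 1)))	/* hole ends mid-block */
			err = ext4_block_zero_page_range(handle, mapping,
					end & ~(loff_t)(blocksize - 1),
					end & (blocksize - 1));
		return err;
	}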
@@ -4003,7 +3950,15 @@ int ext4_block_truncate_page(handle_t *handle,
 		return -EINVAL;
 
 	blocksize = inode->i_sb->s_blocksize;
-	length = blocksize - (offset & (blocksize - 1));
+	max = blocksize - (offset & (blocksize - 1));
+
+	/*
+	 * correct length if it does not fall between
+	 * 'from' and the end of the block
+	 */
+	if (length > max || length < 0)
+		length = max;
+
 	iblock = index << (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits);
 
 	if (!page_has_buffers(page))
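The clamp keeps the zeroed range inside the block that contains 'from'. A runnable check of the arithmetic (hypothetical inputs, not filesystem code): with a 4096-byte block and from = 100, max is 3996, so a requested length of 5000 is shortened to 3996.

	#include <stdio.h>

	int main(void)
	{
		unsigned blocksize = 4096;
		long long from = 100, length = 5000;	/* hypothetical inputs */

		unsigned offset = from & (blocksize - 1);
		unsigned max = blocksize - (offset & (blocksize - 1));

		if (length > max || length < 0)
			length = max;		/* clamp to end of the block */

		/* prints: offset 100 max 3996 length 3996 */
		printf("offset %u max %u length %lld\n", offset, max, length);
		return 0;
	}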
@@ -4098,7 +4053,7 @@ static inline int all_zeroes(__le32 *p, __le32 *q)
  *
  * When we do truncate() we may have to clean the ends of several
  * indirect blocks but leave the blocks themselves alive. Block is
- * partially truncated if some data below the new i_size is refered
+ * partially truncated if some data below the new i_size is referred
  * from it (and it is on the path to the first completely truncated
  * data block, indeed). We have to free the top of that path along
  * with everything to the right of the path. Since no allocation
@@ -4177,6 +4132,9 @@ no_top:
  *
  * We release `count' blocks on disk, but (last - first) may be greater
  * than `count' because there can be holes in there.
+ *
+ * Return 0 on success, 1 on invalid block range
+ * and < 0 on fatal error.
  */
 static int ext4_clear_blocks(handle_t *handle, struct inode *inode,
 			     struct buffer_head *bh,
@@ -4203,33 +4161,32 @@ static int ext4_clear_blocks(handle_t *handle, struct inode *inode,
 		if (bh) {
 			BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata");
 			err = ext4_handle_dirty_metadata(handle, inode, bh);
-			if (unlikely(err)) {
-				ext4_std_error(inode->i_sb, err);
-				return 1;
-			}
+			if (unlikely(err))
+				goto out_err;
 		}
 		err = ext4_mark_inode_dirty(handle, inode);
-		if (unlikely(err)) {
-			ext4_std_error(inode->i_sb, err);
-			return 1;
-		}
+		if (unlikely(err))
+			goto out_err;
 		err = ext4_truncate_restart_trans(handle, inode,
 						  blocks_for_truncate(inode));
-		if (unlikely(err)) {
-			ext4_std_error(inode->i_sb, err);
-			return 1;
-		}
+		if (unlikely(err))
+			goto out_err;
 		if (bh) {
 			BUFFER_TRACE(bh, "retaking write access");
-			ext4_journal_get_write_access(handle, bh);
+			err = ext4_journal_get_write_access(handle, bh);
+			if (unlikely(err))
+				goto out_err;
 		}
 	}
 
 	for (p = first; p < last; p++)
 		*p = 0;
 
-	ext4_free_blocks(handle, inode, 0, block_to_free, count, flags);
+	ext4_free_blocks(handle, inode, NULL, block_to_free, count, flags);
 	return 0;
+out_err:
+	ext4_std_error(inode->i_sb, err);
+	return err;
 }
 
 /**
@@ -4240,7 +4197,7 @@ static int ext4_clear_blocks(handle_t *handle, struct inode *inode,
  * @first:	array of block numbers
  * @last:	points immediately past the end of array
  *
- * We are freeing all blocks refered from that array (numbers are stored as
+ * We are freeing all blocks referred from that array (numbers are stored as
  * little-endian 32-bit) and updating @inode->i_blocks appropriately.
  *
  * We accumulate contiguous runs of blocks to free.  Conveniently, if these
@@ -4263,7 +4220,7 @@ static void ext4_free_data(handle_t *handle, struct inode *inode,
 	ext4_fsblk_t nr;		/* Current block # */
 	__le32 *p;			/* Pointer into inode/ind
 					   for current block */
-	int err;
+	int err = 0;
 
 	if (this_bh) {				/* For indirect block */
 		BUFFER_TRACE(this_bh, "get_write_access");
@@ -4285,9 +4242,10 @@ static void ext4_free_data(handle_t *handle, struct inode *inode,
 			} else if (nr == block_to_free + count) {
 				count++;
 			} else {
-				if (ext4_clear_blocks(handle, inode, this_bh,
+				err = ext4_clear_blocks(handle, inode, this_bh,
 						      block_to_free, count,
-						      block_to_free_p, p))
+						      block_to_free_p, p);
+				if (err)
 					break;
 				block_to_free = nr;
 				block_to_free_p = p;
@@ -4296,9 +4254,12 @@ static void ext4_free_data(handle_t *handle, struct inode *inode,
 		}
 	}
 
-	if (count > 0)
-		ext4_clear_blocks(handle, inode, this_bh, block_to_free,
+	if (!err && count > 0)
+		err = ext4_clear_blocks(handle, inode, this_bh, block_to_free,
 				  count, block_to_free_p, p);
+	if (err < 0)
+		/* fatal error */
+		return;
 
 	if (this_bh) {
 		BUFFER_TRACE(this_bh, "call ext4_handle_dirty_metadata");
@@ -4328,7 +4289,7 @@ static void ext4_free_data(handle_t *handle, struct inode *inode,
  * @last:	pointer immediately past the end of array
  * @depth:	depth of the branches to free
  *
- * We are freeing all blocks refered from these branches (numbers are
+ * We are freeing all blocks referred from these branches (numbers are
  * stored as little-endian 32-bit) and updating @inode->i_blocks
  * appropriately.
  */
@@ -4416,7 +4377,7 @@ static void ext4_free_branches(handle_t *handle, struct inode *inode,
 			 * transaction where the data blocks are
 			 * actually freed.
 			 */
-			ext4_free_blocks(handle, inode, 0, nr, 1,
+			ext4_free_blocks(handle, inode, NULL, nr, 1,
 					 EXT4_FREE_BLOCKS_METADATA|
 					 EXT4_FREE_BLOCKS_FORGET);
 
@@ -4446,8 +4407,6 @@ static void ext4_free_branches(handle_t *handle, struct inode *inode,
 
 int ext4_can_truncate(struct inode *inode)
 {
-	if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
-		return 0;
 	if (S_ISREG(inode->i_mode))
 		return 1;
 	if (S_ISDIR(inode->i_mode))
@@ -4458,6 +4417,31 @@ int ext4_can_truncate(struct inode *inode)
 }
 
 /*
+ * ext4_punch_hole: punches a hole in a file by releaseing the blocks
+ * associated with the given offset and length
+ *
+ * @inode:  File inode
+ * @offset: The offset where the hole will begin
+ * @len:    The length of the hole
+ *
+ * Returns: 0 on sucess or negative on failure
+ */
+
+int ext4_punch_hole(struct file *file, loff_t offset, loff_t length)
+{
+	struct inode *inode = file->f_path.dentry->d_inode;
+	if (!S_ISREG(inode->i_mode))
+		return -ENOTSUPP;
+
+	if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) {
+		/* TODO: Add support for non extent hole punching */
+		return -ENOTSUPP;
+	}
+
+	return ext4_ext_punch_hole(file, offset, length);
+}
+
+/*
  * ext4_truncate()
  *
  * We block out ext4_get_block() block instantiations across the entire
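ext4_punch_hole() is the filesystem half of the fallocate(2) FALLOC_FL_PUNCH_HOLE operation, and at this point it only handles extent-mapped files. A minimal userspace exercise of the new path; the path name and sizes are made up:

	#define _GNU_SOURCE
	#include <fcntl.h>
	#include <stdio.h>
	#include <unistd.h>
	#include <linux/falloc.h>

	int main(void)
	{
		int fd = open("/mnt/ext4/testfile", O_WRONLY);	/* hypothetical path */
		if (fd < 0)
			return 1;

		/* punch a 1 MiB hole at offset 4 MiB; PUNCH_HOLE requires KEEP_SIZE */
		if (fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
			      4 << 20, 1 << 20) != 0)
			perror("fallocate");	/* fails on non-extent-mapped files */

		close(fd);
		return 0;
	}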
@@ -4496,10 +4480,12 @@ void ext4_truncate(struct inode *inode)
 	Indirect chain[4];
 	Indirect *partial;
 	__le32 nr = 0;
-	int n;
-	ext4_lblk_t last_block;
+	int n = 0;
+	ext4_lblk_t last_block, max_block;
 	unsigned blocksize = inode->i_sb->s_blocksize;
 
+	trace_ext4_truncate_enter(inode);
+
 	if (!ext4_can_truncate(inode))
 		return;
 
@@ -4510,6 +4496,7 @@ void ext4_truncate(struct inode *inode)
 
 	if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) {
 		ext4_ext_truncate(inode);
+		trace_ext4_truncate_exit(inode);
 		return;
 	}
 
@@ -4519,14 +4506,18 @@ void ext4_truncate(struct inode *inode)
 
 	last_block = (inode->i_size + blocksize-1)
 					>> EXT4_BLOCK_SIZE_BITS(inode->i_sb);
+	max_block = (EXT4_SB(inode->i_sb)->s_bitmap_maxbytes + blocksize-1)
+					>> EXT4_BLOCK_SIZE_BITS(inode->i_sb);
 
 	if (inode->i_size & (blocksize - 1))
 		if (ext4_block_truncate_page(handle, mapping, inode->i_size))
 			goto out_stop;
 
-	n = ext4_block_to_path(inode, last_block, offsets, NULL);
-	if (n == 0)
-		goto out_stop;	/* error */
+	if (last_block != max_block) {
+		n = ext4_block_to_path(inode, last_block, offsets, NULL);
+		if (n == 0)
+			goto out_stop;	/* error */
+	}
 
 	/*
 	 * OK.  This truncate is going to happen.  We add the inode to the
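max_block is the block index just past the largest size the indirect-block scheme can address, so a file already at that limit has no partial indirect path to resolve. The capacity behind s_bitmap_maxbytes comes from the classic direct/indirect sum; a runnable illustration of that arithmetic, before any additional clamping the kernel may apply (the constant 12 is EXT4_NDIR_BLOCKS):

	#include <stdio.h>

	int main(void)
	{
		unsigned long long blocksize = 4096;
		unsigned long long aper = blocksize / 4;	/* 4-byte block numbers per block */

		/* 12 direct + one indirect tree + one double + one triple */
		unsigned long long blocks = 12 + aper + aper * aper + aper * aper * aper;

		printf("max addressable blocks: %llu (~%llu GiB of data)\n",
		       blocks, blocks * blocksize >> 30);
		return 0;
	}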
@@ -4557,7 +4548,13 @@
 	 */
 	ei->i_disksize = inode->i_size;
 
-	if (n == 1) {		/* direct blocks */
+	if (last_block == max_block) {
+		/*
+		 * It is unnecessary to free any data blocks if last_block is
+		 * equal to the indirect block limit.
+		 */
+		goto out_unlock;
+	} else if (n == 1) {		/* direct blocks */
 		ext4_free_data(handle, inode, NULL, i_data+offsets[0],
 			       i_data + EXT4_NDIR_BLOCKS);
 		goto do_indirects;
@@ -4617,6 +4614,7 @@ do_indirects:
 		;
 	}
 
+out_unlock:
 	up_write(&ei->i_data_sem);
 	inode->i_mtime = inode->i_ctime = ext4_current_time(inode);
 	ext4_mark_inode_dirty(handle, inode);
@@ -4639,6 +4637,7 @@ out_stop:
 		ext4_orphan_del(handle, inode);
 
 	ext4_journal_stop(handle);
+	trace_ext4_truncate_exit(inode);
 }
 
 /*
@@ -4668,7 +4667,7 @@ static int __ext4_get_inode_loc(struct inode *inode,
 	/*
 	 * Figure out the offset within the block group inode table
 	 */
-	inodes_per_block = (EXT4_BLOCK_SIZE(sb) / EXT4_INODE_SIZE(sb));
+	inodes_per_block = EXT4_SB(sb)->s_inodes_per_block;
 	inode_offset = ((inode->i_ino - 1) %
 			EXT4_INODES_PER_GROUP(sb));
 	block = ext4_inode_table(sb, gdp) + (inode_offset / inodes_per_block);
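Both expressions yield the same value; the superblock-info field is simply precomputed at mount instead of being re-divided on every inode load. For example, 4096-byte blocks holding 256-byte on-disk inodes give 16 inodes per block, and the table block and byte offset follow by division. A toy recomputation (hypothetical geometry, not the kernel code):

	#include <stdio.h>

	int main(void)
	{
		unsigned block_size = 4096, inode_size = 256;	/* hypothetical geometry */
		unsigned inodes_per_block = block_size / inode_size;	/* 16 */
		unsigned long ino = 42, inodes_per_group = 8192;

		unsigned long inode_offset = (ino - 1) % inodes_per_group;
		unsigned long block_in_table = inode_offset / inodes_per_block;
		unsigned offset_in_block = (inode_offset % inodes_per_block) * inode_size;

		/* inode 42: table block 2, byte offset 2304 */
		printf("inode %lu: table block %lu, byte offset %u\n",
		       ino, block_in_table, offset_in_block);
		return 0;
	}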
@@ -4770,6 +4769,7 @@ make_io:
 		 * has in-inode xattrs, or we don't have this inode in memory.
 		 * Read the block from disk.
 		 */
+		trace_ext4_load_inode(inode);
 		get_bh(bh);
 		bh->b_end_io = end_buffer_read_sync;
 		submit_bh(READ_META, bh);
@@ -4875,7 +4875,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
 		return inode;
 
 	ei = EXT4_I(inode);
-	iloc.bh = 0;
+	iloc.bh = NULL;
 
 	ret = __ext4_get_inode_loc(inode, &iloc, 0);
 	if (ret < 0)
@@ -5361,8 +5361,7 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
 
 	if (S_ISREG(inode->i_mode) &&
 	    attr->ia_valid & ATTR_SIZE &&
-	    (attr->ia_size < inode->i_size ||
-	     (ext4_test_inode_flag(inode, EXT4_INODE_EOFBLOCKS)))) {
+	    (attr->ia_size < inode->i_size)) {
 		handle_t *handle;
 
 		handle = ext4_journal_start(inode, 3);
@@ -5396,14 +5395,15 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
 				goto err_out;
 			}
 		}
-		/* ext4_truncate will clear the flag */
-		if ((ext4_test_inode_flag(inode, EXT4_INODE_EOFBLOCKS)))
-			ext4_truncate(inode);
 	}
 
-	if ((attr->ia_valid & ATTR_SIZE) &&
-	    attr->ia_size != i_size_read(inode))
-		rc = vmtruncate(inode, attr->ia_size);
+	if (attr->ia_valid & ATTR_SIZE) {
+		if (attr->ia_size != i_size_read(inode)) {
+			truncate_setsize(inode, attr->ia_size);
+			ext4_truncate(inode);
+		} else if (ext4_test_inode_flag(inode, EXT4_INODE_EOFBLOCKS))
+			ext4_truncate(inode);
+	}
 
 	if (!rc) {
 		setattr_copy(inode, attr);
@@ -5460,13 +5460,12 @@ static int ext4_indirect_trans_blocks(struct inode *inode, int nrblocks,
 	/* if nrblocks are contiguous */
 	if (chunk) {
 		/*
-		 * With N contiguous data blocks, it need at most
-		 * N/EXT4_ADDR_PER_BLOCK(inode->i_sb) indirect blocks
-		 * 2 dindirect blocks
-		 * 1 tindirect block
+		 * With N contiguous data blocks, we need at most
+		 * N/EXT4_ADDR_PER_BLOCK(inode->i_sb) + 1 indirect blocks,
+		 * 2 dindirect blocks, and 1 tindirect block
 		 */
-		indirects = nrblocks / EXT4_ADDR_PER_BLOCK(inode->i_sb);
-		return indirects + 3;
+		return DIV_ROUND_UP(nrblocks,
+				    EXT4_ADDR_PER_BLOCK(inode->i_sb)) + 4;
	}
 	/*
 	 * if nrblocks are not contiguous, worse case, each block touch
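The old code truncated N/addr_per_block and added 3, which undercounts whenever N is not a multiple of addr_per_block; rounding up and adding 4 covers the extra indirect block plus 2 double-indirect and 1 triple-indirect. A runnable comparison, with DIV_ROUND_UP expanded the way the kernel defines it:

	#include <stdio.h>

	#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

	int main(void)
	{
		int addr_per_block = 1024;	/* 4K block / 4-byte entries */
		int nrblocks = 1500;		/* hypothetical contiguous run */

		int old_est = nrblocks / addr_per_block + 3;		   /* 4 */
		int new_est = DIV_ROUND_UP(nrblocks, addr_per_block) + 4;  /* 6 */

		printf("old %d, new %d\n", old_est, new_est);
		return 0;
	}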
@@ -5540,7 +5539,7 @@ static int ext4_meta_trans_blocks(struct inode *inode, int nrblocks, int chunk)
 }
 
 /*
- * Calulate the total number of credits to reserve to fit
+ * Calculate the total number of credits to reserve to fit
  * the modification of a single pages into a single transaction,
  * which may include multiple chunks of block allocations.
  *
@@ -5862,15 +5861,19 @@ int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
 		goto out_unlock;
 	}
 	ret = 0;
-	if (PageMappedToDisk(page))
-		goto out_unlock;
+
+	lock_page(page);
+	wait_on_page_writeback(page);
+	if (PageMappedToDisk(page)) {
+		up_read(&inode->i_alloc_sem);
+		return VM_FAULT_LOCKED;
+	}
 
 	if (page->index == size >> PAGE_CACHE_SHIFT)
 		len = size & ~PAGE_CACHE_MASK;
 	else
 		len = PAGE_CACHE_SIZE;
 
-	lock_page(page);
 	/*
 	 * return if we have all the buffers mapped. This avoid
 	 * the need to call write_begin/write_end which does a
@@ -5880,8 +5883,8 @@ int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
 	if (page_has_buffers(page)) {
 		if (!walk_page_buffers(NULL, page_buffers(page), 0, len, NULL,
 				       ext4_bh_unmapped)) {
-			unlock_page(page);
-			goto out_unlock;
+			up_read(&inode->i_alloc_sem);
+			return VM_FAULT_LOCKED;
 		}
 	}
 	unlock_page(page);
@@ -5901,6 +5904,16 @@ int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
 	if (ret < 0)
 		goto out_unlock;
 	ret = 0;
+
+	/*
+	 * write_begin/end might have created a dirty page and someone
+	 * could wander in and start the IO.  Make sure that hasn't
+	 * happened.
+	 */
+	lock_page(page);
+	wait_on_page_writeback(page);
+	up_read(&inode->i_alloc_sem);
+	return VM_FAULT_LOCKED;
 out_unlock:
 	if (ret)
 		ret = VM_FAULT_SIGBUS;
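After these changes ext4_page_mkwrite() succeeds by returning VM_FAULT_LOCKED: the handler hands a still-locked, writeback-quiesced page back to the MM, closing the window in which the page could be written back (and marked clean) between mkwrite and the pte update. The contract in sketch form; this is the assumed general shape of a ->page_mkwrite handler, not the full ext4 version:

	/* Sketch of the VM_FAULT_LOCKED contract for a ->page_mkwrite handler. */
	static int example_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
	{
		struct page *page = vmf->page;

		lock_page(page);
		wait_on_page_writeback(page);	/* no I/O in flight on return */

		/* ... make the page writable (allocate blocks, etc.) ... */

		/*
		 * Return with the page still locked; the caller unlocks it
		 * after installing the writable pte.
		 */
		return VM_FAULT_LOCKED;
	}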