Diffstat (limited to 'fs/ext4/inode.c')
-rw-r--r--  fs/ext4/inode.c  577
1 file changed, 295 insertions(+), 282 deletions(-)
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 9f7f9e49914f..50d0e9c64584 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -173,7 +173,7 @@ int ext4_truncate_restart_trans(handle_t *handle, struct inode *inode,
 	BUG_ON(EXT4_JOURNAL(inode) == NULL);
 	jbd_debug(2, "restarting handle %p\n", handle);
 	up_write(&EXT4_I(inode)->i_data_sem);
-	ret = ext4_journal_restart(handle, blocks_for_truncate(inode));
+	ret = ext4_journal_restart(handle, nblocks);
 	down_write(&EXT4_I(inode)->i_data_sem);
 	ext4_discard_preallocations(inode);
 
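
Note the ordering in the hunk above: the handle is restarted only after i_data_sem has been dropped, and the semaphore is retaken afterwards, because a journal restart may block waiting on the running transaction. The same drop, do-blocking-work, retake shape as a minimal userspace sketch using a pthread rwlock in place of i_data_sem (everything here is illustrative, nothing below is kernel API):

    #include <pthread.h>
    #include <stdio.h>

    static pthread_rwlock_t data_sem = PTHREAD_RWLOCK_INITIALIZER;

    /* stand-in for ext4_journal_restart(): may block, so the
     * caller must not hold data_sem across it */
    static int journal_restart(int nblocks)
    {
        printf("restarting handle with %d credits\n", nblocks);
        return 0;
    }

    static int truncate_restart_trans(int nblocks)
    {
        int ret;

        pthread_rwlock_unlock(&data_sem);  /* up_write(...) */
        ret = journal_restart(nblocks);    /* safe: lock not held */
        pthread_rwlock_wrlock(&data_sem);  /* down_write(...) */
        return ret;
    }

    int main(void)
    {
        pthread_rwlock_wrlock(&data_sem);
        truncate_restart_trans(8);
        pthread_rwlock_unlock(&data_sem);
        return 0;
    }
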
@@ -639,8 +639,8 @@ static int ext4_alloc_blocks(handle_t *handle, struct inode *inode,
 	while (target > 0) {
 		count = target;
 		/* allocating blocks for indirect blocks and direct blocks */
-		current_block = ext4_new_meta_blocks(handle, inode,
-						goal, &count, err);
+		current_block = ext4_new_meta_blocks(handle, inode, goal,
+						0, &count, err);
 		if (*err)
 			goto failed_out;
 
@@ -720,7 +720,7 @@ allocated:
 	return ret;
 failed_out:
 	for (i = 0; i < index; i++)
-		ext4_free_blocks(handle, inode, 0, new_blocks[i], 1, 0);
+		ext4_free_blocks(handle, inode, NULL, new_blocks[i], 1, 0);
 	return ret;
 }
 
@@ -823,20 +823,20 @@ static int ext4_alloc_branch(handle_t *handle, struct inode *inode,
 	return err;
 failed:
 	/* Allocation failed, free what we already allocated */
-	ext4_free_blocks(handle, inode, 0, new_blocks[0], 1, 0);
+	ext4_free_blocks(handle, inode, NULL, new_blocks[0], 1, 0);
 	for (i = 1; i <= n ; i++) {
 		/*
 		 * branch[i].bh is newly allocated, so there is no
 		 * need to revoke the block, which is why we don't
 		 * need to set EXT4_FREE_BLOCKS_METADATA.
 		 */
-		ext4_free_blocks(handle, inode, 0, new_blocks[i], 1,
+		ext4_free_blocks(handle, inode, NULL, new_blocks[i], 1,
 				 EXT4_FREE_BLOCKS_FORGET);
 	}
 	for (i = n+1; i < indirect_blks; i++)
-		ext4_free_blocks(handle, inode, 0, new_blocks[i], 1, 0);
+		ext4_free_blocks(handle, inode, NULL, new_blocks[i], 1, 0);
 
-	ext4_free_blocks(handle, inode, 0, new_blocks[i], num, 0);
+	ext4_free_blocks(handle, inode, NULL, new_blocks[i], num, 0);
 
 	return err;
 }
@@ -924,7 +924,7 @@ err_out:
 		ext4_free_blocks(handle, inode, where[i].bh, 0, 1,
 				 EXT4_FREE_BLOCKS_FORGET);
 	}
-	ext4_free_blocks(handle, inode, 0, le32_to_cpu(where[num].key),
+	ext4_free_blocks(handle, inode, NULL, le32_to_cpu(where[num].key),
 			 blks, 0);
 
 	return err;
@@ -973,6 +973,7 @@ static int ext4_ind_map_blocks(handle_t *handle, struct inode *inode,
 	int count = 0;
 	ext4_fsblk_t first_block = 0;
 
+	trace_ext4_ind_map_blocks_enter(inode, map->m_lblk, map->m_len, flags);
 	J_ASSERT(!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)));
 	J_ASSERT(handle != NULL || (flags & EXT4_GET_BLOCKS_CREATE) == 0);
 	depth = ext4_block_to_path(inode, map->m_lblk, offsets,
@@ -1058,6 +1059,8 @@ cleanup:
 		partial--;
 	}
 out:
+	trace_ext4_ind_map_blocks_exit(inode, map->m_lblk,
+				map->m_pblk, map->m_len, err);
 	return err;
 }
 
@@ -1927,7 +1930,7 @@ repeat:
 	 * We do still charge estimated metadata to the sb though;
 	 * we cannot afford to run out of free blocks.
 	 */
-	if (ext4_claim_free_blocks(sbi, md_needed + 1)) {
+	if (ext4_claim_free_blocks(sbi, md_needed + 1, 0)) {
 		dquot_release_reservation_block(inode, 1);
 		if (ext4_should_retry_alloc(inode->i_sb, &retries)) {
 			yield();
@@ -2060,7 +2063,7 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd,
 		if (nr_pages == 0)
 			break;
 		for (i = 0; i < nr_pages; i++) {
-			int commit_write = 0, redirty_page = 0;
+			int commit_write = 0, skip_page = 0;
 			struct page *page = pvec.pages[i];
 
 			index = page->index;
@@ -2086,14 +2089,12 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd,
 			 * If the page does not have buffers (for
 			 * whatever reason), try to create them using
 			 * __block_write_begin.  If this fails,
-			 * redirty the page and move on.
+			 * skip the page and move on.
 			 */
 			if (!page_has_buffers(page)) {
 				if (__block_write_begin(page, 0, len,
 						noalloc_get_block_write)) {
-				redirty_page:
-					redirty_page_for_writepage(mpd->wbc,
-								   page);
+				skip_page:
 					unlock_page(page);
 					continue;
 				}
@@ -2104,7 +2105,7 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd,
 			block_start = 0;
 			do {
 				if (!bh)
-					goto redirty_page;
+					goto skip_page;
 				if (map && (cur_logical >= map->m_lblk) &&
 				    (cur_logical <= (map->m_lblk +
 						(map->m_len - 1)))) {
@@ -2120,22 +2121,23 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd,
 					clear_buffer_unwritten(bh);
 				}
 
-				/* redirty page if block allocation undone */
+				/* skip page if block allocation undone */
 				if (buffer_delay(bh) || buffer_unwritten(bh))
-					redirty_page = 1;
+					skip_page = 1;
 				bh = bh->b_this_page;
 				block_start += bh->b_size;
 				cur_logical++;
 				pblock++;
 			} while (bh != page_bufs);
 
-			if (redirty_page)
-				goto redirty_page;
+			if (skip_page)
+				goto skip_page;
 
 			if (commit_write)
 				/* mark the buffer_heads as dirty & uptodate */
 				block_commit_write(page, 0, len);
 
+			clear_page_dirty_for_io(page);
 			/*
 			 * Delalloc doesn't support data journalling,
 			 * but eventually maybe we'll lift this
@@ -2165,8 +2167,7 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd,
 	return ret;
 }
 
-static void ext4_da_block_invalidatepages(struct mpage_da_data *mpd,
-					sector_t logical, long blk_cnt)
+static void ext4_da_block_invalidatepages(struct mpage_da_data *mpd)
 {
 	int nr_pages, i;
 	pgoff_t index, end;
@@ -2174,9 +2175,8 @@ static void ext4_da_block_invalidatepages(struct mpage_da_data *mpd,
 	struct inode *inode = mpd->inode;
 	struct address_space *mapping = inode->i_mapping;
 
-	index = logical >> (PAGE_CACHE_SHIFT - inode->i_blkbits);
-	end = (logical + blk_cnt - 1) >>
-				(PAGE_CACHE_SHIFT - inode->i_blkbits);
+	index = mpd->first_page;
+	end   = mpd->next_page - 1;
 	while (index <= end) {
 		nr_pages = pagevec_lookup(&pvec, mapping, index, PAGEVEC_SIZE);
 		if (nr_pages == 0)
@@ -2279,9 +2279,8 @@ static void mpage_da_map_and_submit(struct mpage_da_data *mpd)
 		err = blks;
 		/*
 		 * If get block returns EAGAIN or ENOSPC and there
-		 * appears to be free blocks we will call
-		 * ext4_writepage() for all of the pages which will
-		 * just redirty the pages.
+		 * appears to be free blocks we will just let
+		 * mpage_da_submit_io() unlock all of the pages.
 		 */
 		if (err == -EAGAIN)
 			goto submit_io;
@@ -2312,8 +2311,10 @@ static void mpage_da_map_and_submit(struct mpage_da_data *mpd)
 			ext4_print_free_blocks(mpd->inode);
 		}
 		/* invalidate all the pages */
-		ext4_da_block_invalidatepages(mpd, next,
-				mpd->b_size >> mpd->inode->i_blkbits);
+		ext4_da_block_invalidatepages(mpd);
+
+		/* Mark this page range as having been completed */
+		mpd->io_done = 1;
 		return;
 	}
 	BUG_ON(blks == 0);
@@ -2438,102 +2439,6 @@ static int ext4_bh_delay_or_unwritten(handle_t *handle, struct buffer_head *bh)
 }
 
 /*
- * __mpage_da_writepage - finds extent of pages and blocks
- *
- * @page: page to consider
- * @wbc: not used, we just follow rules
- * @data: context
- *
- * The function finds extents of pages and scan them for all blocks.
- */
-static int __mpage_da_writepage(struct page *page,
-				struct writeback_control *wbc,
-				struct mpage_da_data *mpd)
-{
-	struct inode *inode = mpd->inode;
-	struct buffer_head *bh, *head;
-	sector_t logical;
-
-	/*
-	 * Can we merge this page to current extent?
-	 */
-	if (mpd->next_page != page->index) {
-		/*
-		 * Nope, we can't. So, we map non-allocated blocks
-		 * and start IO on them
-		 */
-		if (mpd->next_page != mpd->first_page) {
-			mpage_da_map_and_submit(mpd);
-			/*
-			 * skip rest of the page in the page_vec
-			 */
-			redirty_page_for_writepage(wbc, page);
-			unlock_page(page);
-			return MPAGE_DA_EXTENT_TAIL;
-		}
-
-		/*
-		 * Start next extent of pages ...
-		 */
-		mpd->first_page = page->index;
-
-		/*
-		 * ... and blocks
-		 */
-		mpd->b_size = 0;
-		mpd->b_state = 0;
-		mpd->b_blocknr = 0;
-	}
-
-	mpd->next_page = page->index + 1;
-	logical = (sector_t) page->index <<
-		  (PAGE_CACHE_SHIFT - inode->i_blkbits);
-
-	if (!page_has_buffers(page)) {
-		mpage_add_bh_to_extent(mpd, logical, PAGE_CACHE_SIZE,
-				       (1 << BH_Dirty) | (1 << BH_Uptodate));
-		if (mpd->io_done)
-			return MPAGE_DA_EXTENT_TAIL;
-	} else {
-		/*
-		 * Page with regular buffer heads, just add all dirty ones
-		 */
-		head = page_buffers(page);
-		bh = head;
-		do {
-			BUG_ON(buffer_locked(bh));
-			/*
-			 * We need to try to allocate
-			 * unmapped blocks in the same page.
-			 * Otherwise we won't make progress
-			 * with the page in ext4_writepage
-			 */
-			if (ext4_bh_delay_or_unwritten(NULL, bh)) {
-				mpage_add_bh_to_extent(mpd, logical,
-						       bh->b_size,
-						       bh->b_state);
-				if (mpd->io_done)
-					return MPAGE_DA_EXTENT_TAIL;
-			} else if (buffer_dirty(bh) && (buffer_mapped(bh))) {
-				/*
-				 * mapped dirty buffer. We need to update
-				 * the b_state because we look at
-				 * b_state in mpage_da_map_blocks. We don't
-				 * update b_size because if we find an
-				 * unmapped buffer_head later we need to
-				 * use the b_state flag of that buffer_head.
-				 */
-				if (mpd->b_size == 0)
-					mpd->b_state = bh->b_state & BH_FLAGS;
-			}
-			logical++;
-		} while ((bh = bh->b_this_page) != head);
-	}
-
-	return 0;
-}
-
-/*
  * This is a special get_blocks_t callback which is used by
  * ext4_da_write_begin(). It will either return mapped block or
  * reserve space for a single block.
@@ -2684,7 +2589,7 @@ static void ext4_end_io_buffer_write(struct buffer_head *bh, int uptodate);
 * because we should have holes filled from ext4_page_mkwrite(). We even don't
 * need to file the inode to the transaction's list in ordered mode because if
 * we are writing back data added by write(), the inode is already there and if
- * we are writing back data modified via mmap(), noone guarantees in which
+ * we are writing back data modified via mmap(), no one guarantees in which
 * transaction the data will hit the disk. In case we are journaling data, we
 * cannot start transaction directly because transaction start ranks above page
 * lock so we have to do some magic.
@@ -2786,7 +2691,7 @@ static int ext4_writepage(struct page *page,
 
 /*
 * This is called via ext4_da_writepages() to
- * calulate the total number of credits to reserve to fit
+ * calculate the total number of credits to reserve to fit
 * a single extent allocation into a single transaction,
 * ext4_da_writpeages() will loop calling this before
 * the block allocation.
@@ -2811,27 +2716,27 @@ static int ext4_da_writepages_trans_blocks(struct inode *inode)
 
 /*
 * write_cache_pages_da - walk the list of dirty pages of the given
- * address space and call the callback function (which usually writes
- * the pages).
- *
- * This is a forked version of write_cache_pages(). Differences:
- *	Range cyclic is ignored.
- *	no_nrwrite_index_update is always presumed true
+ * address space and accumulate pages that need writing, and call
+ * mpage_da_map_and_submit to map a single contiguous memory region
+ * and then write them.
 */
static int write_cache_pages_da(struct address_space *mapping,
				struct writeback_control *wbc,
				struct mpage_da_data *mpd,
				pgoff_t *done_index)
{
-	int ret = 0;
-	int done = 0;
+	struct buffer_head *bh, *head;
+	struct inode *inode = mapping->host;
 	struct pagevec pvec;
-	unsigned nr_pages;
-	pgoff_t index;
-	pgoff_t end;		/* Inclusive */
+	unsigned int nr_pages;
+	sector_t logical;
+	pgoff_t index, end;
 	long nr_to_write = wbc->nr_to_write;
-	int tag;
+	int i, tag, ret = 0;
 
+	memset(mpd, 0, sizeof(struct mpage_da_data));
+	mpd->wbc = wbc;
+	mpd->inode = inode;
 	pagevec_init(&pvec, 0);
 	index = wbc->range_start >> PAGE_CACHE_SHIFT;
 	end = wbc->range_end >> PAGE_CACHE_SHIFT;
@@ -2842,13 +2747,11 @@ static int write_cache_pages_da(struct address_space *mapping,
 		tag = PAGECACHE_TAG_DIRTY;
 
 	*done_index = index;
-	while (!done && (index <= end)) {
-		int i;
-
+	while (index <= end) {
 		nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, tag,
			      min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1);
 		if (nr_pages == 0)
-			break;
+			return 0;
 
 		for (i = 0; i < nr_pages; i++) {
 			struct page *page = pvec.pages[i];
@@ -2860,60 +2763,98 @@ static int write_cache_pages_da(struct address_space *mapping,
 			 * mapping. However, page->index will not change
 			 * because we have a reference on the page.
 			 */
-			if (page->index > end) {
-				done = 1;
-				break;
-			}
+			if (page->index > end)
+				goto out;
 
 			*done_index = page->index + 1;
 
+			/*
+			 * If we can't merge this page, and we have
+			 * accumulated an contiguous region, write it
+			 */
+			if ((mpd->next_page != page->index) &&
+			    (mpd->next_page != mpd->first_page)) {
+				mpage_da_map_and_submit(mpd);
+				goto ret_extent_tail;
+			}
+
 			lock_page(page);
 
 			/*
-			 * Page truncated or invalidated. We can freely skip it
-			 * then, even for data integrity operations: the page
-			 * has disappeared concurrently, so there could be no
-			 * real expectation of this data interity operation
-			 * even if there is now a new, dirty page at the same
-			 * pagecache address.
+			 * If the page is no longer dirty, or its
+			 * mapping no longer corresponds to inode we
+			 * are writing (which means it has been
+			 * truncated or invalidated), or the page is
+			 * already under writeback and we are not
+			 * doing a data integrity writeback, skip the page
 			 */
-			if (unlikely(page->mapping != mapping)) {
-continue_unlock:
+			if (!PageDirty(page) ||
+			    (PageWriteback(page) &&
+			     (wbc->sync_mode == WB_SYNC_NONE)) ||
+			    unlikely(page->mapping != mapping)) {
 				unlock_page(page);
 				continue;
 			}
 
-			if (!PageDirty(page)) {
-				/* someone wrote it for us */
-				goto continue_unlock;
-			}
-
-			if (PageWriteback(page)) {
-				if (wbc->sync_mode != WB_SYNC_NONE)
-					wait_on_page_writeback(page);
-				else
-					goto continue_unlock;
-			}
-
+			wait_on_page_writeback(page);
 			BUG_ON(PageWriteback(page));
-			if (!clear_page_dirty_for_io(page))
-				goto continue_unlock;
 
-			ret = __mpage_da_writepage(page, wbc, mpd);
-			if (unlikely(ret)) {
-				if (ret == AOP_WRITEPAGE_ACTIVATE) {
-					unlock_page(page);
-					ret = 0;
-				} else {
-					done = 1;
-					break;
-				}
+			if (mpd->next_page != page->index)
+				mpd->first_page = page->index;
+			mpd->next_page = page->index + 1;
+			logical = (sector_t) page->index <<
+				(PAGE_CACHE_SHIFT - inode->i_blkbits);
+
+			if (!page_has_buffers(page)) {
+				mpage_add_bh_to_extent(mpd, logical,
+						       PAGE_CACHE_SIZE,
+						       (1 << BH_Dirty) | (1 << BH_Uptodate));
+				if (mpd->io_done)
+					goto ret_extent_tail;
+			} else {
+				/*
+				 * Page with regular buffer heads,
+				 * just add all dirty ones
+				 */
+				head = page_buffers(page);
+				bh = head;
+				do {
+					BUG_ON(buffer_locked(bh));
+					/*
+					 * We need to try to allocate
+					 * unmapped blocks in the same page.
+					 * Otherwise we won't make progress
+					 * with the page in ext4_writepage
+					 */
+					if (ext4_bh_delay_or_unwritten(NULL, bh)) {
+						mpage_add_bh_to_extent(mpd, logical,
+								       bh->b_size,
+								       bh->b_state);
+						if (mpd->io_done)
+							goto ret_extent_tail;
+					} else if (buffer_dirty(bh) && (buffer_mapped(bh))) {
+						/*
+						 * mapped dirty buffer. We need
+						 * to update the b_state
+						 * because we look at b_state
+						 * in mpage_da_map_blocks. We
+						 * don't update b_size because
+						 * if we find an unmapped
+						 * buffer_head later we need to
+						 * use the b_state flag of that
+						 * buffer_head.
+						 */
+						if (mpd->b_size == 0)
+							mpd->b_state = bh->b_state & BH_FLAGS;
+					}
+					logical++;
+				} while ((bh = bh->b_this_page) != head);
 			}
 
 			if (nr_to_write > 0) {
 				nr_to_write--;
 				if (nr_to_write == 0 &&
-				    wbc->sync_mode == WB_SYNC_NONE) {
+				    wbc->sync_mode == WB_SYNC_NONE)
 					/*
 					 * We stop writing back only if we are
 					 * not doing integrity sync. In case of
@@ -2924,14 +2865,18 @@ continue_unlock:
 					 * pages, but have not synced all of the
 					 * old dirty pages.
 					 */
-					done = 1;
-					break;
-				}
+					goto out;
 			}
 		}
 		pagevec_release(&pvec);
 		cond_resched();
 	}
+	return 0;
+ret_extent_tail:
+	ret = MPAGE_DA_EXTENT_TAIL;
+out:
+	pagevec_release(&pvec);
+	cond_resched();
 	return ret;
}
 
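
The rewritten write_cache_pages_da() above now does the extent accumulation itself: it keeps a run of contiguous dirty pages in mpd->first_page/mpd->next_page and flushes it through mpage_da_map_and_submit() as soon as a page cannot be merged. That accumulate-then-flush loop, reduced to a standalone sketch over a plain array of page indexes (illustrative only, none of this is ext4 code):

    #include <stdio.h>

    /* stand-in for mpage_da_map_and_submit(): handle one
     * accumulated contiguous run [first, next) */
    static void flush_run(long first, long next)
    {
        printf("submit pages %ld..%ld\n", first, next - 1);
    }

    int main(void)
    {
        /* dirty page indexes as pagevec_lookup_tag() might return them */
        long pages[] = { 3, 4, 5, 9, 10, 42 };
        long first = 0, next = 0;  /* mpd->first_page / mpd->next_page */

        for (int i = 0; i < 6; i++) {
            long idx = pages[i];

            /* can't merge and a run exists: flush it */
            if (next != idx && next != first)
                flush_run(first, next);
            if (next != idx)
                first = idx;  /* start a new run */
            next = idx + 1;
        }
        if (next != first)
            flush_run(first, next);  /* final run */
        return 0;
    }
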
@@ -2945,7 +2890,6 @@ static int ext4_da_writepages(struct address_space *mapping,
 	struct mpage_da_data mpd;
 	struct inode *inode = mapping->host;
 	int pages_written = 0;
-	long pages_skipped;
 	unsigned int max_pages;
 	int range_cyclic, cycled = 1, io_done = 0;
 	int needed_blocks, ret = 0;
@@ -3028,11 +2972,6 @@ static int ext4_da_writepages(struct address_space *mapping,
 		wbc->nr_to_write = desired_nr_to_write;
 	}
 
-	mpd.wbc = wbc;
-	mpd.inode = mapping->host;
-
-	pages_skipped = wbc->pages_skipped;
-
 retry:
 	if (wbc->sync_mode == WB_SYNC_ALL)
 		tag_pages_for_writeback(mapping, index, end);
@@ -3059,22 +2998,10 @@ retry:
 		}
 
 		/*
-		 * Now call __mpage_da_writepage to find the next
+		 * Now call write_cache_pages_da() to find the next
 		 * contiguous region of logical blocks that need
-		 * blocks to be allocated by ext4.  We don't actually
-		 * submit the blocks for I/O here, even though
-		 * write_cache_pages thinks it will, and will set the
-		 * pages as clean for write before calling
-		 * __mpage_da_writepage().
+		 * blocks to be allocated by ext4 and submit them.
 		 */
-		mpd.b_size = 0;
-		mpd.b_state = 0;
-		mpd.b_blocknr = 0;
-		mpd.first_page = 0;
-		mpd.next_page = 0;
-		mpd.io_done = 0;
-		mpd.pages_written = 0;
-		mpd.retval = 0;
 		ret = write_cache_pages_da(mapping, wbc, &mpd, &done_index);
 		/*
 		 * If we have a contiguous extent of pages and we
@@ -3096,7 +3023,6 @@ retry:
 			 * and try again
 			 */
 			jbd2_journal_force_commit_nested(sbi->s_journal);
-			wbc->pages_skipped = pages_skipped;
 			ret = 0;
 		} else if (ret == MPAGE_DA_EXTENT_TAIL) {
 			/*
@@ -3104,7 +3030,6 @@ retry:
 			 * rest of the pages
 			 */
 			pages_written += mpd.pages_written;
-			wbc->pages_skipped = pages_skipped;
 			ret = 0;
 			io_done = 1;
 		} else if (wbc->nr_to_write)
@@ -3122,11 +3047,6 @@ retry:
 		wbc->range_end = mapping->writeback_index - 1;
 		goto retry;
 	}
-	if (pages_skipped != wbc->pages_skipped)
-		ext4_msg(inode->i_sb, KERN_CRIT,
-			 "This should not happen leaving %s "
-			 "with nr_to_write = %ld ret = %d",
-			 __func__, wbc->nr_to_write, ret);
 
 	/* Update index */
 	wbc->range_cyclic = range_cyclic;
@@ -3383,7 +3303,7 @@ int ext4_alloc_da_blocks(struct inode *inode)
 	 * the pages by calling redirty_page_for_writepage() but that
 	 * would be ugly in the extreme.  So instead we would need to
 	 * replicate parts of the code in the above functions,
-	 * simplifying them becuase we wouldn't actually intend to
+	 * simplifying them because we wouldn't actually intend to
 	 * write out the pages, but rather only collect contiguous
 	 * logical block extents, call the multi-block allocator, and
 	 * then update the buffer heads with the block allocations.
@@ -3460,6 +3380,7 @@ static sector_t ext4_bmap(struct address_space *mapping, sector_t block)
 
 static int ext4_readpage(struct file *file, struct page *page)
 {
+	trace_ext4_readpage(page);
 	return mpage_readpage(page, ext4_get_block);
 }
 
@@ -3494,6 +3415,8 @@ static void ext4_invalidatepage(struct page *page, unsigned long offset)
 {
 	journal_t *journal = EXT4_JOURNAL(page->mapping->host);
 
+	trace_ext4_invalidatepage(page, offset);
+
 	/*
 	 * free any io_end structure allocated for buffers to be discarded
 	 */
@@ -3515,6 +3438,8 @@ static int ext4_releasepage(struct page *page, gfp_t wait)
 {
 	journal_t *journal = EXT4_JOURNAL(page->mapping->host);
 
+	trace_ext4_releasepage(page);
+
 	WARN_ON(PageChecked(page));
 	if (!page_has_buffers(page))
 		return 0;
@@ -3586,7 +3511,7 @@ retry:
 			loff_t end = offset + iov_length(iov, nr_segs);
 
 			if (end > isize)
-				vmtruncate(inode, isize);
+				ext4_truncate_failed_write(inode);
 		}
 	}
 	if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
@@ -3768,7 +3693,7 @@ retry:
 *
 * The unwrritten extents will be converted to written when DIO is completed.
 * For async direct IO, since the IO may still pending when return, we
- * set up an end_io call back function, which will do the convertion
+ * set up an end_io call back function, which will do the conversion
 * when async direct IO completed.
 *
 * If the O_DIRECT write will extend the file then add this inode to the
@@ -3791,7 +3716,7 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb,
 	 * We could direct write to holes and fallocate.
 	 *
 	 * Allocated blocks to fill the hole are marked as uninitialized
-	 * to prevent paralel buffered read to expose the stale data
+	 * to prevent parallel buffered read to expose the stale data
 	 * before DIO complete the data IO.
 	 *
 	 * As to previously fallocated extents, ext4 get_block
@@ -3852,7 +3777,7 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb,
 		int err;
 		/*
 		 * for non AIO case, since the IO is already
-		 * completed, we could do the convertion right here
+		 * completed, we could do the conversion right here
 		 */
 		err = ext4_convert_unwritten_extents(inode,
 						     offset, ret);
@@ -3873,11 +3798,16 @@ static ssize_t ext4_direct_IO(int rw, struct kiocb *iocb,
{
 	struct file *file = iocb->ki_filp;
 	struct inode *inode = file->f_mapping->host;
+	ssize_t ret;
 
+	trace_ext4_direct_IO_enter(inode, offset, iov_length(iov, nr_segs), rw);
 	if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
-		return ext4_ext_direct_IO(rw, iocb, iov, offset, nr_segs);
-
-	return ext4_ind_direct_IO(rw, iocb, iov, offset, nr_segs);
+		ret = ext4_ext_direct_IO(rw, iocb, iov, offset, nr_segs);
+	else
+		ret = ext4_ind_direct_IO(rw, iocb, iov, offset, nr_segs);
+	trace_ext4_direct_IO_exit(inode, offset,
+				iov_length(iov, nr_segs), rw, ret);
+	return ret;
}
 
/*
@@ -3903,7 +3833,6 @@ static const struct address_space_operations ext4_ordered_aops = {
 	.readpage	= ext4_readpage,
 	.readpages	= ext4_readpages,
 	.writepage	= ext4_writepage,
-	.sync_page	= block_sync_page,
 	.write_begin	= ext4_write_begin,
 	.write_end	= ext4_ordered_write_end,
 	.bmap		= ext4_bmap,
@@ -3919,7 +3848,6 @@ static const struct address_space_operations ext4_writeback_aops = {
 	.readpage	= ext4_readpage,
 	.readpages	= ext4_readpages,
 	.writepage	= ext4_writepage,
-	.sync_page	= block_sync_page,
 	.write_begin	= ext4_write_begin,
 	.write_end	= ext4_writeback_write_end,
 	.bmap		= ext4_bmap,
@@ -3935,7 +3863,6 @@ static const struct address_space_operations ext4_journalled_aops = {
 	.readpage	= ext4_readpage,
 	.readpages	= ext4_readpages,
 	.writepage	= ext4_writepage,
-	.sync_page	= block_sync_page,
 	.write_begin	= ext4_write_begin,
 	.write_end	= ext4_journalled_write_end,
 	.set_page_dirty	= ext4_journalled_set_page_dirty,
@@ -3951,7 +3878,6 @@ static const struct address_space_operations ext4_da_aops = {
 	.readpages	= ext4_readpages,
 	.writepage	= ext4_writepage,
 	.writepages	= ext4_da_writepages,
-	.sync_page	= block_sync_page,
 	.write_begin	= ext4_da_write_begin,
 	.write_end	= ext4_da_write_end,
 	.bmap		= ext4_bmap,
@@ -3988,9 +3914,30 @@ void ext4_set_aops(struct inode *inode)
int ext4_block_truncate_page(handle_t *handle,
		struct address_space *mapping, loff_t from)
{
+	unsigned offset = from & (PAGE_CACHE_SIZE-1);
+	unsigned length;
+	unsigned blocksize;
+	struct inode *inode = mapping->host;
+
+	blocksize = inode->i_sb->s_blocksize;
+	length = blocksize - (offset & (blocksize - 1));
+
+	return ext4_block_zero_page_range(handle, mapping, from, length);
+}
+
+/*
+ * ext4_block_zero_page_range() zeros out a mapping of length 'length'
+ * starting from file offset 'from'.  The range to be zero'd must
+ * be contained with in one block.  If the specified range exceeds
+ * the end of the block it will be shortened to end of the block
+ * that cooresponds to 'from'
+ */
+int ext4_block_zero_page_range(handle_t *handle,
+		struct address_space *mapping, loff_t from, loff_t length)
+{
 	ext4_fsblk_t index = from >> PAGE_CACHE_SHIFT;
 	unsigned offset = from & (PAGE_CACHE_SIZE-1);
-	unsigned blocksize, length, pos;
+	unsigned blocksize, max, pos;
 	ext4_lblk_t iblock;
 	struct inode *inode = mapping->host;
 	struct buffer_head *bh;
@@ -4003,7 +3950,15 @@ int ext4_block_truncate_page(handle_t *handle,
 		return -EINVAL;
 
 	blocksize = inode->i_sb->s_blocksize;
-	length = blocksize - (offset & (blocksize - 1));
+	max = blocksize - (offset & (blocksize - 1));
+
+	/*
+	 * correct length if it does not fall between
+	 * 'from' and the end of the block
+	 */
+	if (length > max || length < 0)
+		length = max;
+
 	iblock = index << (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits);
 
 	if (!page_has_buffers(page))
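
The clamp added above keeps the zeroed range inside the block that 'from' falls in. Worked through with made-up numbers for a 4 KiB block size (standalone arithmetic mirroring the max/length logic, not reusing kernel types):

    #include <stdio.h>

    int main(void)
    {
        long long blocksize = 4096;
        long long from = 5000;    /* file offset where zeroing starts */
        long long length = 8000;  /* requested length, too long */

        long long offset = from & (blocksize - 1);               /* 904 into the block */
        long long max = blocksize - (offset & (blocksize - 1));  /* 3192 bytes left */

        /* shorten the range so it ends at the block boundary */
        if (length > max || length < 0)
            length = max;

        printf("offset %lld in block, zeroing %lld bytes\n", offset, length);
        return 0;
    }
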
@@ -4098,7 +4053,7 @@ static inline int all_zeroes(__le32 *p, __le32 *q)
 *
 * When we do truncate() we may have to clean the ends of several
 * indirect blocks but leave the blocks themselves alive. Block is
- * partially truncated if some data below the new i_size is refered
+ * partially truncated if some data below the new i_size is referred
 * from it (and it is on the path to the first completely truncated
 * data block, indeed). We have to free the top of that path along
 * with everything to the right of the path. Since no allocation
@@ -4177,6 +4132,9 @@ no_top:
 *
 * We release `count' blocks on disk, but (last - first) may be greater
 * than `count' because there can be holes in there.
+ *
+ * Return 0 on success, 1 on invalid block range
+ * and < 0 on fatal error.
 */
static int ext4_clear_blocks(handle_t *handle, struct inode *inode,
			     struct buffer_head *bh,
@@ -4203,33 +4161,32 @@ static int ext4_clear_blocks(handle_t *handle, struct inode *inode,
 		if (bh) {
 			BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata");
 			err = ext4_handle_dirty_metadata(handle, inode, bh);
-			if (unlikely(err)) {
-				ext4_std_error(inode->i_sb, err);
-				return 1;
-			}
+			if (unlikely(err))
+				goto out_err;
 		}
 		err = ext4_mark_inode_dirty(handle, inode);
-		if (unlikely(err)) {
-			ext4_std_error(inode->i_sb, err);
-			return 1;
-		}
+		if (unlikely(err))
+			goto out_err;
 		err = ext4_truncate_restart_trans(handle, inode,
 						  blocks_for_truncate(inode));
-		if (unlikely(err)) {
-			ext4_std_error(inode->i_sb, err);
-			return 1;
-		}
+		if (unlikely(err))
+			goto out_err;
 		if (bh) {
 			BUFFER_TRACE(bh, "retaking write access");
-			ext4_journal_get_write_access(handle, bh);
+			err = ext4_journal_get_write_access(handle, bh);
+			if (unlikely(err))
+				goto out_err;
 		}
 	}
 
 	for (p = first; p < last; p++)
 		*p = 0;
 
-	ext4_free_blocks(handle, inode, 0, block_to_free, count, flags);
+	ext4_free_blocks(handle, inode, NULL, block_to_free, count, flags);
 	return 0;
+out_err:
+	ext4_std_error(inode->i_sb, err);
+	return err;
}
 
/**
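
The reworked ext4_clear_blocks() above replaces three copies of the ext4_std_error()-and-return pair with a single out_err exit, and starts returning the real negative error instead of 1. The same centralized-error-exit idiom as a freestanding sketch (the step function and error value are invented for the example):

    #include <stdio.h>

    static int do_step(int n)
    {
        return n == 2 ? -5 : 0;  /* pretend step 2 fails */
    }

    static int clear_blocks(void)
    {
        int err;
        int step;

        for (step = 1; step <= 3; step++) {
            err = do_step(step);
            if (err)
                goto out_err;  /* every failure funnels here */
        }
        return 0;
    out_err:
        fprintf(stderr, "step %d failed: %d\n", step, err);  /* ext4_std_error() stand-in */
        return err;
    }

    int main(void)
    {
        return clear_blocks() ? 1 : 0;
    }
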
@@ -4240,7 +4197,7 @@ static int ext4_clear_blocks(handle_t *handle, struct inode *inode,
 * @first:	array of block numbers
 * @last:	points immediately past the end of array
 *
- * We are freeing all blocks refered from that array (numbers are stored as
+ * We are freeing all blocks referred from that array (numbers are stored as
 * little-endian 32-bit) and updating @inode->i_blocks appropriately.
 *
 * We accumulate contiguous runs of blocks to free.  Conveniently, if these
@@ -4263,7 +4220,7 @@ static void ext4_free_data(handle_t *handle, struct inode *inode,
 	ext4_fsblk_t nr;		/* Current block # */
 	__le32 *p;			/* Pointer into inode/ind
 					   for current block */
-	int err;
+	int err = 0;
 
 	if (this_bh) {				/* For indirect block */
 		BUFFER_TRACE(this_bh, "get_write_access");
@@ -4285,9 +4242,10 @@ static void ext4_free_data(handle_t *handle, struct inode *inode,
 			} else if (nr == block_to_free + count) {
 				count++;
 			} else {
-				if (ext4_clear_blocks(handle, inode, this_bh,
-						      block_to_free, count,
-						      block_to_free_p, p))
+				err = ext4_clear_blocks(handle, inode, this_bh,
+							block_to_free, count,
+							block_to_free_p, p);
+				if (err)
 					break;
 				block_to_free = nr;
 				block_to_free_p = p;
@@ -4296,9 +4254,12 @@ static void ext4_free_data(handle_t *handle, struct inode *inode,
 		}
 	}
 
-	if (count > 0)
-		ext4_clear_blocks(handle, inode, this_bh, block_to_free,
-				  count, block_to_free_p, p);
+	if (!err && count > 0)
+		err = ext4_clear_blocks(handle, inode, this_bh, block_to_free,
+					count, block_to_free_p, p);
+	if (err < 0)
+		/* fatal error */
+		return;
 
 	if (this_bh) {
 		BUFFER_TRACE(this_bh, "call ext4_handle_dirty_metadata");
@@ -4328,7 +4289,7 @@ static void ext4_free_data(handle_t *handle, struct inode *inode,
 * @last:	pointer immediately past the end of array
 * @depth:	depth of the branches to free
 *
- * We are freeing all blocks refered from these branches (numbers are
+ * We are freeing all blocks referred from these branches (numbers are
 * stored as little-endian 32-bit) and updating @inode->i_blocks
 * appropriately.
 */
@@ -4416,7 +4377,7 @@ static void ext4_free_branches(handle_t *handle, struct inode *inode,
 			 * transaction where the data blocks are
 			 * actually freed.
 			 */
-			ext4_free_blocks(handle, inode, 0, nr, 1,
+			ext4_free_blocks(handle, inode, NULL, nr, 1,
 					 EXT4_FREE_BLOCKS_METADATA|
 					 EXT4_FREE_BLOCKS_FORGET);
 
@@ -4446,8 +4407,6 @@ static void ext4_free_branches(handle_t *handle, struct inode *inode,
 
int ext4_can_truncate(struct inode *inode)
{
-	if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
-		return 0;
 	if (S_ISREG(inode->i_mode))
 		return 1;
 	if (S_ISDIR(inode->i_mode))
@@ -4458,6 +4417,31 @@ int ext4_can_truncate(struct inode *inode)
}
 
/*
+ * ext4_punch_hole: punches a hole in a file by releaseing the blocks
+ * associated with the given offset and length
+ *
+ * @inode:  File inode
+ * @offset: The offset where the hole will begin
+ * @len:    The length of the hole
+ *
+ * Returns: 0 on sucess or negative on failure
+ */
+
+int ext4_punch_hole(struct file *file, loff_t offset, loff_t length)
+{
+	struct inode *inode = file->f_path.dentry->d_inode;
+	if (!S_ISREG(inode->i_mode))
+		return -ENOTSUPP;
+
+	if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) {
+		/* TODO: Add support for non extent hole punching */
+		return -ENOTSUPP;
+	}
+
+	return ext4_ext_punch_hole(file, offset, length);
+}
+
+/*
 * ext4_truncate()
 *
 * We block out ext4_get_block() block instantiations across the entire
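
ext4_punch_hole() above is the inode.c entry point that hole punching reaches from the VFS; userspace drives it through fallocate(2) with FALLOC_FL_PUNCH_HOLE, and at this point only extent-mapped files are handled (indirect-block files get ENOTSUPP). A minimal userspace caller, assuming a Linux system recent enough to define the flags (error handling trimmed to the essentials):

    #define _GNU_SOURCE
    #include <fcntl.h>
    #include <linux/falloc.h>
    #include <stdio.h>
    #include <unistd.h>

    int main(void)
    {
        int fd = open("testfile", O_RDWR | O_CREAT, 0644);
        if (fd < 0)
            return 1;
        if (ftruncate(fd, 1 << 20))  /* make a 1 MiB file */
            return 1;

        /* punch a 64 KiB hole at offset 128 KiB; PUNCH_HOLE must be
         * combined with KEEP_SIZE, so i_size does not change */
        if (fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
                      128 << 10, 64 << 10))
            perror("fallocate");

        close(fd);
        return 0;
    }
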
@@ -4496,10 +4480,12 @@ void ext4_truncate(struct inode *inode)
 	Indirect chain[4];
 	Indirect *partial;
 	__le32 nr = 0;
-	int n;
-	ext4_lblk_t last_block;
+	int n = 0;
+	ext4_lblk_t last_block, max_block;
 	unsigned blocksize = inode->i_sb->s_blocksize;
 
+	trace_ext4_truncate_enter(inode);
+
 	if (!ext4_can_truncate(inode))
 		return;
 
@@ -4510,6 +4496,7 @@ void ext4_truncate(struct inode *inode)
 
 	if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) {
 		ext4_ext_truncate(inode);
+		trace_ext4_truncate_exit(inode);
 		return;
 	}
 
@@ -4519,14 +4506,18 @@ void ext4_truncate(struct inode *inode)
 
 	last_block = (inode->i_size + blocksize-1)
 					>> EXT4_BLOCK_SIZE_BITS(inode->i_sb);
+	max_block = (EXT4_SB(inode->i_sb)->s_bitmap_maxbytes + blocksize-1)
+					>> EXT4_BLOCK_SIZE_BITS(inode->i_sb);
 
 	if (inode->i_size & (blocksize - 1))
 		if (ext4_block_truncate_page(handle, mapping, inode->i_size))
 			goto out_stop;
 
-	n = ext4_block_to_path(inode, last_block, offsets, NULL);
-	if (n == 0)
-		goto out_stop;	/* error */
+	if (last_block != max_block) {
+		n = ext4_block_to_path(inode, last_block, offsets, NULL);
+		if (n == 0)
+			goto out_stop;	/* error */
+	}
 
 	/*
 	 * OK.  This truncate is going to happen.  We add the inode to the
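
max_block, introduced above, is the number of logical blocks the indirect-map scheme can address at all, derived from s_bitmap_maxbytes: 12 direct pointers plus one single, one double and one triple indirect tree. The arithmetic for a 4 KiB block size, as a standalone computation (this ignores the extra i_blocks cap the kernel also folds into s_bitmap_maxbytes):

    #include <stdio.h>

    int main(void)
    {
        unsigned long long blocksize = 4096;
        /* 4-byte block pointers => EXT4_ADDR_PER_BLOCK */
        unsigned long long aps = blocksize / 4;

        /* 12 direct + indirect + double indirect + triple indirect */
        unsigned long long max_blocks =
                12 + aps + aps * aps + aps * aps * aps;
        unsigned long long maxbytes = max_blocks * blocksize;

        printf("addressable blocks: %llu (about %llu GiB)\n",
               max_blocks, maxbytes >> 30);
        return 0;
    }
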
@@ -4557,7 +4548,13 @@ void ext4_truncate(struct inode *inode)
 	 */
 	ei->i_disksize = inode->i_size;
 
-	if (n == 1) {		/* direct blocks */
+	if (last_block == max_block) {
+		/*
+		 * It is unnecessary to free any data blocks if last_block is
+		 * equal to the indirect block limit.
+		 */
+		goto out_unlock;
+	} else if (n == 1) {		/* direct blocks */
 		ext4_free_data(handle, inode, NULL, i_data+offsets[0],
 			       i_data + EXT4_NDIR_BLOCKS);
 		goto do_indirects;
@@ -4617,6 +4614,7 @@ do_indirects:
 		;
 	}
 
+out_unlock:
 	up_write(&ei->i_data_sem);
 	inode->i_mtime = inode->i_ctime = ext4_current_time(inode);
 	ext4_mark_inode_dirty(handle, inode);
@@ -4639,6 +4637,7 @@ out_stop:
 		ext4_orphan_del(handle, inode);
 
 	ext4_journal_stop(handle);
+	trace_ext4_truncate_exit(inode);
}
 
/*
@@ -4668,7 +4667,7 @@ static int __ext4_get_inode_loc(struct inode *inode,
 	/*
 	 * Figure out the offset within the block group inode table
 	 */
-	inodes_per_block = (EXT4_BLOCK_SIZE(sb) / EXT4_INODE_SIZE(sb));
+	inodes_per_block = EXT4_SB(sb)->s_inodes_per_block;
 	inode_offset = ((inode->i_ino - 1) %
 			EXT4_INODES_PER_GROUP(sb));
 	block = ext4_inode_table(sb, gdp) + (inode_offset / inodes_per_block);
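
Switching to the cached EXT4_SB(sb)->s_inodes_per_block above does not change the arithmetic that follows it. Worked through for a 4 KiB block size and 256-byte on-disk inodes (the group geometry and table location are made up for the example):

    #include <stdio.h>

    int main(void)
    {
        unsigned long blocksize = 4096, inode_size = 256;
        unsigned long inodes_per_block = blocksize / inode_size;  /* 16 */
        unsigned long inodes_per_group = 8192;
        unsigned long inode_table = 1058;  /* invented table block */
        unsigned long ino = 12345;

        unsigned long inode_offset = (ino - 1) % inodes_per_group;
        unsigned long block = inode_table + inode_offset / inodes_per_block;
        unsigned long byte_off = (inode_offset % inodes_per_block) * inode_size;

        printf("inode %lu: block %lu, offset %lu bytes\n", ino, block, byte_off);
        return 0;
    }
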
@@ -4770,6 +4769,7 @@ make_io:
 		 * has in-inode xattrs, or we don't have this inode in memory.
 		 * Read the block from disk.
 		 */
+		trace_ext4_load_inode(inode);
 		get_bh(bh);
 		bh->b_end_io = end_buffer_read_sync;
 		submit_bh(READ_META, bh);
@@ -4875,7 +4875,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
 		return inode;
 
 	ei = EXT4_I(inode);
-	iloc.bh = 0;
+	iloc.bh = NULL;
 
 	ret = __ext4_get_inode_loc(inode, &iloc, 0);
 	if (ret < 0)
@@ -5361,8 +5361,7 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
 
 	if (S_ISREG(inode->i_mode) &&
 	    attr->ia_valid & ATTR_SIZE &&
-	    (attr->ia_size < inode->i_size ||
-	     (ext4_test_inode_flag(inode, EXT4_INODE_EOFBLOCKS)))) {
+	    (attr->ia_size < inode->i_size)) {
 		handle_t *handle;
 
 		handle = ext4_journal_start(inode, 3);
@@ -5396,14 +5395,15 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
 				goto err_out;
 			}
 		}
-		/* ext4_truncate will clear the flag */
-		if ((ext4_test_inode_flag(inode, EXT4_INODE_EOFBLOCKS)))
-			ext4_truncate(inode);
 	}
 
-	if ((attr->ia_valid & ATTR_SIZE) &&
-	    attr->ia_size != i_size_read(inode))
-		rc = vmtruncate(inode, attr->ia_size);
+	if (attr->ia_valid & ATTR_SIZE) {
+		if (attr->ia_size != i_size_read(inode)) {
+			truncate_setsize(inode, attr->ia_size);
+			ext4_truncate(inode);
+		} else if (ext4_test_inode_flag(inode, EXT4_INODE_EOFBLOCKS))
+			ext4_truncate(inode);
+	}
 
 	if (!rc) {
 		setattr_copy(inode, attr);
@@ -5460,13 +5460,12 @@ static int ext4_indirect_trans_blocks(struct inode *inode, int nrblocks,
 	/* if nrblocks are contiguous */
 	if (chunk) {
 		/*
-		 * With N contiguous data blocks, it need at most
-		 * N/EXT4_ADDR_PER_BLOCK(inode->i_sb) indirect blocks
-		 * 2 dindirect blocks
-		 * 1 tindirect block
+		 * With N contiguous data blocks, we need at most
+		 * N/EXT4_ADDR_PER_BLOCK(inode->i_sb) + 1 indirect blocks,
+		 * 2 dindirect blocks, and 1 tindirect block
 		 */
-		indirects = nrblocks / EXT4_ADDR_PER_BLOCK(inode->i_sb);
-		return indirects + 3;
+		return DIV_ROUND_UP(nrblocks,
+				    EXT4_ADDR_PER_BLOCK(inode->i_sb)) + 4;
 	}
 	/*
 	 * if nrblocks are not contiguous, worse case, each block touch
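
The rewritten formula above rounds the indirect-block count up and budgets one extra block, where the old 'nrblocks / EXT4_ADDR_PER_BLOCK + 3' could undercount a run that straddles an indirect-block boundary. Comparing the two for such a run (standalone arithmetic; 1024 pointers per 4 KiB indirect block):

    #include <stdio.h>

    #define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

    int main(void)
    {
        int aps = 1024;       /* EXT4_ADDR_PER_BLOCK for 4 KiB blocks */
        int nrblocks = 1025;  /* contiguous run crossing a boundary */

        int old_credits = nrblocks / aps + 3;              /* 4 */
        int new_credits = DIV_ROUND_UP(nrblocks, aps) + 4; /* 6 */

        printf("old %d, new %d\n", old_credits, new_credits);
        return 0;
    }
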
@@ -5540,7 +5539,7 @@ static int ext4_meta_trans_blocks(struct inode *inode, int nrblocks, int chunk)
}
 
/*
- * Calulate the total number of credits to reserve to fit
+ * Calculate the total number of credits to reserve to fit
 * the modification of a single pages into a single transaction,
 * which may include multiple chunks of block allocations.
 *
@@ -5862,15 +5861,19 @@ int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
 		goto out_unlock;
 	}
 	ret = 0;
-	if (PageMappedToDisk(page))
-		goto out_unlock;
+
+	lock_page(page);
+	wait_on_page_writeback(page);
+	if (PageMappedToDisk(page)) {
+		up_read(&inode->i_alloc_sem);
+		return VM_FAULT_LOCKED;
+	}
 
 	if (page->index == size >> PAGE_CACHE_SHIFT)
 		len = size & ~PAGE_CACHE_MASK;
 	else
 		len = PAGE_CACHE_SIZE;
 
-	lock_page(page);
 	/*
 	 * return if we have all the buffers mapped. This avoid
 	 * the need to call write_begin/write_end which does a
@@ -5880,8 +5883,8 @@ int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
 	if (page_has_buffers(page)) {
 		if (!walk_page_buffers(NULL, page_buffers(page), 0, len, NULL,
					ext4_bh_unmapped)) {
-			unlock_page(page);
-			goto out_unlock;
+			up_read(&inode->i_alloc_sem);
+			return VM_FAULT_LOCKED;
 		}
 	}
 	unlock_page(page);
@@ -5901,6 +5904,16 @@ int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
 	if (ret < 0)
 		goto out_unlock;
 	ret = 0;
+
+	/*
+	 * write_begin/end might have created a dirty page and someone
+	 * could wander in and start the IO.  Make sure that hasn't
+	 * happened.
+	 */
+	lock_page(page);
+	wait_on_page_writeback(page);
+	up_read(&inode->i_alloc_sem);
+	return VM_FAULT_LOCKED;
out_unlock:
 	if (ret)
 		ret = VM_FAULT_SIGBUS;