diff options
Diffstat (limited to 'fs/ext4/inode.c')
-rw-r--r-- | fs/ext4/inode.c | 384 |
1 files changed, 128 insertions, 256 deletions
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 60a26f3a6f8b..f9c642b22efa 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c | |||
@@ -78,16 +78,14 @@ static int ext4_inode_is_fast_symlink(struct inode *inode) | |||
78 | * but there may still be a record of it in the journal, and that record | 78 | * but there may still be a record of it in the journal, and that record |
79 | * still needs to be revoked. | 79 | * still needs to be revoked. |
80 | * | 80 | * |
81 | * If the handle isn't valid we're not journaling so there's nothing to do. | 81 | * If the handle isn't valid we're not journaling, but we still need to |
82 | * call into ext4_journal_revoke() to put the buffer head. | ||
82 | */ | 83 | */ |
83 | int ext4_forget(handle_t *handle, int is_metadata, struct inode *inode, | 84 | int ext4_forget(handle_t *handle, int is_metadata, struct inode *inode, |
84 | struct buffer_head *bh, ext4_fsblk_t blocknr) | 85 | struct buffer_head *bh, ext4_fsblk_t blocknr) |
85 | { | 86 | { |
86 | int err; | 87 | int err; |
87 | 88 | ||
88 | if (!ext4_handle_valid(handle)) | ||
89 | return 0; | ||
90 | |||
91 | might_sleep(); | 89 | might_sleep(); |
92 | 90 | ||
93 | BUFFER_TRACE(bh, "enter"); | 91 | BUFFER_TRACE(bh, "enter"); |
@@ -1513,14 +1511,14 @@ retry: | |||
1513 | * Add inode to orphan list in case we crash before | 1511 | * Add inode to orphan list in case we crash before |
1514 | * truncate finishes | 1512 | * truncate finishes |
1515 | */ | 1513 | */ |
1516 | if (pos + len > inode->i_size) | 1514 | if (pos + len > inode->i_size && ext4_can_truncate(inode)) |
1517 | ext4_orphan_add(handle, inode); | 1515 | ext4_orphan_add(handle, inode); |
1518 | 1516 | ||
1519 | ext4_journal_stop(handle); | 1517 | ext4_journal_stop(handle); |
1520 | if (pos + len > inode->i_size) { | 1518 | if (pos + len > inode->i_size) { |
1521 | vmtruncate(inode, inode->i_size); | 1519 | ext4_truncate(inode); |
1522 | /* | 1520 | /* |
1523 | * If vmtruncate failed early the inode might | 1521 | * If truncate failed early the inode might |
1524 | * still be on the orphan list; we need to | 1522 | * still be on the orphan list; we need to |
1525 | * make sure the inode is removed from the | 1523 | * make sure the inode is removed from the |
1526 | * orphan list in that case. | 1524 | * orphan list in that case. |
@@ -1614,7 +1612,7 @@ static int ext4_ordered_write_end(struct file *file, | |||
1614 | ret2 = ext4_generic_write_end(file, mapping, pos, len, copied, | 1612 | ret2 = ext4_generic_write_end(file, mapping, pos, len, copied, |
1615 | page, fsdata); | 1613 | page, fsdata); |
1616 | copied = ret2; | 1614 | copied = ret2; |
1617 | if (pos + len > inode->i_size) | 1615 | if (pos + len > inode->i_size && ext4_can_truncate(inode)) |
1618 | /* if we have allocated more blocks and copied | 1616 | /* if we have allocated more blocks and copied |
1619 | * less. We will have blocks allocated outside | 1617 | * less. We will have blocks allocated outside |
1620 | * inode->i_size. So truncate them | 1618 | * inode->i_size. So truncate them |
@@ -1628,9 +1626,9 @@ static int ext4_ordered_write_end(struct file *file, | |||
1628 | ret = ret2; | 1626 | ret = ret2; |
1629 | 1627 | ||
1630 | if (pos + len > inode->i_size) { | 1628 | if (pos + len > inode->i_size) { |
1631 | vmtruncate(inode, inode->i_size); | 1629 | ext4_truncate(inode); |
1632 | /* | 1630 | /* |
1633 | * If vmtruncate failed early the inode might still be | 1631 | * If truncate failed early the inode might still be |
1634 | * on the orphan list; we need to make sure the inode | 1632 | * on the orphan list; we need to make sure the inode |
1635 | * is removed from the orphan list in that case. | 1633 | * is removed from the orphan list in that case. |
1636 | */ | 1634 | */ |
@@ -1655,7 +1653,7 @@ static int ext4_writeback_write_end(struct file *file, | |||
1655 | ret2 = ext4_generic_write_end(file, mapping, pos, len, copied, | 1653 | ret2 = ext4_generic_write_end(file, mapping, pos, len, copied, |
1656 | page, fsdata); | 1654 | page, fsdata); |
1657 | copied = ret2; | 1655 | copied = ret2; |
1658 | if (pos + len > inode->i_size) | 1656 | if (pos + len > inode->i_size && ext4_can_truncate(inode)) |
1659 | /* if we have allocated more blocks and copied | 1657 | /* if we have allocated more blocks and copied |
1660 | * less. We will have blocks allocated outside | 1658 | * less. We will have blocks allocated outside |
1661 | * inode->i_size. So truncate them | 1659 | * inode->i_size. So truncate them |
@@ -1670,9 +1668,9 @@ static int ext4_writeback_write_end(struct file *file, | |||
1670 | ret = ret2; | 1668 | ret = ret2; |
1671 | 1669 | ||
1672 | if (pos + len > inode->i_size) { | 1670 | if (pos + len > inode->i_size) { |
1673 | vmtruncate(inode, inode->i_size); | 1671 | ext4_truncate(inode); |
1674 | /* | 1672 | /* |
1675 | * If vmtruncate failed early the inode might still be | 1673 | * If truncate failed early the inode might still be |
1676 | * on the orphan list; we need to make sure the inode | 1674 | * on the orphan list; we need to make sure the inode |
1677 | * is removed from the orphan list in that case. | 1675 | * is removed from the orphan list in that case. |
1678 | */ | 1676 | */ |
@@ -1722,7 +1720,7 @@ static int ext4_journalled_write_end(struct file *file, | |||
1722 | 1720 | ||
1723 | unlock_page(page); | 1721 | unlock_page(page); |
1724 | page_cache_release(page); | 1722 | page_cache_release(page); |
1725 | if (pos + len > inode->i_size) | 1723 | if (pos + len > inode->i_size && ext4_can_truncate(inode)) |
1726 | /* if we have allocated more blocks and copied | 1724 | /* if we have allocated more blocks and copied |
1727 | * less. We will have blocks allocated outside | 1725 | * less. We will have blocks allocated outside |
1728 | * inode->i_size. So truncate them | 1726 | * inode->i_size. So truncate them |
@@ -1733,9 +1731,9 @@ static int ext4_journalled_write_end(struct file *file, | |||
1733 | if (!ret) | 1731 | if (!ret) |
1734 | ret = ret2; | 1732 | ret = ret2; |
1735 | if (pos + len > inode->i_size) { | 1733 | if (pos + len > inode->i_size) { |
1736 | vmtruncate(inode, inode->i_size); | 1734 | ext4_truncate(inode); |
1737 | /* | 1735 | /* |
1738 | * If vmtruncate failed early the inode might still be | 1736 | * If truncate failed early the inode might still be |
1739 | * on the orphan list; we need to make sure the inode | 1737 | * on the orphan list; we need to make sure the inode |
1740 | * is removed from the orphan list in that case. | 1738 | * is removed from the orphan list in that case. |
1741 | */ | 1739 | */ |
@@ -2305,15 +2303,9 @@ flush_it: | |||
2305 | return; | 2303 | return; |
2306 | } | 2304 | } |
2307 | 2305 | ||
2308 | static int ext4_bh_unmapped_or_delay(handle_t *handle, struct buffer_head *bh) | 2306 | static int ext4_bh_delay_or_unwritten(handle_t *handle, struct buffer_head *bh) |
2309 | { | 2307 | { |
2310 | /* | 2308 | return (buffer_delay(bh) || buffer_unwritten(bh)) && buffer_dirty(bh); |
2311 | * unmapped buffer is possible for holes. | ||
2312 | * delay buffer is possible with delayed allocation. | ||
2313 | * We also need to consider unwritten buffer as unmapped. | ||
2314 | */ | ||
2315 | return (!buffer_mapped(bh) || buffer_delay(bh) || | ||
2316 | buffer_unwritten(bh)) && buffer_dirty(bh); | ||
2317 | } | 2309 | } |
2318 | 2310 | ||
2319 | /* | 2311 | /* |
@@ -2398,9 +2390,9 @@ static int __mpage_da_writepage(struct page *page, | |||
2398 | * We need to try to allocate | 2390 | * We need to try to allocate |
2399 | * unmapped blocks in the same page. | 2391 | * unmapped blocks in the same page. |
2400 | * Otherwise we won't make progress | 2392 | * Otherwise we won't make progress |
2401 | * with the page in ext4_da_writepage | 2393 | * with the page in ext4_writepage |
2402 | */ | 2394 | */ |
2403 | if (ext4_bh_unmapped_or_delay(NULL, bh)) { | 2395 | if (ext4_bh_delay_or_unwritten(NULL, bh)) { |
2404 | mpage_add_bh_to_extent(mpd, logical, | 2396 | mpage_add_bh_to_extent(mpd, logical, |
2405 | bh->b_size, | 2397 | bh->b_size, |
2406 | bh->b_state); | 2398 | bh->b_state); |
@@ -2517,7 +2509,6 @@ static int noalloc_get_block_write(struct inode *inode, sector_t iblock, | |||
2517 | * so call get_block_wrap with create = 0 | 2509 | * so call get_block_wrap with create = 0 |
2518 | */ | 2510 | */ |
2519 | ret = ext4_get_blocks(NULL, inode, iblock, max_blocks, bh_result, 0); | 2511 | ret = ext4_get_blocks(NULL, inode, iblock, max_blocks, bh_result, 0); |
2520 | BUG_ON(create && ret == 0); | ||
2521 | if (ret > 0) { | 2512 | if (ret > 0) { |
2522 | bh_result->b_size = (ret << inode->i_blkbits); | 2513 | bh_result->b_size = (ret << inode->i_blkbits); |
2523 | ret = 0; | 2514 | ret = 0; |
@@ -2525,15 +2516,102 @@ static int noalloc_get_block_write(struct inode *inode, sector_t iblock, | |||
2525 | return ret; | 2516 | return ret; |
2526 | } | 2517 | } |
2527 | 2518 | ||
2519 | static int bget_one(handle_t *handle, struct buffer_head *bh) | ||
2520 | { | ||
2521 | get_bh(bh); | ||
2522 | return 0; | ||
2523 | } | ||
2524 | |||
2525 | static int bput_one(handle_t *handle, struct buffer_head *bh) | ||
2526 | { | ||
2527 | put_bh(bh); | ||
2528 | return 0; | ||
2529 | } | ||
2530 | |||
2531 | static int __ext4_journalled_writepage(struct page *page, | ||
2532 | struct writeback_control *wbc, | ||
2533 | unsigned int len) | ||
2534 | { | ||
2535 | struct address_space *mapping = page->mapping; | ||
2536 | struct inode *inode = mapping->host; | ||
2537 | struct buffer_head *page_bufs; | ||
2538 | handle_t *handle = NULL; | ||
2539 | int ret = 0; | ||
2540 | int err; | ||
2541 | |||
2542 | page_bufs = page_buffers(page); | ||
2543 | BUG_ON(!page_bufs); | ||
2544 | walk_page_buffers(handle, page_bufs, 0, len, NULL, bget_one); | ||
2545 | /* As soon as we unlock the page, it can go away, but we have | ||
2546 | * references to buffers so we are safe */ | ||
2547 | unlock_page(page); | ||
2548 | |||
2549 | handle = ext4_journal_start(inode, ext4_writepage_trans_blocks(inode)); | ||
2550 | if (IS_ERR(handle)) { | ||
2551 | ret = PTR_ERR(handle); | ||
2552 | goto out; | ||
2553 | } | ||
2554 | |||
2555 | ret = walk_page_buffers(handle, page_bufs, 0, len, NULL, | ||
2556 | do_journal_get_write_access); | ||
2557 | |||
2558 | err = walk_page_buffers(handle, page_bufs, 0, len, NULL, | ||
2559 | write_end_fn); | ||
2560 | if (ret == 0) | ||
2561 | ret = err; | ||
2562 | err = ext4_journal_stop(handle); | ||
2563 | if (!ret) | ||
2564 | ret = err; | ||
2565 | |||
2566 | walk_page_buffers(handle, page_bufs, 0, len, NULL, bput_one); | ||
2567 | EXT4_I(inode)->i_state |= EXT4_STATE_JDATA; | ||
2568 | out: | ||
2569 | return ret; | ||
2570 | } | ||
2571 | |||
2528 | /* | 2572 | /* |
2573 | * Note that we don't need to start a transaction unless we're journaling data | ||
2574 | * because we should have holes filled from ext4_page_mkwrite(). We even don't | ||
2575 | * need to file the inode to the transaction's list in ordered mode because if | ||
2576 | * we are writing back data added by write(), the inode is already there and if | ||
2577 | * we are writing back data modified via mmap(), no one guarantees in which | ||
2578 | * transaction the data will hit the disk. In case we are journaling data, we | ||
2579 | * cannot start transaction directly because transaction start ranks above page | ||
2580 | * lock so we have to do some magic. | ||
2581 | * | ||
2529 | * This function can get called via... | 2582 | * This function can get called via... |
2530 | * - ext4_da_writepages after taking page lock (have journal handle) | 2583 | * - ext4_da_writepages after taking page lock (have journal handle) |
2531 | * - journal_submit_inode_data_buffers (no journal handle) | 2584 | * - journal_submit_inode_data_buffers (no journal handle) |
2532 | * - shrink_page_list via pdflush (no journal handle) | 2585 | * - shrink_page_list via pdflush (no journal handle) |
2533 | * - grab_page_cache when doing write_begin (have journal handle) | 2586 | * - grab_page_cache when doing write_begin (have journal handle) |
2587 | * | ||
2588 | * We don't do any block allocation in this function. If we have page with | ||
2589 | * multiple blocks we need to write those buffer_heads that are mapped. This | ||
2590 | * is important for mmap-based writes. So if we do with blocksize 1K | ||
2591 | * truncate(f, 1024); | ||
2592 | * a = mmap(f, 0, 4096); | ||
2593 | * a[0] = 'a'; | ||
2594 | * truncate(f, 4096); | ||
2595 | * we have in the page first buffer_head mapped via page_mkwrite call back | ||
2596 | * but other buffer_heads would be unmapped but dirty (dirty done via the | ||
2597 | * do_wp_page). So writepage should write the first block. If we modify | ||
2598 | * the mmap area beyond 1024 we will again get a page_fault and the | ||
2599 | * page_mkwrite callback will do the block allocation and mark the | ||
2600 | * buffer_heads mapped. | ||
2601 | * | ||
2602 | * We redirty the page if we have any buffer_head that is either delay or | ||
2603 | * unwritten in the page. | ||
2604 | * | ||
2605 | * We can get recursively called as shown below. | ||
2606 | * | ||
2607 | * ext4_writepage() -> kmalloc() -> __alloc_pages() -> page_launder() -> | ||
2608 | * ext4_writepage() | ||
2609 | * | ||
2610 | * But since we don't do any block allocation we should not deadlock. | ||
2611 | * The page also has the dirty flag cleared so we don't get recursive page_lock. | ||
2534 | */ | 2612 | */ |
2535 | static int ext4_da_writepage(struct page *page, | 2613 | static int ext4_writepage(struct page *page, |
2536 | struct writeback_control *wbc) | 2614 | struct writeback_control *wbc) |
2537 | { | 2615 | { |
2538 | int ret = 0; | 2616 | int ret = 0; |
2539 | loff_t size; | 2617 | loff_t size; |
@@ -2541,7 +2619,7 @@ static int ext4_da_writepage(struct page *page, | |||
2541 | struct buffer_head *page_bufs; | 2619 | struct buffer_head *page_bufs; |
2542 | struct inode *inode = page->mapping->host; | 2620 | struct inode *inode = page->mapping->host; |
2543 | 2621 | ||
2544 | trace_ext4_da_writepage(inode, page); | 2622 | trace_ext4_writepage(inode, page); |
2545 | size = i_size_read(inode); | 2623 | size = i_size_read(inode); |
2546 | if (page->index == size >> PAGE_CACHE_SHIFT) | 2624 | if (page->index == size >> PAGE_CACHE_SHIFT) |
2547 | len = size & ~PAGE_CACHE_MASK; | 2625 | len = size & ~PAGE_CACHE_MASK; |
@@ -2551,7 +2629,7 @@ static int ext4_da_writepage(struct page *page, | |||
2551 | if (page_has_buffers(page)) { | 2629 | if (page_has_buffers(page)) { |
2552 | page_bufs = page_buffers(page); | 2630 | page_bufs = page_buffers(page); |
2553 | if (walk_page_buffers(NULL, page_bufs, 0, len, NULL, | 2631 | if (walk_page_buffers(NULL, page_bufs, 0, len, NULL, |
2554 | ext4_bh_unmapped_or_delay)) { | 2632 | ext4_bh_delay_or_unwritten)) { |
2555 | /* | 2633 | /* |
2556 | * We don't want to do block allocation | 2634 | * We don't want to do block allocation |
2557 | * So redirty the page and return | 2635 | * So redirty the page and return |
@@ -2578,13 +2656,13 @@ static int ext4_da_writepage(struct page *page, | |||
2578 | * all are mapped and non delay. We don't want to | 2656 | * all are mapped and non delay. We don't want to |
2579 | * do block allocation here. | 2657 | * do block allocation here. |
2580 | */ | 2658 | */ |
2581 | ret = block_prepare_write(page, 0, PAGE_CACHE_SIZE, | 2659 | ret = block_prepare_write(page, 0, len, |
2582 | noalloc_get_block_write); | 2660 | noalloc_get_block_write); |
2583 | if (!ret) { | 2661 | if (!ret) { |
2584 | page_bufs = page_buffers(page); | 2662 | page_bufs = page_buffers(page); |
2585 | /* check whether all are mapped and non delay */ | 2663 | /* check whether all are mapped and non delay */ |
2586 | if (walk_page_buffers(NULL, page_bufs, 0, len, NULL, | 2664 | if (walk_page_buffers(NULL, page_bufs, 0, len, NULL, |
2587 | ext4_bh_unmapped_or_delay)) { | 2665 | ext4_bh_delay_or_unwritten)) { |
2588 | redirty_page_for_writepage(wbc, page); | 2666 | redirty_page_for_writepage(wbc, page); |
2589 | unlock_page(page); | 2667 | unlock_page(page); |
2590 | return 0; | 2668 | return 0; |
@@ -2600,7 +2678,16 @@ static int ext4_da_writepage(struct page *page, | |||
2600 | return 0; | 2678 | return 0; |
2601 | } | 2679 | } |
2602 | /* now mark the buffer_heads as dirty and uptodate */ | 2680 | /* now mark the buffer_heads as dirty and uptodate */ |
2603 | block_commit_write(page, 0, PAGE_CACHE_SIZE); | 2681 | block_commit_write(page, 0, len); |
2682 | } | ||
2683 | |||
2684 | if (PageChecked(page) && ext4_should_journal_data(inode)) { | ||
2685 | /* | ||
2686 | * It's mmapped pagecache. Add buffers and journal it. There | ||
2687 | * doesn't seem much point in redirtying the page here. | ||
2688 | */ | ||
2689 | ClearPageChecked(page); | ||
2690 | return __ext4_journalled_writepage(page, wbc, len); | ||
2604 | } | 2691 | } |
2605 | 2692 | ||
2606 | if (test_opt(inode->i_sb, NOBH) && ext4_should_writeback_data(inode)) | 2693 | if (test_opt(inode->i_sb, NOBH) && ext4_should_writeback_data(inode)) |
@@ -2907,7 +2994,7 @@ retry: | |||
2907 | * i_size_read because we hold i_mutex. | 2994 | * i_size_read because we hold i_mutex. |
2908 | */ | 2995 | */ |
2909 | if (pos + len > inode->i_size) | 2996 | if (pos + len > inode->i_size) |
2910 | vmtruncate(inode, inode->i_size); | 2997 | ext4_truncate(inode); |
2911 | } | 2998 | } |
2912 | 2999 | ||
2913 | if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)) | 3000 | if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)) |
@@ -3130,222 +3217,6 @@ static sector_t ext4_bmap(struct address_space *mapping, sector_t block) | |||
3130 | return generic_block_bmap(mapping, block, ext4_get_block); | 3217 | return generic_block_bmap(mapping, block, ext4_get_block); |
3131 | } | 3218 | } |
3132 | 3219 | ||
3133 | static int bget_one(handle_t *handle, struct buffer_head *bh) | ||
3134 | { | ||
3135 | get_bh(bh); | ||
3136 | return 0; | ||
3137 | } | ||
3138 | |||
3139 | static int bput_one(handle_t *handle, struct buffer_head *bh) | ||
3140 | { | ||
3141 | put_bh(bh); | ||
3142 | return 0; | ||
3143 | } | ||
3144 | |||
3145 | /* | ||
3146 | * Note that we don't need to start a transaction unless we're journaling data | ||
3147 | * because we should have holes filled from ext4_page_mkwrite(). We even don't | ||
3148 | * need to file the inode to the transaction's list in ordered mode because if | ||
3149 | * we are writing back data added by write(), the inode is already there and if | ||
3150 | * we are writing back data modified via mmap(), noone guarantees in which | ||
3151 | * transaction the data will hit the disk. In case we are journaling data, we | ||
3152 | * cannot start transaction directly because transaction start ranks above page | ||
3153 | * lock so we have to do some magic. | ||
3154 | * | ||
3155 | * In all journaling modes block_write_full_page() will start the I/O. | ||
3156 | * | ||
3157 | * Problem: | ||
3158 | * | ||
3159 | * ext4_writepage() -> kmalloc() -> __alloc_pages() -> page_launder() -> | ||
3160 | * ext4_writepage() | ||
3161 | * | ||
3162 | * Similar for: | ||
3163 | * | ||
3164 | * ext4_file_write() -> generic_file_write() -> __alloc_pages() -> ... | ||
3165 | * | ||
3166 | * Same applies to ext4_get_block(). We will deadlock on various things like | ||
3167 | * lock_journal and i_data_sem | ||
3168 | * | ||
3169 | * Setting PF_MEMALLOC here doesn't work - too many internal memory | ||
3170 | * allocations fail. | ||
3171 | * | ||
3172 | * 16May01: If we're reentered then journal_current_handle() will be | ||
3173 | * non-zero. We simply *return*. | ||
3174 | * | ||
3175 | * 1 July 2001: @@@ FIXME: | ||
3176 | * In journalled data mode, a data buffer may be metadata against the | ||
3177 | * current transaction. But the same file is part of a shared mapping | ||
3178 | * and someone does a writepage() on it. | ||
3179 | * | ||
3180 | * We will move the buffer onto the async_data list, but *after* it has | ||
3181 | * been dirtied. So there's a small window where we have dirty data on | ||
3182 | * BJ_Metadata. | ||
3183 | * | ||
3184 | * Note that this only applies to the last partial page in the file. The | ||
3185 | * bit which block_write_full_page() uses prepare/commit for. (That's | ||
3186 | * broken code anyway: it's wrong for msync()). | ||
3187 | * | ||
3188 | * It's a rare case: affects the final partial page, for journalled data | ||
3189 | * where the file is subject to bith write() and writepage() in the same | ||
3190 | * transction. To fix it we'll need a custom block_write_full_page(). | ||
3191 | * We'll probably need that anyway for journalling writepage() output. | ||
3192 | * | ||
3193 | * We don't honour synchronous mounts for writepage(). That would be | ||
3194 | * disastrous. Any write() or metadata operation will sync the fs for | ||
3195 | * us. | ||
3196 | * | ||
3197 | */ | ||
3198 | static int __ext4_normal_writepage(struct page *page, | ||
3199 | struct writeback_control *wbc) | ||
3200 | { | ||
3201 | struct inode *inode = page->mapping->host; | ||
3202 | |||
3203 | if (test_opt(inode->i_sb, NOBH)) | ||
3204 | return nobh_writepage(page, noalloc_get_block_write, wbc); | ||
3205 | else | ||
3206 | return block_write_full_page(page, noalloc_get_block_write, | ||
3207 | wbc); | ||
3208 | } | ||
3209 | |||
3210 | static int ext4_normal_writepage(struct page *page, | ||
3211 | struct writeback_control *wbc) | ||
3212 | { | ||
3213 | struct inode *inode = page->mapping->host; | ||
3214 | loff_t size = i_size_read(inode); | ||
3215 | loff_t len; | ||
3216 | |||
3217 | trace_ext4_normal_writepage(inode, page); | ||
3218 | J_ASSERT(PageLocked(page)); | ||
3219 | if (page->index == size >> PAGE_CACHE_SHIFT) | ||
3220 | len = size & ~PAGE_CACHE_MASK; | ||
3221 | else | ||
3222 | len = PAGE_CACHE_SIZE; | ||
3223 | |||
3224 | if (page_has_buffers(page)) { | ||
3225 | /* if page has buffers it should all be mapped | ||
3226 | * and allocated. If there are not buffers attached | ||
3227 | * to the page we know the page is dirty but it lost | ||
3228 | * buffers. That means that at some moment in time | ||
3229 | * after write_begin() / write_end() has been called | ||
3230 | * all buffers have been clean and thus they must have been | ||
3231 | * written at least once. So they are all mapped and we can | ||
3232 | * happily proceed with mapping them and writing the page. | ||
3233 | */ | ||
3234 | BUG_ON(walk_page_buffers(NULL, page_buffers(page), 0, len, NULL, | ||
3235 | ext4_bh_unmapped_or_delay)); | ||
3236 | } | ||
3237 | |||
3238 | if (!ext4_journal_current_handle()) | ||
3239 | return __ext4_normal_writepage(page, wbc); | ||
3240 | |||
3241 | redirty_page_for_writepage(wbc, page); | ||
3242 | unlock_page(page); | ||
3243 | return 0; | ||
3244 | } | ||
3245 | |||
3246 | static int __ext4_journalled_writepage(struct page *page, | ||
3247 | struct writeback_control *wbc) | ||
3248 | { | ||
3249 | struct address_space *mapping = page->mapping; | ||
3250 | struct inode *inode = mapping->host; | ||
3251 | struct buffer_head *page_bufs; | ||
3252 | handle_t *handle = NULL; | ||
3253 | int ret = 0; | ||
3254 | int err; | ||
3255 | |||
3256 | ret = block_prepare_write(page, 0, PAGE_CACHE_SIZE, | ||
3257 | noalloc_get_block_write); | ||
3258 | if (ret != 0) | ||
3259 | goto out_unlock; | ||
3260 | |||
3261 | page_bufs = page_buffers(page); | ||
3262 | walk_page_buffers(handle, page_bufs, 0, PAGE_CACHE_SIZE, NULL, | ||
3263 | bget_one); | ||
3264 | /* As soon as we unlock the page, it can go away, but we have | ||
3265 | * references to buffers so we are safe */ | ||
3266 | unlock_page(page); | ||
3267 | |||
3268 | handle = ext4_journal_start(inode, ext4_writepage_trans_blocks(inode)); | ||
3269 | if (IS_ERR(handle)) { | ||
3270 | ret = PTR_ERR(handle); | ||
3271 | goto out; | ||
3272 | } | ||
3273 | |||
3274 | ret = walk_page_buffers(handle, page_bufs, 0, | ||
3275 | PAGE_CACHE_SIZE, NULL, do_journal_get_write_access); | ||
3276 | |||
3277 | err = walk_page_buffers(handle, page_bufs, 0, | ||
3278 | PAGE_CACHE_SIZE, NULL, write_end_fn); | ||
3279 | if (ret == 0) | ||
3280 | ret = err; | ||
3281 | err = ext4_journal_stop(handle); | ||
3282 | if (!ret) | ||
3283 | ret = err; | ||
3284 | |||
3285 | walk_page_buffers(handle, page_bufs, 0, | ||
3286 | PAGE_CACHE_SIZE, NULL, bput_one); | ||
3287 | EXT4_I(inode)->i_state |= EXT4_STATE_JDATA; | ||
3288 | goto out; | ||
3289 | |||
3290 | out_unlock: | ||
3291 | unlock_page(page); | ||
3292 | out: | ||
3293 | return ret; | ||
3294 | } | ||
3295 | |||
3296 | static int ext4_journalled_writepage(struct page *page, | ||
3297 | struct writeback_control *wbc) | ||
3298 | { | ||
3299 | struct inode *inode = page->mapping->host; | ||
3300 | loff_t size = i_size_read(inode); | ||
3301 | loff_t len; | ||
3302 | |||
3303 | trace_ext4_journalled_writepage(inode, page); | ||
3304 | J_ASSERT(PageLocked(page)); | ||
3305 | if (page->index == size >> PAGE_CACHE_SHIFT) | ||
3306 | len = size & ~PAGE_CACHE_MASK; | ||
3307 | else | ||
3308 | len = PAGE_CACHE_SIZE; | ||
3309 | |||
3310 | if (page_has_buffers(page)) { | ||
3311 | /* if page has buffers it should all be mapped | ||
3312 | * and allocated. If there are not buffers attached | ||
3313 | * to the page we know the page is dirty but it lost | ||
3314 | * buffers. That means that at some moment in time | ||
3315 | * after write_begin() / write_end() has been called | ||
3316 | * all buffers have been clean and thus they must have been | ||
3317 | * written at least once. So they are all mapped and we can | ||
3318 | * happily proceed with mapping them and writing the page. | ||
3319 | */ | ||
3320 | BUG_ON(walk_page_buffers(NULL, page_buffers(page), 0, len, NULL, | ||
3321 | ext4_bh_unmapped_or_delay)); | ||
3322 | } | ||
3323 | |||
3324 | if (ext4_journal_current_handle()) | ||
3325 | goto no_write; | ||
3326 | |||
3327 | if (PageChecked(page)) { | ||
3328 | /* | ||
3329 | * It's mmapped pagecache. Add buffers and journal it. There | ||
3330 | * doesn't seem much point in redirtying the page here. | ||
3331 | */ | ||
3332 | ClearPageChecked(page); | ||
3333 | return __ext4_journalled_writepage(page, wbc); | ||
3334 | } else { | ||
3335 | /* | ||
3336 | * It may be a page full of checkpoint-mode buffers. We don't | ||
3337 | * really know unless we go poke around in the buffer_heads. | ||
3338 | * But block_write_full_page will do the right thing. | ||
3339 | */ | ||
3340 | return block_write_full_page(page, noalloc_get_block_write, | ||
3341 | wbc); | ||
3342 | } | ||
3343 | no_write: | ||
3344 | redirty_page_for_writepage(wbc, page); | ||
3345 | unlock_page(page); | ||
3346 | return 0; | ||
3347 | } | ||
3348 | |||
3349 | static int ext4_readpage(struct file *file, struct page *page) | 3220 | static int ext4_readpage(struct file *file, struct page *page) |
3350 | { | 3221 | { |
3351 | return mpage_readpage(page, ext4_get_block); | 3222 | return mpage_readpage(page, ext4_get_block); |
@@ -3492,7 +3363,7 @@ static int ext4_journalled_set_page_dirty(struct page *page) | |||
3492 | static const struct address_space_operations ext4_ordered_aops = { | 3363 | static const struct address_space_operations ext4_ordered_aops = { |
3493 | .readpage = ext4_readpage, | 3364 | .readpage = ext4_readpage, |
3494 | .readpages = ext4_readpages, | 3365 | .readpages = ext4_readpages, |
3495 | .writepage = ext4_normal_writepage, | 3366 | .writepage = ext4_writepage, |
3496 | .sync_page = block_sync_page, | 3367 | .sync_page = block_sync_page, |
3497 | .write_begin = ext4_write_begin, | 3368 | .write_begin = ext4_write_begin, |
3498 | .write_end = ext4_ordered_write_end, | 3369 | .write_end = ext4_ordered_write_end, |
@@ -3507,7 +3378,7 @@ static const struct address_space_operations ext4_ordered_aops = { | |||
3507 | static const struct address_space_operations ext4_writeback_aops = { | 3378 | static const struct address_space_operations ext4_writeback_aops = { |
3508 | .readpage = ext4_readpage, | 3379 | .readpage = ext4_readpage, |
3509 | .readpages = ext4_readpages, | 3380 | .readpages = ext4_readpages, |
3510 | .writepage = ext4_normal_writepage, | 3381 | .writepage = ext4_writepage, |
3511 | .sync_page = block_sync_page, | 3382 | .sync_page = block_sync_page, |
3512 | .write_begin = ext4_write_begin, | 3383 | .write_begin = ext4_write_begin, |
3513 | .write_end = ext4_writeback_write_end, | 3384 | .write_end = ext4_writeback_write_end, |
@@ -3522,7 +3393,7 @@ static const struct address_space_operations ext4_writeback_aops = { | |||
3522 | static const struct address_space_operations ext4_journalled_aops = { | 3393 | static const struct address_space_operations ext4_journalled_aops = { |
3523 | .readpage = ext4_readpage, | 3394 | .readpage = ext4_readpage, |
3524 | .readpages = ext4_readpages, | 3395 | .readpages = ext4_readpages, |
3525 | .writepage = ext4_journalled_writepage, | 3396 | .writepage = ext4_writepage, |
3526 | .sync_page = block_sync_page, | 3397 | .sync_page = block_sync_page, |
3527 | .write_begin = ext4_write_begin, | 3398 | .write_begin = ext4_write_begin, |
3528 | .write_end = ext4_journalled_write_end, | 3399 | .write_end = ext4_journalled_write_end, |
@@ -3536,7 +3407,7 @@ static const struct address_space_operations ext4_journalled_aops = { | |||
3536 | static const struct address_space_operations ext4_da_aops = { | 3407 | static const struct address_space_operations ext4_da_aops = { |
3537 | .readpage = ext4_readpage, | 3408 | .readpage = ext4_readpage, |
3538 | .readpages = ext4_readpages, | 3409 | .readpages = ext4_readpages, |
3539 | .writepage = ext4_da_writepage, | 3410 | .writepage = ext4_writepage, |
3540 | .writepages = ext4_da_writepages, | 3411 | .writepages = ext4_da_writepages, |
3541 | .sync_page = block_sync_page, | 3412 | .sync_page = block_sync_page, |
3542 | .write_begin = ext4_da_write_begin, | 3413 | .write_begin = ext4_da_write_begin, |
@@ -3583,7 +3454,8 @@ int ext4_block_truncate_page(handle_t *handle, | |||
3583 | struct page *page; | 3454 | struct page *page; |
3584 | int err = 0; | 3455 | int err = 0; |
3585 | 3456 | ||
3586 | page = grab_cache_page(mapping, from >> PAGE_CACHE_SHIFT); | 3457 | page = find_or_create_page(mapping, from >> PAGE_CACHE_SHIFT, |
3458 | mapping_gfp_mask(mapping) & ~__GFP_FS); | ||
3587 | if (!page) | 3459 | if (!page) |
3588 | return -EINVAL; | 3460 | return -EINVAL; |
3589 | 3461 | ||