Diffstat (limited to 'fs/ext4/inode.c')
-rw-r--r--	fs/ext4/inode.c	664
1 file changed, 297 insertions(+), 367 deletions(-)
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index cd818d8bb221..88049d8d30cb 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -132,10 +132,6 @@ static inline int ext4_begin_ordered_truncate(struct inode *inode,
 }
 
 static void ext4_invalidatepage(struct page *page, unsigned long offset);
-static int noalloc_get_block_write(struct inode *inode, sector_t iblock,
-				struct buffer_head *bh_result, int create);
-static int ext4_set_bh_endio(struct buffer_head *bh, struct inode *inode);
-static void ext4_end_io_buffer_write(struct buffer_head *bh, int uptodate);
 static int __ext4_journalled_writepage(struct page *page, unsigned int len);
 static int ext4_bh_delay_or_unwritten(handle_t *handle, struct buffer_head *bh);
 static int ext4_discard_partial_page_buffers_no_lock(handle_t *handle,
@@ -238,7 +234,8 @@ void ext4_evict_inode(struct inode *inode)
 	 * protection against it
 	 */
 	sb_start_intwrite(inode->i_sb);
-	handle = ext4_journal_start(inode, ext4_blocks_for_truncate(inode)+3);
+	handle = ext4_journal_start(inode, EXT4_HT_TRUNCATE,
+				    ext4_blocks_for_truncate(inode)+3);
 	if (IS_ERR(handle)) {
 		ext4_std_error(inode->i_sb, PTR_ERR(handle));
 		/*
@@ -346,7 +343,7 @@ void ext4_da_update_reserve_space(struct inode *inode,
 	spin_lock(&ei->i_block_reservation_lock);
 	trace_ext4_da_update_reserve_space(inode, used, quota_claim);
 	if (unlikely(used > ei->i_reserved_data_blocks)) {
-		ext4_msg(inode->i_sb, KERN_NOTICE, "%s: ino %lu, used %d "
+		ext4_warning(inode->i_sb, "%s: ino %lu, used %d "
 			 "with only %d reserved data blocks",
 			 __func__, inode->i_ino, used,
 			 ei->i_reserved_data_blocks);
@@ -355,10 +352,12 @@ void ext4_da_update_reserve_space(struct inode *inode,
 	}
 
 	if (unlikely(ei->i_allocated_meta_blocks > ei->i_reserved_meta_blocks)) {
-		ext4_msg(inode->i_sb, KERN_NOTICE, "%s: ino %lu, allocated %d "
-			 "with only %d reserved metadata blocks\n", __func__,
-			 inode->i_ino, ei->i_allocated_meta_blocks,
-			 ei->i_reserved_meta_blocks);
+		ext4_warning(inode->i_sb, "ino %lu, allocated %d "
+			 "with only %d reserved metadata blocks "
+			 "(releasing %d blocks with reserved %d data blocks)",
+			 inode->i_ino, ei->i_allocated_meta_blocks,
+			 ei->i_reserved_meta_blocks, used,
+			 ei->i_reserved_data_blocks);
 		WARN_ON(1);
 		ei->i_allocated_meta_blocks = ei->i_reserved_meta_blocks;
 	}
@@ -508,12 +507,33 @@ static pgoff_t ext4_num_dirty_pages(struct inode *inode, pgoff_t idx,
 int ext4_map_blocks(handle_t *handle, struct inode *inode,
 		    struct ext4_map_blocks *map, int flags)
 {
+	struct extent_status es;
 	int retval;
 
 	map->m_flags = 0;
 	ext_debug("ext4_map_blocks(): inode %lu, flag %d, max_blocks %u,"
 		  "logical block %lu\n", inode->i_ino, flags, map->m_len,
 		  (unsigned long) map->m_lblk);
+
+	/* Lookup extent status tree firstly */
+	if (ext4_es_lookup_extent(inode, map->m_lblk, &es)) {
+		if (ext4_es_is_written(&es) || ext4_es_is_unwritten(&es)) {
+			map->m_pblk = ext4_es_pblock(&es) +
+					map->m_lblk - es.es_lblk;
+			map->m_flags |= ext4_es_is_written(&es) ?
+					EXT4_MAP_MAPPED : EXT4_MAP_UNWRITTEN;
+			retval = es.es_len - (map->m_lblk - es.es_lblk);
+			if (retval > map->m_len)
+				retval = map->m_len;
+			map->m_len = retval;
+		} else if (ext4_es_is_delayed(&es) || ext4_es_is_hole(&es)) {
+			retval = 0;
+		} else {
+			BUG_ON(1);
+		}
+		goto found;
+	}
+
 	/*
 	 * Try to see if we can get the block without requesting a new
 	 * file system block.
@@ -527,20 +547,27 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode,
 		retval = ext4_ind_map_blocks(handle, inode, map, flags &
 					     EXT4_GET_BLOCKS_KEEP_SIZE);
 	}
+	if (retval > 0) {
+		int ret;
+		unsigned long long status;
+
+		status = map->m_flags & EXT4_MAP_UNWRITTEN ?
+				EXTENT_STATUS_UNWRITTEN : EXTENT_STATUS_WRITTEN;
+		if (!(flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) &&
+		    ext4_find_delalloc_range(inode, map->m_lblk,
+					     map->m_lblk + map->m_len - 1))
+			status |= EXTENT_STATUS_DELAYED;
+		ret = ext4_es_insert_extent(inode, map->m_lblk,
+					    map->m_len, map->m_pblk, status);
+		if (ret < 0)
+			retval = ret;
+	}
 	if (!(flags & EXT4_GET_BLOCKS_NO_LOCK))
 		up_read((&EXT4_I(inode)->i_data_sem));
 
+found:
 	if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) {
-		int ret;
-		if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) {
-			/* delayed alloc may be allocated by fallocate and
-			 * coverted to initialized by directIO.
-			 * we need to handle delayed extent here.
-			 */
-			down_write((&EXT4_I(inode)->i_data_sem));
-			goto delayed_mapped;
-		}
-		ret = check_block_validity(inode, map);
+		int ret = check_block_validity(inode, map);
 		if (ret != 0)
 			return ret;
 	}
@@ -560,16 +587,10 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode,
 		return retval;
 
 	/*
-	 * When we call get_blocks without the create flag, the
-	 * BH_Unwritten flag could have gotten set if the blocks
-	 * requested were part of a uninitialized extent. We need to
-	 * clear this flag now that we are committed to convert all or
-	 * part of the uninitialized extent to be an initialized
-	 * extent. This is because we need to avoid the combination
-	 * of BH_Unwritten and BH_Mapped flags being simultaneously
-	 * set on the buffer_head.
+	 * Here we clear m_flags because after allocating an new extent,
+	 * it will be set again.
 	 */
-	map->m_flags &= ~EXT4_MAP_UNWRITTEN;
+	map->m_flags &= ~EXT4_MAP_FLAGS;
 
 	/*
 	 * New blocks allocate and/or writing to uninitialized extent
@@ -615,18 +636,23 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode,
 		    (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE))
 			ext4_da_update_reserve_space(inode, retval, 1);
 	}
-	if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) {
+	if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE)
 		ext4_clear_inode_state(inode, EXT4_STATE_DELALLOC_RESERVED);
 
-	if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) {
+	if (retval > 0) {
 		int ret;
-delayed_mapped:
-		/* delayed allocation blocks has been allocated */
-		ret = ext4_es_remove_extent(inode, map->m_lblk,
-					    map->m_len);
-		if (ret < 0)
-			retval = ret;
-	}
+		unsigned long long status;
+
+		status = map->m_flags & EXT4_MAP_UNWRITTEN ?
+				EXTENT_STATUS_UNWRITTEN : EXTENT_STATUS_WRITTEN;
+		if (!(flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) &&
+		    ext4_find_delalloc_range(inode, map->m_lblk,
+					     map->m_lblk + map->m_len - 1))
+			status |= EXTENT_STATUS_DELAYED;
+		ret = ext4_es_insert_extent(inode, map->m_lblk, map->m_len,
+					    map->m_pblk, status);
+		if (ret < 0)
+			retval = ret;
 	}
 
 	up_write((&EXT4_I(inode)->i_data_sem));
@@ -660,7 +686,8 @@ static int _ext4_get_block(struct inode *inode, sector_t iblock,
 		if (map.m_len > DIO_MAX_BLOCKS)
 			map.m_len = DIO_MAX_BLOCKS;
 		dio_credits = ext4_chunk_trans_blocks(inode, map.m_len);
-		handle = ext4_journal_start(inode, dio_credits);
+		handle = ext4_journal_start(inode, EXT4_HT_MAP_BLOCKS,
+					    dio_credits);
 		if (IS_ERR(handle)) {
 			ret = PTR_ERR(handle);
 			return ret;
@@ -707,14 +734,16 @@ struct buffer_head *ext4_getblk(handle_t *handle, struct inode *inode,
 	/* ensure we send some value back into *errp */
 	*errp = 0;
 
+	if (create && err == 0)
+		err = -ENOSPC;	/* should never happen */
 	if (err < 0)
 		*errp = err;
 	if (err <= 0)
 		return NULL;
 
 	bh = sb_getblk(inode->i_sb, map.m_pblk);
-	if (!bh) {
-		*errp = -EIO;
+	if (unlikely(!bh)) {
+		*errp = -ENOMEM;
 		return NULL;
 	}
 	if (map.m_flags & EXT4_MAP_NEW) {
@@ -808,11 +837,10 @@ int ext4_walk_page_buffers(handle_t *handle,
  * and the commit_write(). So doing the jbd2_journal_start at the start of
  * prepare_write() is the right place.
  *
- * Also, this function can nest inside ext4_writepage() ->
- * block_write_full_page(). In that case, we *know* that ext4_writepage()
- * has generated enough buffer credits to do the whole page. So we won't
- * block on the journal in that case, which is good, because the caller may
- * be PF_MEMALLOC.
+ * Also, this function can nest inside ext4_writepage(). In that case, we
+ * *know* that ext4_writepage() has generated enough buffer credits to do the
+ * whole page. So we won't block on the journal in that case, which is good,
+ * because the caller may be PF_MEMALLOC.
  *
  * By accident, ext4 can be reentered when a transaction is open via
  * quota file writes. If we were to commit the transaction while thus
@@ -878,32 +906,40 @@ static int ext4_write_begin(struct file *file, struct address_space *mapping,
 		ret = ext4_try_to_write_inline_data(mapping, inode, pos, len,
 						    flags, pagep);
 		if (ret < 0)
-			goto out;
-		if (ret == 1) {
-			ret = 0;
-			goto out;
-		}
+			return ret;
+		if (ret == 1)
+			return 0;
 	}
 
-retry:
-	handle = ext4_journal_start(inode, needed_blocks);
+	/*
+	 * grab_cache_page_write_begin() can take a long time if the
+	 * system is thrashing due to memory pressure, or if the page
+	 * is being written back. So grab it first before we start
+	 * the transaction handle. This also allows us to allocate
+	 * the page (if needed) without using GFP_NOFS.
+	 */
+retry_grab:
+	page = grab_cache_page_write_begin(mapping, index, flags);
+	if (!page)
+		return -ENOMEM;
+	unlock_page(page);
+
+retry_journal:
+	handle = ext4_journal_start(inode, EXT4_HT_WRITE_PAGE, needed_blocks);
 	if (IS_ERR(handle)) {
-		ret = PTR_ERR(handle);
-		goto out;
+		page_cache_release(page);
+		return PTR_ERR(handle);
 	}
 
-	/* We cannot recurse into the filesystem as the transaction is already
-	 * started */
-	flags |= AOP_FLAG_NOFS;
-
-	page = grab_cache_page_write_begin(mapping, index, flags);
-	if (!page) {
+	lock_page(page);
+	if (page->mapping != mapping) {
+		/* The page got truncated from under us */
+		unlock_page(page);
+		page_cache_release(page);
 		ext4_journal_stop(handle);
-		ret = -ENOMEM;
-		goto out;
+		goto retry_grab;
 	}
-
-	*pagep = page;
+	wait_on_page_writeback(page);
 
 	if (ext4_should_dioread_nolock(inode))
 		ret = __block_write_begin(page, pos, len, ext4_get_block_write);
@@ -918,7 +954,6 @@ retry:
 
 	if (ret) {
 		unlock_page(page);
-		page_cache_release(page);
 		/*
 		 * __block_write_begin may have instantiated a few blocks
 		 * outside i_size. Trim these off again. Don't need
@@ -942,11 +977,14 @@ retry:
 			if (inode->i_nlink)
 				ext4_orphan_del(NULL, inode);
 		}
-	}
 
-	if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
-		goto retry;
-out:
+		if (ret == -ENOSPC &&
+		    ext4_should_retry_alloc(inode->i_sb, &retries))
+			goto retry_journal;
+		page_cache_release(page);
+		return ret;
+	}
+	*pagep = page;
 	return ret;
 }
 
@@ -1256,7 +1294,7 @@ static void ext4_da_release_space(struct inode *inode, int to_free)
 		 * function is called from invalidate page, it's
 		 * harmless to return without any action.
 		 */
-		ext4_msg(inode->i_sb, KERN_NOTICE, "ext4_da_release_space: "
+		ext4_warning(inode->i_sb, "ext4_da_release_space: "
 			 "ino %lu, to_free %d with only %d reserved "
 			 "data blocks", inode->i_ino, to_free,
 			 ei->i_reserved_data_blocks);
@@ -1357,7 +1395,6 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd,
 	loff_t size = i_size_read(inode);
 	unsigned int len, block_start;
 	struct buffer_head *bh, *page_bufs = NULL;
-	int journal_data = ext4_should_journal_data(inode);
 	sector_t pblock = 0, cur_logical = 0;
 	struct ext4_io_submit io_submit;
 
@@ -1378,7 +1415,7 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd,
 		if (nr_pages == 0)
 			break;
 		for (i = 0; i < nr_pages; i++) {
-			int commit_write = 0, skip_page = 0;
+			int skip_page = 0;
 			struct page *page = pvec.pages[i];
 
 			index = page->index;
@@ -1400,27 +1437,9 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd,
 			BUG_ON(!PageLocked(page));
 			BUG_ON(PageWriteback(page));
 
-			/*
-			 * If the page does not have buffers (for
-			 * whatever reason), try to create them using
-			 * __block_write_begin. If this fails,
-			 * skip the page and move on.
-			 */
-			if (!page_has_buffers(page)) {
-				if (__block_write_begin(page, 0, len,
-						noalloc_get_block_write)) {
-				skip_page:
-					unlock_page(page);
-					continue;
-				}
-				commit_write = 1;
-			}
-
 			bh = page_bufs = page_buffers(page);
 			block_start = 0;
 			do {
-				if (!bh)
-					goto skip_page;
 				if (map && (cur_logical >= map->m_lblk) &&
 				    (cur_logical <= (map->m_lblk +
 						(map->m_len - 1)))) {
@@ -1448,33 +1467,14 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd,
 					pblock++;
 			} while (bh != page_bufs);
 
-			if (skip_page)
-				goto skip_page;
-
-			if (commit_write)
-				/* mark the buffer_heads as dirty & uptodate */
-				block_commit_write(page, 0, len);
+			if (skip_page) {
+				unlock_page(page);
+				continue;
+			}
 
 			clear_page_dirty_for_io(page);
-			/*
-			 * Delalloc doesn't support data journalling,
-			 * but eventually maybe we'll lift this
-			 * restriction.
-			 */
-			if (unlikely(journal_data && PageChecked(page)))
-				err = __ext4_journalled_writepage(page, len);
-			else if (test_opt(inode->i_sb, MBLK_IO_SUBMIT))
-				err = ext4_bio_write_page(&io_submit, page,
-							  len, mpd->wbc);
-			else if (buffer_uninit(page_bufs)) {
-				ext4_set_bh_endio(page_bufs, inode);
-				err = block_write_full_page_endio(page,
-						noalloc_get_block_write,
-						mpd->wbc, ext4_end_io_buffer_write);
-			} else
-				err = block_write_full_page(page,
-						noalloc_get_block_write, mpd->wbc);
-
+			err = ext4_bio_write_page(&io_submit, page, len,
+						  mpd->wbc);
 			if (!err)
 				mpd->pages_written++;
 			/*
@@ -1640,7 +1640,7 @@ static void mpage_da_map_and_submit(struct mpage_da_data *mpd)
 		       (unsigned long long) next,
 		       mpd->b_size >> mpd->inode->i_blkbits, err);
 		ext4_msg(sb, KERN_CRIT,
-			"This should not happen!! Data will be lost\n");
+			"This should not happen!! Data will be lost");
 		if (err == -ENOSPC)
 			ext4_print_free_blocks(mpd->inode);
 	}
@@ -1690,16 +1690,16 @@ submit_io:
  *
  * @mpd->lbh - extent of blocks
  * @logical - logical number of the block in the file
- * @bh - bh of the block (used to access block's state)
+ * @b_state - b_state of the buffer head added
  *
  * the function is used to collect contig. blocks in same state
  */
-static void mpage_add_bh_to_extent(struct mpage_da_data *mpd,
-				   sector_t logical, size_t b_size,
+static void mpage_add_bh_to_extent(struct mpage_da_data *mpd, sector_t logical,
 				   unsigned long b_state)
 {
 	sector_t next;
-	int nrblocks = mpd->b_size >> mpd->inode->i_blkbits;
+	int blkbits = mpd->inode->i_blkbits;
+	int nrblocks = mpd->b_size >> blkbits;
 
 	/*
 	 * XXX Don't go larger than mballoc is willing to allocate
@@ -1707,11 +1707,11 @@ static void mpage_add_bh_to_extent(struct mpage_da_data *mpd,
 	 * mpage_da_submit_io() into this function and then call
 	 * ext4_map_blocks() multiple times in a loop
 	 */
-	if (nrblocks >= 8*1024*1024/mpd->inode->i_sb->s_blocksize)
+	if (nrblocks >= (8*1024*1024 >> blkbits))
 		goto flush_it;
 
-	/* check if thereserved journal credits might overflow */
-	if (!(ext4_test_inode_flag(mpd->inode, EXT4_INODE_EXTENTS))) {
+	/* check if the reserved journal credits might overflow */
+	if (!ext4_test_inode_flag(mpd->inode, EXT4_INODE_EXTENTS)) {
 		if (nrblocks >= EXT4_MAX_TRANS_DATA) {
 			/*
 			 * With non-extent format we are limited by the journal
@@ -1720,16 +1720,6 @@ static void mpage_add_bh_to_extent(struct mpage_da_data *mpd,
 			 * nrblocks. So limit nrblocks.
 			 */
 			goto flush_it;
-		} else if ((nrblocks + (b_size >> mpd->inode->i_blkbits)) >
-			   EXT4_MAX_TRANS_DATA) {
-			/*
-			 * Adding the new buffer_head would make it cross the
-			 * allowed limit for which we have journal credit
-			 * reserved. So limit the new bh->b_size
-			 */
-			b_size = (EXT4_MAX_TRANS_DATA - nrblocks) <<
-						mpd->inode->i_blkbits;
-			/* we will do mpage_da_submit_io in the next loop */
 		}
 	}
 	/*
@@ -1737,7 +1727,7 @@ static void mpage_add_bh_to_extent(struct mpage_da_data *mpd,
 	 */
 	if (mpd->b_size == 0) {
 		mpd->b_blocknr = logical;
-		mpd->b_size = b_size;
+		mpd->b_size = 1 << blkbits;
 		mpd->b_state = b_state & BH_FLAGS;
 		return;
 	}
@@ -1747,7 +1737,7 @@ static void mpage_add_bh_to_extent(struct mpage_da_data *mpd,
 	 * Can we merge the block to our big extent?
 	 */
 	if (logical == next && (b_state & BH_FLAGS) == mpd->b_state) {
-		mpd->b_size += b_size;
+		mpd->b_size += 1 << blkbits;
 		return;
 	}
 
@@ -1775,6 +1765,7 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock,
 			      struct ext4_map_blocks *map,
 			      struct buffer_head *bh)
 {
+	struct extent_status es;
 	int retval;
 	sector_t invalid_block = ~((sector_t) 0xffff);
 
@@ -1785,6 +1776,42 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock,
 	ext_debug("ext4_da_map_blocks(): inode %lu, max_blocks %u,"
 		  "logical block %lu\n", inode->i_ino, map->m_len,
 		  (unsigned long) map->m_lblk);
+
+	/* Lookup extent status tree firstly */
+	if (ext4_es_lookup_extent(inode, iblock, &es)) {
+
+		if (ext4_es_is_hole(&es)) {
+			retval = 0;
+			down_read((&EXT4_I(inode)->i_data_sem));
+			goto add_delayed;
+		}
+
+		/*
+		 * Delayed extent could be allocated by fallocate.
+		 * So we need to check it.
+		 */
+		if (ext4_es_is_delayed(&es) && !ext4_es_is_unwritten(&es)) {
+			map_bh(bh, inode->i_sb, invalid_block);
+			set_buffer_new(bh);
+			set_buffer_delay(bh);
+			return 0;
+		}
+
+		map->m_pblk = ext4_es_pblock(&es) + iblock - es.es_lblk;
+		retval = es.es_len - (iblock - es.es_lblk);
+		if (retval > map->m_len)
+			retval = map->m_len;
+		map->m_len = retval;
+		if (ext4_es_is_written(&es))
+			map->m_flags |= EXT4_MAP_MAPPED;
+		else if (ext4_es_is_unwritten(&es))
+			map->m_flags |= EXT4_MAP_UNWRITTEN;
+		else
+			BUG_ON(1);
+
+		return retval;
+	}
+
 	/*
 	 * Try to see if we can get the block without requesting a new
 	 * file system block.
@@ -1803,11 +1830,15 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock,
 		map->m_flags |= EXT4_MAP_FROM_CLUSTER;
 		retval = 0;
 	} else if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
-		retval = ext4_ext_map_blocks(NULL, inode, map, 0);
+		retval = ext4_ext_map_blocks(NULL, inode, map,
+					     EXT4_GET_BLOCKS_NO_PUT_HOLE);
 	else
-		retval = ext4_ind_map_blocks(NULL, inode, map, 0);
+		retval = ext4_ind_map_blocks(NULL, inode, map,
+					     EXT4_GET_BLOCKS_NO_PUT_HOLE);
 
+add_delayed:
 	if (retval == 0) {
+		int ret;
 		/*
 		 * XXX: __block_prepare_write() unmaps passed block,
 		 * is it OK?
@@ -1815,15 +1846,20 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock,
 		/* If the block was allocated from previously allocated cluster,
 		 * then we dont need to reserve it again. */
 		if (!(map->m_flags & EXT4_MAP_FROM_CLUSTER)) {
-			retval = ext4_da_reserve_space(inode, iblock);
-			if (retval)
+			ret = ext4_da_reserve_space(inode, iblock);
+			if (ret) {
 				/* not enough space to reserve */
+				retval = ret;
 				goto out_unlock;
+			}
 		}
 
-		retval = ext4_es_insert_extent(inode, map->m_lblk, map->m_len);
-		if (retval)
+		ret = ext4_es_insert_extent(inode, map->m_lblk, map->m_len,
+					    ~0, EXTENT_STATUS_DELAYED);
+		if (ret) {
+			retval = ret;
 			goto out_unlock;
+		}
 
 		/* Clear EXT4_MAP_FROM_CLUSTER flag since its purpose is served
 		 * and it should not appear on the bh->b_state.
@@ -1833,6 +1869,16 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock,
 		map_bh(bh, inode->i_sb, invalid_block);
 		set_buffer_new(bh);
 		set_buffer_delay(bh);
+	} else if (retval > 0) {
+		int ret;
+		unsigned long long status;
+
+		status = map->m_flags & EXT4_MAP_UNWRITTEN ?
+				EXTENT_STATUS_UNWRITTEN : EXTENT_STATUS_WRITTEN;
+		ret = ext4_es_insert_extent(inode, map->m_lblk, map->m_len,
+					    map->m_pblk, status);
+		if (ret != 0)
+			retval = ret;
 	}
 
 out_unlock:
@@ -1890,27 +1936,6 @@ int ext4_da_get_block_prep(struct inode *inode, sector_t iblock,
 	return 0;
 }
 
-/*
- * This function is used as a standard get_block_t calback function
- * when there is no desire to allocate any blocks. It is used as a
- * callback function for block_write_begin() and block_write_full_page().
- * These functions should only try to map a single block at a time.
- *
- * Since this function doesn't do block allocations even if the caller
- * requests it by passing in create=1, it is critically important that
- * any caller checks to make sure that any buffer heads are returned
- * by this function are either all already mapped or marked for
- * delayed allocation before calling block_write_full_page(). Otherwise,
- * b_blocknr could be left unitialized, and the page write functions will
- * be taken by surprise.
- */
-static int noalloc_get_block_write(struct inode *inode, sector_t iblock,
-				   struct buffer_head *bh_result, int create)
-{
-	BUG_ON(bh_result->b_size != inode->i_sb->s_blocksize);
-	return _ext4_get_block(inode, iblock, bh_result, 0);
-}
-
 static int bget_one(handle_t *handle, struct buffer_head *bh)
 {
 	get_bh(bh);
@@ -1955,7 +1980,8 @@ static int __ext4_journalled_writepage(struct page *page,
 	 * references to buffers so we are safe */
 	unlock_page(page);
 
-	handle = ext4_journal_start(inode, ext4_writepage_trans_blocks(inode));
+	handle = ext4_journal_start(inode, EXT4_HT_WRITE_PAGE,
+				    ext4_writepage_trans_blocks(inode));
 	if (IS_ERR(handle)) {
 		ret = PTR_ERR(handle);
 		goto out;
@@ -2035,11 +2061,12 @@ out:
 static int ext4_writepage(struct page *page,
 			  struct writeback_control *wbc)
 {
-	int ret = 0, commit_write = 0;
+	int ret = 0;
 	loff_t size;
 	unsigned int len;
 	struct buffer_head *page_bufs = NULL;
 	struct inode *inode = page->mapping->host;
+	struct ext4_io_submit io_submit;
 
 	trace_ext4_writepage(page);
 	size = i_size_read(inode);
@@ -2048,39 +2075,29 @@ static int ext4_writepage(struct page *page,
 	else
 		len = PAGE_CACHE_SIZE;
 
+	page_bufs = page_buffers(page);
 	/*
-	 * If the page does not have buffers (for whatever reason),
-	 * try to create them using __block_write_begin. If this
-	 * fails, redirty the page and move on.
+	 * We cannot do block allocation or other extent handling in this
+	 * function. If there are buffers needing that, we have to redirty
+	 * the page. But we may reach here when we do a journal commit via
+	 * journal_submit_inode_data_buffers() and in that case we must write
+	 * allocated buffers to achieve data=ordered mode guarantees.
 	 */
-	if (!page_has_buffers(page)) {
-		if (__block_write_begin(page, 0, len,
-					noalloc_get_block_write)) {
-		redirty_page:
-			redirty_page_for_writepage(wbc, page);
+	if (ext4_walk_page_buffers(NULL, page_bufs, 0, len, NULL,
+				   ext4_bh_delay_or_unwritten)) {
+		redirty_page_for_writepage(wbc, page);
+		if (current->flags & PF_MEMALLOC) {
+			/*
+			 * For memory cleaning there's no point in writing only
+			 * some buffers. So just bail out. Warn if we came here
+			 * from direct reclaim.
+			 */
+			WARN_ON_ONCE((current->flags & (PF_MEMALLOC|PF_KSWAPD))
+							== PF_MEMALLOC);
 			unlock_page(page);
 			return 0;
 		}
-		commit_write = 1;
 	}
-	page_bufs = page_buffers(page);
-	if (ext4_walk_page_buffers(NULL, page_bufs, 0, len, NULL,
-				   ext4_bh_delay_or_unwritten)) {
-		/*
-		 * We don't want to do block allocation, so redirty
-		 * the page and return. We may reach here when we do
-		 * a journal commit via journal_submit_inode_data_buffers.
-		 * We can also reach here via shrink_page_list but it
-		 * should never be for direct reclaim so warn if that
-		 * happens
-		 */
-		WARN_ON_ONCE((current->flags & (PF_MEMALLOC|PF_KSWAPD)) ==
-							PF_MEMALLOC);
-		goto redirty_page;
-	}
-	if (commit_write)
-		/* now mark the buffer_heads as dirty and uptodate */
-		block_commit_write(page, 0, len);
 
 	if (PageChecked(page) && ext4_should_journal_data(inode))
 		/*
@@ -2089,14 +2106,9 @@ static int ext4_writepage(struct page *page,
 		 */
 		return __ext4_journalled_writepage(page, len);
 
-	if (buffer_uninit(page_bufs)) {
-		ext4_set_bh_endio(page_bufs, inode);
-		ret = block_write_full_page_endio(page, noalloc_get_block_write,
-						  wbc, ext4_end_io_buffer_write);
-	} else
-		ret = block_write_full_page(page, noalloc_get_block_write,
-					    wbc);
-
+	memset(&io_submit, 0, sizeof(io_submit));
+	ret = ext4_bio_write_page(&io_submit, page, len, wbc);
+	ext4_io_submit(&io_submit);
 	return ret;
 }
 
@@ -2228,51 +2240,38 @@ static int write_cache_pages_da(handle_t *handle,
 			logical = (sector_t) page->index <<
 				(PAGE_CACHE_SHIFT - inode->i_blkbits);
 
-			if (!page_has_buffers(page)) {
-				mpage_add_bh_to_extent(mpd, logical,
-						       PAGE_CACHE_SIZE,
-					(1 << BH_Dirty) | (1 << BH_Uptodate));
-				if (mpd->io_done)
-					goto ret_extent_tail;
-			} else {
-				/*
-				 * Page with regular buffer heads,
-				 * just add all dirty ones
-				 */
-				head = page_buffers(page);
-				bh = head;
-				do {
-					BUG_ON(buffer_locked(bh));
-					/*
-					 * We need to try to allocate
-					 * unmapped blocks in the same page.
-					 * Otherwise we won't make progress
-					 * with the page in ext4_writepage
-					 */
-					if (ext4_bh_delay_or_unwritten(NULL, bh)) {
-						mpage_add_bh_to_extent(mpd, logical,
-								       bh->b_size,
-								       bh->b_state);
-						if (mpd->io_done)
-							goto ret_extent_tail;
-					} else if (buffer_dirty(bh) && (buffer_mapped(bh))) {
-						/*
-						 * mapped dirty buffer. We need
-						 * to update the b_state
-						 * because we look at b_state
-						 * in mpage_da_map_blocks. We
-						 * don't update b_size because
-						 * if we find an unmapped
-						 * buffer_head later we need to
-						 * use the b_state flag of that
-						 * buffer_head.
-						 */
-						if (mpd->b_size == 0)
-							mpd->b_state = bh->b_state & BH_FLAGS;
-					}
-					logical++;
-				} while ((bh = bh->b_this_page) != head);
-			}
+			/* Add all dirty buffers to mpd */
+			head = page_buffers(page);
+			bh = head;
+			do {
+				BUG_ON(buffer_locked(bh));
+				/*
+				 * We need to try to allocate unmapped blocks
+				 * in the same page. Otherwise we won't make
+				 * progress with the page in ext4_writepage
+				 */
+				if (ext4_bh_delay_or_unwritten(NULL, bh)) {
+					mpage_add_bh_to_extent(mpd, logical,
+							       bh->b_state);
+					if (mpd->io_done)
+						goto ret_extent_tail;
+				} else if (buffer_dirty(bh) &&
+					   buffer_mapped(bh)) {
+					/*
+					 * mapped dirty buffer. We need to
+					 * update the b_state because we look
+					 * at b_state in mpage_da_map_blocks.
+					 * We don't update b_size because if we
+					 * find an unmapped buffer_head later
+					 * we need to use the b_state flag of
+					 * that buffer_head.
+					 */
+					if (mpd->b_size == 0)
+						mpd->b_state =
+							bh->b_state & BH_FLAGS;
+				}
+				logical++;
+			} while ((bh = bh->b_this_page) != head);
 
 		if (nr_to_write > 0) {
 			nr_to_write--;
@@ -2413,7 +2412,8 @@ retry:
 		needed_blocks = ext4_da_writepages_trans_blocks(inode);
 
 		/* start a new transaction*/
-		handle = ext4_journal_start(inode, needed_blocks);
+		handle = ext4_journal_start(inode, EXT4_HT_WRITE_PAGE,
+					    needed_blocks);
 		if (IS_ERR(handle)) {
 			ret = PTR_ERR(handle);
 			ext4_msg(inode->i_sb, KERN_CRIT, "%s: jbd2_start: "
@@ -2555,42 +2555,52 @@ static int ext4_da_write_begin(struct file *file, struct address_space *mapping,
 							pos, len, flags,
 							pagep, fsdata);
 		if (ret < 0)
-			goto out;
-		if (ret == 1) {
-			ret = 0;
-			goto out;
-		}
+			return ret;
+		if (ret == 1)
+			return 0;
 	}
 
-retry:
+	/*
+	 * grab_cache_page_write_begin() can take a long time if the
+	 * system is thrashing due to memory pressure, or if the page
+	 * is being written back. So grab it first before we start
+	 * the transaction handle. This also allows us to allocate
+	 * the page (if needed) without using GFP_NOFS.
+	 */
+retry_grab:
+	page = grab_cache_page_write_begin(mapping, index, flags);
+	if (!page)
+		return -ENOMEM;
+	unlock_page(page);
+
 	/*
 	 * With delayed allocation, we don't log the i_disksize update
 	 * if there is delayed block allocation. But we still need
 	 * to journalling the i_disksize update if writes to the end
 	 * of file which has an already mapped buffer.
 	 */
-	handle = ext4_journal_start(inode, 1);
+retry_journal:
+	handle = ext4_journal_start(inode, EXT4_HT_WRITE_PAGE, 1);
 	if (IS_ERR(handle)) {
-		ret = PTR_ERR(handle);
-		goto out;
+		page_cache_release(page);
+		return PTR_ERR(handle);
 	}
-	/* We cannot recurse into the filesystem as the transaction is already
-	 * started */
-	flags |= AOP_FLAG_NOFS;
 
-	page = grab_cache_page_write_begin(mapping, index, flags);
-	if (!page) {
+	lock_page(page);
+	if (page->mapping != mapping) {
+		/* The page got truncated from under us */
+		unlock_page(page);
+		page_cache_release(page);
 		ext4_journal_stop(handle);
-		ret = -ENOMEM;
-		goto out;
+		goto retry_grab;
 	}
-	*pagep = page;
+	/* In case writeback began while the page was unlocked */
+	wait_on_page_writeback(page);
 
 	ret = __block_write_begin(page, pos, len, ext4_da_get_block_prep);
 	if (ret < 0) {
 		unlock_page(page);
 		ext4_journal_stop(handle);
-		page_cache_release(page);
 		/*
 		 * block_write_begin may have instantiated a few blocks
 		 * outside i_size. Trim these off again. Don't need
@@ -2598,11 +2608,16 @@ retry:
 		 */
 		if (pos + len > inode->i_size)
 			ext4_truncate_failed_write(inode);
+
+		if (ret == -ENOSPC &&
+		    ext4_should_retry_alloc(inode->i_sb, &retries))
+			goto retry_journal;
+
+		page_cache_release(page);
+		return ret;
 	}
 
-	if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
-		goto retry;
-out:
+	*pagep = page;
 	return ret;
 }
 
@@ -2858,36 +2873,10 @@ ext4_readpages(struct file *file, struct address_space *mapping,
 	return mpage_readpages(mapping, pages, nr_pages, ext4_get_block);
 }
 
-static void ext4_invalidatepage_free_endio(struct page *page, unsigned long offset)
-{
-	struct buffer_head *head, *bh;
-	unsigned int curr_off = 0;
-
-	if (!page_has_buffers(page))
-		return;
-	head = bh = page_buffers(page);
-	do {
-		if (offset <= curr_off && test_clear_buffer_uninit(bh)
-					&& bh->b_private) {
-			ext4_free_io_end(bh->b_private);
-			bh->b_private = NULL;
-			bh->b_end_io = NULL;
-		}
-		curr_off = curr_off + bh->b_size;
-		bh = bh->b_this_page;
-	} while (bh != head);
-}
-
 static void ext4_invalidatepage(struct page *page, unsigned long offset)
 {
 	trace_ext4_invalidatepage(page, offset);
 
-	/*
-	 * free any io_end structure allocated for buffers to be discarded
-	 */
-	if (ext4_should_dioread_nolock(page->mapping->host))
-		ext4_invalidatepage_free_endio(page, offset);
-
 	/* No journalling happens on data buffers when this function is used */
 	WARN_ON(page_has_buffers(page) && buffer_jbd(page_buffers(page)));
 
@@ -2977,9 +2966,9 @@ static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset,
 	if (!(io_end->flag & EXT4_IO_END_UNWRITTEN)) {
 		ext4_free_io_end(io_end);
 out:
+		inode_dio_done(inode);
 		if (is_async)
 			aio_complete(iocb, ret, 0);
-		inode_dio_done(inode);
 		return;
 	}
 
@@ -2993,65 +2982,6 @@ out:
 	ext4_add_complete_io(io_end);
 }
 
-static void ext4_end_io_buffer_write(struct buffer_head *bh, int uptodate)
-{
-	ext4_io_end_t *io_end = bh->b_private;
-	struct inode *inode;
-
-	if (!test_clear_buffer_uninit(bh) || !io_end)
-		goto out;
-
-	if (!(io_end->inode->i_sb->s_flags & MS_ACTIVE)) {
-		ext4_msg(io_end->inode->i_sb, KERN_INFO,
-			 "sb umounted, discard end_io request for inode %lu",
-			 io_end->inode->i_ino);
-		ext4_free_io_end(io_end);
-		goto out;
-	}
-
-	/*
-	 * It may be over-defensive here to check EXT4_IO_END_UNWRITTEN now,
-	 * but being more careful is always safe for the future change.
-	 */
-	inode = io_end->inode;
-	ext4_set_io_unwritten_flag(inode, io_end);
-	ext4_add_complete_io(io_end);
-out:
-	bh->b_private = NULL;
-	bh->b_end_io = NULL;
-	clear_buffer_uninit(bh);
-	end_buffer_async_write(bh, uptodate);
-}
-
-static int ext4_set_bh_endio(struct buffer_head *bh, struct inode *inode)
-{
-	ext4_io_end_t *io_end;
-	struct page *page = bh->b_page;
-	loff_t offset = (sector_t)page->index << PAGE_CACHE_SHIFT;
-	size_t size = bh->b_size;
-
-retry:
-	io_end = ext4_init_io_end(inode, GFP_ATOMIC);
-	if (!io_end) {
-		pr_warn_ratelimited("%s: allocation fail\n", __func__);
-		schedule();
-		goto retry;
-	}
-	io_end->offset = offset;
-	io_end->size = size;
-	/*
-	 * We need to hold a reference to the page to make sure it
-	 * doesn't get evicted before ext4_end_io_work() has a chance
-	 * to convert the extent from written to unwritten.
-	 */
-	io_end->page = page;
-	get_page(io_end->page);
-
-	bh->b_private = io_end;
-	bh->b_end_io = ext4_end_io_buffer_write;
-	return 0;
-}
-
 /*
  * For ext4 extent files, ext4 will do direct-io write to holes,
  * preallocated extents, and those write extend the file, no need to
@@ -3557,16 +3487,16 @@ int ext4_punch_hole(struct file *file, loff_t offset, loff_t length)
 	if (!S_ISREG(inode->i_mode))
 		return -EOPNOTSUPP;
 
-	if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) {
-		/* TODO: Add support for non extent hole punching */
-		return -EOPNOTSUPP;
-	}
+	if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
+		return ext4_ind_punch_hole(file, offset, length);
 
 	if (EXT4_SB(inode->i_sb)->s_cluster_ratio > 1) {
 		/* TODO: Add support for bigalloc file systems */
 		return -EOPNOTSUPP;
 	}
 
+	trace_ext4_punch_hole(inode, offset, length);
+
 	return ext4_ext_punch_hole(file, offset, length);
 }
 
@@ -3660,11 +3590,8 @@ static int __ext4_get_inode_loc(struct inode *inode,
 	iloc->offset = (inode_offset % inodes_per_block) * EXT4_INODE_SIZE(sb);
 
 	bh = sb_getblk(sb, block);
-	if (!bh) {
-		EXT4_ERROR_INODE_BLOCK(inode, block,
-				       "unable to read itable block");
-		return -EIO;
-	}
+	if (unlikely(!bh))
+		return -ENOMEM;
 	if (!buffer_uptodate(bh)) {
 		lock_buffer(bh);
 
@@ -3696,7 +3623,7 @@ static int __ext4_get_inode_loc(struct inode *inode,
 
 			/* Is the inode bitmap in cache? */
 			bitmap_bh = sb_getblk(sb, ext4_inode_bitmap(sb, gdp));
-			if (!bitmap_bh)
+			if (unlikely(!bitmap_bh))
 				goto make_io;
 
 			/*
@@ -4404,8 +4331,9 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
 
 		/* (user+group)*(old+new) structure, inode write (sb,
 		 * inode block, ? - but truncate inode update has it) */
-		handle = ext4_journal_start(inode, (EXT4_MAXQUOTAS_INIT_BLOCKS(inode->i_sb)+
-					EXT4_MAXQUOTAS_DEL_BLOCKS(inode->i_sb))+3);
+		handle = ext4_journal_start(inode, EXT4_HT_QUOTA,
+			(EXT4_MAXQUOTAS_INIT_BLOCKS(inode->i_sb) +
+			 EXT4_MAXQUOTAS_DEL_BLOCKS(inode->i_sb)) + 3);
 		if (IS_ERR(handle)) {
 			error = PTR_ERR(handle);
 			goto err_out;
@@ -4440,7 +4368,7 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
 	    (attr->ia_size < inode->i_size)) {
 		handle_t *handle;
 
-		handle = ext4_journal_start(inode, 3);
+		handle = ext4_journal_start(inode, EXT4_HT_INODE, 3);
 		if (IS_ERR(handle)) {
 			error = PTR_ERR(handle);
 			goto err_out;
@@ -4460,7 +4388,8 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
 							    attr->ia_size);
 			if (error) {
 				/* Do as much error cleanup as possible */
-				handle = ext4_journal_start(inode, 3);
+				handle = ext4_journal_start(inode,
+							    EXT4_HT_INODE, 3);
 				if (IS_ERR(handle)) {
 					ext4_orphan_del(NULL, inode);
 					goto err_out;
@@ -4801,7 +4730,7 @@ void ext4_dirty_inode(struct inode *inode, int flags)
 {
 	handle_t *handle;
 
-	handle = ext4_journal_start(inode, 2);
+	handle = ext4_journal_start(inode, EXT4_HT_INODE, 2);
 	if (IS_ERR(handle))
 		goto out;
 
@@ -4902,7 +4831,7 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val)
 
 	/* Finally we can mark the inode as dirty. */
 
-	handle = ext4_journal_start(inode, 1);
+	handle = ext4_journal_start(inode, EXT4_HT_INODE, 1);
 	if (IS_ERR(handle))
 		return PTR_ERR(handle);
 
@@ -4980,7 +4909,8 @@ int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
 	else
 		get_block = ext4_get_block;
 retry_alloc:
-	handle = ext4_journal_start(inode, ext4_writepage_trans_blocks(inode));
+	handle = ext4_journal_start(inode, EXT4_HT_WRITE_PAGE,
+				    ext4_writepage_trans_blocks(inode));
 	if (IS_ERR(handle)) {
 		ret = VM_FAULT_SIGBUS;
 		goto out;