Diffstat (limited to 'fs/ext4/inode.c')
-rw-r--r--	fs/ext4/inode.c	664
1 files changed, 297 insertions, 367 deletions
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index cd818d8bb221..88049d8d30cb 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -132,10 +132,6 @@ static inline int ext4_begin_ordered_truncate(struct inode *inode,
 }
 
 static void ext4_invalidatepage(struct page *page, unsigned long offset);
-static int noalloc_get_block_write(struct inode *inode, sector_t iblock,
-				   struct buffer_head *bh_result, int create);
-static int ext4_set_bh_endio(struct buffer_head *bh, struct inode *inode);
-static void ext4_end_io_buffer_write(struct buffer_head *bh, int uptodate);
 static int __ext4_journalled_writepage(struct page *page, unsigned int len);
 static int ext4_bh_delay_or_unwritten(handle_t *handle, struct buffer_head *bh);
 static int ext4_discard_partial_page_buffers_no_lock(handle_t *handle,
@@ -238,7 +234,8 @@ void ext4_evict_inode(struct inode *inode)
 		 * protection against it
 		 */
 		sb_start_intwrite(inode->i_sb);
-		handle = ext4_journal_start(inode, ext4_blocks_for_truncate(inode)+3);
+		handle = ext4_journal_start(inode, EXT4_HT_TRUNCATE,
+					    ext4_blocks_for_truncate(inode)+3);
 		if (IS_ERR(handle)) {
 			ext4_std_error(inode->i_sb, PTR_ERR(handle));
 			/*
@@ -346,7 +343,7 @@ void ext4_da_update_reserve_space(struct inode *inode,
 	spin_lock(&ei->i_block_reservation_lock);
 	trace_ext4_da_update_reserve_space(inode, used, quota_claim);
 	if (unlikely(used > ei->i_reserved_data_blocks)) {
-		ext4_msg(inode->i_sb, KERN_NOTICE, "%s: ino %lu, used %d "
+		ext4_warning(inode->i_sb, "%s: ino %lu, used %d "
 			 "with only %d reserved data blocks",
 			 __func__, inode->i_ino, used,
 			 ei->i_reserved_data_blocks);
@@ -355,10 +352,12 @@ void ext4_da_update_reserve_space(struct inode *inode,
 	}
 
 	if (unlikely(ei->i_allocated_meta_blocks > ei->i_reserved_meta_blocks)) {
-		ext4_msg(inode->i_sb, KERN_NOTICE, "%s: ino %lu, allocated %d "
-			 "with only %d reserved metadata blocks\n", __func__,
-			 inode->i_ino, ei->i_allocated_meta_blocks,
-			 ei->i_reserved_meta_blocks);
+		ext4_warning(inode->i_sb, "ino %lu, allocated %d "
+			     "with only %d reserved metadata blocks "
+			     "(releasing %d blocks with reserved %d data blocks)",
+			     inode->i_ino, ei->i_allocated_meta_blocks,
+			     ei->i_reserved_meta_blocks, used,
+			     ei->i_reserved_data_blocks);
 		WARN_ON(1);
 		ei->i_allocated_meta_blocks = ei->i_reserved_meta_blocks;
 	}
@@ -508,12 +507,33 @@ static pgoff_t ext4_num_dirty_pages(struct inode *inode, pgoff_t idx,
 int ext4_map_blocks(handle_t *handle, struct inode *inode,
 		    struct ext4_map_blocks *map, int flags)
 {
+	struct extent_status es;
 	int retval;
 
 	map->m_flags = 0;
 	ext_debug("ext4_map_blocks(): inode %lu, flag %d, max_blocks %u,"
 		  "logical block %lu\n", inode->i_ino, flags, map->m_len,
 		  (unsigned long) map->m_lblk);
+
+	/* Lookup extent status tree firstly */
+	if (ext4_es_lookup_extent(inode, map->m_lblk, &es)) {
+		if (ext4_es_is_written(&es) || ext4_es_is_unwritten(&es)) {
+			map->m_pblk = ext4_es_pblock(&es) +
+					map->m_lblk - es.es_lblk;
+			map->m_flags |= ext4_es_is_written(&es) ?
+					EXT4_MAP_MAPPED : EXT4_MAP_UNWRITTEN;
+			retval = es.es_len - (map->m_lblk - es.es_lblk);
+			if (retval > map->m_len)
+				retval = map->m_len;
+			map->m_len = retval;
+		} else if (ext4_es_is_delayed(&es) || ext4_es_is_hole(&es)) {
+			retval = 0;
+		} else {
+			BUG_ON(1);
+		}
+		goto found;
+	}
+
 	/*
 	 * Try to see if we can get the block without requesting a new
 	 * file system block.
@@ -527,20 +547,27 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode,
 		retval = ext4_ind_map_blocks(handle, inode, map, flags &
 					     EXT4_GET_BLOCKS_KEEP_SIZE);
 	}
+	if (retval > 0) {
+		int ret;
+		unsigned long long status;
+
+		status = map->m_flags & EXT4_MAP_UNWRITTEN ?
+				EXTENT_STATUS_UNWRITTEN : EXTENT_STATUS_WRITTEN;
+		if (!(flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) &&
+		    ext4_find_delalloc_range(inode, map->m_lblk,
+					     map->m_lblk + map->m_len - 1))
+			status |= EXTENT_STATUS_DELAYED;
+		ret = ext4_es_insert_extent(inode, map->m_lblk,
+					    map->m_len, map->m_pblk, status);
+		if (ret < 0)
+			retval = ret;
+	}
 	if (!(flags & EXT4_GET_BLOCKS_NO_LOCK))
 		up_read((&EXT4_I(inode)->i_data_sem));
 
+found:
 	if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) {
-		int ret;
-		if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) {
-			/* delayed alloc may be allocated by fallocate and
-			 * coverted to initialized by directIO.
-			 * we need to handle delayed extent here.
-			 */
-			down_write((&EXT4_I(inode)->i_data_sem));
-			goto delayed_mapped;
-		}
-		ret = check_block_validity(inode, map);
+		int ret = check_block_validity(inode, map);
 		if (ret != 0)
 			return ret;
 	}
@@ -560,16 +587,10 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode,
 		return retval;
 
 	/*
-	 * When we call get_blocks without the create flag, the
-	 * BH_Unwritten flag could have gotten set if the blocks
-	 * requested were part of a uninitialized extent. We need to
-	 * clear this flag now that we are committed to convert all or
-	 * part of the uninitialized extent to be an initialized
-	 * extent. This is because we need to avoid the combination
-	 * of BH_Unwritten and BH_Mapped flags being simultaneously
-	 * set on the buffer_head.
+	 * Here we clear m_flags because after allocating an new extent,
+	 * it will be set again.
 	 */
-	map->m_flags &= ~EXT4_MAP_UNWRITTEN;
+	map->m_flags &= ~EXT4_MAP_FLAGS;
 
 	/*
 	 * New blocks allocate and/or writing to uninitialized extent
@@ -615,18 +636,23 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode,
 		    (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE))
 			ext4_da_update_reserve_space(inode, retval, 1);
 	}
-	if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) {
+	if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE)
 		ext4_clear_inode_state(inode, EXT4_STATE_DELALLOC_RESERVED);
 
-	if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) {
+	if (retval > 0) {
 		int ret;
-delayed_mapped:
-		/* delayed allocation blocks has been allocated */
-		ret = ext4_es_remove_extent(inode, map->m_lblk,
-					    map->m_len);
-		if (ret < 0)
-			retval = ret;
-	}
+		unsigned long long status;
+
+		status = map->m_flags & EXT4_MAP_UNWRITTEN ?
+				EXTENT_STATUS_UNWRITTEN : EXTENT_STATUS_WRITTEN;
+		if (!(flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) &&
+		    ext4_find_delalloc_range(inode, map->m_lblk,
+					     map->m_lblk + map->m_len - 1))
+			status |= EXTENT_STATUS_DELAYED;
+		ret = ext4_es_insert_extent(inode, map->m_lblk, map->m_len,
+					    map->m_pblk, status);
+		if (ret < 0)
+			retval = ret;
 	}
 
 	up_write((&EXT4_I(inode)->i_data_sem));
@@ -660,7 +686,8 @@ static int _ext4_get_block(struct inode *inode, sector_t iblock,
 		if (map.m_len > DIO_MAX_BLOCKS)
 			map.m_len = DIO_MAX_BLOCKS;
 		dio_credits = ext4_chunk_trans_blocks(inode, map.m_len);
-		handle = ext4_journal_start(inode, dio_credits);
+		handle = ext4_journal_start(inode, EXT4_HT_MAP_BLOCKS,
+					    dio_credits);
 		if (IS_ERR(handle)) {
 			ret = PTR_ERR(handle);
 			return ret;
@@ -707,14 +734,16 @@ struct buffer_head *ext4_getblk(handle_t *handle, struct inode *inode,
 	/* ensure we send some value back into *errp */
 	*errp = 0;
 
+	if (create && err == 0)
+		err = -ENOSPC;	/* should never happen */
 	if (err < 0)
 		*errp = err;
 	if (err <= 0)
 		return NULL;
 
 	bh = sb_getblk(inode->i_sb, map.m_pblk);
-	if (!bh) {
-		*errp = -EIO;
+	if (unlikely(!bh)) {
+		*errp = -ENOMEM;
 		return NULL;
 	}
 	if (map.m_flags & EXT4_MAP_NEW) {
@@ -808,11 +837,10 @@ int ext4_walk_page_buffers(handle_t *handle,
  * and the commit_write().  So doing the jbd2_journal_start at the start of
  * prepare_write() is the right place.
  *
- * Also, this function can nest inside ext4_writepage() ->
- * block_write_full_page(). In that case, we *know* that ext4_writepage()
- * has generated enough buffer credits to do the whole page. So we won't
- * block on the journal in that case, which is good, because the caller may
- * be PF_MEMALLOC.
+ * Also, this function can nest inside ext4_writepage(). In that case, we
+ * *know* that ext4_writepage() has generated enough buffer credits to do the
+ * whole page. So we won't block on the journal in that case, which is good,
+ * because the caller may be PF_MEMALLOC.
  *
  * By accident, ext4 can be reentered when a transaction is open via
  * quota file writes. If we were to commit the transaction while thus
@@ -878,32 +906,40 @@ static int ext4_write_begin(struct file *file, struct address_space *mapping,
 		ret = ext4_try_to_write_inline_data(mapping, inode, pos, len,
 						    flags, pagep);
 		if (ret < 0)
-			goto out;
-		if (ret == 1) {
-			ret = 0;
-			goto out;
-		}
+			return ret;
+		if (ret == 1)
+			return 0;
 	}
 
-retry:
-	handle = ext4_journal_start(inode, needed_blocks);
+	/*
+	 * grab_cache_page_write_begin() can take a long time if the
+	 * system is thrashing due to memory pressure, or if the page
+	 * is being written back. So grab it first before we start
+	 * the transaction handle. This also allows us to allocate
+	 * the page (if needed) without using GFP_NOFS.
+	 */
+retry_grab:
+	page = grab_cache_page_write_begin(mapping, index, flags);
+	if (!page)
+		return -ENOMEM;
+	unlock_page(page);
+
+retry_journal:
+	handle = ext4_journal_start(inode, EXT4_HT_WRITE_PAGE, needed_blocks);
 	if (IS_ERR(handle)) {
-		ret = PTR_ERR(handle);
-		goto out;
+		page_cache_release(page);
+		return PTR_ERR(handle);
 	}
 
-	/* We cannot recurse into the filesystem as the transaction is already
-	 * started */
-	flags |= AOP_FLAG_NOFS;
-
-	page = grab_cache_page_write_begin(mapping, index, flags);
-	if (!page) {
+	lock_page(page);
+	if (page->mapping != mapping) {
+		/* The page got truncated from under us */
+		unlock_page(page);
+		page_cache_release(page);
 		ext4_journal_stop(handle);
-		ret = -ENOMEM;
-		goto out;
+		goto retry_grab;
 	}
-
-	*pagep = page;
+	wait_on_page_writeback(page);
 
 	if (ext4_should_dioread_nolock(inode))
 		ret = __block_write_begin(page, pos, len, ext4_get_block_write);
@@ -918,7 +954,6 @@ retry:
 
 	if (ret) {
 		unlock_page(page);
-		page_cache_release(page);
 		/*
 		 * __block_write_begin may have instantiated a few blocks
 		 * outside i_size. Trim these off again. Don't need
@@ -942,11 +977,14 @@ retry:
 			if (inode->i_nlink)
 				ext4_orphan_del(NULL, inode);
 		}
-	}
 
-	if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
-		goto retry;
-out:
+		if (ret == -ENOSPC &&
+		    ext4_should_retry_alloc(inode->i_sb, &retries))
+			goto retry_journal;
+		page_cache_release(page);
+		return ret;
+	}
+	*pagep = page;
 	return ret;
 }
 
@@ -1256,7 +1294,7 @@ static void ext4_da_release_space(struct inode *inode, int to_free)
 		 * function is called from invalidate page, it's
 		 * harmless to return without any action.
 		 */
-		ext4_msg(inode->i_sb, KERN_NOTICE, "ext4_da_release_space: "
+		ext4_warning(inode->i_sb, "ext4_da_release_space: "
 			 "ino %lu, to_free %d with only %d reserved "
 			 "data blocks", inode->i_ino, to_free,
 			 ei->i_reserved_data_blocks);
@@ -1357,7 +1395,6 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd,
 	loff_t size = i_size_read(inode);
 	unsigned int len, block_start;
 	struct buffer_head *bh, *page_bufs = NULL;
-	int journal_data = ext4_should_journal_data(inode);
 	sector_t pblock = 0, cur_logical = 0;
 	struct ext4_io_submit io_submit;
 
@@ -1378,7 +1415,7 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd,
 		if (nr_pages == 0)
 			break;
 		for (i = 0; i < nr_pages; i++) {
-			int commit_write = 0, skip_page = 0;
+			int skip_page = 0;
 			struct page *page = pvec.pages[i];
 
 			index = page->index;
@@ -1400,27 +1437,9 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd,
 			BUG_ON(!PageLocked(page));
 			BUG_ON(PageWriteback(page));
 
-			/*
-			 * If the page does not have buffers (for
-			 * whatever reason), try to create them using
-			 * __block_write_begin.  If this fails,
-			 * skip the page and move on.
-			 */
-			if (!page_has_buffers(page)) {
-				if (__block_write_begin(page, 0, len,
-						noalloc_get_block_write)) {
-				skip_page:
-					unlock_page(page);
-					continue;
-				}
-				commit_write = 1;
-			}
-
 			bh = page_bufs = page_buffers(page);
 			block_start = 0;
 			do {
-				if (!bh)
-					goto skip_page;
 				if (map && (cur_logical >= map->m_lblk) &&
 				    (cur_logical <= (map->m_lblk +
 						(map->m_len - 1)))) {
@@ -1448,33 +1467,14 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd,
 					pblock++;
 			} while (bh != page_bufs);
 
-			if (skip_page)
-				goto skip_page;
-
-			if (commit_write)
-				/* mark the buffer_heads as dirty & uptodate */
-				block_commit_write(page, 0, len);
+			if (skip_page) {
+				unlock_page(page);
+				continue;
+			}
 
 			clear_page_dirty_for_io(page);
-			/*
-			 * Delalloc doesn't support data journalling,
-			 * but eventually maybe we'll lift this
-			 * restriction.
-			 */
-			if (unlikely(journal_data && PageChecked(page)))
-				err = __ext4_journalled_writepage(page, len);
-			else if (test_opt(inode->i_sb, MBLK_IO_SUBMIT))
-				err = ext4_bio_write_page(&io_submit, page,
-							  len, mpd->wbc);
-			else if (buffer_uninit(page_bufs)) {
-				ext4_set_bh_endio(page_bufs, inode);
-				err = block_write_full_page_endio(page,
-							noalloc_get_block_write,
-							mpd->wbc, ext4_end_io_buffer_write);
-			} else
-				err = block_write_full_page(page,
-						noalloc_get_block_write, mpd->wbc);
-
+			err = ext4_bio_write_page(&io_submit, page, len,
+						  mpd->wbc);
 			if (!err)
 				mpd->pages_written++;
 			/*
@@ -1640,7 +1640,7 @@ static void mpage_da_map_and_submit(struct mpage_da_data *mpd)
 			       (unsigned long long) next,
 			       mpd->b_size >> mpd->inode->i_blkbits, err);
 			ext4_msg(sb, KERN_CRIT,
-				"This should not happen!! Data will be lost\n");
+				"This should not happen!! Data will be lost");
 			if (err == -ENOSPC)
 				ext4_print_free_blocks(mpd->inode);
 		}
@@ -1690,16 +1690,16 @@ submit_io:
  *
  * @mpd->lbh - extent of blocks
  * @logical - logical number of the block in the file
- * @bh - bh of the block (used to access block's state)
+ * @b_state - b_state of the buffer head added
  *
  * the function is used to collect contig. blocks in same state
  */
-static void mpage_add_bh_to_extent(struct mpage_da_data *mpd,
-				   sector_t logical, size_t b_size,
+static void mpage_add_bh_to_extent(struct mpage_da_data *mpd, sector_t logical,
 				   unsigned long b_state)
 {
 	sector_t next;
-	int nrblocks = mpd->b_size >> mpd->inode->i_blkbits;
+	int blkbits = mpd->inode->i_blkbits;
+	int nrblocks = mpd->b_size >> blkbits;
 
 	/*
 	 * XXX Don't go larger than mballoc is willing to allocate
@@ -1707,11 +1707,11 @@ static void mpage_add_bh_to_extent(struct mpage_da_data *mpd,
 	 * mpage_da_submit_io() into this function and then call
 	 * ext4_map_blocks() multiple times in a loop
 	 */
-	if (nrblocks >= 8*1024*1024/mpd->inode->i_sb->s_blocksize)
+	if (nrblocks >= (8*1024*1024 >> blkbits))
 		goto flush_it;
 
-	/* check if thereserved journal credits might overflow */
-	if (!(ext4_test_inode_flag(mpd->inode, EXT4_INODE_EXTENTS))) {
+	/* check if the reserved journal credits might overflow */
+	if (!ext4_test_inode_flag(mpd->inode, EXT4_INODE_EXTENTS)) {
 		if (nrblocks >= EXT4_MAX_TRANS_DATA) {
 			/*
 			 * With non-extent format we are limited by the journal
@@ -1720,16 +1720,6 @@ static void mpage_add_bh_to_extent(struct mpage_da_data *mpd,
 			 * nrblocks. So limit nrblocks.
 			 */
 			goto flush_it;
-		} else if ((nrblocks + (b_size >> mpd->inode->i_blkbits)) >
-			   EXT4_MAX_TRANS_DATA) {
-			/*
-			 * Adding the new buffer_head would make it cross the
-			 * allowed limit for which we have journal credit
-			 * reserved. So limit the new bh->b_size
-			 */
-			b_size = (EXT4_MAX_TRANS_DATA - nrblocks) <<
-				 mpd->inode->i_blkbits;
-			/* we will do mpage_da_submit_io in the next loop */
 		}
 	}
 	/*
@@ -1737,7 +1727,7 @@ static void mpage_add_bh_to_extent(struct mpage_da_data *mpd,
 	 */
 	if (mpd->b_size == 0) {
 		mpd->b_blocknr = logical;
-		mpd->b_size = b_size;
+		mpd->b_size = 1 << blkbits;
 		mpd->b_state = b_state & BH_FLAGS;
 		return;
 	}
@@ -1747,7 +1737,7 @@ static void mpage_add_bh_to_extent(struct mpage_da_data *mpd,
 	 * Can we merge the block to our big extent?
 	 */
 	if (logical == next && (b_state & BH_FLAGS) == mpd->b_state) {
-		mpd->b_size += b_size;
+		mpd->b_size += 1 << blkbits;
 		return;
 	}
 
@@ -1775,6 +1765,7 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock,
 			      struct ext4_map_blocks *map,
 			      struct buffer_head *bh)
 {
+	struct extent_status es;
 	int retval;
 	sector_t invalid_block = ~((sector_t) 0xffff);
 
@@ -1785,6 +1776,42 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock,
 	ext_debug("ext4_da_map_blocks(): inode %lu, max_blocks %u,"
 		  "logical block %lu\n", inode->i_ino, map->m_len,
 		  (unsigned long) map->m_lblk);
+
+	/* Lookup extent status tree firstly */
+	if (ext4_es_lookup_extent(inode, iblock, &es)) {
+
+		if (ext4_es_is_hole(&es)) {
+			retval = 0;
+			down_read((&EXT4_I(inode)->i_data_sem));
+			goto add_delayed;
+		}
+
+		/*
+		 * Delayed extent could be allocated by fallocate.
+		 * So we need to check it.
+		 */
+		if (ext4_es_is_delayed(&es) && !ext4_es_is_unwritten(&es)) {
+			map_bh(bh, inode->i_sb, invalid_block);
+			set_buffer_new(bh);
+			set_buffer_delay(bh);
+			return 0;
+		}
+
+		map->m_pblk = ext4_es_pblock(&es) + iblock - es.es_lblk;
+		retval = es.es_len - (iblock - es.es_lblk);
+		if (retval > map->m_len)
+			retval = map->m_len;
+		map->m_len = retval;
+		if (ext4_es_is_written(&es))
+			map->m_flags |= EXT4_MAP_MAPPED;
+		else if (ext4_es_is_unwritten(&es))
+			map->m_flags |= EXT4_MAP_UNWRITTEN;
+		else
+			BUG_ON(1);
+
+		return retval;
+	}
+
 	/*
 	 * Try to see if we can get the block without requesting a new
 	 * file system block.
@@ -1803,11 +1830,15 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock,
 		map->m_flags |= EXT4_MAP_FROM_CLUSTER;
 		retval = 0;
 	} else if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
-		retval = ext4_ext_map_blocks(NULL, inode, map, 0);
+		retval = ext4_ext_map_blocks(NULL, inode, map,
+					     EXT4_GET_BLOCKS_NO_PUT_HOLE);
 	else
-		retval = ext4_ind_map_blocks(NULL, inode, map, 0);
+		retval = ext4_ind_map_blocks(NULL, inode, map,
+					     EXT4_GET_BLOCKS_NO_PUT_HOLE);
 
+add_delayed:
 	if (retval == 0) {
+		int ret;
 		/*
 		 * XXX: __block_prepare_write() unmaps passed block,
 		 * is it OK?
@@ -1815,15 +1846,20 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock,
 		/* If the block was allocated from previously allocated cluster,
 		 * then we dont need to reserve it again. */
 		if (!(map->m_flags & EXT4_MAP_FROM_CLUSTER)) {
-			retval = ext4_da_reserve_space(inode, iblock);
-			if (retval)
+			ret = ext4_da_reserve_space(inode, iblock);
+			if (ret) {
 				/* not enough space to reserve */
+				retval = ret;
 				goto out_unlock;
+			}
 		}
 
-		retval = ext4_es_insert_extent(inode, map->m_lblk, map->m_len);
-		if (retval)
+		ret = ext4_es_insert_extent(inode, map->m_lblk, map->m_len,
+					    ~0, EXTENT_STATUS_DELAYED);
+		if (ret) {
+			retval = ret;
 			goto out_unlock;
+		}
 
 		/* Clear EXT4_MAP_FROM_CLUSTER flag since its purpose is served
 		 * and it should not appear on the bh->b_state.
@@ -1833,6 +1869,16 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock,
 		map_bh(bh, inode->i_sb, invalid_block);
 		set_buffer_new(bh);
 		set_buffer_delay(bh);
+	} else if (retval > 0) {
+		int ret;
+		unsigned long long status;
+
+		status = map->m_flags & EXT4_MAP_UNWRITTEN ?
+				EXTENT_STATUS_UNWRITTEN : EXTENT_STATUS_WRITTEN;
+		ret = ext4_es_insert_extent(inode, map->m_lblk, map->m_len,
+					    map->m_pblk, status);
+		if (ret != 0)
+			retval = ret;
 	}
 
 out_unlock:
@@ -1890,27 +1936,6 @@ int ext4_da_get_block_prep(struct inode *inode, sector_t iblock,
 	return 0;
 }
 
-/*
- * This function is used as a standard get_block_t calback function
- * when there is no desire to allocate any blocks.  It is used as a
- * callback function for block_write_begin() and block_write_full_page().
- * These functions should only try to map a single block at a time.
- *
- * Since this function doesn't do block allocations even if the caller
- * requests it by passing in create=1, it is critically important that
- * any caller checks to make sure that any buffer heads are returned
- * by this function are either all already mapped or marked for
- * delayed allocation before calling  block_write_full_page().  Otherwise,
- * b_blocknr could be left unitialized, and the page write functions will
- * be taken by surprise.
- */
-static int noalloc_get_block_write(struct inode *inode, sector_t iblock,
-				   struct buffer_head *bh_result, int create)
-{
-	BUG_ON(bh_result->b_size != inode->i_sb->s_blocksize);
-	return _ext4_get_block(inode, iblock, bh_result, 0);
-}
-
 static int bget_one(handle_t *handle, struct buffer_head *bh)
 {
 	get_bh(bh);
@@ -1955,7 +1980,8 @@ static int __ext4_journalled_writepage(struct page *page,
 	 * references to buffers so we are safe */
 	unlock_page(page);
 
-	handle = ext4_journal_start(inode, ext4_writepage_trans_blocks(inode));
+	handle = ext4_journal_start(inode, EXT4_HT_WRITE_PAGE,
+				    ext4_writepage_trans_blocks(inode));
 	if (IS_ERR(handle)) {
 		ret = PTR_ERR(handle);
 		goto out;
@@ -2035,11 +2061,12 @@ out:
 static int ext4_writepage(struct page *page,
 			  struct writeback_control *wbc)
 {
-	int ret = 0, commit_write = 0;
+	int ret = 0;
 	loff_t size;
 	unsigned int len;
 	struct buffer_head *page_bufs = NULL;
 	struct inode *inode = page->mapping->host;
+	struct ext4_io_submit io_submit;
 
 	trace_ext4_writepage(page);
 	size = i_size_read(inode);
@@ -2048,39 +2075,29 @@ static int ext4_writepage(struct page *page,
 	else
 		len = PAGE_CACHE_SIZE;
 
+	page_bufs = page_buffers(page);
 	/*
-	 * If the page does not have buffers (for whatever reason),
-	 * try to create them using __block_write_begin.  If this
-	 * fails, redirty the page and move on.
+	 * We cannot do block allocation or other extent handling in this
+	 * function. If there are buffers needing that, we have to redirty
+	 * the page. But we may reach here when we do a journal commit via
+	 * journal_submit_inode_data_buffers() and in that case we must write
+	 * allocated buffers to achieve data=ordered mode guarantees.
 	 */
-	if (!page_has_buffers(page)) {
-		if (__block_write_begin(page, 0, len,
-					noalloc_get_block_write)) {
-		redirty_page:
-			redirty_page_for_writepage(wbc, page);
+	if (ext4_walk_page_buffers(NULL, page_bufs, 0, len, NULL,
+				   ext4_bh_delay_or_unwritten)) {
+		redirty_page_for_writepage(wbc, page);
+		if (current->flags & PF_MEMALLOC) {
+			/*
+			 * For memory cleaning there's no point in writing only
+			 * some buffers. So just bail out. Warn if we came here
+			 * from direct reclaim.
+			 */
+			WARN_ON_ONCE((current->flags & (PF_MEMALLOC|PF_KSWAPD))
+				     == PF_MEMALLOC);
 			unlock_page(page);
 			return 0;
 		}
-		commit_write = 1;
 	}
-	page_bufs = page_buffers(page);
-	if (ext4_walk_page_buffers(NULL, page_bufs, 0, len, NULL,
-				   ext4_bh_delay_or_unwritten)) {
-		/*
-		 * We don't want to do block allocation, so redirty
-		 * the page and return.  We may reach here when we do
-		 * a journal commit via journal_submit_inode_data_buffers.
-		 * We can also reach here via shrink_page_list but it
-		 * should never be for direct reclaim so warn if that
-		 * happens
-		 */
-		WARN_ON_ONCE((current->flags & (PF_MEMALLOC|PF_KSWAPD)) ==
-			     PF_MEMALLOC);
-		goto redirty_page;
-	}
-	if (commit_write)
-		/* now mark the buffer_heads as dirty and uptodate */
-		block_commit_write(page, 0, len);
 
 	if (PageChecked(page) && ext4_should_journal_data(inode))
 		/*
@@ -2089,14 +2106,9 @@ static int ext4_writepage(struct page *page,
 		 */
 		return __ext4_journalled_writepage(page, len);
 
-	if (buffer_uninit(page_bufs)) {
-		ext4_set_bh_endio(page_bufs, inode);
-		ret = block_write_full_page_endio(page, noalloc_get_block_write,
-						  wbc, ext4_end_io_buffer_write);
-	} else
-		ret = block_write_full_page(page, noalloc_get_block_write,
-					    wbc);
-
+	memset(&io_submit, 0, sizeof(io_submit));
+	ret = ext4_bio_write_page(&io_submit, page, len, wbc);
+	ext4_io_submit(&io_submit);
 	return ret;
 }
 
@@ -2228,51 +2240,38 @@ static int write_cache_pages_da(handle_t *handle,
 			logical = (sector_t) page->index <<
 				(PAGE_CACHE_SHIFT - inode->i_blkbits);
 
-			if (!page_has_buffers(page)) {
-				mpage_add_bh_to_extent(mpd, logical,
-						       PAGE_CACHE_SIZE,
-						       (1 << BH_Dirty) | (1 << BH_Uptodate));
-				if (mpd->io_done)
-					goto ret_extent_tail;
-			} else {
+			/* Add all dirty buffers to mpd */
+			head = page_buffers(page);
+			bh = head;
+			do {
+				BUG_ON(buffer_locked(bh));
 				/*
-				 * Page with regular buffer heads,
-				 * just add all dirty ones
+				 * We need to try to allocate unmapped blocks
+				 * in the same page. Otherwise we won't make
+				 * progress with the page in ext4_writepage
 				 */
-				head = page_buffers(page);
-				bh = head;
-				do {
-					BUG_ON(buffer_locked(bh));
+				if (ext4_bh_delay_or_unwritten(NULL, bh)) {
+					mpage_add_bh_to_extent(mpd, logical,
+							       bh->b_state);
+					if (mpd->io_done)
+						goto ret_extent_tail;
+				} else if (buffer_dirty(bh) &&
+					   buffer_mapped(bh)) {
 					/*
-					 * We need to try to allocate
-					 * unmapped blocks in the same page.
-					 * Otherwise we won't make progress
-					 * with the page in ext4_writepage
+					 * mapped dirty buffer. We need to
+					 * update the b_state because we look
+					 * at b_state in mpage_da_map_blocks.
+					 * We don't update b_size because if we
+					 * find an unmapped buffer_head later
+					 * we need to use the b_state flag of
+					 * that buffer_head.
 					 */
-					if (ext4_bh_delay_or_unwritten(NULL, bh)) {
-						mpage_add_bh_to_extent(mpd, logical,
-								       bh->b_size,
-								       bh->b_state);
-						if (mpd->io_done)
-							goto ret_extent_tail;
-					} else if (buffer_dirty(bh) && (buffer_mapped(bh))) {
-						/*
-						 * mapped dirty buffer. We need
-						 * to update the b_state
-						 * because we look at b_state
-						 * in mpage_da_map_blocks. We
-						 * don't update b_size because
-						 * if we find an unmapped
-						 * buffer_head later we need to
-						 * use the b_state flag of that
-						 * buffer_head.
-						 */
-						if (mpd->b_size == 0)
-							mpd->b_state = bh->b_state & BH_FLAGS;
-					}
-					logical++;
-				} while ((bh = bh->b_this_page) != head);
-			}
+					if (mpd->b_size == 0)
+						mpd->b_state =
+							bh->b_state & BH_FLAGS;
+				}
+				logical++;
+			} while ((bh = bh->b_this_page) != head);
 
 			if (nr_to_write > 0) {
 				nr_to_write--;
@@ -2413,7 +2412,8 @@ retry:
 		needed_blocks = ext4_da_writepages_trans_blocks(inode);
 
 		/* start a new transaction*/
-		handle = ext4_journal_start(inode, needed_blocks);
+		handle = ext4_journal_start(inode, EXT4_HT_WRITE_PAGE,
+					    needed_blocks);
 		if (IS_ERR(handle)) {
 			ret = PTR_ERR(handle);
 			ext4_msg(inode->i_sb, KERN_CRIT, "%s: jbd2_start: "
@@ -2555,42 +2555,52 @@ static int ext4_da_write_begin(struct file *file, struct address_space *mapping,
 						      pos, len, flags,
 						      pagep, fsdata);
 		if (ret < 0)
-			goto out;
-		if (ret == 1) {
-			ret = 0;
-			goto out;
-		}
+			return ret;
+		if (ret == 1)
+			return 0;
 	}
 
-retry:
+	/*
+	 * grab_cache_page_write_begin() can take a long time if the
+	 * system is thrashing due to memory pressure, or if the page
+	 * is being written back. So grab it first before we start
+	 * the transaction handle. This also allows us to allocate
+	 * the page (if needed) without using GFP_NOFS.
+	 */
+retry_grab:
+	page = grab_cache_page_write_begin(mapping, index, flags);
+	if (!page)
+		return -ENOMEM;
+	unlock_page(page);
+
 	/*
 	 * With delayed allocation, we don't log the i_disksize update
 	 * if there is delayed block allocation. But we still need
 	 * to journalling the i_disksize update if writes to the end
 	 * of file which has an already mapped buffer.
 	 */
-	handle = ext4_journal_start(inode, 1);
+retry_journal:
+	handle = ext4_journal_start(inode, EXT4_HT_WRITE_PAGE, 1);
 	if (IS_ERR(handle)) {
-		ret = PTR_ERR(handle);
-		goto out;
+		page_cache_release(page);
+		return PTR_ERR(handle);
 	}
-	/* We cannot recurse into the filesystem as the transaction is already
-	 * started */
-	flags |= AOP_FLAG_NOFS;
 
-	page = grab_cache_page_write_begin(mapping, index, flags);
-	if (!page) {
+	lock_page(page);
+	if (page->mapping != mapping) {
+		/* The page got truncated from under us */
+		unlock_page(page);
+		page_cache_release(page);
 		ext4_journal_stop(handle);
-		ret = -ENOMEM;
-		goto out;
+		goto retry_grab;
 	}
-	*pagep = page;
+	/* In case writeback began while the page was unlocked */
+	wait_on_page_writeback(page);
 
 	ret = __block_write_begin(page, pos, len, ext4_da_get_block_prep);
 	if (ret < 0) {
 		unlock_page(page);
 		ext4_journal_stop(handle);
-		page_cache_release(page);
 		/*
 		 * block_write_begin may have instantiated a few blocks
 		 * outside i_size. Trim these off again. Don't need
@@ -2598,11 +2608,16 @@ retry:
 		 */
 		if (pos + len > inode->i_size)
 			ext4_truncate_failed_write(inode);
+
+		if (ret == -ENOSPC &&
+		    ext4_should_retry_alloc(inode->i_sb, &retries))
+			goto retry_journal;
+
+		page_cache_release(page);
+		return ret;
 	}
 
-	if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
-		goto retry;
-out:
+	*pagep = page;
 	return ret;
 }
 
@@ -2858,36 +2873,10 @@ ext4_readpages(struct file *file, struct address_space *mapping,
 	return mpage_readpages(mapping, pages, nr_pages, ext4_get_block);
 }
 
-static void ext4_invalidatepage_free_endio(struct page *page, unsigned long offset)
-{
-	struct buffer_head *head, *bh;
-	unsigned int curr_off = 0;
-
-	if (!page_has_buffers(page))
-		return;
-	head = bh = page_buffers(page);
-	do {
-		if (offset <= curr_off && test_clear_buffer_uninit(bh)
-					&& bh->b_private) {
-			ext4_free_io_end(bh->b_private);
-			bh->b_private = NULL;
-			bh->b_end_io = NULL;
-		}
-		curr_off = curr_off + bh->b_size;
-		bh = bh->b_this_page;
-	} while (bh != head);
-}
-
 static void ext4_invalidatepage(struct page *page, unsigned long offset)
 {
 	trace_ext4_invalidatepage(page, offset);
 
-	/*
-	 * free any io_end structure allocated for buffers to be discarded
-	 */
-	if (ext4_should_dioread_nolock(page->mapping->host))
-		ext4_invalidatepage_free_endio(page, offset);
-
 	/* No journalling happens on data buffers when this function is used */
 	WARN_ON(page_has_buffers(page) && buffer_jbd(page_buffers(page)));
 
@@ -2977,9 +2966,9 @@ static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset,
 	if (!(io_end->flag & EXT4_IO_END_UNWRITTEN)) {
 		ext4_free_io_end(io_end);
 out:
+		inode_dio_done(inode);
 		if (is_async)
 			aio_complete(iocb, ret, 0);
-		inode_dio_done(inode);
 		return;
 	}
 
@@ -2993,65 +2982,6 @@ out:
 	ext4_add_complete_io(io_end);
 }
 
-static void ext4_end_io_buffer_write(struct buffer_head *bh, int uptodate)
-{
-	ext4_io_end_t *io_end = bh->b_private;
-	struct inode *inode;
-
-	if (!test_clear_buffer_uninit(bh) || !io_end)
-		goto out;
-
-	if (!(io_end->inode->i_sb->s_flags & MS_ACTIVE)) {
-		ext4_msg(io_end->inode->i_sb, KERN_INFO,
-			 "sb umounted, discard end_io request for inode %lu",
-			 io_end->inode->i_ino);
-		ext4_free_io_end(io_end);
-		goto out;
-	}
-
-	/*
-	 * It may be over-defensive here to check EXT4_IO_END_UNWRITTEN now,
-	 * but being more careful is always safe for the future change.
-	 */
-	inode = io_end->inode;
-	ext4_set_io_unwritten_flag(inode, io_end);
-	ext4_add_complete_io(io_end);
-out:
-	bh->b_private = NULL;
-	bh->b_end_io = NULL;
-	clear_buffer_uninit(bh);
-	end_buffer_async_write(bh, uptodate);
-}
-
-static int ext4_set_bh_endio(struct buffer_head *bh, struct inode *inode)
-{
-	ext4_io_end_t *io_end;
-	struct page *page = bh->b_page;
-	loff_t offset = (sector_t)page->index << PAGE_CACHE_SHIFT;
-	size_t size = bh->b_size;
-
-retry:
-	io_end = ext4_init_io_end(inode, GFP_ATOMIC);
-	if (!io_end) {
-		pr_warn_ratelimited("%s: allocation fail\n", __func__);
-		schedule();
-		goto retry;
-	}
-	io_end->offset = offset;
-	io_end->size = size;
-	/*
-	 * We need to hold a reference to the page to make sure it
-	 * doesn't get evicted before ext4_end_io_work() has a chance
-	 * to convert the extent from written to unwritten.
-	 */
-	io_end->page = page;
-	get_page(io_end->page);
-
-	bh->b_private = io_end;
-	bh->b_end_io = ext4_end_io_buffer_write;
-	return 0;
-}
-
 /*
  * For ext4 extent files, ext4 will do direct-io write to holes,
  * preallocated extents, and those write extend the file, no need to
@@ -3557,16 +3487,16 @@ int ext4_punch_hole(struct file *file, loff_t offset, loff_t length)
 	if (!S_ISREG(inode->i_mode))
 		return -EOPNOTSUPP;
 
-	if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) {
-		/* TODO: Add support for non extent hole punching */
-		return -EOPNOTSUPP;
-	}
+	if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
+		return ext4_ind_punch_hole(file, offset, length);
 
 	if (EXT4_SB(inode->i_sb)->s_cluster_ratio > 1) {
 		/* TODO: Add support for bigalloc file systems */
 		return -EOPNOTSUPP;
 	}
 
+	trace_ext4_punch_hole(inode, offset, length);
+
 	return ext4_ext_punch_hole(file, offset, length);
 }
 
@@ -3660,11 +3590,8 @@ static int __ext4_get_inode_loc(struct inode *inode,
 	iloc->offset = (inode_offset % inodes_per_block) * EXT4_INODE_SIZE(sb);
 
 	bh = sb_getblk(sb, block);
-	if (!bh) {
-		EXT4_ERROR_INODE_BLOCK(inode, block,
-				       "unable to read itable block");
-		return -EIO;
-	}
+	if (unlikely(!bh))
+		return -ENOMEM;
 	if (!buffer_uptodate(bh)) {
 		lock_buffer(bh);
 
@@ -3696,7 +3623,7 @@ static int __ext4_get_inode_loc(struct inode *inode,
 
 			/* Is the inode bitmap in cache? */
 			bitmap_bh = sb_getblk(sb, ext4_inode_bitmap(sb, gdp));
-			if (!bitmap_bh)
+			if (unlikely(!bitmap_bh))
 				goto make_io;
 
 			/*
@@ -4404,8 +4331,9 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
 
 		/* (user+group)*(old+new) structure, inode write (sb,
 		 * inode block, ? - but truncate inode update has it) */
-		handle = ext4_journal_start(inode, (EXT4_MAXQUOTAS_INIT_BLOCKS(inode->i_sb)+
-			EXT4_MAXQUOTAS_DEL_BLOCKS(inode->i_sb))+3);
+		handle = ext4_journal_start(inode, EXT4_HT_QUOTA,
+			(EXT4_MAXQUOTAS_INIT_BLOCKS(inode->i_sb) +
+			 EXT4_MAXQUOTAS_DEL_BLOCKS(inode->i_sb)) + 3);
 		if (IS_ERR(handle)) {
 			error = PTR_ERR(handle);
 			goto err_out;
@@ -4440,7 +4368,7 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
 	    (attr->ia_size < inode->i_size)) {
 		handle_t *handle;
 
-		handle = ext4_journal_start(inode, 3);
+		handle = ext4_journal_start(inode, EXT4_HT_INODE, 3);
 		if (IS_ERR(handle)) {
 			error = PTR_ERR(handle);
 			goto err_out;
@@ -4460,7 +4388,8 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
 							    attr->ia_size);
 				if (error) {
 					/* Do as much error cleanup as possible */
-					handle = ext4_journal_start(inode, 3);
+					handle = ext4_journal_start(inode,
+								    EXT4_HT_INODE, 3);
 					if (IS_ERR(handle)) {
 						ext4_orphan_del(NULL, inode);
 						goto err_out;
@@ -4801,7 +4730,7 @@ void ext4_dirty_inode(struct inode *inode, int flags)
 {
 	handle_t *handle;
 
-	handle = ext4_journal_start(inode, 2);
+	handle = ext4_journal_start(inode, EXT4_HT_INODE, 2);
 	if (IS_ERR(handle))
 		goto out;
 
@@ -4902,7 +4831,7 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val)
 
 	/* Finally we can mark the inode as dirty. */
 
-	handle = ext4_journal_start(inode, 1);
+	handle = ext4_journal_start(inode, EXT4_HT_INODE, 1);
 	if (IS_ERR(handle))
 		return PTR_ERR(handle);
 
@@ -4980,7 +4909,8 @@ int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
 	else
 		get_block = ext4_get_block;
 retry_alloc:
-	handle = ext4_journal_start(inode, ext4_writepage_trans_blocks(inode));
+	handle = ext4_journal_start(inode, EXT4_HT_WRITE_PAGE,
+				    ext4_writepage_trans_blocks(inode));
 	if (IS_ERR(handle)) {
 		ret = VM_FAULT_SIGBUS;
 		goto out;