diff options
author | Dmitry Monakhov <dmonakhov@openvz.org> | 2012-09-26 12:54:52 -0400 |
---|---|---|
committer | Theodore Ts'o <tytso@mit.edu> | 2012-09-26 12:54:52 -0400 |
commit | 8c85447391735469f407add6fdb0630ce59d7f6d (patch) | |
tree | 9c047814f2aaaf0a61a611605a88c3c9f9e3184b /fs/ext4 | |
parent | bb5574880574fea38c674942cf0360253a2d60fe (diff) |
ext4: reimplement uninit extent optimization for move_extent_per_page()
An uninitialized extent may become initialized (by a parallel writeback task)
at any moment after we drop i_data_sem, so we have to recheck the extent's
state once we hold both the page's lock and i_data_sem.
If we are about to change a page's mapping we must hold the page's lock in
order to serialize against other users.
Signed-off-by: Dmitry Monakhov <dmonakhov@openvz.org>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
Diffstat (limited to 'fs/ext4')
-rw-r--r-- | fs/ext4/move_extent.c | 81 |
1 files changed, 76 insertions, 5 deletions
diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c index c87a746450e5..c2e47da7c2ba 100644 --- a/fs/ext4/move_extent.c +++ b/fs/ext4/move_extent.c | |||
@@ -595,6 +595,43 @@ mext_calc_swap_extents(struct ext4_extent *tmp_dext, | |||
595 | } | 595 | } |
596 | 596 | ||
597 | /** | 597 | /** |
598 | * mext_check_coverage - Check that all extents in range has the same type | ||
599 | * | ||
600 | * @inode: inode in question | ||
601 | * @from: block offset of inode | ||
602 | * @count: block count to be checked | ||
603 | * @uninit: extents expected to be uninitialized | ||
604 | * @err: pointer to save error value | ||
605 | * | ||
606 | * Return 1 if all extents in range has expected type, and zero otherwise. | ||
607 | */ | ||
608 | static int | ||
609 | mext_check_coverage(struct inode *inode, ext4_lblk_t from, ext4_lblk_t count, | ||
610 | int uninit, int *err) | ||
611 | { | ||
612 | struct ext4_ext_path *path = NULL; | ||
613 | struct ext4_extent *ext; | ||
614 | ext4_lblk_t last = from + count; | ||
615 | while (from < last) { | ||
616 | *err = get_ext_path(inode, from, &path); | ||
617 | if (*err) | ||
618 | return 0; | ||
619 | ext = path[ext_depth(inode)].p_ext; | ||
620 | if (!ext) { | ||
621 | ext4_ext_drop_refs(path); | ||
622 | return 0; | ||
623 | } | ||
624 | if (uninit != ext4_ext_is_uninitialized(ext)) { | ||
625 | ext4_ext_drop_refs(path); | ||
626 | return 0; | ||
627 | } | ||
628 | from += ext4_ext_get_actual_len(ext); | ||
629 | ext4_ext_drop_refs(path); | ||
630 | } | ||
631 | return 1; | ||
632 | } | ||
633 | |||
634 | /** | ||
598 | * mext_replace_branches - Replace original extents with new extents | 635 | * mext_replace_branches - Replace original extents with new extents |
599 | * | 636 | * |
600 | * @handle: journal handle | 637 | * @handle: journal handle |
@@ -629,9 +666,6 @@ mext_replace_branches(handle_t *handle, struct inode *orig_inode, | |||
629 | int replaced_count = 0; | 666 | int replaced_count = 0; |
630 | int dext_alen; | 667 | int dext_alen; |
631 | 668 | ||
632 | /* Protect extent trees against block allocations via delalloc */ | ||
633 | double_down_write_data_sem(orig_inode, donor_inode); | ||
634 | |||
635 | /* Get the original extent for the block "orig_off" */ | 669 | /* Get the original extent for the block "orig_off" */ |
636 | *err = get_ext_path(orig_inode, orig_off, &orig_path); | 670 | *err = get_ext_path(orig_inode, orig_off, &orig_path); |
637 | if (*err) | 671 | if (*err) |
@@ -730,8 +764,6 @@ out: | |||
730 | ext4_ext_invalidate_cache(orig_inode); | 764 | ext4_ext_invalidate_cache(orig_inode); |
731 | ext4_ext_invalidate_cache(donor_inode); | 765 | ext4_ext_invalidate_cache(donor_inode); |
732 | 766 | ||
733 | double_up_write_data_sem(orig_inode, donor_inode); | ||
734 | |||
735 | return replaced_count; | 767 | return replaced_count; |
736 | } | 768 | } |
737 | 769 | ||
@@ -925,7 +957,46 @@ again: | |||
925 | pagep); | 957 | pagep); |
926 | if (unlikely(*err < 0)) | 958 | if (unlikely(*err < 0)) |
927 | goto stop_journal; | 959 | goto stop_journal; |
960 | /* | ||
961 | * If orig extent was uninitialized it can become initialized | ||
962 | * at any time after i_data_sem was dropped, in order to | ||
963 | * serialize with delalloc we have to recheck extent while we | ||
964 | * hold page's lock, if it is still the case data copy is not | ||
965 | * necessary, just swap data blocks between orig and donor. | ||
966 | */ | ||
967 | if (uninit) { | ||
968 | double_down_write_data_sem(orig_inode, donor_inode); | ||
969 | /* If any of extents in range became initialized we have to | ||
970 | * fallback to data copying */ | ||
971 | uninit = mext_check_coverage(orig_inode, orig_blk_offset, | ||
972 | block_len_in_page, 1, err); | ||
973 | if (*err) | ||
974 | goto drop_data_sem; | ||
928 | 975 | ||
976 | uninit &= mext_check_coverage(donor_inode, orig_blk_offset, | ||
977 | block_len_in_page, 1, err); | ||
978 | if (*err) | ||
979 | goto drop_data_sem; | ||
980 | |||
981 | if (!uninit) { | ||
982 | double_up_write_data_sem(orig_inode, donor_inode); | ||
983 | goto data_copy; | ||
984 | } | ||
985 | if ((page_has_private(pagep[0]) && | ||
986 | !try_to_release_page(pagep[0], 0)) || | ||
987 | (page_has_private(pagep[1]) && | ||
988 | !try_to_release_page(pagep[1], 0))) { | ||
989 | *err = -EBUSY; | ||
990 | goto drop_data_sem; | ||
991 | } | ||
992 | replaced_count = mext_replace_branches(handle, orig_inode, | ||
993 | donor_inode, orig_blk_offset, | ||
994 | block_len_in_page, err); | ||
995 | drop_data_sem: | ||
996 | double_up_write_data_sem(orig_inode, donor_inode); | ||
997 | goto unlock_pages; | ||
998 | } | ||
999 | data_copy: | ||
929 | *err = mext_page_mkuptodate(pagep[0], from, from + replaced_size); | 1000 | *err = mext_page_mkuptodate(pagep[0], from, from + replaced_size); |
930 | if (*err) | 1001 | if (*err) |
931 | goto unlock_pages; | 1002 | goto unlock_pages; |