diff options
author | Sage Weil <sage@inktank.com> | 2013-08-15 14:11:45 -0400 |
---|---|---|
committer | Sage Weil <sage@inktank.com> | 2013-08-15 14:11:45 -0400 |
commit | ee3e542fec6e69bc9fb668698889a37d93950ddf (patch) | |
tree | e74ee766a4764769ef1d3d45d266b4dea64101d3 /fs/ext4/extents_status.c | |
parent | fe2a801b50c0bb8039d627e5ae1fec249d10ff39 (diff) | |
parent | f1d6e17f540af37bb1891480143669ba7636c4cf (diff) |
Merge remote-tracking branch 'linus/master' into testing
Diffstat (limited to 'fs/ext4/extents_status.c')
-rw-r--r-- | fs/ext4/extents_status.c | 144 |
1 files changed, 102 insertions, 42 deletions
diff --git a/fs/ext4/extents_status.c b/fs/ext4/extents_status.c | |||
index e6941e622d31..91cb110da1b4 100644 | |||
--- a/fs/ext4/extents_status.c | |||
+++ b/fs/ext4/extents_status.c | |||
@@ -10,6 +10,7 @@ | |||
10 | * Ext4 extents status tree core functions. | 10 | * Ext4 extents status tree core functions. |
11 | */ | 11 | */ |
12 | #include <linux/rbtree.h> | 12 | #include <linux/rbtree.h> |
13 | #include <linux/list_sort.h> | ||
13 | #include "ext4.h" | 14 | #include "ext4.h" |
14 | #include "extents_status.h" | 15 | #include "extents_status.h" |
15 | #include "ext4_extents.h" | 16 | #include "ext4_extents.h" |
@@ -147,6 +148,8 @@ static int __es_remove_extent(struct inode *inode, ext4_lblk_t lblk, | |||
147 | ext4_lblk_t end); | 148 | ext4_lblk_t end); |
148 | static int __es_try_to_reclaim_extents(struct ext4_inode_info *ei, | 149 | static int __es_try_to_reclaim_extents(struct ext4_inode_info *ei, |
149 | int nr_to_scan); | 150 | int nr_to_scan); |
151 | static int __ext4_es_shrink(struct ext4_sb_info *sbi, int nr_to_scan, | ||
152 | struct ext4_inode_info *locked_ei); | ||
150 | 153 | ||
151 | int __init ext4_init_es(void) | 154 | int __init ext4_init_es(void) |
152 | { | 155 | { |
@@ -291,7 +294,6 @@ out: | |||
291 | 294 | ||
292 | read_unlock(&EXT4_I(inode)->i_es_lock); | 295 | read_unlock(&EXT4_I(inode)->i_es_lock); |
293 | 296 | ||
294 | ext4_es_lru_add(inode); | ||
295 | trace_ext4_es_find_delayed_extent_range_exit(inode, es); | 297 | trace_ext4_es_find_delayed_extent_range_exit(inode, es); |
296 | } | 298 | } |
297 | 299 | ||
@@ -439,7 +441,7 @@ static void ext4_es_insert_extent_ext_check(struct inode *inode, | |||
439 | */ | 441 | */ |
440 | if (!ext4_es_is_written(es) && !ext4_es_is_unwritten(es)) { | 442 | if (!ext4_es_is_written(es) && !ext4_es_is_unwritten(es)) { |
441 | if (in_range(es->es_lblk, ee_block, ee_len)) { | 443 | if (in_range(es->es_lblk, ee_block, ee_len)) { |
442 | pr_warn("ES insert assertation failed for " | 444 | pr_warn("ES insert assertion failed for " |
443 | "inode: %lu we can find an extent " | 445 | "inode: %lu we can find an extent " |
444 | "at block [%d/%d/%llu/%c], but we " | 446 | "at block [%d/%d/%llu/%c], but we " |
445 | "want to add an delayed/hole extent " | 447 | "want to add an delayed/hole extent " |
@@ -458,7 +460,7 @@ static void ext4_es_insert_extent_ext_check(struct inode *inode, | |||
458 | */ | 460 | */ |
459 | if (es->es_lblk < ee_block || | 461 | if (es->es_lblk < ee_block || |
460 | ext4_es_pblock(es) != ee_start + es->es_lblk - ee_block) { | 462 | ext4_es_pblock(es) != ee_start + es->es_lblk - ee_block) { |
461 | pr_warn("ES insert assertation failed for inode: %lu " | 463 | pr_warn("ES insert assertion failed for inode: %lu " |
462 | "ex_status [%d/%d/%llu/%c] != " | 464 | "ex_status [%d/%d/%llu/%c] != " |
463 | "es_status [%d/%d/%llu/%c]\n", inode->i_ino, | 465 | "es_status [%d/%d/%llu/%c]\n", inode->i_ino, |
464 | ee_block, ee_len, ee_start, | 466 | ee_block, ee_len, ee_start, |
@@ -468,7 +470,7 @@ static void ext4_es_insert_extent_ext_check(struct inode *inode, | |||
468 | } | 470 | } |
469 | 471 | ||
470 | if (ee_status ^ es_status) { | 472 | if (ee_status ^ es_status) { |
471 | pr_warn("ES insert assertation failed for inode: %lu " | 473 | pr_warn("ES insert assertion failed for inode: %lu " |
472 | "ex_status [%d/%d/%llu/%c] != " | 474 | "ex_status [%d/%d/%llu/%c] != " |
473 | "es_status [%d/%d/%llu/%c]\n", inode->i_ino, | 475 | "es_status [%d/%d/%llu/%c]\n", inode->i_ino, |
474 | ee_block, ee_len, ee_start, | 476 | ee_block, ee_len, ee_start, |
@@ -481,7 +483,7 @@ static void ext4_es_insert_extent_ext_check(struct inode *inode, | |||
481 | * that we don't want to add an written/unwritten extent. | 483 | * that we don't want to add an written/unwritten extent. |
482 | */ | 484 | */ |
483 | if (!ext4_es_is_delayed(es) && !ext4_es_is_hole(es)) { | 485 | if (!ext4_es_is_delayed(es) && !ext4_es_is_hole(es)) { |
484 | pr_warn("ES insert assertation failed for inode: %lu " | 486 | pr_warn("ES insert assertion failed for inode: %lu " |
485 | "can't find an extent at block %d but we want " | 487 | "can't find an extent at block %d but we want " |
486 | "to add an written/unwritten extent " | 488 | "to add an written/unwritten extent " |
487 | "[%d/%d/%llu/%llx]\n", inode->i_ino, | 489 | "[%d/%d/%llu/%llx]\n", inode->i_ino, |
@@ -519,7 +521,7 @@ static void ext4_es_insert_extent_ind_check(struct inode *inode, | |||
519 | * We want to add a delayed/hole extent but this | 521 | * We want to add a delayed/hole extent but this |
520 | * block has been allocated. | 522 | * block has been allocated. |
521 | */ | 523 | */ |
522 | pr_warn("ES insert assertation failed for inode: %lu " | 524 | pr_warn("ES insert assertion failed for inode: %lu " |
523 | "We can find blocks but we want to add a " | 525 | "We can find blocks but we want to add a " |
524 | "delayed/hole extent [%d/%d/%llu/%llx]\n", | 526 | "delayed/hole extent [%d/%d/%llu/%llx]\n", |
525 | inode->i_ino, es->es_lblk, es->es_len, | 527 | inode->i_ino, es->es_lblk, es->es_len, |
@@ -527,13 +529,13 @@ static void ext4_es_insert_extent_ind_check(struct inode *inode, | |||
527 | return; | 529 | return; |
528 | } else if (ext4_es_is_written(es)) { | 530 | } else if (ext4_es_is_written(es)) { |
529 | if (retval != es->es_len) { | 531 | if (retval != es->es_len) { |
530 | pr_warn("ES insert assertation failed for " | 532 | pr_warn("ES insert assertion failed for " |
531 | "inode: %lu retval %d != es_len %d\n", | 533 | "inode: %lu retval %d != es_len %d\n", |
532 | inode->i_ino, retval, es->es_len); | 534 | inode->i_ino, retval, es->es_len); |
533 | return; | 535 | return; |
534 | } | 536 | } |
535 | if (map.m_pblk != ext4_es_pblock(es)) { | 537 | if (map.m_pblk != ext4_es_pblock(es)) { |
536 | pr_warn("ES insert assertation failed for " | 538 | pr_warn("ES insert assertion failed for " |
537 | "inode: %lu m_pblk %llu != " | 539 | "inode: %lu m_pblk %llu != " |
538 | "es_pblk %llu\n", | 540 | "es_pblk %llu\n", |
539 | inode->i_ino, map.m_pblk, | 541 | inode->i_ino, map.m_pblk, |
@@ -549,7 +551,7 @@ static void ext4_es_insert_extent_ind_check(struct inode *inode, | |||
549 | } | 551 | } |
550 | } else if (retval == 0) { | 552 | } else if (retval == 0) { |
551 | if (ext4_es_is_written(es)) { | 553 | if (ext4_es_is_written(es)) { |
552 | pr_warn("ES insert assertation failed for inode: %lu " | 554 | pr_warn("ES insert assertion failed for inode: %lu " |
553 | "We can't find the block but we want to add " | 555 | "We can't find the block but we want to add " |
554 | "an written extent [%d/%d/%llu/%llx]\n", | 556 | "an written extent [%d/%d/%llu/%llx]\n", |
555 | inode->i_ino, es->es_lblk, es->es_len, | 557 | inode->i_ino, es->es_lblk, es->es_len, |
@@ -632,10 +634,8 @@ out: | |||
632 | } | 634 | } |
633 | 635 | ||
634 | /* | 636 | /* |
635 | * ext4_es_insert_extent() adds a space to a extent status tree. | 637 | * ext4_es_insert_extent() adds information to an inode's extent |
636 | * | 638 | * status tree. |
637 | * ext4_es_insert_extent is called by ext4_da_write_begin and | ||
638 | * ext4_es_remove_extent. | ||
639 | * | 639 | * |
640 | * Return 0 on success, error code on failure. | 640 | * Return 0 on success, error code on failure. |
641 | */ | 641 | */ |
@@ -667,12 +667,17 @@ int ext4_es_insert_extent(struct inode *inode, ext4_lblk_t lblk, | |||
667 | err = __es_remove_extent(inode, lblk, end); | 667 | err = __es_remove_extent(inode, lblk, end); |
668 | if (err != 0) | 668 | if (err != 0) |
669 | goto error; | 669 | goto error; |
670 | retry: | ||
670 | err = __es_insert_extent(inode, &newes); | 671 | err = __es_insert_extent(inode, &newes); |
672 | if (err == -ENOMEM && __ext4_es_shrink(EXT4_SB(inode->i_sb), 1, | ||
673 | EXT4_I(inode))) | ||
674 | goto retry; | ||
675 | if (err == -ENOMEM && !ext4_es_is_delayed(&newes)) | ||
676 | err = 0; | ||
671 | 677 | ||
672 | error: | 678 | error: |
673 | write_unlock(&EXT4_I(inode)->i_es_lock); | 679 | write_unlock(&EXT4_I(inode)->i_es_lock); |
674 | 680 | ||
675 | ext4_es_lru_add(inode); | ||
676 | ext4_es_print_tree(inode); | 681 | ext4_es_print_tree(inode); |
677 | 682 | ||
678 | return err; | 683 | return err; |
@@ -734,7 +739,6 @@ out: | |||
734 | 739 | ||
735 | read_unlock(&EXT4_I(inode)->i_es_lock); | 740 | read_unlock(&EXT4_I(inode)->i_es_lock); |
736 | 741 | ||
737 | ext4_es_lru_add(inode); | ||
738 | trace_ext4_es_lookup_extent_exit(inode, es, found); | 742 | trace_ext4_es_lookup_extent_exit(inode, es, found); |
739 | return found; | 743 | return found; |
740 | } | 744 | } |
@@ -748,8 +752,10 @@ static int __es_remove_extent(struct inode *inode, ext4_lblk_t lblk, | |||
748 | struct extent_status orig_es; | 752 | struct extent_status orig_es; |
749 | ext4_lblk_t len1, len2; | 753 | ext4_lblk_t len1, len2; |
750 | ext4_fsblk_t block; | 754 | ext4_fsblk_t block; |
751 | int err = 0; | 755 | int err; |
752 | 756 | ||
757 | retry: | ||
758 | err = 0; | ||
753 | es = __es_tree_search(&tree->root, lblk); | 759 | es = __es_tree_search(&tree->root, lblk); |
754 | if (!es) | 760 | if (!es) |
755 | goto out; | 761 | goto out; |
@@ -784,6 +790,10 @@ static int __es_remove_extent(struct inode *inode, ext4_lblk_t lblk, | |||
784 | if (err) { | 790 | if (err) { |
785 | es->es_lblk = orig_es.es_lblk; | 791 | es->es_lblk = orig_es.es_lblk; |
786 | es->es_len = orig_es.es_len; | 792 | es->es_len = orig_es.es_len; |
793 | if ((err == -ENOMEM) && | ||
794 | __ext4_es_shrink(EXT4_SB(inode->i_sb), 1, | ||
795 | EXT4_I(inode))) | ||
796 | goto retry; | ||
787 | goto out; | 797 | goto out; |
788 | } | 798 | } |
789 | } else { | 799 | } else { |
@@ -878,38 +888,64 @@ int ext4_es_zeroout(struct inode *inode, struct ext4_extent *ex) | |||
878 | EXTENT_STATUS_WRITTEN); | 888 | EXTENT_STATUS_WRITTEN); |
879 | } | 889 | } |
880 | 890 | ||
881 | static int ext4_es_shrink(struct shrinker *shrink, struct shrink_control *sc) | 891 | static int ext4_inode_touch_time_cmp(void *priv, struct list_head *a, |
892 | struct list_head *b) | ||
893 | { | ||
894 | struct ext4_inode_info *eia, *eib; | ||
895 | eia = list_entry(a, struct ext4_inode_info, i_es_lru); | ||
896 | eib = list_entry(b, struct ext4_inode_info, i_es_lru); | ||
897 | |||
898 | if (eia->i_touch_when == eib->i_touch_when) | ||
899 | return 0; | ||
900 | if (time_after(eia->i_touch_when, eib->i_touch_when)) | ||
901 | return 1; | ||
902 | else | ||
903 | return -1; | ||
904 | } | ||
905 | |||
906 | static int __ext4_es_shrink(struct ext4_sb_info *sbi, int nr_to_scan, | ||
907 | struct ext4_inode_info *locked_ei) | ||
882 | { | 908 | { |
883 | struct ext4_sb_info *sbi = container_of(shrink, | ||
884 | struct ext4_sb_info, s_es_shrinker); | ||
885 | struct ext4_inode_info *ei; | 909 | struct ext4_inode_info *ei; |
886 | struct list_head *cur, *tmp, scanned; | 910 | struct list_head *cur, *tmp; |
887 | int nr_to_scan = sc->nr_to_scan; | 911 | LIST_HEAD(skiped); |
888 | int ret, nr_shrunk = 0; | 912 | int ret, nr_shrunk = 0; |
889 | 913 | ||
890 | ret = percpu_counter_read_positive(&sbi->s_extent_cache_cnt); | 914 | spin_lock(&sbi->s_es_lru_lock); |
891 | trace_ext4_es_shrink_enter(sbi->s_sb, nr_to_scan, ret); | ||
892 | |||
893 | if (!nr_to_scan) | ||
894 | return ret; | ||
895 | 915 | ||
896 | INIT_LIST_HEAD(&scanned); | 916 | /* |
917 | * If the inode that is at the head of LRU list is newer than | ||
918 | * last_sorted time, that means that we need to sort this list. | ||
919 | */ | ||
920 | ei = list_first_entry(&sbi->s_es_lru, struct ext4_inode_info, i_es_lru); | ||
921 | if (sbi->s_es_last_sorted < ei->i_touch_when) { | ||
922 | list_sort(NULL, &sbi->s_es_lru, ext4_inode_touch_time_cmp); | ||
923 | sbi->s_es_last_sorted = jiffies; | ||
924 | } | ||
897 | 925 | ||
898 | spin_lock(&sbi->s_es_lru_lock); | ||
899 | list_for_each_safe(cur, tmp, &sbi->s_es_lru) { | 926 | list_for_each_safe(cur, tmp, &sbi->s_es_lru) { |
900 | list_move_tail(cur, &scanned); | 927 | /* |
928 | * If we have already reclaimed all extents from extent | ||
929 | * status tree, just stop the loop immediately. | ||
930 | */ | ||
931 | if (percpu_counter_read_positive(&sbi->s_extent_cache_cnt) == 0) | ||
932 | break; | ||
901 | 933 | ||
902 | ei = list_entry(cur, struct ext4_inode_info, i_es_lru); | 934 | ei = list_entry(cur, struct ext4_inode_info, i_es_lru); |
903 | 935 | ||
904 | read_lock(&ei->i_es_lock); | 936 | /* Skip the inode that is newer than the last_sorted time */ |
905 | if (ei->i_es_lru_nr == 0) { | 937 | if (sbi->s_es_last_sorted < ei->i_touch_when) { |
906 | read_unlock(&ei->i_es_lock); | 938 | list_move_tail(cur, &skiped); |
907 | continue; | 939 | continue; |
908 | } | 940 | } |
909 | read_unlock(&ei->i_es_lock); | 941 | |
942 | if (ei->i_es_lru_nr == 0 || ei == locked_ei) | ||
943 | continue; | ||
910 | 944 | ||
911 | write_lock(&ei->i_es_lock); | 945 | write_lock(&ei->i_es_lock); |
912 | ret = __es_try_to_reclaim_extents(ei, nr_to_scan); | 946 | ret = __es_try_to_reclaim_extents(ei, nr_to_scan); |
947 | if (ei->i_es_lru_nr == 0) | ||
948 | list_del_init(&ei->i_es_lru); | ||
913 | write_unlock(&ei->i_es_lock); | 949 | write_unlock(&ei->i_es_lock); |
914 | 950 | ||
915 | nr_shrunk += ret; | 951 | nr_shrunk += ret; |
@@ -917,29 +953,50 @@ static int ext4_es_shrink(struct shrinker *shrink, struct shrink_control *sc) | |||
917 | if (nr_to_scan == 0) | 953 | if (nr_to_scan == 0) |
918 | break; | 954 | break; |
919 | } | 955 | } |
920 | list_splice_tail(&scanned, &sbi->s_es_lru); | 956 | |
957 | /* Move the newer inodes into the tail of the LRU list. */ | ||
958 | list_splice_tail(&skiped, &sbi->s_es_lru); | ||
921 | spin_unlock(&sbi->s_es_lru_lock); | 959 | spin_unlock(&sbi->s_es_lru_lock); |
922 | 960 | ||
961 | if (locked_ei && nr_shrunk == 0) | ||
962 | nr_shrunk = __es_try_to_reclaim_extents(ei, nr_to_scan); | ||
963 | |||
964 | return nr_shrunk; | ||
965 | } | ||
966 | |||
967 | static int ext4_es_shrink(struct shrinker *shrink, struct shrink_control *sc) | ||
968 | { | ||
969 | struct ext4_sb_info *sbi = container_of(shrink, | ||
970 | struct ext4_sb_info, s_es_shrinker); | ||
971 | int nr_to_scan = sc->nr_to_scan; | ||
972 | int ret, nr_shrunk; | ||
973 | |||
974 | ret = percpu_counter_read_positive(&sbi->s_extent_cache_cnt); | ||
975 | trace_ext4_es_shrink_enter(sbi->s_sb, nr_to_scan, ret); | ||
976 | |||
977 | if (!nr_to_scan) | ||
978 | return ret; | ||
979 | |||
980 | nr_shrunk = __ext4_es_shrink(sbi, nr_to_scan, NULL); | ||
981 | |||
923 | ret = percpu_counter_read_positive(&sbi->s_extent_cache_cnt); | 982 | ret = percpu_counter_read_positive(&sbi->s_extent_cache_cnt); |
924 | trace_ext4_es_shrink_exit(sbi->s_sb, nr_shrunk, ret); | 983 | trace_ext4_es_shrink_exit(sbi->s_sb, nr_shrunk, ret); |
925 | return ret; | 984 | return ret; |
926 | } | 985 | } |
927 | 986 | ||
928 | void ext4_es_register_shrinker(struct super_block *sb) | 987 | void ext4_es_register_shrinker(struct ext4_sb_info *sbi) |
929 | { | 988 | { |
930 | struct ext4_sb_info *sbi; | ||
931 | |||
932 | sbi = EXT4_SB(sb); | ||
933 | INIT_LIST_HEAD(&sbi->s_es_lru); | 989 | INIT_LIST_HEAD(&sbi->s_es_lru); |
934 | spin_lock_init(&sbi->s_es_lru_lock); | 990 | spin_lock_init(&sbi->s_es_lru_lock); |
991 | sbi->s_es_last_sorted = 0; | ||
935 | sbi->s_es_shrinker.shrink = ext4_es_shrink; | 992 | sbi->s_es_shrinker.shrink = ext4_es_shrink; |
936 | sbi->s_es_shrinker.seeks = DEFAULT_SEEKS; | 993 | sbi->s_es_shrinker.seeks = DEFAULT_SEEKS; |
937 | register_shrinker(&sbi->s_es_shrinker); | 994 | register_shrinker(&sbi->s_es_shrinker); |
938 | } | 995 | } |
939 | 996 | ||
940 | void ext4_es_unregister_shrinker(struct super_block *sb) | 997 | void ext4_es_unregister_shrinker(struct ext4_sb_info *sbi) |
941 | { | 998 | { |
942 | unregister_shrinker(&EXT4_SB(sb)->s_es_shrinker); | 999 | unregister_shrinker(&sbi->s_es_shrinker); |
943 | } | 1000 | } |
944 | 1001 | ||
945 | void ext4_es_lru_add(struct inode *inode) | 1002 | void ext4_es_lru_add(struct inode *inode) |
@@ -947,11 +1004,14 @@ void ext4_es_lru_add(struct inode *inode) | |||
947 | struct ext4_inode_info *ei = EXT4_I(inode); | 1004 | struct ext4_inode_info *ei = EXT4_I(inode); |
948 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); | 1005 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); |
949 | 1006 | ||
1007 | ei->i_touch_when = jiffies; | ||
1008 | |||
1009 | if (!list_empty(&ei->i_es_lru)) | ||
1010 | return; | ||
1011 | |||
950 | spin_lock(&sbi->s_es_lru_lock); | 1012 | spin_lock(&sbi->s_es_lru_lock); |
951 | if (list_empty(&ei->i_es_lru)) | 1013 | if (list_empty(&ei->i_es_lru)) |
952 | list_add_tail(&ei->i_es_lru, &sbi->s_es_lru); | 1014 | list_add_tail(&ei->i_es_lru, &sbi->s_es_lru); |
953 | else | ||
954 | list_move_tail(&ei->i_es_lru, &sbi->s_es_lru); | ||
955 | spin_unlock(&sbi->s_es_lru_lock); | 1015 | spin_unlock(&sbi->s_es_lru_lock); |
956 | } | 1016 | } |
957 | 1017 | ||