aboutsummaryrefslogtreecommitdiffstats
path: root/fs/ext4/extents_status.c
diff options
context:
space:
mode:
authorSage Weil <sage@inktank.com>2013-08-15 14:11:45 -0400
committerSage Weil <sage@inktank.com>2013-08-15 14:11:45 -0400
commitee3e542fec6e69bc9fb668698889a37d93950ddf (patch)
treee74ee766a4764769ef1d3d45d266b4dea64101d3 /fs/ext4/extents_status.c
parentfe2a801b50c0bb8039d627e5ae1fec249d10ff39 (diff)
parentf1d6e17f540af37bb1891480143669ba7636c4cf (diff)
Merge remote-tracking branch 'linus/master' into testing
Diffstat (limited to 'fs/ext4/extents_status.c')
-rw-r--r--fs/ext4/extents_status.c144
1 files changed, 102 insertions, 42 deletions
diff --git a/fs/ext4/extents_status.c b/fs/ext4/extents_status.c
index e6941e622d31..91cb110da1b4 100644
--- a/fs/ext4/extents_status.c
+++ b/fs/ext4/extents_status.c
@@ -10,6 +10,7 @@
10 * Ext4 extents status tree core functions. 10 * Ext4 extents status tree core functions.
11 */ 11 */
12#include <linux/rbtree.h> 12#include <linux/rbtree.h>
13#include <linux/list_sort.h>
13#include "ext4.h" 14#include "ext4.h"
14#include "extents_status.h" 15#include "extents_status.h"
15#include "ext4_extents.h" 16#include "ext4_extents.h"
@@ -147,6 +148,8 @@ static int __es_remove_extent(struct inode *inode, ext4_lblk_t lblk,
147 ext4_lblk_t end); 148 ext4_lblk_t end);
148static int __es_try_to_reclaim_extents(struct ext4_inode_info *ei, 149static int __es_try_to_reclaim_extents(struct ext4_inode_info *ei,
149 int nr_to_scan); 150 int nr_to_scan);
151static int __ext4_es_shrink(struct ext4_sb_info *sbi, int nr_to_scan,
152 struct ext4_inode_info *locked_ei);
150 153
151int __init ext4_init_es(void) 154int __init ext4_init_es(void)
152{ 155{
@@ -291,7 +294,6 @@ out:
291 294
292 read_unlock(&EXT4_I(inode)->i_es_lock); 295 read_unlock(&EXT4_I(inode)->i_es_lock);
293 296
294 ext4_es_lru_add(inode);
295 trace_ext4_es_find_delayed_extent_range_exit(inode, es); 297 trace_ext4_es_find_delayed_extent_range_exit(inode, es);
296} 298}
297 299
@@ -439,7 +441,7 @@ static void ext4_es_insert_extent_ext_check(struct inode *inode,
439 */ 441 */
440 if (!ext4_es_is_written(es) && !ext4_es_is_unwritten(es)) { 442 if (!ext4_es_is_written(es) && !ext4_es_is_unwritten(es)) {
441 if (in_range(es->es_lblk, ee_block, ee_len)) { 443 if (in_range(es->es_lblk, ee_block, ee_len)) {
442 pr_warn("ES insert assertation failed for " 444 pr_warn("ES insert assertion failed for "
443 "inode: %lu we can find an extent " 445 "inode: %lu we can find an extent "
444 "at block [%d/%d/%llu/%c], but we " 446 "at block [%d/%d/%llu/%c], but we "
445 "want to add an delayed/hole extent " 447 "want to add an delayed/hole extent "
@@ -458,7 +460,7 @@ static void ext4_es_insert_extent_ext_check(struct inode *inode,
458 */ 460 */
459 if (es->es_lblk < ee_block || 461 if (es->es_lblk < ee_block ||
460 ext4_es_pblock(es) != ee_start + es->es_lblk - ee_block) { 462 ext4_es_pblock(es) != ee_start + es->es_lblk - ee_block) {
461 pr_warn("ES insert assertation failed for inode: %lu " 463 pr_warn("ES insert assertion failed for inode: %lu "
462 "ex_status [%d/%d/%llu/%c] != " 464 "ex_status [%d/%d/%llu/%c] != "
463 "es_status [%d/%d/%llu/%c]\n", inode->i_ino, 465 "es_status [%d/%d/%llu/%c]\n", inode->i_ino,
464 ee_block, ee_len, ee_start, 466 ee_block, ee_len, ee_start,
@@ -468,7 +470,7 @@ static void ext4_es_insert_extent_ext_check(struct inode *inode,
468 } 470 }
469 471
470 if (ee_status ^ es_status) { 472 if (ee_status ^ es_status) {
471 pr_warn("ES insert assertation failed for inode: %lu " 473 pr_warn("ES insert assertion failed for inode: %lu "
472 "ex_status [%d/%d/%llu/%c] != " 474 "ex_status [%d/%d/%llu/%c] != "
473 "es_status [%d/%d/%llu/%c]\n", inode->i_ino, 475 "es_status [%d/%d/%llu/%c]\n", inode->i_ino,
474 ee_block, ee_len, ee_start, 476 ee_block, ee_len, ee_start,
@@ -481,7 +483,7 @@ static void ext4_es_insert_extent_ext_check(struct inode *inode,
481 * that we don't want to add an written/unwritten extent. 483 * that we don't want to add an written/unwritten extent.
482 */ 484 */
483 if (!ext4_es_is_delayed(es) && !ext4_es_is_hole(es)) { 485 if (!ext4_es_is_delayed(es) && !ext4_es_is_hole(es)) {
484 pr_warn("ES insert assertation failed for inode: %lu " 486 pr_warn("ES insert assertion failed for inode: %lu "
485 "can't find an extent at block %d but we want " 487 "can't find an extent at block %d but we want "
486 "to add an written/unwritten extent " 488 "to add an written/unwritten extent "
487 "[%d/%d/%llu/%llx]\n", inode->i_ino, 489 "[%d/%d/%llu/%llx]\n", inode->i_ino,
@@ -519,7 +521,7 @@ static void ext4_es_insert_extent_ind_check(struct inode *inode,
519 * We want to add a delayed/hole extent but this 521 * We want to add a delayed/hole extent but this
520 * block has been allocated. 522 * block has been allocated.
521 */ 523 */
522 pr_warn("ES insert assertation failed for inode: %lu " 524 pr_warn("ES insert assertion failed for inode: %lu "
523 "We can find blocks but we want to add a " 525 "We can find blocks but we want to add a "
524 "delayed/hole extent [%d/%d/%llu/%llx]\n", 526 "delayed/hole extent [%d/%d/%llu/%llx]\n",
525 inode->i_ino, es->es_lblk, es->es_len, 527 inode->i_ino, es->es_lblk, es->es_len,
@@ -527,13 +529,13 @@ static void ext4_es_insert_extent_ind_check(struct inode *inode,
527 return; 529 return;
528 } else if (ext4_es_is_written(es)) { 530 } else if (ext4_es_is_written(es)) {
529 if (retval != es->es_len) { 531 if (retval != es->es_len) {
530 pr_warn("ES insert assertation failed for " 532 pr_warn("ES insert assertion failed for "
531 "inode: %lu retval %d != es_len %d\n", 533 "inode: %lu retval %d != es_len %d\n",
532 inode->i_ino, retval, es->es_len); 534 inode->i_ino, retval, es->es_len);
533 return; 535 return;
534 } 536 }
535 if (map.m_pblk != ext4_es_pblock(es)) { 537 if (map.m_pblk != ext4_es_pblock(es)) {
536 pr_warn("ES insert assertation failed for " 538 pr_warn("ES insert assertion failed for "
537 "inode: %lu m_pblk %llu != " 539 "inode: %lu m_pblk %llu != "
538 "es_pblk %llu\n", 540 "es_pblk %llu\n",
539 inode->i_ino, map.m_pblk, 541 inode->i_ino, map.m_pblk,
@@ -549,7 +551,7 @@ static void ext4_es_insert_extent_ind_check(struct inode *inode,
549 } 551 }
550 } else if (retval == 0) { 552 } else if (retval == 0) {
551 if (ext4_es_is_written(es)) { 553 if (ext4_es_is_written(es)) {
552 pr_warn("ES insert assertation failed for inode: %lu " 554 pr_warn("ES insert assertion failed for inode: %lu "
553 "We can't find the block but we want to add " 555 "We can't find the block but we want to add "
554 "an written extent [%d/%d/%llu/%llx]\n", 556 "an written extent [%d/%d/%llu/%llx]\n",
555 inode->i_ino, es->es_lblk, es->es_len, 557 inode->i_ino, es->es_lblk, es->es_len,
@@ -632,10 +634,8 @@ out:
632} 634}
633 635
634/* 636/*
635 * ext4_es_insert_extent() adds a space to a extent status tree. 637 * ext4_es_insert_extent() adds information to an inode's extent
636 * 638 * status tree.
637 * ext4_es_insert_extent is called by ext4_da_write_begin and
638 * ext4_es_remove_extent.
639 * 639 *
640 * Return 0 on success, error code on failure. 640 * Return 0 on success, error code on failure.
641 */ 641 */
@@ -667,12 +667,17 @@ int ext4_es_insert_extent(struct inode *inode, ext4_lblk_t lblk,
667 err = __es_remove_extent(inode, lblk, end); 667 err = __es_remove_extent(inode, lblk, end);
668 if (err != 0) 668 if (err != 0)
669 goto error; 669 goto error;
670retry:
670 err = __es_insert_extent(inode, &newes); 671 err = __es_insert_extent(inode, &newes);
672 if (err == -ENOMEM && __ext4_es_shrink(EXT4_SB(inode->i_sb), 1,
673 EXT4_I(inode)))
674 goto retry;
675 if (err == -ENOMEM && !ext4_es_is_delayed(&newes))
676 err = 0;
671 677
672error: 678error:
673 write_unlock(&EXT4_I(inode)->i_es_lock); 679 write_unlock(&EXT4_I(inode)->i_es_lock);
674 680
675 ext4_es_lru_add(inode);
676 ext4_es_print_tree(inode); 681 ext4_es_print_tree(inode);
677 682
678 return err; 683 return err;
@@ -734,7 +739,6 @@ out:
734 739
735 read_unlock(&EXT4_I(inode)->i_es_lock); 740 read_unlock(&EXT4_I(inode)->i_es_lock);
736 741
737 ext4_es_lru_add(inode);
738 trace_ext4_es_lookup_extent_exit(inode, es, found); 742 trace_ext4_es_lookup_extent_exit(inode, es, found);
739 return found; 743 return found;
740} 744}
@@ -748,8 +752,10 @@ static int __es_remove_extent(struct inode *inode, ext4_lblk_t lblk,
748 struct extent_status orig_es; 752 struct extent_status orig_es;
749 ext4_lblk_t len1, len2; 753 ext4_lblk_t len1, len2;
750 ext4_fsblk_t block; 754 ext4_fsblk_t block;
751 int err = 0; 755 int err;
752 756
757retry:
758 err = 0;
753 es = __es_tree_search(&tree->root, lblk); 759 es = __es_tree_search(&tree->root, lblk);
754 if (!es) 760 if (!es)
755 goto out; 761 goto out;
@@ -784,6 +790,10 @@ static int __es_remove_extent(struct inode *inode, ext4_lblk_t lblk,
784 if (err) { 790 if (err) {
785 es->es_lblk = orig_es.es_lblk; 791 es->es_lblk = orig_es.es_lblk;
786 es->es_len = orig_es.es_len; 792 es->es_len = orig_es.es_len;
793 if ((err == -ENOMEM) &&
794 __ext4_es_shrink(EXT4_SB(inode->i_sb), 1,
795 EXT4_I(inode)))
796 goto retry;
787 goto out; 797 goto out;
788 } 798 }
789 } else { 799 } else {
@@ -878,38 +888,64 @@ int ext4_es_zeroout(struct inode *inode, struct ext4_extent *ex)
878 EXTENT_STATUS_WRITTEN); 888 EXTENT_STATUS_WRITTEN);
879} 889}
880 890
881static int ext4_es_shrink(struct shrinker *shrink, struct shrink_control *sc) 891static int ext4_inode_touch_time_cmp(void *priv, struct list_head *a,
892 struct list_head *b)
893{
894 struct ext4_inode_info *eia, *eib;
895 eia = list_entry(a, struct ext4_inode_info, i_es_lru);
896 eib = list_entry(b, struct ext4_inode_info, i_es_lru);
897
898 if (eia->i_touch_when == eib->i_touch_when)
899 return 0;
900 if (time_after(eia->i_touch_when, eib->i_touch_when))
901 return 1;
902 else
903 return -1;
904}
905
906static int __ext4_es_shrink(struct ext4_sb_info *sbi, int nr_to_scan,
907 struct ext4_inode_info *locked_ei)
882{ 908{
883 struct ext4_sb_info *sbi = container_of(shrink,
884 struct ext4_sb_info, s_es_shrinker);
885 struct ext4_inode_info *ei; 909 struct ext4_inode_info *ei;
886 struct list_head *cur, *tmp, scanned; 910 struct list_head *cur, *tmp;
887 int nr_to_scan = sc->nr_to_scan; 911 LIST_HEAD(skiped);
888 int ret, nr_shrunk = 0; 912 int ret, nr_shrunk = 0;
889 913
890 ret = percpu_counter_read_positive(&sbi->s_extent_cache_cnt); 914 spin_lock(&sbi->s_es_lru_lock);
891 trace_ext4_es_shrink_enter(sbi->s_sb, nr_to_scan, ret);
892
893 if (!nr_to_scan)
894 return ret;
895 915
896 INIT_LIST_HEAD(&scanned); 916 /*
917 * If the inode that is at the head of LRU list is newer than
918 * last_sorted time, that means that we need to sort this list.
919 */
920 ei = list_first_entry(&sbi->s_es_lru, struct ext4_inode_info, i_es_lru);
921 if (sbi->s_es_last_sorted < ei->i_touch_when) {
922 list_sort(NULL, &sbi->s_es_lru, ext4_inode_touch_time_cmp);
923 sbi->s_es_last_sorted = jiffies;
924 }
897 925
898 spin_lock(&sbi->s_es_lru_lock);
899 list_for_each_safe(cur, tmp, &sbi->s_es_lru) { 926 list_for_each_safe(cur, tmp, &sbi->s_es_lru) {
900 list_move_tail(cur, &scanned); 927 /*
928 * If we have already reclaimed all extents from extent
929 * status tree, just stop the loop immediately.
930 */
931 if (percpu_counter_read_positive(&sbi->s_extent_cache_cnt) == 0)
932 break;
901 933
902 ei = list_entry(cur, struct ext4_inode_info, i_es_lru); 934 ei = list_entry(cur, struct ext4_inode_info, i_es_lru);
903 935
904 read_lock(&ei->i_es_lock); 936 /* Skip the inode that is newer than the last_sorted time */
905 if (ei->i_es_lru_nr == 0) { 937 if (sbi->s_es_last_sorted < ei->i_touch_when) {
906 read_unlock(&ei->i_es_lock); 938 list_move_tail(cur, &skiped);
907 continue; 939 continue;
908 } 940 }
909 read_unlock(&ei->i_es_lock); 941
942 if (ei->i_es_lru_nr == 0 || ei == locked_ei)
943 continue;
910 944
911 write_lock(&ei->i_es_lock); 945 write_lock(&ei->i_es_lock);
912 ret = __es_try_to_reclaim_extents(ei, nr_to_scan); 946 ret = __es_try_to_reclaim_extents(ei, nr_to_scan);
947 if (ei->i_es_lru_nr == 0)
948 list_del_init(&ei->i_es_lru);
913 write_unlock(&ei->i_es_lock); 949 write_unlock(&ei->i_es_lock);
914 950
915 nr_shrunk += ret; 951 nr_shrunk += ret;
@@ -917,29 +953,50 @@ static int ext4_es_shrink(struct shrinker *shrink, struct shrink_control *sc)
917 if (nr_to_scan == 0) 953 if (nr_to_scan == 0)
918 break; 954 break;
919 } 955 }
920 list_splice_tail(&scanned, &sbi->s_es_lru); 956
957 /* Move the newer inodes into the tail of the LRU list. */
958 list_splice_tail(&skiped, &sbi->s_es_lru);
921 spin_unlock(&sbi->s_es_lru_lock); 959 spin_unlock(&sbi->s_es_lru_lock);
922 960
961 if (locked_ei && nr_shrunk == 0)
962 nr_shrunk = __es_try_to_reclaim_extents(ei, nr_to_scan);
963
964 return nr_shrunk;
965}
966
967static int ext4_es_shrink(struct shrinker *shrink, struct shrink_control *sc)
968{
969 struct ext4_sb_info *sbi = container_of(shrink,
970 struct ext4_sb_info, s_es_shrinker);
971 int nr_to_scan = sc->nr_to_scan;
972 int ret, nr_shrunk;
973
974 ret = percpu_counter_read_positive(&sbi->s_extent_cache_cnt);
975 trace_ext4_es_shrink_enter(sbi->s_sb, nr_to_scan, ret);
976
977 if (!nr_to_scan)
978 return ret;
979
980 nr_shrunk = __ext4_es_shrink(sbi, nr_to_scan, NULL);
981
923 ret = percpu_counter_read_positive(&sbi->s_extent_cache_cnt); 982 ret = percpu_counter_read_positive(&sbi->s_extent_cache_cnt);
924 trace_ext4_es_shrink_exit(sbi->s_sb, nr_shrunk, ret); 983 trace_ext4_es_shrink_exit(sbi->s_sb, nr_shrunk, ret);
925 return ret; 984 return ret;
926} 985}
927 986
928void ext4_es_register_shrinker(struct super_block *sb) 987void ext4_es_register_shrinker(struct ext4_sb_info *sbi)
929{ 988{
930 struct ext4_sb_info *sbi;
931
932 sbi = EXT4_SB(sb);
933 INIT_LIST_HEAD(&sbi->s_es_lru); 989 INIT_LIST_HEAD(&sbi->s_es_lru);
934 spin_lock_init(&sbi->s_es_lru_lock); 990 spin_lock_init(&sbi->s_es_lru_lock);
991 sbi->s_es_last_sorted = 0;
935 sbi->s_es_shrinker.shrink = ext4_es_shrink; 992 sbi->s_es_shrinker.shrink = ext4_es_shrink;
936 sbi->s_es_shrinker.seeks = DEFAULT_SEEKS; 993 sbi->s_es_shrinker.seeks = DEFAULT_SEEKS;
937 register_shrinker(&sbi->s_es_shrinker); 994 register_shrinker(&sbi->s_es_shrinker);
938} 995}
939 996
940void ext4_es_unregister_shrinker(struct super_block *sb) 997void ext4_es_unregister_shrinker(struct ext4_sb_info *sbi)
941{ 998{
942 unregister_shrinker(&EXT4_SB(sb)->s_es_shrinker); 999 unregister_shrinker(&sbi->s_es_shrinker);
943} 1000}
944 1001
945void ext4_es_lru_add(struct inode *inode) 1002void ext4_es_lru_add(struct inode *inode)
@@ -947,11 +1004,14 @@ void ext4_es_lru_add(struct inode *inode)
947 struct ext4_inode_info *ei = EXT4_I(inode); 1004 struct ext4_inode_info *ei = EXT4_I(inode);
948 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); 1005 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
949 1006
1007 ei->i_touch_when = jiffies;
1008
1009 if (!list_empty(&ei->i_es_lru))
1010 return;
1011
950 spin_lock(&sbi->s_es_lru_lock); 1012 spin_lock(&sbi->s_es_lru_lock);
951 if (list_empty(&ei->i_es_lru)) 1013 if (list_empty(&ei->i_es_lru))
952 list_add_tail(&ei->i_es_lru, &sbi->s_es_lru); 1014 list_add_tail(&ei->i_es_lru, &sbi->s_es_lru);
953 else
954 list_move_tail(&ei->i_es_lru, &sbi->s_es_lru);
955 spin_unlock(&sbi->s_es_lru_lock); 1015 spin_unlock(&sbi->s_es_lru_lock);
956} 1016}
957 1017