author		Theodore Ts'o <tytso@mit.edu>	2013-08-16 22:05:14 -0400
committer	Theodore Ts'o <tytso@mit.edu>	2013-08-16 22:05:14 -0400
commit		7869a4a6c5caa7b2e5c41ccaf46eb3371f88eea7
tree		1c55037a6b090b843b7f8669686dfdbbfd9ceb70	/fs/ext4/extents_status.c
parent		107a7bd31ac003e42c0f966aa8e5b26947de6024
ext4: add support for extent pre-caching
Add a new fiemap flag which forces all of the extents in an inode to be cached in the extent_status tree. This is critically important when using AIO to a preallocated file, since if we need to read in blocks from the extent tree, the io_submit(2) system call becomes synchronous, and the AIO is no longer "A", which is bad.

In addition, for most files which have an external leaf tree block, the cost of caching the information in the extent status tree will be less than caching the entire 4k block in the buffer cache. So it is generally a win to keep the extent information cached.

Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
Diffstat (limited to 'fs/ext4/extents_status.c')
-rw-r--r--	fs/ext4/extents_status.c	72
1 file changed, 51 insertions(+), 21 deletions(-)
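The pre-cache is driven from userspace through the fiemap ioctl rather than a new system call. A minimal sketch follows; it assumes the FIEMAP_FLAG_CACHE name and value introduced alongside this patch in the fiemap UAPI header, so the fallback define below is an assumption for older headers, not an authoritative value.

#include <stdio.h>
#include <string.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/fs.h>
#include <linux/fiemap.h>

#ifndef FIEMAP_FLAG_CACHE
#define FIEMAP_FLAG_CACHE 0x00000004	/* assumed value; check your headers */
#endif

int main(int argc, char **argv)
{
	struct fiemap fm;
	int fd;

	if (argc != 2) {
		fprintf(stderr, "usage: %s <file>\n", argv[0]);
		return 1;
	}
	fd = open(argv[1], O_RDONLY);
	if (fd < 0) {
		perror("open");
		return 1;
	}

	/* Request caching only: no extent records are copied back,
	 * so fm_extent_count stays zero. */
	memset(&fm, 0, sizeof(fm));
	fm.fm_flags = FIEMAP_FLAG_CACHE;
	fm.fm_length = FIEMAP_MAX_OFFSET;	/* cover the whole file */

	if (ioctl(fd, FS_IOC_FIEMAP, &fm) < 0)
		perror("FS_IOC_FIEMAP");
	close(fd);
	return 0;
}

Issuing this once after opening a preallocated file is intended to keep subsequent io_submit(2) calls from blocking on extent tree reads.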
diff --git a/fs/ext4/extents_status.c b/fs/ext4/extents_status.c
index 1dc5df016e25..0e88a367b535 100644
--- a/fs/ext4/extents_status.c
+++ b/fs/ext4/extents_status.c
@@ -710,11 +710,8 @@ void ext4_es_cache_extent(struct inode *inode, ext4_lblk_t lblk,
 	write_lock(&EXT4_I(inode)->i_es_lock);
 
 	es = __es_tree_search(&EXT4_I(inode)->i_es_tree.root, lblk);
-	if (es && ((es->es_lblk <= lblk) || (es->es_lblk <= end)))
-		goto out;
-
-	__es_insert_extent(inode, &newes);
-out:
+	if (!es || es->es_lblk > end)
+		__es_insert_extent(inode, &newes);
 	write_unlock(&EXT4_I(inode)->i_es_lock);
 }
 
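An editorial note on this hunk: since lblk <= end always holds here, (es->es_lblk <= lblk) implies (es->es_lblk <= end), so the old "found an overlap, skip the insert" test reduces to es && es->es_lblk <= end, and the new condition is exactly its negation, with the goto eliminated. The userspace self-check below (not part of the patch) exhausts the equivalence over small block numbers.

#include <assert.h>
#include <stdbool.h>
#include <stdio.h>

/* es_present models (es != NULL); es_lblk is only meaningful when it
 * is set.  The loops exhaust small block numbers with lblk <= end. */
static bool old_skip(bool es_present, int es_lblk, int lblk, int end)
{
	return es_present && ((es_lblk <= lblk) || (es_lblk <= end));
}

static bool new_insert(bool es_present, int es_lblk, int end)
{
	return !es_present || es_lblk > end;
}

int main(void)
{
	for (int lblk = 0; lblk <= 8; lblk++)
		for (int end = lblk; end <= 8; end++)
			for (int es_lblk = 0; es_lblk <= 8; es_lblk++) {
				assert(new_insert(true, es_lblk, end) ==
				       !old_skip(true, es_lblk, lblk, end));
				assert(new_insert(false, es_lblk, end) &&
				       !old_skip(false, es_lblk, lblk, end));
			}
	printf("old and new conditions are exact complements\n");
	return 0;
}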
@@ -930,6 +927,12 @@ static int ext4_inode_touch_time_cmp(void *priv, struct list_head *a,
 	eia = list_entry(a, struct ext4_inode_info, i_es_lru);
 	eib = list_entry(b, struct ext4_inode_info, i_es_lru);
 
+	if (ext4_test_inode_state(&eia->vfs_inode, EXT4_STATE_EXT_PRECACHED) &&
+	    !ext4_test_inode_state(&eib->vfs_inode, EXT4_STATE_EXT_PRECACHED))
+		return 1;
+	if (!ext4_test_inode_state(&eia->vfs_inode, EXT4_STATE_EXT_PRECACHED) &&
+	    ext4_test_inode_state(&eib->vfs_inode, EXT4_STATE_EXT_PRECACHED))
+		return -1;
 	if (eia->i_touch_when == eib->i_touch_when)
 		return 0;
 	if (time_after(eia->i_touch_when, eib->i_touch_when))
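The two added tests make the comparator order any precached inode after any non-precached one, so a sorted LRU list keeps precached inodes at the tail, where the shrinker reaches them last. A userspace analogue using qsort(3) is sketched below; the struct and field names are illustrative, and the kernel version compares jiffies with time_after() rather than a plain greater-than.

#include <stdio.h>
#include <stdlib.h>

struct fake_inode {
	int precached;			/* models EXT4_STATE_EXT_PRECACHED */
	unsigned long touch_when;	/* models i_touch_when (jiffies) */
};

static int touch_time_cmp(const void *pa, const void *pb)
{
	const struct fake_inode *a = pa, *b = pb;

	/* Precached inodes compare greater: they sort to the tail and
	 * are considered for reclaim last. */
	if (a->precached != b->precached)
		return a->precached ? 1 : -1;
	if (a->touch_when == b->touch_when)
		return 0;
	/* Newer inodes also sort toward the tail. */
	return a->touch_when > b->touch_when ? 1 : -1;
}

int main(void)
{
	struct fake_inode v[] = {
		{ 1, 100 }, { 0, 300 }, { 0, 200 }, { 1, 50 },
	};

	qsort(v, sizeof(v) / sizeof(v[0]), sizeof(v[0]), touch_time_cmp);
	for (unsigned i = 0; i < sizeof(v) / sizeof(v[0]); i++)
		printf("precached=%d touch_when=%lu\n",
		       v[i].precached, v[i].touch_when);
	return 0;
}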
@@ -943,21 +946,13 @@ static int __ext4_es_shrink(struct ext4_sb_info *sbi, int nr_to_scan,
 {
 	struct ext4_inode_info *ei;
 	struct list_head *cur, *tmp;
-	LIST_HEAD(skiped);
+	LIST_HEAD(skipped);
 	int ret, nr_shrunk = 0;
+	int retried = 0, skip_precached = 1, nr_skipped = 0;
 
 	spin_lock(&sbi->s_es_lru_lock);
 
-	/*
-	 * If the inode that is at the head of LRU list is newer than
-	 * last_sorted time, that means that we need to sort this list.
-	 */
-	ei = list_first_entry(&sbi->s_es_lru, struct ext4_inode_info, i_es_lru);
-	if (sbi->s_es_last_sorted < ei->i_touch_when) {
-		list_sort(NULL, &sbi->s_es_lru, ext4_inode_touch_time_cmp);
-		sbi->s_es_last_sorted = jiffies;
-	}
-
+retry:
 	list_for_each_safe(cur, tmp, &sbi->s_es_lru) {
 		/*
 		 * If we have already reclaimed all extents from extent
@@ -968,9 +963,16 @@ static int __ext4_es_shrink(struct ext4_sb_info *sbi, int nr_to_scan,
 
 		ei = list_entry(cur, struct ext4_inode_info, i_es_lru);
 
-		/* Skip the inode that is newer than the last_sorted time */
-		if (sbi->s_es_last_sorted < ei->i_touch_when) {
-			list_move_tail(cur, &skiped);
+		/*
+		 * Skip the inode that is newer than the last_sorted
+		 * time.  Normally we try hard to avoid shrinking
+		 * precached inodes, but we will as a last resort.
+		 */
+		if ((sbi->s_es_last_sorted < ei->i_touch_when) ||
+		    (skip_precached && ext4_test_inode_state(&ei->vfs_inode,
+						EXT4_STATE_EXT_PRECACHED))) {
+			nr_skipped++;
+			list_move_tail(cur, &skipped);
 			continue;
 		}
 
@@ -990,11 +992,33 @@ static int __ext4_es_shrink(struct ext4_sb_info *sbi, int nr_to_scan,
 	}
 
 	/* Move the newer inodes into the tail of the LRU list. */
-	list_splice_tail(&skiped, &sbi->s_es_lru);
+	list_splice_tail(&skipped, &sbi->s_es_lru);
+	INIT_LIST_HEAD(&skipped);
+
+	/*
+	 * If we skipped any inodes, and we weren't able to make any
+	 * forward progress, sort the list and try again.
+	 */
+	if ((nr_shrunk == 0) && nr_skipped && !retried) {
+		retried++;
+		list_sort(NULL, &sbi->s_es_lru, ext4_inode_touch_time_cmp);
+		sbi->s_es_last_sorted = jiffies;
+		ei = list_first_entry(&sbi->s_es_lru, struct ext4_inode_info,
+				      i_es_lru);
+		/*
+		 * If there are no non-precached inodes left on the
+		 * list, start releasing precached extents.
+		 */
+		if (ext4_test_inode_state(&ei->vfs_inode,
+					  EXT4_STATE_EXT_PRECACHED))
+			skip_precached = 0;
+		goto retry;
+	}
+
 	spin_unlock(&sbi->s_es_lru_lock);
 
 	if (locked_ei && nr_shrunk == 0)
-		nr_shrunk = __es_try_to_reclaim_extents(ei, nr_to_scan);
+		nr_shrunk = __es_try_to_reclaim_extents(locked_ei, nr_to_scan);
 
 	return nr_shrunk;
 }
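Combined with the earlier hunks, __ext4_es_shrink() is now a two-pass scan: the first pass skips precached inodes (and inodes newer than the last sort); if that makes no progress while skipping something, the list is re-sorted and scanned once more, and the precached exemption is dropped only when precached inodes are all that remain. The sketch below (illustrative names, no locking or LRU ordering) models just that retry decision.

#include <stdio.h>

struct fake_inode {
	int precached;		/* models EXT4_STATE_EXT_PRECACHED */
	int cached_extents;	/* models the per-inode extent cache */
};

static int shrink(struct fake_inode *v, int n)
{
	int skip_precached = 1, retried = 0, nr_shrunk = 0, nr_skipped;

retry:
	nr_skipped = 0;
	for (int i = 0; i < n; i++) {
		if (skip_precached && v[i].precached) {
			nr_skipped++;	/* spare precached inodes for now */
			continue;
		}
		nr_shrunk += v[i].cached_extents;
		v[i].cached_extents = 0;
	}
	/* No forward progress but something was skipped: retry once,
	 * this time allowing precached extents to be released. */
	if (nr_shrunk == 0 && nr_skipped && !retried) {
		retried++;
		skip_precached = 0;
		goto retry;
	}
	return nr_shrunk;
}

int main(void)
{
	struct fake_inode only_precached[] = { { 1, 8 }, { 1, 4 } };

	/* Only precached inodes are present, so the retry pass fires
	 * and their 12 cached extents are released as a last resort. */
	printf("reclaimed %d extents\n", shrink(only_precached, 2));
	return 0;
}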
@@ -1069,10 +1093,16 @@ static int __es_try_to_reclaim_extents(struct ext4_inode_info *ei,
 	struct rb_node *node;
 	struct extent_status *es;
 	int nr_shrunk = 0;
+	static DEFINE_RATELIMIT_STATE(_rs, DEFAULT_RATELIMIT_INTERVAL,
+				      DEFAULT_RATELIMIT_BURST);
 
 	if (ei->i_es_lru_nr == 0)
 		return 0;
 
+	if (ext4_test_inode_state(inode, EXT4_STATE_EXT_PRECACHED) &&
+	    __ratelimit(&_rs))
+		ext4_warning(inode->i_sb, "forced shrink of precached extents");
+
 	node = rb_first(&tree->root);
 	while (node != NULL) {
 		es = rb_entry(node, struct extent_status, rb_node);
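The forced-shrink warning is wrapped in the kernel's DEFINE_RATELIMIT_STATE()/__ratelimit() pair so a reclaim storm cannot flood the log. A rough userspace approximation of the pattern is sketched below; it uses a fixed time window and invented names, so treat it as an illustration of the idea rather than the kernel implementation.

#include <stdio.h>
#include <time.h>

struct ratelimit {
	time_t window_start;	/* start of the current window */
	int interval;		/* seconds per window */
	int burst;		/* messages allowed per window */
	int emitted;		/* messages emitted so far in the window */
};

static int ratelimit_ok(struct ratelimit *rs)
{
	time_t now = time(NULL);

	if (now - rs->window_start >= rs->interval) {
		rs->window_start = now;	/* new window, reset the budget */
		rs->emitted = 0;
	}
	if (rs->emitted >= rs->burst)
		return 0;	/* over budget: suppress this message */
	rs->emitted++;
	return 1;
}

int main(void)
{
	/* 10 messages per 5 seconds, mirroring the kernel defaults of
	 * DEFAULT_RATELIMIT_INTERVAL (5*HZ) and DEFAULT_RATELIMIT_BURST (10). */
	struct ratelimit rs = { .window_start = 0, .interval = 5, .burst = 10 };

	for (int i = 0; i < 100; i++)
		if (ratelimit_ok(&rs))
			printf("forced shrink of precached extents (%d)\n", i);
	return 0;
}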