author    Zheng Liu <wenqing.lz@taobao.com>  2013-02-18 00:32:55 -0500
committer Theodore Ts'o <tytso@mit.edu>  2013-02-18 00:32:55 -0500
commit    74cd15cd02708c7188581f279f33a98b2ae8d322 (patch)
tree      5d5c2380ffc7ddf1cd529127b89bf572c1798ffd /fs/ext4/extents_status.c
parent    bdedbb7b8d5b960e1ff0d116f5d4935febe73183 (diff)
ext4: reclaim extents from extent status tree
Although extent status entries are loaded on demand, we also need to reclaim extents from the tree under heavy memory pressure, because in some cases a fragmented extent tree makes the status tree cost too much memory.

Here we maintain an LRU list in the super_block. When the extent status of an inode is accessed or changed, the inode is moved to the tail of the list; the inode is dropped from the list when it is cleared. A counter is added to the inode to count the number of cached objects in its extent status tree. Only written/unwritten/hole extents are counted, because delayed extents cannot be reclaimed: fiemap, bigalloc, and seek_data/hole need them. The counter is increased when a new extent is allocated and decreased when an extent is freed.

This commit uses the normal shrinker framework to reclaim memory from the status tree. ext4_es_reclaim_extents_count() traverses the LRU list to count the number of reclaimable extents, and ext4_es_shrink() tries to reclaim written/unwritten/hole extents from the extent status tree. An inode that has been shrunk is moved to the tail of the LRU list.

Signed-off-by: Zheng Liu <wenqing.lz@taobao.com>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
Cc: Jan Kara <jack@suse.cz>
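For orientation, this patch targets the legacy shrinker interface (before the count/scan split introduced in 3.12), in which a single .shrink callback is used both to query the cache size and to reclaim from it. A minimal sketch of that contract; the example_* names are illustrative and not part of this patch, only the callback shape is real:

#include <linux/shrinker.h>

static int example_nr_cached;	/* assumed reclaimable-object count */

static int example_shrink(struct shrinker *shrink, struct shrink_control *sc)
{
        /* nr_to_scan == 0 is a query: report the reclaimable object count */
        if (sc->nr_to_scan == 0)
                return example_nr_cached;

        /* otherwise free up to sc->nr_to_scan objects ... (elided) */

        /* ... and return how many reclaimable objects remain */
        return example_nr_cached;
}

static struct shrinker example_shrinker = {
        .shrink = example_shrink,
        .seeks  = DEFAULT_SEEKS,
};

/* wired up once at init with register_shrinker(&example_shrinker) */

ext4_es_shrink() in the diff below follows exactly this shape: a zero nr_to_scan returns the count, anything else reclaims and then returns the remaining count.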
Diffstat (limited to 'fs/ext4/extents_status.c')
-rw-r--r--  fs/ext4/extents_status.c | 156
1 file changed, 156 insertions(+), 0 deletions(-)
diff --git a/fs/ext4/extents_status.c b/fs/ext4/extents_status.c
index cce152c3c8dc..9f1380e05474 100644
--- a/fs/ext4/extents_status.c
+++ b/fs/ext4/extents_status.c
@@ -145,6 +145,9 @@ static struct kmem_cache *ext4_es_cachep;
 static int __es_insert_extent(struct inode *inode, struct extent_status *newes);
 static int __es_remove_extent(struct inode *inode, ext4_lblk_t lblk,
                               ext4_lblk_t end);
+static int __es_try_to_reclaim_extents(struct ext4_inode_info *ei,
+                                       int nr_to_scan);
+static int ext4_es_reclaim_extents_count(struct super_block *sb);
 
 int __init ext4_init_es(void)
 {
@@ -280,6 +283,7 @@ out:
 
         read_unlock(&EXT4_I(inode)->i_es_lock);
 
+        ext4_es_lru_add(inode);
         trace_ext4_es_find_delayed_extent_exit(inode, es);
 }
 
@@ -294,11 +298,24 @@ ext4_es_alloc_extent(struct inode *inode, ext4_lblk_t lblk, ext4_lblk_t len,
         es->es_lblk = lblk;
         es->es_len = len;
         es->es_pblk = pblk;
+
+        /*
+         * We don't count delayed extents because we never try to reclaim them
+         */
+        if (!ext4_es_is_delayed(es))
+                EXT4_I(inode)->i_es_lru_nr++;
+
         return es;
 }
 
 static void ext4_es_free_extent(struct inode *inode, struct extent_status *es)
 {
+        /* Decrease the lru counter when this es is not delayed */
+        if (!ext4_es_is_delayed(es)) {
+                BUG_ON(EXT4_I(inode)->i_es_lru_nr == 0);
+                EXT4_I(inode)->i_es_lru_nr--;
+        }
+
         kmem_cache_free(ext4_es_cachep, es);
 }
 
@@ -456,6 +473,7 @@ int ext4_es_insert_extent(struct inode *inode, ext4_lblk_t lblk,
 error:
         write_unlock(&EXT4_I(inode)->i_es_lock);
 
+        ext4_es_lru_add(inode);
         ext4_es_print_tree(inode);
 
         return err;
@@ -517,6 +535,7 @@ out:
 
         read_unlock(&EXT4_I(inode)->i_es_lock);
 
+        ext4_es_lru_add(inode);
         trace_ext4_es_lookup_extent_exit(inode, es, found);
         return found;
 }
@@ -639,3 +658,140 @@ int ext4_es_remove_extent(struct inode *inode, ext4_lblk_t lblk,
         ext4_es_print_tree(inode);
         return err;
 }
+
+static int ext4_es_shrink(struct shrinker *shrink, struct shrink_control *sc)
+{
+        struct ext4_sb_info *sbi = container_of(shrink,
+                                        struct ext4_sb_info, s_es_shrinker);
+        struct ext4_inode_info *ei;
+        struct list_head *cur, *tmp, scanned;
+        int nr_to_scan = sc->nr_to_scan;
+        int ret, nr_shrunk = 0;
+
+        trace_ext4_es_shrink_enter(sbi->s_sb, nr_to_scan);
+
+        if (!nr_to_scan)
+                return ext4_es_reclaim_extents_count(sbi->s_sb);
+
+        INIT_LIST_HEAD(&scanned);
+
+        spin_lock(&sbi->s_es_lru_lock);
+        list_for_each_safe(cur, tmp, &sbi->s_es_lru) {
+                list_move_tail(cur, &scanned);
+
+                ei = list_entry(cur, struct ext4_inode_info, i_es_lru);
+
+                read_lock(&ei->i_es_lock);
+                if (ei->i_es_lru_nr == 0) {
+                        read_unlock(&ei->i_es_lock);
+                        continue;
+                }
+                read_unlock(&ei->i_es_lock);
+
+                write_lock(&ei->i_es_lock);
+                ret = __es_try_to_reclaim_extents(ei, nr_to_scan);
+                write_unlock(&ei->i_es_lock);
+
+                nr_shrunk += ret;
+                nr_to_scan -= ret;
+                if (nr_to_scan == 0)
+                        break;
+        }
+        list_splice_tail(&scanned, &sbi->s_es_lru);
+        spin_unlock(&sbi->s_es_lru_lock);
+        trace_ext4_es_shrink_exit(sbi->s_sb, nr_shrunk);
+
+        return ext4_es_reclaim_extents_count(sbi->s_sb);
+}
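Two details of ext4_es_shrink() above are worth noting. The private `scanned` list rotates every visited inode out of s_es_lru and splices the whole batch back at the tail once the walk ends, so the inodes just scanned become the last candidates on the next pass; reclaim proceeds round-robin across inodes instead of repeatedly hitting the head of the list. And the brief read_lock/read_unlock pre-check is a cheap way to skip inodes with nothing reclaimable before paying for the exclusive write_lock needed to actually prune the tree.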
+
+void ext4_es_register_shrinker(struct super_block *sb)
+{
+        struct ext4_sb_info *sbi;
+
+        sbi = EXT4_SB(sb);
+        INIT_LIST_HEAD(&sbi->s_es_lru);
+        spin_lock_init(&sbi->s_es_lru_lock);
+        sbi->s_es_shrinker.shrink = ext4_es_shrink;
+        sbi->s_es_shrinker.seeks = DEFAULT_SEEKS;
+        register_shrinker(&sbi->s_es_shrinker);
+}
+
+void ext4_es_unregister_shrinker(struct super_block *sb)
+{
+        unregister_shrinker(&EXT4_SB(sb)->s_es_shrinker);
+}
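Nothing in this file calls the two hooks above; the diffstat is limited to extents_status.c, so the call sites live in the companion hunks of this commit. A hedged sketch of the expected wiring, assumed to be in fs/ext4/super.c (the exact hunks are not shown here and may differ in detail):

        /* assumed: in ext4_fill_super(), once the sbi exists */
        ext4_es_register_shrinker(sb);

        /* assumed: in ext4_put_super(), before the sbi is torn down */
        ext4_es_unregister_shrinker(sb);

        /*
         * assumed: in ext4_alloc_inode(), so the list_empty() checks in
         * ext4_es_lru_add()/ext4_es_lru_del() see a well-defined state
         */
        INIT_LIST_HEAD(&ei->i_es_lru);
        ei->i_es_lru_nr = 0;

An inode must also drop itself from the LRU before it is destroyed (the commit message's "dropped from this list when it is cleared"), which is why ext4_es_lru_del() is provided as an exported hook.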
+
+void ext4_es_lru_add(struct inode *inode)
+{
+        struct ext4_inode_info *ei = EXT4_I(inode);
+        struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
+
+        spin_lock(&sbi->s_es_lru_lock);
+        if (list_empty(&ei->i_es_lru))
+                list_add_tail(&ei->i_es_lru, &sbi->s_es_lru);
+        else
+                list_move_tail(&ei->i_es_lru, &sbi->s_es_lru);
+        spin_unlock(&sbi->s_es_lru_lock);
+}
+
+void ext4_es_lru_del(struct inode *inode)
+{
+        struct ext4_inode_info *ei = EXT4_I(inode);
+        struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
+
+        spin_lock(&sbi->s_es_lru_lock);
+        if (!list_empty(&ei->i_es_lru))
+                list_del_init(&ei->i_es_lru);
+        spin_unlock(&sbi->s_es_lru_lock);
+}
+
+static int ext4_es_reclaim_extents_count(struct super_block *sb)
+{
+        struct ext4_sb_info *sbi = EXT4_SB(sb);
+        struct ext4_inode_info *ei;
+        struct list_head *cur;
+        int nr_cached = 0;
+
+        spin_lock(&sbi->s_es_lru_lock);
+        list_for_each(cur, &sbi->s_es_lru) {
+                ei = list_entry(cur, struct ext4_inode_info, i_es_lru);
+                read_lock(&ei->i_es_lock);
+                nr_cached += ei->i_es_lru_nr;
+                read_unlock(&ei->i_es_lock);
+        }
+        spin_unlock(&sbi->s_es_lru_lock);
+        trace_ext4_es_reclaim_extents_count(sb, nr_cached);
+        return nr_cached;
+}
+
+static int __es_try_to_reclaim_extents(struct ext4_inode_info *ei,
+                                       int nr_to_scan)
+{
+        struct inode *inode = &ei->vfs_inode;
+        struct ext4_es_tree *tree = &ei->i_es_tree;
+        struct rb_node *node;
+        struct extent_status *es;
+        int nr_shrunk = 0;
+
+        if (ei->i_es_lru_nr == 0)
+                return 0;
+
+        node = rb_first(&tree->root);
+        while (node != NULL) {
+                es = rb_entry(node, struct extent_status, rb_node);
+                node = rb_next(&es->rb_node);
+                /*
+                 * We can't reclaim delayed extents from the status tree
+                 * because fiemap, bigalloc, and seek_data/hole need to
+                 * use them.
+                 */
+                if (!ext4_es_is_delayed(es)) {
+                        rb_erase(&es->rb_node, &tree->root);
+                        ext4_es_free_extent(inode, es);
+                        nr_shrunk++;
+                        if (--nr_to_scan == 0)
+                                break;
+                }
+        }
+        tree->cache_es = NULL;
+        return nr_shrunk;
+}
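Taken together, the locking in the new code is a two-level hierarchy: sbi->s_es_lru_lock, a spinlock, is the outer lock and protects only the LRU list linkage, while ei->i_es_lock, the existing per-inode rwlock, nests inside it and protects both the rb-tree and i_es_lru_nr. __es_try_to_reclaim_extents() always runs with i_es_lock held for writing, which is what makes the bare rb_erase()/ext4_es_free_extent() calls safe. The final tree->cache_es = NULL is required because the cached lookup hint may point at one of the entries just freed.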