aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--fs/ext4/ext4.h7
-rw-r--r--fs/ext4/extents_status.c156
-rw-r--r--fs/ext4/extents_status.h5
-rw-r--r--fs/ext4/super.c7
-rw-r--r--include/trace/events/ext4.h60
5 files changed, 235 insertions, 0 deletions
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 0c565c941f7a..6e16c1867959 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -888,6 +888,8 @@ struct ext4_inode_info {
888 /* extents status tree */ 888 /* extents status tree */
889 struct ext4_es_tree i_es_tree; 889 struct ext4_es_tree i_es_tree;
890 rwlock_t i_es_lock; 890 rwlock_t i_es_lock;
891 struct list_head i_es_lru;
892 unsigned int i_es_lru_nr; /* protected by i_es_lock */
891 893
892 /* ialloc */ 894 /* ialloc */
893 ext4_group_t i_last_alloc_group; 895 ext4_group_t i_last_alloc_group;
@@ -1303,6 +1305,11 @@ struct ext4_sb_info {
1303 1305
1304 /* Precomputed FS UUID checksum for seeding other checksums */ 1306 /* Precomputed FS UUID checksum for seeding other checksums */
1305 __u32 s_csum_seed; 1307 __u32 s_csum_seed;
1308
1309 /* Reclaim extents from extent status tree */
1310 struct shrinker s_es_shrinker;
1311 struct list_head s_es_lru;
1312 spinlock_t s_es_lru_lock ____cacheline_aligned_in_smp;
1306}; 1313};
1307 1314
1308static inline struct ext4_sb_info *EXT4_SB(struct super_block *sb) 1315static inline struct ext4_sb_info *EXT4_SB(struct super_block *sb)
diff --git a/fs/ext4/extents_status.c b/fs/ext4/extents_status.c
index cce152c3c8dc..9f1380e05474 100644
--- a/fs/ext4/extents_status.c
+++ b/fs/ext4/extents_status.c
@@ -145,6 +145,9 @@ static struct kmem_cache *ext4_es_cachep;
145static int __es_insert_extent(struct inode *inode, struct extent_status *newes); 145static int __es_insert_extent(struct inode *inode, struct extent_status *newes);
146static int __es_remove_extent(struct inode *inode, ext4_lblk_t lblk, 146static int __es_remove_extent(struct inode *inode, ext4_lblk_t lblk,
147 ext4_lblk_t end); 147 ext4_lblk_t end);
148static int __es_try_to_reclaim_extents(struct ext4_inode_info *ei,
149 int nr_to_scan);
150static int ext4_es_reclaim_extents_count(struct super_block *sb);
148 151
149int __init ext4_init_es(void) 152int __init ext4_init_es(void)
150{ 153{
@@ -280,6 +283,7 @@ out:
280 283
281 read_unlock(&EXT4_I(inode)->i_es_lock); 284 read_unlock(&EXT4_I(inode)->i_es_lock);
282 285
286 ext4_es_lru_add(inode);
283 trace_ext4_es_find_delayed_extent_exit(inode, es); 287 trace_ext4_es_find_delayed_extent_exit(inode, es);
284} 288}
285 289
@@ -294,11 +298,24 @@ ext4_es_alloc_extent(struct inode *inode, ext4_lblk_t lblk, ext4_lblk_t len,
294 es->es_lblk = lblk; 298 es->es_lblk = lblk;
295 es->es_len = len; 299 es->es_len = len;
296 es->es_pblk = pblk; 300 es->es_pblk = pblk;
301
302 /*
303 * We don't count delayed extent because we never try to reclaim them
304 */
305 if (!ext4_es_is_delayed(es))
306 EXT4_I(inode)->i_es_lru_nr++;
307
297 return es; 308 return es;
298} 309}
299 310
300static void ext4_es_free_extent(struct inode *inode, struct extent_status *es) 311static void ext4_es_free_extent(struct inode *inode, struct extent_status *es)
301{ 312{
313 /* Decrease the lru counter when this es is not delayed */
314 if (!ext4_es_is_delayed(es)) {
315 BUG_ON(EXT4_I(inode)->i_es_lru_nr == 0);
316 EXT4_I(inode)->i_es_lru_nr--;
317 }
318
302 kmem_cache_free(ext4_es_cachep, es); 319 kmem_cache_free(ext4_es_cachep, es);
303} 320}
304 321
@@ -456,6 +473,7 @@ int ext4_es_insert_extent(struct inode *inode, ext4_lblk_t lblk,
456error: 473error:
457 write_unlock(&EXT4_I(inode)->i_es_lock); 474 write_unlock(&EXT4_I(inode)->i_es_lock);
458 475
476 ext4_es_lru_add(inode);
459 ext4_es_print_tree(inode); 477 ext4_es_print_tree(inode);
460 478
461 return err; 479 return err;
@@ -517,6 +535,7 @@ out:
517 535
518 read_unlock(&EXT4_I(inode)->i_es_lock); 536 read_unlock(&EXT4_I(inode)->i_es_lock);
519 537
538 ext4_es_lru_add(inode);
520 trace_ext4_es_lookup_extent_exit(inode, es, found); 539 trace_ext4_es_lookup_extent_exit(inode, es, found);
521 return found; 540 return found;
522} 541}
@@ -639,3 +658,140 @@ int ext4_es_remove_extent(struct inode *inode, ext4_lblk_t lblk,
639 ext4_es_print_tree(inode); 658 ext4_es_print_tree(inode);
640 return err; 659 return err;
641} 660}
661
662static int ext4_es_shrink(struct shrinker *shrink, struct shrink_control *sc)
663{
664 struct ext4_sb_info *sbi = container_of(shrink,
665 struct ext4_sb_info, s_es_shrinker);
666 struct ext4_inode_info *ei;
667 struct list_head *cur, *tmp, scanned;
668 int nr_to_scan = sc->nr_to_scan;
669 int ret, nr_shrunk = 0;
670
671 trace_ext4_es_shrink_enter(sbi->s_sb, nr_to_scan);
672
673 if (!nr_to_scan)
674 return ext4_es_reclaim_extents_count(sbi->s_sb);
675
676 INIT_LIST_HEAD(&scanned);
677
678 spin_lock(&sbi->s_es_lru_lock);
679 list_for_each_safe(cur, tmp, &sbi->s_es_lru) {
680 list_move_tail(cur, &scanned);
681
682 ei = list_entry(cur, struct ext4_inode_info, i_es_lru);
683
684 read_lock(&ei->i_es_lock);
685 if (ei->i_es_lru_nr == 0) {
686 read_unlock(&ei->i_es_lock);
687 continue;
688 }
689 read_unlock(&ei->i_es_lock);
690
691 write_lock(&ei->i_es_lock);
692 ret = __es_try_to_reclaim_extents(ei, nr_to_scan);
693 write_unlock(&ei->i_es_lock);
694
695 nr_shrunk += ret;
696 nr_to_scan -= ret;
697 if (nr_to_scan == 0)
698 break;
699 }
700 list_splice_tail(&scanned, &sbi->s_es_lru);
701 spin_unlock(&sbi->s_es_lru_lock);
702 trace_ext4_es_shrink_exit(sbi->s_sb, nr_shrunk);
703
704 return ext4_es_reclaim_extents_count(sbi->s_sb);
705}
706
707void ext4_es_register_shrinker(struct super_block *sb)
708{
709 struct ext4_sb_info *sbi;
710
711 sbi = EXT4_SB(sb);
712 INIT_LIST_HEAD(&sbi->s_es_lru);
713 spin_lock_init(&sbi->s_es_lru_lock);
714 sbi->s_es_shrinker.shrink = ext4_es_shrink;
715 sbi->s_es_shrinker.seeks = DEFAULT_SEEKS;
716 register_shrinker(&sbi->s_es_shrinker);
717}
718
719void ext4_es_unregister_shrinker(struct super_block *sb)
720{
721 unregister_shrinker(&EXT4_SB(sb)->s_es_shrinker);
722}
723
724void ext4_es_lru_add(struct inode *inode)
725{
726 struct ext4_inode_info *ei = EXT4_I(inode);
727 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
728
729 spin_lock(&sbi->s_es_lru_lock);
730 if (list_empty(&ei->i_es_lru))
731 list_add_tail(&ei->i_es_lru, &sbi->s_es_lru);
732 else
733 list_move_tail(&ei->i_es_lru, &sbi->s_es_lru);
734 spin_unlock(&sbi->s_es_lru_lock);
735}
736
737void ext4_es_lru_del(struct inode *inode)
738{
739 struct ext4_inode_info *ei = EXT4_I(inode);
740 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
741
742 spin_lock(&sbi->s_es_lru_lock);
743 if (!list_empty(&ei->i_es_lru))
744 list_del_init(&ei->i_es_lru);
745 spin_unlock(&sbi->s_es_lru_lock);
746}
747
748static int ext4_es_reclaim_extents_count(struct super_block *sb)
749{
750 struct ext4_sb_info *sbi = EXT4_SB(sb);
751 struct ext4_inode_info *ei;
752 struct list_head *cur;
753 int nr_cached = 0;
754
755 spin_lock(&sbi->s_es_lru_lock);
756 list_for_each(cur, &sbi->s_es_lru) {
757 ei = list_entry(cur, struct ext4_inode_info, i_es_lru);
758 read_lock(&ei->i_es_lock);
759 nr_cached += ei->i_es_lru_nr;
760 read_unlock(&ei->i_es_lock);
761 }
762 spin_unlock(&sbi->s_es_lru_lock);
763 trace_ext4_es_reclaim_extents_count(sb, nr_cached);
764 return nr_cached;
765}
766
767static int __es_try_to_reclaim_extents(struct ext4_inode_info *ei,
768 int nr_to_scan)
769{
770 struct inode *inode = &ei->vfs_inode;
771 struct ext4_es_tree *tree = &ei->i_es_tree;
772 struct rb_node *node;
773 struct extent_status *es;
774 int nr_shrunk = 0;
775
776 if (ei->i_es_lru_nr == 0)
777 return 0;
778
779 node = rb_first(&tree->root);
780 while (node != NULL) {
781 es = rb_entry(node, struct extent_status, rb_node);
782 node = rb_next(&es->rb_node);
783 /*
784 * We can't reclaim delayed extent from status tree because
785 * fiemap, bigallic, and seek_data/hole need to use it.
786 */
787 if (!ext4_es_is_delayed(es)) {
788 rb_erase(&es->rb_node, &tree->root);
789 ext4_es_free_extent(inode, es);
790 nr_shrunk++;
791 if (--nr_to_scan == 0)
792 break;
793 }
794 }
795 tree->cache_es = NULL;
796 return nr_shrunk;
797}
diff --git a/fs/ext4/extents_status.h b/fs/ext4/extents_status.h
index 8ffc90c784fa..cf83e77b16cb 100644
--- a/fs/ext4/extents_status.h
+++ b/fs/ext4/extents_status.h
@@ -106,4 +106,9 @@ static inline void ext4_es_store_status(struct extent_status *es,
106 es->es_pblk = block; 106 es->es_pblk = block;
107} 107}
108 108
109extern void ext4_es_register_shrinker(struct super_block *sb);
110extern void ext4_es_unregister_shrinker(struct super_block *sb);
111extern void ext4_es_lru_add(struct inode *inode);
112extern void ext4_es_lru_del(struct inode *inode);
113
109#endif /* _EXT4_EXTENTS_STATUS_H */ 114#endif /* _EXT4_EXTENTS_STATUS_H */
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index d80bfe5ac11c..373d46cd5d3f 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -755,6 +755,7 @@ static void ext4_put_super(struct super_block *sb)
755 ext4_abort(sb, "Couldn't clean up the journal"); 755 ext4_abort(sb, "Couldn't clean up the journal");
756 } 756 }
757 757
758 ext4_es_unregister_shrinker(sb);
758 del_timer(&sbi->s_err_report); 759 del_timer(&sbi->s_err_report);
759 ext4_release_system_zone(sb); 760 ext4_release_system_zone(sb);
760 ext4_mb_release(sb); 761 ext4_mb_release(sb);
@@ -840,6 +841,8 @@ static struct inode *ext4_alloc_inode(struct super_block *sb)
840 spin_lock_init(&ei->i_prealloc_lock); 841 spin_lock_init(&ei->i_prealloc_lock);
841 ext4_es_init_tree(&ei->i_es_tree); 842 ext4_es_init_tree(&ei->i_es_tree);
842 rwlock_init(&ei->i_es_lock); 843 rwlock_init(&ei->i_es_lock);
844 INIT_LIST_HEAD(&ei->i_es_lru);
845 ei->i_es_lru_nr = 0;
843 ei->i_reserved_data_blocks = 0; 846 ei->i_reserved_data_blocks = 0;
844 ei->i_reserved_meta_blocks = 0; 847 ei->i_reserved_meta_blocks = 0;
845 ei->i_allocated_meta_blocks = 0; 848 ei->i_allocated_meta_blocks = 0;
@@ -928,6 +931,7 @@ void ext4_clear_inode(struct inode *inode)
928 dquot_drop(inode); 931 dquot_drop(inode);
929 ext4_discard_preallocations(inode); 932 ext4_discard_preallocations(inode);
930 ext4_es_remove_extent(inode, 0, EXT_MAX_BLOCKS); 933 ext4_es_remove_extent(inode, 0, EXT_MAX_BLOCKS);
934 ext4_es_lru_del(inode);
931 if (EXT4_I(inode)->jinode) { 935 if (EXT4_I(inode)->jinode) {
932 jbd2_journal_release_jbd_inode(EXT4_JOURNAL(inode), 936 jbd2_journal_release_jbd_inode(EXT4_JOURNAL(inode),
933 EXT4_I(inode)->jinode); 937 EXT4_I(inode)->jinode);
@@ -3693,6 +3697,9 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
3693 sbi->s_max_writeback_mb_bump = 128; 3697 sbi->s_max_writeback_mb_bump = 128;
3694 sbi->s_extent_max_zeroout_kb = 32; 3698 sbi->s_extent_max_zeroout_kb = 32;
3695 3699
3700 /* Register extent status tree shrinker */
3701 ext4_es_register_shrinker(sb);
3702
3696 /* 3703 /*
3697 * set up enough so that it can read an inode 3704 * set up enough so that it can read an inode
3698 */ 3705 */
diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h
index 1e590b68cec4..c0457c0d1a68 100644
--- a/include/trace/events/ext4.h
+++ b/include/trace/events/ext4.h
@@ -2255,6 +2255,66 @@ TRACE_EVENT(ext4_es_lookup_extent_exit,
2255 __entry->found ? __entry->status : 0) 2255 __entry->found ? __entry->status : 0)
2256); 2256);
2257 2257
2258TRACE_EVENT(ext4_es_reclaim_extents_count,
2259 TP_PROTO(struct super_block *sb, int nr_cached),
2260
2261 TP_ARGS(sb, nr_cached),
2262
2263 TP_STRUCT__entry(
2264 __field( dev_t, dev )
2265 __field( int, nr_cached )
2266 ),
2267
2268 TP_fast_assign(
2269 __entry->dev = sb->s_dev;
2270 __entry->nr_cached = nr_cached;
2271 ),
2272
2273 TP_printk("dev %d,%d cached objects nr %d",
2274 MAJOR(__entry->dev), MINOR(__entry->dev),
2275 __entry->nr_cached)
2276);
2277
2278TRACE_EVENT(ext4_es_shrink_enter,
2279 TP_PROTO(struct super_block *sb, int nr_to_scan),
2280
2281 TP_ARGS(sb, nr_to_scan),
2282
2283 TP_STRUCT__entry(
2284 __field( dev_t, dev )
2285 __field( int, nr_to_scan )
2286 ),
2287
2288 TP_fast_assign(
2289 __entry->dev = sb->s_dev;
2290 __entry->nr_to_scan = nr_to_scan;
2291 ),
2292
2293 TP_printk("dev %d,%d nr to scan %d",
2294 MAJOR(__entry->dev), MINOR(__entry->dev),
2295 __entry->nr_to_scan)
2296);
2297
2298TRACE_EVENT(ext4_es_shrink_exit,
2299 TP_PROTO(struct super_block *sb, int shrunk_nr),
2300
2301 TP_ARGS(sb, shrunk_nr),
2302
2303 TP_STRUCT__entry(
2304 __field( dev_t, dev )
2305 __field( int, shrunk_nr )
2306 ),
2307
2308 TP_fast_assign(
2309 __entry->dev = sb->s_dev;
2310 __entry->shrunk_nr = shrunk_nr;
2311 ),
2312
2313 TP_printk("dev %d,%d nr to scan %d",
2314 MAJOR(__entry->dev), MINOR(__entry->dev),
2315 __entry->shrunk_nr)
2316);
2317
2258#endif /* _TRACE_EXT4_H */ 2318#endif /* _TRACE_EXT4_H */
2259 2319
2260/* This part must be outside protection */ 2320/* This part must be outside protection */