diff options
-rw-r--r-- | fs/ext4/ext4.h | 7 | ||||
-rw-r--r-- | fs/ext4/extents_status.c | 156 | ||||
-rw-r--r-- | fs/ext4/extents_status.h | 5 | ||||
-rw-r--r-- | fs/ext4/super.c | 7 | ||||
-rw-r--r-- | include/trace/events/ext4.h | 60 |
5 files changed, 235 insertions, 0 deletions
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 0c565c941f7a..6e16c1867959 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h | |||
@@ -888,6 +888,8 @@ struct ext4_inode_info { | |||
888 | /* extents status tree */ | 888 | /* extents status tree */ |
889 | struct ext4_es_tree i_es_tree; | 889 | struct ext4_es_tree i_es_tree; |
890 | rwlock_t i_es_lock; | 890 | rwlock_t i_es_lock; |
891 | struct list_head i_es_lru; | ||
892 | unsigned int i_es_lru_nr; /* protected by i_es_lock */ | ||
891 | 893 | ||
892 | /* ialloc */ | 894 | /* ialloc */ |
893 | ext4_group_t i_last_alloc_group; | 895 | ext4_group_t i_last_alloc_group; |
@@ -1303,6 +1305,11 @@ struct ext4_sb_info { | |||
1303 | 1305 | ||
1304 | /* Precomputed FS UUID checksum for seeding other checksums */ | 1306 | /* Precomputed FS UUID checksum for seeding other checksums */ |
1305 | __u32 s_csum_seed; | 1307 | __u32 s_csum_seed; |
1308 | |||
1309 | /* Reclaim extents from extent status tree */ | ||
1310 | struct shrinker s_es_shrinker; | ||
1311 | struct list_head s_es_lru; | ||
1312 | spinlock_t s_es_lru_lock ____cacheline_aligned_in_smp; | ||
1306 | }; | 1313 | }; |
1307 | 1314 | ||
1308 | static inline struct ext4_sb_info *EXT4_SB(struct super_block *sb) | 1315 | static inline struct ext4_sb_info *EXT4_SB(struct super_block *sb) |
diff --git a/fs/ext4/extents_status.c b/fs/ext4/extents_status.c index cce152c3c8dc..9f1380e05474 100644 --- a/fs/ext4/extents_status.c +++ b/fs/ext4/extents_status.c | |||
@@ -145,6 +145,9 @@ static struct kmem_cache *ext4_es_cachep; | |||
145 | static int __es_insert_extent(struct inode *inode, struct extent_status *newes); | 145 | static int __es_insert_extent(struct inode *inode, struct extent_status *newes); |
146 | static int __es_remove_extent(struct inode *inode, ext4_lblk_t lblk, | 146 | static int __es_remove_extent(struct inode *inode, ext4_lblk_t lblk, |
147 | ext4_lblk_t end); | 147 | ext4_lblk_t end); |
148 | static int __es_try_to_reclaim_extents(struct ext4_inode_info *ei, | ||
149 | int nr_to_scan); | ||
150 | static int ext4_es_reclaim_extents_count(struct super_block *sb); | ||
148 | 151 | ||
149 | int __init ext4_init_es(void) | 152 | int __init ext4_init_es(void) |
150 | { | 153 | { |
@@ -280,6 +283,7 @@ out: | |||
280 | 283 | ||
281 | read_unlock(&EXT4_I(inode)->i_es_lock); | 284 | read_unlock(&EXT4_I(inode)->i_es_lock); |
282 | 285 | ||
286 | ext4_es_lru_add(inode); | ||
283 | trace_ext4_es_find_delayed_extent_exit(inode, es); | 287 | trace_ext4_es_find_delayed_extent_exit(inode, es); |
284 | } | 288 | } |
285 | 289 | ||
@@ -294,11 +298,24 @@ ext4_es_alloc_extent(struct inode *inode, ext4_lblk_t lblk, ext4_lblk_t len, | |||
294 | es->es_lblk = lblk; | 298 | es->es_lblk = lblk; |
295 | es->es_len = len; | 299 | es->es_len = len; |
296 | es->es_pblk = pblk; | 300 | es->es_pblk = pblk; |
301 | |||
302 | /* | ||
303 | * We don't count delayed extents because we never try to reclaim them | ||
304 | */ | ||
305 | if (!ext4_es_is_delayed(es)) | ||
306 | EXT4_I(inode)->i_es_lru_nr++; | ||
307 | |||
297 | return es; | 308 | return es; |
298 | } | 309 | } |
299 | 310 | ||
300 | static void ext4_es_free_extent(struct inode *inode, struct extent_status *es) | 311 | static void ext4_es_free_extent(struct inode *inode, struct extent_status *es) |
301 | { | 312 | { |
313 | /* Decrease the lru counter when this es is not delayed */ | ||
314 | if (!ext4_es_is_delayed(es)) { | ||
315 | BUG_ON(EXT4_I(inode)->i_es_lru_nr == 0); | ||
316 | EXT4_I(inode)->i_es_lru_nr--; | ||
317 | } | ||
318 | |||
302 | kmem_cache_free(ext4_es_cachep, es); | 319 | kmem_cache_free(ext4_es_cachep, es); |
303 | } | 320 | } |
304 | 321 | ||
@@ -456,6 +473,7 @@ int ext4_es_insert_extent(struct inode *inode, ext4_lblk_t lblk, | |||
456 | error: | 473 | error: |
457 | write_unlock(&EXT4_I(inode)->i_es_lock); | 474 | write_unlock(&EXT4_I(inode)->i_es_lock); |
458 | 475 | ||
476 | ext4_es_lru_add(inode); | ||
459 | ext4_es_print_tree(inode); | 477 | ext4_es_print_tree(inode); |
460 | 478 | ||
461 | return err; | 479 | return err; |
@@ -517,6 +535,7 @@ out: | |||
517 | 535 | ||
518 | read_unlock(&EXT4_I(inode)->i_es_lock); | 536 | read_unlock(&EXT4_I(inode)->i_es_lock); |
519 | 537 | ||
538 | ext4_es_lru_add(inode); | ||
520 | trace_ext4_es_lookup_extent_exit(inode, es, found); | 539 | trace_ext4_es_lookup_extent_exit(inode, es, found); |
521 | return found; | 540 | return found; |
522 | } | 541 | } |
@@ -639,3 +658,140 @@ int ext4_es_remove_extent(struct inode *inode, ext4_lblk_t lblk, | |||
639 | ext4_es_print_tree(inode); | 658 | ext4_es_print_tree(inode); |
640 | return err; | 659 | return err; |
641 | } | 660 | } |
661 | |||
662 | static int ext4_es_shrink(struct shrinker *shrink, struct shrink_control *sc) | ||
663 | { | ||
664 | struct ext4_sb_info *sbi = container_of(shrink, | ||
665 | struct ext4_sb_info, s_es_shrinker); | ||
666 | struct ext4_inode_info *ei; | ||
667 | struct list_head *cur, *tmp, scanned; | ||
668 | int nr_to_scan = sc->nr_to_scan; | ||
669 | int ret, nr_shrunk = 0; | ||
670 | |||
671 | trace_ext4_es_shrink_enter(sbi->s_sb, nr_to_scan); | ||
672 | |||
673 | if (!nr_to_scan) | ||
674 | return ext4_es_reclaim_extents_count(sbi->s_sb); | ||
675 | |||
676 | INIT_LIST_HEAD(&scanned); | ||
677 | |||
678 | spin_lock(&sbi->s_es_lru_lock); | ||
679 | list_for_each_safe(cur, tmp, &sbi->s_es_lru) { | ||
680 | list_move_tail(cur, &scanned); | ||
681 | |||
682 | ei = list_entry(cur, struct ext4_inode_info, i_es_lru); | ||
683 | |||
684 | read_lock(&ei->i_es_lock); | ||
685 | if (ei->i_es_lru_nr == 0) { | ||
686 | read_unlock(&ei->i_es_lock); | ||
687 | continue; | ||
688 | } | ||
689 | read_unlock(&ei->i_es_lock); | ||
690 | |||
691 | write_lock(&ei->i_es_lock); | ||
692 | ret = __es_try_to_reclaim_extents(ei, nr_to_scan); | ||
693 | write_unlock(&ei->i_es_lock); | ||
694 | |||
695 | nr_shrunk += ret; | ||
696 | nr_to_scan -= ret; | ||
697 | if (nr_to_scan == 0) | ||
698 | break; | ||
699 | } | ||
700 | list_splice_tail(&scanned, &sbi->s_es_lru); | ||
701 | spin_unlock(&sbi->s_es_lru_lock); | ||
702 | trace_ext4_es_shrink_exit(sbi->s_sb, nr_shrunk); | ||
703 | |||
704 | return ext4_es_reclaim_extents_count(sbi->s_sb); | ||
705 | } | ||
706 | |||
707 | void ext4_es_register_shrinker(struct super_block *sb) | ||
708 | { | ||
709 | struct ext4_sb_info *sbi; | ||
710 | |||
711 | sbi = EXT4_SB(sb); | ||
712 | INIT_LIST_HEAD(&sbi->s_es_lru); | ||
713 | spin_lock_init(&sbi->s_es_lru_lock); | ||
714 | sbi->s_es_shrinker.shrink = ext4_es_shrink; | ||
715 | sbi->s_es_shrinker.seeks = DEFAULT_SEEKS; | ||
716 | register_shrinker(&sbi->s_es_shrinker); | ||
717 | } | ||
718 | |||
719 | void ext4_es_unregister_shrinker(struct super_block *sb) | ||
720 | { | ||
721 | unregister_shrinker(&EXT4_SB(sb)->s_es_shrinker); | ||
722 | } | ||
723 | |||
724 | void ext4_es_lru_add(struct inode *inode) | ||
725 | { | ||
726 | struct ext4_inode_info *ei = EXT4_I(inode); | ||
727 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); | ||
728 | |||
729 | spin_lock(&sbi->s_es_lru_lock); | ||
730 | if (list_empty(&ei->i_es_lru)) | ||
731 | list_add_tail(&ei->i_es_lru, &sbi->s_es_lru); | ||
732 | else | ||
733 | list_move_tail(&ei->i_es_lru, &sbi->s_es_lru); | ||
734 | spin_unlock(&sbi->s_es_lru_lock); | ||
735 | } | ||
736 | |||
737 | void ext4_es_lru_del(struct inode *inode) | ||
738 | { | ||
739 | struct ext4_inode_info *ei = EXT4_I(inode); | ||
740 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); | ||
741 | |||
742 | spin_lock(&sbi->s_es_lru_lock); | ||
743 | if (!list_empty(&ei->i_es_lru)) | ||
744 | list_del_init(&ei->i_es_lru); | ||
745 | spin_unlock(&sbi->s_es_lru_lock); | ||
746 | } | ||
747 | |||
748 | static int ext4_es_reclaim_extents_count(struct super_block *sb) | ||
749 | { | ||
750 | struct ext4_sb_info *sbi = EXT4_SB(sb); | ||
751 | struct ext4_inode_info *ei; | ||
752 | struct list_head *cur; | ||
753 | int nr_cached = 0; | ||
754 | |||
755 | spin_lock(&sbi->s_es_lru_lock); | ||
756 | list_for_each(cur, &sbi->s_es_lru) { | ||
757 | ei = list_entry(cur, struct ext4_inode_info, i_es_lru); | ||
758 | read_lock(&ei->i_es_lock); | ||
759 | nr_cached += ei->i_es_lru_nr; | ||
760 | read_unlock(&ei->i_es_lock); | ||
761 | } | ||
762 | spin_unlock(&sbi->s_es_lru_lock); | ||
763 | trace_ext4_es_reclaim_extents_count(sb, nr_cached); | ||
764 | return nr_cached; | ||
765 | } | ||
766 | |||
767 | static int __es_try_to_reclaim_extents(struct ext4_inode_info *ei, | ||
768 | int nr_to_scan) | ||
769 | { | ||
770 | struct inode *inode = &ei->vfs_inode; | ||
771 | struct ext4_es_tree *tree = &ei->i_es_tree; | ||
772 | struct rb_node *node; | ||
773 | struct extent_status *es; | ||
774 | int nr_shrunk = 0; | ||
775 | |||
776 | if (ei->i_es_lru_nr == 0) | ||
777 | return 0; | ||
778 | |||
779 | node = rb_first(&tree->root); | ||
780 | while (node != NULL) { | ||
781 | es = rb_entry(node, struct extent_status, rb_node); | ||
782 | node = rb_next(&es->rb_node); | ||
783 | /* | ||
784 | * We can't reclaim delayed extent from status tree because | ||
785 | * fiemap, bigallic, and seek_data/hole need to use it. | ||
786 | */ | ||
787 | if (!ext4_es_is_delayed(es)) { | ||
788 | rb_erase(&es->rb_node, &tree->root); | ||
789 | ext4_es_free_extent(inode, es); | ||
790 | nr_shrunk++; | ||
791 | if (--nr_to_scan == 0) | ||
792 | break; | ||
793 | } | ||
794 | } | ||
795 | tree->cache_es = NULL; | ||
796 | return nr_shrunk; | ||
797 | } | ||
diff --git a/fs/ext4/extents_status.h b/fs/ext4/extents_status.h index 8ffc90c784fa..cf83e77b16cb 100644 --- a/fs/ext4/extents_status.h +++ b/fs/ext4/extents_status.h | |||
@@ -106,4 +106,9 @@ static inline void ext4_es_store_status(struct extent_status *es, | |||
106 | es->es_pblk = block; | 106 | es->es_pblk = block; |
107 | } | 107 | } |
108 | 108 | ||
/* Extent status shrinker: set up / tear down per super block. */
extern void ext4_es_register_shrinker(struct super_block *sb);
extern void ext4_es_unregister_shrinker(struct super_block *sb);

/* Extent status LRU: queue an inode at the tail / drop it from the list. */
extern void ext4_es_lru_add(struct inode *inode);
extern void ext4_es_lru_del(struct inode *inode);
113 | |||
109 | #endif /* _EXT4_EXTENTS_STATUS_H */ | 114 | #endif /* _EXT4_EXTENTS_STATUS_H */ |
diff --git a/fs/ext4/super.c b/fs/ext4/super.c index d80bfe5ac11c..373d46cd5d3f 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c | |||
@@ -755,6 +755,7 @@ static void ext4_put_super(struct super_block *sb) | |||
755 | ext4_abort(sb, "Couldn't clean up the journal"); | 755 | ext4_abort(sb, "Couldn't clean up the journal"); |
756 | } | 756 | } |
757 | 757 | ||
758 | ext4_es_unregister_shrinker(sb); | ||
758 | del_timer(&sbi->s_err_report); | 759 | del_timer(&sbi->s_err_report); |
759 | ext4_release_system_zone(sb); | 760 | ext4_release_system_zone(sb); |
760 | ext4_mb_release(sb); | 761 | ext4_mb_release(sb); |
@@ -840,6 +841,8 @@ static struct inode *ext4_alloc_inode(struct super_block *sb) | |||
840 | spin_lock_init(&ei->i_prealloc_lock); | 841 | spin_lock_init(&ei->i_prealloc_lock); |
841 | ext4_es_init_tree(&ei->i_es_tree); | 842 | ext4_es_init_tree(&ei->i_es_tree); |
842 | rwlock_init(&ei->i_es_lock); | 843 | rwlock_init(&ei->i_es_lock); |
844 | INIT_LIST_HEAD(&ei->i_es_lru); | ||
845 | ei->i_es_lru_nr = 0; | ||
843 | ei->i_reserved_data_blocks = 0; | 846 | ei->i_reserved_data_blocks = 0; |
844 | ei->i_reserved_meta_blocks = 0; | 847 | ei->i_reserved_meta_blocks = 0; |
845 | ei->i_allocated_meta_blocks = 0; | 848 | ei->i_allocated_meta_blocks = 0; |
@@ -928,6 +931,7 @@ void ext4_clear_inode(struct inode *inode) | |||
928 | dquot_drop(inode); | 931 | dquot_drop(inode); |
929 | ext4_discard_preallocations(inode); | 932 | ext4_discard_preallocations(inode); |
930 | ext4_es_remove_extent(inode, 0, EXT_MAX_BLOCKS); | 933 | ext4_es_remove_extent(inode, 0, EXT_MAX_BLOCKS); |
934 | ext4_es_lru_del(inode); | ||
931 | if (EXT4_I(inode)->jinode) { | 935 | if (EXT4_I(inode)->jinode) { |
932 | jbd2_journal_release_jbd_inode(EXT4_JOURNAL(inode), | 936 | jbd2_journal_release_jbd_inode(EXT4_JOURNAL(inode), |
933 | EXT4_I(inode)->jinode); | 937 | EXT4_I(inode)->jinode); |
@@ -3693,6 +3697,9 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
3693 | sbi->s_max_writeback_mb_bump = 128; | 3697 | sbi->s_max_writeback_mb_bump = 128; |
3694 | sbi->s_extent_max_zeroout_kb = 32; | 3698 | sbi->s_extent_max_zeroout_kb = 32; |
3695 | 3699 | ||
3700 | /* Register extent status tree shrinker */ | ||
3701 | ext4_es_register_shrinker(sb); | ||
3702 | |||
3696 | /* | 3703 | /* |
3697 | * set up enough so that it can read an inode | 3704 | * set up enough so that it can read an inode |
3698 | */ | 3705 | */ |
diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h index 1e590b68cec4..c0457c0d1a68 100644 --- a/include/trace/events/ext4.h +++ b/include/trace/events/ext4.h | |||
@@ -2255,6 +2255,66 @@ TRACE_EVENT(ext4_es_lookup_extent_exit, | |||
2255 | __entry->found ? __entry->status : 0) | 2255 | __entry->found ? __entry->status : 0) |
2256 | ); | 2256 | ); |
2257 | 2257 | ||
2258 | TRACE_EVENT(ext4_es_reclaim_extents_count, | ||
2259 | TP_PROTO(struct super_block *sb, int nr_cached), | ||
2260 | |||
2261 | TP_ARGS(sb, nr_cached), | ||
2262 | |||
2263 | TP_STRUCT__entry( | ||
2264 | __field( dev_t, dev ) | ||
2265 | __field( int, nr_cached ) | ||
2266 | ), | ||
2267 | |||
2268 | TP_fast_assign( | ||
2269 | __entry->dev = sb->s_dev; | ||
2270 | __entry->nr_cached = nr_cached; | ||
2271 | ), | ||
2272 | |||
2273 | TP_printk("dev %d,%d cached objects nr %d", | ||
2274 | MAJOR(__entry->dev), MINOR(__entry->dev), | ||
2275 | __entry->nr_cached) | ||
2276 | ); | ||
2277 | |||
2278 | TRACE_EVENT(ext4_es_shrink_enter, | ||
2279 | TP_PROTO(struct super_block *sb, int nr_to_scan), | ||
2280 | |||
2281 | TP_ARGS(sb, nr_to_scan), | ||
2282 | |||
2283 | TP_STRUCT__entry( | ||
2284 | __field( dev_t, dev ) | ||
2285 | __field( int, nr_to_scan ) | ||
2286 | ), | ||
2287 | |||
2288 | TP_fast_assign( | ||
2289 | __entry->dev = sb->s_dev; | ||
2290 | __entry->nr_to_scan = nr_to_scan; | ||
2291 | ), | ||
2292 | |||
2293 | TP_printk("dev %d,%d nr to scan %d", | ||
2294 | MAJOR(__entry->dev), MINOR(__entry->dev), | ||
2295 | __entry->nr_to_scan) | ||
2296 | ); | ||
2297 | |||
2298 | TRACE_EVENT(ext4_es_shrink_exit, | ||
2299 | TP_PROTO(struct super_block *sb, int shrunk_nr), | ||
2300 | |||
2301 | TP_ARGS(sb, shrunk_nr), | ||
2302 | |||
2303 | TP_STRUCT__entry( | ||
2304 | __field( dev_t, dev ) | ||
2305 | __field( int, shrunk_nr ) | ||
2306 | ), | ||
2307 | |||
2308 | TP_fast_assign( | ||
2309 | __entry->dev = sb->s_dev; | ||
2310 | __entry->shrunk_nr = shrunk_nr; | ||
2311 | ), | ||
2312 | |||
2313 | TP_printk("dev %d,%d nr to scan %d", | ||
2314 | MAJOR(__entry->dev), MINOR(__entry->dev), | ||
2315 | __entry->shrunk_nr) | ||
2316 | ); | ||
2317 | |||
2258 | #endif /* _TRACE_EXT4_H */ | 2318 | #endif /* _TRACE_EXT4_H */ |
2259 | 2319 | ||
2260 | /* This part must be outside protection */ | 2320 | /* This part must be outside protection */ |