author     Dave Chinner <dchinner@redhat.com>        2011-07-08 00:14:39 -0400
committer  Al Viro <viro@zeniv.linux.org.uk>         2011-07-20 01:44:35 -0400
commit     98b745c647a5a90c3c21ea43cbfad9a47b0dfad7
tree       08917a620b6d15076223c1ef75996a1a76a13abd
parent     fcb94f72d3e0f4f34b326c2986da8e5996daf72c
inode: Make unused inode LRU per superblock
The inode unused list is currently a global LRU. This does not match
the other global filesystem cache - the dentry cache - which uses
per-superblock LRU lists. Hence we have related filesystem object
types using different LRU reclamation schemes.
To enable a per-superblock filesystem cache shrinker, both of these
caches need to have per-sb unused object LRU lists. Hence this patch
converts the global inode LRU to per-sb LRUs.
The patch only does rudimentary per-sb proportioning in the shrinker
infrastructure, as this gets removed when the per-sb shrinker
callouts are introduced later on.
Signed-off-by: Dave Chinner <dchinner@redhat.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
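
Before reading the hunks below, the shape of the conversion can be seen in a
minimal, self-contained C sketch of the pattern inode_lru_list_add() and
inode_lru_list_del() adopt: objects queue on their *owner's* LRU and bump a
per-owner count, instead of living on one global list. Every name here
(node, container, object, lru_list_add, ...) is an invented stand-in for the
kernel's list_head/super_block/inode machinery, not the real API, and all
locking is omitted.

#include <stdio.h>

struct node { struct node *prev, *next; };

struct container {                /* plays the role of struct super_block */
        struct node lru;          /* cf. sb->s_inode_lru */
        int nr_unused;            /* cf. sb->s_nr_inodes_unused */
};

struct object {                   /* plays the role of struct inode */
        struct container *owner;  /* cf. inode->i_sb */
        struct node lru_link;     /* cf. inode->i_lru */
};

static void list_init(struct node *n) { n->prev = n->next = n; }
static int list_empty(const struct node *n) { return n->next == n; }

static void list_add(struct node *entry, struct node *head)
{
        entry->next = head->next;
        entry->prev = head;
        head->next->prev = entry;
        head->next = entry;
}

static void list_del_init(struct node *entry)
{
        entry->prev->next = entry->next;
        entry->next->prev = entry->prev;
        list_init(entry);
}

/* The per-sb pattern: add to the owner's LRU, keep a per-owner count. */
static void lru_list_add(struct object *obj)
{
        if (list_empty(&obj->lru_link)) {
                list_add(&obj->lru_link, &obj->owner->lru);
                obj->owner->nr_unused++;
        }
}

static void lru_list_del(struct object *obj)
{
        if (!list_empty(&obj->lru_link)) {
                list_del_init(&obj->lru_link);
                obj->owner->nr_unused--;
        }
}

int main(void)
{
        struct container sb = { .nr_unused = 0 };
        struct object a = { .owner = &sb }, b = { .owner = &sb };

        list_init(&sb.lru);
        list_init(&a.lru_link);
        list_init(&b.lru_link);

        lru_list_add(&a);
        lru_list_add(&b);
        lru_list_del(&a);
        printf("unused on this sb: %d\n", sb.nr_unused);  /* prints 1 */
        return 0;
}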
-rw-r--r--  fs/inode.c          91
-rw-r--r--  fs/super.c           1
-rw-r--r--  include/linux/fs.h   4
3 files changed, 85 insertions(+), 11 deletions(-)
diff --git a/fs/inode.c b/fs/inode.c
index 9a0361121712..8c3491302e0c 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -34,7 +34,7 @@
  * inode->i_lock protects:
  *   inode->i_state, inode->i_hash, __iget()
  * inode_lru_lock protects:
- *   inode_lru, inode->i_lru
+ *   inode->i_sb->s_inode_lru, inode->i_lru
  * inode_sb_list_lock protects:
  *   sb->s_inodes, inode->i_sb_list
  * inode_wb_list_lock protects:
@@ -64,7 +64,6 @@ static unsigned int i_hash_shift __read_mostly;
 static struct hlist_head *inode_hashtable __read_mostly;
 static __cacheline_aligned_in_smp DEFINE_SPINLOCK(inode_hash_lock);
 
-static LIST_HEAD(inode_lru);
 static DEFINE_SPINLOCK(inode_lru_lock);
 
 __cacheline_aligned_in_smp DEFINE_SPINLOCK(inode_sb_list_lock);
@@ -345,7 +344,8 @@ static void inode_lru_list_add(struct inode *inode)
 {
         spin_lock(&inode_lru_lock);
         if (list_empty(&inode->i_lru)) {
-                list_add(&inode->i_lru, &inode_lru);
+                list_add(&inode->i_lru, &inode->i_sb->s_inode_lru);
+                inode->i_sb->s_nr_inodes_unused++;
                 this_cpu_inc(nr_unused);
         }
         spin_unlock(&inode_lru_lock);
@@ -356,6 +356,7 @@ static void inode_lru_list_del(struct inode *inode)
         spin_lock(&inode_lru_lock);
         if (!list_empty(&inode->i_lru)) {
                 list_del_init(&inode->i_lru);
+                inode->i_sb->s_nr_inodes_unused--;
                 this_cpu_dec(nr_unused);
         }
         spin_unlock(&inode_lru_lock);
@@ -628,21 +629,20 @@ static int can_unuse(struct inode *inode)
  * LRU does not have strict ordering. Hence we don't want to reclaim inodes
  * with this flag set because they are the inodes that are out of order.
  */
-static void prune_icache(int nr_to_scan)
+static void shrink_icache_sb(struct super_block *sb, int *nr_to_scan)
 {
         LIST_HEAD(freeable);
         int nr_scanned;
         unsigned long reap = 0;
 
-        down_read(&iprune_sem);
         spin_lock(&inode_lru_lock);
-        for (nr_scanned = 0; nr_scanned < nr_to_scan; nr_scanned++) {
+        for (nr_scanned = *nr_to_scan; nr_scanned >= 0; nr_scanned--) {
                 struct inode *inode;
 
-                if (list_empty(&inode_lru))
+                if (list_empty(&sb->s_inode_lru))
                         break;
 
-                inode = list_entry(inode_lru.prev, struct inode, i_lru);
+                inode = list_entry(sb->s_inode_lru.prev, struct inode, i_lru);
 
                 /*
                  * we are inverting the inode_lru_lock/inode->i_lock here,
@@ -650,7 +650,7 @@ static void prune_icache(int nr_to_scan)
                  * inode to the back of the list so we don't spin on it.
                  */
                 if (!spin_trylock(&inode->i_lock)) {
-                        list_move(&inode->i_lru, &inode_lru);
+                        list_move(&inode->i_lru, &sb->s_inode_lru);
                         continue;
                 }
 
@@ -662,6 +662,7 @@ static void prune_icache(int nr_to_scan)
                     (inode->i_state & ~I_REFERENCED)) {
                         list_del_init(&inode->i_lru);
                         spin_unlock(&inode->i_lock);
+                        sb->s_nr_inodes_unused--;
                         this_cpu_dec(nr_unused);
                         continue;
                 }
@@ -669,7 +670,7 @@ static void prune_icache(int nr_to_scan)
                 /* recently referenced inodes get one more pass */
                 if (inode->i_state & I_REFERENCED) {
                         inode->i_state &= ~I_REFERENCED;
-                        list_move(&inode->i_lru, &inode_lru);
+                        list_move(&inode->i_lru, &sb->s_inode_lru);
                         spin_unlock(&inode->i_lock);
                         continue;
                 }
@@ -683,7 +684,7 @@ static void prune_icache(int nr_to_scan)
                         iput(inode);
                         spin_lock(&inode_lru_lock);
 
-                        if (inode != list_entry(inode_lru.next,
+                        if (inode != list_entry(sb->s_inode_lru.next,
                                                 struct inode, i_lru))
                                 continue;       /* wrong inode or list_empty */
                         /* avoid lock inversions with trylock */
@@ -699,6 +700,7 @@ static void prune_icache(int nr_to_scan)
                 spin_unlock(&inode->i_lock);
 
                 list_move(&inode->i_lru, &freeable);
+                sb->s_nr_inodes_unused--;
                 this_cpu_dec(nr_unused);
         }
         if (current_is_kswapd())
@@ -706,8 +708,75 @@ static void prune_icache(int nr_to_scan)
         else
                 __count_vm_events(PGINODESTEAL, reap);
         spin_unlock(&inode_lru_lock);
+        *nr_to_scan = nr_scanned;
 
         dispose_list(&freeable);
+}
+
+static void prune_icache(int count)
+{
+        struct super_block *sb, *p = NULL;
+        int w_count;
+        int unused = inodes_stat.nr_unused;
+        int prune_ratio;
+        int pruned;
+
+        if (unused == 0 || count == 0)
+                return;
+        down_read(&iprune_sem);
+        if (count >= unused)
+                prune_ratio = 1;
+        else
+                prune_ratio = unused / count;
+        spin_lock(&sb_lock);
+        list_for_each_entry(sb, &super_blocks, s_list) {
+                if (list_empty(&sb->s_instances))
+                        continue;
+                if (sb->s_nr_inodes_unused == 0)
+                        continue;
+                sb->s_count++;
+                /* Now, we reclaim unused inodes with fairness.
+                 * We reclaim the same percentage from each superblock.
+                 * We calculate the number of inodes to scan on this sb
+                 * as follows, but the implementation is arranged to avoid
+                 * overflows:
+                 * number of inodes to scan on this sb =
+                 *      count * (number of inodes on this sb /
+                 *               number of inodes in the machine)
+                 */
+                spin_unlock(&sb_lock);
+                if (prune_ratio != 1)
+                        w_count = (sb->s_nr_inodes_unused / prune_ratio) + 1;
+                else
+                        w_count = sb->s_nr_inodes_unused;
+                pruned = w_count;
+                /*
+                 * We need to be sure this filesystem isn't being unmounted,
+                 * otherwise we could race with generic_shutdown_super(), and
+                 * end up holding a reference to an inode while the filesystem
+                 * is unmounted.  So we try to get s_umount, and make sure
+                 * s_root isn't NULL.
+                 */
+                if (down_read_trylock(&sb->s_umount)) {
+                        if ((sb->s_root != NULL) &&
+                            (!list_empty(&sb->s_inode_lru))) {
+                                shrink_icache_sb(sb, &w_count);
+                                pruned -= w_count;
+                        }
+                        up_read(&sb->s_umount);
+                }
+                spin_lock(&sb_lock);
+                if (p)
+                        __put_super(p);
+                count -= pruned;
+                p = sb;
+                /* more work left to do? */
+                if (count <= 0)
+                        break;
+        }
+        if (p)
+                __put_super(p);
+        spin_unlock(&sb_lock);
         up_read(&iprune_sem);
 }
 
diff --git a/fs/super.c b/fs/super.c
index 263edeb9f0e9..e8e6dbfefe8c 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -77,6 +77,7 @@ static struct super_block *alloc_super(struct file_system_type *type)
         INIT_HLIST_BL_HEAD(&s->s_anon);
         INIT_LIST_HEAD(&s->s_inodes);
         INIT_LIST_HEAD(&s->s_dentry_lru);
+        INIT_LIST_HEAD(&s->s_inode_lru);
         init_rwsem(&s->s_umount);
         mutex_init(&s->s_lock);
         lockdep_set_class(&s->s_umount, &type->s_umount_key);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index a0011aef4338..9724f0a48742 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1397,6 +1397,10 @@ struct super_block {
         struct list_head        s_dentry_lru;        /* unused dentry lru */
         int                     s_nr_dentry_unused;  /* # of dentry on lru */
 
+        /* inode_lru_lock protects s_inode_lru and s_nr_inodes_unused */
+        struct list_head        s_inode_lru;         /* unused inode lru */
+        int                     s_nr_inodes_unused;  /* # of inodes on lru */
+
         struct block_device     *s_bdev;
         struct backing_dev_info *s_bdi;
         struct mtd_info         *s_mtd;