author     Dave Chinner <dchinner@redhat.com>      2011-07-08 00:14:39 -0400
committer  Al Viro <viro@zeniv.linux.org.uk>       2011-07-20 01:44:35 -0400
commit     98b745c647a5a90c3c21ea43cbfad9a47b0dfad7 (patch)
tree       08917a620b6d15076223c1ef75996a1a76a13abd
parent     fcb94f72d3e0f4f34b326c2986da8e5996daf72c (diff)
inode: Make unused inode LRU per superblock
The inode unused list is currently a global LRU. This does not match the other global filesystem cache - the dentry cache - which uses per-superblock LRU lists. Hence we have related filesystem object types using different LRU reclamation schemes.

To enable a per-superblock filesystem cache shrinker, both of these caches need to have per-sb unused object LRU lists. Hence this patch converts the global inode LRU to per-sb LRUs. The patch only does rudimentary per-sb proportioning in the shrinker infrastructure, as this gets removed when the per-sb shrinker callouts are introduced later on.

Signed-off-by: Dave Chinner <dchinner@redhat.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
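For reference, the "rudimentary per-sb proportioning" the message mentions splits a global scan count across superblocks in proportion to each superblock's unused-inode LRU length, using only integer division so no intermediate product can overflow. Below is a minimal, user-space sketch of that arithmetic only; it is illustrative, not kernel code, and the superblock names and counts are made-up example values rather than anything taken from the patch.

/*
 * Illustrative only: standalone sketch of the proportioning arithmetic
 * used by the new prune_icache() in the patch below. The struct and the
 * totals are invented for the example; the real code walks the
 * super_blocks list under sb_lock instead of an array.
 */
#include <stdio.h>

struct sb_example {
	const char *name;
	int nr_inodes_unused;	/* per-sb LRU length (s_nr_inodes_unused) */
};

int main(void)
{
	struct sb_example sbs[] = {
		{ "ext4",  6000 },	/* hypothetical figures */
		{ "xfs",   3000 },
		{ "tmpfs", 1000 },
	};
	int unused = 10000;	/* global unused-inode count (inodes_stat.nr_unused) */
	int count = 128;	/* total number of inodes the shrinker asked to scan */
	/* divide first: avoids the count * nr / unused intermediate overflow */
	int prune_ratio = (count >= unused) ? 1 : unused / count;

	for (int i = 0; i < 3; i++) {
		int w_count = (prune_ratio != 1) ?
			sbs[i].nr_inodes_unused / prune_ratio + 1 :
			sbs[i].nr_inodes_unused;
		printf("%-6s scan %d of %d unused inodes\n",
		       sbs[i].name, w_count, sbs[i].nr_inodes_unused);
	}
	return 0;
}

With these example numbers prune_ratio is 78, so the three superblocks are asked to scan 77, 39 and 13 inodes respectively, roughly their share of the 128-inode scan target.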
-rw-r--r--  fs/inode.c          91
-rw-r--r--  fs/super.c           1
-rw-r--r--  include/linux/fs.h   4
3 files changed, 85 insertions, 11 deletions
diff --git a/fs/inode.c b/fs/inode.c
index 9a0361121712..8c3491302e0c 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -34,7 +34,7 @@
  * inode->i_lock protects:
  *   inode->i_state, inode->i_hash, __iget()
  * inode_lru_lock protects:
- *   inode_lru, inode->i_lru
+ *   inode->i_sb->s_inode_lru, inode->i_lru
  * inode_sb_list_lock protects:
  *   sb->s_inodes, inode->i_sb_list
  * inode_wb_list_lock protects:
@@ -64,7 +64,6 @@ static unsigned int i_hash_shift __read_mostly;
 static struct hlist_head *inode_hashtable __read_mostly;
 static __cacheline_aligned_in_smp DEFINE_SPINLOCK(inode_hash_lock);
 
-static LIST_HEAD(inode_lru);
 static DEFINE_SPINLOCK(inode_lru_lock);
 
 __cacheline_aligned_in_smp DEFINE_SPINLOCK(inode_sb_list_lock);
@@ -345,7 +344,8 @@ static void inode_lru_list_add(struct inode *inode)
 {
 	spin_lock(&inode_lru_lock);
 	if (list_empty(&inode->i_lru)) {
-		list_add(&inode->i_lru, &inode_lru);
+		list_add(&inode->i_lru, &inode->i_sb->s_inode_lru);
+		inode->i_sb->s_nr_inodes_unused++;
 		this_cpu_inc(nr_unused);
 	}
 	spin_unlock(&inode_lru_lock);
@@ -356,6 +356,7 @@ static void inode_lru_list_del(struct inode *inode)
 	spin_lock(&inode_lru_lock);
 	if (!list_empty(&inode->i_lru)) {
 		list_del_init(&inode->i_lru);
+		inode->i_sb->s_nr_inodes_unused--;
 		this_cpu_dec(nr_unused);
 	}
 	spin_unlock(&inode_lru_lock);
@@ -628,21 +629,20 @@ static int can_unuse(struct inode *inode)
  * LRU does not have strict ordering. Hence we don't want to reclaim inodes
  * with this flag set because they are the inodes that are out of order.
  */
-static void prune_icache(int nr_to_scan)
+static void shrink_icache_sb(struct super_block *sb, int *nr_to_scan)
 {
 	LIST_HEAD(freeable);
 	int nr_scanned;
 	unsigned long reap = 0;
 
-	down_read(&iprune_sem);
 	spin_lock(&inode_lru_lock);
-	for (nr_scanned = 0; nr_scanned < nr_to_scan; nr_scanned++) {
+	for (nr_scanned = *nr_to_scan; nr_scanned >= 0; nr_scanned--) {
 		struct inode *inode;
 
-		if (list_empty(&inode_lru))
+		if (list_empty(&sb->s_inode_lru))
 			break;
 
-		inode = list_entry(inode_lru.prev, struct inode, i_lru);
+		inode = list_entry(sb->s_inode_lru.prev, struct inode, i_lru);
 
 		/*
 		 * we are inverting the inode_lru_lock/inode->i_lock here,
@@ -650,7 +650,7 @@ static void prune_icache(int nr_to_scan)
 		 * inode to the back of the list so we don't spin on it.
 		 */
 		if (!spin_trylock(&inode->i_lock)) {
-			list_move(&inode->i_lru, &inode_lru);
+			list_move(&inode->i_lru, &sb->s_inode_lru);
 			continue;
 		}
 
@@ -662,6 +662,7 @@ static void prune_icache(int nr_to_scan)
 		    (inode->i_state & ~I_REFERENCED)) {
 			list_del_init(&inode->i_lru);
 			spin_unlock(&inode->i_lock);
+			sb->s_nr_inodes_unused--;
 			this_cpu_dec(nr_unused);
 			continue;
 		}
@@ -669,7 +670,7 @@ static void prune_icache(int nr_to_scan)
 		/* recently referenced inodes get one more pass */
 		if (inode->i_state & I_REFERENCED) {
 			inode->i_state &= ~I_REFERENCED;
-			list_move(&inode->i_lru, &inode_lru);
+			list_move(&inode->i_lru, &sb->s_inode_lru);
 			spin_unlock(&inode->i_lock);
 			continue;
 		}
@@ -683,7 +684,7 @@ static void prune_icache(int nr_to_scan)
 			iput(inode);
 			spin_lock(&inode_lru_lock);
 
-			if (inode != list_entry(inode_lru.next,
+			if (inode != list_entry(sb->s_inode_lru.next,
 						struct inode, i_lru))
 				continue;	/* wrong inode or list_empty */
 			/* avoid lock inversions with trylock */
@@ -699,6 +700,7 @@ static void prune_icache(int nr_to_scan)
 		spin_unlock(&inode->i_lock);
 
 		list_move(&inode->i_lru, &freeable);
+		sb->s_nr_inodes_unused--;
 		this_cpu_dec(nr_unused);
 	}
 	if (current_is_kswapd())
@@ -706,8 +708,75 @@ static void prune_icache(int nr_to_scan)
 	else
 		__count_vm_events(PGINODESTEAL, reap);
 	spin_unlock(&inode_lru_lock);
+	*nr_to_scan = nr_scanned;
 
 	dispose_list(&freeable);
+}
+
+static void prune_icache(int count)
+{
+	struct super_block *sb, *p = NULL;
+	int w_count;
+	int unused = inodes_stat.nr_unused;
+	int prune_ratio;
+	int pruned;
+
+	if (unused == 0 || count == 0)
+		return;
+	down_read(&iprune_sem);
+	if (count >= unused)
+		prune_ratio = 1;
+	else
+		prune_ratio = unused / count;
+	spin_lock(&sb_lock);
+	list_for_each_entry(sb, &super_blocks, s_list) {
+		if (list_empty(&sb->s_instances))
+			continue;
+		if (sb->s_nr_inodes_unused == 0)
+			continue;
+		sb->s_count++;
+		/* Now, we reclaim unused dentrins with fairness.
+		 * We reclaim them same percentage from each superblock.
+		 * We calculate number of dentries to scan on this sb
+		 * as follows, but the implementation is arranged to avoid
+		 * overflows:
+		 * number of dentries to scan on this sb =
+		 * count * (number of dentries on this sb /
+		 * number of dentries in the machine)
+		 */
+		spin_unlock(&sb_lock);
+		if (prune_ratio != 1)
+			w_count = (sb->s_nr_inodes_unused / prune_ratio) + 1;
+		else
+			w_count = sb->s_nr_inodes_unused;
+		pruned = w_count;
+		/*
+		 * We need to be sure this filesystem isn't being unmounted,
+		 * otherwise we could race with generic_shutdown_super(), and
+		 * end up holding a reference to an inode while the filesystem
+		 * is unmounted.  So we try to get s_umount, and make sure
+		 * s_root isn't NULL.
+		 */
+		if (down_read_trylock(&sb->s_umount)) {
+			if ((sb->s_root != NULL) &&
+			    (!list_empty(&sb->s_dentry_lru))) {
+				shrink_icache_sb(sb, &w_count);
+				pruned -= w_count;
+			}
+			up_read(&sb->s_umount);
+		}
+		spin_lock(&sb_lock);
+		if (p)
+			__put_super(p);
+		count -= pruned;
+		p = sb;
+		/* more work left to do? */
+		if (count <= 0)
+			break;
+	}
+	if (p)
+		__put_super(p);
+	spin_unlock(&sb_lock);
 	up_read(&iprune_sem);
 }
 
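A worked instance of the overflow note in the prune_icache() comment above, with made-up numbers rather than figures from the patch: with count = 128 and a superblock holding 20,000,000 of 25,000,000 total unused inodes, the naive count * s_nr_inodes_unused product is 2,560,000,000 and no longer fits in a signed 32-bit int. Dividing first, prune_ratio = 25,000,000 / 128 = 195,312 and w_count = 20,000,000 / 195,312 + 1 = 103, which keeps every intermediate small while still giving roughly the proportional share (20/25 of 128 is about 102).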
diff --git a/fs/super.c b/fs/super.c
index 263edeb9f0e9..e8e6dbfefe8c 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -77,6 +77,7 @@ static struct super_block *alloc_super(struct file_system_type *type)
 	INIT_HLIST_BL_HEAD(&s->s_anon);
 	INIT_LIST_HEAD(&s->s_inodes);
 	INIT_LIST_HEAD(&s->s_dentry_lru);
+	INIT_LIST_HEAD(&s->s_inode_lru);
 	init_rwsem(&s->s_umount);
 	mutex_init(&s->s_lock);
 	lockdep_set_class(&s->s_umount, &type->s_umount_key);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index a0011aef4338..9724f0a48742 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1397,6 +1397,10 @@ struct super_block {
 	struct list_head	s_dentry_lru;	/* unused dentry lru */
 	int			s_nr_dentry_unused;	/* # of dentry on lru */
 
+	/* inode_lru_lock protects s_inode_lru and s_nr_inodes_unused */
+	struct list_head	s_inode_lru;		/* unused inode lru */
+	int			s_nr_inodes_unused;	/* # of inodes on lru */
+
 	struct block_device	*s_bdev;
 	struct backing_dev_info *s_bdi;
 	struct mtd_info		*s_mtd;