path: root/fs/inode.c
author	Dave Chinner <dchinner@redhat.com>	2011-07-08 00:14:42 -0400
committer	Al Viro <viro@zeniv.linux.org.uk>	2011-07-20 20:47:10 -0400
commit	b0d40c92adafde7c2d81203ce7c1c69275f41140 (patch)
tree	f75a19dcd1a37aff23dc43323b58f014b1297c6b	/fs/inode.c
parent	12ad3ab66103e6582ca69c0c9de18b13487eaaef (diff)
superblock: introduce per-sb cache shrinker infrastructure
With context based shrinkers, we can implement a per-superblock shrinker that shrinks the caches attached to the superblock. We currently have global shrinkers for the inode and dentry caches that split up into per-superblock operations via a coarse proportioning method that does not batch very well. The global shrinkers also have a dependency - dentries pin inodes - so we have to be very careful about how we register the global shrinkers so that the implicit call order is always correct.

With a per-sb shrinker callout, we can encode this dependency directly into the per-sb shrinker, hence avoiding the need for strict ordering of shrinker registrations. We also have no need for any proportioning code, as the shrinker subsystem already provides this functionality across all shrinkers.

Allowing the shrinker to operate on a single superblock at a time means fewer superblock list traversals and less locking, and reclaim should batch more effectively. This should result in less CPU overhead for reclaim and potentially faster reclaim of items from each filesystem.

Signed-off-by: Dave Chinner <dchinner@redhat.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
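The per-sb shrinker callout itself is added outside this file (the diff below covers only fs/inode.c), so as a rough, hypothetical sketch - not the code added by this patch - the example below shows the shape such a callback could take: it trims the dcache before the icache to encode the "dentries pin inodes" dependency. prune_icache_sb() is the helper introduced in this diff and prune_dcache_sb() is its existing dcache counterpart; the s_shrink and s_nr_dentry_unused fields, the example_* names, and the simple proportioning are assumptions for illustration.

static int example_prune_super(struct shrinker *shrink, struct shrink_control *sc)
{
	/* Hypothetical per-sb callback; s_shrink is an assumed field. */
	struct super_block *sb = container_of(shrink, struct super_block, s_shrink);
	int total_objects, dentries, inodes;

	/* Same deadlock-avoidance rule as the old global shrinker. */
	if (sc->nr_to_scan && !(sc->gfp_mask & __GFP_FS))
		return -1;

	/* +1 guards against divide-by-zero when both LRUs are empty. */
	total_objects = sb->s_nr_dentry_unused + sb->s_nr_inodes_unused + 1;

	if (sc->nr_to_scan) {
		/* Split the scan count between the two per-sb LRUs. */
		dentries = (sc->nr_to_scan * sb->s_nr_dentry_unused) / total_objects;
		inodes = sc->nr_to_scan - dentries;

		/* Dentries pin inodes, so prune the dcache first. */
		prune_dcache_sb(sb, dentries);
		prune_icache_sb(sb, inodes);
	}

	return (total_objects / 100) * sysctl_vfs_cache_pressure;
}

Because the shrinker core already proportions work across all registered shrinkers, each superblock only has to split its own scan count between its dentry and inode LRUs.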
Diffstat (limited to 'fs/inode.c')
-rw-r--r--	fs/inode.c	117
1 file changed, 9 insertions(+), 108 deletions(-)
diff --git a/fs/inode.c b/fs/inode.c
index 0450e25aeda0..1fdbb64a952f 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -73,7 +73,7 @@ __cacheline_aligned_in_smp DEFINE_SPINLOCK(inode_wb_list_lock);
  *
  * We don't actually need it to protect anything in the umount path,
  * but only need to cycle through it to make sure any inode that
- * prune_icache took off the LRU list has been fully torn down by the
+ * prune_icache_sb took off the LRU list has been fully torn down by the
  * time we are past evict_inodes.
  */
 static DECLARE_RWSEM(iprune_sem);
@@ -544,7 +544,7 @@ void evict_inodes(struct super_block *sb)
 	dispose_list(&dispose);
 
 	/*
-	 * Cycle through iprune_sem to make sure any inode that prune_icache
+	 * Cycle through iprune_sem to make sure any inode that prune_icache_sb
 	 * moved off the list before we took the lock has been fully torn
 	 * down.
 	 */
@@ -612,9 +612,10 @@ static int can_unuse(struct inode *inode)
 }
 
 /*
- * Scan `goal' inodes on the unused list for freeable ones. They are moved to a
- * temporary list and then are freed outside sb->s_inode_lru_lock by
- * dispose_list().
+ * Walk the superblock inode LRU for freeable inodes and attempt to free them.
+ * This is called from the superblock shrinker function with a number of inodes
+ * to trim from the LRU. Inodes to be freed are moved to a temporary list and
+ * then are freed outside inode_lock by dispose_list().
  *
  * Any inodes which are pinned purely because of attached pagecache have their
  * pagecache removed. If the inode has metadata buffers attached to
@@ -628,14 +629,15 @@ static int can_unuse(struct inode *inode)
  * LRU does not have strict ordering. Hence we don't want to reclaim inodes
  * with this flag set because they are the inodes that are out of order.
  */
-static void shrink_icache_sb(struct super_block *sb, int *nr_to_scan)
+void prune_icache_sb(struct super_block *sb, int nr_to_scan)
 {
 	LIST_HEAD(freeable);
 	int nr_scanned;
 	unsigned long reap = 0;
 
+	down_read(&iprune_sem);
 	spin_lock(&sb->s_inode_lru_lock);
-	for (nr_scanned = *nr_to_scan; nr_scanned >= 0; nr_scanned--) {
+	for (nr_scanned = nr_to_scan; nr_scanned >= 0; nr_scanned--) {
 		struct inode *inode;
 
 		if (list_empty(&sb->s_inode_lru))
@@ -707,111 +709,11 @@ static void shrink_icache_sb(struct super_block *sb, int *nr_to_scan)
 	else
 		__count_vm_events(PGINODESTEAL, reap);
 	spin_unlock(&sb->s_inode_lru_lock);
-	*nr_to_scan = nr_scanned;
 
 	dispose_list(&freeable);
-}
-
-static void prune_icache(int count)
-{
-	struct super_block *sb, *p = NULL;
-	int w_count;
-	int unused = inodes_stat.nr_unused;
-	int prune_ratio;
-	int pruned;
-
-	if (unused == 0 || count == 0)
-		return;
-	down_read(&iprune_sem);
-	if (count >= unused)
-		prune_ratio = 1;
-	else
-		prune_ratio = unused / count;
-	spin_lock(&sb_lock);
-	list_for_each_entry(sb, &super_blocks, s_list) {
-		if (list_empty(&sb->s_instances))
-			continue;
-		if (sb->s_nr_inodes_unused == 0)
-			continue;
-		sb->s_count++;
-		/* Now, we reclaim unused dentrins with fairness.
-		 * We reclaim them same percentage from each superblock.
-		 * We calculate number of dentries to scan on this sb
-		 * as follows, but the implementation is arranged to avoid
-		 * overflows:
-		 * number of dentries to scan on this sb =
-		 * count * (number of dentries on this sb /
-		 * number of dentries in the machine)
-		 */
-		spin_unlock(&sb_lock);
-		if (prune_ratio != 1)
-			w_count = (sb->s_nr_inodes_unused / prune_ratio) + 1;
-		else
-			w_count = sb->s_nr_inodes_unused;
-		pruned = w_count;
-		/*
-		 * We need to be sure this filesystem isn't being unmounted,
-		 * otherwise we could race with generic_shutdown_super(), and
-		 * end up holding a reference to an inode while the filesystem
-		 * is unmounted. So we try to get s_umount, and make sure
-		 * s_root isn't NULL.
-		 */
-		if (down_read_trylock(&sb->s_umount)) {
-			if ((sb->s_root != NULL) &&
-			    (!list_empty(&sb->s_dentry_lru))) {
-				shrink_icache_sb(sb, &w_count);
-				pruned -= w_count;
-			}
-			up_read(&sb->s_umount);
-		}
-		spin_lock(&sb_lock);
-		if (p)
-			__put_super(p);
-		count -= pruned;
-		p = sb;
-		/* more work left to do? */
-		if (count <= 0)
-			break;
-	}
-	if (p)
-		__put_super(p);
-	spin_unlock(&sb_lock);
 	up_read(&iprune_sem);
 }
 
-/*
- * shrink_icache_memory() will attempt to reclaim some unused inodes. Here,
- * "unused" means that no dentries are referring to the inodes: the files are
- * not open and the dcache references to those inodes have already been
- * reclaimed.
- *
- * This function is passed the number of inodes to scan, and it returns the
- * total number of remaining possibly-reclaimable inodes.
- */
-static int shrink_icache_memory(struct shrinker *shrink,
-				struct shrink_control *sc)
-{
-	int nr = sc->nr_to_scan;
-	gfp_t gfp_mask = sc->gfp_mask;
-
-	if (nr) {
-		/*
-		 * Nasty deadlock avoidance. We may hold various FS locks,
-		 * and we don't want to recurse into the FS that called us
-		 * in clear_inode() and friends..
-		 */
-		if (!(gfp_mask & __GFP_FS))
-			return -1;
-		prune_icache(nr);
-	}
-	return (get_nr_inodes_unused() / 100) * sysctl_vfs_cache_pressure;
-}
-
-static struct shrinker icache_shrinker = {
-	.shrink = shrink_icache_memory,
-	.seeks = DEFAULT_SEEKS,
-};
-
 static void __wait_on_freeing_inode(struct inode *inode);
 /*
  * Called with the inode lock held.
@@ -1691,7 +1593,6 @@ void __init inode_init(void)
 					 (SLAB_RECLAIM_ACCOUNT|SLAB_PANIC|
 					 SLAB_MEM_SPREAD),
 					 init_once);
-	register_shrinker(&icache_shrinker);
 
 	/* Hash may have been set up in inode_init_early */
 	if (!hashdist)
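Note that the register_shrinker(&icache_shrinker) call removed from inode_init() above is not replaced within this file. A hypothetical sketch of how per-superblock registration could be wired up instead - again with the s_shrink field and example_* names as assumptions rather than code from this patch:

static void example_register_sb_shrinker(struct super_block *sb)
{
	/* Hypothetical: wire the per-sb shrinker up at superblock setup. */
	sb->s_shrink.shrink = example_prune_super;	/* sketch above */
	sb->s_shrink.seeks = DEFAULT_SEEKS;
	register_shrinker(&sb->s_shrink);
}

static void example_unregister_sb_shrinker(struct super_block *sb)
{
	/* Hypothetical: tear it down before the superblock goes away. */
	unregister_shrinker(&sb->s_shrink);
}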