Diffstat (limited to 'fs/inode.c')
-rw-r--r--  fs/inode.c | 117
1 file changed, 9 insertions(+), 108 deletions(-)
diff --git a/fs/inode.c b/fs/inode.c
index 0450e25aeda0..1fdbb64a952f 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -73,7 +73,7 @@ __cacheline_aligned_in_smp DEFINE_SPINLOCK(inode_wb_list_lock);
  *
  * We don't actually need it to protect anything in the umount path,
  * but only need to cycle through it to make sure any inode that
- * prune_icache took off the LRU list has been fully torn down by the
+ * prune_icache_sb took off the LRU list has been fully torn down by the
  * time we are past evict_inodes.
  */
 static DECLARE_RWSEM(iprune_sem);
@@ -544,7 +544,7 @@ void evict_inodes(struct super_block *sb)
 	dispose_list(&dispose);
 
 	/*
-	 * Cycle through iprune_sem to make sure any inode that prune_icache
+	 * Cycle through iprune_sem to make sure any inode that prune_icache_sb
 	 * moved off the list before we took the lock has been fully torn
 	 * down.
 	 */
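"Cycling through" iprune_sem means taking it for writing and dropping it immediately: the write lock cannot be granted until every reader still inside prune_icache_sb has finished, so the pair acts as a barrier. A minimal sketch of the idiom as evict_inodes uses it (simplified; the real function does this after disposing its own list):

	/*
	 * Barrier: wait for any pruner holding iprune_sem for reading to
	 * finish tearing down the inodes it took off the LRU, then let
	 * the next readers in.
	 */
	down_write(&iprune_sem);
	up_write(&iprune_sem);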
@@ -612,9 +612,10 @@ static int can_unuse(struct inode *inode)
 }
 
 /*
- * Scan `goal' inodes on the unused list for freeable ones. They are moved to a
- * temporary list and then are freed outside sb->s_inode_lru_lock by
- * dispose_list().
+ * Walk the superblock inode LRU for freeable inodes and attempt to free them.
+ * This is called from the superblock shrinker function with a number of inodes
+ * to trim from the LRU. Inodes to be freed are moved to a temporary list and
+ * then are freed outside inode_lock by dispose_list().
  *
  * Any inodes which are pinned purely because of attached pagecache have their
  * pagecache removed. If the inode has metadata buffers attached to
631 | static void shrink_icache_sb(struct super_block *sb, int *nr_to_scan) | 632 | void prune_icache_sb(struct super_block *sb, int nr_to_scan) |
632 | { | 633 | { |
633 | LIST_HEAD(freeable); | 634 | LIST_HEAD(freeable); |
634 | int nr_scanned; | 635 | int nr_scanned; |
635 | unsigned long reap = 0; | 636 | unsigned long reap = 0; |
636 | 637 | ||
638 | down_read(&iprune_sem); | ||
637 | spin_lock(&sb->s_inode_lru_lock); | 639 | spin_lock(&sb->s_inode_lru_lock); |
638 | for (nr_scanned = *nr_to_scan; nr_scanned >= 0; nr_scanned--) { | 640 | for (nr_scanned = nr_to_scan; nr_scanned >= 0; nr_scanned--) { |
639 | struct inode *inode; | 641 | struct inode *inode; |
640 | 642 | ||
641 | if (list_empty(&sb->s_inode_lru)) | 643 | if (list_empty(&sb->s_inode_lru)) |
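For context, prune_icache_sb() is now an entry point driven by the per-superblock shrinker introduced elsewhere in this series rather than by a VFS-global one. A hedged sketch of such a caller follows; the prune_super() name, the s_shrink field, and the exact accounting are assumptions based on the companion fs/super.c changes, not part of this file:

/*
 * Illustrative per-sb shrinker callback (assumed shape; in this series
 * it lives in fs/super.c, not fs/inode.c).
 */
static int prune_super(struct shrinker *shrink, struct shrink_control *sc)
{
	struct super_block *sb = container_of(shrink, struct super_block,
					      s_shrink);

	/* Same deadlock rule the old global shrinker enforced. */
	if (sc->nr_to_scan && !(sc->gfp_mask & __GFP_FS))
		return -1;

	if (sc->nr_to_scan)
		prune_icache_sb(sb, sc->nr_to_scan);

	return (sb->s_nr_inodes_unused / 100) * sysctl_vfs_cache_pressure;
}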
@@ -707,111 +709,11 @@ static void shrink_icache_sb(struct super_block *sb, int *nr_to_scan)
 	else
 		__count_vm_events(PGINODESTEAL, reap);
 	spin_unlock(&sb->s_inode_lru_lock);
-	*nr_to_scan = nr_scanned;
 
 	dispose_list(&freeable);
-}
-
-static void prune_icache(int count)
-{
-	struct super_block *sb, *p = NULL;
-	int w_count;
-	int unused = inodes_stat.nr_unused;
-	int prune_ratio;
-	int pruned;
-
-	if (unused == 0 || count == 0)
-		return;
-	down_read(&iprune_sem);
-	if (count >= unused)
-		prune_ratio = 1;
-	else
-		prune_ratio = unused / count;
-	spin_lock(&sb_lock);
-	list_for_each_entry(sb, &super_blocks, s_list) {
-		if (list_empty(&sb->s_instances))
-			continue;
-		if (sb->s_nr_inodes_unused == 0)
-			continue;
-		sb->s_count++;
-		/* Now, we reclaim unused dentrins with fairness.
-		 * We reclaim them same percentage from each superblock.
-		 * We calculate number of dentries to scan on this sb
-		 * as follows, but the implementation is arranged to avoid
-		 * overflows:
-		 * number of dentries to scan on this sb =
-		 * count * (number of dentries on this sb /
-		 * number of dentries in the machine)
-		 */
-		spin_unlock(&sb_lock);
-		if (prune_ratio != 1)
-			w_count = (sb->s_nr_inodes_unused / prune_ratio) + 1;
-		else
-			w_count = sb->s_nr_inodes_unused;
-		pruned = w_count;
-		/*
-		 * We need to be sure this filesystem isn't being unmounted,
-		 * otherwise we could race with generic_shutdown_super(), and
-		 * end up holding a reference to an inode while the filesystem
-		 * is unmounted. So we try to get s_umount, and make sure
-		 * s_root isn't NULL.
-		 */
-		if (down_read_trylock(&sb->s_umount)) {
-			if ((sb->s_root != NULL) &&
-			    (!list_empty(&sb->s_dentry_lru))) {
-				shrink_icache_sb(sb, &w_count);
-				pruned -= w_count;
-			}
-			up_read(&sb->s_umount);
-		}
-		spin_lock(&sb_lock);
-		if (p)
-			__put_super(p);
-		count -= pruned;
-		p = sb;
-		/* more work left to do? */
-		if (count <= 0)
-			break;
-	}
-	if (p)
-		__put_super(p);
-	spin_unlock(&sb_lock);
 	up_read(&iprune_sem);
 }
 
-/*
- * shrink_icache_memory() will attempt to reclaim some unused inodes. Here,
- * "unused" means that no dentries are referring to the inodes: the files are
- * not open and the dcache references to those inodes have already been
- * reclaimed.
- *
- * This function is passed the number of inodes to scan, and it returns the
- * total number of remaining possibly-reclaimable inodes.
- */
-static int shrink_icache_memory(struct shrinker *shrink,
-				struct shrink_control *sc)
-{
-	int nr = sc->nr_to_scan;
-	gfp_t gfp_mask = sc->gfp_mask;
-
-	if (nr) {
-		/*
-		 * Nasty deadlock avoidance. We may hold various FS locks,
-		 * and we don't want to recurse into the FS that called us
-		 * in clear_inode() and friends..
-		 */
-		if (!(gfp_mask & __GFP_FS))
-			return -1;
-		prune_icache(nr);
-	}
-	return (get_nr_inodes_unused() / 100) * sysctl_vfs_cache_pressure;
-}
-
-static struct shrinker icache_shrinker = {
-	.shrink		= shrink_icache_memory,
-	.seeks		= DEFAULT_SEEKS,
-};
-
 static void __wait_on_freeing_inode(struct inode *inode);
 /*
  * Called with the inode lock held.
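To make the deleted fairness logic concrete: with count = 128 inodes requested and unused = 1024 unused inodes system-wide, prune_ratio is 1024 / 128 = 8, so a superblock holding 200 unused inodes is asked to scan 200 / 8 + 1 = 26 of them, taking roughly the same fraction from every superblock. A worked sketch with those illustrative numbers (not values from the patch):

	/* Illustrative numbers only; mirrors the removed computation. */
	int count = 128;			/* inodes the shrinker asked for */
	int unused = 1024;			/* inodes_stat.nr_unused */
	int prune_ratio = unused / count;	/* 1024 / 128 = 8 */
	int w_count = (200 / prune_ratio) + 1;	/* sb with 200 unused scans 26 */

The per-sb rewrite makes this global bookkeeping unnecessary: each superblock's own shrinker is handed a scan count directly.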
@@ -1691,7 +1593,6 @@ void __init inode_init(void)
 					 (SLAB_RECLAIM_ACCOUNT|SLAB_PANIC|
 					 SLAB_MEM_SPREAD),
 					 init_once);
-	register_shrinker(&icache_shrinker);
 
 	/* Hash may have been set up in inode_init_early */
 	if (!hashdist)
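With the global icache_shrinker gone, inode_init() registers nothing; under this series each superblock is expected to carry its own shrinker instead. A sketch under that assumption — the s_shrink field and the registration point come from the companion fs/super.c changes, not from this hunk:

	/* Assumed per-sb setup, roughly as done when the sb is allocated. */
	sb->s_shrink.shrink = prune_super;
	sb->s_shrink.seeks = DEFAULT_SEEKS;
	register_shrinker(&sb->s_shrink);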