Diffstat (limited to 'fs/inode.c')
 -rw-r--r--  fs/inode.c  193
 1 file changed, 81 insertions(+), 112 deletions(-)
diff --git a/fs/inode.c b/fs/inode.c
index 93a0625b46e4..b33ba8e021cc 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -17,6 +17,7 @@
 #include <linux/prefetch.h>
 #include <linux/buffer_head.h> /* for inode_has_buffers */
 #include <linux/ratelimit.h>
+#include <linux/list_lru.h>
 #include "internal.h"
 
 /*
@@ -24,7 +25,7 @@
  *
  * inode->i_lock protects:
  *   inode->i_state, inode->i_hash, __iget()
- * inode->i_sb->s_inode_lru_lock protects:
+ * Inode LRU list locks protect:
  *   inode->i_sb->s_inode_lru, inode->i_lru
  * inode_sb_list_lock protects:
  *   sb->s_inodes, inode->i_sb_list
@@ -37,7 +38,7 @@
  *
  * inode_sb_list_lock
  *   inode->i_lock
- *     inode->i_sb->s_inode_lru_lock
+ *     Inode LRU list locks
  *
  * bdi->wb.list_lock
  *   inode->i_lock
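The two comment hunks above track a real locking change: the single per-superblock s_inode_lru_lock goes away, and the "Inode LRU list locks" are now the spinlocks embedded in the generic list_lru brought in by <linux/list_lru.h>, one per NUMA node. Roughly, the structure looked like this around the time of this change (shown for orientation only; the authoritative definition is in include/linux/list_lru.h):

struct list_lru_node {
        spinlock_t              lock;           /* protects this node's list */
        struct list_head        list;           /* LRU-ordered items on this node */
        long                    nr_items;       /* items currently on this node */
} ____cacheline_aligned_in_smp;

struct list_lru {
        struct list_lru_node    *node;          /* one element per NUMA node */
        nodemask_t              active_nodes;   /* nodes that currently hold items */
};

Splitting the lock per node is what lets the shrinker below walk and reclaim one node's inodes without serializing against LRU traffic on every other node.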
@@ -70,33 +71,33 @@ EXPORT_SYMBOL(empty_aops);
  */
 struct inodes_stat_t inodes_stat;
 
-static DEFINE_PER_CPU(unsigned int, nr_inodes);
-static DEFINE_PER_CPU(unsigned int, nr_unused);
+static DEFINE_PER_CPU(unsigned long, nr_inodes);
+static DEFINE_PER_CPU(unsigned long, nr_unused);
 
 static struct kmem_cache *inode_cachep __read_mostly;
 
-static int get_nr_inodes(void)
+static long get_nr_inodes(void)
 {
         int i;
-        int sum = 0;
+        long sum = 0;
         for_each_possible_cpu(i)
                 sum += per_cpu(nr_inodes, i);
         return sum < 0 ? 0 : sum;
 }
 
-static inline int get_nr_inodes_unused(void)
+static inline long get_nr_inodes_unused(void)
 {
         int i;
-        int sum = 0;
+        long sum = 0;
         for_each_possible_cpu(i)
                 sum += per_cpu(nr_unused, i);
         return sum < 0 ? 0 : sum;
 }
 
-int get_nr_dirty_inodes(void)
+long get_nr_dirty_inodes(void)
 {
         /* not actually dirty inodes, but a wild approximation */
-        int nr_dirty = get_nr_inodes() - get_nr_inodes_unused();
+        long nr_dirty = get_nr_inodes() - get_nr_inodes_unused();
         return nr_dirty > 0 ? nr_dirty : 0;
 }
 
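The hunk above widens the per-CPU inode counters and their sums from int to long: on machines caching more than ~2^31 inodes the 32-bit totals can wrap, while the clamp to zero stays because each CPU's value is only a delta and can legitimately go negative (an inode allocated on one CPU may be freed on another). A minimal userspace sketch of the same summation pattern, with hypothetical names, just to show why the accumulator has to be as wide as the worst-case total:

/* Hypothetical illustration, not kernel code. */
#include <stdio.h>

#define NR_CPUS 4

static long per_cpu_nr_inodes[NR_CPUS];  /* stands in for DEFINE_PER_CPU(unsigned long, nr_inodes) */

static long get_total(void)
{
        long sum = 0;
        int i;

        for (i = 0; i < NR_CPUS; i++)
                sum += per_cpu_nr_inodes[i];
        return sum < 0 ? 0 : sum;        /* per-CPU deltas may be negative */
}

int main(void)
{
        per_cpu_nr_inodes[0] = 5;        /* allocations counted on CPU 0 */
        per_cpu_nr_inodes[1] = -2;       /* frees counted on CPU 1 */
        printf("%ld\n", get_total());    /* prints 3 */
        return 0;
}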
@@ -109,7 +110,7 @@ int proc_nr_inodes(ctl_table *table, int write,
 {
         inodes_stat.nr_inodes = get_nr_inodes();
         inodes_stat.nr_unused = get_nr_inodes_unused();
-        return proc_dointvec(table, write, buffer, lenp, ppos);
+        return proc_doulongvec_minmax(table, write, buffer, lenp, ppos);
 }
 #endif
 
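proc_dointvec() parses the table's data as an array of int, which no longer matches inodes_stat once its fields are unsigned long, so the handler switches to proc_doulongvec_minmax(). A hedged sketch of how such a sysctl entry could be wired up (the entry below is illustrative, not copied from kernel/sysctl.c; the point is that .maxlen must describe the data in units of unsigned long once the ulongvec handler is used):

/* Illustrative ctl_table entry -- field values are assumptions, not the
 * real kernel/sysctl.c wiring. */
static struct ctl_table example_fs_table[] = {
        {
                .procname       = "inode-nr",
                .data           = &inodes_stat,
                .maxlen         = 2 * sizeof(unsigned long),
                .mode           = 0444,
                .proc_handler   = proc_nr_inodes,
        },
        { }
};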
@@ -401,13 +402,8 @@ EXPORT_SYMBOL(ihold);
 
 static void inode_lru_list_add(struct inode *inode)
 {
-        spin_lock(&inode->i_sb->s_inode_lru_lock);
-        if (list_empty(&inode->i_lru)) {
-                list_add(&inode->i_lru, &inode->i_sb->s_inode_lru);
-                inode->i_sb->s_nr_inodes_unused++;
+        if (list_lru_add(&inode->i_sb->s_inode_lru, &inode->i_lru))
                 this_cpu_inc(nr_unused);
-        }
-        spin_unlock(&inode->i_sb->s_inode_lru_lock);
 }
 
 /*
@@ -425,13 +421,9 @@ void inode_add_lru(struct inode *inode)
 
 static void inode_lru_list_del(struct inode *inode)
 {
-        spin_lock(&inode->i_sb->s_inode_lru_lock);
-        if (!list_empty(&inode->i_lru)) {
-                list_del_init(&inode->i_lru);
-                inode->i_sb->s_nr_inodes_unused--;
+
+        if (list_lru_del(&inode->i_sb->s_inode_lru, &inode->i_lru))
                 this_cpu_dec(nr_unused);
-        }
-        spin_unlock(&inode->i_sb->s_inode_lru_lock);
 }
 
 /**
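Both conversions above lean on the return value of the list_lru helpers: list_lru_add() returns true only when the item was not on the LRU before, and list_lru_del() returns true only when it actually removed the item, so the per-CPU nr_unused statistic changes exactly when list membership changes, just as the old list_empty()/!list_empty() checks guaranteed. A small sketch of that pattern as a hypothetical wrapper (not part of the patch):

/* Hypothetical helper built on the list_lru API used above; it keeps an
 * external counter consistent even if add/del is attempted twice. */
static void inode_lru_track(struct inode *inode, bool add)
{
        struct list_lru *lru = &inode->i_sb->s_inode_lru;

        if (add) {
                if (list_lru_add(lru, &inode->i_lru))   /* newly added */
                        this_cpu_inc(nr_unused);
        } else {
                if (list_lru_del(lru, &inode->i_lru))   /* actually removed */
                        this_cpu_dec(nr_unused);
        }
}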
@@ -675,24 +667,8 @@ int invalidate_inodes(struct super_block *sb, bool kill_dirty)
         return busy;
 }
 
-static int can_unuse(struct inode *inode)
-{
-        if (inode->i_state & ~I_REFERENCED)
-                return 0;
-        if (inode_has_buffers(inode))
-                return 0;
-        if (atomic_read(&inode->i_count))
-                return 0;
-        if (inode->i_data.nrpages)
-                return 0;
-        return 1;
-}
-
 /*
- * Walk the superblock inode LRU for freeable inodes and attempt to free them.
- * This is called from the superblock shrinker function with a number of inodes
- * to trim from the LRU. Inodes to be freed are moved to a temporary list and
- * then are freed outside inode_lock by dispose_list().
+ * Isolate the inode from the LRU in preparation for freeing it.
  *
  * Any inodes which are pinned purely because of attached pagecache have their
  * pagecache removed. If the inode has metadata buffers attached to
@@ -706,89 +682,82 @@ static int can_unuse(struct inode *inode)
  * LRU does not have strict ordering. Hence we don't want to reclaim inodes
  * with this flag set because they are the inodes that are out of order.
  */
-void prune_icache_sb(struct super_block *sb, int nr_to_scan)
+static enum lru_status
+inode_lru_isolate(struct list_head *item, spinlock_t *lru_lock, void *arg)
 {
-        LIST_HEAD(freeable);
-        int nr_scanned;
-        unsigned long reap = 0;
+        struct list_head *freeable = arg;
+        struct inode    *inode = container_of(item, struct inode, i_lru);
 
-        spin_lock(&sb->s_inode_lru_lock);
-        for (nr_scanned = nr_to_scan; nr_scanned >= 0; nr_scanned--) {
-                struct inode *inode;
+        /*
+         * we are inverting the lru lock/inode->i_lock here, so use a trylock.
+         * If we fail to get the lock, just skip it.
+         */
+        if (!spin_trylock(&inode->i_lock))
+                return LRU_SKIP;
 
-                if (list_empty(&sb->s_inode_lru))
-                        break;
+        /*
+         * Referenced or dirty inodes are still in use. Give them another pass
+         * through the LRU as we canot reclaim them now.
+         */
+        if (atomic_read(&inode->i_count) ||
+            (inode->i_state & ~I_REFERENCED)) {
+                list_del_init(&inode->i_lru);
+                spin_unlock(&inode->i_lock);
+                this_cpu_dec(nr_unused);
+                return LRU_REMOVED;
+        }
 
-                inode = list_entry(sb->s_inode_lru.prev, struct inode, i_lru);
+        /* recently referenced inodes get one more pass */
+        if (inode->i_state & I_REFERENCED) {
+                inode->i_state &= ~I_REFERENCED;
+                spin_unlock(&inode->i_lock);
+                return LRU_ROTATE;
+        }
 
-                /*
-                 * we are inverting the sb->s_inode_lru_lock/inode->i_lock here,
-                 * so use a trylock. If we fail to get the lock, just move the
-                 * inode to the back of the list so we don't spin on it.
-                 */
-                if (!spin_trylock(&inode->i_lock)) {
-                        list_move(&inode->i_lru, &sb->s_inode_lru);
-                        continue;
+        if (inode_has_buffers(inode) || inode->i_data.nrpages) {
+                __iget(inode);
+                spin_unlock(&inode->i_lock);
+                spin_unlock(lru_lock);
+                if (remove_inode_buffers(inode)) {
+                        unsigned long reap;
+                        reap = invalidate_mapping_pages(&inode->i_data, 0, -1);
+                        if (current_is_kswapd())
+                                __count_vm_events(KSWAPD_INODESTEAL, reap);
+                        else
+                                __count_vm_events(PGINODESTEAL, reap);
+                        if (current->reclaim_state)
+                                current->reclaim_state->reclaimed_slab += reap;
                 }
+                iput(inode);
+                spin_lock(lru_lock);
+                return LRU_RETRY;
+        }
 
-                /*
-                 * Referenced or dirty inodes are still in use. Give them
-                 * another pass through the LRU as we canot reclaim them now.
-                 */
-                if (atomic_read(&inode->i_count) ||
-                    (inode->i_state & ~I_REFERENCED)) {
-                        list_del_init(&inode->i_lru);
-                        spin_unlock(&inode->i_lock);
-                        sb->s_nr_inodes_unused--;
-                        this_cpu_dec(nr_unused);
-                        continue;
-                }
+        WARN_ON(inode->i_state & I_NEW);
+        inode->i_state |= I_FREEING;
+        list_move(&inode->i_lru, freeable);
+        spin_unlock(&inode->i_lock);
 
-                /* recently referenced inodes get one more pass */
-                if (inode->i_state & I_REFERENCED) {
-                        inode->i_state &= ~I_REFERENCED;
-                        list_move(&inode->i_lru, &sb->s_inode_lru);
-                        spin_unlock(&inode->i_lock);
-                        continue;
-                }
-                if (inode_has_buffers(inode) || inode->i_data.nrpages) {
-                        __iget(inode);
-                        spin_unlock(&inode->i_lock);
-                        spin_unlock(&sb->s_inode_lru_lock);
-                        if (remove_inode_buffers(inode))
-                                reap += invalidate_mapping_pages(&inode->i_data,
-                                                                0, -1);
-                        iput(inode);
-                        spin_lock(&sb->s_inode_lru_lock);
-
-                        if (inode != list_entry(sb->s_inode_lru.next,
-                                                struct inode, i_lru))
-                                continue;       /* wrong inode or list_empty */
-                        /* avoid lock inversions with trylock */
-                        if (!spin_trylock(&inode->i_lock))
-                                continue;
-                        if (!can_unuse(inode)) {
-                                spin_unlock(&inode->i_lock);
-                                continue;
-                        }
-                }
-                WARN_ON(inode->i_state & I_NEW);
-                inode->i_state |= I_FREEING;
-                spin_unlock(&inode->i_lock);
+        this_cpu_dec(nr_unused);
+        return LRU_REMOVED;
+}
 
-                list_move(&inode->i_lru, &freeable);
-                sb->s_nr_inodes_unused--;
-                this_cpu_dec(nr_unused);
-        }
-        if (current_is_kswapd())
-                __count_vm_events(KSWAPD_INODESTEAL, reap);
-        else
-                __count_vm_events(PGINODESTEAL, reap);
-        spin_unlock(&sb->s_inode_lru_lock);
-        if (current->reclaim_state)
-                current->reclaim_state->reclaimed_slab += reap;
+/*
+ * Walk the superblock inode LRU for freeable inodes and attempt to free them.
+ * This is called from the superblock shrinker function with a number of inodes
+ * to trim from the LRU. Inodes to be freed are moved to a temporary list and
+ * then are freed outside inode_lock by dispose_list().
+ */
+long prune_icache_sb(struct super_block *sb, unsigned long nr_to_scan,
+                     int nid)
+{
+        LIST_HEAD(freeable);
+        long freed;
 
+        freed = list_lru_walk_node(&sb->s_inode_lru, nid, inode_lru_isolate,
+                                   &freeable, &nr_to_scan);
         dispose_list(&freeable);
+        return freed;
 }
 
 static void __wait_on_freeing_inode(struct inode *inode);
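After this hunk the walk itself lives in list_lru_walk_node(): it takes the node's LRU lock, hands each item to inode_lru_isolate(), and acts on the returned enum lru_status (LRU_REMOVED decrements the node's item count, LRU_ROTATE moves the entry to the list tail, LRU_SKIP leaves it alone, LRU_RETRY restarts the walk after the callback dropped the lock). A condensed sketch of how a NUMA-aware superblock shrinker might drive the new prune_icache_sb() per node; the surrounding shrinker plumbing is an assumption for illustration, and only the prune_icache_sb()/list_lru calls mirror this patch:

/* Illustrative caller -- the real logic lives in the fs/super.c shrinker. */
static unsigned long example_icache_scan(struct super_block *sb,
                                         struct shrink_control *sc)
{
        long freed;

        /* Nothing cached on the node being reclaimed from? Tell the
         * shrinker core there is no point in calling us again for it. */
        if (!list_lru_count_node(&sb->s_inode_lru, sc->nid))
                return SHRINK_STOP;

        /* Walk only sc->nid's LRU; inode_lru_isolate() collects freeable
         * inodes, and prune_icache_sb() disposes of them outside the lock. */
        freed = prune_icache_sb(sb, sc->nr_to_scan, sc->nid);
        return freed;
}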