Diffstat (limited to 'fs/inode.c')

 -rw-r--r--  fs/inode.c | 134
 1 file changed, 43 insertions(+), 91 deletions(-)
diff --git a/fs/inode.c b/fs/inode.c
index 43566d17d1b8..a48fa5355fb4 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -33,11 +33,11 @@
  *
  * inode->i_lock protects:
  *   inode->i_state, inode->i_hash, __iget()
- * inode_lru_lock protects:
- *   inode_lru, inode->i_lru
+ * inode->i_sb->s_inode_lru_lock protects:
+ *   inode->i_sb->s_inode_lru, inode->i_lru
  * inode_sb_list_lock protects:
  *   sb->s_inodes, inode->i_sb_list
- * inode_wb_list_lock protects:
+ * bdi->wb.list_lock protects:
  *   bdi->wb.b_{dirty,io,more_io}, inode->i_wb_list
  * inode_hash_lock protects:
  *   inode_hashtable, inode->i_hash
@@ -46,9 +46,9 @@
  *
  * inode_sb_list_lock
  *   inode->i_lock
- *     inode_lru_lock
+ *     inode->i_sb->s_inode_lru_lock
  *
- * inode_wb_list_lock
+ * bdi->wb.list_lock
  *   inode->i_lock
  *
  * inode_hash_lock
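
Note: the comment above is the deadlock-avoidance contract for these lock
chains. A toy userspace sketch of why a fixed acquisition order is enough,
assuming only that every path honours it (pthread names are illustrative,
not the kernel's):

    #include <pthread.h>
    #include <stdio.h>

    /* Stand-ins for inode_sb_list_lock and inode->i_lock. */
    static pthread_mutex_t sb_list_lock = PTHREAD_MUTEX_INITIALIZER;
    static pthread_mutex_t i_lock = PTHREAD_MUTEX_INITIALIZER;

    static void walk_sb_inode_list(void)
    {
            /* Outer lock first, inner lock second -- the documented
             * order.  Two threads doing this can block but never
             * deadlock, because neither holds i_lock while waiting
             * for sb_list_lock. */
            pthread_mutex_lock(&sb_list_lock);
            pthread_mutex_lock(&i_lock);
            /* ... touch per-inode state ... */
            pthread_mutex_unlock(&i_lock);
            pthread_mutex_unlock(&sb_list_lock);
    }

    int main(void)
    {
            walk_sb_inode_list();
            puts("ok");
            return 0;
    }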
@@ -64,22 +64,7 @@ static unsigned int i_hash_shift __read_mostly;
 static struct hlist_head *inode_hashtable __read_mostly;
 static __cacheline_aligned_in_smp DEFINE_SPINLOCK(inode_hash_lock);
 
-static LIST_HEAD(inode_lru);
-static DEFINE_SPINLOCK(inode_lru_lock);
-
 __cacheline_aligned_in_smp DEFINE_SPINLOCK(inode_sb_list_lock);
-__cacheline_aligned_in_smp DEFINE_SPINLOCK(inode_wb_list_lock);
-
-/*
- * iprune_sem provides exclusion between the icache shrinking and the
- * umount path.
- *
- * We don't actually need it to protect anything in the umount path,
- * but only need to cycle through it to make sure any inode that
- * prune_icache took off the LRU list has been fully torn down by the
- * time we are past evict_inodes.
- */
-static DECLARE_RWSEM(iprune_sem);
 
 /*
  * Empty aops. Can be used for the cases where the user does not
@@ -95,6 +80,7 @@ EXPORT_SYMBOL(empty_aops);
 struct inodes_stat_t inodes_stat;
 
 static DEFINE_PER_CPU(unsigned int, nr_inodes);
+static DEFINE_PER_CPU(unsigned int, nr_unused);
 
 static struct kmem_cache *inode_cachep __read_mostly;
 
@@ -109,7 +95,11 @@ static int get_nr_inodes(void)
 
 static inline int get_nr_inodes_unused(void)
 {
-	return inodes_stat.nr_unused;
+	int i;
+	int sum = 0;
+	for_each_possible_cpu(i)
+		sum += per_cpu(nr_unused, i);
+	return sum < 0 ? 0 : sum;
 }
 
 int get_nr_dirty_inodes(void)
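
Note: the rewritten get_nr_inodes_unused() trades a globally accurate
counter for lock-free per-CPU deltas that are only summed when the value
is read. Because one CPU can see more decrements than increments, the sum
may transiently go negative, hence the clamp. A minimal userspace sketch
of the same sum-and-clamp logic (a plain array stands in for per-CPU
storage; names are illustrative):

    #include <stdio.h>

    #define NR_CPUS 4

    /* Each CPU bumps only its own slot, so no lock is needed on the
     * update side.  Readers pay the cost of summing all slots. */
    static int nr_unused[NR_CPUS];

    static int get_nr_inodes_unused(void)
    {
            int sum = 0;

            for (int i = 0; i < NR_CPUS; i++)
                    sum += nr_unused[i];
            /* Deltas can transiently sum below zero; clamp like the
             * kernel code does. */
            return sum < 0 ? 0 : sum;
    }

    int main(void)
    {
            nr_unused[0] = 5;
            nr_unused[2] = -2;      /* this CPU saw more deletions */
            printf("unused: %d\n", get_nr_inodes_unused());  /* 3 */
            return 0;
    }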
@@ -127,6 +117,7 @@ int proc_nr_inodes(ctl_table *table, int write,
 		   void __user *buffer, size_t *lenp, loff_t *ppos)
 {
 	inodes_stat.nr_inodes = get_nr_inodes();
+	inodes_stat.nr_unused = get_nr_inodes_unused();
 	return proc_dointvec(table, write, buffer, lenp, ppos);
 }
 #endif
@@ -176,8 +167,7 @@ int inode_init_always(struct super_block *sb, struct inode *inode)
 	mutex_init(&inode->i_mutex);
 	lockdep_set_class(&inode->i_mutex, &sb->s_type->i_mutex_key);
 
-	init_rwsem(&inode->i_alloc_sem);
-	lockdep_set_class(&inode->i_alloc_sem, &sb->s_type->i_alloc_sem_key);
+	atomic_set(&inode->i_dio_count, 0);
 
 	mapping->a_ops = &empty_aops;
 	mapping->host = inode;
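
Note: here i_alloc_sem (a rwsem) is replaced by i_dio_count, a count of
in-flight direct I/O requests. A reduced single-threaded sketch of the
counting idea; the real field is an atomic_t paired with wait/wakeup
helpers elsewhere in this series, and the helper names below are my own,
not the kernel's:

    #include <stdio.h>

    struct inode { int i_dio_count; };  /* atomic_t in the kernel */

    /* Each direct-I/O request holds a reference for its lifetime;
     * truncate-style paths wait for the count to drop to zero. */
    static void dio_begin(struct inode *inode) { inode->i_dio_count++; }
    static void dio_end(struct inode *inode) { inode->i_dio_count--; }

    int main(void)
    {
            struct inode ino = { 0 };

            dio_begin(&ino);
            printf("DIO in flight: %d\n", ino.i_dio_count);  /* 1 */
            dio_end(&ino);
            printf("DIO in flight: %d\n", ino.i_dio_count);  /* 0 */
            return 0;
    }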
@@ -337,22 +327,24 @@ EXPORT_SYMBOL(ihold);
 
 static void inode_lru_list_add(struct inode *inode)
 {
-	spin_lock(&inode_lru_lock);
+	spin_lock(&inode->i_sb->s_inode_lru_lock);
 	if (list_empty(&inode->i_lru)) {
-		list_add(&inode->i_lru, &inode_lru);
-		inodes_stat.nr_unused++;
+		list_add(&inode->i_lru, &inode->i_sb->s_inode_lru);
+		inode->i_sb->s_nr_inodes_unused++;
+		this_cpu_inc(nr_unused);
 	}
-	spin_unlock(&inode_lru_lock);
+	spin_unlock(&inode->i_sb->s_inode_lru_lock);
 }
 
 static void inode_lru_list_del(struct inode *inode)
 {
-	spin_lock(&inode_lru_lock);
+	spin_lock(&inode->i_sb->s_inode_lru_lock);
 	if (!list_empty(&inode->i_lru)) {
 		list_del_init(&inode->i_lru);
-		inodes_stat.nr_unused--;
+		inode->i_sb->s_nr_inodes_unused--;
+		this_cpu_dec(nr_unused);
 	}
-	spin_unlock(&inode_lru_lock);
+	spin_unlock(&inode->i_sb->s_inode_lru_lock);
 }
 
 /**
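
Note: both helpers above use list_empty() on inode->i_lru as the
membership test, which makes add and del idempotent and keeps
s_nr_inodes_unused in sync with the list. A self-contained sketch of
that invariant with a minimal circular list (simplified types, no
locking, illustrative names):

    #include <stdio.h>

    struct list_head { struct list_head *next, *prev; };

    static void list_init(struct list_head *h) { h->next = h->prev = h; }
    static int list_empty(const struct list_head *h) { return h->next == h; }

    static void list_add(struct list_head *n, struct list_head *h)
    {
            n->next = h->next;
            n->prev = h;
            h->next->prev = n;
            h->next = n;
    }

    static void list_del_init(struct list_head *n)
    {
            n->prev->next = n->next;
            n->next->prev = n->prev;
            list_init(n);           /* back to "not on any list" */
    }

    struct sb  { struct list_head s_inode_lru; int s_nr_inodes_unused; };
    struct ino { struct list_head i_lru; };

    static void lru_add(struct sb *sb, struct ino *inode)
    {
            if (list_empty(&inode->i_lru)) {        /* idempotent add */
                    list_add(&inode->i_lru, &sb->s_inode_lru);
                    sb->s_nr_inodes_unused++;
            }
    }

    static void lru_del(struct sb *sb, struct ino *inode)
    {
            if (!list_empty(&inode->i_lru)) {       /* idempotent del */
                    list_del_init(&inode->i_lru);
                    sb->s_nr_inodes_unused--;
            }
    }

    int main(void)
    {
            struct sb sb = { .s_nr_inodes_unused = 0 };
            struct ino a;

            list_init(&sb.s_inode_lru);
            list_init(&a.i_lru);
            lru_add(&sb, &a);
            lru_add(&sb, &a);       /* second add is a no-op */
            printf("unused = %d\n", sb.s_nr_inodes_unused);  /* 1 */
            lru_del(&sb, &a);
            printf("unused = %d\n", sb.s_nr_inodes_unused);  /* 0 */
            return 0;
    }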
@@ -537,14 +529,6 @@ void evict_inodes(struct super_block *sb)
 	spin_unlock(&inode_sb_list_lock);
 
 	dispose_list(&dispose);
-
-	/*
-	 * Cycle through iprune_sem to make sure any inode that prune_icache
-	 * moved off the list before we took the lock has been fully torn
-	 * down.
-	 */
-	down_write(&iprune_sem);
-	up_write(&iprune_sem);
 }
 
 /**
@@ -607,8 +591,10 @@ static int can_unuse(struct inode *inode)
 }
 
 /*
- * Scan `goal' inodes on the unused list for freeable ones. They are moved to a
- * temporary list and then are freed outside inode_lru_lock by dispose_list().
+ * Walk the superblock inode LRU for freeable inodes and attempt to free them.
+ * This is called from the superblock shrinker function with a number of inodes
+ * to trim from the LRU. Inodes to be freed are moved to a temporary list and
+ * then are freed outside inode_lock by dispose_list().
  *
  * Any inodes which are pinned purely because of attached pagecache have their
  * pagecache removed. If the inode has metadata buffers attached to
@@ -622,29 +608,28 @@ static int can_unuse(struct inode *inode)
  * LRU does not have strict ordering. Hence we don't want to reclaim inodes
  * with this flag set because they are the inodes that are out of order.
  */
-static void prune_icache(int nr_to_scan)
+void prune_icache_sb(struct super_block *sb, int nr_to_scan)
 {
 	LIST_HEAD(freeable);
 	int nr_scanned;
 	unsigned long reap = 0;
 
-	down_read(&iprune_sem);
-	spin_lock(&inode_lru_lock);
-	for (nr_scanned = 0; nr_scanned < nr_to_scan; nr_scanned++) {
+	spin_lock(&sb->s_inode_lru_lock);
+	for (nr_scanned = nr_to_scan; nr_scanned >= 0; nr_scanned--) {
 		struct inode *inode;
 
-		if (list_empty(&inode_lru))
+		if (list_empty(&sb->s_inode_lru))
 			break;
 
-		inode = list_entry(inode_lru.prev, struct inode, i_lru);
+		inode = list_entry(sb->s_inode_lru.prev, struct inode, i_lru);
 
 		/*
-		 * we are inverting the inode_lru_lock/inode->i_lock here,
+		 * we are inverting the sb->s_inode_lru_lock/inode->i_lock here,
 		 * so use a trylock. If we fail to get the lock, just move the
 		 * inode to the back of the list so we don't spin on it.
 		 */
 		if (!spin_trylock(&inode->i_lock)) {
-			list_move(&inode->i_lru, &inode_lru);
+			list_move(&inode->i_lru, &sb->s_inode_lru);
 			continue;
 		}
 
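
Note: the trylock above is the standard answer to a lock-order inversion.
Per the ordering comment at the top of the file, other paths take
inode->i_lock before the LRU lock, while this walk already holds the LRU
lock, so blocking on i_lock here could deadlock. A compact pthread sketch
of the same back-off (names are illustrative):

    #include <pthread.h>
    #include <stdio.h>

    /* lru_lock is held for the whole walk; obj->lock nests outside it
     * on other paths, so we must not block on it here. */
    static pthread_mutex_t lru_lock = PTHREAD_MUTEX_INITIALIZER;

    struct obj { pthread_mutex_t lock; };

    static int scan_one(struct obj *o)
    {
            int reclaimed = 0;

            pthread_mutex_lock(&lru_lock);
            if (pthread_mutex_trylock(&o->lock) == 0) {
                    /* safe: both locks taken without blocking */
                    reclaimed = 1;
                    pthread_mutex_unlock(&o->lock);
            }
            /* else: contended -- the real code list_move()s the inode
             * to the list tail so the walk doesn't spin on it */
            pthread_mutex_unlock(&lru_lock);
            return reclaimed;
    }

    int main(void)
    {
            struct obj o = { PTHREAD_MUTEX_INITIALIZER };
            printf("reclaimed: %d\n", scan_one(&o));  /* 1: uncontended */
            return 0;
    }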
@@ -656,28 +641,29 @@ static void prune_icache(int nr_to_scan)
 		    (inode->i_state & ~I_REFERENCED)) {
 			list_del_init(&inode->i_lru);
 			spin_unlock(&inode->i_lock);
-			inodes_stat.nr_unused--;
+			sb->s_nr_inodes_unused--;
+			this_cpu_dec(nr_unused);
 			continue;
 		}
 
 		/* recently referenced inodes get one more pass */
 		if (inode->i_state & I_REFERENCED) {
 			inode->i_state &= ~I_REFERENCED;
-			list_move(&inode->i_lru, &inode_lru);
+			list_move(&inode->i_lru, &sb->s_inode_lru);
 			spin_unlock(&inode->i_lock);
 			continue;
 		}
 		if (inode_has_buffers(inode) || inode->i_data.nrpages) {
 			__iget(inode);
 			spin_unlock(&inode->i_lock);
-			spin_unlock(&inode_lru_lock);
+			spin_unlock(&sb->s_inode_lru_lock);
 			if (remove_inode_buffers(inode))
 				reap += invalidate_mapping_pages(&inode->i_data,
 								0, -1);
 			iput(inode);
-			spin_lock(&inode_lru_lock);
+			spin_lock(&sb->s_inode_lru_lock);
 
-			if (inode != list_entry(inode_lru.next,
+			if (inode != list_entry(sb->s_inode_lru.next,
 						struct inode, i_lru))
 				continue;	/* wrong inode or list_empty */
 			/* avoid lock inversions with trylock */
@@ -693,51 +679,18 @@ static void prune_icache(int nr_to_scan)
 		spin_unlock(&inode->i_lock);
 
 		list_move(&inode->i_lru, &freeable);
-		inodes_stat.nr_unused--;
+		sb->s_nr_inodes_unused--;
+		this_cpu_dec(nr_unused);
 	}
 	if (current_is_kswapd())
 		__count_vm_events(KSWAPD_INODESTEAL, reap);
 	else
 		__count_vm_events(PGINODESTEAL, reap);
-	spin_unlock(&inode_lru_lock);
+	spin_unlock(&sb->s_inode_lru_lock);
 
 	dispose_list(&freeable);
-	up_read(&iprune_sem);
 }
 
-/*
- * shrink_icache_memory() will attempt to reclaim some unused inodes. Here,
- * "unused" means that no dentries are referring to the inodes: the files are
- * not open and the dcache references to those inodes have already been
- * reclaimed.
- *
- * This function is passed the number of inodes to scan, and it returns the
- * total number of remaining possibly-reclaimable inodes.
- */
-static int shrink_icache_memory(struct shrinker *shrink,
-				struct shrink_control *sc)
-{
-	int nr = sc->nr_to_scan;
-	gfp_t gfp_mask = sc->gfp_mask;
-
-	if (nr) {
-		/*
-		 * Nasty deadlock avoidance. We may hold various FS locks,
-		 * and we don't want to recurse into the FS that called us
-		 * in clear_inode() and friends..
-		 */
-		if (!(gfp_mask & __GFP_FS))
-			return -1;
-		prune_icache(nr);
-	}
-	return (get_nr_inodes_unused() / 100) * sysctl_vfs_cache_pressure;
-}
-
-static struct shrinker icache_shrinker = {
-	.shrink = shrink_icache_memory,
-	.seeks = DEFAULT_SEEKS,
-};
-
 static void __wait_on_freeing_inode(struct inode *inode);
 /*
  * Called with the inode lock held.
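
Note: the deleted global shrinker reported the icache size to the VM as
(nr_unused / 100) * sysctl_vfs_cache_pressure; with prune_icache_sb() now
exported, that policy presumably moves into the per-superblock shrinker
introduced elsewhere in this series (not part of this file). A worked
example of the formula, with assumed numbers:

    #include <stdio.h>

    int main(void)
    {
            int nr_unused = 12000;                /* inodes on the LRU */
            int sysctl_vfs_cache_pressure = 100;  /* the default */

            /* At the default pressure the cache reports its full size;
             * 200 would double the reported size, 50 would halve it. */
            printf("%d\n", (nr_unused / 100) * sysctl_vfs_cache_pressure);
            /* prints 12000 */
            return 0;
    }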
@@ -1331,7 +1284,7 @@ static void iput_final(struct inode *inode)
 
 	WARN_ON(inode->i_state & I_NEW);
 
-	if (op && op->drop_inode)
+	if (op->drop_inode)
 		drop = op->drop_inode(inode);
 	else
 		drop = generic_drop_inode(inode);
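
Note: dropping the "op &&" test relies on every superblock having a
non-NULL s_op by the time iput_final() runs; only the individual
drop_inode method may be absent, in which case the generic helper is
used. The fallback pattern in miniature (stubbed types; the stub's
return value is illustrative, not the kernel's logic):

    #include <stdio.h>

    struct super_operations { int (*drop_inode)(void); };

    static int generic_drop_inode(void) { return 0; /* stub: keep cached */ }

    static int do_drop(const struct super_operations *op)
    {
            /* op itself is never NULL; only the method may be */
            return op->drop_inode ? op->drop_inode() : generic_drop_inode();
    }

    int main(void)
    {
            struct super_operations none = { 0 };
            printf("drop: %d\n", do_drop(&none));  /* falls back: 0 */
            return 0;
    }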
@@ -1617,7 +1570,6 @@ void __init inode_init(void)
 					 (SLAB_RECLAIM_ACCOUNT|SLAB_PANIC|
 					 SLAB_MEM_SPREAD),
 					 init_once);
-	register_shrinker(&icache_shrinker);
 
 	/* Hash may have been set up in inode_init_early */
 	if (!hashdist)