Diffstat (limited to 'fs/inode.c')
-rw-r--r--  fs/inode.c  134
1 files changed, 43 insertions, 91 deletions
diff --git a/fs/inode.c b/fs/inode.c
index 43566d17d1b8..a48fa5355fb4 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -33,11 +33,11 @@
  *
  * inode->i_lock protects:
  *   inode->i_state, inode->i_hash, __iget()
- * inode_lru_lock protects:
- *   inode_lru, inode->i_lru
+ * inode->i_sb->s_inode_lru_lock protects:
+ *   inode->i_sb->s_inode_lru, inode->i_lru
  * inode_sb_list_lock protects:
  *   sb->s_inodes, inode->i_sb_list
- * inode_wb_list_lock protects:
+ * bdi->wb.list_lock protects:
  *   bdi->wb.b_{dirty,io,more_io}, inode->i_wb_list
  * inode_hash_lock protects:
  *   inode_hashtable, inode->i_hash
@@ -46,9 +46,9 @@
  *
  * inode_sb_list_lock
  *   inode->i_lock
- *     inode_lru_lock
+ *     inode->i_sb->s_inode_lru_lock
  *
- * inode_wb_list_lock
+ * bdi->wb.list_lock
  *   inode->i_lock
  *
  * inode_hash_lock
@@ -64,22 +64,7 @@ static unsigned int i_hash_shift __read_mostly;
 static struct hlist_head *inode_hashtable __read_mostly;
 static __cacheline_aligned_in_smp DEFINE_SPINLOCK(inode_hash_lock);
 
-static LIST_HEAD(inode_lru);
-static DEFINE_SPINLOCK(inode_lru_lock);
-
 __cacheline_aligned_in_smp DEFINE_SPINLOCK(inode_sb_list_lock);
-__cacheline_aligned_in_smp DEFINE_SPINLOCK(inode_wb_list_lock);
-
-/*
- * iprune_sem provides exclusion between the icache shrinking and the
- * umount path.
- *
- * We don't actually need it to protect anything in the umount path,
- * but only need to cycle through it to make sure any inode that
- * prune_icache took off the LRU list has been fully torn down by the
- * time we are past evict_inodes.
- */
-static DECLARE_RWSEM(iprune_sem);
 
 /*
  * Empty aops. Can be used for the cases where the user does not
@@ -95,6 +80,7 @@ EXPORT_SYMBOL(empty_aops);
 struct inodes_stat_t inodes_stat;
 
 static DEFINE_PER_CPU(unsigned int, nr_inodes);
+static DEFINE_PER_CPU(unsigned int, nr_unused);
 
 static struct kmem_cache *inode_cachep __read_mostly;
 
@@ -109,7 +95,11 @@ static int get_nr_inodes(void)
 
 static inline int get_nr_inodes_unused(void)
 {
-        return inodes_stat.nr_unused;
+        int i;
+        int sum = 0;
+        for_each_possible_cpu(i)
+                sum += per_cpu(nr_unused, i);
+        return sum < 0 ? 0 : sum;
 }
 
 int get_nr_dirty_inodes(void)
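The rewritten get_nr_inodes_unused() above sums the new per-CPU nr_unused counters instead of reading a single global field, so the result is only approximate: an inode can be queued to an LRU on one CPU and reclaimed on another, letting the raw sum transiently go negative, which is why it is clamped to zero. A minimal user-space sketch of the same approximate-counter pattern, with per-thread-style slots standing in for DEFINE_PER_CPU (all names below are illustrative, not from the kernel):

#include <stdio.h>

#define NSLOTS 4                  /* stand-in for the number of CPUs */

static long unused_slot[NSLOTS];  /* stand-in for DEFINE_PER_CPU(nr_unused) */

/* Writers touch only their own slot (the kernel gets that exclusivity from
 * this_cpu_inc()/this_cpu_dec()), so updates take no shared lock. */
static void slot_add(int slot, long delta)
{
        unused_slot[slot] += delta;
}

/* Readers sum every slot.  Because add and remove can land on different
 * slots, the sum is approximate and can dip below zero, hence the clamp. */
static long slots_sum(void)
{
        long sum = 0;
        for (int i = 0; i < NSLOTS; i++)
                sum += unused_slot[i];
        return sum < 0 ? 0 : sum;
}

int main(void)
{
        slot_add(0, +3);   /* e.g. three inodes queued to the LRU on CPU 0 */
        slot_add(2, -1);   /* one reclaimed while running on CPU 2 */
        printf("approx nr_unused = %ld\n", slots_sum());   /* prints 2 */
        return 0;
}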
@@ -127,6 +117,7 @@ int proc_nr_inodes(ctl_table *table, int write,
                    void __user *buffer, size_t *lenp, loff_t *ppos)
 {
         inodes_stat.nr_inodes = get_nr_inodes();
+        inodes_stat.nr_unused = get_nr_inodes_unused();
         return proc_dointvec(table, write, buffer, lenp, ppos);
 }
 #endif
@@ -176,8 +167,7 @@ int inode_init_always(struct super_block *sb, struct inode *inode)
         mutex_init(&inode->i_mutex);
         lockdep_set_class(&inode->i_mutex, &sb->s_type->i_mutex_key);
 
-        init_rwsem(&inode->i_alloc_sem);
-        lockdep_set_class(&inode->i_alloc_sem, &sb->s_type->i_alloc_sem_key);
+        atomic_set(&inode->i_dio_count, 0);
 
         mapping->a_ops = &empty_aops;
         mapping->host = inode;
@@ -337,22 +327,24 @@ EXPORT_SYMBOL(ihold);
 
 static void inode_lru_list_add(struct inode *inode)
 {
-        spin_lock(&inode_lru_lock);
+        spin_lock(&inode->i_sb->s_inode_lru_lock);
         if (list_empty(&inode->i_lru)) {
-                list_add(&inode->i_lru, &inode_lru);
-                inodes_stat.nr_unused++;
+                list_add(&inode->i_lru, &inode->i_sb->s_inode_lru);
+                inode->i_sb->s_nr_inodes_unused++;
+                this_cpu_inc(nr_unused);
         }
-        spin_unlock(&inode_lru_lock);
+        spin_unlock(&inode->i_sb->s_inode_lru_lock);
 }
 
 static void inode_lru_list_del(struct inode *inode)
 {
-        spin_lock(&inode_lru_lock);
+        spin_lock(&inode->i_sb->s_inode_lru_lock);
         if (!list_empty(&inode->i_lru)) {
                 list_del_init(&inode->i_lru);
-                inodes_stat.nr_unused--;
+                inode->i_sb->s_nr_inodes_unused--;
+                this_cpu_dec(nr_unused);
         }
-        spin_unlock(&inode_lru_lock);
+        spin_unlock(&inode->i_sb->s_inode_lru_lock);
 }
 
 /**
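Both helpers above lean on a property of the kernel's struct list_head: a node initialised to point at itself reads as empty, so inode->i_lru doubles as an "is this inode on the LRU?" flag and the add/del paths stay idempotent. A self-contained user-space sketch of that membership-by-emptiness pattern, re-implementing just enough of the list API (illustrative only, not the kernel's headers):

#include <stdio.h>

/* Minimal circular doubly-linked list in the style of the kernel's list_head. */
struct list_head { struct list_head *next, *prev; };

static void INIT_LIST_HEAD(struct list_head *h) { h->next = h->prev = h; }
static int list_empty(const struct list_head *h) { return h->next == h; }

static void list_add(struct list_head *new, struct list_head *head)
{
        new->next = head->next;
        new->prev = head;
        head->next->prev = new;
        head->next = new;
}

static void list_del_init(struct list_head *entry)
{
        entry->prev->next = entry->next;
        entry->next->prev = entry->prev;
        INIT_LIST_HEAD(entry);      /* self-linked again: "not on any list" */
}

/* The embedded node is the membership flag, exactly what the
 * list_empty(&inode->i_lru) checks above rely on. */
struct fake_inode { struct list_head i_lru; };

int main(void)
{
        struct list_head lru;
        struct fake_inode ino;

        INIT_LIST_HEAD(&lru);
        INIT_LIST_HEAD(&ino.i_lru);

        if (list_empty(&ino.i_lru))          /* not yet on the LRU: add it */
                list_add(&ino.i_lru, &lru);
        printf("on lru: %d\n", !list_empty(&ino.i_lru));   /* 1 */

        if (!list_empty(&ino.i_lru))         /* on the LRU: remove it */
                list_del_init(&ino.i_lru);
        printf("on lru: %d\n", !list_empty(&ino.i_lru));   /* 0 */
        return 0;
}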
@@ -537,14 +529,6 @@ void evict_inodes(struct super_block *sb)
         spin_unlock(&inode_sb_list_lock);
 
         dispose_list(&dispose);
-
-        /*
-         * Cycle through iprune_sem to make sure any inode that prune_icache
-         * moved off the list before we took the lock has been fully torn
-         * down.
-         */
-        down_write(&iprune_sem);
-        up_write(&iprune_sem);
 }
 
 /**
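The deleted down_write()/up_write() pair was not protecting any data; it was a barrier idiom that waits for every prune_icache() pass already holding iprune_sem for read to drain, as the comment removed earlier in this diff explains. With pruning now done per superblock from the superblock shrinker, that barrier is no longer needed. A user-space sketch of the same cycle-through idiom, assuming POSIX rwlocks (illustrative names):

#include <pthread.h>

/* A pruner would hold prune_lock for read across its whole pass. */
static pthread_rwlock_t prune_lock = PTHREAD_RWLOCK_INITIALIZER;

/* The teardown path does not need to keep the lock; taking it for write and
 * releasing it immediately simply waits until every pass that started
 * earlier has finished -- the "cycle through" of the removed comment. */
static void wait_for_in_flight_pruners(void)
{
        pthread_rwlock_wrlock(&prune_lock);
        pthread_rwlock_unlock(&prune_lock);
}

int main(void)
{
        wait_for_in_flight_pruners();   /* no readers here, returns at once */
        return 0;
}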
@@ -607,8 +591,10 @@ static int can_unuse(struct inode *inode)
 }
 
 /*
- * Scan `goal' inodes on the unused list for freeable ones. They are moved to a
- * temporary list and then are freed outside inode_lru_lock by dispose_list().
+ * Walk the superblock inode LRU for freeable inodes and attempt to free them.
+ * This is called from the superblock shrinker function with a number of inodes
+ * to trim from the LRU. Inodes to be freed are moved to a temporary list and
+ * then are freed outside inode_lock by dispose_list().
  *
  * Any inodes which are pinned purely because of attached pagecache have their
  * pagecache removed. If the inode has metadata buffers attached to
@@ -622,29 +608,28 @@ static int can_unuse(struct inode *inode)
  * LRU does not have strict ordering. Hence we don't want to reclaim inodes
  * with this flag set because they are the inodes that are out of order.
  */
-static void prune_icache(int nr_to_scan)
+void prune_icache_sb(struct super_block *sb, int nr_to_scan)
 {
         LIST_HEAD(freeable);
         int nr_scanned;
         unsigned long reap = 0;
 
-        down_read(&iprune_sem);
-        spin_lock(&inode_lru_lock);
-        for (nr_scanned = 0; nr_scanned < nr_to_scan; nr_scanned++) {
+        spin_lock(&sb->s_inode_lru_lock);
+        for (nr_scanned = nr_to_scan; nr_scanned >= 0; nr_scanned--) {
                 struct inode *inode;
 
-                if (list_empty(&inode_lru))
+                if (list_empty(&sb->s_inode_lru))
                         break;
 
-                inode = list_entry(inode_lru.prev, struct inode, i_lru);
+                inode = list_entry(sb->s_inode_lru.prev, struct inode, i_lru);
 
                 /*
-                 * we are inverting the inode_lru_lock/inode->i_lock here,
+                 * we are inverting the sb->s_inode_lru_lock/inode->i_lock here,
                  * so use a trylock. If we fail to get the lock, just move the
                  * inode to the back of the list so we don't spin on it.
                  */
                 if (!spin_trylock(&inode->i_lock)) {
-                        list_move(&inode->i_lru, &inode_lru);
+                        list_move(&inode->i_lru, &sb->s_inode_lru);
                         continue;
                 }
 
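The trylock in this hunk exists because the documented order is inode->i_lock before the LRU lock, while the scan already holds sb->s_inode_lru_lock; blocking on i_lock here could deadlock against a thread taking the locks in the documented order, so a failed trylock just rotates the inode to the back of the list. A small user-space sketch of that inversion-avoidance pattern, assuming POSIX mutexes (names are illustrative):

#include <pthread.h>
#include <stdio.h>

/* Documented order: object lock, then LRU lock (mirroring inode->i_lock
 * before s_inode_lru_lock).  The scanner below already holds the LRU lock,
 * so it may only *try* the object lock rather than block on it. */
static pthread_mutex_t lru_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t obj_lock = PTHREAD_MUTEX_INITIALIZER;

static int scan_one(void)
{
        int freed = 0;

        pthread_mutex_lock(&lru_lock);
        if (pthread_mutex_trylock(&obj_lock) == 0) {
                /* got the object lock despite the inverted order: work on it */
                freed = 1;
                pthread_mutex_unlock(&obj_lock);
        } else {
                /* contended: skip it (the kernel rotates the inode to the
                 * back of the LRU instead of spinning) */
        }
        pthread_mutex_unlock(&lru_lock);
        return freed;
}

int main(void)
{
        printf("freed: %d\n", scan_one());      /* uncontended here, prints 1 */
        return 0;
}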
@@ -656,28 +641,29 @@ static void prune_icache(int nr_to_scan)
                     (inode->i_state & ~I_REFERENCED)) {
                         list_del_init(&inode->i_lru);
                         spin_unlock(&inode->i_lock);
-                        inodes_stat.nr_unused--;
+                        sb->s_nr_inodes_unused--;
+                        this_cpu_dec(nr_unused);
                         continue;
                 }
 
                 /* recently referenced inodes get one more pass */
                 if (inode->i_state & I_REFERENCED) {
                         inode->i_state &= ~I_REFERENCED;
-                        list_move(&inode->i_lru, &inode_lru);
+                        list_move(&inode->i_lru, &sb->s_inode_lru);
                         spin_unlock(&inode->i_lock);
                         continue;
                 }
                 if (inode_has_buffers(inode) || inode->i_data.nrpages) {
                         __iget(inode);
                         spin_unlock(&inode->i_lock);
-                        spin_unlock(&inode_lru_lock);
+                        spin_unlock(&sb->s_inode_lru_lock);
                         if (remove_inode_buffers(inode))
                                 reap += invalidate_mapping_pages(&inode->i_data,
                                                                  0, -1);
                         iput(inode);
-                        spin_lock(&inode_lru_lock);
+                        spin_lock(&sb->s_inode_lru_lock);
 
-                        if (inode != list_entry(inode_lru.next,
+                        if (inode != list_entry(sb->s_inode_lru.next,
                                                 struct inode, i_lru))
                                 continue;       /* wrong inode or list_empty */
                         /* avoid lock inversions with trylock */
@@ -693,51 +679,18 @@ static void prune_icache(int nr_to_scan)
                 spin_unlock(&inode->i_lock);
 
                 list_move(&inode->i_lru, &freeable);
-                inodes_stat.nr_unused--;
+                sb->s_nr_inodes_unused--;
+                this_cpu_dec(nr_unused);
         }
         if (current_is_kswapd())
                 __count_vm_events(KSWAPD_INODESTEAL, reap);
         else
                 __count_vm_events(PGINODESTEAL, reap);
-        spin_unlock(&inode_lru_lock);
+        spin_unlock(&sb->s_inode_lru_lock);
 
         dispose_list(&freeable);
-        up_read(&iprune_sem);
 }
 
-/*
- * shrink_icache_memory() will attempt to reclaim some unused inodes. Here,
- * "unused" means that no dentries are referring to the inodes: the files are
- * not open and the dcache references to those inodes have already been
- * reclaimed.
- *
- * This function is passed the number of inodes to scan, and it returns the
- * total number of remaining possibly-reclaimable inodes.
- */
-static int shrink_icache_memory(struct shrinker *shrink,
-                                struct shrink_control *sc)
-{
-        int nr = sc->nr_to_scan;
-        gfp_t gfp_mask = sc->gfp_mask;
-
-        if (nr) {
-                /*
-                 * Nasty deadlock avoidance. We may hold various FS locks,
-                 * and we don't want to recurse into the FS that called us
-                 * in clear_inode() and friends..
-                 */
-                if (!(gfp_mask & __GFP_FS))
-                        return -1;
-                prune_icache(nr);
-        }
-        return (get_nr_inodes_unused() / 100) * sysctl_vfs_cache_pressure;
-}
-
-static struct shrinker icache_shrinker = {
-        .shrink = shrink_icache_memory,
-        .seeks = DEFAULT_SEEKS,
-};
-
 static void __wait_on_freeing_inode(struct inode *inode);
 /*
  * Called with the inode lock held.
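For scale, the deleted shrink_icache_memory() sized its hint to the VM as (get_nr_inodes_unused() / 100) * sysctl_vfs_cache_pressure: with 20,000 unused inodes and the default vfs_cache_pressure of 100 it reported (20000 / 100) * 100 = 20,000 reclaimable objects, and raising the pressure to 200 would double that to 40,000. Per the new comment above prune_icache_sb(), reclaim is now driven by the superblock shrinker, which lives outside this file; the __GFP_FS bail-out and the pressure scaling presumably move there with it.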
@@ -1331,7 +1284,7 @@ static void iput_final(struct inode *inode)
 
         WARN_ON(inode->i_state & I_NEW);
 
-        if (op && op->drop_inode)
+        if (op->drop_inode)
                 drop = op->drop_inode(inode);
         else
                 drop = generic_drop_inode(inode);
@@ -1617,7 +1570,6 @@ void __init inode_init(void)
                                          (SLAB_RECLAIM_ACCOUNT|SLAB_PANIC|
                                          SLAB_MEM_SPREAD),
                                          init_once);
-        register_shrinker(&icache_shrinker);
 
         /* Hash may have been set up in inode_init_early */
         if (!hashdist)
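The per-superblock names this diff relies on -- s_inode_lru, s_inode_lru_lock and s_nr_inodes_unused -- are not declared in fs/inode.c. The companion header change presumably adds fields along these lines to struct super_block; the sketch below is illustrative only and not taken from this diff:

/* Hypothetical sketch of the fields prune_icache_sb() and the LRU helpers
 * expect; the real declarations belong in include/linux/fs.h and may differ
 * in ordering, comments and cacheline alignment. */
struct super_block {
        /* ... existing members ... */
        spinlock_t              s_inode_lru_lock;   /* protects s_inode_lru */
        struct list_head        s_inode_lru;        /* unused inode LRU, one per sb */
        int                     s_nr_inodes_unused; /* inodes on s_inode_lru */
        /* ... existing members ... */
};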