Diffstat (limited to 'fs/inode.c')
-rw-r--r--	fs/inode.c	113
1 file changed, 69 insertions(+), 44 deletions(-)

diff --git a/fs/inode.c b/fs/inode.c
index ae2727ab0c3a..9910c039f026 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -84,16 +84,13 @@ static struct hlist_head *inode_hashtable __read_mostly;
 DEFINE_SPINLOCK(inode_lock);
 
 /*
- * iprune_sem provides exclusion between the kswapd or try_to_free_pages
- * icache shrinking path, and the umount path.  Without this exclusion,
- * by the time prune_icache calls iput for the inode whose pages it has
- * been invalidating, or by the time it calls clear_inode & destroy_inode
- * from its final dispose_list, the struct super_block they refer to
- * (for inode->i_sb->s_op) may already have been freed and reused.
+ * iprune_sem provides exclusion between the icache shrinking and the
+ * umount path.
  *
- * We make this an rwsem because the fastpath is icache shrinking. In
- * some cases a filesystem may be doing a significant amount of work in
- * its inode reclaim code, so this should improve parallelism.
+ * We don't actually need it to protect anything in the umount path,
+ * but only need to cycle through it to make sure any inode that
+ * prune_icache took off the LRU list has been fully torn down by the
+ * time we are past evict_inodes.
  */
 static DECLARE_RWSEM(iprune_sem);
 
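The rewritten comment describes a lock used purely as a barrier: the shrinker holds iprune_sem shared while it tears inodes down, and umount takes it exclusively once, only to drop it again immediately. The exclusive acquisition cannot be granted until every in-flight reader has finished, which is exactly the guarantee evict_inodes needs (see the hunk that moves the down_write/up_write pair, further down). A minimal userspace sketch of the same idiom using POSIX rwlocks — all names here are illustrative, not kernel APIs:

```c
#include <pthread.h>

static pthread_rwlock_t prune_sem = PTHREAD_RWLOCK_INITIALIZER;

/* Shrinker side: hold the lock shared across the whole teardown. */
static void *shrinker(void *unused)
{
	(void)unused;
	pthread_rwlock_rdlock(&prune_sem);
	/* ... drop cached objects; may still dereference shared state ... */
	pthread_rwlock_unlock(&prune_sem);
	return NULL;
}

/*
 * Umount side: "cycle through" the lock.  The write lock cannot be
 * granted while any reader holds it, so once this pair returns, every
 * shrinker that started before it has fully finished.
 */
static void wait_for_shrinkers(void)
{
	pthread_rwlock_wrlock(&prune_sem);
	pthread_rwlock_unlock(&prune_sem);
}

int main(void)
{
	pthread_t t;
	pthread_create(&t, NULL, shrinker, NULL);
	wait_for_shrinkers();	/* safe to free the shared state after this */
	pthread_join(t, NULL);
	return 0;
}
```

Note the barrier only covers readers already inside the lock; the patch can rely on that because evict_inodes has emptied the per-sb lists before cycling, so no later shrinker pass can pick up this superblock's inodes.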
@@ -102,26 +99,29 @@ static DECLARE_RWSEM(iprune_sem);
  */
 struct inodes_stat_t inodes_stat;
 
-static struct percpu_counter nr_inodes __cacheline_aligned_in_smp;
-static struct percpu_counter nr_inodes_unused __cacheline_aligned_in_smp;
+static DEFINE_PER_CPU(unsigned int, nr_inodes);
 
 static struct kmem_cache *inode_cachep __read_mostly;
 
-static inline int get_nr_inodes(void)
+static int get_nr_inodes(void)
 {
-	return percpu_counter_sum_positive(&nr_inodes);
+	int i;
+	int sum = 0;
+	for_each_possible_cpu(i)
+		sum += per_cpu(nr_inodes, i);
+	return sum < 0 ? 0 : sum;
 }
 
 static inline int get_nr_inodes_unused(void)
 {
-	return percpu_counter_sum_positive(&nr_inodes_unused);
+	return inodes_stat.nr_unused;
 }
 
 int get_nr_dirty_inodes(void)
 {
+	/* not actually dirty inodes, but a wild approximation */
 	int nr_dirty = get_nr_inodes() - get_nr_inodes_unused();
 	return nr_dirty > 0 ? nr_dirty : 0;
-
 }
 
 /*
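The percpu_counter for nr_inodes becomes a plain per-CPU variable: this_cpu_inc()/this_cpu_dec() touch only the local CPU's slot, and get_nr_inodes() pays the cost of summing every slot. Since an inode may be allocated on one CPU and destroyed on another, individual slots (and transiently the total) can go negative, which is why the sum is clamped — and why the result is an approximation, as the new comment in get_nr_dirty_inodes() admits. A standalone sketch of the pattern, with an array standing in for per-CPU storage (illustrative names, not the kernel API):

```c
#include <stdio.h>

#define NR_CPUS 4

/* One slot per "CPU"; each writer only touches its own slot, so
 * updates never contend on a shared cacheline. */
static int nr_inodes[NR_CPUS];

static void cpu_inc(int cpu) { nr_inodes[cpu]++; }	/* ~ this_cpu_inc() */
static void cpu_dec(int cpu) { nr_inodes[cpu]--; }	/* ~ this_cpu_dec() */

/* Readers walk every slot, like for_each_possible_cpu() above. */
static int get_nr_inodes(void)
{
	int sum = 0;
	for (int i = 0; i < NR_CPUS; i++)
		sum += nr_inodes[i];
	/* alloc on CPU 0 plus free on CPU 1 leaves slots at +1 and -1;
	 * a sum taken mid-update can dip negative, hence the clamp */
	return sum < 0 ? 0 : sum;
}

int main(void)
{
	cpu_inc(0);	/* inode allocated on CPU 0 */
	cpu_dec(1);	/* same inode destroyed on CPU 1 */
	printf("nr_inodes = %d\n", get_nr_inodes());	/* prints 0 */
	return 0;
}
```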
@@ -132,7 +132,6 @@ int proc_nr_inodes(ctl_table *table, int write,
 		   void __user *buffer, size_t *lenp, loff_t *ppos)
 {
 	inodes_stat.nr_inodes = get_nr_inodes();
-	inodes_stat.nr_unused = get_nr_inodes_unused();
 	return proc_dointvec(table, write, buffer, lenp, ppos);
 }
 #endif
@@ -224,7 +223,7 @@ int inode_init_always(struct super_block *sb, struct inode *inode)
 	inode->i_fsnotify_mask = 0;
 #endif
 
-	percpu_counter_inc(&nr_inodes);
+	this_cpu_inc(nr_inodes);
 
 	return 0;
 out:
@@ -255,6 +254,12 @@ static struct inode *alloc_inode(struct super_block *sb)
 	return inode;
 }
 
+void free_inode_nonrcu(struct inode *inode)
+{
+	kmem_cache_free(inode_cachep, inode);
+}
+EXPORT_SYMBOL(free_inode_nonrcu);
+
 void __destroy_inode(struct inode *inode)
 {
 	BUG_ON(inode_has_buffers(inode));
@@ -266,10 +271,17 @@ void __destroy_inode(struct inode *inode)
 	if (inode->i_default_acl && inode->i_default_acl != ACL_NOT_CACHED)
 		posix_acl_release(inode->i_default_acl);
 #endif
-	percpu_counter_dec(&nr_inodes);
+	this_cpu_dec(nr_inodes);
 }
 EXPORT_SYMBOL(__destroy_inode);
 
+static void i_callback(struct rcu_head *head)
+{
+	struct inode *inode = container_of(head, struct inode, i_rcu);
+	INIT_LIST_HEAD(&inode->i_dentry);
+	kmem_cache_free(inode_cachep, inode);
+}
+
 static void destroy_inode(struct inode *inode)
 {
 	BUG_ON(!list_empty(&inode->i_lru));
@@ -277,8 +289,22 @@ static void destroy_inode(struct inode *inode)
 	if (inode->i_sb->s_op->destroy_inode)
 		inode->i_sb->s_op->destroy_inode(inode);
 	else
-		kmem_cache_free(inode_cachep, (inode));
+		call_rcu(&inode->i_rcu, i_callback);
+}
+
+void address_space_init_once(struct address_space *mapping)
+{
+	memset(mapping, 0, sizeof(*mapping));
+	INIT_RADIX_TREE(&mapping->page_tree, GFP_ATOMIC);
+	spin_lock_init(&mapping->tree_lock);
+	spin_lock_init(&mapping->i_mmap_lock);
+	INIT_LIST_HEAD(&mapping->private_list);
+	spin_lock_init(&mapping->private_lock);
+	INIT_RAW_PRIO_TREE_ROOT(&mapping->i_mmap);
+	INIT_LIST_HEAD(&mapping->i_mmap_nonlinear);
+	mutex_init(&mapping->unmap_mutex);
 }
+EXPORT_SYMBOL(address_space_init_once);
 
 /*
  * These are initializations that only need to be done
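With the switch to call_rcu(), destroy_inode() no longer frees the object immediately: the embedded i_rcu head is queued, and once a grace period has elapsed, i_callback() climbs back from the head to the containing inode with container_of() and returns it to the slab. Filesystems whose destroy_inode must free synchronously get free_inode_nonrcu() instead. A self-contained sketch of the callback shape — the struct, the stub call_rcu(), and main() are local to the example, not kernel code, and the stub skips the grace-period wait a real RCU implementation performs:

```c
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct rcu_head {
	void (*func)(struct rcu_head *head);
};

struct inode {
	long i_ino;
	struct rcu_head i_rcu;	/* embedded, like inode->i_rcu */
};

/* Runs after a grace period: climb from the embedded head back to
 * the containing inode, then free it. */
static void i_callback(struct rcu_head *head)
{
	struct inode *inode = container_of(head, struct inode, i_rcu);
	printf("freeing inode %ld\n", inode->i_ino);
	free(inode);
}

/* Stub: a real call_rcu() would queue the head and invoke func only
 * after all pre-existing RCU readers have finished. */
static void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *))
{
	func(head);	/* pretend the grace period already elapsed */
}

int main(void)
{
	struct inode *inode = malloc(sizeof(*inode));
	inode->i_ino = 42;
	call_rcu(&inode->i_rcu, i_callback);
	return 0;
}
```

The deferral is what lets lock-free lookups keep dereferencing an inode they found in the hash until the grace period expires, instead of racing with the slab free.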
@@ -293,13 +319,7 @@ void inode_init_once(struct inode *inode)
 	INIT_LIST_HEAD(&inode->i_devices);
 	INIT_LIST_HEAD(&inode->i_wb_list);
 	INIT_LIST_HEAD(&inode->i_lru);
-	INIT_RADIX_TREE(&inode->i_data.page_tree, GFP_ATOMIC);
-	spin_lock_init(&inode->i_data.tree_lock);
-	spin_lock_init(&inode->i_data.i_mmap_lock);
-	INIT_LIST_HEAD(&inode->i_data.private_list);
-	spin_lock_init(&inode->i_data.private_lock);
-	INIT_RAW_PRIO_TREE_ROOT(&inode->i_data.i_mmap);
-	INIT_LIST_HEAD(&inode->i_data.i_mmap_nonlinear);
+	address_space_init_once(&inode->i_data);
 	i_size_ordered_init(inode);
 #ifdef CONFIG_FSNOTIFY
 	INIT_HLIST_HEAD(&inode->i_fsnotify_marks);
@@ -335,7 +355,7 @@ static void inode_lru_list_add(struct inode *inode)
 {
 	if (list_empty(&inode->i_lru)) {
 		list_add(&inode->i_lru, &inode_lru);
-		percpu_counter_inc(&nr_inodes_unused);
+		inodes_stat.nr_unused++;
 	}
 }
 
@@ -343,7 +363,7 @@ static void inode_lru_list_del(struct inode *inode)
 {
 	if (!list_empty(&inode->i_lru)) {
 		list_del_init(&inode->i_lru);
-		percpu_counter_dec(&nr_inodes_unused);
+		inodes_stat.nr_unused--;
 	}
 }
 
@@ -430,6 +450,7 @@ void end_writeback(struct inode *inode)
 	BUG_ON(!(inode->i_state & I_FREEING));
 	BUG_ON(inode->i_state & I_CLEAR);
 	inode_sync_wait(inode);
+	/* don't need i_lock here, no concurrent mods to i_state */
 	inode->i_state = I_FREEING | I_CLEAR;
 }
 EXPORT_SYMBOL(end_writeback);
@@ -492,17 +513,12 @@ void evict_inodes(struct super_block *sb)
 	struct inode *inode, *next;
 	LIST_HEAD(dispose);
 
-	down_write(&iprune_sem);
-
 	spin_lock(&inode_lock);
 	list_for_each_entry_safe(inode, next, &sb->s_inodes, i_sb_list) {
 		if (atomic_read(&inode->i_count))
 			continue;
-
-		if (inode->i_state & (I_NEW | I_FREEING | I_WILL_FREE)) {
-			WARN_ON(1);
+		if (inode->i_state & (I_NEW | I_FREEING | I_WILL_FREE))
 			continue;
-		}
 
 		inode->i_state |= I_FREEING;
 
@@ -513,33 +529,45 @@ void evict_inodes(struct super_block *sb)
 		list_move(&inode->i_lru, &dispose);
 		list_del_init(&inode->i_wb_list);
 		if (!(inode->i_state & (I_DIRTY | I_SYNC)))
-			percpu_counter_dec(&nr_inodes_unused);
+			inodes_stat.nr_unused--;
 	}
 	spin_unlock(&inode_lock);
 
 	dispose_list(&dispose);
+
+	/*
+	 * Cycle through iprune_sem to make sure any inode that prune_icache
+	 * moved off the list before we took the lock has been fully torn
+	 * down.
+	 */
+	down_write(&iprune_sem);
 	up_write(&iprune_sem);
 }
 
 /**
  * invalidate_inodes - attempt to free all inodes on a superblock
  * @sb: superblock to operate on
+ * @kill_dirty: flag to guide handling of dirty inodes
  *
  * Attempts to free all inodes for a given superblock.  If there were any
  * busy inodes return a non-zero value, else zero.
+ * If @kill_dirty is set, discard dirty inodes too, otherwise treat
+ * them as busy.
  */
-int invalidate_inodes(struct super_block *sb)
+int invalidate_inodes(struct super_block *sb, bool kill_dirty)
 {
 	int busy = 0;
 	struct inode *inode, *next;
 	LIST_HEAD(dispose);
 
-	down_write(&iprune_sem);
-
 	spin_lock(&inode_lock);
 	list_for_each_entry_safe(inode, next, &sb->s_inodes, i_sb_list) {
 		if (inode->i_state & (I_NEW | I_FREEING | I_WILL_FREE))
 			continue;
+		if (inode->i_state & I_DIRTY && !kill_dirty) {
+			busy = 1;
+			continue;
+		}
 		if (atomic_read(&inode->i_count)) {
 			busy = 1;
 			continue;
@@ -554,12 +582,11 @@ int invalidate_inodes(struct super_block *sb)
 		list_move(&inode->i_lru, &dispose);
 		list_del_init(&inode->i_wb_list);
 		if (!(inode->i_state & (I_DIRTY | I_SYNC)))
-			percpu_counter_dec(&nr_inodes_unused);
+			inodes_stat.nr_unused--;
 	}
 	spin_unlock(&inode_lock);
 
 	dispose_list(&dispose);
-	up_write(&iprune_sem);
 
 	return busy;
 }
@@ -616,7 +643,7 @@ static void prune_icache(int nr_to_scan)
 		if (atomic_read(&inode->i_count) ||
 		    (inode->i_state & ~I_REFERENCED)) {
 			list_del_init(&inode->i_lru);
-			percpu_counter_dec(&nr_inodes_unused);
+			inodes_stat.nr_unused--;
 			continue;
 		}
 
@@ -650,7 +677,7 @@ static void prune_icache(int nr_to_scan)
 		 */
 		list_move(&inode->i_lru, &freeable);
 		list_del_init(&inode->i_wb_list);
-		percpu_counter_dec(&nr_inodes_unused);
+		inodes_stat.nr_unused--;
 	}
 	if (current_is_kswapd())
 		__count_vm_events(KSWAPD_INODESTEAL, reap);
@@ -1648,8 +1675,6 @@ void __init inode_init(void)
 					 SLAB_MEM_SPREAD),
 			 init_once);
 	register_shrinker(&icache_shrinker);
-	percpu_counter_init(&nr_inodes, 0);
-	percpu_counter_init(&nr_inodes_unused, 0);
 
 	/* Hash may have been set up in inode_init_early */
 	if (!hashdist)