diff options
author | Dave Chinner <dchinner@redhat.com> | 2010-10-23 05:03:02 -0400 |
---|---|---|
committer | Al Viro <viro@zeniv.linux.org.uk> | 2010-10-25 21:26:09 -0400 |
commit | cffbc8aa334f55c9ed42d25202eb3ebf3a97c195 (patch) | |
tree | dab046695754e2cec1a7ab7a64b236e976d94360 | |
parent | be1a16a0ae29a7c90081a657b64aa51cb1a65a27 (diff) |
fs: Convert nr_inodes and nr_unused to per-cpu counters
The number of inodes allocated does not need to be tied to the
addition or removal of an inode to/from a list. If we are not tied
to a list lock, we could update the counters when inodes are
initialised or destroyed, but to do that we need to convert the
counters to be per-cpu (i.e. independent of a lock). This means that
we have the freedom to change the list/locking implementation
without needing to care about the counters.
Based on a patch originally from Eric Dumazet.
[AV: cleaned up a bit, fixed build breakage on weird configs]
Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
-rw-r--r-- | fs/fs-writeback.c | 5 | ||||
-rw-r--r-- | fs/inode.c | 64 | ||||
-rw-r--r-- | fs/internal.h | 1 | ||||
-rw-r--r-- | include/linux/fs.h | 3 | ||||
-rw-r--r-- | kernel/sysctl.c | 4 |
5 files changed, 52 insertions, 25 deletions
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 39f44f2e709a..f04d04af84f2 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c | |||
@@ -723,7 +723,7 @@ static long wb_check_old_data_flush(struct bdi_writeback *wb) | |||
723 | wb->last_old_flush = jiffies; | 723 | wb->last_old_flush = jiffies; |
724 | nr_pages = global_page_state(NR_FILE_DIRTY) + | 724 | nr_pages = global_page_state(NR_FILE_DIRTY) + |
725 | global_page_state(NR_UNSTABLE_NFS) + | 725 | global_page_state(NR_UNSTABLE_NFS) + |
726 | (inodes_stat.nr_inodes - inodes_stat.nr_unused); | 726 | get_nr_dirty_inodes(); |
727 | 727 | ||
728 | if (nr_pages) { | 728 | if (nr_pages) { |
729 | struct wb_writeback_work work = { | 729 | struct wb_writeback_work work = { |
@@ -1090,8 +1090,7 @@ void writeback_inodes_sb(struct super_block *sb) | |||
1090 | 1090 | ||
1091 | WARN_ON(!rwsem_is_locked(&sb->s_umount)); | 1091 | WARN_ON(!rwsem_is_locked(&sb->s_umount)); |
1092 | 1092 | ||
1093 | work.nr_pages = nr_dirty + nr_unstable + | 1093 | work.nr_pages = nr_dirty + nr_unstable + get_nr_dirty_inodes(); |
1094 | (inodes_stat.nr_inodes - inodes_stat.nr_unused); | ||
1095 | 1094 | ||
1096 | bdi_queue_work(sb->s_bdi, &work); | 1095 | bdi_queue_work(sb->s_bdi, &work); |
1097 | wait_for_completion(&done); | 1096 | wait_for_completion(&done); |
diff --git a/fs/inode.c b/fs/inode.c index 4440cf1034ec..0d5aeccbdd90 100644 --- a/fs/inode.c +++ b/fs/inode.c | |||
@@ -103,8 +103,41 @@ static DECLARE_RWSEM(iprune_sem); | |||
103 | */ | 103 | */ |
104 | struct inodes_stat_t inodes_stat; | 104 | struct inodes_stat_t inodes_stat; |
105 | 105 | ||
106 | static struct percpu_counter nr_inodes __cacheline_aligned_in_smp; | ||
107 | static struct percpu_counter nr_inodes_unused __cacheline_aligned_in_smp; | ||
108 | |||
106 | static struct kmem_cache *inode_cachep __read_mostly; | 109 | static struct kmem_cache *inode_cachep __read_mostly; |
107 | 110 | ||
111 | static inline int get_nr_inodes(void) | ||
112 | { | ||
113 | return percpu_counter_sum_positive(&nr_inodes); | ||
114 | } | ||
115 | |||
116 | static inline int get_nr_inodes_unused(void) | ||
117 | { | ||
118 | return percpu_counter_sum_positive(&nr_inodes_unused); | ||
119 | } | ||
120 | |||
121 | int get_nr_dirty_inodes(void) | ||
122 | { | ||
123 | int nr_dirty = get_nr_inodes() - get_nr_inodes_unused(); | ||
124 | return nr_dirty > 0 ? nr_dirty : 0; | ||
125 | |||
126 | } | ||
127 | |||
128 | /* | ||
129 | * Handle nr_inode sysctl | ||
130 | */ | ||
131 | #ifdef CONFIG_SYSCTL | ||
132 | int proc_nr_inodes(ctl_table *table, int write, | ||
133 | void __user *buffer, size_t *lenp, loff_t *ppos) | ||
134 | { | ||
135 | inodes_stat.nr_inodes = get_nr_inodes(); | ||
136 | inodes_stat.nr_unused = get_nr_inodes_unused(); | ||
137 | return proc_dointvec(table, write, buffer, lenp, ppos); | ||
138 | } | ||
139 | #endif | ||
140 | |||
108 | static void wake_up_inode(struct inode *inode) | 141 | static void wake_up_inode(struct inode *inode) |
109 | { | 142 | { |
110 | /* | 143 | /* |
@@ -192,6 +225,8 @@ int inode_init_always(struct super_block *sb, struct inode *inode) | |||
192 | inode->i_fsnotify_mask = 0; | 225 | inode->i_fsnotify_mask = 0; |
193 | #endif | 226 | #endif |
194 | 227 | ||
228 | percpu_counter_inc(&nr_inodes); | ||
229 | |||
195 | return 0; | 230 | return 0; |
196 | out: | 231 | out: |
197 | return -ENOMEM; | 232 | return -ENOMEM; |
@@ -232,6 +267,7 @@ void __destroy_inode(struct inode *inode) | |||
232 | if (inode->i_default_acl && inode->i_default_acl != ACL_NOT_CACHED) | 267 | if (inode->i_default_acl && inode->i_default_acl != ACL_NOT_CACHED) |
233 | posix_acl_release(inode->i_default_acl); | 268 | posix_acl_release(inode->i_default_acl); |
234 | #endif | 269 | #endif |
270 | percpu_counter_dec(&nr_inodes); | ||
235 | } | 271 | } |
236 | EXPORT_SYMBOL(__destroy_inode); | 272 | EXPORT_SYMBOL(__destroy_inode); |
237 | 273 | ||
@@ -286,7 +322,7 @@ void __iget(struct inode *inode) | |||
286 | 322 | ||
287 | if (!(inode->i_state & (I_DIRTY|I_SYNC))) | 323 | if (!(inode->i_state & (I_DIRTY|I_SYNC))) |
288 | list_move(&inode->i_list, &inode_in_use); | 324 | list_move(&inode->i_list, &inode_in_use); |
289 | inodes_stat.nr_unused--; | 325 | percpu_counter_dec(&nr_inodes_unused); |
290 | } | 326 | } |
291 | 327 | ||
292 | void end_writeback(struct inode *inode) | 328 | void end_writeback(struct inode *inode) |
@@ -327,8 +363,6 @@ static void evict(struct inode *inode) | |||
327 | */ | 363 | */ |
328 | static void dispose_list(struct list_head *head) | 364 | static void dispose_list(struct list_head *head) |
329 | { | 365 | { |
330 | int nr_disposed = 0; | ||
331 | |||
332 | while (!list_empty(head)) { | 366 | while (!list_empty(head)) { |
333 | struct inode *inode; | 367 | struct inode *inode; |
334 | 368 | ||
@@ -344,11 +378,7 @@ static void dispose_list(struct list_head *head) | |||
344 | 378 | ||
345 | wake_up_inode(inode); | 379 | wake_up_inode(inode); |
346 | destroy_inode(inode); | 380 | destroy_inode(inode); |
347 | nr_disposed++; | ||
348 | } | 381 | } |
349 | spin_lock(&inode_lock); | ||
350 | inodes_stat.nr_inodes -= nr_disposed; | ||
351 | spin_unlock(&inode_lock); | ||
352 | } | 382 | } |
353 | 383 | ||
354 | /* | 384 | /* |
@@ -357,7 +387,7 @@ static void dispose_list(struct list_head *head) | |||
357 | static int invalidate_list(struct list_head *head, struct list_head *dispose) | 387 | static int invalidate_list(struct list_head *head, struct list_head *dispose) |
358 | { | 388 | { |
359 | struct list_head *next; | 389 | struct list_head *next; |
360 | int busy = 0, count = 0; | 390 | int busy = 0; |
361 | 391 | ||
362 | next = head->next; | 392 | next = head->next; |
363 | for (;;) { | 393 | for (;;) { |
@@ -383,13 +413,11 @@ static int invalidate_list(struct list_head *head, struct list_head *dispose) | |||
383 | list_move(&inode->i_list, dispose); | 413 | list_move(&inode->i_list, dispose); |
384 | WARN_ON(inode->i_state & I_NEW); | 414 | WARN_ON(inode->i_state & I_NEW); |
385 | inode->i_state |= I_FREEING; | 415 | inode->i_state |= I_FREEING; |
386 | count++; | 416 | percpu_counter_dec(&nr_inodes_unused); |
387 | continue; | 417 | continue; |
388 | } | 418 | } |
389 | busy = 1; | 419 | busy = 1; |
390 | } | 420 | } |
391 | /* only unused inodes may be cached with i_count zero */ | ||
392 | inodes_stat.nr_unused -= count; | ||
393 | return busy; | 421 | return busy; |
394 | } | 422 | } |
395 | 423 | ||
@@ -447,7 +475,6 @@ static int can_unuse(struct inode *inode) | |||
447 | static void prune_icache(int nr_to_scan) | 475 | static void prune_icache(int nr_to_scan) |
448 | { | 476 | { |
449 | LIST_HEAD(freeable); | 477 | LIST_HEAD(freeable); |
450 | int nr_pruned = 0; | ||
451 | int nr_scanned; | 478 | int nr_scanned; |
452 | unsigned long reap = 0; | 479 | unsigned long reap = 0; |
453 | 480 | ||
@@ -483,9 +510,8 @@ static void prune_icache(int nr_to_scan) | |||
483 | list_move(&inode->i_list, &freeable); | 510 | list_move(&inode->i_list, &freeable); |
484 | WARN_ON(inode->i_state & I_NEW); | 511 | WARN_ON(inode->i_state & I_NEW); |
485 | inode->i_state |= I_FREEING; | 512 | inode->i_state |= I_FREEING; |
486 | nr_pruned++; | 513 | percpu_counter_dec(&nr_inodes_unused); |
487 | } | 514 | } |
488 | inodes_stat.nr_unused -= nr_pruned; | ||
489 | if (current_is_kswapd()) | 515 | if (current_is_kswapd()) |
490 | __count_vm_events(KSWAPD_INODESTEAL, reap); | 516 | __count_vm_events(KSWAPD_INODESTEAL, reap); |
491 | else | 517 | else |
@@ -517,7 +543,7 @@ static int shrink_icache_memory(struct shrinker *shrink, int nr, gfp_t gfp_mask) | |||
517 | return -1; | 543 | return -1; |
518 | prune_icache(nr); | 544 | prune_icache(nr); |
519 | } | 545 | } |
520 | return (inodes_stat.nr_unused / 100) * sysctl_vfs_cache_pressure; | 546 | return (get_nr_inodes_unused() / 100) * sysctl_vfs_cache_pressure; |
521 | } | 547 | } |
522 | 548 | ||
523 | static struct shrinker icache_shrinker = { | 549 | static struct shrinker icache_shrinker = { |
@@ -594,7 +620,6 @@ static inline void | |||
594 | __inode_add_to_lists(struct super_block *sb, struct hlist_head *head, | 620 | __inode_add_to_lists(struct super_block *sb, struct hlist_head *head, |
595 | struct inode *inode) | 621 | struct inode *inode) |
596 | { | 622 | { |
597 | inodes_stat.nr_inodes++; | ||
598 | list_add(&inode->i_list, &inode_in_use); | 623 | list_add(&inode->i_list, &inode_in_use); |
599 | list_add(&inode->i_sb_list, &sb->s_inodes); | 624 | list_add(&inode->i_sb_list, &sb->s_inodes); |
600 | if (head) | 625 | if (head) |
@@ -1214,7 +1239,7 @@ static void iput_final(struct inode *inode) | |||
1214 | if (!drop) { | 1239 | if (!drop) { |
1215 | if (!(inode->i_state & (I_DIRTY|I_SYNC))) | 1240 | if (!(inode->i_state & (I_DIRTY|I_SYNC))) |
1216 | list_move(&inode->i_list, &inode_unused); | 1241 | list_move(&inode->i_list, &inode_unused); |
1217 | inodes_stat.nr_unused++; | 1242 | percpu_counter_inc(&nr_inodes_unused); |
1218 | if (sb->s_flags & MS_ACTIVE) { | 1243 | if (sb->s_flags & MS_ACTIVE) { |
1219 | spin_unlock(&inode_lock); | 1244 | spin_unlock(&inode_lock); |
1220 | return; | 1245 | return; |
@@ -1226,14 +1251,13 @@ static void iput_final(struct inode *inode) | |||
1226 | spin_lock(&inode_lock); | 1251 | spin_lock(&inode_lock); |
1227 | WARN_ON(inode->i_state & I_NEW); | 1252 | WARN_ON(inode->i_state & I_NEW); |
1228 | inode->i_state &= ~I_WILL_FREE; | 1253 | inode->i_state &= ~I_WILL_FREE; |
1229 | inodes_stat.nr_unused--; | 1254 | percpu_counter_dec(&nr_inodes_unused); |
1230 | hlist_del_init(&inode->i_hash); | 1255 | hlist_del_init(&inode->i_hash); |
1231 | } | 1256 | } |
1232 | list_del_init(&inode->i_list); | 1257 | list_del_init(&inode->i_list); |
1233 | list_del_init(&inode->i_sb_list); | 1258 | list_del_init(&inode->i_sb_list); |
1234 | WARN_ON(inode->i_state & I_NEW); | 1259 | WARN_ON(inode->i_state & I_NEW); |
1235 | inode->i_state |= I_FREEING; | 1260 | inode->i_state |= I_FREEING; |
1236 | inodes_stat.nr_inodes--; | ||
1237 | spin_unlock(&inode_lock); | 1261 | spin_unlock(&inode_lock); |
1238 | evict(inode); | 1262 | evict(inode); |
1239 | spin_lock(&inode_lock); | 1263 | spin_lock(&inode_lock); |
@@ -1502,6 +1526,8 @@ void __init inode_init(void) | |||
1502 | SLAB_MEM_SPREAD), | 1526 | SLAB_MEM_SPREAD), |
1503 | init_once); | 1527 | init_once); |
1504 | register_shrinker(&icache_shrinker); | 1528 | register_shrinker(&icache_shrinker); |
1529 | percpu_counter_init(&nr_inodes, 0); | ||
1530 | percpu_counter_init(&nr_inodes_unused, 0); | ||
1505 | 1531 | ||
1506 | /* Hash may have been set up in inode_init_early */ | 1532 | /* Hash may have been set up in inode_init_early */ |
1507 | if (!hashdist) | 1533 | if (!hashdist) |
diff --git a/fs/internal.h b/fs/internal.h index f6dce46d80dc..4cc67eb6ed56 100644 --- a/fs/internal.h +++ b/fs/internal.h | |||
@@ -105,4 +105,5 @@ extern void release_open_intent(struct nameidata *); | |||
105 | /* | 105 | /* |
106 | * inode.c | 106 | * inode.c |
107 | */ | 107 | */ |
108 | extern int get_nr_dirty_inodes(void); | ||
108 | extern int invalidate_inodes(struct super_block *); | 109 | extern int invalidate_inodes(struct super_block *); |
diff --git a/include/linux/fs.h b/include/linux/fs.h index 78043da85e1f..a3937a8ee95e 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h | |||
@@ -2486,7 +2486,8 @@ ssize_t simple_attr_write(struct file *file, const char __user *buf, | |||
2486 | struct ctl_table; | 2486 | struct ctl_table; |
2487 | int proc_nr_files(struct ctl_table *table, int write, | 2487 | int proc_nr_files(struct ctl_table *table, int write, |
2488 | void __user *buffer, size_t *lenp, loff_t *ppos); | 2488 | void __user *buffer, size_t *lenp, loff_t *ppos); |
2489 | 2489 | int proc_nr_inodes(struct ctl_table *table, int write, | |
2490 | void __user *buffer, size_t *lenp, loff_t *ppos); | ||
2490 | int __init get_filesystem_list(char *buf); | 2491 | int __init get_filesystem_list(char *buf); |
2491 | 2492 | ||
2492 | #define ACC_MODE(x) ("\004\002\006\006"[(x)&O_ACCMODE]) | 2493 | #define ACC_MODE(x) ("\004\002\006\006"[(x)&O_ACCMODE]) |
diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 694b140852c2..99a510cbfbb3 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c | |||
@@ -1340,14 +1340,14 @@ static struct ctl_table fs_table[] = { | |||
1340 | .data = &inodes_stat, | 1340 | .data = &inodes_stat, |
1341 | .maxlen = 2*sizeof(int), | 1341 | .maxlen = 2*sizeof(int), |
1342 | .mode = 0444, | 1342 | .mode = 0444, |
1343 | .proc_handler = proc_dointvec, | 1343 | .proc_handler = proc_nr_inodes, |
1344 | }, | 1344 | }, |
1345 | { | 1345 | { |
1346 | .procname = "inode-state", | 1346 | .procname = "inode-state", |
1347 | .data = &inodes_stat, | 1347 | .data = &inodes_stat, |
1348 | .maxlen = 7*sizeof(int), | 1348 | .maxlen = 7*sizeof(int), |
1349 | .mode = 0444, | 1349 | .mode = 0444, |
1350 | .proc_handler = proc_dointvec, | 1350 | .proc_handler = proc_nr_inodes, |
1351 | }, | 1351 | }, |
1352 | { | 1352 | { |
1353 | .procname = "file-nr", | 1353 | .procname = "file-nr", |