aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Dave Chinner <dchinner@redhat.com> 2010-10-23 05:03:02 -0400
committer: Al Viro <viro@zeniv.linux.org.uk> 2010-10-25 21:26:09 -0400
commit: cffbc8aa334f55c9ed42d25202eb3ebf3a97c195 (patch)
tree: dab046695754e2cec1a7ab7a64b236e976d94360
parent: be1a16a0ae29a7c90081a657b64aa51cb1a65a27 (diff)
fs: Convert nr_inodes and nr_unused to per-cpu counters
The number of inodes allocated does not need to be tied to the addition or removal of an inode to/from a list. If we are not tied to a list lock, we could update the counters when inodes are initialised or destroyed, but to do that we need to convert the counters to be per-cpu (i.e. independent of a lock). This means that we have the freedom to change the list/locking implementation without needing to care about the counters.

Based on a patch originally from Eric Dumazet.

[AV: cleaned up a bit, fixed build breakage on weird configs]

Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
-rw-r--r-- fs/fs-writeback.c | 5
-rw-r--r-- fs/inode.c | 64
-rw-r--r-- fs/internal.h | 1
-rw-r--r-- include/linux/fs.h | 3
-rw-r--r-- kernel/sysctl.c | 4
5 files changed, 52 insertions, 25 deletions
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 39f44f2e709a..f04d04af84f2 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -723,7 +723,7 @@ static long wb_check_old_data_flush(struct bdi_writeback *wb)
723 wb->last_old_flush = jiffies; 723 wb->last_old_flush = jiffies;
724 nr_pages = global_page_state(NR_FILE_DIRTY) + 724 nr_pages = global_page_state(NR_FILE_DIRTY) +
725 global_page_state(NR_UNSTABLE_NFS) + 725 global_page_state(NR_UNSTABLE_NFS) +
726 (inodes_stat.nr_inodes - inodes_stat.nr_unused); 726 get_nr_dirty_inodes();
727 727
728 if (nr_pages) { 728 if (nr_pages) {
729 struct wb_writeback_work work = { 729 struct wb_writeback_work work = {
@@ -1090,8 +1090,7 @@ void writeback_inodes_sb(struct super_block *sb)
1090 1090
1091 WARN_ON(!rwsem_is_locked(&sb->s_umount)); 1091 WARN_ON(!rwsem_is_locked(&sb->s_umount));
1092 1092
1093 work.nr_pages = nr_dirty + nr_unstable + 1093 work.nr_pages = nr_dirty + nr_unstable + get_nr_dirty_inodes();
1094 (inodes_stat.nr_inodes - inodes_stat.nr_unused);
1095 1094
1096 bdi_queue_work(sb->s_bdi, &work); 1095 bdi_queue_work(sb->s_bdi, &work);
1097 wait_for_completion(&done); 1096 wait_for_completion(&done);
diff --git a/fs/inode.c b/fs/inode.c
index 4440cf1034ec..0d5aeccbdd90 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -103,8 +103,41 @@ static DECLARE_RWSEM(iprune_sem);
103 */ 103 */
104struct inodes_stat_t inodes_stat; 104struct inodes_stat_t inodes_stat;
105 105
106static struct percpu_counter nr_inodes __cacheline_aligned_in_smp;
107static struct percpu_counter nr_inodes_unused __cacheline_aligned_in_smp;
108
106static struct kmem_cache *inode_cachep __read_mostly; 109static struct kmem_cache *inode_cachep __read_mostly;
107 110
111static inline int get_nr_inodes(void)
112{
113 return percpu_counter_sum_positive(&nr_inodes);
114}
115
116static inline int get_nr_inodes_unused(void)
117{
118 return percpu_counter_sum_positive(&nr_inodes_unused);
119}
120
121int get_nr_dirty_inodes(void)
122{
123 int nr_dirty = get_nr_inodes() - get_nr_inodes_unused();
124 return nr_dirty > 0 ? nr_dirty : 0;
125
126}
127
128/*
129 * Handle nr_inode sysctl
130 */
131#ifdef CONFIG_SYSCTL
132int proc_nr_inodes(ctl_table *table, int write,
133 void __user *buffer, size_t *lenp, loff_t *ppos)
134{
135 inodes_stat.nr_inodes = get_nr_inodes();
136 inodes_stat.nr_unused = get_nr_inodes_unused();
137 return proc_dointvec(table, write, buffer, lenp, ppos);
138}
139#endif
140
108static void wake_up_inode(struct inode *inode) 141static void wake_up_inode(struct inode *inode)
109{ 142{
110 /* 143 /*
@@ -192,6 +225,8 @@ int inode_init_always(struct super_block *sb, struct inode *inode)
192 inode->i_fsnotify_mask = 0; 225 inode->i_fsnotify_mask = 0;
193#endif 226#endif
194 227
228 percpu_counter_inc(&nr_inodes);
229
195 return 0; 230 return 0;
196out: 231out:
197 return -ENOMEM; 232 return -ENOMEM;
@@ -232,6 +267,7 @@ void __destroy_inode(struct inode *inode)
232 if (inode->i_default_acl && inode->i_default_acl != ACL_NOT_CACHED) 267 if (inode->i_default_acl && inode->i_default_acl != ACL_NOT_CACHED)
233 posix_acl_release(inode->i_default_acl); 268 posix_acl_release(inode->i_default_acl);
234#endif 269#endif
270 percpu_counter_dec(&nr_inodes);
235} 271}
236EXPORT_SYMBOL(__destroy_inode); 272EXPORT_SYMBOL(__destroy_inode);
237 273
@@ -286,7 +322,7 @@ void __iget(struct inode *inode)
286 322
287 if (!(inode->i_state & (I_DIRTY|I_SYNC))) 323 if (!(inode->i_state & (I_DIRTY|I_SYNC)))
288 list_move(&inode->i_list, &inode_in_use); 324 list_move(&inode->i_list, &inode_in_use);
289 inodes_stat.nr_unused--; 325 percpu_counter_dec(&nr_inodes_unused);
290} 326}
291 327
292void end_writeback(struct inode *inode) 328void end_writeback(struct inode *inode)
@@ -327,8 +363,6 @@ static void evict(struct inode *inode)
327 */ 363 */
328static void dispose_list(struct list_head *head) 364static void dispose_list(struct list_head *head)
329{ 365{
330 int nr_disposed = 0;
331
332 while (!list_empty(head)) { 366 while (!list_empty(head)) {
333 struct inode *inode; 367 struct inode *inode;
334 368
@@ -344,11 +378,7 @@ static void dispose_list(struct list_head *head)
344 378
345 wake_up_inode(inode); 379 wake_up_inode(inode);
346 destroy_inode(inode); 380 destroy_inode(inode);
347 nr_disposed++;
348 } 381 }
349 spin_lock(&inode_lock);
350 inodes_stat.nr_inodes -= nr_disposed;
351 spin_unlock(&inode_lock);
352} 382}
353 383
354/* 384/*
@@ -357,7 +387,7 @@ static void dispose_list(struct list_head *head)
357static int invalidate_list(struct list_head *head, struct list_head *dispose) 387static int invalidate_list(struct list_head *head, struct list_head *dispose)
358{ 388{
359 struct list_head *next; 389 struct list_head *next;
360 int busy = 0, count = 0; 390 int busy = 0;
361 391
362 next = head->next; 392 next = head->next;
363 for (;;) { 393 for (;;) {
@@ -383,13 +413,11 @@ static int invalidate_list(struct list_head *head, struct list_head *dispose)
383 list_move(&inode->i_list, dispose); 413 list_move(&inode->i_list, dispose);
384 WARN_ON(inode->i_state & I_NEW); 414 WARN_ON(inode->i_state & I_NEW);
385 inode->i_state |= I_FREEING; 415 inode->i_state |= I_FREEING;
386 count++; 416 percpu_counter_dec(&nr_inodes_unused);
387 continue; 417 continue;
388 } 418 }
389 busy = 1; 419 busy = 1;
390 } 420 }
391 /* only unused inodes may be cached with i_count zero */
392 inodes_stat.nr_unused -= count;
393 return busy; 421 return busy;
394} 422}
395 423
@@ -447,7 +475,6 @@ static int can_unuse(struct inode *inode)
447static void prune_icache(int nr_to_scan) 475static void prune_icache(int nr_to_scan)
448{ 476{
449 LIST_HEAD(freeable); 477 LIST_HEAD(freeable);
450 int nr_pruned = 0;
451 int nr_scanned; 478 int nr_scanned;
452 unsigned long reap = 0; 479 unsigned long reap = 0;
453 480
@@ -483,9 +510,8 @@ static void prune_icache(int nr_to_scan)
483 list_move(&inode->i_list, &freeable); 510 list_move(&inode->i_list, &freeable);
484 WARN_ON(inode->i_state & I_NEW); 511 WARN_ON(inode->i_state & I_NEW);
485 inode->i_state |= I_FREEING; 512 inode->i_state |= I_FREEING;
486 nr_pruned++; 513 percpu_counter_dec(&nr_inodes_unused);
487 } 514 }
488 inodes_stat.nr_unused -= nr_pruned;
489 if (current_is_kswapd()) 515 if (current_is_kswapd())
490 __count_vm_events(KSWAPD_INODESTEAL, reap); 516 __count_vm_events(KSWAPD_INODESTEAL, reap);
491 else 517 else
@@ -517,7 +543,7 @@ static int shrink_icache_memory(struct shrinker *shrink, int nr, gfp_t gfp_mask)
517 return -1; 543 return -1;
518 prune_icache(nr); 544 prune_icache(nr);
519 } 545 }
520 return (inodes_stat.nr_unused / 100) * sysctl_vfs_cache_pressure; 546 return (get_nr_inodes_unused() / 100) * sysctl_vfs_cache_pressure;
521} 547}
522 548
523static struct shrinker icache_shrinker = { 549static struct shrinker icache_shrinker = {
@@ -594,7 +620,6 @@ static inline void
594__inode_add_to_lists(struct super_block *sb, struct hlist_head *head, 620__inode_add_to_lists(struct super_block *sb, struct hlist_head *head,
595 struct inode *inode) 621 struct inode *inode)
596{ 622{
597 inodes_stat.nr_inodes++;
598 list_add(&inode->i_list, &inode_in_use); 623 list_add(&inode->i_list, &inode_in_use);
599 list_add(&inode->i_sb_list, &sb->s_inodes); 624 list_add(&inode->i_sb_list, &sb->s_inodes);
600 if (head) 625 if (head)
@@ -1214,7 +1239,7 @@ static void iput_final(struct inode *inode)
1214 if (!drop) { 1239 if (!drop) {
1215 if (!(inode->i_state & (I_DIRTY|I_SYNC))) 1240 if (!(inode->i_state & (I_DIRTY|I_SYNC)))
1216 list_move(&inode->i_list, &inode_unused); 1241 list_move(&inode->i_list, &inode_unused);
1217 inodes_stat.nr_unused++; 1242 percpu_counter_inc(&nr_inodes_unused);
1218 if (sb->s_flags & MS_ACTIVE) { 1243 if (sb->s_flags & MS_ACTIVE) {
1219 spin_unlock(&inode_lock); 1244 spin_unlock(&inode_lock);
1220 return; 1245 return;
@@ -1226,14 +1251,13 @@ static void iput_final(struct inode *inode)
1226 spin_lock(&inode_lock); 1251 spin_lock(&inode_lock);
1227 WARN_ON(inode->i_state & I_NEW); 1252 WARN_ON(inode->i_state & I_NEW);
1228 inode->i_state &= ~I_WILL_FREE; 1253 inode->i_state &= ~I_WILL_FREE;
1229 inodes_stat.nr_unused--; 1254 percpu_counter_dec(&nr_inodes_unused);
1230 hlist_del_init(&inode->i_hash); 1255 hlist_del_init(&inode->i_hash);
1231 } 1256 }
1232 list_del_init(&inode->i_list); 1257 list_del_init(&inode->i_list);
1233 list_del_init(&inode->i_sb_list); 1258 list_del_init(&inode->i_sb_list);
1234 WARN_ON(inode->i_state & I_NEW); 1259 WARN_ON(inode->i_state & I_NEW);
1235 inode->i_state |= I_FREEING; 1260 inode->i_state |= I_FREEING;
1236 inodes_stat.nr_inodes--;
1237 spin_unlock(&inode_lock); 1261 spin_unlock(&inode_lock);
1238 evict(inode); 1262 evict(inode);
1239 spin_lock(&inode_lock); 1263 spin_lock(&inode_lock);
@@ -1502,6 +1526,8 @@ void __init inode_init(void)
1502 SLAB_MEM_SPREAD), 1526 SLAB_MEM_SPREAD),
1503 init_once); 1527 init_once);
1504 register_shrinker(&icache_shrinker); 1528 register_shrinker(&icache_shrinker);
1529 percpu_counter_init(&nr_inodes, 0);
1530 percpu_counter_init(&nr_inodes_unused, 0);
1505 1531
1506 /* Hash may have been set up in inode_init_early */ 1532 /* Hash may have been set up in inode_init_early */
1507 if (!hashdist) 1533 if (!hashdist)
diff --git a/fs/internal.h b/fs/internal.h
index f6dce46d80dc..4cc67eb6ed56 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -105,4 +105,5 @@ extern void release_open_intent(struct nameidata *);
105/* 105/*
106 * inode.c 106 * inode.c
107 */ 107 */
108extern int get_nr_dirty_inodes(void);
108extern int invalidate_inodes(struct super_block *); 109extern int invalidate_inodes(struct super_block *);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 78043da85e1f..a3937a8ee95e 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2486,7 +2486,8 @@ ssize_t simple_attr_write(struct file *file, const char __user *buf,
2486struct ctl_table; 2486struct ctl_table;
2487int proc_nr_files(struct ctl_table *table, int write, 2487int proc_nr_files(struct ctl_table *table, int write,
2488 void __user *buffer, size_t *lenp, loff_t *ppos); 2488 void __user *buffer, size_t *lenp, loff_t *ppos);
2489 2489int proc_nr_inodes(struct ctl_table *table, int write,
2490 void __user *buffer, size_t *lenp, loff_t *ppos);
2490int __init get_filesystem_list(char *buf); 2491int __init get_filesystem_list(char *buf);
2491 2492
2492#define ACC_MODE(x) ("\004\002\006\006"[(x)&O_ACCMODE]) 2493#define ACC_MODE(x) ("\004\002\006\006"[(x)&O_ACCMODE])
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 694b140852c2..99a510cbfbb3 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -1340,14 +1340,14 @@ static struct ctl_table fs_table[] = {
1340 .data = &inodes_stat, 1340 .data = &inodes_stat,
1341 .maxlen = 2*sizeof(int), 1341 .maxlen = 2*sizeof(int),
1342 .mode = 0444, 1342 .mode = 0444,
1343 .proc_handler = proc_dointvec, 1343 .proc_handler = proc_nr_inodes,
1344 }, 1344 },
1345 { 1345 {
1346 .procname = "inode-state", 1346 .procname = "inode-state",
1347 .data = &inodes_stat, 1347 .data = &inodes_stat,
1348 .maxlen = 7*sizeof(int), 1348 .maxlen = 7*sizeof(int),
1349 .mode = 0444, 1349 .mode = 0444,
1350 .proc_handler = proc_dointvec, 1350 .proc_handler = proc_nr_inodes,
1351 }, 1351 },
1352 { 1352 {
1353 .procname = "file-nr", 1353 .procname = "file-nr",