aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorNick Piggin <npiggin@suse.de>2010-01-29 18:38:31 -0500
committerThomas Gleixner <tglx@linutronix.de>2010-04-27 11:32:49 -0400
commit6f22d55b46fbf80b018009ece79f15b8582843e5 (patch)
tree4623783cdf5b9c59482617f0a3437a852dc52e24
parent20d3da9e29645abfba13536a82cf04a4076f9ce6 (diff)
fs-inode_lock-scale-10
Implement lazy inode LRU similarly to dcache. This should reduce inode list lock acquisition (todo: measure). Signed-off-by: Nick Piggin <npiggin@suse.de> Signed-off-by: John Stultz <johnstul@us.ibm.com> Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
-rw-r--r--fs/fs-writeback.c21
-rw-r--r--fs/inode.c61
-rw-r--r--include/linux/fs.h7
-rw-r--r--include/linux/writeback.h1
4 files changed, 42 insertions, 48 deletions
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 07d70704078e..df99a99f99a2 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -544,7 +544,7 @@ select_queue:
544 /* 544 /*
545 * The inode is clean, inuse 545 * The inode is clean, inuse
546 */ 546 */
547 list_move(&inode->i_list, &inode_in_use); 547 list_del_init(&inode->i_list);
548 } else { 548 } else {
549 /* 549 /*
550 * The inode is clean, unused 550 * The inode is clean, unused
@@ -1151,8 +1151,6 @@ static void wait_sb_inodes(struct super_block *sb)
1151 */ 1151 */
1152 WARN_ON(!rwsem_is_locked(&sb->s_umount)); 1152 WARN_ON(!rwsem_is_locked(&sb->s_umount));
1153 1153
1154 spin_lock(&sb_inode_list_lock);
1155
1156 /* 1154 /*
1157 * Data integrity sync. Must wait for all pages under writeback, 1155 * Data integrity sync. Must wait for all pages under writeback,
1158 * because there may have been pages dirtied before our sync 1156 * because there may have been pages dirtied before our sync
@@ -1160,7 +1158,8 @@ static void wait_sb_inodes(struct super_block *sb)
1160 * In which case, the inode may not be on the dirty list, but 1158 * In which case, the inode may not be on the dirty list, but
1161 * we still have to wait for that writeout. 1159 * we still have to wait for that writeout.
1162 */ 1160 */
1163 list_for_each_entry(inode, &sb->s_inodes, i_sb_list) { 1161 rcu_read_lock();
1162 list_for_each_entry_rcu(inode, &sb->s_inodes, i_sb_list) {
1164 struct address_space *mapping; 1163 struct address_space *mapping;
1165 1164
1166 mapping = inode->i_mapping; 1165 mapping = inode->i_mapping;
@@ -1174,13 +1173,13 @@ static void wait_sb_inodes(struct super_block *sb)
1174 } 1173 }
1175 __iget(inode); 1174 __iget(inode);
1176 spin_unlock(&inode->i_lock); 1175 spin_unlock(&inode->i_lock);
1177 spin_unlock(&sb_inode_list_lock); 1176 rcu_read_unlock();
1178 /* 1177 /*
1179 * We hold a reference to 'inode' so it couldn't have been 1178 * We hold a reference to 'inode' so it couldn't have been
1180 * removed from s_inodes list while we dropped the 1179 * removed from s_inodes list while we dropped the i_lock. We
1181 * sb_inode_list_lock. We cannot iput the inode now as we can 1180 * cannot iput the inode now as we can be holding the last
1182 * be holding the last reference and we cannot iput it under 1181 * reference and we cannot iput it under spinlock. So we keep
1183 * spinlock. So we keep the reference and iput it later. 1182 * the reference and iput it later.
1184 */ 1183 */
1185 iput(old_inode); 1184 iput(old_inode);
1186 old_inode = inode; 1185 old_inode = inode;
@@ -1189,9 +1188,9 @@ static void wait_sb_inodes(struct super_block *sb)
1189 1188
1190 cond_resched(); 1189 cond_resched();
1191 1190
1192 spin_lock(&sb_inode_list_lock); 1191 rcu_read_lock();
1193 } 1192 }
1194 spin_unlock(&sb_inode_list_lock); 1193 rcu_read_unlock();
1195 iput(old_inode); 1194 iput(old_inode);
1196} 1195}
1197 1196
diff --git a/fs/inode.c b/fs/inode.c
index 1d7ca6ed7dbd..5039ddf4f0e0 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -74,7 +74,6 @@ static unsigned int i_hash_shift __read_mostly;
74 * allowing for low-overhead inode sync() operations. 74 * allowing for low-overhead inode sync() operations.
75 */ 75 */
76 76
77LIST_HEAD(inode_in_use);
78LIST_HEAD(inode_unused); 77LIST_HEAD(inode_unused);
79 78
80struct inode_hash_bucket { 79struct inode_hash_bucket {
@@ -266,6 +265,7 @@ void inode_init_once(struct inode *inode)
266 INIT_HLIST_NODE(&inode->i_hash); 265 INIT_HLIST_NODE(&inode->i_hash);
267 INIT_LIST_HEAD(&inode->i_dentry); 266 INIT_LIST_HEAD(&inode->i_dentry);
268 INIT_LIST_HEAD(&inode->i_devices); 267 INIT_LIST_HEAD(&inode->i_devices);
268 INIT_LIST_HEAD(&inode->i_list);
269 INIT_RADIX_TREE(&inode->i_data.page_tree, GFP_ATOMIC); 269 INIT_RADIX_TREE(&inode->i_data.page_tree, GFP_ATOMIC);
270 spin_lock_init(&inode->i_data.tree_lock); 270 spin_lock_init(&inode->i_data.tree_lock);
271 spin_lock_init(&inode->i_data.i_mmap_lock); 271 spin_lock_init(&inode->i_data.i_mmap_lock);
@@ -291,24 +291,6 @@ static void init_once(void *foo)
291 inode_init_once(inode); 291 inode_init_once(inode);
292} 292}
293 293
294/*
295 * inode_lock must be held
296 */
297void __iget(struct inode *inode)
298{
299 assert_spin_locked(&inode->i_lock);
300 inode->i_count++;
301 if (inode->i_count > 1)
302 return;
303
304 if (!(inode->i_state & (I_DIRTY|I_SYNC))) {
305 spin_lock(&wb_inode_list_lock);
306 list_move(&inode->i_list, &inode_in_use);
307 spin_unlock(&wb_inode_list_lock);
308 }
309 atomic_dec(&inodes_stat.nr_unused);
310}
311
312/** 294/**
313 * clear_inode - clear an inode 295 * clear_inode - clear an inode
314 * @inode: inode to clear 296 * @inode: inode to clear
@@ -352,7 +334,7 @@ static void dispose_list(struct list_head *head)
352 struct inode *inode; 334 struct inode *inode;
353 335
354 inode = list_first_entry(head, struct inode, i_list); 336 inode = list_first_entry(head, struct inode, i_list);
355 list_del(&inode->i_list); 337 list_del_init(&inode->i_list);
356 338
357 if (inode->i_data.nrpages) 339 if (inode->i_data.nrpages)
358 truncate_inode_pages(&inode->i_data, 0); 340 truncate_inode_pages(&inode->i_data, 0);
@@ -405,11 +387,12 @@ static int invalidate_list(struct list_head *head, struct list_head *dispose)
405 invalidate_inode_buffers(inode); 387 invalidate_inode_buffers(inode);
406 if (!inode->i_count) { 388 if (!inode->i_count) {
407 spin_lock(&wb_inode_list_lock); 389 spin_lock(&wb_inode_list_lock);
408 list_move(&inode->i_list, dispose); 390 list_del(&inode->i_list);
409 spin_unlock(&wb_inode_list_lock); 391 spin_unlock(&wb_inode_list_lock);
410 WARN_ON(inode->i_state & I_NEW); 392 WARN_ON(inode->i_state & I_NEW);
411 inode->i_state |= I_FREEING; 393 inode->i_state |= I_FREEING;
412 spin_unlock(&inode->i_lock); 394 spin_unlock(&inode->i_lock);
395 list_add(&inode->i_list, dispose);
413 count++; 396 count++;
414 continue; 397 continue;
415 } 398 }
@@ -496,7 +479,13 @@ again:
496 spin_unlock(&wb_inode_list_lock); 479 spin_unlock(&wb_inode_list_lock);
497 goto again; 480 goto again;
498 } 481 }
499 if (inode->i_state || inode->i_count) { 482 if (inode->i_count) {
483 list_del_init(&inode->i_list);
484 spin_unlock(&inode->i_lock);
485 atomic_dec(&inodes_stat.nr_unused);
486 continue;
487 }
488 if (inode->i_state) {
500 list_move(&inode->i_list, &inode_unused); 489 list_move(&inode->i_list, &inode_unused);
501 spin_unlock(&inode->i_lock); 490 spin_unlock(&inode->i_lock);
502 continue; 491 continue;
@@ -512,6 +501,7 @@ again:
512again2: 501again2:
513 spin_lock(&wb_inode_list_lock); 502 spin_lock(&wb_inode_list_lock);
514 503
504 /* XXX: may no longer work well */
515 if (inode != list_entry(inode_unused.next, 505 if (inode != list_entry(inode_unused.next,
516 struct inode, i_list)) 506 struct inode, i_list))
517 continue; /* wrong inode or list_empty */ 507 continue; /* wrong inode or list_empty */
@@ -660,9 +650,6 @@ __inode_add_to_lists(struct super_block *sb, struct inode_hash_bucket *b,
660 atomic_inc(&inodes_stat.nr_inodes); 650 atomic_inc(&inodes_stat.nr_inodes);
661 list_add(&inode->i_sb_list, &sb->s_inodes); 651 list_add(&inode->i_sb_list, &sb->s_inodes);
662 spin_unlock(&sb_inode_list_lock); 652 spin_unlock(&sb_inode_list_lock);
663 spin_lock(&wb_inode_list_lock);
664 list_add(&inode->i_list, &inode_in_use);
665 spin_unlock(&wb_inode_list_lock);
666 if (b) { 653 if (b) {
667 spin_lock(&b->lock); 654 spin_lock(&b->lock);
668 hlist_add_head(&inode->i_hash, &b->head); 655 hlist_add_head(&inode->i_hash, &b->head);
@@ -1311,9 +1298,11 @@ void generic_delete_inode(struct inode *inode)
1311{ 1298{
1312 const struct super_operations *op = inode->i_sb->s_op; 1299 const struct super_operations *op = inode->i_sb->s_op;
1313 1300
1314 spin_lock(&wb_inode_list_lock); 1301 if (!list_empty(&inode->i_list)) {
1315 list_del_init(&inode->i_list); 1302 spin_lock(&wb_inode_list_lock);
1316 spin_unlock(&wb_inode_list_lock); 1303 list_del_init(&inode->i_list);
1304 spin_unlock(&wb_inode_list_lock);
1305 }
1317 list_del_init(&inode->i_sb_list); 1306 list_del_init(&inode->i_sb_list);
1318 spin_unlock(&sb_inode_list_lock); 1307 spin_unlock(&sb_inode_list_lock);
1319 WARN_ON(inode->i_state & I_NEW); 1308 WARN_ON(inode->i_state & I_NEW);
@@ -1365,12 +1354,12 @@ int generic_detach_inode(struct inode *inode)
1365 struct super_block *sb = inode->i_sb; 1354 struct super_block *sb = inode->i_sb;
1366 1355
1367 if (!hlist_unhashed(&inode->i_hash)) { 1356 if (!hlist_unhashed(&inode->i_hash)) {
1368 if (!(inode->i_state & (I_DIRTY|I_SYNC))) { 1357 if (list_empty(&inode->i_list)) {
1369 spin_lock(&wb_inode_list_lock); 1358 spin_lock(&wb_inode_list_lock);
1370 list_move(&inode->i_list, &inode_unused); 1359 list_add(&inode->i_list, &inode_unused);
1371 spin_unlock(&wb_inode_list_lock); 1360 spin_unlock(&wb_inode_list_lock);
1361 atomic_inc(&inodes_stat.nr_unused);
1372 } 1362 }
1373 atomic_inc(&inodes_stat.nr_unused);
1374 if (sb->s_flags & MS_ACTIVE) { 1363 if (sb->s_flags & MS_ACTIVE) {
1375 spin_unlock(&inode->i_lock); 1364 spin_unlock(&inode->i_lock);
1376 spin_unlock(&sb_inode_list_lock); 1365 spin_unlock(&sb_inode_list_lock);
@@ -1386,11 +1375,13 @@ int generic_detach_inode(struct inode *inode)
1386 WARN_ON(inode->i_state & I_NEW); 1375 WARN_ON(inode->i_state & I_NEW);
1387 inode->i_state &= ~I_WILL_FREE; 1376 inode->i_state &= ~I_WILL_FREE;
1388 __remove_inode_hash(inode); 1377 __remove_inode_hash(inode);
1378 }
1379 if (!list_empty(&inode->i_list)) {
1380 spin_lock(&wb_inode_list_lock);
1381 list_del_init(&inode->i_list);
1382 spin_unlock(&wb_inode_list_lock);
1389 atomic_dec(&inodes_stat.nr_unused); 1383 atomic_dec(&inodes_stat.nr_unused);
1390 } 1384 }
1391 spin_lock(&wb_inode_list_lock);
1392 list_del_init(&inode->i_list);
1393 spin_unlock(&wb_inode_list_lock);
1394 list_del_init(&inode->i_sb_list); 1385 list_del_init(&inode->i_sb_list);
1395 spin_unlock(&sb_inode_list_lock); 1386 spin_unlock(&sb_inode_list_lock);
1396 WARN_ON(inode->i_state & I_NEW); 1387 WARN_ON(inode->i_state & I_NEW);
@@ -1726,7 +1717,7 @@ void __init inode_init(void)
1726 1717
1727 inode_hashtable = 1718 inode_hashtable =
1728 alloc_large_system_hash("Inode-cache", 1719 alloc_large_system_hash("Inode-cache",
1729 sizeof(struct hlist_head), 1720 sizeof(struct inode_hash_bucket),
1730 ihash_entries, 1721 ihash_entries,
1731 14, 1722 14,
1732 0, 1723 0,
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 3551528a25f6..b86542c8d68c 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2174,7 +2174,6 @@ extern int insert_inode_locked4(struct inode *, unsigned long, int (*test)(struc
2174extern int insert_inode_locked(struct inode *); 2174extern int insert_inode_locked(struct inode *);
2175extern void unlock_new_inode(struct inode *); 2175extern void unlock_new_inode(struct inode *);
2176 2176
2177extern void __iget(struct inode * inode);
2178extern void iget_failed(struct inode *); 2177extern void iget_failed(struct inode *);
2179extern void clear_inode(struct inode *); 2178extern void clear_inode(struct inode *);
2180extern void destroy_inode(struct inode *); 2179extern void destroy_inode(struct inode *);
@@ -2393,6 +2392,12 @@ extern int generic_show_options(struct seq_file *m, struct vfsmount *mnt);
2393extern void save_mount_options(struct super_block *sb, char *options); 2392extern void save_mount_options(struct super_block *sb, char *options);
2394extern void replace_mount_options(struct super_block *sb, char *options); 2393extern void replace_mount_options(struct super_block *sb, char *options);
2395 2394
2395static inline void __iget(struct inode *inode)
2396{
2397 assert_spin_locked(&inode->i_lock);
2398 inode->i_count++;
2399}
2400
2396static inline ino_t parent_ino(struct dentry *dentry) 2401static inline ino_t parent_ino(struct dentry *dentry)
2397{ 2402{
2398 ino_t res; 2403 ino_t res;
diff --git a/include/linux/writeback.h b/include/linux/writeback.h
index 05479e576a77..15e8bcd90cd1 100644
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -11,7 +11,6 @@ struct backing_dev_info;
11 11
12extern spinlock_t sb_inode_list_lock; 12extern spinlock_t sb_inode_list_lock;
13extern spinlock_t wb_inode_list_lock; 13extern spinlock_t wb_inode_list_lock;
14extern struct list_head inode_in_use;
15extern struct list_head inode_unused; 14extern struct list_head inode_unused;
16 15
17/* 16/*