diff options
author | Nick Piggin <npiggin@suse.de> | 2010-01-29 18:38:31 -0500 |
---|---|---|
committer | Thomas Gleixner <tglx@linutronix.de> | 2010-04-27 11:32:49 -0400 |
commit | 6f22d55b46fbf80b018009ece79f15b8582843e5 (patch) | |
tree | 4623783cdf5b9c59482617f0a3437a852dc52e24 | |
parent | 20d3da9e29645abfba13536a82cf04a4076f9ce6 (diff) |
fs-inode_lock-scale-10
Implement lazy inode LRU similarly to dcache. This should reduce inode list
lock acquisition (todo: measure).
Signed-off-by: Nick Piggin <npiggin@suse.de>
Signed-off-by: John Stultz <johnstul@us.ibm.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
-rw-r--r-- | fs/fs-writeback.c | 21 | ||||
-rw-r--r-- | fs/inode.c | 61 | ||||
-rw-r--r-- | include/linux/fs.h | 7 | ||||
-rw-r--r-- | include/linux/writeback.h | 1 |
4 files changed, 42 insertions, 48 deletions
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 07d70704078e..df99a99f99a2 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c | |||
@@ -544,7 +544,7 @@ select_queue: | |||
544 | /* | 544 | /* |
545 | * The inode is clean, inuse | 545 | * The inode is clean, inuse |
546 | */ | 546 | */ |
547 | list_move(&inode->i_list, &inode_in_use); | 547 | list_del_init(&inode->i_list); |
548 | } else { | 548 | } else { |
549 | /* | 549 | /* |
550 | * The inode is clean, unused | 550 | * The inode is clean, unused |
@@ -1151,8 +1151,6 @@ static void wait_sb_inodes(struct super_block *sb) | |||
1151 | */ | 1151 | */ |
1152 | WARN_ON(!rwsem_is_locked(&sb->s_umount)); | 1152 | WARN_ON(!rwsem_is_locked(&sb->s_umount)); |
1153 | 1153 | ||
1154 | spin_lock(&sb_inode_list_lock); | ||
1155 | |||
1156 | /* | 1154 | /* |
1157 | * Data integrity sync. Must wait for all pages under writeback, | 1155 | * Data integrity sync. Must wait for all pages under writeback, |
1158 | * because there may have been pages dirtied before our sync | 1156 | * because there may have been pages dirtied before our sync |
@@ -1160,7 +1158,8 @@ static void wait_sb_inodes(struct super_block *sb) | |||
1160 | * In which case, the inode may not be on the dirty list, but | 1158 | * In which case, the inode may not be on the dirty list, but |
1161 | * we still have to wait for that writeout. | 1159 | * we still have to wait for that writeout. |
1162 | */ | 1160 | */ |
1163 | list_for_each_entry(inode, &sb->s_inodes, i_sb_list) { | 1161 | rcu_read_lock(); |
1162 | list_for_each_entry_rcu(inode, &sb->s_inodes, i_sb_list) { | ||
1164 | struct address_space *mapping; | 1163 | struct address_space *mapping; |
1165 | 1164 | ||
1166 | mapping = inode->i_mapping; | 1165 | mapping = inode->i_mapping; |
@@ -1174,13 +1173,13 @@ static void wait_sb_inodes(struct super_block *sb) | |||
1174 | } | 1173 | } |
1175 | __iget(inode); | 1174 | __iget(inode); |
1176 | spin_unlock(&inode->i_lock); | 1175 | spin_unlock(&inode->i_lock); |
1177 | spin_unlock(&sb_inode_list_lock); | 1176 | rcu_read_unlock(); |
1178 | /* | 1177 | /* |
1179 | * We hold a reference to 'inode' so it couldn't have been | 1178 | * We hold a reference to 'inode' so it couldn't have been |
1180 | * removed from s_inodes list while we dropped the | 1179 | * removed from s_inodes list while we dropped the i_lock. We |
1181 | * sb_inode_list_lock. We cannot iput the inode now as we can | 1180 | * cannot iput the inode now as we can be holding the last |
1182 | * be holding the last reference and we cannot iput it under | 1181 | * reference and we cannot iput it under spinlock. So we keep |
1183 | * spinlock. So we keep the reference and iput it later. | 1182 | * the reference and iput it later. |
1184 | */ | 1183 | */ |
1185 | iput(old_inode); | 1184 | iput(old_inode); |
1186 | old_inode = inode; | 1185 | old_inode = inode; |
@@ -1189,9 +1188,9 @@ static void wait_sb_inodes(struct super_block *sb) | |||
1189 | 1188 | ||
1190 | cond_resched(); | 1189 | cond_resched(); |
1191 | 1190 | ||
1192 | spin_lock(&sb_inode_list_lock); | 1191 | rcu_read_lock(); |
1193 | } | 1192 | } |
1194 | spin_unlock(&sb_inode_list_lock); | 1193 | rcu_read_unlock(); |
1195 | iput(old_inode); | 1194 | iput(old_inode); |
1196 | } | 1195 | } |
1197 | 1196 | ||
diff --git a/fs/inode.c b/fs/inode.c index 1d7ca6ed7dbd..5039ddf4f0e0 100644 --- a/fs/inode.c +++ b/fs/inode.c | |||
@@ -74,7 +74,6 @@ static unsigned int i_hash_shift __read_mostly; | |||
74 | * allowing for low-overhead inode sync() operations. | 74 | * allowing for low-overhead inode sync() operations. |
75 | */ | 75 | */ |
76 | 76 | ||
77 | LIST_HEAD(inode_in_use); | ||
78 | LIST_HEAD(inode_unused); | 77 | LIST_HEAD(inode_unused); |
79 | 78 | ||
80 | struct inode_hash_bucket { | 79 | struct inode_hash_bucket { |
@@ -266,6 +265,7 @@ void inode_init_once(struct inode *inode) | |||
266 | INIT_HLIST_NODE(&inode->i_hash); | 265 | INIT_HLIST_NODE(&inode->i_hash); |
267 | INIT_LIST_HEAD(&inode->i_dentry); | 266 | INIT_LIST_HEAD(&inode->i_dentry); |
268 | INIT_LIST_HEAD(&inode->i_devices); | 267 | INIT_LIST_HEAD(&inode->i_devices); |
268 | INIT_LIST_HEAD(&inode->i_list); | ||
269 | INIT_RADIX_TREE(&inode->i_data.page_tree, GFP_ATOMIC); | 269 | INIT_RADIX_TREE(&inode->i_data.page_tree, GFP_ATOMIC); |
270 | spin_lock_init(&inode->i_data.tree_lock); | 270 | spin_lock_init(&inode->i_data.tree_lock); |
271 | spin_lock_init(&inode->i_data.i_mmap_lock); | 271 | spin_lock_init(&inode->i_data.i_mmap_lock); |
@@ -291,24 +291,6 @@ static void init_once(void *foo) | |||
291 | inode_init_once(inode); | 291 | inode_init_once(inode); |
292 | } | 292 | } |
293 | 293 | ||
294 | /* | ||
295 | * inode_lock must be held | ||
296 | */ | ||
297 | void __iget(struct inode *inode) | ||
298 | { | ||
299 | assert_spin_locked(&inode->i_lock); | ||
300 | inode->i_count++; | ||
301 | if (inode->i_count > 1) | ||
302 | return; | ||
303 | |||
304 | if (!(inode->i_state & (I_DIRTY|I_SYNC))) { | ||
305 | spin_lock(&wb_inode_list_lock); | ||
306 | list_move(&inode->i_list, &inode_in_use); | ||
307 | spin_unlock(&wb_inode_list_lock); | ||
308 | } | ||
309 | atomic_dec(&inodes_stat.nr_unused); | ||
310 | } | ||
311 | |||
312 | /** | 294 | /** |
313 | * clear_inode - clear an inode | 295 | * clear_inode - clear an inode |
314 | * @inode: inode to clear | 296 | * @inode: inode to clear |
@@ -352,7 +334,7 @@ static void dispose_list(struct list_head *head) | |||
352 | struct inode *inode; | 334 | struct inode *inode; |
353 | 335 | ||
354 | inode = list_first_entry(head, struct inode, i_list); | 336 | inode = list_first_entry(head, struct inode, i_list); |
355 | list_del(&inode->i_list); | 337 | list_del_init(&inode->i_list); |
356 | 338 | ||
357 | if (inode->i_data.nrpages) | 339 | if (inode->i_data.nrpages) |
358 | truncate_inode_pages(&inode->i_data, 0); | 340 | truncate_inode_pages(&inode->i_data, 0); |
@@ -405,11 +387,12 @@ static int invalidate_list(struct list_head *head, struct list_head *dispose) | |||
405 | invalidate_inode_buffers(inode); | 387 | invalidate_inode_buffers(inode); |
406 | if (!inode->i_count) { | 388 | if (!inode->i_count) { |
407 | spin_lock(&wb_inode_list_lock); | 389 | spin_lock(&wb_inode_list_lock); |
408 | list_move(&inode->i_list, dispose); | 390 | list_del(&inode->i_list); |
409 | spin_unlock(&wb_inode_list_lock); | 391 | spin_unlock(&wb_inode_list_lock); |
410 | WARN_ON(inode->i_state & I_NEW); | 392 | WARN_ON(inode->i_state & I_NEW); |
411 | inode->i_state |= I_FREEING; | 393 | inode->i_state |= I_FREEING; |
412 | spin_unlock(&inode->i_lock); | 394 | spin_unlock(&inode->i_lock); |
395 | list_add(&inode->i_list, dispose); | ||
413 | count++; | 396 | count++; |
414 | continue; | 397 | continue; |
415 | } | 398 | } |
@@ -496,7 +479,13 @@ again: | |||
496 | spin_unlock(&wb_inode_list_lock); | 479 | spin_unlock(&wb_inode_list_lock); |
497 | goto again; | 480 | goto again; |
498 | } | 481 | } |
499 | if (inode->i_state || inode->i_count) { | 482 | if (inode->i_count) { |
483 | list_del_init(&inode->i_list); | ||
484 | spin_unlock(&inode->i_lock); | ||
485 | atomic_dec(&inodes_stat.nr_unused); | ||
486 | continue; | ||
487 | } | ||
488 | if (inode->i_state) { | ||
500 | list_move(&inode->i_list, &inode_unused); | 489 | list_move(&inode->i_list, &inode_unused); |
501 | spin_unlock(&inode->i_lock); | 490 | spin_unlock(&inode->i_lock); |
502 | continue; | 491 | continue; |
@@ -512,6 +501,7 @@ again: | |||
512 | again2: | 501 | again2: |
513 | spin_lock(&wb_inode_list_lock); | 502 | spin_lock(&wb_inode_list_lock); |
514 | 503 | ||
504 | /* XXX: may no longer work well */ | ||
515 | if (inode != list_entry(inode_unused.next, | 505 | if (inode != list_entry(inode_unused.next, |
516 | struct inode, i_list)) | 506 | struct inode, i_list)) |
517 | continue; /* wrong inode or list_empty */ | 507 | continue; /* wrong inode or list_empty */ |
@@ -660,9 +650,6 @@ __inode_add_to_lists(struct super_block *sb, struct inode_hash_bucket *b, | |||
660 | atomic_inc(&inodes_stat.nr_inodes); | 650 | atomic_inc(&inodes_stat.nr_inodes); |
661 | list_add(&inode->i_sb_list, &sb->s_inodes); | 651 | list_add(&inode->i_sb_list, &sb->s_inodes); |
662 | spin_unlock(&sb_inode_list_lock); | 652 | spin_unlock(&sb_inode_list_lock); |
663 | spin_lock(&wb_inode_list_lock); | ||
664 | list_add(&inode->i_list, &inode_in_use); | ||
665 | spin_unlock(&wb_inode_list_lock); | ||
666 | if (b) { | 653 | if (b) { |
667 | spin_lock(&b->lock); | 654 | spin_lock(&b->lock); |
668 | hlist_add_head(&inode->i_hash, &b->head); | 655 | hlist_add_head(&inode->i_hash, &b->head); |
@@ -1311,9 +1298,11 @@ void generic_delete_inode(struct inode *inode) | |||
1311 | { | 1298 | { |
1312 | const struct super_operations *op = inode->i_sb->s_op; | 1299 | const struct super_operations *op = inode->i_sb->s_op; |
1313 | 1300 | ||
1314 | spin_lock(&wb_inode_list_lock); | 1301 | if (!list_empty(&inode->i_list)) { |
1315 | list_del_init(&inode->i_list); | 1302 | spin_lock(&wb_inode_list_lock); |
1316 | spin_unlock(&wb_inode_list_lock); | 1303 | list_del_init(&inode->i_list); |
1304 | spin_unlock(&wb_inode_list_lock); | ||
1305 | } | ||
1317 | list_del_init(&inode->i_sb_list); | 1306 | list_del_init(&inode->i_sb_list); |
1318 | spin_unlock(&sb_inode_list_lock); | 1307 | spin_unlock(&sb_inode_list_lock); |
1319 | WARN_ON(inode->i_state & I_NEW); | 1308 | WARN_ON(inode->i_state & I_NEW); |
@@ -1365,12 +1354,12 @@ int generic_detach_inode(struct inode *inode) | |||
1365 | struct super_block *sb = inode->i_sb; | 1354 | struct super_block *sb = inode->i_sb; |
1366 | 1355 | ||
1367 | if (!hlist_unhashed(&inode->i_hash)) { | 1356 | if (!hlist_unhashed(&inode->i_hash)) { |
1368 | if (!(inode->i_state & (I_DIRTY|I_SYNC))) { | 1357 | if (list_empty(&inode->i_list)) { |
1369 | spin_lock(&wb_inode_list_lock); | 1358 | spin_lock(&wb_inode_list_lock); |
1370 | list_move(&inode->i_list, &inode_unused); | 1359 | list_add(&inode->i_list, &inode_unused); |
1371 | spin_unlock(&wb_inode_list_lock); | 1360 | spin_unlock(&wb_inode_list_lock); |
1361 | atomic_inc(&inodes_stat.nr_unused); | ||
1372 | } | 1362 | } |
1373 | atomic_inc(&inodes_stat.nr_unused); | ||
1374 | if (sb->s_flags & MS_ACTIVE) { | 1363 | if (sb->s_flags & MS_ACTIVE) { |
1375 | spin_unlock(&inode->i_lock); | 1364 | spin_unlock(&inode->i_lock); |
1376 | spin_unlock(&sb_inode_list_lock); | 1365 | spin_unlock(&sb_inode_list_lock); |
@@ -1386,11 +1375,13 @@ int generic_detach_inode(struct inode *inode) | |||
1386 | WARN_ON(inode->i_state & I_NEW); | 1375 | WARN_ON(inode->i_state & I_NEW); |
1387 | inode->i_state &= ~I_WILL_FREE; | 1376 | inode->i_state &= ~I_WILL_FREE; |
1388 | __remove_inode_hash(inode); | 1377 | __remove_inode_hash(inode); |
1378 | } | ||
1379 | if (!list_empty(&inode->i_list)) { | ||
1380 | spin_lock(&wb_inode_list_lock); | ||
1381 | list_del_init(&inode->i_list); | ||
1382 | spin_unlock(&wb_inode_list_lock); | ||
1389 | atomic_dec(&inodes_stat.nr_unused); | 1383 | atomic_dec(&inodes_stat.nr_unused); |
1390 | } | 1384 | } |
1391 | spin_lock(&wb_inode_list_lock); | ||
1392 | list_del_init(&inode->i_list); | ||
1393 | spin_unlock(&wb_inode_list_lock); | ||
1394 | list_del_init(&inode->i_sb_list); | 1385 | list_del_init(&inode->i_sb_list); |
1395 | spin_unlock(&sb_inode_list_lock); | 1386 | spin_unlock(&sb_inode_list_lock); |
1396 | WARN_ON(inode->i_state & I_NEW); | 1387 | WARN_ON(inode->i_state & I_NEW); |
@@ -1726,7 +1717,7 @@ void __init inode_init(void) | |||
1726 | 1717 | ||
1727 | inode_hashtable = | 1718 | inode_hashtable = |
1728 | alloc_large_system_hash("Inode-cache", | 1719 | alloc_large_system_hash("Inode-cache", |
1729 | sizeof(struct hlist_head), | 1720 | sizeof(struct inode_hash_bucket), |
1730 | ihash_entries, | 1721 | ihash_entries, |
1731 | 14, | 1722 | 14, |
1732 | 0, | 1723 | 0, |
diff --git a/include/linux/fs.h b/include/linux/fs.h index 3551528a25f6..b86542c8d68c 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h | |||
@@ -2174,7 +2174,6 @@ extern int insert_inode_locked4(struct inode *, unsigned long, int (*test)(struc | |||
2174 | extern int insert_inode_locked(struct inode *); | 2174 | extern int insert_inode_locked(struct inode *); |
2175 | extern void unlock_new_inode(struct inode *); | 2175 | extern void unlock_new_inode(struct inode *); |
2176 | 2176 | ||
2177 | extern void __iget(struct inode * inode); | ||
2178 | extern void iget_failed(struct inode *); | 2177 | extern void iget_failed(struct inode *); |
2179 | extern void clear_inode(struct inode *); | 2178 | extern void clear_inode(struct inode *); |
2180 | extern void destroy_inode(struct inode *); | 2179 | extern void destroy_inode(struct inode *); |
@@ -2393,6 +2392,12 @@ extern int generic_show_options(struct seq_file *m, struct vfsmount *mnt); | |||
2393 | extern void save_mount_options(struct super_block *sb, char *options); | 2392 | extern void save_mount_options(struct super_block *sb, char *options); |
2394 | extern void replace_mount_options(struct super_block *sb, char *options); | 2393 | extern void replace_mount_options(struct super_block *sb, char *options); |
2395 | 2394 | ||
2395 | static inline void __iget(struct inode *inode) | ||
2396 | { | ||
2397 | assert_spin_locked(&inode->i_lock); | ||
2398 | inode->i_count++; | ||
2399 | } | ||
2400 | |||
2396 | static inline ino_t parent_ino(struct dentry *dentry) | 2401 | static inline ino_t parent_ino(struct dentry *dentry) |
2397 | { | 2402 | { |
2398 | ino_t res; | 2403 | ino_t res; |
diff --git a/include/linux/writeback.h b/include/linux/writeback.h index 05479e576a77..15e8bcd90cd1 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h | |||
@@ -11,7 +11,6 @@ struct backing_dev_info; | |||
11 | 11 | ||
12 | extern spinlock_t sb_inode_list_lock; | 12 | extern spinlock_t sb_inode_list_lock; |
13 | extern spinlock_t wb_inode_list_lock; | 13 | extern spinlock_t wb_inode_list_lock; |
14 | extern struct list_head inode_in_use; | ||
15 | extern struct list_head inode_unused; | 14 | extern struct list_head inode_unused; |
16 | 15 | ||
17 | /* | 16 | /* |