diff options
author | Nick Piggin <npiggin@suse.de> | 2010-10-23 06:55:17 -0400 |
---|---|---|
committer | Al Viro <viro@zeniv.linux.org.uk> | 2010-10-25 21:26:09 -0400 |
commit | 9e38d86ff2d8a8db99570e982230861046df32b5 (patch) | |
tree | 7ea2ceea24a4e070259a4585b2748c9e2c070ee0 /fs/inode.c | |
parent | cffbc8aa334f55c9ed42d25202eb3ebf3a97c195 (diff) |
fs: Implement lazy LRU updates for inodes
Convert the inode LRU to use lazy updates to reduce lock and
cacheline traffic. We avoid moving inodes around in the LRU list
during iget/iput operations so these frequent operations don't need
to access the LRUs. Instead, we defer the refcount checks to
reclaim-time and use a per-inode state flag, I_REFERENCED, to tell
reclaim that iget has touched the inode in the past. This means that
only reclaim should be touching the LRU with any frequency, hence
significantly reducing lock acquisitions and the amount of contention
on LRU updates.
This also removes the inode_in_use list, which means we now only
have one list for tracking the inode LRU status. This makes it much
simpler to split out the LRU list operations under its own lock.
Signed-off-by: Nick Piggin <npiggin@suse.de>
Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Diffstat (limited to 'fs/inode.c')
-rw-r--r-- | fs/inode.c | 86 |
1 files changed, 60 insertions, 26 deletions
diff --git a/fs/inode.c b/fs/inode.c index 0d5aeccbdd90..3bdc76f1653a 100644 --- a/fs/inode.c +++ b/fs/inode.c | |||
@@ -72,8 +72,7 @@ static unsigned int i_hash_shift __read_mostly; | |||
72 | * allowing for low-overhead inode sync() operations. | 72 | * allowing for low-overhead inode sync() operations. |
73 | */ | 73 | */ |
74 | 74 | ||
75 | LIST_HEAD(inode_in_use); | 75 | static LIST_HEAD(inode_unused); |
76 | LIST_HEAD(inode_unused); | ||
77 | static struct hlist_head *inode_hashtable __read_mostly; | 76 | static struct hlist_head *inode_hashtable __read_mostly; |
78 | 77 | ||
79 | /* | 78 | /* |
@@ -291,6 +290,7 @@ void inode_init_once(struct inode *inode) | |||
291 | INIT_HLIST_NODE(&inode->i_hash); | 290 | INIT_HLIST_NODE(&inode->i_hash); |
292 | INIT_LIST_HEAD(&inode->i_dentry); | 291 | INIT_LIST_HEAD(&inode->i_dentry); |
293 | INIT_LIST_HEAD(&inode->i_devices); | 292 | INIT_LIST_HEAD(&inode->i_devices); |
293 | INIT_LIST_HEAD(&inode->i_list); | ||
294 | INIT_RADIX_TREE(&inode->i_data.page_tree, GFP_ATOMIC); | 294 | INIT_RADIX_TREE(&inode->i_data.page_tree, GFP_ATOMIC); |
295 | spin_lock_init(&inode->i_data.tree_lock); | 295 | spin_lock_init(&inode->i_data.tree_lock); |
296 | spin_lock_init(&inode->i_data.i_mmap_lock); | 296 | spin_lock_init(&inode->i_data.i_mmap_lock); |
@@ -317,12 +317,23 @@ static void init_once(void *foo) | |||
317 | */ | 317 | */ |
318 | void __iget(struct inode *inode) | 318 | void __iget(struct inode *inode) |
319 | { | 319 | { |
320 | if (atomic_inc_return(&inode->i_count) != 1) | 320 | atomic_inc(&inode->i_count); |
321 | return; | 321 | } |
322 | 322 | ||
323 | if (!(inode->i_state & (I_DIRTY|I_SYNC))) | 323 | static void inode_lru_list_add(struct inode *inode) |
324 | list_move(&inode->i_list, &inode_in_use); | 324 | { |
325 | percpu_counter_dec(&nr_inodes_unused); | 325 | if (list_empty(&inode->i_list)) { |
326 | list_add(&inode->i_list, &inode_unused); | ||
327 | percpu_counter_inc(&nr_inodes_unused); | ||
328 | } | ||
329 | } | ||
330 | |||
331 | static void inode_lru_list_del(struct inode *inode) | ||
332 | { | ||
333 | if (!list_empty(&inode->i_list)) { | ||
334 | list_del_init(&inode->i_list); | ||
335 | percpu_counter_dec(&nr_inodes_unused); | ||
336 | } | ||
326 | } | 337 | } |
327 | 338 | ||
328 | void end_writeback(struct inode *inode) | 339 | void end_writeback(struct inode *inode) |
@@ -367,7 +378,7 @@ static void dispose_list(struct list_head *head) | |||
367 | struct inode *inode; | 378 | struct inode *inode; |
368 | 379 | ||
369 | inode = list_first_entry(head, struct inode, i_list); | 380 | inode = list_first_entry(head, struct inode, i_list); |
370 | list_del(&inode->i_list); | 381 | list_del_init(&inode->i_list); |
371 | 382 | ||
372 | evict(inode); | 383 | evict(inode); |
373 | 384 | ||
@@ -413,7 +424,8 @@ static int invalidate_list(struct list_head *head, struct list_head *dispose) | |||
413 | list_move(&inode->i_list, dispose); | 424 | list_move(&inode->i_list, dispose); |
414 | WARN_ON(inode->i_state & I_NEW); | 425 | WARN_ON(inode->i_state & I_NEW); |
415 | inode->i_state |= I_FREEING; | 426 | inode->i_state |= I_FREEING; |
416 | percpu_counter_dec(&nr_inodes_unused); | 427 | if (!(inode->i_state & (I_DIRTY | I_SYNC))) |
428 | percpu_counter_dec(&nr_inodes_unused); | ||
417 | continue; | 429 | continue; |
418 | } | 430 | } |
419 | busy = 1; | 431 | busy = 1; |
@@ -448,7 +460,7 @@ int invalidate_inodes(struct super_block *sb) | |||
448 | 460 | ||
449 | static int can_unuse(struct inode *inode) | 461 | static int can_unuse(struct inode *inode) |
450 | { | 462 | { |
451 | if (inode->i_state) | 463 | if (inode->i_state & ~I_REFERENCED) |
452 | return 0; | 464 | return 0; |
453 | if (inode_has_buffers(inode)) | 465 | if (inode_has_buffers(inode)) |
454 | return 0; | 466 | return 0; |
@@ -460,17 +472,20 @@ static int can_unuse(struct inode *inode) | |||
460 | } | 472 | } |
461 | 473 | ||
462 | /* | 474 | /* |
463 | * Scan `goal' inodes on the unused list for freeable ones. They are moved to | 475 | * Scan `goal' inodes on the unused list for freeable ones. They are moved to a |
464 | * a temporary list and then are freed outside inode_lock by dispose_list(). | 476 | * temporary list and then are freed outside inode_lock by dispose_list(). |
465 | * | 477 | * |
466 | * Any inodes which are pinned purely because of attached pagecache have their | 478 | * Any inodes which are pinned purely because of attached pagecache have their |
467 | * pagecache removed. We expect the final iput() on that inode to add it to | 479 | * pagecache removed. If the inode has metadata buffers attached to |
468 | * the front of the inode_unused list. So look for it there and if the | 480 | * mapping->private_list then try to remove them. |
469 | * inode is still freeable, proceed. The right inode is found 99.9% of the | ||
470 | * time in testing on a 4-way. | ||
471 | * | 481 | * |
472 | * If the inode has metadata buffers attached to mapping->private_list then | 482 | * If the inode has the I_REFERENCED flag set, then it means that it has been |
473 | * try to remove them. | 483 | * used recently - the flag is set in iput_final(). When we encounter such an |
484 | * inode, clear the flag and move it to the back of the LRU so it gets another | ||
485 | * pass through the LRU before it gets reclaimed. This is necessary because of | ||
486 | * the fact we are doing lazy LRU updates to minimise lock contention so the | ||
487 | * LRU does not have strict ordering. Hence we don't want to reclaim inodes | ||
488 | * with this flag set because they are the inodes that are out of order. | ||
474 | */ | 489 | */ |
475 | static void prune_icache(int nr_to_scan) | 490 | static void prune_icache(int nr_to_scan) |
476 | { | 491 | { |
@@ -488,8 +503,21 @@ static void prune_icache(int nr_to_scan) | |||
488 | 503 | ||
489 | inode = list_entry(inode_unused.prev, struct inode, i_list); | 504 | inode = list_entry(inode_unused.prev, struct inode, i_list); |
490 | 505 | ||
491 | if (inode->i_state || atomic_read(&inode->i_count)) { | 506 | /* |
507 | * Referenced or dirty inodes are still in use. Give them | ||
508 | * another pass through the LRU as we cannot reclaim them now. | ||
509 | */ | ||
510 | if (atomic_read(&inode->i_count) || | ||
511 | (inode->i_state & ~I_REFERENCED)) { | ||
512 | list_del_init(&inode->i_list); | ||
513 | percpu_counter_dec(&nr_inodes_unused); | ||
514 | continue; | ||
515 | } | ||
516 | |||
517 | /* recently referenced inodes get one more pass */ | ||
518 | if (inode->i_state & I_REFERENCED) { | ||
492 | list_move(&inode->i_list, &inode_unused); | 519 | list_move(&inode->i_list, &inode_unused); |
520 | inode->i_state &= ~I_REFERENCED; | ||
493 | continue; | 521 | continue; |
494 | } | 522 | } |
495 | if (inode_has_buffers(inode) || inode->i_data.nrpages) { | 523 | if (inode_has_buffers(inode) || inode->i_data.nrpages) { |
@@ -620,7 +648,6 @@ static inline void | |||
620 | __inode_add_to_lists(struct super_block *sb, struct hlist_head *head, | 648 | __inode_add_to_lists(struct super_block *sb, struct hlist_head *head, |
621 | struct inode *inode) | 649 | struct inode *inode) |
622 | { | 650 | { |
623 | list_add(&inode->i_list, &inode_in_use); | ||
624 | list_add(&inode->i_sb_list, &sb->s_inodes); | 651 | list_add(&inode->i_sb_list, &sb->s_inodes); |
625 | if (head) | 652 | if (head) |
626 | hlist_add_head(&inode->i_hash, head); | 653 | hlist_add_head(&inode->i_hash, head); |
@@ -1237,10 +1264,11 @@ static void iput_final(struct inode *inode) | |||
1237 | drop = generic_drop_inode(inode); | 1264 | drop = generic_drop_inode(inode); |
1238 | 1265 | ||
1239 | if (!drop) { | 1266 | if (!drop) { |
1240 | if (!(inode->i_state & (I_DIRTY|I_SYNC))) | ||
1241 | list_move(&inode->i_list, &inode_unused); | ||
1242 | percpu_counter_inc(&nr_inodes_unused); | ||
1243 | if (sb->s_flags & MS_ACTIVE) { | 1267 | if (sb->s_flags & MS_ACTIVE) { |
1268 | inode->i_state |= I_REFERENCED; | ||
1269 | if (!(inode->i_state & (I_DIRTY|I_SYNC))) { | ||
1270 | inode_lru_list_add(inode); | ||
1271 | } | ||
1244 | spin_unlock(&inode_lock); | 1272 | spin_unlock(&inode_lock); |
1245 | return; | 1273 | return; |
1246 | } | 1274 | } |
@@ -1251,13 +1279,19 @@ static void iput_final(struct inode *inode) | |||
1251 | spin_lock(&inode_lock); | 1279 | spin_lock(&inode_lock); |
1252 | WARN_ON(inode->i_state & I_NEW); | 1280 | WARN_ON(inode->i_state & I_NEW); |
1253 | inode->i_state &= ~I_WILL_FREE; | 1281 | inode->i_state &= ~I_WILL_FREE; |
1254 | percpu_counter_dec(&nr_inodes_unused); | ||
1255 | hlist_del_init(&inode->i_hash); | 1282 | hlist_del_init(&inode->i_hash); |
1256 | } | 1283 | } |
1257 | list_del_init(&inode->i_list); | ||
1258 | list_del_init(&inode->i_sb_list); | ||
1259 | WARN_ON(inode->i_state & I_NEW); | 1284 | WARN_ON(inode->i_state & I_NEW); |
1260 | inode->i_state |= I_FREEING; | 1285 | inode->i_state |= I_FREEING; |
1286 | |||
1287 | /* | ||
1288 | * After we delete the inode from the LRU here, we avoid moving dirty | ||
1289 | * inodes back onto the LRU now because I_FREEING is set and hence | ||
1290 | * writeback_single_inode() won't move the inode around. | ||
1291 | */ | ||
1292 | inode_lru_list_del(inode); | ||
1293 | |||
1294 | list_del_init(&inode->i_sb_list); | ||
1261 | spin_unlock(&inode_lock); | 1295 | spin_unlock(&inode_lock); |
1262 | evict(inode); | 1296 | evict(inode); |
1263 | spin_lock(&inode_lock); | 1297 | spin_lock(&inode_lock); |