diff options
Diffstat (limited to 'fs/inode.c')
-rw-r--r-- | fs/inode.c | 86 |
1 files changed, 60 insertions, 26 deletions
diff --git a/fs/inode.c b/fs/inode.c index 0d5aeccbdd90..3bdc76f1653a 100644 --- a/fs/inode.c +++ b/fs/inode.c | |||
@@ -72,8 +72,7 @@ static unsigned int i_hash_shift __read_mostly; | |||
72 | * allowing for low-overhead inode sync() operations. | 72 | * allowing for low-overhead inode sync() operations. |
73 | */ | 73 | */ |
74 | 74 | ||
75 | LIST_HEAD(inode_in_use); | 75 | static LIST_HEAD(inode_unused); |
76 | LIST_HEAD(inode_unused); | ||
77 | static struct hlist_head *inode_hashtable __read_mostly; | 76 | static struct hlist_head *inode_hashtable __read_mostly; |
78 | 77 | ||
79 | /* | 78 | /* |
@@ -291,6 +290,7 @@ void inode_init_once(struct inode *inode) | |||
291 | INIT_HLIST_NODE(&inode->i_hash); | 290 | INIT_HLIST_NODE(&inode->i_hash); |
292 | INIT_LIST_HEAD(&inode->i_dentry); | 291 | INIT_LIST_HEAD(&inode->i_dentry); |
293 | INIT_LIST_HEAD(&inode->i_devices); | 292 | INIT_LIST_HEAD(&inode->i_devices); |
293 | INIT_LIST_HEAD(&inode->i_list); | ||
294 | INIT_RADIX_TREE(&inode->i_data.page_tree, GFP_ATOMIC); | 294 | INIT_RADIX_TREE(&inode->i_data.page_tree, GFP_ATOMIC); |
295 | spin_lock_init(&inode->i_data.tree_lock); | 295 | spin_lock_init(&inode->i_data.tree_lock); |
296 | spin_lock_init(&inode->i_data.i_mmap_lock); | 296 | spin_lock_init(&inode->i_data.i_mmap_lock); |
@@ -317,12 +317,23 @@ static void init_once(void *foo) | |||
317 | */ | 317 | */ |
318 | void __iget(struct inode *inode) | 318 | void __iget(struct inode *inode) |
319 | { | 319 | { |
320 | if (atomic_inc_return(&inode->i_count) != 1) | 320 | atomic_inc(&inode->i_count); |
321 | return; | 321 | } |
322 | 322 | ||
323 | if (!(inode->i_state & (I_DIRTY|I_SYNC))) | 323 | static void inode_lru_list_add(struct inode *inode) |
324 | list_move(&inode->i_list, &inode_in_use); | 324 | { |
325 | percpu_counter_dec(&nr_inodes_unused); | 325 | if (list_empty(&inode->i_list)) { |
326 | list_add(&inode->i_list, &inode_unused); | ||
327 | percpu_counter_inc(&nr_inodes_unused); | ||
328 | } | ||
329 | } | ||
330 | |||
331 | static void inode_lru_list_del(struct inode *inode) | ||
332 | { | ||
333 | if (!list_empty(&inode->i_list)) { | ||
334 | list_del_init(&inode->i_list); | ||
335 | percpu_counter_dec(&nr_inodes_unused); | ||
336 | } | ||
326 | } | 337 | } |
327 | 338 | ||
328 | void end_writeback(struct inode *inode) | 339 | void end_writeback(struct inode *inode) |
@@ -367,7 +378,7 @@ static void dispose_list(struct list_head *head) | |||
367 | struct inode *inode; | 378 | struct inode *inode; |
368 | 379 | ||
369 | inode = list_first_entry(head, struct inode, i_list); | 380 | inode = list_first_entry(head, struct inode, i_list); |
370 | list_del(&inode->i_list); | 381 | list_del_init(&inode->i_list); |
371 | 382 | ||
372 | evict(inode); | 383 | evict(inode); |
373 | 384 | ||
@@ -413,7 +424,8 @@ static int invalidate_list(struct list_head *head, struct list_head *dispose) | |||
413 | list_move(&inode->i_list, dispose); | 424 | list_move(&inode->i_list, dispose); |
414 | WARN_ON(inode->i_state & I_NEW); | 425 | WARN_ON(inode->i_state & I_NEW); |
415 | inode->i_state |= I_FREEING; | 426 | inode->i_state |= I_FREEING; |
416 | percpu_counter_dec(&nr_inodes_unused); | 427 | if (!(inode->i_state & (I_DIRTY | I_SYNC))) |
428 | percpu_counter_dec(&nr_inodes_unused); | ||
417 | continue; | 429 | continue; |
418 | } | 430 | } |
419 | busy = 1; | 431 | busy = 1; |
@@ -448,7 +460,7 @@ int invalidate_inodes(struct super_block *sb) | |||
448 | 460 | ||
449 | static int can_unuse(struct inode *inode) | 461 | static int can_unuse(struct inode *inode) |
450 | { | 462 | { |
451 | if (inode->i_state) | 463 | if (inode->i_state & ~I_REFERENCED) |
452 | return 0; | 464 | return 0; |
453 | if (inode_has_buffers(inode)) | 465 | if (inode_has_buffers(inode)) |
454 | return 0; | 466 | return 0; |
@@ -460,17 +472,20 @@ static int can_unuse(struct inode *inode) | |||
460 | } | 472 | } |
461 | 473 | ||
462 | /* | 474 | /* |
463 | * Scan `goal' inodes on the unused list for freeable ones. They are moved to | 475 | * Scan `goal' inodes on the unused list for freeable ones. They are moved to a |
464 | * a temporary list and then are freed outside inode_lock by dispose_list(). | 476 | * temporary list and then are freed outside inode_lock by dispose_list(). |
465 | * | 477 | * |
466 | * Any inodes which are pinned purely because of attached pagecache have their | 478 | * Any inodes which are pinned purely because of attached pagecache have their |
467 | * pagecache removed. We expect the final iput() on that inode to add it to | 479 | * pagecache removed. If the inode has metadata buffers attached to |
468 | * the front of the inode_unused list. So look for it there and if the | 480 | * mapping->private_list then try to remove them. |
469 | * inode is still freeable, proceed. The right inode is found 99.9% of the | ||
470 | * time in testing on a 4-way. | ||
471 | * | 481 | * |
472 | * If the inode has metadata buffers attached to mapping->private_list then | 482 | * If the inode has the I_REFERENCED flag set, then it means that it has been |
473 | * try to remove them. | 483 | * used recently - the flag is set in iput_final(). When we encounter such an |
484 | * inode, clear the flag and move it to the back of the LRU so it gets another | ||
485 | * pass through the LRU before it gets reclaimed. This is necessary because of | ||
486 | * the fact we are doing lazy LRU updates to minimise lock contention so the | ||
487 | * LRU does not have strict ordering. Hence we don't want to reclaim inodes | ||
488 | * with this flag set because they are the inodes that are out of order. | ||
474 | */ | 489 | */ |
475 | static void prune_icache(int nr_to_scan) | 490 | static void prune_icache(int nr_to_scan) |
476 | { | 491 | { |
@@ -488,8 +503,21 @@ static void prune_icache(int nr_to_scan) | |||
488 | 503 | ||
489 | inode = list_entry(inode_unused.prev, struct inode, i_list); | 504 | inode = list_entry(inode_unused.prev, struct inode, i_list); |
490 | 505 | ||
491 | if (inode->i_state || atomic_read(&inode->i_count)) { | 506 | /* |
507 | * Referenced or dirty inodes are still in use. Give them | ||
508 | * another pass through the LRU as we cannot reclaim them now. | ||
509 | */ | ||
510 | if (atomic_read(&inode->i_count) || | ||
511 | (inode->i_state & ~I_REFERENCED)) { | ||
512 | list_del_init(&inode->i_list); | ||
513 | percpu_counter_dec(&nr_inodes_unused); | ||
514 | continue; | ||
515 | } | ||
516 | |||
517 | /* recently referenced inodes get one more pass */ | ||
518 | if (inode->i_state & I_REFERENCED) { | ||
492 | list_move(&inode->i_list, &inode_unused); | 519 | list_move(&inode->i_list, &inode_unused); |
520 | inode->i_state &= ~I_REFERENCED; | ||
493 | continue; | 521 | continue; |
494 | } | 522 | } |
495 | if (inode_has_buffers(inode) || inode->i_data.nrpages) { | 523 | if (inode_has_buffers(inode) || inode->i_data.nrpages) { |
@@ -620,7 +648,6 @@ static inline void | |||
620 | __inode_add_to_lists(struct super_block *sb, struct hlist_head *head, | 648 | __inode_add_to_lists(struct super_block *sb, struct hlist_head *head, |
621 | struct inode *inode) | 649 | struct inode *inode) |
622 | { | 650 | { |
623 | list_add(&inode->i_list, &inode_in_use); | ||
624 | list_add(&inode->i_sb_list, &sb->s_inodes); | 651 | list_add(&inode->i_sb_list, &sb->s_inodes); |
625 | if (head) | 652 | if (head) |
626 | hlist_add_head(&inode->i_hash, head); | 653 | hlist_add_head(&inode->i_hash, head); |
@@ -1237,10 +1264,11 @@ static void iput_final(struct inode *inode) | |||
1237 | drop = generic_drop_inode(inode); | 1264 | drop = generic_drop_inode(inode); |
1238 | 1265 | ||
1239 | if (!drop) { | 1266 | if (!drop) { |
1240 | if (!(inode->i_state & (I_DIRTY|I_SYNC))) | ||
1241 | list_move(&inode->i_list, &inode_unused); | ||
1242 | percpu_counter_inc(&nr_inodes_unused); | ||
1243 | if (sb->s_flags & MS_ACTIVE) { | 1267 | if (sb->s_flags & MS_ACTIVE) { |
1268 | inode->i_state |= I_REFERENCED; | ||
1269 | if (!(inode->i_state & (I_DIRTY|I_SYNC))) { | ||
1270 | inode_lru_list_add(inode); | ||
1271 | } | ||
1244 | spin_unlock(&inode_lock); | 1272 | spin_unlock(&inode_lock); |
1245 | return; | 1273 | return; |
1246 | } | 1274 | } |
@@ -1251,13 +1279,19 @@ static void iput_final(struct inode *inode) | |||
1251 | spin_lock(&inode_lock); | 1279 | spin_lock(&inode_lock); |
1252 | WARN_ON(inode->i_state & I_NEW); | 1280 | WARN_ON(inode->i_state & I_NEW); |
1253 | inode->i_state &= ~I_WILL_FREE; | 1281 | inode->i_state &= ~I_WILL_FREE; |
1254 | percpu_counter_dec(&nr_inodes_unused); | ||
1255 | hlist_del_init(&inode->i_hash); | 1282 | hlist_del_init(&inode->i_hash); |
1256 | } | 1283 | } |
1257 | list_del_init(&inode->i_list); | ||
1258 | list_del_init(&inode->i_sb_list); | ||
1259 | WARN_ON(inode->i_state & I_NEW); | 1284 | WARN_ON(inode->i_state & I_NEW); |
1260 | inode->i_state |= I_FREEING; | 1285 | inode->i_state |= I_FREEING; |
1286 | |||
1287 | /* | ||
1288 | * After we delete the inode from the LRU here, we avoid moving dirty | ||
1289 | * inodes back onto the LRU now because I_FREEING is set and hence | ||
1290 | * writeback_single_inode() won't move the inode around. | ||
1291 | */ | ||
1292 | inode_lru_list_del(inode); | ||
1293 | |||
1294 | list_del_init(&inode->i_sb_list); | ||
1261 | spin_unlock(&inode_lock); | 1295 | spin_unlock(&inode_lock); |
1262 | evict(inode); | 1296 | evict(inode); |
1263 | spin_lock(&inode_lock); | 1297 | spin_lock(&inode_lock); |