-rw-r--r--  fs/fs-writeback.c         | 11
-rw-r--r--  fs/inode.c                | 86
-rw-r--r--  include/linux/fs.h        | 13
-rw-r--r--  include/linux/writeback.h |  2

4 files changed, 71 insertions, 41 deletions
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index f04d04af84f2..e8f65290e836 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -408,16 +408,13 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
 			 * completion.
 			 */
 			redirty_tail(inode);
-		} else if (atomic_read(&inode->i_count)) {
-			/*
-			 * The inode is clean, inuse
-			 */
-			list_move(&inode->i_list, &inode_in_use);
 		} else {
 			/*
-			 * The inode is clean, unused
+			 * The inode is clean. At this point we either have
+			 * a reference to the inode or it's on it's way out.
+			 * No need to add it back to the LRU.
 			 */
-			list_move(&inode->i_list, &inode_unused);
+			list_del_init(&inode->i_list);
 		}
 	}
 	inode_sync_complete(inode);
diff --git a/fs/inode.c b/fs/inode.c
index 0d5aeccbdd90..3bdc76f1653a 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -72,8 +72,7 @@ static unsigned int i_hash_shift __read_mostly;
  * allowing for low-overhead inode sync() operations.
  */
 
-LIST_HEAD(inode_in_use);
-LIST_HEAD(inode_unused);
+static LIST_HEAD(inode_unused);
 static struct hlist_head *inode_hashtable __read_mostly;
 
 /*
@@ -291,6 +290,7 @@ void inode_init_once(struct inode *inode)
 	INIT_HLIST_NODE(&inode->i_hash);
 	INIT_LIST_HEAD(&inode->i_dentry);
 	INIT_LIST_HEAD(&inode->i_devices);
+	INIT_LIST_HEAD(&inode->i_list);
 	INIT_RADIX_TREE(&inode->i_data.page_tree, GFP_ATOMIC);
 	spin_lock_init(&inode->i_data.tree_lock);
 	spin_lock_init(&inode->i_data.i_mmap_lock);
@@ -317,12 +317,23 @@ static void init_once(void *foo)
  */
 void __iget(struct inode *inode)
 {
-	if (atomic_inc_return(&inode->i_count) != 1)
-		return;
+	atomic_inc(&inode->i_count);
+}
 
-	if (!(inode->i_state & (I_DIRTY|I_SYNC)))
-		list_move(&inode->i_list, &inode_in_use);
-	percpu_counter_dec(&nr_inodes_unused);
+static void inode_lru_list_add(struct inode *inode)
+{
+	if (list_empty(&inode->i_list)) {
+		list_add(&inode->i_list, &inode_unused);
+		percpu_counter_inc(&nr_inodes_unused);
+	}
+}
+
+static void inode_lru_list_del(struct inode *inode)
+{
+	if (!list_empty(&inode->i_list)) {
+		list_del_init(&inode->i_list);
+		percpu_counter_dec(&nr_inodes_unused);
+	}
 }
 
 void end_writeback(struct inode *inode)
@@ -367,7 +378,7 @@ static void dispose_list(struct list_head *head)
 		struct inode *inode;
 
 		inode = list_first_entry(head, struct inode, i_list);
-		list_del(&inode->i_list);
+		list_del_init(&inode->i_list);
 
 		evict(inode);
 
@@ -413,7 +424,8 @@ static int invalidate_list(struct list_head *head, struct list_head *dispose)
 			list_move(&inode->i_list, dispose);
 			WARN_ON(inode->i_state & I_NEW);
 			inode->i_state |= I_FREEING;
-			percpu_counter_dec(&nr_inodes_unused);
+			if (!(inode->i_state & (I_DIRTY | I_SYNC)))
+				percpu_counter_dec(&nr_inodes_unused);
 			continue;
 		}
 		busy = 1;
@@ -448,7 +460,7 @@ int invalidate_inodes(struct super_block *sb)
 
 static int can_unuse(struct inode *inode)
 {
-	if (inode->i_state)
+	if (inode->i_state & ~I_REFERENCED)
 		return 0;
 	if (inode_has_buffers(inode))
 		return 0;
@@ -460,17 +472,20 @@ static int can_unuse(struct inode *inode)
 }
 
 /*
- * Scan `goal' inodes on the unused list for freeable ones. They are moved to
- * a temporary list and then are freed outside inode_lock by dispose_list().
+ * Scan `goal' inodes on the unused list for freeable ones. They are moved to a
+ * temporary list and then are freed outside inode_lock by dispose_list().
  *
  * Any inodes which are pinned purely because of attached pagecache have their
- * pagecache removed. We expect the final iput() on that inode to add it to
- * the front of the inode_unused list. So look for it there and if the
- * inode is still freeable, proceed. The right inode is found 99.9% of the
- * time in testing on a 4-way.
+ * pagecache removed. If the inode has metadata buffers attached to
+ * mapping->private_list then try to remove them.
  *
- * If the inode has metadata buffers attached to mapping->private_list then
- * try to remove them.
+ * If the inode has the I_REFERENCED flag set, then it means that it has been
+ * used recently - the flag is set in iput_final(). When we encounter such an
+ * inode, clear the flag and move it to the back of the LRU so it gets another
+ * pass through the LRU before it gets reclaimed. This is necessary because of
+ * the fact we are doing lazy LRU updates to minimise lock contention so the
+ * LRU does not have strict ordering. Hence we don't want to reclaim inodes
+ * with this flag set because they are the inodes that are out of order.
 */
 static void prune_icache(int nr_to_scan)
 {
@@ -488,8 +503,21 @@ static void prune_icache(int nr_to_scan)
 
 		inode = list_entry(inode_unused.prev, struct inode, i_list);
 
-		if (inode->i_state || atomic_read(&inode->i_count)) {
+		/*
+		 * Referenced or dirty inodes are still in use. Give them
+		 * another pass through the LRU as we canot reclaim them now.
+		 */
+		if (atomic_read(&inode->i_count) ||
+		    (inode->i_state & ~I_REFERENCED)) {
+			list_del_init(&inode->i_list);
+			percpu_counter_dec(&nr_inodes_unused);
+			continue;
+		}
+
+		/* recently referenced inodes get one more pass */
+		if (inode->i_state & I_REFERENCED) {
 			list_move(&inode->i_list, &inode_unused);
+			inode->i_state &= ~I_REFERENCED;
 			continue;
 		}
 		if (inode_has_buffers(inode) || inode->i_data.nrpages) {
@@ -620,7 +648,6 @@ static inline void
 __inode_add_to_lists(struct super_block *sb, struct hlist_head *head,
 			struct inode *inode)
 {
-	list_add(&inode->i_list, &inode_in_use);
 	list_add(&inode->i_sb_list, &sb->s_inodes);
 	if (head)
 		hlist_add_head(&inode->i_hash, head);
@@ -1237,10 +1264,11 @@ static void iput_final(struct inode *inode)
 	drop = generic_drop_inode(inode);
 
 	if (!drop) {
-		if (!(inode->i_state & (I_DIRTY|I_SYNC)))
-			list_move(&inode->i_list, &inode_unused);
-		percpu_counter_inc(&nr_inodes_unused);
 		if (sb->s_flags & MS_ACTIVE) {
+			inode->i_state |= I_REFERENCED;
+			if (!(inode->i_state & (I_DIRTY|I_SYNC))) {
+				inode_lru_list_add(inode);
+			}
 			spin_unlock(&inode_lock);
 			return;
 		}
@@ -1251,13 +1279,19 @@ static void iput_final(struct inode *inode)
 		spin_lock(&inode_lock);
 		WARN_ON(inode->i_state & I_NEW);
 		inode->i_state &= ~I_WILL_FREE;
-		percpu_counter_dec(&nr_inodes_unused);
 		hlist_del_init(&inode->i_hash);
 	}
-	list_del_init(&inode->i_list);
-	list_del_init(&inode->i_sb_list);
 	WARN_ON(inode->i_state & I_NEW);
 	inode->i_state |= I_FREEING;
+
+	/*
+	 * After we delete the inode from the LRU here, we avoid moving dirty
+	 * inodes back onto the LRU now because I_FREEING is set and hence
+	 * writeback_single_inode() won't move the inode around.
+	 */
+	inode_lru_list_del(inode);
+
+	list_del_init(&inode->i_sb_list);
 	spin_unlock(&inode_lock);
 	evict(inode);
 	spin_lock(&inode_lock);
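
The prune_icache() comment above describes a second-chance scheme: inodes enter the LRU only at the final iput(), marked I_REFERENCED, and the pruner gives such inodes one extra pass rather than relying on strict LRU order. The following stand-alone C program is a minimal user-space sketch of that idea; the names (struct obj, REFERENCED, DIRTY, prune) and the simplified list are illustrative stand-ins, not kernel API.

/*
 * Minimal user-space sketch of the lazy-LRU / referenced-bit scheme.
 * Not kernel code: struct node is a simplified stand-in for list_head.
 */
#include <stddef.h>
#include <stdio.h>

struct node { struct node *prev, *next; };

static void list_init(struct node *n)        { n->prev = n->next = n; }
static int  list_empty(const struct node *n) { return n->next == n; }

static void list_del_init(struct node *n)
{
	n->prev->next = n->next;
	n->next->prev = n->prev;
	list_init(n);
}

static void list_add(struct node *n, struct node *head)	/* add at hot end */
{
	n->next = head->next;
	n->prev = head;
	head->next->prev = n;
	head->next = n;
}

#define REFERENCED (1u << 0)	/* analogue of I_REFERENCED */
#define DIRTY	   (1u << 1)	/* stand-in for any other busy state */

struct obj {
	struct node lru;	/* analogue of inode->i_list */
	int count;		/* analogue of i_count */
	unsigned state;
	int id;
};

static struct node lru_list;	/* analogue of inode_unused */
static int nr_unused;		/* analogue of nr_inodes_unused */

/* Final put: the only place an object enters the LRU (lazy update). */
static void obj_put_final(struct obj *o)
{
	o->state |= REFERENCED;
	if (list_empty(&o->lru)) {
		list_add(&o->lru, &lru_list);
		nr_unused++;
	}
}

/* Scan from the cold end; referenced objects get one extra pass. */
static void prune(int nr_to_scan)
{
	while (nr_to_scan-- > 0 && !list_empty(&lru_list)) {
		struct node *tail = lru_list.prev;
		struct obj *o = (struct obj *)((char *)tail -
					       offsetof(struct obj, lru));

		if (o->count || (o->state & ~REFERENCED)) {
			/* in use or dirty: just drop it from the LRU */
			list_del_init(&o->lru);
			nr_unused--;
			continue;
		}
		if (o->state & REFERENCED) {
			/* second chance: clear the bit, rotate to hot end */
			o->state &= ~REFERENCED;
			list_del_init(&o->lru);
			list_add(&o->lru, &lru_list);
			continue;
		}
		list_del_init(&o->lru);		/* reclaimable */
		nr_unused--;
		printf("reclaimed obj %d (%d left on LRU)\n", o->id, nr_unused);
	}
}

int main(void)
{
	struct obj a = { .id = 1 }, b = { .id = 2 };

	list_init(&lru_list);
	list_init(&a.lru);
	list_init(&b.lru);

	obj_put_final(&a);
	obj_put_final(&b);
	prune(8);	/* each object survives one pass, then is reclaimed */
	return 0;
}

As in the patch, an unpinned object is never moved around on every access; it only pays the reordering cost when the pruner actually reaches it, which is what keeps inode_lock contention down.
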
diff --git a/include/linux/fs.h b/include/linux/fs.h
index a3937a8ee95e..876275fc0638 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1641,16 +1641,17 @@ struct super_operations {
  *
  * Q: What is the difference between I_WILL_FREE and I_FREEING?
  */
-#define I_DIRTY_SYNC		1
-#define I_DIRTY_DATASYNC	2
-#define I_DIRTY_PAGES		4
+#define I_DIRTY_SYNC		(1 << 0)
+#define I_DIRTY_DATASYNC	(1 << 1)
+#define I_DIRTY_PAGES		(1 << 2)
 #define __I_NEW			3
 #define I_NEW			(1 << __I_NEW)
-#define I_WILL_FREE		16
-#define I_FREEING		32
-#define I_CLEAR			64
+#define I_WILL_FREE		(1 << 4)
+#define I_FREEING		(1 << 5)
+#define I_CLEAR			(1 << 6)
 #define __I_SYNC		7
 #define I_SYNC			(1 << __I_SYNC)
+#define I_REFERENCED		(1 << 8)
 
 #define I_DIRTY (I_DIRTY_SYNC | I_DIRTY_DATASYNC | I_DIRTY_PAGES)
 
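
The fs.h hunk is a notational change plus one new flag: the existing values are rewritten in (1 << n) form and I_REFERENCED takes bit 8, the lowest bit not already claimed (bits 3 and 7 belong to __I_NEW and __I_SYNC). A stand-alone C11 compile-time check along these lines (illustrative only, not part of the patch) confirms nothing shifted:

/* Illustrative check: the (1 << n) rewrite keeps the old decimal values
 * and I_REFERENCED does not overlap any existing flag. */
#define I_DIRTY_SYNC		(1 << 0)
#define I_DIRTY_DATASYNC	(1 << 1)
#define I_DIRTY_PAGES		(1 << 2)
#define __I_NEW			3
#define I_NEW			(1 << __I_NEW)
#define I_WILL_FREE		(1 << 4)
#define I_FREEING		(1 << 5)
#define I_CLEAR			(1 << 6)
#define __I_SYNC		7
#define I_SYNC			(1 << __I_SYNC)
#define I_REFERENCED		(1 << 8)

_Static_assert(I_DIRTY_SYNC == 1 && I_DIRTY_DATASYNC == 2 &&
	       I_DIRTY_PAGES == 4 && I_WILL_FREE == 16 &&
	       I_FREEING == 32 && I_CLEAR == 64,
	       "old decimal values preserved");
_Static_assert((I_REFERENCED & (I_DIRTY_SYNC | I_DIRTY_DATASYNC |
		I_DIRTY_PAGES | I_NEW | I_WILL_FREE | I_FREEING |
		I_CLEAR | I_SYNC)) == 0,
	       "I_REFERENCED uses a previously unused bit");
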
diff --git a/include/linux/writeback.h b/include/linux/writeback.h
index 72a5d647a5f2..242b6f812ba6 100644
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -10,8 +10,6 @@
 struct backing_dev_info;
 
 extern spinlock_t inode_lock;
-extern struct list_head inode_in_use;
-extern struct list_head inode_unused;
 
 /*
  * fs/fs-writeback.c
