diff options
-rw-r--r-- | fs/fs-writeback.c | 11 | ||||
-rw-r--r-- | fs/inode.c | 86 | ||||
-rw-r--r-- | include/linux/fs.h | 13 | ||||
-rw-r--r-- | include/linux/writeback.h | 2 |
4 files changed, 71 insertions, 41 deletions
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index f04d04af84f2..e8f65290e836 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c | |||
@@ -408,16 +408,13 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc) | |||
408 | * completion. | 408 | * completion. |
409 | */ | 409 | */ |
410 | redirty_tail(inode); | 410 | redirty_tail(inode); |
411 | } else if (atomic_read(&inode->i_count)) { | ||
412 | /* | ||
413 | * The inode is clean, inuse | ||
414 | */ | ||
415 | list_move(&inode->i_list, &inode_in_use); | ||
416 | } else { | 411 | } else { |
417 | /* | 412 | /* |
418 | * The inode is clean, unused | 413 | * The inode is clean. At this point we either have |
414 | * a reference to the inode or it's on its way out. | ||
415 | * No need to add it back to the LRU. | ||
419 | */ | 416 | */ |
420 | list_move(&inode->i_list, &inode_unused); | 417 | list_del_init(&inode->i_list); |
421 | } | 418 | } |
422 | } | 419 | } |
423 | inode_sync_complete(inode); | 420 | inode_sync_complete(inode); |
diff --git a/fs/inode.c b/fs/inode.c index 0d5aeccbdd90..3bdc76f1653a 100644 --- a/fs/inode.c +++ b/fs/inode.c | |||
@@ -72,8 +72,7 @@ static unsigned int i_hash_shift __read_mostly; | |||
72 | * allowing for low-overhead inode sync() operations. | 72 | * allowing for low-overhead inode sync() operations. |
73 | */ | 73 | */ |
74 | 74 | ||
75 | LIST_HEAD(inode_in_use); | 75 | static LIST_HEAD(inode_unused); |
76 | LIST_HEAD(inode_unused); | ||
77 | static struct hlist_head *inode_hashtable __read_mostly; | 76 | static struct hlist_head *inode_hashtable __read_mostly; |
78 | 77 | ||
79 | /* | 78 | /* |
@@ -291,6 +290,7 @@ void inode_init_once(struct inode *inode) | |||
291 | INIT_HLIST_NODE(&inode->i_hash); | 290 | INIT_HLIST_NODE(&inode->i_hash); |
292 | INIT_LIST_HEAD(&inode->i_dentry); | 291 | INIT_LIST_HEAD(&inode->i_dentry); |
293 | INIT_LIST_HEAD(&inode->i_devices); | 292 | INIT_LIST_HEAD(&inode->i_devices); |
293 | INIT_LIST_HEAD(&inode->i_list); | ||
294 | INIT_RADIX_TREE(&inode->i_data.page_tree, GFP_ATOMIC); | 294 | INIT_RADIX_TREE(&inode->i_data.page_tree, GFP_ATOMIC); |
295 | spin_lock_init(&inode->i_data.tree_lock); | 295 | spin_lock_init(&inode->i_data.tree_lock); |
296 | spin_lock_init(&inode->i_data.i_mmap_lock); | 296 | spin_lock_init(&inode->i_data.i_mmap_lock); |
@@ -317,12 +317,23 @@ static void init_once(void *foo) | |||
317 | */ | 317 | */ |
318 | void __iget(struct inode *inode) | 318 | void __iget(struct inode *inode) |
319 | { | 319 | { |
320 | if (atomic_inc_return(&inode->i_count) != 1) | 320 | atomic_inc(&inode->i_count); |
321 | return; | 321 | } |
322 | 322 | ||
323 | if (!(inode->i_state & (I_DIRTY|I_SYNC))) | 323 | static void inode_lru_list_add(struct inode *inode) |
324 | list_move(&inode->i_list, &inode_in_use); | 324 | { |
325 | percpu_counter_dec(&nr_inodes_unused); | 325 | if (list_empty(&inode->i_list)) { |
326 | list_add(&inode->i_list, &inode_unused); | ||
327 | percpu_counter_inc(&nr_inodes_unused); | ||
328 | } | ||
329 | } | ||
330 | |||
331 | static void inode_lru_list_del(struct inode *inode) | ||
332 | { | ||
333 | if (!list_empty(&inode->i_list)) { | ||
334 | list_del_init(&inode->i_list); | ||
335 | percpu_counter_dec(&nr_inodes_unused); | ||
336 | } | ||
326 | } | 337 | } |
327 | 338 | ||
328 | void end_writeback(struct inode *inode) | 339 | void end_writeback(struct inode *inode) |
@@ -367,7 +378,7 @@ static void dispose_list(struct list_head *head) | |||
367 | struct inode *inode; | 378 | struct inode *inode; |
368 | 379 | ||
369 | inode = list_first_entry(head, struct inode, i_list); | 380 | inode = list_first_entry(head, struct inode, i_list); |
370 | list_del(&inode->i_list); | 381 | list_del_init(&inode->i_list); |
371 | 382 | ||
372 | evict(inode); | 383 | evict(inode); |
373 | 384 | ||
@@ -413,7 +424,8 @@ static int invalidate_list(struct list_head *head, struct list_head *dispose) | |||
413 | list_move(&inode->i_list, dispose); | 424 | list_move(&inode->i_list, dispose); |
414 | WARN_ON(inode->i_state & I_NEW); | 425 | WARN_ON(inode->i_state & I_NEW); |
415 | inode->i_state |= I_FREEING; | 426 | inode->i_state |= I_FREEING; |
416 | percpu_counter_dec(&nr_inodes_unused); | 427 | if (!(inode->i_state & (I_DIRTY | I_SYNC))) |
428 | percpu_counter_dec(&nr_inodes_unused); | ||
417 | continue; | 429 | continue; |
418 | } | 430 | } |
419 | busy = 1; | 431 | busy = 1; |
@@ -448,7 +460,7 @@ int invalidate_inodes(struct super_block *sb) | |||
448 | 460 | ||
449 | static int can_unuse(struct inode *inode) | 461 | static int can_unuse(struct inode *inode) |
450 | { | 462 | { |
451 | if (inode->i_state) | 463 | if (inode->i_state & ~I_REFERENCED) |
452 | return 0; | 464 | return 0; |
453 | if (inode_has_buffers(inode)) | 465 | if (inode_has_buffers(inode)) |
454 | return 0; | 466 | return 0; |
@@ -460,17 +472,20 @@ static int can_unuse(struct inode *inode) | |||
460 | } | 472 | } |
461 | 473 | ||
462 | /* | 474 | /* |
463 | * Scan `goal' inodes on the unused list for freeable ones. They are moved to | 475 | * Scan `goal' inodes on the unused list for freeable ones. They are moved to a |
464 | * a temporary list and then are freed outside inode_lock by dispose_list(). | 476 | * temporary list and then are freed outside inode_lock by dispose_list(). |
465 | * | 477 | * |
466 | * Any inodes which are pinned purely because of attached pagecache have their | 478 | * Any inodes which are pinned purely because of attached pagecache have their |
467 | * pagecache removed. We expect the final iput() on that inode to add it to | 479 | * pagecache removed. If the inode has metadata buffers attached to |
468 | * the front of the inode_unused list. So look for it there and if the | 480 | * mapping->private_list then try to remove them. |
469 | * inode is still freeable, proceed. The right inode is found 99.9% of the | ||
470 | * time in testing on a 4-way. | ||
471 | * | 481 | * |
472 | * If the inode has metadata buffers attached to mapping->private_list then | 482 | * If the inode has the I_REFERENCED flag set, then it means that it has been |
473 | * try to remove them. | 483 | * used recently - the flag is set in iput_final(). When we encounter such an |
484 | * inode, clear the flag and move it to the back of the LRU so it gets another | ||
485 | * pass through the LRU before it gets reclaimed. This is necessary because of | ||
486 | * the fact we are doing lazy LRU updates to minimise lock contention so the | ||
487 | * LRU does not have strict ordering. Hence we don't want to reclaim inodes | ||
488 | * with this flag set because they are the inodes that are out of order. | ||
474 | */ | 489 | */ |
475 | static void prune_icache(int nr_to_scan) | 490 | static void prune_icache(int nr_to_scan) |
476 | { | 491 | { |
@@ -488,8 +503,21 @@ static void prune_icache(int nr_to_scan) | |||
488 | 503 | ||
489 | inode = list_entry(inode_unused.prev, struct inode, i_list); | 504 | inode = list_entry(inode_unused.prev, struct inode, i_list); |
490 | 505 | ||
491 | if (inode->i_state || atomic_read(&inode->i_count)) { | 506 | /* |
507 | * Referenced or dirty inodes are still in use. Give them | ||
508 | * another pass through the LRU as we cannot reclaim them now. | ||
509 | */ | ||
510 | if (atomic_read(&inode->i_count) || | ||
511 | (inode->i_state & ~I_REFERENCED)) { | ||
512 | list_del_init(&inode->i_list); | ||
513 | percpu_counter_dec(&nr_inodes_unused); | ||
514 | continue; | ||
515 | } | ||
516 | |||
517 | /* recently referenced inodes get one more pass */ | ||
518 | if (inode->i_state & I_REFERENCED) { | ||
492 | list_move(&inode->i_list, &inode_unused); | 519 | list_move(&inode->i_list, &inode_unused); |
520 | inode->i_state &= ~I_REFERENCED; | ||
493 | continue; | 521 | continue; |
494 | } | 522 | } |
495 | if (inode_has_buffers(inode) || inode->i_data.nrpages) { | 523 | if (inode_has_buffers(inode) || inode->i_data.nrpages) { |
@@ -620,7 +648,6 @@ static inline void | |||
620 | __inode_add_to_lists(struct super_block *sb, struct hlist_head *head, | 648 | __inode_add_to_lists(struct super_block *sb, struct hlist_head *head, |
621 | struct inode *inode) | 649 | struct inode *inode) |
622 | { | 650 | { |
623 | list_add(&inode->i_list, &inode_in_use); | ||
624 | list_add(&inode->i_sb_list, &sb->s_inodes); | 651 | list_add(&inode->i_sb_list, &sb->s_inodes); |
625 | if (head) | 652 | if (head) |
626 | hlist_add_head(&inode->i_hash, head); | 653 | hlist_add_head(&inode->i_hash, head); |
@@ -1237,10 +1264,11 @@ static void iput_final(struct inode *inode) | |||
1237 | drop = generic_drop_inode(inode); | 1264 | drop = generic_drop_inode(inode); |
1238 | 1265 | ||
1239 | if (!drop) { | 1266 | if (!drop) { |
1240 | if (!(inode->i_state & (I_DIRTY|I_SYNC))) | ||
1241 | list_move(&inode->i_list, &inode_unused); | ||
1242 | percpu_counter_inc(&nr_inodes_unused); | ||
1243 | if (sb->s_flags & MS_ACTIVE) { | 1267 | if (sb->s_flags & MS_ACTIVE) { |
1268 | inode->i_state |= I_REFERENCED; | ||
1269 | if (!(inode->i_state & (I_DIRTY|I_SYNC))) { | ||
1270 | inode_lru_list_add(inode); | ||
1271 | } | ||
1244 | spin_unlock(&inode_lock); | 1272 | spin_unlock(&inode_lock); |
1245 | return; | 1273 | return; |
1246 | } | 1274 | } |
@@ -1251,13 +1279,19 @@ static void iput_final(struct inode *inode) | |||
1251 | spin_lock(&inode_lock); | 1279 | spin_lock(&inode_lock); |
1252 | WARN_ON(inode->i_state & I_NEW); | 1280 | WARN_ON(inode->i_state & I_NEW); |
1253 | inode->i_state &= ~I_WILL_FREE; | 1281 | inode->i_state &= ~I_WILL_FREE; |
1254 | percpu_counter_dec(&nr_inodes_unused); | ||
1255 | hlist_del_init(&inode->i_hash); | 1282 | hlist_del_init(&inode->i_hash); |
1256 | } | 1283 | } |
1257 | list_del_init(&inode->i_list); | ||
1258 | list_del_init(&inode->i_sb_list); | ||
1259 | WARN_ON(inode->i_state & I_NEW); | 1284 | WARN_ON(inode->i_state & I_NEW); |
1260 | inode->i_state |= I_FREEING; | 1285 | inode->i_state |= I_FREEING; |
1286 | |||
1287 | /* | ||
1288 | * After we delete the inode from the LRU here, we avoid moving dirty | ||
1289 | * inodes back onto the LRU now because I_FREEING is set and hence | ||
1290 | * writeback_single_inode() won't move the inode around. | ||
1291 | */ | ||
1292 | inode_lru_list_del(inode); | ||
1293 | |||
1294 | list_del_init(&inode->i_sb_list); | ||
1261 | spin_unlock(&inode_lock); | 1295 | spin_unlock(&inode_lock); |
1262 | evict(inode); | 1296 | evict(inode); |
1263 | spin_lock(&inode_lock); | 1297 | spin_lock(&inode_lock); |
diff --git a/include/linux/fs.h b/include/linux/fs.h index a3937a8ee95e..876275fc0638 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h | |||
@@ -1641,16 +1641,17 @@ struct super_operations { | |||
1641 | * | 1641 | * |
1642 | * Q: What is the difference between I_WILL_FREE and I_FREEING? | 1642 | * Q: What is the difference between I_WILL_FREE and I_FREEING? |
1643 | */ | 1643 | */ |
1644 | #define I_DIRTY_SYNC 1 | 1644 | #define I_DIRTY_SYNC (1 << 0) |
1645 | #define I_DIRTY_DATASYNC 2 | 1645 | #define I_DIRTY_DATASYNC (1 << 1) |
1646 | #define I_DIRTY_PAGES 4 | 1646 | #define I_DIRTY_PAGES (1 << 2) |
1647 | #define __I_NEW 3 | 1647 | #define __I_NEW 3 |
1648 | #define I_NEW (1 << __I_NEW) | 1648 | #define I_NEW (1 << __I_NEW) |
1649 | #define I_WILL_FREE 16 | 1649 | #define I_WILL_FREE (1 << 4) |
1650 | #define I_FREEING 32 | 1650 | #define I_FREEING (1 << 5) |
1651 | #define I_CLEAR 64 | 1651 | #define I_CLEAR (1 << 6) |
1652 | #define __I_SYNC 7 | 1652 | #define __I_SYNC 7 |
1653 | #define I_SYNC (1 << __I_SYNC) | 1653 | #define I_SYNC (1 << __I_SYNC) |
1654 | #define I_REFERENCED (1 << 8) | ||
1654 | 1655 | ||
1655 | #define I_DIRTY (I_DIRTY_SYNC | I_DIRTY_DATASYNC | I_DIRTY_PAGES) | 1656 | #define I_DIRTY (I_DIRTY_SYNC | I_DIRTY_DATASYNC | I_DIRTY_PAGES) |
1656 | 1657 | ||
diff --git a/include/linux/writeback.h b/include/linux/writeback.h index 72a5d647a5f2..242b6f812ba6 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h | |||
@@ -10,8 +10,6 @@ | |||
10 | struct backing_dev_info; | 10 | struct backing_dev_info; |
11 | 11 | ||
12 | extern spinlock_t inode_lock; | 12 | extern spinlock_t inode_lock; |
13 | extern struct list_head inode_in_use; | ||
14 | extern struct list_head inode_unused; | ||
15 | 13 | ||
16 | /* | 14 | /* |
17 | * fs/fs-writeback.c | 15 | * fs/fs-writeback.c |