aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--fs/fs-writeback.c11
-rw-r--r--fs/inode.c86
-rw-r--r--include/linux/fs.h13
-rw-r--r--include/linux/writeback.h2
4 files changed, 71 insertions, 41 deletions
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index f04d04af84f2..e8f65290e836 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -408,16 +408,13 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
408 * completion. 408 * completion.
409 */ 409 */
410 redirty_tail(inode); 410 redirty_tail(inode);
411 } else if (atomic_read(&inode->i_count)) {
412 /*
413 * The inode is clean, inuse
414 */
415 list_move(&inode->i_list, &inode_in_use);
416 } else { 411 } else {
417 /* 412 /*
418 * The inode is clean, unused 413 * The inode is clean. At this point we either have
414 * a reference to the inode or it's on its way out.
415 * No need to add it back to the LRU.
419 */ 416 */
420 list_move(&inode->i_list, &inode_unused); 417 list_del_init(&inode->i_list);
421 } 418 }
422 } 419 }
423 inode_sync_complete(inode); 420 inode_sync_complete(inode);
diff --git a/fs/inode.c b/fs/inode.c
index 0d5aeccbdd90..3bdc76f1653a 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -72,8 +72,7 @@ static unsigned int i_hash_shift __read_mostly;
72 * allowing for low-overhead inode sync() operations. 72 * allowing for low-overhead inode sync() operations.
73 */ 73 */
74 74
75LIST_HEAD(inode_in_use); 75static LIST_HEAD(inode_unused);
76LIST_HEAD(inode_unused);
77static struct hlist_head *inode_hashtable __read_mostly; 76static struct hlist_head *inode_hashtable __read_mostly;
78 77
79/* 78/*
@@ -291,6 +290,7 @@ void inode_init_once(struct inode *inode)
291 INIT_HLIST_NODE(&inode->i_hash); 290 INIT_HLIST_NODE(&inode->i_hash);
292 INIT_LIST_HEAD(&inode->i_dentry); 291 INIT_LIST_HEAD(&inode->i_dentry);
293 INIT_LIST_HEAD(&inode->i_devices); 292 INIT_LIST_HEAD(&inode->i_devices);
293 INIT_LIST_HEAD(&inode->i_list);
294 INIT_RADIX_TREE(&inode->i_data.page_tree, GFP_ATOMIC); 294 INIT_RADIX_TREE(&inode->i_data.page_tree, GFP_ATOMIC);
295 spin_lock_init(&inode->i_data.tree_lock); 295 spin_lock_init(&inode->i_data.tree_lock);
296 spin_lock_init(&inode->i_data.i_mmap_lock); 296 spin_lock_init(&inode->i_data.i_mmap_lock);
@@ -317,12 +317,23 @@ static void init_once(void *foo)
317 */ 317 */
318void __iget(struct inode *inode) 318void __iget(struct inode *inode)
319{ 319{
320 if (atomic_inc_return(&inode->i_count) != 1) 320 atomic_inc(&inode->i_count);
321 return; 321}
322 322
323 if (!(inode->i_state & (I_DIRTY|I_SYNC))) 323static void inode_lru_list_add(struct inode *inode)
324 list_move(&inode->i_list, &inode_in_use); 324{
325 percpu_counter_dec(&nr_inodes_unused); 325 if (list_empty(&inode->i_list)) {
326 list_add(&inode->i_list, &inode_unused);
327 percpu_counter_inc(&nr_inodes_unused);
328 }
329}
330
331static void inode_lru_list_del(struct inode *inode)
332{
333 if (!list_empty(&inode->i_list)) {
334 list_del_init(&inode->i_list);
335 percpu_counter_dec(&nr_inodes_unused);
336 }
326} 337}
327 338
328void end_writeback(struct inode *inode) 339void end_writeback(struct inode *inode)
@@ -367,7 +378,7 @@ static void dispose_list(struct list_head *head)
367 struct inode *inode; 378 struct inode *inode;
368 379
369 inode = list_first_entry(head, struct inode, i_list); 380 inode = list_first_entry(head, struct inode, i_list);
370 list_del(&inode->i_list); 381 list_del_init(&inode->i_list);
371 382
372 evict(inode); 383 evict(inode);
373 384
@@ -413,7 +424,8 @@ static int invalidate_list(struct list_head *head, struct list_head *dispose)
413 list_move(&inode->i_list, dispose); 424 list_move(&inode->i_list, dispose);
414 WARN_ON(inode->i_state & I_NEW); 425 WARN_ON(inode->i_state & I_NEW);
415 inode->i_state |= I_FREEING; 426 inode->i_state |= I_FREEING;
416 percpu_counter_dec(&nr_inodes_unused); 427 if (!(inode->i_state & (I_DIRTY | I_SYNC)))
428 percpu_counter_dec(&nr_inodes_unused);
417 continue; 429 continue;
418 } 430 }
419 busy = 1; 431 busy = 1;
@@ -448,7 +460,7 @@ int invalidate_inodes(struct super_block *sb)
448 460
449static int can_unuse(struct inode *inode) 461static int can_unuse(struct inode *inode)
450{ 462{
451 if (inode->i_state) 463 if (inode->i_state & ~I_REFERENCED)
452 return 0; 464 return 0;
453 if (inode_has_buffers(inode)) 465 if (inode_has_buffers(inode))
454 return 0; 466 return 0;
@@ -460,17 +472,20 @@ static int can_unuse(struct inode *inode)
460} 472}
461 473
462/* 474/*
463 * Scan `goal' inodes on the unused list for freeable ones. They are moved to 475 * Scan `goal' inodes on the unused list for freeable ones. They are moved to a
464 * a temporary list and then are freed outside inode_lock by dispose_list(). 476 * temporary list and then are freed outside inode_lock by dispose_list().
465 * 477 *
466 * Any inodes which are pinned purely because of attached pagecache have their 478 * Any inodes which are pinned purely because of attached pagecache have their
467 * pagecache removed. We expect the final iput() on that inode to add it to 479 * pagecache removed. If the inode has metadata buffers attached to
468 * the front of the inode_unused list. So look for it there and if the 480 * mapping->private_list then try to remove them.
469 * inode is still freeable, proceed. The right inode is found 99.9% of the
470 * time in testing on a 4-way.
471 * 481 *
472 * If the inode has metadata buffers attached to mapping->private_list then 482 * If the inode has the I_REFERENCED flag set, then it means that it has been
473 * try to remove them. 483 * used recently - the flag is set in iput_final(). When we encounter such an
484 * inode, clear the flag and move it to the back of the LRU so it gets another
485 * pass through the LRU before it gets reclaimed. This is necessary because of
486 * the fact we are doing lazy LRU updates to minimise lock contention so the
487 * LRU does not have strict ordering. Hence we don't want to reclaim inodes
488 * with this flag set because they are the inodes that are out of order.
474 */ 489 */
475static void prune_icache(int nr_to_scan) 490static void prune_icache(int nr_to_scan)
476{ 491{
@@ -488,8 +503,21 @@ static void prune_icache(int nr_to_scan)
488 503
489 inode = list_entry(inode_unused.prev, struct inode, i_list); 504 inode = list_entry(inode_unused.prev, struct inode, i_list);
490 505
491 if (inode->i_state || atomic_read(&inode->i_count)) { 506 /*
507 * Referenced or dirty inodes are still in use. Give them
508 * another pass through the LRU as we cannot reclaim them now.
509 */
510 if (atomic_read(&inode->i_count) ||
511 (inode->i_state & ~I_REFERENCED)) {
512 list_del_init(&inode->i_list);
513 percpu_counter_dec(&nr_inodes_unused);
514 continue;
515 }
516
517 /* recently referenced inodes get one more pass */
518 if (inode->i_state & I_REFERENCED) {
492 list_move(&inode->i_list, &inode_unused); 519 list_move(&inode->i_list, &inode_unused);
520 inode->i_state &= ~I_REFERENCED;
493 continue; 521 continue;
494 } 522 }
495 if (inode_has_buffers(inode) || inode->i_data.nrpages) { 523 if (inode_has_buffers(inode) || inode->i_data.nrpages) {
@@ -620,7 +648,6 @@ static inline void
620__inode_add_to_lists(struct super_block *sb, struct hlist_head *head, 648__inode_add_to_lists(struct super_block *sb, struct hlist_head *head,
621 struct inode *inode) 649 struct inode *inode)
622{ 650{
623 list_add(&inode->i_list, &inode_in_use);
624 list_add(&inode->i_sb_list, &sb->s_inodes); 651 list_add(&inode->i_sb_list, &sb->s_inodes);
625 if (head) 652 if (head)
626 hlist_add_head(&inode->i_hash, head); 653 hlist_add_head(&inode->i_hash, head);
@@ -1237,10 +1264,11 @@ static void iput_final(struct inode *inode)
1237 drop = generic_drop_inode(inode); 1264 drop = generic_drop_inode(inode);
1238 1265
1239 if (!drop) { 1266 if (!drop) {
1240 if (!(inode->i_state & (I_DIRTY|I_SYNC)))
1241 list_move(&inode->i_list, &inode_unused);
1242 percpu_counter_inc(&nr_inodes_unused);
1243 if (sb->s_flags & MS_ACTIVE) { 1267 if (sb->s_flags & MS_ACTIVE) {
1268 inode->i_state |= I_REFERENCED;
1269 if (!(inode->i_state & (I_DIRTY|I_SYNC))) {
1270 inode_lru_list_add(inode);
1271 }
1244 spin_unlock(&inode_lock); 1272 spin_unlock(&inode_lock);
1245 return; 1273 return;
1246 } 1274 }
@@ -1251,13 +1279,19 @@ static void iput_final(struct inode *inode)
1251 spin_lock(&inode_lock); 1279 spin_lock(&inode_lock);
1252 WARN_ON(inode->i_state & I_NEW); 1280 WARN_ON(inode->i_state & I_NEW);
1253 inode->i_state &= ~I_WILL_FREE; 1281 inode->i_state &= ~I_WILL_FREE;
1254 percpu_counter_dec(&nr_inodes_unused);
1255 hlist_del_init(&inode->i_hash); 1282 hlist_del_init(&inode->i_hash);
1256 } 1283 }
1257 list_del_init(&inode->i_list);
1258 list_del_init(&inode->i_sb_list);
1259 WARN_ON(inode->i_state & I_NEW); 1284 WARN_ON(inode->i_state & I_NEW);
1260 inode->i_state |= I_FREEING; 1285 inode->i_state |= I_FREEING;
1286
1287 /*
1288 * After we delete the inode from the LRU here, we avoid moving dirty
1289 * inodes back onto the LRU now because I_FREEING is set and hence
1290 * writeback_single_inode() won't move the inode around.
1291 */
1292 inode_lru_list_del(inode);
1293
1294 list_del_init(&inode->i_sb_list);
1261 spin_unlock(&inode_lock); 1295 spin_unlock(&inode_lock);
1262 evict(inode); 1296 evict(inode);
1263 spin_lock(&inode_lock); 1297 spin_lock(&inode_lock);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index a3937a8ee95e..876275fc0638 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1641,16 +1641,17 @@ struct super_operations {
1641 * 1641 *
1642 * Q: What is the difference between I_WILL_FREE and I_FREEING? 1642 * Q: What is the difference between I_WILL_FREE and I_FREEING?
1643 */ 1643 */
1644#define I_DIRTY_SYNC 1 1644#define I_DIRTY_SYNC (1 << 0)
1645#define I_DIRTY_DATASYNC 2 1645#define I_DIRTY_DATASYNC (1 << 1)
1646#define I_DIRTY_PAGES 4 1646#define I_DIRTY_PAGES (1 << 2)
1647#define __I_NEW 3 1647#define __I_NEW 3
1648#define I_NEW (1 << __I_NEW) 1648#define I_NEW (1 << __I_NEW)
1649#define I_WILL_FREE 16 1649#define I_WILL_FREE (1 << 4)
1650#define I_FREEING 32 1650#define I_FREEING (1 << 5)
1651#define I_CLEAR 64 1651#define I_CLEAR (1 << 6)
1652#define __I_SYNC 7 1652#define __I_SYNC 7
1653#define I_SYNC (1 << __I_SYNC) 1653#define I_SYNC (1 << __I_SYNC)
1654#define I_REFERENCED (1 << 8)
1654 1655
1655#define I_DIRTY (I_DIRTY_SYNC | I_DIRTY_DATASYNC | I_DIRTY_PAGES) 1656#define I_DIRTY (I_DIRTY_SYNC | I_DIRTY_DATASYNC | I_DIRTY_PAGES)
1656 1657
diff --git a/include/linux/writeback.h b/include/linux/writeback.h
index 72a5d647a5f2..242b6f812ba6 100644
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -10,8 +10,6 @@
10struct backing_dev_info; 10struct backing_dev_info;
11 11
12extern spinlock_t inode_lock; 12extern spinlock_t inode_lock;
13extern struct list_head inode_in_use;
14extern struct list_head inode_unused;
15 13
16/* 14/*
17 * fs/fs-writeback.c 15 * fs/fs-writeback.c