aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Nick Piggin <npiggin@suse.de> 2010-10-20 20:49:30 -0400
committer Al Viro <viro@zeniv.linux.org.uk> 2010-10-25 21:26:15 -0400
commit 7ccf19a8042e343f8159f8a5fdd6a9422aa90c78 (patch)
tree 9a69aaad6eb8992cae06f44dfea8c1d94f2a7f99
parent a5491e0c7bb7387e3e6ff9994d6dc2efc78af56c (diff)
fs: inode split IO and LRU lists
The use of the same inode list structure (inode->i_list) for two different list constructs with different lifecycles and purposes makes it impossible to separate the locking of the different operations. Therefore, to enable the separation of the locking of the writeback and reclaim lists, split the inode->i_list into two separate lists dedicated to their specific tracking functions.

Signed-off-by: Nick Piggin <npiggin@suse.de>
Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
-rw-r--r-- fs/block_dev.c 2
-rw-r--r-- fs/fs-writeback.c 35
-rw-r--r-- fs/inode.c 53
-rw-r--r-- include/linux/fs.h 3
-rw-r--r-- mm/backing-dev.c 6
5 files changed, 58 insertions, 41 deletions
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 4e847e53051f..dea3b628a6ce 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -59,7 +59,7 @@ static void bdev_inode_switch_bdi(struct inode *inode,
59 spin_lock(&inode_lock); 59 spin_lock(&inode_lock);
60 inode->i_data.backing_dev_info = dst; 60 inode->i_data.backing_dev_info = dst;
61 if (inode->i_state & I_DIRTY) 61 if (inode->i_state & I_DIRTY)
62 list_move(&inode->i_list, &dst->wb.b_dirty); 62 list_move(&inode->i_wb_list, &dst->wb.b_dirty);
63 spin_unlock(&inode_lock); 63 spin_unlock(&inode_lock);
64} 64}
65 65
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index e8f65290e836..7a24cc957f05 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -79,6 +79,11 @@ static inline struct backing_dev_info *inode_to_bdi(struct inode *inode)
79 return sb->s_bdi; 79 return sb->s_bdi;
80} 80}
81 81
82static inline struct inode *wb_inode(struct list_head *head)
83{
84 return list_entry(head, struct inode, i_wb_list);
85}
86
82static void bdi_queue_work(struct backing_dev_info *bdi, 87static void bdi_queue_work(struct backing_dev_info *bdi,
83 struct wb_writeback_work *work) 88 struct wb_writeback_work *work)
84{ 89{
@@ -172,11 +177,11 @@ static void redirty_tail(struct inode *inode)
172 if (!list_empty(&wb->b_dirty)) { 177 if (!list_empty(&wb->b_dirty)) {
173 struct inode *tail; 178 struct inode *tail;
174 179
175 tail = list_entry(wb->b_dirty.next, struct inode, i_list); 180 tail = wb_inode(wb->b_dirty.next);
176 if (time_before(inode->dirtied_when, tail->dirtied_when)) 181 if (time_before(inode->dirtied_when, tail->dirtied_when))
177 inode->dirtied_when = jiffies; 182 inode->dirtied_when = jiffies;
178 } 183 }
179 list_move(&inode->i_list, &wb->b_dirty); 184 list_move(&inode->i_wb_list, &wb->b_dirty);
180} 185}
181 186
182/* 187/*
@@ -186,7 +191,7 @@ static void requeue_io(struct inode *inode)
186{ 191{
187 struct bdi_writeback *wb = &inode_to_bdi(inode)->wb; 192 struct bdi_writeback *wb = &inode_to_bdi(inode)->wb;
188 193
189 list_move(&inode->i_list, &wb->b_more_io); 194 list_move(&inode->i_wb_list, &wb->b_more_io);
190} 195}
191 196
192static void inode_sync_complete(struct inode *inode) 197static void inode_sync_complete(struct inode *inode)
@@ -227,14 +232,14 @@ static void move_expired_inodes(struct list_head *delaying_queue,
227 int do_sb_sort = 0; 232 int do_sb_sort = 0;
228 233
229 while (!list_empty(delaying_queue)) { 234 while (!list_empty(delaying_queue)) {
230 inode = list_entry(delaying_queue->prev, struct inode, i_list); 235 inode = wb_inode(delaying_queue->prev);
231 if (older_than_this && 236 if (older_than_this &&
232 inode_dirtied_after(inode, *older_than_this)) 237 inode_dirtied_after(inode, *older_than_this))
233 break; 238 break;
234 if (sb && sb != inode->i_sb) 239 if (sb && sb != inode->i_sb)
235 do_sb_sort = 1; 240 do_sb_sort = 1;
236 sb = inode->i_sb; 241 sb = inode->i_sb;
237 list_move(&inode->i_list, &tmp); 242 list_move(&inode->i_wb_list, &tmp);
238 } 243 }
239 244
240 /* just one sb in list, splice to dispatch_queue and we're done */ 245 /* just one sb in list, splice to dispatch_queue and we're done */
@@ -245,12 +250,11 @@ static void move_expired_inodes(struct list_head *delaying_queue,
245 250
246 /* Move inodes from one superblock together */ 251 /* Move inodes from one superblock together */
247 while (!list_empty(&tmp)) { 252 while (!list_empty(&tmp)) {
248 inode = list_entry(tmp.prev, struct inode, i_list); 253 sb = wb_inode(tmp.prev)->i_sb;
249 sb = inode->i_sb;
250 list_for_each_prev_safe(pos, node, &tmp) { 254 list_for_each_prev_safe(pos, node, &tmp) {
251 inode = list_entry(pos, struct inode, i_list); 255 inode = wb_inode(pos);
252 if (inode->i_sb == sb) 256 if (inode->i_sb == sb)
253 list_move(&inode->i_list, dispatch_queue); 257 list_move(&inode->i_wb_list, dispatch_queue);
254 } 258 }
255 } 259 }
256} 260}
@@ -414,7 +418,7 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
414 * a reference to the inode or it's on it's way out. 418 * a reference to the inode or it's on it's way out.
415 * No need to add it back to the LRU. 419 * No need to add it back to the LRU.
416 */ 420 */
417 list_del_init(&inode->i_list); 421 list_del_init(&inode->i_wb_list);
418 } 422 }
419 } 423 }
420 inode_sync_complete(inode); 424 inode_sync_complete(inode);
@@ -462,8 +466,7 @@ static int writeback_sb_inodes(struct super_block *sb, struct bdi_writeback *wb,
462{ 466{
463 while (!list_empty(&wb->b_io)) { 467 while (!list_empty(&wb->b_io)) {
464 long pages_skipped; 468 long pages_skipped;
465 struct inode *inode = list_entry(wb->b_io.prev, 469 struct inode *inode = wb_inode(wb->b_io.prev);
466 struct inode, i_list);
467 470
468 if (inode->i_sb != sb) { 471 if (inode->i_sb != sb) {
469 if (only_this_sb) { 472 if (only_this_sb) {
@@ -533,8 +536,7 @@ void writeback_inodes_wb(struct bdi_writeback *wb,
533 queue_io(wb, wbc->older_than_this); 536 queue_io(wb, wbc->older_than_this);
534 537
535 while (!list_empty(&wb->b_io)) { 538 while (!list_empty(&wb->b_io)) {
536 struct inode *inode = list_entry(wb->b_io.prev, 539 struct inode *inode = wb_inode(wb->b_io.prev);
537 struct inode, i_list);
538 struct super_block *sb = inode->i_sb; 540 struct super_block *sb = inode->i_sb;
539 541
540 if (!pin_sb_for_writeback(sb)) { 542 if (!pin_sb_for_writeback(sb)) {
@@ -672,8 +674,7 @@ static long wb_writeback(struct bdi_writeback *wb,
672 */ 674 */
673 spin_lock(&inode_lock); 675 spin_lock(&inode_lock);
674 if (!list_empty(&wb->b_more_io)) { 676 if (!list_empty(&wb->b_more_io)) {
675 inode = list_entry(wb->b_more_io.prev, 677 inode = wb_inode(wb->b_more_io.prev);
676 struct inode, i_list);
677 trace_wbc_writeback_wait(&wbc, wb->bdi); 678 trace_wbc_writeback_wait(&wbc, wb->bdi);
678 inode_wait_for_writeback(inode); 679 inode_wait_for_writeback(inode);
679 } 680 }
@@ -987,7 +988,7 @@ void __mark_inode_dirty(struct inode *inode, int flags)
987 } 988 }
988 989
989 inode->dirtied_when = jiffies; 990 inode->dirtied_when = jiffies;
990 list_move(&inode->i_list, &bdi->wb.b_dirty); 991 list_move(&inode->i_wb_list, &bdi->wb.b_dirty);
991 } 992 }
992 } 993 }
993out: 994out:
diff --git a/fs/inode.c b/fs/inode.c
index 4bedac32154f..09e2d7a5f1d2 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -71,7 +71,7 @@ static unsigned int i_hash_shift __read_mostly;
71 * allowing for low-overhead inode sync() operations. 71 * allowing for low-overhead inode sync() operations.
72 */ 72 */
73 73
74static LIST_HEAD(inode_unused); 74static LIST_HEAD(inode_lru);
75static struct hlist_head *inode_hashtable __read_mostly; 75static struct hlist_head *inode_hashtable __read_mostly;
76 76
77/* 77/*
@@ -271,6 +271,7 @@ EXPORT_SYMBOL(__destroy_inode);
271 271
272static void destroy_inode(struct inode *inode) 272static void destroy_inode(struct inode *inode)
273{ 273{
274 BUG_ON(!list_empty(&inode->i_lru));
274 __destroy_inode(inode); 275 __destroy_inode(inode);
275 if (inode->i_sb->s_op->destroy_inode) 276 if (inode->i_sb->s_op->destroy_inode)
276 inode->i_sb->s_op->destroy_inode(inode); 277 inode->i_sb->s_op->destroy_inode(inode);
@@ -289,7 +290,8 @@ void inode_init_once(struct inode *inode)
289 INIT_HLIST_NODE(&inode->i_hash); 290 INIT_HLIST_NODE(&inode->i_hash);
290 INIT_LIST_HEAD(&inode->i_dentry); 291 INIT_LIST_HEAD(&inode->i_dentry);
291 INIT_LIST_HEAD(&inode->i_devices); 292 INIT_LIST_HEAD(&inode->i_devices);
292 INIT_LIST_HEAD(&inode->i_list); 293 INIT_LIST_HEAD(&inode->i_wb_list);
294 INIT_LIST_HEAD(&inode->i_lru);
293 INIT_RADIX_TREE(&inode->i_data.page_tree, GFP_ATOMIC); 295 INIT_RADIX_TREE(&inode->i_data.page_tree, GFP_ATOMIC);
294 spin_lock_init(&inode->i_data.tree_lock); 296 spin_lock_init(&inode->i_data.tree_lock);
295 spin_lock_init(&inode->i_data.i_mmap_lock); 297 spin_lock_init(&inode->i_data.i_mmap_lock);
@@ -330,16 +332,16 @@ EXPORT_SYMBOL(ihold);
330 332
331static void inode_lru_list_add(struct inode *inode) 333static void inode_lru_list_add(struct inode *inode)
332{ 334{
333 if (list_empty(&inode->i_list)) { 335 if (list_empty(&inode->i_lru)) {
334 list_add(&inode->i_list, &inode_unused); 336 list_add(&inode->i_lru, &inode_lru);
335 percpu_counter_inc(&nr_inodes_unused); 337 percpu_counter_inc(&nr_inodes_unused);
336 } 338 }
337} 339}
338 340
339static void inode_lru_list_del(struct inode *inode) 341static void inode_lru_list_del(struct inode *inode)
340{ 342{
341 if (!list_empty(&inode->i_list)) { 343 if (!list_empty(&inode->i_lru)) {
342 list_del_init(&inode->i_list); 344 list_del_init(&inode->i_lru);
343 percpu_counter_dec(&nr_inodes_unused); 345 percpu_counter_dec(&nr_inodes_unused);
344 } 346 }
345} 347}
@@ -460,8 +462,8 @@ static void dispose_list(struct list_head *head)
460 while (!list_empty(head)) { 462 while (!list_empty(head)) {
461 struct inode *inode; 463 struct inode *inode;
462 464
463 inode = list_first_entry(head, struct inode, i_list); 465 inode = list_first_entry(head, struct inode, i_lru);
464 list_del_init(&inode->i_list); 466 list_del_init(&inode->i_lru);
465 467
466 evict(inode); 468 evict(inode);
467 469
@@ -507,8 +509,14 @@ static int invalidate_list(struct list_head *head, struct list_head *dispose)
507 continue; 509 continue;
508 } 510 }
509 511
510 list_move(&inode->i_list, dispose);
511 inode->i_state |= I_FREEING; 512 inode->i_state |= I_FREEING;
513
514 /*
515 * Move the inode off the IO lists and LRU once I_FREEING is
516 * set so that it won't get moved back on there if it is dirty.
517 */
518 list_move(&inode->i_lru, dispose);
519 list_del_init(&inode->i_wb_list);
512 if (!(inode->i_state & (I_DIRTY | I_SYNC))) 520 if (!(inode->i_state & (I_DIRTY | I_SYNC)))
513 percpu_counter_dec(&nr_inodes_unused); 521 percpu_counter_dec(&nr_inodes_unused);
514 } 522 }
@@ -580,10 +588,10 @@ static void prune_icache(int nr_to_scan)
580 for (nr_scanned = 0; nr_scanned < nr_to_scan; nr_scanned++) { 588 for (nr_scanned = 0; nr_scanned < nr_to_scan; nr_scanned++) {
581 struct inode *inode; 589 struct inode *inode;
582 590
583 if (list_empty(&inode_unused)) 591 if (list_empty(&inode_lru))
584 break; 592 break;
585 593
586 inode = list_entry(inode_unused.prev, struct inode, i_list); 594 inode = list_entry(inode_lru.prev, struct inode, i_lru);
587 595
588 /* 596 /*
589 * Referenced or dirty inodes are still in use. Give them 597 * Referenced or dirty inodes are still in use. Give them
@@ -591,14 +599,14 @@ static void prune_icache(int nr_to_scan)
591 */ 599 */
592 if (atomic_read(&inode->i_count) || 600 if (atomic_read(&inode->i_count) ||
593 (inode->i_state & ~I_REFERENCED)) { 601 (inode->i_state & ~I_REFERENCED)) {
594 list_del_init(&inode->i_list); 602 list_del_init(&inode->i_lru);
595 percpu_counter_dec(&nr_inodes_unused); 603 percpu_counter_dec(&nr_inodes_unused);
596 continue; 604 continue;
597 } 605 }
598 606
599 /* recently referenced inodes get one more pass */ 607 /* recently referenced inodes get one more pass */
600 if (inode->i_state & I_REFERENCED) { 608 if (inode->i_state & I_REFERENCED) {
601 list_move(&inode->i_list, &inode_unused); 609 list_move(&inode->i_lru, &inode_lru);
602 inode->i_state &= ~I_REFERENCED; 610 inode->i_state &= ~I_REFERENCED;
603 continue; 611 continue;
604 } 612 }
@@ -611,15 +619,21 @@ static void prune_icache(int nr_to_scan)
611 iput(inode); 619 iput(inode);
612 spin_lock(&inode_lock); 620 spin_lock(&inode_lock);
613 621
614 if (inode != list_entry(inode_unused.next, 622 if (inode != list_entry(inode_lru.next,
615 struct inode, i_list)) 623 struct inode, i_lru))
616 continue; /* wrong inode or list_empty */ 624 continue; /* wrong inode or list_empty */
617 if (!can_unuse(inode)) 625 if (!can_unuse(inode))
618 continue; 626 continue;
619 } 627 }
620 list_move(&inode->i_list, &freeable);
621 WARN_ON(inode->i_state & I_NEW); 628 WARN_ON(inode->i_state & I_NEW);
622 inode->i_state |= I_FREEING; 629 inode->i_state |= I_FREEING;
630
631 /*
632 * Move the inode off the IO lists and LRU once I_FREEING is
633 * set so that it won't get moved back on there if it is dirty.
634 */
635 list_move(&inode->i_lru, &freeable);
636 list_del_init(&inode->i_wb_list);
623 percpu_counter_dec(&nr_inodes_unused); 637 percpu_counter_dec(&nr_inodes_unused);
624 } 638 }
625 if (current_is_kswapd()) 639 if (current_is_kswapd())
@@ -1340,15 +1354,16 @@ static void iput_final(struct inode *inode)
1340 inode->i_state &= ~I_WILL_FREE; 1354 inode->i_state &= ~I_WILL_FREE;
1341 __remove_inode_hash(inode); 1355 __remove_inode_hash(inode);
1342 } 1356 }
1357
1343 WARN_ON(inode->i_state & I_NEW); 1358 WARN_ON(inode->i_state & I_NEW);
1344 inode->i_state |= I_FREEING; 1359 inode->i_state |= I_FREEING;
1345 1360
1346 /* 1361 /*
1347 * After we delete the inode from the LRU here, we avoid moving dirty 1362 * Move the inode off the IO lists and LRU once I_FREEING is
1348 * inodes back onto the LRU now because I_FREEING is set and hence 1363 * set so that it won't get moved back on there if it is dirty.
1349 * writeback_single_inode() won't move the inode around.
1350 */ 1364 */
1351 inode_lru_list_del(inode); 1365 inode_lru_list_del(inode);
1366 list_del_init(&inode->i_wb_list);
1352 1367
1353 __inode_sb_list_del(inode); 1368 __inode_sb_list_del(inode);
1354 spin_unlock(&inode_lock); 1369 spin_unlock(&inode_lock);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index d58059944801..f300a6508818 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -723,7 +723,8 @@ struct posix_acl;
723 723
724struct inode { 724struct inode {
725 struct hlist_node i_hash; 725 struct hlist_node i_hash;
726 struct list_head i_list; /* backing dev IO list */ 726 struct list_head i_wb_list; /* backing dev IO list */
727 struct list_head i_lru; /* inode LRU list */
727 struct list_head i_sb_list; 728 struct list_head i_sb_list;
728 struct list_head i_dentry; 729 struct list_head i_dentry;
729 unsigned long i_ino; 730 unsigned long i_ino;
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index 65d420499a61..15d5097de821 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -74,11 +74,11 @@ static int bdi_debug_stats_show(struct seq_file *m, void *v)
74 74
75 nr_wb = nr_dirty = nr_io = nr_more_io = 0; 75 nr_wb = nr_dirty = nr_io = nr_more_io = 0;
76 spin_lock(&inode_lock); 76 spin_lock(&inode_lock);
77 list_for_each_entry(inode, &wb->b_dirty, i_list) 77 list_for_each_entry(inode, &wb->b_dirty, i_wb_list)
78 nr_dirty++; 78 nr_dirty++;
79 list_for_each_entry(inode, &wb->b_io, i_list) 79 list_for_each_entry(inode, &wb->b_io, i_wb_list)
80 nr_io++; 80 nr_io++;
81 list_for_each_entry(inode, &wb->b_more_io, i_list) 81 list_for_each_entry(inode, &wb->b_more_io, i_wb_list)
82 nr_more_io++; 82 nr_more_io++;
83 spin_unlock(&inode_lock); 83 spin_unlock(&inode_lock);
84 84