aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--fs/block_dev.c2
-rw-r--r--fs/buffer.c2
-rw-r--r--fs/drop_caches.c9
-rw-r--r--fs/fs-writeback.c44
-rw-r--r--fs/inode.c150
-rw-r--r--fs/notify/inode_mark.c21
-rw-r--r--fs/quota/dquot.c13
-rw-r--r--include/linux/fs.h2
-rw-r--r--include/linux/quotaops.h2
-rw-r--r--mm/filemap.c2
-rw-r--r--mm/rmap.c1
11 files changed, 174 insertions, 74 deletions
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 889287019599..bc39b18cf3d0 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -56,9 +56,11 @@ static void bdev_inode_switch_bdi(struct inode *inode,
56 struct backing_dev_info *dst) 56 struct backing_dev_info *dst)
57{ 57{
58 spin_lock(&inode_lock); 58 spin_lock(&inode_lock);
59 spin_lock(&inode->i_lock);
59 inode->i_data.backing_dev_info = dst; 60 inode->i_data.backing_dev_info = dst;
60 if (inode->i_state & I_DIRTY) 61 if (inode->i_state & I_DIRTY)
61 list_move(&inode->i_wb_list, &dst->wb.b_dirty); 62 list_move(&inode->i_wb_list, &dst->wb.b_dirty);
63 spin_unlock(&inode->i_lock);
62 spin_unlock(&inode_lock); 64 spin_unlock(&inode_lock);
63} 65}
64 66
diff --git a/fs/buffer.c b/fs/buffer.c
index 2219a76e2caf..da666f3148f9 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -1144,7 +1144,7 @@ __getblk_slow(struct block_device *bdev, sector_t block, int size)
1144 * inode list. 1144 * inode list.
1145 * 1145 *
1146 * mark_buffer_dirty() is atomic. It takes bh->b_page->mapping->private_lock, 1146 * mark_buffer_dirty() is atomic. It takes bh->b_page->mapping->private_lock,
1147 * mapping->tree_lock and the global inode_lock. 1147 * mapping->tree_lock and mapping->host->i_lock.
1148 */ 1148 */
1149void mark_buffer_dirty(struct buffer_head *bh) 1149void mark_buffer_dirty(struct buffer_head *bh)
1150{ 1150{
diff --git a/fs/drop_caches.c b/fs/drop_caches.c
index 816f88e6b9ce..6c6f73ba0868 100644
--- a/fs/drop_caches.c
+++ b/fs/drop_caches.c
@@ -18,11 +18,14 @@ static void drop_pagecache_sb(struct super_block *sb, void *unused)
18 18
19 spin_lock(&inode_lock); 19 spin_lock(&inode_lock);
20 list_for_each_entry(inode, &sb->s_inodes, i_sb_list) { 20 list_for_each_entry(inode, &sb->s_inodes, i_sb_list) {
21 if (inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW)) 21 spin_lock(&inode->i_lock);
22 continue; 22 if ((inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW)) ||
23 if (inode->i_mapping->nrpages == 0) 23 (inode->i_mapping->nrpages == 0)) {
24 spin_unlock(&inode->i_lock);
24 continue; 25 continue;
26 }
25 __iget(inode); 27 __iget(inode);
28 spin_unlock(&inode->i_lock);
26 spin_unlock(&inode_lock); 29 spin_unlock(&inode_lock);
27 invalidate_mapping_pages(inode->i_mapping, 0, -1); 30 invalidate_mapping_pages(inode->i_mapping, 0, -1);
28 iput(toput_inode); 31 iput(toput_inode);
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 59c6e4956786..efd1ebe879cc 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -306,10 +306,12 @@ static void inode_wait_for_writeback(struct inode *inode)
306 wait_queue_head_t *wqh; 306 wait_queue_head_t *wqh;
307 307
308 wqh = bit_waitqueue(&inode->i_state, __I_SYNC); 308 wqh = bit_waitqueue(&inode->i_state, __I_SYNC);
309 while (inode->i_state & I_SYNC) { 309 while (inode->i_state & I_SYNC) {
310 spin_unlock(&inode->i_lock);
310 spin_unlock(&inode_lock); 311 spin_unlock(&inode_lock);
311 __wait_on_bit(wqh, &wq, inode_wait, TASK_UNINTERRUPTIBLE); 312 __wait_on_bit(wqh, &wq, inode_wait, TASK_UNINTERRUPTIBLE);
312 spin_lock(&inode_lock); 313 spin_lock(&inode_lock);
314 spin_lock(&inode->i_lock);
313 } 315 }
314} 316}
315 317
@@ -333,6 +335,7 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
333 unsigned dirty; 335 unsigned dirty;
334 int ret; 336 int ret;
335 337
338 spin_lock(&inode->i_lock);
336 if (!atomic_read(&inode->i_count)) 339 if (!atomic_read(&inode->i_count))
337 WARN_ON(!(inode->i_state & (I_WILL_FREE|I_FREEING))); 340 WARN_ON(!(inode->i_state & (I_WILL_FREE|I_FREEING)));
338 else 341 else
@@ -348,6 +351,7 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
348 * completed a full scan of b_io. 351 * completed a full scan of b_io.
349 */ 352 */
350 if (wbc->sync_mode != WB_SYNC_ALL) { 353 if (wbc->sync_mode != WB_SYNC_ALL) {
354 spin_unlock(&inode->i_lock);
351 requeue_io(inode); 355 requeue_io(inode);
352 return 0; 356 return 0;
353 } 357 }
@@ -363,6 +367,7 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
363 /* Set I_SYNC, reset I_DIRTY_PAGES */ 367 /* Set I_SYNC, reset I_DIRTY_PAGES */
364 inode->i_state |= I_SYNC; 368 inode->i_state |= I_SYNC;
365 inode->i_state &= ~I_DIRTY_PAGES; 369 inode->i_state &= ~I_DIRTY_PAGES;
370 spin_unlock(&inode->i_lock);
366 spin_unlock(&inode_lock); 371 spin_unlock(&inode_lock);
367 372
368 ret = do_writepages(mapping, wbc); 373 ret = do_writepages(mapping, wbc);
@@ -384,8 +389,10 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
384 * write_inode() 389 * write_inode()
385 */ 390 */
386 spin_lock(&inode_lock); 391 spin_lock(&inode_lock);
392 spin_lock(&inode->i_lock);
387 dirty = inode->i_state & I_DIRTY; 393 dirty = inode->i_state & I_DIRTY;
388 inode->i_state &= ~(I_DIRTY_SYNC | I_DIRTY_DATASYNC); 394 inode->i_state &= ~(I_DIRTY_SYNC | I_DIRTY_DATASYNC);
395 spin_unlock(&inode->i_lock);
389 spin_unlock(&inode_lock); 396 spin_unlock(&inode_lock);
390 /* Don't write the inode if only I_DIRTY_PAGES was set */ 397 /* Don't write the inode if only I_DIRTY_PAGES was set */
391 if (dirty & (I_DIRTY_SYNC | I_DIRTY_DATASYNC)) { 398 if (dirty & (I_DIRTY_SYNC | I_DIRTY_DATASYNC)) {
@@ -395,6 +402,7 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
395 } 402 }
396 403
397 spin_lock(&inode_lock); 404 spin_lock(&inode_lock);
405 spin_lock(&inode->i_lock);
398 inode->i_state &= ~I_SYNC; 406 inode->i_state &= ~I_SYNC;
399 if (!(inode->i_state & I_FREEING)) { 407 if (!(inode->i_state & I_FREEING)) {
400 if (mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) { 408 if (mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) {
@@ -436,6 +444,7 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
436 } 444 }
437 } 445 }
438 inode_sync_complete(inode); 446 inode_sync_complete(inode);
447 spin_unlock(&inode->i_lock);
439 return ret; 448 return ret;
440} 449}
441 450
@@ -506,7 +515,9 @@ static int writeback_sb_inodes(struct super_block *sb, struct bdi_writeback *wb,
506 * kind does not need periodic writeout yet, and for the latter 515 * kind does not need periodic writeout yet, and for the latter
507 * kind writeout is handled by the freer. 516 * kind writeout is handled by the freer.
508 */ 517 */
518 spin_lock(&inode->i_lock);
509 if (inode->i_state & (I_NEW | I_FREEING | I_WILL_FREE)) { 519 if (inode->i_state & (I_NEW | I_FREEING | I_WILL_FREE)) {
520 spin_unlock(&inode->i_lock);
510 requeue_io(inode); 521 requeue_io(inode);
511 continue; 522 continue;
512 } 523 }
@@ -515,10 +526,14 @@ static int writeback_sb_inodes(struct super_block *sb, struct bdi_writeback *wb,
515 * Was this inode dirtied after sync_sb_inodes was called? 526 * Was this inode dirtied after sync_sb_inodes was called?
516 * This keeps sync from extra jobs and livelock. 527 * This keeps sync from extra jobs and livelock.
517 */ 528 */
518 if (inode_dirtied_after(inode, wbc->wb_start)) 529 if (inode_dirtied_after(inode, wbc->wb_start)) {
530 spin_unlock(&inode->i_lock);
519 return 1; 531 return 1;
532 }
520 533
521 __iget(inode); 534 __iget(inode);
535 spin_unlock(&inode->i_lock);
536
522 pages_skipped = wbc->pages_skipped; 537 pages_skipped = wbc->pages_skipped;
523 writeback_single_inode(inode, wbc); 538 writeback_single_inode(inode, wbc);
524 if (wbc->pages_skipped != pages_skipped) { 539 if (wbc->pages_skipped != pages_skipped) {
@@ -724,7 +739,9 @@ static long wb_writeback(struct bdi_writeback *wb,
724 if (!list_empty(&wb->b_more_io)) { 739 if (!list_empty(&wb->b_more_io)) {
725 inode = wb_inode(wb->b_more_io.prev); 740 inode = wb_inode(wb->b_more_io.prev);
726 trace_wbc_writeback_wait(&wbc, wb->bdi); 741 trace_wbc_writeback_wait(&wbc, wb->bdi);
742 spin_lock(&inode->i_lock);
727 inode_wait_for_writeback(inode); 743 inode_wait_for_writeback(inode);
744 spin_unlock(&inode->i_lock);
728 } 745 }
729 spin_unlock(&inode_lock); 746 spin_unlock(&inode_lock);
730 } 747 }
@@ -1017,6 +1034,7 @@ void __mark_inode_dirty(struct inode *inode, int flags)
1017 block_dump___mark_inode_dirty(inode); 1034 block_dump___mark_inode_dirty(inode);
1018 1035
1019 spin_lock(&inode_lock); 1036 spin_lock(&inode_lock);
1037 spin_lock(&inode->i_lock);
1020 if ((inode->i_state & flags) != flags) { 1038 if ((inode->i_state & flags) != flags) {
1021 const int was_dirty = inode->i_state & I_DIRTY; 1039 const int was_dirty = inode->i_state & I_DIRTY;
1022 1040
@@ -1028,7 +1046,7 @@ void __mark_inode_dirty(struct inode *inode, int flags)
1028 * superblock list, based upon its state. 1046 * superblock list, based upon its state.
1029 */ 1047 */
1030 if (inode->i_state & I_SYNC) 1048 if (inode->i_state & I_SYNC)
1031 goto out; 1049 goto out_unlock_inode;
1032 1050
1033 /* 1051 /*
1034 * Only add valid (hashed) inodes to the superblock's 1052 * Only add valid (hashed) inodes to the superblock's
@@ -1036,11 +1054,12 @@ void __mark_inode_dirty(struct inode *inode, int flags)
1036 */ 1054 */
1037 if (!S_ISBLK(inode->i_mode)) { 1055 if (!S_ISBLK(inode->i_mode)) {
1038 if (inode_unhashed(inode)) 1056 if (inode_unhashed(inode))
1039 goto out; 1057 goto out_unlock_inode;
1040 } 1058 }
1041 if (inode->i_state & I_FREEING) 1059 if (inode->i_state & I_FREEING)
1042 goto out; 1060 goto out_unlock_inode;
1043 1061
1062 spin_unlock(&inode->i_lock);
1044 /* 1063 /*
1045 * If the inode was already on b_dirty/b_io/b_more_io, don't 1064 * If the inode was already on b_dirty/b_io/b_more_io, don't
1046 * reposition it (that would break b_dirty time-ordering). 1065 * reposition it (that would break b_dirty time-ordering).
@@ -1065,7 +1084,10 @@ void __mark_inode_dirty(struct inode *inode, int flags)
1065 inode->dirtied_when = jiffies; 1084 inode->dirtied_when = jiffies;
1066 list_move(&inode->i_wb_list, &bdi->wb.b_dirty); 1085 list_move(&inode->i_wb_list, &bdi->wb.b_dirty);
1067 } 1086 }
1087 goto out;
1068 } 1088 }
1089out_unlock_inode:
1090 spin_unlock(&inode->i_lock);
1069out: 1091out:
1070 spin_unlock(&inode_lock); 1092 spin_unlock(&inode_lock);
1071 1093
@@ -1111,14 +1133,16 @@ static void wait_sb_inodes(struct super_block *sb)
1111 * we still have to wait for that writeout. 1133 * we still have to wait for that writeout.
1112 */ 1134 */
1113 list_for_each_entry(inode, &sb->s_inodes, i_sb_list) { 1135 list_for_each_entry(inode, &sb->s_inodes, i_sb_list) {
1114 struct address_space *mapping; 1136 struct address_space *mapping = inode->i_mapping;
1115 1137
1116 if (inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW)) 1138 spin_lock(&inode->i_lock);
1117 continue; 1139 if ((inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW)) ||
1118 mapping = inode->i_mapping; 1140 (mapping->nrpages == 0)) {
1119 if (mapping->nrpages == 0) 1141 spin_unlock(&inode->i_lock);
1120 continue; 1142 continue;
1143 }
1121 __iget(inode); 1144 __iget(inode);
1145 spin_unlock(&inode->i_lock);
1122 spin_unlock(&inode_lock); 1146 spin_unlock(&inode_lock);
1123 /* 1147 /*
1124 * We hold a reference to 'inode' so it couldn't have 1148 * We hold a reference to 'inode' so it couldn't have
diff --git a/fs/inode.c b/fs/inode.c
index 0b3da4a77704..14b12c4ee026 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -28,6 +28,17 @@
28#include <linux/cred.h> 28#include <linux/cred.h>
29 29
30/* 30/*
31 * inode locking rules.
32 *
33 * inode->i_lock protects:
34 * inode->i_state, inode->i_hash, __iget()
35 *
36 * Lock ordering:
37 * inode_lock
38 * inode->i_lock
39 */
40
41/*
31 * This is needed for the following functions: 42 * This is needed for the following functions:
32 * - inode_has_buffers 43 * - inode_has_buffers
33 * - invalidate_bdev 44 * - invalidate_bdev
@@ -137,15 +148,6 @@ int proc_nr_inodes(ctl_table *table, int write,
137} 148}
138#endif 149#endif
139 150
140static void wake_up_inode(struct inode *inode)
141{
142 /*
143 * Prevent speculative execution through spin_unlock(&inode_lock);
144 */
145 smp_mb();
146 wake_up_bit(&inode->i_state, __I_NEW);
147}
148
149/** 151/**
150 * inode_init_always - perform inode structure initialisation 152 * inode_init_always - perform inode structure initialisation
151 * @sb: superblock inode belongs to 153 * @sb: superblock inode belongs to
@@ -336,7 +338,7 @@ static void init_once(void *foo)
336} 338}
337 339
338/* 340/*
339 * inode_lock must be held 341 * inode->i_lock must be held
340 */ 342 */
341void __iget(struct inode *inode) 343void __iget(struct inode *inode)
342{ 344{
@@ -413,7 +415,9 @@ void __insert_inode_hash(struct inode *inode, unsigned long hashval)
413 struct hlist_head *b = inode_hashtable + hash(inode->i_sb, hashval); 415 struct hlist_head *b = inode_hashtable + hash(inode->i_sb, hashval);
414 416
415 spin_lock(&inode_lock); 417 spin_lock(&inode_lock);
418 spin_lock(&inode->i_lock);
416 hlist_add_head(&inode->i_hash, b); 419 hlist_add_head(&inode->i_hash, b);
420 spin_unlock(&inode->i_lock);
417 spin_unlock(&inode_lock); 421 spin_unlock(&inode_lock);
418} 422}
419EXPORT_SYMBOL(__insert_inode_hash); 423EXPORT_SYMBOL(__insert_inode_hash);
@@ -438,7 +442,9 @@ static void __remove_inode_hash(struct inode *inode)
438void remove_inode_hash(struct inode *inode) 442void remove_inode_hash(struct inode *inode)
439{ 443{
440 spin_lock(&inode_lock); 444 spin_lock(&inode_lock);
445 spin_lock(&inode->i_lock);
441 hlist_del_init(&inode->i_hash); 446 hlist_del_init(&inode->i_hash);
447 spin_unlock(&inode->i_lock);
442 spin_unlock(&inode_lock); 448 spin_unlock(&inode_lock);
443} 449}
444EXPORT_SYMBOL(remove_inode_hash); 450EXPORT_SYMBOL(remove_inode_hash);
@@ -495,7 +501,9 @@ static void dispose_list(struct list_head *head)
495 __inode_sb_list_del(inode); 501 __inode_sb_list_del(inode);
496 spin_unlock(&inode_lock); 502 spin_unlock(&inode_lock);
497 503
498 wake_up_inode(inode); 504 spin_lock(&inode->i_lock);
505 wake_up_bit(&inode->i_state, __I_NEW);
506 spin_unlock(&inode->i_lock);
499 destroy_inode(inode); 507 destroy_inode(inode);
500 } 508 }
501} 509}
@@ -518,10 +526,17 @@ void evict_inodes(struct super_block *sb)
518 list_for_each_entry_safe(inode, next, &sb->s_inodes, i_sb_list) { 526 list_for_each_entry_safe(inode, next, &sb->s_inodes, i_sb_list) {
519 if (atomic_read(&inode->i_count)) 527 if (atomic_read(&inode->i_count))
520 continue; 528 continue;
521 if (inode->i_state & (I_NEW | I_FREEING | I_WILL_FREE)) 529
530 spin_lock(&inode->i_lock);
531 if (inode->i_state & (I_NEW | I_FREEING | I_WILL_FREE)) {
532 spin_unlock(&inode->i_lock);
522 continue; 533 continue;
534 }
523 535
524 inode->i_state |= I_FREEING; 536 inode->i_state |= I_FREEING;
537 if (!(inode->i_state & (I_DIRTY | I_SYNC)))
538 inodes_stat.nr_unused--;
539 spin_unlock(&inode->i_lock);
525 540
526 /* 541 /*
527 * Move the inode off the IO lists and LRU once I_FREEING is 542 * Move the inode off the IO lists and LRU once I_FREEING is
@@ -529,8 +544,6 @@ void evict_inodes(struct super_block *sb)
529 */ 544 */
530 list_move(&inode->i_lru, &dispose); 545 list_move(&inode->i_lru, &dispose);
531 list_del_init(&inode->i_wb_list); 546 list_del_init(&inode->i_wb_list);
532 if (!(inode->i_state & (I_DIRTY | I_SYNC)))
533 inodes_stat.nr_unused--;
534 } 547 }
535 spin_unlock(&inode_lock); 548 spin_unlock(&inode_lock);
536 549
@@ -563,18 +576,26 @@ int invalidate_inodes(struct super_block *sb, bool kill_dirty)
563 576
564 spin_lock(&inode_lock); 577 spin_lock(&inode_lock);
565 list_for_each_entry_safe(inode, next, &sb->s_inodes, i_sb_list) { 578 list_for_each_entry_safe(inode, next, &sb->s_inodes, i_sb_list) {
566 if (inode->i_state & (I_NEW | I_FREEING | I_WILL_FREE)) 579 spin_lock(&inode->i_lock);
580 if (inode->i_state & (I_NEW | I_FREEING | I_WILL_FREE)) {
581 spin_unlock(&inode->i_lock);
567 continue; 582 continue;
583 }
568 if (inode->i_state & I_DIRTY && !kill_dirty) { 584 if (inode->i_state & I_DIRTY && !kill_dirty) {
585 spin_unlock(&inode->i_lock);
569 busy = 1; 586 busy = 1;
570 continue; 587 continue;
571 } 588 }
572 if (atomic_read(&inode->i_count)) { 589 if (atomic_read(&inode->i_count)) {
590 spin_unlock(&inode->i_lock);
573 busy = 1; 591 busy = 1;
574 continue; 592 continue;
575 } 593 }
576 594
577 inode->i_state |= I_FREEING; 595 inode->i_state |= I_FREEING;
596 if (!(inode->i_state & (I_DIRTY | I_SYNC)))
597 inodes_stat.nr_unused--;
598 spin_unlock(&inode->i_lock);
578 599
579 /* 600 /*
580 * Move the inode off the IO lists and LRU once I_FREEING is 601 * Move the inode off the IO lists and LRU once I_FREEING is
@@ -582,8 +603,6 @@ int invalidate_inodes(struct super_block *sb, bool kill_dirty)
582 */ 603 */
583 list_move(&inode->i_lru, &dispose); 604 list_move(&inode->i_lru, &dispose);
584 list_del_init(&inode->i_wb_list); 605 list_del_init(&inode->i_wb_list);
585 if (!(inode->i_state & (I_DIRTY | I_SYNC)))
586 inodes_stat.nr_unused--;
587 } 606 }
588 spin_unlock(&inode_lock); 607 spin_unlock(&inode_lock);
589 608
@@ -641,8 +660,10 @@ static void prune_icache(int nr_to_scan)
641 * Referenced or dirty inodes are still in use. Give them 660 * Referenced or dirty inodes are still in use. Give them
642 * another pass through the LRU as we cannot reclaim them now. 661 * another pass through the LRU as we cannot reclaim them now.
643 */ 662 */
663 spin_lock(&inode->i_lock);
644 if (atomic_read(&inode->i_count) || 664 if (atomic_read(&inode->i_count) ||
645 (inode->i_state & ~I_REFERENCED)) { 665 (inode->i_state & ~I_REFERENCED)) {
666 spin_unlock(&inode->i_lock);
646 list_del_init(&inode->i_lru); 667 list_del_init(&inode->i_lru);
647 inodes_stat.nr_unused--; 668 inodes_stat.nr_unused--;
648 continue; 669 continue;
@@ -650,12 +671,14 @@ static void prune_icache(int nr_to_scan)
650 671
651 /* recently referenced inodes get one more pass */ 672 /* recently referenced inodes get one more pass */
652 if (inode->i_state & I_REFERENCED) { 673 if (inode->i_state & I_REFERENCED) {
653 list_move(&inode->i_lru, &inode_lru);
654 inode->i_state &= ~I_REFERENCED; 674 inode->i_state &= ~I_REFERENCED;
675 spin_unlock(&inode->i_lock);
676 list_move(&inode->i_lru, &inode_lru);
655 continue; 677 continue;
656 } 678 }
657 if (inode_has_buffers(inode) || inode->i_data.nrpages) { 679 if (inode_has_buffers(inode) || inode->i_data.nrpages) {
658 __iget(inode); 680 __iget(inode);
681 spin_unlock(&inode->i_lock);
659 spin_unlock(&inode_lock); 682 spin_unlock(&inode_lock);
660 if (remove_inode_buffers(inode)) 683 if (remove_inode_buffers(inode))
661 reap += invalidate_mapping_pages(&inode->i_data, 684 reap += invalidate_mapping_pages(&inode->i_data,
@@ -666,11 +689,15 @@ static void prune_icache(int nr_to_scan)
666 if (inode != list_entry(inode_lru.next, 689 if (inode != list_entry(inode_lru.next,
667 struct inode, i_lru)) 690 struct inode, i_lru))
668 continue; /* wrong inode or list_empty */ 691 continue; /* wrong inode or list_empty */
669 if (!can_unuse(inode)) 692 spin_lock(&inode->i_lock);
693 if (!can_unuse(inode)) {
694 spin_unlock(&inode->i_lock);
670 continue; 695 continue;
696 }
671 } 697 }
672 WARN_ON(inode->i_state & I_NEW); 698 WARN_ON(inode->i_state & I_NEW);
673 inode->i_state |= I_FREEING; 699 inode->i_state |= I_FREEING;
700 spin_unlock(&inode->i_lock);
674 701
675 /* 702 /*
676 * Move the inode off the IO lists and LRU once I_FREEING is 703 * Move the inode off the IO lists and LRU once I_FREEING is
@@ -737,11 +764,13 @@ repeat:
737 continue; 764 continue;
738 if (!test(inode, data)) 765 if (!test(inode, data))
739 continue; 766 continue;
767 spin_lock(&inode->i_lock);
740 if (inode->i_state & (I_FREEING|I_WILL_FREE)) { 768 if (inode->i_state & (I_FREEING|I_WILL_FREE)) {
741 __wait_on_freeing_inode(inode); 769 __wait_on_freeing_inode(inode);
742 goto repeat; 770 goto repeat;
743 } 771 }
744 __iget(inode); 772 __iget(inode);
773 spin_unlock(&inode->i_lock);
745 return inode; 774 return inode;
746 } 775 }
747 return NULL; 776 return NULL;
@@ -763,11 +792,13 @@ repeat:
763 continue; 792 continue;
764 if (inode->i_sb != sb) 793 if (inode->i_sb != sb)
765 continue; 794 continue;
795 spin_lock(&inode->i_lock);
766 if (inode->i_state & (I_FREEING|I_WILL_FREE)) { 796 if (inode->i_state & (I_FREEING|I_WILL_FREE)) {
767 __wait_on_freeing_inode(inode); 797 __wait_on_freeing_inode(inode);
768 goto repeat; 798 goto repeat;
769 } 799 }
770 __iget(inode); 800 __iget(inode);
801 spin_unlock(&inode->i_lock);
771 return inode; 802 return inode;
772 } 803 }
773 return NULL; 804 return NULL;
@@ -832,14 +863,23 @@ struct inode *new_inode(struct super_block *sb)
832 inode = alloc_inode(sb); 863 inode = alloc_inode(sb);
833 if (inode) { 864 if (inode) {
834 spin_lock(&inode_lock); 865 spin_lock(&inode_lock);
835 __inode_sb_list_add(inode); 866 spin_lock(&inode->i_lock);
836 inode->i_state = 0; 867 inode->i_state = 0;
868 spin_unlock(&inode->i_lock);
869 __inode_sb_list_add(inode);
837 spin_unlock(&inode_lock); 870 spin_unlock(&inode_lock);
838 } 871 }
839 return inode; 872 return inode;
840} 873}
841EXPORT_SYMBOL(new_inode); 874EXPORT_SYMBOL(new_inode);
842 875
876/**
877 * unlock_new_inode - clear the I_NEW state and wake up any waiters
878 * @inode: new inode to unlock
879 *
880 * Called when the inode is fully initialised to clear the new state of the
881 * inode and wake up anyone waiting for the inode to finish initialisation.
882 */
843void unlock_new_inode(struct inode *inode) 883void unlock_new_inode(struct inode *inode)
844{ 884{
845#ifdef CONFIG_DEBUG_LOCK_ALLOC 885#ifdef CONFIG_DEBUG_LOCK_ALLOC
@@ -859,19 +899,11 @@ void unlock_new_inode(struct inode *inode)
859 } 899 }
860 } 900 }
861#endif 901#endif
862 /* 902 spin_lock(&inode->i_lock);
863 * This is special! We do not need the spinlock when clearing I_NEW,
864 * because we're guaranteed that nobody else tries to do anything about
865 * the state of the inode when it is locked, as we just created it (so
866 * there can be no old holders that haven't tested I_NEW).
867 * However we must emit the memory barrier so that other CPUs reliably
868 * see the clearing of I_NEW after the other inode initialisation has
869 * completed.
870 */
871 smp_mb();
872 WARN_ON(!(inode->i_state & I_NEW)); 903 WARN_ON(!(inode->i_state & I_NEW));
873 inode->i_state &= ~I_NEW; 904 inode->i_state &= ~I_NEW;
874 wake_up_inode(inode); 905 wake_up_bit(&inode->i_state, __I_NEW);
906 spin_unlock(&inode->i_lock);
875} 907}
876EXPORT_SYMBOL(unlock_new_inode); 908EXPORT_SYMBOL(unlock_new_inode);
877 909
@@ -900,9 +932,11 @@ static struct inode *get_new_inode(struct super_block *sb,
900 if (set(inode, data)) 932 if (set(inode, data))
901 goto set_failed; 933 goto set_failed;
902 934
935 spin_lock(&inode->i_lock);
936 inode->i_state = I_NEW;
903 hlist_add_head(&inode->i_hash, head); 937 hlist_add_head(&inode->i_hash, head);
938 spin_unlock(&inode->i_lock);
904 __inode_sb_list_add(inode); 939 __inode_sb_list_add(inode);
905 inode->i_state = I_NEW;
906 spin_unlock(&inode_lock); 940 spin_unlock(&inode_lock);
907 941
908 /* Return the locked inode with I_NEW set, the 942 /* Return the locked inode with I_NEW set, the
@@ -947,9 +981,11 @@ static struct inode *get_new_inode_fast(struct super_block *sb,
947 old = find_inode_fast(sb, head, ino); 981 old = find_inode_fast(sb, head, ino);
948 if (!old) { 982 if (!old) {
949 inode->i_ino = ino; 983 inode->i_ino = ino;
984 spin_lock(&inode->i_lock);
985 inode->i_state = I_NEW;
950 hlist_add_head(&inode->i_hash, head); 986 hlist_add_head(&inode->i_hash, head);
987 spin_unlock(&inode->i_lock);
951 __inode_sb_list_add(inode); 988 __inode_sb_list_add(inode);
952 inode->i_state = I_NEW;
953 spin_unlock(&inode_lock); 989 spin_unlock(&inode_lock);
954 990
955 /* Return the locked inode with I_NEW set, the 991 /* Return the locked inode with I_NEW set, the
@@ -1034,15 +1070,19 @@ EXPORT_SYMBOL(iunique);
1034struct inode *igrab(struct inode *inode) 1070struct inode *igrab(struct inode *inode)
1035{ 1071{
1036 spin_lock(&inode_lock); 1072 spin_lock(&inode_lock);
1037 if (!(inode->i_state & (I_FREEING|I_WILL_FREE))) 1073 spin_lock(&inode->i_lock);
1074 if (!(inode->i_state & (I_FREEING|I_WILL_FREE))) {
1038 __iget(inode); 1075 __iget(inode);
1039 else 1076 spin_unlock(&inode->i_lock);
1077 } else {
1078 spin_unlock(&inode->i_lock);
1040 /* 1079 /*
1041 * Handle the case where s_op->clear_inode has not been 1080 * Handle the case where s_op->clear_inode has not been
1042 * called yet, and somebody is calling igrab 1081 * called yet, and somebody is calling igrab
1043 * while the inode is getting freed. 1082 * while the inode is getting freed.
1044 */ 1083 */
1045 inode = NULL; 1084 inode = NULL;
1085 }
1046 spin_unlock(&inode_lock); 1086 spin_unlock(&inode_lock);
1047 return inode; 1087 return inode;
1048} 1088}
@@ -1271,7 +1311,6 @@ int insert_inode_locked(struct inode *inode)
1271 ino_t ino = inode->i_ino; 1311 ino_t ino = inode->i_ino;
1272 struct hlist_head *head = inode_hashtable + hash(sb, ino); 1312 struct hlist_head *head = inode_hashtable + hash(sb, ino);
1273 1313
1274 inode->i_state |= I_NEW;
1275 while (1) { 1314 while (1) {
1276 struct hlist_node *node; 1315 struct hlist_node *node;
1277 struct inode *old = NULL; 1316 struct inode *old = NULL;
@@ -1281,16 +1320,23 @@ int insert_inode_locked(struct inode *inode)
1281 continue; 1320 continue;
1282 if (old->i_sb != sb) 1321 if (old->i_sb != sb)
1283 continue; 1322 continue;
1284 if (old->i_state & (I_FREEING|I_WILL_FREE)) 1323 spin_lock(&old->i_lock);
1324 if (old->i_state & (I_FREEING|I_WILL_FREE)) {
1325 spin_unlock(&old->i_lock);
1285 continue; 1326 continue;
1327 }
1286 break; 1328 break;
1287 } 1329 }
1288 if (likely(!node)) { 1330 if (likely(!node)) {
1331 spin_lock(&inode->i_lock);
1332 inode->i_state |= I_NEW;
1289 hlist_add_head(&inode->i_hash, head); 1333 hlist_add_head(&inode->i_hash, head);
1334 spin_unlock(&inode->i_lock);
1290 spin_unlock(&inode_lock); 1335 spin_unlock(&inode_lock);
1291 return 0; 1336 return 0;
1292 } 1337 }
1293 __iget(old); 1338 __iget(old);
1339 spin_unlock(&old->i_lock);
1294 spin_unlock(&inode_lock); 1340 spin_unlock(&inode_lock);
1295 wait_on_inode(old); 1341 wait_on_inode(old);
1296 if (unlikely(!inode_unhashed(old))) { 1342 if (unlikely(!inode_unhashed(old))) {
@@ -1308,8 +1354,6 @@ int insert_inode_locked4(struct inode *inode, unsigned long hashval,
1308 struct super_block *sb = inode->i_sb; 1354 struct super_block *sb = inode->i_sb;
1309 struct hlist_head *head = inode_hashtable + hash(sb, hashval); 1355 struct hlist_head *head = inode_hashtable + hash(sb, hashval);
1310 1356
1311 inode->i_state |= I_NEW;
1312
1313 while (1) { 1357 while (1) {
1314 struct hlist_node *node; 1358 struct hlist_node *node;
1315 struct inode *old = NULL; 1359 struct inode *old = NULL;
@@ -1320,16 +1364,23 @@ int insert_inode_locked4(struct inode *inode, unsigned long hashval,
1320 continue; 1364 continue;
1321 if (!test(old, data)) 1365 if (!test(old, data))
1322 continue; 1366 continue;
1323 if (old->i_state & (I_FREEING|I_WILL_FREE)) 1367 spin_lock(&old->i_lock);
1368 if (old->i_state & (I_FREEING|I_WILL_FREE)) {
1369 spin_unlock(&old->i_lock);
1324 continue; 1370 continue;
1371 }
1325 break; 1372 break;
1326 } 1373 }
1327 if (likely(!node)) { 1374 if (likely(!node)) {
1375 spin_lock(&inode->i_lock);
1376 inode->i_state |= I_NEW;
1328 hlist_add_head(&inode->i_hash, head); 1377 hlist_add_head(&inode->i_hash, head);
1378 spin_unlock(&inode->i_lock);
1329 spin_unlock(&inode_lock); 1379 spin_unlock(&inode_lock);
1330 return 0; 1380 return 0;
1331 } 1381 }
1332 __iget(old); 1382 __iget(old);
1383 spin_unlock(&old->i_lock);
1333 spin_unlock(&inode_lock); 1384 spin_unlock(&inode_lock);
1334 wait_on_inode(old); 1385 wait_on_inode(old);
1335 if (unlikely(!inode_unhashed(old))) { 1386 if (unlikely(!inode_unhashed(old))) {
@@ -1375,6 +1426,9 @@ static void iput_final(struct inode *inode)
1375 const struct super_operations *op = inode->i_sb->s_op; 1426 const struct super_operations *op = inode->i_sb->s_op;
1376 int drop; 1427 int drop;
1377 1428
1429 spin_lock(&inode->i_lock);
1430 WARN_ON(inode->i_state & I_NEW);
1431
1378 if (op && op->drop_inode) 1432 if (op && op->drop_inode)
1379 drop = op->drop_inode(inode); 1433 drop = op->drop_inode(inode);
1380 else 1434 else
@@ -1386,21 +1440,23 @@ static void iput_final(struct inode *inode)
1386 if (!(inode->i_state & (I_DIRTY|I_SYNC))) { 1440 if (!(inode->i_state & (I_DIRTY|I_SYNC))) {
1387 inode_lru_list_add(inode); 1441 inode_lru_list_add(inode);
1388 } 1442 }
1443 spin_unlock(&inode->i_lock);
1389 spin_unlock(&inode_lock); 1444 spin_unlock(&inode_lock);
1390 return; 1445 return;
1391 } 1446 }
1392 WARN_ON(inode->i_state & I_NEW);
1393 inode->i_state |= I_WILL_FREE; 1447 inode->i_state |= I_WILL_FREE;
1448 spin_unlock(&inode->i_lock);
1394 spin_unlock(&inode_lock); 1449 spin_unlock(&inode_lock);
1395 write_inode_now(inode, 1); 1450 write_inode_now(inode, 1);
1396 spin_lock(&inode_lock); 1451 spin_lock(&inode_lock);
1452 spin_lock(&inode->i_lock);
1397 WARN_ON(inode->i_state & I_NEW); 1453 WARN_ON(inode->i_state & I_NEW);
1398 inode->i_state &= ~I_WILL_FREE; 1454 inode->i_state &= ~I_WILL_FREE;
1399 __remove_inode_hash(inode); 1455 __remove_inode_hash(inode);
1400 } 1456 }
1401 1457
1402 WARN_ON(inode->i_state & I_NEW);
1403 inode->i_state |= I_FREEING; 1458 inode->i_state |= I_FREEING;
1459 spin_unlock(&inode->i_lock);
1404 1460
1405 /* 1461 /*
1406 * Move the inode off the IO lists and LRU once I_FREEING is 1462 * Move the inode off the IO lists and LRU once I_FREEING is
@@ -1413,8 +1469,10 @@ static void iput_final(struct inode *inode)
1413 spin_unlock(&inode_lock); 1469 spin_unlock(&inode_lock);
1414 evict(inode); 1470 evict(inode);
1415 remove_inode_hash(inode); 1471 remove_inode_hash(inode);
1416 wake_up_inode(inode); 1472 spin_lock(&inode->i_lock);
1473 wake_up_bit(&inode->i_state, __I_NEW);
1417 BUG_ON(inode->i_state != (I_FREEING | I_CLEAR)); 1474 BUG_ON(inode->i_state != (I_FREEING | I_CLEAR));
1475 spin_unlock(&inode->i_lock);
1418 destroy_inode(inode); 1476 destroy_inode(inode);
1419} 1477}
1420 1478
@@ -1611,9 +1669,8 @@ EXPORT_SYMBOL(inode_wait);
1611 * to recheck inode state. 1669 * to recheck inode state.
1612 * 1670 *
1613 * It doesn't matter if I_NEW is not set initially, a call to 1671 * It doesn't matter if I_NEW is not set initially, a call to
1614 * wake_up_inode() after removing from the hash list will DTRT. 1672 * wake_up_bit(&inode->i_state, __I_NEW) after removing from the hash list
1615 * 1673 * will DTRT.
1616 * This is called with inode_lock held.
1617 */ 1674 */
1618static void __wait_on_freeing_inode(struct inode *inode) 1675static void __wait_on_freeing_inode(struct inode *inode)
1619{ 1676{
@@ -1621,6 +1678,7 @@ static void __wait_on_freeing_inode(struct inode *inode)
1621 DEFINE_WAIT_BIT(wait, &inode->i_state, __I_NEW); 1678 DEFINE_WAIT_BIT(wait, &inode->i_state, __I_NEW);
1622 wq = bit_waitqueue(&inode->i_state, __I_NEW); 1679 wq = bit_waitqueue(&inode->i_state, __I_NEW);
1623 prepare_to_wait(wq, &wait.wait, TASK_UNINTERRUPTIBLE); 1680 prepare_to_wait(wq, &wait.wait, TASK_UNINTERRUPTIBLE);
1681 spin_unlock(&inode->i_lock);
1624 spin_unlock(&inode_lock); 1682 spin_unlock(&inode_lock);
1625 schedule(); 1683 schedule();
1626 finish_wait(wq, &wait.wait); 1684 finish_wait(wq, &wait.wait);
diff --git a/fs/notify/inode_mark.c b/fs/notify/inode_mark.c
index 4c29fcf557d1..4dd53fb44124 100644
--- a/fs/notify/inode_mark.c
+++ b/fs/notify/inode_mark.c
@@ -254,8 +254,11 @@ void fsnotify_unmount_inodes(struct list_head *list)
254 * I_WILL_FREE, or I_NEW which is fine because by that point 254 * I_WILL_FREE, or I_NEW which is fine because by that point
255 * the inode cannot have any associated watches. 255 * the inode cannot have any associated watches.
256 */ 256 */
257 if (inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW)) 257 spin_lock(&inode->i_lock);
258 if (inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW)) {
259 spin_unlock(&inode->i_lock);
258 continue; 260 continue;
261 }
259 262
260 /* 263 /*
261 * If i_count is zero, the inode cannot have any watches and 264 * If i_count is zero, the inode cannot have any watches and
@@ -263,8 +266,10 @@ void fsnotify_unmount_inodes(struct list_head *list)
263 * evict all inodes with zero i_count from icache which is 266 * evict all inodes with zero i_count from icache which is
264 * unnecessarily violent and may in fact be illegal to do. 267 * unnecessarily violent and may in fact be illegal to do.
265 */ 268 */
266 if (!atomic_read(&inode->i_count)) 269 if (!atomic_read(&inode->i_count)) {
270 spin_unlock(&inode->i_lock);
267 continue; 271 continue;
272 }
268 273
269 need_iput_tmp = need_iput; 274 need_iput_tmp = need_iput;
270 need_iput = NULL; 275 need_iput = NULL;
@@ -274,13 +279,17 @@ void fsnotify_unmount_inodes(struct list_head *list)
274 __iget(inode); 279 __iget(inode);
275 else 280 else
276 need_iput_tmp = NULL; 281 need_iput_tmp = NULL;
282 spin_unlock(&inode->i_lock);
277 283
278 /* In case the dropping of a reference would nuke next_i. */ 284 /* In case the dropping of a reference would nuke next_i. */
279 if ((&next_i->i_sb_list != list) && 285 if ((&next_i->i_sb_list != list) &&
280 atomic_read(&next_i->i_count) && 286 atomic_read(&next_i->i_count)) {
281 !(next_i->i_state & (I_FREEING | I_WILL_FREE))) { 287 spin_lock(&next_i->i_lock);
282 __iget(next_i); 288 if (!(next_i->i_state & (I_FREEING | I_WILL_FREE))) {
283 need_iput = next_i; 289 __iget(next_i);
290 need_iput = next_i;
291 }
292 spin_unlock(&next_i->i_lock);
284 } 293 }
285 294
286 /* 295 /*
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index a2a622e079f0..a1470fda366c 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -902,18 +902,19 @@ static void add_dquot_ref(struct super_block *sb, int type)
902 902
903 spin_lock(&inode_lock); 903 spin_lock(&inode_lock);
904 list_for_each_entry(inode, &sb->s_inodes, i_sb_list) { 904 list_for_each_entry(inode, &sb->s_inodes, i_sb_list) {
905 if (inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW)) 905 spin_lock(&inode->i_lock);
906 if ((inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW)) ||
907 !atomic_read(&inode->i_writecount) ||
908 !dqinit_needed(inode, type)) {
909 spin_unlock(&inode->i_lock);
906 continue; 910 continue;
911 }
907#ifdef CONFIG_QUOTA_DEBUG 912#ifdef CONFIG_QUOTA_DEBUG
908 if (unlikely(inode_get_rsv_space(inode) > 0)) 913 if (unlikely(inode_get_rsv_space(inode) > 0))
909 reserved = 1; 914 reserved = 1;
910#endif 915#endif
911 if (!atomic_read(&inode->i_writecount))
912 continue;
913 if (!dqinit_needed(inode, type))
914 continue;
915
916 __iget(inode); 916 __iget(inode);
917 spin_unlock(&inode->i_lock);
917 spin_unlock(&inode_lock); 918 spin_unlock(&inode_lock);
918 919
919 iput(old_inode); 920 iput(old_inode);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 4dda076c24a1..ed6fdcc1484c 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1647,7 +1647,7 @@ struct super_operations {
1647}; 1647};
1648 1648
1649/* 1649/*
1650 * Inode state bits. Protected by inode_lock. 1650 * Inode state bits. Protected by inode->i_lock
1651 * 1651 *
1652 * Three bits determine the dirty state of the inode, I_DIRTY_SYNC, 1652 * Three bits determine the dirty state of the inode, I_DIRTY_SYNC,
1653 * I_DIRTY_DATASYNC and I_DIRTY_PAGES. 1653 * I_DIRTY_DATASYNC and I_DIRTY_PAGES.
diff --git a/include/linux/quotaops.h b/include/linux/quotaops.h
index eb354f6f26b3..26f9e3612e0f 100644
--- a/include/linux/quotaops.h
+++ b/include/linux/quotaops.h
@@ -277,7 +277,7 @@ static inline int dquot_alloc_space(struct inode *inode, qsize_t nr)
277 /* 277 /*
278 * Mark inode fully dirty. Since we are allocating blocks, inode 278 * Mark inode fully dirty. Since we are allocating blocks, inode
279 * would become fully dirty soon anyway and it reportedly 279 * would become fully dirty soon anyway and it reportedly
280 * reduces inode_lock contention. 280 * reduces lock contention.
281 */ 281 */
282 mark_inode_dirty(inode); 282 mark_inode_dirty(inode);
283 } 283 }
diff --git a/mm/filemap.c b/mm/filemap.c
index f807afda86f2..499e9aa91450 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -99,7 +99,9 @@
99 * ->private_lock (page_remove_rmap->set_page_dirty) 99 * ->private_lock (page_remove_rmap->set_page_dirty)
100 * ->tree_lock (page_remove_rmap->set_page_dirty) 100 * ->tree_lock (page_remove_rmap->set_page_dirty)
101 * ->inode_lock (page_remove_rmap->set_page_dirty) 101 * ->inode_lock (page_remove_rmap->set_page_dirty)
102 * ->inode->i_lock (page_remove_rmap->set_page_dirty)
102 * ->inode_lock (zap_pte_range->set_page_dirty) 103 * ->inode_lock (zap_pte_range->set_page_dirty)
104 * ->inode->i_lock (zap_pte_range->set_page_dirty)
103 * ->private_lock (zap_pte_range->__set_page_dirty_buffers) 105 * ->private_lock (zap_pte_range->__set_page_dirty_buffers)
104 * 106 *
105 * (code doesn't rely on that order, so you could switch it around) 107 * (code doesn't rely on that order, so you could switch it around)
diff --git a/mm/rmap.c b/mm/rmap.c
index 4a8e99a0fb97..7dada0456448 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -32,6 +32,7 @@
32 * mmlist_lock (in mmput, drain_mmlist and others) 32 * mmlist_lock (in mmput, drain_mmlist and others)
33 * mapping->private_lock (in __set_page_dirty_buffers) 33 * mapping->private_lock (in __set_page_dirty_buffers)
34 * inode_lock (in set_page_dirty's __mark_inode_dirty) 34 * inode_lock (in set_page_dirty's __mark_inode_dirty)
35 * inode->i_lock (in set_page_dirty's __mark_inode_dirty)
35 * sb_lock (within inode_lock in fs/fs-writeback.c) 36 * sb_lock (within inode_lock in fs/fs-writeback.c)
36 * mapping->tree_lock (widely used, in set_page_dirty, 37 * mapping->tree_lock (widely used, in set_page_dirty,
37 * in arch-dependent flush_dcache_mmap_lock, 38 * in arch-dependent flush_dcache_mmap_lock,