Diffstat (limited to 'fs/inode.c')
 fs/inode.c | 656 ++++++++++++++++++++++++++++++-------------------------------
 1 file changed, 312 insertions(+), 344 deletions(-)
diff --git a/fs/inode.c b/fs/inode.c
index 0b3da4a77704..05a1f75ae791 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -26,6 +26,38 @@
 #include <linux/posix_acl.h>
 #include <linux/ima.h>
 #include <linux/cred.h>
+#include "internal.h"
+
+/*
+ * inode locking rules.
+ *
+ * inode->i_lock protects:
+ *   inode->i_state, inode->i_hash, __iget()
+ * inode_lru_lock protects:
+ *   inode_lru, inode->i_lru
+ * inode_sb_list_lock protects:
+ *   sb->s_inodes, inode->i_sb_list
+ * inode_wb_list_lock protects:
+ *   bdi->wb.b_{dirty,io,more_io}, inode->i_wb_list
+ * inode_hash_lock protects:
+ *   inode_hashtable, inode->i_hash
+ *
+ * Lock ordering:
+ *
+ * inode_sb_list_lock
+ *   inode->i_lock
+ *     inode_lru_lock
+ *
+ * inode_wb_list_lock
+ *   inode->i_lock
+ *
+ * inode_hash_lock
+ *   inode_sb_list_lock
+ *   inode->i_lock
+ *
+ * iunique_lock
+ *   inode_hash_lock
+ */
 
 /*
  * This is needed for the following functions:
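To read the ordering table: a lock may be taken while holding any lock listed above it in the same column, never the reverse. A minimal sketch of a compliant walk (illustrative only, not part of the patch; the function name is hypothetical):

	static void example_walk_sb(struct super_block *sb)
	{
		struct inode *inode;

		spin_lock(&inode_sb_list_lock);		/* outermost lock first */
		list_for_each_entry(inode, &sb->s_inodes, i_sb_list) {
			spin_lock(&inode->i_lock);	/* documented to nest inside */
			/* ... inspect inode->i_state ... */
			spin_unlock(&inode->i_lock);
		}
		spin_unlock(&inode_sb_list_lock);
	}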
@@ -60,6 +92,8 @@
 
 static unsigned int i_hash_mask __read_mostly;
 static unsigned int i_hash_shift __read_mostly;
+static struct hlist_head *inode_hashtable __read_mostly;
+static __cacheline_aligned_in_smp DEFINE_SPINLOCK(inode_hash_lock);
 
 /*
  * Each inode can be on two separate lists. One is
@@ -74,15 +108,10 @@ static unsigned int i_hash_shift __read_mostly;
  */
 
 static LIST_HEAD(inode_lru);
-static struct hlist_head *inode_hashtable __read_mostly;
+static DEFINE_SPINLOCK(inode_lru_lock);
 
-/*
- * A simple spinlock to protect the list manipulations.
- *
- * NOTE! You also have to own the lock if you change
- * the i_state of an inode while it is in use..
- */
-DEFINE_SPINLOCK(inode_lock);
+__cacheline_aligned_in_smp DEFINE_SPINLOCK(inode_sb_list_lock);
+__cacheline_aligned_in_smp DEFINE_SPINLOCK(inode_wb_list_lock);
 
 /*
  * iprune_sem provides exclusion between the icache shrinking and the
@@ -137,15 +166,6 @@ int proc_nr_inodes(ctl_table *table, int write,
 }
 #endif
 
-static void wake_up_inode(struct inode *inode)
-{
-	/*
-	 * Prevent speculative execution through spin_unlock(&inode_lock);
-	 */
-	smp_mb();
-	wake_up_bit(&inode->i_state, __I_NEW);
-}
-
 /**
  * inode_init_always - perform inode structure intialisation
  * @sb: superblock inode belongs to
@@ -336,7 +356,7 @@ static void init_once(void *foo)
 }
 
 /*
- * inode_lock must be held
+ * inode->i_lock must be held
  */
 void __iget(struct inode *inode)
 {
@@ -354,23 +374,22 @@ EXPORT_SYMBOL(ihold);
 
 static void inode_lru_list_add(struct inode *inode)
 {
+	spin_lock(&inode_lru_lock);
 	if (list_empty(&inode->i_lru)) {
 		list_add(&inode->i_lru, &inode_lru);
 		inodes_stat.nr_unused++;
 	}
+	spin_unlock(&inode_lru_lock);
 }
 
 static void inode_lru_list_del(struct inode *inode)
 {
+	spin_lock(&inode_lru_lock);
 	if (!list_empty(&inode->i_lru)) {
 		list_del_init(&inode->i_lru);
 		inodes_stat.nr_unused--;
 	}
-}
-
-static inline void __inode_sb_list_add(struct inode *inode)
-{
-	list_add(&inode->i_sb_list, &inode->i_sb->s_inodes);
+	spin_unlock(&inode_lru_lock);
 }
 
 /**
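Note that the LRU helpers now take inode_lru_lock themselves; per the ordering comment above, callers hold inode->i_lock around them, so inode_lru_lock nests inside i_lock. A hedged sketch of a caller, mirroring what iput_final() does later in this patch:

	spin_lock(&inode->i_lock);
	inode->i_state |= I_REFERENCED;
	if (!(inode->i_state & (I_DIRTY|I_SYNC)))
		inode_lru_list_add(inode);	/* takes inode_lru_lock inside i_lock */
	spin_unlock(&inode->i_lock);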
@@ -379,15 +398,17 @@ static inline void __inode_sb_list_add(struct inode *inode)
  */
 void inode_sb_list_add(struct inode *inode)
 {
-	spin_lock(&inode_lock);
-	__inode_sb_list_add(inode);
-	spin_unlock(&inode_lock);
+	spin_lock(&inode_sb_list_lock);
+	list_add(&inode->i_sb_list, &inode->i_sb->s_inodes);
+	spin_unlock(&inode_sb_list_lock);
 }
 EXPORT_SYMBOL_GPL(inode_sb_list_add);
 
-static inline void __inode_sb_list_del(struct inode *inode)
+static inline void inode_sb_list_del(struct inode *inode)
 {
+	spin_lock(&inode_sb_list_lock);
 	list_del_init(&inode->i_sb_list);
+	spin_unlock(&inode_sb_list_lock);
 }
 
 static unsigned long hash(struct super_block *sb, unsigned long hashval)
@@ -412,24 +433,15 @@ void __insert_inode_hash(struct inode *inode, unsigned long hashval)
 {
 	struct hlist_head *b = inode_hashtable + hash(inode->i_sb, hashval);
 
-	spin_lock(&inode_lock);
+	spin_lock(&inode_hash_lock);
+	spin_lock(&inode->i_lock);
 	hlist_add_head(&inode->i_hash, b);
-	spin_unlock(&inode_lock);
+	spin_unlock(&inode->i_lock);
+	spin_unlock(&inode_hash_lock);
 }
 EXPORT_SYMBOL(__insert_inode_hash);
 
 /**
- * __remove_inode_hash - remove an inode from the hash
- * @inode: inode to unhash
- *
- * Remove an inode from the superblock.
- */
-static void __remove_inode_hash(struct inode *inode)
-{
-	hlist_del_init(&inode->i_hash);
-}
-
-/**
  * remove_inode_hash - remove an inode from the hash
  * @inode: inode to unhash
  *
@@ -437,9 +449,11 @@ static void __remove_inode_hash(struct inode *inode)
  */
 void remove_inode_hash(struct inode *inode)
 {
-	spin_lock(&inode_lock);
+	spin_lock(&inode_hash_lock);
+	spin_lock(&inode->i_lock);
 	hlist_del_init(&inode->i_hash);
-	spin_unlock(&inode_lock);
+	spin_unlock(&inode->i_lock);
+	spin_unlock(&inode_hash_lock);
 }
 EXPORT_SYMBOL(remove_inode_hash);
 
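Both hash operations nest inode->i_lock inside inode_hash_lock, matching the ordering table. Filesystems normally reach them through insert_inode_hash(), which hashes by inode number; a hedged usage sketch after reading an inode in from disk:

	/* publish a freshly initialised inode in the icache */
	inode->i_ino = ino;
	insert_inode_hash(inode);	/* hashes by i_ino via __insert_inode_hash() */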
@@ -456,10 +470,29 @@ void end_writeback(struct inode *inode)
 }
 EXPORT_SYMBOL(end_writeback);
 
+/*
+ * Free the inode passed in, removing it from the lists it is still connected
+ * to. We remove any pages still attached to the inode and wait for any IO that
+ * is still in progress before finally destroying the inode.
+ *
+ * An inode must already be marked I_FREEING so that we avoid the inode being
+ * moved back onto lists if we race with other code that manipulates the lists
+ * (e.g. writeback_single_inode). The caller is responsible for setting this.
+ *
+ * An inode must already be removed from the LRU list before being evicted from
+ * the cache. This should occur atomically with setting the I_FREEING state
+ * flag, so no inodes here should ever be on the LRU when being evicted.
+ */
 static void evict(struct inode *inode)
 {
 	const struct super_operations *op = inode->i_sb->s_op;
 
+	BUG_ON(!(inode->i_state & I_FREEING));
+	BUG_ON(!list_empty(&inode->i_lru));
+
+	inode_wb_list_del(inode);
+	inode_sb_list_del(inode);
+
 	if (op->evict_inode) {
 		op->evict_inode(inode);
 	} else {
@@ -471,6 +504,15 @@ static void evict(struct inode *inode)
 	bd_forget(inode);
 	if (S_ISCHR(inode->i_mode) && inode->i_cdev)
 		cd_forget(inode);
+
+	remove_inode_hash(inode);
+
+	spin_lock(&inode->i_lock);
+	wake_up_bit(&inode->i_state, __I_NEW);
+	BUG_ON(inode->i_state != (I_FREEING | I_CLEAR));
+	spin_unlock(&inode->i_lock);
+
+	destroy_inode(inode);
 }
 
 /*
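With teardown centralised in evict(), a filesystem's ->evict_inode only has to drop the page cache, release its own resources, and call end_writeback(). A hedged sketch for a simple filesystem of this era (the examplefs_* names are hypothetical):

	static void examplefs_evict_inode(struct inode *inode)
	{
		truncate_inode_pages(&inode->i_data, 0);	/* drop page cache */
		if (!inode->i_nlink && !is_bad_inode(inode))
			examplefs_free_blocks(inode);		/* hypothetical helper */
		end_writeback(inode);				/* leaves I_FREEING | I_CLEAR */
	}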
@@ -489,14 +531,6 @@ static void dispose_list(struct list_head *head)
 		list_del_init(&inode->i_lru);
 
 		evict(inode);
-
-		spin_lock(&inode_lock);
-		__remove_inode_hash(inode);
-		__inode_sb_list_del(inode);
-		spin_unlock(&inode_lock);
-
-		wake_up_inode(inode);
-		destroy_inode(inode);
 	}
 }
 
@@ -514,25 +548,23 @@ void evict_inodes(struct super_block *sb)
 	struct inode *inode, *next;
 	LIST_HEAD(dispose);
 
-	spin_lock(&inode_lock);
+	spin_lock(&inode_sb_list_lock);
 	list_for_each_entry_safe(inode, next, &sb->s_inodes, i_sb_list) {
 		if (atomic_read(&inode->i_count))
 			continue;
-		if (inode->i_state & (I_NEW | I_FREEING | I_WILL_FREE))
+
+		spin_lock(&inode->i_lock);
+		if (inode->i_state & (I_NEW | I_FREEING | I_WILL_FREE)) {
+			spin_unlock(&inode->i_lock);
 			continue;
+		}
 
 		inode->i_state |= I_FREEING;
-
-		/*
-		 * Move the inode off the IO lists and LRU once I_FREEING is
-		 * set so that it won't get moved back on there if it is dirty.
-		 */
-		list_move(&inode->i_lru, &dispose);
-		list_del_init(&inode->i_wb_list);
-		if (!(inode->i_state & (I_DIRTY | I_SYNC)))
-			inodes_stat.nr_unused--;
+		inode_lru_list_del(inode);
+		spin_unlock(&inode->i_lock);
+		list_add(&inode->i_lru, &dispose);
 	}
-	spin_unlock(&inode_lock);
+	spin_unlock(&inode_sb_list_lock);
 
 	dispose_list(&dispose);
 
@@ -561,31 +593,30 @@ int invalidate_inodes(struct super_block *sb, bool kill_dirty)
 	struct inode *inode, *next;
 	LIST_HEAD(dispose);
 
-	spin_lock(&inode_lock);
+	spin_lock(&inode_sb_list_lock);
 	list_for_each_entry_safe(inode, next, &sb->s_inodes, i_sb_list) {
-		if (inode->i_state & (I_NEW | I_FREEING | I_WILL_FREE))
+		spin_lock(&inode->i_lock);
+		if (inode->i_state & (I_NEW | I_FREEING | I_WILL_FREE)) {
+			spin_unlock(&inode->i_lock);
 			continue;
+		}
 		if (inode->i_state & I_DIRTY && !kill_dirty) {
+			spin_unlock(&inode->i_lock);
 			busy = 1;
 			continue;
 		}
 		if (atomic_read(&inode->i_count)) {
+			spin_unlock(&inode->i_lock);
 			busy = 1;
 			continue;
 		}
 
 		inode->i_state |= I_FREEING;
-
-		/*
-		 * Move the inode off the IO lists and LRU once I_FREEING is
-		 * set so that it won't get moved back on there if it is dirty.
-		 */
-		list_move(&inode->i_lru, &dispose);
-		list_del_init(&inode->i_wb_list);
-		if (!(inode->i_state & (I_DIRTY | I_SYNC)))
-			inodes_stat.nr_unused--;
+		inode_lru_list_del(inode);
+		spin_unlock(&inode->i_lock);
+		list_add(&inode->i_lru, &dispose);
 	}
-	spin_unlock(&inode_lock);
+	spin_unlock(&inode_sb_list_lock);
 
 	dispose_list(&dispose);
 
@@ -607,7 +638,7 @@ static int can_unuse(struct inode *inode)
 
 /*
  * Scan `goal' inodes on the unused list for freeable ones. They are moved to a
- * temporary list and then are freed outside inode_lock by dispose_list().
+ * temporary list and then are freed outside inode_lru_lock by dispose_list().
  *
  * Any inodes which are pinned purely because of attached pagecache have their
  * pagecache removed. If the inode has metadata buffers attached to
@@ -628,7 +659,7 @@ static void prune_icache(int nr_to_scan)
 	unsigned long reap = 0;
 
 	down_read(&iprune_sem);
-	spin_lock(&inode_lock);
+	spin_lock(&inode_lru_lock);
 	for (nr_scanned = 0; nr_scanned < nr_to_scan; nr_scanned++) {
 		struct inode *inode;
 
@@ -638,53 +669,67 @@ static void prune_icache(int nr_to_scan)
 		inode = list_entry(inode_lru.prev, struct inode, i_lru);
 
 		/*
+		 * we are inverting the inode_lru_lock/inode->i_lock here,
+		 * so use a trylock. If we fail to get the lock, just move the
+		 * inode to the back of the list so we don't spin on it.
+		 */
+		if (!spin_trylock(&inode->i_lock)) {
+			list_move(&inode->i_lru, &inode_lru);
+			continue;
+		}
+
+		/*
 		 * Referenced or dirty inodes are still in use. Give them
 		 * another pass through the LRU as we canot reclaim them now.
 		 */
 		if (atomic_read(&inode->i_count) ||
 		    (inode->i_state & ~I_REFERENCED)) {
 			list_del_init(&inode->i_lru);
+			spin_unlock(&inode->i_lock);
 			inodes_stat.nr_unused--;
 			continue;
 		}
 
 		/* recently referenced inodes get one more pass */
 		if (inode->i_state & I_REFERENCED) {
-			list_move(&inode->i_lru, &inode_lru);
 			inode->i_state &= ~I_REFERENCED;
+			list_move(&inode->i_lru, &inode_lru);
+			spin_unlock(&inode->i_lock);
 			continue;
 		}
 		if (inode_has_buffers(inode) || inode->i_data.nrpages) {
 			__iget(inode);
-			spin_unlock(&inode_lock);
+			spin_unlock(&inode->i_lock);
+			spin_unlock(&inode_lru_lock);
 			if (remove_inode_buffers(inode))
 				reap += invalidate_mapping_pages(&inode->i_data,
 								0, -1);
 			iput(inode);
-			spin_lock(&inode_lock);
+			spin_lock(&inode_lru_lock);
 
 			if (inode != list_entry(inode_lru.next,
 						struct inode, i_lru))
 				continue;	/* wrong inode or list_empty */
-			if (!can_unuse(inode))
+			/* avoid lock inversions with trylock */
+			if (!spin_trylock(&inode->i_lock))
+				continue;
+			if (!can_unuse(inode)) {
+				spin_unlock(&inode->i_lock);
 				continue;
+			}
 		}
 		WARN_ON(inode->i_state & I_NEW);
 		inode->i_state |= I_FREEING;
+		spin_unlock(&inode->i_lock);
 
-		/*
-		 * Move the inode off the IO lists and LRU once I_FREEING is
-		 * set so that it won't get moved back on there if it is dirty.
-		 */
 		list_move(&inode->i_lru, &freeable);
-		list_del_init(&inode->i_wb_list);
 		inodes_stat.nr_unused--;
 	}
 	if (current_is_kswapd())
 		__count_vm_events(KSWAPD_INODESTEAL, reap);
 	else
 		__count_vm_events(PGINODESTEAL, reap);
-	spin_unlock(&inode_lock);
+	spin_unlock(&inode_lru_lock);
 
 	dispose_list(&freeable);
 	up_read(&iprune_sem);
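The trylock in the loop above is the standard answer when a path must take locks against the documented order: attempt the out-of-order acquisition and, on failure, defer the item rather than block. The shape of the pattern in isolation (hedged sketch; the item type, list_lock, and the_list are assumed for illustration):

	struct item {
		spinlock_t		lock;	/* documented: item->lock, then list_lock */
		struct list_head	node;
	};

	static void shrink_items(int budget)
	{
		int scanned;

		spin_lock(&list_lock);
		for (scanned = 0; scanned < budget && !list_empty(&the_list); scanned++) {
			struct item *it = list_entry(the_list.prev, struct item, node);

			/* taking it->lock under list_lock inverts the order: trylock */
			if (!spin_trylock(&it->lock)) {
				list_move(&it->node, &the_list);	/* revisit later */
				continue;
			}
			list_del_init(&it->node);
			spin_unlock(&it->lock);
		}
		spin_unlock(&list_lock);
	}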
@@ -733,15 +778,21 @@ static struct inode *find_inode(struct super_block *sb,
 
 repeat:
 	hlist_for_each_entry(inode, node, head, i_hash) {
-		if (inode->i_sb != sb)
+		spin_lock(&inode->i_lock);
+		if (inode->i_sb != sb) {
+			spin_unlock(&inode->i_lock);
 			continue;
-		if (!test(inode, data))
+		}
+		if (!test(inode, data)) {
+			spin_unlock(&inode->i_lock);
 			continue;
+		}
 		if (inode->i_state & (I_FREEING|I_WILL_FREE)) {
 			__wait_on_freeing_inode(inode);
 			goto repeat;
 		}
 		__iget(inode);
+		spin_unlock(&inode->i_lock);
 		return inode;
 	}
 	return NULL;
@@ -759,15 +810,21 @@ static struct inode *find_inode_fast(struct super_block *sb,
 
 repeat:
 	hlist_for_each_entry(inode, node, head, i_hash) {
-		if (inode->i_ino != ino)
+		spin_lock(&inode->i_lock);
+		if (inode->i_ino != ino) {
+			spin_unlock(&inode->i_lock);
 			continue;
-		if (inode->i_sb != sb)
+		}
+		if (inode->i_sb != sb) {
+			spin_unlock(&inode->i_lock);
 			continue;
+		}
 		if (inode->i_state & (I_FREEING|I_WILL_FREE)) {
 			__wait_on_freeing_inode(inode);
 			goto repeat;
 		}
 		__iget(inode);
+		spin_unlock(&inode->i_lock);
 		return inode;
 	}
 	return NULL;
@@ -827,19 +884,26 @@ struct inode *new_inode(struct super_block *sb)
 {
 	struct inode *inode;
 
-	spin_lock_prefetch(&inode_lock);
+	spin_lock_prefetch(&inode_sb_list_lock);
 
 	inode = alloc_inode(sb);
 	if (inode) {
-		spin_lock(&inode_lock);
-		__inode_sb_list_add(inode);
+		spin_lock(&inode->i_lock);
 		inode->i_state = 0;
-		spin_unlock(&inode_lock);
+		spin_unlock(&inode->i_lock);
+		inode_sb_list_add(inode);
 	}
 	return inode;
 }
 EXPORT_SYMBOL(new_inode);
 
+/**
+ * unlock_new_inode - clear the I_NEW state and wake up any waiters
+ * @inode:	new inode to unlock
+ *
+ * Called when the inode is fully initialised to clear the new state of the
+ * inode and wake up anyone waiting for the inode to finish initialisation.
+ */
 void unlock_new_inode(struct inode *inode)
 {
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
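new_inode() thus initialises i_state under i_lock and only then publishes the inode on the superblock list. A hedged sketch of a typical caller in a create path (sb and mode come from the caller; error handling abbreviated):

	struct inode *inode = new_inode(sb);

	if (!inode)
		return -ENOMEM;
	inode->i_ino = get_next_ino();
	inode->i_mode = mode;
	inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
	/* ... set i_op/i_fop, then hash and instantiate ... */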
@@ -859,51 +923,67 @@ void unlock_new_inode(struct inode *inode)
 		}
 	}
 #endif
-	/*
-	 * This is special! We do not need the spinlock when clearing I_NEW,
-	 * because we're guaranteed that nobody else tries to do anything about
-	 * the state of the inode when it is locked, as we just created it (so
-	 * there can be no old holders that haven't tested I_NEW).
-	 * However we must emit the memory barrier so that other CPUs reliably
-	 * see the clearing of I_NEW after the other inode initialisation has
-	 * completed.
-	 */
-	smp_mb();
+	spin_lock(&inode->i_lock);
 	WARN_ON(!(inode->i_state & I_NEW));
 	inode->i_state &= ~I_NEW;
-	wake_up_inode(inode);
+	wake_up_bit(&inode->i_state, __I_NEW);
+	spin_unlock(&inode->i_lock);
 }
 EXPORT_SYMBOL(unlock_new_inode);
 
-/*
- * This is called without the inode lock held.. Be careful.
+/**
+ * iget5_locked - obtain an inode from a mounted file system
+ * @sb:		super block of file system
+ * @hashval:	hash value (usually inode number) to get
+ * @test:	callback used for comparisons between inodes
+ * @set:	callback used to initialize a new struct inode
+ * @data:	opaque data pointer to pass to @test and @set
+ *
+ * Search for the inode specified by @hashval and @data in the inode cache,
+ * and if present it is return it with an increased reference count. This is
+ * a generalized version of iget_locked() for file systems where the inode
+ * number is not sufficient for unique identification of an inode.
  *
- * We no longer cache the sb_flags in i_flags - see fs.h
- *	-- rmk@arm.uk.linux.org
+ * If the inode is not in cache, allocate a new inode and return it locked,
+ * hashed, and with the I_NEW flag set. The file system gets to fill it in
+ * before unlocking it via unlock_new_inode().
+ *
+ * Note both @test and @set are called with the inode_hash_lock held, so can't
+ * sleep.
  */
-static struct inode *get_new_inode(struct super_block *sb,
-		struct hlist_head *head,
-		int (*test)(struct inode *, void *),
-		int (*set)(struct inode *, void *),
-		void *data)
+struct inode *iget5_locked(struct super_block *sb, unsigned long hashval,
+		int (*test)(struct inode *, void *),
+		int (*set)(struct inode *, void *), void *data)
 {
+	struct hlist_head *head = inode_hashtable + hash(sb, hashval);
 	struct inode *inode;
 
+	spin_lock(&inode_hash_lock);
+	inode = find_inode(sb, head, test, data);
+	spin_unlock(&inode_hash_lock);
+
+	if (inode) {
+		wait_on_inode(inode);
+		return inode;
+	}
+
 	inode = alloc_inode(sb);
 	if (inode) {
 		struct inode *old;
 
-		spin_lock(&inode_lock);
+		spin_lock(&inode_hash_lock);
 		/* We released the lock, so.. */
 		old = find_inode(sb, head, test, data);
 		if (!old) {
 			if (set(inode, data))
 				goto set_failed;
 
-			hlist_add_head(&inode->i_hash, head);
-			__inode_sb_list_add(inode);
+			spin_lock(&inode->i_lock);
 			inode->i_state = I_NEW;
-			spin_unlock(&inode_lock);
+			hlist_add_head(&inode->i_hash, head);
+			spin_unlock(&inode->i_lock);
+			inode_sb_list_add(inode);
+			spin_unlock(&inode_hash_lock);
 
 			/* Return the locked inode with I_NEW set, the
 			 * caller is responsible for filling in the contents
@@ -916,7 +996,7 @@ static struct inode *get_new_inode(struct super_block *sb,
 			 * us. Use the old inode instead of the one we just
 			 * allocated.
 			 */
-			spin_unlock(&inode_lock);
+			spin_unlock(&inode_hash_lock);
 			destroy_inode(inode);
 			inode = old;
 			wait_on_inode(inode);
@@ -924,33 +1004,53 @@ static struct inode *get_new_inode(struct super_block *sb,
 			return inode;
 
 set_failed:
-	spin_unlock(&inode_lock);
+	spin_unlock(&inode_hash_lock);
 	destroy_inode(inode);
 	return NULL;
 }
+EXPORT_SYMBOL(iget5_locked);
 
-/*
- * get_new_inode_fast is the fast path version of get_new_inode, see the
- * comment at iget_locked for details.
+/**
+ * iget_locked - obtain an inode from a mounted file system
+ * @sb:		super block of file system
+ * @ino:	inode number to get
+ *
+ * Search for the inode specified by @ino in the inode cache and if present
+ * return it with an increased reference count. This is for file systems
+ * where the inode number is sufficient for unique identification of an inode.
+ *
+ * If the inode is not in cache, allocate a new inode and return it locked,
+ * hashed, and with the I_NEW flag set. The file system gets to fill it in
+ * before unlocking it via unlock_new_inode().
  */
-static struct inode *get_new_inode_fast(struct super_block *sb,
-		struct hlist_head *head, unsigned long ino)
+struct inode *iget_locked(struct super_block *sb, unsigned long ino)
 {
+	struct hlist_head *head = inode_hashtable + hash(sb, ino);
 	struct inode *inode;
 
+	spin_lock(&inode_hash_lock);
+	inode = find_inode_fast(sb, head, ino);
+	spin_unlock(&inode_hash_lock);
+	if (inode) {
+		wait_on_inode(inode);
+		return inode;
+	}
+
 	inode = alloc_inode(sb);
 	if (inode) {
 		struct inode *old;
 
-		spin_lock(&inode_lock);
+		spin_lock(&inode_hash_lock);
 		/* We released the lock, so.. */
 		old = find_inode_fast(sb, head, ino);
 		if (!old) {
 			inode->i_ino = ino;
-			hlist_add_head(&inode->i_hash, head);
-			__inode_sb_list_add(inode);
+			spin_lock(&inode->i_lock);
 			inode->i_state = I_NEW;
-			spin_unlock(&inode_lock);
+			hlist_add_head(&inode->i_hash, head);
+			spin_unlock(&inode->i_lock);
+			inode_sb_list_add(inode);
+			spin_unlock(&inode_hash_lock);
 
 			/* Return the locked inode with I_NEW set, the
 			 * caller is responsible for filling in the contents
@@ -963,13 +1063,14 @@ static struct inode *get_new_inode_fast(struct super_block *sb,
 			 * us. Use the old inode instead of the one we just
 			 * allocated.
 			 */
-			spin_unlock(&inode_lock);
+			spin_unlock(&inode_hash_lock);
 			destroy_inode(inode);
 			inode = old;
 			wait_on_inode(inode);
 		}
 	return inode;
 }
+EXPORT_SYMBOL(iget_locked);
 
 /*
  * search the inode cache for a matching inode number.
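The calling convention of iget_locked() is unchanged by the lock split; a hedged sketch of the canonical pattern a filesystem uses around it (the examplefs_* names are hypothetical):

	static struct inode *examplefs_iget(struct super_block *sb, unsigned long ino)
	{
		struct inode *inode = iget_locked(sb, ino);

		if (!inode)
			return ERR_PTR(-ENOMEM);
		if (!(inode->i_state & I_NEW))
			return inode;			/* cache hit, fully set up */
		if (examplefs_read_inode(inode)) {	/* hypothetical disk read */
			iget_failed(inode);
			return ERR_PTR(-EIO);
		}
		unlock_new_inode(inode);
		return inode;
	}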
@@ -984,10 +1085,14 @@ static int test_inode_iunique(struct super_block *sb, unsigned long ino)
 	struct hlist_node *node;
 	struct inode *inode;
 
+	spin_lock(&inode_hash_lock);
 	hlist_for_each_entry(inode, node, b, i_hash) {
-		if (inode->i_ino == ino && inode->i_sb == sb)
+		if (inode->i_ino == ino && inode->i_sb == sb) {
+			spin_unlock(&inode_hash_lock);
 			return 0;
+		}
 	}
+	spin_unlock(&inode_hash_lock);
 
 	return 1;
 }
@@ -1017,7 +1122,6 @@ ino_t iunique(struct super_block *sb, ino_t max_reserved)
 	static unsigned int counter;
 	ino_t res;
 
-	spin_lock(&inode_lock);
 	spin_lock(&iunique_lock);
 	do {
 		if (counter <= max_reserved)
@@ -1025,7 +1129,6 @@ ino_t iunique(struct super_block *sb, ino_t max_reserved)
 		res = counter++;
 	} while (!test_inode_iunique(sb, res));
 	spin_unlock(&iunique_lock);
-	spin_unlock(&inode_lock);
 
 	return res;
 }
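iunique() now serialises on iunique_lock plus the hash lock taken inside test_inode_iunique(). Its typical user (hedged sketch) is a filesystem that synthesises inode numbers at create time:

	inode->i_ino = iunique(sb, EXAMPLEFS_FIRST_INO);	/* reserved-ino constant hypothetical */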
@@ -1033,116 +1136,50 @@ EXPORT_SYMBOL(iunique);
 
 struct inode *igrab(struct inode *inode)
 {
-	spin_lock(&inode_lock);
-	if (!(inode->i_state & (I_FREEING|I_WILL_FREE)))
+	spin_lock(&inode->i_lock);
+	if (!(inode->i_state & (I_FREEING|I_WILL_FREE))) {
 		__iget(inode);
-	else
+		spin_unlock(&inode->i_lock);
+	} else {
+		spin_unlock(&inode->i_lock);
 		/*
 		 * Handle the case where s_op->clear_inode is not been
 		 * called yet, and somebody is calling igrab
 		 * while the inode is getting freed.
 		 */
 		inode = NULL;
-	spin_unlock(&inode_lock);
+	}
 	return inode;
 }
 EXPORT_SYMBOL(igrab);
 
 /**
- * ifind - internal function, you want ilookup5() or iget5().
- * @sb: super block of file system to search
- * @head: the head of the list to search
- * @test: callback used for comparisons between inodes
- * @data: opaque data pointer to pass to @test
- * @wait: if true wait for the inode to be unlocked, if false do not
- *
- * ifind() searches for the inode specified by @data in the inode
- * cache. This is a generalized version of ifind_fast() for file systems where
- * the inode number is not sufficient for unique identification of an inode.
- *
- * If the inode is in the cache, the inode is returned with an incremented
- * reference count.
- *
- * Otherwise NULL is returned.
- *
- * Note, @test is called with the inode_lock held, so can't sleep.
- */
-static struct inode *ifind(struct super_block *sb,
-		struct hlist_head *head, int (*test)(struct inode *, void *),
-		void *data, const int wait)
-{
-	struct inode *inode;
-
-	spin_lock(&inode_lock);
-	inode = find_inode(sb, head, test, data);
-	if (inode) {
-		spin_unlock(&inode_lock);
-		if (likely(wait))
-			wait_on_inode(inode);
-		return inode;
-	}
-	spin_unlock(&inode_lock);
-	return NULL;
-}
-
-/**
- * ifind_fast - internal function, you want ilookup() or iget().
- * @sb: super block of file system to search
- * @head: head of the list to search
- * @ino: inode number to search for
- *
- * ifind_fast() searches for the inode @ino in the inode cache. This is for
- * file systems where the inode number is sufficient for unique identification
- * of an inode.
- *
- * If the inode is in the cache, the inode is returned with an incremented
- * reference count.
- *
- * Otherwise NULL is returned.
- */
-static struct inode *ifind_fast(struct super_block *sb,
-		struct hlist_head *head, unsigned long ino)
-{
-	struct inode *inode;
-
-	spin_lock(&inode_lock);
-	inode = find_inode_fast(sb, head, ino);
-	if (inode) {
-		spin_unlock(&inode_lock);
-		wait_on_inode(inode);
-		return inode;
-	}
-	spin_unlock(&inode_lock);
-	return NULL;
-}
-
-/**
  * ilookup5_nowait - search for an inode in the inode cache
  * @sb:		super block of file system to search
  * @hashval:	hash value (usually inode number) to search for
  * @test:	callback used for comparisons between inodes
  * @data:	opaque data pointer to pass to @test
  *
- * ilookup5() uses ifind() to search for the inode specified by @hashval and
- * @data in the inode cache. This is a generalized version of ilookup() for
- * file systems where the inode number is not sufficient for unique
- * identification of an inode.
- *
+ * Search for the inode specified by @hashval and @data in the inode cache.
  * If the inode is in the cache, the inode is returned with an incremented
- * reference count. Note, the inode lock is not waited upon so you have to be
- * very careful what you do with the returned inode. You probably should be
- * using ilookup5() instead.
+ * reference count.
  *
- * Otherwise NULL is returned.
+ * Note: I_NEW is not waited upon so you have to be very careful what you do
+ * with the returned inode. You probably should be using ilookup5() instead.
  *
- * Note, @test is called with the inode_lock held, so can't sleep.
+ * Note: @test is called with the inode_hash_lock held, so can't sleep.
  */
 struct inode *ilookup5_nowait(struct super_block *sb, unsigned long hashval,
 		int (*test)(struct inode *, void *), void *data)
 {
 	struct hlist_head *head = inode_hashtable + hash(sb, hashval);
+	struct inode *inode;
+
+	spin_lock(&inode_hash_lock);
+	inode = find_inode(sb, head, test, data);
+	spin_unlock(&inode_hash_lock);
 
-	return ifind(sb, head, test, data, 0);
+	return inode;
 }
 EXPORT_SYMBOL(ilookup5_nowait);
 
@@ -1153,24 +1190,24 @@ EXPORT_SYMBOL(ilookup5_nowait);
  * @test:	callback used for comparisons between inodes
  * @data:	opaque data pointer to pass to @test
  *
- * ilookup5() uses ifind() to search for the inode specified by @hashval and
- * @data in the inode cache. This is a generalized version of ilookup() for
- * file systems where the inode number is not sufficient for unique
- * identification of an inode.
- *
- * If the inode is in the cache, the inode lock is waited upon and the inode is
+ * Search for the inode specified by @hashval and @data in the inode cache,
+ * and if the inode is in the cache, return the inode with an incremented
+ * reference count. Waits on I_NEW before returning the inode.
  * returned with an incremented reference count.
  *
- * Otherwise NULL is returned.
+ * This is a generalized version of ilookup() for file systems where the
+ * inode number is not sufficient for unique identification of an inode.
  *
- * Note, @test is called with the inode_lock held, so can't sleep.
+ * Note: @test is called with the inode_hash_lock held, so can't sleep.
  */
 struct inode *ilookup5(struct super_block *sb, unsigned long hashval,
 		int (*test)(struct inode *, void *), void *data)
 {
-	struct hlist_head *head = inode_hashtable + hash(sb, hashval);
+	struct inode *inode = ilookup5_nowait(sb, hashval, test, data);
 
-	return ifind(sb, head, test, data, 1);
+	if (inode)
+		wait_on_inode(inode);
+	return inode;
 }
 EXPORT_SYMBOL(ilookup5);
 
@@ -1179,91 +1216,23 @@ EXPORT_SYMBOL(ilookup5);
  * @sb:		super block of file system to search
  * @ino:	inode number to search for
  *
- * ilookup() uses ifind_fast() to search for the inode @ino in the inode cache.
- * This is for file systems where the inode number is sufficient for unique
- * identification of an inode.
- *
- * If the inode is in the cache, the inode is returned with an incremented
- * reference count.
- *
- * Otherwise NULL is returned.
+ * Search for the inode @ino in the inode cache, and if the inode is in the
+ * cache, the inode is returned with an incremented reference count.
  */
 struct inode *ilookup(struct super_block *sb, unsigned long ino)
 {
 	struct hlist_head *head = inode_hashtable + hash(sb, ino);
-
-	return ifind_fast(sb, head, ino);
-}
-EXPORT_SYMBOL(ilookup);
-
-/**
- * iget5_locked - obtain an inode from a mounted file system
- * @sb:		super block of file system
- * @hashval:	hash value (usually inode number) to get
- * @test:	callback used for comparisons between inodes
- * @set:	callback used to initialize a new struct inode
- * @data:	opaque data pointer to pass to @test and @set
- *
- * iget5_locked() uses ifind() to search for the inode specified by @hashval
- * and @data in the inode cache and if present it is returned with an increased
- * reference count. This is a generalized version of iget_locked() for file
- * systems where the inode number is not sufficient for unique identification
- * of an inode.
- *
- * If the inode is not in cache, get_new_inode() is called to allocate a new
- * inode and this is returned locked, hashed, and with the I_NEW flag set. The
- * file system gets to fill it in before unlocking it via unlock_new_inode().
- *
- * Note both @test and @set are called with the inode_lock held, so can't sleep.
- */
-struct inode *iget5_locked(struct super_block *sb, unsigned long hashval,
-		int (*test)(struct inode *, void *),
-		int (*set)(struct inode *, void *), void *data)
-{
-	struct hlist_head *head = inode_hashtable + hash(sb, hashval);
 	struct inode *inode;
 
-	inode = ifind(sb, head, test, data, 1);
-	if (inode)
-		return inode;
-	/*
-	 * get_new_inode() will do the right thing, re-trying the search
-	 * in case it had to block at any point.
-	 */
-	return get_new_inode(sb, head, test, set, data);
-}
-EXPORT_SYMBOL(iget5_locked);
-
-/**
- * iget_locked - obtain an inode from a mounted file system
- * @sb:		super block of file system
- * @ino:	inode number to get
- *
- * iget_locked() uses ifind_fast() to search for the inode specified by @ino in
- * the inode cache and if present it is returned with an increased reference
- * count. This is for file systems where the inode number is sufficient for
- * unique identification of an inode.
- *
- * If the inode is not in cache, get_new_inode_fast() is called to allocate a
- * new inode and this is returned locked, hashed, and with the I_NEW flag set.
- * The file system gets to fill it in before unlocking it via
- * unlock_new_inode().
- */
-struct inode *iget_locked(struct super_block *sb, unsigned long ino)
-{
-	struct hlist_head *head = inode_hashtable + hash(sb, ino);
-	struct inode *inode;
+	spin_lock(&inode_hash_lock);
+	inode = find_inode_fast(sb, head, ino);
+	spin_unlock(&inode_hash_lock);
 
-	inode = ifind_fast(sb, head, ino);
 	if (inode)
-		return inode;
-	/*
-	 * get_new_inode_fast() will do the right thing, re-trying the search
-	 * in case it had to block at any point.
-	 */
-	return get_new_inode_fast(sb, head, ino);
+		wait_on_inode(inode);
+	return inode;
 }
-EXPORT_SYMBOL(iget_locked);
+EXPORT_SYMBOL(ilookup);
 
 int insert_inode_locked(struct inode *inode)
 {
@@ -1271,27 +1240,33 @@ int insert_inode_locked(struct inode *inode)
 	ino_t ino = inode->i_ino;
 	struct hlist_head *head = inode_hashtable + hash(sb, ino);
 
-	inode->i_state |= I_NEW;
 	while (1) {
 		struct hlist_node *node;
 		struct inode *old = NULL;
-		spin_lock(&inode_lock);
+		spin_lock(&inode_hash_lock);
 		hlist_for_each_entry(old, node, head, i_hash) {
 			if (old->i_ino != ino)
 				continue;
 			if (old->i_sb != sb)
 				continue;
-			if (old->i_state & (I_FREEING|I_WILL_FREE))
+			spin_lock(&old->i_lock);
+			if (old->i_state & (I_FREEING|I_WILL_FREE)) {
+				spin_unlock(&old->i_lock);
 				continue;
+			}
 			break;
 		}
 		if (likely(!node)) {
+			spin_lock(&inode->i_lock);
+			inode->i_state |= I_NEW;
 			hlist_add_head(&inode->i_hash, head);
-			spin_unlock(&inode_lock);
+			spin_unlock(&inode->i_lock);
+			spin_unlock(&inode_hash_lock);
 			return 0;
 		}
 		__iget(old);
-		spin_unlock(&inode_lock);
+		spin_unlock(&old->i_lock);
+		spin_unlock(&inode_hash_lock);
 		wait_on_inode(old);
 		if (unlikely(!inode_unhashed(old))) {
 			iput(old);
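insert_inode_locked() is the create-side counterpart of iget_locked(): the new inode is hashed with I_NEW already set under i_lock, and losing a race against a live inode with the same number means waiting and retrying. A hedged sketch of the caller pattern:

	err = insert_inode_locked(inode);
	if (err < 0) {
		/* a live inode with this number already exists */
		make_bad_inode(inode);
		iput(inode);
		return err;
	}
	/* ... write the new on-disk inode ... */
	unlock_new_inode(inode);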
@@ -1308,29 +1283,34 @@ int insert_inode_locked4(struct inode *inode, unsigned long hashval,
 	struct super_block *sb = inode->i_sb;
 	struct hlist_head *head = inode_hashtable + hash(sb, hashval);
 
-	inode->i_state |= I_NEW;
-
 	while (1) {
 		struct hlist_node *node;
 		struct inode *old = NULL;
 
-		spin_lock(&inode_lock);
+		spin_lock(&inode_hash_lock);
 		hlist_for_each_entry(old, node, head, i_hash) {
 			if (old->i_sb != sb)
 				continue;
 			if (!test(old, data))
 				continue;
-			if (old->i_state & (I_FREEING|I_WILL_FREE))
+			spin_lock(&old->i_lock);
+			if (old->i_state & (I_FREEING|I_WILL_FREE)) {
+				spin_unlock(&old->i_lock);
 				continue;
+			}
 			break;
 		}
 		if (likely(!node)) {
+			spin_lock(&inode->i_lock);
+			inode->i_state |= I_NEW;
 			hlist_add_head(&inode->i_hash, head);
-			spin_unlock(&inode_lock);
+			spin_unlock(&inode->i_lock);
+			spin_unlock(&inode_hash_lock);
 			return 0;
 		}
 		__iget(old);
-		spin_unlock(&inode_lock);
+		spin_unlock(&old->i_lock);
+		spin_unlock(&inode_hash_lock);
 		wait_on_inode(old);
 		if (unlikely(!inode_unhashed(old))) {
 			iput(old);
@@ -1375,47 +1355,35 @@ static void iput_final(struct inode *inode)
 	const struct super_operations *op = inode->i_sb->s_op;
 	int drop;
 
+	WARN_ON(inode->i_state & I_NEW);
+
 	if (op && op->drop_inode)
 		drop = op->drop_inode(inode);
 	else
 		drop = generic_drop_inode(inode);
 
+	if (!drop && (sb->s_flags & MS_ACTIVE)) {
+		inode->i_state |= I_REFERENCED;
+		if (!(inode->i_state & (I_DIRTY|I_SYNC)))
+			inode_lru_list_add(inode);
+		spin_unlock(&inode->i_lock);
+		return;
+	}
+
 	if (!drop) {
-		if (sb->s_flags & MS_ACTIVE) {
-			inode->i_state |= I_REFERENCED;
-			if (!(inode->i_state & (I_DIRTY|I_SYNC))) {
-				inode_lru_list_add(inode);
-			}
-			spin_unlock(&inode_lock);
-			return;
-		}
-		WARN_ON(inode->i_state & I_NEW);
 		inode->i_state |= I_WILL_FREE;
-		spin_unlock(&inode_lock);
+		spin_unlock(&inode->i_lock);
 		write_inode_now(inode, 1);
-		spin_lock(&inode_lock);
+		spin_lock(&inode->i_lock);
 		WARN_ON(inode->i_state & I_NEW);
 		inode->i_state &= ~I_WILL_FREE;
-		__remove_inode_hash(inode);
 	}
 
-	WARN_ON(inode->i_state & I_NEW);
 	inode->i_state |= I_FREEING;
-
-	/*
-	 * Move the inode off the IO lists and LRU once I_FREEING is
-	 * set so that it won't get moved back on there if it is dirty.
-	 */
 	inode_lru_list_del(inode);
-	list_del_init(&inode->i_wb_list);
+	spin_unlock(&inode->i_lock);
 
-	__inode_sb_list_del(inode);
-	spin_unlock(&inode_lock);
 	evict(inode);
-	remove_inode_hash(inode);
-	wake_up_inode(inode);
-	BUG_ON(inode->i_state != (I_FREEING | I_CLEAR));
-	destroy_inode(inode);
 }
 
 /**
@@ -1432,7 +1400,7 @@ void iput(struct inode *inode)
 	if (inode) {
 		BUG_ON(inode->i_state & I_CLEAR);
 
-		if (atomic_dec_and_lock(&inode->i_count, &inode_lock))
+		if (atomic_dec_and_lock(&inode->i_count, &inode->i_lock))
 			iput_final(inode);
 	}
 }
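iput() hands the final reference to iput_final() with i_lock already held via atomic_dec_and_lock(). Filesystems steer the caching decision there through ->drop_inode; a hedged sketch of one that never caches unused inodes (equivalent to using generic_delete_inode()):

	static int examplefs_drop_inode(struct inode *inode)
	{
		/* called by iput_final() under inode->i_lock; non-zero = evict now */
		return 1;
	}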
@@ -1611,9 +1579,8 @@ EXPORT_SYMBOL(inode_wait);
  * to recheck inode state.
  *
  * It doesn't matter if I_NEW is not set initially, a call to
- * wake_up_inode() after removing from the hash list will DTRT.
- *
- * This is called with inode_lock held.
+ * wake_up_bit(&inode->i_state, __I_NEW) after removing from the hash list
+ * will DTRT.
  */
 static void __wait_on_freeing_inode(struct inode *inode)
 {
@@ -1621,10 +1588,11 @@ static void __wait_on_freeing_inode(struct inode *inode)
 	DEFINE_WAIT_BIT(wait, &inode->i_state, __I_NEW);
 	wq = bit_waitqueue(&inode->i_state, __I_NEW);
 	prepare_to_wait(wq, &wait.wait, TASK_UNINTERRUPTIBLE);
-	spin_unlock(&inode_lock);
+	spin_unlock(&inode->i_lock);
+	spin_unlock(&inode_hash_lock);
 	schedule();
 	finish_wait(wq, &wait.wait);
-	spin_lock(&inode_lock);
+	spin_lock(&inode_hash_lock);
 }
 
 static __initdata unsigned long ihash_entries;