Diffstat (limited to 'fs/inode.c')
 fs/inode.c | 35 +++++++++++++++++++--------------
 1 file changed, 21 insertions(+), 14 deletions(-)
diff --git a/fs/inode.c b/fs/inode.c
index b2ba83d2c4e1..76582b06ab97 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -14,6 +14,7 @@
 #include <linux/module.h>
 #include <linux/backing-dev.h>
 #include <linux/wait.h>
+#include <linux/rwsem.h>
 #include <linux/hash.h>
 #include <linux/swap.h>
 #include <linux/security.h>
@@ -87,14 +88,18 @@ static struct hlist_head *inode_hashtable __read_mostly;
 DEFINE_SPINLOCK(inode_lock);
 
 /*
- * iprune_mutex provides exclusion between the kswapd or try_to_free_pages
+ * iprune_sem provides exclusion between the kswapd or try_to_free_pages
  * icache shrinking path, and the umount path. Without this exclusion,
  * by the time prune_icache calls iput for the inode whose pages it has
  * been invalidating, or by the time it calls clear_inode & destroy_inode
  * from its final dispose_list, the struct super_block they refer to
  * (for inode->i_sb->s_op) may already have been freed and reused.
+ *
+ * We make this an rwsem because the fastpath is icache shrinking. In
+ * some cases a filesystem may be doing a significant amount of work in
+ * its inode reclaim code, so this should improve parallelism.
  */
-static DEFINE_MUTEX(iprune_mutex);
+static DECLARE_RWSEM(iprune_sem);
 
 /*
  * Statistics gathering..
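The hunk above sets up the classic many-readers/one-writer split: every shrinker takes iprune_sem for reading, so reclaim can proceed in parallel, while umount takes it for writing and excludes all shrinkers at once. Below is a minimal userspace sketch of the same pattern, using a POSIX rwlock as a stand-in for the kernel rwsem; the function names shrink_path and umount_path are illustrative only, not kernel symbols. Like an rwsem, a pthread rwlock admits any number of concurrent read holders but only one write holder, which is exactly the property the patch relies on.

#include <pthread.h>

static pthread_rwlock_t iprune_sem = PTHREAD_RWLOCK_INITIALIZER;

/* Analogue of prune_icache(): any number of these may run at once. */
static void *shrink_path(void *arg)
{
	pthread_rwlock_rdlock(&iprune_sem);	/* down_read(&iprune_sem) */
	/* ... scan the unused-inode list and free what we find ... */
	pthread_rwlock_unlock(&iprune_sem);	/* up_read(&iprune_sem) */
	return NULL;
}

/* Analogue of invalidate_inodes(): excludes every shrinker. */
static void *umount_path(void *arg)
{
	pthread_rwlock_wrlock(&iprune_sem);	/* down_write(&iprune_sem) */
	/* ... dispose of the per-sb inodes; no shrinker can run here ... */
	pthread_rwlock_unlock(&iprune_sem);	/* up_write(&iprune_sem) */
	return NULL;
}

int main(void)
{
	pthread_t s1, s2, u;

	pthread_create(&s1, NULL, shrink_path, NULL);
	pthread_create(&s2, NULL, shrink_path, NULL);	/* may run alongside s1 */
	pthread_create(&u, NULL, umount_path, NULL);	/* excludes both */
	pthread_join(s1, NULL);
	pthread_join(s2, NULL);
	pthread_join(u, NULL);
	return 0;
}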
@@ -123,7 +128,7 @@ static void wake_up_inode(struct inode *inode)
 int inode_init_always(struct super_block *sb, struct inode *inode)
 {
 	static const struct address_space_operations empty_aops;
-	static struct inode_operations empty_iops;
+	static const struct inode_operations empty_iops;
 	static const struct file_operations empty_fops;
 	struct address_space *const mapping = &inode->i_data;
 
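Making empty_iops const is an incidental cleanup that brings it in line with the neighbouring empty_aops and empty_fops: these shared dummy operations tables are never written, so the qualifier lets the compiler place them in read-only data and turns any accidental store through the name into a build-time error.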
@@ -381,7 +386,7 @@ static int invalidate_list(struct list_head *head, struct list_head *dispose)
 	/*
 	 * We can reschedule here without worrying about the list's
 	 * consistency because the per-sb list of inodes must not
-	 * change during umount anymore, and because iprune_mutex keeps
+	 * change during umount anymore, and because iprune_sem keeps
 	 * shrink_icache_memory() away.
 	 */
 	cond_resched_lock(&inode_lock);
@@ -420,7 +425,7 @@ int invalidate_inodes(struct super_block *sb)
 	int busy;
 	LIST_HEAD(throw_away);
 
-	mutex_lock(&iprune_mutex);
+	down_write(&iprune_sem);
 	spin_lock(&inode_lock);
 	inotify_unmount_inodes(&sb->s_inodes);
 	fsnotify_unmount_inodes(&sb->s_inodes);
@@ -428,7 +433,7 @@ int invalidate_inodes(struct super_block *sb)
 	spin_unlock(&inode_lock);
 
 	dispose_list(&throw_away);
-	mutex_unlock(&iprune_mutex);
+	up_write(&iprune_sem);
 
 	return busy;
 }
@@ -467,7 +472,7 @@ static void prune_icache(int nr_to_scan)
 	int nr_scanned;
 	unsigned long reap = 0;
 
-	mutex_lock(&iprune_mutex);
+	down_read(&iprune_sem);
 	spin_lock(&inode_lock);
 	for (nr_scanned = 0; nr_scanned < nr_to_scan; nr_scanned++) {
 		struct inode *inode;
@@ -509,7 +514,7 @@ static void prune_icache(int nr_to_scan)
 	spin_unlock(&inode_lock);
 
 	dispose_list(&freeable);
-	mutex_unlock(&iprune_mutex);
+	up_read(&iprune_sem);
 }
 
 /*
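With prune_icache() now on the read side, kswapd and any number of direct-reclaim callers can shrink the icache concurrently; only invalidate_inodes() with its down_write() serialises against them, which is precisely the parallelism the new comment above iprune_sem promises.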
@@ -695,13 +700,15 @@ void unlock_new_inode(struct inode *inode)
 	}
 #endif
 	/*
-	 * This is special! We do not need the spinlock
-	 * when clearing I_LOCK, because we're guaranteed
-	 * that nobody else tries to do anything about the
-	 * state of the inode when it is locked, as we
-	 * just created it (so there can be no old holders
-	 * that haven't tested I_LOCK).
+	 * This is special! We do not need the spinlock when clearing I_LOCK,
+	 * because we're guaranteed that nobody else tries to do anything about
+	 * the state of the inode when it is locked, as we just created it (so
+	 * there can be no old holders that haven't tested I_LOCK).
+	 * However we must emit the memory barrier so that other CPUs reliably
+	 * see the clearing of I_LOCK after the other inode initialisation has
+	 * completed.
 	 */
+	smp_mb();
 	WARN_ON((inode->i_state & (I_LOCK|I_NEW)) != (I_LOCK|I_NEW));
 	inode->i_state &= ~(I_LOCK|I_NEW);
 	wake_up_inode(inode);
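The smp_mb() fixes a publication-ordering problem: once I_LOCK is cleared, other CPUs may inspect the inode without taking inode_lock, so every earlier initialisation store must be visible before the store that clears the flag. Below is a minimal C11 sketch of the same idea; fake_inode, publisher, and waiter are hypothetical names, a locked field stands in for the I_LOCK bit, and explicit fences play the role of smp_mb() (which is a full barrier, stronger than the release ordering this half of the story needs; note the reader also needs a matching acquire, which in the kernel comes out of the wait/wakeup machinery).

#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

struct fake_inode {
	int i_size;		/* ordinary initialisation data */
	atomic_int locked;	/* stands in for the I_LOCK bit */
};

static struct fake_inode ino = { .i_size = 0, .locked = 1 };

/* Analogue of unlock_new_inode(): initialise, fence, then publish. */
static void *publisher(void *arg)
{
	ino.i_size = 4096;				/* inode initialisation */
	atomic_thread_fence(memory_order_release);	/* role of smp_mb() */
	atomic_store_explicit(&ino.locked, 0, memory_order_relaxed);
	return NULL;
}

/* Analogue of a waiter that tests the flag without any spinlock. */
static void *waiter(void *arg)
{
	while (atomic_load_explicit(&ino.locked, memory_order_relaxed))
		;					/* spin until published */
	atomic_thread_fence(memory_order_acquire);	/* pairs with the release */
	printf("i_size = %d\n", ino.i_size);		/* must print 4096 */
	return NULL;
}

int main(void)
{
	pthread_t p, w;

	pthread_create(&w, NULL, waiter, NULL);
	pthread_create(&p, NULL, publisher, NULL);
	pthread_join(p, NULL);
	pthread_join(w, NULL);
	return 0;
}

Without the release fence, the compiler or CPU could reorder the i_size store after the locked store, and the waiter could observe the flag clear while still reading stale initialisation data, which is exactly the corruption the patch guards against.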