about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author Dave Chinner <dchinner@redhat.com> 2011-03-22 07:23:40 -0400
committer Al Viro <viro@zeniv.linux.org.uk> 2011-03-24 21:16:32 -0400
commit 55fa6091d83160ca772fc37cebae45d42695a708 (patch)
tree 4df49f372032e30449e1a2dd64daf443e20b781c
parent f283c86afe6aa70b733d1ecebad5d9464943b774 (diff)
fs: move i_sb_list out from under inode_lock
Protect the per-sb inode list with a new global lock inode_sb_list_lock and use it to protect the list manipulations and traversals. This lock replaces the inode_lock as the inodes on the list can be validity checked while holding the inode->i_lock and hence the inode_lock is no longer needed to protect the list.

Signed-off-by: Dave Chinner <dchinner@redhat.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
-rw-r--r-- fs/drop_caches.c 9
-rw-r--r-- fs/fs-writeback.c 21
-rw-r--r-- fs/inode.c 43
-rw-r--r-- fs/internal.h 2
-rw-r--r-- fs/notify/inode_mark.c 20
-rw-r--r-- fs/quota/dquot.c 28
6 files changed, 67 insertions, 56 deletions
diff --git a/fs/drop_caches.c b/fs/drop_caches.c
index 6c6f73ba0868..98b77c89494c 100644
--- a/fs/drop_caches.c
+++ b/fs/drop_caches.c
@@ -8,6 +8,7 @@
8#include <linux/writeback.h> 8#include <linux/writeback.h>
9#include <linux/sysctl.h> 9#include <linux/sysctl.h>
10#include <linux/gfp.h> 10#include <linux/gfp.h>
11#include "internal.h"
11 12
12/* A global variable is a bit ugly, but it keeps the code simple */ 13/* A global variable is a bit ugly, but it keeps the code simple */
13int sysctl_drop_caches; 14int sysctl_drop_caches;
@@ -16,7 +17,7 @@ static void drop_pagecache_sb(struct super_block *sb, void *unused)
16{ 17{
17 struct inode *inode, *toput_inode = NULL; 18 struct inode *inode, *toput_inode = NULL;
18 19
19 spin_lock(&inode_lock); 20 spin_lock(&inode_sb_list_lock);
20 list_for_each_entry(inode, &sb->s_inodes, i_sb_list) { 21 list_for_each_entry(inode, &sb->s_inodes, i_sb_list) {
21 spin_lock(&inode->i_lock); 22 spin_lock(&inode->i_lock);
22 if ((inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW)) || 23 if ((inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW)) ||
@@ -26,13 +27,13 @@ static void drop_pagecache_sb(struct super_block *sb, void *unused)
26 } 27 }
27 __iget(inode); 28 __iget(inode);
28 spin_unlock(&inode->i_lock); 29 spin_unlock(&inode->i_lock);
29 spin_unlock(&inode_lock); 30 spin_unlock(&inode_sb_list_lock);
30 invalidate_mapping_pages(inode->i_mapping, 0, -1); 31 invalidate_mapping_pages(inode->i_mapping, 0, -1);
31 iput(toput_inode); 32 iput(toput_inode);
32 toput_inode = inode; 33 toput_inode = inode;
33 spin_lock(&inode_lock); 34 spin_lock(&inode_sb_list_lock);
34 } 35 }
35 spin_unlock(&inode_lock); 36 spin_unlock(&inode_sb_list_lock);
36 iput(toput_inode); 37 iput(toput_inode);
37} 38}
38 39
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index efd1ebe879cc..5de56a2182bb 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -1123,7 +1123,7 @@ static void wait_sb_inodes(struct super_block *sb)
1123 */ 1123 */
1124 WARN_ON(!rwsem_is_locked(&sb->s_umount)); 1124 WARN_ON(!rwsem_is_locked(&sb->s_umount));
1125 1125
1126 spin_lock(&inode_lock); 1126 spin_lock(&inode_sb_list_lock);
1127 1127
1128 /* 1128 /*
1129 * Data integrity sync. Must wait for all pages under writeback, 1129 * Data integrity sync. Must wait for all pages under writeback,
@@ -1143,14 +1143,15 @@ static void wait_sb_inodes(struct super_block *sb)
1143 } 1143 }
1144 __iget(inode); 1144 __iget(inode);
1145 spin_unlock(&inode->i_lock); 1145 spin_unlock(&inode->i_lock);
1146 spin_unlock(&inode_lock); 1146 spin_unlock(&inode_sb_list_lock);
1147
1147 /* 1148 /*
1148 * We hold a reference to 'inode' so it couldn't have 1149 * We hold a reference to 'inode' so it couldn't have been
1149 * been removed from s_inodes list while we dropped the 1150 * removed from s_inodes list while we dropped the
1150 * inode_lock. We cannot iput the inode now as we can 1151 * inode_sb_list_lock. We cannot iput the inode now as we can
1151 * be holding the last reference and we cannot iput it 1152 * be holding the last reference and we cannot iput it under
1152 * under inode_lock. So we keep the reference and iput 1153 * inode_sb_list_lock. So we keep the reference and iput it
1153 * it later. 1154 * later.
1154 */ 1155 */
1155 iput(old_inode); 1156 iput(old_inode);
1156 old_inode = inode; 1157 old_inode = inode;
@@ -1159,9 +1160,9 @@ static void wait_sb_inodes(struct super_block *sb)
1159 1160
1160 cond_resched(); 1161 cond_resched();
1161 1162
1162 spin_lock(&inode_lock); 1163 spin_lock(&inode_sb_list_lock);
1163 } 1164 }
1164 spin_unlock(&inode_lock); 1165 spin_unlock(&inode_sb_list_lock);
1165 iput(old_inode); 1166 iput(old_inode);
1166} 1167}
1167 1168
diff --git a/fs/inode.c b/fs/inode.c
index 389f5a247599..785b1ab23ff0 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -34,10 +34,15 @@
34 * inode->i_state, inode->i_hash, __iget() 34 * inode->i_state, inode->i_hash, __iget()
35 * inode_lru_lock protects: 35 * inode_lru_lock protects:
36 * inode_lru, inode->i_lru 36 * inode_lru, inode->i_lru
37 * inode_sb_list_lock protects:
38 * sb->s_inodes, inode->i_sb_list
37 * 39 *
38 * Lock ordering: 40 * Lock ordering:
39 * inode_lock 41 * inode_lock
40 * inode->i_lock 42 * inode->i_lock
43 *
44 * inode_sb_list_lock
45 * inode->i_lock
41 * inode_lru_lock 46 * inode_lru_lock
42 */ 47 */
43 48
@@ -99,6 +104,8 @@ static struct hlist_head *inode_hashtable __read_mostly;
99 */ 104 */
100DEFINE_SPINLOCK(inode_lock); 105DEFINE_SPINLOCK(inode_lock);
101 106
107__cacheline_aligned_in_smp DEFINE_SPINLOCK(inode_sb_list_lock);
108
102/* 109/*
103 * iprune_sem provides exclusion between the icache shrinking and the 110 * iprune_sem provides exclusion between the icache shrinking and the
104 * umount path. 111 * umount path.
@@ -378,26 +385,23 @@ static void inode_lru_list_del(struct inode *inode)
378 spin_unlock(&inode_lru_lock); 385 spin_unlock(&inode_lru_lock);
379} 386}
380 387
381static inline void __inode_sb_list_add(struct inode *inode)
382{
383 list_add(&inode->i_sb_list, &inode->i_sb->s_inodes);
384}
385
386/** 388/**
387 * inode_sb_list_add - add inode to the superblock list of inodes 389 * inode_sb_list_add - add inode to the superblock list of inodes
388 * @inode: inode to add 390 * @inode: inode to add
389 */ 391 */
390void inode_sb_list_add(struct inode *inode) 392void inode_sb_list_add(struct inode *inode)
391{ 393{
392 spin_lock(&inode_lock); 394 spin_lock(&inode_sb_list_lock);
393 __inode_sb_list_add(inode); 395 list_add(&inode->i_sb_list, &inode->i_sb->s_inodes);
394 spin_unlock(&inode_lock); 396 spin_unlock(&inode_sb_list_lock);
395} 397}
396EXPORT_SYMBOL_GPL(inode_sb_list_add); 398EXPORT_SYMBOL_GPL(inode_sb_list_add);
397 399
398static inline void __inode_sb_list_del(struct inode *inode) 400static inline void inode_sb_list_del(struct inode *inode)
399{ 401{
402 spin_lock(&inode_sb_list_lock);
400 list_del_init(&inode->i_sb_list); 403 list_del_init(&inode->i_sb_list);
404 spin_unlock(&inode_sb_list_lock);
401} 405}
402 406
403static unsigned long hash(struct super_block *sb, unsigned long hashval) 407static unsigned long hash(struct super_block *sb, unsigned long hashval)
@@ -481,9 +485,10 @@ static void evict(struct inode *inode)
481 485
482 spin_lock(&inode_lock); 486 spin_lock(&inode_lock);
483 list_del_init(&inode->i_wb_list); 487 list_del_init(&inode->i_wb_list);
484 __inode_sb_list_del(inode);
485 spin_unlock(&inode_lock); 488 spin_unlock(&inode_lock);
486 489
490 inode_sb_list_del(inode);
491
487 if (op->evict_inode) { 492 if (op->evict_inode) {
488 op->evict_inode(inode); 493 op->evict_inode(inode);
489 } else { 494 } else {
@@ -539,7 +544,7 @@ void evict_inodes(struct super_block *sb)
539 struct inode *inode, *next; 544 struct inode *inode, *next;
540 LIST_HEAD(dispose); 545 LIST_HEAD(dispose);
541 546
542 spin_lock(&inode_lock); 547 spin_lock(&inode_sb_list_lock);
543 list_for_each_entry_safe(inode, next, &sb->s_inodes, i_sb_list) { 548 list_for_each_entry_safe(inode, next, &sb->s_inodes, i_sb_list) {
544 if (atomic_read(&inode->i_count)) 549 if (atomic_read(&inode->i_count))
545 continue; 550 continue;
@@ -555,7 +560,7 @@ void evict_inodes(struct super_block *sb)
555 spin_unlock(&inode->i_lock); 560 spin_unlock(&inode->i_lock);
556 list_add(&inode->i_lru, &dispose); 561 list_add(&inode->i_lru, &dispose);
557 } 562 }
558 spin_unlock(&inode_lock); 563 spin_unlock(&inode_sb_list_lock);
559 564
560 dispose_list(&dispose); 565 dispose_list(&dispose);
561 566
@@ -584,7 +589,7 @@ int invalidate_inodes(struct super_block *sb, bool kill_dirty)
584 struct inode *inode, *next; 589 struct inode *inode, *next;
585 LIST_HEAD(dispose); 590 LIST_HEAD(dispose);
586 591
587 spin_lock(&inode_lock); 592 spin_lock(&inode_sb_list_lock);
588 list_for_each_entry_safe(inode, next, &sb->s_inodes, i_sb_list) { 593 list_for_each_entry_safe(inode, next, &sb->s_inodes, i_sb_list) {
589 spin_lock(&inode->i_lock); 594 spin_lock(&inode->i_lock);
590 if (inode->i_state & (I_NEW | I_FREEING | I_WILL_FREE)) { 595 if (inode->i_state & (I_NEW | I_FREEING | I_WILL_FREE)) {
@@ -607,7 +612,7 @@ int invalidate_inodes(struct super_block *sb, bool kill_dirty)
607 spin_unlock(&inode->i_lock); 612 spin_unlock(&inode->i_lock);
608 list_add(&inode->i_lru, &dispose); 613 list_add(&inode->i_lru, &dispose);
609 } 614 }
610 spin_unlock(&inode_lock); 615 spin_unlock(&inode_sb_list_lock);
611 616
612 dispose_list(&dispose); 617 dispose_list(&dispose);
613 618
@@ -867,16 +872,14 @@ struct inode *new_inode(struct super_block *sb)
867{ 872{
868 struct inode *inode; 873 struct inode *inode;
869 874
870 spin_lock_prefetch(&inode_lock); 875 spin_lock_prefetch(&inode_sb_list_lock);
871 876
872 inode = alloc_inode(sb); 877 inode = alloc_inode(sb);
873 if (inode) { 878 if (inode) {
874 spin_lock(&inode_lock);
875 spin_lock(&inode->i_lock); 879 spin_lock(&inode->i_lock);
876 inode->i_state = 0; 880 inode->i_state = 0;
877 spin_unlock(&inode->i_lock); 881 spin_unlock(&inode->i_lock);
878 __inode_sb_list_add(inode); 882 inode_sb_list_add(inode);
879 spin_unlock(&inode_lock);
880 } 883 }
881 return inode; 884 return inode;
882} 885}
@@ -945,7 +948,7 @@ static struct inode *get_new_inode(struct super_block *sb,
945 inode->i_state = I_NEW; 948 inode->i_state = I_NEW;
946 hlist_add_head(&inode->i_hash, head); 949 hlist_add_head(&inode->i_hash, head);
947 spin_unlock(&inode->i_lock); 950 spin_unlock(&inode->i_lock);
948 __inode_sb_list_add(inode); 951 inode_sb_list_add(inode);
949 spin_unlock(&inode_lock); 952 spin_unlock(&inode_lock);
950 953
951 /* Return the locked inode with I_NEW set, the 954 /* Return the locked inode with I_NEW set, the
@@ -994,7 +997,7 @@ static struct inode *get_new_inode_fast(struct super_block *sb,
994 inode->i_state = I_NEW; 997 inode->i_state = I_NEW;
995 hlist_add_head(&inode->i_hash, head); 998 hlist_add_head(&inode->i_hash, head);
996 spin_unlock(&inode->i_lock); 999 spin_unlock(&inode->i_lock);
997 __inode_sb_list_add(inode); 1000 inode_sb_list_add(inode);
998 spin_unlock(&inode_lock); 1001 spin_unlock(&inode_lock);
999 1002
1000 /* Return the locked inode with I_NEW set, the 1003 /* Return the locked inode with I_NEW set, the
diff --git a/fs/internal.h b/fs/internal.h
index 8318059b42c6..7013ae0c88c1 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -125,6 +125,8 @@ extern long do_handle_open(int mountdirfd,
125/* 125/*
126 * inode.c 126 * inode.c
127 */ 127 */
128extern spinlock_t inode_sb_list_lock;
129
128extern int get_nr_dirty_inodes(void); 130extern int get_nr_dirty_inodes(void);
129extern void evict_inodes(struct super_block *); 131extern void evict_inodes(struct super_block *);
130extern int invalidate_inodes(struct super_block *, bool); 132extern int invalidate_inodes(struct super_block *, bool);
diff --git a/fs/notify/inode_mark.c b/fs/notify/inode_mark.c
index 4dd53fb44124..fb3b3c5ef0ee 100644
--- a/fs/notify/inode_mark.c
+++ b/fs/notify/inode_mark.c
@@ -29,6 +29,8 @@
29#include <linux/fsnotify_backend.h> 29#include <linux/fsnotify_backend.h>
30#include "fsnotify.h" 30#include "fsnotify.h"
31 31
32#include "../internal.h"
33
32/* 34/*
33 * Recalculate the mask of events relevant to a given inode locked. 35 * Recalculate the mask of events relevant to a given inode locked.
34 */ 36 */
@@ -237,15 +239,14 @@ out:
237 * fsnotify_unmount_inodes - an sb is unmounting. handle any watched inodes. 239 * fsnotify_unmount_inodes - an sb is unmounting. handle any watched inodes.
238 * @list: list of inodes being unmounted (sb->s_inodes) 240 * @list: list of inodes being unmounted (sb->s_inodes)
239 * 241 *
240 * Called with inode_lock held, protecting the unmounting super block's list 242 * Called during unmount with no locks held, so needs to be safe against
241 * of inodes, and with iprune_mutex held, keeping shrink_icache_memory() at bay. 243 * concurrent modifiers. We temporarily drop inode_sb_list_lock and CAN block.
242 * We temporarily drop inode_lock, however, and CAN block.
243 */ 244 */
244void fsnotify_unmount_inodes(struct list_head *list) 245void fsnotify_unmount_inodes(struct list_head *list)
245{ 246{
246 struct inode *inode, *next_i, *need_iput = NULL; 247 struct inode *inode, *next_i, *need_iput = NULL;
247 248
248 spin_lock(&inode_lock); 249 spin_lock(&inode_sb_list_lock);
249 list_for_each_entry_safe(inode, next_i, list, i_sb_list) { 250 list_for_each_entry_safe(inode, next_i, list, i_sb_list) {
250 struct inode *need_iput_tmp; 251 struct inode *need_iput_tmp;
251 252
@@ -293,12 +294,11 @@ void fsnotify_unmount_inodes(struct list_head *list)
293 } 294 }
294 295
295 /* 296 /*
296 * We can safely drop inode_lock here because we hold 297 * We can safely drop inode_sb_list_lock here because we hold
297 * references on both inode and next_i. Also no new inodes 298 * references on both inode and next_i. Also no new inodes
298 * will be added since the umount has begun. Finally, 299 * will be added since the umount has begun.
299 * iprune_mutex keeps shrink_icache_memory() away.
300 */ 300 */
301 spin_unlock(&inode_lock); 301 spin_unlock(&inode_sb_list_lock);
302 302
303 if (need_iput_tmp) 303 if (need_iput_tmp)
304 iput(need_iput_tmp); 304 iput(need_iput_tmp);
@@ -310,7 +310,7 @@ void fsnotify_unmount_inodes(struct list_head *list)
310 310
311 iput(inode); 311 iput(inode);
312 312
313 spin_lock(&inode_lock); 313 spin_lock(&inode_sb_list_lock);
314 } 314 }
315 spin_unlock(&inode_lock); 315 spin_unlock(&inode_sb_list_lock);
316} 316}
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index a1470fda366c..fcc8ae75d874 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -76,7 +76,7 @@
76#include <linux/buffer_head.h> 76#include <linux/buffer_head.h>
77#include <linux/capability.h> 77#include <linux/capability.h>
78#include <linux/quotaops.h> 78#include <linux/quotaops.h>
79#include <linux/writeback.h> /* for inode_lock, oddly enough.. */ 79#include "../internal.h" /* ugh */
80 80
81#include <asm/uaccess.h> 81#include <asm/uaccess.h>
82 82
@@ -900,7 +900,7 @@ static void add_dquot_ref(struct super_block *sb, int type)
900 int reserved = 0; 900 int reserved = 0;
901#endif 901#endif
902 902
903 spin_lock(&inode_lock); 903 spin_lock(&inode_sb_list_lock);
904 list_for_each_entry(inode, &sb->s_inodes, i_sb_list) { 904 list_for_each_entry(inode, &sb->s_inodes, i_sb_list) {
905 spin_lock(&inode->i_lock); 905 spin_lock(&inode->i_lock);
906 if ((inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW)) || 906 if ((inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW)) ||
@@ -915,19 +915,23 @@ static void add_dquot_ref(struct super_block *sb, int type)
915#endif 915#endif
916 __iget(inode); 916 __iget(inode);
917 spin_unlock(&inode->i_lock); 917 spin_unlock(&inode->i_lock);
918 spin_unlock(&inode_lock); 918 spin_unlock(&inode_sb_list_lock);
919 919
920 iput(old_inode); 920 iput(old_inode);
921 __dquot_initialize(inode, type); 921 __dquot_initialize(inode, type);
922 /* We hold a reference to 'inode' so it couldn't have been 922
923 * removed from s_inodes list while we dropped the inode_lock. 923 /*
924 * We cannot iput the inode now as we can be holding the last 924 * We hold a reference to 'inode' so it couldn't have been
925 * reference and we cannot iput it under inode_lock. So we 925 * removed from s_inodes list while we dropped the
926 * keep the reference and iput it later. */ 926 * inode_sb_list_lock. We cannot iput the inode now as we can be
927 * holding the last reference and we cannot iput it under
928 * inode_sb_list_lock. So we keep the reference and iput it
929 * later.
930 */
927 old_inode = inode; 931 old_inode = inode;
928 spin_lock(&inode_lock); 932 spin_lock(&inode_sb_list_lock);
929 } 933 }
930 spin_unlock(&inode_lock); 934 spin_unlock(&inode_sb_list_lock);
931 iput(old_inode); 935 iput(old_inode);
932 936
933#ifdef CONFIG_QUOTA_DEBUG 937#ifdef CONFIG_QUOTA_DEBUG
@@ -1008,7 +1012,7 @@ static void remove_dquot_ref(struct super_block *sb, int type,
1008 struct inode *inode; 1012 struct inode *inode;
1009 int reserved = 0; 1013 int reserved = 0;
1010 1014
1011 spin_lock(&inode_lock); 1015 spin_lock(&inode_sb_list_lock);
1012 list_for_each_entry(inode, &sb->s_inodes, i_sb_list) { 1016 list_for_each_entry(inode, &sb->s_inodes, i_sb_list) {
1013 /* 1017 /*
1014 * We have to scan also I_NEW inodes because they can already 1018 * We have to scan also I_NEW inodes because they can already
@@ -1022,7 +1026,7 @@ static void remove_dquot_ref(struct super_block *sb, int type,
1022 remove_inode_dquot_ref(inode, type, tofree_head); 1026 remove_inode_dquot_ref(inode, type, tofree_head);
1023 } 1027 }
1024 } 1028 }
1025 spin_unlock(&inode_lock); 1029 spin_unlock(&inode_sb_list_lock);
1026#ifdef CONFIG_QUOTA_DEBUG 1030#ifdef CONFIG_QUOTA_DEBUG
1027 if (reserved) { 1031 if (reserved) {
1028 printk(KERN_WARNING "VFS (%s): Writes happened after quota" 1032 printk(KERN_WARNING "VFS (%s): Writes happened after quota"