diff options
author | Joern Engel <joern@wohnheim.fh-wedel.de> | 2007-10-17 02:30:44 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@woody.linux-foundation.org> | 2007-10-17 11:43:02 -0400 |
commit | 1c0eeaf5698597146ed9b873e2f9e0961edcf0f9 (patch) | |
tree | 5265eac8437e8ce517a62db8fe2bd99db5b7019b /fs | |
parent | 2e6883bdf49abd0e7f0d9b6297fc3be7ebb2250b (diff) |
introduce I_SYNC
I_LOCK was used for several unrelated purposes, which caused deadlock
situations in certain filesystems as a side effect. One of those purposes,
marking an inode as currently being synced, now uses the new I_SYNC bit.
Also document the various i_state bits and reorder them from historical
order to logical order.
[bunk@stusta.de: make fs/inode.c:wake_up_inode() static]
Signed-off-by: Joern Engel <joern@wohnheim.fh-wedel.de>
Cc: Dave Kleikamp <shaggy@linux.vnet.ibm.com>
Cc: David Chinner <dgc@sgi.com>
Cc: Anton Altaparmakov <aia21@cam.ac.uk>
Cc: Al Viro <viro@ftp.linux.org.uk>
Cc: Christoph Hellwig <hch@infradead.org>
Signed-off-by: Adrian Bunk <bunk@stusta.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'fs')
-rw-r--r-- | fs/fs-writeback.c | 39 | ||||
-rw-r--r-- | fs/hugetlbfs/inode.c | 2 | ||||
-rw-r--r-- | fs/inode.c | 24 | ||||
-rw-r--r-- | fs/jfs/jfs_txnmgr.c | 9 | ||||
-rw-r--r-- | fs/xfs/linux-2.6/xfs_iops.c | 4 |
5 files changed, 47 insertions, 31 deletions
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 71c158ac60a3..686734ff973d 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c | |||
@@ -100,11 +100,11 @@ void __mark_inode_dirty(struct inode *inode, int flags) | |||
100 | inode->i_state |= flags; | 100 | inode->i_state |= flags; |
101 | 101 | ||
102 | /* | 102 | /* |
103 | * If the inode is locked, just update its dirty state. | 103 | * If the inode is being synced, just update its dirty state. |
104 | * The unlocker will place the inode on the appropriate | 104 | * The unlocker will place the inode on the appropriate |
105 | * superblock list, based upon its state. | 105 | * superblock list, based upon its state. |
106 | */ | 106 | */ |
107 | if (inode->i_state & I_LOCK) | 107 | if (inode->i_state & I_SYNC) |
108 | goto out; | 108 | goto out; |
109 | 109 | ||
110 | /* | 110 | /* |
@@ -172,6 +172,15 @@ static void requeue_io(struct inode *inode) | |||
172 | list_move(&inode->i_list, &inode->i_sb->s_more_io); | 172 | list_move(&inode->i_list, &inode->i_sb->s_more_io); |
173 | } | 173 | } |
174 | 174 | ||
175 | static void inode_sync_complete(struct inode *inode) | ||
176 | { | ||
177 | /* | ||
178 | * Prevent speculative execution through spin_unlock(&inode_lock); | ||
179 | */ | ||
180 | smp_mb(); | ||
181 | wake_up_bit(&inode->i_state, __I_SYNC); | ||
182 | } | ||
183 | |||
175 | /* | 184 | /* |
176 | * Move expired dirty inodes from @delaying_queue to @dispatch_queue. | 185 | * Move expired dirty inodes from @delaying_queue to @dispatch_queue. |
177 | */ | 186 | */ |
@@ -225,11 +234,11 @@ __sync_single_inode(struct inode *inode, struct writeback_control *wbc) | |||
225 | int wait = wbc->sync_mode == WB_SYNC_ALL; | 234 | int wait = wbc->sync_mode == WB_SYNC_ALL; |
226 | int ret; | 235 | int ret; |
227 | 236 | ||
228 | BUG_ON(inode->i_state & I_LOCK); | 237 | BUG_ON(inode->i_state & I_SYNC); |
229 | 238 | ||
230 | /* Set I_LOCK, reset I_DIRTY */ | 239 | /* Set I_SYNC, reset I_DIRTY */ |
231 | dirty = inode->i_state & I_DIRTY; | 240 | dirty = inode->i_state & I_DIRTY; |
232 | inode->i_state |= I_LOCK; | 241 | inode->i_state |= I_SYNC; |
233 | inode->i_state &= ~I_DIRTY; | 242 | inode->i_state &= ~I_DIRTY; |
234 | 243 | ||
235 | spin_unlock(&inode_lock); | 244 | spin_unlock(&inode_lock); |
@@ -250,7 +259,7 @@ __sync_single_inode(struct inode *inode, struct writeback_control *wbc) | |||
250 | } | 259 | } |
251 | 260 | ||
252 | spin_lock(&inode_lock); | 261 | spin_lock(&inode_lock); |
253 | inode->i_state &= ~I_LOCK; | 262 | inode->i_state &= ~I_SYNC; |
254 | if (!(inode->i_state & I_FREEING)) { | 263 | if (!(inode->i_state & I_FREEING)) { |
255 | if (!(inode->i_state & I_DIRTY) && | 264 | if (!(inode->i_state & I_DIRTY) && |
256 | mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) { | 265 | mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) { |
@@ -305,7 +314,7 @@ __sync_single_inode(struct inode *inode, struct writeback_control *wbc) | |||
305 | list_move(&inode->i_list, &inode_unused); | 314 | list_move(&inode->i_list, &inode_unused); |
306 | } | 315 | } |
307 | } | 316 | } |
308 | wake_up_inode(inode); | 317 | inode_sync_complete(inode); |
309 | return ret; | 318 | return ret; |
310 | } | 319 | } |
311 | 320 | ||
@@ -324,7 +333,7 @@ __writeback_single_inode(struct inode *inode, struct writeback_control *wbc) | |||
324 | else | 333 | else |
325 | WARN_ON(inode->i_state & I_WILL_FREE); | 334 | WARN_ON(inode->i_state & I_WILL_FREE); |
326 | 335 | ||
327 | if ((wbc->sync_mode != WB_SYNC_ALL) && (inode->i_state & I_LOCK)) { | 336 | if ((wbc->sync_mode != WB_SYNC_ALL) && (inode->i_state & I_SYNC)) { |
328 | struct address_space *mapping = inode->i_mapping; | 337 | struct address_space *mapping = inode->i_mapping; |
329 | int ret; | 338 | int ret; |
330 | 339 | ||
@@ -350,16 +359,16 @@ __writeback_single_inode(struct inode *inode, struct writeback_control *wbc) | |||
350 | /* | 359 | /* |
351 | * It's a data-integrity sync. We must wait. | 360 | * It's a data-integrity sync. We must wait. |
352 | */ | 361 | */ |
353 | if (inode->i_state & I_LOCK) { | 362 | if (inode->i_state & I_SYNC) { |
354 | DEFINE_WAIT_BIT(wq, &inode->i_state, __I_LOCK); | 363 | DEFINE_WAIT_BIT(wq, &inode->i_state, __I_SYNC); |
355 | 364 | ||
356 | wqh = bit_waitqueue(&inode->i_state, __I_LOCK); | 365 | wqh = bit_waitqueue(&inode->i_state, __I_SYNC); |
357 | do { | 366 | do { |
358 | spin_unlock(&inode_lock); | 367 | spin_unlock(&inode_lock); |
359 | __wait_on_bit(wqh, &wq, inode_wait, | 368 | __wait_on_bit(wqh, &wq, inode_wait, |
360 | TASK_UNINTERRUPTIBLE); | 369 | TASK_UNINTERRUPTIBLE); |
361 | spin_lock(&inode_lock); | 370 | spin_lock(&inode_lock); |
362 | } while (inode->i_state & I_LOCK); | 371 | } while (inode->i_state & I_SYNC); |
363 | } | 372 | } |
364 | return __sync_single_inode(inode, wbc); | 373 | return __sync_single_inode(inode, wbc); |
365 | } | 374 | } |
@@ -392,7 +401,7 @@ __writeback_single_inode(struct inode *inode, struct writeback_control *wbc) | |||
392 | * The inodes to be written are parked on sb->s_io. They are moved back onto | 401 | * The inodes to be written are parked on sb->s_io. They are moved back onto |
393 | * sb->s_dirty as they are selected for writing. This way, none can be missed | 402 | * sb->s_dirty as they are selected for writing. This way, none can be missed |
394 | * on the writer throttling path, and we get decent balancing between many | 403 | * on the writer throttling path, and we get decent balancing between many |
395 | * throttled threads: we don't want them all piling up on __wait_on_inode. | 404 | * throttled threads: we don't want them all piling up on inode_sync_wait. |
396 | */ | 405 | */ |
397 | static void | 406 | static void |
398 | sync_sb_inodes(struct super_block *sb, struct writeback_control *wbc) | 407 | sync_sb_inodes(struct super_block *sb, struct writeback_control *wbc) |
@@ -661,7 +670,7 @@ int write_inode_now(struct inode *inode, int sync) | |||
661 | ret = __writeback_single_inode(inode, &wbc); | 670 | ret = __writeback_single_inode(inode, &wbc); |
662 | spin_unlock(&inode_lock); | 671 | spin_unlock(&inode_lock); |
663 | if (sync) | 672 | if (sync) |
664 | wait_on_inode(inode); | 673 | inode_sync_wait(inode); |
665 | return ret; | 674 | return ret; |
666 | } | 675 | } |
667 | EXPORT_SYMBOL(write_inode_now); | 676 | EXPORT_SYMBOL(write_inode_now); |
@@ -736,7 +745,7 @@ int generic_osync_inode(struct inode *inode, struct address_space *mapping, int | |||
736 | err = err2; | 745 | err = err2; |
737 | } | 746 | } |
738 | else | 747 | else |
739 | wait_on_inode(inode); | 748 | inode_sync_wait(inode); |
740 | 749 | ||
741 | return err; | 750 | return err; |
742 | } | 751 | } |
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index 6bf6890f0530..0f5df73dbb73 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c | |||
@@ -384,7 +384,7 @@ static void hugetlbfs_forget_inode(struct inode *inode) __releases(inode_lock) | |||
384 | struct super_block *sb = inode->i_sb; | 384 | struct super_block *sb = inode->i_sb; |
385 | 385 | ||
386 | if (!hlist_unhashed(&inode->i_hash)) { | 386 | if (!hlist_unhashed(&inode->i_hash)) { |
387 | if (!(inode->i_state & (I_DIRTY|I_LOCK))) | 387 | if (!(inode->i_state & (I_DIRTY|I_SYNC))) |
388 | list_move(&inode->i_list, &inode_unused); | 388 | list_move(&inode->i_list, &inode_unused); |
389 | inodes_stat.nr_unused++; | 389 | inodes_stat.nr_unused++; |
390 | if (!sb || (sb->s_flags & MS_ACTIVE)) { | 390 | if (!sb || (sb->s_flags & MS_ACTIVE)) { |
diff --git a/fs/inode.c b/fs/inode.c index c6165771e00e..ed35383d0b6c 100644 --- a/fs/inode.c +++ b/fs/inode.c | |||
@@ -99,6 +99,15 @@ struct inodes_stat_t inodes_stat; | |||
99 | 99 | ||
100 | static struct kmem_cache * inode_cachep __read_mostly; | 100 | static struct kmem_cache * inode_cachep __read_mostly; |
101 | 101 | ||
102 | static void wake_up_inode(struct inode *inode) | ||
103 | { | ||
104 | /* | ||
105 | * Prevent speculative execution through spin_unlock(&inode_lock); | ||
106 | */ | ||
107 | smp_mb(); | ||
108 | wake_up_bit(&inode->i_state, __I_LOCK); | ||
109 | } | ||
110 | |||
102 | static struct inode *alloc_inode(struct super_block *sb) | 111 | static struct inode *alloc_inode(struct super_block *sb) |
103 | { | 112 | { |
104 | static const struct address_space_operations empty_aops; | 113 | static const struct address_space_operations empty_aops; |
@@ -232,7 +241,7 @@ void __iget(struct inode * inode) | |||
232 | return; | 241 | return; |
233 | } | 242 | } |
234 | atomic_inc(&inode->i_count); | 243 | atomic_inc(&inode->i_count); |
235 | if (!(inode->i_state & (I_DIRTY|I_LOCK))) | 244 | if (!(inode->i_state & (I_DIRTY|I_SYNC))) |
236 | list_move(&inode->i_list, &inode_in_use); | 245 | list_move(&inode->i_list, &inode_in_use); |
237 | inodes_stat.nr_unused--; | 246 | inodes_stat.nr_unused--; |
238 | } | 247 | } |
@@ -253,7 +262,7 @@ void clear_inode(struct inode *inode) | |||
253 | BUG_ON(inode->i_data.nrpages); | 262 | BUG_ON(inode->i_data.nrpages); |
254 | BUG_ON(!(inode->i_state & I_FREEING)); | 263 | BUG_ON(!(inode->i_state & I_FREEING)); |
255 | BUG_ON(inode->i_state & I_CLEAR); | 264 | BUG_ON(inode->i_state & I_CLEAR); |
256 | wait_on_inode(inode); | 265 | inode_sync_wait(inode); |
257 | DQUOT_DROP(inode); | 266 | DQUOT_DROP(inode); |
258 | if (inode->i_sb->s_op->clear_inode) | 267 | if (inode->i_sb->s_op->clear_inode) |
259 | inode->i_sb->s_op->clear_inode(inode); | 268 | inode->i_sb->s_op->clear_inode(inode); |
@@ -1071,7 +1080,7 @@ static void generic_forget_inode(struct inode *inode) | |||
1071 | struct super_block *sb = inode->i_sb; | 1080 | struct super_block *sb = inode->i_sb; |
1072 | 1081 | ||
1073 | if (!hlist_unhashed(&inode->i_hash)) { | 1082 | if (!hlist_unhashed(&inode->i_hash)) { |
1074 | if (!(inode->i_state & (I_DIRTY|I_LOCK))) | 1083 | if (!(inode->i_state & (I_DIRTY|I_SYNC))) |
1075 | list_move(&inode->i_list, &inode_unused); | 1084 | list_move(&inode->i_list, &inode_unused); |
1076 | inodes_stat.nr_unused++; | 1085 | inodes_stat.nr_unused++; |
1077 | if (sb->s_flags & MS_ACTIVE) { | 1086 | if (sb->s_flags & MS_ACTIVE) { |
@@ -1314,15 +1323,6 @@ static void __wait_on_freeing_inode(struct inode *inode) | |||
1314 | spin_lock(&inode_lock); | 1323 | spin_lock(&inode_lock); |
1315 | } | 1324 | } |
1316 | 1325 | ||
1317 | void wake_up_inode(struct inode *inode) | ||
1318 | { | ||
1319 | /* | ||
1320 | * Prevent speculative execution through spin_unlock(&inode_lock); | ||
1321 | */ | ||
1322 | smp_mb(); | ||
1323 | wake_up_bit(&inode->i_state, __I_LOCK); | ||
1324 | } | ||
1325 | |||
1326 | /* | 1326 | /* |
1327 | * We rarely want to lock two inodes that do not have a parent/child | 1327 | * We rarely want to lock two inodes that do not have a parent/child |
1328 | * relationship (such as directory, child inode) simultaneously. The | 1328 | * relationship (such as directory, child inode) simultaneously. The |
diff --git a/fs/jfs/jfs_txnmgr.c b/fs/jfs/jfs_txnmgr.c index 7aa1f7004eaf..e7c60ae6b5b2 100644 --- a/fs/jfs/jfs_txnmgr.c +++ b/fs/jfs/jfs_txnmgr.c | |||
@@ -1289,7 +1289,14 @@ int txCommit(tid_t tid, /* transaction identifier */ | |||
1289 | * commit the transaction synchronously, so the last iput | 1289 | * commit the transaction synchronously, so the last iput |
1290 | * will be done by the calling thread (or later) | 1290 | * will be done by the calling thread (or later) |
1291 | */ | 1291 | */ |
1292 | if (tblk->u.ip->i_state & I_LOCK) | 1292 | /* |
1293 | * I believe this code is no longer needed. Splitting I_LOCK | ||
1294 | * into two bits, I_LOCK and I_SYNC should prevent this | ||
1295 | * deadlock as well. But since I don't have a JFS testload | ||
1296 | * to verify this, only a trivial s/I_LOCK/I_SYNC/ was done. | ||
1297 | * Joern | ||
1298 | */ | ||
1299 | if (tblk->u.ip->i_state & I_SYNC) | ||
1293 | tblk->xflag &= ~COMMIT_LAZY; | 1300 | tblk->xflag &= ~COMMIT_LAZY; |
1294 | } | 1301 | } |
1295 | 1302 | ||
diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c index 0b5fa124bef2..e0e06dd4bef2 100644 --- a/fs/xfs/linux-2.6/xfs_iops.c +++ b/fs/xfs/linux-2.6/xfs_iops.c | |||
@@ -133,7 +133,7 @@ xfs_ichgtime( | |||
133 | */ | 133 | */ |
134 | SYNCHRONIZE(); | 134 | SYNCHRONIZE(); |
135 | ip->i_update_core = 1; | 135 | ip->i_update_core = 1; |
136 | if (!(inode->i_state & I_LOCK)) | 136 | if (!(inode->i_state & I_SYNC)) |
137 | mark_inode_dirty_sync(inode); | 137 | mark_inode_dirty_sync(inode); |
138 | } | 138 | } |
139 | 139 | ||
@@ -185,7 +185,7 @@ xfs_ichgtime_fast( | |||
185 | */ | 185 | */ |
186 | SYNCHRONIZE(); | 186 | SYNCHRONIZE(); |
187 | ip->i_update_core = 1; | 187 | ip->i_update_core = 1; |
188 | if (!(inode->i_state & I_LOCK)) | 188 | if (!(inode->i_state & I_SYNC)) |
189 | mark_inode_dirty_sync(inode); | 189 | mark_inode_dirty_sync(inode); |
190 | } | 190 | } |
191 | 191 | ||