diff options
author | Andrea Arcangeli <andrea@suse.de> | 2005-10-30 18:03:05 -0500 |
---|---|---|
committer | Linus Torvalds <torvalds@g5.osdl.org> | 2005-10-30 20:37:26 -0500 |
commit | 7f04c26d715a2467a49a2384268de8f70f787b51 (patch) | |
tree | 8507e9f705f063d996857789261b762b24fad94f /fs/fs-writeback.c | |
parent | 52303e8b5f8aa234865d40d76ea16b0ff4b27022 (diff) |
[PATCH] fix nr_unused accounting, and avoid recursing in iput with I_WILL_FREE set
list_move(&inode->i_list, &inode_in_use);
} else {
list_move(&inode->i_list, &inode_unused);
+ inodes_stat.nr_unused++;
}
}
wake_up_inode(inode);
Are you sure the above diff is correct? It was added somewhere between
2.6.5 and 2.6.8. I think it's wrong.
The only way I can imagine the i_count to be zero in the above path, is
that I_WILL_FREE is set. And if I_WILL_FREE is set, then we must not
increase nr_unused. So I believe the above change is buggy and it will
definitely overstate the number of unused inodes and it should be backed
out.
Note that __writeback_single_inode before calling __sync_single_inode, can
drop the spinlock and we can have both the dirty and locked bitflags clear
here:
spin_unlock(&inode_lock);
__wait_on_inode(inode);
iput(inode);
XXXXXXX
spin_lock(&inode_lock);
}
use inode again here
a construct like the above makes zero sense from a reference counting
standpoint.
Either we don't ever use the inode again after the iput, or the
inode_lock should be taken _before_ executing the iput (i.e. a __iput
would be required). Taking the inode_lock after iput means the iget was
useless if we keep using the inode after the iput.
So the only chance the 2.6 was safe to call __writeback_single_inode
with the i_count == 0, is that I_WILL_FREE is set (I_WILL_FREE will
prevent the VM to free the inode in XXXXX).
Potentially calling the above iput with I_WILL_FREE was also wrong
because it would recurse in iput_final (the second mainline bug).
The below (untested) patch fixes the nr_unused accounting, avoids recursing
in iput when I_WILL_FREE is set and makes sure (with the BUG_ON) that we
don't corrupt memory and that all holders that don't set I_WILL_FREE, keeps
a reference on the inode!
Signed-off-by: Andrea Arcangeli <andrea@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Diffstat (limited to 'fs/fs-writeback.c')
-rw-r--r-- | fs/fs-writeback.c | 28 |
1 files changed, 16 insertions, 12 deletions
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index e94ab398b717..ffab4783ac64 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c | |||
@@ -230,7 +230,6 @@ __sync_single_inode(struct inode *inode, struct writeback_control *wbc) | |||
230 | * The inode is clean, unused | 230 | * The inode is clean, unused |
231 | */ | 231 | */ |
232 | list_move(&inode->i_list, &inode_unused); | 232 | list_move(&inode->i_list, &inode_unused); |
233 | inodes_stat.nr_unused++; | ||
234 | } | 233 | } |
235 | } | 234 | } |
236 | wake_up_inode(inode); | 235 | wake_up_inode(inode); |
@@ -238,14 +237,20 @@ __sync_single_inode(struct inode *inode, struct writeback_control *wbc) | |||
238 | } | 237 | } |
239 | 238 | ||
240 | /* | 239 | /* |
241 | * Write out an inode's dirty pages. Called under inode_lock. | 240 | * Write out an inode's dirty pages. Called under inode_lock. Either the |
241 | * caller has ref on the inode (either via __iget or via syscall against an fd) | ||
242 | * or the inode has I_WILL_FREE set (via generic_forget_inode) | ||
242 | */ | 243 | */ |
243 | static int | 244 | static int |
244 | __writeback_single_inode(struct inode *inode, | 245 | __writeback_single_inode(struct inode *inode, struct writeback_control *wbc) |
245 | struct writeback_control *wbc) | ||
246 | { | 246 | { |
247 | wait_queue_head_t *wqh; | 247 | wait_queue_head_t *wqh; |
248 | 248 | ||
249 | if (!atomic_read(&inode->i_count)) | ||
250 | WARN_ON(!(inode->i_state & I_WILL_FREE)); | ||
251 | else | ||
252 | WARN_ON(inode->i_state & I_WILL_FREE); | ||
253 | |||
249 | if ((wbc->sync_mode != WB_SYNC_ALL) && (inode->i_state & I_LOCK)) { | 254 | if ((wbc->sync_mode != WB_SYNC_ALL) && (inode->i_state & I_LOCK)) { |
250 | list_move(&inode->i_list, &inode->i_sb->s_dirty); | 255 | list_move(&inode->i_list, &inode->i_sb->s_dirty); |
251 | return 0; | 256 | return 0; |
@@ -259,11 +264,9 @@ __writeback_single_inode(struct inode *inode, | |||
259 | 264 | ||
260 | wqh = bit_waitqueue(&inode->i_state, __I_LOCK); | 265 | wqh = bit_waitqueue(&inode->i_state, __I_LOCK); |
261 | do { | 266 | do { |
262 | __iget(inode); | ||
263 | spin_unlock(&inode_lock); | 267 | spin_unlock(&inode_lock); |
264 | __wait_on_bit(wqh, &wq, inode_wait, | 268 | __wait_on_bit(wqh, &wq, inode_wait, |
265 | TASK_UNINTERRUPTIBLE); | 269 | TASK_UNINTERRUPTIBLE); |
266 | iput(inode); | ||
267 | spin_lock(&inode_lock); | 270 | spin_lock(&inode_lock); |
268 | } while (inode->i_state & I_LOCK); | 271 | } while (inode->i_state & I_LOCK); |
269 | } | 272 | } |
@@ -541,14 +544,15 @@ void sync_inodes(int wait) | |||
541 | } | 544 | } |
542 | 545 | ||
543 | /** | 546 | /** |
544 | * write_inode_now - write an inode to disk | 547 | * write_inode_now - write an inode to disk |
545 | * @inode: inode to write to disk | 548 | * @inode: inode to write to disk |
546 | * @sync: whether the write should be synchronous or not | 549 | * @sync: whether the write should be synchronous or not |
550 | * | ||
551 | * This function commits an inode to disk immediately if it is dirty. This is | ||
552 | * primarily needed by knfsd. | ||
547 | * | 553 | * |
548 | * This function commits an inode to disk immediately if it is | 554 | * The caller must either have a ref on the inode or must have set I_WILL_FREE. |
549 | * dirty. This is primarily needed by knfsd. | ||
550 | */ | 555 | */ |
551 | |||
552 | int write_inode_now(struct inode *inode, int sync) | 556 | int write_inode_now(struct inode *inode, int sync) |
553 | { | 557 | { |
554 | int ret; | 558 | int ret; |