aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorAndrea Arcangeli <andrea@suse.de>2005-10-30 18:03:05 -0500
committerLinus Torvalds <torvalds@g5.osdl.org>2005-10-30 20:37:26 -0500
commit7f04c26d715a2467a49a2384268de8f70f787b51 (patch)
tree8507e9f705f063d996857789261b762b24fad94f
parent52303e8b5f8aa234865d40d76ea16b0ff4b27022 (diff)
[PATCH] fix nr_unused accounting, and avoid recursing in iput with I_WILL_FREE set
list_move(&inode->i_list, &inode_in_use); } else { list_move(&inode->i_list, &inode_unused); + inodes_stat.nr_unused++; } } wake_up_inode(inode); Are you sure the above diff is correct? It was added somewhere between 2.6.5 and 2.6.8. I think it's wrong. The only way I can imagine the i_count to be zero in the above path, is that I_WILL_FREE is set. And if I_WILL_FREE is set, then we must not increase nr_unused. So I believe the above change is buggy and it will definitely overstate the number of unused inodes and it should be backed out. Note that __writeback_single_inode before calling __sync_single_inode, can drop the spinlock and we can have both the dirty and locked bitflags clear here: spin_unlock(&inode_lock); __wait_on_inode(inode); iput(inode); XXXXXXX spin_lock(&inode_lock); } use inode again here a construct like the above makes zero sense from a reference counting standpoint. Either we don't ever use the inode again after the iput, or the inode_lock should be taken _before_ executing the iput (i.e. a __iput would be required). Taking the inode_lock after iput means the iget was useless if we keep using the inode after the iput. So the only chance the 2.6 was safe to call __writeback_single_inode with the i_count == 0, is that I_WILL_FREE is set (I_WILL_FREE will prevent the VM to free the inode in XXXXX). Potentially calling the above iput with I_WILL_FREE was also wrong because it would recurse in iput_final (the second mainline bug). The below (untested) patch fixes the nr_unused accounting, avoids recursing in iput when I_WILL_FREE is set and makes sure (with the BUG_ON) that we don't corrupt memory and that all holders that don't set I_WILL_FREE, keeps a reference on the inode! Signed-off-by: Andrea Arcangeli <andrea@suse.de> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
-rw-r--r--fs/fs-writeback.c28
-rw-r--r--fs/inode.c1
2 files changed, 17 insertions, 12 deletions
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index e94ab398b717..ffab4783ac64 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -230,7 +230,6 @@ __sync_single_inode(struct inode *inode, struct writeback_control *wbc)
230 * The inode is clean, unused 230 * The inode is clean, unused
231 */ 231 */
232 list_move(&inode->i_list, &inode_unused); 232 list_move(&inode->i_list, &inode_unused);
233 inodes_stat.nr_unused++;
234 } 233 }
235 } 234 }
236 wake_up_inode(inode); 235 wake_up_inode(inode);
@@ -238,14 +237,20 @@ __sync_single_inode(struct inode *inode, struct writeback_control *wbc)
238} 237}
239 238
240/* 239/*
241 * Write out an inode's dirty pages. Called under inode_lock. 240 * Write out an inode's dirty pages. Called under inode_lock. Either the
241 * caller has ref on the inode (either via __iget or via syscall against an fd)
242 * or the inode has I_WILL_FREE set (via generic_forget_inode)
242 */ 243 */
243static int 244static int
244__writeback_single_inode(struct inode *inode, 245__writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
245 struct writeback_control *wbc)
246{ 246{
247 wait_queue_head_t *wqh; 247 wait_queue_head_t *wqh;
248 248
249 if (!atomic_read(&inode->i_count))
250 WARN_ON(!(inode->i_state & I_WILL_FREE));
251 else
252 WARN_ON(inode->i_state & I_WILL_FREE);
253
249 if ((wbc->sync_mode != WB_SYNC_ALL) && (inode->i_state & I_LOCK)) { 254 if ((wbc->sync_mode != WB_SYNC_ALL) && (inode->i_state & I_LOCK)) {
250 list_move(&inode->i_list, &inode->i_sb->s_dirty); 255 list_move(&inode->i_list, &inode->i_sb->s_dirty);
251 return 0; 256 return 0;
@@ -259,11 +264,9 @@ __writeback_single_inode(struct inode *inode,
259 264
260 wqh = bit_waitqueue(&inode->i_state, __I_LOCK); 265 wqh = bit_waitqueue(&inode->i_state, __I_LOCK);
261 do { 266 do {
262 __iget(inode);
263 spin_unlock(&inode_lock); 267 spin_unlock(&inode_lock);
264 __wait_on_bit(wqh, &wq, inode_wait, 268 __wait_on_bit(wqh, &wq, inode_wait,
265 TASK_UNINTERRUPTIBLE); 269 TASK_UNINTERRUPTIBLE);
266 iput(inode);
267 spin_lock(&inode_lock); 270 spin_lock(&inode_lock);
268 } while (inode->i_state & I_LOCK); 271 } while (inode->i_state & I_LOCK);
269 } 272 }
@@ -541,14 +544,15 @@ void sync_inodes(int wait)
541} 544}
542 545
543/** 546/**
544 * write_inode_now - write an inode to disk 547 * write_inode_now - write an inode to disk
545 * @inode: inode to write to disk 548 * @inode: inode to write to disk
546 * @sync: whether the write should be synchronous or not 549 * @sync: whether the write should be synchronous or not
550 *
551 * This function commits an inode to disk immediately if it is dirty. This is
552 * primarily needed by knfsd.
547 * 553 *
548 * This function commits an inode to disk immediately if it is 554 * The caller must either have a ref on the inode or must have set I_WILL_FREE.
549 * dirty. This is primarily needed by knfsd.
550 */ 555 */
551
552int write_inode_now(struct inode *inode, int sync) 556int write_inode_now(struct inode *inode, int sync)
553{ 557{
554 int ret; 558 int ret;
diff --git a/fs/inode.c b/fs/inode.c
index 7d3316527767..d8d04bd72b59 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -1088,6 +1088,7 @@ static void generic_forget_inode(struct inode *inode)
1088 if (inode->i_data.nrpages) 1088 if (inode->i_data.nrpages)
1089 truncate_inode_pages(&inode->i_data, 0); 1089 truncate_inode_pages(&inode->i_data, 0);
1090 clear_inode(inode); 1090 clear_inode(inode);
1091 wake_up_inode(inode);
1091 destroy_inode(inode); 1092 destroy_inode(inode);
1092} 1093}
1093 1094