aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
authorOGAWA Hirofumi <hirofumi@mail.parknet.co.jp>2006-06-22 17:47:21 -0400
committerLinus Torvalds <torvalds@g5.osdl.org>2006-06-22 18:05:57 -0400
commit09d967c6f32b35eab15b45862ae16e4f06259d8e (patch)
tree9fca9dda390612041f857a33cb746fe1eb28b60a /fs
parent0e5b3781591cc954037c08ef78edf7f1192d38c5 (diff)
[PATCH] Fix a race condition between ->i_mapping and iput()
This race became a cause of oops, and can reproduce by the following. while true; do dd if=/dev/zero of=/dev/.static/dev/hdg1 bs=512 count=1000 & sync done This race condition was between __sync_single_inode() and iput(). cpu0 (fs's inode) cpu1 (bdev's inode) ----------------- ------------------- close("/dev/hda2") [...] __sync_single_inode() /* copy the bdev's ->i_mapping */ mapping = inode->i_mapping; generic_forget_inode() bdev_clear_inode() /* restre the fs's ->i_mapping */ inode->i_mapping = &inode->i_data; /* bdev's inode was freed */ destroy_inode(inode); if (wait) { /* dereference a freed bdev's mapping->host */ filemap_fdatawait(mapping); /* Oops */ Since __sync_single_inode() is only taking a ref-count of fs's inode, the another process can be close() and freeing the bdev's inode while writing fs's inode. So, __sync_signle_inode() accesses the freed ->i_mapping, oops. This patch takes a ref-count on the bdev's inode for the fs's inode before setting a ->i_mapping, and the clear_inode() of the fs's inode does iput() on the bdev's inode. So if the fs's inode is still living, bdev's inode shouldn't be freed. Signed-off-by: OGAWA Hirofumi <hirofumi@mail.parknet.co.jp> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Diffstat (limited to 'fs')
-rw-r--r--fs/block_dev.c32
1 files changed, 25 insertions, 7 deletions
diff --git a/fs/block_dev.c b/fs/block_dev.c
index f5958f413bd1..44aaba202f78 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -414,21 +414,31 @@ EXPORT_SYMBOL(bdput);
414static struct block_device *bd_acquire(struct inode *inode) 414static struct block_device *bd_acquire(struct inode *inode)
415{ 415{
416 struct block_device *bdev; 416 struct block_device *bdev;
417
417 spin_lock(&bdev_lock); 418 spin_lock(&bdev_lock);
418 bdev = inode->i_bdev; 419 bdev = inode->i_bdev;
419 if (bdev && igrab(bdev->bd_inode)) { 420 if (bdev) {
421 atomic_inc(&bdev->bd_inode->i_count);
420 spin_unlock(&bdev_lock); 422 spin_unlock(&bdev_lock);
421 return bdev; 423 return bdev;
422 } 424 }
423 spin_unlock(&bdev_lock); 425 spin_unlock(&bdev_lock);
426
424 bdev = bdget(inode->i_rdev); 427 bdev = bdget(inode->i_rdev);
425 if (bdev) { 428 if (bdev) {
426 spin_lock(&bdev_lock); 429 spin_lock(&bdev_lock);
427 if (inode->i_bdev) 430 if (!inode->i_bdev) {
428 __bd_forget(inode); 431 /*
429 inode->i_bdev = bdev; 432 * We take an additional bd_inode->i_count for inode,
430 inode->i_mapping = bdev->bd_inode->i_mapping; 433 * and it's released in clear_inode() of inode.
431 list_add(&inode->i_devices, &bdev->bd_inodes); 434 * So, we can access it via ->i_mapping always
435 * without igrab().
436 */
437 atomic_inc(&bdev->bd_inode->i_count);
438 inode->i_bdev = bdev;
439 inode->i_mapping = bdev->bd_inode->i_mapping;
440 list_add(&inode->i_devices, &bdev->bd_inodes);
441 }
432 spin_unlock(&bdev_lock); 442 spin_unlock(&bdev_lock);
433 } 443 }
434 return bdev; 444 return bdev;
@@ -438,10 +448,18 @@ static struct block_device *bd_acquire(struct inode *inode)
438 448
439void bd_forget(struct inode *inode) 449void bd_forget(struct inode *inode)
440{ 450{
451 struct block_device *bdev = NULL;
452
441 spin_lock(&bdev_lock); 453 spin_lock(&bdev_lock);
442 if (inode->i_bdev) 454 if (inode->i_bdev) {
455 if (inode->i_sb != blockdev_superblock)
456 bdev = inode->i_bdev;
443 __bd_forget(inode); 457 __bd_forget(inode);
458 }
444 spin_unlock(&bdev_lock); 459 spin_unlock(&bdev_lock);
460
461 if (bdev)
462 iput(bdev->bd_inode);
445} 463}
446 464
447int bd_claim(struct block_device *bdev, void *holder) 465int bd_claim(struct block_device *bdev, void *holder)