diff options
author | OGAWA Hirofumi <hirofumi@mail.parknet.co.jp> | 2006-06-22 17:47:21 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@g5.osdl.org> | 2006-06-22 18:05:57 -0400 |
commit | 09d967c6f32b35eab15b45862ae16e4f06259d8e (patch) | |
tree | 9fca9dda390612041f857a33cb746fe1eb28b60a | |
parent | 0e5b3781591cc954037c08ef78edf7f1192d38c5 (diff) |
[PATCH] Fix a race condition between ->i_mapping and iput()
This race caused an oops, and can be reproduced with the following.
while true; do
dd if=/dev/zero of=/dev/.static/dev/hdg1 bs=512 count=1000 & sync
done
This race condition was between __sync_single_inode() and iput().
cpu0 (fs's inode) cpu1 (bdev's inode)
----------------- -------------------
close("/dev/hda2")
[...]
__sync_single_inode()
/* copy the bdev's ->i_mapping */
mapping = inode->i_mapping;
generic_forget_inode()
bdev_clear_inode()
/* restore the fs's ->i_mapping */
inode->i_mapping = &inode->i_data;
/* bdev's inode was freed */
destroy_inode(inode);
if (wait) {
/* dereference a freed bdev's mapping->host */
filemap_fdatawait(mapping); /* Oops */
Since __sync_single_inode() only takes a ref-count on the fs's inode,
another process can close() and free the bdev's inode while the fs's inode
is being written. So __sync_single_inode() accesses the freed ->i_mapping
and oopses.
This patch takes a ref-count on the bdev's inode on behalf of the fs's inode
before setting ->i_mapping, and clear_inode() of the fs's inode does iput() on
the bdev's inode. So as long as the fs's inode is still alive, the bdev's inode
will not be freed.
Signed-off-by: OGAWA Hirofumi <hirofumi@mail.parknet.co.jp>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
-rw-r--r-- | fs/block_dev.c | 32 |
1 files changed, 25 insertions, 7 deletions
diff --git a/fs/block_dev.c b/fs/block_dev.c index f5958f413bd1..44aaba202f78 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c | |||
@@ -414,21 +414,31 @@ EXPORT_SYMBOL(bdput); | |||
414 | static struct block_device *bd_acquire(struct inode *inode) | 414 | static struct block_device *bd_acquire(struct inode *inode) |
415 | { | 415 | { |
416 | struct block_device *bdev; | 416 | struct block_device *bdev; |
417 | |||
417 | spin_lock(&bdev_lock); | 418 | spin_lock(&bdev_lock); |
418 | bdev = inode->i_bdev; | 419 | bdev = inode->i_bdev; |
419 | if (bdev && igrab(bdev->bd_inode)) { | 420 | if (bdev) { |
421 | atomic_inc(&bdev->bd_inode->i_count); | ||
420 | spin_unlock(&bdev_lock); | 422 | spin_unlock(&bdev_lock); |
421 | return bdev; | 423 | return bdev; |
422 | } | 424 | } |
423 | spin_unlock(&bdev_lock); | 425 | spin_unlock(&bdev_lock); |
426 | |||
424 | bdev = bdget(inode->i_rdev); | 427 | bdev = bdget(inode->i_rdev); |
425 | if (bdev) { | 428 | if (bdev) { |
426 | spin_lock(&bdev_lock); | 429 | spin_lock(&bdev_lock); |
427 | if (inode->i_bdev) | 430 | if (!inode->i_bdev) { |
428 | __bd_forget(inode); | 431 | /* |
429 | inode->i_bdev = bdev; | 432 | * We take an additional bd_inode->i_count for inode, |
430 | inode->i_mapping = bdev->bd_inode->i_mapping; | 433 | * and it's released in clear_inode() of inode. |
431 | list_add(&inode->i_devices, &bdev->bd_inodes); | 434 | * So, we can access it via ->i_mapping always |
435 | * without igrab(). | ||
436 | */ | ||
437 | atomic_inc(&bdev->bd_inode->i_count); | ||
438 | inode->i_bdev = bdev; | ||
439 | inode->i_mapping = bdev->bd_inode->i_mapping; | ||
440 | list_add(&inode->i_devices, &bdev->bd_inodes); | ||
441 | } | ||
432 | spin_unlock(&bdev_lock); | 442 | spin_unlock(&bdev_lock); |
433 | } | 443 | } |
434 | return bdev; | 444 | return bdev; |
@@ -438,10 +448,18 @@ static struct block_device *bd_acquire(struct inode *inode) | |||
438 | 448 | ||
439 | void bd_forget(struct inode *inode) | 449 | void bd_forget(struct inode *inode) |
440 | { | 450 | { |
451 | struct block_device *bdev = NULL; | ||
452 | |||
441 | spin_lock(&bdev_lock); | 453 | spin_lock(&bdev_lock); |
442 | if (inode->i_bdev) | 454 | if (inode->i_bdev) { |
455 | if (inode->i_sb != blockdev_superblock) | ||
456 | bdev = inode->i_bdev; | ||
443 | __bd_forget(inode); | 457 | __bd_forget(inode); |
458 | } | ||
444 | spin_unlock(&bdev_lock); | 459 | spin_unlock(&bdev_lock); |
460 | |||
461 | if (bdev) | ||
462 | iput(bdev->bd_inode); | ||
445 | } | 463 | } |
446 | 464 | ||
447 | int bd_claim(struct block_device *bdev, void *holder) | 465 | int bd_claim(struct block_device *bdev, void *holder) |