diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2008-08-20 11:31:19 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2008-08-20 11:31:19 -0400 |
commit | 5f22ca9b13551debea77a407a8d06cd9c6f15238 (patch) | |
tree | 435b5eef62fd4a67f66a690e243b529ca475750b | |
parent | 395c68464cffbc645919a93d6111bc15201167f3 (diff) |
vfat: fix 'sync' mount deadlock due to BKL->lock_super conversion
There was another FAT BKL conversion deadlock reported by Bart
Trojanowski due to the BKL being used as a recursive lock by FAT, which
was missed because it only triggers with 'sync' (or 'dirsync') mounts.
The recursion worked for the BKL, but after the conversion to lock_super
(which uses a mutex), it just deadlocks.
Thanks to Bart for debugging this and testing the fix. The lock
debugging information from the original report:
=============================================
[ INFO: possible recursive locking detected ]
2.6.27-rc3-bisect-00448-ga7f5aaf #16
---------------------------------------------
mv/4020 is trying to acquire lock:
(&type->s_lock_key#9){--..}, at: [<c01a90fe>] lock_super+0x1e/0x20
but task is already holding lock:
(&type->s_lock_key#9){--..}, at: [<c01a90fe>] lock_super+0x1e/0x20
other info that might help us debug this:
3 locks held by mv/4020:
#0: (&sb->s_type->i_mutex_key#9/1){--..}, at: [<c01b2336>] do_unlinkat+0x66/0x140
#1: (&sb->s_type->i_mutex_key#9){--..}, at: [<c01b0954>] vfs_unlink+0x84/0x110
#2: (&type->s_lock_key#9){--..}, at: [<c01a90fe>] lock_super+0x1e/0x20
stack backtrace:
Pid: 4020, comm: mv Not tainted 2.6.27-rc3-bisect-00448-ga7f5aaf #16
[<c014e694>] validate_chain+0x984/0xea0
[<c0108d70>] ? native_sched_clock+0x0/0xf0
[<c014ee9c>] __lock_acquire+0x2ec/0x9b0
[<c014f5cf>] lock_acquire+0x6f/0x90
[<c01a90fe>] ? lock_super+0x1e/0x20
[<c044e5fd>] mutex_lock_nested+0xad/0x300
[<c01a90fe>] ? lock_super+0x1e/0x20
[<c01a90fe>] ? lock_super+0x1e/0x20
[<c01a90fe>] lock_super+0x1e/0x20
[<f8b3a700>] fat_write_inode+0x60/0x2b0 [fat]
[<c0450878>] ? _spin_unlock_irqrestore+0x48/0x80
[<f8b3a953>] ? fat_sync_inode+0x3/0x20 [fat]
[<f8b3a962>] fat_sync_inode+0x12/0x20 [fat]
[<f8b37c7e>] fat_remove_entries+0xbe/0x120 [fat]
[<f8b422ef>] vfat_unlink+0x5f/0x90 [vfat]
[<f8b42290>] ? vfat_unlink+0x0/0x90 [vfat]
[<c01b0968>] vfs_unlink+0x98/0x110
[<c01b2400>] do_unlinkat+0x130/0x140
[<c016a8f5>] ? audit_syscall_entry+0x105/0x150
[<c01b253b>] sys_unlinkat+0x3b/0x40
[<c01040d3>] sysenter_do_call+0x12/0x3f
=======================
where the deadlock is due to the nesting of lock_super from vfat_unlink
to fat_write_inode:
- do_unlinkat
- vfs_unlink
- vfat_unlink
* lock_super
- fat_remove_entries
- fat_sync_inode
- fat_write_inode
* lock_super
and the fix is to simply remove the use of lock_super() in fat_write_inode.
The lock_super() there had been just an automatic conversion of the
kernel lock to the superblock lock, but no locking was actually needed
there, since the code in fat_write_inode already protected all relevant
accesses with a spinlock (sbi->inode_hash_lock to be exact). The only
code inside the BKL (and thus the superblock lock) was accesses tp local
variables or calls to functions that have long been SMP-safe (i.e.
sb_bread, mark_buffe_dirty and brlese).
Bart reports:
"Looks good. I ran 10 parallel processes creating 1M files truncating
them, writing to them again and then deleting them. This patch fixes
the issue I ran into.
Signed-off-by: Bart Trojanowski <bart@jukie.net>"
Reported-and-tested-by: Bart Trojanowski <bart@jukie.net>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r-- | fs/fat/inode.c | 10 |
1 files changed, 3 insertions, 7 deletions
diff --git a/fs/fat/inode.c b/fs/fat/inode.c index 6d266d793e2c..80ff3381fa21 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c | |||
@@ -562,26 +562,23 @@ static int fat_write_inode(struct inode *inode, int wait) | |||
562 | struct buffer_head *bh; | 562 | struct buffer_head *bh; |
563 | struct msdos_dir_entry *raw_entry; | 563 | struct msdos_dir_entry *raw_entry; |
564 | loff_t i_pos; | 564 | loff_t i_pos; |
565 | int err = 0; | 565 | int err; |
566 | 566 | ||
567 | retry: | 567 | retry: |
568 | i_pos = MSDOS_I(inode)->i_pos; | 568 | i_pos = MSDOS_I(inode)->i_pos; |
569 | if (inode->i_ino == MSDOS_ROOT_INO || !i_pos) | 569 | if (inode->i_ino == MSDOS_ROOT_INO || !i_pos) |
570 | return 0; | 570 | return 0; |
571 | 571 | ||
572 | lock_super(sb); | ||
573 | bh = sb_bread(sb, i_pos >> sbi->dir_per_block_bits); | 572 | bh = sb_bread(sb, i_pos >> sbi->dir_per_block_bits); |
574 | if (!bh) { | 573 | if (!bh) { |
575 | printk(KERN_ERR "FAT: unable to read inode block " | 574 | printk(KERN_ERR "FAT: unable to read inode block " |
576 | "for updating (i_pos %lld)\n", i_pos); | 575 | "for updating (i_pos %lld)\n", i_pos); |
577 | err = -EIO; | 576 | return -EIO; |
578 | goto out; | ||
579 | } | 577 | } |
580 | spin_lock(&sbi->inode_hash_lock); | 578 | spin_lock(&sbi->inode_hash_lock); |
581 | if (i_pos != MSDOS_I(inode)->i_pos) { | 579 | if (i_pos != MSDOS_I(inode)->i_pos) { |
582 | spin_unlock(&sbi->inode_hash_lock); | 580 | spin_unlock(&sbi->inode_hash_lock); |
583 | brelse(bh); | 581 | brelse(bh); |
584 | unlock_super(sb); | ||
585 | goto retry; | 582 | goto retry; |
586 | } | 583 | } |
587 | 584 | ||
@@ -607,11 +604,10 @@ retry: | |||
607 | } | 604 | } |
608 | spin_unlock(&sbi->inode_hash_lock); | 605 | spin_unlock(&sbi->inode_hash_lock); |
609 | mark_buffer_dirty(bh); | 606 | mark_buffer_dirty(bh); |
607 | err = 0; | ||
610 | if (wait) | 608 | if (wait) |
611 | err = sync_dirty_buffer(bh); | 609 | err = sync_dirty_buffer(bh); |
612 | brelse(bh); | 610 | brelse(bh); |
613 | out: | ||
614 | unlock_super(sb); | ||
615 | return err; | 611 | return err; |
616 | } | 612 | } |
617 | 613 | ||