author     Christoph Hellwig <hch@infradead.org>      2011-04-21 20:19:44 -0400
committer  Wu Fengguang <fengguang.wu@intel.com>      2011-06-07 20:25:21 -0400
commit     f758eeabeb96f878c860e8f110f94ec8820822a9
tree       fea5a465aa0aa38c6c9263eb264acbeb7f722c02   /fs/block_dev.c
parent     424b351fe1901fc909fd0ca4f21dab58f24c1aac
writeback: split inode_wb_list_lock into bdi_writeback.list_lock
Split the global inode_wb_list_lock into a per-bdi_writeback list_lock,
as it's currently the most contended lock in the system for metadata
heavy workloads. It won't help for single-filesystem workloads for
which we'll need the I/O-less balance_dirty_pages, but at least we
can dedicate a cpu to spinning on each bdi now for larger systems.
Based on earlier patches from Nick Piggin and Dave Chinner.
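To make the shape of the change concrete, here is a rough sketch (simplified names, not the exact kernel structures) of what moving the lock into each bdi_writeback means: the dirty-inode lists and the spinlock protecting them now live per backing device instead of behind one global lock.

#include <linux/spinlock.h>
#include <linux/list.h>

/* Before: a single global lock serializes dirty-list updates for all bdis. */
extern spinlock_t inode_wb_list_lock;

/*
 * After (sketch only): each per-bdi writeback context carries its own
 * list_lock, so writeback against different devices no longer contends
 * on one global spinlock.
 */
struct bdi_writeback_sketch {
	spinlock_t		list_lock;	/* protects the lists below */
	struct list_head	b_dirty;	/* dirty inodes awaiting writeback */
	struct list_head	b_io;		/* inodes queued for I/O */
	struct list_head	b_more_io;	/* inodes needing another pass */
};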
It reduces lock contentions to 1/4 in this test case:
10 HDD JBOD, 100 dd on each disk, XFS, 6GB ram
lock_stat version 0.3
-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
class name con-bounces contentions waittime-min waittime-max waittime-total acq-bounces acquisitions holdtime-min holdtime-max holdtime-total
-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
vanilla 2.6.39-rc3:
inode_wb_list_lock: 42590 44433 0.12 147.74 144127.35 252274 886792 0.08 121.34 917211.23
------------------
inode_wb_list_lock 2 [<ffffffff81165da5>] bdev_inode_switch_bdi+0x29/0x85
inode_wb_list_lock 34 [<ffffffff8115bd0b>] inode_wb_list_del+0x22/0x49
inode_wb_list_lock 12893 [<ffffffff8115bb53>] __mark_inode_dirty+0x170/0x1d0
inode_wb_list_lock 10702 [<ffffffff8115afef>] writeback_single_inode+0x16d/0x20a
------------------
inode_wb_list_lock 2 [<ffffffff81165da5>] bdev_inode_switch_bdi+0x29/0x85
inode_wb_list_lock 19 [<ffffffff8115bd0b>] inode_wb_list_del+0x22/0x49
inode_wb_list_lock 5550 [<ffffffff8115bb53>] __mark_inode_dirty+0x170/0x1d0
inode_wb_list_lock 8511 [<ffffffff8115b4ad>] writeback_sb_inodes+0x10f/0x157
2.6.39-rc3 + patch:
&(&wb->list_lock)->rlock: 11383 11657 0.14 151.69 40429.51 90825 527918 0.11 145.90 556843.37
------------------------
&(&wb->list_lock)->rlock 10 [<ffffffff8115b189>] inode_wb_list_del+0x5f/0x86
&(&wb->list_lock)->rlock 1493 [<ffffffff8115b1ed>] writeback_inodes_wb+0x3d/0x150
&(&wb->list_lock)->rlock 3652 [<ffffffff8115a8e9>] writeback_sb_inodes+0x123/0x16f
&(&wb->list_lock)->rlock 1412 [<ffffffff8115a38e>] writeback_single_inode+0x17f/0x223
------------------------
&(&wb->list_lock)->rlock 3 [<ffffffff8110b5af>] bdi_lock_two+0x46/0x4b
&(&wb->list_lock)->rlock 6 [<ffffffff8115b189>] inode_wb_list_del+0x5f/0x86
&(&wb->list_lock)->rlock 2061 [<ffffffff8115af97>] __mark_inode_dirty+0x173/0x1cf
&(&wb->list_lock)->rlock 2629 [<ffffffff8115a8e9>] writeback_sb_inodes+0x123/0x16f
hughd@google.com: fix recursive lock when bdi_lock_two() is called with new the same as old
akpm@linux-foundation.org: cleanup bdev_inode_switch_bdi() comment
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Hugh Dickins <hughd@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Wu Fengguang <fengguang.wu@intel.com>
Diffstat (limited to 'fs/block_dev.c')
-rw-r--r--  fs/block_dev.c  16
1 files changed, 10 insertions, 6 deletions
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 1a2421f908f0..3c9a03e51b62 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -44,24 +44,28 @@ inline struct block_device *I_BDEV(struct inode *inode)
 {
 	return &BDEV_I(inode)->bdev;
 }
-
 EXPORT_SYMBOL(I_BDEV);
 
 /*
- * move the inode from it's current bdi to the a new bdi. if the inode is dirty
- * we need to move it onto the dirty list of @dst so that the inode is always
- * on the right list.
+ * Move the inode from its current bdi to a new bdi. If the inode is dirty we
+ * need to move it onto the dirty list of @dst so that the inode is always on
+ * the right list.
  */
 static void bdev_inode_switch_bdi(struct inode *inode,
 			struct backing_dev_info *dst)
 {
-	spin_lock(&inode_wb_list_lock);
+	struct backing_dev_info *old = inode->i_data.backing_dev_info;
+
+	if (unlikely(dst == old))		/* deadlock avoidance */
+		return;
+	bdi_lock_two(&old->wb, &dst->wb);
 	spin_lock(&inode->i_lock);
 	inode->i_data.backing_dev_info = dst;
 	if (inode->i_state & I_DIRTY)
 		list_move(&inode->i_wb_list, &dst->wb.b_dirty);
 	spin_unlock(&inode->i_lock);
-	spin_unlock(&inode_wb_list_lock);
+	spin_unlock(&old->wb.list_lock);
+	spin_unlock(&dst->wb.list_lock);
 }
 
 static sector_t max_block(struct block_device *bdev)
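Since bdev_inode_switch_bdi() above now has to hold both the old and the new bdi's list_lock, bdi_lock_two() must take the pair in a stable order to avoid an ABBA deadlock (the dst == old case is already filtered out before the call). A minimal sketch of such a helper, assuming address ordering and a lockdep nesting annotation; the name and exact form here are illustrative, not taken from this patch:

#include <linux/spinlock.h>
#include <linux/lockdep.h>

/*
 * Sketch of a pair-locking helper in the spirit of bdi_lock_two():
 * always take the lower-addressed lock first so that two concurrent
 * switches between the same pair of bdis cannot deadlock; the second
 * acquisition is annotated for lockdep since both locks share a class.
 */
static void wb_list_lock_two(spinlock_t *a, spinlock_t *b)
{
	if (a < b) {
		spin_lock(a);
		spin_lock_nested(b, SINGLE_DEPTH_NESTING);
	} else {
		spin_lock(b);
		spin_lock_nested(a, SINGLE_DEPTH_NESTING);
	}
}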