From facc31ddc3570a3a0d8951c94f16b898e01b464d Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 9 Mar 2011 19:54:27 +0100 Subject: block: Don't implicitly trigger event check on disk_unblock_events() Currently, disk_unblock_events() implicitly kicks an event check if the block count reaches zero. This behavior is not described in the comment and hinders future changes. Make the unblocker explicitly check events by calling disk_check_events() as necessary. This patch doesn't cause any behavior difference. Signed-off-by: Tejun Heo Cc: Jens Axboe Cc: Kay Sievers --- fs/block_dev.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs/block_dev.c') diff --git a/fs/block_dev.c b/fs/block_dev.c index 889287019599..fffdf86c1757 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -1446,6 +1446,7 @@ int blkdev_put(struct block_device *bdev, fmode_t mode) if (bdev_free) { if (bdev->bd_write_holder) { disk_unblock_events(bdev->bd_disk); + disk_check_events(bdev->bd_disk); bdev->bd_write_holder = false; } else disk_check_events(bdev->bd_disk); -- cgit v1.2.2 From 6936217cc7e58573026bdba25b1bfb778e8f2267 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 9 Mar 2011 19:54:27 +0100 Subject: block: Don't check events on close unless it was blocked The block event mechanism currently always checks events when the device is being closed regardless of the open mode. The intention was to allow detection of EJECT_REQUEST when a device is closed whether disk event polling is enabled or not. This is unnecessary as, for devices of interest, events are checked from either userland or kernel and in the former case ->check_events() is performed on open of each poll attempt anyway. Furthermore, this unconditional event check on close makes the code susceptible to an event loop if the block driver doesn't clear reported events correctly - an event triggers userland to open and close the device which in turn causes another event, rinse and repeat. 
Check events on close only if it was blocked by an exclusive write open. Signed-off-by: Tejun Heo Cc: Jens Axboe Cc: Kay Sievers --- fs/block_dev.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'fs/block_dev.c') diff --git a/fs/block_dev.c b/fs/block_dev.c index fffdf86c1757..7dd2c658d429 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -1448,13 +1448,11 @@ int blkdev_put(struct block_device *bdev, fmode_t mode) disk_unblock_events(bdev->bd_disk); disk_check_events(bdev->bd_disk); bdev->bd_write_holder = false; - } else - disk_check_events(bdev->bd_disk); + } } mutex_unlock(&bdev->bd_mutex); - } else - disk_check_events(bdev->bd_disk); + } return __blkdev_put(bdev, mode, 0); } -- cgit v1.2.2 From 69e02c59a7d962dced8047401b81a8d897e1702e Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 9 Mar 2011 19:54:27 +0100 Subject: block: Don't check events while open is in progress Not all block drivers clear events immediately after reporting. Some do so in ->revalidate_disk() or other steps during ->open(). There is a slim chance that an event poll may happen between the clearing event check from check_disk_change() and the actual clearing of the events which would result in spurious events. Block event checks while a block device open is in progress. There is no need to kick an explicit event check afterwards as events are always checked during open. -v2: The original patch could have called disk_unblock_events() with an already released or %NULL @disk causing an oops. Fixed by making sure references are put after disk_unblock_events() is called. It also makes the error path of __blkdev_get() a bit simpler. This problem was reported by Jens. 
Signed-off-by: Tejun Heo Cc: Jens Axboe Cc: Kay Sievers --- fs/block_dev.c | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) (limited to 'fs/block_dev.c') diff --git a/fs/block_dev.c b/fs/block_dev.c index 7dd2c658d429..d42cad2757ae 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -1087,6 +1087,7 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part) if (!disk) goto out; + disk_block_events(disk); mutex_lock_nested(&bdev->bd_mutex, for_part); if (!bdev->bd_openers) { bdev->bd_disk = disk; @@ -1108,10 +1109,11 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part) */ disk_put_part(bdev->bd_part); bdev->bd_part = NULL; - module_put(disk->fops->owner); - put_disk(disk); bdev->bd_disk = NULL; mutex_unlock(&bdev->bd_mutex); + disk_unblock_events(disk); + module_put(disk->fops->owner); + put_disk(disk); goto restart; } if (ret) @@ -1148,9 +1150,6 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part) bd_set_size(bdev, (loff_t)bdev->bd_part->nr_sects << 9); } } else { - module_put(disk->fops->owner); - put_disk(disk); - disk = NULL; if (bdev->bd_contains == bdev) { if (bdev->bd_disk->fops->open) { ret = bdev->bd_disk->fops->open(bdev, mode); @@ -1160,11 +1159,15 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part) if (bdev->bd_invalidated) rescan_partitions(bdev->bd_disk, bdev); } + /* only one opener holds refs to the module and disk */ + module_put(disk->fops->owner); + put_disk(disk); } bdev->bd_openers++; if (for_part) bdev->bd_part_count++; mutex_unlock(&bdev->bd_mutex); + disk_unblock_events(disk); return 0; out_clear: @@ -1177,9 +1180,9 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part) bdev->bd_contains = NULL; out_unlock_bdev: mutex_unlock(&bdev->bd_mutex); + disk_unblock_events(disk); out: - if (disk) - module_put(disk->fops->owner); + module_put(disk->fops->owner); put_disk(disk); bdput(bdev); -- 
cgit v1.2.2 From 7eaceaccab5f40bbfda044629a6298616aeaed50 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 10 Mar 2011 08:52:07 +0100 Subject: block: remove per-queue plugging Code has been converted over to the new explicit on-stack plugging, and delay users have been converted to use the new API for that. So let's kill off the old plugging along with aops->sync_page(). Signed-off-by: Jens Axboe --- fs/block_dev.c | 1 - 1 file changed, 1 deletion(-) (limited to 'fs/block_dev.c') diff --git a/fs/block_dev.c b/fs/block_dev.c index 4fb8a3431531..fffc2c672396 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -1520,7 +1520,6 @@ static int blkdev_releasepage(struct page *page, gfp_t wait) static const struct address_space_operations def_blk_aops = { .readpage = blkdev_readpage, .writepage = blkdev_writepage, - .sync_page = block_sync_page, .write_begin = blkdev_write_begin, .write_end = blkdev_write_end, .writepages = generic_writepages, -- cgit v1.2.2 From 4345caba340f051e10847924fc078ae18ed6695c Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Sat, 19 Mar 2011 13:53:31 +0100 Subject: block: NULL dereference on error path in __blkdev_get() "disk" is always NULL when we goto out. There was a check for this before, but it was removed in 69e02c59a7d9 "block: Don't check events while open is in progress". 
Signed-off-by: Dan Carpenter Acked-by: Tejun Heo Signed-off-by: Jens Axboe --- fs/block_dev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/block_dev.c') diff --git a/fs/block_dev.c b/fs/block_dev.c index fbe05cbdd692..7d02afb2b7f4 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -1181,9 +1181,9 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part) out_unlock_bdev: mutex_unlock(&bdev->bd_mutex); disk_unblock_events(disk); - out: module_put(disk->fops->owner); put_disk(disk); + out: bdput(bdev); return ret; -- cgit v1.2.2 From 250df6ed274d767da844a5d9f05720b804240197 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Tue, 22 Mar 2011 22:23:36 +1100 Subject: fs: protect inode->i_state with inode->i_lock Protect inode state transitions and validity checks with the inode->i_lock. This enables us to make inode state transitions independently of the inode_lock and is the first step to peeling away the inode_lock from the code. This requires that __iget() is done atomically with i_state checks during list traversals so that we don't race with another thread marking the inode I_FREEING between the state check and grabbing the reference. Also remove the unlock_new_inode() memory barrier optimisation required to avoid taking the inode_lock when clearing I_NEW. Simplify the code by simply taking the inode->i_lock around the state change and wakeup. Because the wakeup is no longer tricky, remove the wake_up_inode() function and open code the wakeup where necessary. 
Signed-off-by: Dave Chinner Signed-off-by: Al Viro --- fs/block_dev.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs/block_dev.c') diff --git a/fs/block_dev.c b/fs/block_dev.c index 889287019599..bc39b18cf3d0 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -56,9 +56,11 @@ static void bdev_inode_switch_bdi(struct inode *inode, struct backing_dev_info *dst) { spin_lock(&inode_lock); + spin_lock(&inode->i_lock); inode->i_data.backing_dev_info = dst; if (inode->i_state & I_DIRTY) list_move(&inode->i_wb_list, &dst->wb.b_dirty); + spin_unlock(&inode->i_lock); spin_unlock(&inode_lock); } -- cgit v1.2.2 From a66979abad090b2765a6c6790c9fdeab996833f2 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Tue, 22 Mar 2011 22:23:41 +1100 Subject: fs: move i_wb_list out from under inode_lock Protect the inode writeback list with a new global lock inode_wb_list_lock and use it to protect the list manipulations and traversals. This lock replaces the inode_lock as the inodes on the list can be validity checked while holding the inode->i_lock and hence the inode_lock is no longer needed to protect the list. Signed-off-by: Dave Chinner Signed-off-by: Al Viro --- fs/block_dev.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs/block_dev.c') diff --git a/fs/block_dev.c b/fs/block_dev.c index bc39b18cf3d0..2bbc0e62102f 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -55,13 +55,13 @@ EXPORT_SYMBOL(I_BDEV); static void bdev_inode_switch_bdi(struct inode *inode, struct backing_dev_info *dst) { - spin_lock(&inode_lock); + spin_lock(&inode_wb_list_lock); spin_lock(&inode->i_lock); inode->i_data.backing_dev_info = dst; if (inode->i_state & I_DIRTY) list_move(&inode->i_wb_list, &dst->wb.b_dirty); spin_unlock(&inode->i_lock); - spin_unlock(&inode_lock); + spin_unlock(&inode_wb_list_lock); } static sector_t max_block(struct block_device *bdev) -- cgit v1.2.2