diff options
| author | Waiman Long <longman@redhat.com> | 2017-09-20 15:12:20 -0400 |
|---|---|---|
| committer | Jens Axboe <axboe@kernel.dk> | 2017-09-25 10:56:05 -0400 |
| commit | 5acb3cc2c2e9d3020a4fee43763c6463767f1572 (patch) | |
| tree | 0d650e446e8e7ef738e3d2237b6778fb6ef447a4 /kernel/trace | |
| parent | 1dae69bedeeca0b57e441eae491fbd38049c0b47 (diff) | |
blktrace: Fix potential deadlock between delete & sysfs ops
The lockdep code had reported the following unsafe locking scenario:
CPU0 CPU1
---- ----
lock(s_active#228);
lock(&bdev->bd_mutex/1);
lock(s_active#228);
lock(&bdev->bd_mutex);
*** DEADLOCK ***
The deadlock may happen when one task (CPU1) is trying to delete a
partition in a block device and another task (CPU0) is accessing
tracing sysfs file (e.g. /sys/block/dm-1/trace/act_mask) in that
partition.
The s_active isn't an actual lock. It is a reference count (kn->count)
on the sysfs (kernfs) file. Removal of a sysfs file, however, require
a wait until all the references are gone. The reference count is
treated like a rwsem using lockdep instrumentation code.
The fact that a thread is in the sysfs callback method or in the
ioctl call means there is a reference to the opended sysfs or device
file. That should prevent the underlying block structure from being
removed.
Instead of using bd_mutex in the block_device structure, a new
blk_trace_mutex is now added to the request_queue structure to protect
access to the blk_trace structure.
Suggested-by: Christoph Hellwig <hch@infradead.org>
Signed-off-by: Waiman Long <longman@redhat.com>
Acked-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
Fix typo in patch subject line, and prune a comment detailing how
the code used to work.
Signed-off-by: Jens Axboe <axboe@kernel.dk>
Diffstat (limited to 'kernel/trace')
| -rw-r--r-- | kernel/trace/blktrace.c | 18 |
1 files changed, 12 insertions, 6 deletions
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index 2a685b45b73b..45a3928544ce 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c | |||
| @@ -648,6 +648,12 @@ int blk_trace_startstop(struct request_queue *q, int start) | |||
| 648 | } | 648 | } |
| 649 | EXPORT_SYMBOL_GPL(blk_trace_startstop); | 649 | EXPORT_SYMBOL_GPL(blk_trace_startstop); |
| 650 | 650 | ||
| 651 | /* | ||
| 652 | * When reading or writing the blktrace sysfs files, the references to the | ||
| 653 | * opened sysfs or device files should prevent the underlying block device | ||
| 654 | * from being removed. So no further delete protection is really needed. | ||
| 655 | */ | ||
| 656 | |||
| 651 | /** | 657 | /** |
| 652 | * blk_trace_ioctl: - handle the ioctls associated with tracing | 658 | * blk_trace_ioctl: - handle the ioctls associated with tracing |
| 653 | * @bdev: the block device | 659 | * @bdev: the block device |
| @@ -665,7 +671,7 @@ int blk_trace_ioctl(struct block_device *bdev, unsigned cmd, char __user *arg) | |||
| 665 | if (!q) | 671 | if (!q) |
| 666 | return -ENXIO; | 672 | return -ENXIO; |
| 667 | 673 | ||
| 668 | mutex_lock(&bdev->bd_mutex); | 674 | mutex_lock(&q->blk_trace_mutex); |
| 669 | 675 | ||
| 670 | switch (cmd) { | 676 | switch (cmd) { |
| 671 | case BLKTRACESETUP: | 677 | case BLKTRACESETUP: |
| @@ -691,7 +697,7 @@ int blk_trace_ioctl(struct block_device *bdev, unsigned cmd, char __user *arg) | |||
| 691 | break; | 697 | break; |
| 692 | } | 698 | } |
| 693 | 699 | ||
| 694 | mutex_unlock(&bdev->bd_mutex); | 700 | mutex_unlock(&q->blk_trace_mutex); |
| 695 | return ret; | 701 | return ret; |
| 696 | } | 702 | } |
| 697 | 703 | ||
| @@ -1727,7 +1733,7 @@ static ssize_t sysfs_blk_trace_attr_show(struct device *dev, | |||
| 1727 | if (q == NULL) | 1733 | if (q == NULL) |
| 1728 | goto out_bdput; | 1734 | goto out_bdput; |
| 1729 | 1735 | ||
| 1730 | mutex_lock(&bdev->bd_mutex); | 1736 | mutex_lock(&q->blk_trace_mutex); |
| 1731 | 1737 | ||
| 1732 | if (attr == &dev_attr_enable) { | 1738 | if (attr == &dev_attr_enable) { |
| 1733 | ret = sprintf(buf, "%u\n", !!q->blk_trace); | 1739 | ret = sprintf(buf, "%u\n", !!q->blk_trace); |
| @@ -1746,7 +1752,7 @@ static ssize_t sysfs_blk_trace_attr_show(struct device *dev, | |||
| 1746 | ret = sprintf(buf, "%llu\n", q->blk_trace->end_lba); | 1752 | ret = sprintf(buf, "%llu\n", q->blk_trace->end_lba); |
| 1747 | 1753 | ||
| 1748 | out_unlock_bdev: | 1754 | out_unlock_bdev: |
| 1749 | mutex_unlock(&bdev->bd_mutex); | 1755 | mutex_unlock(&q->blk_trace_mutex); |
| 1750 | out_bdput: | 1756 | out_bdput: |
| 1751 | bdput(bdev); | 1757 | bdput(bdev); |
| 1752 | out: | 1758 | out: |
| @@ -1788,7 +1794,7 @@ static ssize_t sysfs_blk_trace_attr_store(struct device *dev, | |||
| 1788 | if (q == NULL) | 1794 | if (q == NULL) |
| 1789 | goto out_bdput; | 1795 | goto out_bdput; |
| 1790 | 1796 | ||
| 1791 | mutex_lock(&bdev->bd_mutex); | 1797 | mutex_lock(&q->blk_trace_mutex); |
| 1792 | 1798 | ||
| 1793 | if (attr == &dev_attr_enable) { | 1799 | if (attr == &dev_attr_enable) { |
| 1794 | if (value) | 1800 | if (value) |
| @@ -1814,7 +1820,7 @@ static ssize_t sysfs_blk_trace_attr_store(struct device *dev, | |||
| 1814 | } | 1820 | } |
| 1815 | 1821 | ||
| 1816 | out_unlock_bdev: | 1822 | out_unlock_bdev: |
| 1817 | mutex_unlock(&bdev->bd_mutex); | 1823 | mutex_unlock(&q->blk_trace_mutex); |
| 1818 | out_bdput: | 1824 | out_bdput: |
| 1819 | bdput(bdev); | 1825 | bdput(bdev); |
| 1820 | out: | 1826 | out: |
