aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Tejun Heo <tj@kernel.org> 2010-04-07 05:53:59 -0400
committer Jens Axboe <jens.axboe@oracle.com> 2010-04-27 04:57:54 -0400
commit 6b4517a7913a09d3259bb1d21c9cb300f12294bd (patch)
tree a79a1a95c0ef193b20fc004b575f65826f2a85cc
parent 1a3cbbc5a5e8a66934aa0947896a4aca6fd77298 (diff)
block: implement bd_claiming and claiming block
Currently, device claiming for exclusive open is done after the low-level open - disk->fops->open() - has completed successfully. This means that exclusive open attempts made while a device is already exclusively open will fail only after disk->fops->open() has been called. The cdrom driver issues commands during open(), which means that an O_EXCL open attempt can unintentionally inject commands into the in-progress command stream for burning, thus disturbing the burning process. In most cases, this doesn't cause problems because the first command to be issued is TUR (TEST UNIT READY), which most devices can process in the middle of burning. However, depending on how a device replies to TUR during burning, the cdrom driver may end up issuing further commands. This can't be resolved trivially by moving bd_claim() before the actual open(), because that would mean an open attempt which will end up failing could interfere with other legitimate O_EXCL open attempts, i.e. unconfirmed open attempts could make others fail. This patch resolves the problem by introducing a claiming block, which is started by bd_start_claiming() and terminated by either bd_claim() or bd_abort_claiming(). bd_claim() from inside a claiming block is guaranteed to succeed, and once a claiming block is started, other bd_start_claiming() or bd_claim() attempts block until the current claiming block is terminated. bd_claim() can still be used standalone, although it now always synchronizes against claiming blocks, so existing users will keep working without any change. blkdev_open() and open_bdev_exclusive() are converted to use claiming blocks so that exclusive open attempts from these functions don't interfere with an existing exclusive open. This problem was discovered while investigating bko#15403: https://bugzilla.kernel.org/show_bug.cgi?id=15403 The burning problem itself can be resolved by updating userspace probing tools to always open with O_EXCL.
Signed-off-by: Tejun Heo <tj@kernel.org>
Reported-by: Matthias-Christian Ott <ott@mirix.org>
Cc: Kay Sievers <kay.sievers@vrfy.org>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
-rw-r--r--fs/block_dev.c198
-rw-r--r--include/linux/fs.h1
2 files changed, 175 insertions, 24 deletions
diff --git a/fs/block_dev.c b/fs/block_dev.c
index e59440c7e1cf..ea8385ea58ab 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -694,11 +694,144 @@ static bool bd_may_claim(struct block_device *bdev, struct block_device *whole,
694} 694}
695 695
696/** 696/**
697 * bd_prepare_to_claim - prepare to claim a block device
698 * @bdev: block device of interest
699 * @whole: the whole device containing @bdev, may equal @bdev
700 * @holder: holder trying to claim @bdev
701 *
702 * Prepare to claim @bdev. This function fails if @bdev is already
703 * claimed by another holder and waits if another claiming is in
704 * progress. This function doesn't actually claim. On successful
705 * return, the caller has ownership of bd_claiming and bd_holder[s].
706 *
707 * CONTEXT:
708 * spin_lock(&bdev_lock). Might release bdev_lock, sleep and regrab
709 * it multiple times.
710 *
711 * RETURNS:
712 * 0 if @bdev can be claimed, -EBUSY otherwise.
713 */
714static int bd_prepare_to_claim(struct block_device *bdev,
715 struct block_device *whole, void *holder)
716{
717retry:
718 /* if someone else claimed, fail */
719 if (!bd_may_claim(bdev, whole, holder))
720 return -EBUSY;
721
722 /* if someone else is claiming, wait for it to finish */
723 if (whole->bd_claiming && whole->bd_claiming != holder) {
724 wait_queue_head_t *wq = bit_waitqueue(&whole->bd_claiming, 0);
725 DEFINE_WAIT(wait);
726
727 prepare_to_wait(wq, &wait, TASK_UNINTERRUPTIBLE);
728 spin_unlock(&bdev_lock);
729 schedule();
730 finish_wait(wq, &wait);
731 spin_lock(&bdev_lock);
732 goto retry;
733 }
734
735 /* yay, all mine */
736 return 0;
737}
738
739/**
740 * bd_start_claiming - start claiming a block device
741 * @bdev: block device of interest
742 * @holder: holder trying to claim @bdev
743 *
744 * @bdev is about to be opened exclusively. Check @bdev can be opened
745 * exclusively and mark that an exclusive open is in progress. Each
746 * successful call to this function must be matched with a call to
747 * either bd_claim() or bd_abort_claiming(). If this function
748 * succeeds, the matching bd_claim() is guaranteed to succeed.
749 *
750 * CONTEXT:
751 * Might sleep.
752 *
753 * RETURNS:
754 * Pointer to the block device containing @bdev on success, ERR_PTR()
755 * value on failure.
756 */
757static struct block_device *bd_start_claiming(struct block_device *bdev,
758 void *holder)
759{
760 struct gendisk *disk;
761 struct block_device *whole;
762 int partno, err;
763
764 might_sleep();
765
766 /*
767 * @bdev might not have been initialized properly yet, look up
768 * and grab the outer block device the hard way.
769 */
770 disk = get_gendisk(bdev->bd_dev, &partno);
771 if (!disk)
772 return ERR_PTR(-ENXIO);
773
774 whole = bdget_disk(disk, 0);
775 put_disk(disk);
776 if (!whole)
777 return ERR_PTR(-ENOMEM);
778
779 /* prepare to claim, if successful, mark claiming in progress */
780 spin_lock(&bdev_lock);
781
782 err = bd_prepare_to_claim(bdev, whole, holder);
783 if (err == 0) {
784 whole->bd_claiming = holder;
785 spin_unlock(&bdev_lock);
786 return whole;
787 } else {
788 spin_unlock(&bdev_lock);
789 bdput(whole);
790 return ERR_PTR(err);
791 }
792}
793
794/* releases bdev_lock */
795static void __bd_abort_claiming(struct block_device *whole, void *holder)
796{
797 BUG_ON(whole->bd_claiming != holder);
798 whole->bd_claiming = NULL;
799 wake_up_bit(&whole->bd_claiming, 0);
800
801 spin_unlock(&bdev_lock);
802 bdput(whole);
803}
804
805/**
806 * bd_abort_claiming - abort claiming a block device
807 * @whole: whole block device returned by bd_start_claiming()
808 * @holder: holder trying to claim @bdev
809 *
810 * Abort a claiming block started by bd_start_claiming(). Note that
811 * @whole is not the block device to be claimed but the whole device
812 * returned by bd_start_claiming().
813 *
814 * CONTEXT:
815 * Grabs and releases bdev_lock.
816 */
817static void bd_abort_claiming(struct block_device *whole, void *holder)
818{
819 spin_lock(&bdev_lock);
820 __bd_abort_claiming(whole, holder); /* releases bdev_lock */
821}
822
823/**
697 * bd_claim - claim a block device 824 * bd_claim - claim a block device
698 * @bdev: block device to claim 825 * @bdev: block device to claim
699 * @holder: holder trying to claim @bdev 826 * @holder: holder trying to claim @bdev
700 * 827 *
701 * Try to claim @bdev. 828 * Try to claim @bdev which must have been opened successfully. This
829 * function may be called with or without preceding
830 * bd_start_claiming(). In the former case, this function is always
831 * successful and terminates the claiming block.
832 *
833 * CONTEXT:
834 * Might sleep.
702 * 835 *
703 * RETURNS: 836 * RETURNS:
704 * 0 if successful, -EBUSY if @bdev is already claimed. 837 * 0 if successful, -EBUSY if @bdev is already claimed.
@@ -706,11 +839,14 @@ static bool bd_may_claim(struct block_device *bdev, struct block_device *whole,
706int bd_claim(struct block_device *bdev, void *holder) 839int bd_claim(struct block_device *bdev, void *holder)
707{ 840{
708 struct block_device *whole = bdev->bd_contains; 841 struct block_device *whole = bdev->bd_contains;
709 int res = -EBUSY; 842 int res;
843
844 might_sleep();
710 845
711 spin_lock(&bdev_lock); 846 spin_lock(&bdev_lock);
712 847
713 if (bd_may_claim(bdev, whole, holder)) { 848 res = bd_prepare_to_claim(bdev, whole, holder);
849 if (res == 0) {
714 /* note that for a whole device bd_holders 850 /* note that for a whole device bd_holders
715 * will be incremented twice, and bd_holder will 851 * will be incremented twice, and bd_holder will
716 * be set to bd_claim before being set to holder 852 * be set to bd_claim before being set to holder
@@ -719,10 +855,13 @@ int bd_claim(struct block_device *bdev, void *holder)
719 whole->bd_holder = bd_claim; 855 whole->bd_holder = bd_claim;
720 bdev->bd_holders++; 856 bdev->bd_holders++;
721 bdev->bd_holder = holder; 857 bdev->bd_holder = holder;
722 res = 0;
723 } 858 }
724 859
725 spin_unlock(&bdev_lock); 860 if (whole->bd_claiming)
861 __bd_abort_claiming(whole, holder); /* releases bdev_lock */
862 else
863 spin_unlock(&bdev_lock);
864
726 return res; 865 return res;
727} 866}
728EXPORT_SYMBOL(bd_claim); 867EXPORT_SYMBOL(bd_claim);
@@ -1338,6 +1477,7 @@ EXPORT_SYMBOL(blkdev_get);
1338 1477
1339static int blkdev_open(struct inode * inode, struct file * filp) 1478static int blkdev_open(struct inode * inode, struct file * filp)
1340{ 1479{
1480 struct block_device *whole = NULL;
1341 struct block_device *bdev; 1481 struct block_device *bdev;
1342 int res; 1482 int res;
1343 1483
@@ -1360,22 +1500,25 @@ static int blkdev_open(struct inode * inode, struct file * filp)
1360 if (bdev == NULL) 1500 if (bdev == NULL)
1361 return -ENOMEM; 1501 return -ENOMEM;
1362 1502
1503 if (filp->f_mode & FMODE_EXCL) {
1504 whole = bd_start_claiming(bdev, filp);
1505 if (IS_ERR(whole)) {
1506 bdput(bdev);
1507 return PTR_ERR(whole);
1508 }
1509 }
1510
1363 filp->f_mapping = bdev->bd_inode->i_mapping; 1511 filp->f_mapping = bdev->bd_inode->i_mapping;
1364 1512
1365 res = blkdev_get(bdev, filp->f_mode); 1513 res = blkdev_get(bdev, filp->f_mode);
1366 if (res)
1367 return res;
1368 1514
1369 if (filp->f_mode & FMODE_EXCL) { 1515 if (whole) {
1370 res = bd_claim(bdev, filp); 1516 if (res == 0)
1371 if (res) 1517 BUG_ON(bd_claim(bdev, filp) != 0);
1372 goto out_blkdev_put; 1518 else
1519 bd_abort_claiming(whole, filp);
1373 } 1520 }
1374 1521
1375 return 0;
1376
1377 out_blkdev_put:
1378 blkdev_put(bdev, filp->f_mode);
1379 return res; 1522 return res;
1380} 1523}
1381 1524
@@ -1586,27 +1729,34 @@ EXPORT_SYMBOL(lookup_bdev);
1586 */ 1729 */
1587struct block_device *open_bdev_exclusive(const char *path, fmode_t mode, void *holder) 1730struct block_device *open_bdev_exclusive(const char *path, fmode_t mode, void *holder)
1588{ 1731{
1589 struct block_device *bdev; 1732 struct block_device *bdev, *whole;
1590 int error = 0; 1733 int error;
1591 1734
1592 bdev = lookup_bdev(path); 1735 bdev = lookup_bdev(path);
1593 if (IS_ERR(bdev)) 1736 if (IS_ERR(bdev))
1594 return bdev; 1737 return bdev;
1595 1738
1739 whole = bd_start_claiming(bdev, holder);
1740 if (IS_ERR(whole)) {
1741 bdput(bdev);
1742 return whole;
1743 }
1744
1596 error = blkdev_get(bdev, mode); 1745 error = blkdev_get(bdev, mode);
1597 if (error) 1746 if (error)
1598 return ERR_PTR(error); 1747 goto out_abort_claiming;
1748
1599 error = -EACCES; 1749 error = -EACCES;
1600 if ((mode & FMODE_WRITE) && bdev_read_only(bdev)) 1750 if ((mode & FMODE_WRITE) && bdev_read_only(bdev))
1601 goto blkdev_put; 1751 goto out_blkdev_put;
1602 error = bd_claim(bdev, holder);
1603 if (error)
1604 goto blkdev_put;
1605 1752
1753 BUG_ON(bd_claim(bdev, holder) != 0);
1606 return bdev; 1754 return bdev;
1607 1755
1608blkdev_put: 1756out_blkdev_put:
1609 blkdev_put(bdev, mode); 1757 blkdev_put(bdev, mode);
1758out_abort_claiming:
1759 bd_abort_claiming(whole, holder);
1610 return ERR_PTR(error); 1760 return ERR_PTR(error);
1611} 1761}
1612 1762
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 39d57bc6cc71..31ee31be51e9 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -651,6 +651,7 @@ struct block_device {
651 int bd_openers; 651 int bd_openers;
652 struct mutex bd_mutex; /* open/close mutex */ 652 struct mutex bd_mutex; /* open/close mutex */
653 struct list_head bd_inodes; 653 struct list_head bd_inodes;
654 void * bd_claiming;
654 void * bd_holder; 655 void * bd_holder;
655 int bd_holders; 656 int bd_holders;
656#ifdef CONFIG_SYSFS 657#ifdef CONFIG_SYSFS