Diffstat (limited to 'fs/block_dev.c')
 -rw-r--r--  fs/block_dev.c | 279
 1 file changed, 233 insertions(+), 46 deletions(-)
diff --git a/fs/block_dev.c b/fs/block_dev.c
index d11d0289f3d2..55dcb7884f4d 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -404,20 +404,28 @@ static loff_t block_llseek(struct file *file, loff_t offset, int origin)
  * NULL first argument is nfsd_sync_dir() and that's not a directory.
  */
 
-static int block_fsync(struct file *filp, struct dentry *dentry, int datasync)
+int blkdev_fsync(struct file *filp, struct dentry *dentry, int datasync)
 {
-	struct block_device *bdev = I_BDEV(filp->f_mapping->host);
+	struct inode *bd_inode = filp->f_mapping->host;
+	struct block_device *bdev = I_BDEV(bd_inode);
 	int error;
 
-	error = sync_blockdev(bdev);
-	if (error)
-		return error;
-
-	error = blkdev_issue_flush(bdev, NULL);
+	/*
+	 * There is no need to serialise calls to blkdev_issue_flush with
+	 * i_mutex and doing so causes performance issues with concurrent
+	 * O_SYNC writers to a block device.
+	 */
+	mutex_unlock(&bd_inode->i_mutex);
+
+	error = blkdev_issue_flush(bdev, GFP_KERNEL, NULL, BLKDEV_IFL_WAIT);
 	if (error == -EOPNOTSUPP)
 		error = 0;
+
+	mutex_lock(&bd_inode->i_mutex);
+
 	return error;
 }
+EXPORT_SYMBOL(blkdev_fsync);
 
 /*
  * pseudo-fs
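
The comment added above motivates dropping i_mutex across the cache flush: with the lock held, concurrent O_SYNC writers to the same block device end up serialising on each other's flushes. A hypothetical user-space workload of that shape (not part of the patch; the device path and sizes are made up) would be:

/* Illustrative only: several concurrent O_SYNC writers to one block device. */
#include <fcntl.h>
#include <string.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>

int main(void)
{
	const char *dev = "/dev/sdX";		/* hypothetical scratch device */
	static char buf[4096];
	int i;

	memset(buf, 0xab, sizeof(buf));
	for (i = 0; i < 4; i++) {		/* four writer processes */
		if (fork() == 0) {
			int fd = open(dev, O_WRONLY | O_SYNC);
			off_t base = (off_t)i * 64 * 1024 * 1024;
			int n;

			if (fd < 0)
				_exit(1);
			/* each O_SYNC write ends in the device's fsync path and a cache flush */
			for (n = 0; n < 1024; n++)
				pwrite(fd, buf, sizeof(buf), base + (off_t)n * sizeof(buf));
			close(fd);
			_exit(0);
		}
	}
	while (wait(NULL) > 0)
		;
	return 0;
}
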
@@ -660,41 +668,209 @@ void bd_forget(struct inode *inode)
 	iput(bdev->bd_inode);
 }
 
-int bd_claim(struct block_device *bdev, void *holder)
+/**
+ * bd_may_claim - test whether a block device can be claimed
+ * @bdev: block device of interest
+ * @whole: whole block device containing @bdev, may equal @bdev
+ * @holder: holder trying to claim @bdev
+ *
+ * Test whether @bdev can be claimed by @holder.
+ *
+ * CONTEXT:
+ * spin_lock(&bdev_lock).
+ *
+ * RETURNS:
+ * %true if @bdev can be claimed, %false otherwise.
+ */
+static bool bd_may_claim(struct block_device *bdev, struct block_device *whole,
+			 void *holder)
 {
-	int res;
-	spin_lock(&bdev_lock);
-
-	/* first decide result */
 	if (bdev->bd_holder == holder)
-		res = 0;	 /* already a holder */
+		return true;	 /* already a holder */
 	else if (bdev->bd_holder != NULL)
-		res = -EBUSY;	 /* held by someone else */
+		return false;	 /* held by someone else */
 	else if (bdev->bd_contains == bdev)
-		res = 0;	 /* is a whole device which isn't held */
+		return true;	 /* is a whole device which isn't held */
 
-	else if (bdev->bd_contains->bd_holder == bd_claim)
-		res = 0;	 /* is a partition of a device that is being partitioned */
-	else if (bdev->bd_contains->bd_holder != NULL)
-		res = -EBUSY;	 /* is a partition of a held device */
+	else if (whole->bd_holder == bd_claim)
+		return true;	 /* is a partition of a device that is being partitioned */
+	else if (whole->bd_holder != NULL)
+		return false;	 /* is a partition of a held device */
 	else
-		res = 0;	 /* is a partition of an un-held device */
+		return true;	 /* is a partition of an un-held device */
+}
+
+/**
+ * bd_prepare_to_claim - prepare to claim a block device
+ * @bdev: block device of interest
+ * @whole: the whole device containing @bdev, may equal @bdev
+ * @holder: holder trying to claim @bdev
+ *
+ * Prepare to claim @bdev.  This function fails if @bdev is already
+ * claimed by another holder and waits if another claiming is in
+ * progress.  This function doesn't actually claim.  On successful
+ * return, the caller has ownership of bd_claiming and bd_holder[s].
+ *
+ * CONTEXT:
+ * spin_lock(&bdev_lock).  Might release bdev_lock, sleep and regrab
+ * it multiple times.
+ *
+ * RETURNS:
+ * 0 if @bdev can be claimed, -EBUSY otherwise.
+ */
+static int bd_prepare_to_claim(struct block_device *bdev,
+			       struct block_device *whole, void *holder)
+{
+retry:
+	/* if someone else claimed, fail */
+	if (!bd_may_claim(bdev, whole, holder))
+		return -EBUSY;
+
+	/* if someone else is claiming, wait for it to finish */
+	if (whole->bd_claiming && whole->bd_claiming != holder) {
+		wait_queue_head_t *wq = bit_waitqueue(&whole->bd_claiming, 0);
+		DEFINE_WAIT(wait);
+
+		prepare_to_wait(wq, &wait, TASK_UNINTERRUPTIBLE);
+		spin_unlock(&bdev_lock);
+		schedule();
+		finish_wait(wq, &wait);
+		spin_lock(&bdev_lock);
+		goto retry;
+	}
+
+	/* yay, all mine */
+	return 0;
+}
+
+/**
+ * bd_start_claiming - start claiming a block device
+ * @bdev: block device of interest
+ * @holder: holder trying to claim @bdev
+ *
+ * @bdev is about to be opened exclusively.  Check @bdev can be opened
+ * exclusively and mark that an exclusive open is in progress.  Each
+ * successful call to this function must be matched with a call to
+ * either bd_claim() or bd_abort_claiming().  If this function
+ * succeeds, the matching bd_claim() is guaranteed to succeed.
+ *
+ * CONTEXT:
+ * Might sleep.
+ *
+ * RETURNS:
+ * Pointer to the block device containing @bdev on success, ERR_PTR()
+ * value on failure.
+ */
+static struct block_device *bd_start_claiming(struct block_device *bdev,
+					      void *holder)
+{
+	struct gendisk *disk;
+	struct block_device *whole;
+	int partno, err;
+
+	might_sleep();
+
+	/*
+	 * @bdev might not have been initialized properly yet, look up
+	 * and grab the outer block device the hard way.
+	 */
+	disk = get_gendisk(bdev->bd_dev, &partno);
+	if (!disk)
+		return ERR_PTR(-ENXIO);
+
+	whole = bdget_disk(disk, 0);
+	put_disk(disk);
+	if (!whole)
+		return ERR_PTR(-ENOMEM);
+
+	/* prepare to claim, if successful, mark claiming in progress */
+	spin_lock(&bdev_lock);
+
+	err = bd_prepare_to_claim(bdev, whole, holder);
+	if (err == 0) {
+		whole->bd_claiming = holder;
+		spin_unlock(&bdev_lock);
+		return whole;
+	} else {
+		spin_unlock(&bdev_lock);
+		bdput(whole);
+		return ERR_PTR(err);
+	}
+}
+
+/* releases bdev_lock */
+static void __bd_abort_claiming(struct block_device *whole, void *holder)
+{
+	BUG_ON(whole->bd_claiming != holder);
+	whole->bd_claiming = NULL;
+	wake_up_bit(&whole->bd_claiming, 0);
+
+	spin_unlock(&bdev_lock);
+	bdput(whole);
+}
+
+/**
+ * bd_abort_claiming - abort claiming a block device
+ * @whole: whole block device returned by bd_start_claiming()
+ * @holder: holder trying to claim @bdev
+ *
+ * Abort a claiming block started by bd_start_claiming().  Note that
+ * @whole is not the block device to be claimed but the whole device
+ * returned by bd_start_claiming().
+ *
+ * CONTEXT:
+ * Grabs and releases bdev_lock.
+ */
+static void bd_abort_claiming(struct block_device *whole, void *holder)
+{
+	spin_lock(&bdev_lock);
+	__bd_abort_claiming(whole, holder);	/* releases bdev_lock */
+}
+
+/**
+ * bd_claim - claim a block device
+ * @bdev: block device to claim
+ * @holder: holder trying to claim @bdev
+ *
+ * Try to claim @bdev which must have been opened successfully.  This
+ * function may be called with or without preceding
+ * bd_start_claiming().  In the former case, this function is always
+ * successful and terminates the claiming block.
+ *
+ * CONTEXT:
+ * Might sleep.
+ *
+ * RETURNS:
+ * 0 if successful, -EBUSY if @bdev is already claimed.
+ */
+int bd_claim(struct block_device *bdev, void *holder)
+{
+	struct block_device *whole = bdev->bd_contains;
+	int res;
+
+	might_sleep();
 
-	/* now impose change */
-	if (res==0) {
+	spin_lock(&bdev_lock);
+
+	res = bd_prepare_to_claim(bdev, whole, holder);
+	if (res == 0) {
 		/* note that for a whole device bd_holders
 		 * will be incremented twice, and bd_holder will
 		 * be set to bd_claim before being set to holder
 		 */
-		bdev->bd_contains->bd_holders ++;
-		bdev->bd_contains->bd_holder = bd_claim;
+		whole->bd_holders++;
+		whole->bd_holder = bd_claim;
 		bdev->bd_holders++;
 		bdev->bd_holder = holder;
 	}
-	spin_unlock(&bdev_lock);
+
+	if (whole->bd_claiming)
+		__bd_abort_claiming(whole, holder); /* releases bdev_lock */
+	else
+		spin_unlock(&bdev_lock);
+
 	return res;
 }
-
 EXPORT_SYMBOL(bd_claim);
 
 void bd_release(struct block_device *bdev)
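
The kernel-doc comments above describe a three-step protocol for exclusive opens: bd_start_claiming() reserves the claim (and may sleep waiting for a competing claimer), blkdev_get() opens the device, and the caller then finishes with either bd_claim(), which is guaranteed to succeed after a successful bd_start_claiming(), or bd_abort_claiming(). A minimal sketch of a caller following that protocol is shown below; it mirrors the blkdev_open() and open_bdev_exclusive() changes later in this patch, and the wrapper name open_claimed() is purely illustrative (bd_start_claiming() is static, so such a caller would have to live in fs/block_dev.c).

/* Sketch only: exclusive open using the claiming protocol added above. */
static struct block_device *open_claimed(struct block_device *bdev,
					 fmode_t mode, void *holder)
{
	struct block_device *whole;
	int err;

	whole = bd_start_claiming(bdev, holder);	/* step 1: reserve the claim */
	if (IS_ERR(whole))
		return whole;				/* e.g. ERR_PTR(-EBUSY) */

	err = blkdev_get(bdev, mode);			/* step 2: open the device */
	if (err) {
		bd_abort_claiming(whole, holder);	/* step 3a: give up the reservation */
		return ERR_PTR(err);
	}

	BUG_ON(bd_claim(bdev, holder) != 0);		/* step 3b: cannot fail after step 1 */
	return bdev;
}
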
@@ -1308,6 +1484,7 @@ EXPORT_SYMBOL(blkdev_get);
 
 static int blkdev_open(struct inode * inode, struct file * filp)
 {
+	struct block_device *whole = NULL;
 	struct block_device *bdev;
 	int res;
 
@@ -1330,22 +1507,25 @@ static int blkdev_open(struct inode * inode, struct file * filp)
 	if (bdev == NULL)
 		return -ENOMEM;
 
+	if (filp->f_mode & FMODE_EXCL) {
+		whole = bd_start_claiming(bdev, filp);
+		if (IS_ERR(whole)) {
+			bdput(bdev);
+			return PTR_ERR(whole);
+		}
+	}
+
 	filp->f_mapping = bdev->bd_inode->i_mapping;
 
 	res = blkdev_get(bdev, filp->f_mode);
-	if (res)
-		return res;
 
-	if (filp->f_mode & FMODE_EXCL) {
-		res = bd_claim(bdev, filp);
-		if (res)
-			goto out_blkdev_put;
+	if (whole) {
+		if (res == 0)
+			BUG_ON(bd_claim(bdev, filp) != 0);
+		else
+			bd_abort_claiming(whole, filp);
 	}
 
-	return 0;
-
- out_blkdev_put:
-	blkdev_put(bdev, filp->f_mode);
 	return res;
 }
 
@@ -1481,7 +1661,7 @@ const struct file_operations def_blk_fops = {
 	.aio_read	= generic_file_aio_read,
 	.aio_write	= blkdev_aio_write,
 	.mmap		= generic_file_mmap,
-	.fsync		= block_fsync,
+	.fsync		= blkdev_fsync,
 	.unlocked_ioctl	= block_ioctl,
 #ifdef CONFIG_COMPAT
 	.compat_ioctl	= compat_blkdev_ioctl,
@@ -1556,27 +1736,34 @@ EXPORT_SYMBOL(lookup_bdev);
  */
 struct block_device *open_bdev_exclusive(const char *path, fmode_t mode, void *holder)
 {
-	struct block_device *bdev;
-	int error = 0;
+	struct block_device *bdev, *whole;
+	int error;
 
 	bdev = lookup_bdev(path);
 	if (IS_ERR(bdev))
 		return bdev;
 
+	whole = bd_start_claiming(bdev, holder);
+	if (IS_ERR(whole)) {
+		bdput(bdev);
+		return whole;
+	}
+
 	error = blkdev_get(bdev, mode);
 	if (error)
-		return ERR_PTR(error);
+		goto out_abort_claiming;
+
 	error = -EACCES;
 	if ((mode & FMODE_WRITE) && bdev_read_only(bdev))
-		goto blkdev_put;
-	error = bd_claim(bdev, holder);
-	if (error)
-		goto blkdev_put;
+		goto out_blkdev_put;
 
+	BUG_ON(bd_claim(bdev, holder) != 0);
 	return bdev;
 
-blkdev_put:
+out_blkdev_put:
 	blkdev_put(bdev, mode);
+out_abort_claiming:
+	bd_abort_claiming(whole, holder);
 	return ERR_PTR(error);
 }
 