diff options
author | Glenn Elliott <gelliott@cs.unc.edu> | 2012-03-04 19:47:13 -0500 |
---|---|---|
committer | Glenn Elliott <gelliott@cs.unc.edu> | 2012-03-04 19:47:13 -0500 |
commit | c71c03bda1e86c9d5198c5d83f712e695c4f2a1e (patch) | |
tree | ecb166cb3e2b7e2adb3b5e292245fefd23381ac8 /fs/block_dev.c | |
parent | ea53c912f8a86a8567697115b6a0d8152beee5c8 (diff) | |
parent | 6a00f206debf8a5c8899055726ad127dbeeed098 (diff) |
Merge branch 'mpi-master' into wip-k-fmlp
Conflicts:
litmus/sched_cedf.c
Diffstat (limited to 'fs/block_dev.c')
-rw-r--r-- | fs/block_dev.c | 906 |
1 file changed, 390 insertions, 516 deletions
diff --git a/fs/block_dev.c b/fs/block_dev.c index 50e8c8582faa..610e8e0b04b8 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c | |||
@@ -11,7 +11,6 @@ | |||
11 | #include <linux/slab.h> | 11 | #include <linux/slab.h> |
12 | #include <linux/kmod.h> | 12 | #include <linux/kmod.h> |
13 | #include <linux/major.h> | 13 | #include <linux/major.h> |
14 | #include <linux/smp_lock.h> | ||
15 | #include <linux/device_cgroup.h> | 14 | #include <linux/device_cgroup.h> |
16 | #include <linux/highmem.h> | 15 | #include <linux/highmem.h> |
17 | #include <linux/blkdev.h> | 16 | #include <linux/blkdev.h> |
@@ -48,6 +47,23 @@ inline struct block_device *I_BDEV(struct inode *inode) | |||
48 | 47 | ||
49 | EXPORT_SYMBOL(I_BDEV); | 48 | EXPORT_SYMBOL(I_BDEV); |
50 | 49 | ||
50 | /* | ||
51 | * move the inode from its current bdi to a new bdi. if the inode is dirty | ||
52 | * we need to move it onto the dirty list of @dst so that the inode is always | ||
53 | * on the right list. | ||
54 | */ | ||
55 | static void bdev_inode_switch_bdi(struct inode *inode, | ||
56 | struct backing_dev_info *dst) | ||
57 | { | ||
58 | spin_lock(&inode_wb_list_lock); | ||
59 | spin_lock(&inode->i_lock); | ||
60 | inode->i_data.backing_dev_info = dst; | ||
61 | if (inode->i_state & I_DIRTY) | ||
62 | list_move(&inode->i_wb_list, &dst->wb.b_dirty); | ||
63 | spin_unlock(&inode->i_lock); | ||
64 | spin_unlock(&inode_wb_list_lock); | ||
65 | } | ||
66 | |||
51 | static sector_t max_block(struct block_device *bdev) | 67 | static sector_t max_block(struct block_device *bdev) |
52 | { | 68 | { |
53 | sector_t retval = ~((sector_t)0); | 69 | sector_t retval = ~((sector_t)0); |
@@ -370,7 +386,7 @@ int blkdev_fsync(struct file *filp, int datasync) | |||
370 | */ | 386 | */ |
371 | mutex_unlock(&bd_inode->i_mutex); | 387 | mutex_unlock(&bd_inode->i_mutex); |
372 | 388 | ||
373 | error = blkdev_issue_flush(bdev, GFP_KERNEL, NULL, BLKDEV_IFL_WAIT); | 389 | error = blkdev_issue_flush(bdev, GFP_KERNEL, NULL); |
374 | if (error == -EOPNOTSUPP) | 390 | if (error == -EOPNOTSUPP) |
375 | error = 0; | 391 | error = 0; |
376 | 392 | ||
@@ -395,13 +411,20 @@ static struct inode *bdev_alloc_inode(struct super_block *sb) | |||
395 | return &ei->vfs_inode; | 411 | return &ei->vfs_inode; |
396 | } | 412 | } |
397 | 413 | ||
398 | static void bdev_destroy_inode(struct inode *inode) | 414 | static void bdev_i_callback(struct rcu_head *head) |
399 | { | 415 | { |
416 | struct inode *inode = container_of(head, struct inode, i_rcu); | ||
400 | struct bdev_inode *bdi = BDEV_I(inode); | 417 | struct bdev_inode *bdi = BDEV_I(inode); |
401 | 418 | ||
419 | INIT_LIST_HEAD(&inode->i_dentry); | ||
402 | kmem_cache_free(bdev_cachep, bdi); | 420 | kmem_cache_free(bdev_cachep, bdi); |
403 | } | 421 | } |
404 | 422 | ||
423 | static void bdev_destroy_inode(struct inode *inode) | ||
424 | { | ||
425 | call_rcu(&inode->i_rcu, bdev_i_callback); | ||
426 | } | ||
427 | |||
405 | static void init_once(void *foo) | 428 | static void init_once(void *foo) |
406 | { | 429 | { |
407 | struct bdev_inode *ei = (struct bdev_inode *) foo; | 430 | struct bdev_inode *ei = (struct bdev_inode *) foo; |
@@ -412,7 +435,7 @@ static void init_once(void *foo) | |||
412 | INIT_LIST_HEAD(&bdev->bd_inodes); | 435 | INIT_LIST_HEAD(&bdev->bd_inodes); |
413 | INIT_LIST_HEAD(&bdev->bd_list); | 436 | INIT_LIST_HEAD(&bdev->bd_list); |
414 | #ifdef CONFIG_SYSFS | 437 | #ifdef CONFIG_SYSFS |
415 | INIT_LIST_HEAD(&bdev->bd_holder_list); | 438 | INIT_LIST_HEAD(&bdev->bd_holder_disks); |
416 | #endif | 439 | #endif |
417 | inode_init_once(&ei->vfs_inode); | 440 | inode_init_once(&ei->vfs_inode); |
418 | /* Initialize mutex for freeze. */ | 441 | /* Initialize mutex for freeze. */ |
@@ -449,15 +472,15 @@ static const struct super_operations bdev_sops = { | |||
449 | .evict_inode = bdev_evict_inode, | 472 | .evict_inode = bdev_evict_inode, |
450 | }; | 473 | }; |
451 | 474 | ||
452 | static int bd_get_sb(struct file_system_type *fs_type, | 475 | static struct dentry *bd_mount(struct file_system_type *fs_type, |
453 | int flags, const char *dev_name, void *data, struct vfsmount *mnt) | 476 | int flags, const char *dev_name, void *data) |
454 | { | 477 | { |
455 | return get_sb_pseudo(fs_type, "bdev:", &bdev_sops, 0x62646576, mnt); | 478 | return mount_pseudo(fs_type, "bdev:", &bdev_sops, NULL, 0x62646576); |
456 | } | 479 | } |
457 | 480 | ||
458 | static struct file_system_type bd_type = { | 481 | static struct file_system_type bd_type = { |
459 | .name = "bdev", | 482 | .name = "bdev", |
460 | .get_sb = bd_get_sb, | 483 | .mount = bd_mount, |
461 | .kill_sb = kill_anon_super, | 484 | .kill_sb = kill_anon_super, |
462 | }; | 485 | }; |
463 | 486 | ||
@@ -550,7 +573,7 @@ EXPORT_SYMBOL(bdget); | |||
550 | */ | 573 | */ |
551 | struct block_device *bdgrab(struct block_device *bdev) | 574 | struct block_device *bdgrab(struct block_device *bdev) |
552 | { | 575 | { |
553 | atomic_inc(&bdev->bd_inode->i_count); | 576 | ihold(bdev->bd_inode); |
554 | return bdev; | 577 | return bdev; |
555 | } | 578 | } |
556 | 579 | ||
@@ -580,7 +603,7 @@ static struct block_device *bd_acquire(struct inode *inode) | |||
580 | spin_lock(&bdev_lock); | 603 | spin_lock(&bdev_lock); |
581 | bdev = inode->i_bdev; | 604 | bdev = inode->i_bdev; |
582 | if (bdev) { | 605 | if (bdev) { |
583 | atomic_inc(&bdev->bd_inode->i_count); | 606 | ihold(bdev->bd_inode); |
584 | spin_unlock(&bdev_lock); | 607 | spin_unlock(&bdev_lock); |
585 | return bdev; | 608 | return bdev; |
586 | } | 609 | } |
@@ -591,12 +614,12 @@ static struct block_device *bd_acquire(struct inode *inode) | |||
591 | spin_lock(&bdev_lock); | 614 | spin_lock(&bdev_lock); |
592 | if (!inode->i_bdev) { | 615 | if (!inode->i_bdev) { |
593 | /* | 616 | /* |
594 | * We take an additional bd_inode->i_count for inode, | 617 | * We take an additional reference to bd_inode, |
595 | * and it's released in clear_inode() of inode. | 618 | * and it's released in clear_inode() of inode. |
596 | * So, we can access it via ->i_mapping always | 619 | * So, we can access it via ->i_mapping always |
597 | * without igrab(). | 620 | * without igrab(). |
598 | */ | 621 | */ |
599 | atomic_inc(&bdev->bd_inode->i_count); | 622 | ihold(bdev->bd_inode); |
600 | inode->i_bdev = bdev; | 623 | inode->i_bdev = bdev; |
601 | inode->i_mapping = bdev->bd_inode->i_mapping; | 624 | inode->i_mapping = bdev->bd_inode->i_mapping; |
602 | list_add(&inode->i_devices, &bdev->bd_inodes); | 625 | list_add(&inode->i_devices, &bdev->bd_inodes); |
@@ -630,7 +653,7 @@ void bd_forget(struct inode *inode) | |||
630 | * @whole: whole block device containing @bdev, may equal @bdev | 653 | * @whole: whole block device containing @bdev, may equal @bdev |
631 | * @holder: holder trying to claim @bdev | 654 | * @holder: holder trying to claim @bdev |
632 | * | 655 | * |
633 | * Test whther @bdev can be claimed by @holder. | 656 | * Test whether @bdev can be claimed by @holder. |
634 | * | 657 | * |
635 | * CONTEXT: | 658 | * CONTEXT: |
636 | * spin_lock(&bdev_lock). | 659 | * spin_lock(&bdev_lock). |
@@ -648,7 +671,7 @@ static bool bd_may_claim(struct block_device *bdev, struct block_device *whole, | |||
648 | else if (bdev->bd_contains == bdev) | 671 | else if (bdev->bd_contains == bdev) |
649 | return true; /* is a whole device which isn't held */ | 672 | return true; /* is a whole device which isn't held */ |
650 | 673 | ||
651 | else if (whole->bd_holder == bd_claim) | 674 | else if (whole->bd_holder == bd_may_claim) |
652 | return true; /* is a partition of a device that is being partitioned */ | 675 | return true; /* is a partition of a device that is being partitioned */ |
653 | else if (whole->bd_holder != NULL) | 676 | else if (whole->bd_holder != NULL) |
654 | return false; /* is a partition of a held device */ | 677 | return false; /* is a partition of a held device */ |
@@ -739,7 +762,19 @@ static struct block_device *bd_start_claiming(struct block_device *bdev, | |||
739 | if (!disk) | 762 | if (!disk) |
740 | return ERR_PTR(-ENXIO); | 763 | return ERR_PTR(-ENXIO); |
741 | 764 | ||
742 | whole = bdget_disk(disk, 0); | 765 | /* |
766 | * Normally, @bdev should equal what's returned from bdget_disk() | ||
767 | * if partno is 0; however, some drivers (floppy) use multiple | ||
768 | * bdev's for the same physical device and @bdev may be one of the | ||
769 | * aliases. Keep @bdev if partno is 0. This means claimer | ||
770 | * tracking is broken for those devices but it has always been that | ||
771 | * way. | ||
772 | */ | ||
773 | if (partno) | ||
774 | whole = bdget_disk(disk, 0); | ||
775 | else | ||
776 | whole = bdgrab(bdev); | ||
777 | |||
743 | module_put(disk->fops->owner); | 778 | module_put(disk->fops->owner); |
744 | put_disk(disk); | 779 | put_disk(disk); |
745 | if (!whole) | 780 | if (!whole) |
@@ -760,452 +795,162 @@ static struct block_device *bd_start_claiming(struct block_device *bdev, | |||
760 | } | 795 | } |
761 | } | 796 | } |
762 | 797 | ||
763 | /* releases bdev_lock */ | 798 | #ifdef CONFIG_SYSFS |
764 | static void __bd_abort_claiming(struct block_device *whole, void *holder) | 799 | struct bd_holder_disk { |
765 | { | 800 | struct list_head list; |
766 | BUG_ON(whole->bd_claiming != holder); | 801 | struct gendisk *disk; |
767 | whole->bd_claiming = NULL; | 802 | int refcnt; |
768 | wake_up_bit(&whole->bd_claiming, 0); | 803 | }; |
769 | |||
770 | spin_unlock(&bdev_lock); | ||
771 | bdput(whole); | ||
772 | } | ||
773 | |||
774 | /** | ||
775 | * bd_abort_claiming - abort claiming a block device | ||
776 | * @whole: whole block device returned by bd_start_claiming() | ||
777 | * @holder: holder trying to claim @bdev | ||
778 | * | ||
779 | * Abort a claiming block started by bd_start_claiming(). Note that | ||
780 | * @whole is not the block device to be claimed but the whole device | ||
781 | * returned by bd_start_claiming(). | ||
782 | * | ||
783 | * CONTEXT: | ||
784 | * Grabs and releases bdev_lock. | ||
785 | */ | ||
786 | static void bd_abort_claiming(struct block_device *whole, void *holder) | ||
787 | { | ||
788 | spin_lock(&bdev_lock); | ||
789 | __bd_abort_claiming(whole, holder); /* releases bdev_lock */ | ||
790 | } | ||
791 | |||
792 | /* increment holders when we have a legitimate claim. requires bdev_lock */ | ||
793 | static void __bd_claim(struct block_device *bdev, struct block_device *whole, | ||
794 | void *holder) | ||
795 | { | ||
796 | /* note that for a whole device bd_holders | ||
797 | * will be incremented twice, and bd_holder will | ||
798 | * be set to bd_claim before being set to holder | ||
799 | */ | ||
800 | whole->bd_holders++; | ||
801 | whole->bd_holder = bd_claim; | ||
802 | bdev->bd_holders++; | ||
803 | bdev->bd_holder = holder; | ||
804 | } | ||
805 | 804 | ||
806 | /** | 805 | static struct bd_holder_disk *bd_find_holder_disk(struct block_device *bdev, |
807 | * bd_finish_claiming - finish claiming a block device | 806 | struct gendisk *disk) |
808 | * @bdev: block device of interest (passed to bd_start_claiming()) | ||
809 | * @whole: whole block device returned by bd_start_claiming() | ||
810 | * @holder: holder trying to claim @bdev | ||
811 | * | ||
812 | * Finish a claiming block started by bd_start_claiming(). | ||
813 | * | ||
814 | * CONTEXT: | ||
815 | * Grabs and releases bdev_lock. | ||
816 | */ | ||
817 | static void bd_finish_claiming(struct block_device *bdev, | ||
818 | struct block_device *whole, void *holder) | ||
819 | { | 807 | { |
820 | spin_lock(&bdev_lock); | 808 | struct bd_holder_disk *holder; |
821 | BUG_ON(!bd_may_claim(bdev, whole, holder)); | ||
822 | __bd_claim(bdev, whole, holder); | ||
823 | __bd_abort_claiming(whole, holder); /* not actually an abort */ | ||
824 | } | ||
825 | 809 | ||
826 | /** | 810 | list_for_each_entry(holder, &bdev->bd_holder_disks, list) |
827 | * bd_claim - claim a block device | 811 | if (holder->disk == disk) |
828 | * @bdev: block device to claim | 812 | return holder; |
829 | * @holder: holder trying to claim @bdev | 813 | return NULL; |
830 | * | ||
831 | * Try to claim @bdev which must have been opened successfully. | ||
832 | * | ||
833 | * CONTEXT: | ||
834 | * Might sleep. | ||
835 | * | ||
836 | * RETURNS: | ||
837 | * 0 if successful, -EBUSY if @bdev is already claimed. | ||
838 | */ | ||
839 | int bd_claim(struct block_device *bdev, void *holder) | ||
840 | { | ||
841 | struct block_device *whole = bdev->bd_contains; | ||
842 | int res; | ||
843 | |||
844 | might_sleep(); | ||
845 | |||
846 | spin_lock(&bdev_lock); | ||
847 | res = bd_prepare_to_claim(bdev, whole, holder); | ||
848 | if (res == 0) | ||
849 | __bd_claim(bdev, whole, holder); | ||
850 | spin_unlock(&bdev_lock); | ||
851 | |||
852 | return res; | ||
853 | } | ||
854 | EXPORT_SYMBOL(bd_claim); | ||
855 | |||
856 | void bd_release(struct block_device *bdev) | ||
857 | { | ||
858 | spin_lock(&bdev_lock); | ||
859 | if (!--bdev->bd_contains->bd_holders) | ||
860 | bdev->bd_contains->bd_holder = NULL; | ||
861 | if (!--bdev->bd_holders) | ||
862 | bdev->bd_holder = NULL; | ||
863 | spin_unlock(&bdev_lock); | ||
864 | } | 814 | } |
865 | 815 | ||
866 | EXPORT_SYMBOL(bd_release); | ||
867 | |||
868 | #ifdef CONFIG_SYSFS | ||
869 | /* | ||
870 | * Functions for bd_claim_by_kobject / bd_release_from_kobject | ||
871 | * | ||
872 | * If a kobject is passed to bd_claim_by_kobject() | ||
873 | * and the kobject has a parent directory, | ||
874 | * following symlinks are created: | ||
875 | * o from the kobject to the claimed bdev | ||
876 | * o from "holders" directory of the bdev to the parent of the kobject | ||
877 | * bd_release_from_kobject() removes these symlinks. | ||
878 | * | ||
879 | * Example: | ||
880 | * If /dev/dm-0 maps to /dev/sda, kobject corresponding to | ||
881 | * /sys/block/dm-0/slaves is passed to bd_claim_by_kobject(), then: | ||
882 | * /sys/block/dm-0/slaves/sda --> /sys/block/sda | ||
883 | * /sys/block/sda/holders/dm-0 --> /sys/block/dm-0 | ||
884 | */ | ||
885 | |||
886 | static int add_symlink(struct kobject *from, struct kobject *to) | 816 | static int add_symlink(struct kobject *from, struct kobject *to) |
887 | { | 817 | { |
888 | if (!from || !to) | ||
889 | return 0; | ||
890 | return sysfs_create_link(from, to, kobject_name(to)); | 818 | return sysfs_create_link(from, to, kobject_name(to)); |
891 | } | 819 | } |
892 | 820 | ||
893 | static void del_symlink(struct kobject *from, struct kobject *to) | 821 | static void del_symlink(struct kobject *from, struct kobject *to) |
894 | { | 822 | { |
895 | if (!from || !to) | ||
896 | return; | ||
897 | sysfs_remove_link(from, kobject_name(to)); | 823 | sysfs_remove_link(from, kobject_name(to)); |
898 | } | 824 | } |
899 | 825 | ||
900 | /* | ||
901 | * 'struct bd_holder' contains pointers to kobjects symlinked by | ||
902 | * bd_claim_by_kobject. | ||
903 | * It's connected to bd_holder_list which is protected by bdev->bd_sem. | ||
904 | */ | ||
905 | struct bd_holder { | ||
906 | struct list_head list; /* chain of holders of the bdev */ | ||
907 | int count; /* references from the holder */ | ||
908 | struct kobject *sdir; /* holder object, e.g. "/block/dm-0/slaves" */ | ||
909 | struct kobject *hdev; /* e.g. "/block/dm-0" */ | ||
910 | struct kobject *hdir; /* e.g. "/block/sda/holders" */ | ||
911 | struct kobject *sdev; /* e.g. "/block/sda" */ | ||
912 | }; | ||
913 | |||
914 | /* | ||
915 | * Get references of related kobjects at once. | ||
916 | * Returns 1 on success. 0 on failure. | ||
917 | * | ||
918 | * Should call bd_holder_release_dirs() after successful use. | ||
919 | */ | ||
920 | static int bd_holder_grab_dirs(struct block_device *bdev, | ||
921 | struct bd_holder *bo) | ||
922 | { | ||
923 | if (!bdev || !bo) | ||
924 | return 0; | ||
925 | |||
926 | bo->sdir = kobject_get(bo->sdir); | ||
927 | if (!bo->sdir) | ||
928 | return 0; | ||
929 | |||
930 | bo->hdev = kobject_get(bo->sdir->parent); | ||
931 | if (!bo->hdev) | ||
932 | goto fail_put_sdir; | ||
933 | |||
934 | bo->sdev = kobject_get(&part_to_dev(bdev->bd_part)->kobj); | ||
935 | if (!bo->sdev) | ||
936 | goto fail_put_hdev; | ||
937 | |||
938 | bo->hdir = kobject_get(bdev->bd_part->holder_dir); | ||
939 | if (!bo->hdir) | ||
940 | goto fail_put_sdev; | ||
941 | |||
942 | return 1; | ||
943 | |||
944 | fail_put_sdev: | ||
945 | kobject_put(bo->sdev); | ||
946 | fail_put_hdev: | ||
947 | kobject_put(bo->hdev); | ||
948 | fail_put_sdir: | ||
949 | kobject_put(bo->sdir); | ||
950 | |||
951 | return 0; | ||
952 | } | ||
953 | |||
954 | /* Put references of related kobjects at once. */ | ||
955 | static void bd_holder_release_dirs(struct bd_holder *bo) | ||
956 | { | ||
957 | kobject_put(bo->hdir); | ||
958 | kobject_put(bo->sdev); | ||
959 | kobject_put(bo->hdev); | ||
960 | kobject_put(bo->sdir); | ||
961 | } | ||
962 | |||
963 | static struct bd_holder *alloc_bd_holder(struct kobject *kobj) | ||
964 | { | ||
965 | struct bd_holder *bo; | ||
966 | |||
967 | bo = kzalloc(sizeof(*bo), GFP_KERNEL); | ||
968 | if (!bo) | ||
969 | return NULL; | ||
970 | |||
971 | bo->count = 1; | ||
972 | bo->sdir = kobj; | ||
973 | |||
974 | return bo; | ||
975 | } | ||
976 | |||
977 | static void free_bd_holder(struct bd_holder *bo) | ||
978 | { | ||
979 | kfree(bo); | ||
980 | } | ||
981 | |||
982 | /** | 826 | /** |
983 | * find_bd_holder - find matching struct bd_holder from the block device | 827 | * bd_link_disk_holder - create symlinks between holding disk and slave bdev |
828 | * @bdev: the claimed slave bdev | ||
829 | * @disk: the holding disk | ||
984 | * | 830 | * |
985 | * @bdev: struct block device to be searched | 831 | * DON'T USE THIS UNLESS YOU'RE ALREADY USING IT. |
986 | * @bo: target struct bd_holder | ||
987 | * | 832 | * |
988 | * Returns matching entry with @bo in @bdev->bd_holder_list. | 833 | * This functions creates the following sysfs symlinks. |
989 | * If found, increment the reference count and return the pointer. | 834 | * |
990 | * If not found, returns NULL. | 835 | * - from "slaves" directory of the holder @disk to the claimed @bdev |
991 | */ | 836 | * - from "holders" directory of the @bdev to the holder @disk |
992 | static struct bd_holder *find_bd_holder(struct block_device *bdev, | ||
993 | struct bd_holder *bo) | ||
994 | { | ||
995 | struct bd_holder *tmp; | ||
996 | |||
997 | list_for_each_entry(tmp, &bdev->bd_holder_list, list) | ||
998 | if (tmp->sdir == bo->sdir) { | ||
999 | tmp->count++; | ||
1000 | return tmp; | ||
1001 | } | ||
1002 | |||
1003 | return NULL; | ||
1004 | } | ||
1005 | |||
1006 | /** | ||
1007 | * add_bd_holder - create sysfs symlinks for bd_claim() relationship | ||
1008 | * | 837 | * |
1009 | * @bdev: block device to be bd_claimed | 838 | * For example, if /dev/dm-0 maps to /dev/sda and disk for dm-0 is |
1010 | * @bo: preallocated and initialized by alloc_bd_holder() | 839 | * passed to bd_link_disk_holder(), then: |
1011 | * | 840 | * |
1012 | * Add @bo to @bdev->bd_holder_list, create symlinks. | 841 | * /sys/block/dm-0/slaves/sda --> /sys/block/sda |
842 | * /sys/block/sda/holders/dm-0 --> /sys/block/dm-0 | ||
1013 | * | 843 | * |
1014 | * Returns 0 if symlinks are created. | 844 | * The caller must have claimed @bdev before calling this function and |
1015 | * Returns -ve if something fails. | 845 | * ensure that both @bdev and @disk are valid during the creation and |
846 | * lifetime of these symlinks. | ||
847 | * | ||
848 | * CONTEXT: | ||
849 | * Might sleep. | ||
850 | * | ||
851 | * RETURNS: | ||
852 | * 0 on success, -errno on failure. | ||
1016 | */ | 853 | */ |
1017 | static int add_bd_holder(struct block_device *bdev, struct bd_holder *bo) | 854 | int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk) |
1018 | { | 855 | { |
1019 | int err; | 856 | struct bd_holder_disk *holder; |
857 | int ret = 0; | ||
1020 | 858 | ||
1021 | if (!bo) | 859 | mutex_lock(&bdev->bd_mutex); |
1022 | return -EINVAL; | ||
1023 | 860 | ||
1024 | if (!bd_holder_grab_dirs(bdev, bo)) | 861 | WARN_ON_ONCE(!bdev->bd_holder); |
1025 | return -EBUSY; | ||
1026 | 862 | ||
1027 | err = add_symlink(bo->sdir, bo->sdev); | 863 | /* FIXME: remove the following once add_disk() handles errors */ |
1028 | if (err) | 864 | if (WARN_ON(!disk->slave_dir || !bdev->bd_part->holder_dir)) |
1029 | return err; | 865 | goto out_unlock; |
1030 | 866 | ||
1031 | err = add_symlink(bo->hdir, bo->hdev); | 867 | holder = bd_find_holder_disk(bdev, disk); |
1032 | if (err) { | 868 | if (holder) { |
1033 | del_symlink(bo->sdir, bo->sdev); | 869 | holder->refcnt++; |
1034 | return err; | 870 | goto out_unlock; |
1035 | } | 871 | } |
1036 | 872 | ||
1037 | list_add_tail(&bo->list, &bdev->bd_holder_list); | 873 | holder = kzalloc(sizeof(*holder), GFP_KERNEL); |
1038 | return 0; | 874 | if (!holder) { |
1039 | } | 875 | ret = -ENOMEM; |
1040 | 876 | goto out_unlock; | |
1041 | /** | ||
1042 | * del_bd_holder - delete sysfs symlinks for bd_claim() relationship | ||
1043 | * | ||
1044 | * @bdev: block device to be bd_claimed | ||
1045 | * @kobj: holder's kobject | ||
1046 | * | ||
1047 | * If there is matching entry with @kobj in @bdev->bd_holder_list | ||
1048 | * and no other bd_claim() from the same kobject, | ||
1049 | * remove the struct bd_holder from the list, delete symlinks for it. | ||
1050 | * | ||
1051 | * Returns a pointer to the struct bd_holder when it's removed from the list | ||
1052 | * and ready to be freed. | ||
1053 | * Returns NULL if matching claim isn't found or there is other bd_claim() | ||
1054 | * by the same kobject. | ||
1055 | */ | ||
1056 | static struct bd_holder *del_bd_holder(struct block_device *bdev, | ||
1057 | struct kobject *kobj) | ||
1058 | { | ||
1059 | struct bd_holder *bo; | ||
1060 | |||
1061 | list_for_each_entry(bo, &bdev->bd_holder_list, list) { | ||
1062 | if (bo->sdir == kobj) { | ||
1063 | bo->count--; | ||
1064 | BUG_ON(bo->count < 0); | ||
1065 | if (!bo->count) { | ||
1066 | list_del(&bo->list); | ||
1067 | del_symlink(bo->sdir, bo->sdev); | ||
1068 | del_symlink(bo->hdir, bo->hdev); | ||
1069 | bd_holder_release_dirs(bo); | ||
1070 | return bo; | ||
1071 | } | ||
1072 | break; | ||
1073 | } | ||
1074 | } | 877 | } |
1075 | 878 | ||
1076 | return NULL; | 879 | INIT_LIST_HEAD(&holder->list); |
1077 | } | 880 | holder->disk = disk; |
881 | holder->refcnt = 1; | ||
1078 | 882 | ||
1079 | /** | 883 | ret = add_symlink(disk->slave_dir, &part_to_dev(bdev->bd_part)->kobj); |
1080 | * bd_claim_by_kobject - bd_claim() with additional kobject signature | 884 | if (ret) |
1081 | * | 885 | goto out_free; |
1082 | * @bdev: block device to be claimed | ||
1083 | * @holder: holder's signature | ||
1084 | * @kobj: holder's kobject | ||
1085 | * | ||
1086 | * Do bd_claim() and if it succeeds, create sysfs symlinks between | ||
1087 | * the bdev and the holder's kobject. | ||
1088 | * Use bd_release_from_kobject() when relesing the claimed bdev. | ||
1089 | * | ||
1090 | * Returns 0 on success. (same as bd_claim()) | ||
1091 | * Returns errno on failure. | ||
1092 | */ | ||
1093 | static int bd_claim_by_kobject(struct block_device *bdev, void *holder, | ||
1094 | struct kobject *kobj) | ||
1095 | { | ||
1096 | int err; | ||
1097 | struct bd_holder *bo, *found; | ||
1098 | 886 | ||
1099 | if (!kobj) | 887 | ret = add_symlink(bdev->bd_part->holder_dir, &disk_to_dev(disk)->kobj); |
1100 | return -EINVAL; | 888 | if (ret) |
1101 | 889 | goto out_del; | |
1102 | bo = alloc_bd_holder(kobj); | 890 | /* |
1103 | if (!bo) | 891 | * bdev could be deleted beneath us which would implicitly destroy |
1104 | return -ENOMEM; | 892 | * the holder directory. Hold on to it. |
1105 | 893 | */ | |
1106 | mutex_lock(&bdev->bd_mutex); | 894 | kobject_get(bdev->bd_part->holder_dir); |
1107 | |||
1108 | err = bd_claim(bdev, holder); | ||
1109 | if (err) | ||
1110 | goto fail; | ||
1111 | 895 | ||
1112 | found = find_bd_holder(bdev, bo); | 896 | list_add(&holder->list, &bdev->bd_holder_disks); |
1113 | if (found) | 897 | goto out_unlock; |
1114 | goto fail; | ||
1115 | 898 | ||
1116 | err = add_bd_holder(bdev, bo); | 899 | out_del: |
1117 | if (err) | 900 | del_symlink(disk->slave_dir, &part_to_dev(bdev->bd_part)->kobj); |
1118 | bd_release(bdev); | 901 | out_free: |
1119 | else | 902 | kfree(holder); |
1120 | bo = NULL; | 903 | out_unlock: |
1121 | fail: | ||
1122 | mutex_unlock(&bdev->bd_mutex); | 904 | mutex_unlock(&bdev->bd_mutex); |
1123 | free_bd_holder(bo); | 905 | return ret; |
1124 | return err; | ||
1125 | } | 906 | } |
907 | EXPORT_SYMBOL_GPL(bd_link_disk_holder); | ||
1126 | 908 | ||
1127 | /** | 909 | /** |
1128 | * bd_release_from_kobject - bd_release() with additional kobject signature | 910 | * bd_unlink_disk_holder - destroy symlinks created by bd_link_disk_holder() |
911 | * @bdev: the claimed slave bdev | ||
912 | * @disk: the holding disk | ||
1129 | * | 913 | * |
1130 | * @bdev: block device to be released | 914 | * DON'T USE THIS UNLESS YOU'RE ALREADY USING IT. |
1131 | * @kobj: holder's kobject | ||
1132 | * | 915 | * |
1133 | * Do bd_release() and remove sysfs symlinks created by bd_claim_by_kobject(). | 916 | * CONTEXT: |
917 | * Might sleep. | ||
1134 | */ | 918 | */ |
1135 | static void bd_release_from_kobject(struct block_device *bdev, | 919 | void bd_unlink_disk_holder(struct block_device *bdev, struct gendisk *disk) |
1136 | struct kobject *kobj) | ||
1137 | { | 920 | { |
1138 | if (!kobj) | 921 | struct bd_holder_disk *holder; |
1139 | return; | ||
1140 | 922 | ||
1141 | mutex_lock(&bdev->bd_mutex); | 923 | mutex_lock(&bdev->bd_mutex); |
1142 | bd_release(bdev); | ||
1143 | free_bd_holder(del_bd_holder(bdev, kobj)); | ||
1144 | mutex_unlock(&bdev->bd_mutex); | ||
1145 | } | ||
1146 | 924 | ||
1147 | /** | 925 | holder = bd_find_holder_disk(bdev, disk); |
1148 | * bd_claim_by_disk - wrapper function for bd_claim_by_kobject() | ||
1149 | * | ||
1150 | * @bdev: block device to be claimed | ||
1151 | * @holder: holder's signature | ||
1152 | * @disk: holder's gendisk | ||
1153 | * | ||
1154 | * Call bd_claim_by_kobject() with getting @disk->slave_dir. | ||
1155 | */ | ||
1156 | int bd_claim_by_disk(struct block_device *bdev, void *holder, | ||
1157 | struct gendisk *disk) | ||
1158 | { | ||
1159 | return bd_claim_by_kobject(bdev, holder, kobject_get(disk->slave_dir)); | ||
1160 | } | ||
1161 | EXPORT_SYMBOL_GPL(bd_claim_by_disk); | ||
1162 | 926 | ||
1163 | /** | 927 | if (!WARN_ON_ONCE(holder == NULL) && !--holder->refcnt) { |
1164 | * bd_release_from_disk - wrapper function for bd_release_from_kobject() | 928 | del_symlink(disk->slave_dir, &part_to_dev(bdev->bd_part)->kobj); |
1165 | * | 929 | del_symlink(bdev->bd_part->holder_dir, |
1166 | * @bdev: block device to be claimed | 930 | &disk_to_dev(disk)->kobj); |
1167 | * @disk: holder's gendisk | 931 | kobject_put(bdev->bd_part->holder_dir); |
1168 | * | 932 | list_del_init(&holder->list); |
1169 | * Call bd_release_from_kobject() and put @disk->slave_dir. | 933 | kfree(holder); |
1170 | */ | 934 | } |
1171 | void bd_release_from_disk(struct block_device *bdev, struct gendisk *disk) | ||
1172 | { | ||
1173 | bd_release_from_kobject(bdev, disk->slave_dir); | ||
1174 | kobject_put(disk->slave_dir); | ||
1175 | } | ||
1176 | EXPORT_SYMBOL_GPL(bd_release_from_disk); | ||
1177 | #endif | ||
1178 | 935 | ||
1179 | /* | 936 | mutex_unlock(&bdev->bd_mutex); |
1180 | * Tries to open block device by device number. Use it ONLY if you | ||
1181 | * really do not have anything better - i.e. when you are behind a | ||
1182 | * truly sucky interface and all you are given is a device number. _Never_ | ||
1183 | * to be used for internal purposes. If you ever need it - reconsider | ||
1184 | * your API. | ||
1185 | */ | ||
1186 | struct block_device *open_by_devnum(dev_t dev, fmode_t mode) | ||
1187 | { | ||
1188 | struct block_device *bdev = bdget(dev); | ||
1189 | int err = -ENOMEM; | ||
1190 | if (bdev) | ||
1191 | err = blkdev_get(bdev, mode); | ||
1192 | return err ? ERR_PTR(err) : bdev; | ||
1193 | } | 937 | } |
1194 | 938 | EXPORT_SYMBOL_GPL(bd_unlink_disk_holder); | |
1195 | EXPORT_SYMBOL(open_by_devnum); | 939 | #endif |
1196 | 940 | ||
1197 | /** | 941 | /** |
1198 | * flush_disk - invalidates all buffer-cache entries on a disk | 942 | * flush_disk - invalidates all buffer-cache entries on a disk |
1199 | * | 943 | * |
1200 | * @bdev: struct block device to be flushed | 944 | * @bdev: struct block device to be flushed |
945 | * @kill_dirty: flag to guide handling of dirty inodes | ||
1201 | * | 946 | * |
1202 | * Invalidates all buffer-cache entries on a disk. It should be called | 947 | * Invalidates all buffer-cache entries on a disk. It should be called |
1203 | * when a disk has been changed -- either by a media change or online | 948 | * when a disk has been changed -- either by a media change or online |
1204 | * resize. | 949 | * resize. |
1205 | */ | 950 | */ |
1206 | static void flush_disk(struct block_device *bdev) | 951 | static void flush_disk(struct block_device *bdev, bool kill_dirty) |
1207 | { | 952 | { |
1208 | if (__invalidate_device(bdev)) { | 953 | if (__invalidate_device(bdev, kill_dirty)) { |
1209 | char name[BDEVNAME_SIZE] = ""; | 954 | char name[BDEVNAME_SIZE] = ""; |
1210 | 955 | ||
1211 | if (bdev->bd_disk) | 956 | if (bdev->bd_disk) |
@@ -1242,7 +987,7 @@ void check_disk_size_change(struct gendisk *disk, struct block_device *bdev) | |||
1242 | "%s: detected capacity change from %lld to %lld\n", | 987 | "%s: detected capacity change from %lld to %lld\n", |
1243 | name, bdev_size, disk_size); | 988 | name, bdev_size, disk_size); |
1244 | i_size_write(bdev->bd_inode, disk_size); | 989 | i_size_write(bdev->bd_inode, disk_size); |
1245 | flush_disk(bdev); | 990 | flush_disk(bdev, false); |
1246 | } | 991 | } |
1247 | } | 992 | } |
1248 | EXPORT_SYMBOL(check_disk_size_change); | 993 | EXPORT_SYMBOL(check_disk_size_change); |
@@ -1288,13 +1033,14 @@ int check_disk_change(struct block_device *bdev) | |||
1288 | { | 1033 | { |
1289 | struct gendisk *disk = bdev->bd_disk; | 1034 | struct gendisk *disk = bdev->bd_disk; |
1290 | const struct block_device_operations *bdops = disk->fops; | 1035 | const struct block_device_operations *bdops = disk->fops; |
1036 | unsigned int events; | ||
1291 | 1037 | ||
1292 | if (!bdops->media_changed) | 1038 | events = disk_clear_events(disk, DISK_EVENT_MEDIA_CHANGE | |
1293 | return 0; | 1039 | DISK_EVENT_EJECT_REQUEST); |
1294 | if (!bdops->media_changed(bdev->bd_disk)) | 1040 | if (!(events & DISK_EVENT_MEDIA_CHANGE)) |
1295 | return 0; | 1041 | return 0; |
1296 | 1042 | ||
1297 | flush_disk(bdev); | 1043 | flush_disk(bdev, true); |
1298 | if (bdops->revalidate_disk) | 1044 | if (bdops->revalidate_disk) |
1299 | bdops->revalidate_disk(bdev->bd_disk); | 1045 | bdops->revalidate_disk(bdev->bd_disk); |
1300 | return 1; | 1046 | return 1; |
@@ -1355,6 +1101,7 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part) | |||
1355 | if (!disk) | 1101 | if (!disk) |
1356 | goto out; | 1102 | goto out; |
1357 | 1103 | ||
1104 | disk_block_events(disk); | ||
1358 | mutex_lock_nested(&bdev->bd_mutex, for_part); | 1105 | mutex_lock_nested(&bdev->bd_mutex, for_part); |
1359 | if (!bdev->bd_openers) { | 1106 | if (!bdev->bd_openers) { |
1360 | bdev->bd_disk = disk; | 1107 | bdev->bd_disk = disk; |
@@ -1367,6 +1114,7 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part) | |||
1367 | if (!bdev->bd_part) | 1114 | if (!bdev->bd_part) |
1368 | goto out_clear; | 1115 | goto out_clear; |
1369 | 1116 | ||
1117 | ret = 0; | ||
1370 | if (disk->fops->open) { | 1118 | if (disk->fops->open) { |
1371 | ret = disk->fops->open(bdev, mode); | 1119 | ret = disk->fops->open(bdev, mode); |
1372 | if (ret == -ERESTARTSYS) { | 1120 | if (ret == -ERESTARTSYS) { |
@@ -1376,24 +1124,33 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part) | |||
1376 | */ | 1124 | */ |
1377 | disk_put_part(bdev->bd_part); | 1125 | disk_put_part(bdev->bd_part); |
1378 | bdev->bd_part = NULL; | 1126 | bdev->bd_part = NULL; |
1379 | module_put(disk->fops->owner); | ||
1380 | put_disk(disk); | ||
1381 | bdev->bd_disk = NULL; | 1127 | bdev->bd_disk = NULL; |
1382 | mutex_unlock(&bdev->bd_mutex); | 1128 | mutex_unlock(&bdev->bd_mutex); |
1129 | disk_unblock_events(disk); | ||
1130 | module_put(disk->fops->owner); | ||
1131 | put_disk(disk); | ||
1383 | goto restart; | 1132 | goto restart; |
1384 | } | 1133 | } |
1385 | if (ret) | ||
1386 | goto out_clear; | ||
1387 | } | 1134 | } |
1388 | if (!bdev->bd_openers) { | 1135 | |
1136 | if (!ret && !bdev->bd_openers) { | ||
1389 | bd_set_size(bdev,(loff_t)get_capacity(disk)<<9); | 1137 | bd_set_size(bdev,(loff_t)get_capacity(disk)<<9); |
1390 | bdi = blk_get_backing_dev_info(bdev); | 1138 | bdi = blk_get_backing_dev_info(bdev); |
1391 | if (bdi == NULL) | 1139 | if (bdi == NULL) |
1392 | bdi = &default_backing_dev_info; | 1140 | bdi = &default_backing_dev_info; |
1393 | bdev->bd_inode->i_data.backing_dev_info = bdi; | 1141 | bdev_inode_switch_bdi(bdev->bd_inode, bdi); |
1394 | } | 1142 | } |
1395 | if (bdev->bd_invalidated) | 1143 | |
1144 | /* | ||
1145 | * If the device is invalidated, rescan partition | ||
1146 | * if open succeeded or failed with -ENOMEDIUM. | ||
1147 | * The latter is necessary to prevent ghost | ||
1148 | * partitions on a removed medium. | ||
1149 | */ | ||
1150 | if (bdev->bd_invalidated && (!ret || ret == -ENOMEDIUM)) | ||
1396 | rescan_partitions(disk, bdev); | 1151 | rescan_partitions(disk, bdev); |
1152 | if (ret) | ||
1153 | goto out_clear; | ||
1397 | } else { | 1154 | } else { |
1398 | struct block_device *whole; | 1155 | struct block_device *whole; |
1399 | whole = bdget_disk(disk, 0); | 1156 | whole = bdget_disk(disk, 0); |
@@ -1405,8 +1162,8 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part) | |||
1405 | if (ret) | 1162 | if (ret) |
1406 | goto out_clear; | 1163 | goto out_clear; |
1407 | bdev->bd_contains = whole; | 1164 | bdev->bd_contains = whole; |
1408 | bdev->bd_inode->i_data.backing_dev_info = | 1165 | bdev_inode_switch_bdi(bdev->bd_inode, |
1409 | whole->bd_inode->i_data.backing_dev_info; | 1166 | whole->bd_inode->i_data.backing_dev_info); |
1410 | bdev->bd_part = disk_get_part(disk, partno); | 1167 | bdev->bd_part = disk_get_part(disk, partno); |
1411 | if (!(disk->flags & GENHD_FL_UP) || | 1168 | if (!(disk->flags & GENHD_FL_UP) || |
1412 | !bdev->bd_part || !bdev->bd_part->nr_sects) { | 1169 | !bdev->bd_part || !bdev->bd_part->nr_sects) { |
@@ -1416,55 +1173,213 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part) | |||
1416 | bd_set_size(bdev, (loff_t)bdev->bd_part->nr_sects << 9); | 1173 | bd_set_size(bdev, (loff_t)bdev->bd_part->nr_sects << 9); |
1417 | } | 1174 | } |
1418 | } else { | 1175 | } else { |
1419 | module_put(disk->fops->owner); | ||
1420 | put_disk(disk); | ||
1421 | disk = NULL; | ||
1422 | if (bdev->bd_contains == bdev) { | 1176 | if (bdev->bd_contains == bdev) { |
1423 | if (bdev->bd_disk->fops->open) { | 1177 | ret = 0; |
1178 | if (bdev->bd_disk->fops->open) | ||
1424 | ret = bdev->bd_disk->fops->open(bdev, mode); | 1179 | ret = bdev->bd_disk->fops->open(bdev, mode); |
1425 | if (ret) | 1180 | /* the same as first opener case, read comment there */ |
1426 | goto out_unlock_bdev; | 1181 | if (bdev->bd_invalidated && (!ret || ret == -ENOMEDIUM)) |
1427 | } | ||
1428 | if (bdev->bd_invalidated) | ||
1429 | rescan_partitions(bdev->bd_disk, bdev); | 1182 | rescan_partitions(bdev->bd_disk, bdev); |
1183 | if (ret) | ||
1184 | goto out_unlock_bdev; | ||
1430 | } | 1185 | } |
1186 | /* only one opener holds refs to the module and disk */ | ||
1187 | module_put(disk->fops->owner); | ||
1188 | put_disk(disk); | ||
1431 | } | 1189 | } |
1432 | bdev->bd_openers++; | 1190 | bdev->bd_openers++; |
1433 | if (for_part) | 1191 | if (for_part) |
1434 | bdev->bd_part_count++; | 1192 | bdev->bd_part_count++; |
1435 | mutex_unlock(&bdev->bd_mutex); | 1193 | mutex_unlock(&bdev->bd_mutex); |
1194 | disk_unblock_events(disk); | ||
1436 | return 0; | 1195 | return 0; |
1437 | 1196 | ||
1438 | out_clear: | 1197 | out_clear: |
1439 | disk_put_part(bdev->bd_part); | 1198 | disk_put_part(bdev->bd_part); |
1440 | bdev->bd_disk = NULL; | 1199 | bdev->bd_disk = NULL; |
1441 | bdev->bd_part = NULL; | 1200 | bdev->bd_part = NULL; |
1442 | bdev->bd_inode->i_data.backing_dev_info = &default_backing_dev_info; | 1201 | bdev_inode_switch_bdi(bdev->bd_inode, &default_backing_dev_info); |
1443 | if (bdev != bdev->bd_contains) | 1202 | if (bdev != bdev->bd_contains) |
1444 | __blkdev_put(bdev->bd_contains, mode, 1); | 1203 | __blkdev_put(bdev->bd_contains, mode, 1); |
1445 | bdev->bd_contains = NULL; | 1204 | bdev->bd_contains = NULL; |
1446 | out_unlock_bdev: | 1205 | out_unlock_bdev: |
1447 | mutex_unlock(&bdev->bd_mutex); | 1206 | mutex_unlock(&bdev->bd_mutex); |
1448 | out: | 1207 | disk_unblock_events(disk); |
1449 | if (disk) | 1208 | module_put(disk->fops->owner); |
1450 | module_put(disk->fops->owner); | ||
1451 | put_disk(disk); | 1209 | put_disk(disk); |
1210 | out: | ||
1452 | bdput(bdev); | 1211 | bdput(bdev); |
1453 | 1212 | ||
1454 | return ret; | 1213 | return ret; |
1455 | } | 1214 | } |
1456 | 1215 | ||
1457 | int blkdev_get(struct block_device *bdev, fmode_t mode) | 1216 | /** |
1217 | * blkdev_get - open a block device | ||
1218 | * @bdev: block_device to open | ||
1219 | * @mode: FMODE_* mask | ||
1220 | * @holder: exclusive holder identifier | ||
1221 | * | ||
1222 | * Open @bdev with @mode. If @mode includes %FMODE_EXCL, @bdev is | ||
1223 | * open with exclusive access. Specifying %FMODE_EXCL with %NULL | ||
1224 | * @holder is invalid. Exclusive opens may nest for the same @holder. | ||
1225 | * | ||
1226 | * On success, the reference count of @bdev is unchanged. On failure, | ||
1227 | * @bdev is put. | ||
1228 | * | ||
1229 | * CONTEXT: | ||
1230 | * Might sleep. | ||
1231 | * | ||
1232 | * RETURNS: | ||
1233 | * 0 on success, -errno on failure. | ||
1234 | */ | ||
1235 | int blkdev_get(struct block_device *bdev, fmode_t mode, void *holder) | ||
1458 | { | 1236 | { |
1459 | return __blkdev_get(bdev, mode, 0); | 1237 | struct block_device *whole = NULL; |
1238 | int res; | ||
1239 | |||
1240 | WARN_ON_ONCE((mode & FMODE_EXCL) && !holder); | ||
1241 | |||
1242 | if ((mode & FMODE_EXCL) && holder) { | ||
1243 | whole = bd_start_claiming(bdev, holder); | ||
1244 | if (IS_ERR(whole)) { | ||
1245 | bdput(bdev); | ||
1246 | return PTR_ERR(whole); | ||
1247 | } | ||
1248 | } | ||
1249 | |||
1250 | res = __blkdev_get(bdev, mode, 0); | ||
1251 | |||
1252 | if (whole) { | ||
1253 | struct gendisk *disk = whole->bd_disk; | ||
1254 | |||
1255 | /* finish claiming */ | ||
1256 | mutex_lock(&bdev->bd_mutex); | ||
1257 | spin_lock(&bdev_lock); | ||
1258 | |||
1259 | if (!res) { | ||
1260 | BUG_ON(!bd_may_claim(bdev, whole, holder)); | ||
1261 | /* | ||
1262 | * Note that for a whole device bd_holders | ||
1263 | * will be incremented twice, and bd_holder | ||
1264 | * will be set to bd_may_claim before being | ||
1265 | * set to holder | ||
1266 | */ | ||
1267 | whole->bd_holders++; | ||
1268 | whole->bd_holder = bd_may_claim; | ||
1269 | bdev->bd_holders++; | ||
1270 | bdev->bd_holder = holder; | ||
1271 | } | ||
1272 | |||
1273 | /* tell others that we're done */ | ||
1274 | BUG_ON(whole->bd_claiming != holder); | ||
1275 | whole->bd_claiming = NULL; | ||
1276 | wake_up_bit(&whole->bd_claiming, 0); | ||
1277 | |||
1278 | spin_unlock(&bdev_lock); | ||
1279 | |||
1280 | /* | ||
1281 | * Block event polling for write claims if requested. Any | ||
1282 | * write holder makes the write_holder state stick until | ||
1283 | * all are released. This is good enough and tracking | ||
1284 | * individual writeable reference is too fragile given the | ||
1285 | * way @mode is used in blkdev_get/put(). | ||
1286 | */ | ||
1287 | if (!res && (mode & FMODE_WRITE) && !bdev->bd_write_holder && | ||
1288 | (disk->flags & GENHD_FL_BLOCK_EVENTS_ON_EXCL_WRITE)) { | ||
1289 | bdev->bd_write_holder = true; | ||
1290 | disk_block_events(disk); | ||
1291 | } | ||
1292 | |||
1293 | mutex_unlock(&bdev->bd_mutex); | ||
1294 | bdput(whole); | ||
1295 | } | ||
1296 | |||
1297 | return res; | ||
1460 | } | 1298 | } |
1461 | EXPORT_SYMBOL(blkdev_get); | 1299 | EXPORT_SYMBOL(blkdev_get); |
1462 | 1300 | ||
1301 | /** | ||
1302 | * blkdev_get_by_path - open a block device by name | ||
1303 | * @path: path to the block device to open | ||
1304 | * @mode: FMODE_* mask | ||
1305 | * @holder: exclusive holder identifier | ||
1306 | * | ||
1307 | * Open the blockdevice described by the device file at @path. @mode | ||
1308 | * and @holder are identical to blkdev_get(). | ||
1309 | * | ||
1310 | * On success, the returned block_device has reference count of one. | ||
1311 | * | ||
1312 | * CONTEXT: | ||
1313 | * Might sleep. | ||
1314 | * | ||
1315 | * RETURNS: | ||
1316 | * Pointer to block_device on success, ERR_PTR(-errno) on failure. | ||
1317 | */ | ||
1318 | struct block_device *blkdev_get_by_path(const char *path, fmode_t mode, | ||
1319 | void *holder) | ||
1320 | { | ||
1321 | struct block_device *bdev; | ||
1322 | int err; | ||
1323 | |||
1324 | bdev = lookup_bdev(path); | ||
1325 | if (IS_ERR(bdev)) | ||
1326 | return bdev; | ||
1327 | |||
1328 | err = blkdev_get(bdev, mode, holder); | ||
1329 | if (err) | ||
1330 | return ERR_PTR(err); | ||
1331 | |||
1332 | if ((mode & FMODE_WRITE) && bdev_read_only(bdev)) { | ||
1333 | blkdev_put(bdev, mode); | ||
1334 | return ERR_PTR(-EACCES); | ||
1335 | } | ||
1336 | |||
1337 | return bdev; | ||
1338 | } | ||
1339 | EXPORT_SYMBOL(blkdev_get_by_path); | ||
1340 | |||
1341 | /** | ||
1342 | * blkdev_get_by_dev - open a block device by device number | ||
1343 | * @dev: device number of block device to open | ||
1344 | * @mode: FMODE_* mask | ||
1345 | * @holder: exclusive holder identifier | ||
1346 | * | ||
1347 | * Open the blockdevice described by device number @dev. @mode and | ||
1348 | * @holder are identical to blkdev_get(). | ||
1349 | * | ||
1350 | * Use it ONLY if you really do not have anything better - i.e. when | ||
1351 | * you are behind a truly sucky interface and all you are given is a | ||
1352 | * device number. _Never_ to be used for internal purposes. If you | ||
1353 | * ever need it - reconsider your API. | ||
1354 | * | ||
1355 | * On success, the returned block_device has reference count of one. | ||
1356 | * | ||
1357 | * CONTEXT: | ||
1358 | * Might sleep. | ||
1359 | * | ||
1360 | * RETURNS: | ||
1361 | * Pointer to block_device on success, ERR_PTR(-errno) on failure. | ||
1362 | */ | ||
1363 | struct block_device *blkdev_get_by_dev(dev_t dev, fmode_t mode, void *holder) | ||
1364 | { | ||
1365 | struct block_device *bdev; | ||
1366 | int err; | ||
1367 | |||
1368 | bdev = bdget(dev); | ||
1369 | if (!bdev) | ||
1370 | return ERR_PTR(-ENOMEM); | ||
1371 | |||
1372 | err = blkdev_get(bdev, mode, holder); | ||
1373 | if (err) | ||
1374 | return ERR_PTR(err); | ||
1375 | |||
1376 | return bdev; | ||
1377 | } | ||
1378 | EXPORT_SYMBOL(blkdev_get_by_dev); | ||
1379 | |||
1463 | static int blkdev_open(struct inode * inode, struct file * filp) | 1380 | static int blkdev_open(struct inode * inode, struct file * filp) |
1464 | { | 1381 | { |
1465 | struct block_device *whole = NULL; | ||
1466 | struct block_device *bdev; | 1382 | struct block_device *bdev; |
1467 | int res; | ||
1468 | 1383 | ||
1469 | /* | 1384 | /* |
1470 | * Preserve backwards compatibility and allow large file access | 1385 | * Preserve backwards compatibility and allow large file access |
@@ -1485,26 +1400,9 @@ static int blkdev_open(struct inode * inode, struct file * filp) | |||
1485 | if (bdev == NULL) | 1400 | if (bdev == NULL) |
1486 | return -ENOMEM; | 1401 | return -ENOMEM; |
1487 | 1402 | ||
1488 | if (filp->f_mode & FMODE_EXCL) { | ||
1489 | whole = bd_start_claiming(bdev, filp); | ||
1490 | if (IS_ERR(whole)) { | ||
1491 | bdput(bdev); | ||
1492 | return PTR_ERR(whole); | ||
1493 | } | ||
1494 | } | ||
1495 | |||
1496 | filp->f_mapping = bdev->bd_inode->i_mapping; | 1403 | filp->f_mapping = bdev->bd_inode->i_mapping; |
1497 | 1404 | ||
1498 | res = blkdev_get(bdev, filp->f_mode); | 1405 | return blkdev_get(bdev, filp->f_mode, filp); |
1499 | |||
1500 | if (whole) { | ||
1501 | if (res == 0) | ||
1502 | bd_finish_claiming(bdev, whole, filp); | ||
1503 | else | ||
1504 | bd_abort_claiming(whole, filp); | ||
1505 | } | ||
1506 | |||
1507 | return res; | ||
1508 | } | 1406 | } |
1509 | 1407 | ||
1510 | static int __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part) | 1408 | static int __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part) |
@@ -1518,6 +1416,7 @@ static int __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part) | |||
1518 | bdev->bd_part_count--; | 1416 | bdev->bd_part_count--; |
1519 | 1417 | ||
1520 | if (!--bdev->bd_openers) { | 1418 | if (!--bdev->bd_openers) { |
1419 | WARN_ON_ONCE(bdev->bd_holders); | ||
1521 | sync_blockdev(bdev); | 1420 | sync_blockdev(bdev); |
1522 | kill_bdev(bdev); | 1421 | kill_bdev(bdev); |
1523 | } | 1422 | } |
@@ -1533,7 +1432,8 @@ static int __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part) | |||
1533 | disk_put_part(bdev->bd_part); | 1432 | disk_put_part(bdev->bd_part); |
1534 | bdev->bd_part = NULL; | 1433 | bdev->bd_part = NULL; |
1535 | bdev->bd_disk = NULL; | 1434 | bdev->bd_disk = NULL; |
1536 | bdev->bd_inode->i_data.backing_dev_info = &default_backing_dev_info; | 1435 | bdev_inode_switch_bdi(bdev->bd_inode, |
1436 | &default_backing_dev_info); | ||
1537 | if (bdev != bdev->bd_contains) | 1437 | if (bdev != bdev->bd_contains) |
1538 | victim = bdev->bd_contains; | 1438 | victim = bdev->bd_contains; |
1539 | bdev->bd_contains = NULL; | 1439 | bdev->bd_contains = NULL; |
@@ -1547,6 +1447,43 @@ static int __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part) | |||
1547 | 1447 | ||
1548 | int blkdev_put(struct block_device *bdev, fmode_t mode) | 1448 | int blkdev_put(struct block_device *bdev, fmode_t mode) |
1549 | { | 1449 | { |
1450 | if (mode & FMODE_EXCL) { | ||
1451 | bool bdev_free; | ||
1452 | |||
1453 | /* | ||
1454 | * Release a claim on the device. The holder fields | ||
1455 | * are protected with bdev_lock. bd_mutex is to | ||
1456 | * synchronize disk_holder unlinking. | ||
1457 | */ | ||
1458 | mutex_lock(&bdev->bd_mutex); | ||
1459 | spin_lock(&bdev_lock); | ||
1460 | |||
1461 | WARN_ON_ONCE(--bdev->bd_holders < 0); | ||
1462 | WARN_ON_ONCE(--bdev->bd_contains->bd_holders < 0); | ||
1463 | |||
1464 | /* bd_contains might point to self, check in a separate step */ | ||
1465 | if ((bdev_free = !bdev->bd_holders)) | ||
1466 | bdev->bd_holder = NULL; | ||
1467 | if (!bdev->bd_contains->bd_holders) | ||
1468 | bdev->bd_contains->bd_holder = NULL; | ||
1469 | |||
1470 | spin_unlock(&bdev_lock); | ||
1471 | |||
1472 | /* | ||
1473 | * If this was the last claim, remove holder link and | ||
1474 | * unblock evpoll if it was a write holder. | ||
1475 | */ | ||
1476 | if (bdev_free) { | ||
1477 | if (bdev->bd_write_holder) { | ||
1478 | disk_unblock_events(bdev->bd_disk); | ||
1479 | disk_check_events(bdev->bd_disk); | ||
1480 | bdev->bd_write_holder = false; | ||
1481 | } | ||
1482 | } | ||
1483 | |||
1484 | mutex_unlock(&bdev->bd_mutex); | ||
1485 | } | ||
1486 | |||
1550 | return __blkdev_put(bdev, mode, 0); | 1487 | return __blkdev_put(bdev, mode, 0); |
1551 | } | 1488 | } |
1552 | EXPORT_SYMBOL(blkdev_put); | 1489 | EXPORT_SYMBOL(blkdev_put); |
@@ -1554,8 +1491,7 @@ EXPORT_SYMBOL(blkdev_put); | |||
1554 | static int blkdev_close(struct inode * inode, struct file * filp) | 1491 | static int blkdev_close(struct inode * inode, struct file * filp) |
1555 | { | 1492 | { |
1556 | struct block_device *bdev = I_BDEV(filp->f_mapping->host); | 1493 | struct block_device *bdev = I_BDEV(filp->f_mapping->host); |
1557 | if (bdev->bd_holder == filp) | 1494 | |
1558 | bd_release(bdev); | ||
1559 | return blkdev_put(bdev, filp->f_mode); | 1495 | return blkdev_put(bdev, filp->f_mode); |
1560 | } | 1496 | } |
1561 | 1497 | ||
@@ -1620,7 +1556,6 @@ static int blkdev_releasepage(struct page *page, gfp_t wait) | |||
1620 | static const struct address_space_operations def_blk_aops = { | 1556 | static const struct address_space_operations def_blk_aops = { |
1621 | .readpage = blkdev_readpage, | 1557 | .readpage = blkdev_readpage, |
1622 | .writepage = blkdev_writepage, | 1558 | .writepage = blkdev_writepage, |
1623 | .sync_page = block_sync_page, | ||
1624 | .write_begin = blkdev_write_begin, | 1559 | .write_begin = blkdev_write_begin, |
1625 | .write_end = blkdev_write_end, | 1560 | .write_end = blkdev_write_end, |
1626 | .writepages = generic_writepages, | 1561 | .writepages = generic_writepages, |
@@ -1700,68 +1635,7 @@ fail: | |||
1700 | } | 1635 | } |
1701 | EXPORT_SYMBOL(lookup_bdev); | 1636 | EXPORT_SYMBOL(lookup_bdev); |
1702 | 1637 | ||
1703 | /** | 1638 | int __invalidate_device(struct block_device *bdev, bool kill_dirty) |
1704 | * open_bdev_exclusive - open a block device by name and set it up for use | ||
1705 | * | ||
1706 | * @path: special file representing the block device | ||
1707 | * @mode: FMODE_... combination to pass be used | ||
1708 | * @holder: owner for exclusion | ||
1709 | * | ||
1710 | * Open the blockdevice described by the special file at @path, claim it | ||
1711 | * for the @holder. | ||
1712 | */ | ||
1713 | struct block_device *open_bdev_exclusive(const char *path, fmode_t mode, void *holder) | ||
1714 | { | ||
1715 | struct block_device *bdev, *whole; | ||
1716 | int error; | ||
1717 | |||
1718 | bdev = lookup_bdev(path); | ||
1719 | if (IS_ERR(bdev)) | ||
1720 | return bdev; | ||
1721 | |||
1722 | whole = bd_start_claiming(bdev, holder); | ||
1723 | if (IS_ERR(whole)) { | ||
1724 | bdput(bdev); | ||
1725 | return whole; | ||
1726 | } | ||
1727 | |||
1728 | error = blkdev_get(bdev, mode); | ||
1729 | if (error) | ||
1730 | goto out_abort_claiming; | ||
1731 | |||
1732 | error = -EACCES; | ||
1733 | if ((mode & FMODE_WRITE) && bdev_read_only(bdev)) | ||
1734 | goto out_blkdev_put; | ||
1735 | |||
1736 | bd_finish_claiming(bdev, whole, holder); | ||
1737 | return bdev; | ||
1738 | |||
1739 | out_blkdev_put: | ||
1740 | blkdev_put(bdev, mode); | ||
1741 | out_abort_claiming: | ||
1742 | bd_abort_claiming(whole, holder); | ||
1743 | return ERR_PTR(error); | ||
1744 | } | ||
1745 | |||
1746 | EXPORT_SYMBOL(open_bdev_exclusive); | ||
1747 | |||
1748 | /** | ||
1749 | * close_bdev_exclusive - close a blockdevice opened by open_bdev_exclusive() | ||
1750 | * | ||
1751 | * @bdev: blockdevice to close | ||
1752 | * @mode: mode, must match that used to open. | ||
1753 | * | ||
1754 | * This is the counterpart to open_bdev_exclusive(). | ||
1755 | */ | ||
1756 | void close_bdev_exclusive(struct block_device *bdev, fmode_t mode) | ||
1757 | { | ||
1758 | bd_release(bdev); | ||
1759 | blkdev_put(bdev, mode); | ||
1760 | } | ||
1761 | |||
1762 | EXPORT_SYMBOL(close_bdev_exclusive); | ||
1763 | |||
1764 | int __invalidate_device(struct block_device *bdev) | ||
1765 | { | 1639 | { |
1766 | struct super_block *sb = get_super(bdev); | 1640 | struct super_block *sb = get_super(bdev); |
1767 | int res = 0; | 1641 | int res = 0; |
@@ -1774,7 +1648,7 @@ int __invalidate_device(struct block_device *bdev) | |||
1774 | * hold). | 1648 | * hold). |
1775 | */ | 1649 | */ |
1776 | shrink_dcache_sb(sb); | 1650 | shrink_dcache_sb(sb); |
1777 | res = invalidate_inodes(sb); | 1651 | res = invalidate_inodes(sb, kill_dirty); |
1778 | drop_super(sb); | 1652 | drop_super(sb); |
1779 | } | 1653 | } |
1780 | invalidate_bdev(bdev); | 1654 | invalidate_bdev(bdev); |