diff options
author | Alasdair G Kergon <agk@redhat.com> | 2006-03-27 04:17:45 -0500 |
---|---|---|
committer | Linus Torvalds <torvalds@g5.osdl.org> | 2006-03-27 11:44:58 -0500 |
commit | 76df1c651b66bdf07d60b3d60789feb5f58d73e3 (patch) | |
tree | 5458257a6a089396ac6ed5883a916af542ca8477 /drivers/md | |
parent | b4b610f684d13bf8691feeae5d4d7a8bd1f1033e (diff) |
[PATCH] device-mapper snapshot: fix invalidation
When a snapshot becomes invalid, s->valid is set to 0. In this state, a
snapshot can no longer be accessed.
When s->lock is acquired, before doing anything else, s->valid must be checked
to ensure the snapshot remains valid.
This patch eliminates some races (that may cause panics) by adding some
missing checks. At the same time, some unnecessary levels of indentation are
removed and snapshot invalidation is moved into a single function that always
generates a device-mapper event.
Signed-off-by: Alasdair G Kergon <agk@redhat.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Diffstat (limited to 'drivers/md')
-rw-r--r-- | drivers/md/dm-snap.c | 295 |
1 file changed, 174 insertions, 121 deletions
diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c index 475514bda9d0..14bd1a1815b1 100644 --- a/drivers/md/dm-snap.c +++ b/drivers/md/dm-snap.c | |||
@@ -392,6 +392,8 @@ static void read_snapshot_metadata(struct dm_snapshot *s) | |||
392 | down_write(&s->lock); | 392 | down_write(&s->lock); |
393 | s->valid = 0; | 393 | s->valid = 0; |
394 | up_write(&s->lock); | 394 | up_write(&s->lock); |
395 | |||
396 | dm_table_event(s->table); | ||
395 | } | 397 | } |
396 | } | 398 | } |
397 | 399 | ||
@@ -601,6 +603,11 @@ static void error_bios(struct bio *bio) | |||
601 | } | 603 | } |
602 | } | 604 | } |
603 | 605 | ||
606 | static inline void error_snapshot_bios(struct pending_exception *pe) | ||
607 | { | ||
608 | error_bios(bio_list_get(&pe->snapshot_bios)); | ||
609 | } | ||
610 | |||
604 | static struct bio *__flush_bios(struct pending_exception *pe) | 611 | static struct bio *__flush_bios(struct pending_exception *pe) |
605 | { | 612 | { |
606 | /* | 613 | /* |
@@ -616,6 +623,28 @@ static struct bio *__flush_bios(struct pending_exception *pe) | |||
616 | return NULL; | 623 | return NULL; |
617 | } | 624 | } |
618 | 625 | ||
626 | static void __invalidate_snapshot(struct dm_snapshot *s, | ||
627 | struct pending_exception *pe, int err) | ||
628 | { | ||
629 | if (!s->valid) | ||
630 | return; | ||
631 | |||
632 | if (err == -EIO) | ||
633 | DMERR("Invalidating snapshot: Error reading/writing."); | ||
634 | else if (err == -ENOMEM) | ||
635 | DMERR("Invalidating snapshot: Unable to allocate exception."); | ||
636 | |||
637 | if (pe) | ||
638 | remove_exception(&pe->e); | ||
639 | |||
640 | if (s->store.drop_snapshot) | ||
641 | s->store.drop_snapshot(&s->store); | ||
642 | |||
643 | s->valid = 0; | ||
644 | |||
645 | dm_table_event(s->table); | ||
646 | } | ||
647 | |||
619 | static void pending_complete(struct pending_exception *pe, int success) | 648 | static void pending_complete(struct pending_exception *pe, int success) |
620 | { | 649 | { |
621 | struct exception *e; | 650 | struct exception *e; |
@@ -623,50 +652,53 @@ static void pending_complete(struct pending_exception *pe, int success) | |||
623 | struct dm_snapshot *s = pe->snap; | 652 | struct dm_snapshot *s = pe->snap; |
624 | struct bio *flush = NULL; | 653 | struct bio *flush = NULL; |
625 | 654 | ||
626 | if (success) { | 655 | if (!success) { |
627 | e = alloc_exception(); | 656 | /* Read/write error - snapshot is unusable */ |
628 | if (!e) { | ||
629 | DMWARN("Unable to allocate exception."); | ||
630 | down_write(&s->lock); | ||
631 | s->store.drop_snapshot(&s->store); | ||
632 | s->valid = 0; | ||
633 | flush = __flush_bios(pe); | ||
634 | up_write(&s->lock); | ||
635 | |||
636 | error_bios(bio_list_get(&pe->snapshot_bios)); | ||
637 | goto out; | ||
638 | } | ||
639 | *e = pe->e; | ||
640 | |||
641 | /* | ||
642 | * Add a proper exception, and remove the | ||
643 | * in-flight exception from the list. | ||
644 | */ | ||
645 | down_write(&s->lock); | 657 | down_write(&s->lock); |
646 | insert_exception(&s->complete, e); | 658 | __invalidate_snapshot(s, pe, -EIO); |
647 | remove_exception(&pe->e); | ||
648 | flush = __flush_bios(pe); | 659 | flush = __flush_bios(pe); |
649 | |||
650 | /* Submit any pending write bios */ | ||
651 | up_write(&s->lock); | 660 | up_write(&s->lock); |
652 | 661 | ||
653 | flush_bios(bio_list_get(&pe->snapshot_bios)); | 662 | error_snapshot_bios(pe); |
654 | } else { | 663 | goto out; |
655 | /* Read/write error - snapshot is unusable */ | 664 | } |
665 | |||
666 | e = alloc_exception(); | ||
667 | if (!e) { | ||
656 | down_write(&s->lock); | 668 | down_write(&s->lock); |
657 | if (s->valid) | 669 | __invalidate_snapshot(s, pe, -ENOMEM); |
658 | DMERR("Error reading/writing snapshot"); | ||
659 | s->store.drop_snapshot(&s->store); | ||
660 | s->valid = 0; | ||
661 | remove_exception(&pe->e); | ||
662 | flush = __flush_bios(pe); | 670 | flush = __flush_bios(pe); |
663 | up_write(&s->lock); | 671 | up_write(&s->lock); |
664 | 672 | ||
665 | error_bios(bio_list_get(&pe->snapshot_bios)); | 673 | error_snapshot_bios(pe); |
674 | goto out; | ||
675 | } | ||
676 | *e = pe->e; | ||
666 | 677 | ||
667 | dm_table_event(s->table); | 678 | /* |
679 | * Add a proper exception, and remove the | ||
680 | * in-flight exception from the list. | ||
681 | */ | ||
682 | down_write(&s->lock); | ||
683 | if (!s->valid) { | ||
684 | flush = __flush_bios(pe); | ||
685 | up_write(&s->lock); | ||
686 | |||
687 | free_exception(e); | ||
688 | |||
689 | error_snapshot_bios(pe); | ||
690 | goto out; | ||
668 | } | 691 | } |
669 | 692 | ||
693 | insert_exception(&s->complete, e); | ||
694 | remove_exception(&pe->e); | ||
695 | flush = __flush_bios(pe); | ||
696 | |||
697 | up_write(&s->lock); | ||
698 | |||
699 | /* Submit any pending write bios */ | ||
700 | flush_bios(bio_list_get(&pe->snapshot_bios)); | ||
701 | |||
670 | out: | 702 | out: |
671 | primary_pe = pe->primary_pe; | 703 | primary_pe = pe->primary_pe; |
672 | 704 | ||
@@ -758,39 +790,45 @@ __find_pending_exception(struct dm_snapshot *s, struct bio *bio) | |||
758 | if (e) { | 790 | if (e) { |
759 | /* cast the exception to a pending exception */ | 791 | /* cast the exception to a pending exception */ |
760 | pe = container_of(e, struct pending_exception, e); | 792 | pe = container_of(e, struct pending_exception, e); |
793 | goto out; | ||
794 | } | ||
761 | 795 | ||
762 | } else { | 796 | /* |
763 | /* | 797 | * Create a new pending exception, we don't want |
764 | * Create a new pending exception, we don't want | 798 | * to hold the lock while we do this. |
765 | * to hold the lock while we do this. | 799 | */ |
766 | */ | 800 | up_write(&s->lock); |
767 | up_write(&s->lock); | 801 | pe = alloc_pending_exception(); |
768 | pe = alloc_pending_exception(); | 802 | down_write(&s->lock); |
769 | down_write(&s->lock); | ||
770 | 803 | ||
771 | e = lookup_exception(&s->pending, chunk); | 804 | if (!s->valid) { |
772 | if (e) { | 805 | free_pending_exception(pe); |
773 | free_pending_exception(pe); | 806 | return NULL; |
774 | pe = container_of(e, struct pending_exception, e); | 807 | } |
775 | } else { | ||
776 | pe->e.old_chunk = chunk; | ||
777 | bio_list_init(&pe->origin_bios); | ||
778 | bio_list_init(&pe->snapshot_bios); | ||
779 | pe->primary_pe = NULL; | ||
780 | atomic_set(&pe->sibling_count, 1); | ||
781 | pe->snap = s; | ||
782 | pe->started = 0; | ||
783 | |||
784 | if (s->store.prepare_exception(&s->store, &pe->e)) { | ||
785 | free_pending_exception(pe); | ||
786 | s->valid = 0; | ||
787 | return NULL; | ||
788 | } | ||
789 | 808 | ||
790 | insert_exception(&s->pending, &pe->e); | 809 | e = lookup_exception(&s->pending, chunk); |
791 | } | 810 | if (e) { |
811 | free_pending_exception(pe); | ||
812 | pe = container_of(e, struct pending_exception, e); | ||
813 | goto out; | ||
792 | } | 814 | } |
793 | 815 | ||
816 | pe->e.old_chunk = chunk; | ||
817 | bio_list_init(&pe->origin_bios); | ||
818 | bio_list_init(&pe->snapshot_bios); | ||
819 | pe->primary_pe = NULL; | ||
820 | atomic_set(&pe->sibling_count, 1); | ||
821 | pe->snap = s; | ||
822 | pe->started = 0; | ||
823 | |||
824 | if (s->store.prepare_exception(&s->store, &pe->e)) { | ||
825 | free_pending_exception(pe); | ||
826 | return NULL; | ||
827 | } | ||
828 | |||
829 | insert_exception(&s->pending, &pe->e); | ||
830 | |||
831 | out: | ||
794 | return pe; | 832 | return pe; |
795 | } | 833 | } |
796 | 834 | ||
@@ -807,13 +845,15 @@ static int snapshot_map(struct dm_target *ti, struct bio *bio, | |||
807 | { | 845 | { |
808 | struct exception *e; | 846 | struct exception *e; |
809 | struct dm_snapshot *s = (struct dm_snapshot *) ti->private; | 847 | struct dm_snapshot *s = (struct dm_snapshot *) ti->private; |
848 | int copy_needed = 0; | ||
810 | int r = 1; | 849 | int r = 1; |
811 | chunk_t chunk; | 850 | chunk_t chunk; |
812 | struct pending_exception *pe; | 851 | struct pending_exception *pe = NULL; |
813 | 852 | ||
814 | chunk = sector_to_chunk(s, bio->bi_sector); | 853 | chunk = sector_to_chunk(s, bio->bi_sector); |
815 | 854 | ||
816 | /* Full snapshots are not usable */ | 855 | /* Full snapshots are not usable */ |
856 | /* To get here the table must be live so s->active is always set. */ | ||
817 | if (!s->valid) | 857 | if (!s->valid) |
818 | return -EIO; | 858 | return -EIO; |
819 | 859 | ||
@@ -831,36 +871,41 @@ static int snapshot_map(struct dm_target *ti, struct bio *bio, | |||
831 | * to copy an exception */ | 871 | * to copy an exception */ |
832 | down_write(&s->lock); | 872 | down_write(&s->lock); |
833 | 873 | ||
874 | if (!s->valid) { | ||
875 | r = -EIO; | ||
876 | goto out_unlock; | ||
877 | } | ||
878 | |||
834 | /* If the block is already remapped - use that, else remap it */ | 879 | /* If the block is already remapped - use that, else remap it */ |
835 | e = lookup_exception(&s->complete, chunk); | 880 | e = lookup_exception(&s->complete, chunk); |
836 | if (e) { | 881 | if (e) { |
837 | remap_exception(s, e, bio); | 882 | remap_exception(s, e, bio); |
838 | up_write(&s->lock); | 883 | goto out_unlock; |
839 | 884 | } | |
840 | } else { | 885 | |
841 | pe = __find_pending_exception(s, bio); | 886 | pe = __find_pending_exception(s, bio); |
842 | 887 | if (!pe) { | |
843 | if (!pe) { | 888 | __invalidate_snapshot(s, pe, -ENOMEM); |
844 | if (s->store.drop_snapshot) | 889 | r = -EIO; |
845 | s->store.drop_snapshot(&s->store); | 890 | goto out_unlock; |
846 | s->valid = 0; | ||
847 | r = -EIO; | ||
848 | up_write(&s->lock); | ||
849 | } else { | ||
850 | remap_exception(s, &pe->e, bio); | ||
851 | bio_list_add(&pe->snapshot_bios, bio); | ||
852 | |||
853 | if (!pe->started) { | ||
854 | /* this is protected by snap->lock */ | ||
855 | pe->started = 1; | ||
856 | up_write(&s->lock); | ||
857 | start_copy(pe); | ||
858 | } else | ||
859 | up_write(&s->lock); | ||
860 | r = 0; | ||
861 | } | ||
862 | } | 891 | } |
863 | 892 | ||
893 | remap_exception(s, &pe->e, bio); | ||
894 | bio_list_add(&pe->snapshot_bios, bio); | ||
895 | |||
896 | if (!pe->started) { | ||
897 | /* this is protected by snap->lock */ | ||
898 | pe->started = 1; | ||
899 | copy_needed = 1; | ||
900 | } | ||
901 | |||
902 | r = 0; | ||
903 | |||
904 | out_unlock: | ||
905 | up_write(&s->lock); | ||
906 | |||
907 | if (copy_needed) | ||
908 | start_copy(pe); | ||
864 | } else { | 909 | } else { |
865 | /* | 910 | /* |
866 | * FIXME: this read path scares me because we | 911 | * FIXME: this read path scares me because we |
@@ -872,6 +917,11 @@ static int snapshot_map(struct dm_target *ti, struct bio *bio, | |||
872 | /* Do reads */ | 917 | /* Do reads */ |
873 | down_read(&s->lock); | 918 | down_read(&s->lock); |
874 | 919 | ||
920 | if (!s->valid) { | ||
921 | up_read(&s->lock); | ||
922 | return -EIO; | ||
923 | } | ||
924 | |||
875 | /* See if it it has been remapped */ | 925 | /* See if it it has been remapped */ |
876 | e = lookup_exception(&s->complete, chunk); | 926 | e = lookup_exception(&s->complete, chunk); |
877 | if (e) | 927 | if (e) |
@@ -948,15 +998,15 @@ static int __origin_write(struct list_head *snapshots, struct bio *bio) | |||
948 | /* Do all the snapshots on this origin */ | 998 | /* Do all the snapshots on this origin */ |
949 | list_for_each_entry (snap, snapshots, list) { | 999 | list_for_each_entry (snap, snapshots, list) { |
950 | 1000 | ||
1001 | down_write(&snap->lock); | ||
1002 | |||
951 | /* Only deal with valid and active snapshots */ | 1003 | /* Only deal with valid and active snapshots */ |
952 | if (!snap->valid || !snap->active) | 1004 | if (!snap->valid || !snap->active) |
953 | continue; | 1005 | goto next_snapshot; |
954 | 1006 | ||
955 | /* Nothing to do if writing beyond end of snapshot */ | 1007 | /* Nothing to do if writing beyond end of snapshot */ |
956 | if (bio->bi_sector >= dm_table_get_size(snap->table)) | 1008 | if (bio->bi_sector >= dm_table_get_size(snap->table)) |
957 | continue; | 1009 | goto next_snapshot; |
958 | |||
959 | down_write(&snap->lock); | ||
960 | 1010 | ||
961 | /* | 1011 | /* |
962 | * Remember, different snapshots can have | 1012 | * Remember, different snapshots can have |
@@ -973,40 +1023,43 @@ static int __origin_write(struct list_head *snapshots, struct bio *bio) | |||
973 | * won't destroy the primary_pe while we're inside this loop. | 1023 | * won't destroy the primary_pe while we're inside this loop. |
974 | */ | 1024 | */ |
975 | e = lookup_exception(&snap->complete, chunk); | 1025 | e = lookup_exception(&snap->complete, chunk); |
976 | if (!e) { | 1026 | if (e) |
977 | pe = __find_pending_exception(snap, bio); | 1027 | goto next_snapshot; |
978 | if (!pe) { | 1028 | |
979 | snap->store.drop_snapshot(&snap->store); | 1029 | pe = __find_pending_exception(snap, bio); |
980 | snap->valid = 0; | 1030 | if (!pe) { |
981 | 1031 | __invalidate_snapshot(snap, pe, ENOMEM); | |
982 | } else { | 1032 | goto next_snapshot; |
983 | if (!primary_pe) { | 1033 | } |
984 | /* | 1034 | |
985 | * Either every pe here has same | 1035 | if (!primary_pe) { |
986 | * primary_pe or none has one yet. | 1036 | /* |
987 | */ | 1037 | * Either every pe here has same |
988 | if (pe->primary_pe) | 1038 | * primary_pe or none has one yet. |
989 | primary_pe = pe->primary_pe; | 1039 | */ |
990 | else { | 1040 | if (pe->primary_pe) |
991 | primary_pe = pe; | 1041 | primary_pe = pe->primary_pe; |
992 | first = 1; | 1042 | else { |
993 | } | 1043 | primary_pe = pe; |
994 | 1044 | first = 1; | |
995 | bio_list_add(&primary_pe->origin_bios, | ||
996 | bio); | ||
997 | r = 0; | ||
998 | } | ||
999 | if (!pe->primary_pe) { | ||
1000 | atomic_inc(&primary_pe->sibling_count); | ||
1001 | pe->primary_pe = primary_pe; | ||
1002 | } | ||
1003 | if (!pe->started) { | ||
1004 | pe->started = 1; | ||
1005 | list_add_tail(&pe->list, &pe_queue); | ||
1006 | } | ||
1007 | } | 1045 | } |
1046 | |||
1047 | bio_list_add(&primary_pe->origin_bios, bio); | ||
1048 | |||
1049 | r = 0; | ||
1050 | } | ||
1051 | |||
1052 | if (!pe->primary_pe) { | ||
1053 | atomic_inc(&primary_pe->sibling_count); | ||
1054 | pe->primary_pe = primary_pe; | ||
1055 | } | ||
1056 | |||
1057 | if (!pe->started) { | ||
1058 | pe->started = 1; | ||
1059 | list_add_tail(&pe->list, &pe_queue); | ||
1008 | } | 1060 | } |
1009 | 1061 | ||
1062 | next_snapshot: | ||
1010 | up_write(&snap->lock); | 1063 | up_write(&snap->lock); |
1011 | } | 1064 | } |
1012 | 1065 | ||