Diffstat (limited to 'drivers/md/dm-snap.c')
 drivers/md/dm-snap.c | 409 ++++++++++++++++++++++++++++++++-----------------
 1 file changed, 251 insertions(+), 158 deletions(-)
diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c
index 7401540086df..08312b46463a 100644
--- a/drivers/md/dm-snap.c
+++ b/drivers/md/dm-snap.c
@@ -49,11 +49,26 @@ struct pending_exception {
 	struct bio_list snapshot_bios;
 
 	/*
-	 * Other pending_exceptions that are processing this
-	 * chunk. When this list is empty, we know we can
-	 * complete the origins.
+	 * Short-term queue of pending exceptions prior to submission.
 	 */
-	struct list_head siblings;
+	struct list_head list;
+
+	/*
+	 * The primary pending_exception is the one that holds
+	 * the sibling_count and the list of origin_bios for a
+	 * group of pending_exceptions. It is always last to get freed.
+	 * These fields get set up when writing to the origin.
+	 */
+	struct pending_exception *primary_pe;
+
+	/*
+	 * Number of pending_exceptions processing this chunk.
+	 * When this drops to zero we must complete the origin bios.
+	 * If incrementing or decrementing this, hold pe->snap->lock for
+	 * the sibling concerned and not pe->primary_pe->snap->lock unless
+	 * they are the same.
+	 */
+	atomic_t sibling_count;
 
 	/* Pointer back to snapshot context */
 	struct dm_snapshot *snap;
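
The two new fields define a small reference-counting protocol: every pending_exception working on a chunk holds a reference on the group's primary_pe, and whichever sibling drops sibling_count to zero releases the queued origin bios. Here is a minimal userspace sketch of that lifecycle, modeled with C11 atomics instead of the kernel's atomic_t; all names are invented for illustration and nothing below comes from dm-snap.c.

#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

struct group {
	atomic_int sibling_count;	/* pending exceptions for this chunk */
};

static struct group *group_create(void)
{
	struct group *g = malloc(sizeof(*g));

	atomic_init(&g->sibling_count, 1);	/* creator holds one ref */
	return g;
}

static void group_get(struct group *g)
{
	atomic_fetch_add(&g->sibling_count, 1);
}

/* Returns 1 when the caller was the last sibling: only then may the
 * queued origin bios be submitted and the group itself be freed. */
static int group_put(struct group *g)
{
	if (atomic_fetch_sub(&g->sibling_count, 1) == 1) {
		printf("last sibling: flush origin bios, free group\n");
		free(g);
		return 1;
	}
	return 0;
}

int main(void)
{
	struct group *g = group_create();

	group_get(g);	/* a second snapshot joins the chunk */
	group_put(g);	/* first sibling completes: 2 -> 1 */
	group_put(g);	/* last sibling completes: 1 -> 0, cleanup */
	return 0;
}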
@@ -377,6 +392,8 @@ static void read_snapshot_metadata(struct dm_snapshot *s)
 		down_write(&s->lock);
 		s->valid = 0;
 		up_write(&s->lock);
+
+		dm_table_event(s->table);
 	}
 }
 
@@ -542,8 +559,12 @@ static void snapshot_dtr(struct dm_target *ti)
 {
 	struct dm_snapshot *s = (struct dm_snapshot *) ti->private;
 
+	/* Prevent further origin writes from using this snapshot. */
+	/* After this returns there can be no new kcopyd jobs. */
 	unregister_snapshot(s);
 
+	kcopyd_client_destroy(s->kcopyd_client);
+
 	exit_exception_table(&s->pending, pending_cache);
 	exit_exception_table(&s->complete, exception_cache);
 
@@ -552,7 +573,7 @@ static void snapshot_dtr(struct dm_target *ti)
 
 	dm_put_device(ti, s->origin);
 	dm_put_device(ti, s->cow);
-	kcopyd_client_destroy(s->kcopyd_client);
+
 	kfree(s);
 }
 
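
Read together, these two hunks fix a teardown-ordering problem: stop new work first (unregister_snapshot()), then drain and destroy the worker (kcopyd_client_destroy(), after which the new comments say no new kcopyd jobs can exist), and only then free the exception tables that in-flight jobs touch; previously the kcopyd client was destroyed after the tables were already gone. A compilable toy of the ordering, with stub functions standing in for the real calls (the stubs are illustrative, not kernel APIs):

#include <stdio.h>

static void stop_new_work(void)	 { puts("no new jobs after this"); }
static void drain_worker(void)	 { puts("in-flight jobs finished"); }
static void free_job_state(void) { puts("tables freed safely"); }

int main(void)
{
	/* Freeing job state before draining the worker would risk a
	 * use-after-free, hence moving the destroy call earlier. */
	stop_new_work();
	drain_worker();
	free_job_state();
	return 0;
}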
@@ -586,78 +607,117 @@ static void error_bios(struct bio *bio)
 	}
 }
 
+static inline void error_snapshot_bios(struct pending_exception *pe)
+{
+	error_bios(bio_list_get(&pe->snapshot_bios));
+}
+
 static struct bio *__flush_bios(struct pending_exception *pe)
 {
-	struct pending_exception *sibling;
+	/*
+	 * If this pe is involved in a write to the origin and
+	 * it is the last sibling to complete then release
+	 * the bios for the original write to the origin.
+	 */
+
+	if (pe->primary_pe &&
+	    atomic_dec_and_test(&pe->primary_pe->sibling_count))
+		return bio_list_get(&pe->primary_pe->origin_bios);
+
+	return NULL;
+}
+
+static void __invalidate_snapshot(struct dm_snapshot *s,
+				  struct pending_exception *pe, int err)
+{
+	if (!s->valid)
+		return;
 
-	if (list_empty(&pe->siblings))
-		return bio_list_get(&pe->origin_bios);
+	if (err == -EIO)
+		DMERR("Invalidating snapshot: Error reading/writing.");
+	else if (err == -ENOMEM)
+		DMERR("Invalidating snapshot: Unable to allocate exception.");
 
-	sibling = list_entry(pe->siblings.next,
-			     struct pending_exception, siblings);
+	if (pe)
+		remove_exception(&pe->e);
 
-	list_del(&pe->siblings);
+	if (s->store.drop_snapshot)
+		s->store.drop_snapshot(&s->store);
 
-	/* This is fine as long as kcopyd is single-threaded. If kcopyd
-	 * becomes multi-threaded, we'll need some locking here.
-	 */
-	bio_list_merge(&sibling->origin_bios, &pe->origin_bios);
+	s->valid = 0;
 
-	return NULL;
+	dm_table_event(s->table);
 }
 
 static void pending_complete(struct pending_exception *pe, int success)
 {
 	struct exception *e;
+	struct pending_exception *primary_pe;
 	struct dm_snapshot *s = pe->snap;
 	struct bio *flush = NULL;
 
-	if (success) {
-		e = alloc_exception();
-		if (!e) {
-			DMWARN("Unable to allocate exception.");
-			down_write(&s->lock);
-			s->store.drop_snapshot(&s->store);
-			s->valid = 0;
-			flush = __flush_bios(pe);
-			up_write(&s->lock);
-
-			error_bios(bio_list_get(&pe->snapshot_bios));
-			goto out;
-		}
-		*e = pe->e;
-
-		/*
-		 * Add a proper exception, and remove the
-		 * in-flight exception from the list.
-		 */
+	if (!success) {
+		/* Read/write error - snapshot is unusable */
 		down_write(&s->lock);
-		insert_exception(&s->complete, e);
-		remove_exception(&pe->e);
+		__invalidate_snapshot(s, pe, -EIO);
 		flush = __flush_bios(pe);
-
-		/* Submit any pending write bios */
 		up_write(&s->lock);
 
-		flush_bios(bio_list_get(&pe->snapshot_bios));
-	} else {
-		/* Read/write error - snapshot is unusable */
+		error_snapshot_bios(pe);
+		goto out;
+	}
+
+	e = alloc_exception();
+	if (!e) {
 		down_write(&s->lock);
-		if (s->valid)
-			DMERR("Error reading/writing snapshot");
-		s->store.drop_snapshot(&s->store);
-		s->valid = 0;
-		remove_exception(&pe->e);
+		__invalidate_snapshot(s, pe, -ENOMEM);
 		flush = __flush_bios(pe);
 		up_write(&s->lock);
 
-		error_bios(bio_list_get(&pe->snapshot_bios));
+		error_snapshot_bios(pe);
+		goto out;
+	}
+	*e = pe->e;
 
-		dm_table_event(s->table);
+	/*
+	 * Add a proper exception, and remove the
+	 * in-flight exception from the list.
+	 */
+	down_write(&s->lock);
+	if (!s->valid) {
+		flush = __flush_bios(pe);
+		up_write(&s->lock);
+
+		free_exception(e);
+
+		error_snapshot_bios(pe);
+		goto out;
 	}
 
+	insert_exception(&s->complete, e);
+	remove_exception(&pe->e);
+	flush = __flush_bios(pe);
+
+	up_write(&s->lock);
+
+	/* Submit any pending write bios */
+	flush_bios(bio_list_get(&pe->snapshot_bios));
+
  out:
-	free_pending_exception(pe);
+	primary_pe = pe->primary_pe;
+
+	/*
+	 * Free the pe if it's not linked to an origin write or if
+	 * it's not itself a primary pe.
+	 */
+	if (!primary_pe || primary_pe != pe)
+		free_pending_exception(pe);
+
+	/*
+	 * Free the primary pe if nothing references it.
+	 */
+	if (primary_pe && !atomic_read(&primary_pe->sibling_count))
+		free_pending_exception(primary_pe);
 
 	if (flush)
 		flush_bios(flush);
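
The freeing rules at the reworked out: label are easy to misread: a pe is freed immediately unless it is itself the primary, and the primary is freed only once its sibling_count has reached zero, so it always outlives its siblings. A standalone sketch of just that bookkeeping, using invented stand-in types rather than the kernel structs:

#include <stdatomic.h>
#include <stdlib.h>

struct pe {
	struct pe *primary_pe;		/* NULL if not part of an origin write */
	atomic_int sibling_count;	/* meaningful only on the primary */
};

/* Mirrors the logic after the "out:" label above; assumes the
 * __flush_bios() analogue already dropped this pe's reference. */
static void complete_one(struct pe *pe)
{
	struct pe *primary_pe = pe->primary_pe;

	/* Free pe unless it is its own primary: the primary must
	 * outlive every sibling that still points at it. */
	if (!primary_pe || primary_pe != pe)
		free(pe);

	/* The primary goes last, once nothing references it. */
	if (primary_pe && atomic_load(&primary_pe->sibling_count) == 0)
		free(primary_pe);
}

int main(void)
{
	struct pe *primary = calloc(1, sizeof(*primary));
	struct pe *sib = calloc(1, sizeof(*sib));

	primary->primary_pe = primary;
	sib->primary_pe = primary;
	atomic_init(&primary->sibling_count, 2);

	atomic_fetch_sub(&primary->sibling_count, 1);
	complete_one(sib);	/* frees sib; primary still referenced */
	atomic_fetch_sub(&primary->sibling_count, 1);
	complete_one(primary);	/* count is 0: primary freed last */
	return 0;
}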
@@ -734,38 +794,45 @@ __find_pending_exception(struct dm_snapshot *s, struct bio *bio)
 	if (e) {
 		/* cast the exception to a pending exception */
 		pe = container_of(e, struct pending_exception, e);
+		goto out;
+	}
 
-	} else {
-		/*
-		 * Create a new pending exception, we don't want
-		 * to hold the lock while we do this.
-		 */
-		up_write(&s->lock);
-		pe = alloc_pending_exception();
-		down_write(&s->lock);
+	/*
+	 * Create a new pending exception, we don't want
+	 * to hold the lock while we do this.
+	 */
+	up_write(&s->lock);
+	pe = alloc_pending_exception();
+	down_write(&s->lock);
 
-		e = lookup_exception(&s->pending, chunk);
-		if (e) {
-			free_pending_exception(pe);
-			pe = container_of(e, struct pending_exception, e);
-		} else {
-			pe->e.old_chunk = chunk;
-			bio_list_init(&pe->origin_bios);
-			bio_list_init(&pe->snapshot_bios);
-			INIT_LIST_HEAD(&pe->siblings);
-			pe->snap = s;
-			pe->started = 0;
-
-			if (s->store.prepare_exception(&s->store, &pe->e)) {
-				free_pending_exception(pe);
-				s->valid = 0;
-				return NULL;
-			}
+	if (!s->valid) {
+		free_pending_exception(pe);
+		return NULL;
+	}
 
-			insert_exception(&s->pending, &pe->e);
-		}
+	e = lookup_exception(&s->pending, chunk);
+	if (e) {
+		free_pending_exception(pe);
+		pe = container_of(e, struct pending_exception, e);
+		goto out;
+	}
+
+	pe->e.old_chunk = chunk;
+	bio_list_init(&pe->origin_bios);
+	bio_list_init(&pe->snapshot_bios);
+	pe->primary_pe = NULL;
+	atomic_set(&pe->sibling_count, 1);
+	pe->snap = s;
+	pe->started = 0;
+
+	if (s->store.prepare_exception(&s->store, &pe->e)) {
+		free_pending_exception(pe);
+		return NULL;
 	}
 
+	insert_exception(&s->pending, &pe->e);
+
+ out:
 	return pe;
 }
 
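
__find_pending_exception() now follows the classic drop-lock, allocate, retake-lock, recheck sequence: allocation may sleep, so s->lock is released around it, and after reacquiring the lock both s->valid and the pending-table lookup must be redone because either may have changed in the window. A generic pthread rendering of the same pattern; the one-slot cache is invented for illustration and is not the kernel's exception table:

#include <pthread.h>
#include <stdlib.h>

/* A one-slot "table", just enough to demonstrate the pattern. */
struct cache {
	pthread_mutex_t lock;
	int valid;
	long key;
	void *val;
};

/* Called with c->lock held; returns with c->lock still held. */
static void *find_or_create(struct cache *c, long key)
{
	void *fresh;

	if (c->val && c->key == key)
		return c->val;			/* already present */

	/* Allocation may block, so drop the lock around it. */
	pthread_mutex_unlock(&c->lock);
	fresh = malloc(64);
	pthread_mutex_lock(&c->lock);

	/* The world may have changed while the lock was down:
	 * first recheck validity... */
	if (!c->valid) {
		free(fresh);
		return NULL;
	}

	/* ...then recheck for a racing creator. */
	if (c->val && c->key == key) {
		free(fresh);
		return c->val;
	}

	c->key = key;
	c->val = fresh;
	return fresh;
}

int main(void)
{
	struct cache c = { PTHREAD_MUTEX_INITIALIZER, 1, 0, NULL };
	void *v;

	pthread_mutex_lock(&c.lock);
	v = find_or_create(&c, 42);
	pthread_mutex_unlock(&c.lock);
	free(v);
	return 0;
}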
@@ -782,13 +849,15 @@ static int snapshot_map(struct dm_target *ti, struct bio *bio,
 {
 	struct exception *e;
 	struct dm_snapshot *s = (struct dm_snapshot *) ti->private;
+	int copy_needed = 0;
 	int r = 1;
 	chunk_t chunk;
-	struct pending_exception *pe;
+	struct pending_exception *pe = NULL;
 
 	chunk = sector_to_chunk(s, bio->bi_sector);
 
 	/* Full snapshots are not usable */
+	/* To get here the table must be live so s->active is always set. */
 	if (!s->valid)
 		return -EIO;
 
@@ -806,36 +875,41 @@ static int snapshot_map(struct dm_target *ti, struct bio *bio,
 		 * to copy an exception */
 		down_write(&s->lock);
 
+		if (!s->valid) {
+			r = -EIO;
+			goto out_unlock;
+		}
+
 		/* If the block is already remapped - use that, else remap it */
 		e = lookup_exception(&s->complete, chunk);
 		if (e) {
 			remap_exception(s, e, bio);
-			up_write(&s->lock);
-
-		} else {
-			pe = __find_pending_exception(s, bio);
-
-			if (!pe) {
-				if (s->store.drop_snapshot)
-					s->store.drop_snapshot(&s->store);
-				s->valid = 0;
-				r = -EIO;
-				up_write(&s->lock);
-			} else {
-				remap_exception(s, &pe->e, bio);
-				bio_list_add(&pe->snapshot_bios, bio);
-
-				if (!pe->started) {
-					/* this is protected by snap->lock */
-					pe->started = 1;
-					up_write(&s->lock);
-					start_copy(pe);
-				} else
-					up_write(&s->lock);
-				r = 0;
-			}
+			goto out_unlock;
+		}
+
+		pe = __find_pending_exception(s, bio);
+		if (!pe) {
+			__invalidate_snapshot(s, pe, -ENOMEM);
+			r = -EIO;
+			goto out_unlock;
+		}
+
+		remap_exception(s, &pe->e, bio);
+		bio_list_add(&pe->snapshot_bios, bio);
+
+		if (!pe->started) {
+			/* this is protected by snap->lock */
+			pe->started = 1;
+			copy_needed = 1;
 		}
 
+		r = 0;
+
+ out_unlock:
+		up_write(&s->lock);
+
+		if (copy_needed)
+			start_copy(pe);
 	} else {
 		/*
 		 * FIXME: this read path scares me because we
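
The rewritten write path funnels every exit through a single out_unlock and, via the copy_needed flag, defers start_copy() until after the lock is dropped: the decision is made under the lock, the potentially blocking work outside it. A minimal pthread sketch of the idiom; start_copy_work() is a hypothetical stand-in, not a kernel function:

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static int started;

/* Stand-in for start_copy(): work that may block and therefore
 * must not run while `lock` is held. */
static void start_copy_work(void)
{
	puts("copy started outside the lock");
}

static void map_write(void)
{
	int copy_needed = 0;

	pthread_mutex_lock(&lock);
	if (!started) {
		started = 1;		/* state change stays under the lock */
		copy_needed = 1;	/* ...the work itself is deferred */
	}
	pthread_mutex_unlock(&lock);

	if (copy_needed)
		start_copy_work();
}

int main(void)
{
	map_write();	/* first writer kicks off the copy */
	map_write();	/* later writers see started and skip it */
	return 0;
}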
@@ -847,6 +921,11 @@ static int snapshot_map(struct dm_target *ti, struct bio *bio,
 		/* Do reads */
 		down_read(&s->lock);
 
+		if (!s->valid) {
+			up_read(&s->lock);
+			return -EIO;
+		}
+
 		/* See if it it has been remapped */
 		e = lookup_exception(&s->complete, chunk);
 		if (e)
@@ -884,9 +963,9 @@ static int snapshot_status(struct dm_target *ti, status_type_t type,
 			snap->store.fraction_full(&snap->store,
 						  &numerator,
 						  &denominator);
-			snprintf(result, maxlen,
-				 SECTOR_FORMAT "/" SECTOR_FORMAT,
-				 numerator, denominator);
+			snprintf(result, maxlen, "%llu/%llu",
+				 (unsigned long long)numerator,
+				 (unsigned long long)denominator);
 		}
 		else
 			snprintf(result, maxlen, "Unknown");
@@ -899,9 +978,10 @@ static int snapshot_status(struct dm_target *ti, status_type_t type,
 		 * to make private copies if the output is to
 		 * make sense.
 		 */
-		snprintf(result, maxlen, "%s %s %c " SECTOR_FORMAT,
+		snprintf(result, maxlen, "%s %s %c %llu",
 			 snap->origin->name, snap->cow->name,
-			 snap->type, snap->chunk_size);
+			 snap->type,
+			 (unsigned long long)snap->chunk_size);
 		break;
 	}
 
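
Both snprintf() hunks replace SECTOR_FORMAT with a fixed "%llu" plus explicit casts. The width of the underlying counters can vary with architecture and configuration, but a cast to unsigned long long makes one format string correct everywhere. A tiny userspace demonstration; the typedefs are illustrative stand-ins, not the kernel's types:

#include <stdint.h>
#include <stdio.h>

typedef uint32_t narrow_t;	/* pretend: a 32-bit sector count */
typedef uint64_t wide_t;	/* pretend: a 64-bit sector count */

int main(void)
{
	narrow_t a = 123;
	wide_t b = 456;

	/* Same format string regardless of each type's real width. */
	printf("%llu/%llu\n",
	       (unsigned long long)a, (unsigned long long)b);
	return 0;
}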
@@ -911,40 +991,27 @@ static int snapshot_status(struct dm_target *ti, status_type_t type,
 /*-----------------------------------------------------------------
  * Origin methods
  *---------------------------------------------------------------*/
-static void list_merge(struct list_head *l1, struct list_head *l2)
-{
-	struct list_head *l1_n, *l2_p;
-
-	l1_n = l1->next;
-	l2_p = l2->prev;
-
-	l1->next = l2;
-	l2->prev = l1;
-
-	l2_p->next = l1_n;
-	l1_n->prev = l2_p;
-}
-
 static int __origin_write(struct list_head *snapshots, struct bio *bio)
 {
-	int r = 1, first = 1;
+	int r = 1, first = 0;
 	struct dm_snapshot *snap;
 	struct exception *e;
-	struct pending_exception *pe, *last = NULL;
+	struct pending_exception *pe, *next_pe, *primary_pe = NULL;
 	chunk_t chunk;
+	LIST_HEAD(pe_queue);
 
 	/* Do all the snapshots on this origin */
 	list_for_each_entry (snap, snapshots, list) {
 
+		down_write(&snap->lock);
+
 		/* Only deal with valid and active snapshots */
 		if (!snap->valid || !snap->active)
-			continue;
+			goto next_snapshot;
 
 		/* Nothing to do if writing beyond end of snapshot */
 		if (bio->bi_sector >= dm_table_get_size(snap->table))
-			continue;
-
-		down_write(&snap->lock);
+			goto next_snapshot;
 
 		/*
 		 * Remember, different snapshots can have
@@ -956,49 +1023,75 @@ static int __origin_write(struct list_head *snapshots, struct bio *bio)
 		 * Check exception table to see if block
 		 * is already remapped in this snapshot
 		 * and trigger an exception if not.
+		 *
+		 * sibling_count is initialised to 1 so pending_complete()
+		 * won't destroy the primary_pe while we're inside this loop.
 		 */
 		e = lookup_exception(&snap->complete, chunk);
-		if (!e) {
-			pe = __find_pending_exception(snap, bio);
-			if (!pe) {
-				snap->store.drop_snapshot(&snap->store);
-				snap->valid = 0;
-
-			} else {
-				if (last)
-					list_merge(&pe->siblings,
-						   &last->siblings);
-
-				last = pe;
-				r = 0;
+		if (e)
+			goto next_snapshot;
+
+		pe = __find_pending_exception(snap, bio);
+		if (!pe) {
+			__invalidate_snapshot(snap, pe, ENOMEM);
+			goto next_snapshot;
+		}
+
+		if (!primary_pe) {
+			/*
+			 * Either every pe here has same
+			 * primary_pe or none has one yet.
+			 */
+			if (pe->primary_pe)
+				primary_pe = pe->primary_pe;
+			else {
+				primary_pe = pe;
+				first = 1;
 			}
+
+			bio_list_add(&primary_pe->origin_bios, bio);
+
+			r = 0;
+		}
+
+		if (!pe->primary_pe) {
+			atomic_inc(&primary_pe->sibling_count);
+			pe->primary_pe = primary_pe;
+		}
+
+		if (!pe->started) {
+			pe->started = 1;
+			list_add_tail(&pe->list, &pe_queue);
 		}
 
+ next_snapshot:
 		up_write(&snap->lock);
 	}
 
+	if (!primary_pe)
+		goto out;
+
 	/*
-	 * Now that we have a complete pe list we can start the copying.
+	 * If this is the first time we're processing this chunk and
+	 * sibling_count is now 1 it means all the pending exceptions
+	 * got completed while we were in the loop above, so it falls to
+	 * us here to remove the primary_pe and submit any origin_bios.
 	 */
-	if (last) {
-		pe = last;
-		do {
-			down_write(&pe->snap->lock);
-			if (first)
-				bio_list_add(&pe->origin_bios, bio);
-			if (!pe->started) {
-				pe->started = 1;
-				up_write(&pe->snap->lock);
-				start_copy(pe);
-			} else
-				up_write(&pe->snap->lock);
-			first = 0;
-			pe = list_entry(pe->siblings.next,
-					struct pending_exception, siblings);
-
-		} while (pe != last);
+
+	if (first && atomic_dec_and_test(&primary_pe->sibling_count)) {
+		flush_bios(bio_list_get(&primary_pe->origin_bios));
+		free_pending_exception(primary_pe);
+		/* If we got here, pe_queue is necessarily empty. */
+		goto out;
 	}
 
+	/*
+	 * Now that we have a complete pe list we can start the copying.
+	 */
+	list_for_each_entry_safe(pe, next_pe, &pe_queue, list)
+		start_copy(pe);
+
+ out:
 	return r;
 }
 
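
Two details in this last hunk are worth flagging. First, the call above passes ENOMEM where __invalidate_snapshot() compares err against -ENOMEM, so the "Unable to allocate exception" message is never printed on this path, although the invalidation itself still happens. Second, sibling_count starting at 1 works as a guard reference held by the loop: completions racing with the loop can never free the primary_pe, and the first && atomic_dec_and_test() test afterwards drops that guard and performs the cleanup itself if every sibling has already finished. Extending the earlier refcount sketch (again with invented names, modeled on C11 atomics):

#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

struct group {
	atomic_int refs;
};

/* Completion path: drop one reference; the last one cleans up. */
static void put_group(struct group *g, const char *who)
{
	if (atomic_fetch_sub(&g->refs, 1) == 1) {
		printf("%s was last: submit origin bios, free group\n", who);
		free(g);
	}
}

int main(void)
{
	struct group *g = malloc(sizeof(*g));

	/* refs starts at 1: the setup loop itself holds a guard
	 * reference, so racing completions cannot free the group
	 * while the loop is still using it. */
	atomic_init(&g->refs, 1);

	atomic_fetch_add(&g->refs, 1);	/* a sibling joins */
	put_group(g, "sibling");	/* completes early: 2 -> 1 */

	/* End of setup: drop the guard.  If all siblings already
	 * completed, cleanup falls to us, mirroring the
	 * first && atomic_dec_and_test() test in the patch. */
	put_group(g, "setup loop");
	return 0;
}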