diff options
Diffstat (limited to 'drivers/md')
-rw-r--r-- | drivers/md/dm-snap.c | 122 |
1 files changed, 80 insertions, 42 deletions
diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c index 874f145431d8..475514bda9d0 100644 --- a/drivers/md/dm-snap.c +++ b/drivers/md/dm-snap.c | |||
@@ -54,11 +54,21 @@ struct pending_exception { | |||
54 | struct list_head list; | 54 | struct list_head list; |
55 | 55 | ||
56 | /* | 56 | /* |
57 | * Other pending_exceptions that are processing this | 57 | * The primary pending_exception is the one that holds |
58 | * chunk. When this list is empty, we know we can | 58 | * the sibling_count and the list of origin_bios for a |
59 | * complete the origins. | 59 | * group of pending_exceptions. It is always last to get freed. |
60 | * These fields get set up when writing to the origin. | ||
60 | */ | 61 | */ |
61 | struct list_head siblings; | 62 | struct pending_exception *primary_pe; |
63 | |||
64 | /* | ||
65 | * Number of pending_exceptions processing this chunk. | ||
66 | * When this drops to zero we must complete the origin bios. | ||
67 | * If incrementing or decrementing this, hold pe->snap->lock for | ||
68 | * the sibling concerned and not pe->primary_pe->snap->lock unless | ||
69 | * they are the same. | ||
70 | */ | ||
71 | atomic_t sibling_count; | ||
62 | 72 | ||
63 | /* Pointer back to snapshot context */ | 73 | /* Pointer back to snapshot context */ |
64 | struct dm_snapshot *snap; | 74 | struct dm_snapshot *snap; |
@@ -593,20 +603,15 @@ static void error_bios(struct bio *bio) | |||
593 | 603 | ||
594 | static struct bio *__flush_bios(struct pending_exception *pe) | 604 | static struct bio *__flush_bios(struct pending_exception *pe) |
595 | { | 605 | { |
596 | struct pending_exception *sibling; | 606 | /* |
597 | 607 | * If this pe is involved in a write to the origin and | |
598 | if (list_empty(&pe->siblings)) | 608 | * it is the last sibling to complete then release |
599 | return bio_list_get(&pe->origin_bios); | 609 | * the bios for the original write to the origin. |
600 | |||
601 | sibling = list_entry(pe->siblings.next, | ||
602 | struct pending_exception, siblings); | ||
603 | |||
604 | list_del(&pe->siblings); | ||
605 | |||
606 | /* This is fine as long as kcopyd is single-threaded. If kcopyd | ||
607 | * becomes multi-threaded, we'll need some locking here. | ||
608 | */ | 610 | */ |
609 | bio_list_merge(&sibling->origin_bios, &pe->origin_bios); | 611 | |
612 | if (pe->primary_pe && | ||
613 | atomic_dec_and_test(&pe->primary_pe->sibling_count)) | ||
614 | return bio_list_get(&pe->primary_pe->origin_bios); | ||
610 | 615 | ||
611 | return NULL; | 616 | return NULL; |
612 | } | 617 | } |
@@ -614,6 +619,7 @@ static struct bio *__flush_bios(struct pending_exception *pe) | |||
614 | static void pending_complete(struct pending_exception *pe, int success) | 619 | static void pending_complete(struct pending_exception *pe, int success) |
615 | { | 620 | { |
616 | struct exception *e; | 621 | struct exception *e; |
622 | struct pending_exception *primary_pe; | ||
617 | struct dm_snapshot *s = pe->snap; | 623 | struct dm_snapshot *s = pe->snap; |
618 | struct bio *flush = NULL; | 624 | struct bio *flush = NULL; |
619 | 625 | ||
@@ -662,7 +668,20 @@ static void pending_complete(struct pending_exception *pe, int success) | |||
662 | } | 668 | } |
663 | 669 | ||
664 | out: | 670 | out: |
665 | free_pending_exception(pe); | 671 | primary_pe = pe->primary_pe; |
672 | |||
673 | /* | ||
674 | * Free the pe if it's not linked to an origin write or if | ||
675 | * it's not itself a primary pe. | ||
676 | */ | ||
677 | if (!primary_pe || primary_pe != pe) | ||
678 | free_pending_exception(pe); | ||
679 | |||
680 | /* | ||
681 | * Free the primary pe if nothing references it. | ||
682 | */ | ||
683 | if (primary_pe && !atomic_read(&primary_pe->sibling_count)) | ||
684 | free_pending_exception(primary_pe); | ||
666 | 685 | ||
667 | if (flush) | 686 | if (flush) |
668 | flush_bios(flush); | 687 | flush_bios(flush); |
@@ -757,7 +776,8 @@ __find_pending_exception(struct dm_snapshot *s, struct bio *bio) | |||
757 | pe->e.old_chunk = chunk; | 776 | pe->e.old_chunk = chunk; |
758 | bio_list_init(&pe->origin_bios); | 777 | bio_list_init(&pe->origin_bios); |
759 | bio_list_init(&pe->snapshot_bios); | 778 | bio_list_init(&pe->snapshot_bios); |
760 | INIT_LIST_HEAD(&pe->siblings); | 779 | pe->primary_pe = NULL; |
780 | atomic_set(&pe->sibling_count, 1); | ||
761 | pe->snap = s; | 781 | pe->snap = s; |
762 | pe->started = 0; | 782 | pe->started = 0; |
763 | 783 | ||
@@ -916,26 +936,12 @@ static int snapshot_status(struct dm_target *ti, status_type_t type, | |||
916 | /*----------------------------------------------------------------- | 936 | /*----------------------------------------------------------------- |
917 | * Origin methods | 937 | * Origin methods |
918 | *---------------------------------------------------------------*/ | 938 | *---------------------------------------------------------------*/ |
919 | static void list_merge(struct list_head *l1, struct list_head *l2) | ||
920 | { | ||
921 | struct list_head *l1_n, *l2_p; | ||
922 | |||
923 | l1_n = l1->next; | ||
924 | l2_p = l2->prev; | ||
925 | |||
926 | l1->next = l2; | ||
927 | l2->prev = l1; | ||
928 | |||
929 | l2_p->next = l1_n; | ||
930 | l1_n->prev = l2_p; | ||
931 | } | ||
932 | |||
933 | static int __origin_write(struct list_head *snapshots, struct bio *bio) | 939 | static int __origin_write(struct list_head *snapshots, struct bio *bio) |
934 | { | 940 | { |
935 | int r = 1, first = 1; | 941 | int r = 1, first = 0; |
936 | struct dm_snapshot *snap; | 942 | struct dm_snapshot *snap; |
937 | struct exception *e; | 943 | struct exception *e; |
938 | struct pending_exception *pe, *next_pe, *last = NULL; | 944 | struct pending_exception *pe, *next_pe, *primary_pe = NULL; |
939 | chunk_t chunk; | 945 | chunk_t chunk; |
940 | LIST_HEAD(pe_queue); | 946 | LIST_HEAD(pe_queue); |
941 | 947 | ||
@@ -962,6 +968,9 @@ static int __origin_write(struct list_head *snapshots, struct bio *bio) | |||
962 | * Check exception table to see if block | 968 | * Check exception table to see if block |
963 | * is already remapped in this snapshot | 969 | * is already remapped in this snapshot |
964 | * and trigger an exception if not. | 970 | * and trigger an exception if not. |
971 | * | ||
972 | * sibling_count is initialised to 1 so pending_complete() | ||
973 | * won't destroy the primary_pe while we're inside this loop. | ||
965 | */ | 974 | */ |
966 | e = lookup_exception(&snap->complete, chunk); | 975 | e = lookup_exception(&snap->complete, chunk); |
967 | if (!e) { | 976 | if (!e) { |
@@ -971,31 +980,60 @@ static int __origin_write(struct list_head *snapshots, struct bio *bio) | |||
971 | snap->valid = 0; | 980 | snap->valid = 0; |
972 | 981 | ||
973 | } else { | 982 | } else { |
974 | if (first) { | 983 | if (!primary_pe) { |
975 | bio_list_add(&pe->origin_bios, bio); | 984 | /* |
985 | * Either every pe here has same | ||
986 | * primary_pe or none has one yet. | ||
987 | */ | ||
988 | if (pe->primary_pe) | ||
989 | primary_pe = pe->primary_pe; | ||
990 | else { | ||
991 | primary_pe = pe; | ||
992 | first = 1; | ||
993 | } | ||
994 | |||
995 | bio_list_add(&primary_pe->origin_bios, | ||
996 | bio); | ||
976 | r = 0; | 997 | r = 0; |
977 | first = 0; | ||
978 | } | 998 | } |
979 | if (last && list_empty(&pe->siblings)) | 999 | if (!pe->primary_pe) { |
980 | list_merge(&pe->siblings, | 1000 | atomic_inc(&primary_pe->sibling_count); |
981 | &last->siblings); | 1001 | pe->primary_pe = primary_pe; |
1002 | } | ||
982 | if (!pe->started) { | 1003 | if (!pe->started) { |
983 | pe->started = 1; | 1004 | pe->started = 1; |
984 | list_add_tail(&pe->list, &pe_queue); | 1005 | list_add_tail(&pe->list, &pe_queue); |
985 | } | 1006 | } |
986 | last = pe; | ||
987 | } | 1007 | } |
988 | } | 1008 | } |
989 | 1009 | ||
990 | up_write(&snap->lock); | 1010 | up_write(&snap->lock); |
991 | } | 1011 | } |
992 | 1012 | ||
1013 | if (!primary_pe) | ||
1014 | goto out; | ||
1015 | |||
1016 | /* | ||
1017 | * If this is the first time we're processing this chunk and | ||
1018 | * sibling_count is now 1 it means all the pending exceptions | ||
1019 | * got completed while we were in the loop above, so it falls to | ||
1020 | * us here to remove the primary_pe and submit any origin_bios. | ||
1021 | */ | ||
1022 | |||
1023 | if (first && atomic_dec_and_test(&primary_pe->sibling_count)) { | ||
1024 | flush_bios(bio_list_get(&primary_pe->origin_bios)); | ||
1025 | free_pending_exception(primary_pe); | ||
1026 | /* If we got here, pe_queue is necessarily empty. */ | ||
1027 | goto out; | ||
1028 | } | ||
1029 | |||
993 | /* | 1030 | /* |
994 | * Now that we have a complete pe list we can start the copying. | 1031 | * Now that we have a complete pe list we can start the copying. |
995 | */ | 1032 | */ |
996 | list_for_each_entry_safe(pe, next_pe, &pe_queue, list) | 1033 | list_for_each_entry_safe(pe, next_pe, &pe_queue, list) |
997 | start_copy(pe); | 1034 | start_copy(pe); |
998 | 1035 | ||
1036 | out: | ||
999 | return r; | 1037 | return r; |
1000 | } | 1038 | } |
1001 | 1039 | ||