diff options
author | Mikulas Patocka <mpatocka@redhat.com> | 2009-12-10 18:52:33 -0500 |
---|---|---|
committer | Alasdair G Kergon <agk@redhat.com> | 2009-12-10 18:52:33 -0500 |
commit | 9fe862548821b0c206c58e8057b782530a173703 (patch) | |
tree | 1a23bec091d5163dfec29750b5daf3e93637c5ab | |
parent | 1e03f97e4301f75a2f3b649787f7876516764929 (diff) |
dm snapshot: queue writes to chunks being merged
While a set of chunks is being merged, any writes that overlap those chunks
need to be queued until the merge of those chunks has been committed.
Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
Signed-off-by: Alasdair G Kergon <agk@redhat.com>
-rw-r--r-- | drivers/md/dm-snap.c | 91 |
1 files changed, 78 insertions, 13 deletions
diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c
index dc2412e6c5cf..91a47c522b09 100644
--- a/drivers/md/dm-snap.c
+++ b/drivers/md/dm-snap.c
@@ -109,6 +109,16 @@ struct dm_snapshot { | |||
109 | 109 | ||
110 | /* Wait for events based on state_bits */ | 110 | /* Wait for events based on state_bits */ |
111 | unsigned long state_bits; | 111 | unsigned long state_bits; |
112 | |||
113 | /* Range of chunks currently being merged. */ | ||
114 | chunk_t first_merging_chunk; | ||
115 | int num_merging_chunks; | ||
116 | |||
117 | /* | ||
118 | * Incoming bios that overlap with chunks being merged must wait | ||
119 | * for them to be committed. | ||
120 | */ | ||
121 | struct bio_list bios_queued_during_merge; | ||
112 | }; | 122 | }; |
113 | 123 | ||
114 | /* | 124 | /* |
@@ -747,6 +757,14 @@ static void merge_shutdown(struct dm_snapshot *s) | |||
747 | wake_up_bit(&s->state_bits, RUNNING_MERGE); | 757 | wake_up_bit(&s->state_bits, RUNNING_MERGE); |
748 | } | 758 | } |
749 | 759 | ||
760 | static struct bio *__release_queued_bios_after_merge(struct dm_snapshot *s) | ||
761 | { | ||
762 | s->first_merging_chunk = 0; | ||
763 | s->num_merging_chunks = 0; | ||
764 | |||
765 | return bio_list_get(&s->bios_queued_during_merge); | ||
766 | } | ||
767 | |||
750 | /* | 768 | /* |
751 | * Remove one chunk from the index of completed exceptions. | 769 | * Remove one chunk from the index of completed exceptions. |
752 | */ | 770 | */ |
@@ -755,8 +773,6 @@ static int __remove_single_exception_chunk(struct dm_snapshot *s, | |||
755 | { | 773 | { |
756 | struct dm_exception *e; | 774 | struct dm_exception *e; |
757 | 775 | ||
758 | /* FIXME: interlock writes to this chunk */ | ||
759 | |||
760 | e = dm_lookup_exception(&s->complete, old_chunk); | 776 | e = dm_lookup_exception(&s->complete, old_chunk); |
761 | if (!e) { | 777 | if (!e) { |
762 | DMERR("Corruption detected: exception for block %llu is " | 778 | DMERR("Corruption detected: exception for block %llu is " |
@@ -801,14 +817,32 @@ static int __remove_single_exception_chunk(struct dm_snapshot *s, | |||
801 | return 0; | 817 | return 0; |
802 | } | 818 | } |
803 | 819 | ||
804 | static int remove_single_exception_chunk(struct dm_snapshot *s, | 820 | static void flush_bios(struct bio *bio); |
805 | chunk_t old_chunk) | 821 | |
822 | static int remove_single_exception_chunk(struct dm_snapshot *s) | ||
806 | { | 823 | { |
807 | int r = 0; | 824 | struct bio *b = NULL; |
825 | int r; | ||
826 | chunk_t old_chunk = s->first_merging_chunk + s->num_merging_chunks - 1; | ||
808 | 827 | ||
809 | down_write(&s->lock); | 828 | down_write(&s->lock); |
810 | r = __remove_single_exception_chunk(s, old_chunk); | 829 | |
830 | /* | ||
831 | * Process chunks (and associated exceptions) in reverse order | ||
832 | * so that dm_consecutive_chunk_count_dec() accounting works. | ||
833 | */ | ||
834 | do { | ||
835 | r = __remove_single_exception_chunk(s, old_chunk); | ||
836 | if (r) | ||
837 | goto out; | ||
838 | } while (old_chunk-- > s->first_merging_chunk); | ||
839 | |||
840 | b = __release_queued_bios_after_merge(s); | ||
841 | |||
842 | out: | ||
811 | up_write(&s->lock); | 843 | up_write(&s->lock); |
844 | if (b) | ||
845 | flush_bios(b); | ||
812 | 846 | ||
813 | return r; | 847 | return r; |
814 | } | 848 | } |
@@ -844,9 +878,6 @@ static void snapshot_merge_next_chunks(struct dm_snapshot *s) | |||
844 | 878 | ||
845 | /* TODO: use larger I/O size once we verify that kcopyd handles it */ | 879 | /* TODO: use larger I/O size once we verify that kcopyd handles it */ |
846 | 880 | ||
847 | if (remove_single_exception_chunk(s, old_chunk) < 0) | ||
848 | goto shut; | ||
849 | |||
850 | dest.bdev = s->origin->bdev; | 881 | dest.bdev = s->origin->bdev; |
851 | dest.sector = chunk_to_sector(s->store, old_chunk); | 882 | dest.sector = chunk_to_sector(s->store, old_chunk); |
852 | dest.count = min((sector_t)s->store->chunk_size, | 883 | dest.count = min((sector_t)s->store->chunk_size, |
@@ -856,6 +887,13 @@ static void snapshot_merge_next_chunks(struct dm_snapshot *s) | |||
856 | src.sector = chunk_to_sector(s->store, new_chunk); | 887 | src.sector = chunk_to_sector(s->store, new_chunk); |
857 | src.count = dest.count; | 888 | src.count = dest.count; |
858 | 889 | ||
890 | down_write(&s->lock); | ||
891 | s->first_merging_chunk = old_chunk; | ||
892 | s->num_merging_chunks = 1; | ||
893 | up_write(&s->lock); | ||
894 | |||
895 | /* !!! FIXME: wait until writes to this chunk drain */ | ||
896 | |||
859 | dm_kcopyd_copy(s->kcopyd_client, &src, 1, &dest, 0, merge_callback, s); | 897 | dm_kcopyd_copy(s->kcopyd_client, &src, 1, &dest, 0, merge_callback, s); |
860 | return; | 898 | return; |
861 | 899 | ||
@@ -863,9 +901,12 @@ shut: | |||
863 | merge_shutdown(s); | 901 | merge_shutdown(s); |
864 | } | 902 | } |
865 | 903 | ||
904 | static void error_bios(struct bio *bio); | ||
905 | |||
866 | static void merge_callback(int read_err, unsigned long write_err, void *context) | 906 | static void merge_callback(int read_err, unsigned long write_err, void *context) |
867 | { | 907 | { |
868 | struct dm_snapshot *s = context; | 908 | struct dm_snapshot *s = context; |
909 | struct bio *b = NULL; | ||
869 | 910 | ||
870 | if (read_err || write_err) { | 911 | if (read_err || write_err) { |
871 | if (read_err) | 912 | if (read_err) |
@@ -875,16 +916,25 @@ static void merge_callback(int read_err, unsigned long write_err, void *context) | |||
875 | goto shut; | 916 | goto shut; |
876 | } | 917 | } |
877 | 918 | ||
878 | if (s->store->type->commit_merge(s->store, 1) < 0) { | 919 | if (s->store->type->commit_merge(s->store, |
920 | s->num_merging_chunks) < 0) { | ||
879 | DMERR("Write error in exception store: shutting down merge"); | 921 | DMERR("Write error in exception store: shutting down merge"); |
880 | goto shut; | 922 | goto shut; |
881 | } | 923 | } |
882 | 924 | ||
925 | if (remove_single_exception_chunk(s) < 0) | ||
926 | goto shut; | ||
927 | |||
883 | snapshot_merge_next_chunks(s); | 928 | snapshot_merge_next_chunks(s); |
884 | 929 | ||
885 | return; | 930 | return; |
886 | 931 | ||
887 | shut: | 932 | shut: |
933 | down_write(&s->lock); | ||
934 | b = __release_queued_bios_after_merge(s); | ||
935 | up_write(&s->lock); | ||
936 | error_bios(b); | ||
937 | |||
888 | merge_shutdown(s); | 938 | merge_shutdown(s); |
889 | } | 939 | } |
890 | 940 | ||
@@ -983,6 +1033,9 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv) | |||
983 | INIT_LIST_HEAD(&s->list); | 1033 | INIT_LIST_HEAD(&s->list); |
984 | spin_lock_init(&s->pe_lock); | 1034 | spin_lock_init(&s->pe_lock); |
985 | s->state_bits = 0; | 1035 | s->state_bits = 0; |
1036 | s->first_merging_chunk = 0; | ||
1037 | s->num_merging_chunks = 0; | ||
1038 | bio_list_init(&s->bios_queued_during_merge); | ||
986 | 1039 | ||
987 | /* Allocate hash table for COW data */ | 1040 | /* Allocate hash table for COW data */ |
988 | if (init_hash_tables(s)) { | 1041 | if (init_hash_tables(s)) { |
@@ -1539,6 +1592,8 @@ static int snapshot_map(struct dm_target *ti, struct bio *bio, | |||
1539 | * For each chunk, if there is an existing exception, it is used to | 1592 | * For each chunk, if there is an existing exception, it is used to |
1540 | * redirect I/O to the cow device. Otherwise I/O is sent to the origin, | 1593 | * redirect I/O to the cow device. Otherwise I/O is sent to the origin, |
1541 | * which in turn might generate exceptions in other snapshots. | 1594 | * which in turn might generate exceptions in other snapshots. |
1595 | * If merging is currently taking place on the chunk in question, the | ||
1596 | * I/O is deferred by adding it to s->bios_queued_during_merge. | ||
1542 | */ | 1597 | */ |
1543 | static int snapshot_merge_map(struct dm_target *ti, struct bio *bio, | 1598 | static int snapshot_merge_map(struct dm_target *ti, struct bio *bio, |
1544 | union map_info *map_context) | 1599 | union map_info *map_context) |
@@ -1559,7 +1614,7 @@ static int snapshot_merge_map(struct dm_target *ti, struct bio *bio, | |||
1559 | 1614 | ||
1560 | chunk = sector_to_chunk(s->store, bio->bi_sector); | 1615 | chunk = sector_to_chunk(s->store, bio->bi_sector); |
1561 | 1616 | ||
1562 | down_read(&s->lock); | 1617 | down_write(&s->lock); |
1563 | 1618 | ||
1564 | /* Full snapshots are not usable */ | 1619 | /* Full snapshots are not usable */ |
1565 | if (!s->valid) { | 1620 | if (!s->valid) { |
@@ -1570,6 +1625,16 @@ static int snapshot_merge_map(struct dm_target *ti, struct bio *bio, | |||
1570 | /* If the block is already remapped - use that */ | 1625 | /* If the block is already remapped - use that */ |
1571 | e = dm_lookup_exception(&s->complete, chunk); | 1626 | e = dm_lookup_exception(&s->complete, chunk); |
1572 | if (e) { | 1627 | if (e) { |
1628 | /* Queue writes overlapping with chunks being merged */ | ||
1629 | if (bio_rw(bio) == WRITE && | ||
1630 | chunk >= s->first_merging_chunk && | ||
1631 | chunk < (s->first_merging_chunk + | ||
1632 | s->num_merging_chunks)) { | ||
1633 | bio->bi_bdev = s->origin->bdev; | ||
1634 | bio_list_add(&s->bios_queued_during_merge, bio); | ||
1635 | r = DM_MAPIO_SUBMITTED; | ||
1636 | goto out_unlock; | ||
1637 | } | ||
1573 | remap_exception(s, e, bio, chunk); | 1638 | remap_exception(s, e, bio, chunk); |
1574 | goto out_unlock; | 1639 | goto out_unlock; |
1575 | } | 1640 | } |
@@ -1577,12 +1642,12 @@ static int snapshot_merge_map(struct dm_target *ti, struct bio *bio, | |||
1577 | bio->bi_bdev = s->origin->bdev; | 1642 | bio->bi_bdev = s->origin->bdev; |
1578 | 1643 | ||
1579 | if (bio_rw(bio) == WRITE) { | 1644 | if (bio_rw(bio) == WRITE) { |
1580 | up_read(&s->lock); | 1645 | up_write(&s->lock); |
1581 | return do_origin(s->origin, bio); | 1646 | return do_origin(s->origin, bio); |
1582 | } | 1647 | } |
1583 | 1648 | ||
1584 | out_unlock: | 1649 | out_unlock: |
1585 | up_read(&s->lock); | 1650 | up_write(&s->lock); |
1586 | 1651 | ||
1587 | return r; | 1652 | return r; |
1588 | } | 1653 | } |