author    Nikos Tsironis <ntsironis@arrikto.com>    2019-03-17 08:22:55 -0400
committer Mike Snitzer <snitzer@redhat.com>         2019-04-18 16:18:27 -0400
commit    65fc7c37047797a0a7b80d0ad2c063deda569337
tree      dd4568f827c5be59b4b9dbe7053662cf9fe05d83
parent    34191ae816b04518943fb073207d259881a54315
dm snapshot: Don't sleep holding the snapshot lock
When completing a pending exception, pending_complete() waits for all conflicting reads to drain, before inserting the final, completed exception. Conflicting reads are snapshot reads redirected to the origin, because the relevant chunk is not remapped to the COW device the moment we receive the read.

The completed exception must be inserted into the exception table after all conflicting reads drain to ensure snapshot reads don't return corrupted data. This is required because inserting the completed exception into the exception table signals that the relevant chunk is remapped and both origin writes and snapshot merging will now overwrite the chunk in origin.

This wait is done holding the snapshot lock to ensure that pending_complete() doesn't starve if new snapshot reads keep coming for this chunk.

In preparation for the next commit, where we use a spinlock instead of a mutex to protect the exception tables, we remove the need for holding the lock while waiting for conflicting reads to drain. We achieve this in two steps:

1. pending_complete() inserts the completed exception before waiting for conflicting reads to drain and removes the pending exception after all conflicting reads drain.

   This ensures that new snapshot reads will be redirected to the COW device, instead of the origin, and thus pending_complete() will not starve. Moreover, we use the existence of both a completed and a pending exception to signify that the COW is done but there are conflicting reads in flight.

2. In __origin_write() we check first if there is a pending exception and then if there is a completed exception. If there is a pending exception any submitted BIO is delayed on the pe->origin_bios list and DM_MAPIO_SUBMITTED is returned. This ensures that neither writes to the origin nor snapshot merging can overwrite the origin chunk, until all conflicting reads drain, and thus snapshot reads will not return corrupted data.

Summarizing, we now have the following possible combinations of pending and completed exceptions for a chunk, along with their meaning:

A. No exceptions exist: The chunk has not been remapped yet.
B. Only a pending exception exists: The chunk is currently being copied to the COW device.
C. Both a pending and a completed exception exist: COW for this chunk has completed but there are snapshot reads in flight which had been redirected to the origin before the chunk was remapped.
D. Only the completed exception exists: COW has been completed and there are no conflicting reads in flight.

Co-developed-by: Ilias Tsitsimpis <iliastsi@arrikto.com>
Signed-off-by: Nikos Tsironis <ntsironis@arrikto.com>
Acked-by: Mikulas Patocka <mpatocka@redhat.com>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
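[Editor's note] To make the A-D combinations above concrete, here is a minimal, illustrative sketch of how a chunk's state could be classified from the two exception tables. It is not part of the patch: the enum and the function name are made up, while the lookup helpers and struct types are the real ones used in drivers/md/dm-snap.c, and the caller is assumed to hold the snapshot lock.

/*
 * Illustrative only -- not part of this patch. Classifies a chunk using the
 * pending and completed exception tables, mirroring cases A-D above.
 * Hypothetical helper; caller is assumed to hold the snapshot lock.
 */
enum chunk_state {
	CHUNK_NOT_REMAPPED,	/* A: no exceptions exist */
	CHUNK_COPY_IN_FLIGHT,	/* B: only a pending exception exists */
	CHUNK_READS_DRAINING,	/* C: both pending and completed exist */
	CHUNK_REMAPPED,		/* D: only the completed exception exists */
};

static enum chunk_state classify_chunk(struct dm_snapshot *s, chunk_t chunk)
{
	struct dm_snap_pending_exception *pe;
	struct dm_exception *e;

	pe = __lookup_pending_exception(s, chunk);
	e = dm_lookup_exception(&s->complete, chunk);

	if (!pe)
		return e ? CHUNK_REMAPPED : CHUNK_NOT_REMAPPED;		/* D : A */

	return e ? CHUNK_READS_DRAINING : CHUNK_COPY_IN_FLIGHT;		/* C : B */
}

The new code never materializes such a classifier; __origin_write() simply consults the pending table first, which is what makes cases B and C delay origin writes until the conflicting reads have drained.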
-rw-r--r--  drivers/md/dm-snap.c | 102
1 file changed, 65 insertions, 37 deletions
diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c
index a168963b757d..051e4d076323 100644
--- a/drivers/md/dm-snap.c
+++ b/drivers/md/dm-snap.c
@@ -1501,16 +1501,24 @@ static void pending_complete(void *context, int success)
 		goto out;
 	}
 
-	/* Check for conflicting reads */
-	__check_for_conflicting_io(s, pe->e.old_chunk);
-
 	/*
-	 * Add a proper exception, and remove the
-	 * in-flight exception from the list.
+	 * Add a proper exception. After inserting the completed exception all
+	 * subsequent snapshot reads to this chunk will be redirected to the
+	 * COW device. This ensures that we do not starve. Moreover, as long
+	 * as the pending exception exists, neither origin writes nor snapshot
+	 * merging can overwrite the chunk in origin.
 	 */
 	dm_insert_exception(&s->complete, e);
 
+	/* Wait for conflicting reads to drain */
+	if (__chunk_is_tracked(s, pe->e.old_chunk)) {
+		mutex_unlock(&s->lock);
+		__check_for_conflicting_io(s, pe->e.old_chunk);
+		mutex_lock(&s->lock);
+	}
+
 out:
+	/* Remove the in-flight exception from the list */
 	dm_remove_exception(&pe->e);
 	snapshot_bios = bio_list_get(&pe->snapshot_bios);
 	origin_bios = bio_list_get(&pe->origin_bios);
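
[Editor's note] The hunk above is the heart of the change. As a reading aid, here is a condensed sketch of the new ordering in pending_complete(), with the state transitions from the commit message annotated. The wrapper function is hypothetical and error paths plus BIO bookkeeping are elided; the calls themselves are the real dm-snap.c helpers, and the caller is assumed to hold s->lock.

/* Hypothetical wrapper -- the real code lives inline in pending_complete(). */
static void publish_exception_and_drain(struct dm_snapshot *s,
					struct dm_snap_pending_exception *pe,
					struct dm_exception *e)
{
	/* B -> C: new snapshot reads of this chunk now go to the COW device. */
	dm_insert_exception(&s->complete, e);

	/* Wait for conflicting reads without sleeping under the snapshot lock. */
	if (__chunk_is_tracked(s, pe->e.old_chunk)) {
		mutex_unlock(&s->lock);
		__check_for_conflicting_io(s, pe->e.old_chunk);
		mutex_lock(&s->lock);
	}

	/* C -> D: origin writes and snapshot merging may now overwrite the chunk. */
	dm_remove_exception(&pe->e);
}
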
@@ -1660,25 +1668,15 @@ __lookup_pending_exception(struct dm_snapshot *s, chunk_t chunk)
 }
 
 /*
- * Looks to see if this snapshot already has a pending exception
- * for this chunk, otherwise it allocates a new one and inserts
- * it into the pending table.
+ * Inserts a pending exception into the pending table.
  *
  * NOTE: a write lock must be held on snap->lock before calling
  * this.
  */
 static struct dm_snap_pending_exception *
-__find_pending_exception(struct dm_snapshot *s,
-			 struct dm_snap_pending_exception *pe, chunk_t chunk)
+__insert_pending_exception(struct dm_snapshot *s,
+			   struct dm_snap_pending_exception *pe, chunk_t chunk)
 {
-	struct dm_snap_pending_exception *pe2;
-
-	pe2 = __lookup_pending_exception(s, chunk);
-	if (pe2) {
-		free_pending_exception(pe);
-		return pe2;
-	}
-
 	pe->e.old_chunk = chunk;
 	bio_list_init(&pe->origin_bios);
 	bio_list_init(&pe->snapshot_bios);
@@ -1697,6 +1695,29 @@ __find_pending_exception(struct dm_snapshot *s,
 	return pe;
 }
 
+/*
+ * Looks to see if this snapshot already has a pending exception
+ * for this chunk, otherwise it allocates a new one and inserts
+ * it into the pending table.
+ *
+ * NOTE: a write lock must be held on snap->lock before calling
+ * this.
+ */
+static struct dm_snap_pending_exception *
+__find_pending_exception(struct dm_snapshot *s,
+			 struct dm_snap_pending_exception *pe, chunk_t chunk)
+{
+	struct dm_snap_pending_exception *pe2;
+
+	pe2 = __lookup_pending_exception(s, chunk);
+	if (pe2) {
+		free_pending_exception(pe);
+		return pe2;
+	}
+
+	return __insert_pending_exception(s, pe, chunk);
+}
+
 static void remap_exception(struct dm_snapshot *s, struct dm_exception *e,
 			    struct bio *bio, chunk_t chunk)
 {
@@ -2107,7 +2128,7 @@ static int __origin_write(struct list_head *snapshots, sector_t sector,
 	int r = DM_MAPIO_REMAPPED;
 	struct dm_snapshot *snap;
 	struct dm_exception *e;
-	struct dm_snap_pending_exception *pe;
+	struct dm_snap_pending_exception *pe, *pe2;
 	struct dm_snap_pending_exception *pe_to_start_now = NULL;
 	struct dm_snap_pending_exception *pe_to_start_last = NULL;
 	chunk_t chunk;
@@ -2137,17 +2158,17 @@ static int __origin_write(struct list_head *snapshots, sector_t sector,
 		 */
 		chunk = sector_to_chunk(snap->store, sector);
 
-		/*
-		 * Check exception table to see if block
-		 * is already remapped in this snapshot
-		 * and trigger an exception if not.
-		 */
-		e = dm_lookup_exception(&snap->complete, chunk);
-		if (e)
-			goto next_snapshot;
-
 		pe = __lookup_pending_exception(snap, chunk);
 		if (!pe) {
+			/*
+			 * Check exception table to see if block is already
+			 * remapped in this snapshot and trigger an exception
+			 * if not.
+			 */
+			e = dm_lookup_exception(&snap->complete, chunk);
+			if (e)
+				goto next_snapshot;
+
 			mutex_unlock(&snap->lock);
 			pe = alloc_pending_exception(snap);
 			mutex_lock(&snap->lock);
@@ -2157,16 +2178,23 @@ static int __origin_write(struct list_head *snapshots, sector_t sector,
 			goto next_snapshot;
 		}
 
-		e = dm_lookup_exception(&snap->complete, chunk);
-		if (e) {
+		pe2 = __lookup_pending_exception(snap, chunk);
+
+		if (!pe2) {
+			e = dm_lookup_exception(&snap->complete, chunk);
+			if (e) {
+				free_pending_exception(pe);
+				goto next_snapshot;
+			}
+
+			pe = __insert_pending_exception(snap, pe, chunk);
+			if (!pe) {
+				__invalidate_snapshot(snap, -ENOMEM);
+				goto next_snapshot;
+			}
+		} else {
 			free_pending_exception(pe);
-			goto next_snapshot;
-		}
-
-		pe = __find_pending_exception(snap, pe, chunk);
-		if (!pe) {
-			__invalidate_snapshot(snap, -ENOMEM);
-			goto next_snapshot;
+			pe = pe2;
 		}
 	}
 