diff options
author | Mike Snitzer <snitzer@redhat.com> | 2009-12-10 18:52:24 -0500 |
---|---|---|
committer | Alasdair G Kergon <agk@redhat.com> | 2009-12-10 18:52:24 -0500 |
commit | c1f0c183f6acc6d32c5a1d0249ec68bf783af7b1 (patch) | |
tree | f8e8e88e5403a9dbbebff14dc4afd6d475d1c03a | |
parent | 042d2a9bcd80fe12d4b0871706aa9dd2231e8238 (diff) |
dm snapshot: allow live exception store handover between tables
Permit in-use snapshot exception data to be 'handed over' from one
snapshot instance to another. This is a prerequisite for patches
that allow the changes made in a snapshot device to be merged back into
its origin device; the handover also allows device resizing.
The basic call sequence is:

  dmsetup load new_snapshot (referencing the existing in-use cow device)
     - the ctr code detects that the cow is already in use and allows the
       two snapshot target instances to be linked together
  dmsetup suspend original_snapshot
  dmsetup resume new_snapshot
     - the new_snapshot becomes live, and if anything now tries to access
       the original one it will receive -EIO
  dmsetup remove original_snapshot

(There can only be two snapshot targets referencing the same cow device
simultaneously.)
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Signed-off-by: Alasdair G Kergon <agk@redhat.com>
-rw-r--r-- | drivers/md/dm-snap.c | 263 |
1 files changed, 236 insertions, 27 deletions
diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c index fd04caa90340..b5b9118c0636 100644 --- a/drivers/md/dm-snap.c +++ b/drivers/md/dm-snap.c | |||
@@ -303,22 +303,116 @@ static void __insert_origin(struct origin *o) | |||
303 | } | 303 | } |
304 | 304 | ||
305 | /* | 305 | /* |
306 | * _origins_lock must be held when calling this function. | ||
307 | * Returns number of snapshots registered using the supplied cow device, plus: | ||
308 | * snap_src - a snapshot suitable for use as a source of exception handover | ||
309 | * snap_dest - a snapshot capable of receiving exception handover. | ||
310 | * | ||
311 | * Possible return values and states: | ||
312 | * 0: NULL, NULL - first new snapshot | ||
313 | * 1: snap_src, NULL - normal snapshot | ||
314 | * 2: snap_src, snap_dest - waiting for handover | ||
315 | * 2: snap_src, NULL - handed over, waiting for old to be deleted | ||
316 | * 1: NULL, snap_dest - source got destroyed without handover | ||
317 | */ | ||
318 | static int __find_snapshots_sharing_cow(struct dm_snapshot *snap, | ||
319 | struct dm_snapshot **snap_src, | ||
320 | struct dm_snapshot **snap_dest) | ||
321 | { | ||
322 | struct dm_snapshot *s; | ||
323 | struct origin *o; | ||
324 | int count = 0; | ||
325 | int active; | ||
326 | |||
327 | o = __lookup_origin(snap->origin->bdev); | ||
328 | if (!o) | ||
329 | goto out; | ||
330 | |||
331 | list_for_each_entry(s, &o->snapshots, list) { | ||
332 | if (!bdev_equal(s->cow->bdev, snap->cow->bdev)) | ||
333 | continue; | ||
334 | |||
335 | down_read(&s->lock); | ||
336 | active = s->active; | ||
337 | up_read(&s->lock); | ||
338 | |||
339 | if (active) { | ||
340 | if (snap_src) | ||
341 | *snap_src = s; | ||
342 | } else if (snap_dest) | ||
343 | *snap_dest = s; | ||
344 | |||
345 | count++; | ||
346 | } | ||
347 | |||
348 | out: | ||
349 | return count; | ||
350 | } | ||
351 | |||
352 | /* | ||
353 | * On success, returns 1 if this snapshot is a handover destination, | ||
354 | * otherwise returns 0. | ||
355 | */ | ||
356 | static int __validate_exception_handover(struct dm_snapshot *snap) | ||
357 | { | ||
358 | struct dm_snapshot *snap_src = NULL, *snap_dest = NULL; | ||
359 | |||
360 | /* Does snapshot need exceptions handed over to it? */ | ||
361 | if ((__find_snapshots_sharing_cow(snap, &snap_src, &snap_dest) == 2) || | ||
362 | snap_dest) { | ||
363 | snap->ti->error = "Snapshot cow pairing for exception " | ||
364 | "table handover failed"; | ||
365 | return -EINVAL; | ||
366 | } | ||
367 | |||
368 | /* | ||
369 | * If no snap_src was found, snap cannot become a handover | ||
370 | * destination. | ||
371 | */ | ||
372 | if (!snap_src) | ||
373 | return 0; | ||
374 | |||
375 | return 1; | ||
376 | } | ||
377 | |||
378 | static void __insert_snapshot(struct origin *o, struct dm_snapshot *s) | ||
379 | { | ||
380 | struct dm_snapshot *l; | ||
381 | |||
382 | /* Sort the list according to chunk size, largest-first smallest-last */ | ||
383 | list_for_each_entry(l, &o->snapshots, list) | ||
384 | if (l->store->chunk_size < s->store->chunk_size) | ||
385 | break; | ||
386 | list_add_tail(&s->list, &l->list); | ||
387 | } | ||
388 | |||
389 | /* | ||
306 | * Make a note of the snapshot and its origin so we can look it | 390 | * Make a note of the snapshot and its origin so we can look it |
307 | * up when the origin has a write on it. | 391 | * up when the origin has a write on it. |
392 | * | ||
393 | * Also validate snapshot exception store handovers. | ||
394 | * On success, returns 1 if this registration is a handover destination, | ||
395 | * otherwise returns 0. | ||
308 | */ | 396 | */ |
309 | static int register_snapshot(struct dm_snapshot *snap) | 397 | static int register_snapshot(struct dm_snapshot *snap) |
310 | { | 398 | { |
311 | struct dm_snapshot *l; | 399 | struct origin *o, *new_o = NULL; |
312 | struct origin *o, *new_o; | ||
313 | struct block_device *bdev = snap->origin->bdev; | 400 | struct block_device *bdev = snap->origin->bdev; |
401 | int r = 0; | ||
314 | 402 | ||
315 | new_o = kmalloc(sizeof(*new_o), GFP_KERNEL); | 403 | new_o = kmalloc(sizeof(*new_o), GFP_KERNEL); |
316 | if (!new_o) | 404 | if (!new_o) |
317 | return -ENOMEM; | 405 | return -ENOMEM; |
318 | 406 | ||
319 | down_write(&_origins_lock); | 407 | down_write(&_origins_lock); |
320 | o = __lookup_origin(bdev); | ||
321 | 408 | ||
409 | r = __validate_exception_handover(snap); | ||
410 | if (r < 0) { | ||
411 | kfree(new_o); | ||
412 | goto out; | ||
413 | } | ||
414 | |||
415 | o = __lookup_origin(bdev); | ||
322 | if (o) | 416 | if (o) |
323 | kfree(new_o); | 417 | kfree(new_o); |
324 | else { | 418 | else { |
@@ -332,14 +426,27 @@ static int register_snapshot(struct dm_snapshot *snap) | |||
332 | __insert_origin(o); | 426 | __insert_origin(o); |
333 | } | 427 | } |
334 | 428 | ||
335 | /* Sort the list according to chunk size, largest-first smallest-last */ | 429 | __insert_snapshot(o, snap); |
336 | list_for_each_entry(l, &o->snapshots, list) | 430 | |
337 | if (l->store->chunk_size < snap->store->chunk_size) | 431 | out: |
338 | break; | 432 | up_write(&_origins_lock); |
339 | list_add_tail(&snap->list, &l->list); | 433 | |
434 | return r; | ||
435 | } | ||
436 | |||
437 | /* | ||
438 | * Move snapshot to correct place in list according to chunk size. | ||
439 | */ | ||
440 | static void reregister_snapshot(struct dm_snapshot *s) | ||
441 | { | ||
442 | struct block_device *bdev = s->origin->bdev; | ||
443 | |||
444 | down_write(&_origins_lock); | ||
445 | |||
446 | list_del(&s->list); | ||
447 | __insert_snapshot(__lookup_origin(bdev), s); | ||
340 | 448 | ||
341 | up_write(&_origins_lock); | 449 | up_write(&_origins_lock); |
342 | return 0; | ||
343 | } | 450 | } |
344 | 451 | ||
345 | static void unregister_snapshot(struct dm_snapshot *s) | 452 | static void unregister_snapshot(struct dm_snapshot *s) |
@@ -350,7 +457,7 @@ static void unregister_snapshot(struct dm_snapshot *s) | |||
350 | o = __lookup_origin(s->origin->bdev); | 457 | o = __lookup_origin(s->origin->bdev); |
351 | 458 | ||
352 | list_del(&s->list); | 459 | list_del(&s->list); |
353 | if (list_empty(&o->snapshots)) { | 460 | if (o && list_empty(&o->snapshots)) { |
354 | list_del(&o->hash_list); | 461 | list_del(&o->hash_list); |
355 | kfree(o); | 462 | kfree(o); |
356 | } | 463 | } |
@@ -662,6 +769,7 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv) | |||
662 | s->suspended = 0; | 769 | s->suspended = 0; |
663 | atomic_set(&s->pending_exceptions_count, 0); | 770 | atomic_set(&s->pending_exceptions_count, 0); |
664 | init_rwsem(&s->lock); | 771 | init_rwsem(&s->lock); |
772 | INIT_LIST_HEAD(&s->list); | ||
665 | spin_lock_init(&s->pe_lock); | 773 | spin_lock_init(&s->pe_lock); |
666 | 774 | ||
667 | /* Allocate hash table for COW data */ | 775 | /* Allocate hash table for COW data */ |
@@ -696,39 +804,55 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv) | |||
696 | 804 | ||
697 | spin_lock_init(&s->tracked_chunk_lock); | 805 | spin_lock_init(&s->tracked_chunk_lock); |
698 | 806 | ||
699 | /* Metadata must only be loaded into one table at once */ | 807 | bio_list_init(&s->queued_bios); |
808 | INIT_WORK(&s->queued_bios_work, flush_queued_bios); | ||
809 | |||
810 | ti->private = s; | ||
811 | ti->num_flush_requests = 1; | ||
812 | |||
813 | /* Add snapshot to the list of snapshots for this origin */ | ||
814 | /* Exceptions aren't triggered till snapshot_resume() is called */ | ||
815 | r = register_snapshot(s); | ||
816 | if (r == -ENOMEM) { | ||
817 | ti->error = "Snapshot origin struct allocation failed"; | ||
818 | goto bad_load_and_register; | ||
819 | } else if (r < 0) { | ||
820 | /* invalid handover, register_snapshot has set ti->error */ | ||
821 | goto bad_load_and_register; | ||
822 | } | ||
823 | |||
824 | /* | ||
825 | * Metadata must only be loaded into one table at once, so skip this | ||
826 | * if metadata will be handed over during resume. | ||
827 | * Chunk size will be set during the handover - set it to zero to | ||
828 | * ensure it's ignored. | ||
829 | */ | ||
830 | if (r > 0) { | ||
831 | s->store->chunk_size = 0; | ||
832 | return 0; | ||
833 | } | ||
834 | |||
700 | r = s->store->type->read_metadata(s->store, dm_add_exception, | 835 | r = s->store->type->read_metadata(s->store, dm_add_exception, |
701 | (void *)s); | 836 | (void *)s); |
702 | if (r < 0) { | 837 | if (r < 0) { |
703 | ti->error = "Failed to read snapshot metadata"; | 838 | ti->error = "Failed to read snapshot metadata"; |
704 | goto bad_load_and_register; | 839 | goto bad_read_metadata; |
705 | } else if (r > 0) { | 840 | } else if (r > 0) { |
706 | s->valid = 0; | 841 | s->valid = 0; |
707 | DMWARN("Snapshot is marked invalid."); | 842 | DMWARN("Snapshot is marked invalid."); |
708 | } | 843 | } |
709 | 844 | ||
710 | bio_list_init(&s->queued_bios); | ||
711 | INIT_WORK(&s->queued_bios_work, flush_queued_bios); | ||
712 | |||
713 | if (!s->store->chunk_size) { | 845 | if (!s->store->chunk_size) { |
714 | ti->error = "Chunk size not set"; | 846 | ti->error = "Chunk size not set"; |
715 | goto bad_load_and_register; | 847 | goto bad_read_metadata; |
716 | } | ||
717 | |||
718 | /* Add snapshot to the list of snapshots for this origin */ | ||
719 | /* Exceptions aren't triggered till snapshot_resume() is called */ | ||
720 | if (register_snapshot(s)) { | ||
721 | r = -EINVAL; | ||
722 | ti->error = "Cannot register snapshot origin"; | ||
723 | goto bad_load_and_register; | ||
724 | } | 848 | } |
725 | |||
726 | ti->private = s; | ||
727 | ti->split_io = s->store->chunk_size; | 849 | ti->split_io = s->store->chunk_size; |
728 | ti->num_flush_requests = 1; | ||
729 | 850 | ||
730 | return 0; | 851 | return 0; |
731 | 852 | ||
853 | bad_read_metadata: | ||
854 | unregister_snapshot(s); | ||
855 | |||
732 | bad_load_and_register: | 856 | bad_load_and_register: |
733 | mempool_destroy(s->tracked_chunk_pool); | 857 | mempool_destroy(s->tracked_chunk_pool); |
734 | 858 | ||
@@ -767,15 +891,58 @@ static void __free_exceptions(struct dm_snapshot *s) | |||
767 | dm_exception_table_exit(&s->complete, exception_cache); | 891 | dm_exception_table_exit(&s->complete, exception_cache); |
768 | } | 892 | } |
769 | 893 | ||
894 | static void __handover_exceptions(struct dm_snapshot *snap_src, | ||
895 | struct dm_snapshot *snap_dest) | ||
896 | { | ||
897 | union { | ||
898 | struct dm_exception_table table_swap; | ||
899 | struct dm_exception_store *store_swap; | ||
900 | } u; | ||
901 | |||
902 | /* | ||
903 | * Swap all snapshot context information between the two instances. | ||
904 | */ | ||
905 | u.table_swap = snap_dest->complete; | ||
906 | snap_dest->complete = snap_src->complete; | ||
907 | snap_src->complete = u.table_swap; | ||
908 | |||
909 | u.store_swap = snap_dest->store; | ||
910 | snap_dest->store = snap_src->store; | ||
911 | snap_src->store = u.store_swap; | ||
912 | |||
913 | snap_dest->store->snap = snap_dest; | ||
914 | snap_src->store->snap = snap_src; | ||
915 | |||
916 | snap_dest->ti->split_io = snap_dest->store->chunk_size; | ||
917 | snap_dest->valid = snap_src->valid; | ||
918 | |||
919 | /* | ||
920 | * Set source invalid to ensure it receives no further I/O. | ||
921 | */ | ||
922 | snap_src->valid = 0; | ||
923 | } | ||
924 | |||
770 | static void snapshot_dtr(struct dm_target *ti) | 925 | static void snapshot_dtr(struct dm_target *ti) |
771 | { | 926 | { |
772 | #ifdef CONFIG_DM_DEBUG | 927 | #ifdef CONFIG_DM_DEBUG |
773 | int i; | 928 | int i; |
774 | #endif | 929 | #endif |
775 | struct dm_snapshot *s = ti->private; | 930 | struct dm_snapshot *s = ti->private; |
931 | struct dm_snapshot *snap_src = NULL, *snap_dest = NULL; | ||
776 | 932 | ||
777 | flush_workqueue(ksnapd); | 933 | flush_workqueue(ksnapd); |
778 | 934 | ||
935 | down_read(&_origins_lock); | ||
936 | /* Check whether exception handover must be cancelled */ | ||
937 | (void) __find_snapshots_sharing_cow(s, &snap_src, &snap_dest); | ||
938 | if (snap_src && snap_dest && (s == snap_src)) { | ||
939 | down_write(&snap_dest->lock); | ||
940 | snap_dest->valid = 0; | ||
941 | up_write(&snap_dest->lock); | ||
942 | DMERR("Cancelling snapshot handover."); | ||
943 | } | ||
944 | up_read(&_origins_lock); | ||
945 | |||
779 | /* Prevent further origin writes from using this snapshot. */ | 946 | /* Prevent further origin writes from using this snapshot. */ |
780 | /* After this returns there can be no new kcopyd jobs. */ | 947 | /* After this returns there can be no new kcopyd jobs. */ |
781 | unregister_snapshot(s); | 948 | unregister_snapshot(s); |
@@ -1188,9 +1355,50 @@ static void snapshot_postsuspend(struct dm_target *ti) | |||
1188 | up_write(&s->lock); | 1355 | up_write(&s->lock); |
1189 | } | 1356 | } |
1190 | 1357 | ||
1358 | static int snapshot_preresume(struct dm_target *ti) | ||
1359 | { | ||
1360 | int r = 0; | ||
1361 | struct dm_snapshot *s = ti->private; | ||
1362 | struct dm_snapshot *snap_src = NULL, *snap_dest = NULL; | ||
1363 | |||
1364 | down_read(&_origins_lock); | ||
1365 | (void) __find_snapshots_sharing_cow(s, &snap_src, &snap_dest); | ||
1366 | if (snap_src && snap_dest) { | ||
1367 | down_read(&snap_src->lock); | ||
1368 | if (s == snap_src) { | ||
1369 | DMERR("Unable to resume snapshot source until " | ||
1370 | "handover completes."); | ||
1371 | r = -EINVAL; | ||
1372 | } else if (!snap_src->suspended) { | ||
1373 | DMERR("Unable to perform snapshot handover until " | ||
1374 | "source is suspended."); | ||
1375 | r = -EINVAL; | ||
1376 | } | ||
1377 | up_read(&snap_src->lock); | ||
1378 | } | ||
1379 | up_read(&_origins_lock); | ||
1380 | |||
1381 | return r; | ||
1382 | } | ||
1383 | |||
1191 | static void snapshot_resume(struct dm_target *ti) | 1384 | static void snapshot_resume(struct dm_target *ti) |
1192 | { | 1385 | { |
1193 | struct dm_snapshot *s = ti->private; | 1386 | struct dm_snapshot *s = ti->private; |
1387 | struct dm_snapshot *snap_src = NULL, *snap_dest = NULL; | ||
1388 | |||
1389 | down_read(&_origins_lock); | ||
1390 | (void) __find_snapshots_sharing_cow(s, &snap_src, &snap_dest); | ||
1391 | if (snap_src && snap_dest) { | ||
1392 | down_write(&snap_src->lock); | ||
1393 | down_write_nested(&snap_dest->lock, SINGLE_DEPTH_NESTING); | ||
1394 | __handover_exceptions(snap_src, snap_dest); | ||
1395 | up_write(&snap_dest->lock); | ||
1396 | up_write(&snap_src->lock); | ||
1397 | } | ||
1398 | up_read(&_origins_lock); | ||
1399 | |||
1400 | /* Now we have correct chunk size, reregister */ | ||
1401 | reregister_snapshot(s); | ||
1194 | 1402 | ||
1195 | down_write(&s->lock); | 1403 | down_write(&s->lock); |
1196 | s->active = 1; | 1404 | s->active = 1; |
@@ -1510,6 +1718,7 @@ static struct target_type snapshot_target = { | |||
1510 | .map = snapshot_map, | 1718 | .map = snapshot_map, |
1511 | .end_io = snapshot_end_io, | 1719 | .end_io = snapshot_end_io, |
1512 | .postsuspend = snapshot_postsuspend, | 1720 | .postsuspend = snapshot_postsuspend, |
1721 | .preresume = snapshot_preresume, | ||
1513 | .resume = snapshot_resume, | 1722 | .resume = snapshot_resume, |
1514 | .status = snapshot_status, | 1723 | .status = snapshot_status, |
1515 | .iterate_devices = snapshot_iterate_devices, | 1724 | .iterate_devices = snapshot_iterate_devices, |