Diffstat (limited to 'drivers/md')
-rw-r--r--  drivers/md/Makefile              |   2
-rw-r--r--  drivers/md/dm-crypt.c            |  56
-rw-r--r--  drivers/md/dm-delay.c            |   3
-rw-r--r--  drivers/md/dm-exception-store.c  | 108
-rw-r--r--  drivers/md/dm-io.c               |   2
-rw-r--r--  drivers/md/dm-kcopyd.c           |  14
-rw-r--r--  drivers/md/dm-linear.c           |   2
-rw-r--r--  drivers/md/dm-log.c              |   2
-rw-r--r--  drivers/md/dm-mpath.c            |   3
-rw-r--r--  drivers/md/dm-path-selector.c    |   3
-rw-r--r--  drivers/md/dm-raid1.c            | 791
-rw-r--r--  drivers/md/dm-region-hash.c      | 704
-rw-r--r--  drivers/md/dm-round-robin.c      |   3
-rw-r--r--  drivers/md/dm-snap.c             |  11
-rw-r--r--  drivers/md/dm-snap.h             |   5
-rw-r--r--  drivers/md/dm-stripe.c           |   6
-rw-r--r--  drivers/md/dm-zero.c             |   2
-rw-r--r--  drivers/md/dm.c                  |  49
-rw-r--r--  drivers/md/dm.h                  |   9
19 files changed, 977 insertions, 798 deletions
diff --git a/drivers/md/Makefile b/drivers/md/Makefile
index f1ef33dfd8cf..1c615804ea76 100644
--- a/drivers/md/Makefile
+++ b/drivers/md/Makefile
@@ -34,7 +34,7 @@ obj-$(CONFIG_DM_CRYPT) += dm-crypt.o
 obj-$(CONFIG_DM_DELAY) += dm-delay.o
 obj-$(CONFIG_DM_MULTIPATH) += dm-multipath.o dm-round-robin.o
 obj-$(CONFIG_DM_SNAPSHOT) += dm-snapshot.o
-obj-$(CONFIG_DM_MIRROR) += dm-mirror.o dm-log.o
+obj-$(CONFIG_DM_MIRROR) += dm-mirror.o dm-log.o dm-region-hash.o
 obj-$(CONFIG_DM_ZERO) += dm-zero.o
 
 quiet_cmd_unroll = UNROLL $@
diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c
index 682ef9e6acd3..ce26c84af064 100644
--- a/drivers/md/dm-crypt.c
+++ b/drivers/md/dm-crypt.c
@@ -23,7 +23,7 @@
 #include <asm/page.h>
 #include <asm/unaligned.h>
 
-#include "dm.h"
+#include <linux/device-mapper.h>
 
 #define DM_MSG_PREFIX "crypt"
 #define MESG_STR(x) x, sizeof(x)
@@ -56,6 +56,7 @@ struct dm_crypt_io {
 	atomic_t pending;
 	int error;
 	sector_t sector;
+	struct dm_crypt_io *base_io;
 };
 
 struct dm_crypt_request {
@@ -93,7 +94,6 @@ struct crypt_config {
 
 	struct workqueue_struct *io_queue;
 	struct workqueue_struct *crypt_queue;
-	wait_queue_head_t writeq;
 
 	/*
 	 * crypto related data
@@ -534,6 +534,7 @@ static struct dm_crypt_io *crypt_io_alloc(struct dm_target *ti,
 	io->base_bio = bio;
 	io->sector = sector;
 	io->error = 0;
+	io->base_io = NULL;
 	atomic_set(&io->pending, 0);
 
 	return io;
@@ -547,6 +548,7 @@ static void crypt_inc_pending(struct dm_crypt_io *io)
 /*
  * One of the bios was finished. Check for completion of
  * the whole request and correctly clean up the buffer.
+ * If base_io is set, wait for the last fragment to complete.
  */
 static void crypt_dec_pending(struct dm_crypt_io *io)
 {
@@ -555,7 +557,14 @@ static void crypt_dec_pending(struct dm_crypt_io *io)
 	if (!atomic_dec_and_test(&io->pending))
 		return;
 
-	bio_endio(io->base_bio, io->error);
+	if (likely(!io->base_io))
+		bio_endio(io->base_bio, io->error);
+	else {
+		if (io->error && !io->base_io->error)
+			io->base_io->error = io->error;
+		crypt_dec_pending(io->base_io);
+	}
+
 	mempool_free(io, cc->io_pool);
 }
 
@@ -646,10 +655,7 @@ static void kcryptd_io_read(struct dm_crypt_io *io)
 static void kcryptd_io_write(struct dm_crypt_io *io)
 {
 	struct bio *clone = io->ctx.bio_out;
-	struct crypt_config *cc = io->target->private;
-
 	generic_make_request(clone);
-	wake_up(&cc->writeq);
 }
 
 static void kcryptd_io(struct work_struct *work)
@@ -688,7 +694,6 @@ static void kcryptd_crypt_write_io_submit(struct dm_crypt_io *io,
 	BUG_ON(io->ctx.idx_out < clone->bi_vcnt);
 
 	clone->bi_sector = cc->start + io->sector;
-	io->sector += bio_sectors(clone);
 
 	if (async)
 		kcryptd_queue_io(io);
@@ -700,16 +705,18 @@ static void kcryptd_crypt_write_convert(struct dm_crypt_io *io)
 {
 	struct crypt_config *cc = io->target->private;
 	struct bio *clone;
+	struct dm_crypt_io *new_io;
 	int crypt_finished;
 	unsigned out_of_pages = 0;
 	unsigned remaining = io->base_bio->bi_size;
+	sector_t sector = io->sector;
 	int r;
 
 	/*
	 * Prevent io from disappearing until this function completes.
	 */
 	crypt_inc_pending(io);
-	crypt_convert_init(cc, &io->ctx, NULL, io->base_bio, io->sector);
+	crypt_convert_init(cc, &io->ctx, NULL, io->base_bio, sector);
 
 	/*
	 * The allocated buffers can be smaller than the whole bio,
@@ -726,6 +733,7 @@ static void kcryptd_crypt_write_convert(struct dm_crypt_io *io)
 		io->ctx.idx_out = 0;
 
 		remaining -= clone->bi_size;
+		sector += bio_sectors(clone);
 
 		crypt_inc_pending(io);
 		r = crypt_convert(cc, &io->ctx);
@@ -741,6 +749,8 @@ static void kcryptd_crypt_write_convert(struct dm_crypt_io *io)
 		 */
 		if (unlikely(r < 0))
 			break;
+
+		io->sector = sector;
 	}
 
 	/*
@@ -750,8 +760,33 @@ static void kcryptd_crypt_write_convert(struct dm_crypt_io *io)
 		if (unlikely(out_of_pages))
 			congestion_wait(WRITE, HZ/100);
 
-		if (unlikely(remaining))
-			wait_event(cc->writeq, !atomic_read(&io->ctx.pending));
+		/*
+		 * With async crypto it is unsafe to share the crypto context
+		 * between fragments, so switch to a new dm_crypt_io structure.
+		 */
+		if (unlikely(!crypt_finished && remaining)) {
+			new_io = crypt_io_alloc(io->target, io->base_bio,
+						sector);
+			crypt_inc_pending(new_io);
+			crypt_convert_init(cc, &new_io->ctx, NULL,
+					   io->base_bio, sector);
+			new_io->ctx.idx_in = io->ctx.idx_in;
+			new_io->ctx.offset_in = io->ctx.offset_in;
+
+			/*
+			 * Fragments after the first use the base_io
+			 * pending count.
+			 */
+			if (!io->base_io)
+				new_io->base_io = io;
+			else {
+				new_io->base_io = io->base_io;
+				crypt_inc_pending(io->base_io);
+				crypt_dec_pending(io);
+			}
+
+			io = new_io;
+		}
 	}
 
 	crypt_dec_pending(io);
@@ -1078,7 +1113,6 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 		goto bad_crypt_queue;
 	}
 
-	init_waitqueue_head(&cc->writeq);
 	ti->private = cc;
 	return 0;
 
diff --git a/drivers/md/dm-delay.c b/drivers/md/dm-delay.c
index bdd37f881c42..848b381f1173 100644
--- a/drivers/md/dm-delay.c
+++ b/drivers/md/dm-delay.c
@@ -13,7 +13,8 @@
 #include <linux/bio.h>
 #include <linux/slab.h>
 
-#include "dm.h"
+#include <linux/device-mapper.h>
+
 #include "dm-bio-list.h"
 
 #define DM_MSG_PREFIX "delay"
diff --git a/drivers/md/dm-exception-store.c b/drivers/md/dm-exception-store.c
index 769ab677f8e0..01590f3e0009 100644
--- a/drivers/md/dm-exception-store.c
+++ b/drivers/md/dm-exception-store.c
@@ -7,7 +7,6 @@
  * This file is released under the GPL.
  */
 
-#include "dm.h"
 #include "dm-snap.h"
 
 #include <linux/mm.h>
@@ -105,6 +104,11 @@ struct pstore {
 	void *area;
 
 	/*
+	 * An area of zeros used to clear the next area.
+	 */
+	void *zero_area;
+
+	/*
 	 * Used to keep track of which metadata area the data in
 	 * 'chunk' refers to.
 	 */
@@ -149,6 +153,13 @@ static int alloc_area(struct pstore *ps)
 	if (!ps->area)
 		return r;
 
+	ps->zero_area = vmalloc(len);
+	if (!ps->zero_area) {
+		vfree(ps->area);
+		return r;
+	}
+	memset(ps->zero_area, 0, len);
+
 	return 0;
 }
 
@@ -156,6 +167,8 @@ static void free_area(struct pstore *ps)
 {
 	vfree(ps->area);
 	ps->area = NULL;
+	vfree(ps->zero_area);
+	ps->zero_area = NULL;
 }
 
 struct mdata_req {
@@ -220,25 +233,41 @@ static chunk_t area_location(struct pstore *ps, chunk_t area)
  * Read or write a metadata area. Remembering to skip the first
  * chunk which holds the header.
  */
-static int area_io(struct pstore *ps, chunk_t area, int rw)
+static int area_io(struct pstore *ps, int rw)
 {
 	int r;
 	chunk_t chunk;
 
-	chunk = area_location(ps, area);
+	chunk = area_location(ps, ps->current_area);
 
 	r = chunk_io(ps, chunk, rw, 0);
 	if (r)
 		return r;
 
-	ps->current_area = area;
 	return 0;
 }
 
-static int zero_area(struct pstore *ps, chunk_t area)
+static void zero_memory_area(struct pstore *ps)
 {
 	memset(ps->area, 0, ps->snap->chunk_size << SECTOR_SHIFT);
-	return area_io(ps, area, WRITE);
+}
+
+static int zero_disk_area(struct pstore *ps, chunk_t area)
+{
+	struct dm_io_region where = {
+		.bdev = ps->snap->cow->bdev,
+		.sector = ps->snap->chunk_size * area_location(ps, area),
+		.count = ps->snap->chunk_size,
+	};
+	struct dm_io_request io_req = {
+		.bi_rw = WRITE,
+		.mem.type = DM_IO_VMA,
+		.mem.ptr.vma = ps->zero_area,
+		.client = ps->io_client,
+		.notify.fn = NULL,
+	};
+
+	return dm_io(&io_req, 1, &where, NULL);
 }
 
 static int read_header(struct pstore *ps, int *new_snapshot)
@@ -411,15 +440,14 @@ static int insert_exceptions(struct pstore *ps, int *full)
 
 static int read_exceptions(struct pstore *ps)
 {
-	chunk_t area;
 	int r, full = 1;
 
 	/*
	 * Keeping reading chunks and inserting exceptions until
	 * we find a partially full area.
	 */
-	for (area = 0; full; area++) {
-		r = area_io(ps, area, READ);
+	for (ps->current_area = 0; full; ps->current_area++) {
+		r = area_io(ps, READ);
 		if (r)
 			return r;
 
@@ -428,6 +456,8 @@ static int read_exceptions(struct pstore *ps)
 			return r;
 	}
 
+	ps->current_area--;
+
 	return 0;
 }
 
@@ -486,12 +516,13 @@ static int persistent_read_metadata(struct exception_store *store)
 			return r;
 		}
 
-		r = zero_area(ps, 0);
+		ps->current_area = 0;
+		zero_memory_area(ps);
+		r = zero_disk_area(ps, 0);
 		if (r) {
-			DMWARN("zero_area(0) failed");
+			DMWARN("zero_disk_area(0) failed");
 			return r;
 		}
-
 	} else {
 		/*
		 * Sanity checks.
@@ -551,7 +582,6 @@ static void persistent_commit(struct exception_store *store,
 			      void (*callback) (void *, int success),
 			      void *callback_context)
 {
-	int r;
 	unsigned int i;
 	struct pstore *ps = get_info(store);
 	struct disk_exception de;
@@ -572,33 +602,41 @@ static void persistent_commit(struct exception_store *store,
 	cb->context = callback_context;
 
 	/*
-	 * If there are no more exceptions in flight, or we have
-	 * filled this metadata area we commit the exceptions to
-	 * disk.
+	 * If there are exceptions in flight and we have not yet
+	 * filled this metadata area there's nothing more to do.
 	 */
-	if (atomic_dec_and_test(&ps->pending_count) ||
-	    (ps->current_committed == ps->exceptions_per_area)) {
-		r = area_io(ps, ps->current_area, WRITE);
-		if (r)
-			ps->valid = 0;
+	if (!atomic_dec_and_test(&ps->pending_count) &&
+	    (ps->current_committed != ps->exceptions_per_area))
+		return;
 
-		/*
-		 * Have we completely filled the current area ?
-		 */
-		if (ps->current_committed == ps->exceptions_per_area) {
-			ps->current_committed = 0;
-			r = zero_area(ps, ps->current_area + 1);
-			if (r)
-				ps->valid = 0;
-		}
+	/*
+	 * If we completely filled the current area, then wipe the next one.
+	 */
+	if ((ps->current_committed == ps->exceptions_per_area) &&
+	    zero_disk_area(ps, ps->current_area + 1))
+		ps->valid = 0;
 
-		for (i = 0; i < ps->callback_count; i++) {
-			cb = ps->callbacks + i;
-			cb->callback(cb->context, r == 0 ? 1 : 0);
-		}
+	/*
+	 * Commit exceptions to disk.
+	 */
+	if (ps->valid && area_io(ps, WRITE))
+		ps->valid = 0;
 
-		ps->callback_count = 0;
+	/*
+	 * Advance to the next area if this one is full.
+	 */
+	if (ps->current_committed == ps->exceptions_per_area) {
+		ps->current_committed = 0;
+		ps->current_area++;
+		zero_memory_area(ps);
 	}
+
+	for (i = 0; i < ps->callback_count; i++) {
+		cb = ps->callbacks + i;
+		cb->callback(cb->context, ps->valid);
+	}
+
+	ps->callback_count = 0;
 }
 
 static void persistent_drop(struct exception_store *store)
diff --git a/drivers/md/dm-io.c b/drivers/md/dm-io.c
index 4789c42d9a3a..2fd6d4450637 100644
--- a/drivers/md/dm-io.c
+++ b/drivers/md/dm-io.c
@@ -5,7 +5,7 @@
  * This file is released under the GPL.
  */
 
-#include "dm.h"
+#include <linux/device-mapper.h>
 
 #include <linux/bio.h>
 #include <linux/mempool.h>
diff --git a/drivers/md/dm-kcopyd.c b/drivers/md/dm-kcopyd.c
index 996802b8a452..3073618269ea 100644
--- a/drivers/md/dm-kcopyd.c
+++ b/drivers/md/dm-kcopyd.c
@@ -22,6 +22,7 @@
 #include <linux/vmalloc.h>
 #include <linux/workqueue.h>
 #include <linux/mutex.h>
+#include <linux/device-mapper.h>
 #include <linux/dm-kcopyd.h>
 
 #include "dm.h"
@@ -268,6 +269,17 @@ static void push(struct list_head *jobs, struct kcopyd_job *job)
 	spin_unlock_irqrestore(&kc->job_lock, flags);
 }
 
+
+static void push_head(struct list_head *jobs, struct kcopyd_job *job)
+{
+	unsigned long flags;
+	struct dm_kcopyd_client *kc = job->kc;
+
+	spin_lock_irqsave(&kc->job_lock, flags);
+	list_add(&job->list, jobs);
+	spin_unlock_irqrestore(&kc->job_lock, flags);
+}
+
 /*
  * These three functions process 1 item from the corresponding
  * job list.
@@ -398,7 +410,7 @@ static int process_jobs(struct list_head *jobs, struct dm_kcopyd_client *kc,
 			 * We couldn't service this job ATM, so
			 * push this job back onto the list.
			 */
-			push(jobs, job);
+			push_head(jobs, job);
 			break;
 		}
 
diff --git a/drivers/md/dm-linear.c b/drivers/md/dm-linear.c
index 6449bcdf84ca..1b29e9136758 100644
--- a/drivers/md/dm-linear.c
+++ b/drivers/md/dm-linear.c
@@ -5,12 +5,12 @@
  */
 
 #include "dm.h"
-
 #include <linux/module.h>
 #include <linux/init.h>
 #include <linux/blkdev.h>
 #include <linux/bio.h>
 #include <linux/slab.h>
+#include <linux/device-mapper.h>
 
 #define DM_MSG_PREFIX "linear"
 
diff --git a/drivers/md/dm-log.c b/drivers/md/dm-log.c
index 5b48478c79f5..a8c0fc79ca78 100644
--- a/drivers/md/dm-log.c
+++ b/drivers/md/dm-log.c
@@ -12,7 +12,7 @@
 #include <linux/dm-io.h>
 #include <linux/dm-dirty-log.h>
 
-#include "dm.h"
+#include <linux/device-mapper.h>
 
 #define DM_MSG_PREFIX "dirty region log"
 
diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c
index 9bf3460c5540..abf6e8cfaedb 100644
--- a/drivers/md/dm-mpath.c
+++ b/drivers/md/dm-mpath.c
@@ -5,7 +5,8 @@
  * This file is released under the GPL.
  */
 
-#include "dm.h"
+#include <linux/device-mapper.h>
+
 #include "dm-path-selector.h"
 #include "dm-bio-list.h"
 #include "dm-bio-record.h"
diff --git a/drivers/md/dm-path-selector.c b/drivers/md/dm-path-selector.c
index ca1bb636a3e4..96ea226155b1 100644
--- a/drivers/md/dm-path-selector.c
+++ b/drivers/md/dm-path-selector.c
@@ -9,7 +9,8 @@
  * Path selector registration.
  */
 
-#include "dm.h"
+#include <linux/device-mapper.h>
+
 #include "dm-path-selector.h"
 
 #include <linux/slab.h>
diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c
index 29913e42c4ab..92dcc06832a4 100644
--- a/drivers/md/dm-raid1.c
+++ b/drivers/md/dm-raid1.c
@@ -1,30 +1,30 @@
 /*
  * Copyright (C) 2003 Sistina Software Limited.
+ * Copyright (C) 2005-2008 Red Hat, Inc. All rights reserved.
  *
  * This file is released under the GPL.
  */
 
-#include "dm.h"
 #include "dm-bio-list.h"
 #include "dm-bio-record.h"
 
-#include <linux/ctype.h>
 #include <linux/init.h>
 #include <linux/mempool.h>
 #include <linux/module.h>
 #include <linux/pagemap.h>
 #include <linux/slab.h>
-#include <linux/time.h>
-#include <linux/vmalloc.h>
 #include <linux/workqueue.h>
-#include <linux/log2.h>
-#include <linux/hardirq.h>
+#include <linux/device-mapper.h>
 #include <linux/dm-io.h>
 #include <linux/dm-dirty-log.h>
 #include <linux/dm-kcopyd.h>
+#include <linux/dm-region-hash.h>
 
 #define DM_MSG_PREFIX "raid1"
+
+#define MAX_RECOVERY 1	/* Maximum number of regions recovered in parallel. */
 #define DM_IO_PAGES 64
+#define DM_KCOPYD_PAGES 64
 
 #define DM_RAID1_HANDLE_ERRORS 0x01
 #define errors_handled(p)	((p)->features & DM_RAID1_HANDLE_ERRORS)
@@ -32,87 +32,6 @@
 static DECLARE_WAIT_QUEUE_HEAD(_kmirrord_recovery_stopped);
 
 /*-----------------------------------------------------------------
- * Region hash
- *
- * The mirror splits itself up into discrete regions. Each
- * region can be in one of three states: clean, dirty,
- * nosync. There is no need to put clean regions in the hash.
- *
- * In addition to being present in the hash table a region _may_
- * be present on one of three lists.
- *
- * clean_regions: Regions on this list have no io pending to
- * them, they are in sync, we are no longer interested in them,
- * they are dull. rh_update_states() will remove them from the
- * hash table.
- *
- * quiesced_regions: These regions have been spun down, ready
- * for recovery. rh_recovery_start() will remove regions from
- * this list and hand them to kmirrord, which will schedule the
- * recovery io with kcopyd.
- *
- * recovered_regions: Regions that kcopyd has successfully
- * recovered. rh_update_states() will now schedule any delayed
- * io, up the recovery_count, and remove the region from the
- * hash.
- *
- * There are 2 locks:
- * A rw spin lock 'hash_lock' protects just the hash table,
- * this is never held in write mode from interrupt context,
- * which I believe means that we only have to disable irqs when
- * doing a write lock.
- *
- * An ordinary spin lock 'region_lock' that protects the three
- * lists in the region_hash, with the 'state', 'list' and
- * 'bhs_delayed' fields of the regions. This is used from irq
- * context, so all other uses will have to suspend local irqs.
- *---------------------------------------------------------------*/
-struct mirror_set;
-struct region_hash {
-	struct mirror_set *ms;
-	uint32_t region_size;
-	unsigned region_shift;
-
-	/* holds persistent region state */
-	struct dm_dirty_log *log;
-
-	/* hash table */
-	rwlock_t hash_lock;
-	mempool_t *region_pool;
-	unsigned int mask;
-	unsigned int nr_buckets;
-	struct list_head *buckets;
-
-	spinlock_t region_lock;
-	atomic_t recovery_in_flight;
-	struct semaphore recovery_count;
-	struct list_head clean_regions;
-	struct list_head quiesced_regions;
-	struct list_head recovered_regions;
-	struct list_head failed_recovered_regions;
-};
-
-enum {
-	RH_CLEAN,
-	RH_DIRTY,
-	RH_NOSYNC,
-	RH_RECOVERING
-};
-
-struct region {
-	struct region_hash *rh;	/* FIXME: can we get rid of this ? */
-	region_t key;
-	int state;
-
-	struct list_head hash_list;
-	struct list_head list;
-
-	atomic_t pending;
-	struct bio_list delayed_bios;
-};
-
-
-/*-----------------------------------------------------------------
  * Mirror set structures.
  *---------------------------------------------------------------*/
 enum dm_raid1_error {
@@ -132,8 +51,7 @@ struct mirror {
 struct mirror_set {
 	struct dm_target *ti;
 	struct list_head list;
-	struct region_hash rh;
-	struct dm_kcopyd_client *kcopyd_client;
+
 	uint64_t features;
 
 	spinlock_t lock;	/* protects the lists */
@@ -141,6 +59,8 @@ struct mirror_set {
 	struct bio_list writes;
 	struct bio_list failures;
 
+	struct dm_region_hash *rh;
+	struct dm_kcopyd_client *kcopyd_client;
 	struct dm_io_client *io_client;
 	mempool_t *read_record_pool;
 
@@ -159,25 +79,14 @@ struct mirror_set {
 
 	struct work_struct trigger_event;
 
-	unsigned int nr_mirrors;
+	unsigned nr_mirrors;
 	struct mirror mirror[0];
 };
 
-/*
- * Conversion fns
- */
-static inline region_t bio_to_region(struct region_hash *rh, struct bio *bio)
-{
-	return (bio->bi_sector - rh->ms->ti->begin) >> rh->region_shift;
-}
-
-static inline sector_t region_to_sector(struct region_hash *rh, region_t region)
+static void wakeup_mirrord(void *context)
 {
-	return region << rh->region_shift;
-}
-
-static void wake(struct mirror_set *ms)
-{
+	struct mirror_set *ms = context;
+
 	queue_work(ms->kmirrord_wq, &ms->kmirrord_work);
 }
 
@@ -186,7 +95,7 @@ static void delayed_wake_fn(unsigned long data)
 	struct mirror_set *ms = (struct mirror_set *) data;
 
 	clear_bit(0, &ms->timer_pending);
-	wake(ms);
+	wakeup_mirrord(ms);
 }
 
 static void delayed_wake(struct mirror_set *ms)
@@ -200,473 +109,34 @@ static void delayed_wake(struct mirror_set *ms) | |||
200 | add_timer(&ms->timer); | 109 | add_timer(&ms->timer); |
201 | } | 110 | } |
202 | 111 | ||
203 | /* FIXME move this */ | 112 | static void wakeup_all_recovery_waiters(void *context) |
204 | static void queue_bio(struct mirror_set *ms, struct bio *bio, int rw); | ||
205 | |||
206 | #define MIN_REGIONS 64 | ||
207 | #define MAX_RECOVERY 1 | ||
208 | static int rh_init(struct region_hash *rh, struct mirror_set *ms, | ||
209 | struct dm_dirty_log *log, uint32_t region_size, | ||
210 | region_t nr_regions) | ||
211 | { | 113 | { |
212 | unsigned int nr_buckets, max_buckets; | 114 | wake_up_all(&_kmirrord_recovery_stopped); |
213 | size_t i; | ||
214 | |||
215 | /* | ||
216 | * Calculate a suitable number of buckets for our hash | ||
217 | * table. | ||
218 | */ | ||
219 | max_buckets = nr_regions >> 6; | ||
220 | for (nr_buckets = 128u; nr_buckets < max_buckets; nr_buckets <<= 1) | ||
221 | ; | ||
222 | nr_buckets >>= 1; | ||
223 | |||
224 | rh->ms = ms; | ||
225 | rh->log = log; | ||
226 | rh->region_size = region_size; | ||
227 | rh->region_shift = ffs(region_size) - 1; | ||
228 | rwlock_init(&rh->hash_lock); | ||
229 | rh->mask = nr_buckets - 1; | ||
230 | rh->nr_buckets = nr_buckets; | ||
231 | |||
232 | rh->buckets = vmalloc(nr_buckets * sizeof(*rh->buckets)); | ||
233 | if (!rh->buckets) { | ||
234 | DMERR("unable to allocate region hash memory"); | ||
235 | return -ENOMEM; | ||
236 | } | ||
237 | |||
238 | for (i = 0; i < nr_buckets; i++) | ||
239 | INIT_LIST_HEAD(rh->buckets + i); | ||
240 | |||
241 | spin_lock_init(&rh->region_lock); | ||
242 | sema_init(&rh->recovery_count, 0); | ||
243 | atomic_set(&rh->recovery_in_flight, 0); | ||
244 | INIT_LIST_HEAD(&rh->clean_regions); | ||
245 | INIT_LIST_HEAD(&rh->quiesced_regions); | ||
246 | INIT_LIST_HEAD(&rh->recovered_regions); | ||
247 | INIT_LIST_HEAD(&rh->failed_recovered_regions); | ||
248 | |||
249 | rh->region_pool = mempool_create_kmalloc_pool(MIN_REGIONS, | ||
250 | sizeof(struct region)); | ||
251 | if (!rh->region_pool) { | ||
252 | vfree(rh->buckets); | ||
253 | rh->buckets = NULL; | ||
254 | return -ENOMEM; | ||
255 | } | ||
256 | |||
257 | return 0; | ||
258 | } | 115 | } |
259 | 116 | ||
260 | static void rh_exit(struct region_hash *rh) | 117 | static void queue_bio(struct mirror_set *ms, struct bio *bio, int rw) |
261 | { | ||
262 | unsigned int h; | ||
263 | struct region *reg, *nreg; | ||
264 | |||
265 | BUG_ON(!list_empty(&rh->quiesced_regions)); | ||
266 | for (h = 0; h < rh->nr_buckets; h++) { | ||
267 | list_for_each_entry_safe(reg, nreg, rh->buckets + h, hash_list) { | ||
268 | BUG_ON(atomic_read(®->pending)); | ||
269 | mempool_free(reg, rh->region_pool); | ||
270 | } | ||
271 | } | ||
272 | |||
273 | if (rh->log) | ||
274 | dm_dirty_log_destroy(rh->log); | ||
275 | if (rh->region_pool) | ||
276 | mempool_destroy(rh->region_pool); | ||
277 | vfree(rh->buckets); | ||
278 | } | ||
279 | |||
280 | #define RH_HASH_MULT 2654435387U | ||
281 | |||
282 | static inline unsigned int rh_hash(struct region_hash *rh, region_t region) | ||
283 | { | ||
284 | return (unsigned int) ((region * RH_HASH_MULT) >> 12) & rh->mask; | ||
285 | } | ||
286 | |||
287 | static struct region *__rh_lookup(struct region_hash *rh, region_t region) | ||
288 | { | ||
289 | struct region *reg; | ||
290 | |||
291 | list_for_each_entry (reg, rh->buckets + rh_hash(rh, region), hash_list) | ||
292 | if (reg->key == region) | ||
293 | return reg; | ||
294 | |||
295 | return NULL; | ||
296 | } | ||
297 | |||
298 | static void __rh_insert(struct region_hash *rh, struct region *reg) | ||
299 | { | ||
300 | unsigned int h = rh_hash(rh, reg->key); | ||
301 | list_add(®->hash_list, rh->buckets + h); | ||
302 | } | ||
303 | |||
304 | static struct region *__rh_alloc(struct region_hash *rh, region_t region) | ||
305 | { | ||
306 | struct region *reg, *nreg; | ||
307 | |||
308 | read_unlock(&rh->hash_lock); | ||
309 | nreg = mempool_alloc(rh->region_pool, GFP_ATOMIC); | ||
310 | if (unlikely(!nreg)) | ||
311 | nreg = kmalloc(sizeof(struct region), GFP_NOIO); | ||
312 | nreg->state = rh->log->type->in_sync(rh->log, region, 1) ? | ||
313 | RH_CLEAN : RH_NOSYNC; | ||
314 | nreg->rh = rh; | ||
315 | nreg->key = region; | ||
316 | |||
317 | INIT_LIST_HEAD(&nreg->list); | ||
318 | |||
319 | atomic_set(&nreg->pending, 0); | ||
320 | bio_list_init(&nreg->delayed_bios); | ||
321 | write_lock_irq(&rh->hash_lock); | ||
322 | |||
323 | reg = __rh_lookup(rh, region); | ||
324 | if (reg) | ||
325 | /* we lost the race */ | ||
326 | mempool_free(nreg, rh->region_pool); | ||
327 | |||
328 | else { | ||
329 | __rh_insert(rh, nreg); | ||
330 | if (nreg->state == RH_CLEAN) { | ||
331 | spin_lock(&rh->region_lock); | ||
332 | list_add(&nreg->list, &rh->clean_regions); | ||
333 | spin_unlock(&rh->region_lock); | ||
334 | } | ||
335 | reg = nreg; | ||
336 | } | ||
337 | write_unlock_irq(&rh->hash_lock); | ||
338 | read_lock(&rh->hash_lock); | ||
339 | |||
340 | return reg; | ||
341 | } | ||
342 | |||
343 | static inline struct region *__rh_find(struct region_hash *rh, region_t region) | ||
344 | { | ||
345 | struct region *reg; | ||
346 | |||
347 | reg = __rh_lookup(rh, region); | ||
348 | if (!reg) | ||
349 | reg = __rh_alloc(rh, region); | ||
350 | |||
351 | return reg; | ||
352 | } | ||
353 | |||
354 | static int rh_state(struct region_hash *rh, region_t region, int may_block) | ||
355 | { | ||
356 | int r; | ||
357 | struct region *reg; | ||
358 | |||
359 | read_lock(&rh->hash_lock); | ||
360 | reg = __rh_lookup(rh, region); | ||
361 | read_unlock(&rh->hash_lock); | ||
362 | |||
363 | if (reg) | ||
364 | return reg->state; | ||
365 | |||
366 | /* | ||
367 | * The region wasn't in the hash, so we fall back to the | ||
368 | * dirty log. | ||
369 | */ | ||
370 | r = rh->log->type->in_sync(rh->log, region, may_block); | ||
371 | |||
372 | /* | ||
373 | * Any error from the dirty log (eg. -EWOULDBLOCK) gets | ||
374 | * taken as a RH_NOSYNC | ||
375 | */ | ||
376 | return r == 1 ? RH_CLEAN : RH_NOSYNC; | ||
377 | } | ||
378 | |||
379 | static inline int rh_in_sync(struct region_hash *rh, | ||
380 | region_t region, int may_block) | ||
381 | { | ||
382 | int state = rh_state(rh, region, may_block); | ||
383 | return state == RH_CLEAN || state == RH_DIRTY; | ||
384 | } | ||
385 | |||
386 | static void dispatch_bios(struct mirror_set *ms, struct bio_list *bio_list) | ||
387 | { | ||
388 | struct bio *bio; | ||
389 | |||
390 | while ((bio = bio_list_pop(bio_list))) { | ||
391 | queue_bio(ms, bio, WRITE); | ||
392 | } | ||
393 | } | ||
394 | |||
395 | static void complete_resync_work(struct region *reg, int success) | ||
396 | { | ||
397 | struct region_hash *rh = reg->rh; | ||
398 | |||
399 | rh->log->type->set_region_sync(rh->log, reg->key, success); | ||
400 | |||
401 | /* | ||
402 | * Dispatch the bios before we call 'wake_up_all'. | ||
403 | * This is important because if we are suspending, | ||
404 | * we want to know that recovery is complete and | ||
405 | * the work queue is flushed. If we wake_up_all | ||
406 | * before we dispatch_bios (queue bios and call wake()), | ||
407 | * then we risk suspending before the work queue | ||
408 | * has been properly flushed. | ||
409 | */ | ||
410 | dispatch_bios(rh->ms, ®->delayed_bios); | ||
411 | if (atomic_dec_and_test(&rh->recovery_in_flight)) | ||
412 | wake_up_all(&_kmirrord_recovery_stopped); | ||
413 | up(&rh->recovery_count); | ||
414 | } | ||
415 | |||
416 | static void rh_update_states(struct region_hash *rh) | ||
417 | { | ||
418 | struct region *reg, *next; | ||
419 | |||
420 | LIST_HEAD(clean); | ||
421 | LIST_HEAD(recovered); | ||
422 | LIST_HEAD(failed_recovered); | ||
423 | |||
424 | /* | ||
425 | * Quickly grab the lists. | ||
426 | */ | ||
427 | write_lock_irq(&rh->hash_lock); | ||
428 | spin_lock(&rh->region_lock); | ||
429 | if (!list_empty(&rh->clean_regions)) { | ||
430 | list_splice_init(&rh->clean_regions, &clean); | ||
431 | |||
432 | list_for_each_entry(reg, &clean, list) | ||
433 | list_del(®->hash_list); | ||
434 | } | ||
435 | |||
436 | if (!list_empty(&rh->recovered_regions)) { | ||
437 | list_splice_init(&rh->recovered_regions, &recovered); | ||
438 | |||
439 | list_for_each_entry (reg, &recovered, list) | ||
440 | list_del(®->hash_list); | ||
441 | } | ||
442 | |||
443 | if (!list_empty(&rh->failed_recovered_regions)) { | ||
444 | list_splice_init(&rh->failed_recovered_regions, | ||
445 | &failed_recovered); | ||
446 | |||
447 | list_for_each_entry(reg, &failed_recovered, list) | ||
448 | list_del(®->hash_list); | ||
449 | } | ||
450 | |||
451 | spin_unlock(&rh->region_lock); | ||
452 | write_unlock_irq(&rh->hash_lock); | ||
453 | |||
454 | /* | ||
455 | * All the regions on the recovered and clean lists have | ||
456 | * now been pulled out of the system, so no need to do | ||
457 | * any more locking. | ||
458 | */ | ||
459 | list_for_each_entry_safe (reg, next, &recovered, list) { | ||
460 | rh->log->type->clear_region(rh->log, reg->key); | ||
461 | complete_resync_work(reg, 1); | ||
462 | mempool_free(reg, rh->region_pool); | ||
463 | } | ||
464 | |||
465 | list_for_each_entry_safe(reg, next, &failed_recovered, list) { | ||
466 | complete_resync_work(reg, errors_handled(rh->ms) ? 0 : 1); | ||
467 | mempool_free(reg, rh->region_pool); | ||
468 | } | ||
469 | |||
470 | list_for_each_entry_safe(reg, next, &clean, list) { | ||
471 | rh->log->type->clear_region(rh->log, reg->key); | ||
472 | mempool_free(reg, rh->region_pool); | ||
473 | } | ||
474 | |||
475 | rh->log->type->flush(rh->log); | ||
476 | } | ||
477 | |||
478 | static void rh_inc(struct region_hash *rh, region_t region) | ||
479 | { | ||
480 | struct region *reg; | ||
481 | |||
482 | read_lock(&rh->hash_lock); | ||
483 | reg = __rh_find(rh, region); | ||
484 | |||
485 | spin_lock_irq(&rh->region_lock); | ||
486 | atomic_inc(®->pending); | ||
487 | |||
488 | if (reg->state == RH_CLEAN) { | ||
489 | reg->state = RH_DIRTY; | ||
490 | list_del_init(®->list); /* take off the clean list */ | ||
491 | spin_unlock_irq(&rh->region_lock); | ||
492 | |||
493 | rh->log->type->mark_region(rh->log, reg->key); | ||
494 | } else | ||
495 | spin_unlock_irq(&rh->region_lock); | ||
496 | |||
497 | |||
498 | read_unlock(&rh->hash_lock); | ||
499 | } | ||
500 | |||
501 | static void rh_inc_pending(struct region_hash *rh, struct bio_list *bios) | ||
502 | { | ||
503 | struct bio *bio; | ||
504 | |||
505 | for (bio = bios->head; bio; bio = bio->bi_next) | ||
506 | rh_inc(rh, bio_to_region(rh, bio)); | ||
507 | } | ||
508 | |||
509 | static void rh_dec(struct region_hash *rh, region_t region) | ||
510 | { | 118 | { |
511 | unsigned long flags; | 119 | unsigned long flags; |
512 | struct region *reg; | ||
513 | int should_wake = 0; | 120 | int should_wake = 0; |
121 | struct bio_list *bl; | ||
514 | 122 | ||
515 | read_lock(&rh->hash_lock); | 123 | bl = (rw == WRITE) ? &ms->writes : &ms->reads; |
516 | reg = __rh_lookup(rh, region); | 124 | spin_lock_irqsave(&ms->lock, flags); |
517 | read_unlock(&rh->hash_lock); | 125 | should_wake = !(bl->head); |
518 | 126 | bio_list_add(bl, bio); | |
519 | spin_lock_irqsave(&rh->region_lock, flags); | 127 | spin_unlock_irqrestore(&ms->lock, flags); |
520 | if (atomic_dec_and_test(®->pending)) { | ||
521 | /* | ||
522 | * There is no pending I/O for this region. | ||
523 | * We can move the region to corresponding list for next action. | ||
524 | * At this point, the region is not yet connected to any list. | ||
525 | * | ||
526 | * If the state is RH_NOSYNC, the region should be kept off | ||
527 | * from clean list. | ||
528 | * The hash entry for RH_NOSYNC will remain in memory | ||
529 | * until the region is recovered or the map is reloaded. | ||
530 | */ | ||
531 | |||
532 | /* do nothing for RH_NOSYNC */ | ||
533 | if (reg->state == RH_RECOVERING) { | ||
534 | list_add_tail(®->list, &rh->quiesced_regions); | ||
535 | } else if (reg->state == RH_DIRTY) { | ||
536 | reg->state = RH_CLEAN; | ||
537 | list_add(®->list, &rh->clean_regions); | ||
538 | } | ||
539 | should_wake = 1; | ||
540 | } | ||
541 | spin_unlock_irqrestore(&rh->region_lock, flags); | ||
542 | 128 | ||
543 | if (should_wake) | 129 | if (should_wake) |
544 | wake(rh->ms); | 130 | wakeup_mirrord(ms); |
545 | } | ||
546 | |||
547 | /* | ||
548 | * Starts quiescing a region in preparation for recovery. | ||
549 | */ | ||
550 | static int __rh_recovery_prepare(struct region_hash *rh) | ||
551 | { | ||
552 | int r; | ||
553 | struct region *reg; | ||
554 | region_t region; | ||
555 | |||
556 | /* | ||
557 | * Ask the dirty log what's next. | ||
558 | */ | ||
559 | r = rh->log->type->get_resync_work(rh->log, ®ion); | ||
560 | if (r <= 0) | ||
561 | return r; | ||
562 | |||
563 | /* | ||
564 | * Get this region, and start it quiescing by setting the | ||
565 | * recovering flag. | ||
566 | */ | ||
567 | read_lock(&rh->hash_lock); | ||
568 | reg = __rh_find(rh, region); | ||
569 | read_unlock(&rh->hash_lock); | ||
570 | |||
571 | spin_lock_irq(&rh->region_lock); | ||
572 | reg->state = RH_RECOVERING; | ||
573 | |||
574 | /* Already quiesced ? */ | ||
575 | if (atomic_read(®->pending)) | ||
576 | list_del_init(®->list); | ||
577 | else | ||
578 | list_move(®->list, &rh->quiesced_regions); | ||
579 | |||
580 | spin_unlock_irq(&rh->region_lock); | ||
581 | |||
582 | return 1; | ||
583 | } | ||
584 | |||
585 | static void rh_recovery_prepare(struct region_hash *rh) | ||
586 | { | ||
587 | /* Extra reference to avoid race with rh_stop_recovery */ | ||
588 | atomic_inc(&rh->recovery_in_flight); | ||
589 | |||
590 | while (!down_trylock(&rh->recovery_count)) { | ||
591 | atomic_inc(&rh->recovery_in_flight); | ||
592 | if (__rh_recovery_prepare(rh) <= 0) { | ||
593 | atomic_dec(&rh->recovery_in_flight); | ||
594 | up(&rh->recovery_count); | ||
595 | break; | ||
596 | } | ||
597 | } | ||
598 | |||
599 | /* Drop the extra reference */ | ||
600 | if (atomic_dec_and_test(&rh->recovery_in_flight)) | ||
601 | wake_up_all(&_kmirrord_recovery_stopped); | ||
602 | } | ||
603 | |||
604 | /* | ||
605 | * Returns any quiesced regions. | ||
606 | */ | ||
607 | static struct region *rh_recovery_start(struct region_hash *rh) | ||
608 | { | ||
609 | struct region *reg = NULL; | ||
610 | |||
611 | spin_lock_irq(&rh->region_lock); | ||
612 | if (!list_empty(&rh->quiesced_regions)) { | ||
613 | reg = list_entry(rh->quiesced_regions.next, | ||
614 | struct region, list); | ||
615 | list_del_init(®->list); /* remove from the quiesced list */ | ||
616 | } | ||
617 | spin_unlock_irq(&rh->region_lock); | ||
618 | |||
619 | return reg; | ||
620 | } | ||
621 | |||
622 | static void rh_recovery_end(struct region *reg, int success) | ||
623 | { | ||
624 | struct region_hash *rh = reg->rh; | ||
625 | |||
626 | spin_lock_irq(&rh->region_lock); | ||
627 | if (success) | ||
628 | list_add(®->list, ®->rh->recovered_regions); | ||
629 | else { | ||
630 | reg->state = RH_NOSYNC; | ||
631 | list_add(®->list, ®->rh->failed_recovered_regions); | ||
632 | } | ||
633 | spin_unlock_irq(&rh->region_lock); | ||
634 | |||
635 | wake(rh->ms); | ||
636 | } | 131 | } |
637 | 132 | ||
638 | static int rh_flush(struct region_hash *rh) | 133 | static void dispatch_bios(void *context, struct bio_list *bio_list) |
639 | { | 134 | { |
640 | return rh->log->type->flush(rh->log); | 135 | struct mirror_set *ms = context; |
641 | } | 136 | struct bio *bio; |
642 | |||
643 | static void rh_delay(struct region_hash *rh, struct bio *bio) | ||
644 | { | ||
645 | struct region *reg; | ||
646 | |||
647 | read_lock(&rh->hash_lock); | ||
648 | reg = __rh_find(rh, bio_to_region(rh, bio)); | ||
649 | bio_list_add(®->delayed_bios, bio); | ||
650 | read_unlock(&rh->hash_lock); | ||
651 | } | ||
652 | |||
653 | static void rh_stop_recovery(struct region_hash *rh) | ||
654 | { | ||
655 | int i; | ||
656 | |||
657 | /* wait for any recovering regions */ | ||
658 | for (i = 0; i < MAX_RECOVERY; i++) | ||
659 | down(&rh->recovery_count); | ||
660 | } | ||
661 | |||
662 | static void rh_start_recovery(struct region_hash *rh) | ||
663 | { | ||
664 | int i; | ||
665 | |||
666 | for (i = 0; i < MAX_RECOVERY; i++) | ||
667 | up(&rh->recovery_count); | ||
668 | 137 | ||
669 | wake(rh->ms); | 138 | while ((bio = bio_list_pop(bio_list))) |
139 | queue_bio(ms, bio, WRITE); | ||
670 | } | 140 | } |
671 | 141 | ||
672 | #define MIN_READ_RECORDS 20 | 142 | #define MIN_READ_RECORDS 20 |
@@ -776,8 +246,8 @@ out: | |||
776 | static void recovery_complete(int read_err, unsigned long write_err, | 246 | static void recovery_complete(int read_err, unsigned long write_err, |
777 | void *context) | 247 | void *context) |
778 | { | 248 | { |
779 | struct region *reg = (struct region *)context; | 249 | struct dm_region *reg = context; |
780 | struct mirror_set *ms = reg->rh->ms; | 250 | struct mirror_set *ms = dm_rh_region_context(reg); |
781 | int m, bit = 0; | 251 | int m, bit = 0; |
782 | 252 | ||
783 | if (read_err) { | 253 | if (read_err) { |
@@ -803,31 +273,33 @@ static void recovery_complete(int read_err, unsigned long write_err, | |||
803 | } | 273 | } |
804 | } | 274 | } |
805 | 275 | ||
806 | rh_recovery_end(reg, !(read_err || write_err)); | 276 | dm_rh_recovery_end(reg, !(read_err || write_err)); |
807 | } | 277 | } |
808 | 278 | ||
809 | static int recover(struct mirror_set *ms, struct region *reg) | 279 | static int recover(struct mirror_set *ms, struct dm_region *reg) |
810 | { | 280 | { |
811 | int r; | 281 | int r; |
812 | unsigned int i; | 282 | unsigned i; |
813 | struct dm_io_region from, to[DM_KCOPYD_MAX_REGIONS], *dest; | 283 | struct dm_io_region from, to[DM_KCOPYD_MAX_REGIONS], *dest; |
814 | struct mirror *m; | 284 | struct mirror *m; |
815 | unsigned long flags = 0; | 285 | unsigned long flags = 0; |
286 | region_t key = dm_rh_get_region_key(reg); | ||
287 | sector_t region_size = dm_rh_get_region_size(ms->rh); | ||
816 | 288 | ||
817 | /* fill in the source */ | 289 | /* fill in the source */ |
818 | m = get_default_mirror(ms); | 290 | m = get_default_mirror(ms); |
819 | from.bdev = m->dev->bdev; | 291 | from.bdev = m->dev->bdev; |
820 | from.sector = m->offset + region_to_sector(reg->rh, reg->key); | 292 | from.sector = m->offset + dm_rh_region_to_sector(ms->rh, key); |
821 | if (reg->key == (ms->nr_regions - 1)) { | 293 | if (key == (ms->nr_regions - 1)) { |
822 | /* | 294 | /* |
823 | * The final region may be smaller than | 295 | * The final region may be smaller than |
824 | * region_size. | 296 | * region_size. |
825 | */ | 297 | */ |
826 | from.count = ms->ti->len & (reg->rh->region_size - 1); | 298 | from.count = ms->ti->len & (region_size - 1); |
827 | if (!from.count) | 299 | if (!from.count) |
828 | from.count = reg->rh->region_size; | 300 | from.count = region_size; |
829 | } else | 301 | } else |
830 | from.count = reg->rh->region_size; | 302 | from.count = region_size; |
831 | 303 | ||
832 | /* fill in the destinations */ | 304 | /* fill in the destinations */ |
833 | for (i = 0, dest = to; i < ms->nr_mirrors; i++) { | 305 | for (i = 0, dest = to; i < ms->nr_mirrors; i++) { |
@@ -836,7 +308,7 @@ static int recover(struct mirror_set *ms, struct region *reg) | |||
836 | 308 | ||
837 | m = ms->mirror + i; | 309 | m = ms->mirror + i; |
838 | dest->bdev = m->dev->bdev; | 310 | dest->bdev = m->dev->bdev; |
839 | dest->sector = m->offset + region_to_sector(reg->rh, reg->key); | 311 | dest->sector = m->offset + dm_rh_region_to_sector(ms->rh, key); |
840 | dest->count = from.count; | 312 | dest->count = from.count; |
841 | dest++; | 313 | dest++; |
842 | } | 314 | } |
@@ -853,22 +325,22 @@ static int recover(struct mirror_set *ms, struct region *reg) | |||
853 | 325 | ||
854 | static void do_recovery(struct mirror_set *ms) | 326 | static void do_recovery(struct mirror_set *ms) |
855 | { | 327 | { |
328 | struct dm_region *reg; | ||
329 | struct dm_dirty_log *log = dm_rh_dirty_log(ms->rh); | ||
856 | int r; | 330 | int r; |
857 | struct region *reg; | ||
858 | struct dm_dirty_log *log = ms->rh.log; | ||
859 | 331 | ||
860 | /* | 332 | /* |
861 | * Start quiescing some regions. | 333 | * Start quiescing some regions. |
862 | */ | 334 | */ |
863 | rh_recovery_prepare(&ms->rh); | 335 | dm_rh_recovery_prepare(ms->rh); |
864 | 336 | ||
865 | /* | 337 | /* |
866 | * Copy any already quiesced regions. | 338 | * Copy any already quiesced regions. |
867 | */ | 339 | */ |
868 | while ((reg = rh_recovery_start(&ms->rh))) { | 340 | while ((reg = dm_rh_recovery_start(ms->rh))) { |
869 | r = recover(ms, reg); | 341 | r = recover(ms, reg); |
870 | if (r) | 342 | if (r) |
871 | rh_recovery_end(reg, 0); | 343 | dm_rh_recovery_end(reg, 0); |
872 | } | 344 | } |
873 | 345 | ||
874 | /* | 346 | /* |
@@ -909,9 +381,10 @@ static int default_ok(struct mirror *m) | |||
909 | 381 | ||
910 | static int mirror_available(struct mirror_set *ms, struct bio *bio) | 382 | static int mirror_available(struct mirror_set *ms, struct bio *bio) |
911 | { | 383 | { |
912 | region_t region = bio_to_region(&ms->rh, bio); | 384 | struct dm_dirty_log *log = dm_rh_dirty_log(ms->rh); |
385 | region_t region = dm_rh_bio_to_region(ms->rh, bio); | ||
913 | 386 | ||
914 | if (ms->rh.log->type->in_sync(ms->rh.log, region, 0)) | 387 | if (log->type->in_sync(log, region, 0)) |
915 | return choose_mirror(ms, bio->bi_sector) ? 1 : 0; | 388 | return choose_mirror(ms, bio->bi_sector) ? 1 : 0; |
916 | 389 | ||
917 | return 0; | 390 | return 0; |
@@ -985,7 +458,14 @@ static void read_async_bio(struct mirror *m, struct bio *bio) | |||
985 | 458 | ||
986 | map_region(&io, m, bio); | 459 | map_region(&io, m, bio); |
987 | bio_set_m(bio, m); | 460 | bio_set_m(bio, m); |
988 | (void) dm_io(&io_req, 1, &io, NULL); | 461 | BUG_ON(dm_io(&io_req, 1, &io, NULL)); |
462 | } | ||
463 | |||
464 | static inline int region_in_sync(struct mirror_set *ms, region_t region, | ||
465 | int may_block) | ||
466 | { | ||
467 | int state = dm_rh_get_state(ms->rh, region, may_block); | ||
468 | return state == DM_RH_CLEAN || state == DM_RH_DIRTY; | ||
989 | } | 469 | } |
990 | 470 | ||
991 | static void do_reads(struct mirror_set *ms, struct bio_list *reads) | 471 | static void do_reads(struct mirror_set *ms, struct bio_list *reads) |
@@ -995,13 +475,13 @@ static void do_reads(struct mirror_set *ms, struct bio_list *reads) | |||
995 | struct mirror *m; | 475 | struct mirror *m; |
996 | 476 | ||
997 | while ((bio = bio_list_pop(reads))) { | 477 | while ((bio = bio_list_pop(reads))) { |
998 | region = bio_to_region(&ms->rh, bio); | 478 | region = dm_rh_bio_to_region(ms->rh, bio); |
999 | m = get_default_mirror(ms); | 479 | m = get_default_mirror(ms); |
1000 | 480 | ||
1001 | /* | 481 | /* |
1002 | * We can only read balance if the region is in sync. | 482 | * We can only read balance if the region is in sync. |
1003 | */ | 483 | */ |
1004 | if (likely(rh_in_sync(&ms->rh, region, 1))) | 484 | if (likely(region_in_sync(ms, region, 1))) |
1005 | m = choose_mirror(ms, bio->bi_sector); | 485 | m = choose_mirror(ms, bio->bi_sector); |
1006 | else if (m && atomic_read(&m->error_count)) | 486 | else if (m && atomic_read(&m->error_count)) |
1007 | m = NULL; | 487 | m = NULL; |
@@ -1024,57 +504,6 @@ static void do_reads(struct mirror_set *ms, struct bio_list *reads) | |||
1024 | * NOSYNC: increment pending, just write to the default mirror | 504 | * NOSYNC: increment pending, just write to the default mirror |
1025 | *---------------------------------------------------------------*/ | 505 | *---------------------------------------------------------------*/ |
1026 | 506 | ||
1027 | /* __bio_mark_nosync | ||
1028 | * @ms | ||
1029 | * @bio | ||
1030 | * @done | ||
1031 | * @error | ||
1032 | * | ||
1033 | * The bio was written on some mirror(s) but failed on other mirror(s). | ||
1034 | * We can successfully endio the bio but should avoid the region being | ||
1035 | * marked clean by setting the state RH_NOSYNC. | ||
1036 | * | ||
1037 | * This function is _not_ safe in interrupt context! | ||
1038 | */ | ||
1039 | static void __bio_mark_nosync(struct mirror_set *ms, | ||
1040 | struct bio *bio, unsigned done, int error) | ||
1041 | { | ||
1042 | unsigned long flags; | ||
1043 | struct region_hash *rh = &ms->rh; | ||
1044 | struct dm_dirty_log *log = ms->rh.log; | ||
1045 | struct region *reg; | ||
1046 | region_t region = bio_to_region(rh, bio); | ||
1047 | int recovering = 0; | ||
1048 | |||
1049 | /* We must inform the log that the sync count has changed. */ | ||
1050 | log->type->set_region_sync(log, region, 0); | ||
1051 | ms->in_sync = 0; | ||
1052 | |||
1053 | read_lock(&rh->hash_lock); | ||
1054 | reg = __rh_find(rh, region); | ||
1055 | read_unlock(&rh->hash_lock); | ||
1056 | |||
1057 | /* region hash entry should exist because write was in-flight */ | ||
1058 | BUG_ON(!reg); | ||
1059 | BUG_ON(!list_empty(®->list)); | ||
1060 | |||
1061 | spin_lock_irqsave(&rh->region_lock, flags); | ||
1062 | /* | ||
1063 | * Possible cases: | ||
1064 | * 1) RH_DIRTY | ||
1065 | * 2) RH_NOSYNC: was dirty, other preceeding writes failed | ||
1066 | * 3) RH_RECOVERING: flushing pending writes | ||
1067 | * Either case, the region should have not been connected to list. | ||
1068 | */ | ||
1069 | recovering = (reg->state == RH_RECOVERING); | ||
1070 | reg->state = RH_NOSYNC; | ||
1071 | BUG_ON(!list_empty(®->list)); | ||
1072 | spin_unlock_irqrestore(&rh->region_lock, flags); | ||
1073 | |||
1074 | bio_endio(bio, error); | ||
1075 | if (recovering) | ||
1076 | complete_resync_work(reg, 0); | ||
1077 | } | ||
1078 | 507 | ||
1079 | static void write_callback(unsigned long error, void *context) | 508 | static void write_callback(unsigned long error, void *context) |
1080 | { | 509 | { |
@@ -1119,7 +548,7 @@ static void write_callback(unsigned long error, void *context) | |||
1119 | bio_list_add(&ms->failures, bio); | 548 | bio_list_add(&ms->failures, bio); |
1120 | spin_unlock_irqrestore(&ms->lock, flags); | 549 | spin_unlock_irqrestore(&ms->lock, flags); |
1121 | if (should_wake) | 550 | if (should_wake) |
1122 | wake(ms); | 551 | wakeup_mirrord(ms); |
1123 | return; | 552 | return; |
1124 | } | 553 | } |
1125 | out: | 554 | out: |
@@ -1149,7 +578,7 @@ static void do_write(struct mirror_set *ms, struct bio *bio) | |||
1149 | */ | 578 | */ |
1150 | bio_set_m(bio, get_default_mirror(ms)); | 579 | bio_set_m(bio, get_default_mirror(ms)); |
1151 | 580 | ||
1152 | (void) dm_io(&io_req, ms->nr_mirrors, io, NULL); | 581 | BUG_ON(dm_io(&io_req, ms->nr_mirrors, io, NULL)); |
1153 | } | 582 | } |
1154 | 583 | ||
1155 | static void do_writes(struct mirror_set *ms, struct bio_list *writes) | 584 | static void do_writes(struct mirror_set *ms, struct bio_list *writes) |
@@ -1169,18 +598,19 @@ static void do_writes(struct mirror_set *ms, struct bio_list *writes) | |||
1169 | bio_list_init(&recover); | 598 | bio_list_init(&recover); |
1170 | 599 | ||
1171 | while ((bio = bio_list_pop(writes))) { | 600 | while ((bio = bio_list_pop(writes))) { |
1172 | state = rh_state(&ms->rh, bio_to_region(&ms->rh, bio), 1); | 601 | state = dm_rh_get_state(ms->rh, |
602 | dm_rh_bio_to_region(ms->rh, bio), 1); | ||
1173 | switch (state) { | 603 | switch (state) { |
1174 | case RH_CLEAN: | 604 | case DM_RH_CLEAN: |
1175 | case RH_DIRTY: | 605 | case DM_RH_DIRTY: |
1176 | this_list = &sync; | 606 | this_list = &sync; |
1177 | break; | 607 | break; |
1178 | 608 | ||
1179 | case RH_NOSYNC: | 609 | case DM_RH_NOSYNC: |
1180 | this_list = &nosync; | 610 | this_list = &nosync; |
1181 | break; | 611 | break; |
1182 | 612 | ||
1183 | case RH_RECOVERING: | 613 | case DM_RH_RECOVERING: |
1184 | this_list = &recover; | 614 | this_list = &recover; |
1185 | break; | 615 | break; |
1186 | } | 616 | } |
@@ -1193,9 +623,9 @@ static void do_writes(struct mirror_set *ms, struct bio_list *writes) | |||
1193 | * be written to (writes to recover regions are going to | 623 | * be written to (writes to recover regions are going to |
1194 | * be delayed). | 624 | * be delayed). |
1195 | */ | 625 | */ |
1196 | rh_inc_pending(&ms->rh, &sync); | 626 | dm_rh_inc_pending(ms->rh, &sync); |
1197 | rh_inc_pending(&ms->rh, &nosync); | 627 | dm_rh_inc_pending(ms->rh, &nosync); |
1198 | ms->log_failure = rh_flush(&ms->rh) ? 1 : 0; | 628 | ms->log_failure = dm_rh_flush(ms->rh) ? 1 : 0; |
1199 | 629 | ||
1200 | /* | 630 | /* |
1201 | * Dispatch io. | 631 | * Dispatch io. |
@@ -1204,13 +634,13 @@ static void do_writes(struct mirror_set *ms, struct bio_list *writes) | |||
1204 | spin_lock_irq(&ms->lock); | 634 | spin_lock_irq(&ms->lock); |
1205 | bio_list_merge(&ms->failures, &sync); | 635 | bio_list_merge(&ms->failures, &sync); |
1206 | spin_unlock_irq(&ms->lock); | 636 | spin_unlock_irq(&ms->lock); |
1207 | wake(ms); | 637 | wakeup_mirrord(ms); |
1208 | } else | 638 | } else |
1209 | while ((bio = bio_list_pop(&sync))) | 639 | while ((bio = bio_list_pop(&sync))) |
1210 | do_write(ms, bio); | 640 | do_write(ms, bio); |
1211 | 641 | ||
1212 | while ((bio = bio_list_pop(&recover))) | 642 | while ((bio = bio_list_pop(&recover))) |
1213 | rh_delay(&ms->rh, bio); | 643 | dm_rh_delay(ms->rh, bio); |
1214 | 644 | ||
1215 | while ((bio = bio_list_pop(&nosync))) { | 645 | while ((bio = bio_list_pop(&nosync))) { |
1216 | map_bio(get_default_mirror(ms), bio); | 646 | map_bio(get_default_mirror(ms), bio); |
@@ -1227,7 +657,8 @@ static void do_failures(struct mirror_set *ms, struct bio_list *failures) | |||
1227 | 657 | ||
1228 | if (!ms->log_failure) { | 658 | if (!ms->log_failure) { |
1229 | while ((bio = bio_list_pop(failures))) | 659 | while ((bio = bio_list_pop(failures)))
1230 | __bio_mark_nosync(ms, bio, bio->bi_size, 0); | 660 | ms->in_sync = 0;
661 | dm_rh_mark_nosync(ms->rh, bio, bio->bi_size, 0); | ||
1231 | return; | 662 | return; |
1232 | } | 663 | } |
1233 | 664 | ||
@@ -1280,8 +711,8 @@ static void trigger_event(struct work_struct *work) | |||
1280 | *---------------------------------------------------------------*/ | 711 | *---------------------------------------------------------------*/ |
1281 | static void do_mirror(struct work_struct *work) | 712 | static void do_mirror(struct work_struct *work) |
1282 | { | 713 | { |
1283 | struct mirror_set *ms =container_of(work, struct mirror_set, | 714 | struct mirror_set *ms = container_of(work, struct mirror_set, |
1284 | kmirrord_work); | 715 | kmirrord_work); |
1285 | struct bio_list reads, writes, failures; | 716 | struct bio_list reads, writes, failures; |
1286 | unsigned long flags; | 717 | unsigned long flags; |
1287 | 718 | ||
@@ -1294,7 +725,7 @@ static void do_mirror(struct work_struct *work) | |||
1294 | bio_list_init(&ms->failures); | 725 | bio_list_init(&ms->failures); |
1295 | spin_unlock_irqrestore(&ms->lock, flags); | 726 | spin_unlock_irqrestore(&ms->lock, flags); |
1296 | 727 | ||
1297 | rh_update_states(&ms->rh); | 728 | dm_rh_update_states(ms->rh, errors_handled(ms)); |
1298 | do_recovery(ms); | 729 | do_recovery(ms); |
1299 | do_reads(ms, &reads); | 730 | do_reads(ms, &reads); |
1300 | do_writes(ms, &writes); | 731 | do_writes(ms, &writes); |
@@ -1303,7 +734,6 @@ static void do_mirror(struct work_struct *work) | |||
1303 | dm_table_unplug_all(ms->ti->table); | 734 | dm_table_unplug_all(ms->ti->table); |
1304 | } | 735 | } |
1305 | 736 | ||
1306 | |||
1307 | /*----------------------------------------------------------------- | 737 | /*----------------------------------------------------------------- |
1308 | * Target functions | 738 | * Target functions |
1309 | *---------------------------------------------------------------*/ | 739 | *---------------------------------------------------------------*/ |
@@ -1315,9 +745,6 @@ static struct mirror_set *alloc_context(unsigned int nr_mirrors, | |||
1315 | size_t len; | 745 | size_t len; |
1316 | struct mirror_set *ms = NULL; | 746 | struct mirror_set *ms = NULL; |
1317 | 747 | ||
1318 | if (array_too_big(sizeof(*ms), sizeof(ms->mirror[0]), nr_mirrors)) | ||
1319 | return NULL; | ||
1320 | |||
1321 | len = sizeof(*ms) + (sizeof(ms->mirror[0]) * nr_mirrors); | 748 | len = sizeof(*ms) + (sizeof(ms->mirror[0]) * nr_mirrors); |
1322 | 749 | ||
1323 | ms = kzalloc(len, GFP_KERNEL); | 750 | ms = kzalloc(len, GFP_KERNEL); |
@@ -1353,7 +780,11 @@ static struct mirror_set *alloc_context(unsigned int nr_mirrors, | |||
1353 | return NULL; | 780 | return NULL; |
1354 | } | 781 | } |
1355 | 782 | ||
1356 | if (rh_init(&ms->rh, ms, dl, region_size, ms->nr_regions)) { | 783 | ms->rh = dm_region_hash_create(ms, dispatch_bios, wakeup_mirrord, |
784 | wakeup_all_recovery_waiters, | ||
785 | ms->ti->begin, MAX_RECOVERY, | ||
786 | dl, region_size, ms->nr_regions); | ||
787 | if (IS_ERR(ms->rh)) { | ||
1357 | ti->error = "Error creating dirty region hash"; | 788 | ti->error = "Error creating dirty region hash"; |
1358 | dm_io_client_destroy(ms->io_client); | 789 | dm_io_client_destroy(ms->io_client); |
1359 | mempool_destroy(ms->read_record_pool); | 790 | mempool_destroy(ms->read_record_pool); |
@@ -1371,7 +802,7 @@ static void free_context(struct mirror_set *ms, struct dm_target *ti, | |||
1371 | dm_put_device(ti, ms->mirror[m].dev); | 802 | dm_put_device(ti, ms->mirror[m].dev); |
1372 | 803 | ||
1373 | dm_io_client_destroy(ms->io_client); | 804 | dm_io_client_destroy(ms->io_client); |
1374 | rh_exit(&ms->rh); | 805 | dm_region_hash_destroy(ms->rh); |
1375 | mempool_destroy(ms->read_record_pool); | 806 | mempool_destroy(ms->read_record_pool); |
1376 | kfree(ms); | 807 | kfree(ms); |
1377 | } | 808 | } |
@@ -1411,10 +842,10 @@ static int get_mirror(struct mirror_set *ms, struct dm_target *ti, | |||
1411 | * Create dirty log: log_type #log_params <log_params> | 842 | * Create dirty log: log_type #log_params <log_params> |
1412 | */ | 843 | */ |
1413 | static struct dm_dirty_log *create_dirty_log(struct dm_target *ti, | 844 | static struct dm_dirty_log *create_dirty_log(struct dm_target *ti, |
1414 | unsigned int argc, char **argv, | 845 | unsigned argc, char **argv, |
1415 | unsigned int *args_used) | 846 | unsigned *args_used) |
1416 | { | 847 | { |
1417 | unsigned int param_count; | 848 | unsigned param_count; |
1418 | struct dm_dirty_log *dl; | 849 | struct dm_dirty_log *dl; |
1419 | 850 | ||
1420 | if (argc < 2) { | 851 | if (argc < 2) { |
@@ -1545,7 +976,7 @@ static int mirror_ctr(struct dm_target *ti, unsigned int argc, char **argv) | |||
1545 | } | 976 | } |
1546 | 977 | ||
1547 | ti->private = ms; | 978 | ti->private = ms; |
1548 | ti->split_io = ms->rh.region_size; | 979 | ti->split_io = dm_rh_get_region_size(ms->rh); |
1549 | 980 | ||
1550 | ms->kmirrord_wq = create_singlethread_workqueue("kmirrord"); | 981 | ms->kmirrord_wq = create_singlethread_workqueue("kmirrord"); |
1551 | if (!ms->kmirrord_wq) { | 982 | if (!ms->kmirrord_wq) { |
@@ -1580,11 +1011,11 @@ static int mirror_ctr(struct dm_target *ti, unsigned int argc, char **argv) | |||
1580 | goto err_destroy_wq; | 1011 | goto err_destroy_wq; |
1581 | } | 1012 | } |
1582 | 1013 | ||
1583 | r = dm_kcopyd_client_create(DM_IO_PAGES, &ms->kcopyd_client); | 1014 | r = dm_kcopyd_client_create(DM_KCOPYD_PAGES, &ms->kcopyd_client); |
1584 | if (r) | 1015 | if (r) |
1585 | goto err_destroy_wq; | 1016 | goto err_destroy_wq; |
1586 | 1017 | ||
1587 | wake(ms); | 1018 | wakeup_mirrord(ms); |
1588 | return 0; | 1019 | return 0; |
1589 | 1020 | ||
1590 | err_destroy_wq: | 1021 | err_destroy_wq: |
@@ -1605,22 +1036,6 @@ static void mirror_dtr(struct dm_target *ti) | |||
1605 | free_context(ms, ti, ms->nr_mirrors); | 1036 | free_context(ms, ti, ms->nr_mirrors); |
1606 | } | 1037 | } |
1607 | 1038 | ||
1608 | static void queue_bio(struct mirror_set *ms, struct bio *bio, int rw) | ||
1609 | { | ||
1610 | unsigned long flags; | ||
1611 | int should_wake = 0; | ||
1612 | struct bio_list *bl; | ||
1613 | |||
1614 | bl = (rw == WRITE) ? &ms->writes : &ms->reads; | ||
1615 | spin_lock_irqsave(&ms->lock, flags); | ||
1616 | should_wake = !(bl->head); | ||
1617 | bio_list_add(bl, bio); | ||
1618 | spin_unlock_irqrestore(&ms->lock, flags); | ||
1619 | |||
1620 | if (should_wake) | ||
1621 | wake(ms); | ||
1622 | } | ||
1623 | |||
1624 | /* | 1039 | /* |
1625 | * Mirror mapping function | 1040 | * Mirror mapping function |
1626 | */ | 1041 | */ |
@@ -1631,16 +1046,16 @@ static int mirror_map(struct dm_target *ti, struct bio *bio, | |||
1631 | struct mirror *m; | 1046 | struct mirror *m; |
1632 | struct mirror_set *ms = ti->private; | 1047 | struct mirror_set *ms = ti->private; |
1633 | struct dm_raid1_read_record *read_record = NULL; | 1048 | struct dm_raid1_read_record *read_record = NULL; |
1049 | struct dm_dirty_log *log = dm_rh_dirty_log(ms->rh); | ||
1634 | 1050 | ||
1635 | if (rw == WRITE) { | 1051 | if (rw == WRITE) { |
1636 | /* Save region for mirror_end_io() handler */ | 1052 | /* Save region for mirror_end_io() handler */ |
1637 | map_context->ll = bio_to_region(&ms->rh, bio); | 1053 | map_context->ll = dm_rh_bio_to_region(ms->rh, bio); |
1638 | queue_bio(ms, bio, rw); | 1054 | queue_bio(ms, bio, rw); |
1639 | return DM_MAPIO_SUBMITTED; | 1055 | return DM_MAPIO_SUBMITTED; |
1640 | } | 1056 | } |
1641 | 1057 | ||
1642 | r = ms->rh.log->type->in_sync(ms->rh.log, | 1058 | r = log->type->in_sync(log, dm_rh_bio_to_region(ms->rh, bio), 0); |
1643 | bio_to_region(&ms->rh, bio), 0); | ||
1644 | if (r < 0 && r != -EWOULDBLOCK) | 1059 | if (r < 0 && r != -EWOULDBLOCK) |
1645 | return r; | 1060 | return r; |
1646 | 1061 | ||
@@ -1688,7 +1103,7 @@ static int mirror_end_io(struct dm_target *ti, struct bio *bio, | |||
1688 | * We need to dec pending if this was a write. | 1103 | * We need to dec pending if this was a write. |
1689 | */ | 1104 | */ |
1690 | if (rw == WRITE) { | 1105 | if (rw == WRITE) { |
1691 | rh_dec(&ms->rh, map_context->ll); | 1106 | dm_rh_dec(ms->rh, map_context->ll); |
1692 | return error; | 1107 | return error; |
1693 | } | 1108 | } |
1694 | 1109 | ||
@@ -1744,7 +1159,7 @@ out: | |||
1744 | static void mirror_presuspend(struct dm_target *ti) | 1159 | static void mirror_presuspend(struct dm_target *ti) |
1745 | { | 1160 | { |
1746 | struct mirror_set *ms = (struct mirror_set *) ti->private; | 1161 | struct mirror_set *ms = (struct mirror_set *) ti->private; |
1747 | struct dm_dirty_log *log = ms->rh.log; | 1162 | struct dm_dirty_log *log = dm_rh_dirty_log(ms->rh); |
1748 | 1163 | ||
1749 | atomic_set(&ms->suspend, 1); | 1164 | atomic_set(&ms->suspend, 1); |
1750 | 1165 | ||
@@ -1752,10 +1167,10 @@ static void mirror_presuspend(struct dm_target *ti) | |||
1752 | * We must finish up all the work that we've | 1167 | * We must finish up all the work that we've |
1753 | * generated (i.e. recovery work). | 1168 | * generated (i.e. recovery work). |
1754 | */ | 1169 | */ |
1755 | rh_stop_recovery(&ms->rh); | 1170 | dm_rh_stop_recovery(ms->rh); |
1756 | 1171 | ||
1757 | wait_event(_kmirrord_recovery_stopped, | 1172 | wait_event(_kmirrord_recovery_stopped, |
1758 | !atomic_read(&ms->rh.recovery_in_flight)); | 1173 | !dm_rh_recovery_in_flight(ms->rh)); |
1759 | 1174 | ||
1760 | if (log->type->presuspend && log->type->presuspend(log)) | 1175 | if (log->type->presuspend && log->type->presuspend(log)) |
1761 | /* FIXME: need better error handling */ | 1176 | /* FIXME: need better error handling */ |
@@ -1773,7 +1188,7 @@ static void mirror_presuspend(struct dm_target *ti) | |||
1773 | static void mirror_postsuspend(struct dm_target *ti) | 1188 | static void mirror_postsuspend(struct dm_target *ti) |
1774 | { | 1189 | { |
1775 | struct mirror_set *ms = ti->private; | 1190 | struct mirror_set *ms = ti->private; |
1776 | struct dm_dirty_log *log = ms->rh.log; | 1191 | struct dm_dirty_log *log = dm_rh_dirty_log(ms->rh); |
1777 | 1192 | ||
1778 | if (log->type->postsuspend && log->type->postsuspend(log)) | 1193 | if (log->type->postsuspend && log->type->postsuspend(log)) |
1779 | /* FIXME: need better error handling */ | 1194 | /* FIXME: need better error handling */ |
@@ -1783,13 +1198,13 @@ static void mirror_postsuspend(struct dm_target *ti) | |||
1783 | static void mirror_resume(struct dm_target *ti) | 1198 | static void mirror_resume(struct dm_target *ti) |
1784 | { | 1199 | { |
1785 | struct mirror_set *ms = ti->private; | 1200 | struct mirror_set *ms = ti->private; |
1786 | struct dm_dirty_log *log = ms->rh.log; | 1201 | struct dm_dirty_log *log = dm_rh_dirty_log(ms->rh); |
1787 | 1202 | ||
1788 | atomic_set(&ms->suspend, 0); | 1203 | atomic_set(&ms->suspend, 0); |
1789 | if (log->type->resume && log->type->resume(log)) | 1204 | if (log->type->resume && log->type->resume(log)) |
1790 | /* FIXME: need better error handling */ | 1205 | /* FIXME: need better error handling */ |
1791 | DMWARN("log resume failed"); | 1206 | DMWARN("log resume failed"); |
1792 | rh_start_recovery(&ms->rh); | 1207 | dm_rh_start_recovery(ms->rh); |
1793 | } | 1208 | } |
1794 | 1209 | ||
1795 | /* | 1210 | /* |
@@ -1821,7 +1236,7 @@ static int mirror_status(struct dm_target *ti, status_type_t type, | |||
1821 | { | 1236 | { |
1822 | unsigned int m, sz = 0; | 1237 | unsigned int m, sz = 0; |
1823 | struct mirror_set *ms = (struct mirror_set *) ti->private; | 1238 | struct mirror_set *ms = (struct mirror_set *) ti->private; |
1824 | struct dm_dirty_log *log = ms->rh.log; | 1239 | struct dm_dirty_log *log = dm_rh_dirty_log(ms->rh); |
1825 | char buffer[ms->nr_mirrors + 1]; | 1240 | char buffer[ms->nr_mirrors + 1]; |
1826 | 1241 | ||
1827 | switch (type) { | 1242 | switch (type) { |
@@ -1834,15 +1249,15 @@ static int mirror_status(struct dm_target *ti, status_type_t type, | |||
1834 | buffer[m] = '\0'; | 1249 | buffer[m] = '\0'; |
1835 | 1250 | ||
1836 | DMEMIT("%llu/%llu 1 %s ", | 1251 | DMEMIT("%llu/%llu 1 %s ", |
1837 | (unsigned long long)log->type->get_sync_count(ms->rh.log), | 1252 | (unsigned long long)log->type->get_sync_count(log), |
1838 | (unsigned long long)ms->nr_regions, buffer); | 1253 | (unsigned long long)ms->nr_regions, buffer); |
1839 | 1254 | ||
1840 | sz += log->type->status(ms->rh.log, type, result+sz, maxlen-sz); | 1255 | sz += log->type->status(log, type, result+sz, maxlen-sz); |
1841 | 1256 | ||
1842 | break; | 1257 | break; |
1843 | 1258 | ||
1844 | case STATUSTYPE_TABLE: | 1259 | case STATUSTYPE_TABLE: |
1845 | sz = log->type->status(ms->rh.log, type, result, maxlen); | 1260 | sz = log->type->status(log, type, result, maxlen); |
1846 | 1261 | ||
1847 | DMEMIT("%d", ms->nr_mirrors); | 1262 | DMEMIT("%d", ms->nr_mirrors); |
1848 | for (m = 0; m < ms->nr_mirrors; m++) | 1263 | for (m = 0; m < ms->nr_mirrors; m++) |
diff --git a/drivers/md/dm-region-hash.c b/drivers/md/dm-region-hash.c new file mode 100644 index 000000000000..59f8d9df9e1a --- /dev/null +++ b/drivers/md/dm-region-hash.c | |||
@@ -0,0 +1,704 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2003 Sistina Software Limited. | ||
3 | * Copyright (C) 2004-2008 Red Hat, Inc. All rights reserved. | ||
4 | * | ||
5 | * This file is released under the GPL. | ||
6 | */ | ||
7 | |||
8 | #include <linux/dm-dirty-log.h> | ||
9 | #include <linux/dm-region-hash.h> | ||
10 | |||
11 | #include <linux/ctype.h> | ||
12 | #include <linux/init.h> | ||
13 | #include <linux/module.h> | ||
14 | #include <linux/vmalloc.h> | ||
15 | |||
16 | #include "dm.h" | ||
17 | #include "dm-bio-list.h" | ||
18 | |||
19 | #define DM_MSG_PREFIX "region hash" | ||
20 | |||
21 | /*----------------------------------------------------------------- | ||
22 | * Region hash | ||
23 | * | ||
24 | * The mirror splits itself up into discrete regions. Each | ||
25 | * region can be in one of three states: clean, dirty, | ||
26 | * nosync. There is no need to put clean regions in the hash. | ||
27 | * | ||
28 | * In addition to being present in the hash table a region _may_ | ||
29 | * be present on one of three lists. | ||
30 | * | ||
31 | * clean_regions: Regions on this list have no io pending to | ||
32 | * them, they are in sync, we are no longer interested in them, | ||
33 | * they are dull. dm_rh_update_states() will remove them from the | ||
34 | * hash table. | ||
35 | * | ||
36 | * quiesced_regions: These regions have been spun down, ready | ||
37 | * for recovery. dm_rh_recovery_start() will remove regions from | ||
38 | * this list and hand them to kmirrord, which will schedule the | ||
39 | * recovery io with kcopyd. | ||
40 | * | ||
41 | * recovered_regions: Regions that kcopyd has successfully | ||
42 | * recovered. dm_rh_update_states() will now schedule any delayed | ||
43 | * io, up the recovery_count, and remove the region from the | ||
44 | * hash. | ||
45 | * | ||
46 | * There are 2 locks: | ||
47 | * A rw spin lock 'hash_lock' protects just the hash table, | ||
48 | * this is never held in write mode from interrupt context, | ||
49 | * which I believe means that we only have to disable irqs when | ||
50 | * doing a write lock. | ||
51 | * | ||
52 | * An ordinary spin lock 'region_lock' that protects the three | ||
53 | * lists in the region_hash, with the 'state', 'list' and | ||
54 | * 'delayed_bios' fields of the regions. This is used from irq | ||
55 | * context, so all other uses will have to suspend local irqs. | ||
56 | *---------------------------------------------------------------*/ | ||
57 | struct dm_region_hash { | ||
58 | uint32_t region_size; | ||
59 | unsigned region_shift; | ||
60 | |||
61 | /* holds persistent region state */ | ||
62 | struct dm_dirty_log *log; | ||
63 | |||
64 | /* hash table */ | ||
65 | rwlock_t hash_lock; | ||
66 | mempool_t *region_pool; | ||
67 | unsigned mask; | ||
68 | unsigned nr_buckets; | ||
69 | unsigned prime; | ||
70 | unsigned shift; | ||
71 | struct list_head *buckets; | ||
72 | |||
73 | unsigned max_recovery; /* Max # of regions to recover in parallel */ | ||
74 | |||
75 | spinlock_t region_lock; | ||
76 | atomic_t recovery_in_flight; | ||
77 | struct semaphore recovery_count; | ||
78 | struct list_head clean_regions; | ||
79 | struct list_head quiesced_regions; | ||
80 | struct list_head recovered_regions; | ||
81 | struct list_head failed_recovered_regions; | ||
82 | |||
83 | void *context; | ||
84 | sector_t target_begin; | ||
85 | |||
86 | /* Callback function to schedule bio writes */ | ||
87 | void (*dispatch_bios)(void *context, struct bio_list *bios); | ||
88 | |||
89 | /* Callback function to wakeup callers worker thread. */ | ||
90 | void (*wakeup_workers)(void *context); | ||
91 | |||
92 | /* Callback function to wake up the caller's recovery waiters. */ | ||
93 | void (*wakeup_all_recovery_waiters)(void *context); | ||
94 | }; | ||
95 | |||
96 | struct dm_region { | ||
97 | struct dm_region_hash *rh; /* FIXME: can we get rid of this ? */ | ||
98 | region_t key; | ||
99 | int state; | ||
100 | |||
101 | struct list_head hash_list; | ||
102 | struct list_head list; | ||
103 | |||
104 | atomic_t pending; | ||
105 | struct bio_list delayed_bios; | ||
106 | }; | ||
107 | |||
108 | /* | ||
109 | * Conversion fns | ||
110 | */ | ||
111 | static region_t dm_rh_sector_to_region(struct dm_region_hash *rh, sector_t sector) | ||
112 | { | ||
113 | return sector >> rh->region_shift; | ||
114 | } | ||
115 | |||
116 | sector_t dm_rh_region_to_sector(struct dm_region_hash *rh, region_t region) | ||
117 | { | ||
118 | return region << rh->region_shift; | ||
119 | } | ||
120 | EXPORT_SYMBOL_GPL(dm_rh_region_to_sector); | ||
121 | |||
122 | region_t dm_rh_bio_to_region(struct dm_region_hash *rh, struct bio *bio) | ||
123 | { | ||
124 | return dm_rh_sector_to_region(rh, bio->bi_sector - rh->target_begin); | ||
125 | } | ||
126 | EXPORT_SYMBOL_GPL(dm_rh_bio_to_region); | ||
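The conversion helpers above assume region_size is a power of two: region_shift is ffs(region_size) - 1, so the sector-to-region division becomes a shift, applied to the bio's offset within the target (bi_sector - rh->target_begin). A minimal userspace sketch of that arithmetic, with illustrative names and sample values rather than the kernel API:

#include <stdint.h>
#include <stdio.h>
#include <strings.h>                     /* ffs() */

typedef uint64_t sector_t;
typedef uint64_t region_t;

/* Shift-based conversion; only valid when region_size is a power of two. */
static region_t sector_to_region(sector_t sector, uint32_t region_size)
{
    unsigned region_shift = ffs(region_size) - 1;

    return sector >> region_shift;
}

int main(void)
{
    uint32_t region_size = 1024;         /* sectors per region (512 KiB) */
    sector_t bi_sector = 5000, target_begin = 384;

    /* dm_rh_bio_to_region() subtracts the target offset first. */
    printf("region %llu\n",
           (unsigned long long)sector_to_region(bi_sector - target_begin,
                                                region_size));
    return 0;
}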
127 | |||
128 | void *dm_rh_region_context(struct dm_region *reg) | ||
129 | { | ||
130 | return reg->rh->context; | ||
131 | } | ||
132 | EXPORT_SYMBOL_GPL(dm_rh_region_context); | ||
133 | |||
134 | region_t dm_rh_get_region_key(struct dm_region *reg) | ||
135 | { | ||
136 | return reg->key; | ||
137 | } | ||
138 | EXPORT_SYMBOL_GPL(dm_rh_get_region_key); | ||
139 | |||
140 | sector_t dm_rh_get_region_size(struct dm_region_hash *rh) | ||
141 | { | ||
142 | return rh->region_size; | ||
143 | } | ||
144 | EXPORT_SYMBOL_GPL(dm_rh_get_region_size); | ||
145 | |||
146 | /* | ||
147 | * FIXME: shall we pass in a structure instead of all these args to | ||
148 | * dm_region_hash_create()???? | ||
149 | */ | ||
150 | #define RH_HASH_MULT 2654435387U | ||
151 | #define RH_HASH_SHIFT 12 | ||
152 | |||
153 | #define MIN_REGIONS 64 | ||
154 | struct dm_region_hash *dm_region_hash_create( | ||
155 | void *context, void (*dispatch_bios)(void *context, | ||
156 | struct bio_list *bios), | ||
157 | void (*wakeup_workers)(void *context), | ||
158 | void (*wakeup_all_recovery_waiters)(void *context), | ||
159 | sector_t target_begin, unsigned max_recovery, | ||
160 | struct dm_dirty_log *log, uint32_t region_size, | ||
161 | region_t nr_regions) | ||
162 | { | ||
163 | struct dm_region_hash *rh; | ||
164 | unsigned nr_buckets, max_buckets; | ||
165 | size_t i; | ||
166 | |||
167 | /* | ||
168 | * Calculate a suitable number of buckets for our hash | ||
169 | * table. | ||
170 | */ | ||
171 | max_buckets = nr_regions >> 6; | ||
172 | for (nr_buckets = 128u; nr_buckets < max_buckets; nr_buckets <<= 1) | ||
173 | ; | ||
174 | nr_buckets >>= 1; | ||
175 | |||
176 | rh = kmalloc(sizeof(*rh), GFP_KERNEL); | ||
177 | if (!rh) { | ||
178 | DMERR("unable to allocate region hash memory"); | ||
179 | return ERR_PTR(-ENOMEM); | ||
180 | } | ||
181 | |||
182 | rh->context = context; | ||
183 | rh->dispatch_bios = dispatch_bios; | ||
184 | rh->wakeup_workers = wakeup_workers; | ||
185 | rh->wakeup_all_recovery_waiters = wakeup_all_recovery_waiters; | ||
186 | rh->target_begin = target_begin; | ||
187 | rh->max_recovery = max_recovery; | ||
188 | rh->log = log; | ||
189 | rh->region_size = region_size; | ||
190 | rh->region_shift = ffs(region_size) - 1; | ||
191 | rwlock_init(&rh->hash_lock); | ||
192 | rh->mask = nr_buckets - 1; | ||
193 | rh->nr_buckets = nr_buckets; | ||
194 | |||
195 | rh->shift = RH_HASH_SHIFT; | ||
196 | rh->prime = RH_HASH_MULT; | ||
197 | |||
198 | rh->buckets = vmalloc(nr_buckets * sizeof(*rh->buckets)); | ||
199 | if (!rh->buckets) { | ||
200 | DMERR("unable to allocate region hash bucket memory"); | ||
201 | kfree(rh); | ||
202 | return ERR_PTR(-ENOMEM); | ||
203 | } | ||
204 | |||
205 | for (i = 0; i < nr_buckets; i++) | ||
206 | INIT_LIST_HEAD(rh->buckets + i); | ||
207 | |||
208 | spin_lock_init(&rh->region_lock); | ||
209 | sema_init(&rh->recovery_count, 0); | ||
210 | atomic_set(&rh->recovery_in_flight, 0); | ||
211 | INIT_LIST_HEAD(&rh->clean_regions); | ||
212 | INIT_LIST_HEAD(&rh->quiesced_regions); | ||
213 | INIT_LIST_HEAD(&rh->recovered_regions); | ||
214 | INIT_LIST_HEAD(&rh->failed_recovered_regions); | ||
215 | |||
216 | rh->region_pool = mempool_create_kmalloc_pool(MIN_REGIONS, | ||
217 | sizeof(struct dm_region)); | ||
218 | if (!rh->region_pool) { | ||
219 | vfree(rh->buckets); | ||
220 | kfree(rh); | ||
221 | rh = ERR_PTR(-ENOMEM); | ||
222 | } | ||
223 | |||
224 | return rh; | ||
225 | } | ||
226 | EXPORT_SYMBOL_GPL(dm_region_hash_create); | ||
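The bucket sizing in dm_region_hash_create() aims at roughly one bucket per 64 regions, rounded down to a power of two with a floor of 64 buckets. A standalone sketch of just that sizing loop, assuming it mirrors the code above:

#include <stdint.h>
#include <stdio.h>

typedef uint64_t region_t;

/* Largest power of two <= nr_regions / 64, with a floor of 64 buckets. */
static unsigned rh_nr_buckets(region_t nr_regions)
{
    unsigned nr_buckets, max_buckets = nr_regions >> 6;

    for (nr_buckets = 128u; nr_buckets < max_buckets; nr_buckets <<= 1)
        ;
    nr_buckets >>= 1;

    return nr_buckets;
}

int main(void)
{
    region_t sizes[] = { 100, 10000, 1000000 };

    for (unsigned i = 0; i < 3; i++)
        printf("%llu regions -> %u buckets\n",
               (unsigned long long)sizes[i], rh_nr_buckets(sizes[i]));
    return 0;
}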
227 | |||
228 | void dm_region_hash_destroy(struct dm_region_hash *rh) | ||
229 | { | ||
230 | unsigned h; | ||
231 | struct dm_region *reg, *nreg; | ||
232 | |||
233 | BUG_ON(!list_empty(&rh->quiesced_regions)); | ||
234 | for (h = 0; h < rh->nr_buckets; h++) { | ||
235 | list_for_each_entry_safe(reg, nreg, rh->buckets + h, | ||
236 | hash_list) { | ||
237 | BUG_ON(atomic_read(®->pending)); | ||
238 | mempool_free(reg, rh->region_pool); | ||
239 | } | ||
240 | } | ||
241 | |||
242 | if (rh->log) | ||
243 | dm_dirty_log_destroy(rh->log); | ||
244 | |||
245 | if (rh->region_pool) | ||
246 | mempool_destroy(rh->region_pool); | ||
247 | |||
248 | vfree(rh->buckets); | ||
249 | kfree(rh); | ||
250 | } | ||
251 | EXPORT_SYMBOL_GPL(dm_region_hash_destroy); | ||
252 | |||
253 | struct dm_dirty_log *dm_rh_dirty_log(struct dm_region_hash *rh) | ||
254 | { | ||
255 | return rh->log; | ||
256 | } | ||
257 | EXPORT_SYMBOL_GPL(dm_rh_dirty_log); | ||
258 | |||
259 | static unsigned rh_hash(struct dm_region_hash *rh, region_t region) | ||
260 | { | ||
261 | return (unsigned) ((region * rh->prime) >> rh->shift) & rh->mask; | ||
262 | } | ||
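rh_hash() is a multiplicative hash: multiply the region number by a large constant, discard the low RH_HASH_SHIFT bits, and mask the result into the power-of-two bucket range. A quick userspace illustration using the constants defined above; the bucket count here is an arbitrary example:

#include <stdint.h>
#include <stdio.h>

#define RH_HASH_MULT 2654435387U
#define RH_HASH_SHIFT 12

/* mask must be nr_buckets - 1 with nr_buckets a power of two. */
static unsigned rh_hash(uint64_t region, unsigned mask)
{
    return (unsigned)((region * RH_HASH_MULT) >> RH_HASH_SHIFT) & mask;
}

int main(void)
{
    unsigned mask = 128 - 1;             /* 128 buckets */

    for (uint64_t region = 0; region < 8; region++)
        printf("region %llu -> bucket %u\n",
               (unsigned long long)region, rh_hash(region, mask));
    return 0;
}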
263 | |||
264 | static struct dm_region *__rh_lookup(struct dm_region_hash *rh, region_t region) | ||
265 | { | ||
266 | struct dm_region *reg; | ||
267 | struct list_head *bucket = rh->buckets + rh_hash(rh, region); | ||
268 | |||
269 | list_for_each_entry(reg, bucket, hash_list) | ||
270 | if (reg->key == region) | ||
271 | return reg; | ||
272 | |||
273 | return NULL; | ||
274 | } | ||
275 | |||
276 | static void __rh_insert(struct dm_region_hash *rh, struct dm_region *reg) | ||
277 | { | ||
278 | list_add(®->hash_list, rh->buckets + rh_hash(rh, reg->key)); | ||
279 | } | ||
280 | |||
281 | static struct dm_region *__rh_alloc(struct dm_region_hash *rh, region_t region) | ||
282 | { | ||
283 | struct dm_region *reg, *nreg; | ||
284 | |||
285 | nreg = mempool_alloc(rh->region_pool, GFP_ATOMIC); | ||
286 | if (unlikely(!nreg)) | ||
287 | nreg = kmalloc(sizeof(*nreg), GFP_NOIO); | ||
288 | |||
289 | nreg->state = rh->log->type->in_sync(rh->log, region, 1) ? | ||
290 | DM_RH_CLEAN : DM_RH_NOSYNC; | ||
291 | nreg->rh = rh; | ||
292 | nreg->key = region; | ||
293 | INIT_LIST_HEAD(&nreg->list); | ||
294 | atomic_set(&nreg->pending, 0); | ||
295 | bio_list_init(&nreg->delayed_bios); | ||
296 | |||
297 | write_lock_irq(&rh->hash_lock); | ||
298 | reg = __rh_lookup(rh, region); | ||
299 | if (reg) | ||
300 | /* We lost the race. */ | ||
301 | mempool_free(nreg, rh->region_pool); | ||
302 | else { | ||
303 | __rh_insert(rh, nreg); | ||
304 | if (nreg->state == DM_RH_CLEAN) { | ||
305 | spin_lock(&rh->region_lock); | ||
306 | list_add(&nreg->list, &rh->clean_regions); | ||
307 | spin_unlock(&rh->region_lock); | ||
308 | } | ||
309 | |||
310 | reg = nreg; | ||
311 | } | ||
312 | write_unlock_irq(&rh->hash_lock); | ||
313 | |||
314 | return reg; | ||
315 | } | ||
316 | |||
317 | static struct dm_region *__rh_find(struct dm_region_hash *rh, region_t region) | ||
318 | { | ||
319 | struct dm_region *reg; | ||
320 | |||
321 | reg = __rh_lookup(rh, region); | ||
322 | if (!reg) { | ||
323 | read_unlock(&rh->hash_lock); | ||
324 | reg = __rh_alloc(rh, region); | ||
325 | read_lock(&rh->hash_lock); | ||
326 | } | ||
327 | |||
328 | return reg; | ||
329 | } | ||
330 | |||
331 | int dm_rh_get_state(struct dm_region_hash *rh, region_t region, int may_block) | ||
332 | { | ||
333 | int r; | ||
334 | struct dm_region *reg; | ||
335 | |||
336 | read_lock(&rh->hash_lock); | ||
337 | reg = __rh_lookup(rh, region); | ||
338 | read_unlock(&rh->hash_lock); | ||
339 | |||
340 | if (reg) | ||
341 | return reg->state; | ||
342 | |||
343 | /* | ||
344 | * The region wasn't in the hash, so we fall back to the | ||
345 | * dirty log. | ||
346 | */ | ||
347 | r = rh->log->type->in_sync(rh->log, region, may_block); | ||
348 | |||
349 | /* | ||
350 | * Any error from the dirty log (eg. -EWOULDBLOCK) gets | ||
351 | * taken as a DM_RH_NOSYNC | ||
352 | */ | ||
353 | return r == 1 ? DM_RH_CLEAN : DM_RH_NOSYNC; | ||
354 | } | ||
355 | EXPORT_SYMBOL_GPL(dm_rh_get_state); | ||
356 | |||
357 | static void complete_resync_work(struct dm_region *reg, int success) | ||
358 | { | ||
359 | struct dm_region_hash *rh = reg->rh; | ||
360 | |||
361 | rh->log->type->set_region_sync(rh->log, reg->key, success); | ||
362 | |||
363 | /* | ||
364 | * Dispatch the bios before we call 'wake_up_all'. | ||
365 | * This is important because if we are suspending, | ||
366 | * we want to know that recovery is complete and | ||
367 | * the work queue is flushed. If we wake_up_all | ||
368 | * before we dispatch_bios (queue bios and call wake()), | ||
369 | * then we risk suspending before the work queue | ||
370 | * has been properly flushed. | ||
371 | */ | ||
372 | rh->dispatch_bios(rh->context, ®->delayed_bios); | ||
373 | if (atomic_dec_and_test(&rh->recovery_in_flight)) | ||
374 | rh->wakeup_all_recovery_waiters(rh->context); | ||
375 | up(&rh->recovery_count); | ||
376 | } | ||
377 | |||
378 | /* dm_rh_mark_nosync | ||
379 | * @rh | ||
380 | * @bio | ||
381 | * @done | ||
382 | * @error | ||
383 | * | ||
384 | * The bio was written on some mirror(s) but failed on other mirror(s). | ||
385 | * We can successfully endio the bio but should avoid the region being | ||
386 | * marked clean by setting the state DM_RH_NOSYNC. | ||
387 | * | ||
388 | * This function is _not_ safe in interrupt context! | ||
389 | */ | ||
390 | void dm_rh_mark_nosync(struct dm_region_hash *rh, | ||
391 | struct bio *bio, unsigned done, int error) | ||
392 | { | ||
393 | unsigned long flags; | ||
394 | struct dm_dirty_log *log = rh->log; | ||
395 | struct dm_region *reg; | ||
396 | region_t region = dm_rh_bio_to_region(rh, bio); | ||
397 | int recovering = 0; | ||
398 | |||
399 | /* We must inform the log that the sync count has changed. */ | ||
400 | log->type->set_region_sync(log, region, 0); | ||
401 | |||
402 | read_lock(&rh->hash_lock); | ||
403 | reg = __rh_find(rh, region); | ||
404 | read_unlock(&rh->hash_lock); | ||
405 | |||
406 | /* region hash entry should exist because write was in-flight */ | ||
407 | BUG_ON(!reg); | ||
408 | BUG_ON(!list_empty(®->list)); | ||
409 | |||
410 | spin_lock_irqsave(&rh->region_lock, flags); | ||
411 | /* | ||
412 | * Possible cases: | ||
413 | * 1) DM_RH_DIRTY | ||
414 | * 2) DM_RH_NOSYNC: was dirty, other preceding writes failed | ||
415 | * 3) DM_RH_RECOVERING: flushing pending writes | ||
416 | * In either case, the region should not have been connected to any list. | ||
417 | */ | ||
418 | recovering = (reg->state == DM_RH_RECOVERING); | ||
419 | reg->state = DM_RH_NOSYNC; | ||
420 | BUG_ON(!list_empty(®->list)); | ||
421 | spin_unlock_irqrestore(&rh->region_lock, flags); | ||
422 | |||
423 | bio_endio(bio, error); | ||
424 | if (recovering) | ||
425 | complete_resync_work(reg, 0); | ||
426 | } | ||
427 | EXPORT_SYMBOL_GPL(dm_rh_mark_nosync); | ||
428 | |||
429 | void dm_rh_update_states(struct dm_region_hash *rh, int errors_handled) | ||
430 | { | ||
431 | struct dm_region *reg, *next; | ||
432 | |||
433 | LIST_HEAD(clean); | ||
434 | LIST_HEAD(recovered); | ||
435 | LIST_HEAD(failed_recovered); | ||
436 | |||
437 | /* | ||
438 | * Quickly grab the lists. | ||
439 | */ | ||
440 | write_lock_irq(&rh->hash_lock); | ||
441 | spin_lock(&rh->region_lock); | ||
442 | if (!list_empty(&rh->clean_regions)) { | ||
443 | list_splice_init(&rh->clean_regions, &clean); | ||
444 | |||
445 | list_for_each_entry(reg, &clean, list) | ||
446 | list_del(®->hash_list); | ||
447 | } | ||
448 | |||
449 | if (!list_empty(&rh->recovered_regions)) { | ||
450 | list_splice_init(&rh->recovered_regions, &recovered); | ||
451 | |||
452 | list_for_each_entry(reg, &recovered, list) | ||
453 | list_del(®->hash_list); | ||
454 | } | ||
455 | |||
456 | if (!list_empty(&rh->failed_recovered_regions)) { | ||
457 | list_splice_init(&rh->failed_recovered_regions, | ||
458 | &failed_recovered); | ||
459 | |||
460 | list_for_each_entry(reg, &failed_recovered, list) | ||
461 | list_del(®->hash_list); | ||
462 | } | ||
463 | |||
464 | spin_unlock(&rh->region_lock); | ||
465 | write_unlock_irq(&rh->hash_lock); | ||
466 | |||
467 | /* | ||
468 | * All the regions on the recovered and clean lists have | ||
469 | * now been pulled out of the system, so no need to do | ||
470 | * any more locking. | ||
471 | */ | ||
472 | list_for_each_entry_safe(reg, next, &recovered, list) { | ||
473 | rh->log->type->clear_region(rh->log, reg->key); | ||
474 | complete_resync_work(reg, 1); | ||
475 | mempool_free(reg, rh->region_pool); | ||
476 | } | ||
477 | |||
478 | list_for_each_entry_safe(reg, next, &failed_recovered, list) { | ||
479 | complete_resync_work(reg, errors_handled ? 0 : 1); | ||
480 | mempool_free(reg, rh->region_pool); | ||
481 | } | ||
482 | |||
483 | list_for_each_entry_safe(reg, next, &clean, list) { | ||
484 | rh->log->type->clear_region(rh->log, reg->key); | ||
485 | mempool_free(reg, rh->region_pool); | ||
486 | } | ||
487 | |||
488 | rh->log->type->flush(rh->log); | ||
489 | } | ||
490 | EXPORT_SYMBOL_GPL(dm_rh_update_states); | ||
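dm_rh_update_states() holds its locks only long enough to splice the shared lists onto private list heads; the per-region work (clearing log bits, completing resync, freeing) then runs with no locks held. A hedged pthread sketch of that splice-under-lock pattern, with a simple singly linked list standing in for list_splice_init():

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

struct node {
    int key;
    struct node *next;
};

static struct node *shared_head;                 /* e.g. rh->clean_regions */
static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;

static void produce(int key)
{
    struct node *n = malloc(sizeof(*n));

    n->key = key;
    pthread_mutex_lock(&lock);
    n->next = shared_head;                       /* add under the lock */
    shared_head = n;
    pthread_mutex_unlock(&lock);
}

/* Grab the whole list under the lock, process it with the lock dropped. */
static void update_states(void)
{
    struct node *mine, *next;

    pthread_mutex_lock(&lock);
    mine = shared_head;                          /* splice: steal the list ... */
    shared_head = NULL;                          /* ... and reset the shared head */
    pthread_mutex_unlock(&lock);

    for (; mine; mine = next) {                  /* no locking needed here */
        next = mine->next;
        printf("processing region %d\n", mine->key);
        free(mine);
    }
}

int main(void)
{
    produce(1);
    produce(2);
    update_states();
    return 0;
}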
491 | |||
492 | static void rh_inc(struct dm_region_hash *rh, region_t region) | ||
493 | { | ||
494 | struct dm_region *reg; | ||
495 | |||
496 | read_lock(&rh->hash_lock); | ||
497 | reg = __rh_find(rh, region); | ||
498 | |||
499 | spin_lock_irq(&rh->region_lock); | ||
500 | atomic_inc(®->pending); | ||
501 | |||
502 | if (reg->state == DM_RH_CLEAN) { | ||
503 | reg->state = DM_RH_DIRTY; | ||
504 | list_del_init(®->list); /* take off the clean list */ | ||
505 | spin_unlock_irq(&rh->region_lock); | ||
506 | |||
507 | rh->log->type->mark_region(rh->log, reg->key); | ||
508 | } else | ||
509 | spin_unlock_irq(&rh->region_lock); | ||
510 | |||
511 | |||
512 | read_unlock(&rh->hash_lock); | ||
513 | } | ||
514 | |||
515 | void dm_rh_inc_pending(struct dm_region_hash *rh, struct bio_list *bios) | ||
516 | { | ||
517 | struct bio *bio; | ||
518 | |||
519 | for (bio = bios->head; bio; bio = bio->bi_next) | ||
520 | rh_inc(rh, dm_rh_bio_to_region(rh, bio)); | ||
521 | } | ||
522 | EXPORT_SYMBOL_GPL(dm_rh_inc_pending); | ||
523 | |||
524 | void dm_rh_dec(struct dm_region_hash *rh, region_t region) | ||
525 | { | ||
526 | unsigned long flags; | ||
527 | struct dm_region *reg; | ||
528 | int should_wake = 0; | ||
529 | |||
530 | read_lock(&rh->hash_lock); | ||
531 | reg = __rh_lookup(rh, region); | ||
532 | read_unlock(&rh->hash_lock); | ||
533 | |||
534 | spin_lock_irqsave(&rh->region_lock, flags); | ||
535 | if (atomic_dec_and_test(®->pending)) { | ||
536 | /* | ||
537 | * There is no pending I/O for this region. | ||
538 | * We can move the region to corresponding list for next action. | ||
539 | * At this point, the region is not yet connected to any list. | ||
540 | * | ||
541 | * If the state is DM_RH_NOSYNC, the region should be kept off | ||
542 | * the clean list. | ||
543 | * The hash entry for DM_RH_NOSYNC will remain in memory | ||
544 | * until the region is recovered or the map is reloaded. | ||
545 | */ | ||
546 | |||
547 | /* do nothing for DM_RH_NOSYNC */ | ||
548 | if (reg->state == DM_RH_RECOVERING) { | ||
549 | list_add_tail(®->list, &rh->quiesced_regions); | ||
550 | } else if (reg->state == DM_RH_DIRTY) { | ||
551 | reg->state = DM_RH_CLEAN; | ||
552 | list_add(®->list, &rh->clean_regions); | ||
553 | } | ||
554 | should_wake = 1; | ||
555 | } | ||
556 | spin_unlock_irqrestore(&rh->region_lock, flags); | ||
557 | |||
558 | if (should_wake) | ||
559 | rh->wakeup_workers(rh->context); | ||
560 | } | ||
561 | EXPORT_SYMBOL_GPL(dm_rh_dec); | ||
562 | |||
563 | /* | ||
564 | * Starts quiescing a region in preparation for recovery. | ||
565 | */ | ||
566 | static int __rh_recovery_prepare(struct dm_region_hash *rh) | ||
567 | { | ||
568 | int r; | ||
569 | region_t region; | ||
570 | struct dm_region *reg; | ||
571 | |||
572 | /* | ||
573 | * Ask the dirty log what's next. | ||
574 | */ | ||
575 | r = rh->log->type->get_resync_work(rh->log, ®ion); | ||
576 | if (r <= 0) | ||
577 | return r; | ||
578 | |||
579 | /* | ||
580 | * Get this region, and start it quiescing by setting the | ||
581 | * recovering flag. | ||
582 | */ | ||
583 | read_lock(&rh->hash_lock); | ||
584 | reg = __rh_find(rh, region); | ||
585 | read_unlock(&rh->hash_lock); | ||
586 | |||
587 | spin_lock_irq(&rh->region_lock); | ||
588 | reg->state = DM_RH_RECOVERING; | ||
589 | |||
590 | /* Already quiesced ? */ | ||
591 | if (atomic_read(®->pending)) | ||
592 | list_del_init(®->list); | ||
593 | else | ||
594 | list_move(®->list, &rh->quiesced_regions); | ||
595 | |||
596 | spin_unlock_irq(&rh->region_lock); | ||
597 | |||
598 | return 1; | ||
599 | } | ||
600 | |||
601 | void dm_rh_recovery_prepare(struct dm_region_hash *rh) | ||
602 | { | ||
603 | /* Extra reference to avoid race with dm_rh_stop_recovery */ | ||
604 | atomic_inc(&rh->recovery_in_flight); | ||
605 | |||
606 | while (!down_trylock(&rh->recovery_count)) { | ||
607 | atomic_inc(&rh->recovery_in_flight); | ||
608 | if (__rh_recovery_prepare(rh) <= 0) { | ||
609 | atomic_dec(&rh->recovery_in_flight); | ||
610 | up(&rh->recovery_count); | ||
611 | break; | ||
612 | } | ||
613 | } | ||
614 | |||
615 | /* Drop the extra reference */ | ||
616 | if (atomic_dec_and_test(&rh->recovery_in_flight)) | ||
617 | rh->wakeup_all_recovery_waiters(rh->context); | ||
618 | } | ||
619 | EXPORT_SYMBOL_GPL(dm_rh_recovery_prepare); | ||
620 | |||
621 | /* | ||
622 | * Returns any quiesced regions. | ||
623 | */ | ||
624 | struct dm_region *dm_rh_recovery_start(struct dm_region_hash *rh) | ||
625 | { | ||
626 | struct dm_region *reg = NULL; | ||
627 | |||
628 | spin_lock_irq(&rh->region_lock); | ||
629 | if (!list_empty(&rh->quiesced_regions)) { | ||
630 | reg = list_entry(rh->quiesced_regions.next, | ||
631 | struct dm_region, list); | ||
632 | list_del_init(®->list); /* remove from the quiesced list */ | ||
633 | } | ||
634 | spin_unlock_irq(&rh->region_lock); | ||
635 | |||
636 | return reg; | ||
637 | } | ||
638 | EXPORT_SYMBOL_GPL(dm_rh_recovery_start); | ||
639 | |||
640 | void dm_rh_recovery_end(struct dm_region *reg, int success) | ||
641 | { | ||
642 | struct dm_region_hash *rh = reg->rh; | ||
643 | |||
644 | spin_lock_irq(&rh->region_lock); | ||
645 | if (success) | ||
646 | list_add(®->list, ®->rh->recovered_regions); | ||
647 | else { | ||
648 | reg->state = DM_RH_NOSYNC; | ||
649 | list_add(®->list, ®->rh->failed_recovered_regions); | ||
650 | } | ||
651 | spin_unlock_irq(&rh->region_lock); | ||
652 | |||
653 | rh->wakeup_workers(rh->context); | ||
654 | } | ||
655 | EXPORT_SYMBOL_GPL(dm_rh_recovery_end); | ||
656 | |||
657 | /* Return recovery in flight count. */ | ||
658 | int dm_rh_recovery_in_flight(struct dm_region_hash *rh) | ||
659 | { | ||
660 | return atomic_read(&rh->recovery_in_flight); | ||
661 | } | ||
662 | EXPORT_SYMBOL_GPL(dm_rh_recovery_in_flight); | ||
663 | |||
664 | int dm_rh_flush(struct dm_region_hash *rh) | ||
665 | { | ||
666 | return rh->log->type->flush(rh->log); | ||
667 | } | ||
668 | EXPORT_SYMBOL_GPL(dm_rh_flush); | ||
669 | |||
670 | void dm_rh_delay(struct dm_region_hash *rh, struct bio *bio) | ||
671 | { | ||
672 | struct dm_region *reg; | ||
673 | |||
674 | read_lock(&rh->hash_lock); | ||
675 | reg = __rh_find(rh, dm_rh_bio_to_region(rh, bio)); | ||
676 | bio_list_add(®->delayed_bios, bio); | ||
677 | read_unlock(&rh->hash_lock); | ||
678 | } | ||
679 | EXPORT_SYMBOL_GPL(dm_rh_delay); | ||
680 | |||
681 | void dm_rh_stop_recovery(struct dm_region_hash *rh) | ||
682 | { | ||
683 | int i; | ||
684 | |||
685 | /* wait for any recovering regions */ | ||
686 | for (i = 0; i < rh->max_recovery; i++) | ||
687 | down(&rh->recovery_count); | ||
688 | } | ||
689 | EXPORT_SYMBOL_GPL(dm_rh_stop_recovery); | ||
690 | |||
691 | void dm_rh_start_recovery(struct dm_region_hash *rh) | ||
692 | { | ||
693 | int i; | ||
694 | |||
695 | for (i = 0; i < rh->max_recovery; i++) | ||
696 | up(&rh->recovery_count); | ||
697 | |||
698 | rh->wakeup_workers(rh->context); | ||
699 | } | ||
700 | EXPORT_SYMBOL_GPL(dm_rh_start_recovery); | ||
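Recovery throttling is built on the recovery_count semaphore: dm_rh_recovery_prepare() claims one slot per region with down_trylock() so at most max_recovery regions are in flight, complete_resync_work() returns the slot with up(), and dm_rh_stop_recovery()/dm_rh_start_recovery() drain and refill all max_recovery slots to quiesce or resume recovery. A userspace sketch with POSIX semaphores; the slot count below is just for the demo:

#include <semaphore.h>
#include <stdio.h>

#define MAX_RECOVERY 1      /* illustrative slot count */

static sem_t recovery_count;

/* Claim recovery slots until none are free (cf. dm_rh_recovery_prepare()). */
static int prepare_recoveries(void)
{
    int started = 0;

    while (sem_trywait(&recovery_count) == 0)
        started++;          /* would hand one region to kcopyd here */
    return started;
}

/* Finishing a region returns its slot (cf. complete_resync_work()). */
static void recovery_done(void)
{
    sem_post(&recovery_count);
}

/* Draining every slot blocks new recovery (cf. dm_rh_stop_recovery()). */
static void stop_recovery(void)
{
    for (int i = 0; i < MAX_RECOVERY; i++)
        sem_wait(&recovery_count);
}

static void start_recovery(void)
{
    for (int i = 0; i < MAX_RECOVERY; i++)
        sem_post(&recovery_count);
}

int main(void)
{
    sem_init(&recovery_count, 0, MAX_RECOVERY);
    printf("started %d recoveries\n", prepare_recoveries());
    recovery_done();        /* each completed region posts its slot back */
    stop_recovery();        /* blocks until all in-flight slots are returned */
    start_recovery();
    return 0;
}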
701 | |||
702 | MODULE_DESCRIPTION(DM_NAME " region hash"); | ||
703 | MODULE_AUTHOR("Joe Thornber/Heinz Mauelshagen <dm-devel@redhat.com>"); | ||
704 | MODULE_LICENSE("GPL"); | ||
diff --git a/drivers/md/dm-round-robin.c b/drivers/md/dm-round-robin.c index 391dfa2ad434..cdfbf65b28cb 100644 --- a/drivers/md/dm-round-robin.c +++ b/drivers/md/dm-round-robin.c | |||
@@ -9,7 +9,8 @@ | |||
9 | * Round-robin path selector. | 9 | * Round-robin path selector. |
10 | */ | 10 | */ |
11 | 11 | ||
12 | #include "dm.h" | 12 | #include <linux/device-mapper.h> |
13 | |||
13 | #include "dm-path-selector.h" | 14 | #include "dm-path-selector.h" |
14 | 15 | ||
15 | #include <linux/slab.h> | 16 | #include <linux/slab.h> |
diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c index 6e5528aecc98..b2d9d1ac28ad 100644 --- a/drivers/md/dm-snap.c +++ b/drivers/md/dm-snap.c | |||
@@ -600,7 +600,6 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv) | |||
600 | 600 | ||
601 | s->valid = 1; | 601 | s->valid = 1; |
602 | s->active = 0; | 602 | s->active = 0; |
603 | s->last_percent = 0; | ||
604 | init_rwsem(&s->lock); | 603 | init_rwsem(&s->lock); |
605 | spin_lock_init(&s->pe_lock); | 604 | spin_lock_init(&s->pe_lock); |
606 | s->ti = ti; | 605 | s->ti = ti; |
@@ -824,8 +823,10 @@ static struct bio *put_pending_exception(struct dm_snap_pending_exception *pe) | |||
824 | * the bios for the original write to the origin. | 823 | * the bios for the original write to the origin. |
825 | */ | 824 | */ |
826 | if (primary_pe && | 825 | if (primary_pe && |
827 | atomic_dec_and_test(&primary_pe->ref_count)) | 826 | atomic_dec_and_test(&primary_pe->ref_count)) { |
828 | origin_bios = bio_list_get(&primary_pe->origin_bios); | 827 | origin_bios = bio_list_get(&primary_pe->origin_bios); |
828 | free_pending_exception(primary_pe); | ||
829 | } | ||
829 | 830 | ||
830 | /* | 831 | /* |
831 | * Free the pe if it's not linked to an origin write or if | 832 | * Free the pe if it's not linked to an origin write or if |
@@ -834,12 +835,6 @@ static struct bio *put_pending_exception(struct dm_snap_pending_exception *pe) | |||
834 | if (!primary_pe || primary_pe != pe) | 835 | if (!primary_pe || primary_pe != pe) |
835 | free_pending_exception(pe); | 836 | free_pending_exception(pe); |
836 | 837 | ||
837 | /* | ||
838 | * Free the primary pe if nothing references it. | ||
839 | */ | ||
840 | if (primary_pe && !atomic_read(&primary_pe->ref_count)) | ||
841 | free_pending_exception(primary_pe); | ||
842 | |||
843 | return origin_bios; | 838 | return origin_bios; |
844 | } | 839 | } |
845 | 840 | ||
diff --git a/drivers/md/dm-snap.h b/drivers/md/dm-snap.h index 292c15609ae3..f07315fe2362 100644 --- a/drivers/md/dm-snap.h +++ b/drivers/md/dm-snap.h | |||
@@ -9,7 +9,7 @@ | |||
9 | #ifndef DM_SNAPSHOT_H | 9 | #ifndef DM_SNAPSHOT_H |
10 | #define DM_SNAPSHOT_H | 10 | #define DM_SNAPSHOT_H |
11 | 11 | ||
12 | #include "dm.h" | 12 | #include <linux/device-mapper.h> |
13 | #include "dm-bio-list.h" | 13 | #include "dm-bio-list.h" |
14 | #include <linux/blkdev.h> | 14 | #include <linux/blkdev.h> |
15 | #include <linux/workqueue.h> | 15 | #include <linux/workqueue.h> |
@@ -158,9 +158,6 @@ struct dm_snapshot { | |||
158 | /* Used for display of table */ | 158 | /* Used for display of table */ |
159 | char type; | 159 | char type; |
160 | 160 | ||
161 | /* The last percentage we notified */ | ||
162 | int last_percent; | ||
163 | |||
164 | mempool_t *pending_pool; | 161 | mempool_t *pending_pool; |
165 | 162 | ||
166 | struct exception_table pending; | 163 | struct exception_table pending; |
diff --git a/drivers/md/dm-stripe.c b/drivers/md/dm-stripe.c index b745d8ac625b..a2d068dbe9e2 100644 --- a/drivers/md/dm-stripe.c +++ b/drivers/md/dm-stripe.c | |||
@@ -4,7 +4,7 @@ | |||
4 | * This file is released under the GPL. | 4 | * This file is released under the GPL. |
5 | */ | 5 | */ |
6 | 6 | ||
7 | #include "dm.h" | 7 | #include <linux/device-mapper.h> |
8 | 8 | ||
9 | #include <linux/module.h> | 9 | #include <linux/module.h> |
10 | #include <linux/init.h> | 10 | #include <linux/init.h> |
@@ -60,8 +60,8 @@ static inline struct stripe_c *alloc_context(unsigned int stripes) | |||
60 | { | 60 | { |
61 | size_t len; | 61 | size_t len; |
62 | 62 | ||
63 | if (array_too_big(sizeof(struct stripe_c), sizeof(struct stripe), | 63 | if (dm_array_too_big(sizeof(struct stripe_c), sizeof(struct stripe), |
64 | stripes)) | 64 | stripes)) |
65 | return NULL; | 65 | return NULL; |
66 | 66 | ||
67 | len = sizeof(struct stripe_c) + (sizeof(struct stripe) * stripes); | 67 | len = sizeof(struct stripe_c) + (sizeof(struct stripe) * stripes); |
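dm_array_too_big() plays the same role as the array_too_big() inline being removed from dm.h further down: before computing fixed + obj * num it checks num against (ULONG_MAX - fixed) / obj, so the allocation size cannot wrap around. A small sketch of that overflow guard; the sizeof values are made-up examples:

#include <limits.h>
#include <stdio.h>

/* Returns nonzero if fixed + obj * num would overflow an unsigned long. */
static int array_too_big(unsigned long fixed, unsigned long obj,
                         unsigned long num)
{
    return num > (ULONG_MAX - fixed) / obj;
}

int main(void)
{
    /* e.g. a 104-byte header plus 40 bytes per stripe (illustrative sizes) */
    printf("8 stripes too big? %d\n", array_too_big(104, 40, 8));
    printf("huge count too big? %d\n",
           array_too_big(104, 40, ULONG_MAX / 8));
    return 0;
}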
diff --git a/drivers/md/dm-zero.c b/drivers/md/dm-zero.c index bdec206c404b..cdbf126ec106 100644 --- a/drivers/md/dm-zero.c +++ b/drivers/md/dm-zero.c | |||
@@ -4,7 +4,7 @@ | |||
4 | * This file is released under the GPL. | 4 | * This file is released under the GPL. |
5 | */ | 5 | */ |
6 | 6 | ||
7 | #include "dm.h" | 7 | #include <linux/device-mapper.h> |
8 | 8 | ||
9 | #include <linux/module.h> | 9 | #include <linux/module.h> |
10 | #include <linux/init.h> | 10 | #include <linux/init.h> |
diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 327de03a5bdf..d1d0cd0f5750 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c | |||
@@ -76,7 +76,6 @@ union map_info *dm_get_mapinfo(struct bio *bio) | |||
76 | */ | 76 | */ |
77 | struct dm_wq_req { | 77 | struct dm_wq_req { |
78 | enum { | 78 | enum { |
79 | DM_WQ_FLUSH_ALL, | ||
80 | DM_WQ_FLUSH_DEFERRED, | 79 | DM_WQ_FLUSH_DEFERRED, |
81 | } type; | 80 | } type; |
82 | struct work_struct work; | 81 | struct work_struct work; |
@@ -151,40 +150,40 @@ static struct kmem_cache *_tio_cache; | |||
151 | 150 | ||
152 | static int __init local_init(void) | 151 | static int __init local_init(void) |
153 | { | 152 | { |
154 | int r; | 153 | int r = -ENOMEM; |
155 | 154 | ||
156 | /* allocate a slab for the dm_ios */ | 155 | /* allocate a slab for the dm_ios */ |
157 | _io_cache = KMEM_CACHE(dm_io, 0); | 156 | _io_cache = KMEM_CACHE(dm_io, 0); |
158 | if (!_io_cache) | 157 | if (!_io_cache) |
159 | return -ENOMEM; | 158 | return r; |
160 | 159 | ||
161 | /* allocate a slab for the target ios */ | 160 | /* allocate a slab for the target ios */ |
162 | _tio_cache = KMEM_CACHE(dm_target_io, 0); | 161 | _tio_cache = KMEM_CACHE(dm_target_io, 0); |
163 | if (!_tio_cache) { | 162 | if (!_tio_cache) |
164 | kmem_cache_destroy(_io_cache); | 163 | goto out_free_io_cache; |
165 | return -ENOMEM; | ||
166 | } | ||
167 | 164 | ||
168 | r = dm_uevent_init(); | 165 | r = dm_uevent_init(); |
169 | if (r) { | 166 | if (r) |
170 | kmem_cache_destroy(_tio_cache); | 167 | goto out_free_tio_cache; |
171 | kmem_cache_destroy(_io_cache); | ||
172 | return r; | ||
173 | } | ||
174 | 168 | ||
175 | _major = major; | 169 | _major = major; |
176 | r = register_blkdev(_major, _name); | 170 | r = register_blkdev(_major, _name); |
177 | if (r < 0) { | 171 | if (r < 0) |
178 | kmem_cache_destroy(_tio_cache); | 172 | goto out_uevent_exit; |
179 | kmem_cache_destroy(_io_cache); | ||
180 | dm_uevent_exit(); | ||
181 | return r; | ||
182 | } | ||
183 | 173 | ||
184 | if (!_major) | 174 | if (!_major) |
185 | _major = r; | 175 | _major = r; |
186 | 176 | ||
187 | return 0; | 177 | return 0; |
178 | |||
179 | out_uevent_exit: | ||
180 | dm_uevent_exit(); | ||
181 | out_free_tio_cache: | ||
182 | kmem_cache_destroy(_tio_cache); | ||
183 | out_free_io_cache: | ||
184 | kmem_cache_destroy(_io_cache); | ||
185 | |||
186 | return r; | ||
188 | } | 187 | } |
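The local_init() rework above replaces duplicated per-branch cleanup with a single exit ladder: each failure jumps to a label that unwinds only what has already been set up, in reverse order of acquisition. A self-contained sketch of the pattern; the resources here are placeholders, not the real caches:

#include <stdio.h>
#include <stdlib.h>

static int init_subsystems(void)
{
    int r = -1;
    void *cache_a, *cache_b, *table;

    cache_a = malloc(64);               /* stands in for _io_cache */
    if (!cache_a)
        return r;

    cache_b = malloc(64);               /* stands in for _tio_cache */
    if (!cache_b)
        goto out_free_a;

    table = malloc(256);                /* stands in for uevent/blkdev setup */
    if (!table)
        goto out_free_b;

    printf("all resources acquired\n");
    free(table);                        /* demo only: release before returning */
    free(cache_b);
    free(cache_a);
    return 0;

    /* Unwind in reverse order; each label frees what earlier steps acquired. */
out_free_b:
    free(cache_b);
out_free_a:
    free(cache_a);
    return r;
}

int main(void)
{
    return init_subsystems() ? EXIT_FAILURE : EXIT_SUCCESS;
}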
189 | 188 | ||
190 | static void local_exit(void) | 189 | static void local_exit(void) |
@@ -669,6 +668,7 @@ static struct bio *split_bvec(struct bio *bio, sector_t sector, | |||
669 | clone->bi_size = to_bytes(len); | 668 | clone->bi_size = to_bytes(len); |
670 | clone->bi_io_vec->bv_offset = offset; | 669 | clone->bi_io_vec->bv_offset = offset; |
671 | clone->bi_io_vec->bv_len = clone->bi_size; | 670 | clone->bi_io_vec->bv_len = clone->bi_size; |
671 | clone->bi_flags |= 1 << BIO_CLONED; | ||
672 | 672 | ||
673 | return clone; | 673 | return clone; |
674 | } | 674 | } |
@@ -1394,9 +1394,6 @@ static void dm_wq_work(struct work_struct *work) | |||
1394 | 1394 | ||
1395 | down_write(&md->io_lock); | 1395 | down_write(&md->io_lock); |
1396 | switch (req->type) { | 1396 | switch (req->type) { |
1397 | case DM_WQ_FLUSH_ALL: | ||
1398 | __merge_pushback_list(md); | ||
1399 | /* pass through */ | ||
1400 | case DM_WQ_FLUSH_DEFERRED: | 1397 | case DM_WQ_FLUSH_DEFERRED: |
1401 | __flush_deferred_io(md); | 1398 | __flush_deferred_io(md); |
1402 | break; | 1399 | break; |
@@ -1526,7 +1523,7 @@ int dm_suspend(struct mapped_device *md, unsigned suspend_flags) | |||
1526 | if (!md->suspended_bdev) { | 1523 | if (!md->suspended_bdev) { |
1527 | DMWARN("bdget failed in dm_suspend"); | 1524 | DMWARN("bdget failed in dm_suspend"); |
1528 | r = -ENOMEM; | 1525 | r = -ENOMEM; |
1529 | goto flush_and_out; | 1526 | goto out; |
1530 | } | 1527 | } |
1531 | 1528 | ||
1532 | /* | 1529 | /* |
@@ -1577,14 +1574,6 @@ int dm_suspend(struct mapped_device *md, unsigned suspend_flags) | |||
1577 | 1574 | ||
1578 | set_bit(DMF_SUSPENDED, &md->flags); | 1575 | set_bit(DMF_SUSPENDED, &md->flags); |
1579 | 1576 | ||
1580 | flush_and_out: | ||
1581 | if (r && noflush) | ||
1582 | /* | ||
1583 | * Because there may be already I/Os in the pushback list, | ||
1584 | * flush them before return. | ||
1585 | */ | ||
1586 | dm_queue_flush(md, DM_WQ_FLUSH_ALL, NULL); | ||
1587 | |||
1588 | out: | 1577 | out: |
1589 | if (r && md->suspended_bdev) { | 1578 | if (r && md->suspended_bdev) { |
1590 | bdput(md->suspended_bdev); | 1579 | bdput(md->suspended_bdev); |
diff --git a/drivers/md/dm.h b/drivers/md/dm.h index cd189da2b2fa..0ade60cdef42 100644 --- a/drivers/md/dm.h +++ b/drivers/md/dm.h | |||
@@ -62,15 +62,6 @@ void dm_put_target_type(struct target_type *t); | |||
62 | int dm_target_iterate(void (*iter_func)(struct target_type *tt, | 62 | int dm_target_iterate(void (*iter_func)(struct target_type *tt, |
63 | void *param), void *param); | 63 | void *param), void *param); |
64 | 64 | ||
65 | /*----------------------------------------------------------------- | ||
66 | * Useful inlines. | ||
67 | *---------------------------------------------------------------*/ | ||
68 | static inline int array_too_big(unsigned long fixed, unsigned long obj, | ||
69 | unsigned long num) | ||
70 | { | ||
71 | return (num > (ULONG_MAX - fixed) / obj); | ||
72 | } | ||
73 | |||
74 | int dm_split_args(int *argc, char ***argvp, char *input); | 65 | int dm_split_args(int *argc, char ***argvp, char *input); |
75 | 66 | ||
76 | /* | 67 | /* |