Diffstat (limited to 'drivers/md')
37 files changed, 1721 insertions, 1529 deletions
diff --git a/drivers/md/Kconfig b/drivers/md/Kconfig
index 1de441a6c55f..d949b781f6f8 100644
--- a/drivers/md/Kconfig
+++ b/drivers/md/Kconfig
@@ -260,15 +260,6 @@ config DM_DEBUG_BLOCK_STACK_TRACING | |||
260 | 260 | ||
261 | If unsure, say N. | 261 | If unsure, say N. |
262 | 262 | ||
263 | config DM_DEBUG_SPACE_MAPS | ||
264 | boolean "Extra validation for thin provisioning space maps" | ||
265 | depends on DM_THIN_PROVISIONING | ||
266 | ---help--- | ||
267 | Enable this for messages that may help debug problems with the | ||
268 | space maps used by thin provisioning. | ||
269 | |||
270 | If unsure, say N. | ||
271 | |||
272 | config DM_MIRROR | 263 | config DM_MIRROR |
273 | tristate "Mirror target" | 264 | tristate "Mirror target" |
274 | depends on BLK_DEV_DM | 265 | depends on BLK_DEV_DM |
diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c
index 3f06df59fd82..664743d6a6cd 100644
--- a/drivers/md/dm-crypt.c
+++ b/drivers/md/dm-crypt.c
@@ -42,21 +42,21 @@ struct convert_context { | |||
42 | unsigned int offset_out; | 42 | unsigned int offset_out; |
43 | unsigned int idx_in; | 43 | unsigned int idx_in; |
44 | unsigned int idx_out; | 44 | unsigned int idx_out; |
45 | sector_t sector; | 45 | sector_t cc_sector; |
46 | atomic_t pending; | 46 | atomic_t cc_pending; |
47 | }; | 47 | }; |
48 | 48 | ||
49 | /* | 49 | /* |
50 | * per bio private data | 50 | * per bio private data |
51 | */ | 51 | */ |
52 | struct dm_crypt_io { | 52 | struct dm_crypt_io { |
53 | struct dm_target *target; | 53 | struct crypt_config *cc; |
54 | struct bio *base_bio; | 54 | struct bio *base_bio; |
55 | struct work_struct work; | 55 | struct work_struct work; |
56 | 56 | ||
57 | struct convert_context ctx; | 57 | struct convert_context ctx; |
58 | 58 | ||
59 | atomic_t pending; | 59 | atomic_t io_pending; |
60 | int error; | 60 | int error; |
61 | sector_t sector; | 61 | sector_t sector; |
62 | struct dm_crypt_io *base_io; | 62 | struct dm_crypt_io *base_io; |
@@ -109,9 +109,6 @@ enum flags { DM_CRYPT_SUSPENDED, DM_CRYPT_KEY_VALID }; | |||
109 | */ | 109 | */ |
110 | struct crypt_cpu { | 110 | struct crypt_cpu { |
111 | struct ablkcipher_request *req; | 111 | struct ablkcipher_request *req; |
112 | /* ESSIV: struct crypto_cipher *essiv_tfm */ | ||
113 | void *iv_private; | ||
114 | struct crypto_ablkcipher *tfms[0]; | ||
115 | }; | 112 | }; |
116 | 113 | ||
117 | /* | 114 | /* |
@@ -151,6 +148,10 @@ struct crypt_config { | |||
151 | * per_cpu_ptr() only. | 148 | * per_cpu_ptr() only. |
152 | */ | 149 | */ |
153 | struct crypt_cpu __percpu *cpu; | 150 | struct crypt_cpu __percpu *cpu; |
151 | |||
152 | /* ESSIV: struct crypto_cipher *essiv_tfm */ | ||
153 | void *iv_private; | ||
154 | struct crypto_ablkcipher **tfms; | ||
154 | unsigned tfms_count; | 155 | unsigned tfms_count; |
155 | 156 | ||
156 | /* | 157 | /* |
@@ -193,7 +194,7 @@ static struct crypt_cpu *this_crypt_config(struct crypt_config *cc) | |||
193 | */ | 194 | */ |
194 | static struct crypto_ablkcipher *any_tfm(struct crypt_config *cc) | 195 | static struct crypto_ablkcipher *any_tfm(struct crypt_config *cc) |
195 | { | 196 | { |
196 | return __this_cpu_ptr(cc->cpu)->tfms[0]; | 197 | return cc->tfms[0]; |
197 | } | 198 | } |
198 | 199 | ||
199 | /* | 200 | /* |
@@ -258,7 +259,7 @@ static int crypt_iv_essiv_init(struct crypt_config *cc) | |||
258 | struct hash_desc desc; | 259 | struct hash_desc desc; |
259 | struct scatterlist sg; | 260 | struct scatterlist sg; |
260 | struct crypto_cipher *essiv_tfm; | 261 | struct crypto_cipher *essiv_tfm; |
261 | int err, cpu; | 262 | int err; |
262 | 263 | ||
263 | sg_init_one(&sg, cc->key, cc->key_size); | 264 | sg_init_one(&sg, cc->key, cc->key_size); |
264 | desc.tfm = essiv->hash_tfm; | 265 | desc.tfm = essiv->hash_tfm; |
@@ -268,14 +269,12 @@ static int crypt_iv_essiv_init(struct crypt_config *cc) | |||
268 | if (err) | 269 | if (err) |
269 | return err; | 270 | return err; |
270 | 271 | ||
271 | for_each_possible_cpu(cpu) { | 272 | essiv_tfm = cc->iv_private; |
272 | essiv_tfm = per_cpu_ptr(cc->cpu, cpu)->iv_private, | ||
273 | 273 | ||
274 | err = crypto_cipher_setkey(essiv_tfm, essiv->salt, | 274 | err = crypto_cipher_setkey(essiv_tfm, essiv->salt, |
275 | crypto_hash_digestsize(essiv->hash_tfm)); | 275 | crypto_hash_digestsize(essiv->hash_tfm)); |
276 | if (err) | 276 | if (err) |
277 | return err; | 277 | return err; |
278 | } | ||
279 | 278 | ||
280 | return 0; | 279 | return 0; |
281 | } | 280 | } |
@@ -286,16 +285,14 @@ static int crypt_iv_essiv_wipe(struct crypt_config *cc) | |||
286 | struct iv_essiv_private *essiv = &cc->iv_gen_private.essiv; | 285 | struct iv_essiv_private *essiv = &cc->iv_gen_private.essiv; |
287 | unsigned salt_size = crypto_hash_digestsize(essiv->hash_tfm); | 286 | unsigned salt_size = crypto_hash_digestsize(essiv->hash_tfm); |
288 | struct crypto_cipher *essiv_tfm; | 287 | struct crypto_cipher *essiv_tfm; |
289 | int cpu, r, err = 0; | 288 | int r, err = 0; |
290 | 289 | ||
291 | memset(essiv->salt, 0, salt_size); | 290 | memset(essiv->salt, 0, salt_size); |
292 | 291 | ||
293 | for_each_possible_cpu(cpu) { | 292 | essiv_tfm = cc->iv_private; |
294 | essiv_tfm = per_cpu_ptr(cc->cpu, cpu)->iv_private; | 293 | r = crypto_cipher_setkey(essiv_tfm, essiv->salt, salt_size); |
295 | r = crypto_cipher_setkey(essiv_tfm, essiv->salt, salt_size); | 294 | if (r) |
296 | if (r) | 295 | err = r; |
297 | err = r; | ||
298 | } | ||
299 | 296 | ||
300 | return err; | 297 | return err; |
301 | } | 298 | } |
@@ -335,8 +332,6 @@ static struct crypto_cipher *setup_essiv_cpu(struct crypt_config *cc, | |||
335 | 332 | ||
336 | static void crypt_iv_essiv_dtr(struct crypt_config *cc) | 333 | static void crypt_iv_essiv_dtr(struct crypt_config *cc) |
337 | { | 334 | { |
338 | int cpu; | ||
339 | struct crypt_cpu *cpu_cc; | ||
340 | struct crypto_cipher *essiv_tfm; | 335 | struct crypto_cipher *essiv_tfm; |
341 | struct iv_essiv_private *essiv = &cc->iv_gen_private.essiv; | 336 | struct iv_essiv_private *essiv = &cc->iv_gen_private.essiv; |
342 | 337 | ||
@@ -346,15 +341,12 @@ static void crypt_iv_essiv_dtr(struct crypt_config *cc) | |||
346 | kzfree(essiv->salt); | 341 | kzfree(essiv->salt); |
347 | essiv->salt = NULL; | 342 | essiv->salt = NULL; |
348 | 343 | ||
349 | for_each_possible_cpu(cpu) { | 344 | essiv_tfm = cc->iv_private; |
350 | cpu_cc = per_cpu_ptr(cc->cpu, cpu); | ||
351 | essiv_tfm = cpu_cc->iv_private; | ||
352 | 345 | ||
353 | if (essiv_tfm) | 346 | if (essiv_tfm) |
354 | crypto_free_cipher(essiv_tfm); | 347 | crypto_free_cipher(essiv_tfm); |
355 | 348 | ||
356 | cpu_cc->iv_private = NULL; | 349 | cc->iv_private = NULL; |
357 | } | ||
358 | } | 350 | } |
359 | 351 | ||
360 | static int crypt_iv_essiv_ctr(struct crypt_config *cc, struct dm_target *ti, | 352 | static int crypt_iv_essiv_ctr(struct crypt_config *cc, struct dm_target *ti, |
@@ -363,7 +355,7 @@ static int crypt_iv_essiv_ctr(struct crypt_config *cc, struct dm_target *ti, | |||
363 | struct crypto_cipher *essiv_tfm = NULL; | 355 | struct crypto_cipher *essiv_tfm = NULL; |
364 | struct crypto_hash *hash_tfm = NULL; | 356 | struct crypto_hash *hash_tfm = NULL; |
365 | u8 *salt = NULL; | 357 | u8 *salt = NULL; |
366 | int err, cpu; | 358 | int err; |
367 | 359 | ||
368 | if (!opts) { | 360 | if (!opts) { |
369 | ti->error = "Digest algorithm missing for ESSIV mode"; | 361 | ti->error = "Digest algorithm missing for ESSIV mode"; |
@@ -388,15 +380,13 @@ static int crypt_iv_essiv_ctr(struct crypt_config *cc, struct dm_target *ti, | |||
388 | cc->iv_gen_private.essiv.salt = salt; | 380 | cc->iv_gen_private.essiv.salt = salt; |
389 | cc->iv_gen_private.essiv.hash_tfm = hash_tfm; | 381 | cc->iv_gen_private.essiv.hash_tfm = hash_tfm; |
390 | 382 | ||
391 | for_each_possible_cpu(cpu) { | 383 | essiv_tfm = setup_essiv_cpu(cc, ti, salt, |
392 | essiv_tfm = setup_essiv_cpu(cc, ti, salt, | 384 | crypto_hash_digestsize(hash_tfm)); |
393 | crypto_hash_digestsize(hash_tfm)); | 385 | if (IS_ERR(essiv_tfm)) { |
394 | if (IS_ERR(essiv_tfm)) { | 386 | crypt_iv_essiv_dtr(cc); |
395 | crypt_iv_essiv_dtr(cc); | 387 | return PTR_ERR(essiv_tfm); |
396 | return PTR_ERR(essiv_tfm); | ||
397 | } | ||
398 | per_cpu_ptr(cc->cpu, cpu)->iv_private = essiv_tfm; | ||
399 | } | 388 | } |
389 | cc->iv_private = essiv_tfm; | ||
400 | 390 | ||
401 | return 0; | 391 | return 0; |
402 | 392 | ||
@@ -410,7 +400,7 @@ bad: | |||
410 | static int crypt_iv_essiv_gen(struct crypt_config *cc, u8 *iv, | 400 | static int crypt_iv_essiv_gen(struct crypt_config *cc, u8 *iv, |
411 | struct dm_crypt_request *dmreq) | 401 | struct dm_crypt_request *dmreq) |
412 | { | 402 | { |
413 | struct crypto_cipher *essiv_tfm = this_crypt_config(cc)->iv_private; | 403 | struct crypto_cipher *essiv_tfm = cc->iv_private; |
414 | 404 | ||
415 | memset(iv, 0, cc->iv_size); | 405 | memset(iv, 0, cc->iv_size); |
416 | *(__le64 *)iv = cpu_to_le64(dmreq->iv_sector); | 406 | *(__le64 *)iv = cpu_to_le64(dmreq->iv_sector); |
@@ -664,7 +654,7 @@ static void crypt_convert_init(struct crypt_config *cc, | |||
664 | ctx->offset_out = 0; | 654 | ctx->offset_out = 0; |
665 | ctx->idx_in = bio_in ? bio_in->bi_idx : 0; | 655 | ctx->idx_in = bio_in ? bio_in->bi_idx : 0; |
666 | ctx->idx_out = bio_out ? bio_out->bi_idx : 0; | 656 | ctx->idx_out = bio_out ? bio_out->bi_idx : 0; |
667 | ctx->sector = sector + cc->iv_offset; | 657 | ctx->cc_sector = sector + cc->iv_offset; |
668 | init_completion(&ctx->restart); | 658 | init_completion(&ctx->restart); |
669 | } | 659 | } |
670 | 660 | ||
@@ -695,12 +685,12 @@ static int crypt_convert_block(struct crypt_config *cc, | |||
695 | struct bio_vec *bv_out = bio_iovec_idx(ctx->bio_out, ctx->idx_out); | 685 | struct bio_vec *bv_out = bio_iovec_idx(ctx->bio_out, ctx->idx_out); |
696 | struct dm_crypt_request *dmreq; | 686 | struct dm_crypt_request *dmreq; |
697 | u8 *iv; | 687 | u8 *iv; |
698 | int r = 0; | 688 | int r; |
699 | 689 | ||
700 | dmreq = dmreq_of_req(cc, req); | 690 | dmreq = dmreq_of_req(cc, req); |
701 | iv = iv_of_dmreq(cc, dmreq); | 691 | iv = iv_of_dmreq(cc, dmreq); |
702 | 692 | ||
703 | dmreq->iv_sector = ctx->sector; | 693 | dmreq->iv_sector = ctx->cc_sector; |
704 | dmreq->ctx = ctx; | 694 | dmreq->ctx = ctx; |
705 | sg_init_table(&dmreq->sg_in, 1); | 695 | sg_init_table(&dmreq->sg_in, 1); |
706 | sg_set_page(&dmreq->sg_in, bv_in->bv_page, 1 << SECTOR_SHIFT, | 696 | sg_set_page(&dmreq->sg_in, bv_in->bv_page, 1 << SECTOR_SHIFT, |
@@ -749,12 +739,12 @@ static void crypt_alloc_req(struct crypt_config *cc, | |||
749 | struct convert_context *ctx) | 739 | struct convert_context *ctx) |
750 | { | 740 | { |
751 | struct crypt_cpu *this_cc = this_crypt_config(cc); | 741 | struct crypt_cpu *this_cc = this_crypt_config(cc); |
752 | unsigned key_index = ctx->sector & (cc->tfms_count - 1); | 742 | unsigned key_index = ctx->cc_sector & (cc->tfms_count - 1); |
753 | 743 | ||
754 | if (!this_cc->req) | 744 | if (!this_cc->req) |
755 | this_cc->req = mempool_alloc(cc->req_pool, GFP_NOIO); | 745 | this_cc->req = mempool_alloc(cc->req_pool, GFP_NOIO); |
756 | 746 | ||
757 | ablkcipher_request_set_tfm(this_cc->req, this_cc->tfms[key_index]); | 747 | ablkcipher_request_set_tfm(this_cc->req, cc->tfms[key_index]); |
758 | ablkcipher_request_set_callback(this_cc->req, | 748 | ablkcipher_request_set_callback(this_cc->req, |
759 | CRYPTO_TFM_REQ_MAY_BACKLOG | CRYPTO_TFM_REQ_MAY_SLEEP, | 749 | CRYPTO_TFM_REQ_MAY_BACKLOG | CRYPTO_TFM_REQ_MAY_SLEEP, |
760 | kcryptd_async_done, dmreq_of_req(cc, this_cc->req)); | 750 | kcryptd_async_done, dmreq_of_req(cc, this_cc->req)); |
@@ -769,14 +759,14 @@ static int crypt_convert(struct crypt_config *cc, | |||
769 | struct crypt_cpu *this_cc = this_crypt_config(cc); | 759 | struct crypt_cpu *this_cc = this_crypt_config(cc); |
770 | int r; | 760 | int r; |
771 | 761 | ||
772 | atomic_set(&ctx->pending, 1); | 762 | atomic_set(&ctx->cc_pending, 1); |
773 | 763 | ||
774 | while(ctx->idx_in < ctx->bio_in->bi_vcnt && | 764 | while(ctx->idx_in < ctx->bio_in->bi_vcnt && |
775 | ctx->idx_out < ctx->bio_out->bi_vcnt) { | 765 | ctx->idx_out < ctx->bio_out->bi_vcnt) { |
776 | 766 | ||
777 | crypt_alloc_req(cc, ctx); | 767 | crypt_alloc_req(cc, ctx); |
778 | 768 | ||
779 | atomic_inc(&ctx->pending); | 769 | atomic_inc(&ctx->cc_pending); |
780 | 770 | ||
781 | r = crypt_convert_block(cc, ctx, this_cc->req); | 771 | r = crypt_convert_block(cc, ctx, this_cc->req); |
782 | 772 | ||
@@ -788,19 +778,19 @@ static int crypt_convert(struct crypt_config *cc, | |||
788 | /* fall through*/ | 778 | /* fall through*/ |
789 | case -EINPROGRESS: | 779 | case -EINPROGRESS: |
790 | this_cc->req = NULL; | 780 | this_cc->req = NULL; |
791 | ctx->sector++; | 781 | ctx->cc_sector++; |
792 | continue; | 782 | continue; |
793 | 783 | ||
794 | /* sync */ | 784 | /* sync */ |
795 | case 0: | 785 | case 0: |
796 | atomic_dec(&ctx->pending); | 786 | atomic_dec(&ctx->cc_pending); |
797 | ctx->sector++; | 787 | ctx->cc_sector++; |
798 | cond_resched(); | 788 | cond_resched(); |
799 | continue; | 789 | continue; |
800 | 790 | ||
801 | /* error */ | 791 | /* error */ |
802 | default: | 792 | default: |
803 | atomic_dec(&ctx->pending); | 793 | atomic_dec(&ctx->cc_pending); |
804 | return r; | 794 | return r; |
805 | } | 795 | } |
806 | } | 796 | } |
@@ -811,7 +801,7 @@ static int crypt_convert(struct crypt_config *cc, | |||
811 | static void dm_crypt_bio_destructor(struct bio *bio) | 801 | static void dm_crypt_bio_destructor(struct bio *bio) |
812 | { | 802 | { |
813 | struct dm_crypt_io *io = bio->bi_private; | 803 | struct dm_crypt_io *io = bio->bi_private; |
814 | struct crypt_config *cc = io->target->private; | 804 | struct crypt_config *cc = io->cc; |
815 | 805 | ||
816 | bio_free(bio, cc->bs); | 806 | bio_free(bio, cc->bs); |
817 | } | 807 | } |
@@ -825,7 +815,7 @@ static void dm_crypt_bio_destructor(struct bio *bio) | |||
825 | static struct bio *crypt_alloc_buffer(struct dm_crypt_io *io, unsigned size, | 815 | static struct bio *crypt_alloc_buffer(struct dm_crypt_io *io, unsigned size, |
826 | unsigned *out_of_pages) | 816 | unsigned *out_of_pages) |
827 | { | 817 | { |
828 | struct crypt_config *cc = io->target->private; | 818 | struct crypt_config *cc = io->cc; |
829 | struct bio *clone; | 819 | struct bio *clone; |
830 | unsigned int nr_iovecs = (size + PAGE_SIZE - 1) >> PAGE_SHIFT; | 820 | unsigned int nr_iovecs = (size + PAGE_SIZE - 1) >> PAGE_SHIFT; |
831 | gfp_t gfp_mask = GFP_NOIO | __GFP_HIGHMEM; | 821 | gfp_t gfp_mask = GFP_NOIO | __GFP_HIGHMEM; |
@@ -884,26 +874,25 @@ static void crypt_free_buffer_pages(struct crypt_config *cc, struct bio *clone) | |||
884 | } | 874 | } |
885 | } | 875 | } |
886 | 876 | ||
887 | static struct dm_crypt_io *crypt_io_alloc(struct dm_target *ti, | 877 | static struct dm_crypt_io *crypt_io_alloc(struct crypt_config *cc, |
888 | struct bio *bio, sector_t sector) | 878 | struct bio *bio, sector_t sector) |
889 | { | 879 | { |
890 | struct crypt_config *cc = ti->private; | ||
891 | struct dm_crypt_io *io; | 880 | struct dm_crypt_io *io; |
892 | 881 | ||
893 | io = mempool_alloc(cc->io_pool, GFP_NOIO); | 882 | io = mempool_alloc(cc->io_pool, GFP_NOIO); |
894 | io->target = ti; | 883 | io->cc = cc; |
895 | io->base_bio = bio; | 884 | io->base_bio = bio; |
896 | io->sector = sector; | 885 | io->sector = sector; |
897 | io->error = 0; | 886 | io->error = 0; |
898 | io->base_io = NULL; | 887 | io->base_io = NULL; |
899 | atomic_set(&io->pending, 0); | 888 | atomic_set(&io->io_pending, 0); |
900 | 889 | ||
901 | return io; | 890 | return io; |
902 | } | 891 | } |
903 | 892 | ||
904 | static void crypt_inc_pending(struct dm_crypt_io *io) | 893 | static void crypt_inc_pending(struct dm_crypt_io *io) |
905 | { | 894 | { |
906 | atomic_inc(&io->pending); | 895 | atomic_inc(&io->io_pending); |
907 | } | 896 | } |
908 | 897 | ||
909 | /* | 898 | /* |
@@ -913,12 +902,12 @@ static void crypt_inc_pending(struct dm_crypt_io *io) | |||
913 | */ | 902 | */ |
914 | static void crypt_dec_pending(struct dm_crypt_io *io) | 903 | static void crypt_dec_pending(struct dm_crypt_io *io) |
915 | { | 904 | { |
916 | struct crypt_config *cc = io->target->private; | 905 | struct crypt_config *cc = io->cc; |
917 | struct bio *base_bio = io->base_bio; | 906 | struct bio *base_bio = io->base_bio; |
918 | struct dm_crypt_io *base_io = io->base_io; | 907 | struct dm_crypt_io *base_io = io->base_io; |
919 | int error = io->error; | 908 | int error = io->error; |
920 | 909 | ||
921 | if (!atomic_dec_and_test(&io->pending)) | 910 | if (!atomic_dec_and_test(&io->io_pending)) |
922 | return; | 911 | return; |
923 | 912 | ||
924 | mempool_free(io, cc->io_pool); | 913 | mempool_free(io, cc->io_pool); |
@@ -952,7 +941,7 @@ static void crypt_dec_pending(struct dm_crypt_io *io) | |||
952 | static void crypt_endio(struct bio *clone, int error) | 941 | static void crypt_endio(struct bio *clone, int error) |
953 | { | 942 | { |
954 | struct dm_crypt_io *io = clone->bi_private; | 943 | struct dm_crypt_io *io = clone->bi_private; |
955 | struct crypt_config *cc = io->target->private; | 944 | struct crypt_config *cc = io->cc; |
956 | unsigned rw = bio_data_dir(clone); | 945 | unsigned rw = bio_data_dir(clone); |
957 | 946 | ||
958 | if (unlikely(!bio_flagged(clone, BIO_UPTODATE) && !error)) | 947 | if (unlikely(!bio_flagged(clone, BIO_UPTODATE) && !error)) |
@@ -979,7 +968,7 @@ static void crypt_endio(struct bio *clone, int error) | |||
979 | 968 | ||
980 | static void clone_init(struct dm_crypt_io *io, struct bio *clone) | 969 | static void clone_init(struct dm_crypt_io *io, struct bio *clone) |
981 | { | 970 | { |
982 | struct crypt_config *cc = io->target->private; | 971 | struct crypt_config *cc = io->cc; |
983 | 972 | ||
984 | clone->bi_private = io; | 973 | clone->bi_private = io; |
985 | clone->bi_end_io = crypt_endio; | 974 | clone->bi_end_io = crypt_endio; |
@@ -990,7 +979,7 @@ static void clone_init(struct dm_crypt_io *io, struct bio *clone) | |||
990 | 979 | ||
991 | static int kcryptd_io_read(struct dm_crypt_io *io, gfp_t gfp) | 980 | static int kcryptd_io_read(struct dm_crypt_io *io, gfp_t gfp) |
992 | { | 981 | { |
993 | struct crypt_config *cc = io->target->private; | 982 | struct crypt_config *cc = io->cc; |
994 | struct bio *base_bio = io->base_bio; | 983 | struct bio *base_bio = io->base_bio; |
995 | struct bio *clone; | 984 | struct bio *clone; |
996 | 985 | ||
@@ -1038,7 +1027,7 @@ static void kcryptd_io(struct work_struct *work) | |||
1038 | 1027 | ||
1039 | static void kcryptd_queue_io(struct dm_crypt_io *io) | 1028 | static void kcryptd_queue_io(struct dm_crypt_io *io) |
1040 | { | 1029 | { |
1041 | struct crypt_config *cc = io->target->private; | 1030 | struct crypt_config *cc = io->cc; |
1042 | 1031 | ||
1043 | INIT_WORK(&io->work, kcryptd_io); | 1032 | INIT_WORK(&io->work, kcryptd_io); |
1044 | queue_work(cc->io_queue, &io->work); | 1033 | queue_work(cc->io_queue, &io->work); |
@@ -1047,7 +1036,7 @@ static void kcryptd_queue_io(struct dm_crypt_io *io) | |||
1047 | static void kcryptd_crypt_write_io_submit(struct dm_crypt_io *io, int async) | 1036 | static void kcryptd_crypt_write_io_submit(struct dm_crypt_io *io, int async) |
1048 | { | 1037 | { |
1049 | struct bio *clone = io->ctx.bio_out; | 1038 | struct bio *clone = io->ctx.bio_out; |
1050 | struct crypt_config *cc = io->target->private; | 1039 | struct crypt_config *cc = io->cc; |
1051 | 1040 | ||
1052 | if (unlikely(io->error < 0)) { | 1041 | if (unlikely(io->error < 0)) { |
1053 | crypt_free_buffer_pages(cc, clone); | 1042 | crypt_free_buffer_pages(cc, clone); |
@@ -1069,7 +1058,7 @@ static void kcryptd_crypt_write_io_submit(struct dm_crypt_io *io, int async) | |||
1069 | 1058 | ||
1070 | static void kcryptd_crypt_write_convert(struct dm_crypt_io *io) | 1059 | static void kcryptd_crypt_write_convert(struct dm_crypt_io *io) |
1071 | { | 1060 | { |
1072 | struct crypt_config *cc = io->target->private; | 1061 | struct crypt_config *cc = io->cc; |
1073 | struct bio *clone; | 1062 | struct bio *clone; |
1074 | struct dm_crypt_io *new_io; | 1063 | struct dm_crypt_io *new_io; |
1075 | int crypt_finished; | 1064 | int crypt_finished; |
@@ -1107,7 +1096,7 @@ static void kcryptd_crypt_write_convert(struct dm_crypt_io *io) | |||
1107 | if (r < 0) | 1096 | if (r < 0) |
1108 | io->error = -EIO; | 1097 | io->error = -EIO; |
1109 | 1098 | ||
1110 | crypt_finished = atomic_dec_and_test(&io->ctx.pending); | 1099 | crypt_finished = atomic_dec_and_test(&io->ctx.cc_pending); |
1111 | 1100 | ||
1112 | /* Encryption was already finished, submit io now */ | 1101 | /* Encryption was already finished, submit io now */ |
1113 | if (crypt_finished) { | 1102 | if (crypt_finished) { |
@@ -1135,7 +1124,7 @@ static void kcryptd_crypt_write_convert(struct dm_crypt_io *io) | |||
1135 | * between fragments, so switch to a new dm_crypt_io structure. | 1124 | * between fragments, so switch to a new dm_crypt_io structure. |
1136 | */ | 1125 | */ |
1137 | if (unlikely(!crypt_finished && remaining)) { | 1126 | if (unlikely(!crypt_finished && remaining)) { |
1138 | new_io = crypt_io_alloc(io->target, io->base_bio, | 1127 | new_io = crypt_io_alloc(io->cc, io->base_bio, |
1139 | sector); | 1128 | sector); |
1140 | crypt_inc_pending(new_io); | 1129 | crypt_inc_pending(new_io); |
1141 | crypt_convert_init(cc, &new_io->ctx, NULL, | 1130 | crypt_convert_init(cc, &new_io->ctx, NULL, |
@@ -1169,7 +1158,7 @@ static void kcryptd_crypt_read_done(struct dm_crypt_io *io) | |||
1169 | 1158 | ||
1170 | static void kcryptd_crypt_read_convert(struct dm_crypt_io *io) | 1159 | static void kcryptd_crypt_read_convert(struct dm_crypt_io *io) |
1171 | { | 1160 | { |
1172 | struct crypt_config *cc = io->target->private; | 1161 | struct crypt_config *cc = io->cc; |
1173 | int r = 0; | 1162 | int r = 0; |
1174 | 1163 | ||
1175 | crypt_inc_pending(io); | 1164 | crypt_inc_pending(io); |
@@ -1181,7 +1170,7 @@ static void kcryptd_crypt_read_convert(struct dm_crypt_io *io) | |||
1181 | if (r < 0) | 1170 | if (r < 0) |
1182 | io->error = -EIO; | 1171 | io->error = -EIO; |
1183 | 1172 | ||
1184 | if (atomic_dec_and_test(&io->ctx.pending)) | 1173 | if (atomic_dec_and_test(&io->ctx.cc_pending)) |
1185 | kcryptd_crypt_read_done(io); | 1174 | kcryptd_crypt_read_done(io); |
1186 | 1175 | ||
1187 | crypt_dec_pending(io); | 1176 | crypt_dec_pending(io); |
@@ -1193,7 +1182,7 @@ static void kcryptd_async_done(struct crypto_async_request *async_req, | |||
1193 | struct dm_crypt_request *dmreq = async_req->data; | 1182 | struct dm_crypt_request *dmreq = async_req->data; |
1194 | struct convert_context *ctx = dmreq->ctx; | 1183 | struct convert_context *ctx = dmreq->ctx; |
1195 | struct dm_crypt_io *io = container_of(ctx, struct dm_crypt_io, ctx); | 1184 | struct dm_crypt_io *io = container_of(ctx, struct dm_crypt_io, ctx); |
1196 | struct crypt_config *cc = io->target->private; | 1185 | struct crypt_config *cc = io->cc; |
1197 | 1186 | ||
1198 | if (error == -EINPROGRESS) { | 1187 | if (error == -EINPROGRESS) { |
1199 | complete(&ctx->restart); | 1188 | complete(&ctx->restart); |
@@ -1208,7 +1197,7 @@ static void kcryptd_async_done(struct crypto_async_request *async_req, | |||
1208 | 1197 | ||
1209 | mempool_free(req_of_dmreq(cc, dmreq), cc->req_pool); | 1198 | mempool_free(req_of_dmreq(cc, dmreq), cc->req_pool); |
1210 | 1199 | ||
1211 | if (!atomic_dec_and_test(&ctx->pending)) | 1200 | if (!atomic_dec_and_test(&ctx->cc_pending)) |
1212 | return; | 1201 | return; |
1213 | 1202 | ||
1214 | if (bio_data_dir(io->base_bio) == READ) | 1203 | if (bio_data_dir(io->base_bio) == READ) |
@@ -1229,7 +1218,7 @@ static void kcryptd_crypt(struct work_struct *work) | |||
1229 | 1218 | ||
1230 | static void kcryptd_queue_crypt(struct dm_crypt_io *io) | 1219 | static void kcryptd_queue_crypt(struct dm_crypt_io *io) |
1231 | { | 1220 | { |
1232 | struct crypt_config *cc = io->target->private; | 1221 | struct crypt_config *cc = io->cc; |
1233 | 1222 | ||
1234 | INIT_WORK(&io->work, kcryptd_crypt); | 1223 | INIT_WORK(&io->work, kcryptd_crypt); |
1235 | queue_work(cc->crypt_queue, &io->work); | 1224 | queue_work(cc->crypt_queue, &io->work); |
@@ -1241,7 +1230,6 @@ static void kcryptd_queue_crypt(struct dm_crypt_io *io) | |||
1241 | static int crypt_decode_key(u8 *key, char *hex, unsigned int size) | 1230 | static int crypt_decode_key(u8 *key, char *hex, unsigned int size) |
1242 | { | 1231 | { |
1243 | char buffer[3]; | 1232 | char buffer[3]; |
1244 | char *endp; | ||
1245 | unsigned int i; | 1233 | unsigned int i; |
1246 | 1234 | ||
1247 | buffer[2] = '\0'; | 1235 | buffer[2] = '\0'; |
@@ -1250,9 +1238,7 @@ static int crypt_decode_key(u8 *key, char *hex, unsigned int size) | |||
1250 | buffer[0] = *hex++; | 1238 | buffer[0] = *hex++; |
1251 | buffer[1] = *hex++; | 1239 | buffer[1] = *hex++; |
1252 | 1240 | ||
1253 | key[i] = (u8)simple_strtoul(buffer, &endp, 16); | 1241 | if (kstrtou8(buffer, 16, &key[i])) |
1254 | |||
1255 | if (endp != &buffer[2]) | ||
1256 | return -EINVAL; | 1242 | return -EINVAL; |
1257 | } | 1243 | } |
1258 | 1244 | ||
@@ -1276,29 +1262,38 @@ static void crypt_encode_key(char *hex, u8 *key, unsigned int size) | |||
1276 | } | 1262 | } |
1277 | } | 1263 | } |
1278 | 1264 | ||
1279 | static void crypt_free_tfms(struct crypt_config *cc, int cpu) | 1265 | static void crypt_free_tfms(struct crypt_config *cc) |
1280 | { | 1266 | { |
1281 | struct crypt_cpu *cpu_cc = per_cpu_ptr(cc->cpu, cpu); | ||
1282 | unsigned i; | 1267 | unsigned i; |
1283 | 1268 | ||
1269 | if (!cc->tfms) | ||
1270 | return; | ||
1271 | |||
1284 | for (i = 0; i < cc->tfms_count; i++) | 1272 | for (i = 0; i < cc->tfms_count; i++) |
1285 | if (cpu_cc->tfms[i] && !IS_ERR(cpu_cc->tfms[i])) { | 1273 | if (cc->tfms[i] && !IS_ERR(cc->tfms[i])) { |
1286 | crypto_free_ablkcipher(cpu_cc->tfms[i]); | 1274 | crypto_free_ablkcipher(cc->tfms[i]); |
1287 | cpu_cc->tfms[i] = NULL; | 1275 | cc->tfms[i] = NULL; |
1288 | } | 1276 | } |
1277 | |||
1278 | kfree(cc->tfms); | ||
1279 | cc->tfms = NULL; | ||
1289 | } | 1280 | } |
1290 | 1281 | ||
1291 | static int crypt_alloc_tfms(struct crypt_config *cc, int cpu, char *ciphermode) | 1282 | static int crypt_alloc_tfms(struct crypt_config *cc, char *ciphermode) |
1292 | { | 1283 | { |
1293 | struct crypt_cpu *cpu_cc = per_cpu_ptr(cc->cpu, cpu); | ||
1294 | unsigned i; | 1284 | unsigned i; |
1295 | int err; | 1285 | int err; |
1296 | 1286 | ||
1287 | cc->tfms = kmalloc(cc->tfms_count * sizeof(struct crypto_ablkcipher *), | ||
1288 | GFP_KERNEL); | ||
1289 | if (!cc->tfms) | ||
1290 | return -ENOMEM; | ||
1291 | |||
1297 | for (i = 0; i < cc->tfms_count; i++) { | 1292 | for (i = 0; i < cc->tfms_count; i++) { |
1298 | cpu_cc->tfms[i] = crypto_alloc_ablkcipher(ciphermode, 0, 0); | 1293 | cc->tfms[i] = crypto_alloc_ablkcipher(ciphermode, 0, 0); |
1299 | if (IS_ERR(cpu_cc->tfms[i])) { | 1294 | if (IS_ERR(cc->tfms[i])) { |
1300 | err = PTR_ERR(cpu_cc->tfms[i]); | 1295 | err = PTR_ERR(cc->tfms[i]); |
1301 | crypt_free_tfms(cc, cpu); | 1296 | crypt_free_tfms(cc); |
1302 | return err; | 1297 | return err; |
1303 | } | 1298 | } |
1304 | } | 1299 | } |
@@ -1309,15 +1304,14 @@ static int crypt_alloc_tfms(struct crypt_config *cc, int cpu, char *ciphermode) | |||
1309 | static int crypt_setkey_allcpus(struct crypt_config *cc) | 1304 | static int crypt_setkey_allcpus(struct crypt_config *cc) |
1310 | { | 1305 | { |
1311 | unsigned subkey_size = cc->key_size >> ilog2(cc->tfms_count); | 1306 | unsigned subkey_size = cc->key_size >> ilog2(cc->tfms_count); |
1312 | int cpu, err = 0, i, r; | 1307 | int err = 0, i, r; |
1313 | 1308 | ||
1314 | for_each_possible_cpu(cpu) { | 1309 | for (i = 0; i < cc->tfms_count; i++) { |
1315 | for (i = 0; i < cc->tfms_count; i++) { | 1310 | r = crypto_ablkcipher_setkey(cc->tfms[i], |
1316 | r = crypto_ablkcipher_setkey(per_cpu_ptr(cc->cpu, cpu)->tfms[i], | 1311 | cc->key + (i * subkey_size), |
1317 | cc->key + (i * subkey_size), subkey_size); | 1312 | subkey_size); |
1318 | if (r) | 1313 | if (r) |
1319 | err = r; | 1314 | err = r; |
1320 | } | ||
1321 | } | 1315 | } |
1322 | 1316 | ||
1323 | return err; | 1317 | return err; |
@@ -1379,9 +1373,10 @@ static void crypt_dtr(struct dm_target *ti) | |||
1379 | cpu_cc = per_cpu_ptr(cc->cpu, cpu); | 1373 | cpu_cc = per_cpu_ptr(cc->cpu, cpu); |
1380 | if (cpu_cc->req) | 1374 | if (cpu_cc->req) |
1381 | mempool_free(cpu_cc->req, cc->req_pool); | 1375 | mempool_free(cpu_cc->req, cc->req_pool); |
1382 | crypt_free_tfms(cc, cpu); | ||
1383 | } | 1376 | } |
1384 | 1377 | ||
1378 | crypt_free_tfms(cc); | ||
1379 | |||
1385 | if (cc->bs) | 1380 | if (cc->bs) |
1386 | bioset_free(cc->bs); | 1381 | bioset_free(cc->bs); |
1387 | 1382 | ||
@@ -1414,7 +1409,7 @@ static int crypt_ctr_cipher(struct dm_target *ti, | |||
1414 | struct crypt_config *cc = ti->private; | 1409 | struct crypt_config *cc = ti->private; |
1415 | char *tmp, *cipher, *chainmode, *ivmode, *ivopts, *keycount; | 1410 | char *tmp, *cipher, *chainmode, *ivmode, *ivopts, *keycount; |
1416 | char *cipher_api = NULL; | 1411 | char *cipher_api = NULL; |
1417 | int cpu, ret = -EINVAL; | 1412 | int ret = -EINVAL; |
1418 | char dummy; | 1413 | char dummy; |
1419 | 1414 | ||
1420 | /* Convert to crypto api definition? */ | 1415 | /* Convert to crypto api definition? */ |
@@ -1455,8 +1450,7 @@ static int crypt_ctr_cipher(struct dm_target *ti, | |||
1455 | if (tmp) | 1450 | if (tmp) |
1456 | DMWARN("Ignoring unexpected additional cipher options"); | 1451 | DMWARN("Ignoring unexpected additional cipher options"); |
1457 | 1452 | ||
1458 | cc->cpu = __alloc_percpu(sizeof(*(cc->cpu)) + | 1453 | cc->cpu = __alloc_percpu(sizeof(*(cc->cpu)), |
1459 | cc->tfms_count * sizeof(*(cc->cpu->tfms)), | ||
1460 | __alignof__(struct crypt_cpu)); | 1454 | __alignof__(struct crypt_cpu)); |
1461 | if (!cc->cpu) { | 1455 | if (!cc->cpu) { |
1462 | ti->error = "Cannot allocate per cpu state"; | 1456 | ti->error = "Cannot allocate per cpu state"; |
@@ -1489,12 +1483,10 @@ static int crypt_ctr_cipher(struct dm_target *ti, | |||
1489 | } | 1483 | } |
1490 | 1484 | ||
1491 | /* Allocate cipher */ | 1485 | /* Allocate cipher */ |
1492 | for_each_possible_cpu(cpu) { | 1486 | ret = crypt_alloc_tfms(cc, cipher_api); |
1493 | ret = crypt_alloc_tfms(cc, cpu, cipher_api); | 1487 | if (ret < 0) { |
1494 | if (ret < 0) { | 1488 | ti->error = "Error allocating crypto tfm"; |
1495 | ti->error = "Error allocating crypto tfm"; | 1489 | goto bad; |
1496 | goto bad; | ||
1497 | } | ||
1498 | } | 1490 | } |
1499 | 1491 | ||
1500 | /* Initialize and set key */ | 1492 | /* Initialize and set key */ |
@@ -1702,7 +1694,7 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv) | |||
1702 | } | 1694 | } |
1703 | 1695 | ||
1704 | ti->num_flush_requests = 1; | 1696 | ti->num_flush_requests = 1; |
1705 | ti->discard_zeroes_data_unsupported = 1; | 1697 | ti->discard_zeroes_data_unsupported = true; |
1706 | 1698 | ||
1707 | return 0; | 1699 | return 0; |
1708 | 1700 | ||
@@ -1715,7 +1707,7 @@ static int crypt_map(struct dm_target *ti, struct bio *bio, | |||
1715 | union map_info *map_context) | 1707 | union map_info *map_context) |
1716 | { | 1708 | { |
1717 | struct dm_crypt_io *io; | 1709 | struct dm_crypt_io *io; |
1718 | struct crypt_config *cc; | 1710 | struct crypt_config *cc = ti->private; |
1719 | 1711 | ||
1720 | /* | 1712 | /* |
1721 | * If bio is REQ_FLUSH or REQ_DISCARD, just bypass crypt queues. | 1713 | * If bio is REQ_FLUSH or REQ_DISCARD, just bypass crypt queues. |
@@ -1723,14 +1715,13 @@ static int crypt_map(struct dm_target *ti, struct bio *bio, | |||
1723 | * - for REQ_DISCARD caller must use flush if IO ordering matters | 1715 | * - for REQ_DISCARD caller must use flush if IO ordering matters |
1724 | */ | 1716 | */ |
1725 | if (unlikely(bio->bi_rw & (REQ_FLUSH | REQ_DISCARD))) { | 1717 | if (unlikely(bio->bi_rw & (REQ_FLUSH | REQ_DISCARD))) { |
1726 | cc = ti->private; | ||
1727 | bio->bi_bdev = cc->dev->bdev; | 1718 | bio->bi_bdev = cc->dev->bdev; |
1728 | if (bio_sectors(bio)) | 1719 | if (bio_sectors(bio)) |
1729 | bio->bi_sector = cc->start + dm_target_offset(ti, bio->bi_sector); | 1720 | bio->bi_sector = cc->start + dm_target_offset(ti, bio->bi_sector); |
1730 | return DM_MAPIO_REMAPPED; | 1721 | return DM_MAPIO_REMAPPED; |
1731 | } | 1722 | } |
1732 | 1723 | ||
1733 | io = crypt_io_alloc(ti, bio, dm_target_offset(ti, bio->bi_sector)); | 1724 | io = crypt_io_alloc(cc, bio, dm_target_offset(ti, bio->bi_sector)); |
1734 | 1725 | ||
1735 | if (bio_data_dir(io->base_bio) == READ) { | 1726 | if (bio_data_dir(io->base_bio) == READ) { |
1736 | if (kcryptd_io_read(io, GFP_NOWAIT)) | 1727 | if (kcryptd_io_read(io, GFP_NOWAIT)) |
@@ -1742,7 +1733,7 @@ static int crypt_map(struct dm_target *ti, struct bio *bio, | |||
1742 | } | 1733 | } |
1743 | 1734 | ||
1744 | static int crypt_status(struct dm_target *ti, status_type_t type, | 1735 | static int crypt_status(struct dm_target *ti, status_type_t type, |
1745 | char *result, unsigned int maxlen) | 1736 | unsigned status_flags, char *result, unsigned maxlen) |
1746 | { | 1737 | { |
1747 | struct crypt_config *cc = ti->private; | 1738 | struct crypt_config *cc = ti->private; |
1748 | unsigned int sz = 0; | 1739 | unsigned int sz = 0; |
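For reference, the reworked crypt_decode_key() above drops the simple_strtoul()/endp dance in favour of kstrtou8(), which parses and validates each two-character hex byte in one call. A minimal standalone sketch of that pattern (the function name is illustrative, not part of the patch):

#include <linux/kernel.h>        /* kstrtou8() */
#include <linux/types.h>
#include <linux/errno.h>

/* Parse a hex key string ("2af9c0...") into raw bytes, two characters per byte. */
static int decode_hex_key(u8 *key, const char *hex, unsigned int key_size)
{
        char buffer[3];
        unsigned int i;

        buffer[2] = '\0';

        for (i = 0; i < key_size; i++) {
                buffer[0] = *hex++;
                buffer[1] = *hex++;

                /* kstrtou8() rejects '\0', non-hex characters and overflow. */
                if (kstrtou8(buffer, 16, &key[i]))
                        return -EINVAL;
        }

        return 0;
}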
diff --git a/drivers/md/dm-delay.c b/drivers/md/dm-delay.c
index 2dc22dddb2ae..f53846f9ab50 100644
--- a/drivers/md/dm-delay.c
+++ b/drivers/md/dm-delay.c
@@ -295,7 +295,7 @@ static int delay_map(struct dm_target *ti, struct bio *bio, | |||
295 | } | 295 | } |
296 | 296 | ||
297 | static int delay_status(struct dm_target *ti, status_type_t type, | 297 | static int delay_status(struct dm_target *ti, status_type_t type, |
298 | char *result, unsigned maxlen) | 298 | unsigned status_flags, char *result, unsigned maxlen) |
299 | { | 299 | { |
300 | struct delay_c *dc = ti->private; | 300 | struct delay_c *dc = ti->private; |
301 | int sz = 0; | 301 | int sz = 0; |
diff --git a/drivers/md/dm-exception-store.c b/drivers/md/dm-exception-store.c
index aa70f7d43a1a..ebaa4f803eec 100644
--- a/drivers/md/dm-exception-store.c
+++ b/drivers/md/dm-exception-store.c
@@ -142,24 +142,19 @@ EXPORT_SYMBOL(dm_exception_store_type_unregister); | |||
142 | static int set_chunk_size(struct dm_exception_store *store, | 142 | static int set_chunk_size(struct dm_exception_store *store, |
143 | const char *chunk_size_arg, char **error) | 143 | const char *chunk_size_arg, char **error) |
144 | { | 144 | { |
145 | unsigned long chunk_size_ulong; | 145 | unsigned chunk_size; |
146 | char *value; | ||
147 | 146 | ||
148 | chunk_size_ulong = simple_strtoul(chunk_size_arg, &value, 10); | 147 | if (kstrtouint(chunk_size_arg, 10, &chunk_size)) { |
149 | if (*chunk_size_arg == '\0' || *value != '\0' || | ||
150 | chunk_size_ulong > UINT_MAX) { | ||
151 | *error = "Invalid chunk size"; | 148 | *error = "Invalid chunk size"; |
152 | return -EINVAL; | 149 | return -EINVAL; |
153 | } | 150 | } |
154 | 151 | ||
155 | if (!chunk_size_ulong) { | 152 | if (!chunk_size) { |
156 | store->chunk_size = store->chunk_mask = store->chunk_shift = 0; | 153 | store->chunk_size = store->chunk_mask = store->chunk_shift = 0; |
157 | return 0; | 154 | return 0; |
158 | } | 155 | } |
159 | 156 | ||
160 | return dm_exception_store_set_chunk_size(store, | 157 | return dm_exception_store_set_chunk_size(store, chunk_size, error); |
161 | (unsigned) chunk_size_ulong, | ||
162 | error); | ||
163 | } | 158 | } |
164 | 159 | ||
165 | int dm_exception_store_set_chunk_size(struct dm_exception_store *store, | 160 | int dm_exception_store_set_chunk_size(struct dm_exception_store *store, |
diff --git a/drivers/md/dm-flakey.c b/drivers/md/dm-flakey.c
index ac49c01f1a44..cc15543a6ad7 100644
--- a/drivers/md/dm-flakey.c
+++ b/drivers/md/dm-flakey.c
@@ -333,7 +333,7 @@ static int flakey_end_io(struct dm_target *ti, struct bio *bio, | |||
333 | } | 333 | } |
334 | 334 | ||
335 | static int flakey_status(struct dm_target *ti, status_type_t type, | 335 | static int flakey_status(struct dm_target *ti, status_type_t type, |
336 | char *result, unsigned int maxlen) | 336 | unsigned status_flags, char *result, unsigned maxlen) |
337 | { | 337 | { |
338 | unsigned sz = 0; | 338 | unsigned sz = 0; |
339 | struct flakey_c *fc = ti->private; | 339 | struct flakey_c *fc = ti->private; |
diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c
index a1a3e6df17b8..afd95986d099 100644
--- a/drivers/md/dm-ioctl.c
+++ b/drivers/md/dm-ioctl.c
@@ -1054,6 +1054,7 @@ static void retrieve_status(struct dm_table *table, | |||
1054 | char *outbuf, *outptr; | 1054 | char *outbuf, *outptr; |
1055 | status_type_t type; | 1055 | status_type_t type; |
1056 | size_t remaining, len, used = 0; | 1056 | size_t remaining, len, used = 0; |
1057 | unsigned status_flags = 0; | ||
1057 | 1058 | ||
1058 | outptr = outbuf = get_result_buffer(param, param_size, &len); | 1059 | outptr = outbuf = get_result_buffer(param, param_size, &len); |
1059 | 1060 | ||
@@ -1090,7 +1091,9 @@ static void retrieve_status(struct dm_table *table, | |||
1090 | 1091 | ||
1091 | /* Get the status/table string from the target driver */ | 1092 | /* Get the status/table string from the target driver */ |
1092 | if (ti->type->status) { | 1093 | if (ti->type->status) { |
1093 | if (ti->type->status(ti, type, outptr, remaining)) { | 1094 | if (param->flags & DM_NOFLUSH_FLAG) |
1095 | status_flags |= DM_STATUS_NOFLUSH_FLAG; | ||
1096 | if (ti->type->status(ti, type, status_flags, outptr, remaining)) { | ||
1094 | param->flags |= DM_BUFFER_FULL_FLAG; | 1097 | param->flags |= DM_BUFFER_FULL_FLAG; |
1095 | break; | 1098 | break; |
1096 | } | 1099 | } |
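The retrieve_status() hunk above is where the new status_flags argument originates: DM_STATUS_NOFLUSH_FLAG is set when userspace passed DM_NOFLUSH_FLAG, and the flags are handed to each target's status method, whose signature changes accordingly throughout this series. A hedged sketch of the new callback shape (the target and its output are hypothetical; only the prototype and the flag come from the patch):

#include <linux/device-mapper.h>

static int example_status(struct dm_target *ti, status_type_t type,
                          unsigned status_flags, char *result, unsigned maxlen)
{
        unsigned sz = 0;

        /*
         * DM_STATUS_NOFLUSH_FLAG means the caller asked for status without
         * flushing, so a target that would normally commit metadata first
         * can skip that step here.
         */
        if (!(status_flags & DM_STATUS_NOFLUSH_FLAG)) {
                /* flush/commit before reporting, if this target needs to */
        }

        switch (type) {
        case STATUSTYPE_INFO:
                DMEMIT("0");
                break;
        case STATUSTYPE_TABLE:
                DMEMIT("example");
                break;
        }

        return 0;
}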
diff --git a/drivers/md/dm-linear.c b/drivers/md/dm-linear.c
index 3639eeab6042..1bf19a93eef0 100644
--- a/drivers/md/dm-linear.c
+++ b/drivers/md/dm-linear.c
@@ -96,7 +96,7 @@ static int linear_map(struct dm_target *ti, struct bio *bio, | |||
96 | } | 96 | } |
97 | 97 | ||
98 | static int linear_status(struct dm_target *ti, status_type_t type, | 98 | static int linear_status(struct dm_target *ti, status_type_t type, |
99 | char *result, unsigned int maxlen) | 99 | unsigned status_flags, char *result, unsigned maxlen) |
100 | { | 100 | { |
101 | struct linear_c *lc = (struct linear_c *) ti->private; | 101 | struct linear_c *lc = (struct linear_c *) ti->private; |
102 | 102 | ||
diff --git a/drivers/md/dm-log.c b/drivers/md/dm-log.c
index 65ebaebf502b..627d19186d5a 100644
--- a/drivers/md/dm-log.c
+++ b/drivers/md/dm-log.c
@@ -571,16 +571,6 @@ static void disk_dtr(struct dm_dirty_log *log) | |||
571 | destroy_log_context(lc); | 571 | destroy_log_context(lc); |
572 | } | 572 | } |
573 | 573 | ||
574 | static int count_bits32(uint32_t *addr, unsigned size) | ||
575 | { | ||
576 | int count = 0, i; | ||
577 | |||
578 | for (i = 0; i < size; i++) { | ||
579 | count += hweight32(*(addr+i)); | ||
580 | } | ||
581 | return count; | ||
582 | } | ||
583 | |||
584 | static void fail_log_device(struct log_c *lc) | 574 | static void fail_log_device(struct log_c *lc) |
585 | { | 575 | { |
586 | if (lc->log_dev_failed) | 576 | if (lc->log_dev_failed) |
@@ -629,7 +619,8 @@ static int disk_resume(struct dm_dirty_log *log) | |||
629 | 619 | ||
630 | /* copy clean across to sync */ | 620 | /* copy clean across to sync */ |
631 | memcpy(lc->sync_bits, lc->clean_bits, size); | 621 | memcpy(lc->sync_bits, lc->clean_bits, size); |
632 | lc->sync_count = count_bits32(lc->clean_bits, lc->bitset_uint32_count); | 622 | lc->sync_count = memweight(lc->clean_bits, |
623 | lc->bitset_uint32_count * sizeof(uint32_t)); | ||
633 | lc->sync_search = 0; | 624 | lc->sync_search = 0; |
634 | 625 | ||
635 | /* set the correct number of regions in the header */ | 626 | /* set the correct number of regions in the header */ |
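The disk_resume() hunk above replaces the driver-local count_bits32() with the generic memweight(), which counts set bits over an arbitrary byte range. A small sketch of the equivalence, assuming memweight() from <linux/string.h> (names are illustrative):

#include <linux/types.h>
#include <linux/string.h>        /* memweight() */
#include <linux/bitops.h>        /* hweight32() */

/*
 * Two ways to count the set bits in a uint32_t bitset: the open-coded loop
 * matches the removed count_bits32(); memweight() gives the same answer
 * when handed the bitset's size in bytes.
 */
static size_t count_bits_open_coded(const uint32_t *bits, unsigned int words)
{
        size_t count = 0;
        unsigned int i;

        for (i = 0; i < words; i++)
                count += hweight32(bits[i]);

        return count;
}

static size_t count_bits_memweight(const uint32_t *bits, unsigned int words)
{
        return memweight(bits, words * sizeof(uint32_t));
}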
diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c
index 638dae048b4f..d8abb90a6c2f 100644
--- a/drivers/md/dm-mpath.c
+++ b/drivers/md/dm-mpath.c
@@ -85,6 +85,7 @@ struct multipath { | |||
85 | unsigned queue_io:1; /* Must we queue all I/O? */ | 85 | unsigned queue_io:1; /* Must we queue all I/O? */ |
86 | unsigned queue_if_no_path:1; /* Queue I/O if last path fails? */ | 86 | unsigned queue_if_no_path:1; /* Queue I/O if last path fails? */ |
87 | unsigned saved_queue_if_no_path:1; /* Saved state during suspension */ | 87 | unsigned saved_queue_if_no_path:1; /* Saved state during suspension */ |
88 | unsigned retain_attached_hw_handler:1; /* If there's already a hw_handler present, don't change it. */ | ||
88 | 89 | ||
89 | unsigned pg_init_retries; /* Number of times to retry pg_init */ | 90 | unsigned pg_init_retries; /* Number of times to retry pg_init */ |
90 | unsigned pg_init_count; /* Number of times pg_init called */ | 91 | unsigned pg_init_count; /* Number of times pg_init called */ |
@@ -568,6 +569,8 @@ static struct pgpath *parse_path(struct dm_arg_set *as, struct path_selector *ps | |||
568 | int r; | 569 | int r; |
569 | struct pgpath *p; | 570 | struct pgpath *p; |
570 | struct multipath *m = ti->private; | 571 | struct multipath *m = ti->private; |
572 | struct request_queue *q = NULL; | ||
573 | const char *attached_handler_name; | ||
571 | 574 | ||
572 | /* we need at least a path arg */ | 575 | /* we need at least a path arg */ |
573 | if (as->argc < 1) { | 576 | if (as->argc < 1) { |
@@ -586,13 +589,37 @@ static struct pgpath *parse_path(struct dm_arg_set *as, struct path_selector *ps | |||
586 | goto bad; | 589 | goto bad; |
587 | } | 590 | } |
588 | 591 | ||
589 | if (m->hw_handler_name) { | 592 | if (m->retain_attached_hw_handler || m->hw_handler_name) |
590 | struct request_queue *q = bdev_get_queue(p->path.dev->bdev); | 593 | q = bdev_get_queue(p->path.dev->bdev); |
594 | |||
595 | if (m->retain_attached_hw_handler) { | ||
596 | attached_handler_name = scsi_dh_attached_handler_name(q, GFP_KERNEL); | ||
597 | if (attached_handler_name) { | ||
598 | /* | ||
599 | * Reset hw_handler_name to match the attached handler | ||
600 | * and clear any hw_handler_params associated with the | ||
601 | * ignored handler. | ||
602 | * | ||
603 | * NB. This modifies the table line to show the actual | ||
604 | * handler instead of the original table passed in. | ||
605 | */ | ||
606 | kfree(m->hw_handler_name); | ||
607 | m->hw_handler_name = attached_handler_name; | ||
608 | |||
609 | kfree(m->hw_handler_params); | ||
610 | m->hw_handler_params = NULL; | ||
611 | } | ||
612 | } | ||
591 | 613 | ||
614 | if (m->hw_handler_name) { | ||
615 | /* | ||
616 | * Increments scsi_dh reference, even when using an | ||
617 | * already-attached handler. | ||
618 | */ | ||
592 | r = scsi_dh_attach(q, m->hw_handler_name); | 619 | r = scsi_dh_attach(q, m->hw_handler_name); |
593 | if (r == -EBUSY) { | 620 | if (r == -EBUSY) { |
594 | /* | 621 | /* |
595 | * Already attached to different hw_handler, | 622 | * Already attached to different hw_handler: |
596 | * try to reattach with correct one. | 623 | * try to reattach with correct one. |
597 | */ | 624 | */ |
598 | scsi_dh_detach(q); | 625 | scsi_dh_detach(q); |
@@ -760,7 +787,7 @@ static int parse_features(struct dm_arg_set *as, struct multipath *m) | |||
760 | const char *arg_name; | 787 | const char *arg_name; |
761 | 788 | ||
762 | static struct dm_arg _args[] = { | 789 | static struct dm_arg _args[] = { |
763 | {0, 5, "invalid number of feature args"}, | 790 | {0, 6, "invalid number of feature args"}, |
764 | {1, 50, "pg_init_retries must be between 1 and 50"}, | 791 | {1, 50, "pg_init_retries must be between 1 and 50"}, |
765 | {0, 60000, "pg_init_delay_msecs must be between 0 and 60000"}, | 792 | {0, 60000, "pg_init_delay_msecs must be between 0 and 60000"}, |
766 | }; | 793 | }; |
@@ -781,6 +808,11 @@ static int parse_features(struct dm_arg_set *as, struct multipath *m) | |||
781 | continue; | 808 | continue; |
782 | } | 809 | } |
783 | 810 | ||
811 | if (!strcasecmp(arg_name, "retain_attached_hw_handler")) { | ||
812 | m->retain_attached_hw_handler = 1; | ||
813 | continue; | ||
814 | } | ||
815 | |||
784 | if (!strcasecmp(arg_name, "pg_init_retries") && | 816 | if (!strcasecmp(arg_name, "pg_init_retries") && |
785 | (argc >= 1)) { | 817 | (argc >= 1)) { |
786 | r = dm_read_arg(_args + 1, as, &m->pg_init_retries, &ti->error); | 818 | r = dm_read_arg(_args + 1, as, &m->pg_init_retries, &ti->error); |
@@ -1346,7 +1378,7 @@ static void multipath_resume(struct dm_target *ti) | |||
1346 | * num_paths num_selector_args [path_dev [selector_args]* ]+ ]+ | 1378 | * num_paths num_selector_args [path_dev [selector_args]* ]+ ]+ |
1347 | */ | 1379 | */ |
1348 | static int multipath_status(struct dm_target *ti, status_type_t type, | 1380 | static int multipath_status(struct dm_target *ti, status_type_t type, |
1349 | char *result, unsigned int maxlen) | 1381 | unsigned status_flags, char *result, unsigned maxlen) |
1350 | { | 1382 | { |
1351 | int sz = 0; | 1383 | int sz = 0; |
1352 | unsigned long flags; | 1384 | unsigned long flags; |
@@ -1364,13 +1396,16 @@ static int multipath_status(struct dm_target *ti, status_type_t type, | |||
1364 | else { | 1396 | else { |
1365 | DMEMIT("%u ", m->queue_if_no_path + | 1397 | DMEMIT("%u ", m->queue_if_no_path + |
1366 | (m->pg_init_retries > 0) * 2 + | 1398 | (m->pg_init_retries > 0) * 2 + |
1367 | (m->pg_init_delay_msecs != DM_PG_INIT_DELAY_DEFAULT) * 2); | 1399 | (m->pg_init_delay_msecs != DM_PG_INIT_DELAY_DEFAULT) * 2 + |
1400 | m->retain_attached_hw_handler); | ||
1368 | if (m->queue_if_no_path) | 1401 | if (m->queue_if_no_path) |
1369 | DMEMIT("queue_if_no_path "); | 1402 | DMEMIT("queue_if_no_path "); |
1370 | if (m->pg_init_retries) | 1403 | if (m->pg_init_retries) |
1371 | DMEMIT("pg_init_retries %u ", m->pg_init_retries); | 1404 | DMEMIT("pg_init_retries %u ", m->pg_init_retries); |
1372 | if (m->pg_init_delay_msecs != DM_PG_INIT_DELAY_DEFAULT) | 1405 | if (m->pg_init_delay_msecs != DM_PG_INIT_DELAY_DEFAULT) |
1373 | DMEMIT("pg_init_delay_msecs %u ", m->pg_init_delay_msecs); | 1406 | DMEMIT("pg_init_delay_msecs %u ", m->pg_init_delay_msecs); |
1407 | if (m->retain_attached_hw_handler) | ||
1408 | DMEMIT("retain_attached_hw_handler "); | ||
1374 | } | 1409 | } |
1375 | 1410 | ||
1376 | if (!m->hw_handler_name || type == STATUSTYPE_INFO) | 1411 | if (!m->hw_handler_name || type == STATUSTYPE_INFO) |
@@ -1656,7 +1691,7 @@ out: | |||
1656 | *---------------------------------------------------------------*/ | 1691 | *---------------------------------------------------------------*/ |
1657 | static struct target_type multipath_target = { | 1692 | static struct target_type multipath_target = { |
1658 | .name = "multipath", | 1693 | .name = "multipath", |
1659 | .version = {1, 4, 0}, | 1694 | .version = {1, 5, 0}, |
1660 | .module = THIS_MODULE, | 1695 | .module = THIS_MODULE, |
1661 | .ctr = multipath_ctr, | 1696 | .ctr = multipath_ctr, |
1662 | .dtr = multipath_dtr, | 1697 | .dtr = multipath_dtr, |
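In the multipath_status() hunk above, the leading count is the number of feature words that follow: queue_if_no_path and the new retain_attached_hw_handler are bare keywords (one word each), while pg_init_retries and pg_init_delay_msecs emit a keyword plus a value (two words each), hence the "* 2". A sketch of that accounting with a stand-in structure, not the real struct multipath:

/*
 * Mirror of the feature-count arithmetic in multipath_status(): each bare
 * keyword contributes one word, each "keyword value" pair contributes two.
 */
struct mp_features {
        unsigned queue_if_no_path:1;
        unsigned retain_attached_hw_handler:1;
        unsigned pg_init_retries;
        unsigned pg_init_delay_msecs;   /* equals the default when unset */
};

static unsigned feature_word_count(const struct mp_features *f,
                                   unsigned pg_init_delay_default)
{
        return f->queue_if_no_path +                                    /* "queue_if_no_path" */
               (f->pg_init_retries > 0) * 2 +                           /* "pg_init_retries <n>" */
               (f->pg_init_delay_msecs != pg_init_delay_default) * 2 +  /* "pg_init_delay_msecs <ms>" */
               f->retain_attached_hw_handler;                           /* "retain_attached_hw_handler" */
}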
diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c
index 017c34d78d61..982e3e390c45 100644
--- a/drivers/md/dm-raid.c
+++ b/drivers/md/dm-raid.c
@@ -11,6 +11,7 @@ | |||
11 | #include "md.h" | 11 | #include "md.h" |
12 | #include "raid1.h" | 12 | #include "raid1.h" |
13 | #include "raid5.h" | 13 | #include "raid5.h" |
14 | #include "raid10.h" | ||
14 | #include "bitmap.h" | 15 | #include "bitmap.h" |
15 | 16 | ||
16 | #include <linux/device-mapper.h> | 17 | #include <linux/device-mapper.h> |
@@ -52,7 +53,10 @@ struct raid_dev { | |||
52 | #define DMPF_MAX_RECOVERY_RATE 0x20 | 53 | #define DMPF_MAX_RECOVERY_RATE 0x20 |
53 | #define DMPF_MAX_WRITE_BEHIND 0x40 | 54 | #define DMPF_MAX_WRITE_BEHIND 0x40 |
54 | #define DMPF_STRIPE_CACHE 0x80 | 55 | #define DMPF_STRIPE_CACHE 0x80 |
55 | #define DMPF_REGION_SIZE 0X100 | 56 | #define DMPF_REGION_SIZE 0x100 |
57 | #define DMPF_RAID10_COPIES 0x200 | ||
58 | #define DMPF_RAID10_FORMAT 0x400 | ||
59 | |||
56 | struct raid_set { | 60 | struct raid_set { |
57 | struct dm_target *ti; | 61 | struct dm_target *ti; |
58 | 62 | ||
@@ -76,6 +80,7 @@ static struct raid_type { | |||
76 | const unsigned algorithm; /* RAID algorithm. */ | 80 | const unsigned algorithm; /* RAID algorithm. */ |
77 | } raid_types[] = { | 81 | } raid_types[] = { |
78 | {"raid1", "RAID1 (mirroring)", 0, 2, 1, 0 /* NONE */}, | 82 | {"raid1", "RAID1 (mirroring)", 0, 2, 1, 0 /* NONE */}, |
83 | {"raid10", "RAID10 (striped mirrors)", 0, 2, 10, UINT_MAX /* Varies */}, | ||
79 | {"raid4", "RAID4 (dedicated parity disk)", 1, 2, 5, ALGORITHM_PARITY_0}, | 84 | {"raid4", "RAID4 (dedicated parity disk)", 1, 2, 5, ALGORITHM_PARITY_0}, |
80 | {"raid5_la", "RAID5 (left asymmetric)", 1, 2, 5, ALGORITHM_LEFT_ASYMMETRIC}, | 85 | {"raid5_la", "RAID5 (left asymmetric)", 1, 2, 5, ALGORITHM_LEFT_ASYMMETRIC}, |
81 | {"raid5_ra", "RAID5 (right asymmetric)", 1, 2, 5, ALGORITHM_RIGHT_ASYMMETRIC}, | 86 | {"raid5_ra", "RAID5 (right asymmetric)", 1, 2, 5, ALGORITHM_RIGHT_ASYMMETRIC}, |
@@ -86,6 +91,17 @@ static struct raid_type { | |||
86 | {"raid6_nc", "RAID6 (N continue)", 2, 4, 6, ALGORITHM_ROTATING_N_CONTINUE} | 91 | {"raid6_nc", "RAID6 (N continue)", 2, 4, 6, ALGORITHM_ROTATING_N_CONTINUE} |
87 | }; | 92 | }; |
88 | 93 | ||
94 | static unsigned raid10_md_layout_to_copies(int layout) | ||
95 | { | ||
96 | return layout & 0xFF; | ||
97 | } | ||
98 | |||
99 | static int raid10_format_to_md_layout(char *format, unsigned copies) | ||
100 | { | ||
101 | /* 1 "far" copy, and 'copies' "near" copies */ | ||
102 | return (1 << 8) | (copies & 0xFF); | ||
103 | } | ||
104 | |||
89 | static struct raid_type *get_raid_type(char *name) | 105 | static struct raid_type *get_raid_type(char *name) |
90 | { | 106 | { |
91 | int i; | 107 | int i; |
@@ -101,20 +117,12 @@ static struct raid_set *context_alloc(struct dm_target *ti, struct raid_type *ra | |||
101 | { | 117 | { |
102 | unsigned i; | 118 | unsigned i; |
103 | struct raid_set *rs; | 119 | struct raid_set *rs; |
104 | sector_t sectors_per_dev; | ||
105 | 120 | ||
106 | if (raid_devs <= raid_type->parity_devs) { | 121 | if (raid_devs <= raid_type->parity_devs) { |
107 | ti->error = "Insufficient number of devices"; | 122 | ti->error = "Insufficient number of devices"; |
108 | return ERR_PTR(-EINVAL); | 123 | return ERR_PTR(-EINVAL); |
109 | } | 124 | } |
110 | 125 | ||
111 | sectors_per_dev = ti->len; | ||
112 | if ((raid_type->level > 1) && | ||
113 | sector_div(sectors_per_dev, (raid_devs - raid_type->parity_devs))) { | ||
114 | ti->error = "Target length not divisible by number of data devices"; | ||
115 | return ERR_PTR(-EINVAL); | ||
116 | } | ||
117 | |||
118 | rs = kzalloc(sizeof(*rs) + raid_devs * sizeof(rs->dev[0]), GFP_KERNEL); | 126 | rs = kzalloc(sizeof(*rs) + raid_devs * sizeof(rs->dev[0]), GFP_KERNEL); |
119 | if (!rs) { | 127 | if (!rs) { |
120 | ti->error = "Cannot allocate raid context"; | 128 | ti->error = "Cannot allocate raid context"; |
@@ -128,7 +136,6 @@ static struct raid_set *context_alloc(struct dm_target *ti, struct raid_type *ra | |||
128 | rs->md.raid_disks = raid_devs; | 136 | rs->md.raid_disks = raid_devs; |
129 | rs->md.level = raid_type->level; | 137 | rs->md.level = raid_type->level; |
130 | rs->md.new_level = rs->md.level; | 138 | rs->md.new_level = rs->md.level; |
131 | rs->md.dev_sectors = sectors_per_dev; | ||
132 | rs->md.layout = raid_type->algorithm; | 139 | rs->md.layout = raid_type->algorithm; |
133 | rs->md.new_layout = rs->md.layout; | 140 | rs->md.new_layout = rs->md.layout; |
134 | rs->md.delta_disks = 0; | 141 | rs->md.delta_disks = 0; |
@@ -143,6 +150,7 @@ static struct raid_set *context_alloc(struct dm_target *ti, struct raid_type *ra | |||
143 | * rs->md.external | 150 | * rs->md.external |
144 | * rs->md.chunk_sectors | 151 | * rs->md.chunk_sectors |
145 | * rs->md.new_chunk_sectors | 152 | * rs->md.new_chunk_sectors |
153 | * rs->md.dev_sectors | ||
146 | */ | 154 | */ |
147 | 155 | ||
148 | return rs; | 156 | return rs; |
@@ -347,12 +355,20 @@ static int validate_region_size(struct raid_set *rs, unsigned long region_size) | |||
347 | * [max_write_behind <sectors>] See '-write-behind=' (man mdadm) | 355 | * [max_write_behind <sectors>] See '-write-behind=' (man mdadm) |
348 | * [stripe_cache <sectors>] Stripe cache size for higher RAIDs | 356 | * [stripe_cache <sectors>] Stripe cache size for higher RAIDs |
349 | * [region_size <sectors>] Defines granularity of bitmap | 357 | * [region_size <sectors>] Defines granularity of bitmap |
358 | * | ||
359 | * RAID10-only options: | ||
360 | * [raid10_copies <# copies>] Number of copies. (Default: 2) | ||
361 | * [raid10_format <near>] Layout algorithm. (Default: near) | ||
350 | */ | 362 | */ |
351 | static int parse_raid_params(struct raid_set *rs, char **argv, | 363 | static int parse_raid_params(struct raid_set *rs, char **argv, |
352 | unsigned num_raid_params) | 364 | unsigned num_raid_params) |
353 | { | 365 | { |
366 | char *raid10_format = "near"; | ||
367 | unsigned raid10_copies = 2; | ||
354 | unsigned i, rebuild_cnt = 0; | 368 | unsigned i, rebuild_cnt = 0; |
355 | unsigned long value, region_size = 0; | 369 | unsigned long value, region_size = 0; |
370 | sector_t sectors_per_dev = rs->ti->len; | ||
371 | sector_t max_io_len; | ||
356 | char *key; | 372 | char *key; |
357 | 373 | ||
358 | /* | 374 | /* |
@@ -422,20 +438,53 @@ static int parse_raid_params(struct raid_set *rs, char **argv, | |||
422 | } | 438 | } |
423 | 439 | ||
424 | key = argv[i++]; | 440 | key = argv[i++]; |
441 | |||
442 | /* Parameters that take a string value are checked here. */ | ||
443 | if (!strcasecmp(key, "raid10_format")) { | ||
444 | if (rs->raid_type->level != 10) { | ||
445 | rs->ti->error = "'raid10_format' is an invalid parameter for this RAID type"; | ||
446 | return -EINVAL; | ||
447 | } | ||
448 | if (strcmp("near", argv[i])) { | ||
449 | rs->ti->error = "Invalid 'raid10_format' value given"; | ||
450 | return -EINVAL; | ||
451 | } | ||
452 | raid10_format = argv[i]; | ||
453 | rs->print_flags |= DMPF_RAID10_FORMAT; | ||
454 | continue; | ||
455 | } | ||
456 | |||
425 | if (strict_strtoul(argv[i], 10, &value) < 0) { | 457 | if (strict_strtoul(argv[i], 10, &value) < 0) { |
426 | rs->ti->error = "Bad numerical argument given in raid params"; | 458 | rs->ti->error = "Bad numerical argument given in raid params"; |
427 | return -EINVAL; | 459 | return -EINVAL; |
428 | } | 460 | } |
429 | 461 | ||
462 | /* Parameters that take a numeric value are checked here */ | ||
430 | if (!strcasecmp(key, "rebuild")) { | 463 | if (!strcasecmp(key, "rebuild")) { |
431 | rebuild_cnt++; | 464 | rebuild_cnt++; |
432 | if (((rs->raid_type->level != 1) && | 465 | |
433 | (rebuild_cnt > rs->raid_type->parity_devs)) || | 466 | switch (rs->raid_type->level) { |
434 | ((rs->raid_type->level == 1) && | 467 | case 1: |
435 | (rebuild_cnt > (rs->md.raid_disks - 1)))) { | 468 | if (rebuild_cnt >= rs->md.raid_disks) { |
436 | rs->ti->error = "Too many rebuild devices specified for given RAID type"; | 469 | rs->ti->error = "Too many rebuild devices specified"; |
470 | return -EINVAL; | ||
471 | } | ||
472 | break; | ||
473 | case 4: | ||
474 | case 5: | ||
475 | case 6: | ||
476 | if (rebuild_cnt > rs->raid_type->parity_devs) { | ||
477 | rs->ti->error = "Too many rebuild devices specified for given RAID type"; | ||
478 | return -EINVAL; | ||
479 | } | ||
480 | break; | ||
481 | case 10: | ||
482 | default: | ||
483 | DMERR("The rebuild parameter is not supported for %s", rs->raid_type->name); | ||
484 | rs->ti->error = "Rebuild not supported for this RAID type"; | ||
437 | return -EINVAL; | 485 | return -EINVAL; |
438 | } | 486 | } |
487 | |||
439 | if (value > rs->md.raid_disks) { | 488 | if (value > rs->md.raid_disks) { |
440 | rs->ti->error = "Invalid rebuild index given"; | 489 | rs->ti->error = "Invalid rebuild index given"; |
441 | return -EINVAL; | 490 | return -EINVAL; |
@@ -486,7 +535,8 @@ static int parse_raid_params(struct raid_set *rs, char **argv, | |||
486 | */ | 535 | */ |
487 | value /= 2; | 536 | value /= 2; |
488 | 537 | ||
489 | if (rs->raid_type->level < 5) { | 538 | if ((rs->raid_type->level != 5) && |
539 | (rs->raid_type->level != 6)) { | ||
490 | rs->ti->error = "Inappropriate argument: stripe_cache"; | 540 | rs->ti->error = "Inappropriate argument: stripe_cache"; |
491 | return -EINVAL; | 541 | return -EINVAL; |
492 | } | 542 | } |
@@ -511,6 +561,14 @@ static int parse_raid_params(struct raid_set *rs, char **argv, | |||
511 | } else if (!strcasecmp(key, "region_size")) { | 561 | } else if (!strcasecmp(key, "region_size")) { |
512 | rs->print_flags |= DMPF_REGION_SIZE; | 562 | rs->print_flags |= DMPF_REGION_SIZE; |
513 | region_size = value; | 563 | region_size = value; |
564 | } else if (!strcasecmp(key, "raid10_copies") && | ||
565 | (rs->raid_type->level == 10)) { | ||
566 | if ((value < 2) || (value > 0xFF)) { | ||
567 | rs->ti->error = "Bad value for 'raid10_copies'"; | ||
568 | return -EINVAL; | ||
569 | } | ||
570 | rs->print_flags |= DMPF_RAID10_COPIES; | ||
571 | raid10_copies = value; | ||
514 | } else { | 572 | } else { |
515 | DMERR("Unable to parse RAID parameter: %s", key); | 573 | DMERR("Unable to parse RAID parameter: %s", key); |
516 | rs->ti->error = "Unable to parse RAID parameters"; | 574 | rs->ti->error = "Unable to parse RAID parameters"; |
@@ -522,14 +580,33 @@ static int parse_raid_params(struct raid_set *rs, char **argv, | |||
522 | return -EINVAL; | 580 | return -EINVAL; |
523 | 581 | ||
524 | if (rs->md.chunk_sectors) | 582 | if (rs->md.chunk_sectors) |
525 | rs->ti->split_io = rs->md.chunk_sectors; | 583 | max_io_len = rs->md.chunk_sectors; |
526 | else | 584 | else |
527 | rs->ti->split_io = region_size; | 585 | max_io_len = region_size; |
528 | 586 | ||
529 | if (rs->md.chunk_sectors) | 587 | if (dm_set_target_max_io_len(rs->ti, max_io_len)) |
530 | rs->ti->split_io = rs->md.chunk_sectors; | 588 | return -EINVAL; |
531 | else | 589 | |
532 | rs->ti->split_io = region_size; | 590 | if (rs->raid_type->level == 10) { |
591 | if (raid10_copies > rs->md.raid_disks) { | ||
592 | rs->ti->error = "Not enough devices to satisfy specification"; | ||
593 | return -EINVAL; | ||
594 | } | ||
595 | |||
596 | /* (Len * #mirrors) / #devices */ | ||
597 | sectors_per_dev = rs->ti->len * raid10_copies; | ||
598 | sector_div(sectors_per_dev, rs->md.raid_disks); | ||
599 | |||
600 | rs->md.layout = raid10_format_to_md_layout(raid10_format, | ||
601 | raid10_copies); | ||
602 | rs->md.new_layout = rs->md.layout; | ||
603 | } else if ((rs->raid_type->level > 1) && | ||
604 | sector_div(sectors_per_dev, | ||
605 | (rs->md.raid_disks - rs->raid_type->parity_devs))) { | ||
606 | rs->ti->error = "Target length not divisible by number of data devices"; | ||
607 | return -EINVAL; | ||
608 | } | ||
609 | rs->md.dev_sectors = sectors_per_dev; | ||
533 | 610 | ||
534 | /* Assume there are no metadata devices until the drives are parsed */ | 611 | /* Assume there are no metadata devices until the drives are parsed */ |
535 | rs->md.persistent = 0; | 612 | rs->md.persistent = 0; |
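[Annotation] The new tail of parse_raid_params() above sizes each member device: for raid10 it spreads (target length x copy count) across all disks, while raid4/5/6 still require the length to divide evenly across the data disks. Below is a small userspace sketch of that arithmetic with a stand-in for the kernel's sector_div(), which divides in place and returns the remainder; the sample numbers are invented.

#include <stdint.h>
#include <stdio.h>

/* stand-in for the kernel's sector_div(): divide *n in place, return remainder */
static uint32_t sector_div_sketch(uint64_t *n, uint32_t base)
{
	uint32_t rem = (uint32_t)(*n % base);
	*n /= base;
	return rem;
}

int main(void)
{
	uint64_t ti_len = 6291456;		/* example target length in sectors */
	unsigned raid_disks = 6, raid10_copies = 2, parity_devs = 1;

	/* raid10: (len * #mirrors) / #devices */
	uint64_t sectors_per_dev = ti_len * raid10_copies;
	sector_div_sketch(&sectors_per_dev, raid_disks);
	printf("raid10: %llu sectors per device\n",
	       (unsigned long long)sectors_per_dev);

	/* raid4/5/6: length must divide evenly across the data devices */
	sectors_per_dev = ti_len;
	if (sector_div_sketch(&sectors_per_dev, raid_disks - parity_devs))
		printf("raid5: target length not divisible by data devices\n");
	else
		printf("raid5: %llu sectors per device\n",
		       (unsigned long long)sectors_per_dev);
	return 0;
}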
@@ -552,6 +629,9 @@ static int raid_is_congested(struct dm_target_callbacks *cb, int bits) | |||
552 | if (rs->raid_type->level == 1) | 629 | if (rs->raid_type->level == 1) |
553 | return md_raid1_congested(&rs->md, bits); | 630 | return md_raid1_congested(&rs->md, bits); |
554 | 631 | ||
632 | if (rs->raid_type->level == 10) | ||
633 | return md_raid10_congested(&rs->md, bits); | ||
634 | |||
555 | return md_raid5_congested(&rs->md, bits); | 635 | return md_raid5_congested(&rs->md, bits); |
556 | } | 636 | } |
557 | 637 | ||
@@ -870,6 +950,9 @@ static int analyse_superblocks(struct dm_target *ti, struct raid_set *rs) | |||
870 | case 6: | 950 | case 6: |
871 | redundancy = rs->raid_type->parity_devs; | 951 | redundancy = rs->raid_type->parity_devs; |
872 | break; | 952 | break; |
953 | case 10: | ||
954 | redundancy = raid10_md_layout_to_copies(mddev->layout) - 1; | ||
955 | break; | ||
873 | default: | 956 | default: |
874 | ti->error = "Unknown RAID type"; | 957 | ti->error = "Unknown RAID type"; |
875 | return -EINVAL; | 958 | return -EINVAL; |
@@ -1035,12 +1118,19 @@ static int raid_ctr(struct dm_target *ti, unsigned argc, char **argv) | |||
1035 | goto bad; | 1118 | goto bad; |
1036 | } | 1119 | } |
1037 | 1120 | ||
1121 | if (ti->len != rs->md.array_sectors) { | ||
1122 | ti->error = "Array size does not match requested target length"; | ||
1123 | ret = -EINVAL; | ||
1124 | goto size_mismatch; | ||
1125 | } | ||
1038 | rs->callbacks.congested_fn = raid_is_congested; | 1126 | rs->callbacks.congested_fn = raid_is_congested; |
1039 | dm_table_add_target_callbacks(ti->table, &rs->callbacks); | 1127 | dm_table_add_target_callbacks(ti->table, &rs->callbacks); |
1040 | 1128 | ||
1041 | mddev_suspend(&rs->md); | 1129 | mddev_suspend(&rs->md); |
1042 | return 0; | 1130 | return 0; |
1043 | 1131 | ||
1132 | size_mismatch: | ||
1133 | md_stop(&rs->md); | ||
1044 | bad: | 1134 | bad: |
1045 | context_free(rs); | 1135 | context_free(rs); |
1046 | 1136 | ||
@@ -1067,7 +1157,7 @@ static int raid_map(struct dm_target *ti, struct bio *bio, union map_info *map_c | |||
1067 | } | 1157 | } |
1068 | 1158 | ||
1069 | static int raid_status(struct dm_target *ti, status_type_t type, | 1159 | static int raid_status(struct dm_target *ti, status_type_t type, |
1070 | char *result, unsigned maxlen) | 1160 | unsigned status_flags, char *result, unsigned maxlen) |
1071 | { | 1161 | { |
1072 | struct raid_set *rs = ti->private; | 1162 | struct raid_set *rs = ti->private; |
1073 | unsigned raid_param_cnt = 1; /* at least 1 for chunksize */ | 1163 | unsigned raid_param_cnt = 1; /* at least 1 for chunksize */ |
@@ -1189,6 +1279,13 @@ static int raid_status(struct dm_target *ti, status_type_t type, | |||
1189 | DMEMIT(" region_size %lu", | 1279 | DMEMIT(" region_size %lu", |
1190 | rs->md.bitmap_info.chunksize >> 9); | 1280 | rs->md.bitmap_info.chunksize >> 9); |
1191 | 1281 | ||
1282 | if (rs->print_flags & DMPF_RAID10_COPIES) | ||
1283 | DMEMIT(" raid10_copies %u", | ||
1284 | raid10_md_layout_to_copies(rs->md.layout)); | ||
1285 | |||
1286 | if (rs->print_flags & DMPF_RAID10_FORMAT) | ||
1287 | DMEMIT(" raid10_format near"); | ||
1288 | |||
1192 | DMEMIT(" %d", rs->md.raid_disks); | 1289 | DMEMIT(" %d", rs->md.raid_disks); |
1193 | for (i = 0; i < rs->md.raid_disks; i++) { | 1290 | for (i = 0; i < rs->md.raid_disks; i++) { |
1194 | if (rs->dev[i].meta_dev) | 1291 | if (rs->dev[i].meta_dev) |
@@ -1263,7 +1360,7 @@ static void raid_resume(struct dm_target *ti) | |||
1263 | 1360 | ||
1264 | static struct target_type raid_target = { | 1361 | static struct target_type raid_target = { |
1265 | .name = "raid", | 1362 | .name = "raid", |
1266 | .version = {1, 2, 0}, | 1363 | .version = {1, 3, 0}, |
1267 | .module = THIS_MODULE, | 1364 | .module = THIS_MODULE, |
1268 | .ctr = raid_ctr, | 1365 | .ctr = raid_ctr, |
1269 | .dtr = raid_dtr, | 1366 | .dtr = raid_dtr, |
@@ -1290,6 +1387,8 @@ module_init(dm_raid_init); | |||
1290 | module_exit(dm_raid_exit); | 1387 | module_exit(dm_raid_exit); |
1291 | 1388 | ||
1292 | MODULE_DESCRIPTION(DM_NAME " raid4/5/6 target"); | 1389 | MODULE_DESCRIPTION(DM_NAME " raid4/5/6 target"); |
1390 | MODULE_ALIAS("dm-raid1"); | ||
1391 | MODULE_ALIAS("dm-raid10"); | ||
1293 | MODULE_ALIAS("dm-raid4"); | 1392 | MODULE_ALIAS("dm-raid4"); |
1294 | MODULE_ALIAS("dm-raid5"); | 1393 | MODULE_ALIAS("dm-raid5"); |
1295 | MODULE_ALIAS("dm-raid6"); | 1394 | MODULE_ALIAS("dm-raid6"); |
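[Annotation] The dm-raid.c hunks above call two helpers that fall outside this excerpt, raid10_format_to_md_layout() and raid10_md_layout_to_copies(). The sketch below shows what they plausibly look like for the only accepted format, "near", assuming MD's raid10 layout convention of near copies in the low byte and far copies in the next byte; the in-tree versions may differ in detail.

#include <stdio.h>

/* assumed MD raid10 layout packing: bits 0-7 near copies, bits 8-15 far copies */
static int raid10_format_to_md_layout_sketch(const char *format, unsigned copies)
{
	(void)format;	/* this version of the target only accepts "near" */
	/* "near" keeps far copies at 1 and stores the copy count as near copies */
	return (1 << 8) | (copies & 0xFF);
}

static unsigned raid10_md_layout_to_copies_sketch(int layout)
{
	return layout & 0xFF;
}

int main(void)
{
	int layout = raid10_format_to_md_layout_sketch("near", 2);

	printf("layout 0x%x encodes %u copies\n",
	       layout, raid10_md_layout_to_copies_sketch(layout));
	return 0;
}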
diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c index b58b7a33914a..bc5ddba8045b 100644 --- a/drivers/md/dm-raid1.c +++ b/drivers/md/dm-raid1.c | |||
@@ -1081,10 +1081,14 @@ static int mirror_ctr(struct dm_target *ti, unsigned int argc, char **argv) | |||
1081 | } | 1081 | } |
1082 | 1082 | ||
1083 | ti->private = ms; | 1083 | ti->private = ms; |
1084 | ti->split_io = dm_rh_get_region_size(ms->rh); | 1084 | |
1085 | r = dm_set_target_max_io_len(ti, dm_rh_get_region_size(ms->rh)); | ||
1086 | if (r) | ||
1087 | goto err_free_context; | ||
1088 | |||
1085 | ti->num_flush_requests = 1; | 1089 | ti->num_flush_requests = 1; |
1086 | ti->num_discard_requests = 1; | 1090 | ti->num_discard_requests = 1; |
1087 | ti->discard_zeroes_data_unsupported = 1; | 1091 | ti->discard_zeroes_data_unsupported = true; |
1088 | 1092 | ||
1089 | ms->kmirrord_wq = alloc_workqueue("kmirrord", | 1093 | ms->kmirrord_wq = alloc_workqueue("kmirrord", |
1090 | WQ_NON_REENTRANT | WQ_MEM_RECLAIM, 0); | 1094 | WQ_NON_REENTRANT | WQ_MEM_RECLAIM, 0); |
@@ -1363,7 +1367,7 @@ static char device_status_char(struct mirror *m) | |||
1363 | 1367 | ||
1364 | 1368 | ||
1365 | static int mirror_status(struct dm_target *ti, status_type_t type, | 1369 | static int mirror_status(struct dm_target *ti, status_type_t type, |
1366 | char *result, unsigned int maxlen) | 1370 | unsigned status_flags, char *result, unsigned maxlen) |
1367 | { | 1371 | { |
1368 | unsigned int m, sz = 0; | 1372 | unsigned int m, sz = 0; |
1369 | struct mirror_set *ms = (struct mirror_set *) ti->private; | 1373 | struct mirror_set *ms = (struct mirror_set *) ti->private; |
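[Annotation] dm-raid1.c, like the other targets in this series, stops assigning ti->split_io directly and goes through dm_set_target_max_io_len() instead. As far as this diff shows, the point of the helper is that the replacement field ti->max_io_len is 32-bit, so an oversized region or chunk size is rejected in the constructor rather than silently truncated. A minimal sketch of that check under those assumptions follows; it is not the exact dm core implementation.

#include <stdint.h>
#include <errno.h>

struct dm_target_sketch {
	uint32_t max_io_len;	/* replaces the old sector_t split_io */
};

static int dm_set_target_max_io_len_sketch(struct dm_target_sketch *ti,
					    uint64_t len)
{
	if (len > UINT32_MAX)
		return -EINVAL;	/* too large to represent; the ctr fails cleanly */
	ti->max_io_len = (uint32_t)len;
	return 0;
}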
diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c index 6f758870fc19..a143921feaf6 100644 --- a/drivers/md/dm-snap.c +++ b/drivers/md/dm-snap.c | |||
@@ -691,7 +691,7 @@ static int dm_add_exception(void *context, chunk_t old, chunk_t new) | |||
691 | * Return a minimum chunk size of all snapshots that have the specified origin. | 691 | * Return a minimum chunk size of all snapshots that have the specified origin. |
692 | * Return zero if the origin has no snapshots. | 692 | * Return zero if the origin has no snapshots. |
693 | */ | 693 | */ |
694 | static sector_t __minimum_chunk_size(struct origin *o) | 694 | static uint32_t __minimum_chunk_size(struct origin *o) |
695 | { | 695 | { |
696 | struct dm_snapshot *snap; | 696 | struct dm_snapshot *snap; |
697 | unsigned chunk_size = 0; | 697 | unsigned chunk_size = 0; |
@@ -701,7 +701,7 @@ static sector_t __minimum_chunk_size(struct origin *o) | |||
701 | chunk_size = min_not_zero(chunk_size, | 701 | chunk_size = min_not_zero(chunk_size, |
702 | snap->store->chunk_size); | 702 | snap->store->chunk_size); |
703 | 703 | ||
704 | return chunk_size; | 704 | return (uint32_t) chunk_size; |
705 | } | 705 | } |
706 | 706 | ||
707 | /* | 707 | /* |
@@ -1172,7 +1172,10 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv) | |||
1172 | ti->error = "Chunk size not set"; | 1172 | ti->error = "Chunk size not set"; |
1173 | goto bad_read_metadata; | 1173 | goto bad_read_metadata; |
1174 | } | 1174 | } |
1175 | ti->split_io = s->store->chunk_size; | 1175 | |
1176 | r = dm_set_target_max_io_len(ti, s->store->chunk_size); | ||
1177 | if (r) | ||
1178 | goto bad_read_metadata; | ||
1176 | 1179 | ||
1177 | return 0; | 1180 | return 0; |
1178 | 1181 | ||
@@ -1239,7 +1242,7 @@ static void __handover_exceptions(struct dm_snapshot *snap_src, | |||
1239 | snap_dest->store->snap = snap_dest; | 1242 | snap_dest->store->snap = snap_dest; |
1240 | snap_src->store->snap = snap_src; | 1243 | snap_src->store->snap = snap_src; |
1241 | 1244 | ||
1242 | snap_dest->ti->split_io = snap_dest->store->chunk_size; | 1245 | snap_dest->ti->max_io_len = snap_dest->store->chunk_size; |
1243 | snap_dest->valid = snap_src->valid; | 1246 | snap_dest->valid = snap_src->valid; |
1244 | 1247 | ||
1245 | /* | 1248 | /* |
@@ -1817,9 +1820,9 @@ static void snapshot_resume(struct dm_target *ti) | |||
1817 | up_write(&s->lock); | 1820 | up_write(&s->lock); |
1818 | } | 1821 | } |
1819 | 1822 | ||
1820 | static sector_t get_origin_minimum_chunksize(struct block_device *bdev) | 1823 | static uint32_t get_origin_minimum_chunksize(struct block_device *bdev) |
1821 | { | 1824 | { |
1822 | sector_t min_chunksize; | 1825 | uint32_t min_chunksize; |
1823 | 1826 | ||
1824 | down_read(&_origins_lock); | 1827 | down_read(&_origins_lock); |
1825 | min_chunksize = __minimum_chunk_size(__lookup_origin(bdev)); | 1828 | min_chunksize = __minimum_chunk_size(__lookup_origin(bdev)); |
@@ -1838,15 +1841,15 @@ static void snapshot_merge_resume(struct dm_target *ti) | |||
1838 | snapshot_resume(ti); | 1841 | snapshot_resume(ti); |
1839 | 1842 | ||
1840 | /* | 1843 | /* |
1841 | * snapshot-merge acts as an origin, so set ti->split_io | 1844 | * snapshot-merge acts as an origin, so set ti->max_io_len |
1842 | */ | 1845 | */ |
1843 | ti->split_io = get_origin_minimum_chunksize(s->origin->bdev); | 1846 | ti->max_io_len = get_origin_minimum_chunksize(s->origin->bdev); |
1844 | 1847 | ||
1845 | start_merge(s); | 1848 | start_merge(s); |
1846 | } | 1849 | } |
1847 | 1850 | ||
1848 | static int snapshot_status(struct dm_target *ti, status_type_t type, | 1851 | static int snapshot_status(struct dm_target *ti, status_type_t type, |
1849 | char *result, unsigned int maxlen) | 1852 | unsigned status_flags, char *result, unsigned maxlen) |
1850 | { | 1853 | { |
1851 | unsigned sz = 0; | 1854 | unsigned sz = 0; |
1852 | struct dm_snapshot *snap = ti->private; | 1855 | struct dm_snapshot *snap = ti->private; |
@@ -2073,12 +2076,12 @@ static int origin_write_extent(struct dm_snapshot *merging_snap, | |||
2073 | struct origin *o; | 2076 | struct origin *o; |
2074 | 2077 | ||
2075 | /* | 2078 | /* |
2076 | * The origin's __minimum_chunk_size() got stored in split_io | 2079 | * The origin's __minimum_chunk_size() got stored in max_io_len |
2077 | * by snapshot_merge_resume(). | 2080 | * by snapshot_merge_resume(). |
2078 | */ | 2081 | */ |
2079 | down_read(&_origins_lock); | 2082 | down_read(&_origins_lock); |
2080 | o = __lookup_origin(merging_snap->origin->bdev); | 2083 | o = __lookup_origin(merging_snap->origin->bdev); |
2081 | for (n = 0; n < size; n += merging_snap->ti->split_io) | 2084 | for (n = 0; n < size; n += merging_snap->ti->max_io_len) |
2082 | if (__origin_write(&o->snapshots, sector + n, NULL) == | 2085 | if (__origin_write(&o->snapshots, sector + n, NULL) == |
2083 | DM_MAPIO_SUBMITTED) | 2086 | DM_MAPIO_SUBMITTED) |
2084 | must_wait = 1; | 2087 | must_wait = 1; |
@@ -2138,18 +2141,18 @@ static int origin_map(struct dm_target *ti, struct bio *bio, | |||
2138 | } | 2141 | } |
2139 | 2142 | ||
2140 | /* | 2143 | /* |
2141 | * Set the target "split_io" field to the minimum of all the snapshots' | 2144 | * Set the target "max_io_len" field to the minimum of all the snapshots' |
2142 | * chunk sizes. | 2145 | * chunk sizes. |
2143 | */ | 2146 | */ |
2144 | static void origin_resume(struct dm_target *ti) | 2147 | static void origin_resume(struct dm_target *ti) |
2145 | { | 2148 | { |
2146 | struct dm_dev *dev = ti->private; | 2149 | struct dm_dev *dev = ti->private; |
2147 | 2150 | ||
2148 | ti->split_io = get_origin_minimum_chunksize(dev->bdev); | 2151 | ti->max_io_len = get_origin_minimum_chunksize(dev->bdev); |
2149 | } | 2152 | } |
2150 | 2153 | ||
2151 | static int origin_status(struct dm_target *ti, status_type_t type, char *result, | 2154 | static int origin_status(struct dm_target *ti, status_type_t type, |
2152 | unsigned int maxlen) | 2155 | unsigned status_flags, char *result, unsigned maxlen) |
2153 | { | 2156 | { |
2154 | struct dm_dev *dev = ti->private; | 2157 | struct dm_dev *dev = ti->private; |
2155 | 2158 | ||
@@ -2176,7 +2179,6 @@ static int origin_merge(struct dm_target *ti, struct bvec_merge_data *bvm, | |||
2176 | return max_size; | 2179 | return max_size; |
2177 | 2180 | ||
2178 | bvm->bi_bdev = dev->bdev; | 2181 | bvm->bi_bdev = dev->bdev; |
2179 | bvm->bi_sector = bvm->bi_sector; | ||
2180 | 2182 | ||
2181 | return min(max_size, q->merge_bvec_fn(q, bvm, biovec)); | 2183 | return min(max_size, q->merge_bvec_fn(q, bvm, biovec)); |
2182 | } | 2184 | } |
diff --git a/drivers/md/dm-stripe.c b/drivers/md/dm-stripe.c index 35c94ff24ad5..a087bf2a8d66 100644 --- a/drivers/md/dm-stripe.c +++ b/drivers/md/dm-stripe.c | |||
@@ -26,14 +26,12 @@ struct stripe { | |||
26 | struct stripe_c { | 26 | struct stripe_c { |
27 | uint32_t stripes; | 27 | uint32_t stripes; |
28 | int stripes_shift; | 28 | int stripes_shift; |
29 | sector_t stripes_mask; | ||
30 | 29 | ||
31 | /* The size of this target / num. stripes */ | 30 | /* The size of this target / num. stripes */ |
32 | sector_t stripe_width; | 31 | sector_t stripe_width; |
33 | 32 | ||
34 | /* stripe chunk size */ | 33 | uint32_t chunk_size; |
35 | uint32_t chunk_shift; | 34 | int chunk_size_shift; |
36 | sector_t chunk_mask; | ||
37 | 35 | ||
38 | /* Needed for handling events */ | 36 | /* Needed for handling events */ |
39 | struct dm_target *ti; | 37 | struct dm_target *ti; |
@@ -91,7 +89,7 @@ static int get_stripe(struct dm_target *ti, struct stripe_c *sc, | |||
91 | 89 | ||
92 | /* | 90 | /* |
93 | * Construct a striped mapping. | 91 | * Construct a striped mapping. |
94 | * <number of stripes> <chunk size (2^^n)> [<dev_path> <offset>]+ | 92 | * <number of stripes> <chunk size> [<dev_path> <offset>]+ |
95 | */ | 93 | */ |
96 | static int stripe_ctr(struct dm_target *ti, unsigned int argc, char **argv) | 94 | static int stripe_ctr(struct dm_target *ti, unsigned int argc, char **argv) |
97 | { | 95 | { |
@@ -99,7 +97,6 @@ static int stripe_ctr(struct dm_target *ti, unsigned int argc, char **argv) | |||
99 | sector_t width; | 97 | sector_t width; |
100 | uint32_t stripes; | 98 | uint32_t stripes; |
101 | uint32_t chunk_size; | 99 | uint32_t chunk_size; |
102 | char *end; | ||
103 | int r; | 100 | int r; |
104 | unsigned int i; | 101 | unsigned int i; |
105 | 102 | ||
@@ -108,34 +105,23 @@ static int stripe_ctr(struct dm_target *ti, unsigned int argc, char **argv) | |||
108 | return -EINVAL; | 105 | return -EINVAL; |
109 | } | 106 | } |
110 | 107 | ||
111 | stripes = simple_strtoul(argv[0], &end, 10); | 108 | if (kstrtouint(argv[0], 10, &stripes) || !stripes) { |
112 | if (!stripes || *end) { | ||
113 | ti->error = "Invalid stripe count"; | 109 | ti->error = "Invalid stripe count"; |
114 | return -EINVAL; | 110 | return -EINVAL; |
115 | } | 111 | } |
116 | 112 | ||
117 | chunk_size = simple_strtoul(argv[1], &end, 10); | 113 | if (kstrtouint(argv[1], 10, &chunk_size) || !chunk_size) { |
118 | if (*end) { | ||
119 | ti->error = "Invalid chunk_size"; | 114 | ti->error = "Invalid chunk_size"; |
120 | return -EINVAL; | 115 | return -EINVAL; |
121 | } | 116 | } |
122 | 117 | ||
123 | /* | 118 | width = ti->len; |
124 | * chunk_size is a power of two | 119 | if (sector_div(width, chunk_size)) { |
125 | */ | ||
126 | if (!is_power_of_2(chunk_size) || | ||
127 | (chunk_size < (PAGE_SIZE >> SECTOR_SHIFT))) { | ||
128 | ti->error = "Invalid chunk size"; | ||
129 | return -EINVAL; | ||
130 | } | ||
131 | |||
132 | if (ti->len & (chunk_size - 1)) { | ||
133 | ti->error = "Target length not divisible by " | 120 | ti->error = "Target length not divisible by " |
134 | "chunk size"; | 121 | "chunk size"; |
135 | return -EINVAL; | 122 | return -EINVAL; |
136 | } | 123 | } |
137 | 124 | ||
138 | width = ti->len; | ||
139 | if (sector_div(width, stripes)) { | 125 | if (sector_div(width, stripes)) { |
140 | ti->error = "Target length not divisible by " | 126 | ti->error = "Target length not divisible by " |
141 | "number of stripes"; | 127 | "number of stripes"; |
@@ -167,17 +153,21 @@ static int stripe_ctr(struct dm_target *ti, unsigned int argc, char **argv) | |||
167 | 153 | ||
168 | if (stripes & (stripes - 1)) | 154 | if (stripes & (stripes - 1)) |
169 | sc->stripes_shift = -1; | 155 | sc->stripes_shift = -1; |
170 | else { | 156 | else |
171 | sc->stripes_shift = ffs(stripes) - 1; | 157 | sc->stripes_shift = __ffs(stripes); |
172 | sc->stripes_mask = ((sector_t) stripes) - 1; | 158 | |
173 | } | 159 | r = dm_set_target_max_io_len(ti, chunk_size); |
160 | if (r) | ||
161 | return r; | ||
174 | 162 | ||
175 | ti->split_io = chunk_size; | ||
176 | ti->num_flush_requests = stripes; | 163 | ti->num_flush_requests = stripes; |
177 | ti->num_discard_requests = stripes; | 164 | ti->num_discard_requests = stripes; |
178 | 165 | ||
179 | sc->chunk_shift = ffs(chunk_size) - 1; | 166 | sc->chunk_size = chunk_size; |
180 | sc->chunk_mask = ((sector_t) chunk_size) - 1; | 167 | if (chunk_size & (chunk_size - 1)) |
168 | sc->chunk_size_shift = -1; | ||
169 | else | ||
170 | sc->chunk_size_shift = __ffs(chunk_size); | ||
181 | 171 | ||
182 | /* | 172 | /* |
183 | * Get the stripe destinations. | 173 | * Get the stripe destinations. |
@@ -216,17 +206,29 @@ static void stripe_dtr(struct dm_target *ti) | |||
216 | static void stripe_map_sector(struct stripe_c *sc, sector_t sector, | 206 | static void stripe_map_sector(struct stripe_c *sc, sector_t sector, |
217 | uint32_t *stripe, sector_t *result) | 207 | uint32_t *stripe, sector_t *result) |
218 | { | 208 | { |
219 | sector_t offset = dm_target_offset(sc->ti, sector); | 209 | sector_t chunk = dm_target_offset(sc->ti, sector); |
220 | sector_t chunk = offset >> sc->chunk_shift; | 210 | sector_t chunk_offset; |
211 | |||
212 | if (sc->chunk_size_shift < 0) | ||
213 | chunk_offset = sector_div(chunk, sc->chunk_size); | ||
214 | else { | ||
215 | chunk_offset = chunk & (sc->chunk_size - 1); | ||
216 | chunk >>= sc->chunk_size_shift; | ||
217 | } | ||
221 | 218 | ||
222 | if (sc->stripes_shift < 0) | 219 | if (sc->stripes_shift < 0) |
223 | *stripe = sector_div(chunk, sc->stripes); | 220 | *stripe = sector_div(chunk, sc->stripes); |
224 | else { | 221 | else { |
225 | *stripe = chunk & sc->stripes_mask; | 222 | *stripe = chunk & (sc->stripes - 1); |
226 | chunk >>= sc->stripes_shift; | 223 | chunk >>= sc->stripes_shift; |
227 | } | 224 | } |
228 | 225 | ||
229 | *result = (chunk << sc->chunk_shift) | (offset & sc->chunk_mask); | 226 | if (sc->chunk_size_shift < 0) |
227 | chunk *= sc->chunk_size; | ||
228 | else | ||
229 | chunk <<= sc->chunk_size_shift; | ||
230 | |||
231 | *result = chunk + chunk_offset; | ||
230 | } | 232 | } |
231 | 233 | ||
232 | static void stripe_map_range_sector(struct stripe_c *sc, sector_t sector, | 234 | static void stripe_map_range_sector(struct stripe_c *sc, sector_t sector, |
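[Annotation] stripe_map_sector() above now copes with chunk sizes that are not powers of two: the shift/mask fast path survives only when the size happens to be a power of two, and 64-bit division takes over otherwise. Below is a userspace sketch of the same mapping, collapsed to the division form (the shift form is just an optimisation of it); the values in main() are invented.

#include <stdint.h>
#include <stdio.h>

static void stripe_map_sector_sketch(uint32_t stripes, uint32_t chunk_size,
				     uint64_t offset, uint32_t *stripe,
				     uint64_t *result)
{
	uint64_t chunk = offset / chunk_size;		/* which chunk in the target */
	uint64_t chunk_offset = offset % chunk_size;	/* position inside the chunk */

	*stripe = (uint32_t)(chunk % stripes);		/* which device */
	chunk /= stripes;				/* chunk index on that device */

	*result = chunk * chunk_size + chunk_offset;	/* sector on that device */
}

int main(void)
{
	uint32_t stripe;
	uint64_t result;

	stripe_map_sector_sketch(3, 24 /* not a power of two */, 1000,
				 &stripe, &result);
	printf("stripe %u, sector %llu on that device\n",
	       stripe, (unsigned long long)result);
	return 0;
}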
@@ -237,9 +239,16 @@ static void stripe_map_range_sector(struct stripe_c *sc, sector_t sector, | |||
237 | stripe_map_sector(sc, sector, &stripe, result); | 239 | stripe_map_sector(sc, sector, &stripe, result); |
238 | if (stripe == target_stripe) | 240 | if (stripe == target_stripe) |
239 | return; | 241 | return; |
240 | *result &= ~sc->chunk_mask; /* round down */ | 242 | |
243 | /* round down */ | ||
244 | sector = *result; | ||
245 | if (sc->chunk_size_shift < 0) | ||
246 | *result -= sector_div(sector, sc->chunk_size); | ||
247 | else | ||
248 | *result = sector & ~(sector_t)(sc->chunk_size - 1); | ||
249 | |||
241 | if (target_stripe < stripe) | 250 | if (target_stripe < stripe) |
242 | *result += sc->chunk_mask + 1; /* next chunk */ | 251 | *result += sc->chunk_size; /* next chunk */ |
243 | } | 252 | } |
244 | 253 | ||
245 | static int stripe_map_discard(struct stripe_c *sc, struct bio *bio, | 254 | static int stripe_map_discard(struct stripe_c *sc, struct bio *bio, |
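[Annotation] The round-down in stripe_map_range_sector() changes shape for the same reason: with a power-of-two chunk the old chunk_mask trick and the new modulo form agree, but only the modulo form works for arbitrary sizes. A quick check of that equivalence (values invented):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t sector = 1000;
	uint32_t chunk_size = 64;	/* power of two: both forms apply */

	uint64_t by_mask = sector & ~(uint64_t)(chunk_size - 1);
	uint64_t by_mod  = sector - sector % chunk_size;

	printf("mask: %llu, modulo: %llu\n",
	       (unsigned long long)by_mask, (unsigned long long)by_mod);

	chunk_size = 24;		/* not a power of two: only modulo is valid */
	printf("modulo only: %llu\n",
	       (unsigned long long)(sector - sector % chunk_size));
	return 0;
}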
@@ -302,8 +311,8 @@ static int stripe_map(struct dm_target *ti, struct bio *bio, | |||
302 | * | 311 | * |
303 | */ | 312 | */ |
304 | 313 | ||
305 | static int stripe_status(struct dm_target *ti, | 314 | static int stripe_status(struct dm_target *ti, status_type_t type, |
306 | status_type_t type, char *result, unsigned int maxlen) | 315 | unsigned status_flags, char *result, unsigned maxlen) |
307 | { | 316 | { |
308 | struct stripe_c *sc = (struct stripe_c *) ti->private; | 317 | struct stripe_c *sc = (struct stripe_c *) ti->private; |
309 | char buffer[sc->stripes + 1]; | 318 | char buffer[sc->stripes + 1]; |
@@ -324,7 +333,7 @@ static int stripe_status(struct dm_target *ti, | |||
324 | 333 | ||
325 | case STATUSTYPE_TABLE: | 334 | case STATUSTYPE_TABLE: |
326 | DMEMIT("%d %llu", sc->stripes, | 335 | DMEMIT("%d %llu", sc->stripes, |
327 | (unsigned long long)sc->chunk_mask + 1); | 336 | (unsigned long long)sc->chunk_size); |
328 | for (i = 0; i < sc->stripes; i++) | 337 | for (i = 0; i < sc->stripes; i++) |
329 | DMEMIT(" %s %llu", sc->stripe[i].dev->name, | 338 | DMEMIT(" %s %llu", sc->stripe[i].dev->name, |
330 | (unsigned long long)sc->stripe[i].physical_start); | 339 | (unsigned long long)sc->stripe[i].physical_start); |
@@ -391,7 +400,7 @@ static void stripe_io_hints(struct dm_target *ti, | |||
391 | struct queue_limits *limits) | 400 | struct queue_limits *limits) |
392 | { | 401 | { |
393 | struct stripe_c *sc = ti->private; | 402 | struct stripe_c *sc = ti->private; |
394 | unsigned chunk_size = (sc->chunk_mask + 1) << 9; | 403 | unsigned chunk_size = sc->chunk_size << SECTOR_SHIFT; |
395 | 404 | ||
396 | blk_limits_io_min(limits, chunk_size); | 405 | blk_limits_io_min(limits, chunk_size); |
397 | blk_limits_io_opt(limits, chunk_size * sc->stripes); | 406 | blk_limits_io_opt(limits, chunk_size * sc->stripes); |
@@ -419,7 +428,7 @@ static int stripe_merge(struct dm_target *ti, struct bvec_merge_data *bvm, | |||
419 | 428 | ||
420 | static struct target_type stripe_target = { | 429 | static struct target_type stripe_target = { |
421 | .name = "striped", | 430 | .name = "striped", |
422 | .version = {1, 4, 0}, | 431 | .version = {1, 5, 0}, |
423 | .module = THIS_MODULE, | 432 | .module = THIS_MODULE, |
424 | .ctr = stripe_ctr, | 433 | .ctr = stripe_ctr, |
425 | .dtr = stripe_dtr, | 434 | .dtr = stripe_dtr, |
diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index 2e227fbf1622..f90069029aae 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c | |||
@@ -1319,6 +1319,9 @@ static bool dm_table_supports_flush(struct dm_table *t, unsigned flush) | |||
1319 | if (!ti->num_flush_requests) | 1319 | if (!ti->num_flush_requests) |
1320 | continue; | 1320 | continue; |
1321 | 1321 | ||
1322 | if (ti->flush_supported) | ||
1323 | return 1; | ||
1324 | |||
1322 | if (ti->type->iterate_devices && | 1325 | if (ti->type->iterate_devices && |
1323 | ti->type->iterate_devices(ti, device_flush_capable, &flush)) | 1326 | ti->type->iterate_devices(ti, device_flush_capable, &flush)) |
1324 | return 1; | 1327 | return 1; |
diff --git a/drivers/md/dm-thin-metadata.c b/drivers/md/dm-thin-metadata.c index 3e2907f0bc46..693e149e9727 100644 --- a/drivers/md/dm-thin-metadata.c +++ b/drivers/md/dm-thin-metadata.c | |||
@@ -1,5 +1,5 @@ | |||
1 | /* | 1 | /* |
2 | * Copyright (C) 2011 Red Hat, Inc. | 2 | * Copyright (C) 2011-2012 Red Hat, Inc. |
3 | * | 3 | * |
4 | * This file is released under the GPL. | 4 | * This file is released under the GPL. |
5 | */ | 5 | */ |
@@ -80,6 +80,12 @@ | |||
80 | #define THIN_METADATA_CACHE_SIZE 64 | 80 | #define THIN_METADATA_CACHE_SIZE 64 |
81 | #define SECTOR_TO_BLOCK_SHIFT 3 | 81 | #define SECTOR_TO_BLOCK_SHIFT 3 |
82 | 82 | ||
83 | /* | ||
84 | * 3 for btree insert + | ||
85 | * 2 for btree lookup used within space map | ||
86 | */ | ||
87 | #define THIN_MAX_CONCURRENT_LOCKS 5 | ||
88 | |||
83 | /* This should be plenty */ | 89 | /* This should be plenty */ |
84 | #define SPACE_MAP_ROOT_SIZE 128 | 90 | #define SPACE_MAP_ROOT_SIZE 128 |
85 | 91 | ||
@@ -172,13 +178,20 @@ struct dm_pool_metadata { | |||
172 | 178 | ||
173 | struct rw_semaphore root_lock; | 179 | struct rw_semaphore root_lock; |
174 | uint32_t time; | 180 | uint32_t time; |
175 | int need_commit; | ||
176 | dm_block_t root; | 181 | dm_block_t root; |
177 | dm_block_t details_root; | 182 | dm_block_t details_root; |
178 | struct list_head thin_devices; | 183 | struct list_head thin_devices; |
179 | uint64_t trans_id; | 184 | uint64_t trans_id; |
180 | unsigned long flags; | 185 | unsigned long flags; |
181 | sector_t data_block_size; | 186 | sector_t data_block_size; |
187 | bool read_only:1; | ||
188 | |||
189 | /* | ||
190 | * Set if a transaction has to be aborted but the attempt to roll back | ||
191 | * to the previous (good) transaction failed. The only pool metadata | ||
192 | * operation possible in this state is the closing of the device. | ||
193 | */ | ||
194 | bool fail_io:1; | ||
182 | }; | 195 | }; |
183 | 196 | ||
184 | struct dm_thin_device { | 197 | struct dm_thin_device { |
@@ -187,7 +200,8 @@ struct dm_thin_device { | |||
187 | dm_thin_id id; | 200 | dm_thin_id id; |
188 | 201 | ||
189 | int open_count; | 202 | int open_count; |
190 | int changed; | 203 | bool changed:1; |
204 | bool aborted_with_changes:1; | ||
191 | uint64_t mapped_blocks; | 205 | uint64_t mapped_blocks; |
192 | uint64_t transaction_id; | 206 | uint64_t transaction_id; |
193 | uint32_t creation_time; | 207 | uint32_t creation_time; |
@@ -338,7 +352,21 @@ static int subtree_equal(void *context, void *value1_le, void *value2_le) | |||
338 | 352 | ||
339 | /*----------------------------------------------------------------*/ | 353 | /*----------------------------------------------------------------*/ |
340 | 354 | ||
341 | static int superblock_all_zeroes(struct dm_block_manager *bm, int *result) | 355 | static int superblock_lock_zero(struct dm_pool_metadata *pmd, |
356 | struct dm_block **sblock) | ||
357 | { | ||
358 | return dm_bm_write_lock_zero(pmd->bm, THIN_SUPERBLOCK_LOCATION, | ||
359 | &sb_validator, sblock); | ||
360 | } | ||
361 | |||
362 | static int superblock_lock(struct dm_pool_metadata *pmd, | ||
363 | struct dm_block **sblock) | ||
364 | { | ||
365 | return dm_bm_write_lock(pmd->bm, THIN_SUPERBLOCK_LOCATION, | ||
366 | &sb_validator, sblock); | ||
367 | } | ||
368 | |||
369 | static int __superblock_all_zeroes(struct dm_block_manager *bm, int *result) | ||
342 | { | 370 | { |
343 | int r; | 371 | int r; |
344 | unsigned i; | 372 | unsigned i; |
@@ -365,72 +393,9 @@ static int superblock_all_zeroes(struct dm_block_manager *bm, int *result) | |||
365 | return dm_bm_unlock(b); | 393 | return dm_bm_unlock(b); |
366 | } | 394 | } |
367 | 395 | ||
368 | static int init_pmd(struct dm_pool_metadata *pmd, | 396 | static void __setup_btree_details(struct dm_pool_metadata *pmd) |
369 | struct dm_block_manager *bm, | ||
370 | dm_block_t nr_blocks, int create) | ||
371 | { | 397 | { |
372 | int r; | 398 | pmd->info.tm = pmd->tm; |
373 | struct dm_space_map *sm, *data_sm; | ||
374 | struct dm_transaction_manager *tm; | ||
375 | struct dm_block *sblock; | ||
376 | |||
377 | if (create) { | ||
378 | r = dm_tm_create_with_sm(bm, THIN_SUPERBLOCK_LOCATION, | ||
379 | &sb_validator, &tm, &sm, &sblock); | ||
380 | if (r < 0) { | ||
381 | DMERR("tm_create_with_sm failed"); | ||
382 | return r; | ||
383 | } | ||
384 | |||
385 | data_sm = dm_sm_disk_create(tm, nr_blocks); | ||
386 | if (IS_ERR(data_sm)) { | ||
387 | DMERR("sm_disk_create failed"); | ||
388 | dm_tm_unlock(tm, sblock); | ||
389 | r = PTR_ERR(data_sm); | ||
390 | goto bad; | ||
391 | } | ||
392 | } else { | ||
393 | struct thin_disk_superblock *disk_super = NULL; | ||
394 | size_t space_map_root_offset = | ||
395 | offsetof(struct thin_disk_superblock, metadata_space_map_root); | ||
396 | |||
397 | r = dm_tm_open_with_sm(bm, THIN_SUPERBLOCK_LOCATION, | ||
398 | &sb_validator, space_map_root_offset, | ||
399 | SPACE_MAP_ROOT_SIZE, &tm, &sm, &sblock); | ||
400 | if (r < 0) { | ||
401 | DMERR("tm_open_with_sm failed"); | ||
402 | return r; | ||
403 | } | ||
404 | |||
405 | disk_super = dm_block_data(sblock); | ||
406 | data_sm = dm_sm_disk_open(tm, disk_super->data_space_map_root, | ||
407 | sizeof(disk_super->data_space_map_root)); | ||
408 | if (IS_ERR(data_sm)) { | ||
409 | DMERR("sm_disk_open failed"); | ||
410 | r = PTR_ERR(data_sm); | ||
411 | goto bad; | ||
412 | } | ||
413 | } | ||
414 | |||
415 | |||
416 | r = dm_tm_unlock(tm, sblock); | ||
417 | if (r < 0) { | ||
418 | DMERR("couldn't unlock superblock"); | ||
419 | goto bad_data_sm; | ||
420 | } | ||
421 | |||
422 | pmd->bm = bm; | ||
423 | pmd->metadata_sm = sm; | ||
424 | pmd->data_sm = data_sm; | ||
425 | pmd->tm = tm; | ||
426 | pmd->nb_tm = dm_tm_create_non_blocking_clone(tm); | ||
427 | if (!pmd->nb_tm) { | ||
428 | DMERR("could not create clone tm"); | ||
429 | r = -ENOMEM; | ||
430 | goto bad_data_sm; | ||
431 | } | ||
432 | |||
433 | pmd->info.tm = tm; | ||
434 | pmd->info.levels = 2; | 399 | pmd->info.levels = 2; |
435 | pmd->info.value_type.context = pmd->data_sm; | 400 | pmd->info.value_type.context = pmd->data_sm; |
436 | pmd->info.value_type.size = sizeof(__le64); | 401 | pmd->info.value_type.size = sizeof(__le64); |
@@ -441,7 +406,7 @@ static int init_pmd(struct dm_pool_metadata *pmd, | |||
441 | memcpy(&pmd->nb_info, &pmd->info, sizeof(pmd->nb_info)); | 406 | memcpy(&pmd->nb_info, &pmd->info, sizeof(pmd->nb_info)); |
442 | pmd->nb_info.tm = pmd->nb_tm; | 407 | pmd->nb_info.tm = pmd->nb_tm; |
443 | 408 | ||
444 | pmd->tl_info.tm = tm; | 409 | pmd->tl_info.tm = pmd->tm; |
445 | pmd->tl_info.levels = 1; | 410 | pmd->tl_info.levels = 1; |
446 | pmd->tl_info.value_type.context = &pmd->info; | 411 | pmd->tl_info.value_type.context = &pmd->info; |
447 | pmd->tl_info.value_type.size = sizeof(__le64); | 412 | pmd->tl_info.value_type.size = sizeof(__le64); |
@@ -449,7 +414,7 @@ static int init_pmd(struct dm_pool_metadata *pmd, | |||
449 | pmd->tl_info.value_type.dec = subtree_dec; | 414 | pmd->tl_info.value_type.dec = subtree_dec; |
450 | pmd->tl_info.value_type.equal = subtree_equal; | 415 | pmd->tl_info.value_type.equal = subtree_equal; |
451 | 416 | ||
452 | pmd->bl_info.tm = tm; | 417 | pmd->bl_info.tm = pmd->tm; |
453 | pmd->bl_info.levels = 1; | 418 | pmd->bl_info.levels = 1; |
454 | pmd->bl_info.value_type.context = pmd->data_sm; | 419 | pmd->bl_info.value_type.context = pmd->data_sm; |
455 | pmd->bl_info.value_type.size = sizeof(__le64); | 420 | pmd->bl_info.value_type.size = sizeof(__le64); |
@@ -457,48 +422,266 @@ static int init_pmd(struct dm_pool_metadata *pmd, | |||
457 | pmd->bl_info.value_type.dec = data_block_dec; | 422 | pmd->bl_info.value_type.dec = data_block_dec; |
458 | pmd->bl_info.value_type.equal = data_block_equal; | 423 | pmd->bl_info.value_type.equal = data_block_equal; |
459 | 424 | ||
460 | pmd->details_info.tm = tm; | 425 | pmd->details_info.tm = pmd->tm; |
461 | pmd->details_info.levels = 1; | 426 | pmd->details_info.levels = 1; |
462 | pmd->details_info.value_type.context = NULL; | 427 | pmd->details_info.value_type.context = NULL; |
463 | pmd->details_info.value_type.size = sizeof(struct disk_device_details); | 428 | pmd->details_info.value_type.size = sizeof(struct disk_device_details); |
464 | pmd->details_info.value_type.inc = NULL; | 429 | pmd->details_info.value_type.inc = NULL; |
465 | pmd->details_info.value_type.dec = NULL; | 430 | pmd->details_info.value_type.dec = NULL; |
466 | pmd->details_info.value_type.equal = NULL; | 431 | pmd->details_info.value_type.equal = NULL; |
432 | } | ||
467 | 433 | ||
468 | pmd->root = 0; | 434 | static int __write_initial_superblock(struct dm_pool_metadata *pmd) |
435 | { | ||
436 | int r; | ||
437 | struct dm_block *sblock; | ||
438 | size_t metadata_len, data_len; | ||
439 | struct thin_disk_superblock *disk_super; | ||
440 | sector_t bdev_size = i_size_read(pmd->bdev->bd_inode) >> SECTOR_SHIFT; | ||
469 | 441 | ||
470 | init_rwsem(&pmd->root_lock); | 442 | if (bdev_size > THIN_METADATA_MAX_SECTORS) |
471 | pmd->time = 0; | 443 | bdev_size = THIN_METADATA_MAX_SECTORS; |
472 | pmd->need_commit = 0; | 444 | |
473 | pmd->details_root = 0; | 445 | r = dm_sm_root_size(pmd->metadata_sm, &metadata_len); |
474 | pmd->trans_id = 0; | 446 | if (r < 0) |
475 | pmd->flags = 0; | 447 | return r; |
476 | INIT_LIST_HEAD(&pmd->thin_devices); | 448 | |
449 | r = dm_sm_root_size(pmd->data_sm, &data_len); | ||
450 | if (r < 0) | ||
451 | return r; | ||
452 | |||
453 | r = dm_sm_commit(pmd->data_sm); | ||
454 | if (r < 0) | ||
455 | return r; | ||
456 | |||
457 | r = dm_tm_pre_commit(pmd->tm); | ||
458 | if (r < 0) | ||
459 | return r; | ||
460 | |||
461 | r = superblock_lock_zero(pmd, &sblock); | ||
462 | if (r) | ||
463 | return r; | ||
464 | |||
465 | disk_super = dm_block_data(sblock); | ||
466 | disk_super->flags = 0; | ||
467 | memset(disk_super->uuid, 0, sizeof(disk_super->uuid)); | ||
468 | disk_super->magic = cpu_to_le64(THIN_SUPERBLOCK_MAGIC); | ||
469 | disk_super->version = cpu_to_le32(THIN_VERSION); | ||
470 | disk_super->time = 0; | ||
471 | disk_super->trans_id = 0; | ||
472 | disk_super->held_root = 0; | ||
473 | |||
474 | r = dm_sm_copy_root(pmd->metadata_sm, &disk_super->metadata_space_map_root, | ||
475 | metadata_len); | ||
476 | if (r < 0) | ||
477 | goto bad_locked; | ||
478 | |||
479 | r = dm_sm_copy_root(pmd->data_sm, &disk_super->data_space_map_root, | ||
480 | data_len); | ||
481 | if (r < 0) | ||
482 | goto bad_locked; | ||
483 | |||
484 | disk_super->data_mapping_root = cpu_to_le64(pmd->root); | ||
485 | disk_super->device_details_root = cpu_to_le64(pmd->details_root); | ||
486 | disk_super->metadata_block_size = cpu_to_le32(THIN_METADATA_BLOCK_SIZE >> SECTOR_SHIFT); | ||
487 | disk_super->metadata_nr_blocks = cpu_to_le64(bdev_size >> SECTOR_TO_BLOCK_SHIFT); | ||
488 | disk_super->data_block_size = cpu_to_le32(pmd->data_block_size); | ||
489 | |||
490 | return dm_tm_commit(pmd->tm, sblock); | ||
491 | |||
492 | bad_locked: | ||
493 | dm_bm_unlock(sblock); | ||
494 | return r; | ||
495 | } | ||
496 | |||
497 | static int __format_metadata(struct dm_pool_metadata *pmd) | ||
498 | { | ||
499 | int r; | ||
500 | |||
501 | r = dm_tm_create_with_sm(pmd->bm, THIN_SUPERBLOCK_LOCATION, | ||
502 | &pmd->tm, &pmd->metadata_sm); | ||
503 | if (r < 0) { | ||
504 | DMERR("tm_create_with_sm failed"); | ||
505 | return r; | ||
506 | } | ||
507 | |||
508 | pmd->data_sm = dm_sm_disk_create(pmd->tm, 0); | ||
509 | if (IS_ERR(pmd->data_sm)) { | ||
510 | DMERR("sm_disk_create failed"); | ||
511 | r = PTR_ERR(pmd->data_sm); | ||
512 | goto bad_cleanup_tm; | ||
513 | } | ||
514 | |||
515 | pmd->nb_tm = dm_tm_create_non_blocking_clone(pmd->tm); | ||
516 | if (!pmd->nb_tm) { | ||
517 | DMERR("could not create non-blocking clone tm"); | ||
518 | r = -ENOMEM; | ||
519 | goto bad_cleanup_data_sm; | ||
520 | } | ||
521 | |||
522 | __setup_btree_details(pmd); | ||
523 | |||
524 | r = dm_btree_empty(&pmd->info, &pmd->root); | ||
525 | if (r < 0) | ||
526 | goto bad_cleanup_nb_tm; | ||
527 | |||
528 | r = dm_btree_empty(&pmd->details_info, &pmd->details_root); | ||
529 | if (r < 0) { | ||
530 | DMERR("couldn't create devices root"); | ||
531 | goto bad_cleanup_nb_tm; | ||
532 | } | ||
533 | |||
534 | r = __write_initial_superblock(pmd); | ||
535 | if (r) | ||
536 | goto bad_cleanup_nb_tm; | ||
477 | 537 | ||
478 | return 0; | 538 | return 0; |
479 | 539 | ||
480 | bad_data_sm: | 540 | bad_cleanup_nb_tm: |
481 | dm_sm_destroy(data_sm); | 541 | dm_tm_destroy(pmd->nb_tm); |
482 | bad: | 542 | bad_cleanup_data_sm: |
483 | dm_tm_destroy(tm); | 543 | dm_sm_destroy(pmd->data_sm); |
484 | dm_sm_destroy(sm); | 544 | bad_cleanup_tm: |
545 | dm_tm_destroy(pmd->tm); | ||
546 | dm_sm_destroy(pmd->metadata_sm); | ||
547 | |||
548 | return r; | ||
549 | } | ||
550 | |||
551 | static int __check_incompat_features(struct thin_disk_superblock *disk_super, | ||
552 | struct dm_pool_metadata *pmd) | ||
553 | { | ||
554 | uint32_t features; | ||
555 | |||
556 | features = le32_to_cpu(disk_super->incompat_flags) & ~THIN_FEATURE_INCOMPAT_SUPP; | ||
557 | if (features) { | ||
558 | DMERR("could not access metadata due to unsupported optional features (%lx).", | ||
559 | (unsigned long)features); | ||
560 | return -EINVAL; | ||
561 | } | ||
562 | |||
563 | /* | ||
564 | * Check for read-only metadata to skip the following RDWR checks. | ||
565 | */ | ||
566 | if (get_disk_ro(pmd->bdev->bd_disk)) | ||
567 | return 0; | ||
568 | |||
569 | features = le32_to_cpu(disk_super->compat_ro_flags) & ~THIN_FEATURE_COMPAT_RO_SUPP; | ||
570 | if (features) { | ||
571 | DMERR("could not access metadata RDWR due to unsupported optional features (%lx).", | ||
572 | (unsigned long)features); | ||
573 | return -EINVAL; | ||
574 | } | ||
575 | |||
576 | return 0; | ||
577 | } | ||
578 | |||
579 | static int __open_metadata(struct dm_pool_metadata *pmd) | ||
580 | { | ||
581 | int r; | ||
582 | struct dm_block *sblock; | ||
583 | struct thin_disk_superblock *disk_super; | ||
584 | |||
585 | r = dm_bm_read_lock(pmd->bm, THIN_SUPERBLOCK_LOCATION, | ||
586 | &sb_validator, &sblock); | ||
587 | if (r < 0) { | ||
588 | DMERR("couldn't read superblock"); | ||
589 | return r; | ||
590 | } | ||
591 | |||
592 | disk_super = dm_block_data(sblock); | ||
593 | |||
594 | r = __check_incompat_features(disk_super, pmd); | ||
595 | if (r < 0) | ||
596 | goto bad_unlock_sblock; | ||
597 | |||
598 | r = dm_tm_open_with_sm(pmd->bm, THIN_SUPERBLOCK_LOCATION, | ||
599 | disk_super->metadata_space_map_root, | ||
600 | sizeof(disk_super->metadata_space_map_root), | ||
601 | &pmd->tm, &pmd->metadata_sm); | ||
602 | if (r < 0) { | ||
603 | DMERR("tm_open_with_sm failed"); | ||
604 | goto bad_unlock_sblock; | ||
605 | } | ||
606 | |||
607 | pmd->data_sm = dm_sm_disk_open(pmd->tm, disk_super->data_space_map_root, | ||
608 | sizeof(disk_super->data_space_map_root)); | ||
609 | if (IS_ERR(pmd->data_sm)) { | ||
610 | DMERR("sm_disk_open failed"); | ||
611 | r = PTR_ERR(pmd->data_sm); | ||
612 | goto bad_cleanup_tm; | ||
613 | } | ||
614 | |||
615 | pmd->nb_tm = dm_tm_create_non_blocking_clone(pmd->tm); | ||
616 | if (!pmd->nb_tm) { | ||
617 | DMERR("could not create non-blocking clone tm"); | ||
618 | r = -ENOMEM; | ||
619 | goto bad_cleanup_data_sm; | ||
620 | } | ||
621 | |||
622 | __setup_btree_details(pmd); | ||
623 | return dm_bm_unlock(sblock); | ||
624 | |||
625 | bad_cleanup_data_sm: | ||
626 | dm_sm_destroy(pmd->data_sm); | ||
627 | bad_cleanup_tm: | ||
628 | dm_tm_destroy(pmd->tm); | ||
629 | dm_sm_destroy(pmd->metadata_sm); | ||
630 | bad_unlock_sblock: | ||
631 | dm_bm_unlock(sblock); | ||
632 | |||
633 | return r; | ||
634 | } | ||
635 | |||
636 | static int __open_or_format_metadata(struct dm_pool_metadata *pmd, bool format_device) | ||
637 | { | ||
638 | int r, unformatted; | ||
639 | |||
640 | r = __superblock_all_zeroes(pmd->bm, &unformatted); | ||
641 | if (r) | ||
642 | return r; | ||
643 | |||
644 | if (unformatted) | ||
645 | return format_device ? __format_metadata(pmd) : -EPERM; | ||
646 | |||
647 | return __open_metadata(pmd); | ||
648 | } | ||
649 | |||
650 | static int __create_persistent_data_objects(struct dm_pool_metadata *pmd, bool format_device) | ||
651 | { | ||
652 | int r; | ||
653 | |||
654 | pmd->bm = dm_block_manager_create(pmd->bdev, THIN_METADATA_BLOCK_SIZE, | ||
655 | THIN_METADATA_CACHE_SIZE, | ||
656 | THIN_MAX_CONCURRENT_LOCKS); | ||
657 | if (IS_ERR(pmd->bm)) { | ||
658 | DMERR("could not create block manager"); | ||
659 | return PTR_ERR(pmd->bm); | ||
660 | } | ||
661 | |||
662 | r = __open_or_format_metadata(pmd, format_device); | ||
663 | if (r) | ||
664 | dm_block_manager_destroy(pmd->bm); | ||
485 | 665 | ||
486 | return r; | 666 | return r; |
487 | } | 667 | } |
488 | 668 | ||
669 | static void __destroy_persistent_data_objects(struct dm_pool_metadata *pmd) | ||
670 | { | ||
671 | dm_sm_destroy(pmd->data_sm); | ||
672 | dm_sm_destroy(pmd->metadata_sm); | ||
673 | dm_tm_destroy(pmd->nb_tm); | ||
674 | dm_tm_destroy(pmd->tm); | ||
675 | dm_block_manager_destroy(pmd->bm); | ||
676 | } | ||
677 | |||
489 | static int __begin_transaction(struct dm_pool_metadata *pmd) | 678 | static int __begin_transaction(struct dm_pool_metadata *pmd) |
490 | { | 679 | { |
491 | int r; | 680 | int r; |
492 | u32 features; | ||
493 | struct thin_disk_superblock *disk_super; | 681 | struct thin_disk_superblock *disk_super; |
494 | struct dm_block *sblock; | 682 | struct dm_block *sblock; |
495 | 683 | ||
496 | /* | 684 | /* |
497 | * __maybe_commit_transaction() resets these | ||
498 | */ | ||
499 | WARN_ON(pmd->need_commit); | ||
500 | |||
501 | /* | ||
502 | * We re-read the superblock every time. Shouldn't need to do this | 685 | * We re-read the superblock every time. Shouldn't need to do this |
503 | * really. | 686 | * really. |
504 | */ | 687 | */ |
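[Annotation] __check_incompat_features() above is a straight hoist of the flag checks that used to live in __begin_transaction(): any on-disk flag outside the supported mask aborts the open, and the read-write mask is only consulted when the device is writable. Here is a tiny userspace demo of the masking itself; the mask and flag values are invented, not the real THIN_FEATURE_* constants.

#include <stdint.h>
#include <stdio.h>

#define SUPPORTED_INCOMPAT_MASK 0x0u	/* pretend no optional features are known */

int main(void)
{
	uint32_t on_disk = 0x4;		/* hypothetical flag set by a newer tool */
	uint32_t unknown = on_disk & ~SUPPORTED_INCOMPAT_MASK;

	if (unknown)
		printf("refusing to open metadata: unsupported features 0x%x\n",
		       unknown);
	return 0;
}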
@@ -515,32 +698,8 @@ static int __begin_transaction(struct dm_pool_metadata *pmd) | |||
515 | pmd->flags = le32_to_cpu(disk_super->flags); | 698 | pmd->flags = le32_to_cpu(disk_super->flags); |
516 | pmd->data_block_size = le32_to_cpu(disk_super->data_block_size); | 699 | pmd->data_block_size = le32_to_cpu(disk_super->data_block_size); |
517 | 700 | ||
518 | features = le32_to_cpu(disk_super->incompat_flags) & ~THIN_FEATURE_INCOMPAT_SUPP; | ||
519 | if (features) { | ||
520 | DMERR("could not access metadata due to " | ||
521 | "unsupported optional features (%lx).", | ||
522 | (unsigned long)features); | ||
523 | r = -EINVAL; | ||
524 | goto out; | ||
525 | } | ||
526 | |||
527 | /* | ||
528 | * Check for read-only metadata to skip the following RDWR checks. | ||
529 | */ | ||
530 | if (get_disk_ro(pmd->bdev->bd_disk)) | ||
531 | goto out; | ||
532 | |||
533 | features = le32_to_cpu(disk_super->compat_ro_flags) & ~THIN_FEATURE_COMPAT_RO_SUPP; | ||
534 | if (features) { | ||
535 | DMERR("could not access metadata RDWR due to " | ||
536 | "unsupported optional features (%lx).", | ||
537 | (unsigned long)features); | ||
538 | r = -EINVAL; | ||
539 | } | ||
540 | |||
541 | out: | ||
542 | dm_bm_unlock(sblock); | 701 | dm_bm_unlock(sblock); |
543 | return r; | 702 | return 0; |
544 | } | 703 | } |
545 | 704 | ||
546 | static int __write_changed_details(struct dm_pool_metadata *pmd) | 705 | static int __write_changed_details(struct dm_pool_metadata *pmd) |
@@ -573,8 +732,6 @@ static int __write_changed_details(struct dm_pool_metadata *pmd) | |||
573 | list_del(&td->list); | 732 | list_del(&td->list); |
574 | kfree(td); | 733 | kfree(td); |
575 | } | 734 | } |
576 | |||
577 | pmd->need_commit = 1; | ||
578 | } | 735 | } |
579 | 736 | ||
580 | return 0; | 737 | return 0; |
@@ -582,9 +739,6 @@ static int __write_changed_details(struct dm_pool_metadata *pmd) | |||
582 | 739 | ||
583 | static int __commit_transaction(struct dm_pool_metadata *pmd) | 740 | static int __commit_transaction(struct dm_pool_metadata *pmd) |
584 | { | 741 | { |
585 | /* | ||
586 | * FIXME: Associated pool should be made read-only on failure. | ||
587 | */ | ||
588 | int r; | 742 | int r; |
589 | size_t metadata_len, data_len; | 743 | size_t metadata_len, data_len; |
590 | struct thin_disk_superblock *disk_super; | 744 | struct thin_disk_superblock *disk_super; |
@@ -597,31 +751,27 @@ static int __commit_transaction(struct dm_pool_metadata *pmd) | |||
597 | 751 | ||
598 | r = __write_changed_details(pmd); | 752 | r = __write_changed_details(pmd); |
599 | if (r < 0) | 753 | if (r < 0) |
600 | goto out; | 754 | return r; |
601 | |||
602 | if (!pmd->need_commit) | ||
603 | goto out; | ||
604 | 755 | ||
605 | r = dm_sm_commit(pmd->data_sm); | 756 | r = dm_sm_commit(pmd->data_sm); |
606 | if (r < 0) | 757 | if (r < 0) |
607 | goto out; | 758 | return r; |
608 | 759 | ||
609 | r = dm_tm_pre_commit(pmd->tm); | 760 | r = dm_tm_pre_commit(pmd->tm); |
610 | if (r < 0) | 761 | if (r < 0) |
611 | goto out; | 762 | return r; |
612 | 763 | ||
613 | r = dm_sm_root_size(pmd->metadata_sm, &metadata_len); | 764 | r = dm_sm_root_size(pmd->metadata_sm, &metadata_len); |
614 | if (r < 0) | 765 | if (r < 0) |
615 | goto out; | 766 | return r; |
616 | 767 | ||
617 | r = dm_sm_root_size(pmd->data_sm, &data_len); | 768 | r = dm_sm_root_size(pmd->data_sm, &data_len); |
618 | if (r < 0) | 769 | if (r < 0) |
619 | goto out; | 770 | return r; |
620 | 771 | ||
621 | r = dm_bm_write_lock(pmd->bm, THIN_SUPERBLOCK_LOCATION, | 772 | r = superblock_lock(pmd, &sblock); |
622 | &sb_validator, &sblock); | ||
623 | if (r) | 773 | if (r) |
624 | goto out; | 774 | return r; |
625 | 775 | ||
626 | disk_super = dm_block_data(sblock); | 776 | disk_super = dm_block_data(sblock); |
627 | disk_super->time = cpu_to_le32(pmd->time); | 777 | disk_super->time = cpu_to_le32(pmd->time); |
@@ -640,12 +790,7 @@ static int __commit_transaction(struct dm_pool_metadata *pmd) | |||
640 | if (r < 0) | 790 | if (r < 0) |
641 | goto out_locked; | 791 | goto out_locked; |
642 | 792 | ||
643 | r = dm_tm_commit(pmd->tm, sblock); | 793 | return dm_tm_commit(pmd->tm, sblock); |
644 | if (!r) | ||
645 | pmd->need_commit = 0; | ||
646 | |||
647 | out: | ||
648 | return r; | ||
649 | 794 | ||
650 | out_locked: | 795 | out_locked: |
651 | dm_bm_unlock(sblock); | 796 | dm_bm_unlock(sblock); |
@@ -653,15 +798,11 @@ out_locked: | |||
653 | } | 798 | } |
654 | 799 | ||
655 | struct dm_pool_metadata *dm_pool_metadata_open(struct block_device *bdev, | 800 | struct dm_pool_metadata *dm_pool_metadata_open(struct block_device *bdev, |
656 | sector_t data_block_size) | 801 | sector_t data_block_size, |
802 | bool format_device) | ||
657 | { | 803 | { |
658 | int r; | 804 | int r; |
659 | struct thin_disk_superblock *disk_super; | ||
660 | struct dm_pool_metadata *pmd; | 805 | struct dm_pool_metadata *pmd; |
661 | sector_t bdev_size = i_size_read(bdev->bd_inode) >> SECTOR_SHIFT; | ||
662 | struct dm_block_manager *bm; | ||
663 | int create; | ||
664 | struct dm_block *sblock; | ||
665 | 806 | ||
666 | pmd = kmalloc(sizeof(*pmd), GFP_KERNEL); | 807 | pmd = kmalloc(sizeof(*pmd), GFP_KERNEL); |
667 | if (!pmd) { | 808 | if (!pmd) { |
@@ -669,90 +810,28 @@ struct dm_pool_metadata *dm_pool_metadata_open(struct block_device *bdev, | |||
669 | return ERR_PTR(-ENOMEM); | 810 | return ERR_PTR(-ENOMEM); |
670 | } | 811 | } |
671 | 812 | ||
672 | /* | 813 | init_rwsem(&pmd->root_lock); |
673 | * Max hex locks: | 814 | pmd->time = 0; |
674 | * 3 for btree insert + | 815 | INIT_LIST_HEAD(&pmd->thin_devices); |
675 | * 2 for btree lookup used within space map | 816 | pmd->read_only = false; |
676 | */ | 817 | pmd->fail_io = false; |
677 | bm = dm_block_manager_create(bdev, THIN_METADATA_BLOCK_SIZE, | 818 | pmd->bdev = bdev; |
678 | THIN_METADATA_CACHE_SIZE, 5); | 819 | pmd->data_block_size = data_block_size; |
679 | if (!bm) { | ||
680 | DMERR("could not create block manager"); | ||
681 | kfree(pmd); | ||
682 | return ERR_PTR(-ENOMEM); | ||
683 | } | ||
684 | |||
685 | r = superblock_all_zeroes(bm, &create); | ||
686 | if (r) { | ||
687 | dm_block_manager_destroy(bm); | ||
688 | kfree(pmd); | ||
689 | return ERR_PTR(r); | ||
690 | } | ||
691 | |||
692 | 820 | ||
693 | r = init_pmd(pmd, bm, 0, create); | 821 | r = __create_persistent_data_objects(pmd, format_device); |
694 | if (r) { | 822 | if (r) { |
695 | dm_block_manager_destroy(bm); | ||
696 | kfree(pmd); | 823 | kfree(pmd); |
697 | return ERR_PTR(r); | 824 | return ERR_PTR(r); |
698 | } | 825 | } |
699 | pmd->bdev = bdev; | ||
700 | |||
701 | if (!create) { | ||
702 | r = __begin_transaction(pmd); | ||
703 | if (r < 0) | ||
704 | goto bad; | ||
705 | return pmd; | ||
706 | } | ||
707 | |||
708 | /* | ||
709 | * Create. | ||
710 | */ | ||
711 | r = dm_bm_write_lock(pmd->bm, THIN_SUPERBLOCK_LOCATION, | ||
712 | &sb_validator, &sblock); | ||
713 | if (r) | ||
714 | goto bad; | ||
715 | |||
716 | if (bdev_size > THIN_METADATA_MAX_SECTORS) | ||
717 | bdev_size = THIN_METADATA_MAX_SECTORS; | ||
718 | |||
719 | disk_super = dm_block_data(sblock); | ||
720 | disk_super->magic = cpu_to_le64(THIN_SUPERBLOCK_MAGIC); | ||
721 | disk_super->version = cpu_to_le32(THIN_VERSION); | ||
722 | disk_super->time = 0; | ||
723 | disk_super->metadata_block_size = cpu_to_le32(THIN_METADATA_BLOCK_SIZE >> SECTOR_SHIFT); | ||
724 | disk_super->metadata_nr_blocks = cpu_to_le64(bdev_size >> SECTOR_TO_BLOCK_SHIFT); | ||
725 | disk_super->data_block_size = cpu_to_le32(data_block_size); | ||
726 | |||
727 | r = dm_bm_unlock(sblock); | ||
728 | if (r < 0) | ||
729 | goto bad; | ||
730 | |||
731 | r = dm_btree_empty(&pmd->info, &pmd->root); | ||
732 | if (r < 0) | ||
733 | goto bad; | ||
734 | |||
735 | r = dm_btree_empty(&pmd->details_info, &pmd->details_root); | ||
736 | if (r < 0) { | ||
737 | DMERR("couldn't create devices root"); | ||
738 | goto bad; | ||
739 | } | ||
740 | 826 | ||
741 | pmd->flags = 0; | 827 | r = __begin_transaction(pmd); |
742 | pmd->need_commit = 1; | ||
743 | r = dm_pool_commit_metadata(pmd); | ||
744 | if (r < 0) { | 828 | if (r < 0) { |
745 | DMERR("%s: dm_pool_commit_metadata() failed, error = %d", | 829 | if (dm_pool_metadata_close(pmd) < 0) |
746 | __func__, r); | 830 | DMWARN("%s: dm_pool_metadata_close() failed.", __func__); |
747 | goto bad; | 831 | return ERR_PTR(r); |
748 | } | 832 | } |
749 | 833 | ||
750 | return pmd; | 834 | return pmd; |
751 | |||
752 | bad: | ||
753 | if (dm_pool_metadata_close(pmd) < 0) | ||
754 | DMWARN("%s: dm_pool_metadata_close() failed.", __func__); | ||
755 | return ERR_PTR(r); | ||
756 | } | 835 | } |
757 | 836 | ||
758 | int dm_pool_metadata_close(struct dm_pool_metadata *pmd) | 837 | int dm_pool_metadata_close(struct dm_pool_metadata *pmd) |
@@ -778,18 +857,17 @@ int dm_pool_metadata_close(struct dm_pool_metadata *pmd) | |||
778 | return -EBUSY; | 857 | return -EBUSY; |
779 | } | 858 | } |
780 | 859 | ||
781 | r = __commit_transaction(pmd); | 860 | if (!pmd->read_only && !pmd->fail_io) { |
782 | if (r < 0) | 861 | r = __commit_transaction(pmd); |
783 | DMWARN("%s: __commit_transaction() failed, error = %d", | 862 | if (r < 0) |
784 | __func__, r); | 863 | DMWARN("%s: __commit_transaction() failed, error = %d", |
864 | __func__, r); | ||
865 | } | ||
785 | 866 | ||
786 | dm_tm_destroy(pmd->tm); | 867 | if (!pmd->fail_io) |
787 | dm_tm_destroy(pmd->nb_tm); | 868 | __destroy_persistent_data_objects(pmd); |
788 | dm_block_manager_destroy(pmd->bm); | ||
789 | dm_sm_destroy(pmd->metadata_sm); | ||
790 | dm_sm_destroy(pmd->data_sm); | ||
791 | kfree(pmd); | ||
792 | 869 | ||
870 | kfree(pmd); | ||
793 | return 0; | 871 | return 0; |
794 | } | 872 | } |
795 | 873 | ||
@@ -850,6 +928,7 @@ static int __open_device(struct dm_pool_metadata *pmd, | |||
850 | (*td)->id = dev; | 928 | (*td)->id = dev; |
851 | (*td)->open_count = 1; | 929 | (*td)->open_count = 1; |
852 | (*td)->changed = changed; | 930 | (*td)->changed = changed; |
931 | (*td)->aborted_with_changes = false; | ||
853 | (*td)->mapped_blocks = le64_to_cpu(details_le.mapped_blocks); | 932 | (*td)->mapped_blocks = le64_to_cpu(details_le.mapped_blocks); |
854 | (*td)->transaction_id = le64_to_cpu(details_le.transaction_id); | 933 | (*td)->transaction_id = le64_to_cpu(details_le.transaction_id); |
855 | (*td)->creation_time = le32_to_cpu(details_le.creation_time); | 934 | (*td)->creation_time = le32_to_cpu(details_le.creation_time); |
@@ -911,10 +990,11 @@ static int __create_thin(struct dm_pool_metadata *pmd, | |||
911 | 990 | ||
912 | int dm_pool_create_thin(struct dm_pool_metadata *pmd, dm_thin_id dev) | 991 | int dm_pool_create_thin(struct dm_pool_metadata *pmd, dm_thin_id dev) |
913 | { | 992 | { |
914 | int r; | 993 | int r = -EINVAL; |
915 | 994 | ||
916 | down_write(&pmd->root_lock); | 995 | down_write(&pmd->root_lock); |
917 | r = __create_thin(pmd, dev); | 996 | if (!pmd->fail_io) |
997 | r = __create_thin(pmd, dev); | ||
918 | up_write(&pmd->root_lock); | 998 | up_write(&pmd->root_lock); |
919 | 999 | ||
920 | return r; | 1000 | return r; |
@@ -1001,10 +1081,11 @@ int dm_pool_create_snap(struct dm_pool_metadata *pmd, | |||
1001 | dm_thin_id dev, | 1081 | dm_thin_id dev, |
1002 | dm_thin_id origin) | 1082 | dm_thin_id origin) |
1003 | { | 1083 | { |
1004 | int r; | 1084 | int r = -EINVAL; |
1005 | 1085 | ||
1006 | down_write(&pmd->root_lock); | 1086 | down_write(&pmd->root_lock); |
1007 | r = __create_snap(pmd, dev, origin); | 1087 | if (!pmd->fail_io) |
1088 | r = __create_snap(pmd, dev, origin); | ||
1008 | up_write(&pmd->root_lock); | 1089 | up_write(&pmd->root_lock); |
1009 | 1090 | ||
1010 | return r; | 1091 | return r; |
@@ -1037,18 +1118,17 @@ static int __delete_device(struct dm_pool_metadata *pmd, dm_thin_id dev) | |||
1037 | if (r) | 1118 | if (r) |
1038 | return r; | 1119 | return r; |
1039 | 1120 | ||
1040 | pmd->need_commit = 1; | ||
1041 | |||
1042 | return 0; | 1121 | return 0; |
1043 | } | 1122 | } |
1044 | 1123 | ||
1045 | int dm_pool_delete_thin_device(struct dm_pool_metadata *pmd, | 1124 | int dm_pool_delete_thin_device(struct dm_pool_metadata *pmd, |
1046 | dm_thin_id dev) | 1125 | dm_thin_id dev) |
1047 | { | 1126 | { |
1048 | int r; | 1127 | int r = -EINVAL; |
1049 | 1128 | ||
1050 | down_write(&pmd->root_lock); | 1129 | down_write(&pmd->root_lock); |
1051 | r = __delete_device(pmd, dev); | 1130 | if (!pmd->fail_io) |
1131 | r = __delete_device(pmd, dev); | ||
1052 | up_write(&pmd->root_lock); | 1132 | up_write(&pmd->root_lock); |
1053 | 1133 | ||
1054 | return r; | 1134 | return r; |
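[Annotation] The dm_pool_* entry points in the hunks around here all pick up the same guard: start from r = -EINVAL and only call the locked __helper when fail_io is clear, so a pool whose transaction could not be rolled back refuses everything except being closed. Reduced to a hedged sketch (dm_pool_example_op and __example_op are made-up names standing in for the real pairs):

int dm_pool_example_op(struct dm_pool_metadata *pmd, dm_thin_id dev)
{
	int r = -EINVAL;			/* reported once the pool has failed */

	down_write(&pmd->root_lock);
	if (!pmd->fail_io)
		r = __example_op(pmd, dev);	/* the pre-existing locked helper */
	up_write(&pmd->root_lock);

	return r;
}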
@@ -1058,28 +1138,40 @@ int dm_pool_set_metadata_transaction_id(struct dm_pool_metadata *pmd, | |||
1058 | uint64_t current_id, | 1138 | uint64_t current_id, |
1059 | uint64_t new_id) | 1139 | uint64_t new_id) |
1060 | { | 1140 | { |
1141 | int r = -EINVAL; | ||
1142 | |||
1061 | down_write(&pmd->root_lock); | 1143 | down_write(&pmd->root_lock); |
1144 | |||
1145 | if (pmd->fail_io) | ||
1146 | goto out; | ||
1147 | |||
1062 | if (pmd->trans_id != current_id) { | 1148 | if (pmd->trans_id != current_id) { |
1063 | up_write(&pmd->root_lock); | ||
1064 | DMERR("mismatched transaction id"); | 1149 | DMERR("mismatched transaction id"); |
1065 | return -EINVAL; | 1150 | goto out; |
1066 | } | 1151 | } |
1067 | 1152 | ||
1068 | pmd->trans_id = new_id; | 1153 | pmd->trans_id = new_id; |
1069 | pmd->need_commit = 1; | 1154 | r = 0; |
1155 | |||
1156 | out: | ||
1070 | up_write(&pmd->root_lock); | 1157 | up_write(&pmd->root_lock); |
1071 | 1158 | ||
1072 | return 0; | 1159 | return r; |
1073 | } | 1160 | } |
1074 | 1161 | ||
1075 | int dm_pool_get_metadata_transaction_id(struct dm_pool_metadata *pmd, | 1162 | int dm_pool_get_metadata_transaction_id(struct dm_pool_metadata *pmd, |
1076 | uint64_t *result) | 1163 | uint64_t *result) |
1077 | { | 1164 | { |
1165 | int r = -EINVAL; | ||
1166 | |||
1078 | down_read(&pmd->root_lock); | 1167 | down_read(&pmd->root_lock); |
1079 | *result = pmd->trans_id; | 1168 | if (!pmd->fail_io) { |
1169 | *result = pmd->trans_id; | ||
1170 | r = 0; | ||
1171 | } | ||
1080 | up_read(&pmd->root_lock); | 1172 | up_read(&pmd->root_lock); |
1081 | 1173 | ||
1082 | return 0; | 1174 | return r; |
1083 | } | 1175 | } |
1084 | 1176 | ||
1085 | static int __reserve_metadata_snap(struct dm_pool_metadata *pmd) | 1177 | static int __reserve_metadata_snap(struct dm_pool_metadata *pmd) |
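The hunks above all share one shape: each exported dm_pool_*() entry point now starts with r = -EINVAL and only calls its locked __ helper while pmd->fail_io is clear, so a pool whose metadata has failed keeps refusing work without any caller-side checks. A minimal sketch of that wrapper pattern, with a hypothetical __do_something() standing in for the real helpers:

int dm_pool_do_something(struct dm_pool_metadata *pmd)
{
	int r = -EINVAL;			/* reported once fail_io is set */

	down_write(&pmd->root_lock);
	if (!pmd->fail_io)			/* skip metadata ops after a hard failure */
		r = __do_something(pmd);	/* hypothetical locked helper */
	up_write(&pmd->root_lock);

	return r;
}

Read-only accessors follow the same shape with down_read()/up_read().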
@@ -1108,8 +1200,6 @@ static int __reserve_metadata_snap(struct dm_pool_metadata *pmd) | |||
1108 | 1200 | ||
1109 | dm_tm_dec(pmd->tm, held_root); | 1201 | dm_tm_dec(pmd->tm, held_root); |
1110 | dm_tm_unlock(pmd->tm, copy); | 1202 | dm_tm_unlock(pmd->tm, copy); |
1111 | pmd->need_commit = 1; | ||
1112 | |||
1113 | return -EBUSY; | 1203 | return -EBUSY; |
1114 | } | 1204 | } |
1115 | 1205 | ||
@@ -1131,29 +1221,25 @@ static int __reserve_metadata_snap(struct dm_pool_metadata *pmd) | |||
1131 | /* | 1221 | /* |
1132 | * Write the held root into the superblock. | 1222 | * Write the held root into the superblock. |
1133 | */ | 1223 | */ |
1134 | r = dm_bm_write_lock(pmd->bm, THIN_SUPERBLOCK_LOCATION, | 1224 | r = superblock_lock(pmd, &sblock); |
1135 | &sb_validator, &sblock); | ||
1136 | if (r) { | 1225 | if (r) { |
1137 | dm_tm_dec(pmd->tm, held_root); | 1226 | dm_tm_dec(pmd->tm, held_root); |
1138 | pmd->need_commit = 1; | ||
1139 | return r; | 1227 | return r; |
1140 | } | 1228 | } |
1141 | 1229 | ||
1142 | disk_super = dm_block_data(sblock); | 1230 | disk_super = dm_block_data(sblock); |
1143 | disk_super->held_root = cpu_to_le64(held_root); | 1231 | disk_super->held_root = cpu_to_le64(held_root); |
1144 | dm_bm_unlock(sblock); | 1232 | dm_bm_unlock(sblock); |
1145 | |||
1146 | pmd->need_commit = 1; | ||
1147 | |||
1148 | return 0; | 1233 | return 0; |
1149 | } | 1234 | } |
1150 | 1235 | ||
1151 | int dm_pool_reserve_metadata_snap(struct dm_pool_metadata *pmd) | 1236 | int dm_pool_reserve_metadata_snap(struct dm_pool_metadata *pmd) |
1152 | { | 1237 | { |
1153 | int r; | 1238 | int r = -EINVAL; |
1154 | 1239 | ||
1155 | down_write(&pmd->root_lock); | 1240 | down_write(&pmd->root_lock); |
1156 | r = __reserve_metadata_snap(pmd); | 1241 | if (!pmd->fail_io) |
1242 | r = __reserve_metadata_snap(pmd); | ||
1157 | up_write(&pmd->root_lock); | 1243 | up_write(&pmd->root_lock); |
1158 | 1244 | ||
1159 | return r; | 1245 | return r; |
@@ -1166,15 +1252,13 @@ static int __release_metadata_snap(struct dm_pool_metadata *pmd) | |||
1166 | struct dm_block *sblock, *copy; | 1252 | struct dm_block *sblock, *copy; |
1167 | dm_block_t held_root; | 1253 | dm_block_t held_root; |
1168 | 1254 | ||
1169 | r = dm_bm_write_lock(pmd->bm, THIN_SUPERBLOCK_LOCATION, | 1255 | r = superblock_lock(pmd, &sblock); |
1170 | &sb_validator, &sblock); | ||
1171 | if (r) | 1256 | if (r) |
1172 | return r; | 1257 | return r; |
1173 | 1258 | ||
1174 | disk_super = dm_block_data(sblock); | 1259 | disk_super = dm_block_data(sblock); |
1175 | held_root = le64_to_cpu(disk_super->held_root); | 1260 | held_root = le64_to_cpu(disk_super->held_root); |
1176 | disk_super->held_root = cpu_to_le64(0); | 1261 | disk_super->held_root = cpu_to_le64(0); |
1177 | pmd->need_commit = 1; | ||
1178 | 1262 | ||
1179 | dm_bm_unlock(sblock); | 1263 | dm_bm_unlock(sblock); |
1180 | 1264 | ||
@@ -1197,10 +1281,11 @@ static int __release_metadata_snap(struct dm_pool_metadata *pmd) | |||
1197 | 1281 | ||
1198 | int dm_pool_release_metadata_snap(struct dm_pool_metadata *pmd) | 1282 | int dm_pool_release_metadata_snap(struct dm_pool_metadata *pmd) |
1199 | { | 1283 | { |
1200 | int r; | 1284 | int r = -EINVAL; |
1201 | 1285 | ||
1202 | down_write(&pmd->root_lock); | 1286 | down_write(&pmd->root_lock); |
1203 | r = __release_metadata_snap(pmd); | 1287 | if (!pmd->fail_io) |
1288 | r = __release_metadata_snap(pmd); | ||
1204 | up_write(&pmd->root_lock); | 1289 | up_write(&pmd->root_lock); |
1205 | 1290 | ||
1206 | return r; | 1291 | return r; |
@@ -1227,10 +1312,11 @@ static int __get_metadata_snap(struct dm_pool_metadata *pmd, | |||
1227 | int dm_pool_get_metadata_snap(struct dm_pool_metadata *pmd, | 1312 | int dm_pool_get_metadata_snap(struct dm_pool_metadata *pmd, |
1228 | dm_block_t *result) | 1313 | dm_block_t *result) |
1229 | { | 1314 | { |
1230 | int r; | 1315 | int r = -EINVAL; |
1231 | 1316 | ||
1232 | down_read(&pmd->root_lock); | 1317 | down_read(&pmd->root_lock); |
1233 | r = __get_metadata_snap(pmd, result); | 1318 | if (!pmd->fail_io) |
1319 | r = __get_metadata_snap(pmd, result); | ||
1234 | up_read(&pmd->root_lock); | 1320 | up_read(&pmd->root_lock); |
1235 | 1321 | ||
1236 | return r; | 1322 | return r; |
@@ -1239,10 +1325,11 @@ int dm_pool_get_metadata_snap(struct dm_pool_metadata *pmd, | |||
1239 | int dm_pool_open_thin_device(struct dm_pool_metadata *pmd, dm_thin_id dev, | 1325 | int dm_pool_open_thin_device(struct dm_pool_metadata *pmd, dm_thin_id dev, |
1240 | struct dm_thin_device **td) | 1326 | struct dm_thin_device **td) |
1241 | { | 1327 | { |
1242 | int r; | 1328 | int r = -EINVAL; |
1243 | 1329 | ||
1244 | down_write(&pmd->root_lock); | 1330 | down_write(&pmd->root_lock); |
1245 | r = __open_device(pmd, dev, 0, td); | 1331 | if (!pmd->fail_io) |
1332 | r = __open_device(pmd, dev, 0, td); | ||
1246 | up_write(&pmd->root_lock); | 1333 | up_write(&pmd->root_lock); |
1247 | 1334 | ||
1248 | return r; | 1335 | return r; |
@@ -1262,7 +1349,7 @@ dm_thin_id dm_thin_dev_id(struct dm_thin_device *td) | |||
1262 | return td->id; | 1349 | return td->id; |
1263 | } | 1350 | } |
1264 | 1351 | ||
1265 | static int __snapshotted_since(struct dm_thin_device *td, uint32_t time) | 1352 | static bool __snapshotted_since(struct dm_thin_device *td, uint32_t time) |
1266 | { | 1353 | { |
1267 | return td->snapshotted_time > time; | 1354 | return td->snapshotted_time > time; |
1268 | } | 1355 | } |
@@ -1270,28 +1357,31 @@ static int __snapshotted_since(struct dm_thin_device *td, uint32_t time) | |||
1270 | int dm_thin_find_block(struct dm_thin_device *td, dm_block_t block, | 1357 | int dm_thin_find_block(struct dm_thin_device *td, dm_block_t block, |
1271 | int can_block, struct dm_thin_lookup_result *result) | 1358 | int can_block, struct dm_thin_lookup_result *result) |
1272 | { | 1359 | { |
1273 | int r; | 1360 | int r = -EINVAL; |
1274 | uint64_t block_time = 0; | 1361 | uint64_t block_time = 0; |
1275 | __le64 value; | 1362 | __le64 value; |
1276 | struct dm_pool_metadata *pmd = td->pmd; | 1363 | struct dm_pool_metadata *pmd = td->pmd; |
1277 | dm_block_t keys[2] = { td->id, block }; | 1364 | dm_block_t keys[2] = { td->id, block }; |
1365 | struct dm_btree_info *info; | ||
1278 | 1366 | ||
1279 | if (can_block) { | 1367 | if (can_block) { |
1280 | down_read(&pmd->root_lock); | 1368 | down_read(&pmd->root_lock); |
1281 | r = dm_btree_lookup(&pmd->info, pmd->root, keys, &value); | 1369 | info = &pmd->info; |
1282 | if (!r) | 1370 | } else if (down_read_trylock(&pmd->root_lock)) |
1283 | block_time = le64_to_cpu(value); | 1371 | info = &pmd->nb_info; |
1284 | up_read(&pmd->root_lock); | 1372 | else |
1285 | |||
1286 | } else if (down_read_trylock(&pmd->root_lock)) { | ||
1287 | r = dm_btree_lookup(&pmd->nb_info, pmd->root, keys, &value); | ||
1288 | if (!r) | ||
1289 | block_time = le64_to_cpu(value); | ||
1290 | up_read(&pmd->root_lock); | ||
1291 | |||
1292 | } else | ||
1293 | return -EWOULDBLOCK; | 1373 | return -EWOULDBLOCK; |
1294 | 1374 | ||
1375 | if (pmd->fail_io) | ||
1376 | goto out; | ||
1377 | |||
1378 | r = dm_btree_lookup(info, pmd->root, keys, &value); | ||
1379 | if (!r) | ||
1380 | block_time = le64_to_cpu(value); | ||
1381 | |||
1382 | out: | ||
1383 | up_read(&pmd->root_lock); | ||
1384 | |||
1295 | if (!r) { | 1385 | if (!r) { |
1296 | dm_block_t exception_block; | 1386 | dm_block_t exception_block; |
1297 | uint32_t exception_time; | 1387 | uint32_t exception_time; |
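dm_thin_find_block() now picks the btree info up front (pmd->info under a plain down_read() when the caller may sleep, pmd->nb_info when only a down_read_trylock() succeeds) and performs a single lookup afterwards, still short-circuiting to -EINVAL once fail_io is set. A hedged sketch of how the non-blocking variant is meant to be consumed on the bio path; it mirrors thin_bio_map() further down, and the actual queueing is elided:

static void thin_lookup_example(struct dm_thin_device *td, dm_block_t block,
				struct bio *bio)
{
	struct dm_thin_lookup_result lr;
	int r = dm_thin_find_block(td, block, 0, &lr);	/* can_block = 0 */

	switch (r) {
	case 0:
		/* remap bio to lr.block, taking lr.shared into account */
		break;
	case -ENODATA:		/* unprovisioned block */
	case -EWOULDBLOCK:	/* root_lock contended right now */
		/* defer the bio to the worker thread and retry there */
		break;
	default:
		bio_io_error(bio);	/* e.g. -EINVAL from fail_io mode */
		break;
	}
}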
@@ -1312,7 +1402,6 @@ static int __insert(struct dm_thin_device *td, dm_block_t block, | |||
1312 | struct dm_pool_metadata *pmd = td->pmd; | 1402 | struct dm_pool_metadata *pmd = td->pmd; |
1313 | dm_block_t keys[2] = { td->id, block }; | 1403 | dm_block_t keys[2] = { td->id, block }; |
1314 | 1404 | ||
1315 | pmd->need_commit = 1; | ||
1316 | value = cpu_to_le64(pack_block_time(data_block, pmd->time)); | 1405 | value = cpu_to_le64(pack_block_time(data_block, pmd->time)); |
1317 | __dm_bless_for_disk(&value); | 1406 | __dm_bless_for_disk(&value); |
1318 | 1407 | ||
@@ -1321,10 +1410,9 @@ static int __insert(struct dm_thin_device *td, dm_block_t block, | |||
1321 | if (r) | 1410 | if (r) |
1322 | return r; | 1411 | return r; |
1323 | 1412 | ||
1324 | if (inserted) { | 1413 | td->changed = 1; |
1414 | if (inserted) | ||
1325 | td->mapped_blocks++; | 1415 | td->mapped_blocks++; |
1326 | td->changed = 1; | ||
1327 | } | ||
1328 | 1416 | ||
1329 | return 0; | 1417 | return 0; |
1330 | } | 1418 | } |
@@ -1332,10 +1420,11 @@ static int __insert(struct dm_thin_device *td, dm_block_t block, | |||
1332 | int dm_thin_insert_block(struct dm_thin_device *td, dm_block_t block, | 1420 | int dm_thin_insert_block(struct dm_thin_device *td, dm_block_t block, |
1333 | dm_block_t data_block) | 1421 | dm_block_t data_block) |
1334 | { | 1422 | { |
1335 | int r; | 1423 | int r = -EINVAL; |
1336 | 1424 | ||
1337 | down_write(&td->pmd->root_lock); | 1425 | down_write(&td->pmd->root_lock); |
1338 | r = __insert(td, block, data_block); | 1426 | if (!td->pmd->fail_io) |
1427 | r = __insert(td, block, data_block); | ||
1339 | up_write(&td->pmd->root_lock); | 1428 | up_write(&td->pmd->root_lock); |
1340 | 1429 | ||
1341 | return r; | 1430 | return r; |
@@ -1353,31 +1442,51 @@ static int __remove(struct dm_thin_device *td, dm_block_t block) | |||
1353 | 1442 | ||
1354 | td->mapped_blocks--; | 1443 | td->mapped_blocks--; |
1355 | td->changed = 1; | 1444 | td->changed = 1; |
1356 | pmd->need_commit = 1; | ||
1357 | 1445 | ||
1358 | return 0; | 1446 | return 0; |
1359 | } | 1447 | } |
1360 | 1448 | ||
1361 | int dm_thin_remove_block(struct dm_thin_device *td, dm_block_t block) | 1449 | int dm_thin_remove_block(struct dm_thin_device *td, dm_block_t block) |
1362 | { | 1450 | { |
1363 | int r; | 1451 | int r = -EINVAL; |
1364 | 1452 | ||
1365 | down_write(&td->pmd->root_lock); | 1453 | down_write(&td->pmd->root_lock); |
1366 | r = __remove(td, block); | 1454 | if (!td->pmd->fail_io) |
1455 | r = __remove(td, block); | ||
1367 | up_write(&td->pmd->root_lock); | 1456 | up_write(&td->pmd->root_lock); |
1368 | 1457 | ||
1369 | return r; | 1458 | return r; |
1370 | } | 1459 | } |
1371 | 1460 | ||
1372 | int dm_pool_alloc_data_block(struct dm_pool_metadata *pmd, dm_block_t *result) | 1461 | bool dm_thin_changed_this_transaction(struct dm_thin_device *td) |
1373 | { | 1462 | { |
1374 | int r; | 1463 | int r; |
1375 | 1464 | ||
1376 | down_write(&pmd->root_lock); | 1465 | down_read(&td->pmd->root_lock); |
1466 | r = td->changed; | ||
1467 | up_read(&td->pmd->root_lock); | ||
1377 | 1468 | ||
1378 | r = dm_sm_new_block(pmd->data_sm, result); | 1469 | return r; |
1379 | pmd->need_commit = 1; | 1470 | } |
1471 | |||
1472 | bool dm_thin_aborted_changes(struct dm_thin_device *td) | ||
1473 | { | ||
1474 | bool r; | ||
1380 | 1475 | ||
1476 | down_read(&td->pmd->root_lock); | ||
1477 | r = td->aborted_with_changes; | ||
1478 | up_read(&td->pmd->root_lock); | ||
1479 | |||
1480 | return r; | ||
1481 | } | ||
1482 | |||
1483 | int dm_pool_alloc_data_block(struct dm_pool_metadata *pmd, dm_block_t *result) | ||
1484 | { | ||
1485 | int r = -EINVAL; | ||
1486 | |||
1487 | down_write(&pmd->root_lock); | ||
1488 | if (!pmd->fail_io) | ||
1489 | r = dm_sm_new_block(pmd->data_sm, result); | ||
1381 | up_write(&pmd->root_lock); | 1490 | up_write(&pmd->root_lock); |
1382 | 1491 | ||
1383 | return r; | 1492 | return r; |
@@ -1385,9 +1494,11 @@ int dm_pool_alloc_data_block(struct dm_pool_metadata *pmd, dm_block_t *result) | |||
1385 | 1494 | ||
1386 | int dm_pool_commit_metadata(struct dm_pool_metadata *pmd) | 1495 | int dm_pool_commit_metadata(struct dm_pool_metadata *pmd) |
1387 | { | 1496 | { |
1388 | int r; | 1497 | int r = -EINVAL; |
1389 | 1498 | ||
1390 | down_write(&pmd->root_lock); | 1499 | down_write(&pmd->root_lock); |
1500 | if (pmd->fail_io) | ||
1501 | goto out; | ||
1391 | 1502 | ||
1392 | r = __commit_transaction(pmd); | 1503 | r = __commit_transaction(pmd); |
1393 | if (r <= 0) | 1504 | if (r <= 0) |
@@ -1402,12 +1513,41 @@ out: | |||
1402 | return r; | 1513 | return r; |
1403 | } | 1514 | } |
1404 | 1515 | ||
1516 | static void __set_abort_with_changes_flags(struct dm_pool_metadata *pmd) | ||
1517 | { | ||
1518 | struct dm_thin_device *td; | ||
1519 | |||
1520 | list_for_each_entry(td, &pmd->thin_devices, list) | ||
1521 | td->aborted_with_changes = td->changed; | ||
1522 | } | ||
1523 | |||
1524 | int dm_pool_abort_metadata(struct dm_pool_metadata *pmd) | ||
1525 | { | ||
1526 | int r = -EINVAL; | ||
1527 | |||
1528 | down_write(&pmd->root_lock); | ||
1529 | if (pmd->fail_io) | ||
1530 | goto out; | ||
1531 | |||
1532 | __set_abort_with_changes_flags(pmd); | ||
1533 | __destroy_persistent_data_objects(pmd); | ||
1534 | r = __create_persistent_data_objects(pmd, false); | ||
1535 | if (r) | ||
1536 | pmd->fail_io = true; | ||
1537 | |||
1538 | out: | ||
1539 | up_write(&pmd->root_lock); | ||
1540 | |||
1541 | return r; | ||
1542 | } | ||
1543 | |||
1405 | int dm_pool_get_free_block_count(struct dm_pool_metadata *pmd, dm_block_t *result) | 1544 | int dm_pool_get_free_block_count(struct dm_pool_metadata *pmd, dm_block_t *result) |
1406 | { | 1545 | { |
1407 | int r; | 1546 | int r = -EINVAL; |
1408 | 1547 | ||
1409 | down_read(&pmd->root_lock); | 1548 | down_read(&pmd->root_lock); |
1410 | r = dm_sm_get_nr_free(pmd->data_sm, result); | 1549 | if (!pmd->fail_io) |
1550 | r = dm_sm_get_nr_free(pmd->data_sm, result); | ||
1411 | up_read(&pmd->root_lock); | 1551 | up_read(&pmd->root_lock); |
1412 | 1552 | ||
1413 | return r; | 1553 | return r; |
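dm_pool_abort_metadata() first records, per thin device, whether uncommitted changes are being thrown away (aborted_with_changes mirrors changed), then tears down and recreates the persistent-data objects from the last committed superblock; if that rebuild fails the pool is left in fail_io and only dm_pool_metadata_close() remains useful. A rough sketch of the intended caller, modelled on the set_pool_mode() added to dm-thin.c later in this patch:

static void degrade_to_read_only(struct pool *pool)
{
	/* Drop uncommitted metadata and roll back to the last good
	 * transaction; thin devices stay open across the call. */
	if (dm_pool_abort_metadata(pool->pmd)) {
		/* Metadata is unusable: fail all further I/O. */
		set_pool_mode(pool, PM_FAIL);
		return;
	}

	dm_pool_metadata_read_only(pool->pmd);
	/* ...then install the read-only process_* handlers. */
}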
@@ -1416,10 +1556,11 @@ int dm_pool_get_free_block_count(struct dm_pool_metadata *pmd, dm_block_t *resul | |||
1416 | int dm_pool_get_free_metadata_block_count(struct dm_pool_metadata *pmd, | 1556 | int dm_pool_get_free_metadata_block_count(struct dm_pool_metadata *pmd, |
1417 | dm_block_t *result) | 1557 | dm_block_t *result) |
1418 | { | 1558 | { |
1419 | int r; | 1559 | int r = -EINVAL; |
1420 | 1560 | ||
1421 | down_read(&pmd->root_lock); | 1561 | down_read(&pmd->root_lock); |
1422 | r = dm_sm_get_nr_free(pmd->metadata_sm, result); | 1562 | if (!pmd->fail_io) |
1563 | r = dm_sm_get_nr_free(pmd->metadata_sm, result); | ||
1423 | up_read(&pmd->root_lock); | 1564 | up_read(&pmd->root_lock); |
1424 | 1565 | ||
1425 | return r; | 1566 | return r; |
@@ -1428,10 +1569,11 @@ int dm_pool_get_free_metadata_block_count(struct dm_pool_metadata *pmd, | |||
1428 | int dm_pool_get_metadata_dev_size(struct dm_pool_metadata *pmd, | 1569 | int dm_pool_get_metadata_dev_size(struct dm_pool_metadata *pmd, |
1429 | dm_block_t *result) | 1570 | dm_block_t *result) |
1430 | { | 1571 | { |
1431 | int r; | 1572 | int r = -EINVAL; |
1432 | 1573 | ||
1433 | down_read(&pmd->root_lock); | 1574 | down_read(&pmd->root_lock); |
1434 | r = dm_sm_get_nr_blocks(pmd->metadata_sm, result); | 1575 | if (!pmd->fail_io) |
1576 | r = dm_sm_get_nr_blocks(pmd->metadata_sm, result); | ||
1435 | up_read(&pmd->root_lock); | 1577 | up_read(&pmd->root_lock); |
1436 | 1578 | ||
1437 | return r; | 1579 | return r; |
@@ -1448,10 +1590,11 @@ int dm_pool_get_data_block_size(struct dm_pool_metadata *pmd, sector_t *result) | |||
1448 | 1590 | ||
1449 | int dm_pool_get_data_dev_size(struct dm_pool_metadata *pmd, dm_block_t *result) | 1591 | int dm_pool_get_data_dev_size(struct dm_pool_metadata *pmd, dm_block_t *result) |
1450 | { | 1592 | { |
1451 | int r; | 1593 | int r = -EINVAL; |
1452 | 1594 | ||
1453 | down_read(&pmd->root_lock); | 1595 | down_read(&pmd->root_lock); |
1454 | r = dm_sm_get_nr_blocks(pmd->data_sm, result); | 1596 | if (!pmd->fail_io) |
1597 | r = dm_sm_get_nr_blocks(pmd->data_sm, result); | ||
1455 | up_read(&pmd->root_lock); | 1598 | up_read(&pmd->root_lock); |
1456 | 1599 | ||
1457 | return r; | 1600 | return r; |
@@ -1459,13 +1602,17 @@ int dm_pool_get_data_dev_size(struct dm_pool_metadata *pmd, dm_block_t *result) | |||
1459 | 1602 | ||
1460 | int dm_thin_get_mapped_count(struct dm_thin_device *td, dm_block_t *result) | 1603 | int dm_thin_get_mapped_count(struct dm_thin_device *td, dm_block_t *result) |
1461 | { | 1604 | { |
1605 | int r = -EINVAL; | ||
1462 | struct dm_pool_metadata *pmd = td->pmd; | 1606 | struct dm_pool_metadata *pmd = td->pmd; |
1463 | 1607 | ||
1464 | down_read(&pmd->root_lock); | 1608 | down_read(&pmd->root_lock); |
1465 | *result = td->mapped_blocks; | 1609 | if (!pmd->fail_io) { |
1610 | *result = td->mapped_blocks; | ||
1611 | r = 0; | ||
1612 | } | ||
1466 | up_read(&pmd->root_lock); | 1613 | up_read(&pmd->root_lock); |
1467 | 1614 | ||
1468 | return 0; | 1615 | return r; |
1469 | } | 1616 | } |
1470 | 1617 | ||
1471 | static int __highest_block(struct dm_thin_device *td, dm_block_t *result) | 1618 | static int __highest_block(struct dm_thin_device *td, dm_block_t *result) |
@@ -1487,11 +1634,12 @@ static int __highest_block(struct dm_thin_device *td, dm_block_t *result) | |||
1487 | int dm_thin_get_highest_mapped_block(struct dm_thin_device *td, | 1634 | int dm_thin_get_highest_mapped_block(struct dm_thin_device *td, |
1488 | dm_block_t *result) | 1635 | dm_block_t *result) |
1489 | { | 1636 | { |
1490 | int r; | 1637 | int r = -EINVAL; |
1491 | struct dm_pool_metadata *pmd = td->pmd; | 1638 | struct dm_pool_metadata *pmd = td->pmd; |
1492 | 1639 | ||
1493 | down_read(&pmd->root_lock); | 1640 | down_read(&pmd->root_lock); |
1494 | r = __highest_block(td, result); | 1641 | if (!pmd->fail_io) |
1642 | r = __highest_block(td, result); | ||
1495 | up_read(&pmd->root_lock); | 1643 | up_read(&pmd->root_lock); |
1496 | 1644 | ||
1497 | return r; | 1645 | return r; |
@@ -1514,20 +1662,25 @@ static int __resize_data_dev(struct dm_pool_metadata *pmd, dm_block_t new_count) | |||
1514 | return -EINVAL; | 1662 | return -EINVAL; |
1515 | } | 1663 | } |
1516 | 1664 | ||
1517 | r = dm_sm_extend(pmd->data_sm, new_count - old_count); | 1665 | return dm_sm_extend(pmd->data_sm, new_count - old_count); |
1518 | if (!r) | ||
1519 | pmd->need_commit = 1; | ||
1520 | |||
1521 | return r; | ||
1522 | } | 1666 | } |
1523 | 1667 | ||
1524 | int dm_pool_resize_data_dev(struct dm_pool_metadata *pmd, dm_block_t new_count) | 1668 | int dm_pool_resize_data_dev(struct dm_pool_metadata *pmd, dm_block_t new_count) |
1525 | { | 1669 | { |
1526 | int r; | 1670 | int r = -EINVAL; |
1527 | 1671 | ||
1528 | down_write(&pmd->root_lock); | 1672 | down_write(&pmd->root_lock); |
1529 | r = __resize_data_dev(pmd, new_count); | 1673 | if (!pmd->fail_io) |
1674 | r = __resize_data_dev(pmd, new_count); | ||
1530 | up_write(&pmd->root_lock); | 1675 | up_write(&pmd->root_lock); |
1531 | 1676 | ||
1532 | return r; | 1677 | return r; |
1533 | } | 1678 | } |
1679 | |||
1680 | void dm_pool_metadata_read_only(struct dm_pool_metadata *pmd) | ||
1681 | { | ||
1682 | down_write(&pmd->root_lock); | ||
1683 | pmd->read_only = true; | ||
1684 | dm_bm_set_read_only(pmd->bm); | ||
1685 | up_write(&pmd->root_lock); | ||
1686 | } | ||
diff --git a/drivers/md/dm-thin-metadata.h b/drivers/md/dm-thin-metadata.h index b88918ccdaf6..0cecc3702885 100644 --- a/drivers/md/dm-thin-metadata.h +++ b/drivers/md/dm-thin-metadata.h | |||
@@ -38,7 +38,8 @@ typedef uint64_t dm_thin_id; | |||
38 | * Reopens or creates a new, empty metadata volume. | 38 | * Reopens or creates a new, empty metadata volume. |
39 | */ | 39 | */ |
40 | struct dm_pool_metadata *dm_pool_metadata_open(struct block_device *bdev, | 40 | struct dm_pool_metadata *dm_pool_metadata_open(struct block_device *bdev, |
41 | sector_t data_block_size); | 41 | sector_t data_block_size, |
42 | bool format_device); | ||
42 | 43 | ||
43 | int dm_pool_metadata_close(struct dm_pool_metadata *pmd); | 44 | int dm_pool_metadata_close(struct dm_pool_metadata *pmd); |
44 | 45 | ||
@@ -79,6 +80,16 @@ int dm_pool_delete_thin_device(struct dm_pool_metadata *pmd, | |||
79 | int dm_pool_commit_metadata(struct dm_pool_metadata *pmd); | 80 | int dm_pool_commit_metadata(struct dm_pool_metadata *pmd); |
80 | 81 | ||
81 | /* | 82 | /* |
83 | * Discards all uncommitted changes. Rereads the superblock, rolling back | ||
84 | * to the last good transaction. Thin devices remain open. | ||
85 | * dm_thin_aborted_changes() tells you if they had uncommitted changes. | ||
86 | * | ||
87 | * If this call fails it's only useful to call dm_pool_metadata_close(). | ||
88 | * All other methods will fail with -EINVAL. | ||
89 | */ | ||
90 | int dm_pool_abort_metadata(struct dm_pool_metadata *pmd); | ||
91 | |||
92 | /* | ||
82 | * Set/get userspace transaction id. | 93 | * Set/get userspace transaction id. |
83 | */ | 94 | */ |
84 | int dm_pool_set_metadata_transaction_id(struct dm_pool_metadata *pmd, | 95 | int dm_pool_set_metadata_transaction_id(struct dm_pool_metadata *pmd, |
@@ -119,7 +130,7 @@ dm_thin_id dm_thin_dev_id(struct dm_thin_device *td); | |||
119 | 130 | ||
120 | struct dm_thin_lookup_result { | 131 | struct dm_thin_lookup_result { |
121 | dm_block_t block; | 132 | dm_block_t block; |
122 | int shared; | 133 | unsigned shared:1; |
123 | }; | 134 | }; |
124 | 135 | ||
125 | /* | 136 | /* |
@@ -147,6 +158,10 @@ int dm_thin_remove_block(struct dm_thin_device *td, dm_block_t block); | |||
147 | /* | 158 | /* |
148 | * Queries. | 159 | * Queries. |
149 | */ | 160 | */ |
161 | bool dm_thin_changed_this_transaction(struct dm_thin_device *td); | ||
162 | |||
163 | bool dm_thin_aborted_changes(struct dm_thin_device *td); | ||
164 | |||
150 | int dm_thin_get_highest_mapped_block(struct dm_thin_device *td, | 165 | int dm_thin_get_highest_mapped_block(struct dm_thin_device *td, |
151 | dm_block_t *highest_mapped); | 166 | dm_block_t *highest_mapped); |
152 | 167 | ||
@@ -171,6 +186,12 @@ int dm_pool_get_data_dev_size(struct dm_pool_metadata *pmd, dm_block_t *result); | |||
171 | */ | 186 | */ |
172 | int dm_pool_resize_data_dev(struct dm_pool_metadata *pmd, dm_block_t new_size); | 187 | int dm_pool_resize_data_dev(struct dm_pool_metadata *pmd, dm_block_t new_size); |
173 | 188 | ||
189 | /* | ||
190 | * Flicks the underlying block manager into read only mode, so you know | ||
191 | * that nothing is changing. | ||
192 | */ | ||
193 | void dm_pool_metadata_read_only(struct dm_pool_metadata *pmd); | ||
194 | |||
174 | /*----------------------------------------------------------------*/ | 195 | /*----------------------------------------------------------------*/ |
175 | 196 | ||
176 | #endif | 197 | #endif |
diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c index 68694da0d21d..af1fc3b2c2ad 100644 --- a/drivers/md/dm-thin.c +++ b/drivers/md/dm-thin.c | |||
@@ -1,10 +1,11 @@ | |||
1 | /* | 1 | /* |
2 | * Copyright (C) 2011 Red Hat UK. | 2 | * Copyright (C) 2011-2012 Red Hat UK. |
3 | * | 3 | * |
4 | * This file is released under the GPL. | 4 | * This file is released under the GPL. |
5 | */ | 5 | */ |
6 | 6 | ||
7 | #include "dm-thin-metadata.h" | 7 | #include "dm-thin-metadata.h" |
8 | #include "dm.h" | ||
8 | 9 | ||
9 | #include <linux/device-mapper.h> | 10 | #include <linux/device-mapper.h> |
10 | #include <linux/dm-io.h> | 11 | #include <linux/dm-io.h> |
@@ -19,7 +20,7 @@ | |||
19 | /* | 20 | /* |
20 | * Tunable constants | 21 | * Tunable constants |
21 | */ | 22 | */ |
22 | #define ENDIO_HOOK_POOL_SIZE 10240 | 23 | #define ENDIO_HOOK_POOL_SIZE 1024 |
23 | #define DEFERRED_SET_SIZE 64 | 24 | #define DEFERRED_SET_SIZE 64 |
24 | #define MAPPING_POOL_SIZE 1024 | 25 | #define MAPPING_POOL_SIZE 1024 |
25 | #define PRISON_CELLS 1024 | 26 | #define PRISON_CELLS 1024 |
@@ -496,12 +497,27 @@ static void build_virtual_key(struct dm_thin_device *td, dm_block_t b, | |||
496 | */ | 497 | */ |
497 | struct dm_thin_new_mapping; | 498 | struct dm_thin_new_mapping; |
498 | 499 | ||
500 | /* | ||
501 | * The pool runs in 3 modes. Ordered in degraded order for comparisons. | ||
502 | */ | ||
503 | enum pool_mode { | ||
504 | PM_WRITE, /* metadata may be changed */ | ||
505 | PM_READ_ONLY, /* metadata may not be changed */ | ||
506 | PM_FAIL, /* all I/O fails */ | ||
507 | }; | ||
508 | |||
499 | struct pool_features { | 509 | struct pool_features { |
510 | enum pool_mode mode; | ||
511 | |||
500 | unsigned zero_new_blocks:1; | 512 | unsigned zero_new_blocks:1; |
501 | unsigned discard_enabled:1; | 513 | unsigned discard_enabled:1; |
502 | unsigned discard_passdown:1; | 514 | unsigned discard_passdown:1; |
503 | }; | 515 | }; |
504 | 516 | ||
517 | struct thin_c; | ||
518 | typedef void (*process_bio_fn)(struct thin_c *tc, struct bio *bio); | ||
519 | typedef void (*process_mapping_fn)(struct dm_thin_new_mapping *m); | ||
520 | |||
505 | struct pool { | 521 | struct pool { |
506 | struct list_head list; | 522 | struct list_head list; |
507 | struct dm_target *ti; /* Only set if a pool target is bound */ | 523 | struct dm_target *ti; /* Only set if a pool target is bound */ |
@@ -510,10 +526,9 @@ struct pool { | |||
510 | struct block_device *md_dev; | 526 | struct block_device *md_dev; |
511 | struct dm_pool_metadata *pmd; | 527 | struct dm_pool_metadata *pmd; |
512 | 528 | ||
513 | uint32_t sectors_per_block; | ||
514 | unsigned block_shift; | ||
515 | dm_block_t offset_mask; | ||
516 | dm_block_t low_water_blocks; | 529 | dm_block_t low_water_blocks; |
530 | uint32_t sectors_per_block; | ||
531 | int sectors_per_block_shift; | ||
517 | 532 | ||
518 | struct pool_features pf; | 533 | struct pool_features pf; |
519 | unsigned low_water_triggered:1; /* A dm event has been sent */ | 534 | unsigned low_water_triggered:1; /* A dm event has been sent */ |
@@ -526,8 +541,8 @@ struct pool { | |||
526 | struct work_struct worker; | 541 | struct work_struct worker; |
527 | struct delayed_work waker; | 542 | struct delayed_work waker; |
528 | 543 | ||
529 | unsigned ref_count; | ||
530 | unsigned long last_commit_jiffies; | 544 | unsigned long last_commit_jiffies; |
545 | unsigned ref_count; | ||
531 | 546 | ||
532 | spinlock_t lock; | 547 | spinlock_t lock; |
533 | struct bio_list deferred_bios; | 548 | struct bio_list deferred_bios; |
@@ -543,8 +558,17 @@ struct pool { | |||
543 | struct dm_thin_new_mapping *next_mapping; | 558 | struct dm_thin_new_mapping *next_mapping; |
544 | mempool_t *mapping_pool; | 559 | mempool_t *mapping_pool; |
545 | mempool_t *endio_hook_pool; | 560 | mempool_t *endio_hook_pool; |
561 | |||
562 | process_bio_fn process_bio; | ||
563 | process_bio_fn process_discard; | ||
564 | |||
565 | process_mapping_fn process_prepared_mapping; | ||
566 | process_mapping_fn process_prepared_discard; | ||
546 | }; | 567 | }; |
547 | 568 | ||
569 | static enum pool_mode get_pool_mode(struct pool *pool); | ||
570 | static void set_pool_mode(struct pool *pool, enum pool_mode mode); | ||
571 | |||
548 | /* | 572 | /* |
549 | * Target context for a pool. | 573 | * Target context for a pool. |
550 | */ | 574 | */ |
@@ -679,16 +703,28 @@ static void requeue_io(struct thin_c *tc) | |||
679 | 703 | ||
680 | static dm_block_t get_bio_block(struct thin_c *tc, struct bio *bio) | 704 | static dm_block_t get_bio_block(struct thin_c *tc, struct bio *bio) |
681 | { | 705 | { |
682 | return bio->bi_sector >> tc->pool->block_shift; | 706 | sector_t block_nr = bio->bi_sector; |
707 | |||
708 | if (tc->pool->sectors_per_block_shift < 0) | ||
709 | (void) sector_div(block_nr, tc->pool->sectors_per_block); | ||
710 | else | ||
711 | block_nr >>= tc->pool->sectors_per_block_shift; | ||
712 | |||
713 | return block_nr; | ||
683 | } | 714 | } |
684 | 715 | ||
685 | static void remap(struct thin_c *tc, struct bio *bio, dm_block_t block) | 716 | static void remap(struct thin_c *tc, struct bio *bio, dm_block_t block) |
686 | { | 717 | { |
687 | struct pool *pool = tc->pool; | 718 | struct pool *pool = tc->pool; |
719 | sector_t bi_sector = bio->bi_sector; | ||
688 | 720 | ||
689 | bio->bi_bdev = tc->pool_dev->bdev; | 721 | bio->bi_bdev = tc->pool_dev->bdev; |
690 | bio->bi_sector = (block << pool->block_shift) + | 722 | if (tc->pool->sectors_per_block_shift < 0) |
691 | (bio->bi_sector & pool->offset_mask); | 723 | bio->bi_sector = (block * pool->sectors_per_block) + |
724 | sector_div(bi_sector, pool->sectors_per_block); | ||
725 | else | ||
726 | bio->bi_sector = (block << pool->sectors_per_block_shift) | | ||
727 | (bi_sector & (pool->sectors_per_block - 1)); | ||
692 | } | 728 | } |
693 | 729 | ||
694 | static void remap_to_origin(struct thin_c *tc, struct bio *bio) | 730 | static void remap_to_origin(struct thin_c *tc, struct bio *bio) |
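With non-power-of-two block sizes now permitted, sectors_per_block_shift does double duty: it holds __ffs(block_size) for power-of-two sizes and -1 otherwise, so get_bio_block() and remap() keep the cheap shift/mask path and only fall back to sector_div() when they must. A worked example with a hypothetical 384-sector block size (not a power of two) against the familiar 512-sector case:

/*
 * bio->bi_sector = 1000
 *
 * block_size = 384  ->  sectors_per_block_shift = -1
 *	block  = 1000 / 384 = 2		(sector_div quotient)
 *	offset = 1000 % 384 = 232	(sector_div remainder)
 *
 * block_size = 512  ->  sectors_per_block_shift = __ffs(512) = 9
 *	block  = 1000 >> 9         = 1
 *	offset = 1000 & (512 - 1)  = 488
 */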
@@ -696,21 +732,39 @@ static void remap_to_origin(struct thin_c *tc, struct bio *bio) | |||
696 | bio->bi_bdev = tc->origin_dev->bdev; | 732 | bio->bi_bdev = tc->origin_dev->bdev; |
697 | } | 733 | } |
698 | 734 | ||
735 | static int bio_triggers_commit(struct thin_c *tc, struct bio *bio) | ||
736 | { | ||
737 | return (bio->bi_rw & (REQ_FLUSH | REQ_FUA)) && | ||
738 | dm_thin_changed_this_transaction(tc->td); | ||
739 | } | ||
740 | |||
699 | static void issue(struct thin_c *tc, struct bio *bio) | 741 | static void issue(struct thin_c *tc, struct bio *bio) |
700 | { | 742 | { |
701 | struct pool *pool = tc->pool; | 743 | struct pool *pool = tc->pool; |
702 | unsigned long flags; | 744 | unsigned long flags; |
703 | 745 | ||
746 | if (!bio_triggers_commit(tc, bio)) { | ||
747 | generic_make_request(bio); | ||
748 | return; | ||
749 | } | ||
750 | |||
704 | /* | 751 | /* |
705 | * Batch together any FUA/FLUSH bios we find and then issue | 752 | * Complete bio with an error if earlier I/O caused changes to |
706 | * a single commit for them in process_deferred_bios(). | 753 | * the metadata that can't be committed e.g, due to I/O errors |
754 | * on the metadata device. | ||
707 | */ | 755 | */ |
708 | if (bio->bi_rw & (REQ_FLUSH | REQ_FUA)) { | 756 | if (dm_thin_aborted_changes(tc->td)) { |
709 | spin_lock_irqsave(&pool->lock, flags); | 757 | bio_io_error(bio); |
710 | bio_list_add(&pool->deferred_flush_bios, bio); | 758 | return; |
711 | spin_unlock_irqrestore(&pool->lock, flags); | 759 | } |
712 | } else | 760 | |
713 | generic_make_request(bio); | 761 | /* |
762 | * Batch together any bios that trigger commits and then issue a | ||
763 | * single commit for them in process_deferred_bios(). | ||
764 | */ | ||
765 | spin_lock_irqsave(&pool->lock, flags); | ||
766 | bio_list_add(&pool->deferred_flush_bios, bio); | ||
767 | spin_unlock_irqrestore(&pool->lock, flags); | ||
714 | } | 768 | } |
715 | 769 | ||
716 | static void remap_to_origin_and_issue(struct thin_c *tc, struct bio *bio) | 770 | static void remap_to_origin_and_issue(struct thin_c *tc, struct bio *bio) |
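issue() no longer funnels every FLUSH/FUA bio through the deferred list: bio_triggers_commit() asks dm_thin_changed_this_transaction() first, so flushes that have nothing new to make durable go straight down, and bios whose earlier changes were discarded by an abort are errored rather than silently acknowledged. Summarised as a decision table over the code above:

/*
 * not FLUSH/FUA, or no changes this transaction
 *	-> generic_make_request(bio)		(no commit needed)
 * dm_thin_aborted_changes(tc->td)
 *	-> bio_io_error(bio)			(durability can't be promised)
 * otherwise
 *	-> add to pool->deferred_flush_bios; the worker commits once,
 *	   then releases the whole batch
 */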
@@ -847,6 +901,14 @@ static void cell_defer_except(struct thin_c *tc, struct dm_bio_prison_cell *cell | |||
847 | wake_worker(pool); | 901 | wake_worker(pool); |
848 | } | 902 | } |
849 | 903 | ||
904 | static void process_prepared_mapping_fail(struct dm_thin_new_mapping *m) | ||
905 | { | ||
906 | if (m->bio) | ||
907 | m->bio->bi_end_io = m->saved_bi_end_io; | ||
908 | cell_error(m->cell); | ||
909 | list_del(&m->list); | ||
910 | mempool_free(m, m->tc->pool->mapping_pool); | ||
911 | } | ||
850 | static void process_prepared_mapping(struct dm_thin_new_mapping *m) | 912 | static void process_prepared_mapping(struct dm_thin_new_mapping *m) |
851 | { | 913 | { |
852 | struct thin_c *tc = m->tc; | 914 | struct thin_c *tc = m->tc; |
@@ -859,7 +921,7 @@ static void process_prepared_mapping(struct dm_thin_new_mapping *m) | |||
859 | 921 | ||
860 | if (m->err) { | 922 | if (m->err) { |
861 | cell_error(m->cell); | 923 | cell_error(m->cell); |
862 | return; | 924 | goto out; |
863 | } | 925 | } |
864 | 926 | ||
865 | /* | 927 | /* |
@@ -871,7 +933,7 @@ static void process_prepared_mapping(struct dm_thin_new_mapping *m) | |||
871 | if (r) { | 933 | if (r) { |
872 | DMERR("dm_thin_insert_block() failed"); | 934 | DMERR("dm_thin_insert_block() failed"); |
873 | cell_error(m->cell); | 935 | cell_error(m->cell); |
874 | return; | 936 | goto out; |
875 | } | 937 | } |
876 | 938 | ||
877 | /* | 939 | /* |
@@ -886,22 +948,25 @@ static void process_prepared_mapping(struct dm_thin_new_mapping *m) | |||
886 | } else | 948 | } else |
887 | cell_defer(tc, m->cell, m->data_block); | 949 | cell_defer(tc, m->cell, m->data_block); |
888 | 950 | ||
951 | out: | ||
889 | list_del(&m->list); | 952 | list_del(&m->list); |
890 | mempool_free(m, tc->pool->mapping_pool); | 953 | mempool_free(m, tc->pool->mapping_pool); |
891 | } | 954 | } |
892 | 955 | ||
893 | static void process_prepared_discard(struct dm_thin_new_mapping *m) | 956 | static void process_prepared_discard_fail(struct dm_thin_new_mapping *m) |
894 | { | 957 | { |
895 | int r; | ||
896 | struct thin_c *tc = m->tc; | 958 | struct thin_c *tc = m->tc; |
897 | 959 | ||
898 | r = dm_thin_remove_block(tc->td, m->virt_block); | 960 | bio_io_error(m->bio); |
899 | if (r) | 961 | cell_defer_except(tc, m->cell); |
900 | DMERR("dm_thin_remove_block() failed"); | 962 | cell_defer_except(tc, m->cell2); |
963 | mempool_free(m, tc->pool->mapping_pool); | ||
964 | } | ||
965 | |||
966 | static void process_prepared_discard_passdown(struct dm_thin_new_mapping *m) | ||
967 | { | ||
968 | struct thin_c *tc = m->tc; | ||
901 | 969 | ||
902 | /* | ||
903 | * Pass the discard down to the underlying device? | ||
904 | */ | ||
905 | if (m->pass_discard) | 970 | if (m->pass_discard) |
906 | remap_and_issue(tc, m->bio, m->data_block); | 971 | remap_and_issue(tc, m->bio, m->data_block); |
907 | else | 972 | else |
@@ -912,8 +977,20 @@ static void process_prepared_discard(struct dm_thin_new_mapping *m) | |||
912 | mempool_free(m, tc->pool->mapping_pool); | 977 | mempool_free(m, tc->pool->mapping_pool); |
913 | } | 978 | } |
914 | 979 | ||
980 | static void process_prepared_discard(struct dm_thin_new_mapping *m) | ||
981 | { | ||
982 | int r; | ||
983 | struct thin_c *tc = m->tc; | ||
984 | |||
985 | r = dm_thin_remove_block(tc->td, m->virt_block); | ||
986 | if (r) | ||
987 | DMERR("dm_thin_remove_block() failed"); | ||
988 | |||
989 | process_prepared_discard_passdown(m); | ||
990 | } | ||
991 | |||
915 | static void process_prepared(struct pool *pool, struct list_head *head, | 992 | static void process_prepared(struct pool *pool, struct list_head *head, |
916 | void (*fn)(struct dm_thin_new_mapping *)) | 993 | process_mapping_fn *fn) |
917 | { | 994 | { |
918 | unsigned long flags; | 995 | unsigned long flags; |
919 | struct list_head maps; | 996 | struct list_head maps; |
@@ -925,7 +1002,7 @@ static void process_prepared(struct pool *pool, struct list_head *head, | |||
925 | spin_unlock_irqrestore(&pool->lock, flags); | 1002 | spin_unlock_irqrestore(&pool->lock, flags); |
926 | 1003 | ||
927 | list_for_each_entry_safe(m, tmp, &maps, list) | 1004 | list_for_each_entry_safe(m, tmp, &maps, list) |
928 | fn(m); | 1005 | (*fn)(m); |
929 | } | 1006 | } |
930 | 1007 | ||
931 | /* | 1008 | /* |
@@ -933,9 +1010,7 @@ static void process_prepared(struct pool *pool, struct list_head *head, | |||
933 | */ | 1010 | */ |
934 | static int io_overlaps_block(struct pool *pool, struct bio *bio) | 1011 | static int io_overlaps_block(struct pool *pool, struct bio *bio) |
935 | { | 1012 | { |
936 | return !(bio->bi_sector & pool->offset_mask) && | 1013 | return bio->bi_size == (pool->sectors_per_block << SECTOR_SHIFT); |
937 | (bio->bi_size == (pool->sectors_per_block << SECTOR_SHIFT)); | ||
938 | |||
939 | } | 1014 | } |
940 | 1015 | ||
941 | static int io_overwrites_block(struct pool *pool, struct bio *bio) | 1016 | static int io_overwrites_block(struct pool *pool, struct bio *bio) |
@@ -1093,6 +1168,35 @@ static void schedule_zero(struct thin_c *tc, dm_block_t virt_block, | |||
1093 | } | 1168 | } |
1094 | } | 1169 | } |
1095 | 1170 | ||
1171 | static int commit(struct pool *pool) | ||
1172 | { | ||
1173 | int r; | ||
1174 | |||
1175 | r = dm_pool_commit_metadata(pool->pmd); | ||
1176 | if (r) | ||
1177 | DMERR("commit failed, error = %d", r); | ||
1178 | |||
1179 | return r; | ||
1180 | } | ||
1181 | |||
1182 | /* | ||
1183 | * A non-zero return indicates read_only or fail_io mode. | ||
1184 | * Many callers don't care about the return value. | ||
1185 | */ | ||
1186 | static int commit_or_fallback(struct pool *pool) | ||
1187 | { | ||
1188 | int r; | ||
1189 | |||
1190 | if (get_pool_mode(pool) != PM_WRITE) | ||
1191 | return -EINVAL; | ||
1192 | |||
1193 | r = commit(pool); | ||
1194 | if (r) | ||
1195 | set_pool_mode(pool, PM_READ_ONLY); | ||
1196 | |||
1197 | return r; | ||
1198 | } | ||
1199 | |||
1096 | static int alloc_data_block(struct thin_c *tc, dm_block_t *result) | 1200 | static int alloc_data_block(struct thin_c *tc, dm_block_t *result) |
1097 | { | 1201 | { |
1098 | int r; | 1202 | int r; |
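commit_or_fallback() becomes the single choke point for metadata commits: in PM_WRITE it calls commit() and demotes the pool to PM_READ_ONLY if that fails, while in the degraded modes it refuses with -EINVAL straight away. Call sites such as alloc_data_block() below intentionally ignore its return value, because the very next metadata call reports any problem anyway; a condensed view of that call site:

	/*
	 * Out of data space: try committing to free some up.  If this
	 * degrades the pool, the following query simply fails and the
	 * error is handled on the normal path.
	 */
	(void) commit_or_fallback(pool);

	r = dm_pool_get_free_block_count(pool->pmd, &free_blocks);
	if (r)
		return r;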
@@ -1121,12 +1225,7 @@ static int alloc_data_block(struct thin_c *tc, dm_block_t *result) | |||
1121 | * Try to commit to see if that will free up some | 1225 | * Try to commit to see if that will free up some |
1122 | * more space. | 1226 | * more space. |
1123 | */ | 1227 | */ |
1124 | r = dm_pool_commit_metadata(pool->pmd); | 1228 | (void) commit_or_fallback(pool); |
1125 | if (r) { | ||
1126 | DMERR("%s: dm_pool_commit_metadata() failed, error = %d", | ||
1127 | __func__, r); | ||
1128 | return r; | ||
1129 | } | ||
1130 | 1229 | ||
1131 | r = dm_pool_get_free_block_count(pool->pmd, &free_blocks); | 1230 | r = dm_pool_get_free_block_count(pool->pmd, &free_blocks); |
1132 | if (r) | 1231 | if (r) |
@@ -1218,7 +1317,7 @@ static void process_discard(struct thin_c *tc, struct bio *bio) | |||
1218 | */ | 1317 | */ |
1219 | m = get_next_mapping(pool); | 1318 | m = get_next_mapping(pool); |
1220 | m->tc = tc; | 1319 | m->tc = tc; |
1221 | m->pass_discard = (!lookup_result.shared) & pool->pf.discard_passdown; | 1320 | m->pass_discard = (!lookup_result.shared) && pool->pf.discard_passdown; |
1222 | m->virt_block = block; | 1321 | m->virt_block = block; |
1223 | m->data_block = lookup_result.block; | 1322 | m->data_block = lookup_result.block; |
1224 | m->cell = cell; | 1323 | m->cell = cell; |
@@ -1234,15 +1333,10 @@ static void process_discard(struct thin_c *tc, struct bio *bio) | |||
1234 | } | 1333 | } |
1235 | } else { | 1334 | } else { |
1236 | /* | 1335 | /* |
1237 | * This path is hit if people are ignoring | 1336 | * The DM core makes sure that the discard doesn't span |
1238 | * limits->discard_granularity. It ignores any | 1337 | * a block boundary. So we submit the discard of a |
1239 | * part of the discard that is in a subsequent | 1338 | * partial block appropriately. |
1240 | * block. | ||
1241 | */ | 1339 | */ |
1242 | sector_t offset = bio->bi_sector - (block << pool->block_shift); | ||
1243 | unsigned remaining = (pool->sectors_per_block - offset) << 9; | ||
1244 | bio->bi_size = min(bio->bi_size, remaining); | ||
1245 | |||
1246 | cell_release_singleton(cell, bio); | 1340 | cell_release_singleton(cell, bio); |
1247 | cell_release_singleton(cell2, bio); | 1341 | cell_release_singleton(cell2, bio); |
1248 | if ((!lookup_result.shared) && pool->pf.discard_passdown) | 1342 | if ((!lookup_result.shared) && pool->pf.discard_passdown) |
@@ -1310,7 +1404,7 @@ static void process_shared_bio(struct thin_c *tc, struct bio *bio, | |||
1310 | if (bio_detain(pool->prison, &key, bio, &cell)) | 1404 | if (bio_detain(pool->prison, &key, bio, &cell)) |
1311 | return; | 1405 | return; |
1312 | 1406 | ||
1313 | if (bio_data_dir(bio) == WRITE) | 1407 | if (bio_data_dir(bio) == WRITE && bio->bi_size) |
1314 | break_sharing(tc, bio, block, &key, lookup_result, cell); | 1408 | break_sharing(tc, bio, block, &key, lookup_result, cell); |
1315 | else { | 1409 | else { |
1316 | struct dm_thin_endio_hook *h = dm_get_mapinfo(bio)->ptr; | 1410 | struct dm_thin_endio_hook *h = dm_get_mapinfo(bio)->ptr; |
@@ -1362,6 +1456,7 @@ static void provision_block(struct thin_c *tc, struct bio *bio, dm_block_t block | |||
1362 | 1456 | ||
1363 | default: | 1457 | default: |
1364 | DMERR("%s: alloc_data_block() failed, error = %d", __func__, r); | 1458 | DMERR("%s: alloc_data_block() failed, error = %d", __func__, r); |
1459 | set_pool_mode(tc->pool, PM_READ_ONLY); | ||
1365 | cell_error(cell); | 1460 | cell_error(cell); |
1366 | break; | 1461 | break; |
1367 | } | 1462 | } |
@@ -1419,6 +1514,49 @@ static void process_bio(struct thin_c *tc, struct bio *bio) | |||
1419 | } | 1514 | } |
1420 | } | 1515 | } |
1421 | 1516 | ||
1517 | static void process_bio_read_only(struct thin_c *tc, struct bio *bio) | ||
1518 | { | ||
1519 | int r; | ||
1520 | int rw = bio_data_dir(bio); | ||
1521 | dm_block_t block = get_bio_block(tc, bio); | ||
1522 | struct dm_thin_lookup_result lookup_result; | ||
1523 | |||
1524 | r = dm_thin_find_block(tc->td, block, 1, &lookup_result); | ||
1525 | switch (r) { | ||
1526 | case 0: | ||
1527 | if (lookup_result.shared && (rw == WRITE) && bio->bi_size) | ||
1528 | bio_io_error(bio); | ||
1529 | else | ||
1530 | remap_and_issue(tc, bio, lookup_result.block); | ||
1531 | break; | ||
1532 | |||
1533 | case -ENODATA: | ||
1534 | if (rw != READ) { | ||
1535 | bio_io_error(bio); | ||
1536 | break; | ||
1537 | } | ||
1538 | |||
1539 | if (tc->origin_dev) { | ||
1540 | remap_to_origin_and_issue(tc, bio); | ||
1541 | break; | ||
1542 | } | ||
1543 | |||
1544 | zero_fill_bio(bio); | ||
1545 | bio_endio(bio, 0); | ||
1546 | break; | ||
1547 | |||
1548 | default: | ||
1549 | DMERR("dm_thin_find_block() failed, error = %d", r); | ||
1550 | bio_io_error(bio); | ||
1551 | break; | ||
1552 | } | ||
1553 | } | ||
1554 | |||
1555 | static void process_bio_fail(struct thin_c *tc, struct bio *bio) | ||
1556 | { | ||
1557 | bio_io_error(bio); | ||
1558 | } | ||
1559 | |||
1422 | static int need_commit_due_to_time(struct pool *pool) | 1560 | static int need_commit_due_to_time(struct pool *pool) |
1423 | { | 1561 | { |
1424 | return jiffies < pool->last_commit_jiffies || | 1562 | return jiffies < pool->last_commit_jiffies || |
@@ -1430,7 +1568,6 @@ static void process_deferred_bios(struct pool *pool) | |||
1430 | unsigned long flags; | 1568 | unsigned long flags; |
1431 | struct bio *bio; | 1569 | struct bio *bio; |
1432 | struct bio_list bios; | 1570 | struct bio_list bios; |
1433 | int r; | ||
1434 | 1571 | ||
1435 | bio_list_init(&bios); | 1572 | bio_list_init(&bios); |
1436 | 1573 | ||
@@ -1457,9 +1594,9 @@ static void process_deferred_bios(struct pool *pool) | |||
1457 | } | 1594 | } |
1458 | 1595 | ||
1459 | if (bio->bi_rw & REQ_DISCARD) | 1596 | if (bio->bi_rw & REQ_DISCARD) |
1460 | process_discard(tc, bio); | 1597 | pool->process_discard(tc, bio); |
1461 | else | 1598 | else |
1462 | process_bio(tc, bio); | 1599 | pool->process_bio(tc, bio); |
1463 | } | 1600 | } |
1464 | 1601 | ||
1465 | /* | 1602 | /* |
@@ -1475,10 +1612,7 @@ static void process_deferred_bios(struct pool *pool) | |||
1475 | if (bio_list_empty(&bios) && !need_commit_due_to_time(pool)) | 1612 | if (bio_list_empty(&bios) && !need_commit_due_to_time(pool)) |
1476 | return; | 1613 | return; |
1477 | 1614 | ||
1478 | r = dm_pool_commit_metadata(pool->pmd); | 1615 | if (commit_or_fallback(pool)) { |
1479 | if (r) { | ||
1480 | DMERR("%s: dm_pool_commit_metadata() failed, error = %d", | ||
1481 | __func__, r); | ||
1482 | while ((bio = bio_list_pop(&bios))) | 1616 | while ((bio = bio_list_pop(&bios))) |
1483 | bio_io_error(bio); | 1617 | bio_io_error(bio); |
1484 | return; | 1618 | return; |
@@ -1493,8 +1627,8 @@ static void do_worker(struct work_struct *ws) | |||
1493 | { | 1627 | { |
1494 | struct pool *pool = container_of(ws, struct pool, worker); | 1628 | struct pool *pool = container_of(ws, struct pool, worker); |
1495 | 1629 | ||
1496 | process_prepared(pool, &pool->prepared_mappings, process_prepared_mapping); | 1630 | process_prepared(pool, &pool->prepared_mappings, &pool->process_prepared_mapping); |
1497 | process_prepared(pool, &pool->prepared_discards, process_prepared_discard); | 1631 | process_prepared(pool, &pool->prepared_discards, &pool->process_prepared_discard); |
1498 | process_deferred_bios(pool); | 1632 | process_deferred_bios(pool); |
1499 | } | 1633 | } |
1500 | 1634 | ||
@@ -1511,6 +1645,52 @@ static void do_waker(struct work_struct *ws) | |||
1511 | 1645 | ||
1512 | /*----------------------------------------------------------------*/ | 1646 | /*----------------------------------------------------------------*/ |
1513 | 1647 | ||
1648 | static enum pool_mode get_pool_mode(struct pool *pool) | ||
1649 | { | ||
1650 | return pool->pf.mode; | ||
1651 | } | ||
1652 | |||
1653 | static void set_pool_mode(struct pool *pool, enum pool_mode mode) | ||
1654 | { | ||
1655 | int r; | ||
1656 | |||
1657 | pool->pf.mode = mode; | ||
1658 | |||
1659 | switch (mode) { | ||
1660 | case PM_FAIL: | ||
1661 | DMERR("switching pool to failure mode"); | ||
1662 | pool->process_bio = process_bio_fail; | ||
1663 | pool->process_discard = process_bio_fail; | ||
1664 | pool->process_prepared_mapping = process_prepared_mapping_fail; | ||
1665 | pool->process_prepared_discard = process_prepared_discard_fail; | ||
1666 | break; | ||
1667 | |||
1668 | case PM_READ_ONLY: | ||
1669 | DMERR("switching pool to read-only mode"); | ||
1670 | r = dm_pool_abort_metadata(pool->pmd); | ||
1671 | if (r) { | ||
1672 | DMERR("aborting transaction failed"); | ||
1673 | set_pool_mode(pool, PM_FAIL); | ||
1674 | } else { | ||
1675 | dm_pool_metadata_read_only(pool->pmd); | ||
1676 | pool->process_bio = process_bio_read_only; | ||
1677 | pool->process_discard = process_discard; | ||
1678 | pool->process_prepared_mapping = process_prepared_mapping_fail; | ||
1679 | pool->process_prepared_discard = process_prepared_discard_passdown; | ||
1680 | } | ||
1681 | break; | ||
1682 | |||
1683 | case PM_WRITE: | ||
1684 | pool->process_bio = process_bio; | ||
1685 | pool->process_discard = process_discard; | ||
1686 | pool->process_prepared_mapping = process_prepared_mapping; | ||
1687 | pool->process_prepared_discard = process_prepared_discard; | ||
1688 | break; | ||
1689 | } | ||
1690 | } | ||
1691 | |||
1692 | /*----------------------------------------------------------------*/ | ||
1693 | |||
1514 | /* | 1694 | /* |
1515 | * Mapping functions. | 1695 | * Mapping functions. |
1516 | */ | 1696 | */ |
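set_pool_mode() is where the three modes become behaviour: it rewires the process_* hooks that do_worker() and process_deferred_bios() call through, and entering PM_READ_ONLY first discards uncommitted metadata via dm_pool_abort_metadata(), escalating to PM_FAIL if even that fails. The resulting dispatch, condensed from the code above:

/*
 * PM_WRITE:	 process_bio           / process_discard
 *		 process_prepared_mapping / process_prepared_discard
 * PM_READ_ONLY: process_bio_read_only / process_discard
 *		 process_prepared_mapping_fail / process_prepared_discard_passdown
 * PM_FAIL:	 process_bio_fail      / process_bio_fail
 *		 process_prepared_mapping_fail / process_prepared_discard_fail
 */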
@@ -1556,6 +1736,12 @@ static int thin_bio_map(struct dm_target *ti, struct bio *bio, | |||
1556 | struct dm_thin_lookup_result result; | 1736 | struct dm_thin_lookup_result result; |
1557 | 1737 | ||
1558 | map_context->ptr = thin_hook_bio(tc, bio); | 1738 | map_context->ptr = thin_hook_bio(tc, bio); |
1739 | |||
1740 | if (get_pool_mode(tc->pool) == PM_FAIL) { | ||
1741 | bio_io_error(bio); | ||
1742 | return DM_MAPIO_SUBMITTED; | ||
1743 | } | ||
1744 | |||
1559 | if (bio->bi_rw & (REQ_DISCARD | REQ_FLUSH | REQ_FUA)) { | 1745 | if (bio->bi_rw & (REQ_DISCARD | REQ_FLUSH | REQ_FUA)) { |
1560 | thin_defer_bio(tc, bio); | 1746 | thin_defer_bio(tc, bio); |
1561 | return DM_MAPIO_SUBMITTED; | 1747 | return DM_MAPIO_SUBMITTED; |
@@ -1592,14 +1778,35 @@ static int thin_bio_map(struct dm_target *ti, struct bio *bio, | |||
1592 | break; | 1778 | break; |
1593 | 1779 | ||
1594 | case -ENODATA: | 1780 | case -ENODATA: |
1781 | if (get_pool_mode(tc->pool) == PM_READ_ONLY) { | ||
1782 | /* | ||
1783 | * This block isn't provisioned, and we have no way | ||
1784 | * of doing so. Just error it. | ||
1785 | */ | ||
1786 | bio_io_error(bio); | ||
1787 | r = DM_MAPIO_SUBMITTED; | ||
1788 | break; | ||
1789 | } | ||
1790 | /* fall through */ | ||
1791 | |||
1792 | case -EWOULDBLOCK: | ||
1595 | /* | 1793 | /* |
1596 | * In future, the failed dm_thin_find_block above could | 1794 | * In future, the failed dm_thin_find_block above could |
1597 | * provide the hint to load the metadata into cache. | 1795 | * provide the hint to load the metadata into cache. |
1598 | */ | 1796 | */ |
1599 | case -EWOULDBLOCK: | ||
1600 | thin_defer_bio(tc, bio); | 1797 | thin_defer_bio(tc, bio); |
1601 | r = DM_MAPIO_SUBMITTED; | 1798 | r = DM_MAPIO_SUBMITTED; |
1602 | break; | 1799 | break; |
1800 | |||
1801 | default: | ||
1802 | /* | ||
1803 | * Must always call bio_io_error on failure. | ||
1804 | * dm_thin_find_block can fail with -EINVAL if the | ||
1805 | * pool is switched to fail-io mode. | ||
1806 | */ | ||
1807 | bio_io_error(bio); | ||
1808 | r = DM_MAPIO_SUBMITTED; | ||
1809 | break; | ||
1603 | } | 1810 | } |
1604 | 1811 | ||
1605 | return r; | 1812 | return r; |
@@ -1636,15 +1843,26 @@ static int bind_control_target(struct pool *pool, struct dm_target *ti) | |||
1636 | { | 1843 | { |
1637 | struct pool_c *pt = ti->private; | 1844 | struct pool_c *pt = ti->private; |
1638 | 1845 | ||
1846 | /* | ||
1847 | * We want to make sure that degraded pools are never upgraded. | ||
1848 | */ | ||
1849 | enum pool_mode old_mode = pool->pf.mode; | ||
1850 | enum pool_mode new_mode = pt->pf.mode; | ||
1851 | |||
1852 | if (old_mode > new_mode) | ||
1853 | new_mode = old_mode; | ||
1854 | |||
1639 | pool->ti = ti; | 1855 | pool->ti = ti; |
1640 | pool->low_water_blocks = pt->low_water_blocks; | 1856 | pool->low_water_blocks = pt->low_water_blocks; |
1641 | pool->pf = pt->pf; | 1857 | pool->pf = pt->pf; |
1858 | set_pool_mode(pool, new_mode); | ||
1642 | 1859 | ||
1643 | /* | 1860 | /* |
1644 | * If discard_passdown was enabled verify that the data device | 1861 | * If discard_passdown was enabled verify that the data device |
1645 | * supports discards. Disable discard_passdown if not; otherwise | 1862 | * supports discards. Disable discard_passdown if not; otherwise |
1646 | * -EOPNOTSUPP will be returned. | 1863 | * -EOPNOTSUPP will be returned. |
1647 | */ | 1864 | */ |
1865 | /* FIXME: pull this out into a sep fn. */ | ||
1648 | if (pt->pf.discard_passdown) { | 1866 | if (pt->pf.discard_passdown) { |
1649 | struct request_queue *q = bdev_get_queue(pt->data_dev->bdev); | 1867 | struct request_queue *q = bdev_get_queue(pt->data_dev->bdev); |
1650 | if (!q || !blk_queue_discard(q)) { | 1868 | if (!q || !blk_queue_discard(q)) { |
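bind_control_target() leans on the enum ordering promised earlier ("ordered in degraded order"): PM_WRITE < PM_READ_ONLY < PM_FAIL, so keeping the larger of the pool's current mode and the freshly parsed one means a table reload can never quietly upgrade a degraded pool:

	/*
	 * Example: the pool has already been demoted to PM_READ_ONLY and a
	 * reload asks for PM_WRITE.  old_mode (1) > new_mode (0), so the
	 * pool stays read-only until it is repaired or recreated.
	 */
	if (old_mode > new_mode)
		new_mode = old_mode;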
@@ -1670,6 +1888,7 @@ static void unbind_control_target(struct pool *pool, struct dm_target *ti) | |||
1670 | /* Initialize pool features. */ | 1888 | /* Initialize pool features. */ |
1671 | static void pool_features_init(struct pool_features *pf) | 1889 | static void pool_features_init(struct pool_features *pf) |
1672 | { | 1890 | { |
1891 | pf->mode = PM_WRITE; | ||
1673 | pf->zero_new_blocks = 1; | 1892 | pf->zero_new_blocks = 1; |
1674 | pf->discard_enabled = 1; | 1893 | pf->discard_enabled = 1; |
1675 | pf->discard_passdown = 1; | 1894 | pf->discard_passdown = 1; |
@@ -1700,14 +1919,16 @@ static struct kmem_cache *_endio_hook_cache; | |||
1700 | 1919 | ||
1701 | static struct pool *pool_create(struct mapped_device *pool_md, | 1920 | static struct pool *pool_create(struct mapped_device *pool_md, |
1702 | struct block_device *metadata_dev, | 1921 | struct block_device *metadata_dev, |
1703 | unsigned long block_size, char **error) | 1922 | unsigned long block_size, |
1923 | int read_only, char **error) | ||
1704 | { | 1924 | { |
1705 | int r; | 1925 | int r; |
1706 | void *err_p; | 1926 | void *err_p; |
1707 | struct pool *pool; | 1927 | struct pool *pool; |
1708 | struct dm_pool_metadata *pmd; | 1928 | struct dm_pool_metadata *pmd; |
1929 | bool format_device = read_only ? false : true; | ||
1709 | 1930 | ||
1710 | pmd = dm_pool_metadata_open(metadata_dev, block_size); | 1931 | pmd = dm_pool_metadata_open(metadata_dev, block_size, format_device); |
1711 | if (IS_ERR(pmd)) { | 1932 | if (IS_ERR(pmd)) { |
1712 | *error = "Error creating metadata object"; | 1933 | *error = "Error creating metadata object"; |
1713 | return (struct pool *)pmd; | 1934 | return (struct pool *)pmd; |
@@ -1722,8 +1943,10 @@ static struct pool *pool_create(struct mapped_device *pool_md, | |||
1722 | 1943 | ||
1723 | pool->pmd = pmd; | 1944 | pool->pmd = pmd; |
1724 | pool->sectors_per_block = block_size; | 1945 | pool->sectors_per_block = block_size; |
1725 | pool->block_shift = ffs(block_size) - 1; | 1946 | if (block_size & (block_size - 1)) |
1726 | pool->offset_mask = block_size - 1; | 1947 | pool->sectors_per_block_shift = -1; |
1948 | else | ||
1949 | pool->sectors_per_block_shift = __ffs(block_size); | ||
1727 | pool->low_water_blocks = 0; | 1950 | pool->low_water_blocks = 0; |
1728 | pool_features_init(&pool->pf); | 1951 | pool_features_init(&pool->pf); |
1729 | pool->prison = prison_create(PRISON_CELLS); | 1952 | pool->prison = prison_create(PRISON_CELLS); |
@@ -1822,25 +2045,29 @@ static void __pool_dec(struct pool *pool) | |||
1822 | 2045 | ||
1823 | static struct pool *__pool_find(struct mapped_device *pool_md, | 2046 | static struct pool *__pool_find(struct mapped_device *pool_md, |
1824 | struct block_device *metadata_dev, | 2047 | struct block_device *metadata_dev, |
1825 | unsigned long block_size, char **error, | 2048 | unsigned long block_size, int read_only, |
1826 | int *created) | 2049 | char **error, int *created) |
1827 | { | 2050 | { |
1828 | struct pool *pool = __pool_table_lookup_metadata_dev(metadata_dev); | 2051 | struct pool *pool = __pool_table_lookup_metadata_dev(metadata_dev); |
1829 | 2052 | ||
1830 | if (pool) { | 2053 | if (pool) { |
1831 | if (pool->pool_md != pool_md) | 2054 | if (pool->pool_md != pool_md) { |
2055 | *error = "metadata device already in use by a pool"; | ||
1832 | return ERR_PTR(-EBUSY); | 2056 | return ERR_PTR(-EBUSY); |
2057 | } | ||
1833 | __pool_inc(pool); | 2058 | __pool_inc(pool); |
1834 | 2059 | ||
1835 | } else { | 2060 | } else { |
1836 | pool = __pool_table_lookup(pool_md); | 2061 | pool = __pool_table_lookup(pool_md); |
1837 | if (pool) { | 2062 | if (pool) { |
1838 | if (pool->md_dev != metadata_dev) | 2063 | if (pool->md_dev != metadata_dev) { |
2064 | *error = "different pool cannot replace a pool"; | ||
1839 | return ERR_PTR(-EINVAL); | 2065 | return ERR_PTR(-EINVAL); |
2066 | } | ||
1840 | __pool_inc(pool); | 2067 | __pool_inc(pool); |
1841 | 2068 | ||
1842 | } else { | 2069 | } else { |
1843 | pool = pool_create(pool_md, metadata_dev, block_size, error); | 2070 | pool = pool_create(pool_md, metadata_dev, block_size, read_only, error); |
1844 | *created = 1; | 2071 | *created = 1; |
1845 | } | 2072 | } |
1846 | } | 2073 | } |
@@ -1891,19 +2118,23 @@ static int parse_pool_features(struct dm_arg_set *as, struct pool_features *pf, | |||
1891 | arg_name = dm_shift_arg(as); | 2118 | arg_name = dm_shift_arg(as); |
1892 | argc--; | 2119 | argc--; |
1893 | 2120 | ||
1894 | if (!strcasecmp(arg_name, "skip_block_zeroing")) { | 2121 | if (!strcasecmp(arg_name, "skip_block_zeroing")) |
1895 | pf->zero_new_blocks = 0; | 2122 | pf->zero_new_blocks = 0; |
1896 | continue; | 2123 | |
1897 | } else if (!strcasecmp(arg_name, "ignore_discard")) { | 2124 | else if (!strcasecmp(arg_name, "ignore_discard")) |
1898 | pf->discard_enabled = 0; | 2125 | pf->discard_enabled = 0; |
1899 | continue; | 2126 | |
1900 | } else if (!strcasecmp(arg_name, "no_discard_passdown")) { | 2127 | else if (!strcasecmp(arg_name, "no_discard_passdown")) |
1901 | pf->discard_passdown = 0; | 2128 | pf->discard_passdown = 0; |
1902 | continue; | ||
1903 | } | ||
1904 | 2129 | ||
1905 | ti->error = "Unrecognised pool feature requested"; | 2130 | else if (!strcasecmp(arg_name, "read_only")) |
1906 | r = -EINVAL; | 2131 | pf->mode = PM_READ_ONLY; |
2132 | |||
2133 | else { | ||
2134 | ti->error = "Unrecognised pool feature requested"; | ||
2135 | r = -EINVAL; | ||
2136 | break; | ||
2137 | } | ||
1907 | } | 2138 | } |
1908 | 2139 | ||
1909 | return r; | 2140 | return r; |
@@ -1967,7 +2198,7 @@ static int pool_ctr(struct dm_target *ti, unsigned argc, char **argv) | |||
1967 | if (kstrtoul(argv[2], 10, &block_size) || !block_size || | 2198 | if (kstrtoul(argv[2], 10, &block_size) || !block_size || |
1968 | block_size < DATA_DEV_BLOCK_SIZE_MIN_SECTORS || | 2199 | block_size < DATA_DEV_BLOCK_SIZE_MIN_SECTORS || |
1969 | block_size > DATA_DEV_BLOCK_SIZE_MAX_SECTORS || | 2200 | block_size > DATA_DEV_BLOCK_SIZE_MAX_SECTORS || |
1970 | !is_power_of_2(block_size)) { | 2201 | block_size & (DATA_DEV_BLOCK_SIZE_MIN_SECTORS - 1)) { |
1971 | ti->error = "Invalid block size"; | 2202 | ti->error = "Invalid block size"; |
1972 | r = -EINVAL; | 2203 | r = -EINVAL; |
1973 | goto out; | 2204 | goto out; |
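The constructor now accepts any block size that is a multiple of the minimum rather than insisting on a power of two; the bit test works because DATA_DEV_BLOCK_SIZE_MIN_SECTORS is itself a power of two. Discard support remains the one power-of-two dependency and is rejected separately a little further down. A standalone check equivalent in spirit to the new condition (the 128-sector value, i.e. 64 KiB, is an assumption about the minimum, and the maximum bound is omitted):

static int data_block_size_ok(unsigned long block_size)
{
	const unsigned long min_sectors = 128;	/* assumed 64 KiB minimum */

	/* at least the minimum and an exact multiple of it:
	 * 384 & 127 == 0 passes, 200 & 127 == 72 is rejected */
	return block_size >= min_sectors &&
	       !(block_size & (min_sectors - 1));
}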
@@ -1996,7 +2227,7 @@ static int pool_ctr(struct dm_target *ti, unsigned argc, char **argv) | |||
1996 | } | 2227 | } |
1997 | 2228 | ||
1998 | pool = __pool_find(dm_table_get_md(ti->table), metadata_dev->bdev, | 2229 | pool = __pool_find(dm_table_get_md(ti->table), metadata_dev->bdev, |
1999 | block_size, &ti->error, &pool_created); | 2230 | block_size, pf.mode == PM_READ_ONLY, &ti->error, &pool_created); |
2000 | if (IS_ERR(pool)) { | 2231 | if (IS_ERR(pool)) { |
2001 | r = PTR_ERR(pool); | 2232 | r = PTR_ERR(pool); |
2002 | goto out_free_pt; | 2233 | goto out_free_pt; |
@@ -2014,6 +2245,15 @@ static int pool_ctr(struct dm_target *ti, unsigned argc, char **argv) | |||
2014 | goto out_flags_changed; | 2245 | goto out_flags_changed; |
2015 | } | 2246 | } |
2016 | 2247 | ||
2248 | /* | ||
2249 | * The block layer requires discard_granularity to be a power of 2. | ||
2250 | */ | ||
2251 | if (pf.discard_enabled && !is_power_of_2(block_size)) { | ||
2252 | ti->error = "Discard support must be disabled when the block size is not a power of 2"; | ||
2253 | r = -EINVAL; | ||
2254 | goto out_flags_changed; | ||
2255 | } | ||
2256 | |||
2017 | pt->pool = pool; | 2257 | pt->pool = pool; |
2018 | pt->ti = ti; | 2258 | pt->ti = ti; |
2019 | pt->metadata_dev = metadata_dev; | 2259 | pt->metadata_dev = metadata_dev; |
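
The two checks above relax the old power-of-two requirement: any block size that is a multiple of DATA_DEV_BLOCK_SIZE_MIN_SECTORS is now accepted, but discard support has to be disabled (via the ignore_discard feature) when the size is not a power of two, because the block layer's discard_granularity must be one. A minimal kernel-context sketch of the two predicates follows; it is illustrative only and not a helper the driver actually defines, and it omits the min/max range checks.

    static bool pool_block_size_ok(unsigned long block_size, bool discard_enabled)
    {
            /*
             * DATA_DEV_BLOCK_SIZE_MIN_SECTORS is itself a power of two, which
             * is why the bitmask below works as a "multiple of the minimum"
             * test, exactly as in the constructor above.
             */
            if (block_size & (DATA_DEV_BLOCK_SIZE_MIN_SECTORS - 1))
                    return false;

            /*
             * e.g. 3 * DATA_DEV_BLOCK_SIZE_MIN_SECTORS passes the test above
             * but fails this one, so such a pool must be created with
             * ignore_discard.
             */
            if (discard_enabled && !is_power_of_2(block_size))
                    return false;

            return true;
    }
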
@@ -2033,7 +2273,7 @@ static int pool_ctr(struct dm_target *ti, unsigned argc, char **argv) | |||
2033 | * stacking of discard limits (this keeps the pool and | 2273 | * stacking of discard limits (this keeps the pool and |
2034 | * thin devices' discard limits consistent). | 2274 | * thin devices' discard limits consistent). |
2035 | */ | 2275 | */ |
2036 | ti->discards_supported = 1; | 2276 | ti->discards_supported = true; |
2037 | } | 2277 | } |
2038 | ti->private = pt; | 2278 | ti->private = pt; |
2039 | 2279 | ||
@@ -2093,7 +2333,8 @@ static int pool_preresume(struct dm_target *ti) | |||
2093 | int r; | 2333 | int r; |
2094 | struct pool_c *pt = ti->private; | 2334 | struct pool_c *pt = ti->private; |
2095 | struct pool *pool = pt->pool; | 2335 | struct pool *pool = pt->pool; |
2096 | dm_block_t data_size, sb_data_size; | 2336 | sector_t data_size = ti->len; |
2337 | dm_block_t sb_data_size; | ||
2097 | 2338 | ||
2098 | /* | 2339 | /* |
2099 | * Take control of the pool object. | 2340 | * Take control of the pool object. |
@@ -2102,7 +2343,8 @@ static int pool_preresume(struct dm_target *ti) | |||
2102 | if (r) | 2343 | if (r) |
2103 | return r; | 2344 | return r; |
2104 | 2345 | ||
2105 | data_size = ti->len >> pool->block_shift; | 2346 | (void) sector_div(data_size, pool->sectors_per_block); |
2347 | |||
2106 | r = dm_pool_get_data_dev_size(pool->pmd, &sb_data_size); | 2348 | r = dm_pool_get_data_dev_size(pool->pmd, &sb_data_size); |
2107 | if (r) { | 2349 | if (r) { |
2108 | DMERR("failed to retrieve data device size"); | 2350 | DMERR("failed to retrieve data device size"); |
@@ -2111,22 +2353,19 @@ static int pool_preresume(struct dm_target *ti) | |||
2111 | 2353 | ||
2112 | if (data_size < sb_data_size) { | 2354 | if (data_size < sb_data_size) { |
2113 | DMERR("pool target too small, is %llu blocks (expected %llu)", | 2355 | DMERR("pool target too small, is %llu blocks (expected %llu)", |
2114 | data_size, sb_data_size); | 2356 | (unsigned long long)data_size, sb_data_size); |
2115 | return -EINVAL; | 2357 | return -EINVAL; |
2116 | 2358 | ||
2117 | } else if (data_size > sb_data_size) { | 2359 | } else if (data_size > sb_data_size) { |
2118 | r = dm_pool_resize_data_dev(pool->pmd, data_size); | 2360 | r = dm_pool_resize_data_dev(pool->pmd, data_size); |
2119 | if (r) { | 2361 | if (r) { |
2120 | DMERR("failed to resize data device"); | 2362 | DMERR("failed to resize data device"); |
2363 | /* FIXME Stricter than necessary: Rollback transaction instead here */ | ||
2364 | set_pool_mode(pool, PM_READ_ONLY); | ||
2121 | return r; | 2365 | return r; |
2122 | } | 2366 | } |
2123 | 2367 | ||
2124 | r = dm_pool_commit_metadata(pool->pmd); | 2368 | (void) commit_or_fallback(pool); |
2125 | if (r) { | ||
2126 | DMERR("%s: dm_pool_commit_metadata() failed, error = %d", | ||
2127 | __func__, r); | ||
2128 | return r; | ||
2129 | } | ||
2130 | } | 2369 | } |
2131 | 2370 | ||
2132 | return 0; | 2371 | return 0; |
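
pool_preresume() (and thin_iterate_devices() further down) now derive a block count with sector_div() instead of a shift, since sectors_per_block is no longer required to be a power of two. sector_div() divides its sector_t argument in place and returns the remainder, which is simply discarded here. A small kernel-context sketch of the idiom, with made-up parameter names:

    static sector_t sectors_to_blocks(sector_t dev_len, u32 sectors_per_block)
    {
            /*
             * dev_len is divided in place; the value sector_div() returns is
             * the remainder, i.e. any trailing partial block, which the pool
             * ignores just as the code above does.
             */
            (void) sector_div(dev_len, sectors_per_block);

            return dev_len;
    }
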
@@ -2149,19 +2388,12 @@ static void pool_resume(struct dm_target *ti) | |||
2149 | 2388 | ||
2150 | static void pool_postsuspend(struct dm_target *ti) | 2389 | static void pool_postsuspend(struct dm_target *ti) |
2151 | { | 2390 | { |
2152 | int r; | ||
2153 | struct pool_c *pt = ti->private; | 2391 | struct pool_c *pt = ti->private; |
2154 | struct pool *pool = pt->pool; | 2392 | struct pool *pool = pt->pool; |
2155 | 2393 | ||
2156 | cancel_delayed_work(&pool->waker); | 2394 | cancel_delayed_work(&pool->waker); |
2157 | flush_workqueue(pool->wq); | 2395 | flush_workqueue(pool->wq); |
2158 | 2396 | (void) commit_or_fallback(pool); | |
2159 | r = dm_pool_commit_metadata(pool->pmd); | ||
2160 | if (r < 0) { | ||
2161 | DMERR("%s: dm_pool_commit_metadata() failed, error = %d", | ||
2162 | __func__, r); | ||
2163 | /* FIXME: invalidate device? error the next FUA or FLUSH bio ?*/ | ||
2164 | } | ||
2165 | } | 2397 | } |
2166 | 2398 | ||
2167 | static int check_arg_count(unsigned argc, unsigned args_required) | 2399 | static int check_arg_count(unsigned argc, unsigned args_required) |
@@ -2295,12 +2527,7 @@ static int process_reserve_metadata_snap_mesg(unsigned argc, char **argv, struct | |||
2295 | if (r) | 2527 | if (r) |
2296 | return r; | 2528 | return r; |
2297 | 2529 | ||
2298 | r = dm_pool_commit_metadata(pool->pmd); | 2530 | (void) commit_or_fallback(pool); |
2299 | if (r) { | ||
2300 | DMERR("%s: dm_pool_commit_metadata() failed, error = %d", | ||
2301 | __func__, r); | ||
2302 | return r; | ||
2303 | } | ||
2304 | 2531 | ||
2305 | r = dm_pool_reserve_metadata_snap(pool->pmd); | 2532 | r = dm_pool_reserve_metadata_snap(pool->pmd); |
2306 | if (r) | 2533 | if (r) |
@@ -2361,25 +2588,41 @@ static int pool_message(struct dm_target *ti, unsigned argc, char **argv) | |||
2361 | else | 2588 | else |
2362 | DMWARN("Unrecognised thin pool target message received: %s", argv[0]); | 2589 | DMWARN("Unrecognised thin pool target message received: %s", argv[0]); |
2363 | 2590 | ||
2364 | if (!r) { | 2591 | if (!r) |
2365 | r = dm_pool_commit_metadata(pool->pmd); | 2592 | (void) commit_or_fallback(pool); |
2366 | if (r) | ||
2367 | DMERR("%s message: dm_pool_commit_metadata() failed, error = %d", | ||
2368 | argv[0], r); | ||
2369 | } | ||
2370 | 2593 | ||
2371 | return r; | 2594 | return r; |
2372 | } | 2595 | } |
2373 | 2596 | ||
2597 | static void emit_flags(struct pool_features *pf, char *result, | ||
2598 | unsigned sz, unsigned maxlen) | ||
2599 | { | ||
2600 | unsigned count = !pf->zero_new_blocks + !pf->discard_enabled + | ||
2601 | !pf->discard_passdown + (pf->mode == PM_READ_ONLY); | ||
2602 | DMEMIT("%u ", count); | ||
2603 | |||
2604 | if (!pf->zero_new_blocks) | ||
2605 | DMEMIT("skip_block_zeroing "); | ||
2606 | |||
2607 | if (!pf->discard_enabled) | ||
2608 | DMEMIT("ignore_discard "); | ||
2609 | |||
2610 | if (!pf->discard_passdown) | ||
2611 | DMEMIT("no_discard_passdown "); | ||
2612 | |||
2613 | if (pf->mode == PM_READ_ONLY) | ||
2614 | DMEMIT("read_only "); | ||
2615 | } | ||
2616 | |||
2374 | /* | 2617 | /* |
2375 | * Status line is: | 2618 | * Status line is: |
2376 | * <transaction id> <used metadata sectors>/<total metadata sectors> | 2619 | * <transaction id> <used metadata sectors>/<total metadata sectors> |
2377 | * <used data sectors>/<total data sectors> <held metadata root> | 2620 | * <used data sectors>/<total data sectors> <held metadata root> |
2378 | */ | 2621 | */ |
2379 | static int pool_status(struct dm_target *ti, status_type_t type, | 2622 | static int pool_status(struct dm_target *ti, status_type_t type, |
2380 | char *result, unsigned maxlen) | 2623 | unsigned status_flags, char *result, unsigned maxlen) |
2381 | { | 2624 | { |
2382 | int r, count; | 2625 | int r; |
2383 | unsigned sz = 0; | 2626 | unsigned sz = 0; |
2384 | uint64_t transaction_id; | 2627 | uint64_t transaction_id; |
2385 | dm_block_t nr_free_blocks_data; | 2628 | dm_block_t nr_free_blocks_data; |
@@ -2394,6 +2637,15 @@ static int pool_status(struct dm_target *ti, status_type_t type, | |||
2394 | 2637 | ||
2395 | switch (type) { | 2638 | switch (type) { |
2396 | case STATUSTYPE_INFO: | 2639 | case STATUSTYPE_INFO: |
2640 | if (get_pool_mode(pool) == PM_FAIL) { | ||
2641 | DMEMIT("Fail"); | ||
2642 | break; | ||
2643 | } | ||
2644 | |||
2645 | /* Commit to ensure statistics aren't out-of-date */ | ||
2646 | if (!(status_flags & DM_STATUS_NOFLUSH_FLAG) && !dm_suspended(ti)) | ||
2647 | (void) commit_or_fallback(pool); | ||
2648 | |||
2397 | r = dm_pool_get_metadata_transaction_id(pool->pmd, | 2649 | r = dm_pool_get_metadata_transaction_id(pool->pmd, |
2398 | &transaction_id); | 2650 | &transaction_id); |
2399 | if (r) | 2651 | if (r) |
@@ -2429,9 +2681,19 @@ static int pool_status(struct dm_target *ti, status_type_t type, | |||
2429 | (unsigned long long)nr_blocks_data); | 2681 | (unsigned long long)nr_blocks_data); |
2430 | 2682 | ||
2431 | if (held_root) | 2683 | if (held_root) |
2432 | DMEMIT("%llu", held_root); | 2684 | DMEMIT("%llu ", held_root); |
2685 | else | ||
2686 | DMEMIT("- "); | ||
2687 | |||
2688 | if (pool->pf.mode == PM_READ_ONLY) | ||
2689 | DMEMIT("ro "); | ||
2690 | else | ||
2691 | DMEMIT("rw "); | ||
2692 | |||
2693 | if (pool->pf.discard_enabled && pool->pf.discard_passdown) | ||
2694 | DMEMIT("discard_passdown"); | ||
2433 | else | 2695 | else |
2434 | DMEMIT("-"); | 2696 | DMEMIT("no_discard_passdown"); |
2435 | 2697 | ||
2436 | break; | 2698 | break; |
2437 | 2699 | ||
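
After this hunk the INFO status gains three fields behind the data-usage pair: the held metadata root (or "-" when no metadata snapshot is held), the pool mode ("ro" or "rw"), and whether discards are passed down to the data device. Purely as an illustration, with every number invented, the target-specific part of a dmsetup status line for a healthy writable pool could read:

    0 141/4161600 10/838860 - rw discard_passdown

while a pool that has dropped into failure mode reports just "Fail", as the new STATUSTYPE_INFO branch above shows.
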
@@ -2441,20 +2703,7 @@ static int pool_status(struct dm_target *ti, status_type_t type, | |||
2441 | format_dev_t(buf2, pt->data_dev->bdev->bd_dev), | 2703 | format_dev_t(buf2, pt->data_dev->bdev->bd_dev), |
2442 | (unsigned long)pool->sectors_per_block, | 2704 | (unsigned long)pool->sectors_per_block, |
2443 | (unsigned long long)pt->low_water_blocks); | 2705 | (unsigned long long)pt->low_water_blocks); |
2444 | 2706 | emit_flags(&pt->pf, result, sz, maxlen); | |
2445 | count = !pool->pf.zero_new_blocks + !pool->pf.discard_enabled + | ||
2446 | !pt->pf.discard_passdown; | ||
2447 | DMEMIT("%u ", count); | ||
2448 | |||
2449 | if (!pool->pf.zero_new_blocks) | ||
2450 | DMEMIT("skip_block_zeroing "); | ||
2451 | |||
2452 | if (!pool->pf.discard_enabled) | ||
2453 | DMEMIT("ignore_discard "); | ||
2454 | |||
2455 | if (!pt->pf.discard_passdown) | ||
2456 | DMEMIT("no_discard_passdown "); | ||
2457 | |||
2458 | break; | 2707 | break; |
2459 | } | 2708 | } |
2460 | 2709 | ||
@@ -2492,7 +2741,8 @@ static void set_discard_limits(struct pool *pool, struct queue_limits *limits) | |||
2492 | 2741 | ||
2493 | /* | 2742 | /* |
2494 | * This is just a hint, and not enforced. We have to cope with | 2743 | * This is just a hint, and not enforced. We have to cope with |
2495 | * bios that overlap 2 blocks. | 2744 | * bios that cover a block partially. A discard that spans a block |
2745 | * boundary is not sent to this target. | ||
2496 | */ | 2746 | */ |
2497 | limits->discard_granularity = pool->sectors_per_block << SECTOR_SHIFT; | 2747 | limits->discard_granularity = pool->sectors_per_block << SECTOR_SHIFT; |
2498 | limits->discard_zeroes_data = pool->pf.zero_new_blocks; | 2748 | limits->discard_zeroes_data = pool->pf.zero_new_blocks; |
@@ -2513,7 +2763,7 @@ static struct target_type pool_target = { | |||
2513 | .name = "thin-pool", | 2763 | .name = "thin-pool", |
2514 | .features = DM_TARGET_SINGLETON | DM_TARGET_ALWAYS_WRITEABLE | | 2764 | .features = DM_TARGET_SINGLETON | DM_TARGET_ALWAYS_WRITEABLE | |
2515 | DM_TARGET_IMMUTABLE, | 2765 | DM_TARGET_IMMUTABLE, |
2516 | .version = {1, 2, 0}, | 2766 | .version = {1, 3, 0}, |
2517 | .module = THIS_MODULE, | 2767 | .module = THIS_MODULE, |
2518 | .ctr = pool_ctr, | 2768 | .ctr = pool_ctr, |
2519 | .dtr = pool_dtr, | 2769 | .dtr = pool_dtr, |
@@ -2618,20 +2868,31 @@ static int thin_ctr(struct dm_target *ti, unsigned argc, char **argv) | |||
2618 | } | 2868 | } |
2619 | __pool_inc(tc->pool); | 2869 | __pool_inc(tc->pool); |
2620 | 2870 | ||
2871 | if (get_pool_mode(tc->pool) == PM_FAIL) { | ||
2872 | ti->error = "Couldn't open thin device, Pool is in fail mode"; | ||
2873 | goto bad_thin_open; | ||
2874 | } | ||
2875 | |||
2621 | r = dm_pool_open_thin_device(tc->pool->pmd, tc->dev_id, &tc->td); | 2876 | r = dm_pool_open_thin_device(tc->pool->pmd, tc->dev_id, &tc->td); |
2622 | if (r) { | 2877 | if (r) { |
2623 | ti->error = "Couldn't open thin internal device"; | 2878 | ti->error = "Couldn't open thin internal device"; |
2624 | goto bad_thin_open; | 2879 | goto bad_thin_open; |
2625 | } | 2880 | } |
2626 | 2881 | ||
2627 | ti->split_io = tc->pool->sectors_per_block; | 2882 | r = dm_set_target_max_io_len(ti, tc->pool->sectors_per_block); |
2883 | if (r) | ||
2884 | goto bad_thin_open; | ||
2885 | |||
2628 | ti->num_flush_requests = 1; | 2886 | ti->num_flush_requests = 1; |
2887 | ti->flush_supported = true; | ||
2629 | 2888 | ||
2630 | /* In case the pool supports discards, pass them on. */ | 2889 | /* In case the pool supports discards, pass them on. */ |
2631 | if (tc->pool->pf.discard_enabled) { | 2890 | if (tc->pool->pf.discard_enabled) { |
2632 | ti->discards_supported = 1; | 2891 | ti->discards_supported = true; |
2633 | ti->num_discard_requests = 1; | 2892 | ti->num_discard_requests = 1; |
2634 | ti->discard_zeroes_data_unsupported = 1; | 2893 | ti->discard_zeroes_data_unsupported = true; |
2894 | /* Discard requests must be split on a block boundary */ | ||
2895 | ti->split_discard_requests = true; | ||
2635 | } | 2896 | } |
2636 | 2897 | ||
2637 | dm_put(pool_md); | 2898 | dm_put(pool_md); |
@@ -2712,7 +2973,7 @@ static void thin_postsuspend(struct dm_target *ti) | |||
2712 | * <nr mapped sectors> <highest mapped sector> | 2973 | * <nr mapped sectors> <highest mapped sector> |
2713 | */ | 2974 | */ |
2714 | static int thin_status(struct dm_target *ti, status_type_t type, | 2975 | static int thin_status(struct dm_target *ti, status_type_t type, |
2715 | char *result, unsigned maxlen) | 2976 | unsigned status_flags, char *result, unsigned maxlen) |
2716 | { | 2977 | { |
2717 | int r; | 2978 | int r; |
2718 | ssize_t sz = 0; | 2979 | ssize_t sz = 0; |
@@ -2720,6 +2981,11 @@ static int thin_status(struct dm_target *ti, status_type_t type, | |||
2720 | char buf[BDEVNAME_SIZE]; | 2981 | char buf[BDEVNAME_SIZE]; |
2721 | struct thin_c *tc = ti->private; | 2982 | struct thin_c *tc = ti->private; |
2722 | 2983 | ||
2984 | if (get_pool_mode(tc->pool) == PM_FAIL) { | ||
2985 | DMEMIT("Fail"); | ||
2986 | return 0; | ||
2987 | } | ||
2988 | |||
2723 | if (!tc->td) | 2989 | if (!tc->td) |
2724 | DMEMIT("-"); | 2990 | DMEMIT("-"); |
2725 | else { | 2991 | else { |
@@ -2757,19 +3023,21 @@ static int thin_status(struct dm_target *ti, status_type_t type, | |||
2757 | static int thin_iterate_devices(struct dm_target *ti, | 3023 | static int thin_iterate_devices(struct dm_target *ti, |
2758 | iterate_devices_callout_fn fn, void *data) | 3024 | iterate_devices_callout_fn fn, void *data) |
2759 | { | 3025 | { |
2760 | dm_block_t blocks; | 3026 | sector_t blocks; |
2761 | struct thin_c *tc = ti->private; | 3027 | struct thin_c *tc = ti->private; |
3028 | struct pool *pool = tc->pool; | ||
2762 | 3029 | ||
2763 | /* | 3030 | /* |
2764 | * We can't call dm_pool_get_data_dev_size() since that blocks. So | 3031 | * We can't call dm_pool_get_data_dev_size() since that blocks. So |
2765 | * we follow a more convoluted path through to the pool's target. | 3032 | * we follow a more convoluted path through to the pool's target. |
2766 | */ | 3033 | */ |
2767 | if (!tc->pool->ti) | 3034 | if (!pool->ti) |
2768 | return 0; /* nothing is bound */ | 3035 | return 0; /* nothing is bound */ |
2769 | 3036 | ||
2770 | blocks = tc->pool->ti->len >> tc->pool->block_shift; | 3037 | blocks = pool->ti->len; |
3038 | (void) sector_div(blocks, pool->sectors_per_block); | ||
2771 | if (blocks) | 3039 | if (blocks) |
2772 | return fn(ti, tc->pool_dev, 0, tc->pool->sectors_per_block * blocks, data); | 3040 | return fn(ti, tc->pool_dev, 0, pool->sectors_per_block * blocks, data); |
2773 | 3041 | ||
2774 | return 0; | 3042 | return 0; |
2775 | } | 3043 | } |
@@ -2786,7 +3054,7 @@ static void thin_io_hints(struct dm_target *ti, struct queue_limits *limits) | |||
2786 | 3054 | ||
2787 | static struct target_type thin_target = { | 3055 | static struct target_type thin_target = { |
2788 | .name = "thin", | 3056 | .name = "thin", |
2789 | .version = {1, 1, 0}, | 3057 | .version = {1, 3, 0}, |
2790 | .module = THIS_MODULE, | 3058 | .module = THIS_MODULE, |
2791 | .ctr = thin_ctr, | 3059 | .ctr = thin_ctr, |
2792 | .dtr = thin_dtr, | 3060 | .dtr = thin_dtr, |
diff --git a/drivers/md/dm-verity.c b/drivers/md/dm-verity.c index fa365d39b612..254d19268ad2 100644 --- a/drivers/md/dm-verity.c +++ b/drivers/md/dm-verity.c | |||
@@ -515,7 +515,7 @@ static int verity_map(struct dm_target *ti, struct bio *bio, | |||
515 | * Status: V (valid) or C (corruption found) | 515 | * Status: V (valid) or C (corruption found) |
516 | */ | 516 | */ |
517 | static int verity_status(struct dm_target *ti, status_type_t type, | 517 | static int verity_status(struct dm_target *ti, status_type_t type, |
518 | char *result, unsigned maxlen) | 518 | unsigned status_flags, char *result, unsigned maxlen) |
519 | { | 519 | { |
520 | struct dm_verity *v = ti->private; | 520 | struct dm_verity *v = ti->private; |
521 | unsigned sz = 0; | 521 | unsigned sz = 0; |
diff --git a/drivers/md/dm.c b/drivers/md/dm.c index e24143cc2040..4e09b6ff5b49 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c | |||
@@ -968,22 +968,41 @@ static sector_t max_io_len_target_boundary(sector_t sector, struct dm_target *ti | |||
968 | static sector_t max_io_len(sector_t sector, struct dm_target *ti) | 968 | static sector_t max_io_len(sector_t sector, struct dm_target *ti) |
969 | { | 969 | { |
970 | sector_t len = max_io_len_target_boundary(sector, ti); | 970 | sector_t len = max_io_len_target_boundary(sector, ti); |
971 | sector_t offset, max_len; | ||
971 | 972 | ||
972 | /* | 973 | /* |
973 | * Does the target need to split even further ? | 974 | * Does the target need to split even further? |
974 | */ | 975 | */ |
975 | if (ti->split_io) { | 976 | if (ti->max_io_len) { |
976 | sector_t boundary; | 977 | offset = dm_target_offset(ti, sector); |
977 | sector_t offset = dm_target_offset(ti, sector); | 978 | if (unlikely(ti->max_io_len & (ti->max_io_len - 1))) |
978 | boundary = ((offset + ti->split_io) & ~(ti->split_io - 1)) | 979 | max_len = sector_div(offset, ti->max_io_len); |
979 | - offset; | 980 | else |
980 | if (len > boundary) | 981 | max_len = offset & (ti->max_io_len - 1); |
981 | len = boundary; | 982 | max_len = ti->max_io_len - max_len; |
983 | |||
984 | if (len > max_len) | ||
985 | len = max_len; | ||
982 | } | 986 | } |
983 | 987 | ||
984 | return len; | 988 | return len; |
985 | } | 989 | } |
986 | 990 | ||
991 | int dm_set_target_max_io_len(struct dm_target *ti, sector_t len) | ||
992 | { | ||
993 | if (len > UINT_MAX) { | ||
994 | DMERR("Specified maximum size of target IO (%llu) exceeds limit (%u)", | ||
995 | (unsigned long long)len, UINT_MAX); | ||
996 | ti->error = "Maximum size of target IO is too large"; | ||
997 | return -EINVAL; | ||
998 | } | ||
999 | |||
1000 | ti->max_io_len = (uint32_t) len; | ||
1001 | |||
1002 | return 0; | ||
1003 | } | ||
1004 | EXPORT_SYMBOL_GPL(dm_set_target_max_io_len); | ||
1005 | |||
987 | static void __map_bio(struct dm_target *ti, struct bio *clone, | 1006 | static void __map_bio(struct dm_target *ti, struct bio *clone, |
988 | struct dm_target_io *tio) | 1007 | struct dm_target_io *tio) |
989 | { | 1008 | { |
@@ -1196,7 +1215,10 @@ static int __clone_and_map_discard(struct clone_info *ci) | |||
1196 | if (!ti->num_discard_requests) | 1215 | if (!ti->num_discard_requests) |
1197 | return -EOPNOTSUPP; | 1216 | return -EOPNOTSUPP; |
1198 | 1217 | ||
1199 | len = min(ci->sector_count, max_io_len_target_boundary(ci->sector, ti)); | 1218 | if (!ti->split_discard_requests) |
1219 | len = min(ci->sector_count, max_io_len_target_boundary(ci->sector, ti)); | ||
1220 | else | ||
1221 | len = min(ci->sector_count, max_io_len(ci->sector, ti)); | ||
1200 | 1222 | ||
1201 | __issue_target_requests(ci, ti, ti->num_discard_requests, len); | 1223 | __issue_target_requests(ci, ti, ti->num_discard_requests, len); |
1202 | 1224 | ||
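
The replacement for split_io works for any chunk size: when max_io_len is a power of two the position inside the current chunk comes from a bitmask, otherwise sector_div() performs the division, and the remaining distance to the boundary caps the bio length. With ti->split_discard_requests set (as the thin target now does), __clone_and_map_discard() applies the same cap to discards. A kernel-context sketch of the arithmetic, using a hypothetical helper name rather than anything dm core defines:

    static sector_t len_to_boundary(sector_t offset_in_target, u32 max_io_len)
    {
            sector_t done_in_chunk;

            if (max_io_len & (max_io_len - 1))
                    /* not a power of two: sector_div() returns the remainder */
                    done_in_chunk = sector_div(offset_in_target, max_io_len);
            else
                    /* power of two: the low bits are the offset in the chunk */
                    done_in_chunk = offset_in_target & (max_io_len - 1);

            /* max_io_len() clamps the bio to this many sectors */
            return max_io_len - done_in_chunk;
    }
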
diff --git a/drivers/md/dm.h b/drivers/md/dm.h index b7dacd59d8d7..52eef493d266 100644 --- a/drivers/md/dm.h +++ b/drivers/md/dm.h | |||
@@ -23,6 +23,11 @@ | |||
23 | #define DM_SUSPEND_NOFLUSH_FLAG (1 << 1) | 23 | #define DM_SUSPEND_NOFLUSH_FLAG (1 << 1) |
24 | 24 | ||
25 | /* | 25 | /* |
26 | * Status feature flags | ||
27 | */ | ||
28 | #define DM_STATUS_NOFLUSH_FLAG (1 << 0) | ||
29 | |||
30 | /* | ||
26 | * Type of table and mapped_device's mempool | 31 | * Type of table and mapped_device's mempool |
27 | */ | 32 | */ |
28 | #define DM_TYPE_NONE 0 | 33 | #define DM_TYPE_NONE 0 |
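
DM_STATUS_NOFLUSH_FLAG gives status callers a way to ask that no metadata commit be triggered just to freshen the counters; pool_status() above tests it before calling commit_or_fallback(). A minimal sketch of how a target's status method can honour the flag; the target and its output are hypothetical, only the flag test mirrors the pool code.

    static int example_status(struct dm_target *ti, status_type_t type,
                              unsigned status_flags, char *result, unsigned maxlen)
    {
            unsigned sz = 0;
            bool can_commit;

            /*
             * Only commit (and thereby refresh the statistics) when the caller
             * has not asked for a flush-free status and the device is live.
             */
            can_commit = (type == STATUSTYPE_INFO) &&
                         !(status_flags & DM_STATUS_NOFLUSH_FLAG) &&
                         !dm_suspended(ti);

            DMEMIT("%s", can_commit ? "fresh" : "cached");

            return 0;
    }
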
diff --git a/drivers/md/md.c b/drivers/md/md.c index db02d2efb76f..fcd098794d37 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c | |||
@@ -3893,17 +3893,13 @@ array_state_store(struct mddev *mddev, const char *buf, size_t len) | |||
3893 | break; | 3893 | break; |
3894 | case clear: | 3894 | case clear: |
3895 | /* stopping an active array */ | 3895 | /* stopping an active array */ |
3896 | if (atomic_read(&mddev->openers) > 0) | ||
3897 | return -EBUSY; | ||
3898 | err = do_md_stop(mddev, 0, NULL); | 3896 | err = do_md_stop(mddev, 0, NULL); |
3899 | break; | 3897 | break; |
3900 | case inactive: | 3898 | case inactive: |
3901 | /* stopping an active array */ | 3899 | /* stopping an active array */ |
3902 | if (mddev->pers) { | 3900 | if (mddev->pers) |
3903 | if (atomic_read(&mddev->openers) > 0) | ||
3904 | return -EBUSY; | ||
3905 | err = do_md_stop(mddev, 2, NULL); | 3901 | err = do_md_stop(mddev, 2, NULL); |
3906 | } else | 3902 | else |
3907 | err = 0; /* already inactive */ | 3903 | err = 0; /* already inactive */ |
3908 | break; | 3904 | break; |
3909 | case suspended: | 3905 | case suspended: |
diff --git a/drivers/md/persistent-data/Makefile b/drivers/md/persistent-data/Makefile index cfa95f662230..d8e7cb767c1e 100644 --- a/drivers/md/persistent-data/Makefile +++ b/drivers/md/persistent-data/Makefile | |||
@@ -1,7 +1,6 @@ | |||
1 | obj-$(CONFIG_DM_PERSISTENT_DATA) += dm-persistent-data.o | 1 | obj-$(CONFIG_DM_PERSISTENT_DATA) += dm-persistent-data.o |
2 | dm-persistent-data-objs := \ | 2 | dm-persistent-data-objs := \ |
3 | dm-block-manager.o \ | 3 | dm-block-manager.o \ |
4 | dm-space-map-checker.o \ | ||
5 | dm-space-map-common.o \ | 4 | dm-space-map-common.o \ |
6 | dm-space-map-disk.o \ | 5 | dm-space-map-disk.o \ |
7 | dm-space-map-metadata.o \ | 6 | dm-space-map-metadata.o \ |
diff --git a/drivers/md/persistent-data/dm-block-manager.c b/drivers/md/persistent-data/dm-block-manager.c index 0317ecdc6e53..5ba277768d99 100644 --- a/drivers/md/persistent-data/dm-block-manager.c +++ b/drivers/md/persistent-data/dm-block-manager.c | |||
@@ -325,11 +325,6 @@ static struct dm_buffer *to_buffer(struct dm_block *b) | |||
325 | return (struct dm_buffer *) b; | 325 | return (struct dm_buffer *) b; |
326 | } | 326 | } |
327 | 327 | ||
328 | static struct dm_bufio_client *to_bufio(struct dm_block_manager *bm) | ||
329 | { | ||
330 | return (struct dm_bufio_client *) bm; | ||
331 | } | ||
332 | |||
333 | dm_block_t dm_block_location(struct dm_block *b) | 328 | dm_block_t dm_block_location(struct dm_block *b) |
334 | { | 329 | { |
335 | return dm_bufio_get_block_number(to_buffer(b)); | 330 | return dm_bufio_get_block_number(to_buffer(b)); |
@@ -367,34 +362,60 @@ static void dm_block_manager_write_callback(struct dm_buffer *buf) | |||
367 | /*---------------------------------------------------------------- | 362 | /*---------------------------------------------------------------- |
368 | * Public interface | 363 | * Public interface |
369 | *--------------------------------------------------------------*/ | 364 | *--------------------------------------------------------------*/ |
365 | struct dm_block_manager { | ||
366 | struct dm_bufio_client *bufio; | ||
367 | bool read_only:1; | ||
368 | }; | ||
369 | |||
370 | struct dm_block_manager *dm_block_manager_create(struct block_device *bdev, | 370 | struct dm_block_manager *dm_block_manager_create(struct block_device *bdev, |
371 | unsigned block_size, | 371 | unsigned block_size, |
372 | unsigned cache_size, | 372 | unsigned cache_size, |
373 | unsigned max_held_per_thread) | 373 | unsigned max_held_per_thread) |
374 | { | 374 | { |
375 | return (struct dm_block_manager *) | 375 | int r; |
376 | dm_bufio_client_create(bdev, block_size, max_held_per_thread, | 376 | struct dm_block_manager *bm; |
377 | sizeof(struct buffer_aux), | 377 | |
378 | dm_block_manager_alloc_callback, | 378 | bm = kmalloc(sizeof(*bm), GFP_KERNEL); |
379 | dm_block_manager_write_callback); | 379 | if (!bm) { |
380 | r = -ENOMEM; | ||
381 | goto bad; | ||
382 | } | ||
383 | |||
384 | bm->bufio = dm_bufio_client_create(bdev, block_size, max_held_per_thread, | ||
385 | sizeof(struct buffer_aux), | ||
386 | dm_block_manager_alloc_callback, | ||
387 | dm_block_manager_write_callback); | ||
388 | if (IS_ERR(bm->bufio)) { | ||
389 | r = PTR_ERR(bm->bufio); | ||
390 | kfree(bm); | ||
391 | goto bad; | ||
392 | } | ||
393 | |||
394 | bm->read_only = false; | ||
395 | |||
396 | return bm; | ||
397 | |||
398 | bad: | ||
399 | return ERR_PTR(r); | ||
380 | } | 400 | } |
381 | EXPORT_SYMBOL_GPL(dm_block_manager_create); | 401 | EXPORT_SYMBOL_GPL(dm_block_manager_create); |
382 | 402 | ||
383 | void dm_block_manager_destroy(struct dm_block_manager *bm) | 403 | void dm_block_manager_destroy(struct dm_block_manager *bm) |
384 | { | 404 | { |
385 | return dm_bufio_client_destroy(to_bufio(bm)); | 405 | dm_bufio_client_destroy(bm->bufio); |
406 | kfree(bm); | ||
386 | } | 407 | } |
387 | EXPORT_SYMBOL_GPL(dm_block_manager_destroy); | 408 | EXPORT_SYMBOL_GPL(dm_block_manager_destroy); |
388 | 409 | ||
389 | unsigned dm_bm_block_size(struct dm_block_manager *bm) | 410 | unsigned dm_bm_block_size(struct dm_block_manager *bm) |
390 | { | 411 | { |
391 | return dm_bufio_get_block_size(to_bufio(bm)); | 412 | return dm_bufio_get_block_size(bm->bufio); |
392 | } | 413 | } |
393 | EXPORT_SYMBOL_GPL(dm_bm_block_size); | 414 | EXPORT_SYMBOL_GPL(dm_bm_block_size); |
394 | 415 | ||
395 | dm_block_t dm_bm_nr_blocks(struct dm_block_manager *bm) | 416 | dm_block_t dm_bm_nr_blocks(struct dm_block_manager *bm) |
396 | { | 417 | { |
397 | return dm_bufio_get_device_size(to_bufio(bm)); | 418 | return dm_bufio_get_device_size(bm->bufio); |
398 | } | 419 | } |
399 | 420 | ||
400 | static int dm_bm_validate_buffer(struct dm_block_manager *bm, | 421 | static int dm_bm_validate_buffer(struct dm_block_manager *bm, |
@@ -406,7 +427,7 @@ static int dm_bm_validate_buffer(struct dm_block_manager *bm, | |||
406 | int r; | 427 | int r; |
407 | if (!v) | 428 | if (!v) |
408 | return 0; | 429 | return 0; |
409 | r = v->check(v, (struct dm_block *) buf, dm_bufio_get_block_size(to_bufio(bm))); | 430 | r = v->check(v, (struct dm_block *) buf, dm_bufio_get_block_size(bm->bufio)); |
410 | if (unlikely(r)) | 431 | if (unlikely(r)) |
411 | return r; | 432 | return r; |
412 | aux->validator = v; | 433 | aux->validator = v; |
@@ -430,7 +451,7 @@ int dm_bm_read_lock(struct dm_block_manager *bm, dm_block_t b, | |||
430 | void *p; | 451 | void *p; |
431 | int r; | 452 | int r; |
432 | 453 | ||
433 | p = dm_bufio_read(to_bufio(bm), b, (struct dm_buffer **) result); | 454 | p = dm_bufio_read(bm->bufio, b, (struct dm_buffer **) result); |
434 | if (unlikely(IS_ERR(p))) | 455 | if (unlikely(IS_ERR(p))) |
435 | return PTR_ERR(p); | 456 | return PTR_ERR(p); |
436 | 457 | ||
@@ -463,7 +484,10 @@ int dm_bm_write_lock(struct dm_block_manager *bm, | |||
463 | void *p; | 484 | void *p; |
464 | int r; | 485 | int r; |
465 | 486 | ||
466 | p = dm_bufio_read(to_bufio(bm), b, (struct dm_buffer **) result); | 487 | if (bm->read_only) |
488 | return -EPERM; | ||
489 | |||
490 | p = dm_bufio_read(bm->bufio, b, (struct dm_buffer **) result); | ||
467 | if (unlikely(IS_ERR(p))) | 491 | if (unlikely(IS_ERR(p))) |
468 | return PTR_ERR(p); | 492 | return PTR_ERR(p); |
469 | 493 | ||
@@ -496,7 +520,7 @@ int dm_bm_read_try_lock(struct dm_block_manager *bm, | |||
496 | void *p; | 520 | void *p; |
497 | int r; | 521 | int r; |
498 | 522 | ||
499 | p = dm_bufio_get(to_bufio(bm), b, (struct dm_buffer **) result); | 523 | p = dm_bufio_get(bm->bufio, b, (struct dm_buffer **) result); |
500 | if (unlikely(IS_ERR(p))) | 524 | if (unlikely(IS_ERR(p))) |
501 | return PTR_ERR(p); | 525 | return PTR_ERR(p); |
502 | if (unlikely(!p)) | 526 | if (unlikely(!p)) |
@@ -529,7 +553,10 @@ int dm_bm_write_lock_zero(struct dm_block_manager *bm, | |||
529 | struct buffer_aux *aux; | 553 | struct buffer_aux *aux; |
530 | void *p; | 554 | void *p; |
531 | 555 | ||
532 | p = dm_bufio_new(to_bufio(bm), b, (struct dm_buffer **) result); | 556 | if (bm->read_only) |
557 | return -EPERM; | ||
558 | |||
559 | p = dm_bufio_new(bm->bufio, b, (struct dm_buffer **) result); | ||
533 | if (unlikely(IS_ERR(p))) | 560 | if (unlikely(IS_ERR(p))) |
534 | return PTR_ERR(p); | 561 | return PTR_ERR(p); |
535 | 562 | ||
@@ -547,6 +574,7 @@ int dm_bm_write_lock_zero(struct dm_block_manager *bm, | |||
547 | 574 | ||
548 | return 0; | 575 | return 0; |
549 | } | 576 | } |
577 | EXPORT_SYMBOL_GPL(dm_bm_write_lock_zero); | ||
550 | 578 | ||
551 | int dm_bm_unlock(struct dm_block *b) | 579 | int dm_bm_unlock(struct dm_block *b) |
552 | { | 580 | { |
@@ -565,45 +593,30 @@ int dm_bm_unlock(struct dm_block *b) | |||
565 | } | 593 | } |
566 | EXPORT_SYMBOL_GPL(dm_bm_unlock); | 594 | EXPORT_SYMBOL_GPL(dm_bm_unlock); |
567 | 595 | ||
568 | int dm_bm_unlock_move(struct dm_block *b, dm_block_t n) | ||
569 | { | ||
570 | struct buffer_aux *aux; | ||
571 | |||
572 | aux = dm_bufio_get_aux_data(to_buffer(b)); | ||
573 | |||
574 | if (aux->write_locked) { | ||
575 | dm_bufio_mark_buffer_dirty(to_buffer(b)); | ||
576 | bl_up_write(&aux->lock); | ||
577 | } else | ||
578 | bl_up_read(&aux->lock); | ||
579 | |||
580 | dm_bufio_release_move(to_buffer(b), n); | ||
581 | return 0; | ||
582 | } | ||
583 | |||
584 | int dm_bm_flush_and_unlock(struct dm_block_manager *bm, | 596 | int dm_bm_flush_and_unlock(struct dm_block_manager *bm, |
585 | struct dm_block *superblock) | 597 | struct dm_block *superblock) |
586 | { | 598 | { |
587 | int r; | 599 | int r; |
588 | 600 | ||
589 | r = dm_bufio_write_dirty_buffers(to_bufio(bm)); | 601 | if (bm->read_only) |
590 | if (unlikely(r)) | 602 | return -EPERM; |
591 | return r; | 603 | |
592 | r = dm_bufio_issue_flush(to_bufio(bm)); | 604 | r = dm_bufio_write_dirty_buffers(bm->bufio); |
593 | if (unlikely(r)) | 605 | if (unlikely(r)) { |
606 | dm_bm_unlock(superblock); | ||
594 | return r; | 607 | return r; |
608 | } | ||
595 | 609 | ||
596 | dm_bm_unlock(superblock); | 610 | dm_bm_unlock(superblock); |
597 | 611 | ||
598 | r = dm_bufio_write_dirty_buffers(to_bufio(bm)); | 612 | return dm_bufio_write_dirty_buffers(bm->bufio); |
599 | if (unlikely(r)) | 613 | } |
600 | return r; | ||
601 | r = dm_bufio_issue_flush(to_bufio(bm)); | ||
602 | if (unlikely(r)) | ||
603 | return r; | ||
604 | 614 | ||
605 | return 0; | 615 | void dm_bm_set_read_only(struct dm_block_manager *bm) |
616 | { | ||
617 | bm->read_only = true; | ||
606 | } | 618 | } |
619 | EXPORT_SYMBOL_GPL(dm_bm_set_read_only); | ||
607 | 620 | ||
608 | u32 dm_bm_checksum(const void *data, size_t len, u32 init_xor) | 621 | u32 dm_bm_checksum(const void *data, size_t len, u32 init_xor) |
609 | { | 622 | { |
diff --git a/drivers/md/persistent-data/dm-block-manager.h b/drivers/md/persistent-data/dm-block-manager.h index 924833d2dfa6..be5bff61be28 100644 --- a/drivers/md/persistent-data/dm-block-manager.h +++ b/drivers/md/persistent-data/dm-block-manager.h | |||
@@ -97,14 +97,6 @@ int dm_bm_write_lock_zero(struct dm_block_manager *bm, dm_block_t b, | |||
97 | int dm_bm_unlock(struct dm_block *b); | 97 | int dm_bm_unlock(struct dm_block *b); |
98 | 98 | ||
99 | /* | 99 | /* |
100 | * An optimisation; we often want to copy a block's contents to a new | ||
101 | * block. eg, as part of the shadowing operation. It's far better for | ||
102 | * bufio to do this move behind the scenes than hold 2 locks and memcpy the | ||
103 | * data. | ||
104 | */ | ||
105 | int dm_bm_unlock_move(struct dm_block *b, dm_block_t n); | ||
106 | |||
107 | /* | ||
108 | * It's a common idiom to have a superblock that should be committed last. | 100 | * It's a common idiom to have a superblock that should be committed last. |
109 | * | 101 | * |
110 | * @superblock should be write-locked on entry. It will be unlocked during | 102 | * @superblock should be write-locked on entry. It will be unlocked during |
@@ -116,6 +108,19 @@ int dm_bm_unlock_move(struct dm_block *b, dm_block_t n); | |||
116 | int dm_bm_flush_and_unlock(struct dm_block_manager *bm, | 108 | int dm_bm_flush_and_unlock(struct dm_block_manager *bm, |
117 | struct dm_block *superblock); | 109 | struct dm_block *superblock); |
118 | 110 | ||
111 | /* | ||
112 | * Switches the bm to a read only mode. Once read-only mode | ||
113 | * has been entered the following functions will return -EPERM. | ||
114 | * | ||
115 | * dm_bm_write_lock | ||
116 | * dm_bm_write_lock_zero | ||
117 | * dm_bm_flush_and_unlock | ||
118 | * | ||
119 | * Additionally you should not use dm_bm_unlock_move, however no error will | ||
120 | * be returned if you do. | ||
121 | */ | ||
122 | void dm_bm_set_read_only(struct dm_block_manager *bm); | ||
123 | |||
119 | u32 dm_bm_checksum(const void *data, size_t len, u32 init_xor); | 124 | u32 dm_bm_checksum(const void *data, size_t len, u32 init_xor); |
120 | 125 | ||
121 | /*----------------------------------------------------------------*/ | 126 | /*----------------------------------------------------------------*/ |
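
The read-only mode is enforced at the block-manager level: once dm_bm_set_read_only() has been called, write locks and flushes fail with -EPERM while read locks keep working, which is what the thin-pool metadata layer can rely on when the pool drops into its read-only or failure mode. A minimal kernel-context sketch of that behaviour; the NULL validator and block number 1 are arbitrary choices for illustration only.

    static void demo_read_only(struct dm_block_manager *bm)
    {
            struct dm_block *b;
            int r;

            dm_bm_set_read_only(bm);

            r = dm_bm_write_lock_zero(bm, 1, NULL, &b);
            /* r is now -EPERM: all writes are refused in read-only mode */

            r = dm_bm_read_lock(bm, 1, NULL, &b);
            /* reads are unaffected */
            if (!r)
                    dm_bm_unlock(b);
    }
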
diff --git a/drivers/md/persistent-data/dm-space-map-checker.c b/drivers/md/persistent-data/dm-space-map-checker.c deleted file mode 100644 index fc90c11620ad..000000000000 --- a/drivers/md/persistent-data/dm-space-map-checker.c +++ /dev/null | |||
@@ -1,446 +0,0 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2011 Red Hat, Inc. | ||
3 | * | ||
4 | * This file is released under the GPL. | ||
5 | */ | ||
6 | |||
7 | #include "dm-space-map-checker.h" | ||
8 | |||
9 | #include <linux/device-mapper.h> | ||
10 | #include <linux/export.h> | ||
11 | #include <linux/vmalloc.h> | ||
12 | |||
13 | #ifdef CONFIG_DM_DEBUG_SPACE_MAPS | ||
14 | |||
15 | #define DM_MSG_PREFIX "space map checker" | ||
16 | |||
17 | /*----------------------------------------------------------------*/ | ||
18 | |||
19 | struct count_array { | ||
20 | dm_block_t nr; | ||
21 | dm_block_t nr_free; | ||
22 | |||
23 | uint32_t *counts; | ||
24 | }; | ||
25 | |||
26 | static int ca_get_count(struct count_array *ca, dm_block_t b, uint32_t *count) | ||
27 | { | ||
28 | if (b >= ca->nr) | ||
29 | return -EINVAL; | ||
30 | |||
31 | *count = ca->counts[b]; | ||
32 | return 0; | ||
33 | } | ||
34 | |||
35 | static int ca_count_more_than_one(struct count_array *ca, dm_block_t b, int *r) | ||
36 | { | ||
37 | if (b >= ca->nr) | ||
38 | return -EINVAL; | ||
39 | |||
40 | *r = ca->counts[b] > 1; | ||
41 | return 0; | ||
42 | } | ||
43 | |||
44 | static int ca_set_count(struct count_array *ca, dm_block_t b, uint32_t count) | ||
45 | { | ||
46 | uint32_t old_count; | ||
47 | |||
48 | if (b >= ca->nr) | ||
49 | return -EINVAL; | ||
50 | |||
51 | old_count = ca->counts[b]; | ||
52 | |||
53 | if (!count && old_count) | ||
54 | ca->nr_free++; | ||
55 | |||
56 | else if (count && !old_count) | ||
57 | ca->nr_free--; | ||
58 | |||
59 | ca->counts[b] = count; | ||
60 | return 0; | ||
61 | } | ||
62 | |||
63 | static int ca_inc_block(struct count_array *ca, dm_block_t b) | ||
64 | { | ||
65 | if (b >= ca->nr) | ||
66 | return -EINVAL; | ||
67 | |||
68 | ca_set_count(ca, b, ca->counts[b] + 1); | ||
69 | return 0; | ||
70 | } | ||
71 | |||
72 | static int ca_dec_block(struct count_array *ca, dm_block_t b) | ||
73 | { | ||
74 | if (b >= ca->nr) | ||
75 | return -EINVAL; | ||
76 | |||
77 | BUG_ON(ca->counts[b] == 0); | ||
78 | ca_set_count(ca, b, ca->counts[b] - 1); | ||
79 | return 0; | ||
80 | } | ||
81 | |||
82 | static int ca_create(struct count_array *ca, struct dm_space_map *sm) | ||
83 | { | ||
84 | int r; | ||
85 | dm_block_t nr_blocks; | ||
86 | |||
87 | r = dm_sm_get_nr_blocks(sm, &nr_blocks); | ||
88 | if (r) | ||
89 | return r; | ||
90 | |||
91 | ca->nr = nr_blocks; | ||
92 | ca->nr_free = nr_blocks; | ||
93 | |||
94 | if (!nr_blocks) | ||
95 | ca->counts = NULL; | ||
96 | else { | ||
97 | ca->counts = vzalloc(sizeof(*ca->counts) * nr_blocks); | ||
98 | if (!ca->counts) | ||
99 | return -ENOMEM; | ||
100 | } | ||
101 | |||
102 | return 0; | ||
103 | } | ||
104 | |||
105 | static void ca_destroy(struct count_array *ca) | ||
106 | { | ||
107 | vfree(ca->counts); | ||
108 | } | ||
109 | |||
110 | static int ca_load(struct count_array *ca, struct dm_space_map *sm) | ||
111 | { | ||
112 | int r; | ||
113 | uint32_t count; | ||
114 | dm_block_t nr_blocks, i; | ||
115 | |||
116 | r = dm_sm_get_nr_blocks(sm, &nr_blocks); | ||
117 | if (r) | ||
118 | return r; | ||
119 | |||
120 | BUG_ON(ca->nr != nr_blocks); | ||
121 | |||
122 | DMWARN("Loading debug space map from disk. This may take some time"); | ||
123 | for (i = 0; i < nr_blocks; i++) { | ||
124 | r = dm_sm_get_count(sm, i, &count); | ||
125 | if (r) { | ||
126 | DMERR("load failed"); | ||
127 | return r; | ||
128 | } | ||
129 | |||
130 | ca_set_count(ca, i, count); | ||
131 | } | ||
132 | DMWARN("Load complete"); | ||
133 | |||
134 | return 0; | ||
135 | } | ||
136 | |||
137 | static int ca_extend(struct count_array *ca, dm_block_t extra_blocks) | ||
138 | { | ||
139 | dm_block_t nr_blocks = ca->nr + extra_blocks; | ||
140 | uint32_t *counts = vzalloc(sizeof(*counts) * nr_blocks); | ||
141 | if (!counts) | ||
142 | return -ENOMEM; | ||
143 | |||
144 | if (ca->counts) { | ||
145 | memcpy(counts, ca->counts, sizeof(*counts) * ca->nr); | ||
146 | ca_destroy(ca); | ||
147 | } | ||
148 | ca->nr = nr_blocks; | ||
149 | ca->nr_free += extra_blocks; | ||
150 | ca->counts = counts; | ||
151 | return 0; | ||
152 | } | ||
153 | |||
154 | static int ca_commit(struct count_array *old, struct count_array *new) | ||
155 | { | ||
156 | if (old->nr != new->nr) { | ||
157 | BUG_ON(old->nr > new->nr); | ||
158 | ca_extend(old, new->nr - old->nr); | ||
159 | } | ||
160 | |||
161 | BUG_ON(old->nr != new->nr); | ||
162 | old->nr_free = new->nr_free; | ||
163 | memcpy(old->counts, new->counts, sizeof(*old->counts) * old->nr); | ||
164 | return 0; | ||
165 | } | ||
166 | |||
167 | /*----------------------------------------------------------------*/ | ||
168 | |||
169 | struct sm_checker { | ||
170 | struct dm_space_map sm; | ||
171 | |||
172 | struct count_array old_counts; | ||
173 | struct count_array counts; | ||
174 | |||
175 | struct dm_space_map *real_sm; | ||
176 | }; | ||
177 | |||
178 | static void sm_checker_destroy(struct dm_space_map *sm) | ||
179 | { | ||
180 | struct sm_checker *smc = container_of(sm, struct sm_checker, sm); | ||
181 | |||
182 | dm_sm_destroy(smc->real_sm); | ||
183 | ca_destroy(&smc->old_counts); | ||
184 | ca_destroy(&smc->counts); | ||
185 | kfree(smc); | ||
186 | } | ||
187 | |||
188 | static int sm_checker_get_nr_blocks(struct dm_space_map *sm, dm_block_t *count) | ||
189 | { | ||
190 | struct sm_checker *smc = container_of(sm, struct sm_checker, sm); | ||
191 | int r = dm_sm_get_nr_blocks(smc->real_sm, count); | ||
192 | if (!r) | ||
193 | BUG_ON(smc->old_counts.nr != *count); | ||
194 | return r; | ||
195 | } | ||
196 | |||
197 | static int sm_checker_get_nr_free(struct dm_space_map *sm, dm_block_t *count) | ||
198 | { | ||
199 | struct sm_checker *smc = container_of(sm, struct sm_checker, sm); | ||
200 | int r = dm_sm_get_nr_free(smc->real_sm, count); | ||
201 | if (!r) { | ||
202 | /* | ||
203 | * Slow, but we know it's correct. | ||
204 | */ | ||
205 | dm_block_t b, n = 0; | ||
206 | for (b = 0; b < smc->old_counts.nr; b++) | ||
207 | if (smc->old_counts.counts[b] == 0 && | ||
208 | smc->counts.counts[b] == 0) | ||
209 | n++; | ||
210 | |||
211 | if (n != *count) | ||
212 | DMERR("free block counts differ, checker %u, sm-disk:%u", | ||
213 | (unsigned) n, (unsigned) *count); | ||
214 | } | ||
215 | return r; | ||
216 | } | ||
217 | |||
218 | static int sm_checker_new_block(struct dm_space_map *sm, dm_block_t *b) | ||
219 | { | ||
220 | struct sm_checker *smc = container_of(sm, struct sm_checker, sm); | ||
221 | int r = dm_sm_new_block(smc->real_sm, b); | ||
222 | |||
223 | if (!r) { | ||
224 | BUG_ON(*b >= smc->old_counts.nr); | ||
225 | BUG_ON(smc->old_counts.counts[*b] != 0); | ||
226 | BUG_ON(*b >= smc->counts.nr); | ||
227 | BUG_ON(smc->counts.counts[*b] != 0); | ||
228 | ca_set_count(&smc->counts, *b, 1); | ||
229 | } | ||
230 | |||
231 | return r; | ||
232 | } | ||
233 | |||
234 | static int sm_checker_inc_block(struct dm_space_map *sm, dm_block_t b) | ||
235 | { | ||
236 | struct sm_checker *smc = container_of(sm, struct sm_checker, sm); | ||
237 | int r = dm_sm_inc_block(smc->real_sm, b); | ||
238 | int r2 = ca_inc_block(&smc->counts, b); | ||
239 | BUG_ON(r != r2); | ||
240 | return r; | ||
241 | } | ||
242 | |||
243 | static int sm_checker_dec_block(struct dm_space_map *sm, dm_block_t b) | ||
244 | { | ||
245 | struct sm_checker *smc = container_of(sm, struct sm_checker, sm); | ||
246 | int r = dm_sm_dec_block(smc->real_sm, b); | ||
247 | int r2 = ca_dec_block(&smc->counts, b); | ||
248 | BUG_ON(r != r2); | ||
249 | return r; | ||
250 | } | ||
251 | |||
252 | static int sm_checker_get_count(struct dm_space_map *sm, dm_block_t b, uint32_t *result) | ||
253 | { | ||
254 | struct sm_checker *smc = container_of(sm, struct sm_checker, sm); | ||
255 | uint32_t result2 = 0; | ||
256 | int r = dm_sm_get_count(smc->real_sm, b, result); | ||
257 | int r2 = ca_get_count(&smc->counts, b, &result2); | ||
258 | |||
259 | BUG_ON(r != r2); | ||
260 | if (!r) | ||
261 | BUG_ON(*result != result2); | ||
262 | return r; | ||
263 | } | ||
264 | |||
265 | static int sm_checker_count_more_than_one(struct dm_space_map *sm, dm_block_t b, int *result) | ||
266 | { | ||
267 | struct sm_checker *smc = container_of(sm, struct sm_checker, sm); | ||
268 | int result2 = 0; | ||
269 | int r = dm_sm_count_is_more_than_one(smc->real_sm, b, result); | ||
270 | int r2 = ca_count_more_than_one(&smc->counts, b, &result2); | ||
271 | |||
272 | BUG_ON(r != r2); | ||
273 | if (!r) | ||
274 | BUG_ON(!(*result) && result2); | ||
275 | return r; | ||
276 | } | ||
277 | |||
278 | static int sm_checker_set_count(struct dm_space_map *sm, dm_block_t b, uint32_t count) | ||
279 | { | ||
280 | struct sm_checker *smc = container_of(sm, struct sm_checker, sm); | ||
281 | uint32_t old_rc; | ||
282 | int r = dm_sm_set_count(smc->real_sm, b, count); | ||
283 | int r2; | ||
284 | |||
285 | BUG_ON(b >= smc->counts.nr); | ||
286 | old_rc = smc->counts.counts[b]; | ||
287 | r2 = ca_set_count(&smc->counts, b, count); | ||
288 | BUG_ON(r != r2); | ||
289 | |||
290 | return r; | ||
291 | } | ||
292 | |||
293 | static int sm_checker_commit(struct dm_space_map *sm) | ||
294 | { | ||
295 | struct sm_checker *smc = container_of(sm, struct sm_checker, sm); | ||
296 | int r; | ||
297 | |||
298 | r = dm_sm_commit(smc->real_sm); | ||
299 | if (r) | ||
300 | return r; | ||
301 | |||
302 | r = ca_commit(&smc->old_counts, &smc->counts); | ||
303 | if (r) | ||
304 | return r; | ||
305 | |||
306 | return 0; | ||
307 | } | ||
308 | |||
309 | static int sm_checker_extend(struct dm_space_map *sm, dm_block_t extra_blocks) | ||
310 | { | ||
311 | struct sm_checker *smc = container_of(sm, struct sm_checker, sm); | ||
312 | int r = dm_sm_extend(smc->real_sm, extra_blocks); | ||
313 | if (r) | ||
314 | return r; | ||
315 | |||
316 | return ca_extend(&smc->counts, extra_blocks); | ||
317 | } | ||
318 | |||
319 | static int sm_checker_root_size(struct dm_space_map *sm, size_t *result) | ||
320 | { | ||
321 | struct sm_checker *smc = container_of(sm, struct sm_checker, sm); | ||
322 | return dm_sm_root_size(smc->real_sm, result); | ||
323 | } | ||
324 | |||
325 | static int sm_checker_copy_root(struct dm_space_map *sm, void *copy_to_here_le, size_t len) | ||
326 | { | ||
327 | struct sm_checker *smc = container_of(sm, struct sm_checker, sm); | ||
328 | return dm_sm_copy_root(smc->real_sm, copy_to_here_le, len); | ||
329 | } | ||
330 | |||
331 | /*----------------------------------------------------------------*/ | ||
332 | |||
333 | static struct dm_space_map ops_ = { | ||
334 | .destroy = sm_checker_destroy, | ||
335 | .get_nr_blocks = sm_checker_get_nr_blocks, | ||
336 | .get_nr_free = sm_checker_get_nr_free, | ||
337 | .inc_block = sm_checker_inc_block, | ||
338 | .dec_block = sm_checker_dec_block, | ||
339 | .new_block = sm_checker_new_block, | ||
340 | .get_count = sm_checker_get_count, | ||
341 | .count_is_more_than_one = sm_checker_count_more_than_one, | ||
342 | .set_count = sm_checker_set_count, | ||
343 | .commit = sm_checker_commit, | ||
344 | .extend = sm_checker_extend, | ||
345 | .root_size = sm_checker_root_size, | ||
346 | .copy_root = sm_checker_copy_root | ||
347 | }; | ||
348 | |||
349 | struct dm_space_map *dm_sm_checker_create(struct dm_space_map *sm) | ||
350 | { | ||
351 | int r; | ||
352 | struct sm_checker *smc; | ||
353 | |||
354 | if (IS_ERR_OR_NULL(sm)) | ||
355 | return ERR_PTR(-EINVAL); | ||
356 | |||
357 | smc = kmalloc(sizeof(*smc), GFP_KERNEL); | ||
358 | if (!smc) | ||
359 | return ERR_PTR(-ENOMEM); | ||
360 | |||
361 | memcpy(&smc->sm, &ops_, sizeof(smc->sm)); | ||
362 | r = ca_create(&smc->old_counts, sm); | ||
363 | if (r) { | ||
364 | kfree(smc); | ||
365 | return ERR_PTR(r); | ||
366 | } | ||
367 | |||
368 | r = ca_create(&smc->counts, sm); | ||
369 | if (r) { | ||
370 | ca_destroy(&smc->old_counts); | ||
371 | kfree(smc); | ||
372 | return ERR_PTR(r); | ||
373 | } | ||
374 | |||
375 | smc->real_sm = sm; | ||
376 | |||
377 | r = ca_load(&smc->counts, sm); | ||
378 | if (r) { | ||
379 | ca_destroy(&smc->counts); | ||
380 | ca_destroy(&smc->old_counts); | ||
381 | kfree(smc); | ||
382 | return ERR_PTR(r); | ||
383 | } | ||
384 | |||
385 | r = ca_commit(&smc->old_counts, &smc->counts); | ||
386 | if (r) { | ||
387 | ca_destroy(&smc->counts); | ||
388 | ca_destroy(&smc->old_counts); | ||
389 | kfree(smc); | ||
390 | return ERR_PTR(r); | ||
391 | } | ||
392 | |||
393 | return &smc->sm; | ||
394 | } | ||
395 | EXPORT_SYMBOL_GPL(dm_sm_checker_create); | ||
396 | |||
397 | struct dm_space_map *dm_sm_checker_create_fresh(struct dm_space_map *sm) | ||
398 | { | ||
399 | int r; | ||
400 | struct sm_checker *smc; | ||
401 | |||
402 | if (IS_ERR_OR_NULL(sm)) | ||
403 | return ERR_PTR(-EINVAL); | ||
404 | |||
405 | smc = kmalloc(sizeof(*smc), GFP_KERNEL); | ||
406 | if (!smc) | ||
407 | return ERR_PTR(-ENOMEM); | ||
408 | |||
409 | memcpy(&smc->sm, &ops_, sizeof(smc->sm)); | ||
410 | r = ca_create(&smc->old_counts, sm); | ||
411 | if (r) { | ||
412 | kfree(smc); | ||
413 | return ERR_PTR(r); | ||
414 | } | ||
415 | |||
416 | r = ca_create(&smc->counts, sm); | ||
417 | if (r) { | ||
418 | ca_destroy(&smc->old_counts); | ||
419 | kfree(smc); | ||
420 | return ERR_PTR(r); | ||
421 | } | ||
422 | |||
423 | smc->real_sm = sm; | ||
424 | return &smc->sm; | ||
425 | } | ||
426 | EXPORT_SYMBOL_GPL(dm_sm_checker_create_fresh); | ||
427 | |||
428 | /*----------------------------------------------------------------*/ | ||
429 | |||
430 | #else | ||
431 | |||
432 | struct dm_space_map *dm_sm_checker_create(struct dm_space_map *sm) | ||
433 | { | ||
434 | return sm; | ||
435 | } | ||
436 | EXPORT_SYMBOL_GPL(dm_sm_checker_create); | ||
437 | |||
438 | struct dm_space_map *dm_sm_checker_create_fresh(struct dm_space_map *sm) | ||
439 | { | ||
440 | return sm; | ||
441 | } | ||
442 | EXPORT_SYMBOL_GPL(dm_sm_checker_create_fresh); | ||
443 | |||
444 | /*----------------------------------------------------------------*/ | ||
445 | |||
446 | #endif | ||
diff --git a/drivers/md/persistent-data/dm-space-map-checker.h b/drivers/md/persistent-data/dm-space-map-checker.h deleted file mode 100644 index 444dccf6688c..000000000000 --- a/drivers/md/persistent-data/dm-space-map-checker.h +++ /dev/null | |||
@@ -1,26 +0,0 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2011 Red Hat, Inc. | ||
3 | * | ||
4 | * This file is released under the GPL. | ||
5 | */ | ||
6 | |||
7 | #ifndef SNAPSHOTS_SPACE_MAP_CHECKER_H | ||
8 | #define SNAPSHOTS_SPACE_MAP_CHECKER_H | ||
9 | |||
10 | #include "dm-space-map.h" | ||
11 | |||
12 | /*----------------------------------------------------------------*/ | ||
13 | |||
14 | /* | ||
15 | * This space map wraps a real on-disk space map, and verifies all of its | ||
16 | * operations. It uses a lot of memory, so only use if you have a specific | ||
17 | * problem that you're debugging. | ||
18 | * | ||
19 | * Ownership of @sm passes. | ||
20 | */ | ||
21 | struct dm_space_map *dm_sm_checker_create(struct dm_space_map *sm); | ||
22 | struct dm_space_map *dm_sm_checker_create_fresh(struct dm_space_map *sm); | ||
23 | |||
24 | /*----------------------------------------------------------------*/ | ||
25 | |||
26 | #endif | ||
diff --git a/drivers/md/persistent-data/dm-space-map-common.c b/drivers/md/persistent-data/dm-space-map-common.c index ff3beed6ad2d..d77602d63c83 100644 --- a/drivers/md/persistent-data/dm-space-map-common.c +++ b/drivers/md/persistent-data/dm-space-map-common.c | |||
@@ -224,6 +224,7 @@ static int sm_ll_init(struct ll_disk *ll, struct dm_transaction_manager *tm) | |||
224 | ll->nr_blocks = 0; | 224 | ll->nr_blocks = 0; |
225 | ll->bitmap_root = 0; | 225 | ll->bitmap_root = 0; |
226 | ll->ref_count_root = 0; | 226 | ll->ref_count_root = 0; |
227 | ll->bitmap_index_changed = false; | ||
227 | 228 | ||
228 | return 0; | 229 | return 0; |
229 | } | 230 | } |
@@ -476,7 +477,15 @@ int sm_ll_dec(struct ll_disk *ll, dm_block_t b, enum allocation_event *ev) | |||
476 | 477 | ||
477 | int sm_ll_commit(struct ll_disk *ll) | 478 | int sm_ll_commit(struct ll_disk *ll) |
478 | { | 479 | { |
479 | return ll->commit(ll); | 480 | int r = 0; |
481 | |||
482 | if (ll->bitmap_index_changed) { | ||
483 | r = ll->commit(ll); | ||
484 | if (!r) | ||
485 | ll->bitmap_index_changed = false; | ||
486 | } | ||
487 | |||
488 | return r; | ||
480 | } | 489 | } |
481 | 490 | ||
482 | /*----------------------------------------------------------------*/ | 491 | /*----------------------------------------------------------------*/ |
@@ -491,6 +500,7 @@ static int metadata_ll_load_ie(struct ll_disk *ll, dm_block_t index, | |||
491 | static int metadata_ll_save_ie(struct ll_disk *ll, dm_block_t index, | 500 | static int metadata_ll_save_ie(struct ll_disk *ll, dm_block_t index, |
492 | struct disk_index_entry *ie) | 501 | struct disk_index_entry *ie) |
493 | { | 502 | { |
503 | ll->bitmap_index_changed = true; | ||
494 | memcpy(ll->mi_le.index + index, ie, sizeof(*ie)); | 504 | memcpy(ll->mi_le.index + index, ie, sizeof(*ie)); |
495 | return 0; | 505 | return 0; |
496 | } | 506 | } |
diff --git a/drivers/md/persistent-data/dm-space-map-common.h b/drivers/md/persistent-data/dm-space-map-common.h index 8f220821a9a9..b3078d5eda0c 100644 --- a/drivers/md/persistent-data/dm-space-map-common.h +++ b/drivers/md/persistent-data/dm-space-map-common.h | |||
@@ -78,6 +78,7 @@ struct ll_disk { | |||
78 | open_index_fn open_index; | 78 | open_index_fn open_index; |
79 | max_index_entries_fn max_entries; | 79 | max_index_entries_fn max_entries; |
80 | commit_fn commit; | 80 | commit_fn commit; |
81 | bool bitmap_index_changed:1; | ||
81 | }; | 82 | }; |
82 | 83 | ||
83 | struct disk_sm_root { | 84 | struct disk_sm_root { |
diff --git a/drivers/md/persistent-data/dm-space-map-disk.c b/drivers/md/persistent-data/dm-space-map-disk.c index 3d0ed5332883..f6d29e614ab7 100644 --- a/drivers/md/persistent-data/dm-space-map-disk.c +++ b/drivers/md/persistent-data/dm-space-map-disk.c | |||
@@ -4,7 +4,6 @@ | |||
4 | * This file is released under the GPL. | 4 | * This file is released under the GPL. |
5 | */ | 5 | */ |
6 | 6 | ||
7 | #include "dm-space-map-checker.h" | ||
8 | #include "dm-space-map-common.h" | 7 | #include "dm-space-map-common.h" |
9 | #include "dm-space-map-disk.h" | 8 | #include "dm-space-map-disk.h" |
10 | #include "dm-space-map.h" | 9 | #include "dm-space-map.h" |
@@ -252,9 +251,8 @@ static struct dm_space_map ops = { | |||
252 | .copy_root = sm_disk_copy_root | 251 | .copy_root = sm_disk_copy_root |
253 | }; | 252 | }; |
254 | 253 | ||
255 | static struct dm_space_map *dm_sm_disk_create_real( | 254 | struct dm_space_map *dm_sm_disk_create(struct dm_transaction_manager *tm, |
256 | struct dm_transaction_manager *tm, | 255 | dm_block_t nr_blocks) |
257 | dm_block_t nr_blocks) | ||
258 | { | 256 | { |
259 | int r; | 257 | int r; |
260 | struct sm_disk *smd; | 258 | struct sm_disk *smd; |
@@ -285,27 +283,10 @@ bad: | |||
285 | kfree(smd); | 283 | kfree(smd); |
286 | return ERR_PTR(r); | 284 | return ERR_PTR(r); |
287 | } | 285 | } |
288 | |||
289 | struct dm_space_map *dm_sm_disk_create(struct dm_transaction_manager *tm, | ||
290 | dm_block_t nr_blocks) | ||
291 | { | ||
292 | struct dm_space_map *sm = dm_sm_disk_create_real(tm, nr_blocks); | ||
293 | struct dm_space_map *smc; | ||
294 | |||
295 | if (IS_ERR_OR_NULL(sm)) | ||
296 | return sm; | ||
297 | |||
298 | smc = dm_sm_checker_create_fresh(sm); | ||
299 | if (IS_ERR(smc)) | ||
300 | dm_sm_destroy(sm); | ||
301 | |||
302 | return smc; | ||
303 | } | ||
304 | EXPORT_SYMBOL_GPL(dm_sm_disk_create); | 286 | EXPORT_SYMBOL_GPL(dm_sm_disk_create); |
305 | 287 | ||
306 | static struct dm_space_map *dm_sm_disk_open_real( | 288 | struct dm_space_map *dm_sm_disk_open(struct dm_transaction_manager *tm, |
307 | struct dm_transaction_manager *tm, | 289 | void *root_le, size_t len) |
308 | void *root_le, size_t len) | ||
309 | { | 290 | { |
310 | int r; | 291 | int r; |
311 | struct sm_disk *smd; | 292 | struct sm_disk *smd; |
@@ -332,13 +313,6 @@ bad: | |||
332 | kfree(smd); | 313 | kfree(smd); |
333 | return ERR_PTR(r); | 314 | return ERR_PTR(r); |
334 | } | 315 | } |
335 | |||
336 | struct dm_space_map *dm_sm_disk_open(struct dm_transaction_manager *tm, | ||
337 | void *root_le, size_t len) | ||
338 | { | ||
339 | return dm_sm_checker_create( | ||
340 | dm_sm_disk_open_real(tm, root_le, len)); | ||
341 | } | ||
342 | EXPORT_SYMBOL_GPL(dm_sm_disk_open); | 316 | EXPORT_SYMBOL_GPL(dm_sm_disk_open); |
343 | 317 | ||
344 | /*----------------------------------------------------------------*/ | 318 | /*----------------------------------------------------------------*/ |
diff --git a/drivers/md/persistent-data/dm-transaction-manager.c b/drivers/md/persistent-data/dm-transaction-manager.c index e5604b32d91f..d247a35da3c6 100644 --- a/drivers/md/persistent-data/dm-transaction-manager.c +++ b/drivers/md/persistent-data/dm-transaction-manager.c | |||
@@ -5,7 +5,6 @@ | |||
5 | */ | 5 | */ |
6 | #include "dm-transaction-manager.h" | 6 | #include "dm-transaction-manager.h" |
7 | #include "dm-space-map.h" | 7 | #include "dm-space-map.h" |
8 | #include "dm-space-map-checker.h" | ||
9 | #include "dm-space-map-disk.h" | 8 | #include "dm-space-map-disk.h" |
10 | #include "dm-space-map-metadata.h" | 9 | #include "dm-space-map-metadata.h" |
11 | #include "dm-persistent-data-internal.h" | 10 | #include "dm-persistent-data-internal.h" |
@@ -220,13 +219,24 @@ static int __shadow_block(struct dm_transaction_manager *tm, dm_block_t orig, | |||
220 | if (r < 0) | 219 | if (r < 0) |
221 | return r; | 220 | return r; |
222 | 221 | ||
223 | r = dm_bm_unlock_move(orig_block, new); | 222 | /* |
224 | if (r < 0) { | 223 | * It would be tempting to use dm_bm_unlock_move here, but some |
224 | * code, such as the space maps, keeps using the old data structures | ||
225 | * secure in the knowledge they won't be changed until the next | ||
226 | * transaction. Using unlock_move would force a synchronous read | ||
227 | * since the old block would no longer be in the cache. | ||
228 | */ | ||
229 | r = dm_bm_write_lock_zero(tm->bm, new, v, result); | ||
230 | if (r) { | ||
225 | dm_bm_unlock(orig_block); | 231 | dm_bm_unlock(orig_block); |
226 | return r; | 232 | return r; |
227 | } | 233 | } |
228 | 234 | ||
229 | return dm_bm_write_lock(tm->bm, new, v, result); | 235 | memcpy(dm_block_data(*result), dm_block_data(orig_block), |
236 | dm_bm_block_size(tm->bm)); | ||
237 | |||
238 | dm_bm_unlock(orig_block); | ||
239 | return r; | ||
230 | } | 240 | } |
231 | 241 | ||
232 | int dm_tm_shadow_block(struct dm_transaction_manager *tm, dm_block_t orig, | 242 | int dm_tm_shadow_block(struct dm_transaction_manager *tm, dm_block_t orig, |
@@ -311,98 +321,61 @@ struct dm_block_manager *dm_tm_get_bm(struct dm_transaction_manager *tm) | |||
311 | 321 | ||
312 | static int dm_tm_create_internal(struct dm_block_manager *bm, | 322 | static int dm_tm_create_internal(struct dm_block_manager *bm, |
313 | dm_block_t sb_location, | 323 | dm_block_t sb_location, |
314 | struct dm_block_validator *sb_validator, | ||
315 | size_t root_offset, size_t root_max_len, | ||
316 | struct dm_transaction_manager **tm, | 324 | struct dm_transaction_manager **tm, |
317 | struct dm_space_map **sm, | 325 | struct dm_space_map **sm, |
318 | struct dm_block **sblock, | 326 | int create, |
319 | int create) | 327 | void *sm_root, size_t sm_len) |
320 | { | 328 | { |
321 | int r; | 329 | int r; |
322 | struct dm_space_map *inner; | ||
323 | 330 | ||
324 | inner = dm_sm_metadata_init(); | 331 | *sm = dm_sm_metadata_init(); |
325 | if (IS_ERR(inner)) | 332 | if (IS_ERR(*sm)) |
326 | return PTR_ERR(inner); | 333 | return PTR_ERR(*sm); |
327 | 334 | ||
328 | *tm = dm_tm_create(bm, inner); | 335 | *tm = dm_tm_create(bm, *sm); |
329 | if (IS_ERR(*tm)) { | 336 | if (IS_ERR(*tm)) { |
330 | dm_sm_destroy(inner); | 337 | dm_sm_destroy(*sm); |
331 | return PTR_ERR(*tm); | 338 | return PTR_ERR(*tm); |
332 | } | 339 | } |
333 | 340 | ||
334 | if (create) { | 341 | if (create) { |
335 | r = dm_bm_write_lock_zero(dm_tm_get_bm(*tm), sb_location, | 342 | r = dm_sm_metadata_create(*sm, *tm, dm_bm_nr_blocks(bm), |
336 | sb_validator, sblock); | ||
337 | if (r < 0) { | ||
338 | DMERR("couldn't lock superblock"); | ||
339 | goto bad1; | ||
340 | } | ||
341 | |||
342 | r = dm_sm_metadata_create(inner, *tm, dm_bm_nr_blocks(bm), | ||
343 | sb_location); | 343 | sb_location); |
344 | if (r) { | 344 | if (r) { |
345 | DMERR("couldn't create metadata space map"); | 345 | DMERR("couldn't create metadata space map"); |
346 | goto bad2; | 346 | goto bad; |
347 | } | ||
348 | |||
349 | *sm = dm_sm_checker_create(inner); | ||
350 | if (IS_ERR(*sm)) { | ||
351 | r = PTR_ERR(*sm); | ||
352 | goto bad2; | ||
353 | } | 347 | } |
354 | 348 | ||
355 | } else { | 349 | } else { |
356 | r = dm_bm_write_lock(dm_tm_get_bm(*tm), sb_location, | 350 | r = dm_sm_metadata_open(*sm, *tm, sm_root, sm_len); |
357 | sb_validator, sblock); | ||
358 | if (r < 0) { | ||
359 | DMERR("couldn't lock superblock"); | ||
360 | goto bad1; | ||
361 | } | ||
362 | |||
363 | r = dm_sm_metadata_open(inner, *tm, | ||
364 | dm_block_data(*sblock) + root_offset, | ||
365 | root_max_len); | ||
366 | if (r) { | 351 | if (r) { |
367 | DMERR("couldn't open metadata space map"); | 352 | DMERR("couldn't open metadata space map"); |
368 | goto bad2; | 353 | goto bad; |
369 | } | ||
370 | |||
371 | *sm = dm_sm_checker_create(inner); | ||
372 | if (IS_ERR(*sm)) { | ||
373 | r = PTR_ERR(*sm); | ||
374 | goto bad2; | ||
375 | } | 354 | } |
376 | } | 355 | } |
377 | 356 | ||
378 | return 0; | 357 | return 0; |
379 | 358 | ||
380 | bad2: | 359 | bad: |
381 | dm_tm_unlock(*tm, *sblock); | ||
382 | bad1: | ||
383 | dm_tm_destroy(*tm); | 360 | dm_tm_destroy(*tm); |
384 | dm_sm_destroy(inner); | 361 | dm_sm_destroy(*sm); |
385 | return r; | 362 | return r; |
386 | } | 363 | } |
387 | 364 | ||
388 | int dm_tm_create_with_sm(struct dm_block_manager *bm, dm_block_t sb_location, | 365 | int dm_tm_create_with_sm(struct dm_block_manager *bm, dm_block_t sb_location, |
389 | struct dm_block_validator *sb_validator, | ||
390 | struct dm_transaction_manager **tm, | 366 | struct dm_transaction_manager **tm, |
391 | struct dm_space_map **sm, struct dm_block **sblock) | 367 | struct dm_space_map **sm) |
392 | { | 368 | { |
393 | return dm_tm_create_internal(bm, sb_location, sb_validator, | 369 | return dm_tm_create_internal(bm, sb_location, tm, sm, 1, NULL, 0); |
394 | 0, 0, tm, sm, sblock, 1); | ||
395 | } | 370 | } |
396 | EXPORT_SYMBOL_GPL(dm_tm_create_with_sm); | 371 | EXPORT_SYMBOL_GPL(dm_tm_create_with_sm); |
397 | 372 | ||
398 | int dm_tm_open_with_sm(struct dm_block_manager *bm, dm_block_t sb_location, | 373 | int dm_tm_open_with_sm(struct dm_block_manager *bm, dm_block_t sb_location, |
399 | struct dm_block_validator *sb_validator, | 374 | void *sm_root, size_t root_len, |
400 | size_t root_offset, size_t root_max_len, | ||
401 | struct dm_transaction_manager **tm, | 375 | struct dm_transaction_manager **tm, |
402 | struct dm_space_map **sm, struct dm_block **sblock) | 376 | struct dm_space_map **sm) |
403 | { | 377 | { |
404 | return dm_tm_create_internal(bm, sb_location, sb_validator, root_offset, | 378 | return dm_tm_create_internal(bm, sb_location, tm, sm, 0, sm_root, root_len); |
405 | root_max_len, tm, sm, sblock, 0); | ||
406 | } | 379 | } |
407 | EXPORT_SYMBOL_GPL(dm_tm_open_with_sm); | 380 | EXPORT_SYMBOL_GPL(dm_tm_open_with_sm); |
408 | 381 | ||
diff --git a/drivers/md/persistent-data/dm-transaction-manager.h b/drivers/md/persistent-data/dm-transaction-manager.h index 6da784871db4..b5b139076ca5 100644 --- a/drivers/md/persistent-data/dm-transaction-manager.h +++ b/drivers/md/persistent-data/dm-transaction-manager.h | |||
@@ -115,16 +115,17 @@ struct dm_block_manager *dm_tm_get_bm(struct dm_transaction_manager *tm); | |||
115 | * | 115 | * |
116 | * Returns a tm that has an open transaction to write the new disk sm. | 116 | * Returns a tm that has an open transaction to write the new disk sm. |
117 | * Caller should store the new sm root and commit. | 117 | * Caller should store the new sm root and commit. |
118 | * | ||
119 | * The superblock location is passed so the metadata space map knows it | ||
120 | * shouldn't be used. | ||
118 | */ | 121 | */ |
119 | int dm_tm_create_with_sm(struct dm_block_manager *bm, dm_block_t sb_location, | 122 | int dm_tm_create_with_sm(struct dm_block_manager *bm, dm_block_t sb_location, |
120 | struct dm_block_validator *sb_validator, | ||
121 | struct dm_transaction_manager **tm, | 123 | struct dm_transaction_manager **tm, |
122 | struct dm_space_map **sm, struct dm_block **sblock); | 124 | struct dm_space_map **sm); |
123 | 125 | ||
124 | int dm_tm_open_with_sm(struct dm_block_manager *bm, dm_block_t sb_location, | 126 | int dm_tm_open_with_sm(struct dm_block_manager *bm, dm_block_t sb_location, |
125 | struct dm_block_validator *sb_validator, | 127 | void *sm_root, size_t root_len, |
126 | size_t root_offset, size_t root_max_len, | ||
127 | struct dm_transaction_manager **tm, | 128 | struct dm_transaction_manager **tm, |
128 | struct dm_space_map **sm, struct dm_block **sblock); | 129 | struct dm_space_map **sm); |
129 | 130 | ||
130 | #endif /* _LINUX_DM_TRANSACTION_MANAGER_H */ | 131 | #endif /* _LINUX_DM_TRANSACTION_MANAGER_H */ |
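[Editor's note] The new prototypes drop the superblock validator, root offset, and the returned superblock lock; callers are now expected to manage their own superblock and hand over the raw space-map root bytes. A sketch of what an opener might look like under that assumption (the superblock layout and helper name are hypothetical):

static int example_open_tm(struct dm_block_manager *bm, dm_block_t sb_location,
                           struct dm_block_validator *sb_validator,
                           struct dm_transaction_manager **tm,
                           struct dm_space_map **sm)
{
        int r;
        struct dm_block *sblock;
        struct example_sb {             /* hypothetical on-disk superblock layout */
                __u8 sm_root[128];
        } *sb;

        r = dm_bm_write_lock(bm, sb_location, sb_validator, &sblock);
        if (r < 0)
                return r;

        sb = dm_block_data(sblock);
        r = dm_tm_open_with_sm(bm, sb_location, sb->sm_root,
                               sizeof(sb->sm_root), tm, sm);

        dm_bm_unlock(sblock);
        return r;
}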
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index 9f01870d031c..611b5f797618 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c | |||
@@ -46,6 +46,20 @@ | |||
46 | */ | 46 | */ |
47 | #define NR_RAID1_BIOS 256 | 47 | #define NR_RAID1_BIOS 256 |
48 | 48 | ||
49 | /* when we get a read error on a read-only array, we redirect to another | ||
50 | * device without failing the first device, or trying to over-write to | ||
51 | * correct the read error. To keep track of bad blocks on a per-bio | ||
52 | * level, we store IO_BLOCKED in the appropriate 'bios' pointer | ||
53 | */ | ||
54 | #define IO_BLOCKED ((struct bio *)1) | ||
55 | /* When we successfully write to a known bad-block, we need to remove the | ||
56 | * bad-block marking which must be done from process context. So we record | ||
57 | * the success by setting bios[n] to IO_MADE_GOOD | ||
58 | */ | ||
59 | #define IO_MADE_GOOD ((struct bio *)2) | ||
60 | |||
61 | #define BIO_SPECIAL(bio) ((unsigned long)bio <= 2) | ||
62 | |||
49 | /* When there are this many requests queue to be written by | 63 | /* When there are this many requests queue to be written by |
50 | * the raid1 thread, we become 'congested' to provide back-pressure | 64 | * the raid1 thread, we become 'congested' to provide back-pressure |
51 | * for writeback. | 65 | * for writeback. |
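[Editor's note] The IO_BLOCKED/IO_MADE_GOOD sentinels moved here from raid1.h; they are stored in r1_bio->bios[] in place of real bio pointers, so any code walking that array has to filter them with BIO_SPECIAL() before dereferencing. A minimal sketch of that filtering (illustrative, not the driver's actual loop):

static int example_count_real_bios(struct bio **bios, int ndisks)
{
        int i, n = 0;

        for (i = 0; i < ndisks; i++) {
                struct bio *bio = bios[i];

                /* BIO_SPECIAL() also covers NULL, IO_BLOCKED and IO_MADE_GOOD */
                if (BIO_SPECIAL(bio))
                        continue;

                n++;            /* only genuine struct bio pointers get here */
        }
        return n;
}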
@@ -483,12 +497,14 @@ static int read_balance(struct r1conf *conf, struct r1bio *r1_bio, int *max_sect | |||
483 | const sector_t this_sector = r1_bio->sector; | 497 | const sector_t this_sector = r1_bio->sector; |
484 | int sectors; | 498 | int sectors; |
485 | int best_good_sectors; | 499 | int best_good_sectors; |
486 | int start_disk; | 500 | int best_disk, best_dist_disk, best_pending_disk; |
487 | int best_disk; | 501 | int has_nonrot_disk; |
488 | int i; | 502 | int disk; |
489 | sector_t best_dist; | 503 | sector_t best_dist; |
504 | unsigned int min_pending; | ||
490 | struct md_rdev *rdev; | 505 | struct md_rdev *rdev; |
491 | int choose_first; | 506 | int choose_first; |
507 | int choose_next_idle; | ||
492 | 508 | ||
493 | rcu_read_lock(); | 509 | rcu_read_lock(); |
494 | /* | 510 | /* |
@@ -499,26 +515,26 @@ static int read_balance(struct r1conf *conf, struct r1bio *r1_bio, int *max_sect | |||
499 | retry: | 515 | retry: |
500 | sectors = r1_bio->sectors; | 516 | sectors = r1_bio->sectors; |
501 | best_disk = -1; | 517 | best_disk = -1; |
518 | best_dist_disk = -1; | ||
502 | best_dist = MaxSector; | 519 | best_dist = MaxSector; |
520 | best_pending_disk = -1; | ||
521 | min_pending = UINT_MAX; | ||
503 | best_good_sectors = 0; | 522 | best_good_sectors = 0; |
523 | has_nonrot_disk = 0; | ||
524 | choose_next_idle = 0; | ||
504 | 525 | ||
505 | if (conf->mddev->recovery_cp < MaxSector && | 526 | if (conf->mddev->recovery_cp < MaxSector && |
506 | (this_sector + sectors >= conf->next_resync)) { | 527 | (this_sector + sectors >= conf->next_resync)) |
507 | choose_first = 1; | 528 | choose_first = 1; |
508 | start_disk = 0; | 529 | else |
509 | } else { | ||
510 | choose_first = 0; | 530 | choose_first = 0; |
511 | start_disk = conf->last_used; | ||
512 | } | ||
513 | 531 | ||
514 | for (i = 0 ; i < conf->raid_disks * 2 ; i++) { | 532 | for (disk = 0 ; disk < conf->raid_disks * 2 ; disk++) { |
515 | sector_t dist; | 533 | sector_t dist; |
516 | sector_t first_bad; | 534 | sector_t first_bad; |
517 | int bad_sectors; | 535 | int bad_sectors; |
518 | 536 | unsigned int pending; | |
519 | int disk = start_disk + i; | 537 | bool nonrot; |
520 | if (disk >= conf->raid_disks * 2) | ||
521 | disk -= conf->raid_disks * 2; | ||
522 | 538 | ||
523 | rdev = rcu_dereference(conf->mirrors[disk].rdev); | 539 | rdev = rcu_dereference(conf->mirrors[disk].rdev); |
524 | if (r1_bio->bios[disk] == IO_BLOCKED | 540 | if (r1_bio->bios[disk] == IO_BLOCKED |
@@ -577,22 +593,77 @@ static int read_balance(struct r1conf *conf, struct r1bio *r1_bio, int *max_sect | |||
577 | } else | 593 | } else |
578 | best_good_sectors = sectors; | 594 | best_good_sectors = sectors; |
579 | 595 | ||
596 | nonrot = blk_queue_nonrot(bdev_get_queue(rdev->bdev)); | ||
597 | has_nonrot_disk |= nonrot; | ||
598 | pending = atomic_read(&rdev->nr_pending); | ||
580 | dist = abs(this_sector - conf->mirrors[disk].head_position); | 599 | dist = abs(this_sector - conf->mirrors[disk].head_position); |
581 | if (choose_first | 600 | if (choose_first) { |
582 | /* Don't change to another disk for sequential reads */ | 601 | best_disk = disk; |
583 | || conf->next_seq_sect == this_sector | 602 | break; |
584 | || dist == 0 | 603 | } |
585 | /* If device is idle, use it */ | 604 | /* Don't change to another disk for sequential reads */ |
586 | || atomic_read(&rdev->nr_pending) == 0) { | 605 | if (conf->mirrors[disk].next_seq_sect == this_sector |
606 | || dist == 0) { | ||
607 | int opt_iosize = bdev_io_opt(rdev->bdev) >> 9; | ||
608 | struct raid1_info *mirror = &conf->mirrors[disk]; | ||
609 | |||
610 | best_disk = disk; | ||
611 | /* | ||
612 | * If the buffered sequential IO size exceeds the optimal | ||
613 | * iosize, check whether there is an idle disk. If so, choose | ||
614 | * the idle disk. read_balance could already have chosen an | ||
615 | * idle disk before noticing that this is sequential IO on | ||
616 | * this disk. That doesn't matter: this disk will go | ||
617 | * idle and be picked again once the first disk's | ||
618 | * IO size exceeds the optimal iosize. In | ||
619 | * this way, the iosize of the first disk will be at least | ||
620 | * the optimal iosize. The iosize of the second disk might be | ||
621 | * small, but that's not a big deal since when the second disk | ||
622 | * starts IO, the first disk is likely still busy. | ||
623 | */ | ||
624 | if (nonrot && opt_iosize > 0 && | ||
625 | mirror->seq_start != MaxSector && | ||
626 | mirror->next_seq_sect > opt_iosize && | ||
627 | mirror->next_seq_sect - opt_iosize >= | ||
628 | mirror->seq_start) { | ||
629 | choose_next_idle = 1; | ||
630 | continue; | ||
631 | } | ||
632 | break; | ||
633 | } | ||
634 | /* If device is idle, use it */ | ||
635 | if (pending == 0) { | ||
587 | best_disk = disk; | 636 | best_disk = disk; |
588 | break; | 637 | break; |
589 | } | 638 | } |
639 | |||
640 | if (choose_next_idle) | ||
641 | continue; | ||
642 | |||
643 | if (min_pending > pending) { | ||
644 | min_pending = pending; | ||
645 | best_pending_disk = disk; | ||
646 | } | ||
647 | |||
590 | if (dist < best_dist) { | 648 | if (dist < best_dist) { |
591 | best_dist = dist; | 649 | best_dist = dist; |
592 | best_disk = disk; | 650 | best_dist_disk = disk; |
593 | } | 651 | } |
594 | } | 652 | } |
595 | 653 | ||
654 | /* | ||
655 | * If all disks are rotational, choose the closest disk. If any disk is | ||
656 | * non-rotational, choose the disk with the fewest pending requests even | ||
657 | * if the disk is rotational, which may or may not be optimal for arrays | ||
658 | * with mixed rotational/non-rotational disks, depending on the workload. | ||
659 | */ | ||
660 | if (best_disk == -1) { | ||
661 | if (has_nonrot_disk) | ||
662 | best_disk = best_pending_disk; | ||
663 | else | ||
664 | best_disk = best_dist_disk; | ||
665 | } | ||
666 | |||
596 | if (best_disk >= 0) { | 667 | if (best_disk >= 0) { |
597 | rdev = rcu_dereference(conf->mirrors[best_disk].rdev); | 668 | rdev = rcu_dereference(conf->mirrors[best_disk].rdev); |
598 | if (!rdev) | 669 | if (!rdev) |
@@ -606,8 +677,11 @@ static int read_balance(struct r1conf *conf, struct r1bio *r1_bio, int *max_sect | |||
606 | goto retry; | 677 | goto retry; |
607 | } | 678 | } |
608 | sectors = best_good_sectors; | 679 | sectors = best_good_sectors; |
609 | conf->next_seq_sect = this_sector + sectors; | 680 | |
610 | conf->last_used = best_disk; | 681 | if (conf->mirrors[best_disk].next_seq_sect != this_sector) |
682 | conf->mirrors[best_disk].seq_start = this_sector; | ||
683 | |||
684 | conf->mirrors[best_disk].next_seq_sect = this_sector + sectors; | ||
611 | } | 685 | } |
612 | rcu_read_unlock(); | 686 | rcu_read_unlock(); |
613 | *max_sectors = sectors; | 687 | *max_sectors = sectors; |
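[Editor's note] Read balancing now tracks three candidates instead of rotating from last_used: best_disk (an idle or sequential match found during the scan), best_pending_disk (shortest request queue), and best_dist_disk (shortest seek distance). A condensed sketch of the final selection policy, assuming the variables computed in the loop above (bad-block handling and error paths omitted):

static int example_pick_disk(int best_disk, int has_nonrot_disk,
                             int best_pending_disk, int best_dist_disk)
{
        if (best_disk >= 0)
                return best_disk;               /* idle or sequential disk found in the scan */

        if (has_nonrot_disk)
                return best_pending_disk;       /* any SSD present: prefer the shortest queue */

        return best_dist_disk;                  /* all rotational: prefer the shortest seek */
}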
@@ -911,7 +985,7 @@ static void raid1_unplug(struct blk_plug_cb *cb, bool from_schedule) | |||
911 | static void make_request(struct mddev *mddev, struct bio * bio) | 985 | static void make_request(struct mddev *mddev, struct bio * bio) |
912 | { | 986 | { |
913 | struct r1conf *conf = mddev->private; | 987 | struct r1conf *conf = mddev->private; |
914 | struct mirror_info *mirror; | 988 | struct raid1_info *mirror; |
915 | struct r1bio *r1_bio; | 989 | struct r1bio *r1_bio; |
916 | struct bio *read_bio; | 990 | struct bio *read_bio; |
917 | int i, disks; | 991 | int i, disks; |
@@ -1415,7 +1489,7 @@ static int raid1_add_disk(struct mddev *mddev, struct md_rdev *rdev) | |||
1415 | struct r1conf *conf = mddev->private; | 1489 | struct r1conf *conf = mddev->private; |
1416 | int err = -EEXIST; | 1490 | int err = -EEXIST; |
1417 | int mirror = 0; | 1491 | int mirror = 0; |
1418 | struct mirror_info *p; | 1492 | struct raid1_info *p; |
1419 | int first = 0; | 1493 | int first = 0; |
1420 | int last = conf->raid_disks - 1; | 1494 | int last = conf->raid_disks - 1; |
1421 | struct request_queue *q = bdev_get_queue(rdev->bdev); | 1495 | struct request_queue *q = bdev_get_queue(rdev->bdev); |
@@ -1484,7 +1558,7 @@ static int raid1_remove_disk(struct mddev *mddev, struct md_rdev *rdev) | |||
1484 | struct r1conf *conf = mddev->private; | 1558 | struct r1conf *conf = mddev->private; |
1485 | int err = 0; | 1559 | int err = 0; |
1486 | int number = rdev->raid_disk; | 1560 | int number = rdev->raid_disk; |
1487 | struct mirror_info *p = conf->mirrors+ number; | 1561 | struct raid1_info *p = conf->mirrors + number; |
1488 | 1562 | ||
1489 | if (rdev != p->rdev) | 1563 | if (rdev != p->rdev) |
1490 | p = conf->mirrors + conf->raid_disks + number; | 1564 | p = conf->mirrors + conf->raid_disks + number; |
@@ -2421,6 +2495,18 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr, int *skipp | |||
2421 | bio->bi_rw = READ; | 2495 | bio->bi_rw = READ; |
2422 | bio->bi_end_io = end_sync_read; | 2496 | bio->bi_end_io = end_sync_read; |
2423 | read_targets++; | 2497 | read_targets++; |
2498 | } else if (!test_bit(WriteErrorSeen, &rdev->flags) && | ||
2499 | test_bit(MD_RECOVERY_SYNC, &mddev->recovery) && | ||
2500 | !test_bit(MD_RECOVERY_CHECK, &mddev->recovery)) { | ||
2501 | /* | ||
2502 | * The device is suitable for reading (InSync), | ||
2503 | * but has bad block(s) here. Let's try to correct them, | ||
2504 | * if we are doing resync or repair. Otherwise, leave | ||
2505 | * this device alone for this sync request. | ||
2506 | */ | ||
2507 | bio->bi_rw = WRITE; | ||
2508 | bio->bi_end_io = end_sync_write; | ||
2509 | write_targets++; | ||
2424 | } | 2510 | } |
2425 | } | 2511 | } |
2426 | if (bio->bi_end_io) { | 2512 | if (bio->bi_end_io) { |
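[Editor's note] The new branch lets resync/repair rewrite known bad blocks on an otherwise In_sync device by turning that device into a write target for the sync request. The condition can be read as a small predicate, sketched here with the same flag names used in the hunk (illustrative helper, not the driver's code):

static bool example_write_correct_bad_blocks(struct md_rdev *rdev,
                                             struct mddev *mddev)
{
        /* only if writes haven't already failed on this device, and we are
         * doing a real resync/repair rather than a read-only check */
        return !test_bit(WriteErrorSeen, &rdev->flags) &&
               test_bit(MD_RECOVERY_SYNC, &mddev->recovery) &&
               !test_bit(MD_RECOVERY_CHECK, &mddev->recovery);
}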
@@ -2478,7 +2564,10 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr, int *skipp | |||
2478 | /* There is nowhere to write, so all non-sync | 2564 | /* There is nowhere to write, so all non-sync |
2479 | * drives must be failed - so we are finished | 2565 | * drives must be failed - so we are finished |
2480 | */ | 2566 | */ |
2481 | sector_t rv = max_sector - sector_nr; | 2567 | sector_t rv; |
2568 | if (min_bad > 0) | ||
2569 | max_sector = sector_nr + min_bad; | ||
2570 | rv = max_sector - sector_nr; | ||
2482 | *skipped = 1; | 2571 | *skipped = 1; |
2483 | put_buf(r1_bio); | 2572 | put_buf(r1_bio); |
2484 | return rv; | 2573 | return rv; |
@@ -2571,7 +2660,7 @@ static struct r1conf *setup_conf(struct mddev *mddev) | |||
2571 | { | 2660 | { |
2572 | struct r1conf *conf; | 2661 | struct r1conf *conf; |
2573 | int i; | 2662 | int i; |
2574 | struct mirror_info *disk; | 2663 | struct raid1_info *disk; |
2575 | struct md_rdev *rdev; | 2664 | struct md_rdev *rdev; |
2576 | int err = -ENOMEM; | 2665 | int err = -ENOMEM; |
2577 | 2666 | ||
@@ -2579,7 +2668,7 @@ static struct r1conf *setup_conf(struct mddev *mddev) | |||
2579 | if (!conf) | 2668 | if (!conf) |
2580 | goto abort; | 2669 | goto abort; |
2581 | 2670 | ||
2582 | conf->mirrors = kzalloc(sizeof(struct mirror_info) | 2671 | conf->mirrors = kzalloc(sizeof(struct raid1_info) |
2583 | * mddev->raid_disks * 2, | 2672 | * mddev->raid_disks * 2, |
2584 | GFP_KERNEL); | 2673 | GFP_KERNEL); |
2585 | if (!conf->mirrors) | 2674 | if (!conf->mirrors) |
@@ -2622,6 +2711,7 @@ static struct r1conf *setup_conf(struct mddev *mddev) | |||
2622 | mddev->merge_check_needed = 1; | 2711 | mddev->merge_check_needed = 1; |
2623 | 2712 | ||
2624 | disk->head_position = 0; | 2713 | disk->head_position = 0; |
2714 | disk->seq_start = MaxSector; | ||
2625 | } | 2715 | } |
2626 | conf->raid_disks = mddev->raid_disks; | 2716 | conf->raid_disks = mddev->raid_disks; |
2627 | conf->mddev = mddev; | 2717 | conf->mddev = mddev; |
@@ -2635,7 +2725,6 @@ static struct r1conf *setup_conf(struct mddev *mddev) | |||
2635 | conf->recovery_disabled = mddev->recovery_disabled - 1; | 2725 | conf->recovery_disabled = mddev->recovery_disabled - 1; |
2636 | 2726 | ||
2637 | err = -EIO; | 2727 | err = -EIO; |
2638 | conf->last_used = -1; | ||
2639 | for (i = 0; i < conf->raid_disks * 2; i++) { | 2728 | for (i = 0; i < conf->raid_disks * 2; i++) { |
2640 | 2729 | ||
2641 | disk = conf->mirrors + i; | 2730 | disk = conf->mirrors + i; |
@@ -2661,19 +2750,9 @@ static struct r1conf *setup_conf(struct mddev *mddev) | |||
2661 | if (disk->rdev && | 2750 | if (disk->rdev && |
2662 | (disk->rdev->saved_raid_disk < 0)) | 2751 | (disk->rdev->saved_raid_disk < 0)) |
2663 | conf->fullsync = 1; | 2752 | conf->fullsync = 1; |
2664 | } else if (conf->last_used < 0) | 2753 | } |
2665 | /* | ||
2666 | * The first working device is used as a | ||
2667 | * starting point to read balancing. | ||
2668 | */ | ||
2669 | conf->last_used = i; | ||
2670 | } | 2754 | } |
2671 | 2755 | ||
2672 | if (conf->last_used < 0) { | ||
2673 | printk(KERN_ERR "md/raid1:%s: no operational mirrors\n", | ||
2674 | mdname(mddev)); | ||
2675 | goto abort; | ||
2676 | } | ||
2677 | err = -ENOMEM; | 2756 | err = -ENOMEM; |
2678 | conf->thread = md_register_thread(raid1d, mddev, "raid1"); | 2757 | conf->thread = md_register_thread(raid1d, mddev, "raid1"); |
2679 | if (!conf->thread) { | 2758 | if (!conf->thread) { |
@@ -2848,7 +2927,7 @@ static int raid1_reshape(struct mddev *mddev) | |||
2848 | */ | 2927 | */ |
2849 | mempool_t *newpool, *oldpool; | 2928 | mempool_t *newpool, *oldpool; |
2850 | struct pool_info *newpoolinfo; | 2929 | struct pool_info *newpoolinfo; |
2851 | struct mirror_info *newmirrors; | 2930 | struct raid1_info *newmirrors; |
2852 | struct r1conf *conf = mddev->private; | 2931 | struct r1conf *conf = mddev->private; |
2853 | int cnt, raid_disks; | 2932 | int cnt, raid_disks; |
2854 | unsigned long flags; | 2933 | unsigned long flags; |
@@ -2891,7 +2970,7 @@ static int raid1_reshape(struct mddev *mddev) | |||
2891 | kfree(newpoolinfo); | 2970 | kfree(newpoolinfo); |
2892 | return -ENOMEM; | 2971 | return -ENOMEM; |
2893 | } | 2972 | } |
2894 | newmirrors = kzalloc(sizeof(struct mirror_info) * raid_disks * 2, | 2973 | newmirrors = kzalloc(sizeof(struct raid1_info) * raid_disks * 2, |
2895 | GFP_KERNEL); | 2974 | GFP_KERNEL); |
2896 | if (!newmirrors) { | 2975 | if (!newmirrors) { |
2897 | kfree(newpoolinfo); | 2976 | kfree(newpoolinfo); |
@@ -2930,7 +3009,6 @@ static int raid1_reshape(struct mddev *mddev) | |||
2930 | conf->raid_disks = mddev->raid_disks = raid_disks; | 3009 | conf->raid_disks = mddev->raid_disks = raid_disks; |
2931 | mddev->delta_disks = 0; | 3010 | mddev->delta_disks = 0; |
2932 | 3011 | ||
2933 | conf->last_used = 0; /* just make sure it is in-range */ | ||
2934 | lower_barrier(conf); | 3012 | lower_barrier(conf); |
2935 | 3013 | ||
2936 | set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); | 3014 | set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); |
diff --git a/drivers/md/raid1.h b/drivers/md/raid1.h index 80ded139314c..0ff3715fb7eb 100644 --- a/drivers/md/raid1.h +++ b/drivers/md/raid1.h | |||
@@ -1,9 +1,15 @@ | |||
1 | #ifndef _RAID1_H | 1 | #ifndef _RAID1_H |
2 | #define _RAID1_H | 2 | #define _RAID1_H |
3 | 3 | ||
4 | struct mirror_info { | 4 | struct raid1_info { |
5 | struct md_rdev *rdev; | 5 | struct md_rdev *rdev; |
6 | sector_t head_position; | 6 | sector_t head_position; |
7 | |||
8 | /* When choosing the best device for a read (read_balance()) | ||
9 | * we try to keep sequential reads on the same device | ||
10 | */ | ||
11 | sector_t next_seq_sect; | ||
12 | sector_t seq_start; | ||
7 | }; | 13 | }; |
8 | 14 | ||
9 | /* | 15 | /* |
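[Editor's note] The per-array last_used/next_seq_sect pair becomes per-mirror state, so sequential access can be tracked for each device independently. A sketch of how read_balance() maintains the two new fields (simplified from the hunk earlier in this patch):

static void example_note_read(struct raid1_info *mirror,
                              sector_t this_sector, int sectors)
{
        if (mirror->next_seq_sect != this_sector)
                mirror->seq_start = this_sector;        /* a new sequential run begins */

        mirror->next_seq_sect = this_sector + sectors;  /* where the run should continue */
}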
@@ -24,17 +30,11 @@ struct pool_info { | |||
24 | 30 | ||
25 | struct r1conf { | 31 | struct r1conf { |
26 | struct mddev *mddev; | 32 | struct mddev *mddev; |
27 | struct mirror_info *mirrors; /* twice 'raid_disks' to | 33 | struct raid1_info *mirrors; /* twice 'raid_disks' to |
28 | * allow for replacements. | 34 | * allow for replacements. |
29 | */ | 35 | */ |
30 | int raid_disks; | 36 | int raid_disks; |
31 | 37 | ||
32 | /* When choose the best device for a read (read_balance()) | ||
33 | * we try to keep sequential reads one the same device | ||
34 | * using 'last_used' and 'next_seq_sect' | ||
35 | */ | ||
36 | int last_used; | ||
37 | sector_t next_seq_sect; | ||
38 | /* During resync, read_balancing is only allowed on the part | 38 | /* During resync, read_balancing is only allowed on the part |
39 | * of the array that has been resynced. 'next_resync' tells us | 39 | * of the array that has been resynced. 'next_resync' tells us |
40 | * where that is. | 40 | * where that is. |
@@ -135,20 +135,6 @@ struct r1bio { | |||
135 | /* DO NOT PUT ANY NEW FIELDS HERE - bios array is contiguously alloced*/ | 135 | /* DO NOT PUT ANY NEW FIELDS HERE - bios array is contiguously alloced*/ |
136 | }; | 136 | }; |
137 | 137 | ||
138 | /* when we get a read error on a read-only array, we redirect to another | ||
139 | * device without failing the first device, or trying to over-write to | ||
140 | * correct the read error. To keep track of bad blocks on a per-bio | ||
141 | * level, we store IO_BLOCKED in the appropriate 'bios' pointer | ||
142 | */ | ||
143 | #define IO_BLOCKED ((struct bio *)1) | ||
144 | /* When we successfully write to a known bad-block, we need to remove the | ||
145 | * bad-block marking which must be done from process context. So we record | ||
146 | * the success by setting bios[n] to IO_MADE_GOOD | ||
147 | */ | ||
148 | #define IO_MADE_GOOD ((struct bio *)2) | ||
149 | |||
150 | #define BIO_SPECIAL(bio) ((unsigned long)bio <= 2) | ||
151 | |||
152 | /* bits for r1bio.state */ | 138 | /* bits for r1bio.state */ |
153 | #define R1BIO_Uptodate 0 | 139 | #define R1BIO_Uptodate 0 |
154 | #define R1BIO_IsSync 1 | 140 | #define R1BIO_IsSync 1 |
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 5d33603a497d..de5ed6fd8806 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c | |||
@@ -60,7 +60,21 @@ | |||
60 | */ | 60 | */ |
61 | #define NR_RAID10_BIOS 256 | 61 | #define NR_RAID10_BIOS 256 |
62 | 62 | ||
63 | /* When there are this many requests queue to be written by | 63 | /* when we get a read error on a read-only array, we redirect to another |
64 | * device without failing the first device, or trying to over-write to | ||
65 | * correct the read error. To keep track of bad blocks on a per-bio | ||
66 | * level, we store IO_BLOCKED in the appropriate 'bios' pointer | ||
67 | */ | ||
68 | #define IO_BLOCKED ((struct bio *)1) | ||
69 | /* When we successfully write to a known bad-block, we need to remove the | ||
70 | * bad-block marking which must be done from process context. So we record | ||
71 | * the success by setting devs[n].bio to IO_MADE_GOOD | ||
72 | */ | ||
73 | #define IO_MADE_GOOD ((struct bio *)2) | ||
74 | |||
75 | #define BIO_SPECIAL(bio) ((unsigned long)bio <= 2) | ||
76 | |||
77 | /* When there are this many requests queued to be written by | ||
64 | * the raid10 thread, we become 'congested' to provide back-pressure | 78 | * the raid10 thread, we become 'congested' to provide back-pressure |
65 | * for writeback. | 79 | * for writeback. |
66 | */ | 80 | */ |
@@ -717,7 +731,7 @@ static struct md_rdev *read_balance(struct r10conf *conf, | |||
717 | int sectors = r10_bio->sectors; | 731 | int sectors = r10_bio->sectors; |
718 | int best_good_sectors; | 732 | int best_good_sectors; |
719 | sector_t new_distance, best_dist; | 733 | sector_t new_distance, best_dist; |
720 | struct md_rdev *rdev, *best_rdev; | 734 | struct md_rdev *best_rdev, *rdev = NULL; |
721 | int do_balance; | 735 | int do_balance; |
722 | int best_slot; | 736 | int best_slot; |
723 | struct geom *geo = &conf->geo; | 737 | struct geom *geo = &conf->geo; |
@@ -839,9 +853,8 @@ retry: | |||
839 | return rdev; | 853 | return rdev; |
840 | } | 854 | } |
841 | 855 | ||
842 | static int raid10_congested(void *data, int bits) | 856 | int md_raid10_congested(struct mddev *mddev, int bits) |
843 | { | 857 | { |
844 | struct mddev *mddev = data; | ||
845 | struct r10conf *conf = mddev->private; | 858 | struct r10conf *conf = mddev->private; |
846 | int i, ret = 0; | 859 | int i, ret = 0; |
847 | 860 | ||
@@ -849,8 +862,6 @@ static int raid10_congested(void *data, int bits) | |||
849 | conf->pending_count >= max_queued_requests) | 862 | conf->pending_count >= max_queued_requests) |
850 | return 1; | 863 | return 1; |
851 | 864 | ||
852 | if (mddev_congested(mddev, bits)) | ||
853 | return 1; | ||
854 | rcu_read_lock(); | 865 | rcu_read_lock(); |
855 | for (i = 0; | 866 | for (i = 0; |
856 | (i < conf->geo.raid_disks || i < conf->prev.raid_disks) | 867 | (i < conf->geo.raid_disks || i < conf->prev.raid_disks) |
@@ -866,6 +877,15 @@ static int raid10_congested(void *data, int bits) | |||
866 | rcu_read_unlock(); | 877 | rcu_read_unlock(); |
867 | return ret; | 878 | return ret; |
868 | } | 879 | } |
880 | EXPORT_SYMBOL_GPL(md_raid10_congested); | ||
881 | |||
882 | static int raid10_congested(void *data, int bits) | ||
883 | { | ||
884 | struct mddev *mddev = data; | ||
885 | |||
886 | return mddev_congested(mddev, bits) || | ||
887 | md_raid10_congested(mddev, bits); | ||
888 | } | ||
869 | 889 | ||
870 | static void flush_pending_writes(struct r10conf *conf) | 890 | static void flush_pending_writes(struct r10conf *conf) |
871 | { | 891 | { |
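[Editor's note] Splitting the congestion check lets another module ask whether the raid10 component itself is congested, while raid10's own congested_fn keeps the additional mddev_congested() test shown above. A hedged sketch of an external caller (the wrapper name is hypothetical):

static int example_stacked_congested(struct mddev *raid10_mddev, int bits)
{
        /* query only the raid10 layer; the caller handles its own mddev state */
        return md_raid10_congested(raid10_mddev, bits);
}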
@@ -1546,7 +1566,7 @@ static void error(struct mddev *mddev, struct md_rdev *rdev) | |||
1546 | static void print_conf(struct r10conf *conf) | 1566 | static void print_conf(struct r10conf *conf) |
1547 | { | 1567 | { |
1548 | int i; | 1568 | int i; |
1549 | struct mirror_info *tmp; | 1569 | struct raid10_info *tmp; |
1550 | 1570 | ||
1551 | printk(KERN_DEBUG "RAID10 conf printout:\n"); | 1571 | printk(KERN_DEBUG "RAID10 conf printout:\n"); |
1552 | if (!conf) { | 1572 | if (!conf) { |
@@ -1580,7 +1600,7 @@ static int raid10_spare_active(struct mddev *mddev) | |||
1580 | { | 1600 | { |
1581 | int i; | 1601 | int i; |
1582 | struct r10conf *conf = mddev->private; | 1602 | struct r10conf *conf = mddev->private; |
1583 | struct mirror_info *tmp; | 1603 | struct raid10_info *tmp; |
1584 | int count = 0; | 1604 | int count = 0; |
1585 | unsigned long flags; | 1605 | unsigned long flags; |
1586 | 1606 | ||
@@ -1655,7 +1675,7 @@ static int raid10_add_disk(struct mddev *mddev, struct md_rdev *rdev) | |||
1655 | else | 1675 | else |
1656 | mirror = first; | 1676 | mirror = first; |
1657 | for ( ; mirror <= last ; mirror++) { | 1677 | for ( ; mirror <= last ; mirror++) { |
1658 | struct mirror_info *p = &conf->mirrors[mirror]; | 1678 | struct raid10_info *p = &conf->mirrors[mirror]; |
1659 | if (p->recovery_disabled == mddev->recovery_disabled) | 1679 | if (p->recovery_disabled == mddev->recovery_disabled) |
1660 | continue; | 1680 | continue; |
1661 | if (p->rdev) { | 1681 | if (p->rdev) { |
@@ -1709,7 +1729,7 @@ static int raid10_remove_disk(struct mddev *mddev, struct md_rdev *rdev) | |||
1709 | int err = 0; | 1729 | int err = 0; |
1710 | int number = rdev->raid_disk; | 1730 | int number = rdev->raid_disk; |
1711 | struct md_rdev **rdevp; | 1731 | struct md_rdev **rdevp; |
1712 | struct mirror_info *p = conf->mirrors + number; | 1732 | struct raid10_info *p = conf->mirrors + number; |
1713 | 1733 | ||
1714 | print_conf(conf); | 1734 | print_conf(conf); |
1715 | if (rdev == p->rdev) | 1735 | if (rdev == p->rdev) |
@@ -2875,7 +2895,7 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr, | |||
2875 | sector_t sect; | 2895 | sector_t sect; |
2876 | int must_sync; | 2896 | int must_sync; |
2877 | int any_working; | 2897 | int any_working; |
2878 | struct mirror_info *mirror = &conf->mirrors[i]; | 2898 | struct raid10_info *mirror = &conf->mirrors[i]; |
2879 | 2899 | ||
2880 | if ((mirror->rdev == NULL || | 2900 | if ((mirror->rdev == NULL || |
2881 | test_bit(In_sync, &mirror->rdev->flags)) | 2901 | test_bit(In_sync, &mirror->rdev->flags)) |
@@ -3387,7 +3407,7 @@ static struct r10conf *setup_conf(struct mddev *mddev) | |||
3387 | goto out; | 3407 | goto out; |
3388 | 3408 | ||
3389 | /* FIXME calc properly */ | 3409 | /* FIXME calc properly */ |
3390 | conf->mirrors = kzalloc(sizeof(struct mirror_info)*(mddev->raid_disks + | 3410 | conf->mirrors = kzalloc(sizeof(struct raid10_info)*(mddev->raid_disks + |
3391 | max(0,mddev->delta_disks)), | 3411 | max(0,mddev->delta_disks)), |
3392 | GFP_KERNEL); | 3412 | GFP_KERNEL); |
3393 | if (!conf->mirrors) | 3413 | if (!conf->mirrors) |
@@ -3451,7 +3471,7 @@ static int run(struct mddev *mddev) | |||
3451 | { | 3471 | { |
3452 | struct r10conf *conf; | 3472 | struct r10conf *conf; |
3453 | int i, disk_idx, chunk_size; | 3473 | int i, disk_idx, chunk_size; |
3454 | struct mirror_info *disk; | 3474 | struct raid10_info *disk; |
3455 | struct md_rdev *rdev; | 3475 | struct md_rdev *rdev; |
3456 | sector_t size; | 3476 | sector_t size; |
3457 | sector_t min_offset_diff = 0; | 3477 | sector_t min_offset_diff = 0; |
@@ -3471,12 +3491,14 @@ static int run(struct mddev *mddev) | |||
3471 | conf->thread = NULL; | 3491 | conf->thread = NULL; |
3472 | 3492 | ||
3473 | chunk_size = mddev->chunk_sectors << 9; | 3493 | chunk_size = mddev->chunk_sectors << 9; |
3474 | blk_queue_io_min(mddev->queue, chunk_size); | 3494 | if (mddev->queue) { |
3475 | if (conf->geo.raid_disks % conf->geo.near_copies) | 3495 | blk_queue_io_min(mddev->queue, chunk_size); |
3476 | blk_queue_io_opt(mddev->queue, chunk_size * conf->geo.raid_disks); | 3496 | if (conf->geo.raid_disks % conf->geo.near_copies) |
3477 | else | 3497 | blk_queue_io_opt(mddev->queue, chunk_size * conf->geo.raid_disks); |
3478 | blk_queue_io_opt(mddev->queue, chunk_size * | 3498 | else |
3479 | (conf->geo.raid_disks / conf->geo.near_copies)); | 3499 | blk_queue_io_opt(mddev->queue, chunk_size * |
3500 | (conf->geo.raid_disks / conf->geo.near_copies)); | ||
3501 | } | ||
3480 | 3502 | ||
3481 | rdev_for_each(rdev, mddev) { | 3503 | rdev_for_each(rdev, mddev) { |
3482 | long long diff; | 3504 | long long diff; |
@@ -3510,8 +3532,9 @@ static int run(struct mddev *mddev) | |||
3510 | if (first || diff < min_offset_diff) | 3532 | if (first || diff < min_offset_diff) |
3511 | min_offset_diff = diff; | 3533 | min_offset_diff = diff; |
3512 | 3534 | ||
3513 | disk_stack_limits(mddev->gendisk, rdev->bdev, | 3535 | if (mddev->gendisk) |
3514 | rdev->data_offset << 9); | 3536 | disk_stack_limits(mddev->gendisk, rdev->bdev, |
3537 | rdev->data_offset << 9); | ||
3515 | 3538 | ||
3516 | disk->head_position = 0; | 3539 | disk->head_position = 0; |
3517 | } | 3540 | } |
@@ -3574,22 +3597,22 @@ static int run(struct mddev *mddev) | |||
3574 | md_set_array_sectors(mddev, size); | 3597 | md_set_array_sectors(mddev, size); |
3575 | mddev->resync_max_sectors = size; | 3598 | mddev->resync_max_sectors = size; |
3576 | 3599 | ||
3577 | mddev->queue->backing_dev_info.congested_fn = raid10_congested; | 3600 | if (mddev->queue) { |
3578 | mddev->queue->backing_dev_info.congested_data = mddev; | ||
3579 | |||
3580 | /* Calculate max read-ahead size. | ||
3581 | * We need to readahead at least twice a whole stripe.... | ||
3582 | * maybe... | ||
3583 | */ | ||
3584 | { | ||
3585 | int stripe = conf->geo.raid_disks * | 3601 | int stripe = conf->geo.raid_disks * |
3586 | ((mddev->chunk_sectors << 9) / PAGE_SIZE); | 3602 | ((mddev->chunk_sectors << 9) / PAGE_SIZE); |
3603 | mddev->queue->backing_dev_info.congested_fn = raid10_congested; | ||
3604 | mddev->queue->backing_dev_info.congested_data = mddev; | ||
3605 | |||
3606 | /* Calculate max read-ahead size. | ||
3607 | * We need to readahead at least twice a whole stripe.... | ||
3608 | * maybe... | ||
3609 | */ | ||
3587 | stripe /= conf->geo.near_copies; | 3610 | stripe /= conf->geo.near_copies; |
3588 | if (mddev->queue->backing_dev_info.ra_pages < 2 * stripe) | 3611 | if (mddev->queue->backing_dev_info.ra_pages < 2 * stripe) |
3589 | mddev->queue->backing_dev_info.ra_pages = 2 * stripe; | 3612 | mddev->queue->backing_dev_info.ra_pages = 2 * stripe; |
3613 | blk_queue_merge_bvec(mddev->queue, raid10_mergeable_bvec); | ||
3590 | } | 3614 | } |
3591 | 3615 | ||
3592 | blk_queue_merge_bvec(mddev->queue, raid10_mergeable_bvec); | ||
3593 | 3616 | ||
3594 | if (md_integrity_register(mddev)) | 3617 | if (md_integrity_register(mddev)) |
3595 | goto out_free_conf; | 3618 | goto out_free_conf; |
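[Editor's note] These hunks all apply the same guard: request-queue and gendisk tuning is skipped when mddev->queue or mddev->gendisk is NULL, which happens when raid10 runs as a personality without its own block device (for example under dm-raid). The pattern in isolation, as a sketch:

static void example_tune_queue(struct mddev *mddev, int chunk_size)
{
        if (!mddev->queue)
                return;         /* no queue of our own: nothing to tune */

        blk_queue_io_min(mddev->queue, chunk_size);
}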
@@ -3640,7 +3663,10 @@ static int stop(struct mddev *mddev) | |||
3640 | lower_barrier(conf); | 3663 | lower_barrier(conf); |
3641 | 3664 | ||
3642 | md_unregister_thread(&mddev->thread); | 3665 | md_unregister_thread(&mddev->thread); |
3643 | blk_sync_queue(mddev->queue); /* the unplug fn references 'conf'*/ | 3666 | if (mddev->queue) |
3667 | /* the unplug fn references 'conf'*/ | ||
3668 | blk_sync_queue(mddev->queue); | ||
3669 | |||
3644 | if (conf->r10bio_pool) | 3670 | if (conf->r10bio_pool) |
3645 | mempool_destroy(conf->r10bio_pool); | 3671 | mempool_destroy(conf->r10bio_pool); |
3646 | kfree(conf->mirrors); | 3672 | kfree(conf->mirrors); |
@@ -3804,7 +3830,7 @@ static int raid10_check_reshape(struct mddev *mddev) | |||
3804 | if (mddev->delta_disks > 0) { | 3830 | if (mddev->delta_disks > 0) { |
3805 | /* allocate new 'mirrors' list */ | 3831 | /* allocate new 'mirrors' list */ |
3806 | conf->mirrors_new = kzalloc( | 3832 | conf->mirrors_new = kzalloc( |
3807 | sizeof(struct mirror_info) | 3833 | sizeof(struct raid10_info) |
3808 | *(mddev->raid_disks + | 3834 | *(mddev->raid_disks + |
3809 | mddev->delta_disks), | 3835 | mddev->delta_disks), |
3810 | GFP_KERNEL); | 3836 | GFP_KERNEL); |
@@ -3929,7 +3955,7 @@ static int raid10_start_reshape(struct mddev *mddev) | |||
3929 | spin_lock_irq(&conf->device_lock); | 3955 | spin_lock_irq(&conf->device_lock); |
3930 | if (conf->mirrors_new) { | 3956 | if (conf->mirrors_new) { |
3931 | memcpy(conf->mirrors_new, conf->mirrors, | 3957 | memcpy(conf->mirrors_new, conf->mirrors, |
3932 | sizeof(struct mirror_info)*conf->prev.raid_disks); | 3958 | sizeof(struct raid10_info)*conf->prev.raid_disks); |
3933 | smp_mb(); | 3959 | smp_mb(); |
3934 | kfree(conf->mirrors_old); /* FIXME and elsewhere */ | 3960 | kfree(conf->mirrors_old); /* FIXME and elsewhere */ |
3935 | conf->mirrors_old = conf->mirrors; | 3961 | conf->mirrors_old = conf->mirrors; |
diff --git a/drivers/md/raid10.h b/drivers/md/raid10.h index 135b1b0a1554..007c2c68dd83 100644 --- a/drivers/md/raid10.h +++ b/drivers/md/raid10.h | |||
@@ -1,7 +1,7 @@ | |||
1 | #ifndef _RAID10_H | 1 | #ifndef _RAID10_H |
2 | #define _RAID10_H | 2 | #define _RAID10_H |
3 | 3 | ||
4 | struct mirror_info { | 4 | struct raid10_info { |
5 | struct md_rdev *rdev, *replacement; | 5 | struct md_rdev *rdev, *replacement; |
6 | sector_t head_position; | 6 | sector_t head_position; |
7 | int recovery_disabled; /* matches | 7 | int recovery_disabled; /* matches |
@@ -13,8 +13,8 @@ struct mirror_info { | |||
13 | 13 | ||
14 | struct r10conf { | 14 | struct r10conf { |
15 | struct mddev *mddev; | 15 | struct mddev *mddev; |
16 | struct mirror_info *mirrors; | 16 | struct raid10_info *mirrors; |
17 | struct mirror_info *mirrors_new, *mirrors_old; | 17 | struct raid10_info *mirrors_new, *mirrors_old; |
18 | spinlock_t device_lock; | 18 | spinlock_t device_lock; |
19 | 19 | ||
20 | /* geometry */ | 20 | /* geometry */ |
@@ -123,20 +123,6 @@ struct r10bio { | |||
123 | } devs[0]; | 123 | } devs[0]; |
124 | }; | 124 | }; |
125 | 125 | ||
126 | /* when we get a read error on a read-only array, we redirect to another | ||
127 | * device without failing the first device, or trying to over-write to | ||
128 | * correct the read error. To keep track of bad blocks on a per-bio | ||
129 | * level, we store IO_BLOCKED in the appropriate 'bios' pointer | ||
130 | */ | ||
131 | #define IO_BLOCKED ((struct bio*)1) | ||
132 | /* When we successfully write to a known bad-block, we need to remove the | ||
133 | * bad-block marking which must be done from process context. So we record | ||
134 | * the success by setting devs[n].bio to IO_MADE_GOOD | ||
135 | */ | ||
136 | #define IO_MADE_GOOD ((struct bio *)2) | ||
137 | |||
138 | #define BIO_SPECIAL(bio) ((unsigned long)bio <= 2) | ||
139 | |||
140 | /* bits for r10bio.state */ | 126 | /* bits for r10bio.state */ |
141 | enum r10bio_state { | 127 | enum r10bio_state { |
142 | R10BIO_Uptodate, | 128 | R10BIO_Uptodate, |
@@ -159,4 +145,7 @@ enum r10bio_state { | |||
159 | */ | 145 | */ |
160 | R10BIO_Previous, | 146 | R10BIO_Previous, |
161 | }; | 147 | }; |
148 | |||
149 | extern int md_raid10_congested(struct mddev *mddev, int bits); | ||
150 | |||
162 | #endif | 151 | #endif |
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 9e41ae37bd40..adda94df5eb2 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c | |||
@@ -99,34 +99,40 @@ static inline struct bio *r5_next_bio(struct bio *bio, sector_t sector) | |||
99 | * We maintain a biased count of active stripes in the bottom 16 bits of | 99 | * We maintain a biased count of active stripes in the bottom 16 bits of |
100 | * bi_phys_segments, and a count of processed stripes in the upper 16 bits | 100 | * bi_phys_segments, and a count of processed stripes in the upper 16 bits |
101 | */ | 101 | */ |
102 | static inline int raid5_bi_phys_segments(struct bio *bio) | 102 | static inline int raid5_bi_processed_stripes(struct bio *bio) |
103 | { | 103 | { |
104 | return bio->bi_phys_segments & 0xffff; | 104 | atomic_t *segments = (atomic_t *)&bio->bi_phys_segments; |
105 | return (atomic_read(segments) >> 16) & 0xffff; | ||
105 | } | 106 | } |
106 | 107 | ||
107 | static inline int raid5_bi_hw_segments(struct bio *bio) | 108 | static inline int raid5_dec_bi_active_stripes(struct bio *bio) |
108 | { | 109 | { |
109 | return (bio->bi_phys_segments >> 16) & 0xffff; | 110 | atomic_t *segments = (atomic_t *)&bio->bi_phys_segments; |
111 | return atomic_sub_return(1, segments) & 0xffff; | ||
110 | } | 112 | } |
111 | 113 | ||
112 | static inline int raid5_dec_bi_phys_segments(struct bio *bio) | 114 | static inline void raid5_inc_bi_active_stripes(struct bio *bio) |
113 | { | 115 | { |
114 | --bio->bi_phys_segments; | 116 | atomic_t *segments = (atomic_t *)&bio->bi_phys_segments; |
115 | return raid5_bi_phys_segments(bio); | 117 | atomic_inc(segments); |
116 | } | 118 | } |
117 | 119 | ||
118 | static inline int raid5_dec_bi_hw_segments(struct bio *bio) | 120 | static inline void raid5_set_bi_processed_stripes(struct bio *bio, |
121 | unsigned int cnt) | ||
119 | { | 122 | { |
120 | unsigned short val = raid5_bi_hw_segments(bio); | 123 | atomic_t *segments = (atomic_t *)&bio->bi_phys_segments; |
124 | int old, new; | ||
121 | 125 | ||
122 | --val; | 126 | do { |
123 | bio->bi_phys_segments = (val << 16) | raid5_bi_phys_segments(bio); | 127 | old = atomic_read(segments); |
124 | return val; | 128 | new = (old & 0xffff) | (cnt << 16); |
129 | } while (atomic_cmpxchg(segments, old, new) != old); | ||
125 | } | 130 | } |
126 | 131 | ||
127 | static inline void raid5_set_bi_hw_segments(struct bio *bio, unsigned int cnt) | 132 | static inline void raid5_set_bi_stripes(struct bio *bio, unsigned int cnt) |
128 | { | 133 | { |
129 | bio->bi_phys_segments = raid5_bi_phys_segments(bio) | (cnt << 16); | 134 | atomic_t *segments = (atomic_t *)&bio->bi_phys_segments; |
135 | atomic_set(segments, cnt); | ||
130 | } | 136 | } |
131 | 137 | ||
132 | /* Find first data disk in a raid6 stripe */ | 138 | /* Find first data disk in a raid6 stripe */ |
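[Editor's note] bi_phys_segments still packs two 16-bit counters, but they are now updated with atomic operations instead of under device_lock. The layout the new accessors assume, plus the completion test that relies on it (a sketch based on how the later hunks use raid5_dec_bi_active_stripes()):

/*
 *   bi_phys_segments, treated as an atomic_t:
 *   bits 16..31: processed stripes (for retried aligned reads)
 *   bits  0..15: active stripes still referencing this bio
 */
static struct bio *example_drop_stripe_ref(struct bio *bio,
                                           struct bio *return_bi)
{
        /* last stripe reference gone: queue the bio for completion */
        if (!raid5_dec_bi_active_stripes(bio)) {
                bio->bi_next = return_bi;
                return_bi = bio;
        }
        return return_bi;
}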
@@ -190,49 +196,56 @@ static int stripe_operations_active(struct stripe_head *sh) | |||
190 | test_bit(STRIPE_COMPUTE_RUN, &sh->state); | 196 | test_bit(STRIPE_COMPUTE_RUN, &sh->state); |
191 | } | 197 | } |
192 | 198 | ||
193 | static void __release_stripe(struct r5conf *conf, struct stripe_head *sh) | 199 | static void do_release_stripe(struct r5conf *conf, struct stripe_head *sh) |
194 | { | 200 | { |
195 | if (atomic_dec_and_test(&sh->count)) { | 201 | BUG_ON(!list_empty(&sh->lru)); |
196 | BUG_ON(!list_empty(&sh->lru)); | 202 | BUG_ON(atomic_read(&conf->active_stripes)==0); |
197 | BUG_ON(atomic_read(&conf->active_stripes)==0); | 203 | if (test_bit(STRIPE_HANDLE, &sh->state)) { |
198 | if (test_bit(STRIPE_HANDLE, &sh->state)) { | 204 | if (test_bit(STRIPE_DELAYED, &sh->state) && |
199 | if (test_bit(STRIPE_DELAYED, &sh->state) && | 205 | !test_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) |
200 | !test_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) | 206 | list_add_tail(&sh->lru, &conf->delayed_list); |
201 | list_add_tail(&sh->lru, &conf->delayed_list); | 207 | else if (test_bit(STRIPE_BIT_DELAY, &sh->state) && |
202 | else if (test_bit(STRIPE_BIT_DELAY, &sh->state) && | 208 | sh->bm_seq - conf->seq_write > 0) |
203 | sh->bm_seq - conf->seq_write > 0) | 209 | list_add_tail(&sh->lru, &conf->bitmap_list); |
204 | list_add_tail(&sh->lru, &conf->bitmap_list); | 210 | else { |
205 | else { | 211 | clear_bit(STRIPE_DELAYED, &sh->state); |
206 | clear_bit(STRIPE_DELAYED, &sh->state); | 212 | clear_bit(STRIPE_BIT_DELAY, &sh->state); |
207 | clear_bit(STRIPE_BIT_DELAY, &sh->state); | 213 | list_add_tail(&sh->lru, &conf->handle_list); |
208 | list_add_tail(&sh->lru, &conf->handle_list); | 214 | } |
209 | } | 215 | md_wakeup_thread(conf->mddev->thread); |
210 | md_wakeup_thread(conf->mddev->thread); | 216 | } else { |
211 | } else { | 217 | BUG_ON(stripe_operations_active(sh)); |
212 | BUG_ON(stripe_operations_active(sh)); | 218 | if (test_and_clear_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) |
213 | if (test_and_clear_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) | 219 | if (atomic_dec_return(&conf->preread_active_stripes) |
214 | if (atomic_dec_return(&conf->preread_active_stripes) | 220 | < IO_THRESHOLD) |
215 | < IO_THRESHOLD) | 221 | md_wakeup_thread(conf->mddev->thread); |
216 | md_wakeup_thread(conf->mddev->thread); | 222 | atomic_dec(&conf->active_stripes); |
217 | atomic_dec(&conf->active_stripes); | 223 | if (!test_bit(STRIPE_EXPANDING, &sh->state)) { |
218 | if (!test_bit(STRIPE_EXPANDING, &sh->state)) { | 224 | list_add_tail(&sh->lru, &conf->inactive_list); |
219 | list_add_tail(&sh->lru, &conf->inactive_list); | 225 | wake_up(&conf->wait_for_stripe); |
220 | wake_up(&conf->wait_for_stripe); | 226 | if (conf->retry_read_aligned) |
221 | if (conf->retry_read_aligned) | 227 | md_wakeup_thread(conf->mddev->thread); |
222 | md_wakeup_thread(conf->mddev->thread); | ||
223 | } | ||
224 | } | 228 | } |
225 | } | 229 | } |
226 | } | 230 | } |
227 | 231 | ||
232 | static void __release_stripe(struct r5conf *conf, struct stripe_head *sh) | ||
233 | { | ||
234 | if (atomic_dec_and_test(&sh->count)) | ||
235 | do_release_stripe(conf, sh); | ||
236 | } | ||
237 | |||
228 | static void release_stripe(struct stripe_head *sh) | 238 | static void release_stripe(struct stripe_head *sh) |
229 | { | 239 | { |
230 | struct r5conf *conf = sh->raid_conf; | 240 | struct r5conf *conf = sh->raid_conf; |
231 | unsigned long flags; | 241 | unsigned long flags; |
232 | 242 | ||
233 | spin_lock_irqsave(&conf->device_lock, flags); | 243 | local_irq_save(flags); |
234 | __release_stripe(conf, sh); | 244 | if (atomic_dec_and_lock(&sh->count, &conf->device_lock)) { |
235 | spin_unlock_irqrestore(&conf->device_lock, flags); | 245 | do_release_stripe(conf, sh); |
246 | spin_unlock(&conf->device_lock); | ||
247 | } | ||
248 | local_irq_restore(flags); | ||
236 | } | 249 | } |
237 | 250 | ||
238 | static inline void remove_hash(struct stripe_head *sh) | 251 | static inline void remove_hash(struct stripe_head *sh) |
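[Editor's note] release_stripe() now uses atomic_dec_and_lock(), so device_lock is only taken on the transition to zero; concurrent releases of a still-referenced stripe never touch the lock. The general shape of that fast path (generic sketch, not the driver code):

static void example_put(atomic_t *count, spinlock_t *lock,
                        void (*last_ref)(void))
{
        unsigned long flags;

        local_irq_save(flags);
        if (atomic_dec_and_lock(count, lock)) {
                last_ref();             /* teardown runs with the lock held */
                spin_unlock(lock);
        }
        local_irq_restore(flags);
}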
@@ -641,6 +654,9 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s) | |||
641 | else | 654 | else |
642 | bi->bi_sector = (sh->sector | 655 | bi->bi_sector = (sh->sector |
643 | + rdev->data_offset); | 656 | + rdev->data_offset); |
657 | if (test_bit(R5_ReadNoMerge, &sh->dev[i].flags)) | ||
658 | bi->bi_rw |= REQ_FLUSH; | ||
659 | |||
644 | bi->bi_flags = 1 << BIO_UPTODATE; | 660 | bi->bi_flags = 1 << BIO_UPTODATE; |
645 | bi->bi_idx = 0; | 661 | bi->bi_idx = 0; |
646 | bi->bi_io_vec[0].bv_len = STRIPE_SIZE; | 662 | bi->bi_io_vec[0].bv_len = STRIPE_SIZE; |
@@ -750,14 +766,12 @@ static void ops_complete_biofill(void *stripe_head_ref) | |||
750 | { | 766 | { |
751 | struct stripe_head *sh = stripe_head_ref; | 767 | struct stripe_head *sh = stripe_head_ref; |
752 | struct bio *return_bi = NULL; | 768 | struct bio *return_bi = NULL; |
753 | struct r5conf *conf = sh->raid_conf; | ||
754 | int i; | 769 | int i; |
755 | 770 | ||
756 | pr_debug("%s: stripe %llu\n", __func__, | 771 | pr_debug("%s: stripe %llu\n", __func__, |
757 | (unsigned long long)sh->sector); | 772 | (unsigned long long)sh->sector); |
758 | 773 | ||
759 | /* clear completed biofills */ | 774 | /* clear completed biofills */ |
760 | spin_lock_irq(&conf->device_lock); | ||
761 | for (i = sh->disks; i--; ) { | 775 | for (i = sh->disks; i--; ) { |
762 | struct r5dev *dev = &sh->dev[i]; | 776 | struct r5dev *dev = &sh->dev[i]; |
763 | 777 | ||
@@ -775,7 +789,7 @@ static void ops_complete_biofill(void *stripe_head_ref) | |||
775 | while (rbi && rbi->bi_sector < | 789 | while (rbi && rbi->bi_sector < |
776 | dev->sector + STRIPE_SECTORS) { | 790 | dev->sector + STRIPE_SECTORS) { |
777 | rbi2 = r5_next_bio(rbi, dev->sector); | 791 | rbi2 = r5_next_bio(rbi, dev->sector); |
778 | if (!raid5_dec_bi_phys_segments(rbi)) { | 792 | if (!raid5_dec_bi_active_stripes(rbi)) { |
779 | rbi->bi_next = return_bi; | 793 | rbi->bi_next = return_bi; |
780 | return_bi = rbi; | 794 | return_bi = rbi; |
781 | } | 795 | } |
@@ -783,7 +797,6 @@ static void ops_complete_biofill(void *stripe_head_ref) | |||
783 | } | 797 | } |
784 | } | 798 | } |
785 | } | 799 | } |
786 | spin_unlock_irq(&conf->device_lock); | ||
787 | clear_bit(STRIPE_BIOFILL_RUN, &sh->state); | 800 | clear_bit(STRIPE_BIOFILL_RUN, &sh->state); |
788 | 801 | ||
789 | return_io(return_bi); | 802 | return_io(return_bi); |
@@ -795,7 +808,6 @@ static void ops_complete_biofill(void *stripe_head_ref) | |||
795 | static void ops_run_biofill(struct stripe_head *sh) | 808 | static void ops_run_biofill(struct stripe_head *sh) |
796 | { | 809 | { |
797 | struct dma_async_tx_descriptor *tx = NULL; | 810 | struct dma_async_tx_descriptor *tx = NULL; |
798 | struct r5conf *conf = sh->raid_conf; | ||
799 | struct async_submit_ctl submit; | 811 | struct async_submit_ctl submit; |
800 | int i; | 812 | int i; |
801 | 813 | ||
@@ -806,10 +818,10 @@ static void ops_run_biofill(struct stripe_head *sh) | |||
806 | struct r5dev *dev = &sh->dev[i]; | 818 | struct r5dev *dev = &sh->dev[i]; |
807 | if (test_bit(R5_Wantfill, &dev->flags)) { | 819 | if (test_bit(R5_Wantfill, &dev->flags)) { |
808 | struct bio *rbi; | 820 | struct bio *rbi; |
809 | spin_lock_irq(&conf->device_lock); | 821 | spin_lock_irq(&sh->stripe_lock); |
810 | dev->read = rbi = dev->toread; | 822 | dev->read = rbi = dev->toread; |
811 | dev->toread = NULL; | 823 | dev->toread = NULL; |
812 | spin_unlock_irq(&conf->device_lock); | 824 | spin_unlock_irq(&sh->stripe_lock); |
813 | while (rbi && rbi->bi_sector < | 825 | while (rbi && rbi->bi_sector < |
814 | dev->sector + STRIPE_SECTORS) { | 826 | dev->sector + STRIPE_SECTORS) { |
815 | tx = async_copy_data(0, rbi, dev->page, | 827 | tx = async_copy_data(0, rbi, dev->page, |
@@ -1145,12 +1157,12 @@ ops_run_biodrain(struct stripe_head *sh, struct dma_async_tx_descriptor *tx) | |||
1145 | if (test_and_clear_bit(R5_Wantdrain, &dev->flags)) { | 1157 | if (test_and_clear_bit(R5_Wantdrain, &dev->flags)) { |
1146 | struct bio *wbi; | 1158 | struct bio *wbi; |
1147 | 1159 | ||
1148 | spin_lock_irq(&sh->raid_conf->device_lock); | 1160 | spin_lock_irq(&sh->stripe_lock); |
1149 | chosen = dev->towrite; | 1161 | chosen = dev->towrite; |
1150 | dev->towrite = NULL; | 1162 | dev->towrite = NULL; |
1151 | BUG_ON(dev->written); | 1163 | BUG_ON(dev->written); |
1152 | wbi = dev->written = chosen; | 1164 | wbi = dev->written = chosen; |
1153 | spin_unlock_irq(&sh->raid_conf->device_lock); | 1165 | spin_unlock_irq(&sh->stripe_lock); |
1154 | 1166 | ||
1155 | while (wbi && wbi->bi_sector < | 1167 | while (wbi && wbi->bi_sector < |
1156 | dev->sector + STRIPE_SECTORS) { | 1168 | dev->sector + STRIPE_SECTORS) { |
@@ -1455,6 +1467,8 @@ static int grow_one_stripe(struct r5conf *conf) | |||
1455 | init_waitqueue_head(&sh->ops.wait_for_ops); | 1467 | init_waitqueue_head(&sh->ops.wait_for_ops); |
1456 | #endif | 1468 | #endif |
1457 | 1469 | ||
1470 | spin_lock_init(&sh->stripe_lock); | ||
1471 | |||
1458 | if (grow_buffers(sh)) { | 1472 | if (grow_buffers(sh)) { |
1459 | shrink_buffers(sh); | 1473 | shrink_buffers(sh); |
1460 | kmem_cache_free(conf->slab_cache, sh); | 1474 | kmem_cache_free(conf->slab_cache, sh); |
@@ -1740,7 +1754,9 @@ static void raid5_end_read_request(struct bio * bi, int error) | |||
1740 | atomic_add(STRIPE_SECTORS, &rdev->corrected_errors); | 1754 | atomic_add(STRIPE_SECTORS, &rdev->corrected_errors); |
1741 | clear_bit(R5_ReadError, &sh->dev[i].flags); | 1755 | clear_bit(R5_ReadError, &sh->dev[i].flags); |
1742 | clear_bit(R5_ReWrite, &sh->dev[i].flags); | 1756 | clear_bit(R5_ReWrite, &sh->dev[i].flags); |
1743 | } | 1757 | } else if (test_bit(R5_ReadNoMerge, &sh->dev[i].flags)) |
1758 | clear_bit(R5_ReadNoMerge, &sh->dev[i].flags); | ||
1759 | |||
1744 | if (atomic_read(&rdev->read_errors)) | 1760 | if (atomic_read(&rdev->read_errors)) |
1745 | atomic_set(&rdev->read_errors, 0); | 1761 | atomic_set(&rdev->read_errors, 0); |
1746 | } else { | 1762 | } else { |
@@ -1785,7 +1801,11 @@ static void raid5_end_read_request(struct bio * bi, int error) | |||
1785 | else | 1801 | else |
1786 | retry = 1; | 1802 | retry = 1; |
1787 | if (retry) | 1803 | if (retry) |
1788 | set_bit(R5_ReadError, &sh->dev[i].flags); | 1804 | if (test_bit(R5_ReadNoMerge, &sh->dev[i].flags)) { |
1805 | set_bit(R5_ReadError, &sh->dev[i].flags); | ||
1806 | clear_bit(R5_ReadNoMerge, &sh->dev[i].flags); | ||
1807 | } else | ||
1808 | set_bit(R5_ReadNoMerge, &sh->dev[i].flags); | ||
1789 | else { | 1809 | else { |
1790 | clear_bit(R5_ReadError, &sh->dev[i].flags); | 1810 | clear_bit(R5_ReadError, &sh->dev[i].flags); |
1791 | clear_bit(R5_ReWrite, &sh->dev[i].flags); | 1811 | clear_bit(R5_ReWrite, &sh->dev[i].flags); |
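[Editor's note] Read failures are now retried in two stages: the first failure only sets R5_ReadNoMerge (which, per the earlier ops_run_io() hunk, makes the retried read carry REQ_FLUSH so it is not merged); a second failure escalates to R5_ReadError. Sketched as a small helper (illustrative; the real logic sits inline in raid5_end_read_request()):

static void example_read_retry_state(struct r5dev *dev)
{
        if (test_bit(R5_ReadNoMerge, &dev->flags)) {
                /* already retried unmerged: treat it as a real read error */
                set_bit(R5_ReadError, &dev->flags);
                clear_bit(R5_ReadNoMerge, &dev->flags);
        } else {
                /* first failure: retry the read without merging */
                set_bit(R5_ReadNoMerge, &dev->flags);
        }
}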
@@ -2341,11 +2361,18 @@ static int add_stripe_bio(struct stripe_head *sh, struct bio *bi, int dd_idx, in | |||
2341 | (unsigned long long)bi->bi_sector, | 2361 | (unsigned long long)bi->bi_sector, |
2342 | (unsigned long long)sh->sector); | 2362 | (unsigned long long)sh->sector); |
2343 | 2363 | ||
2344 | 2364 | /* | |
2345 | spin_lock_irq(&conf->device_lock); | 2365 | * If several bios share a stripe, the bio bi_phys_segments acts as a |
2366 | * reference count to avoid races. The reference count should already be | ||
2367 | * increased before this function is called (for example, in | ||
2368 | * make_request()), so other bios sharing this stripe will not free the | ||
2369 | * stripe. If a stripe is owned by a single bio, the stripe lock will | ||
2370 | * protect it. | ||
2371 | */ | ||
2372 | spin_lock_irq(&sh->stripe_lock); | ||
2346 | if (forwrite) { | 2373 | if (forwrite) { |
2347 | bip = &sh->dev[dd_idx].towrite; | 2374 | bip = &sh->dev[dd_idx].towrite; |
2348 | if (*bip == NULL && sh->dev[dd_idx].written == NULL) | 2375 | if (*bip == NULL) |
2349 | firstwrite = 1; | 2376 | firstwrite = 1; |
2350 | } else | 2377 | } else |
2351 | bip = &sh->dev[dd_idx].toread; | 2378 | bip = &sh->dev[dd_idx].toread; |
@@ -2361,7 +2388,7 @@ static int add_stripe_bio(struct stripe_head *sh, struct bio *bi, int dd_idx, in | |||
2361 | if (*bip) | 2388 | if (*bip) |
2362 | bi->bi_next = *bip; | 2389 | bi->bi_next = *bip; |
2363 | *bip = bi; | 2390 | *bip = bi; |
2364 | bi->bi_phys_segments++; | 2391 | raid5_inc_bi_active_stripes(bi); |
2365 | 2392 | ||
2366 | if (forwrite) { | 2393 | if (forwrite) { |
2367 | /* check if page is covered */ | 2394 | /* check if page is covered */ |
@@ -2376,7 +2403,7 @@ static int add_stripe_bio(struct stripe_head *sh, struct bio *bi, int dd_idx, in | |||
2376 | if (sector >= sh->dev[dd_idx].sector + STRIPE_SECTORS) | 2403 | if (sector >= sh->dev[dd_idx].sector + STRIPE_SECTORS) |
2377 | set_bit(R5_OVERWRITE, &sh->dev[dd_idx].flags); | 2404 | set_bit(R5_OVERWRITE, &sh->dev[dd_idx].flags); |
2378 | } | 2405 | } |
2379 | spin_unlock_irq(&conf->device_lock); | 2406 | spin_unlock_irq(&sh->stripe_lock); |
2380 | 2407 | ||
2381 | pr_debug("added bi b#%llu to stripe s#%llu, disk %d.\n", | 2408 | pr_debug("added bi b#%llu to stripe s#%llu, disk %d.\n", |
2382 | (unsigned long long)(*bip)->bi_sector, | 2409 | (unsigned long long)(*bip)->bi_sector, |
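[Editor's note] The comment above states the new locking rule: the per-bio reference in bi_phys_segments keeps the bio alive while it spans several stripes, and the new per-stripe sh->stripe_lock protects the toread/towrite lists that conf->device_lock used to cover. A simplified sketch of attaching a read bio under that rule (overlap checks and list sorting omitted; the helper name is illustrative):

static void example_attach_read(struct stripe_head *sh, struct bio *bi,
                                int dd_idx)
{
        spin_lock_irq(&sh->stripe_lock);        /* per-stripe, not conf->device_lock */

        bi->bi_next = sh->dev[dd_idx].toread;
        sh->dev[dd_idx].toread = bi;
        raid5_inc_bi_active_stripes(bi);        /* one reference per stripe the bio joins */

        spin_unlock_irq(&sh->stripe_lock);
}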
@@ -2392,7 +2419,7 @@ static int add_stripe_bio(struct stripe_head *sh, struct bio *bi, int dd_idx, in | |||
2392 | 2419 | ||
2393 | overlap: | 2420 | overlap: |
2394 | set_bit(R5_Overlap, &sh->dev[dd_idx].flags); | 2421 | set_bit(R5_Overlap, &sh->dev[dd_idx].flags); |
2395 | spin_unlock_irq(&conf->device_lock); | 2422 | spin_unlock_irq(&sh->stripe_lock); |
2396 | return 0; | 2423 | return 0; |
2397 | } | 2424 | } |
2398 | 2425 | ||
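The comment added to add_stripe_bio() above describes two separate protections for a stripe's bio lists: a per-bio reference count (kept in bi_phys_segments and driven by raid5_inc_bi_active_stripes()/raid5_dec_bi_active_stripes()) that keeps the bio alive while any stripe still points at it, and the new per-stripe sh->stripe_lock that covers the towrite/toread updates themselves. Because the count is atomic, the final decrement no longer needs the array-wide conf->device_lock, which is exactly the locking the later hunks drop. The sketch below only illustrates that pattern in userspace terms, with a pthread mutex standing in for the kernel spinlock and toy types standing in for struct bio and struct stripe_head; it is not the driver code.

#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

struct toy_bio {
        atomic_int active_stripes;      /* stand-in for bi_phys_segments */
};

struct toy_stripe {
        pthread_mutex_t stripe_lock;    /* stand-in for sh->stripe_lock */
        struct toy_bio *towrite;        /* stand-in for dev->towrite */
};

/* add_stripe_bio()-like step: take the per-stripe lock, queue the bio and
 * bump its count so completing another stripe cannot end the bio early. */
static void toy_add_bio(struct toy_stripe *sh, struct toy_bio *bi)
{
        pthread_mutex_lock(&sh->stripe_lock);
        sh->towrite = bi;
        atomic_fetch_add(&bi->active_stripes, 1);
        pthread_mutex_unlock(&sh->stripe_lock);
}

/* completion-side step: drop the reference; only the caller that sees the
 * count reach zero may finish the bio. */
static int toy_finish_stripe(struct toy_stripe *sh, struct toy_bio *bi)
{
        pthread_mutex_lock(&sh->stripe_lock);
        sh->towrite = NULL;
        pthread_mutex_unlock(&sh->stripe_lock);
        return atomic_fetch_sub(&bi->active_stripes, 1) - 1 == 0;
}

int main(void)
{
        struct toy_bio bi = { .active_stripes = 1 };    /* biased initial count */
        struct toy_stripe a = { PTHREAD_MUTEX_INITIALIZER, NULL };
        struct toy_stripe b = { PTHREAD_MUTEX_INITIALIZER, NULL };

        toy_add_bio(&a, &bi);
        toy_add_bio(&b, &bi);
        printf("finished after stripe a? %d\n", toy_finish_stripe(&a, &bi));
        printf("finished after stripe b? %d\n", toy_finish_stripe(&b, &bi));
        /* the submitter drops the bias last, mirroring make_request() */
        printf("finished after bias drop? %d\n",
               atomic_fetch_sub(&bi.active_stripes, 1) - 1 == 0);
        return 0;
}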
@@ -2442,10 +2469,11 @@ handle_failed_stripe(struct r5conf *conf, struct stripe_head *sh, | |||
2442 | rdev_dec_pending(rdev, conf->mddev); | 2469 | rdev_dec_pending(rdev, conf->mddev); |
2443 | } | 2470 | } |
2444 | } | 2471 | } |
2445 | spin_lock_irq(&conf->device_lock); | 2472 | spin_lock_irq(&sh->stripe_lock); |
2446 | /* fail all writes first */ | 2473 | /* fail all writes first */ |
2447 | bi = sh->dev[i].towrite; | 2474 | bi = sh->dev[i].towrite; |
2448 | sh->dev[i].towrite = NULL; | 2475 | sh->dev[i].towrite = NULL; |
2476 | spin_unlock_irq(&sh->stripe_lock); | ||
2449 | if (bi) { | 2477 | if (bi) { |
2450 | s->to_write--; | 2478 | s->to_write--; |
2451 | bitmap_end = 1; | 2479 | bitmap_end = 1; |
@@ -2458,13 +2486,17 @@ handle_failed_stripe(struct r5conf *conf, struct stripe_head *sh, | |||
2458 | sh->dev[i].sector + STRIPE_SECTORS) { | 2486 | sh->dev[i].sector + STRIPE_SECTORS) { |
2459 | struct bio *nextbi = r5_next_bio(bi, sh->dev[i].sector); | 2487 | struct bio *nextbi = r5_next_bio(bi, sh->dev[i].sector); |
2460 | clear_bit(BIO_UPTODATE, &bi->bi_flags); | 2488 | clear_bit(BIO_UPTODATE, &bi->bi_flags); |
2461 | if (!raid5_dec_bi_phys_segments(bi)) { | 2489 | if (!raid5_dec_bi_active_stripes(bi)) { |
2462 | md_write_end(conf->mddev); | 2490 | md_write_end(conf->mddev); |
2463 | bi->bi_next = *return_bi; | 2491 | bi->bi_next = *return_bi; |
2464 | *return_bi = bi; | 2492 | *return_bi = bi; |
2465 | } | 2493 | } |
2466 | bi = nextbi; | 2494 | bi = nextbi; |
2467 | } | 2495 | } |
2496 | if (bitmap_end) | ||
2497 | bitmap_endwrite(conf->mddev->bitmap, sh->sector, | ||
2498 | STRIPE_SECTORS, 0, 0); | ||
2499 | bitmap_end = 0; | ||
2468 | /* and fail all 'written' */ | 2500 | /* and fail all 'written' */ |
2469 | bi = sh->dev[i].written; | 2501 | bi = sh->dev[i].written; |
2470 | sh->dev[i].written = NULL; | 2502 | sh->dev[i].written = NULL; |
@@ -2473,7 +2505,7 @@ handle_failed_stripe(struct r5conf *conf, struct stripe_head *sh, | |||
2473 | sh->dev[i].sector + STRIPE_SECTORS) { | 2505 | sh->dev[i].sector + STRIPE_SECTORS) { |
2474 | struct bio *bi2 = r5_next_bio(bi, sh->dev[i].sector); | 2506 | struct bio *bi2 = r5_next_bio(bi, sh->dev[i].sector); |
2475 | clear_bit(BIO_UPTODATE, &bi->bi_flags); | 2507 | clear_bit(BIO_UPTODATE, &bi->bi_flags); |
2476 | if (!raid5_dec_bi_phys_segments(bi)) { | 2508 | if (!raid5_dec_bi_active_stripes(bi)) { |
2477 | md_write_end(conf->mddev); | 2509 | md_write_end(conf->mddev); |
2478 | bi->bi_next = *return_bi; | 2510 | bi->bi_next = *return_bi; |
2479 | *return_bi = bi; | 2511 | *return_bi = bi; |
@@ -2497,14 +2529,13 @@ handle_failed_stripe(struct r5conf *conf, struct stripe_head *sh, | |||
2497 | struct bio *nextbi = | 2529 | struct bio *nextbi = |
2498 | r5_next_bio(bi, sh->dev[i].sector); | 2530 | r5_next_bio(bi, sh->dev[i].sector); |
2499 | clear_bit(BIO_UPTODATE, &bi->bi_flags); | 2531 | clear_bit(BIO_UPTODATE, &bi->bi_flags); |
2500 | if (!raid5_dec_bi_phys_segments(bi)) { | 2532 | if (!raid5_dec_bi_active_stripes(bi)) { |
2501 | bi->bi_next = *return_bi; | 2533 | bi->bi_next = *return_bi; |
2502 | *return_bi = bi; | 2534 | *return_bi = bi; |
2503 | } | 2535 | } |
2504 | bi = nextbi; | 2536 | bi = nextbi; |
2505 | } | 2537 | } |
2506 | } | 2538 | } |
2507 | spin_unlock_irq(&conf->device_lock); | ||
2508 | if (bitmap_end) | 2539 | if (bitmap_end) |
2509 | bitmap_endwrite(conf->mddev->bitmap, sh->sector, | 2540 | bitmap_endwrite(conf->mddev->bitmap, sh->sector, |
2510 | STRIPE_SECTORS, 0, 0); | 2541 | STRIPE_SECTORS, 0, 0); |
@@ -2708,30 +2739,23 @@ static void handle_stripe_clean_event(struct r5conf *conf, | |||
2708 | test_bit(R5_UPTODATE, &dev->flags)) { | 2739 | test_bit(R5_UPTODATE, &dev->flags)) { |
2709 | /* We can return any write requests */ | 2740 | /* We can return any write requests */ |
2710 | struct bio *wbi, *wbi2; | 2741 | struct bio *wbi, *wbi2; |
2711 | int bitmap_end = 0; | ||
2712 | pr_debug("Return write for disc %d\n", i); | 2742 | pr_debug("Return write for disc %d\n", i); |
2713 | spin_lock_irq(&conf->device_lock); | ||
2714 | wbi = dev->written; | 2743 | wbi = dev->written; |
2715 | dev->written = NULL; | 2744 | dev->written = NULL; |
2716 | while (wbi && wbi->bi_sector < | 2745 | while (wbi && wbi->bi_sector < |
2717 | dev->sector + STRIPE_SECTORS) { | 2746 | dev->sector + STRIPE_SECTORS) { |
2718 | wbi2 = r5_next_bio(wbi, dev->sector); | 2747 | wbi2 = r5_next_bio(wbi, dev->sector); |
2719 | if (!raid5_dec_bi_phys_segments(wbi)) { | 2748 | if (!raid5_dec_bi_active_stripes(wbi)) { |
2720 | md_write_end(conf->mddev); | 2749 | md_write_end(conf->mddev); |
2721 | wbi->bi_next = *return_bi; | 2750 | wbi->bi_next = *return_bi; |
2722 | *return_bi = wbi; | 2751 | *return_bi = wbi; |
2723 | } | 2752 | } |
2724 | wbi = wbi2; | 2753 | wbi = wbi2; |
2725 | } | 2754 | } |
2726 | if (dev->towrite == NULL) | 2755 | bitmap_endwrite(conf->mddev->bitmap, sh->sector, |
2727 | bitmap_end = 1; | 2756 | STRIPE_SECTORS, |
2728 | spin_unlock_irq(&conf->device_lock); | ||
2729 | if (bitmap_end) | ||
2730 | bitmap_endwrite(conf->mddev->bitmap, | ||
2731 | sh->sector, | ||
2732 | STRIPE_SECTORS, | ||
2733 | !test_bit(STRIPE_DEGRADED, &sh->state), | 2757 | !test_bit(STRIPE_DEGRADED, &sh->state), |
2734 | 0); | 2758 | 0); |
2735 | } | 2759 | } |
2736 | } | 2760 | } |
2737 | 2761 | ||
@@ -3183,7 +3207,6 @@ static void analyse_stripe(struct stripe_head *sh, struct stripe_head_state *s) | |||
3183 | 3207 | ||
3184 | /* Now to look around and see what can be done */ | 3208 | /* Now to look around and see what can be done */ |
3185 | rcu_read_lock(); | 3209 | rcu_read_lock(); |
3186 | spin_lock_irq(&conf->device_lock); | ||
3187 | for (i=disks; i--; ) { | 3210 | for (i=disks; i--; ) { |
3188 | struct md_rdev *rdev; | 3211 | struct md_rdev *rdev; |
3189 | sector_t first_bad; | 3212 | sector_t first_bad; |
@@ -3329,7 +3352,6 @@ static void analyse_stripe(struct stripe_head *sh, struct stripe_head_state *s) | |||
3329 | do_recovery = 1; | 3352 | do_recovery = 1; |
3330 | } | 3353 | } |
3331 | } | 3354 | } |
3332 | spin_unlock_irq(&conf->device_lock); | ||
3333 | if (test_bit(STRIPE_SYNCING, &sh->state)) { | 3355 | if (test_bit(STRIPE_SYNCING, &sh->state)) { |
3334 | /* If there is a failed device being replaced, | 3356 | /* If there is a failed device being replaced, |
3335 | * we must be recovering. | 3357 | * we must be recovering. |
@@ -3792,7 +3814,7 @@ static struct bio *remove_bio_from_retry(struct r5conf *conf) | |||
3792 | * this sets the active stripe count to 1 and the processed | 3814 | * this sets the active stripe count to 1 and the processed |
3793 | * stripe count to zero (upper 8 bits) | 3815 | * stripe count to zero (upper 8 bits) |
3794 | */ | 3816 | */ |
3795 | bi->bi_phys_segments = 1; /* biased count of active stripes */ | 3817 | raid5_set_bi_stripes(bi, 1); /* biased count of active stripes */ |
3796 | } | 3818 | } |
3797 | 3819 | ||
3798 | return bi; | 3820 | return bi; |
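The "biased count of active stripes" comment above implies that a single word carries two counters: an active-stripe count that starts at 1 (the submitter's bias) and a processed-stripe count that retry_aligned_read() uses to skip stripes it has already handled. The raid5_set_bi_stripes(), raid5_set_bi_processed_stripes() and raid5_bi_processed_stripes() helpers used in these hunks are defined in raid5.h and are not shown in this diff, so the sketch below is only a guess at the general packing idea; the 16-bit split and the helper bodies are assumptions made for the sketch, not taken from the patch.

#include <stdatomic.h>
#include <stdio.h>

/* one word, two counters: active stripes low, processed stripes high
 * (the split is chosen for the sketch only) */
#define ACTIVE_MASK     0xffffu
#define PROCESSED_SHIFT 16

static atomic_uint segs;        /* stand-in for bi->bi_phys_segments */

static void set_stripes(unsigned int active)    /* cf. raid5_set_bi_stripes() */
{
        atomic_store(&segs, active);            /* processed count becomes 0 */
}

static void set_processed(unsigned int cnt)     /* cf. raid5_set_bi_processed_stripes() */
{
        unsigned int old = atomic_load(&segs), new;

        do {                                    /* replace the high half only */
                new = (old & ACTIVE_MASK) | (cnt << PROCESSED_SHIFT);
        } while (!atomic_compare_exchange_weak(&segs, &old, new));
}

static unsigned int processed(void)             /* cf. raid5_bi_processed_stripes() */
{
        return atomic_load(&segs) >> PROCESSED_SHIFT;
}

int main(void)
{
        set_stripes(1);         /* biased active count, as in remove_bio_from_retry() */
        set_processed(5);       /* resume a retried read at stripe 5 */
        printf("active=%u processed=%u\n",
               atomic_load(&segs) & ACTIVE_MASK, processed());
        return 0;
}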
@@ -4170,7 +4192,7 @@ static void make_request(struct mddev *mddev, struct bio * bi) | |||
4170 | finish_wait(&conf->wait_for_overlap, &w); | 4192 | finish_wait(&conf->wait_for_overlap, &w); |
4171 | set_bit(STRIPE_HANDLE, &sh->state); | 4193 | set_bit(STRIPE_HANDLE, &sh->state); |
4172 | clear_bit(STRIPE_DELAYED, &sh->state); | 4194 | clear_bit(STRIPE_DELAYED, &sh->state); |
4173 | if ((bi->bi_rw & REQ_SYNC) && | 4195 | if ((bi->bi_rw & REQ_NOIDLE) && |
4174 | !test_and_set_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) | 4196 | !test_and_set_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) |
4175 | atomic_inc(&conf->preread_active_stripes); | 4197 | atomic_inc(&conf->preread_active_stripes); |
4176 | release_stripe_plug(mddev, sh); | 4198 | release_stripe_plug(mddev, sh); |
@@ -4182,9 +4204,7 @@ static void make_request(struct mddev *mddev, struct bio * bi) | |||
4182 | } | 4204 | } |
4183 | } | 4205 | } |
4184 | 4206 | ||
4185 | spin_lock_irq(&conf->device_lock); | 4207 | remaining = raid5_dec_bi_active_stripes(bi); |
4186 | remaining = raid5_dec_bi_phys_segments(bi); | ||
4187 | spin_unlock_irq(&conf->device_lock); | ||
4188 | if (remaining == 0) { | 4208 | if (remaining == 0) { |
4189 | 4209 | ||
4190 | if ( rw == WRITE ) | 4210 | if ( rw == WRITE ) |
@@ -4540,7 +4560,7 @@ static int retry_aligned_read(struct r5conf *conf, struct bio *raid_bio) | |||
4540 | sector += STRIPE_SECTORS, | 4560 | sector += STRIPE_SECTORS, |
4541 | scnt++) { | 4561 | scnt++) { |
4542 | 4562 | ||
4543 | if (scnt < raid5_bi_hw_segments(raid_bio)) | 4563 | if (scnt < raid5_bi_processed_stripes(raid_bio)) |
4544 | /* already done this stripe */ | 4564 | /* already done this stripe */ |
4545 | continue; | 4565 | continue; |
4546 | 4566 | ||
@@ -4548,25 +4568,24 @@ static int retry_aligned_read(struct r5conf *conf, struct bio *raid_bio) | |||
4548 | 4568 | ||
4549 | if (!sh) { | 4569 | if (!sh) { |
4550 | /* failed to get a stripe - must wait */ | 4570 | /* failed to get a stripe - must wait */ |
4551 | raid5_set_bi_hw_segments(raid_bio, scnt); | 4571 | raid5_set_bi_processed_stripes(raid_bio, scnt); |
4552 | conf->retry_read_aligned = raid_bio; | 4572 | conf->retry_read_aligned = raid_bio; |
4553 | return handled; | 4573 | return handled; |
4554 | } | 4574 | } |
4555 | 4575 | ||
4556 | if (!add_stripe_bio(sh, raid_bio, dd_idx, 0)) { | 4576 | if (!add_stripe_bio(sh, raid_bio, dd_idx, 0)) { |
4557 | release_stripe(sh); | 4577 | release_stripe(sh); |
4558 | raid5_set_bi_hw_segments(raid_bio, scnt); | 4578 | raid5_set_bi_processed_stripes(raid_bio, scnt); |
4559 | conf->retry_read_aligned = raid_bio; | 4579 | conf->retry_read_aligned = raid_bio; |
4560 | return handled; | 4580 | return handled; |
4561 | } | 4581 | } |
4562 | 4582 | ||
4583 | set_bit(R5_ReadNoMerge, &sh->dev[dd_idx].flags); | ||
4563 | handle_stripe(sh); | 4584 | handle_stripe(sh); |
4564 | release_stripe(sh); | 4585 | release_stripe(sh); |
4565 | handled++; | 4586 | handled++; |
4566 | } | 4587 | } |
4567 | spin_lock_irq(&conf->device_lock); | 4588 | remaining = raid5_dec_bi_active_stripes(raid_bio); |
4568 | remaining = raid5_dec_bi_phys_segments(raid_bio); | ||
4569 | spin_unlock_irq(&conf->device_lock); | ||
4570 | if (remaining == 0) | 4589 | if (remaining == 0) |
4571 | bio_endio(raid_bio, 0); | 4590 | bio_endio(raid_bio, 0); |
4572 | if (atomic_dec_and_test(&conf->active_aligned_reads)) | 4591 | if (atomic_dec_and_test(&conf->active_aligned_reads)) |
diff --git a/drivers/md/raid5.h b/drivers/md/raid5.h index 9a7b36f0a425..a9fc24901eda 100644 --- a/drivers/md/raid5.h +++ b/drivers/md/raid5.h | |||
@@ -210,6 +210,7 @@ struct stripe_head { | |||
210 | int disks; /* disks in stripe */ | 210 | int disks; /* disks in stripe */ |
211 | enum check_states check_state; | 211 | enum check_states check_state; |
212 | enum reconstruct_states reconstruct_state; | 212 | enum reconstruct_states reconstruct_state; |
213 | spinlock_t stripe_lock; | ||
213 | /** | 214 | /** |
214 | * struct stripe_operations | 215 | * struct stripe_operations |
215 | * @target - STRIPE_OP_COMPUTE_BLK target | 216 | * @target - STRIPE_OP_COMPUTE_BLK target |
@@ -273,6 +274,7 @@ enum r5dev_flags { | |||
273 | R5_Wantwrite, | 274 | R5_Wantwrite, |
274 | R5_Overlap, /* There is a pending overlapping request | 275 | R5_Overlap, /* There is a pending overlapping request |
275 | * on this block */ | 276 | * on this block */ |
277 | R5_ReadNoMerge, /* prevent bio from merging in block-layer */ | ||
276 | R5_ReadError, /* seen a read error here recently */ | 278 | R5_ReadError, /* seen a read error here recently */ |
277 | R5_ReWrite, /* have tried to over-write the readerror */ | 279 | R5_ReWrite, /* have tried to over-write the readerror */ |
278 | 280 | ||