Diffstat (limited to 'drivers/md')

-rw-r--r--  drivers/md/bcache/bcache.h    |   2
-rw-r--r--  drivers/md/bcache/closure.h   |   2
-rw-r--r--  drivers/md/bitmap.c           |   6
-rw-r--r--  drivers/md/dm-bufio.c         |   8
-rw-r--r--  drivers/md/dm-cache-target.c  |   3
-rw-r--r--  drivers/md/dm-crypt.c         |  61
-rw-r--r--  drivers/md/dm-mpath.c         |  12
-rw-r--r--  drivers/md/dm-snap.c          |   4
-rw-r--r--  drivers/md/dm-thin.c          | 106
-rw-r--r--  drivers/md/dm-verity.c        |  15
-rw-r--r--  drivers/md/dm.c               |   3
-rw-r--r--  drivers/md/md.c               |  20
-rw-r--r--  drivers/md/raid10.c           |  13
-rw-r--r--  drivers/md/raid5.c            | 163
-rw-r--r--  drivers/md/raid5.h            |   4

15 files changed, 292 insertions, 130 deletions
diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h
index 82c9c5d35251..d2ebcf323094 100644
--- a/drivers/md/bcache/bcache.h
+++ b/drivers/md/bcache/bcache.h
@@ -828,7 +828,7 @@ static inline bool cached_dev_get(struct cached_dev *dc)
 		return false;
 
 	/* Paired with the mb in cached_dev_attach */
-	smp_mb__after_atomic_inc();
+	smp_mb__after_atomic();
 	return true;
 }
 
diff --git a/drivers/md/bcache/closure.h b/drivers/md/bcache/closure.h
index 7ef7461912be..a08e3eeac3c5 100644
--- a/drivers/md/bcache/closure.h
+++ b/drivers/md/bcache/closure.h
@@ -243,7 +243,7 @@ static inline void set_closure_fn(struct closure *cl, closure_fn *fn,
 	cl->fn = fn;
 	cl->wq = wq;
 	/* between atomic_dec() in closure_put() */
-	smp_mb__before_atomic_dec();
+	smp_mb__before_atomic();
 }
 
 static inline void closure_queue(struct closure *cl)
diff --git a/drivers/md/bitmap.c b/drivers/md/bitmap.c
index 9a8e66ae04f5..67f8b31e2054 100644
--- a/drivers/md/bitmap.c
+++ b/drivers/md/bitmap.c
@@ -669,17 +669,13 @@ static inline unsigned long file_page_offset(struct bitmap_storage *store,
 /*
  * return a pointer to the page in the filemap that contains the given bit
  *
- * this lookup is complicated by the fact that the bitmap sb might be exactly
- * 1 page (e.g., x86) or less than 1 page -- so the bitmap might start on page
- * 0 or page 1
  */
 static inline struct page *filemap_get_page(struct bitmap_storage *store,
 					    unsigned long chunk)
 {
 	if (file_page_index(store, chunk) >= store->file_pages)
 		return NULL;
-	return store->filemap[file_page_index(store, chunk)
-			      - file_page_index(store, 0)];
+	return store->filemap[file_page_index(store, chunk)];
 }
 
 static int bitmap_storage_alloc(struct bitmap_storage *store,
diff --git a/drivers/md/dm-bufio.c b/drivers/md/dm-bufio.c
index 66c5d130c8c2..4e84095833db 100644
--- a/drivers/md/dm-bufio.c
+++ b/drivers/md/dm-bufio.c
@@ -607,9 +607,9 @@ static void write_endio(struct bio *bio, int error)
 
 	BUG_ON(!test_bit(B_WRITING, &b->state));
 
-	smp_mb__before_clear_bit();
+	smp_mb__before_atomic();
 	clear_bit(B_WRITING, &b->state);
-	smp_mb__after_clear_bit();
+	smp_mb__after_atomic();
 
 	wake_up_bit(&b->state, B_WRITING);
 }
@@ -997,9 +997,9 @@ static void read_endio(struct bio *bio, int error)
 
 	BUG_ON(!test_bit(B_READING, &b->state));
 
-	smp_mb__before_clear_bit();
+	smp_mb__before_atomic();
 	clear_bit(B_READING, &b->state);
-	smp_mb__after_clear_bit();
+	smp_mb__after_atomic();
 
 	wake_up_bit(&b->state, B_READING);
 }
diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c
index 1bf4a71919ec..5f054c44b485 100644
--- a/drivers/md/dm-cache-target.c
+++ b/drivers/md/dm-cache-target.c
@@ -2178,6 +2178,8 @@ static int cache_create(struct cache_args *ca, struct cache **result)
 	ti->num_discard_bios = 1;
 	ti->discards_supported = true;
 	ti->discard_zeroes_data_unsupported = true;
+	/* Discard bios must be split on a block boundary */
+	ti->split_discard_bios = true;
 
 	cache->features = ca->features;
 	ti->per_bio_data_size = get_per_bio_data_size(cache);
@@ -2488,6 +2490,7 @@ static int cache_map(struct dm_target *ti, struct bio *bio)
 
 	} else {
 		inc_hit_counter(cache, bio);
+		pb->all_io_entry = dm_deferred_entry_inc(cache->all_io_ds);
 
 		if (bio_data_dir(bio) == WRITE && writethrough_mode(&cache->features) &&
 		    !is_dirty(cache, lookup_result.cblock))
diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c
index 784695d22fde..53b213226c01 100644
--- a/drivers/md/dm-crypt.c
+++ b/drivers/md/dm-crypt.c
@@ -19,7 +19,6 @@
 #include <linux/crypto.h>
 #include <linux/workqueue.h>
 #include <linux/backing-dev.h>
-#include <linux/percpu.h>
 #include <linux/atomic.h>
 #include <linux/scatterlist.h>
 #include <asm/page.h>
@@ -43,6 +42,7 @@ struct convert_context {
 	struct bvec_iter iter_out;
 	sector_t cc_sector;
 	atomic_t cc_pending;
+	struct ablkcipher_request *req;
 };
 
 /*
@@ -111,15 +111,7 @@ struct iv_tcw_private {
 enum flags { DM_CRYPT_SUSPENDED, DM_CRYPT_KEY_VALID };
 
 /*
- * Duplicated per-CPU state for cipher.
- */
-struct crypt_cpu {
-	struct ablkcipher_request *req;
-};
-
-/*
- * The fields in here must be read only after initialization,
- * changing state should be in crypt_cpu.
+ * The fields in here must be read only after initialization.
  */
 struct crypt_config {
 	struct dm_dev *dev;
@@ -150,12 +142,6 @@ struct crypt_config {
 	sector_t iv_offset;
 	unsigned int iv_size;
 
-	/*
-	 * Duplicated per cpu state. Access through
-	 * per_cpu_ptr() only.
-	 */
-	struct crypt_cpu __percpu *cpu;
-
 	/* ESSIV: struct crypto_cipher *essiv_tfm */
 	void *iv_private;
 	struct crypto_ablkcipher **tfms;
@@ -192,11 +178,6 @@ static void clone_init(struct dm_crypt_io *, struct bio *);
 static void kcryptd_queue_crypt(struct dm_crypt_io *io);
 static u8 *iv_of_dmreq(struct crypt_config *cc, struct dm_crypt_request *dmreq);
 
-static struct crypt_cpu *this_crypt_config(struct crypt_config *cc)
-{
-	return this_cpu_ptr(cc->cpu);
-}
-
 /*
  * Use this to access cipher attributes that are the same for each CPU.
  */
@@ -903,16 +884,15 @@ static void kcryptd_async_done(struct crypto_async_request *async_req,
 static void crypt_alloc_req(struct crypt_config *cc,
 			    struct convert_context *ctx)
 {
-	struct crypt_cpu *this_cc = this_crypt_config(cc);
 	unsigned key_index = ctx->cc_sector & (cc->tfms_count - 1);
 
-	if (!this_cc->req)
-		this_cc->req = mempool_alloc(cc->req_pool, GFP_NOIO);
+	if (!ctx->req)
+		ctx->req = mempool_alloc(cc->req_pool, GFP_NOIO);
 
-	ablkcipher_request_set_tfm(this_cc->req, cc->tfms[key_index]);
-	ablkcipher_request_set_callback(this_cc->req,
+	ablkcipher_request_set_tfm(ctx->req, cc->tfms[key_index]);
+	ablkcipher_request_set_callback(ctx->req,
 	    CRYPTO_TFM_REQ_MAY_BACKLOG | CRYPTO_TFM_REQ_MAY_SLEEP,
-	    kcryptd_async_done, dmreq_of_req(cc, this_cc->req));
+	    kcryptd_async_done, dmreq_of_req(cc, ctx->req));
 }
 
 /*
@@ -921,7 +901,6 @@ static void crypt_alloc_req(struct crypt_config *cc,
 static int crypt_convert(struct crypt_config *cc,
 			 struct convert_context *ctx)
 {
-	struct crypt_cpu *this_cc = this_crypt_config(cc);
 	int r;
 
 	atomic_set(&ctx->cc_pending, 1);
@@ -932,7 +911,7 @@ static int crypt_convert(struct crypt_config *cc,
 
 		atomic_inc(&ctx->cc_pending);
 
-		r = crypt_convert_block(cc, ctx, this_cc->req);
+		r = crypt_convert_block(cc, ctx, ctx->req);
 
 		switch (r) {
 		/* async */
@@ -941,7 +920,7 @@ static int crypt_convert(struct crypt_config *cc,
 			reinit_completion(&ctx->restart);
 			/* fall through*/
 		case -EINPROGRESS:
-			this_cc->req = NULL;
+			ctx->req = NULL;
 			ctx->cc_sector++;
 			continue;
 
@@ -1040,6 +1019,7 @@ static struct dm_crypt_io *crypt_io_alloc(struct crypt_config *cc,
 	io->sector = sector;
 	io->error = 0;
 	io->base_io = NULL;
+	io->ctx.req = NULL;
 	atomic_set(&io->io_pending, 0);
 
 	return io;
@@ -1065,6 +1045,8 @@ static void crypt_dec_pending(struct dm_crypt_io *io)
 	if (!atomic_dec_and_test(&io->io_pending))
 		return;
 
+	if (io->ctx.req)
+		mempool_free(io->ctx.req, cc->req_pool);
 	mempool_free(io, cc->io_pool);
 
 	if (likely(!base_io))
@@ -1492,8 +1474,6 @@ static int crypt_wipe_key(struct crypt_config *cc)
 static void crypt_dtr(struct dm_target *ti)
 {
 	struct crypt_config *cc = ti->private;
-	struct crypt_cpu *cpu_cc;
-	int cpu;
 
 	ti->private = NULL;
 
@@ -1505,13 +1485,6 @@ static void crypt_dtr(struct dm_target *ti)
 	if (cc->crypt_queue)
 		destroy_workqueue(cc->crypt_queue);
 
-	if (cc->cpu)
-		for_each_possible_cpu(cpu) {
-			cpu_cc = per_cpu_ptr(cc->cpu, cpu);
-			if (cpu_cc->req)
-				mempool_free(cpu_cc->req, cc->req_pool);
-		}
-
 	crypt_free_tfms(cc);
 
 	if (cc->bs)
@@ -1530,9 +1503,6 @@ static void crypt_dtr(struct dm_target *ti)
 	if (cc->dev)
 		dm_put_device(ti, cc->dev);
 
-	if (cc->cpu)
-		free_percpu(cc->cpu);
-
 	kzfree(cc->cipher);
 	kzfree(cc->cipher_string);
 
@@ -1588,13 +1558,6 @@ static int crypt_ctr_cipher(struct dm_target *ti,
 	if (tmp)
 		DMWARN("Ignoring unexpected additional cipher options");
 
-	cc->cpu = __alloc_percpu(sizeof(*(cc->cpu)),
-				 __alignof__(struct crypt_cpu));
-	if (!cc->cpu) {
-		ti->error = "Cannot allocate per cpu state";
-		goto bad_mem;
-	}
-
 	/*
 	 * For compatibility with the original dm-crypt mapping format, if
 	 * only the cipher name is supplied, use cbc-plain.
diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c
index aa009e865871..ebfa411d1a7d 100644
--- a/drivers/md/dm-mpath.c
+++ b/drivers/md/dm-mpath.c
@@ -445,11 +445,11 @@ static int queue_if_no_path(struct multipath *m, unsigned queue_if_no_path,
 	else
 		m->saved_queue_if_no_path = queue_if_no_path;
 	m->queue_if_no_path = queue_if_no_path;
-	if (!m->queue_if_no_path)
-		dm_table_run_md_queue_async(m->ti->table);
-
 	spin_unlock_irqrestore(&m->lock, flags);
 
+	if (!queue_if_no_path)
+		dm_table_run_md_queue_async(m->ti->table);
+
 	return 0;
 }
 
@@ -954,7 +954,7 @@ out:
  */
 static int reinstate_path(struct pgpath *pgpath)
 {
-	int r = 0;
+	int r = 0, run_queue = 0;
 	unsigned long flags;
 	struct multipath *m = pgpath->pg->m;
 
@@ -978,7 +978,7 @@ static int reinstate_path(struct pgpath *pgpath)
 
 	if (!m->nr_valid_paths++) {
 		m->current_pgpath = NULL;
-		dm_table_run_md_queue_async(m->ti->table);
+		run_queue = 1;
 	} else if (m->hw_handler_name && (m->current_pg == pgpath->pg)) {
 		if (queue_work(kmpath_handlerd, &pgpath->activate_path.work))
 			m->pg_init_in_progress++;
@@ -991,6 +991,8 @@ static int reinstate_path(struct pgpath *pgpath)
 
 out:
 	spin_unlock_irqrestore(&m->lock, flags);
+	if (run_queue)
+		dm_table_run_md_queue_async(m->ti->table);
 
 	return r;
 }
diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c
index ebddef5237e4..8e0caed0bf74 100644
--- a/drivers/md/dm-snap.c
+++ b/drivers/md/dm-snap.c
@@ -642,7 +642,7 @@ static void free_pending_exception(struct dm_snap_pending_exception *pe)
 	struct dm_snapshot *s = pe->snap;
 
 	mempool_free(pe, s->pending_pool);
-	smp_mb__before_atomic_dec();
+	smp_mb__before_atomic();
 	atomic_dec(&s->pending_exceptions_count);
 }
 
@@ -783,7 +783,7 @@ static int init_hash_tables(struct dm_snapshot *s)
 static void merge_shutdown(struct dm_snapshot *s)
 {
 	clear_bit_unlock(RUNNING_MERGE, &s->state_bits);
-	smp_mb__after_clear_bit();
+	smp_mb__after_atomic();
 	wake_up_bit(&s->state_bits, RUNNING_MERGE);
 }
 
diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c
index 53728be84dee..242ac2ea5f29 100644
--- a/drivers/md/dm-thin.c
+++ b/drivers/md/dm-thin.c
@@ -27,6 +27,9 @@
 #define MAPPING_POOL_SIZE 1024
 #define PRISON_CELLS 1024
 #define COMMIT_PERIOD HZ
+#define NO_SPACE_TIMEOUT_SECS 60
+
+static unsigned no_space_timeout_secs = NO_SPACE_TIMEOUT_SECS;
 
 DECLARE_DM_KCOPYD_THROTTLE_WITH_MODULE_PARM(snapshot_copy_throttle,
 		"A percentage of time allocated for copy on write");
@@ -175,6 +178,7 @@ struct pool {
 	struct workqueue_struct *wq;
 	struct work_struct worker;
 	struct delayed_work waker;
+	struct delayed_work no_space_timeout;
 
 	unsigned long last_commit_jiffies;
 	unsigned ref_count;
@@ -232,6 +236,13 @@ struct thin_c {
 	struct bio_list deferred_bio_list;
 	struct bio_list retry_on_resume_list;
 	struct rb_root sort_bio_list; /* sorted list of deferred bios */
+
+	/*
+	 * Ensures the thin is not destroyed until the worker has finished
+	 * iterating the active_thins list.
+	 */
+	atomic_t refcount;
+	struct completion can_destroy;
 };
 
 /*----------------------------------------------------------------*/
@@ -928,7 +939,7 @@ static int commit(struct pool *pool)
 {
 	int r;
 
-	if (get_pool_mode(pool) != PM_WRITE)
+	if (get_pool_mode(pool) >= PM_READ_ONLY)
 		return -EINVAL;
 
 	r = dm_pool_commit_metadata(pool->pmd);
@@ -1486,6 +1497,45 @@ static void process_thin_deferred_bios(struct thin_c *tc)
 	blk_finish_plug(&plug);
 }
 
+static void thin_get(struct thin_c *tc);
+static void thin_put(struct thin_c *tc);
+
+/*
+ * We can't hold rcu_read_lock() around code that can block.  So we
+ * find a thin with the rcu lock held; bump a refcount; then drop
+ * the lock.
+ */
+static struct thin_c *get_first_thin(struct pool *pool)
+{
+	struct thin_c *tc = NULL;
+
+	rcu_read_lock();
+	if (!list_empty(&pool->active_thins)) {
+		tc = list_entry_rcu(pool->active_thins.next, struct thin_c, list);
+		thin_get(tc);
+	}
+	rcu_read_unlock();
+
+	return tc;
+}
+
+static struct thin_c *get_next_thin(struct pool *pool, struct thin_c *tc)
+{
+	struct thin_c *old_tc = tc;
+
+	rcu_read_lock();
+	list_for_each_entry_continue_rcu(tc, &pool->active_thins, list) {
+		thin_get(tc);
+		thin_put(old_tc);
+		rcu_read_unlock();
+		return tc;
+	}
+	thin_put(old_tc);
+	rcu_read_unlock();
+
+	return NULL;
+}
+
 static void process_deferred_bios(struct pool *pool)
 {
 	unsigned long flags;
@@ -1493,10 +1543,11 @@ static void process_deferred_bios(struct pool *pool)
 	struct bio_list bios;
 	struct thin_c *tc;
 
-	rcu_read_lock();
-	list_for_each_entry_rcu(tc, &pool->active_thins, list)
+	tc = get_first_thin(pool);
+	while (tc) {
 		process_thin_deferred_bios(tc);
-	rcu_read_unlock();
+		tc = get_next_thin(pool, tc);
+	}
 
 	/*
 	 * If there are any deferred flush bios, we must commit
@@ -1543,6 +1594,20 @@ static void do_waker(struct work_struct *ws)
 	queue_delayed_work(pool->wq, &pool->waker, COMMIT_PERIOD);
 }
 
+/*
+ * We're holding onto IO to allow userland time to react.  After the
+ * timeout either the pool will have been resized (and thus back in
+ * PM_WRITE mode), or we degrade to PM_READ_ONLY and start erroring IO.
+ */
+static void do_no_space_timeout(struct work_struct *ws)
+{
+	struct pool *pool = container_of(to_delayed_work(ws), struct pool,
+					 no_space_timeout);
+
+	if (get_pool_mode(pool) == PM_OUT_OF_DATA_SPACE && !pool->pf.error_if_no_space)
+		set_pool_mode(pool, PM_READ_ONLY);
+}
+
 /*----------------------------------------------------------------*/
 
 struct noflush_work {
@@ -1578,7 +1643,7 @@ static void noflush_work(struct thin_c *tc, void (*fn)(struct work_struct *))
 {
 	struct noflush_work w;
 
-	INIT_WORK(&w.worker, fn);
+	INIT_WORK_ONSTACK(&w.worker, fn);
 	w.tc = tc;
 	atomic_set(&w.complete, 0);
 	init_waitqueue_head(&w.wait);
@@ -1607,6 +1672,7 @@ static void set_pool_mode(struct pool *pool, enum pool_mode new_mode)
 	struct pool_c *pt = pool->ti->private;
 	bool needs_check = dm_pool_metadata_needs_check(pool->pmd);
 	enum pool_mode old_mode = get_pool_mode(pool);
+	unsigned long no_space_timeout = ACCESS_ONCE(no_space_timeout_secs) * HZ;
 
 	/*
 	 * Never allow the pool to transition to PM_WRITE mode if user
@@ -1668,6 +1734,9 @@ static void set_pool_mode(struct pool *pool, enum pool_mode new_mode)
 		pool->process_discard = process_discard;
 		pool->process_prepared_mapping = process_prepared_mapping;
 		pool->process_prepared_discard = process_prepared_discard_passdown;
+
+		if (!pool->pf.error_if_no_space && no_space_timeout)
+			queue_delayed_work(pool->wq, &pool->no_space_timeout, no_space_timeout);
 		break;
 
 	case PM_WRITE:
@@ -2053,6 +2122,7 @@ static struct pool *pool_create(struct mapped_device *pool_md,
 
 	INIT_WORK(&pool->worker, do_worker);
 	INIT_DELAYED_WORK(&pool->waker, do_waker);
+	INIT_DELAYED_WORK(&pool->no_space_timeout, do_no_space_timeout);
 	spin_lock_init(&pool->lock);
 	bio_list_init(&pool->deferred_flush_bios);
 	INIT_LIST_HEAD(&pool->prepared_mappings);
@@ -2615,6 +2685,7 @@ static void pool_postsuspend(struct dm_target *ti)
 	struct pool *pool = pt->pool;
 
 	cancel_delayed_work(&pool->waker);
+	cancel_delayed_work(&pool->no_space_timeout);
 	flush_workqueue(pool->wq);
 	(void) commit(pool);
 }
@@ -3061,11 +3132,25 @@ static struct target_type pool_target = {
 /*----------------------------------------------------------------
  * Thin target methods
  *--------------------------------------------------------------*/
+static void thin_get(struct thin_c *tc)
+{
+	atomic_inc(&tc->refcount);
+}
+
+static void thin_put(struct thin_c *tc)
+{
+	if (atomic_dec_and_test(&tc->refcount))
+		complete(&tc->can_destroy);
+}
+
 static void thin_dtr(struct dm_target *ti)
 {
 	struct thin_c *tc = ti->private;
 	unsigned long flags;
 
+	thin_put(tc);
+	wait_for_completion(&tc->can_destroy);
+
 	spin_lock_irqsave(&tc->pool->lock, flags);
 	list_del_rcu(&tc->list);
 	spin_unlock_irqrestore(&tc->pool->lock, flags);
@@ -3101,6 +3186,7 @@ static int thin_ctr(struct dm_target *ti, unsigned argc, char **argv)
 	struct thin_c *tc;
 	struct dm_dev *pool_dev, *origin_dev;
 	struct mapped_device *pool_md;
+	unsigned long flags;
 
 	mutex_lock(&dm_thin_pool_table.mutex);
 
@@ -3191,9 +3277,12 @@ static int thin_ctr(struct dm_target *ti, unsigned argc, char **argv)
 
 	mutex_unlock(&dm_thin_pool_table.mutex);
 
-	spin_lock(&tc->pool->lock);
+	atomic_set(&tc->refcount, 1);
+	init_completion(&tc->can_destroy);
+
+	spin_lock_irqsave(&tc->pool->lock, flags);
 	list_add_tail_rcu(&tc->list, &tc->pool->active_thins);
-	spin_unlock(&tc->pool->lock);
+	spin_unlock_irqrestore(&tc->pool->lock, flags);
 	/*
 	 * This synchronize_rcu() call is needed here otherwise we risk a
 	 * wake_worker() call finding no bios to process (because the newly
@@ -3422,6 +3511,9 @@ static void dm_thin_exit(void)
 module_init(dm_thin_init);
 module_exit(dm_thin_exit);
 
+module_param_named(no_space_timeout, no_space_timeout_secs, uint, S_IRUGO | S_IWUSR);
+MODULE_PARM_DESC(no_space_timeout, "Out of data space queue IO timeout in seconds");
+
 MODULE_DESCRIPTION(DM_NAME " thin provisioning target");
 MODULE_AUTHOR("Joe Thornber <dm-devel@redhat.com>");
 MODULE_LICENSE("GPL");
diff --git a/drivers/md/dm-verity.c b/drivers/md/dm-verity.c
index 796007a5e0e1..7a7bab8947ae 100644
--- a/drivers/md/dm-verity.c
+++ b/drivers/md/dm-verity.c
@@ -330,15 +330,17 @@ test_block_hash:
 				return r;
 			}
 		}
-
 		todo = 1 << v->data_dev_block_bits;
-		while (io->iter.bi_size) {
+		do {
 			u8 *page;
+			unsigned len;
 			struct bio_vec bv = bio_iter_iovec(bio, io->iter);
 
 			page = kmap_atomic(bv.bv_page);
-			r = crypto_shash_update(desc, page + bv.bv_offset,
-						bv.bv_len);
+			len = bv.bv_len;
+			if (likely(len >= todo))
+				len = todo;
+			r = crypto_shash_update(desc, page + bv.bv_offset, len);
 			kunmap_atomic(page);
 
 			if (r < 0) {
@@ -346,8 +348,9 @@ test_block_hash:
 				return r;
 			}
 
-			bio_advance_iter(bio, &io->iter, bv.bv_len);
-		}
+			bio_advance_iter(bio, &io->iter, len);
+			todo -= len;
+		} while (todo);
 
 		if (!v->version) {
 			r = crypto_shash_update(desc, v->salt, v->salt_size);
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 455e64916498..aa9e093343d4 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -1544,7 +1544,6 @@ static int setup_clone(struct request *clone, struct request *rq,
 	clone->cmd = rq->cmd;
 	clone->cmd_len = rq->cmd_len;
 	clone->sense = rq->sense;
-	clone->buffer = rq->buffer;
 	clone->end_io = end_clone_request;
 	clone->end_io_data = tio;
 
@@ -2447,7 +2446,7 @@ static void dm_wq_work(struct work_struct *work)
 static void dm_queue_flush(struct mapped_device *md)
 {
 	clear_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags);
-	smp_mb__after_clear_bit();
+	smp_mb__after_atomic();
 	queue_work(md->wq, &md->work);
 }
 
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 8fda38d23e38..34846856dbc6 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -3448,6 +3448,8 @@ level_store(struct mddev *mddev, const char *buf, size_t len)
 		mddev->level = LEVEL_NONE;
 		return rv;
 	}
+	if (mddev->ro)
+		return -EROFS;
 
 	/* request to change the personality. Need to ensure:
 	 *  - array is not engaged in resync/recovery/reshape
@@ -3634,6 +3636,8 @@ layout_store(struct mddev *mddev, const char *buf, size_t len)
 		int err;
 		if (mddev->pers->check_reshape == NULL)
 			return -EBUSY;
+		if (mddev->ro)
+			return -EROFS;
 		mddev->new_layout = n;
 		err = mddev->pers->check_reshape(mddev);
 		if (err) {
@@ -3723,6 +3727,8 @@ chunk_size_store(struct mddev *mddev, const char *buf, size_t len)
 		int err;
 		if (mddev->pers->check_reshape == NULL)
 			return -EBUSY;
+		if (mddev->ro)
+			return -EROFS;
 		mddev->new_chunk_sectors = n >> 9;
 		err = mddev->pers->check_reshape(mddev);
 		if (err) {
@@ -6135,6 +6141,8 @@ static int update_size(struct mddev *mddev, sector_t num_sectors)
 	 */
 	if (mddev->sync_thread)
 		return -EBUSY;
+	if (mddev->ro)
+		return -EROFS;
 
 	rdev_for_each(rdev, mddev) {
 		sector_t avail = rdev->sectors;
@@ -6157,6 +6165,8 @@ static int update_raid_disks(struct mddev *mddev, int raid_disks)
 	/* change the number of raid disks */
 	if (mddev->pers->check_reshape == NULL)
 		return -EINVAL;
+	if (mddev->ro)
+		return -EROFS;
 	if (raid_disks <= 0 ||
 	    (mddev->max_disks && raid_disks >= mddev->max_disks))
 		return -EINVAL;
@@ -7381,8 +7391,10 @@ void md_do_sync(struct md_thread *thread)
 	/* just incase thread restarts... */
 	if (test_bit(MD_RECOVERY_DONE, &mddev->recovery))
 		return;
-	if (mddev->ro) /* never try to sync a read-only array */
+	if (mddev->ro) {/* never try to sync a read-only array */
+		set_bit(MD_RECOVERY_INTR, &mddev->recovery);
 		return;
+	}
 
 	if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) {
 		if (test_bit(MD_RECOVERY_CHECK, &mddev->recovery)) {
@@ -7824,6 +7836,7 @@ void md_check_recovery(struct mddev *mddev)
 			/* There is no thread, but we need to call
 			 * ->spare_active and clear saved_raid_disk
 			 */
+			set_bit(MD_RECOVERY_INTR, &mddev->recovery);
 			md_reap_sync_thread(mddev);
 			clear_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
 			goto unlock;
@@ -8330,7 +8343,7 @@ static int md_clear_badblocks(struct badblocks *bb, sector_t s, int sectors)
 		if (a < s) {
 			/* we need to split this range */
 			if (bb->count >= MD_MAX_BADBLOCKS) {
-				rv = 0;
+				rv = -ENOSPC;
 				goto out;
 			}
 			memmove(p+lo+1, p+lo, (bb->count - lo) * 8);
@@ -8516,7 +8529,8 @@ static int md_notify_reboot(struct notifier_block *this,
 		if (mddev_trylock(mddev)) {
 			if (mddev->pers)
 				__md_stop_writes(mddev);
-			mddev->safemode = 2;
+			if (mddev->persistent)
+				mddev->safemode = 2;
 			mddev_unlock(mddev);
 		}
 		need_delay = 1;
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 33fc408e5eac..cb882aae9e20 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -1172,6 +1172,13 @@ static void __make_request(struct mddev *mddev, struct bio *bio)
 	int max_sectors;
 	int sectors;
 
+	/*
+	 * Register the new request and wait if the reconstruction
+	 * thread has put up a bar for new requests.
+	 * Continue immediately if no resync is active currently.
+	 */
+	wait_barrier(conf);
+
 	sectors = bio_sectors(bio);
 	while (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
 	    bio->bi_iter.bi_sector < conf->reshape_progress &&
@@ -1552,12 +1559,6 @@ static void make_request(struct mddev *mddev, struct bio *bio)
 
 	md_write_start(mddev, bio);
 
-	/*
-	 * Register the new request and wait if the reconstruction
-	 * thread has put up a bar for new requests.
-	 * Continue immediately if no resync is active currently.
-	 */
-	wait_barrier(conf);
 
 	do {
 
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 25247a852912..6234b2e84587 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c | |||
@@ -292,9 +292,12 @@ static void do_release_stripe(struct r5conf *conf, struct stripe_head *sh, | |||
292 | BUG_ON(atomic_read(&conf->active_stripes)==0); | 292 | BUG_ON(atomic_read(&conf->active_stripes)==0); |
293 | if (test_bit(STRIPE_HANDLE, &sh->state)) { | 293 | if (test_bit(STRIPE_HANDLE, &sh->state)) { |
294 | if (test_bit(STRIPE_DELAYED, &sh->state) && | 294 | if (test_bit(STRIPE_DELAYED, &sh->state) && |
295 | !test_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) | 295 | !test_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) { |
296 | list_add_tail(&sh->lru, &conf->delayed_list); | 296 | list_add_tail(&sh->lru, &conf->delayed_list); |
297 | else if (test_bit(STRIPE_BIT_DELAY, &sh->state) && | 297 | if (atomic_read(&conf->preread_active_stripes) |
298 | < IO_THRESHOLD) | ||
299 | md_wakeup_thread(conf->mddev->thread); | ||
300 | } else if (test_bit(STRIPE_BIT_DELAY, &sh->state) && | ||
298 | sh->bm_seq - conf->seq_write > 0) | 301 | sh->bm_seq - conf->seq_write > 0) |
299 | list_add_tail(&sh->lru, &conf->bitmap_list); | 302 | list_add_tail(&sh->lru, &conf->bitmap_list); |
300 | else { | 303 | else { |
@@ -413,6 +416,11 @@ static void release_stripe(struct stripe_head *sh) | |||
413 | int hash; | 416 | int hash; |
414 | bool wakeup; | 417 | bool wakeup; |
415 | 418 | ||
419 | /* Avoid release_list until the last reference. | ||
420 | */ | ||
421 | if (atomic_add_unless(&sh->count, -1, 1)) | ||
422 | return; | ||
423 | |||
416 | if (unlikely(!conf->mddev->thread) || | 424 | if (unlikely(!conf->mddev->thread) || |
417 | test_and_set_bit(STRIPE_ON_RELEASE_LIST, &sh->state)) | 425 | test_and_set_bit(STRIPE_ON_RELEASE_LIST, &sh->state)) |
418 | goto slow_path; | 426 | goto slow_path; |
@@ -479,6 +487,7 @@ static void shrink_buffers(struct stripe_head *sh) | |||
479 | int num = sh->raid_conf->pool_size; | 487 | int num = sh->raid_conf->pool_size; |
480 | 488 | ||
481 | for (i = 0; i < num ; i++) { | 489 | for (i = 0; i < num ; i++) { |
490 | WARN_ON(sh->dev[i].page != sh->dev[i].orig_page); | ||
482 | p = sh->dev[i].page; | 491 | p = sh->dev[i].page; |
483 | if (!p) | 492 | if (!p) |
484 | continue; | 493 | continue; |
@@ -499,6 +508,7 @@ static int grow_buffers(struct stripe_head *sh) | |||
499 | return 1; | 508 | return 1; |
500 | } | 509 | } |
501 | sh->dev[i].page = page; | 510 | sh->dev[i].page = page; |
511 | sh->dev[i].orig_page = page; | ||
502 | } | 512 | } |
503 | return 0; | 513 | return 0; |
504 | } | 514 | } |
@@ -855,6 +865,9 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s) | |||
855 | if (test_bit(R5_ReadNoMerge, &sh->dev[i].flags)) | 865 | if (test_bit(R5_ReadNoMerge, &sh->dev[i].flags)) |
856 | bi->bi_rw |= REQ_NOMERGE; | 866 | bi->bi_rw |= REQ_NOMERGE; |
857 | 867 | ||
868 | if (test_bit(R5_SkipCopy, &sh->dev[i].flags)) | ||
869 | WARN_ON(test_bit(R5_UPTODATE, &sh->dev[i].flags)); | ||
870 | sh->dev[i].vec.bv_page = sh->dev[i].page; | ||
858 | bi->bi_vcnt = 1; | 871 | bi->bi_vcnt = 1; |
859 | bi->bi_io_vec[0].bv_len = STRIPE_SIZE; | 872 | bi->bi_io_vec[0].bv_len = STRIPE_SIZE; |
860 | bi->bi_io_vec[0].bv_offset = 0; | 873 | bi->bi_io_vec[0].bv_offset = 0; |
@@ -899,6 +912,9 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s) | |||
899 | else | 912 | else |
900 | rbi->bi_iter.bi_sector = (sh->sector | 913 | rbi->bi_iter.bi_sector = (sh->sector |
901 | + rrdev->data_offset); | 914 | + rrdev->data_offset); |
915 | if (test_bit(R5_SkipCopy, &sh->dev[i].flags)) | ||
916 | WARN_ON(test_bit(R5_UPTODATE, &sh->dev[i].flags)); | ||
917 | sh->dev[i].rvec.bv_page = sh->dev[i].page; | ||
902 | rbi->bi_vcnt = 1; | 918 | rbi->bi_vcnt = 1; |
903 | rbi->bi_io_vec[0].bv_len = STRIPE_SIZE; | 919 | rbi->bi_io_vec[0].bv_len = STRIPE_SIZE; |
904 | rbi->bi_io_vec[0].bv_offset = 0; | 920 | rbi->bi_io_vec[0].bv_offset = 0; |
@@ -927,8 +943,9 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s) | |||
927 | } | 943 | } |
928 | 944 | ||
929 | static struct dma_async_tx_descriptor * | 945 | static struct dma_async_tx_descriptor * |
930 | async_copy_data(int frombio, struct bio *bio, struct page *page, | 946 | async_copy_data(int frombio, struct bio *bio, struct page **page, |
931 | sector_t sector, struct dma_async_tx_descriptor *tx) | 947 | sector_t sector, struct dma_async_tx_descriptor *tx, |
948 | struct stripe_head *sh) | ||
932 | { | 949 | { |
933 | struct bio_vec bvl; | 950 | struct bio_vec bvl; |
934 | struct bvec_iter iter; | 951 | struct bvec_iter iter; |
@@ -965,11 +982,16 @@ async_copy_data(int frombio, struct bio *bio, struct page *page, | |||
965 | if (clen > 0) { | 982 | if (clen > 0) { |
966 | b_offset += bvl.bv_offset; | 983 | b_offset += bvl.bv_offset; |
967 | bio_page = bvl.bv_page; | 984 | bio_page = bvl.bv_page; |
968 | if (frombio) | 985 | if (frombio) { |
969 | tx = async_memcpy(page, bio_page, page_offset, | 986 | if (sh->raid_conf->skip_copy && |
987 | b_offset == 0 && page_offset == 0 && | ||
988 | clen == STRIPE_SIZE) | ||
989 | *page = bio_page; | ||
990 | else | ||
991 | tx = async_memcpy(*page, bio_page, page_offset, | ||
970 | b_offset, clen, &submit); | 992 | b_offset, clen, &submit); |
971 | else | 993 | } else |
972 | tx = async_memcpy(bio_page, page, b_offset, | 994 | tx = async_memcpy(bio_page, *page, b_offset, |
973 | page_offset, clen, &submit); | 995 | page_offset, clen, &submit); |
974 | } | 996 | } |
975 | /* chain the operations */ | 997 | /* chain the operations */ |
@@ -1045,8 +1067,8 @@ static void ops_run_biofill(struct stripe_head *sh) | |||
1045 | spin_unlock_irq(&sh->stripe_lock); | 1067 | spin_unlock_irq(&sh->stripe_lock); |
1046 | while (rbi && rbi->bi_iter.bi_sector < | 1068 | while (rbi && rbi->bi_iter.bi_sector < |
1047 | dev->sector + STRIPE_SECTORS) { | 1069 | dev->sector + STRIPE_SECTORS) { |
1048 | tx = async_copy_data(0, rbi, dev->page, | 1070 | tx = async_copy_data(0, rbi, &dev->page, |
1049 | dev->sector, tx); | 1071 | dev->sector, tx, sh); |
1050 | rbi = r5_next_bio(rbi, dev->sector); | 1072 | rbi = r5_next_bio(rbi, dev->sector); |
1051 | } | 1073 | } |
1052 | } | 1074 | } |
@@ -1384,6 +1406,7 @@ ops_run_biodrain(struct stripe_head *sh, struct dma_async_tx_descriptor *tx) | |||
1384 | BUG_ON(dev->written); | 1406 | BUG_ON(dev->written); |
1385 | wbi = dev->written = chosen; | 1407 | wbi = dev->written = chosen; |
1386 | spin_unlock_irq(&sh->stripe_lock); | 1408 | spin_unlock_irq(&sh->stripe_lock); |
1409 | WARN_ON(dev->page != dev->orig_page); | ||
1387 | 1410 | ||
1388 | while (wbi && wbi->bi_iter.bi_sector < | 1411 | while (wbi && wbi->bi_iter.bi_sector < |
1389 | dev->sector + STRIPE_SECTORS) { | 1412 | dev->sector + STRIPE_SECTORS) { |
@@ -1393,9 +1416,15 @@ ops_run_biodrain(struct stripe_head *sh, struct dma_async_tx_descriptor *tx) | |||
1393 | set_bit(R5_SyncIO, &dev->flags); | 1416 | set_bit(R5_SyncIO, &dev->flags); |
1394 | if (wbi->bi_rw & REQ_DISCARD) | 1417 | if (wbi->bi_rw & REQ_DISCARD) |
1395 | set_bit(R5_Discard, &dev->flags); | 1418 | set_bit(R5_Discard, &dev->flags); |
1396 | else | 1419 | else { |
1397 | tx = async_copy_data(1, wbi, dev->page, | 1420 | tx = async_copy_data(1, wbi, &dev->page, |
1398 | dev->sector, tx); | 1421 | dev->sector, tx, sh); |
1422 | if (dev->page != dev->orig_page) { | ||
1423 | set_bit(R5_SkipCopy, &dev->flags); | ||
1424 | clear_bit(R5_UPTODATE, &dev->flags); | ||
1425 | clear_bit(R5_OVERWRITE, &dev->flags); | ||
1426 | } | ||
1427 | } | ||
1399 | wbi = r5_next_bio(wbi, dev->sector); | 1428 | wbi = r5_next_bio(wbi, dev->sector); |
1400 | } | 1429 | } |
1401 | } | 1430 | } |
@@ -1426,7 +1455,7 @@ static void ops_complete_reconstruct(void *stripe_head_ref) | |||
1426 | struct r5dev *dev = &sh->dev[i]; | 1455 | struct r5dev *dev = &sh->dev[i]; |
1427 | 1456 | ||
1428 | if (dev->written || i == pd_idx || i == qd_idx) { | 1457 | if (dev->written || i == pd_idx || i == qd_idx) { |
1429 | if (!discard) | 1458 | if (!discard && !test_bit(R5_SkipCopy, &dev->flags)) |
1430 | set_bit(R5_UPTODATE, &dev->flags); | 1459 | set_bit(R5_UPTODATE, &dev->flags); |
1431 | if (fua) | 1460 | if (fua) |
1432 | set_bit(R5_WantFUA, &dev->flags); | 1461 | set_bit(R5_WantFUA, &dev->flags); |
@@ -1839,8 +1868,10 @@ static int resize_stripes(struct r5conf *conf, int newsize) | |||
1839 | osh = get_free_stripe(conf, hash); | 1868 | osh = get_free_stripe(conf, hash); |
1840 | unlock_device_hash_lock(conf, hash); | 1869 | unlock_device_hash_lock(conf, hash); |
1841 | atomic_set(&nsh->count, 1); | 1870 | atomic_set(&nsh->count, 1); |
1842 | for(i=0; i<conf->pool_size; i++) | 1871 | for(i=0; i<conf->pool_size; i++) { |
1843 | nsh->dev[i].page = osh->dev[i].page; | 1872 | nsh->dev[i].page = osh->dev[i].page; |
1873 | nsh->dev[i].orig_page = osh->dev[i].page; | ||
1874 | } | ||
1844 | for( ; i<newsize; i++) | 1875 | for( ; i<newsize; i++) |
1845 | nsh->dev[i].page = NULL; | 1876 | nsh->dev[i].page = NULL; |
1846 | nsh->hash_lock_index = hash; | 1877 | nsh->hash_lock_index = hash; |
@@ -1896,6 +1927,7 @@ static int resize_stripes(struct r5conf *conf, int newsize) | |||
1896 | if (nsh->dev[i].page == NULL) { | 1927 | if (nsh->dev[i].page == NULL) { |
1897 | struct page *p = alloc_page(GFP_NOIO); | 1928 | struct page *p = alloc_page(GFP_NOIO); |
1898 | nsh->dev[i].page = p; | 1929 | nsh->dev[i].page = p; |
1930 | nsh->dev[i].orig_page = p; | ||
1899 | if (!p) | 1931 | if (!p) |
1900 | err = -ENOMEM; | 1932 | err = -ENOMEM; |
1901 | } | 1933 | } |
@@ -2133,24 +2165,20 @@ static void raid5_end_write_request(struct bio *bi, int error) | |||
2133 | } | 2165 | } |
2134 | 2166 | ||
2135 | static sector_t compute_blocknr(struct stripe_head *sh, int i, int previous); | 2167 | static sector_t compute_blocknr(struct stripe_head *sh, int i, int previous); |
2136 | 2168 | ||
2137 | static void raid5_build_block(struct stripe_head *sh, int i, int previous) | 2169 | static void raid5_build_block(struct stripe_head *sh, int i, int previous) |
2138 | { | 2170 | { |
2139 | struct r5dev *dev = &sh->dev[i]; | 2171 | struct r5dev *dev = &sh->dev[i]; |
2140 | 2172 | ||
2141 | bio_init(&dev->req); | 2173 | bio_init(&dev->req); |
2142 | dev->req.bi_io_vec = &dev->vec; | 2174 | dev->req.bi_io_vec = &dev->vec; |
2143 | dev->req.bi_vcnt++; | 2175 | dev->req.bi_max_vecs = 1; |
2144 | dev->req.bi_max_vecs++; | ||
2145 | dev->req.bi_private = sh; | 2176 | dev->req.bi_private = sh; |
2146 | dev->vec.bv_page = dev->page; | ||
2147 | 2177 | ||
2148 | bio_init(&dev->rreq); | 2178 | bio_init(&dev->rreq); |
2149 | dev->rreq.bi_io_vec = &dev->rvec; | 2179 | dev->rreq.bi_io_vec = &dev->rvec; |
2150 | dev->rreq.bi_vcnt++; | 2180 | dev->rreq.bi_max_vecs = 1; |
2151 | dev->rreq.bi_max_vecs++; | ||
2152 | dev->rreq.bi_private = sh; | 2181 | dev->rreq.bi_private = sh; |
2153 | dev->rvec.bv_page = dev->page; | ||
2154 | 2182 | ||
2155 | dev->flags = 0; | 2183 | dev->flags = 0; |
2156 | dev->sector = compute_blocknr(sh, i, previous); | 2184 | dev->sector = compute_blocknr(sh, i, previous); |
@@ -2750,6 +2778,11 @@ handle_failed_stripe(struct r5conf *conf, struct stripe_head *sh, | |||
2750 | /* and fail all 'written' */ | 2778 | /* and fail all 'written' */ |
2751 | bi = sh->dev[i].written; | 2779 | bi = sh->dev[i].written; |
2752 | sh->dev[i].written = NULL; | 2780 | sh->dev[i].written = NULL; |
2781 | if (test_and_clear_bit(R5_SkipCopy, &sh->dev[i].flags)) { | ||
2782 | WARN_ON(test_bit(R5_UPTODATE, &sh->dev[i].flags)); | ||
2783 | sh->dev[i].page = sh->dev[i].orig_page; | ||
2784 | } | ||
2785 | |||
2753 | if (bi) bitmap_end = 1; | 2786 | if (bi) bitmap_end = 1; |
2754 | while (bi && bi->bi_iter.bi_sector < | 2787 | while (bi && bi->bi_iter.bi_sector < |
2755 | sh->dev[i].sector + STRIPE_SECTORS) { | 2788 | sh->dev[i].sector + STRIPE_SECTORS) { |
@@ -2886,8 +2919,11 @@ static int fetch_block(struct stripe_head *sh, struct stripe_head_state *s, | |||
2886 | (s->failed >= 1 && fdev[0]->toread) || | 2919 | (s->failed >= 1 && fdev[0]->toread) || |
2887 | (s->failed >= 2 && fdev[1]->toread) || | 2920 | (s->failed >= 2 && fdev[1]->toread) || |
2888 | (sh->raid_conf->level <= 5 && s->failed && fdev[0]->towrite && | 2921 | (sh->raid_conf->level <= 5 && s->failed && fdev[0]->towrite && |
2922 | (!test_bit(R5_Insync, &dev->flags) || test_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) && | ||
2889 | !test_bit(R5_OVERWRITE, &fdev[0]->flags)) || | 2923 | !test_bit(R5_OVERWRITE, &fdev[0]->flags)) || |
2890 | (sh->raid_conf->level == 6 && s->failed && s->to_write))) { | 2924 | (sh->raid_conf->level == 6 && s->failed && s->to_write && |
2925 | s->to_write < sh->raid_conf->raid_disks - 2 && | ||
2926 | (!test_bit(R5_Insync, &dev->flags) || test_bit(STRIPE_PREREAD_ACTIVE, &sh->state))))) { | ||
2891 | /* we would like to get this block, possibly by computing it, | 2927 | /* we would like to get this block, possibly by computing it, |
2892 | * otherwise read it if the backing disk is insync | 2928 | * otherwise read it if the backing disk is insync |
2893 | */ | 2929 | */ |
@@ -2991,12 +3027,17 @@ static void handle_stripe_clean_event(struct r5conf *conf, | |||
2991 | dev = &sh->dev[i]; | 3027 | dev = &sh->dev[i]; |
2992 | if (!test_bit(R5_LOCKED, &dev->flags) && | 3028 | if (!test_bit(R5_LOCKED, &dev->flags) && |
2993 | (test_bit(R5_UPTODATE, &dev->flags) || | 3029 | (test_bit(R5_UPTODATE, &dev->flags) || |
2994 | test_bit(R5_Discard, &dev->flags))) { | 3030 | test_bit(R5_Discard, &dev->flags) || |
3031 | test_bit(R5_SkipCopy, &dev->flags))) { | ||
2995 | /* We can return any write requests */ | 3032 | /* We can return any write requests */ |
2996 | struct bio *wbi, *wbi2; | 3033 | struct bio *wbi, *wbi2; |
2997 | pr_debug("Return write for disc %d\n", i); | 3034 | pr_debug("Return write for disc %d\n", i); |
2998 | if (test_and_clear_bit(R5_Discard, &dev->flags)) | 3035 | if (test_and_clear_bit(R5_Discard, &dev->flags)) |
2999 | clear_bit(R5_UPTODATE, &dev->flags); | 3036 | clear_bit(R5_UPTODATE, &dev->flags); |
3037 | if (test_and_clear_bit(R5_SkipCopy, &dev->flags)) { | ||
3038 | WARN_ON(test_bit(R5_UPTODATE, &dev->flags)); | ||
3039 | dev->page = dev->orig_page; | ||
3040 | } | ||
3000 | wbi = dev->written; | 3041 | wbi = dev->written; |
3001 | dev->written = NULL; | 3042 | dev->written = NULL; |
3002 | while (wbi && wbi->bi_iter.bi_sector < | 3043 | while (wbi && wbi->bi_iter.bi_sector < |
@@ -3015,6 +3056,8 @@ static void handle_stripe_clean_event(struct r5conf *conf, | |||
3015 | 0); | 3056 | 0); |
3016 | } else if (test_bit(R5_Discard, &dev->flags)) | 3057 | } else if (test_bit(R5_Discard, &dev->flags)) |
3017 | discard_pending = 1; | 3058 | discard_pending = 1; |
3059 | WARN_ON(test_bit(R5_SkipCopy, &dev->flags)); | ||
3060 | WARN_ON(dev->page != dev->orig_page); | ||
3018 | } | 3061 | } |
3019 | if (!discard_pending && | 3062 | if (!discard_pending && |
3020 | test_bit(R5_Discard, &sh->dev[sh->pd_idx].flags)) { | 3063 | test_bit(R5_Discard, &sh->dev[sh->pd_idx].flags)) { |
@@ -3086,7 +3129,8 @@ static void handle_stripe_dirtying(struct r5conf *conf, | |||
3086 | !test_bit(R5_LOCKED, &dev->flags) && | 3129 | !test_bit(R5_LOCKED, &dev->flags) && |
3087 | !(test_bit(R5_UPTODATE, &dev->flags) || | 3130 | !(test_bit(R5_UPTODATE, &dev->flags) || |
3088 | test_bit(R5_Wantcompute, &dev->flags))) { | 3131 | test_bit(R5_Wantcompute, &dev->flags))) { |
3089 | if (test_bit(R5_Insync, &dev->flags)) rcw++; | 3132 | if (test_bit(R5_Insync, &dev->flags)) |
3133 | rcw++; | ||
3090 | else | 3134 | else |
3091 | rcw += 2*disks; | 3135 | rcw += 2*disks; |
3092 | } | 3136 | } |
@@ -3107,10 +3151,10 @@ static void handle_stripe_dirtying(struct r5conf *conf, | |||
3107 | !(test_bit(R5_UPTODATE, &dev->flags) || | 3151 | !(test_bit(R5_UPTODATE, &dev->flags) || |
3108 | test_bit(R5_Wantcompute, &dev->flags)) && | 3152 | test_bit(R5_Wantcompute, &dev->flags)) && |
3109 | test_bit(R5_Insync, &dev->flags)) { | 3153 | test_bit(R5_Insync, &dev->flags)) { |
3110 | if ( | 3154 | if (test_bit(STRIPE_PREREAD_ACTIVE, |
3111 | test_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) { | 3155 | &sh->state)) { |
3112 | pr_debug("Read_old block " | 3156 | pr_debug("Read_old block %d for r-m-w\n", |
3113 | "%d for r-m-w\n", i); | 3157 | i); |
3114 | set_bit(R5_LOCKED, &dev->flags); | 3158 | set_bit(R5_LOCKED, &dev->flags); |
3115 | set_bit(R5_Wantread, &dev->flags); | 3159 | set_bit(R5_Wantread, &dev->flags); |
3116 | s->locked++; | 3160 | s->locked++; |
@@ -3133,10 +3177,9 @@ static void handle_stripe_dirtying(struct r5conf *conf, | |||
3133 | !(test_bit(R5_UPTODATE, &dev->flags) || | 3177 | !(test_bit(R5_UPTODATE, &dev->flags) || |
3134 | test_bit(R5_Wantcompute, &dev->flags))) { | 3178 | test_bit(R5_Wantcompute, &dev->flags))) { |
3135 | rcw++; | 3179 | rcw++; |
3136 | if (!test_bit(R5_Insync, &dev->flags)) | 3180 | if (test_bit(R5_Insync, &dev->flags) && |
3137 | continue; /* it's a failed drive */ | 3181 | test_bit(STRIPE_PREREAD_ACTIVE, |
3138 | if ( | 3182 | &sh->state)) { |
3139 | test_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) { | ||
3140 | pr_debug("Read_old block " | 3183 | pr_debug("Read_old block " |
3141 | "%d for Reconstruct\n", i); | 3184 | "%d for Reconstruct\n", i); |
3142 | set_bit(R5_LOCKED, &dev->flags); | 3185 | set_bit(R5_LOCKED, &dev->flags); |
@@ -4370,8 +4413,7 @@ static struct stripe_head *__get_priority_stripe(struct r5conf *conf, int group) | |||
4370 | sh->group = NULL; | 4413 | sh->group = NULL; |
4371 | } | 4414 | } |
4372 | list_del_init(&sh->lru); | 4415 | list_del_init(&sh->lru); |
4373 | atomic_inc(&sh->count); | 4416 | BUG_ON(atomic_inc_return(&sh->count) != 1); |
4374 | BUG_ON(atomic_read(&sh->count) != 1); | ||
4375 | return sh; | 4417 | return sh; |
4376 | } | 4418 | } |
4377 | 4419 | ||
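
Folding the increment and the check into atomic_inc_return() means the BUG_ON tests the value this CPU's own increment produced, instead of re-reading a counter that another context could have changed between the atomic_inc() and the atomic_read(). A small C11 sketch of the same idea (hypothetical, not kernel code):

    #include <assert.h>
    #include <stdatomic.h>

    int main(void)
    {
    	atomic_int count = 0;

    	/* Racy form: another thread could bump 'count' between the two steps.
    	 *   atomic_fetch_add(&count, 1);
    	 *   assert(atomic_load(&count) == 1);
    	 *
    	 * Combined form: the value tested is the one our own increment
    	 * produced, analogous to atomic_inc_return().
    	 */
    	assert(atomic_fetch_add(&count, 1) + 1 == 1);
    	return 0;
    }
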
@@ -4401,7 +4443,7 @@ static void raid5_unplug(struct blk_plug_cb *blk_cb, bool from_schedule) | |||
4401 | * STRIPE_ON_UNPLUG_LIST clear but the stripe | 4443 | * STRIPE_ON_UNPLUG_LIST clear but the stripe |
4402 | * is still in our list | 4444 | * is still in our list |
4403 | */ | 4445 | */ |
4404 | smp_mb__before_clear_bit(); | 4446 | smp_mb__before_atomic(); |
4405 | clear_bit(STRIPE_ON_UNPLUG_LIST, &sh->state); | 4447 | clear_bit(STRIPE_ON_UNPLUG_LIST, &sh->state); |
4406 | /* | 4448 | /* |
4407 | * STRIPE_ON_RELEASE_LIST could be set here. In that | 4449 | * STRIPE_ON_RELEASE_LIST could be set here. In that |
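
smp_mb__before_clear_bit() is the old name; the kernel now spells this smp_mb__before_atomic(), a full barrier ahead of an otherwise unordered bit operation, so the list handling above is visible before STRIPE_ON_UNPLUG_LIST reads as clear. A rough C11 analogue, with hypothetical names:

    #include <stdatomic.h>

    /* clear_bit() carries no ordering of its own, hence the explicit
     * full fence before the relaxed read-modify-write. */
    static atomic_uint state;
    #define ON_UNPLUG_LIST (1u << 0)

    static void publish_then_clear(void)
    {
    	/* ... earlier stores (the list manipulation) happen before this ... */
    	atomic_thread_fence(memory_order_seq_cst);   /* ~ smp_mb__before_atomic() */
    	atomic_fetch_and_explicit(&state, ~ON_UNPLUG_LIST,
    				  memory_order_relaxed); /* ~ clear_bit() */
    }

    int main(void)
    {
    	atomic_fetch_or(&state, ON_UNPLUG_LIST);
    	publish_then_clear();
    	return 0;
    }
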
@@ -5032,8 +5074,8 @@ static inline sector_t sync_request(struct mddev *mddev, sector_t sector_nr, int | |||
5032 | bitmap_start_sync(mddev->bitmap, sector_nr, &sync_blocks, still_degraded); | 5074 | bitmap_start_sync(mddev->bitmap, sector_nr, &sync_blocks, still_degraded); |
5033 | 5075 | ||
5034 | set_bit(STRIPE_SYNC_REQUESTED, &sh->state); | 5076 | set_bit(STRIPE_SYNC_REQUESTED, &sh->state); |
5077 | set_bit(STRIPE_HANDLE, &sh->state); | ||
5035 | 5078 | ||
5036 | handle_stripe(sh); | ||
5037 | release_stripe(sh); | 5079 | release_stripe(sh); |
5038 | 5080 | ||
5039 | return STRIPE_SECTORS; | 5081 | return STRIPE_SECTORS; |
@@ -5073,7 +5115,7 @@ static int retry_aligned_read(struct r5conf *conf, struct bio *raid_bio) | |||
5073 | /* already done this stripe */ | 5115 | /* already done this stripe */ |
5074 | continue; | 5116 | continue; |
5075 | 5117 | ||
5076 | sh = get_active_stripe(conf, sector, 0, 1, 0); | 5118 | sh = get_active_stripe(conf, sector, 0, 1, 1); |
5077 | 5119 | ||
5078 | if (!sh) { | 5120 | if (!sh) { |
5079 | /* failed to get a stripe - must wait */ | 5121 | /* failed to get a stripe - must wait */ |
@@ -5356,6 +5398,50 @@ raid5_preread_bypass_threshold = __ATTR(preread_bypass_threshold, | |||
5356 | raid5_store_preread_threshold); | 5398 | raid5_store_preread_threshold); |
5357 | 5399 | ||
5358 | static ssize_t | 5400 | static ssize_t |
5401 | raid5_show_skip_copy(struct mddev *mddev, char *page) | ||
5402 | { | ||
5403 | struct r5conf *conf = mddev->private; | ||
5404 | if (conf) | ||
5405 | return sprintf(page, "%d\n", conf->skip_copy); | ||
5406 | else | ||
5407 | return 0; | ||
5408 | } | ||
5409 | |||
5410 | static ssize_t | ||
5411 | raid5_store_skip_copy(struct mddev *mddev, const char *page, size_t len) | ||
5412 | { | ||
5413 | struct r5conf *conf = mddev->private; | ||
5414 | unsigned long new; | ||
5415 | if (len >= PAGE_SIZE) | ||
5416 | return -EINVAL; | ||
5417 | if (!conf) | ||
5418 | return -ENODEV; | ||
5419 | |||
5420 | if (kstrtoul(page, 10, &new)) | ||
5421 | return -EINVAL; | ||
5422 | new = !!new; | ||
5423 | if (new == conf->skip_copy) | ||
5424 | return len; | ||
5425 | |||
5426 | mddev_suspend(mddev); | ||
5427 | conf->skip_copy = new; | ||
5428 | if (new) | ||
5429 | mddev->queue->backing_dev_info.capabilities |= | ||
5430 | BDI_CAP_STABLE_WRITES; | ||
5431 | else | ||
5432 | mddev->queue->backing_dev_info.capabilities &= | ||
5433 | ~BDI_CAP_STABLE_WRITES; | ||
5434 | mddev_resume(mddev); | ||
5435 | return len; | ||
5436 | } | ||
5437 | |||
5438 | static struct md_sysfs_entry | ||
5439 | raid5_skip_copy = __ATTR(skip_copy, S_IRUGO | S_IWUSR, | ||
5440 | raid5_show_skip_copy, | ||
5441 | raid5_store_skip_copy); | ||
5442 | |||
5443 | |||
5444 | static ssize_t | ||
5359 | stripe_cache_active_show(struct mddev *mddev, char *page) | 5445 | stripe_cache_active_show(struct mddev *mddev, char *page) |
5360 | { | 5446 | { |
5361 | struct r5conf *conf = mddev->private; | 5447 | struct r5conf *conf = mddev->private; |
@@ -5440,6 +5526,7 @@ static struct attribute *raid5_attrs[] = { | |||
5440 | &raid5_stripecache_active.attr, | 5526 | &raid5_stripecache_active.attr, |
5441 | &raid5_preread_bypass_threshold.attr, | 5527 | &raid5_preread_bypass_threshold.attr, |
5442 | &raid5_group_thread_cnt.attr, | 5528 | &raid5_group_thread_cnt.attr, |
5529 | &raid5_skip_copy.attr, | ||
5443 | NULL, | 5530 | NULL, |
5444 | }; | 5531 | }; |
5445 | static struct attribute_group raid5_attrs_group = { | 5532 | static struct attribute_group raid5_attrs_group = { |
diff --git a/drivers/md/raid5.h b/drivers/md/raid5.h index 01ad8ae8f578..bc72cd4be5f8 100644 --- a/drivers/md/raid5.h +++ b/drivers/md/raid5.h | |||
@@ -232,7 +232,7 @@ struct stripe_head { | |||
232 | */ | 232 | */ |
233 | struct bio req, rreq; | 233 | struct bio req, rreq; |
234 | struct bio_vec vec, rvec; | 234 | struct bio_vec vec, rvec; |
235 | struct page *page; | 235 | struct page *page, *orig_page; |
236 | struct bio *toread, *read, *towrite, *written; | 236 | struct bio *toread, *read, *towrite, *written; |
237 | sector_t sector; /* sector of this page */ | 237 | sector_t sector; /* sector of this page */ |
238 | unsigned long flags; | 238 | unsigned long flags; |
@@ -299,6 +299,7 @@ enum r5dev_flags { | |||
299 | * data in, and now is a good time to write it out. | 299 | * data in, and now is a good time to write it out. |
300 | */ | 300 | */ |
301 | R5_Discard, /* Discard the stripe */ | 301 | R5_Discard, /* Discard the stripe */ |
302 | R5_SkipCopy, /* Don't copy data from bio to stripe cache */ | ||
302 | }; | 303 | }; |
303 | 304 | ||
304 | /* | 305 | /* |
@@ -436,6 +437,7 @@ struct r5conf { | |||
436 | atomic_t pending_full_writes; /* full write backlog */ | 437 | atomic_t pending_full_writes; /* full write backlog */ |
437 | int bypass_count; /* bypassed prereads */ | 438 | int bypass_count; /* bypassed prereads */ |
438 | int bypass_threshold; /* preread nice */ | 439 | int bypass_threshold; /* preread nice */ |
440 | int skip_copy; /* Don't copy data from bio to stripe cache */ | ||
439 | struct list_head *last_hold; /* detect hold_list promotions */ | 441 | struct list_head *last_hold; /* detect hold_list promotions */ |
440 | 442 | ||
441 | atomic_t reshape_stripes; /* stripes with pending writes for reshape */ | 443 | atomic_t reshape_stripes; /* stripes with pending writes for reshape */ |