Diffstat (limited to 'drivers/md')
 drivers/md/bcache/bcache.h      |  7
 drivers/md/bcache/bset.c        | 39
 drivers/md/bcache/btree.c       |  4
 drivers/md/bcache/journal.c     | 33
 drivers/md/bcache/request.c     | 15
 drivers/md/bcache/sysfs.c       |  9
 drivers/md/bcache/util.c        | 11
 drivers/md/bcache/util.h        | 12
 drivers/md/bcache/writeback.c   | 42
 drivers/md/dm-io.c              |  7
 drivers/md/dm-mpath.c           | 18
 drivers/md/dm-snap-persistent.c |  2
 drivers/md/dm-snap.c            |  5
 drivers/md/dm-stats.c           | 23
 drivers/md/dm-thin.c            | 14
 drivers/md/dm.c                 | 71
 drivers/md/dm.h                 |  3
 17 files changed, 226 insertions(+), 89 deletions(-)
diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h
index b39f6f0b45f2..0f12382aa35d 100644
--- a/drivers/md/bcache/bcache.h
+++ b/drivers/md/bcache/bcache.h
@@ -498,7 +498,7 @@ struct cached_dev {
 	 */
 	atomic_t		has_dirty;
 
-	struct ratelimit	writeback_rate;
+	struct bch_ratelimit	writeback_rate;
 	struct delayed_work	writeback_rate_update;
 
 	/*
@@ -507,10 +507,9 @@ struct cached_dev {
 	 */
 	sector_t		last_read;
 
-	/* Number of writeback bios in flight */
-	atomic_t		in_flight;
+	/* Limit number of writeback bios in flight */
+	struct semaphore	in_flight;
 	struct closure_with_timer writeback;
-	struct closure_waitlist	writeback_wait;
 
 	struct keybuf		writeback_keys;
 
diff --git a/drivers/md/bcache/bset.c b/drivers/md/bcache/bset.c
index 8010eed06a51..22d1ae72c282 100644
--- a/drivers/md/bcache/bset.c
+++ b/drivers/md/bcache/bset.c
@@ -926,28 +926,45 @@ struct bkey *bch_next_recurse_key(struct btree *b, struct bkey *search)
 
 /* Mergesort */
 
+static void sort_key_next(struct btree_iter *iter,
+			  struct btree_iter_set *i)
+{
+	i->k = bkey_next(i->k);
+
+	if (i->k == i->end)
+		*i = iter->data[--iter->used];
+}
+
 static void btree_sort_fixup(struct btree_iter *iter)
 {
 	while (iter->used > 1) {
 		struct btree_iter_set *top = iter->data, *i = top + 1;
-		struct bkey *k;
 
 		if (iter->used > 2 &&
 		    btree_iter_cmp(i[0], i[1]))
 			i++;
 
-		for (k = i->k;
-		     k != i->end && bkey_cmp(top->k, &START_KEY(k)) > 0;
-		     k = bkey_next(k))
-			if (top->k > i->k)
-				__bch_cut_front(top->k, k);
-			else if (KEY_SIZE(k))
-				bch_cut_back(&START_KEY(k), top->k);
-
-		if (top->k < i->k || k == i->k)
+		if (bkey_cmp(top->k, &START_KEY(i->k)) <= 0)
 			break;
 
-		heap_sift(iter, i - top, btree_iter_cmp);
+		if (!KEY_SIZE(i->k)) {
+			sort_key_next(iter, i);
+			heap_sift(iter, i - top, btree_iter_cmp);
+			continue;
+		}
+
+		if (top->k > i->k) {
+			if (bkey_cmp(top->k, i->k) >= 0)
+				sort_key_next(iter, i);
+			else
+				bch_cut_front(top->k, i->k);
+
+			heap_sift(iter, i - top, btree_iter_cmp);
+		} else {
+			/* can't happen because of comparison func */
+			BUG_ON(!bkey_cmp(&START_KEY(top->k), &START_KEY(i->k)));
+			bch_cut_back(&START_KEY(i->k), top->k);
+		}
 	}
 }
 
diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c
index f9764e61978b..f42fc7ed9cd6 100644
--- a/drivers/md/bcache/btree.c
+++ b/drivers/md/bcache/btree.c
@@ -255,7 +255,7 @@ void bch_btree_node_read(struct btree *b)
 
 	return;
 err:
-	bch_cache_set_error(b->c, "io error reading bucket %lu",
+	bch_cache_set_error(b->c, "io error reading bucket %zu",
 			    PTR_BUCKET_NR(b->c, &b->key, 0));
 }
 
@@ -612,7 +612,7 @@ static unsigned long bch_mca_scan(struct shrinker *shrink,
 		return SHRINK_STOP;
 
 	/* Return -1 if we can't do anything right now */
-	if (sc->gfp_mask & __GFP_WAIT)
+	if (sc->gfp_mask & __GFP_IO)
 		mutex_lock(&c->bucket_lock);
 	else if (!mutex_trylock(&c->bucket_lock))
 		return -1;
diff --git a/drivers/md/bcache/journal.c b/drivers/md/bcache/journal.c
index ba95ab84b2be..8435f81e5d85 100644
--- a/drivers/md/bcache/journal.c
+++ b/drivers/md/bcache/journal.c
@@ -153,7 +153,8 @@ int bch_journal_read(struct cache_set *c, struct list_head *list,
 		bitmap_zero(bitmap, SB_JOURNAL_BUCKETS);
 		pr_debug("%u journal buckets", ca->sb.njournal_buckets);
 
-		/* Read journal buckets ordered by golden ratio hash to quickly
+		/*
+		 * Read journal buckets ordered by golden ratio hash to quickly
 		 * find a sequence of buckets with valid journal entries
 		 */
 		for (i = 0; i < ca->sb.njournal_buckets; i++) {
@@ -166,18 +167,20 @@ int bch_journal_read(struct cache_set *c, struct list_head *list,
 			goto bsearch;
 		}
 
-		/* If that fails, check all the buckets we haven't checked
+		/*
+		 * If that fails, check all the buckets we haven't checked
 		 * already
 		 */
 		pr_debug("falling back to linear search");
 
-		for (l = 0; l < ca->sb.njournal_buckets; l++) {
-			if (test_bit(l, bitmap))
-				continue;
-
+		for (l = find_first_zero_bit(bitmap, ca->sb.njournal_buckets);
+		     l < ca->sb.njournal_buckets;
+		     l = find_next_zero_bit(bitmap, ca->sb.njournal_buckets, l + 1))
 			if (read_bucket(l))
 				goto bsearch;
-		}
+
+		if (list_empty(list))
+			continue;
 bsearch:
 		/* Binary search */
 		m = r = find_next_bit(bitmap, ca->sb.njournal_buckets, l + 1);
@@ -197,10 +200,12 @@ bsearch:
 				r = m;
 		}
 
-		/* Read buckets in reverse order until we stop finding more
+		/*
+		 * Read buckets in reverse order until we stop finding more
 		 * journal entries
 		 */
-		pr_debug("finishing up");
+		pr_debug("finishing up: m %u njournal_buckets %u",
+			 m, ca->sb.njournal_buckets);
 		l = m;
 
 		while (1) {
@@ -228,9 +233,10 @@ bsearch:
 		}
 	}
 
-	c->journal.seq = list_entry(list->prev,
-				    struct journal_replay,
-				    list)->j.seq;
+	if (!list_empty(list))
+		c->journal.seq = list_entry(list->prev,
+					    struct journal_replay,
+					    list)->j.seq;
 
 	return 0;
 #undef read_bucket
@@ -428,7 +434,7 @@ static void do_journal_discard(struct cache *ca)
 		return;
 	}
 
-	switch (atomic_read(&ja->discard_in_flight) == DISCARD_IN_FLIGHT) {
+	switch (atomic_read(&ja->discard_in_flight)) {
 	case DISCARD_IN_FLIGHT:
 		return;
 
@@ -689,6 +695,7 @@ void bch_journal_meta(struct cache_set *c, struct closure *cl)
 		if (cl)
 			BUG_ON(!closure_wait(&w->wait, cl));
 
+		closure_flush(&c->journal.io);
 		__journal_try_write(c, true);
 	}
 }
diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c
index 786a1a4f74d8..71eb233b9ace 100644
--- a/drivers/md/bcache/request.c
+++ b/drivers/md/bcache/request.c
@@ -997,14 +997,17 @@ static void request_write(struct cached_dev *dc, struct search *s)
 	} else {
 		bch_writeback_add(dc);
 
-		if (s->op.flush_journal) {
+		if (bio->bi_rw & REQ_FLUSH) {
 			/* Also need to send a flush to the backing device */
-			s->op.cache_bio = bio_clone_bioset(bio, GFP_NOIO,
-							   dc->disk.bio_split);
+			struct bio *flush = bio_alloc_bioset(0, GFP_NOIO,
+							     dc->disk.bio_split);
 
-			bio->bi_size = 0;
-			bio->bi_vcnt = 0;
-			closure_bio_submit(bio, cl, s->d);
+			flush->bi_rw = WRITE_FLUSH;
+			flush->bi_bdev = bio->bi_bdev;
+			flush->bi_end_io = request_endio;
+			flush->bi_private = cl;
+
+			closure_bio_submit(flush, cl, s->d);
 		} else {
 			s->op.cache_bio = bio;
 		}
diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c
index 4fe6ab2fbe2e..924dcfdae111 100644
--- a/drivers/md/bcache/sysfs.c
+++ b/drivers/md/bcache/sysfs.c
@@ -223,8 +223,13 @@ STORE(__cached_dev)
 	}
 
 	if (attr == &sysfs_label) {
-		/* note: endlines are preserved */
-		memcpy(dc->sb.label, buf, SB_LABEL_SIZE);
+		if (size > SB_LABEL_SIZE)
+			return -EINVAL;
+		memcpy(dc->sb.label, buf, size);
+		if (size < SB_LABEL_SIZE)
+			dc->sb.label[size] = '\0';
+		if (size && dc->sb.label[size - 1] == '\n')
+			dc->sb.label[size - 1] = '\0';
 		bch_write_bdev_super(dc, NULL);
 		if (dc->disk.c) {
 			memcpy(dc->disk.c->uuids[dc->disk.id].label,
diff --git a/drivers/md/bcache/util.c b/drivers/md/bcache/util.c
index 98eb81159a22..420dad545c7d 100644
--- a/drivers/md/bcache/util.c
+++ b/drivers/md/bcache/util.c
@@ -190,7 +190,16 @@ void bch_time_stats_update(struct time_stats *stats, uint64_t start_time)
 	stats->last = now ?: 1;
 }
 
-unsigned bch_next_delay(struct ratelimit *d, uint64_t done)
+/**
+ * bch_next_delay() - increment @d by the amount of work done, and return how
+ * long to delay until the next time to do some work.
+ *
+ * @d - the struct bch_ratelimit to update
+ * @done - the amount of work done, in arbitrary units
+ *
+ * Returns the amount of time to delay by, in jiffies
+ */
+uint64_t bch_next_delay(struct bch_ratelimit *d, uint64_t done)
 {
 	uint64_t now = local_clock();
 
diff --git a/drivers/md/bcache/util.h b/drivers/md/bcache/util.h
index 1ae2a73ad85f..ea345c6896f4 100644
--- a/drivers/md/bcache/util.h
+++ b/drivers/md/bcache/util.h
@@ -450,17 +450,23 @@ read_attribute(name ## _last_ ## frequency_units)
 	(ewma) >> factor;						\
 })
 
-struct ratelimit {
+struct bch_ratelimit {
+	/* Next time we want to do some work, in nanoseconds */
 	uint64_t		next;
+
+	/*
+	 * Rate at which we want to do work, in units per nanosecond
+	 * The units here correspond to the units passed to bch_next_delay()
+	 */
 	unsigned		rate;
 };
 
-static inline void ratelimit_reset(struct ratelimit *d)
+static inline void bch_ratelimit_reset(struct bch_ratelimit *d)
 {
 	d->next = local_clock();
 }
 
-unsigned bch_next_delay(struct ratelimit *d, uint64_t done);
+uint64_t bch_next_delay(struct bch_ratelimit *d, uint64_t done);
 
 #define __DIV_SAFE(n, d, zero)						\
 ({									\
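
The util.c/util.h hunks above rename the rate limiter to struct bch_ratelimit and document its contract: next is an absolute deadline in nanoseconds, rate is work units per nanosecond, and bch_next_delay() charges the work just done and returns how many jiffies the caller should sleep. The snippet below is a minimal userspace sketch of that idea, not the kernel implementation; the clock helper, the NS_PER_JIFFY constant, and the clamping policy are illustrative assumptions.

#include <stdint.h>
#include <time.h>

#define NS_PER_JIFFY 10000000ULL	/* assume HZ == 100 for the sketch */

struct ratelimit_sketch {
	uint64_t next;	/* next time we want to do work, in ns */
	unsigned rate;	/* work units per ns */
};

static uint64_t now_ns(void)
{
	struct timespec ts;
	clock_gettime(CLOCK_MONOTONIC, &ts);
	return (uint64_t)ts.tv_sec * 1000000000ULL + ts.tv_nsec;
}

/* Charge @done units of work and return how many "jiffies" to sleep. */
static uint64_t next_delay(struct ratelimit_sketch *d, uint64_t done)
{
	uint64_t now = now_ns();

	if (!d->rate)
		return 0;

	/* each unit of work pushes the deadline out by 1/rate ns */
	d->next += done / d->rate;

	/* don't let a long idle period turn into an unbounded burst credit */
	if (d->next < now)
		d->next = now;

	return d->next > now ? (d->next - now) / NS_PER_JIFFY : 0;
}

Note that writeback_delay() in the writeback.c hunks below additionally caps the returned delay at HZ jiffies, so a mis-tuned rate can never stall writeback for more than about a second.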
diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c
index 22cbff551628..ba3ee48320f2 100644
--- a/drivers/md/bcache/writeback.c
+++ b/drivers/md/bcache/writeback.c
@@ -94,11 +94,15 @@ static void update_writeback_rate(struct work_struct *work)
 
 static unsigned writeback_delay(struct cached_dev *dc, unsigned sectors)
 {
+	uint64_t ret;
+
 	if (atomic_read(&dc->disk.detaching) ||
 	    !dc->writeback_percent)
 		return 0;
 
-	return bch_next_delay(&dc->writeback_rate, sectors * 10000000ULL);
+	ret = bch_next_delay(&dc->writeback_rate, sectors * 10000000ULL);
+
+	return min_t(uint64_t, ret, HZ);
 }
 
 /* Background writeback */
@@ -208,7 +212,7 @@ normal_refill:
 
 	up_write(&dc->writeback_lock);
 
-	ratelimit_reset(&dc->writeback_rate);
+	bch_ratelimit_reset(&dc->writeback_rate);
 
 	/* Punt to workqueue only so we don't recurse and blow the stack */
 	continue_at(cl, read_dirty, dirty_wq);
@@ -318,9 +322,7 @@ static void write_dirty_finish(struct closure *cl)
 	}
 
 	bch_keybuf_del(&dc->writeback_keys, w);
-	atomic_dec_bug(&dc->in_flight);
-
-	closure_wake_up(&dc->writeback_wait);
+	up(&dc->in_flight);
 
 	closure_return_with_destructor(cl, dirty_io_destructor);
 }
@@ -349,7 +351,7 @@ static void write_dirty(struct closure *cl)
 
 	closure_bio_submit(&io->bio, cl, &io->dc->disk);
 
-	continue_at(cl, write_dirty_finish, dirty_wq);
+	continue_at(cl, write_dirty_finish, system_wq);
 }
 
 static void read_dirty_endio(struct bio *bio, int error)
@@ -369,7 +371,7 @@ static void read_dirty_submit(struct closure *cl)
 
 	closure_bio_submit(&io->bio, cl, &io->dc->disk);
 
-	continue_at(cl, write_dirty, dirty_wq);
+	continue_at(cl, write_dirty, system_wq);
 }
 
 static void read_dirty(struct closure *cl)
@@ -394,12 +396,8 @@ static void read_dirty(struct closure *cl)
 
 		if (delay > 0 &&
 		    (KEY_START(&w->key) != dc->last_read ||
-		     jiffies_to_msecs(delay) > 50)) {
-			w->private = NULL;
-
-			closure_delay(&dc->writeback, delay);
-			continue_at(cl, read_dirty, dirty_wq);
-		}
+		     jiffies_to_msecs(delay) > 50))
+			delay = schedule_timeout_uninterruptible(delay);
 
 		dc->last_read = KEY_OFFSET(&w->key);
 
@@ -424,15 +422,10 @@ static void read_dirty(struct closure *cl)
 
 		trace_bcache_writeback(&w->key);
 
-		closure_call(&io->cl, read_dirty_submit, NULL, &dc->disk.cl);
+		down(&dc->in_flight);
+		closure_call(&io->cl, read_dirty_submit, NULL, cl);
 
 		delay = writeback_delay(dc, KEY_SIZE(&w->key));
-
-		atomic_inc(&dc->in_flight);
-
-		if (!closure_wait_event(&dc->writeback_wait, cl,
-					atomic_read(&dc->in_flight) < 64))
-			continue_at(cl, read_dirty, dirty_wq);
 	}
 
 	if (0) {
@@ -442,7 +435,11 @@ err:
 		bch_keybuf_del(&dc->writeback_keys, w);
 	}
 
-	refill_dirty(cl);
+	/*
+	 * Wait for outstanding writeback IOs to finish (and keybuf slots to be
+	 * freed) before refilling again
+	 */
+	continue_at(cl, refill_dirty, dirty_wq);
 }
 
 /* Init */
@@ -484,6 +481,7 @@ void bch_sectors_dirty_init(struct cached_dev *dc)
 
 void bch_cached_dev_writeback_init(struct cached_dev *dc)
 {
+	sema_init(&dc->in_flight, 64);
 	closure_init_unlocked(&dc->writeback);
 	init_rwsem(&dc->writeback_lock);
 
@@ -513,7 +511,7 @@ void bch_writeback_exit(void)
 
 int __init bch_writeback_init(void)
 {
-	dirty_wq = create_singlethread_workqueue("bcache_writeback");
+	dirty_wq = create_workqueue("bcache_writeback");
 	if (!dirty_wq)
 		return -ENOMEM;
 
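
The writeback.c hunks above replace the open-coded throttle (an atomic in_flight counter plus a closure waitlist) with a counting semaphore initialized to 64: read_dirty() does down(&dc->in_flight) before issuing each dirty-data I/O and write_dirty_finish() does up(&dc->in_flight) when the write to the backing device completes. The standalone sketch below shows the same bounded-in-flight pattern with POSIX semaphores; the thread body and the MAX_IN_FLIGHT value are illustrative, not taken from bcache.

#include <pthread.h>
#include <semaphore.h>
#include <unistd.h>

#define MAX_IN_FLIGHT 64	/* same bound the patch uses for dc->in_flight */

static sem_t in_flight;

static void *do_io(void *arg)
{
	usleep(1000);		/* stand-in for the actual writeback I/O */
	sem_post(&in_flight);	/* like up(&dc->in_flight) in the completion path */
	return NULL;
}

int main(void)
{
	sem_init(&in_flight, 0, MAX_IN_FLIGHT);

	for (int i = 0; i < 1000; i++) {
		pthread_t tid;

		sem_wait(&in_flight);	/* like down(&dc->in_flight) before submitting */
		pthread_create(&tid, NULL, do_io, NULL);
		pthread_detach(tid);
	}
	return 0;
}

Blocking in down() lets the submitter throttle itself directly, which is why the diff can drop the old closure_wait_event() dance and simply sleep with schedule_timeout_uninterruptible() for the rate-limit delay.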
diff --git a/drivers/md/dm-io.c b/drivers/md/dm-io.c
index ea49834377c8..2a20986a2fec 100644
--- a/drivers/md/dm-io.c
+++ b/drivers/md/dm-io.c
@@ -19,8 +19,6 @@
 #define DM_MSG_PREFIX "io"
 
 #define DM_IO_MAX_REGIONS	BITS_PER_LONG
-#define MIN_IOS		16
-#define MIN_BIOS	16
 
 struct dm_io_client {
 	mempool_t *pool;
@@ -50,16 +48,17 @@ static struct kmem_cache *_dm_io_cache;
 struct dm_io_client *dm_io_client_create(void)
 {
 	struct dm_io_client *client;
+	unsigned min_ios = dm_get_reserved_bio_based_ios();
 
 	client = kmalloc(sizeof(*client), GFP_KERNEL);
 	if (!client)
 		return ERR_PTR(-ENOMEM);
 
-	client->pool = mempool_create_slab_pool(MIN_IOS, _dm_io_cache);
+	client->pool = mempool_create_slab_pool(min_ios, _dm_io_cache);
 	if (!client->pool)
 		goto bad;
 
-	client->bios = bioset_create(MIN_BIOS, 0);
+	client->bios = bioset_create(min_ios, 0);
 	if (!client->bios)
 		goto bad;
 
diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c
index b759a127f9c3..de570a558764 100644
--- a/drivers/md/dm-mpath.c
+++ b/drivers/md/dm-mpath.c
@@ -7,6 +7,7 @@
 
 #include <linux/device-mapper.h>
 
+#include "dm.h"
 #include "dm-path-selector.h"
 #include "dm-uevent.h"
 
@@ -116,8 +117,6 @@ struct dm_mpath_io {
 
 typedef int (*action_fn) (struct pgpath *pgpath);
 
-#define MIN_IOS 256	/* Mempool size */
-
 static struct kmem_cache *_mpio_cache;
 
 static struct workqueue_struct *kmultipathd, *kmpath_handlerd;
@@ -190,6 +189,7 @@ static void free_priority_group(struct priority_group *pg,
 static struct multipath *alloc_multipath(struct dm_target *ti)
 {
 	struct multipath *m;
+	unsigned min_ios = dm_get_reserved_rq_based_ios();
 
 	m = kzalloc(sizeof(*m), GFP_KERNEL);
 	if (m) {
@@ -202,7 +202,7 @@ static struct multipath *alloc_multipath(struct dm_target *ti)
 		INIT_WORK(&m->trigger_event, trigger_event);
 		init_waitqueue_head(&m->pg_init_wait);
 		mutex_init(&m->work_mutex);
-		m->mpio_pool = mempool_create_slab_pool(MIN_IOS, _mpio_cache);
+		m->mpio_pool = mempool_create_slab_pool(min_ios, _mpio_cache);
 		if (!m->mpio_pool) {
 			kfree(m);
 			return NULL;
@@ -1268,6 +1268,7 @@ static int noretry_error(int error)
 	case -EREMOTEIO:
 	case -EILSEQ:
 	case -ENODATA:
+	case -ENOSPC:
 		return 1;
 	}
 
@@ -1298,8 +1299,17 @@ static int do_end_io(struct multipath *m, struct request *clone,
 	if (!error && !clone->errors)
 		return 0;	/* I/O complete */
 
-	if (noretry_error(error))
+	if (noretry_error(error)) {
+		if ((clone->cmd_flags & REQ_WRITE_SAME) &&
+		    !clone->q->limits.max_write_same_sectors) {
+			struct queue_limits *limits;
+
+			/* device doesn't really support WRITE SAME, disable it */
+			limits = dm_get_queue_limits(dm_table_get_md(m->ti->table));
+			limits->max_write_same_sectors = 0;
+		}
 		return error;
+	}
 
 	if (mpio->pgpath)
 		fail_path(mpio->pgpath);
diff --git a/drivers/md/dm-snap-persistent.c b/drivers/md/dm-snap-persistent.c
index 3ac415675b6c..4caa8e6d59d7 100644
--- a/drivers/md/dm-snap-persistent.c
+++ b/drivers/md/dm-snap-persistent.c
@@ -256,7 +256,7 @@ static int chunk_io(struct pstore *ps, void *area, chunk_t chunk, int rw,
 	 */
 	INIT_WORK_ONSTACK(&req.work, do_metadata);
 	queue_work(ps->metadata_wq, &req.work);
-	flush_work(&req.work);
+	flush_workqueue(ps->metadata_wq);
 
 	return req.result;
 }
diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c
index c434e5aab2df..aec57d76db5d 100644
--- a/drivers/md/dm-snap.c
+++ b/drivers/md/dm-snap.c
@@ -725,17 +725,16 @@ static int calc_max_buckets(void)
  */
 static int init_hash_tables(struct dm_snapshot *s)
 {
-	sector_t hash_size, cow_dev_size, origin_dev_size, max_buckets;
+	sector_t hash_size, cow_dev_size, max_buckets;
 
 	/*
 	 * Calculate based on the size of the original volume or
 	 * the COW volume...
 	 */
 	cow_dev_size = get_dev_size(s->cow->bdev);
-	origin_dev_size = get_dev_size(s->origin->bdev);
 	max_buckets = calc_max_buckets();
 
-	hash_size = min(origin_dev_size, cow_dev_size) >> s->store->chunk_shift;
+	hash_size = cow_dev_size >> s->store->chunk_shift;
 	hash_size = min(hash_size, max_buckets);
 
 	if (hash_size < 64)
diff --git a/drivers/md/dm-stats.c b/drivers/md/dm-stats.c
index 8ae31e8d3d64..3d404c1371ed 100644
--- a/drivers/md/dm-stats.c
+++ b/drivers/md/dm-stats.c
@@ -451,19 +451,26 @@ static void dm_stat_for_entry(struct dm_stat *s, size_t entry,
 	struct dm_stat_percpu *p;
 
 	/*
-	 * For strict correctness we should use local_irq_disable/enable
+	 * For strict correctness we should use local_irq_save/restore
 	 * instead of preempt_disable/enable.
 	 *
-	 * This is racy if the driver finishes bios from non-interrupt
-	 * context as well as from interrupt context or from more different
-	 * interrupts.
+	 * preempt_disable/enable is racy if the driver finishes bios
+	 * from non-interrupt context as well as from interrupt context
+	 * or from more different interrupts.
 	 *
-	 * However, the race only results in not counting some events,
-	 * so it is acceptable.
+	 * On 64-bit architectures the race only results in not counting some
+	 * events, so it is acceptable. On 32-bit architectures the race could
+	 * cause the counter going off by 2^32, so we need to do proper locking
+	 * there.
 	 *
 	 * part_stat_lock()/part_stat_unlock() have this race too.
 	 */
+#if BITS_PER_LONG == 32
+	unsigned long flags;
+	local_irq_save(flags);
+#else
 	preempt_disable();
+#endif
 	p = &s->stat_percpu[smp_processor_id()][entry];
 
 	if (!end) {
@@ -478,7 +485,11 @@ static void dm_stat_for_entry(struct dm_stat *s, size_t entry,
 		p->ticks[idx] += duration;
 	}
 
+#if BITS_PER_LONG == 32
+	local_irq_restore(flags);
+#else
 	preempt_enable();
+#endif
 }
 
 static void __dm_stat_bio(struct dm_stat *s, unsigned long bi_rw,
diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c
index ed063427d676..2c0cf511ec23 100644
--- a/drivers/md/dm-thin.c
+++ b/drivers/md/dm-thin.c
@@ -2095,6 +2095,7 @@ static int pool_ctr(struct dm_target *ti, unsigned argc, char **argv)
 	 * them down to the data device.  The thin device's discard
 	 * processing will cause mappings to be removed from the btree.
 	 */
+	ti->discard_zeroes_data_unsupported = true;
 	if (pf.discard_enabled && pf.discard_passdown) {
 		ti->num_discard_bios = 1;
 
@@ -2104,7 +2105,6 @@ static int pool_ctr(struct dm_target *ti, unsigned argc, char **argv)
 		 * thin devices' discard limits consistent).
 		 */
 		ti->discards_supported = true;
-		ti->discard_zeroes_data_unsupported = true;
 	}
 	ti->private = pt;
 
@@ -2689,8 +2689,16 @@ static void pool_io_hints(struct dm_target *ti, struct queue_limits *limits)
 	 * They get transferred to the live pool in bind_control_target()
 	 * called from pool_preresume().
 	 */
-	if (!pt->adjusted_pf.discard_enabled)
+	if (!pt->adjusted_pf.discard_enabled) {
+		/*
+		 * Must explicitly disallow stacking discard limits otherwise the
+		 * block layer will stack them if pool's data device has support.
+		 * QUEUE_FLAG_DISCARD wouldn't be set but there is no way for the
+		 * user to see that, so make sure to set all discard limits to 0.
+		 */
+		limits->discard_granularity = 0;
 		return;
+	}
 
 	disable_passdown_if_not_supported(pt);
 
@@ -2826,10 +2834,10 @@ static int thin_ctr(struct dm_target *ti, unsigned argc, char **argv)
 	ti->per_bio_data_size = sizeof(struct dm_thin_endio_hook);
 
 	/* In case the pool supports discards, pass them on. */
+	ti->discard_zeroes_data_unsupported = true;
 	if (tc->pool->pf.discard_enabled) {
 		ti->discards_supported = true;
 		ti->num_discard_bios = 1;
-		ti->discard_zeroes_data_unsupported = true;
 		/* Discard bios must be split on a block boundary */
 		ti->split_discard_bios = true;
 	}
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 6a5e9ed2fcc3..b3e26c7d1417 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -211,10 +211,55 @@ struct dm_md_mempools {
 	struct bio_set *bs;
 };
 
-#define MIN_IOS 256
+#define RESERVED_BIO_BASED_IOS		16
+#define RESERVED_REQUEST_BASED_IOS	256
+#define RESERVED_MAX_IOS		1024
 static struct kmem_cache *_io_cache;
 static struct kmem_cache *_rq_tio_cache;
 
+/*
+ * Bio-based DM's mempools' reserved IOs set by the user.
+ */
+static unsigned reserved_bio_based_ios = RESERVED_BIO_BASED_IOS;
+
+/*
+ * Request-based DM's mempools' reserved IOs set by the user.
+ */
+static unsigned reserved_rq_based_ios = RESERVED_REQUEST_BASED_IOS;
+
+static unsigned __dm_get_reserved_ios(unsigned *reserved_ios,
+				      unsigned def, unsigned max)
+{
+	unsigned ios = ACCESS_ONCE(*reserved_ios);
+	unsigned modified_ios = 0;
+
+	if (!ios)
+		modified_ios = def;
+	else if (ios > max)
+		modified_ios = max;
+
+	if (modified_ios) {
+		(void)cmpxchg(reserved_ios, ios, modified_ios);
+		ios = modified_ios;
+	}
+
+	return ios;
+}
+
+unsigned dm_get_reserved_bio_based_ios(void)
+{
+	return __dm_get_reserved_ios(&reserved_bio_based_ios,
+				     RESERVED_BIO_BASED_IOS, RESERVED_MAX_IOS);
+}
+EXPORT_SYMBOL_GPL(dm_get_reserved_bio_based_ios);
+
+unsigned dm_get_reserved_rq_based_ios(void)
+{
+	return __dm_get_reserved_ios(&reserved_rq_based_ios,
+				     RESERVED_REQUEST_BASED_IOS, RESERVED_MAX_IOS);
+}
+EXPORT_SYMBOL_GPL(dm_get_reserved_rq_based_ios);
+
 static int __init local_init(void)
 {
 	int r = -ENOMEM;
@@ -2278,6 +2323,17 @@ struct target_type *dm_get_immutable_target_type(struct mapped_device *md)
 }
 
 /*
+ * The queue_limits are only valid as long as you have a reference
+ * count on 'md'.
+ */
+struct queue_limits *dm_get_queue_limits(struct mapped_device *md)
+{
+	BUG_ON(!atomic_read(&md->holders));
+	return &md->queue->limits;
+}
+EXPORT_SYMBOL_GPL(dm_get_queue_limits);
+
+/*
  * Fully initialize a request-based queue (->elevator, ->request_fn, etc).
  */
 static int dm_init_request_based_queue(struct mapped_device *md)
@@ -2862,18 +2918,18 @@ struct dm_md_mempools *dm_alloc_md_mempools(unsigned type, unsigned integrity, u
 
 	if (type == DM_TYPE_BIO_BASED) {
 		cachep = _io_cache;
-		pool_size = 16;
+		pool_size = dm_get_reserved_bio_based_ios();
 		front_pad = roundup(per_bio_data_size, __alignof__(struct dm_target_io)) + offsetof(struct dm_target_io, clone);
 	} else if (type == DM_TYPE_REQUEST_BASED) {
 		cachep = _rq_tio_cache;
-		pool_size = MIN_IOS;
+		pool_size = dm_get_reserved_rq_based_ios();
 		front_pad = offsetof(struct dm_rq_clone_bio_info, clone);
 		/* per_bio_data_size is not used. See __bind_mempools(). */
 		WARN_ON(per_bio_data_size != 0);
 	} else
 		goto out;
 
-	pools->io_pool = mempool_create_slab_pool(MIN_IOS, cachep);
+	pools->io_pool = mempool_create_slab_pool(pool_size, cachep);
 	if (!pools->io_pool)
 		goto out;
 
@@ -2924,6 +2980,13 @@ module_exit(dm_exit);
 
 module_param(major, uint, 0);
 MODULE_PARM_DESC(major, "The major number of the device mapper");
+
+module_param(reserved_bio_based_ios, uint, S_IRUGO | S_IWUSR);
+MODULE_PARM_DESC(reserved_bio_based_ios, "Reserved IOs in bio-based mempools");
+
+module_param(reserved_rq_based_ios, uint, S_IRUGO | S_IWUSR);
+MODULE_PARM_DESC(reserved_rq_based_ios, "Reserved IOs in request-based mempools");
+
 MODULE_DESCRIPTION(DM_NAME " driver");
 MODULE_AUTHOR("Joe Thornber <dm-devel@redhat.com>");
 MODULE_LICENSE("GPL");
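
The dm.c hunks above make the mempool reservations tunable (reserved_bio_based_ios, reserved_rq_based_ios) and route every consumer through __dm_get_reserved_ios(), which substitutes the default for 0, clamps to RESERVED_MAX_IOS, and writes the corrected value back with a best-effort cmpxchg(). Below is a self-contained C11 sketch of that read-validate-writeback pattern; the names and limits are illustrative, and standard atomics stand in for the kernel's ACCESS_ONCE()/cmpxchg().

#include <stdatomic.h>
#include <stdio.h>

#define DEF_IOS	16U
#define MAX_IOS	1024U

static _Atomic unsigned reserved_ios = DEF_IOS;	/* imagine this is user-writable */

static unsigned get_reserved_ios(void)
{
	unsigned ios = atomic_load(&reserved_ios);
	unsigned fixed = 0;

	if (!ios)
		fixed = DEF_IOS;	/* 0 means "use the default" */
	else if (ios > MAX_IOS)
		fixed = MAX_IOS;	/* clamp out-of-range values */

	if (fixed) {
		/*
		 * Best-effort writeback: losing the race is harmless because
		 * every caller revalidates whatever it reads.
		 */
		atomic_compare_exchange_strong(&reserved_ios, &ios, fixed);
		ios = fixed;
	}

	return ios;
}

int main(void)
{
	atomic_store(&reserved_ios, 1000000);	/* simulate a bogus runtime write */
	printf("effective reserved ios: %u\n", get_reserved_ios());
	return 0;
}

Because the parameters are registered with S_IRUGO | S_IWUSR, they can also be changed at runtime through the module's sysfs parameters directory, and the next pool allocation picks up the revalidated value.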
diff --git a/drivers/md/dm.h b/drivers/md/dm.h
index 5e604cc7b4aa..1d1ad7b7e527 100644
--- a/drivers/md/dm.h
+++ b/drivers/md/dm.h
@@ -184,6 +184,9 @@ void dm_free_md_mempools(struct dm_md_mempools *pools);
 /*
  * Helpers that are used by DM core
  */
+unsigned dm_get_reserved_bio_based_ios(void);
+unsigned dm_get_reserved_rq_based_ios(void);
+
 static inline bool dm_message_test_buffer_overflow(char *result, unsigned maxlen)
 {
 	return !maxlen || strlen(result) + 1 >= maxlen;