about | summary | refs | log | tree | commit | diff | stats
path: root/drivers/md
diff options
context:
space:
mode:
author Linus Torvalds <torvalds@linux-foundation.org> 2014-06-12 16:33:29 -0400
committer Linus Torvalds <torvalds@linux-foundation.org> 2014-06-12 16:33:29 -0400
commit 0e04c641b199435f3779454055f6a7de258ecdfc (patch)
tree 9b79da0c52240bf4b96270ac12356ad75da6f739 /drivers/md
parent 7550cfab3d4053b54f16e2fe337affde71d1eb51 (diff)
parent 09869de57ed2728ae3c619803932a86cb0e2c4f8 (diff)
Merge tag 'dm-3.16-changes' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm
Pull device mapper updates from Mike Snitzer:
 "This pull request is later than I'd have liked because I was waiting for some performance data to help finally justify sending the long-standing dm-crypt cpu scalability improvements upstream.

  Unfortunately we came up short, so those dm-crypt changes will continue to wait, but it seems we're not far off.

   - Add dm_accept_partial_bio interface to DM core to allow DM targets to only process a portion of a bio, the remainder being sent in the next bio. This enables the old dm snapshot-origin target to only split write bios on chunk boundaries, read bios are now sent to the origin device unchanged.

   - Add DM core support for disabling WRITE SAME if the underlying SCSI layer disables it due to command failure.

   - Reduce lock contention in DM's bio-prison.

   - A few small cleanups and fixes to dm-thin and dm-era"

* tag 'dm-3.16-changes' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm:
  dm thin: update discard_granularity to reflect the thin-pool blocksize
  dm bio prison: implement per bucket locking in the dm_bio_prison hash table
  dm: remove symbol export for dm_set_device_limits
  dm: disable WRITE SAME if it fails
  dm era: check for a non-NULL metadata object before closing it
  dm thin: return ENOSPC instead of EIO when error_if_no_space enabled
  dm thin: cleanup noflush_work to use a proper completion
  dm snapshot: do not split read bios sent to snapshot-origin target
  dm snapshot: allocate a per-target structure for snapshot-origin target
  dm: introduce dm_accept_partial_bio
  dm: change sector_count member in clone_info from sector_t to unsigned
Diffstat (limited to 'drivers/md')
-rw-r--r-- drivers/md/dm-bio-prison.c 70
-rw-r--r-- drivers/md/dm-bio-prison.h 2
-rw-r--r-- drivers/md/dm-era-target.c 3
-rw-r--r-- drivers/md/dm-mpath.c 11
-rw-r--r-- drivers/md/dm-snap.c 67
-rw-r--r-- drivers/md/dm-table.c 5
-rw-r--r-- drivers/md/dm-thin.c 93
-rw-r--r-- drivers/md/dm.c 86
8 files changed, 227 insertions, 110 deletions
diff --git a/drivers/md/dm-bio-prison.c b/drivers/md/dm-bio-prison.c
index 85f0b7074257..f752d12081ff 100644
--- a/drivers/md/dm-bio-prison.c
+++ b/drivers/md/dm-bio-prison.c
@@ -14,13 +14,17 @@
14 14
15/*----------------------------------------------------------------*/ 15/*----------------------------------------------------------------*/
16 16
17struct dm_bio_prison { 17struct bucket {
18 spinlock_t lock; 18 spinlock_t lock;
19 struct hlist_head cells;
20};
21
22struct dm_bio_prison {
19 mempool_t *cell_pool; 23 mempool_t *cell_pool;
20 24
21 unsigned nr_buckets; 25 unsigned nr_buckets;
22 unsigned hash_mask; 26 unsigned hash_mask;
23 struct hlist_head *cells; 27 struct bucket *buckets;
24}; 28};
25 29
26/*----------------------------------------------------------------*/ 30/*----------------------------------------------------------------*/
@@ -40,6 +44,12 @@ static uint32_t calc_nr_buckets(unsigned nr_cells)
40 44
41static struct kmem_cache *_cell_cache; 45static struct kmem_cache *_cell_cache;
42 46
47static void init_bucket(struct bucket *b)
48{
49 spin_lock_init(&b->lock);
50 INIT_HLIST_HEAD(&b->cells);
51}
52
43/* 53/*
44 * @nr_cells should be the number of cells you want in use _concurrently_. 54 * @nr_cells should be the number of cells you want in use _concurrently_.
45 * Don't confuse it with the number of distinct keys. 55 * Don't confuse it with the number of distinct keys.
@@ -49,13 +59,12 @@ struct dm_bio_prison *dm_bio_prison_create(unsigned nr_cells)
49 unsigned i; 59 unsigned i;
50 uint32_t nr_buckets = calc_nr_buckets(nr_cells); 60 uint32_t nr_buckets = calc_nr_buckets(nr_cells);
51 size_t len = sizeof(struct dm_bio_prison) + 61 size_t len = sizeof(struct dm_bio_prison) +
52 (sizeof(struct hlist_head) * nr_buckets); 62 (sizeof(struct bucket) * nr_buckets);
53 struct dm_bio_prison *prison = kmalloc(len, GFP_KERNEL); 63 struct dm_bio_prison *prison = kmalloc(len, GFP_KERNEL);
54 64
55 if (!prison) 65 if (!prison)
56 return NULL; 66 return NULL;
57 67
58 spin_lock_init(&prison->lock);
59 prison->cell_pool = mempool_create_slab_pool(nr_cells, _cell_cache); 68 prison->cell_pool = mempool_create_slab_pool(nr_cells, _cell_cache);
60 if (!prison->cell_pool) { 69 if (!prison->cell_pool) {
61 kfree(prison); 70 kfree(prison);
@@ -64,9 +73,9 @@ struct dm_bio_prison *dm_bio_prison_create(unsigned nr_cells)
64 73
65 prison->nr_buckets = nr_buckets; 74 prison->nr_buckets = nr_buckets;
66 prison->hash_mask = nr_buckets - 1; 75 prison->hash_mask = nr_buckets - 1;
67 prison->cells = (struct hlist_head *) (prison + 1); 76 prison->buckets = (struct bucket *) (prison + 1);
68 for (i = 0; i < nr_buckets; i++) 77 for (i = 0; i < nr_buckets; i++)
69 INIT_HLIST_HEAD(prison->cells + i); 78 init_bucket(prison->buckets + i);
70 79
71 return prison; 80 return prison;
72} 81}
@@ -107,40 +116,44 @@ static int keys_equal(struct dm_cell_key *lhs, struct dm_cell_key *rhs)
107 (lhs->block == rhs->block); 116 (lhs->block == rhs->block);
108} 117}
109 118
110static struct dm_bio_prison_cell *__search_bucket(struct hlist_head *bucket, 119static struct bucket *get_bucket(struct dm_bio_prison *prison,
120 struct dm_cell_key *key)
121{
122 return prison->buckets + hash_key(prison, key);
123}
124
125static struct dm_bio_prison_cell *__search_bucket(struct bucket *b,
111 struct dm_cell_key *key) 126 struct dm_cell_key *key)
112{ 127{
113 struct dm_bio_prison_cell *cell; 128 struct dm_bio_prison_cell *cell;
114 129
115 hlist_for_each_entry(cell, bucket, list) 130 hlist_for_each_entry(cell, &b->cells, list)
116 if (keys_equal(&cell->key, key)) 131 if (keys_equal(&cell->key, key))
117 return cell; 132 return cell;
118 133
119 return NULL; 134 return NULL;
120} 135}
121 136
122static void __setup_new_cell(struct dm_bio_prison *prison, 137static void __setup_new_cell(struct bucket *b,
123 struct dm_cell_key *key, 138 struct dm_cell_key *key,
124 struct bio *holder, 139 struct bio *holder,
125 uint32_t hash,
126 struct dm_bio_prison_cell *cell) 140 struct dm_bio_prison_cell *cell)
127{ 141{
128 memcpy(&cell->key, key, sizeof(cell->key)); 142 memcpy(&cell->key, key, sizeof(cell->key));
129 cell->holder = holder; 143 cell->holder = holder;
130 bio_list_init(&cell->bios); 144 bio_list_init(&cell->bios);
131 hlist_add_head(&cell->list, prison->cells + hash); 145 hlist_add_head(&cell->list, &b->cells);
132} 146}
133 147
134static int __bio_detain(struct dm_bio_prison *prison, 148static int __bio_detain(struct bucket *b,
135 struct dm_cell_key *key, 149 struct dm_cell_key *key,
136 struct bio *inmate, 150 struct bio *inmate,
137 struct dm_bio_prison_cell *cell_prealloc, 151 struct dm_bio_prison_cell *cell_prealloc,
138 struct dm_bio_prison_cell **cell_result) 152 struct dm_bio_prison_cell **cell_result)
139{ 153{
140 uint32_t hash = hash_key(prison, key);
141 struct dm_bio_prison_cell *cell; 154 struct dm_bio_prison_cell *cell;
142 155
143 cell = __search_bucket(prison->cells + hash, key); 156 cell = __search_bucket(b, key);
144 if (cell) { 157 if (cell) {
145 if (inmate) 158 if (inmate)
146 bio_list_add(&cell->bios, inmate); 159 bio_list_add(&cell->bios, inmate);
@@ -148,7 +161,7 @@ static int __bio_detain(struct dm_bio_prison *prison,
148 return 1; 161 return 1;
149 } 162 }
150 163
151 __setup_new_cell(prison, key, inmate, hash, cell_prealloc); 164 __setup_new_cell(b, key, inmate, cell_prealloc);
152 *cell_result = cell_prealloc; 165 *cell_result = cell_prealloc;
153 return 0; 166 return 0;
154} 167}
@@ -161,10 +174,11 @@ static int bio_detain(struct dm_bio_prison *prison,
161{ 174{
162 int r; 175 int r;
163 unsigned long flags; 176 unsigned long flags;
177 struct bucket *b = get_bucket(prison, key);
164 178
165 spin_lock_irqsave(&prison->lock, flags); 179 spin_lock_irqsave(&b->lock, flags);
166 r = __bio_detain(prison, key, inmate, cell_prealloc, cell_result); 180 r = __bio_detain(b, key, inmate, cell_prealloc, cell_result);
167 spin_unlock_irqrestore(&prison->lock, flags); 181 spin_unlock_irqrestore(&b->lock, flags);
168 182
169 return r; 183 return r;
170} 184}
@@ -208,10 +222,11 @@ void dm_cell_release(struct dm_bio_prison *prison,
208 struct bio_list *bios) 222 struct bio_list *bios)
209{ 223{
210 unsigned long flags; 224 unsigned long flags;
225 struct bucket *b = get_bucket(prison, &cell->key);
211 226
212 spin_lock_irqsave(&prison->lock, flags); 227 spin_lock_irqsave(&b->lock, flags);
213 __cell_release(cell, bios); 228 __cell_release(cell, bios);
214 spin_unlock_irqrestore(&prison->lock, flags); 229 spin_unlock_irqrestore(&b->lock, flags);
215} 230}
216EXPORT_SYMBOL_GPL(dm_cell_release); 231EXPORT_SYMBOL_GPL(dm_cell_release);
217 232
@@ -230,28 +245,25 @@ void dm_cell_release_no_holder(struct dm_bio_prison *prison,
230 struct bio_list *inmates) 245 struct bio_list *inmates)
231{ 246{
232 unsigned long flags; 247 unsigned long flags;
248 struct bucket *b = get_bucket(prison, &cell->key);
233 249
234 spin_lock_irqsave(&prison->lock, flags); 250 spin_lock_irqsave(&b->lock, flags);
235 __cell_release_no_holder(cell, inmates); 251 __cell_release_no_holder(cell, inmates);
236 spin_unlock_irqrestore(&prison->lock, flags); 252 spin_unlock_irqrestore(&b->lock, flags);
237} 253}
238EXPORT_SYMBOL_GPL(dm_cell_release_no_holder); 254EXPORT_SYMBOL_GPL(dm_cell_release_no_holder);
239 255
240void dm_cell_error(struct dm_bio_prison *prison, 256void dm_cell_error(struct dm_bio_prison *prison,
241 struct dm_bio_prison_cell *cell) 257 struct dm_bio_prison_cell *cell, int error)
242{ 258{
243 struct bio_list bios; 259 struct bio_list bios;
244 struct bio *bio; 260 struct bio *bio;
245 unsigned long flags;
246 261
247 bio_list_init(&bios); 262 bio_list_init(&bios);
248 263 dm_cell_release(prison, cell, &bios);
249 spin_lock_irqsave(&prison->lock, flags);
250 __cell_release(cell, &bios);
251 spin_unlock_irqrestore(&prison->lock, flags);
252 264
253 while ((bio = bio_list_pop(&bios))) 265 while ((bio = bio_list_pop(&bios)))
254 bio_io_error(bio); 266 bio_endio(bio, error);
255} 267}
256EXPORT_SYMBOL_GPL(dm_cell_error); 268EXPORT_SYMBOL_GPL(dm_cell_error);
257 269
diff --git a/drivers/md/dm-bio-prison.h b/drivers/md/dm-bio-prison.h
index 3f833190eadf..6805a142b750 100644
--- a/drivers/md/dm-bio-prison.h
+++ b/drivers/md/dm-bio-prison.h
@@ -85,7 +85,7 @@ void dm_cell_release_no_holder(struct dm_bio_prison *prison,
85 struct dm_bio_prison_cell *cell, 85 struct dm_bio_prison_cell *cell,
86 struct bio_list *inmates); 86 struct bio_list *inmates);
87void dm_cell_error(struct dm_bio_prison *prison, 87void dm_cell_error(struct dm_bio_prison *prison,
88 struct dm_bio_prison_cell *cell); 88 struct dm_bio_prison_cell *cell, int error);
89 89
90/*----------------------------------------------------------------*/ 90/*----------------------------------------------------------------*/
91 91
diff --git a/drivers/md/dm-era-target.c b/drivers/md/dm-era-target.c
index 414dad4cb49b..ad913cd4aded 100644
--- a/drivers/md/dm-era-target.c
+++ b/drivers/md/dm-era-target.c
@@ -1391,7 +1391,8 @@ static int era_is_congested(struct dm_target_callbacks *cb, int bdi_bits)
1391 1391
1392static void era_destroy(struct era *era) 1392static void era_destroy(struct era *era)
1393{ 1393{
1394 metadata_close(era->md); 1394 if (era->md)
1395 metadata_close(era->md);
1395 1396
1396 if (era->wq) 1397 if (era->wq)
1397 destroy_workqueue(era->wq); 1398 destroy_workqueue(era->wq);
diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c
index ebfa411d1a7d..3f6fd9d33ba3 100644
--- a/drivers/md/dm-mpath.c
+++ b/drivers/md/dm-mpath.c
@@ -1242,17 +1242,8 @@ static int do_end_io(struct multipath *m, struct request *clone,
1242 if (!error && !clone->errors) 1242 if (!error && !clone->errors)
1243 return 0; /* I/O complete */ 1243 return 0; /* I/O complete */
1244 1244
1245 if (noretry_error(error)) { 1245 if (noretry_error(error))
1246 if ((clone->cmd_flags & REQ_WRITE_SAME) &&
1247 !clone->q->limits.max_write_same_sectors) {
1248 struct queue_limits *limits;
1249
1250 /* device doesn't really support WRITE SAME, disable it */
1251 limits = dm_get_queue_limits(dm_table_get_md(m->ti->table));
1252 limits->max_write_same_sectors = 0;
1253 }
1254 return error; 1246 return error;
1255 }
1256 1247
1257 if (mpio->pgpath) 1248 if (mpio->pgpath)
1258 fail_path(mpio->pgpath); 1249 fail_path(mpio->pgpath);
diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c
index 8e0caed0bf74..5bd2290cfb1e 100644
--- a/drivers/md/dm-snap.c
+++ b/drivers/md/dm-snap.c
@@ -2141,6 +2141,11 @@ static int origin_write_extent(struct dm_snapshot *merging_snap,
2141 * Origin: maps a linear range of a device, with hooks for snapshotting. 2141 * Origin: maps a linear range of a device, with hooks for snapshotting.
2142 */ 2142 */
2143 2143
2144struct dm_origin {
2145 struct dm_dev *dev;
2146 unsigned split_boundary;
2147};
2148
2144/* 2149/*
2145 * Construct an origin mapping: <dev_path> 2150 * Construct an origin mapping: <dev_path>
2146 * The context for an origin is merely a 'struct dm_dev *' 2151 * The context for an origin is merely a 'struct dm_dev *'
@@ -2149,41 +2154,65 @@ static int origin_write_extent(struct dm_snapshot *merging_snap,
2149static int origin_ctr(struct dm_target *ti, unsigned int argc, char **argv) 2154static int origin_ctr(struct dm_target *ti, unsigned int argc, char **argv)
2150{ 2155{
2151 int r; 2156 int r;
2152 struct dm_dev *dev; 2157 struct dm_origin *o;
2153 2158
2154 if (argc != 1) { 2159 if (argc != 1) {
2155 ti->error = "origin: incorrect number of arguments"; 2160 ti->error = "origin: incorrect number of arguments";
2156 return -EINVAL; 2161 return -EINVAL;
2157 } 2162 }
2158 2163
2159 r = dm_get_device(ti, argv[0], dm_table_get_mode(ti->table), &dev); 2164 o = kmalloc(sizeof(struct dm_origin), GFP_KERNEL);
2165 if (!o) {
2166 ti->error = "Cannot allocate private origin structure";
2167 r = -ENOMEM;
2168 goto bad_alloc;
2169 }
2170
2171 r = dm_get_device(ti, argv[0], dm_table_get_mode(ti->table), &o->dev);
2160 if (r) { 2172 if (r) {
2161 ti->error = "Cannot get target device"; 2173 ti->error = "Cannot get target device";
2162 return r; 2174 goto bad_open;
2163 } 2175 }
2164 2176
2165 ti->private = dev; 2177 ti->private = o;
2166 ti->num_flush_bios = 1; 2178 ti->num_flush_bios = 1;
2167 2179
2168 return 0; 2180 return 0;
2181
2182bad_open:
2183 kfree(o);
2184bad_alloc:
2185 return r;
2169} 2186}
2170 2187
2171static void origin_dtr(struct dm_target *ti) 2188static void origin_dtr(struct dm_target *ti)
2172{ 2189{
2173 struct dm_dev *dev = ti->private; 2190 struct dm_origin *o = ti->private;
2174 dm_put_device(ti, dev); 2191 dm_put_device(ti, o->dev);
2192 kfree(o);
2175} 2193}
2176 2194
2177static int origin_map(struct dm_target *ti, struct bio *bio) 2195static int origin_map(struct dm_target *ti, struct bio *bio)
2178{ 2196{
2179 struct dm_dev *dev = ti->private; 2197 struct dm_origin *o = ti->private;
2180 bio->bi_bdev = dev->bdev; 2198 unsigned available_sectors;
2181 2199
2182 if (bio->bi_rw & REQ_FLUSH) 2200 bio->bi_bdev = o->dev->bdev;
2201
2202 if (unlikely(bio->bi_rw & REQ_FLUSH))
2183 return DM_MAPIO_REMAPPED; 2203 return DM_MAPIO_REMAPPED;
2184 2204
2205 if (bio_rw(bio) != WRITE)
2206 return DM_MAPIO_REMAPPED;
2207
2208 available_sectors = o->split_boundary -
2209 ((unsigned)bio->bi_iter.bi_sector & (o->split_boundary - 1));
2210
2211 if (bio_sectors(bio) > available_sectors)
2212 dm_accept_partial_bio(bio, available_sectors);
2213
2185 /* Only tell snapshots if this is a write */ 2214 /* Only tell snapshots if this is a write */
2186 return (bio_rw(bio) == WRITE) ? do_origin(dev, bio) : DM_MAPIO_REMAPPED; 2215 return do_origin(o->dev, bio);
2187} 2216}
2188 2217
2189/* 2218/*
@@ -2192,15 +2221,15 @@ static int origin_map(struct dm_target *ti, struct bio *bio)
2192 */ 2221 */
2193static void origin_resume(struct dm_target *ti) 2222static void origin_resume(struct dm_target *ti)
2194{ 2223{
2195 struct dm_dev *dev = ti->private; 2224 struct dm_origin *o = ti->private;
2196 2225
2197 ti->max_io_len = get_origin_minimum_chunksize(dev->bdev); 2226 o->split_boundary = get_origin_minimum_chunksize(o->dev->bdev);
2198} 2227}
2199 2228
2200static void origin_status(struct dm_target *ti, status_type_t type, 2229static void origin_status(struct dm_target *ti, status_type_t type,
2201 unsigned status_flags, char *result, unsigned maxlen) 2230 unsigned status_flags, char *result, unsigned maxlen)
2202{ 2231{
2203 struct dm_dev *dev = ti->private; 2232 struct dm_origin *o = ti->private;
2204 2233
2205 switch (type) { 2234 switch (type) {
2206 case STATUSTYPE_INFO: 2235 case STATUSTYPE_INFO:
@@ -2208,7 +2237,7 @@ static void origin_status(struct dm_target *ti, status_type_t type,
2208 break; 2237 break;
2209 2238
2210 case STATUSTYPE_TABLE: 2239 case STATUSTYPE_TABLE:
2211 snprintf(result, maxlen, "%s", dev->name); 2240 snprintf(result, maxlen, "%s", o->dev->name);
2212 break; 2241 break;
2213 } 2242 }
2214} 2243}
@@ -2216,13 +2245,13 @@ static void origin_status(struct dm_target *ti, status_type_t type,
2216static int origin_merge(struct dm_target *ti, struct bvec_merge_data *bvm, 2245static int origin_merge(struct dm_target *ti, struct bvec_merge_data *bvm,
2217 struct bio_vec *biovec, int max_size) 2246 struct bio_vec *biovec, int max_size)
2218{ 2247{
2219 struct dm_dev *dev = ti->private; 2248 struct dm_origin *o = ti->private;
2220 struct request_queue *q = bdev_get_queue(dev->bdev); 2249 struct request_queue *q = bdev_get_queue(o->dev->bdev);
2221 2250
2222 if (!q->merge_bvec_fn) 2251 if (!q->merge_bvec_fn)
2223 return max_size; 2252 return max_size;
2224 2253
2225 bvm->bi_bdev = dev->bdev; 2254 bvm->bi_bdev = o->dev->bdev;
2226 2255
2227 return min(max_size, q->merge_bvec_fn(q, bvm, biovec)); 2256 return min(max_size, q->merge_bvec_fn(q, bvm, biovec));
2228} 2257}
@@ -2230,9 +2259,9 @@ static int origin_merge(struct dm_target *ti, struct bvec_merge_data *bvm,
2230static int origin_iterate_devices(struct dm_target *ti, 2259static int origin_iterate_devices(struct dm_target *ti,
2231 iterate_devices_callout_fn fn, void *data) 2260 iterate_devices_callout_fn fn, void *data)
2232{ 2261{
2233 struct dm_dev *dev = ti->private; 2262 struct dm_origin *o = ti->private;
2234 2263
2235 return fn(ti, dev, 0, ti->len, data); 2264 return fn(ti, o->dev, 0, ti->len, data);
2236} 2265}
2237 2266
2238static struct target_type origin_target = { 2267static struct target_type origin_target = {
diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
index 50601ec7017a..5f59f1e3e5b1 100644
--- a/drivers/md/dm-table.c
+++ b/drivers/md/dm-table.c
@@ -465,8 +465,8 @@ int dm_get_device(struct dm_target *ti, const char *path, fmode_t mode,
465} 465}
466EXPORT_SYMBOL(dm_get_device); 466EXPORT_SYMBOL(dm_get_device);
467 467
468int dm_set_device_limits(struct dm_target *ti, struct dm_dev *dev, 468static int dm_set_device_limits(struct dm_target *ti, struct dm_dev *dev,
469 sector_t start, sector_t len, void *data) 469 sector_t start, sector_t len, void *data)
470{ 470{
471 struct queue_limits *limits = data; 471 struct queue_limits *limits = data;
472 struct block_device *bdev = dev->bdev; 472 struct block_device *bdev = dev->bdev;
@@ -499,7 +499,6 @@ int dm_set_device_limits(struct dm_target *ti, struct dm_dev *dev,
499 (unsigned int) (PAGE_SIZE >> 9)); 499 (unsigned int) (PAGE_SIZE >> 9));
500 return 0; 500 return 0;
501} 501}
502EXPORT_SYMBOL_GPL(dm_set_device_limits);
503 502
504/* 503/*
505 * Decrement a device's use count and remove it if necessary. 504 * Decrement a device's use count and remove it if necessary.
diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c
index 242ac2ea5f29..fc9c848a60c9 100644
--- a/drivers/md/dm-thin.c
+++ b/drivers/md/dm-thin.c
@@ -310,13 +310,18 @@ static void cell_defer_no_holder_no_free(struct thin_c *tc,
310 wake_worker(pool); 310 wake_worker(pool);
311} 311}
312 312
313static void cell_error(struct pool *pool, 313static void cell_error_with_code(struct pool *pool,
314 struct dm_bio_prison_cell *cell) 314 struct dm_bio_prison_cell *cell, int error_code)
315{ 315{
316 dm_cell_error(pool->prison, cell); 316 dm_cell_error(pool->prison, cell, error_code);
317 dm_bio_prison_free_cell(pool->prison, cell); 317 dm_bio_prison_free_cell(pool->prison, cell);
318} 318}
319 319
320static void cell_error(struct pool *pool, struct dm_bio_prison_cell *cell)
321{
322 cell_error_with_code(pool, cell, -EIO);
323}
324
320/*----------------------------------------------------------------*/ 325/*----------------------------------------------------------------*/
321 326
322/* 327/*
@@ -1027,7 +1032,7 @@ static void retry_on_resume(struct bio *bio)
1027 spin_unlock_irqrestore(&tc->lock, flags); 1032 spin_unlock_irqrestore(&tc->lock, flags);
1028} 1033}
1029 1034
1030static bool should_error_unserviceable_bio(struct pool *pool) 1035static int should_error_unserviceable_bio(struct pool *pool)
1031{ 1036{
1032 enum pool_mode m = get_pool_mode(pool); 1037 enum pool_mode m = get_pool_mode(pool);
1033 1038
@@ -1035,25 +1040,27 @@ static bool should_error_unserviceable_bio(struct pool *pool)
1035 case PM_WRITE: 1040 case PM_WRITE:
1036 /* Shouldn't get here */ 1041 /* Shouldn't get here */
1037 DMERR_LIMIT("bio unserviceable, yet pool is in PM_WRITE mode"); 1042 DMERR_LIMIT("bio unserviceable, yet pool is in PM_WRITE mode");
1038 return true; 1043 return -EIO;
1039 1044
1040 case PM_OUT_OF_DATA_SPACE: 1045 case PM_OUT_OF_DATA_SPACE:
1041 return pool->pf.error_if_no_space; 1046 return pool->pf.error_if_no_space ? -ENOSPC : 0;
1042 1047
1043 case PM_READ_ONLY: 1048 case PM_READ_ONLY:
1044 case PM_FAIL: 1049 case PM_FAIL:
1045 return true; 1050 return -EIO;
1046 default: 1051 default:
1047 /* Shouldn't get here */ 1052 /* Shouldn't get here */
1048 DMERR_LIMIT("bio unserviceable, yet pool has an unknown mode"); 1053 DMERR_LIMIT("bio unserviceable, yet pool has an unknown mode");
1049 return true; 1054 return -EIO;
1050 } 1055 }
1051} 1056}
1052 1057
1053static void handle_unserviceable_bio(struct pool *pool, struct bio *bio) 1058static void handle_unserviceable_bio(struct pool *pool, struct bio *bio)
1054{ 1059{
1055 if (should_error_unserviceable_bio(pool)) 1060 int error = should_error_unserviceable_bio(pool);
1056 bio_io_error(bio); 1061
1062 if (error)
1063 bio_endio(bio, error);
1057 else 1064 else
1058 retry_on_resume(bio); 1065 retry_on_resume(bio);
1059} 1066}
@@ -1062,18 +1069,21 @@ static void retry_bios_on_resume(struct pool *pool, struct dm_bio_prison_cell *c
1062{ 1069{
1063 struct bio *bio; 1070 struct bio *bio;
1064 struct bio_list bios; 1071 struct bio_list bios;
1072 int error;
1065 1073
1066 if (should_error_unserviceable_bio(pool)) { 1074 error = should_error_unserviceable_bio(pool);
1067 cell_error(pool, cell); 1075 if (error) {
1076 cell_error_with_code(pool, cell, error);
1068 return; 1077 return;
1069 } 1078 }
1070 1079
1071 bio_list_init(&bios); 1080 bio_list_init(&bios);
1072 cell_release(pool, cell, &bios); 1081 cell_release(pool, cell, &bios);
1073 1082
1074 if (should_error_unserviceable_bio(pool)) 1083 error = should_error_unserviceable_bio(pool);
1084 if (error)
1075 while ((bio = bio_list_pop(&bios))) 1085 while ((bio = bio_list_pop(&bios)))
1076 bio_io_error(bio); 1086 bio_endio(bio, error);
1077 else 1087 else
1078 while ((bio = bio_list_pop(&bios))) 1088 while ((bio = bio_list_pop(&bios)))
1079 retry_on_resume(bio); 1089 retry_on_resume(bio);
@@ -1610,47 +1620,63 @@ static void do_no_space_timeout(struct work_struct *ws)
1610 1620
1611/*----------------------------------------------------------------*/ 1621/*----------------------------------------------------------------*/
1612 1622
1613struct noflush_work { 1623struct pool_work {
1614 struct work_struct worker; 1624 struct work_struct worker;
1615 struct thin_c *tc; 1625 struct completion complete;
1626};
1627
1628static struct pool_work *to_pool_work(struct work_struct *ws)
1629{
1630 return container_of(ws, struct pool_work, worker);
1631}
1616 1632
1617 atomic_t complete; 1633static void pool_work_complete(struct pool_work *pw)
1618 wait_queue_head_t wait; 1634{
1635 complete(&pw->complete);
1636}
1637
1638static void pool_work_wait(struct pool_work *pw, struct pool *pool,
1639 void (*fn)(struct work_struct *))
1640{
1641 INIT_WORK_ONSTACK(&pw->worker, fn);
1642 init_completion(&pw->complete);
1643 queue_work(pool->wq, &pw->worker);
1644 wait_for_completion(&pw->complete);
1645}
1646
1647/*----------------------------------------------------------------*/
1648
1649struct noflush_work {
1650 struct pool_work pw;
1651 struct thin_c *tc;
1619}; 1652};
1620 1653
1621static void complete_noflush_work(struct noflush_work *w) 1654static struct noflush_work *to_noflush(struct work_struct *ws)
1622{ 1655{
1623 atomic_set(&w->complete, 1); 1656 return container_of(to_pool_work(ws), struct noflush_work, pw);
1624 wake_up(&w->wait);
1625} 1657}
1626 1658
1627static void do_noflush_start(struct work_struct *ws) 1659static void do_noflush_start(struct work_struct *ws)
1628{ 1660{
1629 struct noflush_work *w = container_of(ws, struct noflush_work, worker); 1661 struct noflush_work *w = to_noflush(ws);
1630 w->tc->requeue_mode = true; 1662 w->tc->requeue_mode = true;
1631 requeue_io(w->tc); 1663 requeue_io(w->tc);
1632 complete_noflush_work(w); 1664 pool_work_complete(&w->pw);
1633} 1665}
1634 1666
1635static void do_noflush_stop(struct work_struct *ws) 1667static void do_noflush_stop(struct work_struct *ws)
1636{ 1668{
1637 struct noflush_work *w = container_of(ws, struct noflush_work, worker); 1669 struct noflush_work *w = to_noflush(ws);
1638 w->tc->requeue_mode = false; 1670 w->tc->requeue_mode = false;
1639 complete_noflush_work(w); 1671 pool_work_complete(&w->pw);
1640} 1672}
1641 1673
1642static void noflush_work(struct thin_c *tc, void (*fn)(struct work_struct *)) 1674static void noflush_work(struct thin_c *tc, void (*fn)(struct work_struct *))
1643{ 1675{
1644 struct noflush_work w; 1676 struct noflush_work w;
1645 1677
1646 INIT_WORK_ONSTACK(&w.worker, fn);
1647 w.tc = tc; 1678 w.tc = tc;
1648 atomic_set(&w.complete, 0); 1679 pool_work_wait(&w.pw, tc->pool, fn);
1649 init_waitqueue_head(&w.wait);
1650
1651 queue_work(tc->pool->wq, &w.worker);
1652
1653 wait_event(w.wait, atomic_read(&w.complete));
1654} 1680}
1655 1681
1656/*----------------------------------------------------------------*/ 1682/*----------------------------------------------------------------*/
@@ -3068,7 +3094,8 @@ static void set_discard_limits(struct pool_c *pt, struct queue_limits *limits)
3068 */ 3094 */
3069 if (pt->adjusted_pf.discard_passdown) { 3095 if (pt->adjusted_pf.discard_passdown) {
3070 data_limits = &bdev_get_queue(pt->data_dev->bdev)->limits; 3096 data_limits = &bdev_get_queue(pt->data_dev->bdev)->limits;
3071 limits->discard_granularity = data_limits->discard_granularity; 3097 limits->discard_granularity = max(data_limits->discard_granularity,
3098 pool->sectors_per_block << SECTOR_SHIFT);
3072 } else 3099 } else
3073 limits->discard_granularity = pool->sectors_per_block << SECTOR_SHIFT; 3100 limits->discard_granularity = pool->sectors_per_block << SECTOR_SHIFT;
3074} 3101}
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index aa9e093343d4..437d99045ef2 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -755,6 +755,14 @@ static void dec_pending(struct dm_io *io, int error)
755 } 755 }
756} 756}
757 757
758static void disable_write_same(struct mapped_device *md)
759{
760 struct queue_limits *limits = dm_get_queue_limits(md);
761
762 /* device doesn't really support WRITE SAME, disable it */
763 limits->max_write_same_sectors = 0;
764}
765
758static void clone_endio(struct bio *bio, int error) 766static void clone_endio(struct bio *bio, int error)
759{ 767{
760 int r = 0; 768 int r = 0;
@@ -783,6 +791,10 @@ static void clone_endio(struct bio *bio, int error)
783 } 791 }
784 } 792 }
785 793
794 if (unlikely(r == -EREMOTEIO && (bio->bi_rw & REQ_WRITE_SAME) &&
795 !bdev_get_queue(bio->bi_bdev)->limits.max_write_same_sectors))
796 disable_write_same(md);
797
786 free_tio(md, tio); 798 free_tio(md, tio);
787 dec_pending(io, error); 799 dec_pending(io, error);
788} 800}
@@ -977,6 +989,10 @@ static void dm_done(struct request *clone, int error, bool mapped)
977 r = rq_end_io(tio->ti, clone, error, &tio->info); 989 r = rq_end_io(tio->ti, clone, error, &tio->info);
978 } 990 }
979 991
992 if (unlikely(r == -EREMOTEIO && (clone->cmd_flags & REQ_WRITE_SAME) &&
993 !clone->q->limits.max_write_same_sectors))
994 disable_write_same(tio->md);
995
980 if (r <= 0) 996 if (r <= 0)
981 /* The target wants to complete the I/O */ 997 /* The target wants to complete the I/O */
982 dm_end_request(clone, r); 998 dm_end_request(clone, r);
@@ -1110,6 +1126,46 @@ int dm_set_target_max_io_len(struct dm_target *ti, sector_t len)
1110} 1126}
1111EXPORT_SYMBOL_GPL(dm_set_target_max_io_len); 1127EXPORT_SYMBOL_GPL(dm_set_target_max_io_len);
1112 1128
1129/*
1130 * A target may call dm_accept_partial_bio only from the map routine. It is
1131 * allowed for all bio types except REQ_FLUSH.
1132 *
1133 * dm_accept_partial_bio informs the dm that the target only wants to process
1134 * additional n_sectors sectors of the bio and the rest of the data should be
1135 * sent in a next bio.
1136 *
1137 * A diagram that explains the arithmetics:
1138 * +--------------------+---------------+-------+
1139 * | 1 | 2 | 3 |
1140 * +--------------------+---------------+-------+
1141 *
1142 * <-------------- *tio->len_ptr --------------->
1143 * <------- bi_size ------->
1144 * <-- n_sectors -->
1145 *
1146 * Region 1 was already iterated over with bio_advance or similar function.
1147 * (it may be empty if the target doesn't use bio_advance)
1148 * Region 2 is the remaining bio size that the target wants to process.
1149 * (it may be empty if region 1 is non-empty, although there is no reason
1150 * to make it empty)
1151 * The target requires that region 3 is to be sent in the next bio.
1152 *
1153 * If the target wants to receive multiple copies of the bio (via num_*bios, etc),
1154 * the partially processed part (the sum of regions 1+2) must be the same for all
1155 * copies of the bio.
1156 */
1157void dm_accept_partial_bio(struct bio *bio, unsigned n_sectors)
1158{
1159 struct dm_target_io *tio = container_of(bio, struct dm_target_io, clone);
1160 unsigned bi_size = bio->bi_iter.bi_size >> SECTOR_SHIFT;
1161 BUG_ON(bio->bi_rw & REQ_FLUSH);
1162 BUG_ON(bi_size > *tio->len_ptr);
1163 BUG_ON(n_sectors > bi_size);
1164 *tio->len_ptr -= bi_size - n_sectors;
1165 bio->bi_iter.bi_size = n_sectors << SECTOR_SHIFT;
1166}
1167EXPORT_SYMBOL_GPL(dm_accept_partial_bio);
1168
1113static void __map_bio(struct dm_target_io *tio) 1169static void __map_bio(struct dm_target_io *tio)
1114{ 1170{
1115 int r; 1171 int r;
@@ -1152,10 +1208,10 @@ struct clone_info {
1152 struct bio *bio; 1208 struct bio *bio;
1153 struct dm_io *io; 1209 struct dm_io *io;
1154 sector_t sector; 1210 sector_t sector;
1155 sector_t sector_count; 1211 unsigned sector_count;
1156}; 1212};
1157 1213
1158static void bio_setup_sector(struct bio *bio, sector_t sector, sector_t len) 1214static void bio_setup_sector(struct bio *bio, sector_t sector, unsigned len)
1159{ 1215{
1160 bio->bi_iter.bi_sector = sector; 1216 bio->bi_iter.bi_sector = sector;
1161 bio->bi_iter.bi_size = to_bytes(len); 1217 bio->bi_iter.bi_size = to_bytes(len);
@@ -1200,11 +1256,13 @@ static struct dm_target_io *alloc_tio(struct clone_info *ci,
1200 1256
1201static void __clone_and_map_simple_bio(struct clone_info *ci, 1257static void __clone_and_map_simple_bio(struct clone_info *ci,
1202 struct dm_target *ti, 1258 struct dm_target *ti,
1203 unsigned target_bio_nr, sector_t len) 1259 unsigned target_bio_nr, unsigned *len)
1204{ 1260{
1205 struct dm_target_io *tio = alloc_tio(ci, ti, ci->bio->bi_max_vecs, target_bio_nr); 1261 struct dm_target_io *tio = alloc_tio(ci, ti, ci->bio->bi_max_vecs, target_bio_nr);
1206 struct bio *clone = &tio->clone; 1262 struct bio *clone = &tio->clone;
1207 1263
1264 tio->len_ptr = len;
1265
1208 /* 1266 /*
1209 * Discard requests require the bio's inline iovecs be initialized. 1267 * Discard requests require the bio's inline iovecs be initialized.
1210 * ci->bio->bi_max_vecs is BIO_INLINE_VECS anyway, for both flush 1268 * ci->bio->bi_max_vecs is BIO_INLINE_VECS anyway, for both flush
@@ -1212,13 +1270,13 @@ static void __clone_and_map_simple_bio(struct clone_info *ci,
1212 */ 1270 */
1213 __bio_clone_fast(clone, ci->bio); 1271 __bio_clone_fast(clone, ci->bio);
1214 if (len) 1272 if (len)
1215 bio_setup_sector(clone, ci->sector, len); 1273 bio_setup_sector(clone, ci->sector, *len);
1216 1274
1217 __map_bio(tio); 1275 __map_bio(tio);
1218} 1276}
1219 1277
1220static void __send_duplicate_bios(struct clone_info *ci, struct dm_target *ti, 1278static void __send_duplicate_bios(struct clone_info *ci, struct dm_target *ti,
1221 unsigned num_bios, sector_t len) 1279 unsigned num_bios, unsigned *len)
1222{ 1280{
1223 unsigned target_bio_nr; 1281 unsigned target_bio_nr;
1224 1282
@@ -1233,13 +1291,13 @@ static int __send_empty_flush(struct clone_info *ci)
1233 1291
1234 BUG_ON(bio_has_data(ci->bio)); 1292 BUG_ON(bio_has_data(ci->bio));
1235 while ((ti = dm_table_get_target(ci->map, target_nr++))) 1293 while ((ti = dm_table_get_target(ci->map, target_nr++)))
1236 __send_duplicate_bios(ci, ti, ti->num_flush_bios, 0); 1294 __send_duplicate_bios(ci, ti, ti->num_flush_bios, NULL);
1237 1295
1238 return 0; 1296 return 0;
1239} 1297}
1240 1298
1241static void __clone_and_map_data_bio(struct clone_info *ci, struct dm_target *ti, 1299static void __clone_and_map_data_bio(struct clone_info *ci, struct dm_target *ti,
1242 sector_t sector, unsigned len) 1300 sector_t sector, unsigned *len)
1243{ 1301{
1244 struct bio *bio = ci->bio; 1302 struct bio *bio = ci->bio;
1245 struct dm_target_io *tio; 1303 struct dm_target_io *tio;
@@ -1254,7 +1312,8 @@ static void __clone_and_map_data_bio(struct clone_info *ci, struct dm_target *ti
1254 1312
1255 for (target_bio_nr = 0; target_bio_nr < num_target_bios; target_bio_nr++) { 1313 for (target_bio_nr = 0; target_bio_nr < num_target_bios; target_bio_nr++) {
1256 tio = alloc_tio(ci, ti, 0, target_bio_nr); 1314 tio = alloc_tio(ci, ti, 0, target_bio_nr);
1257 clone_bio(tio, bio, sector, len); 1315 tio->len_ptr = len;
1316 clone_bio(tio, bio, sector, *len);
1258 __map_bio(tio); 1317 __map_bio(tio);
1259 } 1318 }
1260} 1319}
@@ -1283,7 +1342,7 @@ static int __send_changing_extent_only(struct clone_info *ci,
1283 is_split_required_fn is_split_required) 1342 is_split_required_fn is_split_required)
1284{ 1343{
1285 struct dm_target *ti; 1344 struct dm_target *ti;
1286 sector_t len; 1345 unsigned len;
1287 unsigned num_bios; 1346 unsigned num_bios;
1288 1347
1289 do { 1348 do {
@@ -1302,11 +1361,11 @@ static int __send_changing_extent_only(struct clone_info *ci,
1302 return -EOPNOTSUPP; 1361 return -EOPNOTSUPP;
1303 1362
1304 if (is_split_required && !is_split_required(ti)) 1363 if (is_split_required && !is_split_required(ti))
1305 len = min(ci->sector_count, max_io_len_target_boundary(ci->sector, ti)); 1364 len = min((sector_t)ci->sector_count, max_io_len_target_boundary(ci->sector, ti));
1306 else 1365 else
1307 len = min(ci->sector_count, max_io_len(ci->sector, ti)); 1366 len = min((sector_t)ci->sector_count, max_io_len(ci->sector, ti));
1308 1367
1309 __send_duplicate_bios(ci, ti, num_bios, len); 1368 __send_duplicate_bios(ci, ti, num_bios, &len);
1310 1369
1311 ci->sector += len; 1370 ci->sector += len;
1312 } while (ci->sector_count -= len); 1371 } while (ci->sector_count -= len);
@@ -1345,7 +1404,7 @@ static int __split_and_process_non_flush(struct clone_info *ci)
1345 1404
1346 len = min_t(sector_t, max_io_len(ci->sector, ti), ci->sector_count); 1405 len = min_t(sector_t, max_io_len(ci->sector, ti), ci->sector_count);
1347 1406
1348 __clone_and_map_data_bio(ci, ti, ci->sector, len); 1407 __clone_and_map_data_bio(ci, ti, ci->sector, &len);
1349 1408
1350 ci->sector += len; 1409 ci->sector += len;
1351 ci->sector_count -= len; 1410 ci->sector_count -= len;
@@ -1439,7 +1498,6 @@ static int dm_merge_bvec(struct request_queue *q,
1439 * just one page. 1498 * just one page.
1440 */ 1499 */
1441 else if (queue_max_hw_sectors(q) <= PAGE_SIZE >> 9) 1500 else if (queue_max_hw_sectors(q) <= PAGE_SIZE >> 9)
1442
1443 max_size = 0; 1501 max_size = 0;
1444 1502
1445out: 1503out: