aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2012-12-21 20:08:06 -0500
committerLinus Torvalds <torvalds@linux-foundation.org>2012-12-21 20:08:06 -0500
commitb49249d10324d0fd6fb29725c2807dfd80d0edbc (patch)
tree9a8fa724e6c9f9283530979c6e32a311c74999d5
parent10532b560bacf23766f9c7dc09778b31b198ff45 (diff)
parent45e621d45e24ffc4cb2b2935e8438987b860063a (diff)
Merge tag 'dm-3.8-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/agk/linux-dm
Pull dm update from Alasdair G Kergon: "Miscellaneous device-mapper fixes, cleanups and performance improvements. Of particular note: - Disable broken WRITE SAME support in all targets except linear and striped. Use it when kcopyd is zeroing blocks. - Remove several mempools from targets by moving the data into the bio's new front_pad area(which dm calls 'per_bio_data'). - Fix a race in thin provisioning if discards are misused. - Prevent userspace from interfering with the ioctl parameters and use kmalloc for the data buffer if it's small instead of vmalloc. - Throttle some annoying error messages when I/O fails." * tag 'dm-3.8-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/agk/linux-dm: (36 commits) dm stripe: add WRITE SAME support dm: remove map_info dm snapshot: do not use map_context dm thin: dont use map_context dm raid1: dont use map_context dm flakey: dont use map_context dm raid1: rename read_record to bio_record dm: move target request nr to dm_target_io dm snapshot: use per_bio_data dm verity: use per_bio_data dm raid1: use per_bio_data dm: introduce per_bio_data dm kcopyd: add WRITE SAME support to dm_kcopyd_zero dm linear: add WRITE SAME support dm: add WRITE SAME support dm: prepare to support WRITE SAME dm ioctl: use kmalloc if possible dm ioctl: remove PF_MEMALLOC dm persistent data: improve improve space map block alloc failure message dm thin: use DMERR_LIMIT for errors ...
-rw-r--r--drivers/md/dm-bio-prison.c25
-rw-r--r--drivers/md/dm-bio-prison.h1
-rw-r--r--drivers/md/dm-crypt.c5
-rw-r--r--drivers/md/dm-delay.c5
-rw-r--r--drivers/md/dm-flakey.c21
-rw-r--r--drivers/md/dm-io.c23
-rw-r--r--drivers/md/dm-ioctl.c64
-rw-r--r--drivers/md/dm-kcopyd.c18
-rw-r--r--drivers/md/dm-linear.c6
-rw-r--r--drivers/md/dm-raid.c8
-rw-r--r--drivers/md/dm-raid1.c75
-rw-r--r--drivers/md/dm-snap.c90
-rw-r--r--drivers/md/dm-stripe.c20
-rw-r--r--drivers/md/dm-table.c41
-rw-r--r--drivers/md/dm-target.c5
-rw-r--r--drivers/md/dm-thin-metadata.c2
-rw-r--r--drivers/md/dm-thin.c234
-rw-r--r--drivers/md/dm-verity.c25
-rw-r--r--drivers/md/dm-zero.c5
-rw-r--r--drivers/md/dm.c84
-rw-r--r--drivers/md/dm.h2
-rw-r--r--drivers/md/persistent-data/dm-block-manager.c12
-rw-r--r--drivers/md/persistent-data/dm-btree-internal.h16
-rw-r--r--drivers/md/persistent-data/dm-btree-remove.c50
-rw-r--r--drivers/md/persistent-data/dm-btree-spine.c20
-rw-r--r--drivers/md/persistent-data/dm-btree.c31
-rw-r--r--drivers/md/persistent-data/dm-space-map-common.c16
-rw-r--r--drivers/md/persistent-data/dm-space-map-metadata.c2
-rw-r--r--include/linux/device-mapper.h55
-rw-r--r--include/uapi/linux/dm-ioctl.h4
30 files changed, 522 insertions, 443 deletions
diff --git a/drivers/md/dm-bio-prison.c b/drivers/md/dm-bio-prison.c
index e4e841567459..aefb78e3cbf9 100644
--- a/drivers/md/dm-bio-prison.c
+++ b/drivers/md/dm-bio-prison.c
@@ -208,31 +208,6 @@ void dm_cell_release(struct dm_bio_prison_cell *cell, struct bio_list *bios)
208EXPORT_SYMBOL_GPL(dm_cell_release); 208EXPORT_SYMBOL_GPL(dm_cell_release);
209 209
210/* 210/*
211 * There are a couple of places where we put a bio into a cell briefly
212 * before taking it out again. In these situations we know that no other
213 * bio may be in the cell. This function releases the cell, and also does
214 * a sanity check.
215 */
216static void __cell_release_singleton(struct dm_bio_prison_cell *cell, struct bio *bio)
217{
218 BUG_ON(cell->holder != bio);
219 BUG_ON(!bio_list_empty(&cell->bios));
220
221 __cell_release(cell, NULL);
222}
223
224void dm_cell_release_singleton(struct dm_bio_prison_cell *cell, struct bio *bio)
225{
226 unsigned long flags;
227 struct dm_bio_prison *prison = cell->prison;
228
229 spin_lock_irqsave(&prison->lock, flags);
230 __cell_release_singleton(cell, bio);
231 spin_unlock_irqrestore(&prison->lock, flags);
232}
233EXPORT_SYMBOL_GPL(dm_cell_release_singleton);
234
235/*
236 * Sometimes we don't want the holder, just the additional bios. 211 * Sometimes we don't want the holder, just the additional bios.
237 */ 212 */
238static void __cell_release_no_holder(struct dm_bio_prison_cell *cell, struct bio_list *inmates) 213static void __cell_release_no_holder(struct dm_bio_prison_cell *cell, struct bio_list *inmates)
diff --git a/drivers/md/dm-bio-prison.h b/drivers/md/dm-bio-prison.h
index 4e0ac376700a..53d1a7a84e2f 100644
--- a/drivers/md/dm-bio-prison.h
+++ b/drivers/md/dm-bio-prison.h
@@ -44,7 +44,6 @@ int dm_bio_detain(struct dm_bio_prison *prison, struct dm_cell_key *key,
44 struct bio *inmate, struct dm_bio_prison_cell **ref); 44 struct bio *inmate, struct dm_bio_prison_cell **ref);
45 45
46void dm_cell_release(struct dm_bio_prison_cell *cell, struct bio_list *bios); 46void dm_cell_release(struct dm_bio_prison_cell *cell, struct bio_list *bios);
47void dm_cell_release_singleton(struct dm_bio_prison_cell *cell, struct bio *bio); // FIXME: bio arg not needed
48void dm_cell_release_no_holder(struct dm_bio_prison_cell *cell, struct bio_list *inmates); 47void dm_cell_release_no_holder(struct dm_bio_prison_cell *cell, struct bio_list *inmates);
49void dm_cell_error(struct dm_bio_prison_cell *cell); 48void dm_cell_error(struct dm_bio_prison_cell *cell);
50 49
diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c
index bbf459bca61d..f7369f9d8595 100644
--- a/drivers/md/dm-crypt.c
+++ b/drivers/md/dm-crypt.c
@@ -1689,8 +1689,7 @@ bad:
1689 return ret; 1689 return ret;
1690} 1690}
1691 1691
1692static int crypt_map(struct dm_target *ti, struct bio *bio, 1692static int crypt_map(struct dm_target *ti, struct bio *bio)
1693 union map_info *map_context)
1694{ 1693{
1695 struct dm_crypt_io *io; 1694 struct dm_crypt_io *io;
1696 struct crypt_config *cc = ti->private; 1695 struct crypt_config *cc = ti->private;
@@ -1846,7 +1845,7 @@ static int crypt_iterate_devices(struct dm_target *ti,
1846 1845
1847static struct target_type crypt_target = { 1846static struct target_type crypt_target = {
1848 .name = "crypt", 1847 .name = "crypt",
1849 .version = {1, 11, 0}, 1848 .version = {1, 12, 0},
1850 .module = THIS_MODULE, 1849 .module = THIS_MODULE,
1851 .ctr = crypt_ctr, 1850 .ctr = crypt_ctr,
1852 .dtr = crypt_dtr, 1851 .dtr = crypt_dtr,
diff --git a/drivers/md/dm-delay.c b/drivers/md/dm-delay.c
index f53846f9ab50..cc1bd048acb2 100644
--- a/drivers/md/dm-delay.c
+++ b/drivers/md/dm-delay.c
@@ -274,8 +274,7 @@ static void delay_resume(struct dm_target *ti)
274 atomic_set(&dc->may_delay, 1); 274 atomic_set(&dc->may_delay, 1);
275} 275}
276 276
277static int delay_map(struct dm_target *ti, struct bio *bio, 277static int delay_map(struct dm_target *ti, struct bio *bio)
278 union map_info *map_context)
279{ 278{
280 struct delay_c *dc = ti->private; 279 struct delay_c *dc = ti->private;
281 280
@@ -338,7 +337,7 @@ out:
338 337
339static struct target_type delay_target = { 338static struct target_type delay_target = {
340 .name = "delay", 339 .name = "delay",
341 .version = {1, 1, 0}, 340 .version = {1, 2, 0},
342 .module = THIS_MODULE, 341 .module = THIS_MODULE,
343 .ctr = delay_ctr, 342 .ctr = delay_ctr,
344 .dtr = delay_dtr, 343 .dtr = delay_dtr,
diff --git a/drivers/md/dm-flakey.c b/drivers/md/dm-flakey.c
index cc15543a6ad7..9721f2ffb1a2 100644
--- a/drivers/md/dm-flakey.c
+++ b/drivers/md/dm-flakey.c
@@ -39,6 +39,10 @@ enum feature_flag_bits {
39 DROP_WRITES 39 DROP_WRITES
40}; 40};
41 41
42struct per_bio_data {
43 bool bio_submitted;
44};
45
42static int parse_features(struct dm_arg_set *as, struct flakey_c *fc, 46static int parse_features(struct dm_arg_set *as, struct flakey_c *fc,
43 struct dm_target *ti) 47 struct dm_target *ti)
44{ 48{
@@ -214,6 +218,7 @@ static int flakey_ctr(struct dm_target *ti, unsigned int argc, char **argv)
214 218
215 ti->num_flush_requests = 1; 219 ti->num_flush_requests = 1;
216 ti->num_discard_requests = 1; 220 ti->num_discard_requests = 1;
221 ti->per_bio_data_size = sizeof(struct per_bio_data);
217 ti->private = fc; 222 ti->private = fc;
218 return 0; 223 return 0;
219 224
@@ -265,11 +270,12 @@ static void corrupt_bio_data(struct bio *bio, struct flakey_c *fc)
265 } 270 }
266} 271}
267 272
268static int flakey_map(struct dm_target *ti, struct bio *bio, 273static int flakey_map(struct dm_target *ti, struct bio *bio)
269 union map_info *map_context)
270{ 274{
271 struct flakey_c *fc = ti->private; 275 struct flakey_c *fc = ti->private;
272 unsigned elapsed; 276 unsigned elapsed;
277 struct per_bio_data *pb = dm_per_bio_data(bio, sizeof(struct per_bio_data));
278 pb->bio_submitted = false;
273 279
274 /* Are we alive ? */ 280 /* Are we alive ? */
275 elapsed = (jiffies - fc->start_time) / HZ; 281 elapsed = (jiffies - fc->start_time) / HZ;
@@ -277,7 +283,7 @@ static int flakey_map(struct dm_target *ti, struct bio *bio,
277 /* 283 /*
278 * Flag this bio as submitted while down. 284 * Flag this bio as submitted while down.
279 */ 285 */
280 map_context->ll = 1; 286 pb->bio_submitted = true;
281 287
282 /* 288 /*
283 * Map reads as normal. 289 * Map reads as normal.
@@ -314,17 +320,16 @@ map_bio:
314 return DM_MAPIO_REMAPPED; 320 return DM_MAPIO_REMAPPED;
315} 321}
316 322
317static int flakey_end_io(struct dm_target *ti, struct bio *bio, 323static int flakey_end_io(struct dm_target *ti, struct bio *bio, int error)
318 int error, union map_info *map_context)
319{ 324{
320 struct flakey_c *fc = ti->private; 325 struct flakey_c *fc = ti->private;
321 unsigned bio_submitted_while_down = map_context->ll; 326 struct per_bio_data *pb = dm_per_bio_data(bio, sizeof(struct per_bio_data));
322 327
323 /* 328 /*
324 * Corrupt successful READs while in down state. 329 * Corrupt successful READs while in down state.
325 * If flags were specified, only corrupt those that match. 330 * If flags were specified, only corrupt those that match.
326 */ 331 */
327 if (fc->corrupt_bio_byte && !error && bio_submitted_while_down && 332 if (fc->corrupt_bio_byte && !error && pb->bio_submitted &&
328 (bio_data_dir(bio) == READ) && (fc->corrupt_bio_rw == READ) && 333 (bio_data_dir(bio) == READ) && (fc->corrupt_bio_rw == READ) &&
329 all_corrupt_bio_flags_match(bio, fc)) 334 all_corrupt_bio_flags_match(bio, fc))
330 corrupt_bio_data(bio, fc); 335 corrupt_bio_data(bio, fc);
@@ -406,7 +411,7 @@ static int flakey_iterate_devices(struct dm_target *ti, iterate_devices_callout_
406 411
407static struct target_type flakey_target = { 412static struct target_type flakey_target = {
408 .name = "flakey", 413 .name = "flakey",
409 .version = {1, 2, 0}, 414 .version = {1, 3, 0},
410 .module = THIS_MODULE, 415 .module = THIS_MODULE,
411 .ctr = flakey_ctr, 416 .ctr = flakey_ctr,
412 .dtr = flakey_dtr, 417 .dtr = flakey_dtr,
diff --git a/drivers/md/dm-io.c b/drivers/md/dm-io.c
index 1c46f97d6664..ea49834377c8 100644
--- a/drivers/md/dm-io.c
+++ b/drivers/md/dm-io.c
@@ -287,7 +287,8 @@ static void do_region(int rw, unsigned region, struct dm_io_region *where,
287 unsigned num_bvecs; 287 unsigned num_bvecs;
288 sector_t remaining = where->count; 288 sector_t remaining = where->count;
289 struct request_queue *q = bdev_get_queue(where->bdev); 289 struct request_queue *q = bdev_get_queue(where->bdev);
290 sector_t discard_sectors; 290 unsigned short logical_block_size = queue_logical_block_size(q);
291 sector_t num_sectors;
291 292
292 /* 293 /*
293 * where->count may be zero if rw holds a flush and we need to 294 * where->count may be zero if rw holds a flush and we need to
@@ -297,7 +298,7 @@ static void do_region(int rw, unsigned region, struct dm_io_region *where,
297 /* 298 /*
298 * Allocate a suitably sized-bio. 299 * Allocate a suitably sized-bio.
299 */ 300 */
300 if (rw & REQ_DISCARD) 301 if ((rw & REQ_DISCARD) || (rw & REQ_WRITE_SAME))
301 num_bvecs = 1; 302 num_bvecs = 1;
302 else 303 else
303 num_bvecs = min_t(int, bio_get_nr_vecs(where->bdev), 304 num_bvecs = min_t(int, bio_get_nr_vecs(where->bdev),
@@ -310,9 +311,21 @@ static void do_region(int rw, unsigned region, struct dm_io_region *where,
310 store_io_and_region_in_bio(bio, io, region); 311 store_io_and_region_in_bio(bio, io, region);
311 312
312 if (rw & REQ_DISCARD) { 313 if (rw & REQ_DISCARD) {
313 discard_sectors = min_t(sector_t, q->limits.max_discard_sectors, remaining); 314 num_sectors = min_t(sector_t, q->limits.max_discard_sectors, remaining);
314 bio->bi_size = discard_sectors << SECTOR_SHIFT; 315 bio->bi_size = num_sectors << SECTOR_SHIFT;
315 remaining -= discard_sectors; 316 remaining -= num_sectors;
317 } else if (rw & REQ_WRITE_SAME) {
318 /*
319 * WRITE SAME only uses a single page.
320 */
321 dp->get_page(dp, &page, &len, &offset);
322 bio_add_page(bio, page, logical_block_size, offset);
323 num_sectors = min_t(sector_t, q->limits.max_write_same_sectors, remaining);
324 bio->bi_size = num_sectors << SECTOR_SHIFT;
325
326 offset = 0;
327 remaining -= num_sectors;
328 dp->next_page(dp);
316 } else while (remaining) { 329 } else while (remaining) {
317 /* 330 /*
318 * Try and add as many pages as possible. 331 * Try and add as many pages as possible.
diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c
index afd95986d099..0666b5d14b88 100644
--- a/drivers/md/dm-ioctl.c
+++ b/drivers/md/dm-ioctl.c
@@ -1543,7 +1543,21 @@ static int check_version(unsigned int cmd, struct dm_ioctl __user *user)
1543 return r; 1543 return r;
1544} 1544}
1545 1545
1546static int copy_params(struct dm_ioctl __user *user, struct dm_ioctl **param) 1546#define DM_PARAMS_VMALLOC 0x0001 /* Params alloced with vmalloc not kmalloc */
1547#define DM_WIPE_BUFFER 0x0010 /* Wipe input buffer before returning from ioctl */
1548
1549static void free_params(struct dm_ioctl *param, size_t param_size, int param_flags)
1550{
1551 if (param_flags & DM_WIPE_BUFFER)
1552 memset(param, 0, param_size);
1553
1554 if (param_flags & DM_PARAMS_VMALLOC)
1555 vfree(param);
1556 else
1557 kfree(param);
1558}
1559
1560static int copy_params(struct dm_ioctl __user *user, struct dm_ioctl **param, int *param_flags)
1547{ 1561{
1548 struct dm_ioctl tmp, *dmi; 1562 struct dm_ioctl tmp, *dmi;
1549 int secure_data; 1563 int secure_data;
@@ -1556,7 +1570,21 @@ static int copy_params(struct dm_ioctl __user *user, struct dm_ioctl **param)
1556 1570
1557 secure_data = tmp.flags & DM_SECURE_DATA_FLAG; 1571 secure_data = tmp.flags & DM_SECURE_DATA_FLAG;
1558 1572
1559 dmi = vmalloc(tmp.data_size); 1573 *param_flags = secure_data ? DM_WIPE_BUFFER : 0;
1574
1575 /*
1576 * Try to avoid low memory issues when a device is suspended.
1577 * Use kmalloc() rather than vmalloc() when we can.
1578 */
1579 dmi = NULL;
1580 if (tmp.data_size <= KMALLOC_MAX_SIZE)
1581 dmi = kmalloc(tmp.data_size, GFP_NOIO | __GFP_NORETRY | __GFP_NOMEMALLOC | __GFP_NOWARN);
1582
1583 if (!dmi) {
1584 dmi = __vmalloc(tmp.data_size, GFP_NOIO | __GFP_REPEAT | __GFP_HIGH, PAGE_KERNEL);
1585 *param_flags |= DM_PARAMS_VMALLOC;
1586 }
1587
1560 if (!dmi) { 1588 if (!dmi) {
1561 if (secure_data && clear_user(user, tmp.data_size)) 1589 if (secure_data && clear_user(user, tmp.data_size))
1562 return -EFAULT; 1590 return -EFAULT;
@@ -1566,6 +1594,14 @@ static int copy_params(struct dm_ioctl __user *user, struct dm_ioctl **param)
1566 if (copy_from_user(dmi, user, tmp.data_size)) 1594 if (copy_from_user(dmi, user, tmp.data_size))
1567 goto bad; 1595 goto bad;
1568 1596
1597 /*
1598 * Abort if something changed the ioctl data while it was being copied.
1599 */
1600 if (dmi->data_size != tmp.data_size) {
1601 DMERR("rejecting ioctl: data size modified while processing parameters");
1602 goto bad;
1603 }
1604
1569 /* Wipe the user buffer so we do not return it to userspace */ 1605 /* Wipe the user buffer so we do not return it to userspace */
1570 if (secure_data && clear_user(user, tmp.data_size)) 1606 if (secure_data && clear_user(user, tmp.data_size))
1571 goto bad; 1607 goto bad;
@@ -1574,9 +1610,8 @@ static int copy_params(struct dm_ioctl __user *user, struct dm_ioctl **param)
1574 return 0; 1610 return 0;
1575 1611
1576bad: 1612bad:
1577 if (secure_data) 1613 free_params(dmi, tmp.data_size, *param_flags);
1578 memset(dmi, 0, tmp.data_size); 1614
1579 vfree(dmi);
1580 return -EFAULT; 1615 return -EFAULT;
1581} 1616}
1582 1617
@@ -1613,7 +1648,7 @@ static int validate_params(uint cmd, struct dm_ioctl *param)
1613static int ctl_ioctl(uint command, struct dm_ioctl __user *user) 1648static int ctl_ioctl(uint command, struct dm_ioctl __user *user)
1614{ 1649{
1615 int r = 0; 1650 int r = 0;
1616 int wipe_buffer; 1651 int param_flags;
1617 unsigned int cmd; 1652 unsigned int cmd;
1618 struct dm_ioctl *uninitialized_var(param); 1653 struct dm_ioctl *uninitialized_var(param);
1619 ioctl_fn fn = NULL; 1654 ioctl_fn fn = NULL;
@@ -1649,24 +1684,14 @@ static int ctl_ioctl(uint command, struct dm_ioctl __user *user)
1649 } 1684 }
1650 1685
1651 /* 1686 /*
1652 * Trying to avoid low memory issues when a device is
1653 * suspended.
1654 */
1655 current->flags |= PF_MEMALLOC;
1656
1657 /*
1658 * Copy the parameters into kernel space. 1687 * Copy the parameters into kernel space.
1659 */ 1688 */
1660 r = copy_params(user, &param); 1689 r = copy_params(user, &param, &param_flags);
1661
1662 current->flags &= ~PF_MEMALLOC;
1663 1690
1664 if (r) 1691 if (r)
1665 return r; 1692 return r;
1666 1693
1667 input_param_size = param->data_size; 1694 input_param_size = param->data_size;
1668 wipe_buffer = param->flags & DM_SECURE_DATA_FLAG;
1669
1670 r = validate_params(cmd, param); 1695 r = validate_params(cmd, param);
1671 if (r) 1696 if (r)
1672 goto out; 1697 goto out;
@@ -1681,10 +1706,7 @@ static int ctl_ioctl(uint command, struct dm_ioctl __user *user)
1681 r = -EFAULT; 1706 r = -EFAULT;
1682 1707
1683out: 1708out:
1684 if (wipe_buffer) 1709 free_params(param, input_param_size, param_flags);
1685 memset(param, 0, input_param_size);
1686
1687 vfree(param);
1688 return r; 1710 return r;
1689} 1711}
1690 1712
diff --git a/drivers/md/dm-kcopyd.c b/drivers/md/dm-kcopyd.c
index bed444c93d8d..68c02673263b 100644
--- a/drivers/md/dm-kcopyd.c
+++ b/drivers/md/dm-kcopyd.c
@@ -349,7 +349,7 @@ static void complete_io(unsigned long error, void *context)
349 struct dm_kcopyd_client *kc = job->kc; 349 struct dm_kcopyd_client *kc = job->kc;
350 350
351 if (error) { 351 if (error) {
352 if (job->rw == WRITE) 352 if (job->rw & WRITE)
353 job->write_err |= error; 353 job->write_err |= error;
354 else 354 else
355 job->read_err = 1; 355 job->read_err = 1;
@@ -361,7 +361,7 @@ static void complete_io(unsigned long error, void *context)
361 } 361 }
362 } 362 }
363 363
364 if (job->rw == WRITE) 364 if (job->rw & WRITE)
365 push(&kc->complete_jobs, job); 365 push(&kc->complete_jobs, job);
366 366
367 else { 367 else {
@@ -432,7 +432,7 @@ static int process_jobs(struct list_head *jobs, struct dm_kcopyd_client *kc,
432 432
433 if (r < 0) { 433 if (r < 0) {
434 /* error this rogue job */ 434 /* error this rogue job */
435 if (job->rw == WRITE) 435 if (job->rw & WRITE)
436 job->write_err = (unsigned long) -1L; 436 job->write_err = (unsigned long) -1L;
437 else 437 else
438 job->read_err = 1; 438 job->read_err = 1;
@@ -585,6 +585,7 @@ int dm_kcopyd_copy(struct dm_kcopyd_client *kc, struct dm_io_region *from,
585 unsigned int flags, dm_kcopyd_notify_fn fn, void *context) 585 unsigned int flags, dm_kcopyd_notify_fn fn, void *context)
586{ 586{
587 struct kcopyd_job *job; 587 struct kcopyd_job *job;
588 int i;
588 589
589 /* 590 /*
590 * Allocate an array of jobs consisting of one master job 591 * Allocate an array of jobs consisting of one master job
@@ -611,7 +612,16 @@ int dm_kcopyd_copy(struct dm_kcopyd_client *kc, struct dm_io_region *from,
611 memset(&job->source, 0, sizeof job->source); 612 memset(&job->source, 0, sizeof job->source);
612 job->source.count = job->dests[0].count; 613 job->source.count = job->dests[0].count;
613 job->pages = &zero_page_list; 614 job->pages = &zero_page_list;
614 job->rw = WRITE; 615
616 /*
617 * Use WRITE SAME to optimize zeroing if all dests support it.
618 */
619 job->rw = WRITE | REQ_WRITE_SAME;
620 for (i = 0; i < job->num_dests; i++)
621 if (!bdev_write_same(job->dests[i].bdev)) {
622 job->rw = WRITE;
623 break;
624 }
615 } 625 }
616 626
617 job->fn = fn; 627 job->fn = fn;
diff --git a/drivers/md/dm-linear.c b/drivers/md/dm-linear.c
index 1bf19a93eef0..328cad5617ab 100644
--- a/drivers/md/dm-linear.c
+++ b/drivers/md/dm-linear.c
@@ -55,6 +55,7 @@ static int linear_ctr(struct dm_target *ti, unsigned int argc, char **argv)
55 55
56 ti->num_flush_requests = 1; 56 ti->num_flush_requests = 1;
57 ti->num_discard_requests = 1; 57 ti->num_discard_requests = 1;
58 ti->num_write_same_requests = 1;
58 ti->private = lc; 59 ti->private = lc;
59 return 0; 60 return 0;
60 61
@@ -87,8 +88,7 @@ static void linear_map_bio(struct dm_target *ti, struct bio *bio)
87 bio->bi_sector = linear_map_sector(ti, bio->bi_sector); 88 bio->bi_sector = linear_map_sector(ti, bio->bi_sector);
88} 89}
89 90
90static int linear_map(struct dm_target *ti, struct bio *bio, 91static int linear_map(struct dm_target *ti, struct bio *bio)
91 union map_info *map_context)
92{ 92{
93 linear_map_bio(ti, bio); 93 linear_map_bio(ti, bio);
94 94
@@ -155,7 +155,7 @@ static int linear_iterate_devices(struct dm_target *ti,
155 155
156static struct target_type linear_target = { 156static struct target_type linear_target = {
157 .name = "linear", 157 .name = "linear",
158 .version = {1, 1, 0}, 158 .version = {1, 2, 0},
159 .module = THIS_MODULE, 159 .module = THIS_MODULE,
160 .ctr = linear_ctr, 160 .ctr = linear_ctr,
161 .dtr = linear_dtr, 161 .dtr = linear_dtr,
diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c
index 45d94a7e7f6d..3d8984edeff7 100644
--- a/drivers/md/dm-raid.c
+++ b/drivers/md/dm-raid.c
@@ -295,9 +295,11 @@ static int validate_region_size(struct raid_set *rs, unsigned long region_size)
295 * Choose a reasonable default. All figures in sectors. 295 * Choose a reasonable default. All figures in sectors.
296 */ 296 */
297 if (min_region_size > (1 << 13)) { 297 if (min_region_size > (1 << 13)) {
298 /* If not a power of 2, make it the next power of 2 */
299 if (min_region_size & (min_region_size - 1))
300 region_size = 1 << fls(region_size);
298 DMINFO("Choosing default region size of %lu sectors", 301 DMINFO("Choosing default region size of %lu sectors",
299 region_size); 302 region_size);
300 region_size = min_region_size;
301 } else { 303 } else {
302 DMINFO("Choosing default region size of 4MiB"); 304 DMINFO("Choosing default region size of 4MiB");
303 region_size = 1 << 13; /* sectors */ 305 region_size = 1 << 13; /* sectors */
@@ -1216,7 +1218,7 @@ static void raid_dtr(struct dm_target *ti)
1216 context_free(rs); 1218 context_free(rs);
1217} 1219}
1218 1220
1219static int raid_map(struct dm_target *ti, struct bio *bio, union map_info *map_context) 1221static int raid_map(struct dm_target *ti, struct bio *bio)
1220{ 1222{
1221 struct raid_set *rs = ti->private; 1223 struct raid_set *rs = ti->private;
1222 struct mddev *mddev = &rs->md; 1224 struct mddev *mddev = &rs->md;
@@ -1430,7 +1432,7 @@ static void raid_resume(struct dm_target *ti)
1430 1432
1431static struct target_type raid_target = { 1433static struct target_type raid_target = {
1432 .name = "raid", 1434 .name = "raid",
1433 .version = {1, 3, 1}, 1435 .version = {1, 4, 0},
1434 .module = THIS_MODULE, 1436 .module = THIS_MODULE,
1435 .ctr = raid_ctr, 1437 .ctr = raid_ctr,
1436 .dtr = raid_dtr, 1438 .dtr = raid_dtr,
diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c
index fd61f98ee1f6..fa519185ebba 100644
--- a/drivers/md/dm-raid1.c
+++ b/drivers/md/dm-raid1.c
@@ -61,7 +61,6 @@ struct mirror_set {
61 struct dm_region_hash *rh; 61 struct dm_region_hash *rh;
62 struct dm_kcopyd_client *kcopyd_client; 62 struct dm_kcopyd_client *kcopyd_client;
63 struct dm_io_client *io_client; 63 struct dm_io_client *io_client;
64 mempool_t *read_record_pool;
65 64
66 /* recovery */ 65 /* recovery */
67 region_t nr_regions; 66 region_t nr_regions;
@@ -139,14 +138,13 @@ static void dispatch_bios(void *context, struct bio_list *bio_list)
139 queue_bio(ms, bio, WRITE); 138 queue_bio(ms, bio, WRITE);
140} 139}
141 140
142#define MIN_READ_RECORDS 20 141struct dm_raid1_bio_record {
143struct dm_raid1_read_record {
144 struct mirror *m; 142 struct mirror *m;
143 /* if details->bi_bdev == NULL, details were not saved */
145 struct dm_bio_details details; 144 struct dm_bio_details details;
145 region_t write_region;
146}; 146};
147 147
148static struct kmem_cache *_dm_raid1_read_record_cache;
149
150/* 148/*
151 * Every mirror should look like this one. 149 * Every mirror should look like this one.
152 */ 150 */
@@ -876,19 +874,9 @@ static struct mirror_set *alloc_context(unsigned int nr_mirrors,
876 atomic_set(&ms->suspend, 0); 874 atomic_set(&ms->suspend, 0);
877 atomic_set(&ms->default_mirror, DEFAULT_MIRROR); 875 atomic_set(&ms->default_mirror, DEFAULT_MIRROR);
878 876
879 ms->read_record_pool = mempool_create_slab_pool(MIN_READ_RECORDS,
880 _dm_raid1_read_record_cache);
881
882 if (!ms->read_record_pool) {
883 ti->error = "Error creating mirror read_record_pool";
884 kfree(ms);
885 return NULL;
886 }
887
888 ms->io_client = dm_io_client_create(); 877 ms->io_client = dm_io_client_create();
889 if (IS_ERR(ms->io_client)) { 878 if (IS_ERR(ms->io_client)) {
890 ti->error = "Error creating dm_io client"; 879 ti->error = "Error creating dm_io client";
891 mempool_destroy(ms->read_record_pool);
892 kfree(ms); 880 kfree(ms);
893 return NULL; 881 return NULL;
894 } 882 }
@@ -900,7 +888,6 @@ static struct mirror_set *alloc_context(unsigned int nr_mirrors,
900 if (IS_ERR(ms->rh)) { 888 if (IS_ERR(ms->rh)) {
901 ti->error = "Error creating dirty region hash"; 889 ti->error = "Error creating dirty region hash";
902 dm_io_client_destroy(ms->io_client); 890 dm_io_client_destroy(ms->io_client);
903 mempool_destroy(ms->read_record_pool);
904 kfree(ms); 891 kfree(ms);
905 return NULL; 892 return NULL;
906 } 893 }
@@ -916,7 +903,6 @@ static void free_context(struct mirror_set *ms, struct dm_target *ti,
916 903
917 dm_io_client_destroy(ms->io_client); 904 dm_io_client_destroy(ms->io_client);
918 dm_region_hash_destroy(ms->rh); 905 dm_region_hash_destroy(ms->rh);
919 mempool_destroy(ms->read_record_pool);
920 kfree(ms); 906 kfree(ms);
921} 907}
922 908
@@ -1088,6 +1074,7 @@ static int mirror_ctr(struct dm_target *ti, unsigned int argc, char **argv)
1088 1074
1089 ti->num_flush_requests = 1; 1075 ti->num_flush_requests = 1;
1090 ti->num_discard_requests = 1; 1076 ti->num_discard_requests = 1;
1077 ti->per_bio_data_size = sizeof(struct dm_raid1_bio_record);
1091 ti->discard_zeroes_data_unsupported = true; 1078 ti->discard_zeroes_data_unsupported = true;
1092 1079
1093 ms->kmirrord_wq = alloc_workqueue("kmirrord", 1080 ms->kmirrord_wq = alloc_workqueue("kmirrord",
@@ -1155,18 +1142,20 @@ static void mirror_dtr(struct dm_target *ti)
1155/* 1142/*
1156 * Mirror mapping function 1143 * Mirror mapping function
1157 */ 1144 */
1158static int mirror_map(struct dm_target *ti, struct bio *bio, 1145static int mirror_map(struct dm_target *ti, struct bio *bio)
1159 union map_info *map_context)
1160{ 1146{
1161 int r, rw = bio_rw(bio); 1147 int r, rw = bio_rw(bio);
1162 struct mirror *m; 1148 struct mirror *m;
1163 struct mirror_set *ms = ti->private; 1149 struct mirror_set *ms = ti->private;
1164 struct dm_raid1_read_record *read_record = NULL;
1165 struct dm_dirty_log *log = dm_rh_dirty_log(ms->rh); 1150 struct dm_dirty_log *log = dm_rh_dirty_log(ms->rh);
1151 struct dm_raid1_bio_record *bio_record =
1152 dm_per_bio_data(bio, sizeof(struct dm_raid1_bio_record));
1153
1154 bio_record->details.bi_bdev = NULL;
1166 1155
1167 if (rw == WRITE) { 1156 if (rw == WRITE) {
1168 /* Save region for mirror_end_io() handler */ 1157 /* Save region for mirror_end_io() handler */
1169 map_context->ll = dm_rh_bio_to_region(ms->rh, bio); 1158 bio_record->write_region = dm_rh_bio_to_region(ms->rh, bio);
1170 queue_bio(ms, bio, rw); 1159 queue_bio(ms, bio, rw);
1171 return DM_MAPIO_SUBMITTED; 1160 return DM_MAPIO_SUBMITTED;
1172 } 1161 }
@@ -1194,33 +1183,29 @@ static int mirror_map(struct dm_target *ti, struct bio *bio,
1194 if (unlikely(!m)) 1183 if (unlikely(!m))
1195 return -EIO; 1184 return -EIO;
1196 1185
1197 read_record = mempool_alloc(ms->read_record_pool, GFP_NOIO); 1186 dm_bio_record(&bio_record->details, bio);
1198 if (likely(read_record)) { 1187 bio_record->m = m;
1199 dm_bio_record(&read_record->details, bio);
1200 map_context->ptr = read_record;
1201 read_record->m = m;
1202 }
1203 1188
1204 map_bio(m, bio); 1189 map_bio(m, bio);
1205 1190
1206 return DM_MAPIO_REMAPPED; 1191 return DM_MAPIO_REMAPPED;
1207} 1192}
1208 1193
1209static int mirror_end_io(struct dm_target *ti, struct bio *bio, 1194static int mirror_end_io(struct dm_target *ti, struct bio *bio, int error)
1210 int error, union map_info *map_context)
1211{ 1195{
1212 int rw = bio_rw(bio); 1196 int rw = bio_rw(bio);
1213 struct mirror_set *ms = (struct mirror_set *) ti->private; 1197 struct mirror_set *ms = (struct mirror_set *) ti->private;
1214 struct mirror *m = NULL; 1198 struct mirror *m = NULL;
1215 struct dm_bio_details *bd = NULL; 1199 struct dm_bio_details *bd = NULL;
1216 struct dm_raid1_read_record *read_record = map_context->ptr; 1200 struct dm_raid1_bio_record *bio_record =
1201 dm_per_bio_data(bio, sizeof(struct dm_raid1_bio_record));
1217 1202
1218 /* 1203 /*
1219 * We need to dec pending if this was a write. 1204 * We need to dec pending if this was a write.
1220 */ 1205 */
1221 if (rw == WRITE) { 1206 if (rw == WRITE) {
1222 if (!(bio->bi_rw & (REQ_FLUSH | REQ_DISCARD))) 1207 if (!(bio->bi_rw & (REQ_FLUSH | REQ_DISCARD)))
1223 dm_rh_dec(ms->rh, map_context->ll); 1208 dm_rh_dec(ms->rh, bio_record->write_region);
1224 return error; 1209 return error;
1225 } 1210 }
1226 1211
@@ -1231,7 +1216,7 @@ static int mirror_end_io(struct dm_target *ti, struct bio *bio,
1231 goto out; 1216 goto out;
1232 1217
1233 if (unlikely(error)) { 1218 if (unlikely(error)) {
1234 if (!read_record) { 1219 if (!bio_record->details.bi_bdev) {
1235 /* 1220 /*
1236 * There wasn't enough memory to record necessary 1221 * There wasn't enough memory to record necessary
1237 * information for a retry or there was no other 1222 * information for a retry or there was no other
@@ -1241,7 +1226,7 @@ static int mirror_end_io(struct dm_target *ti, struct bio *bio,
1241 return -EIO; 1226 return -EIO;
1242 } 1227 }
1243 1228
1244 m = read_record->m; 1229 m = bio_record->m;
1245 1230
1246 DMERR("Mirror read failed from %s. Trying alternative device.", 1231 DMERR("Mirror read failed from %s. Trying alternative device.",
1247 m->dev->name); 1232 m->dev->name);
@@ -1253,22 +1238,18 @@ static int mirror_end_io(struct dm_target *ti, struct bio *bio,
1253 * mirror. 1238 * mirror.
1254 */ 1239 */
1255 if (default_ok(m) || mirror_available(ms, bio)) { 1240 if (default_ok(m) || mirror_available(ms, bio)) {
1256 bd = &read_record->details; 1241 bd = &bio_record->details;
1257 1242
1258 dm_bio_restore(bd, bio); 1243 dm_bio_restore(bd, bio);
1259 mempool_free(read_record, ms->read_record_pool); 1244 bio_record->details.bi_bdev = NULL;
1260 map_context->ptr = NULL;
1261 queue_bio(ms, bio, rw); 1245 queue_bio(ms, bio, rw);
1262 return 1; 1246 return DM_ENDIO_INCOMPLETE;
1263 } 1247 }
1264 DMERR("All replicated volumes dead, failing I/O"); 1248 DMERR("All replicated volumes dead, failing I/O");
1265 } 1249 }
1266 1250
1267out: 1251out:
1268 if (read_record) { 1252 bio_record->details.bi_bdev = NULL;
1269 mempool_free(read_record, ms->read_record_pool);
1270 map_context->ptr = NULL;
1271 }
1272 1253
1273 return error; 1254 return error;
1274} 1255}
@@ -1422,7 +1403,7 @@ static int mirror_iterate_devices(struct dm_target *ti,
1422 1403
1423static struct target_type mirror_target = { 1404static struct target_type mirror_target = {
1424 .name = "mirror", 1405 .name = "mirror",
1425 .version = {1, 12, 1}, 1406 .version = {1, 13, 1},
1426 .module = THIS_MODULE, 1407 .module = THIS_MODULE,
1427 .ctr = mirror_ctr, 1408 .ctr = mirror_ctr,
1428 .dtr = mirror_dtr, 1409 .dtr = mirror_dtr,
@@ -1439,13 +1420,6 @@ static int __init dm_mirror_init(void)
1439{ 1420{
1440 int r; 1421 int r;
1441 1422
1442 _dm_raid1_read_record_cache = KMEM_CACHE(dm_raid1_read_record, 0);
1443 if (!_dm_raid1_read_record_cache) {
1444 DMERR("Can't allocate dm_raid1_read_record cache");
1445 r = -ENOMEM;
1446 goto bad_cache;
1447 }
1448
1449 r = dm_register_target(&mirror_target); 1423 r = dm_register_target(&mirror_target);
1450 if (r < 0) { 1424 if (r < 0) {
1451 DMERR("Failed to register mirror target"); 1425 DMERR("Failed to register mirror target");
@@ -1455,15 +1429,12 @@ static int __init dm_mirror_init(void)
1455 return 0; 1429 return 0;
1456 1430
1457bad_target: 1431bad_target:
1458 kmem_cache_destroy(_dm_raid1_read_record_cache);
1459bad_cache:
1460 return r; 1432 return r;
1461} 1433}
1462 1434
1463static void __exit dm_mirror_exit(void) 1435static void __exit dm_mirror_exit(void)
1464{ 1436{
1465 dm_unregister_target(&mirror_target); 1437 dm_unregister_target(&mirror_target);
1466 kmem_cache_destroy(_dm_raid1_read_record_cache);
1467} 1438}
1468 1439
1469/* Module hooks */ 1440/* Module hooks */
diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c
index a143921feaf6..59fc18ae52c2 100644
--- a/drivers/md/dm-snap.c
+++ b/drivers/md/dm-snap.c
@@ -79,7 +79,6 @@ struct dm_snapshot {
79 79
80 /* Chunks with outstanding reads */ 80 /* Chunks with outstanding reads */
81 spinlock_t tracked_chunk_lock; 81 spinlock_t tracked_chunk_lock;
82 mempool_t *tracked_chunk_pool;
83 struct hlist_head tracked_chunk_hash[DM_TRACKED_CHUNK_HASH_SIZE]; 82 struct hlist_head tracked_chunk_hash[DM_TRACKED_CHUNK_HASH_SIZE];
84 83
85 /* The on disk metadata handler */ 84 /* The on disk metadata handler */
@@ -191,35 +190,38 @@ struct dm_snap_tracked_chunk {
191 chunk_t chunk; 190 chunk_t chunk;
192}; 191};
193 192
194static struct kmem_cache *tracked_chunk_cache; 193static void init_tracked_chunk(struct bio *bio)
194{
195 struct dm_snap_tracked_chunk *c = dm_per_bio_data(bio, sizeof(struct dm_snap_tracked_chunk));
196 INIT_HLIST_NODE(&c->node);
197}
195 198
196static struct dm_snap_tracked_chunk *track_chunk(struct dm_snapshot *s, 199static bool is_bio_tracked(struct bio *bio)
197 chunk_t chunk)
198{ 200{
199 struct dm_snap_tracked_chunk *c = mempool_alloc(s->tracked_chunk_pool, 201 struct dm_snap_tracked_chunk *c = dm_per_bio_data(bio, sizeof(struct dm_snap_tracked_chunk));
200 GFP_NOIO); 202 return !hlist_unhashed(&c->node);
201 unsigned long flags; 203}
204
205static void track_chunk(struct dm_snapshot *s, struct bio *bio, chunk_t chunk)
206{
207 struct dm_snap_tracked_chunk *c = dm_per_bio_data(bio, sizeof(struct dm_snap_tracked_chunk));
202 208
203 c->chunk = chunk; 209 c->chunk = chunk;
204 210
205 spin_lock_irqsave(&s->tracked_chunk_lock, flags); 211 spin_lock_irq(&s->tracked_chunk_lock);
206 hlist_add_head(&c->node, 212 hlist_add_head(&c->node,
207 &s->tracked_chunk_hash[DM_TRACKED_CHUNK_HASH(chunk)]); 213 &s->tracked_chunk_hash[DM_TRACKED_CHUNK_HASH(chunk)]);
208 spin_unlock_irqrestore(&s->tracked_chunk_lock, flags); 214 spin_unlock_irq(&s->tracked_chunk_lock);
209
210 return c;
211} 215}
212 216
213static void stop_tracking_chunk(struct dm_snapshot *s, 217static void stop_tracking_chunk(struct dm_snapshot *s, struct bio *bio)
214 struct dm_snap_tracked_chunk *c)
215{ 218{
219 struct dm_snap_tracked_chunk *c = dm_per_bio_data(bio, sizeof(struct dm_snap_tracked_chunk));
216 unsigned long flags; 220 unsigned long flags;
217 221
218 spin_lock_irqsave(&s->tracked_chunk_lock, flags); 222 spin_lock_irqsave(&s->tracked_chunk_lock, flags);
219 hlist_del(&c->node); 223 hlist_del(&c->node);
220 spin_unlock_irqrestore(&s->tracked_chunk_lock, flags); 224 spin_unlock_irqrestore(&s->tracked_chunk_lock, flags);
221
222 mempool_free(c, s->tracked_chunk_pool);
223} 225}
224 226
225static int __chunk_is_tracked(struct dm_snapshot *s, chunk_t chunk) 227static int __chunk_is_tracked(struct dm_snapshot *s, chunk_t chunk)
@@ -1120,14 +1122,6 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv)
1120 goto bad_pending_pool; 1122 goto bad_pending_pool;
1121 } 1123 }
1122 1124
1123 s->tracked_chunk_pool = mempool_create_slab_pool(MIN_IOS,
1124 tracked_chunk_cache);
1125 if (!s->tracked_chunk_pool) {
1126 ti->error = "Could not allocate tracked_chunk mempool for "
1127 "tracking reads";
1128 goto bad_tracked_chunk_pool;
1129 }
1130
1131 for (i = 0; i < DM_TRACKED_CHUNK_HASH_SIZE; i++) 1125 for (i = 0; i < DM_TRACKED_CHUNK_HASH_SIZE; i++)
1132 INIT_HLIST_HEAD(&s->tracked_chunk_hash[i]); 1126 INIT_HLIST_HEAD(&s->tracked_chunk_hash[i]);
1133 1127
@@ -1135,6 +1129,7 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv)
1135 1129
1136 ti->private = s; 1130 ti->private = s;
1137 ti->num_flush_requests = num_flush_requests; 1131 ti->num_flush_requests = num_flush_requests;
1132 ti->per_bio_data_size = sizeof(struct dm_snap_tracked_chunk);
1138 1133
1139 /* Add snapshot to the list of snapshots for this origin */ 1134 /* Add snapshot to the list of snapshots for this origin */
1140 /* Exceptions aren't triggered till snapshot_resume() is called */ 1135 /* Exceptions aren't triggered till snapshot_resume() is called */
@@ -1183,9 +1178,6 @@ bad_read_metadata:
1183 unregister_snapshot(s); 1178 unregister_snapshot(s);
1184 1179
1185bad_load_and_register: 1180bad_load_and_register:
1186 mempool_destroy(s->tracked_chunk_pool);
1187
1188bad_tracked_chunk_pool:
1189 mempool_destroy(s->pending_pool); 1181 mempool_destroy(s->pending_pool);
1190 1182
1191bad_pending_pool: 1183bad_pending_pool:
@@ -1290,8 +1282,6 @@ static void snapshot_dtr(struct dm_target *ti)
1290 BUG_ON(!hlist_empty(&s->tracked_chunk_hash[i])); 1282 BUG_ON(!hlist_empty(&s->tracked_chunk_hash[i]));
1291#endif 1283#endif
1292 1284
1293 mempool_destroy(s->tracked_chunk_pool);
1294
1295 __free_exceptions(s); 1285 __free_exceptions(s);
1296 1286
1297 mempool_destroy(s->pending_pool); 1287 mempool_destroy(s->pending_pool);
@@ -1577,8 +1567,7 @@ static void remap_exception(struct dm_snapshot *s, struct dm_exception *e,
1577 s->store->chunk_mask); 1567 s->store->chunk_mask);
1578} 1568}
1579 1569
1580static int snapshot_map(struct dm_target *ti, struct bio *bio, 1570static int snapshot_map(struct dm_target *ti, struct bio *bio)
1581 union map_info *map_context)
1582{ 1571{
1583 struct dm_exception *e; 1572 struct dm_exception *e;
1584 struct dm_snapshot *s = ti->private; 1573 struct dm_snapshot *s = ti->private;
@@ -1586,6 +1575,8 @@ static int snapshot_map(struct dm_target *ti, struct bio *bio,
1586 chunk_t chunk; 1575 chunk_t chunk;
1587 struct dm_snap_pending_exception *pe = NULL; 1576 struct dm_snap_pending_exception *pe = NULL;
1588 1577
1578 init_tracked_chunk(bio);
1579
1589 if (bio->bi_rw & REQ_FLUSH) { 1580 if (bio->bi_rw & REQ_FLUSH) {
1590 bio->bi_bdev = s->cow->bdev; 1581 bio->bi_bdev = s->cow->bdev;
1591 return DM_MAPIO_REMAPPED; 1582 return DM_MAPIO_REMAPPED;
@@ -1670,7 +1661,7 @@ static int snapshot_map(struct dm_target *ti, struct bio *bio,
1670 } 1661 }
1671 } else { 1662 } else {
1672 bio->bi_bdev = s->origin->bdev; 1663 bio->bi_bdev = s->origin->bdev;
1673 map_context->ptr = track_chunk(s, chunk); 1664 track_chunk(s, bio, chunk);
1674 } 1665 }
1675 1666
1676out_unlock: 1667out_unlock:
@@ -1691,20 +1682,20 @@ out:
1691 * If merging is currently taking place on the chunk in question, the 1682 * If merging is currently taking place on the chunk in question, the
1692 * I/O is deferred by adding it to s->bios_queued_during_merge. 1683 * I/O is deferred by adding it to s->bios_queued_during_merge.
1693 */ 1684 */
1694static int snapshot_merge_map(struct dm_target *ti, struct bio *bio, 1685static int snapshot_merge_map(struct dm_target *ti, struct bio *bio)
1695 union map_info *map_context)
1696{ 1686{
1697 struct dm_exception *e; 1687 struct dm_exception *e;
1698 struct dm_snapshot *s = ti->private; 1688 struct dm_snapshot *s = ti->private;
1699 int r = DM_MAPIO_REMAPPED; 1689 int r = DM_MAPIO_REMAPPED;
1700 chunk_t chunk; 1690 chunk_t chunk;
1701 1691
1692 init_tracked_chunk(bio);
1693
1702 if (bio->bi_rw & REQ_FLUSH) { 1694 if (bio->bi_rw & REQ_FLUSH) {
1703 if (!map_context->target_request_nr) 1695 if (!dm_bio_get_target_request_nr(bio))
1704 bio->bi_bdev = s->origin->bdev; 1696 bio->bi_bdev = s->origin->bdev;
1705 else 1697 else
1706 bio->bi_bdev = s->cow->bdev; 1698 bio->bi_bdev = s->cow->bdev;
1707 map_context->ptr = NULL;
1708 return DM_MAPIO_REMAPPED; 1699 return DM_MAPIO_REMAPPED;
1709 } 1700 }
1710 1701
@@ -1733,7 +1724,7 @@ static int snapshot_merge_map(struct dm_target *ti, struct bio *bio,
1733 remap_exception(s, e, bio, chunk); 1724 remap_exception(s, e, bio, chunk);
1734 1725
1735 if (bio_rw(bio) == WRITE) 1726 if (bio_rw(bio) == WRITE)
1736 map_context->ptr = track_chunk(s, chunk); 1727 track_chunk(s, bio, chunk);
1737 goto out_unlock; 1728 goto out_unlock;
1738 } 1729 }
1739 1730
@@ -1751,14 +1742,12 @@ out_unlock:
1751 return r; 1742 return r;
1752} 1743}
1753 1744
1754static int snapshot_end_io(struct dm_target *ti, struct bio *bio, 1745static int snapshot_end_io(struct dm_target *ti, struct bio *bio, int error)
1755 int error, union map_info *map_context)
1756{ 1746{
1757 struct dm_snapshot *s = ti->private; 1747 struct dm_snapshot *s = ti->private;
1758 struct dm_snap_tracked_chunk *c = map_context->ptr;
1759 1748
1760 if (c) 1749 if (is_bio_tracked(bio))
1761 stop_tracking_chunk(s, c); 1750 stop_tracking_chunk(s, bio);
1762 1751
1763 return 0; 1752 return 0;
1764} 1753}
@@ -2127,8 +2116,7 @@ static void origin_dtr(struct dm_target *ti)
2127 dm_put_device(ti, dev); 2116 dm_put_device(ti, dev);
2128} 2117}
2129 2118
2130static int origin_map(struct dm_target *ti, struct bio *bio, 2119static int origin_map(struct dm_target *ti, struct bio *bio)
2131 union map_info *map_context)
2132{ 2120{
2133 struct dm_dev *dev = ti->private; 2121 struct dm_dev *dev = ti->private;
2134 bio->bi_bdev = dev->bdev; 2122 bio->bi_bdev = dev->bdev;
@@ -2193,7 +2181,7 @@ static int origin_iterate_devices(struct dm_target *ti,
2193 2181
2194static struct target_type origin_target = { 2182static struct target_type origin_target = {
2195 .name = "snapshot-origin", 2183 .name = "snapshot-origin",
2196 .version = {1, 7, 1}, 2184 .version = {1, 8, 0},
2197 .module = THIS_MODULE, 2185 .module = THIS_MODULE,
2198 .ctr = origin_ctr, 2186 .ctr = origin_ctr,
2199 .dtr = origin_dtr, 2187 .dtr = origin_dtr,
@@ -2206,7 +2194,7 @@ static struct target_type origin_target = {
2206 2194
2207static struct target_type snapshot_target = { 2195static struct target_type snapshot_target = {
2208 .name = "snapshot", 2196 .name = "snapshot",
2209 .version = {1, 10, 0}, 2197 .version = {1, 11, 0},
2210 .module = THIS_MODULE, 2198 .module = THIS_MODULE,
2211 .ctr = snapshot_ctr, 2199 .ctr = snapshot_ctr,
2212 .dtr = snapshot_dtr, 2200 .dtr = snapshot_dtr,
@@ -2220,7 +2208,7 @@ static struct target_type snapshot_target = {
2220 2208
2221static struct target_type merge_target = { 2209static struct target_type merge_target = {
2222 .name = dm_snapshot_merge_target_name, 2210 .name = dm_snapshot_merge_target_name,
2223 .version = {1, 1, 0}, 2211 .version = {1, 2, 0},
2224 .module = THIS_MODULE, 2212 .module = THIS_MODULE,
2225 .ctr = snapshot_ctr, 2213 .ctr = snapshot_ctr,
2226 .dtr = snapshot_dtr, 2214 .dtr = snapshot_dtr,
@@ -2281,17 +2269,8 @@ static int __init dm_snapshot_init(void)
2281 goto bad_pending_cache; 2269 goto bad_pending_cache;
2282 } 2270 }
2283 2271
2284 tracked_chunk_cache = KMEM_CACHE(dm_snap_tracked_chunk, 0);
2285 if (!tracked_chunk_cache) {
2286 DMERR("Couldn't create cache to track chunks in use.");
2287 r = -ENOMEM;
2288 goto bad_tracked_chunk_cache;
2289 }
2290
2291 return 0; 2272 return 0;
2292 2273
2293bad_tracked_chunk_cache:
2294 kmem_cache_destroy(pending_cache);
2295bad_pending_cache: 2274bad_pending_cache:
2296 kmem_cache_destroy(exception_cache); 2275 kmem_cache_destroy(exception_cache);
2297bad_exception_cache: 2276bad_exception_cache:
@@ -2317,7 +2296,6 @@ static void __exit dm_snapshot_exit(void)
2317 exit_origin_hash(); 2296 exit_origin_hash();
2318 kmem_cache_destroy(pending_cache); 2297 kmem_cache_destroy(pending_cache);
2319 kmem_cache_destroy(exception_cache); 2298 kmem_cache_destroy(exception_cache);
2320 kmem_cache_destroy(tracked_chunk_cache);
2321 2299
2322 dm_exception_store_exit(); 2300 dm_exception_store_exit();
2323} 2301}
diff --git a/drivers/md/dm-stripe.c b/drivers/md/dm-stripe.c
index e2f876539743..c89cde86d400 100644
--- a/drivers/md/dm-stripe.c
+++ b/drivers/md/dm-stripe.c
@@ -162,6 +162,7 @@ static int stripe_ctr(struct dm_target *ti, unsigned int argc, char **argv)
162 162
163 ti->num_flush_requests = stripes; 163 ti->num_flush_requests = stripes;
164 ti->num_discard_requests = stripes; 164 ti->num_discard_requests = stripes;
165 ti->num_write_same_requests = stripes;
165 166
166 sc->chunk_size = chunk_size; 167 sc->chunk_size = chunk_size;
167 if (chunk_size & (chunk_size - 1)) 168 if (chunk_size & (chunk_size - 1))
@@ -251,8 +252,8 @@ static void stripe_map_range_sector(struct stripe_c *sc, sector_t sector,
251 *result += sc->chunk_size; /* next chunk */ 252 *result += sc->chunk_size; /* next chunk */
252} 253}
253 254
254static int stripe_map_discard(struct stripe_c *sc, struct bio *bio, 255static int stripe_map_range(struct stripe_c *sc, struct bio *bio,
255 uint32_t target_stripe) 256 uint32_t target_stripe)
256{ 257{
257 sector_t begin, end; 258 sector_t begin, end;
258 259
@@ -271,23 +272,23 @@ static int stripe_map_discard(struct stripe_c *sc, struct bio *bio,
271 } 272 }
272} 273}
273 274
274static int stripe_map(struct dm_target *ti, struct bio *bio, 275static int stripe_map(struct dm_target *ti, struct bio *bio)
275 union map_info *map_context)
276{ 276{
277 struct stripe_c *sc = ti->private; 277 struct stripe_c *sc = ti->private;
278 uint32_t stripe; 278 uint32_t stripe;
279 unsigned target_request_nr; 279 unsigned target_request_nr;
280 280
281 if (bio->bi_rw & REQ_FLUSH) { 281 if (bio->bi_rw & REQ_FLUSH) {
282 target_request_nr = map_context->target_request_nr; 282 target_request_nr = dm_bio_get_target_request_nr(bio);
283 BUG_ON(target_request_nr >= sc->stripes); 283 BUG_ON(target_request_nr >= sc->stripes);
284 bio->bi_bdev = sc->stripe[target_request_nr].dev->bdev; 284 bio->bi_bdev = sc->stripe[target_request_nr].dev->bdev;
285 return DM_MAPIO_REMAPPED; 285 return DM_MAPIO_REMAPPED;
286 } 286 }
287 if (unlikely(bio->bi_rw & REQ_DISCARD)) { 287 if (unlikely(bio->bi_rw & REQ_DISCARD) ||
288 target_request_nr = map_context->target_request_nr; 288 unlikely(bio->bi_rw & REQ_WRITE_SAME)) {
289 target_request_nr = dm_bio_get_target_request_nr(bio);
289 BUG_ON(target_request_nr >= sc->stripes); 290 BUG_ON(target_request_nr >= sc->stripes);
290 return stripe_map_discard(sc, bio, target_request_nr); 291 return stripe_map_range(sc, bio, target_request_nr);
291 } 292 }
292 293
293 stripe_map_sector(sc, bio->bi_sector, &stripe, &bio->bi_sector); 294 stripe_map_sector(sc, bio->bi_sector, &stripe, &bio->bi_sector);
@@ -342,8 +343,7 @@ static int stripe_status(struct dm_target *ti, status_type_t type,
342 return 0; 343 return 0;
343} 344}
344 345
345static int stripe_end_io(struct dm_target *ti, struct bio *bio, 346static int stripe_end_io(struct dm_target *ti, struct bio *bio, int error)
346 int error, union map_info *map_context)
347{ 347{
348 unsigned i; 348 unsigned i;
349 char major_minor[16]; 349 char major_minor[16];
diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
index 100368eb7991..daf25d0890b3 100644
--- a/drivers/md/dm-table.c
+++ b/drivers/md/dm-table.c
@@ -967,13 +967,22 @@ bool dm_table_request_based(struct dm_table *t)
967int dm_table_alloc_md_mempools(struct dm_table *t) 967int dm_table_alloc_md_mempools(struct dm_table *t)
968{ 968{
969 unsigned type = dm_table_get_type(t); 969 unsigned type = dm_table_get_type(t);
970 unsigned per_bio_data_size = 0;
971 struct dm_target *tgt;
972 unsigned i;
970 973
971 if (unlikely(type == DM_TYPE_NONE)) { 974 if (unlikely(type == DM_TYPE_NONE)) {
972 DMWARN("no table type is set, can't allocate mempools"); 975 DMWARN("no table type is set, can't allocate mempools");
973 return -EINVAL; 976 return -EINVAL;
974 } 977 }
975 978
976 t->mempools = dm_alloc_md_mempools(type, t->integrity_supported); 979 if (type == DM_TYPE_BIO_BASED)
980 for (i = 0; i < t->num_targets; i++) {
981 tgt = t->targets + i;
982 per_bio_data_size = max(per_bio_data_size, tgt->per_bio_data_size);
983 }
984
985 t->mempools = dm_alloc_md_mempools(type, t->integrity_supported, per_bio_data_size);
977 if (!t->mempools) 986 if (!t->mempools)
978 return -ENOMEM; 987 return -ENOMEM;
979 988
@@ -1414,6 +1423,33 @@ static bool dm_table_all_devices_attribute(struct dm_table *t,
1414 return 1; 1423 return 1;
1415} 1424}
1416 1425
1426static int device_not_write_same_capable(struct dm_target *ti, struct dm_dev *dev,
1427 sector_t start, sector_t len, void *data)
1428{
1429 struct request_queue *q = bdev_get_queue(dev->bdev);
1430
1431 return q && !q->limits.max_write_same_sectors;
1432}
1433
1434static bool dm_table_supports_write_same(struct dm_table *t)
1435{
1436 struct dm_target *ti;
1437 unsigned i = 0;
1438
1439 while (i < dm_table_get_num_targets(t)) {
1440 ti = dm_table_get_target(t, i++);
1441
1442 if (!ti->num_write_same_requests)
1443 return false;
1444
1445 if (!ti->type->iterate_devices ||
1446 !ti->type->iterate_devices(ti, device_not_write_same_capable, NULL))
1447 return false;
1448 }
1449
1450 return true;
1451}
1452
1417void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q, 1453void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q,
1418 struct queue_limits *limits) 1454 struct queue_limits *limits)
1419{ 1455{
@@ -1445,6 +1481,9 @@ void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q,
1445 else 1481 else
1446 queue_flag_clear_unlocked(QUEUE_FLAG_NONROT, q); 1482 queue_flag_clear_unlocked(QUEUE_FLAG_NONROT, q);
1447 1483
1484 if (!dm_table_supports_write_same(t))
1485 q->limits.max_write_same_sectors = 0;
1486
1448 dm_table_set_integrity(t); 1487 dm_table_set_integrity(t);
1449 1488
1450 /* 1489 /*
diff --git a/drivers/md/dm-target.c b/drivers/md/dm-target.c
index 8da366cf381c..617d21a77256 100644
--- a/drivers/md/dm-target.c
+++ b/drivers/md/dm-target.c
@@ -126,15 +126,14 @@ static void io_err_dtr(struct dm_target *tt)
126 /* empty */ 126 /* empty */
127} 127}
128 128
129static int io_err_map(struct dm_target *tt, struct bio *bio, 129static int io_err_map(struct dm_target *tt, struct bio *bio)
130 union map_info *map_context)
131{ 130{
132 return -EIO; 131 return -EIO;
133} 132}
134 133
135static struct target_type error_target = { 134static struct target_type error_target = {
136 .name = "error", 135 .name = "error",
137 .version = {1, 0, 1}, 136 .version = {1, 1, 0},
138 .ctr = io_err_ctr, 137 .ctr = io_err_ctr,
139 .dtr = io_err_dtr, 138 .dtr = io_err_dtr,
140 .map = io_err_map, 139 .map = io_err_map,
diff --git a/drivers/md/dm-thin-metadata.c b/drivers/md/dm-thin-metadata.c
index 693e149e9727..4d6e85367b84 100644
--- a/drivers/md/dm-thin-metadata.c
+++ b/drivers/md/dm-thin-metadata.c
@@ -408,7 +408,7 @@ static void __setup_btree_details(struct dm_pool_metadata *pmd)
408 408
409 pmd->tl_info.tm = pmd->tm; 409 pmd->tl_info.tm = pmd->tm;
410 pmd->tl_info.levels = 1; 410 pmd->tl_info.levels = 1;
411 pmd->tl_info.value_type.context = &pmd->info; 411 pmd->tl_info.value_type.context = &pmd->bl_info;
412 pmd->tl_info.value_type.size = sizeof(__le64); 412 pmd->tl_info.value_type.size = sizeof(__le64);
413 pmd->tl_info.value_type.inc = subtree_inc; 413 pmd->tl_info.value_type.inc = subtree_inc;
414 pmd->tl_info.value_type.dec = subtree_dec; 414 pmd->tl_info.value_type.dec = subtree_dec;
diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c
index 058acf3a5ba7..675ae5274016 100644
--- a/drivers/md/dm-thin.c
+++ b/drivers/md/dm-thin.c
@@ -186,7 +186,6 @@ struct pool {
186 186
187 struct dm_thin_new_mapping *next_mapping; 187 struct dm_thin_new_mapping *next_mapping;
188 mempool_t *mapping_pool; 188 mempool_t *mapping_pool;
189 mempool_t *endio_hook_pool;
190 189
191 process_bio_fn process_bio; 190 process_bio_fn process_bio;
192 process_bio_fn process_discard; 191 process_bio_fn process_discard;
@@ -304,7 +303,7 @@ static void __requeue_bio_list(struct thin_c *tc, struct bio_list *master)
304 bio_list_init(master); 303 bio_list_init(master);
305 304
306 while ((bio = bio_list_pop(&bios))) { 305 while ((bio = bio_list_pop(&bios))) {
307 struct dm_thin_endio_hook *h = dm_get_mapinfo(bio)->ptr; 306 struct dm_thin_endio_hook *h = dm_per_bio_data(bio, sizeof(struct dm_thin_endio_hook));
308 307
309 if (h->tc == tc) 308 if (h->tc == tc)
310 bio_endio(bio, DM_ENDIO_REQUEUE); 309 bio_endio(bio, DM_ENDIO_REQUEUE);
@@ -368,6 +367,17 @@ static int bio_triggers_commit(struct thin_c *tc, struct bio *bio)
368 dm_thin_changed_this_transaction(tc->td); 367 dm_thin_changed_this_transaction(tc->td);
369} 368}
370 369
370static void inc_all_io_entry(struct pool *pool, struct bio *bio)
371{
372 struct dm_thin_endio_hook *h;
373
374 if (bio->bi_rw & REQ_DISCARD)
375 return;
376
377 h = dm_per_bio_data(bio, sizeof(struct dm_thin_endio_hook));
378 h->all_io_entry = dm_deferred_entry_inc(pool->all_io_ds);
379}
380
371static void issue(struct thin_c *tc, struct bio *bio) 381static void issue(struct thin_c *tc, struct bio *bio)
372{ 382{
373 struct pool *pool = tc->pool; 383 struct pool *pool = tc->pool;
@@ -474,7 +484,7 @@ static void copy_complete(int read_err, unsigned long write_err, void *context)
474static void overwrite_endio(struct bio *bio, int err) 484static void overwrite_endio(struct bio *bio, int err)
475{ 485{
476 unsigned long flags; 486 unsigned long flags;
477 struct dm_thin_endio_hook *h = dm_get_mapinfo(bio)->ptr; 487 struct dm_thin_endio_hook *h = dm_per_bio_data(bio, sizeof(struct dm_thin_endio_hook));
478 struct dm_thin_new_mapping *m = h->overwrite_mapping; 488 struct dm_thin_new_mapping *m = h->overwrite_mapping;
479 struct pool *pool = m->tc->pool; 489 struct pool *pool = m->tc->pool;
480 490
@@ -499,8 +509,7 @@ static void overwrite_endio(struct bio *bio, int err)
499/* 509/*
500 * This sends the bios in the cell back to the deferred_bios list. 510 * This sends the bios in the cell back to the deferred_bios list.
501 */ 511 */
502static void cell_defer(struct thin_c *tc, struct dm_bio_prison_cell *cell, 512static void cell_defer(struct thin_c *tc, struct dm_bio_prison_cell *cell)
503 dm_block_t data_block)
504{ 513{
505 struct pool *pool = tc->pool; 514 struct pool *pool = tc->pool;
506 unsigned long flags; 515 unsigned long flags;
@@ -513,17 +522,13 @@ static void cell_defer(struct thin_c *tc, struct dm_bio_prison_cell *cell,
513} 522}
514 523
515/* 524/*
516 * Same as cell_defer above, except it omits one particular detainee, 525 * Same as cell_defer except it omits the original holder of the cell.
517 * a write bio that covers the block and has already been processed.
518 */ 526 */
519static void cell_defer_except(struct thin_c *tc, struct dm_bio_prison_cell *cell) 527static void cell_defer_no_holder(struct thin_c *tc, struct dm_bio_prison_cell *cell)
520{ 528{
521 struct bio_list bios;
522 struct pool *pool = tc->pool; 529 struct pool *pool = tc->pool;
523 unsigned long flags; 530 unsigned long flags;
524 531
525 bio_list_init(&bios);
526
527 spin_lock_irqsave(&pool->lock, flags); 532 spin_lock_irqsave(&pool->lock, flags);
528 dm_cell_release_no_holder(cell, &pool->deferred_bios); 533 dm_cell_release_no_holder(cell, &pool->deferred_bios);
529 spin_unlock_irqrestore(&pool->lock, flags); 534 spin_unlock_irqrestore(&pool->lock, flags);
@@ -561,7 +566,7 @@ static void process_prepared_mapping(struct dm_thin_new_mapping *m)
561 */ 566 */
562 r = dm_thin_insert_block(tc->td, m->virt_block, m->data_block); 567 r = dm_thin_insert_block(tc->td, m->virt_block, m->data_block);
563 if (r) { 568 if (r) {
564 DMERR("dm_thin_insert_block() failed"); 569 DMERR_LIMIT("dm_thin_insert_block() failed");
565 dm_cell_error(m->cell); 570 dm_cell_error(m->cell);
566 goto out; 571 goto out;
567 } 572 }
@@ -573,10 +578,10 @@ static void process_prepared_mapping(struct dm_thin_new_mapping *m)
573 * the bios in the cell. 578 * the bios in the cell.
574 */ 579 */
575 if (bio) { 580 if (bio) {
576 cell_defer_except(tc, m->cell); 581 cell_defer_no_holder(tc, m->cell);
577 bio_endio(bio, 0); 582 bio_endio(bio, 0);
578 } else 583 } else
579 cell_defer(tc, m->cell, m->data_block); 584 cell_defer(tc, m->cell);
580 585
581out: 586out:
582 list_del(&m->list); 587 list_del(&m->list);
@@ -588,8 +593,8 @@ static void process_prepared_discard_fail(struct dm_thin_new_mapping *m)
588 struct thin_c *tc = m->tc; 593 struct thin_c *tc = m->tc;
589 594
590 bio_io_error(m->bio); 595 bio_io_error(m->bio);
591 cell_defer_except(tc, m->cell); 596 cell_defer_no_holder(tc, m->cell);
592 cell_defer_except(tc, m->cell2); 597 cell_defer_no_holder(tc, m->cell2);
593 mempool_free(m, tc->pool->mapping_pool); 598 mempool_free(m, tc->pool->mapping_pool);
594} 599}
595 600
@@ -597,13 +602,15 @@ static void process_prepared_discard_passdown(struct dm_thin_new_mapping *m)
597{ 602{
598 struct thin_c *tc = m->tc; 603 struct thin_c *tc = m->tc;
599 604
605 inc_all_io_entry(tc->pool, m->bio);
606 cell_defer_no_holder(tc, m->cell);
607 cell_defer_no_holder(tc, m->cell2);
608
600 if (m->pass_discard) 609 if (m->pass_discard)
601 remap_and_issue(tc, m->bio, m->data_block); 610 remap_and_issue(tc, m->bio, m->data_block);
602 else 611 else
603 bio_endio(m->bio, 0); 612 bio_endio(m->bio, 0);
604 613
605 cell_defer_except(tc, m->cell);
606 cell_defer_except(tc, m->cell2);
607 mempool_free(m, tc->pool->mapping_pool); 614 mempool_free(m, tc->pool->mapping_pool);
608} 615}
609 616
@@ -614,7 +621,7 @@ static void process_prepared_discard(struct dm_thin_new_mapping *m)
614 621
615 r = dm_thin_remove_block(tc->td, m->virt_block); 622 r = dm_thin_remove_block(tc->td, m->virt_block);
616 if (r) 623 if (r)
617 DMERR("dm_thin_remove_block() failed"); 624 DMERR_LIMIT("dm_thin_remove_block() failed");
618 625
619 process_prepared_discard_passdown(m); 626 process_prepared_discard_passdown(m);
620} 627}
@@ -706,11 +713,12 @@ static void schedule_copy(struct thin_c *tc, dm_block_t virt_block,
706 * bio immediately. Otherwise we use kcopyd to clone the data first. 713 * bio immediately. Otherwise we use kcopyd to clone the data first.
707 */ 714 */
708 if (io_overwrites_block(pool, bio)) { 715 if (io_overwrites_block(pool, bio)) {
709 struct dm_thin_endio_hook *h = dm_get_mapinfo(bio)->ptr; 716 struct dm_thin_endio_hook *h = dm_per_bio_data(bio, sizeof(struct dm_thin_endio_hook));
710 717
711 h->overwrite_mapping = m; 718 h->overwrite_mapping = m;
712 m->bio = bio; 719 m->bio = bio;
713 save_and_set_endio(bio, &m->saved_bi_end_io, overwrite_endio); 720 save_and_set_endio(bio, &m->saved_bi_end_io, overwrite_endio);
721 inc_all_io_entry(pool, bio);
714 remap_and_issue(tc, bio, data_dest); 722 remap_and_issue(tc, bio, data_dest);
715 } else { 723 } else {
716 struct dm_io_region from, to; 724 struct dm_io_region from, to;
@@ -727,7 +735,7 @@ static void schedule_copy(struct thin_c *tc, dm_block_t virt_block,
727 0, copy_complete, m); 735 0, copy_complete, m);
728 if (r < 0) { 736 if (r < 0) {
729 mempool_free(m, pool->mapping_pool); 737 mempool_free(m, pool->mapping_pool);
730 DMERR("dm_kcopyd_copy() failed"); 738 DMERR_LIMIT("dm_kcopyd_copy() failed");
731 dm_cell_error(cell); 739 dm_cell_error(cell);
732 } 740 }
733 } 741 }
@@ -775,11 +783,12 @@ static void schedule_zero(struct thin_c *tc, dm_block_t virt_block,
775 process_prepared_mapping(m); 783 process_prepared_mapping(m);
776 784
777 else if (io_overwrites_block(pool, bio)) { 785 else if (io_overwrites_block(pool, bio)) {
778 struct dm_thin_endio_hook *h = dm_get_mapinfo(bio)->ptr; 786 struct dm_thin_endio_hook *h = dm_per_bio_data(bio, sizeof(struct dm_thin_endio_hook));
779 787
780 h->overwrite_mapping = m; 788 h->overwrite_mapping = m;
781 m->bio = bio; 789 m->bio = bio;
782 save_and_set_endio(bio, &m->saved_bi_end_io, overwrite_endio); 790 save_and_set_endio(bio, &m->saved_bi_end_io, overwrite_endio);
791 inc_all_io_entry(pool, bio);
783 remap_and_issue(tc, bio, data_block); 792 remap_and_issue(tc, bio, data_block);
784 } else { 793 } else {
785 int r; 794 int r;
@@ -792,7 +801,7 @@ static void schedule_zero(struct thin_c *tc, dm_block_t virt_block,
792 r = dm_kcopyd_zero(pool->copier, 1, &to, 0, copy_complete, m); 801 r = dm_kcopyd_zero(pool->copier, 1, &to, 0, copy_complete, m);
793 if (r < 0) { 802 if (r < 0) {
794 mempool_free(m, pool->mapping_pool); 803 mempool_free(m, pool->mapping_pool);
795 DMERR("dm_kcopyd_zero() failed"); 804 DMERR_LIMIT("dm_kcopyd_zero() failed");
796 dm_cell_error(cell); 805 dm_cell_error(cell);
797 } 806 }
798 } 807 }
@@ -804,7 +813,7 @@ static int commit(struct pool *pool)
804 813
805 r = dm_pool_commit_metadata(pool->pmd); 814 r = dm_pool_commit_metadata(pool->pmd);
806 if (r) 815 if (r)
807 DMERR("commit failed, error = %d", r); 816 DMERR_LIMIT("commit failed: error = %d", r);
808 817
809 return r; 818 return r;
810} 819}
@@ -889,7 +898,7 @@ static int alloc_data_block(struct thin_c *tc, dm_block_t *result)
889 */ 898 */
890static void retry_on_resume(struct bio *bio) 899static void retry_on_resume(struct bio *bio)
891{ 900{
892 struct dm_thin_endio_hook *h = dm_get_mapinfo(bio)->ptr; 901 struct dm_thin_endio_hook *h = dm_per_bio_data(bio, sizeof(struct dm_thin_endio_hook));
893 struct thin_c *tc = h->tc; 902 struct thin_c *tc = h->tc;
894 struct pool *pool = tc->pool; 903 struct pool *pool = tc->pool;
895 unsigned long flags; 904 unsigned long flags;
@@ -936,7 +945,7 @@ static void process_discard(struct thin_c *tc, struct bio *bio)
936 */ 945 */
937 build_data_key(tc->td, lookup_result.block, &key2); 946 build_data_key(tc->td, lookup_result.block, &key2);
938 if (dm_bio_detain(tc->pool->prison, &key2, bio, &cell2)) { 947 if (dm_bio_detain(tc->pool->prison, &key2, bio, &cell2)) {
939 dm_cell_release_singleton(cell, bio); 948 cell_defer_no_holder(tc, cell);
940 break; 949 break;
941 } 950 }
942 951
@@ -962,13 +971,15 @@ static void process_discard(struct thin_c *tc, struct bio *bio)
962 wake_worker(pool); 971 wake_worker(pool);
963 } 972 }
964 } else { 973 } else {
974 inc_all_io_entry(pool, bio);
975 cell_defer_no_holder(tc, cell);
976 cell_defer_no_holder(tc, cell2);
977
965 /* 978 /*
966 * The DM core makes sure that the discard doesn't span 979 * The DM core makes sure that the discard doesn't span
967 * a block boundary. So we submit the discard of a 980 * a block boundary. So we submit the discard of a
968 * partial block appropriately. 981 * partial block appropriately.
969 */ 982 */
970 dm_cell_release_singleton(cell, bio);
971 dm_cell_release_singleton(cell2, bio);
972 if ((!lookup_result.shared) && pool->pf.discard_passdown) 983 if ((!lookup_result.shared) && pool->pf.discard_passdown)
973 remap_and_issue(tc, bio, lookup_result.block); 984 remap_and_issue(tc, bio, lookup_result.block);
974 else 985 else
@@ -980,13 +991,14 @@ static void process_discard(struct thin_c *tc, struct bio *bio)
980 /* 991 /*
981 * It isn't provisioned, just forget it. 992 * It isn't provisioned, just forget it.
982 */ 993 */
983 dm_cell_release_singleton(cell, bio); 994 cell_defer_no_holder(tc, cell);
984 bio_endio(bio, 0); 995 bio_endio(bio, 0);
985 break; 996 break;
986 997
987 default: 998 default:
988 DMERR("discard: find block unexpectedly returned %d", r); 999 DMERR_LIMIT("%s: dm_thin_find_block() failed: error = %d",
989 dm_cell_release_singleton(cell, bio); 1000 __func__, r);
1001 cell_defer_no_holder(tc, cell);
990 bio_io_error(bio); 1002 bio_io_error(bio);
991 break; 1003 break;
992 } 1004 }
@@ -1012,7 +1024,8 @@ static void break_sharing(struct thin_c *tc, struct bio *bio, dm_block_t block,
1012 break; 1024 break;
1013 1025
1014 default: 1026 default:
1015 DMERR("%s: alloc_data_block() failed, error = %d", __func__, r); 1027 DMERR_LIMIT("%s: alloc_data_block() failed: error = %d",
1028 __func__, r);
1016 dm_cell_error(cell); 1029 dm_cell_error(cell);
1017 break; 1030 break;
1018 } 1031 }
@@ -1037,11 +1050,12 @@ static void process_shared_bio(struct thin_c *tc, struct bio *bio,
1037 if (bio_data_dir(bio) == WRITE && bio->bi_size) 1050 if (bio_data_dir(bio) == WRITE && bio->bi_size)
1038 break_sharing(tc, bio, block, &key, lookup_result, cell); 1051 break_sharing(tc, bio, block, &key, lookup_result, cell);
1039 else { 1052 else {
1040 struct dm_thin_endio_hook *h = dm_get_mapinfo(bio)->ptr; 1053 struct dm_thin_endio_hook *h = dm_per_bio_data(bio, sizeof(struct dm_thin_endio_hook));
1041 1054
1042 h->shared_read_entry = dm_deferred_entry_inc(pool->shared_read_ds); 1055 h->shared_read_entry = dm_deferred_entry_inc(pool->shared_read_ds);
1056 inc_all_io_entry(pool, bio);
1057 cell_defer_no_holder(tc, cell);
1043 1058
1044 dm_cell_release_singleton(cell, bio);
1045 remap_and_issue(tc, bio, lookup_result->block); 1059 remap_and_issue(tc, bio, lookup_result->block);
1046 } 1060 }
1047} 1061}
@@ -1056,7 +1070,9 @@ static void provision_block(struct thin_c *tc, struct bio *bio, dm_block_t block
1056 * Remap empty bios (flushes) immediately, without provisioning. 1070 * Remap empty bios (flushes) immediately, without provisioning.
1057 */ 1071 */
1058 if (!bio->bi_size) { 1072 if (!bio->bi_size) {
1059 dm_cell_release_singleton(cell, bio); 1073 inc_all_io_entry(tc->pool, bio);
1074 cell_defer_no_holder(tc, cell);
1075
1060 remap_and_issue(tc, bio, 0); 1076 remap_and_issue(tc, bio, 0);
1061 return; 1077 return;
1062 } 1078 }
@@ -1066,7 +1082,7 @@ static void provision_block(struct thin_c *tc, struct bio *bio, dm_block_t block
1066 */ 1082 */
1067 if (bio_data_dir(bio) == READ) { 1083 if (bio_data_dir(bio) == READ) {
1068 zero_fill_bio(bio); 1084 zero_fill_bio(bio);
1069 dm_cell_release_singleton(cell, bio); 1085 cell_defer_no_holder(tc, cell);
1070 bio_endio(bio, 0); 1086 bio_endio(bio, 0);
1071 return; 1087 return;
1072 } 1088 }
@@ -1085,7 +1101,8 @@ static void provision_block(struct thin_c *tc, struct bio *bio, dm_block_t block
1085 break; 1101 break;
1086 1102
1087 default: 1103 default:
1088 DMERR("%s: alloc_data_block() failed, error = %d", __func__, r); 1104 DMERR_LIMIT("%s: alloc_data_block() failed: error = %d",
1105 __func__, r);
1089 set_pool_mode(tc->pool, PM_READ_ONLY); 1106 set_pool_mode(tc->pool, PM_READ_ONLY);
1090 dm_cell_error(cell); 1107 dm_cell_error(cell);
1091 break; 1108 break;
@@ -1111,34 +1128,31 @@ static void process_bio(struct thin_c *tc, struct bio *bio)
1111 r = dm_thin_find_block(tc->td, block, 1, &lookup_result); 1128 r = dm_thin_find_block(tc->td, block, 1, &lookup_result);
1112 switch (r) { 1129 switch (r) {
1113 case 0: 1130 case 0:
1114 /* 1131 if (lookup_result.shared) {
1115 * We can release this cell now. This thread is the only
1116 * one that puts bios into a cell, and we know there were
1117 * no preceding bios.
1118 */
1119 /*
1120 * TODO: this will probably have to change when discard goes
1121 * back in.
1122 */
1123 dm_cell_release_singleton(cell, bio);
1124
1125 if (lookup_result.shared)
1126 process_shared_bio(tc, bio, block, &lookup_result); 1132 process_shared_bio(tc, bio, block, &lookup_result);
1127 else 1133 cell_defer_no_holder(tc, cell);
1134 } else {
1135 inc_all_io_entry(tc->pool, bio);
1136 cell_defer_no_holder(tc, cell);
1137
1128 remap_and_issue(tc, bio, lookup_result.block); 1138 remap_and_issue(tc, bio, lookup_result.block);
1139 }
1129 break; 1140 break;
1130 1141
1131 case -ENODATA: 1142 case -ENODATA:
1132 if (bio_data_dir(bio) == READ && tc->origin_dev) { 1143 if (bio_data_dir(bio) == READ && tc->origin_dev) {
1133 dm_cell_release_singleton(cell, bio); 1144 inc_all_io_entry(tc->pool, bio);
1145 cell_defer_no_holder(tc, cell);
1146
1134 remap_to_origin_and_issue(tc, bio); 1147 remap_to_origin_and_issue(tc, bio);
1135 } else 1148 } else
1136 provision_block(tc, bio, block, cell); 1149 provision_block(tc, bio, block, cell);
1137 break; 1150 break;
1138 1151
1139 default: 1152 default:
1140 DMERR("dm_thin_find_block() failed, error = %d", r); 1153 DMERR_LIMIT("%s: dm_thin_find_block() failed: error = %d",
1141 dm_cell_release_singleton(cell, bio); 1154 __func__, r);
1155 cell_defer_no_holder(tc, cell);
1142 bio_io_error(bio); 1156 bio_io_error(bio);
1143 break; 1157 break;
1144 } 1158 }
@@ -1156,8 +1170,10 @@ static void process_bio_read_only(struct thin_c *tc, struct bio *bio)
1156 case 0: 1170 case 0:
1157 if (lookup_result.shared && (rw == WRITE) && bio->bi_size) 1171 if (lookup_result.shared && (rw == WRITE) && bio->bi_size)
1158 bio_io_error(bio); 1172 bio_io_error(bio);
1159 else 1173 else {
1174 inc_all_io_entry(tc->pool, bio);
1160 remap_and_issue(tc, bio, lookup_result.block); 1175 remap_and_issue(tc, bio, lookup_result.block);
1176 }
1161 break; 1177 break;
1162 1178
1163 case -ENODATA: 1179 case -ENODATA:
@@ -1167,6 +1183,7 @@ static void process_bio_read_only(struct thin_c *tc, struct bio *bio)
1167 } 1183 }
1168 1184
1169 if (tc->origin_dev) { 1185 if (tc->origin_dev) {
1186 inc_all_io_entry(tc->pool, bio);
1170 remap_to_origin_and_issue(tc, bio); 1187 remap_to_origin_and_issue(tc, bio);
1171 break; 1188 break;
1172 } 1189 }
@@ -1176,7 +1193,8 @@ static void process_bio_read_only(struct thin_c *tc, struct bio *bio)
1176 break; 1193 break;
1177 1194
1178 default: 1195 default:
1179 DMERR("dm_thin_find_block() failed, error = %d", r); 1196 DMERR_LIMIT("%s: dm_thin_find_block() failed: error = %d",
1197 __func__, r);
1180 bio_io_error(bio); 1198 bio_io_error(bio);
1181 break; 1199 break;
1182 } 1200 }
@@ -1207,7 +1225,7 @@ static void process_deferred_bios(struct pool *pool)
1207 spin_unlock_irqrestore(&pool->lock, flags); 1225 spin_unlock_irqrestore(&pool->lock, flags);
1208 1226
1209 while ((bio = bio_list_pop(&bios))) { 1227 while ((bio = bio_list_pop(&bios))) {
1210 struct dm_thin_endio_hook *h = dm_get_mapinfo(bio)->ptr; 1228 struct dm_thin_endio_hook *h = dm_per_bio_data(bio, sizeof(struct dm_thin_endio_hook));
1211 struct thin_c *tc = h->tc; 1229 struct thin_c *tc = h->tc;
1212 1230
1213 /* 1231 /*
@@ -1340,32 +1358,30 @@ static void thin_defer_bio(struct thin_c *tc, struct bio *bio)
1340 wake_worker(pool); 1358 wake_worker(pool);
1341} 1359}
1342 1360
1343static struct dm_thin_endio_hook *thin_hook_bio(struct thin_c *tc, struct bio *bio) 1361static void thin_hook_bio(struct thin_c *tc, struct bio *bio)
1344{ 1362{
1345 struct pool *pool = tc->pool; 1363 struct dm_thin_endio_hook *h = dm_per_bio_data(bio, sizeof(struct dm_thin_endio_hook));
1346 struct dm_thin_endio_hook *h = mempool_alloc(pool->endio_hook_pool, GFP_NOIO);
1347 1364
1348 h->tc = tc; 1365 h->tc = tc;
1349 h->shared_read_entry = NULL; 1366 h->shared_read_entry = NULL;
1350 h->all_io_entry = bio->bi_rw & REQ_DISCARD ? NULL : dm_deferred_entry_inc(pool->all_io_ds); 1367 h->all_io_entry = NULL;
1351 h->overwrite_mapping = NULL; 1368 h->overwrite_mapping = NULL;
1352
1353 return h;
1354} 1369}
1355 1370
1356/* 1371/*
1357 * Non-blocking function called from the thin target's map function. 1372 * Non-blocking function called from the thin target's map function.
1358 */ 1373 */
1359static int thin_bio_map(struct dm_target *ti, struct bio *bio, 1374static int thin_bio_map(struct dm_target *ti, struct bio *bio)
1360 union map_info *map_context)
1361{ 1375{
1362 int r; 1376 int r;
1363 struct thin_c *tc = ti->private; 1377 struct thin_c *tc = ti->private;
1364 dm_block_t block = get_bio_block(tc, bio); 1378 dm_block_t block = get_bio_block(tc, bio);
1365 struct dm_thin_device *td = tc->td; 1379 struct dm_thin_device *td = tc->td;
1366 struct dm_thin_lookup_result result; 1380 struct dm_thin_lookup_result result;
1381 struct dm_bio_prison_cell *cell1, *cell2;
1382 struct dm_cell_key key;
1367 1383
1368 map_context->ptr = thin_hook_bio(tc, bio); 1384 thin_hook_bio(tc, bio);
1369 1385
1370 if (get_pool_mode(tc->pool) == PM_FAIL) { 1386 if (get_pool_mode(tc->pool) == PM_FAIL) {
1371 bio_io_error(bio); 1387 bio_io_error(bio);
@@ -1400,12 +1416,25 @@ static int thin_bio_map(struct dm_target *ti, struct bio *bio,
1400 * shared flag will be set in their case. 1416 * shared flag will be set in their case.
1401 */ 1417 */
1402 thin_defer_bio(tc, bio); 1418 thin_defer_bio(tc, bio);
1403 r = DM_MAPIO_SUBMITTED; 1419 return DM_MAPIO_SUBMITTED;
1404 } else {
1405 remap(tc, bio, result.block);
1406 r = DM_MAPIO_REMAPPED;
1407 } 1420 }
1408 break; 1421
1422 build_virtual_key(tc->td, block, &key);
1423 if (dm_bio_detain(tc->pool->prison, &key, bio, &cell1))
1424 return DM_MAPIO_SUBMITTED;
1425
1426 build_data_key(tc->td, result.block, &key);
1427 if (dm_bio_detain(tc->pool->prison, &key, bio, &cell2)) {
1428 cell_defer_no_holder(tc, cell1);
1429 return DM_MAPIO_SUBMITTED;
1430 }
1431
1432 inc_all_io_entry(tc->pool, bio);
1433 cell_defer_no_holder(tc, cell2);
1434 cell_defer_no_holder(tc, cell1);
1435
1436 remap(tc, bio, result.block);
1437 return DM_MAPIO_REMAPPED;
1409 1438
1410 case -ENODATA: 1439 case -ENODATA:
1411 if (get_pool_mode(tc->pool) == PM_READ_ONLY) { 1440 if (get_pool_mode(tc->pool) == PM_READ_ONLY) {
@@ -1414,8 +1443,7 @@ static int thin_bio_map(struct dm_target *ti, struct bio *bio,
1414 * of doing so. Just error it. 1443 * of doing so. Just error it.
1415 */ 1444 */
1416 bio_io_error(bio); 1445 bio_io_error(bio);
1417 r = DM_MAPIO_SUBMITTED; 1446 return DM_MAPIO_SUBMITTED;
1418 break;
1419 } 1447 }
1420 /* fall through */ 1448 /* fall through */
1421 1449
@@ -1425,8 +1453,7 @@ static int thin_bio_map(struct dm_target *ti, struct bio *bio,
1425 * provide the hint to load the metadata into cache. 1453 * provide the hint to load the metadata into cache.
1426 */ 1454 */
1427 thin_defer_bio(tc, bio); 1455 thin_defer_bio(tc, bio);
1428 r = DM_MAPIO_SUBMITTED; 1456 return DM_MAPIO_SUBMITTED;
1429 break;
1430 1457
1431 default: 1458 default:
1432 /* 1459 /*
@@ -1435,11 +1462,8 @@ static int thin_bio_map(struct dm_target *ti, struct bio *bio,
1435 * pool is switched to fail-io mode. 1462 * pool is switched to fail-io mode.
1436 */ 1463 */
1437 bio_io_error(bio); 1464 bio_io_error(bio);
1438 r = DM_MAPIO_SUBMITTED; 1465 return DM_MAPIO_SUBMITTED;
1439 break;
1440 } 1466 }
1441
1442 return r;
1443} 1467}
1444 1468
1445static int pool_is_congested(struct dm_target_callbacks *cb, int bdi_bits) 1469static int pool_is_congested(struct dm_target_callbacks *cb, int bdi_bits)
@@ -1566,14 +1590,12 @@ static void __pool_destroy(struct pool *pool)
1566 if (pool->next_mapping) 1590 if (pool->next_mapping)
1567 mempool_free(pool->next_mapping, pool->mapping_pool); 1591 mempool_free(pool->next_mapping, pool->mapping_pool);
1568 mempool_destroy(pool->mapping_pool); 1592 mempool_destroy(pool->mapping_pool);
1569 mempool_destroy(pool->endio_hook_pool);
1570 dm_deferred_set_destroy(pool->shared_read_ds); 1593 dm_deferred_set_destroy(pool->shared_read_ds);
1571 dm_deferred_set_destroy(pool->all_io_ds); 1594 dm_deferred_set_destroy(pool->all_io_ds);
1572 kfree(pool); 1595 kfree(pool);
1573} 1596}
1574 1597
1575static struct kmem_cache *_new_mapping_cache; 1598static struct kmem_cache *_new_mapping_cache;
1576static struct kmem_cache *_endio_hook_cache;
1577 1599
1578static struct pool *pool_create(struct mapped_device *pool_md, 1600static struct pool *pool_create(struct mapped_device *pool_md,
1579 struct block_device *metadata_dev, 1601 struct block_device *metadata_dev,
@@ -1667,13 +1689,6 @@ static struct pool *pool_create(struct mapped_device *pool_md,
1667 goto bad_mapping_pool; 1689 goto bad_mapping_pool;
1668 } 1690 }
1669 1691
1670 pool->endio_hook_pool = mempool_create_slab_pool(ENDIO_HOOK_POOL_SIZE,
1671 _endio_hook_cache);
1672 if (!pool->endio_hook_pool) {
1673 *error = "Error creating pool's endio_hook mempool";
1674 err_p = ERR_PTR(-ENOMEM);
1675 goto bad_endio_hook_pool;
1676 }
1677 pool->ref_count = 1; 1692 pool->ref_count = 1;
1678 pool->last_commit_jiffies = jiffies; 1693 pool->last_commit_jiffies = jiffies;
1679 pool->pool_md = pool_md; 1694 pool->pool_md = pool_md;
@@ -1682,8 +1697,6 @@ static struct pool *pool_create(struct mapped_device *pool_md,
1682 1697
1683 return pool; 1698 return pool;
1684 1699
1685bad_endio_hook_pool:
1686 mempool_destroy(pool->mapping_pool);
1687bad_mapping_pool: 1700bad_mapping_pool:
1688 dm_deferred_set_destroy(pool->all_io_ds); 1701 dm_deferred_set_destroy(pool->all_io_ds);
1689bad_all_io_ds: 1702bad_all_io_ds:
@@ -1966,8 +1979,7 @@ out_unlock:
1966 return r; 1979 return r;
1967} 1980}
1968 1981
1969static int pool_map(struct dm_target *ti, struct bio *bio, 1982static int pool_map(struct dm_target *ti, struct bio *bio)
1970 union map_info *map_context)
1971{ 1983{
1972 int r; 1984 int r;
1973 struct pool_c *pt = ti->private; 1985 struct pool_c *pt = ti->private;
@@ -2358,7 +2370,9 @@ static int pool_status(struct dm_target *ti, status_type_t type,
2358 else 2370 else
2359 DMEMIT("rw "); 2371 DMEMIT("rw ");
2360 2372
2361 if (pool->pf.discard_enabled && pool->pf.discard_passdown) 2373 if (!pool->pf.discard_enabled)
2374 DMEMIT("ignore_discard");
2375 else if (pool->pf.discard_passdown)
2362 DMEMIT("discard_passdown"); 2376 DMEMIT("discard_passdown");
2363 else 2377 else
2364 DMEMIT("no_discard_passdown"); 2378 DMEMIT("no_discard_passdown");
@@ -2454,7 +2468,7 @@ static struct target_type pool_target = {
2454 .name = "thin-pool", 2468 .name = "thin-pool",
2455 .features = DM_TARGET_SINGLETON | DM_TARGET_ALWAYS_WRITEABLE | 2469 .features = DM_TARGET_SINGLETON | DM_TARGET_ALWAYS_WRITEABLE |
2456 DM_TARGET_IMMUTABLE, 2470 DM_TARGET_IMMUTABLE,
2457 .version = {1, 5, 0}, 2471 .version = {1, 6, 0},
2458 .module = THIS_MODULE, 2472 .module = THIS_MODULE,
2459 .ctr = pool_ctr, 2473 .ctr = pool_ctr,
2460 .dtr = pool_dtr, 2474 .dtr = pool_dtr,
@@ -2576,6 +2590,7 @@ static int thin_ctr(struct dm_target *ti, unsigned argc, char **argv)
2576 2590
2577 ti->num_flush_requests = 1; 2591 ti->num_flush_requests = 1;
2578 ti->flush_supported = true; 2592 ti->flush_supported = true;
2593 ti->per_bio_data_size = sizeof(struct dm_thin_endio_hook);
2579 2594
2580 /* In case the pool supports discards, pass them on. */ 2595 /* In case the pool supports discards, pass them on. */
2581 if (tc->pool->pf.discard_enabled) { 2596 if (tc->pool->pf.discard_enabled) {
@@ -2609,20 +2624,17 @@ out_unlock:
2609 return r; 2624 return r;
2610} 2625}
2611 2626
2612static int thin_map(struct dm_target *ti, struct bio *bio, 2627static int thin_map(struct dm_target *ti, struct bio *bio)
2613 union map_info *map_context)
2614{ 2628{
2615 bio->bi_sector = dm_target_offset(ti, bio->bi_sector); 2629 bio->bi_sector = dm_target_offset(ti, bio->bi_sector);
2616 2630
2617 return thin_bio_map(ti, bio, map_context); 2631 return thin_bio_map(ti, bio);
2618} 2632}
2619 2633
2620static int thin_endio(struct dm_target *ti, 2634static int thin_endio(struct dm_target *ti, struct bio *bio, int err)
2621 struct bio *bio, int err,
2622 union map_info *map_context)
2623{ 2635{
2624 unsigned long flags; 2636 unsigned long flags;
2625 struct dm_thin_endio_hook *h = map_context->ptr; 2637 struct dm_thin_endio_hook *h = dm_per_bio_data(bio, sizeof(struct dm_thin_endio_hook));
2626 struct list_head work; 2638 struct list_head work;
2627 struct dm_thin_new_mapping *m, *tmp; 2639 struct dm_thin_new_mapping *m, *tmp;
2628 struct pool *pool = h->tc->pool; 2640 struct pool *pool = h->tc->pool;
@@ -2643,14 +2655,15 @@ static int thin_endio(struct dm_target *ti,
2643 if (h->all_io_entry) { 2655 if (h->all_io_entry) {
2644 INIT_LIST_HEAD(&work); 2656 INIT_LIST_HEAD(&work);
2645 dm_deferred_entry_dec(h->all_io_entry, &work); 2657 dm_deferred_entry_dec(h->all_io_entry, &work);
2646 spin_lock_irqsave(&pool->lock, flags); 2658 if (!list_empty(&work)) {
2647 list_for_each_entry_safe(m, tmp, &work, list) 2659 spin_lock_irqsave(&pool->lock, flags);
2648 list_add(&m->list, &pool->prepared_discards); 2660 list_for_each_entry_safe(m, tmp, &work, list)
2649 spin_unlock_irqrestore(&pool->lock, flags); 2661 list_add(&m->list, &pool->prepared_discards);
2662 spin_unlock_irqrestore(&pool->lock, flags);
2663 wake_worker(pool);
2664 }
2650 } 2665 }
2651 2666
2652 mempool_free(h, pool->endio_hook_pool);
2653
2654 return 0; 2667 return 0;
2655} 2668}
2656 2669
@@ -2745,7 +2758,7 @@ static void thin_io_hints(struct dm_target *ti, struct queue_limits *limits)
2745 2758
2746static struct target_type thin_target = { 2759static struct target_type thin_target = {
2747 .name = "thin", 2760 .name = "thin",
2748 .version = {1, 5, 0}, 2761 .version = {1, 6, 0},
2749 .module = THIS_MODULE, 2762 .module = THIS_MODULE,
2750 .ctr = thin_ctr, 2763 .ctr = thin_ctr,
2751 .dtr = thin_dtr, 2764 .dtr = thin_dtr,
@@ -2779,14 +2792,8 @@ static int __init dm_thin_init(void)
2779 if (!_new_mapping_cache) 2792 if (!_new_mapping_cache)
2780 goto bad_new_mapping_cache; 2793 goto bad_new_mapping_cache;
2781 2794
2782 _endio_hook_cache = KMEM_CACHE(dm_thin_endio_hook, 0);
2783 if (!_endio_hook_cache)
2784 goto bad_endio_hook_cache;
2785
2786 return 0; 2795 return 0;
2787 2796
2788bad_endio_hook_cache:
2789 kmem_cache_destroy(_new_mapping_cache);
2790bad_new_mapping_cache: 2797bad_new_mapping_cache:
2791 dm_unregister_target(&pool_target); 2798 dm_unregister_target(&pool_target);
2792bad_pool_target: 2799bad_pool_target:
@@ -2801,7 +2808,6 @@ static void dm_thin_exit(void)
2801 dm_unregister_target(&pool_target); 2808 dm_unregister_target(&pool_target);
2802 2809
2803 kmem_cache_destroy(_new_mapping_cache); 2810 kmem_cache_destroy(_new_mapping_cache);
2804 kmem_cache_destroy(_endio_hook_cache);
2805} 2811}
2806 2812
2807module_init(dm_thin_init); 2813module_init(dm_thin_init);
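Context for the dm-thin hunks above: the per-target endio_hook mempool and kmem cache are gone because the hook now lives in the bio's front_pad ("per_bio_data"). As a minimal sketch only — not part of this patch, and the example_* names are hypothetical — a bio-based target using the new facility declares the size it needs in its constructor and then just fetches the data in map and end_io, with no allocation or freeing of its own:

struct example_hook {
        struct example_target *et;      /* back-pointer filled in at map time */
};

static int example_ctr(struct dm_target *ti, unsigned argc, char **argv)
{
        /* Ask the core to reserve this much front_pad in every clone bio. */
        ti->per_bio_data_size = sizeof(struct example_hook);
        return 0;
}

static int example_map(struct dm_target *ti, struct bio *bio)
{
        /* No mempool_alloc(): the hook already sits in the bio's front_pad. */
        struct example_hook *h = dm_per_bio_data(bio, sizeof(struct example_hook));

        h->et = ti->private;
        return DM_MAPIO_REMAPPED;
}

static int example_end_io(struct dm_target *ti, struct bio *bio, int err)
{
        struct example_hook *h = dm_per_bio_data(bio, sizeof(struct example_hook));

        /* Use h as needed; nothing to free, the data goes away with the bio. */
        return 0;
}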
diff --git a/drivers/md/dm-verity.c b/drivers/md/dm-verity.c
index 9e7328bb4030..52cde982164a 100644
--- a/drivers/md/dm-verity.c
+++ b/drivers/md/dm-verity.c
@@ -55,7 +55,6 @@ struct dm_verity {
55 unsigned shash_descsize;/* the size of temporary space for crypto */ 55 unsigned shash_descsize;/* the size of temporary space for crypto */
56 int hash_failed; /* set to 1 if hash of any block failed */ 56 int hash_failed; /* set to 1 if hash of any block failed */
57 57
58 mempool_t *io_mempool; /* mempool of struct dm_verity_io */
59 mempool_t *vec_mempool; /* mempool of bio vector */ 58 mempool_t *vec_mempool; /* mempool of bio vector */
60 59
61 struct workqueue_struct *verify_wq; 60 struct workqueue_struct *verify_wq;
@@ -66,7 +65,6 @@ struct dm_verity {
66 65
67struct dm_verity_io { 66struct dm_verity_io {
68 struct dm_verity *v; 67 struct dm_verity *v;
69 struct bio *bio;
70 68
71 /* original values of bio->bi_end_io and bio->bi_private */ 69 /* original values of bio->bi_end_io and bio->bi_private */
72 bio_end_io_t *orig_bi_end_io; 70 bio_end_io_t *orig_bi_end_io;
@@ -389,8 +387,8 @@ test_block_hash:
389 */ 387 */
390static void verity_finish_io(struct dm_verity_io *io, int error) 388static void verity_finish_io(struct dm_verity_io *io, int error)
391{ 389{
392 struct bio *bio = io->bio;
393 struct dm_verity *v = io->v; 390 struct dm_verity *v = io->v;
391 struct bio *bio = dm_bio_from_per_bio_data(io, v->ti->per_bio_data_size);
394 392
395 bio->bi_end_io = io->orig_bi_end_io; 393 bio->bi_end_io = io->orig_bi_end_io;
396 bio->bi_private = io->orig_bi_private; 394 bio->bi_private = io->orig_bi_private;
@@ -398,8 +396,6 @@ static void verity_finish_io(struct dm_verity_io *io, int error)
398 if (io->io_vec != io->io_vec_inline) 396 if (io->io_vec != io->io_vec_inline)
399 mempool_free(io->io_vec, v->vec_mempool); 397 mempool_free(io->io_vec, v->vec_mempool);
400 398
401 mempool_free(io, v->io_mempool);
402
403 bio_endio(bio, error); 399 bio_endio(bio, error);
404} 400}
405 401
@@ -462,8 +458,7 @@ no_prefetch_cluster:
462 * Bio map function. It allocates dm_verity_io structure and bio vector and 458 * Bio map function. It allocates dm_verity_io structure and bio vector and
463 * fills them. Then it issues prefetches and the I/O. 459 * fills them. Then it issues prefetches and the I/O.
464 */ 460 */
465static int verity_map(struct dm_target *ti, struct bio *bio, 461static int verity_map(struct dm_target *ti, struct bio *bio)
466 union map_info *map_context)
467{ 462{
468 struct dm_verity *v = ti->private; 463 struct dm_verity *v = ti->private;
469 struct dm_verity_io *io; 464 struct dm_verity_io *io;
@@ -486,9 +481,8 @@ static int verity_map(struct dm_target *ti, struct bio *bio,
486 if (bio_data_dir(bio) == WRITE) 481 if (bio_data_dir(bio) == WRITE)
487 return -EIO; 482 return -EIO;
488 483
489 io = mempool_alloc(v->io_mempool, GFP_NOIO); 484 io = dm_per_bio_data(bio, ti->per_bio_data_size);
490 io->v = v; 485 io->v = v;
491 io->bio = bio;
492 io->orig_bi_end_io = bio->bi_end_io; 486 io->orig_bi_end_io = bio->bi_end_io;
493 io->orig_bi_private = bio->bi_private; 487 io->orig_bi_private = bio->bi_private;
494 io->block = bio->bi_sector >> (v->data_dev_block_bits - SECTOR_SHIFT); 488 io->block = bio->bi_sector >> (v->data_dev_block_bits - SECTOR_SHIFT);
@@ -610,9 +604,6 @@ static void verity_dtr(struct dm_target *ti)
610 if (v->vec_mempool) 604 if (v->vec_mempool)
611 mempool_destroy(v->vec_mempool); 605 mempool_destroy(v->vec_mempool);
612 606
613 if (v->io_mempool)
614 mempool_destroy(v->io_mempool);
615
616 if (v->bufio) 607 if (v->bufio)
617 dm_bufio_client_destroy(v->bufio); 608 dm_bufio_client_destroy(v->bufio);
618 609
@@ -841,13 +832,7 @@ static int verity_ctr(struct dm_target *ti, unsigned argc, char **argv)
841 goto bad; 832 goto bad;
842 } 833 }
843 834
844 v->io_mempool = mempool_create_kmalloc_pool(DM_VERITY_MEMPOOL_SIZE, 835 ti->per_bio_data_size = roundup(sizeof(struct dm_verity_io) + v->shash_descsize + v->digest_size * 2, __alignof__(struct dm_verity_io));
845 sizeof(struct dm_verity_io) + v->shash_descsize + v->digest_size * 2);
846 if (!v->io_mempool) {
847 ti->error = "Cannot allocate io mempool";
848 r = -ENOMEM;
849 goto bad;
850 }
851 836
852 v->vec_mempool = mempool_create_kmalloc_pool(DM_VERITY_MEMPOOL_SIZE, 837 v->vec_mempool = mempool_create_kmalloc_pool(DM_VERITY_MEMPOOL_SIZE,
853 BIO_MAX_PAGES * sizeof(struct bio_vec)); 838 BIO_MAX_PAGES * sizeof(struct bio_vec));
@@ -875,7 +860,7 @@ bad:
875 860
876static struct target_type verity_target = { 861static struct target_type verity_target = {
877 .name = "verity", 862 .name = "verity",
878 .version = {1, 0, 0}, 863 .version = {1, 1, 0},
879 .module = THIS_MODULE, 864 .module = THIS_MODULE,
880 .ctr = verity_ctr, 865 .ctr = verity_ctr,
881 .dtr = verity_dtr, 866 .dtr = verity_dtr,
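The dm-verity hunks show the same conversion for a per-bio structure with a variable-sized tail (hash scratch space plus two digests): the constructor rounds the total up to the structure's alignment, and completion recovers the original bio from the per-bio data instead of storing a back-pointer. A rough sketch under those assumptions, again with hypothetical example_* names:

static int example_ctr(struct dm_target *ti, unsigned argc, char **argv)
{
        struct example_ctx *c = ti->private;

        /* Fixed header plus a per-instance variable tail, suitably aligned. */
        ti->per_bio_data_size = roundup(sizeof(struct example_io) + c->tail_size,
                                        __alignof__(struct example_io));
        return 0;
}

static void example_finish_io(struct dm_target *ti, struct example_io *io, int error)
{
        /* Walk back from the per-bio data to the bio it is embedded in front of. */
        struct bio *bio = dm_bio_from_per_bio_data(io, ti->per_bio_data_size);

        bio_endio(bio, error);
}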
diff --git a/drivers/md/dm-zero.c b/drivers/md/dm-zero.c
index cc2b3cb81946..69a5c3b3b340 100644
--- a/drivers/md/dm-zero.c
+++ b/drivers/md/dm-zero.c
@@ -33,8 +33,7 @@ static int zero_ctr(struct dm_target *ti, unsigned int argc, char **argv)
33/* 33/*
34 * Return zeros only on reads 34 * Return zeros only on reads
35 */ 35 */
36static int zero_map(struct dm_target *ti, struct bio *bio, 36static int zero_map(struct dm_target *ti, struct bio *bio)
37 union map_info *map_context)
38{ 37{
39 switch(bio_rw(bio)) { 38 switch(bio_rw(bio)) {
40 case READ: 39 case READ:
@@ -56,7 +55,7 @@ static int zero_map(struct dm_target *ti, struct bio *bio,
56 55
57static struct target_type zero_target = { 56static struct target_type zero_target = {
58 .name = "zero", 57 .name = "zero",
59 .version = {1, 0, 0}, 58 .version = {1, 1, 0},
60 .module = THIS_MODULE, 59 .module = THIS_MODULE,
61 .ctr = zero_ctr, 60 .ctr = zero_ctr,
62 .map = zero_map, 61 .map = zero_map,
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 77e6eff41cae..c72e4d5a9617 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -63,18 +63,6 @@ struct dm_io {
63}; 63};
64 64
65/* 65/*
66 * For bio-based dm.
67 * One of these is allocated per target within a bio. Hopefully
68 * this will be simplified out one day.
69 */
70struct dm_target_io {
71 struct dm_io *io;
72 struct dm_target *ti;
73 union map_info info;
74 struct bio clone;
75};
76
77/*
78 * For request-based dm. 66 * For request-based dm.
79 * One of these is allocated per request. 67 * One of these is allocated per request.
80 */ 68 */
@@ -657,7 +645,7 @@ static void clone_endio(struct bio *bio, int error)
657 error = -EIO; 645 error = -EIO;
658 646
659 if (endio) { 647 if (endio) {
660 r = endio(tio->ti, bio, error, &tio->info); 648 r = endio(tio->ti, bio, error);
661 if (r < 0 || r == DM_ENDIO_REQUEUE) 649 if (r < 0 || r == DM_ENDIO_REQUEUE)
662 /* 650 /*
663 * error and requeue request are handled 651 * error and requeue request are handled
@@ -1016,7 +1004,7 @@ static void __map_bio(struct dm_target *ti, struct dm_target_io *tio)
1016 */ 1004 */
1017 atomic_inc(&tio->io->io_count); 1005 atomic_inc(&tio->io->io_count);
1018 sector = clone->bi_sector; 1006 sector = clone->bi_sector;
1019 r = ti->type->map(ti, clone, &tio->info); 1007 r = ti->type->map(ti, clone);
1020 if (r == DM_MAPIO_REMAPPED) { 1008 if (r == DM_MAPIO_REMAPPED) {
1021 /* the bio has been remapped so dispatch it */ 1009 /* the bio has been remapped so dispatch it */
1022 1010
@@ -1111,6 +1099,7 @@ static struct dm_target_io *alloc_tio(struct clone_info *ci,
1111 tio->io = ci->io; 1099 tio->io = ci->io;
1112 tio->ti = ti; 1100 tio->ti = ti;
1113 memset(&tio->info, 0, sizeof(tio->info)); 1101 memset(&tio->info, 0, sizeof(tio->info));
1102 tio->target_request_nr = 0;
1114 1103
1115 return tio; 1104 return tio;
1116} 1105}
@@ -1121,7 +1110,7 @@ static void __issue_target_request(struct clone_info *ci, struct dm_target *ti,
1121 struct dm_target_io *tio = alloc_tio(ci, ti, ci->bio->bi_max_vecs); 1110 struct dm_target_io *tio = alloc_tio(ci, ti, ci->bio->bi_max_vecs);
1122 struct bio *clone = &tio->clone; 1111 struct bio *clone = &tio->clone;
1123 1112
1124 tio->info.target_request_nr = request_nr; 1113 tio->target_request_nr = request_nr;
1125 1114
1126 /* 1115 /*
1127 * Discard requests require the bio's inline iovecs be initialized. 1116 * Discard requests require the bio's inline iovecs be initialized.
@@ -1174,7 +1163,28 @@ static void __clone_and_map_simple(struct clone_info *ci, struct dm_target *ti)
1174 ci->sector_count = 0; 1163 ci->sector_count = 0;
1175} 1164}
1176 1165
1177static int __clone_and_map_discard(struct clone_info *ci) 1166typedef unsigned (*get_num_requests_fn)(struct dm_target *ti);
1167
1168static unsigned get_num_discard_requests(struct dm_target *ti)
1169{
1170 return ti->num_discard_requests;
1171}
1172
1173static unsigned get_num_write_same_requests(struct dm_target *ti)
1174{
1175 return ti->num_write_same_requests;
1176}
1177
1178typedef bool (*is_split_required_fn)(struct dm_target *ti);
1179
1180static bool is_split_required_for_discard(struct dm_target *ti)
1181{
1182 return ti->split_discard_requests;
1183}
1184
1185static int __clone_and_map_changing_extent_only(struct clone_info *ci,
1186 get_num_requests_fn get_num_requests,
1187 is_split_required_fn is_split_required)
1178{ 1188{
1179 struct dm_target *ti; 1189 struct dm_target *ti;
1180 sector_t len; 1190 sector_t len;
@@ -1185,15 +1195,15 @@ static int __clone_and_map_discard(struct clone_info *ci)
1185 return -EIO; 1195 return -EIO;
1186 1196
1187 /* 1197 /*
1188 * Even though the device advertised discard support, 1198 * Even though the device advertised support for this type of
1189 * that does not mean every target supports it, and 1199 * request, that does not mean every target supports it, and
1190 * reconfiguration might also have changed that since the 1200 * reconfiguration might also have changed that since the
1191 * check was performed. 1201 * check was performed.
1192 */ 1202 */
1193 if (!ti->num_discard_requests) 1203 if (!get_num_requests || !get_num_requests(ti))
1194 return -EOPNOTSUPP; 1204 return -EOPNOTSUPP;
1195 1205
1196 if (!ti->split_discard_requests) 1206 if (is_split_required && !is_split_required(ti))
1197 len = min(ci->sector_count, max_io_len_target_boundary(ci->sector, ti)); 1207 len = min(ci->sector_count, max_io_len_target_boundary(ci->sector, ti));
1198 else 1208 else
1199 len = min(ci->sector_count, max_io_len(ci->sector, ti)); 1209 len = min(ci->sector_count, max_io_len(ci->sector, ti));
@@ -1206,6 +1216,17 @@ static int __clone_and_map_discard(struct clone_info *ci)
1206 return 0; 1216 return 0;
1207} 1217}
1208 1218
1219static int __clone_and_map_discard(struct clone_info *ci)
1220{
1221 return __clone_and_map_changing_extent_only(ci, get_num_discard_requests,
1222 is_split_required_for_discard);
1223}
1224
1225static int __clone_and_map_write_same(struct clone_info *ci)
1226{
1227 return __clone_and_map_changing_extent_only(ci, get_num_write_same_requests, NULL);
1228}
1229
1209static int __clone_and_map(struct clone_info *ci) 1230static int __clone_and_map(struct clone_info *ci)
1210{ 1231{
1211 struct bio *bio = ci->bio; 1232 struct bio *bio = ci->bio;
@@ -1215,6 +1236,8 @@ static int __clone_and_map(struct clone_info *ci)
1215 1236
1216 if (unlikely(bio->bi_rw & REQ_DISCARD)) 1237 if (unlikely(bio->bi_rw & REQ_DISCARD))
1217 return __clone_and_map_discard(ci); 1238 return __clone_and_map_discard(ci);
1239 else if (unlikely(bio->bi_rw & REQ_WRITE_SAME))
1240 return __clone_and_map_write_same(ci);
1218 1241
1219 ti = dm_table_find_target(ci->map, ci->sector); 1242 ti = dm_table_find_target(ci->map, ci->sector);
1220 if (!dm_target_is_valid(ti)) 1243 if (!dm_target_is_valid(ti))
@@ -1946,13 +1969,20 @@ static void free_dev(struct mapped_device *md)
1946 1969
1947static void __bind_mempools(struct mapped_device *md, struct dm_table *t) 1970static void __bind_mempools(struct mapped_device *md, struct dm_table *t)
1948{ 1971{
1949 struct dm_md_mempools *p; 1972 struct dm_md_mempools *p = dm_table_get_md_mempools(t);
1950 1973
1951 if (md->io_pool && (md->tio_pool || dm_table_get_type(t) == DM_TYPE_BIO_BASED) && md->bs) 1974 if (md->io_pool && (md->tio_pool || dm_table_get_type(t) == DM_TYPE_BIO_BASED) && md->bs) {
1952 /* the md already has necessary mempools */ 1975 /*
1976 * The md already has necessary mempools. Reload just the
1977 * bioset because front_pad may have changed because
1978 * a different table was loaded.
1979 */
1980 bioset_free(md->bs);
1981 md->bs = p->bs;
1982 p->bs = NULL;
1953 goto out; 1983 goto out;
1984 }
1954 1985
1955 p = dm_table_get_md_mempools(t);
1956 BUG_ON(!p || md->io_pool || md->tio_pool || md->bs); 1986 BUG_ON(!p || md->io_pool || md->tio_pool || md->bs);
1957 1987
1958 md->io_pool = p->io_pool; 1988 md->io_pool = p->io_pool;
@@ -2711,7 +2741,7 @@ int dm_noflush_suspending(struct dm_target *ti)
2711} 2741}
2712EXPORT_SYMBOL_GPL(dm_noflush_suspending); 2742EXPORT_SYMBOL_GPL(dm_noflush_suspending);
2713 2743
2714struct dm_md_mempools *dm_alloc_md_mempools(unsigned type, unsigned integrity) 2744struct dm_md_mempools *dm_alloc_md_mempools(unsigned type, unsigned integrity, unsigned per_bio_data_size)
2715{ 2745{
2716 struct dm_md_mempools *pools = kmalloc(sizeof(*pools), GFP_KERNEL); 2746 struct dm_md_mempools *pools = kmalloc(sizeof(*pools), GFP_KERNEL);
2717 unsigned int pool_size = (type == DM_TYPE_BIO_BASED) ? 16 : MIN_IOS; 2747 unsigned int pool_size = (type == DM_TYPE_BIO_BASED) ? 16 : MIN_IOS;
@@ -2719,6 +2749,8 @@ struct dm_md_mempools *dm_alloc_md_mempools(unsigned type, unsigned integrity)
2719 if (!pools) 2749 if (!pools)
2720 return NULL; 2750 return NULL;
2721 2751
2752 per_bio_data_size = roundup(per_bio_data_size, __alignof__(struct dm_target_io));
2753
2722 pools->io_pool = (type == DM_TYPE_BIO_BASED) ? 2754 pools->io_pool = (type == DM_TYPE_BIO_BASED) ?
2723 mempool_create_slab_pool(MIN_IOS, _io_cache) : 2755 mempool_create_slab_pool(MIN_IOS, _io_cache) :
2724 mempool_create_slab_pool(MIN_IOS, _rq_bio_info_cache); 2756 mempool_create_slab_pool(MIN_IOS, _rq_bio_info_cache);
@@ -2734,7 +2766,7 @@ struct dm_md_mempools *dm_alloc_md_mempools(unsigned type, unsigned integrity)
2734 2766
2735 pools->bs = (type == DM_TYPE_BIO_BASED) ? 2767 pools->bs = (type == DM_TYPE_BIO_BASED) ?
2736 bioset_create(pool_size, 2768 bioset_create(pool_size,
2737 offsetof(struct dm_target_io, clone)) : 2769 per_bio_data_size + offsetof(struct dm_target_io, clone)) :
2738 bioset_create(pool_size, 2770 bioset_create(pool_size,
2739 offsetof(struct dm_rq_clone_bio_info, clone)); 2771 offsetof(struct dm_rq_clone_bio_info, clone));
2740 if (!pools->bs) 2772 if (!pools->bs)
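The dm.c mempool changes are what make per_bio_data work: dm_alloc_md_mempools() rounds the requested per-bio size up to the alignment of struct dm_target_io and adds it to the bioset's front_pad, so every clone bio is preceded in memory by the target's private data followed by the dm_target_io fields up to the embedded clone. The accessors are assumed to do simple pointer arithmetic over that layout; the device-mapper.h hunk is not shown here, so the following is only a sketch consistent with the bioset_create() call above, not a quotation of it:

/* Assumed front_pad layout, per the bioset_create() call above:
 *
 *   [ per-bio data (rounded) ][ dm_target_io fields before .clone ][ struct bio clone ]
 *                                                                    ^-- what targets see
 */
static inline void *example_per_bio_data(struct bio *bio, size_t data_size)
{
        return (char *)bio - offsetof(struct dm_target_io, clone) - data_size;
}

static inline struct bio *example_bio_from_per_bio_data(void *data, size_t data_size)
{
        return (struct bio *)((char *)data + data_size +
                              offsetof(struct dm_target_io, clone));
}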
diff --git a/drivers/md/dm.h b/drivers/md/dm.h
index 6a99fefaa743..45b97da1bd06 100644
--- a/drivers/md/dm.h
+++ b/drivers/md/dm.h
@@ -159,7 +159,7 @@ void dm_kcopyd_exit(void);
159/* 159/*
160 * Mempool operations 160 * Mempool operations
161 */ 161 */
162struct dm_md_mempools *dm_alloc_md_mempools(unsigned type, unsigned integrity); 162struct dm_md_mempools *dm_alloc_md_mempools(unsigned type, unsigned integrity, unsigned per_bio_data_size);
163void dm_free_md_mempools(struct dm_md_mempools *pools); 163void dm_free_md_mempools(struct dm_md_mempools *pools);
164 164
165#endif 165#endif
diff --git a/drivers/md/persistent-data/dm-block-manager.c b/drivers/md/persistent-data/dm-block-manager.c
index a3ae09124a67..28c3ed072a79 100644
--- a/drivers/md/persistent-data/dm-block-manager.c
+++ b/drivers/md/persistent-data/dm-block-manager.c
@@ -428,15 +428,17 @@ static int dm_bm_validate_buffer(struct dm_block_manager *bm,
428 if (!v) 428 if (!v)
429 return 0; 429 return 0;
430 r = v->check(v, (struct dm_block *) buf, dm_bufio_get_block_size(bm->bufio)); 430 r = v->check(v, (struct dm_block *) buf, dm_bufio_get_block_size(bm->bufio));
431 if (unlikely(r)) 431 if (unlikely(r)) {
432 DMERR_LIMIT("%s validator check failed for block %llu", v->name,
433 (unsigned long long) dm_bufio_get_block_number(buf));
432 return r; 434 return r;
435 }
433 aux->validator = v; 436 aux->validator = v;
434 } else { 437 } else {
435 if (unlikely(aux->validator != v)) { 438 if (unlikely(aux->validator != v)) {
436 DMERR("validator mismatch (old=%s vs new=%s) for block %llu", 439 DMERR_LIMIT("validator mismatch (old=%s vs new=%s) for block %llu",
437 aux->validator->name, v ? v->name : "NULL", 440 aux->validator->name, v ? v->name : "NULL",
438 (unsigned long long) 441 (unsigned long long) dm_bufio_get_block_number(buf));
439 dm_bufio_get_block_number(buf));
440 return -EINVAL; 442 return -EINVAL;
441 } 443 }
442 } 444 }
diff --git a/drivers/md/persistent-data/dm-btree-internal.h b/drivers/md/persistent-data/dm-btree-internal.h
index 5709bfeab1e8..accbb05f17b6 100644
--- a/drivers/md/persistent-data/dm-btree-internal.h
+++ b/drivers/md/persistent-data/dm-btree-internal.h
@@ -36,13 +36,13 @@ struct node_header {
36 __le32 padding; 36 __le32 padding;
37} __packed; 37} __packed;
38 38
39struct node { 39struct btree_node {
40 struct node_header header; 40 struct node_header header;
41 __le64 keys[0]; 41 __le64 keys[0];
42} __packed; 42} __packed;
43 43
44 44
45void inc_children(struct dm_transaction_manager *tm, struct node *n, 45void inc_children(struct dm_transaction_manager *tm, struct btree_node *n,
46 struct dm_btree_value_type *vt); 46 struct dm_btree_value_type *vt);
47 47
48int new_block(struct dm_btree_info *info, struct dm_block **result); 48int new_block(struct dm_btree_info *info, struct dm_block **result);
@@ -64,7 +64,7 @@ struct ro_spine {
64void init_ro_spine(struct ro_spine *s, struct dm_btree_info *info); 64void init_ro_spine(struct ro_spine *s, struct dm_btree_info *info);
65int exit_ro_spine(struct ro_spine *s); 65int exit_ro_spine(struct ro_spine *s);
66int ro_step(struct ro_spine *s, dm_block_t new_child); 66int ro_step(struct ro_spine *s, dm_block_t new_child);
67struct node *ro_node(struct ro_spine *s); 67struct btree_node *ro_node(struct ro_spine *s);
68 68
69struct shadow_spine { 69struct shadow_spine {
70 struct dm_btree_info *info; 70 struct dm_btree_info *info;
@@ -98,17 +98,17 @@ int shadow_root(struct shadow_spine *s);
98/* 98/*
99 * Some inlines. 99 * Some inlines.
100 */ 100 */
101static inline __le64 *key_ptr(struct node *n, uint32_t index) 101static inline __le64 *key_ptr(struct btree_node *n, uint32_t index)
102{ 102{
103 return n->keys + index; 103 return n->keys + index;
104} 104}
105 105
106static inline void *value_base(struct node *n) 106static inline void *value_base(struct btree_node *n)
107{ 107{
108 return &n->keys[le32_to_cpu(n->header.max_entries)]; 108 return &n->keys[le32_to_cpu(n->header.max_entries)];
109} 109}
110 110
111static inline void *value_ptr(struct node *n, uint32_t index) 111static inline void *value_ptr(struct btree_node *n, uint32_t index)
112{ 112{
113 uint32_t value_size = le32_to_cpu(n->header.value_size); 113 uint32_t value_size = le32_to_cpu(n->header.value_size);
114 return value_base(n) + (value_size * index); 114 return value_base(n) + (value_size * index);
@@ -117,7 +117,7 @@ static inline void *value_ptr(struct node *n, uint32_t index)
117/* 117/*
118 * Assumes the values are suitably-aligned and converts to core format. 118 * Assumes the values are suitably-aligned and converts to core format.
119 */ 119 */
120static inline uint64_t value64(struct node *n, uint32_t index) 120static inline uint64_t value64(struct btree_node *n, uint32_t index)
121{ 121{
122 __le64 *values_le = value_base(n); 122 __le64 *values_le = value_base(n);
123 123
@@ -127,7 +127,7 @@ static inline uint64_t value64(struct node *n, uint32_t index)
127/* 127/*
128 * Searching for a key within a single node. 128 * Searching for a key within a single node.
129 */ 129 */
130int lower_bound(struct node *n, uint64_t key); 130int lower_bound(struct btree_node *n, uint64_t key);
131 131
132extern struct dm_block_validator btree_node_validator; 132extern struct dm_block_validator btree_node_validator;
133 133
diff --git a/drivers/md/persistent-data/dm-btree-remove.c b/drivers/md/persistent-data/dm-btree-remove.c
index aa71e2359a07..c4f28133ef82 100644
--- a/drivers/md/persistent-data/dm-btree-remove.c
+++ b/drivers/md/persistent-data/dm-btree-remove.c
@@ -53,7 +53,7 @@
53/* 53/*
54 * Some little utilities for moving node data around. 54 * Some little utilities for moving node data around.
55 */ 55 */
56static void node_shift(struct node *n, int shift) 56static void node_shift(struct btree_node *n, int shift)
57{ 57{
58 uint32_t nr_entries = le32_to_cpu(n->header.nr_entries); 58 uint32_t nr_entries = le32_to_cpu(n->header.nr_entries);
59 uint32_t value_size = le32_to_cpu(n->header.value_size); 59 uint32_t value_size = le32_to_cpu(n->header.value_size);
@@ -79,7 +79,7 @@ static void node_shift(struct node *n, int shift)
79 } 79 }
80} 80}
81 81
82static void node_copy(struct node *left, struct node *right, int shift) 82static void node_copy(struct btree_node *left, struct btree_node *right, int shift)
83{ 83{
84 uint32_t nr_left = le32_to_cpu(left->header.nr_entries); 84 uint32_t nr_left = le32_to_cpu(left->header.nr_entries);
85 uint32_t value_size = le32_to_cpu(left->header.value_size); 85 uint32_t value_size = le32_to_cpu(left->header.value_size);
@@ -108,7 +108,7 @@ static void node_copy(struct node *left, struct node *right, int shift)
108/* 108/*
109 * Delete a specific entry from a leaf node. 109 * Delete a specific entry from a leaf node.
110 */ 110 */
111static void delete_at(struct node *n, unsigned index) 111static void delete_at(struct btree_node *n, unsigned index)
112{ 112{
113 unsigned nr_entries = le32_to_cpu(n->header.nr_entries); 113 unsigned nr_entries = le32_to_cpu(n->header.nr_entries);
114 unsigned nr_to_copy = nr_entries - (index + 1); 114 unsigned nr_to_copy = nr_entries - (index + 1);
@@ -128,7 +128,7 @@ static void delete_at(struct node *n, unsigned index)
128 n->header.nr_entries = cpu_to_le32(nr_entries - 1); 128 n->header.nr_entries = cpu_to_le32(nr_entries - 1);
129} 129}
130 130
131static unsigned merge_threshold(struct node *n) 131static unsigned merge_threshold(struct btree_node *n)
132{ 132{
133 return le32_to_cpu(n->header.max_entries) / 3; 133 return le32_to_cpu(n->header.max_entries) / 3;
134} 134}
@@ -136,7 +136,7 @@ static unsigned merge_threshold(struct node *n)
136struct child { 136struct child {
137 unsigned index; 137 unsigned index;
138 struct dm_block *block; 138 struct dm_block *block;
139 struct node *n; 139 struct btree_node *n;
140}; 140};
141 141
142static struct dm_btree_value_type le64_type = { 142static struct dm_btree_value_type le64_type = {
@@ -147,7 +147,7 @@ static struct dm_btree_value_type le64_type = {
147 .equal = NULL 147 .equal = NULL
148}; 148};
149 149
150static int init_child(struct dm_btree_info *info, struct node *parent, 150static int init_child(struct dm_btree_info *info, struct btree_node *parent,
151 unsigned index, struct child *result) 151 unsigned index, struct child *result)
152{ 152{
153 int r, inc; 153 int r, inc;
@@ -177,7 +177,7 @@ static int exit_child(struct dm_btree_info *info, struct child *c)
177 return dm_tm_unlock(info->tm, c->block); 177 return dm_tm_unlock(info->tm, c->block);
178} 178}
179 179
180static void shift(struct node *left, struct node *right, int count) 180static void shift(struct btree_node *left, struct btree_node *right, int count)
181{ 181{
182 uint32_t nr_left = le32_to_cpu(left->header.nr_entries); 182 uint32_t nr_left = le32_to_cpu(left->header.nr_entries);
183 uint32_t nr_right = le32_to_cpu(right->header.nr_entries); 183 uint32_t nr_right = le32_to_cpu(right->header.nr_entries);
@@ -203,11 +203,11 @@ static void shift(struct node *left, struct node *right, int count)
203 right->header.nr_entries = cpu_to_le32(nr_right + count); 203 right->header.nr_entries = cpu_to_le32(nr_right + count);
204} 204}
205 205
206static void __rebalance2(struct dm_btree_info *info, struct node *parent, 206static void __rebalance2(struct dm_btree_info *info, struct btree_node *parent,
207 struct child *l, struct child *r) 207 struct child *l, struct child *r)
208{ 208{
209 struct node *left = l->n; 209 struct btree_node *left = l->n;
210 struct node *right = r->n; 210 struct btree_node *right = r->n;
211 uint32_t nr_left = le32_to_cpu(left->header.nr_entries); 211 uint32_t nr_left = le32_to_cpu(left->header.nr_entries);
212 uint32_t nr_right = le32_to_cpu(right->header.nr_entries); 212 uint32_t nr_right = le32_to_cpu(right->header.nr_entries);
213 unsigned threshold = 2 * merge_threshold(left) + 1; 213 unsigned threshold = 2 * merge_threshold(left) + 1;
@@ -239,7 +239,7 @@ static int rebalance2(struct shadow_spine *s, struct dm_btree_info *info,
239 unsigned left_index) 239 unsigned left_index)
240{ 240{
241 int r; 241 int r;
242 struct node *parent; 242 struct btree_node *parent;
243 struct child left, right; 243 struct child left, right;
244 244
245 parent = dm_block_data(shadow_current(s)); 245 parent = dm_block_data(shadow_current(s));
@@ -270,9 +270,9 @@ static int rebalance2(struct shadow_spine *s, struct dm_btree_info *info,
270 * in right, then rebalance2. This wastes some cpu, but I want something 270 * in right, then rebalance2. This wastes some cpu, but I want something
271 * simple atm. 271 * simple atm.
272 */ 272 */
273static void delete_center_node(struct dm_btree_info *info, struct node *parent, 273static void delete_center_node(struct dm_btree_info *info, struct btree_node *parent,
274 struct child *l, struct child *c, struct child *r, 274 struct child *l, struct child *c, struct child *r,
275 struct node *left, struct node *center, struct node *right, 275 struct btree_node *left, struct btree_node *center, struct btree_node *right,
276 uint32_t nr_left, uint32_t nr_center, uint32_t nr_right) 276 uint32_t nr_left, uint32_t nr_center, uint32_t nr_right)
277{ 277{
278 uint32_t max_entries = le32_to_cpu(left->header.max_entries); 278 uint32_t max_entries = le32_to_cpu(left->header.max_entries);
@@ -301,9 +301,9 @@ static void delete_center_node(struct dm_btree_info *info, struct node *parent,
301/* 301/*
302 * Redistributes entries among 3 sibling nodes. 302 * Redistributes entries among 3 sibling nodes.
303 */ 303 */
304static void redistribute3(struct dm_btree_info *info, struct node *parent, 304static void redistribute3(struct dm_btree_info *info, struct btree_node *parent,
305 struct child *l, struct child *c, struct child *r, 305 struct child *l, struct child *c, struct child *r,
306 struct node *left, struct node *center, struct node *right, 306 struct btree_node *left, struct btree_node *center, struct btree_node *right,
307 uint32_t nr_left, uint32_t nr_center, uint32_t nr_right) 307 uint32_t nr_left, uint32_t nr_center, uint32_t nr_right)
308{ 308{
309 int s; 309 int s;
@@ -343,12 +343,12 @@ static void redistribute3(struct dm_btree_info *info, struct node *parent,
343 *key_ptr(parent, r->index) = right->keys[0]; 343 *key_ptr(parent, r->index) = right->keys[0];
344} 344}
345 345
346static void __rebalance3(struct dm_btree_info *info, struct node *parent, 346static void __rebalance3(struct dm_btree_info *info, struct btree_node *parent,
347 struct child *l, struct child *c, struct child *r) 347 struct child *l, struct child *c, struct child *r)
348{ 348{
349 struct node *left = l->n; 349 struct btree_node *left = l->n;
350 struct node *center = c->n; 350 struct btree_node *center = c->n;
351 struct node *right = r->n; 351 struct btree_node *right = r->n;
352 352
353 uint32_t nr_left = le32_to_cpu(left->header.nr_entries); 353 uint32_t nr_left = le32_to_cpu(left->header.nr_entries);
354 uint32_t nr_center = le32_to_cpu(center->header.nr_entries); 354 uint32_t nr_center = le32_to_cpu(center->header.nr_entries);
@@ -371,7 +371,7 @@ static int rebalance3(struct shadow_spine *s, struct dm_btree_info *info,
371 unsigned left_index) 371 unsigned left_index)
372{ 372{
373 int r; 373 int r;
374 struct node *parent = dm_block_data(shadow_current(s)); 374 struct btree_node *parent = dm_block_data(shadow_current(s));
375 struct child left, center, right; 375 struct child left, center, right;
376 376
377 /* 377 /*
@@ -421,7 +421,7 @@ static int get_nr_entries(struct dm_transaction_manager *tm,
421{ 421{
422 int r; 422 int r;
423 struct dm_block *block; 423 struct dm_block *block;
424 struct node *n; 424 struct btree_node *n;
425 425
426 r = dm_tm_read_lock(tm, b, &btree_node_validator, &block); 426 r = dm_tm_read_lock(tm, b, &btree_node_validator, &block);
427 if (r) 427 if (r)
@@ -438,7 +438,7 @@ static int rebalance_children(struct shadow_spine *s,
438{ 438{
439 int i, r, has_left_sibling, has_right_sibling; 439 int i, r, has_left_sibling, has_right_sibling;
440 uint32_t child_entries; 440 uint32_t child_entries;
441 struct node *n; 441 struct btree_node *n;
442 442
443 n = dm_block_data(shadow_current(s)); 443 n = dm_block_data(shadow_current(s));
444 444
@@ -483,7 +483,7 @@ static int rebalance_children(struct shadow_spine *s,
483 return r; 483 return r;
484} 484}
485 485
486static int do_leaf(struct node *n, uint64_t key, unsigned *index) 486static int do_leaf(struct btree_node *n, uint64_t key, unsigned *index)
487{ 487{
488 int i = lower_bound(n, key); 488 int i = lower_bound(n, key);
489 489
@@ -506,7 +506,7 @@ static int remove_raw(struct shadow_spine *s, struct dm_btree_info *info,
506 uint64_t key, unsigned *index) 506 uint64_t key, unsigned *index)
507{ 507{
508 int i = *index, r; 508 int i = *index, r;
509 struct node *n; 509 struct btree_node *n;
510 510
511 for (;;) { 511 for (;;) {
512 r = shadow_step(s, root, vt); 512 r = shadow_step(s, root, vt);
@@ -556,7 +556,7 @@ int dm_btree_remove(struct dm_btree_info *info, dm_block_t root,
556 unsigned level, last_level = info->levels - 1; 556 unsigned level, last_level = info->levels - 1;
557 int index = 0, r = 0; 557 int index = 0, r = 0;
558 struct shadow_spine spine; 558 struct shadow_spine spine;
559 struct node *n; 559 struct btree_node *n;
560 560
561 init_shadow_spine(&spine, info); 561 init_shadow_spine(&spine, info);
562 for (level = 0; level < info->levels; level++) { 562 for (level = 0; level < info->levels; level++) {
diff --git a/drivers/md/persistent-data/dm-btree-spine.c b/drivers/md/persistent-data/dm-btree-spine.c
index d9a7912ee8ee..f199a0c4ed04 100644
--- a/drivers/md/persistent-data/dm-btree-spine.c
+++ b/drivers/md/persistent-data/dm-btree-spine.c
@@ -23,7 +23,7 @@ static void node_prepare_for_write(struct dm_block_validator *v,
23 struct dm_block *b, 23 struct dm_block *b,
24 size_t block_size) 24 size_t block_size)
25{ 25{
26 struct node *n = dm_block_data(b); 26 struct btree_node *n = dm_block_data(b);
27 struct node_header *h = &n->header; 27 struct node_header *h = &n->header;
28 28
29 h->blocknr = cpu_to_le64(dm_block_location(b)); 29 h->blocknr = cpu_to_le64(dm_block_location(b));
@@ -38,15 +38,15 @@ static int node_check(struct dm_block_validator *v,
38 struct dm_block *b, 38 struct dm_block *b,
39 size_t block_size) 39 size_t block_size)
40{ 40{
41 struct node *n = dm_block_data(b); 41 struct btree_node *n = dm_block_data(b);
42 struct node_header *h = &n->header; 42 struct node_header *h = &n->header;
43 size_t value_size; 43 size_t value_size;
44 __le32 csum_disk; 44 __le32 csum_disk;
45 uint32_t flags; 45 uint32_t flags;
46 46
47 if (dm_block_location(b) != le64_to_cpu(h->blocknr)) { 47 if (dm_block_location(b) != le64_to_cpu(h->blocknr)) {
48 DMERR("node_check failed blocknr %llu wanted %llu", 48 DMERR_LIMIT("node_check failed: blocknr %llu != wanted %llu",
49 le64_to_cpu(h->blocknr), dm_block_location(b)); 49 le64_to_cpu(h->blocknr), dm_block_location(b));
50 return -ENOTBLK; 50 return -ENOTBLK;
51 } 51 }
52 52
@@ -54,8 +54,8 @@ static int node_check(struct dm_block_validator *v,
54 block_size - sizeof(__le32), 54 block_size - sizeof(__le32),
55 BTREE_CSUM_XOR)); 55 BTREE_CSUM_XOR));
56 if (csum_disk != h->csum) { 56 if (csum_disk != h->csum) {
57 DMERR("node_check failed csum %u wanted %u", 57 DMERR_LIMIT("node_check failed: csum %u != wanted %u",
58 le32_to_cpu(csum_disk), le32_to_cpu(h->csum)); 58 le32_to_cpu(csum_disk), le32_to_cpu(h->csum));
59 return -EILSEQ; 59 return -EILSEQ;
60 } 60 }
61 61
@@ -63,12 +63,12 @@ static int node_check(struct dm_block_validator *v,
63 63
64 if (sizeof(struct node_header) + 64 if (sizeof(struct node_header) +
65 (sizeof(__le64) + value_size) * le32_to_cpu(h->max_entries) > block_size) { 65 (sizeof(__le64) + value_size) * le32_to_cpu(h->max_entries) > block_size) {
66 DMERR("node_check failed: max_entries too large"); 66 DMERR_LIMIT("node_check failed: max_entries too large");
67 return -EILSEQ; 67 return -EILSEQ;
68 } 68 }
69 69
70 if (le32_to_cpu(h->nr_entries) > le32_to_cpu(h->max_entries)) { 70 if (le32_to_cpu(h->nr_entries) > le32_to_cpu(h->max_entries)) {
71 DMERR("node_check failed, too many entries"); 71 DMERR_LIMIT("node_check failed: too many entries");
72 return -EILSEQ; 72 return -EILSEQ;
73 } 73 }
74 74
@@ -77,7 +77,7 @@ static int node_check(struct dm_block_validator *v,
77 */ 77 */
78 flags = le32_to_cpu(h->flags); 78 flags = le32_to_cpu(h->flags);
79 if (!(flags & INTERNAL_NODE) && !(flags & LEAF_NODE)) { 79 if (!(flags & INTERNAL_NODE) && !(flags & LEAF_NODE)) {
80 DMERR("node_check failed, node is neither INTERNAL or LEAF"); 80 DMERR_LIMIT("node_check failed: node is neither INTERNAL or LEAF");
81 return -EILSEQ; 81 return -EILSEQ;
82 } 82 }
83 83
@@ -164,7 +164,7 @@ int ro_step(struct ro_spine *s, dm_block_t new_child)
164 return r; 164 return r;
165} 165}
166 166
167struct node *ro_node(struct ro_spine *s) 167struct btree_node *ro_node(struct ro_spine *s)
168{ 168{
169 struct dm_block *block; 169 struct dm_block *block;
170 170
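[Editor's note] The validator messages in the hunk above move from DMERR to DMERR_LIMIT so that a corrupted btree node that is re-validated on every I/O cannot flood the kernel log. As a rough, hypothetical sketch of what such a rate-limited error macro amounts to (an illustrative approximation only, not the kernel's actual DMERR_LIMIT definition), it guards the printk with the global ratelimit check:

#include <linux/kernel.h>
#include <linux/printk.h>

/* Illustrative approximation: emit at most a bounded burst of errors per interval. */
#define EXAMPLE_DMERR_LIMIT(fmt, arg...)                                \
	do {                                                            \
		if (printk_ratelimit())                                 \
			printk(KERN_ERR "device-mapper: " fmt "\n", ## arg); \
	} while (0)

The effect is that repeated checksum or blocknr mismatches on the same bad metadata block produce a handful of log lines rather than one per read.
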
diff --git a/drivers/md/persistent-data/dm-btree.c b/drivers/md/persistent-data/dm-btree.c
index d12b2cc51f1a..4caf66918cdb 100644
--- a/drivers/md/persistent-data/dm-btree.c
+++ b/drivers/md/persistent-data/dm-btree.c
@@ -38,7 +38,7 @@ static void array_insert(void *base, size_t elt_size, unsigned nr_elts,
38/*----------------------------------------------------------------*/ 38/*----------------------------------------------------------------*/
39 39
40/* makes the assumption that no two keys are the same. */ 40/* makes the assumption that no two keys are the same. */
41static int bsearch(struct node *n, uint64_t key, int want_hi) 41static int bsearch(struct btree_node *n, uint64_t key, int want_hi)
42{ 42{
43 int lo = -1, hi = le32_to_cpu(n->header.nr_entries); 43 int lo = -1, hi = le32_to_cpu(n->header.nr_entries);
44 44
@@ -58,12 +58,12 @@ static int bsearch(struct node *n, uint64_t key, int want_hi)
58 return want_hi ? hi : lo; 58 return want_hi ? hi : lo;
59} 59}
60 60
61int lower_bound(struct node *n, uint64_t key) 61int lower_bound(struct btree_node *n, uint64_t key)
62{ 62{
63 return bsearch(n, key, 0); 63 return bsearch(n, key, 0);
64} 64}
65 65
66void inc_children(struct dm_transaction_manager *tm, struct node *n, 66void inc_children(struct dm_transaction_manager *tm, struct btree_node *n,
67 struct dm_btree_value_type *vt) 67 struct dm_btree_value_type *vt)
68{ 68{
69 unsigned i; 69 unsigned i;
@@ -77,7 +77,7 @@ void inc_children(struct dm_transaction_manager *tm, struct node *n,
77 vt->inc(vt->context, value_ptr(n, i)); 77 vt->inc(vt->context, value_ptr(n, i));
78} 78}
79 79
80static int insert_at(size_t value_size, struct node *node, unsigned index, 80static int insert_at(size_t value_size, struct btree_node *node, unsigned index,
81 uint64_t key, void *value) 81 uint64_t key, void *value)
82 __dm_written_to_disk(value) 82 __dm_written_to_disk(value)
83{ 83{
@@ -122,7 +122,7 @@ int dm_btree_empty(struct dm_btree_info *info, dm_block_t *root)
122{ 122{
123 int r; 123 int r;
124 struct dm_block *b; 124 struct dm_block *b;
125 struct node *n; 125 struct btree_node *n;
126 size_t block_size; 126 size_t block_size;
127 uint32_t max_entries; 127 uint32_t max_entries;
128 128
@@ -154,7 +154,7 @@ EXPORT_SYMBOL_GPL(dm_btree_empty);
154#define MAX_SPINE_DEPTH 64 154#define MAX_SPINE_DEPTH 64
155struct frame { 155struct frame {
156 struct dm_block *b; 156 struct dm_block *b;
157 struct node *n; 157 struct btree_node *n;
158 unsigned level; 158 unsigned level;
159 unsigned nr_children; 159 unsigned nr_children;
160 unsigned current_child; 160 unsigned current_child;
@@ -230,6 +230,11 @@ static void pop_frame(struct del_stack *s)
230 dm_tm_unlock(s->tm, f->b); 230 dm_tm_unlock(s->tm, f->b);
231} 231}
232 232
233static bool is_internal_level(struct dm_btree_info *info, struct frame *f)
234{
235 return f->level < (info->levels - 1);
236}
237
233int dm_btree_del(struct dm_btree_info *info, dm_block_t root) 238int dm_btree_del(struct dm_btree_info *info, dm_block_t root)
234{ 239{
235 int r; 240 int r;
@@ -241,7 +246,7 @@ int dm_btree_del(struct dm_btree_info *info, dm_block_t root)
241 s->tm = info->tm; 246 s->tm = info->tm;
242 s->top = -1; 247 s->top = -1;
243 248
244 r = push_frame(s, root, 1); 249 r = push_frame(s, root, 0);
245 if (r) 250 if (r)
246 goto out; 251 goto out;
247 252
@@ -267,7 +272,7 @@ int dm_btree_del(struct dm_btree_info *info, dm_block_t root)
267 if (r) 272 if (r)
268 goto out; 273 goto out;
269 274
270 } else if (f->level != (info->levels - 1)) { 275 } else if (is_internal_level(info, f)) {
271 b = value64(f->n, f->current_child); 276 b = value64(f->n, f->current_child);
272 f->current_child++; 277 f->current_child++;
273 r = push_frame(s, b, f->level + 1); 278 r = push_frame(s, b, f->level + 1);
@@ -295,7 +300,7 @@ EXPORT_SYMBOL_GPL(dm_btree_del);
295/*----------------------------------------------------------------*/ 300/*----------------------------------------------------------------*/
296 301
297static int btree_lookup_raw(struct ro_spine *s, dm_block_t block, uint64_t key, 302static int btree_lookup_raw(struct ro_spine *s, dm_block_t block, uint64_t key,
298 int (*search_fn)(struct node *, uint64_t), 303 int (*search_fn)(struct btree_node *, uint64_t),
299 uint64_t *result_key, void *v, size_t value_size) 304 uint64_t *result_key, void *v, size_t value_size)
300{ 305{
301 int i, r; 306 int i, r;
@@ -406,7 +411,7 @@ static int btree_split_sibling(struct shadow_spine *s, dm_block_t root,
406 size_t size; 411 size_t size;
407 unsigned nr_left, nr_right; 412 unsigned nr_left, nr_right;
408 struct dm_block *left, *right, *parent; 413 struct dm_block *left, *right, *parent;
409 struct node *ln, *rn, *pn; 414 struct btree_node *ln, *rn, *pn;
410 __le64 location; 415 __le64 location;
411 416
412 left = shadow_current(s); 417 left = shadow_current(s);
@@ -491,7 +496,7 @@ static int btree_split_beneath(struct shadow_spine *s, uint64_t key)
491 size_t size; 496 size_t size;
492 unsigned nr_left, nr_right; 497 unsigned nr_left, nr_right;
493 struct dm_block *left, *right, *new_parent; 498 struct dm_block *left, *right, *new_parent;
494 struct node *pn, *ln, *rn; 499 struct btree_node *pn, *ln, *rn;
495 __le64 val; 500 __le64 val;
496 501
497 new_parent = shadow_current(s); 502 new_parent = shadow_current(s);
@@ -576,7 +581,7 @@ static int btree_insert_raw(struct shadow_spine *s, dm_block_t root,
576 uint64_t key, unsigned *index) 581 uint64_t key, unsigned *index)
577{ 582{
578 int r, i = *index, top = 1; 583 int r, i = *index, top = 1;
579 struct node *node; 584 struct btree_node *node;
580 585
581 for (;;) { 586 for (;;) {
582 r = shadow_step(s, root, vt); 587 r = shadow_step(s, root, vt);
@@ -643,7 +648,7 @@ static int insert(struct dm_btree_info *info, dm_block_t root,
643 unsigned level, index = -1, last_level = info->levels - 1; 648 unsigned level, index = -1, last_level = info->levels - 1;
644 dm_block_t block = root; 649 dm_block_t block = root;
645 struct shadow_spine spine; 650 struct shadow_spine spine;
646 struct node *n; 651 struct btree_node *n;
647 struct dm_btree_value_type le64_type; 652 struct dm_btree_value_type le64_type;
648 653
649 le64_type.context = NULL; 654 le64_type.context = NULL;
diff --git a/drivers/md/persistent-data/dm-space-map-common.c b/drivers/md/persistent-data/dm-space-map-common.c
index f3a9af8cdec3..3e7a88d99eb0 100644
--- a/drivers/md/persistent-data/dm-space-map-common.c
+++ b/drivers/md/persistent-data/dm-space-map-common.c
@@ -39,8 +39,8 @@ static int index_check(struct dm_block_validator *v,
39 __le32 csum_disk; 39 __le32 csum_disk;
40 40
41 if (dm_block_location(b) != le64_to_cpu(mi_le->blocknr)) { 41 if (dm_block_location(b) != le64_to_cpu(mi_le->blocknr)) {
42 DMERR("index_check failed blocknr %llu wanted %llu", 42 DMERR_LIMIT("index_check failed: blocknr %llu != wanted %llu",
43 le64_to_cpu(mi_le->blocknr), dm_block_location(b)); 43 le64_to_cpu(mi_le->blocknr), dm_block_location(b));
44 return -ENOTBLK; 44 return -ENOTBLK;
45 } 45 }
46 46
@@ -48,8 +48,8 @@ static int index_check(struct dm_block_validator *v,
48 block_size - sizeof(__le32), 48 block_size - sizeof(__le32),
49 INDEX_CSUM_XOR)); 49 INDEX_CSUM_XOR));
50 if (csum_disk != mi_le->csum) { 50 if (csum_disk != mi_le->csum) {
51 DMERR("index_check failed csum %u wanted %u", 51 DMERR_LIMIT("index_check failed: csum %u != wanted %u",
52 le32_to_cpu(csum_disk), le32_to_cpu(mi_le->csum)); 52 le32_to_cpu(csum_disk), le32_to_cpu(mi_le->csum));
53 return -EILSEQ; 53 return -EILSEQ;
54 } 54 }
55 55
@@ -89,8 +89,8 @@ static int bitmap_check(struct dm_block_validator *v,
89 __le32 csum_disk; 89 __le32 csum_disk;
90 90
91 if (dm_block_location(b) != le64_to_cpu(disk_header->blocknr)) { 91 if (dm_block_location(b) != le64_to_cpu(disk_header->blocknr)) {
92 DMERR("bitmap check failed blocknr %llu wanted %llu", 92 DMERR_LIMIT("bitmap check failed: blocknr %llu != wanted %llu",
93 le64_to_cpu(disk_header->blocknr), dm_block_location(b)); 93 le64_to_cpu(disk_header->blocknr), dm_block_location(b));
94 return -ENOTBLK; 94 return -ENOTBLK;
95 } 95 }
96 96
@@ -98,8 +98,8 @@ static int bitmap_check(struct dm_block_validator *v,
98 block_size - sizeof(__le32), 98 block_size - sizeof(__le32),
99 BITMAP_CSUM_XOR)); 99 BITMAP_CSUM_XOR));
100 if (csum_disk != disk_header->csum) { 100 if (csum_disk != disk_header->csum) {
101 DMERR("bitmap check failed csum %u wanted %u", 101 DMERR_LIMIT("bitmap check failed: csum %u != wanted %u",
102 le32_to_cpu(csum_disk), le32_to_cpu(disk_header->csum)); 102 le32_to_cpu(csum_disk), le32_to_cpu(disk_header->csum));
103 return -EILSEQ; 103 return -EILSEQ;
104 } 104 }
105 105
diff --git a/drivers/md/persistent-data/dm-space-map-metadata.c b/drivers/md/persistent-data/dm-space-map-metadata.c
index e89ae5e7a519..906cf3df71af 100644
--- a/drivers/md/persistent-data/dm-space-map-metadata.c
+++ b/drivers/md/persistent-data/dm-space-map-metadata.c
@@ -337,7 +337,7 @@ static int sm_metadata_new_block(struct dm_space_map *sm, dm_block_t *b)
337{ 337{
338 int r = sm_metadata_new_block_(sm, b); 338 int r = sm_metadata_new_block_(sm, b);
339 if (r) 339 if (r)
340 DMERR("out of metadata space"); 340 DMERR("unable to allocate new metadata block");
341 return r; 341 return r;
342} 342}
343 343
diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h
index 38d27a10aa5d..bf6afa2fc432 100644
--- a/include/linux/device-mapper.h
+++ b/include/linux/device-mapper.h
@@ -23,7 +23,6 @@ typedef enum { STATUSTYPE_INFO, STATUSTYPE_TABLE } status_type_t;
23union map_info { 23union map_info {
24 void *ptr; 24 void *ptr;
25 unsigned long long ll; 25 unsigned long long ll;
26 unsigned target_request_nr;
27}; 26};
28 27
29/* 28/*
@@ -46,8 +45,7 @@ typedef void (*dm_dtr_fn) (struct dm_target *ti);
46 * = 1: simple remap complete 45 * = 1: simple remap complete
47 * = 2: The target wants to push back the io 46 * = 2: The target wants to push back the io
48 */ 47 */
49typedef int (*dm_map_fn) (struct dm_target *ti, struct bio *bio, 48typedef int (*dm_map_fn) (struct dm_target *ti, struct bio *bio);
50 union map_info *map_context);
51typedef int (*dm_map_request_fn) (struct dm_target *ti, struct request *clone, 49typedef int (*dm_map_request_fn) (struct dm_target *ti, struct request *clone,
52 union map_info *map_context); 50 union map_info *map_context);
53 51
@@ -60,8 +58,7 @@ typedef int (*dm_map_request_fn) (struct dm_target *ti, struct request *clone,
60 * 2 : The target wants to push back the io 58 * 2 : The target wants to push back the io
61 */ 59 */
62typedef int (*dm_endio_fn) (struct dm_target *ti, 60typedef int (*dm_endio_fn) (struct dm_target *ti,
63 struct bio *bio, int error, 61 struct bio *bio, int error);
64 union map_info *map_context);
65typedef int (*dm_request_endio_fn) (struct dm_target *ti, 62typedef int (*dm_request_endio_fn) (struct dm_target *ti,
66 struct request *clone, int error, 63 struct request *clone, int error,
67 union map_info *map_context); 64 union map_info *map_context);
@@ -193,18 +190,30 @@ struct dm_target {
193 * A number of zero-length barrier requests that will be submitted 190 * A number of zero-length barrier requests that will be submitted
194 * to the target for the purpose of flushing cache. 191 * to the target for the purpose of flushing cache.
195 * 192 *
196 * The request number will be placed in union map_info->target_request_nr. 193 * The request number can be accessed with dm_bio_get_target_request_nr.
197 * It is a responsibility of the target driver to remap these requests 194 * It is a responsibility of the target driver to remap these requests
198 * to the real underlying devices. 195 * to the real underlying devices.
199 */ 196 */
200 unsigned num_flush_requests; 197 unsigned num_flush_requests;
201 198
202 /* 199 /*
203 * The number of discard requests that will be submitted to the 200 * The number of discard requests that will be submitted to the target.
204 * target. map_info->request_nr is used just like num_flush_requests. 201 * The request number can be accessed with dm_bio_get_target_request_nr.
205 */ 202 */
206 unsigned num_discard_requests; 203 unsigned num_discard_requests;
207 204
205 /*
206 * The number of WRITE SAME requests that will be submitted to the target.
207 * The request number can be accessed with dm_bio_get_target_request_nr.
208 */
209 unsigned num_write_same_requests;
210
211 /*
212 * The minimum number of extra bytes allocated in each bio for the
213 * target to use. dm_per_bio_data returns the data location.
214 */
215 unsigned per_bio_data_size;
216
208 /* target specific data */ 217 /* target specific data */
209 void *private; 218 void *private;
210 219
@@ -241,6 +250,36 @@ struct dm_target_callbacks {
241 int (*congested_fn) (struct dm_target_callbacks *, int); 250 int (*congested_fn) (struct dm_target_callbacks *, int);
242}; 251};
243 252
253/*
254 * For bio-based dm.
255 * One of these is allocated for each bio.
256 * This structure shouldn't be touched directly by target drivers.
257 * It is here so that we can inline dm_per_bio_data and
258 * dm_bio_from_per_bio_data
259 */
260struct dm_target_io {
261 struct dm_io *io;
262 struct dm_target *ti;
263 union map_info info;
264 unsigned target_request_nr;
265 struct bio clone;
266};
267
268static inline void *dm_per_bio_data(struct bio *bio, size_t data_size)
269{
270 return (char *)bio - offsetof(struct dm_target_io, clone) - data_size;
271}
272
273static inline struct bio *dm_bio_from_per_bio_data(void *data, size_t data_size)
274{
275 return (struct bio *)((char *)data + data_size + offsetof(struct dm_target_io, clone));
276}
277
278static inline unsigned dm_bio_get_target_request_nr(const struct bio *bio)
279{
280 return container_of(bio, struct dm_target_io, clone)->target_request_nr;
281}
282
244int dm_register_target(struct target_type *t); 283int dm_register_target(struct target_type *t);
245void dm_unregister_target(struct target_type *t); 284void dm_unregister_target(struct target_type *t);
246 285
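[Editor's note] For orientation, here is a minimal, hypothetical bio-based target sketch showing how the per_bio_data interface declared above is intended to be used. The example_* names and the per-bio struct are the editor's assumptions, not part of the patch; only dm_per_bio_data(), dm_bio_get_target_request_nr(), per_bio_data_size and the new map/end_io signatures come from the header changes shown. The constructor reserves space with ti->per_bio_data_size, and the new-style hooks (which no longer take a union map_info pointer) retrieve that space directly from the bio:

#include <linux/module.h>
#include <linux/bio.h>
#include <linux/device-mapper.h>

/* State such a target would previously have kept in a private mempool. */
struct example_per_bio_data {
	sector_t original_sector;
};

static int example_ctr(struct dm_target *ti, unsigned argc, char **argv)
{
	/* Ask dm core to allocate this much extra front-pad space in every clone bio. */
	ti->per_bio_data_size = sizeof(struct example_per_bio_data);
	ti->num_flush_requests = 1;
	ti->num_discard_requests = 1;
	return 0;
}

static int example_map(struct dm_target *ti, struct bio *bio)
{
	/* New dm_map_fn signature: no union map_info argument any more. */
	struct example_per_bio_data *pb =
		dm_per_bio_data(bio, sizeof(struct example_per_bio_data));

	pb->original_sector = bio->bi_sector;

	/*
	 * For duplicated flush/discard/WRITE SAME clones, the clone number
	 * replaces the old map_info->target_request_nr.
	 */
	if (dm_bio_get_target_request_nr(bio) > 0) {
		/* remap this duplicate to a further underlying device (omitted) */
	}

	/* Actual remapping of bio->bi_bdev to the underlying device is omitted here. */
	return DM_MAPIO_REMAPPED;
}

static int example_end_io(struct dm_target *ti, struct bio *bio, int error)
{
	/* The same per-bio area is reachable from end_io without a map_info pointer. */
	struct example_per_bio_data *pb =
		dm_per_bio_data(bio, sizeof(struct example_per_bio_data));

	(void)pb->original_sector;
	return error;
}

static struct target_type example_target = {
	.name    = "example",
	.version = {1, 0, 0},
	.module  = THIS_MODULE,
	.ctr     = example_ctr,
	.map     = example_map,
	.end_io  = example_end_io,
};

Registration through dm_register_target()/dm_unregister_target() in module init/exit is omitted for brevity. The inverse helpers fit together because dm core embeds the clone bio at a fixed offset inside struct dm_target_io and reserves per_bio_data_size bytes of front padding in its bioset, so the target's data sits immediately before struct dm_target_io; dm_bio_from_per_bio_data() simply reverses that offset arithmetic.
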
diff --git a/include/uapi/linux/dm-ioctl.h b/include/uapi/linux/dm-ioctl.h
index 91e3a360f611..539b179b349c 100644
--- a/include/uapi/linux/dm-ioctl.h
+++ b/include/uapi/linux/dm-ioctl.h
@@ -268,8 +268,8 @@ enum {
268 268
269#define DM_VERSION_MAJOR 4 269#define DM_VERSION_MAJOR 4
270#define DM_VERSION_MINOR 23 270#define DM_VERSION_MINOR 23
271#define DM_VERSION_PATCHLEVEL 0 271#define DM_VERSION_PATCHLEVEL 1
272#define DM_VERSION_EXTRA "-ioctl (2012-07-25)" 272#define DM_VERSION_EXTRA "-ioctl (2012-12-18)"
273 273
274/* Status bits */ 274/* Status bits */
275#define DM_READONLY_FLAG (1 << 0) /* In/Out */ 275#define DM_READONLY_FLAG (1 << 0) /* In/Out */