author      Mike Snitzer <snitzer@redhat.com>    2014-03-20 21:17:14 -0400
committer   Mike Snitzer <snitzer@redhat.com>    2014-03-31 14:14:15 -0400
commit      c140e1c4e23bdaf0a5c00b6a8b6d18f259d39a00 (patch)
tree        bd84c5be199d67b45c18f0bf17b9033650c57444 /drivers/md
parent      760fe67e539b2f1a95dbb4c9700140eccdb1c0c1 (diff)
dm thin: use per thin device deferred bio lists
The thin-pool previously had only a single deferred_bios list that
would collect bios for all thin devices in the pool.  Split this
per-pool deferred_bios list out into a per-thin deferred_bios_list --
doing so enables increased parallelism when processing deferred bios.
And now that each thin device has its own deferred_bios_list we can
sort all bios in the list using logical sector.  The requeue code in
the error handling path is also cleaner as a side-effect.
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
Acked-by: Joe Thornber <ejt@redhat.com>
Diffstat (limited to 'drivers/md')
-rw-r--r--   drivers/md/dm-thin.c   165
1 file changed, 104 insertions, 61 deletions
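The change moves the deferred bio list and its lock from struct pool into struct thin_c, and has the pool worker walk an RCU-protected list of active thin devices instead of draining one shared, pool-wide list. As a rough illustration of that locking split, here is a minimal userspace sketch (hypothetical, simplified names; pthread mutexes stand in for the kernel spinlocks, and a plain locked pointer walk stands in for the rcu_read_lock()/list_for_each_entry_rcu() traversal the patch actually uses):

/*
 * Illustrative userspace analogue of the per-thin deferred list split.
 * This is NOT the kernel code: names and types are simplified.
 */
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

struct bio {
        int sector;
        struct bio *next;
};

struct thin {
        pthread_mutex_t lock;       /* analogue of tc->lock */
        struct bio *deferred_bios;  /* analogue of tc->deferred_bio_list */
        struct thin *next;          /* analogue of the active_thins linkage */
};

struct pool {
        pthread_mutex_t lock;       /* pool-wide state only; no longer guards bio lists */
        struct thin *active_thins;
};

/* Defer a bio onto its own thin device; contends only with that device. */
static void thin_defer_bio(struct thin *tc, struct bio *bio)
{
        pthread_mutex_lock(&tc->lock);
        bio->next = tc->deferred_bios;
        tc->deferred_bios = bio;
        pthread_mutex_unlock(&tc->lock);
}

/* Pool worker: visit every active thin and drain its private list. */
static void process_deferred_bios(struct pool *pool)
{
        struct thin *tc;

        pthread_mutex_lock(&pool->lock);  /* the kernel code walks this list under RCU instead */
        for (tc = pool->active_thins; tc; tc = tc->next) {
                struct bio *bios, *b;

                /* Grab the whole per-thin list, then process it unlocked. */
                pthread_mutex_lock(&tc->lock);
                bios = tc->deferred_bios;
                tc->deferred_bios = NULL;
                pthread_mutex_unlock(&tc->lock);

                for (b = bios; b; ) {
                        struct bio *next = b->next;
                        printf("processing sector %d\n", b->sector);
                        free(b);
                        b = next;
                }
        }
        pthread_mutex_unlock(&pool->lock);
}

int main(void)
{
        struct pool pool;
        struct thin tc;
        struct bio *bio;

        pthread_mutex_init(&pool.lock, NULL);
        pthread_mutex_init(&tc.lock, NULL);
        tc.deferred_bios = NULL;
        tc.next = NULL;
        pool.active_thins = &tc;

        bio = malloc(sizeof(*bio));
        if (!bio)
                return 1;
        bio->sector = 42;
        thin_defer_bio(&tc, bio);

        process_deferred_bios(&pool);
        return 0;
}

The point of the split is visible even in this sketch: queuing a bio on one thin device takes only that device's lock, so submitters on different thin devices no longer serialize on a single pool-wide lock; only the worker's walk touches every device.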
diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c
index af871fd48eb4..08e62aef361d 100644
--- a/drivers/md/dm-thin.c
+++ b/drivers/md/dm-thin.c
@@ -12,6 +12,7 @@
 #include <linux/dm-io.h>
 #include <linux/dm-kcopyd.h>
 #include <linux/list.h>
+#include <linux/rculist.h>
 #include <linux/init.h>
 #include <linux/module.h>
 #include <linux/slab.h>
@@ -178,12 +179,10 @@ struct pool {
         unsigned ref_count;

         spinlock_t lock;
-        struct bio_list deferred_bios;
         struct bio_list deferred_flush_bios;
         struct list_head prepared_mappings;
         struct list_head prepared_discards;
-
-        struct bio_list retry_on_resume_list;
+        struct list_head active_thins;

         struct dm_deferred_set *shared_read_ds;
         struct dm_deferred_set *all_io_ds;
@@ -220,6 +219,7 @@ struct pool_c {
  * Target context for a thin.
  */
 struct thin_c {
+        struct list_head list;
         struct dm_dev *pool_dev;
         struct dm_dev *origin_dev;
         dm_thin_id dev_id;
@@ -227,6 +227,9 @@ struct thin_c {
         struct pool *pool;
         struct dm_thin_device *td;
         bool requeue_mode:1;
+        spinlock_t lock;
+        struct bio_list deferred_bio_list;
+        struct bio_list retry_on_resume_list;
 };

 /*----------------------------------------------------------------*/
@@ -287,9 +290,9 @@ static void cell_defer_no_holder_no_free(struct thin_c *tc,
         struct pool *pool = tc->pool;
         unsigned long flags;

-        spin_lock_irqsave(&pool->lock, flags);
-        dm_cell_release_no_holder(pool->prison, cell, &pool->deferred_bios);
-        spin_unlock_irqrestore(&pool->lock, flags);
+        spin_lock_irqsave(&tc->lock, flags);
+        dm_cell_release_no_holder(pool->prison, cell, &tc->deferred_bio_list);
+        spin_unlock_irqrestore(&tc->lock, flags);

         wake_worker(pool);
 }
@@ -378,30 +381,22 @@ static void requeue_bio_list(struct thin_c *tc, struct bio_list *master)

         bio_list_init(&bios);

-        spin_lock_irqsave(&tc->pool->lock, flags);
+        spin_lock_irqsave(&tc->lock, flags);
         bio_list_merge(&bios, master);
         bio_list_init(master);
-        spin_unlock_irqrestore(&tc->pool->lock, flags);
+        spin_unlock_irqrestore(&tc->lock, flags);

-        while ((bio = bio_list_pop(&bios))) {
-                struct dm_thin_endio_hook *h = dm_per_bio_data(bio, sizeof(struct dm_thin_endio_hook));
-
-                if (h->tc == tc)
-                        bio_endio(bio, DM_ENDIO_REQUEUE);
-                else
-                        bio_list_add(master, bio);
-        }
+        while ((bio = bio_list_pop(&bios)))
+                bio_endio(bio, DM_ENDIO_REQUEUE);
 }

 static void requeue_io(struct thin_c *tc)
 {
-        struct pool *pool = tc->pool;
-
-        requeue_bio_list(tc, &pool->deferred_bios);
-        requeue_bio_list(tc, &pool->retry_on_resume_list);
+        requeue_bio_list(tc, &tc->deferred_bio_list);
+        requeue_bio_list(tc, &tc->retry_on_resume_list);
 }

-static void error_retry_list(struct pool *pool)
+static void error_thin_retry_list(struct thin_c *tc)
 {
         struct bio *bio;
         unsigned long flags;
@@ -409,15 +404,25 @@ static void error_retry_list(struct pool *pool)

         bio_list_init(&bios);

-        spin_lock_irqsave(&pool->lock, flags);
-        bio_list_merge(&bios, &pool->retry_on_resume_list);
-        bio_list_init(&pool->retry_on_resume_list);
-        spin_unlock_irqrestore(&pool->lock, flags);
+        spin_lock_irqsave(&tc->lock, flags);
+        bio_list_merge(&bios, &tc->retry_on_resume_list);
+        bio_list_init(&tc->retry_on_resume_list);
+        spin_unlock_irqrestore(&tc->lock, flags);

         while ((bio = bio_list_pop(&bios)))
                 bio_io_error(bio);
 }

+static void error_retry_list(struct pool *pool)
+{
+        struct thin_c *tc;
+
+        rcu_read_lock();
+        list_for_each_entry_rcu(tc, &pool->active_thins, list)
+                error_thin_retry_list(tc);
+        rcu_read_unlock();
+}
+
 /*
  * This section of code contains the logic for processing a thin device's IO.
  * Much of the code depends on pool object resources (lists, workqueues, etc)
@@ -608,9 +613,9 @@ static void cell_defer(struct thin_c *tc, struct dm_bio_prison_cell *cell)
         struct pool *pool = tc->pool;
         unsigned long flags;

-        spin_lock_irqsave(&pool->lock, flags);
-        cell_release(pool, cell, &pool->deferred_bios);
-        spin_unlock_irqrestore(&tc->pool->lock, flags);
+        spin_lock_irqsave(&tc->lock, flags);
+        cell_release(pool, cell, &tc->deferred_bio_list);
+        spin_unlock_irqrestore(&tc->lock, flags);

         wake_worker(pool);
 }
@@ -623,9 +628,9 @@ static void cell_defer_no_holder(struct thin_c *tc, struct dm_bio_prison_cell *c
         struct pool *pool = tc->pool;
         unsigned long flags;

-        spin_lock_irqsave(&pool->lock, flags);
-        cell_release_no_holder(pool, cell, &pool->deferred_bios);
-        spin_unlock_irqrestore(&pool->lock, flags);
+        spin_lock_irqsave(&tc->lock, flags);
+        cell_release_no_holder(pool, cell, &tc->deferred_bio_list);
+        spin_unlock_irqrestore(&tc->lock, flags);

         wake_worker(pool);
 }
@@ -1001,12 +1006,11 @@ static void retry_on_resume(struct bio *bio)
 {
         struct dm_thin_endio_hook *h = dm_per_bio_data(bio, sizeof(struct dm_thin_endio_hook));
         struct thin_c *tc = h->tc;
-        struct pool *pool = tc->pool;
         unsigned long flags;

-        spin_lock_irqsave(&pool->lock, flags);
-        bio_list_add(&pool->retry_on_resume_list, bio);
-        spin_unlock_irqrestore(&pool->lock, flags);
+        spin_lock_irqsave(&tc->lock, flags);
+        bio_list_add(&tc->retry_on_resume_list, bio);
+        spin_unlock_irqrestore(&tc->lock, flags);
 }

 static bool should_error_unserviceable_bio(struct pool *pool)
@@ -1363,38 +1367,36 @@ static int need_commit_due_to_time(struct pool *pool)
                jiffies > pool->last_commit_jiffies + COMMIT_PERIOD;
 }

-static void process_deferred_bios(struct pool *pool)
+static void process_thin_deferred_bios(struct thin_c *tc)
 {
+        struct pool *pool = tc->pool;
         unsigned long flags;
         struct bio *bio;
         struct bio_list bios;

+        if (tc->requeue_mode) {
+                requeue_bio_list(tc, &tc->deferred_bio_list);
+                return;
+        }
+
         bio_list_init(&bios);

-        spin_lock_irqsave(&pool->lock, flags);
-        bio_list_merge(&bios, &pool->deferred_bios);
-        bio_list_init(&pool->deferred_bios);
-        spin_unlock_irqrestore(&pool->lock, flags);
+        spin_lock_irqsave(&tc->lock, flags);
+        bio_list_merge(&bios, &tc->deferred_bio_list);
+        bio_list_init(&tc->deferred_bio_list);
+        spin_unlock_irqrestore(&tc->lock, flags);

         while ((bio = bio_list_pop(&bios))) {
-                struct dm_thin_endio_hook *h = dm_per_bio_data(bio, sizeof(struct dm_thin_endio_hook));
-                struct thin_c *tc = h->tc;
-
-                if (tc->requeue_mode) {
-                        bio_endio(bio, DM_ENDIO_REQUEUE);
-                        continue;
-                }
-
                 /*
                  * If we've got no free new_mapping structs, and processing
                  * this bio might require one, we pause until there are some
                  * prepared mappings to process.
                  */
                 if (ensure_next_mapping(pool)) {
-                        spin_lock_irqsave(&pool->lock, flags);
-                        bio_list_add(&pool->deferred_bios, bio);
-                        bio_list_merge(&pool->deferred_bios, &bios);
-                        spin_unlock_irqrestore(&pool->lock, flags);
+                        spin_lock_irqsave(&tc->lock, flags);
+                        bio_list_add(&tc->deferred_bio_list, bio);
+                        bio_list_merge(&tc->deferred_bio_list, &bios);
+                        spin_unlock_irqrestore(&tc->lock, flags);
                         break;
                 }

@@ -1403,6 +1405,19 @@ static void process_deferred_bios(struct pool *pool)
                 else
                         pool->process_bio(tc, bio);
         }
+}
+
+static void process_deferred_bios(struct pool *pool)
+{
+        unsigned long flags;
+        struct bio *bio;
+        struct bio_list bios;
+        struct thin_c *tc;
+
+        rcu_read_lock();
+        list_for_each_entry_rcu(tc, &pool->active_thins, list)
+                process_thin_deferred_bios(tc);
+        rcu_read_unlock();

         /*
          * If there are any deferred flush bios, we must commit
@@ -1634,9 +1649,9 @@ static void thin_defer_bio(struct thin_c *tc, struct bio *bio)
         unsigned long flags;
         struct pool *pool = tc->pool;

-        spin_lock_irqsave(&pool->lock, flags);
-        bio_list_add(&pool->deferred_bios, bio);
-        spin_unlock_irqrestore(&pool->lock, flags);
+        spin_lock_irqsave(&tc->lock, flags);
+        bio_list_add(&tc->deferred_bio_list, bio);
+        spin_unlock_irqrestore(&tc->lock, flags);

         wake_worker(pool);
 }
@@ -1767,10 +1782,19 @@ static int pool_is_congested(struct dm_target_callbacks *cb, int bdi_bits)
         return bdi_congested(&q->backing_dev_info, bdi_bits);
 }

-static void __requeue_bios(struct pool *pool)
+static void requeue_bios(struct pool *pool)
 {
-        bio_list_merge(&pool->deferred_bios, &pool->retry_on_resume_list);
-        bio_list_init(&pool->retry_on_resume_list);
+        unsigned long flags;
+        struct thin_c *tc;
+
+        rcu_read_lock();
+        list_for_each_entry_rcu(tc, &pool->active_thins, list) {
+                spin_lock_irqsave(&tc->lock, flags);
+                bio_list_merge(&tc->deferred_bio_list, &tc->retry_on_resume_list);
+                bio_list_init(&tc->retry_on_resume_list);
+                spin_unlock_irqrestore(&tc->lock, flags);
+        }
+        rcu_read_unlock();
 }

 /*----------------------------------------------------------------
@@ -1951,12 +1975,11 @@ static struct pool *pool_create(struct mapped_device *pool_md,
         INIT_WORK(&pool->worker, do_worker);
         INIT_DELAYED_WORK(&pool->waker, do_waker);
         spin_lock_init(&pool->lock);
-        bio_list_init(&pool->deferred_bios);
         bio_list_init(&pool->deferred_flush_bios);
         INIT_LIST_HEAD(&pool->prepared_mappings);
         INIT_LIST_HEAD(&pool->prepared_discards);
+        INIT_LIST_HEAD(&pool->active_thins);
         pool->low_water_triggered = false;
-        bio_list_init(&pool->retry_on_resume_list);

         pool->shared_read_ds = dm_deferred_set_create();
         if (!pool->shared_read_ds) {
@@ -2501,8 +2524,8 @@ static void pool_resume(struct dm_target *ti)

         spin_lock_irqsave(&pool->lock, flags);
         pool->low_water_triggered = false;
-        __requeue_bios(pool);
         spin_unlock_irqrestore(&pool->lock, flags);
+        requeue_bios(pool);

         do_waker(&pool->waker.work);
 }
@@ -2962,6 +2985,12 @@ static struct target_type pool_target = {
 static void thin_dtr(struct dm_target *ti)
 {
         struct thin_c *tc = ti->private;
+        unsigned long flags;
+
+        spin_lock_irqsave(&tc->pool->lock, flags);
+        list_del_rcu(&tc->list);
+        spin_unlock_irqrestore(&tc->pool->lock, flags);
+        synchronize_rcu();

         mutex_lock(&dm_thin_pool_table.mutex);

@@ -3008,6 +3037,9 @@ static int thin_ctr(struct dm_target *ti, unsigned argc, char **argv)
                 r = -ENOMEM;
                 goto out_unlock;
         }
+        spin_lock_init(&tc->lock);
+        bio_list_init(&tc->deferred_bio_list);
+        bio_list_init(&tc->retry_on_resume_list);

         if (argc == 3) {
                 r = dm_get_device(ti, argv[2], FMODE_READ, &origin_dev);
@@ -3079,6 +3111,17 @@ static int thin_ctr(struct dm_target *ti, unsigned argc, char **argv)

         mutex_unlock(&dm_thin_pool_table.mutex);

+        spin_lock(&tc->pool->lock);
+        list_add_tail_rcu(&tc->list, &tc->pool->active_thins);
+        spin_unlock(&tc->pool->lock);
+        /*
+         * This synchronize_rcu() call is needed here otherwise we risk a
+         * wake_worker() call finding no bios to process (because the newly
+         * added tc isn't yet visible).  So this reduces latency since we
+         * aren't then dependent on the periodic commit to wake_worker().
+         */
+        synchronize_rcu();
+
         return 0;

 bad_target_max_io_len: