aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMike Snitzer <snitzer@redhat.com>2014-03-21 18:33:41 -0400
committerMike Snitzer <snitzer@redhat.com>2014-04-04 14:53:03 -0400
commit67324ea18812bc952ef96892fbd5817b9050413f (patch)
tree00c8f83280e10b94f3d4cbdf15d468b74cc6438a
parentc140e1c4e23bdaf0a5c00b6a8b6d18f259d39a00 (diff)
dm thin: sort the per thin deferred bios using an rb_tree
A thin-pool will allocate blocks using FIFO order for all thin devices
which share the thin-pool.  Because of this simplistic allocation the
thin-pool's space can become fragmented quite easily; especially when
multiple threads are requesting blocks in parallel.

Sort each thin device's deferred_bio_list based on logical sector to
help reduce fragmentation of the thin-pool's ondisk layout.

The following tables illustrate the realized gains/potential offered by
sorting each thin device's deferred_bio_list.  An "io size"-sized random
read of the device would result in "seeks/io" fragments being read, with
an average "distance/seek" between each fragment.  Data was written to a
single thin device using multiple threads via iozone (8 threads, 64K for
both the block_size and io_size).

unsorted:

     io size   seeks/io distance/seek
  --------------------------------------
          4k    0.000     0b
         16k    0.013    11m
         64k    0.065    11m
        256k    0.274    10m
          1m    1.109    10m
          4m    4.411    10m
         16m   17.097    11m
         64m   60.055    13m
        256m  148.798    25m
          1g  809.929    21m

sorted:

     io size   seeks/io distance/seek
  --------------------------------------
          4k    0.000     0b
         16k    0.000     1g
         64k    0.001     1g
        256k    0.003     1g
          1m    0.011     1g
          4m    0.045     1g
         16m    0.181     1g
         64m    0.747  1011m
        256m    3.299     1g
          1g   14.373     1g

Signed-off-by: Mike Snitzer <snitzer@redhat.com>
Acked-by: Joe Thornber <ejt@redhat.com>
-rw-r--r--drivers/md/dm-thin.c84
1 files changed, 82 insertions, 2 deletions
diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c
index 08e62aef361d..53728be84dee 100644
--- a/drivers/md/dm-thin.c
+++ b/drivers/md/dm-thin.c
@@ -16,6 +16,7 @@
16#include <linux/init.h> 16#include <linux/init.h>
17#include <linux/module.h> 17#include <linux/module.h>
18#include <linux/slab.h> 18#include <linux/slab.h>
19#include <linux/rbtree.h>
19 20
20#define DM_MSG_PREFIX "thin" 21#define DM_MSG_PREFIX "thin"
21 22
@@ -230,6 +231,7 @@ struct thin_c {
230 spinlock_t lock; 231 spinlock_t lock;
231 struct bio_list deferred_bio_list; 232 struct bio_list deferred_bio_list;
232 struct bio_list retry_on_resume_list; 233 struct bio_list retry_on_resume_list;
234 struct rb_root sort_bio_list; /* sorted list of deferred bios */
233}; 235};
234 236
235/*----------------------------------------------------------------*/ 237/*----------------------------------------------------------------*/
@@ -371,6 +373,7 @@ struct dm_thin_endio_hook {
371 struct dm_deferred_entry *shared_read_entry; 373 struct dm_deferred_entry *shared_read_entry;
372 struct dm_deferred_entry *all_io_entry; 374 struct dm_deferred_entry *all_io_entry;
373 struct dm_thin_new_mapping *overwrite_mapping; 375 struct dm_thin_new_mapping *overwrite_mapping;
376 struct rb_node rb_node;
374}; 377};
375 378
376static void requeue_bio_list(struct thin_c *tc, struct bio_list *master) 379static void requeue_bio_list(struct thin_c *tc, struct bio_list *master)
@@ -1367,12 +1370,77 @@ static int need_commit_due_to_time(struct pool *pool)
1367 jiffies > pool->last_commit_jiffies + COMMIT_PERIOD; 1370 jiffies > pool->last_commit_jiffies + COMMIT_PERIOD;
1368} 1371}
1369 1372
1373#define thin_pbd(node) rb_entry((node), struct dm_thin_endio_hook, rb_node)
1374#define thin_bio(pbd) dm_bio_from_per_bio_data((pbd), sizeof(struct dm_thin_endio_hook))
1375
1376static void __thin_bio_rb_add(struct thin_c *tc, struct bio *bio)
1377{
1378 struct rb_node **rbp, *parent;
1379 struct dm_thin_endio_hook *pbd;
1380 sector_t bi_sector = bio->bi_iter.bi_sector;
1381
1382 rbp = &tc->sort_bio_list.rb_node;
1383 parent = NULL;
1384 while (*rbp) {
1385 parent = *rbp;
1386 pbd = thin_pbd(parent);
1387
1388 if (bi_sector < thin_bio(pbd)->bi_iter.bi_sector)
1389 rbp = &(*rbp)->rb_left;
1390 else
1391 rbp = &(*rbp)->rb_right;
1392 }
1393
1394 pbd = dm_per_bio_data(bio, sizeof(struct dm_thin_endio_hook));
1395 rb_link_node(&pbd->rb_node, parent, rbp);
1396 rb_insert_color(&pbd->rb_node, &tc->sort_bio_list);
1397}
1398
1399static void __extract_sorted_bios(struct thin_c *tc)
1400{
1401 struct rb_node *node;
1402 struct dm_thin_endio_hook *pbd;
1403 struct bio *bio;
1404
1405 for (node = rb_first(&tc->sort_bio_list); node; node = rb_next(node)) {
1406 pbd = thin_pbd(node);
1407 bio = thin_bio(pbd);
1408
1409 bio_list_add(&tc->deferred_bio_list, bio);
1410 rb_erase(&pbd->rb_node, &tc->sort_bio_list);
1411 }
1412
1413 WARN_ON(!RB_EMPTY_ROOT(&tc->sort_bio_list));
1414}
1415
1416static void __sort_thin_deferred_bios(struct thin_c *tc)
1417{
1418 struct bio *bio;
1419 struct bio_list bios;
1420
1421 bio_list_init(&bios);
1422 bio_list_merge(&bios, &tc->deferred_bio_list);
1423 bio_list_init(&tc->deferred_bio_list);
1424
1425 /* Sort deferred_bio_list using rb-tree */
1426 while ((bio = bio_list_pop(&bios)))
1427 __thin_bio_rb_add(tc, bio);
1428
1429 /*
1430 * Transfer the sorted bios in sort_bio_list back to
1431 * deferred_bio_list to allow lockless submission of
1432 * all bios.
1433 */
1434 __extract_sorted_bios(tc);
1435}
1436
1370static void process_thin_deferred_bios(struct thin_c *tc) 1437static void process_thin_deferred_bios(struct thin_c *tc)
1371{ 1438{
1372 struct pool *pool = tc->pool; 1439 struct pool *pool = tc->pool;
1373 unsigned long flags; 1440 unsigned long flags;
1374 struct bio *bio; 1441 struct bio *bio;
1375 struct bio_list bios; 1442 struct bio_list bios;
1443 struct blk_plug plug;
1376 1444
1377 if (tc->requeue_mode) { 1445 if (tc->requeue_mode) {
1378 requeue_bio_list(tc, &tc->deferred_bio_list); 1446 requeue_bio_list(tc, &tc->deferred_bio_list);
@@ -1382,10 +1450,20 @@ static void process_thin_deferred_bios(struct thin_c *tc)
1382 bio_list_init(&bios); 1450 bio_list_init(&bios);
1383 1451
1384 spin_lock_irqsave(&tc->lock, flags); 1452 spin_lock_irqsave(&tc->lock, flags);
1453
1454 if (bio_list_empty(&tc->deferred_bio_list)) {
1455 spin_unlock_irqrestore(&tc->lock, flags);
1456 return;
1457 }
1458
1459 __sort_thin_deferred_bios(tc);
1460
1385 bio_list_merge(&bios, &tc->deferred_bio_list); 1461 bio_list_merge(&bios, &tc->deferred_bio_list);
1386 bio_list_init(&tc->deferred_bio_list); 1462 bio_list_init(&tc->deferred_bio_list);
1463
1387 spin_unlock_irqrestore(&tc->lock, flags); 1464 spin_unlock_irqrestore(&tc->lock, flags);
1388 1465
1466 blk_start_plug(&plug);
1389 while ((bio = bio_list_pop(&bios))) { 1467 while ((bio = bio_list_pop(&bios))) {
1390 /* 1468 /*
1391 * If we've got no free new_mapping structs, and processing 1469 * If we've got no free new_mapping structs, and processing
@@ -1405,6 +1483,7 @@ static void process_thin_deferred_bios(struct thin_c *tc)
1405 else 1483 else
1406 pool->process_bio(tc, bio); 1484 pool->process_bio(tc, bio);
1407 } 1485 }
1486 blk_finish_plug(&plug);
1408} 1487}
1409 1488
1410static void process_deferred_bios(struct pool *pool) 1489static void process_deferred_bios(struct pool *pool)
@@ -2964,7 +3043,7 @@ static struct target_type pool_target = {
2964 .name = "thin-pool", 3043 .name = "thin-pool",
2965 .features = DM_TARGET_SINGLETON | DM_TARGET_ALWAYS_WRITEABLE | 3044 .features = DM_TARGET_SINGLETON | DM_TARGET_ALWAYS_WRITEABLE |
2966 DM_TARGET_IMMUTABLE, 3045 DM_TARGET_IMMUTABLE,
2967 .version = {1, 11, 0}, 3046 .version = {1, 12, 0},
2968 .module = THIS_MODULE, 3047 .module = THIS_MODULE,
2969 .ctr = pool_ctr, 3048 .ctr = pool_ctr,
2970 .dtr = pool_dtr, 3049 .dtr = pool_dtr,
@@ -3040,6 +3119,7 @@ static int thin_ctr(struct dm_target *ti, unsigned argc, char **argv)
3040 spin_lock_init(&tc->lock); 3119 spin_lock_init(&tc->lock);
3041 bio_list_init(&tc->deferred_bio_list); 3120 bio_list_init(&tc->deferred_bio_list);
3042 bio_list_init(&tc->retry_on_resume_list); 3121 bio_list_init(&tc->retry_on_resume_list);
3122 tc->sort_bio_list = RB_ROOT;
3043 3123
3044 if (argc == 3) { 3124 if (argc == 3) {
3045 r = dm_get_device(ti, argv[2], FMODE_READ, &origin_dev); 3125 r = dm_get_device(ti, argv[2], FMODE_READ, &origin_dev);
@@ -3287,7 +3367,7 @@ static int thin_iterate_devices(struct dm_target *ti,
3287 3367
3288static struct target_type thin_target = { 3368static struct target_type thin_target = {
3289 .name = "thin", 3369 .name = "thin",
3290 .version = {1, 11, 0}, 3370 .version = {1, 12, 0},
3291 .module = THIS_MODULE, 3371 .module = THIS_MODULE,
3292 .ctr = thin_ctr, 3372 .ctr = thin_ctr,
3293 .dtr = thin_dtr, 3373 .dtr = thin_dtr,