about summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- drivers/md/dm-thin.c 147
1 files changed, 102 insertions, 45 deletions
diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c
index a04eba905922..38a063f7afa4 100644
--- a/drivers/md/dm-thin.c
+++ b/drivers/md/dm-thin.c
@@ -130,10 +130,11 @@ static void build_virtual_key(struct dm_thin_device *td, dm_block_t b,
130struct dm_thin_new_mapping; 130struct dm_thin_new_mapping;
131 131
132/* 132/*
133 * The pool runs in 3 modes. Ordered in degraded order for comparisons. 133 * The pool runs in 4 modes. Ordered in degraded order for comparisons.
134 */ 134 */
135enum pool_mode { 135enum pool_mode {
136 PM_WRITE, /* metadata may be changed */ 136 PM_WRITE, /* metadata may be changed */
137 PM_OUT_OF_DATA_SPACE, /* metadata may be changed, though data may not be allocated */
137 PM_READ_ONLY, /* metadata may not be changed */ 138 PM_READ_ONLY, /* metadata may not be changed */
138 PM_FAIL, /* all I/O fails */ 139 PM_FAIL, /* all I/O fails */
139}; 140};
@@ -198,7 +199,6 @@ struct pool {
198}; 199};
199 200
200static enum pool_mode get_pool_mode(struct pool *pool); 201static enum pool_mode get_pool_mode(struct pool *pool);
201static void out_of_data_space(struct pool *pool);
202static void metadata_operation_failed(struct pool *pool, const char *op, int r); 202static void metadata_operation_failed(struct pool *pool, const char *op, int r);
203 203
204/* 204/*
@@ -399,6 +399,23 @@ static void requeue_io(struct thin_c *tc)
399 spin_unlock_irqrestore(&pool->lock, flags); 399 spin_unlock_irqrestore(&pool->lock, flags);
400} 400}
401 401
402static void error_retry_list(struct pool *pool)
403{
404 struct bio *bio;
405 unsigned long flags;
406 struct bio_list bios;
407
408 bio_list_init(&bios);
409
410 spin_lock_irqsave(&pool->lock, flags);
411 bio_list_merge(&bios, &pool->retry_on_resume_list);
412 bio_list_init(&pool->retry_on_resume_list);
413 spin_unlock_irqrestore(&pool->lock, flags);
414
415 while ((bio = bio_list_pop(&bios)))
416 bio_io_error(bio);
417}
418
402/* 419/*
403 * This section of code contains the logic for processing a thin device's IO. 420 * This section of code contains the logic for processing a thin device's IO.
404 * Much of the code depends on pool object resources (lists, workqueues, etc) 421 * Much of the code depends on pool object resources (lists, workqueues, etc)
@@ -925,13 +942,15 @@ static void check_low_water_mark(struct pool *pool, dm_block_t free_blocks)
925 } 942 }
926} 943}
927 944
945static void set_pool_mode(struct pool *pool, enum pool_mode new_mode);
946
928static int alloc_data_block(struct thin_c *tc, dm_block_t *result) 947static int alloc_data_block(struct thin_c *tc, dm_block_t *result)
929{ 948{
930 int r; 949 int r;
931 dm_block_t free_blocks; 950 dm_block_t free_blocks;
932 struct pool *pool = tc->pool; 951 struct pool *pool = tc->pool;
933 952
934 if (get_pool_mode(pool) != PM_WRITE) 953 if (WARN_ON(get_pool_mode(pool) != PM_WRITE))
935 return -EINVAL; 954 return -EINVAL;
936 955
937 r = dm_pool_get_free_block_count(pool->pmd, &free_blocks); 956 r = dm_pool_get_free_block_count(pool->pmd, &free_blocks);
@@ -958,7 +977,7 @@ static int alloc_data_block(struct thin_c *tc, dm_block_t *result)
958 } 977 }
959 978
960 if (!free_blocks) { 979 if (!free_blocks) {
961 out_of_data_space(pool); 980 set_pool_mode(pool, PM_OUT_OF_DATA_SPACE);
962 return -ENOSPC; 981 return -ENOSPC;
963 } 982 }
964 } 983 }
@@ -988,15 +1007,32 @@ static void retry_on_resume(struct bio *bio)
988 spin_unlock_irqrestore(&pool->lock, flags); 1007 spin_unlock_irqrestore(&pool->lock, flags);
989} 1008}
990 1009
991static void handle_unserviceable_bio(struct pool *pool, struct bio *bio) 1010static bool should_error_unserviceable_bio(struct pool *pool)
992{ 1011{
993 /* 1012 enum pool_mode m = get_pool_mode(pool);
994 * When pool is read-only, no cell locking is needed because 1013
995 * nothing is changing. 1014 switch (m) {
996 */ 1015 case PM_WRITE:
997 WARN_ON_ONCE(get_pool_mode(pool) != PM_READ_ONLY); 1016 /* Shouldn't get here */
1017 DMERR_LIMIT("bio unserviceable, yet pool is in PM_WRITE mode");
1018 return true;
1019
1020 case PM_OUT_OF_DATA_SPACE:
1021 return pool->pf.error_if_no_space;
1022
1023 case PM_READ_ONLY:
1024 case PM_FAIL:
1025 return true;
1026 default:
1027 /* Shouldn't get here */
1028 DMERR_LIMIT("bio unserviceable, yet pool has an unknown mode");
1029 return true;
1030 }
1031}
998 1032
999 if (pool->pf.error_if_no_space) 1033static void handle_unserviceable_bio(struct pool *pool, struct bio *bio)
1034{
1035 if (should_error_unserviceable_bio(pool))
1000 bio_io_error(bio); 1036 bio_io_error(bio);
1001 else 1037 else
1002 retry_on_resume(bio); 1038 retry_on_resume(bio);
@@ -1007,11 +1043,20 @@ static void retry_bios_on_resume(struct pool *pool, struct dm_bio_prison_cell *c
1007 struct bio *bio; 1043 struct bio *bio;
1008 struct bio_list bios; 1044 struct bio_list bios;
1009 1045
1046 if (should_error_unserviceable_bio(pool)) {
1047 cell_error(pool, cell);
1048 return;
1049 }
1050
1010 bio_list_init(&bios); 1051 bio_list_init(&bios);
1011 cell_release(pool, cell, &bios); 1052 cell_release(pool, cell, &bios);
1012 1053
1013 while ((bio = bio_list_pop(&bios))) 1054 if (should_error_unserviceable_bio(pool))
1014 handle_unserviceable_bio(pool, bio); 1055 while ((bio = bio_list_pop(&bios)))
1056 bio_io_error(bio);
1057 else
1058 while ((bio = bio_list_pop(&bios)))
1059 retry_on_resume(bio);
1015} 1060}
1016 1061
1017static void process_discard(struct thin_c *tc, struct bio *bio) 1062static void process_discard(struct thin_c *tc, struct bio *bio)
@@ -1296,6 +1341,11 @@ static void process_bio_read_only(struct thin_c *tc, struct bio *bio)
1296 } 1341 }
1297} 1342}
1298 1343
1344static void process_bio_success(struct thin_c *tc, struct bio *bio)
1345{
1346 bio_endio(bio, 0);
1347}
1348
1299static void process_bio_fail(struct thin_c *tc, struct bio *bio) 1349static void process_bio_fail(struct thin_c *tc, struct bio *bio)
1300{ 1350{
1301 bio_io_error(bio); 1351 bio_io_error(bio);
@@ -1399,9 +1449,15 @@ static enum pool_mode get_pool_mode(struct pool *pool)
1399 return pool->pf.mode; 1449 return pool->pf.mode;
1400} 1450}
1401 1451
1452static void notify_of_pool_mode_change(struct pool *pool, const char *new_mode)
1453{
1454 dm_table_event(pool->ti->table);
1455 DMINFO("%s: switching pool to %s mode",
1456 dm_device_name(pool->pool_md), new_mode);
1457}
1458
1402static void set_pool_mode(struct pool *pool, enum pool_mode new_mode) 1459static void set_pool_mode(struct pool *pool, enum pool_mode new_mode)
1403{ 1460{
1404 int r;
1405 struct pool_c *pt = pool->ti->private; 1461 struct pool_c *pt = pool->ti->private;
1406 bool needs_check = dm_pool_metadata_needs_check(pool->pmd); 1462 bool needs_check = dm_pool_metadata_needs_check(pool->pmd);
1407 enum pool_mode old_mode = get_pool_mode(pool); 1463 enum pool_mode old_mode = get_pool_mode(pool);
@@ -1429,38 +1485,48 @@ static void set_pool_mode(struct pool *pool, enum pool_mode new_mode)
1429 switch (new_mode) { 1485 switch (new_mode) {
1430 case PM_FAIL: 1486 case PM_FAIL:
1431 if (old_mode != new_mode) 1487 if (old_mode != new_mode)
1432 DMERR("%s: switching pool to failure mode", 1488 notify_of_pool_mode_change(pool, "failure");
1433 dm_device_name(pool->pool_md));
1434 dm_pool_metadata_read_only(pool->pmd); 1489 dm_pool_metadata_read_only(pool->pmd);
1435 pool->process_bio = process_bio_fail; 1490 pool->process_bio = process_bio_fail;
1436 pool->process_discard = process_bio_fail; 1491 pool->process_discard = process_bio_fail;
1437 pool->process_prepared_mapping = process_prepared_mapping_fail; 1492 pool->process_prepared_mapping = process_prepared_mapping_fail;
1438 pool->process_prepared_discard = process_prepared_discard_fail; 1493 pool->process_prepared_discard = process_prepared_discard_fail;
1494
1495 error_retry_list(pool);
1439 break; 1496 break;
1440 1497
1441 case PM_READ_ONLY: 1498 case PM_READ_ONLY:
1442 if (old_mode != new_mode) 1499 if (old_mode != new_mode)
1443 DMERR("%s: switching pool to read-only mode", 1500 notify_of_pool_mode_change(pool, "read-only");
1444 dm_device_name(pool->pool_md)); 1501 dm_pool_metadata_read_only(pool->pmd);
1445 r = dm_pool_abort_metadata(pool->pmd); 1502 pool->process_bio = process_bio_read_only;
1446 if (r) { 1503 pool->process_discard = process_bio_success;
1447 DMERR("%s: aborting transaction failed", 1504 pool->process_prepared_mapping = process_prepared_mapping_fail;
1448 dm_device_name(pool->pool_md)); 1505 pool->process_prepared_discard = process_prepared_discard_passdown;
1449 new_mode = PM_FAIL; 1506
1450 set_pool_mode(pool, new_mode); 1507 error_retry_list(pool);
1451 } else { 1508 break;
1452 dm_pool_metadata_read_only(pool->pmd); 1509
1453 pool->process_bio = process_bio_read_only; 1510 case PM_OUT_OF_DATA_SPACE:
1454 pool->process_discard = process_discard; 1511 /*
1455 pool->process_prepared_mapping = process_prepared_mapping_fail; 1512 * Ideally we'd never hit this state; the low water mark
1456 pool->process_prepared_discard = process_prepared_discard_passdown; 1513 * would trigger userland to extend the pool before we
1457 } 1514 * completely run out of data space. However, many small
1515 * IOs to unprovisioned space can consume data space at an
1516 * alarming rate. Adjust your low water mark if you're
1517 * frequently seeing this mode.
1518 */
1519 if (old_mode != new_mode)
1520 notify_of_pool_mode_change(pool, "out-of-data-space");
1521 pool->process_bio = process_bio_read_only;
1522 pool->process_discard = process_discard;
1523 pool->process_prepared_mapping = process_prepared_mapping;
1524 pool->process_prepared_discard = process_prepared_discard_passdown;
1458 break; 1525 break;
1459 1526
1460 case PM_WRITE: 1527 case PM_WRITE:
1461 if (old_mode != new_mode) 1528 if (old_mode != new_mode)
1462 DMINFO("%s: switching pool to write mode", 1529 notify_of_pool_mode_change(pool, "write");
1463 dm_device_name(pool->pool_md));
1464 dm_pool_metadata_read_write(pool->pmd); 1530 dm_pool_metadata_read_write(pool->pmd);
1465 pool->process_bio = process_bio; 1531 pool->process_bio = process_bio;
1466 pool->process_discard = process_discard; 1532 pool->process_discard = process_discard;
@@ -1477,17 +1543,6 @@ static void set_pool_mode(struct pool *pool, enum pool_mode new_mode)
1477 pt->adjusted_pf.mode = new_mode; 1543 pt->adjusted_pf.mode = new_mode;
1478} 1544}
1479 1545
1480/*
1481 * Rather than calling set_pool_mode directly, use these which describe the
1482 * reason for mode degradation.
1483 */
1484static void out_of_data_space(struct pool *pool)
1485{
1486 DMERR_LIMIT("%s: no free data space available.",
1487 dm_device_name(pool->pool_md));
1488 set_pool_mode(pool, PM_READ_ONLY);
1489}
1490
1491static void abort_transaction(struct pool *pool) 1546static void abort_transaction(struct pool *pool)
1492{ 1547{
1493 const char *dev_name = dm_device_name(pool->pool_md); 1548 const char *dev_name = dm_device_name(pool->pool_md);
@@ -2719,7 +2774,9 @@ static void pool_status(struct dm_target *ti, status_type_t type,
2719 else 2774 else
2720 DMEMIT("- "); 2775 DMEMIT("- ");
2721 2776
2722 if (pool->pf.mode == PM_READ_ONLY) 2777 if (pool->pf.mode == PM_OUT_OF_DATA_SPACE)
2778 DMEMIT("out_of_data_space ");
2779 else if (pool->pf.mode == PM_READ_ONLY)
2723 DMEMIT("ro "); 2780 DMEMIT("ro ");
2724 else 2781 else
2725 DMEMIT("rw "); 2782 DMEMIT("rw ");