 drivers/md/dm-thin.c | 147 ++++++++++++++++++++++++++++++++-----------------
 1 file changed, 102 insertions(+), 45 deletions(-)

diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c
index a04eba905922..38a063f7afa4 100644
--- a/drivers/md/dm-thin.c
+++ b/drivers/md/dm-thin.c
@@ -130,10 +130,11 @@ static void build_virtual_key(struct dm_thin_device *td, dm_block_t b,
 struct dm_thin_new_mapping;
 
 /*
- * The pool runs in 3 modes. Ordered in degraded order for comparisons.
+ * The pool runs in 4 modes. Ordered in degraded order for comparisons.
  */
 enum pool_mode {
 	PM_WRITE,		/* metadata may be changed */
+	PM_OUT_OF_DATA_SPACE,	/* metadata may be changed, though data may not be allocated */
 	PM_READ_ONLY,		/* metadata may not be changed */
 	PM_FAIL,		/* all I/O fails */
 };
@@ -198,7 +199,6 @@ struct pool {
 };
 
 static enum pool_mode get_pool_mode(struct pool *pool);
-static void out_of_data_space(struct pool *pool);
 static void metadata_operation_failed(struct pool *pool, const char *op, int r);
 
 /*
@@ -399,6 +399,23 @@ static void requeue_io(struct thin_c *tc)
 	spin_unlock_irqrestore(&pool->lock, flags);
 }
 
+static void error_retry_list(struct pool *pool)
+{
+	struct bio *bio;
+	unsigned long flags;
+	struct bio_list bios;
+
+	bio_list_init(&bios);
+
+	spin_lock_irqsave(&pool->lock, flags);
+	bio_list_merge(&bios, &pool->retry_on_resume_list);
+	bio_list_init(&pool->retry_on_resume_list);
+	spin_unlock_irqrestore(&pool->lock, flags);
+
+	while ((bio = bio_list_pop(&bios)))
+		bio_io_error(bio);
+}
+
 /*
  * This section of code contains the logic for processing a thin device's IO.
  * Much of the code depends on pool object resources (lists, workqueues, etc)
@@ -925,13 +942,15 @@ static void check_low_water_mark(struct pool *pool, dm_block_t free_blocks)
 	}
 }
 
+static void set_pool_mode(struct pool *pool, enum pool_mode new_mode);
+
 static int alloc_data_block(struct thin_c *tc, dm_block_t *result)
 {
 	int r;
 	dm_block_t free_blocks;
 	struct pool *pool = tc->pool;
 
-	if (get_pool_mode(pool) != PM_WRITE)
+	if (WARN_ON(get_pool_mode(pool) != PM_WRITE))
 		return -EINVAL;
 
 	r = dm_pool_get_free_block_count(pool->pmd, &free_blocks);
@@ -958,7 +977,7 @@ static int alloc_data_block(struct thin_c *tc, dm_block_t *result)
 		}
 
 		if (!free_blocks) {
-			out_of_data_space(pool);
+			set_pool_mode(pool, PM_OUT_OF_DATA_SPACE);
 			return -ENOSPC;
 		}
 	}
@@ -988,15 +1007,32 @@ static void retry_on_resume(struct bio *bio)
 	spin_unlock_irqrestore(&pool->lock, flags);
 }
 
-static void handle_unserviceable_bio(struct pool *pool, struct bio *bio)
+static bool should_error_unserviceable_bio(struct pool *pool)
 {
-	/*
-	 * When pool is read-only, no cell locking is needed because
-	 * nothing is changing.
-	 */
-	WARN_ON_ONCE(get_pool_mode(pool) != PM_READ_ONLY);
+	enum pool_mode m = get_pool_mode(pool);
+
+	switch (m) {
+	case PM_WRITE:
+		/* Shouldn't get here */
+		DMERR_LIMIT("bio unserviceable, yet pool is in PM_WRITE mode");
+		return true;
+
+	case PM_OUT_OF_DATA_SPACE:
+		return pool->pf.error_if_no_space;
+
+	case PM_READ_ONLY:
+	case PM_FAIL:
+		return true;
+	default:
+		/* Shouldn't get here */
+		DMERR_LIMIT("bio unserviceable, yet pool has an unknown mode");
+		return true;
+	}
+}
 
-	if (pool->pf.error_if_no_space)
+static void handle_unserviceable_bio(struct pool *pool, struct bio *bio)
+{
+	if (should_error_unserviceable_bio(pool))
 		bio_io_error(bio);
 	else
 		retry_on_resume(bio);
@@ -1007,11 +1043,20 @@ static void retry_bios_on_resume(struct pool *pool, struct dm_bio_prison_cell *c
 	struct bio *bio;
 	struct bio_list bios;
 
+	if (should_error_unserviceable_bio(pool)) {
+		cell_error(pool, cell);
+		return;
+	}
+
 	bio_list_init(&bios);
 	cell_release(pool, cell, &bios);
 
-	while ((bio = bio_list_pop(&bios)))
-		handle_unserviceable_bio(pool, bio);
+	if (should_error_unserviceable_bio(pool))
+		while ((bio = bio_list_pop(&bios)))
+			bio_io_error(bio);
+	else
+		while ((bio = bio_list_pop(&bios)))
+			retry_on_resume(bio);
 }
 
 static void process_discard(struct thin_c *tc, struct bio *bio)
@@ -1296,6 +1341,11 @@ static void process_bio_read_only(struct thin_c *tc, struct bio *bio)
 	}
 }
 
+static void process_bio_success(struct thin_c *tc, struct bio *bio)
+{
+	bio_endio(bio, 0);
+}
+
 static void process_bio_fail(struct thin_c *tc, struct bio *bio)
 {
 	bio_io_error(bio);
@@ -1399,9 +1449,15 @@ static enum pool_mode get_pool_mode(struct pool *pool)
 	return pool->pf.mode;
 }
 
+static void notify_of_pool_mode_change(struct pool *pool, const char *new_mode)
+{
+	dm_table_event(pool->ti->table);
+	DMINFO("%s: switching pool to %s mode",
+	       dm_device_name(pool->pool_md), new_mode);
+}
+
 static void set_pool_mode(struct pool *pool, enum pool_mode new_mode)
 {
-	int r;
 	struct pool_c *pt = pool->ti->private;
 	bool needs_check = dm_pool_metadata_needs_check(pool->pmd);
 	enum pool_mode old_mode = get_pool_mode(pool);
@@ -1429,38 +1485,48 @@ static void set_pool_mode(struct pool *pool, enum pool_mode new_mode)
 	switch (new_mode) {
 	case PM_FAIL:
 		if (old_mode != new_mode)
-			DMERR("%s: switching pool to failure mode",
-			      dm_device_name(pool->pool_md));
+			notify_of_pool_mode_change(pool, "failure");
 		dm_pool_metadata_read_only(pool->pmd);
 		pool->process_bio = process_bio_fail;
 		pool->process_discard = process_bio_fail;
 		pool->process_prepared_mapping = process_prepared_mapping_fail;
 		pool->process_prepared_discard = process_prepared_discard_fail;
+
+		error_retry_list(pool);
 		break;
 
 	case PM_READ_ONLY:
 		if (old_mode != new_mode)
-			DMERR("%s: switching pool to read-only mode",
-			      dm_device_name(pool->pool_md));
-		r = dm_pool_abort_metadata(pool->pmd);
-		if (r) {
-			DMERR("%s: aborting transaction failed",
-			      dm_device_name(pool->pool_md));
-			new_mode = PM_FAIL;
-			set_pool_mode(pool, new_mode);
-		} else {
-			dm_pool_metadata_read_only(pool->pmd);
-			pool->process_bio = process_bio_read_only;
-			pool->process_discard = process_discard;
-			pool->process_prepared_mapping = process_prepared_mapping_fail;
-			pool->process_prepared_discard = process_prepared_discard_passdown;
-		}
+			notify_of_pool_mode_change(pool, "read-only");
+		dm_pool_metadata_read_only(pool->pmd);
+		pool->process_bio = process_bio_read_only;
+		pool->process_discard = process_bio_success;
+		pool->process_prepared_mapping = process_prepared_mapping_fail;
+		pool->process_prepared_discard = process_prepared_discard_passdown;
+
+		error_retry_list(pool);
+		break;
+
+	case PM_OUT_OF_DATA_SPACE:
+		/*
+		 * Ideally we'd never hit this state; the low water mark
+		 * would trigger userland to extend the pool before we
+		 * completely run out of data space. However, many small
+		 * IOs to unprovisioned space can consume data space at an
+		 * alarming rate. Adjust your low water mark if you're
+		 * frequently seeing this mode.
+		 */
+		if (old_mode != new_mode)
+			notify_of_pool_mode_change(pool, "out-of-data-space");
+		pool->process_bio = process_bio_read_only;
+		pool->process_discard = process_discard;
+		pool->process_prepared_mapping = process_prepared_mapping;
+		pool->process_prepared_discard = process_prepared_discard_passdown;
 		break;
 
 	case PM_WRITE:
 		if (old_mode != new_mode)
-			DMINFO("%s: switching pool to write mode",
-			      dm_device_name(pool->pool_md));
+			notify_of_pool_mode_change(pool, "write");
 		dm_pool_metadata_read_write(pool->pmd);
 		pool->process_bio = process_bio;
 		pool->process_discard = process_discard;
@@ -1477,17 +1543,6 @@ static void set_pool_mode(struct pool *pool, enum pool_mode new_mode)
 	pt->adjusted_pf.mode = new_mode;
 }
 
-/*
- * Rather than calling set_pool_mode directly, use these which describe the
- * reason for mode degradation.
- */
-static void out_of_data_space(struct pool *pool)
-{
-	DMERR_LIMIT("%s: no free data space available.",
-		    dm_device_name(pool->pool_md));
-	set_pool_mode(pool, PM_READ_ONLY);
-}
-
 static void abort_transaction(struct pool *pool)
 {
 	const char *dev_name = dm_device_name(pool->pool_md);
@@ -2719,7 +2774,9 @@ static void pool_status(struct dm_target *ti, status_type_t type,
 	else
 		DMEMIT("- ");
 
-	if (pool->pf.mode == PM_READ_ONLY)
+	if (pool->pf.mode == PM_OUT_OF_DATA_SPACE)
+		DMEMIT("out_of_data_space ");
+	else if (pool->pf.mode == PM_READ_ONLY)
 		DMEMIT("ro ");
 	else
 		DMEMIT("rw ");
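
Illustrative note (not part of the patch): the decision that the new should_error_unserviceable_bio() encodes can be summarised in a few lines of userspace C. The sketch below is an assumption-laden mock, not kernel code; "struct pool" here is a hypothetical stand-in holding only the pool mode and the error_if_no_space pool feature, and all bio handling, locking and logging are omitted.

	/* Standalone sketch: mirrors the mode -> behaviour table from the patch. */
	#include <stdbool.h>
	#include <stdio.h>

	enum pool_mode { PM_WRITE, PM_OUT_OF_DATA_SPACE, PM_READ_ONLY, PM_FAIL };

	struct pool {			/* hypothetical stand-in for the kernel struct */
		enum pool_mode mode;
		bool error_if_no_space;	/* the "error_if_no_space" pool feature */
	};

	/*
	 * true  -> error the unserviceable bio immediately
	 * false -> queue it on retry_on_resume_list and retry after the pool
	 *          has been grown and resumed
	 */
	static bool should_error_unserviceable_bio(const struct pool *pool)
	{
		switch (pool->mode) {
		case PM_OUT_OF_DATA_SPACE:
			return pool->error_if_no_space;
		case PM_WRITE:		/* shouldn't happen; the patch logs and errors */
		case PM_READ_ONLY:
		case PM_FAIL:
		default:
			return true;
		}
	}

	int main(void)
	{
		struct pool p = { PM_OUT_OF_DATA_SPACE, false };

		printf("out-of-data-space, error_if_no_space=false: %s\n",
		       should_error_unserviceable_bio(&p) ? "error" : "retry on resume");
		p.error_if_no_space = true;
		printf("out-of-data-space, error_if_no_space=true:  %s\n",
		       should_error_unserviceable_bio(&p) ? "error" : "retry on resume");
		return 0;
	}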