author     Joe Thornber <ejt@redhat.com>        2012-07-27 10:08:16 -0400
committer  Alasdair G Kergon <agk@redhat.com>   2012-07-27 10:08:16 -0400
commit     e49e582965b3694f07a106adc83ddb44aa4f0890 (patch)
tree       fcc6ff02fd4bd9afa59b5fd3bdbb9991f1271d8a /drivers/md
parent     da105ed5fd7edcc5e0df7dbacef4dadda74e7ebe (diff)
dm thin: add read only and fail io modes
Add read-only and fail-io modes to thin provisioning.

If a transaction commit fails, the pool's metadata device transitions
to "read-only" mode.  If a commit fails while the pool is already in
read-only mode, the pool transitions to "fail-io" mode.

Once in fail-io mode, the pool and all associated thin devices report
a status of "Fail".
Signed-off-by: Joe Thornber <ejt@redhat.com>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
Signed-off-by: Alasdair G Kergon <agk@redhat.com>
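
The state machine behind these transitions is small. Below is a minimal,
compilable sketch of the two rules the patch adds — the mode ordering that
keeps a degraded pool from being upgraded, and the commit fallback — using
the enum and semantics from the patch. The locking and metadata calls of
the real driver are omitted, and choose_mode()/try_commit() are
illustrative helpers, not kernel APIs:

	#include <errno.h>

	/* From the patch: ordered so "more degraded" compares greater. */
	enum pool_mode {
		PM_WRITE,	/* metadata may be changed */
		PM_READ_ONLY,	/* metadata may not be changed */
		PM_FAIL,	/* all I/O fails */
	};

	/*
	 * Rule used by bind_control_target(): a table reload may request
	 * a new mode, but a degraded pool is never upgraded.
	 */
	static enum pool_mode choose_mode(enum pool_mode old_mode,
					  enum pool_mode new_mode)
	{
		return old_mode > new_mode ? old_mode : new_mode;
	}

	/*
	 * Shape of commit_or_fallback(): commits are only attempted in
	 * PM_WRITE, and a failed commit degrades the pool to read-only.
	 * (In the driver, a subsequent failure to abort the metadata
	 * transaction drops the pool to PM_FAIL.)
	 */
	static int try_commit(enum pool_mode *mode, int commit_status)
	{
		if (*mode != PM_WRITE)
			return -EINVAL;

		if (commit_status)
			*mode = PM_READ_ONLY;

		return commit_status;
	}

In the patch itself the same comparison appears verbatim in
bind_control_target(), and commit_or_fallback() replaces the scattered
dm_pool_commit_metadata() error handling.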
Diffstat (limited to 'drivers/md')
-rw-r--r--  drivers/md/dm-thin.c  |  416
1 file changed, 321 insertions(+), 95 deletions(-)
diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c
index 98c50f9626d8..087e9b34d290 100644
--- a/drivers/md/dm-thin.c
+++ b/drivers/md/dm-thin.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2011 Red Hat UK.
+ * Copyright (C) 2011-2012 Red Hat UK.
  *
  * This file is released under the GPL.
  */
@@ -496,12 +496,27 @@ static void build_virtual_key(struct dm_thin_device *td, dm_block_t b,
  */
 struct dm_thin_new_mapping;
 
+/*
+ * The pool runs in 3 modes. Ordered in degraded order for comparisons.
+ */
+enum pool_mode {
+	PM_WRITE,		/* metadata may be changed */
+	PM_READ_ONLY,		/* metadata may not be changed */
+	PM_FAIL,		/* all I/O fails */
+};
+
 struct pool_features {
+	enum pool_mode mode;
+
 	unsigned zero_new_blocks:1;
 	unsigned discard_enabled:1;
 	unsigned discard_passdown:1;
 };
 
+struct thin_c;
+typedef void (*process_bio_fn)(struct thin_c *tc, struct bio *bio);
+typedef void (*process_mapping_fn)(struct dm_thin_new_mapping *m);
+
 struct pool {
 	struct list_head list;
 	struct dm_target *ti;	/* Only set if a pool target is bound */
@@ -542,8 +557,17 @@ struct pool {
 	struct dm_thin_new_mapping *next_mapping;
 	mempool_t *mapping_pool;
 	mempool_t *endio_hook_pool;
+
+	process_bio_fn process_bio;
+	process_bio_fn process_discard;
+
+	process_mapping_fn process_prepared_mapping;
+	process_mapping_fn process_prepared_discard;
 };
 
+static enum pool_mode get_pool_mode(struct pool *pool);
+static void set_pool_mode(struct pool *pool, enum pool_mode mode);
+
 /*
  * Target context for a pool.
  */
@@ -718,16 +742,28 @@ static void issue(struct thin_c *tc, struct bio *bio)
 	struct pool *pool = tc->pool;
 	unsigned long flags;
 
+	if (!bio_triggers_commit(tc, bio)) {
+		generic_make_request(bio);
+		return;
+	}
+
 	/*
-	 * Batch together any FUA/FLUSH bios we find and then issue
-	 * a single commit for them in process_deferred_bios().
+	 * Complete bio with an error if earlier I/O caused changes to
+	 * the metadata that can't be committed e.g, due to I/O errors
+	 * on the metadata device.
 	 */
-	if (bio_triggers_commit(tc, bio)) {
-		spin_lock_irqsave(&pool->lock, flags);
-		bio_list_add(&pool->deferred_flush_bios, bio);
-		spin_unlock_irqrestore(&pool->lock, flags);
-	} else
-		generic_make_request(bio);
+	if (dm_thin_aborted_changes(tc->td)) {
+		bio_io_error(bio);
+		return;
+	}
+
+	/*
+	 * Batch together any bios that trigger commits and then issue a
+	 * single commit for them in process_deferred_bios().
+	 */
+	spin_lock_irqsave(&pool->lock, flags);
+	bio_list_add(&pool->deferred_flush_bios, bio);
+	spin_unlock_irqrestore(&pool->lock, flags);
 }
 
 static void remap_to_origin_and_issue(struct thin_c *tc, struct bio *bio)
@@ -864,6 +900,14 @@ static void cell_defer_except(struct thin_c *tc, struct dm_bio_prison_cell *cell
 	wake_worker(pool);
 }
 
+static void process_prepared_mapping_fail(struct dm_thin_new_mapping *m)
+{
+	if (m->bio)
+		m->bio->bi_end_io = m->saved_bi_end_io;
+	cell_error(m->cell);
+	list_del(&m->list);
+	mempool_free(m, m->tc->pool->mapping_pool);
+}
 static void process_prepared_mapping(struct dm_thin_new_mapping *m)
 {
 	struct thin_c *tc = m->tc;
@@ -908,18 +952,20 @@ out:
 	mempool_free(m, tc->pool->mapping_pool);
 }
 
-static void process_prepared_discard(struct dm_thin_new_mapping *m)
+static void process_prepared_discard_fail(struct dm_thin_new_mapping *m)
 {
-	int r;
 	struct thin_c *tc = m->tc;
 
-	r = dm_thin_remove_block(tc->td, m->virt_block);
-	if (r)
-		DMERR("dm_thin_remove_block() failed");
+	bio_io_error(m->bio);
+	cell_defer_except(tc, m->cell);
+	cell_defer_except(tc, m->cell2);
+	mempool_free(m, tc->pool->mapping_pool);
+}
+
+static void process_prepared_discard_passdown(struct dm_thin_new_mapping *m)
+{
+	struct thin_c *tc = m->tc;
 
-	/*
-	 * Pass the discard down to the underlying device?
-	 */
 	if (m->pass_discard)
 		remap_and_issue(tc, m->bio, m->data_block);
 	else
@@ -930,8 +976,20 @@ static void process_prepared_discard(struct dm_thin_new_mapping *m)
 	mempool_free(m, tc->pool->mapping_pool);
 }
 
+static void process_prepared_discard(struct dm_thin_new_mapping *m)
+{
+	int r;
+	struct thin_c *tc = m->tc;
+
+	r = dm_thin_remove_block(tc->td, m->virt_block);
+	if (r)
+		DMERR("dm_thin_remove_block() failed");
+
+	process_prepared_discard_passdown(m);
+}
+
 static void process_prepared(struct pool *pool, struct list_head *head,
-			     void (*fn)(struct dm_thin_new_mapping *))
+			     process_mapping_fn *fn)
 {
 	unsigned long flags;
 	struct list_head maps;
@@ -943,7 +1001,7 @@ static void process_prepared(struct pool *pool, struct list_head *head,
 	spin_unlock_irqrestore(&pool->lock, flags);
 
 	list_for_each_entry_safe(m, tmp, &maps, list)
-		fn(m);
+		(*fn)(m);
 }
 
 /*
@@ -1109,6 +1167,35 @@ static void schedule_zero(struct thin_c *tc, dm_block_t virt_block,
 	}
 }
 
+static int commit(struct pool *pool)
+{
+	int r;
+
+	r = dm_pool_commit_metadata(pool->pmd);
+	if (r)
+		DMERR("commit failed, error = %d", r);
+
+	return r;
+}
+
+/*
+ * A non-zero return indicates read_only or fail_io mode.
+ * Many callers don't care about the return value.
+ */
+static int commit_or_fallback(struct pool *pool)
+{
+	int r;
+
+	if (get_pool_mode(pool) != PM_WRITE)
+		return -EINVAL;
+
+	r = commit(pool);
+	if (r)
+		set_pool_mode(pool, PM_READ_ONLY);
+
+	return r;
+}
+
 static int alloc_data_block(struct thin_c *tc, dm_block_t *result)
 {
 	int r;
@@ -1137,12 +1224,7 @@ static int alloc_data_block(struct thin_c *tc, dm_block_t *result)
 		 * Try to commit to see if that will free up some
 		 * more space.
 		 */
-		r = dm_pool_commit_metadata(pool->pmd);
-		if (r) {
-			DMERR("%s: dm_pool_commit_metadata() failed, error = %d",
-			      __func__, r);
-			return r;
-		}
+		(void) commit_or_fallback(pool);
 
 		r = dm_pool_get_free_block_count(pool->pmd, &free_blocks);
 		if (r)
@@ -1373,6 +1455,7 @@ static void provision_block(struct thin_c *tc, struct bio *bio, dm_block_t block
 
 	default:
 		DMERR("%s: alloc_data_block() failed, error = %d", __func__, r);
+		set_pool_mode(tc->pool, PM_READ_ONLY);
 		cell_error(cell);
 		break;
 	}
@@ -1430,6 +1513,49 @@ static void process_bio(struct thin_c *tc, struct bio *bio)
 	}
 }
 
+static void process_bio_read_only(struct thin_c *tc, struct bio *bio)
+{
+	int r;
+	int rw = bio_data_dir(bio);
+	dm_block_t block = get_bio_block(tc, bio);
+	struct dm_thin_lookup_result lookup_result;
+
+	r = dm_thin_find_block(tc->td, block, 1, &lookup_result);
+	switch (r) {
+	case 0:
+		if (lookup_result.shared && (rw == WRITE) && bio->bi_size)
+			bio_io_error(bio);
+		else
+			remap_and_issue(tc, bio, lookup_result.block);
+		break;
+
+	case -ENODATA:
+		if (rw != READ) {
+			bio_io_error(bio);
+			break;
+		}
+
+		if (tc->origin_dev) {
+			remap_to_origin_and_issue(tc, bio);
+			break;
+		}
+
+		zero_fill_bio(bio);
+		bio_endio(bio, 0);
+		break;
+
+	default:
+		DMERR("dm_thin_find_block() failed, error = %d", r);
+		bio_io_error(bio);
+		break;
+	}
+}
+
+static void process_bio_fail(struct thin_c *tc, struct bio *bio)
+{
+	bio_io_error(bio);
+}
+
 static int need_commit_due_to_time(struct pool *pool)
 {
 	return jiffies < pool->last_commit_jiffies ||
@@ -1441,7 +1567,6 @@ static void process_deferred_bios(struct pool *pool)
 	unsigned long flags;
 	struct bio *bio;
 	struct bio_list bios;
-	int r;
 
 	bio_list_init(&bios);
 
@@ -1468,9 +1593,9 @@ static void process_deferred_bios(struct pool *pool)
 		}
 
 		if (bio->bi_rw & REQ_DISCARD)
-			process_discard(tc, bio);
+			pool->process_discard(tc, bio);
 		else
-			process_bio(tc, bio);
+			pool->process_bio(tc, bio);
 	}
 
 	/*
@@ -1486,10 +1611,7 @@ static void process_deferred_bios(struct pool *pool)
 	if (bio_list_empty(&bios) && !need_commit_due_to_time(pool))
 		return;
 
-	r = dm_pool_commit_metadata(pool->pmd);
-	if (r) {
-		DMERR("%s: dm_pool_commit_metadata() failed, error = %d",
-		      __func__, r);
+	if (commit_or_fallback(pool)) {
 		while ((bio = bio_list_pop(&bios)))
 			bio_io_error(bio);
 		return;
@@ -1504,8 +1626,8 @@ static void do_worker(struct work_struct *ws)
 {
 	struct pool *pool = container_of(ws, struct pool, worker);
 
-	process_prepared(pool, &pool->prepared_mappings, process_prepared_mapping);
-	process_prepared(pool, &pool->prepared_discards, process_prepared_discard);
+	process_prepared(pool, &pool->prepared_mappings, &pool->process_prepared_mapping);
+	process_prepared(pool, &pool->prepared_discards, &pool->process_prepared_discard);
 	process_deferred_bios(pool);
 }
 
@@ -1522,6 +1644,52 @@ static void do_waker(struct work_struct *ws)
 
 /*----------------------------------------------------------------*/
 
+static enum pool_mode get_pool_mode(struct pool *pool)
+{
+	return pool->pf.mode;
+}
+
+static void set_pool_mode(struct pool *pool, enum pool_mode mode)
+{
+	int r;
+
+	pool->pf.mode = mode;
+
+	switch (mode) {
+	case PM_FAIL:
+		DMERR("switching pool to failure mode");
+		pool->process_bio = process_bio_fail;
+		pool->process_discard = process_bio_fail;
+		pool->process_prepared_mapping = process_prepared_mapping_fail;
+		pool->process_prepared_discard = process_prepared_discard_fail;
+		break;
+
+	case PM_READ_ONLY:
+		DMERR("switching pool to read-only mode");
+		r = dm_pool_abort_metadata(pool->pmd);
+		if (r) {
+			DMERR("aborting transaction failed");
+			set_pool_mode(pool, PM_FAIL);
+		} else {
+			dm_pool_metadata_read_only(pool->pmd);
+			pool->process_bio = process_bio_read_only;
+			pool->process_discard = process_discard;
+			pool->process_prepared_mapping = process_prepared_mapping_fail;
+			pool->process_prepared_discard = process_prepared_discard_passdown;
+		}
+		break;
+
+	case PM_WRITE:
+		pool->process_bio = process_bio;
+		pool->process_discard = process_discard;
+		pool->process_prepared_mapping = process_prepared_mapping;
+		pool->process_prepared_discard = process_prepared_discard;
+		break;
+	}
+}
+
+/*----------------------------------------------------------------*/
+
 /*
  * Mapping functions.
  */
@@ -1567,6 +1735,12 @@ static int thin_bio_map(struct dm_target *ti, struct bio *bio,
 	struct dm_thin_lookup_result result;
 
 	map_context->ptr = thin_hook_bio(tc, bio);
+
+	if (get_pool_mode(tc->pool) == PM_FAIL) {
+		bio_io_error(bio);
+		return DM_MAPIO_SUBMITTED;
+	}
+
 	if (bio->bi_rw & (REQ_DISCARD | REQ_FLUSH | REQ_FUA)) {
 		thin_defer_bio(tc, bio);
 		return DM_MAPIO_SUBMITTED;
@@ -1603,14 +1777,35 @@ static int thin_bio_map(struct dm_target *ti, struct bio *bio,
 		break;
 
 	case -ENODATA:
+		if (get_pool_mode(tc->pool) == PM_READ_ONLY) {
+			/*
+			 * This block isn't provisioned, and we have no way
+			 * of doing so. Just error it.
+			 */
+			bio_io_error(bio);
+			r = DM_MAPIO_SUBMITTED;
+			break;
+		}
+		/* fall through */
+
+	case -EWOULDBLOCK:
 		/*
 		 * In future, the failed dm_thin_find_block above could
 		 * provide the hint to load the metadata into cache.
 		 */
-	case -EWOULDBLOCK:
 		thin_defer_bio(tc, bio);
 		r = DM_MAPIO_SUBMITTED;
 		break;
+
+	default:
+		/*
+		 * Must always call bio_io_error on failure.
+		 * dm_thin_find_block can fail with -EINVAL if the
+		 * pool is switched to fail-io mode.
+		 */
+		bio_io_error(bio);
+		r = DM_MAPIO_SUBMITTED;
+		break;
 	}
 
 	return r;
@@ -1647,15 +1842,26 @@ static int bind_control_target(struct pool *pool, struct dm_target *ti)
 {
 	struct pool_c *pt = ti->private;
 
+	/*
+	 * We want to make sure that degraded pools are never upgraded.
+	 */
+	enum pool_mode old_mode = pool->pf.mode;
+	enum pool_mode new_mode = pt->pf.mode;
+
+	if (old_mode > new_mode)
+		new_mode = old_mode;
+
 	pool->ti = ti;
 	pool->low_water_blocks = pt->low_water_blocks;
 	pool->pf = pt->pf;
+	set_pool_mode(pool, new_mode);
 
 	/*
 	 * If discard_passdown was enabled verify that the data device
 	 * supports discards. Disable discard_passdown if not; otherwise
 	 * -EOPNOTSUPP will be returned.
 	 */
+	/* FIXME: pull this out into a sep fn. */
 	if (pt->pf.discard_passdown) {
 		struct request_queue *q = bdev_get_queue(pt->data_dev->bdev);
 		if (!q || !blk_queue_discard(q)) {
@@ -1681,6 +1887,7 @@ static void unbind_control_target(struct pool *pool, struct dm_target *ti)
 /* Initialize pool features. */
 static void pool_features_init(struct pool_features *pf)
 {
+	pf->mode = PM_WRITE;
 	pf->zero_new_blocks = 1;
 	pf->discard_enabled = 1;
 	pf->discard_passdown = 1;
@@ -1711,14 +1918,16 @@ static struct kmem_cache *_endio_hook_cache;
 
 static struct pool *pool_create(struct mapped_device *pool_md,
 				struct block_device *metadata_dev,
-				unsigned long block_size, char **error)
+				unsigned long block_size,
+				int read_only, char **error)
 {
 	int r;
 	void *err_p;
 	struct pool *pool;
 	struct dm_pool_metadata *pmd;
+	bool format_device = read_only ? false : true;
 
-	pmd = dm_pool_metadata_open(metadata_dev, block_size, true);
+	pmd = dm_pool_metadata_open(metadata_dev, block_size, format_device);
 	if (IS_ERR(pmd)) {
 		*error = "Error creating metadata object";
 		return (struct pool *)pmd;
@@ -1835,8 +2044,8 @@ static void __pool_dec(struct pool *pool)
 
 static struct pool *__pool_find(struct mapped_device *pool_md,
 				struct block_device *metadata_dev,
-				unsigned long block_size, char **error,
-				int *created)
+				unsigned long block_size, int read_only,
+				char **error, int *created)
 {
 	struct pool *pool = __pool_table_lookup_metadata_dev(metadata_dev);
 
@@ -1857,7 +2066,7 @@ static struct pool *__pool_find(struct mapped_device *pool_md,
 			__pool_inc(pool);
 
 		} else {
-			pool = pool_create(pool_md, metadata_dev, block_size, error);
+			pool = pool_create(pool_md, metadata_dev, block_size, read_only, error);
 			*created = 1;
 		}
 	}
@@ -1908,19 +2117,23 @@ static int parse_pool_features(struct dm_arg_set *as, struct pool_features *pf,
 		arg_name = dm_shift_arg(as);
 		argc--;
 
-		if (!strcasecmp(arg_name, "skip_block_zeroing")) {
+		if (!strcasecmp(arg_name, "skip_block_zeroing"))
 			pf->zero_new_blocks = 0;
-			continue;
-		} else if (!strcasecmp(arg_name, "ignore_discard")) {
+
+		else if (!strcasecmp(arg_name, "ignore_discard"))
 			pf->discard_enabled = 0;
-			continue;
-		} else if (!strcasecmp(arg_name, "no_discard_passdown")) {
+
+		else if (!strcasecmp(arg_name, "no_discard_passdown"))
 			pf->discard_passdown = 0;
-			continue;
-		}
 
-		ti->error = "Unrecognised pool feature requested";
-		r = -EINVAL;
+		else if (!strcasecmp(arg_name, "read_only"))
+			pf->mode = PM_READ_ONLY;
+
+		else {
+			ti->error = "Unrecognised pool feature requested";
+			r = -EINVAL;
+			break;
+		}
 	}
 
 	return r;
@@ -2013,7 +2226,7 @@ static int pool_ctr(struct dm_target *ti, unsigned argc, char **argv)
 	}
 
 	pool = __pool_find(dm_table_get_md(ti->table), metadata_dev->bdev,
-			   block_size, &ti->error, &pool_created);
+			   block_size, pf.mode == PM_READ_ONLY, &ti->error, &pool_created);
 	if (IS_ERR(pool)) {
 		r = PTR_ERR(pool);
 		goto out_free_pt;
@@ -2146,15 +2359,12 @@ static int pool_preresume(struct dm_target *ti)
 		r = dm_pool_resize_data_dev(pool->pmd, data_size);
 		if (r) {
 			DMERR("failed to resize data device");
+			/* FIXME Stricter than necessary: Rollback transaction instead here */
+			set_pool_mode(pool, PM_READ_ONLY);
 			return r;
 		}
 
-		r = dm_pool_commit_metadata(pool->pmd);
-		if (r) {
-			DMERR("%s: dm_pool_commit_metadata() failed, error = %d",
-			      __func__, r);
-			return r;
-		}
+		(void) commit_or_fallback(pool);
 	}
 
 	return 0;
@@ -2177,19 +2387,12 @@ static void pool_resume(struct dm_target *ti)
 
 static void pool_postsuspend(struct dm_target *ti)
 {
-	int r;
 	struct pool_c *pt = ti->private;
 	struct pool *pool = pt->pool;
 
 	cancel_delayed_work(&pool->waker);
 	flush_workqueue(pool->wq);
-
-	r = dm_pool_commit_metadata(pool->pmd);
-	if (r < 0) {
-		DMERR("%s: dm_pool_commit_metadata() failed, error = %d",
-		      __func__, r);
-		/* FIXME: invalidate device? error the next FUA or FLUSH bio ?*/
-	}
+	(void) commit_or_fallback(pool);
 }
 
 static int check_arg_count(unsigned argc, unsigned args_required)
@@ -2323,12 +2526,7 @@ static int process_reserve_metadata_snap_mesg(unsigned argc, char **argv, struct
 	if (r)
 		return r;
 
-	r = dm_pool_commit_metadata(pool->pmd);
-	if (r) {
-		DMERR("%s: dm_pool_commit_metadata() failed, error = %d",
-		      __func__, r);
-		return r;
-	}
+	(void) commit_or_fallback(pool);
 
 	r = dm_pool_reserve_metadata_snap(pool->pmd);
 	if (r)
@@ -2389,16 +2587,32 @@ static int pool_message(struct dm_target *ti, unsigned argc, char **argv)
 	else
 		DMWARN("Unrecognised thin pool target message received: %s", argv[0]);
 
-	if (!r) {
-		r = dm_pool_commit_metadata(pool->pmd);
-		if (r)
-			DMERR("%s message: dm_pool_commit_metadata() failed, error = %d",
-			      argv[0], r);
-	}
+	if (!r)
+		(void) commit_or_fallback(pool);
 
 	return r;
 }
 
+static void emit_flags(struct pool_features *pf, char *result,
+		       unsigned sz, unsigned maxlen)
+{
+	unsigned count = !pf->zero_new_blocks + !pf->discard_enabled +
+		!pf->discard_passdown + (pf->mode == PM_READ_ONLY);
+	DMEMIT("%u ", count);
+
+	if (!pf->zero_new_blocks)
+		DMEMIT("skip_block_zeroing ");
+
+	if (!pf->discard_enabled)
+		DMEMIT("ignore_discard ");
+
+	if (!pf->discard_passdown)
+		DMEMIT("no_discard_passdown ");
+
+	if (pf->mode == PM_READ_ONLY)
+		DMEMIT("read_only ");
+}
+
 /*
  * Status line is:
  *    <transaction id> <used metadata sectors>/<total metadata sectors>
@@ -2407,7 +2621,7 @@ static int pool_message(struct dm_target *ti, unsigned argc, char **argv)
 static int pool_status(struct dm_target *ti, status_type_t type,
 		       char *result, unsigned maxlen)
 {
-	int r, count;
+	int r;
 	unsigned sz = 0;
 	uint64_t transaction_id;
 	dm_block_t nr_free_blocks_data;
@@ -2422,6 +2636,11 @@ static int pool_status(struct dm_target *ti, status_type_t type,
 
 	switch (type) {
 	case STATUSTYPE_INFO:
+		if (get_pool_mode(pool) == PM_FAIL) {
+			DMEMIT("Fail");
+			break;
+		}
+
 		r = dm_pool_get_metadata_transaction_id(pool->pmd,
 							&transaction_id);
 		if (r)
@@ -2457,9 +2676,19 @@ static int pool_status(struct dm_target *ti, status_type_t type,
 		       (unsigned long long)nr_blocks_data);
 
 		if (held_root)
-			DMEMIT("%llu", held_root);
+			DMEMIT("%llu ", held_root);
+		else
+			DMEMIT("- ");
+
+		if (pool->pf.mode == PM_READ_ONLY)
+			DMEMIT("ro ");
 		else
-			DMEMIT("-");
+			DMEMIT("rw ");
+
+		if (pool->pf.discard_enabled && pool->pf.discard_passdown)
+			DMEMIT("discard_passdown");
+		else
+			DMEMIT("no_discard_passdown");
 
 		break;
 
@@ -2469,20 +2698,7 @@ static int pool_status(struct dm_target *ti, status_type_t type,
 		       format_dev_t(buf2, pt->data_dev->bdev->bd_dev),
 		       (unsigned long)pool->sectors_per_block,
 		       (unsigned long long)pt->low_water_blocks);
-
-		count = !pool->pf.zero_new_blocks + !pool->pf.discard_enabled +
-			!pt->pf.discard_passdown;
-		DMEMIT("%u ", count);
-
-		if (!pool->pf.zero_new_blocks)
-			DMEMIT("skip_block_zeroing ");
-
-		if (!pool->pf.discard_enabled)
-			DMEMIT("ignore_discard ");
-
-		if (!pt->pf.discard_passdown)
-			DMEMIT("no_discard_passdown ");
-
+		emit_flags(&pt->pf, result, sz, maxlen);
 		break;
 	}
 
@@ -2542,7 +2758,7 @@ static struct target_type pool_target = {
 	.name = "thin-pool",
 	.features = DM_TARGET_SINGLETON | DM_TARGET_ALWAYS_WRITEABLE |
 		    DM_TARGET_IMMUTABLE,
-	.version = {1, 2, 0},
+	.version = {1, 3, 0},
 	.module = THIS_MODULE,
 	.ctr = pool_ctr,
 	.dtr = pool_dtr,
@@ -2647,6 +2863,11 @@ static int thin_ctr(struct dm_target *ti, unsigned argc, char **argv)
 	}
 	__pool_inc(tc->pool);
 
+	if (get_pool_mode(tc->pool) == PM_FAIL) {
+		ti->error = "Couldn't open thin device, Pool is in fail mode";
+		goto bad_thin_open;
+	}
+
 	r = dm_pool_open_thin_device(tc->pool->pmd, tc->dev_id, &tc->td);
 	if (r) {
 		ti->error = "Couldn't open thin internal device";
@@ -2755,6 +2976,11 @@ static int thin_status(struct dm_target *ti, status_type_t type,
 	char buf[BDEVNAME_SIZE];
 	struct thin_c *tc = ti->private;
 
+	if (get_pool_mode(tc->pool) == PM_FAIL) {
+		DMEMIT("Fail");
+		return 0;
+	}
+
 	if (!tc->td)
 		DMEMIT("-");
 	else {
@@ -2823,7 +3049,7 @@ static void thin_io_hints(struct dm_target *ti, struct queue_limits *limits)
 
 static struct target_type thin_target = {
 	.name = "thin",
-	.version = {1, 2, 0},
+	.version = {1, 3, 0},
 	.module = THIS_MODULE,
 	.ctr = thin_ctr,
 	.dtr = thin_dtr,