Diffstat (limited to 'drivers/md/dm-thin.c')

 -rw-r--r--  drivers/md/dm-thin.c | 106
 1 file changed, 99 insertions(+), 7 deletions(-)
diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c
index 53728be84dee..242ac2ea5f29 100644
--- a/drivers/md/dm-thin.c
+++ b/drivers/md/dm-thin.c
@@ -27,6 +27,9 @@
 #define MAPPING_POOL_SIZE 1024
 #define PRISON_CELLS 1024
 #define COMMIT_PERIOD HZ
+#define NO_SPACE_TIMEOUT_SECS 60
+
+static unsigned no_space_timeout_secs = NO_SPACE_TIMEOUT_SECS;
 
 DECLARE_DM_KCOPYD_THROTTLE_WITH_MODULE_PARM(snapshot_copy_throttle,
 		"A percentage of time allocated for copy on write");
@@ -175,6 +178,7 @@ struct pool {
 	struct workqueue_struct *wq;
 	struct work_struct worker;
 	struct delayed_work waker;
+	struct delayed_work no_space_timeout;
 
 	unsigned long last_commit_jiffies;
 	unsigned ref_count;
@@ -232,6 +236,13 @@ struct thin_c {
 	struct bio_list deferred_bio_list;
 	struct bio_list retry_on_resume_list;
 	struct rb_root sort_bio_list; /* sorted list of deferred bios */
+
+	/*
+	 * Ensures the thin is not destroyed until the worker has finished
+	 * iterating the active_thins list.
+	 */
+	atomic_t refcount;
+	struct completion can_destroy;
 };
 
 /*----------------------------------------------------------------*/
@@ -928,7 +939,7 @@ static int commit(struct pool *pool)
 {
 	int r;
 
-	if (get_pool_mode(pool) != PM_WRITE)
+	if (get_pool_mode(pool) >= PM_READ_ONLY)
 		return -EINVAL;
 
 	r = dm_pool_commit_metadata(pool->pmd);
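
This relational test only makes sense given the ordering of the pool_mode enum declared earlier in dm-thin.c; reproduced here for reference (not part of this patch, quoted from this era of the file):

    enum pool_mode {
            PM_WRITE,               /* metadata may be changed */
            PM_OUT_OF_DATA_SPACE,   /* metadata may be changed, though data may not be allocated */
            PM_READ_ONLY,           /* metadata may not be changed */
            PM_FAIL,                /* all I/O fails */
    };

The old test (get_pool_mode(pool) != PM_WRITE) refused to commit in PM_OUT_OF_DATA_SPACE; the new one (>= PM_READ_ONLY) permits it, which is correct because a pool that has merely run out of data space still has consistent, writable metadata worth committing, whereas read-only and failed pools must not touch it.
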
@@ -1486,6 +1497,45 @@ static void process_thin_deferred_bios(struct thin_c *tc)
 	blk_finish_plug(&plug);
 }
 
+static void thin_get(struct thin_c *tc);
+static void thin_put(struct thin_c *tc);
+
+/*
+ * We can't hold rcu_read_lock() around code that can block. So we
+ * find a thin with the rcu lock held; bump a refcount; then drop
+ * the lock.
+ */
+static struct thin_c *get_first_thin(struct pool *pool)
+{
+	struct thin_c *tc = NULL;
+
+	rcu_read_lock();
+	if (!list_empty(&pool->active_thins)) {
+		tc = list_entry_rcu(pool->active_thins.next, struct thin_c, list);
+		thin_get(tc);
+	}
+	rcu_read_unlock();
+
+	return tc;
+}
+
+static struct thin_c *get_next_thin(struct pool *pool, struct thin_c *tc)
+{
+	struct thin_c *old_tc = tc;
+
+	rcu_read_lock();
+	list_for_each_entry_continue_rcu(tc, &pool->active_thins, list) {
+		thin_get(tc);
+		thin_put(old_tc);
+		rcu_read_unlock();
+		return tc;
+	}
+	thin_put(old_tc);
+	rcu_read_unlock();
+
+	return NULL;
+}
+
 static void process_deferred_bios(struct pool *pool)
 {
 	unsigned long flags;
@@ -1493,10 +1543,11 @@ static void process_deferred_bios(struct pool *pool)
 	struct bio_list bios;
 	struct thin_c *tc;
 
-	rcu_read_lock();
-	list_for_each_entry_rcu(tc, &pool->active_thins, list)
+	tc = get_first_thin(pool);
+	while (tc) {
 		process_thin_deferred_bios(tc);
-	rcu_read_unlock();
+		tc = get_next_thin(pool, tc);
+	}
 
 	/*
 	 * If there are any deferred flush bios, we must commit
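
The comment above get_first_thin() summarises the pattern: pin an entry under the lock, do the potentially blocking work with the lock dropped, then re-take the lock only long enough to hop to the successor. A minimal userspace sketch of the same idea, with a plain mutex standing in for RCU and every name below invented for illustration (none of this is dm-thin.c code):

    #include <pthread.h>
    #include <stdio.h>

    /* Illustrative stand-in for a thin_c on the pool's active_thins list. */
    struct node {
    	struct node *next;
    	int refcount;		/* guarded by list_lock */
    	int id;
    };

    static pthread_mutex_t list_lock = PTHREAD_MUTEX_INITIALIZER;

    /* Pin the first node while briefly holding the lock. */
    static struct node *get_first(struct node *head)
    {
    	pthread_mutex_lock(&list_lock);
    	if (head)
    		head->refcount++;
    	pthread_mutex_unlock(&list_lock);
    	return head;
    }

    /* Pin the successor, then drop our pin on the current node. */
    static struct node *get_next(struct node *cur)
    {
    	struct node *next;

    	pthread_mutex_lock(&list_lock);
    	next = cur->next;
    	if (next)
    		next->refcount++;
    	cur->refcount--;	/* dm-thin signals can_destroy when this hits zero */
    	pthread_mutex_unlock(&list_lock);
    	return next;
    }

    int main(void)
    {
    	struct node c = { NULL, 0, 3 }, b = { &c, 0, 2 }, a = { &b, 0, 1 };
    	struct node *n;

    	/* The lock is never held across the (potentially blocking) body. */
    	for (n = get_first(&a); n; n = get_next(n))
    		printf("processing node %d\n", n->id);

    	return 0;
    }

In the kernel version RCU already lets the worker walk active_thins without taking the pool lock; the per-thin refcount exists only so the thin_c cannot be freed between rcu_read_unlock() and the next rcu_read_lock().
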
@@ -1543,6 +1594,20 @@ static void do_waker(struct work_struct *ws)
 	queue_delayed_work(pool->wq, &pool->waker, COMMIT_PERIOD);
 }
 
+/*
+ * We're holding onto IO to allow userland time to react.  After the
+ * timeout either the pool will have been resized (and thus back in
+ * PM_WRITE mode), or we degrade to PM_READ_ONLY and start erroring IO.
+ */
+static void do_no_space_timeout(struct work_struct *ws)
+{
+	struct pool *pool = container_of(to_delayed_work(ws), struct pool,
+					 no_space_timeout);
+
+	if (get_pool_mode(pool) == PM_OUT_OF_DATA_SPACE && !pool->pf.error_if_no_space)
+		set_pool_mode(pool, PM_READ_ONLY);
+}
+
 /*----------------------------------------------------------------*/
 
 struct noflush_work {
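
The rest of the patch gives this delayed work a full lifecycle: pool_create() initialises it, set_pool_mode() arms it when the pool falls into PM_OUT_OF_DATA_SPACE (unless error_if_no_space is set or the timeout is configured to 0), and pool_postsuspend() cancels it. If nothing resizes the pool before the timer fires, do_no_space_timeout() drops the pool to PM_READ_ONLY and the held IO starts failing instead of hanging indefinitely.
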
@@ -1578,7 +1643,7 @@ static void noflush_work(struct thin_c *tc, void (*fn)(struct work_struct *))
 {
 	struct noflush_work w;
 
-	INIT_WORK(&w.worker, fn);
+	INIT_WORK_ONSTACK(&w.worker, fn);
 	w.tc = tc;
 	atomic_set(&w.complete, 0);
 	init_waitqueue_head(&w.wait);
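
INIT_WORK_ONSTACK() is the variant required when a work item lives on the stack, as struct noflush_work w does here: with CONFIG_DEBUG_OBJECTS_WORK enabled, plain INIT_WORK() on a stack object triggers object-debugging warnings because the item was never set up through the normal allocation paths.
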
@@ -1607,6 +1672,7 @@ static void set_pool_mode(struct pool *pool, enum pool_mode new_mode)
 	struct pool_c *pt = pool->ti->private;
 	bool needs_check = dm_pool_metadata_needs_check(pool->pmd);
 	enum pool_mode old_mode = get_pool_mode(pool);
+	unsigned long no_space_timeout = ACCESS_ONCE(no_space_timeout_secs) * HZ;
 
 	/*
 	 * Never allow the pool to transition to PM_WRITE mode if user
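
ACCESS_ONCE() forces a single read of no_space_timeout_secs, which matters now that the variable is a root-writable module parameter: the value tested and the value handed to queue_delayed_work() below are guaranteed to be the same snapshot even if the parameter is changed concurrently. (Newer kernels spell this READ_ONCE().)
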
@@ -1668,6 +1734,9 @@ static void set_pool_mode(struct pool *pool, enum pool_mode new_mode)
 		pool->process_discard = process_discard;
 		pool->process_prepared_mapping = process_prepared_mapping;
 		pool->process_prepared_discard = process_prepared_discard_passdown;
+
+		if (!pool->pf.error_if_no_space && no_space_timeout)
+			queue_delayed_work(pool->wq, &pool->no_space_timeout, no_space_timeout);
 		break;
 
 	case PM_WRITE:
@@ -2053,6 +2122,7 @@ static struct pool *pool_create(struct mapped_device *pool_md,
 
 	INIT_WORK(&pool->worker, do_worker);
 	INIT_DELAYED_WORK(&pool->waker, do_waker);
+	INIT_DELAYED_WORK(&pool->no_space_timeout, do_no_space_timeout);
 	spin_lock_init(&pool->lock);
 	bio_list_init(&pool->deferred_flush_bios);
 	INIT_LIST_HEAD(&pool->prepared_mappings);
@@ -2615,6 +2685,7 @@ static void pool_postsuspend(struct dm_target *ti)
 	struct pool *pool = pt->pool;
 
 	cancel_delayed_work(&pool->waker);
+	cancel_delayed_work(&pool->no_space_timeout);
 	flush_workqueue(pool->wq);
 	(void) commit(pool);
 }
@@ -3061,11 +3132,25 @@ static struct target_type pool_target = {
 /*----------------------------------------------------------------
  * Thin target methods
  *--------------------------------------------------------------*/
+static void thin_get(struct thin_c *tc)
+{
+	atomic_inc(&tc->refcount);
+}
+
+static void thin_put(struct thin_c *tc)
+{
+	if (atomic_dec_and_test(&tc->refcount))
+		complete(&tc->can_destroy);
+}
+
 static void thin_dtr(struct dm_target *ti)
 {
 	struct thin_c *tc = ti->private;
 	unsigned long flags;
 
+	thin_put(tc);
+	wait_for_completion(&tc->can_destroy);
+
 	spin_lock_irqsave(&tc->pool->lock, flags);
 	list_del_rcu(&tc->list);
 	spin_unlock_irqrestore(&tc->pool->lock, flags);
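
Combined with the thin_ctr() hunk below, the reference protocol reads: the constructor starts refcount at 1 (the device's own reference) and initialises can_destroy; the worker's get_first_thin()/get_next_thin() helpers take and drop short-lived references while walking active_thins; thin_dtr() drops the initial reference and then blocks on can_destroy until thin_put() completes it at refcount zero, after which it is safe to unlink the thin from the list and tear it down.
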
@@ -3101,6 +3186,7 @@ static int thin_ctr(struct dm_target *ti, unsigned argc, char **argv)
 	struct thin_c *tc;
 	struct dm_dev *pool_dev, *origin_dev;
 	struct mapped_device *pool_md;
+	unsigned long flags;
 
 	mutex_lock(&dm_thin_pool_table.mutex);
 
@@ -3191,9 +3277,12 @@ static int thin_ctr(struct dm_target *ti, unsigned argc, char **argv)
 
 	mutex_unlock(&dm_thin_pool_table.mutex);
 
-	spin_lock(&tc->pool->lock);
+	atomic_set(&tc->refcount, 1);
+	init_completion(&tc->can_destroy);
+
+	spin_lock_irqsave(&tc->pool->lock, flags);
 	list_add_tail_rcu(&tc->list, &tc->pool->active_thins);
-	spin_unlock(&tc->pool->lock);
+	spin_unlock_irqrestore(&tc->pool->lock, flags);
 	/*
 	 * This synchronize_rcu() call is needed here otherwise we risk a
 	 * wake_worker() call finding no bios to process (because the newly
@@ -3422,6 +3511,9 @@ static void dm_thin_exit(void)
 module_init(dm_thin_init);
 module_exit(dm_thin_exit);
 
+module_param_named(no_space_timeout, no_space_timeout_secs, uint, S_IRUGO | S_IWUSR);
+MODULE_PARM_DESC(no_space_timeout, "Out of data space queue IO timeout in seconds");
+
 MODULE_DESCRIPTION(DM_NAME " thin provisioning target");
 MODULE_AUTHOR("Joe Thornber <dm-devel@redhat.com>");
 MODULE_LICENSE("GPL");
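
Since the parameter is declared with S_IRUGO | S_IWUSR it can be inspected by anyone and changed by root at runtime; assuming the usual packaging of dm-thin.c into the dm-thin-pool module, it should surface as /sys/module/dm_thin_pool/parameters/no_space_timeout (path inferred from the module name, so verify against your kernel). Writing 0 there makes an out-of-space pool queue IO indefinitely, restoring the pre-patch behaviour.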