 drivers/md/dm-thin.c | 106 +++++++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 99 insertions(+), 7 deletions(-)
diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c
index 53728be84dee..242ac2ea5f29 100644
--- a/drivers/md/dm-thin.c
+++ b/drivers/md/dm-thin.c
@@ -27,6 +27,9 @@
 #define MAPPING_POOL_SIZE 1024
 #define PRISON_CELLS 1024
 #define COMMIT_PERIOD HZ
+#define NO_SPACE_TIMEOUT_SECS 60
+
+static unsigned no_space_timeout_secs = NO_SPACE_TIMEOUT_SECS;
 
 DECLARE_DM_KCOPYD_THROTTLE_WITH_MODULE_PARM(snapshot_copy_throttle,
                 "A percentage of time allocated for copy on write");
@@ -175,6 +178,7 @@ struct pool {
         struct workqueue_struct *wq;
         struct work_struct worker;
         struct delayed_work waker;
+        struct delayed_work no_space_timeout;
 
         unsigned long last_commit_jiffies;
         unsigned ref_count;
@@ -232,6 +236,13 @@ struct thin_c {
         struct bio_list deferred_bio_list;
         struct bio_list retry_on_resume_list;
         struct rb_root sort_bio_list; /* sorted list of deferred bios */
+
+        /*
+         * Ensures the thin is not destroyed until the worker has finished
+         * iterating the active_thins list.
+         */
+        atomic_t refcount;
+        struct completion can_destroy;
 };
 
 /*----------------------------------------------------------------*/
@@ -928,7 +939,7 @@ static int commit(struct pool *pool)
 {
         int r;
 
-        if (get_pool_mode(pool) != PM_WRITE)
+        if (get_pool_mode(pool) >= PM_READ_ONLY)
                 return -EINVAL;
 
         r = dm_pool_commit_metadata(pool->pmd);
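
The relaxed check matters for the out-of-data-space handling added elsewhere in this patch: commit() now refuses only when the pool mode is at or above PM_READ_ONLY, so a pool that has merely run out of data space (its metadata is still writable) can keep committing. This relies on the ordering of the pool-mode enum; the sketch below is paraphrased from the dm-thin source of this era rather than quoted from this diff, so verify enum pool_mode in your tree:

enum pool_mode {
        PM_WRITE,               /* metadata may be changed */
        PM_OUT_OF_DATA_SPACE,   /* metadata may be changed, data allocation fails */
        PM_READ_ONLY,           /* metadata may not be changed */
        PM_FAIL,                /* all I/O fails */
};

With that ordering, "get_pool_mode(pool) >= PM_READ_ONLY" rejects only PM_READ_ONLY and PM_FAIL, whereas the old "!= PM_WRITE" also rejected PM_OUT_OF_DATA_SPACE.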
@@ -1486,6 +1497,45 @@ static void process_thin_deferred_bios(struct thin_c *tc)
         blk_finish_plug(&plug);
 }
 
+static void thin_get(struct thin_c *tc);
+static void thin_put(struct thin_c *tc);
+
+/*
+ * We can't hold rcu_read_lock() around code that can block. So we
+ * find a thin with the rcu lock held; bump a refcount; then drop
+ * the lock.
+ */
+static struct thin_c *get_first_thin(struct pool *pool)
+{
+        struct thin_c *tc = NULL;
+
+        rcu_read_lock();
+        if (!list_empty(&pool->active_thins)) {
+                tc = list_entry_rcu(pool->active_thins.next, struct thin_c, list);
+                thin_get(tc);
+        }
+        rcu_read_unlock();
+
+        return tc;
+}
+
+static struct thin_c *get_next_thin(struct pool *pool, struct thin_c *tc)
+{
+        struct thin_c *old_tc = tc;
+
+        rcu_read_lock();
+        list_for_each_entry_continue_rcu(tc, &pool->active_thins, list) {
+                thin_get(tc);
+                thin_put(old_tc);
+                rcu_read_unlock();
+                return tc;
+        }
+        thin_put(old_tc);
+        rcu_read_unlock();
+
+        return NULL;
+}
+
 static void process_deferred_bios(struct pool *pool)
 {
         unsigned long flags;
@@ -1493,10 +1543,11 @@ static void process_deferred_bios(struct pool *pool)
         struct bio_list bios;
         struct thin_c *tc;
 
-        rcu_read_lock();
-        list_for_each_entry_rcu(tc, &pool->active_thins, list)
-                process_thin_deferred_bios(tc);
-        rcu_read_unlock();
+        tc = get_first_thin(pool);
+        while (tc) {
+                process_thin_deferred_bios(tc);
+                tc = get_next_thin(pool, tc);
+        }
 
         /*
          * If there are any deferred flush bios, we must commit
@@ -1543,6 +1594,20 @@ static void do_waker(struct work_struct *ws)
         queue_delayed_work(pool->wq, &pool->waker, COMMIT_PERIOD);
 }
 
+/*
+ * We're holding onto IO to allow userland time to react. After the
+ * timeout either the pool will have been resized (and thus back in
+ * PM_WRITE mode), or we degrade to PM_READ_ONLY and start erroring IO.
+ */
+static void do_no_space_timeout(struct work_struct *ws)
+{
+        struct pool *pool = container_of(to_delayed_work(ws), struct pool,
+                                         no_space_timeout);
+
+        if (get_pool_mode(pool) == PM_OUT_OF_DATA_SPACE && !pool->pf.error_if_no_space)
+                set_pool_mode(pool, PM_READ_ONLY);
+}
+
 /*----------------------------------------------------------------*/
 
 struct noflush_work {
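
do_no_space_timeout() together with the queue_delayed_work()/cancel_delayed_work() calls later in this patch is the stock delayed-work idiom. The following is a minimal, hypothetical sketch of that idiom (made-up names, and the system workqueue instead of dm-thin's own pool->wq), showing how the handler recovers its context and how the work is armed and torn down:

#include <linux/module.h>
#include <linux/workqueue.h>
#include <linux/jiffies.h>

static unsigned demo_timeout_secs = 60;
static struct delayed_work demo_timeout;

/* Runs once, demo_timeout_secs after it was queued. */
static void demo_timeout_fn(struct work_struct *ws)
{
        struct delayed_work *dw = to_delayed_work(ws);

        /* dm-thin embeds its delayed_work in struct pool and uses
         * container_of(dw, struct pool, no_space_timeout) here. */
        pr_info("demo: timeout fired after %u seconds\n", demo_timeout_secs);
        (void)dw;
}

static int __init demo_init(void)
{
        INIT_DELAYED_WORK(&demo_timeout, demo_timeout_fn);
        /* dm-thin arms its timer when the pool enters PM_OUT_OF_DATA_SPACE. */
        schedule_delayed_work(&demo_timeout, demo_timeout_secs * HZ);
        return 0;
}

static void __exit demo_exit(void)
{
        /* Mirrors pool_postsuspend(): make sure the work cannot fire later. */
        cancel_delayed_work_sync(&demo_timeout);
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");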
@@ -1578,7 +1643,7 @@ static void noflush_work(struct thin_c *tc, void (*fn)(struct work_struct *))
 {
         struct noflush_work w;
 
-        INIT_WORK(&w.worker, fn);
+        INIT_WORK_ONSTACK(&w.worker, fn);
         w.tc = tc;
         atomic_set(&w.complete, 0);
         init_waitqueue_head(&w.wait);
@@ -1607,6 +1672,7 @@ static void set_pool_mode(struct pool *pool, enum pool_mode new_mode)
         struct pool_c *pt = pool->ti->private;
         bool needs_check = dm_pool_metadata_needs_check(pool->pmd);
         enum pool_mode old_mode = get_pool_mode(pool);
+        unsigned long no_space_timeout = ACCESS_ONCE(no_space_timeout_secs) * HZ;
 
         /*
          * Never allow the pool to transition to PM_WRITE mode if user
@@ -1668,6 +1734,9 @@ static void set_pool_mode(struct pool *pool, enum pool_mode new_mode)
                 pool->process_discard = process_discard;
                 pool->process_prepared_mapping = process_prepared_mapping;
                 pool->process_prepared_discard = process_prepared_discard_passdown;
+
+                if (!pool->pf.error_if_no_space && no_space_timeout)
+                        queue_delayed_work(pool->wq, &pool->no_space_timeout, no_space_timeout);
                 break;
 
         case PM_WRITE:
@@ -2053,6 +2122,7 @@ static struct pool *pool_create(struct mapped_device *pool_md,
 
         INIT_WORK(&pool->worker, do_worker);
         INIT_DELAYED_WORK(&pool->waker, do_waker);
+        INIT_DELAYED_WORK(&pool->no_space_timeout, do_no_space_timeout);
         spin_lock_init(&pool->lock);
         bio_list_init(&pool->deferred_flush_bios);
         INIT_LIST_HEAD(&pool->prepared_mappings);
@@ -2615,6 +2685,7 @@ static void pool_postsuspend(struct dm_target *ti)
         struct pool *pool = pt->pool;
 
         cancel_delayed_work(&pool->waker);
+        cancel_delayed_work(&pool->no_space_timeout);
         flush_workqueue(pool->wq);
         (void) commit(pool);
 }
@@ -3061,11 +3132,25 @@ static struct target_type pool_target = {
 /*----------------------------------------------------------------
  * Thin target methods
  *--------------------------------------------------------------*/
+static void thin_get(struct thin_c *tc)
+{
+        atomic_inc(&tc->refcount);
+}
+
+static void thin_put(struct thin_c *tc)
+{
+        if (atomic_dec_and_test(&tc->refcount))
+                complete(&tc->can_destroy);
+}
+
 static void thin_dtr(struct dm_target *ti)
 {
         struct thin_c *tc = ti->private;
         unsigned long flags;
 
+        thin_put(tc);
+        wait_for_completion(&tc->can_destroy);
+
         spin_lock_irqsave(&tc->pool->lock, flags);
         list_del_rcu(&tc->list);
         spin_unlock_irqrestore(&tc->pool->lock, flags);
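
thin_get()/thin_put() and the wait in thin_dtr() close the race with the worker's list walk: thin_ctr() (further down in this diff) takes the initial reference, get_first_thin()/get_next_thin() hold one reference per device while process_thin_deferred_bios() may block, and thin_dtr() drops the creator's reference and then sleeps on can_destroy until the last holder has called thin_put(). A self-contained sketch of this refcount-plus-completion teardown pattern, using a hypothetical object rather than dm-thin's types:

#include <linux/atomic.h>
#include <linux/completion.h>

struct obj {
        atomic_t refcount;
        struct completion can_destroy;
};

static void obj_init(struct obj *o)
{
        atomic_set(&o->refcount, 1);            /* the creator's reference */
        init_completion(&o->can_destroy);
}

static void obj_get(struct obj *o)
{
        atomic_inc(&o->refcount);
}

static void obj_put(struct obj *o)
{
        /* The final put, wherever it happens, releases the destroyer. */
        if (atomic_dec_and_test(&o->refcount))
                complete(&o->can_destroy);
}

static void obj_destroy(struct obj *o)
{
        obj_put(o);                             /* drop the creator's reference */
        wait_for_completion(&o->can_destroy);
        /* No walker still holds a reference; tearing down is now safe. */
}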
@@ -3101,6 +3186,7 @@ static int thin_ctr(struct dm_target *ti, unsigned argc, char **argv)
         struct thin_c *tc;
         struct dm_dev *pool_dev, *origin_dev;
         struct mapped_device *pool_md;
+        unsigned long flags;
 
         mutex_lock(&dm_thin_pool_table.mutex);
 
@@ -3191,9 +3277,12 @@ static int thin_ctr(struct dm_target *ti, unsigned argc, char **argv)
 
         mutex_unlock(&dm_thin_pool_table.mutex);
 
-        spin_lock(&tc->pool->lock);
+        atomic_set(&tc->refcount, 1);
+        init_completion(&tc->can_destroy);
+
+        spin_lock_irqsave(&tc->pool->lock, flags);
         list_add_tail_rcu(&tc->list, &tc->pool->active_thins);
-        spin_unlock(&tc->pool->lock);
+        spin_unlock_irqrestore(&tc->pool->lock, flags);
         /*
          * This synchronize_rcu() call is needed here otherwise we risk a
          * wake_worker() call finding no bios to process (because the newly
@@ -3422,6 +3511,9 @@ static void dm_thin_exit(void)
 module_init(dm_thin_init);
 module_exit(dm_thin_exit);
 
+module_param_named(no_space_timeout, no_space_timeout_secs, uint, S_IRUGO | S_IWUSR);
+MODULE_PARM_DESC(no_space_timeout, "Out of data space queue IO timeout in seconds");
+
 MODULE_DESCRIPTION(DM_NAME " thin provisioning target");
 MODULE_AUTHOR("Joe Thornber <dm-devel@redhat.com>");
 MODULE_LICENSE("GPL");
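
module_param_named() turns no_space_timeout_secs into a runtime tunable: it can be given at module load time and, thanks to S_IRUGO | S_IWUSR, read by anyone and changed by root under /sys/module/<module>/parameters/no_space_timeout (for dm-thin.c the module is normally dm_thin_pool; path given from memory, so verify on your system). A value of 0 disables the timer, because set_pool_mode() only queues the delayed work when the computed no_space_timeout is non-zero. A minimal sketch of the same parameter idiom in a hypothetical module:

#include <linux/module.h>
#include <linux/moduleparam.h>

static unsigned demo_timeout_secs = 60;

/* Load-time option and runtime knob:
 * /sys/module/<this_module>/parameters/timeout (root-writable). */
module_param_named(timeout, demo_timeout_secs, uint, S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(timeout, "Demo timeout in seconds");

static int __init demo_param_init(void)
{
        pr_info("demo: timeout starts at %u seconds\n", demo_timeout_secs);
        return 0;
}

static void __exit demo_param_exit(void)
{
}

module_init(demo_param_init);
module_exit(demo_param_exit);
MODULE_LICENSE("GPL");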