author    Mikulas Patocka <mpatocka@redhat.com>    2013-03-01 17:45:49 -0500
committer Alasdair G Kergon <agk@redhat.com>       2013-03-01 17:45:49 -0500
commit    df5d2e9089c7d5b8c46f767e4278610ea3e815b9 (patch)
tree      5285aa8b0c794da419133a209437b32a11aa8a8d /drivers/md
parent    a26062416ef8add48f16fbadded2b5f6fb84d024 (diff)
dm kcopyd: introduce configurable throttling
This patch allows the administrator to reduce the rate at which kcopyd
issues I/O.

Each module that uses kcopyd acquires a throttle parameter that can be
set in /sys/module/*/parameters.

We maintain a history of kcopyd usage by each module in the variables
io_period and total_period in struct dm_kcopyd_throttle. The actual
kcopyd activity is calculated as a percentage of time equal to
"(100 * io_period / total_period)". This is compared with the
user-defined throttle percentage threshold and if it is exceeded, we
sleep.

Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Signed-off-by: Alasdair G Kergon <agk@redhat.com>
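As a rough illustration of the accounting described above, here is a minimal
user-space sketch (not the kernel code; struct and function names below are
hypothetical, though the counter names mirror struct dm_kcopyd_throttle):
io_period counts time during which kcopyd had I/O in flight, total_period
counts all elapsed time, and new I/O is deferred while
100 * io_period / total_period exceeds the configured limit.

#include <stdio.h>

/* Simplified model of the kcopyd throttle accounting. */
struct throttle_model {
	unsigned throttle;      /* user-set limit, in percent; >= 100 means unthrottled */
	unsigned io_period;     /* time units during which I/O was in flight */
	unsigned total_period;  /* all time units observed in the same window */
};

/* Nonzero when the busy percentage exceeds the limit, i.e. kcopyd should
 * sleep before issuing more I/O.  This mirrors the "skew > 0" test in
 * io_job_start() below, rearranged to avoid integer-division truncation. */
static int over_limit(const struct throttle_model *t)
{
	if (t->throttle >= 100 || !t->total_period)
		return 0;
	return 100 * t->io_period > t->throttle * t->total_period;
}

int main(void)
{
	/* Example: I/O was in flight for 60 of the last 80 time units (75% busy),
	 * but the administrator allowed only 50%. */
	struct throttle_model t = { .throttle = 50, .io_period = 60, .total_period = 80 };

	printf("busy %u%%, limit %u%% -> %s\n",
	       100 * t.io_period / t.total_period, t.throttle,
	       over_limit(&t) ? "throttle (sleep)" : "issue I/O");
	return 0;
}

In the patch itself the limit is exposed through the per-module parameters
under /sys/module/*/parameters, created by the
DECLARE_DM_KCOPYD_THROTTLE_WITH_MODULE_PARM() calls in the hunks below, and
values of 100 or more take the fast path in io_job_start() that skips
throttling entirely.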
Diffstat (limited to 'drivers/md')
-rw-r--r--  drivers/md/dm-kcopyd.c | 121
-rw-r--r--  drivers/md/dm-raid1.c  |   5
-rw-r--r--  drivers/md/dm-snap.c   |   5
-rw-r--r--  drivers/md/dm-thin.c   |   5
4 files changed, 132 insertions, 4 deletions
diff --git a/drivers/md/dm-kcopyd.c b/drivers/md/dm-kcopyd.c
index 68c02673263b..d581fe5d2faf 100644
--- a/drivers/md/dm-kcopyd.c
+++ b/drivers/md/dm-kcopyd.c
@@ -22,6 +22,7 @@
 #include <linux/vmalloc.h>
 #include <linux/workqueue.h>
 #include <linux/mutex.h>
+#include <linux/delay.h>
 #include <linux/device-mapper.h>
 #include <linux/dm-kcopyd.h>
 
@@ -51,6 +52,8 @@ struct dm_kcopyd_client {
 	struct workqueue_struct *kcopyd_wq;
 	struct work_struct kcopyd_work;
 
+	struct dm_kcopyd_throttle *throttle;
+
 /*
  * We maintain three lists of jobs:
  *
@@ -68,6 +71,117 @@ struct dm_kcopyd_client {
 
 static struct page_list zero_page_list;
 
+static DEFINE_SPINLOCK(throttle_spinlock);
+
+/*
+ * IO/IDLE accounting slowly decays after (1 << ACCOUNT_INTERVAL_SHIFT) period.
+ * When total_period >= (1 << ACCOUNT_INTERVAL_SHIFT) the counters are divided
+ * by 2.
+ */
+#define ACCOUNT_INTERVAL_SHIFT	SHIFT_HZ
+
+/*
+ * Sleep this number of milliseconds.
+ *
+ * The value was decided experimentally.
+ * Smaller values seem to cause an increased copy rate above the limit.
+ * The reason for this is unknown but possibly due to jiffies rounding errors
+ * or read/write cache inside the disk.
+ */
+#define SLEEP_MSEC		100
+
+/*
+ * Maximum number of sleep events. There is a theoretical livelock if more
+ * kcopyd clients do work simultaneously which this limit avoids.
+ */
+#define MAX_SLEEPS		10
+
+static void io_job_start(struct dm_kcopyd_throttle *t)
+{
+	unsigned throttle, now, difference;
+	int slept = 0, skew;
+
+	if (unlikely(!t))
+		return;
+
+try_again:
+	spin_lock_irq(&throttle_spinlock);
+
+	throttle = ACCESS_ONCE(t->throttle);
+
+	if (likely(throttle >= 100))
+		goto skip_limit;
+
+	now = jiffies;
+	difference = now - t->last_jiffies;
+	t->last_jiffies = now;
+	if (t->num_io_jobs)
+		t->io_period += difference;
+	t->total_period += difference;
+
+	/*
+	 * Maintain sane values if we got a temporary overflow.
+	 */
+	if (unlikely(t->io_period > t->total_period))
+		t->io_period = t->total_period;
+
+	if (unlikely(t->total_period >= (1 << ACCOUNT_INTERVAL_SHIFT))) {
+		int shift = fls(t->total_period >> ACCOUNT_INTERVAL_SHIFT);
+		t->total_period >>= shift;
+		t->io_period >>= shift;
+	}
+
+	skew = t->io_period - throttle * t->total_period / 100;
+
+	if (unlikely(skew > 0) && slept < MAX_SLEEPS) {
+		slept++;
+		spin_unlock_irq(&throttle_spinlock);
+		msleep(SLEEP_MSEC);
+		goto try_again;
+	}
+
+skip_limit:
+	t->num_io_jobs++;
+
+	spin_unlock_irq(&throttle_spinlock);
+}
+
+static void io_job_finish(struct dm_kcopyd_throttle *t)
+{
+	unsigned long flags;
+
+	if (unlikely(!t))
+		return;
+
+	spin_lock_irqsave(&throttle_spinlock, flags);
+
+	t->num_io_jobs--;
+
+	if (likely(ACCESS_ONCE(t->throttle) >= 100))
+		goto skip_limit;
+
+	if (!t->num_io_jobs) {
+		unsigned now, difference;
+
+		now = jiffies;
+		difference = now - t->last_jiffies;
+		t->last_jiffies = now;
+
+		t->io_period += difference;
+		t->total_period += difference;
+
+		/*
+		 * Maintain sane values if we got a temporary overflow.
+		 */
+		if (unlikely(t->io_period > t->total_period))
+			t->io_period = t->total_period;
+	}
+
+skip_limit:
+	spin_unlock_irqrestore(&throttle_spinlock, flags);
+}
+
+
 static void wake(struct dm_kcopyd_client *kc)
 {
 	queue_work(kc->kcopyd_wq, &kc->kcopyd_work);
@@ -348,6 +462,8 @@ static void complete_io(unsigned long error, void *context)
 	struct kcopyd_job *job = (struct kcopyd_job *) context;
 	struct dm_kcopyd_client *kc = job->kc;
 
+	io_job_finish(kc->throttle);
+
 	if (error) {
 		if (job->rw & WRITE)
 			job->write_err |= error;
@@ -389,6 +505,8 @@ static int run_io_job(struct kcopyd_job *job)
 		.client = job->kc->io_client,
 	};
 
+	io_job_start(job->kc->throttle);
+
 	if (job->rw == READ)
 		r = dm_io(&io_req, 1, &job->source, NULL);
 	else
@@ -695,7 +813,7 @@ int kcopyd_cancel(struct kcopyd_job *job, int block)
 /*-----------------------------------------------------------------
  * Client setup
  *---------------------------------------------------------------*/
-struct dm_kcopyd_client *dm_kcopyd_client_create(void)
+struct dm_kcopyd_client *dm_kcopyd_client_create(struct dm_kcopyd_throttle *throttle)
 {
 	int r = -ENOMEM;
 	struct dm_kcopyd_client *kc;
@@ -708,6 +826,7 @@ struct dm_kcopyd_client *dm_kcopyd_client_create(void)
 	INIT_LIST_HEAD(&kc->complete_jobs);
 	INIT_LIST_HEAD(&kc->io_jobs);
 	INIT_LIST_HEAD(&kc->pages_jobs);
+	kc->throttle = throttle;
 
 	kc->job_pool = mempool_create_slab_pool(MIN_JOBS, _job_cache);
 	if (!kc->job_pool)
diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c
index e2ea97723e10..d053098c6a91 100644
--- a/drivers/md/dm-raid1.c
+++ b/drivers/md/dm-raid1.c
@@ -82,6 +82,9 @@ struct mirror_set {
 	struct mirror mirror[0];
 };
 
+DECLARE_DM_KCOPYD_THROTTLE_WITH_MODULE_PARM(raid1_resync_throttle,
+		"A percentage of time allocated for raid resynchronization");
+
 static void wakeup_mirrord(void *context)
 {
 	struct mirror_set *ms = context;
@@ -1111,7 +1114,7 @@ static int mirror_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 		goto err_destroy_wq;
 	}
 
-	ms->kcopyd_client = dm_kcopyd_client_create();
+	ms->kcopyd_client = dm_kcopyd_client_create(&dm_kcopyd_throttle);
 	if (IS_ERR(ms->kcopyd_client)) {
 		r = PTR_ERR(ms->kcopyd_client);
 		goto err_destroy_wq;
diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c
index 58c2b5881377..c0e07026a8d1 100644
--- a/drivers/md/dm-snap.c
+++ b/drivers/md/dm-snap.c
@@ -124,6 +124,9 @@ struct dm_snapshot {
 #define RUNNING_MERGE		0
 #define SHUTDOWN_MERGE		1
 
+DECLARE_DM_KCOPYD_THROTTLE_WITH_MODULE_PARM(snapshot_copy_throttle,
+		"A percentage of time allocated for copy on write");
+
 struct dm_dev *dm_snap_origin(struct dm_snapshot *s)
 {
 	return s->origin;
@@ -1108,7 +1111,7 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 		goto bad_hash_tables;
 	}
 
-	s->kcopyd_client = dm_kcopyd_client_create();
+	s->kcopyd_client = dm_kcopyd_client_create(&dm_kcopyd_throttle);
 	if (IS_ERR(s->kcopyd_client)) {
 		r = PTR_ERR(s->kcopyd_client);
 		ti->error = "Could not create kcopyd client";
diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c
index 303e11da7c2a..35d9d0396cc2 100644
--- a/drivers/md/dm-thin.c
+++ b/drivers/md/dm-thin.c
@@ -26,6 +26,9 @@
 #define PRISON_CELLS 1024
 #define COMMIT_PERIOD HZ
 
+DECLARE_DM_KCOPYD_THROTTLE_WITH_MODULE_PARM(snapshot_copy_throttle,
+		"A percentage of time allocated for copy on write");
+
 /*
  * The block size of the device holding pool data must be
  * between 64KB and 1GB.
@@ -1642,7 +1645,7 @@ static struct pool *pool_create(struct mapped_device *pool_md,
 		goto bad_prison;
 	}
 
-	pool->copier = dm_kcopyd_client_create();
+	pool->copier = dm_kcopyd_client_create(&dm_kcopyd_throttle);
 	if (IS_ERR(pool->copier)) {
 		r = PTR_ERR(pool->copier);
 		*error = "Error creating pool's kcopyd client";