| author    | Mikulas Patocka <mpatocka@redhat.com>                | 2013-03-01 17:45:49 -0500 |
|-----------|------------------------------------------------------|---------------------------|
| committer | Alasdair G Kergon <agk@redhat.com>                   | 2013-03-01 17:45:49 -0500 |
| commit    | df5d2e9089c7d5b8c46f767e4278610ea3e815b9 (patch)     |                           |
| tree      | 5285aa8b0c794da419133a209437b32a11aa8a8d /drivers/md |                           |
| parent    | a26062416ef8add48f16fbadded2b5f6fb84d024 (diff)      |                           |
dm kcopyd: introduce configurable throttling
This patch allows the administrator to reduce the rate at which kcopyd
issues I/O.

Each module that uses kcopyd acquires a throttle parameter that can be
set in /sys/module/*/parameters.

We maintain a history of kcopyd usage by each module in the variables
io_period and total_period in struct dm_kcopyd_throttle. The actual
kcopyd activity is calculated as a percentage of time equal to
"(100 * io_period / total_period)". This is compared with the user-defined
throttle percentage threshold, and if it is exceeded, we sleep.
Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Signed-off-by: Alasdair G Kergon <agk@redhat.com>
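
To make the arithmetic concrete, here is a stand-alone sketch that replays the skew test from io_job_start() (shown in the diff below) with invented numbers. This is an illustration only, not kernel code:

```c
#include <stdio.h>

int main(void)
{
	/* invented sample state: 60 of the last 100 jiffies had I/O in flight */
	unsigned io_period = 60;
	unsigned total_period = 100;
	unsigned throttle = 50;		/* user-set limit: at most 50% busy */

	/* measured activity: 100 * io_period / total_period = 60% here */
	int skew = (int)(io_period - throttle * total_period / 100);

	if (skew > 0)
		printf("over the limit by %d jiffies of I/O time: sleep\n", skew);
	else
		printf("within the limit: issue the I/O\n");
	return 0;
}
```

A positive skew means the decayed I/O time exceeds the allowed share of the decayed total time, so kcopyd backs off before issuing the next I/O.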
Diffstat (limited to 'drivers/md')

```
 drivers/md/dm-kcopyd.c | 121
 drivers/md/dm-raid1.c  |   5
 drivers/md/dm-snap.c   |   5
 drivers/md/dm-thin.c   |   5
 4 files changed, 132 insertions(+), 4 deletions(-)
```
```diff
diff --git a/drivers/md/dm-kcopyd.c b/drivers/md/dm-kcopyd.c
index 68c02673263b..d581fe5d2faf 100644
--- a/drivers/md/dm-kcopyd.c
+++ b/drivers/md/dm-kcopyd.c
@@ -22,6 +22,7 @@
 #include <linux/vmalloc.h>
 #include <linux/workqueue.h>
 #include <linux/mutex.h>
+#include <linux/delay.h>
 #include <linux/device-mapper.h>
 #include <linux/dm-kcopyd.h>
 
@@ -51,6 +52,8 @@ struct dm_kcopyd_client {
 	struct workqueue_struct *kcopyd_wq;
 	struct work_struct kcopyd_work;
 
+	struct dm_kcopyd_throttle *throttle;
+
 	/*
 	 * We maintain three lists of jobs:
 	 *
@@ -68,6 +71,117 @@ struct dm_kcopyd_client {
 
 static struct page_list zero_page_list;
 
+static DEFINE_SPINLOCK(throttle_spinlock);
+
+/*
+ * IO/IDLE accounting slowly decays after (1 << ACCOUNT_INTERVAL_SHIFT) period.
+ * When total_period >= (1 << ACCOUNT_INTERVAL_SHIFT) the counters are divided
+ * by 2.
+ */
+#define ACCOUNT_INTERVAL_SHIFT	SHIFT_HZ
+
+/*
+ * Sleep this number of milliseconds.
+ *
+ * The value was decided experimentally.
+ * Smaller values seem to cause an increased copy rate above the limit.
+ * The reason for this is unknown but possibly due to jiffies rounding errors
+ * or read/write cache inside the disk.
+ */
+#define SLEEP_MSEC	100
+
+/*
+ * Maximum number of sleep events. There is a theoretical livelock if more
+ * kcopyd clients do work simultaneously which this limit avoids.
+ */
+#define MAX_SLEEPS	10
+
+static void io_job_start(struct dm_kcopyd_throttle *t)
+{
+	unsigned throttle, now, difference;
+	int slept = 0, skew;
+
+	if (unlikely(!t))
+		return;
+
+try_again:
+	spin_lock_irq(&throttle_spinlock);
+
+	throttle = ACCESS_ONCE(t->throttle);
+
+	if (likely(throttle >= 100))
+		goto skip_limit;
+
+	now = jiffies;
+	difference = now - t->last_jiffies;
+	t->last_jiffies = now;
+	if (t->num_io_jobs)
+		t->io_period += difference;
+	t->total_period += difference;
+
+	/*
+	 * Maintain sane values if we got a temporary overflow.
+	 */
+	if (unlikely(t->io_period > t->total_period))
+		t->io_period = t->total_period;
+
+	if (unlikely(t->total_period >= (1 << ACCOUNT_INTERVAL_SHIFT))) {
+		int shift = fls(t->total_period >> ACCOUNT_INTERVAL_SHIFT);
+		t->total_period >>= shift;
+		t->io_period >>= shift;
+	}
+
+	skew = t->io_period - throttle * t->total_period / 100;
+
+	if (unlikely(skew > 0) && slept < MAX_SLEEPS) {
+		slept++;
+		spin_unlock_irq(&throttle_spinlock);
+		msleep(SLEEP_MSEC);
+		goto try_again;
+	}
+
+skip_limit:
+	t->num_io_jobs++;
+
+	spin_unlock_irq(&throttle_spinlock);
+}
+
+static void io_job_finish(struct dm_kcopyd_throttle *t)
+{
+	unsigned long flags;
+
+	if (unlikely(!t))
+		return;
+
+	spin_lock_irqsave(&throttle_spinlock, flags);
+
+	t->num_io_jobs--;
+
+	if (likely(ACCESS_ONCE(t->throttle) >= 100))
+		goto skip_limit;
+
+	if (!t->num_io_jobs) {
+		unsigned now, difference;
+
+		now = jiffies;
+		difference = now - t->last_jiffies;
+		t->last_jiffies = now;
+
+		t->io_period += difference;
+		t->total_period += difference;
+
+		/*
+		 * Maintain sane values if we got a temporary overflow.
+		 */
+		if (unlikely(t->io_period > t->total_period))
+			t->io_period = t->total_period;
+	}
+
+skip_limit:
+	spin_unlock_irqrestore(&throttle_spinlock, flags);
+}
+
+
 static void wake(struct dm_kcopyd_client *kc)
 {
 	queue_work(kc->kcopyd_wq, &kc->kcopyd_work);
@@ -348,6 +462,8 @@ static void complete_io(unsigned long error, void *context)
 	struct kcopyd_job *job = (struct kcopyd_job *) context;
 	struct dm_kcopyd_client *kc = job->kc;
 
+	io_job_finish(kc->throttle);
+
 	if (error) {
 		if (job->rw & WRITE)
 			job->write_err |= error;
@@ -389,6 +505,8 @@ static int run_io_job(struct kcopyd_job *job)
 		.client = job->kc->io_client,
 	};
 
+	io_job_start(job->kc->throttle);
+
 	if (job->rw == READ)
 		r = dm_io(&io_req, 1, &job->source, NULL);
 	else
@@ -695,7 +813,7 @@ int kcopyd_cancel(struct kcopyd_job *job, int block)
 /*-----------------------------------------------------------------
  * Client setup
  *---------------------------------------------------------------*/
-struct dm_kcopyd_client *dm_kcopyd_client_create(void)
+struct dm_kcopyd_client *dm_kcopyd_client_create(struct dm_kcopyd_throttle *throttle)
 {
 	int r = -ENOMEM;
 	struct dm_kcopyd_client *kc;
@@ -708,6 +826,7 @@ struct dm_kcopyd_client *dm_kcopyd_client_create(void)
 	INIT_LIST_HEAD(&kc->complete_jobs);
 	INIT_LIST_HEAD(&kc->io_jobs);
 	INIT_LIST_HEAD(&kc->pages_jobs);
+	kc->throttle = throttle;
 
 	kc->job_pool = mempool_create_slab_pool(MIN_JOBS, _job_cache);
 	if (!kc->job_pool)
```
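For reference, here is the throttle state that the accounting above implies. This is a sketch reconstructed from the field accesses in io_job_start()/io_job_finish(); the authoritative definition lives in include/linux/dm-kcopyd.h, which is outside this drivers/md-limited diff:

```c
/* Sketch only: fields inferred from the accesses in the code above. */
struct dm_kcopyd_throttle {
	unsigned throttle;	/* user-set percentage; >= 100 disables throttling */
	unsigned num_io_jobs;	/* kcopyd I/O jobs currently in flight */
	unsigned io_period;	/* decayed jiffies counted while I/O was in flight */
	unsigned total_period;	/* decayed jiffies counted overall */
	unsigned last_jiffies;	/* time of the last accounting update */
};
```

Since SHIFT_HZ is approximately log2(HZ), (1 << ACCOUNT_INTERVAL_SHIFT) is on the order of one second's worth of jiffies; each time total_period crosses it, both counters are scaled down, so the io_period/total_period ratio behaves as an exponentially decaying average rather than an all-time mean.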
```diff
diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c
index e2ea97723e10..d053098c6a91 100644
--- a/drivers/md/dm-raid1.c
+++ b/drivers/md/dm-raid1.c
@@ -82,6 +82,9 @@ struct mirror_set {
 	struct mirror mirror[0];
 };
 
+DECLARE_DM_KCOPYD_THROTTLE_WITH_MODULE_PARM(raid1_resync_throttle,
+		"A percentage of time allocated for raid resynchronization");
+
 static void wakeup_mirrord(void *context)
 {
 	struct mirror_set *ms = context;
@@ -1111,7 +1114,7 @@ static int mirror_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 		goto err_destroy_wq;
 	}
 
-	ms->kcopyd_client = dm_kcopyd_client_create();
+	ms->kcopyd_client = dm_kcopyd_client_create(&dm_kcopyd_throttle);
 	if (IS_ERR(ms->kcopyd_client)) {
 		r = PTR_ERR(ms->kcopyd_client);
 		goto err_destroy_wq;
```
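The DECLARE_DM_KCOPYD_THROTTLE_WITH_MODULE_PARM() macro is defined in include/linux/dm-kcopyd.h and is not shown in this drivers/md-limited diff. Judging from the call sites, which pass &dm_kcopyd_throttle to dm_kcopyd_client_create() and expose a writable parameter, a plausible expansion looks like this (a sketch, not the verbatim header):

```c
/*
 * Plausible expansion, inferred from the call sites: declare one static
 * throttle instance per module, defaulting to 100 (unthrottled), and expose
 * its .throttle field as a writable module parameter under "name".
 */
#define DECLARE_DM_KCOPYD_THROTTLE_WITH_MODULE_PARM(name, description)	\
static struct dm_kcopyd_throttle dm_kcopyd_throttle = { 100, 0, 0, 0, 0 }; \
module_param_named(name, dm_kcopyd_throttle.throttle, uint, 0644); \
MODULE_PARM_DESC(name, description)
```

After loading the mirror target, the limit would then appear as /sys/module/dm_mirror/parameters/raid1_resync_throttle (module name assumed) and accept values from 0 to 100, with 100 meaning no throttling.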
```diff
diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c
index 58c2b5881377..c0e07026a8d1 100644
--- a/drivers/md/dm-snap.c
+++ b/drivers/md/dm-snap.c
@@ -124,6 +124,9 @@ struct dm_snapshot {
 #define RUNNING_MERGE          0
 #define SHUTDOWN_MERGE         1
 
+DECLARE_DM_KCOPYD_THROTTLE_WITH_MODULE_PARM(snapshot_copy_throttle,
+		"A percentage of time allocated for copy on write");
+
 struct dm_dev *dm_snap_origin(struct dm_snapshot *s)
 {
 	return s->origin;
@@ -1108,7 +1111,7 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 		goto bad_hash_tables;
 	}
 
-	s->kcopyd_client = dm_kcopyd_client_create();
+	s->kcopyd_client = dm_kcopyd_client_create(&dm_kcopyd_throttle);
 	if (IS_ERR(s->kcopyd_client)) {
 		r = PTR_ERR(s->kcopyd_client);
 		ti->error = "Could not create kcopyd client";
```
```diff
diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c
index 303e11da7c2a..35d9d0396cc2 100644
--- a/drivers/md/dm-thin.c
+++ b/drivers/md/dm-thin.c
@@ -26,6 +26,9 @@
 #define PRISON_CELLS 1024
 #define COMMIT_PERIOD HZ
 
+DECLARE_DM_KCOPYD_THROTTLE_WITH_MODULE_PARM(snapshot_copy_throttle,
+		"A percentage of time allocated for copy on write");
+
 /*
  * The block size of the device holding pool data must be
  * between 64KB and 1GB.
@@ -1642,7 +1645,7 @@ static struct pool *pool_create(struct mapped_device *pool_md,
 		goto bad_prison;
 	}
 
-	pool->copier = dm_kcopyd_client_create();
+	pool->copier = dm_kcopyd_client_create(&dm_kcopyd_throttle);
 	if (IS_ERR(pool->copier)) {
 		r = PTR_ERR(pool->copier);
 		*error = "Error creating pool's kcopyd client";
```
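
Note that dm-thin.c reuses the parameter name snapshot_copy_throttle; module parameters are namespaced per module under /sys/module/<module>/parameters, so the two declarations do not collide. A fourth hypothetical kcopyd user would follow the same wiring pattern as the three targets above; in this sketch every "example" name is invented for illustration:

```c
#include <linux/module.h>
#include <linux/init.h>
#include <linux/err.h>
#include <linux/dm-kcopyd.h>

/* hypothetical parameter name, for illustration only */
DECLARE_DM_KCOPYD_THROTTLE_WITH_MODULE_PARM(example_copy_throttle,
		"A percentage of time allocated for example copies");

static struct dm_kcopyd_client *example_kc;

static int __init example_init(void)
{
	/* pass the module-wide throttle so all copies share one budget */
	example_kc = dm_kcopyd_client_create(&dm_kcopyd_throttle);
	if (IS_ERR(example_kc))
		return PTR_ERR(example_kc);
	return 0;
}

static void __exit example_exit(void)
{
	dm_kcopyd_client_destroy(example_kc);
}

module_init(example_init);
module_exit(example_exit);
MODULE_LICENSE("GPL");
```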