diff options
author | Kiyoshi Ueda <k-ueda@ct.jp.nec.com> | 2009-06-22 05:12:27 -0400 |
---|---|---|
committer | Alasdair G Kergon <agk@redhat.com> | 2009-06-22 05:12:27 -0400 |
commit | fd5e033908b7b743b5650790f196761dd930f988 (patch) | |
tree | cd264d11371b9410e98388e00f4d87459a5e6325 | |
parent | 02ab823fd1a27d193bda06b74fdad685a20a3e5e (diff) |
dm mpath: add queue length load balancer
This patch adds a dynamic load balancer, dm-queue-length, which
balances the number of in-flight I/Os across the paths.
The code is based on the patch posted by Stefan Bader:
https://www.redhat.com/archives/dm-devel/2005-October/msg00050.html
Signed-off-by: Stefan Bader <stefan.bader@canonical.com>
Signed-off-by: Kiyoshi Ueda <k-ueda@ct.jp.nec.com>
Signed-off-by: Jun'ichi Nomura <j-nomura@ce.jp.nec.com>
Signed-off-by: Alasdair G Kergon <agk@redhat.com>
-rw-r--r-- | Documentation/device-mapper/dm-queue-length.txt | 39 | ||||
-rw-r--r-- | drivers/md/Kconfig | 9 | ||||
-rw-r--r-- | drivers/md/Makefile | 1 | ||||
-rw-r--r-- | drivers/md/dm-queue-length.c | 263 |
4 files changed, 312 insertions, 0 deletions
diff --git a/Documentation/device-mapper/dm-queue-length.txt b/Documentation/device-mapper/dm-queue-length.txt new file mode 100644 index 000000000000..f4db2562175c --- /dev/null +++ b/Documentation/device-mapper/dm-queue-length.txt | |||
@@ -0,0 +1,39 @@ | |||
1 | dm-queue-length | ||
2 | =============== | ||
3 | |||
4 | dm-queue-length is a path selector module for device-mapper targets, | ||
5 | which selects a path with the least number of in-flight I/Os. | ||
6 | The path selector name is 'queue-length'. | ||
7 | |||
8 | Table parameters for each path: [<repeat_count>] | ||
9 | <repeat_count>: The number of I/Os to dispatch using the selected | ||
10 | path before switching to the next path. | ||
11 | If not given, internal default is used. To check | ||
12 | the default value, see the activated table. | ||
13 | |||
14 | Status for each path: <status> <fail-count> <in-flight> | ||
15 | <status>: 'A' if the path is active, 'F' if the path is failed. | ||
16 | <fail-count>: The number of path failures. | ||
17 | <in-flight>: The number of in-flight I/Os on the path. | ||
18 | |||
19 | |||
20 | Algorithm | ||
21 | ========= | ||
22 | |||
23 | dm-queue-length increments/decrements 'in-flight' when an I/O is | ||
24 | dispatched/completed respectively. | ||
25 | dm-queue-length selects a path with the minimum 'in-flight'. | ||
26 | |||
27 | |||
28 | Examples | ||
29 | ======== | ||
30 | In case that 2 paths (sda and sdb) are used with repeat_count == 128. | ||
31 | |||
32 | # echo "0 10 multipath 0 0 1 1 queue-length 0 2 1 8:0 128 8:16 128" \ | ||
33 | dmsetup create test | ||
34 | # | ||
35 | # dmsetup table | ||
36 | test: 0 10 multipath 0 0 1 1 queue-length 0 2 1 8:0 128 8:16 128 | ||
37 | # | ||
38 | # dmsetup status | ||
39 | test: 0 10 multipath 2 0 0 0 1 1 E 0 2 1 8:0 A 0 0 8:16 A 0 0 | ||
diff --git a/drivers/md/Kconfig b/drivers/md/Kconfig index 36e0675be9f7..3b311d273346 100644 --- a/drivers/md/Kconfig +++ b/drivers/md/Kconfig | |||
@@ -249,6 +249,15 @@ config DM_MULTIPATH | |||
249 | ---help--- | 249 | ---help--- |
250 | Allow volume managers to support multipath hardware. | 250 | Allow volume managers to support multipath hardware. |
251 | 251 | ||
252 | config DM_MULTIPATH_QL | ||
253 | tristate "I/O Path Selector based on the number of in-flight I/Os" | ||
254 | depends on DM_MULTIPATH | ||
255 | ---help--- | ||
256 | This path selector is a dynamic load balancer which selects | ||
257 | the path with the least number of in-flight I/Os. | ||
258 | |||
259 | If unsure, say N. | ||
260 | |||
252 | config DM_DELAY | 261 | config DM_DELAY |
253 | tristate "I/O delaying target (EXPERIMENTAL)" | 262 | tristate "I/O delaying target (EXPERIMENTAL)" |
254 | depends on BLK_DEV_DM && EXPERIMENTAL | 263 | depends on BLK_DEV_DM && EXPERIMENTAL |
diff --git a/drivers/md/Makefile b/drivers/md/Makefile index 45cc5951d928..ff9f545dd516 100644 --- a/drivers/md/Makefile +++ b/drivers/md/Makefile | |||
@@ -36,6 +36,7 @@ obj-$(CONFIG_BLK_DEV_DM) += dm-mod.o | |||
36 | obj-$(CONFIG_DM_CRYPT) += dm-crypt.o | 36 | obj-$(CONFIG_DM_CRYPT) += dm-crypt.o |
37 | obj-$(CONFIG_DM_DELAY) += dm-delay.o | 37 | obj-$(CONFIG_DM_DELAY) += dm-delay.o |
38 | obj-$(CONFIG_DM_MULTIPATH) += dm-multipath.o dm-round-robin.o | 38 | obj-$(CONFIG_DM_MULTIPATH) += dm-multipath.o dm-round-robin.o |
39 | obj-$(CONFIG_DM_MULTIPATH_QL) += dm-queue-length.o | ||
39 | obj-$(CONFIG_DM_SNAPSHOT) += dm-snapshot.o | 40 | obj-$(CONFIG_DM_SNAPSHOT) += dm-snapshot.o |
40 | obj-$(CONFIG_DM_MIRROR) += dm-mirror.o dm-log.o dm-region-hash.o | 41 | obj-$(CONFIG_DM_MIRROR) += dm-mirror.o dm-log.o dm-region-hash.o |
41 | obj-$(CONFIG_DM_ZERO) += dm-zero.o | 42 | obj-$(CONFIG_DM_ZERO) += dm-zero.o |
diff --git a/drivers/md/dm-queue-length.c b/drivers/md/dm-queue-length.c new file mode 100644 index 000000000000..f92b6cea9d9c --- /dev/null +++ b/drivers/md/dm-queue-length.c | |||
@@ -0,0 +1,263 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2004-2005 IBM Corp. All Rights Reserved. | ||
3 | * Copyright (C) 2006-2009 NEC Corporation. | ||
4 | * | ||
5 | * dm-queue-length.c | ||
6 | * | ||
7 | * Module Author: Stefan Bader, IBM | ||
8 | * Modified by: Kiyoshi Ueda, NEC | ||
9 | * | ||
10 | * This file is released under the GPL. | ||
11 | * | ||
12 | * queue-length path selector - choose a path with the least number of | ||
13 | * in-flight I/Os. | ||
14 | */ | ||
15 | |||
16 | #include "dm.h" | ||
17 | #include "dm-path-selector.h" | ||
18 | |||
19 | #include <linux/slab.h> | ||
20 | #include <linux/ctype.h> | ||
21 | #include <linux/errno.h> | ||
22 | #include <linux/module.h> | ||
23 | #include <asm/atomic.h> | ||
24 | |||
25 | #define DM_MSG_PREFIX "multipath queue-length" | ||
26 | #define QL_MIN_IO 128 | ||
27 | #define QL_VERSION "0.1.0" | ||
28 | |||
29 | struct selector { | ||
30 | struct list_head valid_paths; | ||
31 | struct list_head failed_paths; | ||
32 | }; | ||
33 | |||
34 | struct path_info { | ||
35 | struct list_head list; | ||
36 | struct dm_path *path; | ||
37 | unsigned repeat_count; | ||
38 | atomic_t qlen; /* the number of in-flight I/Os */ | ||
39 | }; | ||
40 | |||
41 | static struct selector *alloc_selector(void) | ||
42 | { | ||
43 | struct selector *s = kmalloc(sizeof(*s), GFP_KERNEL); | ||
44 | |||
45 | if (s) { | ||
46 | INIT_LIST_HEAD(&s->valid_paths); | ||
47 | INIT_LIST_HEAD(&s->failed_paths); | ||
48 | } | ||
49 | |||
50 | return s; | ||
51 | } | ||
52 | |||
53 | static int ql_create(struct path_selector *ps, unsigned argc, char **argv) | ||
54 | { | ||
55 | struct selector *s = alloc_selector(); | ||
56 | |||
57 | if (!s) | ||
58 | return -ENOMEM; | ||
59 | |||
60 | ps->context = s; | ||
61 | return 0; | ||
62 | } | ||
63 | |||
64 | static void ql_free_paths(struct list_head *paths) | ||
65 | { | ||
66 | struct path_info *pi, *next; | ||
67 | |||
68 | list_for_each_entry_safe(pi, next, paths, list) { | ||
69 | list_del(&pi->list); | ||
70 | kfree(pi); | ||
71 | } | ||
72 | } | ||
73 | |||
74 | static void ql_destroy(struct path_selector *ps) | ||
75 | { | ||
76 | struct selector *s = ps->context; | ||
77 | |||
78 | ql_free_paths(&s->valid_paths); | ||
79 | ql_free_paths(&s->failed_paths); | ||
80 | kfree(s); | ||
81 | ps->context = NULL; | ||
82 | } | ||
83 | |||
84 | static int ql_status(struct path_selector *ps, struct dm_path *path, | ||
85 | status_type_t type, char *result, unsigned maxlen) | ||
86 | { | ||
87 | unsigned sz = 0; | ||
88 | struct path_info *pi; | ||
89 | |||
90 | /* When called with NULL path, return selector status/args. */ | ||
91 | if (!path) | ||
92 | DMEMIT("0 "); | ||
93 | else { | ||
94 | pi = path->pscontext; | ||
95 | |||
96 | switch (type) { | ||
97 | case STATUSTYPE_INFO: | ||
98 | DMEMIT("%d ", atomic_read(&pi->qlen)); | ||
99 | break; | ||
100 | case STATUSTYPE_TABLE: | ||
101 | DMEMIT("%u ", pi->repeat_count); | ||
102 | break; | ||
103 | } | ||
104 | } | ||
105 | |||
106 | return sz; | ||
107 | } | ||
108 | |||
109 | static int ql_add_path(struct path_selector *ps, struct dm_path *path, | ||
110 | int argc, char **argv, char **error) | ||
111 | { | ||
112 | struct selector *s = ps->context; | ||
113 | struct path_info *pi; | ||
114 | unsigned repeat_count = QL_MIN_IO; | ||
115 | |||
116 | /* | ||
117 | * Arguments: [<repeat_count>] | ||
118 | * <repeat_count>: The number of I/Os before switching path. | ||
119 | * If not given, default (QL_MIN_IO) is used. | ||
120 | */ | ||
121 | if (argc > 1) { | ||
122 | *error = "queue-length ps: incorrect number of arguments"; | ||
123 | return -EINVAL; | ||
124 | } | ||
125 | |||
126 | if ((argc == 1) && (sscanf(argv[0], "%u", &repeat_count) != 1)) { | ||
127 | *error = "queue-length ps: invalid repeat count"; | ||
128 | return -EINVAL; | ||
129 | } | ||
130 | |||
131 | /* Allocate the path information structure */ | ||
132 | pi = kmalloc(sizeof(*pi), GFP_KERNEL); | ||
133 | if (!pi) { | ||
134 | *error = "queue-length ps: Error allocating path information"; | ||
135 | return -ENOMEM; | ||
136 | } | ||
137 | |||
138 | pi->path = path; | ||
139 | pi->repeat_count = repeat_count; | ||
140 | atomic_set(&pi->qlen, 0); | ||
141 | |||
142 | path->pscontext = pi; | ||
143 | |||
144 | list_add_tail(&pi->list, &s->valid_paths); | ||
145 | |||
146 | return 0; | ||
147 | } | ||
148 | |||
149 | static void ql_fail_path(struct path_selector *ps, struct dm_path *path) | ||
150 | { | ||
151 | struct selector *s = ps->context; | ||
152 | struct path_info *pi = path->pscontext; | ||
153 | |||
154 | list_move(&pi->list, &s->failed_paths); | ||
155 | } | ||
156 | |||
157 | static int ql_reinstate_path(struct path_selector *ps, struct dm_path *path) | ||
158 | { | ||
159 | struct selector *s = ps->context; | ||
160 | struct path_info *pi = path->pscontext; | ||
161 | |||
162 | list_move_tail(&pi->list, &s->valid_paths); | ||
163 | |||
164 | return 0; | ||
165 | } | ||
166 | |||
167 | /* | ||
168 | * Select a path having the minimum number of in-flight I/Os | ||
169 | */ | ||
170 | static struct dm_path *ql_select_path(struct path_selector *ps, | ||
171 | unsigned *repeat_count, size_t nr_bytes) | ||
172 | { | ||
173 | struct selector *s = ps->context; | ||
174 | struct path_info *pi = NULL, *best = NULL; | ||
175 | |||
176 | if (list_empty(&s->valid_paths)) | ||
177 | return NULL; | ||
178 | |||
179 | /* Change preferred (first in list) path to evenly balance. */ | ||
180 | list_move_tail(s->valid_paths.next, &s->valid_paths); | ||
181 | |||
182 | list_for_each_entry(pi, &s->valid_paths, list) { | ||
183 | if (!best || | ||
184 | (atomic_read(&pi->qlen) < atomic_read(&best->qlen))) | ||
185 | best = pi; | ||
186 | |||
187 | if (!atomic_read(&best->qlen)) | ||
188 | break; | ||
189 | } | ||
190 | |||
191 | if (!best) | ||
192 | return NULL; | ||
193 | |||
194 | *repeat_count = best->repeat_count; | ||
195 | |||
196 | return best->path; | ||
197 | } | ||
198 | |||
199 | static int ql_start_io(struct path_selector *ps, struct dm_path *path, | ||
200 | size_t nr_bytes) | ||
201 | { | ||
202 | struct path_info *pi = path->pscontext; | ||
203 | |||
204 | atomic_inc(&pi->qlen); | ||
205 | |||
206 | return 0; | ||
207 | } | ||
208 | |||
209 | static int ql_end_io(struct path_selector *ps, struct dm_path *path, | ||
210 | size_t nr_bytes) | ||
211 | { | ||
212 | struct path_info *pi = path->pscontext; | ||
213 | |||
214 | atomic_dec(&pi->qlen); | ||
215 | |||
216 | return 0; | ||
217 | } | ||
218 | |||
219 | static struct path_selector_type ql_ps = { | ||
220 | .name = "queue-length", | ||
221 | .module = THIS_MODULE, | ||
222 | .table_args = 1, | ||
223 | .info_args = 1, | ||
224 | .create = ql_create, | ||
225 | .destroy = ql_destroy, | ||
226 | .status = ql_status, | ||
227 | .add_path = ql_add_path, | ||
228 | .fail_path = ql_fail_path, | ||
229 | .reinstate_path = ql_reinstate_path, | ||
230 | .select_path = ql_select_path, | ||
231 | .start_io = ql_start_io, | ||
232 | .end_io = ql_end_io, | ||
233 | }; | ||
234 | |||
235 | static int __init dm_ql_init(void) | ||
236 | { | ||
237 | int r = dm_register_path_selector(&ql_ps); | ||
238 | |||
239 | if (r < 0) | ||
240 | DMERR("register failed %d", r); | ||
241 | |||
242 | DMINFO("version " QL_VERSION " loaded"); | ||
243 | |||
244 | return r; | ||
245 | } | ||
246 | |||
247 | static void __exit dm_ql_exit(void) | ||
248 | { | ||
249 | int r = dm_unregister_path_selector(&ql_ps); | ||
250 | |||
251 | if (r < 0) | ||
252 | DMERR("unregister failed %d", r); | ||
253 | } | ||
254 | |||
255 | module_init(dm_ql_init); | ||
256 | module_exit(dm_ql_exit); | ||
257 | |||
258 | MODULE_AUTHOR("Stefan Bader <Stefan.Bader at de.ibm.com>"); | ||
259 | MODULE_DESCRIPTION( | ||
260 | "(C) Copyright IBM Corp. 2004,2005 All Rights Reserved.\n" | ||
261 | DM_NAME " path selector to balance the number of in-flight I/Os" | ||
262 | ); | ||
263 | MODULE_LICENSE("GPL"); | ||