diff options
author | Ilya Dryomov <idryomov@gmail.com> | 2017-06-22 13:44:05 -0400 |
---|---|---|
committer | Ilya Dryomov <idryomov@gmail.com> | 2017-07-07 11:25:19 -0400 |
commit | 5cf9c4a9959b6273675310d14a834ef14fbca37c (patch) | |
tree | 8ea2729271f9bcb06f6b1448ac066e57e1e21d27 | |
parent | 069f3222ca96acfe8c59937e98c401bda5475b48 (diff) |
libceph, crush: per-pool crush_choose_arg_map for crush_do_rule()
If there is no crush_choose_arg_map for a given pool, a NULL pointer is
passed to preserve existing crush_do_rule() behavior.
Reflects ceph.git commits 55fb91d64071552ea1bc65ab4ea84d3c8b73ab4b,
dbe36e08be00c6519a8c89718dd47b0219c20516.
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
-rw-r--r-- | include/linux/crush/crush.h | 8 | ||||
-rw-r--r-- | net/ceph/crush/crush.c | 3 | ||||
-rw-r--r-- | net/ceph/osdmap.c | 200 |
3 files changed, 208 insertions, 3 deletions
diff --git a/include/linux/crush/crush.h b/include/linux/crush/crush.h index d8676e56fa23..92e165d417a6 100644 --- a/include/linux/crush/crush.h +++ b/include/linux/crush/crush.h | |||
@@ -2,6 +2,7 @@ | |||
2 | #define CEPH_CRUSH_CRUSH_H | 2 | #define CEPH_CRUSH_CRUSH_H |
3 | 3 | ||
4 | #ifdef __KERNEL__ | 4 | #ifdef __KERNEL__ |
5 | # include <linux/rbtree.h> | ||
5 | # include <linux/types.h> | 6 | # include <linux/types.h> |
6 | #else | 7 | #else |
7 | # include "crush_compat.h" | 8 | # include "crush_compat.h" |
@@ -190,6 +191,10 @@ struct crush_choose_arg { | |||
190 | * | 191 | * |
191 | */ | 192 | */ |
192 | struct crush_choose_arg_map { | 193 | struct crush_choose_arg_map { |
194 | #ifdef __KERNEL__ | ||
195 | struct rb_node node; | ||
196 | u64 choose_args_index; | ||
197 | #endif | ||
193 | struct crush_choose_arg *args; /*!< replacement for each bucket | 198 | struct crush_choose_arg *args; /*!< replacement for each bucket |
194 | in the crushmap */ | 199 | in the crushmap */ |
195 | __u32 size; /*!< size of the __args__ array */ | 200 | __u32 size; /*!< size of the __args__ array */ |
@@ -294,6 +299,9 @@ struct crush_map { | |||
294 | __u32 allowed_bucket_algs; | 299 | __u32 allowed_bucket_algs; |
295 | 300 | ||
296 | __u32 *choose_tries; | 301 | __u32 *choose_tries; |
302 | #else | ||
303 | /* CrushWrapper::choose_args */ | ||
304 | struct rb_root choose_args; | ||
297 | #endif | 305 | #endif |
298 | }; | 306 | }; |
299 | 307 | ||
diff --git a/net/ceph/crush/crush.c b/net/ceph/crush/crush.c index 5bf94c04f645..4b428f46a8ca 100644 --- a/net/ceph/crush/crush.c +++ b/net/ceph/crush/crush.c | |||
@@ -1,6 +1,7 @@ | |||
1 | #ifdef __KERNEL__ | 1 | #ifdef __KERNEL__ |
2 | # include <linux/slab.h> | 2 | # include <linux/slab.h> |
3 | # include <linux/crush/crush.h> | 3 | # include <linux/crush/crush.h> |
4 | void clear_choose_args(struct crush_map *c); | ||
4 | #else | 5 | #else |
5 | # include "crush_compat.h" | 6 | # include "crush_compat.h" |
6 | # include "crush.h" | 7 | # include "crush.h" |
@@ -127,6 +128,8 @@ void crush_destroy(struct crush_map *map) | |||
127 | 128 | ||
128 | #ifndef __KERNEL__ | 129 | #ifndef __KERNEL__ |
129 | kfree(map->choose_tries); | 130 | kfree(map->choose_tries); |
131 | #else | ||
132 | clear_choose_args(map); | ||
130 | #endif | 133 | #endif |
131 | kfree(map); | 134 | kfree(map); |
132 | } | 135 | } |
diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c index 9da0ee61aca5..f630d1072299 100644 --- a/net/ceph/osdmap.c +++ b/net/ceph/osdmap.c | |||
@@ -138,6 +138,177 @@ bad: | |||
138 | return -EINVAL; | 138 | return -EINVAL; |
139 | } | 139 | } |
140 | 140 | ||
141 | static struct crush_choose_arg_map *alloc_choose_arg_map(void) | ||
142 | { | ||
143 | struct crush_choose_arg_map *arg_map; | ||
144 | |||
145 | arg_map = kzalloc(sizeof(*arg_map), GFP_NOIO); | ||
146 | if (!arg_map) | ||
147 | return NULL; | ||
148 | |||
149 | RB_CLEAR_NODE(&arg_map->node); | ||
150 | return arg_map; | ||
151 | } | ||
152 | |||
153 | static void free_choose_arg_map(struct crush_choose_arg_map *arg_map) | ||
154 | { | ||
155 | if (arg_map) { | ||
156 | int i, j; | ||
157 | |||
158 | WARN_ON(!RB_EMPTY_NODE(&arg_map->node)); | ||
159 | |||
160 | for (i = 0; i < arg_map->size; i++) { | ||
161 | struct crush_choose_arg *arg = &arg_map->args[i]; | ||
162 | |||
163 | for (j = 0; j < arg->weight_set_size; j++) | ||
164 | kfree(arg->weight_set[j].weights); | ||
165 | kfree(arg->weight_set); | ||
166 | kfree(arg->ids); | ||
167 | } | ||
168 | kfree(arg_map->args); | ||
169 | kfree(arg_map); | ||
170 | } | ||
171 | } | ||
172 | |||
173 | DEFINE_RB_FUNCS(choose_arg_map, struct crush_choose_arg_map, choose_args_index, | ||
174 | node); | ||
175 | |||
176 | void clear_choose_args(struct crush_map *c) | ||
177 | { | ||
178 | while (!RB_EMPTY_ROOT(&c->choose_args)) { | ||
179 | struct crush_choose_arg_map *arg_map = | ||
180 | rb_entry(rb_first(&c->choose_args), | ||
181 | struct crush_choose_arg_map, node); | ||
182 | |||
183 | erase_choose_arg_map(&c->choose_args, arg_map); | ||
184 | free_choose_arg_map(arg_map); | ||
185 | } | ||
186 | } | ||
187 | |||
188 | static u32 *decode_array_32_alloc(void **p, void *end, u32 *plen) | ||
189 | { | ||
190 | u32 *a = NULL; | ||
191 | u32 len; | ||
192 | int ret; | ||
193 | |||
194 | ceph_decode_32_safe(p, end, len, e_inval); | ||
195 | if (len) { | ||
196 | u32 i; | ||
197 | |||
198 | a = kmalloc_array(len, sizeof(u32), GFP_NOIO); | ||
199 | if (!a) { | ||
200 | ret = -ENOMEM; | ||
201 | goto fail; | ||
202 | } | ||
203 | |||
204 | ceph_decode_need(p, end, len * sizeof(u32), e_inval); | ||
205 | for (i = 0; i < len; i++) | ||
206 | a[i] = ceph_decode_32(p); | ||
207 | } | ||
208 | |||
209 | *plen = len; | ||
210 | return a; | ||
211 | |||
212 | e_inval: | ||
213 | ret = -EINVAL; | ||
214 | fail: | ||
215 | kfree(a); | ||
216 | return ERR_PTR(ret); | ||
217 | } | ||
218 | |||
219 | /* | ||
220 | * Assumes @arg is zero-initialized. | ||
221 | */ | ||
222 | static int decode_choose_arg(void **p, void *end, struct crush_choose_arg *arg) | ||
223 | { | ||
224 | int ret; | ||
225 | |||
226 | ceph_decode_32_safe(p, end, arg->weight_set_size, e_inval); | ||
227 | if (arg->weight_set_size) { | ||
228 | u32 i; | ||
229 | |||
230 | arg->weight_set = kmalloc_array(arg->weight_set_size, | ||
231 | sizeof(*arg->weight_set), | ||
232 | GFP_NOIO); | ||
233 | if (!arg->weight_set) | ||
234 | return -ENOMEM; | ||
235 | |||
236 | for (i = 0; i < arg->weight_set_size; i++) { | ||
237 | struct crush_weight_set *w = &arg->weight_set[i]; | ||
238 | |||
239 | w->weights = decode_array_32_alloc(p, end, &w->size); | ||
240 | if (IS_ERR(w->weights)) { | ||
241 | ret = PTR_ERR(w->weights); | ||
242 | w->weights = NULL; | ||
243 | return ret; | ||
244 | } | ||
245 | } | ||
246 | } | ||
247 | |||
248 | arg->ids = decode_array_32_alloc(p, end, &arg->ids_size); | ||
249 | if (IS_ERR(arg->ids)) { | ||
250 | ret = PTR_ERR(arg->ids); | ||
251 | arg->ids = NULL; | ||
252 | return ret; | ||
253 | } | ||
254 | |||
255 | return 0; | ||
256 | |||
257 | e_inval: | ||
258 | return -EINVAL; | ||
259 | } | ||
260 | |||
261 | static int decode_choose_args(void **p, void *end, struct crush_map *c) | ||
262 | { | ||
263 | struct crush_choose_arg_map *arg_map = NULL; | ||
264 | u32 num_choose_arg_maps, num_buckets; | ||
265 | int ret; | ||
266 | |||
267 | ceph_decode_32_safe(p, end, num_choose_arg_maps, e_inval); | ||
268 | while (num_choose_arg_maps--) { | ||
269 | arg_map = alloc_choose_arg_map(); | ||
270 | if (!arg_map) { | ||
271 | ret = -ENOMEM; | ||
272 | goto fail; | ||
273 | } | ||
274 | |||
275 | ceph_decode_64_safe(p, end, arg_map->choose_args_index, | ||
276 | e_inval); | ||
277 | arg_map->size = c->max_buckets; | ||
278 | arg_map->args = kcalloc(arg_map->size, sizeof(*arg_map->args), | ||
279 | GFP_NOIO); | ||
280 | if (!arg_map->args) { | ||
281 | ret = -ENOMEM; | ||
282 | goto fail; | ||
283 | } | ||
284 | |||
285 | ceph_decode_32_safe(p, end, num_buckets, e_inval); | ||
286 | while (num_buckets--) { | ||
287 | struct crush_choose_arg *arg; | ||
288 | u32 bucket_index; | ||
289 | |||
290 | ceph_decode_32_safe(p, end, bucket_index, e_inval); | ||
291 | if (bucket_index >= arg_map->size) | ||
292 | goto e_inval; | ||
293 | |||
294 | arg = &arg_map->args[bucket_index]; | ||
295 | ret = decode_choose_arg(p, end, arg); | ||
296 | if (ret) | ||
297 | goto fail; | ||
298 | } | ||
299 | |||
300 | insert_choose_arg_map(&c->choose_args, arg_map); | ||
301 | } | ||
302 | |||
303 | return 0; | ||
304 | |||
305 | e_inval: | ||
306 | ret = -EINVAL; | ||
307 | fail: | ||
308 | free_choose_arg_map(arg_map); | ||
309 | return ret; | ||
310 | } | ||
311 | |||
141 | static void crush_finalize(struct crush_map *c) | 312 | static void crush_finalize(struct crush_map *c) |
142 | { | 313 | { |
143 | __s32 b; | 314 | __s32 b; |
@@ -179,6 +350,8 @@ static struct crush_map *crush_decode(void *pbyval, void *end) | |||
179 | if (c == NULL) | 350 | if (c == NULL) |
180 | return ERR_PTR(-ENOMEM); | 351 | return ERR_PTR(-ENOMEM); |
181 | 352 | ||
353 | c->choose_args = RB_ROOT; | ||
354 | |||
182 | /* set tunables to default values */ | 355 | /* set tunables to default values */ |
183 | c->choose_local_tries = 2; | 356 | c->choose_local_tries = 2; |
184 | c->choose_local_fallback_tries = 5; | 357 | c->choose_local_fallback_tries = 5; |
@@ -372,6 +545,21 @@ static struct crush_map *crush_decode(void *pbyval, void *end) | |||
372 | dout("crush decode tunable chooseleaf_stable = %d\n", | 545 | dout("crush decode tunable chooseleaf_stable = %d\n", |
373 | c->chooseleaf_stable); | 546 | c->chooseleaf_stable); |
374 | 547 | ||
548 | if (*p != end) { | ||
549 | /* class_map */ | ||
550 | ceph_decode_skip_map(p, end, 32, 32, bad); | ||
551 | /* class_name */ | ||
552 | ceph_decode_skip_map(p, end, 32, string, bad); | ||
553 | /* class_bucket */ | ||
554 | ceph_decode_skip_map_of_map(p, end, 32, 32, 32, bad); | ||
555 | } | ||
556 | |||
557 | if (*p != end) { | ||
558 | err = decode_choose_args(p, end, c); | ||
559 | if (err) | ||
560 | goto bad; | ||
561 | } | ||
562 | |||
375 | done: | 563 | done: |
376 | crush_finalize(c); | 564 | crush_finalize(c); |
377 | dout("crush_decode success\n"); | 565 | dout("crush_decode success\n"); |
@@ -2103,15 +2291,21 @@ static u32 raw_pg_to_pps(struct ceph_pg_pool_info *pi, | |||
2103 | 2291 | ||
2104 | static int do_crush(struct ceph_osdmap *map, int ruleno, int x, | 2292 | static int do_crush(struct ceph_osdmap *map, int ruleno, int x, |
2105 | int *result, int result_max, | 2293 | int *result, int result_max, |
2106 | const __u32 *weight, int weight_max) | 2294 | const __u32 *weight, int weight_max, |
2295 | u64 choose_args_index) | ||
2107 | { | 2296 | { |
2297 | struct crush_choose_arg_map *arg_map; | ||
2108 | int r; | 2298 | int r; |
2109 | 2299 | ||
2110 | BUG_ON(result_max > CEPH_PG_MAX_SIZE); | 2300 | BUG_ON(result_max > CEPH_PG_MAX_SIZE); |
2111 | 2301 | ||
2302 | arg_map = lookup_choose_arg_map(&map->crush->choose_args, | ||
2303 | choose_args_index); | ||
2304 | |||
2112 | mutex_lock(&map->crush_workspace_mutex); | 2305 | mutex_lock(&map->crush_workspace_mutex); |
2113 | r = crush_do_rule(map->crush, ruleno, x, result, result_max, | 2306 | r = crush_do_rule(map->crush, ruleno, x, result, result_max, |
2114 | weight, weight_max, map->crush_workspace, NULL); | 2307 | weight, weight_max, map->crush_workspace, |
2308 | arg_map ? arg_map->args : NULL); | ||
2115 | mutex_unlock(&map->crush_workspace_mutex); | 2309 | mutex_unlock(&map->crush_workspace_mutex); |
2116 | 2310 | ||
2117 | return r; | 2311 | return r; |
@@ -2181,7 +2375,7 @@ static void pg_to_raw_osds(struct ceph_osdmap *osdmap, | |||
2181 | } | 2375 | } |
2182 | 2376 | ||
2183 | len = do_crush(osdmap, ruleno, pps, raw->osds, pi->size, | 2377 | len = do_crush(osdmap, ruleno, pps, raw->osds, pi->size, |
2184 | osdmap->osd_weight, osdmap->max_osd); | 2378 | osdmap->osd_weight, osdmap->max_osd, pi->id); |
2185 | if (len < 0) { | 2379 | if (len < 0) { |
2186 | pr_err("error %d from crush rule %d: pool %lld ruleset %d type %d size %d\n", | 2380 | pr_err("error %d from crush rule %d: pool %lld ruleset %d type %d size %d\n", |
2187 | len, ruleno, pi->id, pi->crush_ruleset, pi->type, | 2381 | len, ruleno, pi->id, pi->crush_ruleset, pi->type, |