aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Ilya Dryomov <idryomov@gmail.com> 2017-06-22 13:44:05 -0400
committer Ilya Dryomov <idryomov@gmail.com> 2017-07-07 11:25:19 -0400
commit 5cf9c4a9959b6273675310d14a834ef14fbca37c (patch)
tree 8ea2729271f9bcb06f6b1448ac066e57e1e21d27
parent 069f3222ca96acfe8c59937e98c401bda5475b48 (diff)
libceph, crush: per-pool crush_choose_arg_map for crush_do_rule()
If there is no crush_choose_arg_map for a given pool, a NULL pointer is passed to preserve existing crush_do_rule() behavior. Reflects ceph.git commits 55fb91d64071552ea1bc65ab4ea84d3c8b73ab4b, dbe36e08be00c6519a8c89718dd47b0219c20516. Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
-rw-r--r-- include/linux/crush/crush.h 8
-rw-r--r-- net/ceph/crush/crush.c 3
-rw-r--r-- net/ceph/osdmap.c 200
3 files changed, 208 insertions, 3 deletions
diff --git a/include/linux/crush/crush.h b/include/linux/crush/crush.h
index d8676e56fa23..92e165d417a6 100644
--- a/include/linux/crush/crush.h
+++ b/include/linux/crush/crush.h
@@ -2,6 +2,7 @@
2#define CEPH_CRUSH_CRUSH_H 2#define CEPH_CRUSH_CRUSH_H
3 3
4#ifdef __KERNEL__ 4#ifdef __KERNEL__
5# include <linux/rbtree.h>
5# include <linux/types.h> 6# include <linux/types.h>
6#else 7#else
7# include "crush_compat.h" 8# include "crush_compat.h"
@@ -190,6 +191,10 @@ struct crush_choose_arg {
190 * 191 *
191 */ 192 */
192struct crush_choose_arg_map { 193struct crush_choose_arg_map {
194#ifdef __KERNEL__
195 struct rb_node node;
196 u64 choose_args_index;
197#endif
193 struct crush_choose_arg *args; /*!< replacement for each bucket 198 struct crush_choose_arg *args; /*!< replacement for each bucket
194 in the crushmap */ 199 in the crushmap */
195 __u32 size; /*!< size of the __args__ array */ 200 __u32 size; /*!< size of the __args__ array */
@@ -294,6 +299,9 @@ struct crush_map {
294 __u32 allowed_bucket_algs; 299 __u32 allowed_bucket_algs;
295 300
296 __u32 *choose_tries; 301 __u32 *choose_tries;
302#else
303 /* CrushWrapper::choose_args */
304 struct rb_root choose_args;
297#endif 305#endif
298}; 306};
299 307
diff --git a/net/ceph/crush/crush.c b/net/ceph/crush/crush.c
index 5bf94c04f645..4b428f46a8ca 100644
--- a/net/ceph/crush/crush.c
+++ b/net/ceph/crush/crush.c
@@ -1,6 +1,7 @@
1#ifdef __KERNEL__ 1#ifdef __KERNEL__
2# include <linux/slab.h> 2# include <linux/slab.h>
3# include <linux/crush/crush.h> 3# include <linux/crush/crush.h>
4void clear_choose_args(struct crush_map *c);
4#else 5#else
5# include "crush_compat.h" 6# include "crush_compat.h"
6# include "crush.h" 7# include "crush.h"
@@ -127,6 +128,8 @@ void crush_destroy(struct crush_map *map)
127 128
128#ifndef __KERNEL__ 129#ifndef __KERNEL__
129 kfree(map->choose_tries); 130 kfree(map->choose_tries);
131#else
132 clear_choose_args(map);
130#endif 133#endif
131 kfree(map); 134 kfree(map);
132} 135}
diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c
index 9da0ee61aca5..f630d1072299 100644
--- a/net/ceph/osdmap.c
+++ b/net/ceph/osdmap.c
@@ -138,6 +138,177 @@ bad:
138 return -EINVAL; 138 return -EINVAL;
139} 139}
140 140
141static struct crush_choose_arg_map *alloc_choose_arg_map(void)
142{
143 struct crush_choose_arg_map *arg_map;
144
145 arg_map = kzalloc(sizeof(*arg_map), GFP_NOIO);
146 if (!arg_map)
147 return NULL;
148
149 RB_CLEAR_NODE(&arg_map->node);
150 return arg_map;
151}
152
153static void free_choose_arg_map(struct crush_choose_arg_map *arg_map)
154{
155 if (arg_map) {
156 int i, j;
157
158 WARN_ON(!RB_EMPTY_NODE(&arg_map->node));
159
160 for (i = 0; i < arg_map->size; i++) {
161 struct crush_choose_arg *arg = &arg_map->args[i];
162
163 for (j = 0; j < arg->weight_set_size; j++)
164 kfree(arg->weight_set[j].weights);
165 kfree(arg->weight_set);
166 kfree(arg->ids);
167 }
168 kfree(arg_map->args);
169 kfree(arg_map);
170 }
171}
172
173DEFINE_RB_FUNCS(choose_arg_map, struct crush_choose_arg_map, choose_args_index,
174 node);
175
176void clear_choose_args(struct crush_map *c)
177{
178 while (!RB_EMPTY_ROOT(&c->choose_args)) {
179 struct crush_choose_arg_map *arg_map =
180 rb_entry(rb_first(&c->choose_args),
181 struct crush_choose_arg_map, node);
182
183 erase_choose_arg_map(&c->choose_args, arg_map);
184 free_choose_arg_map(arg_map);
185 }
186}
187
188static u32 *decode_array_32_alloc(void **p, void *end, u32 *plen)
189{
190 u32 *a = NULL;
191 u32 len;
192 int ret;
193
194 ceph_decode_32_safe(p, end, len, e_inval);
195 if (len) {
196 u32 i;
197
198 a = kmalloc_array(len, sizeof(u32), GFP_NOIO);
199 if (!a) {
200 ret = -ENOMEM;
201 goto fail;
202 }
203
204 ceph_decode_need(p, end, len * sizeof(u32), e_inval);
205 for (i = 0; i < len; i++)
206 a[i] = ceph_decode_32(p);
207 }
208
209 *plen = len;
210 return a;
211
212e_inval:
213 ret = -EINVAL;
214fail:
215 kfree(a);
216 return ERR_PTR(ret);
217}
218
219/*
220 * Assumes @arg is zero-initialized.
221 */
222static int decode_choose_arg(void **p, void *end, struct crush_choose_arg *arg)
223{
224 int ret;
225
226 ceph_decode_32_safe(p, end, arg->weight_set_size, e_inval);
227 if (arg->weight_set_size) {
228 u32 i;
229
230 arg->weight_set = kmalloc_array(arg->weight_set_size,
231 sizeof(*arg->weight_set),
232 GFP_NOIO);
233 if (!arg->weight_set)
234 return -ENOMEM;
235
236 for (i = 0; i < arg->weight_set_size; i++) {
237 struct crush_weight_set *w = &arg->weight_set[i];
238
239 w->weights = decode_array_32_alloc(p, end, &w->size);
240 if (IS_ERR(w->weights)) {
241 ret = PTR_ERR(w->weights);
242 w->weights = NULL;
243 return ret;
244 }
245 }
246 }
247
248 arg->ids = decode_array_32_alloc(p, end, &arg->ids_size);
249 if (IS_ERR(arg->ids)) {
250 ret = PTR_ERR(arg->ids);
251 arg->ids = NULL;
252 return ret;
253 }
254
255 return 0;
256
257e_inval:
258 return -EINVAL;
259}
260
261static int decode_choose_args(void **p, void *end, struct crush_map *c)
262{
263 struct crush_choose_arg_map *arg_map = NULL;
264 u32 num_choose_arg_maps, num_buckets;
265 int ret;
266
267 ceph_decode_32_safe(p, end, num_choose_arg_maps, e_inval);
268 while (num_choose_arg_maps--) {
269 arg_map = alloc_choose_arg_map();
270 if (!arg_map) {
271 ret = -ENOMEM;
272 goto fail;
273 }
274
275 ceph_decode_64_safe(p, end, arg_map->choose_args_index,
276 e_inval);
277 arg_map->size = c->max_buckets;
278 arg_map->args = kcalloc(arg_map->size, sizeof(*arg_map->args),
279 GFP_NOIO);
280 if (!arg_map->args) {
281 ret = -ENOMEM;
282 goto fail;
283 }
284
285 ceph_decode_32_safe(p, end, num_buckets, e_inval);
286 while (num_buckets--) {
287 struct crush_choose_arg *arg;
288 u32 bucket_index;
289
290 ceph_decode_32_safe(p, end, bucket_index, e_inval);
291 if (bucket_index >= arg_map->size)
292 goto e_inval;
293
294 arg = &arg_map->args[bucket_index];
295 ret = decode_choose_arg(p, end, arg);
296 if (ret)
297 goto fail;
298 }
299
300 insert_choose_arg_map(&c->choose_args, arg_map);
301 }
302
303 return 0;
304
305e_inval:
306 ret = -EINVAL;
307fail:
308 free_choose_arg_map(arg_map);
309 return ret;
310}
311
141static void crush_finalize(struct crush_map *c) 312static void crush_finalize(struct crush_map *c)
142{ 313{
143 __s32 b; 314 __s32 b;
@@ -179,6 +350,8 @@ static struct crush_map *crush_decode(void *pbyval, void *end)
179 if (c == NULL) 350 if (c == NULL)
180 return ERR_PTR(-ENOMEM); 351 return ERR_PTR(-ENOMEM);
181 352
353 c->choose_args = RB_ROOT;
354
182 /* set tunables to default values */ 355 /* set tunables to default values */
183 c->choose_local_tries = 2; 356 c->choose_local_tries = 2;
184 c->choose_local_fallback_tries = 5; 357 c->choose_local_fallback_tries = 5;
@@ -372,6 +545,21 @@ static struct crush_map *crush_decode(void *pbyval, void *end)
372 dout("crush decode tunable chooseleaf_stable = %d\n", 545 dout("crush decode tunable chooseleaf_stable = %d\n",
373 c->chooseleaf_stable); 546 c->chooseleaf_stable);
374 547
548 if (*p != end) {
549 /* class_map */
550 ceph_decode_skip_map(p, end, 32, 32, bad);
551 /* class_name */
552 ceph_decode_skip_map(p, end, 32, string, bad);
553 /* class_bucket */
554 ceph_decode_skip_map_of_map(p, end, 32, 32, 32, bad);
555 }
556
557 if (*p != end) {
558 err = decode_choose_args(p, end, c);
559 if (err)
560 goto bad;
561 }
562
375done: 563done:
376 crush_finalize(c); 564 crush_finalize(c);
377 dout("crush_decode success\n"); 565 dout("crush_decode success\n");
@@ -2103,15 +2291,21 @@ static u32 raw_pg_to_pps(struct ceph_pg_pool_info *pi,
2103 2291
2104static int do_crush(struct ceph_osdmap *map, int ruleno, int x, 2292static int do_crush(struct ceph_osdmap *map, int ruleno, int x,
2105 int *result, int result_max, 2293 int *result, int result_max,
2106 const __u32 *weight, int weight_max) 2294 const __u32 *weight, int weight_max,
2295 u64 choose_args_index)
2107{ 2296{
2297 struct crush_choose_arg_map *arg_map;
2108 int r; 2298 int r;
2109 2299
2110 BUG_ON(result_max > CEPH_PG_MAX_SIZE); 2300 BUG_ON(result_max > CEPH_PG_MAX_SIZE);
2111 2301
2302 arg_map = lookup_choose_arg_map(&map->crush->choose_args,
2303 choose_args_index);
2304
2112 mutex_lock(&map->crush_workspace_mutex); 2305 mutex_lock(&map->crush_workspace_mutex);
2113 r = crush_do_rule(map->crush, ruleno, x, result, result_max, 2306 r = crush_do_rule(map->crush, ruleno, x, result, result_max,
2114 weight, weight_max, map->crush_workspace, NULL); 2307 weight, weight_max, map->crush_workspace,
2308 arg_map ? arg_map->args : NULL);
2115 mutex_unlock(&map->crush_workspace_mutex); 2309 mutex_unlock(&map->crush_workspace_mutex);
2116 2310
2117 return r; 2311 return r;
@@ -2181,7 +2375,7 @@ static void pg_to_raw_osds(struct ceph_osdmap *osdmap,
2181 } 2375 }
2182 2376
2183 len = do_crush(osdmap, ruleno, pps, raw->osds, pi->size, 2377 len = do_crush(osdmap, ruleno, pps, raw->osds, pi->size,
2184 osdmap->osd_weight, osdmap->max_osd); 2378 osdmap->osd_weight, osdmap->max_osd, pi->id);
2185 if (len < 0) { 2379 if (len < 0) {
2186 pr_err("error %d from crush rule %d: pool %lld ruleset %d type %d size %d\n", 2380 pr_err("error %d from crush rule %d: pool %lld ruleset %d type %d size %d\n",
2187 len, ruleno, pi->id, pi->crush_ruleset, pi->type, 2381 len, ruleno, pi->id, pi->crush_ruleset, pi->type,