about | summary | refs | log | tree | commit | diff | stats
path: root/net/ceph
diff options
context:
space:
mode:
author: Sage Weil <sage@inktank.com> 2012-07-30 21:15:23 -0400
committer: Sage Weil <sage@inktank.com> 2012-07-30 21:15:23 -0400
commit: 546f04ef716dd49521774653d8b032a7d64c05d9 (patch)
tree: 162f548fc7a81b05eb1db715997b3a04693c1bcc /net/ceph
parent: 1fe60e51a3744528f3939b1b1167ca909133d9ae (diff)
libceph: support crush tunables
The server side recently added support for tuning some magic crush variables. Decode these variables if they are present, or use the default values if they are not present.

Corresponds to ceph.git commit 89af369c25f274fe62ef730e5e8aad0c54f1e5a5.

Signed-off-by: caleb miles <caleb.miles@inktank.com>
Reviewed-by: Sage Weil <sage@inktank.com>
Reviewed-by: Alex Elder <elder@inktank.com>
Reviewed-by: Yehuda Sadeh <yehuda@inktank.com>
Diffstat (limited to 'net/ceph')
-rw-r--r-- net/ceph/crush/mapper.c 13
-rw-r--r-- net/ceph/osdmap.c 39
2 files changed, 46 insertions, 6 deletions
diff --git a/net/ceph/crush/mapper.c b/net/ceph/crush/mapper.c
index d7edc24333b8..35fce755ce10 100644
--- a/net/ceph/crush/mapper.c
+++ b/net/ceph/crush/mapper.c
@@ -306,7 +306,6 @@ static int crush_choose(const struct crush_map *map,
306 int item = 0; 306 int item = 0;
307 int itemtype; 307 int itemtype;
308 int collide, reject; 308 int collide, reject;
309 const unsigned int orig_tries = 5; /* attempts before we fall back to search */
310 309
311 dprintk("CHOOSE%s bucket %d x %d outpos %d numrep %d\n", recurse_to_leaf ? "_LEAF" : "", 310 dprintk("CHOOSE%s bucket %d x %d outpos %d numrep %d\n", recurse_to_leaf ? "_LEAF" : "",
312 bucket->id, x, outpos, numrep); 311 bucket->id, x, outpos, numrep);
@@ -351,8 +350,9 @@ static int crush_choose(const struct crush_map *map,
351 reject = 1; 350 reject = 1;
352 goto reject; 351 goto reject;
353 } 352 }
354 if (flocal >= (in->size>>1) && 353 if (map->choose_local_fallback_tries > 0 &&
355 flocal > orig_tries) 354 flocal >= (in->size>>1) &&
355 flocal > map->choose_local_fallback_tries)
356 item = bucket_perm_choose(in, x, r); 356 item = bucket_perm_choose(in, x, r);
357 else 357 else
358 item = crush_bucket_choose(in, x, r); 358 item = crush_bucket_choose(in, x, r);
@@ -422,13 +422,14 @@ reject:
422 ftotal++; 422 ftotal++;
423 flocal++; 423 flocal++;
424 424
425 if (collide && flocal < 3) 425 if (collide && flocal <= map->choose_local_tries)
426 /* retry locally a few times */ 426 /* retry locally a few times */
427 retry_bucket = 1; 427 retry_bucket = 1;
428 else if (flocal <= in->size + orig_tries) 428 else if (map->choose_local_fallback_tries > 0 &&
429 flocal <= in->size + map->choose_local_fallback_tries)
429 /* exhaustive bucket search */ 430 /* exhaustive bucket search */
430 retry_bucket = 1; 431 retry_bucket = 1;
431 else if (ftotal < 20) 432 else if (ftotal <= map->choose_total_tries)
432 /* then retry descent */ 433 /* then retry descent */
433 retry_descent = 1; 434 retry_descent = 1;
434 else 435 else
diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c
index 9600674c2c39..3124b71a8883 100644
--- a/net/ceph/osdmap.c
+++ b/net/ceph/osdmap.c
@@ -135,6 +135,21 @@ bad:
135 return -EINVAL; 135 return -EINVAL;
136} 136}
137 137
138static int skip_name_map(void **p, void *end)
139{
140 int len;
141 ceph_decode_32_safe(p, end, len ,bad);
142 while (len--) {
143 int strlen;
144 *p += sizeof(u32);
145 ceph_decode_32_safe(p, end, strlen, bad);
146 *p += strlen;
147}
148 return 0;
149bad:
150 return -EINVAL;
151}
152
138static struct crush_map *crush_decode(void *pbyval, void *end) 153static struct crush_map *crush_decode(void *pbyval, void *end)
139{ 154{
140 struct crush_map *c; 155 struct crush_map *c;
@@ -143,6 +158,7 @@ static struct crush_map *crush_decode(void *pbyval, void *end)
143 void **p = &pbyval; 158 void **p = &pbyval;
144 void *start = pbyval; 159 void *start = pbyval;
145 u32 magic; 160 u32 magic;
161 u32 num_name_maps;
146 162
147 dout("crush_decode %p to %p len %d\n", *p, end, (int)(end - *p)); 163 dout("crush_decode %p to %p len %d\n", *p, end, (int)(end - *p));
148 164
@@ -150,6 +166,11 @@ static struct crush_map *crush_decode(void *pbyval, void *end)
150 if (c == NULL) 166 if (c == NULL)
151 return ERR_PTR(-ENOMEM); 167 return ERR_PTR(-ENOMEM);
152 168
169 /* set tunables to default values */
170 c->choose_local_tries = 2;
171 c->choose_local_fallback_tries = 5;
172 c->choose_total_tries = 19;
173
153 ceph_decode_need(p, end, 4*sizeof(u32), bad); 174 ceph_decode_need(p, end, 4*sizeof(u32), bad);
154 magic = ceph_decode_32(p); 175 magic = ceph_decode_32(p);
155 if (magic != CRUSH_MAGIC) { 176 if (magic != CRUSH_MAGIC) {
@@ -297,7 +318,25 @@ static struct crush_map *crush_decode(void *pbyval, void *end)
297 } 318 }
298 319
299 /* ignore trailing name maps. */ 320 /* ignore trailing name maps. */
321 for (num_name_maps = 0; num_name_maps < 3; num_name_maps++) {
322 err = skip_name_map(p, end);
323 if (err < 0)
324 goto done;
325 }
326
327 /* tunables */
328 ceph_decode_need(p, end, 3*sizeof(u32), done);
329 c->choose_local_tries = ceph_decode_32(p);
330 c->choose_local_fallback_tries = ceph_decode_32(p);
331 c->choose_total_tries = ceph_decode_32(p);
332 dout("crush decode tunable choose_local_tries = %d",
333 c->choose_local_tries);
334 dout("crush decode tunable choose_local_fallback_tries = %d",
335 c->choose_local_fallback_tries);
336 dout("crush decode tunable choose_total_tries = %d",
337 c->choose_total_tries);
300 338
339done:
301 dout("crush_decode success\n"); 340 dout("crush_decode success\n");
302 return c; 341 return c;
303 342