aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
author: Boaz Harrosh <bharrosh@panasas.com> 2010-02-11 06:01:39 -0500
committer: Boaz Harrosh <bharrosh@panasas.com> 2010-02-28 06:55:53 -0500
commit: 50a76fd3c352ed2740eba01512efcfceee0703be (patch)
tree: 425416e068648e225b41327a120d00bbddd16d0e /fs
parent: b367e78bd1c7af4c018ce98b1f6d3e001aba895a (diff)
exofs: groups support
* _calc_stripe_info() changes to accommodate grouping calculations. It returns additional information.
* The old _prepare_pages() becomes _prepare_one_group(), which stores pages belonging to one device group.
* New _prepare_for_striping() iterates on all groups, calling _prepare_one_group().
* Enable mounting of groups data_maps (group_width != 0).

[QUESTION] What is faster, A or B?
A.  x += stride;
    x = x % width + first_x;
B.  x += stride;
    if (x < last_x)
        x = first_x;

Signed-off-by: Boaz Harrosh <bharrosh@panasas.com>
Diffstat (limited to 'fs')
-rw-r--r-- fs/exofs/exofs.h 3
-rw-r--r-- fs/exofs/ios.c 129
-rw-r--r-- fs/exofs/super.c 46
3 files changed, 141 insertions, 37 deletions
diff --git a/fs/exofs/exofs.h b/fs/exofs/exofs.h
index acfebd36de83..59b8bf2825c7 100644
--- a/fs/exofs/exofs.h
+++ b/fs/exofs/exofs.h
@@ -63,6 +63,8 @@ struct exofs_layout {
63 unsigned mirrors_p1; 63 unsigned mirrors_p1;
64 64
65 unsigned group_width; 65 unsigned group_width;
66 u64 group_depth;
67 unsigned group_count;
66 68
67 enum exofs_inode_layout_gen_functions lay_func; 69 enum exofs_inode_layout_gen_functions lay_func;
68 70
@@ -132,6 +134,7 @@ struct exofs_io_state {
132 struct page **pages; 134 struct page **pages;
133 unsigned nr_pages; 135 unsigned nr_pages;
134 unsigned pgbase; 136 unsigned pgbase;
137 unsigned pages_consumed;
135 138
136 /* Attributes */ 139 /* Attributes */
137 unsigned in_attr_len; 140 unsigned in_attr_len;
diff --git a/fs/exofs/ios.c b/fs/exofs/ios.c
index d28febdf54ab..5293bc411d17 100644
--- a/fs/exofs/ios.c
+++ b/fs/exofs/ios.c
@@ -262,25 +262,50 @@ int exofs_check_io(struct exofs_io_state *ios, u64 *resid)
262/* 262/*
263 * L - logical offset into the file 263 * L - logical offset into the file
264 * 264 *
265 * U - The number of bytes in a full stripe 265 * U - The number of bytes in a stripe within a group
266 * 266 *
267 * U = stripe_unit * group_width 267 * U = stripe_unit * group_width
268 * 268 *
269 * N - The stripe number 269 * T - The number of bytes striped within a group of component objects
270 * (before advancing to the next group)
270 * 271 *
271 * N = L / U 272 * T = stripe_unit * group_width * group_depth
273 *
274 * S - The number of bytes striped across all component objects
275 * before the pattern repeats
276 *
277 * S = stripe_unit * group_width * group_depth * group_count
278 *
279 * M - The "major" (i.e., across all components) stripe number
280 *
281 * M = L / S
282 *
283 * G - Counts the groups from the beginning of the major stripe
284 *
285 * G = (L - (M * S)) / T [or (L % S) / T]
286 *
287 * H - The byte offset within the group
288 *
289 * H = (L - (M * S)) % T [or (L % S) % T]
290 *
291 * N - The "minor" (i.e., across the group) stripe number
292 *
293 * N = H / U
272 * 294 *
273 * C - The component index coresponding to L 295 * C - The component index coresponding to L
274 * 296 *
275 * C = (L - (N*U)) / stripe_unit 297 * C = (H - (N * U)) / stripe_unit + G * group_width
298 * [or (L % U) / stripe_unit + G * group_width]
276 * 299 *
277 * O - The component offset coresponding to L 300 * O - The component offset coresponding to L
278 * 301 *
279 * (N*stripe_unit)+(L%stripe_unit) 302 * O = L % stripe_unit + N * stripe_unit + M * group_depth * stripe_unit
280 */ 303 */
281
282struct _striping_info { 304struct _striping_info {
283 u64 obj_offset; 305 u64 obj_offset;
306 u64 group_length;
307 u64 total_group_length;
308 u64 Major;
284 unsigned dev; 309 unsigned dev;
285 unsigned unit_off; 310 unsigned unit_off;
286}; 311};
@@ -290,15 +315,35 @@ static void _calc_stripe_info(struct exofs_io_state *ios, u64 file_offset,
290{ 315{
291 u32 stripe_unit = ios->layout->stripe_unit; 316 u32 stripe_unit = ios->layout->stripe_unit;
292 u32 group_width = ios->layout->group_width; 317 u32 group_width = ios->layout->group_width;
318 u64 group_depth = ios->layout->group_depth;
319
293 u32 U = stripe_unit * group_width; 320 u32 U = stripe_unit * group_width;
321 u64 T = U * group_depth;
322 u64 S = T * ios->layout->group_count;
323 u64 M = div64_u64(file_offset, S);
324
325 /*
326 G = (L - (M * S)) / T
327 H = (L - (M * S)) % T
328 */
329 u64 LmodS = file_offset - M * S;
330 u32 G = div64_u64(LmodS, T);
331 u64 H = LmodS - G * T;
332
333 u32 N = div_u64(H, U);
334
335 /* "H - (N * U)" is just "H % U" so it's bound to u32 */
336 si->dev = (u32)(H - (N * U)) / stripe_unit + G * group_width;
337 si->dev *= ios->layout->mirrors_p1;
294 338
295 u32 LmodU; 339 div_u64_rem(file_offset, stripe_unit, &si->unit_off);
296 u64 N = div_u64_rem(file_offset, U, &LmodU);
297 340
298 si->unit_off = LmodU % stripe_unit; 341 si->obj_offset = si->unit_off + (N * stripe_unit) +
299 si->obj_offset = N * stripe_unit + si->unit_off; 342 (M * group_depth * stripe_unit);
300 si->dev = LmodU / stripe_unit; 343
301 si->dev *= ios->layout->mirrors_p1; 344 si->group_length = T - H;
345 si->total_group_length = T;
346 si->Major = M;
302} 347}
303 348
304static int _add_stripe_unit(struct exofs_io_state *ios, unsigned *cur_pg, 349static int _add_stripe_unit(struct exofs_io_state *ios, unsigned *cur_pg,
@@ -345,16 +390,17 @@ static int _add_stripe_unit(struct exofs_io_state *ios, unsigned *cur_pg,
345 return 0; 390 return 0;
346} 391}
347 392
348static int _prepare_pages(struct exofs_io_state *ios, 393static int _prepare_one_group(struct exofs_io_state *ios, u64 length,
349 struct _striping_info *si) 394 struct _striping_info *si, unsigned first_comp)
350{ 395{
351 u64 length = ios->length;
352 unsigned stripe_unit = ios->layout->stripe_unit; 396 unsigned stripe_unit = ios->layout->stripe_unit;
353 unsigned mirrors_p1 = ios->layout->mirrors_p1; 397 unsigned mirrors_p1 = ios->layout->mirrors_p1;
398 unsigned devs_in_group = ios->layout->group_width * mirrors_p1;
354 unsigned dev = si->dev; 399 unsigned dev = si->dev;
355 unsigned comp = 0; 400 unsigned first_dev = dev - (dev % devs_in_group);
356 unsigned stripes = 0; 401 unsigned comp = first_comp + (dev - first_dev);
357 unsigned cur_pg = 0; 402 unsigned max_comp = ios->numdevs ? ios->numdevs - mirrors_p1 : 0;
403 unsigned cur_pg = ios->pages_consumed;
358 int ret = 0; 404 int ret = 0;
359 405
360 while (length) { 406 while (length) {
@@ -377,10 +423,11 @@ static int _prepare_pages(struct exofs_io_state *ios,
377 cur_len = stripe_unit; 423 cur_len = stripe_unit;
378 } 424 }
379 425
380 stripes++; 426 if (max_comp < comp)
427 max_comp = comp;
381 428
382 dev += mirrors_p1; 429 dev += mirrors_p1;
383 dev %= ios->layout->s_numdevs; 430 dev = (dev % devs_in_group) + first_dev;
384 } else { 431 } else {
385 cur_len = stripe_unit; 432 cur_len = stripe_unit;
386 } 433 }
@@ -393,18 +440,24 @@ static int _prepare_pages(struct exofs_io_state *ios,
393 goto out; 440 goto out;
394 441
395 comp += mirrors_p1; 442 comp += mirrors_p1;
396 comp %= ios->layout->s_numdevs; 443 comp = (comp % devs_in_group) + first_comp;
397 444
398 length -= cur_len; 445 length -= cur_len;
399 } 446 }
400out: 447out:
401 ios->numdevs = stripes * mirrors_p1; 448 ios->numdevs = max_comp + mirrors_p1;
449 ios->pages_consumed = cur_pg;
402 return ret; 450 return ret;
403} 451}
404 452
405static int _prepare_for_striping(struct exofs_io_state *ios) 453static int _prepare_for_striping(struct exofs_io_state *ios)
406{ 454{
455 u64 length = ios->length;
407 struct _striping_info si; 456 struct _striping_info si;
457 unsigned devs_in_group = ios->layout->group_width *
458 ios->layout->mirrors_p1;
459 unsigned first_comp = 0;
460 int ret = 0;
408 461
409 _calc_stripe_info(ios, ios->offset, &si); 462 _calc_stripe_info(ios, ios->offset, &si);
410 463
@@ -424,7 +477,31 @@ static int _prepare_for_striping(struct exofs_io_state *ios)
424 return 0; 477 return 0;
425 } 478 }
426 479
427 return _prepare_pages(ios, &si); 480 while (length) {
481 if (length < si.group_length)
482 si.group_length = length;
483
484 ret = _prepare_one_group(ios, si.group_length, &si, first_comp);
485 if (unlikely(ret))
486 goto out;
487
488 length -= si.group_length;
489
490 si.group_length = si.total_group_length;
491 si.unit_off = 0;
492 ++si.Major;
493 si.obj_offset = si.Major * ios->layout->stripe_unit *
494 ios->layout->group_depth;
495
496 si.dev = (si.dev - (si.dev % devs_in_group)) + devs_in_group;
497 si.dev %= ios->layout->s_numdevs;
498
499 first_comp += devs_in_group;
500 first_comp %= ios->layout->s_numdevs;
501 }
502
503out:
504 return ret;
428} 505}
429 506
430int exofs_sbi_create(struct exofs_io_state *ios) 507int exofs_sbi_create(struct exofs_io_state *ios)
@@ -482,6 +559,9 @@ static int _sbi_write_mirror(struct exofs_io_state *ios, int cur_comp)
482 unsigned last_comp = cur_comp + ios->layout->mirrors_p1; 559 unsigned last_comp = cur_comp + ios->layout->mirrors_p1;
483 int ret = 0; 560 int ret = 0;
484 561
562 if (ios->pages && !master_dev->length)
563 return 0; /* Just an empty slot */
564
485 for (; cur_comp < last_comp; ++cur_comp, ++dev) { 565 for (; cur_comp < last_comp; ++cur_comp, ++dev) {
486 struct exofs_per_dev_state *per_dev = &ios->per_dev[cur_comp]; 566 struct exofs_per_dev_state *per_dev = &ios->per_dev[cur_comp];
487 struct osd_request *or; 567 struct osd_request *or;
@@ -580,6 +660,9 @@ static int _sbi_read_mirror(struct exofs_io_state *ios, unsigned cur_comp)
580 struct exofs_per_dev_state *per_dev = &ios->per_dev[cur_comp]; 660 struct exofs_per_dev_state *per_dev = &ios->per_dev[cur_comp];
581 unsigned first_dev = (unsigned)ios->obj.id; 661 unsigned first_dev = (unsigned)ios->obj.id;
582 662
663 if (ios->pages && !per_dev->length)
664 return 0; /* Just an empty slot */
665
583 first_dev = per_dev->dev + first_dev % ios->layout->mirrors_p1; 666 first_dev = per_dev->dev + first_dev % ios->layout->mirrors_p1;
584 or = osd_start_request(exofs_ios_od(ios, first_dev), GFP_KERNEL); 667 or = osd_start_request(exofs_ios_od(ios, first_dev), GFP_KERNEL);
585 if (unlikely(!or)) { 668 if (unlikely(!or)) {
diff --git a/fs/exofs/super.c b/fs/exofs/super.c
index 8f4e4b37a578..6cf5e4e84d61 100644
--- a/fs/exofs/super.c
+++ b/fs/exofs/super.c
@@ -323,11 +323,7 @@ static int _read_and_match_data_map(struct exofs_sb_info *sbi, unsigned numdevs,
323 sbi->data_map.odm_raid_algorithm = 323 sbi->data_map.odm_raid_algorithm =
324 le32_to_cpu(dt->dt_data_map.cb_raid_algorithm); 324 le32_to_cpu(dt->dt_data_map.cb_raid_algorithm);
325 325
326/* FIXME: Only raid0 !group_width/depth for now. if not so, do not mount */ 326/* FIXME: Only raid0 for now. if not so, do not mount */
327 if (sbi->data_map.odm_group_width || sbi->data_map.odm_group_depth) {
328 EXOFS_ERR("Group width/depth not supported\n");
329 return -EINVAL;
330 }
331 if (sbi->data_map.odm_num_comps != numdevs) { 327 if (sbi->data_map.odm_num_comps != numdevs) {
332 EXOFS_ERR("odm_num_comps(%u) != numdevs(%u)\n", 328 EXOFS_ERR("odm_num_comps(%u) != numdevs(%u)\n",
333 sbi->data_map.odm_num_comps, numdevs); 329 sbi->data_map.odm_num_comps, numdevs);
@@ -343,14 +339,6 @@ static int _read_and_match_data_map(struct exofs_sb_info *sbi, unsigned numdevs,
343 return -EINVAL; 339 return -EINVAL;
344 } 340 }
345 341
346 stripe_length = sbi->data_map.odm_stripe_unit *
347 (numdevs / (sbi->data_map.odm_mirror_cnt + 1));
348 if (stripe_length >= (1ULL << 32)) {
349 EXOFS_ERR("Total Stripe length(0x%llx)"
350 " >= 32bit is not supported\n", _LLU(stripe_length));
351 return -EINVAL;
352 }
353
354 if (0 != (sbi->data_map.odm_stripe_unit & ~PAGE_MASK)) { 342 if (0 != (sbi->data_map.odm_stripe_unit & ~PAGE_MASK)) {
355 EXOFS_ERR("Stripe Unit(0x%llx)" 343 EXOFS_ERR("Stripe Unit(0x%llx)"
356 " must be Multples of PAGE_SIZE(0x%lx)\n", 344 " must be Multples of PAGE_SIZE(0x%lx)\n",
@@ -360,8 +348,36 @@ static int _read_and_match_data_map(struct exofs_sb_info *sbi, unsigned numdevs,
360 348
361 sbi->layout.stripe_unit = sbi->data_map.odm_stripe_unit; 349 sbi->layout.stripe_unit = sbi->data_map.odm_stripe_unit;
362 sbi->layout.mirrors_p1 = sbi->data_map.odm_mirror_cnt + 1; 350 sbi->layout.mirrors_p1 = sbi->data_map.odm_mirror_cnt + 1;
363 sbi->layout.group_width = sbi->data_map.odm_num_comps / 351
352 if (sbi->data_map.odm_group_width) {
353 sbi->layout.group_width = sbi->data_map.odm_group_width;
354 sbi->layout.group_depth = sbi->data_map.odm_group_depth;
355 if (!sbi->layout.group_depth) {
356 EXOFS_ERR("group_depth == 0 && group_width != 0\n");
357 return -EINVAL;
358 }
359 sbi->layout.group_count = sbi->data_map.odm_num_comps /
360 sbi->layout.mirrors_p1 /
361 sbi->data_map.odm_group_width;
362 } else {
363 if (sbi->data_map.odm_group_depth) {
364 printk(KERN_NOTICE "Warning: group_depth ignored "
365 "group_width == 0 && group_depth == %d\n",
366 sbi->data_map.odm_group_depth);
367 sbi->data_map.odm_group_depth = 0;
368 }
369 sbi->layout.group_width = sbi->data_map.odm_num_comps /
364 sbi->layout.mirrors_p1; 370 sbi->layout.mirrors_p1;
371 sbi->layout.group_depth = -1;
372 sbi->layout.group_count = 1;
373 }
374
375 stripe_length = (u64)sbi->layout.group_width * sbi->layout.stripe_unit;
376 if (stripe_length >= (1ULL << 32)) {
377 EXOFS_ERR("Total Stripe length(0x%llx)"
378 " >= 32bit is not supported\n", _LLU(stripe_length));
379 return -EINVAL;
380 }
365 381
366 return 0; 382 return 0;
367} 383}
@@ -540,6 +556,8 @@ static int exofs_fill_super(struct super_block *sb, void *data, int silent)
540 sbi->layout.stripe_unit = PAGE_SIZE; 556 sbi->layout.stripe_unit = PAGE_SIZE;
541 sbi->layout.mirrors_p1 = 1; 557 sbi->layout.mirrors_p1 = 1;
542 sbi->layout.group_width = 1; 558 sbi->layout.group_width = 1;
559 sbi->layout.group_depth = -1;
560 sbi->layout.group_count = 1;
543 sbi->layout.s_ods[0] = od; 561 sbi->layout.s_ods[0] = od;
544 sbi->layout.s_numdevs = 1; 562 sbi->layout.s_numdevs = 1;
545 sbi->layout.s_pid = opts->pid; 563 sbi->layout.s_pid = opts->pid;