aboutsummaryrefslogtreecommitdiffstats
path: root/fs/btrfs/scrub.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2014-04-11 17:16:53 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2014-04-11 17:16:53 -0400
commit3123bca71993c2346a458875488863772c1d5dc4 (patch)
treea1e082130a3d7a4ba1faaea60e699939cf821ab6 /fs/btrfs/scrub.c
parent582076ab16779208e7eb6ce712a9c0a6cc5bafe4 (diff)
parente4fbaee29272533a242f117d18712e2974520d2c (diff)
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs
Pull second set of btrfs updates from Chris Mason: "The most important changes here are from Josef, fixing a btrfs regression in 3.14 that can cause corruptions in the extent allocation tree when snapshots are in use. Josef also fixed some deadlocks in send/recv and other assorted races when balance is running" * 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs: (23 commits) Btrfs: fix compile warnings on on avr32 platform btrfs: allow mounting btrfs subvolumes with different ro/rw options btrfs: export global block reserve size as space_info btrfs: fix crash in remount(thread_pool=) case Btrfs: abort the transaction when we don't find our extent ref Btrfs: fix EINVAL checks in btrfs_clone Btrfs: fix unlock in __start_delalloc_inodes() Btrfs: scrub raid56 stripes in the right way Btrfs: don't compress for a small write Btrfs: more efficient io tree navigation on wait_extent_bit Btrfs: send, build path string only once in send_hole btrfs: filter invalid arg for btrfs resize Btrfs: send, fix data corruption due to incorrect hole detection Btrfs: kmalloc() doesn't return an ERR_PTR Btrfs: fix snapshot vs nocow writting btrfs: Change the expanding write sequence to fix snapshot related bug. btrfs: make device scan less noisy btrfs: fix lockdep warning with reclaim lock inversion Btrfs: hold the commit_root_sem when getting the commit root during send Btrfs: remove transaction from send ...
Diffstat (limited to 'fs/btrfs/scrub.c')
-rw-r--r--fs/btrfs/scrub.c108
1 files changed, 89 insertions, 19 deletions
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index 93e6d7172844..0be77993378e 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -2235,6 +2235,47 @@ behind_scrub_pages:
2235 return 0; 2235 return 0;
2236} 2236}
2237 2237
2238/*
2239 * Given a physical address, this will calculate it's
2240 * logical offset. if this is a parity stripe, it will return
2241 * the most left data stripe's logical offset.
2242 *
2243 * return 0 if it is a data stripe, 1 means parity stripe.
2244 */
2245static int get_raid56_logic_offset(u64 physical, int num,
2246 struct map_lookup *map, u64 *offset)
2247{
2248 int i;
2249 int j = 0;
2250 u64 stripe_nr;
2251 u64 last_offset;
2252 int stripe_index;
2253 int rot;
2254
2255 last_offset = (physical - map->stripes[num].physical) *
2256 nr_data_stripes(map);
2257 *offset = last_offset;
2258 for (i = 0; i < nr_data_stripes(map); i++) {
2259 *offset = last_offset + i * map->stripe_len;
2260
2261 stripe_nr = *offset;
2262 do_div(stripe_nr, map->stripe_len);
2263 do_div(stripe_nr, nr_data_stripes(map));
2264
2265 /* Work out the disk rotation on this stripe-set */
2266 rot = do_div(stripe_nr, map->num_stripes);
2267 /* calculate which stripe this data locates */
2268 rot += i;
2269 stripe_index = rot % map->num_stripes;
2270 if (stripe_index == num)
2271 return 0;
2272 if (stripe_index < num)
2273 j++;
2274 }
2275 *offset = last_offset + j * map->stripe_len;
2276 return 1;
2277}
2278
2238static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx, 2279static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
2239 struct map_lookup *map, 2280 struct map_lookup *map,
2240 struct btrfs_device *scrub_dev, 2281 struct btrfs_device *scrub_dev,
@@ -2256,6 +2297,7 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
2256 u64 physical; 2297 u64 physical;
2257 u64 logical; 2298 u64 logical;
2258 u64 logic_end; 2299 u64 logic_end;
2300 u64 physical_end;
2259 u64 generation; 2301 u64 generation;
2260 int mirror_num; 2302 int mirror_num;
2261 struct reada_control *reada1; 2303 struct reada_control *reada1;
@@ -2269,16 +2311,10 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
2269 u64 extent_len; 2311 u64 extent_len;
2270 struct btrfs_device *extent_dev; 2312 struct btrfs_device *extent_dev;
2271 int extent_mirror_num; 2313 int extent_mirror_num;
2272 int stop_loop; 2314 int stop_loop = 0;
2273
2274 if (map->type & (BTRFS_BLOCK_GROUP_RAID5 |
2275 BTRFS_BLOCK_GROUP_RAID6)) {
2276 if (num >= nr_data_stripes(map)) {
2277 return 0;
2278 }
2279 }
2280 2315
2281 nstripes = length; 2316 nstripes = length;
2317 physical = map->stripes[num].physical;
2282 offset = 0; 2318 offset = 0;
2283 do_div(nstripes, map->stripe_len); 2319 do_div(nstripes, map->stripe_len);
2284 if (map->type & BTRFS_BLOCK_GROUP_RAID0) { 2320 if (map->type & BTRFS_BLOCK_GROUP_RAID0) {
@@ -2296,6 +2332,11 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
2296 } else if (map->type & BTRFS_BLOCK_GROUP_DUP) { 2332 } else if (map->type & BTRFS_BLOCK_GROUP_DUP) {
2297 increment = map->stripe_len; 2333 increment = map->stripe_len;
2298 mirror_num = num % map->num_stripes + 1; 2334 mirror_num = num % map->num_stripes + 1;
2335 } else if (map->type & (BTRFS_BLOCK_GROUP_RAID5 |
2336 BTRFS_BLOCK_GROUP_RAID6)) {
2337 get_raid56_logic_offset(physical, num, map, &offset);
2338 increment = map->stripe_len * nr_data_stripes(map);
2339 mirror_num = 1;
2299 } else { 2340 } else {
2300 increment = map->stripe_len; 2341 increment = map->stripe_len;
2301 mirror_num = 1; 2342 mirror_num = 1;
@@ -2319,7 +2360,15 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
2319 * to not hold off transaction commits 2360 * to not hold off transaction commits
2320 */ 2361 */
2321 logical = base + offset; 2362 logical = base + offset;
2322 2363 physical_end = physical + nstripes * map->stripe_len;
2364 if (map->type & (BTRFS_BLOCK_GROUP_RAID5 |
2365 BTRFS_BLOCK_GROUP_RAID6)) {
2366 get_raid56_logic_offset(physical_end, num,
2367 map, &logic_end);
2368 logic_end += base;
2369 } else {
2370 logic_end = logical + increment * nstripes;
2371 }
2323 wait_event(sctx->list_wait, 2372 wait_event(sctx->list_wait,
2324 atomic_read(&sctx->bios_in_flight) == 0); 2373 atomic_read(&sctx->bios_in_flight) == 0);
2325 scrub_blocked_if_needed(fs_info); 2374 scrub_blocked_if_needed(fs_info);
@@ -2328,7 +2377,7 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
2328 key_start.objectid = logical; 2377 key_start.objectid = logical;
2329 key_start.type = BTRFS_EXTENT_ITEM_KEY; 2378 key_start.type = BTRFS_EXTENT_ITEM_KEY;
2330 key_start.offset = (u64)0; 2379 key_start.offset = (u64)0;
2331 key_end.objectid = base + offset + nstripes * increment; 2380 key_end.objectid = logic_end;
2332 key_end.type = BTRFS_METADATA_ITEM_KEY; 2381 key_end.type = BTRFS_METADATA_ITEM_KEY;
2333 key_end.offset = (u64)-1; 2382 key_end.offset = (u64)-1;
2334 reada1 = btrfs_reada_add(root, &key_start, &key_end); 2383 reada1 = btrfs_reada_add(root, &key_start, &key_end);
@@ -2338,7 +2387,7 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
2338 key_start.offset = logical; 2387 key_start.offset = logical;
2339 key_end.objectid = BTRFS_EXTENT_CSUM_OBJECTID; 2388 key_end.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
2340 key_end.type = BTRFS_EXTENT_CSUM_KEY; 2389 key_end.type = BTRFS_EXTENT_CSUM_KEY;
2341 key_end.offset = base + offset + nstripes * increment; 2390 key_end.offset = logic_end;
2342 reada2 = btrfs_reada_add(csum_root, &key_start, &key_end); 2391 reada2 = btrfs_reada_add(csum_root, &key_start, &key_end);
2343 2392
2344 if (!IS_ERR(reada1)) 2393 if (!IS_ERR(reada1))
@@ -2356,11 +2405,17 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
2356 /* 2405 /*
2357 * now find all extents for each stripe and scrub them 2406 * now find all extents for each stripe and scrub them
2358 */ 2407 */
2359 logical = base + offset;
2360 physical = map->stripes[num].physical;
2361 logic_end = logical + increment * nstripes;
2362 ret = 0; 2408 ret = 0;
2363 while (logical < logic_end) { 2409 while (physical < physical_end) {
2410 /* for raid56, we skip parity stripe */
2411 if (map->type & (BTRFS_BLOCK_GROUP_RAID5 |
2412 BTRFS_BLOCK_GROUP_RAID6)) {
2413 ret = get_raid56_logic_offset(physical, num,
2414 map, &logical);
2415 logical += base;
2416 if (ret)
2417 goto skip;
2418 }
2364 /* 2419 /*
2365 * canceled? 2420 * canceled?
2366 */ 2421 */
@@ -2504,15 +2559,29 @@ again:
2504 scrub_free_csums(sctx); 2559 scrub_free_csums(sctx);
2505 if (extent_logical + extent_len < 2560 if (extent_logical + extent_len <
2506 key.objectid + bytes) { 2561 key.objectid + bytes) {
2507 logical += increment; 2562 if (map->type & (BTRFS_BLOCK_GROUP_RAID5 |
2508 physical += map->stripe_len; 2563 BTRFS_BLOCK_GROUP_RAID6)) {
2509 2564 /*
2565 * loop until we find next data stripe
2566 * or we have finished all stripes.
2567 */
2568 do {
2569 physical += map->stripe_len;
2570 ret = get_raid56_logic_offset(
2571 physical, num,
2572 map, &logical);
2573 logical += base;
2574 } while (physical < physical_end && ret);
2575 } else {
2576 physical += map->stripe_len;
2577 logical += increment;
2578 }
2510 if (logical < key.objectid + bytes) { 2579 if (logical < key.objectid + bytes) {
2511 cond_resched(); 2580 cond_resched();
2512 goto again; 2581 goto again;
2513 } 2582 }
2514 2583
2515 if (logical >= logic_end) { 2584 if (physical >= physical_end) {
2516 stop_loop = 1; 2585 stop_loop = 1;
2517 break; 2586 break;
2518 } 2587 }
@@ -2521,6 +2590,7 @@ next:
2521 path->slots[0]++; 2590 path->slots[0]++;
2522 } 2591 }
2523 btrfs_release_path(path); 2592 btrfs_release_path(path);
2593skip:
2524 logical += increment; 2594 logical += increment;
2525 physical += map->stripe_len; 2595 physical += map->stripe_len;
2526 spin_lock(&sctx->stat_lock); 2596 spin_lock(&sctx->stat_lock);