diff options
author | Omar Sandoval <osandov@fb.com> | 2015-06-19 14:52:51 -0400 |
---|---|---|
committer | Chris Mason <clm@fb.com> | 2015-08-09 10:34:26 -0400 |
commit | 73ff61dbe5edeb1799d7e91c8b0641f87feb75fa (patch) | |
tree | cb58fb5cbe9a3c6c0077babd130ee54135e35e8a | |
parent | b4ee1782686d5b7a97826d67fdeaefaedbca23ce (diff) |
Btrfs: fix device replace of a missing RAID 5/6 device
The original implementation of device replace on RAID 5/6 seems to have
missed support for replacing a missing device. When this is attempted,
we end up calling bio_add_page() on a bio with a NULL ->bi_bdev, which
crashes when we try to dereference it. This happens because
btrfs_map_block() has no choice but to return the missing device to us,
because RAID 5/6 has no alternate mirrors to read from, and a
missing device has a NULL bdev.
The idea implemented here is to handle the missing device case
separately, which had better only happen when we're replacing a missing RAID
5/6 device. We use the new BTRFS_RBIO_REBUILD_MISSING operation to
reconstruct the data from parity, check it with
scrub_recheck_block_checksum(), and write it out with
scrub_write_block_to_dev_replace().
Reported-by: Philip <bugzilla@philip-seeger.de>
Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=96141
Signed-off-by: Omar Sandoval <osandov@fb.com>
Signed-off-by: Chris Mason <clm@fb.com>
-rw-r--r-- | fs/btrfs/scrub.c | 157 |
1 files changed, 147 insertions, 10 deletions
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index 038162456cfa..6bce7f2ff805 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c | |||
@@ -125,6 +125,7 @@ struct scrub_block { | |||
125 | /* It is for the data with checksum */ | 125 | /* It is for the data with checksum */ |
126 | unsigned int data_corrected:1; | 126 | unsigned int data_corrected:1; |
127 | }; | 127 | }; |
128 | struct btrfs_work work; | ||
128 | }; | 129 | }; |
129 | 130 | ||
130 | /* Used for the chunks with parity stripe such RAID5/6 */ | 131 | /* Used for the chunks with parity stripe such RAID5/6 */ |
@@ -2173,6 +2174,134 @@ again: | |||
2173 | return 0; | 2174 | return 0; |
2174 | } | 2175 | } |
2175 | 2176 | ||
2177 | static void scrub_missing_raid56_end_io(struct bio *bio, int error) | ||
2178 | { | ||
2179 | struct scrub_block *sblock = bio->bi_private; | ||
2180 | struct btrfs_fs_info *fs_info = sblock->sctx->dev_root->fs_info; | ||
2181 | |||
2182 | if (error) | ||
2183 | sblock->no_io_error_seen = 0; | ||
2184 | |||
2185 | btrfs_queue_work(fs_info->scrub_workers, &sblock->work); | ||
2186 | } | ||
2187 | |||
2188 | static void scrub_missing_raid56_worker(struct btrfs_work *work) | ||
2189 | { | ||
2190 | struct scrub_block *sblock = container_of(work, struct scrub_block, work); | ||
2191 | struct scrub_ctx *sctx = sblock->sctx; | ||
2192 | struct btrfs_fs_info *fs_info = sctx->dev_root->fs_info; | ||
2193 | unsigned int is_metadata; | ||
2194 | unsigned int have_csum; | ||
2195 | u8 *csum; | ||
2196 | u64 generation; | ||
2197 | u64 logical; | ||
2198 | struct btrfs_device *dev; | ||
2199 | |||
2200 | is_metadata = !(sblock->pagev[0]->flags & BTRFS_EXTENT_FLAG_DATA); | ||
2201 | have_csum = sblock->pagev[0]->have_csum; | ||
2202 | csum = sblock->pagev[0]->csum; | ||
2203 | generation = sblock->pagev[0]->generation; | ||
2204 | logical = sblock->pagev[0]->logical; | ||
2205 | dev = sblock->pagev[0]->dev; | ||
2206 | |||
2207 | if (sblock->no_io_error_seen) { | ||
2208 | scrub_recheck_block_checksum(fs_info, sblock, is_metadata, | ||
2209 | have_csum, csum, generation, | ||
2210 | sctx->csum_size); | ||
2211 | } | ||
2212 | |||
2213 | if (!sblock->no_io_error_seen) { | ||
2214 | spin_lock(&sctx->stat_lock); | ||
2215 | sctx->stat.read_errors++; | ||
2216 | spin_unlock(&sctx->stat_lock); | ||
2217 | printk_ratelimited_in_rcu(KERN_ERR | ||
2218 | "BTRFS: I/O error rebulding logical %llu for dev %s\n", | ||
2219 | logical, rcu_str_deref(dev->name)); | ||
2220 | } else if (sblock->header_error || sblock->checksum_error) { | ||
2221 | spin_lock(&sctx->stat_lock); | ||
2222 | sctx->stat.uncorrectable_errors++; | ||
2223 | spin_unlock(&sctx->stat_lock); | ||
2224 | printk_ratelimited_in_rcu(KERN_ERR | ||
2225 | "BTRFS: failed to rebuild valid logical %llu for dev %s\n", | ||
2226 | logical, rcu_str_deref(dev->name)); | ||
2227 | } else { | ||
2228 | scrub_write_block_to_dev_replace(sblock); | ||
2229 | } | ||
2230 | |||
2231 | scrub_block_put(sblock); | ||
2232 | |||
2233 | if (sctx->is_dev_replace && | ||
2234 | atomic_read(&sctx->wr_ctx.flush_all_writes)) { | ||
2235 | mutex_lock(&sctx->wr_ctx.wr_lock); | ||
2236 | scrub_wr_submit(sctx); | ||
2237 | mutex_unlock(&sctx->wr_ctx.wr_lock); | ||
2238 | } | ||
2239 | |||
2240 | scrub_pending_bio_dec(sctx); | ||
2241 | } | ||
2242 | |||
2243 | static void scrub_missing_raid56_pages(struct scrub_block *sblock) | ||
2244 | { | ||
2245 | struct scrub_ctx *sctx = sblock->sctx; | ||
2246 | struct btrfs_fs_info *fs_info = sctx->dev_root->fs_info; | ||
2247 | u64 length = sblock->page_count * PAGE_SIZE; | ||
2248 | u64 logical = sblock->pagev[0]->logical; | ||
2249 | struct btrfs_bio *bbio; | ||
2250 | struct bio *bio; | ||
2251 | struct btrfs_raid_bio *rbio; | ||
2252 | int ret; | ||
2253 | int i; | ||
2254 | |||
2255 | ret = btrfs_map_sblock(fs_info, REQ_GET_READ_MIRRORS, logical, &length, | ||
2256 | &bbio, 0, 1); | ||
2257 | if (ret || !bbio || !bbio->raid_map) | ||
2258 | goto bbio_out; | ||
2259 | |||
2260 | if (WARN_ON(!sctx->is_dev_replace || | ||
2261 | !(bbio->map_type & BTRFS_BLOCK_GROUP_RAID56_MASK))) { | ||
2262 | /* | ||
2263 | * We shouldn't be scrubbing a missing device. Even for dev | ||
2264 | * replace, we should only get here for RAID 5/6. We either | ||
2265 | * managed to mount something with no mirrors remaining or | ||
2266 | * there's a bug in scrub_remap_extent()/btrfs_map_block(). | ||
2267 | */ | ||
2268 | goto bbio_out; | ||
2269 | } | ||
2270 | |||
2271 | bio = btrfs_io_bio_alloc(GFP_NOFS, 0); | ||
2272 | if (!bio) | ||
2273 | goto bbio_out; | ||
2274 | |||
2275 | bio->bi_iter.bi_sector = logical >> 9; | ||
2276 | bio->bi_private = sblock; | ||
2277 | bio->bi_end_io = scrub_missing_raid56_end_io; | ||
2278 | |||
2279 | rbio = raid56_alloc_missing_rbio(sctx->dev_root, bio, bbio, length); | ||
2280 | if (!rbio) | ||
2281 | goto rbio_out; | ||
2282 | |||
2283 | for (i = 0; i < sblock->page_count; i++) { | ||
2284 | struct scrub_page *spage = sblock->pagev[i]; | ||
2285 | |||
2286 | raid56_add_scrub_pages(rbio, spage->page, spage->logical); | ||
2287 | } | ||
2288 | |||
2289 | btrfs_init_work(&sblock->work, btrfs_scrub_helper, | ||
2290 | scrub_missing_raid56_worker, NULL, NULL); | ||
2291 | scrub_block_get(sblock); | ||
2292 | scrub_pending_bio_inc(sctx); | ||
2293 | raid56_submit_missing_rbio(rbio); | ||
2294 | return; | ||
2295 | |||
2296 | rbio_out: | ||
2297 | bio_put(bio); | ||
2298 | bbio_out: | ||
2299 | btrfs_put_bbio(bbio); | ||
2300 | spin_lock(&sctx->stat_lock); | ||
2301 | sctx->stat.malloc_errors++; | ||
2302 | spin_unlock(&sctx->stat_lock); | ||
2303 | } | ||
2304 | |||
2176 | static int scrub_pages(struct scrub_ctx *sctx, u64 logical, u64 len, | 2305 | static int scrub_pages(struct scrub_ctx *sctx, u64 logical, u64 len, |
2177 | u64 physical, struct btrfs_device *dev, u64 flags, | 2306 | u64 physical, struct btrfs_device *dev, u64 flags, |
2178 | u64 gen, int mirror_num, u8 *csum, int force, | 2307 | u64 gen, int mirror_num, u8 *csum, int force, |
@@ -2236,19 +2365,27 @@ leave_nomem: | |||
2236 | } | 2365 | } |
2237 | 2366 | ||
2238 | WARN_ON(sblock->page_count == 0); | 2367 | WARN_ON(sblock->page_count == 0); |
2239 | for (index = 0; index < sblock->page_count; index++) { | 2368 | if (dev->missing) { |
2240 | struct scrub_page *spage = sblock->pagev[index]; | 2369 | /* |
2241 | int ret; | 2370 | * This case should only be hit for RAID 5/6 device replace. See |
2371 | * the comment in scrub_missing_raid56_pages() for details. | ||
2372 | */ | ||
2373 | scrub_missing_raid56_pages(sblock); | ||
2374 | } else { | ||
2375 | for (index = 0; index < sblock->page_count; index++) { | ||
2376 | struct scrub_page *spage = sblock->pagev[index]; | ||
2377 | int ret; | ||
2242 | 2378 | ||
2243 | ret = scrub_add_page_to_rd_bio(sctx, spage); | 2379 | ret = scrub_add_page_to_rd_bio(sctx, spage); |
2244 | if (ret) { | 2380 | if (ret) { |
2245 | scrub_block_put(sblock); | 2381 | scrub_block_put(sblock); |
2246 | return ret; | 2382 | return ret; |
2383 | } | ||
2247 | } | 2384 | } |
2248 | } | ||
2249 | 2385 | ||
2250 | if (force) | 2386 | if (force) |
2251 | scrub_submit(sctx); | 2387 | scrub_submit(sctx); |
2388 | } | ||
2252 | 2389 | ||
2253 | /* last one frees, either here or in bio completion for last page */ | 2390 | /* last one frees, either here or in bio completion for last page */ |
2254 | scrub_block_put(sblock); | 2391 | scrub_block_put(sblock); |