aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorOmar Sandoval <osandov@fb.com>2015-06-19 14:52:51 -0400
committerChris Mason <clm@fb.com>2015-08-09 10:34:26 -0400
commit73ff61dbe5edeb1799d7e91c8b0641f87feb75fa (patch)
treecb58fb5cbe9a3c6c0077babd130ee54135e35e8a
parentb4ee1782686d5b7a97826d67fdeaefaedbca23ce (diff)
Btrfs: fix device replace of a missing RAID 5/6 device
The original implementation of device replace on RAID 5/6 seems to have missed support for replacing a missing device. When this is attempted, we end up calling bio_add_page() on a bio with a NULL ->bi_bdev, which crashes when we try to dereference it. This happens because btrfs_map_block() has no choice but to return us the missing device because RAID 5/6 doesn't have any alternate mirrors to read from, and a missing device has a NULL bdev. The idea implemented here is to handle the missing device case separately, which should only happen when we're replacing a missing RAID 5/6 device. We use the new BTRFS_RBIO_REBUILD_MISSING operation to reconstruct the data from parity, check it with scrub_recheck_block_checksum(), and write it out with scrub_write_block_to_dev_replace(). Reported-by: Philip <bugzilla@philip-seeger.de> Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=96141 Signed-off-by: Omar Sandoval <osandov@fb.com> Signed-off-by: Chris Mason <clm@fb.com>
-rw-r--r--fs/btrfs/scrub.c157
1 files changed, 147 insertions, 10 deletions
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index 038162456cfa..6bce7f2ff805 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -125,6 +125,7 @@ struct scrub_block {
125 /* It is for the data with checksum */ 125 /* It is for the data with checksum */
126 unsigned int data_corrected:1; 126 unsigned int data_corrected:1;
127 }; 127 };
128 struct btrfs_work work;
128}; 129};
129 130
130/* Used for the chunks with parity stripe such RAID5/6 */ 131/* Used for the chunks with parity stripe such RAID5/6 */
@@ -2173,6 +2174,134 @@ again:
2173 return 0; 2174 return 0;
2174} 2175}
2175 2176
2177static void scrub_missing_raid56_end_io(struct bio *bio, int error)
2178{
2179 struct scrub_block *sblock = bio->bi_private;
2180 struct btrfs_fs_info *fs_info = sblock->sctx->dev_root->fs_info;
2181
2182 if (error)
2183 sblock->no_io_error_seen = 0;
2184
2185 btrfs_queue_work(fs_info->scrub_workers, &sblock->work);
2186}
2187
2188static void scrub_missing_raid56_worker(struct btrfs_work *work)
2189{
2190 struct scrub_block *sblock = container_of(work, struct scrub_block, work);
2191 struct scrub_ctx *sctx = sblock->sctx;
2192 struct btrfs_fs_info *fs_info = sctx->dev_root->fs_info;
2193 unsigned int is_metadata;
2194 unsigned int have_csum;
2195 u8 *csum;
2196 u64 generation;
2197 u64 logical;
2198 struct btrfs_device *dev;
2199
2200 is_metadata = !(sblock->pagev[0]->flags & BTRFS_EXTENT_FLAG_DATA);
2201 have_csum = sblock->pagev[0]->have_csum;
2202 csum = sblock->pagev[0]->csum;
2203 generation = sblock->pagev[0]->generation;
2204 logical = sblock->pagev[0]->logical;
2205 dev = sblock->pagev[0]->dev;
2206
2207 if (sblock->no_io_error_seen) {
2208 scrub_recheck_block_checksum(fs_info, sblock, is_metadata,
2209 have_csum, csum, generation,
2210 sctx->csum_size);
2211 }
2212
2213 if (!sblock->no_io_error_seen) {
2214 spin_lock(&sctx->stat_lock);
2215 sctx->stat.read_errors++;
2216 spin_unlock(&sctx->stat_lock);
2217 printk_ratelimited_in_rcu(KERN_ERR
2218 "BTRFS: I/O error rebulding logical %llu for dev %s\n",
2219 logical, rcu_str_deref(dev->name));
2220 } else if (sblock->header_error || sblock->checksum_error) {
2221 spin_lock(&sctx->stat_lock);
2222 sctx->stat.uncorrectable_errors++;
2223 spin_unlock(&sctx->stat_lock);
2224 printk_ratelimited_in_rcu(KERN_ERR
2225 "BTRFS: failed to rebuild valid logical %llu for dev %s\n",
2226 logical, rcu_str_deref(dev->name));
2227 } else {
2228 scrub_write_block_to_dev_replace(sblock);
2229 }
2230
2231 scrub_block_put(sblock);
2232
2233 if (sctx->is_dev_replace &&
2234 atomic_read(&sctx->wr_ctx.flush_all_writes)) {
2235 mutex_lock(&sctx->wr_ctx.wr_lock);
2236 scrub_wr_submit(sctx);
2237 mutex_unlock(&sctx->wr_ctx.wr_lock);
2238 }
2239
2240 scrub_pending_bio_dec(sctx);
2241}
2242
2243static void scrub_missing_raid56_pages(struct scrub_block *sblock)
2244{
2245 struct scrub_ctx *sctx = sblock->sctx;
2246 struct btrfs_fs_info *fs_info = sctx->dev_root->fs_info;
2247 u64 length = sblock->page_count * PAGE_SIZE;
2248 u64 logical = sblock->pagev[0]->logical;
2249 struct btrfs_bio *bbio;
2250 struct bio *bio;
2251 struct btrfs_raid_bio *rbio;
2252 int ret;
2253 int i;
2254
2255 ret = btrfs_map_sblock(fs_info, REQ_GET_READ_MIRRORS, logical, &length,
2256 &bbio, 0, 1);
2257 if (ret || !bbio || !bbio->raid_map)
2258 goto bbio_out;
2259
2260 if (WARN_ON(!sctx->is_dev_replace ||
2261 !(bbio->map_type & BTRFS_BLOCK_GROUP_RAID56_MASK))) {
2262 /*
2263 * We shouldn't be scrubbing a missing device. Even for dev
2264 * replace, we should only get here for RAID 5/6. We either
2265 * managed to mount something with no mirrors remaining or
2266 * there's a bug in scrub_remap_extent()/btrfs_map_block().
2267 */
2268 goto bbio_out;
2269 }
2270
2271 bio = btrfs_io_bio_alloc(GFP_NOFS, 0);
2272 if (!bio)
2273 goto bbio_out;
2274
2275 bio->bi_iter.bi_sector = logical >> 9;
2276 bio->bi_private = sblock;
2277 bio->bi_end_io = scrub_missing_raid56_end_io;
2278
2279 rbio = raid56_alloc_missing_rbio(sctx->dev_root, bio, bbio, length);
2280 if (!rbio)
2281 goto rbio_out;
2282
2283 for (i = 0; i < sblock->page_count; i++) {
2284 struct scrub_page *spage = sblock->pagev[i];
2285
2286 raid56_add_scrub_pages(rbio, spage->page, spage->logical);
2287 }
2288
2289 btrfs_init_work(&sblock->work, btrfs_scrub_helper,
2290 scrub_missing_raid56_worker, NULL, NULL);
2291 scrub_block_get(sblock);
2292 scrub_pending_bio_inc(sctx);
2293 raid56_submit_missing_rbio(rbio);
2294 return;
2295
2296rbio_out:
2297 bio_put(bio);
2298bbio_out:
2299 btrfs_put_bbio(bbio);
2300 spin_lock(&sctx->stat_lock);
2301 sctx->stat.malloc_errors++;
2302 spin_unlock(&sctx->stat_lock);
2303}
2304
2176static int scrub_pages(struct scrub_ctx *sctx, u64 logical, u64 len, 2305static int scrub_pages(struct scrub_ctx *sctx, u64 logical, u64 len,
2177 u64 physical, struct btrfs_device *dev, u64 flags, 2306 u64 physical, struct btrfs_device *dev, u64 flags,
2178 u64 gen, int mirror_num, u8 *csum, int force, 2307 u64 gen, int mirror_num, u8 *csum, int force,
@@ -2236,19 +2365,27 @@ leave_nomem:
2236 } 2365 }
2237 2366
2238 WARN_ON(sblock->page_count == 0); 2367 WARN_ON(sblock->page_count == 0);
2239 for (index = 0; index < sblock->page_count; index++) { 2368 if (dev->missing) {
2240 struct scrub_page *spage = sblock->pagev[index]; 2369 /*
2241 int ret; 2370 * This case should only be hit for RAID 5/6 device replace. See
2371 * the comment in scrub_missing_raid56_pages() for details.
2372 */
2373 scrub_missing_raid56_pages(sblock);
2374 } else {
2375 for (index = 0; index < sblock->page_count; index++) {
2376 struct scrub_page *spage = sblock->pagev[index];
2377 int ret;
2242 2378
2243 ret = scrub_add_page_to_rd_bio(sctx, spage); 2379 ret = scrub_add_page_to_rd_bio(sctx, spage);
2244 if (ret) { 2380 if (ret) {
2245 scrub_block_put(sblock); 2381 scrub_block_put(sblock);
2246 return ret; 2382 return ret;
2383 }
2247 } 2384 }
2248 }
2249 2385
2250 if (force) 2386 if (force)
2251 scrub_submit(sctx); 2387 scrub_submit(sctx);
2388 }
2252 2389
2253 /* last one frees, either here or in bio completion for last page */ 2390 /* last one frees, either here or in bio completion for last page */
2254 scrub_block_put(sblock); 2391 scrub_block_put(sblock);