Diffstat (limited to 'drivers/md/dm.c')

 -rw-r--r--  drivers/md/dm.c | 105
 1 file changed, 95 insertions(+), 10 deletions(-)
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index d8544e1a4c1f..fe7c56e10435 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -68,10 +68,12 @@ union map_info *dm_get_mapinfo(struct bio *bio)
 #define DMF_FROZEN 2
 #define DMF_FREEING 3
 #define DMF_DELETING 4
+#define DMF_NOFLUSH_SUSPENDING 5
 
 struct mapped_device {
         struct rw_semaphore io_lock;
         struct semaphore suspend_lock;
+        spinlock_t pushback_lock;
         rwlock_t map_lock;
         atomic_t holders;
         atomic_t open_count;
@@ -90,6 +92,7 @@ struct mapped_device {
         atomic_t pending;
         wait_queue_head_t wait;
         struct bio_list deferred;
+        struct bio_list pushback;
 
         /*
          * The current mapping.
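
The two struct hunks above, together with the new DMF_NOFLUSH_SUSPENDING bit, introduce the whole of the push-back state: a flag marking that a noflush suspend is in progress, a spinlock guarding the new list, and a second bio list alongside deferred. The following minimal user-space sketch shows how the three cooperate; every name in it (md_model, fail_or_pushback, the pthread mutex standing in for the spinlock, ints standing in for bios) is invented for illustration and is not kernel code.

#include <assert.h>
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

#define MAX_BIOS 16

struct md_model {
        bool noflush_suspending;        /* models the DMF_NOFLUSH_SUSPENDING bit */
        pthread_mutex_t pushback_lock;  /* models md->pushback_lock */
        int pushback[MAX_BIOS];         /* models md->pushback */
        int npushback;
};

/* On I/O failure: park the bio if a noflush suspend is in progress,
 * otherwise let the caller complete it with an error. */
static bool fail_or_pushback(struct md_model *md, int bio)
{
        bool parked = false;

        pthread_mutex_lock(&md->pushback_lock);
        if (md->noflush_suspending && md->npushback < MAX_BIOS) {
                md->pushback[md->npushback++] = bio;
                parked = true;
        }
        pthread_mutex_unlock(&md->pushback_lock);
        return parked;
}

int main(void)
{
        struct md_model md = {
                .noflush_suspending = true,
                .pushback_lock = PTHREAD_MUTEX_INITIALIZER,
        };

        assert(fail_or_pushback(&md, 1));   /* suspend in progress: parked */
        md.noflush_suspending = false;
        assert(!fail_or_pushback(&md, 2));  /* suspend over: error out */
        printf("parked %d bio(s)\n", md.npushback);
        return 0;
}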
@@ -444,23 +447,50 @@ int dm_set_geometry(struct mapped_device *md, struct hd_geometry *geo)
  * you this clearly demarcated crap.
  *---------------------------------------------------------------*/
 
+static int __noflush_suspending(struct mapped_device *md)
+{
+        return test_bit(DMF_NOFLUSH_SUSPENDING, &md->flags);
+}
+
 /*
  * Decrements the number of outstanding ios that a bio has been
  * cloned into, completing the original io if necc.
  */
 static void dec_pending(struct dm_io *io, int error)
 {
-        if (error)
+        unsigned long flags;
+
+        /* Push-back supersedes any I/O errors */
+        if (error && !(io->error > 0 && __noflush_suspending(io->md)))
                 io->error = error;
 
         if (atomic_dec_and_test(&io->io_count)) {
+                if (io->error == DM_ENDIO_REQUEUE) {
+                        /*
+                         * Target requested pushing back the I/O.
+                         * This must be handled before the sleeper on
+                         * suspend queue merges the pushback list.
+                         */
+                        spin_lock_irqsave(&io->md->pushback_lock, flags);
+                        if (__noflush_suspending(io->md))
+                                bio_list_add(&io->md->pushback, io->bio);
+                        else
+                                /* noflush suspend was interrupted. */
+                                io->error = -EIO;
+                        spin_unlock_irqrestore(&io->md->pushback_lock, flags);
+                }
+
                 if (end_io_acct(io))
                         /* nudge anyone waiting on suspend queue */
                         wake_up(&io->md->wait);
 
-                blk_add_trace_bio(io->md->queue, io->bio, BLK_TA_COMPLETE);
+                if (io->error != DM_ENDIO_REQUEUE) {
+                        blk_add_trace_bio(io->md->queue, io->bio,
+                                          BLK_TA_COMPLETE);
+
+                        bio_endio(io->bio, io->bio->bi_size, io->error);
+                }
 
-                bio_endio(io->bio, io->bio->bi_size, io->error);
                 free_io(io->md, io);
         }
 }
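
The rewritten dec_pending() encodes a precedence rule: a push-back request (DM_ENDIO_REQUEUE, a positive value, as the io->error > 0 test implies) must survive later negative errnos from other clones of the same io, but only while a noflush suspend is in progress. A small stand-alone model of just that rule, with the DM_ENDIO_REQUEUE value assumed for the sketch:

#include <assert.h>
#include <stdbool.h>

#define DM_ENDIO_REQUEUE 2  /* assumed positive, matching the io->error > 0 test */

/* One clone of the io finished with 'error'; fold it into the io-wide
 * error, mirroring the test at the top of dec_pending(). */
static void fold_error(int *io_error, int error, bool noflush_suspending)
{
        /* Push-back supersedes any I/O errors. */
        if (error && !(*io_error > 0 && noflush_suspending))
                *io_error = error;
}

int main(void)
{
        int e = DM_ENDIO_REQUEUE;

        /* During a noflush suspend, -EIO (-5) must not clobber the requeue. */
        fold_error(&e, -5, true);
        assert(e == DM_ENDIO_REQUEUE);

        /* Once the suspend is over, the real error wins. */
        e = DM_ENDIO_REQUEUE;
        fold_error(&e, -5, false);
        assert(e == -5);
        return 0;
}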
@@ -480,7 +510,11 @@ static int clone_endio(struct bio *bio, unsigned int done, int error)
 
         if (endio) {
                 r = endio(tio->ti, bio, error, &tio->info);
-                if (r < 0)
+                if (r < 0 || r == DM_ENDIO_REQUEUE)
+                        /*
+                         * error and requeue request are handled
+                         * in dec_pending().
+                         */
                         error = r;
                 else if (r == DM_ENDIO_INCOMPLETE)
                         /* The target will handle the io */
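
For target authors, the clone_endio() change extends the end_io return contract: a negative errno fails the original bio, DM_ENDIO_INCOMPLETE means the target keeps handling the io, and the new DM_ENDIO_REQUEUE asks core dm to push the bio back. Below is a hypothetical, compilable sketch of a target hook using the new value; the dm types, the constant values, and my_path_is_down() are stubs invented for the example, not the kernel's definitions.

#include <stddef.h>

#define DM_ENDIO_INCOMPLETE 1   /* assumed values for the sketch */
#define DM_ENDIO_REQUEUE    2

struct dm_target { void *private; };
struct bio;                     /* opaque here */
union map_info { void *ptr; };

static int my_path_is_down(void) { return 1; }          /* invented stub */
static int dm_noflush_suspending(struct dm_target *ti)  /* stub for the export */
{
        (void)ti;
        return 1;
}

/* Return 0 to complete the clone, a negative errno to fail the original
 * bio, DM_ENDIO_REQUEUE to have dm push it back, or DM_ENDIO_INCOMPLETE
 * to keep handling it in the target. */
static int example_end_io(struct dm_target *ti, struct bio *bio,
                          int error, union map_info *map_context)
{
        (void)bio; (void)map_context;

        if (!error)
                return 0;
        if (my_path_is_down() && dm_noflush_suspending(ti))
                return DM_ENDIO_REQUEUE;        /* park it for resume */
        return error;                           /* surface the real errno */
}

int main(void)
{
        struct dm_target ti = { NULL };

        /* A clone failing with -EIO (-5) during a noflush suspend. */
        return example_end_io(&ti, NULL, -5, NULL) == DM_ENDIO_REQUEUE ? 0 : 1;
}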
@@ -554,8 +588,8 @@ static void __map_bio(struct dm_target *ti, struct bio *clone,
                                     clone->bi_sector);
 
                 generic_make_request(clone);
-        } else if (r < 0) {
-                /* error the io and bail out */
+        } else if (r < 0 || r == DM_MAPIO_REQUEUE) {
+                /* error the io and bail out, or requeue it if needed */
                 md = tio->io->md;
                 dec_pending(tio->io, r);
                 /*
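
__map_bio() gets the map-side counterpart: a target's map function may now return DM_MAPIO_REQUEUE, which is fed straight into dec_pending() and from there onto the pushback list. Another hypothetical stub sketch, with an invented no_usable_path() helper and assumed constant values:

#include <stddef.h>

#define DM_MAPIO_REMAPPED 1     /* assumed values for the sketch */
#define DM_MAPIO_REQUEUE  2

struct dm_target { void *private; };
struct bio { long bi_sector; };
union map_info { void *ptr; };

static int no_usable_path(void) { return 1; }   /* invented stub */

static int example_map(struct dm_target *ti, struct bio *bio,
                       union map_info *map_context)
{
        (void)ti; (void)map_context;

        if (no_usable_path())
                /* __map_bio() hands this to dec_pending(), which pushes
                 * the bio back if a noflush suspend is in progress. */
                return DM_MAPIO_REQUEUE;

        bio->bi_sector += 0;    /* a real target would remap here */
        return DM_MAPIO_REMAPPED;
}

int main(void)
{
        struct dm_target ti = { NULL };
        struct bio b = { 0 };
        union map_info mi = { NULL };

        return example_map(&ti, &b, &mi) == DM_MAPIO_REQUEUE ? 0 : 1;
}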
@@ -952,6 +986,7 @@ static struct mapped_device *alloc_dev(int minor)
         memset(md, 0, sizeof(*md));
         init_rwsem(&md->io_lock);
         init_MUTEX(&md->suspend_lock);
+        spin_lock_init(&md->pushback_lock);
         rwlock_init(&md->map_lock);
         atomic_set(&md->holders, 1);
         atomic_set(&md->open_count, 0);
@@ -1282,10 +1317,12 @@ static void unlock_fs(struct mapped_device *md)
 int dm_suspend(struct mapped_device *md, unsigned suspend_flags)
 {
         struct dm_table *map = NULL;
+        unsigned long flags;
         DECLARE_WAITQUEUE(wait, current);
         struct bio *def;
         int r = -EINVAL;
         int do_lockfs = suspend_flags & DM_SUSPEND_LOCKFS_FLAG ? 1 : 0;
+        int noflush = suspend_flags & DM_SUSPEND_NOFLUSH_FLAG ? 1 : 0;
 
         down(&md->suspend_lock);
 
@@ -1294,6 +1331,13 @@ int dm_suspend(struct mapped_device *md, unsigned suspend_flags)
 
         map = dm_get_table(md);
 
+        /*
+         * DMF_NOFLUSH_SUSPENDING must be set before presuspend.
+         * This flag is cleared before dm_suspend returns.
+         */
+        if (noflush)
+                set_bit(DMF_NOFLUSH_SUSPENDING, &md->flags);
+
         /* This does not get reverted if there's an error later. */
         dm_table_presuspend_targets(map);
 
@@ -1301,11 +1345,14 @@ int dm_suspend(struct mapped_device *md, unsigned suspend_flags)
         if (!md->suspended_bdev) {
                 DMWARN("bdget failed in dm_suspend");
                 r = -ENOMEM;
-                goto out;
+                goto flush_and_out;
         }
 
-        /* Flush I/O to the device. */
-        if (do_lockfs) {
+        /*
+         * Flush I/O to the device.
+         * noflush supersedes do_lockfs, because lock_fs() needs to flush I/Os.
+         */
+        if (do_lockfs && !noflush) {
                 r = lock_fs(md);
                 if (r)
                         goto out;
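
The lock_fs() test above is the one behavioral interaction between the two suspend flags: noflush wins over lockfs, since freezing the filesystem would itself have to flush I/O. A tiny stand-alone model of that decision follows; the flag names match the patch series, but the bit values are assumptions for the sketch.

#include <assert.h>

#define DM_SUSPEND_LOCKFS_FLAG  (1 << 0)   /* assumed bit values */
#define DM_SUSPEND_NOFLUSH_FLAG (1 << 1)

/* Mirrors the do_lockfs/noflush computation and the test in dm_suspend(). */
static int will_lock_fs(unsigned suspend_flags)
{
        int do_lockfs = suspend_flags & DM_SUSPEND_LOCKFS_FLAG ? 1 : 0;
        int noflush = suspend_flags & DM_SUSPEND_NOFLUSH_FLAG ? 1 : 0;

        return do_lockfs && !noflush;
}

int main(void)
{
        assert(will_lock_fs(DM_SUSPEND_LOCKFS_FLAG));
        assert(!will_lock_fs(DM_SUSPEND_LOCKFS_FLAG | DM_SUSPEND_NOFLUSH_FLAG));
        return 0;
}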
@@ -1341,6 +1388,14 @@ int dm_suspend(struct mapped_device *md, unsigned suspend_flags)
         down_write(&md->io_lock);
         remove_wait_queue(&md->wait, &wait);
 
+        if (noflush) {
+                spin_lock_irqsave(&md->pushback_lock, flags);
+                clear_bit(DMF_NOFLUSH_SUSPENDING, &md->flags);
+                bio_list_merge_head(&md->deferred, &md->pushback);
+                bio_list_init(&md->pushback);
+                spin_unlock_irqrestore(&md->pushback_lock, flags);
+        }
+
         /* were we interrupted ? */
         r = -EINTR;
         if (atomic_read(&md->pending)) {
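
On completion of the wait, the pushback list is spliced onto the head of the deferred list, so pushed-back bios are reissued before anything deferred afterwards. The following user-space model mirrors the head-splice that bio_list_merge_head() performs on the kernel's head/tail singly linked struct bio_list; the node and list types here are illustrative stand-ins.

#include <assert.h>
#include <stddef.h>

struct node { struct node *next; int id; };
struct list { struct node *head, *tail; };

/* Splice 'src' in front of 'dst', preserving the order within each list. */
static void merge_head(struct list *dst, struct list *src)
{
        if (!src->head)
                return;                         /* nothing to splice */
        if (dst->head)
                src->tail->next = dst->head;    /* chain src in front */
        else
                dst->tail = src->tail;
        dst->head = src->head;
}

int main(void)
{
        struct node d1 = { NULL, 10 };          /* previously deferred bio */
        struct node p2 = { NULL, 2 };
        struct node p1 = { &p2, 1 };            /* pushback: 1 -> 2 */
        struct list deferred = { &d1, &d1 };
        struct list pushback = { &p1, &p2 };

        merge_head(&deferred, &pushback);

        /* Pushed-back bios now precede the previously deferred one. */
        assert(deferred.head->id == 1);
        assert(deferred.head->next->id == 2);
        assert(deferred.head->next->next->id == 10);
        assert(deferred.tail == &d1);
        return 0;
}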
@@ -1349,7 +1404,7 @@ int dm_suspend(struct mapped_device *md, unsigned suspend_flags)
                 __flush_deferred_io(md, def);
                 up_write(&md->io_lock);
                 unlock_fs(md);
-                goto out;
+                goto out; /* pushback list is already flushed, so skip flush */
         }
         up_write(&md->io_lock);
 
@@ -1359,6 +1414,25 @@ int dm_suspend(struct mapped_device *md, unsigned suspend_flags)
 
         r = 0;
 
+flush_and_out:
+        if (r && noflush) {
+                /*
+                 * Because there may be already I/Os in the pushback list,
+                 * flush them before return.
+                 */
+                down_write(&md->io_lock);
+
+                spin_lock_irqsave(&md->pushback_lock, flags);
+                clear_bit(DMF_NOFLUSH_SUSPENDING, &md->flags);
+                bio_list_merge_head(&md->deferred, &md->pushback);
+                bio_list_init(&md->pushback);
+                spin_unlock_irqrestore(&md->pushback_lock, flags);
+
+                def = bio_list_get(&md->deferred);
+                __flush_deferred_io(md, def);
+                up_write(&md->io_lock);
+        }
+
 out:
         if (r && md->suspended_bdev) {
                 bdput(md->suspended_bdev);
@@ -1445,6 +1519,17 @@ int dm_suspended(struct mapped_device *md)
         return test_bit(DMF_SUSPENDED, &md->flags);
 }
 
+int dm_noflush_suspending(struct dm_target *ti)
+{
+        struct mapped_device *md = dm_table_get_md(ti->table);
+        int r = __noflush_suspending(md);
+
+        dm_put(md);
+
+        return r;
+}
+EXPORT_SYMBOL_GPL(dm_noflush_suspending);
+
 static struct block_device_operations dm_blk_dops = {
         .open = dm_blk_open,
         .release = dm_blk_close,
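
dm_noflush_suspending() is the target-visible query for the new state, and it follows the usual reference discipline: dm_table_get_md() returns the mapped_device with a reference held, which dm_put() drops before returning. A minimal user-space model of that balanced get/put pattern, with invented names standing in for the kernel helpers:

#include <assert.h>

struct obj { int refcount; int flag; };

static struct obj *get(struct obj *o) { o->refcount++; return o; }
static void put(struct obj *o) { o->refcount--; }

/* Mirrors dm_noflush_suspending(): take a ref, read the flag, drop the ref. */
static int read_flag(struct obj *o)
{
        struct obj *held = get(o);
        int r = held->flag;

        put(held);
        return r;
}

int main(void)
{
        struct obj o = { 1, 1 };

        assert(read_flag(&o) == 1);
        assert(o.refcount == 1);   /* balanced get/put */
        return 0;
}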
