diff options
Diffstat (limited to 'drivers/md/dm.c')
-rw-r--r-- | drivers/md/dm.c | 105 |
1 files changed, 95 insertions, 10 deletions
diff --git a/drivers/md/dm.c b/drivers/md/dm.c index d8544e1a4c1f..fe7c56e10435 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c | |||
@@ -68,10 +68,12 @@ union map_info *dm_get_mapinfo(struct bio *bio) | |||
68 | #define DMF_FROZEN 2 | 68 | #define DMF_FROZEN 2 |
69 | #define DMF_FREEING 3 | 69 | #define DMF_FREEING 3 |
70 | #define DMF_DELETING 4 | 70 | #define DMF_DELETING 4 |
71 | #define DMF_NOFLUSH_SUSPENDING 5 | ||
71 | 72 | ||
72 | struct mapped_device { | 73 | struct mapped_device { |
73 | struct rw_semaphore io_lock; | 74 | struct rw_semaphore io_lock; |
74 | struct semaphore suspend_lock; | 75 | struct semaphore suspend_lock; |
76 | spinlock_t pushback_lock; | ||
75 | rwlock_t map_lock; | 77 | rwlock_t map_lock; |
76 | atomic_t holders; | 78 | atomic_t holders; |
77 | atomic_t open_count; | 79 | atomic_t open_count; |
@@ -90,6 +92,7 @@ struct mapped_device { | |||
90 | atomic_t pending; | 92 | atomic_t pending; |
91 | wait_queue_head_t wait; | 93 | wait_queue_head_t wait; |
92 | struct bio_list deferred; | 94 | struct bio_list deferred; |
95 | struct bio_list pushback; | ||
93 | 96 | ||
94 | /* | 97 | /* |
95 | * The current mapping. | 98 | * The current mapping. |
@@ -444,23 +447,50 @@ int dm_set_geometry(struct mapped_device *md, struct hd_geometry *geo) | |||
444 | * you this clearly demarcated crap. | 447 | * you this clearly demarcated crap. |
445 | *---------------------------------------------------------------*/ | 448 | *---------------------------------------------------------------*/ |
446 | 449 | ||
450 | static int __noflush_suspending(struct mapped_device *md) | ||
451 | { | ||
452 | return test_bit(DMF_NOFLUSH_SUSPENDING, &md->flags); | ||
453 | } | ||
454 | |||
447 | /* | 455 | /* |
448 | * Decrements the number of outstanding ios that a bio has been | 456 | * Decrements the number of outstanding ios that a bio has been |
449 | * cloned into, completing the original io if necc. | 457 | * cloned into, completing the original io if necc. |
450 | */ | 458 | */ |
451 | static void dec_pending(struct dm_io *io, int error) | 459 | static void dec_pending(struct dm_io *io, int error) |
452 | { | 460 | { |
453 | if (error) | 461 | unsigned long flags; |
462 | |||
463 | /* Push-back supersedes any I/O errors */ | ||
464 | if (error && !(io->error > 0 && __noflush_suspending(io->md))) | ||
454 | io->error = error; | 465 | io->error = error; |
455 | 466 | ||
456 | if (atomic_dec_and_test(&io->io_count)) { | 467 | if (atomic_dec_and_test(&io->io_count)) { |
468 | if (io->error == DM_ENDIO_REQUEUE) { | ||
469 | /* | ||
470 | * Target requested pushing back the I/O. | ||
471 | * This must be handled before the sleeper on | ||
472 | * suspend queue merges the pushback list. | ||
473 | */ | ||
474 | spin_lock_irqsave(&io->md->pushback_lock, flags); | ||
475 | if (__noflush_suspending(io->md)) | ||
476 | bio_list_add(&io->md->pushback, io->bio); | ||
477 | else | ||
478 | /* noflush suspend was interrupted. */ | ||
479 | io->error = -EIO; | ||
480 | spin_unlock_irqrestore(&io->md->pushback_lock, flags); | ||
481 | } | ||
482 | |||
457 | if (end_io_acct(io)) | 483 | if (end_io_acct(io)) |
458 | /* nudge anyone waiting on suspend queue */ | 484 | /* nudge anyone waiting on suspend queue */ |
459 | wake_up(&io->md->wait); | 485 | wake_up(&io->md->wait); |
460 | 486 | ||
461 | blk_add_trace_bio(io->md->queue, io->bio, BLK_TA_COMPLETE); | 487 | if (io->error != DM_ENDIO_REQUEUE) { |
488 | blk_add_trace_bio(io->md->queue, io->bio, | ||
489 | BLK_TA_COMPLETE); | ||
490 | |||
491 | bio_endio(io->bio, io->bio->bi_size, io->error); | ||
492 | } | ||
462 | 493 | ||
463 | bio_endio(io->bio, io->bio->bi_size, io->error); | ||
464 | free_io(io->md, io); | 494 | free_io(io->md, io); |
465 | } | 495 | } |
466 | } | 496 | } |
@@ -480,7 +510,11 @@ static int clone_endio(struct bio *bio, unsigned int done, int error) | |||
480 | 510 | ||
481 | if (endio) { | 511 | if (endio) { |
482 | r = endio(tio->ti, bio, error, &tio->info); | 512 | r = endio(tio->ti, bio, error, &tio->info); |
483 | if (r < 0) | 513 | if (r < 0 || r == DM_ENDIO_REQUEUE) |
514 | /* | ||
515 | * error and requeue request are handled | ||
516 | * in dec_pending(). | ||
517 | */ | ||
484 | error = r; | 518 | error = r; |
485 | else if (r == DM_ENDIO_INCOMPLETE) | 519 | else if (r == DM_ENDIO_INCOMPLETE) |
486 | /* The target will handle the io */ | 520 | /* The target will handle the io */ |
@@ -554,8 +588,8 @@ static void __map_bio(struct dm_target *ti, struct bio *clone, | |||
554 | clone->bi_sector); | 588 | clone->bi_sector); |
555 | 589 | ||
556 | generic_make_request(clone); | 590 | generic_make_request(clone); |
557 | } else if (r < 0) { | 591 | } else if (r < 0 || r == DM_MAPIO_REQUEUE) { |
558 | /* error the io and bail out */ | 592 | /* error the io and bail out, or requeue it if needed */ |
559 | md = tio->io->md; | 593 | md = tio->io->md; |
560 | dec_pending(tio->io, r); | 594 | dec_pending(tio->io, r); |
561 | /* | 595 | /* |
@@ -952,6 +986,7 @@ static struct mapped_device *alloc_dev(int minor) | |||
952 | memset(md, 0, sizeof(*md)); | 986 | memset(md, 0, sizeof(*md)); |
953 | init_rwsem(&md->io_lock); | 987 | init_rwsem(&md->io_lock); |
954 | init_MUTEX(&md->suspend_lock); | 988 | init_MUTEX(&md->suspend_lock); |
989 | spin_lock_init(&md->pushback_lock); | ||
955 | rwlock_init(&md->map_lock); | 990 | rwlock_init(&md->map_lock); |
956 | atomic_set(&md->holders, 1); | 991 | atomic_set(&md->holders, 1); |
957 | atomic_set(&md->open_count, 0); | 992 | atomic_set(&md->open_count, 0); |
@@ -1282,10 +1317,12 @@ static void unlock_fs(struct mapped_device *md) | |||
1282 | int dm_suspend(struct mapped_device *md, unsigned suspend_flags) | 1317 | int dm_suspend(struct mapped_device *md, unsigned suspend_flags) |
1283 | { | 1318 | { |
1284 | struct dm_table *map = NULL; | 1319 | struct dm_table *map = NULL; |
1320 | unsigned long flags; | ||
1285 | DECLARE_WAITQUEUE(wait, current); | 1321 | DECLARE_WAITQUEUE(wait, current); |
1286 | struct bio *def; | 1322 | struct bio *def; |
1287 | int r = -EINVAL; | 1323 | int r = -EINVAL; |
1288 | int do_lockfs = suspend_flags & DM_SUSPEND_LOCKFS_FLAG ? 1 : 0; | 1324 | int do_lockfs = suspend_flags & DM_SUSPEND_LOCKFS_FLAG ? 1 : 0; |
1325 | int noflush = suspend_flags & DM_SUSPEND_NOFLUSH_FLAG ? 1 : 0; | ||
1289 | 1326 | ||
1290 | down(&md->suspend_lock); | 1327 | down(&md->suspend_lock); |
1291 | 1328 | ||
@@ -1294,6 +1331,13 @@ int dm_suspend(struct mapped_device *md, unsigned suspend_flags) | |||
1294 | 1331 | ||
1295 | map = dm_get_table(md); | 1332 | map = dm_get_table(md); |
1296 | 1333 | ||
1334 | /* | ||
1335 | * DMF_NOFLUSH_SUSPENDING must be set before presuspend. | ||
1336 | * This flag is cleared before dm_suspend returns. | ||
1337 | */ | ||
1338 | if (noflush) | ||
1339 | set_bit(DMF_NOFLUSH_SUSPENDING, &md->flags); | ||
1340 | |||
1297 | /* This does not get reverted if there's an error later. */ | 1341 | /* This does not get reverted if there's an error later. */ |
1298 | dm_table_presuspend_targets(map); | 1342 | dm_table_presuspend_targets(map); |
1299 | 1343 | ||
@@ -1301,11 +1345,14 @@ int dm_suspend(struct mapped_device *md, unsigned suspend_flags) | |||
1301 | if (!md->suspended_bdev) { | 1345 | if (!md->suspended_bdev) { |
1302 | DMWARN("bdget failed in dm_suspend"); | 1346 | DMWARN("bdget failed in dm_suspend"); |
1303 | r = -ENOMEM; | 1347 | r = -ENOMEM; |
1304 | goto out; | 1348 | goto flush_and_out; |
1305 | } | 1349 | } |
1306 | 1350 | ||
1307 | /* Flush I/O to the device. */ | 1351 | /* |
1308 | if (do_lockfs) { | 1352 | * Flush I/O to the device. |
1353 | * noflush supersedes do_lockfs, because lock_fs() needs to flush I/Os. | ||
1354 | */ | ||
1355 | if (do_lockfs && !noflush) { | ||
1309 | r = lock_fs(md); | 1356 | r = lock_fs(md); |
1310 | if (r) | 1357 | if (r) |
1311 | goto out; | 1358 | goto out; |
@@ -1341,6 +1388,14 @@ int dm_suspend(struct mapped_device *md, unsigned suspend_flags) | |||
1341 | down_write(&md->io_lock); | 1388 | down_write(&md->io_lock); |
1342 | remove_wait_queue(&md->wait, &wait); | 1389 | remove_wait_queue(&md->wait, &wait); |
1343 | 1390 | ||
1391 | if (noflush) { | ||
1392 | spin_lock_irqsave(&md->pushback_lock, flags); | ||
1393 | clear_bit(DMF_NOFLUSH_SUSPENDING, &md->flags); | ||
1394 | bio_list_merge_head(&md->deferred, &md->pushback); | ||
1395 | bio_list_init(&md->pushback); | ||
1396 | spin_unlock_irqrestore(&md->pushback_lock, flags); | ||
1397 | } | ||
1398 | |||
1344 | /* were we interrupted ? */ | 1399 | /* were we interrupted ? */ |
1345 | r = -EINTR; | 1400 | r = -EINTR; |
1346 | if (atomic_read(&md->pending)) { | 1401 | if (atomic_read(&md->pending)) { |
@@ -1349,7 +1404,7 @@ int dm_suspend(struct mapped_device *md, unsigned suspend_flags) | |||
1349 | __flush_deferred_io(md, def); | 1404 | __flush_deferred_io(md, def); |
1350 | up_write(&md->io_lock); | 1405 | up_write(&md->io_lock); |
1351 | unlock_fs(md); | 1406 | unlock_fs(md); |
1352 | goto out; | 1407 | goto out; /* pushback list is already flushed, so skip flush */ |
1353 | } | 1408 | } |
1354 | up_write(&md->io_lock); | 1409 | up_write(&md->io_lock); |
1355 | 1410 | ||
@@ -1359,6 +1414,25 @@ int dm_suspend(struct mapped_device *md, unsigned suspend_flags) | |||
1359 | 1414 | ||
1360 | r = 0; | 1415 | r = 0; |
1361 | 1416 | ||
1417 | flush_and_out: | ||
1418 | if (r && noflush) { | ||
1419 | /* | ||
1420 | * Because there may be already I/Os in the pushback list, | ||
1421 | * flush them before return. | ||
1422 | */ | ||
1423 | down_write(&md->io_lock); | ||
1424 | |||
1425 | spin_lock_irqsave(&md->pushback_lock, flags); | ||
1426 | clear_bit(DMF_NOFLUSH_SUSPENDING, &md->flags); | ||
1427 | bio_list_merge_head(&md->deferred, &md->pushback); | ||
1428 | bio_list_init(&md->pushback); | ||
1429 | spin_unlock_irqrestore(&md->pushback_lock, flags); | ||
1430 | |||
1431 | def = bio_list_get(&md->deferred); | ||
1432 | __flush_deferred_io(md, def); | ||
1433 | up_write(&md->io_lock); | ||
1434 | } | ||
1435 | |||
1362 | out: | 1436 | out: |
1363 | if (r && md->suspended_bdev) { | 1437 | if (r && md->suspended_bdev) { |
1364 | bdput(md->suspended_bdev); | 1438 | bdput(md->suspended_bdev); |
@@ -1445,6 +1519,17 @@ int dm_suspended(struct mapped_device *md) | |||
1445 | return test_bit(DMF_SUSPENDED, &md->flags); | 1519 | return test_bit(DMF_SUSPENDED, &md->flags); |
1446 | } | 1520 | } |
1447 | 1521 | ||
1522 | int dm_noflush_suspending(struct dm_target *ti) | ||
1523 | { | ||
1524 | struct mapped_device *md = dm_table_get_md(ti->table); | ||
1525 | int r = __noflush_suspending(md); | ||
1526 | |||
1527 | dm_put(md); | ||
1528 | |||
1529 | return r; | ||
1530 | } | ||
1531 | EXPORT_SYMBOL_GPL(dm_noflush_suspending); | ||
1532 | |||
1448 | static struct block_device_operations dm_blk_dops = { | 1533 | static struct block_device_operations dm_blk_dops = { |
1449 | .open = dm_blk_open, | 1534 | .open = dm_blk_open, |
1450 | .release = dm_blk_close, | 1535 | .release = dm_blk_close, |