Diffstat (limited to 'drivers/md')
-rw-r--r--  drivers/md/dm.c     |   8
-rw-r--r--  drivers/md/md.c     |  27
-rw-r--r--  drivers/md/raid1.c  |   2
-rw-r--r--  drivers/md/raid10.c | 133
-rw-r--r--  drivers/md/raid5.c  |  79
5 files changed, 141 insertions(+), 108 deletions(-)
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 02db9183ca01..77e6eff41cae 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -740,8 +740,14 @@ static void rq_completed(struct mapped_device *md, int rw, int run_queue)
 	if (!md_in_flight(md))
 		wake_up(&md->wait);
 
+	/*
+	 * Run this off this callpath, as drivers could invoke end_io while
+	 * inside their request_fn (and holding the queue lock). Calling
+	 * back into ->request_fn() could deadlock attempting to grab the
+	 * queue lock again.
+	 */
 	if (run_queue)
-		blk_run_queue(md->queue);
+		blk_run_queue_async(md->queue);
 
 	/*
 	 * dm_put() must be at the end of this function. See the comment above
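
The hunk above swaps the synchronous blk_run_queue() for blk_run_queue_async() because rq_completed() can be reached from a driver's request_fn with the queue lock already held; re-entering the dispatch path would try to take that non-recursive lock again. A minimal userspace sketch of the self-deadlock shape and the asynchronous escape — a pthread mutex stands in for the queue lock, and every name here is illustrative rather than block-layer API:

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

static pthread_mutex_t queue_lock = PTHREAD_MUTEX_INITIALIZER;
static bool run_requested;

/* Dispatch loop: always entered with queue_lock held, like a request_fn. */
static void request_fn_locked(void)
{
	printf("dispatching queued requests\n");
}

/* Synchronous kick: takes queue_lock, so it must never be called from a
 * context that already holds it. */
static void run_queue(void)
{
	pthread_mutex_lock(&queue_lock);
	request_fn_locked();
	pthread_mutex_unlock(&queue_lock);
}

/* Asynchronous kick: only records that a run is needed; some other
 * context performs it later, lock-free on this path. */
static void run_queue_async(void)
{
	run_requested = true;
}

/* Completion handler invoked by the driver from inside its request_fn,
 * i.e. with queue_lock already held: calling run_queue() here would
 * self-deadlock, run_queue_async() is safe. */
static void end_io(void)
{
	run_queue_async();
}

int main(void)
{
	pthread_mutex_lock(&queue_lock);	/* driver is inside request_fn */
	end_io();				/* completion arrives */
	pthread_mutex_unlock(&queue_lock);

	if (run_requested)			/* deferred kick, other context */
		run_queue();
	return 0;
}
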
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 9ab768acfb62..61200717687b 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -1817,10 +1817,10 @@ retry:
 	memset(bbp, 0xff, PAGE_SIZE);
 
 	for (i = 0 ; i < bb->count ; i++) {
-		u64 internal_bb = *p++;
+		u64 internal_bb = p[i];
 		u64 store_bb = ((BB_OFFSET(internal_bb) << 10)
 				| BB_LEN(internal_bb));
-		*bbp++ = cpu_to_le64(store_bb);
+		bbp[i] = cpu_to_le64(store_bb);
 	}
 	bb->changed = 0;
 	if (read_seqretry(&bb->lock, seq))
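
For context on the loop above: it serializes the in-memory bad-block table into the v1 superblock, and replacing pointer post-increment with indexed access keeps the pass idempotent if the enclosing read_seqretry() check forces the whole loop to rerun. A standalone sketch of the record conversion; the masks and shifts follow the BB_* macros in this era's md.h, but treat the exact constants as illustrative:

#include <stdint.h>
#include <stdio.h>

#define BB_LEN_MASK	(0x00000000000001FFULL)	/* low 9 bits: length - 1 */
#define BB_OFFSET_MASK	(0x7FFFFFFFFFFFFE00ULL)	/* 54 bits: start sector */

#define BB_OFFSET(x)	(((x) & BB_OFFSET_MASK) >> 9)
#define BB_LEN(x)	(((x) & BB_LEN_MASK) + 1)

int main(void)
{
	/* 8 bad sectors starting at sector 12345, in the in-memory format */
	uint64_t internal_bb = (12345ULL << 9) | (8 - 1);

	/* On-disk format written by the hunk above: offset shifted by 10,
	 * length in the low 10 bits */
	uint64_t store_bb = (BB_OFFSET(internal_bb) << 10)
			  | BB_LEN(internal_bb);

	printf("sector %llu len %llu -> 0x%016llx\n",
	       (unsigned long long)BB_OFFSET(internal_bb),
	       (unsigned long long)BB_LEN(internal_bb),
	       (unsigned long long)store_bb);
	return 0;
}
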
@@ -5294,7 +5294,7 @@ void md_stop_writes(struct mddev *mddev)
 }
 EXPORT_SYMBOL_GPL(md_stop_writes);
 
-void md_stop(struct mddev *mddev)
+static void __md_stop(struct mddev *mddev)
 {
 	mddev->ready = 0;
 	mddev->pers->stop(mddev);
@@ -5304,6 +5304,18 @@ void md_stop(struct mddev *mddev)
 	mddev->pers = NULL;
 	clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
 }
+
+void md_stop(struct mddev *mddev)
+{
+	/* stop the array and free any attached data structures.
+	 * This is called from dm-raid
+	 */
+	__md_stop(mddev);
+	bitmap_destroy(mddev);
+	if (mddev->bio_set)
+		bioset_free(mddev->bio_set);
+}
+
 EXPORT_SYMBOL_GPL(md_stop);
 
 static int md_set_readonly(struct mddev *mddev, struct block_device *bdev)
@@ -5364,7 +5376,7 @@ static int do_md_stop(struct mddev * mddev, int mode,
 		set_disk_ro(disk, 0);
 
 	__md_stop_writes(mddev);
-	md_stop(mddev);
+	__md_stop(mddev);
 	mddev->queue->merge_bvec_fn = NULL;
 	mddev->queue->backing_dev_info.congested_fn = NULL;
 
@@ -7936,9 +7948,9 @@ int md_is_badblock(struct badblocks *bb, sector_t s, int sectors,
 		   sector_t *first_bad, int *bad_sectors)
 {
 	int hi;
-	int lo = 0;
+	int lo;
 	u64 *p = bb->page;
-	int rv = 0;
+	int rv;
 	sector_t target = s + sectors;
 	unsigned seq;
 
@@ -7953,7 +7965,8 @@ int md_is_badblock(struct badblocks *bb, sector_t s, int sectors,
 
 retry:
 	seq = read_seqbegin(&bb->lock);
-
+	lo = 0;
+	rv = 0;
 	hi = bb->count;
 
 	/* Binary search between lo and hi for 'target'
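
The md_is_badblock() fix above moves the initialization of lo and rv inside the retry loop: when read_seqretry() detects a concurrent writer, the search restarts at the retry label, so every value the previous pass mutated has to be re-established or the second pass begins from a stale midpoint. A hedged standalone sketch of the same binary-search-under-seqlock shape; seq_begin()/seq_retry() are simplified stand-ins for read_seqbegin()/read_seqretry():

#include <stdio.h>

static unsigned sequence;	/* even = stable, odd = writer active */

static unsigned seq_begin(void)	 { return sequence; }
static int seq_retry(unsigned s) { return (s & 1) || s != sequence; }

static int table[] = { 3, 9, 14, 27 };	/* sorted, like bb->page entries */

/* Find the largest slot whose key is <= target, retrying if a writer
 * intervened. Everything the loop mutates is reset after 'retry'. */
static int search(int target)
{
	unsigned seq;
	int lo, hi, rv;

retry:
	seq = seq_begin();
	lo = 0;		/* must be reset on every pass... */
	rv = -1;	/* ...and so must the result */
	hi = sizeof(table) / sizeof(table[0]);

	while (hi - lo > 1) {
		int mid = (lo + hi) / 2;
		if (table[mid] <= target)
			lo = mid;
		else
			hi = mid;
	}
	if (table[lo] <= target)
		rv = lo;

	if (seq_retry(seq))
		goto retry;
	return rv;
}

int main(void)
{
	printf("slot %d\n", search(14));	/* prints "slot 2" */
	return 0;
}
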
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index 636bae0405e8..a0f73092176e 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -963,7 +963,7 @@ static void raid1_unplug(struct blk_plug_cb *cb, bool from_schedule)
 	struct r1conf *conf = mddev->private;
 	struct bio *bio;
 
-	if (from_schedule) {
+	if (from_schedule || current->bio_list) {
 		spin_lock_irq(&conf->device_lock);
 		bio_list_merge(&conf->pending_bio_list, &plug->pending);
 		conf->pending_count += plug->pending_cnt;
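
This raid1 change (mirrored in raid10 below) defers the plugged bios to the md thread not only when unplugging from schedule, but whenever current->bio_list is non-NULL — that is, whenever the callback runs while this task is still inside generic_make_request, where submitting the bios directly cannot make forward progress. A small userspace sketch of the defer-instead-of-recurse pattern; the thread-local flag stands in for current->bio_list and the names are invented for illustration:

#include <stdbool.h>
#include <stdio.h>

#define MAX_PENDING 16

static __thread bool in_submit;		/* ~ current->bio_list != NULL */
static __thread int pending[MAX_PENDING];
static __thread int npending;

static void do_io(int item);

/* If we are already inside the submission path, queue the item for the
 * outermost caller instead of recursing into the same machinery. */
static void submit(int item)
{
	if (in_submit) {
		pending[npending++] = item;
		return;
	}
	in_submit = true;
	do_io(item);
	while (npending)		/* drain work queued by nested calls */
		do_io(pending[--npending]);
	in_submit = false;
}

static void do_io(int item)
{
	printf("doing I/O for item %d\n", item);
	if (item > 0)
		submit(item - 1);	/* e.g. a mirrored sub-request */
}

int main(void)
{
	submit(3);	/* items run 3,2,1,0 with bounded stack depth */
	return 0;
}
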
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index d1295aff4173..c9acbd717131 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -499,7 +499,7 @@ static void raid10_end_write_request(struct bio *bio, int error)
 	 */
 	one_write_done(r10_bio);
 	if (dec_rdev)
-		rdev_dec_pending(conf->mirrors[dev].rdev, conf->mddev);
+		rdev_dec_pending(rdev, conf->mddev);
 }
 
 /*
@@ -1069,7 +1069,7 @@ static void raid10_unplug(struct blk_plug_cb *cb, bool from_schedule)
 	struct r10conf *conf = mddev->private;
 	struct bio *bio;
 
-	if (from_schedule) {
+	if (from_schedule || current->bio_list) {
 		spin_lock_irq(&conf->device_lock);
 		bio_list_merge(&conf->pending_bio_list, &plug->pending);
 		conf->pending_count += plug->pending_cnt;
@@ -1334,18 +1334,21 @@ retry_write:
 			blocked_rdev = rrdev;
 			break;
 		}
+		if (rdev && (test_bit(Faulty, &rdev->flags)
+			     || test_bit(Unmerged, &rdev->flags)))
+			rdev = NULL;
 		if (rrdev && (test_bit(Faulty, &rrdev->flags)
 			      || test_bit(Unmerged, &rrdev->flags)))
 			rrdev = NULL;
 
 		r10_bio->devs[i].bio = NULL;
 		r10_bio->devs[i].repl_bio = NULL;
-		if (!rdev || test_bit(Faulty, &rdev->flags) ||
-		    test_bit(Unmerged, &rdev->flags)) {
+
+		if (!rdev && !rrdev) {
 			set_bit(R10BIO_Degraded, &r10_bio->state);
 			continue;
 		}
-		if (test_bit(WriteErrorSeen, &rdev->flags)) {
+		if (rdev && test_bit(WriteErrorSeen, &rdev->flags)) {
 			sector_t first_bad;
 			sector_t dev_sector = r10_bio->devs[i].addr;
 			int bad_sectors;
@@ -1387,8 +1390,10 @@ retry_write:
 				max_sectors = good_sectors;
 			}
 		}
-		r10_bio->devs[i].bio = bio;
-		atomic_inc(&rdev->nr_pending);
+		if (rdev) {
+			r10_bio->devs[i].bio = bio;
+			atomic_inc(&rdev->nr_pending);
+		}
 		if (rrdev) {
 			r10_bio->devs[i].repl_bio = bio;
 			atomic_inc(&rrdev->nr_pending);
@@ -1444,69 +1449,71 @@ retry_write:
 	for (i = 0; i < conf->copies; i++) {
 		struct bio *mbio;
 		int d = r10_bio->devs[i].devnum;
-		if (!r10_bio->devs[i].bio)
-			continue;
+		if (r10_bio->devs[i].bio) {
+			struct md_rdev *rdev = conf->mirrors[d].rdev;
+			mbio = bio_clone_mddev(bio, GFP_NOIO, mddev);
+			md_trim_bio(mbio, r10_bio->sector - bio->bi_sector,
+				    max_sectors);
+			r10_bio->devs[i].bio = mbio;
+
+			mbio->bi_sector	= (r10_bio->devs[i].addr+
+					   choose_data_offset(r10_bio,
+							      rdev));
+			mbio->bi_bdev = rdev->bdev;
+			mbio->bi_end_io	= raid10_end_write_request;
+			mbio->bi_rw = WRITE | do_sync | do_fua | do_discard;
+			mbio->bi_private = r10_bio;
 
-		mbio = bio_clone_mddev(bio, GFP_NOIO, mddev);
-		md_trim_bio(mbio, r10_bio->sector - bio->bi_sector,
-			    max_sectors);
-		r10_bio->devs[i].bio = mbio;
+			atomic_inc(&r10_bio->remaining);
 
-		mbio->bi_sector	= (r10_bio->devs[i].addr+
-				   choose_data_offset(r10_bio,
-						      conf->mirrors[d].rdev));
-		mbio->bi_bdev = conf->mirrors[d].rdev->bdev;
-		mbio->bi_end_io	= raid10_end_write_request;
-		mbio->bi_rw = WRITE | do_sync | do_fua | do_discard;
-		mbio->bi_private = r10_bio;
+			cb = blk_check_plugged(raid10_unplug, mddev,
+					       sizeof(*plug));
+			if (cb)
+				plug = container_of(cb, struct raid10_plug_cb,
+						    cb);
+			else
+				plug = NULL;
+			spin_lock_irqsave(&conf->device_lock, flags);
+			if (plug) {
+				bio_list_add(&plug->pending, mbio);
+				plug->pending_cnt++;
+			} else {
+				bio_list_add(&conf->pending_bio_list, mbio);
+				conf->pending_count++;
+			}
+			spin_unlock_irqrestore(&conf->device_lock, flags);
+			if (!plug)
+				md_wakeup_thread(mddev->thread);
+		}
 
-		atomic_inc(&r10_bio->remaining);
+		if (r10_bio->devs[i].repl_bio) {
+			struct md_rdev *rdev = conf->mirrors[d].replacement;
+			if (rdev == NULL) {
+				/* Replacement just got moved to main 'rdev' */
+				smp_mb();
+				rdev = conf->mirrors[d].rdev;
+			}
+			mbio = bio_clone_mddev(bio, GFP_NOIO, mddev);
+			md_trim_bio(mbio, r10_bio->sector - bio->bi_sector,
+				    max_sectors);
+			r10_bio->devs[i].repl_bio = mbio;
+
+			mbio->bi_sector	= (r10_bio->devs[i].addr +
+					   choose_data_offset(
+						r10_bio, rdev));
+			mbio->bi_bdev = rdev->bdev;
+			mbio->bi_end_io	= raid10_end_write_request;
+			mbio->bi_rw = WRITE | do_sync | do_fua | do_discard;
+			mbio->bi_private = r10_bio;
 
-		cb = blk_check_plugged(raid10_unplug, mddev, sizeof(*plug));
-		if (cb)
-			plug = container_of(cb, struct raid10_plug_cb, cb);
-		else
-			plug = NULL;
-		spin_lock_irqsave(&conf->device_lock, flags);
-		if (plug) {
-			bio_list_add(&plug->pending, mbio);
-			plug->pending_cnt++;
-		} else {
+			atomic_inc(&r10_bio->remaining);
+			spin_lock_irqsave(&conf->device_lock, flags);
 			bio_list_add(&conf->pending_bio_list, mbio);
 			conf->pending_count++;
+			spin_unlock_irqrestore(&conf->device_lock, flags);
+			if (!mddev_check_plugged(mddev))
+				md_wakeup_thread(mddev->thread);
 		}
-		spin_unlock_irqrestore(&conf->device_lock, flags);
-		if (!plug)
-			md_wakeup_thread(mddev->thread);
-
-		if (!r10_bio->devs[i].repl_bio)
-			continue;
-
-		mbio = bio_clone_mddev(bio, GFP_NOIO, mddev);
-		md_trim_bio(mbio, r10_bio->sector - bio->bi_sector,
-			    max_sectors);
-		r10_bio->devs[i].repl_bio = mbio;
-
-		/* We are actively writing to the original device
-		 * so it cannot disappear, so the replacement cannot
-		 * become NULL here
-		 */
-		mbio->bi_sector	= (r10_bio->devs[i].addr +
-				   choose_data_offset(
-					r10_bio,
-					conf->mirrors[d].replacement));
-		mbio->bi_bdev = conf->mirrors[d].replacement->bdev;
-		mbio->bi_end_io	= raid10_end_write_request;
-		mbio->bi_rw = WRITE | do_sync | do_fua | do_discard;
-		mbio->bi_private = r10_bio;
-
-		atomic_inc(&r10_bio->remaining);
-		spin_lock_irqsave(&conf->device_lock, flags);
-		bio_list_add(&conf->pending_bio_list, mbio);
-		conf->pending_count++;
-		spin_unlock_irqrestore(&conf->device_lock, flags);
-		if (!mddev_check_plugged(mddev))
-			md_wakeup_thread(mddev->thread);
 	}
 
 	/* Don't remove the bias on 'remaining' (one_write_done) until
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index c5439dce0295..a4502686e7a8 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -2774,10 +2774,12 @@ static void handle_stripe_clean_event(struct r5conf *conf,
 		dev = &sh->dev[i];
 		if (!test_bit(R5_LOCKED, &dev->flags) &&
 		    (test_bit(R5_UPTODATE, &dev->flags) ||
-		     test_and_clear_bit(R5_Discard, &dev->flags))) {
+		     test_bit(R5_Discard, &dev->flags))) {
 			/* We can return any write requests */
 			struct bio *wbi, *wbi2;
 			pr_debug("Return write for disc %d\n", i);
+			if (test_and_clear_bit(R5_Discard, &dev->flags))
+				clear_bit(R5_UPTODATE, &dev->flags);
 			wbi = dev->written;
 			dev->written = NULL;
 			while (wbi && wbi->bi_sector <
@@ -2795,7 +2797,8 @@ static void handle_stripe_clean_event(struct r5conf *conf,
 					 !test_bit(STRIPE_DEGRADED, &sh->state),
 					 0);
 			}
-		}
+		} else if (test_bit(R5_Discard, &sh->dev[i].flags))
+			clear_bit(R5_Discard, &sh->dev[i].flags);
 
 	if (test_and_clear_bit(STRIPE_FULL_WRITE, &sh->state))
 		if (atomic_dec_and_test(&conf->pending_full_writes))
@@ -3490,40 +3493,6 @@ static void handle_stripe(struct stripe_head *sh)
 		handle_failed_sync(conf, sh, &s);
 	}
 
-	/*
-	 * might be able to return some write requests if the parity blocks
-	 * are safe, or on a failed drive
-	 */
-	pdev = &sh->dev[sh->pd_idx];
-	s.p_failed = (s.failed >= 1 && s.failed_num[0] == sh->pd_idx)
-		|| (s.failed >= 2 && s.failed_num[1] == sh->pd_idx);
-	qdev = &sh->dev[sh->qd_idx];
-	s.q_failed = (s.failed >= 1 && s.failed_num[0] == sh->qd_idx)
-		|| (s.failed >= 2 && s.failed_num[1] == sh->qd_idx)
-		|| conf->level < 6;
-
-	if (s.written &&
-	    (s.p_failed || ((test_bit(R5_Insync, &pdev->flags)
-			     && !test_bit(R5_LOCKED, &pdev->flags)
-			     && (test_bit(R5_UPTODATE, &pdev->flags) ||
-				 test_bit(R5_Discard, &pdev->flags))))) &&
-	    (s.q_failed || ((test_bit(R5_Insync, &qdev->flags)
-			     && !test_bit(R5_LOCKED, &qdev->flags)
-			     && (test_bit(R5_UPTODATE, &qdev->flags) ||
-				 test_bit(R5_Discard, &qdev->flags))))))
-		handle_stripe_clean_event(conf, sh, disks, &s.return_bi);
-
-	/* Now we might consider reading some blocks, either to check/generate
-	 * parity, or to satisfy requests
-	 * or to load a block that is being partially written.
-	 */
-	if (s.to_read || s.non_overwrite
-	    || (conf->level == 6 && s.to_write && s.failed)
-	    || (s.syncing && (s.uptodate + s.compute < disks))
-	    || s.replacing
-	    || s.expanding)
-		handle_stripe_fill(sh, &s, disks);
-
 	/* Now we check to see if any write operations have recently
 	 * completed
 	 */
@@ -3561,6 +3530,40 @@ static void handle_stripe(struct stripe_head *sh)
 			s.dec_preread_active = 1;
 	}
 
+	/*
+	 * might be able to return some write requests if the parity blocks
+	 * are safe, or on a failed drive
+	 */
+	pdev = &sh->dev[sh->pd_idx];
+	s.p_failed = (s.failed >= 1 && s.failed_num[0] == sh->pd_idx)
+		|| (s.failed >= 2 && s.failed_num[1] == sh->pd_idx);
+	qdev = &sh->dev[sh->qd_idx];
+	s.q_failed = (s.failed >= 1 && s.failed_num[0] == sh->qd_idx)
+		|| (s.failed >= 2 && s.failed_num[1] == sh->qd_idx)
+		|| conf->level < 6;
+
+	if (s.written &&
+	    (s.p_failed || ((test_bit(R5_Insync, &pdev->flags)
+			     && !test_bit(R5_LOCKED, &pdev->flags)
+			     && (test_bit(R5_UPTODATE, &pdev->flags) ||
+				 test_bit(R5_Discard, &pdev->flags))))) &&
+	    (s.q_failed || ((test_bit(R5_Insync, &qdev->flags)
+			     && !test_bit(R5_LOCKED, &qdev->flags)
+			     && (test_bit(R5_UPTODATE, &qdev->flags) ||
+				 test_bit(R5_Discard, &qdev->flags))))))
+		handle_stripe_clean_event(conf, sh, disks, &s.return_bi);
+
+	/* Now we might consider reading some blocks, either to check/generate
+	 * parity, or to satisfy requests
+	 * or to load a block that is being partially written.
+	 */
+	if (s.to_read || s.non_overwrite
+	    || (conf->level == 6 && s.to_write && s.failed)
+	    || (s.syncing && (s.uptodate + s.compute < disks))
+	    || s.replacing
+	    || s.expanding)
+		handle_stripe_fill(sh, &s, disks);
+
 	/* Now to consider new write requests and what else, if anything
 	 * should be read. We do not handle new writes when:
 	 * 1/ A 'write' operation (copy+xor) is already in flight.
@@ -5529,6 +5532,10 @@ static int run(struct mddev *mddev)
 		 * discard data disk but write parity disk
 		 */
 		stripe = stripe * PAGE_SIZE;
+		/* Round up to power of 2, as discard handling
+		 * currently assumes that */
+		while ((stripe-1) & stripe)
+			stripe = (stripe | (stripe-1)) + 1;
 		mddev->queue->limits.discard_alignment = stripe;
 		mddev->queue->limits.discard_granularity = stripe;
 		/*
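
The loop added to run() above rounds the stripe size up to the next power of two: (stripe-1) & stripe is non-zero exactly while stripe is not a power of two, and stripe | (stripe-1) sets every bit below the highest set bit so that adding one carries into the next power of two. A standalone check of the bit trick (the 1536 KiB input imagines, say, three data disks with 512 KiB chunks):

#include <stdio.h>

static unsigned long roundup_pow2(unsigned long stripe)
{
	/* (stripe - 1) & stripe == 0 exactly when stripe is a power of 2 */
	while ((stripe - 1) & stripe)
		/* fill every bit below the top set bit, then carry up */
		stripe = (stripe | (stripe - 1)) + 1;
	return stripe;
}

int main(void)
{
	/* 1536 KiB -> 2048 KiB (0x180000 -> 0x200000) */
	printf("%lu\n", roundup_pow2(1536UL * 1024));
	return 0;
}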