diff options
author | NeilBrown <neilb@cse.unsw.edu.au> | 2005-09-09 19:23:54 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@g5.osdl.org> | 2005-09-09 19:39:12 -0400 |
commit | 72626685dc66d455742a7f215a0535c551628b9e (patch) | |
tree | 91e19a61a5a3b782007132b6b2e353e8936dd656 | |
parent | 0002b2718dd04da67c21f8a7830de8d95a9b0345 (diff) |
[PATCH] md: add write-intent-bitmap support to raid5
The most awkward part of this is delaying write requests until bitmap updates have
been flushed.
To achieve this, we have a sequence number (seq_flush) which is incremented
each time the raid5 is unplugged.
If the raid thread notices that this has changed, it flushes bitmap changes,
and assigns the value of seq_flush to seq_write.
When a write request arrives, it is given the number from seq_write, and that
write request may not complete until seq_flush is larger than the saved seq
number.
We have a new queue for storing stripes which are waiting for a bitmap flush
and an extra flag for stripes to record if the write was 'degraded' and so
should not clear the bit in the bitmap.
Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
-rw-r--r-- | drivers/md/md.c | 3 | ||||
-rw-r--r-- | drivers/md/raid5.c | 133 | ||||
-rw-r--r-- | include/linux/raid/raid5.h | 14 |
3 files changed, 137 insertions, 13 deletions
diff --git a/drivers/md/md.c b/drivers/md/md.c index be7873c61b3c..dbf540a7fccc 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c | |||
@@ -645,7 +645,7 @@ static int super_90_validate(mddev_t *mddev, mdk_rdev_t *rdev) | |||
645 | 645 | ||
646 | if (sb->state & (1<<MD_SB_BITMAP_PRESENT) && | 646 | if (sb->state & (1<<MD_SB_BITMAP_PRESENT) && |
647 | mddev->bitmap_file == NULL) { | 647 | mddev->bitmap_file == NULL) { |
648 | if (mddev->level != 1) { | 648 | if (mddev->level != 1 && mddev->level != 5) { |
649 | /* FIXME use a better test */ | 649 | /* FIXME use a better test */ |
650 | printk(KERN_WARNING "md: bitmaps only support for raid1\n"); | 650 | printk(KERN_WARNING "md: bitmaps only support for raid1\n"); |
651 | return -EINVAL; | 651 | return -EINVAL; |
@@ -3517,7 +3517,6 @@ void md_done_sync(mddev_t *mddev, int blocks, int ok) | |||
3517 | */ | 3517 | */ |
3518 | void md_write_start(mddev_t *mddev, struct bio *bi) | 3518 | void md_write_start(mddev_t *mddev, struct bio *bi) |
3519 | { | 3519 | { |
3520 | DEFINE_WAIT(w); | ||
3521 | if (bio_data_dir(bi) != WRITE) | 3520 | if (bio_data_dir(bi) != WRITE) |
3522 | return; | 3521 | return; |
3523 | 3522 | ||
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index ed859e08d600..4683ca24c046 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c | |||
@@ -24,6 +24,8 @@ | |||
24 | #include <linux/bitops.h> | 24 | #include <linux/bitops.h> |
25 | #include <asm/atomic.h> | 25 | #include <asm/atomic.h> |
26 | 26 | ||
27 | #include <linux/raid/bitmap.h> | ||
28 | |||
27 | /* | 29 | /* |
28 | * Stripe cache | 30 | * Stripe cache |
29 | */ | 31 | */ |
@@ -79,8 +81,13 @@ static inline void __release_stripe(raid5_conf_t *conf, struct stripe_head *sh) | |||
79 | if (test_bit(STRIPE_HANDLE, &sh->state)) { | 81 | if (test_bit(STRIPE_HANDLE, &sh->state)) { |
80 | if (test_bit(STRIPE_DELAYED, &sh->state)) | 82 | if (test_bit(STRIPE_DELAYED, &sh->state)) |
81 | list_add_tail(&sh->lru, &conf->delayed_list); | 83 | list_add_tail(&sh->lru, &conf->delayed_list); |
82 | else | 84 | else if (test_bit(STRIPE_BIT_DELAY, &sh->state) && |
85 | conf->seq_write == sh->bm_seq) | ||
86 | list_add_tail(&sh->lru, &conf->bitmap_list); | ||
87 | else { | ||
88 | clear_bit(STRIPE_BIT_DELAY, &sh->state); | ||
83 | list_add_tail(&sh->lru, &conf->handle_list); | 89 | list_add_tail(&sh->lru, &conf->handle_list); |
90 | } | ||
84 | md_wakeup_thread(conf->mddev->thread); | 91 | md_wakeup_thread(conf->mddev->thread); |
85 | } else { | 92 | } else { |
86 | if (test_and_clear_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) { | 93 | if (test_and_clear_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) { |
@@ -244,6 +251,9 @@ static struct stripe_head *get_active_stripe(raid5_conf_t *conf, sector_t sector | |||
244 | spin_lock_irq(&conf->device_lock); | 251 | spin_lock_irq(&conf->device_lock); |
245 | 252 | ||
246 | do { | 253 | do { |
254 | wait_event_lock_irq(conf->wait_for_stripe, | ||
255 | conf->quiesce == 0, | ||
256 | conf->device_lock, /* nothing */); | ||
247 | sh = __find_stripe(conf, sector); | 257 | sh = __find_stripe(conf, sector); |
248 | if (!sh) { | 258 | if (!sh) { |
249 | if (!conf->inactive_blocked) | 259 | if (!conf->inactive_blocked) |
@@ -803,6 +813,7 @@ static int add_stripe_bio(struct stripe_head *sh, struct bio *bi, int dd_idx, in | |||
803 | { | 813 | { |
804 | struct bio **bip; | 814 | struct bio **bip; |
805 | raid5_conf_t *conf = sh->raid_conf; | 815 | raid5_conf_t *conf = sh->raid_conf; |
816 | int firstwrite=0; | ||
806 | 817 | ||
807 | PRINTK("adding bh b#%llu to stripe s#%llu\n", | 818 | PRINTK("adding bh b#%llu to stripe s#%llu\n", |
808 | (unsigned long long)bi->bi_sector, | 819 | (unsigned long long)bi->bi_sector, |
@@ -811,9 +822,11 @@ static int add_stripe_bio(struct stripe_head *sh, struct bio *bi, int dd_idx, in | |||
811 | 822 | ||
812 | spin_lock(&sh->lock); | 823 | spin_lock(&sh->lock); |
813 | spin_lock_irq(&conf->device_lock); | 824 | spin_lock_irq(&conf->device_lock); |
814 | if (forwrite) | 825 | if (forwrite) { |
815 | bip = &sh->dev[dd_idx].towrite; | 826 | bip = &sh->dev[dd_idx].towrite; |
816 | else | 827 | if (*bip == NULL && sh->dev[dd_idx].written == NULL) |
828 | firstwrite = 1; | ||
829 | } else | ||
817 | bip = &sh->dev[dd_idx].toread; | 830 | bip = &sh->dev[dd_idx].toread; |
818 | while (*bip && (*bip)->bi_sector < bi->bi_sector) { | 831 | while (*bip && (*bip)->bi_sector < bi->bi_sector) { |
819 | if ((*bip)->bi_sector + ((*bip)->bi_size >> 9) > bi->bi_sector) | 832 | if ((*bip)->bi_sector + ((*bip)->bi_size >> 9) > bi->bi_sector) |
@@ -836,6 +849,13 @@ static int add_stripe_bio(struct stripe_head *sh, struct bio *bi, int dd_idx, in | |||
836 | (unsigned long long)bi->bi_sector, | 849 | (unsigned long long)bi->bi_sector, |
837 | (unsigned long long)sh->sector, dd_idx); | 850 | (unsigned long long)sh->sector, dd_idx); |
838 | 851 | ||
852 | if (conf->mddev->bitmap && firstwrite) { | ||
853 | sh->bm_seq = conf->seq_write; | ||
854 | bitmap_startwrite(conf->mddev->bitmap, sh->sector, | ||
855 | STRIPE_SECTORS, 0); | ||
856 | set_bit(STRIPE_BIT_DELAY, &sh->state); | ||
857 | } | ||
858 | |||
839 | if (forwrite) { | 859 | if (forwrite) { |
840 | /* check if page is covered */ | 860 | /* check if page is covered */ |
841 | sector_t sector = sh->dev[dd_idx].sector; | 861 | sector_t sector = sh->dev[dd_idx].sector; |
@@ -958,12 +978,13 @@ static void handle_stripe(struct stripe_head *sh) | |||
958 | * need to be failed | 978 | * need to be failed |
959 | */ | 979 | */ |
960 | if (failed > 1 && to_read+to_write+written) { | 980 | if (failed > 1 && to_read+to_write+written) { |
961 | spin_lock_irq(&conf->device_lock); | ||
962 | for (i=disks; i--; ) { | 981 | for (i=disks; i--; ) { |
982 | int bitmap_end = 0; | ||
983 | spin_lock_irq(&conf->device_lock); | ||
963 | /* fail all writes first */ | 984 | /* fail all writes first */ |
964 | bi = sh->dev[i].towrite; | 985 | bi = sh->dev[i].towrite; |
965 | sh->dev[i].towrite = NULL; | 986 | sh->dev[i].towrite = NULL; |
966 | if (bi) to_write--; | 987 | if (bi) { to_write--; bitmap_end = 1; } |
967 | 988 | ||
968 | if (test_and_clear_bit(R5_Overlap, &sh->dev[i].flags)) | 989 | if (test_and_clear_bit(R5_Overlap, &sh->dev[i].flags)) |
969 | wake_up(&conf->wait_for_overlap); | 990 | wake_up(&conf->wait_for_overlap); |
@@ -981,6 +1002,7 @@ static void handle_stripe(struct stripe_head *sh) | |||
981 | /* and fail all 'written' */ | 1002 | /* and fail all 'written' */ |
982 | bi = sh->dev[i].written; | 1003 | bi = sh->dev[i].written; |
983 | sh->dev[i].written = NULL; | 1004 | sh->dev[i].written = NULL; |
1005 | if (bi) bitmap_end = 1; | ||
984 | while (bi && bi->bi_sector < sh->dev[i].sector + STRIPE_SECTORS) { | 1006 | while (bi && bi->bi_sector < sh->dev[i].sector + STRIPE_SECTORS) { |
985 | struct bio *bi2 = r5_next_bio(bi, sh->dev[i].sector); | 1007 | struct bio *bi2 = r5_next_bio(bi, sh->dev[i].sector); |
986 | clear_bit(BIO_UPTODATE, &bi->bi_flags); | 1008 | clear_bit(BIO_UPTODATE, &bi->bi_flags); |
@@ -1009,8 +1031,11 @@ static void handle_stripe(struct stripe_head *sh) | |||
1009 | bi = nextbi; | 1031 | bi = nextbi; |
1010 | } | 1032 | } |
1011 | } | 1033 | } |
1034 | spin_unlock_irq(&conf->device_lock); | ||
1035 | if (bitmap_end) | ||
1036 | bitmap_endwrite(conf->mddev->bitmap, sh->sector, | ||
1037 | STRIPE_SECTORS, 0, 0); | ||
1012 | } | 1038 | } |
1013 | spin_unlock_irq(&conf->device_lock); | ||
1014 | } | 1039 | } |
1015 | if (failed > 1 && syncing) { | 1040 | if (failed > 1 && syncing) { |
1016 | md_done_sync(conf->mddev, STRIPE_SECTORS,0); | 1041 | md_done_sync(conf->mddev, STRIPE_SECTORS,0); |
@@ -1038,6 +1063,7 @@ static void handle_stripe(struct stripe_head *sh) | |||
1038 | test_bit(R5_UPTODATE, &dev->flags) ) { | 1063 | test_bit(R5_UPTODATE, &dev->flags) ) { |
1039 | /* We can return any write requests */ | 1064 | /* We can return any write requests */ |
1040 | struct bio *wbi, *wbi2; | 1065 | struct bio *wbi, *wbi2; |
1066 | int bitmap_end = 0; | ||
1041 | PRINTK("Return write for disc %d\n", i); | 1067 | PRINTK("Return write for disc %d\n", i); |
1042 | spin_lock_irq(&conf->device_lock); | 1068 | spin_lock_irq(&conf->device_lock); |
1043 | wbi = dev->written; | 1069 | wbi = dev->written; |
@@ -1051,7 +1077,13 @@ static void handle_stripe(struct stripe_head *sh) | |||
1051 | } | 1077 | } |
1052 | wbi = wbi2; | 1078 | wbi = wbi2; |
1053 | } | 1079 | } |
1080 | if (dev->towrite == NULL) | ||
1081 | bitmap_end = 1; | ||
1054 | spin_unlock_irq(&conf->device_lock); | 1082 | spin_unlock_irq(&conf->device_lock); |
1083 | if (bitmap_end) | ||
1084 | bitmap_endwrite(conf->mddev->bitmap, sh->sector, | ||
1085 | STRIPE_SECTORS, | ||
1086 | !test_bit(STRIPE_DEGRADED, &sh->state), 0); | ||
1055 | } | 1087 | } |
1056 | } | 1088 | } |
1057 | } | 1089 | } |
@@ -1175,7 +1207,8 @@ static void handle_stripe(struct stripe_head *sh) | |||
1175 | } | 1207 | } |
1176 | } | 1208 | } |
1177 | /* now if nothing is locked, and if we have enough data, we can start a write request */ | 1209 | /* now if nothing is locked, and if we have enough data, we can start a write request */ |
1178 | if (locked == 0 && (rcw == 0 ||rmw == 0)) { | 1210 | if (locked == 0 && (rcw == 0 ||rmw == 0) && |
1211 | !test_bit(STRIPE_BIT_DELAY, &sh->state)) { | ||
1179 | PRINTK("Computing parity...\n"); | 1212 | PRINTK("Computing parity...\n"); |
1180 | compute_parity(sh, rcw==0 ? RECONSTRUCT_WRITE : READ_MODIFY_WRITE); | 1213 | compute_parity(sh, rcw==0 ? RECONSTRUCT_WRITE : READ_MODIFY_WRITE); |
1181 | /* now every locked buffer is ready to be written */ | 1214 | /* now every locked buffer is ready to be written */ |
@@ -1231,6 +1264,7 @@ static void handle_stripe(struct stripe_head *sh) | |||
1231 | dev = &sh->dev[failed_num]; | 1264 | dev = &sh->dev[failed_num]; |
1232 | set_bit(R5_LOCKED, &dev->flags); | 1265 | set_bit(R5_LOCKED, &dev->flags); |
1233 | set_bit(R5_Wantwrite, &dev->flags); | 1266 | set_bit(R5_Wantwrite, &dev->flags); |
1267 | clear_bit(STRIPE_DEGRADED, &sh->state); | ||
1234 | locked++; | 1268 | locked++; |
1235 | set_bit(STRIPE_INSYNC, &sh->state); | 1269 | set_bit(STRIPE_INSYNC, &sh->state); |
1236 | set_bit(R5_Syncio, &dev->flags); | 1270 | set_bit(R5_Syncio, &dev->flags); |
@@ -1298,6 +1332,8 @@ static void handle_stripe(struct stripe_head *sh) | |||
1298 | bi->bi_next = NULL; | 1332 | bi->bi_next = NULL; |
1299 | generic_make_request(bi); | 1333 | generic_make_request(bi); |
1300 | } else { | 1334 | } else { |
1335 | if (rw == 1) | ||
1336 | set_bit(STRIPE_DEGRADED, &sh->state); | ||
1301 | PRINTK("skip op %ld on disc %d for sector %llu\n", | 1337 | PRINTK("skip op %ld on disc %d for sector %llu\n", |
1302 | bi->bi_rw, i, (unsigned long long)sh->sector); | 1338 | bi->bi_rw, i, (unsigned long long)sh->sector); |
1303 | clear_bit(R5_LOCKED, &sh->dev[i].flags); | 1339 | clear_bit(R5_LOCKED, &sh->dev[i].flags); |
@@ -1322,6 +1358,20 @@ static inline void raid5_activate_delayed(raid5_conf_t *conf) | |||
1322 | } | 1358 | } |
1323 | } | 1359 | } |
1324 | 1360 | ||
1361 | static inline void activate_bit_delay(raid5_conf_t *conf) | ||
1362 | { | ||
1363 | /* device_lock is held */ | ||
1364 | struct list_head head; | ||
1365 | list_add(&head, &conf->bitmap_list); | ||
1366 | list_del_init(&conf->bitmap_list); | ||
1367 | while (!list_empty(&head)) { | ||
1368 | struct stripe_head *sh = list_entry(head.next, struct stripe_head, lru); | ||
1369 | list_del_init(&sh->lru); | ||
1370 | atomic_inc(&sh->count); | ||
1371 | __release_stripe(conf, sh); | ||
1372 | } | ||
1373 | } | ||
1374 | |||
1325 | static void unplug_slaves(mddev_t *mddev) | 1375 | static void unplug_slaves(mddev_t *mddev) |
1326 | { | 1376 | { |
1327 | raid5_conf_t *conf = mddev_to_conf(mddev); | 1377 | raid5_conf_t *conf = mddev_to_conf(mddev); |
@@ -1354,8 +1404,10 @@ static void raid5_unplug_device(request_queue_t *q) | |||
1354 | 1404 | ||
1355 | spin_lock_irqsave(&conf->device_lock, flags); | 1405 | spin_lock_irqsave(&conf->device_lock, flags); |
1356 | 1406 | ||
1357 | if (blk_remove_plug(q)) | 1407 | if (blk_remove_plug(q)) { |
1408 | conf->seq_flush++; | ||
1358 | raid5_activate_delayed(conf); | 1409 | raid5_activate_delayed(conf); |
1410 | } | ||
1359 | md_wakeup_thread(mddev->thread); | 1411 | md_wakeup_thread(mddev->thread); |
1360 | 1412 | ||
1361 | spin_unlock_irqrestore(&conf->device_lock, flags); | 1413 | spin_unlock_irqrestore(&conf->device_lock, flags); |
@@ -1493,10 +1545,20 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i | |||
1493 | sector_t first_sector; | 1545 | sector_t first_sector; |
1494 | int raid_disks = conf->raid_disks; | 1546 | int raid_disks = conf->raid_disks; |
1495 | int data_disks = raid_disks-1; | 1547 | int data_disks = raid_disks-1; |
1548 | sector_t max_sector = mddev->size << 1; | ||
1549 | int sync_blocks; | ||
1496 | 1550 | ||
1497 | if (sector_nr >= mddev->size <<1) { | 1551 | if (sector_nr >= max_sector) { |
1498 | /* just being told to finish up .. nothing much to do */ | 1552 | /* just being told to finish up .. nothing much to do */ |
1499 | unplug_slaves(mddev); | 1553 | unplug_slaves(mddev); |
1554 | |||
1555 | if (mddev->curr_resync < max_sector) /* aborted */ | ||
1556 | bitmap_end_sync(mddev->bitmap, mddev->curr_resync, | ||
1557 | &sync_blocks, 1); | ||
1558 | else /* compelted sync */ | ||
1559 | conf->fullsync = 0; | ||
1560 | bitmap_close_sync(mddev->bitmap); | ||
1561 | |||
1500 | return 0; | 1562 | return 0; |
1501 | } | 1563 | } |
1502 | /* if there is 1 or more failed drives and we are trying | 1564 | /* if there is 1 or more failed drives and we are trying |
@@ -1508,6 +1570,13 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i | |||
1508 | *skipped = 1; | 1570 | *skipped = 1; |
1509 | return rv; | 1571 | return rv; |
1510 | } | 1572 | } |
1573 | if (!bitmap_start_sync(mddev->bitmap, sector_nr, &sync_blocks, 1) && | ||
1574 | !conf->fullsync && sync_blocks >= STRIPE_SECTORS) { | ||
1575 | /* we can skip this block, and probably more */ | ||
1576 | sync_blocks /= STRIPE_SECTORS; | ||
1577 | *skipped = 1; | ||
1578 | return sync_blocks * STRIPE_SECTORS; /* keep things rounded to whole stripes */ | ||
1579 | } | ||
1511 | 1580 | ||
1512 | x = sector_nr; | 1581 | x = sector_nr; |
1513 | chunk_offset = sector_div(x, sectors_per_chunk); | 1582 | chunk_offset = sector_div(x, sectors_per_chunk); |
@@ -1525,6 +1594,7 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i | |||
1525 | set_current_state(TASK_UNINTERRUPTIBLE); | 1594 | set_current_state(TASK_UNINTERRUPTIBLE); |
1526 | schedule_timeout(1); | 1595 | schedule_timeout(1); |
1527 | } | 1596 | } |
1597 | bitmap_start_sync(mddev->bitmap, sector_nr, &sync_blocks, 0); | ||
1528 | spin_lock(&sh->lock); | 1598 | spin_lock(&sh->lock); |
1529 | set_bit(STRIPE_SYNCING, &sh->state); | 1599 | set_bit(STRIPE_SYNCING, &sh->state); |
1530 | clear_bit(STRIPE_INSYNC, &sh->state); | 1600 | clear_bit(STRIPE_INSYNC, &sh->state); |
@@ -1558,6 +1628,13 @@ static void raid5d (mddev_t *mddev) | |||
1558 | while (1) { | 1628 | while (1) { |
1559 | struct list_head *first; | 1629 | struct list_head *first; |
1560 | 1630 | ||
1631 | if (conf->seq_flush - conf->seq_write > 0) { | ||
1632 | int seq = conf->seq_flush; | ||
1633 | bitmap_unplug(mddev->bitmap); | ||
1634 | conf->seq_write = seq; | ||
1635 | activate_bit_delay(conf); | ||
1636 | } | ||
1637 | |||
1561 | if (list_empty(&conf->handle_list) && | 1638 | if (list_empty(&conf->handle_list) && |
1562 | atomic_read(&conf->preread_active_stripes) < IO_THRESHOLD && | 1639 | atomic_read(&conf->preread_active_stripes) < IO_THRESHOLD && |
1563 | !blk_queue_plugged(mddev->queue) && | 1640 | !blk_queue_plugged(mddev->queue) && |
@@ -1591,7 +1668,7 @@ static void raid5d (mddev_t *mddev) | |||
1591 | PRINTK("--- raid5d inactive\n"); | 1668 | PRINTK("--- raid5d inactive\n"); |
1592 | } | 1669 | } |
1593 | 1670 | ||
1594 | static int run (mddev_t *mddev) | 1671 | static int run(mddev_t *mddev) |
1595 | { | 1672 | { |
1596 | raid5_conf_t *conf; | 1673 | raid5_conf_t *conf; |
1597 | int raid_disk, memory; | 1674 | int raid_disk, memory; |
@@ -1621,6 +1698,7 @@ static int run (mddev_t *mddev) | |||
1621 | init_waitqueue_head(&conf->wait_for_overlap); | 1698 | init_waitqueue_head(&conf->wait_for_overlap); |
1622 | INIT_LIST_HEAD(&conf->handle_list); | 1699 | INIT_LIST_HEAD(&conf->handle_list); |
1623 | INIT_LIST_HEAD(&conf->delayed_list); | 1700 | INIT_LIST_HEAD(&conf->delayed_list); |
1701 | INIT_LIST_HEAD(&conf->bitmap_list); | ||
1624 | INIT_LIST_HEAD(&conf->inactive_list); | 1702 | INIT_LIST_HEAD(&conf->inactive_list); |
1625 | atomic_set(&conf->active_stripes, 0); | 1703 | atomic_set(&conf->active_stripes, 0); |
1626 | atomic_set(&conf->preread_active_stripes, 0); | 1704 | atomic_set(&conf->preread_active_stripes, 0); |
@@ -1732,6 +1810,9 @@ memory = conf->max_nr_stripes * (sizeof(struct stripe_head) + | |||
1732 | 1810 | ||
1733 | /* Ok, everything is just fine now */ | 1811 | /* Ok, everything is just fine now */ |
1734 | 1812 | ||
1813 | if (mddev->bitmap) | ||
1814 | mddev->thread->timeout = mddev->bitmap->daemon_sleep * HZ; | ||
1815 | |||
1735 | mddev->queue->unplug_fn = raid5_unplug_device; | 1816 | mddev->queue->unplug_fn = raid5_unplug_device; |
1736 | mddev->queue->issue_flush_fn = raid5_issue_flush; | 1817 | mddev->queue->issue_flush_fn = raid5_issue_flush; |
1737 | 1818 | ||
@@ -1912,6 +1993,8 @@ static int raid5_add_disk(mddev_t *mddev, mdk_rdev_t *rdev) | |||
1912 | rdev->in_sync = 0; | 1993 | rdev->in_sync = 0; |
1913 | rdev->raid_disk = disk; | 1994 | rdev->raid_disk = disk; |
1914 | found = 1; | 1995 | found = 1; |
1996 | if (rdev->saved_raid_disk != disk) | ||
1997 | conf->fullsync = 1; | ||
1915 | p->rdev = rdev; | 1998 | p->rdev = rdev; |
1916 | break; | 1999 | break; |
1917 | } | 2000 | } |
@@ -1941,6 +2024,35 @@ static int raid5_resize(mddev_t *mddev, sector_t sectors) | |||
1941 | return 0; | 2024 | return 0; |
1942 | } | 2025 | } |
1943 | 2026 | ||
2027 | static void raid5_quiesce(mddev_t *mddev, int state) | ||
2028 | { | ||
2029 | raid5_conf_t *conf = mddev_to_conf(mddev); | ||
2030 | |||
2031 | switch(state) { | ||
2032 | case 1: /* stop all writes */ | ||
2033 | spin_lock_irq(&conf->device_lock); | ||
2034 | conf->quiesce = 1; | ||
2035 | wait_event_lock_irq(conf->wait_for_stripe, | ||
2036 | atomic_read(&conf->active_stripes) == 0, | ||
2037 | conf->device_lock, /* nothing */); | ||
2038 | spin_unlock_irq(&conf->device_lock); | ||
2039 | break; | ||
2040 | |||
2041 | case 0: /* re-enable writes */ | ||
2042 | spin_lock_irq(&conf->device_lock); | ||
2043 | conf->quiesce = 0; | ||
2044 | wake_up(&conf->wait_for_stripe); | ||
2045 | spin_unlock_irq(&conf->device_lock); | ||
2046 | break; | ||
2047 | } | ||
2048 | if (mddev->thread) { | ||
2049 | if (mddev->bitmap) | ||
2050 | mddev->thread->timeout = mddev->bitmap->daemon_sleep * HZ; | ||
2051 | else | ||
2052 | mddev->thread->timeout = MAX_SCHEDULE_TIMEOUT; | ||
2053 | md_wakeup_thread(mddev->thread); | ||
2054 | } | ||
2055 | } | ||
1944 | static mdk_personality_t raid5_personality= | 2056 | static mdk_personality_t raid5_personality= |
1945 | { | 2057 | { |
1946 | .name = "raid5", | 2058 | .name = "raid5", |
@@ -1955,6 +2067,7 @@ static mdk_personality_t raid5_personality= | |||
1955 | .spare_active = raid5_spare_active, | 2067 | .spare_active = raid5_spare_active, |
1956 | .sync_request = sync_request, | 2068 | .sync_request = sync_request, |
1957 | .resize = raid5_resize, | 2069 | .resize = raid5_resize, |
2070 | .quiesce = raid5_quiesce, | ||
1958 | }; | 2071 | }; |
1959 | 2072 | ||
1960 | static int __init raid5_init (void) | 2073 | static int __init raid5_init (void) |
diff --git a/include/linux/raid/raid5.h b/include/linux/raid/raid5.h index d63ddcb4afad..176fc653c284 100644 --- a/include/linux/raid/raid5.h +++ b/include/linux/raid/raid5.h | |||
@@ -134,6 +134,7 @@ struct stripe_head { | |||
134 | unsigned long state; /* state flags */ | 134 | unsigned long state; /* state flags */ |
135 | atomic_t count; /* nr of active thread/requests */ | 135 | atomic_t count; /* nr of active thread/requests */ |
136 | spinlock_t lock; | 136 | spinlock_t lock; |
137 | int bm_seq; /* sequence number for bitmap flushes */ | ||
137 | struct r5dev { | 138 | struct r5dev { |
138 | struct bio req; | 139 | struct bio req; |
139 | struct bio_vec vec; | 140 | struct bio_vec vec; |
@@ -165,12 +166,13 @@ struct stripe_head { | |||
165 | /* | 166 | /* |
166 | * Stripe state | 167 | * Stripe state |
167 | */ | 168 | */ |
168 | #define STRIPE_ERROR 1 | ||
169 | #define STRIPE_HANDLE 2 | 169 | #define STRIPE_HANDLE 2 |
170 | #define STRIPE_SYNCING 3 | 170 | #define STRIPE_SYNCING 3 |
171 | #define STRIPE_INSYNC 4 | 171 | #define STRIPE_INSYNC 4 |
172 | #define STRIPE_PREREAD_ACTIVE 5 | 172 | #define STRIPE_PREREAD_ACTIVE 5 |
173 | #define STRIPE_DELAYED 6 | 173 | #define STRIPE_DELAYED 6 |
174 | #define STRIPE_DEGRADED 7 | ||
175 | #define STRIPE_BIT_DELAY 8 | ||
174 | 176 | ||
175 | /* | 177 | /* |
176 | * Plugging: | 178 | * Plugging: |
@@ -210,10 +212,20 @@ struct raid5_private_data { | |||
210 | 212 | ||
211 | struct list_head handle_list; /* stripes needing handling */ | 213 | struct list_head handle_list; /* stripes needing handling */ |
212 | struct list_head delayed_list; /* stripes that have plugged requests */ | 214 | struct list_head delayed_list; /* stripes that have plugged requests */ |
215 | struct list_head bitmap_list; /* stripes delaying awaiting bitmap update */ | ||
213 | atomic_t preread_active_stripes; /* stripes with scheduled io */ | 216 | atomic_t preread_active_stripes; /* stripes with scheduled io */ |
214 | 217 | ||
215 | char cache_name[20]; | 218 | char cache_name[20]; |
216 | kmem_cache_t *slab_cache; /* for allocating stripes */ | 219 | kmem_cache_t *slab_cache; /* for allocating stripes */ |
220 | |||
221 | int seq_flush, seq_write; | ||
222 | int quiesce; | ||
223 | |||
224 | int fullsync; /* set to 1 if a full sync is needed, | ||
225 | * (fresh device added). | ||
226 | * Cleared when a sync completes. | ||
227 | */ | ||
228 | |||
217 | /* | 229 | /* |
218 | * Free stripes pool | 230 | * Free stripes pool |
219 | */ | 231 | */ |