aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: NeilBrown <neilb@suse.de> 2006-03-27 04:18:08 -0500
committer: Linus Torvalds <torvalds@g5.osdl.org> 2006-03-27 11:45:01 -0500
commit: 7ecaa1e6a1ad69862e9980b6c777e11f26c4782d (patch)
tree: 3cbd64ebc2a45f6b5ac45b0305fd3cf2c6916070
parent: ad01c9e3752f4ba4f3d99c89b7370fa4983a25b5 (diff)
[PATCH] md: Infrastructure to allow normal IO to continue while array is expanding
We need to allow different stripes to have different effective sizes (i.e. to cover different numbers of devices), and to use the appropriate size for each. Also, while a stripe is being expanded, we must block any IO attempts on it until the stripe is stable again.

Key elements in this change are:
- each stripe_head gets a 'disks' field which is part of the lookup key, so there can sometimes be two stripe heads for the same area of the array, but covering different numbers of devices. One of these will be marked STRIPE_EXPANDING and so won't accept new requests.
- conf->expand_progress tracks how far the expansion has progressed and is used to determine whether the target part of the array has been expanded yet or not.

Signed-off-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
-rw-r--r-- drivers/md/raid5.c 88
-rw-r--r-- include/linux/raid/raid5.h 6
2 files changed, 64 insertions, 30 deletions
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 6c20b44509d8..7a6df515b008 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -178,10 +178,10 @@ static int grow_buffers(struct stripe_head *sh, int num)
178 178
179static void raid5_build_block (struct stripe_head *sh, int i); 179static void raid5_build_block (struct stripe_head *sh, int i);
180 180
181static void init_stripe(struct stripe_head *sh, sector_t sector, int pd_idx) 181static void init_stripe(struct stripe_head *sh, sector_t sector, int pd_idx, int disks)
182{ 182{
183 raid5_conf_t *conf = sh->raid_conf; 183 raid5_conf_t *conf = sh->raid_conf;
184 int disks = conf->raid_disks, i; 184 int i;
185 185
186 if (atomic_read(&sh->count) != 0) 186 if (atomic_read(&sh->count) != 0)
187 BUG(); 187 BUG();
@@ -198,7 +198,9 @@ static void init_stripe(struct stripe_head *sh, sector_t sector, int pd_idx)
198 sh->pd_idx = pd_idx; 198 sh->pd_idx = pd_idx;
199 sh->state = 0; 199 sh->state = 0;
200 200
201 for (i=disks; i--; ) { 201 sh->disks = disks;
202
203 for (i = sh->disks; i--; ) {
202 struct r5dev *dev = &sh->dev[i]; 204 struct r5dev *dev = &sh->dev[i];
203 205
204 if (dev->toread || dev->towrite || dev->written || 206 if (dev->toread || dev->towrite || dev->written ||
@@ -215,7 +217,7 @@ static void init_stripe(struct stripe_head *sh, sector_t sector, int pd_idx)
215 insert_hash(conf, sh); 217 insert_hash(conf, sh);
216} 218}
217 219
218static struct stripe_head *__find_stripe(raid5_conf_t *conf, sector_t sector) 220static struct stripe_head *__find_stripe(raid5_conf_t *conf, sector_t sector, int disks)
219{ 221{
220 struct stripe_head *sh; 222 struct stripe_head *sh;
221 struct hlist_node *hn; 223 struct hlist_node *hn;
@@ -223,7 +225,7 @@ static struct stripe_head *__find_stripe(raid5_conf_t *conf, sector_t sector)
223 CHECK_DEVLOCK(); 225 CHECK_DEVLOCK();
224 PRINTK("__find_stripe, sector %llu\n", (unsigned long long)sector); 226 PRINTK("__find_stripe, sector %llu\n", (unsigned long long)sector);
225 hlist_for_each_entry(sh, hn, stripe_hash(conf, sector), hash) 227 hlist_for_each_entry(sh, hn, stripe_hash(conf, sector), hash)
226 if (sh->sector == sector) 228 if (sh->sector == sector && sh->disks == disks)
227 return sh; 229 return sh;
228 PRINTK("__stripe %llu not in cache\n", (unsigned long long)sector); 230 PRINTK("__stripe %llu not in cache\n", (unsigned long long)sector);
229 return NULL; 231 return NULL;
@@ -232,8 +234,8 @@ static struct stripe_head *__find_stripe(raid5_conf_t *conf, sector_t sector)
232static void unplug_slaves(mddev_t *mddev); 234static void unplug_slaves(mddev_t *mddev);
233static void raid5_unplug_device(request_queue_t *q); 235static void raid5_unplug_device(request_queue_t *q);
234 236
235static struct stripe_head *get_active_stripe(raid5_conf_t *conf, sector_t sector, 237static struct stripe_head *get_active_stripe(raid5_conf_t *conf, sector_t sector, int disks,
236 int pd_idx, int noblock) 238 int pd_idx, int noblock)
237{ 239{
238 struct stripe_head *sh; 240 struct stripe_head *sh;
239 241
@@ -245,7 +247,7 @@ static struct stripe_head *get_active_stripe(raid5_conf_t *conf, sector_t sector
245 wait_event_lock_irq(conf->wait_for_stripe, 247 wait_event_lock_irq(conf->wait_for_stripe,
246 conf->quiesce == 0, 248 conf->quiesce == 0,
247 conf->device_lock, /* nothing */); 249 conf->device_lock, /* nothing */);
248 sh = __find_stripe(conf, sector); 250 sh = __find_stripe(conf, sector, disks);
249 if (!sh) { 251 if (!sh) {
250 if (!conf->inactive_blocked) 252 if (!conf->inactive_blocked)
251 sh = get_free_stripe(conf); 253 sh = get_free_stripe(conf);
@@ -263,7 +265,7 @@ static struct stripe_head *get_active_stripe(raid5_conf_t *conf, sector_t sector
263 ); 265 );
264 conf->inactive_blocked = 0; 266 conf->inactive_blocked = 0;
265 } else 267 } else
266 init_stripe(sh, sector, pd_idx); 268 init_stripe(sh, sector, pd_idx, disks);
267 } else { 269 } else {
268 if (atomic_read(&sh->count)) { 270 if (atomic_read(&sh->count)) {
269 if (!list_empty(&sh->lru)) 271 if (!list_empty(&sh->lru))
@@ -300,6 +302,7 @@ static int grow_one_stripe(raid5_conf_t *conf)
300 kmem_cache_free(conf->slab_cache, sh); 302 kmem_cache_free(conf->slab_cache, sh);
301 return 0; 303 return 0;
302 } 304 }
305 sh->disks = conf->raid_disks;
303 /* we just created an active stripe so... */ 306 /* we just created an active stripe so... */
304 atomic_set(&sh->count, 1); 307 atomic_set(&sh->count, 1);
305 atomic_inc(&conf->active_stripes); 308 atomic_inc(&conf->active_stripes);
@@ -483,7 +486,7 @@ static int raid5_end_read_request(struct bio * bi, unsigned int bytes_done,
483{ 486{
484 struct stripe_head *sh = bi->bi_private; 487 struct stripe_head *sh = bi->bi_private;
485 raid5_conf_t *conf = sh->raid_conf; 488 raid5_conf_t *conf = sh->raid_conf;
486 int disks = conf->raid_disks, i; 489 int disks = sh->disks, i;
487 int uptodate = test_bit(BIO_UPTODATE, &bi->bi_flags); 490 int uptodate = test_bit(BIO_UPTODATE, &bi->bi_flags);
488 491
489 if (bi->bi_size) 492 if (bi->bi_size)
@@ -581,7 +584,7 @@ static int raid5_end_write_request (struct bio *bi, unsigned int bytes_done,
581{ 584{
582 struct stripe_head *sh = bi->bi_private; 585 struct stripe_head *sh = bi->bi_private;
583 raid5_conf_t *conf = sh->raid_conf; 586 raid5_conf_t *conf = sh->raid_conf;
584 int disks = conf->raid_disks, i; 587 int disks = sh->disks, i;
585 unsigned long flags; 588 unsigned long flags;
586 int uptodate = test_bit(BIO_UPTODATE, &bi->bi_flags); 589 int uptodate = test_bit(BIO_UPTODATE, &bi->bi_flags);
587 590
@@ -735,7 +738,7 @@ static sector_t raid5_compute_sector(sector_t r_sector, unsigned int raid_disks,
735static sector_t compute_blocknr(struct stripe_head *sh, int i) 738static sector_t compute_blocknr(struct stripe_head *sh, int i)
736{ 739{
737 raid5_conf_t *conf = sh->raid_conf; 740 raid5_conf_t *conf = sh->raid_conf;
738 int raid_disks = conf->raid_disks, data_disks = raid_disks - 1; 741 int raid_disks = sh->disks, data_disks = raid_disks - 1;
739 sector_t new_sector = sh->sector, check; 742 sector_t new_sector = sh->sector, check;
740 int sectors_per_chunk = conf->chunk_size >> 9; 743 int sectors_per_chunk = conf->chunk_size >> 9;
741 sector_t stripe; 744 sector_t stripe;
@@ -836,8 +839,7 @@ static void copy_data(int frombio, struct bio *bio,
836 839
837static void compute_block(struct stripe_head *sh, int dd_idx) 840static void compute_block(struct stripe_head *sh, int dd_idx)
838{ 841{
839 raid5_conf_t *conf = sh->raid_conf; 842 int i, count, disks = sh->disks;
840 int i, count, disks = conf->raid_disks;
841 void *ptr[MAX_XOR_BLOCKS], *p; 843 void *ptr[MAX_XOR_BLOCKS], *p;
842 844
843 PRINTK("compute_block, stripe %llu, idx %d\n", 845 PRINTK("compute_block, stripe %llu, idx %d\n",
@@ -867,7 +869,7 @@ static void compute_block(struct stripe_head *sh, int dd_idx)
867static void compute_parity(struct stripe_head *sh, int method) 869static void compute_parity(struct stripe_head *sh, int method)
868{ 870{
869 raid5_conf_t *conf = sh->raid_conf; 871 raid5_conf_t *conf = sh->raid_conf;
870 int i, pd_idx = sh->pd_idx, disks = conf->raid_disks, count; 872 int i, pd_idx = sh->pd_idx, disks = sh->disks, count;
871 void *ptr[MAX_XOR_BLOCKS]; 873 void *ptr[MAX_XOR_BLOCKS];
872 struct bio *chosen; 874 struct bio *chosen;
873 875
@@ -1055,7 +1057,7 @@ static int add_stripe_bio(struct stripe_head *sh, struct bio *bi, int dd_idx, in
1055static void handle_stripe(struct stripe_head *sh) 1057static void handle_stripe(struct stripe_head *sh)
1056{ 1058{
1057 raid5_conf_t *conf = sh->raid_conf; 1059 raid5_conf_t *conf = sh->raid_conf;
1058 int disks = conf->raid_disks; 1060 int disks = sh->disks;
1059 struct bio *return_bi= NULL; 1061 struct bio *return_bi= NULL;
1060 struct bio *bi; 1062 struct bio *bi;
1061 int i; 1063 int i;
@@ -1649,12 +1651,10 @@ static inline void raid5_plug_device(raid5_conf_t *conf)
1649 spin_unlock_irq(&conf->device_lock); 1651 spin_unlock_irq(&conf->device_lock);
1650} 1652}
1651 1653
1652static int make_request (request_queue_t *q, struct bio * bi) 1654static int make_request(request_queue_t *q, struct bio * bi)
1653{ 1655{
1654 mddev_t *mddev = q->queuedata; 1656 mddev_t *mddev = q->queuedata;
1655 raid5_conf_t *conf = mddev_to_conf(mddev); 1657 raid5_conf_t *conf = mddev_to_conf(mddev);
1656 const unsigned int raid_disks = conf->raid_disks;
1657 const unsigned int data_disks = raid_disks - 1;
1658 unsigned int dd_idx, pd_idx; 1658 unsigned int dd_idx, pd_idx;
1659 sector_t new_sector; 1659 sector_t new_sector;
1660 sector_t logical_sector, last_sector; 1660 sector_t logical_sector, last_sector;
@@ -1678,20 +1678,48 @@ static int make_request (request_queue_t *q, struct bio * bi)
1678 1678
1679 for (;logical_sector < last_sector; logical_sector += STRIPE_SECTORS) { 1679 for (;logical_sector < last_sector; logical_sector += STRIPE_SECTORS) {
1680 DEFINE_WAIT(w); 1680 DEFINE_WAIT(w);
1681 int disks;
1681 1682
1682 new_sector = raid5_compute_sector(logical_sector, 1683 retry:
1683 raid_disks, data_disks, &dd_idx, &pd_idx, conf); 1684 if (likely(conf->expand_progress == MaxSector))
1684 1685 disks = conf->raid_disks;
1686 else {
1687 spin_lock_irq(&conf->device_lock);
1688 disks = conf->raid_disks;
1689 if (logical_sector >= conf->expand_progress)
1690 disks = conf->previous_raid_disks;
1691 spin_unlock_irq(&conf->device_lock);
1692 }
1693 new_sector = raid5_compute_sector(logical_sector, disks, disks - 1,
1694 &dd_idx, &pd_idx, conf);
1685 PRINTK("raid5: make_request, sector %llu logical %llu\n", 1695 PRINTK("raid5: make_request, sector %llu logical %llu\n",
1686 (unsigned long long)new_sector, 1696 (unsigned long long)new_sector,
1687 (unsigned long long)logical_sector); 1697 (unsigned long long)logical_sector);
1688 1698
1689 retry:
1690 prepare_to_wait(&conf->wait_for_overlap, &w, TASK_UNINTERRUPTIBLE); 1699 prepare_to_wait(&conf->wait_for_overlap, &w, TASK_UNINTERRUPTIBLE);
1691 sh = get_active_stripe(conf, new_sector, pd_idx, (bi->bi_rw&RWA_MASK)); 1700 sh = get_active_stripe(conf, new_sector, disks, pd_idx, (bi->bi_rw&RWA_MASK));
1692 if (sh) { 1701 if (sh) {
1693 if (!add_stripe_bio(sh, bi, dd_idx, (bi->bi_rw&RW_MASK))) { 1702 if (unlikely(conf->expand_progress != MaxSector)) {
1694 /* Add failed due to overlap. Flush everything 1703 /* expansion might have moved on while waiting for a
1704 * stripe, so we much do the range check again.
1705 */
1706 int must_retry = 0;
1707 spin_lock_irq(&conf->device_lock);
1708 if (logical_sector < conf->expand_progress &&
1709 disks == conf->previous_raid_disks)
1710 /* mismatch, need to try again */
1711 must_retry = 1;
1712 spin_unlock_irq(&conf->device_lock);
1713 if (must_retry) {
1714 release_stripe(sh);
1715 goto retry;
1716 }
1717 }
1718
1719 if (test_bit(STRIPE_EXPANDING, &sh->state) ||
1720 !add_stripe_bio(sh, bi, dd_idx, (bi->bi_rw&RW_MASK))) {
1721 /* Stripe is busy expanding or
1722 * add failed due to overlap. Flush everything
1695 * and wait a while 1723 * and wait a while
1696 */ 1724 */
1697 raid5_unplug_device(mddev->queue); 1725 raid5_unplug_device(mddev->queue);
@@ -1703,7 +1731,6 @@ static int make_request (request_queue_t *q, struct bio * bi)
1703 raid5_plug_device(conf); 1731 raid5_plug_device(conf);
1704 handle_stripe(sh); 1732 handle_stripe(sh);
1705 release_stripe(sh); 1733 release_stripe(sh);
1706
1707 } else { 1734 } else {
1708 /* cannot get stripe for read-ahead, just give-up */ 1735 /* cannot get stripe for read-ahead, just give-up */
1709 clear_bit(BIO_UPTODATE, &bi->bi_flags); 1736 clear_bit(BIO_UPTODATE, &bi->bi_flags);
@@ -1779,9 +1806,9 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i
1779 1806
1780 first_sector = raid5_compute_sector((sector_t)stripe*data_disks*sectors_per_chunk 1807 first_sector = raid5_compute_sector((sector_t)stripe*data_disks*sectors_per_chunk
1781 + chunk_offset, raid_disks, data_disks, &dd_idx, &pd_idx, conf); 1808 + chunk_offset, raid_disks, data_disks, &dd_idx, &pd_idx, conf);
1782 sh = get_active_stripe(conf, sector_nr, pd_idx, 1); 1809 sh = get_active_stripe(conf, sector_nr, raid_disks, pd_idx, 1);
1783 if (sh == NULL) { 1810 if (sh == NULL) {
1784 sh = get_active_stripe(conf, sector_nr, pd_idx, 0); 1811 sh = get_active_stripe(conf, sector_nr, raid_disks, pd_idx, 0);
1785 /* make sure we don't swamp the stripe cache if someone else 1812 /* make sure we don't swamp the stripe cache if someone else
1786 * is trying to get access 1813 * is trying to get access
1787 */ 1814 */
@@ -1998,6 +2025,7 @@ static int run(mddev_t *mddev)
1998 conf->level = mddev->level; 2025 conf->level = mddev->level;
1999 conf->algorithm = mddev->layout; 2026 conf->algorithm = mddev->layout;
2000 conf->max_nr_stripes = NR_STRIPES; 2027 conf->max_nr_stripes = NR_STRIPES;
2028 conf->expand_progress = MaxSector;
2001 2029
2002 /* device size must be a multiple of chunk size */ 2030 /* device size must be a multiple of chunk size */
2003 mddev->size &= ~(mddev->chunk_size/1024 -1); 2031 mddev->size &= ~(mddev->chunk_size/1024 -1);
@@ -2128,7 +2156,7 @@ static void print_sh (struct stripe_head *sh)
2128 printk("sh %llu, count %d.\n", 2156 printk("sh %llu, count %d.\n",
2129 (unsigned long long)sh->sector, atomic_read(&sh->count)); 2157 (unsigned long long)sh->sector, atomic_read(&sh->count));
2130 printk("sh %llu, ", (unsigned long long)sh->sector); 2158 printk("sh %llu, ", (unsigned long long)sh->sector);
2131 for (i = 0; i < sh->raid_conf->raid_disks; i++) { 2159 for (i = 0; i < sh->disks; i++) {
2132 printk("(cache%d: %p %ld) ", 2160 printk("(cache%d: %p %ld) ",
2133 i, sh->dev[i].page, sh->dev[i].flags); 2161 i, sh->dev[i].page, sh->dev[i].flags);
2134 } 2162 }
diff --git a/include/linux/raid/raid5.h b/include/linux/raid/raid5.h
index b7b2653af7bb..6fa274aea2a0 100644
--- a/include/linux/raid/raid5.h
+++ b/include/linux/raid/raid5.h
@@ -135,6 +135,7 @@ struct stripe_head {
135 atomic_t count; /* nr of active thread/requests */ 135 atomic_t count; /* nr of active thread/requests */
136 spinlock_t lock; 136 spinlock_t lock;
137 int bm_seq; /* sequence number for bitmap flushes */ 137 int bm_seq; /* sequence number for bitmap flushes */
138 int disks; /* disks in stripe */
138 struct r5dev { 139 struct r5dev {
139 struct bio req; 140 struct bio req;
140 struct bio_vec vec; 141 struct bio_vec vec;
@@ -174,6 +175,7 @@ struct stripe_head {
174#define STRIPE_DELAYED 6 175#define STRIPE_DELAYED 6
175#define STRIPE_DEGRADED 7 176#define STRIPE_DEGRADED 7
176#define STRIPE_BIT_DELAY 8 177#define STRIPE_BIT_DELAY 8
178#define STRIPE_EXPANDING 9
177 179
178/* 180/*
179 * Plugging: 181 * Plugging:
@@ -211,6 +213,10 @@ struct raid5_private_data {
211 int raid_disks, working_disks, failed_disks; 213 int raid_disks, working_disks, failed_disks;
212 int max_nr_stripes; 214 int max_nr_stripes;
213 215
216 /* used during an expand */
217 sector_t expand_progress; /* MaxSector when no expand happening */
218 int previous_raid_disks;
219
214 struct list_head handle_list; /* stripes needing handling */ 220 struct list_head handle_list; /* stripes needing handling */
215 struct list_head delayed_list; /* stripes that have plugged requests */ 221 struct list_head delayed_list; /* stripes that have plugged requests */
216 struct list_head bitmap_list; /* stripes delaying awaiting bitmap update */ 222 struct list_head bitmap_list; /* stripes delaying awaiting bitmap update */