aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2011-06-14 14:21:21 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2011-06-14 14:21:21 -0400
commit81eb3dd8438802138ac9ce12428632f35562c060 (patch)
tree454415294a5737f44fb04f06d6346be1fae1324f
parent3e483f46756d9318fb0c59b01d5c8a26c2191d15 (diff)
parentfcde90759a985d8bfa4391346a821cc12fc16207 (diff)
Merge branch 'for-linus' of git://neil.brown.name/md
* 'for-linus' of git://neil.brown.name/md: md/raid5: remove unusual use of bio_iovec_idx() md/raid5: fix FUA request handling in ops_run_io() md/raid5: fix raid5_set_bi_hw_segments md:Documentation/md.txt - fix typo md/bitmap: remove unused fields from struct bitmap md/bitmap: use proper accessor macro md: check ->hot_remove_disk when removing disk md: Using poll /proc/mdstat can monitor the events of adding a spare disks MD: use is_power_of_2 macro MD: raid5 do not set fullsync MD: support initial bitmap creation in-kernel MD: add sync_super to mddev_t struct MD: raid1 changes to allow use by device mapper MD: move thread wakeups into resume MD: possible typo MD: no sync IO while suspended MD: no integrity register if no gendisk
-rw-r--r--Documentation/md.txt2
-rw-r--r--drivers/md/bitmap.c104
-rw-r--r--drivers/md/bitmap.h10
-rw-r--r--drivers/md/md.c41
-rw-r--r--drivers/md/md.h2
-rw-r--r--drivers/md/raid1.c24
-rw-r--r--drivers/md/raid1.h2
-rw-r--r--drivers/md/raid5.c16
8 files changed, 152 insertions, 49 deletions
diff --git a/Documentation/md.txt b/Documentation/md.txt
index 2366b1c8cf1..f0eee83ff78 100644
--- a/Documentation/md.txt
+++ b/Documentation/md.txt
@@ -555,7 +555,7 @@ also have
555 sync_min 555 sync_min
556 sync_max 556 sync_max
557 The two values, given as numbers of sectors, indicate a range 557 The two values, given as numbers of sectors, indicate a range
558 withing the array where 'check'/'repair' will operate. Must be 558 within the array where 'check'/'repair' will operate. Must be
559 a multiple of chunk_size. When it reaches "sync_max" it will 559 a multiple of chunk_size. When it reaches "sync_max" it will
560 pause, rather than complete. 560 pause, rather than complete.
561 You can use 'select' or 'poll' on "sync_completed" to wait for 561 You can use 'select' or 'poll' on "sync_completed" to wait for
diff --git a/drivers/md/bitmap.c b/drivers/md/bitmap.c
index 70bd738b8b9..574b09afedd 100644
--- a/drivers/md/bitmap.c
+++ b/drivers/md/bitmap.c
@@ -534,6 +534,82 @@ void bitmap_print_sb(struct bitmap *bitmap)
534 kunmap_atomic(sb, KM_USER0); 534 kunmap_atomic(sb, KM_USER0);
535} 535}
536 536
537/*
538 * bitmap_new_disk_sb
539 * @bitmap
540 *
541 * This function is somewhat the reverse of bitmap_read_sb. bitmap_read_sb
542 * reads and verifies the on-disk bitmap superblock and populates bitmap_info.
543 * This function verifies 'bitmap_info' and populates the on-disk bitmap
544 * structure, which is to be written to disk.
545 *
546 * Returns: 0 on success, -Exxx on error
547 */
548static int bitmap_new_disk_sb(struct bitmap *bitmap)
549{
550 bitmap_super_t *sb;
551 unsigned long chunksize, daemon_sleep, write_behind;
552 int err = -EINVAL;
553
554 bitmap->sb_page = alloc_page(GFP_KERNEL);
555 if (IS_ERR(bitmap->sb_page)) {
556 err = PTR_ERR(bitmap->sb_page);
557 bitmap->sb_page = NULL;
558 return err;
559 }
560 bitmap->sb_page->index = 0;
561
562 sb = kmap_atomic(bitmap->sb_page, KM_USER0);
563
564 sb->magic = cpu_to_le32(BITMAP_MAGIC);
565 sb->version = cpu_to_le32(BITMAP_MAJOR_HI);
566
567 chunksize = bitmap->mddev->bitmap_info.chunksize;
568 BUG_ON(!chunksize);
569 if (!is_power_of_2(chunksize)) {
570 kunmap_atomic(sb, KM_USER0);
571 printk(KERN_ERR "bitmap chunksize not a power of 2\n");
572 return -EINVAL;
573 }
574 sb->chunksize = cpu_to_le32(chunksize);
575
576 daemon_sleep = bitmap->mddev->bitmap_info.daemon_sleep;
577 if (!daemon_sleep ||
578 (daemon_sleep < 1) || (daemon_sleep > MAX_SCHEDULE_TIMEOUT)) {
579 printk(KERN_INFO "Choosing daemon_sleep default (5 sec)\n");
580 daemon_sleep = 5 * HZ;
581 }
582 sb->daemon_sleep = cpu_to_le32(daemon_sleep);
583 bitmap->mddev->bitmap_info.daemon_sleep = daemon_sleep;
584
585 /*
586 * FIXME: write_behind for RAID1. If not specified, what
587 * is a good choice? We choose COUNTER_MAX / 2 arbitrarily.
588 */
589 write_behind = bitmap->mddev->bitmap_info.max_write_behind;
590 if (write_behind > COUNTER_MAX)
591 write_behind = COUNTER_MAX / 2;
592 sb->write_behind = cpu_to_le32(write_behind);
593 bitmap->mddev->bitmap_info.max_write_behind = write_behind;
594
595 /* keep the array size field of the bitmap superblock up to date */
596 sb->sync_size = cpu_to_le64(bitmap->mddev->resync_max_sectors);
597
598 memcpy(sb->uuid, bitmap->mddev->uuid, 16);
599
600 bitmap->flags |= BITMAP_STALE;
601 sb->state |= cpu_to_le32(BITMAP_STALE);
602 bitmap->events_cleared = bitmap->mddev->events;
603 sb->events_cleared = cpu_to_le64(bitmap->mddev->events);
604
605 bitmap->flags |= BITMAP_HOSTENDIAN;
606 sb->version = cpu_to_le32(BITMAP_MAJOR_HOSTENDIAN);
607
608 kunmap_atomic(sb, KM_USER0);
609
610 return 0;
611}
612
537/* read the superblock from the bitmap file and initialize some bitmap fields */ 613/* read the superblock from the bitmap file and initialize some bitmap fields */
538static int bitmap_read_sb(struct bitmap *bitmap) 614static int bitmap_read_sb(struct bitmap *bitmap)
539{ 615{
@@ -575,7 +651,7 @@ static int bitmap_read_sb(struct bitmap *bitmap)
575 reason = "unrecognized superblock version"; 651 reason = "unrecognized superblock version";
576 else if (chunksize < 512) 652 else if (chunksize < 512)
577 reason = "bitmap chunksize too small"; 653 reason = "bitmap chunksize too small";
578 else if ((1 << ffz(~chunksize)) != chunksize) 654 else if (!is_power_of_2(chunksize))
579 reason = "bitmap chunksize not a power of 2"; 655 reason = "bitmap chunksize not a power of 2";
580 else if (daemon_sleep < 1 || daemon_sleep > MAX_SCHEDULE_TIMEOUT) 656 else if (daemon_sleep < 1 || daemon_sleep > MAX_SCHEDULE_TIMEOUT)
581 reason = "daemon sleep period out of range"; 657 reason = "daemon sleep period out of range";
@@ -1076,8 +1152,8 @@ static int bitmap_init_from_disk(struct bitmap *bitmap, sector_t start)
1076 } 1152 }
1077 1153
1078 printk(KERN_INFO "%s: bitmap initialized from disk: " 1154 printk(KERN_INFO "%s: bitmap initialized from disk: "
1079 "read %lu/%lu pages, set %lu bits\n", 1155 "read %lu/%lu pages, set %lu of %lu bits\n",
1080 bmname(bitmap), bitmap->file_pages, num_pages, bit_cnt); 1156 bmname(bitmap), bitmap->file_pages, num_pages, bit_cnt, chunks);
1081 1157
1082 return 0; 1158 return 0;
1083 1159
@@ -1332,7 +1408,7 @@ int bitmap_startwrite(struct bitmap *bitmap, sector_t offset, unsigned long sect
1332 return 0; 1408 return 0;
1333 } 1409 }
1334 1410
1335 if (unlikely((*bmc & COUNTER_MAX) == COUNTER_MAX)) { 1411 if (unlikely(COUNTER(*bmc) == COUNTER_MAX)) {
1336 DEFINE_WAIT(__wait); 1412 DEFINE_WAIT(__wait);
1337 /* note that it is safe to do the prepare_to_wait 1413 /* note that it is safe to do the prepare_to_wait
1338 * after the test as long as we do it before dropping 1414 * after the test as long as we do it before dropping
@@ -1404,10 +1480,10 @@ void bitmap_endwrite(struct bitmap *bitmap, sector_t offset, unsigned long secto
1404 sysfs_notify_dirent_safe(bitmap->sysfs_can_clear); 1480 sysfs_notify_dirent_safe(bitmap->sysfs_can_clear);
1405 } 1481 }
1406 1482
1407 if (!success && ! (*bmc & NEEDED_MASK)) 1483 if (!success && !NEEDED(*bmc))
1408 *bmc |= NEEDED_MASK; 1484 *bmc |= NEEDED_MASK;
1409 1485
1410 if ((*bmc & COUNTER_MAX) == COUNTER_MAX) 1486 if (COUNTER(*bmc) == COUNTER_MAX)
1411 wake_up(&bitmap->overflow_wait); 1487 wake_up(&bitmap->overflow_wait);
1412 1488
1413 (*bmc)--; 1489 (*bmc)--;
@@ -1728,9 +1804,16 @@ int bitmap_create(mddev_t *mddev)
1728 vfs_fsync(file, 1); 1804 vfs_fsync(file, 1);
1729 } 1805 }
1730 /* read superblock from bitmap file (this sets mddev->bitmap_info.chunksize) */ 1806 /* read superblock from bitmap file (this sets mddev->bitmap_info.chunksize) */
1731 if (!mddev->bitmap_info.external) 1807 if (!mddev->bitmap_info.external) {
1732 err = bitmap_read_sb(bitmap); 1808 /*
1733 else { 1809 * If 'MD_ARRAY_FIRST_USE' is set, then device-mapper is
1810 * instructing us to create a new on-disk bitmap instance.
1811 */
1812 if (test_and_clear_bit(MD_ARRAY_FIRST_USE, &mddev->flags))
1813 err = bitmap_new_disk_sb(bitmap);
1814 else
1815 err = bitmap_read_sb(bitmap);
1816 } else {
1734 err = 0; 1817 err = 0;
1735 if (mddev->bitmap_info.chunksize == 0 || 1818 if (mddev->bitmap_info.chunksize == 0 ||
1736 mddev->bitmap_info.daemon_sleep == 0) 1819 mddev->bitmap_info.daemon_sleep == 0)
@@ -1754,9 +1837,6 @@ int bitmap_create(mddev_t *mddev)
1754 bitmap->chunks = chunks; 1837 bitmap->chunks = chunks;
1755 bitmap->pages = pages; 1838 bitmap->pages = pages;
1756 bitmap->missing_pages = pages; 1839 bitmap->missing_pages = pages;
1757 bitmap->counter_bits = COUNTER_BITS;
1758
1759 bitmap->syncchunk = ~0UL;
1760 1840
1761#ifdef INJECT_FATAL_FAULT_1 1841#ifdef INJECT_FATAL_FAULT_1
1762 bitmap->bp = NULL; 1842 bitmap->bp = NULL;
diff --git a/drivers/md/bitmap.h b/drivers/md/bitmap.h
index d0aeaf46d93..b2a127e891a 100644
--- a/drivers/md/bitmap.h
+++ b/drivers/md/bitmap.h
@@ -85,7 +85,6 @@
85typedef __u16 bitmap_counter_t; 85typedef __u16 bitmap_counter_t;
86#define COUNTER_BITS 16 86#define COUNTER_BITS 16
87#define COUNTER_BIT_SHIFT 4 87#define COUNTER_BIT_SHIFT 4
88#define COUNTER_BYTE_RATIO (COUNTER_BITS / 8)
89#define COUNTER_BYTE_SHIFT (COUNTER_BIT_SHIFT - 3) 88#define COUNTER_BYTE_SHIFT (COUNTER_BIT_SHIFT - 3)
90 89
91#define NEEDED_MASK ((bitmap_counter_t) (1 << (COUNTER_BITS - 1))) 90#define NEEDED_MASK ((bitmap_counter_t) (1 << (COUNTER_BITS - 1)))
@@ -196,19 +195,10 @@ struct bitmap {
196 195
197 mddev_t *mddev; /* the md device that the bitmap is for */ 196 mddev_t *mddev; /* the md device that the bitmap is for */
198 197
199 int counter_bits; /* how many bits per block counter */
200
201 /* bitmap chunksize -- how much data does each bit represent? */ 198 /* bitmap chunksize -- how much data does each bit represent? */
202 unsigned long chunkshift; /* chunksize = 2^chunkshift (for bitops) */ 199 unsigned long chunkshift; /* chunksize = 2^chunkshift (for bitops) */
203 unsigned long chunks; /* total number of data chunks for the array */ 200 unsigned long chunks; /* total number of data chunks for the array */
204 201
205 /* We hold a count on the chunk currently being synced, and drop
206 * it when the last block is started. If the resync is aborted
207 * midway, we need to be able to drop that count, so we remember
208 * the counted chunk..
209 */
210 unsigned long syncchunk;
211
212 __u64 events_cleared; 202 __u64 events_cleared;
213 int need_sync; 203 int need_sync;
214 204
diff --git a/drivers/md/md.c b/drivers/md/md.c
index aa640a85bb2..4332fc2f25d 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -351,6 +351,9 @@ void mddev_resume(mddev_t *mddev)
351 mddev->suspended = 0; 351 mddev->suspended = 0;
352 wake_up(&mddev->sb_wait); 352 wake_up(&mddev->sb_wait);
353 mddev->pers->quiesce(mddev, 0); 353 mddev->pers->quiesce(mddev, 0);
354
355 md_wakeup_thread(mddev->thread);
356 md_wakeup_thread(mddev->sync_thread); /* possibly kick off a reshape */
354} 357}
355EXPORT_SYMBOL_GPL(mddev_resume); 358EXPORT_SYMBOL_GPL(mddev_resume);
356 359
@@ -1750,6 +1753,18 @@ static struct super_type super_types[] = {
1750 }, 1753 },
1751}; 1754};
1752 1755
1756static void sync_super(mddev_t *mddev, mdk_rdev_t *rdev)
1757{
1758 if (mddev->sync_super) {
1759 mddev->sync_super(mddev, rdev);
1760 return;
1761 }
1762
1763 BUG_ON(mddev->major_version >= ARRAY_SIZE(super_types));
1764
1765 super_types[mddev->major_version].sync_super(mddev, rdev);
1766}
1767
1753static int match_mddev_units(mddev_t *mddev1, mddev_t *mddev2) 1768static int match_mddev_units(mddev_t *mddev1, mddev_t *mddev2)
1754{ 1769{
1755 mdk_rdev_t *rdev, *rdev2; 1770 mdk_rdev_t *rdev, *rdev2;
@@ -1781,8 +1796,8 @@ int md_integrity_register(mddev_t *mddev)
1781 1796
1782 if (list_empty(&mddev->disks)) 1797 if (list_empty(&mddev->disks))
1783 return 0; /* nothing to do */ 1798 return 0; /* nothing to do */
1784 if (blk_get_integrity(mddev->gendisk)) 1799 if (!mddev->gendisk || blk_get_integrity(mddev->gendisk))
1785 return 0; /* already registered */ 1800 return 0; /* shouldn't register, or already is */
1786 list_for_each_entry(rdev, &mddev->disks, same_set) { 1801 list_for_each_entry(rdev, &mddev->disks, same_set) {
1787 /* skip spares and non-functional disks */ 1802 /* skip spares and non-functional disks */
1788 if (test_bit(Faulty, &rdev->flags)) 1803 if (test_bit(Faulty, &rdev->flags))
@@ -2168,8 +2183,7 @@ static void sync_sbs(mddev_t * mddev, int nospares)
2168 /* Don't update this superblock */ 2183 /* Don't update this superblock */
2169 rdev->sb_loaded = 2; 2184 rdev->sb_loaded = 2;
2170 } else { 2185 } else {
2171 super_types[mddev->major_version]. 2186 sync_super(mddev, rdev);
2172 sync_super(mddev, rdev);
2173 rdev->sb_loaded = 1; 2187 rdev->sb_loaded = 1;
2174 } 2188 }
2175 } 2189 }
@@ -2462,7 +2476,7 @@ slot_store(mdk_rdev_t *rdev, const char *buf, size_t len)
2462 if (rdev->raid_disk == -1) 2476 if (rdev->raid_disk == -1)
2463 return -EEXIST; 2477 return -EEXIST;
2464 /* personality does all needed checks */ 2478 /* personality does all needed checks */
2465 if (rdev->mddev->pers->hot_add_disk == NULL) 2479 if (rdev->mddev->pers->hot_remove_disk == NULL)
2466 return -EINVAL; 2480 return -EINVAL;
2467 err = rdev->mddev->pers-> 2481 err = rdev->mddev->pers->
2468 hot_remove_disk(rdev->mddev, rdev->raid_disk); 2482 hot_remove_disk(rdev->mddev, rdev->raid_disk);
@@ -4619,9 +4633,6 @@ int md_run(mddev_t *mddev)
4619 if (mddev->flags) 4633 if (mddev->flags)
4620 md_update_sb(mddev, 0); 4634 md_update_sb(mddev, 0);
4621 4635
4622 md_wakeup_thread(mddev->thread);
4623 md_wakeup_thread(mddev->sync_thread); /* possibly kick off a reshape */
4624
4625 md_new_event(mddev); 4636 md_new_event(mddev);
4626 sysfs_notify_dirent_safe(mddev->sysfs_state); 4637 sysfs_notify_dirent_safe(mddev->sysfs_state);
4627 sysfs_notify_dirent_safe(mddev->sysfs_action); 4638 sysfs_notify_dirent_safe(mddev->sysfs_action);
@@ -4642,6 +4653,10 @@ static int do_md_run(mddev_t *mddev)
4642 bitmap_destroy(mddev); 4653 bitmap_destroy(mddev);
4643 goto out; 4654 goto out;
4644 } 4655 }
4656
4657 md_wakeup_thread(mddev->thread);
4658 md_wakeup_thread(mddev->sync_thread); /* possibly kick off a reshape */
4659
4645 set_capacity(mddev->gendisk, mddev->array_sectors); 4660 set_capacity(mddev->gendisk, mddev->array_sectors);
4646 revalidate_disk(mddev->gendisk); 4661 revalidate_disk(mddev->gendisk);
4647 mddev->changed = 1; 4662 mddev->changed = 1;
@@ -5259,6 +5274,8 @@ static int add_new_disk(mddev_t * mddev, mdu_disk_info_t *info)
5259 if (mddev->degraded) 5274 if (mddev->degraded)
5260 set_bit(MD_RECOVERY_RECOVER, &mddev->recovery); 5275 set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
5261 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); 5276 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
5277 if (!err)
5278 md_new_event(mddev);
5262 md_wakeup_thread(mddev->thread); 5279 md_wakeup_thread(mddev->thread);
5263 return err; 5280 return err;
5264 } 5281 }
@@ -6866,8 +6883,8 @@ void md_do_sync(mddev_t *mddev)
6866 * Tune reconstruction: 6883 * Tune reconstruction:
6867 */ 6884 */
6868 window = 32*(PAGE_SIZE/512); 6885 window = 32*(PAGE_SIZE/512);
6869 printk(KERN_INFO "md: using %dk window, over a total of %llu blocks.\n", 6886 printk(KERN_INFO "md: using %dk window, over a total of %lluk.\n",
6870 window/2,(unsigned long long) max_sectors/2); 6887 window/2, (unsigned long long)max_sectors/2);
6871 6888
6872 atomic_set(&mddev->recovery_active, 0); 6889 atomic_set(&mddev->recovery_active, 0);
6873 last_check = 0; 6890 last_check = 0;
@@ -7045,7 +7062,6 @@ void md_do_sync(mddev_t *mddev)
7045} 7062}
7046EXPORT_SYMBOL_GPL(md_do_sync); 7063EXPORT_SYMBOL_GPL(md_do_sync);
7047 7064
7048
7049static int remove_and_add_spares(mddev_t *mddev) 7065static int remove_and_add_spares(mddev_t *mddev)
7050{ 7066{
7051 mdk_rdev_t *rdev; 7067 mdk_rdev_t *rdev;
@@ -7157,6 +7173,9 @@ static void reap_sync_thread(mddev_t *mddev)
7157 */ 7173 */
7158void md_check_recovery(mddev_t *mddev) 7174void md_check_recovery(mddev_t *mddev)
7159{ 7175{
7176 if (mddev->suspended)
7177 return;
7178
7160 if (mddev->bitmap) 7179 if (mddev->bitmap)
7161 bitmap_daemon_work(mddev); 7180 bitmap_daemon_work(mddev);
7162 7181
diff --git a/drivers/md/md.h b/drivers/md/md.h
index 0b1fd3f1d85..1c26c7a08ae 100644
--- a/drivers/md/md.h
+++ b/drivers/md/md.h
@@ -124,6 +124,7 @@ struct mddev_s
124#define MD_CHANGE_DEVS 0 /* Some device status has changed */ 124#define MD_CHANGE_DEVS 0 /* Some device status has changed */
125#define MD_CHANGE_CLEAN 1 /* transition to or from 'clean' */ 125#define MD_CHANGE_CLEAN 1 /* transition to or from 'clean' */
126#define MD_CHANGE_PENDING 2 /* switch from 'clean' to 'active' in progress */ 126#define MD_CHANGE_PENDING 2 /* switch from 'clean' to 'active' in progress */
127#define MD_ARRAY_FIRST_USE 3 /* First use of array, needs initialization */
127 128
128 int suspended; 129 int suspended;
129 atomic_t active_io; 130 atomic_t active_io;
@@ -330,6 +331,7 @@ struct mddev_s
330 atomic_t flush_pending; 331 atomic_t flush_pending;
331 struct work_struct flush_work; 332 struct work_struct flush_work;
332 struct work_struct event_work; /* used by dm to report failure event */ 333 struct work_struct event_work; /* used by dm to report failure event */
334 void (*sync_super)(mddev_t *mddev, mdk_rdev_t *rdev);
333}; 335};
334 336
335 337
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index 5d096096f95..f7431b6d844 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -497,21 +497,19 @@ static int read_balance(conf_t *conf, r1bio_t *r1_bio)
497 return best_disk; 497 return best_disk;
498} 498}
499 499
500static int raid1_congested(void *data, int bits) 500int md_raid1_congested(mddev_t *mddev, int bits)
501{ 501{
502 mddev_t *mddev = data;
503 conf_t *conf = mddev->private; 502 conf_t *conf = mddev->private;
504 int i, ret = 0; 503 int i, ret = 0;
505 504
506 if (mddev_congested(mddev, bits))
507 return 1;
508
509 rcu_read_lock(); 505 rcu_read_lock();
510 for (i = 0; i < mddev->raid_disks; i++) { 506 for (i = 0; i < mddev->raid_disks; i++) {
511 mdk_rdev_t *rdev = rcu_dereference(conf->mirrors[i].rdev); 507 mdk_rdev_t *rdev = rcu_dereference(conf->mirrors[i].rdev);
512 if (rdev && !test_bit(Faulty, &rdev->flags)) { 508 if (rdev && !test_bit(Faulty, &rdev->flags)) {
513 struct request_queue *q = bdev_get_queue(rdev->bdev); 509 struct request_queue *q = bdev_get_queue(rdev->bdev);
514 510
511 BUG_ON(!q);
512
515 /* Note the '|| 1' - when read_balance prefers 513 /* Note the '|| 1' - when read_balance prefers
516 * non-congested targets, it can be removed 514 * non-congested targets, it can be removed
517 */ 515 */
@@ -524,7 +522,15 @@ static int raid1_congested(void *data, int bits)
524 rcu_read_unlock(); 522 rcu_read_unlock();
525 return ret; 523 return ret;
526} 524}
525EXPORT_SYMBOL_GPL(md_raid1_congested);
527 526
527static int raid1_congested(void *data, int bits)
528{
529 mddev_t *mddev = data;
530
531 return mddev_congested(mddev, bits) ||
532 md_raid1_congested(mddev, bits);
533}
528 534
529static void flush_pending_writes(conf_t *conf) 535static void flush_pending_writes(conf_t *conf)
530{ 536{
@@ -1972,6 +1978,8 @@ static int run(mddev_t *mddev)
1972 return PTR_ERR(conf); 1978 return PTR_ERR(conf);
1973 1979
1974 list_for_each_entry(rdev, &mddev->disks, same_set) { 1980 list_for_each_entry(rdev, &mddev->disks, same_set) {
1981 if (!mddev->gendisk)
1982 continue;
1975 disk_stack_limits(mddev->gendisk, rdev->bdev, 1983 disk_stack_limits(mddev->gendisk, rdev->bdev,
1976 rdev->data_offset << 9); 1984 rdev->data_offset << 9);
1977 /* as we don't honour merge_bvec_fn, we must never risk 1985 /* as we don't honour merge_bvec_fn, we must never risk
@@ -2013,8 +2021,10 @@ static int run(mddev_t *mddev)
2013 2021
2014 md_set_array_sectors(mddev, raid1_size(mddev, 0, 0)); 2022 md_set_array_sectors(mddev, raid1_size(mddev, 0, 0));
2015 2023
2016 mddev->queue->backing_dev_info.congested_fn = raid1_congested; 2024 if (mddev->queue) {
2017 mddev->queue->backing_dev_info.congested_data = mddev; 2025 mddev->queue->backing_dev_info.congested_fn = raid1_congested;
2026 mddev->queue->backing_dev_info.congested_data = mddev;
2027 }
2018 return md_integrity_register(mddev); 2028 return md_integrity_register(mddev);
2019} 2029}
2020 2030
diff --git a/drivers/md/raid1.h b/drivers/md/raid1.h
index 5fc4ca1af86..e743a64fac4 100644
--- a/drivers/md/raid1.h
+++ b/drivers/md/raid1.h
@@ -126,4 +126,6 @@ struct r1bio_s {
126 */ 126 */
127#define R1BIO_Returned 6 127#define R1BIO_Returned 6
128 128
129extern int md_raid1_congested(mddev_t *mddev, int bits);
130
129#endif 131#endif
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 346e69bfdab..b72edf35ec5 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -129,7 +129,7 @@ static inline int raid5_dec_bi_hw_segments(struct bio *bio)
129 129
130static inline void raid5_set_bi_hw_segments(struct bio *bio, unsigned int cnt) 130static inline void raid5_set_bi_hw_segments(struct bio *bio, unsigned int cnt)
131{ 131{
132 bio->bi_phys_segments = raid5_bi_phys_segments(bio) || (cnt << 16); 132 bio->bi_phys_segments = raid5_bi_phys_segments(bio) | (cnt << 16);
133} 133}
134 134
135/* Find first data disk in a raid6 stripe */ 135/* Find first data disk in a raid6 stripe */
@@ -514,7 +514,7 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s)
514 bi = &sh->dev[i].req; 514 bi = &sh->dev[i].req;
515 515
516 bi->bi_rw = rw; 516 bi->bi_rw = rw;
517 if (rw == WRITE) 517 if (rw & WRITE)
518 bi->bi_end_io = raid5_end_write_request; 518 bi->bi_end_io = raid5_end_write_request;
519 else 519 else
520 bi->bi_end_io = raid5_end_read_request; 520 bi->bi_end_io = raid5_end_read_request;
@@ -548,13 +548,13 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s)
548 bi->bi_io_vec[0].bv_offset = 0; 548 bi->bi_io_vec[0].bv_offset = 0;
549 bi->bi_size = STRIPE_SIZE; 549 bi->bi_size = STRIPE_SIZE;
550 bi->bi_next = NULL; 550 bi->bi_next = NULL;
551 if (rw == WRITE && 551 if ((rw & WRITE) &&
552 test_bit(R5_ReWrite, &sh->dev[i].flags)) 552 test_bit(R5_ReWrite, &sh->dev[i].flags))
553 atomic_add(STRIPE_SECTORS, 553 atomic_add(STRIPE_SECTORS,
554 &rdev->corrected_errors); 554 &rdev->corrected_errors);
555 generic_make_request(bi); 555 generic_make_request(bi);
556 } else { 556 } else {
557 if (rw == WRITE) 557 if (rw & WRITE)
558 set_bit(STRIPE_DEGRADED, &sh->state); 558 set_bit(STRIPE_DEGRADED, &sh->state);
559 pr_debug("skip op %ld on disc %d for sector %llu\n", 559 pr_debug("skip op %ld on disc %d for sector %llu\n",
560 bi->bi_rw, i, (unsigned long long)sh->sector); 560 bi->bi_rw, i, (unsigned long long)sh->sector);
@@ -585,7 +585,7 @@ async_copy_data(int frombio, struct bio *bio, struct page *page,
585 init_async_submit(&submit, flags, tx, NULL, NULL, NULL); 585 init_async_submit(&submit, flags, tx, NULL, NULL, NULL);
586 586
587 bio_for_each_segment(bvl, bio, i) { 587 bio_for_each_segment(bvl, bio, i) {
588 int len = bio_iovec_idx(bio, i)->bv_len; 588 int len = bvl->bv_len;
589 int clen; 589 int clen;
590 int b_offset = 0; 590 int b_offset = 0;
591 591
@@ -601,8 +601,8 @@ async_copy_data(int frombio, struct bio *bio, struct page *page,
601 clen = len; 601 clen = len;
602 602
603 if (clen > 0) { 603 if (clen > 0) {
604 b_offset += bio_iovec_idx(bio, i)->bv_offset; 604 b_offset += bvl->bv_offset;
605 bio_page = bio_iovec_idx(bio, i)->bv_page; 605 bio_page = bvl->bv_page;
606 if (frombio) 606 if (frombio)
607 tx = async_memcpy(page, bio_page, page_offset, 607 tx = async_memcpy(page, bio_page, page_offset,
608 b_offset, clen, &submit); 608 b_offset, clen, &submit);
@@ -4858,7 +4858,7 @@ static raid5_conf_t *setup_conf(mddev_t *mddev)
4858 printk(KERN_INFO "md/raid:%s: device %s operational as raid" 4858 printk(KERN_INFO "md/raid:%s: device %s operational as raid"
4859 " disk %d\n", 4859 " disk %d\n",
4860 mdname(mddev), bdevname(rdev->bdev, b), raid_disk); 4860 mdname(mddev), bdevname(rdev->bdev, b), raid_disk);
4861 } else 4861 } else if (rdev->saved_raid_disk != raid_disk)
4862 /* Cannot rely on bitmap to complete recovery */ 4862 /* Cannot rely on bitmap to complete recovery */
4863 conf->fullsync = 1; 4863 conf->fullsync = 1;
4864 } 4864 }