Diffstat (limited to 'drivers/md')
-rw-r--r--	drivers/md/bitmap.c	152
-rw-r--r--	drivers/md/bitmap.h	22
-rw-r--r--	drivers/md/dm-raid.c	16
-rw-r--r--	drivers/md/faulty.c	2
-rw-r--r--	drivers/md/linear.c	32
-rw-r--r--	drivers/md/md.c	140
-rw-r--r--	drivers/md/md.h	13
-rw-r--r--	drivers/md/multipath.c	2
-rw-r--r--	drivers/md/raid0.c	164
-rw-r--r--	drivers/md/raid0.h	11
-rw-r--r--	drivers/md/raid1.c	98
-rw-r--r--	drivers/md/raid10.c	187
-rw-r--r--	drivers/md/raid5.c	25
13 files changed, 491 insertions, 373 deletions
diff --git a/drivers/md/bitmap.c b/drivers/md/bitmap.c
index 045e086144ad..3d0dfa7a89a2 100644
--- a/drivers/md/bitmap.c
+++ b/drivers/md/bitmap.c
@@ -26,6 +26,7 @@
 #include <linux/file.h>
 #include <linux/mount.h>
 #include <linux/buffer_head.h>
+#include <linux/seq_file.h>
 #include "md.h"
 #include "bitmap.h"
 
@@ -35,31 +36,6 @@ static inline char *bmname(struct bitmap *bitmap)
 }
 
 /*
- * just a placeholder - calls kmalloc for bitmap pages
- */
-static unsigned char *bitmap_alloc_page(struct bitmap *bitmap)
-{
-	unsigned char *page;
-
-	page = kzalloc(PAGE_SIZE, GFP_NOIO);
-	if (!page)
-		printk("%s: bitmap_alloc_page FAILED\n", bmname(bitmap));
-	else
-		pr_debug("%s: bitmap_alloc_page: allocated page at %p\n",
-			 bmname(bitmap), page);
-	return page;
-}
-
-/*
- * for now just a placeholder -- just calls kfree for bitmap pages
- */
-static void bitmap_free_page(struct bitmap *bitmap, unsigned char *page)
-{
-	pr_debug("%s: bitmap_free_page: free page %p\n", bmname(bitmap), page);
-	kfree(page);
-}
-
-/*
  * check a page and, if necessary, allocate it (or hijack it if the alloc fails)
  *
  * 1) check to see if this page is allocated, if it's not then try to alloc
@@ -96,7 +72,7 @@ __acquires(bitmap->lock)
 	/* this page has not been allocated yet */
 
 	spin_unlock_irq(&bitmap->lock);
-	mappage = bitmap_alloc_page(bitmap);
+	mappage = kzalloc(PAGE_SIZE, GFP_NOIO);
 	spin_lock_irq(&bitmap->lock);
 
 	if (mappage == NULL) {
@@ -109,7 +85,7 @@ __acquires(bitmap->lock)
 	} else if (bitmap->bp[page].map ||
 		   bitmap->bp[page].hijacked) {
 		/* somebody beat us to getting the page */
-		bitmap_free_page(bitmap, mappage);
+		kfree(mappage);
 		return 0;
 	} else {
 
@@ -141,7 +117,7 @@ static void bitmap_checkfree(struct bitmap *bitmap, unsigned long page)
 		ptr = bitmap->bp[page].map;
 		bitmap->bp[page].map = NULL;
 		bitmap->missing_pages++;
-		bitmap_free_page(bitmap, ptr);
+		kfree(ptr);
 	}
 }
 
@@ -171,7 +147,7 @@ static struct page *read_sb_page(struct mddev *mddev, loff_t offset,
 		did_alloc = 1;
 	}
 
-	list_for_each_entry(rdev, &mddev->disks, same_set) {
+	rdev_for_each(rdev, mddev) {
 		if (! test_bit(In_sync, &rdev->flags)
 		    || test_bit(Faulty, &rdev->flags))
 			continue;
@@ -445,18 +421,13 @@ out:
 void bitmap_update_sb(struct bitmap *bitmap)
 {
 	bitmap_super_t *sb;
-	unsigned long flags;
 
 	if (!bitmap || !bitmap->mddev) /* no bitmap for this array */
 		return;
 	if (bitmap->mddev->bitmap_info.external)
 		return;
-	spin_lock_irqsave(&bitmap->lock, flags);
-	if (!bitmap->sb_page) { /* no superblock */
-		spin_unlock_irqrestore(&bitmap->lock, flags);
+	if (!bitmap->sb_page) /* no superblock */
 		return;
-	}
-	spin_unlock_irqrestore(&bitmap->lock, flags);
 	sb = kmap_atomic(bitmap->sb_page);
 	sb->events = cpu_to_le64(bitmap->mddev->events);
 	if (bitmap->mddev->events < bitmap->events_cleared)
@@ -632,26 +603,28 @@ static int bitmap_read_sb(struct bitmap *bitmap)
 	/* keep the array size field of the bitmap superblock up to date */
 	sb->sync_size = cpu_to_le64(bitmap->mddev->resync_max_sectors);
 
-	if (!bitmap->mddev->persistent)
-		goto success;
-
-	/*
-	 * if we have a persistent array superblock, compare the
-	 * bitmap's UUID and event counter to the mddev's
-	 */
-	if (memcmp(sb->uuid, bitmap->mddev->uuid, 16)) {
-		printk(KERN_INFO "%s: bitmap superblock UUID mismatch\n",
-			bmname(bitmap));
-		goto out;
-	}
-	events = le64_to_cpu(sb->events);
-	if (events < bitmap->mddev->events) {
-		printk(KERN_INFO "%s: bitmap file is out of date (%llu < %llu) "
-			"-- forcing full recovery\n", bmname(bitmap), events,
-			(unsigned long long) bitmap->mddev->events);
-		sb->state |= cpu_to_le32(BITMAP_STALE);
+	if (bitmap->mddev->persistent) {
+		/*
+		 * We have a persistent array superblock, so compare the
+		 * bitmap's UUID and event counter to the mddev's
+		 */
+		if (memcmp(sb->uuid, bitmap->mddev->uuid, 16)) {
+			printk(KERN_INFO
+			       "%s: bitmap superblock UUID mismatch\n",
+			       bmname(bitmap));
+			goto out;
+		}
+		events = le64_to_cpu(sb->events);
+		if (events < bitmap->mddev->events) {
+			printk(KERN_INFO
+			       "%s: bitmap file is out of date (%llu < %llu) "
+			       "-- forcing full recovery\n",
+			       bmname(bitmap), events,
+			       (unsigned long long) bitmap->mddev->events);
+			sb->state |= cpu_to_le32(BITMAP_STALE);
+		}
 	}
-success:
+
 	/* assign fields using values from superblock */
 	bitmap->mddev->bitmap_info.chunksize = chunksize;
 	bitmap->mddev->bitmap_info.daemon_sleep = daemon_sleep;
@@ -680,15 +653,10 @@ static int bitmap_mask_state(struct bitmap *bitmap, enum bitmap_state bits,
 			   enum bitmap_mask_op op)
 {
 	bitmap_super_t *sb;
-	unsigned long flags;
 	int old;
 
-	spin_lock_irqsave(&bitmap->lock, flags);
-	if (!bitmap->sb_page) { /* can't set the state */
-		spin_unlock_irqrestore(&bitmap->lock, flags);
+	if (!bitmap->sb_page) /* can't set the state */
 		return 0;
-	}
-	spin_unlock_irqrestore(&bitmap->lock, flags);
 	sb = kmap_atomic(bitmap->sb_page);
 	old = le32_to_cpu(sb->state) & bits;
 	switch (op) {
@@ -870,7 +838,7 @@ static void bitmap_file_set_bit(struct bitmap *bitmap, sector_t block)
 	unsigned long bit;
 	struct page *page;
 	void *kaddr;
-	unsigned long chunk = block >> CHUNK_BLOCK_SHIFT(bitmap);
+	unsigned long chunk = block >> bitmap->chunkshift;
 
 	if (!bitmap->filemap)
 		return;
@@ -1069,10 +1037,10 @@ static int bitmap_init_from_disk(struct bitmap *bitmap, sector_t start)
 			kunmap_atomic(paddr);
 			if (b) {
 				/* if the disk bit is set, set the memory bit */
-				int needed = ((sector_t)(i+1) << (CHUNK_BLOCK_SHIFT(bitmap))
+				int needed = ((sector_t)(i+1) << bitmap->chunkshift
 					      >= start);
 				bitmap_set_memory_bits(bitmap,
-						       (sector_t)i << CHUNK_BLOCK_SHIFT(bitmap),
+						       (sector_t)i << bitmap->chunkshift,
 						       needed);
 				bit_cnt++;
 			}
@@ -1116,7 +1084,7 @@ void bitmap_write_all(struct bitmap *bitmap)
 
 static void bitmap_count_page(struct bitmap *bitmap, sector_t offset, int inc)
 {
-	sector_t chunk = offset >> CHUNK_BLOCK_SHIFT(bitmap);
+	sector_t chunk = offset >> bitmap->chunkshift;
 	unsigned long page = chunk >> PAGE_COUNTER_SHIFT;
 	bitmap->bp[page].count += inc;
 	bitmap_checkfree(bitmap, page);
@@ -1222,7 +1190,7 @@ void bitmap_daemon_work(struct mddev *mddev)
 			bitmap->allclean = 0;
 		}
 		bmc = bitmap_get_counter(bitmap,
-					 (sector_t)j << CHUNK_BLOCK_SHIFT(bitmap),
+					 (sector_t)j << bitmap->chunkshift,
 					 &blocks, 0);
 		if (!bmc)
 			j |= PAGE_COUNTER_MASK;
@@ -1231,7 +1199,7 @@ void bitmap_daemon_work(struct mddev *mddev)
 			/* we can clear the bit */
 			*bmc = 0;
 			bitmap_count_page(bitmap,
-					  (sector_t)j << CHUNK_BLOCK_SHIFT(bitmap),
+					  (sector_t)j << bitmap->chunkshift,
 					  -1);
 
 			/* clear the bit */
@@ -1285,7 +1253,7 @@ __acquires(bitmap->lock)
 	 * The lock must have been taken with interrupts enabled.
 	 * If !create, we don't release the lock.
 	 */
-	sector_t chunk = offset >> CHUNK_BLOCK_SHIFT(bitmap);
+	sector_t chunk = offset >> bitmap->chunkshift;
 	unsigned long page = chunk >> PAGE_COUNTER_SHIFT;
 	unsigned long pageoff = (chunk & PAGE_COUNTER_MASK) << COUNTER_BYTE_SHIFT;
 	sector_t csize;
@@ -1295,10 +1263,10 @@ __acquires(bitmap->lock)
 
 	if (bitmap->bp[page].hijacked ||
 	    bitmap->bp[page].map == NULL)
-		csize = ((sector_t)1) << (CHUNK_BLOCK_SHIFT(bitmap) +
+		csize = ((sector_t)1) << (bitmap->chunkshift +
 					  PAGE_COUNTER_SHIFT - 1);
 	else
-		csize = ((sector_t)1) << (CHUNK_BLOCK_SHIFT(bitmap));
+		csize = ((sector_t)1) << bitmap->chunkshift;
 	*blocks = csize - (offset & (csize - 1));
 
 	if (err < 0)
@@ -1424,7 +1392,7 @@ void bitmap_endwrite(struct bitmap *bitmap, sector_t offset, unsigned long secto
 			set_page_attr(bitmap,
 				      filemap_get_page(
 					      bitmap,
-					      offset >> CHUNK_BLOCK_SHIFT(bitmap)),
+					      offset >> bitmap->chunkshift),
 				      BITMAP_PAGE_PENDING);
 			bitmap->allclean = 0;
 		}
@@ -1512,7 +1480,7 @@ void bitmap_end_sync(struct bitmap *bitmap, sector_t offset, sector_t *blocks, i
 	else {
 		if (*bmc <= 2) {
 			set_page_attr(bitmap,
-				      filemap_get_page(bitmap, offset >> CHUNK_BLOCK_SHIFT(bitmap)),
+				      filemap_get_page(bitmap, offset >> bitmap->chunkshift),
 				      BITMAP_PAGE_PENDING);
 			bitmap->allclean = 0;
 		}
@@ -1559,7 +1527,7 @@ void bitmap_cond_end_sync(struct bitmap *bitmap, sector_t sector)
 
 	bitmap->mddev->curr_resync_completed = sector;
 	set_bit(MD_CHANGE_CLEAN, &bitmap->mddev->flags);
-	sector &= ~((1ULL << CHUNK_BLOCK_SHIFT(bitmap)) - 1);
+	sector &= ~((1ULL << bitmap->chunkshift) - 1);
 	s = 0;
 	while (s < sector && s < bitmap->mddev->resync_max_sectors) {
 		bitmap_end_sync(bitmap, s, &blocks, 0);
@@ -1589,7 +1557,7 @@ static void bitmap_set_memory_bits(struct bitmap *bitmap, sector_t offset, int n
 		struct page *page;
 		*bmc = 2 | (needed ? NEEDED_MASK : 0);
 		bitmap_count_page(bitmap, offset, 1);
-		page = filemap_get_page(bitmap, offset >> CHUNK_BLOCK_SHIFT(bitmap));
+		page = filemap_get_page(bitmap, offset >> bitmap->chunkshift);
 		set_page_attr(bitmap, page, BITMAP_PAGE_PENDING);
 		bitmap->allclean = 0;
 	}
@@ -1602,7 +1570,7 @@ void bitmap_dirty_bits(struct bitmap *bitmap, unsigned long s, unsigned long e)
 	unsigned long chunk;
 
 	for (chunk = s; chunk <= e; chunk++) {
-		sector_t sec = (sector_t)chunk << CHUNK_BLOCK_SHIFT(bitmap);
+		sector_t sec = (sector_t)chunk << bitmap->chunkshift;
 		bitmap_set_memory_bits(bitmap, sec, 1);
 		spin_lock_irq(&bitmap->lock);
 		bitmap_file_set_bit(bitmap, sec);
@@ -1759,11 +1727,12 @@ int bitmap_create(struct mddev *mddev)
 		goto error;
 
 	bitmap->daemon_lastrun = jiffies;
-	bitmap->chunkshift = ffz(~mddev->bitmap_info.chunksize);
+	bitmap->chunkshift = (ffz(~mddev->bitmap_info.chunksize)
+			      - BITMAP_BLOCK_SHIFT);
 
 	/* now that chunksize and chunkshift are set, we can use these macros */
-	chunks = (blocks + CHUNK_BLOCK_RATIO(bitmap) - 1) >>
-			CHUNK_BLOCK_SHIFT(bitmap);
+	chunks = (blocks + bitmap->chunkshift - 1) >>
+			bitmap->chunkshift;
 	pages = (chunks + PAGE_COUNTER_RATIO - 1) / PAGE_COUNTER_RATIO;
 
 	BUG_ON(!pages);
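
The rescaling above is the crux of retiring the CHUNK_BLOCK_SHIFT() macro: bitmap->chunkshift now stores the log2 of the chunk size in 512-byte blocks (BITMAP_BLOCK_SHIFT == 9) rather than in bytes, so every former CHUNK_BLOCK_SHIFT(bitmap) call site can shift by bitmap->chunkshift directly. A worked example in C, assuming a 64KB bitmap chunk (the values are illustrative, not from the patch):

	/* chunksize = 64KB = 65536 bytes; ffz(~65536) == 16, i.e. log2(chunksize) */
	unsigned long chunkshift = 16 - BITMAP_BLOCK_SHIFT;	/* 16 - 9 = 7 */
	/* one chunk therefore covers 1 << 7 = 128 sectors of 512 bytes = 64KB */
	sector_t chunk = offset >> chunkshift;	/* offset is in 512-byte sectors */
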
@@ -1836,6 +1805,33 @@ out:
 }
 EXPORT_SYMBOL_GPL(bitmap_load);
 
+void bitmap_status(struct seq_file *seq, struct bitmap *bitmap)
+{
+	unsigned long chunk_kb;
+	unsigned long flags;
+
+	if (!bitmap)
+		return;
+
+	spin_lock_irqsave(&bitmap->lock, flags);
+	chunk_kb = bitmap->mddev->bitmap_info.chunksize >> 10;
+	seq_printf(seq, "bitmap: %lu/%lu pages [%luKB], "
+		   "%lu%s chunk",
+		   bitmap->pages - bitmap->missing_pages,
+		   bitmap->pages,
+		   (bitmap->pages - bitmap->missing_pages)
+		   << (PAGE_SHIFT - 10),
+		   chunk_kb ? chunk_kb : bitmap->mddev->bitmap_info.chunksize,
+		   chunk_kb ? "KB" : "B");
+	if (bitmap->file) {
+		seq_printf(seq, ", file: ");
+		seq_path(seq, &bitmap->file->f_path, " \t\n");
+	}
+
+	seq_printf(seq, "\n");
+	spin_unlock_irqrestore(&bitmap->lock, flags);
+}
+
 static ssize_t
 location_show(struct mddev *mddev, char *page)
 {
@@ -1904,6 +1900,8 @@ location_store(struct mddev *mddev, const char *buf, size_t len)
 			if (mddev->pers) {
 				mddev->pers->quiesce(mddev, 1);
 				rv = bitmap_create(mddev);
+				if (!rv)
+					rv = bitmap_load(mddev);
 				if (rv) {
 					bitmap_destroy(mddev);
 					mddev->bitmap_info.offset = 0;
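
Two behavioural points in the bitmap.c changes above: the /proc/mdstat bitmap reporting moves into the new bitmap_status() helper, and location_store() now calls bitmap_load() after bitmap_create(), so a bitmap added through sysfs at runtime is actually read into memory rather than left empty. For illustration, the status line the helper emits looks like this (the values are invented; the format string is the one in the hunk):

	bitmap: 5/59 pages [20KB], 2048KB chunk, file: /bitmaps/md0.bm
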
diff --git a/drivers/md/bitmap.h b/drivers/md/bitmap.h
index a15436dd9b3e..55ca5aec84e4 100644
--- a/drivers/md/bitmap.h
+++ b/drivers/md/bitmap.h
@@ -13,8 +13,6 @@
 #define BITMAP_MAJOR_HI 4
 #define BITMAP_MAJOR_HOSTENDIAN 3
 
-#define BITMAP_MINOR 39
-
 /*
  * in-memory bitmap:
  *
@@ -101,21 +99,10 @@ typedef __u16 bitmap_counter_t;
 /* same, except a mask value for more efficient bitops */
 #define PAGE_COUNTER_MASK  (PAGE_COUNTER_RATIO - 1)
 
-#define BITMAP_BLOCK_SIZE 512
 #define BITMAP_BLOCK_SHIFT 9
 
 /* how many blocks per chunk? (this is variable) */
 #define CHUNK_BLOCK_RATIO(bitmap) ((bitmap)->mddev->bitmap_info.chunksize >> BITMAP_BLOCK_SHIFT)
-#define CHUNK_BLOCK_SHIFT(bitmap) ((bitmap)->chunkshift - BITMAP_BLOCK_SHIFT)
-#define CHUNK_BLOCK_MASK(bitmap) (CHUNK_BLOCK_RATIO(bitmap) - 1)
-
-/* when hijacked, the counters and bits represent even larger "chunks" */
-/* there will be 1024 chunks represented by each counter in the page pointers */
-#define PAGEPTR_BLOCK_RATIO(bitmap) \
-	(CHUNK_BLOCK_RATIO(bitmap) << PAGE_COUNTER_SHIFT >> 1)
-#define PAGEPTR_BLOCK_SHIFT(bitmap) \
-	(CHUNK_BLOCK_SHIFT(bitmap) + PAGE_COUNTER_SHIFT - 1)
-#define PAGEPTR_BLOCK_MASK(bitmap) (PAGEPTR_BLOCK_RATIO(bitmap) - 1)
 
 #endif
 
@@ -181,12 +168,6 @@ struct bitmap_page {
 	unsigned int  count:31;
 };
 
-/* keep track of bitmap file pages that have pending writes on them */
-struct page_list {
-	struct list_head list;
-	struct page *page;
-};
-
 /* the main bitmap structure - one per mddev */
 struct bitmap {
 	struct bitmap_page *bp;
@@ -196,7 +177,7 @@ struct bitmap {
 	struct mddev *mddev; /* the md device that the bitmap is for */
 
 	/* bitmap chunksize -- how much data does each bit represent? */
-	unsigned long chunkshift; /* chunksize = 2^chunkshift (for bitops) */
+	unsigned long chunkshift; /* chunksize = 2^(chunkshift+9) (for bitops) */
 	unsigned long chunks; /* total number of data chunks for the array */
 
 	__u64 events_cleared;
@@ -245,6 +226,7 @@ void bitmap_destroy(struct mddev *mddev);
 
 void bitmap_print_sb(struct bitmap *bitmap);
 void bitmap_update_sb(struct bitmap *bitmap);
+void bitmap_status(struct seq_file *seq, struct bitmap *bitmap);
 
 int  bitmap_setallbits(struct bitmap *bitmap);
 void bitmap_write_all(struct bitmap *bitmap);
diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c
index 787022c18187..c5a875d7b882 100644
--- a/drivers/md/dm-raid.c
+++ b/drivers/md/dm-raid.c
@@ -615,14 +615,14 @@ static int read_disk_sb(struct md_rdev *rdev, int size)
 
 static void super_sync(struct mddev *mddev, struct md_rdev *rdev)
 {
-	struct md_rdev *r, *t;
+	struct md_rdev *r;
 	uint64_t failed_devices;
 	struct dm_raid_superblock *sb;
 
 	sb = page_address(rdev->sb_page);
 	failed_devices = le64_to_cpu(sb->failed_devices);
 
-	rdev_for_each(r, t, mddev)
+	rdev_for_each(r, mddev)
 		if ((r->raid_disk >= 0) && test_bit(Faulty, &r->flags))
 			failed_devices |= (1ULL << r->raid_disk);
 
@@ -707,7 +707,7 @@ static int super_init_validation(struct mddev *mddev, struct md_rdev *rdev)
 	struct dm_raid_superblock *sb;
 	uint32_t new_devs = 0;
 	uint32_t rebuilds = 0;
-	struct md_rdev *r, *t;
+	struct md_rdev *r;
 	struct dm_raid_superblock *sb2;
 
 	sb = page_address(rdev->sb_page);
@@ -750,7 +750,7 @@ static int super_init_validation(struct mddev *mddev, struct md_rdev *rdev)
 		 * case the In_sync bit will /not/ be set and
 		 * recovery_cp must be MaxSector.
 		 */
-		rdev_for_each(r, t, mddev) {
+		rdev_for_each(r, mddev) {
 			if (!test_bit(In_sync, &r->flags)) {
 				DMINFO("Device %d specified for rebuild: "
 				       "Clearing superblock", r->raid_disk);
@@ -782,7 +782,7 @@ static int super_init_validation(struct mddev *mddev, struct md_rdev *rdev)
 	 * Now we set the Faulty bit for those devices that are
 	 * recorded in the superblock as failed.
 	 */
-	rdev_for_each(r, t, mddev) {
+	rdev_for_each(r, mddev) {
 		if (!r->sb_page)
 			continue;
 		sb2 = page_address(r->sb_page);
@@ -855,11 +855,11 @@ static int super_validate(struct mddev *mddev, struct md_rdev *rdev)
 static int analyse_superblocks(struct dm_target *ti, struct raid_set *rs)
 {
 	int ret;
-	struct md_rdev *rdev, *freshest, *tmp;
+	struct md_rdev *rdev, *freshest;
 	struct mddev *mddev = &rs->md;
 
 	freshest = NULL;
-	rdev_for_each(rdev, tmp, mddev) {
+	rdev_for_each(rdev, mddev) {
 		if (!rdev->meta_bdev)
 			continue;
 
@@ -888,7 +888,7 @@ static int analyse_superblocks(struct dm_target *ti, struct raid_set *rs)
 	if (super_validate(mddev, freshest))
 		return -EINVAL;
 
-	rdev_for_each(rdev, tmp, mddev)
+	rdev_for_each(rdev, mddev)
 		if ((rdev != freshest) && super_validate(mddev, rdev))
 			return -EINVAL;
 
diff --git a/drivers/md/faulty.c b/drivers/md/faulty.c
index feb2c3c7bb44..45135f69509c 100644
--- a/drivers/md/faulty.c
+++ b/drivers/md/faulty.c
@@ -315,7 +315,7 @@ static int run(struct mddev *mddev)
 	}
 	conf->nfaults = 0;
 
-	list_for_each_entry(rdev, &mddev->disks, same_set)
+	rdev_for_each(rdev, mddev)
 		conf->rdev = rdev;
 
 	md_set_array_sectors(mddev, faulty_size(mddev, 0, 0));
diff --git a/drivers/md/linear.c b/drivers/md/linear.c
index 627456542fb3..b0fcc7d02adb 100644
--- a/drivers/md/linear.c
+++ b/drivers/md/linear.c
@@ -68,10 +68,19 @@ static int linear_mergeable_bvec(struct request_queue *q,
 	struct dev_info *dev0;
 	unsigned long maxsectors, bio_sectors = bvm->bi_size >> 9;
 	sector_t sector = bvm->bi_sector + get_start_sect(bvm->bi_bdev);
+	int maxbytes = biovec->bv_len;
+	struct request_queue *subq;
 
 	rcu_read_lock();
 	dev0 = which_dev(mddev, sector);
 	maxsectors = dev0->end_sector - sector;
+	subq = bdev_get_queue(dev0->rdev->bdev);
+	if (subq->merge_bvec_fn) {
+		bvm->bi_bdev = dev0->rdev->bdev;
+		bvm->bi_sector -= dev0->end_sector - dev0->rdev->sectors;
+		maxbytes = min(maxbytes, subq->merge_bvec_fn(subq, bvm,
+							     biovec));
+	}
 	rcu_read_unlock();
 
 	if (maxsectors < bio_sectors)
@@ -80,12 +89,12 @@ static int linear_mergeable_bvec(struct request_queue *q,
 		maxsectors -= bio_sectors;
 
 	if (maxsectors <= (PAGE_SIZE >> 9 ) && bio_sectors == 0)
-		return biovec->bv_len;
-	/* The bytes available at this offset could be really big,
-	 * so we cap at 2^31 to avoid overflow */
-	if (maxsectors > (1 << (31-9)))
-		return 1<<31;
+		return maxbytes;
+
+	if (maxsectors > (maxbytes >> 9))
+		return maxbytes;
+	else
 		return maxsectors << 9;
 }
 
 static int linear_congested(void *data, int bits)
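
The two hunks above teach linear's merge_bvec_fn to chain into the member device's own merge_bvec_fn instead of ignoring it (the old workaround of clamping max_segments to one page is deleted in the linear_conf() hunk below). For background, a merge_bvec_fn answers "how many bytes of this bio_vec may be appended to this bio". A minimal sketch of that contract, with a hypothetical boundary_sectors limit standing in for a real device constraint (illustration only, not code from the patch):

	static int my_mergeable_bvec(struct request_queue *q,
				     struct bvec_merge_data *bvm,
				     struct bio_vec *biovec)
	{
		unsigned int boundary_sectors = 256;	/* hypothetical 128KB boundary */
		sector_t sector = bvm->bi_sector + get_start_sect(bvm->bi_bdev);
		unsigned int bio_sectors = bvm->bi_size >> 9;
		int max = (boundary_sectors - ((sector & (boundary_sectors - 1))
					       + bio_sectors)) << 9;

		if (max < 0)
			max = 0;	/* bio already reaches the boundary */
		if (max <= biovec->bv_len && bio_sectors == 0)
			return biovec->bv_len;	/* must accept at least one page */
		return min(max, (int)biovec->bv_len);
	}
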
@@ -138,7 +147,7 @@ static struct linear_conf *linear_conf(struct mddev *mddev, int raid_disks)
 	cnt = 0;
 	conf->array_sectors = 0;
 
-	list_for_each_entry(rdev, &mddev->disks, same_set) {
+	rdev_for_each(rdev, mddev) {
 		int j = rdev->raid_disk;
 		struct dev_info *disk = conf->disks + j;
 		sector_t sectors;
@@ -158,15 +167,6 @@ static struct linear_conf *linear_conf(struct mddev *mddev, int raid_disks)
 
 		disk_stack_limits(mddev->gendisk, rdev->bdev,
 				  rdev->data_offset << 9);
-		/* as we don't honour merge_bvec_fn, we must never risk
-		 * violating it, so limit max_segments to 1 lying within
-		 * a single page.
-		 */
-		if (rdev->bdev->bd_disk->queue->merge_bvec_fn) {
-			blk_queue_max_segments(mddev->queue, 1);
-			blk_queue_segment_boundary(mddev->queue,
-						   PAGE_CACHE_SIZE - 1);
-		}
 
 		conf->array_sectors += rdev->sectors;
 		cnt++;
diff --git a/drivers/md/md.c b/drivers/md/md.c
index ce88755baf4a..b572e1e386ce 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -439,7 +439,7 @@ static void submit_flushes(struct work_struct *ws)
 	INIT_WORK(&mddev->flush_work, md_submit_flush_data);
 	atomic_set(&mddev->flush_pending, 1);
 	rcu_read_lock();
-	list_for_each_entry_rcu(rdev, &mddev->disks, same_set)
+	rdev_for_each_rcu(rdev, mddev)
 		if (rdev->raid_disk >= 0 &&
 		    !test_bit(Faulty, &rdev->flags)) {
 			/* Take two references, one is dropped
@@ -749,7 +749,7 @@ static struct md_rdev * find_rdev_nr(struct mddev *mddev, int nr)
 {
 	struct md_rdev *rdev;
 
-	list_for_each_entry(rdev, &mddev->disks, same_set)
+	rdev_for_each(rdev, mddev)
 		if (rdev->desc_nr == nr)
 			return rdev;
 
@@ -760,7 +760,7 @@ static struct md_rdev * find_rdev(struct mddev * mddev, dev_t dev)
 {
 	struct md_rdev *rdev;
 
-	list_for_each_entry(rdev, &mddev->disks, same_set)
+	rdev_for_each(rdev, mddev)
 		if (rdev->bdev->bd_dev == dev)
 			return rdev;
 
@@ -1342,7 +1342,7 @@ static void super_90_sync(struct mddev *mddev, struct md_rdev *rdev)
 		sb->state |= (1<<MD_SB_BITMAP_PRESENT);
 
 	sb->disks[0].state = (1<<MD_DISK_REMOVED);
-	list_for_each_entry(rdev2, &mddev->disks, same_set) {
+	rdev_for_each(rdev2, mddev) {
 		mdp_disk_t *d;
 		int desc_nr;
 		int is_active = test_bit(In_sync, &rdev2->flags);
@@ -1805,18 +1805,18 @@ retry:
 					| BB_LEN(internal_bb));
 				*bbp++ = cpu_to_le64(store_bb);
 			}
+			bb->changed = 0;
 			if (read_seqretry(&bb->lock, seq))
 				goto retry;
 
 			bb->sector = (rdev->sb_start +
 				      (int)le32_to_cpu(sb->bblog_offset));
 			bb->size = le16_to_cpu(sb->bblog_size);
-			bb->changed = 0;
 		}
 	}
 
 	max_dev = 0;
-	list_for_each_entry(rdev2, &mddev->disks, same_set)
+	rdev_for_each(rdev2, mddev)
 		if (rdev2->desc_nr+1 > max_dev)
 			max_dev = rdev2->desc_nr+1;
 
@@ -1833,7 +1833,7 @@ retry:
 	for (i=0; i<max_dev;i++)
 		sb->dev_roles[i] = cpu_to_le16(0xfffe);
 
-	list_for_each_entry(rdev2, &mddev->disks, same_set) {
+	rdev_for_each(rdev2, mddev) {
 		i = rdev2->desc_nr;
 		if (test_bit(Faulty, &rdev2->flags))
 			sb->dev_roles[i] = cpu_to_le16(0xfffe);
@@ -1948,7 +1948,7 @@ int md_integrity_register(struct mddev *mddev)
 		return 0; /* nothing to do */
 	if (!mddev->gendisk || blk_get_integrity(mddev->gendisk))
 		return 0; /* shouldn't register, or already is */
-	list_for_each_entry(rdev, &mddev->disks, same_set) {
+	rdev_for_each(rdev, mddev) {
 		/* skip spares and non-functional disks */
 		if (test_bit(Faulty, &rdev->flags))
 			continue;
@@ -2175,7 +2175,7 @@ static void export_array(struct mddev *mddev)
 {
 	struct md_rdev *rdev, *tmp;
 
-	rdev_for_each(rdev, tmp, mddev) {
+	rdev_for_each_safe(rdev, tmp, mddev) {
 		if (!rdev->mddev) {
 			MD_BUG();
 			continue;
@@ -2307,11 +2307,11 @@ static void md_print_devices(void)
 			bitmap_print_sb(mddev->bitmap);
 		else
 			printk("%s: ", mdname(mddev));
-		list_for_each_entry(rdev, &mddev->disks, same_set)
+		rdev_for_each(rdev, mddev)
 			printk("<%s>", bdevname(rdev->bdev,b));
 		printk("\n");
 
-		list_for_each_entry(rdev, &mddev->disks, same_set)
+		rdev_for_each(rdev, mddev)
 			print_rdev(rdev, mddev->major_version);
 	}
 	printk("md: **********************************\n");
@@ -2328,7 +2328,7 @@ static void sync_sbs(struct mddev * mddev, int nospares)
 	 * with the rest of the array)
 	 */
 	struct md_rdev *rdev;
-	list_for_each_entry(rdev, &mddev->disks, same_set) {
+	rdev_for_each(rdev, mddev) {
 		if (rdev->sb_events == mddev->events ||
 		    (nospares &&
 		     rdev->raid_disk < 0 &&
@@ -2351,7 +2351,7 @@ static void md_update_sb(struct mddev * mddev, int force_change)
 
 repeat:
 	/* First make sure individual recovery_offsets are correct */
-	list_for_each_entry(rdev, &mddev->disks, same_set) {
+	rdev_for_each(rdev, mddev) {
 		if (rdev->raid_disk >= 0 &&
 		    mddev->delta_disks >= 0 &&
 		    !test_bit(In_sync, &rdev->flags) &&
@@ -2364,8 +2364,9 @@ repeat:
 	clear_bit(MD_CHANGE_DEVS, &mddev->flags);
 	if (!mddev->external) {
 		clear_bit(MD_CHANGE_PENDING, &mddev->flags);
-		list_for_each_entry(rdev, &mddev->disks, same_set) {
+		rdev_for_each(rdev, mddev) {
 			if (rdev->badblocks.changed) {
+				rdev->badblocks.changed = 0;
 				md_ack_all_badblocks(&rdev->badblocks);
 				md_error(mddev, rdev);
 			}
@@ -2430,7 +2431,7 @@ repeat:
 		mddev->events --;
 	}
 
-	list_for_each_entry(rdev, &mddev->disks, same_set) {
+	rdev_for_each(rdev, mddev) {
 		if (rdev->badblocks.changed)
 			any_badblocks_changed++;
 		if (test_bit(Faulty, &rdev->flags))
@@ -2444,7 +2445,7 @@ repeat:
 		 mdname(mddev), mddev->in_sync);
 
 	bitmap_update_sb(mddev->bitmap);
-	list_for_each_entry(rdev, &mddev->disks, same_set) {
+	rdev_for_each(rdev, mddev) {
 		char b[BDEVNAME_SIZE];
 
 		if (rdev->sb_loaded != 1)
@@ -2493,7 +2494,7 @@ repeat:
 	if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
 		sysfs_notify(&mddev->kobj, NULL, "sync_completed");
 
-	list_for_each_entry(rdev, &mddev->disks, same_set) {
+	rdev_for_each(rdev, mddev) {
 		if (test_and_clear_bit(FaultRecorded, &rdev->flags))
 			clear_bit(Blocked, &rdev->flags);
 
@@ -2896,7 +2897,7 @@ rdev_size_store(struct md_rdev *rdev, const char *buf, size_t len)
 		struct md_rdev *rdev2;
 
 		mddev_lock(mddev);
-		list_for_each_entry(rdev2, &mddev->disks, same_set)
+		rdev_for_each(rdev2, mddev)
 			if (rdev->bdev == rdev2->bdev &&
 			    rdev != rdev2 &&
 			    overlaps(rdev->data_offset, rdev->sectors,
@@ -3193,7 +3194,7 @@ static void analyze_sbs(struct mddev * mddev)
 	char b[BDEVNAME_SIZE];
 
 	freshest = NULL;
-	rdev_for_each(rdev, tmp, mddev)
+	rdev_for_each_safe(rdev, tmp, mddev)
 		switch (super_types[mddev->major_version].
 			load_super(rdev, freshest, mddev->minor_version)) {
 		case 1:
@@ -3214,7 +3215,7 @@ static void analyze_sbs(struct mddev * mddev)
 	validate_super(mddev, freshest);
 
 	i = 0;
-	rdev_for_each(rdev, tmp, mddev) {
+	rdev_for_each_safe(rdev, tmp, mddev) {
 		if (mddev->max_disks &&
 		    (rdev->desc_nr >= mddev->max_disks ||
 		     i > mddev->max_disks)) {
@@ -3403,7 +3404,7 @@ level_store(struct mddev *mddev, const char *buf, size_t len)
 		return -EINVAL;
 	}
 
-	list_for_each_entry(rdev, &mddev->disks, same_set)
+	rdev_for_each(rdev, mddev)
 		rdev->new_raid_disk = rdev->raid_disk;
 
 	/* ->takeover must set new_* and/or delta_disks
@@ -3456,7 +3457,7 @@ level_store(struct mddev *mddev, const char *buf, size_t len)
 		mddev->safemode = 0;
 	}
 
-	list_for_each_entry(rdev, &mddev->disks, same_set) {
+	rdev_for_each(rdev, mddev) {
 		if (rdev->raid_disk < 0)
 			continue;
 		if (rdev->new_raid_disk >= mddev->raid_disks)
@@ -3465,7 +3466,7 @@ level_store(struct mddev *mddev, const char *buf, size_t len)
 			continue;
 		sysfs_unlink_rdev(mddev, rdev);
 	}
-	list_for_each_entry(rdev, &mddev->disks, same_set) {
+	rdev_for_each(rdev, mddev) {
 		if (rdev->raid_disk < 0)
 			continue;
 		if (rdev->new_raid_disk == rdev->raid_disk)
@@ -4796,7 +4797,7 @@ int md_run(struct mddev *mddev)
 	 * the only valid external interface is through the md
 	 * device.
 	 */
-	list_for_each_entry(rdev, &mddev->disks, same_set) {
+	rdev_for_each(rdev, mddev) {
 		if (test_bit(Faulty, &rdev->flags))
 			continue;
 		sync_blockdev(rdev->bdev);
@@ -4867,8 +4868,8 @@ int md_run(struct mddev *mddev)
 		struct md_rdev *rdev2;
 		int warned = 0;
 
-		list_for_each_entry(rdev, &mddev->disks, same_set)
-			list_for_each_entry(rdev2, &mddev->disks, same_set) {
+		rdev_for_each(rdev, mddev)
+			rdev_for_each(rdev2, mddev) {
 				if (rdev < rdev2 &&
 				    rdev->bdev->bd_contains ==
 				    rdev2->bdev->bd_contains) {
@@ -4945,7 +4946,7 @@ int md_run(struct mddev *mddev)
 	mddev->in_sync = 1;
 	smp_wmb();
 	mddev->ready = 1;
-	list_for_each_entry(rdev, &mddev->disks, same_set)
+	rdev_for_each(rdev, mddev)
 		if (rdev->raid_disk >= 0)
 			if (sysfs_link_rdev(mddev, rdev))
 				/* failure here is OK */;
@@ -5073,6 +5074,7 @@ static void md_clean(struct mddev *mddev)
 	mddev->changed = 0;
 	mddev->degraded = 0;
 	mddev->safemode = 0;
+	mddev->merge_check_needed = 0;
 	mddev->bitmap_info.offset = 0;
 	mddev->bitmap_info.default_offset = 0;
 	mddev->bitmap_info.chunksize = 0;
@@ -5175,7 +5177,7 @@ static int do_md_stop(struct mddev * mddev, int mode, int is_open)
 		/* tell userspace to handle 'inactive' */
 		sysfs_notify_dirent_safe(mddev->sysfs_state);
 
-		list_for_each_entry(rdev, &mddev->disks, same_set)
+		rdev_for_each(rdev, mddev)
 			if (rdev->raid_disk >= 0)
 				sysfs_unlink_rdev(mddev, rdev);
 
@@ -5226,7 +5228,7 @@ static void autorun_array(struct mddev *mddev)
 
 	printk(KERN_INFO "md: running: ");
 
-	list_for_each_entry(rdev, &mddev->disks, same_set) {
+	rdev_for_each(rdev, mddev) {
 		char b[BDEVNAME_SIZE];
 		printk("<%s>", bdevname(rdev->bdev,b));
 	}
@@ -5356,7 +5358,7 @@ static int get_array_info(struct mddev * mddev, void __user * arg)
 	struct md_rdev *rdev;
 
 	nr=working=insync=failed=spare=0;
-	list_for_each_entry(rdev, &mddev->disks, same_set) {
+	rdev_for_each(rdev, mddev) {
 		nr++;
 		if (test_bit(Faulty, &rdev->flags))
 			failed++;
@@ -5923,7 +5925,7 @@ static int update_size(struct mddev *mddev, sector_t num_sectors)
 		 * grow, and re-add.
 		 */
 		return -EBUSY;
-	list_for_each_entry(rdev, &mddev->disks, same_set) {
+	rdev_for_each(rdev, mddev) {
 		sector_t avail = rdev->sectors;
 
 		if (fit && (num_sectors == 0 || num_sectors > avail))
@@ -6724,7 +6726,6 @@ static int md_seq_show(struct seq_file *seq, void *v)
 	struct mddev *mddev = v;
 	sector_t sectors;
 	struct md_rdev *rdev;
-	struct bitmap *bitmap;
 
 	if (v == (void*)1) {
 		struct md_personality *pers;
@@ -6758,7 +6759,7 @@ static int md_seq_show(struct seq_file *seq, void *v)
 		}
 
 		sectors = 0;
-		list_for_each_entry(rdev, &mddev->disks, same_set) {
+		rdev_for_each(rdev, mddev) {
 			char b[BDEVNAME_SIZE];
 			seq_printf(seq, " %s[%d]",
 				   bdevname(rdev->bdev,b), rdev->desc_nr);
@@ -6812,27 +6813,7 @@ static int md_seq_show(struct seq_file *seq, void *v)
 		} else
 			seq_printf(seq, "\n       ");
 
-		if ((bitmap = mddev->bitmap)) {
-			unsigned long chunk_kb;
-			unsigned long flags;
-			spin_lock_irqsave(&bitmap->lock, flags);
-			chunk_kb = mddev->bitmap_info.chunksize >> 10;
-			seq_printf(seq, "bitmap: %lu/%lu pages [%luKB], "
-				"%lu%s chunk",
-				bitmap->pages - bitmap->missing_pages,
-				bitmap->pages,
-				(bitmap->pages - bitmap->missing_pages)
-				<< (PAGE_SHIFT - 10),
-				chunk_kb ? chunk_kb : mddev->bitmap_info.chunksize,
-				chunk_kb ? "KB" : "B");
-			if (bitmap->file) {
-				seq_printf(seq, ", file: ");
-				seq_path(seq, &bitmap->file->f_path, " \t\n");
-			}
-
-			seq_printf(seq, "\n");
-			spin_unlock_irqrestore(&bitmap->lock, flags);
-		}
+		bitmap_status(seq, mddev->bitmap);
 
 		seq_printf(seq, "\n");
 	}
@@ -7170,7 +7151,7 @@ void md_do_sync(struct mddev *mddev)
 			max_sectors = mddev->dev_sectors;
 		j = MaxSector;
 		rcu_read_lock();
-		list_for_each_entry_rcu(rdev, &mddev->disks, same_set)
+		rdev_for_each_rcu(rdev, mddev)
 			if (rdev->raid_disk >= 0 &&
 			    !test_bit(Faulty, &rdev->flags) &&
 			    !test_bit(In_sync, &rdev->flags) &&
@@ -7342,7 +7323,7 @@ void md_do_sync(struct mddev *mddev)
 			if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery))
 				mddev->curr_resync = MaxSector;
 			rcu_read_lock();
-			list_for_each_entry_rcu(rdev, &mddev->disks, same_set)
+			rdev_for_each_rcu(rdev, mddev)
 				if (rdev->raid_disk >= 0 &&
 				    mddev->delta_disks >= 0 &&
 				    !test_bit(Faulty, &rdev->flags) &&
@@ -7388,7 +7369,7 @@ static int remove_and_add_spares(struct mddev *mddev)
 
 	mddev->curr_resync_completed = 0;
 
-	list_for_each_entry(rdev, &mddev->disks, same_set)
+	rdev_for_each(rdev, mddev)
 		if (rdev->raid_disk >= 0 &&
 		    !test_bit(Blocked, &rdev->flags) &&
 		    (test_bit(Faulty, &rdev->flags) ||
@@ -7406,7 +7387,7 @@ static int remove_and_add_spares(struct mddev *mddev)
 					"degraded");
 
 
-	list_for_each_entry(rdev, &mddev->disks, same_set) {
+	rdev_for_each(rdev, mddev) {
 		if (rdev->raid_disk >= 0 &&
 		    !test_bit(In_sync, &rdev->flags) &&
 		    !test_bit(Faulty, &rdev->flags))
@@ -7451,7 +7432,7 @@ static void reap_sync_thread(struct mddev *mddev)
 	 * do the superblock for an incrementally recovered device
 	 * written out.
 	 */
-	list_for_each_entry(rdev, &mddev->disks, same_set)
+	rdev_for_each(rdev, mddev)
 		if (!mddev->degraded ||
 		    test_bit(In_sync, &rdev->flags))
 			rdev->saved_raid_disk = -1;
@@ -7529,7 +7510,7 @@ void md_check_recovery(struct mddev *mddev)
 			 * failed devices.
 			 */
 			struct md_rdev *rdev;
-			list_for_each_entry(rdev, &mddev->disks, same_set)
+			rdev_for_each(rdev, mddev)
 				if (rdev->raid_disk >= 0 &&
 				    !test_bit(Blocked, &rdev->flags) &&
 				    test_bit(Faulty, &rdev->flags) &&
@@ -8040,7 +8021,7 @@ void md_ack_all_badblocks(struct badblocks *bb)
 		return;
 	write_seqlock_irq(&bb->lock);
 
-	if (bb->changed == 0) {
+	if (bb->changed == 0 && bb->unacked_exist) {
 		u64 *p = bb->page;
 		int i;
 		for (i = 0; i < bb->count ; i++) {
@@ -8157,30 +8138,23 @@ static int md_notify_reboot(struct notifier_block *this,
 	struct mddev *mddev;
 	int need_delay = 0;
 
-	if ((code == SYS_DOWN) || (code == SYS_HALT) || (code == SYS_POWER_OFF)) {
-
-		printk(KERN_INFO "md: stopping all md devices.\n");
-
-		for_each_mddev(mddev, tmp) {
-			if (mddev_trylock(mddev)) {
-				/* Force a switch to readonly even array
-				 * appears to still be in use. Hence
-				 * the '100'.
-				 */
-				md_set_readonly(mddev, 100);
-				mddev_unlock(mddev);
-			}
-			need_delay = 1;
+	for_each_mddev(mddev, tmp) {
+		if (mddev_trylock(mddev)) {
+			__md_stop_writes(mddev);
+			mddev->safemode = 2;
+			mddev_unlock(mddev);
 		}
-		/*
-		 * certain more exotic SCSI devices are known to be
-		 * volatile wrt too early system reboots. While the
-		 * right place to handle this issue is the given
-		 * driver, we do want to have a safe RAID driver ...
-		 */
-		if (need_delay)
-			mdelay(1000*1);
+		need_delay = 1;
 	}
+	/*
+	 * certain more exotic SCSI devices are known to be
+	 * volatile wrt too early system reboots. While the
+	 * right place to handle this issue is the given
+	 * driver, we do want to have a safe RAID driver ...
+	 */
+	if (need_delay)
+		mdelay(1000*1);
+
 	return NOTIFY_DONE;
 }
 
diff --git a/drivers/md/md.h b/drivers/md/md.h
index 44c63dfeeb2b..1c2063ccf48e 100644
--- a/drivers/md/md.h
+++ b/drivers/md/md.h
@@ -128,6 +128,10 @@ struct md_rdev {
 enum flag_bits {
 	Faulty,			/* device is known to have a fault */
 	In_sync,		/* device is in_sync with rest of array */
+	Unmerged,		/* device is being added to array and should
+				 * be considerred for bvec_merge_fn but not
+				 * yet for actual IO
+				 */
 	WriteMostly,		/* Avoid reading if at all possible */
 	AutoDetected,		/* added by auto-detect */
 	Blocked,		/* An error occurred but has not yet
@@ -345,6 +349,10 @@ struct mddev {
 	int				degraded;	/* whether md should consider
 							 * adding a spare
 							 */
+	int				merge_check_needed; /* at least one
+							     * member device
+							     * has a
+							     * merge_bvec_fn */
 
 	atomic_t			recovery_active; /* blocks scheduled, but not written */
 	wait_queue_head_t		recovery_wait;
@@ -519,7 +527,10 @@ static inline void sysfs_unlink_rdev(struct mddev *mddev, struct md_rdev *rdev) | |||
519 | /* | 527 | /* |
520 | * iterates through the 'same array disks' ringlist | 528 | * iterates through the 'same array disks' ringlist |
521 | */ | 529 | */ |
522 | #define rdev_for_each(rdev, tmp, mddev) \ | 530 | #define rdev_for_each(rdev, mddev) \ |
531 | list_for_each_entry(rdev, &((mddev)->disks), same_set) | ||
532 | |||
533 | #define rdev_for_each_safe(rdev, tmp, mddev) \ | ||
523 | list_for_each_entry_safe(rdev, tmp, &((mddev)->disks), same_set) | 534 | list_for_each_entry_safe(rdev, tmp, &((mddev)->disks), same_set) |
524 | 535 | ||
525 | #define rdev_for_each_rcu(rdev, mddev) \ | 536 | #define rdev_for_each_rcu(rdev, mddev) \ |
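The split above leaves a plain walker and a _safe walker. A short sketch of when each is appropriate (the unlinking in the second loop is illustrative only, not md code):

#include <linux/list.h>

static void example_walks(struct mddev *mddev)
{
	struct md_rdev *rdev, *tmp;

	/* Read-only walk: the plain variant needs no scratch pointer. */
	rdev_for_each(rdev, mddev)
		if (test_bit(Faulty, &rdev->flags))
			pr_debug("rdev in slot %d is faulty\n", rdev->raid_disk);

	/* A walk that may unlink the current entry must use the _safe
	 * variant, which caches the next pointer in 'tmp' before the
	 * loop body runs. */
	rdev_for_each_safe(rdev, tmp, mddev)
		if (rdev->raid_disk < 0)
			list_del_init(&rdev->same_set);	/* illustrative removal */
}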
diff --git a/drivers/md/multipath.c b/drivers/md/multipath.c index a222f516660e..9339e67fcc79 100644 --- a/drivers/md/multipath.c +++ b/drivers/md/multipath.c | |||
@@ -428,7 +428,7 @@ static int multipath_run (struct mddev *mddev) | |||
428 | } | 428 | } |
429 | 429 | ||
430 | working_disks = 0; | 430 | working_disks = 0; |
431 | list_for_each_entry(rdev, &mddev->disks, same_set) { | 431 | rdev_for_each(rdev, mddev) { |
432 | disk_idx = rdev->raid_disk; | 432 | disk_idx = rdev->raid_disk; |
433 | if (disk_idx < 0 || | 433 | if (disk_idx < 0 || |
434 | disk_idx >= mddev->raid_disks) | 434 | disk_idx >= mddev->raid_disks) |
diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c index 7294bd115e34..6f31f5596e01 100644 --- a/drivers/md/raid0.c +++ b/drivers/md/raid0.c | |||
@@ -91,7 +91,7 @@ static int create_strip_zones(struct mddev *mddev, struct r0conf **private_conf) | |||
91 | 91 | ||
92 | if (!conf) | 92 | if (!conf) |
93 | return -ENOMEM; | 93 | return -ENOMEM; |
94 | list_for_each_entry(rdev1, &mddev->disks, same_set) { | 94 | rdev_for_each(rdev1, mddev) { |
95 | pr_debug("md/raid0:%s: looking at %s\n", | 95 | pr_debug("md/raid0:%s: looking at %s\n", |
96 | mdname(mddev), | 96 | mdname(mddev), |
97 | bdevname(rdev1->bdev, b)); | 97 | bdevname(rdev1->bdev, b)); |
@@ -102,7 +102,7 @@ static int create_strip_zones(struct mddev *mddev, struct r0conf **private_conf) | |||
102 | sector_div(sectors, mddev->chunk_sectors); | 102 | sector_div(sectors, mddev->chunk_sectors); |
103 | rdev1->sectors = sectors * mddev->chunk_sectors; | 103 | rdev1->sectors = sectors * mddev->chunk_sectors; |
104 | 104 | ||
105 | list_for_each_entry(rdev2, &mddev->disks, same_set) { | 105 | rdev_for_each(rdev2, mddev) { |
106 | pr_debug("md/raid0:%s: comparing %s(%llu)" | 106 | pr_debug("md/raid0:%s: comparing %s(%llu)" |
107 | " with %s(%llu)\n", | 107 | " with %s(%llu)\n", |
108 | mdname(mddev), | 108 | mdname(mddev), |
@@ -157,7 +157,7 @@ static int create_strip_zones(struct mddev *mddev, struct r0conf **private_conf) | |||
157 | smallest = NULL; | 157 | smallest = NULL; |
158 | dev = conf->devlist; | 158 | dev = conf->devlist; |
159 | err = -EINVAL; | 159 | err = -EINVAL; |
160 | list_for_each_entry(rdev1, &mddev->disks, same_set) { | 160 | rdev_for_each(rdev1, mddev) { |
161 | int j = rdev1->raid_disk; | 161 | int j = rdev1->raid_disk; |
162 | 162 | ||
163 | if (mddev->level == 10) { | 163 | if (mddev->level == 10) { |
@@ -188,16 +188,10 @@ static int create_strip_zones(struct mddev *mddev, struct r0conf **private_conf) | |||
188 | 188 | ||
189 | disk_stack_limits(mddev->gendisk, rdev1->bdev, | 189 | disk_stack_limits(mddev->gendisk, rdev1->bdev, |
190 | rdev1->data_offset << 9); | 190 | rdev1->data_offset << 9); |
191 | /* as we don't honour merge_bvec_fn, we must never risk | ||
192 | * violating it, so limit ->max_segments to 1, lying within | ||
193 | * a single page. | ||
194 | */ | ||
195 | 191 | ||
196 | if (rdev1->bdev->bd_disk->queue->merge_bvec_fn) { | 192 | if (rdev1->bdev->bd_disk->queue->merge_bvec_fn) |
197 | blk_queue_max_segments(mddev->queue, 1); | 193 | conf->has_merge_bvec = 1; |
198 | blk_queue_segment_boundary(mddev->queue, | 194 | |
199 | PAGE_CACHE_SIZE - 1); | ||
200 | } | ||
201 | if (!smallest || (rdev1->sectors < smallest->sectors)) | 195 | if (!smallest || (rdev1->sectors < smallest->sectors)) |
202 | smallest = rdev1; | 196 | smallest = rdev1; |
203 | cnt++; | 197 | cnt++; |
@@ -290,8 +284,64 @@ abort: | |||
290 | return err; | 284 | return err; |
291 | } | 285 | } |
292 | 286 | ||
287 | /* Find the zone which holds a particular offset | ||
288 | * Update *sectorp to be an offset in that zone | ||
289 | */ | ||
290 | static struct strip_zone *find_zone(struct r0conf *conf, | ||
291 | sector_t *sectorp) | ||
292 | { | ||
293 | int i; | ||
294 | struct strip_zone *z = conf->strip_zone; | ||
295 | sector_t sector = *sectorp; | ||
296 | |||
297 | for (i = 0; i < conf->nr_strip_zones; i++) | ||
298 | if (sector < z[i].zone_end) { | ||
299 | if (i) | ||
300 | *sectorp = sector - z[i-1].zone_end; | ||
301 | return z + i; | ||
302 | } | ||
303 | BUG(); | ||
304 | } | ||
305 | |||
306 | /* | ||
307 | * Remaps the bio to the target device. We separate two flows: ||
308 | * a power-of-2 flow and a general flow, for the sake of performance. ||
309 | */ | ||
310 | static struct md_rdev *map_sector(struct mddev *mddev, struct strip_zone *zone, | ||
311 | sector_t sector, sector_t *sector_offset) | ||
312 | { | ||
313 | unsigned int sect_in_chunk; | ||
314 | sector_t chunk; | ||
315 | struct r0conf *conf = mddev->private; | ||
316 | int raid_disks = conf->strip_zone[0].nb_dev; | ||
317 | unsigned int chunk_sects = mddev->chunk_sectors; | ||
318 | |||
319 | if (is_power_of_2(chunk_sects)) { | ||
320 | int chunksect_bits = ffz(~chunk_sects); | ||
321 | /* find the sector offset inside the chunk */ | ||
322 | sect_in_chunk = sector & (chunk_sects - 1); | ||
323 | sector >>= chunksect_bits; | ||
324 | /* chunk in zone */ | ||
325 | chunk = *sector_offset; | ||
326 | /* quotient is the chunk number on the real device */ ||
327 | sector_div(chunk, zone->nb_dev << chunksect_bits); | ||
328 | } else { ||
329 | sect_in_chunk = sector_div(sector, chunk_sects); | ||
330 | chunk = *sector_offset; | ||
331 | sector_div(chunk, chunk_sects * zone->nb_dev); | ||
332 | } | ||
333 | /* | ||
334 | * position the bio over the real device | ||
335 | * real sector = chunk in device + starting of zone | ||
336 | * + the position in the chunk | ||
337 | */ | ||
338 | *sector_offset = (chunk * chunk_sects) + sect_in_chunk; | ||
339 | return conf->devlist[(zone - conf->strip_zone)*raid_disks | ||
340 | + sector_div(sector, zone->nb_dev)]; | ||
341 | } | ||
342 | |||
293 | /** | 343 | /** |
294 | * raid0_mergeable_bvec -- tell bio layer if a two requests can be merged | 344 | * raid0_mergeable_bvec -- tell bio layer if two requests can be merged |
295 | * @q: request queue | 345 | * @q: request queue |
296 | * @bvm: properties of new bio | 346 | * @bvm: properties of new bio |
297 | * @biovec: the request that could be merged to it. | 347 | * @biovec: the request that could be merged to it. |
@@ -303,10 +353,15 @@ static int raid0_mergeable_bvec(struct request_queue *q, | |||
303 | struct bio_vec *biovec) | 353 | struct bio_vec *biovec) |
304 | { | 354 | { |
305 | struct mddev *mddev = q->queuedata; | 355 | struct mddev *mddev = q->queuedata; |
356 | struct r0conf *conf = mddev->private; | ||
306 | sector_t sector = bvm->bi_sector + get_start_sect(bvm->bi_bdev); | 357 | sector_t sector = bvm->bi_sector + get_start_sect(bvm->bi_bdev); |
358 | sector_t sector_offset = sector; | ||
307 | int max; | 359 | int max; |
308 | unsigned int chunk_sectors = mddev->chunk_sectors; | 360 | unsigned int chunk_sectors = mddev->chunk_sectors; |
309 | unsigned int bio_sectors = bvm->bi_size >> 9; | 361 | unsigned int bio_sectors = bvm->bi_size >> 9; |
362 | struct strip_zone *zone; | ||
363 | struct md_rdev *rdev; | ||
364 | struct request_queue *subq; | ||
310 | 365 | ||
311 | if (is_power_of_2(chunk_sectors)) | 366 | if (is_power_of_2(chunk_sectors)) |
312 | max = (chunk_sectors - ((sector & (chunk_sectors-1)) | 367 | max = (chunk_sectors - ((sector & (chunk_sectors-1)) |
@@ -314,10 +369,27 @@ static int raid0_mergeable_bvec(struct request_queue *q, | |||
314 | else | 369 | else |
315 | max = (chunk_sectors - (sector_div(sector, chunk_sectors) | 370 | max = (chunk_sectors - (sector_div(sector, chunk_sectors) |
316 | + bio_sectors)) << 9; | 371 | + bio_sectors)) << 9; |
317 | if (max < 0) max = 0; /* bio_add cannot handle a negative return */ | 372 | if (max < 0) |
373 | max = 0; /* bio_add cannot handle a negative return */ | ||
318 | if (max <= biovec->bv_len && bio_sectors == 0) | 374 | if (max <= biovec->bv_len && bio_sectors == 0) |
319 | return biovec->bv_len; | 375 | return biovec->bv_len; |
320 | else | 376 | if (max < biovec->bv_len) |
377 | /* too small already, no need to check further */ | ||
378 | return max; | ||
379 | if (!conf->has_merge_bvec) | ||
380 | return max; | ||
381 | |||
382 | /* May need to check subordinate device */ | ||
383 | sector = sector_offset; | ||
384 | zone = find_zone(mddev->private, &sector_offset); ||
385 | rdev = map_sector(mddev, zone, sector, &sector_offset); ||
386 | subq = bdev_get_queue(rdev->bdev); | ||
387 | if (subq->merge_bvec_fn) { | ||
388 | bvm->bi_bdev = rdev->bdev; | ||
389 | bvm->bi_sector = sector_offset + zone->dev_start + | ||
390 | rdev->data_offset; | ||
391 | return min(max, subq->merge_bvec_fn(subq, bvm, biovec)); | ||
392 | } else | ||
321 | return max; | 393 | return max; |
322 | } | 394 | } |
323 | 395 | ||
@@ -329,7 +401,7 @@ static sector_t raid0_size(struct mddev *mddev, sector_t sectors, int raid_disks | |||
329 | WARN_ONCE(sectors || raid_disks, | 401 | WARN_ONCE(sectors || raid_disks, |
330 | "%s does not support generic reshape\n", __func__); | 402 | "%s does not support generic reshape\n", __func__); |
331 | 403 | ||
332 | list_for_each_entry(rdev, &mddev->disks, same_set) | 404 | rdev_for_each(rdev, mddev) |
333 | array_sectors += rdev->sectors; | 405 | array_sectors += rdev->sectors; |
334 | 406 | ||
335 | return array_sectors; | 407 | return array_sectors; |
@@ -397,62 +469,6 @@ static int raid0_stop(struct mddev *mddev) | |||
397 | return 0; | 469 | return 0; |
398 | } | 470 | } |
399 | 471 | ||
400 | /* Find the zone which holds a particular offset | ||
401 | * Update *sectorp to be an offset in that zone | ||
402 | */ | ||
403 | static struct strip_zone *find_zone(struct r0conf *conf, | ||
404 | sector_t *sectorp) | ||
405 | { | ||
406 | int i; | ||
407 | struct strip_zone *z = conf->strip_zone; | ||
408 | sector_t sector = *sectorp; | ||
409 | |||
410 | for (i = 0; i < conf->nr_strip_zones; i++) | ||
411 | if (sector < z[i].zone_end) { | ||
412 | if (i) | ||
413 | *sectorp = sector - z[i-1].zone_end; | ||
414 | return z + i; | ||
415 | } | ||
416 | BUG(); | ||
417 | } | ||
418 | |||
419 | /* | ||
420 | * Remaps the bio to the target device. We separate two flows: ||
421 | * a power-of-2 flow and a general flow, for the sake of performance. ||
422 | */ | ||
423 | static struct md_rdev *map_sector(struct mddev *mddev, struct strip_zone *zone, | ||
424 | sector_t sector, sector_t *sector_offset) | ||
425 | { | ||
426 | unsigned int sect_in_chunk; | ||
427 | sector_t chunk; | ||
428 | struct r0conf *conf = mddev->private; | ||
429 | int raid_disks = conf->strip_zone[0].nb_dev; | ||
430 | unsigned int chunk_sects = mddev->chunk_sectors; | ||
431 | |||
432 | if (is_power_of_2(chunk_sects)) { | ||
433 | int chunksect_bits = ffz(~chunk_sects); | ||
434 | /* find the sector offset inside the chunk */ | ||
435 | sect_in_chunk = sector & (chunk_sects - 1); | ||
436 | sector >>= chunksect_bits; | ||
437 | /* chunk in zone */ | ||
438 | chunk = *sector_offset; | ||
440 | /* quotient is the chunk number on the real device */ ||
440 | sector_div(chunk, zone->nb_dev << chunksect_bits); | ||
441 | } else { ||
442 | sect_in_chunk = sector_div(sector, chunk_sects); | ||
443 | chunk = *sector_offset; | ||
444 | sector_div(chunk, chunk_sects * zone->nb_dev); | ||
445 | } | ||
446 | /* | ||
447 | * position the bio over the real device | ||
448 | * real sector = chunk in device + starting of zone | ||
449 | * + the position in the chunk | ||
450 | */ | ||
451 | *sector_offset = (chunk * chunk_sects) + sect_in_chunk; | ||
452 | return conf->devlist[(zone - conf->strip_zone)*raid_disks | ||
453 | + sector_div(sector, zone->nb_dev)]; | ||
454 | } | ||
455 | |||
456 | /* | 472 | /* |
457 | * Is the IO distributed over one or more chunks? | 473 | * Is the IO distributed over one or more chunks? |
458 | */ | 474 | */ |
@@ -505,7 +521,7 @@ static void raid0_make_request(struct mddev *mddev, struct bio *bio) | |||
505 | } | 521 | } |
506 | 522 | ||
507 | sector_offset = bio->bi_sector; | 523 | sector_offset = bio->bi_sector; |
508 | zone = find_zone(mddev->private, &sector_offset); | 524 | zone = find_zone(mddev->private, &sector_offset); |
509 | tmp_dev = map_sector(mddev, zone, bio->bi_sector, | 525 | tmp_dev = map_sector(mddev, zone, bio->bi_sector, |
510 | &sector_offset); | 526 | &sector_offset); |
511 | bio->bi_bdev = tmp_dev->bdev; | 527 | bio->bi_bdev = tmp_dev->bdev; |
@@ -543,7 +559,7 @@ static void *raid0_takeover_raid45(struct mddev *mddev) | |||
543 | return ERR_PTR(-EINVAL); | 559 | return ERR_PTR(-EINVAL); |
544 | } | 560 | } |
545 | 561 | ||
546 | list_for_each_entry(rdev, &mddev->disks, same_set) { | 562 | rdev_for_each(rdev, mddev) { |
547 | /* check slot number for a disk */ | 563 | /* check slot number for a disk */ |
548 | if (rdev->raid_disk == mddev->raid_disks-1) { | 564 | if (rdev->raid_disk == mddev->raid_disks-1) { |
549 | printk(KERN_ERR "md/raid0:%s: raid5 must have missing parity disk!\n", | 565 | printk(KERN_ERR "md/raid0:%s: raid5 must have missing parity disk!\n", |
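To make the two mapping flows in map_sector() concrete, here is the power-of-2 arithmetic replayed in user space for an assumed single-zone array: two member devices, 64KiB chunks (chunk_sects = 128). This is a worked example under those assumptions, not raid0 code:

#include <stdio.h>

int main(void)
{
	unsigned long long sector = 1000;	/* absolute array sector */
	unsigned long long zone_offset = 1000;	/* offset within the zone (zone starts at 0) */
	unsigned int chunk_sects = 128, nb_dev = 2;
	int chunksect_bits = 7;			/* ffz(~128) == log2(128) */

	unsigned int sect_in_chunk = sector & (chunk_sects - 1);	/* 104 */
	unsigned long long abs_chunk = sector >> chunksect_bits;	/* 7 */
	unsigned int dev = abs_chunk % nb_dev;				/* 7 mod 2 = 1 */
	/* chunk index on the chosen member: zone offset over a full stripe */
	unsigned long long chunk = zone_offset / (nb_dev << chunksect_bits);	/* 3 */
	unsigned long long dev_sector = chunk * chunk_sects + sect_in_chunk;	/* 488 */

	printf("sector %llu -> device %u, sector %llu\n", sector, dev, dev_sector);
	return 0;
}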
diff --git a/drivers/md/raid0.h b/drivers/md/raid0.h index 0884bba8df4c..05539d9c97f0 100644 --- a/drivers/md/raid0.h +++ b/drivers/md/raid0.h | |||
@@ -4,13 +4,16 @@ | |||
4 | struct strip_zone { | 4 | struct strip_zone { |
5 | sector_t zone_end; /* Start of the next zone (in sectors) */ | 5 | sector_t zone_end; /* Start of the next zone (in sectors) */ |
6 | sector_t dev_start; /* Zone offset in real dev (in sectors) */ | 6 | sector_t dev_start; /* Zone offset in real dev (in sectors) */ |
7 | int nb_dev; /* # of devices attached to the zone */ | 7 | int nb_dev; /* # of devices attached to the zone */ |
8 | }; | 8 | }; |
9 | 9 | ||
10 | struct r0conf { | 10 | struct r0conf { |
11 | struct strip_zone *strip_zone; | 11 | struct strip_zone *strip_zone; |
12 | struct md_rdev **devlist; /* lists of rdevs, pointed to by strip_zone->dev */ | 12 | struct md_rdev **devlist; /* lists of rdevs, pointed to |
13 | int nr_strip_zones; | 13 | * by strip_zone->dev */ |
14 | int nr_strip_zones; | ||
15 | int has_merge_bvec; /* at least one member has | ||
16 | * a merge_bvec_fn */ | ||
14 | }; | 17 | }; |
15 | 18 | ||
16 | #endif | 19 | #endif |
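raid0_mergeable_bvec() above, and the raid1/raid10 variants that follow, all end with the same shape once has_merge_bvec or merge_check_needed says a member device might care: re-aim the merge query at the member and clamp by its answer. Condensed into one helper for clarity (the function name is mine; the body mirrors the hunks):

static int clamp_by_member(struct bvec_merge_data *bvm,
			   struct bio_vec *biovec,
			   struct md_rdev *rdev,
			   sector_t dev_sector, int max)
{
	struct request_queue *subq = bdev_get_queue(rdev->bdev);

	if (!subq->merge_bvec_fn)
		return max;		/* member imposes no extra limit */

	/* Re-aim the query at the member device... */
	bvm->bi_bdev = rdev->bdev;
	bvm->bi_sector = dev_sector + rdev->data_offset;
	/* ...and never accept more than the member would. */
	return min(max, subq->merge_bvec_fn(subq, bvm, biovec));
}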
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index a0b225eb4ac4..4a40a200d769 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c | |||
@@ -523,6 +523,7 @@ static int read_balance(struct r1conf *conf, struct r1bio *r1_bio, int *max_sect | |||
523 | rdev = rcu_dereference(conf->mirrors[disk].rdev); | 523 | rdev = rcu_dereference(conf->mirrors[disk].rdev); |
524 | if (r1_bio->bios[disk] == IO_BLOCKED | 524 | if (r1_bio->bios[disk] == IO_BLOCKED |
525 | || rdev == NULL | 525 | || rdev == NULL |
526 | || test_bit(Unmerged, &rdev->flags) | ||
526 | || test_bit(Faulty, &rdev->flags)) | 527 | || test_bit(Faulty, &rdev->flags)) |
527 | continue; | 528 | continue; |
528 | if (!test_bit(In_sync, &rdev->flags) && | 529 | if (!test_bit(In_sync, &rdev->flags) && |
@@ -614,6 +615,39 @@ static int read_balance(struct r1conf *conf, struct r1bio *r1_bio, int *max_sect | |||
614 | return best_disk; | 615 | return best_disk; |
615 | } | 616 | } |
616 | 617 | ||
618 | static int raid1_mergeable_bvec(struct request_queue *q, | ||
619 | struct bvec_merge_data *bvm, | ||
620 | struct bio_vec *biovec) | ||
621 | { | ||
622 | struct mddev *mddev = q->queuedata; | ||
623 | struct r1conf *conf = mddev->private; | ||
624 | sector_t sector = bvm->bi_sector + get_start_sect(bvm->bi_bdev); | ||
625 | int max = biovec->bv_len; | ||
626 | |||
627 | if (mddev->merge_check_needed) { | ||
628 | int disk; | ||
629 | rcu_read_lock(); | ||
630 | for (disk = 0; disk < conf->raid_disks * 2; disk++) { | ||
631 | struct md_rdev *rdev = rcu_dereference( | ||
632 | conf->mirrors[disk].rdev); | ||
633 | if (rdev && !test_bit(Faulty, &rdev->flags)) { | ||
634 | struct request_queue *q = | ||
635 | bdev_get_queue(rdev->bdev); | ||
636 | if (q->merge_bvec_fn) { | ||
637 | bvm->bi_sector = sector + | ||
638 | rdev->data_offset; | ||
639 | bvm->bi_bdev = rdev->bdev; | ||
640 | max = min(max, q->merge_bvec_fn( | ||
641 | q, bvm, biovec)); | ||
642 | } | ||
643 | } | ||
644 | } | ||
645 | rcu_read_unlock(); | ||
646 | } | ||
647 | return max; | ||
648 | |||
649 | } | ||
650 | |||
617 | int md_raid1_congested(struct mddev *mddev, int bits) | 651 | int md_raid1_congested(struct mddev *mddev, int bits) |
618 | { | 652 | { |
619 | struct r1conf *conf = mddev->private; | 653 | struct r1conf *conf = mddev->private; |
@@ -737,9 +771,22 @@ static void wait_barrier(struct r1conf *conf) | |||
737 | spin_lock_irq(&conf->resync_lock); | 771 | spin_lock_irq(&conf->resync_lock); |
738 | if (conf->barrier) { | 772 | if (conf->barrier) { |
739 | conf->nr_waiting++; | 773 | conf->nr_waiting++; |
740 | wait_event_lock_irq(conf->wait_barrier, !conf->barrier, | 774 | /* Wait for the barrier to drop. |
775 | * However if there are already pending | ||
776 | * requests (preventing the barrier from | ||
777 | * rising completely), and the | ||
778 | * pre-process bio queue isn't empty, | ||
779 | * then don't wait, as we need to empty | ||
780 | * that queue to get the nr_pending | ||
781 | * count down. | ||
782 | */ | ||
783 | wait_event_lock_irq(conf->wait_barrier, | ||
784 | !conf->barrier || | ||
785 | (conf->nr_pending && | ||
786 | current->bio_list && | ||
787 | !bio_list_empty(current->bio_list)), | ||
741 | conf->resync_lock, | 788 | conf->resync_lock, |
742 | ); | 789 | ); |
743 | conf->nr_waiting--; | 790 | conf->nr_waiting--; |
744 | } | 791 | } |
745 | conf->nr_pending++; | 792 | conf->nr_pending++; |
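The comment in the new wait condition is dense; spelled out as a predicate it reads as follows (the helper name is mine, the logic is exactly the condition in the hunk above):

static bool can_proceed_past_barrier(struct r1conf *conf)
{
	if (!conf->barrier)
		return true;	/* barrier has dropped: the normal wake-up */
	/* Escape hatch: the barrier cannot finish rising while nr_pending
	 * is non-zero, and some of those pending requests may be parked
	 * on *this* thread's current->bio_list, never to be submitted
	 * while we sleep.  Sleeping here would then be waiting on
	 * ourselves, so let the request through instead. */
	return conf->nr_pending &&
	       current->bio_list &&
	       !bio_list_empty(current->bio_list);
}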
@@ -1002,7 +1049,8 @@ read_again: | |||
1002 | break; | 1049 | break; |
1003 | } | 1050 | } |
1004 | r1_bio->bios[i] = NULL; | 1051 | r1_bio->bios[i] = NULL; |
1005 | if (!rdev || test_bit(Faulty, &rdev->flags)) { | 1052 | if (!rdev || test_bit(Faulty, &rdev->flags) |
1053 | || test_bit(Unmerged, &rdev->flags)) { | ||
1006 | if (i < conf->raid_disks) | 1054 | if (i < conf->raid_disks) |
1007 | set_bit(R1BIO_Degraded, &r1_bio->state); | 1055 | set_bit(R1BIO_Degraded, &r1_bio->state); |
1008 | continue; | 1056 | continue; |
@@ -1322,6 +1370,7 @@ static int raid1_add_disk(struct mddev *mddev, struct md_rdev *rdev) | |||
1322 | struct mirror_info *p; | 1370 | struct mirror_info *p; |
1323 | int first = 0; | 1371 | int first = 0; |
1324 | int last = conf->raid_disks - 1; | 1372 | int last = conf->raid_disks - 1; |
1373 | struct request_queue *q = bdev_get_queue(rdev->bdev); | ||
1325 | 1374 | ||
1326 | if (mddev->recovery_disabled == conf->recovery_disabled) | 1375 | if (mddev->recovery_disabled == conf->recovery_disabled) |
1327 | return -EBUSY; | 1376 | return -EBUSY; |
@@ -1329,23 +1378,17 @@ static int raid1_add_disk(struct mddev *mddev, struct md_rdev *rdev) | |||
1329 | if (rdev->raid_disk >= 0) | 1378 | if (rdev->raid_disk >= 0) |
1330 | first = last = rdev->raid_disk; | 1379 | first = last = rdev->raid_disk; |
1331 | 1380 | ||
1381 | if (q->merge_bvec_fn) { | ||
1382 | set_bit(Unmerged, &rdev->flags); | ||
1383 | mddev->merge_check_needed = 1; | ||
1384 | } | ||
1385 | |||
1332 | for (mirror = first; mirror <= last; mirror++) { | 1386 | for (mirror = first; mirror <= last; mirror++) { |
1333 | p = conf->mirrors+mirror; | 1387 | p = conf->mirrors+mirror; |
1334 | if (!p->rdev) { | 1388 | if (!p->rdev) { |
1335 | 1389 | ||
1336 | disk_stack_limits(mddev->gendisk, rdev->bdev, | 1390 | disk_stack_limits(mddev->gendisk, rdev->bdev, |
1337 | rdev->data_offset << 9); | 1391 | rdev->data_offset << 9); |
1338 | /* as we don't honour merge_bvec_fn, we must | ||
1339 | * never risk violating it, so limit | ||
1340 | * ->max_segments to one lying with a single | ||
1341 | * page, as a one page request is never in | ||
1342 | * violation. | ||
1343 | */ | ||
1344 | if (rdev->bdev->bd_disk->queue->merge_bvec_fn) { | ||
1345 | blk_queue_max_segments(mddev->queue, 1); | ||
1346 | blk_queue_segment_boundary(mddev->queue, | ||
1347 | PAGE_CACHE_SIZE - 1); | ||
1348 | } | ||
1349 | 1392 | ||
1350 | p->head_position = 0; | 1393 | p->head_position = 0; |
1351 | rdev->raid_disk = mirror; | 1394 | rdev->raid_disk = mirror; |
@@ -1370,6 +1413,19 @@ static int raid1_add_disk(struct mddev *mddev, struct md_rdev *rdev) | |||
1370 | break; | 1413 | break; |
1371 | } | 1414 | } |
1372 | } | 1415 | } |
1416 | if (err == 0 && test_bit(Unmerged, &rdev->flags)) { | ||
1417 | /* Some requests might not have seen this new | ||
1418 | * merge_bvec_fn. We must wait for them to complete | ||
1419 | * before merging the device fully. | ||
1420 | * First we make sure any code which has tested | ||
1421 | * our function has submitted the request, then | ||
1422 | * we wait for all outstanding requests to complete. | ||
1423 | */ | ||
1424 | synchronize_sched(); | ||
1425 | raise_barrier(conf); | ||
1426 | lower_barrier(conf); | ||
1427 | clear_bit(Unmerged, &rdev->flags); | ||
1428 | } | ||
1373 | md_integrity_add_rdev(rdev, mddev); | 1429 | md_integrity_add_rdev(rdev, mddev); |
1374 | print_conf(conf); | 1430 | print_conf(conf); |
1375 | return err; | 1431 | return err; |
@@ -2491,7 +2547,7 @@ static struct r1conf *setup_conf(struct mddev *mddev) | |||
2491 | 2547 | ||
2492 | err = -EINVAL; | 2548 | err = -EINVAL; |
2493 | spin_lock_init(&conf->device_lock); | 2549 | spin_lock_init(&conf->device_lock); |
2494 | list_for_each_entry(rdev, &mddev->disks, same_set) { | 2550 | rdev_for_each(rdev, mddev) { |
2495 | int disk_idx = rdev->raid_disk; | 2551 | int disk_idx = rdev->raid_disk; |
2496 | if (disk_idx >= mddev->raid_disks | 2552 | if (disk_idx >= mddev->raid_disks |
2497 | || disk_idx < 0) | 2553 | || disk_idx < 0) |
@@ -2609,20 +2665,11 @@ static int run(struct mddev *mddev) | |||
2609 | if (IS_ERR(conf)) | 2665 | if (IS_ERR(conf)) |
2610 | return PTR_ERR(conf); | 2666 | return PTR_ERR(conf); |
2611 | 2667 | ||
2612 | list_for_each_entry(rdev, &mddev->disks, same_set) { | 2668 | rdev_for_each(rdev, mddev) { |
2613 | if (!mddev->gendisk) | 2669 | if (!mddev->gendisk) |
2614 | continue; | 2670 | continue; |
2615 | disk_stack_limits(mddev->gendisk, rdev->bdev, | 2671 | disk_stack_limits(mddev->gendisk, rdev->bdev, |
2616 | rdev->data_offset << 9); | 2672 | rdev->data_offset << 9); |
2617 | /* as we don't honour merge_bvec_fn, we must never risk | ||
2618 | * violating it, so limit ->max_segments to 1 lying within | ||
2619 | * a single page, as a one page request is never in violation. | ||
2620 | */ | ||
2621 | if (rdev->bdev->bd_disk->queue->merge_bvec_fn) { | ||
2622 | blk_queue_max_segments(mddev->queue, 1); | ||
2623 | blk_queue_segment_boundary(mddev->queue, | ||
2624 | PAGE_CACHE_SIZE - 1); | ||
2625 | } | ||
2626 | } | 2673 | } |
2627 | 2674 | ||
2628 | mddev->degraded = 0; | 2675 | mddev->degraded = 0; |
@@ -2656,6 +2703,7 @@ static int run(struct mddev *mddev) | |||
2656 | if (mddev->queue) { | 2703 | if (mddev->queue) { |
2657 | mddev->queue->backing_dev_info.congested_fn = raid1_congested; | 2704 | mddev->queue->backing_dev_info.congested_fn = raid1_congested; |
2658 | mddev->queue->backing_dev_info.congested_data = mddev; | 2705 | mddev->queue->backing_dev_info.congested_data = mddev; |
2706 | blk_queue_merge_bvec(mddev->queue, raid1_mergeable_bvec); | ||
2659 | } | 2707 | } |
2660 | return md_integrity_register(mddev); | 2708 | return md_integrity_register(mddev); |
2661 | } | 2709 | } |
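The hot-add sequence above (repeated for raid10 below) amounts to a three-step handshake before a device whose queue has a merge_bvec_fn may see real I/O. Sketched with the steps numbered (the wrapper function is mine; the calls mirror raid1_add_disk()):

static void settle_unmerged(struct r1conf *conf, struct md_rdev *rdev)
{
	/* 1. Anyone who ran raid1_mergeable_bvec() before the new rdev
	 *    became visible has, after this, left the preemption-disabled
	 *    section and submitted the bio it built. */
	synchronize_sched();
	/* 2. Drain all outstanding requests, so no bio assembled under
	 *    the old (laxer) merge rules is still in flight. */
	raise_barrier(conf);
	lower_barrier(conf);
	/* 3. Only now may normal reads and writes pick the device. */
	clear_bit(Unmerged, &rdev->flags);
}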
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 58c44d6453a0..3540316886f2 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c | |||
@@ -586,25 +586,68 @@ static sector_t raid10_find_virt(struct r10conf *conf, sector_t sector, int dev) | |||
586 | * @biovec: the request that could be merged to it. | 586 | * @biovec: the request that could be merged to it. |
587 | * | 587 | * |
588 | * Return amount of bytes we can accept at this offset | 588 | * Return amount of bytes we can accept at this offset |
589 | * If near_copies == raid_disk, there are no striping issues, | 589 | * This requires checking for end-of-chunk if near_copies != raid_disks, |
590 | * but in that case, the function isn't called at all. | 590 | * and for subordinate merge_bvec_fns if merge_check_needed. |
591 | */ | 591 | */ |
592 | static int raid10_mergeable_bvec(struct request_queue *q, | 592 | static int raid10_mergeable_bvec(struct request_queue *q, |
593 | struct bvec_merge_data *bvm, | 593 | struct bvec_merge_data *bvm, |
594 | struct bio_vec *biovec) | 594 | struct bio_vec *biovec) |
595 | { | 595 | { |
596 | struct mddev *mddev = q->queuedata; | 596 | struct mddev *mddev = q->queuedata; |
597 | struct r10conf *conf = mddev->private; | ||
597 | sector_t sector = bvm->bi_sector + get_start_sect(bvm->bi_bdev); | 598 | sector_t sector = bvm->bi_sector + get_start_sect(bvm->bi_bdev); |
598 | int max; | 599 | int max; |
599 | unsigned int chunk_sectors = mddev->chunk_sectors; | 600 | unsigned int chunk_sectors = mddev->chunk_sectors; |
600 | unsigned int bio_sectors = bvm->bi_size >> 9; | 601 | unsigned int bio_sectors = bvm->bi_size >> 9; |
601 | 602 | ||
602 | max = (chunk_sectors - ((sector & (chunk_sectors - 1)) + bio_sectors)) << 9; | 603 | if (conf->near_copies < conf->raid_disks) { |
603 | if (max < 0) max = 0; /* bio_add cannot handle a negative return */ | 604 | max = (chunk_sectors - ((sector & (chunk_sectors - 1)) |
604 | if (max <= biovec->bv_len && bio_sectors == 0) | 605 | + bio_sectors)) << 9; |
605 | return biovec->bv_len; | 606 | if (max < 0) |
606 | else | 607 | /* bio_add cannot handle a negative return */ |
607 | return max; | 608 | max = 0; |
609 | if (max <= biovec->bv_len && bio_sectors == 0) | ||
610 | return biovec->bv_len; | ||
611 | } else | ||
612 | max = biovec->bv_len; | ||
613 | |||
614 | if (mddev->merge_check_needed) { | ||
615 | struct r10bio r10_bio; | ||
616 | int s; | ||
617 | r10_bio.sector = sector; | ||
618 | raid10_find_phys(conf, &r10_bio); | ||
619 | rcu_read_lock(); | ||
620 | for (s = 0; s < conf->copies; s++) { | ||
621 | int disk = r10_bio.devs[s].devnum; | ||
622 | struct md_rdev *rdev = rcu_dereference( | ||
623 | conf->mirrors[disk].rdev); | ||
624 | if (rdev && !test_bit(Faulty, &rdev->flags)) { | ||
625 | struct request_queue *q = | ||
626 | bdev_get_queue(rdev->bdev); | ||
627 | if (q->merge_bvec_fn) { | ||
628 | bvm->bi_sector = r10_bio.devs[s].addr | ||
629 | + rdev->data_offset; | ||
630 | bvm->bi_bdev = rdev->bdev; | ||
631 | max = min(max, q->merge_bvec_fn( | ||
632 | q, bvm, biovec)); | ||
633 | } | ||
634 | } | ||
635 | rdev = rcu_dereference(conf->mirrors[disk].replacement); | ||
636 | if (rdev && !test_bit(Faulty, &rdev->flags)) { | ||
637 | struct request_queue *q = | ||
638 | bdev_get_queue(rdev->bdev); | ||
639 | if (q->merge_bvec_fn) { | ||
640 | bvm->bi_sector = r10_bio.devs[s].addr | ||
641 | + rdev->data_offset; | ||
642 | bvm->bi_bdev = rdev->bdev; | ||
643 | max = min(max, q->merge_bvec_fn( | ||
644 | q, bvm, biovec)); | ||
645 | } | ||
646 | } | ||
647 | } | ||
648 | rcu_read_unlock(); | ||
649 | } | ||
650 | return max; | ||
608 | } | 651 | } |
609 | 652 | ||
610 | /* | 653 | /* |
@@ -668,11 +711,12 @@ retry: | |||
668 | disk = r10_bio->devs[slot].devnum; | 711 | disk = r10_bio->devs[slot].devnum; |
669 | rdev = rcu_dereference(conf->mirrors[disk].replacement); | 712 | rdev = rcu_dereference(conf->mirrors[disk].replacement); |
670 | if (rdev == NULL || test_bit(Faulty, &rdev->flags) || | 713 | if (rdev == NULL || test_bit(Faulty, &rdev->flags) || |
714 | test_bit(Unmerged, &rdev->flags) || | ||
671 | r10_bio->devs[slot].addr + sectors > rdev->recovery_offset) | 715 | r10_bio->devs[slot].addr + sectors > rdev->recovery_offset) |
672 | rdev = rcu_dereference(conf->mirrors[disk].rdev); | 716 | rdev = rcu_dereference(conf->mirrors[disk].rdev); |
673 | if (rdev == NULL) | 717 | if (rdev == NULL || |
674 | continue; | 718 | test_bit(Faulty, &rdev->flags) || |
675 | if (test_bit(Faulty, &rdev->flags)) | 719 | test_bit(Unmerged, &rdev->flags)) |
676 | continue; | 720 | continue; |
677 | if (!test_bit(In_sync, &rdev->flags) && | 721 | if (!test_bit(In_sync, &rdev->flags) && |
678 | r10_bio->devs[slot].addr + sectors > rdev->recovery_offset) | 722 | r10_bio->devs[slot].addr + sectors > rdev->recovery_offset) |
@@ -863,9 +907,22 @@ static void wait_barrier(struct r10conf *conf) | |||
863 | spin_lock_irq(&conf->resync_lock); | 907 | spin_lock_irq(&conf->resync_lock); |
864 | if (conf->barrier) { | 908 | if (conf->barrier) { |
865 | conf->nr_waiting++; | 909 | conf->nr_waiting++; |
866 | wait_event_lock_irq(conf->wait_barrier, !conf->barrier, | 910 | /* Wait for the barrier to drop. |
911 | * However if there are already pending | ||
912 | * requests (preventing the barrier from | ||
913 | * rising completely), and the | ||
914 | * pre-process bio queue isn't empty, | ||
915 | * then don't wait, as we need to empty | ||
916 | * that queue to get the nr_pending | ||
917 | * count down. | ||
918 | */ | ||
919 | wait_event_lock_irq(conf->wait_barrier, | ||
920 | !conf->barrier || | ||
921 | (conf->nr_pending && | ||
922 | current->bio_list && | ||
923 | !bio_list_empty(current->bio_list)), | ||
867 | conf->resync_lock, | 924 | conf->resync_lock, |
868 | ); | 925 | ); |
869 | conf->nr_waiting--; | 926 | conf->nr_waiting--; |
870 | } | 927 | } |
871 | conf->nr_pending++; | 928 | conf->nr_pending++; |
@@ -1121,12 +1178,14 @@ retry_write: | |||
1121 | blocked_rdev = rrdev; | 1178 | blocked_rdev = rrdev; |
1122 | break; | 1179 | break; |
1123 | } | 1180 | } |
1124 | if (rrdev && test_bit(Faulty, &rrdev->flags)) | 1181 | if (rrdev && (test_bit(Faulty, &rrdev->flags) |
1182 | || test_bit(Unmerged, &rrdev->flags))) | ||
1125 | rrdev = NULL; | 1183 | rrdev = NULL; |
1126 | 1184 | ||
1127 | r10_bio->devs[i].bio = NULL; | 1185 | r10_bio->devs[i].bio = NULL; |
1128 | r10_bio->devs[i].repl_bio = NULL; | 1186 | r10_bio->devs[i].repl_bio = NULL; |
1129 | if (!rdev || test_bit(Faulty, &rdev->flags)) { | 1187 | if (!rdev || test_bit(Faulty, &rdev->flags) || |
1188 | test_bit(Unmerged, &rdev->flags)) { | ||
1130 | set_bit(R10BIO_Degraded, &r10_bio->state); | 1189 | set_bit(R10BIO_Degraded, &r10_bio->state); |
1131 | continue; | 1190 | continue; |
1132 | } | 1191 | } |
@@ -1477,18 +1536,24 @@ static int raid10_add_disk(struct mddev *mddev, struct md_rdev *rdev) | |||
1477 | int mirror; | 1536 | int mirror; |
1478 | int first = 0; | 1537 | int first = 0; |
1479 | int last = conf->raid_disks - 1; | 1538 | int last = conf->raid_disks - 1; |
1539 | struct request_queue *q = bdev_get_queue(rdev->bdev); | ||
1480 | 1540 | ||
1481 | if (mddev->recovery_cp < MaxSector) | 1541 | if (mddev->recovery_cp < MaxSector) |
1482 | /* only hot-add to in-sync arrays, as recovery is | 1542 | /* only hot-add to in-sync arrays, as recovery is |
1483 | * very different from resync | 1543 | * very different from resync |
1484 | */ | 1544 | */ |
1485 | return -EBUSY; | 1545 | return -EBUSY; |
1486 | if (!enough(conf, -1)) | 1546 | if (rdev->saved_raid_disk < 0 && !enough(conf, -1)) |
1487 | return -EINVAL; | 1547 | return -EINVAL; |
1488 | 1548 | ||
1489 | if (rdev->raid_disk >= 0) | 1549 | if (rdev->raid_disk >= 0) |
1490 | first = last = rdev->raid_disk; | 1550 | first = last = rdev->raid_disk; |
1491 | 1551 | ||
1552 | if (q->merge_bvec_fn) { | ||
1553 | set_bit(Unmerged, &rdev->flags); | ||
1554 | mddev->merge_check_needed = 1; | ||
1555 | } | ||
1556 | |||
1492 | if (rdev->saved_raid_disk >= first && | 1557 | if (rdev->saved_raid_disk >= first && |
1493 | conf->mirrors[rdev->saved_raid_disk].rdev == NULL) | 1558 | conf->mirrors[rdev->saved_raid_disk].rdev == NULL) |
1494 | mirror = rdev->saved_raid_disk; | 1559 | mirror = rdev->saved_raid_disk; |
@@ -1508,11 +1573,6 @@ static int raid10_add_disk(struct mddev *mddev, struct md_rdev *rdev) | |||
1508 | err = 0; | 1573 | err = 0; |
1509 | disk_stack_limits(mddev->gendisk, rdev->bdev, | 1574 | disk_stack_limits(mddev->gendisk, rdev->bdev, |
1510 | rdev->data_offset << 9); | 1575 | rdev->data_offset << 9); |
1511 | if (rdev->bdev->bd_disk->queue->merge_bvec_fn) { | ||
1512 | blk_queue_max_segments(mddev->queue, 1); | ||
1513 | blk_queue_segment_boundary(mddev->queue, | ||
1514 | PAGE_CACHE_SIZE - 1); | ||
1515 | } | ||
1516 | conf->fullsync = 1; | 1576 | conf->fullsync = 1; |
1517 | rcu_assign_pointer(p->replacement, rdev); | 1577 | rcu_assign_pointer(p->replacement, rdev); |
1518 | break; | 1578 | break; |
@@ -1520,17 +1580,6 @@ static int raid10_add_disk(struct mddev *mddev, struct md_rdev *rdev) | |||
1520 | 1580 | ||
1521 | disk_stack_limits(mddev->gendisk, rdev->bdev, | 1581 | disk_stack_limits(mddev->gendisk, rdev->bdev, |
1522 | rdev->data_offset << 9); | 1582 | rdev->data_offset << 9); |
1523 | /* as we don't honour merge_bvec_fn, we must | ||
1524 | * never risk violating it, so limit | ||
1525 | * ->max_segments to one lying with a single | ||
1526 | * page, as a one page request is never in | ||
1527 | * violation. | ||
1528 | */ | ||
1529 | if (rdev->bdev->bd_disk->queue->merge_bvec_fn) { | ||
1530 | blk_queue_max_segments(mddev->queue, 1); | ||
1531 | blk_queue_segment_boundary(mddev->queue, | ||
1532 | PAGE_CACHE_SIZE - 1); | ||
1533 | } | ||
1534 | 1583 | ||
1535 | p->head_position = 0; | 1584 | p->head_position = 0; |
1536 | p->recovery_disabled = mddev->recovery_disabled - 1; | 1585 | p->recovery_disabled = mddev->recovery_disabled - 1; |
@@ -1541,7 +1590,19 @@ static int raid10_add_disk(struct mddev *mddev, struct md_rdev *rdev) | |||
1541 | rcu_assign_pointer(p->rdev, rdev); | 1590 | rcu_assign_pointer(p->rdev, rdev); |
1542 | break; | 1591 | break; |
1543 | } | 1592 | } |
1544 | 1593 | if (err == 0 && test_bit(Unmerged, &rdev->flags)) { | |
1594 | /* Some requests might not have seen this new | ||
1595 | * merge_bvec_fn. We must wait for them to complete | ||
1596 | * before merging the device fully. | ||
1597 | * First we make sure any code which has tested | ||
1598 | * our function has submitted the request, then | ||
1599 | * we wait for all outstanding requests to complete. | ||
1600 | */ | ||
1601 | synchronize_sched(); | ||
1602 | raise_barrier(conf, 0); | ||
1603 | lower_barrier(conf); | ||
1604 | clear_bit(Unmerged, &rdev->flags); | ||
1605 | } | ||
1545 | md_integrity_add_rdev(rdev, mddev); | 1606 | md_integrity_add_rdev(rdev, mddev); |
1546 | print_conf(conf); | 1607 | print_conf(conf); |
1547 | return err; | 1608 | return err; |
@@ -1682,10 +1743,8 @@ static void end_sync_write(struct bio *bio, int error) | |||
1682 | d = find_bio_disk(conf, r10_bio, bio, &slot, &repl); | 1743 | d = find_bio_disk(conf, r10_bio, bio, &slot, &repl); |
1683 | if (repl) | 1744 | if (repl) |
1684 | rdev = conf->mirrors[d].replacement; | 1745 | rdev = conf->mirrors[d].replacement; |
1685 | if (!rdev) { | 1746 | else |
1686 | smp_mb(); | ||
1687 | rdev = conf->mirrors[d].rdev; | 1747 | rdev = conf->mirrors[d].rdev; |
1688 | } | ||
1689 | 1748 | ||
1690 | if (!uptodate) { | 1749 | if (!uptodate) { |
1691 | if (repl) | 1750 | if (repl) |
@@ -2087,6 +2146,7 @@ static void fix_read_error(struct r10conf *conf, struct mddev *mddev, struct r10 | |||
2087 | d = r10_bio->devs[sl].devnum; | 2146 | d = r10_bio->devs[sl].devnum; |
2088 | rdev = rcu_dereference(conf->mirrors[d].rdev); | 2147 | rdev = rcu_dereference(conf->mirrors[d].rdev); |
2089 | if (rdev && | 2148 | if (rdev && |
2149 | !test_bit(Unmerged, &rdev->flags) && | ||
2090 | test_bit(In_sync, &rdev->flags) && | 2150 | test_bit(In_sync, &rdev->flags) && |
2091 | is_badblock(rdev, r10_bio->devs[sl].addr + sect, s, | 2151 | is_badblock(rdev, r10_bio->devs[sl].addr + sect, s, |
2092 | &first_bad, &bad_sectors) == 0) { | 2152 | &first_bad, &bad_sectors) == 0) { |
@@ -2140,6 +2200,7 @@ static void fix_read_error(struct r10conf *conf, struct mddev *mddev, struct r10 | |||
2140 | d = r10_bio->devs[sl].devnum; | 2200 | d = r10_bio->devs[sl].devnum; |
2141 | rdev = rcu_dereference(conf->mirrors[d].rdev); | 2201 | rdev = rcu_dereference(conf->mirrors[d].rdev); |
2142 | if (!rdev || | 2202 | if (!rdev || |
2203 | test_bit(Unmerged, &rdev->flags) || | ||
2143 | !test_bit(In_sync, &rdev->flags)) | 2204 | !test_bit(In_sync, &rdev->flags)) |
2144 | continue; | 2205 | continue; |
2145 | 2206 | ||
@@ -3242,7 +3303,7 @@ static int run(struct mddev *mddev) | |||
3242 | blk_queue_io_opt(mddev->queue, chunk_size * | 3303 | blk_queue_io_opt(mddev->queue, chunk_size * |
3243 | (conf->raid_disks / conf->near_copies)); | 3304 | (conf->raid_disks / conf->near_copies)); |
3244 | 3305 | ||
3245 | list_for_each_entry(rdev, &mddev->disks, same_set) { | 3306 | rdev_for_each(rdev, mddev) { |
3246 | 3307 | ||
3247 | disk_idx = rdev->raid_disk; | 3308 | disk_idx = rdev->raid_disk; |
3248 | if (disk_idx >= conf->raid_disks | 3309 | if (disk_idx >= conf->raid_disks |
@@ -3262,15 +3323,6 @@ static int run(struct mddev *mddev) | |||
3262 | 3323 | ||
3263 | disk_stack_limits(mddev->gendisk, rdev->bdev, | 3324 | disk_stack_limits(mddev->gendisk, rdev->bdev, |
3264 | rdev->data_offset << 9); | 3325 | rdev->data_offset << 9); |
3265 | /* as we don't honour merge_bvec_fn, we must never risk | ||
3266 | * violating it, so limit max_segments to 1 lying | ||
3267 | * within a single page. | ||
3268 | */ | ||
3269 | if (rdev->bdev->bd_disk->queue->merge_bvec_fn) { | ||
3270 | blk_queue_max_segments(mddev->queue, 1); | ||
3271 | blk_queue_segment_boundary(mddev->queue, | ||
3272 | PAGE_CACHE_SIZE - 1); | ||
3273 | } | ||
3274 | 3326 | ||
3275 | disk->head_position = 0; | 3327 | disk->head_position = 0; |
3276 | } | 3328 | } |
@@ -3334,8 +3386,7 @@ static int run(struct mddev *mddev) | |||
3334 | mddev->queue->backing_dev_info.ra_pages = 2* stripe; | 3386 | mddev->queue->backing_dev_info.ra_pages = 2* stripe; |
3335 | } | 3387 | } |
3336 | 3388 | ||
3337 | if (conf->near_copies < conf->raid_disks) | 3389 | blk_queue_merge_bvec(mddev->queue, raid10_mergeable_bvec); |
3338 | blk_queue_merge_bvec(mddev->queue, raid10_mergeable_bvec); | ||
3339 | 3390 | ||
3340 | if (md_integrity_register(mddev)) | 3391 | if (md_integrity_register(mddev)) |
3341 | goto out_free_conf; | 3392 | goto out_free_conf; |
@@ -3385,6 +3436,43 @@ static void raid10_quiesce(struct mddev *mddev, int state) | |||
3385 | } | 3436 | } |
3386 | } | 3437 | } |
3387 | 3438 | ||
3439 | static int raid10_resize(struct mddev *mddev, sector_t sectors) | ||
3440 | { | ||
3441 | /* Resize of 'far' arrays is not supported. | ||
3442 | * For 'near' and 'offset' arrays we can set the | ||
3443 | * number of sectors used to be an appropriate multiple | ||
3444 | * of the chunk size. | ||
3445 | * For 'offset', this is far_copies*chunksize. | ||
3446 | * For 'near' the multiplier is the LCM of | ||
3447 | * near_copies and raid_disks. | ||
3448 | * So if far_copies > 1 && !far_offset, fail. | ||
3450 | * Else find LCM(raid_disks, near_copies)*far_copies and ||
3450 | * multiply by chunk_size. Then round to this number. | ||
3451 | * This is mostly done by raid10_size() | ||
3452 | */ | ||
3453 | struct r10conf *conf = mddev->private; | ||
3454 | sector_t oldsize, size; | ||
3455 | |||
3456 | if (conf->far_copies > 1 && !conf->far_offset) | ||
3457 | return -EINVAL; | ||
3458 | |||
3459 | oldsize = raid10_size(mddev, 0, 0); | ||
3460 | size = raid10_size(mddev, sectors, 0); | ||
3461 | md_set_array_sectors(mddev, size); | ||
3462 | if (mddev->array_sectors > size) | ||
3463 | return -EINVAL; | ||
3464 | set_capacity(mddev->gendisk, mddev->array_sectors); | ||
3465 | revalidate_disk(mddev->gendisk); | ||
3466 | if (sectors > mddev->dev_sectors && | ||
3467 | mddev->recovery_cp > oldsize) { | ||
3468 | mddev->recovery_cp = oldsize; | ||
3469 | set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); | ||
3470 | } | ||
3471 | mddev->dev_sectors = sectors; | ||
3472 | mddev->resync_max_sectors = size; | ||
3473 | return 0; | ||
3474 | } | ||
3475 | |||
3388 | static void *raid10_takeover_raid0(struct mddev *mddev) | 3476 | static void *raid10_takeover_raid0(struct mddev *mddev) |
3389 | { | 3477 | { |
3390 | struct md_rdev *rdev; | 3478 | struct md_rdev *rdev; |
@@ -3408,7 +3496,7 @@ static void *raid10_takeover_raid0(struct mddev *mddev) | |||
3408 | 3496 | ||
3409 | conf = setup_conf(mddev); | 3497 | conf = setup_conf(mddev); |
3410 | if (!IS_ERR(conf)) { | 3498 | if (!IS_ERR(conf)) { |
3411 | list_for_each_entry(rdev, &mddev->disks, same_set) | 3499 | rdev_for_each(rdev, mddev) |
3412 | if (rdev->raid_disk >= 0) | 3500 | if (rdev->raid_disk >= 0) |
3413 | rdev->new_raid_disk = rdev->raid_disk * 2; | 3501 | rdev->new_raid_disk = rdev->raid_disk * 2; |
3414 | conf->barrier = 1; | 3502 | conf->barrier = 1; |
@@ -3454,6 +3542,7 @@ static struct md_personality raid10_personality = | |||
3454 | .sync_request = sync_request, | 3542 | .sync_request = sync_request, |
3455 | .quiesce = raid10_quiesce, | 3543 | .quiesce = raid10_quiesce, |
3456 | .size = raid10_size, | 3544 | .size = raid10_size, |
3545 | .resize = raid10_resize, | ||
3457 | .takeover = raid10_takeover, | 3546 | .takeover = raid10_takeover, |
3458 | }; | 3547 | }; |
3459 | 3548 | ||
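The rounding rule described in the raid10_resize() comment, replayed in user space with assumed geometry (4 disks, near_copies = 2, far_copies = 1, 512KiB chunks). This paraphrases the comment's arithmetic, not raid10_size() itself:

#include <stdio.h>

static unsigned long long gcd(unsigned long long a, unsigned long long b)
{
	while (b) {
		unsigned long long t = a % b;
		a = b;
		b = t;
	}
	return a;
}

int main(void)
{
	unsigned long long raid_disks = 4, near_copies = 2, far_copies = 1;
	unsigned long long chunk_sects = 1024;	/* 512KiB in sectors */
	unsigned long long sectors = 10000;	/* requested device size */

	/* LCM(raid_disks, near_copies) * far_copies chunks */
	unsigned long long lcm = raid_disks / gcd(raid_disks, near_copies) * near_copies;
	unsigned long long gran = lcm * far_copies * chunk_sects;	/* 4096 sectors */

	printf("usable: %llu sectors\n", sectors / gran * gran);	/* 8192 */
	return 0;
}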
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 360f2b98f62b..23ac880bba9a 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c | |||
@@ -208,11 +208,10 @@ static void __release_stripe(struct r5conf *conf, struct stripe_head *sh) | |||
208 | md_wakeup_thread(conf->mddev->thread); | 208 | md_wakeup_thread(conf->mddev->thread); |
209 | } else { | 209 | } else { |
210 | BUG_ON(stripe_operations_active(sh)); | 210 | BUG_ON(stripe_operations_active(sh)); |
211 | if (test_and_clear_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) { | 211 | if (test_and_clear_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) |
212 | atomic_dec(&conf->preread_active_stripes); | 212 | if (atomic_dec_return(&conf->preread_active_stripes) |
213 | if (atomic_read(&conf->preread_active_stripes) < IO_THRESHOLD) | 213 | < IO_THRESHOLD) |
214 | md_wakeup_thread(conf->mddev->thread); | 214 | md_wakeup_thread(conf->mddev->thread); |
215 | } | ||
216 | atomic_dec(&conf->active_stripes); | 215 | atomic_dec(&conf->active_stripes); |
217 | if (!test_bit(STRIPE_EXPANDING, &sh->state)) { | 216 | if (!test_bit(STRIPE_EXPANDING, &sh->state)) { |
218 | list_add_tail(&sh->lru, &conf->inactive_list); | 217 | list_add_tail(&sh->lru, &conf->inactive_list); |
@@ -4843,7 +4842,7 @@ static struct r5conf *setup_conf(struct mddev *mddev) | |||
4843 | 4842 | ||
4844 | pr_debug("raid456: run(%s) called.\n", mdname(mddev)); | 4843 | pr_debug("raid456: run(%s) called.\n", mdname(mddev)); |
4845 | 4844 | ||
4846 | list_for_each_entry(rdev, &mddev->disks, same_set) { | 4845 | rdev_for_each(rdev, mddev) { |
4847 | raid_disk = rdev->raid_disk; | 4846 | raid_disk = rdev->raid_disk; |
4848 | if (raid_disk >= max_disks | 4847 | if (raid_disk >= max_disks |
4849 | || raid_disk < 0) | 4848 | || raid_disk < 0) |
@@ -5178,7 +5177,7 @@ static int run(struct mddev *mddev) | |||
5178 | blk_queue_io_opt(mddev->queue, chunk_size * | 5177 | blk_queue_io_opt(mddev->queue, chunk_size * |
5179 | (conf->raid_disks - conf->max_degraded)); | 5178 | (conf->raid_disks - conf->max_degraded)); |
5180 | 5179 | ||
5181 | list_for_each_entry(rdev, &mddev->disks, same_set) | 5180 | rdev_for_each(rdev, mddev) |
5182 | disk_stack_limits(mddev->gendisk, rdev->bdev, | 5181 | disk_stack_limits(mddev->gendisk, rdev->bdev, |
5183 | rdev->data_offset << 9); | 5182 | rdev->data_offset << 9); |
5184 | } | 5183 | } |
@@ -5362,7 +5361,7 @@ static int raid5_add_disk(struct mddev *mddev, struct md_rdev *rdev) | |||
5362 | if (mddev->recovery_disabled == conf->recovery_disabled) | 5361 | if (mddev->recovery_disabled == conf->recovery_disabled) |
5363 | return -EBUSY; | 5362 | return -EBUSY; |
5364 | 5363 | ||
5365 | if (has_failed(conf)) | 5364 | if (rdev->saved_raid_disk < 0 && has_failed(conf)) |
5366 | /* no point adding a device */ | 5365 | /* no point adding a device */ |
5367 | return -EINVAL; | 5366 | return -EINVAL; |
5368 | 5367 | ||
@@ -5501,7 +5500,7 @@ static int raid5_start_reshape(struct mddev *mddev) | |||
5501 | if (!check_stripe_cache(mddev)) | 5500 | if (!check_stripe_cache(mddev)) |
5502 | return -ENOSPC; | 5501 | return -ENOSPC; |
5503 | 5502 | ||
5504 | list_for_each_entry(rdev, &mddev->disks, same_set) | 5503 | rdev_for_each(rdev, mddev) |
5505 | if (!test_bit(In_sync, &rdev->flags) | 5504 | if (!test_bit(In_sync, &rdev->flags) |
5506 | && !test_bit(Faulty, &rdev->flags)) | 5505 | && !test_bit(Faulty, &rdev->flags)) |
5507 | spares++; | 5506 | spares++; |
@@ -5547,16 +5546,14 @@ static int raid5_start_reshape(struct mddev *mddev) | |||
5547 | * such devices during the reshape and confusion could result. | 5546 | * such devices during the reshape and confusion could result. |
5548 | */ | 5547 | */ |
5549 | if (mddev->delta_disks >= 0) { | 5548 | if (mddev->delta_disks >= 0) { |
5550 | int added_devices = 0; | 5549 | rdev_for_each(rdev, mddev) |
5551 | list_for_each_entry(rdev, &mddev->disks, same_set) | ||
5552 | if (rdev->raid_disk < 0 && | 5550 | if (rdev->raid_disk < 0 && |
5553 | !test_bit(Faulty, &rdev->flags)) { | 5551 | !test_bit(Faulty, &rdev->flags)) { |
5554 | if (raid5_add_disk(mddev, rdev) == 0) { | 5552 | if (raid5_add_disk(mddev, rdev) == 0) { |
5555 | if (rdev->raid_disk | 5553 | if (rdev->raid_disk |
5556 | >= conf->previous_raid_disks) { | 5554 | >= conf->previous_raid_disks) |
5557 | set_bit(In_sync, &rdev->flags); | 5555 | set_bit(In_sync, &rdev->flags); |
5558 | added_devices++; | 5556 | else |
5559 | } else | ||
5560 | rdev->recovery_offset = 0; | 5557 | rdev->recovery_offset = 0; |
5561 | 5558 | ||
5562 | if (sysfs_link_rdev(mddev, rdev)) | 5559 | if (sysfs_link_rdev(mddev, rdev)) |
@@ -5566,7 +5563,6 @@ static int raid5_start_reshape(struct mddev *mddev) | |||
5566 | && !test_bit(Faulty, &rdev->flags)) { | 5563 | && !test_bit(Faulty, &rdev->flags)) { |
5567 | /* This is a spare that was manually added */ | 5564 | /* This is a spare that was manually added */ |
5568 | set_bit(In_sync, &rdev->flags); | 5565 | set_bit(In_sync, &rdev->flags); |
5569 | added_devices++; | ||
5570 | } | 5566 | } |
5571 | 5567 | ||
5572 | /* When a reshape changes the number of devices, | 5568 | /* When a reshape changes the number of devices, |
@@ -5592,6 +5588,7 @@ static int raid5_start_reshape(struct mddev *mddev) | |||
5592 | spin_lock_irq(&conf->device_lock); | 5588 | spin_lock_irq(&conf->device_lock); |
5593 | mddev->raid_disks = conf->raid_disks = conf->previous_raid_disks; | 5589 | mddev->raid_disks = conf->raid_disks = conf->previous_raid_disks; |
5594 | conf->reshape_progress = MaxSector; | 5590 | conf->reshape_progress = MaxSector; |
5591 | mddev->reshape_position = MaxSector; | ||
5595 | spin_unlock_irq(&conf->device_lock); | 5592 | spin_unlock_irq(&conf->device_lock); |
5596 | return -EAGAIN; | 5593 | return -EAGAIN; |
5597 | } | 5594 | } |
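Finally, the __release_stripe() hunk folds atomic_dec() plus atomic_read() into a single atomic_dec_return(). The difference matters under concurrency; a minimal sketch of the race being closed (the counter, threshold and worker stub are stand-ins, not raid5 code):

#include <linux/atomic.h>

#define EX_THRESHOLD 1
static atomic_t ex_count = ATOMIC_INIT(0);
static void ex_wakeup(void) { /* stand-in for md_wakeup_thread() */ }

static void release_racy(void)
{
	atomic_dec(&ex_count);
	/* Another CPU can increment between these two lines, so this
	 * read may never observe the count crossing the threshold and
	 * the wakeup is lost. */
	if (atomic_read(&ex_count) < EX_THRESHOLD)
		ex_wakeup();
}

static void release_fixed(void)
{
	/* The post-decrement value comes back atomically with the
	 * decrement itself, so the CPU that crosses the threshold
	 * always sees it and issues the wakeup. */
	if (atomic_dec_return(&ex_count) < EX_THRESHOLD)
		ex_wakeup();
}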