diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2012-03-22 15:29:50 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2012-03-22 15:29:50 -0400 |
commit | 267d7b23dd62f6ec55e0fba777e456495c308fc7 (patch) | |
tree | 5c9fe0f07d5b87029b9c07eb003596c05d161a8f /drivers/md | |
parent | 28f23d1f3b6a6078312b6e9585e583cc7326fe22 (diff) | |
parent | ecb178bb2b154a40cfae9fa4c42e62ccfa81ac6b (diff) |
Merge tag 'md-3.4' of git://neil.brown.name/md

Pull md updates for 3.4 from Neil Brown:

"Mostly tidying up code in preparation for some bigger changes next
time.

A few bug fixes tagged for -stable.

Main functionality change is that some RAID10 arrays can now grow to
use extra space that may have been made available on the individual
devices."

Fixed up trivial conflicts with the k[un]map_atomic() cleanups in
drivers/md/bitmap.c.

* tag 'md-3.4' of git://neil.brown.name/md: (22 commits)
md: Add judgement bb->unacked_exist in function md_ack_all_badblocks().
md: fix clearing of the 'changed' flags for the bad blocks list.
md/bitmap: discard CHUNK_BLOCK_SHIFT macro
md/bitmap: remove unnecessary indirection when allocating.
md/bitmap: remove some pointless locking.
md/bitmap: change a 'goto' to a normal 'if' construct.
md/bitmap: move printing of bitmap status to bitmap.c
md/bitmap: remove some unused noise from bitmap.h
md/raid10 - support resizing some RAID10 arrays.
md/raid1: handle merge_bvec_fn in member devices.
md/raid10: handle merge_bvec_fn in member devices.
md: add proper merge_bvec handling to RAID0 and Linear.
md: tidy up rdev_for_each usage.
md/raid1,raid10: avoid deadlock during resync/recovery.
md/bitmap: ensure to load bitmap when creating via sysfs.
md: don't set md arrays to readonly on shutdown.
md: allow re-add to failed arrays.
md/raid5: use atomic_dec_return() instead of atomic_dec() and atomic_read().
md: Use existed macros instead of numbers
md/raid5: removed unused 'added_devices' variable.
...
Diffstat (limited to 'drivers/md')
-rw-r--r-- | drivers/md/bitmap.c | 152 | ||||
-rw-r--r-- | drivers/md/bitmap.h | 22 | ||||
-rw-r--r-- | drivers/md/dm-raid.c | 16 | ||||
-rw-r--r-- | drivers/md/faulty.c | 2 | ||||
-rw-r--r-- | drivers/md/linear.c | 32 | ||||
-rw-r--r-- | drivers/md/md.c | 140 | ||||
-rw-r--r-- | drivers/md/md.h | 13 | ||||
-rw-r--r-- | drivers/md/multipath.c | 2 | ||||
-rw-r--r-- | drivers/md/raid0.c | 164 | ||||
-rw-r--r-- | drivers/md/raid0.h | 11 | ||||
-rw-r--r-- | drivers/md/raid1.c | 98 | ||||
-rw-r--r-- | drivers/md/raid10.c | 187 | ||||
-rw-r--r-- | drivers/md/raid5.c | 25 |
13 files changed, 491 insertions, 373 deletions
diff --git a/drivers/md/bitmap.c b/drivers/md/bitmap.c index 045e086144ad..3d0dfa7a89a2 100644 --- a/drivers/md/bitmap.c +++ b/drivers/md/bitmap.c | |||
@@ -26,6 +26,7 @@ | |||
26 | #include <linux/file.h> | 26 | #include <linux/file.h> |
27 | #include <linux/mount.h> | 27 | #include <linux/mount.h> |
28 | #include <linux/buffer_head.h> | 28 | #include <linux/buffer_head.h> |
29 | #include <linux/seq_file.h> | ||
29 | #include "md.h" | 30 | #include "md.h" |
30 | #include "bitmap.h" | 31 | #include "bitmap.h" |
31 | 32 | ||
@@ -35,31 +36,6 @@ static inline char *bmname(struct bitmap *bitmap) | |||
35 | } | 36 | } |
36 | 37 | ||
37 | /* | 38 | /* |
38 | * just a placeholder - calls kmalloc for bitmap pages | ||
39 | */ | ||
40 | static unsigned char *bitmap_alloc_page(struct bitmap *bitmap) | ||
41 | { | ||
42 | unsigned char *page; | ||
43 | |||
44 | page = kzalloc(PAGE_SIZE, GFP_NOIO); | ||
45 | if (!page) | ||
46 | printk("%s: bitmap_alloc_page FAILED\n", bmname(bitmap)); | ||
47 | else | ||
48 | pr_debug("%s: bitmap_alloc_page: allocated page at %p\n", | ||
49 | bmname(bitmap), page); | ||
50 | return page; | ||
51 | } | ||
52 | |||
53 | /* | ||
54 | * for now just a placeholder -- just calls kfree for bitmap pages | ||
55 | */ | ||
56 | static void bitmap_free_page(struct bitmap *bitmap, unsigned char *page) | ||
57 | { | ||
58 | pr_debug("%s: bitmap_free_page: free page %p\n", bmname(bitmap), page); | ||
59 | kfree(page); | ||
60 | } | ||
61 | |||
62 | /* | ||
63 | * check a page and, if necessary, allocate it (or hijack it if the alloc fails) | 39 | * check a page and, if necessary, allocate it (or hijack it if the alloc fails) |
64 | * | 40 | * |
65 | * 1) check to see if this page is allocated, if it's not then try to alloc | 41 | * 1) check to see if this page is allocated, if it's not then try to alloc |
@@ -96,7 +72,7 @@ __acquires(bitmap->lock) | |||
96 | /* this page has not been allocated yet */ | 72 | /* this page has not been allocated yet */ |
97 | 73 | ||
98 | spin_unlock_irq(&bitmap->lock); | 74 | spin_unlock_irq(&bitmap->lock); |
99 | mappage = bitmap_alloc_page(bitmap); | 75 | mappage = kzalloc(PAGE_SIZE, GFP_NOIO); |
100 | spin_lock_irq(&bitmap->lock); | 76 | spin_lock_irq(&bitmap->lock); |
101 | 77 | ||
102 | if (mappage == NULL) { | 78 | if (mappage == NULL) { |
@@ -109,7 +85,7 @@ __acquires(bitmap->lock) | |||
109 | } else if (bitmap->bp[page].map || | 85 | } else if (bitmap->bp[page].map || |
110 | bitmap->bp[page].hijacked) { | 86 | bitmap->bp[page].hijacked) { |
111 | /* somebody beat us to getting the page */ | 87 | /* somebody beat us to getting the page */ |
112 | bitmap_free_page(bitmap, mappage); | 88 | kfree(mappage); |
113 | return 0; | 89 | return 0; |
114 | } else { | 90 | } else { |
115 | 91 | ||
@@ -141,7 +117,7 @@ static void bitmap_checkfree(struct bitmap *bitmap, unsigned long page) | |||
141 | ptr = bitmap->bp[page].map; | 117 | ptr = bitmap->bp[page].map; |
142 | bitmap->bp[page].map = NULL; | 118 | bitmap->bp[page].map = NULL; |
143 | bitmap->missing_pages++; | 119 | bitmap->missing_pages++; |
144 | bitmap_free_page(bitmap, ptr); | 120 | kfree(ptr); |
145 | } | 121 | } |
146 | } | 122 | } |
147 | 123 | ||
@@ -171,7 +147,7 @@ static struct page *read_sb_page(struct mddev *mddev, loff_t offset, | |||
171 | did_alloc = 1; | 147 | did_alloc = 1; |
172 | } | 148 | } |
173 | 149 | ||
174 | list_for_each_entry(rdev, &mddev->disks, same_set) { | 150 | rdev_for_each(rdev, mddev) { |
175 | if (! test_bit(In_sync, &rdev->flags) | 151 | if (! test_bit(In_sync, &rdev->flags) |
176 | || test_bit(Faulty, &rdev->flags)) | 152 | || test_bit(Faulty, &rdev->flags)) |
177 | continue; | 153 | continue; |
@@ -445,18 +421,13 @@ out: | |||
445 | void bitmap_update_sb(struct bitmap *bitmap) | 421 | void bitmap_update_sb(struct bitmap *bitmap) |
446 | { | 422 | { |
447 | bitmap_super_t *sb; | 423 | bitmap_super_t *sb; |
448 | unsigned long flags; | ||
449 | 424 | ||
450 | if (!bitmap || !bitmap->mddev) /* no bitmap for this array */ | 425 | if (!bitmap || !bitmap->mddev) /* no bitmap for this array */ |
451 | return; | 426 | return; |
452 | if (bitmap->mddev->bitmap_info.external) | 427 | if (bitmap->mddev->bitmap_info.external) |
453 | return; | 428 | return; |
454 | spin_lock_irqsave(&bitmap->lock, flags); | 429 | if (!bitmap->sb_page) /* no superblock */ |
455 | if (!bitmap->sb_page) { /* no superblock */ | ||
456 | spin_unlock_irqrestore(&bitmap->lock, flags); | ||
457 | return; | 430 | return; |
458 | } | ||
459 | spin_unlock_irqrestore(&bitmap->lock, flags); | ||
460 | sb = kmap_atomic(bitmap->sb_page); | 431 | sb = kmap_atomic(bitmap->sb_page); |
461 | sb->events = cpu_to_le64(bitmap->mddev->events); | 432 | sb->events = cpu_to_le64(bitmap->mddev->events); |
462 | if (bitmap->mddev->events < bitmap->events_cleared) | 433 | if (bitmap->mddev->events < bitmap->events_cleared) |
@@ -632,26 +603,28 @@ static int bitmap_read_sb(struct bitmap *bitmap) | |||
632 | /* keep the array size field of the bitmap superblock up to date */ | 603 | /* keep the array size field of the bitmap superblock up to date */ |
633 | sb->sync_size = cpu_to_le64(bitmap->mddev->resync_max_sectors); | 604 | sb->sync_size = cpu_to_le64(bitmap->mddev->resync_max_sectors); |
634 | 605 | ||
635 | if (!bitmap->mddev->persistent) | 606 | if (bitmap->mddev->persistent) { |
636 | goto success; | 607 | /* |
637 | 608 | * We have a persistent array superblock, so compare the | |
638 | /* | 609 | * bitmap's UUID and event counter to the mddev's |
639 | * if we have a persistent array superblock, compare the | 610 | */ |
640 | * bitmap's UUID and event counter to the mddev's | 611 | if (memcmp(sb->uuid, bitmap->mddev->uuid, 16)) { |
641 | */ | 612 | printk(KERN_INFO |
642 | if (memcmp(sb->uuid, bitmap->mddev->uuid, 16)) { | 613 | "%s: bitmap superblock UUID mismatch\n", |
643 | printk(KERN_INFO "%s: bitmap superblock UUID mismatch\n", | 614 | bmname(bitmap)); |
644 | bmname(bitmap)); | 615 | goto out; |
645 | goto out; | 616 | } |
646 | } | 617 | events = le64_to_cpu(sb->events); |
647 | events = le64_to_cpu(sb->events); | 618 | if (events < bitmap->mddev->events) { |
648 | if (events < bitmap->mddev->events) { | 619 | printk(KERN_INFO |
649 | printk(KERN_INFO "%s: bitmap file is out of date (%llu < %llu) " | 620 | "%s: bitmap file is out of date (%llu < %llu) " |
650 | "-- forcing full recovery\n", bmname(bitmap), events, | 621 | "-- forcing full recovery\n", |
651 | (unsigned long long) bitmap->mddev->events); | 622 | bmname(bitmap), events, |
652 | sb->state |= cpu_to_le32(BITMAP_STALE); | 623 | (unsigned long long) bitmap->mddev->events); |
624 | sb->state |= cpu_to_le32(BITMAP_STALE); | ||
625 | } | ||
653 | } | 626 | } |
654 | success: | 627 | |
655 | /* assign fields using values from superblock */ | 628 | /* assign fields using values from superblock */ |
656 | bitmap->mddev->bitmap_info.chunksize = chunksize; | 629 | bitmap->mddev->bitmap_info.chunksize = chunksize; |
657 | bitmap->mddev->bitmap_info.daemon_sleep = daemon_sleep; | 630 | bitmap->mddev->bitmap_info.daemon_sleep = daemon_sleep; |
@@ -680,15 +653,10 @@ static int bitmap_mask_state(struct bitmap *bitmap, enum bitmap_state bits, | |||
680 | enum bitmap_mask_op op) | 653 | enum bitmap_mask_op op) |
681 | { | 654 | { |
682 | bitmap_super_t *sb; | 655 | bitmap_super_t *sb; |
683 | unsigned long flags; | ||
684 | int old; | 656 | int old; |
685 | 657 | ||
686 | spin_lock_irqsave(&bitmap->lock, flags); | 658 | if (!bitmap->sb_page) /* can't set the state */ |
687 | if (!bitmap->sb_page) { /* can't set the state */ | ||
688 | spin_unlock_irqrestore(&bitmap->lock, flags); | ||
689 | return 0; | 659 | return 0; |
690 | } | ||
691 | spin_unlock_irqrestore(&bitmap->lock, flags); | ||
692 | sb = kmap_atomic(bitmap->sb_page); | 660 | sb = kmap_atomic(bitmap->sb_page); |
693 | old = le32_to_cpu(sb->state) & bits; | 661 | old = le32_to_cpu(sb->state) & bits; |
694 | switch (op) { | 662 | switch (op) { |
@@ -870,7 +838,7 @@ static void bitmap_file_set_bit(struct bitmap *bitmap, sector_t block) | |||
870 | unsigned long bit; | 838 | unsigned long bit; |
871 | struct page *page; | 839 | struct page *page; |
872 | void *kaddr; | 840 | void *kaddr; |
873 | unsigned long chunk = block >> CHUNK_BLOCK_SHIFT(bitmap); | 841 | unsigned long chunk = block >> bitmap->chunkshift; |
874 | 842 | ||
875 | if (!bitmap->filemap) | 843 | if (!bitmap->filemap) |
876 | return; | 844 | return; |
@@ -1069,10 +1037,10 @@ static int bitmap_init_from_disk(struct bitmap *bitmap, sector_t start) | |||
1069 | kunmap_atomic(paddr); | 1037 | kunmap_atomic(paddr); |
1070 | if (b) { | 1038 | if (b) { |
1071 | /* if the disk bit is set, set the memory bit */ | 1039 | /* if the disk bit is set, set the memory bit */ |
1072 | int needed = ((sector_t)(i+1) << (CHUNK_BLOCK_SHIFT(bitmap)) | 1040 | int needed = ((sector_t)(i+1) << bitmap->chunkshift |
1073 | >= start); | 1041 | >= start); |
1074 | bitmap_set_memory_bits(bitmap, | 1042 | bitmap_set_memory_bits(bitmap, |
1075 | (sector_t)i << CHUNK_BLOCK_SHIFT(bitmap), | 1043 | (sector_t)i << bitmap->chunkshift, |
1076 | needed); | 1044 | needed); |
1077 | bit_cnt++; | 1045 | bit_cnt++; |
1078 | } | 1046 | } |
@@ -1116,7 +1084,7 @@ void bitmap_write_all(struct bitmap *bitmap) | |||
1116 | 1084 | ||
1117 | static void bitmap_count_page(struct bitmap *bitmap, sector_t offset, int inc) | 1085 | static void bitmap_count_page(struct bitmap *bitmap, sector_t offset, int inc) |
1118 | { | 1086 | { |
1119 | sector_t chunk = offset >> CHUNK_BLOCK_SHIFT(bitmap); | 1087 | sector_t chunk = offset >> bitmap->chunkshift; |
1120 | unsigned long page = chunk >> PAGE_COUNTER_SHIFT; | 1088 | unsigned long page = chunk >> PAGE_COUNTER_SHIFT; |
1121 | bitmap->bp[page].count += inc; | 1089 | bitmap->bp[page].count += inc; |
1122 | bitmap_checkfree(bitmap, page); | 1090 | bitmap_checkfree(bitmap, page); |
@@ -1222,7 +1190,7 @@ void bitmap_daemon_work(struct mddev *mddev) | |||
1222 | bitmap->allclean = 0; | 1190 | bitmap->allclean = 0; |
1223 | } | 1191 | } |
1224 | bmc = bitmap_get_counter(bitmap, | 1192 | bmc = bitmap_get_counter(bitmap, |
1225 | (sector_t)j << CHUNK_BLOCK_SHIFT(bitmap), | 1193 | (sector_t)j << bitmap->chunkshift, |
1226 | &blocks, 0); | 1194 | &blocks, 0); |
1227 | if (!bmc) | 1195 | if (!bmc) |
1228 | j |= PAGE_COUNTER_MASK; | 1196 | j |= PAGE_COUNTER_MASK; |
@@ -1231,7 +1199,7 @@ void bitmap_daemon_work(struct mddev *mddev) | |||
1231 | /* we can clear the bit */ | 1199 | /* we can clear the bit */ |
1232 | *bmc = 0; | 1200 | *bmc = 0; |
1233 | bitmap_count_page(bitmap, | 1201 | bitmap_count_page(bitmap, |
1234 | (sector_t)j << CHUNK_BLOCK_SHIFT(bitmap), | 1202 | (sector_t)j << bitmap->chunkshift, |
1235 | -1); | 1203 | -1); |
1236 | 1204 | ||
1237 | /* clear the bit */ | 1205 | /* clear the bit */ |
@@ -1285,7 +1253,7 @@ __acquires(bitmap->lock) | |||
1285 | * The lock must have been taken with interrupts enabled. | 1253 | * The lock must have been taken with interrupts enabled. |
1286 | * If !create, we don't release the lock. | 1254 | * If !create, we don't release the lock. |
1287 | */ | 1255 | */ |
1288 | sector_t chunk = offset >> CHUNK_BLOCK_SHIFT(bitmap); | 1256 | sector_t chunk = offset >> bitmap->chunkshift; |
1289 | unsigned long page = chunk >> PAGE_COUNTER_SHIFT; | 1257 | unsigned long page = chunk >> PAGE_COUNTER_SHIFT; |
1290 | unsigned long pageoff = (chunk & PAGE_COUNTER_MASK) << COUNTER_BYTE_SHIFT; | 1258 | unsigned long pageoff = (chunk & PAGE_COUNTER_MASK) << COUNTER_BYTE_SHIFT; |
1291 | sector_t csize; | 1259 | sector_t csize; |
@@ -1295,10 +1263,10 @@ __acquires(bitmap->lock) | |||
1295 | 1263 | ||
1296 | if (bitmap->bp[page].hijacked || | 1264 | if (bitmap->bp[page].hijacked || |
1297 | bitmap->bp[page].map == NULL) | 1265 | bitmap->bp[page].map == NULL) |
1298 | csize = ((sector_t)1) << (CHUNK_BLOCK_SHIFT(bitmap) + | 1266 | csize = ((sector_t)1) << (bitmap->chunkshift + |
1299 | PAGE_COUNTER_SHIFT - 1); | 1267 | PAGE_COUNTER_SHIFT - 1); |
1300 | else | 1268 | else |
1301 | csize = ((sector_t)1) << (CHUNK_BLOCK_SHIFT(bitmap)); | 1269 | csize = ((sector_t)1) << bitmap->chunkshift; |
1302 | *blocks = csize - (offset & (csize - 1)); | 1270 | *blocks = csize - (offset & (csize - 1)); |
1303 | 1271 | ||
1304 | if (err < 0) | 1272 | if (err < 0) |
@@ -1424,7 +1392,7 @@ void bitmap_endwrite(struct bitmap *bitmap, sector_t offset, unsigned long secto | |||
1424 | set_page_attr(bitmap, | 1392 | set_page_attr(bitmap, |
1425 | filemap_get_page( | 1393 | filemap_get_page( |
1426 | bitmap, | 1394 | bitmap, |
1427 | offset >> CHUNK_BLOCK_SHIFT(bitmap)), | 1395 | offset >> bitmap->chunkshift), |
1428 | BITMAP_PAGE_PENDING); | 1396 | BITMAP_PAGE_PENDING); |
1429 | bitmap->allclean = 0; | 1397 | bitmap->allclean = 0; |
1430 | } | 1398 | } |
@@ -1512,7 +1480,7 @@ void bitmap_end_sync(struct bitmap *bitmap, sector_t offset, sector_t *blocks, i | |||
1512 | else { | 1480 | else { |
1513 | if (*bmc <= 2) { | 1481 | if (*bmc <= 2) { |
1514 | set_page_attr(bitmap, | 1482 | set_page_attr(bitmap, |
1515 | filemap_get_page(bitmap, offset >> CHUNK_BLOCK_SHIFT(bitmap)), | 1483 | filemap_get_page(bitmap, offset >> bitmap->chunkshift), |
1516 | BITMAP_PAGE_PENDING); | 1484 | BITMAP_PAGE_PENDING); |
1517 | bitmap->allclean = 0; | 1485 | bitmap->allclean = 0; |
1518 | } | 1486 | } |
@@ -1559,7 +1527,7 @@ void bitmap_cond_end_sync(struct bitmap *bitmap, sector_t sector) | |||
1559 | 1527 | ||
1560 | bitmap->mddev->curr_resync_completed = sector; | 1528 | bitmap->mddev->curr_resync_completed = sector; |
1561 | set_bit(MD_CHANGE_CLEAN, &bitmap->mddev->flags); | 1529 | set_bit(MD_CHANGE_CLEAN, &bitmap->mddev->flags); |
1562 | sector &= ~((1ULL << CHUNK_BLOCK_SHIFT(bitmap)) - 1); | 1530 | sector &= ~((1ULL << bitmap->chunkshift) - 1); |
1563 | s = 0; | 1531 | s = 0; |
1564 | while (s < sector && s < bitmap->mddev->resync_max_sectors) { | 1532 | while (s < sector && s < bitmap->mddev->resync_max_sectors) { |
1565 | bitmap_end_sync(bitmap, s, &blocks, 0); | 1533 | bitmap_end_sync(bitmap, s, &blocks, 0); |
@@ -1589,7 +1557,7 @@ static void bitmap_set_memory_bits(struct bitmap *bitmap, sector_t offset, int n | |||
1589 | struct page *page; | 1557 | struct page *page; |
1590 | *bmc = 2 | (needed ? NEEDED_MASK : 0); | 1558 | *bmc = 2 | (needed ? NEEDED_MASK : 0); |
1591 | bitmap_count_page(bitmap, offset, 1); | 1559 | bitmap_count_page(bitmap, offset, 1); |
1592 | page = filemap_get_page(bitmap, offset >> CHUNK_BLOCK_SHIFT(bitmap)); | 1560 | page = filemap_get_page(bitmap, offset >> bitmap->chunkshift); |
1593 | set_page_attr(bitmap, page, BITMAP_PAGE_PENDING); | 1561 | set_page_attr(bitmap, page, BITMAP_PAGE_PENDING); |
1594 | bitmap->allclean = 0; | 1562 | bitmap->allclean = 0; |
1595 | } | 1563 | } |
@@ -1602,7 +1570,7 @@ void bitmap_dirty_bits(struct bitmap *bitmap, unsigned long s, unsigned long e) | |||
1602 | unsigned long chunk; | 1570 | unsigned long chunk; |
1603 | 1571 | ||
1604 | for (chunk = s; chunk <= e; chunk++) { | 1572 | for (chunk = s; chunk <= e; chunk++) { |
1605 | sector_t sec = (sector_t)chunk << CHUNK_BLOCK_SHIFT(bitmap); | 1573 | sector_t sec = (sector_t)chunk << bitmap->chunkshift; |
1606 | bitmap_set_memory_bits(bitmap, sec, 1); | 1574 | bitmap_set_memory_bits(bitmap, sec, 1); |
1607 | spin_lock_irq(&bitmap->lock); | 1575 | spin_lock_irq(&bitmap->lock); |
1608 | bitmap_file_set_bit(bitmap, sec); | 1576 | bitmap_file_set_bit(bitmap, sec); |
@@ -1759,11 +1727,12 @@ int bitmap_create(struct mddev *mddev) | |||
1759 | goto error; | 1727 | goto error; |
1760 | 1728 | ||
1761 | bitmap->daemon_lastrun = jiffies; | 1729 | bitmap->daemon_lastrun = jiffies; |
1762 | bitmap->chunkshift = ffz(~mddev->bitmap_info.chunksize); | 1730 | bitmap->chunkshift = (ffz(~mddev->bitmap_info.chunksize) |
1731 | - BITMAP_BLOCK_SHIFT); | ||
1763 | 1732 | ||
1764 | /* now that chunksize and chunkshift are set, we can use these macros */ | 1733 | /* now that chunksize and chunkshift are set, we can use these macros */ |
1765 | chunks = (blocks + CHUNK_BLOCK_RATIO(bitmap) - 1) >> | 1734 | chunks = (blocks + bitmap->chunkshift - 1) >> |
1766 | CHUNK_BLOCK_SHIFT(bitmap); | 1735 | bitmap->chunkshift; |
1767 | pages = (chunks + PAGE_COUNTER_RATIO - 1) / PAGE_COUNTER_RATIO; | 1736 | pages = (chunks + PAGE_COUNTER_RATIO - 1) / PAGE_COUNTER_RATIO; |
1768 | 1737 | ||
1769 | BUG_ON(!pages); | 1738 | BUG_ON(!pages); |
@@ -1836,6 +1805,33 @@ out: | |||
1836 | } | 1805 | } |
1837 | EXPORT_SYMBOL_GPL(bitmap_load); | 1806 | EXPORT_SYMBOL_GPL(bitmap_load); |
1838 | 1807 | ||
1808 | void bitmap_status(struct seq_file *seq, struct bitmap *bitmap) | ||
1809 | { | ||
1810 | unsigned long chunk_kb; | ||
1811 | unsigned long flags; | ||
1812 | |||
1813 | if (!bitmap) | ||
1814 | return; | ||
1815 | |||
1816 | spin_lock_irqsave(&bitmap->lock, flags); | ||
1817 | chunk_kb = bitmap->mddev->bitmap_info.chunksize >> 10; | ||
1818 | seq_printf(seq, "bitmap: %lu/%lu pages [%luKB], " | ||
1819 | "%lu%s chunk", | ||
1820 | bitmap->pages - bitmap->missing_pages, | ||
1821 | bitmap->pages, | ||
1822 | (bitmap->pages - bitmap->missing_pages) | ||
1823 | << (PAGE_SHIFT - 10), | ||
1824 | chunk_kb ? chunk_kb : bitmap->mddev->bitmap_info.chunksize, | ||
1825 | chunk_kb ? "KB" : "B"); | ||
1826 | if (bitmap->file) { | ||
1827 | seq_printf(seq, ", file: "); | ||
1828 | seq_path(seq, &bitmap->file->f_path, " \t\n"); | ||
1829 | } | ||
1830 | |||
1831 | seq_printf(seq, "\n"); | ||
1832 | spin_unlock_irqrestore(&bitmap->lock, flags); | ||
1833 | } | ||
1834 | |||
1839 | static ssize_t | 1835 | static ssize_t |
1840 | location_show(struct mddev *mddev, char *page) | 1836 | location_show(struct mddev *mddev, char *page) |
1841 | { | 1837 | { |
@@ -1904,6 +1900,8 @@ location_store(struct mddev *mddev, const char *buf, size_t len) | |||
1904 | if (mddev->pers) { | 1900 | if (mddev->pers) { |
1905 | mddev->pers->quiesce(mddev, 1); | 1901 | mddev->pers->quiesce(mddev, 1); |
1906 | rv = bitmap_create(mddev); | 1902 | rv = bitmap_create(mddev); |
1903 | if (!rv) | ||
1904 | rv = bitmap_load(mddev); | ||
1907 | if (rv) { | 1905 | if (rv) { |
1908 | bitmap_destroy(mddev); | 1906 | bitmap_destroy(mddev); |
1909 | mddev->bitmap_info.offset = 0; | 1907 | mddev->bitmap_info.offset = 0; |
diff --git a/drivers/md/bitmap.h b/drivers/md/bitmap.h index a15436dd9b3e..55ca5aec84e4 100644 --- a/drivers/md/bitmap.h +++ b/drivers/md/bitmap.h | |||
@@ -13,8 +13,6 @@ | |||
13 | #define BITMAP_MAJOR_HI 4 | 13 | #define BITMAP_MAJOR_HI 4 |
14 | #define BITMAP_MAJOR_HOSTENDIAN 3 | 14 | #define BITMAP_MAJOR_HOSTENDIAN 3 |
15 | 15 | ||
16 | #define BITMAP_MINOR 39 | ||
17 | |||
18 | /* | 16 | /* |
19 | * in-memory bitmap: | 17 | * in-memory bitmap: |
20 | * | 18 | * |
@@ -101,21 +99,10 @@ typedef __u16 bitmap_counter_t; | |||
101 | /* same, except a mask value for more efficient bitops */ | 99 | /* same, except a mask value for more efficient bitops */ |
102 | #define PAGE_COUNTER_MASK (PAGE_COUNTER_RATIO - 1) | 100 | #define PAGE_COUNTER_MASK (PAGE_COUNTER_RATIO - 1) |
103 | 101 | ||
104 | #define BITMAP_BLOCK_SIZE 512 | ||
105 | #define BITMAP_BLOCK_SHIFT 9 | 102 | #define BITMAP_BLOCK_SHIFT 9 |
106 | 103 | ||
107 | /* how many blocks per chunk? (this is variable) */ | 104 | /* how many blocks per chunk? (this is variable) */ |
108 | #define CHUNK_BLOCK_RATIO(bitmap) ((bitmap)->mddev->bitmap_info.chunksize >> BITMAP_BLOCK_SHIFT) | 105 | #define CHUNK_BLOCK_RATIO(bitmap) ((bitmap)->mddev->bitmap_info.chunksize >> BITMAP_BLOCK_SHIFT) |
109 | #define CHUNK_BLOCK_SHIFT(bitmap) ((bitmap)->chunkshift - BITMAP_BLOCK_SHIFT) | ||
110 | #define CHUNK_BLOCK_MASK(bitmap) (CHUNK_BLOCK_RATIO(bitmap) - 1) | ||
111 | |||
112 | /* when hijacked, the counters and bits represent even larger "chunks" */ | ||
113 | /* there will be 1024 chunks represented by each counter in the page pointers */ | ||
114 | #define PAGEPTR_BLOCK_RATIO(bitmap) \ | ||
115 | (CHUNK_BLOCK_RATIO(bitmap) << PAGE_COUNTER_SHIFT >> 1) | ||
116 | #define PAGEPTR_BLOCK_SHIFT(bitmap) \ | ||
117 | (CHUNK_BLOCK_SHIFT(bitmap) + PAGE_COUNTER_SHIFT - 1) | ||
118 | #define PAGEPTR_BLOCK_MASK(bitmap) (PAGEPTR_BLOCK_RATIO(bitmap) - 1) | ||
119 | 106 | ||
120 | #endif | 107 | #endif |
121 | 108 | ||
@@ -181,12 +168,6 @@ struct bitmap_page { | |||
181 | unsigned int count:31; | 168 | unsigned int count:31; |
182 | }; | 169 | }; |
183 | 170 | ||
184 | /* keep track of bitmap file pages that have pending writes on them */ | ||
185 | struct page_list { | ||
186 | struct list_head list; | ||
187 | struct page *page; | ||
188 | }; | ||
189 | |||
190 | /* the main bitmap structure - one per mddev */ | 171 | /* the main bitmap structure - one per mddev */ |
191 | struct bitmap { | 172 | struct bitmap { |
192 | struct bitmap_page *bp; | 173 | struct bitmap_page *bp; |
@@ -196,7 +177,7 @@ struct bitmap { | |||
196 | struct mddev *mddev; /* the md device that the bitmap is for */ | 177 | struct mddev *mddev; /* the md device that the bitmap is for */ |
197 | 178 | ||
198 | /* bitmap chunksize -- how much data does each bit represent? */ | 179 | /* bitmap chunksize -- how much data does each bit represent? */ |
199 | unsigned long chunkshift; /* chunksize = 2^chunkshift (for bitops) */ | 180 | unsigned long chunkshift; /* chunksize = 2^(chunkshift+9) (for bitops) */ |
200 | unsigned long chunks; /* total number of data chunks for the array */ | 181 | unsigned long chunks; /* total number of data chunks for the array */ |
201 | 182 | ||
202 | __u64 events_cleared; | 183 | __u64 events_cleared; |
@@ -245,6 +226,7 @@ void bitmap_destroy(struct mddev *mddev); | |||
245 | 226 | ||
246 | void bitmap_print_sb(struct bitmap *bitmap); | 227 | void bitmap_print_sb(struct bitmap *bitmap); |
247 | void bitmap_update_sb(struct bitmap *bitmap); | 228 | void bitmap_update_sb(struct bitmap *bitmap); |
229 | void bitmap_status(struct seq_file *seq, struct bitmap *bitmap); | ||
248 | 230 | ||
249 | int bitmap_setallbits(struct bitmap *bitmap); | 231 | int bitmap_setallbits(struct bitmap *bitmap); |
250 | void bitmap_write_all(struct bitmap *bitmap); | 232 | void bitmap_write_all(struct bitmap *bitmap); |
diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c index 787022c18187..c5a875d7b882 100644 --- a/drivers/md/dm-raid.c +++ b/drivers/md/dm-raid.c | |||
@@ -615,14 +615,14 @@ static int read_disk_sb(struct md_rdev *rdev, int size) | |||
615 | 615 | ||
616 | static void super_sync(struct mddev *mddev, struct md_rdev *rdev) | 616 | static void super_sync(struct mddev *mddev, struct md_rdev *rdev) |
617 | { | 617 | { |
618 | struct md_rdev *r, *t; | 618 | struct md_rdev *r; |
619 | uint64_t failed_devices; | 619 | uint64_t failed_devices; |
620 | struct dm_raid_superblock *sb; | 620 | struct dm_raid_superblock *sb; |
621 | 621 | ||
622 | sb = page_address(rdev->sb_page); | 622 | sb = page_address(rdev->sb_page); |
623 | failed_devices = le64_to_cpu(sb->failed_devices); | 623 | failed_devices = le64_to_cpu(sb->failed_devices); |
624 | 624 | ||
625 | rdev_for_each(r, t, mddev) | 625 | rdev_for_each(r, mddev) |
626 | if ((r->raid_disk >= 0) && test_bit(Faulty, &r->flags)) | 626 | if ((r->raid_disk >= 0) && test_bit(Faulty, &r->flags)) |
627 | failed_devices |= (1ULL << r->raid_disk); | 627 | failed_devices |= (1ULL << r->raid_disk); |
628 | 628 | ||
@@ -707,7 +707,7 @@ static int super_init_validation(struct mddev *mddev, struct md_rdev *rdev) | |||
707 | struct dm_raid_superblock *sb; | 707 | struct dm_raid_superblock *sb; |
708 | uint32_t new_devs = 0; | 708 | uint32_t new_devs = 0; |
709 | uint32_t rebuilds = 0; | 709 | uint32_t rebuilds = 0; |
710 | struct md_rdev *r, *t; | 710 | struct md_rdev *r; |
711 | struct dm_raid_superblock *sb2; | 711 | struct dm_raid_superblock *sb2; |
712 | 712 | ||
713 | sb = page_address(rdev->sb_page); | 713 | sb = page_address(rdev->sb_page); |
@@ -750,7 +750,7 @@ static int super_init_validation(struct mddev *mddev, struct md_rdev *rdev) | |||
750 | * case the In_sync bit will /not/ be set and | 750 | * case the In_sync bit will /not/ be set and |
751 | * recovery_cp must be MaxSector. | 751 | * recovery_cp must be MaxSector. |
752 | */ | 752 | */ |
753 | rdev_for_each(r, t, mddev) { | 753 | rdev_for_each(r, mddev) { |
754 | if (!test_bit(In_sync, &r->flags)) { | 754 | if (!test_bit(In_sync, &r->flags)) { |
755 | DMINFO("Device %d specified for rebuild: " | 755 | DMINFO("Device %d specified for rebuild: " |
756 | "Clearing superblock", r->raid_disk); | 756 | "Clearing superblock", r->raid_disk); |
@@ -782,7 +782,7 @@ static int super_init_validation(struct mddev *mddev, struct md_rdev *rdev) | |||
782 | * Now we set the Faulty bit for those devices that are | 782 | * Now we set the Faulty bit for those devices that are |
783 | * recorded in the superblock as failed. | 783 | * recorded in the superblock as failed. |
784 | */ | 784 | */ |
785 | rdev_for_each(r, t, mddev) { | 785 | rdev_for_each(r, mddev) { |
786 | if (!r->sb_page) | 786 | if (!r->sb_page) |
787 | continue; | 787 | continue; |
788 | sb2 = page_address(r->sb_page); | 788 | sb2 = page_address(r->sb_page); |
@@ -855,11 +855,11 @@ static int super_validate(struct mddev *mddev, struct md_rdev *rdev) | |||
855 | static int analyse_superblocks(struct dm_target *ti, struct raid_set *rs) | 855 | static int analyse_superblocks(struct dm_target *ti, struct raid_set *rs) |
856 | { | 856 | { |
857 | int ret; | 857 | int ret; |
858 | struct md_rdev *rdev, *freshest, *tmp; | 858 | struct md_rdev *rdev, *freshest; |
859 | struct mddev *mddev = &rs->md; | 859 | struct mddev *mddev = &rs->md; |
860 | 860 | ||
861 | freshest = NULL; | 861 | freshest = NULL; |
862 | rdev_for_each(rdev, tmp, mddev) { | 862 | rdev_for_each(rdev, mddev) { |
863 | if (!rdev->meta_bdev) | 863 | if (!rdev->meta_bdev) |
864 | continue; | 864 | continue; |
865 | 865 | ||
@@ -888,7 +888,7 @@ static int analyse_superblocks(struct dm_target *ti, struct raid_set *rs) | |||
888 | if (super_validate(mddev, freshest)) | 888 | if (super_validate(mddev, freshest)) |
889 | return -EINVAL; | 889 | return -EINVAL; |
890 | 890 | ||
891 | rdev_for_each(rdev, tmp, mddev) | 891 | rdev_for_each(rdev, mddev) |
892 | if ((rdev != freshest) && super_validate(mddev, rdev)) | 892 | if ((rdev != freshest) && super_validate(mddev, rdev)) |
893 | return -EINVAL; | 893 | return -EINVAL; |
894 | 894 | ||
diff --git a/drivers/md/faulty.c b/drivers/md/faulty.c index feb2c3c7bb44..45135f69509c 100644 --- a/drivers/md/faulty.c +++ b/drivers/md/faulty.c | |||
@@ -315,7 +315,7 @@ static int run(struct mddev *mddev) | |||
315 | } | 315 | } |
316 | conf->nfaults = 0; | 316 | conf->nfaults = 0; |
317 | 317 | ||
318 | list_for_each_entry(rdev, &mddev->disks, same_set) | 318 | rdev_for_each(rdev, mddev) |
319 | conf->rdev = rdev; | 319 | conf->rdev = rdev; |
320 | 320 | ||
321 | md_set_array_sectors(mddev, faulty_size(mddev, 0, 0)); | 321 | md_set_array_sectors(mddev, faulty_size(mddev, 0, 0)); |
diff --git a/drivers/md/linear.c b/drivers/md/linear.c index 627456542fb3..b0fcc7d02adb 100644 --- a/drivers/md/linear.c +++ b/drivers/md/linear.c | |||
@@ -68,10 +68,19 @@ static int linear_mergeable_bvec(struct request_queue *q, | |||
68 | struct dev_info *dev0; | 68 | struct dev_info *dev0; |
69 | unsigned long maxsectors, bio_sectors = bvm->bi_size >> 9; | 69 | unsigned long maxsectors, bio_sectors = bvm->bi_size >> 9; |
70 | sector_t sector = bvm->bi_sector + get_start_sect(bvm->bi_bdev); | 70 | sector_t sector = bvm->bi_sector + get_start_sect(bvm->bi_bdev); |
71 | int maxbytes = biovec->bv_len; | ||
72 | struct request_queue *subq; | ||
71 | 73 | ||
72 | rcu_read_lock(); | 74 | rcu_read_lock(); |
73 | dev0 = which_dev(mddev, sector); | 75 | dev0 = which_dev(mddev, sector); |
74 | maxsectors = dev0->end_sector - sector; | 76 | maxsectors = dev0->end_sector - sector; |
77 | subq = bdev_get_queue(dev0->rdev->bdev); | ||
78 | if (subq->merge_bvec_fn) { | ||
79 | bvm->bi_bdev = dev0->rdev->bdev; | ||
80 | bvm->bi_sector -= dev0->end_sector - dev0->rdev->sectors; | ||
81 | maxbytes = min(maxbytes, subq->merge_bvec_fn(subq, bvm, | ||
82 | biovec)); | ||
83 | } | ||
75 | rcu_read_unlock(); | 84 | rcu_read_unlock(); |
76 | 85 | ||
77 | if (maxsectors < bio_sectors) | 86 | if (maxsectors < bio_sectors) |
@@ -80,12 +89,12 @@ static int linear_mergeable_bvec(struct request_queue *q, | |||
80 | maxsectors -= bio_sectors; | 89 | maxsectors -= bio_sectors; |
81 | 90 | ||
82 | if (maxsectors <= (PAGE_SIZE >> 9 ) && bio_sectors == 0) | 91 | if (maxsectors <= (PAGE_SIZE >> 9 ) && bio_sectors == 0) |
83 | return biovec->bv_len; | 92 | return maxbytes; |
84 | /* The bytes available at this offset could be really big, | 93 | |
85 | * so we cap at 2^31 to avoid overflow */ | 94 | if (maxsectors > (maxbytes >> 9)) |
86 | if (maxsectors > (1 << (31-9))) | 95 | return maxbytes; |
87 | return 1<<31; | 96 | else |
88 | return maxsectors << 9; | 97 | return maxsectors << 9; |
89 | } | 98 | } |
90 | 99 | ||
91 | static int linear_congested(void *data, int bits) | 100 | static int linear_congested(void *data, int bits) |
@@ -138,7 +147,7 @@ static struct linear_conf *linear_conf(struct mddev *mddev, int raid_disks) | |||
138 | cnt = 0; | 147 | cnt = 0; |
139 | conf->array_sectors = 0; | 148 | conf->array_sectors = 0; |
140 | 149 | ||
141 | list_for_each_entry(rdev, &mddev->disks, same_set) { | 150 | rdev_for_each(rdev, mddev) { |
142 | int j = rdev->raid_disk; | 151 | int j = rdev->raid_disk; |
143 | struct dev_info *disk = conf->disks + j; | 152 | struct dev_info *disk = conf->disks + j; |
144 | sector_t sectors; | 153 | sector_t sectors; |
@@ -158,15 +167,6 @@ static struct linear_conf *linear_conf(struct mddev *mddev, int raid_disks) | |||
158 | 167 | ||
159 | disk_stack_limits(mddev->gendisk, rdev->bdev, | 168 | disk_stack_limits(mddev->gendisk, rdev->bdev, |
160 | rdev->data_offset << 9); | 169 | rdev->data_offset << 9); |
161 | /* as we don't honour merge_bvec_fn, we must never risk | ||
162 | * violating it, so limit max_segments to 1 lying within | ||
163 | * a single page. | ||
164 | */ | ||
165 | if (rdev->bdev->bd_disk->queue->merge_bvec_fn) { | ||
166 | blk_queue_max_segments(mddev->queue, 1); | ||
167 | blk_queue_segment_boundary(mddev->queue, | ||
168 | PAGE_CACHE_SIZE - 1); | ||
169 | } | ||
170 | 170 | ||
171 | conf->array_sectors += rdev->sectors; | 171 | conf->array_sectors += rdev->sectors; |
172 | cnt++; | 172 | cnt++; |
diff --git a/drivers/md/md.c b/drivers/md/md.c index ce88755baf4a..b572e1e386ce 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c | |||
@@ -439,7 +439,7 @@ static void submit_flushes(struct work_struct *ws) | |||
439 | INIT_WORK(&mddev->flush_work, md_submit_flush_data); | 439 | INIT_WORK(&mddev->flush_work, md_submit_flush_data); |
440 | atomic_set(&mddev->flush_pending, 1); | 440 | atomic_set(&mddev->flush_pending, 1); |
441 | rcu_read_lock(); | 441 | rcu_read_lock(); |
442 | list_for_each_entry_rcu(rdev, &mddev->disks, same_set) | 442 | rdev_for_each_rcu(rdev, mddev) |
443 | if (rdev->raid_disk >= 0 && | 443 | if (rdev->raid_disk >= 0 && |
444 | !test_bit(Faulty, &rdev->flags)) { | 444 | !test_bit(Faulty, &rdev->flags)) { |
445 | /* Take two references, one is dropped | 445 | /* Take two references, one is dropped |
@@ -749,7 +749,7 @@ static struct md_rdev * find_rdev_nr(struct mddev *mddev, int nr) | |||
749 | { | 749 | { |
750 | struct md_rdev *rdev; | 750 | struct md_rdev *rdev; |
751 | 751 | ||
752 | list_for_each_entry(rdev, &mddev->disks, same_set) | 752 | rdev_for_each(rdev, mddev) |
753 | if (rdev->desc_nr == nr) | 753 | if (rdev->desc_nr == nr) |
754 | return rdev; | 754 | return rdev; |
755 | 755 | ||
@@ -760,7 +760,7 @@ static struct md_rdev * find_rdev(struct mddev * mddev, dev_t dev) | |||
760 | { | 760 | { |
761 | struct md_rdev *rdev; | 761 | struct md_rdev *rdev; |
762 | 762 | ||
763 | list_for_each_entry(rdev, &mddev->disks, same_set) | 763 | rdev_for_each(rdev, mddev) |
764 | if (rdev->bdev->bd_dev == dev) | 764 | if (rdev->bdev->bd_dev == dev) |
765 | return rdev; | 765 | return rdev; |
766 | 766 | ||
@@ -1342,7 +1342,7 @@ static void super_90_sync(struct mddev *mddev, struct md_rdev *rdev) | |||
1342 | sb->state |= (1<<MD_SB_BITMAP_PRESENT); | 1342 | sb->state |= (1<<MD_SB_BITMAP_PRESENT); |
1343 | 1343 | ||
1344 | sb->disks[0].state = (1<<MD_DISK_REMOVED); | 1344 | sb->disks[0].state = (1<<MD_DISK_REMOVED); |
1345 | list_for_each_entry(rdev2, &mddev->disks, same_set) { | 1345 | rdev_for_each(rdev2, mddev) { |
1346 | mdp_disk_t *d; | 1346 | mdp_disk_t *d; |
1347 | int desc_nr; | 1347 | int desc_nr; |
1348 | int is_active = test_bit(In_sync, &rdev2->flags); | 1348 | int is_active = test_bit(In_sync, &rdev2->flags); |
@@ -1805,18 +1805,18 @@ retry: | |||
1805 | | BB_LEN(internal_bb)); | 1805 | | BB_LEN(internal_bb)); |
1806 | *bbp++ = cpu_to_le64(store_bb); | 1806 | *bbp++ = cpu_to_le64(store_bb); |
1807 | } | 1807 | } |
1808 | bb->changed = 0; | ||
1808 | if (read_seqretry(&bb->lock, seq)) | 1809 | if (read_seqretry(&bb->lock, seq)) |
1809 | goto retry; | 1810 | goto retry; |
1810 | 1811 | ||
1811 | bb->sector = (rdev->sb_start + | 1812 | bb->sector = (rdev->sb_start + |
1812 | (int)le32_to_cpu(sb->bblog_offset)); | 1813 | (int)le32_to_cpu(sb->bblog_offset)); |
1813 | bb->size = le16_to_cpu(sb->bblog_size); | 1814 | bb->size = le16_to_cpu(sb->bblog_size); |
1814 | bb->changed = 0; | ||
1815 | } | 1815 | } |
1816 | } | 1816 | } |
1817 | 1817 | ||
1818 | max_dev = 0; | 1818 | max_dev = 0; |
1819 | list_for_each_entry(rdev2, &mddev->disks, same_set) | 1819 | rdev_for_each(rdev2, mddev) |
1820 | if (rdev2->desc_nr+1 > max_dev) | 1820 | if (rdev2->desc_nr+1 > max_dev) |
1821 | max_dev = rdev2->desc_nr+1; | 1821 | max_dev = rdev2->desc_nr+1; |
1822 | 1822 | ||
@@ -1833,7 +1833,7 @@ retry: | |||
1833 | for (i=0; i<max_dev;i++) | 1833 | for (i=0; i<max_dev;i++) |
1834 | sb->dev_roles[i] = cpu_to_le16(0xfffe); | 1834 | sb->dev_roles[i] = cpu_to_le16(0xfffe); |
1835 | 1835 | ||
1836 | list_for_each_entry(rdev2, &mddev->disks, same_set) { | 1836 | rdev_for_each(rdev2, mddev) { |
1837 | i = rdev2->desc_nr; | 1837 | i = rdev2->desc_nr; |
1838 | if (test_bit(Faulty, &rdev2->flags)) | 1838 | if (test_bit(Faulty, &rdev2->flags)) |
1839 | sb->dev_roles[i] = cpu_to_le16(0xfffe); | 1839 | sb->dev_roles[i] = cpu_to_le16(0xfffe); |
@@ -1948,7 +1948,7 @@ int md_integrity_register(struct mddev *mddev) | |||
1948 | return 0; /* nothing to do */ | 1948 | return 0; /* nothing to do */ |
1949 | if (!mddev->gendisk || blk_get_integrity(mddev->gendisk)) | 1949 | if (!mddev->gendisk || blk_get_integrity(mddev->gendisk)) |
1950 | return 0; /* shouldn't register, or already is */ | 1950 | return 0; /* shouldn't register, or already is */ |
1951 | list_for_each_entry(rdev, &mddev->disks, same_set) { | 1951 | rdev_for_each(rdev, mddev) { |
1952 | /* skip spares and non-functional disks */ | 1952 | /* skip spares and non-functional disks */ |
1953 | if (test_bit(Faulty, &rdev->flags)) | 1953 | if (test_bit(Faulty, &rdev->flags)) |
1954 | continue; | 1954 | continue; |
@@ -2175,7 +2175,7 @@ static void export_array(struct mddev *mddev) | |||
2175 | { | 2175 | { |
2176 | struct md_rdev *rdev, *tmp; | 2176 | struct md_rdev *rdev, *tmp; |
2177 | 2177 | ||
2178 | rdev_for_each(rdev, tmp, mddev) { | 2178 | rdev_for_each_safe(rdev, tmp, mddev) { |
2179 | if (!rdev->mddev) { | 2179 | if (!rdev->mddev) { |
2180 | MD_BUG(); | 2180 | MD_BUG(); |
2181 | continue; | 2181 | continue; |
@@ -2307,11 +2307,11 @@ static void md_print_devices(void) | |||
2307 | bitmap_print_sb(mddev->bitmap); | 2307 | bitmap_print_sb(mddev->bitmap); |
2308 | else | 2308 | else |
2309 | printk("%s: ", mdname(mddev)); | 2309 | printk("%s: ", mdname(mddev)); |
2310 | list_for_each_entry(rdev, &mddev->disks, same_set) | 2310 | rdev_for_each(rdev, mddev) |
2311 | printk("<%s>", bdevname(rdev->bdev,b)); | 2311 | printk("<%s>", bdevname(rdev->bdev,b)); |
2312 | printk("\n"); | 2312 | printk("\n"); |
2313 | 2313 | ||
2314 | list_for_each_entry(rdev, &mddev->disks, same_set) | 2314 | rdev_for_each(rdev, mddev) |
2315 | print_rdev(rdev, mddev->major_version); | 2315 | print_rdev(rdev, mddev->major_version); |
2316 | } | 2316 | } |
2317 | printk("md: **********************************\n"); | 2317 | printk("md: **********************************\n"); |
@@ -2328,7 +2328,7 @@ static void sync_sbs(struct mddev * mddev, int nospares) | |||
2328 | * with the rest of the array) | 2328 | * with the rest of the array) |
2329 | */ | 2329 | */ |
2330 | struct md_rdev *rdev; | 2330 | struct md_rdev *rdev; |
2331 | list_for_each_entry(rdev, &mddev->disks, same_set) { | 2331 | rdev_for_each(rdev, mddev) { |
2332 | if (rdev->sb_events == mddev->events || | 2332 | if (rdev->sb_events == mddev->events || |
2333 | (nospares && | 2333 | (nospares && |
2334 | rdev->raid_disk < 0 && | 2334 | rdev->raid_disk < 0 && |
@@ -2351,7 +2351,7 @@ static void md_update_sb(struct mddev * mddev, int force_change) | |||
2351 | 2351 | ||
2352 | repeat: | 2352 | repeat: |
2353 | /* First make sure individual recovery_offsets are correct */ | 2353 | /* First make sure individual recovery_offsets are correct */ |
2354 | list_for_each_entry(rdev, &mddev->disks, same_set) { | 2354 | rdev_for_each(rdev, mddev) { |
2355 | if (rdev->raid_disk >= 0 && | 2355 | if (rdev->raid_disk >= 0 && |
2356 | mddev->delta_disks >= 0 && | 2356 | mddev->delta_disks >= 0 && |
2357 | !test_bit(In_sync, &rdev->flags) && | 2357 | !test_bit(In_sync, &rdev->flags) && |
@@ -2364,8 +2364,9 @@ repeat: | |||
2364 | clear_bit(MD_CHANGE_DEVS, &mddev->flags); | 2364 | clear_bit(MD_CHANGE_DEVS, &mddev->flags); |
2365 | if (!mddev->external) { | 2365 | if (!mddev->external) { |
2366 | clear_bit(MD_CHANGE_PENDING, &mddev->flags); | 2366 | clear_bit(MD_CHANGE_PENDING, &mddev->flags); |
2367 | list_for_each_entry(rdev, &mddev->disks, same_set) { | 2367 | rdev_for_each(rdev, mddev) { |
2368 | if (rdev->badblocks.changed) { | 2368 | if (rdev->badblocks.changed) { |
2369 | rdev->badblocks.changed = 0; | ||
2369 | md_ack_all_badblocks(&rdev->badblocks); | 2370 | md_ack_all_badblocks(&rdev->badblocks); |
2370 | md_error(mddev, rdev); | 2371 | md_error(mddev, rdev); |
2371 | } | 2372 | } |
@@ -2430,7 +2431,7 @@ repeat: | |||
2430 | mddev->events --; | 2431 | mddev->events --; |
2431 | } | 2432 | } |
2432 | 2433 | ||
2433 | list_for_each_entry(rdev, &mddev->disks, same_set) { | 2434 | rdev_for_each(rdev, mddev) { |
2434 | if (rdev->badblocks.changed) | 2435 | if (rdev->badblocks.changed) |
2435 | any_badblocks_changed++; | 2436 | any_badblocks_changed++; |
2436 | if (test_bit(Faulty, &rdev->flags)) | 2437 | if (test_bit(Faulty, &rdev->flags)) |
@@ -2444,7 +2445,7 @@ repeat: | |||
2444 | mdname(mddev), mddev->in_sync); | 2445 | mdname(mddev), mddev->in_sync); |
2445 | 2446 | ||
2446 | bitmap_update_sb(mddev->bitmap); | 2447 | bitmap_update_sb(mddev->bitmap); |
2447 | list_for_each_entry(rdev, &mddev->disks, same_set) { | 2448 | rdev_for_each(rdev, mddev) { |
2448 | char b[BDEVNAME_SIZE]; | 2449 | char b[BDEVNAME_SIZE]; |
2449 | 2450 | ||
2450 | if (rdev->sb_loaded != 1) | 2451 | if (rdev->sb_loaded != 1) |
@@ -2493,7 +2494,7 @@ repeat: | |||
2493 | if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) | 2494 | if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) |
2494 | sysfs_notify(&mddev->kobj, NULL, "sync_completed"); | 2495 | sysfs_notify(&mddev->kobj, NULL, "sync_completed"); |
2495 | 2496 | ||
2496 | list_for_each_entry(rdev, &mddev->disks, same_set) { | 2497 | rdev_for_each(rdev, mddev) { |
2497 | if (test_and_clear_bit(FaultRecorded, &rdev->flags)) | 2498 | if (test_and_clear_bit(FaultRecorded, &rdev->flags)) |
2498 | clear_bit(Blocked, &rdev->flags); | 2499 | clear_bit(Blocked, &rdev->flags); |
2499 | 2500 | ||
@@ -2896,7 +2897,7 @@ rdev_size_store(struct md_rdev *rdev, const char *buf, size_t len) | |||
2896 | struct md_rdev *rdev2; | 2897 | struct md_rdev *rdev2; |
2897 | 2898 | ||
2898 | mddev_lock(mddev); | 2899 | mddev_lock(mddev); |
2899 | list_for_each_entry(rdev2, &mddev->disks, same_set) | 2900 | rdev_for_each(rdev2, mddev) |
2900 | if (rdev->bdev == rdev2->bdev && | 2901 | if (rdev->bdev == rdev2->bdev && |
2901 | rdev != rdev2 && | 2902 | rdev != rdev2 && |
2902 | overlaps(rdev->data_offset, rdev->sectors, | 2903 | overlaps(rdev->data_offset, rdev->sectors, |
@@ -3193,7 +3194,7 @@ static void analyze_sbs(struct mddev * mddev) | |||
3193 | char b[BDEVNAME_SIZE]; | 3194 | char b[BDEVNAME_SIZE]; |
3194 | 3195 | ||
3195 | freshest = NULL; | 3196 | freshest = NULL; |
3196 | rdev_for_each(rdev, tmp, mddev) | 3197 | rdev_for_each_safe(rdev, tmp, mddev) |
3197 | switch (super_types[mddev->major_version]. | 3198 | switch (super_types[mddev->major_version]. |
3198 | load_super(rdev, freshest, mddev->minor_version)) { | 3199 | load_super(rdev, freshest, mddev->minor_version)) { |
3199 | case 1: | 3200 | case 1: |
@@ -3214,7 +3215,7 @@ static void analyze_sbs(struct mddev * mddev) | |||
3214 | validate_super(mddev, freshest); | 3215 | validate_super(mddev, freshest); |
3215 | 3216 | ||
3216 | i = 0; | 3217 | i = 0; |
3217 | rdev_for_each(rdev, tmp, mddev) { | 3218 | rdev_for_each_safe(rdev, tmp, mddev) { |
3218 | if (mddev->max_disks && | 3219 | if (mddev->max_disks && |
3219 | (rdev->desc_nr >= mddev->max_disks || | 3220 | (rdev->desc_nr >= mddev->max_disks || |
3220 | i > mddev->max_disks)) { | 3221 | i > mddev->max_disks)) { |
@@ -3403,7 +3404,7 @@ level_store(struct mddev *mddev, const char *buf, size_t len) | |||
3403 | return -EINVAL; | 3404 | return -EINVAL; |
3404 | } | 3405 | } |
3405 | 3406 | ||
3406 | list_for_each_entry(rdev, &mddev->disks, same_set) | 3407 | rdev_for_each(rdev, mddev) |
3407 | rdev->new_raid_disk = rdev->raid_disk; | 3408 | rdev->new_raid_disk = rdev->raid_disk; |
3408 | 3409 | ||
3409 | /* ->takeover must set new_* and/or delta_disks | 3410 | /* ->takeover must set new_* and/or delta_disks |
@@ -3456,7 +3457,7 @@ level_store(struct mddev *mddev, const char *buf, size_t len) | |||
3456 | mddev->safemode = 0; | 3457 | mddev->safemode = 0; |
3457 | } | 3458 | } |
3458 | 3459 | ||
3459 | list_for_each_entry(rdev, &mddev->disks, same_set) { | 3460 | rdev_for_each(rdev, mddev) { |
3460 | if (rdev->raid_disk < 0) | 3461 | if (rdev->raid_disk < 0) |
3461 | continue; | 3462 | continue; |
3462 | if (rdev->new_raid_disk >= mddev->raid_disks) | 3463 | if (rdev->new_raid_disk >= mddev->raid_disks) |
@@ -3465,7 +3466,7 @@ level_store(struct mddev *mddev, const char *buf, size_t len) | |||
3465 | continue; | 3466 | continue; |
3466 | sysfs_unlink_rdev(mddev, rdev); | 3467 | sysfs_unlink_rdev(mddev, rdev); |
3467 | } | 3468 | } |
3468 | list_for_each_entry(rdev, &mddev->disks, same_set) { | 3469 | rdev_for_each(rdev, mddev) { |
3469 | if (rdev->raid_disk < 0) | 3470 | if (rdev->raid_disk < 0) |
3470 | continue; | 3471 | continue; |
3471 | if (rdev->new_raid_disk == rdev->raid_disk) | 3472 | if (rdev->new_raid_disk == rdev->raid_disk) |
@@ -4796,7 +4797,7 @@ int md_run(struct mddev *mddev) | |||
4796 | * the only valid external interface is through the md | 4797 | * the only valid external interface is through the md |
4797 | * device. | 4798 | * device. |
4798 | */ | 4799 | */ |
4799 | list_for_each_entry(rdev, &mddev->disks, same_set) { | 4800 | rdev_for_each(rdev, mddev) { |
4800 | if (test_bit(Faulty, &rdev->flags)) | 4801 | if (test_bit(Faulty, &rdev->flags)) |
4801 | continue; | 4802 | continue; |
4802 | sync_blockdev(rdev->bdev); | 4803 | sync_blockdev(rdev->bdev); |
@@ -4867,8 +4868,8 @@ int md_run(struct mddev *mddev) | |||
4867 | struct md_rdev *rdev2; | 4868 | struct md_rdev *rdev2; |
4868 | int warned = 0; | 4869 | int warned = 0; |
4869 | 4870 | ||
4870 | list_for_each_entry(rdev, &mddev->disks, same_set) | 4871 | rdev_for_each(rdev, mddev) |
4871 | list_for_each_entry(rdev2, &mddev->disks, same_set) { | 4872 | rdev_for_each(rdev2, mddev) { |
4872 | if (rdev < rdev2 && | 4873 | if (rdev < rdev2 && |
4873 | rdev->bdev->bd_contains == | 4874 | rdev->bdev->bd_contains == |
4874 | rdev2->bdev->bd_contains) { | 4875 | rdev2->bdev->bd_contains) { |
@@ -4945,7 +4946,7 @@ int md_run(struct mddev *mddev) | |||
4945 | mddev->in_sync = 1; | 4946 | mddev->in_sync = 1; |
4946 | smp_wmb(); | 4947 | smp_wmb(); |
4947 | mddev->ready = 1; | 4948 | mddev->ready = 1; |
4948 | list_for_each_entry(rdev, &mddev->disks, same_set) | 4949 | rdev_for_each(rdev, mddev) |
4949 | if (rdev->raid_disk >= 0) | 4950 | if (rdev->raid_disk >= 0) |
4950 | if (sysfs_link_rdev(mddev, rdev)) | 4951 | if (sysfs_link_rdev(mddev, rdev)) |
4951 | /* failure here is OK */; | 4952 | /* failure here is OK */; |
@@ -5073,6 +5074,7 @@ static void md_clean(struct mddev *mddev) | |||
5073 | mddev->changed = 0; | 5074 | mddev->changed = 0; |
5074 | mddev->degraded = 0; | 5075 | mddev->degraded = 0; |
5075 | mddev->safemode = 0; | 5076 | mddev->safemode = 0; |
5077 | mddev->merge_check_needed = 0; | ||
5076 | mddev->bitmap_info.offset = 0; | 5078 | mddev->bitmap_info.offset = 0; |
5077 | mddev->bitmap_info.default_offset = 0; | 5079 | mddev->bitmap_info.default_offset = 0; |
5078 | mddev->bitmap_info.chunksize = 0; | 5080 | mddev->bitmap_info.chunksize = 0; |
@@ -5175,7 +5177,7 @@ static int do_md_stop(struct mddev * mddev, int mode, int is_open) | |||
5175 | /* tell userspace to handle 'inactive' */ | 5177 | /* tell userspace to handle 'inactive' */ |
5176 | sysfs_notify_dirent_safe(mddev->sysfs_state); | 5178 | sysfs_notify_dirent_safe(mddev->sysfs_state); |
5177 | 5179 | ||
5178 | list_for_each_entry(rdev, &mddev->disks, same_set) | 5180 | rdev_for_each(rdev, mddev) |
5179 | if (rdev->raid_disk >= 0) | 5181 | if (rdev->raid_disk >= 0) |
5180 | sysfs_unlink_rdev(mddev, rdev); | 5182 | sysfs_unlink_rdev(mddev, rdev); |
5181 | 5183 | ||
@@ -5226,7 +5228,7 @@ static void autorun_array(struct mddev *mddev) | |||
5226 | 5228 | ||
5227 | printk(KERN_INFO "md: running: "); | 5229 | printk(KERN_INFO "md: running: "); |
5228 | 5230 | ||
5229 | list_for_each_entry(rdev, &mddev->disks, same_set) { | 5231 | rdev_for_each(rdev, mddev) { |
5230 | char b[BDEVNAME_SIZE]; | 5232 | char b[BDEVNAME_SIZE]; |
5231 | printk("<%s>", bdevname(rdev->bdev,b)); | 5233 | printk("<%s>", bdevname(rdev->bdev,b)); |
5232 | } | 5234 | } |
@@ -5356,7 +5358,7 @@ static int get_array_info(struct mddev * mddev, void __user * arg) | |||
5356 | struct md_rdev *rdev; | 5358 | struct md_rdev *rdev; |
5357 | 5359 | ||
5358 | nr=working=insync=failed=spare=0; | 5360 | nr=working=insync=failed=spare=0; |
5359 | list_for_each_entry(rdev, &mddev->disks, same_set) { | 5361 | rdev_for_each(rdev, mddev) { |
5360 | nr++; | 5362 | nr++; |
5361 | if (test_bit(Faulty, &rdev->flags)) | 5363 | if (test_bit(Faulty, &rdev->flags)) |
5362 | failed++; | 5364 | failed++; |
@@ -5923,7 +5925,7 @@ static int update_size(struct mddev *mddev, sector_t num_sectors) | |||
5923 | * grow, and re-add. | 5925 | * grow, and re-add. |
5924 | */ | 5926 | */ |
5925 | return -EBUSY; | 5927 | return -EBUSY; |
5926 | list_for_each_entry(rdev, &mddev->disks, same_set) { | 5928 | rdev_for_each(rdev, mddev) { |
5927 | sector_t avail = rdev->sectors; | 5929 | sector_t avail = rdev->sectors; |
5928 | 5930 | ||
5929 | if (fit && (num_sectors == 0 || num_sectors > avail)) | 5931 | if (fit && (num_sectors == 0 || num_sectors > avail)) |
@@ -6724,7 +6726,6 @@ static int md_seq_show(struct seq_file *seq, void *v) | |||
6724 | struct mddev *mddev = v; | 6726 | struct mddev *mddev = v; |
6725 | sector_t sectors; | 6727 | sector_t sectors; |
6726 | struct md_rdev *rdev; | 6728 | struct md_rdev *rdev; |
6727 | struct bitmap *bitmap; | ||
6728 | 6729 | ||
6729 | if (v == (void*)1) { | 6730 | if (v == (void*)1) { |
6730 | struct md_personality *pers; | 6731 | struct md_personality *pers; |
@@ -6758,7 +6759,7 @@ static int md_seq_show(struct seq_file *seq, void *v) | |||
6758 | } | 6759 | } |
6759 | 6760 | ||
6760 | sectors = 0; | 6761 | sectors = 0; |
6761 | list_for_each_entry(rdev, &mddev->disks, same_set) { | 6762 | rdev_for_each(rdev, mddev) { |
6762 | char b[BDEVNAME_SIZE]; | 6763 | char b[BDEVNAME_SIZE]; |
6763 | seq_printf(seq, " %s[%d]", | 6764 | seq_printf(seq, " %s[%d]", |
6764 | bdevname(rdev->bdev,b), rdev->desc_nr); | 6765 | bdevname(rdev->bdev,b), rdev->desc_nr); |
@@ -6812,27 +6813,7 @@ static int md_seq_show(struct seq_file *seq, void *v) | |||
6812 | } else | 6813 | } else |
6813 | seq_printf(seq, "\n "); | 6814 | seq_printf(seq, "\n "); |
6814 | 6815 | ||
6815 | if ((bitmap = mddev->bitmap)) { | 6816 | bitmap_status(seq, mddev->bitmap); |
6816 | unsigned long chunk_kb; | ||
6817 | unsigned long flags; | ||
6818 | spin_lock_irqsave(&bitmap->lock, flags); | ||
6819 | chunk_kb = mddev->bitmap_info.chunksize >> 10; | ||
6820 | seq_printf(seq, "bitmap: %lu/%lu pages [%luKB], " | ||
6821 | "%lu%s chunk", | ||
6822 | bitmap->pages - bitmap->missing_pages, | ||
6823 | bitmap->pages, | ||
6824 | (bitmap->pages - bitmap->missing_pages) | ||
6825 | << (PAGE_SHIFT - 10), | ||
6826 | chunk_kb ? chunk_kb : mddev->bitmap_info.chunksize, | ||
6827 | chunk_kb ? "KB" : "B"); | ||
6828 | if (bitmap->file) { | ||
6829 | seq_printf(seq, ", file: "); | ||
6830 | seq_path(seq, &bitmap->file->f_path, " \t\n"); | ||
6831 | } | ||
6832 | |||
6833 | seq_printf(seq, "\n"); | ||
6834 | spin_unlock_irqrestore(&bitmap->lock, flags); | ||
6835 | } | ||
6836 | 6817 | ||
6837 | seq_printf(seq, "\n"); | 6818 | seq_printf(seq, "\n"); |
6838 | } | 6819 | } |
@@ -7170,7 +7151,7 @@ void md_do_sync(struct mddev *mddev) | |||
7170 | max_sectors = mddev->dev_sectors; | 7151 | max_sectors = mddev->dev_sectors; |
7171 | j = MaxSector; | 7152 | j = MaxSector; |
7172 | rcu_read_lock(); | 7153 | rcu_read_lock(); |
7173 | list_for_each_entry_rcu(rdev, &mddev->disks, same_set) | 7154 | rdev_for_each_rcu(rdev, mddev) |
7174 | if (rdev->raid_disk >= 0 && | 7155 | if (rdev->raid_disk >= 0 && |
7175 | !test_bit(Faulty, &rdev->flags) && | 7156 | !test_bit(Faulty, &rdev->flags) && |
7176 | !test_bit(In_sync, &rdev->flags) && | 7157 | !test_bit(In_sync, &rdev->flags) && |
@@ -7342,7 +7323,7 @@ void md_do_sync(struct mddev *mddev) | |||
7342 | if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery)) | 7323 | if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery)) |
7343 | mddev->curr_resync = MaxSector; | 7324 | mddev->curr_resync = MaxSector; |
7344 | rcu_read_lock(); | 7325 | rcu_read_lock(); |
7345 | list_for_each_entry_rcu(rdev, &mddev->disks, same_set) | 7326 | rdev_for_each_rcu(rdev, mddev) |
7346 | if (rdev->raid_disk >= 0 && | 7327 | if (rdev->raid_disk >= 0 && |
7347 | mddev->delta_disks >= 0 && | 7328 | mddev->delta_disks >= 0 && |
7348 | !test_bit(Faulty, &rdev->flags) && | 7329 | !test_bit(Faulty, &rdev->flags) && |
@@ -7388,7 +7369,7 @@ static int remove_and_add_spares(struct mddev *mddev) | |||
7388 | 7369 | ||
7389 | mddev->curr_resync_completed = 0; | 7370 | mddev->curr_resync_completed = 0; |
7390 | 7371 | ||
7391 | list_for_each_entry(rdev, &mddev->disks, same_set) | 7372 | rdev_for_each(rdev, mddev) |
7392 | if (rdev->raid_disk >= 0 && | 7373 | if (rdev->raid_disk >= 0 && |
7393 | !test_bit(Blocked, &rdev->flags) && | 7374 | !test_bit(Blocked, &rdev->flags) && |
7394 | (test_bit(Faulty, &rdev->flags) || | 7375 | (test_bit(Faulty, &rdev->flags) || |
@@ -7406,7 +7387,7 @@ static int remove_and_add_spares(struct mddev *mddev) | |||
7406 | "degraded"); | 7387 | "degraded"); |
7407 | 7388 | ||
7408 | 7389 | ||
7409 | list_for_each_entry(rdev, &mddev->disks, same_set) { | 7390 | rdev_for_each(rdev, mddev) { |
7410 | if (rdev->raid_disk >= 0 && | 7391 | if (rdev->raid_disk >= 0 && |
7411 | !test_bit(In_sync, &rdev->flags) && | 7392 | !test_bit(In_sync, &rdev->flags) && |
7412 | !test_bit(Faulty, &rdev->flags)) | 7393 | !test_bit(Faulty, &rdev->flags)) |
@@ -7451,7 +7432,7 @@ static void reap_sync_thread(struct mddev *mddev) | |||
7451 | * do the superblock for an incrementally recovered device | 7432 | * do the superblock for an incrementally recovered device |
7452 | * written out. | 7433 | * written out. |
7453 | */ | 7434 | */ |
7454 | list_for_each_entry(rdev, &mddev->disks, same_set) | 7435 | rdev_for_each(rdev, mddev) |
7455 | if (!mddev->degraded || | 7436 | if (!mddev->degraded || |
7456 | test_bit(In_sync, &rdev->flags)) | 7437 | test_bit(In_sync, &rdev->flags)) |
7457 | rdev->saved_raid_disk = -1; | 7438 | rdev->saved_raid_disk = -1; |
@@ -7529,7 +7510,7 @@ void md_check_recovery(struct mddev *mddev) | |||
7529 | * failed devices. | 7510 | * failed devices. |
7530 | */ | 7511 | */ |
7531 | struct md_rdev *rdev; | 7512 | struct md_rdev *rdev; |
7532 | list_for_each_entry(rdev, &mddev->disks, same_set) | 7513 | rdev_for_each(rdev, mddev) |
7533 | if (rdev->raid_disk >= 0 && | 7514 | if (rdev->raid_disk >= 0 && |
7534 | !test_bit(Blocked, &rdev->flags) && | 7515 | !test_bit(Blocked, &rdev->flags) && |
7535 | test_bit(Faulty, &rdev->flags) && | 7516 | test_bit(Faulty, &rdev->flags) && |
@@ -8040,7 +8021,7 @@ void md_ack_all_badblocks(struct badblocks *bb) | |||
8040 | return; | 8021 | return; |
8041 | write_seqlock_irq(&bb->lock); | 8022 | write_seqlock_irq(&bb->lock); |
8042 | 8023 | ||
8043 | if (bb->changed == 0) { | 8024 | if (bb->changed == 0 && bb->unacked_exist) { |
8044 | u64 *p = bb->page; | 8025 | u64 *p = bb->page; |
8045 | int i; | 8026 | int i; |
8046 | for (i = 0; i < bb->count ; i++) { | 8027 | for (i = 0; i < bb->count ; i++) { |
@@ -8157,30 +8138,23 @@ static int md_notify_reboot(struct notifier_block *this, | |||
8157 | struct mddev *mddev; | 8138 | struct mddev *mddev; |
8158 | int need_delay = 0; | 8139 | int need_delay = 0; |
8159 | 8140 | ||
8160 | if ((code == SYS_DOWN) || (code == SYS_HALT) || (code == SYS_POWER_OFF)) { | 8141 | for_each_mddev(mddev, tmp) { |
8161 | 8142 | if (mddev_trylock(mddev)) { | |
8162 | printk(KERN_INFO "md: stopping all md devices.\n"); | 8143 | __md_stop_writes(mddev); |
8163 | 8144 | mddev->safemode = 2; | |
8164 | for_each_mddev(mddev, tmp) { | 8145 | mddev_unlock(mddev); |
8165 | if (mddev_trylock(mddev)) { | ||
8166 | /* Force a switch to readonly even array | ||
8167 | * appears to still be in use. Hence | ||
8168 | * the '100'. | ||
8169 | */ | ||
8170 | md_set_readonly(mddev, 100); | ||
8171 | mddev_unlock(mddev); | ||
8172 | } | ||
8173 | need_delay = 1; | ||
8174 | } | 8146 | } |
8175 | /* | 8147 | need_delay = 1; |
8176 | * certain more exotic SCSI devices are known to be | ||
8177 | * volatile wrt too early system reboots. While the | ||
8178 | * right place to handle this issue is the given | ||
8179 | * driver, we do want to have a safe RAID driver ... | ||
8180 | */ | ||
8181 | if (need_delay) | ||
8182 | mdelay(1000*1); | ||
8183 | } | 8148 | } |
8149 | /* | ||
8150 | * certain more exotic SCSI devices are known to be | ||
8151 | * volatile wrt too early system reboots. While the | ||
8152 | * right place to handle this issue is the given | ||
8153 | * driver, we do want to have a safe RAID driver ... | ||
8154 | */ | ||
8155 | if (need_delay) | ||
8156 | mdelay(1000*1); | ||
8157 | |||
8184 | return NOTIFY_DONE; | 8158 | return NOTIFY_DONE; |
8185 | } | 8159 | } |
8186 | 8160 | ||
diff --git a/drivers/md/md.h b/drivers/md/md.h index 44c63dfeeb2b..1c2063ccf48e 100644 --- a/drivers/md/md.h +++ b/drivers/md/md.h | |||
@@ -128,6 +128,10 @@ struct md_rdev { | |||
128 | enum flag_bits { | 128 | enum flag_bits { |
129 | Faulty, /* device is known to have a fault */ | 129 | Faulty, /* device is known to have a fault */ |
130 | In_sync, /* device is in_sync with rest of array */ | 130 | In_sync, /* device is in_sync with rest of array */ |
131 | Unmerged, /* device is being added to array and should | ||
132 | * be considerred for bvec_merge_fn but not | ||
133 | * yet for actual IO | ||
134 | */ | ||
131 | WriteMostly, /* Avoid reading if at all possible */ | 135 | WriteMostly, /* Avoid reading if at all possible */ |
132 | AutoDetected, /* added by auto-detect */ | 136 | AutoDetected, /* added by auto-detect */ |
133 | Blocked, /* An error occurred but has not yet | 137 | Blocked, /* An error occurred but has not yet |
@@ -345,6 +349,10 @@ struct mddev { | |||
345 | int degraded; /* whether md should consider | 349 | int degraded; /* whether md should consider |
346 | * adding a spare | 350 | * adding a spare |
347 | */ | 351 | */ |
352 | int merge_check_needed; /* at least one | ||
353 | * member device | ||
354 | * has a | ||
355 | * merge_bvec_fn */ | ||
348 | 356 | ||
349 | atomic_t recovery_active; /* blocks scheduled, but not written */ | 357 | atomic_t recovery_active; /* blocks scheduled, but not written */ |
350 | wait_queue_head_t recovery_wait; | 358 | wait_queue_head_t recovery_wait; |
@@ -519,7 +527,10 @@ static inline void sysfs_unlink_rdev(struct mddev *mddev, struct md_rdev *rdev) | |||
519 | /* | 527 | /* |
520 | * iterates through the 'same array disks' ringlist | 528 | * iterates through the 'same array disks' ringlist |
521 | */ | 529 | */ |
522 | #define rdev_for_each(rdev, tmp, mddev) \ | 530 | #define rdev_for_each(rdev, mddev) \ |
531 | list_for_each_entry(rdev, &((mddev)->disks), same_set) | ||
532 | |||
533 | #define rdev_for_each_safe(rdev, tmp, mddev) \ | ||
523 | list_for_each_entry_safe(rdev, tmp, &((mddev)->disks), same_set) | 534 | list_for_each_entry_safe(rdev, tmp, &((mddev)->disks), same_set) |
524 | 535 | ||
525 | #define rdev_for_each_rcu(rdev, mddev) \ | 536 | #define rdev_for_each_rcu(rdev, mddev) \ |
diff --git a/drivers/md/multipath.c b/drivers/md/multipath.c index a222f516660e..9339e67fcc79 100644 --- a/drivers/md/multipath.c +++ b/drivers/md/multipath.c | |||
@@ -428,7 +428,7 @@ static int multipath_run (struct mddev *mddev) | |||
428 | } | 428 | } |
429 | 429 | ||
430 | working_disks = 0; | 430 | working_disks = 0; |
431 | list_for_each_entry(rdev, &mddev->disks, same_set) { | 431 | rdev_for_each(rdev, mddev) { |
432 | disk_idx = rdev->raid_disk; | 432 | disk_idx = rdev->raid_disk; |
433 | if (disk_idx < 0 || | 433 | if (disk_idx < 0 || |
434 | disk_idx >= mddev->raid_disks) | 434 | disk_idx >= mddev->raid_disks) |
diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c index 7294bd115e34..6f31f5596e01 100644 --- a/drivers/md/raid0.c +++ b/drivers/md/raid0.c | |||
@@ -91,7 +91,7 @@ static int create_strip_zones(struct mddev *mddev, struct r0conf **private_conf) | |||
91 | 91 | ||
92 | if (!conf) | 92 | if (!conf) |
93 | return -ENOMEM; | 93 | return -ENOMEM; |
94 | list_for_each_entry(rdev1, &mddev->disks, same_set) { | 94 | rdev_for_each(rdev1, mddev) { |
95 | pr_debug("md/raid0:%s: looking at %s\n", | 95 | pr_debug("md/raid0:%s: looking at %s\n", |
96 | mdname(mddev), | 96 | mdname(mddev), |
97 | bdevname(rdev1->bdev, b)); | 97 | bdevname(rdev1->bdev, b)); |
@@ -102,7 +102,7 @@ static int create_strip_zones(struct mddev *mddev, struct r0conf **private_conf) | |||
102 | sector_div(sectors, mddev->chunk_sectors); | 102 | sector_div(sectors, mddev->chunk_sectors); |
103 | rdev1->sectors = sectors * mddev->chunk_sectors; | 103 | rdev1->sectors = sectors * mddev->chunk_sectors; |
104 | 104 | ||
105 | list_for_each_entry(rdev2, &mddev->disks, same_set) { | 105 | rdev_for_each(rdev2, mddev) { |
106 | pr_debug("md/raid0:%s: comparing %s(%llu)" | 106 | pr_debug("md/raid0:%s: comparing %s(%llu)" |
107 | " with %s(%llu)\n", | 107 | " with %s(%llu)\n", |
108 | mdname(mddev), | 108 | mdname(mddev), |
@@ -157,7 +157,7 @@ static int create_strip_zones(struct mddev *mddev, struct r0conf **private_conf) | |||
157 | smallest = NULL; | 157 | smallest = NULL; |
158 | dev = conf->devlist; | 158 | dev = conf->devlist; |
159 | err = -EINVAL; | 159 | err = -EINVAL; |
160 | list_for_each_entry(rdev1, &mddev->disks, same_set) { | 160 | rdev_for_each(rdev1, mddev) { |
161 | int j = rdev1->raid_disk; | 161 | int j = rdev1->raid_disk; |
162 | 162 | ||
163 | if (mddev->level == 10) { | 163 | if (mddev->level == 10) { |
@@ -188,16 +188,10 @@ static int create_strip_zones(struct mddev *mddev, struct r0conf **private_conf) | |||
188 | 188 | ||
189 | disk_stack_limits(mddev->gendisk, rdev1->bdev, | 189 | disk_stack_limits(mddev->gendisk, rdev1->bdev, |
190 | rdev1->data_offset << 9); | 190 | rdev1->data_offset << 9); |
191 | /* as we don't honour merge_bvec_fn, we must never risk | ||
192 | * violating it, so limit ->max_segments to 1, lying within | ||
193 | * a single page. | ||
194 | */ | ||
195 | 191 | ||
196 | if (rdev1->bdev->bd_disk->queue->merge_bvec_fn) { | 192 | if (rdev1->bdev->bd_disk->queue->merge_bvec_fn) |
197 | blk_queue_max_segments(mddev->queue, 1); | 193 | conf->has_merge_bvec = 1; |
198 | blk_queue_segment_boundary(mddev->queue, | 194 | |
199 | PAGE_CACHE_SIZE - 1); | ||
200 | } | ||
201 | if (!smallest || (rdev1->sectors < smallest->sectors)) | 195 | if (!smallest || (rdev1->sectors < smallest->sectors)) |
202 | smallest = rdev1; | 196 | smallest = rdev1; |
203 | cnt++; | 197 | cnt++; |
@@ -290,8 +284,64 @@ abort: | |||
290 | return err; | 284 | return err; |
291 | } | 285 | } |
292 | 286 | ||
287 | /* Find the zone which holds a particular offset | ||
288 | * Update *sectorp to be an offset in that zone | ||
289 | */ | ||
290 | static struct strip_zone *find_zone(struct r0conf *conf, | ||
291 | sector_t *sectorp) | ||
292 | { | ||
293 | int i; | ||
294 | struct strip_zone *z = conf->strip_zone; | ||
295 | sector_t sector = *sectorp; | ||
296 | |||
297 | for (i = 0; i < conf->nr_strip_zones; i++) | ||
298 | if (sector < z[i].zone_end) { | ||
299 | if (i) | ||
300 | *sectorp = sector - z[i-1].zone_end; | ||
301 | return z + i; | ||
302 | } | ||
303 | BUG(); | ||
304 | } | ||
305 | |||
306 | /* | ||
307 | * remaps the bio to the target device. we separate two flows. | ||
308 | * power 2 flow and a general flow for the sake of perfromance | ||
309 | */ | ||
310 | static struct md_rdev *map_sector(struct mddev *mddev, struct strip_zone *zone, | ||
311 | sector_t sector, sector_t *sector_offset) | ||
312 | { | ||
313 | unsigned int sect_in_chunk; | ||
314 | sector_t chunk; | ||
315 | struct r0conf *conf = mddev->private; | ||
316 | int raid_disks = conf->strip_zone[0].nb_dev; | ||
317 | unsigned int chunk_sects = mddev->chunk_sectors; | ||
318 | |||
319 | if (is_power_of_2(chunk_sects)) { | ||
320 | int chunksect_bits = ffz(~chunk_sects); | ||
321 | /* find the sector offset inside the chunk */ | ||
322 | sect_in_chunk = sector & (chunk_sects - 1); | ||
323 | sector >>= chunksect_bits; | ||
324 | /* chunk in zone */ | ||
325 | chunk = *sector_offset; | ||
326 | /* quotient is the chunk in real device*/ | ||
327 | sector_div(chunk, zone->nb_dev << chunksect_bits); | ||
328 | } else{ | ||
329 | sect_in_chunk = sector_div(sector, chunk_sects); | ||
330 | chunk = *sector_offset; | ||
331 | sector_div(chunk, chunk_sects * zone->nb_dev); | ||
332 | } | ||
333 | /* | ||
334 | * position the bio over the real device | ||
335 | * real sector = chunk in device + starting of zone | ||
336 | * + the position in the chunk | ||
337 | */ | ||
338 | *sector_offset = (chunk * chunk_sects) + sect_in_chunk; | ||
339 | return conf->devlist[(zone - conf->strip_zone)*raid_disks | ||
340 | + sector_div(sector, zone->nb_dev)]; | ||
341 | } | ||
342 | |||
293 | /** | 343 | /** |
294 | * raid0_mergeable_bvec -- tell bio layer if a two requests can be merged | 344 | * raid0_mergeable_bvec -- tell bio layer if two requests can be merged |
295 | * @q: request queue | 345 | * @q: request queue |
296 | * @bvm: properties of new bio | 346 | * @bvm: properties of new bio |
297 | * @biovec: the request that could be merged to it. | 347 | * @biovec: the request that could be merged to it. |
@@ -303,10 +353,15 @@ static int raid0_mergeable_bvec(struct request_queue *q, | |||
303 | struct bio_vec *biovec) | 353 | struct bio_vec *biovec) |
304 | { | 354 | { |
305 | struct mddev *mddev = q->queuedata; | 355 | struct mddev *mddev = q->queuedata; |
356 | struct r0conf *conf = mddev->private; | ||
306 | sector_t sector = bvm->bi_sector + get_start_sect(bvm->bi_bdev); | 357 | sector_t sector = bvm->bi_sector + get_start_sect(bvm->bi_bdev); |
358 | sector_t sector_offset = sector; | ||
307 | int max; | 359 | int max; |
308 | unsigned int chunk_sectors = mddev->chunk_sectors; | 360 | unsigned int chunk_sectors = mddev->chunk_sectors; |
309 | unsigned int bio_sectors = bvm->bi_size >> 9; | 361 | unsigned int bio_sectors = bvm->bi_size >> 9; |
362 | struct strip_zone *zone; | ||
363 | struct md_rdev *rdev; | ||
364 | struct request_queue *subq; | ||
310 | 365 | ||
311 | if (is_power_of_2(chunk_sectors)) | 366 | if (is_power_of_2(chunk_sectors)) |
312 | max = (chunk_sectors - ((sector & (chunk_sectors-1)) | 367 | max = (chunk_sectors - ((sector & (chunk_sectors-1)) |
@@ -314,10 +369,27 @@ static int raid0_mergeable_bvec(struct request_queue *q, | |||
314 | else | 369 | else |
315 | max = (chunk_sectors - (sector_div(sector, chunk_sectors) | 370 | max = (chunk_sectors - (sector_div(sector, chunk_sectors) |
316 | + bio_sectors)) << 9; | 371 | + bio_sectors)) << 9; |
317 | if (max < 0) max = 0; /* bio_add cannot handle a negative return */ | 372 | if (max < 0) |
373 | max = 0; /* bio_add cannot handle a negative return */ | ||
318 | if (max <= biovec->bv_len && bio_sectors == 0) | 374 | if (max <= biovec->bv_len && bio_sectors == 0) |
319 | return biovec->bv_len; | 375 | return biovec->bv_len; |
320 | else | 376 | if (max < biovec->bv_len) |
377 | /* too small already, no need to check further */ | ||
378 | return max; | ||
379 | if (!conf->has_merge_bvec) | ||
380 | return max; | ||
381 | |||
382 | /* May need to check subordinate device */ | ||
383 | sector = sector_offset; | ||
384 | zone = find_zone(mddev->private, &sector_offset); | ||
385 | rdev = map_sector(mddev, zone, sector, &sector_offset); | ||
386 | subq = bdev_get_queue(rdev->bdev); | ||
387 | if (subq->merge_bvec_fn) { | ||
388 | bvm->bi_bdev = rdev->bdev; | ||
389 | bvm->bi_sector = sector_offset + zone->dev_start + | ||
390 | rdev->data_offset; | ||
391 | return min(max, subq->merge_bvec_fn(subq, bvm, biovec)); | ||
392 | } else | ||
321 | return max; | 393 | return max; |
322 | } | 394 | } |
323 | 395 | ||
@@ -329,7 +401,7 @@ static sector_t raid0_size(struct mddev *mddev, sector_t sectors, int raid_disks | |||
329 | WARN_ONCE(sectors || raid_disks, | 401 | WARN_ONCE(sectors || raid_disks, |
330 | "%s does not support generic reshape\n", __func__); | 402 | "%s does not support generic reshape\n", __func__); |
331 | 403 | ||
332 | list_for_each_entry(rdev, &mddev->disks, same_set) | 404 | rdev_for_each(rdev, mddev) |
333 | array_sectors += rdev->sectors; | 405 | array_sectors += rdev->sectors; |
334 | 406 | ||
335 | return array_sectors; | 407 | return array_sectors; |
@@ -397,62 +469,6 @@ static int raid0_stop(struct mddev *mddev) | |||
397 | return 0; | 469 | return 0; |
398 | } | 470 | } |
399 | 471 | ||
400 | /* Find the zone which holds a particular offset | ||
401 | * Update *sectorp to be an offset in that zone | ||
402 | */ | ||
403 | static struct strip_zone *find_zone(struct r0conf *conf, | ||
404 | sector_t *sectorp) | ||
405 | { | ||
406 | int i; | ||
407 | struct strip_zone *z = conf->strip_zone; | ||
408 | sector_t sector = *sectorp; | ||
409 | |||
410 | for (i = 0; i < conf->nr_strip_zones; i++) | ||
411 | if (sector < z[i].zone_end) { | ||
412 | if (i) | ||
413 | *sectorp = sector - z[i-1].zone_end; | ||
414 | return z + i; | ||
415 | } | ||
416 | BUG(); | ||
417 | } | ||
418 | |||
419 | /* | ||
420 | * remaps the bio to the target device. we separate two flows. | ||
421 | * power 2 flow and a general flow for the sake of perfromance | ||
422 | */ | ||
423 | static struct md_rdev *map_sector(struct mddev *mddev, struct strip_zone *zone, | ||
424 | sector_t sector, sector_t *sector_offset) | ||
425 | { | ||
426 | unsigned int sect_in_chunk; | ||
427 | sector_t chunk; | ||
428 | struct r0conf *conf = mddev->private; | ||
429 | int raid_disks = conf->strip_zone[0].nb_dev; | ||
430 | unsigned int chunk_sects = mddev->chunk_sectors; | ||
431 | |||
432 | if (is_power_of_2(chunk_sects)) { | ||
433 | int chunksect_bits = ffz(~chunk_sects); | ||
434 | /* find the sector offset inside the chunk */ | ||
435 | sect_in_chunk = sector & (chunk_sects - 1); | ||
436 | sector >>= chunksect_bits; | ||
437 | /* chunk in zone */ | ||
438 | chunk = *sector_offset; | ||
439 | /* quotient is the chunk in real device*/ | ||
440 | sector_div(chunk, zone->nb_dev << chunksect_bits); | ||
441 | } else{ | ||
442 | sect_in_chunk = sector_div(sector, chunk_sects); | ||
443 | chunk = *sector_offset; | ||
444 | sector_div(chunk, chunk_sects * zone->nb_dev); | ||
445 | } | ||
446 | /* | ||
447 | * position the bio over the real device | ||
448 | * real sector = chunk in device + starting of zone | ||
449 | * + the position in the chunk | ||
450 | */ | ||
451 | *sector_offset = (chunk * chunk_sects) + sect_in_chunk; | ||
452 | return conf->devlist[(zone - conf->strip_zone)*raid_disks | ||
453 | + sector_div(sector, zone->nb_dev)]; | ||
454 | } | ||
455 | |||
456 | /* | 472 | /* |
457 | * Is io distribute over 1 or more chunks ? | 473 | * Is io distribute over 1 or more chunks ? |
458 | */ | 474 | */ |
@@ -505,7 +521,7 @@ static void raid0_make_request(struct mddev *mddev, struct bio *bio) | |||
505 | } | 521 | } |
506 | 522 | ||
507 | sector_offset = bio->bi_sector; | 523 | sector_offset = bio->bi_sector; |
508 | zone = find_zone(mddev->private, &sector_offset); | 524 | zone = find_zone(mddev->private, &sector_offset); |
509 | tmp_dev = map_sector(mddev, zone, bio->bi_sector, | 525 | tmp_dev = map_sector(mddev, zone, bio->bi_sector, |
510 | &sector_offset); | 526 | &sector_offset); |
511 | bio->bi_bdev = tmp_dev->bdev; | 527 | bio->bi_bdev = tmp_dev->bdev; |
@@ -543,7 +559,7 @@ static void *raid0_takeover_raid45(struct mddev *mddev) | |||
543 | return ERR_PTR(-EINVAL); | 559 | return ERR_PTR(-EINVAL); |
544 | } | 560 | } |
545 | 561 | ||
546 | list_for_each_entry(rdev, &mddev->disks, same_set) { | 562 | rdev_for_each(rdev, mddev) { |
547 | /* check slot number for a disk */ | 563 | /* check slot number for a disk */ |
548 | if (rdev->raid_disk == mddev->raid_disks-1) { | 564 | if (rdev->raid_disk == mddev->raid_disks-1) { |
549 | printk(KERN_ERR "md/raid0:%s: raid5 must have missing parity disk!\n", | 565 | printk(KERN_ERR "md/raid0:%s: raid5 must have missing parity disk!\n", |
diff --git a/drivers/md/raid0.h b/drivers/md/raid0.h index 0884bba8df4c..05539d9c97f0 100644 --- a/drivers/md/raid0.h +++ b/drivers/md/raid0.h | |||
@@ -4,13 +4,16 @@ | |||
4 | struct strip_zone { | 4 | struct strip_zone { |
5 | sector_t zone_end; /* Start of the next zone (in sectors) */ | 5 | sector_t zone_end; /* Start of the next zone (in sectors) */ |
6 | sector_t dev_start; /* Zone offset in real dev (in sectors) */ | 6 | sector_t dev_start; /* Zone offset in real dev (in sectors) */ |
7 | int nb_dev; /* # of devices attached to the zone */ | 7 | int nb_dev; /* # of devices attached to the zone */ |
8 | }; | 8 | }; |
9 | 9 | ||
10 | struct r0conf { | 10 | struct r0conf { |
11 | struct strip_zone *strip_zone; | 11 | struct strip_zone *strip_zone; |
12 | struct md_rdev **devlist; /* lists of rdevs, pointed to by strip_zone->dev */ | 12 | struct md_rdev **devlist; /* lists of rdevs, pointed to |
13 | int nr_strip_zones; | 13 | * by strip_zone->dev */ |
14 | int nr_strip_zones; | ||
15 | int has_merge_bvec; /* at least one member has | ||
16 | * a merge_bvec_fn */ | ||
14 | }; | 17 | }; |
15 | 18 | ||
16 | #endif | 19 | #endif |
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index a0b225eb4ac4..4a40a200d769 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c | |||
@@ -523,6 +523,7 @@ static int read_balance(struct r1conf *conf, struct r1bio *r1_bio, int *max_sect | |||
523 | rdev = rcu_dereference(conf->mirrors[disk].rdev); | 523 | rdev = rcu_dereference(conf->mirrors[disk].rdev); |
524 | if (r1_bio->bios[disk] == IO_BLOCKED | 524 | if (r1_bio->bios[disk] == IO_BLOCKED |
525 | || rdev == NULL | 525 | || rdev == NULL |
526 | || test_bit(Unmerged, &rdev->flags) | ||
526 | || test_bit(Faulty, &rdev->flags)) | 527 | || test_bit(Faulty, &rdev->flags)) |
527 | continue; | 528 | continue; |
528 | if (!test_bit(In_sync, &rdev->flags) && | 529 | if (!test_bit(In_sync, &rdev->flags) && |
@@ -614,6 +615,39 @@ static int read_balance(struct r1conf *conf, struct r1bio *r1_bio, int *max_sect | |||
614 | return best_disk; | 615 | return best_disk; |
615 | } | 616 | } |
616 | 617 | ||
618 | static int raid1_mergeable_bvec(struct request_queue *q, | ||
619 | struct bvec_merge_data *bvm, | ||
620 | struct bio_vec *biovec) | ||
621 | { | ||
622 | struct mddev *mddev = q->queuedata; | ||
623 | struct r1conf *conf = mddev->private; | ||
624 | sector_t sector = bvm->bi_sector + get_start_sect(bvm->bi_bdev); | ||
625 | int max = biovec->bv_len; | ||
626 | |||
627 | if (mddev->merge_check_needed) { | ||
628 | int disk; | ||
629 | rcu_read_lock(); | ||
630 | for (disk = 0; disk < conf->raid_disks * 2; disk++) { | ||
631 | struct md_rdev *rdev = rcu_dereference( | ||
632 | conf->mirrors[disk].rdev); | ||
633 | if (rdev && !test_bit(Faulty, &rdev->flags)) { | ||
634 | struct request_queue *q = | ||
635 | bdev_get_queue(rdev->bdev); | ||
636 | if (q->merge_bvec_fn) { | ||
637 | bvm->bi_sector = sector + | ||
638 | rdev->data_offset; | ||
639 | bvm->bi_bdev = rdev->bdev; | ||
640 | max = min(max, q->merge_bvec_fn( | ||
641 | q, bvm, biovec)); | ||
642 | } | ||
643 | } | ||
644 | } | ||
645 | rcu_read_unlock(); | ||
646 | } | ||
647 | return max; | ||
648 | |||
649 | } | ||
650 | |||
617 | int md_raid1_congested(struct mddev *mddev, int bits) | 651 | int md_raid1_congested(struct mddev *mddev, int bits) |
618 | { | 652 | { |
619 | struct r1conf *conf = mddev->private; | 653 | struct r1conf *conf = mddev->private; |
@@ -737,9 +771,22 @@ static void wait_barrier(struct r1conf *conf) | |||
737 | spin_lock_irq(&conf->resync_lock); | 771 | spin_lock_irq(&conf->resync_lock); |
738 | if (conf->barrier) { | 772 | if (conf->barrier) { |
739 | conf->nr_waiting++; | 773 | conf->nr_waiting++; |
740 | wait_event_lock_irq(conf->wait_barrier, !conf->barrier, | 774 | /* Wait for the barrier to drop. |
775 | * However if there are already pending | ||
776 | * requests (preventing the barrier from | ||
777 | * rising completely), and the | ||
778 | * pre-process bio queue isn't empty, | ||
779 | * then don't wait, as we need to empty | ||
780 | * that queue to get the nr_pending | ||
781 | * count down. | ||
782 | */ | ||
783 | wait_event_lock_irq(conf->wait_barrier, | ||
784 | !conf->barrier || | ||
785 | (conf->nr_pending && | ||
786 | current->bio_list && | ||
787 | !bio_list_empty(current->bio_list)), | ||
741 | conf->resync_lock, | 788 | conf->resync_lock, |
742 | ); | 789 | ); |
743 | conf->nr_waiting--; | 790 | conf->nr_waiting--; |
744 | } | 791 | } |
745 | conf->nr_pending++; | 792 | conf->nr_pending++; |
@@ -1002,7 +1049,8 @@ read_again: | |||
1002 | break; | 1049 | break; |
1003 | } | 1050 | } |
1004 | r1_bio->bios[i] = NULL; | 1051 | r1_bio->bios[i] = NULL; |
1005 | if (!rdev || test_bit(Faulty, &rdev->flags)) { | 1052 | if (!rdev || test_bit(Faulty, &rdev->flags) |
1053 | || test_bit(Unmerged, &rdev->flags)) { | ||
1006 | if (i < conf->raid_disks) | 1054 | if (i < conf->raid_disks) |
1007 | set_bit(R1BIO_Degraded, &r1_bio->state); | 1055 | set_bit(R1BIO_Degraded, &r1_bio->state); |
1008 | continue; | 1056 | continue; |
@@ -1322,6 +1370,7 @@ static int raid1_add_disk(struct mddev *mddev, struct md_rdev *rdev) | |||
1322 | struct mirror_info *p; | 1370 | struct mirror_info *p; |
1323 | int first = 0; | 1371 | int first = 0; |
1324 | int last = conf->raid_disks - 1; | 1372 | int last = conf->raid_disks - 1; |
1373 | struct request_queue *q = bdev_get_queue(rdev->bdev); | ||
1325 | 1374 | ||
1326 | if (mddev->recovery_disabled == conf->recovery_disabled) | 1375 | if (mddev->recovery_disabled == conf->recovery_disabled) |
1327 | return -EBUSY; | 1376 | return -EBUSY; |
@@ -1329,23 +1378,17 @@ static int raid1_add_disk(struct mddev *mddev, struct md_rdev *rdev) | |||
1329 | if (rdev->raid_disk >= 0) | 1378 | if (rdev->raid_disk >= 0) |
1330 | first = last = rdev->raid_disk; | 1379 | first = last = rdev->raid_disk; |
1331 | 1380 | ||
1381 | if (q->merge_bvec_fn) { | ||
1382 | set_bit(Unmerged, &rdev->flags); | ||
1383 | mddev->merge_check_needed = 1; | ||
1384 | } | ||
1385 | |||
1332 | for (mirror = first; mirror <= last; mirror++) { | 1386 | for (mirror = first; mirror <= last; mirror++) { |
1333 | p = conf->mirrors+mirror; | 1387 | p = conf->mirrors+mirror; |
1334 | if (!p->rdev) { | 1388 | if (!p->rdev) { |
1335 | 1389 | ||
1336 | disk_stack_limits(mddev->gendisk, rdev->bdev, | 1390 | disk_stack_limits(mddev->gendisk, rdev->bdev, |
1337 | rdev->data_offset << 9); | 1391 | rdev->data_offset << 9); |
1338 | /* as we don't honour merge_bvec_fn, we must | ||
1339 | * never risk violating it, so limit | ||
1340 | * ->max_segments to one lying with a single | ||
1341 | * page, as a one page request is never in | ||
1342 | * violation. | ||
1343 | */ | ||
1344 | if (rdev->bdev->bd_disk->queue->merge_bvec_fn) { | ||
1345 | blk_queue_max_segments(mddev->queue, 1); | ||
1346 | blk_queue_segment_boundary(mddev->queue, | ||
1347 | PAGE_CACHE_SIZE - 1); | ||
1348 | } | ||
1349 | 1392 | ||
1350 | p->head_position = 0; | 1393 | p->head_position = 0; |
1351 | rdev->raid_disk = mirror; | 1394 | rdev->raid_disk = mirror; |
@@ -1370,6 +1413,19 @@ static int raid1_add_disk(struct mddev *mddev, struct md_rdev *rdev) | |||
1370 | break; | 1413 | break; |
1371 | } | 1414 | } |
1372 | } | 1415 | } |
1416 | if (err == 0 && test_bit(Unmerged, &rdev->flags)) { | ||
1417 | /* Some requests might not have seen this new | ||
1418 | * merge_bvec_fn. We must wait for them to complete | ||
1419 | * before merging the device fully. | ||
1420 | * First we make sure any code which has tested | ||
1421 | * our function has submitted the request, then | ||
1422 | * we wait for all outstanding requests to complete. | ||
1423 | */ | ||
1424 | synchronize_sched(); | ||
1425 | raise_barrier(conf); | ||
1426 | lower_barrier(conf); | ||
1427 | clear_bit(Unmerged, &rdev->flags); | ||
1428 | } | ||
1373 | md_integrity_add_rdev(rdev, mddev); | 1429 | md_integrity_add_rdev(rdev, mddev); |
1374 | print_conf(conf); | 1430 | print_conf(conf); |
1375 | return err; | 1431 | return err; |
@@ -2491,7 +2547,7 @@ static struct r1conf *setup_conf(struct mddev *mddev) | |||
2491 | 2547 | ||
2492 | err = -EINVAL; | 2548 | err = -EINVAL; |
2493 | spin_lock_init(&conf->device_lock); | 2549 | spin_lock_init(&conf->device_lock); |
2494 | list_for_each_entry(rdev, &mddev->disks, same_set) { | 2550 | rdev_for_each(rdev, mddev) { |
2495 | int disk_idx = rdev->raid_disk; | 2551 | int disk_idx = rdev->raid_disk; |
2496 | if (disk_idx >= mddev->raid_disks | 2552 | if (disk_idx >= mddev->raid_disks |
2497 | || disk_idx < 0) | 2553 | || disk_idx < 0) |
@@ -2609,20 +2665,11 @@ static int run(struct mddev *mddev) | |||
2609 | if (IS_ERR(conf)) | 2665 | if (IS_ERR(conf)) |
2610 | return PTR_ERR(conf); | 2666 | return PTR_ERR(conf); |
2611 | 2667 | ||
2612 | list_for_each_entry(rdev, &mddev->disks, same_set) { | 2668 | rdev_for_each(rdev, mddev) { |
2613 | if (!mddev->gendisk) | 2669 | if (!mddev->gendisk) |
2614 | continue; | 2670 | continue; |
2615 | disk_stack_limits(mddev->gendisk, rdev->bdev, | 2671 | disk_stack_limits(mddev->gendisk, rdev->bdev, |
2616 | rdev->data_offset << 9); | 2672 | rdev->data_offset << 9); |
2617 | /* as we don't honour merge_bvec_fn, we must never risk | ||
2618 | * violating it, so limit ->max_segments to 1 lying within | ||
2619 | * a single page, as a one page request is never in violation. | ||
2620 | */ | ||
2621 | if (rdev->bdev->bd_disk->queue->merge_bvec_fn) { | ||
2622 | blk_queue_max_segments(mddev->queue, 1); | ||
2623 | blk_queue_segment_boundary(mddev->queue, | ||
2624 | PAGE_CACHE_SIZE - 1); | ||
2625 | } | ||
2626 | } | 2673 | } |
2627 | 2674 | ||
2628 | mddev->degraded = 0; | 2675 | mddev->degraded = 0; |
@@ -2656,6 +2703,7 @@ static int run(struct mddev *mddev) | |||
2656 | if (mddev->queue) { | 2703 | if (mddev->queue) { |
2657 | mddev->queue->backing_dev_info.congested_fn = raid1_congested; | 2704 | mddev->queue->backing_dev_info.congested_fn = raid1_congested; |
2658 | mddev->queue->backing_dev_info.congested_data = mddev; | 2705 | mddev->queue->backing_dev_info.congested_data = mddev; |
2706 | blk_queue_merge_bvec(mddev->queue, raid1_mergeable_bvec); | ||
2659 | } | 2707 | } |
2660 | return md_integrity_register(mddev); | 2708 | return md_integrity_register(mddev); |
2661 | } | 2709 | } |
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 58c44d6453a0..3540316886f2 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c | |||
@@ -586,25 +586,68 @@ static sector_t raid10_find_virt(struct r10conf *conf, sector_t sector, int dev) | |||
586 | * @biovec: the request that could be merged to it. | 586 | * @biovec: the request that could be merged to it. |
587 | * | 587 | * |
588 | * Return amount of bytes we can accept at this offset | 588 | * Return amount of bytes we can accept at this offset |
589 | * If near_copies == raid_disk, there are no striping issues, | 589 | * This requires checking for end-of-chunk if near_copies != raid_disks, |
590 | * but in that case, the function isn't called at all. | 590 | * and for subordinate merge_bvec_fns if merge_check_needed. |
591 | */ | 591 | */ |
592 | static int raid10_mergeable_bvec(struct request_queue *q, | 592 | static int raid10_mergeable_bvec(struct request_queue *q, |
593 | struct bvec_merge_data *bvm, | 593 | struct bvec_merge_data *bvm, |
594 | struct bio_vec *biovec) | 594 | struct bio_vec *biovec) |
595 | { | 595 | { |
596 | struct mddev *mddev = q->queuedata; | 596 | struct mddev *mddev = q->queuedata; |
597 | struct r10conf *conf = mddev->private; | ||
597 | sector_t sector = bvm->bi_sector + get_start_sect(bvm->bi_bdev); | 598 | sector_t sector = bvm->bi_sector + get_start_sect(bvm->bi_bdev); |
598 | int max; | 599 | int max; |
599 | unsigned int chunk_sectors = mddev->chunk_sectors; | 600 | unsigned int chunk_sectors = mddev->chunk_sectors; |
600 | unsigned int bio_sectors = bvm->bi_size >> 9; | 601 | unsigned int bio_sectors = bvm->bi_size >> 9; |
601 | 602 | ||
602 | max = (chunk_sectors - ((sector & (chunk_sectors - 1)) + bio_sectors)) << 9; | 603 | if (conf->near_copies < conf->raid_disks) { |
603 | if (max < 0) max = 0; /* bio_add cannot handle a negative return */ | 604 | max = (chunk_sectors - ((sector & (chunk_sectors - 1)) |
604 | if (max <= biovec->bv_len && bio_sectors == 0) | 605 | + bio_sectors)) << 9; |
605 | return biovec->bv_len; | 606 | if (max < 0) |
606 | else | 607 | /* bio_add cannot handle a negative return */ |
607 | return max; | 608 | max = 0; |
609 | if (max <= biovec->bv_len && bio_sectors == 0) | ||
610 | return biovec->bv_len; | ||
611 | } else | ||
612 | max = biovec->bv_len; | ||
613 | |||
614 | if (mddev->merge_check_needed) { | ||
615 | struct r10bio r10_bio; | ||
616 | int s; | ||
617 | r10_bio.sector = sector; | ||
618 | raid10_find_phys(conf, &r10_bio); | ||
619 | rcu_read_lock(); | ||
620 | for (s = 0; s < conf->copies; s++) { | ||
621 | int disk = r10_bio.devs[s].devnum; | ||
622 | struct md_rdev *rdev = rcu_dereference( | ||
623 | conf->mirrors[disk].rdev); | ||
624 | if (rdev && !test_bit(Faulty, &rdev->flags)) { | ||
625 | struct request_queue *q = | ||
626 | bdev_get_queue(rdev->bdev); | ||
627 | if (q->merge_bvec_fn) { | ||
628 | bvm->bi_sector = r10_bio.devs[s].addr | ||
629 | + rdev->data_offset; | ||
630 | bvm->bi_bdev = rdev->bdev; | ||
631 | max = min(max, q->merge_bvec_fn( | ||
632 | q, bvm, biovec)); | ||
633 | } | ||
634 | } | ||
635 | rdev = rcu_dereference(conf->mirrors[disk].replacement); | ||
636 | if (rdev && !test_bit(Faulty, &rdev->flags)) { | ||
637 | struct request_queue *q = | ||
638 | bdev_get_queue(rdev->bdev); | ||
639 | if (q->merge_bvec_fn) { | ||
640 | bvm->bi_sector = r10_bio.devs[s].addr | ||
641 | + rdev->data_offset; | ||
642 | bvm->bi_bdev = rdev->bdev; | ||
643 | max = min(max, q->merge_bvec_fn( | ||
644 | q, bvm, biovec)); | ||
645 | } | ||
646 | } | ||
647 | } | ||
648 | rcu_read_unlock(); | ||
649 | } | ||
650 | return max; | ||
608 | } | 651 | } |
609 | 652 | ||
610 | /* | 653 | /* |
@@ -668,11 +711,12 @@ retry: | |||
668 | disk = r10_bio->devs[slot].devnum; | 711 | disk = r10_bio->devs[slot].devnum; |
669 | rdev = rcu_dereference(conf->mirrors[disk].replacement); | 712 | rdev = rcu_dereference(conf->mirrors[disk].replacement); |
670 | if (rdev == NULL || test_bit(Faulty, &rdev->flags) || | 713 | if (rdev == NULL || test_bit(Faulty, &rdev->flags) || |
714 | test_bit(Unmerged, &rdev->flags) || | ||
671 | r10_bio->devs[slot].addr + sectors > rdev->recovery_offset) | 715 | r10_bio->devs[slot].addr + sectors > rdev->recovery_offset) |
672 | rdev = rcu_dereference(conf->mirrors[disk].rdev); | 716 | rdev = rcu_dereference(conf->mirrors[disk].rdev); |
673 | if (rdev == NULL) | 717 | if (rdev == NULL || |
674 | continue; | 718 | test_bit(Faulty, &rdev->flags) || |
675 | if (test_bit(Faulty, &rdev->flags)) | 719 | test_bit(Unmerged, &rdev->flags)) |
676 | continue; | 720 | continue; |
677 | if (!test_bit(In_sync, &rdev->flags) && | 721 | if (!test_bit(In_sync, &rdev->flags) && |
678 | r10_bio->devs[slot].addr + sectors > rdev->recovery_offset) | 722 | r10_bio->devs[slot].addr + sectors > rdev->recovery_offset) |
@@ -863,9 +907,22 @@ static void wait_barrier(struct r10conf *conf) | |||
863 | spin_lock_irq(&conf->resync_lock); | 907 | spin_lock_irq(&conf->resync_lock); |
864 | if (conf->barrier) { | 908 | if (conf->barrier) { |
865 | conf->nr_waiting++; | 909 | conf->nr_waiting++; |
866 | wait_event_lock_irq(conf->wait_barrier, !conf->barrier, | 910 | /* Wait for the barrier to drop. |
911 | * However if there are already pending | ||
912 | * requests (preventing the barrier from | ||
913 | * rising completely), and the | ||
914 | * pre-process bio queue isn't empty, | ||
915 | * then don't wait, as we need to empty | ||
916 | * that queue to get the nr_pending | ||
917 | * count down. | ||
918 | */ | ||
919 | wait_event_lock_irq(conf->wait_barrier, | ||
920 | !conf->barrier || | ||
921 | (conf->nr_pending && | ||
922 | current->bio_list && | ||
923 | !bio_list_empty(current->bio_list)), | ||
867 | conf->resync_lock, | 924 | conf->resync_lock, |
868 | ); | 925 | ); |
869 | conf->nr_waiting--; | 926 | conf->nr_waiting--; |
870 | } | 927 | } |
871 | conf->nr_pending++; | 928 | conf->nr_pending++; |
@@ -1121,12 +1178,14 @@ retry_write: | |||
1121 | blocked_rdev = rrdev; | 1178 | blocked_rdev = rrdev; |
1122 | break; | 1179 | break; |
1123 | } | 1180 | } |
1124 | if (rrdev && test_bit(Faulty, &rrdev->flags)) | 1181 | if (rrdev && (test_bit(Faulty, &rrdev->flags) |
1182 | || test_bit(Unmerged, &rrdev->flags))) | ||
1125 | rrdev = NULL; | 1183 | rrdev = NULL; |
1126 | 1184 | ||
1127 | r10_bio->devs[i].bio = NULL; | 1185 | r10_bio->devs[i].bio = NULL; |
1128 | r10_bio->devs[i].repl_bio = NULL; | 1186 | r10_bio->devs[i].repl_bio = NULL; |
1129 | if (!rdev || test_bit(Faulty, &rdev->flags)) { | 1187 | if (!rdev || test_bit(Faulty, &rdev->flags) || |
1188 | test_bit(Unmerged, &rdev->flags)) { | ||
1130 | set_bit(R10BIO_Degraded, &r10_bio->state); | 1189 | set_bit(R10BIO_Degraded, &r10_bio->state); |
1131 | continue; | 1190 | continue; |
1132 | } | 1191 | } |
@@ -1477,18 +1536,24 @@ static int raid10_add_disk(struct mddev *mddev, struct md_rdev *rdev) | |||
1477 | int mirror; | 1536 | int mirror; |
1478 | int first = 0; | 1537 | int first = 0; |
1479 | int last = conf->raid_disks - 1; | 1538 | int last = conf->raid_disks - 1; |
1539 | struct request_queue *q = bdev_get_queue(rdev->bdev); | ||
1480 | 1540 | ||
1481 | if (mddev->recovery_cp < MaxSector) | 1541 | if (mddev->recovery_cp < MaxSector) |
1482 | /* only hot-add to in-sync arrays, as recovery is | 1542 | /* only hot-add to in-sync arrays, as recovery is |
1483 | * very different from resync | 1543 | * very different from resync |
1484 | */ | 1544 | */ |
1485 | return -EBUSY; | 1545 | return -EBUSY; |
1486 | if (!enough(conf, -1)) | 1546 | if (rdev->saved_raid_disk < 0 && !enough(conf, -1)) |
1487 | return -EINVAL; | 1547 | return -EINVAL; |
1488 | 1548 | ||
1489 | if (rdev->raid_disk >= 0) | 1549 | if (rdev->raid_disk >= 0) |
1490 | first = last = rdev->raid_disk; | 1550 | first = last = rdev->raid_disk; |
1491 | 1551 | ||
1552 | if (q->merge_bvec_fn) { | ||
1553 | set_bit(Unmerged, &rdev->flags); | ||
1554 | mddev->merge_check_needed = 1; | ||
1555 | } | ||
1556 | |||
1492 | if (rdev->saved_raid_disk >= first && | 1557 | if (rdev->saved_raid_disk >= first && |
1493 | conf->mirrors[rdev->saved_raid_disk].rdev == NULL) | 1558 | conf->mirrors[rdev->saved_raid_disk].rdev == NULL) |
1494 | mirror = rdev->saved_raid_disk; | 1559 | mirror = rdev->saved_raid_disk; |
@@ -1508,11 +1573,6 @@ static int raid10_add_disk(struct mddev *mddev, struct md_rdev *rdev) | |||
1508 | err = 0; | 1573 | err = 0; |
1509 | disk_stack_limits(mddev->gendisk, rdev->bdev, | 1574 | disk_stack_limits(mddev->gendisk, rdev->bdev, |
1510 | rdev->data_offset << 9); | 1575 | rdev->data_offset << 9); |
1511 | if (rdev->bdev->bd_disk->queue->merge_bvec_fn) { | ||
1512 | blk_queue_max_segments(mddev->queue, 1); | ||
1513 | blk_queue_segment_boundary(mddev->queue, | ||
1514 | PAGE_CACHE_SIZE - 1); | ||
1515 | } | ||
1516 | conf->fullsync = 1; | 1576 | conf->fullsync = 1; |
1517 | rcu_assign_pointer(p->replacement, rdev); | 1577 | rcu_assign_pointer(p->replacement, rdev); |
1518 | break; | 1578 | break; |
@@ -1520,17 +1580,6 @@ static int raid10_add_disk(struct mddev *mddev, struct md_rdev *rdev) | |||
1520 | 1580 | ||
1521 | disk_stack_limits(mddev->gendisk, rdev->bdev, | 1581 | disk_stack_limits(mddev->gendisk, rdev->bdev, |
1522 | rdev->data_offset << 9); | 1582 | rdev->data_offset << 9); |
1523 | /* as we don't honour merge_bvec_fn, we must | ||
1524 | * never risk violating it, so limit | ||
1525 | * ->max_segments to one lying with a single | ||
1526 | * page, as a one page request is never in | ||
1527 | * violation. | ||
1528 | */ | ||
1529 | if (rdev->bdev->bd_disk->queue->merge_bvec_fn) { | ||
1530 | blk_queue_max_segments(mddev->queue, 1); | ||
1531 | blk_queue_segment_boundary(mddev->queue, | ||
1532 | PAGE_CACHE_SIZE - 1); | ||
1533 | } | ||
1534 | 1583 | ||
1535 | p->head_position = 0; | 1584 | p->head_position = 0; |
1536 | p->recovery_disabled = mddev->recovery_disabled - 1; | 1585 | p->recovery_disabled = mddev->recovery_disabled - 1; |
@@ -1541,7 +1590,19 @@ static int raid10_add_disk(struct mddev *mddev, struct md_rdev *rdev) | |||
1541 | rcu_assign_pointer(p->rdev, rdev); | 1590 | rcu_assign_pointer(p->rdev, rdev); |
1542 | break; | 1591 | break; |
1543 | } | 1592 | } |
1544 | 1593 | if (err == 0 && test_bit(Unmerged, &rdev->flags)) { | |
1594 | /* Some requests might not have seen this new | ||
1595 | * merge_bvec_fn. We must wait for them to complete | ||
1596 | * before merging the device fully. | ||
1597 | * First we make sure any code which has tested | ||
1598 | * our function has submitted the request, then | ||
1599 | * we wait for all outstanding requests to complete. | ||
1600 | */ | ||
1601 | synchronize_sched(); | ||
1602 | raise_barrier(conf, 0); | ||
1603 | lower_barrier(conf); | ||
1604 | clear_bit(Unmerged, &rdev->flags); | ||
1605 | } | ||
1545 | md_integrity_add_rdev(rdev, mddev); | 1606 | md_integrity_add_rdev(rdev, mddev); |
1546 | print_conf(conf); | 1607 | print_conf(conf); |
1547 | return err; | 1608 | return err; |
@@ -1682,10 +1743,8 @@ static void end_sync_write(struct bio *bio, int error) | |||
1682 | d = find_bio_disk(conf, r10_bio, bio, &slot, &repl); | 1743 | d = find_bio_disk(conf, r10_bio, bio, &slot, &repl); |
1683 | if (repl) | 1744 | if (repl) |
1684 | rdev = conf->mirrors[d].replacement; | 1745 | rdev = conf->mirrors[d].replacement; |
1685 | if (!rdev) { | 1746 | else |
1686 | smp_mb(); | ||
1687 | rdev = conf->mirrors[d].rdev; | 1747 | rdev = conf->mirrors[d].rdev; |
1688 | } | ||
1689 | 1748 | ||
1690 | if (!uptodate) { | 1749 | if (!uptodate) { |
1691 | if (repl) | 1750 | if (repl) |
@@ -2087,6 +2146,7 @@ static void fix_read_error(struct r10conf *conf, struct mddev *mddev, struct r10 | |||
2087 | d = r10_bio->devs[sl].devnum; | 2146 | d = r10_bio->devs[sl].devnum; |
2088 | rdev = rcu_dereference(conf->mirrors[d].rdev); | 2147 | rdev = rcu_dereference(conf->mirrors[d].rdev); |
2089 | if (rdev && | 2148 | if (rdev && |
2149 | !test_bit(Unmerged, &rdev->flags) && | ||
2090 | test_bit(In_sync, &rdev->flags) && | 2150 | test_bit(In_sync, &rdev->flags) && |
2091 | is_badblock(rdev, r10_bio->devs[sl].addr + sect, s, | 2151 | is_badblock(rdev, r10_bio->devs[sl].addr + sect, s, |
2092 | &first_bad, &bad_sectors) == 0) { | 2152 | &first_bad, &bad_sectors) == 0) { |
@@ -2140,6 +2200,7 @@ static void fix_read_error(struct r10conf *conf, struct mddev *mddev, struct r10 | |||
2140 | d = r10_bio->devs[sl].devnum; | 2200 | d = r10_bio->devs[sl].devnum; |
2141 | rdev = rcu_dereference(conf->mirrors[d].rdev); | 2201 | rdev = rcu_dereference(conf->mirrors[d].rdev); |
2142 | if (!rdev || | 2202 | if (!rdev || |
2203 | test_bit(Unmerged, &rdev->flags) || | ||
2143 | !test_bit(In_sync, &rdev->flags)) | 2204 | !test_bit(In_sync, &rdev->flags)) |
2144 | continue; | 2205 | continue; |
2145 | 2206 | ||
@@ -3242,7 +3303,7 @@ static int run(struct mddev *mddev) | |||
3242 | blk_queue_io_opt(mddev->queue, chunk_size * | 3303 | blk_queue_io_opt(mddev->queue, chunk_size * |
3243 | (conf->raid_disks / conf->near_copies)); | 3304 | (conf->raid_disks / conf->near_copies)); |
3244 | 3305 | ||
3245 | list_for_each_entry(rdev, &mddev->disks, same_set) { | 3306 | rdev_for_each(rdev, mddev) { |
3246 | 3307 | ||
3247 | disk_idx = rdev->raid_disk; | 3308 | disk_idx = rdev->raid_disk; |
3248 | if (disk_idx >= conf->raid_disks | 3309 | if (disk_idx >= conf->raid_disks |
@@ -3262,15 +3323,6 @@ static int run(struct mddev *mddev) | |||
3262 | 3323 | ||
3263 | disk_stack_limits(mddev->gendisk, rdev->bdev, | 3324 | disk_stack_limits(mddev->gendisk, rdev->bdev, |
3264 | rdev->data_offset << 9); | 3325 | rdev->data_offset << 9); |
3265 | /* as we don't honour merge_bvec_fn, we must never risk | ||
3266 | * violating it, so limit max_segments to 1 lying | ||
3267 | * within a single page. | ||
3268 | */ | ||
3269 | if (rdev->bdev->bd_disk->queue->merge_bvec_fn) { | ||
3270 | blk_queue_max_segments(mddev->queue, 1); | ||
3271 | blk_queue_segment_boundary(mddev->queue, | ||
3272 | PAGE_CACHE_SIZE - 1); | ||
3273 | } | ||
3274 | 3326 | ||
3275 | disk->head_position = 0; | 3327 | disk->head_position = 0; |
3276 | } | 3328 | } |
@@ -3334,8 +3386,7 @@ static int run(struct mddev *mddev) | |||
3334 | mddev->queue->backing_dev_info.ra_pages = 2* stripe; | 3386 | mddev->queue->backing_dev_info.ra_pages = 2* stripe; |
3335 | } | 3387 | } |
3336 | 3388 | ||
3337 | if (conf->near_copies < conf->raid_disks) | 3389 | blk_queue_merge_bvec(mddev->queue, raid10_mergeable_bvec); |
3338 | blk_queue_merge_bvec(mddev->queue, raid10_mergeable_bvec); | ||
3339 | 3390 | ||
3340 | if (md_integrity_register(mddev)) | 3391 | if (md_integrity_register(mddev)) |
3341 | goto out_free_conf; | 3392 | goto out_free_conf; |
@@ -3385,6 +3436,43 @@ static void raid10_quiesce(struct mddev *mddev, int state) | |||
3385 | } | 3436 | } |
3386 | } | 3437 | } |
3387 | 3438 | ||
3439 | static int raid10_resize(struct mddev *mddev, sector_t sectors) | ||
3440 | { | ||
3441 | /* Resize of 'far' arrays is not supported. | ||
3442 | * For 'near' and 'offset' arrays we can set the | ||
3443 | * number of sectors used to be an appropriate multiple | ||
3444 | * of the chunk size. | ||
3445 | * For 'offset', this is far_copies*chunksize. | ||
3446 | * For 'near' the multiplier is the LCM of | ||
3447 | * near_copies and raid_disks. | ||
3448 | * So if far_copies > 1 && !far_offset, fail. | ||
3449 | * Else find LCM(raid_disks, near_copies)*far_copies and | ||
3450 | * multiply by chunk_size. Then round to this number. | ||
3451 | * This is mostly done by raid10_size() | ||
3452 | */ | ||
3453 | struct r10conf *conf = mddev->private; | ||
3454 | sector_t oldsize, size; | ||
3455 | |||
3456 | if (conf->far_copies > 1 && !conf->far_offset) | ||
3457 | return -EINVAL; | ||
3458 | |||
3459 | oldsize = raid10_size(mddev, 0, 0); | ||
3460 | size = raid10_size(mddev, sectors, 0); | ||
3461 | md_set_array_sectors(mddev, size); | ||
3462 | if (mddev->array_sectors > size) | ||
3463 | return -EINVAL; | ||
3464 | set_capacity(mddev->gendisk, mddev->array_sectors); | ||
3465 | revalidate_disk(mddev->gendisk); | ||
3466 | if (sectors > mddev->dev_sectors && | ||
3467 | mddev->recovery_cp > oldsize) { | ||
3468 | mddev->recovery_cp = oldsize; | ||
3469 | set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); | ||
3470 | } | ||
3471 | mddev->dev_sectors = sectors; | ||
3472 | mddev->resync_max_sectors = size; | ||
3473 | return 0; | ||
3474 | } | ||
3475 | |||
3388 | static void *raid10_takeover_raid0(struct mddev *mddev) | 3476 | static void *raid10_takeover_raid0(struct mddev *mddev) |
3389 | { | 3477 | { |
3390 | struct md_rdev *rdev; | 3478 | struct md_rdev *rdev; |
@@ -3408,7 +3496,7 @@ static void *raid10_takeover_raid0(struct mddev *mddev) | |||
3408 | 3496 | ||
3409 | conf = setup_conf(mddev); | 3497 | conf = setup_conf(mddev); |
3410 | if (!IS_ERR(conf)) { | 3498 | if (!IS_ERR(conf)) { |
3411 | list_for_each_entry(rdev, &mddev->disks, same_set) | 3499 | rdev_for_each(rdev, mddev) |
3412 | if (rdev->raid_disk >= 0) | 3500 | if (rdev->raid_disk >= 0) |
3413 | rdev->new_raid_disk = rdev->raid_disk * 2; | 3501 | rdev->new_raid_disk = rdev->raid_disk * 2; |
3414 | conf->barrier = 1; | 3502 | conf->barrier = 1; |
@@ -3454,6 +3542,7 @@ static struct md_personality raid10_personality = | |||
3454 | .sync_request = sync_request, | 3542 | .sync_request = sync_request, |
3455 | .quiesce = raid10_quiesce, | 3543 | .quiesce = raid10_quiesce, |
3456 | .size = raid10_size, | 3544 | .size = raid10_size, |
3545 | .resize = raid10_resize, | ||
3457 | .takeover = raid10_takeover, | 3546 | .takeover = raid10_takeover, |
3458 | }; | 3547 | }; |
3459 | 3548 | ||
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 360f2b98f62b..23ac880bba9a 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c | |||
@@ -208,11 +208,10 @@ static void __release_stripe(struct r5conf *conf, struct stripe_head *sh) | |||
208 | md_wakeup_thread(conf->mddev->thread); | 208 | md_wakeup_thread(conf->mddev->thread); |
209 | } else { | 209 | } else { |
210 | BUG_ON(stripe_operations_active(sh)); | 210 | BUG_ON(stripe_operations_active(sh)); |
211 | if (test_and_clear_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) { | 211 | if (test_and_clear_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) |
212 | atomic_dec(&conf->preread_active_stripes); | 212 | if (atomic_dec_return(&conf->preread_active_stripes) |
213 | if (atomic_read(&conf->preread_active_stripes) < IO_THRESHOLD) | 213 | < IO_THRESHOLD) |
214 | md_wakeup_thread(conf->mddev->thread); | 214 | md_wakeup_thread(conf->mddev->thread); |
215 | } | ||
216 | atomic_dec(&conf->active_stripes); | 215 | atomic_dec(&conf->active_stripes); |
217 | if (!test_bit(STRIPE_EXPANDING, &sh->state)) { | 216 | if (!test_bit(STRIPE_EXPANDING, &sh->state)) { |
218 | list_add_tail(&sh->lru, &conf->inactive_list); | 217 | list_add_tail(&sh->lru, &conf->inactive_list); |
@@ -4843,7 +4842,7 @@ static struct r5conf *setup_conf(struct mddev *mddev) | |||
4843 | 4842 | ||
4844 | pr_debug("raid456: run(%s) called.\n", mdname(mddev)); | 4843 | pr_debug("raid456: run(%s) called.\n", mdname(mddev)); |
4845 | 4844 | ||
4846 | list_for_each_entry(rdev, &mddev->disks, same_set) { | 4845 | rdev_for_each(rdev, mddev) { |
4847 | raid_disk = rdev->raid_disk; | 4846 | raid_disk = rdev->raid_disk; |
4848 | if (raid_disk >= max_disks | 4847 | if (raid_disk >= max_disks |
4849 | || raid_disk < 0) | 4848 | || raid_disk < 0) |
@@ -5178,7 +5177,7 @@ static int run(struct mddev *mddev) | |||
5178 | blk_queue_io_opt(mddev->queue, chunk_size * | 5177 | blk_queue_io_opt(mddev->queue, chunk_size * |
5179 | (conf->raid_disks - conf->max_degraded)); | 5178 | (conf->raid_disks - conf->max_degraded)); |
5180 | 5179 | ||
5181 | list_for_each_entry(rdev, &mddev->disks, same_set) | 5180 | rdev_for_each(rdev, mddev) |
5182 | disk_stack_limits(mddev->gendisk, rdev->bdev, | 5181 | disk_stack_limits(mddev->gendisk, rdev->bdev, |
5183 | rdev->data_offset << 9); | 5182 | rdev->data_offset << 9); |
5184 | } | 5183 | } |
@@ -5362,7 +5361,7 @@ static int raid5_add_disk(struct mddev *mddev, struct md_rdev *rdev) | |||
5362 | if (mddev->recovery_disabled == conf->recovery_disabled) | 5361 | if (mddev->recovery_disabled == conf->recovery_disabled) |
5363 | return -EBUSY; | 5362 | return -EBUSY; |
5364 | 5363 | ||
5365 | if (has_failed(conf)) | 5364 | if (rdev->saved_raid_disk < 0 && has_failed(conf)) |
5366 | /* no point adding a device */ | 5365 | /* no point adding a device */ |
5367 | return -EINVAL; | 5366 | return -EINVAL; |
5368 | 5367 | ||
@@ -5501,7 +5500,7 @@ static int raid5_start_reshape(struct mddev *mddev) | |||
5501 | if (!check_stripe_cache(mddev)) | 5500 | if (!check_stripe_cache(mddev)) |
5502 | return -ENOSPC; | 5501 | return -ENOSPC; |
5503 | 5502 | ||
5504 | list_for_each_entry(rdev, &mddev->disks, same_set) | 5503 | rdev_for_each(rdev, mddev) |
5505 | if (!test_bit(In_sync, &rdev->flags) | 5504 | if (!test_bit(In_sync, &rdev->flags) |
5506 | && !test_bit(Faulty, &rdev->flags)) | 5505 | && !test_bit(Faulty, &rdev->flags)) |
5507 | spares++; | 5506 | spares++; |
@@ -5547,16 +5546,14 @@ static int raid5_start_reshape(struct mddev *mddev) | |||
5547 | * such devices during the reshape and confusion could result. | 5546 | * such devices during the reshape and confusion could result. |
5548 | */ | 5547 | */ |
5549 | if (mddev->delta_disks >= 0) { | 5548 | if (mddev->delta_disks >= 0) { |
5550 | int added_devices = 0; | 5549 | rdev_for_each(rdev, mddev) |
5551 | list_for_each_entry(rdev, &mddev->disks, same_set) | ||
5552 | if (rdev->raid_disk < 0 && | 5550 | if (rdev->raid_disk < 0 && |
5553 | !test_bit(Faulty, &rdev->flags)) { | 5551 | !test_bit(Faulty, &rdev->flags)) { |
5554 | if (raid5_add_disk(mddev, rdev) == 0) { | 5552 | if (raid5_add_disk(mddev, rdev) == 0) { |
5555 | if (rdev->raid_disk | 5553 | if (rdev->raid_disk |
5556 | >= conf->previous_raid_disks) { | 5554 | >= conf->previous_raid_disks) |
5557 | set_bit(In_sync, &rdev->flags); | 5555 | set_bit(In_sync, &rdev->flags); |
5558 | added_devices++; | 5556 | else |
5559 | } else | ||
5560 | rdev->recovery_offset = 0; | 5557 | rdev->recovery_offset = 0; |
5561 | 5558 | ||
5562 | if (sysfs_link_rdev(mddev, rdev)) | 5559 | if (sysfs_link_rdev(mddev, rdev)) |
@@ -5566,7 +5563,6 @@ static int raid5_start_reshape(struct mddev *mddev) | |||
5566 | && !test_bit(Faulty, &rdev->flags)) { | 5563 | && !test_bit(Faulty, &rdev->flags)) { |
5567 | /* This is a spare that was manually added */ | 5564 | /* This is a spare that was manually added */ |
5568 | set_bit(In_sync, &rdev->flags); | 5565 | set_bit(In_sync, &rdev->flags); |
5569 | added_devices++; | ||
5570 | } | 5566 | } |
5571 | 5567 | ||
5572 | /* When a reshape changes the number of devices, | 5568 | /* When a reshape changes the number of devices, |
@@ -5592,6 +5588,7 @@ static int raid5_start_reshape(struct mddev *mddev) | |||
5592 | spin_lock_irq(&conf->device_lock); | 5588 | spin_lock_irq(&conf->device_lock); |
5593 | mddev->raid_disks = conf->raid_disks = conf->previous_raid_disks; | 5589 | mddev->raid_disks = conf->raid_disks = conf->previous_raid_disks; |
5594 | conf->reshape_progress = MaxSector; | 5590 | conf->reshape_progress = MaxSector; |
5591 | mddev->reshape_position = MaxSector; | ||
5595 | spin_unlock_irq(&conf->device_lock); | 5592 | spin_unlock_irq(&conf->device_lock); |
5596 | return -EAGAIN; | 5593 | return -EAGAIN; |
5597 | } | 5594 | } |